1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
 *
 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_ddb.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>

#include <sys/conf.h>
#include <sys/domain.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mqueue.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/unistd.h>
#include <sys/user.h>
#include <sys/vnode.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <security/audit/audit.h>

#include <vm/uma.h>

#include <ddb/ddb.h>

/* Malloc types used by the descriptor and sigio machinery below. */
static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table");
static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader",
    "file desc to leader structures");
static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");

/* UMA zone from which struct file is allocated (used by falloc()). */
static uma_zone_t file_zone;


/* How to treat 'new' parameter when allocating a fd for do_dup().
 * DUP_VARIABLE: 'new' is a minimum; any free descriptor >= new may be used.
 * DUP_FIXED: 'new' is the exact descriptor to (re)use, as for dup2().
 */
enum dup_type { DUP_VARIABLE, DUP_FIXED };

static int do_dup(struct thread *td, enum dup_type type, int old, int new,
    register_t *retval);
static int fd_first_free(struct filedesc *, int, int);
static int fd_last_used(struct filedesc *, int, int);
static void fdgrowtable(struct filedesc *, int);
static void fdunused(struct filedesc *fdp, int fd);
static void fdused(struct filedesc *fdp, int fd);

/*
 * A process is initially started out with NDFILE descriptors stored within
 * this structure, selected to be enough for typical applications based on
 * the historical limit of 20 open files (and the usage of descriptors by
 * shells).  If these descriptors are exhausted, a larger descriptor table
 * may be allocated, up to a process' resource limit; the internal arrays
 * are then unused.
 */
#define NDFILE		20
#define NDSLOTSIZE	sizeof(NDSLOTTYPE)	/* bytes per bitmap word */
#define NDENTRIES	(NDSLOTSIZE * __CHAR_BIT)	/* bits per bitmap word */
#define NDSLOT(x)	((x) / NDENTRIES)	/* bitmap word holding fd x */
#define NDBIT(x)	((NDSLOTTYPE)1 << ((x) % NDENTRIES))	/* bit for fd x */
#define NDSLOTS(x)	(((x) + NDENTRIES - 1) / NDENTRIES)	/* words for x fds */

/*
 * Storage required per open file descriptor.
 */
#define OFILESIZE (sizeof(struct file *) + sizeof(char))

/*
 * Basic allocation of descriptors:
 * one of the above, plus arrays for NDFILE descriptors.
 */
struct filedesc0 {
	struct filedesc fd_fd;
	/*
	 * These arrays are used when the number of open files is
	 * <= NDFILE, and are then pointed to by the pointers above.
	 */
	struct file *fd_dfiles[NDFILE];
	char fd_dfileflags[NDFILE];
	NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];
};

/*
 * Descriptor management.
 */
volatile int openfiles;			/* actual number of open files */
struct mtx sigio_lock;			/* mtx to protect pointers to sigio */
void (*mq_fdclose)(struct thread *td, int fd, struct file *fp);

/* A mutex to protect the association between a proc and filedesc.
 */
static struct mtx fdesc_mtx;

/*
 * Find the first zero bit in the given bitmap, starting at low and not
 * exceeding size - 1.  Returns 'low' when low >= size, and 'size' when
 * no zero bit exists in range (callers treat both as "no free slot here").
 */
static int
fd_first_free(struct filedesc *fdp, int low, int size)
{
	NDSLOTTYPE *map = fdp->fd_map;
	NDSLOTTYPE mask;
	int off, maxoff;

	if (low >= size)
		return (low);

	off = NDSLOT(low);
	if (low % NDENTRIES) {
		/* Partial first word: mask off bits below 'low'. */
		mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES)));
		if ((mask &= ~map[off]) != 0UL)
			return (off * NDENTRIES + ffsl(mask) - 1);
		++off;
	}
	/* Scan whole words for any zero bit. */
	for (maxoff = NDSLOTS(size); off < maxoff; ++off)
		if (map[off] != ~0UL)
			return (off * NDENTRIES + ffsl(~map[off]) - 1);
	return (size);
}

/*
 * Find the highest non-zero bit in the given bitmap, starting at low and
 * not exceeding size - 1.  Returns low - 1 if no bit is set in range.
 */
static int
fd_last_used(struct filedesc *fdp, int low, int size)
{
	NDSLOTTYPE *map = fdp->fd_map;
	NDSLOTTYPE mask;
	int off, minoff;

	if (low >= size)
		return (-1);

	off = NDSLOT(size);
	if (size % NDENTRIES) {
		/* Partial last word: mask off bits at and above 'size'. */
		mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES));
		if ((mask &= map[off]) != 0)
			return (off * NDENTRIES + flsl(mask) - 1);
		--off;
	}
	/* Scan whole words downward for any set bit. */
	for (minoff = NDSLOT(low); off >= minoff; --off)
		if (map[off] != 0)
			return (off * NDENTRIES + flsl(map[off]) - 1);
	return (low - 1);
}

/*
 * Return non-zero if descriptor 'fd' is marked in use in fdp's bitmap.
 */
static int
fdisused(struct filedesc *fdp, int fd)
{
	KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
	    ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles));
	return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0);
}

/*
 * Mark a file descriptor as used.
 */
static void
fdused(struct filedesc *fdp, int fd)
{

	FILEDESC_XLOCK_ASSERT(fdp);
	KASSERT(!fdisused(fdp, fd),
	    ("fd already used"));

	fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd);
	if (fd > fdp->fd_lastfile)
		fdp->fd_lastfile = fd;
	/* If we consumed the cached lowest free fd, recompute it. */
	if (fd == fdp->fd_freefile)
		fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles);
}

/*
 * Mark a file descriptor as unused.
 */
static void
fdunused(struct filedesc *fdp, int fd)
{

	FILEDESC_XLOCK_ASSERT(fdp);
	KASSERT(fdisused(fdp, fd),
	    ("fd is already unused"));
	KASSERT(fdp->fd_ofiles[fd] == NULL,
	    ("fd is still in use"));

	fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd);
	/* Keep the cached lowest-free / highest-used hints coherent. */
	if (fd < fdp->fd_freefile)
		fdp->fd_freefile = fd;
	if (fd == fdp->fd_lastfile)
		fdp->fd_lastfile = fd_last_used(fdp, 0, fd);
}

/*
 * System calls on descriptors.
 */
#ifndef _SYS_SYSPROTO_H_
struct getdtablesize_args {
	int	dummy;
};
#endif
/* ARGSUSED */
int
getdtablesize(struct thread *td, struct getdtablesize_args *uap)
{
	struct proc *p = td->td_proc;

	/* Report the lesser of the per-process rlimit and the global cap. */
	PROC_LOCK(p);
	td->td_retval[0] =
	    min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
	PROC_UNLOCK(p);
	return (0);
}

/*
 * Duplicate a file descriptor to a particular value.
 *
 * Note: keep in mind that a potential race condition exists when closing
 * descriptors from a shared descriptor table (via rfork).
 */
#ifndef _SYS_SYSPROTO_H_
struct dup2_args {
	u_int	from;
	u_int	to;
};
#endif
/* ARGSUSED */
int
dup2(struct thread *td, struct dup2_args *uap)
{

	return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to,
	    td->td_retval));
}

/*
 * Duplicate a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct dup_args {
	u_int	fd;
};
#endif
/* ARGSUSED */
int
dup(struct thread *td, struct dup_args *uap)
{

	return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval));
}

/*
 * The file control system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct fcntl_args {
	int	fd;
	int	cmd;
	long	arg;
};
#endif
/* ARGSUSED */
int
fcntl(struct thread *td, struct fcntl_args *uap)
{
	struct flock fl;
	intptr_t arg;
	int error;

	error = 0;
	switch (uap->cmd) {
	case F_GETLK:
	case F_SETLK:
	case F_SETLKW:
		/* Lock commands pass a struct flock in from userland. */
		error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
		arg = (intptr_t)&fl;
		break;
	default:
		arg = uap->arg;
		break;
	}
	if (error)
		return (error);
	error = kern_fcntl(td, uap->fd, uap->cmd, arg);
	if (error)
		return (error);
	/* F_GETLK fills in the flock; copy the result back out. */
	if (uap->cmd == F_GETLK)
		error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
	return (error);
}

/*
 * Translate fd to its struct file in fdp, or NULL if the descriptor is
 * out of range or closed.  Caller must hold the filedesc lock and does
 * NOT get a reference on the file.
 */
static inline struct file *
fdtofp(int fd, struct filedesc *fdp)
{
	struct file *fp;

	FILEDESC_LOCK_ASSERT(fdp);
	if ((unsigned)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL)
		return (NULL);
	return (fp);
}

/*
 * Kernel implementation of fcntl(2).  'arg' is either the raw integer
 * argument or, for the lock commands, a pointer to an in-kernel
 * struct flock (see fcntl() above).
 */
int
kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
{
	struct filedesc *fdp;
	struct flock *flp;
	struct file *fp;
	struct proc *p;
	char *pop;
	struct vnode *vp;
	u_int newmin;
	int error, flg, tmp;
	int vfslocked;

	vfslocked = 0;
	error = 0;
	flg = F_POSIX;
	p = td->td_proc;
	fdp = p->p_fd;

	switch (cmd) {
	case F_DUPFD:
		/* Existence check only; do_dup() revalidates under XLOCK. */
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		FILEDESC_SUNLOCK(fdp);
		newmin = arg;
		PROC_LOCK(p);
		if (newmin >= lim_cur(p, RLIMIT_NOFILE) ||
		    newmin >= maxfilesperproc) {
			PROC_UNLOCK(p);
			error = EINVAL;
			break;
		}
		PROC_UNLOCK(p);
		error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval);
		break;

	case F_GETFD:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		pop = &fdp->fd_ofileflags[fd];
		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
		FILEDESC_SUNLOCK(fdp);
		break;

	case F_SETFD:
		/* Writing a per-fd flag requires the exclusive lock. */
		FILEDESC_XLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_XUNLOCK(fdp);
			error = EBADF;
			break;
		}
		pop = &fdp->fd_ofileflags[fd];
		*pop = (*pop &~ UF_EXCLOSE) |
		    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
		FILEDESC_XUNLOCK(fdp);
		break;

	case F_GETFL:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		td->td_retval[0] = OFLAGS(fp->f_flag);
		FILEDESC_SUNLOCK(fdp);
		break;

	case F_SETFL:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		/* Hold fp across the fo_ioctl() calls below, which may sleep. */
		fhold(fp);
		FILEDESC_SUNLOCK(fdp);
		/* Lock-free update of the FCNTLFLAGS bits in f_flag. */
		do {
			tmp = flg = fp->f_flag;
			tmp &= ~FCNTLFLAGS;
			tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
		} while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0);
		tmp = fp->f_flag & FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		if (error) {
			fdrop(fp, td);
			break;
		}
		tmp = fp->f_flag & FASYNC;
		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
		if (error == 0) {
			fdrop(fp, td);
			break;
		}
		/* FIOASYNC failed: roll back the FIONBIO change. */
		atomic_clear_int(&fp->f_flag, FNONBLOCK);
		tmp = 0;
		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_GETOWN:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		fhold(fp);
		FILEDESC_SUNLOCK(fdp);
		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
		if (error == 0)
			td->td_retval[0] = tmp;
		fdrop(fp, td);
		break;

	case F_SETOWN:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		fhold(fp);
		FILEDESC_SUNLOCK(fdp);
		tmp = arg;
		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_SETLKW:
		flg |= F_WAIT;
		/* FALLTHROUGH F_SETLK */

	case F_SETLK:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		flp = (struct flock *)arg;
		if (flp->l_whence == SEEK_CUR) {
			/* Reject l_start + f_offset overflow before adding. */
			if (fp->f_offset < 0 ||
			    (flp->l_start > 0 &&
			     fp->f_offset > OFF_MAX - flp->l_start)) {
				FILEDESC_SUNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}

		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_SUNLOCK(fdp);
		vp = fp->f_vnode;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		switch (flp->l_type) {
		case F_RDLCK:
			if ((fp->f_flag & FREAD) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_WRLCK:
			if ((fp->f_flag & FWRITE) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_UNLCK:
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
			    flp, F_POSIX);
			break;
		default:
			error = EINVAL;
			break;
		}
		VFS_UNLOCK_GIANT(vfslocked);
		vfslocked = 0;
		/* Check for race with close */
		FILEDESC_SLOCK(fdp);
		if ((unsigned) fd >= fdp->fd_nfiles ||
		    fp != fdp->fd_ofiles[fd]) {
			/*
			 * The descriptor was closed (or replaced) while we
			 * slept in VOP_ADVLOCK(); undo the lock we just set.
			 */
			FILEDESC_SUNLOCK(fdp);
			flp->l_whence = SEEK_SET;
			flp->l_start = 0;
			flp->l_len = 0;
			flp->l_type = F_UNLCK;
			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
			    F_UNLCK, flp, F_POSIX);
			VFS_UNLOCK_GIANT(vfslocked);
			vfslocked = 0;
		} else
			FILEDESC_SUNLOCK(fdp);
		fdrop(fp, td);
		break;

	case F_GETLK:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		flp = (struct flock *)arg;
		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
		    flp->l_type != F_UNLCK) {
			FILEDESC_SUNLOCK(fdp);
			error = EINVAL;
			break;
		}
		if (flp->l_whence == SEEK_CUR) {
			/* Reject l_start + f_offset overflow in either direction. */
			if ((flp->l_start > 0 &&
			    fp->f_offset > OFF_MAX - flp->l_start) ||
			    (flp->l_start < 0 &&
			     fp->f_offset < OFF_MIN - flp->l_start)) {
				FILEDESC_SUNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}
		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_SUNLOCK(fdp);
		vp = fp->f_vnode;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
		    F_POSIX);
		VFS_UNLOCK_GIANT(vfslocked);
		vfslocked = 0;
		fdrop(fp, td);
		break;
	default:
		error = EINVAL;
		break;
	}
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}

/*
 * Common code for dup, dup2, and fcntl(F_DUPFD).
 */
static int
do_dup(struct thread *td, enum dup_type type, int old, int new,
    register_t *retval)
{
	struct filedesc *fdp;
	struct proc *p;
	struct file *fp;
	struct file *delfp;
	int error, holdleaders, maxfd;

	KASSERT((type == DUP_VARIABLE || type == DUP_FIXED),
	    ("invalid dup type %d", type));

	p = td->td_proc;
	fdp = p->p_fd;

	/*
	 * Verify we have a valid descriptor to dup from and possibly to
	 * dup to.
	 */
	if (old < 0 || new < 0)
		return (EBADF);
	PROC_LOCK(p);
	maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
	PROC_UNLOCK(p);
	if (new >= maxfd)
		return (EMFILE);

	FILEDESC_XLOCK(fdp);
	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
		FILEDESC_XUNLOCK(fdp);
		return (EBADF);
	}
	if (type == DUP_FIXED && old == new) {
		/* dup2(fd, fd) on an open fd is a no-op by POSIX. */
		*retval = new;
		FILEDESC_XUNLOCK(fdp);
		return (0);
	}
	fp = fdp->fd_ofiles[old];
	fhold(fp);

	/*
	 * If the caller specified a file descriptor, make sure the file
	 * table is large enough to hold it, and grab it.  Otherwise, just
	 * allocate a new descriptor the usual way.  Since the filedesc
	 * lock may be temporarily dropped in the process, we have to look
	 * out for a race.
	 */
	if (type == DUP_FIXED) {
		if (new >= fdp->fd_nfiles)
			fdgrowtable(fdp, new + 1);
		if (fdp->fd_ofiles[new] == NULL)
			fdused(fdp, new);
	} else {
		if ((error = fdalloc(td, new, &new)) != 0) {
			FILEDESC_XUNLOCK(fdp);
			fdrop(fp, td);
			return (error);
		}
	}

	/*
	 * If the old file changed out from under us then treat it as a
	 * bad file descriptor.  Userland should do its own locking to
	 * avoid this case.
	 */
	if (fdp->fd_ofiles[old] != fp) {
		/* we've allocated a descriptor which we won't use */
		if (fdp->fd_ofiles[new] == NULL)
			fdunused(fdp, new);
		FILEDESC_XUNLOCK(fdp);
		fdrop(fp, td);
		return (EBADF);
	}
	KASSERT(old != new,
	    ("new fd is same as old"));

	/*
	 * Save info on the descriptor being overwritten.  We cannot close
	 * it without introducing an ownership race for the slot, since we
	 * need to drop the filedesc lock to call closef().
	 *
	 * XXX this duplicates parts of close().
	 */
	delfp = fdp->fd_ofiles[new];
	holdleaders = 0;
	if (delfp != NULL) {
		if (td->td_proc->p_fdtol != NULL) {
			/*
			 * Ask fdfree() to sleep to ensure that all relevant
			 * process leaders can be traversed in closef().
			 */
			fdp->fd_holdleaderscount++;
			holdleaders = 1;
		}
	}

	/*
	 * Duplicate the source descriptor
	 */
	fdp->fd_ofiles[new] = fp;
	/* Per POSIX, the dup does not inherit close-on-exec. */
	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
	if (new > fdp->fd_lastfile)
		fdp->fd_lastfile = new;
	*retval = new;

	/*
	 * If we dup'd over a valid file, we now own the reference to it
	 * and must dispose of it using closef() semantics (as if a
	 * close() were performed on it).
	 *
	 * XXX this duplicates parts of close().
	 */
	if (delfp != NULL) {
		knote_fdclose(td, new);
		if (delfp->f_type == DTYPE_MQUEUE)
			mq_fdclose(td, new, delfp);
		FILEDESC_XUNLOCK(fdp);
		(void) closef(delfp, td);
		if (holdleaders) {
			FILEDESC_XLOCK(fdp);
			fdp->fd_holdleaderscount--;
			if (fdp->fd_holdleaderscount == 0 &&
			    fdp->fd_holdleaderswakeup != 0) {
				fdp->fd_holdleaderswakeup = 0;
				wakeup(&fdp->fd_holdleaderscount);
			}
			FILEDESC_XUNLOCK(fdp);
		}
	} else {
		FILEDESC_XUNLOCK(fdp);
	}
	return (0);
}

/*
 * If sigio is on the list associated with a process or process group,
 * disable signalling from the device, remove sigio from the list and
 * free sigio.
 */
void
funsetown(struct sigio **sigiop)
{
	struct sigio *sigio;

	SIGIO_LOCK();
	sigio = *sigiop;
	if (sigio == NULL) {
		SIGIO_UNLOCK();
		return;
	}
	*(sigio->sio_myref) = NULL;
	/* Negative sio_pgid means the owner is a process group. */
	if ((sigio)->sio_pgid < 0) {
		struct pgrp *pg = (sigio)->sio_pgrp;
		PGRP_LOCK(pg);
		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
		    sigio, sio_pgsigio);
		PGRP_UNLOCK(pg);
	} else {
		struct proc *p = (sigio)->sio_proc;
		PROC_LOCK(p);
		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
		    sigio, sio_pgsigio);
		PROC_UNLOCK(p);
	}
	SIGIO_UNLOCK();
	crfree(sigio->sio_ucred);
	FREE(sigio, M_SIGIO);
}

/*
 * Free a list of sigio structures.
 * We only need to lock the SIGIO_LOCK because we have made ourselves
 * inaccessible to callers of fsetown and therefore do not need to lock
 * the proc or pgrp struct for the list manipulation.
 */
void
funsetownlst(struct sigiolst *sigiolst)
{
	struct proc *p;
	struct pgrp *pg;
	struct sigio *sigio;

	sigio = SLIST_FIRST(sigiolst);
	if (sigio == NULL)
		return;
	p = NULL;
	pg = NULL;

	/*
	 * Every entry of the list should belong
	 * to a single proc or pgrp.
	 */
	if (sigio->sio_pgid < 0) {
		pg = sigio->sio_pgrp;
		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
	} else /* if (sigio->sio_pgid > 0) */ {
		p = sigio->sio_proc;
		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
	}

	SIGIO_LOCK();
	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
		*(sigio->sio_myref) = NULL;
		if (pg != NULL) {
			KASSERT(sigio->sio_pgid < 0,
			    ("Proc sigio in pgrp sigio list"));
			KASSERT(sigio->sio_pgrp == pg,
			    ("Bogus pgrp in sigio list"));
			PGRP_LOCK(pg);
			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
			    sio_pgsigio);
			PGRP_UNLOCK(pg);
		} else /* if (p != NULL) */ {
			KASSERT(sigio->sio_pgid > 0,
			    ("Pgrp sigio in proc sigio list"));
			KASSERT(sigio->sio_proc == p,
			    ("Bogus proc in sigio list"));
			PROC_LOCK(p);
			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
			    sio_pgsigio);
			PROC_UNLOCK(p);
		}
		/* Drop SIGIO_LOCK around crfree/FREE, which may sleep. */
		SIGIO_UNLOCK();
		crfree(sigio->sio_ucred);
		FREE(sigio, M_SIGIO);
		SIGIO_LOCK();
	}
	SIGIO_UNLOCK();
}

/*
 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
 *
 * After permission checking, add a sigio structure to the sigio list for
 * the process or process group.
 */
int
fsetown(pid_t pgid, struct sigio **sigiop)
{
	struct proc *proc;
	struct pgrp *pgrp;
	struct sigio *sigio;
	int ret;

	if (pgid == 0) {
		funsetown(sigiop);
		return (0);
	}

	ret = 0;

	/* Allocate and fill in the new sigio out of locks. */
	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
	sigio->sio_pgid = pgid;
	sigio->sio_ucred = crhold(curthread->td_ucred);
	sigio->sio_myref = sigiop;

	sx_slock(&proctree_lock);
	if (pgid > 0) {
		proc = pfind(pgid);
		if (proc == NULL) {
			ret = ESRCH;
			goto fail;
		}

		/*
		 * Policy - Don't allow a process to FSETOWN a process
		 * in another session.
		 *
		 * Remove this test to allow maximum flexibility or
		 * restrict FSETOWN to the current process or process
		 * group for maximum safety.
		 */
		PROC_UNLOCK(proc);
		if (proc->p_session != curthread->td_proc->p_session) {
			ret = EPERM;
			goto fail;
		}

		pgrp = NULL;
	} else /* if (pgid < 0) */ {
		pgrp = pgfind(-pgid);
		if (pgrp == NULL) {
			ret = ESRCH;
			goto fail;
		}
		PGRP_UNLOCK(pgrp);

		/*
		 * Policy - Don't allow a process to FSETOWN a process
		 * in another session.
		 *
		 * Remove this test to allow maximum flexibility or
		 * restrict FSETOWN to the current process or process
		 * group for maximum safety.
		 */
		if (pgrp->pg_session != curthread->td_proc->p_session) {
			ret = EPERM;
			goto fail;
		}

		proc = NULL;
	}
	funsetown(sigiop);
	if (pgid > 0) {
		PROC_LOCK(proc);
		/*
		 * Since funsetownlst() is called without the proctree
		 * locked, we need to check for P_WEXIT.
		 * XXX: is ESRCH correct?
		 */
		if ((proc->p_flag & P_WEXIT) != 0) {
			PROC_UNLOCK(proc);
			ret = ESRCH;
			goto fail;
		}
		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
		sigio->sio_proc = proc;
		PROC_UNLOCK(proc);
	} else {
		PGRP_LOCK(pgrp);
		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
		sigio->sio_pgrp = pgrp;
		PGRP_UNLOCK(pgrp);
	}
	sx_sunlock(&proctree_lock);
	SIGIO_LOCK();
	*sigiop = sigio;
	SIGIO_UNLOCK();
	return (0);

fail:
	sx_sunlock(&proctree_lock);
	crfree(sigio->sio_ucred);
	FREE(sigio, M_SIGIO);
	return (ret);
}

/*
 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
 */
pid_t
fgetown(sigiop)
	struct sigio **sigiop;
{
	pid_t pgid;

	SIGIO_LOCK();
	pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
	SIGIO_UNLOCK();
	return (pgid);
}

/*
 * Close a file descriptor.
994 */ 995 #ifndef _SYS_SYSPROTO_H_ 996 struct close_args { 997 int fd; 998 }; 999 #endif 1000 /* ARGSUSED */ 1001 int 1002 close(td, uap) 1003 struct thread *td; 1004 struct close_args *uap; 1005 { 1006 1007 return (kern_close(td, uap->fd)); 1008 } 1009 1010 int 1011 kern_close(td, fd) 1012 struct thread *td; 1013 int fd; 1014 { 1015 struct filedesc *fdp; 1016 struct file *fp; 1017 int error; 1018 int holdleaders; 1019 1020 error = 0; 1021 holdleaders = 0; 1022 fdp = td->td_proc->p_fd; 1023 1024 AUDIT_SYSCLOSE(td, fd); 1025 1026 FILEDESC_XLOCK(fdp); 1027 if ((unsigned)fd >= fdp->fd_nfiles || 1028 (fp = fdp->fd_ofiles[fd]) == NULL) { 1029 FILEDESC_XUNLOCK(fdp); 1030 return (EBADF); 1031 } 1032 fdp->fd_ofiles[fd] = NULL; 1033 fdp->fd_ofileflags[fd] = 0; 1034 fdunused(fdp, fd); 1035 if (td->td_proc->p_fdtol != NULL) { 1036 /* 1037 * Ask fdfree() to sleep to ensure that all relevant 1038 * process leaders can be traversed in closef(). 1039 */ 1040 fdp->fd_holdleaderscount++; 1041 holdleaders = 1; 1042 } 1043 1044 /* 1045 * We now hold the fp reference that used to be owned by the 1046 * descriptor array. We have to unlock the FILEDESC *AFTER* 1047 * knote_fdclose to prevent a race of the fd getting opened, a knote 1048 * added, and deleteing a knote for the new fd. 1049 */ 1050 knote_fdclose(td, fd); 1051 if (fp->f_type == DTYPE_MQUEUE) 1052 mq_fdclose(td, fd, fp); 1053 FILEDESC_XUNLOCK(fdp); 1054 1055 error = closef(fp, td); 1056 if (holdleaders) { 1057 FILEDESC_XLOCK(fdp); 1058 fdp->fd_holdleaderscount--; 1059 if (fdp->fd_holdleaderscount == 0 && 1060 fdp->fd_holdleaderswakeup != 0) { 1061 fdp->fd_holdleaderswakeup = 0; 1062 wakeup(&fdp->fd_holdleaderscount); 1063 } 1064 FILEDESC_XUNLOCK(fdp); 1065 } 1066 return (error); 1067 } 1068 1069 #if defined(COMPAT_43) 1070 /* 1071 * Return status information about a file descriptor. 
1072 */ 1073 #ifndef _SYS_SYSPROTO_H_ 1074 struct ofstat_args { 1075 int fd; 1076 struct ostat *sb; 1077 }; 1078 #endif 1079 /* ARGSUSED */ 1080 int 1081 ofstat(struct thread *td, struct ofstat_args *uap) 1082 { 1083 struct ostat oub; 1084 struct stat ub; 1085 int error; 1086 1087 error = kern_fstat(td, uap->fd, &ub); 1088 if (error == 0) { 1089 cvtstat(&ub, &oub); 1090 error = copyout(&oub, uap->sb, sizeof(oub)); 1091 } 1092 return (error); 1093 } 1094 #endif /* COMPAT_43 */ 1095 1096 /* 1097 * Return status information about a file descriptor. 1098 */ 1099 #ifndef _SYS_SYSPROTO_H_ 1100 struct fstat_args { 1101 int fd; 1102 struct stat *sb; 1103 }; 1104 #endif 1105 /* ARGSUSED */ 1106 int 1107 fstat(struct thread *td, struct fstat_args *uap) 1108 { 1109 struct stat ub; 1110 int error; 1111 1112 error = kern_fstat(td, uap->fd, &ub); 1113 if (error == 0) 1114 error = copyout(&ub, uap->sb, sizeof(ub)); 1115 return (error); 1116 } 1117 1118 int 1119 kern_fstat(struct thread *td, int fd, struct stat *sbp) 1120 { 1121 struct file *fp; 1122 int error; 1123 1124 AUDIT_ARG(fd, fd); 1125 1126 if ((error = fget(td, fd, &fp)) != 0) 1127 return (error); 1128 1129 AUDIT_ARG(file, td->td_proc, fp); 1130 1131 error = fo_stat(fp, sbp, td->td_ucred, td); 1132 fdrop(fp, td); 1133 #ifdef KTRACE 1134 if (error == 0 && KTRPOINT(td, KTR_STRUCT)) 1135 ktrstat(sbp); 1136 #endif 1137 return (error); 1138 } 1139 1140 /* 1141 * Return status information about a file descriptor. 
 */
#ifndef _SYS_SYSPROTO_H_
struct nfstat_args {
	int	fd;
	struct nstat *sb;
};
#endif
/* ARGSUSED */
/*
 * nfstat: fstat(2) variant returning the nstat format; stats via
 * kern_fstat(), converts with cvtnstat(), then copies out.
 */
int
nfstat(struct thread *td, struct nfstat_args *uap)
{
	struct nstat nub;
	struct stat ub;
	int error;

	error = kern_fstat(td, uap->fd, &ub);
	if (error == 0) {
		cvtnstat(&ub, &nub);
		error = copyout(&nub, uap->sb, sizeof(nub));
	}
	return (error);
}

/*
 * Return pathconf information about a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct fpathconf_args {
	int	fd;
	int	name;
};
#endif
/* ARGSUSED */
int
fpathconf(struct thread *td, struct fpathconf_args *uap)
{
	struct file *fp;
	struct vnode *vp;
	int error;

	if ((error = fget(td, uap->fd, &fp)) != 0)
		return (error);

	/* If asynchronous I/O is available, it works for all descriptors. */
	if (uap->name == _PC_ASYNC_IO) {
		td->td_retval[0] = async_io_version;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp != NULL) {
		/* Vnode-backed descriptor: ask the filesystem. */
		int vfslocked;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
		VOP_UNLOCK(vp, 0);
		VFS_UNLOCK_GIANT(vfslocked);
	} else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
		/* Pipes and sockets only support _PC_PIPE_BUF. */
		if (uap->name != _PC_PIPE_BUF) {
			error = EINVAL;
		} else {
			td->td_retval[0] = PIPE_BUF;
			error = 0;
		}
	} else {
		error = EOPNOTSUPP;
	}
out:
	fdrop(fp, td);
	return (error);
}

/*
 * Grow the file table to accommodate (at least) nfd descriptors.  This may
 * block and drop the filedesc lock, but it will reacquire it before
 * returning.
 */
static void
fdgrowtable(struct filedesc *fdp, int nfd)
{
	struct file **ntable;
	char *nfileflags;
	int nnfiles, onfiles;
	NDSLOTTYPE *nmap;

	FILEDESC_XLOCK_ASSERT(fdp);

	KASSERT(fdp->fd_nfiles > 0,
	    ("zero-length file table"));

	/* compute the size of the new table */
	onfiles = fdp->fd_nfiles;
	nnfiles = NDSLOTS(nfd) * NDENTRIES;	/* round up */
	if (nnfiles <= onfiles)
		/* the table is already large enough */
		return;

	/* allocate a new table and (if required) new bitmaps */
	FILEDESC_XUNLOCK(fdp);
	/* One allocation holds both the file pointers and the flag bytes. */
	MALLOC(ntable, struct file **, nnfiles * OFILESIZE,
	    M_FILEDESC, M_ZERO | M_WAITOK);
	nfileflags = (char *)&ntable[nnfiles];
	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles))
		MALLOC(nmap, NDSLOTTYPE *, NDSLOTS(nnfiles) * NDSLOTSIZE,
		    M_FILEDESC, M_ZERO | M_WAITOK);
	else
		nmap = NULL;
	FILEDESC_XLOCK(fdp);

	/*
	 * We now have new tables ready to go.  Since we dropped the
	 * filedesc lock to call malloc(), watch out for a race.
	 */
	onfiles = fdp->fd_nfiles;
	if (onfiles >= nnfiles) {
		/* we lost the race, but that's OK */
		free(ntable, M_FILEDESC);
		if (nmap != NULL)
			free(nmap, M_FILEDESC);
		return;
	}
	bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable));
	bcopy(fdp->fd_ofileflags, nfileflags, onfiles);
	/* The initial NDFILE-sized arrays live inside struct filedesc0. */
	if (onfiles > NDFILE)
		free(fdp->fd_ofiles, M_FILEDESC);
	fdp->fd_ofiles = ntable;
	fdp->fd_ofileflags = nfileflags;
	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
		bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap));
		if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
			free(fdp->fd_map, M_FILEDESC);
		fdp->fd_map = nmap;
	}
	fdp->fd_nfiles = nnfiles;
}

/*
 * Allocate a file descriptor for the process.
 */
int
fdalloc(struct thread *td, int minfd, int *result)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	int fd = -1, maxfd;

	FILEDESC_XLOCK_ASSERT(fdp);

	/* Never search below the lowest-possibly-free hint. */
	if (fdp->fd_freefile > minfd)
		minfd = fdp->fd_freefile;

	/* Per-process descriptor limit, capped by the system-wide limit. */
	PROC_LOCK(p);
	maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
	PROC_UNLOCK(p);

	/*
	 * Search the bitmap for a free descriptor.  If none is found, try
	 * to grow the file table.  Keep at it until we either get a file
	 * descriptor or run into process or system limits; fdgrowtable()
	 * may drop the filedesc lock, so we're in a race.
	 */
	for (;;) {
		fd = fd_first_free(fdp, minfd, fdp->fd_nfiles);
		if (fd >= maxfd)
			return (EMFILE);
		if (fd < fdp->fd_nfiles)
			break;
		fdgrowtable(fdp, min(fdp->fd_nfiles * 2, maxfd));
	}

	/*
	 * Perform some sanity checks, then mark the file descriptor as
	 * used and return it to the caller.
	 */
	KASSERT(!fdisused(fdp, fd),
	    ("fd_first_free() returned non-free descriptor"));
	KASSERT(fdp->fd_ofiles[fd] == NULL,
	    ("free descriptor isn't"));
	fdp->fd_ofileflags[fd] = 0; /* XXX needed? */
	fdused(fdp, fd);
	*result = fd;
	return (0);
}

/*
 * Check to see whether n user file descriptors are available to the process
 * p.
 */
int
fdavail(struct thread *td, int n)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = td->td_proc->p_fd;
	struct file **fpp;
	int i, lim, last;

	FILEDESC_LOCK_ASSERT(fdp);

	PROC_LOCK(p);
	lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
	PROC_UNLOCK(p);
	/* Slots the table could still grow into count as available. */
	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
		return (1);
	/* Otherwise scan the existing table for free entries. */
	last = min(fdp->fd_nfiles, lim);
	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
		if (*fpp == NULL && --n <= 0)
			return (1);
	}
	return (0);
}

/*
 * Create a new open file structure and allocate a file descriptor for the
 * process that refers to it.  We add one reference to the file for the
 * descriptor table and one reference for resultfp.  This is to prevent us
 * being preempted and the entry in the descriptor table closed after we
 * release the FILEDESC lock.
 */
int
falloc(struct thread *td, struct file **resultfp, int *resultfd)
{
	struct proc *p = td->td_proc;
	struct file *fp;
	int error, i;
	int maxuserfiles = maxfiles - (maxfiles / 20);
	static struct timeval lastfail;
	static int curfail;

	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
	/*
	 * Enforce the global open-file limit; the last 5% of slots are
	 * reserved for privileged users (PRIV_MAXFILES).
	 */
	if ((openfiles >= maxuserfiles &&
	    priv_check(td, PRIV_MAXFILES) != 0) ||
	    openfiles >= maxfiles) {
		/* Rate-limited complaint so a runaway doesn't spam the console. */
		if (ppsratecheck(&lastfail, &curfail, 1)) {
			printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n",
				td->td_ucred->cr_ruid);
		}
		uma_zfree(file_zone, fp);
		return (ENFILE);
	}
	atomic_add_int(&openfiles, 1);

	/*
	 * If the process has file descriptor zero open, add the new file
	 * descriptor to the list of open files at that point, otherwise
	 * put it at the front of the list of open files.
 */
	/* One reference for the descriptor table, one for *resultfp. */
	fp->f_count = 1;
	if (resultfp)
		fp->f_count++;
	fp->f_cred = crhold(td->td_ucred);
	/* badfileops until the caller installs real ops via finit(). */
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	fp->f_vnode = NULL;
	FILEDESC_XLOCK(p->p_fd);
	if ((error = fdalloc(td, 0, &i))) {
		FILEDESC_XUNLOCK(p->p_fd);
		/* Drop both references taken above. */
		fdrop(fp, td);
		if (resultfp)
			fdrop(fp, td);
		return (error);
	}
	p->p_fd->fd_ofiles[i] = fp;
	FILEDESC_XUNLOCK(p->p_fd);
	if (resultfp)
		*resultfp = fp;
	if (resultfd)
		*resultfd = i;
	return (0);
}

/*
 * Build a new filedesc structure from another.
 * Copy the current, root, and jail root vnode references.
 */
struct filedesc *
fdinit(struct filedesc *fdp)
{
	struct filedesc0 *newfdp;

	newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO);
	FILEDESC_LOCK_INIT(&newfdp->fd_fd);
	if (fdp != NULL) {
		FILEDESC_XLOCK(fdp);
		/* Inherit cwd, root and jail root, taking a vnode ref on each. */
		newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
		if (newfdp->fd_fd.fd_cdir)
			VREF(newfdp->fd_fd.fd_cdir);
		newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
		if (newfdp->fd_fd.fd_rdir)
			VREF(newfdp->fd_fd.fd_rdir);
		newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
		if (newfdp->fd_fd.fd_jdir)
			VREF(newfdp->fd_fd.fd_jdir);
		FILEDESC_XUNLOCK(fdp);
	}

	/* Create the file descriptor table.
 */
	/* Start with the small table embedded in struct filedesc0. */
	newfdp->fd_fd.fd_refcnt = 1;
	newfdp->fd_fd.fd_holdcnt = 1;
	newfdp->fd_fd.fd_cmask = CMASK;
	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
	newfdp->fd_fd.fd_nfiles = NDFILE;
	newfdp->fd_fd.fd_map = newfdp->fd_dmap;
	newfdp->fd_fd.fd_lastfile = -1;
	return (&newfdp->fd_fd);
}

/*
 * Take a hold on a process's filedesc, keeping the structure itself (not
 * its contents) from being freed; returns NULL if the process has none.
 */
static struct filedesc *
fdhold(struct proc *p)
{
	struct filedesc *fdp;

	mtx_lock(&fdesc_mtx);
	fdp = p->p_fd;
	if (fdp != NULL)
		fdp->fd_holdcnt++;
	mtx_unlock(&fdesc_mtx);
	return (fdp);
}

/*
 * Release a hold from fdhold(); the last hold frees the structure.
 */
static void
fddrop(struct filedesc *fdp)
{
	int i;

	mtx_lock(&fdesc_mtx);
	i = --fdp->fd_holdcnt;
	mtx_unlock(&fdesc_mtx);
	if (i > 0)
		return;

	FILEDESC_LOCK_DESTROY(fdp);
	FREE(fdp, M_FILEDESC);
}

/*
 * Share a filedesc structure.
 */
struct filedesc *
fdshare(struct filedesc *fdp)
{

	FILEDESC_XLOCK(fdp);
	fdp->fd_refcnt++;
	FILEDESC_XUNLOCK(fdp);
	return (fdp);
}

/*
 * Unshare a filedesc structure, if necessary by making a copy
 */
void
fdunshare(struct proc *p, struct thread *td)
{

	FILEDESC_XLOCK(p->p_fd);
	if (p->p_fd->fd_refcnt > 1) {
		struct filedesc *tmp;

		FILEDESC_XUNLOCK(p->p_fd);
		tmp = fdcopy(p->p_fd);
		fdfree(td);
		p->p_fd = tmp;
	} else
		FILEDESC_XUNLOCK(p->p_fd);
}

/*
 * Copy a filedesc structure.  A NULL pointer in returns a NULL reference,
 * this is to ease callers, not catch errors.
 */
struct filedesc *
fdcopy(struct filedesc *fdp)
{
	struct filedesc *newfdp;
	int i;

	/* Certain daemons might not have file descriptors.
 */
	if (fdp == NULL)
		return (NULL);

	newfdp = fdinit(fdp);
	FILEDESC_SLOCK(fdp);
	/*
	 * Grow the copy's table until it covers the source's highest fd.
	 * The source lock is dropped while growing, so re-check each pass.
	 */
	while (fdp->fd_lastfile >= newfdp->fd_nfiles) {
		FILEDESC_SUNLOCK(fdp);
		FILEDESC_XLOCK(newfdp);
		fdgrowtable(newfdp, fdp->fd_lastfile + 1);
		FILEDESC_XUNLOCK(newfdp);
		FILEDESC_SLOCK(fdp);
	}
	/* copy everything except kqueue descriptors */
	newfdp->fd_freefile = -1;
	for (i = 0; i <= fdp->fd_lastfile; ++i) {
		if (fdisused(fdp, i) &&
		    fdp->fd_ofiles[i]->f_type != DTYPE_KQUEUE) {
			newfdp->fd_ofiles[i] = fdp->fd_ofiles[i];
			newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i];
			fhold(newfdp->fd_ofiles[i]);
			newfdp->fd_lastfile = i;
		} else {
			if (newfdp->fd_freefile == -1)
				newfdp->fd_freefile = i;
		}
	}
	FILEDESC_SUNLOCK(fdp);
	/* Mark the copied entries as used in the new table's bitmap. */
	FILEDESC_XLOCK(newfdp);
	for (i = 0; i <= newfdp->fd_lastfile; ++i)
		if (newfdp->fd_ofiles[i] != NULL)
			fdused(newfdp, i);
	FILEDESC_XUNLOCK(newfdp);
	FILEDESC_SLOCK(fdp);
	if (newfdp->fd_freefile == -1)
		newfdp->fd_freefile = i;
	newfdp->fd_cmask = fdp->fd_cmask;
	FILEDESC_SUNLOCK(fdp);
	return (newfdp);
}

/*
 * Release a filedesc structure.
 */
void
fdfree(struct thread *td)
{
	struct filedesc *fdp;
	struct file **fpp;
	int i, locked;
	struct filedesc_to_leader *fdtol;
	struct file *fp;
	struct vnode *cdir, *jdir, *rdir, *vp;
	struct flock lf;

	/* Certain daemons might not have file descriptors.
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	/* Check for special need to clear POSIX style locks */
	fdtol = td->td_proc->p_fdtol;
	if (fdtol != NULL) {
		FILEDESC_XLOCK(fdp);
		KASSERT(fdtol->fdl_refcount > 0,
		    ("filedesc_to_refcount botch: fdl_refcount=%d",
		    fdtol->fdl_refcount));
		if (fdtol->fdl_refcount == 1 &&
		    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
			/* Release the leader's POSIX locks on every vnode fd. */
			for (i = 0, fpp = fdp->fd_ofiles;
			    i <= fdp->fd_lastfile;
			    i++, fpp++) {
				if (*fpp == NULL ||
				    (*fpp)->f_type != DTYPE_VNODE)
					continue;
				fp = *fpp;
				fhold(fp);
				FILEDESC_XUNLOCK(fdp);
				lf.l_whence = SEEK_SET;
				lf.l_start = 0;
				lf.l_len = 0;
				lf.l_type = F_UNLCK;
				vp = fp->f_vnode;
				locked = VFS_LOCK_GIANT(vp->v_mount);
				(void) VOP_ADVLOCK(vp,
				    (caddr_t)td->td_proc->
				    p_leader,
				    F_UNLCK,
				    &lf,
				    F_POSIX);
				VFS_UNLOCK_GIANT(locked);
				FILEDESC_XLOCK(fdp);
				fdrop(fp, td);
				/*
				 * The table may have been reallocated while
				 * the lock was dropped; re-derive fpp.
				 */
				fpp = fdp->fd_ofiles + i;
			}
		}
	retry:
		if (fdtol->fdl_refcount == 1) {
			if (fdp->fd_holdleaderscount > 0 &&
			    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
				/*
				 * close() or do_dup() has cleared a reference
				 * in a shared file descriptor table.
				 */
				fdp->fd_holdleaderswakeup = 1;
				sx_sleep(&fdp->fd_holdleaderscount,
				    FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0);
				goto retry;
			}
			if (fdtol->fdl_holdcount > 0) {
				/*
				 * Ensure that fdtol->fdl_leader remains
				 * valid in closef().
 */
				fdtol->fdl_wakeup = 1;
				sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK,
				    "fdlhold", 0);
				goto retry;
			}
		}
		fdtol->fdl_refcount--;
		/* Last reference: unlink from the leader ring and free below. */
		if (fdtol->fdl_refcount == 0 &&
		    fdtol->fdl_holdcount == 0) {
			fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
			fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
		} else
			fdtol = NULL;
		td->td_proc->p_fdtol = NULL;
		FILEDESC_XUNLOCK(fdp);
		if (fdtol != NULL)
			FREE(fdtol, M_FILEDESC_TO_LEADER);
	}
	FILEDESC_XLOCK(fdp);
	i = --fdp->fd_refcnt;
	FILEDESC_XUNLOCK(fdp);
	if (i > 0)
		return;
	/*
	 * We are the last reference to the structure, so we can
	 * safely assume it will not change out from under us.
	 */
	fpp = fdp->fd_ofiles;
	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
		if (*fpp)
			(void) closef(*fpp, td);
	}
	FILEDESC_XLOCK(fdp);

	/* XXX This should happen earlier. */
	mtx_lock(&fdesc_mtx);
	td->td_proc->p_fd = NULL;
	mtx_unlock(&fdesc_mtx);

	/* The embedded NDFILE-sized table/map are not separately allocated. */
	if (fdp->fd_nfiles > NDFILE)
		FREE(fdp->fd_ofiles, M_FILEDESC);
	if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE))
		FREE(fdp->fd_map, M_FILEDESC);

	fdp->fd_nfiles = 0;

	/* Drop the directory vnode refs outside the filedesc lock. */
	cdir = fdp->fd_cdir;
	fdp->fd_cdir = NULL;
	rdir = fdp->fd_rdir;
	fdp->fd_rdir = NULL;
	jdir = fdp->fd_jdir;
	fdp->fd_jdir = NULL;
	FILEDESC_XUNLOCK(fdp);

	if (cdir) {
		locked = VFS_LOCK_GIANT(cdir->v_mount);
		vrele(cdir);
		VFS_UNLOCK_GIANT(locked);
	}
	if (rdir) {
		locked = VFS_LOCK_GIANT(rdir->v_mount);
		vrele(rdir);
		VFS_UNLOCK_GIANT(locked);
	}
	if (jdir) {
		locked = VFS_LOCK_GIANT(jdir->v_mount);
		vrele(jdir);
		VFS_UNLOCK_GIANT(locked);
	}

	fddrop(fdp);
}

/*
 * For setugid programs, we don't want people to use that setugidness
 * to generate error messages which write to a file which would
 * otherwise be
 * off-limits to the process.  We check for filesystems where
 * the vnode can change out from under us after execve (like [lin]procfs).
 *
 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
 * sufficient.  We also don't check for setugidness since we know we are.
 */
static int
is_unsafe(struct file *fp)
{
	if (fp->f_type == DTYPE_VNODE) {
		struct vnode *vp = fp->f_vnode;

		/* VV_PROCDEP: vnode identity depends on the opening process. */
		if ((vp->v_vflag & VV_PROCDEP) != 0)
			return (1);
	}
	return (0);
}

/*
 * Make this setugid thing safe, if at all possible: close any unsafe
 * files on descriptors 0..2 before a set[ug]id image runs.
 */
void
setugidsafety(struct thread *td)
{
	struct filedesc *fdp;
	int i;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	/*
	 * Note: fdp->fd_ofiles may be reallocated out from under us while
	 * we are blocked in a close.  Be careful!
	 */
	FILEDESC_XLOCK(fdp);
	for (i = 0; i <= fdp->fd_lastfile; i++) {
		/* Only stdin/stdout/stderr matter here. */
		if (i > 2)
			break;
		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
			struct file *fp;

			knote_fdclose(td, i);
			/*
			 * NULL-out descriptor prior to close to avoid
			 * a race while close blocks.
			 */
			fp = fdp->fd_ofiles[i];
			fdp->fd_ofiles[i] = NULL;
			fdp->fd_ofileflags[i] = 0;
			fdunused(fdp, i);
			FILEDESC_XUNLOCK(fdp);
			(void) closef(fp, td);
			FILEDESC_XLOCK(fdp);
		}
	}
	FILEDESC_XUNLOCK(fdp);
}

/*
 * If a specific file object occupies a specific file descriptor, close the
 * file descriptor entry and drop a reference on the file object.  This is a
 * convenience function to handle a subsequent error in a function that calls
 * falloc() that handles the race that another thread might have closed the
 * file descriptor out from under the thread creating the file object.
 */
void
fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td)
{

	FILEDESC_XLOCK(fdp);
	/* Only undo the entry if it still refers to our file object. */
	if (fdp->fd_ofiles[idx] == fp) {
		fdp->fd_ofiles[idx] = NULL;
		fdunused(fdp, idx);
		FILEDESC_XUNLOCK(fdp);
		fdrop(fp, td);
	} else
		FILEDESC_XUNLOCK(fdp);
}

/*
 * Close any files on exec?
 */
void
fdcloseexec(struct thread *td)
{
	struct filedesc *fdp;
	int i;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	FILEDESC_XLOCK(fdp);

	/*
	 * We cannot cache fd_ofiles or fd_ofileflags since operations
	 * may block and rip them out from under us.
	 */
	for (i = 0; i <= fdp->fd_lastfile; i++) {
		/* Close close-on-exec descriptors; mqueues always close. */
		if (fdp->fd_ofiles[i] != NULL &&
		    (fdp->fd_ofiles[i]->f_type == DTYPE_MQUEUE ||
		    (fdp->fd_ofileflags[i] & UF_EXCLOSE))) {
			struct file *fp;

			knote_fdclose(td, i);
			/*
			 * NULL-out descriptor prior to close to avoid
			 * a race while close blocks.
			 */
			fp = fdp->fd_ofiles[i];
			fdp->fd_ofiles[i] = NULL;
			fdp->fd_ofileflags[i] = 0;
			fdunused(fdp, i);
			if (fp->f_type == DTYPE_MQUEUE)
				mq_fdclose(td, i, fp);
			FILEDESC_XUNLOCK(fdp);
			(void) closef(fp, td);
			FILEDESC_XLOCK(fdp);
		}
	}
	FILEDESC_XUNLOCK(fdp);
}

/*
 * It is unsafe for set[ug]id processes to be started with file
 * descriptors 0..2 closed, as these descriptors are given implicit
 * significance in the Standard C library.  fdcheckstd() will create a
 * descriptor referencing /dev/null for each of stdin, stdout, and
 * stderr that is not already open.
 */
int
fdcheckstd(struct thread *td)
{
	struct filedesc *fdp;
	register_t retval, save;
	int i, error, devnull;

	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return (0);
	KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared"));
	devnull = -1;
	error = 0;
	for (i = 0; i < 3; i++) {
		if (fdp->fd_ofiles[i] != NULL)
			continue;
		if (devnull < 0) {
			/*
			 * First hole: open /dev/null.  kern_open() clobbers
			 * td_retval[0] with the new fd, so save and restore
			 * the caller's return value around it.
			 */
			save = td->td_retval[0];
			error = kern_open(td, "/dev/null", UIO_SYSSPACE,
			    O_RDWR, 0);
			devnull = td->td_retval[0];
			KASSERT(devnull == i, ("oof, we didn't get our fd"));
			td->td_retval[0] = save;
			if (error)
				break;
		} else {
			/* Subsequent holes: dup the /dev/null fd into place. */
			error = do_dup(td, DUP_FIXED, devnull, i, &retval);
			if (error != 0)
				break;
		}
	}
	return (error);
}

/*
 * Internal form of close.  Decrement reference count on file structure.
 * Note: td may be NULL when closing a file that was being passed in a
 * message.
 *
 * XXXRW: Giant is not required for the caller, but often will be held; this
 * makes it moderately likely the Giant will be recursed in the VFS case.
 */
int
closef(struct file *fp, struct thread *td)
{
	struct vnode *vp;
	struct flock lf;
	struct filedesc_to_leader *fdtol;
	struct filedesc *fdp;

	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process.  This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor, and the thread pointer
	 * will be NULL.  Callers should be careful only to pass a
	 * NULL thread pointer when there really is no owning
	 * context that might have locks, or the locks will be
	 * leaked.
 */
	if (fp->f_type == DTYPE_VNODE && td != NULL) {
		int vfslocked;

		vp = fp->f_vnode;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
			/* Release this leader's POSIX locks on the vnode. */
			lf.l_whence = SEEK_SET;
			lf.l_start = 0;
			lf.l_len = 0;
			lf.l_type = F_UNLCK;
			(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
			    F_UNLCK, &lf, F_POSIX);
		}
		fdtol = td->td_proc->p_fdtol;
		if (fdtol != NULL) {
			/*
			 * Handle special case where file descriptor table is
			 * shared between multiple process leaders.
			 */
			fdp = td->td_proc->p_fd;
			FILEDESC_XLOCK(fdp);
			for (fdtol = fdtol->fdl_next;
			    fdtol != td->td_proc->p_fdtol;
			    fdtol = fdtol->fdl_next) {
				if ((fdtol->fdl_leader->p_flag &
				    P_ADVLOCK) == 0)
					continue;
				/*
				 * Hold the fdtol entry so it stays valid
				 * while the lock is dropped for VOP_ADVLOCK.
				 */
				fdtol->fdl_holdcount++;
				FILEDESC_XUNLOCK(fdp);
				lf.l_whence = SEEK_SET;
				lf.l_start = 0;
				lf.l_len = 0;
				lf.l_type = F_UNLCK;
				vp = fp->f_vnode;
				(void) VOP_ADVLOCK(vp,
				    (caddr_t)fdtol->fdl_leader,
				    F_UNLCK, &lf, F_POSIX);
				FILEDESC_XLOCK(fdp);
				fdtol->fdl_holdcount--;
				if (fdtol->fdl_holdcount == 0 &&
				    fdtol->fdl_wakeup != 0) {
					/* Wake a sleeper in fdfree(). */
					fdtol->fdl_wakeup = 0;
					wakeup(fdtol);
				}
			}
			FILEDESC_XUNLOCK(fdp);
		}
		VFS_UNLOCK_GIANT(vfslocked);
	}
	return (fdrop(fp, td));
}

/*
 * Initialize the file pointer with the specified properties.
 *
 * The ops are set with release semantics to be certain that the flags, type,
 * and data are visible when ops is.  This is to prevent ops methods from being
 * called with bad data.
 */
void
finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops)
{
	fp->f_data = data;
	fp->f_flag = flag;
	fp->f_type = type;
	/* Release store: publishes data/flag/type before ops becomes visible. */
	atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops);
}


/*
 * Extract the file pointer associated with the specified descriptor for the
 * current user process.
 *
 * If the descriptor doesn't exist, EBADF is returned.
 *
 * If the descriptor exists but doesn't match 'flags' then return EBADF for
 * read attempts and EINVAL for write attempts.
 *
 * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
 * It should be dropped with fdrop().  If it is not set, then the refcount
 * will not be bumped however the thread's filedesc struct will be returned
 * locked (for fgetsock).
 *
 * If an error occured the non-zero error is returned and *fpp is set to
 * NULL.  Otherwise *fpp is set and zero is returned.
 */
static __inline int
_fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
{
	struct filedesc *fdp;
	struct file *fp;

	*fpp = NULL;
	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
		return (EBADF);
	FILEDESC_SLOCK(fdp);
	/* badfileops means the descriptor is still being set up. */
	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
		FILEDESC_SUNLOCK(fdp);
		return (EBADF);
	}

	/*
	 * FREAD and FWRITE failure return EBADF as per POSIX.
	 *
	 * Only one flag, or 0, may be specified.
 */
	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
		FILEDESC_SUNLOCK(fdp);
		return (EBADF);
	}
	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
		FILEDESC_SUNLOCK(fdp);
		return (EBADF);
	}
	if (hold) {
		fhold(fp);
		FILEDESC_SUNLOCK(fdp);
	}
	/* !hold: the filedesc stays share-locked; caller must unlock. */
	*fpp = fp;
	return (0);
}

/* Get a held reference to the file for fd, any access mode. */
int
fget(struct thread *td, int fd, struct file **fpp)
{

	return (_fget(td, fd, fpp, 0, 1));
}

/* As fget(), but the descriptor must be open for reading. */
int
fget_read(struct thread *td, int fd, struct file **fpp)
{

	return (_fget(td, fd, fpp, FREAD, 1));
}

/* As fget(), but the descriptor must be open for writing. */
int
fget_write(struct thread *td, int fd, struct file **fpp)
{

	return (_fget(td, fd, fpp, FWRITE, 1));
}

/*
 * Like fget() but loads the underlying vnode, or returns an error if the
 * descriptor does not represent a vnode.  Note that pipes use vnodes but
 * never have VM objects.  The returned vnode will be vref()'d.
 *
 * XXX: what about the unused flags ?
 */
static __inline int
_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
{
	struct file *fp;
	int error;

	*vpp = NULL;
	/* hold == 0: _fget leaves the filedesc share-locked for us. */
	if ((error = _fget(td, fd, &fp, flags, 0)) != 0)
		return (error);
	if (fp->f_vnode == NULL) {
		error = EINVAL;
	} else {
		*vpp = fp->f_vnode;
		vref(*vpp);
	}
	FILEDESC_SUNLOCK(td->td_proc->p_fd);
	return (error);
}

int
fgetvp(struct thread *td, int fd, struct vnode **vpp)
{

	return (_fgetvp(td, fd, vpp, 0));
}

int
fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
{

	return (_fgetvp(td, fd, vpp, FREAD));
}

#ifdef notyet
int
fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
{

	return (_fgetvp(td, fd, vpp, FWRITE));
}
#endif

/*
 * Like fget() but loads the underlying socket, or returns an error if the
 * descriptor does not represent a socket.
 *
 * We bump the ref count on the returned socket.  XXX Also obtain the SX lock
 * in the future.
 *
 * XXXRW: fgetsock() and fputsock() are deprecated, as consumers should rely
 * on their file descriptor reference to prevent the socket from being free'd
 * during use.
 */
int
fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
{
	struct file *fp;
	int error;

	*spp = NULL;
	if (fflagp != NULL)
		*fflagp = 0;
	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
		return (error);
	if (fp->f_type != DTYPE_SOCKET) {
		error = ENOTSOCK;
	} else {
		*spp = fp->f_data;
		if (fflagp)
			*fflagp = fp->f_flag;
		SOCK_LOCK(*spp);
		soref(*spp);
		SOCK_UNLOCK(*spp);
	}
	FILEDESC_SUNLOCK(td->td_proc->p_fd);
	return (error);
}

/*
 * Drop the reference count on the socket and XXX release the SX lock in the
 * future.  The last reference closes the socket.
 *
 * XXXRW: fputsock() is deprecated, see comment for fgetsock().
 */
void
fputsock(struct socket *so)
{

	ACCEPT_LOCK();
	SOCK_LOCK(so);
	sorele(so);
}

/*
 * Handle the last reference to a file being closed.
 */
int
_fdrop(struct file *fp, struct thread *td)
{
	int error;

	error = 0;
	if (fp->f_count != 0)
		panic("fdrop: count %d", fp->f_count);
	/* badfileops means the file was never fully initialized; no fo_close. */
	if (fp->f_ops != &badfileops)
		error = fo_close(fp, td);
	atomic_subtract_int(&openfiles, 1);
	crfree(fp->f_cred);
	uma_zfree(file_zone, fp);

	return (error);
}

/*
 * Apply an advisory lock on a file descriptor.
 *
 * Just attempt to get a record lock of the requested type on the entire file
 * (l_whence = SEEK_SET, l_start = 0, l_len = 0).
 */
#ifndef _SYS_SYSPROTO_H_
struct flock_args {
	int	fd;
	int	how;
};
#endif
/* ARGSUSED */
int
flock(struct thread *td, struct flock_args *uap)
{
	struct file *fp;
	struct vnode *vp;
	struct flock lf;
	int vfslocked;
	int error;

	if ((error = fget(td, uap->fd, &fp)) != 0)
		return (error);
	/* flock(2) only works on vnode-backed descriptors. */
	if (fp->f_type != DTYPE_VNODE) {
		fdrop(fp, td);
		return (EOPNOTSUPP);
	}

	vp = fp->f_vnode;
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	lf.l_whence = SEEK_SET;
	lf.l_start = 0;
	lf.l_len = 0;
	if (uap->how & LOCK_UN) {
		lf.l_type = F_UNLCK;
		atomic_clear_int(&fp->f_flag, FHASLOCK);
		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
		goto done2;
	}
	if (uap->how & LOCK_EX)
		lf.l_type = F_WRLCK;
	else if (uap->how & LOCK_SH)
		lf.l_type = F_RDLCK;
	else {
		error = EBADF;
		goto done2;
	}
	atomic_set_int(&fp->f_flag, FHASLOCK);
	/* LOCK_NB: fail with EWOULDBLOCK instead of sleeping (no F_WAIT). */
	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
done2:
	fdrop(fp, td);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
/*
 * Duplicate the specified descriptor to a free descriptor.
 */
int
dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error)
{
	struct file *wfp;
	struct file *fp;

	/*
	 * If the to-be-dup'd fd number is greater than the allowed number
	 * of file descriptors, or the fd to be dup'd has already been
	 * closed, then reject.
	 */
	FILEDESC_XLOCK(fdp);
	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
		FILEDESC_XUNLOCK(fdp);
		return (EBADF);
	}

	/*
	 * There are two cases of interest here.
	 *
	 * For ENODEV simply dup (dfd) to file descriptor (indx) and return.
	 *
	 * For ENXIO steal away the file structure from (dfd) and store it in
	 * (indx).  (dfd) is effectively closed by this operation.
	 *
	 * Any other error code is just returned.
	 */
	switch (error) {
	case ENODEV:
		/*
		 * Check that the mode the file is being opened for is a
		 * subset of the mode of the existing descriptor.
		 */
		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
			FILEDESC_XUNLOCK(fdp);
			return (EACCES);
		}
		fp = fdp->fd_ofiles[indx];
		fdp->fd_ofiles[indx] = wfp;
		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
		if (fp == NULL)
			fdused(fdp, indx);
		fhold(wfp);
		FILEDESC_XUNLOCK(fdp);
		if (fp != NULL)
			/*
			 * We now own the reference to fp that the ofiles[]
			 * array used to own.  Release it.
			 */
			fdrop(fp, td);
		return (0);

	case ENXIO:
		/*
		 * Steal away the file pointer from dfd and stuff it into indx.
 */
		fp = fdp->fd_ofiles[indx];
		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
		fdp->fd_ofiles[dfd] = NULL;
		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
		fdp->fd_ofileflags[dfd] = 0;
		fdunused(fdp, dfd);
		if (fp == NULL)
			fdused(fdp, indx);
		FILEDESC_XUNLOCK(fdp);

		/*
		 * We now own the reference to fp that the ofiles[] array
		 * used to own.  Release it.
		 */
		if (fp != NULL)
			fdrop(fp, td);
		return (0);

	default:
		FILEDESC_XUNLOCK(fdp);
		return (error);
	}
	/* NOTREACHED */
}

/*
 * Scan all active processes to see if any of them have a current or root
 * directory of `olddp'. If so, replace them with the new mount point.
 */
void
mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
{
	struct filedesc *fdp;
	struct proc *p;
	int nrele;

	/* With only one reference, nobody else can be pointing at olddp. */
	if (vrefcnt(olddp) == 1)
		return;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		fdp = fdhold(p);
		if (fdp == NULL)
			continue;
		nrele = 0;
		FILEDESC_XLOCK(fdp);
		if (fdp->fd_cdir == olddp) {
			vref(newdp);
			fdp->fd_cdir = newdp;
			nrele++;
		}
		if (fdp->fd_rdir == olddp) {
			vref(newdp);
			fdp->fd_rdir = newdp;
			nrele++;
		}
		FILEDESC_XUNLOCK(fdp);
		fddrop(fdp);
		/* Drop the old references outside the filedesc lock. */
		while (nrele--)
			vrele(olddp);
	}
	sx_sunlock(&allproc_lock);
	if (rootvnode == olddp) {
		vrele(rootvnode);
		vref(newdp);
		rootvnode = newdp;
	}
}

/*
 * Allocate a filedesc-to-leader tracking structure, linking it into the
 * circular list rooted at 'old' when one already exists.
 */
struct filedesc_to_leader *
filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader)
{
	struct filedesc_to_leader *fdtol;

	MALLOC(fdtol, struct filedesc_to_leader *,
	    sizeof(struct filedesc_to_leader),
	    M_FILEDESC_TO_LEADER,
	    M_WAITOK);
	fdtol->fdl_refcount = 1;
	fdtol->fdl_holdcount = 0;
	fdtol->fdl_wakeup = 0;
	fdtol->fdl_leader = leader;
	if (old != NULL) {
		/* Insert after 'old' in the existing ring. */
		FILEDESC_XLOCK(fdp);
		fdtol->fdl_next = old->fdl_next;
		fdtol->fdl_prev = old;
		old->fdl_next = fdtol;
		fdtol->fdl_next->fdl_prev = fdtol;
		FILEDESC_XUNLOCK(fdp);
	} else {
		/* First entry: a ring of one. */
		fdtol->fdl_next = fdtol;
		fdtol->fdl_prev = fdtol;
	}
	return (fdtol);
}

/*
 * Get file structures globally.
 */
static int
sysctl_kern_file(SYSCTL_HANDLER_ARGS)
{
	struct xfile xf;
	struct filedesc *fdp;
	struct file *fp;
	struct proc *p;
	int error, n;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	/* Size probe: report an estimate of the required buffer space. */
	if (req->oldptr == NULL) {
		n = 0;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			if (p->p_state == PRS_NEW)
				continue;
			fdp = fdhold(p);
			if (fdp == NULL)
				continue;
			/* overestimates sparse tables. */
			if (fdp->fd_lastfile > 0)
				n += fdp->fd_lastfile;
			fddrop(fdp);
		}
		sx_sunlock(&allproc_lock);
		return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
	}
	error = 0;
	bzero(&xf, sizeof(xf));
	xf.xf_size = sizeof(xf);
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		if (p->p_state == PRS_NEW)
			continue;
		PROC_LOCK(p);
		/* Skip processes the requester may not see. */
		if (p_cansee(req->td, p) != 0) {
			PROC_UNLOCK(p);
			continue;
		}
		xf.xf_pid = p->p_pid;
		xf.xf_uid = p->p_ucred->cr_uid;
		PROC_UNLOCK(p);
		fdp = fdhold(p);
		if (fdp == NULL)
			continue;
		FILEDESC_SLOCK(fdp);
		/* fd_refcnt check: stop if the table is being torn down. */
		for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) {
			if ((fp = fdp->fd_ofiles[n]) == NULL)
				continue;
			xf.xf_fd = n;
			xf.xf_file = fp;
			xf.xf_data = fp->f_data;
			xf.xf_vnode = fp->f_vnode;
			xf.xf_type = fp->f_type;
			xf.xf_count = fp->f_count;
			xf.xf_msgcount = 0;
			xf.xf_offset = fp->f_offset;
			xf.xf_flag = fp->f_flag;
			error = SYSCTL_OUT(req, &xf, sizeof(xf));
			if (error)
				break;
		}
		FILEDESC_SUNLOCK(fdp);
		fddrop(fdp);
		if (error)
			break;
	}
	sx_sunlock(&allproc_lock);
	return (error);
}

SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, 0, sysctl_kern_file, "S,xfile", "Entire file table");

/*
 * Emit one kinfo_file record for a directory vnode (cwd/root/jail root).
 * Called with fdp share-locked; drops and reacquires that lock around
 * the vnode path lookup.
 */
static int
export_vnode_for_sysctl(struct vnode *vp, int type,
    struct kinfo_file *kif, struct filedesc *fdp, struct sysctl_req *req)
{
	int error;
	char *fullpath, *freepath;
	int vfslocked;

	bzero(kif, sizeof(*kif));
	kif->kf_structsize = sizeof(*kif);

	vref(vp);
	kif->kf_fd = type;
	kif->kf_type = KF_TYPE_VNODE;
	/* This function only handles directories. */
	KASSERT(vp->v_type == VDIR, ("export_vnode_for_sysctl: vnode not directory"));
	kif->kf_vnode_type = KF_VTYPE_VDIR;

	/*
	 * This is not a true file descriptor, so we set a bogus refcount
	 * and offset to indicate these fields should be ignored.
	 */
	kif->kf_ref_count = -1;
	kif->kf_offset = -1;

	freepath = NULL;
	fullpath = "-";
	/* vn_fullpath may sleep; can't hold the filedesc lock across it. */
	FILEDESC_SUNLOCK(fdp);
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	vn_fullpath(curthread, vp, &fullpath, &freepath);
	vput(vp);
	VFS_UNLOCK_GIANT(vfslocked);
	strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path));
	if (freepath != NULL)
		free(freepath, M_TEMP);
	error = SYSCTL_OUT(req, kif, sizeof(*kif));
	FILEDESC_SLOCK(fdp);
	return (error);
}

/*
 * Get per-process file descriptors for use by procstat(1), et al.
2500 */ 2501 static int 2502 sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) 2503 { 2504 char *fullpath, *freepath; 2505 struct kinfo_file *kif; 2506 struct filedesc *fdp; 2507 int error, i, *name; 2508 struct socket *so; 2509 struct vnode *vp; 2510 struct file *fp; 2511 struct proc *p; 2512 int vfslocked; 2513 2514 name = (int *)arg1; 2515 if ((p = pfind((pid_t)name[0])) == NULL) 2516 return (ESRCH); 2517 if ((error = p_candebug(curthread, p))) { 2518 PROC_UNLOCK(p); 2519 return (error); 2520 } 2521 fdp = fdhold(p); 2522 PROC_UNLOCK(p); 2523 kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK); 2524 FILEDESC_SLOCK(fdp); 2525 if (fdp->fd_cdir != NULL) 2526 export_vnode_for_sysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif, 2527 fdp, req); 2528 if (fdp->fd_rdir != NULL) 2529 export_vnode_for_sysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif, 2530 fdp, req); 2531 if (fdp->fd_jdir != NULL) 2532 export_vnode_for_sysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif, 2533 fdp, req); 2534 for (i = 0; i < fdp->fd_nfiles; i++) { 2535 if ((fp = fdp->fd_ofiles[i]) == NULL) 2536 continue; 2537 bzero(kif, sizeof(*kif)); 2538 kif->kf_structsize = sizeof(*kif); 2539 vp = NULL; 2540 so = NULL; 2541 kif->kf_fd = i; 2542 switch (fp->f_type) { 2543 case DTYPE_VNODE: 2544 kif->kf_type = KF_TYPE_VNODE; 2545 vp = fp->f_vnode; 2546 break; 2547 2548 case DTYPE_SOCKET: 2549 kif->kf_type = KF_TYPE_SOCKET; 2550 so = fp->f_data; 2551 break; 2552 2553 case DTYPE_PIPE: 2554 kif->kf_type = KF_TYPE_PIPE; 2555 break; 2556 2557 case DTYPE_FIFO: 2558 kif->kf_type = KF_TYPE_FIFO; 2559 vp = fp->f_vnode; 2560 vref(vp); 2561 break; 2562 2563 case DTYPE_KQUEUE: 2564 kif->kf_type = KF_TYPE_KQUEUE; 2565 break; 2566 2567 case DTYPE_CRYPTO: 2568 kif->kf_type = KF_TYPE_CRYPTO; 2569 break; 2570 2571 case DTYPE_MQUEUE: 2572 kif->kf_type = KF_TYPE_MQUEUE; 2573 break; 2574 2575 case DTYPE_SHM: 2576 kif->kf_type = KF_TYPE_SHM; 2577 break; 2578 2579 default: 2580 kif->kf_type = KF_TYPE_UNKNOWN; 2581 break; 2582 } 2583 kif->kf_ref_count = fp->f_count; 
2584 if (fp->f_flag & FREAD) 2585 kif->kf_flags |= KF_FLAG_READ; 2586 if (fp->f_flag & FWRITE) 2587 kif->kf_flags |= KF_FLAG_WRITE; 2588 if (fp->f_flag & FAPPEND) 2589 kif->kf_flags |= KF_FLAG_APPEND; 2590 if (fp->f_flag & FASYNC) 2591 kif->kf_flags |= KF_FLAG_ASYNC; 2592 if (fp->f_flag & FFSYNC) 2593 kif->kf_flags |= KF_FLAG_FSYNC; 2594 if (fp->f_flag & FNONBLOCK) 2595 kif->kf_flags |= KF_FLAG_NONBLOCK; 2596 if (fp->f_flag & O_DIRECT) 2597 kif->kf_flags |= KF_FLAG_DIRECT; 2598 if (fp->f_flag & FHASLOCK) 2599 kif->kf_flags |= KF_FLAG_HASLOCK; 2600 kif->kf_offset = fp->f_offset; 2601 if (vp != NULL) { 2602 vref(vp); 2603 switch (vp->v_type) { 2604 case VNON: 2605 kif->kf_vnode_type = KF_VTYPE_VNON; 2606 break; 2607 case VREG: 2608 kif->kf_vnode_type = KF_VTYPE_VREG; 2609 break; 2610 case VDIR: 2611 kif->kf_vnode_type = KF_VTYPE_VDIR; 2612 break; 2613 case VBLK: 2614 kif->kf_vnode_type = KF_VTYPE_VBLK; 2615 break; 2616 case VCHR: 2617 kif->kf_vnode_type = KF_VTYPE_VCHR; 2618 break; 2619 case VLNK: 2620 kif->kf_vnode_type = KF_VTYPE_VLNK; 2621 break; 2622 case VSOCK: 2623 kif->kf_vnode_type = KF_VTYPE_VSOCK; 2624 break; 2625 case VFIFO: 2626 kif->kf_vnode_type = KF_VTYPE_VFIFO; 2627 break; 2628 case VBAD: 2629 kif->kf_vnode_type = KF_VTYPE_VBAD; 2630 break; 2631 default: 2632 kif->kf_vnode_type = KF_VTYPE_UNKNOWN; 2633 break; 2634 } 2635 /* 2636 * It is OK to drop the filedesc lock here as we will 2637 * re-validate and re-evaluate its properties when 2638 * the loop continues. 
2639 */ 2640 freepath = NULL; 2641 fullpath = "-"; 2642 FILEDESC_SUNLOCK(fdp); 2643 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2644 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2645 vn_fullpath(curthread, vp, &fullpath, &freepath); 2646 vput(vp); 2647 VFS_UNLOCK_GIANT(vfslocked); 2648 strlcpy(kif->kf_path, fullpath, 2649 sizeof(kif->kf_path)); 2650 if (freepath != NULL) 2651 free(freepath, M_TEMP); 2652 FILEDESC_SLOCK(fdp); 2653 } 2654 if (so != NULL) { 2655 struct sockaddr *sa; 2656 2657 if (so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa) 2658 == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) { 2659 bcopy(sa, &kif->kf_sa_local, sa->sa_len); 2660 free(sa, M_SONAME); 2661 } 2662 if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa) 2663 == 00 && sa->sa_len <= sizeof(kif->kf_sa_peer)) { 2664 bcopy(sa, &kif->kf_sa_peer, sa->sa_len); 2665 free(sa, M_SONAME); 2666 } 2667 kif->kf_sock_domain = 2668 so->so_proto->pr_domain->dom_family; 2669 kif->kf_sock_type = so->so_type; 2670 kif->kf_sock_protocol = so->so_proto->pr_protocol; 2671 } 2672 error = SYSCTL_OUT(req, kif, sizeof(*kif)); 2673 if (error) 2674 break; 2675 } 2676 FILEDESC_SUNLOCK(fdp); 2677 fddrop(fdp); 2678 free(kif, M_TEMP); 2679 return (0); 2680 } 2681 2682 static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc, CTLFLAG_RD, 2683 sysctl_kern_proc_filedesc, "Process filedesc entries"); 2684 2685 #ifdef DDB 2686 /* 2687 * For the purposes of debugging, generate a human-readable string for the 2688 * file type. 
2689 */ 2690 static const char * 2691 file_type_to_name(short type) 2692 { 2693 2694 switch (type) { 2695 case 0: 2696 return ("zero"); 2697 case DTYPE_VNODE: 2698 return ("vnod"); 2699 case DTYPE_SOCKET: 2700 return ("sock"); 2701 case DTYPE_PIPE: 2702 return ("pipe"); 2703 case DTYPE_FIFO: 2704 return ("fifo"); 2705 case DTYPE_KQUEUE: 2706 return ("kque"); 2707 case DTYPE_CRYPTO: 2708 return ("crpt"); 2709 case DTYPE_MQUEUE: 2710 return ("mque"); 2711 case DTYPE_SHM: 2712 return ("shm"); 2713 default: 2714 return ("unkn"); 2715 } 2716 } 2717 2718 /* 2719 * For the purposes of debugging, identify a process (if any, perhaps one of 2720 * many) that references the passed file in its file descriptor array. Return 2721 * NULL if none. 2722 */ 2723 static struct proc * 2724 file_to_first_proc(struct file *fp) 2725 { 2726 struct filedesc *fdp; 2727 struct proc *p; 2728 int n; 2729 2730 FOREACH_PROC_IN_SYSTEM(p) { 2731 if (p->p_state == PRS_NEW) 2732 continue; 2733 fdp = p->p_fd; 2734 if (fdp == NULL) 2735 continue; 2736 for (n = 0; n < fdp->fd_nfiles; n++) { 2737 if (fp == fdp->fd_ofiles[n]) 2738 return (p); 2739 } 2740 } 2741 return (NULL); 2742 } 2743 2744 static void 2745 db_print_file(struct file *fp, int header) 2746 { 2747 struct proc *p; 2748 2749 if (header) 2750 db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n", 2751 "File", "Type", "Data", "Flag", "GCFl", "Count", 2752 "MCount", "Vnode", "FPID", "FCmd"); 2753 p = file_to_first_proc(fp); 2754 db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp, 2755 file_type_to_name(fp->f_type), fp->f_data, fp->f_flag, 2756 0, fp->f_count, 0, fp->f_vnode, 2757 p != NULL ? p->p_pid : -1, p != NULL ? 
p->p_comm : "-"); 2758 } 2759 2760 DB_SHOW_COMMAND(file, db_show_file) 2761 { 2762 struct file *fp; 2763 2764 if (!have_addr) { 2765 db_printf("usage: show file <addr>\n"); 2766 return; 2767 } 2768 fp = (struct file *)addr; 2769 db_print_file(fp, 1); 2770 } 2771 2772 DB_SHOW_COMMAND(files, db_show_files) 2773 { 2774 struct filedesc *fdp; 2775 struct file *fp; 2776 struct proc *p; 2777 int header; 2778 int n; 2779 2780 header = 1; 2781 FOREACH_PROC_IN_SYSTEM(p) { 2782 if (p->p_state == PRS_NEW) 2783 continue; 2784 if ((fdp = p->p_fd) == NULL) 2785 continue; 2786 for (n = 0; n < fdp->fd_nfiles; ++n) { 2787 if ((fp = fdp->fd_ofiles[n]) == NULL) 2788 continue; 2789 db_print_file(fp, header); 2790 header = 0; 2791 } 2792 } 2793 } 2794 #endif 2795 2796 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, 2797 &maxfilesperproc, 0, "Maximum files allowed open per process"); 2798 2799 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, 2800 &maxfiles, 0, "Maximum number of files"); 2801 2802 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD, 2803 __DEVOLATILE(int *, &openfiles), 0, "System-wide number of open files"); 2804 2805 /* ARGSUSED*/ 2806 static void 2807 filelistinit(void *dummy) 2808 { 2809 2810 file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL, 2811 NULL, NULL, UMA_ALIGN_PTR, 0); 2812 mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); 2813 mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF); 2814 } 2815 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL) 2816 2817 /*-------------------------------------------------------------------*/ 2818 2819 static int 2820 badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) 2821 { 2822 2823 return (EBADF); 2824 } 2825 2826 static int 2827 badfo_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td) 2828 { 2829 2830 return (EINVAL); 2831 } 2832 2833 static int 2834 badfo_ioctl(struct file *fp, u_long 
com, void *data, struct ucred *active_cred, struct thread *td) 2835 { 2836 2837 return (EBADF); 2838 } 2839 2840 static int 2841 badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) 2842 { 2843 2844 return (0); 2845 } 2846 2847 static int 2848 badfo_kqfilter(struct file *fp, struct knote *kn) 2849 { 2850 2851 return (EBADF); 2852 } 2853 2854 static int 2855 badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td) 2856 { 2857 2858 return (EBADF); 2859 } 2860 2861 static int 2862 badfo_close(struct file *fp, struct thread *td) 2863 { 2864 2865 return (EBADF); 2866 } 2867 2868 struct fileops badfileops = { 2869 .fo_read = badfo_readwrite, 2870 .fo_write = badfo_readwrite, 2871 .fo_truncate = badfo_truncate, 2872 .fo_ioctl = badfo_ioctl, 2873 .fo_poll = badfo_poll, 2874 .fo_kqfilter = badfo_kqfilter, 2875 .fo_stat = badfo_stat, 2876 .fo_close = badfo_close, 2877 }; 2878 2879 2880 /*-------------------------------------------------------------------*/ 2881 2882 /* 2883 * File Descriptor pseudo-device driver (/dev/fd/). 2884 * 2885 * Opening minor device N dup()s the file (if any) connected to file 2886 * descriptor N belonging to the calling process. Note that this driver 2887 * consists of only the ``open()'' routine, because all subsequent 2888 * references to this file will be direct to the other driver. 2889 * 2890 * XXX: we could give this one a cloning event handler if necessary. 2891 */ 2892 2893 /* ARGSUSED */ 2894 static int 2895 fdopen(struct cdev *dev, int mode, int type, struct thread *td) 2896 { 2897 2898 /* 2899 * XXX Kludge: set curthread->td_dupfd to contain the value of the 2900 * the file descriptor being sought for duplication. The error 2901 * return ensures that the vnode for this device will be released 2902 * by vn_open. Open will detect this special error and take the 2903 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 2904 * will simply report the error. 
2905 */ 2906 td->td_dupfd = dev2unit(dev); 2907 return (ENODEV); 2908 } 2909 2910 static struct cdevsw fildesc_cdevsw = { 2911 .d_version = D_VERSION, 2912 .d_flags = D_NEEDGIANT, 2913 .d_open = fdopen, 2914 .d_name = "FD", 2915 }; 2916 2917 static void 2918 fildesc_drvinit(void *unused) 2919 { 2920 struct cdev *dev; 2921 2922 dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0"); 2923 make_dev_alias(dev, "stdin"); 2924 dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1"); 2925 make_dev_alias(dev, "stdout"); 2926 dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2"); 2927 make_dev_alias(dev, "stderr"); 2928 } 2929 2930 SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL) 2931