1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_compat.h" 41 #include "opt_ddb.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 46 #include <sys/conf.h> 47 #include <sys/fcntl.h> 48 #include <sys/file.h> 49 #include <sys/filedesc.h> 50 #include <sys/filio.h> 51 #include <sys/jail.h> 52 #include <sys/kernel.h> 53 #include <sys/limits.h> 54 #include <sys/lock.h> 55 #include <sys/malloc.h> 56 #include <sys/mount.h> 57 #include <sys/mqueue.h> 58 #include <sys/mutex.h> 59 #include <sys/namei.h> 60 #include <sys/priv.h> 61 #include <sys/proc.h> 62 #include <sys/resourcevar.h> 63 #include <sys/signalvar.h> 64 #include <sys/socketvar.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/syscallsubr.h> 68 #include <sys/sysctl.h> 69 #include <sys/sysproto.h> 70 #include <sys/unistd.h> 71 #include <sys/vnode.h> 72 73 #include <security/audit/audit.h> 74 75 #include <vm/uma.h> 76 77 #include <ddb/ddb.h> 78 79 static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table"); 80 static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader", 81 "file desc to leader structures"); 82 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); 83 84 static uma_zone_t file_zone; 85 86 87 /* How to treat 'new' parameter when allocating a fd for do_dup(). 
*/
enum dup_type { DUP_VARIABLE, DUP_FIXED };

/* Forward declarations for the file-local helpers defined below. */
static int do_dup(struct thread *td, enum dup_type type, int old, int new,
    register_t *retval);
static int	fd_first_free(struct filedesc *, int, int);
static int	fd_last_used(struct filedesc *, int, int);
static void	fdgrowtable(struct filedesc *, int);
static int	fdrop_locked(struct file *fp, struct thread *td);
static void	fdunused(struct filedesc *fdp, int fd);
static void	fdused(struct filedesc *fdp, int fd);

/*
 * A process is initially started out with NDFILE descriptors stored within
 * this structure, selected to be enough for typical applications based on
 * the historical limit of 20 open files (and the usage of descriptors by
 * shells).  If these descriptors are exhausted, a larger descriptor table
 * may be allocated, up to a process' resource limit; the internal arrays
 * are then unused.
 *
 * The remaining macros describe the in-use bitmap, which is an array of
 * NDSLOTTYPE words:
 *   NDSLOTSIZE  - size of one bitmap word in bytes
 *   NDENTRIES   - number of descriptors tracked by one bitmap word
 *   NDSLOT(x)   - index of the bitmap word that holds descriptor x
 *   NDBIT(x)    - mask selecting descriptor x within its bitmap word
 *   NDSLOTS(x)  - number of bitmap words needed to track x descriptors
 */
#define NDFILE		20
#define NDSLOTSIZE	sizeof(NDSLOTTYPE)
#define	NDENTRIES	(NDSLOTSIZE * __CHAR_BIT)
#define NDSLOT(x)	((x) / NDENTRIES)
#define NDBIT(x)	((NDSLOTTYPE)1 << ((x) % NDENTRIES))
#define	NDSLOTS(x)	(((x) + NDENTRIES - 1) / NDENTRIES)

/*
 * Storage required per open file descriptor: one file pointer plus one
 * byte of per-descriptor flags.
 */
#define OFILESIZE (sizeof(struct file *) + sizeof(char))

/*
 * Basic allocation of descriptors:
 * one of the above, plus arrays for NDFILE descriptors.
 */
struct filedesc0 {
	struct	filedesc fd_fd;
	/*
	 * These arrays are used when the number of open files is
	 * <= NDFILE, and are then pointed to by the pointers above.
	 */
	struct	file *fd_dfiles[NDFILE];	/* initial file pointers */
	char	fd_dfileflags[NDFILE];		/* initial per-fd flags */
	NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];	/* initial in-use bitmap */
};

/*
 * Descriptor management.
136 */ 137 struct filelist filehead; /* head of list of open files */ 138 int openfiles; /* actual number of open files */ 139 struct sx filelist_lock; /* sx to protect filelist */ 140 struct mtx sigio_lock; /* mtx to protect pointers to sigio */ 141 void (*mq_fdclose)(struct thread *td, int fd, struct file *fp); 142 143 /* A mutex to protect the association between a proc and filedesc. */ 144 static struct mtx fdesc_mtx; 145 146 /* 147 * Find the first zero bit in the given bitmap, starting at low and not 148 * exceeding size - 1. 149 */ 150 static int 151 fd_first_free(struct filedesc *fdp, int low, int size) 152 { 153 NDSLOTTYPE *map = fdp->fd_map; 154 NDSLOTTYPE mask; 155 int off, maxoff; 156 157 if (low >= size) 158 return (low); 159 160 off = NDSLOT(low); 161 if (low % NDENTRIES) { 162 mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES))); 163 if ((mask &= ~map[off]) != 0UL) 164 return (off * NDENTRIES + ffsl(mask) - 1); 165 ++off; 166 } 167 for (maxoff = NDSLOTS(size); off < maxoff; ++off) 168 if (map[off] != ~0UL) 169 return (off * NDENTRIES + ffsl(~map[off]) - 1); 170 return (size); 171 } 172 173 /* 174 * Find the highest non-zero bit in the given bitmap, starting at low and 175 * not exceeding size - 1. 
176 */ 177 static int 178 fd_last_used(struct filedesc *fdp, int low, int size) 179 { 180 NDSLOTTYPE *map = fdp->fd_map; 181 NDSLOTTYPE mask; 182 int off, minoff; 183 184 if (low >= size) 185 return (-1); 186 187 off = NDSLOT(size); 188 if (size % NDENTRIES) { 189 mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES)); 190 if ((mask &= map[off]) != 0) 191 return (off * NDENTRIES + flsl(mask) - 1); 192 --off; 193 } 194 for (minoff = NDSLOT(low); off >= minoff; --off) 195 if (map[off] != 0) 196 return (off * NDENTRIES + flsl(map[off]) - 1); 197 return (low - 1); 198 } 199 200 static int 201 fdisused(struct filedesc *fdp, int fd) 202 { 203 KASSERT(fd >= 0 && fd < fdp->fd_nfiles, 204 ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles)); 205 return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0); 206 } 207 208 /* 209 * Mark a file descriptor as used. 210 */ 211 static void 212 fdused(struct filedesc *fdp, int fd) 213 { 214 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 215 KASSERT(!fdisused(fdp, fd), 216 ("fd already used")); 217 fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd); 218 if (fd > fdp->fd_lastfile) 219 fdp->fd_lastfile = fd; 220 if (fd == fdp->fd_freefile) 221 fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles); 222 } 223 224 /* 225 * Mark a file descriptor as unused. 226 */ 227 static void 228 fdunused(struct filedesc *fdp, int fd) 229 { 230 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 231 KASSERT(fdisused(fdp, fd), 232 ("fd is already unused")); 233 KASSERT(fdp->fd_ofiles[fd] == NULL, 234 ("fd is still in use")); 235 fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); 236 if (fd < fdp->fd_freefile) 237 fdp->fd_freefile = fd; 238 if (fd == fdp->fd_lastfile) 239 fdp->fd_lastfile = fd_last_used(fdp, 0, fd); 240 } 241 242 /* 243 * System calls on descriptors. 
244 */ 245 #ifndef _SYS_SYSPROTO_H_ 246 struct getdtablesize_args { 247 int dummy; 248 }; 249 #endif 250 /* ARGSUSED */ 251 int 252 getdtablesize(struct thread *td, struct getdtablesize_args *uap) 253 { 254 struct proc *p = td->td_proc; 255 256 PROC_LOCK(p); 257 td->td_retval[0] = 258 min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 259 PROC_UNLOCK(p); 260 return (0); 261 } 262 263 /* 264 * Duplicate a file descriptor to a particular value. 265 * 266 * Note: keep in mind that a potential race condition exists when closing 267 * descriptors from a shared descriptor table (via rfork). 268 */ 269 #ifndef _SYS_SYSPROTO_H_ 270 struct dup2_args { 271 u_int from; 272 u_int to; 273 }; 274 #endif 275 /* ARGSUSED */ 276 int 277 dup2(struct thread *td, struct dup2_args *uap) 278 { 279 280 return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to, 281 td->td_retval)); 282 } 283 284 /* 285 * Duplicate a file descriptor. 286 */ 287 #ifndef _SYS_SYSPROTO_H_ 288 struct dup_args { 289 u_int fd; 290 }; 291 #endif 292 /* ARGSUSED */ 293 int 294 dup(struct thread *td, struct dup_args *uap) 295 { 296 297 return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval)); 298 } 299 300 /* 301 * The file control system call. 
*/
#ifndef _SYS_SYSPROTO_H_
struct fcntl_args {
	int	fd;
	int	cmd;
	long	arg;
};
#endif
/*
 * System call entry point.  For the record-locking commands the struct
 * flock is copied in from the user address in uap->arg and a pointer to
 * the local copy is handed to kern_fcntl(); for F_GETLK the (possibly
 * updated) structure is copied back out afterwards.  All other commands
 * pass uap->arg through unchanged.
 */
/* ARGSUSED */
int
fcntl(struct thread *td, struct fcntl_args *uap)
{
	struct flock fl;
	intptr_t arg;
	int error;

	error = 0;
	switch (uap->cmd) {
	case F_GETLK:
	case F_SETLK:
	case F_SETLKW:
		error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
		arg = (intptr_t)&fl;
		break;
	default:
		arg = uap->arg;
		break;
	}
	if (error)
		return (error);
	error = kern_fcntl(td, uap->fd, uap->cmd, arg);
	if (error)
		return (error);
	if (uap->cmd == F_GETLK)
		error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
	return (error);
}

/*
 * Perform fcntl command cmd on descriptor fd of the calling process.
 *
 * arg is the integer argument of the command, except for F_GETLK, F_SETLK
 * and F_SETLKW where it is cast to a struct flock pointer.  Results are
 * returned through td->td_retval[0].  Returns 0 or an errno value; EBADF
 * if fd does not name an open file and EINVAL for an unknown command.
 *
 * Every case of the main switch is responsible for dropping the filedesc
 * lock taken before the switch.
 */
int
kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
{
	struct filedesc *fdp;
	struct flock *flp;
	struct file *fp;
	struct proc *p;
	char *pop;
	struct vnode *vp;
	u_int newmin;
	int error, flg, tmp;
	int giant_locked;

	/*
	 * XXXRW: Some fcntl() calls require Giant -- others don't.  Try to
	 * avoid grabbing Giant for calls we know don't need it.
	 */
	switch (cmd) {
	case F_DUPFD:
	case F_GETFD:
	case F_SETFD:
	case F_GETFL:
		giant_locked = 0;
		break;

	default:
		giant_locked = 1;
		mtx_lock(&Giant);
	}

	error = 0;
	flg = F_POSIX;
	p = td->td_proc;
	fdp = p->p_fd;
	FILEDESC_LOCK(fdp);
	if ((unsigned)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL) {
		FILEDESC_UNLOCK(fdp);
		error = EBADF;
		goto done2;
	}
	/* Per-descriptor flag byte (holds UF_EXCLOSE). */
	pop = &fdp->fd_ofileflags[fd];

	switch (cmd) {
	case F_DUPFD:
		/* mtx_assert(&Giant, MA_NOTOWNED); */
		FILEDESC_UNLOCK(fdp);
		newmin = arg;
		PROC_LOCK(p);
		if (newmin >= lim_cur(p, RLIMIT_NOFILE) ||
		    newmin >= maxfilesperproc) {
			PROC_UNLOCK(p);
			error = EINVAL;
			break;
		}
		PROC_UNLOCK(p);
		error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval);
		break;

	case F_GETFD:
		/* mtx_assert(&Giant, MA_NOTOWNED); */
		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
		FILEDESC_UNLOCK(fdp);
		break;

	case F_SETFD:
		/* mtx_assert(&Giant, MA_NOTOWNED); */
		*pop = (*pop &~ UF_EXCLOSE) |
		    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
		FILEDESC_UNLOCK(fdp);
		break;

	case F_GETFL:
		/* mtx_assert(&Giant, MA_NOTOWNED); */
		FILE_LOCK(fp);
		td->td_retval[0] = OFLAGS(fp->f_flag);
		FILE_UNLOCK(fp);
		FILEDESC_UNLOCK(fdp);
		break;

	case F_SETFL:
		mtx_assert(&Giant, MA_OWNED);
		FILE_LOCK(fp);
		/* Hold the file across the ioctls issued without the lock. */
		fhold_locked(fp);
		fp->f_flag &= ~FCNTLFLAGS;
		fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
		FILE_UNLOCK(fp);
		FILEDESC_UNLOCK(fdp);
		tmp = fp->f_flag & FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		if (error) {
			fdrop(fp, td);
			break;
		}
		tmp = fp->f_flag & FASYNC;
		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
		if (error == 0) {
			fdrop(fp, td);
			break;
		}
		/*
		 * FIOASYNC failed: clear FNONBLOCK again and tell the
		 * object, ignoring any error from that second call.
		 */
		FILE_LOCK(fp);
		fp->f_flag &= ~FNONBLOCK;
		FILE_UNLOCK(fp);
		tmp = 0;
		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_GETOWN:
		mtx_assert(&Giant, MA_OWNED);
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
		if (error == 0)
			td->td_retval[0] = tmp;
		fdrop(fp, td);
		break;

	case F_SETOWN:
		mtx_assert(&Giant, MA_OWNED);
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		tmp = arg;
		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_SETLKW:
		mtx_assert(&Giant, MA_OWNED);
		flg |= F_WAIT;
		/* FALLTHROUGH F_SETLK */

	case F_SETLK:
		mtx_assert(&Giant, MA_OWNED);
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_UNLOCK(fdp);
			error = EBADF;
			break;
		}

		flp = (struct flock *)arg;
		if (flp->l_whence == SEEK_CUR) {
			/* Reject a start that would overflow off_t. */
			if (fp->f_offset < 0 ||
			    (flp->l_start > 0 &&
			     fp->f_offset > OFF_MAX - flp->l_start)) {
				FILEDESC_UNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}

		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		vp = fp->f_vnode;

		switch (flp->l_type) {
		case F_RDLCK:
			if ((fp->f_flag & FREAD) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_WRLCK:
			if ((fp->f_flag & FWRITE) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_UNLCK:
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
			    flp, F_POSIX);
			break;
		default:
			error = EINVAL;
			break;
		}
		/* Check for race with close */
		FILEDESC_LOCK_FAST(fdp);
		if ((unsigned) fd >= fdp->fd_nfiles ||
		    fp != fdp->fd_ofiles[fd]) {
			/*
			 * The descriptor no longer refers to fp: issue an
			 * unlock covering the whole file.
			 */
			FILEDESC_UNLOCK_FAST(fdp);
			flp->l_whence = SEEK_SET;
			flp->l_start = 0;
			flp->l_len = 0;
			flp->l_type = F_UNLCK;
			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
					   F_UNLCK, flp, F_POSIX);
		} else
			FILEDESC_UNLOCK_FAST(fdp);
		fdrop(fp, td);
		break;

	case F_GETLK:
		mtx_assert(&Giant, MA_OWNED);
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_UNLOCK(fdp);
			error = EBADF;
			break;
		}
		flp = (struct flock *)arg;
		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
		    flp->l_type != F_UNLCK) {
			FILEDESC_UNLOCK(fdp);
			error = EINVAL;
			break;
		}
		if (flp->l_whence == SEEK_CUR) {
			/* Reject a start that would over- or underflow. */
			if ((flp->l_start > 0 &&
			    fp->f_offset > OFF_MAX - flp->l_start) ||
			    (flp->l_start < 0 &&
			     fp->f_offset < OFF_MIN - flp->l_start)) {
				FILEDESC_UNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}
		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		vp = fp->f_vnode;
		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
		    F_POSIX);
		fdrop(fp, td);
		break;
	default:
		FILEDESC_UNLOCK(fdp);
		error = EINVAL;
		break;
	}
done2:
	if (giant_locked)
		mtx_unlock(&Giant);
	return (error);
}

/*
 * Common code for dup, dup2, and fcntl(F_DUPFD).
 *
 * With DUP_FIXED, `new' is the exact descriptor to use and any file already
 * open there is closed.  With DUP_VARIABLE, `new' is the lowest acceptable
 * descriptor and the one actually allocated is chosen by fdalloc().  The
 * resulting descriptor is stored in *retval.  Returns 0 or an errno value.
 */
static int
do_dup(struct thread *td, enum dup_type type, int old, int new, register_t *retval)
{
	struct filedesc *fdp;
	struct proc *p;
	struct file *fp;
	struct file *delfp;
	int error, holdleaders, maxfd;

	KASSERT((type == DUP_VARIABLE || type == DUP_FIXED),
	    ("invalid dup type %d", type));

	p = td->td_proc;
	fdp = p->p_fd;

	/*
	 * Verify we have a valid descriptor to dup from and possibly to
	 * dup to.
	 */
	if (old < 0 || new < 0)
		return (EBADF);
	PROC_LOCK(p);
	maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
	PROC_UNLOCK(p);
	if (new >= maxfd)
		return (EMFILE);

	FILEDESC_LOCK(fdp);
	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}
	/* dup2() of a descriptor onto itself succeeds without side effects. */
	if (type == DUP_FIXED && old == new) {
		*retval = new;
		FILEDESC_UNLOCK(fdp);
		return (0);
	}
	fp = fdp->fd_ofiles[old];
	fhold(fp);

	/*
	 * If the caller specified a file descriptor, make sure the file
	 * table is large enough to hold it, and grab it.  Otherwise, just
	 * allocate a new descriptor the usual way.  Since the filedesc
	 * lock may be temporarily dropped in the process, we have to look
	 * out for a race.
	 */
	if (type == DUP_FIXED) {
		if (new >= fdp->fd_nfiles)
			fdgrowtable(fdp, new + 1);
		if (fdp->fd_ofiles[new] == NULL)
			fdused(fdp, new);
	} else {
		if ((error = fdalloc(td, new, &new)) != 0) {
			FILEDESC_UNLOCK(fdp);
			fdrop(fp, td);
			return (error);
		}
	}

	/*
	 * If the old file changed out from under us then treat it as a
	 * bad file descriptor.  Userland should do its own locking to
	 * avoid this case.
	 */
	if (fdp->fd_ofiles[old] != fp) {
		/* we've allocated a descriptor which we won't use */
		if (fdp->fd_ofiles[new] == NULL)
			fdunused(fdp, new);
		FILEDESC_UNLOCK(fdp);
		fdrop(fp, td);
		return (EBADF);
	}
	KASSERT(old != new,
	    ("new fd is same as old"));

	/*
	 * Save info on the descriptor being overwritten.  We cannot close
	 * it without introducing an ownership race for the slot, since we
	 * need to drop the filedesc lock to call closef().
	 *
	 * XXX this duplicates parts of close().
	 */
	delfp = fdp->fd_ofiles[new];
	holdleaders = 0;
	if (delfp != NULL) {
		if (td->td_proc->p_fdtol != NULL) {
			/*
			 * Ask fdfree() to sleep to ensure that all relevant
			 * process leaders can be traversed in closef().
			 */
			fdp->fd_holdleaderscount++;
			holdleaders = 1;
		}
	}

	/*
	 * Duplicate the source descriptor
	 */
	fdp->fd_ofiles[new] = fp;
	/* The duplicate never inherits close-on-exec. */
	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
	if (new > fdp->fd_lastfile)
		fdp->fd_lastfile = new;
	*retval = new;

	/*
	 * If we dup'd over a valid file, we now own the reference to it
	 * and must dispose of it using closef() semantics (as if a
	 * close() were performed on it).
	 *
	 * XXX this duplicates parts of close().
	 */
	if (delfp != NULL) {
		knote_fdclose(td, new);
		if (delfp->f_type == DTYPE_MQUEUE)
			mq_fdclose(td, new, delfp);
		FILEDESC_UNLOCK(fdp);
		(void) closef(delfp, td);
		if (holdleaders) {
			/* Drop our hold and wake a sleeper waiting on it. */
			FILEDESC_LOCK_FAST(fdp);
			fdp->fd_holdleaderscount--;
			if (fdp->fd_holdleaderscount == 0 &&
			    fdp->fd_holdleaderswakeup != 0) {
				fdp->fd_holdleaderswakeup = 0;
				wakeup(&fdp->fd_holdleaderscount);
			}
			FILEDESC_UNLOCK_FAST(fdp);
		}
	} else {
		FILEDESC_UNLOCK(fdp);
	}
	return (0);
}

/*
 * If sigio is on the list associated with a process or process group,
 * disable signalling from the device, remove sigio from the list and
 * free sigio.
 *
 * A NULL *sigiop is a no-op.  A negative sio_pgid selects the process
 * group list, anything else the process list.
 */
void
funsetown(struct sigio **sigiop)
{
	struct sigio *sigio;

	SIGIO_LOCK();
	sigio = *sigiop;
	if (sigio == NULL) {
		SIGIO_UNLOCK();
		return;
	}
	/* Clear the owner's pointer to this sigio. */
	*(sigio->sio_myref) = NULL;
	if ((sigio)->sio_pgid < 0) {
		struct pgrp *pg = (sigio)->sio_pgrp;
		PGRP_LOCK(pg);
		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
			     sigio, sio_pgsigio);
		PGRP_UNLOCK(pg);
	} else {
		struct proc *p = (sigio)->sio_proc;
		PROC_LOCK(p);
		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
			     sigio, sio_pgsigio);
		PROC_UNLOCK(p);
	}
	SIGIO_UNLOCK();
	crfree(sigio->sio_ucred);
	FREE(sigio, M_SIGIO);
}

/*
 * Free a list of sigio structures.
 * We only need to lock the SIGIO_LOCK because we have made ourselves
 * inaccessible to callers of fsetown and therefore do not need to lock
 * the proc or pgrp struct for the list manipulation.
 */
void
funsetownlst(struct sigiolst *sigiolst)
{
	struct proc *p;
	struct pgrp *pg;
	struct sigio *sigio;

	sigio = SLIST_FIRST(sigiolst);
	if (sigio == NULL)
		return;
	p = NULL;
	pg = NULL;

	/*
	 * Every entry of the list should belong
	 * to a single proc or pgrp.
	 */
	if (sigio->sio_pgid < 0) {
		pg = sigio->sio_pgrp;
		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
	} else /* if (sigio->sio_pgid > 0) */ {
		p = sigio->sio_proc;
		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
	}

	SIGIO_LOCK();
	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
		*(sigio->sio_myref) = NULL;
		if (pg != NULL) {
			KASSERT(sigio->sio_pgid < 0,
			    ("Proc sigio in pgrp sigio list"));
			KASSERT(sigio->sio_pgrp == pg,
			    ("Bogus pgrp in sigio list"));
			PGRP_LOCK(pg);
			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
			    sio_pgsigio);
			PGRP_UNLOCK(pg);
		} else /* if (p != NULL) */ {
			KASSERT(sigio->sio_pgid > 0,
			    ("Pgrp sigio in proc sigio list"));
			KASSERT(sigio->sio_proc == p,
			    ("Bogus proc in sigio list"));
			PROC_LOCK(p);
			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
			    sio_pgsigio);
			PROC_UNLOCK(p);
		}
		/* SIGIO_LOCK is released around the frees and retaken. */
		SIGIO_UNLOCK();
		crfree(sigio->sio_ucred);
		FREE(sigio, M_SIGIO);
		SIGIO_LOCK();
	}
	SIGIO_UNLOCK();
}

/*
 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
 *
 * After permission checking, add a sigio structure to the sigio list for
 * the process or process group.
 *
 * pgid > 0 names a process, pgid < 0 names process group -pgid, and
 * pgid == 0 just removes the current owner.  Returns 0, ESRCH if the
 * target does not exist (or the process is exiting), or EPERM if it is in
 * a different session from the caller.
 */
int
fsetown(pid_t pgid, struct sigio **sigiop)
{
	struct proc *proc;
	struct pgrp *pgrp;
	struct sigio *sigio;
	int ret;

	if (pgid == 0) {
		funsetown(sigiop);
		return (0);
	}

	ret = 0;

	/* Allocate and fill in the new sigio out of locks. */
	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
	sigio->sio_pgid = pgid;
	sigio->sio_ucred = crhold(curthread->td_ucred);
	sigio->sio_myref = sigiop;

	sx_slock(&proctree_lock);
	if (pgid > 0) {
		proc = pfind(pgid);
		if (proc == NULL) {
			ret = ESRCH;
			goto fail;
		}

		/*
		 * Policy - Don't allow a process to FSETOWN a process
		 * in another session.
		 *
		 * Remove this test to allow maximum flexibility or
		 * restrict FSETOWN to the current process or process
		 * group for maximum safety.
		 */
		PROC_UNLOCK(proc);
		if (proc->p_session != curthread->td_proc->p_session) {
			ret = EPERM;
			goto fail;
		}

		pgrp = NULL;
	} else /* if (pgid < 0) */ {
		pgrp = pgfind(-pgid);
		if (pgrp == NULL) {
			ret = ESRCH;
			goto fail;
		}
		PGRP_UNLOCK(pgrp);

		/*
		 * Policy - Don't allow a process to FSETOWN a process
		 * in another session.
		 *
		 * Remove this test to allow maximum flexibility or
		 * restrict FSETOWN to the current process or process
		 * group for maximum safety.
		 */
		if (pgrp->pg_session != curthread->td_proc->p_session) {
			ret = EPERM;
			goto fail;
		}

		proc = NULL;
	}
	/* Drop any previous owner before installing the new one. */
	funsetown(sigiop);
	if (pgid > 0) {
		PROC_LOCK(proc);
		/*
		 * Since funsetownlst() is called without the proctree
		 * locked, we need to check for P_WEXIT.
		 * XXX: is ESRCH correct?
		 */
		if ((proc->p_flag & P_WEXIT) != 0) {
			PROC_UNLOCK(proc);
			ret = ESRCH;
			goto fail;
		}
		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
		sigio->sio_proc = proc;
		PROC_UNLOCK(proc);
	} else {
		PGRP_LOCK(pgrp);
		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
		sigio->sio_pgrp = pgrp;
		PGRP_UNLOCK(pgrp);
	}
	sx_sunlock(&proctree_lock);
	SIGIO_LOCK();
	*sigiop = sigio;
	SIGIO_UNLOCK();
	return (0);

fail:
	/* Release the lock and the never-installed sigio. */
	sx_sunlock(&proctree_lock);
	crfree(sigio->sio_ucred);
	FREE(sigio, M_SIGIO);
	return (ret);
}

/*
 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
 *
 * Returns the sio_pgid recorded in *sigiop, or 0 if no owner is set.
 */
pid_t
fgetown(sigiop)
	struct sigio **sigiop;
{
	pid_t pgid;

	SIGIO_LOCK();
	pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
	SIGIO_UNLOCK();
	return (pgid);
}

/*
 * Close a file descriptor.
950 */ 951 #ifndef _SYS_SYSPROTO_H_ 952 struct close_args { 953 int fd; 954 }; 955 #endif 956 /* ARGSUSED */ 957 int 958 close(td, uap) 959 struct thread *td; 960 struct close_args *uap; 961 { 962 963 return (kern_close(td, uap->fd)); 964 } 965 966 int 967 kern_close(td, fd) 968 struct thread *td; 969 int fd; 970 { 971 struct filedesc *fdp; 972 struct file *fp; 973 int error; 974 int holdleaders; 975 976 error = 0; 977 holdleaders = 0; 978 fdp = td->td_proc->p_fd; 979 980 AUDIT_SYSCLOSE(td, fd); 981 982 FILEDESC_LOCK(fdp); 983 if ((unsigned)fd >= fdp->fd_nfiles || 984 (fp = fdp->fd_ofiles[fd]) == NULL) { 985 FILEDESC_UNLOCK(fdp); 986 return (EBADF); 987 } 988 fdp->fd_ofiles[fd] = NULL; 989 fdp->fd_ofileflags[fd] = 0; 990 fdunused(fdp, fd); 991 if (td->td_proc->p_fdtol != NULL) { 992 /* 993 * Ask fdfree() to sleep to ensure that all relevant 994 * process leaders can be traversed in closef(). 995 */ 996 fdp->fd_holdleaderscount++; 997 holdleaders = 1; 998 } 999 1000 /* 1001 * We now hold the fp reference that used to be owned by the descriptor 1002 * array. 1003 * We have to unlock the FILEDESC *AFTER* knote_fdclose to prevent a 1004 * race of the fd getting opened, a knote added, and deleteing a knote 1005 * for the new fd. 1006 */ 1007 knote_fdclose(td, fd); 1008 if (fp->f_type == DTYPE_MQUEUE) 1009 mq_fdclose(td, fd, fp); 1010 FILEDESC_UNLOCK(fdp); 1011 1012 error = closef(fp, td); 1013 if (holdleaders) { 1014 FILEDESC_LOCK_FAST(fdp); 1015 fdp->fd_holdleaderscount--; 1016 if (fdp->fd_holdleaderscount == 0 && 1017 fdp->fd_holdleaderswakeup != 0) { 1018 fdp->fd_holdleaderswakeup = 0; 1019 wakeup(&fdp->fd_holdleaderscount); 1020 } 1021 FILEDESC_UNLOCK_FAST(fdp); 1022 } 1023 return (error); 1024 } 1025 1026 #if defined(COMPAT_43) 1027 /* 1028 * Return status information about a file descriptor. 
1029 */ 1030 #ifndef _SYS_SYSPROTO_H_ 1031 struct ofstat_args { 1032 int fd; 1033 struct ostat *sb; 1034 }; 1035 #endif 1036 /* ARGSUSED */ 1037 int 1038 ofstat(struct thread *td, struct ofstat_args *uap) 1039 { 1040 struct ostat oub; 1041 struct stat ub; 1042 int error; 1043 1044 error = kern_fstat(td, uap->fd, &ub); 1045 if (error == 0) { 1046 cvtstat(&ub, &oub); 1047 error = copyout(&oub, uap->sb, sizeof(oub)); 1048 } 1049 return (error); 1050 } 1051 #endif /* COMPAT_43 */ 1052 1053 /* 1054 * Return status information about a file descriptor. 1055 */ 1056 #ifndef _SYS_SYSPROTO_H_ 1057 struct fstat_args { 1058 int fd; 1059 struct stat *sb; 1060 }; 1061 #endif 1062 /* ARGSUSED */ 1063 int 1064 fstat(struct thread *td, struct fstat_args *uap) 1065 { 1066 struct stat ub; 1067 int error; 1068 1069 error = kern_fstat(td, uap->fd, &ub); 1070 if (error == 0) 1071 error = copyout(&ub, uap->sb, sizeof(ub)); 1072 return (error); 1073 } 1074 1075 int 1076 kern_fstat(struct thread *td, int fd, struct stat *sbp) 1077 { 1078 struct file *fp; 1079 int error; 1080 1081 AUDIT_ARG(fd, fd); 1082 1083 if ((error = fget(td, fd, &fp)) != 0) 1084 return (error); 1085 1086 AUDIT_ARG(file, td->td_proc, fp); 1087 1088 error = fo_stat(fp, sbp, td->td_ucred, td); 1089 fdrop(fp, td); 1090 return (error); 1091 } 1092 1093 /* 1094 * Return status information about a file descriptor. 1095 */ 1096 #ifndef _SYS_SYSPROTO_H_ 1097 struct nfstat_args { 1098 int fd; 1099 struct nstat *sb; 1100 }; 1101 #endif 1102 /* ARGSUSED */ 1103 int 1104 nfstat(struct thread *td, struct nfstat_args *uap) 1105 { 1106 struct nstat nub; 1107 struct stat ub; 1108 int error; 1109 1110 error = kern_fstat(td, uap->fd, &ub); 1111 if (error == 0) { 1112 cvtnstat(&ub, &nub); 1113 error = copyout(&nub, uap->sb, sizeof(nub)); 1114 } 1115 return (error); 1116 } 1117 1118 /* 1119 * Return pathconf information about a file descriptor. 
1120 */ 1121 #ifndef _SYS_SYSPROTO_H_ 1122 struct fpathconf_args { 1123 int fd; 1124 int name; 1125 }; 1126 #endif 1127 /* ARGSUSED */ 1128 int 1129 fpathconf(struct thread *td, struct fpathconf_args *uap) 1130 { 1131 struct file *fp; 1132 struct vnode *vp; 1133 int error; 1134 1135 if ((error = fget(td, uap->fd, &fp)) != 0) 1136 return (error); 1137 1138 /* If asynchronous I/O is available, it works for all descriptors. */ 1139 if (uap->name == _PC_ASYNC_IO) { 1140 td->td_retval[0] = async_io_version; 1141 goto out; 1142 } 1143 vp = fp->f_vnode; 1144 if (vp != NULL) { 1145 int vfslocked; 1146 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1147 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1148 error = VOP_PATHCONF(vp, uap->name, td->td_retval); 1149 VOP_UNLOCK(vp, 0, td); 1150 VFS_UNLOCK_GIANT(vfslocked); 1151 } else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { 1152 if (uap->name != _PC_PIPE_BUF) { 1153 error = EINVAL; 1154 } else { 1155 td->td_retval[0] = PIPE_BUF; 1156 error = 0; 1157 } 1158 } else { 1159 error = EOPNOTSUPP; 1160 } 1161 out: 1162 fdrop(fp, td); 1163 return (error); 1164 } 1165 1166 /* 1167 * Grow the file table to accomodate (at least) nfd descriptors. This may 1168 * block and drop the filedesc lock, but it will reacquire it before 1169 * returning. 
*/
static void
fdgrowtable(struct filedesc *fdp, int nfd)
{
	struct file **ntable;
	char *nfileflags;
	int nnfiles, onfiles;
	NDSLOTTYPE *nmap;

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	KASSERT(fdp->fd_nfiles > 0,
	    ("zero-length file table"));

	/* compute the size of the new table */
	onfiles = fdp->fd_nfiles;
	nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */
	if (nnfiles <= onfiles)
		/* the table is already large enough */
		return;

	/* allocate a new table and (if required) new bitmaps */
	FILEDESC_UNLOCK(fdp);
	/*
	 * A single allocation holds the file pointer array followed by the
	 * per-descriptor flag bytes (OFILESIZE bytes per descriptor).
	 */
	MALLOC(ntable, struct file **, nnfiles * OFILESIZE,
	    M_FILEDESC, M_ZERO | M_WAITOK);
	nfileflags = (char *)&ntable[nnfiles];
	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles))
		MALLOC(nmap, NDSLOTTYPE *, NDSLOTS(nnfiles) * NDSLOTSIZE,
		    M_FILEDESC, M_ZERO | M_WAITOK);
	else
		nmap = NULL;
	FILEDESC_LOCK(fdp);

	/*
	 * We now have new tables ready to go.  Since we dropped the
	 * filedesc lock to call malloc(), watch out for a race.
	 */
	onfiles = fdp->fd_nfiles;
	if (onfiles >= nnfiles) {
		/* we lost the race, but that's OK */
		free(ntable, M_FILEDESC);
		if (nmap != NULL)
			free(nmap, M_FILEDESC);
		return;
	}
	bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable));
	bcopy(fdp->fd_ofileflags, nfileflags, onfiles);
	/* Tables of NDFILE entries or fewer are never freed here. */
	if (onfiles > NDFILE)
		free(fdp->fd_ofiles, M_FILEDESC);
	fdp->fd_ofiles = ntable;
	fdp->fd_ofileflags = nfileflags;
	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
		bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap));
		/* Likewise for a bitmap no larger than the initial one. */
		if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
			free(fdp->fd_map, M_FILEDESC);
		fdp->fd_map = nmap;
	}
	fdp->fd_nfiles = nnfiles;
}

/*
 * Allocate a file descriptor for the process.
1232 */ 1233 int 1234 fdalloc(struct thread *td, int minfd, int *result) 1235 { 1236 struct proc *p = td->td_proc; 1237 struct filedesc *fdp = p->p_fd; 1238 int fd = -1, maxfd; 1239 1240 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 1241 1242 if (fdp->fd_freefile > minfd) 1243 minfd = fdp->fd_freefile; 1244 1245 PROC_LOCK(p); 1246 maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 1247 PROC_UNLOCK(p); 1248 1249 /* 1250 * Search the bitmap for a free descriptor. If none is found, try 1251 * to grow the file table. Keep at it until we either get a file 1252 * descriptor or run into process or system limits; fdgrowtable() 1253 * may drop the filedesc lock, so we're in a race. 1254 */ 1255 for (;;) { 1256 fd = fd_first_free(fdp, minfd, fdp->fd_nfiles); 1257 if (fd >= maxfd) 1258 return (EMFILE); 1259 if (fd < fdp->fd_nfiles) 1260 break; 1261 fdgrowtable(fdp, min(fdp->fd_nfiles * 2, maxfd)); 1262 } 1263 1264 /* 1265 * Perform some sanity checks, then mark the file descriptor as 1266 * used and return it to the caller. 1267 */ 1268 KASSERT(!fdisused(fdp, fd), 1269 ("fd_first_free() returned non-free descriptor")); 1270 KASSERT(fdp->fd_ofiles[fd] == NULL, 1271 ("free descriptor isn't")); 1272 fdp->fd_ofileflags[fd] = 0; /* XXX needed? */ 1273 fdused(fdp, fd); 1274 *result = fd; 1275 return (0); 1276 } 1277 1278 /* 1279 * Check to see whether n user file descriptors 1280 * are available to the process p. 
1281 */ 1282 int 1283 fdavail(struct thread *td, int n) 1284 { 1285 struct proc *p = td->td_proc; 1286 struct filedesc *fdp = td->td_proc->p_fd; 1287 struct file **fpp; 1288 int i, lim, last; 1289 1290 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 1291 1292 PROC_LOCK(p); 1293 lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 1294 PROC_UNLOCK(p); 1295 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) 1296 return (1); 1297 last = min(fdp->fd_nfiles, lim); 1298 fpp = &fdp->fd_ofiles[fdp->fd_freefile]; 1299 for (i = last - fdp->fd_freefile; --i >= 0; fpp++) { 1300 if (*fpp == NULL && --n <= 0) 1301 return (1); 1302 } 1303 return (0); 1304 } 1305 1306 /* 1307 * Create a new open file structure and allocate 1308 * a file decriptor for the process that refers to it. 1309 * We add one reference to the file for the descriptor table 1310 * and one reference for resultfp. This is to prevent us being 1311 * preempted and the entry in the descriptor table closed after 1312 * we release the FILEDESC lock. 
1313 */ 1314 int 1315 falloc(struct thread *td, struct file **resultfp, int *resultfd) 1316 { 1317 struct proc *p = td->td_proc; 1318 struct file *fp, *fq; 1319 int error, i; 1320 int maxuserfiles = maxfiles - (maxfiles / 20); 1321 static struct timeval lastfail; 1322 static int curfail; 1323 1324 fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO); 1325 sx_xlock(&filelist_lock); 1326 1327 if ((openfiles >= maxuserfiles && 1328 priv_check_cred(td->td_ucred, PRIV_MAXFILES, SUSER_RUID) != 0) || 1329 openfiles >= maxfiles) { 1330 if (ppsratecheck(&lastfail, &curfail, 1)) { 1331 printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n", 1332 td->td_ucred->cr_ruid); 1333 } 1334 sx_xunlock(&filelist_lock); 1335 uma_zfree(file_zone, fp); 1336 return (ENFILE); 1337 } 1338 openfiles++; 1339 1340 /* 1341 * If the process has file descriptor zero open, add the new file 1342 * descriptor to the list of open files at that point, otherwise 1343 * put it at the front of the list of open files. 1344 */ 1345 fp->f_mtxp = mtx_pool_alloc(mtxpool_sleep); 1346 fp->f_count = 1; 1347 if (resultfp) 1348 fp->f_count++; 1349 fp->f_cred = crhold(td->td_ucred); 1350 fp->f_ops = &badfileops; 1351 fp->f_data = NULL; 1352 fp->f_vnode = NULL; 1353 FILEDESC_LOCK(p->p_fd); 1354 if ((fq = p->p_fd->fd_ofiles[0])) { 1355 LIST_INSERT_AFTER(fq, fp, f_list); 1356 } else { 1357 LIST_INSERT_HEAD(&filehead, fp, f_list); 1358 } 1359 sx_xunlock(&filelist_lock); 1360 if ((error = fdalloc(td, 0, &i))) { 1361 FILEDESC_UNLOCK(p->p_fd); 1362 fdrop(fp, td); 1363 if (resultfp) 1364 fdrop(fp, td); 1365 return (error); 1366 } 1367 p->p_fd->fd_ofiles[i] = fp; 1368 FILEDESC_UNLOCK(p->p_fd); 1369 if (resultfp) 1370 *resultfp = fp; 1371 if (resultfd) 1372 *resultfd = i; 1373 return (0); 1374 } 1375 1376 /* 1377 * Build a new filedesc structure from another. 1378 * Copy the current, root, and jail root vnode references. 
1379 */ 1380 struct filedesc * 1381 fdinit(struct filedesc *fdp) 1382 { 1383 struct filedesc0 *newfdp; 1384 1385 newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO); 1386 mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF); 1387 if (fdp != NULL) { 1388 FILEDESC_LOCK(fdp); 1389 newfdp->fd_fd.fd_cdir = fdp->fd_cdir; 1390 if (newfdp->fd_fd.fd_cdir) 1391 VREF(newfdp->fd_fd.fd_cdir); 1392 newfdp->fd_fd.fd_rdir = fdp->fd_rdir; 1393 if (newfdp->fd_fd.fd_rdir) 1394 VREF(newfdp->fd_fd.fd_rdir); 1395 newfdp->fd_fd.fd_jdir = fdp->fd_jdir; 1396 if (newfdp->fd_fd.fd_jdir) 1397 VREF(newfdp->fd_fd.fd_jdir); 1398 FILEDESC_UNLOCK(fdp); 1399 } 1400 1401 /* Create the file descriptor table. */ 1402 newfdp->fd_fd.fd_refcnt = 1; 1403 newfdp->fd_fd.fd_holdcnt = 1; 1404 newfdp->fd_fd.fd_cmask = CMASK; 1405 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 1406 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 1407 newfdp->fd_fd.fd_nfiles = NDFILE; 1408 newfdp->fd_fd.fd_map = newfdp->fd_dmap; 1409 newfdp->fd_fd.fd_lastfile = -1; 1410 return (&newfdp->fd_fd); 1411 } 1412 1413 static struct filedesc * 1414 fdhold(struct proc *p) 1415 { 1416 struct filedesc *fdp; 1417 1418 mtx_lock(&fdesc_mtx); 1419 fdp = p->p_fd; 1420 if (fdp != NULL) 1421 fdp->fd_holdcnt++; 1422 mtx_unlock(&fdesc_mtx); 1423 return (fdp); 1424 } 1425 1426 static void 1427 fddrop(struct filedesc *fdp) 1428 { 1429 int i; 1430 1431 mtx_lock(&fdesc_mtx); 1432 i = --fdp->fd_holdcnt; 1433 mtx_unlock(&fdesc_mtx); 1434 if (i > 0) 1435 return; 1436 1437 mtx_destroy(&fdp->fd_mtx); 1438 FREE(fdp, M_FILEDESC); 1439 } 1440 1441 /* 1442 * Share a filedesc structure. 
1443 */ 1444 struct filedesc * 1445 fdshare(struct filedesc *fdp) 1446 { 1447 FILEDESC_LOCK_FAST(fdp); 1448 fdp->fd_refcnt++; 1449 FILEDESC_UNLOCK_FAST(fdp); 1450 return (fdp); 1451 } 1452 1453 /* 1454 * Unshare a filedesc structure, if necessary by making a copy 1455 */ 1456 void 1457 fdunshare(struct proc *p, struct thread *td) 1458 { 1459 1460 FILEDESC_LOCK_FAST(p->p_fd); 1461 if (p->p_fd->fd_refcnt > 1) { 1462 struct filedesc *tmp; 1463 1464 FILEDESC_UNLOCK_FAST(p->p_fd); 1465 tmp = fdcopy(p->p_fd); 1466 fdfree(td); 1467 p->p_fd = tmp; 1468 } else 1469 FILEDESC_UNLOCK_FAST(p->p_fd); 1470 } 1471 1472 /* 1473 * Copy a filedesc structure. 1474 * A NULL pointer in returns a NULL reference, this is to ease callers, 1475 * not catch errors. 1476 */ 1477 struct filedesc * 1478 fdcopy(struct filedesc *fdp) 1479 { 1480 struct filedesc *newfdp; 1481 int i; 1482 1483 /* Certain daemons might not have file descriptors. */ 1484 if (fdp == NULL) 1485 return (NULL); 1486 1487 newfdp = fdinit(fdp); 1488 FILEDESC_LOCK_FAST(fdp); 1489 while (fdp->fd_lastfile >= newfdp->fd_nfiles) { 1490 FILEDESC_UNLOCK_FAST(fdp); 1491 FILEDESC_LOCK(newfdp); 1492 fdgrowtable(newfdp, fdp->fd_lastfile + 1); 1493 FILEDESC_UNLOCK(newfdp); 1494 FILEDESC_LOCK_FAST(fdp); 1495 } 1496 /* copy everything except kqueue descriptors */ 1497 newfdp->fd_freefile = -1; 1498 for (i = 0; i <= fdp->fd_lastfile; ++i) { 1499 if (fdisused(fdp, i) && 1500 fdp->fd_ofiles[i]->f_type != DTYPE_KQUEUE) { 1501 newfdp->fd_ofiles[i] = fdp->fd_ofiles[i]; 1502 newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i]; 1503 fhold(newfdp->fd_ofiles[i]); 1504 newfdp->fd_lastfile = i; 1505 } else { 1506 if (newfdp->fd_freefile == -1) 1507 newfdp->fd_freefile = i; 1508 } 1509 } 1510 FILEDESC_UNLOCK_FAST(fdp); 1511 FILEDESC_LOCK(newfdp); 1512 for (i = 0; i <= newfdp->fd_lastfile; ++i) 1513 if (newfdp->fd_ofiles[i] != NULL) 1514 fdused(newfdp, i); 1515 FILEDESC_UNLOCK(newfdp); 1516 FILEDESC_LOCK_FAST(fdp); 1517 if (newfdp->fd_freefile == -1) 
1518 newfdp->fd_freefile = i; 1519 newfdp->fd_cmask = fdp->fd_cmask; 1520 FILEDESC_UNLOCK_FAST(fdp); 1521 return (newfdp); 1522 } 1523 1524 /* 1525 * Release a filedesc structure. 1526 */ 1527 void 1528 fdfree(struct thread *td) 1529 { 1530 struct filedesc *fdp; 1531 struct file **fpp; 1532 int i, locked; 1533 struct filedesc_to_leader *fdtol; 1534 struct file *fp; 1535 struct vnode *cdir, *jdir, *rdir, *vp; 1536 struct flock lf; 1537 1538 /* Certain daemons might not have file descriptors. */ 1539 fdp = td->td_proc->p_fd; 1540 if (fdp == NULL) 1541 return; 1542 1543 /* Check for special need to clear POSIX style locks */ 1544 fdtol = td->td_proc->p_fdtol; 1545 if (fdtol != NULL) { 1546 FILEDESC_LOCK(fdp); 1547 KASSERT(fdtol->fdl_refcount > 0, 1548 ("filedesc_to_refcount botch: fdl_refcount=%d", 1549 fdtol->fdl_refcount)); 1550 if (fdtol->fdl_refcount == 1 && 1551 (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1552 for (i = 0, fpp = fdp->fd_ofiles; 1553 i <= fdp->fd_lastfile; 1554 i++, fpp++) { 1555 if (*fpp == NULL || 1556 (*fpp)->f_type != DTYPE_VNODE) 1557 continue; 1558 fp = *fpp; 1559 fhold(fp); 1560 FILEDESC_UNLOCK(fdp); 1561 lf.l_whence = SEEK_SET; 1562 lf.l_start = 0; 1563 lf.l_len = 0; 1564 lf.l_type = F_UNLCK; 1565 vp = fp->f_vnode; 1566 locked = VFS_LOCK_GIANT(vp->v_mount); 1567 (void) VOP_ADVLOCK(vp, 1568 (caddr_t)td->td_proc-> 1569 p_leader, 1570 F_UNLCK, 1571 &lf, 1572 F_POSIX); 1573 VFS_UNLOCK_GIANT(locked); 1574 FILEDESC_LOCK(fdp); 1575 fdrop(fp, td); 1576 fpp = fdp->fd_ofiles + i; 1577 } 1578 } 1579 retry: 1580 if (fdtol->fdl_refcount == 1) { 1581 if (fdp->fd_holdleaderscount > 0 && 1582 (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1583 /* 1584 * close() or do_dup() has cleared a reference 1585 * in a shared file descriptor table. 
1586 */ 1587 fdp->fd_holdleaderswakeup = 1; 1588 msleep(&fdp->fd_holdleaderscount, &fdp->fd_mtx, 1589 PLOCK, "fdlhold", 0); 1590 goto retry; 1591 } 1592 if (fdtol->fdl_holdcount > 0) { 1593 /* 1594 * Ensure that fdtol->fdl_leader 1595 * remains valid in closef(). 1596 */ 1597 fdtol->fdl_wakeup = 1; 1598 msleep(fdtol, &fdp->fd_mtx, 1599 PLOCK, "fdlhold", 0); 1600 goto retry; 1601 } 1602 } 1603 fdtol->fdl_refcount--; 1604 if (fdtol->fdl_refcount == 0 && 1605 fdtol->fdl_holdcount == 0) { 1606 fdtol->fdl_next->fdl_prev = fdtol->fdl_prev; 1607 fdtol->fdl_prev->fdl_next = fdtol->fdl_next; 1608 } else 1609 fdtol = NULL; 1610 td->td_proc->p_fdtol = NULL; 1611 FILEDESC_UNLOCK(fdp); 1612 if (fdtol != NULL) 1613 FREE(fdtol, M_FILEDESC_TO_LEADER); 1614 } 1615 FILEDESC_LOCK_FAST(fdp); 1616 i = --fdp->fd_refcnt; 1617 FILEDESC_UNLOCK_FAST(fdp); 1618 if (i > 0) 1619 return; 1620 /* 1621 * We are the last reference to the structure, so we can 1622 * safely assume it will not change out from under us. 1623 */ 1624 fpp = fdp->fd_ofiles; 1625 for (i = fdp->fd_lastfile; i-- >= 0; fpp++) { 1626 if (*fpp) 1627 (void) closef(*fpp, td); 1628 } 1629 FILEDESC_LOCK(fdp); 1630 1631 /* XXX This should happen earlier. 
*/ 1632 mtx_lock(&fdesc_mtx); 1633 td->td_proc->p_fd = NULL; 1634 mtx_unlock(&fdesc_mtx); 1635 1636 if (fdp->fd_nfiles > NDFILE) 1637 FREE(fdp->fd_ofiles, M_FILEDESC); 1638 if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE)) 1639 FREE(fdp->fd_map, M_FILEDESC); 1640 1641 fdp->fd_nfiles = 0; 1642 1643 cdir = fdp->fd_cdir; 1644 fdp->fd_cdir = NULL; 1645 rdir = fdp->fd_rdir; 1646 fdp->fd_rdir = NULL; 1647 jdir = fdp->fd_jdir; 1648 fdp->fd_jdir = NULL; 1649 FILEDESC_UNLOCK(fdp); 1650 1651 if (cdir) { 1652 locked = VFS_LOCK_GIANT(cdir->v_mount); 1653 vrele(cdir); 1654 VFS_UNLOCK_GIANT(locked); 1655 } 1656 if (rdir) { 1657 locked = VFS_LOCK_GIANT(rdir->v_mount); 1658 vrele(rdir); 1659 VFS_UNLOCK_GIANT(locked); 1660 } 1661 if (jdir) { 1662 locked = VFS_LOCK_GIANT(jdir->v_mount); 1663 vrele(jdir); 1664 VFS_UNLOCK_GIANT(locked); 1665 } 1666 1667 fddrop(fdp); 1668 } 1669 1670 /* 1671 * For setugid programs, we don't want to people to use that setugidness 1672 * to generate error messages which write to a file which otherwise would 1673 * otherwise be off-limits to the process. We check for filesystems where 1674 * the vnode can change out from under us after execve (like [lin]procfs). 1675 * 1676 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is 1677 * sufficient. We also don't check for setugidness since we know we are. 1678 */ 1679 static int 1680 is_unsafe(struct file *fp) 1681 { 1682 if (fp->f_type == DTYPE_VNODE) { 1683 struct vnode *vp = fp->f_vnode; 1684 1685 if ((vp->v_vflag & VV_PROCDEP) != 0) 1686 return (1); 1687 } 1688 return (0); 1689 } 1690 1691 /* 1692 * Make this setguid thing safe, if at all possible. 1693 */ 1694 void 1695 setugidsafety(struct thread *td) 1696 { 1697 struct filedesc *fdp; 1698 int i; 1699 1700 /* Certain daemons might not have file descriptors. 
*/ 1701 fdp = td->td_proc->p_fd; 1702 if (fdp == NULL) 1703 return; 1704 1705 /* 1706 * Note: fdp->fd_ofiles may be reallocated out from under us while 1707 * we are blocked in a close. Be careful! 1708 */ 1709 FILEDESC_LOCK(fdp); 1710 for (i = 0; i <= fdp->fd_lastfile; i++) { 1711 if (i > 2) 1712 break; 1713 if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) { 1714 struct file *fp; 1715 1716 knote_fdclose(td, i); 1717 /* 1718 * NULL-out descriptor prior to close to avoid 1719 * a race while close blocks. 1720 */ 1721 fp = fdp->fd_ofiles[i]; 1722 fdp->fd_ofiles[i] = NULL; 1723 fdp->fd_ofileflags[i] = 0; 1724 fdunused(fdp, i); 1725 FILEDESC_UNLOCK(fdp); 1726 (void) closef(fp, td); 1727 FILEDESC_LOCK(fdp); 1728 } 1729 } 1730 FILEDESC_UNLOCK(fdp); 1731 } 1732 1733 /* 1734 * If a specific file object occupies a specific file descriptor, 1735 * close the file descriptor entry and drop a reference on the file 1736 * object. This is a convenience function to handle a subsequent 1737 * error in a function that calls falloc() that handles the race that 1738 * another thread might have closed the file descriptor out from under 1739 * the thread creating the file object. 1740 */ 1741 void 1742 fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td) 1743 { 1744 1745 FILEDESC_LOCK(fdp); 1746 if (fdp->fd_ofiles[idx] == fp) { 1747 fdp->fd_ofiles[idx] = NULL; 1748 fdunused(fdp, idx); 1749 FILEDESC_UNLOCK(fdp); 1750 fdrop(fp, td); 1751 } else { 1752 FILEDESC_UNLOCK(fdp); 1753 } 1754 } 1755 1756 /* 1757 * Close any files on exec? 1758 */ 1759 void 1760 fdcloseexec(struct thread *td) 1761 { 1762 struct filedesc *fdp; 1763 int i; 1764 1765 /* Certain daemons might not have file descriptors. */ 1766 fdp = td->td_proc->p_fd; 1767 if (fdp == NULL) 1768 return; 1769 1770 FILEDESC_LOCK(fdp); 1771 1772 /* 1773 * We cannot cache fd_ofiles or fd_ofileflags since operations 1774 * may block and rip them out from under us. 
1775 */ 1776 for (i = 0; i <= fdp->fd_lastfile; i++) { 1777 if (fdp->fd_ofiles[i] != NULL && 1778 (fdp->fd_ofiles[i]->f_type == DTYPE_MQUEUE || 1779 (fdp->fd_ofileflags[i] & UF_EXCLOSE))) { 1780 struct file *fp; 1781 1782 knote_fdclose(td, i); 1783 /* 1784 * NULL-out descriptor prior to close to avoid 1785 * a race while close blocks. 1786 */ 1787 fp = fdp->fd_ofiles[i]; 1788 fdp->fd_ofiles[i] = NULL; 1789 fdp->fd_ofileflags[i] = 0; 1790 fdunused(fdp, i); 1791 if (fp->f_type == DTYPE_MQUEUE) 1792 mq_fdclose(td, i, fp); 1793 FILEDESC_UNLOCK(fdp); 1794 (void) closef(fp, td); 1795 FILEDESC_LOCK(fdp); 1796 } 1797 } 1798 FILEDESC_UNLOCK(fdp); 1799 } 1800 1801 /* 1802 * It is unsafe for set[ug]id processes to be started with file 1803 * descriptors 0..2 closed, as these descriptors are given implicit 1804 * significance in the Standard C library. fdcheckstd() will create a 1805 * descriptor referencing /dev/null for each of stdin, stdout, and 1806 * stderr that is not already open. 1807 */ 1808 int 1809 fdcheckstd(struct thread *td) 1810 { 1811 struct nameidata nd; 1812 struct filedesc *fdp; 1813 struct file *fp; 1814 register_t retval; 1815 int fd, i, error, flags, devnull; 1816 1817 fdp = td->td_proc->p_fd; 1818 if (fdp == NULL) 1819 return (0); 1820 KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared")); 1821 devnull = -1; 1822 error = 0; 1823 for (i = 0; i < 3; i++) { 1824 if (fdp->fd_ofiles[i] != NULL) 1825 continue; 1826 if (devnull < 0) { 1827 int vfslocked; 1828 error = falloc(td, &fp, &fd); 1829 if (error != 0) 1830 break; 1831 /* Note extra ref on `fp' held for us by falloc(). 
*/ 1832 KASSERT(fd == i, ("oof, we didn't get our fd")); 1833 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, 1834 "/dev/null", td); 1835 flags = FREAD | FWRITE; 1836 error = vn_open(&nd, &flags, 0, fd); 1837 if (error != 0) { 1838 /* 1839 * Someone may have closed the entry in the 1840 * file descriptor table, so check it hasn't 1841 * changed before dropping the reference count. 1842 */ 1843 FILEDESC_LOCK(fdp); 1844 KASSERT(fdp->fd_ofiles[fd] == fp, 1845 ("table not shared, how did it change?")); 1846 fdp->fd_ofiles[fd] = NULL; 1847 fdunused(fdp, fd); 1848 FILEDESC_UNLOCK(fdp); 1849 fdrop(fp, td); 1850 fdrop(fp, td); 1851 break; 1852 } 1853 vfslocked = NDHASGIANT(&nd); 1854 NDFREE(&nd, NDF_ONLY_PNBUF); 1855 fp->f_flag = flags; 1856 fp->f_vnode = nd.ni_vp; 1857 if (fp->f_data == NULL) 1858 fp->f_data = nd.ni_vp; 1859 if (fp->f_ops == &badfileops) 1860 fp->f_ops = &vnops; 1861 fp->f_type = DTYPE_VNODE; 1862 VOP_UNLOCK(nd.ni_vp, 0, td); 1863 VFS_UNLOCK_GIANT(vfslocked); 1864 devnull = fd; 1865 fdrop(fp, td); 1866 } else { 1867 error = do_dup(td, DUP_FIXED, devnull, i, &retval); 1868 if (error != 0) 1869 break; 1870 } 1871 } 1872 return (error); 1873 } 1874 1875 /* 1876 * Internal form of close. 1877 * Decrement reference count on file structure. 1878 * Note: td may be NULL when closing a file that was being passed in a 1879 * message. 1880 * 1881 * XXXRW: Giant is not required for the caller, but often will be held; this 1882 * makes it moderately likely the Giant will be recursed in the VFS case. 1883 */ 1884 int 1885 closef(struct file *fp, struct thread *td) 1886 { 1887 struct vnode *vp; 1888 struct flock lf; 1889 struct filedesc_to_leader *fdtol; 1890 struct filedesc *fdp; 1891 1892 /* 1893 * POSIX record locking dictates that any close releases ALL 1894 * locks owned by this process. This is handled by setting 1895 * a flag in the unlock to free ONLY locks obeying POSIX 1896 * semantics, and not to free BSD-style file locks. 
1897 * If the descriptor was in a message, POSIX-style locks 1898 * aren't passed with the descriptor, and the thread pointer 1899 * will be NULL. Callers should be careful only to pass a 1900 * NULL thread pointer when there really is no owning 1901 * context that might have locks, or the locks will be 1902 * leaked. 1903 */ 1904 if (fp->f_type == DTYPE_VNODE && td != NULL) { 1905 int vfslocked; 1906 1907 vp = fp->f_vnode; 1908 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1909 if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1910 lf.l_whence = SEEK_SET; 1911 lf.l_start = 0; 1912 lf.l_len = 0; 1913 lf.l_type = F_UNLCK; 1914 (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader, 1915 F_UNLCK, &lf, F_POSIX); 1916 } 1917 fdtol = td->td_proc->p_fdtol; 1918 if (fdtol != NULL) { 1919 /* 1920 * Handle special case where file descriptor table 1921 * is shared between multiple process leaders. 1922 */ 1923 fdp = td->td_proc->p_fd; 1924 FILEDESC_LOCK(fdp); 1925 for (fdtol = fdtol->fdl_next; 1926 fdtol != td->td_proc->p_fdtol; 1927 fdtol = fdtol->fdl_next) { 1928 if ((fdtol->fdl_leader->p_flag & 1929 P_ADVLOCK) == 0) 1930 continue; 1931 fdtol->fdl_holdcount++; 1932 FILEDESC_UNLOCK(fdp); 1933 lf.l_whence = SEEK_SET; 1934 lf.l_start = 0; 1935 lf.l_len = 0; 1936 lf.l_type = F_UNLCK; 1937 vp = fp->f_vnode; 1938 (void) VOP_ADVLOCK(vp, 1939 (caddr_t)fdtol->fdl_leader, 1940 F_UNLCK, &lf, F_POSIX); 1941 FILEDESC_LOCK(fdp); 1942 fdtol->fdl_holdcount--; 1943 if (fdtol->fdl_holdcount == 0 && 1944 fdtol->fdl_wakeup != 0) { 1945 fdtol->fdl_wakeup = 0; 1946 wakeup(fdtol); 1947 } 1948 } 1949 FILEDESC_UNLOCK(fdp); 1950 } 1951 VFS_UNLOCK_GIANT(vfslocked); 1952 } 1953 return (fdrop(fp, td)); 1954 } 1955 1956 /* 1957 * Extract the file pointer associated with the specified descriptor for 1958 * the current user process. 1959 * 1960 * If the descriptor doesn't exist, EBADF is returned. 
1961 * 1962 * If the descriptor exists but doesn't match 'flags' then 1963 * return EBADF for read attempts and EINVAL for write attempts. 1964 * 1965 * If 'hold' is set (non-zero) the file's refcount will be bumped on return. 1966 * It should be dropped with fdrop(). 1967 * If it is not set, then the refcount will not be bumped however the 1968 * thread's filedesc struct will be returned locked (for fgetsock). 1969 * 1970 * If an error occured the non-zero error is returned and *fpp is set to NULL. 1971 * Otherwise *fpp is set and zero is returned. 1972 */ 1973 static __inline int 1974 _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold) 1975 { 1976 struct filedesc *fdp; 1977 struct file *fp; 1978 1979 *fpp = NULL; 1980 if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) 1981 return (EBADF); 1982 FILEDESC_LOCK(fdp); 1983 if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) { 1984 FILEDESC_UNLOCK(fdp); 1985 return (EBADF); 1986 } 1987 1988 /* 1989 * FREAD and FWRITE failure return EBADF as per POSIX. 1990 * 1991 * Only one flag, or 0, may be specified. 1992 */ 1993 if (flags == FREAD && (fp->f_flag & FREAD) == 0) { 1994 FILEDESC_UNLOCK(fdp); 1995 return (EBADF); 1996 } 1997 if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) { 1998 FILEDESC_UNLOCK(fdp); 1999 return (EBADF); 2000 } 2001 if (hold) { 2002 fhold(fp); 2003 FILEDESC_UNLOCK(fdp); 2004 } 2005 *fpp = fp; 2006 return (0); 2007 } 2008 2009 int 2010 fget(struct thread *td, int fd, struct file **fpp) 2011 { 2012 2013 return(_fget(td, fd, fpp, 0, 1)); 2014 } 2015 2016 int 2017 fget_read(struct thread *td, int fd, struct file **fpp) 2018 { 2019 2020 return(_fget(td, fd, fpp, FREAD, 1)); 2021 } 2022 2023 int 2024 fget_write(struct thread *td, int fd, struct file **fpp) 2025 { 2026 2027 return(_fget(td, fd, fpp, FWRITE, 1)); 2028 } 2029 2030 /* 2031 * Like fget() but loads the underlying vnode, or returns an error if 2032 * the descriptor does not represent a vnode. 
Note that pipes use vnodes 2033 * but never have VM objects. The returned vnode will be vref()d. 2034 * 2035 * XXX: what about the unused flags ? 2036 */ 2037 static __inline int 2038 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags) 2039 { 2040 struct file *fp; 2041 int error; 2042 2043 *vpp = NULL; 2044 if ((error = _fget(td, fd, &fp, 0, 0)) != 0) 2045 return (error); 2046 if (fp->f_vnode == NULL) { 2047 error = EINVAL; 2048 } else { 2049 *vpp = fp->f_vnode; 2050 vref(*vpp); 2051 } 2052 FILEDESC_UNLOCK(td->td_proc->p_fd); 2053 return (error); 2054 } 2055 2056 int 2057 fgetvp(struct thread *td, int fd, struct vnode **vpp) 2058 { 2059 2060 return (_fgetvp(td, fd, vpp, 0)); 2061 } 2062 2063 int 2064 fgetvp_read(struct thread *td, int fd, struct vnode **vpp) 2065 { 2066 2067 return (_fgetvp(td, fd, vpp, FREAD)); 2068 } 2069 2070 #ifdef notyet 2071 int 2072 fgetvp_write(struct thread *td, int fd, struct vnode **vpp) 2073 { 2074 2075 return (_fgetvp(td, fd, vpp, FWRITE)); 2076 } 2077 #endif 2078 2079 /* 2080 * Like fget() but loads the underlying socket, or returns an error if 2081 * the descriptor does not represent a socket. 2082 * 2083 * We bump the ref count on the returned socket. XXX Also obtain the SX 2084 * lock in the future. 2085 * 2086 * XXXRW: fgetsock() and fputsock() are deprecated, as consumers should rely 2087 * on their file descriptor reference to prevent the socket from being 2088 * freed during use. 
2089 */ 2090 int 2091 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp) 2092 { 2093 struct file *fp; 2094 int error; 2095 2096 NET_ASSERT_GIANT(); 2097 2098 *spp = NULL; 2099 if (fflagp != NULL) 2100 *fflagp = 0; 2101 if ((error = _fget(td, fd, &fp, 0, 0)) != 0) 2102 return (error); 2103 if (fp->f_type != DTYPE_SOCKET) { 2104 error = ENOTSOCK; 2105 } else { 2106 *spp = fp->f_data; 2107 if (fflagp) 2108 *fflagp = fp->f_flag; 2109 SOCK_LOCK(*spp); 2110 soref(*spp); 2111 SOCK_UNLOCK(*spp); 2112 } 2113 FILEDESC_UNLOCK(td->td_proc->p_fd); 2114 return (error); 2115 } 2116 2117 /* 2118 * Drop the reference count on the socket and XXX release the SX lock in the 2119 * future. The last reference closes the socket. 2120 * 2121 * XXXRW: fputsock() is deprecated, see comment for fgetsock(). 2122 */ 2123 void 2124 fputsock(struct socket *so) 2125 { 2126 2127 NET_ASSERT_GIANT(); 2128 ACCEPT_LOCK(); 2129 SOCK_LOCK(so); 2130 sorele(so); 2131 } 2132 2133 int 2134 fdrop(struct file *fp, struct thread *td) 2135 { 2136 2137 FILE_LOCK(fp); 2138 return (fdrop_locked(fp, td)); 2139 } 2140 2141 /* 2142 * Drop reference on struct file passed in, may call closef if the 2143 * reference hits zero. 2144 * Expects struct file locked, and will unlock it. 2145 */ 2146 static int 2147 fdrop_locked(struct file *fp, struct thread *td) 2148 { 2149 int error; 2150 2151 FILE_LOCK_ASSERT(fp, MA_OWNED); 2152 2153 if (--fp->f_count > 0) { 2154 FILE_UNLOCK(fp); 2155 return (0); 2156 } 2157 2158 /* 2159 * We might have just dropped the last reference to a file 2160 * object that is for a UNIX domain socket whose message 2161 * buffers are being examined in unp_gc(). If that is the 2162 * case, FWAIT will be set in f_gcflag and we need to wait for 2163 * unp_gc() to finish its scan. 2164 */ 2165 while (fp->f_gcflag & FWAIT) 2166 msleep(&fp->f_gcflag, fp->f_mtxp, 0, "fpdrop", 0); 2167 2168 /* We have the last ref so we can proceed without the file lock. 
*/ 2169 FILE_UNLOCK(fp); 2170 if (fp->f_count < 0) 2171 panic("fdrop: count < 0"); 2172 if (fp->f_ops != &badfileops) 2173 error = fo_close(fp, td); 2174 else 2175 error = 0; 2176 2177 sx_xlock(&filelist_lock); 2178 LIST_REMOVE(fp, f_list); 2179 openfiles--; 2180 sx_xunlock(&filelist_lock); 2181 crfree(fp->f_cred); 2182 uma_zfree(file_zone, fp); 2183 2184 return (error); 2185 } 2186 2187 /* 2188 * Apply an advisory lock on a file descriptor. 2189 * 2190 * Just attempt to get a record lock of the requested type on the entire file 2191 * (l_whence = SEEK_SET, l_start = 0, l_len = 0). 2192 */ 2193 #ifndef _SYS_SYSPROTO_H_ 2194 struct flock_args { 2195 int fd; 2196 int how; 2197 }; 2198 #endif 2199 /* ARGSUSED */ 2200 int 2201 flock(struct thread *td, struct flock_args *uap) 2202 { 2203 struct file *fp; 2204 struct vnode *vp; 2205 struct flock lf; 2206 int error; 2207 2208 if ((error = fget(td, uap->fd, &fp)) != 0) 2209 return (error); 2210 if (fp->f_type != DTYPE_VNODE) { 2211 fdrop(fp, td); 2212 return (EOPNOTSUPP); 2213 } 2214 2215 mtx_lock(&Giant); 2216 vp = fp->f_vnode; 2217 lf.l_whence = SEEK_SET; 2218 lf.l_start = 0; 2219 lf.l_len = 0; 2220 if (uap->how & LOCK_UN) { 2221 lf.l_type = F_UNLCK; 2222 FILE_LOCK(fp); 2223 fp->f_flag &= ~FHASLOCK; 2224 FILE_UNLOCK(fp); 2225 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 2226 goto done2; 2227 } 2228 if (uap->how & LOCK_EX) 2229 lf.l_type = F_WRLCK; 2230 else if (uap->how & LOCK_SH) 2231 lf.l_type = F_RDLCK; 2232 else { 2233 error = EBADF; 2234 goto done2; 2235 } 2236 FILE_LOCK(fp); 2237 fp->f_flag |= FHASLOCK; 2238 FILE_UNLOCK(fp); 2239 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, 2240 (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT); 2241 done2: 2242 fdrop(fp, td); 2243 mtx_unlock(&Giant); 2244 return (error); 2245 } 2246 /* 2247 * Duplicate the specified descriptor to a free descriptor. 
2248 */ 2249 int 2250 dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error) 2251 { 2252 struct file *wfp; 2253 struct file *fp; 2254 2255 /* 2256 * If the to-be-dup'd fd number is greater than the allowed number 2257 * of file descriptors, or the fd to be dup'd has already been 2258 * closed, then reject. 2259 */ 2260 FILEDESC_LOCK(fdp); 2261 if (dfd < 0 || dfd >= fdp->fd_nfiles || 2262 (wfp = fdp->fd_ofiles[dfd]) == NULL) { 2263 FILEDESC_UNLOCK(fdp); 2264 return (EBADF); 2265 } 2266 2267 /* 2268 * There are two cases of interest here. 2269 * 2270 * For ENODEV simply dup (dfd) to file descriptor 2271 * (indx) and return. 2272 * 2273 * For ENXIO steal away the file structure from (dfd) and 2274 * store it in (indx). (dfd) is effectively closed by 2275 * this operation. 2276 * 2277 * Any other error code is just returned. 2278 */ 2279 switch (error) { 2280 case ENODEV: 2281 /* 2282 * Check that the mode the file is being opened for is a 2283 * subset of the mode of the existing descriptor. 2284 */ 2285 FILE_LOCK(wfp); 2286 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { 2287 FILE_UNLOCK(wfp); 2288 FILEDESC_UNLOCK(fdp); 2289 return (EACCES); 2290 } 2291 fp = fdp->fd_ofiles[indx]; 2292 fdp->fd_ofiles[indx] = wfp; 2293 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 2294 if (fp == NULL) 2295 fdused(fdp, indx); 2296 fhold_locked(wfp); 2297 FILE_UNLOCK(wfp); 2298 FILEDESC_UNLOCK(fdp); 2299 if (fp != NULL) { 2300 /* 2301 * We now own the reference to fp that the ofiles[] 2302 * array used to own. Release it. 2303 */ 2304 FILE_LOCK(fp); 2305 fdrop_locked(fp, td); 2306 } 2307 return (0); 2308 2309 case ENXIO: 2310 /* 2311 * Steal away the file pointer from dfd and stuff it into indx. 
2312 */ 2313 fp = fdp->fd_ofiles[indx]; 2314 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; 2315 fdp->fd_ofiles[dfd] = NULL; 2316 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 2317 fdp->fd_ofileflags[dfd] = 0; 2318 fdunused(fdp, dfd); 2319 if (fp == NULL) 2320 fdused(fdp, indx); 2321 if (fp != NULL) 2322 FILE_LOCK(fp); 2323 2324 /* 2325 * We now own the reference to fp that the ofiles[] array 2326 * used to own. Release it. 2327 */ 2328 if (fp != NULL) 2329 fdrop_locked(fp, td); 2330 2331 FILEDESC_UNLOCK(fdp); 2332 2333 return (0); 2334 2335 default: 2336 FILEDESC_UNLOCK(fdp); 2337 return (error); 2338 } 2339 /* NOTREACHED */ 2340 } 2341 2342 /* 2343 * Scan all active processes to see if any of them have a current 2344 * or root directory of `olddp'. If so, replace them with the new 2345 * mount point. 2346 */ 2347 void 2348 mountcheckdirs(struct vnode *olddp, struct vnode *newdp) 2349 { 2350 struct filedesc *fdp; 2351 struct proc *p; 2352 int nrele; 2353 2354 if (vrefcnt(olddp) == 1) 2355 return; 2356 sx_slock(&allproc_lock); 2357 FOREACH_PROC_IN_SYSTEM(p) { 2358 fdp = fdhold(p); 2359 if (fdp == NULL) 2360 continue; 2361 nrele = 0; 2362 FILEDESC_LOCK_FAST(fdp); 2363 if (fdp->fd_cdir == olddp) { 2364 vref(newdp); 2365 fdp->fd_cdir = newdp; 2366 nrele++; 2367 } 2368 if (fdp->fd_rdir == olddp) { 2369 vref(newdp); 2370 fdp->fd_rdir = newdp; 2371 nrele++; 2372 } 2373 FILEDESC_UNLOCK_FAST(fdp); 2374 fddrop(fdp); 2375 while (nrele--) 2376 vrele(olddp); 2377 } 2378 sx_sunlock(&allproc_lock); 2379 if (rootvnode == olddp) { 2380 vrele(rootvnode); 2381 vref(newdp); 2382 rootvnode = newdp; 2383 } 2384 } 2385 2386 struct filedesc_to_leader * 2387 filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader) 2388 { 2389 struct filedesc_to_leader *fdtol; 2390 2391 MALLOC(fdtol, struct filedesc_to_leader *, 2392 sizeof(struct filedesc_to_leader), 2393 M_FILEDESC_TO_LEADER, 2394 M_WAITOK); 2395 fdtol->fdl_refcount = 1; 2396 
fdtol->fdl_holdcount = 0;
	fdtol->fdl_wakeup = 0;
	fdtol->fdl_leader = leader;
	if (old != NULL) {
		/*
		 * Splice the new entry into the circular doubly-linked list
		 * immediately after 'old', under the filedesc lock.
		 */
		FILEDESC_LOCK(fdp);
		fdtol->fdl_next = old->fdl_next;
		fdtol->fdl_prev = old;
		old->fdl_next = fdtol;
		fdtol->fdl_next->fdl_prev = fdtol;
		FILEDESC_UNLOCK(fdp);
	} else {
		/* No existing entry: the new one forms a ring by itself. */
		fdtol->fdl_next = fdtol;
		fdtol->fdl_prev = fdtol;
	}
	return (fdtol);
}

/*
 * Get file structures.
 *
 * Sysctl handler registered below as kern.file.  When the request carries
 * no output buffer (req->oldptr == NULL) only a size estimate is reported.
 * Otherwise one struct xfile is copied out for every non-NULL descriptor
 * slot of every process that is not in state PRS_NEW and for which
 * p_cansee() returns 0.
 *
 * Returns 0 on success, or the first non-zero value returned by
 * sysctl_wire_old_buffer() or SYSCTL_OUT().
 */
static int
sysctl_kern_file(SYSCTL_HANDLER_ARGS)
{
	struct xfile xf;
	struct filedesc *fdp;
	struct file *fp;
	struct proc *p;
	int error, n;

	/*
	 * Note: because the number of file descriptors is calculated
	 * in different ways for sizing vs returning the data,
	 * there is information leakage from the first loop.  However,
	 * it is of a similar order of magnitude to the leakage from
	 * global system statistics such as kern.openfiles.
	 */
	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	if (req->oldptr == NULL) {
		/*
		 * Sizing pass: sum f_count over the global file list and
		 * report that many xfile records (plus some slack).
		 */
		n = 16;		/* A slight overestimate. */
		sx_slock(&filelist_lock);
		LIST_FOREACH(fp, &filehead, f_list) {
			/*
			 * We should grab the lock, but this is an
			 * estimate, so does it really matter?
			 */
			/* mtx_lock(fp->f_mtxp); */
			n += fp->f_count;
			/* mtx_unlock(fp->f_mtxp); */
		}
		sx_sunlock(&filelist_lock);
		return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
	}
	error = 0;
	/*
	 * Zero the record once; every field set below is overwritten for
	 * each descriptor, and xf_size stays constant.
	 */
	bzero(&xf, sizeof(xf));
	xf.xf_size = sizeof(xf);
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		if (p->p_state == PRS_NEW)
			continue;
		/* The pid and uid are sampled while holding the process lock. */
		PROC_LOCK(p);
		if (p_cansee(req->td, p) != 0) {
			PROC_UNLOCK(p);
			continue;
		}
		xf.xf_pid = p->p_pid;
		xf.xf_uid = p->p_ucred->cr_uid;
		PROC_UNLOCK(p);
		fdp = fdhold(p);
		if (fdp == NULL)
			continue;
		FILEDESC_LOCK_FAST(fdp);
		/* The walk also stops as soon as fd_refcnt is no longer positive. */
		for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) {
			if ((fp = fdp->fd_ofiles[n]) == NULL)
				continue;
			xf.xf_fd = n;
			xf.xf_file = fp;
			xf.xf_data = fp->f_data;
			xf.xf_vnode = fp->f_vnode;
			xf.xf_type = fp->f_type;
			xf.xf_count = fp->f_count;
			xf.xf_msgcount = fp->f_msgcount;
			xf.xf_offset = fp->f_offset;
			xf.xf_flag = fp->f_flag;
			error = SYSCTL_OUT(req, &xf, sizeof(xf));
			if (error)
				break;
		}
		FILEDESC_UNLOCK_FAST(fdp);
		fddrop(fdp);
		/* A copy-out failure ends the process walk as well. */
		if (error)
			break;
	}
	sx_sunlock(&allproc_lock);
	return (error);
}

SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, 0, sysctl_kern_file, "S,xfile", "Entire file table");

#ifdef DDB
/*
 * For the purposes of debugging, generate a human-readable string for the
 * file type.  Every returned name is four characters long; types not listed
 * here are reported as "unkn".
 */
static const char *
file_type_to_name(short type)
{

	switch (type) {
	case 0:
		return ("zero");
	case DTYPE_VNODE:
		return ("vnod");
	case DTYPE_SOCKET:
		return ("sock");
	case DTYPE_PIPE:
		return ("pipe");
	case DTYPE_FIFO:
		return ("fifo");
	case DTYPE_KQUEUE:
		return ("kque");
	case DTYPE_CRYPTO:
		return ("crpt");
	case DTYPE_MQUEUE:
		return ("mque");
	default:
		return ("unkn");
	}
}

/*
 * For the purposes of debugging, identify a process (if any, perhaps one of
 * many) that references the passed file in its file descriptor array.  Return
 * NULL if none.
 *
 * No locks are acquired here: the process list and each descriptor table
 * are read directly.
 */
static struct proc *
file_to_first_proc(struct file *fp)
{
	struct filedesc *fdp;
	struct proc *p;
	int n;

	FOREACH_PROC_IN_SYSTEM(p) {
		if (p->p_state == PRS_NEW)
			continue;
		fdp = p->p_fd;
		if (fdp == NULL)
			continue;
		for (n = 0; n < fdp->fd_nfiles; n++) {
			if (fp == fdp->fd_ofiles[n])
				return (p);
		}
	}
	return (NULL);
}

/*
 * Print a one-line summary of 'fp' on the debugger console.  When 'header'
 * is non-zero the column header line is printed first.  The FPID and FCmd
 * columns come from file_to_first_proc(); they show -1 and "-" when no
 * process is found.
 */
static void
db_print_file(struct file *fp, int header)
{
	struct proc *p;

	if (header)
		db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n",
		    "File", "Type", "Data", "Flag", "GCFl", "Count",
		    "MCount", "Vnode", "FPID", "FCmd");
	p = file_to_first_proc(fp);
	db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp,
	    file_type_to_name(fp->f_type), fp->f_data, fp->f_flag,
	    fp->f_gcflag, fp->f_count, fp->f_msgcount, fp->f_vnode,
	    p != NULL ? p->p_pid : -1, p != NULL ? p->p_comm : "-");
}

/*
 * "show file <addr>": treat the given address as a struct file pointer and
 * print it with a header line.  Prints a usage message if no address was
 * supplied.
 */
DB_SHOW_COMMAND(file, db_show_file)
{
	struct file *fp;

	if (!have_addr) {
		db_printf("usage: show file <addr>\n");
		return;
	}
	fp = (struct file *)addr;
	db_print_file(fp, 1);
}

/*
 * "show files": print every entry on the global file list, emitting the
 * column header only before the first entry.
 */
DB_SHOW_COMMAND(files, db_show_files)
{
	struct file *fp;
	int header;

	header = 1;
	LIST_FOREACH(fp, &filehead, f_list) {
		db_print_file(fp, header);
		header = 0;
	}
}
#endif

SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
    &maxfilesperproc, 0, "Maximum files allowed open per process");

SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
    &maxfiles, 0, "Maximum number of files");

SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
    &openfiles, 0, "System-wide number of open files");

/*
 * Boot-time initialization, run through the SYSINIT below: create the UMA
 * zone used for struct file and initialize filelist_lock, sigio_lock and
 * fdesc_mtx.  The argument is unused.
 */
/* ARGSUSED*/
static void
filelistinit(void *dummy)
{

	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);
	sx_init(&filelist_lock, "filelist lock");
	mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
	mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF);
}
SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)

/*-------------------------------------------------------------------*/

/*
 * Stub file operations collected in badfileops below.  Each one ignores its
 * arguments and returns EBADF, except badfo_poll(), which returns 0.
 */

/* Shared by the fo_read and fo_write slots of badfileops. */
static int
badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td)
{

	return (EBADF);
}

static int
badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td)
{

	return (EBADF);
}

/* Unlike the other stubs, this one returns 0 rather than EBADF. */
static int
badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td)
{

	return (0);
}

static int
badfo_kqfilter(struct file *fp, struct knote *kn)
{

	return (EBADF);
}

static int
badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td)
{

	return (EBADF);
}

static int
badfo_close(struct file *fp, struct thread *td)
{

	return (EBADF);
}

/* Operations vector wired entirely to the badfo_*() stubs above. */
struct fileops badfileops = {
	.fo_read = badfo_readwrite,
	.fo_write = badfo_readwrite,
	.fo_ioctl = badfo_ioctl,
	.fo_poll = badfo_poll,
	.fo_kqfilter = badfo_kqfilter,
	.fo_stat = badfo_stat,
	.fo_close = badfo_close,
};


/*-------------------------------------------------------------------*/

/*
 * File Descriptor pseudo-device driver (/dev/fd/).
 *
 * Opening minor device N dup()s the file (if any) connected to file
 * descriptor N belonging to the calling process.  Note that this driver
 * consists of only the ``open()'' routine, because all subsequent
 * references to this file will be direct to the other driver.
 *
 * XXX: we could give this one a cloning event handler if necessary.
 */

/* ARGSUSED */
static int
fdopen(struct cdev *dev, int mode, int type, struct thread *td)
{

	/*
	 * XXX Kludge: set curthread->td_dupfd to contain the value of the
	 * file descriptor being sought for duplication.  The error
	 * return ensures that the vnode for this device will be released
	 * by vn_open.  Open will detect this special error and take the
	 * actions in dupfdopen below.  Other callers of vn_open or VOP_OPEN
	 * will simply report the error.
	 */
	td->td_dupfd = dev2unit(dev);
	return (ENODEV);
}

/* Character device switch for the fd/N devices; only d_open is provided. */
static struct cdevsw fildesc_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_NEEDGIANT,
	.d_open =	fdopen,
	.d_name =	"FD",
};

/*
 * Driver initialization, run through the SYSINIT below: create the device
 * nodes fd/0, fd/1 and fd/2 (units 0-2, root:wheel, mode 0666) and give
 * them the aliases stdin, stdout and stderr respectively.  The argument is
 * unused.
 */
static void
fildesc_drvinit(void *unused)
{
	struct cdev *dev;

	dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0");
	make_dev_alias(dev, "stdin");
	dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1");
	make_dev_alias(dev, "stdout");
	dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2");
	make_dev_alias(dev, "stderr");
}

SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL)