1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_compat.h" 41 #include "opt_ddb.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 46 #include <sys/conf.h> 47 #include <sys/fcntl.h> 48 #include <sys/file.h> 49 #include <sys/filedesc.h> 50 #include <sys/filio.h> 51 #include <sys/jail.h> 52 #include <sys/kernel.h> 53 #include <sys/limits.h> 54 #include <sys/lock.h> 55 #include <sys/malloc.h> 56 #include <sys/mount.h> 57 #include <sys/mqueue.h> 58 #include <sys/mutex.h> 59 #include <sys/namei.h> 60 #include <sys/proc.h> 61 #include <sys/resourcevar.h> 62 #include <sys/signalvar.h> 63 #include <sys/socketvar.h> 64 #include <sys/stat.h> 65 #include <sys/sx.h> 66 #include <sys/syscallsubr.h> 67 #include <sys/sysctl.h> 68 #include <sys/sysproto.h> 69 #include <sys/unistd.h> 70 #include <sys/vnode.h> 71 72 #include <security/audit/audit.h> 73 74 #include <vm/uma.h> 75 76 #include <ddb/ddb.h> 77 78 static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table"); 79 static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader", 80 "file desc to leader structures"); 81 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); 82 83 static uma_zone_t file_zone; 84 85 86 /* How to treat 'new' parameter when allocating a fd for do_dup(). 
*/ 87 enum dup_type { DUP_VARIABLE, DUP_FIXED }; 88 89 static int do_dup(struct thread *td, enum dup_type type, int old, int new, 90 register_t *retval); 91 static int fd_first_free(struct filedesc *, int, int); 92 static int fd_last_used(struct filedesc *, int, int); 93 static void fdgrowtable(struct filedesc *, int); 94 static int fdrop_locked(struct file *fp, struct thread *td); 95 static void fdunused(struct filedesc *fdp, int fd); 96 static void fdused(struct filedesc *fdp, int fd); 97 98 /* 99 * A process is initially started out with NDFILE descriptors stored within 100 * this structure, selected to be enough for typical applications based on 101 * the historical limit of 20 open files (and the usage of descriptors by 102 * shells). If these descriptors are exhausted, a larger descriptor table 103 * may be allocated, up to a process' resource limit; the internal arrays 104 * are then unused. 105 */ 106 #define NDFILE 20 107 #define NDSLOTSIZE sizeof(NDSLOTTYPE) 108 #define NDENTRIES (NDSLOTSIZE * __CHAR_BIT) 109 #define NDSLOT(x) ((x) / NDENTRIES) 110 #define NDBIT(x) ((NDSLOTTYPE)1 << ((x) % NDENTRIES)) 111 #define NDSLOTS(x) (((x) + NDENTRIES - 1) / NDENTRIES) 112 113 /* 114 * Storage required per open file descriptor. 115 */ 116 #define OFILESIZE (sizeof(struct file *) + sizeof(char)) 117 118 /* 119 * Basic allocation of descriptors: 120 * one of the above, plus arrays for NDFILE descriptors. 121 */ 122 struct filedesc0 { 123 struct filedesc fd_fd; 124 /* 125 * These arrays are used when the number of open files is 126 * <= NDFILE, and are then pointed to by the pointers above. 127 */ 128 struct file *fd_dfiles[NDFILE]; 129 char fd_dfileflags[NDFILE]; 130 NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)]; 131 }; 132 133 /* 134 * Descriptor management. 
135 */ 136 struct filelist filehead; /* head of list of open files */ 137 int openfiles; /* actual number of open files */ 138 struct sx filelist_lock; /* sx to protect filelist */ 139 struct mtx sigio_lock; /* mtx to protect pointers to sigio */ 140 void (*mq_fdclose)(struct thread *td, int fd, struct file *fp); 141 142 /* A mutex to protect the association between a proc and filedesc. */ 143 static struct mtx fdesc_mtx; 144 145 /* 146 * Find the first zero bit in the given bitmap, starting at low and not 147 * exceeding size - 1. 148 */ 149 static int 150 fd_first_free(struct filedesc *fdp, int low, int size) 151 { 152 NDSLOTTYPE *map = fdp->fd_map; 153 NDSLOTTYPE mask; 154 int off, maxoff; 155 156 if (low >= size) 157 return (low); 158 159 off = NDSLOT(low); 160 if (low % NDENTRIES) { 161 mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES))); 162 if ((mask &= ~map[off]) != 0UL) 163 return (off * NDENTRIES + ffsl(mask) - 1); 164 ++off; 165 } 166 for (maxoff = NDSLOTS(size); off < maxoff; ++off) 167 if (map[off] != ~0UL) 168 return (off * NDENTRIES + ffsl(~map[off]) - 1); 169 return (size); 170 } 171 172 /* 173 * Find the highest non-zero bit in the given bitmap, starting at low and 174 * not exceeding size - 1. 
175 */ 176 static int 177 fd_last_used(struct filedesc *fdp, int low, int size) 178 { 179 NDSLOTTYPE *map = fdp->fd_map; 180 NDSLOTTYPE mask; 181 int off, minoff; 182 183 if (low >= size) 184 return (-1); 185 186 off = NDSLOT(size); 187 if (size % NDENTRIES) { 188 mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES)); 189 if ((mask &= map[off]) != 0) 190 return (off * NDENTRIES + flsl(mask) - 1); 191 --off; 192 } 193 for (minoff = NDSLOT(low); off >= minoff; --off) 194 if (map[off] != 0) 195 return (off * NDENTRIES + flsl(map[off]) - 1); 196 return (low - 1); 197 } 198 199 static int 200 fdisused(struct filedesc *fdp, int fd) 201 { 202 KASSERT(fd >= 0 && fd < fdp->fd_nfiles, 203 ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles)); 204 return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0); 205 } 206 207 /* 208 * Mark a file descriptor as used. 209 */ 210 static void 211 fdused(struct filedesc *fdp, int fd) 212 { 213 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 214 KASSERT(!fdisused(fdp, fd), 215 ("fd already used")); 216 fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd); 217 if (fd > fdp->fd_lastfile) 218 fdp->fd_lastfile = fd; 219 if (fd == fdp->fd_freefile) 220 fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles); 221 } 222 223 /* 224 * Mark a file descriptor as unused. 225 */ 226 static void 227 fdunused(struct filedesc *fdp, int fd) 228 { 229 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 230 KASSERT(fdisused(fdp, fd), 231 ("fd is already unused")); 232 KASSERT(fdp->fd_ofiles[fd] == NULL, 233 ("fd is still in use")); 234 fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); 235 if (fd < fdp->fd_freefile) 236 fdp->fd_freefile = fd; 237 if (fd == fdp->fd_lastfile) 238 fdp->fd_lastfile = fd_last_used(fdp, 0, fd); 239 } 240 241 /* 242 * System calls on descriptors. 
243 */ 244 #ifndef _SYS_SYSPROTO_H_ 245 struct getdtablesize_args { 246 int dummy; 247 }; 248 #endif 249 /* 250 * MPSAFE 251 */ 252 /* ARGSUSED */ 253 int 254 getdtablesize(struct thread *td, struct getdtablesize_args *uap) 255 { 256 struct proc *p = td->td_proc; 257 258 PROC_LOCK(p); 259 td->td_retval[0] = 260 min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 261 PROC_UNLOCK(p); 262 return (0); 263 } 264 265 /* 266 * Duplicate a file descriptor to a particular value. 267 * 268 * note: keep in mind that a potential race condition exists when closing 269 * descriptors from a shared descriptor table (via rfork). 270 */ 271 #ifndef _SYS_SYSPROTO_H_ 272 struct dup2_args { 273 u_int from; 274 u_int to; 275 }; 276 #endif 277 /* 278 * MPSAFE 279 */ 280 /* ARGSUSED */ 281 int 282 dup2(struct thread *td, struct dup2_args *uap) 283 { 284 285 return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to, 286 td->td_retval)); 287 } 288 289 /* 290 * Duplicate a file descriptor. 291 */ 292 #ifndef _SYS_SYSPROTO_H_ 293 struct dup_args { 294 u_int fd; 295 }; 296 #endif 297 /* 298 * MPSAFE 299 */ 300 /* ARGSUSED */ 301 int 302 dup(struct thread *td, struct dup_args *uap) 303 { 304 305 return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval)); 306 } 307 308 /* 309 * The file control system call. 
310 */ 311 #ifndef _SYS_SYSPROTO_H_ 312 struct fcntl_args { 313 int fd; 314 int cmd; 315 long arg; 316 }; 317 #endif 318 /* 319 * MPSAFE 320 */ 321 /* ARGSUSED */ 322 int 323 fcntl(struct thread *td, struct fcntl_args *uap) 324 { 325 struct flock fl; 326 intptr_t arg; 327 int error; 328 329 error = 0; 330 switch (uap->cmd) { 331 case F_GETLK: 332 case F_SETLK: 333 case F_SETLKW: 334 error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl)); 335 arg = (intptr_t)&fl; 336 break; 337 default: 338 arg = uap->arg; 339 break; 340 } 341 if (error) 342 return (error); 343 error = kern_fcntl(td, uap->fd, uap->cmd, arg); 344 if (error) 345 return (error); 346 if (uap->cmd == F_GETLK) 347 error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl)); 348 return (error); 349 } 350 351 int 352 kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) 353 { 354 struct filedesc *fdp; 355 struct flock *flp; 356 struct file *fp; 357 struct proc *p; 358 char *pop; 359 struct vnode *vp; 360 u_int newmin; 361 int error, flg, tmp; 362 int giant_locked; 363 364 /* 365 * XXXRW: Some fcntl() calls require Giant -- others don't. Try to 366 * avoid grabbing Giant for calls we know don't need it. 
367 */ 368 switch (cmd) { 369 case F_DUPFD: 370 case F_GETFD: 371 case F_SETFD: 372 case F_GETFL: 373 giant_locked = 0; 374 break; 375 376 default: 377 giant_locked = 1; 378 mtx_lock(&Giant); 379 } 380 381 error = 0; 382 flg = F_POSIX; 383 p = td->td_proc; 384 fdp = p->p_fd; 385 FILEDESC_LOCK(fdp); 386 if ((unsigned)fd >= fdp->fd_nfiles || 387 (fp = fdp->fd_ofiles[fd]) == NULL) { 388 FILEDESC_UNLOCK(fdp); 389 error = EBADF; 390 goto done2; 391 } 392 pop = &fdp->fd_ofileflags[fd]; 393 394 switch (cmd) { 395 case F_DUPFD: 396 /* mtx_assert(&Giant, MA_NOTOWNED); */ 397 FILEDESC_UNLOCK(fdp); 398 newmin = arg; 399 PROC_LOCK(p); 400 if (newmin >= lim_cur(p, RLIMIT_NOFILE) || 401 newmin >= maxfilesperproc) { 402 PROC_UNLOCK(p); 403 error = EINVAL; 404 break; 405 } 406 PROC_UNLOCK(p); 407 error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval); 408 break; 409 410 case F_GETFD: 411 /* mtx_assert(&Giant, MA_NOTOWNED); */ 412 td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0; 413 FILEDESC_UNLOCK(fdp); 414 break; 415 416 case F_SETFD: 417 /* mtx_assert(&Giant, MA_NOTOWNED); */ 418 *pop = (*pop &~ UF_EXCLOSE) | 419 (arg & FD_CLOEXEC ? 
UF_EXCLOSE : 0); 420 FILEDESC_UNLOCK(fdp); 421 break; 422 423 case F_GETFL: 424 /* mtx_assert(&Giant, MA_NOTOWNED); */ 425 FILE_LOCK(fp); 426 td->td_retval[0] = OFLAGS(fp->f_flag); 427 FILE_UNLOCK(fp); 428 FILEDESC_UNLOCK(fdp); 429 break; 430 431 case F_SETFL: 432 mtx_assert(&Giant, MA_OWNED); 433 FILE_LOCK(fp); 434 fhold_locked(fp); 435 fp->f_flag &= ~FCNTLFLAGS; 436 fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS; 437 FILE_UNLOCK(fp); 438 FILEDESC_UNLOCK(fdp); 439 tmp = fp->f_flag & FNONBLOCK; 440 error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); 441 if (error) { 442 fdrop(fp, td); 443 break; 444 } 445 tmp = fp->f_flag & FASYNC; 446 error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td); 447 if (error == 0) { 448 fdrop(fp, td); 449 break; 450 } 451 FILE_LOCK(fp); 452 fp->f_flag &= ~FNONBLOCK; 453 FILE_UNLOCK(fp); 454 tmp = 0; 455 (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); 456 fdrop(fp, td); 457 break; 458 459 case F_GETOWN: 460 mtx_assert(&Giant, MA_OWNED); 461 fhold(fp); 462 FILEDESC_UNLOCK(fdp); 463 error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); 464 if (error == 0) 465 td->td_retval[0] = tmp; 466 fdrop(fp, td); 467 break; 468 469 case F_SETOWN: 470 mtx_assert(&Giant, MA_OWNED); 471 fhold(fp); 472 FILEDESC_UNLOCK(fdp); 473 tmp = arg; 474 error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td); 475 fdrop(fp, td); 476 break; 477 478 case F_SETLKW: 479 mtx_assert(&Giant, MA_OWNED); 480 flg |= F_WAIT; 481 /* FALLTHROUGH F_SETLK */ 482 483 case F_SETLK: 484 mtx_assert(&Giant, MA_OWNED); 485 if (fp->f_type != DTYPE_VNODE) { 486 FILEDESC_UNLOCK(fdp); 487 error = EBADF; 488 break; 489 } 490 491 flp = (struct flock *)arg; 492 if (flp->l_whence == SEEK_CUR) { 493 if (fp->f_offset < 0 || 494 (flp->l_start > 0 && 495 fp->f_offset > OFF_MAX - flp->l_start)) { 496 FILEDESC_UNLOCK(fdp); 497 error = EOVERFLOW; 498 break; 499 } 500 flp->l_start += fp->f_offset; 501 } 502 503 /* 504 * VOP_ADVLOCK() may block. 
505 */ 506 fhold(fp); 507 FILEDESC_UNLOCK(fdp); 508 vp = fp->f_vnode; 509 510 switch (flp->l_type) { 511 case F_RDLCK: 512 if ((fp->f_flag & FREAD) == 0) { 513 error = EBADF; 514 break; 515 } 516 PROC_LOCK(p->p_leader); 517 p->p_leader->p_flag |= P_ADVLOCK; 518 PROC_UNLOCK(p->p_leader); 519 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 520 flp, flg); 521 break; 522 case F_WRLCK: 523 if ((fp->f_flag & FWRITE) == 0) { 524 error = EBADF; 525 break; 526 } 527 PROC_LOCK(p->p_leader); 528 p->p_leader->p_flag |= P_ADVLOCK; 529 PROC_UNLOCK(p->p_leader); 530 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 531 flp, flg); 532 break; 533 case F_UNLCK: 534 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, 535 flp, F_POSIX); 536 break; 537 default: 538 error = EINVAL; 539 break; 540 } 541 /* Check for race with close */ 542 FILEDESC_LOCK_FAST(fdp); 543 if ((unsigned) fd >= fdp->fd_nfiles || 544 fp != fdp->fd_ofiles[fd]) { 545 FILEDESC_UNLOCK_FAST(fdp); 546 flp->l_whence = SEEK_SET; 547 flp->l_start = 0; 548 flp->l_len = 0; 549 flp->l_type = F_UNLCK; 550 (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, 551 F_UNLCK, flp, F_POSIX); 552 } else 553 FILEDESC_UNLOCK_FAST(fdp); 554 fdrop(fp, td); 555 break; 556 557 case F_GETLK: 558 mtx_assert(&Giant, MA_OWNED); 559 if (fp->f_type != DTYPE_VNODE) { 560 FILEDESC_UNLOCK(fdp); 561 error = EBADF; 562 break; 563 } 564 flp = (struct flock *)arg; 565 if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && 566 flp->l_type != F_UNLCK) { 567 FILEDESC_UNLOCK(fdp); 568 error = EINVAL; 569 break; 570 } 571 if (flp->l_whence == SEEK_CUR) { 572 if ((flp->l_start > 0 && 573 fp->f_offset > OFF_MAX - flp->l_start) || 574 (flp->l_start < 0 && 575 fp->f_offset < OFF_MIN - flp->l_start)) { 576 FILEDESC_UNLOCK(fdp); 577 error = EOVERFLOW; 578 break; 579 } 580 flp->l_start += fp->f_offset; 581 } 582 /* 583 * VOP_ADVLOCK() may block. 
584 */ 585 fhold(fp); 586 FILEDESC_UNLOCK(fdp); 587 vp = fp->f_vnode; 588 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, 589 F_POSIX); 590 fdrop(fp, td); 591 break; 592 default: 593 FILEDESC_UNLOCK(fdp); 594 error = EINVAL; 595 break; 596 } 597 done2: 598 if (giant_locked) 599 mtx_unlock(&Giant); 600 return (error); 601 } 602 603 /* 604 * Common code for dup, dup2, and fcntl(F_DUPFD). 605 */ 606 static int 607 do_dup(struct thread *td, enum dup_type type, int old, int new, register_t *retval) 608 { 609 struct filedesc *fdp; 610 struct proc *p; 611 struct file *fp; 612 struct file *delfp; 613 int error, holdleaders, maxfd; 614 615 KASSERT((type == DUP_VARIABLE || type == DUP_FIXED), 616 ("invalid dup type %d", type)); 617 618 p = td->td_proc; 619 fdp = p->p_fd; 620 621 /* 622 * Verify we have a valid descriptor to dup from and possibly to 623 * dup to. 624 */ 625 if (old < 0 || new < 0) 626 return (EBADF); 627 PROC_LOCK(p); 628 maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 629 PROC_UNLOCK(p); 630 if (new >= maxfd) 631 return (EMFILE); 632 633 FILEDESC_LOCK(fdp); 634 if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) { 635 FILEDESC_UNLOCK(fdp); 636 return (EBADF); 637 } 638 if (type == DUP_FIXED && old == new) { 639 *retval = new; 640 FILEDESC_UNLOCK(fdp); 641 return (0); 642 } 643 fp = fdp->fd_ofiles[old]; 644 fhold(fp); 645 646 /* 647 * If the caller specified a file descriptor, make sure the file 648 * table is large enough to hold it, and grab it. Otherwise, just 649 * allocate a new descriptor the usual way. Since the filedesc 650 * lock may be temporarily dropped in the process, we have to look 651 * out for a race. 
652 */ 653 if (type == DUP_FIXED) { 654 if (new >= fdp->fd_nfiles) 655 fdgrowtable(fdp, new + 1); 656 if (fdp->fd_ofiles[new] == NULL) 657 fdused(fdp, new); 658 } else { 659 if ((error = fdalloc(td, new, &new)) != 0) { 660 FILEDESC_UNLOCK(fdp); 661 fdrop(fp, td); 662 return (error); 663 } 664 } 665 666 /* 667 * If the old file changed out from under us then treat it as a 668 * bad file descriptor. Userland should do its own locking to 669 * avoid this case. 670 */ 671 if (fdp->fd_ofiles[old] != fp) { 672 /* we've allocated a descriptor which we won't use */ 673 if (fdp->fd_ofiles[new] == NULL) 674 fdunused(fdp, new); 675 FILEDESC_UNLOCK(fdp); 676 fdrop(fp, td); 677 return (EBADF); 678 } 679 KASSERT(old != new, 680 ("new fd is same as old")); 681 682 /* 683 * Save info on the descriptor being overwritten. We cannot close 684 * it without introducing an ownership race for the slot, since we 685 * need to drop the filedesc lock to call closef(). 686 * 687 * XXX this duplicates parts of close(). 688 */ 689 delfp = fdp->fd_ofiles[new]; 690 holdleaders = 0; 691 if (delfp != NULL) { 692 if (td->td_proc->p_fdtol != NULL) { 693 /* 694 * Ask fdfree() to sleep to ensure that all relevant 695 * process leaders can be traversed in closef(). 696 */ 697 fdp->fd_holdleaderscount++; 698 holdleaders = 1; 699 } 700 } 701 702 /* 703 * Duplicate the source descriptor 704 */ 705 fdp->fd_ofiles[new] = fp; 706 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; 707 if (new > fdp->fd_lastfile) 708 fdp->fd_lastfile = new; 709 *retval = new; 710 711 /* 712 * If we dup'd over a valid file, we now own the reference to it 713 * and must dispose of it using closef() semantics (as if a 714 * close() were performed on it). 715 * 716 * XXX this duplicates parts of close(). 
717 */ 718 if (delfp != NULL) { 719 knote_fdclose(td, new); 720 if (delfp->f_type == DTYPE_MQUEUE) 721 mq_fdclose(td, new, delfp); 722 FILEDESC_UNLOCK(fdp); 723 (void) closef(delfp, td); 724 if (holdleaders) { 725 FILEDESC_LOCK_FAST(fdp); 726 fdp->fd_holdleaderscount--; 727 if (fdp->fd_holdleaderscount == 0 && 728 fdp->fd_holdleaderswakeup != 0) { 729 fdp->fd_holdleaderswakeup = 0; 730 wakeup(&fdp->fd_holdleaderscount); 731 } 732 FILEDESC_UNLOCK_FAST(fdp); 733 } 734 } else { 735 FILEDESC_UNLOCK(fdp); 736 } 737 return (0); 738 } 739 740 /* 741 * If sigio is on the list associated with a process or process group, 742 * disable signalling from the device, remove sigio from the list and 743 * free sigio. 744 */ 745 void 746 funsetown(struct sigio **sigiop) 747 { 748 struct sigio *sigio; 749 750 SIGIO_LOCK(); 751 sigio = *sigiop; 752 if (sigio == NULL) { 753 SIGIO_UNLOCK(); 754 return; 755 } 756 *(sigio->sio_myref) = NULL; 757 if ((sigio)->sio_pgid < 0) { 758 struct pgrp *pg = (sigio)->sio_pgrp; 759 PGRP_LOCK(pg); 760 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio, 761 sigio, sio_pgsigio); 762 PGRP_UNLOCK(pg); 763 } else { 764 struct proc *p = (sigio)->sio_proc; 765 PROC_LOCK(p); 766 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio, 767 sigio, sio_pgsigio); 768 PROC_UNLOCK(p); 769 } 770 SIGIO_UNLOCK(); 771 crfree(sigio->sio_ucred); 772 FREE(sigio, M_SIGIO); 773 } 774 775 /* 776 * Free a list of sigio structures. 777 * We only need to lock the SIGIO_LOCK because we have made ourselves 778 * inaccessible to callers of fsetown and therefore do not need to lock 779 * the proc or pgrp struct for the list manipulation. 780 */ 781 void 782 funsetownlst(struct sigiolst *sigiolst) 783 { 784 struct proc *p; 785 struct pgrp *pg; 786 struct sigio *sigio; 787 788 sigio = SLIST_FIRST(sigiolst); 789 if (sigio == NULL) 790 return; 791 p = NULL; 792 pg = NULL; 793 794 /* 795 * Every entry of the list should belong 796 * to a single proc or pgrp. 
797 */ 798 if (sigio->sio_pgid < 0) { 799 pg = sigio->sio_pgrp; 800 PGRP_LOCK_ASSERT(pg, MA_NOTOWNED); 801 } else /* if (sigio->sio_pgid > 0) */ { 802 p = sigio->sio_proc; 803 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 804 } 805 806 SIGIO_LOCK(); 807 while ((sigio = SLIST_FIRST(sigiolst)) != NULL) { 808 *(sigio->sio_myref) = NULL; 809 if (pg != NULL) { 810 KASSERT(sigio->sio_pgid < 0, 811 ("Proc sigio in pgrp sigio list")); 812 KASSERT(sigio->sio_pgrp == pg, 813 ("Bogus pgrp in sigio list")); 814 PGRP_LOCK(pg); 815 SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, 816 sio_pgsigio); 817 PGRP_UNLOCK(pg); 818 } else /* if (p != NULL) */ { 819 KASSERT(sigio->sio_pgid > 0, 820 ("Pgrp sigio in proc sigio list")); 821 KASSERT(sigio->sio_proc == p, 822 ("Bogus proc in sigio list")); 823 PROC_LOCK(p); 824 SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, 825 sio_pgsigio); 826 PROC_UNLOCK(p); 827 } 828 SIGIO_UNLOCK(); 829 crfree(sigio->sio_ucred); 830 FREE(sigio, M_SIGIO); 831 SIGIO_LOCK(); 832 } 833 SIGIO_UNLOCK(); 834 } 835 836 /* 837 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg). 838 * 839 * After permission checking, add a sigio structure to the sigio list for 840 * the process or process group. 841 */ 842 int 843 fsetown(pid_t pgid, struct sigio **sigiop) 844 { 845 struct proc *proc; 846 struct pgrp *pgrp; 847 struct sigio *sigio; 848 int ret; 849 850 if (pgid == 0) { 851 funsetown(sigiop); 852 return (0); 853 } 854 855 ret = 0; 856 857 /* Allocate and fill in the new sigio out of locks. */ 858 MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK); 859 sigio->sio_pgid = pgid; 860 sigio->sio_ucred = crhold(curthread->td_ucred); 861 sigio->sio_myref = sigiop; 862 863 sx_slock(&proctree_lock); 864 if (pgid > 0) { 865 proc = pfind(pgid); 866 if (proc == NULL) { 867 ret = ESRCH; 868 goto fail; 869 } 870 871 /* 872 * Policy - Don't allow a process to FSETOWN a process 873 * in another session. 
874 * 875 * Remove this test to allow maximum flexibility or 876 * restrict FSETOWN to the current process or process 877 * group for maximum safety. 878 */ 879 PROC_UNLOCK(proc); 880 if (proc->p_session != curthread->td_proc->p_session) { 881 ret = EPERM; 882 goto fail; 883 } 884 885 pgrp = NULL; 886 } else /* if (pgid < 0) */ { 887 pgrp = pgfind(-pgid); 888 if (pgrp == NULL) { 889 ret = ESRCH; 890 goto fail; 891 } 892 PGRP_UNLOCK(pgrp); 893 894 /* 895 * Policy - Don't allow a process to FSETOWN a process 896 * in another session. 897 * 898 * Remove this test to allow maximum flexibility or 899 * restrict FSETOWN to the current process or process 900 * group for maximum safety. 901 */ 902 if (pgrp->pg_session != curthread->td_proc->p_session) { 903 ret = EPERM; 904 goto fail; 905 } 906 907 proc = NULL; 908 } 909 funsetown(sigiop); 910 if (pgid > 0) { 911 PROC_LOCK(proc); 912 /* 913 * Since funsetownlst() is called without the proctree 914 * locked, we need to check for P_WEXIT. 915 * XXX: is ESRCH correct? 916 */ 917 if ((proc->p_flag & P_WEXIT) != 0) { 918 PROC_UNLOCK(proc); 919 ret = ESRCH; 920 goto fail; 921 } 922 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio); 923 sigio->sio_proc = proc; 924 PROC_UNLOCK(proc); 925 } else { 926 PGRP_LOCK(pgrp); 927 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio); 928 sigio->sio_pgrp = pgrp; 929 PGRP_UNLOCK(pgrp); 930 } 931 sx_sunlock(&proctree_lock); 932 SIGIO_LOCK(); 933 *sigiop = sigio; 934 SIGIO_UNLOCK(); 935 return (0); 936 937 fail: 938 sx_sunlock(&proctree_lock); 939 crfree(sigio->sio_ucred); 940 FREE(sigio, M_SIGIO); 941 return (ret); 942 } 943 944 /* 945 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg). 946 */ 947 pid_t 948 fgetown(sigiop) 949 struct sigio **sigiop; 950 { 951 pid_t pgid; 952 953 SIGIO_LOCK(); 954 pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0; 955 SIGIO_UNLOCK(); 956 return (pgid); 957 } 958 959 /* 960 * Close a file descriptor. 
961 */ 962 #ifndef _SYS_SYSPROTO_H_ 963 struct close_args { 964 int fd; 965 }; 966 #endif 967 /* 968 * MPSAFE 969 */ 970 /* ARGSUSED */ 971 int 972 close(td, uap) 973 struct thread *td; 974 struct close_args *uap; 975 { 976 977 return (kern_close(td, uap->fd)); 978 } 979 980 int 981 kern_close(td, fd) 982 struct thread *td; 983 int fd; 984 { 985 struct filedesc *fdp; 986 struct file *fp; 987 int error; 988 int holdleaders; 989 990 error = 0; 991 holdleaders = 0; 992 fdp = td->td_proc->p_fd; 993 994 AUDIT_SYSCLOSE(td, fd); 995 996 FILEDESC_LOCK(fdp); 997 if ((unsigned)fd >= fdp->fd_nfiles || 998 (fp = fdp->fd_ofiles[fd]) == NULL) { 999 FILEDESC_UNLOCK(fdp); 1000 return (EBADF); 1001 } 1002 fdp->fd_ofiles[fd] = NULL; 1003 fdp->fd_ofileflags[fd] = 0; 1004 fdunused(fdp, fd); 1005 if (td->td_proc->p_fdtol != NULL) { 1006 /* 1007 * Ask fdfree() to sleep to ensure that all relevant 1008 * process leaders can be traversed in closef(). 1009 */ 1010 fdp->fd_holdleaderscount++; 1011 holdleaders = 1; 1012 } 1013 1014 /* 1015 * We now hold the fp reference that used to be owned by the descriptor 1016 * array. 1017 * We have to unlock the FILEDESC *AFTER* knote_fdclose to prevent a 1018 * race of the fd getting opened, a knote added, and deleteing a knote 1019 * for the new fd. 1020 */ 1021 knote_fdclose(td, fd); 1022 if (fp->f_type == DTYPE_MQUEUE) 1023 mq_fdclose(td, fd, fp); 1024 FILEDESC_UNLOCK(fdp); 1025 1026 error = closef(fp, td); 1027 if (holdleaders) { 1028 FILEDESC_LOCK_FAST(fdp); 1029 fdp->fd_holdleaderscount--; 1030 if (fdp->fd_holdleaderscount == 0 && 1031 fdp->fd_holdleaderswakeup != 0) { 1032 fdp->fd_holdleaderswakeup = 0; 1033 wakeup(&fdp->fd_holdleaderscount); 1034 } 1035 FILEDESC_UNLOCK_FAST(fdp); 1036 } 1037 return (error); 1038 } 1039 1040 #if defined(COMPAT_43) 1041 /* 1042 * Return status information about a file descriptor. 
1043 */ 1044 #ifndef _SYS_SYSPROTO_H_ 1045 struct ofstat_args { 1046 int fd; 1047 struct ostat *sb; 1048 }; 1049 #endif 1050 /* 1051 * MPSAFE 1052 */ 1053 /* ARGSUSED */ 1054 int 1055 ofstat(struct thread *td, struct ofstat_args *uap) 1056 { 1057 struct ostat oub; 1058 struct stat ub; 1059 int error; 1060 1061 error = kern_fstat(td, uap->fd, &ub); 1062 if (error == 0) { 1063 cvtstat(&ub, &oub); 1064 error = copyout(&oub, uap->sb, sizeof(oub)); 1065 } 1066 return (error); 1067 } 1068 #endif /* COMPAT_43 */ 1069 1070 /* 1071 * Return status information about a file descriptor. 1072 */ 1073 #ifndef _SYS_SYSPROTO_H_ 1074 struct fstat_args { 1075 int fd; 1076 struct stat *sb; 1077 }; 1078 #endif 1079 /* 1080 * MPSAFE 1081 */ 1082 /* ARGSUSED */ 1083 int 1084 fstat(struct thread *td, struct fstat_args *uap) 1085 { 1086 struct stat ub; 1087 int error; 1088 1089 error = kern_fstat(td, uap->fd, &ub); 1090 if (error == 0) 1091 error = copyout(&ub, uap->sb, sizeof(ub)); 1092 return (error); 1093 } 1094 1095 int 1096 kern_fstat(struct thread *td, int fd, struct stat *sbp) 1097 { 1098 struct file *fp; 1099 int error; 1100 1101 AUDIT_ARG(fd, fd); 1102 1103 if ((error = fget(td, fd, &fp)) != 0) 1104 return (error); 1105 1106 AUDIT_ARG(file, td->td_proc, fp); 1107 1108 error = fo_stat(fp, sbp, td->td_ucred, td); 1109 fdrop(fp, td); 1110 return (error); 1111 } 1112 1113 /* 1114 * Return status information about a file descriptor. 1115 */ 1116 #ifndef _SYS_SYSPROTO_H_ 1117 struct nfstat_args { 1118 int fd; 1119 struct nstat *sb; 1120 }; 1121 #endif 1122 /* 1123 * MPSAFE 1124 */ 1125 /* ARGSUSED */ 1126 int 1127 nfstat(struct thread *td, struct nfstat_args *uap) 1128 { 1129 struct nstat nub; 1130 struct stat ub; 1131 int error; 1132 1133 error = kern_fstat(td, uap->fd, &ub); 1134 if (error == 0) { 1135 cvtnstat(&ub, &nub); 1136 error = copyout(&nub, uap->sb, sizeof(nub)); 1137 } 1138 return (error); 1139 } 1140 1141 /* 1142 * Return pathconf information about a file descriptor. 
1143 */ 1144 #ifndef _SYS_SYSPROTO_H_ 1145 struct fpathconf_args { 1146 int fd; 1147 int name; 1148 }; 1149 #endif 1150 /* 1151 * MPSAFE 1152 */ 1153 /* ARGSUSED */ 1154 int 1155 fpathconf(struct thread *td, struct fpathconf_args *uap) 1156 { 1157 struct file *fp; 1158 struct vnode *vp; 1159 int error; 1160 1161 if ((error = fget(td, uap->fd, &fp)) != 0) 1162 return (error); 1163 1164 /* If asynchronous I/O is available, it works for all descriptors. */ 1165 if (uap->name == _PC_ASYNC_IO) { 1166 td->td_retval[0] = async_io_version; 1167 goto out; 1168 } 1169 vp = fp->f_vnode; 1170 if (vp != NULL) { 1171 int vfslocked; 1172 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1173 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1174 error = VOP_PATHCONF(vp, uap->name, td->td_retval); 1175 VOP_UNLOCK(vp, 0, td); 1176 VFS_UNLOCK_GIANT(vfslocked); 1177 } else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { 1178 if (uap->name != _PC_PIPE_BUF) { 1179 error = EINVAL; 1180 } else { 1181 td->td_retval[0] = PIPE_BUF; 1182 error = 0; 1183 } 1184 } else { 1185 error = EOPNOTSUPP; 1186 } 1187 out: 1188 fdrop(fp, td); 1189 return (error); 1190 } 1191 1192 /* 1193 * Grow the file table to accomodate (at least) nfd descriptors. This may 1194 * block and drop the filedesc lock, but it will reacquire it before 1195 * returning. 
1196 */ 1197 static void 1198 fdgrowtable(struct filedesc *fdp, int nfd) 1199 { 1200 struct file **ntable; 1201 char *nfileflags; 1202 int nnfiles, onfiles; 1203 NDSLOTTYPE *nmap; 1204 1205 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 1206 1207 KASSERT(fdp->fd_nfiles > 0, 1208 ("zero-length file table")); 1209 1210 /* compute the size of the new table */ 1211 onfiles = fdp->fd_nfiles; 1212 nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */ 1213 if (nnfiles <= onfiles) 1214 /* the table is already large enough */ 1215 return; 1216 1217 /* allocate a new table and (if required) new bitmaps */ 1218 FILEDESC_UNLOCK(fdp); 1219 MALLOC(ntable, struct file **, nnfiles * OFILESIZE, 1220 M_FILEDESC, M_ZERO | M_WAITOK); 1221 nfileflags = (char *)&ntable[nnfiles]; 1222 if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) 1223 MALLOC(nmap, NDSLOTTYPE *, NDSLOTS(nnfiles) * NDSLOTSIZE, 1224 M_FILEDESC, M_ZERO | M_WAITOK); 1225 else 1226 nmap = NULL; 1227 FILEDESC_LOCK(fdp); 1228 1229 /* 1230 * We now have new tables ready to go. Since we dropped the 1231 * filedesc lock to call malloc(), watch out for a race. 1232 */ 1233 onfiles = fdp->fd_nfiles; 1234 if (onfiles >= nnfiles) { 1235 /* we lost the race, but that's OK */ 1236 free(ntable, M_FILEDESC); 1237 if (nmap != NULL) 1238 free(nmap, M_FILEDESC); 1239 return; 1240 } 1241 bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable)); 1242 bcopy(fdp->fd_ofileflags, nfileflags, onfiles); 1243 if (onfiles > NDFILE) 1244 free(fdp->fd_ofiles, M_FILEDESC); 1245 fdp->fd_ofiles = ntable; 1246 fdp->fd_ofileflags = nfileflags; 1247 if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) { 1248 bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap)); 1249 if (NDSLOTS(onfiles) > NDSLOTS(NDFILE)) 1250 free(fdp->fd_map, M_FILEDESC); 1251 fdp->fd_map = nmap; 1252 } 1253 fdp->fd_nfiles = nnfiles; 1254 } 1255 1256 /* 1257 * Allocate a file descriptor for the process. 
1258 */ 1259 int 1260 fdalloc(struct thread *td, int minfd, int *result) 1261 { 1262 struct proc *p = td->td_proc; 1263 struct filedesc *fdp = p->p_fd; 1264 int fd = -1, maxfd; 1265 1266 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 1267 1268 if (fdp->fd_freefile > minfd) 1269 minfd = fdp->fd_freefile; 1270 1271 PROC_LOCK(p); 1272 maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 1273 PROC_UNLOCK(p); 1274 1275 /* 1276 * Search the bitmap for a free descriptor. If none is found, try 1277 * to grow the file table. Keep at it until we either get a file 1278 * descriptor or run into process or system limits; fdgrowtable() 1279 * may drop the filedesc lock, so we're in a race. 1280 */ 1281 for (;;) { 1282 fd = fd_first_free(fdp, minfd, fdp->fd_nfiles); 1283 if (fd >= maxfd) 1284 return (EMFILE); 1285 if (fd < fdp->fd_nfiles) 1286 break; 1287 fdgrowtable(fdp, min(fdp->fd_nfiles * 2, maxfd)); 1288 } 1289 1290 /* 1291 * Perform some sanity checks, then mark the file descriptor as 1292 * used and return it to the caller. 1293 */ 1294 KASSERT(!fdisused(fdp, fd), 1295 ("fd_first_free() returned non-free descriptor")); 1296 KASSERT(fdp->fd_ofiles[fd] == NULL, 1297 ("free descriptor isn't")); 1298 fdp->fd_ofileflags[fd] = 0; /* XXX needed? */ 1299 fdused(fdp, fd); 1300 *result = fd; 1301 return (0); 1302 } 1303 1304 /* 1305 * Check to see whether n user file descriptors 1306 * are available to the process p. 
 *
 * Returns 1 when at least n descriptors could still be allocated and 0
 * otherwise.  The filedesc lock must be held by the caller.
 */
int
fdavail(struct thread *td, int n)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = td->td_proc->p_fd;
	struct file **fpp;
	int i, lim, last;

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	PROC_LOCK(p);
	lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
	PROC_UNLOCK(p);
	/*
	 * Slots between the current table size and the limit all count as
	 * available; if they cover the request we are done.  Otherwise n is
	 * reduced by that amount before the table itself is scanned.
	 */
	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
		return (1);
	/* Count NULL entries from fd_freefile up to min(table size, limit). */
	last = min(fdp->fd_nfiles, lim);
	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
		if (*fpp == NULL && --n <= 0)
			return (1);
	}
	return (0);
}

/*
 * Create a new open file structure and allocate
 * a file descriptor for the process that refers to it.
 * We add one reference to the file for the descriptor table
 * and one reference for resultfp. This is to prevent us being
 * preempted and the entry in the descriptor table closed after
 * we release the FILEDESC lock.
 *
 * resultfp and resultfd may each be NULL; when non-NULL they receive the
 * new file and its descriptor number.  The extra reference is only taken
 * when resultfp is non-NULL.  Returns 0, ENFILE when the system-wide file
 * limit is hit, or the error from fdalloc().  The new file is created with
 * badfileops installed and NULL f_data/f_vnode.
 */
int
falloc(struct thread *td, struct file **resultfp, int *resultfd)
{
	struct proc *p = td->td_proc;
	struct file *fp, *fq;
	int error, i;
	/* Files beyond this count need suser_cred() to succeed (5% reserve). */
	int maxuserfiles = maxfiles - (maxfiles / 20);
	/* State for ppsratecheck(): limits the console message below. */
	static struct timeval lastfail;
	static int curfail;

	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
	sx_xlock(&filelist_lock);

	if ((openfiles >= maxuserfiles &&
	    suser_cred(td->td_ucred, SUSER_RUID) != 0) ||
	    openfiles >= maxfiles) {
		if (ppsratecheck(&lastfail, &curfail, 1)) {
			printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n",
				td->td_ucred->cr_ruid);
		}
		sx_xunlock(&filelist_lock);
		uma_zfree(file_zone, fp);
		return (ENFILE);
	}
	openfiles++;

	/*
	 * If the process has file descriptor zero open, add the new file
	 * descriptor to the list of open files at that point, otherwise
	 * put it at the front of the list of open files.
	 */
	fp->f_mtxp = mtx_pool_alloc(mtxpool_sleep);
	fp->f_count = 1;
	if (resultfp)
		fp->f_count++;
	fp->f_cred = crhold(td->td_ucred);
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	fp->f_vnode = NULL;
	/* Note the order: filelist_lock is taken before the filedesc lock. */
	FILEDESC_LOCK(p->p_fd);
	if ((fq = p->p_fd->fd_ofiles[0])) {
		LIST_INSERT_AFTER(fq, fp, f_list);
	} else {
		LIST_INSERT_HEAD(&filehead, fp, f_list);
	}
	sx_xunlock(&filelist_lock);
	if ((error = fdalloc(td, 0, &i))) {
		FILEDESC_UNLOCK(p->p_fd);
		/* Undo every reference taken above so the file is released. */
		fdrop(fp, td);
		if (resultfp)
			fdrop(fp, td);
		return (error);
	}
	p->p_fd->fd_ofiles[i] = fp;
	FILEDESC_UNLOCK(p->p_fd);
	if (resultfp)
		*resultfp = fp;
	if (resultfd)
		*resultfd = i;
	return (0);
}

/*
 * Build a new filedesc structure from another.
 * Copy the current, root, and jail root vnode references.
 *
 * fdp may be NULL, in which case the directory pointers stay NULL.  The
 * new structure starts with the descriptor table, flag array and bitmap
 * embedded in struct filedesc0 (NDFILE entries), one reference, one hold
 * and no open files (fd_lastfile == -1).
 */
struct filedesc *
fdinit(struct filedesc *fdp)
{
	struct filedesc0 *newfdp;

	newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO);
	mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
	if (fdp != NULL) {
		FILEDESC_LOCK(fdp);
		newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
		if (newfdp->fd_fd.fd_cdir)
			VREF(newfdp->fd_fd.fd_cdir);
		newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
		if (newfdp->fd_fd.fd_rdir)
			VREF(newfdp->fd_fd.fd_rdir);
		newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
		if (newfdp->fd_fd.fd_jdir)
			VREF(newfdp->fd_fd.fd_jdir);
		FILEDESC_UNLOCK(fdp);
	}

	/* Create the file descriptor table. */
	newfdp->fd_fd.fd_refcnt = 1;
	newfdp->fd_fd.fd_holdcnt = 1;
	newfdp->fd_fd.fd_cmask = CMASK;
	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
	newfdp->fd_fd.fd_nfiles = NDFILE;
	newfdp->fd_fd.fd_map = newfdp->fd_dmap;
	newfdp->fd_fd.fd_lastfile = -1;
	return (&newfdp->fd_fd);
}

/*
 * Take a hold on process p's filedesc structure, if it has one, and
 * return it (NULL otherwise).  The hold count is manipulated under
 * fdesc_mtx and must be released with fddrop().
 */
static struct filedesc *
fdhold(struct proc *p)
{
	struct filedesc *fdp;

	mtx_lock(&fdesc_mtx);
	fdp = p->p_fd;
	if (fdp != NULL)
		fdp->fd_holdcnt++;
	mtx_unlock(&fdesc_mtx);
	return (fdp);
}

/*
 * Release a hold on fdp.  When the hold count is no longer positive the
 * structure's mutex is destroyed and its memory freed.
 */
static void
fddrop(struct filedesc *fdp)
{
	int i;

	mtx_lock(&fdesc_mtx);
	i = --fdp->fd_holdcnt;
	mtx_unlock(&fdesc_mtx);
	if (i > 0)
		return;

	mtx_destroy(&fdp->fd_mtx);
	FREE(fdp, M_FILEDESC);
}

/*
 * Share a filedesc structure.
 *
 * Adds one to fd_refcnt and returns the same pointer.
 */
struct filedesc *
fdshare(struct filedesc *fdp)
{
	FILEDESC_LOCK_FAST(fdp);
	fdp->fd_refcnt++;
	FILEDESC_UNLOCK_FAST(fdp);
	return (fdp);
}

/*
 * Unshare a filedesc structure, if necessary by making a copy
 *
 * When p's table has more than one reference, a private copy is made
 * with fdcopy(), the thread's reference to the old table is released
 * with fdfree(td), and p->p_fd is pointed at the copy.
 */
void
fdunshare(struct proc *p, struct thread *td)
{

	FILEDESC_LOCK_FAST(p->p_fd);
	if (p->p_fd->fd_refcnt > 1) {
		struct filedesc *tmp;

		FILEDESC_UNLOCK_FAST(p->p_fd);
		tmp = fdcopy(p->p_fd);
		fdfree(td);
		p->p_fd = tmp;
	} else
		FILEDESC_UNLOCK_FAST(p->p_fd);
}

/*
 * Copy a filedesc structure.
 * A NULL pointer in returns a NULL reference, this is to ease callers,
 * not catch errors.
 *
 * Every in-use descriptor except kqueue descriptors is duplicated into
 * the new table with an additional reference on the file.
 */
struct filedesc *
fdcopy(struct filedesc *fdp)
{
	struct filedesc *newfdp;
	int i;

	/* Certain daemons might not have file descriptors. */
	if (fdp == NULL)
		return (NULL);

	newfdp = fdinit(fdp);
	FILEDESC_LOCK_FAST(fdp);
	/*
	 * Size the new table to hold fdp's highest descriptor.  fdp is
	 * unlocked while the new table grows, so fd_lastfile is re-checked
	 * each time around.
	 */
	while (fdp->fd_lastfile >= newfdp->fd_nfiles) {
		FILEDESC_UNLOCK_FAST(fdp);
		FILEDESC_LOCK(newfdp);
		fdgrowtable(newfdp, fdp->fd_lastfile + 1);
		FILEDESC_UNLOCK(newfdp);
		FILEDESC_LOCK_FAST(fdp);
	}
	/* copy everything except kqueue descriptors */
	newfdp->fd_freefile = -1;
	for (i = 0; i <= fdp->fd_lastfile; ++i) {
		if (fdisused(fdp, i) &&
		    fdp->fd_ofiles[i]->f_type != DTYPE_KQUEUE) {
			newfdp->fd_ofiles[i] = fdp->fd_ofiles[i];
			newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i];
			fhold(newfdp->fd_ofiles[i]);
			newfdp->fd_lastfile = i;
		} else {
			/* Remember the first slot that was not copied. */
			if (newfdp->fd_freefile == -1)
				newfdp->fd_freefile = i;
		}
	}
	FILEDESC_UNLOCK_FAST(fdp);
	/* Mark the copied slots as used in the new table's bitmap. */
	FILEDESC_LOCK(newfdp);
	for (i = 0; i <= newfdp->fd_lastfile; ++i)
		if (newfdp->fd_ofiles[i] != NULL)
			fdused(newfdp, i);
	FILEDESC_UNLOCK(newfdp);
	FILEDESC_LOCK_FAST(fdp);
	/* No gap was seen: i is fd_lastfile + 1 after the loop above. */
	if (newfdp->fd_freefile == -1)
		newfdp->fd_freefile = i;
	newfdp->fd_cmask = fdp->fd_cmask;
	FILEDESC_UNLOCK_FAST(fdp);
	return (newfdp);
}

/*
 * Release a filedesc structure.
 *
 * Drops the calling process's reference on its descriptor table.  If a
 * filedesc_to_leader structure is attached, it is detached first (after
 * releasing POSIX locks where required).  When the last reference goes
 * away every open file is closed, dynamically allocated tables are freed,
 * the directory vnodes are released and the hold taken at creation time
 * is dropped.
 */
void
fdfree(struct thread *td)
{
	struct filedesc *fdp;
	struct file **fpp;
	int i, locked;
	struct filedesc_to_leader *fdtol;
	struct file *fp;
	struct vnode *cdir, *jdir, *rdir, *vp;
	struct flock lf;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	/* Check for special need to clear POSIX style locks */
	fdtol = td->td_proc->p_fdtol;
	if (fdtol != NULL) {
		FILEDESC_LOCK(fdp);
		KASSERT(fdtol->fdl_refcount > 0,
			("filedesc_to_refcount botch: fdl_refcount=%d",
			 fdtol->fdl_refcount));
		if (fdtol->fdl_refcount == 1 &&
		    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
			for (i = 0, fpp = fdp->fd_ofiles;
			     i <= fdp->fd_lastfile;
			     i++, fpp++) {
				if (*fpp == NULL ||
				    (*fpp)->f_type != DTYPE_VNODE)
					continue;
				fp = *fpp;
				/*
				 * Hold the file so it stays valid while the
				 * filedesc lock is dropped for the unlock
				 * request.
				 */
				fhold(fp);
				FILEDESC_UNLOCK(fdp);
				lf.l_whence = SEEK_SET;
				lf.l_start = 0;
				lf.l_len = 0;
				lf.l_type = F_UNLCK;
				vp = fp->f_vnode;
				locked = VFS_LOCK_GIANT(vp->v_mount);
				(void) VOP_ADVLOCK(vp,
						   (caddr_t)td->td_proc->
						   p_leader,
						   F_UNLCK,
						   &lf,
						   F_POSIX);
				VFS_UNLOCK_GIANT(locked);
				FILEDESC_LOCK(fdp);
				/*
				 * NOTE(review): fdrop() runs with the
				 * filedesc lock held here; confirm this can
				 * never be the final reference.
				 */
				fdrop(fp, td);
				/*
				 * The table may have been reallocated while
				 * unlocked; recompute the cursor from i.
				 */
				fpp = fdp->fd_ofiles + i;
			}
		}
	retry:
		if (fdtol->fdl_refcount == 1) {
			if (fdp->fd_holdleaderscount > 0 &&
			    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
				/*
				 * close() or do_dup() has cleared a reference
				 * in a shared file descriptor table.
				 */
				fdp->fd_holdleaderswakeup = 1;
				msleep(&fdp->fd_holdleaderscount, &fdp->fd_mtx,
				       PLOCK, "fdlhold", 0);
				goto retry;
			}
			if (fdtol->fdl_holdcount > 0) {
				/*
				 * Ensure that fdtol->fdl_leader
				 * remains valid in closef().
				 */
				fdtol->fdl_wakeup = 1;
				msleep(fdtol, &fdp->fd_mtx,
				       PLOCK, "fdlhold", 0);
				goto retry;
			}
		}
		fdtol->fdl_refcount--;
		if (fdtol->fdl_refcount == 0 &&
		    fdtol->fdl_holdcount == 0) {
			/* Unlink from the circular list; freed below. */
			fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
			fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
		} else
			fdtol = NULL;
		td->td_proc->p_fdtol = NULL;
		FILEDESC_UNLOCK(fdp);
		if (fdtol != NULL)
			FREE(fdtol, M_FILEDESC_TO_LEADER);
	}
	FILEDESC_LOCK_FAST(fdp);
	i = --fdp->fd_refcnt;
	FILEDESC_UNLOCK_FAST(fdp);
	if (i > 0)
		return;
	/*
	 * We are the last reference to the structure, so we can
	 * safely assume it will not change out from under us.
	 */
	fpp = fdp->fd_ofiles;
	/* Visits slots 0..fd_lastfile: the test runs before the decrement. */
	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
		if (*fpp)
			(void) closef(*fpp, td);
	}
	FILEDESC_LOCK(fdp);

	/* XXX This should happen earlier. */
	mtx_lock(&fdesc_mtx);
	td->td_proc->p_fd = NULL;
	mtx_unlock(&fdesc_mtx);

	/* Only tables that outgrew the embedded ones were malloc'ed. */
	if (fdp->fd_nfiles > NDFILE)
		FREE(fdp->fd_ofiles, M_FILEDESC);
	if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE))
		FREE(fdp->fd_map, M_FILEDESC);

	fdp->fd_nfiles = 0;

	/* Detach the directory vnodes under the lock, release them after. */
	cdir = fdp->fd_cdir;
	fdp->fd_cdir = NULL;
	rdir = fdp->fd_rdir;
	fdp->fd_rdir = NULL;
	jdir = fdp->fd_jdir;
	fdp->fd_jdir = NULL;
	FILEDESC_UNLOCK(fdp);

	if (cdir) {
		locked = VFS_LOCK_GIANT(cdir->v_mount);
		vrele(cdir);
		VFS_UNLOCK_GIANT(locked);
	}
	if (rdir) {
		locked = VFS_LOCK_GIANT(rdir->v_mount);
		vrele(rdir);
		VFS_UNLOCK_GIANT(locked);
	}
	if (jdir) {
		locked = VFS_LOCK_GIANT(jdir->v_mount);
		vrele(jdir);
		VFS_UNLOCK_GIANT(locked);
	}

	fddrop(fdp);
}

/*
 * For setugid programs, we don't want people to use that setugidness
 * to generate error messages which write to a file which would
 * otherwise be
off-limits to the process. We check for filesystems where 1700 * the vnode can change out from under us after execve (like [lin]procfs). 1701 * 1702 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is 1703 * sufficient. We also don't check for setugidness since we know we are. 1704 */ 1705 static int 1706 is_unsafe(struct file *fp) 1707 { 1708 if (fp->f_type == DTYPE_VNODE) { 1709 struct vnode *vp = fp->f_vnode; 1710 1711 if ((vp->v_vflag & VV_PROCDEP) != 0) 1712 return (1); 1713 } 1714 return (0); 1715 } 1716 1717 /* 1718 * Make this setguid thing safe, if at all possible. 1719 */ 1720 void 1721 setugidsafety(struct thread *td) 1722 { 1723 struct filedesc *fdp; 1724 int i; 1725 1726 /* Certain daemons might not have file descriptors. */ 1727 fdp = td->td_proc->p_fd; 1728 if (fdp == NULL) 1729 return; 1730 1731 /* 1732 * Note: fdp->fd_ofiles may be reallocated out from under us while 1733 * we are blocked in a close. Be careful! 1734 */ 1735 FILEDESC_LOCK(fdp); 1736 for (i = 0; i <= fdp->fd_lastfile; i++) { 1737 if (i > 2) 1738 break; 1739 if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) { 1740 struct file *fp; 1741 1742 knote_fdclose(td, i); 1743 /* 1744 * NULL-out descriptor prior to close to avoid 1745 * a race while close blocks. 1746 */ 1747 fp = fdp->fd_ofiles[i]; 1748 fdp->fd_ofiles[i] = NULL; 1749 fdp->fd_ofileflags[i] = 0; 1750 fdunused(fdp, i); 1751 FILEDESC_UNLOCK(fdp); 1752 (void) closef(fp, td); 1753 FILEDESC_LOCK(fdp); 1754 } 1755 } 1756 FILEDESC_UNLOCK(fdp); 1757 } 1758 1759 void 1760 fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td) 1761 { 1762 1763 FILEDESC_LOCK(fdp); 1764 if (fdp->fd_ofiles[idx] == fp) { 1765 fdp->fd_ofiles[idx] = NULL; 1766 fdunused(fdp, idx); 1767 FILEDESC_UNLOCK(fdp); 1768 fdrop(fp, td); 1769 } else { 1770 FILEDESC_UNLOCK(fdp); 1771 } 1772 } 1773 1774 /* 1775 * Close any files on exec? 
1776 */ 1777 void 1778 fdcloseexec(struct thread *td) 1779 { 1780 struct filedesc *fdp; 1781 int i; 1782 1783 /* Certain daemons might not have file descriptors. */ 1784 fdp = td->td_proc->p_fd; 1785 if (fdp == NULL) 1786 return; 1787 1788 FILEDESC_LOCK(fdp); 1789 1790 /* 1791 * We cannot cache fd_ofiles or fd_ofileflags since operations 1792 * may block and rip them out from under us. 1793 */ 1794 for (i = 0; i <= fdp->fd_lastfile; i++) { 1795 if (fdp->fd_ofiles[i] != NULL && 1796 (fdp->fd_ofiles[i]->f_type == DTYPE_MQUEUE || 1797 (fdp->fd_ofileflags[i] & UF_EXCLOSE))) { 1798 struct file *fp; 1799 1800 knote_fdclose(td, i); 1801 /* 1802 * NULL-out descriptor prior to close to avoid 1803 * a race while close blocks. 1804 */ 1805 fp = fdp->fd_ofiles[i]; 1806 fdp->fd_ofiles[i] = NULL; 1807 fdp->fd_ofileflags[i] = 0; 1808 fdunused(fdp, i); 1809 if (fp->f_type == DTYPE_MQUEUE) 1810 mq_fdclose(td, i, fp); 1811 FILEDESC_UNLOCK(fdp); 1812 (void) closef(fp, td); 1813 FILEDESC_LOCK(fdp); 1814 } 1815 } 1816 FILEDESC_UNLOCK(fdp); 1817 } 1818 1819 /* 1820 * It is unsafe for set[ug]id processes to be started with file 1821 * descriptors 0..2 closed, as these descriptors are given implicit 1822 * significance in the Standard C library. fdcheckstd() will create a 1823 * descriptor referencing /dev/null for each of stdin, stdout, and 1824 * stderr that is not already open. 
1825 */ 1826 int 1827 fdcheckstd(struct thread *td) 1828 { 1829 struct nameidata nd; 1830 struct filedesc *fdp; 1831 struct file *fp; 1832 register_t retval; 1833 int fd, i, error, flags, devnull; 1834 1835 fdp = td->td_proc->p_fd; 1836 if (fdp == NULL) 1837 return (0); 1838 KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared")); 1839 devnull = -1; 1840 error = 0; 1841 for (i = 0; i < 3; i++) { 1842 if (fdp->fd_ofiles[i] != NULL) 1843 continue; 1844 if (devnull < 0) { 1845 int vfslocked; 1846 error = falloc(td, &fp, &fd); 1847 if (error != 0) 1848 break; 1849 /* Note extra ref on `fp' held for us by falloc(). */ 1850 KASSERT(fd == i, ("oof, we didn't get our fd")); 1851 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, 1852 "/dev/null", td); 1853 flags = FREAD | FWRITE; 1854 error = vn_open(&nd, &flags, 0, fd); 1855 if (error != 0) { 1856 /* 1857 * Someone may have closed the entry in the 1858 * file descriptor table, so check it hasn't 1859 * changed before dropping the reference count. 1860 */ 1861 FILEDESC_LOCK(fdp); 1862 KASSERT(fdp->fd_ofiles[fd] == fp, 1863 ("table not shared, how did it change?")); 1864 fdp->fd_ofiles[fd] = NULL; 1865 fdunused(fdp, fd); 1866 FILEDESC_UNLOCK(fdp); 1867 fdrop(fp, td); 1868 fdrop(fp, td); 1869 break; 1870 } 1871 vfslocked = NDHASGIANT(&nd); 1872 NDFREE(&nd, NDF_ONLY_PNBUF); 1873 fp->f_flag = flags; 1874 fp->f_vnode = nd.ni_vp; 1875 if (fp->f_data == NULL) 1876 fp->f_data = nd.ni_vp; 1877 if (fp->f_ops == &badfileops) 1878 fp->f_ops = &vnops; 1879 fp->f_type = DTYPE_VNODE; 1880 VOP_UNLOCK(nd.ni_vp, 0, td); 1881 VFS_UNLOCK_GIANT(vfslocked); 1882 devnull = fd; 1883 fdrop(fp, td); 1884 } else { 1885 error = do_dup(td, DUP_FIXED, devnull, i, &retval); 1886 if (error != 0) 1887 break; 1888 } 1889 } 1890 return (error); 1891 } 1892 1893 /* 1894 * Internal form of close. 1895 * Decrement reference count on file structure. 1896 * Note: td may be NULL when closing a file that was being passed in a 1897 * message. 
1898 * 1899 * XXXRW: Giant is not required for the caller, but often will be held; this 1900 * makes it moderately likely the Giant will be recursed in the VFS case. 1901 */ 1902 int 1903 closef(struct file *fp, struct thread *td) 1904 { 1905 struct vnode *vp; 1906 struct flock lf; 1907 struct filedesc_to_leader *fdtol; 1908 struct filedesc *fdp; 1909 1910 /* 1911 * POSIX record locking dictates that any close releases ALL 1912 * locks owned by this process. This is handled by setting 1913 * a flag in the unlock to free ONLY locks obeying POSIX 1914 * semantics, and not to free BSD-style file locks. 1915 * If the descriptor was in a message, POSIX-style locks 1916 * aren't passed with the descriptor, and the thread pointer 1917 * will be NULL. Callers should be careful only to pass a 1918 * NULL thread pointer when there really is no owning 1919 * context that might have locks, or the locks will be 1920 * leaked. 1921 */ 1922 if (fp->f_type == DTYPE_VNODE && td != NULL) { 1923 int vfslocked; 1924 1925 vp = fp->f_vnode; 1926 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1927 if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1928 lf.l_whence = SEEK_SET; 1929 lf.l_start = 0; 1930 lf.l_len = 0; 1931 lf.l_type = F_UNLCK; 1932 (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader, 1933 F_UNLCK, &lf, F_POSIX); 1934 } 1935 fdtol = td->td_proc->p_fdtol; 1936 if (fdtol != NULL) { 1937 /* 1938 * Handle special case where file descriptor table 1939 * is shared between multiple process leaders. 
1940 */ 1941 fdp = td->td_proc->p_fd; 1942 FILEDESC_LOCK(fdp); 1943 for (fdtol = fdtol->fdl_next; 1944 fdtol != td->td_proc->p_fdtol; 1945 fdtol = fdtol->fdl_next) { 1946 if ((fdtol->fdl_leader->p_flag & 1947 P_ADVLOCK) == 0) 1948 continue; 1949 fdtol->fdl_holdcount++; 1950 FILEDESC_UNLOCK(fdp); 1951 lf.l_whence = SEEK_SET; 1952 lf.l_start = 0; 1953 lf.l_len = 0; 1954 lf.l_type = F_UNLCK; 1955 vp = fp->f_vnode; 1956 (void) VOP_ADVLOCK(vp, 1957 (caddr_t)fdtol->fdl_leader, 1958 F_UNLCK, &lf, F_POSIX); 1959 FILEDESC_LOCK(fdp); 1960 fdtol->fdl_holdcount--; 1961 if (fdtol->fdl_holdcount == 0 && 1962 fdtol->fdl_wakeup != 0) { 1963 fdtol->fdl_wakeup = 0; 1964 wakeup(fdtol); 1965 } 1966 } 1967 FILEDESC_UNLOCK(fdp); 1968 } 1969 VFS_UNLOCK_GIANT(vfslocked); 1970 } 1971 return (fdrop(fp, td)); 1972 } 1973 1974 /* 1975 * Extract the file pointer associated with the specified descriptor for 1976 * the current user process. 1977 * 1978 * If the descriptor doesn't exist, EBADF is returned. 1979 * 1980 * If the descriptor exists but doesn't match 'flags' then 1981 * return EBADF for read attempts and EINVAL for write attempts. 1982 * 1983 * If 'hold' is set (non-zero) the file's refcount will be bumped on return. 1984 * It should be dropped with fdrop(). 1985 * If it is not set, then the refcount will not be bumped however the 1986 * thread's filedesc struct will be returned locked (for fgetsock). 1987 * 1988 * If an error occured the non-zero error is returned and *fpp is set to NULL. 1989 * Otherwise *fpp is set and zero is returned. 
1990 */ 1991 static __inline int 1992 _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold) 1993 { 1994 struct filedesc *fdp; 1995 struct file *fp; 1996 1997 *fpp = NULL; 1998 if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) 1999 return (EBADF); 2000 FILEDESC_LOCK(fdp); 2001 if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) { 2002 FILEDESC_UNLOCK(fdp); 2003 return (EBADF); 2004 } 2005 2006 /* 2007 * FREAD and FWRITE failure return EBADF as per POSIX. 2008 * 2009 * Only one flag, or 0, may be specified. 2010 */ 2011 if (flags == FREAD && (fp->f_flag & FREAD) == 0) { 2012 FILEDESC_UNLOCK(fdp); 2013 return (EBADF); 2014 } 2015 if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) { 2016 FILEDESC_UNLOCK(fdp); 2017 return (EBADF); 2018 } 2019 if (hold) { 2020 fhold(fp); 2021 FILEDESC_UNLOCK(fdp); 2022 } 2023 *fpp = fp; 2024 return (0); 2025 } 2026 2027 int 2028 fget(struct thread *td, int fd, struct file **fpp) 2029 { 2030 2031 return(_fget(td, fd, fpp, 0, 1)); 2032 } 2033 2034 int 2035 fget_read(struct thread *td, int fd, struct file **fpp) 2036 { 2037 2038 return(_fget(td, fd, fpp, FREAD, 1)); 2039 } 2040 2041 int 2042 fget_write(struct thread *td, int fd, struct file **fpp) 2043 { 2044 2045 return(_fget(td, fd, fpp, FWRITE, 1)); 2046 } 2047 2048 /* 2049 * Like fget() but loads the underlying vnode, or returns an error if 2050 * the descriptor does not represent a vnode. Note that pipes use vnodes 2051 * but never have VM objects. The returned vnode will be vref()d. 2052 * 2053 * XXX: what about the unused flags ? 
2054 */ 2055 static __inline int 2056 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags) 2057 { 2058 struct file *fp; 2059 int error; 2060 2061 *vpp = NULL; 2062 if ((error = _fget(td, fd, &fp, 0, 0)) != 0) 2063 return (error); 2064 if (fp->f_vnode == NULL) { 2065 error = EINVAL; 2066 } else { 2067 *vpp = fp->f_vnode; 2068 vref(*vpp); 2069 } 2070 FILEDESC_UNLOCK(td->td_proc->p_fd); 2071 return (error); 2072 } 2073 2074 int 2075 fgetvp(struct thread *td, int fd, struct vnode **vpp) 2076 { 2077 2078 return (_fgetvp(td, fd, vpp, 0)); 2079 } 2080 2081 int 2082 fgetvp_read(struct thread *td, int fd, struct vnode **vpp) 2083 { 2084 2085 return (_fgetvp(td, fd, vpp, FREAD)); 2086 } 2087 2088 #ifdef notyet 2089 int 2090 fgetvp_write(struct thread *td, int fd, struct vnode **vpp) 2091 { 2092 2093 return (_fgetvp(td, fd, vpp, FWRITE)); 2094 } 2095 #endif 2096 2097 /* 2098 * Like fget() but loads the underlying socket, or returns an error if 2099 * the descriptor does not represent a socket. 2100 * 2101 * We bump the ref count on the returned socket. XXX Also obtain the SX 2102 * lock in the future. 2103 * 2104 * XXXRW: fgetsock() and fputsock() are deprecated, as consumers should rely 2105 * on their file descriptor reference to prevent the socket from being 2106 * freed during use. 
2107 */ 2108 int 2109 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp) 2110 { 2111 struct file *fp; 2112 int error; 2113 2114 NET_ASSERT_GIANT(); 2115 2116 *spp = NULL; 2117 if (fflagp != NULL) 2118 *fflagp = 0; 2119 if ((error = _fget(td, fd, &fp, 0, 0)) != 0) 2120 return (error); 2121 if (fp->f_type != DTYPE_SOCKET) { 2122 error = ENOTSOCK; 2123 } else { 2124 *spp = fp->f_data; 2125 if (fflagp) 2126 *fflagp = fp->f_flag; 2127 SOCK_LOCK(*spp); 2128 soref(*spp); 2129 SOCK_UNLOCK(*spp); 2130 } 2131 FILEDESC_UNLOCK(td->td_proc->p_fd); 2132 return (error); 2133 } 2134 2135 /* 2136 * Drop the reference count on the socket and XXX release the SX lock in the 2137 * future. The last reference closes the socket. 2138 * 2139 * XXXRW: fputsock() is deprecated, see comment for fgetsock(). 2140 */ 2141 void 2142 fputsock(struct socket *so) 2143 { 2144 2145 NET_ASSERT_GIANT(); 2146 ACCEPT_LOCK(); 2147 SOCK_LOCK(so); 2148 sorele(so); 2149 } 2150 2151 int 2152 fdrop(struct file *fp, struct thread *td) 2153 { 2154 2155 FILE_LOCK(fp); 2156 return (fdrop_locked(fp, td)); 2157 } 2158 2159 /* 2160 * Drop reference on struct file passed in, may call closef if the 2161 * reference hits zero. 2162 * Expects struct file locked, and will unlock it. 2163 */ 2164 static int 2165 fdrop_locked(struct file *fp, struct thread *td) 2166 { 2167 int error; 2168 2169 FILE_LOCK_ASSERT(fp, MA_OWNED); 2170 2171 if (--fp->f_count > 0) { 2172 FILE_UNLOCK(fp); 2173 return (0); 2174 } 2175 /* We have the last ref so we can proceed without the file lock. 
*/ 2176 FILE_UNLOCK(fp); 2177 if (fp->f_count < 0) 2178 panic("fdrop: count < 0"); 2179 if (fp->f_ops != &badfileops) 2180 error = fo_close(fp, td); 2181 else 2182 error = 0; 2183 2184 sx_xlock(&filelist_lock); 2185 LIST_REMOVE(fp, f_list); 2186 openfiles--; 2187 sx_xunlock(&filelist_lock); 2188 crfree(fp->f_cred); 2189 uma_zfree(file_zone, fp); 2190 2191 return (error); 2192 } 2193 2194 /* 2195 * Apply an advisory lock on a file descriptor. 2196 * 2197 * Just attempt to get a record lock of the requested type on 2198 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 2199 */ 2200 #ifndef _SYS_SYSPROTO_H_ 2201 struct flock_args { 2202 int fd; 2203 int how; 2204 }; 2205 #endif 2206 /* 2207 * MPSAFE 2208 */ 2209 /* ARGSUSED */ 2210 int 2211 flock(struct thread *td, struct flock_args *uap) 2212 { 2213 struct file *fp; 2214 struct vnode *vp; 2215 struct flock lf; 2216 int error; 2217 2218 if ((error = fget(td, uap->fd, &fp)) != 0) 2219 return (error); 2220 if (fp->f_type != DTYPE_VNODE) { 2221 fdrop(fp, td); 2222 return (EOPNOTSUPP); 2223 } 2224 2225 mtx_lock(&Giant); 2226 vp = fp->f_vnode; 2227 lf.l_whence = SEEK_SET; 2228 lf.l_start = 0; 2229 lf.l_len = 0; 2230 if (uap->how & LOCK_UN) { 2231 lf.l_type = F_UNLCK; 2232 FILE_LOCK(fp); 2233 fp->f_flag &= ~FHASLOCK; 2234 FILE_UNLOCK(fp); 2235 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 2236 goto done2; 2237 } 2238 if (uap->how & LOCK_EX) 2239 lf.l_type = F_WRLCK; 2240 else if (uap->how & LOCK_SH) 2241 lf.l_type = F_RDLCK; 2242 else { 2243 error = EBADF; 2244 goto done2; 2245 } 2246 FILE_LOCK(fp); 2247 fp->f_flag |= FHASLOCK; 2248 FILE_UNLOCK(fp); 2249 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, 2250 (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT); 2251 done2: 2252 fdrop(fp, td); 2253 mtx_unlock(&Giant); 2254 return (error); 2255 } 2256 /* 2257 * Duplicate the specified descriptor to a free descriptor. 
2258 */ 2259 int 2260 dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error) 2261 { 2262 struct file *wfp; 2263 struct file *fp; 2264 2265 /* 2266 * If the to-be-dup'd fd number is greater than the allowed number 2267 * of file descriptors, or the fd to be dup'd has already been 2268 * closed, then reject. 2269 */ 2270 FILEDESC_LOCK(fdp); 2271 if (dfd < 0 || dfd >= fdp->fd_nfiles || 2272 (wfp = fdp->fd_ofiles[dfd]) == NULL) { 2273 FILEDESC_UNLOCK(fdp); 2274 return (EBADF); 2275 } 2276 2277 /* 2278 * There are two cases of interest here. 2279 * 2280 * For ENODEV simply dup (dfd) to file descriptor 2281 * (indx) and return. 2282 * 2283 * For ENXIO steal away the file structure from (dfd) and 2284 * store it in (indx). (dfd) is effectively closed by 2285 * this operation. 2286 * 2287 * Any other error code is just returned. 2288 */ 2289 switch (error) { 2290 case ENODEV: 2291 /* 2292 * Check that the mode the file is being opened for is a 2293 * subset of the mode of the existing descriptor. 2294 */ 2295 FILE_LOCK(wfp); 2296 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { 2297 FILE_UNLOCK(wfp); 2298 FILEDESC_UNLOCK(fdp); 2299 return (EACCES); 2300 } 2301 fp = fdp->fd_ofiles[indx]; 2302 fdp->fd_ofiles[indx] = wfp; 2303 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 2304 if (fp == NULL) 2305 fdused(fdp, indx); 2306 fhold_locked(wfp); 2307 FILE_UNLOCK(wfp); 2308 FILEDESC_UNLOCK(fdp); 2309 if (fp != NULL) { 2310 /* 2311 * We now own the reference to fp that the ofiles[] 2312 * array used to own. Release it. 2313 */ 2314 FILE_LOCK(fp); 2315 fdrop_locked(fp, td); 2316 } 2317 return (0); 2318 2319 case ENXIO: 2320 /* 2321 * Steal away the file pointer from dfd and stuff it into indx. 
2322 */ 2323 fp = fdp->fd_ofiles[indx]; 2324 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; 2325 fdp->fd_ofiles[dfd] = NULL; 2326 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 2327 fdp->fd_ofileflags[dfd] = 0; 2328 fdunused(fdp, dfd); 2329 if (fp == NULL) 2330 fdused(fdp, indx); 2331 if (fp != NULL) 2332 FILE_LOCK(fp); 2333 2334 /* 2335 * We now own the reference to fp that the ofiles[] array 2336 * used to own. Release it. 2337 */ 2338 if (fp != NULL) 2339 fdrop_locked(fp, td); 2340 2341 FILEDESC_UNLOCK(fdp); 2342 2343 return (0); 2344 2345 default: 2346 FILEDESC_UNLOCK(fdp); 2347 return (error); 2348 } 2349 /* NOTREACHED */ 2350 } 2351 2352 /* 2353 * Scan all active processes to see if any of them have a current 2354 * or root directory of `olddp'. If so, replace them with the new 2355 * mount point. 2356 */ 2357 void 2358 mountcheckdirs(struct vnode *olddp, struct vnode *newdp) 2359 { 2360 struct filedesc *fdp; 2361 struct proc *p; 2362 int nrele; 2363 2364 if (vrefcnt(olddp) == 1) 2365 return; 2366 sx_slock(&allproc_lock); 2367 LIST_FOREACH(p, &allproc, p_list) { 2368 fdp = fdhold(p); 2369 if (fdp == NULL) 2370 continue; 2371 nrele = 0; 2372 FILEDESC_LOCK_FAST(fdp); 2373 if (fdp->fd_cdir == olddp) { 2374 vref(newdp); 2375 fdp->fd_cdir = newdp; 2376 nrele++; 2377 } 2378 if (fdp->fd_rdir == olddp) { 2379 vref(newdp); 2380 fdp->fd_rdir = newdp; 2381 nrele++; 2382 } 2383 FILEDESC_UNLOCK_FAST(fdp); 2384 fddrop(fdp); 2385 while (nrele--) 2386 vrele(olddp); 2387 } 2388 sx_sunlock(&allproc_lock); 2389 if (rootvnode == olddp) { 2390 vrele(rootvnode); 2391 vref(newdp); 2392 rootvnode = newdp; 2393 } 2394 } 2395 2396 struct filedesc_to_leader * 2397 filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader) 2398 { 2399 struct filedesc_to_leader *fdtol; 2400 2401 MALLOC(fdtol, struct filedesc_to_leader *, 2402 sizeof(struct filedesc_to_leader), 2403 M_FILEDESC_TO_LEADER, 2404 M_WAITOK); 2405 fdtol->fdl_refcount = 1; 2406 
fdtol->fdl_holdcount = 0; 2407 fdtol->fdl_wakeup = 0; 2408 fdtol->fdl_leader = leader; 2409 if (old != NULL) { 2410 FILEDESC_LOCK(fdp); 2411 fdtol->fdl_next = old->fdl_next; 2412 fdtol->fdl_prev = old; 2413 old->fdl_next = fdtol; 2414 fdtol->fdl_next->fdl_prev = fdtol; 2415 FILEDESC_UNLOCK(fdp); 2416 } else { 2417 fdtol->fdl_next = fdtol; 2418 fdtol->fdl_prev = fdtol; 2419 } 2420 return (fdtol); 2421 } 2422 2423 /* 2424 * Get file structures. 2425 */ 2426 static int 2427 sysctl_kern_file(SYSCTL_HANDLER_ARGS) 2428 { 2429 struct xfile xf; 2430 struct filedesc *fdp; 2431 struct file *fp; 2432 struct proc *p; 2433 int error, n; 2434 2435 /* 2436 * Note: because the number of file descriptors is calculated 2437 * in different ways for sizing vs returning the data, 2438 * there is information leakage from the first loop. However, 2439 * it is of a similar order of magnitude to the leakage from 2440 * global system statistics such as kern.openfiles. 2441 */ 2442 error = sysctl_wire_old_buffer(req, 0); 2443 if (error != 0) 2444 return (error); 2445 if (req->oldptr == NULL) { 2446 n = 16; /* A slight overestimate. */ 2447 sx_slock(&filelist_lock); 2448 LIST_FOREACH(fp, &filehead, f_list) { 2449 /* 2450 * We should grab the lock, but this is an 2451 * estimate, so does it really matter? 
2452 */ 2453 /* mtx_lock(fp->f_mtxp); */ 2454 n += fp->f_count; 2455 /* mtx_unlock(f->f_mtxp); */ 2456 } 2457 sx_sunlock(&filelist_lock); 2458 return (SYSCTL_OUT(req, 0, n * sizeof(xf))); 2459 } 2460 error = 0; 2461 bzero(&xf, sizeof(xf)); 2462 xf.xf_size = sizeof(xf); 2463 sx_slock(&allproc_lock); 2464 LIST_FOREACH(p, &allproc, p_list) { 2465 if (p->p_state == PRS_NEW) 2466 continue; 2467 PROC_LOCK(p); 2468 if (p_cansee(req->td, p) != 0) { 2469 PROC_UNLOCK(p); 2470 continue; 2471 } 2472 xf.xf_pid = p->p_pid; 2473 xf.xf_uid = p->p_ucred->cr_uid; 2474 PROC_UNLOCK(p); 2475 fdp = fdhold(p); 2476 if (fdp == NULL) 2477 continue; 2478 FILEDESC_LOCK_FAST(fdp); 2479 for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) { 2480 if ((fp = fdp->fd_ofiles[n]) == NULL) 2481 continue; 2482 xf.xf_fd = n; 2483 xf.xf_file = fp; 2484 xf.xf_data = fp->f_data; 2485 xf.xf_vnode = fp->f_vnode; 2486 xf.xf_type = fp->f_type; 2487 xf.xf_count = fp->f_count; 2488 xf.xf_msgcount = fp->f_msgcount; 2489 xf.xf_offset = fp->f_offset; 2490 xf.xf_flag = fp->f_flag; 2491 error = SYSCTL_OUT(req, &xf, sizeof(xf)); 2492 if (error) 2493 break; 2494 } 2495 FILEDESC_UNLOCK_FAST(fdp); 2496 fddrop(fdp); 2497 if (error) 2498 break; 2499 } 2500 sx_sunlock(&allproc_lock); 2501 return (error); 2502 } 2503 2504 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD, 2505 0, 0, sysctl_kern_file, "S,xfile", "Entire file table"); 2506 2507 #ifdef DDB 2508 /* 2509 * For the purposes of debugging, generate a human-readable string for the 2510 * file type. 
2511 */ 2512 static const char * 2513 file_type_to_name(short type) 2514 { 2515 2516 switch (type) { 2517 case 0: 2518 return ("zero"); 2519 case DTYPE_VNODE: 2520 return ("vnod"); 2521 case DTYPE_SOCKET: 2522 return ("sock"); 2523 case DTYPE_PIPE: 2524 return ("pipe"); 2525 case DTYPE_FIFO: 2526 return ("fifo"); 2527 case DTYPE_CRYPTO: 2528 return ("crpt"); 2529 default: 2530 return ("unkn"); 2531 } 2532 } 2533 2534 /* 2535 * For the purposes of debugging, identify a process (if any, perhaps one of 2536 * many) that references the passed file in its file descriptor array. Return 2537 * NULL if none. 2538 */ 2539 static struct proc * 2540 file_to_first_proc(struct file *fp) 2541 { 2542 struct filedesc *fdp; 2543 struct proc *p; 2544 int n; 2545 2546 LIST_FOREACH(p, &allproc, p_list) { 2547 if (p->p_state == PRS_NEW) 2548 continue; 2549 fdp = p->p_fd; 2550 if (fdp == NULL) 2551 continue; 2552 for (n = 0; n < fdp->fd_nfiles; n++) { 2553 if (fp == fdp->fd_ofiles[n]) 2554 return (p); 2555 } 2556 } 2557 return (NULL); 2558 } 2559 2560 DB_SHOW_COMMAND(files, db_show_files) 2561 { 2562 struct file *fp; 2563 struct proc *p; 2564 2565 db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n", "File", 2566 "Type", "Data", "Flag", "GCFl", "Count", "MCount", "Vnode", 2567 "FPID", "FCmd"); 2568 LIST_FOREACH(fp, &filehead, f_list) { 2569 p = file_to_first_proc(fp); 2570 db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp, 2571 file_type_to_name(fp->f_type), fp->f_data, fp->f_flag, 2572 fp->f_gcflag, fp->f_count, fp->f_msgcount, fp->f_vnode, 2573 p != NULL ? p->p_pid : -1, p != NULL ? 
p->p_comm : "-"); 2574 } 2575 } 2576 #endif 2577 2578 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, 2579 &maxfilesperproc, 0, "Maximum files allowed open per process"); 2580 2581 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, 2582 &maxfiles, 0, "Maximum number of files"); 2583 2584 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD, 2585 &openfiles, 0, "System-wide number of open files"); 2586 2587 /* ARGSUSED*/ 2588 static void 2589 filelistinit(void *dummy) 2590 { 2591 2592 file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL, 2593 NULL, NULL, UMA_ALIGN_PTR, 0); 2594 sx_init(&filelist_lock, "filelist lock"); 2595 mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); 2596 mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF); 2597 } 2598 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL) 2599 2600 /*-------------------------------------------------------------------*/ 2601 2602 static int 2603 badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) 2604 { 2605 2606 return (EBADF); 2607 } 2608 2609 static int 2610 badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td) 2611 { 2612 2613 return (EBADF); 2614 } 2615 2616 static int 2617 badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) 2618 { 2619 2620 return (0); 2621 } 2622 2623 static int 2624 badfo_kqfilter(struct file *fp, struct knote *kn) 2625 { 2626 2627 return (0); 2628 } 2629 2630 static int 2631 badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td) 2632 { 2633 2634 return (EBADF); 2635 } 2636 2637 static int 2638 badfo_close(struct file *fp, struct thread *td) 2639 { 2640 2641 return (EBADF); 2642 } 2643 2644 struct fileops badfileops = { 2645 .fo_read = badfo_readwrite, 2646 .fo_write = badfo_readwrite, 2647 .fo_ioctl = badfo_ioctl, 2648 .fo_poll = badfo_poll, 2649 .fo_kqfilter = badfo_kqfilter, 
2650 .fo_stat = badfo_stat, 2651 .fo_close = badfo_close, 2652 }; 2653 2654 2655 /*-------------------------------------------------------------------*/ 2656 2657 /* 2658 * File Descriptor pseudo-device driver (/dev/fd/). 2659 * 2660 * Opening minor device N dup()s the file (if any) connected to file 2661 * descriptor N belonging to the calling process. Note that this driver 2662 * consists of only the ``open()'' routine, because all subsequent 2663 * references to this file will be direct to the other driver. 2664 * 2665 * XXX: we could give this one a cloning event handler if necessary. 2666 */ 2667 2668 /* ARGSUSED */ 2669 static int 2670 fdopen(struct cdev *dev, int mode, int type, struct thread *td) 2671 { 2672 2673 /* 2674 * XXX Kludge: set curthread->td_dupfd to contain the value of the 2675 * the file descriptor being sought for duplication. The error 2676 * return ensures that the vnode for this device will be released 2677 * by vn_open. Open will detect this special error and take the 2678 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 2679 * will simply report the error. 2680 */ 2681 td->td_dupfd = dev2unit(dev); 2682 return (ENODEV); 2683 } 2684 2685 static struct cdevsw fildesc_cdevsw = { 2686 .d_version = D_VERSION, 2687 .d_flags = D_NEEDGIANT, 2688 .d_open = fdopen, 2689 .d_name = "FD", 2690 }; 2691 2692 static void 2693 fildesc_drvinit(void *unused) 2694 { 2695 struct cdev *dev; 2696 2697 dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0"); 2698 make_dev_alias(dev, "stdin"); 2699 dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1"); 2700 make_dev_alias(dev, "stdout"); 2701 dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2"); 2702 make_dev_alias(dev, "stderr"); 2703 } 2704 2705 SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL) 2706