1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_compat.h" 41 #include "opt_ddb.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 46 #include <sys/conf.h> 47 #include <sys/fcntl.h> 48 #include <sys/file.h> 49 #include <sys/filedesc.h> 50 #include <sys/filio.h> 51 #include <sys/jail.h> 52 #include <sys/kernel.h> 53 #include <sys/limits.h> 54 #include <sys/lock.h> 55 #include <sys/malloc.h> 56 #include <sys/mount.h> 57 #include <sys/mqueue.h> 58 #include <sys/mutex.h> 59 #include <sys/namei.h> 60 #include <sys/proc.h> 61 #include <sys/resourcevar.h> 62 #include <sys/signalvar.h> 63 #include <sys/socketvar.h> 64 #include <sys/stat.h> 65 #include <sys/sx.h> 66 #include <sys/syscallsubr.h> 67 #include <sys/sysctl.h> 68 #include <sys/sysproto.h> 69 #include <sys/unistd.h> 70 #include <sys/vnode.h> 71 72 #include <vm/uma.h> 73 74 #include <ddb/ddb.h> 75 76 static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table"); 77 static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader", 78 "file desc to leader structures"); 79 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); 80 81 static uma_zone_t file_zone; 82 83 84 /* How to treat 'new' parameter when allocating a fd for do_dup(). */ 85 enum dup_type { DUP_VARIABLE, DUP_FIXED }; 86 87 static int do_dup(struct thread *td, enum dup_type type, int old, int new, 88 register_t *retval); 89 static int fd_first_free(struct filedesc *, int, int); 90 static int fd_last_used(struct filedesc *, int, int); 91 static void fdgrowtable(struct filedesc *, int); 92 static int fdrop_locked(struct file *fp, struct thread *td); 93 static void fdunused(struct filedesc *fdp, int fd); 94 static void fdused(struct filedesc *fdp, int fd); 95 96 /* 97 * A process is initially started out with NDFILE descriptors stored within 98 * this structure, selected to be enough for typical applications based on 99 * the historical limit of 20 open files (and the usage of descriptors by 100 * shells). If these descriptors are exhausted, a larger descriptor table 101 * may be allocated, up to a process' resource limit; the internal arrays 102 * are then unused. 103 */ 104 #define NDFILE 20 105 #define NDSLOTSIZE sizeof(NDSLOTTYPE) 106 #define NDENTRIES (NDSLOTSIZE * __CHAR_BIT) 107 #define NDSLOT(x) ((x) / NDENTRIES) 108 #define NDBIT(x) ((NDSLOTTYPE)1 << ((x) % NDENTRIES)) 109 #define NDSLOTS(x) (((x) + NDENTRIES - 1) / NDENTRIES) 110 111 /* 112 * Storage required per open file descriptor. 113 */ 114 #define OFILESIZE (sizeof(struct file *) + sizeof(char)) 115 116 /* 117 * Basic allocation of descriptors: 118 * one of the above, plus arrays for NDFILE descriptors. 119 */ 120 struct filedesc0 { 121 struct filedesc fd_fd; 122 /* 123 * These arrays are used when the number of open files is 124 * <= NDFILE, and are then pointed to by the pointers above. 125 */ 126 struct file *fd_dfiles[NDFILE]; 127 char fd_dfileflags[NDFILE]; 128 NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)]; 129 }; 130 131 /* 132 * Descriptor management. 133 */ 134 struct filelist filehead; /* head of list of open files */ 135 int openfiles; /* actual number of open files */ 136 struct sx filelist_lock; /* sx to protect filelist */ 137 struct mtx sigio_lock; /* mtx to protect pointers to sigio */ 138 void (*mq_fdclose)(struct thread *td, int fd, struct file *fp); 139 140 /* A mutex to protect the association between a proc and filedesc. */ 141 static struct mtx fdesc_mtx; 142 143 /* 144 * Find the first zero bit in the given bitmap, starting at low and not 145 * exceeding size - 1. 146 */ 147 static int 148 fd_first_free(struct filedesc *fdp, int low, int size) 149 { 150 NDSLOTTYPE *map = fdp->fd_map; 151 NDSLOTTYPE mask; 152 int off, maxoff; 153 154 if (low >= size) 155 return (low); 156 157 off = NDSLOT(low); 158 if (low % NDENTRIES) { 159 mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES))); 160 if ((mask &= ~map[off]) != 0UL) 161 return (off * NDENTRIES + ffsl(mask) - 1); 162 ++off; 163 } 164 for (maxoff = NDSLOTS(size); off < maxoff; ++off) 165 if (map[off] != ~0UL) 166 return (off * NDENTRIES + ffsl(~map[off]) - 1); 167 return (size); 168 } 169 170 /* 171 * Find the highest non-zero bit in the given bitmap, starting at low and 172 * not exceeding size - 1. 173 */ 174 static int 175 fd_last_used(struct filedesc *fdp, int low, int size) 176 { 177 NDSLOTTYPE *map = fdp->fd_map; 178 NDSLOTTYPE mask; 179 int off, minoff; 180 181 if (low >= size) 182 return (-1); 183 184 off = NDSLOT(size); 185 if (size % NDENTRIES) { 186 mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES)); 187 if ((mask &= map[off]) != 0) 188 return (off * NDENTRIES + flsl(mask) - 1); 189 --off; 190 } 191 for (minoff = NDSLOT(low); off >= minoff; --off) 192 if (map[off] != 0) 193 return (off * NDENTRIES + flsl(map[off]) - 1); 194 return (size - 1); 195 } 196 197 static int 198 fdisused(struct filedesc *fdp, int fd) 199 { 200 KASSERT(fd >= 0 && fd < fdp->fd_nfiles, 201 ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles)); 202 return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0); 203 } 204 205 /* 206 * Mark a file descriptor as used. 207 */ 208 static void 209 fdused(struct filedesc *fdp, int fd) 210 { 211 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 212 KASSERT(!fdisused(fdp, fd), 213 ("fd already used")); 214 fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd); 215 if (fd > fdp->fd_lastfile) 216 fdp->fd_lastfile = fd; 217 if (fd == fdp->fd_freefile) 218 fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles); 219 } 220 221 /* 222 * Mark a file descriptor as unused. 223 */ 224 static void 225 fdunused(struct filedesc *fdp, int fd) 226 { 227 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 228 KASSERT(fdisused(fdp, fd), 229 ("fd is already unused")); 230 KASSERT(fdp->fd_ofiles[fd] == NULL, 231 ("fd is still in use")); 232 fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); 233 if (fd < fdp->fd_freefile) 234 fdp->fd_freefile = fd; 235 if (fd == fdp->fd_lastfile) 236 fdp->fd_lastfile = fd_last_used(fdp, 0, fd); 237 } 238 239 /* 240 * System calls on descriptors. 241 */ 242 #ifndef _SYS_SYSPROTO_H_ 243 struct getdtablesize_args { 244 int dummy; 245 }; 246 #endif 247 /* 248 * MPSAFE 249 */ 250 /* ARGSUSED */ 251 int 252 getdtablesize(struct thread *td, struct getdtablesize_args *uap) 253 { 254 struct proc *p = td->td_proc; 255 256 PROC_LOCK(p); 257 td->td_retval[0] = 258 min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 259 PROC_UNLOCK(p); 260 return (0); 261 } 262 263 /* 264 * Duplicate a file descriptor to a particular value. 265 * 266 * note: keep in mind that a potential race condition exists when closing 267 * descriptors from a shared descriptor table (via rfork). 268 */ 269 #ifndef _SYS_SYSPROTO_H_ 270 struct dup2_args { 271 u_int from; 272 u_int to; 273 }; 274 #endif 275 /* 276 * MPSAFE 277 */ 278 /* ARGSUSED */ 279 int 280 dup2(struct thread *td, struct dup2_args *uap) 281 { 282 283 return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to, 284 td->td_retval)); 285 } 286 287 /* 288 * Duplicate a file descriptor. 289 */ 290 #ifndef _SYS_SYSPROTO_H_ 291 struct dup_args { 292 u_int fd; 293 }; 294 #endif 295 /* 296 * MPSAFE 297 */ 298 /* ARGSUSED */ 299 int 300 dup(struct thread *td, struct dup_args *uap) 301 { 302 303 return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval)); 304 } 305 306 /* 307 * The file control system call. 308 */ 309 #ifndef _SYS_SYSPROTO_H_ 310 struct fcntl_args { 311 int fd; 312 int cmd; 313 long arg; 314 }; 315 #endif 316 /* 317 * MPSAFE 318 */ 319 /* ARGSUSED */ 320 int 321 fcntl(struct thread *td, struct fcntl_args *uap) 322 { 323 struct flock fl; 324 intptr_t arg; 325 int error; 326 327 error = 0; 328 switch (uap->cmd) { 329 case F_GETLK: 330 case F_SETLK: 331 case F_SETLKW: 332 error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl)); 333 arg = (intptr_t)&fl; 334 break; 335 default: 336 arg = uap->arg; 337 break; 338 } 339 if (error) 340 return (error); 341 error = kern_fcntl(td, uap->fd, uap->cmd, arg); 342 if (error) 343 return (error); 344 if (uap->cmd == F_GETLK) 345 error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl)); 346 return (error); 347 } 348 349 int 350 kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) 351 { 352 struct filedesc *fdp; 353 struct flock *flp; 354 struct file *fp; 355 struct proc *p; 356 char *pop; 357 struct vnode *vp; 358 u_int newmin; 359 int error, flg, tmp; 360 int giant_locked; 361 362 /* 363 * XXXRW: Some fcntl() calls require Giant -- others don't. Try to 364 * avoid grabbing Giant for calls we know don't need it. 365 */ 366 switch (cmd) { 367 case F_DUPFD: 368 case F_GETFD: 369 case F_SETFD: 370 case F_GETFL: 371 giant_locked = 0; 372 break; 373 374 default: 375 giant_locked = 1; 376 mtx_lock(&Giant); 377 } 378 379 error = 0; 380 flg = F_POSIX; 381 p = td->td_proc; 382 fdp = p->p_fd; 383 FILEDESC_LOCK(fdp); 384 if ((unsigned)fd >= fdp->fd_nfiles || 385 (fp = fdp->fd_ofiles[fd]) == NULL) { 386 FILEDESC_UNLOCK(fdp); 387 error = EBADF; 388 goto done2; 389 } 390 pop = &fdp->fd_ofileflags[fd]; 391 392 switch (cmd) { 393 case F_DUPFD: 394 /* mtx_assert(&Giant, MA_NOTOWNED); */ 395 FILEDESC_UNLOCK(fdp); 396 newmin = arg; 397 PROC_LOCK(p); 398 if (newmin >= lim_cur(p, RLIMIT_NOFILE) || 399 newmin >= maxfilesperproc) { 400 PROC_UNLOCK(p); 401 error = EINVAL; 402 break; 403 } 404 PROC_UNLOCK(p); 405 error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval); 406 break; 407 408 case F_GETFD: 409 /* mtx_assert(&Giant, MA_NOTOWNED); */ 410 td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0; 411 FILEDESC_UNLOCK(fdp); 412 break; 413 414 case F_SETFD: 415 /* mtx_assert(&Giant, MA_NOTOWNED); */ 416 *pop = (*pop &~ UF_EXCLOSE) | 417 (arg & FD_CLOEXEC ? UF_EXCLOSE : 0); 418 FILEDESC_UNLOCK(fdp); 419 break; 420 421 case F_GETFL: 422 /* mtx_assert(&Giant, MA_NOTOWNED); */ 423 FILE_LOCK(fp); 424 td->td_retval[0] = OFLAGS(fp->f_flag); 425 FILE_UNLOCK(fp); 426 FILEDESC_UNLOCK(fdp); 427 break; 428 429 case F_SETFL: 430 mtx_assert(&Giant, MA_OWNED); 431 FILE_LOCK(fp); 432 fhold_locked(fp); 433 fp->f_flag &= ~FCNTLFLAGS; 434 fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS; 435 FILE_UNLOCK(fp); 436 FILEDESC_UNLOCK(fdp); 437 tmp = fp->f_flag & FNONBLOCK; 438 error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); 439 if (error) { 440 fdrop(fp, td); 441 break; 442 } 443 tmp = fp->f_flag & FASYNC; 444 error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td); 445 if (error == 0) { 446 fdrop(fp, td); 447 break; 448 } 449 FILE_LOCK(fp); 450 fp->f_flag &= ~FNONBLOCK; 451 FILE_UNLOCK(fp); 452 tmp = 0; 453 (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); 454 fdrop(fp, td); 455 break; 456 457 case F_GETOWN: 458 mtx_assert(&Giant, MA_OWNED); 459 fhold(fp); 460 FILEDESC_UNLOCK(fdp); 461 error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); 462 if (error == 0) 463 td->td_retval[0] = tmp; 464 fdrop(fp, td); 465 break; 466 467 case F_SETOWN: 468 mtx_assert(&Giant, MA_OWNED); 469 fhold(fp); 470 FILEDESC_UNLOCK(fdp); 471 tmp = arg; 472 error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td); 473 fdrop(fp, td); 474 break; 475 476 case F_SETLKW: 477 mtx_assert(&Giant, MA_OWNED); 478 flg |= F_WAIT; 479 /* FALLTHROUGH F_SETLK */ 480 481 case F_SETLK: 482 mtx_assert(&Giant, MA_OWNED); 483 if (fp->f_type != DTYPE_VNODE) { 484 FILEDESC_UNLOCK(fdp); 485 error = EBADF; 486 break; 487 } 488 489 flp = (struct flock *)arg; 490 if (flp->l_whence == SEEK_CUR) { 491 if (fp->f_offset < 0 || 492 (flp->l_start > 0 && 493 fp->f_offset > OFF_MAX - flp->l_start)) { 494 FILEDESC_UNLOCK(fdp); 495 error = EOVERFLOW; 496 break; 497 } 498 flp->l_start += fp->f_offset; 499 } 500 501 /* 502 * VOP_ADVLOCK() may block. 503 */ 504 fhold(fp); 505 FILEDESC_UNLOCK(fdp); 506 vp = fp->f_vnode; 507 508 switch (flp->l_type) { 509 case F_RDLCK: 510 if ((fp->f_flag & FREAD) == 0) { 511 error = EBADF; 512 break; 513 } 514 PROC_LOCK(p->p_leader); 515 p->p_leader->p_flag |= P_ADVLOCK; 516 PROC_UNLOCK(p->p_leader); 517 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 518 flp, flg); 519 break; 520 case F_WRLCK: 521 if ((fp->f_flag & FWRITE) == 0) { 522 error = EBADF; 523 break; 524 } 525 PROC_LOCK(p->p_leader); 526 p->p_leader->p_flag |= P_ADVLOCK; 527 PROC_UNLOCK(p->p_leader); 528 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 529 flp, flg); 530 break; 531 case F_UNLCK: 532 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, 533 flp, F_POSIX); 534 break; 535 default: 536 error = EINVAL; 537 break; 538 } 539 /* Check for race with close */ 540 FILEDESC_LOCK_FAST(fdp); 541 if ((unsigned) fd >= fdp->fd_nfiles || 542 fp != fdp->fd_ofiles[fd]) { 543 FILEDESC_UNLOCK_FAST(fdp); 544 flp->l_whence = SEEK_SET; 545 flp->l_start = 0; 546 flp->l_len = 0; 547 flp->l_type = F_UNLCK; 548 (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, 549 F_UNLCK, flp, F_POSIX); 550 } else 551 FILEDESC_UNLOCK_FAST(fdp); 552 fdrop(fp, td); 553 break; 554 555 case F_GETLK: 556 mtx_assert(&Giant, MA_OWNED); 557 if (fp->f_type != DTYPE_VNODE) { 558 FILEDESC_UNLOCK(fdp); 559 error = EBADF; 560 break; 561 } 562 flp = (struct flock *)arg; 563 if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && 564 flp->l_type != F_UNLCK) { 565 FILEDESC_UNLOCK(fdp); 566 error = EINVAL; 567 break; 568 } 569 if (flp->l_whence == SEEK_CUR) { 570 if ((flp->l_start > 0 && 571 fp->f_offset > OFF_MAX - flp->l_start) || 572 (flp->l_start < 0 && 573 fp->f_offset < OFF_MIN - flp->l_start)) { 574 FILEDESC_UNLOCK(fdp); 575 error = EOVERFLOW; 576 break; 577 } 578 flp->l_start += fp->f_offset; 579 } 580 /* 581 * VOP_ADVLOCK() may block. 582 */ 583 fhold(fp); 584 FILEDESC_UNLOCK(fdp); 585 vp = fp->f_vnode; 586 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, 587 F_POSIX); 588 fdrop(fp, td); 589 break; 590 default: 591 FILEDESC_UNLOCK(fdp); 592 error = EINVAL; 593 break; 594 } 595 done2: 596 if (giant_locked) 597 mtx_unlock(&Giant); 598 return (error); 599 } 600 601 /* 602 * Common code for dup, dup2, and fcntl(F_DUPFD). 603 */ 604 static int 605 do_dup(struct thread *td, enum dup_type type, int old, int new, register_t *retval) 606 { 607 struct filedesc *fdp; 608 struct proc *p; 609 struct file *fp; 610 struct file *delfp; 611 int error, holdleaders, maxfd; 612 613 KASSERT((type == DUP_VARIABLE || type == DUP_FIXED), 614 ("invalid dup type %d", type)); 615 616 p = td->td_proc; 617 fdp = p->p_fd; 618 619 /* 620 * Verify we have a valid descriptor to dup from and possibly to 621 * dup to. 622 */ 623 if (old < 0 || new < 0) 624 return (EBADF); 625 PROC_LOCK(p); 626 maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 627 PROC_UNLOCK(p); 628 if (new >= maxfd) 629 return (EMFILE); 630 631 FILEDESC_LOCK(fdp); 632 if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) { 633 FILEDESC_UNLOCK(fdp); 634 return (EBADF); 635 } 636 if (type == DUP_FIXED && old == new) { 637 *retval = new; 638 FILEDESC_UNLOCK(fdp); 639 return (0); 640 } 641 fp = fdp->fd_ofiles[old]; 642 fhold(fp); 643 644 /* 645 * If the caller specified a file descriptor, make sure the file 646 * table is large enough to hold it, and grab it. Otherwise, just 647 * allocate a new descriptor the usual way. Since the filedesc 648 * lock may be temporarily dropped in the process, we have to look 649 * out for a race. 650 */ 651 if (type == DUP_FIXED) { 652 if (new >= fdp->fd_nfiles) 653 fdgrowtable(fdp, new + 1); 654 if (fdp->fd_ofiles[new] == NULL) 655 fdused(fdp, new); 656 } else { 657 if ((error = fdalloc(td, new, &new)) != 0) { 658 FILEDESC_UNLOCK(fdp); 659 fdrop(fp, td); 660 return (error); 661 } 662 } 663 664 /* 665 * If the old file changed out from under us then treat it as a 666 * bad file descriptor. Userland should do its own locking to 667 * avoid this case. 668 */ 669 if (fdp->fd_ofiles[old] != fp) { 670 /* we've allocated a descriptor which we won't use */ 671 if (fdp->fd_ofiles[new] == NULL) 672 fdunused(fdp, new); 673 FILEDESC_UNLOCK(fdp); 674 fdrop(fp, td); 675 return (EBADF); 676 } 677 KASSERT(old != new, 678 ("new fd is same as old")); 679 680 /* 681 * Save info on the descriptor being overwritten. We cannot close 682 * it without introducing an ownership race for the slot, since we 683 * need to drop the filedesc lock to call closef(). 684 * 685 * XXX this duplicates parts of close(). 686 */ 687 delfp = fdp->fd_ofiles[new]; 688 holdleaders = 0; 689 if (delfp != NULL) { 690 if (td->td_proc->p_fdtol != NULL) { 691 /* 692 * Ask fdfree() to sleep to ensure that all relevant 693 * process leaders can be traversed in closef(). 694 */ 695 fdp->fd_holdleaderscount++; 696 holdleaders = 1; 697 } 698 } 699 700 /* 701 * Duplicate the source descriptor 702 */ 703 fdp->fd_ofiles[new] = fp; 704 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; 705 if (new > fdp->fd_lastfile) 706 fdp->fd_lastfile = new; 707 *retval = new; 708 709 /* 710 * If we dup'd over a valid file, we now own the reference to it 711 * and must dispose of it using closef() semantics (as if a 712 * close() were performed on it). 713 * 714 * XXX this duplicates parts of close(). 715 */ 716 if (delfp != NULL) { 717 knote_fdclose(td, new); 718 if (delfp->f_type == DTYPE_MQUEUE) 719 mq_fdclose(td, new, delfp); 720 FILEDESC_UNLOCK(fdp); 721 (void) closef(delfp, td); 722 if (holdleaders) { 723 FILEDESC_LOCK_FAST(fdp); 724 fdp->fd_holdleaderscount--; 725 if (fdp->fd_holdleaderscount == 0 && 726 fdp->fd_holdleaderswakeup != 0) { 727 fdp->fd_holdleaderswakeup = 0; 728 wakeup(&fdp->fd_holdleaderscount); 729 } 730 FILEDESC_UNLOCK_FAST(fdp); 731 } 732 } else { 733 FILEDESC_UNLOCK(fdp); 734 } 735 return (0); 736 } 737 738 /* 739 * If sigio is on the list associated with a process or process group, 740 * disable signalling from the device, remove sigio from the list and 741 * free sigio. 742 */ 743 void 744 funsetown(struct sigio **sigiop) 745 { 746 struct sigio *sigio; 747 748 SIGIO_LOCK(); 749 sigio = *sigiop; 750 if (sigio == NULL) { 751 SIGIO_UNLOCK(); 752 return; 753 } 754 *(sigio->sio_myref) = NULL; 755 if ((sigio)->sio_pgid < 0) { 756 struct pgrp *pg = (sigio)->sio_pgrp; 757 PGRP_LOCK(pg); 758 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio, 759 sigio, sio_pgsigio); 760 PGRP_UNLOCK(pg); 761 } else { 762 struct proc *p = (sigio)->sio_proc; 763 PROC_LOCK(p); 764 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio, 765 sigio, sio_pgsigio); 766 PROC_UNLOCK(p); 767 } 768 SIGIO_UNLOCK(); 769 crfree(sigio->sio_ucred); 770 FREE(sigio, M_SIGIO); 771 } 772 773 /* 774 * Free a list of sigio structures. 775 * We only need to lock the SIGIO_LOCK because we have made ourselves 776 * inaccessible to callers of fsetown and therefore do not need to lock 777 * the proc or pgrp struct for the list manipulation. 778 */ 779 void 780 funsetownlst(struct sigiolst *sigiolst) 781 { 782 struct proc *p; 783 struct pgrp *pg; 784 struct sigio *sigio; 785 786 sigio = SLIST_FIRST(sigiolst); 787 if (sigio == NULL) 788 return; 789 p = NULL; 790 pg = NULL; 791 792 /* 793 * Every entry of the list should belong 794 * to a single proc or pgrp. 795 */ 796 if (sigio->sio_pgid < 0) { 797 pg = sigio->sio_pgrp; 798 PGRP_LOCK_ASSERT(pg, MA_NOTOWNED); 799 } else /* if (sigio->sio_pgid > 0) */ { 800 p = sigio->sio_proc; 801 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 802 } 803 804 SIGIO_LOCK(); 805 while ((sigio = SLIST_FIRST(sigiolst)) != NULL) { 806 *(sigio->sio_myref) = NULL; 807 if (pg != NULL) { 808 KASSERT(sigio->sio_pgid < 0, 809 ("Proc sigio in pgrp sigio list")); 810 KASSERT(sigio->sio_pgrp == pg, 811 ("Bogus pgrp in sigio list")); 812 PGRP_LOCK(pg); 813 SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, 814 sio_pgsigio); 815 PGRP_UNLOCK(pg); 816 } else /* if (p != NULL) */ { 817 KASSERT(sigio->sio_pgid > 0, 818 ("Pgrp sigio in proc sigio list")); 819 KASSERT(sigio->sio_proc == p, 820 ("Bogus proc in sigio list")); 821 PROC_LOCK(p); 822 SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, 823 sio_pgsigio); 824 PROC_UNLOCK(p); 825 } 826 SIGIO_UNLOCK(); 827 crfree(sigio->sio_ucred); 828 FREE(sigio, M_SIGIO); 829 SIGIO_LOCK(); 830 } 831 SIGIO_UNLOCK(); 832 } 833 834 /* 835 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg). 836 * 837 * After permission checking, add a sigio structure to the sigio list for 838 * the process or process group. 839 */ 840 int 841 fsetown(pid_t pgid, struct sigio **sigiop) 842 { 843 struct proc *proc; 844 struct pgrp *pgrp; 845 struct sigio *sigio; 846 int ret; 847 848 if (pgid == 0) { 849 funsetown(sigiop); 850 return (0); 851 } 852 853 ret = 0; 854 855 /* Allocate and fill in the new sigio out of locks. */ 856 MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK); 857 sigio->sio_pgid = pgid; 858 sigio->sio_ucred = crhold(curthread->td_ucred); 859 sigio->sio_myref = sigiop; 860 861 sx_slock(&proctree_lock); 862 if (pgid > 0) { 863 proc = pfind(pgid); 864 if (proc == NULL) { 865 ret = ESRCH; 866 goto fail; 867 } 868 869 /* 870 * Policy - Don't allow a process to FSETOWN a process 871 * in another session. 872 * 873 * Remove this test to allow maximum flexibility or 874 * restrict FSETOWN to the current process or process 875 * group for maximum safety. 876 */ 877 PROC_UNLOCK(proc); 878 if (proc->p_session != curthread->td_proc->p_session) { 879 ret = EPERM; 880 goto fail; 881 } 882 883 pgrp = NULL; 884 } else /* if (pgid < 0) */ { 885 pgrp = pgfind(-pgid); 886 if (pgrp == NULL) { 887 ret = ESRCH; 888 goto fail; 889 } 890 PGRP_UNLOCK(pgrp); 891 892 /* 893 * Policy - Don't allow a process to FSETOWN a process 894 * in another session. 895 * 896 * Remove this test to allow maximum flexibility or 897 * restrict FSETOWN to the current process or process 898 * group for maximum safety. 899 */ 900 if (pgrp->pg_session != curthread->td_proc->p_session) { 901 ret = EPERM; 902 goto fail; 903 } 904 905 proc = NULL; 906 } 907 funsetown(sigiop); 908 if (pgid > 0) { 909 PROC_LOCK(proc); 910 /* 911 * Since funsetownlst() is called without the proctree 912 * locked, we need to check for P_WEXIT. 913 * XXX: is ESRCH correct? 914 */ 915 if ((proc->p_flag & P_WEXIT) != 0) { 916 PROC_UNLOCK(proc); 917 ret = ESRCH; 918 goto fail; 919 } 920 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio); 921 sigio->sio_proc = proc; 922 PROC_UNLOCK(proc); 923 } else { 924 PGRP_LOCK(pgrp); 925 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio); 926 sigio->sio_pgrp = pgrp; 927 PGRP_UNLOCK(pgrp); 928 } 929 sx_sunlock(&proctree_lock); 930 SIGIO_LOCK(); 931 *sigiop = sigio; 932 SIGIO_UNLOCK(); 933 return (0); 934 935 fail: 936 sx_sunlock(&proctree_lock); 937 crfree(sigio->sio_ucred); 938 FREE(sigio, M_SIGIO); 939 return (ret); 940 } 941 942 /* 943 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg). 944 */ 945 pid_t 946 fgetown(sigiop) 947 struct sigio **sigiop; 948 { 949 pid_t pgid; 950 951 SIGIO_LOCK(); 952 pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0; 953 SIGIO_UNLOCK(); 954 return (pgid); 955 } 956 957 /* 958 * Close a file descriptor. 959 */ 960 #ifndef _SYS_SYSPROTO_H_ 961 struct close_args { 962 int fd; 963 }; 964 #endif 965 /* 966 * MPSAFE 967 */ 968 /* ARGSUSED */ 969 int 970 close(td, uap) 971 struct thread *td; 972 struct close_args *uap; 973 { 974 struct filedesc *fdp; 975 struct file *fp; 976 int fd, error; 977 int holdleaders; 978 979 fd = uap->fd; 980 error = 0; 981 holdleaders = 0; 982 fdp = td->td_proc->p_fd; 983 FILEDESC_LOCK(fdp); 984 if ((unsigned)fd >= fdp->fd_nfiles || 985 (fp = fdp->fd_ofiles[fd]) == NULL) { 986 FILEDESC_UNLOCK(fdp); 987 return (EBADF); 988 } 989 fdp->fd_ofiles[fd] = NULL; 990 fdp->fd_ofileflags[fd] = 0; 991 fdunused(fdp, fd); 992 if (td->td_proc->p_fdtol != NULL) { 993 /* 994 * Ask fdfree() to sleep to ensure that all relevant 995 * process leaders can be traversed in closef(). 996 */ 997 fdp->fd_holdleaderscount++; 998 holdleaders = 1; 999 } 1000 1001 /* 1002 * We now hold the fp reference that used to be owned by the descriptor 1003 * array. 1004 * We have to unlock the FILEDESC *AFTER* knote_fdclose to prevent a 1005 * race of the fd getting opened, a knote added, and deleteing a knote 1006 * for the new fd. 1007 */ 1008 knote_fdclose(td, fd); 1009 if (fp->f_type == DTYPE_MQUEUE) 1010 mq_fdclose(td, fd, fp); 1011 FILEDESC_UNLOCK(fdp); 1012 1013 error = closef(fp, td); 1014 if (holdleaders) { 1015 FILEDESC_LOCK_FAST(fdp); 1016 fdp->fd_holdleaderscount--; 1017 if (fdp->fd_holdleaderscount == 0 && 1018 fdp->fd_holdleaderswakeup != 0) { 1019 fdp->fd_holdleaderswakeup = 0; 1020 wakeup(&fdp->fd_holdleaderscount); 1021 } 1022 FILEDESC_UNLOCK_FAST(fdp); 1023 } 1024 return (error); 1025 } 1026 1027 #if defined(COMPAT_43) 1028 /* 1029 * Return status information about a file descriptor. 1030 */ 1031 #ifndef _SYS_SYSPROTO_H_ 1032 struct ofstat_args { 1033 int fd; 1034 struct ostat *sb; 1035 }; 1036 #endif 1037 /* 1038 * MPSAFE 1039 */ 1040 /* ARGSUSED */ 1041 int 1042 ofstat(struct thread *td, struct ofstat_args *uap) 1043 { 1044 struct ostat oub; 1045 struct stat ub; 1046 int error; 1047 1048 error = kern_fstat(td, uap->fd, &ub); 1049 if (error == 0) { 1050 cvtstat(&ub, &oub); 1051 error = copyout(&oub, uap->sb, sizeof(oub)); 1052 } 1053 return (error); 1054 } 1055 #endif /* COMPAT_43 */ 1056 1057 /* 1058 * Return status information about a file descriptor. 1059 */ 1060 #ifndef _SYS_SYSPROTO_H_ 1061 struct fstat_args { 1062 int fd; 1063 struct stat *sb; 1064 }; 1065 #endif 1066 /* 1067 * MPSAFE 1068 */ 1069 /* ARGSUSED */ 1070 int 1071 fstat(struct thread *td, struct fstat_args *uap) 1072 { 1073 struct stat ub; 1074 int error; 1075 1076 error = kern_fstat(td, uap->fd, &ub); 1077 if (error == 0) 1078 error = copyout(&ub, uap->sb, sizeof(ub)); 1079 return (error); 1080 } 1081 1082 int 1083 kern_fstat(struct thread *td, int fd, struct stat *sbp) 1084 { 1085 struct file *fp; 1086 int error; 1087 1088 if ((error = fget(td, fd, &fp)) != 0) 1089 return (error); 1090 error = fo_stat(fp, sbp, td->td_ucred, td); 1091 fdrop(fp, td); 1092 return (error); 1093 } 1094 1095 /* 1096 * Return status information about a file descriptor. 1097 */ 1098 #ifndef _SYS_SYSPROTO_H_ 1099 struct nfstat_args { 1100 int fd; 1101 struct nstat *sb; 1102 }; 1103 #endif 1104 /* 1105 * MPSAFE 1106 */ 1107 /* ARGSUSED */ 1108 int 1109 nfstat(struct thread *td, struct nfstat_args *uap) 1110 { 1111 struct nstat nub; 1112 struct stat ub; 1113 int error; 1114 1115 error = kern_fstat(td, uap->fd, &ub); 1116 if (error == 0) { 1117 cvtnstat(&ub, &nub); 1118 error = copyout(&nub, uap->sb, sizeof(nub)); 1119 } 1120 return (error); 1121 } 1122 1123 /* 1124 * Return pathconf information about a file descriptor. 1125 */ 1126 #ifndef _SYS_SYSPROTO_H_ 1127 struct fpathconf_args { 1128 int fd; 1129 int name; 1130 }; 1131 #endif 1132 /* 1133 * MPSAFE 1134 */ 1135 /* ARGSUSED */ 1136 int 1137 fpathconf(struct thread *td, struct fpathconf_args *uap) 1138 { 1139 struct file *fp; 1140 struct vnode *vp; 1141 int error; 1142 1143 if ((error = fget(td, uap->fd, &fp)) != 0) 1144 return (error); 1145 1146 /* If asynchronous I/O is available, it works for all descriptors. */ 1147 if (uap->name == _PC_ASYNC_IO) { 1148 td->td_retval[0] = async_io_version; 1149 goto out; 1150 } 1151 vp = fp->f_vnode; 1152 if (vp != NULL) { 1153 int vfslocked; 1154 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1155 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1156 error = VOP_PATHCONF(vp, uap->name, td->td_retval); 1157 VOP_UNLOCK(vp, 0, td); 1158 VFS_UNLOCK_GIANT(vfslocked); 1159 } else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { 1160 if (uap->name != _PC_PIPE_BUF) { 1161 error = EINVAL; 1162 } else { 1163 td->td_retval[0] = PIPE_BUF; 1164 error = 0; 1165 } 1166 } else { 1167 error = EOPNOTSUPP; 1168 } 1169 out: 1170 fdrop(fp, td); 1171 return (error); 1172 } 1173 1174 /* 1175 * Grow the file table to accomodate (at least) nfd descriptors. This may 1176 * block and drop the filedesc lock, but it will reacquire it before 1177 * returning. 1178 */ 1179 static void 1180 fdgrowtable(struct filedesc *fdp, int nfd) 1181 { 1182 struct file **ntable; 1183 char *nfileflags; 1184 int nnfiles, onfiles; 1185 NDSLOTTYPE *nmap; 1186 1187 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 1188 1189 KASSERT(fdp->fd_nfiles > 0, 1190 ("zero-length file table")); 1191 1192 /* compute the size of the new table */ 1193 onfiles = fdp->fd_nfiles; 1194 nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */ 1195 if (nnfiles <= onfiles) 1196 /* the table is already large enough */ 1197 return; 1198 1199 /* allocate a new table and (if required) new bitmaps */ 1200 FILEDESC_UNLOCK(fdp); 1201 MALLOC(ntable, struct file **, nnfiles * OFILESIZE, 1202 M_FILEDESC, M_ZERO | M_WAITOK); 1203 nfileflags = (char *)&ntable[nnfiles]; 1204 if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) 1205 MALLOC(nmap, NDSLOTTYPE *, NDSLOTS(nnfiles) * NDSLOTSIZE, 1206 M_FILEDESC, M_ZERO | M_WAITOK); 1207 else 1208 nmap = NULL; 1209 FILEDESC_LOCK(fdp); 1210 1211 /* 1212 * We now have new tables ready to go. Since we dropped the 1213 * filedesc lock to call malloc(), watch out for a race. 1214 */ 1215 onfiles = fdp->fd_nfiles; 1216 if (onfiles >= nnfiles) { 1217 /* we lost the race, but that's OK */ 1218 free(ntable, M_FILEDESC); 1219 if (nmap != NULL) 1220 free(nmap, M_FILEDESC); 1221 return; 1222 } 1223 bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable)); 1224 bcopy(fdp->fd_ofileflags, nfileflags, onfiles); 1225 if (onfiles > NDFILE) 1226 free(fdp->fd_ofiles, M_FILEDESC); 1227 fdp->fd_ofiles = ntable; 1228 fdp->fd_ofileflags = nfileflags; 1229 if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) { 1230 bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap)); 1231 if (NDSLOTS(onfiles) > NDSLOTS(NDFILE)) 1232 free(fdp->fd_map, M_FILEDESC); 1233 fdp->fd_map = nmap; 1234 } 1235 fdp->fd_nfiles = nnfiles; 1236 } 1237 1238 /* 1239 * Allocate a file descriptor for the process. 1240 */ 1241 int 1242 fdalloc(struct thread *td, int minfd, int *result) 1243 { 1244 struct proc *p = td->td_proc; 1245 struct filedesc *fdp = p->p_fd; 1246 int fd = -1, maxfd; 1247 1248 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 1249 1250 if (fdp->fd_freefile > minfd) 1251 minfd = fdp->fd_freefile; 1252 1253 PROC_LOCK(p); 1254 maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 1255 PROC_UNLOCK(p); 1256 1257 /* 1258 * Search the bitmap for a free descriptor. If none is found, try 1259 * to grow the file table. Keep at it until we either get a file 1260 * descriptor or run into process or system limits; fdgrowtable() 1261 * may drop the filedesc lock, so we're in a race. 1262 */ 1263 for (;;) { 1264 fd = fd_first_free(fdp, minfd, fdp->fd_nfiles); 1265 if (fd >= maxfd) 1266 return (EMFILE); 1267 if (fd < fdp->fd_nfiles) 1268 break; 1269 fdgrowtable(fdp, min(fdp->fd_nfiles * 2, maxfd)); 1270 } 1271 1272 /* 1273 * Perform some sanity checks, then mark the file descriptor as 1274 * used and return it to the caller. 1275 */ 1276 KASSERT(!fdisused(fdp, fd), 1277 ("fd_first_free() returned non-free descriptor")); 1278 KASSERT(fdp->fd_ofiles[fd] == NULL, 1279 ("free descriptor isn't")); 1280 fdp->fd_ofileflags[fd] = 0; /* XXX needed? */ 1281 fdused(fdp, fd); 1282 *result = fd; 1283 return (0); 1284 } 1285 1286 /* 1287 * Check to see whether n user file descriptors 1288 * are available to the process p. 1289 */ 1290 int 1291 fdavail(struct thread *td, int n) 1292 { 1293 struct proc *p = td->td_proc; 1294 struct filedesc *fdp = td->td_proc->p_fd; 1295 struct file **fpp; 1296 int i, lim, last; 1297 1298 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 1299 1300 PROC_LOCK(p); 1301 lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 1302 PROC_UNLOCK(p); 1303 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) 1304 return (1); 1305 last = min(fdp->fd_nfiles, lim); 1306 fpp = &fdp->fd_ofiles[fdp->fd_freefile]; 1307 for (i = last - fdp->fd_freefile; --i >= 0; fpp++) { 1308 if (*fpp == NULL && --n <= 0) 1309 return (1); 1310 } 1311 return (0); 1312 } 1313 1314 /* 1315 * Create a new open file structure and allocate 1316 * a file decriptor for the process that refers to it. 1317 * We add one reference to the file for the descriptor table 1318 * and one reference for resultfp. This is to prevent us being 1319 * preempted and the entry in the descriptor table closed after 1320 * we release the FILEDESC lock. 1321 */ 1322 int 1323 falloc(struct thread *td, struct file **resultfp, int *resultfd) 1324 { 1325 struct proc *p = td->td_proc; 1326 struct file *fp, *fq; 1327 int error, i; 1328 int maxuserfiles = maxfiles - (maxfiles / 20); 1329 static struct timeval lastfail; 1330 static int curfail; 1331 1332 fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO); 1333 sx_xlock(&filelist_lock); 1334 if ((openfiles >= maxuserfiles && (td->td_ucred->cr_ruid != 0 || 1335 jailed(td->td_ucred))) || openfiles >= maxfiles) { 1336 if (ppsratecheck(&lastfail, &curfail, 1)) { 1337 printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n", 1338 td->td_ucred->cr_ruid); 1339 } 1340 sx_xunlock(&filelist_lock); 1341 uma_zfree(file_zone, fp); 1342 return (ENFILE); 1343 } 1344 openfiles++; 1345 1346 /* 1347 * If the process has file descriptor zero open, add the new file 1348 * descriptor to the list of open files at that point, otherwise 1349 * put it at the front of the list of open files. 1350 */ 1351 fp->f_mtxp = mtx_pool_alloc(mtxpool_sleep); 1352 fp->f_count = 1; 1353 if (resultfp) 1354 fp->f_count++; 1355 fp->f_cred = crhold(td->td_ucred); 1356 fp->f_ops = &badfileops; 1357 fp->f_data = NULL; 1358 fp->f_vnode = NULL; 1359 FILEDESC_LOCK(p->p_fd); 1360 if ((fq = p->p_fd->fd_ofiles[0])) { 1361 LIST_INSERT_AFTER(fq, fp, f_list); 1362 } else { 1363 LIST_INSERT_HEAD(&filehead, fp, f_list); 1364 } 1365 sx_xunlock(&filelist_lock); 1366 if ((error = fdalloc(td, 0, &i))) { 1367 FILEDESC_UNLOCK(p->p_fd); 1368 fdrop(fp, td); 1369 if (resultfp) 1370 fdrop(fp, td); 1371 return (error); 1372 } 1373 p->p_fd->fd_ofiles[i] = fp; 1374 FILEDESC_UNLOCK(p->p_fd); 1375 if (resultfp) 1376 *resultfp = fp; 1377 if (resultfd) 1378 *resultfd = i; 1379 return (0); 1380 } 1381 1382 /* 1383 * Build a new filedesc structure from another. 1384 * Copy the current, root, and jail root vnode references. 1385 */ 1386 struct filedesc * 1387 fdinit(struct filedesc *fdp) 1388 { 1389 struct filedesc0 *newfdp; 1390 1391 newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO); 1392 mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF); 1393 if (fdp != NULL) { 1394 FILEDESC_LOCK(fdp); 1395 newfdp->fd_fd.fd_cdir = fdp->fd_cdir; 1396 if (newfdp->fd_fd.fd_cdir) 1397 VREF(newfdp->fd_fd.fd_cdir); 1398 newfdp->fd_fd.fd_rdir = fdp->fd_rdir; 1399 if (newfdp->fd_fd.fd_rdir) 1400 VREF(newfdp->fd_fd.fd_rdir); 1401 newfdp->fd_fd.fd_jdir = fdp->fd_jdir; 1402 if (newfdp->fd_fd.fd_jdir) 1403 VREF(newfdp->fd_fd.fd_jdir); 1404 FILEDESC_UNLOCK(fdp); 1405 } 1406 1407 /* Create the file descriptor table. */ 1408 newfdp->fd_fd.fd_refcnt = 1; 1409 newfdp->fd_fd.fd_holdcnt = 1; 1410 newfdp->fd_fd.fd_cmask = CMASK; 1411 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 1412 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 1413 newfdp->fd_fd.fd_nfiles = NDFILE; 1414 newfdp->fd_fd.fd_map = newfdp->fd_dmap; 1415 return (&newfdp->fd_fd); 1416 } 1417 1418 static struct filedesc * 1419 fdhold(struct proc *p) 1420 { 1421 struct filedesc *fdp; 1422 1423 mtx_lock(&fdesc_mtx); 1424 fdp = p->p_fd; 1425 if (fdp != NULL) 1426 fdp->fd_holdcnt++; 1427 mtx_unlock(&fdesc_mtx); 1428 return (fdp); 1429 } 1430 1431 static void 1432 fddrop(struct filedesc *fdp) 1433 { 1434 int i; 1435 1436 mtx_lock(&fdesc_mtx); 1437 i = --fdp->fd_holdcnt; 1438 mtx_unlock(&fdesc_mtx); 1439 if (i > 0) 1440 return; 1441 1442 mtx_destroy(&fdp->fd_mtx); 1443 FREE(fdp, M_FILEDESC); 1444 } 1445 1446 /* 1447 * Share a filedesc structure. 1448 */ 1449 struct filedesc * 1450 fdshare(struct filedesc *fdp) 1451 { 1452 FILEDESC_LOCK_FAST(fdp); 1453 fdp->fd_refcnt++; 1454 FILEDESC_UNLOCK_FAST(fdp); 1455 return (fdp); 1456 } 1457 1458 /* 1459 * Unshare a filedesc structure, if necessary by making a copy 1460 */ 1461 void 1462 fdunshare(struct proc *p, struct thread *td) 1463 { 1464 1465 FILEDESC_LOCK_FAST(p->p_fd); 1466 if (p->p_fd->fd_refcnt > 1) { 1467 struct filedesc *tmp; 1468 1469 FILEDESC_UNLOCK_FAST(p->p_fd); 1470 tmp = fdcopy(p->p_fd); 1471 fdfree(td); 1472 p->p_fd = tmp; 1473 } else 1474 FILEDESC_UNLOCK_FAST(p->p_fd); 1475 } 1476 1477 /* 1478 * Copy a filedesc structure. 1479 * A NULL pointer in returns a NULL reference, this is to ease callers, 1480 * not catch errors. 1481 */ 1482 struct filedesc * 1483 fdcopy(struct filedesc *fdp) 1484 { 1485 struct filedesc *newfdp; 1486 int i; 1487 1488 /* Certain daemons might not have file descriptors. */ 1489 if (fdp == NULL) 1490 return (NULL); 1491 1492 newfdp = fdinit(fdp); 1493 FILEDESC_LOCK_FAST(fdp); 1494 while (fdp->fd_lastfile >= newfdp->fd_nfiles) { 1495 FILEDESC_UNLOCK_FAST(fdp); 1496 FILEDESC_LOCK(newfdp); 1497 fdgrowtable(newfdp, fdp->fd_lastfile + 1); 1498 FILEDESC_UNLOCK(newfdp); 1499 FILEDESC_LOCK_FAST(fdp); 1500 } 1501 /* copy everything except kqueue descriptors */ 1502 newfdp->fd_freefile = -1; 1503 for (i = 0; i <= fdp->fd_lastfile; ++i) { 1504 if (fdisused(fdp, i) && 1505 fdp->fd_ofiles[i]->f_type != DTYPE_KQUEUE) { 1506 newfdp->fd_ofiles[i] = fdp->fd_ofiles[i]; 1507 newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i]; 1508 fhold(newfdp->fd_ofiles[i]); 1509 newfdp->fd_lastfile = i; 1510 } else { 1511 if (newfdp->fd_freefile == -1) 1512 newfdp->fd_freefile = i; 1513 } 1514 } 1515 FILEDESC_UNLOCK_FAST(fdp); 1516 FILEDESC_LOCK(newfdp); 1517 for (i = 0; i <= newfdp->fd_lastfile; ++i) 1518 if (newfdp->fd_ofiles[i] != NULL) 1519 fdused(newfdp, i); 1520 FILEDESC_UNLOCK(newfdp); 1521 FILEDESC_LOCK_FAST(fdp); 1522 if (newfdp->fd_freefile == -1) 1523 newfdp->fd_freefile = i; 1524 newfdp->fd_cmask = fdp->fd_cmask; 1525 FILEDESC_UNLOCK_FAST(fdp); 1526 return (newfdp); 1527 } 1528 1529 /* 1530 * Release a filedesc structure. 1531 */ 1532 void 1533 fdfree(struct thread *td) 1534 { 1535 struct filedesc *fdp; 1536 struct file **fpp; 1537 int i, locked; 1538 struct filedesc_to_leader *fdtol; 1539 struct file *fp; 1540 struct vnode *cdir, *jdir, *rdir, *vp; 1541 struct flock lf; 1542 1543 /* Certain daemons might not have file descriptors. */ 1544 fdp = td->td_proc->p_fd; 1545 if (fdp == NULL) 1546 return; 1547 1548 /* Check for special need to clear POSIX style locks */ 1549 fdtol = td->td_proc->p_fdtol; 1550 if (fdtol != NULL) { 1551 FILEDESC_LOCK(fdp); 1552 KASSERT(fdtol->fdl_refcount > 0, 1553 ("filedesc_to_refcount botch: fdl_refcount=%d", 1554 fdtol->fdl_refcount)); 1555 if (fdtol->fdl_refcount == 1 && 1556 (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1557 for (i = 0, fpp = fdp->fd_ofiles; 1558 i <= fdp->fd_lastfile; 1559 i++, fpp++) { 1560 if (*fpp == NULL || 1561 (*fpp)->f_type != DTYPE_VNODE) 1562 continue; 1563 fp = *fpp; 1564 fhold(fp); 1565 FILEDESC_UNLOCK(fdp); 1566 lf.l_whence = SEEK_SET; 1567 lf.l_start = 0; 1568 lf.l_len = 0; 1569 lf.l_type = F_UNLCK; 1570 vp = fp->f_vnode; 1571 locked = VFS_LOCK_GIANT(vp->v_mount); 1572 (void) VOP_ADVLOCK(vp, 1573 (caddr_t)td->td_proc-> 1574 p_leader, 1575 F_UNLCK, 1576 &lf, 1577 F_POSIX); 1578 VFS_UNLOCK_GIANT(locked); 1579 FILEDESC_LOCK(fdp); 1580 fdrop(fp, td); 1581 fpp = fdp->fd_ofiles + i; 1582 } 1583 } 1584 retry: 1585 if (fdtol->fdl_refcount == 1) { 1586 if (fdp->fd_holdleaderscount > 0 && 1587 (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1588 /* 1589 * close() or do_dup() has cleared a reference 1590 * in a shared file descriptor table. 1591 */ 1592 fdp->fd_holdleaderswakeup = 1; 1593 msleep(&fdp->fd_holdleaderscount, &fdp->fd_mtx, 1594 PLOCK, "fdlhold", 0); 1595 goto retry; 1596 } 1597 if (fdtol->fdl_holdcount > 0) { 1598 /* 1599 * Ensure that fdtol->fdl_leader 1600 * remains valid in closef(). 1601 */ 1602 fdtol->fdl_wakeup = 1; 1603 msleep(fdtol, &fdp->fd_mtx, 1604 PLOCK, "fdlhold", 0); 1605 goto retry; 1606 } 1607 } 1608 fdtol->fdl_refcount--; 1609 if (fdtol->fdl_refcount == 0 && 1610 fdtol->fdl_holdcount == 0) { 1611 fdtol->fdl_next->fdl_prev = fdtol->fdl_prev; 1612 fdtol->fdl_prev->fdl_next = fdtol->fdl_next; 1613 } else 1614 fdtol = NULL; 1615 td->td_proc->p_fdtol = NULL; 1616 FILEDESC_UNLOCK(fdp); 1617 if (fdtol != NULL) 1618 FREE(fdtol, M_FILEDESC_TO_LEADER); 1619 } 1620 FILEDESC_LOCK_FAST(fdp); 1621 i = --fdp->fd_refcnt; 1622 FILEDESC_UNLOCK_FAST(fdp); 1623 if (i > 0) 1624 return; 1625 /* 1626 * We are the last reference to the structure, so we can 1627 * safely assume it will not change out from under us. 1628 */ 1629 fpp = fdp->fd_ofiles; 1630 for (i = fdp->fd_lastfile; i-- >= 0; fpp++) { 1631 if (*fpp) 1632 (void) closef(*fpp, td); 1633 } 1634 FILEDESC_LOCK(fdp); 1635 1636 /* XXX This should happen earlier. */ 1637 mtx_lock(&fdesc_mtx); 1638 td->td_proc->p_fd = NULL; 1639 mtx_unlock(&fdesc_mtx); 1640 1641 if (fdp->fd_nfiles > NDFILE) 1642 FREE(fdp->fd_ofiles, M_FILEDESC); 1643 if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE)) 1644 FREE(fdp->fd_map, M_FILEDESC); 1645 1646 fdp->fd_nfiles = 0; 1647 1648 cdir = fdp->fd_cdir; 1649 fdp->fd_cdir = NULL; 1650 rdir = fdp->fd_rdir; 1651 fdp->fd_rdir = NULL; 1652 jdir = fdp->fd_jdir; 1653 fdp->fd_jdir = NULL; 1654 FILEDESC_UNLOCK(fdp); 1655 1656 if (cdir) { 1657 locked = VFS_LOCK_GIANT(cdir->v_mount); 1658 vrele(cdir); 1659 VFS_UNLOCK_GIANT(locked); 1660 } 1661 if (rdir) { 1662 locked = VFS_LOCK_GIANT(rdir->v_mount); 1663 vrele(rdir); 1664 VFS_UNLOCK_GIANT(locked); 1665 } 1666 if (jdir) { 1667 locked = VFS_LOCK_GIANT(jdir->v_mount); 1668 vrele(jdir); 1669 VFS_UNLOCK_GIANT(locked); 1670 } 1671 1672 fddrop(fdp); 1673 } 1674 1675 /* 1676 * For setugid programs, we don't want to people to use that setugidness 1677 * to generate error messages which write to a file which otherwise would 1678 * otherwise be off-limits to the process. We check for filesystems where 1679 * the vnode can change out from under us after execve (like [lin]procfs). 1680 * 1681 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is 1682 * sufficient. We also don't check for setugidness since we know we are. 1683 */ 1684 static int 1685 is_unsafe(struct file *fp) 1686 { 1687 if (fp->f_type == DTYPE_VNODE) { 1688 struct vnode *vp = fp->f_vnode; 1689 1690 if ((vp->v_vflag & VV_PROCDEP) != 0) 1691 return (1); 1692 } 1693 return (0); 1694 } 1695 1696 /* 1697 * Make this setguid thing safe, if at all possible. 1698 */ 1699 void 1700 setugidsafety(struct thread *td) 1701 { 1702 struct filedesc *fdp; 1703 int i; 1704 1705 /* Certain daemons might not have file descriptors. */ 1706 fdp = td->td_proc->p_fd; 1707 if (fdp == NULL) 1708 return; 1709 1710 /* 1711 * Note: fdp->fd_ofiles may be reallocated out from under us while 1712 * we are blocked in a close. Be careful! 1713 */ 1714 FILEDESC_LOCK(fdp); 1715 for (i = 0; i <= fdp->fd_lastfile; i++) { 1716 if (i > 2) 1717 break; 1718 if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) { 1719 struct file *fp; 1720 1721 knote_fdclose(td, i); 1722 /* 1723 * NULL-out descriptor prior to close to avoid 1724 * a race while close blocks. 1725 */ 1726 fp = fdp->fd_ofiles[i]; 1727 fdp->fd_ofiles[i] = NULL; 1728 fdp->fd_ofileflags[i] = 0; 1729 fdunused(fdp, i); 1730 FILEDESC_UNLOCK(fdp); 1731 (void) closef(fp, td); 1732 FILEDESC_LOCK(fdp); 1733 } 1734 } 1735 FILEDESC_UNLOCK(fdp); 1736 } 1737 1738 void 1739 fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td) 1740 { 1741 1742 FILEDESC_LOCK(fdp); 1743 if (fdp->fd_ofiles[idx] == fp) { 1744 fdp->fd_ofiles[idx] = NULL; 1745 fdunused(fdp, idx); 1746 FILEDESC_UNLOCK(fdp); 1747 fdrop(fp, td); 1748 } else { 1749 FILEDESC_UNLOCK(fdp); 1750 } 1751 } 1752 1753 /* 1754 * Close any files on exec? 1755 */ 1756 void 1757 fdcloseexec(struct thread *td) 1758 { 1759 struct filedesc *fdp; 1760 int i; 1761 1762 /* Certain daemons might not have file descriptors. */ 1763 fdp = td->td_proc->p_fd; 1764 if (fdp == NULL) 1765 return; 1766 1767 FILEDESC_LOCK(fdp); 1768 1769 /* 1770 * We cannot cache fd_ofiles or fd_ofileflags since operations 1771 * may block and rip them out from under us. 1772 */ 1773 for (i = 0; i <= fdp->fd_lastfile; i++) { 1774 if (fdp->fd_ofiles[i] != NULL && 1775 (fdp->fd_ofiles[i]->f_type == DTYPE_MQUEUE || 1776 (fdp->fd_ofileflags[i] & UF_EXCLOSE))) { 1777 struct file *fp; 1778 1779 knote_fdclose(td, i); 1780 /* 1781 * NULL-out descriptor prior to close to avoid 1782 * a race while close blocks. 1783 */ 1784 fp = fdp->fd_ofiles[i]; 1785 fdp->fd_ofiles[i] = NULL; 1786 fdp->fd_ofileflags[i] = 0; 1787 fdunused(fdp, i); 1788 if (fp->f_type == DTYPE_MQUEUE) 1789 mq_fdclose(td, i, fp); 1790 FILEDESC_UNLOCK(fdp); 1791 (void) closef(fp, td); 1792 FILEDESC_LOCK(fdp); 1793 } 1794 } 1795 FILEDESC_UNLOCK(fdp); 1796 } 1797 1798 /* 1799 * It is unsafe for set[ug]id processes to be started with file 1800 * descriptors 0..2 closed, as these descriptors are given implicit 1801 * significance in the Standard C library. fdcheckstd() will create a 1802 * descriptor referencing /dev/null for each of stdin, stdout, and 1803 * stderr that is not already open. 1804 */ 1805 int 1806 fdcheckstd(struct thread *td) 1807 { 1808 struct nameidata nd; 1809 struct filedesc *fdp; 1810 struct file *fp; 1811 register_t retval; 1812 int fd, i, error, flags, devnull; 1813 1814 fdp = td->td_proc->p_fd; 1815 if (fdp == NULL) 1816 return (0); 1817 KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared")); 1818 devnull = -1; 1819 error = 0; 1820 for (i = 0; i < 3; i++) { 1821 if (fdp->fd_ofiles[i] != NULL) 1822 continue; 1823 if (devnull < 0) { 1824 int vfslocked; 1825 error = falloc(td, &fp, &fd); 1826 if (error != 0) 1827 break; 1828 /* Note extra ref on `fp' held for us by falloc(). */ 1829 KASSERT(fd == i, ("oof, we didn't get our fd")); 1830 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, 1831 "/dev/null", td); 1832 flags = FREAD | FWRITE; 1833 error = vn_open(&nd, &flags, 0, fd); 1834 if (error != 0) { 1835 /* 1836 * Someone may have closed the entry in the 1837 * file descriptor table, so check it hasn't 1838 * changed before dropping the reference count. 1839 */ 1840 FILEDESC_LOCK(fdp); 1841 KASSERT(fdp->fd_ofiles[fd] == fp, 1842 ("table not shared, how did it change?")); 1843 fdp->fd_ofiles[fd] = NULL; 1844 fdunused(fdp, fd); 1845 FILEDESC_UNLOCK(fdp); 1846 fdrop(fp, td); 1847 fdrop(fp, td); 1848 break; 1849 } 1850 vfslocked = NDHASGIANT(&nd); 1851 NDFREE(&nd, NDF_ONLY_PNBUF); 1852 fp->f_flag = flags; 1853 fp->f_vnode = nd.ni_vp; 1854 if (fp->f_data == NULL) 1855 fp->f_data = nd.ni_vp; 1856 if (fp->f_ops == &badfileops) 1857 fp->f_ops = &vnops; 1858 fp->f_type = DTYPE_VNODE; 1859 VOP_UNLOCK(nd.ni_vp, 0, td); 1860 VFS_UNLOCK_GIANT(vfslocked); 1861 devnull = fd; 1862 fdrop(fp, td); 1863 } else { 1864 error = do_dup(td, DUP_FIXED, devnull, i, &retval); 1865 if (error != 0) 1866 break; 1867 } 1868 } 1869 return (error); 1870 } 1871 1872 /* 1873 * Internal form of close. 1874 * Decrement reference count on file structure. 1875 * Note: td may be NULL when closing a file that was being passed in a 1876 * message. 1877 * 1878 * XXXRW: Giant is not required for the caller, but often will be held; this 1879 * makes it moderately likely the Giant will be recursed in the VFS case. 1880 */ 1881 int 1882 closef(struct file *fp, struct thread *td) 1883 { 1884 struct vnode *vp; 1885 struct flock lf; 1886 struct filedesc_to_leader *fdtol; 1887 struct filedesc *fdp; 1888 1889 /* 1890 * POSIX record locking dictates that any close releases ALL 1891 * locks owned by this process. This is handled by setting 1892 * a flag in the unlock to free ONLY locks obeying POSIX 1893 * semantics, and not to free BSD-style file locks. 1894 * If the descriptor was in a message, POSIX-style locks 1895 * aren't passed with the descriptor, and the thread pointer 1896 * will be NULL. Callers should be careful only to pass a 1897 * NULL thread pointer when there really is no owning 1898 * context that might have locks, or the locks will be 1899 * leaked. 1900 */ 1901 if (fp->f_type == DTYPE_VNODE && td != NULL) { 1902 int vfslocked; 1903 1904 vp = fp->f_vnode; 1905 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1906 if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1907 lf.l_whence = SEEK_SET; 1908 lf.l_start = 0; 1909 lf.l_len = 0; 1910 lf.l_type = F_UNLCK; 1911 (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader, 1912 F_UNLCK, &lf, F_POSIX); 1913 } 1914 fdtol = td->td_proc->p_fdtol; 1915 if (fdtol != NULL) { 1916 /* 1917 * Handle special case where file descriptor table 1918 * is shared between multiple process leaders. 1919 */ 1920 fdp = td->td_proc->p_fd; 1921 FILEDESC_LOCK(fdp); 1922 for (fdtol = fdtol->fdl_next; 1923 fdtol != td->td_proc->p_fdtol; 1924 fdtol = fdtol->fdl_next) { 1925 if ((fdtol->fdl_leader->p_flag & 1926 P_ADVLOCK) == 0) 1927 continue; 1928 fdtol->fdl_holdcount++; 1929 FILEDESC_UNLOCK(fdp); 1930 lf.l_whence = SEEK_SET; 1931 lf.l_start = 0; 1932 lf.l_len = 0; 1933 lf.l_type = F_UNLCK; 1934 vp = fp->f_vnode; 1935 (void) VOP_ADVLOCK(vp, 1936 (caddr_t)fdtol->fdl_leader, 1937 F_UNLCK, &lf, F_POSIX); 1938 FILEDESC_LOCK(fdp); 1939 fdtol->fdl_holdcount--; 1940 if (fdtol->fdl_holdcount == 0 && 1941 fdtol->fdl_wakeup != 0) { 1942 fdtol->fdl_wakeup = 0; 1943 wakeup(fdtol); 1944 } 1945 } 1946 FILEDESC_UNLOCK(fdp); 1947 } 1948 VFS_UNLOCK_GIANT(vfslocked); 1949 } 1950 return (fdrop(fp, td)); 1951 } 1952 1953 /* 1954 * Extract the file pointer associated with the specified descriptor for 1955 * the current user process. 1956 * 1957 * If the descriptor doesn't exist, EBADF is returned. 1958 * 1959 * If the descriptor exists but doesn't match 'flags' then 1960 * return EBADF for read attempts and EINVAL for write attempts. 1961 * 1962 * If 'hold' is set (non-zero) the file's refcount will be bumped on return. 1963 * It should be dropped with fdrop(). 1964 * If it is not set, then the refcount will not be bumped however the 1965 * thread's filedesc struct will be returned locked (for fgetsock). 1966 * 1967 * If an error occured the non-zero error is returned and *fpp is set to NULL. 1968 * Otherwise *fpp is set and zero is returned. 1969 */ 1970 static __inline int 1971 _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold) 1972 { 1973 struct filedesc *fdp; 1974 struct file *fp; 1975 1976 *fpp = NULL; 1977 if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) 1978 return (EBADF); 1979 FILEDESC_LOCK(fdp); 1980 if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) { 1981 FILEDESC_UNLOCK(fdp); 1982 return (EBADF); 1983 } 1984 1985 /* 1986 * Note: FREAD failure returns EBADF to maintain backwards 1987 * compatibility with what routines returned before. 1988 * 1989 * Only one flag, or 0, may be specified. 1990 */ 1991 if (flags == FREAD && (fp->f_flag & FREAD) == 0) { 1992 FILEDESC_UNLOCK(fdp); 1993 return (EBADF); 1994 } 1995 if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) { 1996 FILEDESC_UNLOCK(fdp); 1997 return (EINVAL); 1998 } 1999 if (hold) { 2000 fhold(fp); 2001 FILEDESC_UNLOCK(fdp); 2002 } 2003 *fpp = fp; 2004 return (0); 2005 } 2006 2007 int 2008 fget(struct thread *td, int fd, struct file **fpp) 2009 { 2010 2011 return(_fget(td, fd, fpp, 0, 1)); 2012 } 2013 2014 int 2015 fget_read(struct thread *td, int fd, struct file **fpp) 2016 { 2017 2018 return(_fget(td, fd, fpp, FREAD, 1)); 2019 } 2020 2021 int 2022 fget_write(struct thread *td, int fd, struct file **fpp) 2023 { 2024 2025 return(_fget(td, fd, fpp, FWRITE, 1)); 2026 } 2027 2028 /* 2029 * Like fget() but loads the underlying vnode, or returns an error if 2030 * the descriptor does not represent a vnode. Note that pipes use vnodes 2031 * but never have VM objects. The returned vnode will be vref()d. 2032 * 2033 * XXX: what about the unused flags ? 2034 */ 2035 static __inline int 2036 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags) 2037 { 2038 struct file *fp; 2039 int error; 2040 2041 *vpp = NULL; 2042 if ((error = _fget(td, fd, &fp, 0, 0)) != 0) 2043 return (error); 2044 if (fp->f_vnode == NULL) { 2045 error = EINVAL; 2046 } else { 2047 *vpp = fp->f_vnode; 2048 vref(*vpp); 2049 } 2050 FILEDESC_UNLOCK(td->td_proc->p_fd); 2051 return (error); 2052 } 2053 2054 int 2055 fgetvp(struct thread *td, int fd, struct vnode **vpp) 2056 { 2057 2058 return (_fgetvp(td, fd, vpp, 0)); 2059 } 2060 2061 int 2062 fgetvp_read(struct thread *td, int fd, struct vnode **vpp) 2063 { 2064 2065 return (_fgetvp(td, fd, vpp, FREAD)); 2066 } 2067 2068 #ifdef notyet 2069 int 2070 fgetvp_write(struct thread *td, int fd, struct vnode **vpp) 2071 { 2072 2073 return (_fgetvp(td, fd, vpp, FWRITE)); 2074 } 2075 #endif 2076 2077 /* 2078 * Like fget() but loads the underlying socket, or returns an error if 2079 * the descriptor does not represent a socket. 2080 * 2081 * We bump the ref count on the returned socket. XXX Also obtain the SX 2082 * lock in the future. 2083 */ 2084 int 2085 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp) 2086 { 2087 struct file *fp; 2088 int error; 2089 2090 NET_ASSERT_GIANT(); 2091 2092 *spp = NULL; 2093 if (fflagp != NULL) 2094 *fflagp = 0; 2095 if ((error = _fget(td, fd, &fp, 0, 0)) != 0) 2096 return (error); 2097 if (fp->f_type != DTYPE_SOCKET) { 2098 error = ENOTSOCK; 2099 } else { 2100 *spp = fp->f_data; 2101 if (fflagp) 2102 *fflagp = fp->f_flag; 2103 SOCK_LOCK(*spp); 2104 soref(*spp); 2105 SOCK_UNLOCK(*spp); 2106 } 2107 FILEDESC_UNLOCK(td->td_proc->p_fd); 2108 return (error); 2109 } 2110 2111 /* 2112 * Drop the reference count on the socket and XXX release the SX lock in 2113 * the future. The last reference closes the socket. 2114 */ 2115 void 2116 fputsock(struct socket *so) 2117 { 2118 2119 NET_ASSERT_GIANT(); 2120 ACCEPT_LOCK(); 2121 SOCK_LOCK(so); 2122 sorele(so); 2123 } 2124 2125 int 2126 fdrop(struct file *fp, struct thread *td) 2127 { 2128 2129 FILE_LOCK(fp); 2130 return (fdrop_locked(fp, td)); 2131 } 2132 2133 /* 2134 * Drop reference on struct file passed in, may call closef if the 2135 * reference hits zero. 2136 * Expects struct file locked, and will unlock it. 2137 */ 2138 static int 2139 fdrop_locked(struct file *fp, struct thread *td) 2140 { 2141 int error; 2142 2143 FILE_LOCK_ASSERT(fp, MA_OWNED); 2144 2145 if (--fp->f_count > 0) { 2146 FILE_UNLOCK(fp); 2147 return (0); 2148 } 2149 /* We have the last ref so we can proceed without the file lock. */ 2150 FILE_UNLOCK(fp); 2151 if (fp->f_count < 0) 2152 panic("fdrop: count < 0"); 2153 if (fp->f_ops != &badfileops) 2154 error = fo_close(fp, td); 2155 else 2156 error = 0; 2157 2158 sx_xlock(&filelist_lock); 2159 LIST_REMOVE(fp, f_list); 2160 openfiles--; 2161 sx_xunlock(&filelist_lock); 2162 crfree(fp->f_cred); 2163 uma_zfree(file_zone, fp); 2164 2165 return (error); 2166 } 2167 2168 /* 2169 * Apply an advisory lock on a file descriptor. 2170 * 2171 * Just attempt to get a record lock of the requested type on 2172 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 2173 */ 2174 #ifndef _SYS_SYSPROTO_H_ 2175 struct flock_args { 2176 int fd; 2177 int how; 2178 }; 2179 #endif 2180 /* 2181 * MPSAFE 2182 */ 2183 /* ARGSUSED */ 2184 int 2185 flock(struct thread *td, struct flock_args *uap) 2186 { 2187 struct file *fp; 2188 struct vnode *vp; 2189 struct flock lf; 2190 int error; 2191 2192 if ((error = fget(td, uap->fd, &fp)) != 0) 2193 return (error); 2194 if (fp->f_type != DTYPE_VNODE) { 2195 fdrop(fp, td); 2196 return (EOPNOTSUPP); 2197 } 2198 2199 mtx_lock(&Giant); 2200 vp = fp->f_vnode; 2201 lf.l_whence = SEEK_SET; 2202 lf.l_start = 0; 2203 lf.l_len = 0; 2204 if (uap->how & LOCK_UN) { 2205 lf.l_type = F_UNLCK; 2206 FILE_LOCK(fp); 2207 fp->f_flag &= ~FHASLOCK; 2208 FILE_UNLOCK(fp); 2209 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 2210 goto done2; 2211 } 2212 if (uap->how & LOCK_EX) 2213 lf.l_type = F_WRLCK; 2214 else if (uap->how & LOCK_SH) 2215 lf.l_type = F_RDLCK; 2216 else { 2217 error = EBADF; 2218 goto done2; 2219 } 2220 FILE_LOCK(fp); 2221 fp->f_flag |= FHASLOCK; 2222 FILE_UNLOCK(fp); 2223 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, 2224 (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT); 2225 done2: 2226 fdrop(fp, td); 2227 mtx_unlock(&Giant); 2228 return (error); 2229 } 2230 /* 2231 * Duplicate the specified descriptor to a free descriptor. 2232 */ 2233 int 2234 dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error) 2235 { 2236 struct file *wfp; 2237 struct file *fp; 2238 2239 /* 2240 * If the to-be-dup'd fd number is greater than the allowed number 2241 * of file descriptors, or the fd to be dup'd has already been 2242 * closed, then reject. 2243 */ 2244 FILEDESC_LOCK(fdp); 2245 if (dfd < 0 || dfd >= fdp->fd_nfiles || 2246 (wfp = fdp->fd_ofiles[dfd]) == NULL) { 2247 FILEDESC_UNLOCK(fdp); 2248 return (EBADF); 2249 } 2250 2251 /* 2252 * There are two cases of interest here. 2253 * 2254 * For ENODEV simply dup (dfd) to file descriptor 2255 * (indx) and return. 2256 * 2257 * For ENXIO steal away the file structure from (dfd) and 2258 * store it in (indx). (dfd) is effectively closed by 2259 * this operation. 2260 * 2261 * Any other error code is just returned. 2262 */ 2263 switch (error) { 2264 case ENODEV: 2265 /* 2266 * Check that the mode the file is being opened for is a 2267 * subset of the mode of the existing descriptor. 2268 */ 2269 FILE_LOCK(wfp); 2270 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { 2271 FILE_UNLOCK(wfp); 2272 FILEDESC_UNLOCK(fdp); 2273 return (EACCES); 2274 } 2275 fp = fdp->fd_ofiles[indx]; 2276 fdp->fd_ofiles[indx] = wfp; 2277 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 2278 if (fp == NULL) 2279 fdused(fdp, indx); 2280 fhold_locked(wfp); 2281 FILE_UNLOCK(wfp); 2282 FILEDESC_UNLOCK(fdp); 2283 if (fp != NULL) { 2284 /* 2285 * We now own the reference to fp that the ofiles[] 2286 * array used to own. Release it. 2287 */ 2288 FILE_LOCK(fp); 2289 fdrop_locked(fp, td); 2290 } 2291 return (0); 2292 2293 case ENXIO: 2294 /* 2295 * Steal away the file pointer from dfd and stuff it into indx. 2296 */ 2297 fp = fdp->fd_ofiles[indx]; 2298 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; 2299 fdp->fd_ofiles[dfd] = NULL; 2300 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 2301 fdp->fd_ofileflags[dfd] = 0; 2302 fdunused(fdp, dfd); 2303 if (fp == NULL) 2304 fdused(fdp, indx); 2305 if (fp != NULL) 2306 FILE_LOCK(fp); 2307 2308 /* 2309 * We now own the reference to fp that the ofiles[] array 2310 * used to own. Release it. 2311 */ 2312 if (fp != NULL) 2313 fdrop_locked(fp, td); 2314 2315 FILEDESC_UNLOCK(fdp); 2316 2317 return (0); 2318 2319 default: 2320 FILEDESC_UNLOCK(fdp); 2321 return (error); 2322 } 2323 /* NOTREACHED */ 2324 } 2325 2326 /* 2327 * Scan all active processes to see if any of them have a current 2328 * or root directory of `olddp'. If so, replace them with the new 2329 * mount point. 2330 */ 2331 void 2332 mountcheckdirs(struct vnode *olddp, struct vnode *newdp) 2333 { 2334 struct filedesc *fdp; 2335 struct proc *p; 2336 int nrele; 2337 2338 if (vrefcnt(olddp) == 1) 2339 return; 2340 sx_slock(&allproc_lock); 2341 LIST_FOREACH(p, &allproc, p_list) { 2342 fdp = fdhold(p); 2343 if (fdp == NULL) 2344 continue; 2345 nrele = 0; 2346 FILEDESC_LOCK_FAST(fdp); 2347 if (fdp->fd_cdir == olddp) { 2348 vref(newdp); 2349 fdp->fd_cdir = newdp; 2350 nrele++; 2351 } 2352 if (fdp->fd_rdir == olddp) { 2353 vref(newdp); 2354 fdp->fd_rdir = newdp; 2355 nrele++; 2356 } 2357 FILEDESC_UNLOCK_FAST(fdp); 2358 fddrop(fdp); 2359 while (nrele--) 2360 vrele(olddp); 2361 } 2362 sx_sunlock(&allproc_lock); 2363 if (rootvnode == olddp) { 2364 vrele(rootvnode); 2365 vref(newdp); 2366 rootvnode = newdp; 2367 } 2368 } 2369 2370 struct filedesc_to_leader * 2371 filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader) 2372 { 2373 struct filedesc_to_leader *fdtol; 2374 2375 MALLOC(fdtol, struct filedesc_to_leader *, 2376 sizeof(struct filedesc_to_leader), 2377 M_FILEDESC_TO_LEADER, 2378 M_WAITOK); 2379 fdtol->fdl_refcount = 1; 2380 fdtol->fdl_holdcount = 0; 2381 fdtol->fdl_wakeup = 0; 2382 fdtol->fdl_leader = leader; 2383 if (old != NULL) { 2384 FILEDESC_LOCK(fdp); 2385 fdtol->fdl_next = old->fdl_next; 2386 fdtol->fdl_prev = old; 2387 old->fdl_next = fdtol; 2388 fdtol->fdl_next->fdl_prev = fdtol; 2389 FILEDESC_UNLOCK(fdp); 2390 } else { 2391 fdtol->fdl_next = fdtol; 2392 fdtol->fdl_prev = fdtol; 2393 } 2394 return (fdtol); 2395 } 2396 2397 /* 2398 * Get file structures. 2399 */ 2400 static int 2401 sysctl_kern_file(SYSCTL_HANDLER_ARGS) 2402 { 2403 struct xfile xf; 2404 struct filedesc *fdp; 2405 struct file *fp; 2406 struct proc *p; 2407 int error, n; 2408 2409 /* 2410 * Note: because the number of file descriptors is calculated 2411 * in different ways for sizing vs returning the data, 2412 * there is information leakage from the first loop. However, 2413 * it is of a similar order of magnitude to the leakage from 2414 * global system statistics such as kern.openfiles. 2415 */ 2416 error = sysctl_wire_old_buffer(req, 0); 2417 if (error != 0) 2418 return (error); 2419 if (req->oldptr == NULL) { 2420 n = 16; /* A slight overestimate. */ 2421 sx_slock(&filelist_lock); 2422 LIST_FOREACH(fp, &filehead, f_list) { 2423 /* 2424 * We should grab the lock, but this is an 2425 * estimate, so does it really matter? 2426 */ 2427 /* mtx_lock(fp->f_mtxp); */ 2428 n += fp->f_count; 2429 /* mtx_unlock(f->f_mtxp); */ 2430 } 2431 sx_sunlock(&filelist_lock); 2432 return (SYSCTL_OUT(req, 0, n * sizeof(xf))); 2433 } 2434 error = 0; 2435 bzero(&xf, sizeof(xf)); 2436 xf.xf_size = sizeof(xf); 2437 sx_slock(&allproc_lock); 2438 LIST_FOREACH(p, &allproc, p_list) { 2439 if (p->p_state == PRS_NEW) 2440 continue; 2441 PROC_LOCK(p); 2442 if (p_cansee(req->td, p) != 0) { 2443 PROC_UNLOCK(p); 2444 continue; 2445 } 2446 xf.xf_pid = p->p_pid; 2447 xf.xf_uid = p->p_ucred->cr_uid; 2448 PROC_UNLOCK(p); 2449 fdp = fdhold(p); 2450 if (fdp == NULL) 2451 continue; 2452 FILEDESC_LOCK_FAST(fdp); 2453 for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) { 2454 if ((fp = fdp->fd_ofiles[n]) == NULL) 2455 continue; 2456 xf.xf_fd = n; 2457 xf.xf_file = fp; 2458 xf.xf_data = fp->f_data; 2459 xf.xf_vnode = fp->f_vnode; 2460 xf.xf_type = fp->f_type; 2461 xf.xf_count = fp->f_count; 2462 xf.xf_msgcount = fp->f_msgcount; 2463 xf.xf_offset = fp->f_offset; 2464 xf.xf_flag = fp->f_flag; 2465 error = SYSCTL_OUT(req, &xf, sizeof(xf)); 2466 if (error) 2467 break; 2468 } 2469 FILEDESC_UNLOCK_FAST(fdp); 2470 fddrop(fdp); 2471 if (error) 2472 break; 2473 } 2474 sx_sunlock(&allproc_lock); 2475 return (error); 2476 } 2477 2478 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD, 2479 0, 0, sysctl_kern_file, "S,xfile", "Entire file table"); 2480 2481 #ifdef DDB 2482 /* 2483 * For the purposes of debugging, generate a human-readable string for the 2484 * file type. 2485 */ 2486 static const char * 2487 file_type_to_name(short type) 2488 { 2489 2490 switch (type) { 2491 case 0: 2492 return ("zero"); 2493 case DTYPE_VNODE: 2494 return ("vnod"); 2495 case DTYPE_SOCKET: 2496 return ("sock"); 2497 case DTYPE_PIPE: 2498 return ("pipe"); 2499 case DTYPE_FIFO: 2500 return ("fifo"); 2501 case DTYPE_CRYPTO: 2502 return ("crpt"); 2503 default: 2504 return ("unkn"); 2505 } 2506 } 2507 2508 /* 2509 * For the purposes of debugging, identify a process (if any, perhaps one of 2510 * many) that references the passed file in its file descriptor array. Return 2511 * NULL if none. 2512 */ 2513 static struct proc * 2514 file_to_first_proc(struct file *fp) 2515 { 2516 struct filedesc *fdp; 2517 struct proc *p; 2518 int n; 2519 2520 LIST_FOREACH(p, &allproc, p_list) { 2521 if (p->p_state == PRS_NEW) 2522 continue; 2523 fdp = p->p_fd; 2524 if (fdp == NULL) 2525 continue; 2526 for (n = 0; n < fdp->fd_nfiles; n++) { 2527 if (fp == fdp->fd_ofiles[n]) 2528 return (p); 2529 } 2530 } 2531 return (NULL); 2532 } 2533 2534 DB_SHOW_COMMAND(files, db_show_files) 2535 { 2536 struct file *fp; 2537 struct proc *p; 2538 2539 db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n", "File", 2540 "Type", "Data", "Flag", "GCFl", "Count", "MCount", "Vnode", 2541 "FPID", "FCmd"); 2542 LIST_FOREACH(fp, &filehead, f_list) { 2543 p = file_to_first_proc(fp); 2544 db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp, 2545 file_type_to_name(fp->f_type), fp->f_data, fp->f_flag, 2546 fp->f_gcflag, fp->f_count, fp->f_msgcount, fp->f_vnode, 2547 p != NULL ? p->p_pid : -1, p != NULL ? p->p_comm : "-"); 2548 } 2549 } 2550 #endif 2551 2552 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, 2553 &maxfilesperproc, 0, "Maximum files allowed open per process"); 2554 2555 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, 2556 &maxfiles, 0, "Maximum number of files"); 2557 2558 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD, 2559 &openfiles, 0, "System-wide number of open files"); 2560 2561 /* ARGSUSED*/ 2562 static void 2563 filelistinit(void *dummy) 2564 { 2565 2566 file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL, 2567 NULL, NULL, UMA_ALIGN_PTR, 0); 2568 sx_init(&filelist_lock, "filelist lock"); 2569 mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); 2570 mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF); 2571 } 2572 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL) 2573 2574 /*-------------------------------------------------------------------*/ 2575 2576 static int 2577 badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) 2578 { 2579 2580 return (EBADF); 2581 } 2582 2583 static int 2584 badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td) 2585 { 2586 2587 return (EBADF); 2588 } 2589 2590 static int 2591 badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) 2592 { 2593 2594 return (0); 2595 } 2596 2597 static int 2598 badfo_kqfilter(struct file *fp, struct knote *kn) 2599 { 2600 2601 return (0); 2602 } 2603 2604 static int 2605 badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td) 2606 { 2607 2608 return (EBADF); 2609 } 2610 2611 static int 2612 badfo_close(struct file *fp, struct thread *td) 2613 { 2614 2615 return (EBADF); 2616 } 2617 2618 struct fileops badfileops = { 2619 .fo_read = badfo_readwrite, 2620 .fo_write = badfo_readwrite, 2621 .fo_ioctl = badfo_ioctl, 2622 .fo_poll = badfo_poll, 2623 .fo_kqfilter = badfo_kqfilter, 2624 .fo_stat = badfo_stat, 2625 .fo_close = badfo_close, 2626 }; 2627 2628 2629 /*-------------------------------------------------------------------*/ 2630 2631 /* 2632 * File Descriptor pseudo-device driver (/dev/fd/). 2633 * 2634 * Opening minor device N dup()s the file (if any) connected to file 2635 * descriptor N belonging to the calling process. Note that this driver 2636 * consists of only the ``open()'' routine, because all subsequent 2637 * references to this file will be direct to the other driver. 2638 * 2639 * XXX: we could give this one a cloning event handler if necessary. 2640 */ 2641 2642 /* ARGSUSED */ 2643 static int 2644 fdopen(struct cdev *dev, int mode, int type, struct thread *td) 2645 { 2646 2647 /* 2648 * XXX Kludge: set curthread->td_dupfd to contain the value of the 2649 * the file descriptor being sought for duplication. The error 2650 * return ensures that the vnode for this device will be released 2651 * by vn_open. Open will detect this special error and take the 2652 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 2653 * will simply report the error. 2654 */ 2655 td->td_dupfd = dev2unit(dev); 2656 return (ENODEV); 2657 } 2658 2659 static struct cdevsw fildesc_cdevsw = { 2660 .d_version = D_VERSION, 2661 .d_flags = D_NEEDGIANT, 2662 .d_open = fdopen, 2663 .d_name = "FD", 2664 }; 2665 2666 static void 2667 fildesc_drvinit(void *unused) 2668 { 2669 struct cdev *dev; 2670 2671 dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0"); 2672 make_dev_alias(dev, "stdin"); 2673 dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1"); 2674 make_dev_alias(dev, "stdout"); 2675 dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2"); 2676 make_dev_alias(dev, "stderr"); 2677 } 2678 2679 SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL) 2680