1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
*
 * @(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>

#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

#include <vm/uma.h>

/* Malloc types used for the allocations made in this file. */
static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "file desc to leader",
		     "file desc to leader structures");
static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");

/* UMA zone from which falloc() allocates struct file. */
static uma_zone_t file_zone;


/* How to treat 'new' parameter when allocating a fd for do_dup(). */
enum dup_type { DUP_VARIABLE, DUP_FIXED };

static int do_dup(struct thread *td, enum dup_type type, int old, int new,
    register_t *retval);
static int	fd_first_free(struct filedesc *, int, int);
static int	fd_last_used(struct filedesc *, int, int);
static void	fdgrowtable(struct filedesc *, int);
static void	fdunused(struct filedesc *fdp, int fd);

/*
 * A process is initially started out with NDFILE descriptors stored within
 * this structure, selected to be enough for typical applications based on
 * the historical limit of 20 open files (and the usage of descriptors by
 * shells).
If these descriptors are exhausted, a larger descriptor table
 * may be allocated, up to a process' resource limit; the internal arrays
 * are then unused.
 *
 * The descriptor bitmap is an array of NDSLOTTYPE words:
 *   NDSLOTSIZE   size of one bitmap word in bytes
 *   NDENTRIES    number of descriptors tracked by one bitmap word
 *   NDSLOT(x)    index of the bitmap word that holds descriptor x
 *   NDBIT(x)     mask selecting descriptor x within its bitmap word
 *   NDSLOTS(x)   number of bitmap words needed to track x descriptors
 */
#define NDFILE		20
#define NDSLOTSIZE	sizeof(NDSLOTTYPE)
#define	NDENTRIES	(NDSLOTSIZE * __CHAR_BIT)
#define NDSLOT(x)	((x) / NDENTRIES)
#define NDBIT(x)	((NDSLOTTYPE)1 << ((x) % NDENTRIES))
#define	NDSLOTS(x)	(((x) + NDENTRIES - 1) / NDENTRIES)

/*
 * Storage required per open file descriptor: one file pointer plus one
 * byte of per-descriptor flags.
 */
#define OFILESIZE (sizeof(struct file *) + sizeof(char))

/*
 * Basic allocation of descriptors:
 * one of the above, plus arrays for NDFILE descriptors.
 */
struct filedesc0 {
	struct	filedesc fd_fd;
	/*
	 * These arrays are used when the number of open files is
	 * <= NDFILE, and are then pointed to by the pointers above.
	 */
	struct	file *fd_dfiles[NDFILE];
	char	fd_dfileflags[NDFILE];
	NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];
};

/*
 * Descriptor management.
 */
struct filelist filehead;	/* head of list of open files */
int openfiles;			/* actual number of open files */
struct sx filelist_lock;	/* sx to protect filelist */
struct mtx sigio_lock;		/* mtx to protect pointers to sigio */

/* A mutex to protect the association between a proc and filedesc. */
static struct mtx	fdesc_mtx;

/*
 * Find the first zero bit in the given bitmap, starting at low and not
 * exceeding size - 1.
139 */ 140 static int 141 fd_first_free(struct filedesc *fdp, int low, int size) 142 { 143 NDSLOTTYPE *map = fdp->fd_map; 144 NDSLOTTYPE mask; 145 int off, maxoff; 146 147 if (low >= size) 148 return (low); 149 150 off = NDSLOT(low); 151 if (low % NDENTRIES) { 152 mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES))); 153 if ((mask &= ~map[off]) != 0UL) 154 return (off * NDENTRIES + ffsl(mask) - 1); 155 ++off; 156 } 157 for (maxoff = NDSLOTS(size); off < maxoff; ++off) 158 if (map[off] != ~0UL) 159 return (off * NDENTRIES + ffsl(~map[off]) - 1); 160 return (size); 161 } 162 163 /* 164 * Find the highest non-zero bit in the given bitmap, starting at low and 165 * not exceeding size - 1. 166 */ 167 static int 168 fd_last_used(struct filedesc *fdp, int low, int size) 169 { 170 NDSLOTTYPE *map = fdp->fd_map; 171 NDSLOTTYPE mask; 172 int off, minoff; 173 174 if (low >= size) 175 return (-1); 176 177 off = NDSLOT(size); 178 if (size % NDENTRIES) { 179 mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES)); 180 if ((mask &= map[off]) != 0) 181 return (off * NDENTRIES + flsl(mask) - 1); 182 --off; 183 } 184 for (minoff = NDSLOT(low); off >= minoff; --off) 185 if (map[off] != 0) 186 return (off * NDENTRIES + flsl(map[off]) - 1); 187 return (size - 1); 188 } 189 190 static int 191 fdisused(struct filedesc *fdp, int fd) 192 { 193 KASSERT(fd >= 0 && fd < fdp->fd_nfiles, 194 ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles)); 195 return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0); 196 } 197 198 /* 199 * Mark a file descriptor as used. 200 */ 201 void 202 fdused(struct filedesc *fdp, int fd) 203 { 204 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 205 KASSERT(!fdisused(fdp, fd), 206 ("fd already used")); 207 fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd); 208 if (fd > fdp->fd_lastfile) 209 fdp->fd_lastfile = fd; 210 if (fd == fdp->fd_freefile) 211 fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles); 212 } 213 214 /* 215 * Mark a file descriptor as unused. 
216 */ 217 static void 218 fdunused(struct filedesc *fdp, int fd) 219 { 220 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 221 KASSERT(fdisused(fdp, fd), 222 ("fd is already unused")); 223 KASSERT(fdp->fd_ofiles[fd] == NULL, 224 ("fd is still in use")); 225 fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); 226 if (fd < fdp->fd_freefile) 227 fdp->fd_freefile = fd; 228 if (fd == fdp->fd_lastfile) 229 fdp->fd_lastfile = fd_last_used(fdp, 0, fd); 230 } 231 232 /* 233 * System calls on descriptors. 234 */ 235 #ifndef _SYS_SYSPROTO_H_ 236 struct getdtablesize_args { 237 int dummy; 238 }; 239 #endif 240 /* 241 * MPSAFE 242 */ 243 /* ARGSUSED */ 244 int 245 getdtablesize(struct thread *td, struct getdtablesize_args *uap) 246 { 247 struct proc *p = td->td_proc; 248 249 PROC_LOCK(p); 250 td->td_retval[0] = 251 min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 252 PROC_UNLOCK(p); 253 return (0); 254 } 255 256 /* 257 * Duplicate a file descriptor to a particular value. 258 * 259 * note: keep in mind that a potential race condition exists when closing 260 * descriptors from a shared descriptor table (via rfork). 261 */ 262 #ifndef _SYS_SYSPROTO_H_ 263 struct dup2_args { 264 u_int from; 265 u_int to; 266 }; 267 #endif 268 /* 269 * MPSAFE 270 */ 271 /* ARGSUSED */ 272 int 273 dup2(struct thread *td, struct dup2_args *uap) 274 { 275 276 return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to, 277 td->td_retval)); 278 } 279 280 /* 281 * Duplicate a file descriptor. 282 */ 283 #ifndef _SYS_SYSPROTO_H_ 284 struct dup_args { 285 u_int fd; 286 }; 287 #endif 288 /* 289 * MPSAFE 290 */ 291 /* ARGSUSED */ 292 int 293 dup(struct thread *td, struct dup_args *uap) 294 { 295 296 return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval)); 297 } 298 299 /* 300 * The file control system call. 
301 */ 302 #ifndef _SYS_SYSPROTO_H_ 303 struct fcntl_args { 304 int fd; 305 int cmd; 306 long arg; 307 }; 308 #endif 309 /* 310 * MPSAFE 311 */ 312 /* ARGSUSED */ 313 int 314 fcntl(struct thread *td, struct fcntl_args *uap) 315 { 316 struct flock fl; 317 intptr_t arg; 318 int error; 319 320 error = 0; 321 switch (uap->cmd) { 322 case F_GETLK: 323 case F_SETLK: 324 case F_SETLKW: 325 error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl)); 326 arg = (intptr_t)&fl; 327 break; 328 default: 329 arg = uap->arg; 330 break; 331 } 332 if (error) 333 return (error); 334 error = kern_fcntl(td, uap->fd, uap->cmd, arg); 335 if (error) 336 return (error); 337 if (uap->cmd == F_GETLK) 338 error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl)); 339 return (error); 340 } 341 342 int 343 kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) 344 { 345 struct filedesc *fdp; 346 struct flock *flp; 347 struct file *fp; 348 struct proc *p; 349 char *pop; 350 struct vnode *vp; 351 u_int newmin; 352 int error, flg, tmp; 353 int giant_locked; 354 355 /* 356 * XXXRW: Some fcntl() calls require Giant -- others don't. Try to 357 * avoid grabbing Giant for calls we know don't need it. 
358 */ 359 switch (cmd) { 360 case F_DUPFD: 361 case F_GETFD: 362 case F_SETFD: 363 case F_GETFL: 364 giant_locked = 0; 365 break; 366 367 default: 368 giant_locked = 1; 369 mtx_lock(&Giant); 370 } 371 372 error = 0; 373 flg = F_POSIX; 374 p = td->td_proc; 375 fdp = p->p_fd; 376 FILEDESC_LOCK(fdp); 377 if ((unsigned)fd >= fdp->fd_nfiles || 378 (fp = fdp->fd_ofiles[fd]) == NULL) { 379 FILEDESC_UNLOCK(fdp); 380 error = EBADF; 381 goto done2; 382 } 383 pop = &fdp->fd_ofileflags[fd]; 384 385 switch (cmd) { 386 case F_DUPFD: 387 /* mtx_assert(&Giant, MA_NOTOWNED); */ 388 FILEDESC_UNLOCK(fdp); 389 newmin = arg; 390 PROC_LOCK(p); 391 if (newmin >= lim_cur(p, RLIMIT_NOFILE) || 392 newmin >= maxfilesperproc) { 393 PROC_UNLOCK(p); 394 error = EINVAL; 395 break; 396 } 397 PROC_UNLOCK(p); 398 error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval); 399 break; 400 401 case F_GETFD: 402 /* mtx_assert(&Giant, MA_NOTOWNED); */ 403 td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0; 404 FILEDESC_UNLOCK(fdp); 405 break; 406 407 case F_SETFD: 408 /* mtx_assert(&Giant, MA_NOTOWNED); */ 409 *pop = (*pop &~ UF_EXCLOSE) | 410 (arg & FD_CLOEXEC ? 
UF_EXCLOSE : 0); 411 FILEDESC_UNLOCK(fdp); 412 break; 413 414 case F_GETFL: 415 /* mtx_assert(&Giant, MA_NOTOWNED); */ 416 FILE_LOCK(fp); 417 td->td_retval[0] = OFLAGS(fp->f_flag); 418 FILE_UNLOCK(fp); 419 FILEDESC_UNLOCK(fdp); 420 break; 421 422 case F_SETFL: 423 mtx_assert(&Giant, MA_OWNED); 424 FILE_LOCK(fp); 425 fhold_locked(fp); 426 fp->f_flag &= ~FCNTLFLAGS; 427 fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS; 428 FILE_UNLOCK(fp); 429 FILEDESC_UNLOCK(fdp); 430 tmp = fp->f_flag & FNONBLOCK; 431 error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); 432 if (error) { 433 fdrop(fp, td); 434 break; 435 } 436 tmp = fp->f_flag & FASYNC; 437 error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td); 438 if (error == 0) { 439 fdrop(fp, td); 440 break; 441 } 442 FILE_LOCK(fp); 443 fp->f_flag &= ~FNONBLOCK; 444 FILE_UNLOCK(fp); 445 tmp = 0; 446 (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); 447 fdrop(fp, td); 448 break; 449 450 case F_GETOWN: 451 mtx_assert(&Giant, MA_OWNED); 452 fhold(fp); 453 FILEDESC_UNLOCK(fdp); 454 error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); 455 if (error == 0) 456 td->td_retval[0] = tmp; 457 fdrop(fp, td); 458 break; 459 460 case F_SETOWN: 461 mtx_assert(&Giant, MA_OWNED); 462 fhold(fp); 463 FILEDESC_UNLOCK(fdp); 464 tmp = arg; 465 error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td); 466 fdrop(fp, td); 467 break; 468 469 case F_SETLKW: 470 mtx_assert(&Giant, MA_OWNED); 471 flg |= F_WAIT; 472 /* FALLTHROUGH F_SETLK */ 473 474 case F_SETLK: 475 mtx_assert(&Giant, MA_OWNED); 476 if (fp->f_type != DTYPE_VNODE) { 477 FILEDESC_UNLOCK(fdp); 478 error = EBADF; 479 break; 480 } 481 482 flp = (struct flock *)arg; 483 if (flp->l_whence == SEEK_CUR) { 484 if (fp->f_offset < 0 || 485 (flp->l_start > 0 && 486 fp->f_offset > OFF_MAX - flp->l_start)) { 487 FILEDESC_UNLOCK(fdp); 488 error = EOVERFLOW; 489 break; 490 } 491 flp->l_start += fp->f_offset; 492 } 493 494 /* 495 * VOP_ADVLOCK() may block. 
496 */ 497 fhold(fp); 498 FILEDESC_UNLOCK(fdp); 499 vp = fp->f_vnode; 500 501 switch (flp->l_type) { 502 case F_RDLCK: 503 if ((fp->f_flag & FREAD) == 0) { 504 error = EBADF; 505 break; 506 } 507 PROC_LOCK(p->p_leader); 508 p->p_leader->p_flag |= P_ADVLOCK; 509 PROC_UNLOCK(p->p_leader); 510 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 511 flp, flg); 512 break; 513 case F_WRLCK: 514 if ((fp->f_flag & FWRITE) == 0) { 515 error = EBADF; 516 break; 517 } 518 PROC_LOCK(p->p_leader); 519 p->p_leader->p_flag |= P_ADVLOCK; 520 PROC_UNLOCK(p->p_leader); 521 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 522 flp, flg); 523 break; 524 case F_UNLCK: 525 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, 526 flp, F_POSIX); 527 break; 528 default: 529 error = EINVAL; 530 break; 531 } 532 /* Check for race with close */ 533 FILEDESC_LOCK_FAST(fdp); 534 if ((unsigned) fd >= fdp->fd_nfiles || 535 fp != fdp->fd_ofiles[fd]) { 536 FILEDESC_UNLOCK_FAST(fdp); 537 flp->l_whence = SEEK_SET; 538 flp->l_start = 0; 539 flp->l_len = 0; 540 flp->l_type = F_UNLCK; 541 (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, 542 F_UNLCK, flp, F_POSIX); 543 } else 544 FILEDESC_UNLOCK_FAST(fdp); 545 fdrop(fp, td); 546 break; 547 548 case F_GETLK: 549 mtx_assert(&Giant, MA_OWNED); 550 if (fp->f_type != DTYPE_VNODE) { 551 FILEDESC_UNLOCK(fdp); 552 error = EBADF; 553 break; 554 } 555 flp = (struct flock *)arg; 556 if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && 557 flp->l_type != F_UNLCK) { 558 FILEDESC_UNLOCK(fdp); 559 error = EINVAL; 560 break; 561 } 562 if (flp->l_whence == SEEK_CUR) { 563 if ((flp->l_start > 0 && 564 fp->f_offset > OFF_MAX - flp->l_start) || 565 (flp->l_start < 0 && 566 fp->f_offset < OFF_MIN - flp->l_start)) { 567 FILEDESC_UNLOCK(fdp); 568 error = EOVERFLOW; 569 break; 570 } 571 flp->l_start += fp->f_offset; 572 } 573 /* 574 * VOP_ADVLOCK() may block. 
575 */ 576 fhold(fp); 577 FILEDESC_UNLOCK(fdp); 578 vp = fp->f_vnode; 579 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, 580 F_POSIX); 581 fdrop(fp, td); 582 break; 583 default: 584 FILEDESC_UNLOCK(fdp); 585 error = EINVAL; 586 break; 587 } 588 done2: 589 if (giant_locked) 590 mtx_unlock(&Giant); 591 return (error); 592 } 593 594 /* 595 * Common code for dup, dup2, and fcntl(F_DUPFD). 596 */ 597 static int 598 do_dup(struct thread *td, enum dup_type type, int old, int new, register_t *retval) 599 { 600 struct filedesc *fdp; 601 struct proc *p; 602 struct file *fp; 603 struct file *delfp; 604 int error, holdleaders, maxfd; 605 606 KASSERT((type == DUP_VARIABLE || type == DUP_FIXED), 607 ("invalid dup type %d", type)); 608 609 p = td->td_proc; 610 fdp = p->p_fd; 611 612 /* 613 * Verify we have a valid descriptor to dup from and possibly to 614 * dup to. 615 */ 616 if (old < 0 || new < 0) 617 return (EBADF); 618 PROC_LOCK(p); 619 maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 620 PROC_UNLOCK(p); 621 if (new >= maxfd) 622 return (EMFILE); 623 624 FILEDESC_LOCK(fdp); 625 if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) { 626 FILEDESC_UNLOCK(fdp); 627 return (EBADF); 628 } 629 if (type == DUP_FIXED && old == new) { 630 *retval = new; 631 FILEDESC_UNLOCK(fdp); 632 return (0); 633 } 634 fp = fdp->fd_ofiles[old]; 635 fhold(fp); 636 637 /* 638 * If the caller specified a file descriptor, make sure the file 639 * table is large enough to hold it, and grab it. Otherwise, just 640 * allocate a new descriptor the usual way. Since the filedesc 641 * lock may be temporarily dropped in the process, we have to look 642 * out for a race. 
643 */ 644 if (type == DUP_FIXED) { 645 if (new >= fdp->fd_nfiles) 646 fdgrowtable(fdp, new + 1); 647 if (fdp->fd_ofiles[new] == NULL) 648 fdused(fdp, new); 649 } else { 650 if ((error = fdalloc(td, new, &new)) != 0) { 651 FILEDESC_UNLOCK(fdp); 652 fdrop(fp, td); 653 return (error); 654 } 655 } 656 657 /* 658 * If the old file changed out from under us then treat it as a 659 * bad file descriptor. Userland should do its own locking to 660 * avoid this case. 661 */ 662 if (fdp->fd_ofiles[old] != fp) { 663 /* we've allocated a descriptor which we won't use */ 664 if (fdp->fd_ofiles[new] == NULL) 665 fdunused(fdp, new); 666 FILEDESC_UNLOCK(fdp); 667 fdrop(fp, td); 668 return (EBADF); 669 } 670 KASSERT(old != new, 671 ("new fd is same as old")); 672 673 /* 674 * Save info on the descriptor being overwritten. We cannot close 675 * it without introducing an ownership race for the slot, since we 676 * need to drop the filedesc lock to call closef(). 677 * 678 * XXX this duplicates parts of close(). 679 */ 680 delfp = fdp->fd_ofiles[new]; 681 holdleaders = 0; 682 if (delfp != NULL) { 683 if (td->td_proc->p_fdtol != NULL) { 684 /* 685 * Ask fdfree() to sleep to ensure that all relevant 686 * process leaders can be traversed in closef(). 687 */ 688 fdp->fd_holdleaderscount++; 689 holdleaders = 1; 690 } 691 } 692 693 /* 694 * Duplicate the source descriptor 695 */ 696 fdp->fd_ofiles[new] = fp; 697 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; 698 if (new > fdp->fd_lastfile) 699 fdp->fd_lastfile = new; 700 *retval = new; 701 702 /* 703 * If we dup'd over a valid file, we now own the reference to it 704 * and must dispose of it using closef() semantics (as if a 705 * close() were performed on it). 706 * 707 * XXX this duplicates parts of close(). 
708 */ 709 if (delfp != NULL) { 710 knote_fdclose(td, new); 711 FILEDESC_UNLOCK(fdp); 712 (void) closef(delfp, td); 713 if (holdleaders) { 714 FILEDESC_LOCK_FAST(fdp); 715 fdp->fd_holdleaderscount--; 716 if (fdp->fd_holdleaderscount == 0 && 717 fdp->fd_holdleaderswakeup != 0) { 718 fdp->fd_holdleaderswakeup = 0; 719 wakeup(&fdp->fd_holdleaderscount); 720 } 721 FILEDESC_UNLOCK_FAST(fdp); 722 } 723 } else { 724 FILEDESC_UNLOCK(fdp); 725 } 726 return (0); 727 } 728 729 /* 730 * If sigio is on the list associated with a process or process group, 731 * disable signalling from the device, remove sigio from the list and 732 * free sigio. 733 */ 734 void 735 funsetown(struct sigio **sigiop) 736 { 737 struct sigio *sigio; 738 739 SIGIO_LOCK(); 740 sigio = *sigiop; 741 if (sigio == NULL) { 742 SIGIO_UNLOCK(); 743 return; 744 } 745 *(sigio->sio_myref) = NULL; 746 if ((sigio)->sio_pgid < 0) { 747 struct pgrp *pg = (sigio)->sio_pgrp; 748 PGRP_LOCK(pg); 749 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio, 750 sigio, sio_pgsigio); 751 PGRP_UNLOCK(pg); 752 } else { 753 struct proc *p = (sigio)->sio_proc; 754 PROC_LOCK(p); 755 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio, 756 sigio, sio_pgsigio); 757 PROC_UNLOCK(p); 758 } 759 SIGIO_UNLOCK(); 760 crfree(sigio->sio_ucred); 761 FREE(sigio, M_SIGIO); 762 } 763 764 /* 765 * Free a list of sigio structures. 766 * We only need to lock the SIGIO_LOCK because we have made ourselves 767 * inaccessable to callers of fsetown and therefore do not need to lock 768 * the proc or pgrp struct for the list manipulation. 769 */ 770 void 771 funsetownlst(struct sigiolst *sigiolst) 772 { 773 struct proc *p; 774 struct pgrp *pg; 775 struct sigio *sigio; 776 777 sigio = SLIST_FIRST(sigiolst); 778 if (sigio == NULL) 779 return; 780 p = NULL; 781 pg = NULL; 782 783 /* 784 * Every entry of the list should belong 785 * to a single proc or pgrp. 
786 */ 787 if (sigio->sio_pgid < 0) { 788 pg = sigio->sio_pgrp; 789 PGRP_LOCK_ASSERT(pg, MA_NOTOWNED); 790 } else /* if (sigio->sio_pgid > 0) */ { 791 p = sigio->sio_proc; 792 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 793 } 794 795 SIGIO_LOCK(); 796 while ((sigio = SLIST_FIRST(sigiolst)) != NULL) { 797 *(sigio->sio_myref) = NULL; 798 if (pg != NULL) { 799 KASSERT(sigio->sio_pgid < 0, 800 ("Proc sigio in pgrp sigio list")); 801 KASSERT(sigio->sio_pgrp == pg, 802 ("Bogus pgrp in sigio list")); 803 PGRP_LOCK(pg); 804 SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, 805 sio_pgsigio); 806 PGRP_UNLOCK(pg); 807 } else /* if (p != NULL) */ { 808 KASSERT(sigio->sio_pgid > 0, 809 ("Pgrp sigio in proc sigio list")); 810 KASSERT(sigio->sio_proc == p, 811 ("Bogus proc in sigio list")); 812 PROC_LOCK(p); 813 SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, 814 sio_pgsigio); 815 PROC_UNLOCK(p); 816 } 817 SIGIO_UNLOCK(); 818 crfree(sigio->sio_ucred); 819 FREE(sigio, M_SIGIO); 820 SIGIO_LOCK(); 821 } 822 SIGIO_UNLOCK(); 823 } 824 825 /* 826 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg). 827 * 828 * After permission checking, add a sigio structure to the sigio list for 829 * the process or process group. 830 */ 831 int 832 fsetown(pid_t pgid, struct sigio **sigiop) 833 { 834 struct proc *proc; 835 struct pgrp *pgrp; 836 struct sigio *sigio; 837 int ret; 838 839 if (pgid == 0) { 840 funsetown(sigiop); 841 return (0); 842 } 843 844 ret = 0; 845 846 /* Allocate and fill in the new sigio out of locks. */ 847 MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK); 848 sigio->sio_pgid = pgid; 849 sigio->sio_ucred = crhold(curthread->td_ucred); 850 sigio->sio_myref = sigiop; 851 852 sx_slock(&proctree_lock); 853 if (pgid > 0) { 854 proc = pfind(pgid); 855 if (proc == NULL) { 856 ret = ESRCH; 857 goto fail; 858 } 859 860 /* 861 * Policy - Don't allow a process to FSETOWN a process 862 * in another session. 
863 * 864 * Remove this test to allow maximum flexibility or 865 * restrict FSETOWN to the current process or process 866 * group for maximum safety. 867 */ 868 PROC_UNLOCK(proc); 869 if (proc->p_session != curthread->td_proc->p_session) { 870 ret = EPERM; 871 goto fail; 872 } 873 874 pgrp = NULL; 875 } else /* if (pgid < 0) */ { 876 pgrp = pgfind(-pgid); 877 if (pgrp == NULL) { 878 ret = ESRCH; 879 goto fail; 880 } 881 PGRP_UNLOCK(pgrp); 882 883 /* 884 * Policy - Don't allow a process to FSETOWN a process 885 * in another session. 886 * 887 * Remove this test to allow maximum flexibility or 888 * restrict FSETOWN to the current process or process 889 * group for maximum safety. 890 */ 891 if (pgrp->pg_session != curthread->td_proc->p_session) { 892 ret = EPERM; 893 goto fail; 894 } 895 896 proc = NULL; 897 } 898 funsetown(sigiop); 899 if (pgid > 0) { 900 PROC_LOCK(proc); 901 /* 902 * Since funsetownlst() is called without the proctree 903 * locked, we need to check for P_WEXIT. 904 * XXX: is ESRCH correct? 905 */ 906 if ((proc->p_flag & P_WEXIT) != 0) { 907 PROC_UNLOCK(proc); 908 ret = ESRCH; 909 goto fail; 910 } 911 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio); 912 sigio->sio_proc = proc; 913 PROC_UNLOCK(proc); 914 } else { 915 PGRP_LOCK(pgrp); 916 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio); 917 sigio->sio_pgrp = pgrp; 918 PGRP_UNLOCK(pgrp); 919 } 920 sx_sunlock(&proctree_lock); 921 SIGIO_LOCK(); 922 *sigiop = sigio; 923 SIGIO_UNLOCK(); 924 return (0); 925 926 fail: 927 sx_sunlock(&proctree_lock); 928 crfree(sigio->sio_ucred); 929 FREE(sigio, M_SIGIO); 930 return (ret); 931 } 932 933 /* 934 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg). 935 */ 936 pid_t 937 fgetown(sigiop) 938 struct sigio **sigiop; 939 { 940 pid_t pgid; 941 942 SIGIO_LOCK(); 943 pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0; 944 SIGIO_UNLOCK(); 945 return (pgid); 946 } 947 948 /* 949 * Close a file descriptor. 
950 */ 951 #ifndef _SYS_SYSPROTO_H_ 952 struct close_args { 953 int fd; 954 }; 955 #endif 956 /* 957 * MPSAFE 958 */ 959 /* ARGSUSED */ 960 int 961 close(td, uap) 962 struct thread *td; 963 struct close_args *uap; 964 { 965 struct filedesc *fdp; 966 struct file *fp; 967 int fd, error; 968 int holdleaders; 969 970 fd = uap->fd; 971 error = 0; 972 holdleaders = 0; 973 fdp = td->td_proc->p_fd; 974 FILEDESC_LOCK(fdp); 975 if ((unsigned)fd >= fdp->fd_nfiles || 976 (fp = fdp->fd_ofiles[fd]) == NULL) { 977 FILEDESC_UNLOCK(fdp); 978 return (EBADF); 979 } 980 fdp->fd_ofiles[fd] = NULL; 981 fdp->fd_ofileflags[fd] = 0; 982 fdunused(fdp, fd); 983 if (td->td_proc->p_fdtol != NULL) { 984 /* 985 * Ask fdfree() to sleep to ensure that all relevant 986 * process leaders can be traversed in closef(). 987 */ 988 fdp->fd_holdleaderscount++; 989 holdleaders = 1; 990 } 991 992 /* 993 * we now hold the fp reference that used to be owned by the descriptor 994 * array. 995 * We have to unlock the FILEDESC *AFTER* knote_fdclose to prevent a 996 * race of the fd getting opened, a knote added, and deleteing a knote 997 * for the new fd. 998 */ 999 knote_fdclose(td, fd); 1000 FILEDESC_UNLOCK(fdp); 1001 1002 error = closef(fp, td); 1003 if (holdleaders) { 1004 FILEDESC_LOCK_FAST(fdp); 1005 fdp->fd_holdleaderscount--; 1006 if (fdp->fd_holdleaderscount == 0 && 1007 fdp->fd_holdleaderswakeup != 0) { 1008 fdp->fd_holdleaderswakeup = 0; 1009 wakeup(&fdp->fd_holdleaderscount); 1010 } 1011 FILEDESC_UNLOCK_FAST(fdp); 1012 } 1013 return (error); 1014 } 1015 1016 #if defined(COMPAT_43) 1017 /* 1018 * Return status information about a file descriptor. 
1019 */ 1020 #ifndef _SYS_SYSPROTO_H_ 1021 struct ofstat_args { 1022 int fd; 1023 struct ostat *sb; 1024 }; 1025 #endif 1026 /* 1027 * MPSAFE 1028 */ 1029 /* ARGSUSED */ 1030 int 1031 ofstat(struct thread *td, struct ofstat_args *uap) 1032 { 1033 struct ostat oub; 1034 struct stat ub; 1035 int error; 1036 1037 error = kern_fstat(td, uap->fd, &ub); 1038 if (error == 0) { 1039 cvtstat(&ub, &oub); 1040 error = copyout(&oub, uap->sb, sizeof(oub)); 1041 } 1042 return (error); 1043 } 1044 #endif /* COMPAT_43 */ 1045 1046 /* 1047 * Return status information about a file descriptor. 1048 */ 1049 #ifndef _SYS_SYSPROTO_H_ 1050 struct fstat_args { 1051 int fd; 1052 struct stat *sb; 1053 }; 1054 #endif 1055 /* 1056 * MPSAFE 1057 */ 1058 /* ARGSUSED */ 1059 int 1060 fstat(struct thread *td, struct fstat_args *uap) 1061 { 1062 struct stat ub; 1063 int error; 1064 1065 error = kern_fstat(td, uap->fd, &ub); 1066 if (error == 0) 1067 error = copyout(&ub, uap->sb, sizeof(ub)); 1068 return (error); 1069 } 1070 1071 int 1072 kern_fstat(struct thread *td, int fd, struct stat *sbp) 1073 { 1074 struct file *fp; 1075 int error; 1076 1077 if ((error = fget(td, fd, &fp)) != 0) 1078 return (error); 1079 error = fo_stat(fp, sbp, td->td_ucred, td); 1080 fdrop(fp, td); 1081 return (error); 1082 } 1083 1084 /* 1085 * Return status information about a file descriptor. 1086 */ 1087 #ifndef _SYS_SYSPROTO_H_ 1088 struct nfstat_args { 1089 int fd; 1090 struct nstat *sb; 1091 }; 1092 #endif 1093 /* 1094 * MPSAFE 1095 */ 1096 /* ARGSUSED */ 1097 int 1098 nfstat(struct thread *td, struct nfstat_args *uap) 1099 { 1100 struct nstat nub; 1101 struct stat ub; 1102 int error; 1103 1104 error = kern_fstat(td, uap->fd, &ub); 1105 if (error == 0) { 1106 cvtnstat(&ub, &nub); 1107 error = copyout(&nub, uap->sb, sizeof(nub)); 1108 } 1109 return (error); 1110 } 1111 1112 /* 1113 * Return pathconf information about a file descriptor. 
1114 */ 1115 #ifndef _SYS_SYSPROTO_H_ 1116 struct fpathconf_args { 1117 int fd; 1118 int name; 1119 }; 1120 #endif 1121 /* 1122 * MPSAFE 1123 */ 1124 /* ARGSUSED */ 1125 int 1126 fpathconf(struct thread *td, struct fpathconf_args *uap) 1127 { 1128 struct file *fp; 1129 struct vnode *vp; 1130 int error; 1131 1132 if ((error = fget(td, uap->fd, &fp)) != 0) 1133 return (error); 1134 1135 /* If asynchronous I/O is available, it works for all descriptors. */ 1136 if (uap->name == _PC_ASYNC_IO) { 1137 td->td_retval[0] = async_io_version; 1138 goto out; 1139 } 1140 vp = fp->f_vnode; 1141 if (vp != NULL) { 1142 int vfslocked; 1143 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1144 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1145 error = VOP_PATHCONF(vp, uap->name, td->td_retval); 1146 VOP_UNLOCK(vp, 0, td); 1147 VFS_UNLOCK_GIANT(vfslocked); 1148 } else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { 1149 if (uap->name != _PC_PIPE_BUF) { 1150 error = EINVAL; 1151 } else { 1152 td->td_retval[0] = PIPE_BUF; 1153 error = 0; 1154 } 1155 } else { 1156 error = EOPNOTSUPP; 1157 } 1158 out: 1159 fdrop(fp, td); 1160 return (error); 1161 } 1162 1163 /* 1164 * Grow the file table to accomodate (at least) nfd descriptors. This may 1165 * block and drop the filedesc lock, but it will reacquire it before 1166 * returing. 
*/
static void
fdgrowtable(struct filedesc *fdp, int nfd)
{
	struct file **ntable;
	char *nfileflags;
	int nnfiles, onfiles;
	NDSLOTTYPE *nmap;

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	KASSERT(fdp->fd_nfiles > 0,
	    ("zero-length file table"));

	/* compute the size of the new table */
	onfiles = fdp->fd_nfiles;
	nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */
	if (nnfiles <= onfiles)
		/* the table is already large enough */
		return;

	/*
	 * allocate a new table and (if required) new bitmaps
	 *
	 * One allocation of nnfiles * OFILESIZE bytes holds both arrays:
	 * the file pointers first, then the per-descriptor flag bytes,
	 * which start right after the last file pointer.
	 */
	FILEDESC_UNLOCK(fdp);
	MALLOC(ntable, struct file **, nnfiles * OFILESIZE,
	    M_FILEDESC, M_ZERO | M_WAITOK);
	nfileflags = (char *)&ntable[nnfiles];
	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles))
		MALLOC(nmap, NDSLOTTYPE *, NDSLOTS(nnfiles) * NDSLOTSIZE,
		    M_FILEDESC, M_ZERO | M_WAITOK);
	else
		nmap = NULL;
	FILEDESC_LOCK(fdp);

	/*
	 * We now have new tables ready to go.  Since we dropped the
	 * filedesc lock to call malloc(), watch out for a race.
	 */
	onfiles = fdp->fd_nfiles;
	if (onfiles >= nnfiles) {
		/* we lost the race, but that's OK */
		free(ntable, M_FILEDESC);
		if (nmap != NULL)
			free(nmap, M_FILEDESC);
		return;
	}
	/* Copy the live entries across; the rest was zeroed by M_ZERO. */
	bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable));
	bcopy(fdp->fd_ofileflags, nfileflags, onfiles);
	/*
	 * Only a table larger than NDFILE entries is freed here.
	 * NOTE(review): presumably because a table of NDFILE entries is
	 * the one embedded in struct filedesc0 -- confirm.
	 */
	if (onfiles > NDFILE)
		free(fdp->fd_ofiles, M_FILEDESC);
	fdp->fd_ofiles = ntable;
	fdp->fd_ofileflags = nfileflags;
	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
		bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap));
		if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
			free(fdp->fd_map, M_FILEDESC);
		fdp->fd_map = nmap;
	}
	fdp->fd_nfiles = nnfiles;
}

/*
 * Allocate a file descriptor for the process.
1229 */ 1230 int 1231 fdalloc(struct thread *td, int minfd, int *result) 1232 { 1233 struct proc *p = td->td_proc; 1234 struct filedesc *fdp = p->p_fd; 1235 int fd = -1, maxfd; 1236 1237 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 1238 1239 PROC_LOCK(p); 1240 maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 1241 PROC_UNLOCK(p); 1242 1243 /* 1244 * Search the bitmap for a free descriptor. If none is found, try 1245 * to grow the file table. Keep at it until we either get a file 1246 * descriptor or run into process or system limits; fdgrowtable() 1247 * may drop the filedesc lock, so we're in a race. 1248 */ 1249 for (;;) { 1250 fd = fd_first_free(fdp, minfd, fdp->fd_nfiles); 1251 if (fd >= maxfd) 1252 return (EMFILE); 1253 if (fd < fdp->fd_nfiles) 1254 break; 1255 fdgrowtable(fdp, min(fdp->fd_nfiles * 2, maxfd)); 1256 } 1257 1258 /* 1259 * Perform some sanity checks, then mark the file descriptor as 1260 * used and return it to the caller. 1261 */ 1262 KASSERT(!fdisused(fdp, fd), 1263 ("fd_first_free() returned non-free descriptor")); 1264 KASSERT(fdp->fd_ofiles[fd] == NULL, 1265 ("free descriptor isn't")); 1266 fdp->fd_ofileflags[fd] = 0; /* XXX needed? */ 1267 fdused(fdp, fd); 1268 fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles); 1269 *result = fd; 1270 return (0); 1271 } 1272 1273 /* 1274 * Check to see whether n user file descriptors 1275 * are available to the process p. 
1276 */ 1277 int 1278 fdavail(struct thread *td, int n) 1279 { 1280 struct proc *p = td->td_proc; 1281 struct filedesc *fdp = td->td_proc->p_fd; 1282 struct file **fpp; 1283 int i, lim, last; 1284 1285 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 1286 1287 PROC_LOCK(p); 1288 lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 1289 PROC_UNLOCK(p); 1290 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) 1291 return (1); 1292 last = min(fdp->fd_nfiles, lim); 1293 fpp = &fdp->fd_ofiles[fdp->fd_freefile]; 1294 for (i = last - fdp->fd_freefile; --i >= 0; fpp++) { 1295 if (*fpp == NULL && --n <= 0) 1296 return (1); 1297 } 1298 return (0); 1299 } 1300 1301 /* 1302 * Create a new open file structure and allocate 1303 * a file decriptor for the process that refers to it. 1304 * We add one reference to the file for the descriptor table 1305 * and one reference for resultfp. This is to prevent us being 1306 * prempted and the entry in the descriptor table closed after 1307 * we release the FILEDESC lock. 1308 */ 1309 int 1310 falloc(struct thread *td, struct file **resultfp, int *resultfd) 1311 { 1312 struct proc *p = td->td_proc; 1313 struct file *fp, *fq; 1314 int error, i; 1315 int maxuserfiles = maxfiles - (maxfiles / 20); 1316 static struct timeval lastfail; 1317 static int curfail; 1318 1319 fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO); 1320 sx_xlock(&filelist_lock); 1321 if ((openfiles >= maxuserfiles && (td->td_ucred->cr_ruid != 0 || 1322 jailed(td->td_ucred))) || openfiles >= maxfiles) { 1323 if (ppsratecheck(&lastfail, &curfail, 1)) { 1324 printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n", 1325 td->td_ucred->cr_ruid); 1326 } 1327 sx_xunlock(&filelist_lock); 1328 uma_zfree(file_zone, fp); 1329 return (ENFILE); 1330 } 1331 openfiles++; 1332 1333 /* 1334 * If the process has file descriptor zero open, add the new file 1335 * descriptor to the list of open files at that point, otherwise 1336 * put it at the front of the list of open files. 
1337 */ 1338 fp->f_mtxp = mtx_pool_alloc(mtxpool_sleep); 1339 fp->f_count = 1; 1340 if (resultfp) 1341 fp->f_count++; 1342 fp->f_cred = crhold(td->td_ucred); 1343 fp->f_ops = &badfileops; 1344 fp->f_data = NULL; 1345 fp->f_vnode = NULL; 1346 FILEDESC_LOCK(p->p_fd); 1347 if ((fq = p->p_fd->fd_ofiles[0])) { 1348 LIST_INSERT_AFTER(fq, fp, f_list); 1349 } else { 1350 LIST_INSERT_HEAD(&filehead, fp, f_list); 1351 } 1352 sx_xunlock(&filelist_lock); 1353 if ((error = fdalloc(td, 0, &i))) { 1354 FILEDESC_UNLOCK(p->p_fd); 1355 fdrop(fp, td); 1356 if (resultfp) 1357 fdrop(fp, td); 1358 return (error); 1359 } 1360 p->p_fd->fd_ofiles[i] = fp; 1361 FILEDESC_UNLOCK(p->p_fd); 1362 if (resultfp) 1363 *resultfp = fp; 1364 if (resultfd) 1365 *resultfd = i; 1366 return (0); 1367 } 1368 1369 /* 1370 * Build a new filedesc structure from another. 1371 * Copy the current, root, and jail root vnode references. 1372 */ 1373 struct filedesc * 1374 fdinit(struct filedesc *fdp) 1375 { 1376 struct filedesc0 *newfdp; 1377 1378 newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO); 1379 mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF); 1380 if (fdp != NULL) { 1381 FILEDESC_LOCK(fdp); 1382 newfdp->fd_fd.fd_cdir = fdp->fd_cdir; 1383 if (newfdp->fd_fd.fd_cdir) 1384 VREF(newfdp->fd_fd.fd_cdir); 1385 newfdp->fd_fd.fd_rdir = fdp->fd_rdir; 1386 if (newfdp->fd_fd.fd_rdir) 1387 VREF(newfdp->fd_fd.fd_rdir); 1388 newfdp->fd_fd.fd_jdir = fdp->fd_jdir; 1389 if (newfdp->fd_fd.fd_jdir) 1390 VREF(newfdp->fd_fd.fd_jdir); 1391 FILEDESC_UNLOCK(fdp); 1392 } 1393 1394 /* Create the file descriptor table. 
*/ 1395 newfdp->fd_fd.fd_refcnt = 1; 1396 newfdp->fd_fd.fd_holdcnt = 1; 1397 newfdp->fd_fd.fd_cmask = CMASK; 1398 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 1399 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 1400 newfdp->fd_fd.fd_nfiles = NDFILE; 1401 newfdp->fd_fd.fd_map = newfdp->fd_dmap; 1402 return (&newfdp->fd_fd); 1403 } 1404 1405 static struct filedesc * 1406 fdhold(struct proc *p) 1407 { 1408 struct filedesc *fdp; 1409 1410 mtx_lock(&fdesc_mtx); 1411 fdp = p->p_fd; 1412 if (fdp != NULL) 1413 fdp->fd_holdcnt++; 1414 mtx_unlock(&fdesc_mtx); 1415 return (fdp); 1416 } 1417 1418 static void 1419 fddrop(struct filedesc *fdp) 1420 { 1421 int i; 1422 1423 mtx_lock(&fdesc_mtx); 1424 i = --fdp->fd_holdcnt; 1425 mtx_unlock(&fdesc_mtx); 1426 if (i > 0) 1427 return; 1428 1429 mtx_destroy(&fdp->fd_mtx); 1430 FREE(fdp, M_FILEDESC); 1431 } 1432 1433 /* 1434 * Share a filedesc structure. 1435 */ 1436 struct filedesc * 1437 fdshare(struct filedesc *fdp) 1438 { 1439 FILEDESC_LOCK_FAST(fdp); 1440 fdp->fd_refcnt++; 1441 FILEDESC_UNLOCK_FAST(fdp); 1442 return (fdp); 1443 } 1444 1445 /* 1446 * Unshare a filedesc structure, if necessary by making a copy 1447 */ 1448 void 1449 fdunshare(struct proc *p, struct thread *td) 1450 { 1451 1452 FILEDESC_LOCK_FAST(p->p_fd); 1453 if (p->p_fd->fd_refcnt > 1) { 1454 struct filedesc *tmp; 1455 1456 FILEDESC_UNLOCK_FAST(p->p_fd); 1457 tmp = fdcopy(p->p_fd); 1458 fdfree(td); 1459 p->p_fd = tmp; 1460 } else 1461 FILEDESC_UNLOCK_FAST(p->p_fd); 1462 } 1463 1464 /* 1465 * Copy a filedesc structure. 1466 * A NULL pointer in returns a NULL reference, this is to ease callers, 1467 * not catch errors. 1468 */ 1469 struct filedesc * 1470 fdcopy(struct filedesc *fdp) 1471 { 1472 struct filedesc *newfdp; 1473 int i; 1474 1475 /* Certain daemons might not have file descriptors. 
*/ 1476 if (fdp == NULL) 1477 return (NULL); 1478 1479 newfdp = fdinit(fdp); 1480 FILEDESC_LOCK_FAST(fdp); 1481 while (fdp->fd_lastfile >= newfdp->fd_nfiles) { 1482 FILEDESC_UNLOCK_FAST(fdp); 1483 FILEDESC_LOCK(newfdp); 1484 fdgrowtable(newfdp, fdp->fd_lastfile + 1); 1485 FILEDESC_UNLOCK(newfdp); 1486 FILEDESC_LOCK_FAST(fdp); 1487 } 1488 /* copy everything except kqueue descriptors */ 1489 newfdp->fd_freefile = -1; 1490 for (i = 0; i <= fdp->fd_lastfile; ++i) { 1491 if (fdisused(fdp, i) && 1492 fdp->fd_ofiles[i]->f_type != DTYPE_KQUEUE) { 1493 newfdp->fd_ofiles[i] = fdp->fd_ofiles[i]; 1494 newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i]; 1495 fhold(newfdp->fd_ofiles[i]); 1496 newfdp->fd_lastfile = i; 1497 } else { 1498 if (newfdp->fd_freefile == -1) 1499 newfdp->fd_freefile = i; 1500 } 1501 } 1502 FILEDESC_UNLOCK_FAST(fdp); 1503 FILEDESC_LOCK(newfdp); 1504 for (i = 0; i <= newfdp->fd_lastfile; ++i) 1505 if (newfdp->fd_ofiles[i] != NULL) 1506 fdused(newfdp, i); 1507 FILEDESC_UNLOCK(newfdp); 1508 FILEDESC_LOCK_FAST(fdp); 1509 if (newfdp->fd_freefile == -1) 1510 newfdp->fd_freefile = i; 1511 newfdp->fd_cmask = fdp->fd_cmask; 1512 FILEDESC_UNLOCK_FAST(fdp); 1513 return (newfdp); 1514 } 1515 1516 /* 1517 * Release a filedesc structure. 1518 */ 1519 void 1520 fdfree(struct thread *td) 1521 { 1522 struct filedesc *fdp; 1523 struct file **fpp; 1524 int i; 1525 struct filedesc_to_leader *fdtol; 1526 struct file *fp; 1527 struct vnode *vp; 1528 struct flock lf; 1529 1530 /* Certain daemons might not have file descriptors. 
*/ 1531 fdp = td->td_proc->p_fd; 1532 if (fdp == NULL) 1533 return; 1534 1535 /* Check for special need to clear POSIX style locks */ 1536 fdtol = td->td_proc->p_fdtol; 1537 if (fdtol != NULL) { 1538 FILEDESC_LOCK(fdp); 1539 KASSERT(fdtol->fdl_refcount > 0, 1540 ("filedesc_to_refcount botch: fdl_refcount=%d", 1541 fdtol->fdl_refcount)); 1542 if (fdtol->fdl_refcount == 1 && 1543 (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1544 i = 0; 1545 fpp = fdp->fd_ofiles; 1546 for (i = 0, fpp = fdp->fd_ofiles; 1547 i <= fdp->fd_lastfile; 1548 i++, fpp++) { 1549 if (*fpp == NULL || 1550 (*fpp)->f_type != DTYPE_VNODE) 1551 continue; 1552 fp = *fpp; 1553 fhold(fp); 1554 FILEDESC_UNLOCK(fdp); 1555 lf.l_whence = SEEK_SET; 1556 lf.l_start = 0; 1557 lf.l_len = 0; 1558 lf.l_type = F_UNLCK; 1559 vp = fp->f_vnode; 1560 VFS_ASSERT_GIANT(vp->v_mount); 1561 (void) VOP_ADVLOCK(vp, 1562 (caddr_t)td->td_proc-> 1563 p_leader, 1564 F_UNLCK, 1565 &lf, 1566 F_POSIX); 1567 FILEDESC_LOCK(fdp); 1568 fdrop(fp, td); 1569 fpp = fdp->fd_ofiles + i; 1570 } 1571 } 1572 retry: 1573 if (fdtol->fdl_refcount == 1) { 1574 if (fdp->fd_holdleaderscount > 0 && 1575 (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1576 /* 1577 * close() or do_dup() has cleared a reference 1578 * in a shared file descriptor table. 1579 */ 1580 fdp->fd_holdleaderswakeup = 1; 1581 msleep(&fdp->fd_holdleaderscount, &fdp->fd_mtx, 1582 PLOCK, "fdlhold", 0); 1583 goto retry; 1584 } 1585 if (fdtol->fdl_holdcount > 0) { 1586 /* 1587 * Ensure that fdtol->fdl_leader 1588 * remains valid in closef(). 
1589 */ 1590 fdtol->fdl_wakeup = 1; 1591 msleep(fdtol, &fdp->fd_mtx, 1592 PLOCK, "fdlhold", 0); 1593 goto retry; 1594 } 1595 } 1596 fdtol->fdl_refcount--; 1597 if (fdtol->fdl_refcount == 0 && 1598 fdtol->fdl_holdcount == 0) { 1599 fdtol->fdl_next->fdl_prev = fdtol->fdl_prev; 1600 fdtol->fdl_prev->fdl_next = fdtol->fdl_next; 1601 } else 1602 fdtol = NULL; 1603 td->td_proc->p_fdtol = NULL; 1604 FILEDESC_UNLOCK(fdp); 1605 if (fdtol != NULL) 1606 FREE(fdtol, M_FILEDESC_TO_LEADER); 1607 } 1608 FILEDESC_LOCK_FAST(fdp); 1609 i = --fdp->fd_refcnt; 1610 FILEDESC_UNLOCK_FAST(fdp); 1611 if (i > 0) 1612 return; 1613 /* 1614 * We are the last reference to the structure, so we can 1615 * safely assume it will not change out from under us. 1616 */ 1617 fpp = fdp->fd_ofiles; 1618 for (i = fdp->fd_lastfile; i-- >= 0; fpp++) { 1619 if (*fpp) 1620 (void) closef(*fpp, td); 1621 } 1622 FILEDESC_LOCK(fdp); 1623 1624 /* XXX This should happen earlier. */ 1625 mtx_lock(&fdesc_mtx); 1626 td->td_proc->p_fd = NULL; 1627 mtx_unlock(&fdesc_mtx); 1628 1629 if (fdp->fd_nfiles > NDFILE) 1630 FREE(fdp->fd_ofiles, M_FILEDESC); 1631 if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE)) 1632 FREE(fdp->fd_map, M_FILEDESC); 1633 1634 fdp->fd_nfiles = 0; 1635 1636 if (fdp->fd_cdir) 1637 vrele(fdp->fd_cdir); 1638 fdp->fd_cdir = NULL; 1639 if (fdp->fd_rdir) 1640 vrele(fdp->fd_rdir); 1641 fdp->fd_rdir = NULL; 1642 if (fdp->fd_jdir) 1643 vrele(fdp->fd_jdir); 1644 fdp->fd_jdir = NULL; 1645 1646 FILEDESC_UNLOCK(fdp); 1647 1648 fddrop(fdp); 1649 } 1650 1651 /* 1652 * For setugid programs, we don't want to people to use that setugidness 1653 * to generate error messages which write to a file which otherwise would 1654 * otherwise be off-limits to the process. We check for filesystems where 1655 * the vnode can change out from under us after execve (like [lin]procfs). 1656 * 1657 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is 1658 * sufficient. 
We also don't for check setugidness since we know we are. 1659 */ 1660 static int 1661 is_unsafe(struct file *fp) 1662 { 1663 if (fp->f_type == DTYPE_VNODE) { 1664 struct vnode *vp = fp->f_vnode; 1665 1666 if ((vp->v_vflag & VV_PROCDEP) != 0) 1667 return (1); 1668 } 1669 return (0); 1670 } 1671 1672 /* 1673 * Make this setguid thing safe, if at all possible. 1674 */ 1675 void 1676 setugidsafety(struct thread *td) 1677 { 1678 struct filedesc *fdp; 1679 int i; 1680 1681 /* Certain daemons might not have file descriptors. */ 1682 fdp = td->td_proc->p_fd; 1683 if (fdp == NULL) 1684 return; 1685 1686 /* 1687 * Note: fdp->fd_ofiles may be reallocated out from under us while 1688 * we are blocked in a close. Be careful! 1689 */ 1690 FILEDESC_LOCK(fdp); 1691 for (i = 0; i <= fdp->fd_lastfile; i++) { 1692 if (i > 2) 1693 break; 1694 if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) { 1695 struct file *fp; 1696 1697 knote_fdclose(td, i); 1698 /* 1699 * NULL-out descriptor prior to close to avoid 1700 * a race while close blocks. 1701 */ 1702 fp = fdp->fd_ofiles[i]; 1703 fdp->fd_ofiles[i] = NULL; 1704 fdp->fd_ofileflags[i] = 0; 1705 fdunused(fdp, i); 1706 FILEDESC_UNLOCK(fdp); 1707 (void) closef(fp, td); 1708 FILEDESC_LOCK(fdp); 1709 } 1710 } 1711 FILEDESC_UNLOCK(fdp); 1712 } 1713 1714 void 1715 fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td) 1716 { 1717 1718 FILEDESC_LOCK(fdp); 1719 if (fdp->fd_ofiles[idx] == fp) { 1720 fdp->fd_ofiles[idx] = NULL; 1721 fdunused(fdp, idx); 1722 FILEDESC_UNLOCK(fdp); 1723 fdrop(fp, td); 1724 } else { 1725 FILEDESC_UNLOCK(fdp); 1726 } 1727 } 1728 1729 /* 1730 * Close any files on exec? 1731 */ 1732 void 1733 fdcloseexec(struct thread *td) 1734 { 1735 struct filedesc *fdp; 1736 int i; 1737 1738 /* Certain daemons might not have file descriptors. 
*/ 1739 fdp = td->td_proc->p_fd; 1740 if (fdp == NULL) 1741 return; 1742 1743 FILEDESC_LOCK(fdp); 1744 1745 /* 1746 * We cannot cache fd_ofiles or fd_ofileflags since operations 1747 * may block and rip them out from under us. 1748 */ 1749 for (i = 0; i <= fdp->fd_lastfile; i++) { 1750 if (fdp->fd_ofiles[i] != NULL && 1751 (fdp->fd_ofileflags[i] & UF_EXCLOSE)) { 1752 struct file *fp; 1753 1754 knote_fdclose(td, i); 1755 /* 1756 * NULL-out descriptor prior to close to avoid 1757 * a race while close blocks. 1758 */ 1759 fp = fdp->fd_ofiles[i]; 1760 fdp->fd_ofiles[i] = NULL; 1761 fdp->fd_ofileflags[i] = 0; 1762 fdunused(fdp, i); 1763 FILEDESC_UNLOCK(fdp); 1764 (void) closef(fp, td); 1765 FILEDESC_LOCK(fdp); 1766 } 1767 } 1768 FILEDESC_UNLOCK(fdp); 1769 } 1770 1771 /* 1772 * It is unsafe for set[ug]id processes to be started with file 1773 * descriptors 0..2 closed, as these descriptors are given implicit 1774 * significance in the Standard C library. fdcheckstd() will create a 1775 * descriptor referencing /dev/null for each of stdin, stdout, and 1776 * stderr that is not already open. 1777 */ 1778 int 1779 fdcheckstd(struct thread *td) 1780 { 1781 struct nameidata nd; 1782 struct filedesc *fdp; 1783 struct file *fp; 1784 register_t retval; 1785 int fd, i, error, flags, devnull; 1786 1787 GIANT_REQUIRED; /* VFS */ 1788 1789 fdp = td->td_proc->p_fd; 1790 if (fdp == NULL) 1791 return (0); 1792 KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared")); 1793 devnull = -1; 1794 error = 0; 1795 for (i = 0; i < 3; i++) { 1796 if (fdp->fd_ofiles[i] != NULL) 1797 continue; 1798 if (devnull < 0) { 1799 error = falloc(td, &fp, &fd); 1800 if (error != 0) 1801 break; 1802 /* Note extra ref on `fp' held for us by falloc(). 
*/ 1803 KASSERT(fd == i, ("oof, we didn't get our fd")); 1804 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null", 1805 td); 1806 flags = FREAD | FWRITE; 1807 error = vn_open(&nd, &flags, 0, -1); 1808 if (error != 0) { 1809 /* 1810 * Someone may have closed the entry in the 1811 * file descriptor table, so check it hasn't 1812 * changed before dropping the reference count. 1813 */ 1814 FILEDESC_LOCK(fdp); 1815 KASSERT(fdp->fd_ofiles[fd] == fp, 1816 ("table not shared, how did it change?")); 1817 fdp->fd_ofiles[fd] = NULL; 1818 fdunused(fdp, fd); 1819 FILEDESC_UNLOCK(fdp); 1820 fdrop(fp, td); 1821 fdrop(fp, td); 1822 break; 1823 } 1824 NDFREE(&nd, NDF_ONLY_PNBUF); 1825 fp->f_flag = flags; 1826 fp->f_vnode = nd.ni_vp; 1827 if (fp->f_data == NULL) 1828 fp->f_data = nd.ni_vp; 1829 if (fp->f_ops == &badfileops) 1830 fp->f_ops = &vnops; 1831 fp->f_type = DTYPE_VNODE; 1832 VOP_UNLOCK(nd.ni_vp, 0, td); 1833 devnull = fd; 1834 fdrop(fp, td); 1835 } else { 1836 error = do_dup(td, DUP_FIXED, devnull, i, &retval); 1837 if (error != 0) 1838 break; 1839 } 1840 } 1841 return (error); 1842 } 1843 1844 /* 1845 * Internal form of close. 1846 * Decrement reference count on file structure. 1847 * Note: td may be NULL when closing a file that was being passed in a 1848 * message. 1849 * 1850 * XXXRW: Giant is not required for the caller, but often will be held; this 1851 * makes it moderately likely the Giant will be recursed in the VFS case. 1852 */ 1853 int 1854 closef(struct file *fp, struct thread *td) 1855 { 1856 struct vnode *vp; 1857 struct flock lf; 1858 struct filedesc_to_leader *fdtol; 1859 struct filedesc *fdp; 1860 1861 /* 1862 * POSIX record locking dictates that any close releases ALL 1863 * locks owned by this process. This is handled by setting 1864 * a flag in the unlock to free ONLY locks obeying POSIX 1865 * semantics, and not to free BSD-style file locks. 1866 * If the descriptor was in a message, POSIX-style locks 1867 * aren't passed with the descriptor. 
1868 */ 1869 if (fp->f_type == DTYPE_VNODE) { 1870 int vfslocked; 1871 1872 vp = fp->f_vnode; 1873 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1874 if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1875 lf.l_whence = SEEK_SET; 1876 lf.l_start = 0; 1877 lf.l_len = 0; 1878 lf.l_type = F_UNLCK; 1879 (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader, 1880 F_UNLCK, &lf, F_POSIX); 1881 } 1882 fdtol = td->td_proc->p_fdtol; 1883 if (fdtol != NULL) { 1884 /* 1885 * Handle special case where file descriptor table 1886 * is shared between multiple process leaders. 1887 */ 1888 fdp = td->td_proc->p_fd; 1889 FILEDESC_LOCK(fdp); 1890 for (fdtol = fdtol->fdl_next; 1891 fdtol != td->td_proc->p_fdtol; 1892 fdtol = fdtol->fdl_next) { 1893 if ((fdtol->fdl_leader->p_flag & 1894 P_ADVLOCK) == 0) 1895 continue; 1896 fdtol->fdl_holdcount++; 1897 FILEDESC_UNLOCK(fdp); 1898 lf.l_whence = SEEK_SET; 1899 lf.l_start = 0; 1900 lf.l_len = 0; 1901 lf.l_type = F_UNLCK; 1902 vp = fp->f_vnode; 1903 (void) VOP_ADVLOCK(vp, 1904 (caddr_t)fdtol->fdl_leader, 1905 F_UNLCK, &lf, F_POSIX); 1906 FILEDESC_LOCK(fdp); 1907 fdtol->fdl_holdcount--; 1908 if (fdtol->fdl_holdcount == 0 && 1909 fdtol->fdl_wakeup != 0) { 1910 fdtol->fdl_wakeup = 0; 1911 wakeup(fdtol); 1912 } 1913 } 1914 FILEDESC_UNLOCK(fdp); 1915 } 1916 VFS_UNLOCK_GIANT(vfslocked); 1917 } 1918 return (fdrop(fp, td)); 1919 } 1920 1921 /* 1922 * Extract the file pointer associated with the specified descriptor for 1923 * the current user process. 1924 * 1925 * If the descriptor doesn't exist, EBADF is returned. 1926 * 1927 * If the descriptor exists but doesn't match 'flags' then 1928 * return EBADF for read attempts and EINVAL for write attempts. 1929 * 1930 * If 'hold' is set (non-zero) the file's refcount will be bumped on return. 1931 * It should be droped with fdrop(). 1932 * If it is not set, then the refcount will not be bumped however the 1933 * thread's filedesc struct will be returned locked (for fgetsock). 
1934 * 1935 * If an error occured the non-zero error is returned and *fpp is set to NULL. 1936 * Otherwise *fpp is set and zero is returned. 1937 */ 1938 static __inline int 1939 _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold) 1940 { 1941 struct filedesc *fdp; 1942 struct file *fp; 1943 1944 *fpp = NULL; 1945 if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) 1946 return (EBADF); 1947 FILEDESC_LOCK(fdp); 1948 if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) { 1949 FILEDESC_UNLOCK(fdp); 1950 return (EBADF); 1951 } 1952 1953 /* 1954 * Note: FREAD failures returns EBADF to maintain backwards 1955 * compatibility with what routines returned before. 1956 * 1957 * Only one flag, or 0, may be specified. 1958 */ 1959 if (flags == FREAD && (fp->f_flag & FREAD) == 0) { 1960 FILEDESC_UNLOCK(fdp); 1961 return (EBADF); 1962 } 1963 if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) { 1964 FILEDESC_UNLOCK(fdp); 1965 return (EINVAL); 1966 } 1967 if (hold) { 1968 fhold(fp); 1969 FILEDESC_UNLOCK(fdp); 1970 } 1971 *fpp = fp; 1972 return (0); 1973 } 1974 1975 int 1976 fget(struct thread *td, int fd, struct file **fpp) 1977 { 1978 1979 return(_fget(td, fd, fpp, 0, 1)); 1980 } 1981 1982 int 1983 fget_read(struct thread *td, int fd, struct file **fpp) 1984 { 1985 1986 return(_fget(td, fd, fpp, FREAD, 1)); 1987 } 1988 1989 int 1990 fget_write(struct thread *td, int fd, struct file **fpp) 1991 { 1992 1993 return(_fget(td, fd, fpp, FWRITE, 1)); 1994 } 1995 1996 /* 1997 * Like fget() but loads the underlying vnode, or returns an error if 1998 * the descriptor does not represent a vnode. Note that pipes use vnodes 1999 * but never have VM objects. The returned vnode will be vref()d. 2000 * 2001 * XXX: what about the unused flags ? 
2002 */ 2003 static __inline int 2004 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags) 2005 { 2006 struct file *fp; 2007 int error; 2008 2009 *vpp = NULL; 2010 if ((error = _fget(td, fd, &fp, 0, 0)) != 0) 2011 return (error); 2012 if (fp->f_vnode == NULL) { 2013 error = EINVAL; 2014 } else { 2015 *vpp = fp->f_vnode; 2016 vref(*vpp); 2017 } 2018 FILEDESC_UNLOCK(td->td_proc->p_fd); 2019 return (error); 2020 } 2021 2022 int 2023 fgetvp(struct thread *td, int fd, struct vnode **vpp) 2024 { 2025 2026 return (_fgetvp(td, fd, vpp, 0)); 2027 } 2028 2029 int 2030 fgetvp_read(struct thread *td, int fd, struct vnode **vpp) 2031 { 2032 2033 return (_fgetvp(td, fd, vpp, FREAD)); 2034 } 2035 2036 #ifdef notyet 2037 int 2038 fgetvp_write(struct thread *td, int fd, struct vnode **vpp) 2039 { 2040 2041 return (_fgetvp(td, fd, vpp, FWRITE)); 2042 } 2043 #endif 2044 2045 /* 2046 * Like fget() but loads the underlying socket, or returns an error if 2047 * the descriptor does not represent a socket. 2048 * 2049 * We bump the ref count on the returned socket. XXX Also obtain the SX 2050 * lock in the future. 2051 */ 2052 int 2053 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp) 2054 { 2055 struct file *fp; 2056 int error; 2057 2058 NET_ASSERT_GIANT(); 2059 2060 *spp = NULL; 2061 if (fflagp != NULL) 2062 *fflagp = 0; 2063 if ((error = _fget(td, fd, &fp, 0, 0)) != 0) 2064 return (error); 2065 if (fp->f_type != DTYPE_SOCKET) { 2066 error = ENOTSOCK; 2067 } else { 2068 *spp = fp->f_data; 2069 if (fflagp) 2070 *fflagp = fp->f_flag; 2071 SOCK_LOCK(*spp); 2072 soref(*spp); 2073 SOCK_UNLOCK(*spp); 2074 } 2075 FILEDESC_UNLOCK(td->td_proc->p_fd); 2076 return (error); 2077 } 2078 2079 /* 2080 * Drop the reference count on the the socket and XXX release the SX lock in 2081 * the future. The last reference closes the socket. 
2082 */ 2083 void 2084 fputsock(struct socket *so) 2085 { 2086 2087 NET_ASSERT_GIANT(); 2088 ACCEPT_LOCK(); 2089 SOCK_LOCK(so); 2090 sorele(so); 2091 } 2092 2093 int 2094 fdrop(struct file *fp, struct thread *td) 2095 { 2096 2097 FILE_LOCK(fp); 2098 return (fdrop_locked(fp, td)); 2099 } 2100 2101 /* 2102 * Drop reference on struct file passed in, may call closef if the 2103 * reference hits zero. 2104 * Expects struct file locked, and will unlock it. 2105 */ 2106 int 2107 fdrop_locked(struct file *fp, struct thread *td) 2108 { 2109 int error; 2110 2111 FILE_LOCK_ASSERT(fp, MA_OWNED); 2112 2113 if (--fp->f_count > 0) { 2114 FILE_UNLOCK(fp); 2115 return (0); 2116 } 2117 /* We have the last ref so we can proceed without the file lock. */ 2118 FILE_UNLOCK(fp); 2119 if (fp->f_count < 0) 2120 panic("fdrop: count < 0"); 2121 if (fp->f_ops != &badfileops) 2122 error = fo_close(fp, td); 2123 else 2124 error = 0; 2125 2126 sx_xlock(&filelist_lock); 2127 LIST_REMOVE(fp, f_list); 2128 openfiles--; 2129 sx_xunlock(&filelist_lock); 2130 crfree(fp->f_cred); 2131 uma_zfree(file_zone, fp); 2132 2133 return (error); 2134 } 2135 2136 /* 2137 * Apply an advisory lock on a file descriptor. 2138 * 2139 * Just attempt to get a record lock of the requested type on 2140 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 
2141 */ 2142 #ifndef _SYS_SYSPROTO_H_ 2143 struct flock_args { 2144 int fd; 2145 int how; 2146 }; 2147 #endif 2148 /* 2149 * MPSAFE 2150 */ 2151 /* ARGSUSED */ 2152 int 2153 flock(struct thread *td, struct flock_args *uap) 2154 { 2155 struct file *fp; 2156 struct vnode *vp; 2157 struct flock lf; 2158 int error; 2159 2160 if ((error = fget(td, uap->fd, &fp)) != 0) 2161 return (error); 2162 if (fp->f_type != DTYPE_VNODE) { 2163 fdrop(fp, td); 2164 return (EOPNOTSUPP); 2165 } 2166 2167 mtx_lock(&Giant); 2168 vp = fp->f_vnode; 2169 lf.l_whence = SEEK_SET; 2170 lf.l_start = 0; 2171 lf.l_len = 0; 2172 if (uap->how & LOCK_UN) { 2173 lf.l_type = F_UNLCK; 2174 FILE_LOCK(fp); 2175 fp->f_flag &= ~FHASLOCK; 2176 FILE_UNLOCK(fp); 2177 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 2178 goto done2; 2179 } 2180 if (uap->how & LOCK_EX) 2181 lf.l_type = F_WRLCK; 2182 else if (uap->how & LOCK_SH) 2183 lf.l_type = F_RDLCK; 2184 else { 2185 error = EBADF; 2186 goto done2; 2187 } 2188 FILE_LOCK(fp); 2189 fp->f_flag |= FHASLOCK; 2190 FILE_UNLOCK(fp); 2191 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, 2192 (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT); 2193 done2: 2194 fdrop(fp, td); 2195 mtx_unlock(&Giant); 2196 return (error); 2197 } 2198 /* 2199 * Duplicate the specified descriptor to a free descriptor. 2200 */ 2201 int 2202 dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error) 2203 { 2204 struct file *wfp; 2205 struct file *fp; 2206 2207 /* 2208 * If the to-be-dup'd fd number is greater than the allowed number 2209 * of file descriptors, or the fd to be dup'd has already been 2210 * closed, then reject. 2211 */ 2212 FILEDESC_LOCK(fdp); 2213 if (dfd < 0 || dfd >= fdp->fd_nfiles || 2214 (wfp = fdp->fd_ofiles[dfd]) == NULL) { 2215 FILEDESC_UNLOCK(fdp); 2216 return (EBADF); 2217 } 2218 2219 /* 2220 * There are two cases of interest here. 2221 * 2222 * For ENODEV simply dup (dfd) to file descriptor 2223 * (indx) and return. 
2224 * 2225 * For ENXIO steal away the file structure from (dfd) and 2226 * store it in (indx). (dfd) is effectively closed by 2227 * this operation. 2228 * 2229 * Any other error code is just returned. 2230 */ 2231 switch (error) { 2232 case ENODEV: 2233 /* 2234 * Check that the mode the file is being opened for is a 2235 * subset of the mode of the existing descriptor. 2236 */ 2237 FILE_LOCK(wfp); 2238 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { 2239 FILE_UNLOCK(wfp); 2240 FILEDESC_UNLOCK(fdp); 2241 return (EACCES); 2242 } 2243 fp = fdp->fd_ofiles[indx]; 2244 fdp->fd_ofiles[indx] = wfp; 2245 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 2246 if (fp == NULL) 2247 fdused(fdp, indx); 2248 fhold_locked(wfp); 2249 FILE_UNLOCK(wfp); 2250 FILEDESC_UNLOCK(fdp); 2251 if (fp != NULL) { 2252 /* 2253 * We now own the reference to fp that the ofiles[] 2254 * array used to own. Release it. 2255 */ 2256 FILE_LOCK(fp); 2257 fdrop_locked(fp, td); 2258 } 2259 return (0); 2260 2261 case ENXIO: 2262 /* 2263 * Steal away the file pointer from dfd and stuff it into indx. 2264 */ 2265 fp = fdp->fd_ofiles[indx]; 2266 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; 2267 fdp->fd_ofiles[dfd] = NULL; 2268 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 2269 fdp->fd_ofileflags[dfd] = 0; 2270 fdunused(fdp, dfd); 2271 if (fp == NULL) 2272 fdused(fdp, indx); 2273 if (fp != NULL) 2274 FILE_LOCK(fp); 2275 FILEDESC_UNLOCK(fdp); 2276 2277 /* 2278 * we now own the reference to fp that the ofiles[] array 2279 * used to own. Release it. 2280 */ 2281 if (fp != NULL) 2282 fdrop_locked(fp, td); 2283 return (0); 2284 2285 default: 2286 FILEDESC_UNLOCK(fdp); 2287 return (error); 2288 } 2289 /* NOTREACHED */ 2290 } 2291 2292 /* 2293 * Scan all active processes to see if any of them have a current 2294 * or root directory of `olddp'. If so, replace them with the new 2295 * mount point. 
2296 */ 2297 void 2298 mountcheckdirs(struct vnode *olddp, struct vnode *newdp) 2299 { 2300 struct filedesc *fdp; 2301 struct proc *p; 2302 int nrele; 2303 2304 if (vrefcnt(olddp) == 1) 2305 return; 2306 sx_slock(&allproc_lock); 2307 LIST_FOREACH(p, &allproc, p_list) { 2308 fdp = fdhold(p); 2309 if (fdp == NULL) 2310 continue; 2311 nrele = 0; 2312 FILEDESC_LOCK_FAST(fdp); 2313 if (fdp->fd_cdir == olddp) { 2314 vref(newdp); 2315 fdp->fd_cdir = newdp; 2316 nrele++; 2317 } 2318 if (fdp->fd_rdir == olddp) { 2319 vref(newdp); 2320 fdp->fd_rdir = newdp; 2321 nrele++; 2322 } 2323 FILEDESC_UNLOCK_FAST(fdp); 2324 fddrop(fdp); 2325 while (nrele--) 2326 vrele(olddp); 2327 } 2328 sx_sunlock(&allproc_lock); 2329 if (rootvnode == olddp) { 2330 vrele(rootvnode); 2331 vref(newdp); 2332 rootvnode = newdp; 2333 } 2334 } 2335 2336 struct filedesc_to_leader * 2337 filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader) 2338 { 2339 struct filedesc_to_leader *fdtol; 2340 2341 MALLOC(fdtol, struct filedesc_to_leader *, 2342 sizeof(struct filedesc_to_leader), 2343 M_FILEDESC_TO_LEADER, 2344 M_WAITOK); 2345 fdtol->fdl_refcount = 1; 2346 fdtol->fdl_holdcount = 0; 2347 fdtol->fdl_wakeup = 0; 2348 fdtol->fdl_leader = leader; 2349 if (old != NULL) { 2350 FILEDESC_LOCK(fdp); 2351 fdtol->fdl_next = old->fdl_next; 2352 fdtol->fdl_prev = old; 2353 old->fdl_next = fdtol; 2354 fdtol->fdl_next->fdl_prev = fdtol; 2355 FILEDESC_UNLOCK(fdp); 2356 } else { 2357 fdtol->fdl_next = fdtol; 2358 fdtol->fdl_prev = fdtol; 2359 } 2360 return (fdtol); 2361 } 2362 2363 /* 2364 * Get file structures. 
2365 */ 2366 static int 2367 sysctl_kern_file(SYSCTL_HANDLER_ARGS) 2368 { 2369 struct xfile xf; 2370 struct filedesc *fdp; 2371 struct file *fp; 2372 struct proc *p; 2373 int error, n; 2374 2375 /* 2376 * Note: because the number of file descriptors is calculated 2377 * in different ways for sizing vs returning the data, 2378 * there is information leakage from the first loop. However, 2379 * it is of a similar order of magnitude to the leakage from 2380 * global system statistics such as kern.openfiles. 2381 */ 2382 error = sysctl_wire_old_buffer(req, 0); 2383 if (error != 0) 2384 return (error); 2385 if (req->oldptr == NULL) { 2386 n = 16; /* A slight overestimate. */ 2387 sx_slock(&filelist_lock); 2388 LIST_FOREACH(fp, &filehead, f_list) { 2389 /* 2390 * We should grab the lock, but this is an 2391 * estimate, so does it really matter? 2392 */ 2393 /* mtx_lock(fp->f_mtxp); */ 2394 n += fp->f_count; 2395 /* mtx_unlock(f->f_mtxp); */ 2396 } 2397 sx_sunlock(&filelist_lock); 2398 return (SYSCTL_OUT(req, 0, n * sizeof(xf))); 2399 } 2400 error = 0; 2401 bzero(&xf, sizeof(xf)); 2402 xf.xf_size = sizeof(xf); 2403 sx_slock(&allproc_lock); 2404 LIST_FOREACH(p, &allproc, p_list) { 2405 if (p->p_state == PRS_NEW) 2406 continue; 2407 PROC_LOCK(p); 2408 if (p_cansee(req->td, p) != 0) { 2409 PROC_UNLOCK(p); 2410 continue; 2411 } 2412 xf.xf_pid = p->p_pid; 2413 xf.xf_uid = p->p_ucred->cr_uid; 2414 PROC_UNLOCK(p); 2415 fdp = fdhold(p); 2416 if (fdp == NULL) 2417 continue; 2418 FILEDESC_LOCK_FAST(fdp); 2419 for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) { 2420 if ((fp = fdp->fd_ofiles[n]) == NULL) 2421 continue; 2422 xf.xf_fd = n; 2423 xf.xf_file = fp; 2424 xf.xf_data = fp->f_data; 2425 xf.xf_vnode = fp->f_vnode; 2426 xf.xf_type = fp->f_type; 2427 xf.xf_count = fp->f_count; 2428 xf.xf_msgcount = fp->f_msgcount; 2429 xf.xf_offset = fp->f_offset; 2430 xf.xf_flag = fp->f_flag; 2431 error = SYSCTL_OUT(req, &xf, sizeof(xf)); 2432 if (error) 2433 break; 2434 } 2435 
FILEDESC_UNLOCK_FAST(fdp); 2436 fddrop(fdp); 2437 if (error) 2438 break; 2439 } 2440 sx_sunlock(&allproc_lock); 2441 return (error); 2442 } 2443 2444 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD, 2445 0, 0, sysctl_kern_file, "S,xfile", "Entire file table"); 2446 2447 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, 2448 &maxfilesperproc, 0, "Maximum files allowed open per process"); 2449 2450 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, 2451 &maxfiles, 0, "Maximum number of files"); 2452 2453 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD, 2454 &openfiles, 0, "System-wide number of open files"); 2455 2456 /* ARGSUSED*/ 2457 static void 2458 filelistinit(void *dummy) 2459 { 2460 2461 file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL, 2462 NULL, NULL, UMA_ALIGN_PTR, 0); 2463 sx_init(&filelist_lock, "filelist lock"); 2464 mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); 2465 mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF); 2466 } 2467 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL) 2468 2469 /*-------------------------------------------------------------------*/ 2470 2471 static int 2472 badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) 2473 { 2474 2475 return (EBADF); 2476 } 2477 2478 static int 2479 badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td) 2480 { 2481 2482 return (EBADF); 2483 } 2484 2485 static int 2486 badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) 2487 { 2488 2489 return (0); 2490 } 2491 2492 static int 2493 badfo_kqfilter(struct file *fp, struct knote *kn) 2494 { 2495 2496 return (0); 2497 } 2498 2499 static int 2500 badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td) 2501 { 2502 2503 return (EBADF); 2504 } 2505 2506 static int 2507 badfo_close(struct file *fp, struct thread *td) 2508 { 2509 
2510 return (EBADF); 2511 } 2512 2513 struct fileops badfileops = { 2514 .fo_read = badfo_readwrite, 2515 .fo_write = badfo_readwrite, 2516 .fo_ioctl = badfo_ioctl, 2517 .fo_poll = badfo_poll, 2518 .fo_kqfilter = badfo_kqfilter, 2519 .fo_stat = badfo_stat, 2520 .fo_close = badfo_close, 2521 }; 2522 2523 2524 /*-------------------------------------------------------------------*/ 2525 2526 /* 2527 * File Descriptor pseudo-device driver (/dev/fd/). 2528 * 2529 * Opening minor device N dup()s the file (if any) connected to file 2530 * descriptor N belonging to the calling process. Note that this driver 2531 * consists of only the ``open()'' routine, because all subsequent 2532 * references to this file will be direct to the other driver. 2533 * 2534 * XXX: we could give this one a cloning event handler if necessary. 2535 */ 2536 2537 /* ARGSUSED */ 2538 static int 2539 fdopen(struct cdev *dev, int mode, int type, struct thread *td) 2540 { 2541 2542 /* 2543 * XXX Kludge: set curthread->td_dupfd to contain the value of the 2544 * the file descriptor being sought for duplication. The error 2545 * return ensures that the vnode for this device will be released 2546 * by vn_open. Open will detect this special error and take the 2547 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 2548 * will simply report the error. 
2549 */ 2550 td->td_dupfd = dev2unit(dev); 2551 return (ENODEV); 2552 } 2553 2554 static struct cdevsw fildesc_cdevsw = { 2555 .d_version = D_VERSION, 2556 .d_flags = D_NEEDGIANT, 2557 .d_open = fdopen, 2558 .d_name = "FD", 2559 }; 2560 2561 static void 2562 fildesc_drvinit(void *unused) 2563 { 2564 struct cdev *dev; 2565 2566 dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0"); 2567 make_dev_alias(dev, "stdin"); 2568 dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1"); 2569 make_dev_alias(dev, "stdout"); 2570 dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2"); 2571 make_dev_alias(dev, "stderr"); 2572 } 2573 2574 SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL) 2575