1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_compat.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 45 #include <sys/conf.h> 46 #include <sys/fcntl.h> 47 #include <sys/file.h> 48 #include <sys/filedesc.h> 49 #include <sys/filio.h> 50 #include <sys/jail.h> 51 #include <sys/kernel.h> 52 #include <sys/limits.h> 53 #include <sys/lock.h> 54 #include <sys/malloc.h> 55 #include <sys/mount.h> 56 #include <sys/mutex.h> 57 #include <sys/namei.h> 58 #include <sys/proc.h> 59 #include <sys/resourcevar.h> 60 #include <sys/signalvar.h> 61 #include <sys/socketvar.h> 62 #include <sys/stat.h> 63 #include <sys/sx.h> 64 #include <sys/syscallsubr.h> 65 #include <sys/sysctl.h> 66 #include <sys/sysproto.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 70 #include <vm/uma.h> 71 72 static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table"); 73 static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "file desc to leader", 74 "file desc to leader structures"); 75 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); 76 77 static uma_zone_t file_zone; 78 79 80 /* How to treat 'new' parameter when allocating a fd for do_dup(). */ 81 enum dup_type { DUP_VARIABLE, DUP_FIXED }; 82 83 static int do_dup(struct thread *td, enum dup_type type, int old, int new, 84 register_t *retval); 85 static int fd_first_free(struct filedesc *, int, int); 86 static int fd_last_used(struct filedesc *, int, int); 87 static void fdgrowtable(struct filedesc *, int); 88 static int fdrop_locked(struct file *fp, struct thread *td); 89 static void fdunused(struct filedesc *fdp, int fd); 90 static void fdused(struct filedesc *fdp, int fd); 91 92 /* 93 * A process is initially started out with NDFILE descriptors stored within 94 * this structure, selected to be enough for typical applications based on 95 * the historical limit of 20 open files (and the usage of descriptors by 96 * shells). If these descriptors are exhausted, a larger descriptor table 97 * may be allocated, up to a process' resource limit; the internal arrays 98 * are then unused. 99 */ 100 #define NDFILE 20 101 #define NDSLOTSIZE sizeof(NDSLOTTYPE) 102 #define NDENTRIES (NDSLOTSIZE * __CHAR_BIT) 103 #define NDSLOT(x) ((x) / NDENTRIES) 104 #define NDBIT(x) ((NDSLOTTYPE)1 << ((x) % NDENTRIES)) 105 #define NDSLOTS(x) (((x) + NDENTRIES - 1) / NDENTRIES) 106 107 /* 108 * Storage required per open file descriptor. 109 */ 110 #define OFILESIZE (sizeof(struct file *) + sizeof(char)) 111 112 /* 113 * Basic allocation of descriptors: 114 * one of the above, plus arrays for NDFILE descriptors. 115 */ 116 struct filedesc0 { 117 struct filedesc fd_fd; 118 /* 119 * These arrays are used when the number of open files is 120 * <= NDFILE, and are then pointed to by the pointers above. 121 */ 122 struct file *fd_dfiles[NDFILE]; 123 char fd_dfileflags[NDFILE]; 124 NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)]; 125 }; 126 127 /* 128 * Descriptor management. 129 */ 130 struct filelist filehead; /* head of list of open files */ 131 int openfiles; /* actual number of open files */ 132 struct sx filelist_lock; /* sx to protect filelist */ 133 struct mtx sigio_lock; /* mtx to protect pointers to sigio */ 134 135 /* A mutex to protect the association between a proc and filedesc. */ 136 static struct mtx fdesc_mtx; 137 138 /* 139 * Find the first zero bit in the given bitmap, starting at low and not 140 * exceeding size - 1. 141 */ 142 static int 143 fd_first_free(struct filedesc *fdp, int low, int size) 144 { 145 NDSLOTTYPE *map = fdp->fd_map; 146 NDSLOTTYPE mask; 147 int off, maxoff; 148 149 if (low >= size) 150 return (low); 151 152 off = NDSLOT(low); 153 if (low % NDENTRIES) { 154 mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES))); 155 if ((mask &= ~map[off]) != 0UL) 156 return (off * NDENTRIES + ffsl(mask) - 1); 157 ++off; 158 } 159 for (maxoff = NDSLOTS(size); off < maxoff; ++off) 160 if (map[off] != ~0UL) 161 return (off * NDENTRIES + ffsl(~map[off]) - 1); 162 return (size); 163 } 164 165 /* 166 * Find the highest non-zero bit in the given bitmap, starting at low and 167 * not exceeding size - 1. 168 */ 169 static int 170 fd_last_used(struct filedesc *fdp, int low, int size) 171 { 172 NDSLOTTYPE *map = fdp->fd_map; 173 NDSLOTTYPE mask; 174 int off, minoff; 175 176 if (low >= size) 177 return (-1); 178 179 off = NDSLOT(size); 180 if (size % NDENTRIES) { 181 mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES)); 182 if ((mask &= map[off]) != 0) 183 return (off * NDENTRIES + flsl(mask) - 1); 184 --off; 185 } 186 for (minoff = NDSLOT(low); off >= minoff; --off) 187 if (map[off] != 0) 188 return (off * NDENTRIES + flsl(map[off]) - 1); 189 return (size - 1); 190 } 191 192 static int 193 fdisused(struct filedesc *fdp, int fd) 194 { 195 KASSERT(fd >= 0 && fd < fdp->fd_nfiles, 196 ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles)); 197 return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0); 198 } 199 200 /* 201 * Mark a file descriptor as used. 202 */ 203 static void 204 fdused(struct filedesc *fdp, int fd) 205 { 206 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 207 KASSERT(!fdisused(fdp, fd), 208 ("fd already used")); 209 fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd); 210 if (fd > fdp->fd_lastfile) 211 fdp->fd_lastfile = fd; 212 if (fd == fdp->fd_freefile) 213 fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles); 214 } 215 216 /* 217 * Mark a file descriptor as unused. 218 */ 219 static void 220 fdunused(struct filedesc *fdp, int fd) 221 { 222 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 223 KASSERT(fdisused(fdp, fd), 224 ("fd is already unused")); 225 KASSERT(fdp->fd_ofiles[fd] == NULL, 226 ("fd is still in use")); 227 fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); 228 if (fd < fdp->fd_freefile) 229 fdp->fd_freefile = fd; 230 if (fd == fdp->fd_lastfile) 231 fdp->fd_lastfile = fd_last_used(fdp, 0, fd); 232 } 233 234 /* 235 * System calls on descriptors. 236 */ 237 #ifndef _SYS_SYSPROTO_H_ 238 struct getdtablesize_args { 239 int dummy; 240 }; 241 #endif 242 /* 243 * MPSAFE 244 */ 245 /* ARGSUSED */ 246 int 247 getdtablesize(struct thread *td, struct getdtablesize_args *uap) 248 { 249 struct proc *p = td->td_proc; 250 251 PROC_LOCK(p); 252 td->td_retval[0] = 253 min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 254 PROC_UNLOCK(p); 255 return (0); 256 } 257 258 /* 259 * Duplicate a file descriptor to a particular value. 260 * 261 * note: keep in mind that a potential race condition exists when closing 262 * descriptors from a shared descriptor table (via rfork). 263 */ 264 #ifndef _SYS_SYSPROTO_H_ 265 struct dup2_args { 266 u_int from; 267 u_int to; 268 }; 269 #endif 270 /* 271 * MPSAFE 272 */ 273 /* ARGSUSED */ 274 int 275 dup2(struct thread *td, struct dup2_args *uap) 276 { 277 278 return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to, 279 td->td_retval)); 280 } 281 282 /* 283 * Duplicate a file descriptor. 284 */ 285 #ifndef _SYS_SYSPROTO_H_ 286 struct dup_args { 287 u_int fd; 288 }; 289 #endif 290 /* 291 * MPSAFE 292 */ 293 /* ARGSUSED */ 294 int 295 dup(struct thread *td, struct dup_args *uap) 296 { 297 298 return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval)); 299 } 300 301 /* 302 * The file control system call. 303 */ 304 #ifndef _SYS_SYSPROTO_H_ 305 struct fcntl_args { 306 int fd; 307 int cmd; 308 long arg; 309 }; 310 #endif 311 /* 312 * MPSAFE 313 */ 314 /* ARGSUSED */ 315 int 316 fcntl(struct thread *td, struct fcntl_args *uap) 317 { 318 struct flock fl; 319 intptr_t arg; 320 int error; 321 322 error = 0; 323 switch (uap->cmd) { 324 case F_GETLK: 325 case F_SETLK: 326 case F_SETLKW: 327 error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl)); 328 arg = (intptr_t)&fl; 329 break; 330 default: 331 arg = uap->arg; 332 break; 333 } 334 if (error) 335 return (error); 336 error = kern_fcntl(td, uap->fd, uap->cmd, arg); 337 if (error) 338 return (error); 339 if (uap->cmd == F_GETLK) 340 error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl)); 341 return (error); 342 } 343 344 int 345 kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) 346 { 347 struct filedesc *fdp; 348 struct flock *flp; 349 struct file *fp; 350 struct proc *p; 351 char *pop; 352 struct vnode *vp; 353 u_int newmin; 354 int error, flg, tmp; 355 int giant_locked; 356 357 /* 358 * XXXRW: Some fcntl() calls require Giant -- others don't. Try to 359 * avoid grabbing Giant for calls we know don't need it. 360 */ 361 switch (cmd) { 362 case F_DUPFD: 363 case F_GETFD: 364 case F_SETFD: 365 case F_GETFL: 366 giant_locked = 0; 367 break; 368 369 default: 370 giant_locked = 1; 371 mtx_lock(&Giant); 372 } 373 374 error = 0; 375 flg = F_POSIX; 376 p = td->td_proc; 377 fdp = p->p_fd; 378 FILEDESC_LOCK(fdp); 379 if ((unsigned)fd >= fdp->fd_nfiles || 380 (fp = fdp->fd_ofiles[fd]) == NULL) { 381 FILEDESC_UNLOCK(fdp); 382 error = EBADF; 383 goto done2; 384 } 385 pop = &fdp->fd_ofileflags[fd]; 386 387 switch (cmd) { 388 case F_DUPFD: 389 /* mtx_assert(&Giant, MA_NOTOWNED); */ 390 FILEDESC_UNLOCK(fdp); 391 newmin = arg; 392 PROC_LOCK(p); 393 if (newmin >= lim_cur(p, RLIMIT_NOFILE) || 394 newmin >= maxfilesperproc) { 395 PROC_UNLOCK(p); 396 error = EINVAL; 397 break; 398 } 399 PROC_UNLOCK(p); 400 error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval); 401 break; 402 403 case F_GETFD: 404 /* mtx_assert(&Giant, MA_NOTOWNED); */ 405 td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0; 406 FILEDESC_UNLOCK(fdp); 407 break; 408 409 case F_SETFD: 410 /* mtx_assert(&Giant, MA_NOTOWNED); */ 411 *pop = (*pop &~ UF_EXCLOSE) | 412 (arg & FD_CLOEXEC ? UF_EXCLOSE : 0); 413 FILEDESC_UNLOCK(fdp); 414 break; 415 416 case F_GETFL: 417 /* mtx_assert(&Giant, MA_NOTOWNED); */ 418 FILE_LOCK(fp); 419 td->td_retval[0] = OFLAGS(fp->f_flag); 420 FILE_UNLOCK(fp); 421 FILEDESC_UNLOCK(fdp); 422 break; 423 424 case F_SETFL: 425 mtx_assert(&Giant, MA_OWNED); 426 FILE_LOCK(fp); 427 fhold_locked(fp); 428 fp->f_flag &= ~FCNTLFLAGS; 429 fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS; 430 FILE_UNLOCK(fp); 431 FILEDESC_UNLOCK(fdp); 432 tmp = fp->f_flag & FNONBLOCK; 433 error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); 434 if (error) { 435 fdrop(fp, td); 436 break; 437 } 438 tmp = fp->f_flag & FASYNC; 439 error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td); 440 if (error == 0) { 441 fdrop(fp, td); 442 break; 443 } 444 FILE_LOCK(fp); 445 fp->f_flag &= ~FNONBLOCK; 446 FILE_UNLOCK(fp); 447 tmp = 0; 448 (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); 449 fdrop(fp, td); 450 break; 451 452 case F_GETOWN: 453 mtx_assert(&Giant, MA_OWNED); 454 fhold(fp); 455 FILEDESC_UNLOCK(fdp); 456 error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); 457 if (error == 0) 458 td->td_retval[0] = tmp; 459 fdrop(fp, td); 460 break; 461 462 case F_SETOWN: 463 mtx_assert(&Giant, MA_OWNED); 464 fhold(fp); 465 FILEDESC_UNLOCK(fdp); 466 tmp = arg; 467 error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td); 468 fdrop(fp, td); 469 break; 470 471 case F_SETLKW: 472 mtx_assert(&Giant, MA_OWNED); 473 flg |= F_WAIT; 474 /* FALLTHROUGH F_SETLK */ 475 476 case F_SETLK: 477 mtx_assert(&Giant, MA_OWNED); 478 if (fp->f_type != DTYPE_VNODE) { 479 FILEDESC_UNLOCK(fdp); 480 error = EBADF; 481 break; 482 } 483 484 flp = (struct flock *)arg; 485 if (flp->l_whence == SEEK_CUR) { 486 if (fp->f_offset < 0 || 487 (flp->l_start > 0 && 488 fp->f_offset > OFF_MAX - flp->l_start)) { 489 FILEDESC_UNLOCK(fdp); 490 error = EOVERFLOW; 491 break; 492 } 493 flp->l_start += fp->f_offset; 494 } 495 496 /* 497 * VOP_ADVLOCK() may block. 498 */ 499 fhold(fp); 500 FILEDESC_UNLOCK(fdp); 501 vp = fp->f_vnode; 502 503 switch (flp->l_type) { 504 case F_RDLCK: 505 if ((fp->f_flag & FREAD) == 0) { 506 error = EBADF; 507 break; 508 } 509 PROC_LOCK(p->p_leader); 510 p->p_leader->p_flag |= P_ADVLOCK; 511 PROC_UNLOCK(p->p_leader); 512 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 513 flp, flg); 514 break; 515 case F_WRLCK: 516 if ((fp->f_flag & FWRITE) == 0) { 517 error = EBADF; 518 break; 519 } 520 PROC_LOCK(p->p_leader); 521 p->p_leader->p_flag |= P_ADVLOCK; 522 PROC_UNLOCK(p->p_leader); 523 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 524 flp, flg); 525 break; 526 case F_UNLCK: 527 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, 528 flp, F_POSIX); 529 break; 530 default: 531 error = EINVAL; 532 break; 533 } 534 /* Check for race with close */ 535 FILEDESC_LOCK_FAST(fdp); 536 if ((unsigned) fd >= fdp->fd_nfiles || 537 fp != fdp->fd_ofiles[fd]) { 538 FILEDESC_UNLOCK_FAST(fdp); 539 flp->l_whence = SEEK_SET; 540 flp->l_start = 0; 541 flp->l_len = 0; 542 flp->l_type = F_UNLCK; 543 (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, 544 F_UNLCK, flp, F_POSIX); 545 } else 546 FILEDESC_UNLOCK_FAST(fdp); 547 fdrop(fp, td); 548 break; 549 550 case F_GETLK: 551 mtx_assert(&Giant, MA_OWNED); 552 if (fp->f_type != DTYPE_VNODE) { 553 FILEDESC_UNLOCK(fdp); 554 error = EBADF; 555 break; 556 } 557 flp = (struct flock *)arg; 558 if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && 559 flp->l_type != F_UNLCK) { 560 FILEDESC_UNLOCK(fdp); 561 error = EINVAL; 562 break; 563 } 564 if (flp->l_whence == SEEK_CUR) { 565 if ((flp->l_start > 0 && 566 fp->f_offset > OFF_MAX - flp->l_start) || 567 (flp->l_start < 0 && 568 fp->f_offset < OFF_MIN - flp->l_start)) { 569 FILEDESC_UNLOCK(fdp); 570 error = EOVERFLOW; 571 break; 572 } 573 flp->l_start += fp->f_offset; 574 } 575 /* 576 * VOP_ADVLOCK() may block. 577 */ 578 fhold(fp); 579 FILEDESC_UNLOCK(fdp); 580 vp = fp->f_vnode; 581 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, 582 F_POSIX); 583 fdrop(fp, td); 584 break; 585 default: 586 FILEDESC_UNLOCK(fdp); 587 error = EINVAL; 588 break; 589 } 590 done2: 591 if (giant_locked) 592 mtx_unlock(&Giant); 593 return (error); 594 } 595 596 /* 597 * Common code for dup, dup2, and fcntl(F_DUPFD). 598 */ 599 static int 600 do_dup(struct thread *td, enum dup_type type, int old, int new, register_t *retval) 601 { 602 struct filedesc *fdp; 603 struct proc *p; 604 struct file *fp; 605 struct file *delfp; 606 int error, holdleaders, maxfd; 607 608 KASSERT((type == DUP_VARIABLE || type == DUP_FIXED), 609 ("invalid dup type %d", type)); 610 611 p = td->td_proc; 612 fdp = p->p_fd; 613 614 /* 615 * Verify we have a valid descriptor to dup from and possibly to 616 * dup to. 617 */ 618 if (old < 0 || new < 0) 619 return (EBADF); 620 PROC_LOCK(p); 621 maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 622 PROC_UNLOCK(p); 623 if (new >= maxfd) 624 return (EMFILE); 625 626 FILEDESC_LOCK(fdp); 627 if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) { 628 FILEDESC_UNLOCK(fdp); 629 return (EBADF); 630 } 631 if (type == DUP_FIXED && old == new) { 632 *retval = new; 633 FILEDESC_UNLOCK(fdp); 634 return (0); 635 } 636 fp = fdp->fd_ofiles[old]; 637 fhold(fp); 638 639 /* 640 * If the caller specified a file descriptor, make sure the file 641 * table is large enough to hold it, and grab it. Otherwise, just 642 * allocate a new descriptor the usual way. Since the filedesc 643 * lock may be temporarily dropped in the process, we have to look 644 * out for a race. 645 */ 646 if (type == DUP_FIXED) { 647 if (new >= fdp->fd_nfiles) 648 fdgrowtable(fdp, new + 1); 649 if (fdp->fd_ofiles[new] == NULL) 650 fdused(fdp, new); 651 } else { 652 if ((error = fdalloc(td, new, &new)) != 0) { 653 FILEDESC_UNLOCK(fdp); 654 fdrop(fp, td); 655 return (error); 656 } 657 } 658 659 /* 660 * If the old file changed out from under us then treat it as a 661 * bad file descriptor. Userland should do its own locking to 662 * avoid this case. 663 */ 664 if (fdp->fd_ofiles[old] != fp) { 665 /* we've allocated a descriptor which we won't use */ 666 if (fdp->fd_ofiles[new] == NULL) 667 fdunused(fdp, new); 668 FILEDESC_UNLOCK(fdp); 669 fdrop(fp, td); 670 return (EBADF); 671 } 672 KASSERT(old != new, 673 ("new fd is same as old")); 674 675 /* 676 * Save info on the descriptor being overwritten. We cannot close 677 * it without introducing an ownership race for the slot, since we 678 * need to drop the filedesc lock to call closef(). 679 * 680 * XXX this duplicates parts of close(). 681 */ 682 delfp = fdp->fd_ofiles[new]; 683 holdleaders = 0; 684 if (delfp != NULL) { 685 if (td->td_proc->p_fdtol != NULL) { 686 /* 687 * Ask fdfree() to sleep to ensure that all relevant 688 * process leaders can be traversed in closef(). 689 */ 690 fdp->fd_holdleaderscount++; 691 holdleaders = 1; 692 } 693 } 694 695 /* 696 * Duplicate the source descriptor 697 */ 698 fdp->fd_ofiles[new] = fp; 699 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; 700 if (new > fdp->fd_lastfile) 701 fdp->fd_lastfile = new; 702 *retval = new; 703 704 /* 705 * If we dup'd over a valid file, we now own the reference to it 706 * and must dispose of it using closef() semantics (as if a 707 * close() were performed on it). 708 * 709 * XXX this duplicates parts of close(). 710 */ 711 if (delfp != NULL) { 712 knote_fdclose(td, new); 713 FILEDESC_UNLOCK(fdp); 714 (void) closef(delfp, td); 715 if (holdleaders) { 716 FILEDESC_LOCK_FAST(fdp); 717 fdp->fd_holdleaderscount--; 718 if (fdp->fd_holdleaderscount == 0 && 719 fdp->fd_holdleaderswakeup != 0) { 720 fdp->fd_holdleaderswakeup = 0; 721 wakeup(&fdp->fd_holdleaderscount); 722 } 723 FILEDESC_UNLOCK_FAST(fdp); 724 } 725 } else { 726 FILEDESC_UNLOCK(fdp); 727 } 728 return (0); 729 } 730 731 /* 732 * If sigio is on the list associated with a process or process group, 733 * disable signalling from the device, remove sigio from the list and 734 * free sigio. 735 */ 736 void 737 funsetown(struct sigio **sigiop) 738 { 739 struct sigio *sigio; 740 741 SIGIO_LOCK(); 742 sigio = *sigiop; 743 if (sigio == NULL) { 744 SIGIO_UNLOCK(); 745 return; 746 } 747 *(sigio->sio_myref) = NULL; 748 if ((sigio)->sio_pgid < 0) { 749 struct pgrp *pg = (sigio)->sio_pgrp; 750 PGRP_LOCK(pg); 751 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio, 752 sigio, sio_pgsigio); 753 PGRP_UNLOCK(pg); 754 } else { 755 struct proc *p = (sigio)->sio_proc; 756 PROC_LOCK(p); 757 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio, 758 sigio, sio_pgsigio); 759 PROC_UNLOCK(p); 760 } 761 SIGIO_UNLOCK(); 762 crfree(sigio->sio_ucred); 763 FREE(sigio, M_SIGIO); 764 } 765 766 /* 767 * Free a list of sigio structures. 768 * We only need to lock the SIGIO_LOCK because we have made ourselves 769 * inaccessable to callers of fsetown and therefore do not need to lock 770 * the proc or pgrp struct for the list manipulation. 771 */ 772 void 773 funsetownlst(struct sigiolst *sigiolst) 774 { 775 struct proc *p; 776 struct pgrp *pg; 777 struct sigio *sigio; 778 779 sigio = SLIST_FIRST(sigiolst); 780 if (sigio == NULL) 781 return; 782 p = NULL; 783 pg = NULL; 784 785 /* 786 * Every entry of the list should belong 787 * to a single proc or pgrp. 788 */ 789 if (sigio->sio_pgid < 0) { 790 pg = sigio->sio_pgrp; 791 PGRP_LOCK_ASSERT(pg, MA_NOTOWNED); 792 } else /* if (sigio->sio_pgid > 0) */ { 793 p = sigio->sio_proc; 794 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 795 } 796 797 SIGIO_LOCK(); 798 while ((sigio = SLIST_FIRST(sigiolst)) != NULL) { 799 *(sigio->sio_myref) = NULL; 800 if (pg != NULL) { 801 KASSERT(sigio->sio_pgid < 0, 802 ("Proc sigio in pgrp sigio list")); 803 KASSERT(sigio->sio_pgrp == pg, 804 ("Bogus pgrp in sigio list")); 805 PGRP_LOCK(pg); 806 SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, 807 sio_pgsigio); 808 PGRP_UNLOCK(pg); 809 } else /* if (p != NULL) */ { 810 KASSERT(sigio->sio_pgid > 0, 811 ("Pgrp sigio in proc sigio list")); 812 KASSERT(sigio->sio_proc == p, 813 ("Bogus proc in sigio list")); 814 PROC_LOCK(p); 815 SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, 816 sio_pgsigio); 817 PROC_UNLOCK(p); 818 } 819 SIGIO_UNLOCK(); 820 crfree(sigio->sio_ucred); 821 FREE(sigio, M_SIGIO); 822 SIGIO_LOCK(); 823 } 824 SIGIO_UNLOCK(); 825 } 826 827 /* 828 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg). 829 * 830 * After permission checking, add a sigio structure to the sigio list for 831 * the process or process group. 832 */ 833 int 834 fsetown(pid_t pgid, struct sigio **sigiop) 835 { 836 struct proc *proc; 837 struct pgrp *pgrp; 838 struct sigio *sigio; 839 int ret; 840 841 if (pgid == 0) { 842 funsetown(sigiop); 843 return (0); 844 } 845 846 ret = 0; 847 848 /* Allocate and fill in the new sigio out of locks. */ 849 MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK); 850 sigio->sio_pgid = pgid; 851 sigio->sio_ucred = crhold(curthread->td_ucred); 852 sigio->sio_myref = sigiop; 853 854 sx_slock(&proctree_lock); 855 if (pgid > 0) { 856 proc = pfind(pgid); 857 if (proc == NULL) { 858 ret = ESRCH; 859 goto fail; 860 } 861 862 /* 863 * Policy - Don't allow a process to FSETOWN a process 864 * in another session. 865 * 866 * Remove this test to allow maximum flexibility or 867 * restrict FSETOWN to the current process or process 868 * group for maximum safety. 869 */ 870 PROC_UNLOCK(proc); 871 if (proc->p_session != curthread->td_proc->p_session) { 872 ret = EPERM; 873 goto fail; 874 } 875 876 pgrp = NULL; 877 } else /* if (pgid < 0) */ { 878 pgrp = pgfind(-pgid); 879 if (pgrp == NULL) { 880 ret = ESRCH; 881 goto fail; 882 } 883 PGRP_UNLOCK(pgrp); 884 885 /* 886 * Policy - Don't allow a process to FSETOWN a process 887 * in another session. 888 * 889 * Remove this test to allow maximum flexibility or 890 * restrict FSETOWN to the current process or process 891 * group for maximum safety. 892 */ 893 if (pgrp->pg_session != curthread->td_proc->p_session) { 894 ret = EPERM; 895 goto fail; 896 } 897 898 proc = NULL; 899 } 900 funsetown(sigiop); 901 if (pgid > 0) { 902 PROC_LOCK(proc); 903 /* 904 * Since funsetownlst() is called without the proctree 905 * locked, we need to check for P_WEXIT. 906 * XXX: is ESRCH correct? 907 */ 908 if ((proc->p_flag & P_WEXIT) != 0) { 909 PROC_UNLOCK(proc); 910 ret = ESRCH; 911 goto fail; 912 } 913 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio); 914 sigio->sio_proc = proc; 915 PROC_UNLOCK(proc); 916 } else { 917 PGRP_LOCK(pgrp); 918 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio); 919 sigio->sio_pgrp = pgrp; 920 PGRP_UNLOCK(pgrp); 921 } 922 sx_sunlock(&proctree_lock); 923 SIGIO_LOCK(); 924 *sigiop = sigio; 925 SIGIO_UNLOCK(); 926 return (0); 927 928 fail: 929 sx_sunlock(&proctree_lock); 930 crfree(sigio->sio_ucred); 931 FREE(sigio, M_SIGIO); 932 return (ret); 933 } 934 935 /* 936 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg). 937 */ 938 pid_t 939 fgetown(sigiop) 940 struct sigio **sigiop; 941 { 942 pid_t pgid; 943 944 SIGIO_LOCK(); 945 pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0; 946 SIGIO_UNLOCK(); 947 return (pgid); 948 } 949 950 /* 951 * Close a file descriptor. 952 */ 953 #ifndef _SYS_SYSPROTO_H_ 954 struct close_args { 955 int fd; 956 }; 957 #endif 958 /* 959 * MPSAFE 960 */ 961 /* ARGSUSED */ 962 int 963 close(td, uap) 964 struct thread *td; 965 struct close_args *uap; 966 { 967 struct filedesc *fdp; 968 struct file *fp; 969 int fd, error; 970 int holdleaders; 971 972 fd = uap->fd; 973 error = 0; 974 holdleaders = 0; 975 fdp = td->td_proc->p_fd; 976 FILEDESC_LOCK(fdp); 977 if ((unsigned)fd >= fdp->fd_nfiles || 978 (fp = fdp->fd_ofiles[fd]) == NULL) { 979 FILEDESC_UNLOCK(fdp); 980 return (EBADF); 981 } 982 fdp->fd_ofiles[fd] = NULL; 983 fdp->fd_ofileflags[fd] = 0; 984 fdunused(fdp, fd); 985 if (td->td_proc->p_fdtol != NULL) { 986 /* 987 * Ask fdfree() to sleep to ensure that all relevant 988 * process leaders can be traversed in closef(). 989 */ 990 fdp->fd_holdleaderscount++; 991 holdleaders = 1; 992 } 993 994 /* 995 * we now hold the fp reference that used to be owned by the descriptor 996 * array. 997 * We have to unlock the FILEDESC *AFTER* knote_fdclose to prevent a 998 * race of the fd getting opened, a knote added, and deleteing a knote 999 * for the new fd. 1000 */ 1001 knote_fdclose(td, fd); 1002 FILEDESC_UNLOCK(fdp); 1003 1004 error = closef(fp, td); 1005 if (holdleaders) { 1006 FILEDESC_LOCK_FAST(fdp); 1007 fdp->fd_holdleaderscount--; 1008 if (fdp->fd_holdleaderscount == 0 && 1009 fdp->fd_holdleaderswakeup != 0) { 1010 fdp->fd_holdleaderswakeup = 0; 1011 wakeup(&fdp->fd_holdleaderscount); 1012 } 1013 FILEDESC_UNLOCK_FAST(fdp); 1014 } 1015 return (error); 1016 } 1017 1018 #if defined(COMPAT_43) 1019 /* 1020 * Return status information about a file descriptor. 1021 */ 1022 #ifndef _SYS_SYSPROTO_H_ 1023 struct ofstat_args { 1024 int fd; 1025 struct ostat *sb; 1026 }; 1027 #endif 1028 /* 1029 * MPSAFE 1030 */ 1031 /* ARGSUSED */ 1032 int 1033 ofstat(struct thread *td, struct ofstat_args *uap) 1034 { 1035 struct ostat oub; 1036 struct stat ub; 1037 int error; 1038 1039 error = kern_fstat(td, uap->fd, &ub); 1040 if (error == 0) { 1041 cvtstat(&ub, &oub); 1042 error = copyout(&oub, uap->sb, sizeof(oub)); 1043 } 1044 return (error); 1045 } 1046 #endif /* COMPAT_43 */ 1047 1048 /* 1049 * Return status information about a file descriptor. 1050 */ 1051 #ifndef _SYS_SYSPROTO_H_ 1052 struct fstat_args { 1053 int fd; 1054 struct stat *sb; 1055 }; 1056 #endif 1057 /* 1058 * MPSAFE 1059 */ 1060 /* ARGSUSED */ 1061 int 1062 fstat(struct thread *td, struct fstat_args *uap) 1063 { 1064 struct stat ub; 1065 int error; 1066 1067 error = kern_fstat(td, uap->fd, &ub); 1068 if (error == 0) 1069 error = copyout(&ub, uap->sb, sizeof(ub)); 1070 return (error); 1071 } 1072 1073 int 1074 kern_fstat(struct thread *td, int fd, struct stat *sbp) 1075 { 1076 struct file *fp; 1077 int error; 1078 1079 if ((error = fget(td, fd, &fp)) != 0) 1080 return (error); 1081 error = fo_stat(fp, sbp, td->td_ucred, td); 1082 fdrop(fp, td); 1083 return (error); 1084 } 1085 1086 /* 1087 * Return status information about a file descriptor. 1088 */ 1089 #ifndef _SYS_SYSPROTO_H_ 1090 struct nfstat_args { 1091 int fd; 1092 struct nstat *sb; 1093 }; 1094 #endif 1095 /* 1096 * MPSAFE 1097 */ 1098 /* ARGSUSED */ 1099 int 1100 nfstat(struct thread *td, struct nfstat_args *uap) 1101 { 1102 struct nstat nub; 1103 struct stat ub; 1104 int error; 1105 1106 error = kern_fstat(td, uap->fd, &ub); 1107 if (error == 0) { 1108 cvtnstat(&ub, &nub); 1109 error = copyout(&nub, uap->sb, sizeof(nub)); 1110 } 1111 return (error); 1112 } 1113 1114 /* 1115 * Return pathconf information about a file descriptor. 1116 */ 1117 #ifndef _SYS_SYSPROTO_H_ 1118 struct fpathconf_args { 1119 int fd; 1120 int name; 1121 }; 1122 #endif 1123 /* 1124 * MPSAFE 1125 */ 1126 /* ARGSUSED */ 1127 int 1128 fpathconf(struct thread *td, struct fpathconf_args *uap) 1129 { 1130 struct file *fp; 1131 struct vnode *vp; 1132 int error; 1133 1134 if ((error = fget(td, uap->fd, &fp)) != 0) 1135 return (error); 1136 1137 /* If asynchronous I/O is available, it works for all descriptors. */ 1138 if (uap->name == _PC_ASYNC_IO) { 1139 td->td_retval[0] = async_io_version; 1140 goto out; 1141 } 1142 vp = fp->f_vnode; 1143 if (vp != NULL) { 1144 int vfslocked; 1145 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1146 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1147 error = VOP_PATHCONF(vp, uap->name, td->td_retval); 1148 VOP_UNLOCK(vp, 0, td); 1149 VFS_UNLOCK_GIANT(vfslocked); 1150 } else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { 1151 if (uap->name != _PC_PIPE_BUF) { 1152 error = EINVAL; 1153 } else { 1154 td->td_retval[0] = PIPE_BUF; 1155 error = 0; 1156 } 1157 } else { 1158 error = EOPNOTSUPP; 1159 } 1160 out: 1161 fdrop(fp, td); 1162 return (error); 1163 } 1164 1165 /* 1166 * Grow the file table to accomodate (at least) nfd descriptors. This may 1167 * block and drop the filedesc lock, but it will reacquire it before 1168 * returing. 1169 */ 1170 static void 1171 fdgrowtable(struct filedesc *fdp, int nfd) 1172 { 1173 struct file **ntable; 1174 char *nfileflags; 1175 int nnfiles, onfiles; 1176 NDSLOTTYPE *nmap; 1177 1178 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 1179 1180 KASSERT(fdp->fd_nfiles > 0, 1181 ("zero-length file table")); 1182 1183 /* compute the size of the new table */ 1184 onfiles = fdp->fd_nfiles; 1185 nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */ 1186 if (nnfiles <= onfiles) 1187 /* the table is already large enough */ 1188 return; 1189 1190 /* allocate a new table and (if required) new bitmaps */ 1191 FILEDESC_UNLOCK(fdp); 1192 MALLOC(ntable, struct file **, nnfiles * OFILESIZE, 1193 M_FILEDESC, M_ZERO | M_WAITOK); 1194 nfileflags = (char *)&ntable[nnfiles]; 1195 if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) 1196 MALLOC(nmap, NDSLOTTYPE *, NDSLOTS(nnfiles) * NDSLOTSIZE, 1197 M_FILEDESC, M_ZERO | M_WAITOK); 1198 else 1199 nmap = NULL; 1200 FILEDESC_LOCK(fdp); 1201 1202 /* 1203 * We now have new tables ready to go. Since we dropped the 1204 * filedesc lock to call malloc(), watch out for a race. 1205 */ 1206 onfiles = fdp->fd_nfiles; 1207 if (onfiles >= nnfiles) { 1208 /* we lost the race, but that's OK */ 1209 free(ntable, M_FILEDESC); 1210 if (nmap != NULL) 1211 free(nmap, M_FILEDESC); 1212 return; 1213 } 1214 bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable)); 1215 bcopy(fdp->fd_ofileflags, nfileflags, onfiles); 1216 if (onfiles > NDFILE) 1217 free(fdp->fd_ofiles, M_FILEDESC); 1218 fdp->fd_ofiles = ntable; 1219 fdp->fd_ofileflags = nfileflags; 1220 if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) { 1221 bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap)); 1222 if (NDSLOTS(onfiles) > NDSLOTS(NDFILE)) 1223 free(fdp->fd_map, M_FILEDESC); 1224 fdp->fd_map = nmap; 1225 } 1226 fdp->fd_nfiles = nnfiles; 1227 } 1228 1229 /* 1230 * Allocate a file descriptor for the process. 1231 */ 1232 int 1233 fdalloc(struct thread *td, int minfd, int *result) 1234 { 1235 struct proc *p = td->td_proc; 1236 struct filedesc *fdp = p->p_fd; 1237 int fd = -1, maxfd; 1238 1239 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 1240 1241 PROC_LOCK(p); 1242 maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 1243 PROC_UNLOCK(p); 1244 1245 /* 1246 * Search the bitmap for a free descriptor. If none is found, try 1247 * to grow the file table. Keep at it until we either get a file 1248 * descriptor or run into process or system limits; fdgrowtable() 1249 * may drop the filedesc lock, so we're in a race. 1250 */ 1251 for (;;) { 1252 fd = fd_first_free(fdp, minfd, fdp->fd_nfiles); 1253 if (fd >= maxfd) 1254 return (EMFILE); 1255 if (fd < fdp->fd_nfiles) 1256 break; 1257 fdgrowtable(fdp, min(fdp->fd_nfiles * 2, maxfd)); 1258 } 1259 1260 /* 1261 * Perform some sanity checks, then mark the file descriptor as 1262 * used and return it to the caller. 1263 */ 1264 KASSERT(!fdisused(fdp, fd), 1265 ("fd_first_free() returned non-free descriptor")); 1266 KASSERT(fdp->fd_ofiles[fd] == NULL, 1267 ("free descriptor isn't")); 1268 fdp->fd_ofileflags[fd] = 0; /* XXX needed? */ 1269 fdused(fdp, fd); 1270 fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles); 1271 *result = fd; 1272 return (0); 1273 } 1274 1275 /* 1276 * Check to see whether n user file descriptors 1277 * are available to the process p. 1278 */ 1279 int 1280 fdavail(struct thread *td, int n) 1281 { 1282 struct proc *p = td->td_proc; 1283 struct filedesc *fdp = td->td_proc->p_fd; 1284 struct file **fpp; 1285 int i, lim, last; 1286 1287 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 1288 1289 PROC_LOCK(p); 1290 lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 1291 PROC_UNLOCK(p); 1292 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) 1293 return (1); 1294 last = min(fdp->fd_nfiles, lim); 1295 fpp = &fdp->fd_ofiles[fdp->fd_freefile]; 1296 for (i = last - fdp->fd_freefile; --i >= 0; fpp++) { 1297 if (*fpp == NULL && --n <= 0) 1298 return (1); 1299 } 1300 return (0); 1301 } 1302 1303 /* 1304 * Create a new open file structure and allocate 1305 * a file decriptor for the process that refers to it. 1306 * We add one reference to the file for the descriptor table 1307 * and one reference for resultfp. This is to prevent us being 1308 * prempted and the entry in the descriptor table closed after 1309 * we release the FILEDESC lock. 1310 */ 1311 int 1312 falloc(struct thread *td, struct file **resultfp, int *resultfd) 1313 { 1314 struct proc *p = td->td_proc; 1315 struct file *fp, *fq; 1316 int error, i; 1317 int maxuserfiles = maxfiles - (maxfiles / 20); 1318 static struct timeval lastfail; 1319 static int curfail; 1320 1321 fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO); 1322 sx_xlock(&filelist_lock); 1323 if ((openfiles >= maxuserfiles && (td->td_ucred->cr_ruid != 0 || 1324 jailed(td->td_ucred))) || openfiles >= maxfiles) { 1325 if (ppsratecheck(&lastfail, &curfail, 1)) { 1326 printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n", 1327 td->td_ucred->cr_ruid); 1328 } 1329 sx_xunlock(&filelist_lock); 1330 uma_zfree(file_zone, fp); 1331 return (ENFILE); 1332 } 1333 openfiles++; 1334 1335 /* 1336 * If the process has file descriptor zero open, add the new file 1337 * descriptor to the list of open files at that point, otherwise 1338 * put it at the front of the list of open files. 1339 */ 1340 fp->f_mtxp = mtx_pool_alloc(mtxpool_sleep); 1341 fp->f_count = 1; 1342 if (resultfp) 1343 fp->f_count++; 1344 fp->f_cred = crhold(td->td_ucred); 1345 fp->f_ops = &badfileops; 1346 fp->f_data = NULL; 1347 fp->f_vnode = NULL; 1348 FILEDESC_LOCK(p->p_fd); 1349 if ((fq = p->p_fd->fd_ofiles[0])) { 1350 LIST_INSERT_AFTER(fq, fp, f_list); 1351 } else { 1352 LIST_INSERT_HEAD(&filehead, fp, f_list); 1353 } 1354 sx_xunlock(&filelist_lock); 1355 if ((error = fdalloc(td, 0, &i))) { 1356 FILEDESC_UNLOCK(p->p_fd); 1357 fdrop(fp, td); 1358 if (resultfp) 1359 fdrop(fp, td); 1360 return (error); 1361 } 1362 p->p_fd->fd_ofiles[i] = fp; 1363 FILEDESC_UNLOCK(p->p_fd); 1364 if (resultfp) 1365 *resultfp = fp; 1366 if (resultfd) 1367 *resultfd = i; 1368 return (0); 1369 } 1370 1371 /* 1372 * Build a new filedesc structure from another. 1373 * Copy the current, root, and jail root vnode references. 1374 */ 1375 struct filedesc * 1376 fdinit(struct filedesc *fdp) 1377 { 1378 struct filedesc0 *newfdp; 1379 1380 newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO); 1381 mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF); 1382 if (fdp != NULL) { 1383 FILEDESC_LOCK(fdp); 1384 newfdp->fd_fd.fd_cdir = fdp->fd_cdir; 1385 if (newfdp->fd_fd.fd_cdir) 1386 VREF(newfdp->fd_fd.fd_cdir); 1387 newfdp->fd_fd.fd_rdir = fdp->fd_rdir; 1388 if (newfdp->fd_fd.fd_rdir) 1389 VREF(newfdp->fd_fd.fd_rdir); 1390 newfdp->fd_fd.fd_jdir = fdp->fd_jdir; 1391 if (newfdp->fd_fd.fd_jdir) 1392 VREF(newfdp->fd_fd.fd_jdir); 1393 FILEDESC_UNLOCK(fdp); 1394 } 1395 1396 /* Create the file descriptor table. */ 1397 newfdp->fd_fd.fd_refcnt = 1; 1398 newfdp->fd_fd.fd_holdcnt = 1; 1399 newfdp->fd_fd.fd_cmask = CMASK; 1400 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 1401 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 1402 newfdp->fd_fd.fd_nfiles = NDFILE; 1403 newfdp->fd_fd.fd_map = newfdp->fd_dmap; 1404 return (&newfdp->fd_fd); 1405 } 1406 1407 static struct filedesc * 1408 fdhold(struct proc *p) 1409 { 1410 struct filedesc *fdp; 1411 1412 mtx_lock(&fdesc_mtx); 1413 fdp = p->p_fd; 1414 if (fdp != NULL) 1415 fdp->fd_holdcnt++; 1416 mtx_unlock(&fdesc_mtx); 1417 return (fdp); 1418 } 1419 1420 static void 1421 fddrop(struct filedesc *fdp) 1422 { 1423 int i; 1424 1425 mtx_lock(&fdesc_mtx); 1426 i = --fdp->fd_holdcnt; 1427 mtx_unlock(&fdesc_mtx); 1428 if (i > 0) 1429 return; 1430 1431 mtx_destroy(&fdp->fd_mtx); 1432 FREE(fdp, M_FILEDESC); 1433 } 1434 1435 /* 1436 * Share a filedesc structure. 1437 */ 1438 struct filedesc * 1439 fdshare(struct filedesc *fdp) 1440 { 1441 FILEDESC_LOCK_FAST(fdp); 1442 fdp->fd_refcnt++; 1443 FILEDESC_UNLOCK_FAST(fdp); 1444 return (fdp); 1445 } 1446 1447 /* 1448 * Unshare a filedesc structure, if necessary by making a copy 1449 */ 1450 void 1451 fdunshare(struct proc *p, struct thread *td) 1452 { 1453 1454 FILEDESC_LOCK_FAST(p->p_fd); 1455 if (p->p_fd->fd_refcnt > 1) { 1456 struct filedesc *tmp; 1457 1458 FILEDESC_UNLOCK_FAST(p->p_fd); 1459 tmp = fdcopy(p->p_fd); 1460 fdfree(td); 1461 p->p_fd = tmp; 1462 } else 1463 FILEDESC_UNLOCK_FAST(p->p_fd); 1464 } 1465 1466 /* 1467 * Copy a filedesc structure. 1468 * A NULL pointer in returns a NULL reference, this is to ease callers, 1469 * not catch errors. 1470 */ 1471 struct filedesc * 1472 fdcopy(struct filedesc *fdp) 1473 { 1474 struct filedesc *newfdp; 1475 int i; 1476 1477 /* Certain daemons might not have file descriptors. */ 1478 if (fdp == NULL) 1479 return (NULL); 1480 1481 newfdp = fdinit(fdp); 1482 FILEDESC_LOCK_FAST(fdp); 1483 while (fdp->fd_lastfile >= newfdp->fd_nfiles) { 1484 FILEDESC_UNLOCK_FAST(fdp); 1485 FILEDESC_LOCK(newfdp); 1486 fdgrowtable(newfdp, fdp->fd_lastfile + 1); 1487 FILEDESC_UNLOCK(newfdp); 1488 FILEDESC_LOCK_FAST(fdp); 1489 } 1490 /* copy everything except kqueue descriptors */ 1491 newfdp->fd_freefile = -1; 1492 for (i = 0; i <= fdp->fd_lastfile; ++i) { 1493 if (fdisused(fdp, i) && 1494 fdp->fd_ofiles[i]->f_type != DTYPE_KQUEUE) { 1495 newfdp->fd_ofiles[i] = fdp->fd_ofiles[i]; 1496 newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i]; 1497 fhold(newfdp->fd_ofiles[i]); 1498 newfdp->fd_lastfile = i; 1499 } else { 1500 if (newfdp->fd_freefile == -1) 1501 newfdp->fd_freefile = i; 1502 } 1503 } 1504 FILEDESC_UNLOCK_FAST(fdp); 1505 FILEDESC_LOCK(newfdp); 1506 for (i = 0; i <= newfdp->fd_lastfile; ++i) 1507 if (newfdp->fd_ofiles[i] != NULL) 1508 fdused(newfdp, i); 1509 FILEDESC_UNLOCK(newfdp); 1510 FILEDESC_LOCK_FAST(fdp); 1511 if (newfdp->fd_freefile == -1) 1512 newfdp->fd_freefile = i; 1513 newfdp->fd_cmask = fdp->fd_cmask; 1514 FILEDESC_UNLOCK_FAST(fdp); 1515 return (newfdp); 1516 } 1517 1518 /* 1519 * Release a filedesc structure. 1520 */ 1521 void 1522 fdfree(struct thread *td) 1523 { 1524 struct filedesc *fdp; 1525 struct file **fpp; 1526 int i; 1527 struct filedesc_to_leader *fdtol; 1528 struct file *fp; 1529 struct vnode *vp; 1530 struct flock lf; 1531 1532 /* Certain daemons might not have file descriptors. */ 1533 fdp = td->td_proc->p_fd; 1534 if (fdp == NULL) 1535 return; 1536 1537 /* Check for special need to clear POSIX style locks */ 1538 fdtol = td->td_proc->p_fdtol; 1539 if (fdtol != NULL) { 1540 FILEDESC_LOCK(fdp); 1541 KASSERT(fdtol->fdl_refcount > 0, 1542 ("filedesc_to_refcount botch: fdl_refcount=%d", 1543 fdtol->fdl_refcount)); 1544 if (fdtol->fdl_refcount == 1 && 1545 (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1546 i = 0; 1547 fpp = fdp->fd_ofiles; 1548 for (i = 0, fpp = fdp->fd_ofiles; 1549 i <= fdp->fd_lastfile; 1550 i++, fpp++) { 1551 if (*fpp == NULL || 1552 (*fpp)->f_type != DTYPE_VNODE) 1553 continue; 1554 fp = *fpp; 1555 fhold(fp); 1556 FILEDESC_UNLOCK(fdp); 1557 lf.l_whence = SEEK_SET; 1558 lf.l_start = 0; 1559 lf.l_len = 0; 1560 lf.l_type = F_UNLCK; 1561 vp = fp->f_vnode; 1562 VFS_ASSERT_GIANT(vp->v_mount); 1563 (void) VOP_ADVLOCK(vp, 1564 (caddr_t)td->td_proc-> 1565 p_leader, 1566 F_UNLCK, 1567 &lf, 1568 F_POSIX); 1569 FILEDESC_LOCK(fdp); 1570 fdrop(fp, td); 1571 fpp = fdp->fd_ofiles + i; 1572 } 1573 } 1574 retry: 1575 if (fdtol->fdl_refcount == 1) { 1576 if (fdp->fd_holdleaderscount > 0 && 1577 (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1578 /* 1579 * close() or do_dup() has cleared a reference 1580 * in a shared file descriptor table. 1581 */ 1582 fdp->fd_holdleaderswakeup = 1; 1583 msleep(&fdp->fd_holdleaderscount, &fdp->fd_mtx, 1584 PLOCK, "fdlhold", 0); 1585 goto retry; 1586 } 1587 if (fdtol->fdl_holdcount > 0) { 1588 /* 1589 * Ensure that fdtol->fdl_leader 1590 * remains valid in closef(). 1591 */ 1592 fdtol->fdl_wakeup = 1; 1593 msleep(fdtol, &fdp->fd_mtx, 1594 PLOCK, "fdlhold", 0); 1595 goto retry; 1596 } 1597 } 1598 fdtol->fdl_refcount--; 1599 if (fdtol->fdl_refcount == 0 && 1600 fdtol->fdl_holdcount == 0) { 1601 fdtol->fdl_next->fdl_prev = fdtol->fdl_prev; 1602 fdtol->fdl_prev->fdl_next = fdtol->fdl_next; 1603 } else 1604 fdtol = NULL; 1605 td->td_proc->p_fdtol = NULL; 1606 FILEDESC_UNLOCK(fdp); 1607 if (fdtol != NULL) 1608 FREE(fdtol, M_FILEDESC_TO_LEADER); 1609 } 1610 FILEDESC_LOCK_FAST(fdp); 1611 i = --fdp->fd_refcnt; 1612 FILEDESC_UNLOCK_FAST(fdp); 1613 if (i > 0) 1614 return; 1615 /* 1616 * We are the last reference to the structure, so we can 1617 * safely assume it will not change out from under us. 1618 */ 1619 fpp = fdp->fd_ofiles; 1620 for (i = fdp->fd_lastfile; i-- >= 0; fpp++) { 1621 if (*fpp) 1622 (void) closef(*fpp, td); 1623 } 1624 FILEDESC_LOCK(fdp); 1625 1626 /* XXX This should happen earlier. */ 1627 mtx_lock(&fdesc_mtx); 1628 td->td_proc->p_fd = NULL; 1629 mtx_unlock(&fdesc_mtx); 1630 1631 if (fdp->fd_nfiles > NDFILE) 1632 FREE(fdp->fd_ofiles, M_FILEDESC); 1633 if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE)) 1634 FREE(fdp->fd_map, M_FILEDESC); 1635 1636 fdp->fd_nfiles = 0; 1637 1638 if (fdp->fd_cdir) 1639 vrele(fdp->fd_cdir); 1640 fdp->fd_cdir = NULL; 1641 if (fdp->fd_rdir) 1642 vrele(fdp->fd_rdir); 1643 fdp->fd_rdir = NULL; 1644 if (fdp->fd_jdir) 1645 vrele(fdp->fd_jdir); 1646 fdp->fd_jdir = NULL; 1647 1648 FILEDESC_UNLOCK(fdp); 1649 1650 fddrop(fdp); 1651 } 1652 1653 /* 1654 * For setugid programs, we don't want to people to use that setugidness 1655 * to generate error messages which write to a file which otherwise would 1656 * otherwise be off-limits to the process. We check for filesystems where 1657 * the vnode can change out from under us after execve (like [lin]procfs). 1658 * 1659 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is 1660 * sufficient. We also don't for check setugidness since we know we are. 1661 */ 1662 static int 1663 is_unsafe(struct file *fp) 1664 { 1665 if (fp->f_type == DTYPE_VNODE) { 1666 struct vnode *vp = fp->f_vnode; 1667 1668 if ((vp->v_vflag & VV_PROCDEP) != 0) 1669 return (1); 1670 } 1671 return (0); 1672 } 1673 1674 /* 1675 * Make this setguid thing safe, if at all possible. 1676 */ 1677 void 1678 setugidsafety(struct thread *td) 1679 { 1680 struct filedesc *fdp; 1681 int i; 1682 1683 /* Certain daemons might not have file descriptors. */ 1684 fdp = td->td_proc->p_fd; 1685 if (fdp == NULL) 1686 return; 1687 1688 /* 1689 * Note: fdp->fd_ofiles may be reallocated out from under us while 1690 * we are blocked in a close. Be careful! 1691 */ 1692 FILEDESC_LOCK(fdp); 1693 for (i = 0; i <= fdp->fd_lastfile; i++) { 1694 if (i > 2) 1695 break; 1696 if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) { 1697 struct file *fp; 1698 1699 knote_fdclose(td, i); 1700 /* 1701 * NULL-out descriptor prior to close to avoid 1702 * a race while close blocks. 1703 */ 1704 fp = fdp->fd_ofiles[i]; 1705 fdp->fd_ofiles[i] = NULL; 1706 fdp->fd_ofileflags[i] = 0; 1707 fdunused(fdp, i); 1708 FILEDESC_UNLOCK(fdp); 1709 (void) closef(fp, td); 1710 FILEDESC_LOCK(fdp); 1711 } 1712 } 1713 FILEDESC_UNLOCK(fdp); 1714 } 1715 1716 void 1717 fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td) 1718 { 1719 1720 FILEDESC_LOCK(fdp); 1721 if (fdp->fd_ofiles[idx] == fp) { 1722 fdp->fd_ofiles[idx] = NULL; 1723 fdunused(fdp, idx); 1724 FILEDESC_UNLOCK(fdp); 1725 fdrop(fp, td); 1726 } else { 1727 FILEDESC_UNLOCK(fdp); 1728 } 1729 } 1730 1731 /* 1732 * Close any files on exec? 1733 */ 1734 void 1735 fdcloseexec(struct thread *td) 1736 { 1737 struct filedesc *fdp; 1738 int i; 1739 1740 /* Certain daemons might not have file descriptors. */ 1741 fdp = td->td_proc->p_fd; 1742 if (fdp == NULL) 1743 return; 1744 1745 FILEDESC_LOCK(fdp); 1746 1747 /* 1748 * We cannot cache fd_ofiles or fd_ofileflags since operations 1749 * may block and rip them out from under us. 1750 */ 1751 for (i = 0; i <= fdp->fd_lastfile; i++) { 1752 if (fdp->fd_ofiles[i] != NULL && 1753 (fdp->fd_ofileflags[i] & UF_EXCLOSE)) { 1754 struct file *fp; 1755 1756 knote_fdclose(td, i); 1757 /* 1758 * NULL-out descriptor prior to close to avoid 1759 * a race while close blocks. 1760 */ 1761 fp = fdp->fd_ofiles[i]; 1762 fdp->fd_ofiles[i] = NULL; 1763 fdp->fd_ofileflags[i] = 0; 1764 fdunused(fdp, i); 1765 FILEDESC_UNLOCK(fdp); 1766 (void) closef(fp, td); 1767 FILEDESC_LOCK(fdp); 1768 } 1769 } 1770 FILEDESC_UNLOCK(fdp); 1771 } 1772 1773 /* 1774 * It is unsafe for set[ug]id processes to be started with file 1775 * descriptors 0..2 closed, as these descriptors are given implicit 1776 * significance in the Standard C library. fdcheckstd() will create a 1777 * descriptor referencing /dev/null for each of stdin, stdout, and 1778 * stderr that is not already open. 1779 */ 1780 int 1781 fdcheckstd(struct thread *td) 1782 { 1783 struct nameidata nd; 1784 struct filedesc *fdp; 1785 struct file *fp; 1786 register_t retval; 1787 int fd, i, error, flags, devnull; 1788 1789 GIANT_REQUIRED; /* VFS */ 1790 1791 fdp = td->td_proc->p_fd; 1792 if (fdp == NULL) 1793 return (0); 1794 KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared")); 1795 devnull = -1; 1796 error = 0; 1797 for (i = 0; i < 3; i++) { 1798 if (fdp->fd_ofiles[i] != NULL) 1799 continue; 1800 if (devnull < 0) { 1801 error = falloc(td, &fp, &fd); 1802 if (error != 0) 1803 break; 1804 /* Note extra ref on `fp' held for us by falloc(). */ 1805 KASSERT(fd == i, ("oof, we didn't get our fd")); 1806 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null", 1807 td); 1808 flags = FREAD | FWRITE; 1809 error = vn_open(&nd, &flags, 0, -1); 1810 if (error != 0) { 1811 /* 1812 * Someone may have closed the entry in the 1813 * file descriptor table, so check it hasn't 1814 * changed before dropping the reference count. 1815 */ 1816 FILEDESC_LOCK(fdp); 1817 KASSERT(fdp->fd_ofiles[fd] == fp, 1818 ("table not shared, how did it change?")); 1819 fdp->fd_ofiles[fd] = NULL; 1820 fdunused(fdp, fd); 1821 FILEDESC_UNLOCK(fdp); 1822 fdrop(fp, td); 1823 fdrop(fp, td); 1824 break; 1825 } 1826 NDFREE(&nd, NDF_ONLY_PNBUF); 1827 fp->f_flag = flags; 1828 fp->f_vnode = nd.ni_vp; 1829 if (fp->f_data == NULL) 1830 fp->f_data = nd.ni_vp; 1831 if (fp->f_ops == &badfileops) 1832 fp->f_ops = &vnops; 1833 fp->f_type = DTYPE_VNODE; 1834 VOP_UNLOCK(nd.ni_vp, 0, td); 1835 devnull = fd; 1836 fdrop(fp, td); 1837 } else { 1838 error = do_dup(td, DUP_FIXED, devnull, i, &retval); 1839 if (error != 0) 1840 break; 1841 } 1842 } 1843 return (error); 1844 } 1845 1846 /* 1847 * Internal form of close. 1848 * Decrement reference count on file structure. 1849 * Note: td may be NULL when closing a file that was being passed in a 1850 * message. 1851 * 1852 * XXXRW: Giant is not required for the caller, but often will be held; this 1853 * makes it moderately likely the Giant will be recursed in the VFS case. 1854 */ 1855 int 1856 closef(struct file *fp, struct thread *td) 1857 { 1858 struct vnode *vp; 1859 struct flock lf; 1860 struct filedesc_to_leader *fdtol; 1861 struct filedesc *fdp; 1862 1863 /* 1864 * POSIX record locking dictates that any close releases ALL 1865 * locks owned by this process. This is handled by setting 1866 * a flag in the unlock to free ONLY locks obeying POSIX 1867 * semantics, and not to free BSD-style file locks. 1868 * If the descriptor was in a message, POSIX-style locks 1869 * aren't passed with the descriptor. 1870 */ 1871 if (fp->f_type == DTYPE_VNODE) { 1872 int vfslocked; 1873 1874 vp = fp->f_vnode; 1875 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1876 if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1877 lf.l_whence = SEEK_SET; 1878 lf.l_start = 0; 1879 lf.l_len = 0; 1880 lf.l_type = F_UNLCK; 1881 (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader, 1882 F_UNLCK, &lf, F_POSIX); 1883 } 1884 fdtol = td->td_proc->p_fdtol; 1885 if (fdtol != NULL) { 1886 /* 1887 * Handle special case where file descriptor table 1888 * is shared between multiple process leaders. 1889 */ 1890 fdp = td->td_proc->p_fd; 1891 FILEDESC_LOCK(fdp); 1892 for (fdtol = fdtol->fdl_next; 1893 fdtol != td->td_proc->p_fdtol; 1894 fdtol = fdtol->fdl_next) { 1895 if ((fdtol->fdl_leader->p_flag & 1896 P_ADVLOCK) == 0) 1897 continue; 1898 fdtol->fdl_holdcount++; 1899 FILEDESC_UNLOCK(fdp); 1900 lf.l_whence = SEEK_SET; 1901 lf.l_start = 0; 1902 lf.l_len = 0; 1903 lf.l_type = F_UNLCK; 1904 vp = fp->f_vnode; 1905 (void) VOP_ADVLOCK(vp, 1906 (caddr_t)fdtol->fdl_leader, 1907 F_UNLCK, &lf, F_POSIX); 1908 FILEDESC_LOCK(fdp); 1909 fdtol->fdl_holdcount--; 1910 if (fdtol->fdl_holdcount == 0 && 1911 fdtol->fdl_wakeup != 0) { 1912 fdtol->fdl_wakeup = 0; 1913 wakeup(fdtol); 1914 } 1915 } 1916 FILEDESC_UNLOCK(fdp); 1917 } 1918 VFS_UNLOCK_GIANT(vfslocked); 1919 } 1920 return (fdrop(fp, td)); 1921 } 1922 1923 /* 1924 * Extract the file pointer associated with the specified descriptor for 1925 * the current user process. 1926 * 1927 * If the descriptor doesn't exist, EBADF is returned. 1928 * 1929 * If the descriptor exists but doesn't match 'flags' then 1930 * return EBADF for read attempts and EINVAL for write attempts. 1931 * 1932 * If 'hold' is set (non-zero) the file's refcount will be bumped on return. 1933 * It should be droped with fdrop(). 1934 * If it is not set, then the refcount will not be bumped however the 1935 * thread's filedesc struct will be returned locked (for fgetsock). 1936 * 1937 * If an error occured the non-zero error is returned and *fpp is set to NULL. 1938 * Otherwise *fpp is set and zero is returned. 1939 */ 1940 static __inline int 1941 _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold) 1942 { 1943 struct filedesc *fdp; 1944 struct file *fp; 1945 1946 *fpp = NULL; 1947 if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) 1948 return (EBADF); 1949 FILEDESC_LOCK(fdp); 1950 if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) { 1951 FILEDESC_UNLOCK(fdp); 1952 return (EBADF); 1953 } 1954 1955 /* 1956 * Note: FREAD failures returns EBADF to maintain backwards 1957 * compatibility with what routines returned before. 1958 * 1959 * Only one flag, or 0, may be specified. 1960 */ 1961 if (flags == FREAD && (fp->f_flag & FREAD) == 0) { 1962 FILEDESC_UNLOCK(fdp); 1963 return (EBADF); 1964 } 1965 if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) { 1966 FILEDESC_UNLOCK(fdp); 1967 return (EINVAL); 1968 } 1969 if (hold) { 1970 fhold(fp); 1971 FILEDESC_UNLOCK(fdp); 1972 } 1973 *fpp = fp; 1974 return (0); 1975 } 1976 1977 int 1978 fget(struct thread *td, int fd, struct file **fpp) 1979 { 1980 1981 return(_fget(td, fd, fpp, 0, 1)); 1982 } 1983 1984 int 1985 fget_read(struct thread *td, int fd, struct file **fpp) 1986 { 1987 1988 return(_fget(td, fd, fpp, FREAD, 1)); 1989 } 1990 1991 int 1992 fget_write(struct thread *td, int fd, struct file **fpp) 1993 { 1994 1995 return(_fget(td, fd, fpp, FWRITE, 1)); 1996 } 1997 1998 /* 1999 * Like fget() but loads the underlying vnode, or returns an error if 2000 * the descriptor does not represent a vnode. Note that pipes use vnodes 2001 * but never have VM objects. The returned vnode will be vref()d. 2002 * 2003 * XXX: what about the unused flags ? 2004 */ 2005 static __inline int 2006 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags) 2007 { 2008 struct file *fp; 2009 int error; 2010 2011 *vpp = NULL; 2012 if ((error = _fget(td, fd, &fp, 0, 0)) != 0) 2013 return (error); 2014 if (fp->f_vnode == NULL) { 2015 error = EINVAL; 2016 } else { 2017 *vpp = fp->f_vnode; 2018 vref(*vpp); 2019 } 2020 FILEDESC_UNLOCK(td->td_proc->p_fd); 2021 return (error); 2022 } 2023 2024 int 2025 fgetvp(struct thread *td, int fd, struct vnode **vpp) 2026 { 2027 2028 return (_fgetvp(td, fd, vpp, 0)); 2029 } 2030 2031 int 2032 fgetvp_read(struct thread *td, int fd, struct vnode **vpp) 2033 { 2034 2035 return (_fgetvp(td, fd, vpp, FREAD)); 2036 } 2037 2038 #ifdef notyet 2039 int 2040 fgetvp_write(struct thread *td, int fd, struct vnode **vpp) 2041 { 2042 2043 return (_fgetvp(td, fd, vpp, FWRITE)); 2044 } 2045 #endif 2046 2047 /* 2048 * Like fget() but loads the underlying socket, or returns an error if 2049 * the descriptor does not represent a socket. 2050 * 2051 * We bump the ref count on the returned socket. XXX Also obtain the SX 2052 * lock in the future. 2053 */ 2054 int 2055 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp) 2056 { 2057 struct file *fp; 2058 int error; 2059 2060 NET_ASSERT_GIANT(); 2061 2062 *spp = NULL; 2063 if (fflagp != NULL) 2064 *fflagp = 0; 2065 if ((error = _fget(td, fd, &fp, 0, 0)) != 0) 2066 return (error); 2067 if (fp->f_type != DTYPE_SOCKET) { 2068 error = ENOTSOCK; 2069 } else { 2070 *spp = fp->f_data; 2071 if (fflagp) 2072 *fflagp = fp->f_flag; 2073 SOCK_LOCK(*spp); 2074 soref(*spp); 2075 SOCK_UNLOCK(*spp); 2076 } 2077 FILEDESC_UNLOCK(td->td_proc->p_fd); 2078 return (error); 2079 } 2080 2081 /* 2082 * Drop the reference count on the the socket and XXX release the SX lock in 2083 * the future. The last reference closes the socket. 2084 */ 2085 void 2086 fputsock(struct socket *so) 2087 { 2088 2089 NET_ASSERT_GIANT(); 2090 ACCEPT_LOCK(); 2091 SOCK_LOCK(so); 2092 sorele(so); 2093 } 2094 2095 int 2096 fdrop(struct file *fp, struct thread *td) 2097 { 2098 2099 FILE_LOCK(fp); 2100 return (fdrop_locked(fp, td)); 2101 } 2102 2103 /* 2104 * Drop reference on struct file passed in, may call closef if the 2105 * reference hits zero. 2106 * Expects struct file locked, and will unlock it. 2107 */ 2108 static int 2109 fdrop_locked(struct file *fp, struct thread *td) 2110 { 2111 int error; 2112 2113 FILE_LOCK_ASSERT(fp, MA_OWNED); 2114 2115 if (--fp->f_count > 0) { 2116 FILE_UNLOCK(fp); 2117 return (0); 2118 } 2119 /* We have the last ref so we can proceed without the file lock. */ 2120 FILE_UNLOCK(fp); 2121 if (fp->f_count < 0) 2122 panic("fdrop: count < 0"); 2123 if (fp->f_ops != &badfileops) 2124 error = fo_close(fp, td); 2125 else 2126 error = 0; 2127 2128 sx_xlock(&filelist_lock); 2129 LIST_REMOVE(fp, f_list); 2130 openfiles--; 2131 sx_xunlock(&filelist_lock); 2132 crfree(fp->f_cred); 2133 uma_zfree(file_zone, fp); 2134 2135 return (error); 2136 } 2137 2138 /* 2139 * Apply an advisory lock on a file descriptor. 2140 * 2141 * Just attempt to get a record lock of the requested type on 2142 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 2143 */ 2144 #ifndef _SYS_SYSPROTO_H_ 2145 struct flock_args { 2146 int fd; 2147 int how; 2148 }; 2149 #endif 2150 /* 2151 * MPSAFE 2152 */ 2153 /* ARGSUSED */ 2154 int 2155 flock(struct thread *td, struct flock_args *uap) 2156 { 2157 struct file *fp; 2158 struct vnode *vp; 2159 struct flock lf; 2160 int error; 2161 2162 if ((error = fget(td, uap->fd, &fp)) != 0) 2163 return (error); 2164 if (fp->f_type != DTYPE_VNODE) { 2165 fdrop(fp, td); 2166 return (EOPNOTSUPP); 2167 } 2168 2169 mtx_lock(&Giant); 2170 vp = fp->f_vnode; 2171 lf.l_whence = SEEK_SET; 2172 lf.l_start = 0; 2173 lf.l_len = 0; 2174 if (uap->how & LOCK_UN) { 2175 lf.l_type = F_UNLCK; 2176 FILE_LOCK(fp); 2177 fp->f_flag &= ~FHASLOCK; 2178 FILE_UNLOCK(fp); 2179 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 2180 goto done2; 2181 } 2182 if (uap->how & LOCK_EX) 2183 lf.l_type = F_WRLCK; 2184 else if (uap->how & LOCK_SH) 2185 lf.l_type = F_RDLCK; 2186 else { 2187 error = EBADF; 2188 goto done2; 2189 } 2190 FILE_LOCK(fp); 2191 fp->f_flag |= FHASLOCK; 2192 FILE_UNLOCK(fp); 2193 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, 2194 (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT); 2195 done2: 2196 fdrop(fp, td); 2197 mtx_unlock(&Giant); 2198 return (error); 2199 } 2200 /* 2201 * Duplicate the specified descriptor to a free descriptor. 2202 */ 2203 int 2204 dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error) 2205 { 2206 struct file *wfp; 2207 struct file *fp; 2208 2209 /* 2210 * If the to-be-dup'd fd number is greater than the allowed number 2211 * of file descriptors, or the fd to be dup'd has already been 2212 * closed, then reject. 2213 */ 2214 FILEDESC_LOCK(fdp); 2215 if (dfd < 0 || dfd >= fdp->fd_nfiles || 2216 (wfp = fdp->fd_ofiles[dfd]) == NULL) { 2217 FILEDESC_UNLOCK(fdp); 2218 return (EBADF); 2219 } 2220 2221 /* 2222 * There are two cases of interest here. 2223 * 2224 * For ENODEV simply dup (dfd) to file descriptor 2225 * (indx) and return. 2226 * 2227 * For ENXIO steal away the file structure from (dfd) and 2228 * store it in (indx). (dfd) is effectively closed by 2229 * this operation. 2230 * 2231 * Any other error code is just returned. 2232 */ 2233 switch (error) { 2234 case ENODEV: 2235 /* 2236 * Check that the mode the file is being opened for is a 2237 * subset of the mode of the existing descriptor. 2238 */ 2239 FILE_LOCK(wfp); 2240 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { 2241 FILE_UNLOCK(wfp); 2242 FILEDESC_UNLOCK(fdp); 2243 return (EACCES); 2244 } 2245 fp = fdp->fd_ofiles[indx]; 2246 fdp->fd_ofiles[indx] = wfp; 2247 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 2248 if (fp == NULL) 2249 fdused(fdp, indx); 2250 fhold_locked(wfp); 2251 FILE_UNLOCK(wfp); 2252 FILEDESC_UNLOCK(fdp); 2253 if (fp != NULL) { 2254 /* 2255 * We now own the reference to fp that the ofiles[] 2256 * array used to own. Release it. 2257 */ 2258 FILE_LOCK(fp); 2259 fdrop_locked(fp, td); 2260 } 2261 return (0); 2262 2263 case ENXIO: 2264 /* 2265 * Steal away the file pointer from dfd and stuff it into indx. 2266 */ 2267 fp = fdp->fd_ofiles[indx]; 2268 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; 2269 fdp->fd_ofiles[dfd] = NULL; 2270 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 2271 fdp->fd_ofileflags[dfd] = 0; 2272 fdunused(fdp, dfd); 2273 if (fp == NULL) 2274 fdused(fdp, indx); 2275 if (fp != NULL) 2276 FILE_LOCK(fp); 2277 FILEDESC_UNLOCK(fdp); 2278 2279 /* 2280 * we now own the reference to fp that the ofiles[] array 2281 * used to own. Release it. 2282 */ 2283 if (fp != NULL) 2284 fdrop_locked(fp, td); 2285 return (0); 2286 2287 default: 2288 FILEDESC_UNLOCK(fdp); 2289 return (error); 2290 } 2291 /* NOTREACHED */ 2292 } 2293 2294 /* 2295 * Scan all active processes to see if any of them have a current 2296 * or root directory of `olddp'. If so, replace them with the new 2297 * mount point. 2298 */ 2299 void 2300 mountcheckdirs(struct vnode *olddp, struct vnode *newdp) 2301 { 2302 struct filedesc *fdp; 2303 struct proc *p; 2304 int nrele; 2305 2306 if (vrefcnt(olddp) == 1) 2307 return; 2308 sx_slock(&allproc_lock); 2309 LIST_FOREACH(p, &allproc, p_list) { 2310 fdp = fdhold(p); 2311 if (fdp == NULL) 2312 continue; 2313 nrele = 0; 2314 FILEDESC_LOCK_FAST(fdp); 2315 if (fdp->fd_cdir == olddp) { 2316 vref(newdp); 2317 fdp->fd_cdir = newdp; 2318 nrele++; 2319 } 2320 if (fdp->fd_rdir == olddp) { 2321 vref(newdp); 2322 fdp->fd_rdir = newdp; 2323 nrele++; 2324 } 2325 FILEDESC_UNLOCK_FAST(fdp); 2326 fddrop(fdp); 2327 while (nrele--) 2328 vrele(olddp); 2329 } 2330 sx_sunlock(&allproc_lock); 2331 if (rootvnode == olddp) { 2332 vrele(rootvnode); 2333 vref(newdp); 2334 rootvnode = newdp; 2335 } 2336 } 2337 2338 struct filedesc_to_leader * 2339 filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader) 2340 { 2341 struct filedesc_to_leader *fdtol; 2342 2343 MALLOC(fdtol, struct filedesc_to_leader *, 2344 sizeof(struct filedesc_to_leader), 2345 M_FILEDESC_TO_LEADER, 2346 M_WAITOK); 2347 fdtol->fdl_refcount = 1; 2348 fdtol->fdl_holdcount = 0; 2349 fdtol->fdl_wakeup = 0; 2350 fdtol->fdl_leader = leader; 2351 if (old != NULL) { 2352 FILEDESC_LOCK(fdp); 2353 fdtol->fdl_next = old->fdl_next; 2354 fdtol->fdl_prev = old; 2355 old->fdl_next = fdtol; 2356 fdtol->fdl_next->fdl_prev = fdtol; 2357 FILEDESC_UNLOCK(fdp); 2358 } else { 2359 fdtol->fdl_next = fdtol; 2360 fdtol->fdl_prev = fdtol; 2361 } 2362 return (fdtol); 2363 } 2364 2365 /* 2366 * Get file structures. 2367 */ 2368 static int 2369 sysctl_kern_file(SYSCTL_HANDLER_ARGS) 2370 { 2371 struct xfile xf; 2372 struct filedesc *fdp; 2373 struct file *fp; 2374 struct proc *p; 2375 int error, n; 2376 2377 /* 2378 * Note: because the number of file descriptors is calculated 2379 * in different ways for sizing vs returning the data, 2380 * there is information leakage from the first loop. However, 2381 * it is of a similar order of magnitude to the leakage from 2382 * global system statistics such as kern.openfiles. 2383 */ 2384 error = sysctl_wire_old_buffer(req, 0); 2385 if (error != 0) 2386 return (error); 2387 if (req->oldptr == NULL) { 2388 n = 16; /* A slight overestimate. */ 2389 sx_slock(&filelist_lock); 2390 LIST_FOREACH(fp, &filehead, f_list) { 2391 /* 2392 * We should grab the lock, but this is an 2393 * estimate, so does it really matter? 2394 */ 2395 /* mtx_lock(fp->f_mtxp); */ 2396 n += fp->f_count; 2397 /* mtx_unlock(f->f_mtxp); */ 2398 } 2399 sx_sunlock(&filelist_lock); 2400 return (SYSCTL_OUT(req, 0, n * sizeof(xf))); 2401 } 2402 error = 0; 2403 bzero(&xf, sizeof(xf)); 2404 xf.xf_size = sizeof(xf); 2405 sx_slock(&allproc_lock); 2406 LIST_FOREACH(p, &allproc, p_list) { 2407 if (p->p_state == PRS_NEW) 2408 continue; 2409 PROC_LOCK(p); 2410 if (p_cansee(req->td, p) != 0) { 2411 PROC_UNLOCK(p); 2412 continue; 2413 } 2414 xf.xf_pid = p->p_pid; 2415 xf.xf_uid = p->p_ucred->cr_uid; 2416 PROC_UNLOCK(p); 2417 fdp = fdhold(p); 2418 if (fdp == NULL) 2419 continue; 2420 FILEDESC_LOCK_FAST(fdp); 2421 for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) { 2422 if ((fp = fdp->fd_ofiles[n]) == NULL) 2423 continue; 2424 xf.xf_fd = n; 2425 xf.xf_file = fp; 2426 xf.xf_data = fp->f_data; 2427 xf.xf_vnode = fp->f_vnode; 2428 xf.xf_type = fp->f_type; 2429 xf.xf_count = fp->f_count; 2430 xf.xf_msgcount = fp->f_msgcount; 2431 xf.xf_offset = fp->f_offset; 2432 xf.xf_flag = fp->f_flag; 2433 error = SYSCTL_OUT(req, &xf, sizeof(xf)); 2434 if (error) 2435 break; 2436 } 2437 FILEDESC_UNLOCK_FAST(fdp); 2438 fddrop(fdp); 2439 if (error) 2440 break; 2441 } 2442 sx_sunlock(&allproc_lock); 2443 return (error); 2444 } 2445 2446 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD, 2447 0, 0, sysctl_kern_file, "S,xfile", "Entire file table"); 2448 2449 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, 2450 &maxfilesperproc, 0, "Maximum files allowed open per process"); 2451 2452 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, 2453 &maxfiles, 0, "Maximum number of files"); 2454 2455 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD, 2456 &openfiles, 0, "System-wide number of open files"); 2457 2458 /* ARGSUSED*/ 2459 static void 2460 filelistinit(void *dummy) 2461 { 2462 2463 file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL, 2464 NULL, NULL, UMA_ALIGN_PTR, 0); 2465 sx_init(&filelist_lock, "filelist lock"); 2466 mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); 2467 mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF); 2468 } 2469 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL) 2470 2471 /*-------------------------------------------------------------------*/ 2472 2473 static int 2474 badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) 2475 { 2476 2477 return (EBADF); 2478 } 2479 2480 static int 2481 badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td) 2482 { 2483 2484 return (EBADF); 2485 } 2486 2487 static int 2488 badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) 2489 { 2490 2491 return (0); 2492 } 2493 2494 static int 2495 badfo_kqfilter(struct file *fp, struct knote *kn) 2496 { 2497 2498 return (0); 2499 } 2500 2501 static int 2502 badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td) 2503 { 2504 2505 return (EBADF); 2506 } 2507 2508 static int 2509 badfo_close(struct file *fp, struct thread *td) 2510 { 2511 2512 return (EBADF); 2513 } 2514 2515 struct fileops badfileops = { 2516 .fo_read = badfo_readwrite, 2517 .fo_write = badfo_readwrite, 2518 .fo_ioctl = badfo_ioctl, 2519 .fo_poll = badfo_poll, 2520 .fo_kqfilter = badfo_kqfilter, 2521 .fo_stat = badfo_stat, 2522 .fo_close = badfo_close, 2523 }; 2524 2525 2526 /*-------------------------------------------------------------------*/ 2527 2528 /* 2529 * File Descriptor pseudo-device driver (/dev/fd/). 2530 * 2531 * Opening minor device N dup()s the file (if any) connected to file 2532 * descriptor N belonging to the calling process. Note that this driver 2533 * consists of only the ``open()'' routine, because all subsequent 2534 * references to this file will be direct to the other driver. 2535 * 2536 * XXX: we could give this one a cloning event handler if necessary. 2537 */ 2538 2539 /* ARGSUSED */ 2540 static int 2541 fdopen(struct cdev *dev, int mode, int type, struct thread *td) 2542 { 2543 2544 /* 2545 * XXX Kludge: set curthread->td_dupfd to contain the value of the 2546 * the file descriptor being sought for duplication. The error 2547 * return ensures that the vnode for this device will be released 2548 * by vn_open. Open will detect this special error and take the 2549 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 2550 * will simply report the error. 2551 */ 2552 td->td_dupfd = dev2unit(dev); 2553 return (ENODEV); 2554 } 2555 2556 static struct cdevsw fildesc_cdevsw = { 2557 .d_version = D_VERSION, 2558 .d_flags = D_NEEDGIANT, 2559 .d_open = fdopen, 2560 .d_name = "FD", 2561 }; 2562 2563 static void 2564 fildesc_drvinit(void *unused) 2565 { 2566 struct cdev *dev; 2567 2568 dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0"); 2569 make_dev_alias(dev, "stdin"); 2570 dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1"); 2571 make_dev_alias(dev, "stdout"); 2572 dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2"); 2573 make_dev_alias(dev, "stderr"); 2574 } 2575 2576 SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL) 2577