1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_compat.h" 41 #include "opt_ddb.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 46 #include <sys/conf.h> 47 #include <sys/fcntl.h> 48 #include <sys/file.h> 49 #include <sys/filedesc.h> 50 #include <sys/filio.h> 51 #include <sys/jail.h> 52 #include <sys/kernel.h> 53 #include <sys/limits.h> 54 #include <sys/lock.h> 55 #include <sys/malloc.h> 56 #include <sys/mount.h> 57 #include <sys/mqueue.h> 58 #include <sys/mutex.h> 59 #include <sys/namei.h> 60 #include <sys/priv.h> 61 #include <sys/proc.h> 62 #include <sys/resourcevar.h> 63 #include <sys/signalvar.h> 64 #include <sys/socketvar.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/syscallsubr.h> 68 #include <sys/sysctl.h> 69 #include <sys/sysproto.h> 70 #include <sys/unistd.h> 71 #include <sys/vnode.h> 72 73 #include <security/audit/audit.h> 74 75 #include <vm/uma.h> 76 77 #include <ddb/ddb.h> 78 79 static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table"); 80 static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader", 81 "file desc to leader structures"); 82 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); 83 84 static uma_zone_t file_zone; 85 86 87 /* How to treat 'new' parameter when allocating a fd for do_dup(). 
*/
/*
 * DUP_FIXED:    do_dup() installs the duplicate at exactly 'new'.
 * DUP_VARIABLE: do_dup() treats 'new' as the lowest acceptable descriptor
 *               and lets fdalloc() pick the slot.
 */
enum dup_type { DUP_VARIABLE, DUP_FIXED };

/* Forward declarations of file-local helpers. */
static int do_dup(struct thread *td, enum dup_type type, int old, int new,
    register_t *retval);
static int	fd_first_free(struct filedesc *, int, int);
static int	fd_last_used(struct filedesc *, int, int);
static void	fdgrowtable(struct filedesc *, int);
static int	fdrop_locked(struct file *fp, struct thread *td);
static void	fdunused(struct filedesc *fdp, int fd);
static void	fdused(struct filedesc *fdp, int fd);

/*
 * A process is initially started out with NDFILE descriptors stored within
 * this structure, selected to be enough for typical applications based on
 * the historical limit of 20 open files (and the usage of descriptors by
 * shells).  If these descriptors are exhausted, a larger descriptor table
 * may be allocated, up to a process' resource limit; the internal arrays
 * are then unused.
 *
 * The in-use bitmap (fd_map) is an array of NDSLOTTYPE words:
 *   NDSLOTSIZE  - size of one bitmap word, in bytes
 *   NDENTRIES   - number of descriptors tracked by one bitmap word
 *   NDSLOT(x)   - index of the bitmap word that holds descriptor x
 *   NDBIT(x)    - mask selecting descriptor x within its bitmap word
 *   NDSLOTS(x)  - number of bitmap words needed for x descriptors
 *                 (rounded up)
 */
#define NDFILE		20
#define NDSLOTSIZE	sizeof(NDSLOTTYPE)
#define	NDENTRIES	(NDSLOTSIZE * __CHAR_BIT)
#define NDSLOT(x)	((x) / NDENTRIES)
#define NDBIT(x)	((NDSLOTTYPE)1 << ((x) % NDENTRIES))
#define	NDSLOTS(x)	(((x) + NDENTRIES - 1) / NDENTRIES)

/*
 * Storage required per open file descriptor: one file pointer plus one
 * byte of per-descriptor flags.
 */
#define OFILESIZE (sizeof(struct file *) + sizeof(char))

/*
 * Basic allocation of descriptors:
 * one of the above, plus arrays for NDFILE descriptors.
 */
struct filedesc0 {
	struct	filedesc fd_fd;
	/*
	 * These arrays are used when the number of open files is
	 * <= NDFILE, and are then pointed to by the pointers above.
	 */
	struct	file *fd_dfiles[NDFILE];	/* initial file pointer table */
	char	fd_dfileflags[NDFILE];		/* initial per-fd flag bytes */
	NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];	/* initial in-use bitmap */
};

/*
 * Descriptor management.
136 */ 137 struct filelist filehead; /* head of list of open files */ 138 int openfiles; /* actual number of open files */ 139 struct sx filelist_lock; /* sx to protect filelist */ 140 struct mtx sigio_lock; /* mtx to protect pointers to sigio */ 141 void (*mq_fdclose)(struct thread *td, int fd, struct file *fp); 142 143 /* A mutex to protect the association between a proc and filedesc. */ 144 static struct mtx fdesc_mtx; 145 146 /* 147 * Find the first zero bit in the given bitmap, starting at low and not 148 * exceeding size - 1. 149 */ 150 static int 151 fd_first_free(struct filedesc *fdp, int low, int size) 152 { 153 NDSLOTTYPE *map = fdp->fd_map; 154 NDSLOTTYPE mask; 155 int off, maxoff; 156 157 if (low >= size) 158 return (low); 159 160 off = NDSLOT(low); 161 if (low % NDENTRIES) { 162 mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES))); 163 if ((mask &= ~map[off]) != 0UL) 164 return (off * NDENTRIES + ffsl(mask) - 1); 165 ++off; 166 } 167 for (maxoff = NDSLOTS(size); off < maxoff; ++off) 168 if (map[off] != ~0UL) 169 return (off * NDENTRIES + ffsl(~map[off]) - 1); 170 return (size); 171 } 172 173 /* 174 * Find the highest non-zero bit in the given bitmap, starting at low and 175 * not exceeding size - 1. 
176 */ 177 static int 178 fd_last_used(struct filedesc *fdp, int low, int size) 179 { 180 NDSLOTTYPE *map = fdp->fd_map; 181 NDSLOTTYPE mask; 182 int off, minoff; 183 184 if (low >= size) 185 return (-1); 186 187 off = NDSLOT(size); 188 if (size % NDENTRIES) { 189 mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES)); 190 if ((mask &= map[off]) != 0) 191 return (off * NDENTRIES + flsl(mask) - 1); 192 --off; 193 } 194 for (minoff = NDSLOT(low); off >= minoff; --off) 195 if (map[off] != 0) 196 return (off * NDENTRIES + flsl(map[off]) - 1); 197 return (low - 1); 198 } 199 200 static int 201 fdisused(struct filedesc *fdp, int fd) 202 { 203 KASSERT(fd >= 0 && fd < fdp->fd_nfiles, 204 ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles)); 205 return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0); 206 } 207 208 /* 209 * Mark a file descriptor as used. 210 */ 211 static void 212 fdused(struct filedesc *fdp, int fd) 213 { 214 215 FILEDESC_XLOCK_ASSERT(fdp); 216 KASSERT(!fdisused(fdp, fd), 217 ("fd already used")); 218 219 fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd); 220 if (fd > fdp->fd_lastfile) 221 fdp->fd_lastfile = fd; 222 if (fd == fdp->fd_freefile) 223 fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles); 224 } 225 226 /* 227 * Mark a file descriptor as unused. 228 */ 229 static void 230 fdunused(struct filedesc *fdp, int fd) 231 { 232 233 FILEDESC_XLOCK_ASSERT(fdp); 234 KASSERT(fdisused(fdp, fd), 235 ("fd is already unused")); 236 KASSERT(fdp->fd_ofiles[fd] == NULL, 237 ("fd is still in use")); 238 239 fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); 240 if (fd < fdp->fd_freefile) 241 fdp->fd_freefile = fd; 242 if (fd == fdp->fd_lastfile) 243 fdp->fd_lastfile = fd_last_used(fdp, 0, fd); 244 } 245 246 /* 247 * System calls on descriptors. 
248 */ 249 #ifndef _SYS_SYSPROTO_H_ 250 struct getdtablesize_args { 251 int dummy; 252 }; 253 #endif 254 /* ARGSUSED */ 255 int 256 getdtablesize(struct thread *td, struct getdtablesize_args *uap) 257 { 258 struct proc *p = td->td_proc; 259 260 PROC_LOCK(p); 261 td->td_retval[0] = 262 min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 263 PROC_UNLOCK(p); 264 return (0); 265 } 266 267 /* 268 * Duplicate a file descriptor to a particular value. 269 * 270 * Note: keep in mind that a potential race condition exists when closing 271 * descriptors from a shared descriptor table (via rfork). 272 */ 273 #ifndef _SYS_SYSPROTO_H_ 274 struct dup2_args { 275 u_int from; 276 u_int to; 277 }; 278 #endif 279 /* ARGSUSED */ 280 int 281 dup2(struct thread *td, struct dup2_args *uap) 282 { 283 284 return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to, 285 td->td_retval)); 286 } 287 288 /* 289 * Duplicate a file descriptor. 290 */ 291 #ifndef _SYS_SYSPROTO_H_ 292 struct dup_args { 293 u_int fd; 294 }; 295 #endif 296 /* ARGSUSED */ 297 int 298 dup(struct thread *td, struct dup_args *uap) 299 { 300 301 return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval)); 302 } 303 304 /* 305 * The file control system call. 
*/
#ifndef _SYS_SYSPROTO_H_
struct fcntl_args {
	int	fd;
	int	cmd;
	long	arg;
};
#endif
/*
 * System call entry point.  For the record-locking commands the user
 * argument is a pointer to a struct flock, which is copied into the
 * kernel before kern_fcntl() runs; for F_GETLK the (possibly updated)
 * structure is copied back out afterwards.  All other commands pass the
 * argument through unchanged.
 */
/* ARGSUSED */
int
fcntl(struct thread *td, struct fcntl_args *uap)
{
	struct flock fl;
	intptr_t arg;
	int error;

	error = 0;
	switch (uap->cmd) {
	case F_GETLK:
	case F_SETLK:
	case F_SETLKW:
		error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
		/* kern_fcntl() receives the address of the kernel copy. */
		arg = (intptr_t)&fl;
		break;
	default:
		arg = uap->arg;
		break;
	}
	if (error)
		return (error);
	error = kern_fcntl(td, uap->fd, uap->cmd, arg);
	if (error)
		return (error);
	if (uap->cmd == F_GETLK)
		error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
	return (error);
}

/*
 * In-kernel implementation of fcntl().
 *
 * 'arg' is the plain integer argument, except for F_GETLK, F_SETLK and
 * F_SETLKW where it is the address of a kernel-resident struct flock.
 * Results for the "get" commands are returned through td->td_retval[0].
 *
 * Locking: the filedesc lock is taken exclusively to look up the file
 * and every case is responsible for releasing it exactly once.  Cases
 * that may sleep take a reference on the file (fhold) before dropping
 * the lock and release it (fdrop) when done.  Giant is held for every
 * command except F_DUPFD, F_GETFD, F_SETFD and F_GETFL.
 *
 * Returns 0 or an errno value; EBADF if fd does not name an open file,
 * EINVAL for an unknown command.
 */
int
kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
{
	struct filedesc *fdp;
	struct flock *flp;
	struct file *fp;
	struct proc *p;
	char *pop;
	struct vnode *vp;
	u_int newmin;
	int error, flg, tmp;
	int giant_locked;

	/*
	 * XXXRW: Some fcntl() calls require Giant -- others don't.  Try to
	 * avoid grabbing Giant for calls we know don't need it.
	 */
	switch (cmd) {
	case F_DUPFD:
	case F_GETFD:
	case F_SETFD:
	case F_GETFL:
		giant_locked = 0;
		break;

	default:
		giant_locked = 1;
		mtx_lock(&Giant);
	}

	error = 0;
	flg = F_POSIX;
	p = td->td_proc;
	fdp = p->p_fd;

	/*
	 * XXXRW: It could be an exclusive lock is not [always] needed here.
	 */
	FILEDESC_XLOCK(fdp);
	/* The unsigned compare also rejects negative descriptors. */
	if ((unsigned)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL) {
		FILEDESC_XUNLOCK(fdp);
		error = EBADF;
		goto done2;
	}
	/* Per-descriptor flag byte (holds UF_EXCLOSE). */
	pop = &fdp->fd_ofileflags[fd];

	switch (cmd) {
	case F_DUPFD:
		/* mtx_assert(&Giant, MA_NOTOWNED); */
		FILEDESC_XUNLOCK(fdp);
		/* arg is the lowest descriptor number the caller accepts. */
		newmin = arg;
		PROC_LOCK(p);
		if (newmin >= lim_cur(p, RLIMIT_NOFILE) ||
		    newmin >= maxfilesperproc) {
			PROC_UNLOCK(p);
			error = EINVAL;
			break;
		}
		PROC_UNLOCK(p);
		error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval);
		break;

	case F_GETFD:
		/* mtx_assert(&Giant, MA_NOTOWNED); */
		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
		FILEDESC_XUNLOCK(fdp);
		break;

	case F_SETFD:
		/* mtx_assert(&Giant, MA_NOTOWNED); */
		/* Only the close-on-exec bit is affected. */
		*pop = (*pop &~ UF_EXCLOSE) |
		    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
		FILEDESC_XUNLOCK(fdp);
		break;

	case F_GETFL:
		/* mtx_assert(&Giant, MA_NOTOWNED); */
		FILE_LOCK(fp);
		td->td_retval[0] = OFLAGS(fp->f_flag);
		FILE_UNLOCK(fp);
		FILEDESC_XUNLOCK(fdp);
		break;

	case F_SETFL:
		mtx_assert(&Giant, MA_OWNED);
		FILE_LOCK(fp);
		fhold_locked(fp);
		/* Replace the fcntl-settable flags; access mode is ignored. */
		fp->f_flag &= ~FCNTLFLAGS;
		fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
		FILE_UNLOCK(fp);
		FILEDESC_XUNLOCK(fdp);
		/* Push the new non-blocking state down to the object. */
		tmp = fp->f_flag & FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		if (error) {
			fdrop(fp, td);
			break;
		}
		/* Likewise for the async-I/O state. */
		tmp = fp->f_flag & FASYNC;
		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
		if (error == 0) {
			fdrop(fp, td);
			break;
		}
		/*
		 * FIOASYNC failed: clear FNONBLOCK again, both in the flag
		 * word and in the object, and report the FIOASYNC error.
		 */
		FILE_LOCK(fp);
		fp->f_flag &= ~FNONBLOCK;
		FILE_UNLOCK(fp);
		tmp = 0;
		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_GETOWN:
		mtx_assert(&Giant, MA_OWNED);
		fhold(fp);
		FILEDESC_XUNLOCK(fdp);
		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
		if (error == 0)
			td->td_retval[0] = tmp;
		fdrop(fp, td);
		break;

	case F_SETOWN:
		mtx_assert(&Giant, MA_OWNED);
		fhold(fp);
		FILEDESC_XUNLOCK(fdp);
		tmp = arg;
		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_SETLKW:
		mtx_assert(&Giant, MA_OWNED);
		flg |= F_WAIT;
		/* FALLTHROUGH F_SETLK */

	case F_SETLK:
		mtx_assert(&Giant, MA_OWNED);
		/* Advisory locks are only applied to vnode-backed files. */
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_XUNLOCK(fdp);
			error = EBADF;
			break;
		}

		flp = (struct flock *)arg;
		if (flp->l_whence == SEEK_CUR) {
			/* Guard the addition below against overflow. */
			if (fp->f_offset < 0 ||
			    (flp->l_start > 0 &&
			     fp->f_offset > OFF_MAX - flp->l_start)) {
				FILEDESC_XUNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}

		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_XUNLOCK(fdp);
		vp = fp->f_vnode;

		switch (flp->l_type) {
		case F_RDLCK:
			/* A read lock needs the file open for reading. */
			if ((fp->f_flag & FREAD) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_WRLCK:
			/* A write lock needs the file open for writing. */
			if ((fp->f_flag & FWRITE) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_UNLCK:
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
			    flp, F_POSIX);
			break;
		default:
			error = EINVAL;
			break;
		}
		/* Check for race with close */
		FILEDESC_XLOCK(fdp);
		if ((unsigned) fd >= fdp->fd_nfiles ||
		    fp != fdp->fd_ofiles[fd]) {
			/*
			 * The descriptor no longer refers to this file:
			 * release every lock held on the whole file.
			 */
			FILEDESC_XUNLOCK(fdp);
			flp->l_whence = SEEK_SET;
			flp->l_start = 0;
			flp->l_len = 0;
			flp->l_type = F_UNLCK;
			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
					   F_UNLCK, flp, F_POSIX);
		} else
			FILEDESC_XUNLOCK(fdp);
		fdrop(fp, td);
		break;

	case F_GETLK:
		mtx_assert(&Giant, MA_OWNED);
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_XUNLOCK(fdp);
			error = EBADF;
			break;
		}
		flp = (struct flock *)arg;
		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
		    flp->l_type != F_UNLCK) {
			FILEDESC_XUNLOCK(fdp);
			error = EINVAL;
			break;
		}
		if (flp->l_whence == SEEK_CUR) {
			/* Guard the addition below in both directions. */
			if ((flp->l_start > 0 &&
			    fp->f_offset > OFF_MAX - flp->l_start) ||
			    (flp->l_start < 0 &&
			     fp->f_offset < OFF_MIN - flp->l_start)) {
				FILEDESC_XUNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}
		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_XUNLOCK(fdp);
		vp = fp->f_vnode;
		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
		    F_POSIX);
		fdrop(fp, td);
		break;
	default:
		FILEDESC_XUNLOCK(fdp);
		error = EINVAL;
		break;
	}
done2:
	if (giant_locked)
		mtx_unlock(&Giant);
	return (error);
}

/*
 * Common code for dup, dup2, and fcntl(F_DUPFD).
 *
 * With DUP_FIXED the duplicate is installed at exactly 'new', replacing
 * and closing whatever was open there; with DUP_VARIABLE 'new' is the
 * lowest descriptor number fdalloc() may return.  The chosen descriptor
 * is stored in *retval.  The duplicate never inherits UF_EXCLOSE.
 *
 * Returns 0, EBADF (bad or concurrently changed descriptor), EMFILE
 * ('new' at or beyond the per-process limit), or an fdalloc() error.
 */
static int
do_dup(struct thread *td, enum dup_type type, int old, int new,
    register_t *retval)
{
	struct filedesc *fdp;
	struct proc *p;
	struct file *fp;
	struct file *delfp;
	int error, holdleaders, maxfd;

	KASSERT((type == DUP_VARIABLE || type == DUP_FIXED),
	    ("invalid dup type %d", type));

	p = td->td_proc;
	fdp = p->p_fd;

	/*
	 * Verify we have a valid descriptor to dup from and possibly to
	 * dup to.
	 */
	if (old < 0 || new < 0)
		return (EBADF);
	PROC_LOCK(p);
	maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
	PROC_UNLOCK(p);
	if (new >= maxfd)
		return (EMFILE);

	FILEDESC_XLOCK(fdp);
	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
		FILEDESC_XUNLOCK(fdp);
		return (EBADF);
	}
	/* dup2(fd, fd) on a valid descriptor is a successful no-op. */
	if (type == DUP_FIXED && old == new) {
		*retval = new;
		FILEDESC_XUNLOCK(fdp);
		return (0);
	}
	fp = fdp->fd_ofiles[old];
	/* This reference becomes the new descriptor's reference on success. */
	fhold(fp);

	/*
	 * If the caller specified a file descriptor, make sure the file
	 * table is large enough to hold it, and grab it.  Otherwise, just
	 * allocate a new descriptor the usual way.  Since the filedesc
	 * lock may be temporarily dropped in the process, we have to look
	 * out for a race.
	 */
	if (type == DUP_FIXED) {
		if (new >= fdp->fd_nfiles)
			fdgrowtable(fdp, new + 1);
		if (fdp->fd_ofiles[new] == NULL)
			fdused(fdp, new);
	} else {
		if ((error = fdalloc(td, new, &new)) != 0) {
			FILEDESC_XUNLOCK(fdp);
			fdrop(fp, td);
			return (error);
		}
	}

	/*
	 * If the old file changed out from under us then treat it as a
	 * bad file descriptor.  Userland should do its own locking to
	 * avoid this case.
	 */
	if (fdp->fd_ofiles[old] != fp ||
	    (fdp->fd_ofileflags[old] & UF_OPENING) != 0 ||
	    (fdp->fd_ofileflags[new] & UF_OPENING) != 0) {
		/* we've allocated a descriptor which we won't use */
		if (fdp->fd_ofiles[new] == NULL)
			fdunused(fdp, new);
		FILEDESC_XUNLOCK(fdp);
		fdrop(fp, td);
		return (EBADF);
	}
	KASSERT(old != new,
	    ("new fd is same as old"));

	/*
	 * Save info on the descriptor being overwritten.  We cannot close
	 * it without introducing an ownership race for the slot, since we
	 * need to drop the filedesc lock to call closef().
	 *
	 * XXX this duplicates parts of close().
	 */
	delfp = fdp->fd_ofiles[new];
	holdleaders = 0;
	if (delfp != NULL) {
		if (td->td_proc->p_fdtol != NULL) {
			/*
			 * Ask fdfree() to sleep to ensure that all relevant
			 * process leaders can be traversed in closef().
			 */
			fdp->fd_holdleaderscount++;
			holdleaders = 1;
		}
	}

	/*
	 * Duplicate the source descriptor
	 */
	fdp->fd_ofiles[new] = fp;
	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
	if (new > fdp->fd_lastfile)
		fdp->fd_lastfile = new;
	*retval = new;

	/*
	 * If we dup'd over a valid file, we now own the reference to it
	 * and must dispose of it using closef() semantics (as if a
	 * close() were performed on it).
	 *
	 * XXX this duplicates parts of close().
	 */
	if (delfp != NULL) {
		knote_fdclose(td, new);
		if (delfp->f_type == DTYPE_MQUEUE)
			mq_fdclose(td, new, delfp);
		FILEDESC_XUNLOCK(fdp);
		(void) closef(delfp, td);
		if (holdleaders) {
			FILEDESC_XLOCK(fdp);
			fdp->fd_holdleaderscount--;
			/* Last holder wakes whoever is waiting on the count. */
			if (fdp->fd_holdleaderscount == 0 &&
			    fdp->fd_holdleaderswakeup != 0) {
				fdp->fd_holdleaderswakeup = 0;
				wakeup(&fdp->fd_holdleaderscount);
			}
			FILEDESC_XUNLOCK(fdp);
		}
	} else {
		FILEDESC_XUNLOCK(fdp);
	}
	return (0);
}

/*
 * If sigio is on the list associated with a process or process group,
 * disable signalling from the device, remove sigio from the list and
 * free sigio.
744 */ 745 void 746 funsetown(struct sigio **sigiop) 747 { 748 struct sigio *sigio; 749 750 SIGIO_LOCK(); 751 sigio = *sigiop; 752 if (sigio == NULL) { 753 SIGIO_UNLOCK(); 754 return; 755 } 756 *(sigio->sio_myref) = NULL; 757 if ((sigio)->sio_pgid < 0) { 758 struct pgrp *pg = (sigio)->sio_pgrp; 759 PGRP_LOCK(pg); 760 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio, 761 sigio, sio_pgsigio); 762 PGRP_UNLOCK(pg); 763 } else { 764 struct proc *p = (sigio)->sio_proc; 765 PROC_LOCK(p); 766 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio, 767 sigio, sio_pgsigio); 768 PROC_UNLOCK(p); 769 } 770 SIGIO_UNLOCK(); 771 crfree(sigio->sio_ucred); 772 FREE(sigio, M_SIGIO); 773 } 774 775 /* 776 * Free a list of sigio structures. 777 * We only need to lock the SIGIO_LOCK because we have made ourselves 778 * inaccessible to callers of fsetown and therefore do not need to lock 779 * the proc or pgrp struct for the list manipulation. 780 */ 781 void 782 funsetownlst(struct sigiolst *sigiolst) 783 { 784 struct proc *p; 785 struct pgrp *pg; 786 struct sigio *sigio; 787 788 sigio = SLIST_FIRST(sigiolst); 789 if (sigio == NULL) 790 return; 791 p = NULL; 792 pg = NULL; 793 794 /* 795 * Every entry of the list should belong 796 * to a single proc or pgrp. 
797 */ 798 if (sigio->sio_pgid < 0) { 799 pg = sigio->sio_pgrp; 800 PGRP_LOCK_ASSERT(pg, MA_NOTOWNED); 801 } else /* if (sigio->sio_pgid > 0) */ { 802 p = sigio->sio_proc; 803 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 804 } 805 806 SIGIO_LOCK(); 807 while ((sigio = SLIST_FIRST(sigiolst)) != NULL) { 808 *(sigio->sio_myref) = NULL; 809 if (pg != NULL) { 810 KASSERT(sigio->sio_pgid < 0, 811 ("Proc sigio in pgrp sigio list")); 812 KASSERT(sigio->sio_pgrp == pg, 813 ("Bogus pgrp in sigio list")); 814 PGRP_LOCK(pg); 815 SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, 816 sio_pgsigio); 817 PGRP_UNLOCK(pg); 818 } else /* if (p != NULL) */ { 819 KASSERT(sigio->sio_pgid > 0, 820 ("Pgrp sigio in proc sigio list")); 821 KASSERT(sigio->sio_proc == p, 822 ("Bogus proc in sigio list")); 823 PROC_LOCK(p); 824 SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, 825 sio_pgsigio); 826 PROC_UNLOCK(p); 827 } 828 SIGIO_UNLOCK(); 829 crfree(sigio->sio_ucred); 830 FREE(sigio, M_SIGIO); 831 SIGIO_LOCK(); 832 } 833 SIGIO_UNLOCK(); 834 } 835 836 /* 837 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg). 838 * 839 * After permission checking, add a sigio structure to the sigio list for 840 * the process or process group. 841 */ 842 int 843 fsetown(pid_t pgid, struct sigio **sigiop) 844 { 845 struct proc *proc; 846 struct pgrp *pgrp; 847 struct sigio *sigio; 848 int ret; 849 850 if (pgid == 0) { 851 funsetown(sigiop); 852 return (0); 853 } 854 855 ret = 0; 856 857 /* Allocate and fill in the new sigio out of locks. */ 858 MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK); 859 sigio->sio_pgid = pgid; 860 sigio->sio_ucred = crhold(curthread->td_ucred); 861 sigio->sio_myref = sigiop; 862 863 sx_slock(&proctree_lock); 864 if (pgid > 0) { 865 proc = pfind(pgid); 866 if (proc == NULL) { 867 ret = ESRCH; 868 goto fail; 869 } 870 871 /* 872 * Policy - Don't allow a process to FSETOWN a process 873 * in another session. 
874 * 875 * Remove this test to allow maximum flexibility or 876 * restrict FSETOWN to the current process or process 877 * group for maximum safety. 878 */ 879 PROC_UNLOCK(proc); 880 if (proc->p_session != curthread->td_proc->p_session) { 881 ret = EPERM; 882 goto fail; 883 } 884 885 pgrp = NULL; 886 } else /* if (pgid < 0) */ { 887 pgrp = pgfind(-pgid); 888 if (pgrp == NULL) { 889 ret = ESRCH; 890 goto fail; 891 } 892 PGRP_UNLOCK(pgrp); 893 894 /* 895 * Policy - Don't allow a process to FSETOWN a process 896 * in another session. 897 * 898 * Remove this test to allow maximum flexibility or 899 * restrict FSETOWN to the current process or process 900 * group for maximum safety. 901 */ 902 if (pgrp->pg_session != curthread->td_proc->p_session) { 903 ret = EPERM; 904 goto fail; 905 } 906 907 proc = NULL; 908 } 909 funsetown(sigiop); 910 if (pgid > 0) { 911 PROC_LOCK(proc); 912 /* 913 * Since funsetownlst() is called without the proctree 914 * locked, we need to check for P_WEXIT. 915 * XXX: is ESRCH correct? 916 */ 917 if ((proc->p_flag & P_WEXIT) != 0) { 918 PROC_UNLOCK(proc); 919 ret = ESRCH; 920 goto fail; 921 } 922 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio); 923 sigio->sio_proc = proc; 924 PROC_UNLOCK(proc); 925 } else { 926 PGRP_LOCK(pgrp); 927 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio); 928 sigio->sio_pgrp = pgrp; 929 PGRP_UNLOCK(pgrp); 930 } 931 sx_sunlock(&proctree_lock); 932 SIGIO_LOCK(); 933 *sigiop = sigio; 934 SIGIO_UNLOCK(); 935 return (0); 936 937 fail: 938 sx_sunlock(&proctree_lock); 939 crfree(sigio->sio_ucred); 940 FREE(sigio, M_SIGIO); 941 return (ret); 942 } 943 944 /* 945 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg). 946 */ 947 pid_t 948 fgetown(sigiop) 949 struct sigio **sigiop; 950 { 951 pid_t pgid; 952 953 SIGIO_LOCK(); 954 pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0; 955 SIGIO_UNLOCK(); 956 return (pgid); 957 } 958 959 /* 960 * Close a file descriptor. 
961 */ 962 #ifndef _SYS_SYSPROTO_H_ 963 struct close_args { 964 int fd; 965 }; 966 #endif 967 /* ARGSUSED */ 968 int 969 close(td, uap) 970 struct thread *td; 971 struct close_args *uap; 972 { 973 974 return (kern_close(td, uap->fd)); 975 } 976 977 int 978 kern_close(td, fd) 979 struct thread *td; 980 int fd; 981 { 982 struct filedesc *fdp; 983 struct file *fp; 984 int error; 985 int holdleaders; 986 987 error = 0; 988 holdleaders = 0; 989 fdp = td->td_proc->p_fd; 990 991 AUDIT_SYSCLOSE(td, fd); 992 993 FILEDESC_XLOCK(fdp); 994 if ((unsigned)fd >= fdp->fd_nfiles || 995 (fp = fdp->fd_ofiles[fd]) == NULL || 996 (fdp->fd_ofileflags[fd] & UF_OPENING) != 0) { 997 FILEDESC_XUNLOCK(fdp); 998 return (EBADF); 999 } 1000 fdp->fd_ofiles[fd] = NULL; 1001 fdp->fd_ofileflags[fd] = 0; 1002 fdunused(fdp, fd); 1003 if (td->td_proc->p_fdtol != NULL) { 1004 /* 1005 * Ask fdfree() to sleep to ensure that all relevant 1006 * process leaders can be traversed in closef(). 1007 */ 1008 fdp->fd_holdleaderscount++; 1009 holdleaders = 1; 1010 } 1011 1012 /* 1013 * We now hold the fp reference that used to be owned by the 1014 * descriptor array. We have to unlock the FILEDESC *AFTER* 1015 * knote_fdclose to prevent a race of the fd getting opened, a knote 1016 * added, and deleteing a knote for the new fd. 1017 */ 1018 knote_fdclose(td, fd); 1019 if (fp->f_type == DTYPE_MQUEUE) 1020 mq_fdclose(td, fd, fp); 1021 FILEDESC_XUNLOCK(fdp); 1022 1023 error = closef(fp, td); 1024 if (holdleaders) { 1025 FILEDESC_XLOCK(fdp); 1026 fdp->fd_holdleaderscount--; 1027 if (fdp->fd_holdleaderscount == 0 && 1028 fdp->fd_holdleaderswakeup != 0) { 1029 fdp->fd_holdleaderswakeup = 0; 1030 wakeup(&fdp->fd_holdleaderscount); 1031 } 1032 FILEDESC_XUNLOCK(fdp); 1033 } 1034 return (error); 1035 } 1036 1037 #if defined(COMPAT_43) 1038 /* 1039 * Return status information about a file descriptor. 
1040 */ 1041 #ifndef _SYS_SYSPROTO_H_ 1042 struct ofstat_args { 1043 int fd; 1044 struct ostat *sb; 1045 }; 1046 #endif 1047 /* ARGSUSED */ 1048 int 1049 ofstat(struct thread *td, struct ofstat_args *uap) 1050 { 1051 struct ostat oub; 1052 struct stat ub; 1053 int error; 1054 1055 error = kern_fstat(td, uap->fd, &ub); 1056 if (error == 0) { 1057 cvtstat(&ub, &oub); 1058 error = copyout(&oub, uap->sb, sizeof(oub)); 1059 } 1060 return (error); 1061 } 1062 #endif /* COMPAT_43 */ 1063 1064 /* 1065 * Return status information about a file descriptor. 1066 */ 1067 #ifndef _SYS_SYSPROTO_H_ 1068 struct fstat_args { 1069 int fd; 1070 struct stat *sb; 1071 }; 1072 #endif 1073 /* ARGSUSED */ 1074 int 1075 fstat(struct thread *td, struct fstat_args *uap) 1076 { 1077 struct stat ub; 1078 int error; 1079 1080 error = kern_fstat(td, uap->fd, &ub); 1081 if (error == 0) 1082 error = copyout(&ub, uap->sb, sizeof(ub)); 1083 return (error); 1084 } 1085 1086 int 1087 kern_fstat(struct thread *td, int fd, struct stat *sbp) 1088 { 1089 struct file *fp; 1090 int error; 1091 1092 AUDIT_ARG(fd, fd); 1093 1094 if ((error = fget(td, fd, &fp)) != 0) 1095 return (error); 1096 1097 AUDIT_ARG(file, td->td_proc, fp); 1098 1099 error = fo_stat(fp, sbp, td->td_ucred, td); 1100 fdrop(fp, td); 1101 return (error); 1102 } 1103 1104 /* 1105 * Return status information about a file descriptor. 1106 */ 1107 #ifndef _SYS_SYSPROTO_H_ 1108 struct nfstat_args { 1109 int fd; 1110 struct nstat *sb; 1111 }; 1112 #endif 1113 /* ARGSUSED */ 1114 int 1115 nfstat(struct thread *td, struct nfstat_args *uap) 1116 { 1117 struct nstat nub; 1118 struct stat ub; 1119 int error; 1120 1121 error = kern_fstat(td, uap->fd, &ub); 1122 if (error == 0) { 1123 cvtnstat(&ub, &nub); 1124 error = copyout(&nub, uap->sb, sizeof(nub)); 1125 } 1126 return (error); 1127 } 1128 1129 /* 1130 * Return pathconf information about a file descriptor. 
1131 */ 1132 #ifndef _SYS_SYSPROTO_H_ 1133 struct fpathconf_args { 1134 int fd; 1135 int name; 1136 }; 1137 #endif 1138 /* ARGSUSED */ 1139 int 1140 fpathconf(struct thread *td, struct fpathconf_args *uap) 1141 { 1142 struct file *fp; 1143 struct vnode *vp; 1144 int error; 1145 1146 if ((error = fget(td, uap->fd, &fp)) != 0) 1147 return (error); 1148 1149 /* If asynchronous I/O is available, it works for all descriptors. */ 1150 if (uap->name == _PC_ASYNC_IO) { 1151 td->td_retval[0] = async_io_version; 1152 goto out; 1153 } 1154 vp = fp->f_vnode; 1155 if (vp != NULL) { 1156 int vfslocked; 1157 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1158 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1159 error = VOP_PATHCONF(vp, uap->name, td->td_retval); 1160 VOP_UNLOCK(vp, 0, td); 1161 VFS_UNLOCK_GIANT(vfslocked); 1162 } else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { 1163 if (uap->name != _PC_PIPE_BUF) { 1164 error = EINVAL; 1165 } else { 1166 td->td_retval[0] = PIPE_BUF; 1167 error = 0; 1168 } 1169 } else { 1170 error = EOPNOTSUPP; 1171 } 1172 out: 1173 fdrop(fp, td); 1174 return (error); 1175 } 1176 1177 /* 1178 * Grow the file table to accomodate (at least) nfd descriptors. This may 1179 * block and drop the filedesc lock, but it will reacquire it before 1180 * returning. 
*/
/*
 * The new size is nfd rounded up to a whole number of bitmap words.  The
 * file-pointer array and the flag bytes share one allocation (flags
 * follow the pointers); the bitmap is reallocated only when it needs
 * more words.  If the table is already large enough, or another thread
 * grew it while the lock was dropped, nothing is changed.
 */
static void
fdgrowtable(struct filedesc *fdp, int nfd)
{
	struct file **ntable;
	char *nfileflags;
	int nnfiles, onfiles;
	NDSLOTTYPE *nmap;

	FILEDESC_XLOCK_ASSERT(fdp);

	KASSERT(fdp->fd_nfiles > 0,
	    ("zero-length file table"));

	/* compute the size of the new table */
	onfiles = fdp->fd_nfiles;
	nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */
	if (nnfiles <= onfiles)
		/* the table is already large enough */
		return;

	/* allocate a new table and (if required) new bitmaps */
	FILEDESC_XUNLOCK(fdp);
	MALLOC(ntable, struct file **, nnfiles * OFILESIZE,
	    M_FILEDESC, M_ZERO | M_WAITOK);
	/* The flag bytes live directly after the nnfiles file pointers. */
	nfileflags = (char *)&ntable[nnfiles];
	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles))
		MALLOC(nmap, NDSLOTTYPE *, NDSLOTS(nnfiles) * NDSLOTSIZE,
		    M_FILEDESC, M_ZERO | M_WAITOK);
	else
		nmap = NULL;
	FILEDESC_XLOCK(fdp);

	/*
	 * We now have new tables ready to go.  Since we dropped the
	 * filedesc lock to call malloc(), watch out for a race.
	 */
	onfiles = fdp->fd_nfiles;
	if (onfiles >= nnfiles) {
		/* we lost the race, but that's OK */
		free(ntable, M_FILEDESC);
		if (nmap != NULL)
			free(nmap, M_FILEDESC);
		return;
	}
	bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable));
	bcopy(fdp->fd_ofileflags, nfileflags, onfiles);
	/*
	 * Only a table larger than NDFILE entries is freed here; a table
	 * of NDFILE entries is presumably the one embedded in struct
	 * filedesc0 rather than a malloc'ed one.
	 */
	if (onfiles > NDFILE)
		free(fdp->fd_ofiles, M_FILEDESC);
	fdp->fd_ofiles = ntable;
	fdp->fd_ofileflags = nfileflags;
	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
		bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap));
		/* Same reasoning as above for the initial bitmap. */
		if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
			free(fdp->fd_map, M_FILEDESC);
		fdp->fd_map = nmap;
	}
	fdp->fd_nfiles = nnfiles;
}

/*
 * Allocate a file descriptor for the process.
1243 */ 1244 int 1245 fdalloc(struct thread *td, int minfd, int *result) 1246 { 1247 struct proc *p = td->td_proc; 1248 struct filedesc *fdp = p->p_fd; 1249 int fd = -1, maxfd; 1250 1251 FILEDESC_XLOCK_ASSERT(fdp); 1252 1253 if (fdp->fd_freefile > minfd) 1254 minfd = fdp->fd_freefile; 1255 1256 PROC_LOCK(p); 1257 maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 1258 PROC_UNLOCK(p); 1259 1260 /* 1261 * Search the bitmap for a free descriptor. If none is found, try 1262 * to grow the file table. Keep at it until we either get a file 1263 * descriptor or run into process or system limits; fdgrowtable() 1264 * may drop the filedesc lock, so we're in a race. 1265 */ 1266 for (;;) { 1267 fd = fd_first_free(fdp, minfd, fdp->fd_nfiles); 1268 if (fd >= maxfd) 1269 return (EMFILE); 1270 if (fd < fdp->fd_nfiles) 1271 break; 1272 fdgrowtable(fdp, min(fdp->fd_nfiles * 2, maxfd)); 1273 } 1274 1275 /* 1276 * Perform some sanity checks, then mark the file descriptor as 1277 * used and return it to the caller. 1278 */ 1279 KASSERT(!fdisused(fdp, fd), 1280 ("fd_first_free() returned non-free descriptor")); 1281 KASSERT(fdp->fd_ofiles[fd] == NULL, 1282 ("free descriptor isn't")); 1283 fdp->fd_ofileflags[fd] = 0; /* XXX needed? */ 1284 fdused(fdp, fd); 1285 *result = fd; 1286 return (0); 1287 } 1288 1289 /* 1290 * Check to see whether n user file descriptors are available to the process 1291 * p. 
1292 */ 1293 int 1294 fdavail(struct thread *td, int n) 1295 { 1296 struct proc *p = td->td_proc; 1297 struct filedesc *fdp = td->td_proc->p_fd; 1298 struct file **fpp; 1299 int i, lim, last; 1300 1301 FILEDESC_LOCK_ASSERT(fdp); 1302 1303 PROC_LOCK(p); 1304 lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 1305 PROC_UNLOCK(p); 1306 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) 1307 return (1); 1308 last = min(fdp->fd_nfiles, lim); 1309 fpp = &fdp->fd_ofiles[fdp->fd_freefile]; 1310 for (i = last - fdp->fd_freefile; --i >= 0; fpp++) { 1311 if (*fpp == NULL && --n <= 0) 1312 return (1); 1313 } 1314 return (0); 1315 } 1316 1317 /* 1318 * Create a new open file structure and allocate a file decriptor for the 1319 * process that refers to it. We add one reference to the file for the 1320 * descriptor table and one reference for resultfp. This is to prevent us 1321 * being preempted and the entry in the descriptor table closed after we 1322 * release the FILEDESC lock. 1323 */ 1324 int 1325 falloc(struct thread *td, struct file **resultfp, int *resultfd) 1326 { 1327 struct proc *p = td->td_proc; 1328 struct file *fp, *fq; 1329 int error, i; 1330 int maxuserfiles = maxfiles - (maxfiles / 20); 1331 static struct timeval lastfail; 1332 static int curfail; 1333 1334 fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO); 1335 sx_xlock(&filelist_lock); 1336 1337 if ((openfiles >= maxuserfiles && 1338 priv_check_cred(td->td_ucred, PRIV_MAXFILES, SUSER_RUID) != 0) || 1339 openfiles >= maxfiles) { 1340 if (ppsratecheck(&lastfail, &curfail, 1)) { 1341 printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n", 1342 td->td_ucred->cr_ruid); 1343 } 1344 sx_xunlock(&filelist_lock); 1345 uma_zfree(file_zone, fp); 1346 return (ENFILE); 1347 } 1348 openfiles++; 1349 1350 /* 1351 * If the process has file descriptor zero open, add the new file 1352 * descriptor to the list of open files at that point, otherwise 1353 * put it at the front of the list of open files. 
1354 */ 1355 fp->f_mtxp = mtx_pool_alloc(mtxpool_sleep); 1356 fp->f_count = 1; 1357 if (resultfp) 1358 fp->f_count++; 1359 fp->f_cred = crhold(td->td_ucred); 1360 fp->f_ops = &badfileops; 1361 fp->f_data = NULL; 1362 fp->f_vnode = NULL; 1363 FILEDESC_XLOCK(p->p_fd); 1364 if ((fq = p->p_fd->fd_ofiles[0])) { 1365 LIST_INSERT_AFTER(fq, fp, f_list); 1366 } else { 1367 LIST_INSERT_HEAD(&filehead, fp, f_list); 1368 } 1369 sx_xunlock(&filelist_lock); 1370 if ((error = fdalloc(td, 0, &i))) { 1371 FILEDESC_XUNLOCK(p->p_fd); 1372 fdrop(fp, td); 1373 if (resultfp) 1374 fdrop(fp, td); 1375 return (error); 1376 } 1377 p->p_fd->fd_ofiles[i] = fp; 1378 FILEDESC_XUNLOCK(p->p_fd); 1379 if (resultfp) 1380 *resultfp = fp; 1381 if (resultfd) 1382 *resultfd = i; 1383 return (0); 1384 } 1385 1386 /* 1387 * Build a new filedesc structure from another. 1388 * Copy the current, root, and jail root vnode references. 1389 */ 1390 struct filedesc * 1391 fdinit(struct filedesc *fdp) 1392 { 1393 struct filedesc0 *newfdp; 1394 1395 newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO); 1396 FILEDESC_LOCK_INIT(&newfdp->fd_fd); 1397 if (fdp != NULL) { 1398 FILEDESC_XLOCK(fdp); 1399 newfdp->fd_fd.fd_cdir = fdp->fd_cdir; 1400 if (newfdp->fd_fd.fd_cdir) 1401 VREF(newfdp->fd_fd.fd_cdir); 1402 newfdp->fd_fd.fd_rdir = fdp->fd_rdir; 1403 if (newfdp->fd_fd.fd_rdir) 1404 VREF(newfdp->fd_fd.fd_rdir); 1405 newfdp->fd_fd.fd_jdir = fdp->fd_jdir; 1406 if (newfdp->fd_fd.fd_jdir) 1407 VREF(newfdp->fd_fd.fd_jdir); 1408 FILEDESC_XUNLOCK(fdp); 1409 } 1410 1411 /* Create the file descriptor table. 
*/ 1412 newfdp->fd_fd.fd_refcnt = 1; 1413 newfdp->fd_fd.fd_holdcnt = 1; 1414 newfdp->fd_fd.fd_cmask = CMASK; 1415 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 1416 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 1417 newfdp->fd_fd.fd_nfiles = NDFILE; 1418 newfdp->fd_fd.fd_map = newfdp->fd_dmap; 1419 newfdp->fd_fd.fd_lastfile = -1; 1420 return (&newfdp->fd_fd); 1421 } 1422 1423 static struct filedesc * 1424 fdhold(struct proc *p) 1425 { 1426 struct filedesc *fdp; 1427 1428 mtx_lock(&fdesc_mtx); 1429 fdp = p->p_fd; 1430 if (fdp != NULL) 1431 fdp->fd_holdcnt++; 1432 mtx_unlock(&fdesc_mtx); 1433 return (fdp); 1434 } 1435 1436 static void 1437 fddrop(struct filedesc *fdp) 1438 { 1439 int i; 1440 1441 mtx_lock(&fdesc_mtx); 1442 i = --fdp->fd_holdcnt; 1443 mtx_unlock(&fdesc_mtx); 1444 if (i > 0) 1445 return; 1446 1447 FILEDESC_LOCK_DESTROY(fdp); 1448 FREE(fdp, M_FILEDESC); 1449 } 1450 1451 /* 1452 * Share a filedesc structure. 1453 */ 1454 struct filedesc * 1455 fdshare(struct filedesc *fdp) 1456 { 1457 1458 FILEDESC_XLOCK(fdp); 1459 fdp->fd_refcnt++; 1460 FILEDESC_XUNLOCK(fdp); 1461 return (fdp); 1462 } 1463 1464 /* 1465 * Unshare a filedesc structure, if necessary by making a copy 1466 */ 1467 void 1468 fdunshare(struct proc *p, struct thread *td) 1469 { 1470 1471 FILEDESC_XLOCK(p->p_fd); 1472 if (p->p_fd->fd_refcnt > 1) { 1473 struct filedesc *tmp; 1474 1475 FILEDESC_XUNLOCK(p->p_fd); 1476 tmp = fdcopy(p->p_fd); 1477 fdfree(td); 1478 p->p_fd = tmp; 1479 } else 1480 FILEDESC_XUNLOCK(p->p_fd); 1481 } 1482 1483 /* 1484 * Copy a filedesc structure. A NULL pointer in returns a NULL reference, 1485 * this is to ease callers, not catch errors. 1486 */ 1487 struct filedesc * 1488 fdcopy(struct filedesc *fdp) 1489 { 1490 struct filedesc *newfdp; 1491 int i; 1492 1493 /* Certain daemons might not have file descriptors. 
*/ 1494 if (fdp == NULL) 1495 return (NULL); 1496 1497 newfdp = fdinit(fdp); 1498 FILEDESC_SLOCK(fdp); 1499 while (fdp->fd_lastfile >= newfdp->fd_nfiles) { 1500 FILEDESC_SUNLOCK(fdp); 1501 FILEDESC_XLOCK(newfdp); 1502 fdgrowtable(newfdp, fdp->fd_lastfile + 1); 1503 FILEDESC_XUNLOCK(newfdp); 1504 FILEDESC_SLOCK(fdp); 1505 } 1506 /* copy everything except kqueue descriptors */ 1507 newfdp->fd_freefile = -1; 1508 for (i = 0; i <= fdp->fd_lastfile; ++i) { 1509 if (fdisused(fdp, i) && 1510 fdp->fd_ofiles[i]->f_type != DTYPE_KQUEUE && 1511 (fdp->fd_ofileflags[i] & UF_OPENING) == 0) { 1512 newfdp->fd_ofiles[i] = fdp->fd_ofiles[i]; 1513 newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i]; 1514 fhold(newfdp->fd_ofiles[i]); 1515 newfdp->fd_lastfile = i; 1516 } else { 1517 if (newfdp->fd_freefile == -1) 1518 newfdp->fd_freefile = i; 1519 } 1520 } 1521 FILEDESC_SUNLOCK(fdp); 1522 FILEDESC_XLOCK(newfdp); 1523 for (i = 0; i <= newfdp->fd_lastfile; ++i) 1524 if (newfdp->fd_ofiles[i] != NULL) 1525 fdused(newfdp, i); 1526 FILEDESC_XUNLOCK(newfdp); 1527 FILEDESC_SLOCK(fdp); 1528 if (newfdp->fd_freefile == -1) 1529 newfdp->fd_freefile = i; 1530 newfdp->fd_cmask = fdp->fd_cmask; 1531 FILEDESC_SUNLOCK(fdp); 1532 return (newfdp); 1533 } 1534 1535 /* 1536 * Release a filedesc structure. 1537 */ 1538 void 1539 fdfree(struct thread *td) 1540 { 1541 struct filedesc *fdp; 1542 struct file **fpp; 1543 int i, locked; 1544 struct filedesc_to_leader *fdtol; 1545 struct file *fp; 1546 struct vnode *cdir, *jdir, *rdir, *vp; 1547 struct flock lf; 1548 1549 /* Certain daemons might not have file descriptors. 
*/ 1550 fdp = td->td_proc->p_fd; 1551 if (fdp == NULL) 1552 return; 1553 1554 /* Check for special need to clear POSIX style locks */ 1555 fdtol = td->td_proc->p_fdtol; 1556 if (fdtol != NULL) { 1557 FILEDESC_XLOCK(fdp); 1558 KASSERT(fdtol->fdl_refcount > 0, 1559 ("filedesc_to_refcount botch: fdl_refcount=%d", 1560 fdtol->fdl_refcount)); 1561 if (fdtol->fdl_refcount == 1 && 1562 (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1563 for (i = 0, fpp = fdp->fd_ofiles; 1564 i <= fdp->fd_lastfile; 1565 i++, fpp++) { 1566 if (*fpp == NULL || 1567 (*fpp)->f_type != DTYPE_VNODE) 1568 continue; 1569 fp = *fpp; 1570 fhold(fp); 1571 FILEDESC_XUNLOCK(fdp); 1572 lf.l_whence = SEEK_SET; 1573 lf.l_start = 0; 1574 lf.l_len = 0; 1575 lf.l_type = F_UNLCK; 1576 vp = fp->f_vnode; 1577 locked = VFS_LOCK_GIANT(vp->v_mount); 1578 (void) VOP_ADVLOCK(vp, 1579 (caddr_t)td->td_proc-> 1580 p_leader, 1581 F_UNLCK, 1582 &lf, 1583 F_POSIX); 1584 VFS_UNLOCK_GIANT(locked); 1585 FILEDESC_XLOCK(fdp); 1586 fdrop(fp, td); 1587 fpp = fdp->fd_ofiles + i; 1588 } 1589 } 1590 retry: 1591 if (fdtol->fdl_refcount == 1) { 1592 if (fdp->fd_holdleaderscount > 0 && 1593 (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1594 /* 1595 * close() or do_dup() has cleared a reference 1596 * in a shared file descriptor table. 1597 */ 1598 fdp->fd_holdleaderswakeup = 1; 1599 sx_sleep(&fdp->fd_holdleaderscount, 1600 FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0); 1601 goto retry; 1602 } 1603 if (fdtol->fdl_holdcount > 0) { 1604 /* 1605 * Ensure that fdtol->fdl_leader remains 1606 * valid in closef(). 
1607 */ 1608 fdtol->fdl_wakeup = 1; 1609 sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK, 1610 "fdlhold", 0); 1611 goto retry; 1612 } 1613 } 1614 fdtol->fdl_refcount--; 1615 if (fdtol->fdl_refcount == 0 && 1616 fdtol->fdl_holdcount == 0) { 1617 fdtol->fdl_next->fdl_prev = fdtol->fdl_prev; 1618 fdtol->fdl_prev->fdl_next = fdtol->fdl_next; 1619 } else 1620 fdtol = NULL; 1621 td->td_proc->p_fdtol = NULL; 1622 FILEDESC_XUNLOCK(fdp); 1623 if (fdtol != NULL) 1624 FREE(fdtol, M_FILEDESC_TO_LEADER); 1625 } 1626 FILEDESC_XLOCK(fdp); 1627 i = --fdp->fd_refcnt; 1628 FILEDESC_XUNLOCK(fdp); 1629 if (i > 0) 1630 return; 1631 /* 1632 * We are the last reference to the structure, so we can 1633 * safely assume it will not change out from under us. 1634 */ 1635 fpp = fdp->fd_ofiles; 1636 for (i = fdp->fd_lastfile; i-- >= 0; fpp++) { 1637 if (*fpp) 1638 (void) closef(*fpp, td); 1639 } 1640 FILEDESC_XLOCK(fdp); 1641 1642 /* XXX This should happen earlier. */ 1643 mtx_lock(&fdesc_mtx); 1644 td->td_proc->p_fd = NULL; 1645 mtx_unlock(&fdesc_mtx); 1646 1647 if (fdp->fd_nfiles > NDFILE) 1648 FREE(fdp->fd_ofiles, M_FILEDESC); 1649 if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE)) 1650 FREE(fdp->fd_map, M_FILEDESC); 1651 1652 fdp->fd_nfiles = 0; 1653 1654 cdir = fdp->fd_cdir; 1655 fdp->fd_cdir = NULL; 1656 rdir = fdp->fd_rdir; 1657 fdp->fd_rdir = NULL; 1658 jdir = fdp->fd_jdir; 1659 fdp->fd_jdir = NULL; 1660 FILEDESC_XUNLOCK(fdp); 1661 1662 if (cdir) { 1663 locked = VFS_LOCK_GIANT(cdir->v_mount); 1664 vrele(cdir); 1665 VFS_UNLOCK_GIANT(locked); 1666 } 1667 if (rdir) { 1668 locked = VFS_LOCK_GIANT(rdir->v_mount); 1669 vrele(rdir); 1670 VFS_UNLOCK_GIANT(locked); 1671 } 1672 if (jdir) { 1673 locked = VFS_LOCK_GIANT(jdir->v_mount); 1674 vrele(jdir); 1675 VFS_UNLOCK_GIANT(locked); 1676 } 1677 1678 fddrop(fdp); 1679 } 1680 1681 /* 1682 * For setugid programs, we don't want to people to use that setugidness 1683 * to generate error messages which write to a file which otherwise would 1684 * otherwise be 
off-limits to the process. We check for filesystems where 1685 * the vnode can change out from under us after execve (like [lin]procfs). 1686 * 1687 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is 1688 * sufficient. We also don't check for setugidness since we know we are. 1689 */ 1690 static int 1691 is_unsafe(struct file *fp) 1692 { 1693 if (fp->f_type == DTYPE_VNODE) { 1694 struct vnode *vp = fp->f_vnode; 1695 1696 if ((vp->v_vflag & VV_PROCDEP) != 0) 1697 return (1); 1698 } 1699 return (0); 1700 } 1701 1702 /* 1703 * Make this setguid thing safe, if at all possible. 1704 */ 1705 void 1706 setugidsafety(struct thread *td) 1707 { 1708 struct filedesc *fdp; 1709 int i; 1710 1711 /* Certain daemons might not have file descriptors. */ 1712 fdp = td->td_proc->p_fd; 1713 if (fdp == NULL) 1714 return; 1715 1716 /* 1717 * Note: fdp->fd_ofiles may be reallocated out from under us while 1718 * we are blocked in a close. Be careful! 1719 */ 1720 FILEDESC_XLOCK(fdp); 1721 for (i = 0; i <= fdp->fd_lastfile; i++) { 1722 if (i > 2) 1723 break; 1724 if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) { 1725 struct file *fp; 1726 1727 knote_fdclose(td, i); 1728 /* 1729 * NULL-out descriptor prior to close to avoid 1730 * a race while close blocks. 1731 */ 1732 fp = fdp->fd_ofiles[i]; 1733 fdp->fd_ofiles[i] = NULL; 1734 fdp->fd_ofileflags[i] = 0; 1735 fdunused(fdp, i); 1736 FILEDESC_XUNLOCK(fdp); 1737 (void) closef(fp, td); 1738 FILEDESC_XLOCK(fdp); 1739 } 1740 } 1741 FILEDESC_XUNLOCK(fdp); 1742 } 1743 1744 /* 1745 * If a specific file object occupies a specific file descriptor, close the 1746 * file descriptor entry and drop a reference on the file object. This is a 1747 * convenience function to handle a subsequent error in a function that calls 1748 * falloc() that handles the race that another thread might have closed the 1749 * file descriptor out from under the thread creating the file object. 
1750 */ 1751 void 1752 fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td) 1753 { 1754 1755 FILEDESC_XLOCK(fdp); 1756 if (fdp->fd_ofiles[idx] == fp) { 1757 fdp->fd_ofiles[idx] = NULL; 1758 fdunused(fdp, idx); 1759 FILEDESC_XUNLOCK(fdp); 1760 fdrop(fp, td); 1761 } else 1762 FILEDESC_XUNLOCK(fdp); 1763 } 1764 1765 /* 1766 * Close any files on exec? 1767 */ 1768 void 1769 fdcloseexec(struct thread *td) 1770 { 1771 struct filedesc *fdp; 1772 int i; 1773 1774 /* Certain daemons might not have file descriptors. */ 1775 fdp = td->td_proc->p_fd; 1776 if (fdp == NULL) 1777 return; 1778 1779 FILEDESC_XLOCK(fdp); 1780 1781 /* 1782 * We cannot cache fd_ofiles or fd_ofileflags since operations 1783 * may block and rip them out from under us. 1784 */ 1785 for (i = 0; i <= fdp->fd_lastfile; i++) { 1786 if (fdp->fd_ofiles[i] != NULL && 1787 (fdp->fd_ofiles[i]->f_type == DTYPE_MQUEUE || 1788 (fdp->fd_ofileflags[i] & UF_EXCLOSE))) { 1789 struct file *fp; 1790 1791 knote_fdclose(td, i); 1792 /* 1793 * NULL-out descriptor prior to close to avoid 1794 * a race while close blocks. 1795 */ 1796 fp = fdp->fd_ofiles[i]; 1797 fdp->fd_ofiles[i] = NULL; 1798 fdp->fd_ofileflags[i] = 0; 1799 fdunused(fdp, i); 1800 if (fp->f_type == DTYPE_MQUEUE) 1801 mq_fdclose(td, i, fp); 1802 FILEDESC_XUNLOCK(fdp); 1803 (void) closef(fp, td); 1804 FILEDESC_XLOCK(fdp); 1805 } 1806 } 1807 FILEDESC_XUNLOCK(fdp); 1808 } 1809 1810 /* 1811 * It is unsafe for set[ug]id processes to be started with file 1812 * descriptors 0..2 closed, as these descriptors are given implicit 1813 * significance in the Standard C library. fdcheckstd() will create a 1814 * descriptor referencing /dev/null for each of stdin, stdout, and 1815 * stderr that is not already open. 
1816 */ 1817 int 1818 fdcheckstd(struct thread *td) 1819 { 1820 struct filedesc *fdp; 1821 register_t retval, save; 1822 int i, error, devnull; 1823 1824 fdp = td->td_proc->p_fd; 1825 if (fdp == NULL) 1826 return (0); 1827 KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared")); 1828 devnull = -1; 1829 error = 0; 1830 for (i = 0; i < 3; i++) { 1831 if (fdp->fd_ofiles[i] != NULL) 1832 continue; 1833 if (devnull < 0) { 1834 save = td->td_retval[0]; 1835 error = kern_open(td, "/dev/null", UIO_SYSSPACE, 1836 O_RDWR, 0); 1837 devnull = td->td_retval[0]; 1838 KASSERT(devnull == i, ("oof, we didn't get our fd")); 1839 td->td_retval[0] = save; 1840 if (error) 1841 break; 1842 } else { 1843 error = do_dup(td, DUP_FIXED, devnull, i, &retval); 1844 if (error != 0) 1845 break; 1846 } 1847 } 1848 return (error); 1849 } 1850 1851 /* 1852 * Internal form of close. Decrement reference count on file structure. 1853 * Note: td may be NULL when closing a file that was being passed in a 1854 * message. 1855 * 1856 * XXXRW: Giant is not required for the caller, but often will be held; this 1857 * makes it moderately likely the Giant will be recursed in the VFS case. 1858 */ 1859 int 1860 closef(struct file *fp, struct thread *td) 1861 { 1862 struct vnode *vp; 1863 struct flock lf; 1864 struct filedesc_to_leader *fdtol; 1865 struct filedesc *fdp; 1866 1867 /* 1868 * POSIX record locking dictates that any close releases ALL 1869 * locks owned by this process. This is handled by setting 1870 * a flag in the unlock to free ONLY locks obeying POSIX 1871 * semantics, and not to free BSD-style file locks. 1872 * If the descriptor was in a message, POSIX-style locks 1873 * aren't passed with the descriptor, and the thread pointer 1874 * will be NULL. Callers should be careful only to pass a 1875 * NULL thread pointer when there really is no owning 1876 * context that might have locks, or the locks will be 1877 * leaked. 
1878 */ 1879 if (fp->f_type == DTYPE_VNODE && td != NULL) { 1880 int vfslocked; 1881 1882 vp = fp->f_vnode; 1883 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1884 if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1885 lf.l_whence = SEEK_SET; 1886 lf.l_start = 0; 1887 lf.l_len = 0; 1888 lf.l_type = F_UNLCK; 1889 (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader, 1890 F_UNLCK, &lf, F_POSIX); 1891 } 1892 fdtol = td->td_proc->p_fdtol; 1893 if (fdtol != NULL) { 1894 /* 1895 * Handle special case where file descriptor table is 1896 * shared between multiple process leaders. 1897 */ 1898 fdp = td->td_proc->p_fd; 1899 FILEDESC_XLOCK(fdp); 1900 for (fdtol = fdtol->fdl_next; 1901 fdtol != td->td_proc->p_fdtol; 1902 fdtol = fdtol->fdl_next) { 1903 if ((fdtol->fdl_leader->p_flag & 1904 P_ADVLOCK) == 0) 1905 continue; 1906 fdtol->fdl_holdcount++; 1907 FILEDESC_XUNLOCK(fdp); 1908 lf.l_whence = SEEK_SET; 1909 lf.l_start = 0; 1910 lf.l_len = 0; 1911 lf.l_type = F_UNLCK; 1912 vp = fp->f_vnode; 1913 (void) VOP_ADVLOCK(vp, 1914 (caddr_t)fdtol->fdl_leader, 1915 F_UNLCK, &lf, F_POSIX); 1916 FILEDESC_XLOCK(fdp); 1917 fdtol->fdl_holdcount--; 1918 if (fdtol->fdl_holdcount == 0 && 1919 fdtol->fdl_wakeup != 0) { 1920 fdtol->fdl_wakeup = 0; 1921 wakeup(fdtol); 1922 } 1923 } 1924 FILEDESC_XUNLOCK(fdp); 1925 } 1926 VFS_UNLOCK_GIANT(vfslocked); 1927 } 1928 return (fdrop(fp, td)); 1929 } 1930 1931 /* 1932 * Extract the file pointer associated with the specified descriptor for the 1933 * current user process. 1934 * 1935 * If the descriptor doesn't exist, EBADF is returned. 1936 * 1937 * If the descriptor exists but doesn't match 'flags' then return EBADF for 1938 * read attempts and EINVAL for write attempts. 1939 * 1940 * If 'hold' is set (non-zero) the file's refcount will be bumped on return. 1941 * It should be dropped with fdrop(). If it is not set, then the refcount 1942 * will not be bumped however the thread's filedesc struct will be returned 1943 * locked (for fgetsock). 
1944 * 1945 * If an error occured the non-zero error is returned and *fpp is set to 1946 * NULL. Otherwise *fpp is set and zero is returned. 1947 */ 1948 static __inline int 1949 _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold) 1950 { 1951 struct filedesc *fdp; 1952 struct file *fp; 1953 1954 *fpp = NULL; 1955 if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) 1956 return (EBADF); 1957 FILEDESC_SLOCK(fdp); 1958 if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) { 1959 FILEDESC_SUNLOCK(fdp); 1960 return (EBADF); 1961 } 1962 1963 /* 1964 * FREAD and FWRITE failure return EBADF as per POSIX. 1965 * 1966 * Only one flag, or 0, may be specified. 1967 */ 1968 if (flags == FREAD && (fp->f_flag & FREAD) == 0) { 1969 FILEDESC_SUNLOCK(fdp); 1970 return (EBADF); 1971 } 1972 if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) { 1973 FILEDESC_SUNLOCK(fdp); 1974 return (EBADF); 1975 } 1976 if (hold) { 1977 fhold(fp); 1978 FILEDESC_SUNLOCK(fdp); 1979 } 1980 *fpp = fp; 1981 return (0); 1982 } 1983 1984 int 1985 fget(struct thread *td, int fd, struct file **fpp) 1986 { 1987 1988 return(_fget(td, fd, fpp, 0, 1)); 1989 } 1990 1991 int 1992 fget_read(struct thread *td, int fd, struct file **fpp) 1993 { 1994 1995 return(_fget(td, fd, fpp, FREAD, 1)); 1996 } 1997 1998 int 1999 fget_write(struct thread *td, int fd, struct file **fpp) 2000 { 2001 2002 return(_fget(td, fd, fpp, FWRITE, 1)); 2003 } 2004 2005 /* 2006 * Like fget() but loads the underlying vnode, or returns an error if the 2007 * descriptor does not represent a vnode. Note that pipes use vnodes but 2008 * never have VM objects. The returned vnode will be vref()'d. 2009 * 2010 * XXX: what about the unused flags ? 
2011 */ 2012 static __inline int 2013 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags) 2014 { 2015 struct file *fp; 2016 int error; 2017 2018 *vpp = NULL; 2019 if ((error = _fget(td, fd, &fp, 0, 0)) != 0) 2020 return (error); 2021 if (fp->f_vnode == NULL) { 2022 error = EINVAL; 2023 } else { 2024 *vpp = fp->f_vnode; 2025 vref(*vpp); 2026 } 2027 FILEDESC_SUNLOCK(td->td_proc->p_fd); 2028 return (error); 2029 } 2030 2031 int 2032 fgetvp(struct thread *td, int fd, struct vnode **vpp) 2033 { 2034 2035 return (_fgetvp(td, fd, vpp, 0)); 2036 } 2037 2038 int 2039 fgetvp_read(struct thread *td, int fd, struct vnode **vpp) 2040 { 2041 2042 return (_fgetvp(td, fd, vpp, FREAD)); 2043 } 2044 2045 #ifdef notyet 2046 int 2047 fgetvp_write(struct thread *td, int fd, struct vnode **vpp) 2048 { 2049 2050 return (_fgetvp(td, fd, vpp, FWRITE)); 2051 } 2052 #endif 2053 2054 /* 2055 * Like fget() but loads the underlying socket, or returns an error if the 2056 * descriptor does not represent a socket. 2057 * 2058 * We bump the ref count on the returned socket. XXX Also obtain the SX lock 2059 * in the future. 2060 * 2061 * XXXRW: fgetsock() and fputsock() are deprecated, as consumers should rely 2062 * on their file descriptor reference to prevent the socket from being free'd 2063 * during use. 
2064 */ 2065 int 2066 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp) 2067 { 2068 struct file *fp; 2069 int error; 2070 2071 NET_ASSERT_GIANT(); 2072 2073 *spp = NULL; 2074 if (fflagp != NULL) 2075 *fflagp = 0; 2076 if ((error = _fget(td, fd, &fp, 0, 0)) != 0) 2077 return (error); 2078 if (fp->f_type != DTYPE_SOCKET) { 2079 error = ENOTSOCK; 2080 } else { 2081 *spp = fp->f_data; 2082 if (fflagp) 2083 *fflagp = fp->f_flag; 2084 SOCK_LOCK(*spp); 2085 soref(*spp); 2086 SOCK_UNLOCK(*spp); 2087 } 2088 FILEDESC_SUNLOCK(td->td_proc->p_fd); 2089 return (error); 2090 } 2091 2092 /* 2093 * Drop the reference count on the socket and XXX release the SX lock in the 2094 * future. The last reference closes the socket. 2095 * 2096 * XXXRW: fputsock() is deprecated, see comment for fgetsock(). 2097 */ 2098 void 2099 fputsock(struct socket *so) 2100 { 2101 2102 NET_ASSERT_GIANT(); 2103 ACCEPT_LOCK(); 2104 SOCK_LOCK(so); 2105 sorele(so); 2106 } 2107 2108 int 2109 fdrop(struct file *fp, struct thread *td) 2110 { 2111 2112 FILE_LOCK(fp); 2113 return (fdrop_locked(fp, td)); 2114 } 2115 2116 /* 2117 * Drop reference on struct file passed in, may call closef if the 2118 * reference hits zero. 2119 * Expects struct file locked, and will unlock it. 2120 */ 2121 static int 2122 fdrop_locked(struct file *fp, struct thread *td) 2123 { 2124 int error; 2125 2126 FILE_LOCK_ASSERT(fp, MA_OWNED); 2127 2128 if (--fp->f_count > 0) { 2129 FILE_UNLOCK(fp); 2130 return (0); 2131 } 2132 2133 /* 2134 * We might have just dropped the last reference to a file 2135 * object that is for a UNIX domain socket whose message 2136 * buffers are being examined in unp_gc(). If that is the 2137 * case, FWAIT will be set in f_gcflag and we need to wait for 2138 * unp_gc() to finish its scan. 2139 */ 2140 while (fp->f_gcflag & FWAIT) 2141 msleep(&fp->f_gcflag, fp->f_mtxp, 0, "fpdrop", 0); 2142 2143 /* We have the last ref so we can proceed without the file lock. 
*/ 2144 FILE_UNLOCK(fp); 2145 if (fp->f_count < 0) 2146 panic("fdrop: count < 0"); 2147 if (fp->f_ops != &badfileops) 2148 error = fo_close(fp, td); 2149 else 2150 error = 0; 2151 2152 sx_xlock(&filelist_lock); 2153 LIST_REMOVE(fp, f_list); 2154 openfiles--; 2155 sx_xunlock(&filelist_lock); 2156 crfree(fp->f_cred); 2157 uma_zfree(file_zone, fp); 2158 2159 return (error); 2160 } 2161 2162 /* 2163 * Apply an advisory lock on a file descriptor. 2164 * 2165 * Just attempt to get a record lock of the requested type on the entire file 2166 * (l_whence = SEEK_SET, l_start = 0, l_len = 0). 2167 */ 2168 #ifndef _SYS_SYSPROTO_H_ 2169 struct flock_args { 2170 int fd; 2171 int how; 2172 }; 2173 #endif 2174 /* ARGSUSED */ 2175 int 2176 flock(struct thread *td, struct flock_args *uap) 2177 { 2178 struct file *fp; 2179 struct vnode *vp; 2180 struct flock lf; 2181 int error; 2182 2183 if ((error = fget(td, uap->fd, &fp)) != 0) 2184 return (error); 2185 if (fp->f_type != DTYPE_VNODE) { 2186 fdrop(fp, td); 2187 return (EOPNOTSUPP); 2188 } 2189 2190 mtx_lock(&Giant); 2191 vp = fp->f_vnode; 2192 lf.l_whence = SEEK_SET; 2193 lf.l_start = 0; 2194 lf.l_len = 0; 2195 if (uap->how & LOCK_UN) { 2196 lf.l_type = F_UNLCK; 2197 FILE_LOCK(fp); 2198 fp->f_flag &= ~FHASLOCK; 2199 FILE_UNLOCK(fp); 2200 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 2201 goto done2; 2202 } 2203 if (uap->how & LOCK_EX) 2204 lf.l_type = F_WRLCK; 2205 else if (uap->how & LOCK_SH) 2206 lf.l_type = F_RDLCK; 2207 else { 2208 error = EBADF; 2209 goto done2; 2210 } 2211 FILE_LOCK(fp); 2212 fp->f_flag |= FHASLOCK; 2213 FILE_UNLOCK(fp); 2214 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, 2215 (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT); 2216 done2: 2217 fdrop(fp, td); 2218 mtx_unlock(&Giant); 2219 return (error); 2220 } 2221 /* 2222 * Duplicate the specified descriptor to a free descriptor. 
2223 */ 2224 int 2225 dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error) 2226 { 2227 struct file *wfp; 2228 struct file *fp; 2229 2230 /* 2231 * If the to-be-dup'd fd number is greater than the allowed number 2232 * of file descriptors, or the fd to be dup'd has already been 2233 * closed, then reject. 2234 */ 2235 FILEDESC_XLOCK(fdp); 2236 if (dfd < 0 || dfd >= fdp->fd_nfiles || 2237 (wfp = fdp->fd_ofiles[dfd]) == NULL) { 2238 FILEDESC_XUNLOCK(fdp); 2239 return (EBADF); 2240 } 2241 2242 /* 2243 * There are two cases of interest here. 2244 * 2245 * For ENODEV simply dup (dfd) to file descriptor (indx) and return. 2246 * 2247 * For ENXIO steal away the file structure from (dfd) and store it in 2248 * (indx). (dfd) is effectively closed by this operation. 2249 * 2250 * Any other error code is just returned. 2251 */ 2252 switch (error) { 2253 case ENODEV: 2254 /* 2255 * Check that the mode the file is being opened for is a 2256 * subset of the mode of the existing descriptor. 2257 */ 2258 FILE_LOCK(wfp); 2259 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { 2260 FILE_UNLOCK(wfp); 2261 FILEDESC_XUNLOCK(fdp); 2262 return (EACCES); 2263 } 2264 fp = fdp->fd_ofiles[indx]; 2265 fdp->fd_ofiles[indx] = wfp; 2266 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 2267 if (fp == NULL) 2268 fdused(fdp, indx); 2269 fhold_locked(wfp); 2270 FILE_UNLOCK(wfp); 2271 FILEDESC_XUNLOCK(fdp); 2272 if (fp != NULL) 2273 /* 2274 * We now own the reference to fp that the ofiles[] 2275 * array used to own. Release it. 2276 */ 2277 fdrop(fp, td); 2278 return (0); 2279 2280 case ENXIO: 2281 /* 2282 * Steal away the file pointer from dfd and stuff it into indx. 
2283 */ 2284 fp = fdp->fd_ofiles[indx]; 2285 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; 2286 fdp->fd_ofiles[dfd] = NULL; 2287 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 2288 fdp->fd_ofileflags[dfd] = 0; 2289 fdunused(fdp, dfd); 2290 if (fp == NULL) 2291 fdused(fdp, indx); 2292 FILEDESC_XUNLOCK(fdp); 2293 2294 /* 2295 * We now own the reference to fp that the ofiles[] array 2296 * used to own. Release it. 2297 */ 2298 if (fp != NULL) 2299 fdrop(fp, td); 2300 return (0); 2301 2302 default: 2303 FILEDESC_XUNLOCK(fdp); 2304 return (error); 2305 } 2306 /* NOTREACHED */ 2307 } 2308 2309 /* 2310 * Scan all active processes to see if any of them have a current or root 2311 * directory of `olddp'. If so, replace them with the new mount point. 2312 */ 2313 void 2314 mountcheckdirs(struct vnode *olddp, struct vnode *newdp) 2315 { 2316 struct filedesc *fdp; 2317 struct proc *p; 2318 int nrele; 2319 2320 if (vrefcnt(olddp) == 1) 2321 return; 2322 sx_slock(&allproc_lock); 2323 FOREACH_PROC_IN_SYSTEM(p) { 2324 fdp = fdhold(p); 2325 if (fdp == NULL) 2326 continue; 2327 nrele = 0; 2328 FILEDESC_XLOCK(fdp); 2329 if (fdp->fd_cdir == olddp) { 2330 vref(newdp); 2331 fdp->fd_cdir = newdp; 2332 nrele++; 2333 } 2334 if (fdp->fd_rdir == olddp) { 2335 vref(newdp); 2336 fdp->fd_rdir = newdp; 2337 nrele++; 2338 } 2339 FILEDESC_XUNLOCK(fdp); 2340 fddrop(fdp); 2341 while (nrele--) 2342 vrele(olddp); 2343 } 2344 sx_sunlock(&allproc_lock); 2345 if (rootvnode == olddp) { 2346 vrele(rootvnode); 2347 vref(newdp); 2348 rootvnode = newdp; 2349 } 2350 } 2351 2352 struct filedesc_to_leader * 2353 filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader) 2354 { 2355 struct filedesc_to_leader *fdtol; 2356 2357 MALLOC(fdtol, struct filedesc_to_leader *, 2358 sizeof(struct filedesc_to_leader), 2359 M_FILEDESC_TO_LEADER, 2360 M_WAITOK); 2361 fdtol->fdl_refcount = 1; 2362 fdtol->fdl_holdcount = 0; 2363 fdtol->fdl_wakeup = 0; 2364 fdtol->fdl_leader = leader; 
2365 if (old != NULL) { 2366 FILEDESC_XLOCK(fdp); 2367 fdtol->fdl_next = old->fdl_next; 2368 fdtol->fdl_prev = old; 2369 old->fdl_next = fdtol; 2370 fdtol->fdl_next->fdl_prev = fdtol; 2371 FILEDESC_XUNLOCK(fdp); 2372 } else { 2373 fdtol->fdl_next = fdtol; 2374 fdtol->fdl_prev = fdtol; 2375 } 2376 return (fdtol); 2377 } 2378 2379 /* 2380 * Get file structures. 2381 */ 2382 static int 2383 sysctl_kern_file(SYSCTL_HANDLER_ARGS) 2384 { 2385 struct xfile xf; 2386 struct filedesc *fdp; 2387 struct file *fp; 2388 struct proc *p; 2389 int error, n; 2390 2391 /* 2392 * Note: because the number of file descriptors is calculated 2393 * in different ways for sizing vs returning the data, 2394 * there is information leakage from the first loop. However, 2395 * it is of a similar order of magnitude to the leakage from 2396 * global system statistics such as kern.openfiles. 2397 */ 2398 error = sysctl_wire_old_buffer(req, 0); 2399 if (error != 0) 2400 return (error); 2401 if (req->oldptr == NULL) { 2402 n = 16; /* A slight overestimate. */ 2403 sx_slock(&filelist_lock); 2404 LIST_FOREACH(fp, &filehead, f_list) { 2405 /* 2406 * We should grab the lock, but this is an 2407 * estimate, so does it really matter? 
2408 */ 2409 /* mtx_lock(fp->f_mtxp); */ 2410 n += fp->f_count; 2411 /* mtx_unlock(f->f_mtxp); */ 2412 } 2413 sx_sunlock(&filelist_lock); 2414 return (SYSCTL_OUT(req, 0, n * sizeof(xf))); 2415 } 2416 error = 0; 2417 bzero(&xf, sizeof(xf)); 2418 xf.xf_size = sizeof(xf); 2419 sx_slock(&allproc_lock); 2420 FOREACH_PROC_IN_SYSTEM(p) { 2421 if (p->p_state == PRS_NEW) 2422 continue; 2423 PROC_LOCK(p); 2424 if (p_cansee(req->td, p) != 0) { 2425 PROC_UNLOCK(p); 2426 continue; 2427 } 2428 xf.xf_pid = p->p_pid; 2429 xf.xf_uid = p->p_ucred->cr_uid; 2430 PROC_UNLOCK(p); 2431 fdp = fdhold(p); 2432 if (fdp == NULL) 2433 continue; 2434 FILEDESC_SLOCK(fdp); 2435 for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) { 2436 if ((fp = fdp->fd_ofiles[n]) == NULL) 2437 continue; 2438 xf.xf_fd = n; 2439 xf.xf_file = fp; 2440 xf.xf_data = fp->f_data; 2441 xf.xf_vnode = fp->f_vnode; 2442 xf.xf_type = fp->f_type; 2443 xf.xf_count = fp->f_count; 2444 xf.xf_msgcount = fp->f_msgcount; 2445 xf.xf_offset = fp->f_offset; 2446 xf.xf_flag = fp->f_flag; 2447 error = SYSCTL_OUT(req, &xf, sizeof(xf)); 2448 if (error) 2449 break; 2450 } 2451 FILEDESC_SUNLOCK(fdp); 2452 fddrop(fdp); 2453 if (error) 2454 break; 2455 } 2456 sx_sunlock(&allproc_lock); 2457 return (error); 2458 } 2459 2460 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD, 2461 0, 0, sysctl_kern_file, "S,xfile", "Entire file table"); 2462 2463 #ifdef DDB 2464 /* 2465 * For the purposes of debugging, generate a human-readable string for the 2466 * file type. 
2467 */ 2468 static const char * 2469 file_type_to_name(short type) 2470 { 2471 2472 switch (type) { 2473 case 0: 2474 return ("zero"); 2475 case DTYPE_VNODE: 2476 return ("vnod"); 2477 case DTYPE_SOCKET: 2478 return ("sock"); 2479 case DTYPE_PIPE: 2480 return ("pipe"); 2481 case DTYPE_FIFO: 2482 return ("fifo"); 2483 case DTYPE_KQUEUE: 2484 return ("kque"); 2485 case DTYPE_CRYPTO: 2486 return ("crpt"); 2487 case DTYPE_MQUEUE: 2488 return ("mque"); 2489 default: 2490 return ("unkn"); 2491 } 2492 } 2493 2494 /* 2495 * For the purposes of debugging, identify a process (if any, perhaps one of 2496 * many) that references the passed file in its file descriptor array. Return 2497 * NULL if none. 2498 */ 2499 static struct proc * 2500 file_to_first_proc(struct file *fp) 2501 { 2502 struct filedesc *fdp; 2503 struct proc *p; 2504 int n; 2505 2506 FOREACH_PROC_IN_SYSTEM(p) { 2507 if (p->p_state == PRS_NEW) 2508 continue; 2509 fdp = p->p_fd; 2510 if (fdp == NULL) 2511 continue; 2512 for (n = 0; n < fdp->fd_nfiles; n++) { 2513 if (fp == fdp->fd_ofiles[n]) 2514 return (p); 2515 } 2516 } 2517 return (NULL); 2518 } 2519 2520 static void 2521 db_print_file(struct file *fp, int header) 2522 { 2523 struct proc *p; 2524 2525 if (header) 2526 db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n", 2527 "File", "Type", "Data", "Flag", "GCFl", "Count", 2528 "MCount", "Vnode", "FPID", "FCmd"); 2529 p = file_to_first_proc(fp); 2530 db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp, 2531 file_type_to_name(fp->f_type), fp->f_data, fp->f_flag, 2532 fp->f_gcflag, fp->f_count, fp->f_msgcount, fp->f_vnode, 2533 p != NULL ? p->p_pid : -1, p != NULL ? 
p->p_comm : "-"); 2534 } 2535 2536 DB_SHOW_COMMAND(file, db_show_file) 2537 { 2538 struct file *fp; 2539 2540 if (!have_addr) { 2541 db_printf("usage: show file <addr>\n"); 2542 return; 2543 } 2544 fp = (struct file *)addr; 2545 db_print_file(fp, 1); 2546 } 2547 2548 DB_SHOW_COMMAND(files, db_show_files) 2549 { 2550 struct file *fp; 2551 int header; 2552 2553 header = 1; 2554 LIST_FOREACH(fp, &filehead, f_list) { 2555 db_print_file(fp, header); 2556 header = 0; 2557 } 2558 } 2559 #endif 2560 2561 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, 2562 &maxfilesperproc, 0, "Maximum files allowed open per process"); 2563 2564 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, 2565 &maxfiles, 0, "Maximum number of files"); 2566 2567 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD, 2568 &openfiles, 0, "System-wide number of open files"); 2569 2570 /* ARGSUSED*/ 2571 static void 2572 filelistinit(void *dummy) 2573 { 2574 2575 file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL, 2576 NULL, NULL, UMA_ALIGN_PTR, 0); 2577 sx_init(&filelist_lock, "filelist lock"); 2578 mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); 2579 mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF); 2580 } 2581 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL) 2582 2583 /*-------------------------------------------------------------------*/ 2584 2585 static int 2586 badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) 2587 { 2588 2589 return (EBADF); 2590 } 2591 2592 static int 2593 badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td) 2594 { 2595 2596 return (EBADF); 2597 } 2598 2599 static int 2600 badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) 2601 { 2602 2603 return (0); 2604 } 2605 2606 static int 2607 badfo_kqfilter(struct file *fp, struct knote *kn) 2608 { 2609 2610 return (EBADF); 2611 } 2612 2613 static int 
2614 badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td) 2615 { 2616 2617 return (EBADF); 2618 } 2619 2620 static int 2621 badfo_close(struct file *fp, struct thread *td) 2622 { 2623 2624 return (EBADF); 2625 } 2626 2627 struct fileops badfileops = { 2628 .fo_read = badfo_readwrite, 2629 .fo_write = badfo_readwrite, 2630 .fo_ioctl = badfo_ioctl, 2631 .fo_poll = badfo_poll, 2632 .fo_kqfilter = badfo_kqfilter, 2633 .fo_stat = badfo_stat, 2634 .fo_close = badfo_close, 2635 }; 2636 2637 2638 /*-------------------------------------------------------------------*/ 2639 2640 /* 2641 * File Descriptor pseudo-device driver (/dev/fd/). 2642 * 2643 * Opening minor device N dup()s the file (if any) connected to file 2644 * descriptor N belonging to the calling process. Note that this driver 2645 * consists of only the ``open()'' routine, because all subsequent 2646 * references to this file will be direct to the other driver. 2647 * 2648 * XXX: we could give this one a cloning event handler if necessary. 2649 */ 2650 2651 /* ARGSUSED */ 2652 static int 2653 fdopen(struct cdev *dev, int mode, int type, struct thread *td) 2654 { 2655 2656 /* 2657 * XXX Kludge: set curthread->td_dupfd to contain the value of the 2658 * the file descriptor being sought for duplication. The error 2659 * return ensures that the vnode for this device will be released 2660 * by vn_open. Open will detect this special error and take the 2661 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 2662 * will simply report the error. 
2663 */ 2664 td->td_dupfd = dev2unit(dev); 2665 return (ENODEV); 2666 } 2667 2668 static struct cdevsw fildesc_cdevsw = { 2669 .d_version = D_VERSION, 2670 .d_flags = D_NEEDGIANT, 2671 .d_open = fdopen, 2672 .d_name = "FD", 2673 }; 2674 2675 static void 2676 fildesc_drvinit(void *unused) 2677 { 2678 struct cdev *dev; 2679 2680 dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0"); 2681 make_dev_alias(dev, "stdin"); 2682 dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1"); 2683 make_dev_alias(dev, "stdout"); 2684 dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2"); 2685 make_dev_alias(dev, "stderr"); 2686 } 2687 2688 SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL) 2689