1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_compat.h" 41 #include "opt_ddb.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 46 #include <sys/conf.h> 47 #include <sys/fcntl.h> 48 #include <sys/file.h> 49 #include <sys/filedesc.h> 50 #include <sys/filio.h> 51 #include <sys/jail.h> 52 #include <sys/kernel.h> 53 #include <sys/limits.h> 54 #include <sys/lock.h> 55 #include <sys/malloc.h> 56 #include <sys/mount.h> 57 #include <sys/mqueue.h> 58 #include <sys/mutex.h> 59 #include <sys/namei.h> 60 #include <sys/priv.h> 61 #include <sys/proc.h> 62 #include <sys/resourcevar.h> 63 #include <sys/signalvar.h> 64 #include <sys/socketvar.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/syscallsubr.h> 68 #include <sys/sysctl.h> 69 #include <sys/sysproto.h> 70 #include <sys/unistd.h> 71 #include <sys/vnode.h> 72 73 #include <security/audit/audit.h> 74 75 #include <vm/uma.h> 76 77 #include <ddb/ddb.h> 78 79 static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table"); 80 static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader", 81 "file desc to leader structures"); 82 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); 83 84 static uma_zone_t file_zone; 85 86 87 /* How to treat 'new' parameter when allocating a fd for do_dup(). 
*/
enum dup_type { DUP_VARIABLE, DUP_FIXED };

/* Forward declarations of file-local helpers. */
static int do_dup(struct thread *td, enum dup_type type, int old, int new,
    register_t *retval);
static int	fd_first_free(struct filedesc *, int, int);
static int	fd_last_used(struct filedesc *, int, int);
static void	fdgrowtable(struct filedesc *, int);
static int	fdrop_locked(struct file *fp, struct thread *td);
static void	fdunused(struct filedesc *fdp, int fd);
static void	fdused(struct filedesc *fdp, int fd);

/*
 * A process is initially started out with NDFILE descriptors stored within
 * this structure, selected to be enough for typical applications based on
 * the historical limit of 20 open files (and the usage of descriptors by
 * shells).  If these descriptors are exhausted, a larger descriptor table
 * may be allocated, up to a process' resource limit; the internal arrays
 * are then unused.
 *
 * The in-use bitmap is an array of NDSLOTTYPE words ("slots"):
 *	NDSLOTSIZE	size of one slot in bytes
 *	NDENTRIES	number of descriptors tracked by one slot
 *	NDSLOT(x)	index of the slot holding descriptor x
 *	NDBIT(x)	mask selecting descriptor x within its slot
 *	NDSLOTS(x)	number of slots needed for x descriptors (rounded up)
 */
#define NDFILE		20
#define NDSLOTSIZE	sizeof(NDSLOTTYPE)
#define	NDENTRIES	(NDSLOTSIZE * __CHAR_BIT)
#define NDSLOT(x)	((x) / NDENTRIES)
#define NDBIT(x)	((NDSLOTTYPE)1 << ((x) % NDENTRIES))
#define	NDSLOTS(x)	(((x) + NDENTRIES - 1) / NDENTRIES)

/*
 * Storage required per open file descriptor: one file pointer plus one
 * byte of per-descriptor flags.
 */
#define OFILESIZE (sizeof(struct file *) + sizeof(char))

/*
 * Basic allocation of descriptors:
 * one of the above, plus arrays for NDFILE descriptors.
 */
struct filedesc0 {
	struct	filedesc fd_fd;
	/*
	 * These arrays are used when the number of open files is
	 * <= NDFILE, and are then pointed to by the pointers above.
	 */
	struct	file *fd_dfiles[NDFILE];	/* initial file pointer table */
	char	fd_dfileflags[NDFILE];		/* initial per-fd flags */
	NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];	/* initial in-use bitmap */
};

/*
 * Descriptor management.
136 */ 137 struct filelist filehead; /* head of list of open files */ 138 int openfiles; /* actual number of open files */ 139 struct sx filelist_lock; /* sx to protect filelist */ 140 struct mtx sigio_lock; /* mtx to protect pointers to sigio */ 141 void (*mq_fdclose)(struct thread *td, int fd, struct file *fp); 142 143 /* A mutex to protect the association between a proc and filedesc. */ 144 static struct mtx fdesc_mtx; 145 146 /* 147 * Find the first zero bit in the given bitmap, starting at low and not 148 * exceeding size - 1. 149 */ 150 static int 151 fd_first_free(struct filedesc *fdp, int low, int size) 152 { 153 NDSLOTTYPE *map = fdp->fd_map; 154 NDSLOTTYPE mask; 155 int off, maxoff; 156 157 if (low >= size) 158 return (low); 159 160 off = NDSLOT(low); 161 if (low % NDENTRIES) { 162 mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES))); 163 if ((mask &= ~map[off]) != 0UL) 164 return (off * NDENTRIES + ffsl(mask) - 1); 165 ++off; 166 } 167 for (maxoff = NDSLOTS(size); off < maxoff; ++off) 168 if (map[off] != ~0UL) 169 return (off * NDENTRIES + ffsl(~map[off]) - 1); 170 return (size); 171 } 172 173 /* 174 * Find the highest non-zero bit in the given bitmap, starting at low and 175 * not exceeding size - 1. 
176 */ 177 static int 178 fd_last_used(struct filedesc *fdp, int low, int size) 179 { 180 NDSLOTTYPE *map = fdp->fd_map; 181 NDSLOTTYPE mask; 182 int off, minoff; 183 184 if (low >= size) 185 return (-1); 186 187 off = NDSLOT(size); 188 if (size % NDENTRIES) { 189 mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES)); 190 if ((mask &= map[off]) != 0) 191 return (off * NDENTRIES + flsl(mask) - 1); 192 --off; 193 } 194 for (minoff = NDSLOT(low); off >= minoff; --off) 195 if (map[off] != 0) 196 return (off * NDENTRIES + flsl(map[off]) - 1); 197 return (low - 1); 198 } 199 200 static int 201 fdisused(struct filedesc *fdp, int fd) 202 { 203 KASSERT(fd >= 0 && fd < fdp->fd_nfiles, 204 ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles)); 205 return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0); 206 } 207 208 /* 209 * Mark a file descriptor as used. 210 */ 211 static void 212 fdused(struct filedesc *fdp, int fd) 213 { 214 215 FILEDESC_XLOCK_ASSERT(fdp); 216 KASSERT(!fdisused(fdp, fd), 217 ("fd already used")); 218 219 fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd); 220 if (fd > fdp->fd_lastfile) 221 fdp->fd_lastfile = fd; 222 if (fd == fdp->fd_freefile) 223 fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles); 224 } 225 226 /* 227 * Mark a file descriptor as unused. 228 */ 229 static void 230 fdunused(struct filedesc *fdp, int fd) 231 { 232 233 FILEDESC_XLOCK_ASSERT(fdp); 234 KASSERT(fdisused(fdp, fd), 235 ("fd is already unused")); 236 KASSERT(fdp->fd_ofiles[fd] == NULL, 237 ("fd is still in use")); 238 239 fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); 240 if (fd < fdp->fd_freefile) 241 fdp->fd_freefile = fd; 242 if (fd == fdp->fd_lastfile) 243 fdp->fd_lastfile = fd_last_used(fdp, 0, fd); 244 } 245 246 /* 247 * System calls on descriptors. 
248 */ 249 #ifndef _SYS_SYSPROTO_H_ 250 struct getdtablesize_args { 251 int dummy; 252 }; 253 #endif 254 /* ARGSUSED */ 255 int 256 getdtablesize(struct thread *td, struct getdtablesize_args *uap) 257 { 258 struct proc *p = td->td_proc; 259 260 PROC_LOCK(p); 261 td->td_retval[0] = 262 min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 263 PROC_UNLOCK(p); 264 return (0); 265 } 266 267 /* 268 * Duplicate a file descriptor to a particular value. 269 * 270 * Note: keep in mind that a potential race condition exists when closing 271 * descriptors from a shared descriptor table (via rfork). 272 */ 273 #ifndef _SYS_SYSPROTO_H_ 274 struct dup2_args { 275 u_int from; 276 u_int to; 277 }; 278 #endif 279 /* ARGSUSED */ 280 int 281 dup2(struct thread *td, struct dup2_args *uap) 282 { 283 284 return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to, 285 td->td_retval)); 286 } 287 288 /* 289 * Duplicate a file descriptor. 290 */ 291 #ifndef _SYS_SYSPROTO_H_ 292 struct dup_args { 293 u_int fd; 294 }; 295 #endif 296 /* ARGSUSED */ 297 int 298 dup(struct thread *td, struct dup_args *uap) 299 { 300 301 return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval)); 302 } 303 304 /* 305 * The file control system call. 
306 */ 307 #ifndef _SYS_SYSPROTO_H_ 308 struct fcntl_args { 309 int fd; 310 int cmd; 311 long arg; 312 }; 313 #endif 314 /* ARGSUSED */ 315 int 316 fcntl(struct thread *td, struct fcntl_args *uap) 317 { 318 struct flock fl; 319 intptr_t arg; 320 int error; 321 322 error = 0; 323 switch (uap->cmd) { 324 case F_GETLK: 325 case F_SETLK: 326 case F_SETLKW: 327 error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl)); 328 arg = (intptr_t)&fl; 329 break; 330 default: 331 arg = uap->arg; 332 break; 333 } 334 if (error) 335 return (error); 336 error = kern_fcntl(td, uap->fd, uap->cmd, arg); 337 if (error) 338 return (error); 339 if (uap->cmd == F_GETLK) 340 error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl)); 341 return (error); 342 } 343 344 int 345 kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) 346 { 347 struct filedesc *fdp; 348 struct flock *flp; 349 struct file *fp; 350 struct proc *p; 351 char *pop; 352 struct vnode *vp; 353 u_int newmin; 354 int error, flg, tmp; 355 int giant_locked; 356 357 /* 358 * XXXRW: Some fcntl() calls require Giant -- others don't. Try to 359 * avoid grabbing Giant for calls we know don't need it. 360 */ 361 switch (cmd) { 362 case F_DUPFD: 363 case F_GETFD: 364 case F_SETFD: 365 case F_GETFL: 366 giant_locked = 0; 367 break; 368 369 default: 370 giant_locked = 1; 371 mtx_lock(&Giant); 372 } 373 374 error = 0; 375 flg = F_POSIX; 376 p = td->td_proc; 377 fdp = p->p_fd; 378 379 /* 380 * XXXRW: It could be an exclusive lock is not [always] needed here. 
381 */ 382 FILEDESC_XLOCK(fdp); 383 if ((unsigned)fd >= fdp->fd_nfiles || 384 (fp = fdp->fd_ofiles[fd]) == NULL) { 385 FILEDESC_XUNLOCK(fdp); 386 error = EBADF; 387 goto done2; 388 } 389 pop = &fdp->fd_ofileflags[fd]; 390 391 switch (cmd) { 392 case F_DUPFD: 393 /* mtx_assert(&Giant, MA_NOTOWNED); */ 394 FILEDESC_XUNLOCK(fdp); 395 newmin = arg; 396 PROC_LOCK(p); 397 if (newmin >= lim_cur(p, RLIMIT_NOFILE) || 398 newmin >= maxfilesperproc) { 399 PROC_UNLOCK(p); 400 error = EINVAL; 401 break; 402 } 403 PROC_UNLOCK(p); 404 error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval); 405 break; 406 407 case F_GETFD: 408 /* mtx_assert(&Giant, MA_NOTOWNED); */ 409 td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0; 410 FILEDESC_XUNLOCK(fdp); 411 break; 412 413 case F_SETFD: 414 /* mtx_assert(&Giant, MA_NOTOWNED); */ 415 *pop = (*pop &~ UF_EXCLOSE) | 416 (arg & FD_CLOEXEC ? UF_EXCLOSE : 0); 417 FILEDESC_XUNLOCK(fdp); 418 break; 419 420 case F_GETFL: 421 /* mtx_assert(&Giant, MA_NOTOWNED); */ 422 FILE_LOCK(fp); 423 td->td_retval[0] = OFLAGS(fp->f_flag); 424 FILE_UNLOCK(fp); 425 FILEDESC_XUNLOCK(fdp); 426 break; 427 428 case F_SETFL: 429 mtx_assert(&Giant, MA_OWNED); 430 FILE_LOCK(fp); 431 fhold_locked(fp); 432 fp->f_flag &= ~FCNTLFLAGS; 433 fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS; 434 FILE_UNLOCK(fp); 435 FILEDESC_XUNLOCK(fdp); 436 tmp = fp->f_flag & FNONBLOCK; 437 error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); 438 if (error) { 439 fdrop(fp, td); 440 break; 441 } 442 tmp = fp->f_flag & FASYNC; 443 error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td); 444 if (error == 0) { 445 fdrop(fp, td); 446 break; 447 } 448 FILE_LOCK(fp); 449 fp->f_flag &= ~FNONBLOCK; 450 FILE_UNLOCK(fp); 451 tmp = 0; 452 (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); 453 fdrop(fp, td); 454 break; 455 456 case F_GETOWN: 457 mtx_assert(&Giant, MA_OWNED); 458 fhold(fp); 459 FILEDESC_XUNLOCK(fdp); 460 error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); 461 if (error 
== 0) 462 td->td_retval[0] = tmp; 463 fdrop(fp, td); 464 break; 465 466 case F_SETOWN: 467 mtx_assert(&Giant, MA_OWNED); 468 fhold(fp); 469 FILEDESC_XUNLOCK(fdp); 470 tmp = arg; 471 error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td); 472 fdrop(fp, td); 473 break; 474 475 case F_SETLKW: 476 mtx_assert(&Giant, MA_OWNED); 477 flg |= F_WAIT; 478 /* FALLTHROUGH F_SETLK */ 479 480 case F_SETLK: 481 mtx_assert(&Giant, MA_OWNED); 482 if (fp->f_type != DTYPE_VNODE) { 483 FILEDESC_XUNLOCK(fdp); 484 error = EBADF; 485 break; 486 } 487 488 flp = (struct flock *)arg; 489 if (flp->l_whence == SEEK_CUR) { 490 if (fp->f_offset < 0 || 491 (flp->l_start > 0 && 492 fp->f_offset > OFF_MAX - flp->l_start)) { 493 FILEDESC_XUNLOCK(fdp); 494 error = EOVERFLOW; 495 break; 496 } 497 flp->l_start += fp->f_offset; 498 } 499 500 /* 501 * VOP_ADVLOCK() may block. 502 */ 503 fhold(fp); 504 FILEDESC_XUNLOCK(fdp); 505 vp = fp->f_vnode; 506 507 switch (flp->l_type) { 508 case F_RDLCK: 509 if ((fp->f_flag & FREAD) == 0) { 510 error = EBADF; 511 break; 512 } 513 PROC_LOCK(p->p_leader); 514 p->p_leader->p_flag |= P_ADVLOCK; 515 PROC_UNLOCK(p->p_leader); 516 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 517 flp, flg); 518 break; 519 case F_WRLCK: 520 if ((fp->f_flag & FWRITE) == 0) { 521 error = EBADF; 522 break; 523 } 524 PROC_LOCK(p->p_leader); 525 p->p_leader->p_flag |= P_ADVLOCK; 526 PROC_UNLOCK(p->p_leader); 527 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 528 flp, flg); 529 break; 530 case F_UNLCK: 531 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, 532 flp, F_POSIX); 533 break; 534 default: 535 error = EINVAL; 536 break; 537 } 538 /* Check for race with close */ 539 FILEDESC_XLOCK(fdp); 540 if ((unsigned) fd >= fdp->fd_nfiles || 541 fp != fdp->fd_ofiles[fd]) { 542 FILEDESC_XUNLOCK(fdp); 543 flp->l_whence = SEEK_SET; 544 flp->l_start = 0; 545 flp->l_len = 0; 546 flp->l_type = F_UNLCK; 547 (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, 548 F_UNLCK, flp, F_POSIX); 549 } 
else 550 FILEDESC_XUNLOCK(fdp); 551 fdrop(fp, td); 552 break; 553 554 case F_GETLK: 555 mtx_assert(&Giant, MA_OWNED); 556 if (fp->f_type != DTYPE_VNODE) { 557 FILEDESC_XUNLOCK(fdp); 558 error = EBADF; 559 break; 560 } 561 flp = (struct flock *)arg; 562 if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && 563 flp->l_type != F_UNLCK) { 564 FILEDESC_XUNLOCK(fdp); 565 error = EINVAL; 566 break; 567 } 568 if (flp->l_whence == SEEK_CUR) { 569 if ((flp->l_start > 0 && 570 fp->f_offset > OFF_MAX - flp->l_start) || 571 (flp->l_start < 0 && 572 fp->f_offset < OFF_MIN - flp->l_start)) { 573 FILEDESC_XUNLOCK(fdp); 574 error = EOVERFLOW; 575 break; 576 } 577 flp->l_start += fp->f_offset; 578 } 579 /* 580 * VOP_ADVLOCK() may block. 581 */ 582 fhold(fp); 583 FILEDESC_XUNLOCK(fdp); 584 vp = fp->f_vnode; 585 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, 586 F_POSIX); 587 fdrop(fp, td); 588 break; 589 default: 590 FILEDESC_XUNLOCK(fdp); 591 error = EINVAL; 592 break; 593 } 594 done2: 595 if (giant_locked) 596 mtx_unlock(&Giant); 597 return (error); 598 } 599 600 /* 601 * Common code for dup, dup2, and fcntl(F_DUPFD). 602 */ 603 static int 604 do_dup(struct thread *td, enum dup_type type, int old, int new, 605 register_t *retval) 606 { 607 struct filedesc *fdp; 608 struct proc *p; 609 struct file *fp; 610 struct file *delfp; 611 int error, holdleaders, maxfd; 612 613 KASSERT((type == DUP_VARIABLE || type == DUP_FIXED), 614 ("invalid dup type %d", type)); 615 616 p = td->td_proc; 617 fdp = p->p_fd; 618 619 /* 620 * Verify we have a valid descriptor to dup from and possibly to 621 * dup to. 
622 */ 623 if (old < 0 || new < 0) 624 return (EBADF); 625 PROC_LOCK(p); 626 maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 627 PROC_UNLOCK(p); 628 if (new >= maxfd) 629 return (EMFILE); 630 631 FILEDESC_XLOCK(fdp); 632 if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) { 633 FILEDESC_XUNLOCK(fdp); 634 return (EBADF); 635 } 636 if (type == DUP_FIXED && old == new) { 637 *retval = new; 638 FILEDESC_XUNLOCK(fdp); 639 return (0); 640 } 641 fp = fdp->fd_ofiles[old]; 642 fhold(fp); 643 644 /* 645 * If the caller specified a file descriptor, make sure the file 646 * table is large enough to hold it, and grab it. Otherwise, just 647 * allocate a new descriptor the usual way. Since the filedesc 648 * lock may be temporarily dropped in the process, we have to look 649 * out for a race. 650 */ 651 if (type == DUP_FIXED) { 652 if (new >= fdp->fd_nfiles) 653 fdgrowtable(fdp, new + 1); 654 if (fdp->fd_ofiles[new] == NULL) 655 fdused(fdp, new); 656 } else { 657 if ((error = fdalloc(td, new, &new)) != 0) { 658 FILEDESC_XUNLOCK(fdp); 659 fdrop(fp, td); 660 return (error); 661 } 662 } 663 664 /* 665 * If the old file changed out from under us then treat it as a 666 * bad file descriptor. Userland should do its own locking to 667 * avoid this case. 668 */ 669 if (fdp->fd_ofiles[old] != fp) { 670 /* we've allocated a descriptor which we won't use */ 671 if (fdp->fd_ofiles[new] == NULL) 672 fdunused(fdp, new); 673 FILEDESC_XUNLOCK(fdp); 674 fdrop(fp, td); 675 return (EBADF); 676 } 677 KASSERT(old != new, 678 ("new fd is same as old")); 679 680 /* 681 * Save info on the descriptor being overwritten. We cannot close 682 * it without introducing an ownership race for the slot, since we 683 * need to drop the filedesc lock to call closef(). 684 * 685 * XXX this duplicates parts of close(). 
686 */ 687 delfp = fdp->fd_ofiles[new]; 688 holdleaders = 0; 689 if (delfp != NULL) { 690 if (td->td_proc->p_fdtol != NULL) { 691 /* 692 * Ask fdfree() to sleep to ensure that all relevant 693 * process leaders can be traversed in closef(). 694 */ 695 fdp->fd_holdleaderscount++; 696 holdleaders = 1; 697 } 698 } 699 700 /* 701 * Duplicate the source descriptor 702 */ 703 fdp->fd_ofiles[new] = fp; 704 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; 705 if (new > fdp->fd_lastfile) 706 fdp->fd_lastfile = new; 707 *retval = new; 708 709 /* 710 * If we dup'd over a valid file, we now own the reference to it 711 * and must dispose of it using closef() semantics (as if a 712 * close() were performed on it). 713 * 714 * XXX this duplicates parts of close(). 715 */ 716 if (delfp != NULL) { 717 knote_fdclose(td, new); 718 if (delfp->f_type == DTYPE_MQUEUE) 719 mq_fdclose(td, new, delfp); 720 FILEDESC_XUNLOCK(fdp); 721 (void) closef(delfp, td); 722 if (holdleaders) { 723 FILEDESC_XLOCK(fdp); 724 fdp->fd_holdleaderscount--; 725 if (fdp->fd_holdleaderscount == 0 && 726 fdp->fd_holdleaderswakeup != 0) { 727 fdp->fd_holdleaderswakeup = 0; 728 wakeup(&fdp->fd_holdleaderscount); 729 } 730 FILEDESC_XUNLOCK(fdp); 731 } 732 } else { 733 FILEDESC_XUNLOCK(fdp); 734 } 735 return (0); 736 } 737 738 /* 739 * If sigio is on the list associated with a process or process group, 740 * disable signalling from the device, remove sigio from the list and 741 * free sigio. 
742 */ 743 void 744 funsetown(struct sigio **sigiop) 745 { 746 struct sigio *sigio; 747 748 SIGIO_LOCK(); 749 sigio = *sigiop; 750 if (sigio == NULL) { 751 SIGIO_UNLOCK(); 752 return; 753 } 754 *(sigio->sio_myref) = NULL; 755 if ((sigio)->sio_pgid < 0) { 756 struct pgrp *pg = (sigio)->sio_pgrp; 757 PGRP_LOCK(pg); 758 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio, 759 sigio, sio_pgsigio); 760 PGRP_UNLOCK(pg); 761 } else { 762 struct proc *p = (sigio)->sio_proc; 763 PROC_LOCK(p); 764 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio, 765 sigio, sio_pgsigio); 766 PROC_UNLOCK(p); 767 } 768 SIGIO_UNLOCK(); 769 crfree(sigio->sio_ucred); 770 FREE(sigio, M_SIGIO); 771 } 772 773 /* 774 * Free a list of sigio structures. 775 * We only need to lock the SIGIO_LOCK because we have made ourselves 776 * inaccessible to callers of fsetown and therefore do not need to lock 777 * the proc or pgrp struct for the list manipulation. 778 */ 779 void 780 funsetownlst(struct sigiolst *sigiolst) 781 { 782 struct proc *p; 783 struct pgrp *pg; 784 struct sigio *sigio; 785 786 sigio = SLIST_FIRST(sigiolst); 787 if (sigio == NULL) 788 return; 789 p = NULL; 790 pg = NULL; 791 792 /* 793 * Every entry of the list should belong 794 * to a single proc or pgrp. 
795 */ 796 if (sigio->sio_pgid < 0) { 797 pg = sigio->sio_pgrp; 798 PGRP_LOCK_ASSERT(pg, MA_NOTOWNED); 799 } else /* if (sigio->sio_pgid > 0) */ { 800 p = sigio->sio_proc; 801 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 802 } 803 804 SIGIO_LOCK(); 805 while ((sigio = SLIST_FIRST(sigiolst)) != NULL) { 806 *(sigio->sio_myref) = NULL; 807 if (pg != NULL) { 808 KASSERT(sigio->sio_pgid < 0, 809 ("Proc sigio in pgrp sigio list")); 810 KASSERT(sigio->sio_pgrp == pg, 811 ("Bogus pgrp in sigio list")); 812 PGRP_LOCK(pg); 813 SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, 814 sio_pgsigio); 815 PGRP_UNLOCK(pg); 816 } else /* if (p != NULL) */ { 817 KASSERT(sigio->sio_pgid > 0, 818 ("Pgrp sigio in proc sigio list")); 819 KASSERT(sigio->sio_proc == p, 820 ("Bogus proc in sigio list")); 821 PROC_LOCK(p); 822 SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, 823 sio_pgsigio); 824 PROC_UNLOCK(p); 825 } 826 SIGIO_UNLOCK(); 827 crfree(sigio->sio_ucred); 828 FREE(sigio, M_SIGIO); 829 SIGIO_LOCK(); 830 } 831 SIGIO_UNLOCK(); 832 } 833 834 /* 835 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg). 836 * 837 * After permission checking, add a sigio structure to the sigio list for 838 * the process or process group. 839 */ 840 int 841 fsetown(pid_t pgid, struct sigio **sigiop) 842 { 843 struct proc *proc; 844 struct pgrp *pgrp; 845 struct sigio *sigio; 846 int ret; 847 848 if (pgid == 0) { 849 funsetown(sigiop); 850 return (0); 851 } 852 853 ret = 0; 854 855 /* Allocate and fill in the new sigio out of locks. */ 856 MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK); 857 sigio->sio_pgid = pgid; 858 sigio->sio_ucred = crhold(curthread->td_ucred); 859 sigio->sio_myref = sigiop; 860 861 sx_slock(&proctree_lock); 862 if (pgid > 0) { 863 proc = pfind(pgid); 864 if (proc == NULL) { 865 ret = ESRCH; 866 goto fail; 867 } 868 869 /* 870 * Policy - Don't allow a process to FSETOWN a process 871 * in another session. 
872 * 873 * Remove this test to allow maximum flexibility or 874 * restrict FSETOWN to the current process or process 875 * group for maximum safety. 876 */ 877 PROC_UNLOCK(proc); 878 if (proc->p_session != curthread->td_proc->p_session) { 879 ret = EPERM; 880 goto fail; 881 } 882 883 pgrp = NULL; 884 } else /* if (pgid < 0) */ { 885 pgrp = pgfind(-pgid); 886 if (pgrp == NULL) { 887 ret = ESRCH; 888 goto fail; 889 } 890 PGRP_UNLOCK(pgrp); 891 892 /* 893 * Policy - Don't allow a process to FSETOWN a process 894 * in another session. 895 * 896 * Remove this test to allow maximum flexibility or 897 * restrict FSETOWN to the current process or process 898 * group for maximum safety. 899 */ 900 if (pgrp->pg_session != curthread->td_proc->p_session) { 901 ret = EPERM; 902 goto fail; 903 } 904 905 proc = NULL; 906 } 907 funsetown(sigiop); 908 if (pgid > 0) { 909 PROC_LOCK(proc); 910 /* 911 * Since funsetownlst() is called without the proctree 912 * locked, we need to check for P_WEXIT. 913 * XXX: is ESRCH correct? 914 */ 915 if ((proc->p_flag & P_WEXIT) != 0) { 916 PROC_UNLOCK(proc); 917 ret = ESRCH; 918 goto fail; 919 } 920 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio); 921 sigio->sio_proc = proc; 922 PROC_UNLOCK(proc); 923 } else { 924 PGRP_LOCK(pgrp); 925 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio); 926 sigio->sio_pgrp = pgrp; 927 PGRP_UNLOCK(pgrp); 928 } 929 sx_sunlock(&proctree_lock); 930 SIGIO_LOCK(); 931 *sigiop = sigio; 932 SIGIO_UNLOCK(); 933 return (0); 934 935 fail: 936 sx_sunlock(&proctree_lock); 937 crfree(sigio->sio_ucred); 938 FREE(sigio, M_SIGIO); 939 return (ret); 940 } 941 942 /* 943 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg). 944 */ 945 pid_t 946 fgetown(sigiop) 947 struct sigio **sigiop; 948 { 949 pid_t pgid; 950 951 SIGIO_LOCK(); 952 pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0; 953 SIGIO_UNLOCK(); 954 return (pgid); 955 } 956 957 /* 958 * Close a file descriptor. 
959 */ 960 #ifndef _SYS_SYSPROTO_H_ 961 struct close_args { 962 int fd; 963 }; 964 #endif 965 /* ARGSUSED */ 966 int 967 close(td, uap) 968 struct thread *td; 969 struct close_args *uap; 970 { 971 972 return (kern_close(td, uap->fd)); 973 } 974 975 int 976 kern_close(td, fd) 977 struct thread *td; 978 int fd; 979 { 980 struct filedesc *fdp; 981 struct file *fp; 982 int error; 983 int holdleaders; 984 985 error = 0; 986 holdleaders = 0; 987 fdp = td->td_proc->p_fd; 988 989 AUDIT_SYSCLOSE(td, fd); 990 991 FILEDESC_XLOCK(fdp); 992 if ((unsigned)fd >= fdp->fd_nfiles || 993 (fp = fdp->fd_ofiles[fd]) == NULL) { 994 FILEDESC_XUNLOCK(fdp); 995 return (EBADF); 996 } 997 fdp->fd_ofiles[fd] = NULL; 998 fdp->fd_ofileflags[fd] = 0; 999 fdunused(fdp, fd); 1000 if (td->td_proc->p_fdtol != NULL) { 1001 /* 1002 * Ask fdfree() to sleep to ensure that all relevant 1003 * process leaders can be traversed in closef(). 1004 */ 1005 fdp->fd_holdleaderscount++; 1006 holdleaders = 1; 1007 } 1008 1009 /* 1010 * We now hold the fp reference that used to be owned by the 1011 * descriptor array. We have to unlock the FILEDESC *AFTER* 1012 * knote_fdclose to prevent a race of the fd getting opened, a knote 1013 * added, and deleteing a knote for the new fd. 1014 */ 1015 knote_fdclose(td, fd); 1016 if (fp->f_type == DTYPE_MQUEUE) 1017 mq_fdclose(td, fd, fp); 1018 FILEDESC_XUNLOCK(fdp); 1019 1020 error = closef(fp, td); 1021 if (holdleaders) { 1022 FILEDESC_XLOCK(fdp); 1023 fdp->fd_holdleaderscount--; 1024 if (fdp->fd_holdleaderscount == 0 && 1025 fdp->fd_holdleaderswakeup != 0) { 1026 fdp->fd_holdleaderswakeup = 0; 1027 wakeup(&fdp->fd_holdleaderscount); 1028 } 1029 FILEDESC_XUNLOCK(fdp); 1030 } 1031 return (error); 1032 } 1033 1034 #if defined(COMPAT_43) 1035 /* 1036 * Return status information about a file descriptor. 
1037 */ 1038 #ifndef _SYS_SYSPROTO_H_ 1039 struct ofstat_args { 1040 int fd; 1041 struct ostat *sb; 1042 }; 1043 #endif 1044 /* ARGSUSED */ 1045 int 1046 ofstat(struct thread *td, struct ofstat_args *uap) 1047 { 1048 struct ostat oub; 1049 struct stat ub; 1050 int error; 1051 1052 error = kern_fstat(td, uap->fd, &ub); 1053 if (error == 0) { 1054 cvtstat(&ub, &oub); 1055 error = copyout(&oub, uap->sb, sizeof(oub)); 1056 } 1057 return (error); 1058 } 1059 #endif /* COMPAT_43 */ 1060 1061 /* 1062 * Return status information about a file descriptor. 1063 */ 1064 #ifndef _SYS_SYSPROTO_H_ 1065 struct fstat_args { 1066 int fd; 1067 struct stat *sb; 1068 }; 1069 #endif 1070 /* ARGSUSED */ 1071 int 1072 fstat(struct thread *td, struct fstat_args *uap) 1073 { 1074 struct stat ub; 1075 int error; 1076 1077 error = kern_fstat(td, uap->fd, &ub); 1078 if (error == 0) 1079 error = copyout(&ub, uap->sb, sizeof(ub)); 1080 return (error); 1081 } 1082 1083 int 1084 kern_fstat(struct thread *td, int fd, struct stat *sbp) 1085 { 1086 struct file *fp; 1087 int error; 1088 1089 AUDIT_ARG(fd, fd); 1090 1091 if ((error = fget(td, fd, &fp)) != 0) 1092 return (error); 1093 1094 AUDIT_ARG(file, td->td_proc, fp); 1095 1096 error = fo_stat(fp, sbp, td->td_ucred, td); 1097 fdrop(fp, td); 1098 return (error); 1099 } 1100 1101 /* 1102 * Return status information about a file descriptor. 1103 */ 1104 #ifndef _SYS_SYSPROTO_H_ 1105 struct nfstat_args { 1106 int fd; 1107 struct nstat *sb; 1108 }; 1109 #endif 1110 /* ARGSUSED */ 1111 int 1112 nfstat(struct thread *td, struct nfstat_args *uap) 1113 { 1114 struct nstat nub; 1115 struct stat ub; 1116 int error; 1117 1118 error = kern_fstat(td, uap->fd, &ub); 1119 if (error == 0) { 1120 cvtnstat(&ub, &nub); 1121 error = copyout(&nub, uap->sb, sizeof(nub)); 1122 } 1123 return (error); 1124 } 1125 1126 /* 1127 * Return pathconf information about a file descriptor. 
1128 */ 1129 #ifndef _SYS_SYSPROTO_H_ 1130 struct fpathconf_args { 1131 int fd; 1132 int name; 1133 }; 1134 #endif 1135 /* ARGSUSED */ 1136 int 1137 fpathconf(struct thread *td, struct fpathconf_args *uap) 1138 { 1139 struct file *fp; 1140 struct vnode *vp; 1141 int error; 1142 1143 if ((error = fget(td, uap->fd, &fp)) != 0) 1144 return (error); 1145 1146 /* If asynchronous I/O is available, it works for all descriptors. */ 1147 if (uap->name == _PC_ASYNC_IO) { 1148 td->td_retval[0] = async_io_version; 1149 goto out; 1150 } 1151 vp = fp->f_vnode; 1152 if (vp != NULL) { 1153 int vfslocked; 1154 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1155 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1156 error = VOP_PATHCONF(vp, uap->name, td->td_retval); 1157 VOP_UNLOCK(vp, 0, td); 1158 VFS_UNLOCK_GIANT(vfslocked); 1159 } else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { 1160 if (uap->name != _PC_PIPE_BUF) { 1161 error = EINVAL; 1162 } else { 1163 td->td_retval[0] = PIPE_BUF; 1164 error = 0; 1165 } 1166 } else { 1167 error = EOPNOTSUPP; 1168 } 1169 out: 1170 fdrop(fp, td); 1171 return (error); 1172 } 1173 1174 /* 1175 * Grow the file table to accomodate (at least) nfd descriptors. This may 1176 * block and drop the filedesc lock, but it will reacquire it before 1177 * returning. 
1178 */ 1179 static void 1180 fdgrowtable(struct filedesc *fdp, int nfd) 1181 { 1182 struct file **ntable; 1183 char *nfileflags; 1184 int nnfiles, onfiles; 1185 NDSLOTTYPE *nmap; 1186 1187 FILEDESC_XLOCK_ASSERT(fdp); 1188 1189 KASSERT(fdp->fd_nfiles > 0, 1190 ("zero-length file table")); 1191 1192 /* compute the size of the new table */ 1193 onfiles = fdp->fd_nfiles; 1194 nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */ 1195 if (nnfiles <= onfiles) 1196 /* the table is already large enough */ 1197 return; 1198 1199 /* allocate a new table and (if required) new bitmaps */ 1200 FILEDESC_XUNLOCK(fdp); 1201 MALLOC(ntable, struct file **, nnfiles * OFILESIZE, 1202 M_FILEDESC, M_ZERO | M_WAITOK); 1203 nfileflags = (char *)&ntable[nnfiles]; 1204 if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) 1205 MALLOC(nmap, NDSLOTTYPE *, NDSLOTS(nnfiles) * NDSLOTSIZE, 1206 M_FILEDESC, M_ZERO | M_WAITOK); 1207 else 1208 nmap = NULL; 1209 FILEDESC_XLOCK(fdp); 1210 1211 /* 1212 * We now have new tables ready to go. Since we dropped the 1213 * filedesc lock to call malloc(), watch out for a race. 1214 */ 1215 onfiles = fdp->fd_nfiles; 1216 if (onfiles >= nnfiles) { 1217 /* we lost the race, but that's OK */ 1218 free(ntable, M_FILEDESC); 1219 if (nmap != NULL) 1220 free(nmap, M_FILEDESC); 1221 return; 1222 } 1223 bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable)); 1224 bcopy(fdp->fd_ofileflags, nfileflags, onfiles); 1225 if (onfiles > NDFILE) 1226 free(fdp->fd_ofiles, M_FILEDESC); 1227 fdp->fd_ofiles = ntable; 1228 fdp->fd_ofileflags = nfileflags; 1229 if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) { 1230 bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap)); 1231 if (NDSLOTS(onfiles) > NDSLOTS(NDFILE)) 1232 free(fdp->fd_map, M_FILEDESC); 1233 fdp->fd_map = nmap; 1234 } 1235 fdp->fd_nfiles = nnfiles; 1236 } 1237 1238 /* 1239 * Allocate a file descriptor for the process. 
1240 */ 1241 int 1242 fdalloc(struct thread *td, int minfd, int *result) 1243 { 1244 struct proc *p = td->td_proc; 1245 struct filedesc *fdp = p->p_fd; 1246 int fd = -1, maxfd; 1247 1248 FILEDESC_XLOCK_ASSERT(fdp); 1249 1250 if (fdp->fd_freefile > minfd) 1251 minfd = fdp->fd_freefile; 1252 1253 PROC_LOCK(p); 1254 maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 1255 PROC_UNLOCK(p); 1256 1257 /* 1258 * Search the bitmap for a free descriptor. If none is found, try 1259 * to grow the file table. Keep at it until we either get a file 1260 * descriptor or run into process or system limits; fdgrowtable() 1261 * may drop the filedesc lock, so we're in a race. 1262 */ 1263 for (;;) { 1264 fd = fd_first_free(fdp, minfd, fdp->fd_nfiles); 1265 if (fd >= maxfd) 1266 return (EMFILE); 1267 if (fd < fdp->fd_nfiles) 1268 break; 1269 fdgrowtable(fdp, min(fdp->fd_nfiles * 2, maxfd)); 1270 } 1271 1272 /* 1273 * Perform some sanity checks, then mark the file descriptor as 1274 * used and return it to the caller. 1275 */ 1276 KASSERT(!fdisused(fdp, fd), 1277 ("fd_first_free() returned non-free descriptor")); 1278 KASSERT(fdp->fd_ofiles[fd] == NULL, 1279 ("free descriptor isn't")); 1280 fdp->fd_ofileflags[fd] = 0; /* XXX needed? */ 1281 fdused(fdp, fd); 1282 *result = fd; 1283 return (0); 1284 } 1285 1286 /* 1287 * Check to see whether n user file descriptors are available to the process 1288 * p. 
1289 */ 1290 int 1291 fdavail(struct thread *td, int n) 1292 { 1293 struct proc *p = td->td_proc; 1294 struct filedesc *fdp = td->td_proc->p_fd; 1295 struct file **fpp; 1296 int i, lim, last; 1297 1298 FILEDESC_LOCK_ASSERT(fdp); 1299 1300 PROC_LOCK(p); 1301 lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 1302 PROC_UNLOCK(p); 1303 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) 1304 return (1); 1305 last = min(fdp->fd_nfiles, lim); 1306 fpp = &fdp->fd_ofiles[fdp->fd_freefile]; 1307 for (i = last - fdp->fd_freefile; --i >= 0; fpp++) { 1308 if (*fpp == NULL && --n <= 0) 1309 return (1); 1310 } 1311 return (0); 1312 } 1313 1314 /* 1315 * Create a new open file structure and allocate a file decriptor for the 1316 * process that refers to it. We add one reference to the file for the 1317 * descriptor table and one reference for resultfp. This is to prevent us 1318 * being preempted and the entry in the descriptor table closed after we 1319 * release the FILEDESC lock. 1320 */ 1321 int 1322 falloc(struct thread *td, struct file **resultfp, int *resultfd) 1323 { 1324 struct proc *p = td->td_proc; 1325 struct file *fp, *fq; 1326 int error, i; 1327 int maxuserfiles = maxfiles - (maxfiles / 20); 1328 static struct timeval lastfail; 1329 static int curfail; 1330 1331 fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO); 1332 sx_xlock(&filelist_lock); 1333 1334 if ((openfiles >= maxuserfiles && 1335 priv_check_cred(td->td_ucred, PRIV_MAXFILES, SUSER_RUID) != 0) || 1336 openfiles >= maxfiles) { 1337 if (ppsratecheck(&lastfail, &curfail, 1)) { 1338 printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n", 1339 td->td_ucred->cr_ruid); 1340 } 1341 sx_xunlock(&filelist_lock); 1342 uma_zfree(file_zone, fp); 1343 return (ENFILE); 1344 } 1345 openfiles++; 1346 1347 /* 1348 * If the process has file descriptor zero open, add the new file 1349 * descriptor to the list of open files at that point, otherwise 1350 * put it at the front of the list of open files. 
1351 */ 1352 fp->f_mtxp = mtx_pool_alloc(mtxpool_sleep); 1353 fp->f_count = 1; 1354 if (resultfp) 1355 fp->f_count++; 1356 fp->f_cred = crhold(td->td_ucred); 1357 fp->f_ops = &badfileops; 1358 fp->f_data = NULL; 1359 fp->f_vnode = NULL; 1360 FILEDESC_XLOCK(p->p_fd); 1361 if ((fq = p->p_fd->fd_ofiles[0])) { 1362 LIST_INSERT_AFTER(fq, fp, f_list); 1363 } else { 1364 LIST_INSERT_HEAD(&filehead, fp, f_list); 1365 } 1366 sx_xunlock(&filelist_lock); 1367 if ((error = fdalloc(td, 0, &i))) { 1368 FILEDESC_XUNLOCK(p->p_fd); 1369 fdrop(fp, td); 1370 if (resultfp) 1371 fdrop(fp, td); 1372 return (error); 1373 } 1374 p->p_fd->fd_ofiles[i] = fp; 1375 FILEDESC_XUNLOCK(p->p_fd); 1376 if (resultfp) 1377 *resultfp = fp; 1378 if (resultfd) 1379 *resultfd = i; 1380 return (0); 1381 } 1382 1383 /* 1384 * Build a new filedesc structure from another. 1385 * Copy the current, root, and jail root vnode references. 1386 */ 1387 struct filedesc * 1388 fdinit(struct filedesc *fdp) 1389 { 1390 struct filedesc0 *newfdp; 1391 1392 newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO); 1393 FILEDESC_LOCK_INIT(&newfdp->fd_fd); 1394 if (fdp != NULL) { 1395 FILEDESC_XLOCK(fdp); 1396 newfdp->fd_fd.fd_cdir = fdp->fd_cdir; 1397 if (newfdp->fd_fd.fd_cdir) 1398 VREF(newfdp->fd_fd.fd_cdir); 1399 newfdp->fd_fd.fd_rdir = fdp->fd_rdir; 1400 if (newfdp->fd_fd.fd_rdir) 1401 VREF(newfdp->fd_fd.fd_rdir); 1402 newfdp->fd_fd.fd_jdir = fdp->fd_jdir; 1403 if (newfdp->fd_fd.fd_jdir) 1404 VREF(newfdp->fd_fd.fd_jdir); 1405 FILEDESC_XUNLOCK(fdp); 1406 } 1407 1408 /* Create the file descriptor table. 
*/ 1409 newfdp->fd_fd.fd_refcnt = 1; 1410 newfdp->fd_fd.fd_holdcnt = 1; 1411 newfdp->fd_fd.fd_cmask = CMASK; 1412 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 1413 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 1414 newfdp->fd_fd.fd_nfiles = NDFILE; 1415 newfdp->fd_fd.fd_map = newfdp->fd_dmap; 1416 newfdp->fd_fd.fd_lastfile = -1; 1417 return (&newfdp->fd_fd); 1418 } 1419 1420 static struct filedesc * 1421 fdhold(struct proc *p) 1422 { 1423 struct filedesc *fdp; 1424 1425 mtx_lock(&fdesc_mtx); 1426 fdp = p->p_fd; 1427 if (fdp != NULL) 1428 fdp->fd_holdcnt++; 1429 mtx_unlock(&fdesc_mtx); 1430 return (fdp); 1431 } 1432 1433 static void 1434 fddrop(struct filedesc *fdp) 1435 { 1436 int i; 1437 1438 mtx_lock(&fdesc_mtx); 1439 i = --fdp->fd_holdcnt; 1440 mtx_unlock(&fdesc_mtx); 1441 if (i > 0) 1442 return; 1443 1444 FILEDESC_LOCK_DESTROY(fdp); 1445 FREE(fdp, M_FILEDESC); 1446 } 1447 1448 /* 1449 * Share a filedesc structure. 1450 */ 1451 struct filedesc * 1452 fdshare(struct filedesc *fdp) 1453 { 1454 1455 FILEDESC_XLOCK(fdp); 1456 fdp->fd_refcnt++; 1457 FILEDESC_XUNLOCK(fdp); 1458 return (fdp); 1459 } 1460 1461 /* 1462 * Unshare a filedesc structure, if necessary by making a copy 1463 */ 1464 void 1465 fdunshare(struct proc *p, struct thread *td) 1466 { 1467 1468 FILEDESC_XLOCK(p->p_fd); 1469 if (p->p_fd->fd_refcnt > 1) { 1470 struct filedesc *tmp; 1471 1472 FILEDESC_XUNLOCK(p->p_fd); 1473 tmp = fdcopy(p->p_fd); 1474 fdfree(td); 1475 p->p_fd = tmp; 1476 } else 1477 FILEDESC_XUNLOCK(p->p_fd); 1478 } 1479 1480 /* 1481 * Copy a filedesc structure. A NULL pointer in returns a NULL reference, 1482 * this is to ease callers, not catch errors. 1483 */ 1484 struct filedesc * 1485 fdcopy(struct filedesc *fdp) 1486 { 1487 struct filedesc *newfdp; 1488 int i; 1489 1490 /* Certain daemons might not have file descriptors. 
*/ 1491 if (fdp == NULL) 1492 return (NULL); 1493 1494 newfdp = fdinit(fdp); 1495 FILEDESC_SLOCK(fdp); 1496 while (fdp->fd_lastfile >= newfdp->fd_nfiles) { 1497 FILEDESC_SUNLOCK(fdp); 1498 FILEDESC_XLOCK(newfdp); 1499 fdgrowtable(newfdp, fdp->fd_lastfile + 1); 1500 FILEDESC_XUNLOCK(newfdp); 1501 FILEDESC_SLOCK(fdp); 1502 } 1503 /* copy everything except kqueue descriptors */ 1504 newfdp->fd_freefile = -1; 1505 for (i = 0; i <= fdp->fd_lastfile; ++i) { 1506 if (fdisused(fdp, i) && 1507 fdp->fd_ofiles[i]->f_type != DTYPE_KQUEUE) { 1508 newfdp->fd_ofiles[i] = fdp->fd_ofiles[i]; 1509 newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i]; 1510 fhold(newfdp->fd_ofiles[i]); 1511 newfdp->fd_lastfile = i; 1512 } else { 1513 if (newfdp->fd_freefile == -1) 1514 newfdp->fd_freefile = i; 1515 } 1516 } 1517 FILEDESC_SUNLOCK(fdp); 1518 FILEDESC_XLOCK(newfdp); 1519 for (i = 0; i <= newfdp->fd_lastfile; ++i) 1520 if (newfdp->fd_ofiles[i] != NULL) 1521 fdused(newfdp, i); 1522 FILEDESC_XUNLOCK(newfdp); 1523 FILEDESC_SLOCK(fdp); 1524 if (newfdp->fd_freefile == -1) 1525 newfdp->fd_freefile = i; 1526 newfdp->fd_cmask = fdp->fd_cmask; 1527 FILEDESC_SUNLOCK(fdp); 1528 return (newfdp); 1529 } 1530 1531 /* 1532 * Release a filedesc structure. 1533 */ 1534 void 1535 fdfree(struct thread *td) 1536 { 1537 struct filedesc *fdp; 1538 struct file **fpp; 1539 int i, locked; 1540 struct filedesc_to_leader *fdtol; 1541 struct file *fp; 1542 struct vnode *cdir, *jdir, *rdir, *vp; 1543 struct flock lf; 1544 1545 /* Certain daemons might not have file descriptors. 
*/ 1546 fdp = td->td_proc->p_fd; 1547 if (fdp == NULL) 1548 return; 1549 1550 /* Check for special need to clear POSIX style locks */ 1551 fdtol = td->td_proc->p_fdtol; 1552 if (fdtol != NULL) { 1553 FILEDESC_XLOCK(fdp); 1554 KASSERT(fdtol->fdl_refcount > 0, 1555 ("filedesc_to_refcount botch: fdl_refcount=%d", 1556 fdtol->fdl_refcount)); 1557 if (fdtol->fdl_refcount == 1 && 1558 (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1559 for (i = 0, fpp = fdp->fd_ofiles; 1560 i <= fdp->fd_lastfile; 1561 i++, fpp++) { 1562 if (*fpp == NULL || 1563 (*fpp)->f_type != DTYPE_VNODE) 1564 continue; 1565 fp = *fpp; 1566 fhold(fp); 1567 FILEDESC_XUNLOCK(fdp); 1568 lf.l_whence = SEEK_SET; 1569 lf.l_start = 0; 1570 lf.l_len = 0; 1571 lf.l_type = F_UNLCK; 1572 vp = fp->f_vnode; 1573 locked = VFS_LOCK_GIANT(vp->v_mount); 1574 (void) VOP_ADVLOCK(vp, 1575 (caddr_t)td->td_proc-> 1576 p_leader, 1577 F_UNLCK, 1578 &lf, 1579 F_POSIX); 1580 VFS_UNLOCK_GIANT(locked); 1581 FILEDESC_XLOCK(fdp); 1582 fdrop(fp, td); 1583 fpp = fdp->fd_ofiles + i; 1584 } 1585 } 1586 retry: 1587 if (fdtol->fdl_refcount == 1) { 1588 if (fdp->fd_holdleaderscount > 0 && 1589 (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1590 /* 1591 * close() or do_dup() has cleared a reference 1592 * in a shared file descriptor table. 1593 */ 1594 fdp->fd_holdleaderswakeup = 1; 1595 sx_sleep(&fdp->fd_holdleaderscount, 1596 FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0); 1597 goto retry; 1598 } 1599 if (fdtol->fdl_holdcount > 0) { 1600 /* 1601 * Ensure that fdtol->fdl_leader remains 1602 * valid in closef(). 
1603 */ 1604 fdtol->fdl_wakeup = 1; 1605 sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK, 1606 "fdlhold", 0); 1607 goto retry; 1608 } 1609 } 1610 fdtol->fdl_refcount--; 1611 if (fdtol->fdl_refcount == 0 && 1612 fdtol->fdl_holdcount == 0) { 1613 fdtol->fdl_next->fdl_prev = fdtol->fdl_prev; 1614 fdtol->fdl_prev->fdl_next = fdtol->fdl_next; 1615 } else 1616 fdtol = NULL; 1617 td->td_proc->p_fdtol = NULL; 1618 FILEDESC_XUNLOCK(fdp); 1619 if (fdtol != NULL) 1620 FREE(fdtol, M_FILEDESC_TO_LEADER); 1621 } 1622 FILEDESC_XLOCK(fdp); 1623 i = --fdp->fd_refcnt; 1624 FILEDESC_XUNLOCK(fdp); 1625 if (i > 0) 1626 return; 1627 /* 1628 * We are the last reference to the structure, so we can 1629 * safely assume it will not change out from under us. 1630 */ 1631 fpp = fdp->fd_ofiles; 1632 for (i = fdp->fd_lastfile; i-- >= 0; fpp++) { 1633 if (*fpp) 1634 (void) closef(*fpp, td); 1635 } 1636 FILEDESC_XLOCK(fdp); 1637 1638 /* XXX This should happen earlier. */ 1639 mtx_lock(&fdesc_mtx); 1640 td->td_proc->p_fd = NULL; 1641 mtx_unlock(&fdesc_mtx); 1642 1643 if (fdp->fd_nfiles > NDFILE) 1644 FREE(fdp->fd_ofiles, M_FILEDESC); 1645 if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE)) 1646 FREE(fdp->fd_map, M_FILEDESC); 1647 1648 fdp->fd_nfiles = 0; 1649 1650 cdir = fdp->fd_cdir; 1651 fdp->fd_cdir = NULL; 1652 rdir = fdp->fd_rdir; 1653 fdp->fd_rdir = NULL; 1654 jdir = fdp->fd_jdir; 1655 fdp->fd_jdir = NULL; 1656 FILEDESC_XUNLOCK(fdp); 1657 1658 if (cdir) { 1659 locked = VFS_LOCK_GIANT(cdir->v_mount); 1660 vrele(cdir); 1661 VFS_UNLOCK_GIANT(locked); 1662 } 1663 if (rdir) { 1664 locked = VFS_LOCK_GIANT(rdir->v_mount); 1665 vrele(rdir); 1666 VFS_UNLOCK_GIANT(locked); 1667 } 1668 if (jdir) { 1669 locked = VFS_LOCK_GIANT(jdir->v_mount); 1670 vrele(jdir); 1671 VFS_UNLOCK_GIANT(locked); 1672 } 1673 1674 fddrop(fdp); 1675 } 1676 1677 /* 1678 * For setugid programs, we don't want to people to use that setugidness 1679 * to generate error messages which write to a file which otherwise would 1680 * otherwise be 
off-limits to the process. We check for filesystems where 1681 * the vnode can change out from under us after execve (like [lin]procfs). 1682 * 1683 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is 1684 * sufficient. We also don't check for setugidness since we know we are. 1685 */ 1686 static int 1687 is_unsafe(struct file *fp) 1688 { 1689 if (fp->f_type == DTYPE_VNODE) { 1690 struct vnode *vp = fp->f_vnode; 1691 1692 if ((vp->v_vflag & VV_PROCDEP) != 0) 1693 return (1); 1694 } 1695 return (0); 1696 } 1697 1698 /* 1699 * Make this setguid thing safe, if at all possible. 1700 */ 1701 void 1702 setugidsafety(struct thread *td) 1703 { 1704 struct filedesc *fdp; 1705 int i; 1706 1707 /* Certain daemons might not have file descriptors. */ 1708 fdp = td->td_proc->p_fd; 1709 if (fdp == NULL) 1710 return; 1711 1712 /* 1713 * Note: fdp->fd_ofiles may be reallocated out from under us while 1714 * we are blocked in a close. Be careful! 1715 */ 1716 FILEDESC_XLOCK(fdp); 1717 for (i = 0; i <= fdp->fd_lastfile; i++) { 1718 if (i > 2) 1719 break; 1720 if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) { 1721 struct file *fp; 1722 1723 knote_fdclose(td, i); 1724 /* 1725 * NULL-out descriptor prior to close to avoid 1726 * a race while close blocks. 1727 */ 1728 fp = fdp->fd_ofiles[i]; 1729 fdp->fd_ofiles[i] = NULL; 1730 fdp->fd_ofileflags[i] = 0; 1731 fdunused(fdp, i); 1732 FILEDESC_XUNLOCK(fdp); 1733 (void) closef(fp, td); 1734 FILEDESC_XLOCK(fdp); 1735 } 1736 } 1737 FILEDESC_XUNLOCK(fdp); 1738 } 1739 1740 /* 1741 * If a specific file object occupies a specific file descriptor, close the 1742 * file descriptor entry and drop a reference on the file object. This is a 1743 * convenience function to handle a subsequent error in a function that calls 1744 * falloc() that handles the race that another thread might have closed the 1745 * file descriptor out from under the thread creating the file object. 
1746 */ 1747 void 1748 fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td) 1749 { 1750 1751 FILEDESC_XLOCK(fdp); 1752 if (fdp->fd_ofiles[idx] == fp) { 1753 fdp->fd_ofiles[idx] = NULL; 1754 fdunused(fdp, idx); 1755 FILEDESC_XUNLOCK(fdp); 1756 fdrop(fp, td); 1757 } else 1758 FILEDESC_XUNLOCK(fdp); 1759 } 1760 1761 /* 1762 * Close any files on exec? 1763 */ 1764 void 1765 fdcloseexec(struct thread *td) 1766 { 1767 struct filedesc *fdp; 1768 int i; 1769 1770 /* Certain daemons might not have file descriptors. */ 1771 fdp = td->td_proc->p_fd; 1772 if (fdp == NULL) 1773 return; 1774 1775 FILEDESC_XLOCK(fdp); 1776 1777 /* 1778 * We cannot cache fd_ofiles or fd_ofileflags since operations 1779 * may block and rip them out from under us. 1780 */ 1781 for (i = 0; i <= fdp->fd_lastfile; i++) { 1782 if (fdp->fd_ofiles[i] != NULL && 1783 (fdp->fd_ofiles[i]->f_type == DTYPE_MQUEUE || 1784 (fdp->fd_ofileflags[i] & UF_EXCLOSE))) { 1785 struct file *fp; 1786 1787 knote_fdclose(td, i); 1788 /* 1789 * NULL-out descriptor prior to close to avoid 1790 * a race while close blocks. 1791 */ 1792 fp = fdp->fd_ofiles[i]; 1793 fdp->fd_ofiles[i] = NULL; 1794 fdp->fd_ofileflags[i] = 0; 1795 fdunused(fdp, i); 1796 if (fp->f_type == DTYPE_MQUEUE) 1797 mq_fdclose(td, i, fp); 1798 FILEDESC_XUNLOCK(fdp); 1799 (void) closef(fp, td); 1800 FILEDESC_XLOCK(fdp); 1801 } 1802 } 1803 FILEDESC_XUNLOCK(fdp); 1804 } 1805 1806 /* 1807 * It is unsafe for set[ug]id processes to be started with file 1808 * descriptors 0..2 closed, as these descriptors are given implicit 1809 * significance in the Standard C library. fdcheckstd() will create a 1810 * descriptor referencing /dev/null for each of stdin, stdout, and 1811 * stderr that is not already open. 
1812 */ 1813 int 1814 fdcheckstd(struct thread *td) 1815 { 1816 struct nameidata nd; 1817 struct filedesc *fdp; 1818 struct file *fp; 1819 register_t retval; 1820 int fd, i, error, flags, devnull; 1821 1822 fdp = td->td_proc->p_fd; 1823 if (fdp == NULL) 1824 return (0); 1825 KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared")); 1826 devnull = -1; 1827 error = 0; 1828 for (i = 0; i < 3; i++) { 1829 if (fdp->fd_ofiles[i] != NULL) 1830 continue; 1831 if (devnull < 0) { 1832 int vfslocked; 1833 error = falloc(td, &fp, &fd); 1834 if (error != 0) 1835 break; 1836 /* Note extra ref on `fp' held for us by falloc(). */ 1837 KASSERT(fd == i, ("oof, we didn't get our fd")); 1838 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, 1839 "/dev/null", td); 1840 flags = FREAD | FWRITE; 1841 error = vn_open(&nd, &flags, 0, fd); 1842 if (error != 0) { 1843 /* 1844 * Someone may have closed the entry in the 1845 * file descriptor table, so check it hasn't 1846 * changed before dropping the reference 1847 * count. 1848 */ 1849 FILEDESC_XLOCK(fdp); 1850 KASSERT(fdp->fd_ofiles[fd] == fp, 1851 ("table not shared, how did it change?")); 1852 fdp->fd_ofiles[fd] = NULL; 1853 fdunused(fdp, fd); 1854 FILEDESC_XUNLOCK(fdp); 1855 fdrop(fp, td); 1856 fdrop(fp, td); 1857 break; 1858 } 1859 vfslocked = NDHASGIANT(&nd); 1860 NDFREE(&nd, NDF_ONLY_PNBUF); 1861 FILE_LOCK(fp); 1862 fp->f_flag = flags; 1863 fp->f_vnode = nd.ni_vp; 1864 if (fp->f_data == NULL) 1865 fp->f_data = nd.ni_vp; 1866 fp->f_type = DTYPE_VNODE; 1867 if (fp->f_ops == &badfileops) 1868 fp->f_ops = &vnops; 1869 FILE_UNLOCK(fp); 1870 VOP_UNLOCK(nd.ni_vp, 0, td); 1871 VFS_UNLOCK_GIANT(vfslocked); 1872 devnull = fd; 1873 fdrop(fp, td); 1874 } else { 1875 error = do_dup(td, DUP_FIXED, devnull, i, &retval); 1876 if (error != 0) 1877 break; 1878 } 1879 } 1880 return (error); 1881 } 1882 1883 /* 1884 * Internal form of close. Decrement reference count on file structure. 
1885 * Note: td may be NULL when closing a file that was being passed in a 1886 * message. 1887 * 1888 * XXXRW: Giant is not required for the caller, but often will be held; this 1889 * makes it moderately likely the Giant will be recursed in the VFS case. 1890 */ 1891 int 1892 closef(struct file *fp, struct thread *td) 1893 { 1894 struct vnode *vp; 1895 struct flock lf; 1896 struct filedesc_to_leader *fdtol; 1897 struct filedesc *fdp; 1898 1899 /* 1900 * POSIX record locking dictates that any close releases ALL 1901 * locks owned by this process. This is handled by setting 1902 * a flag in the unlock to free ONLY locks obeying POSIX 1903 * semantics, and not to free BSD-style file locks. 1904 * If the descriptor was in a message, POSIX-style locks 1905 * aren't passed with the descriptor, and the thread pointer 1906 * will be NULL. Callers should be careful only to pass a 1907 * NULL thread pointer when there really is no owning 1908 * context that might have locks, or the locks will be 1909 * leaked. 1910 */ 1911 if (fp->f_type == DTYPE_VNODE && td != NULL) { 1912 int vfslocked; 1913 1914 vp = fp->f_vnode; 1915 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1916 if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1917 lf.l_whence = SEEK_SET; 1918 lf.l_start = 0; 1919 lf.l_len = 0; 1920 lf.l_type = F_UNLCK; 1921 (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader, 1922 F_UNLCK, &lf, F_POSIX); 1923 } 1924 fdtol = td->td_proc->p_fdtol; 1925 if (fdtol != NULL) { 1926 /* 1927 * Handle special case where file descriptor table is 1928 * shared between multiple process leaders. 
1929 */ 1930 fdp = td->td_proc->p_fd; 1931 FILEDESC_XLOCK(fdp); 1932 for (fdtol = fdtol->fdl_next; 1933 fdtol != td->td_proc->p_fdtol; 1934 fdtol = fdtol->fdl_next) { 1935 if ((fdtol->fdl_leader->p_flag & 1936 P_ADVLOCK) == 0) 1937 continue; 1938 fdtol->fdl_holdcount++; 1939 FILEDESC_XUNLOCK(fdp); 1940 lf.l_whence = SEEK_SET; 1941 lf.l_start = 0; 1942 lf.l_len = 0; 1943 lf.l_type = F_UNLCK; 1944 vp = fp->f_vnode; 1945 (void) VOP_ADVLOCK(vp, 1946 (caddr_t)fdtol->fdl_leader, 1947 F_UNLCK, &lf, F_POSIX); 1948 FILEDESC_XLOCK(fdp); 1949 fdtol->fdl_holdcount--; 1950 if (fdtol->fdl_holdcount == 0 && 1951 fdtol->fdl_wakeup != 0) { 1952 fdtol->fdl_wakeup = 0; 1953 wakeup(fdtol); 1954 } 1955 } 1956 FILEDESC_XUNLOCK(fdp); 1957 } 1958 VFS_UNLOCK_GIANT(vfslocked); 1959 } 1960 return (fdrop(fp, td)); 1961 } 1962 1963 /* 1964 * Extract the file pointer associated with the specified descriptor for the 1965 * current user process. 1966 * 1967 * If the descriptor doesn't exist, EBADF is returned. 1968 * 1969 * If the descriptor exists but doesn't match 'flags' then return EBADF for 1970 * read attempts and EINVAL for write attempts. 1971 * 1972 * If 'hold' is set (non-zero) the file's refcount will be bumped on return. 1973 * It should be dropped with fdrop(). If it is not set, then the refcount 1974 * will not be bumped however the thread's filedesc struct will be returned 1975 * locked (for fgetsock). 1976 * 1977 * If an error occured the non-zero error is returned and *fpp is set to 1978 * NULL. Otherwise *fpp is set and zero is returned. 
1979 */ 1980 static __inline int 1981 _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold) 1982 { 1983 struct filedesc *fdp; 1984 struct file *fp; 1985 1986 *fpp = NULL; 1987 if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) 1988 return (EBADF); 1989 FILEDESC_SLOCK(fdp); 1990 if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) { 1991 FILEDESC_SUNLOCK(fdp); 1992 return (EBADF); 1993 } 1994 1995 /* 1996 * FREAD and FWRITE failure return EBADF as per POSIX. 1997 * 1998 * Only one flag, or 0, may be specified. 1999 */ 2000 if (flags == FREAD && (fp->f_flag & FREAD) == 0) { 2001 FILEDESC_SUNLOCK(fdp); 2002 return (EBADF); 2003 } 2004 if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) { 2005 FILEDESC_SUNLOCK(fdp); 2006 return (EBADF); 2007 } 2008 if (hold) { 2009 fhold(fp); 2010 FILEDESC_SUNLOCK(fdp); 2011 } 2012 *fpp = fp; 2013 return (0); 2014 } 2015 2016 int 2017 fget(struct thread *td, int fd, struct file **fpp) 2018 { 2019 2020 return(_fget(td, fd, fpp, 0, 1)); 2021 } 2022 2023 int 2024 fget_read(struct thread *td, int fd, struct file **fpp) 2025 { 2026 2027 return(_fget(td, fd, fpp, FREAD, 1)); 2028 } 2029 2030 int 2031 fget_write(struct thread *td, int fd, struct file **fpp) 2032 { 2033 2034 return(_fget(td, fd, fpp, FWRITE, 1)); 2035 } 2036 2037 /* 2038 * Like fget() but loads the underlying vnode, or returns an error if the 2039 * descriptor does not represent a vnode. Note that pipes use vnodes but 2040 * never have VM objects. The returned vnode will be vref()'d. 2041 * 2042 * XXX: what about the unused flags ? 
2043 */ 2044 static __inline int 2045 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags) 2046 { 2047 struct file *fp; 2048 int error; 2049 2050 *vpp = NULL; 2051 if ((error = _fget(td, fd, &fp, 0, 0)) != 0) 2052 return (error); 2053 if (fp->f_vnode == NULL) { 2054 error = EINVAL; 2055 } else { 2056 *vpp = fp->f_vnode; 2057 vref(*vpp); 2058 } 2059 FILEDESC_SUNLOCK(td->td_proc->p_fd); 2060 return (error); 2061 } 2062 2063 int 2064 fgetvp(struct thread *td, int fd, struct vnode **vpp) 2065 { 2066 2067 return (_fgetvp(td, fd, vpp, 0)); 2068 } 2069 2070 int 2071 fgetvp_read(struct thread *td, int fd, struct vnode **vpp) 2072 { 2073 2074 return (_fgetvp(td, fd, vpp, FREAD)); 2075 } 2076 2077 #ifdef notyet 2078 int 2079 fgetvp_write(struct thread *td, int fd, struct vnode **vpp) 2080 { 2081 2082 return (_fgetvp(td, fd, vpp, FWRITE)); 2083 } 2084 #endif 2085 2086 /* 2087 * Like fget() but loads the underlying socket, or returns an error if the 2088 * descriptor does not represent a socket. 2089 * 2090 * We bump the ref count on the returned socket. XXX Also obtain the SX lock 2091 * in the future. 2092 * 2093 * XXXRW: fgetsock() and fputsock() are deprecated, as consumers should rely 2094 * on their file descriptor reference to prevent the socket from being free'd 2095 * during use. 
2096 */ 2097 int 2098 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp) 2099 { 2100 struct file *fp; 2101 int error; 2102 2103 NET_ASSERT_GIANT(); 2104 2105 *spp = NULL; 2106 if (fflagp != NULL) 2107 *fflagp = 0; 2108 if ((error = _fget(td, fd, &fp, 0, 0)) != 0) 2109 return (error); 2110 if (fp->f_type != DTYPE_SOCKET) { 2111 error = ENOTSOCK; 2112 } else { 2113 *spp = fp->f_data; 2114 if (fflagp) 2115 *fflagp = fp->f_flag; 2116 SOCK_LOCK(*spp); 2117 soref(*spp); 2118 SOCK_UNLOCK(*spp); 2119 } 2120 FILEDESC_SUNLOCK(td->td_proc->p_fd); 2121 return (error); 2122 } 2123 2124 /* 2125 * Drop the reference count on the socket and XXX release the SX lock in the 2126 * future. The last reference closes the socket. 2127 * 2128 * XXXRW: fputsock() is deprecated, see comment for fgetsock(). 2129 */ 2130 void 2131 fputsock(struct socket *so) 2132 { 2133 2134 NET_ASSERT_GIANT(); 2135 ACCEPT_LOCK(); 2136 SOCK_LOCK(so); 2137 sorele(so); 2138 } 2139 2140 int 2141 fdrop(struct file *fp, struct thread *td) 2142 { 2143 2144 FILE_LOCK(fp); 2145 return (fdrop_locked(fp, td)); 2146 } 2147 2148 /* 2149 * Drop reference on struct file passed in, may call closef if the 2150 * reference hits zero. 2151 * Expects struct file locked, and will unlock it. 2152 */ 2153 static int 2154 fdrop_locked(struct file *fp, struct thread *td) 2155 { 2156 int error; 2157 2158 FILE_LOCK_ASSERT(fp, MA_OWNED); 2159 2160 if (--fp->f_count > 0) { 2161 FILE_UNLOCK(fp); 2162 return (0); 2163 } 2164 2165 /* 2166 * We might have just dropped the last reference to a file 2167 * object that is for a UNIX domain socket whose message 2168 * buffers are being examined in unp_gc(). If that is the 2169 * case, FWAIT will be set in f_gcflag and we need to wait for 2170 * unp_gc() to finish its scan. 2171 */ 2172 while (fp->f_gcflag & FWAIT) 2173 msleep(&fp->f_gcflag, fp->f_mtxp, 0, "fpdrop", 0); 2174 2175 /* We have the last ref so we can proceed without the file lock. 
*/ 2176 FILE_UNLOCK(fp); 2177 if (fp->f_count < 0) 2178 panic("fdrop: count < 0"); 2179 if (fp->f_ops != &badfileops) 2180 error = fo_close(fp, td); 2181 else 2182 error = 0; 2183 2184 sx_xlock(&filelist_lock); 2185 LIST_REMOVE(fp, f_list); 2186 openfiles--; 2187 sx_xunlock(&filelist_lock); 2188 crfree(fp->f_cred); 2189 uma_zfree(file_zone, fp); 2190 2191 return (error); 2192 } 2193 2194 /* 2195 * Apply an advisory lock on a file descriptor. 2196 * 2197 * Just attempt to get a record lock of the requested type on the entire file 2198 * (l_whence = SEEK_SET, l_start = 0, l_len = 0). 2199 */ 2200 #ifndef _SYS_SYSPROTO_H_ 2201 struct flock_args { 2202 int fd; 2203 int how; 2204 }; 2205 #endif 2206 /* ARGSUSED */ 2207 int 2208 flock(struct thread *td, struct flock_args *uap) 2209 { 2210 struct file *fp; 2211 struct vnode *vp; 2212 struct flock lf; 2213 int error; 2214 2215 if ((error = fget(td, uap->fd, &fp)) != 0) 2216 return (error); 2217 if (fp->f_type != DTYPE_VNODE) { 2218 fdrop(fp, td); 2219 return (EOPNOTSUPP); 2220 } 2221 2222 mtx_lock(&Giant); 2223 vp = fp->f_vnode; 2224 lf.l_whence = SEEK_SET; 2225 lf.l_start = 0; 2226 lf.l_len = 0; 2227 if (uap->how & LOCK_UN) { 2228 lf.l_type = F_UNLCK; 2229 FILE_LOCK(fp); 2230 fp->f_flag &= ~FHASLOCK; 2231 FILE_UNLOCK(fp); 2232 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 2233 goto done2; 2234 } 2235 if (uap->how & LOCK_EX) 2236 lf.l_type = F_WRLCK; 2237 else if (uap->how & LOCK_SH) 2238 lf.l_type = F_RDLCK; 2239 else { 2240 error = EBADF; 2241 goto done2; 2242 } 2243 FILE_LOCK(fp); 2244 fp->f_flag |= FHASLOCK; 2245 FILE_UNLOCK(fp); 2246 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, 2247 (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT); 2248 done2: 2249 fdrop(fp, td); 2250 mtx_unlock(&Giant); 2251 return (error); 2252 } 2253 /* 2254 * Duplicate the specified descriptor to a free descriptor. 
2255 */ 2256 int 2257 dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error) 2258 { 2259 struct file *wfp; 2260 struct file *fp; 2261 2262 /* 2263 * If the to-be-dup'd fd number is greater than the allowed number 2264 * of file descriptors, or the fd to be dup'd has already been 2265 * closed, then reject. 2266 */ 2267 FILEDESC_XLOCK(fdp); 2268 if (dfd < 0 || dfd >= fdp->fd_nfiles || 2269 (wfp = fdp->fd_ofiles[dfd]) == NULL) { 2270 FILEDESC_XUNLOCK(fdp); 2271 return (EBADF); 2272 } 2273 2274 /* 2275 * There are two cases of interest here. 2276 * 2277 * For ENODEV simply dup (dfd) to file descriptor (indx) and return. 2278 * 2279 * For ENXIO steal away the file structure from (dfd) and store it in 2280 * (indx). (dfd) is effectively closed by this operation. 2281 * 2282 * Any other error code is just returned. 2283 */ 2284 switch (error) { 2285 case ENODEV: 2286 /* 2287 * Check that the mode the file is being opened for is a 2288 * subset of the mode of the existing descriptor. 2289 */ 2290 FILE_LOCK(wfp); 2291 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { 2292 FILE_UNLOCK(wfp); 2293 FILEDESC_XUNLOCK(fdp); 2294 return (EACCES); 2295 } 2296 fp = fdp->fd_ofiles[indx]; 2297 fdp->fd_ofiles[indx] = wfp; 2298 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 2299 if (fp == NULL) 2300 fdused(fdp, indx); 2301 fhold_locked(wfp); 2302 FILE_UNLOCK(wfp); 2303 FILEDESC_XUNLOCK(fdp); 2304 if (fp != NULL) 2305 /* 2306 * We now own the reference to fp that the ofiles[] 2307 * array used to own. Release it. 2308 */ 2309 fdrop(fp, td); 2310 return (0); 2311 2312 case ENXIO: 2313 /* 2314 * Steal away the file pointer from dfd and stuff it into indx. 
2315 */ 2316 fp = fdp->fd_ofiles[indx]; 2317 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; 2318 fdp->fd_ofiles[dfd] = NULL; 2319 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 2320 fdp->fd_ofileflags[dfd] = 0; 2321 fdunused(fdp, dfd); 2322 if (fp == NULL) 2323 fdused(fdp, indx); 2324 FILEDESC_XUNLOCK(fdp); 2325 2326 /* 2327 * We now own the reference to fp that the ofiles[] array 2328 * used to own. Release it. 2329 */ 2330 if (fp != NULL) 2331 fdrop(fp, td); 2332 return (0); 2333 2334 default: 2335 FILEDESC_XUNLOCK(fdp); 2336 return (error); 2337 } 2338 /* NOTREACHED */ 2339 } 2340 2341 /* 2342 * Scan all active processes to see if any of them have a current or root 2343 * directory of `olddp'. If so, replace them with the new mount point. 2344 */ 2345 void 2346 mountcheckdirs(struct vnode *olddp, struct vnode *newdp) 2347 { 2348 struct filedesc *fdp; 2349 struct proc *p; 2350 int nrele; 2351 2352 if (vrefcnt(olddp) == 1) 2353 return; 2354 sx_slock(&allproc_lock); 2355 FOREACH_PROC_IN_SYSTEM(p) { 2356 fdp = fdhold(p); 2357 if (fdp == NULL) 2358 continue; 2359 nrele = 0; 2360 FILEDESC_XLOCK(fdp); 2361 if (fdp->fd_cdir == olddp) { 2362 vref(newdp); 2363 fdp->fd_cdir = newdp; 2364 nrele++; 2365 } 2366 if (fdp->fd_rdir == olddp) { 2367 vref(newdp); 2368 fdp->fd_rdir = newdp; 2369 nrele++; 2370 } 2371 FILEDESC_XUNLOCK(fdp); 2372 fddrop(fdp); 2373 while (nrele--) 2374 vrele(olddp); 2375 } 2376 sx_sunlock(&allproc_lock); 2377 if (rootvnode == olddp) { 2378 vrele(rootvnode); 2379 vref(newdp); 2380 rootvnode = newdp; 2381 } 2382 } 2383 2384 struct filedesc_to_leader * 2385 filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader) 2386 { 2387 struct filedesc_to_leader *fdtol; 2388 2389 MALLOC(fdtol, struct filedesc_to_leader *, 2390 sizeof(struct filedesc_to_leader), 2391 M_FILEDESC_TO_LEADER, 2392 M_WAITOK); 2393 fdtol->fdl_refcount = 1; 2394 fdtol->fdl_holdcount = 0; 2395 fdtol->fdl_wakeup = 0; 2396 fdtol->fdl_leader = leader; 
2397 if (old != NULL) { 2398 FILEDESC_XLOCK(fdp); 2399 fdtol->fdl_next = old->fdl_next; 2400 fdtol->fdl_prev = old; 2401 old->fdl_next = fdtol; 2402 fdtol->fdl_next->fdl_prev = fdtol; 2403 FILEDESC_XUNLOCK(fdp); 2404 } else { 2405 fdtol->fdl_next = fdtol; 2406 fdtol->fdl_prev = fdtol; 2407 } 2408 return (fdtol); 2409 } 2410 2411 /* 2412 * Get file structures. 2413 */ 2414 static int 2415 sysctl_kern_file(SYSCTL_HANDLER_ARGS) 2416 { 2417 struct xfile xf; 2418 struct filedesc *fdp; 2419 struct file *fp; 2420 struct proc *p; 2421 int error, n; 2422 2423 /* 2424 * Note: because the number of file descriptors is calculated 2425 * in different ways for sizing vs returning the data, 2426 * there is information leakage from the first loop. However, 2427 * it is of a similar order of magnitude to the leakage from 2428 * global system statistics such as kern.openfiles. 2429 */ 2430 error = sysctl_wire_old_buffer(req, 0); 2431 if (error != 0) 2432 return (error); 2433 if (req->oldptr == NULL) { 2434 n = 16; /* A slight overestimate. */ 2435 sx_slock(&filelist_lock); 2436 LIST_FOREACH(fp, &filehead, f_list) { 2437 /* 2438 * We should grab the lock, but this is an 2439 * estimate, so does it really matter? 
2440 */ 2441 /* mtx_lock(fp->f_mtxp); */ 2442 n += fp->f_count; 2443 /* mtx_unlock(f->f_mtxp); */ 2444 } 2445 sx_sunlock(&filelist_lock); 2446 return (SYSCTL_OUT(req, 0, n * sizeof(xf))); 2447 } 2448 error = 0; 2449 bzero(&xf, sizeof(xf)); 2450 xf.xf_size = sizeof(xf); 2451 sx_slock(&allproc_lock); 2452 FOREACH_PROC_IN_SYSTEM(p) { 2453 if (p->p_state == PRS_NEW) 2454 continue; 2455 PROC_LOCK(p); 2456 if (p_cansee(req->td, p) != 0) { 2457 PROC_UNLOCK(p); 2458 continue; 2459 } 2460 xf.xf_pid = p->p_pid; 2461 xf.xf_uid = p->p_ucred->cr_uid; 2462 PROC_UNLOCK(p); 2463 fdp = fdhold(p); 2464 if (fdp == NULL) 2465 continue; 2466 FILEDESC_SLOCK(fdp); 2467 for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) { 2468 if ((fp = fdp->fd_ofiles[n]) == NULL) 2469 continue; 2470 xf.xf_fd = n; 2471 xf.xf_file = fp; 2472 xf.xf_data = fp->f_data; 2473 xf.xf_vnode = fp->f_vnode; 2474 xf.xf_type = fp->f_type; 2475 xf.xf_count = fp->f_count; 2476 xf.xf_msgcount = fp->f_msgcount; 2477 xf.xf_offset = fp->f_offset; 2478 xf.xf_flag = fp->f_flag; 2479 error = SYSCTL_OUT(req, &xf, sizeof(xf)); 2480 if (error) 2481 break; 2482 } 2483 FILEDESC_SUNLOCK(fdp); 2484 fddrop(fdp); 2485 if (error) 2486 break; 2487 } 2488 sx_sunlock(&allproc_lock); 2489 return (error); 2490 } 2491 2492 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD, 2493 0, 0, sysctl_kern_file, "S,xfile", "Entire file table"); 2494 2495 #ifdef DDB 2496 /* 2497 * For the purposes of debugging, generate a human-readable string for the 2498 * file type. 
2499 */ 2500 static const char * 2501 file_type_to_name(short type) 2502 { 2503 2504 switch (type) { 2505 case 0: 2506 return ("zero"); 2507 case DTYPE_VNODE: 2508 return ("vnod"); 2509 case DTYPE_SOCKET: 2510 return ("sock"); 2511 case DTYPE_PIPE: 2512 return ("pipe"); 2513 case DTYPE_FIFO: 2514 return ("fifo"); 2515 case DTYPE_KQUEUE: 2516 return ("kque"); 2517 case DTYPE_CRYPTO: 2518 return ("crpt"); 2519 case DTYPE_MQUEUE: 2520 return ("mque"); 2521 default: 2522 return ("unkn"); 2523 } 2524 } 2525 2526 /* 2527 * For the purposes of debugging, identify a process (if any, perhaps one of 2528 * many) that references the passed file in its file descriptor array. Return 2529 * NULL if none. 2530 */ 2531 static struct proc * 2532 file_to_first_proc(struct file *fp) 2533 { 2534 struct filedesc *fdp; 2535 struct proc *p; 2536 int n; 2537 2538 FOREACH_PROC_IN_SYSTEM(p) { 2539 if (p->p_state == PRS_NEW) 2540 continue; 2541 fdp = p->p_fd; 2542 if (fdp == NULL) 2543 continue; 2544 for (n = 0; n < fdp->fd_nfiles; n++) { 2545 if (fp == fdp->fd_ofiles[n]) 2546 return (p); 2547 } 2548 } 2549 return (NULL); 2550 } 2551 2552 static void 2553 db_print_file(struct file *fp, int header) 2554 { 2555 struct proc *p; 2556 2557 if (header) 2558 db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n", 2559 "File", "Type", "Data", "Flag", "GCFl", "Count", 2560 "MCount", "Vnode", "FPID", "FCmd"); 2561 p = file_to_first_proc(fp); 2562 db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp, 2563 file_type_to_name(fp->f_type), fp->f_data, fp->f_flag, 2564 fp->f_gcflag, fp->f_count, fp->f_msgcount, fp->f_vnode, 2565 p != NULL ? p->p_pid : -1, p != NULL ? 
p->p_comm : "-"); 2566 } 2567 2568 DB_SHOW_COMMAND(file, db_show_file) 2569 { 2570 struct file *fp; 2571 2572 if (!have_addr) { 2573 db_printf("usage: show file <addr>\n"); 2574 return; 2575 } 2576 fp = (struct file *)addr; 2577 db_print_file(fp, 1); 2578 } 2579 2580 DB_SHOW_COMMAND(files, db_show_files) 2581 { 2582 struct file *fp; 2583 int header; 2584 2585 header = 1; 2586 LIST_FOREACH(fp, &filehead, f_list) { 2587 db_print_file(fp, header); 2588 header = 0; 2589 } 2590 } 2591 #endif 2592 2593 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, 2594 &maxfilesperproc, 0, "Maximum files allowed open per process"); 2595 2596 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, 2597 &maxfiles, 0, "Maximum number of files"); 2598 2599 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD, 2600 &openfiles, 0, "System-wide number of open files"); 2601 2602 /* ARGSUSED*/ 2603 static void 2604 filelistinit(void *dummy) 2605 { 2606 2607 file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL, 2608 NULL, NULL, UMA_ALIGN_PTR, 0); 2609 sx_init(&filelist_lock, "filelist lock"); 2610 mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); 2611 mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF); 2612 } 2613 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL) 2614 2615 /*-------------------------------------------------------------------*/ 2616 2617 static int 2618 badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) 2619 { 2620 2621 return (EBADF); 2622 } 2623 2624 static int 2625 badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td) 2626 { 2627 2628 return (EBADF); 2629 } 2630 2631 static int 2632 badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) 2633 { 2634 2635 return (0); 2636 } 2637 2638 static int 2639 badfo_kqfilter(struct file *fp, struct knote *kn) 2640 { 2641 2642 return (EBADF); 2643 } 2644 2645 static int 
2646 badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td) 2647 { 2648 2649 return (EBADF); 2650 } 2651 2652 static int 2653 badfo_close(struct file *fp, struct thread *td) 2654 { 2655 2656 return (EBADF); 2657 } 2658 2659 struct fileops badfileops = { 2660 .fo_read = badfo_readwrite, 2661 .fo_write = badfo_readwrite, 2662 .fo_ioctl = badfo_ioctl, 2663 .fo_poll = badfo_poll, 2664 .fo_kqfilter = badfo_kqfilter, 2665 .fo_stat = badfo_stat, 2666 .fo_close = badfo_close, 2667 }; 2668 2669 2670 /*-------------------------------------------------------------------*/ 2671 2672 /* 2673 * File Descriptor pseudo-device driver (/dev/fd/). 2674 * 2675 * Opening minor device N dup()s the file (if any) connected to file 2676 * descriptor N belonging to the calling process. Note that this driver 2677 * consists of only the ``open()'' routine, because all subsequent 2678 * references to this file will be direct to the other driver. 2679 * 2680 * XXX: we could give this one a cloning event handler if necessary. 2681 */ 2682 2683 /* ARGSUSED */ 2684 static int 2685 fdopen(struct cdev *dev, int mode, int type, struct thread *td) 2686 { 2687 2688 /* 2689 * XXX Kludge: set curthread->td_dupfd to contain the value of the 2690 * the file descriptor being sought for duplication. The error 2691 * return ensures that the vnode for this device will be released 2692 * by vn_open. Open will detect this special error and take the 2693 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 2694 * will simply report the error. 
2695 */ 2696 td->td_dupfd = dev2unit(dev); 2697 return (ENODEV); 2698 } 2699 2700 static struct cdevsw fildesc_cdevsw = { 2701 .d_version = D_VERSION, 2702 .d_flags = D_NEEDGIANT, 2703 .d_open = fdopen, 2704 .d_name = "FD", 2705 }; 2706 2707 static void 2708 fildesc_drvinit(void *unused) 2709 { 2710 struct cdev *dev; 2711 2712 dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0"); 2713 make_dev_alias(dev, "stdin"); 2714 dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1"); 2715 make_dev_alias(dev, "stdout"); 2716 dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2"); 2717 make_dev_alias(dev, "stderr"); 2718 } 2719 2720 SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL) 2721