/*-
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_ddb.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>

#include <sys/conf.h>
#include <sys/domain.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mqueue.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/unistd.h>
#include <sys/user.h>
#include <sys/vnode.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <security/audit/audit.h>

#include <vm/uma.h>

#include <ddb/ddb.h>

static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table");
static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader",
    "file desc to leader structures");
static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");

static uma_zone_t file_zone;


/* Flags for do_dup() */
#define DUP_FIXED	0x1	/* Force fixed allocation */
#define DUP_FCNTL	0x2	/* fcntl()-style errors */

static int do_dup(struct thread *td, int flags, int old, int new,
    register_t *retval);
static int	fd_first_free(struct filedesc *, int, int);
static int	fd_last_used(struct filedesc *, int, int);
static void	fdgrowtable(struct filedesc *, int);
static void	fdunused(struct filedesc *fdp, int fd);
static void	fdused(struct filedesc *fdp, int fd);

/*
 * A process is initially started out with NDFILE descriptors stored within
 * this structure, selected to be enough for typical applications based on
 * the historical limit of 20 open files (and the usage of descriptors by
 * shells).  If these descriptors are exhausted, a larger descriptor table
 * may be allocated, up to a process' resource limit; the internal arrays
 * are then unused.
 */
#define NDFILE		20
#define NDSLOTSIZE	sizeof(NDSLOTTYPE)
#define NDENTRIES	(NDSLOTSIZE * __CHAR_BIT)
#define NDSLOT(x)	((x) / NDENTRIES)
#define NDBIT(x)	((NDSLOTTYPE)1 << ((x) % NDENTRIES))
#define NDSLOTS(x)	(((x) + NDENTRIES - 1) / NDENTRIES)

/*
 * Storage required per open file descriptor.
 */
#define OFILESIZE (sizeof(struct file *) + sizeof(char))

/*
 * Basic allocation of descriptors:
 * one of the above, plus arrays for NDFILE descriptors.
 */
struct filedesc0 {
	struct filedesc fd_fd;
	/*
	 * These arrays are used when the number of open files is
	 * <= NDFILE, and are then pointed to by the pointers above.
	 */
	struct file *fd_dfiles[NDFILE];
	char fd_dfileflags[NDFILE];
	NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];
};

/*
 * Descriptor management.
 */
volatile int openfiles;			/* actual number of open files */
struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
void (*mq_fdclose)(struct thread *td, int fd, struct file *fp);

/* A mutex to protect the association between a proc and filedesc. */
static struct mtx fdesc_mtx;
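/*
 * Illustrative note on the fd_map bitmap scanned by the helpers below:
 * descriptors are grouped NDENTRIES per NDSLOTTYPE word.  For example,
 * on an LP64 platform where NDSLOTTYPE is a 64-bit u_long, NDENTRIES is
 * 64, so descriptor 70 is tracked in fd_map[NDSLOT(70)] == fd_map[1] at
 * bit NDBIT(70) == (NDSLOTTYPE)1 << 6, and NDSLOTS(NDFILE) == 1, i.e. the
 * static fd_dmap in struct filedesc0 covers the first 20 descriptors with
 * a single word.  (Exact widths depend on the platform's NDSLOTTYPE.)
 */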

/*
 * Find the first zero bit in the given bitmap, starting at low and not
 * exceeding size - 1.
 */
static int
fd_first_free(struct filedesc *fdp, int low, int size)
{
	NDSLOTTYPE *map = fdp->fd_map;
	NDSLOTTYPE mask;
	int off, maxoff;

	if (low >= size)
		return (low);

	off = NDSLOT(low);
	if (low % NDENTRIES) {
		mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES)));
		if ((mask &= ~map[off]) != 0UL)
			return (off * NDENTRIES + ffsl(mask) - 1);
		++off;
	}
	for (maxoff = NDSLOTS(size); off < maxoff; ++off)
		if (map[off] != ~0UL)
			return (off * NDENTRIES + ffsl(~map[off]) - 1);
	return (size);
}

/*
 * Find the highest non-zero bit in the given bitmap, starting at low and
 * not exceeding size - 1.
 */
static int
fd_last_used(struct filedesc *fdp, int low, int size)
{
	NDSLOTTYPE *map = fdp->fd_map;
	NDSLOTTYPE mask;
	int off, minoff;

	if (low >= size)
		return (-1);

	off = NDSLOT(size);
	if (size % NDENTRIES) {
		mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES));
		if ((mask &= map[off]) != 0)
			return (off * NDENTRIES + flsl(mask) - 1);
		--off;
	}
	for (minoff = NDSLOT(low); off >= minoff; --off)
		if (map[off] != 0)
			return (off * NDENTRIES + flsl(map[off]) - 1);
	return (low - 1);
}

static int
fdisused(struct filedesc *fdp, int fd)
{
	KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
	    ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles));
	return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0);
}

/*
 * Mark a file descriptor as used.
 */
static void
fdused(struct filedesc *fdp, int fd)
{

	FILEDESC_XLOCK_ASSERT(fdp);
	KASSERT(!fdisused(fdp, fd),
	    ("fd already used"));

	fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd);
	if (fd > fdp->fd_lastfile)
		fdp->fd_lastfile = fd;
	if (fd == fdp->fd_freefile)
		fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles);
}

/*
 * Mark a file descriptor as unused.
 */
static void
fdunused(struct filedesc *fdp, int fd)
{

	FILEDESC_XLOCK_ASSERT(fdp);
	KASSERT(fdisused(fdp, fd),
	    ("fd is already unused"));
	KASSERT(fdp->fd_ofiles[fd] == NULL,
	    ("fd is still in use"));

	fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd);
	if (fd < fdp->fd_freefile)
		fdp->fd_freefile = fd;
	if (fd == fdp->fd_lastfile)
		fdp->fd_lastfile = fd_last_used(fdp, 0, fd);
}

/*
 * System calls on descriptors.
 */
#ifndef _SYS_SYSPROTO_H_
struct getdtablesize_args {
	int	dummy;
};
#endif
/* ARGSUSED */
int
getdtablesize(struct thread *td, struct getdtablesize_args *uap)
{
	struct proc *p = td->td_proc;

	PROC_LOCK(p);
	td->td_retval[0] =
	    min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
	PROC_UNLOCK(p);
	return (0);
}

/*
 * Duplicate a file descriptor to a particular value.
 *
 * Note: keep in mind that a potential race condition exists when closing
 * descriptors from a shared descriptor table (via rfork).
 */
#ifndef _SYS_SYSPROTO_H_
struct dup2_args {
	u_int	from;
	u_int	to;
};
#endif
/* ARGSUSED */
int
dup2(struct thread *td, struct dup2_args *uap)
{

	return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to,
	    td->td_retval));
}

/*
 * Duplicate a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct dup_args {
	u_int	fd;
};
#endif
/* ARGSUSED */
int
dup(struct thread *td, struct dup_args *uap)
{

	return (do_dup(td, 0, (int)uap->fd, 0, td->td_retval));
}

/*
 * The file control system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct fcntl_args {
	int	fd;
	int	cmd;
	long	arg;
};
#endif
/* ARGSUSED */
int
fcntl(struct thread *td, struct fcntl_args *uap)
{
	struct flock fl;
	struct oflock ofl;
	intptr_t arg;
	int error;
	int cmd;

	error = 0;
	cmd = uap->cmd;
	switch (uap->cmd) {
	case F_OGETLK:
	case F_OSETLK:
	case F_OSETLKW:
		/*
		 * Convert old flock structure to new.
		 */
		error = copyin((void *)(intptr_t)uap->arg, &ofl, sizeof(ofl));
		fl.l_start = ofl.l_start;
		fl.l_len = ofl.l_len;
		fl.l_pid = ofl.l_pid;
		fl.l_type = ofl.l_type;
		fl.l_whence = ofl.l_whence;
		fl.l_sysid = 0;

		switch (uap->cmd) {
		case F_OGETLK:
			cmd = F_GETLK;
			break;
		case F_OSETLK:
			cmd = F_SETLK;
			break;
		case F_OSETLKW:
			cmd = F_SETLKW;
			break;
		}
		arg = (intptr_t)&fl;
		break;
	case F_GETLK:
	case F_SETLK:
	case F_SETLKW:
	case F_SETLK_REMOTE:
		error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
		arg = (intptr_t)&fl;
		break;
	default:
		arg = uap->arg;
		break;
	}
	if (error)
		return (error);
	error = kern_fcntl(td, uap->fd, cmd, arg);
	if (error)
		return (error);
	if (uap->cmd == F_OGETLK) {
		ofl.l_start = fl.l_start;
		ofl.l_len = fl.l_len;
		ofl.l_pid = fl.l_pid;
		ofl.l_type = fl.l_type;
		ofl.l_whence = fl.l_whence;
		error = copyout(&ofl, (void *)(intptr_t)uap->arg, sizeof(ofl));
	} else if (uap->cmd == F_GETLK) {
		error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
	}
	return (error);
}

static inline struct file *
fdtofp(int fd, struct filedesc *fdp)
{
	struct file *fp;

	FILEDESC_LOCK_ASSERT(fdp);
	if ((unsigned)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL)
		return (NULL);
	return (fp);
}

int
kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
{
	struct filedesc *fdp;
	struct flock *flp;
	struct file *fp;
	struct proc *p;
	char *pop;
	struct vnode *vp;
	int error, flg, tmp;
	int vfslocked;

	vfslocked = 0;
	error = 0;
	flg = F_POSIX;
	p = td->td_proc;
	fdp = p->p_fd;

	switch (cmd) {
	case F_DUPFD:
		tmp = arg;
		error = do_dup(td, DUP_FCNTL, fd, tmp, td->td_retval);
		break;

	case F_DUP2FD:
		tmp = arg;
		error = do_dup(td, DUP_FIXED, fd, tmp, td->td_retval);
		break;

	case F_GETFD:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		pop = &fdp->fd_ofileflags[fd];
		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
		FILEDESC_SUNLOCK(fdp);
		break;

	case F_SETFD:
		FILEDESC_XLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_XUNLOCK(fdp);
			error = EBADF;
			break;
		}
		pop = &fdp->fd_ofileflags[fd];
		*pop = (*pop &~ UF_EXCLOSE) |
		    (arg & FD_CLOEXEC ?
		    UF_EXCLOSE : 0);
		FILEDESC_XUNLOCK(fdp);
		break;

	case F_GETFL:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		td->td_retval[0] = OFLAGS(fp->f_flag);
		FILEDESC_SUNLOCK(fdp);
		break;

	case F_SETFL:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		fhold(fp);
		FILEDESC_SUNLOCK(fdp);
		do {
			tmp = flg = fp->f_flag;
			tmp &= ~FCNTLFLAGS;
			tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
		} while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0);
		tmp = fp->f_flag & FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		if (error) {
			fdrop(fp, td);
			break;
		}
		tmp = fp->f_flag & FASYNC;
		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
		if (error == 0) {
			fdrop(fp, td);
			break;
		}
		atomic_clear_int(&fp->f_flag, FNONBLOCK);
		tmp = 0;
		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_GETOWN:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		fhold(fp);
		FILEDESC_SUNLOCK(fdp);
		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
		if (error == 0)
			td->td_retval[0] = tmp;
		fdrop(fp, td);
		break;

	case F_SETOWN:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		fhold(fp);
		FILEDESC_SUNLOCK(fdp);
		tmp = arg;
		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_SETLK_REMOTE:
		error = priv_check(td, PRIV_NFS_LOCKD);
		if (error)
			return (error);
		flg = F_REMOTE;
		goto do_setlk;

	case F_SETLKW:
		flg |= F_WAIT;
		/* FALLTHROUGH F_SETLK */

	case F_SETLK:
	do_setlk:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		flp = (struct flock *)arg;
		if (flp->l_whence == SEEK_CUR) {
			if (fp->f_offset < 0 ||
			    (flp->l_start > 0 &&
			     fp->f_offset > OFF_MAX - flp->l_start)) {
				FILEDESC_SUNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}

		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_SUNLOCK(fdp);
		vp = fp->f_vnode;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		switch (flp->l_type) {
		case F_RDLCK:
			if ((fp->f_flag & FREAD) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_WRLCK:
			if ((fp->f_flag & FWRITE) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_UNLCK:
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
			    flp, flg);
			break;
		case F_UNLCKSYS:
			/*
			 * Temporary api for testing remote lock
			 * infrastructure.
			 */
			if (flg != F_REMOTE) {
				error = EINVAL;
				break;
			}
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
			    F_UNLCKSYS, flp, flg);
			break;
		default:
			error = EINVAL;
			break;
		}
		VFS_UNLOCK_GIANT(vfslocked);
		vfslocked = 0;
		/* Check for race with close */
		FILEDESC_SLOCK(fdp);
		if ((unsigned) fd >= fdp->fd_nfiles ||
		    fp != fdp->fd_ofiles[fd]) {
			FILEDESC_SUNLOCK(fdp);
			flp->l_whence = SEEK_SET;
			flp->l_start = 0;
			flp->l_len = 0;
			flp->l_type = F_UNLCK;
			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
			    F_UNLCK, flp, F_POSIX);
			VFS_UNLOCK_GIANT(vfslocked);
			vfslocked = 0;
		} else
			FILEDESC_SUNLOCK(fdp);
		fdrop(fp, td);
		break;

	case F_GETLK:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		flp = (struct flock *)arg;
		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
		    flp->l_type != F_UNLCK) {
			FILEDESC_SUNLOCK(fdp);
			error = EINVAL;
			break;
		}
		if (flp->l_whence == SEEK_CUR) {
			if ((flp->l_start > 0 &&
			     fp->f_offset > OFF_MAX - flp->l_start) ||
			    (flp->l_start < 0 &&
			     fp->f_offset < OFF_MIN - flp->l_start)) {
				FILEDESC_SUNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}
		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_SUNLOCK(fdp);
		vp = fp->f_vnode;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
		    F_POSIX);
		VFS_UNLOCK_GIANT(vfslocked);
		vfslocked = 0;
		fdrop(fp, td);
		break;
	default:
		error = EINVAL;
		break;
	}
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}

/*
 * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD).
 */
static int
do_dup(struct thread *td, int flags, int old, int new,
    register_t *retval)
{
	struct filedesc *fdp;
	struct proc *p;
	struct file *fp;
	struct file *delfp;
	int error, holdleaders, maxfd;

	p = td->td_proc;
	fdp = p->p_fd;

	/*
	 * Verify we have a valid descriptor to dup from and possibly to
	 * dup to.  Unlike dup() and dup2(), fcntl()'s F_DUPFD should
	 * return EINVAL when the new descriptor is out of bounds.
	 */
	if (old < 0)
		return (EBADF);
	if (new < 0)
		return (flags & DUP_FCNTL ? EINVAL : EBADF);
	PROC_LOCK(p);
	maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
	PROC_UNLOCK(p);
	if (new >= maxfd)
		return (flags & DUP_FCNTL ? EINVAL : EMFILE);

	FILEDESC_XLOCK(fdp);
	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
		FILEDESC_XUNLOCK(fdp);
		return (EBADF);
	}
	if (flags & DUP_FIXED && old == new) {
		*retval = new;
		FILEDESC_XUNLOCK(fdp);
		return (0);
	}
	fp = fdp->fd_ofiles[old];
	fhold(fp);

	/*
	 * If the caller specified a file descriptor, make sure the file
	 * table is large enough to hold it, and grab it.  Otherwise, just
	 * allocate a new descriptor the usual way.  Since the filedesc
	 * lock may be temporarily dropped in the process, we have to look
	 * out for a race.
	 */
	if (flags & DUP_FIXED) {
		if (new >= fdp->fd_nfiles)
			fdgrowtable(fdp, new + 1);
		if (fdp->fd_ofiles[new] == NULL)
			fdused(fdp, new);
	} else {
		if ((error = fdalloc(td, new, &new)) != 0) {
			FILEDESC_XUNLOCK(fdp);
			fdrop(fp, td);
			return (error);
		}
	}

	/*
	 * If the old file changed out from under us then treat it as a
	 * bad file descriptor.  Userland should do its own locking to
	 * avoid this case.
	 */
	if (fdp->fd_ofiles[old] != fp) {
		/* we've allocated a descriptor which we won't use */
		if (fdp->fd_ofiles[new] == NULL)
			fdunused(fdp, new);
		FILEDESC_XUNLOCK(fdp);
		fdrop(fp, td);
		return (EBADF);
	}
	KASSERT(old != new,
	    ("new fd is same as old"));

	/*
	 * Save info on the descriptor being overwritten.  We cannot close
	 * it without introducing an ownership race for the slot, since we
	 * need to drop the filedesc lock to call closef().
	 *
	 * XXX this duplicates parts of close().
	 */
	delfp = fdp->fd_ofiles[new];
	holdleaders = 0;
	if (delfp != NULL) {
		if (td->td_proc->p_fdtol != NULL) {
			/*
			 * Ask fdfree() to sleep to ensure that all relevant
			 * process leaders can be traversed in closef().
			 */
			fdp->fd_holdleaderscount++;
			holdleaders = 1;
		}
	}

	/*
	 * Duplicate the source descriptor
	 */
	fdp->fd_ofiles[new] = fp;
	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
	if (new > fdp->fd_lastfile)
		fdp->fd_lastfile = new;
	*retval = new;

	/*
	 * If we dup'd over a valid file, we now own the reference to it
	 * and must dispose of it using closef() semantics (as if a
	 * close() were performed on it).
	 *
	 * XXX this duplicates parts of close().
	 */
	if (delfp != NULL) {
		knote_fdclose(td, new);
		if (delfp->f_type == DTYPE_MQUEUE)
			mq_fdclose(td, new, delfp);
		FILEDESC_XUNLOCK(fdp);
		(void) closef(delfp, td);
		if (holdleaders) {
			FILEDESC_XLOCK(fdp);
			fdp->fd_holdleaderscount--;
			if (fdp->fd_holdleaderscount == 0 &&
			    fdp->fd_holdleaderswakeup != 0) {
				fdp->fd_holdleaderswakeup = 0;
				wakeup(&fdp->fd_holdleaderscount);
			}
			FILEDESC_XUNLOCK(fdp);
		}
	} else {
		FILEDESC_XUNLOCK(fdp);
	}
	return (0);
}

/*
 * If sigio is on the list associated with a process or process group,
 * disable signalling from the device, remove sigio from the list and
 * free sigio.
 */
void
funsetown(struct sigio **sigiop)
{
	struct sigio *sigio;

	SIGIO_LOCK();
	sigio = *sigiop;
	if (sigio == NULL) {
		SIGIO_UNLOCK();
		return;
	}
	*(sigio->sio_myref) = NULL;
	if ((sigio)->sio_pgid < 0) {
		struct pgrp *pg = (sigio)->sio_pgrp;
		PGRP_LOCK(pg);
		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
		    sigio, sio_pgsigio);
		PGRP_UNLOCK(pg);
	} else {
		struct proc *p = (sigio)->sio_proc;
		PROC_LOCK(p);
		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
		    sigio, sio_pgsigio);
		PROC_UNLOCK(p);
	}
	SIGIO_UNLOCK();
	crfree(sigio->sio_ucred);
	FREE(sigio, M_SIGIO);
}

/*
 * Free a list of sigio structures.
 * We only need to lock the SIGIO_LOCK because we have made ourselves
 * inaccessible to callers of fsetown and therefore do not need to lock
 * the proc or pgrp struct for the list manipulation.
 */
void
funsetownlst(struct sigiolst *sigiolst)
{
	struct proc *p;
	struct pgrp *pg;
	struct sigio *sigio;

	sigio = SLIST_FIRST(sigiolst);
	if (sigio == NULL)
		return;
	p = NULL;
	pg = NULL;

	/*
	 * Every entry of the list should belong
	 * to a single proc or pgrp.
	 */
	if (sigio->sio_pgid < 0) {
		pg = sigio->sio_pgrp;
		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
	} else /* if (sigio->sio_pgid > 0) */ {
		p = sigio->sio_proc;
		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
	}

	SIGIO_LOCK();
	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
		*(sigio->sio_myref) = NULL;
		if (pg != NULL) {
			KASSERT(sigio->sio_pgid < 0,
			    ("Proc sigio in pgrp sigio list"));
			KASSERT(sigio->sio_pgrp == pg,
			    ("Bogus pgrp in sigio list"));
			PGRP_LOCK(pg);
			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
			    sio_pgsigio);
			PGRP_UNLOCK(pg);
		} else /* if (p != NULL) */ {
			KASSERT(sigio->sio_pgid > 0,
			    ("Pgrp sigio in proc sigio list"));
			KASSERT(sigio->sio_proc == p,
			    ("Bogus proc in sigio list"));
			PROC_LOCK(p);
			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
			    sio_pgsigio);
			PROC_UNLOCK(p);
		}
		SIGIO_UNLOCK();
		crfree(sigio->sio_ucred);
		FREE(sigio, M_SIGIO);
		SIGIO_LOCK();
	}
	SIGIO_UNLOCK();
}

/*
 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
 *
 * After permission checking, add a sigio structure to the sigio list for
 * the process or process group.
 */
int
fsetown(pid_t pgid, struct sigio **sigiop)
{
	struct proc *proc;
	struct pgrp *pgrp;
	struct sigio *sigio;
	int ret;

	if (pgid == 0) {
		funsetown(sigiop);
		return (0);
	}

	ret = 0;

	/* Allocate and fill in the new sigio out of locks. */
	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
	sigio->sio_pgid = pgid;
	sigio->sio_ucred = crhold(curthread->td_ucred);
	sigio->sio_myref = sigiop;

	sx_slock(&proctree_lock);
	if (pgid > 0) {
		proc = pfind(pgid);
		if (proc == NULL) {
			ret = ESRCH;
			goto fail;
		}

		/*
		 * Policy - Don't allow a process to FSETOWN a process
		 * in another session.
		 *
		 * Remove this test to allow maximum flexibility or
		 * restrict FSETOWN to the current process or process
		 * group for maximum safety.
		 */
		PROC_UNLOCK(proc);
		if (proc->p_session != curthread->td_proc->p_session) {
			ret = EPERM;
			goto fail;
		}

		pgrp = NULL;
	} else /* if (pgid < 0) */ {
		pgrp = pgfind(-pgid);
		if (pgrp == NULL) {
			ret = ESRCH;
			goto fail;
		}
		PGRP_UNLOCK(pgrp);

		/*
		 * Policy - Don't allow a process to FSETOWN a process
		 * in another session.
		 *
		 * Remove this test to allow maximum flexibility or
		 * restrict FSETOWN to the current process or process
		 * group for maximum safety.
		 */
		if (pgrp->pg_session != curthread->td_proc->p_session) {
			ret = EPERM;
			goto fail;
		}

		proc = NULL;
	}
	funsetown(sigiop);
	if (pgid > 0) {
		PROC_LOCK(proc);
		/*
		 * Since funsetownlst() is called without the proctree
		 * locked, we need to check for P_WEXIT.
		 * XXX: is ESRCH correct?
		 */
		if ((proc->p_flag & P_WEXIT) != 0) {
			PROC_UNLOCK(proc);
			ret = ESRCH;
			goto fail;
		}
		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
		sigio->sio_proc = proc;
		PROC_UNLOCK(proc);
	} else {
		PGRP_LOCK(pgrp);
		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
		sigio->sio_pgrp = pgrp;
		PGRP_UNLOCK(pgrp);
	}
	sx_sunlock(&proctree_lock);
	SIGIO_LOCK();
	*sigiop = sigio;
	SIGIO_UNLOCK();
	return (0);

fail:
	sx_sunlock(&proctree_lock);
	crfree(sigio->sio_ucred);
	FREE(sigio, M_SIGIO);
	return (ret);
}

/*
 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
 */
pid_t
fgetown(sigiop)
	struct sigio **sigiop;
{
	pid_t pgid;

	SIGIO_LOCK();
	pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
	SIGIO_UNLOCK();
	return (pgid);
}

/*
 * Close a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct close_args {
	int	fd;
};
#endif
/* ARGSUSED */
int
close(td, uap)
	struct thread *td;
	struct close_args *uap;
{

	return (kern_close(td, uap->fd));
}

int
kern_close(td, fd)
	struct thread *td;
	int fd;
{
	struct filedesc *fdp;
	struct file *fp;
	int error;
	int holdleaders;

	error = 0;
	holdleaders = 0;
	fdp = td->td_proc->p_fd;

	AUDIT_SYSCLOSE(td, fd);

	FILEDESC_XLOCK(fdp);
	if ((unsigned)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL) {
		FILEDESC_XUNLOCK(fdp);
		return (EBADF);
	}
	fdp->fd_ofiles[fd] = NULL;
	fdp->fd_ofileflags[fd] = 0;
	fdunused(fdp, fd);
	if (td->td_proc->p_fdtol != NULL) {
		/*
		 * Ask fdfree() to sleep to ensure that all relevant
		 * process leaders can be traversed in closef().
		 */
		fdp->fd_holdleaderscount++;
		holdleaders = 1;
	}

	/*
	 * We now hold the fp reference that used to be owned by the
	 * descriptor array.  We have to unlock the FILEDESC *AFTER*
	 * knote_fdclose to prevent a race of the fd getting opened, a knote
	 * added, and deleting a knote for the new fd.
	 */
	knote_fdclose(td, fd);
	if (fp->f_type == DTYPE_MQUEUE)
		mq_fdclose(td, fd, fp);
	FILEDESC_XUNLOCK(fdp);

	error = closef(fp, td);
	if (holdleaders) {
		FILEDESC_XLOCK(fdp);
		fdp->fd_holdleaderscount--;
		if (fdp->fd_holdleaderscount == 0 &&
		    fdp->fd_holdleaderswakeup != 0) {
			fdp->fd_holdleaderswakeup = 0;
			wakeup(&fdp->fd_holdleaderscount);
		}
		FILEDESC_XUNLOCK(fdp);
	}
	return (error);
}

#if defined(COMPAT_43)
/*
 * Return status information about a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/* ARGSUSED */
int
ofstat(struct thread *td, struct ofstat_args *uap)
{
	struct ostat oub;
	struct stat ub;
	int error;

	error = kern_fstat(td, uap->fd, &ub);
	if (error == 0) {
		cvtstat(&ub, &oub);
		error = copyout(&oub, uap->sb, sizeof(oub));
	}
	return (error);
}
#endif /* COMPAT_43 */

/*
 * Return status information about a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct fstat_args {
	int	fd;
	struct	stat *sb;
};
#endif
/* ARGSUSED */
int
fstat(struct thread *td, struct fstat_args *uap)
{
	struct stat ub;
	int error;

	error = kern_fstat(td, uap->fd, &ub);
	if (error == 0)
		error = copyout(&ub, uap->sb, sizeof(ub));
	return (error);
}

int
kern_fstat(struct thread *td, int fd, struct stat *sbp)
{
	struct file *fp;
	int error;

	AUDIT_ARG(fd, fd);

	if ((error = fget(td, fd, &fp)) != 0)
		return (error);

	AUDIT_ARG(file, td->td_proc, fp);

	error = fo_stat(fp, sbp, td->td_ucred, td);
	fdrop(fp, td);
#ifdef KTRACE
	if (error == 0 && KTRPOINT(td, KTR_STRUCT))
		ktrstat(sbp);
#endif
	return (error);
}

/*
 * Return status information about a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct nfstat_args {
	int	fd;
	struct	nstat *sb;
};
#endif
/* ARGSUSED */
int
nfstat(struct thread *td, struct nfstat_args *uap)
{
	struct nstat nub;
	struct stat ub;
	int error;

	error = kern_fstat(td, uap->fd, &ub);
	if (error == 0) {
		cvtnstat(&ub, &nub);
		error = copyout(&nub, uap->sb, sizeof(nub));
	}
	return (error);
}

/*
 * Return pathconf information about a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct fpathconf_args {
	int	fd;
	int	name;
};
#endif
/* ARGSUSED */
int
fpathconf(struct thread *td, struct fpathconf_args *uap)
{
	struct file *fp;
	struct vnode *vp;
	int error;

	if ((error = fget(td, uap->fd, &fp)) != 0)
		return (error);

	/* If asynchronous I/O is available, it works for all descriptors. */
	if (uap->name == _PC_ASYNC_IO) {
		td->td_retval[0] = async_io_version;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp != NULL) {
		int vfslocked;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
		VOP_UNLOCK(vp, 0);
		VFS_UNLOCK_GIANT(vfslocked);
	} else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
		if (uap->name != _PC_PIPE_BUF) {
			error = EINVAL;
		} else {
			td->td_retval[0] = PIPE_BUF;
			error = 0;
		}
	} else {
		error = EOPNOTSUPP;
	}
out:
	fdrop(fp, td);
	return (error);
}

/*
 * Grow the file table to accommodate (at least) nfd descriptors.  This may
 * block and drop the filedesc lock, but it will reacquire it before
 * returning.
 */
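/*
 * Illustrative example: a request to grow to 21 descriptors is rounded up
 * to a whole bitmap slot, NDSLOTS(21) * NDENTRIES, which is 64 on a platform
 * with a 64-bit NDSLOTTYPE (32 where NDSLOTTYPE is 32 bits wide), so repeated
 * small growths do not reallocate the table every time.
 */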
static void
fdgrowtable(struct filedesc *fdp, int nfd)
{
	struct file **ntable;
	char *nfileflags;
	int nnfiles, onfiles;
	NDSLOTTYPE *nmap;

	FILEDESC_XLOCK_ASSERT(fdp);

	KASSERT(fdp->fd_nfiles > 0,
	    ("zero-length file table"));

	/* compute the size of the new table */
	onfiles = fdp->fd_nfiles;
	nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */
	if (nnfiles <= onfiles)
		/* the table is already large enough */
		return;

	/* allocate a new table and (if required) new bitmaps */
	FILEDESC_XUNLOCK(fdp);
	MALLOC(ntable, struct file **, nnfiles * OFILESIZE,
	    M_FILEDESC, M_ZERO | M_WAITOK);
	nfileflags = (char *)&ntable[nnfiles];
	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles))
		MALLOC(nmap, NDSLOTTYPE *, NDSLOTS(nnfiles) * NDSLOTSIZE,
		    M_FILEDESC, M_ZERO | M_WAITOK);
	else
		nmap = NULL;
	FILEDESC_XLOCK(fdp);

	/*
	 * We now have new tables ready to go.  Since we dropped the
	 * filedesc lock to call malloc(), watch out for a race.
	 */
	onfiles = fdp->fd_nfiles;
	if (onfiles >= nnfiles) {
		/* we lost the race, but that's OK */
		free(ntable, M_FILEDESC);
		if (nmap != NULL)
			free(nmap, M_FILEDESC);
		return;
	}
	bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable));
	bcopy(fdp->fd_ofileflags, nfileflags, onfiles);
	if (onfiles > NDFILE)
		free(fdp->fd_ofiles, M_FILEDESC);
	fdp->fd_ofiles = ntable;
	fdp->fd_ofileflags = nfileflags;
	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
		bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap));
		if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
			free(fdp->fd_map, M_FILEDESC);
		fdp->fd_map = nmap;
	}
	fdp->fd_nfiles = nnfiles;
}

/*
 * Allocate a file descriptor for the process.
 */
int
fdalloc(struct thread *td, int minfd, int *result)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	int fd = -1, maxfd;

	FILEDESC_XLOCK_ASSERT(fdp);

	if (fdp->fd_freefile > minfd)
		minfd = fdp->fd_freefile;

	PROC_LOCK(p);
	maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
	PROC_UNLOCK(p);

	/*
	 * Search the bitmap for a free descriptor.  If none is found, try
	 * to grow the file table.  Keep at it until we either get a file
	 * descriptor or run into process or system limits; fdgrowtable()
	 * may drop the filedesc lock, so we're in a race.
	 */
	for (;;) {
		fd = fd_first_free(fdp, minfd, fdp->fd_nfiles);
		if (fd >= maxfd)
			return (EMFILE);
		if (fd < fdp->fd_nfiles)
			break;
		fdgrowtable(fdp, min(fdp->fd_nfiles * 2, maxfd));
	}

	/*
	 * Perform some sanity checks, then mark the file descriptor as
	 * used and return it to the caller.
	 */
	KASSERT(!fdisused(fdp, fd),
	    ("fd_first_free() returned non-free descriptor"));
	KASSERT(fdp->fd_ofiles[fd] == NULL,
	    ("free descriptor isn't"));
	fdp->fd_ofileflags[fd] = 0; /* XXX needed? */
	fdused(fdp, fd);
	*result = fd;
	return (0);
}

/*
 * Check to see whether n user file descriptors are available to the process
 * p.
 */
int
fdavail(struct thread *td, int n)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = td->td_proc->p_fd;
	struct file **fpp;
	int i, lim, last;

	FILEDESC_LOCK_ASSERT(fdp);

	PROC_LOCK(p);
	lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
	PROC_UNLOCK(p);
	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
		return (1);
	last = min(fdp->fd_nfiles, lim);
	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
		if (*fpp == NULL && --n <= 0)
			return (1);
	}
	return (0);
}

/*
 * Create a new open file structure and allocate a file descriptor for the
 * process that refers to it.  We add one reference to the file for the
 * descriptor table and one reference for resultfp.  This is to prevent us
 * being preempted and the entry in the descriptor table closed after we
 * release the FILEDESC lock.
 */
int
falloc(struct thread *td, struct file **resultfp, int *resultfd)
{
	struct proc *p = td->td_proc;
	struct file *fp;
	int error, i;
	int maxuserfiles = maxfiles - (maxfiles / 20);
	static struct timeval lastfail;
	static int curfail;

	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
	if ((openfiles >= maxuserfiles &&
	    priv_check(td, PRIV_MAXFILES) != 0) ||
	    openfiles >= maxfiles) {
		if (ppsratecheck(&lastfail, &curfail, 1)) {
			printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n",
			    td->td_ucred->cr_ruid);
		}
		uma_zfree(file_zone, fp);
		return (ENFILE);
	}
	atomic_add_int(&openfiles, 1);

	/*
	 * If the process has file descriptor zero open, add the new file
	 * descriptor to the list of open files at that point, otherwise
	 * put it at the front of the list of open files.
	 */
	refcount_init(&fp->f_count, 1);
	if (resultfp)
		fhold(fp);
	fp->f_cred = crhold(td->td_ucred);
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	fp->f_vnode = NULL;
	FILEDESC_XLOCK(p->p_fd);
	if ((error = fdalloc(td, 0, &i))) {
		FILEDESC_XUNLOCK(p->p_fd);
		fdrop(fp, td);
		if (resultfp)
			fdrop(fp, td);
		return (error);
	}
	p->p_fd->fd_ofiles[i] = fp;
	FILEDESC_XUNLOCK(p->p_fd);
	if (resultfp)
		*resultfp = fp;
	if (resultfd)
		*resultfd = i;
	return (0);
}

/*
 * Build a new filedesc structure from another.
 * Copy the current, root, and jail root vnode references.
 */
struct filedesc *
fdinit(struct filedesc *fdp)
{
	struct filedesc0 *newfdp;

	newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO);
	FILEDESC_LOCK_INIT(&newfdp->fd_fd);
	if (fdp != NULL) {
		FILEDESC_XLOCK(fdp);
		newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
		if (newfdp->fd_fd.fd_cdir)
			VREF(newfdp->fd_fd.fd_cdir);
		newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
		if (newfdp->fd_fd.fd_rdir)
			VREF(newfdp->fd_fd.fd_rdir);
		newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
		if (newfdp->fd_fd.fd_jdir)
			VREF(newfdp->fd_fd.fd_jdir);
		FILEDESC_XUNLOCK(fdp);
	}

	/* Create the file descriptor table. */
	newfdp->fd_fd.fd_refcnt = 1;
	newfdp->fd_fd.fd_holdcnt = 1;
	newfdp->fd_fd.fd_cmask = CMASK;
	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
	newfdp->fd_fd.fd_nfiles = NDFILE;
	newfdp->fd_fd.fd_map = newfdp->fd_dmap;
	newfdp->fd_fd.fd_lastfile = -1;
	return (&newfdp->fd_fd);
}

static struct filedesc *
fdhold(struct proc *p)
{
	struct filedesc *fdp;

	mtx_lock(&fdesc_mtx);
	fdp = p->p_fd;
	if (fdp != NULL)
		fdp->fd_holdcnt++;
	mtx_unlock(&fdesc_mtx);
	return (fdp);
}

static void
fddrop(struct filedesc *fdp)
{
	int i;

	mtx_lock(&fdesc_mtx);
	i = --fdp->fd_holdcnt;
	mtx_unlock(&fdesc_mtx);
	if (i > 0)
		return;

	FILEDESC_LOCK_DESTROY(fdp);
	FREE(fdp, M_FILEDESC);
}

/*
 * Share a filedesc structure.
 */
struct filedesc *
fdshare(struct filedesc *fdp)
{

	FILEDESC_XLOCK(fdp);
	fdp->fd_refcnt++;
	FILEDESC_XUNLOCK(fdp);
	return (fdp);
}

/*
 * Unshare a filedesc structure, if necessary by making a copy.
 */
void
fdunshare(struct proc *p, struct thread *td)
{

	FILEDESC_XLOCK(p->p_fd);
	if (p->p_fd->fd_refcnt > 1) {
		struct filedesc *tmp;

		FILEDESC_XUNLOCK(p->p_fd);
		tmp = fdcopy(p->p_fd);
		fdfree(td);
		p->p_fd = tmp;
	} else
		FILEDESC_XUNLOCK(p->p_fd);
}

/*
 * Copy a filedesc structure.  A NULL pointer in returns a NULL reference;
 * this is to ease callers, not catch errors.
 */
struct filedesc *
fdcopy(struct filedesc *fdp)
{
	struct filedesc *newfdp;
	int i;

	/* Certain daemons might not have file descriptors. */
	if (fdp == NULL)
		return (NULL);

	newfdp = fdinit(fdp);
	FILEDESC_SLOCK(fdp);
	while (fdp->fd_lastfile >= newfdp->fd_nfiles) {
		FILEDESC_SUNLOCK(fdp);
		FILEDESC_XLOCK(newfdp);
		fdgrowtable(newfdp, fdp->fd_lastfile + 1);
		FILEDESC_XUNLOCK(newfdp);
		FILEDESC_SLOCK(fdp);
	}
	/* copy everything except kqueue descriptors */
	newfdp->fd_freefile = -1;
	for (i = 0; i <= fdp->fd_lastfile; ++i) {
		if (fdisused(fdp, i) &&
		    fdp->fd_ofiles[i]->f_type != DTYPE_KQUEUE) {
			newfdp->fd_ofiles[i] = fdp->fd_ofiles[i];
			newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i];
			fhold(newfdp->fd_ofiles[i]);
			newfdp->fd_lastfile = i;
		} else {
			if (newfdp->fd_freefile == -1)
				newfdp->fd_freefile = i;
		}
	}
	FILEDESC_SUNLOCK(fdp);
	FILEDESC_XLOCK(newfdp);
	for (i = 0; i <= newfdp->fd_lastfile; ++i)
		if (newfdp->fd_ofiles[i] != NULL)
			fdused(newfdp, i);
	FILEDESC_XUNLOCK(newfdp);
	FILEDESC_SLOCK(fdp);
	if (newfdp->fd_freefile == -1)
		newfdp->fd_freefile = i;
	newfdp->fd_cmask = fdp->fd_cmask;
	FILEDESC_SUNLOCK(fdp);
	return (newfdp);
}

/*
 * Release a filedesc structure.
 */
void
fdfree(struct thread *td)
{
	struct filedesc *fdp;
	struct file **fpp;
	int i, locked;
	struct filedesc_to_leader *fdtol;
	struct file *fp;
	struct vnode *cdir, *jdir, *rdir, *vp;
	struct flock lf;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	/* Check for special need to clear POSIX style locks */
	fdtol = td->td_proc->p_fdtol;
	if (fdtol != NULL) {
		FILEDESC_XLOCK(fdp);
		KASSERT(fdtol->fdl_refcount > 0,
		    ("filedesc_to_refcount botch: fdl_refcount=%d",
		    fdtol->fdl_refcount));
		if (fdtol->fdl_refcount == 1 &&
		    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
			for (i = 0, fpp = fdp->fd_ofiles;
			     i <= fdp->fd_lastfile;
			     i++, fpp++) {
				if (*fpp == NULL ||
				    (*fpp)->f_type != DTYPE_VNODE)
					continue;
				fp = *fpp;
				fhold(fp);
				FILEDESC_XUNLOCK(fdp);
				lf.l_whence = SEEK_SET;
				lf.l_start = 0;
				lf.l_len = 0;
				lf.l_type = F_UNLCK;
				vp = fp->f_vnode;
				locked = VFS_LOCK_GIANT(vp->v_mount);
				(void) VOP_ADVLOCK(vp,
				    (caddr_t)td->td_proc->p_leader,
				    F_UNLCK,
				    &lf,
				    F_POSIX);
				VFS_UNLOCK_GIANT(locked);
				FILEDESC_XLOCK(fdp);
				fdrop(fp, td);
				fpp = fdp->fd_ofiles + i;
			}
		}
	retry:
		if (fdtol->fdl_refcount == 1) {
			if (fdp->fd_holdleaderscount > 0 &&
			    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
				/*
				 * close() or do_dup() has cleared a reference
				 * in a shared file descriptor table.
				 */
				fdp->fd_holdleaderswakeup = 1;
				sx_sleep(&fdp->fd_holdleaderscount,
				    FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0);
				goto retry;
			}
			if (fdtol->fdl_holdcount > 0) {
				/*
				 * Ensure that fdtol->fdl_leader remains
				 * valid in closef().
				 */
				fdtol->fdl_wakeup = 1;
				sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK,
				    "fdlhold", 0);
				goto retry;
			}
		}
		fdtol->fdl_refcount--;
		if (fdtol->fdl_refcount == 0 &&
		    fdtol->fdl_holdcount == 0) {
			fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
			fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
		} else
			fdtol = NULL;
		td->td_proc->p_fdtol = NULL;
		FILEDESC_XUNLOCK(fdp);
		if (fdtol != NULL)
			FREE(fdtol, M_FILEDESC_TO_LEADER);
	}
	FILEDESC_XLOCK(fdp);
	i = --fdp->fd_refcnt;
	FILEDESC_XUNLOCK(fdp);
	if (i > 0)
		return;
	/*
	 * We are the last reference to the structure, so we can
	 * safely assume it will not change out from under us.
	 */
	fpp = fdp->fd_ofiles;
	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
		if (*fpp)
			(void) closef(*fpp, td);
	}
	FILEDESC_XLOCK(fdp);

	/* XXX This should happen earlier. */
	mtx_lock(&fdesc_mtx);
	td->td_proc->p_fd = NULL;
	mtx_unlock(&fdesc_mtx);

	if (fdp->fd_nfiles > NDFILE)
		FREE(fdp->fd_ofiles, M_FILEDESC);
	if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE))
		FREE(fdp->fd_map, M_FILEDESC);

	fdp->fd_nfiles = 0;

	cdir = fdp->fd_cdir;
	fdp->fd_cdir = NULL;
	rdir = fdp->fd_rdir;
	fdp->fd_rdir = NULL;
	jdir = fdp->fd_jdir;
	fdp->fd_jdir = NULL;
	FILEDESC_XUNLOCK(fdp);

	if (cdir) {
		locked = VFS_LOCK_GIANT(cdir->v_mount);
		vrele(cdir);
		VFS_UNLOCK_GIANT(locked);
	}
	if (rdir) {
		locked = VFS_LOCK_GIANT(rdir->v_mount);
		vrele(rdir);
		VFS_UNLOCK_GIANT(locked);
	}
	if (jdir) {
		locked = VFS_LOCK_GIANT(jdir->v_mount);
		vrele(jdir);
		VFS_UNLOCK_GIANT(locked);
	}

	fddrop(fdp);
}

/*
 * For setugid programs, we don't want to allow that setugidness to be used
 * to generate error messages which write to a file which would otherwise be
 * off-limits to the process.  We check for filesystems where the vnode can
 * change out from under us after execve (like [lin]procfs).
 *
 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
 * sufficient.  We also don't check for setugidness since we know we are.
 */
static int
is_unsafe(struct file *fp)
{
	if (fp->f_type == DTYPE_VNODE) {
		struct vnode *vp = fp->f_vnode;

		if ((vp->v_vflag & VV_PROCDEP) != 0)
			return (1);
	}
	return (0);
}

/*
 * Make this setugid thing safe, if at all possible.
 */
void
setugidsafety(struct thread *td)
{
	struct filedesc *fdp;
	int i;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	/*
	 * Note: fdp->fd_ofiles may be reallocated out from under us while
	 * we are blocked in a close.  Be careful!
	 */
	FILEDESC_XLOCK(fdp);
	for (i = 0; i <= fdp->fd_lastfile; i++) {
		if (i > 2)
			break;
		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
			struct file *fp;

			knote_fdclose(td, i);
			/*
			 * NULL-out descriptor prior to close to avoid
			 * a race while close blocks.
			 */
			fp = fdp->fd_ofiles[i];
			fdp->fd_ofiles[i] = NULL;
			fdp->fd_ofileflags[i] = 0;
			fdunused(fdp, i);
			FILEDESC_XUNLOCK(fdp);
			(void) closef(fp, td);
			FILEDESC_XLOCK(fdp);
		}
	}
	FILEDESC_XUNLOCK(fdp);
}

/*
 * If a specific file object occupies a specific file descriptor, close the
 * file descriptor entry and drop a reference on the file object.  This is a
 * convenience function to handle a subsequent error in a function that calls
 * falloc() that handles the race that another thread might have closed the
 * file descriptor out from under the thread creating the file object.
 */
void
fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td)
{

	FILEDESC_XLOCK(fdp);
	if (fdp->fd_ofiles[idx] == fp) {
		fdp->fd_ofiles[idx] = NULL;
		fdunused(fdp, idx);
		FILEDESC_XUNLOCK(fdp);
		fdrop(fp, td);
	} else
		FILEDESC_XUNLOCK(fdp);
}

/*
 * Close any files on exec?
 */
void
fdcloseexec(struct thread *td)
{
	struct filedesc *fdp;
	int i;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	FILEDESC_XLOCK(fdp);

	/*
	 * We cannot cache fd_ofiles or fd_ofileflags since operations
	 * may block and rip them out from under us.
	 */
	for (i = 0; i <= fdp->fd_lastfile; i++) {
		if (fdp->fd_ofiles[i] != NULL &&
		    (fdp->fd_ofiles[i]->f_type == DTYPE_MQUEUE ||
		    (fdp->fd_ofileflags[i] & UF_EXCLOSE))) {
			struct file *fp;

			knote_fdclose(td, i);
			/*
			 * NULL-out descriptor prior to close to avoid
			 * a race while close blocks.
			 */
			fp = fdp->fd_ofiles[i];
			fdp->fd_ofiles[i] = NULL;
			fdp->fd_ofileflags[i] = 0;
			fdunused(fdp, i);
			if (fp->f_type == DTYPE_MQUEUE)
				mq_fdclose(td, i, fp);
			FILEDESC_XUNLOCK(fdp);
			(void) closef(fp, td);
			FILEDESC_XLOCK(fdp);
		}
	}
	FILEDESC_XUNLOCK(fdp);
}

/*
 * It is unsafe for set[ug]id processes to be started with file
 * descriptors 0..2 closed, as these descriptors are given implicit
 * significance in the Standard C library.  fdcheckstd() will create a
 * descriptor referencing /dev/null for each of stdin, stdout, and
 * stderr that is not already open.
 */
int
fdcheckstd(struct thread *td)
{
	struct filedesc *fdp;
	register_t retval, save;
	int i, error, devnull;

	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return (0);
	KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared"));
	devnull = -1;
	error = 0;
	for (i = 0; i < 3; i++) {
		if (fdp->fd_ofiles[i] != NULL)
			continue;
		if (devnull < 0) {
			save = td->td_retval[0];
			error = kern_open(td, "/dev/null", UIO_SYSSPACE,
			    O_RDWR, 0);
			devnull = td->td_retval[0];
			KASSERT(devnull == i, ("oof, we didn't get our fd"));
			td->td_retval[0] = save;
			if (error)
				break;
		} else {
			error = do_dup(td, DUP_FIXED, devnull, i, &retval);
			if (error != 0)
				break;
		}
	}
	return (error);
}

/*
 * Internal form of close.  Decrement reference count on file structure.
 * Note: td may be NULL when closing a file that was being passed in a
 * message.
 *
 * XXXRW: Giant is not required for the caller, but often will be held; this
 * makes it moderately likely the Giant will be recursed in the VFS case.
 */
int
closef(struct file *fp, struct thread *td)
{
	struct vnode *vp;
	struct flock lf;
	struct filedesc_to_leader *fdtol;
	struct filedesc *fdp;

	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process.  This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor, and the thread pointer
	 * will be NULL.  Callers should be careful only to pass a
	 * NULL thread pointer when there really is no owning
	 * context that might have locks, or the locks will be
	 * leaked.
	 */
	if (fp->f_type == DTYPE_VNODE && td != NULL) {
		int vfslocked;

		vp = fp->f_vnode;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
			lf.l_whence = SEEK_SET;
			lf.l_start = 0;
			lf.l_len = 0;
			lf.l_type = F_UNLCK;
			(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
			    F_UNLCK, &lf, F_POSIX);
		}
		fdtol = td->td_proc->p_fdtol;
		if (fdtol != NULL) {
			/*
			 * Handle special case where file descriptor table is
			 * shared between multiple process leaders.
			 */
			fdp = td->td_proc->p_fd;
			FILEDESC_XLOCK(fdp);
			for (fdtol = fdtol->fdl_next;
			     fdtol != td->td_proc->p_fdtol;
			     fdtol = fdtol->fdl_next) {
				if ((fdtol->fdl_leader->p_flag &
				    P_ADVLOCK) == 0)
					continue;
				fdtol->fdl_holdcount++;
				FILEDESC_XUNLOCK(fdp);
				lf.l_whence = SEEK_SET;
				lf.l_start = 0;
				lf.l_len = 0;
				lf.l_type = F_UNLCK;
				vp = fp->f_vnode;
				(void) VOP_ADVLOCK(vp,
				    (caddr_t)fdtol->fdl_leader,
				    F_UNLCK, &lf, F_POSIX);
				FILEDESC_XLOCK(fdp);
				fdtol->fdl_holdcount--;
				if (fdtol->fdl_holdcount == 0 &&
				    fdtol->fdl_wakeup != 0) {
					fdtol->fdl_wakeup = 0;
					wakeup(fdtol);
				}
			}
			FILEDESC_XUNLOCK(fdp);
		}
		VFS_UNLOCK_GIANT(vfslocked);
	}
	return (fdrop(fp, td));
}

/*
 * Initialize the file pointer with the specified properties.
 *
 * The ops are set with release semantics to be certain that the flags, type,
 * and data are visible when ops is.  This is to prevent ops methods from being
 * called with bad data.
 */
void
finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops)
{
	fp->f_data = data;
	fp->f_flag = flag;
	fp->f_type = type;
	atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops);
}


/*
 * Extract the file pointer associated with the specified descriptor for the
 * current user process.
 *
 * If the descriptor doesn't exist, EBADF is returned.
 *
 * If the descriptor exists but doesn't match 'flags' then return EBADF for
 * read attempts and EINVAL for write attempts.
 *
 * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
 * It should be dropped with fdrop().  If it is not set, then the refcount
 * will not be bumped however the thread's filedesc struct will be returned
 * locked (for fgetsock).
 *
 * If an error occurred the non-zero error is returned and *fpp is set to
 * NULL.  Otherwise *fpp is set and zero is returned.
 */
static __inline int
_fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
{
	struct filedesc *fdp;
	struct file *fp;

	*fpp = NULL;
	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
		return (EBADF);
	FILEDESC_SLOCK(fdp);
	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
		FILEDESC_SUNLOCK(fdp);
		return (EBADF);
	}

	/*
	 * FREAD and FWRITE failure return EBADF as per POSIX.
	 *
	 * Only one flag, or 0, may be specified.
	 */
	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
		FILEDESC_SUNLOCK(fdp);
		return (EBADF);
	}
	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
		FILEDESC_SUNLOCK(fdp);
		return (EBADF);
	}
	if (hold) {
		fhold(fp);
		FILEDESC_SUNLOCK(fdp);
	}
	*fpp = fp;
	return (0);
}

int
fget(struct thread *td, int fd, struct file **fpp)
{

	return(_fget(td, fd, fpp, 0, 1));
}

int
fget_read(struct thread *td, int fd, struct file **fpp)
{

	return(_fget(td, fd, fpp, FREAD, 1));
}

int
fget_write(struct thread *td, int fd, struct file **fpp)
{

	return(_fget(td, fd, fpp, FWRITE, 1));
}

/*
 * Like fget() but loads the underlying vnode, or returns an error if the
 * descriptor does not represent a vnode.  Note that pipes use vnodes but
 * never have VM objects.  The returned vnode will be vref()'d.
 *
 * XXX: what about the unused flags ?
 */
static __inline int
_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
{
	struct file *fp;
	int error;

	*vpp = NULL;
	if ((error = _fget(td, fd, &fp, flags, 0)) != 0)
		return (error);
	if (fp->f_vnode == NULL) {
		error = EINVAL;
	} else {
		*vpp = fp->f_vnode;
		vref(*vpp);
	}
	FILEDESC_SUNLOCK(td->td_proc->p_fd);
	return (error);
}

int
fgetvp(struct thread *td, int fd, struct vnode **vpp)
{

	return (_fgetvp(td, fd, vpp, 0));
}

int
fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
{

	return (_fgetvp(td, fd, vpp, FREAD));
}

#ifdef notyet
int
fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
{

	return (_fgetvp(td, fd, vpp, FWRITE));
}
#endif

/*
 * Like fget() but loads the underlying socket, or returns an error if the
 * descriptor does not represent a socket.
 *
 * We bump the ref count on the returned socket.  XXX Also obtain the SX lock
 * in the future.
 *
 * XXXRW: fgetsock() and fputsock() are deprecated, as consumers should rely
 * on their file descriptor reference to prevent the socket from being free'd
 * during use.
 */
int
fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
{
	struct file *fp;
	int error;

	*spp = NULL;
	if (fflagp != NULL)
		*fflagp = 0;
	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
		return (error);
	if (fp->f_type != DTYPE_SOCKET) {
		error = ENOTSOCK;
	} else {
		*spp = fp->f_data;
		if (fflagp)
			*fflagp = fp->f_flag;
		SOCK_LOCK(*spp);
		soref(*spp);
		SOCK_UNLOCK(*spp);
	}
	FILEDESC_SUNLOCK(td->td_proc->p_fd);
	return (error);
}

/*
 * Drop the reference count on the socket and XXX release the SX lock in the
 * future.  The last reference closes the socket.
 *
 * XXXRW: fputsock() is deprecated, see comment for fgetsock().
 */
void
fputsock(struct socket *so)
{

	ACCEPT_LOCK();
	SOCK_LOCK(so);
	sorele(so);
}

/*
 * Handle the last reference to a file being closed.
2198 */ 2199 int 2200 _fdrop(struct file *fp, struct thread *td) 2201 { 2202 int error; 2203 2204 error = 0; 2205 if (fp->f_count != 0) 2206 panic("fdrop: count %d", fp->f_count); 2207 if (fp->f_ops != &badfileops) 2208 error = fo_close(fp, td); 2209 /* 2210 * The f_cdevpriv cannot be assigned non-NULL value while we 2211 * are destroying the file. 2212 */ 2213 if (fp->f_cdevpriv != NULL) 2214 devfs_fpdrop(fp); 2215 atomic_subtract_int(&openfiles, 1); 2216 crfree(fp->f_cred); 2217 uma_zfree(file_zone, fp); 2218 2219 return (error); 2220 } 2221 2222 /* 2223 * Apply an advisory lock on a file descriptor. 2224 * 2225 * Just attempt to get a record lock of the requested type on the entire file 2226 * (l_whence = SEEK_SET, l_start = 0, l_len = 0). 2227 */ 2228 #ifndef _SYS_SYSPROTO_H_ 2229 struct flock_args { 2230 int fd; 2231 int how; 2232 }; 2233 #endif 2234 /* ARGSUSED */ 2235 int 2236 flock(struct thread *td, struct flock_args *uap) 2237 { 2238 struct file *fp; 2239 struct vnode *vp; 2240 struct flock lf; 2241 int vfslocked; 2242 int error; 2243 2244 if ((error = fget(td, uap->fd, &fp)) != 0) 2245 return (error); 2246 if (fp->f_type != DTYPE_VNODE) { 2247 fdrop(fp, td); 2248 return (EOPNOTSUPP); 2249 } 2250 2251 vp = fp->f_vnode; 2252 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2253 lf.l_whence = SEEK_SET; 2254 lf.l_start = 0; 2255 lf.l_len = 0; 2256 if (uap->how & LOCK_UN) { 2257 lf.l_type = F_UNLCK; 2258 atomic_clear_int(&fp->f_flag, FHASLOCK); 2259 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 2260 goto done2; 2261 } 2262 if (uap->how & LOCK_EX) 2263 lf.l_type = F_WRLCK; 2264 else if (uap->how & LOCK_SH) 2265 lf.l_type = F_RDLCK; 2266 else { 2267 error = EBADF; 2268 goto done2; 2269 } 2270 atomic_set_int(&fp->f_flag, FHASLOCK); 2271 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, 2272 (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT); 2273 done2: 2274 fdrop(fp, td); 2275 VFS_UNLOCK_GIANT(vfslocked); 2276 return (error); 2277 } 2278 /* 2279 * Duplicate the specified descriptor to a free descriptor. 2280 */ 2281 int 2282 dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error) 2283 { 2284 struct file *wfp; 2285 struct file *fp; 2286 2287 /* 2288 * If the to-be-dup'd fd number is greater than the allowed number 2289 * of file descriptors, or the fd to be dup'd has already been 2290 * closed, then reject. 2291 */ 2292 FILEDESC_XLOCK(fdp); 2293 if (dfd < 0 || dfd >= fdp->fd_nfiles || 2294 (wfp = fdp->fd_ofiles[dfd]) == NULL) { 2295 FILEDESC_XUNLOCK(fdp); 2296 return (EBADF); 2297 } 2298 2299 /* 2300 * There are two cases of interest here. 2301 * 2302 * For ENODEV simply dup (dfd) to file descriptor (indx) and return. 2303 * 2304 * For ENXIO steal away the file structure from (dfd) and store it in 2305 * (indx). (dfd) is effectively closed by this operation. 2306 * 2307 * Any other error code is just returned. 2308 */ 2309 switch (error) { 2310 case ENODEV: 2311 /* 2312 * Check that the mode the file is being opened for is a 2313 * subset of the mode of the existing descriptor. 2314 */ 2315 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { 2316 FILEDESC_XUNLOCK(fdp); 2317 return (EACCES); 2318 } 2319 fp = fdp->fd_ofiles[indx]; 2320 fdp->fd_ofiles[indx] = wfp; 2321 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 2322 if (fp == NULL) 2323 fdused(fdp, indx); 2324 fhold(wfp); 2325 FILEDESC_XUNLOCK(fdp); 2326 if (fp != NULL) 2327 /* 2328 * We now own the reference to fp that the ofiles[] 2329 * array used to own. Release it. 
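 * The drop is deferred until after FILEDESC_XUNLOCK() because releasing
 * the last reference may invoke the file's close routine, which can sleep.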
2330 */ 2331 fdrop(fp, td); 2332 return (0); 2333 2334 case ENXIO: 2335 /* 2336 * Steal away the file pointer from dfd and stuff it into indx. 2337 */ 2338 fp = fdp->fd_ofiles[indx]; 2339 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; 2340 fdp->fd_ofiles[dfd] = NULL; 2341 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 2342 fdp->fd_ofileflags[dfd] = 0; 2343 fdunused(fdp, dfd); 2344 if (fp == NULL) 2345 fdused(fdp, indx); 2346 FILEDESC_XUNLOCK(fdp); 2347 2348 /* 2349 * We now own the reference to fp that the ofiles[] array 2350 * used to own. Release it. 2351 */ 2352 if (fp != NULL) 2353 fdrop(fp, td); 2354 return (0); 2355 2356 default: 2357 FILEDESC_XUNLOCK(fdp); 2358 return (error); 2359 } 2360 /* NOTREACHED */ 2361 } 2362 2363 /* 2364 * Scan all active processes to see if any of them have a current or root 2365 * directory of `olddp'. If so, replace them with the new mount point. 2366 */ 2367 void 2368 mountcheckdirs(struct vnode *olddp, struct vnode *newdp) 2369 { 2370 struct filedesc *fdp; 2371 struct proc *p; 2372 int nrele; 2373 2374 if (vrefcnt(olddp) == 1) 2375 return; 2376 sx_slock(&allproc_lock); 2377 FOREACH_PROC_IN_SYSTEM(p) { 2378 fdp = fdhold(p); 2379 if (fdp == NULL) 2380 continue; 2381 nrele = 0; 2382 FILEDESC_XLOCK(fdp); 2383 if (fdp->fd_cdir == olddp) { 2384 vref(newdp); 2385 fdp->fd_cdir = newdp; 2386 nrele++; 2387 } 2388 if (fdp->fd_rdir == olddp) { 2389 vref(newdp); 2390 fdp->fd_rdir = newdp; 2391 nrele++; 2392 } 2393 FILEDESC_XUNLOCK(fdp); 2394 fddrop(fdp); 2395 while (nrele--) 2396 vrele(olddp); 2397 } 2398 sx_sunlock(&allproc_lock); 2399 if (rootvnode == olddp) { 2400 vrele(rootvnode); 2401 vref(newdp); 2402 rootvnode = newdp; 2403 } 2404 } 2405 2406 struct filedesc_to_leader * 2407 filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader) 2408 { 2409 struct filedesc_to_leader *fdtol; 2410 2411 MALLOC(fdtol, struct filedesc_to_leader *, 2412 sizeof(struct filedesc_to_leader), 2413 M_FILEDESC_TO_LEADER, 2414 M_WAITOK); 2415 fdtol->fdl_refcount = 1; 2416 fdtol->fdl_holdcount = 0; 2417 fdtol->fdl_wakeup = 0; 2418 fdtol->fdl_leader = leader; 2419 if (old != NULL) { 2420 FILEDESC_XLOCK(fdp); 2421 fdtol->fdl_next = old->fdl_next; 2422 fdtol->fdl_prev = old; 2423 old->fdl_next = fdtol; 2424 fdtol->fdl_next->fdl_prev = fdtol; 2425 FILEDESC_XUNLOCK(fdp); 2426 } else { 2427 fdtol->fdl_next = fdtol; 2428 fdtol->fdl_prev = fdtol; 2429 } 2430 return (fdtol); 2431 } 2432 2433 /* 2434 * Get file structures globally. 2435 */ 2436 static int 2437 sysctl_kern_file(SYSCTL_HANDLER_ARGS) 2438 { 2439 struct xfile xf; 2440 struct filedesc *fdp; 2441 struct file *fp; 2442 struct proc *p; 2443 int error, n; 2444 2445 error = sysctl_wire_old_buffer(req, 0); 2446 if (error != 0) 2447 return (error); 2448 if (req->oldptr == NULL) { 2449 n = 0; 2450 sx_slock(&allproc_lock); 2451 FOREACH_PROC_IN_SYSTEM(p) { 2452 if (p->p_state == PRS_NEW) 2453 continue; 2454 fdp = fdhold(p); 2455 if (fdp == NULL) 2456 continue; 2457 /* overestimates sparse tables. 
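 * fd_lastfile is the highest descriptor in use, so summing it also counts
 * the holes in sparse tables; the resulting size estimate is merely
 * conservative, which is fine for the sizing request handled here.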
*/ 2458 if (fdp->fd_lastfile > 0) 2459 n += fdp->fd_lastfile; 2460 fddrop(fdp); 2461 } 2462 sx_sunlock(&allproc_lock); 2463 return (SYSCTL_OUT(req, 0, n * sizeof(xf))); 2464 } 2465 error = 0; 2466 bzero(&xf, sizeof(xf)); 2467 xf.xf_size = sizeof(xf); 2468 sx_slock(&allproc_lock); 2469 FOREACH_PROC_IN_SYSTEM(p) { 2470 if (p->p_state == PRS_NEW) 2471 continue; 2472 PROC_LOCK(p); 2473 if (p_cansee(req->td, p) != 0) { 2474 PROC_UNLOCK(p); 2475 continue; 2476 } 2477 xf.xf_pid = p->p_pid; 2478 xf.xf_uid = p->p_ucred->cr_uid; 2479 PROC_UNLOCK(p); 2480 fdp = fdhold(p); 2481 if (fdp == NULL) 2482 continue; 2483 FILEDESC_SLOCK(fdp); 2484 for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) { 2485 if ((fp = fdp->fd_ofiles[n]) == NULL) 2486 continue; 2487 xf.xf_fd = n; 2488 xf.xf_file = fp; 2489 xf.xf_data = fp->f_data; 2490 xf.xf_vnode = fp->f_vnode; 2491 xf.xf_type = fp->f_type; 2492 xf.xf_count = fp->f_count; 2493 xf.xf_msgcount = 0; 2494 xf.xf_offset = fp->f_offset; 2495 xf.xf_flag = fp->f_flag; 2496 error = SYSCTL_OUT(req, &xf, sizeof(xf)); 2497 if (error) 2498 break; 2499 } 2500 FILEDESC_SUNLOCK(fdp); 2501 fddrop(fdp); 2502 if (error) 2503 break; 2504 } 2505 sx_sunlock(&allproc_lock); 2506 return (error); 2507 } 2508 2509 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD, 2510 0, 0, sysctl_kern_file, "S,xfile", "Entire file table"); 2511 2512 static int 2513 export_vnode_for_sysctl(struct vnode *vp, int type, 2514 struct kinfo_file *kif, struct filedesc *fdp, struct sysctl_req *req) 2515 { 2516 int error; 2517 char *fullpath, *freepath; 2518 int vfslocked; 2519 2520 bzero(kif, sizeof(*kif)); 2521 kif->kf_structsize = sizeof(*kif); 2522 2523 vref(vp); 2524 kif->kf_fd = type; 2525 kif->kf_type = KF_TYPE_VNODE; 2526 /* This function only handles directories. */ 2527 KASSERT(vp->v_type == VDIR, ("export_vnode_for_sysctl: vnode not directory")); 2528 kif->kf_vnode_type = KF_VTYPE_VDIR; 2529 2530 /* 2531 * This is not a true file descriptor, so we set a bogus refcount 2532 * and offset to indicate these fields should be ignored. 2533 */ 2534 kif->kf_ref_count = -1; 2535 kif->kf_offset = -1; 2536 2537 freepath = NULL; 2538 fullpath = "-"; 2539 FILEDESC_SUNLOCK(fdp); 2540 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2541 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2542 vn_fullpath(curthread, vp, &fullpath, &freepath); 2543 vput(vp); 2544 VFS_UNLOCK_GIANT(vfslocked); 2545 strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path)); 2546 if (freepath != NULL) 2547 free(freepath, M_TEMP); 2548 error = SYSCTL_OUT(req, kif, sizeof(*kif)); 2549 FILEDESC_SLOCK(fdp); 2550 return (error); 2551 } 2552 2553 /* 2554 * Get per-process file descriptors for use by procstat(1), et al. 
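 *
 * A userland consumer (an illustrative sketch only; in practice procstat(1)
 * and libutil's kinfo_getfile(3) front-end this sysctl) would do roughly:
 *
 *	int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_FILEDESC, pid };
 *	size_t len = 0;
 *	char *buf, *p;
 *
 *	sysctl(mib, 4, NULL, &len, NULL, 0);	(ask for the required size)
 *	buf = malloc(len);
 *	sysctl(mib, 4, buf, &len, NULL, 0);	(fetch the kinfo_file records)
 *	for (p = buf; p < buf + len;
 *	    p += ((struct kinfo_file *)p)->kf_structsize)
 *		(examine one struct kinfo_file)
 *
 * Each record carries its own kf_structsize, so consumers remain forward
 * compatible if the structure grows.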
2555 */ 2556 static int 2557 sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) 2558 { 2559 char *fullpath, *freepath; 2560 struct kinfo_file *kif; 2561 struct filedesc *fdp; 2562 int error, i, *name; 2563 struct socket *so; 2564 struct vnode *vp; 2565 struct file *fp; 2566 struct proc *p; 2567 int vfslocked; 2568 2569 name = (int *)arg1; 2570 if ((p = pfind((pid_t)name[0])) == NULL) 2571 return (ESRCH); 2572 if ((error = p_candebug(curthread, p))) { 2573 PROC_UNLOCK(p); 2574 return (error); 2575 } 2576 fdp = fdhold(p); 2577 PROC_UNLOCK(p); 2578 if (fdp == NULL) 2579 return (ENOENT); 2580 kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK); 2581 FILEDESC_SLOCK(fdp); 2582 if (fdp->fd_cdir != NULL) 2583 export_vnode_for_sysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif, 2584 fdp, req); 2585 if (fdp->fd_rdir != NULL) 2586 export_vnode_for_sysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif, 2587 fdp, req); 2588 if (fdp->fd_jdir != NULL) 2589 export_vnode_for_sysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif, 2590 fdp, req); 2591 for (i = 0; i < fdp->fd_nfiles; i++) { 2592 if ((fp = fdp->fd_ofiles[i]) == NULL) 2593 continue; 2594 bzero(kif, sizeof(*kif)); 2595 kif->kf_structsize = sizeof(*kif); 2596 vp = NULL; 2597 so = NULL; 2598 kif->kf_fd = i; 2599 switch (fp->f_type) { 2600 case DTYPE_VNODE: 2601 kif->kf_type = KF_TYPE_VNODE; 2602 vp = fp->f_vnode; 2603 break; 2604 2605 case DTYPE_SOCKET: 2606 kif->kf_type = KF_TYPE_SOCKET; 2607 so = fp->f_data; 2608 break; 2609 2610 case DTYPE_PIPE: 2611 kif->kf_type = KF_TYPE_PIPE; 2612 break; 2613 2614 case DTYPE_FIFO: 2615 kif->kf_type = KF_TYPE_FIFO; 2616 vp = fp->f_vnode; 2617 vref(vp); 2618 break; 2619 2620 case DTYPE_KQUEUE: 2621 kif->kf_type = KF_TYPE_KQUEUE; 2622 break; 2623 2624 case DTYPE_CRYPTO: 2625 kif->kf_type = KF_TYPE_CRYPTO; 2626 break; 2627 2628 case DTYPE_MQUEUE: 2629 kif->kf_type = KF_TYPE_MQUEUE; 2630 break; 2631 2632 case DTYPE_SHM: 2633 kif->kf_type = KF_TYPE_SHM; 2634 break; 2635 2636 default: 2637 kif->kf_type = KF_TYPE_UNKNOWN; 2638 break; 2639 } 2640 kif->kf_ref_count = fp->f_count; 2641 if (fp->f_flag & FREAD) 2642 kif->kf_flags |= KF_FLAG_READ; 2643 if (fp->f_flag & FWRITE) 2644 kif->kf_flags |= KF_FLAG_WRITE; 2645 if (fp->f_flag & FAPPEND) 2646 kif->kf_flags |= KF_FLAG_APPEND; 2647 if (fp->f_flag & FASYNC) 2648 kif->kf_flags |= KF_FLAG_ASYNC; 2649 if (fp->f_flag & FFSYNC) 2650 kif->kf_flags |= KF_FLAG_FSYNC; 2651 if (fp->f_flag & FNONBLOCK) 2652 kif->kf_flags |= KF_FLAG_NONBLOCK; 2653 if (fp->f_flag & O_DIRECT) 2654 kif->kf_flags |= KF_FLAG_DIRECT; 2655 if (fp->f_flag & FHASLOCK) 2656 kif->kf_flags |= KF_FLAG_HASLOCK; 2657 kif->kf_offset = fp->f_offset; 2658 if (vp != NULL) { 2659 vref(vp); 2660 switch (vp->v_type) { 2661 case VNON: 2662 kif->kf_vnode_type = KF_VTYPE_VNON; 2663 break; 2664 case VREG: 2665 kif->kf_vnode_type = KF_VTYPE_VREG; 2666 break; 2667 case VDIR: 2668 kif->kf_vnode_type = KF_VTYPE_VDIR; 2669 break; 2670 case VBLK: 2671 kif->kf_vnode_type = KF_VTYPE_VBLK; 2672 break; 2673 case VCHR: 2674 kif->kf_vnode_type = KF_VTYPE_VCHR; 2675 break; 2676 case VLNK: 2677 kif->kf_vnode_type = KF_VTYPE_VLNK; 2678 break; 2679 case VSOCK: 2680 kif->kf_vnode_type = KF_VTYPE_VSOCK; 2681 break; 2682 case VFIFO: 2683 kif->kf_vnode_type = KF_VTYPE_VFIFO; 2684 break; 2685 case VBAD: 2686 kif->kf_vnode_type = KF_VTYPE_VBAD; 2687 break; 2688 default: 2689 kif->kf_vnode_type = KF_VTYPE_UNKNOWN; 2690 break; 2691 } 2692 /* 2693 * It is OK to drop the filedesc lock here as we will 2694 * re-validate and re-evaluate its properties when 2695 * the loop continues. 
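 * The vref() taken above keeps the vnode valid while the filedesc lock is
 * dropped; vput() below releases both the vnode lock acquired by vn_lock()
 * and that reference.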
2696 */ 2697 freepath = NULL; 2698 fullpath = "-"; 2699 FILEDESC_SUNLOCK(fdp); 2700 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2701 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2702 vn_fullpath(curthread, vp, &fullpath, &freepath); 2703 vput(vp); 2704 VFS_UNLOCK_GIANT(vfslocked); 2705 strlcpy(kif->kf_path, fullpath, 2706 sizeof(kif->kf_path)); 2707 if (freepath != NULL) 2708 free(freepath, M_TEMP); 2709 FILEDESC_SLOCK(fdp); 2710 } 2711 if (so != NULL) { 2712 struct sockaddr *sa; 2713 2714 if (so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa) 2715 == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) { 2716 bcopy(sa, &kif->kf_sa_local, sa->sa_len); 2717 free(sa, M_SONAME); 2718 } 2719 if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa) 2720 == 0 && sa->sa_len <= sizeof(kif->kf_sa_peer)) { 2721 bcopy(sa, &kif->kf_sa_peer, sa->sa_len); 2722 free(sa, M_SONAME); 2723 } 2724 kif->kf_sock_domain = 2725 so->so_proto->pr_domain->dom_family; 2726 kif->kf_sock_type = so->so_type; 2727 kif->kf_sock_protocol = so->so_proto->pr_protocol; 2728 } 2729 error = SYSCTL_OUT(req, kif, sizeof(*kif)); 2730 if (error) 2731 break; 2732 } 2733 FILEDESC_SUNLOCK(fdp); 2734 fddrop(fdp); 2735 free(kif, M_TEMP); 2736 return (error); 2737 } 2738 2739 static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc, CTLFLAG_RD, 2740 sysctl_kern_proc_filedesc, "Process filedesc entries"); 2741 2742 #ifdef DDB 2743 /* 2744 * For the purposes of debugging, generate a human-readable string for the 2745 * file type. 2746 */ 2747 static const char * 2748 file_type_to_name(short type) 2749 { 2750 2751 switch (type) { 2752 case 0: 2753 return ("zero"); 2754 case DTYPE_VNODE: 2755 return ("vnod"); 2756 case DTYPE_SOCKET: 2757 return ("sock"); 2758 case DTYPE_PIPE: 2759 return ("pipe"); 2760 case DTYPE_FIFO: 2761 return ("fifo"); 2762 case DTYPE_KQUEUE: 2763 return ("kque"); 2764 case DTYPE_CRYPTO: 2765 return ("crpt"); 2766 case DTYPE_MQUEUE: 2767 return ("mque"); 2768 case DTYPE_SHM: 2769 return ("shm"); 2770 default: 2771 return ("unkn"); 2772 } 2773 } 2774 2775 /* 2776 * For the purposes of debugging, identify a process (if any, perhaps one of 2777 * many) that references the passed file in its file descriptor array. Return 2778 * NULL if none. 2779 */ 2780 static struct proc * 2781 file_to_first_proc(struct file *fp) 2782 { 2783 struct filedesc *fdp; 2784 struct proc *p; 2785 int n; 2786 2787 FOREACH_PROC_IN_SYSTEM(p) { 2788 if (p->p_state == PRS_NEW) 2789 continue; 2790 fdp = p->p_fd; 2791 if (fdp == NULL) 2792 continue; 2793 for (n = 0; n < fdp->fd_nfiles; n++) { 2794 if (fp == fdp->fd_ofiles[n]) 2795 return (p); 2796 } 2797 } 2798 return (NULL); 2799 } 2800 2801 static void 2802 db_print_file(struct file *fp, int header) 2803 { 2804 struct proc *p; 2805 2806 if (header) 2807 db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n", 2808 "File", "Type", "Data", "Flag", "GCFl", "Count", 2809 "MCount", "Vnode", "FPID", "FCmd"); 2810 p = file_to_first_proc(fp); 2811 db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp, 2812 file_type_to_name(fp->f_type), fp->f_data, fp->f_flag, 2813 0, fp->f_count, 0, fp->f_vnode, 2814 p != NULL ? p->p_pid : -1, p != NULL ?
p->p_comm : "-"); 2815 } 2816 2817 DB_SHOW_COMMAND(file, db_show_file) 2818 { 2819 struct file *fp; 2820 2821 if (!have_addr) { 2822 db_printf("usage: show file <addr>\n"); 2823 return; 2824 } 2825 fp = (struct file *)addr; 2826 db_print_file(fp, 1); 2827 } 2828 2829 DB_SHOW_COMMAND(files, db_show_files) 2830 { 2831 struct filedesc *fdp; 2832 struct file *fp; 2833 struct proc *p; 2834 int header; 2835 int n; 2836 2837 header = 1; 2838 FOREACH_PROC_IN_SYSTEM(p) { 2839 if (p->p_state == PRS_NEW) 2840 continue; 2841 if ((fdp = p->p_fd) == NULL) 2842 continue; 2843 for (n = 0; n < fdp->fd_nfiles; ++n) { 2844 if ((fp = fdp->fd_ofiles[n]) == NULL) 2845 continue; 2846 db_print_file(fp, header); 2847 header = 0; 2848 } 2849 } 2850 } 2851 #endif 2852 2853 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, 2854 &maxfilesperproc, 0, "Maximum files allowed open per process"); 2855 2856 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, 2857 &maxfiles, 0, "Maximum number of files"); 2858 2859 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD, 2860 __DEVOLATILE(int *, &openfiles), 0, "System-wide number of open files"); 2861 2862 /* ARGSUSED*/ 2863 static void 2864 filelistinit(void *dummy) 2865 { 2866 2867 file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL, 2868 NULL, NULL, UMA_ALIGN_PTR, 0); 2869 mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); 2870 mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF); 2871 } 2872 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL); 2873 2874 /*-------------------------------------------------------------------*/ 2875 2876 static int 2877 badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) 2878 { 2879 2880 return (EBADF); 2881 } 2882 2883 static int 2884 badfo_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td) 2885 { 2886 2887 return (EINVAL); 2888 } 2889 2890 static int 2891 badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td) 2892 { 2893 2894 return (EBADF); 2895 } 2896 2897 static int 2898 badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) 2899 { 2900 2901 return (0); 2902 } 2903 2904 static int 2905 badfo_kqfilter(struct file *fp, struct knote *kn) 2906 { 2907 2908 return (EBADF); 2909 } 2910 2911 static int 2912 badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td) 2913 { 2914 2915 return (EBADF); 2916 } 2917 2918 static int 2919 badfo_close(struct file *fp, struct thread *td) 2920 { 2921 2922 return (EBADF); 2923 } 2924 2925 struct fileops badfileops = { 2926 .fo_read = badfo_readwrite, 2927 .fo_write = badfo_readwrite, 2928 .fo_truncate = badfo_truncate, 2929 .fo_ioctl = badfo_ioctl, 2930 .fo_poll = badfo_poll, 2931 .fo_kqfilter = badfo_kqfilter, 2932 .fo_stat = badfo_stat, 2933 .fo_close = badfo_close, 2934 }; 2935 2936 2937 /*-------------------------------------------------------------------*/ 2938 2939 /* 2940 * File Descriptor pseudo-device driver (/dev/fd/). 2941 * 2942 * Opening minor device N dup()s the file (if any) connected to file 2943 * descriptor N belonging to the calling process. Note that this driver 2944 * consists of only the ``open()'' routine, because all subsequent 2945 * references to this file will be direct to the other driver. 2946 * 2947 * XXX: we could give this one a cloning event handler if necessary. 
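 *
 * For example, open("/dev/fd/0", O_RDONLY) yields a new descriptor that
 * references the same open file as descriptor 0, much as dup(0) would; the
 * ENODEV return from fdopen() below, noticed by the open path and handed to
 * dupfdopen() above, is what implements this.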
2948 */ 2949 2950 /* ARGSUSED */ 2951 static int 2952 fdopen(struct cdev *dev, int mode, int type, struct thread *td) 2953 { 2954 2955 /* 2956 * XXX Kludge: set curthread->td_dupfd to contain the value of the 2957 * file descriptor being sought for duplication. The error 2958 * return ensures that the vnode for this device will be released 2959 * by vn_open. Open will detect this special error and take the 2960 * actions in dupfdopen above. Other callers of vn_open or VOP_OPEN 2961 * will simply report the error. 2962 */ 2963 td->td_dupfd = dev2unit(dev); 2964 return (ENODEV); 2965 } 2966 2967 static struct cdevsw fildesc_cdevsw = { 2968 .d_version = D_VERSION, 2969 .d_flags = D_NEEDGIANT, 2970 .d_open = fdopen, 2971 .d_name = "FD", 2972 }; 2973 2974 static void 2975 fildesc_drvinit(void *unused) 2976 { 2977 struct cdev *dev; 2978 2979 dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0"); 2980 make_dev_alias(dev, "stdin"); 2981 dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1"); 2982 make_dev_alias(dev, "stdout"); 2983 dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2"); 2984 make_dev_alias(dev, "stderr"); 2985 } 2986 2987 SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL); 2988