1 /* 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/syscallsubr.h>
#include <sys/sysproto.h>
#include <sys/conf.h>
#include <sys/filedesc.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/unistd.h>
#include <sys/resourcevar.h>
#include <sys/event.h>
#include <sys/sx.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

/* Malloc types for descriptor tables, fd-to-leader links and sigio records. */
static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "file desc to leader",
    "file desc to leader structures");
static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");

/* UMA zone from which every struct file in the system is allocated. */
static uma_zone_t file_zone;

static d_open_t fdopen;
/* Number of /dev/fd/N nodes exported by the descriptor pseudo-device. */
#define NUMFDESC 64

#define CDEV_MAJOR 22
/* Character device switch for the "FD" (/dev/fd) pseudo-device. */
static struct cdevsw fildesc_cdevsw = {
	.d_open =	fdopen,
	.d_name =	"FD",
	.d_maj =	CDEV_MAJOR,
};

/* How to treat 'new' parameter when allocating
a fd for do_dup(). */
enum dup_type { DUP_VARIABLE, DUP_FIXED };

static int do_dup(struct thread *td, enum dup_type type, int old, int new,
    register_t *retval);

/*
 * Descriptor management.
 */
struct filelist filehead;	/* head of list of open files */
int nfiles;			/* actual number of open files */
struct sx filelist_lock;	/* sx to protect filelist */
struct mtx sigio_lock;		/* mtx to protect pointers to sigio */

/*
 * System calls on descriptors.
 */
#ifndef _SYS_SYSPROTO_H_
struct getdtablesize_args {
	int	dummy;
};
#endif
/*
 * MPSAFE
 *
 * Return the effective descriptor-table size: the process's soft
 * RLIMIT_NOFILE limit clipped to the system-wide maxfilesperproc.
 */
/* ARGSUSED */
int
getdtablesize(td, uap)
	struct thread *td;
	struct getdtablesize_args *uap;
{
	struct proc *p = td->td_proc;

	mtx_lock(&Giant);
	td->td_retval[0] =
	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
	mtx_unlock(&Giant);
	return (0);
}

/*
 * Duplicate a file descriptor to a particular value.
 *
 * note: keep in mind that a potential race condition exists when closing
 * descriptors from a shared descriptor table (via rfork).
 */
#ifndef _SYS_SYSPROTO_H_
struct dup2_args {
	u_int	from;
	u_int	to;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
dup2(td, uap)
	struct thread *td;
	struct dup2_args *uap;
{

	/* DUP_FIXED: the result must land exactly on uap->to. */
	return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to,
	    td->td_retval));
}

/*
 * Duplicate a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct dup_args {
	u_int	fd;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
dup(td, uap)
	struct thread *td;
	struct dup_args *uap;
{

	/* DUP_VARIABLE: do_dup() picks the lowest free descriptor. */
	return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval));
}

/*
 * The file control system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct fcntl_args {
	int	fd;
	int	cmd;
	long	arg;
};
#endif
/*
 * MPSAFE
 *
 * fcntl(2) entry point.  For the advisory-locking commands the
 * argument is a userland struct flock pointer, so copy it in here,
 * pass the kernel copy to kern_fcntl(), and copy it back out for
 * F_GETLK.  All other commands pass the argument through unchanged.
 */
/* ARGSUSED */
int
fcntl(td, uap)
	struct thread *td;
	struct fcntl_args *uap;
{
	struct flock fl;
	intptr_t arg;
	int error;

	error = 0;
	switch (uap->cmd) {
	case F_GETLK:
	case F_SETLK:
	case F_SETLKW:
		/* uap->arg is a user pointer to a struct flock. */
		error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
		arg = (intptr_t)&fl;
		break;
	default:
		arg = uap->arg;
		break;
	}
	if (error)
		return (error);
	error = kern_fcntl(td, uap->fd, uap->cmd, arg);
	if (error)
		return (error);
	if (uap->cmd == F_GETLK)
		error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
	return (error);
}

/*
 * In-kernel fcntl() implementation.  'arg' is either the plain integer
 * argument or, for F_GETLK/F_SETLK/F_SETLKW, a pointer to an in-kernel
 * struct flock.  Runs under Giant; each case is responsible for
 * dropping the FILEDESC lock on every path out of the switch.
 */
int
kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
{
	struct filedesc *fdp;
	struct flock *flp;
	struct file *fp;
	struct proc *p;
	char *pop;
	struct vnode *vp;
	u_int newmin;
	int error, flg, tmp;

	error = 0;
	flg = F_POSIX;
	p = td->td_proc;
	fdp = p->p_fd;
	mtx_lock(&Giant);
	FILEDESC_LOCK(fdp);
	if ((unsigned)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL) {
		FILEDESC_UNLOCK(fdp);
		error = EBADF;
		goto done2;
	}
	pop = &fdp->fd_ofileflags[fd];

	switch (cmd) {
	case F_DUPFD:
		FILEDESC_UNLOCK(fdp);
		newmin = arg;
		/* Minimum descriptor must be below both resource limits. */
		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
		    newmin >= maxfilesperproc) {
			error = EINVAL;
			break;
		}
		error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval);
		break;

	case F_GETFD:
		/* Per-descriptor flag, protected by the FILEDESC lock. */
		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
		FILEDESC_UNLOCK(fdp);
		break;

	case F_SETFD:
		*pop = (*pop &~ UF_EXCLOSE) |
		    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
		FILEDESC_UNLOCK(fdp);
		break;

	case F_GETFL:
		FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);
		td->td_retval[0] = OFLAGS(fp->f_flag);
		FILE_UNLOCK(fp);
		break;

	case F_SETFL:
		FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);
		fhold_locked(fp);
		/* Replace only the fcntl-settable flag bits. */
		fp->f_flag &= ~FCNTLFLAGS;
		fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
		FILE_UNLOCK(fp);
		/* Push the new blocking mode down to the object. */
		tmp = fp->f_flag & FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		if (error) {
			fdrop(fp, td);
			break;
		}
		/* Push the new async mode down to the object. */
		tmp = fp->f_flag & FASYNC;
		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
		if (error == 0) {
			fdrop(fp, td);
			break;
		}
		/* FIOASYNC failed: back out the FIONBIO change as well. */
		FILE_LOCK(fp);
		fp->f_flag &= ~FNONBLOCK;
		FILE_UNLOCK(fp);
		tmp = 0;
		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_GETOWN:
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
		if (error == 0)
			td->td_retval[0] = tmp;
		fdrop(fp, td);
		break;

	case F_SETOWN:
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		tmp = arg;
		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_SETLKW:
		flg |= F_WAIT;
		/* FALLTHROUGH F_SETLK */

	case F_SETLK:
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_UNLOCK(fdp);
			error = EBADF;
			break;
		}

		flp = (struct flock *)arg;
		if (flp->l_whence == SEEK_CUR) {
			/* Guard the l_start += f_offset below against
			 * off_t overflow. */
			if (fp->f_offset < 0 ||
			    (flp->l_start > 0 &&
			     fp->f_offset > OFF_MAX - flp->l_start)) {
				FILEDESC_UNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}

		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		vp = fp->f_vnode;

		switch (flp->l_type) {
		case F_RDLCK:
			if ((fp->f_flag & FREAD) == 0) {
				error = EBADF;
				break;
			}
			/* Locks are owned by the process leader. */
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_WRLCK:
			if ((fp->f_flag & FWRITE) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_UNLCK:
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
			    flp, F_POSIX);
			break;
		default:
			error = EINVAL;
			break;
		}
		/* Check for race with close */
		FILEDESC_LOCK(fdp);
		if ((unsigned) fd >= fdp->fd_nfiles ||
		    fp != fdp->fd_ofiles[fd]) {
			FILEDESC_UNLOCK(fdp);
			/*
			 * The descriptor was closed or replaced while we
			 * slept in VOP_ADVLOCK(): release whatever lock we
			 * just acquired so it does not leak.
			 */
			flp->l_whence = SEEK_SET;
			flp->l_start = 0;
			flp->l_len = 0;
			flp->l_type = F_UNLCK;
			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
			    F_UNLCK, flp, F_POSIX);
		} else
			FILEDESC_UNLOCK(fdp);
		fdrop(fp, td);
		break;

	case F_GETLK:
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_UNLOCK(fdp);
			error = EBADF;
			break;
		}
		flp = (struct flock *)arg;
		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
		    flp->l_type != F_UNLCK) {
			FILEDESC_UNLOCK(fdp);
			error = EINVAL;
			break;
		}
		if (flp->l_whence == SEEK_CUR) {
			/* Overflow guard for l_start += f_offset. */
			if ((flp->l_start > 0 &&
			     fp->f_offset > OFF_MAX - flp->l_start) ||
			    (flp->l_start < 0 &&
			     fp->f_offset < OFF_MIN - flp->l_start)) {
				FILEDESC_UNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}
		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		vp = fp->f_vnode;
		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
		    F_POSIX);
		fdrop(fp, td);
		break;
	default:
		FILEDESC_UNLOCK(fdp);
		error = EINVAL;
		break;
	}
done2:
	mtx_unlock(&Giant);
	return (error);
}

/*
 * Common code for dup, dup2, and fcntl(F_DUPFD).
 */
static int
do_dup(td, type, old, new, retval)
	enum dup_type type;
	int old, new;
	register_t *retval;
	struct thread *td;
{
	struct filedesc *fdp;
	struct proc *p;
	struct file *fp;
	struct file *delfp;
	int error, newfd;
	int holdleaders;

	p = td->td_proc;
	fdp = p->p_fd;

	/*
	 * Verify we have a valid descriptor to dup from and possibly to
	 * dup to.
	 */
	if (old < 0 || new < 0 || new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
	    new >= maxfilesperproc)
		return (EBADF);
	FILEDESC_LOCK(fdp);
	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}
	if (type == DUP_FIXED && old == new) {
		/* dup2(fd, fd) is a no-op on a valid descriptor. */
		*retval = new;
		FILEDESC_UNLOCK(fdp);
		return (0);
	}
	fp = fdp->fd_ofiles[old];
	fhold(fp);

	/*
	 * Expand the table for the new descriptor if needed. This may
	 * block and drop and reacquire the filedesc lock.
	 */
	if (type == DUP_VARIABLE || new >= fdp->fd_nfiles) {
		error = fdalloc(td, new, &newfd);
		if (error) {
			FILEDESC_UNLOCK(fdp);
			fdrop(fp, td);
			return (error);
		}
	}
	if (type == DUP_VARIABLE)
		new = newfd;

	/*
	 * If the old file changed out from under us then treat it as a
	 * bad file descriptor.  Userland should do its own locking to
	 * avoid this case.
	 */
	if (fdp->fd_ofiles[old] != fp) {
		if (fdp->fd_ofiles[new] == NULL) {
			/*
			 * Return the slot fdalloc() reserved for us and
			 * shrink fd_lastfile back down.
			 */
			if (new < fdp->fd_freefile)
				fdp->fd_freefile = new;
			while (fdp->fd_lastfile > 0 &&
			    fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
				fdp->fd_lastfile--;
		}
		FILEDESC_UNLOCK(fdp);
		fdrop(fp, td);
		return (EBADF);
	}
	KASSERT(old != new, ("new fd is same as old"));

	/*
	 * Save info on the descriptor being overwritten. We have
	 * to do the unmap now, but we cannot close it without
	 * introducing an ownership race for the slot.
	 */
	delfp = fdp->fd_ofiles[new];
	if (delfp != NULL && p->p_fdtol != NULL) {
		/*
		 * Ask fdfree() to sleep to ensure that all relevant
		 * process leaders can be traversed in closef().
		 */
		fdp->fd_holdleaderscount++;
		holdleaders = 1;
	} else
		holdleaders = 0;
	KASSERT(delfp == NULL || type == DUP_FIXED,
	    ("dup() picked an open file"));
#if 0
	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
		(void) munmapfd(td, new);
#endif

	/*
	 * Duplicate the source descriptor, update lastfile
	 */
	fdp->fd_ofiles[new] = fp;
	/* The dup'd descriptor never inherits close-on-exec. */
	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
	if (new > fdp->fd_lastfile)
		fdp->fd_lastfile = new;
	FILEDESC_UNLOCK(fdp);
	*retval = new;

	/*
	 * If we dup'd over a valid file, we now own the reference to it
	 * and must dispose of it using closef() semantics (as if a
	 * close() were performed on it).
	 */
	if (delfp) {
		mtx_lock(&Giant);
		(void) closef(delfp, td);
		mtx_unlock(&Giant);
		if (holdleaders) {
			FILEDESC_LOCK(fdp);
			fdp->fd_holdleaderscount--;
			if (fdp->fd_holdleaderscount == 0 &&
			    fdp->fd_holdleaderswakeup != 0) {
				fdp->fd_holdleaderswakeup = 0;
				wakeup(&fdp->fd_holdleaderscount);
			}
			FILEDESC_UNLOCK(fdp);
		}
	}
	return (0);
}

/*
 * If sigio is on the list associated with a process or process group,
 * disable signalling from the device, remove sigio from the list and
 * free sigio.
 */
void
funsetown(sigiop)
	struct sigio **sigiop;
{
	struct sigio *sigio;

	SIGIO_LOCK();
	sigio = *sigiop;
	if (sigio == NULL) {
		SIGIO_UNLOCK();
		return;
	}
	/* Clear the owner's back-pointer first so it cannot be reused. */
	*(sigio->sio_myref) = NULL;
	if ((sigio)->sio_pgid < 0) {
		/* Negative pgid means the owner is a process group. */
		struct pgrp *pg = (sigio)->sio_pgrp;
		PGRP_LOCK(pg);
		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
		    sigio, sio_pgsigio);
		PGRP_UNLOCK(pg);
	} else {
		struct proc *p = (sigio)->sio_proc;
		PROC_LOCK(p);
		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
		    sigio, sio_pgsigio);
		PROC_UNLOCK(p);
	}
	SIGIO_UNLOCK();
	crfree(sigio->sio_ucred);
	FREE(sigio, M_SIGIO);
}

/*
 * Free a list of sigio structures.
 * We only need to lock the SIGIO_LOCK because we have made ourselves
 * inaccessable to callers of fsetown and therefore do not need to lock
 * the proc or pgrp struct for the list manipulation.
 */
void
funsetownlst(sigiolst)
	struct sigiolst *sigiolst;
{
	struct proc *p;
	struct pgrp *pg;
	struct sigio *sigio;

	sigio = SLIST_FIRST(sigiolst);
	if (sigio == NULL)
		return;
	p = NULL;
	pg = NULL;

	/*
	 * Every entry of the list should belong
	 * to a single proc or pgrp.
	 */
	if (sigio->sio_pgid < 0) {
		pg = sigio->sio_pgrp;
		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
	} else /* if (sigio->sio_pgid > 0) */ {
		p = sigio->sio_proc;
		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
	}

	SIGIO_LOCK();
	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
		*(sigio->sio_myref) = NULL;
		if (pg != NULL) {
			KASSERT(sigio->sio_pgid < 0,
			    ("Proc sigio in pgrp sigio list"));
			KASSERT(sigio->sio_pgrp == pg,
			    ("Bogus pgrp in sigio list"));
			PGRP_LOCK(pg);
			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
			    sio_pgsigio);
			PGRP_UNLOCK(pg);
		} else /* if (p != NULL) */ {
			KASSERT(sigio->sio_pgid > 0,
			    ("Pgrp sigio in proc sigio list"));
			KASSERT(sigio->sio_proc == p,
			    ("Bogus proc in sigio list"));
			PROC_LOCK(p);
			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
			    sio_pgsigio);
			PROC_UNLOCK(p);
		}
		/* Drop the sigio lock around crfree()/FREE(). */
		SIGIO_UNLOCK();
		crfree(sigio->sio_ucred);
		FREE(sigio, M_SIGIO);
		SIGIO_LOCK();
	}
	SIGIO_UNLOCK();
}

/*
 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
 *
 * After permission checking, add a sigio structure to the sigio list for
 * the process or process group.
 */
int
fsetown(pgid, sigiop)
	pid_t pgid;
	struct sigio **sigiop;
{
	struct proc *proc;
	struct pgrp *pgrp;
	struct sigio *sigio;
	int ret;

	if (pgid == 0) {
		/* A pgid of zero clears any existing ownership. */
		funsetown(sigiop);
		return (0);
	}

	ret = 0;

	/* Allocate and fill in the new sigio out of locks. */
	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
	sigio->sio_pgid = pgid;
	sigio->sio_ucred = crhold(curthread->td_ucred);
	sigio->sio_myref = sigiop;

	sx_slock(&proctree_lock);
	if (pgid > 0) {
		proc = pfind(pgid);
		if (proc == NULL) {
			ret = ESRCH;
			goto fail;
		}

		/*
		 * Policy - Don't allow a process to FSETOWN a process
		 * in another session.
		 *
		 * Remove this test to allow maximum flexibility or
		 * restrict FSETOWN to the current process or process
		 * group for maximum safety.
		 */
		/*
		 * NOTE(review): the proc lock is dropped before p_session
		 * is read; presumably the shared proctree_lock held above
		 * keeps the session association stable here - confirm.
		 */
		PROC_UNLOCK(proc);
		if (proc->p_session != curthread->td_proc->p_session) {
			ret = EPERM;
			goto fail;
		}

		pgrp = NULL;
	} else /* if (pgid < 0) */ {
		pgrp = pgfind(-pgid);
		if (pgrp == NULL) {
			ret = ESRCH;
			goto fail;
		}
		PGRP_UNLOCK(pgrp);

		/*
		 * Policy - Don't allow a process to FSETOWN a process
		 * in another session.
		 *
		 * Remove this test to allow maximum flexibility or
		 * restrict FSETOWN to the current process or process
		 * group for maximum safety.
		 */
		if (pgrp->pg_session != curthread->td_proc->p_session) {
			ret = EPERM;
			goto fail;
		}

		proc = NULL;
	}
	/* Detach any previous owner before installing the new sigio. */
	funsetown(sigiop);
	if (pgid > 0) {
		PROC_LOCK(proc);
		/*
		 * Since funsetownlst() is called without the proctree
		 * locked, we need to check for P_WEXIT.
		 * XXX: is ESRCH correct?
		 */
		if ((proc->p_flag & P_WEXIT) != 0) {
			PROC_UNLOCK(proc);
			ret = ESRCH;
			goto fail;
		}
		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
		sigio->sio_proc = proc;
		PROC_UNLOCK(proc);
	} else {
		PGRP_LOCK(pgrp);
		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
		sigio->sio_pgrp = pgrp;
		PGRP_UNLOCK(pgrp);
	}
	sx_sunlock(&proctree_lock);
	SIGIO_LOCK();
	*sigiop = sigio;
	SIGIO_UNLOCK();
	return (0);

fail:
	sx_sunlock(&proctree_lock);
	crfree(sigio->sio_ucred);
	FREE(sigio, M_SIGIO);
	return (ret);
}

/*
 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
 *
 * Returns the owning pgid, or 0 when no owner is registered.
 */
pid_t
fgetown(sigiop)
	struct sigio **sigiop;
{
	pid_t pgid;

	SIGIO_LOCK();
	pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
	SIGIO_UNLOCK();
	return (pgid);
}

/*
 * Close a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct close_args {
	int	fd;
};
#endif
/*
 * MPSAFE
 *
 * close(2): detach the descriptor slot under the FILEDESC lock,
 * then dispose of the file reference via closef().
 */
/* ARGSUSED */
int
close(td, uap)
	struct thread *td;
	struct close_args *uap;
{
	struct filedesc *fdp;
	struct file *fp;
	int fd, error;
	int holdleaders;

	fd = uap->fd;
	error = 0;
	holdleaders = 0;
	fdp = td->td_proc->p_fd;
	mtx_lock(&Giant);
	FILEDESC_LOCK(fdp);
	if ((unsigned)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL) {
		FILEDESC_UNLOCK(fdp);
		error = EBADF;
		goto done2;
	}
#if 0
	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
		(void) munmapfd(td, fd);
#endif
	fdp->fd_ofiles[fd] = NULL;
	fdp->fd_ofileflags[fd] = 0;
	if (td->td_proc->p_fdtol != NULL) {
		/*
		 * Ask fdfree() to sleep to ensure that all relevant
		 * process leaders can be traversed in closef().
		 */
		fdp->fd_holdleaderscount++;
		holdleaders = 1;
	}

	/*
	 * we now hold the fp reference that used to be owned by the descriptor
	 * array.
	 */
	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
		fdp->fd_lastfile--;
	if (fd < fdp->fd_freefile)
		fdp->fd_freefile = fd;
	if (fd < fdp->fd_knlistsize) {
		/* Detach any knotes attached to this descriptor. */
		FILEDESC_UNLOCK(fdp);
		knote_fdclose(td, fd);
	} else
		FILEDESC_UNLOCK(fdp);

	error = closef(fp, td);
done2:
	mtx_unlock(&Giant);
	if (holdleaders) {
		FILEDESC_LOCK(fdp);
		fdp->fd_holdleaderscount--;
		if (fdp->fd_holdleaderscount == 0 &&
		    fdp->fd_holdleaderswakeup != 0) {
			fdp->fd_holdleaderswakeup = 0;
			wakeup(&fdp->fd_holdleaderscount);
		}
		FILEDESC_UNLOCK(fdp);
	}
	return (error);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor.
881 */ 882 #ifndef _SYS_SYSPROTO_H_ 883 struct ofstat_args { 884 int fd; 885 struct ostat *sb; 886 }; 887 #endif 888 /* 889 * MPSAFE 890 */ 891 /* ARGSUSED */ 892 int 893 ofstat(td, uap) 894 struct thread *td; 895 struct ofstat_args *uap; 896 { 897 struct file *fp; 898 struct stat ub; 899 struct ostat oub; 900 int error; 901 902 if ((error = fget(td, uap->fd, &fp)) != 0) 903 goto done2; 904 mtx_lock(&Giant); 905 error = fo_stat(fp, &ub, td->td_ucred, td); 906 mtx_unlock(&Giant); 907 if (error == 0) { 908 cvtstat(&ub, &oub); 909 error = copyout(&oub, uap->sb, sizeof(oub)); 910 } 911 fdrop(fp, td); 912 done2: 913 return (error); 914 } 915 #endif /* COMPAT_43 || COMPAT_SUNOS */ 916 917 /* 918 * Return status information about a file descriptor. 919 */ 920 #ifndef _SYS_SYSPROTO_H_ 921 struct fstat_args { 922 int fd; 923 struct stat *sb; 924 }; 925 #endif 926 /* 927 * MPSAFE 928 */ 929 /* ARGSUSED */ 930 int 931 fstat(td, uap) 932 struct thread *td; 933 struct fstat_args *uap; 934 { 935 struct file *fp; 936 struct stat ub; 937 int error; 938 939 if ((error = fget(td, uap->fd, &fp)) != 0) 940 goto done2; 941 mtx_lock(&Giant); 942 error = fo_stat(fp, &ub, td->td_ucred, td); 943 mtx_unlock(&Giant); 944 if (error == 0) 945 error = copyout(&ub, uap->sb, sizeof(ub)); 946 fdrop(fp, td); 947 done2: 948 return (error); 949 } 950 951 /* 952 * Return status information about a file descriptor. 
 */
#ifndef _SYS_SYSPROTO_H_
struct nfstat_args {
	int	fd;
	struct nstat *sb;
};
#endif
/*
 * MPSAFE
 *
 * nfstat(): like fstat(2) but converts the result to struct nstat
 * before copying it out.
 */
/* ARGSUSED */
int
nfstat(td, uap)
	struct thread *td;
	struct nfstat_args *uap;
{
	struct file *fp;
	struct stat ub;
	struct nstat nub;
	int error;

	if ((error = fget(td, uap->fd, &fp)) != 0)
		goto done2;
	mtx_lock(&Giant);
	error = fo_stat(fp, &ub, td->td_ucred, td);
	mtx_unlock(&Giant);
	if (error == 0) {
		/* Translate to the nstat layout for the caller. */
		cvtnstat(&ub, &nub);
		error = copyout(&nub, uap->sb, sizeof(nub));
	}
	fdrop(fp, td);
done2:
	return (error);
}

/*
 * Return pathconf information about a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct fpathconf_args {
	int	fd;
	int	name;
};
#endif
/*
 * MPSAFE
 *
 * fpathconf(2): vnode-backed descriptors are handled by
 * VOP_PATHCONF(); pipes and sockets only support _PC_PIPE_BUF.
 */
/* ARGSUSED */
int
fpathconf(td, uap)
	struct thread *td;
	struct fpathconf_args *uap;
{
	struct file *fp;
	struct vnode *vp;
	int error;

	if ((error = fget(td, uap->fd, &fp)) != 0)
		return (error);

	/* If asynchronous I/O is available, it works for all descriptors. */
	if (uap->name == _PC_ASYNC_IO) {
		td->td_retval[0] = async_io_version;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp != NULL) {
		/* VOP_PATHCONF() requires a locked vnode, under Giant. */
		mtx_lock(&Giant);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
		VOP_UNLOCK(vp, 0, td);
		mtx_unlock(&Giant);
	} else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
		if (uap->name != _PC_PIPE_BUF) {
			error = EINVAL;
		} else {
			td->td_retval[0] = PIPE_BUF;
			error = 0;
		}
	} else {
		error = EOPNOTSUPP;
	}
out:
	fdrop(fp, td);
	return (error);
}

/*
 * Allocate a file descriptor for the process.
 */
/* Statistics: number of times the ofile array has been expanded. */
static int fdexpand;
SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");

/*
 * Allocate a descriptor slot >= 'want' for the current process and
 * return its index in *result.  Called (and returns) with the
 * FILEDESC lock held; may drop and reacquire it while expanding
 * the table.  Returns 0 or EMFILE.
 */
int
fdalloc(td, want, result)
	struct thread *td;
	int want;
	int *result;
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = td->td_proc->p_fd;
	int i;
	int lim, last, nfiles;
	struct file **newofile, **oldofile;
	char *newofileflags;

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile. If that fails, consider
	 * expanding the ofile array.
	 */
	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
	for (;;) {
		last = min(fdp->fd_nfiles, lim);
		i = max(want, fdp->fd_freefile);
		for (; i < last; i++) {
			if (fdp->fd_ofiles[i] == NULL) {
				fdp->fd_ofileflags[i] = 0;
				if (i > fdp->fd_lastfile)
					fdp->fd_lastfile = i;
				if (want <= fdp->fd_freefile)
					fdp->fd_freefile = i;
				*result = i;
				return (0);
			}
		}

		/*
		 * No space in current array. Expand?
		 */
		if (i >= lim)
			return (EMFILE);
		/* Double the table size (at least NDEXTENT slots). */
		if (fdp->fd_nfiles < NDEXTENT)
			nfiles = NDEXTENT;
		else
			nfiles = 2 * fdp->fd_nfiles;
		while (nfiles < want)
			nfiles <<= 1;
		/* malloc() may sleep, so drop the lock around it. */
		FILEDESC_UNLOCK(fdp);
		newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK);

		/*
		 * Deal with file-table extend race that might have
		 * occurred while filedesc was unlocked.
		 */
		FILEDESC_LOCK(fdp);
		if (fdp->fd_nfiles >= nfiles) {
			FILEDESC_UNLOCK(fdp);
			free(newofile, M_FILEDESC);
			FILEDESC_LOCK(fdp);
			continue;
		}
		/* Flags array lives directly after the pointer array. */
		newofileflags = (char *) &newofile[nfiles];
		/*
		 * Copy the existing ofile and ofileflags arrays
		 * and zero the new portion of each array.
		 */
		i = fdp->fd_nfiles * sizeof(struct file *);
		bcopy(fdp->fd_ofiles, newofile, i);
		bzero((char *)newofile + i,
		    nfiles * sizeof(struct file *) - i);
		i = fdp->fd_nfiles * sizeof(char);
		bcopy(fdp->fd_ofileflags, newofileflags, i);
		bzero(newofileflags + i, nfiles * sizeof(char) - i);
		/* The initial NDFILE-sized table is embedded, not malloc'd. */
		if (fdp->fd_nfiles > NDFILE)
			oldofile = fdp->fd_ofiles;
		else
			oldofile = NULL;
		fdp->fd_ofiles = newofile;
		fdp->fd_ofileflags = newofileflags;
		fdp->fd_nfiles = nfiles;
		fdexpand++;
		if (oldofile != NULL) {
			/* Drop the lock again to free the old table. */
			FILEDESC_UNLOCK(fdp);
			free(oldofile, M_FILEDESC);
			FILEDESC_LOCK(fdp);
		}
	}
}

/*
 * Check to see whether n user file descriptors
 * are available to the process p.
 * Called with the FILEDESC lock held; returns 1 if available, 0 if not.
 */
int
fdavail(td, n)
	struct thread *td;
	int n;
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = td->td_proc->p_fd;
	struct file **fpp;
	int i, lim, last;

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
	/* Slots beyond the current table count as free. */
	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
		return (1);
	last = min(fdp->fd_nfiles, lim);
	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
		if (*fpp == NULL && --n <= 0)
			return (1);
	}
	return (0);
}

/*
 * Create a new open file structure and allocate
 * a file decriptor for the process that refers to it.
 * We add one reference to the file for the descriptor table
 * and one reference for resultfp. This is to prevent us being
 * prempted and the entry in the descriptor table closed after
 * we release the FILEDESC lock.
 */
int
falloc(td, resultfp, resultfd)
	struct thread *td;
	struct file **resultfp;
	int *resultfd;
{
	struct proc *p = td->td_proc;
	struct file *fp, *fq;
	int error, i;
	/* Reserve the last 5% of file slots for the superuser. */
	int maxuserfiles = maxfiles - (maxfiles / 20);
	static struct timeval lastfail;
	static int curfail;

	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
	sx_xlock(&filelist_lock);
	if ((nfiles >= maxuserfiles && td->td_ucred->cr_ruid != 0)
	    || nfiles >= maxfiles) {
		/* Rate-limit the console warning to once per second. */
		if (ppsratecheck(&lastfail, &curfail, 1)) {
			printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n",
				td->td_ucred->cr_ruid);
		}
		sx_xunlock(&filelist_lock);
		uma_zfree(file_zone, fp);
		return (ENFILE);
	}
	nfiles++;

	/*
	 * If the process has file descriptor zero open, add the new file
	 * descriptor to the list of open files at that point, otherwise
	 * put it at the front of the list of open files.
	 */
	fp->f_mtxp = mtx_pool_alloc(mtxpool_sleep);
	/* One reference for the descriptor table, one for resultfp. */
	fp->f_count = 1;
	if (resultfp)
		fp->f_count++;
	fp->f_cred = crhold(td->td_ucred);
	fp->f_ops = &badfileops;
	FILEDESC_LOCK(p->p_fd);
	if ((fq = p->p_fd->fd_ofiles[0])) {
		LIST_INSERT_AFTER(fq, fp, f_list);
	} else {
		LIST_INSERT_HEAD(&filehead, fp, f_list);
	}
	sx_xunlock(&filelist_lock);
	if ((error = fdalloc(td, 0, &i))) {
		FILEDESC_UNLOCK(p->p_fd);
		/* Drop both references taken above on failure. */
		fdrop(fp, td);
		if (resultfp)
			fdrop(fp, td);
		return (error);
	}
	p->p_fd->fd_ofiles[i] = fp;
	FILEDESC_UNLOCK(p->p_fd);
	if (resultfp)
		*resultfp = fp;
	if (resultfd)
		*resultfd = i;
	return (0);
}

/*
 * Free a file descriptor.
 */
/*
 * Release a file structure back to the zone.  Caller must hold the
 * final reference (f_count == 0); normally reached via fdrop_locked().
 */
void
ffree(fp)
	struct file *fp;
{

	KASSERT(fp->f_count == 0, ("ffree: fp_fcount not 0!"));
	sx_xlock(&filelist_lock);
	LIST_REMOVE(fp, f_list);
	nfiles--;
	sx_xunlock(&filelist_lock);
	crfree(fp->f_cred);
	uma_zfree(file_zone, fp);
}

/*
 * Build a new filedesc structure from another.
 * Copy the current, root, and jail root vnode references.
 */
struct filedesc *
fdinit(fdp)
	struct filedesc *fdp;
{
	struct filedesc0 *newfdp;

	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
	    M_FILEDESC, M_WAITOK | M_ZERO);
	mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
	if (newfdp->fd_fd.fd_cdir)
		VREF(newfdp->fd_fd.fd_cdir);
	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
	if (newfdp->fd_fd.fd_rdir)
		VREF(newfdp->fd_fd.fd_rdir);
	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
	if (newfdp->fd_fd.fd_jdir)
		VREF(newfdp->fd_fd.fd_jdir);

	/* Create the file descriptor table: start with the embedded arrays. */
	newfdp->fd_fd.fd_refcnt = 1;
	newfdp->fd_fd.fd_cmask = CMASK;
	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
	newfdp->fd_fd.fd_nfiles = NDFILE;
	newfdp->fd_fd.fd_knlistsize = -1;	/* no knotes yet */
	return (&newfdp->fd_fd);
}

/*
 * Share a filedesc structure: bump the reference count and return the
 * same table (used for rfork(RFFDG-less) style sharing).
 */
struct filedesc *
fdshare(fdp)
	struct filedesc *fdp;
{
	FILEDESC_LOCK(fdp);
	fdp->fd_refcnt++;
	FILEDESC_UNLOCK(fdp);
	return (fdp);
}

/*
 * Copy a filedesc structure.
 * A NULL pointer in returns a NULL reference, this is to ease callers,
 * not catch errors.
 *
 * Entered with fdp locked; the lock is dropped and retaken around each
 * sleeping allocation, so the table size is re-checked after every sleep.
 * Returns with fdp locked.
 */
struct filedesc *
fdcopy(fdp)
	struct filedesc *fdp;
{
	struct filedesc *newfdp;
	struct file **fpp;
	int i, j;

	/* Certain daemons might not have file descriptors. */
	if (fdp == NULL)
		return (NULL);

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	FILEDESC_UNLOCK(fdp);
	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
	    M_FILEDESC, M_WAITOK);
	FILEDESC_LOCK(fdp);
	bcopy(fdp, newfdp, sizeof(struct filedesc));
	FILEDESC_UNLOCK(fdp);
	/* The copied mutex is meaningless; re-initialize it for the copy. */
	bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
	mtx_init(&newfdp->fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
	if (newfdp->fd_cdir)
		VREF(newfdp->fd_cdir);
	if (newfdp->fd_rdir)
		VREF(newfdp->fd_rdir);
	if (newfdp->fd_jdir)
		VREF(newfdp->fd_jdir);
	newfdp->fd_refcnt = 1;

	/*
	 * If the number of open files fits in the internal arrays
	 * of the open file structure, use them, otherwise allocate
	 * additional memory for the number of descriptors currently
	 * in use.
	 */
	FILEDESC_LOCK(fdp);
	newfdp->fd_lastfile = fdp->fd_lastfile;
	newfdp->fd_nfiles = fdp->fd_nfiles;
	if (newfdp->fd_lastfile < NDFILE) {
		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
		newfdp->fd_ofileflags =
		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
		i = NDFILE;
	} else {
		/*
		 * Compute the smallest multiple of NDEXTENT needed
		 * for the file descriptors currently in use,
		 * allowing the table to shrink.
		 */
retry:
		i = newfdp->fd_nfiles;
		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
			i /= 2;
		/* MALLOC may sleep; drop the lock and re-validate after. */
		FILEDESC_UNLOCK(fdp);
		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
		    M_FILEDESC, M_WAITOK);
		FILEDESC_LOCK(fdp);
		newfdp->fd_lastfile = fdp->fd_lastfile;
		newfdp->fd_nfiles = fdp->fd_nfiles;
		j = newfdp->fd_nfiles;
		while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
			j /= 2;
		if (i != j) {
			/*
			 * The size of the original table has changed.
			 * Go over once again.
			 */
			FILEDESC_UNLOCK(fdp);
			FREE(newfdp->fd_ofiles, M_FILEDESC);
			FILEDESC_LOCK(fdp);
			newfdp->fd_lastfile = fdp->fd_lastfile;
			newfdp->fd_nfiles = fdp->fd_nfiles;
			goto retry;
		}
		/* Flag bytes live immediately after the pointer array. */
		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
	}
	newfdp->fd_nfiles = i;
	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));

	/*
	 * kq descriptors cannot be copied.
	 */
	if (newfdp->fd_knlistsize != -1) {
		fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
		for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
				*fpp = NULL;
				if (i < newfdp->fd_freefile)
					newfdp->fd_freefile = i;
			}
			/* Shrink fd_lastfile past trailing holes we created. */
			if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
				newfdp->fd_lastfile--;
		}
		newfdp->fd_knlist = NULL;
		newfdp->fd_knlistsize = -1;
		newfdp->fd_knhash = NULL;
		newfdp->fd_knhashmask = 0;
	}

	/* Take a reference on every file the copy now points at. */
	fpp = newfdp->fd_ofiles;
	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
		if (*fpp != NULL)
			fhold(*fpp);
	}
	return (newfdp);
}

/* A mutex to protect the association between a proc and filedesc. */
struct mtx fdesc_mtx;
MTX_SYSINIT(fdesc, &fdesc_mtx, "fdesc", MTX_DEF);

/*
 * Release a filedesc structure.
 *
 * Drops one reference on the calling thread's descriptor table; the
 * last reference closes every open file and frees the table.  Also
 * detaches the process from its filedesc_to_leader record, releasing
 * POSIX advisory locks held on behalf of the process leader when this
 * is the last sharer.
 */
void
fdfree(td)
	struct thread *td;
{
	struct filedesc *fdp;
	struct file **fpp;
	int i;
	struct filedesc_to_leader *fdtol;
	struct file *fp;
	struct vnode *vp;
	struct flock lf;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	/* Check for special need to clear POSIX style locks */
	fdtol = td->td_proc->p_fdtol;
	if (fdtol != NULL) {
		FILEDESC_LOCK(fdp);
		KASSERT(fdtol->fdl_refcount > 0,
			("filedesc_to_refcount botch: fdl_refcount=%d",
			 fdtol->fdl_refcount));
		if (fdtol->fdl_refcount == 1 &&
		    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
			i = 0;
			fpp = fdp->fd_ofiles;
			/*
			 * NOTE(review): the bound is i < fd_lastfile, so the
			 * vnode at index fd_lastfile itself is skipped here —
			 * looks like an off-by-one; its POSIX locks are still
			 * released later via closef().  TODO confirm against
			 * later revisions.
			 */
			for (i = 0, fpp = fdp->fd_ofiles;
			     i < fdp->fd_lastfile;
			     i++, fpp++) {
				if (*fpp == NULL ||
				    (*fpp)->f_type != DTYPE_VNODE)
					continue;
				fp = *fpp;
				fhold(fp);
				FILEDESC_UNLOCK(fdp);
				lf.l_whence = SEEK_SET;
				lf.l_start = 0;
				lf.l_len = 0;
				lf.l_type = F_UNLCK;
				vp = fp->f_vnode;
				(void) VOP_ADVLOCK(vp,
						   (caddr_t)td->td_proc->
						   p_leader,
						   F_UNLCK,
						   &lf,
						   F_POSIX);
				FILEDESC_LOCK(fdp);
				fdrop(fp, td);
				/* Table may have been reallocated while unlocked. */
				fpp = fdp->fd_ofiles + i;
			}
		}
	retry:
		if (fdtol->fdl_refcount == 1) {
			if (fdp->fd_holdleaderscount > 0 &&
			    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
				/*
				 * close() or do_dup() has cleared a reference
				 * in a shared file descriptor table.
				 */
				fdp->fd_holdleaderswakeup = 1;
				msleep(&fdp->fd_holdleaderscount, &fdp->fd_mtx,
				       PLOCK, "fdlhold", 0);
				goto retry;
			}
			if (fdtol->fdl_holdcount > 0) {
				/*
				 * Ensure that fdtol->fdl_leader
				 * remains valid in closef().
				 */
				fdtol->fdl_wakeup = 1;
				msleep(fdtol, &fdp->fd_mtx,
				       PLOCK, "fdlhold", 0);
				goto retry;
			}
		}
		fdtol->fdl_refcount--;
		if (fdtol->fdl_refcount == 0 &&
		    fdtol->fdl_holdcount == 0) {
			/* Last sharer: unlink from the leader ring and free. */
			fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
			fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
		} else
			fdtol = NULL;	/* still shared; nothing to free */
		td->td_proc->p_fdtol = NULL;
		FILEDESC_UNLOCK(fdp);
		if (fdtol != NULL)
			FREE(fdtol, M_FILEDESC_TO_LEADER);
	}
	FILEDESC_LOCK(fdp);
	if (--fdp->fd_refcnt > 0) {
		FILEDESC_UNLOCK(fdp);
		return;
	}

	/*
	 * We are the last reference to the structure, so we can
	 * safely assume it will not change out from under us.
	 */
	FILEDESC_UNLOCK(fdp);
	fpp = fdp->fd_ofiles;
	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
		if (*fpp)
			(void) closef(*fpp, td);
	}

	/* XXX This should happen earlier. */
	mtx_lock(&fdesc_mtx);
	td->td_proc->p_fd = NULL;
	mtx_unlock(&fdesc_mtx);

	/* An externally-allocated table exists only when we outgrew NDFILE. */
	if (fdp->fd_nfiles > NDFILE)
		FREE(fdp->fd_ofiles, M_FILEDESC);
	if (fdp->fd_cdir)
		vrele(fdp->fd_cdir);
	if (fdp->fd_rdir)
		vrele(fdp->fd_rdir);
	if (fdp->fd_jdir)
		vrele(fdp->fd_jdir);
	if (fdp->fd_knlist)
		FREE(fdp->fd_knlist, M_KQUEUE);
	if (fdp->fd_knhash)
		FREE(fdp->fd_knhash, M_KQUEUE);
	mtx_destroy(&fdp->fd_mtx);
	FREE(fdp, M_FILEDESC);
}

/*
 * For setugid programs, we don't want people to use that setugidness
 * to generate error messages which write to a file which would
 * otherwise be off-limits to the process.  We check for filesystems
 * where the vnode can change out from under us after execve (like
 * [lin]procfs).
 *
 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
 * sufficient.  We also don't check for setugidness since we know we are.
1551 */ 1552 static int 1553 is_unsafe(struct file *fp) 1554 { 1555 if (fp->f_type == DTYPE_VNODE) { 1556 struct vnode *vp = fp->f_vnode; 1557 1558 if ((vp->v_vflag & VV_PROCDEP) != 0) 1559 return (1); 1560 } 1561 return (0); 1562 } 1563 1564 /* 1565 * Make this setguid thing safe, if at all possible. 1566 */ 1567 void 1568 setugidsafety(td) 1569 struct thread *td; 1570 { 1571 struct filedesc *fdp; 1572 int i; 1573 1574 /* Certain daemons might not have file descriptors. */ 1575 fdp = td->td_proc->p_fd; 1576 if (fdp == NULL) 1577 return; 1578 1579 /* 1580 * Note: fdp->fd_ofiles may be reallocated out from under us while 1581 * we are blocked in a close. Be careful! 1582 */ 1583 FILEDESC_LOCK(fdp); 1584 for (i = 0; i <= fdp->fd_lastfile; i++) { 1585 if (i > 2) 1586 break; 1587 if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) { 1588 struct file *fp; 1589 1590 #if 0 1591 if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0) 1592 (void) munmapfd(td, i); 1593 #endif 1594 if (i < fdp->fd_knlistsize) { 1595 FILEDESC_UNLOCK(fdp); 1596 knote_fdclose(td, i); 1597 FILEDESC_LOCK(fdp); 1598 } 1599 /* 1600 * NULL-out descriptor prior to close to avoid 1601 * a race while close blocks. 1602 */ 1603 fp = fdp->fd_ofiles[i]; 1604 fdp->fd_ofiles[i] = NULL; 1605 fdp->fd_ofileflags[i] = 0; 1606 if (i < fdp->fd_freefile) 1607 fdp->fd_freefile = i; 1608 FILEDESC_UNLOCK(fdp); 1609 (void) closef(fp, td); 1610 FILEDESC_LOCK(fdp); 1611 } 1612 } 1613 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) 1614 fdp->fd_lastfile--; 1615 FILEDESC_UNLOCK(fdp); 1616 } 1617 1618 /* 1619 * Close any files on exec? 1620 */ 1621 void 1622 fdcloseexec(td) 1623 struct thread *td; 1624 { 1625 struct filedesc *fdp; 1626 int i; 1627 1628 /* Certain daemons might not have file descriptors. 
*/ 1629 fdp = td->td_proc->p_fd; 1630 if (fdp == NULL) 1631 return; 1632 1633 FILEDESC_LOCK(fdp); 1634 1635 /* 1636 * We cannot cache fd_ofiles or fd_ofileflags since operations 1637 * may block and rip them out from under us. 1638 */ 1639 for (i = 0; i <= fdp->fd_lastfile; i++) { 1640 if (fdp->fd_ofiles[i] != NULL && 1641 (fdp->fd_ofileflags[i] & UF_EXCLOSE)) { 1642 struct file *fp; 1643 1644 #if 0 1645 if (fdp->fd_ofileflags[i] & UF_MAPPED) 1646 (void) munmapfd(td, i); 1647 #endif 1648 if (i < fdp->fd_knlistsize) { 1649 FILEDESC_UNLOCK(fdp); 1650 knote_fdclose(td, i); 1651 FILEDESC_LOCK(fdp); 1652 } 1653 /* 1654 * NULL-out descriptor prior to close to avoid 1655 * a race while close blocks. 1656 */ 1657 fp = fdp->fd_ofiles[i]; 1658 fdp->fd_ofiles[i] = NULL; 1659 fdp->fd_ofileflags[i] = 0; 1660 if (i < fdp->fd_freefile) 1661 fdp->fd_freefile = i; 1662 FILEDESC_UNLOCK(fdp); 1663 (void) closef(fp, td); 1664 FILEDESC_LOCK(fdp); 1665 } 1666 } 1667 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) 1668 fdp->fd_lastfile--; 1669 FILEDESC_UNLOCK(fdp); 1670 } 1671 1672 /* 1673 * It is unsafe for set[ug]id processes to be started with file 1674 * descriptors 0..2 closed, as these descriptors are given implicit 1675 * significance in the Standard C library. fdcheckstd() will create a 1676 * descriptor referencing /dev/null for each of stdin, stdout, and 1677 * stderr that is not already open. 1678 */ 1679 int 1680 fdcheckstd(td) 1681 struct thread *td; 1682 { 1683 struct nameidata nd; 1684 struct filedesc *fdp; 1685 struct file *fp; 1686 register_t retval; 1687 int fd, i, error, flags, devnull, extraref; 1688 1689 fdp = td->td_proc->p_fd; 1690 if (fdp == NULL) 1691 return (0); 1692 devnull = -1; 1693 error = 0; 1694 for (i = 0; i < 3; i++) { 1695 if (fdp->fd_ofiles[i] != NULL) 1696 continue; 1697 if (devnull < 0) { 1698 error = falloc(td, &fp, &fd); 1699 if (error != 0) 1700 break; 1701 /* Note extra ref on `fp' held for us by falloc(). 
*/ 1702 KASSERT(fd == i, ("oof, we didn't get our fd")); 1703 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null", 1704 td); 1705 flags = FREAD | FWRITE; 1706 error = vn_open(&nd, &flags, 0, -1); 1707 if (error != 0) { 1708 /* 1709 * Someone may have closed the entry in the 1710 * file descriptor table, so check it hasn't 1711 * changed before dropping the reference count. 1712 */ 1713 extraref = 0; 1714 FILEDESC_LOCK(fdp); 1715 if (fdp->fd_ofiles[fd] == fp) { 1716 fdp->fd_ofiles[fd] = NULL; 1717 extraref = 1; 1718 } 1719 FILEDESC_UNLOCK(fdp); 1720 fdrop(fp, td); 1721 if (extraref) 1722 fdrop(fp, td); 1723 break; 1724 } 1725 NDFREE(&nd, NDF_ONLY_PNBUF); 1726 fp->f_vnode = nd.ni_vp; 1727 fp->f_data = nd.ni_vp; 1728 fp->f_flag = flags; 1729 fp->f_ops = &vnops; 1730 fp->f_type = DTYPE_VNODE; 1731 VOP_UNLOCK(nd.ni_vp, 0, td); 1732 devnull = fd; 1733 } else { 1734 error = do_dup(td, DUP_FIXED, devnull, i, &retval); 1735 if (error != 0) 1736 break; 1737 } 1738 } 1739 return (error); 1740 } 1741 1742 /* 1743 * Internal form of close. 1744 * Decrement reference count on file structure. 1745 * Note: td may be NULL when closing a file 1746 * that was being passed in a message. 1747 */ 1748 int 1749 closef(fp, td) 1750 struct file *fp; 1751 struct thread *td; 1752 { 1753 struct vnode *vp; 1754 struct flock lf; 1755 struct filedesc_to_leader *fdtol; 1756 struct filedesc *fdp; 1757 1758 if (fp == NULL) 1759 return (0); 1760 /* 1761 * POSIX record locking dictates that any close releases ALL 1762 * locks owned by this process. This is handled by setting 1763 * a flag in the unlock to free ONLY locks obeying POSIX 1764 * semantics, and not to free BSD-style file locks. 1765 * If the descriptor was in a message, POSIX-style locks 1766 * aren't passed with the descriptor. 
1767 */ 1768 if (td != NULL && 1769 fp->f_type == DTYPE_VNODE) { 1770 if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1771 lf.l_whence = SEEK_SET; 1772 lf.l_start = 0; 1773 lf.l_len = 0; 1774 lf.l_type = F_UNLCK; 1775 vp = fp->f_vnode; 1776 (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader, 1777 F_UNLCK, &lf, F_POSIX); 1778 } 1779 fdtol = td->td_proc->p_fdtol; 1780 if (fdtol != NULL) { 1781 /* 1782 * Handle special case where file descriptor table 1783 * is shared between multiple process leaders. 1784 */ 1785 fdp = td->td_proc->p_fd; 1786 FILEDESC_LOCK(fdp); 1787 for (fdtol = fdtol->fdl_next; 1788 fdtol != td->td_proc->p_fdtol; 1789 fdtol = fdtol->fdl_next) { 1790 if ((fdtol->fdl_leader->p_flag & 1791 P_ADVLOCK) == 0) 1792 continue; 1793 fdtol->fdl_holdcount++; 1794 FILEDESC_UNLOCK(fdp); 1795 lf.l_whence = SEEK_SET; 1796 lf.l_start = 0; 1797 lf.l_len = 0; 1798 lf.l_type = F_UNLCK; 1799 vp = fp->f_vnode; 1800 (void) VOP_ADVLOCK(vp, 1801 (caddr_t)fdtol->fdl_leader, 1802 F_UNLCK, &lf, F_POSIX); 1803 FILEDESC_LOCK(fdp); 1804 fdtol->fdl_holdcount--; 1805 if (fdtol->fdl_holdcount == 0 && 1806 fdtol->fdl_wakeup != 0) { 1807 fdtol->fdl_wakeup = 0; 1808 wakeup(fdtol); 1809 } 1810 } 1811 FILEDESC_UNLOCK(fdp); 1812 } 1813 } 1814 return (fdrop(fp, td)); 1815 } 1816 1817 /* 1818 * Drop reference on struct file passed in, may call closef if the 1819 * reference hits zero. 1820 */ 1821 int 1822 fdrop(fp, td) 1823 struct file *fp; 1824 struct thread *td; 1825 { 1826 1827 FILE_LOCK(fp); 1828 return (fdrop_locked(fp, td)); 1829 } 1830 1831 /* 1832 * Extract the file pointer associated with the specified descriptor for 1833 * the current user process. 1834 * 1835 * If the descriptor doesn't exist, EBADF is returned. 1836 * 1837 * If the descriptor exists but doesn't match 'flags' then 1838 * return EBADF for read attempts and EINVAL for write attempts. 1839 * 1840 * If 'hold' is set (non-zero) the file's refcount will be bumped on return. 
 * It should be dropped with fdrop().
 * If it is not set, then the refcount will not be bumped however the
 * thread's filedesc struct will be returned locked (for fgetsock).
 *
 * If an error occurred the non-zero error is returned and *fpp is set to NULL.
 * Otherwise *fpp is set and zero is returned.
 */
static __inline int
_fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
{
	struct filedesc *fdp;
	struct file *fp;

	*fpp = NULL;
	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
		return (EBADF);
	FILEDESC_LOCK(fdp);
	/* badfileops means the file is allocated but not yet initialized. */
	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}

	/*
	 * Note: FREAD failures returns EBADF to maintain backwards
	 * compatibility with what routines returned before.
	 *
	 * Only one flag, or 0, may be specified.
	 */
	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}
	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
		FILEDESC_UNLOCK(fdp);
		return (EINVAL);
	}
	if (hold) {
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
	}
	/* !hold: the filedesc lock is intentionally left held for the caller. */
	*fpp = fp;
	return (0);
}

/* Get a held reference to the file for fd; no access-mode check. */
int
fget(struct thread *td, int fd, struct file **fpp)
{

	return(_fget(td, fd, fpp, 0, 1));
}

/* As fget(), but the file must be open for reading. */
int
fget_read(struct thread *td, int fd, struct file **fpp)
{

	return(_fget(td, fd, fpp, FREAD, 1));
}

/* As fget(), but the file must be open for writing. */
int
fget_write(struct thread *td, int fd, struct file **fpp)
{

	return(_fget(td, fd, fpp, FWRITE, 1));
}

/*
 * Like fget() but loads the underlying vnode, or returns an error if
 * the descriptor does not represent a vnode.  Note that pipes use vnodes
 * but never have VM objects (so VOP_GETVOBJECT() calls will return an
 * error).  The returned vnode will be vref()d.
 */
static __inline int
_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
{
	struct file *fp;
	int error;

	*vpp = NULL;
	/* hold == 0: _fget returns with the filedesc lock still held. */
	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
		return (error);
	if (fp->f_vnode == NULL) {
		error = EINVAL;
	} else {
		*vpp = fp->f_vnode;
		vref(*vpp);
	}
	FILEDESC_UNLOCK(td->td_proc->p_fd);
	return (error);
}

int
fgetvp(struct thread *td, int fd, struct vnode **vpp)
{

	return (_fgetvp(td, fd, vpp, 0));
}

int
fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
{

	return (_fgetvp(td, fd, vpp, FREAD));
}

int
fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
{

	return (_fgetvp(td, fd, vpp, FWRITE));
}

/*
 * Like fget() but loads the underlying socket, or returns an error if
 * the descriptor does not represent a socket.
 *
 * We bump the ref count on the returned socket.  XXX Also obtain the SX
 * lock in the future.
 */
int
fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
{
	struct file *fp;
	int error;

	*spp = NULL;
	if (fflagp != NULL)
		*fflagp = 0;
	/* hold == 0: filedesc lock held on success, released below. */
	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
		return (error);
	if (fp->f_type != DTYPE_SOCKET) {
		error = ENOTSOCK;
	} else {
		*spp = fp->f_data;
		if (fflagp)
			*fflagp = fp->f_flag;
		soref(*spp);
	}
	FILEDESC_UNLOCK(td->td_proc->p_fd);
	return (error);
}

/*
 * Drop the reference count on the socket and XXX release the SX lock in
 * the future.  The last reference closes the socket.
 */
void
fputsock(struct socket *so)
{

	sorele(so);
}

/*
 * Drop reference on struct file passed in, may call closef if the
 * reference hits zero.
 * Expects struct file locked, and will unlock it.
 */
int
fdrop_locked(fp, td)
	struct file *fp;
	struct thread *td;
{
	struct flock lf;
	struct vnode *vp;
	int error;

	FILE_LOCK_ASSERT(fp, MA_OWNED);

	if (--fp->f_count > 0) {
		FILE_UNLOCK(fp);
		return (0);
	}
	/* We have the last ref so we can proceed without the file lock. */
	FILE_UNLOCK(fp);
	mtx_lock(&Giant);
	if (fp->f_count < 0)
		panic("fdrop: count < 0");
	/* Release any flock()-style lock this file itself holds. */
	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;
		vp = fp->f_vnode;
		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
	}
	if (fp->f_ops != &badfileops)
		error = fo_close(fp, td);
	else
		error = 0;
	ffree(fp);
	mtx_unlock(&Giant);
	return (error);
}

/*
 * Apply an advisory lock on a file descriptor.
 *
 * Just attempt to get a record lock of the requested type on
 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
 */
#ifndef _SYS_SYSPROTO_H_
struct flock_args {
	int	fd;
	int	how;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
flock(td, uap)
	struct thread *td;
	struct flock_args *uap;
{
	struct file *fp;
	struct vnode *vp;
	struct flock lf;
	int error;

	if ((error = fget(td, uap->fd, &fp)) != 0)
		return (error);
	if (fp->f_type != DTYPE_VNODE) {
		fdrop(fp, td);
		return (EOPNOTSUPP);
	}

	mtx_lock(&Giant);
	vp = fp->f_vnode;
	lf.l_whence = SEEK_SET;
	lf.l_start = 0;
	lf.l_len = 0;
	if (uap->how & LOCK_UN) {
		lf.l_type = F_UNLCK;
		FILE_LOCK(fp);
		fp->f_flag &= ~FHASLOCK;
		FILE_UNLOCK(fp);
		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
		goto done2;
	}
	if (uap->how & LOCK_EX)
		lf.l_type = F_WRLCK;
	else if (uap->how & LOCK_SH)
		lf.l_type = F_RDLCK;
	else {
		error = EBADF;
		goto done2;
	}
	FILE_LOCK(fp);
	fp->f_flag |= FHASLOCK;
	FILE_UNLOCK(fp);
	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
done2:
	fdrop(fp, td);
	mtx_unlock(&Giant);
	return (error);
}

/*
 * File Descriptor pseudo-device driver (/dev/fd/).
 *
 * Opening minor device N dup()s the file (if any) connected to file
 * descriptor N belonging to the calling process.  Note that this driver
 * consists of only the ``open()'' routine, because all subsequent
 * references to this file will be direct to the other driver.
 */
/* ARGSUSED */
static int
fdopen(dev, mode, type, td)
	dev_t dev;
	int mode, type;
	struct thread *td;
{

	/*
	 * XXX Kludge: set curthread->td_dupfd to contain the value of
	 * the file descriptor being sought for duplication.  The error
	 * return ensures that the vnode for this device will be released
	 * by vn_open.  Open will detect this special error and take the
	 * actions in dupfdopen below.  Other callers of vn_open or VOP_OPEN
	 * will simply report the error.
	 */
	td->td_dupfd = dev2unit(dev);
	return (ENODEV);
}

/*
 * Duplicate the specified descriptor to a free descriptor.
 *
 * Called from the open path when vn_open returned the magic errors
 * set up by fdopen() above; 'error' selects the behavior.
 */
int
dupfdopen(td, fdp, indx, dfd, mode, error)
	struct thread *td;
	struct filedesc *fdp;
	int indx, dfd;
	int mode;
	int error;
{
	struct file *wfp;
	struct file *fp;

	/*
	 * If the to-be-dup'd fd number is greater than the allowed number
	 * of file descriptors, or the fd to be dup'd has already been
	 * closed, then reject.
	 */
	FILEDESC_LOCK(fdp);
	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}

	/*
	 * There are two cases of interest here.
	 *
	 * For ENODEV simply dup (dfd) to file descriptor
	 * (indx) and return.
	 *
	 * For ENXIO steal away the file structure from (dfd) and
	 * store it in (indx).  (dfd) is effectively closed by
	 * this operation.
	 *
	 * Any other error code is just returned.
	 */
	switch (error) {
	case ENODEV:
		/*
		 * Check that the mode the file is being opened for is a
		 * subset of the mode of the existing descriptor.
		 */
		FILE_LOCK(wfp);
		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
			FILE_UNLOCK(wfp);
			FILEDESC_UNLOCK(fdp);
			return (EACCES);
		}
		fp = fdp->fd_ofiles[indx];
#if 0
		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
			(void) munmapfd(td, indx);
#endif
		fdp->fd_ofiles[indx] = wfp;
		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
		fhold_locked(wfp);
		FILE_UNLOCK(wfp);
		if (indx > fdp->fd_lastfile)
			fdp->fd_lastfile = indx;
		if (fp != NULL)
			FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);
		/*
		 * We now own the reference to fp that the ofiles[] array
		 * used to own.  Release it.
		 */
		if (fp != NULL)
			fdrop_locked(fp, td);
		return (0);

	case ENXIO:
		/*
		 * Steal away the file pointer from dfd and stuff it into indx.
		 */
		fp = fdp->fd_ofiles[indx];
#if 0
		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
			(void) munmapfd(td, indx);
#endif
		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
		fdp->fd_ofiles[dfd] = NULL;
		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
		fdp->fd_ofileflags[dfd] = 0;

		/*
		 * Complete the clean up of the filedesc structure by
		 * recomputing the various hints.
		 */
		if (indx > fdp->fd_lastfile) {
			fdp->fd_lastfile = indx;
		} else {
			while (fdp->fd_lastfile > 0 &&
			       fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
				fdp->fd_lastfile--;
			}
			if (dfd < fdp->fd_freefile)
				fdp->fd_freefile = dfd;
		}
		if (fp != NULL)
			FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);

		/*
		 * We now own the reference to fp that the ofiles[] array
		 * used to own.  Release it.
		 */
		if (fp != NULL)
			fdrop_locked(fp, td);
		return (0);

	default:
		FILEDESC_UNLOCK(fdp);
		return (error);
	}
	/* NOTREACHED */
}


/*
 * Allocate a filedesc_to_leader record for 'leader', linking it into
 * the ring of 'old' when the descriptor table is shared by several
 * leaders, or as a singleton ring otherwise.
 */
struct filedesc_to_leader *
filedesc_to_leader_alloc(struct filedesc_to_leader *old,
			 struct filedesc *fdp,
			 struct proc *leader)
{
	struct filedesc_to_leader *fdtol;

	MALLOC(fdtol, struct filedesc_to_leader *,
	       sizeof(struct filedesc_to_leader),
	       M_FILEDESC_TO_LEADER,
	       M_WAITOK);
	fdtol->fdl_refcount = 1;
	fdtol->fdl_holdcount = 0;
	fdtol->fdl_wakeup = 0;
	fdtol->fdl_leader = leader;
	if (old != NULL) {
		FILEDESC_LOCK(fdp);
		fdtol->fdl_next = old->fdl_next;
		fdtol->fdl_prev = old;
		old->fdl_next = fdtol;
		fdtol->fdl_next->fdl_prev = fdtol;
		FILEDESC_UNLOCK(fdp);
	} else {
		/* First leader: ring of one. */
		fdtol->fdl_next = fdtol;
		fdtol->fdl_prev = fdtol;
	}
	return fdtol;
}

/*
 * Get file structures.
 *
 * sysctl handler exporting one struct xfile per open descriptor of
 * every process visible to the caller (kern.file).
 */
static int
sysctl_kern_file(SYSCTL_HANDLER_ARGS)
{
	struct xfile xf;
	struct filedesc *fdp;
	struct file *fp;
	struct proc *p;
	int error, n;

	/*
	 * Note: because the number of file descriptors is calculated
	 * in different ways for sizing vs returning the data,
	 * there is information leakage from the first loop.  However,
	 * it is of a similar order of magnitude to the leakage from
	 * global system statistics such as kern.openfiles.
	 */
	sysctl_wire_old_buffer(req, 0);
	if (req->oldptr == NULL) {
		/* Size probe only: estimate how much space is needed. */
		n = 16;		/* A slight overestimate. */
		sx_slock(&filelist_lock);
		LIST_FOREACH(fp, &filehead, f_list) {
			/*
			 * We should grab the lock, but this is an
			 * estimate, so does it really matter?
			 */
			/* mtx_lock(fp->f_mtxp); */
			n += fp->f_count;
			/* mtx_unlock(f->f_mtxp); */
		}
		sx_sunlock(&filelist_lock);
		return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
	}
	error = 0;
	bzero(&xf, sizeof(xf));
	xf.xf_size = sizeof(xf);
	sx_slock(&allproc_lock);
	LIST_FOREACH(p, &allproc, p_list) {
		PROC_LOCK(p);
		/* Honor security policy: skip processes the caller can't see. */
		if (p_cansee(req->td, p) != 0) {
			PROC_UNLOCK(p);
			continue;
		}
		xf.xf_pid = p->p_pid;
		xf.xf_uid = p->p_ucred->cr_uid;
		PROC_UNLOCK(p);
		mtx_lock(&fdesc_mtx);
		if ((fdp = p->p_fd) == NULL) {
			mtx_unlock(&fdesc_mtx);
			continue;
		}
		FILEDESC_LOCK(fdp);
		for (n = 0; n < fdp->fd_nfiles; ++n) {
			if ((fp = fdp->fd_ofiles[n]) == NULL)
				continue;
			xf.xf_fd = n;
			xf.xf_file = fp;
			xf.xf_data = fp->f_data;
			xf.xf_type = fp->f_type;
			xf.xf_count = fp->f_count;
			xf.xf_msgcount = fp->f_msgcount;
			xf.xf_offset = fp->f_offset;
			xf.xf_flag = fp->f_flag;
			error = SYSCTL_OUT(req, &xf, sizeof(xf));
			if (error)
				break;
		}
		FILEDESC_UNLOCK(fdp);
		mtx_unlock(&fdesc_mtx);
		if (error)
			break;
	}
	sx_sunlock(&allproc_lock);
	return (error);
}

SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, 0, sysctl_kern_file, "S,xfile", "Entire file table");

SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
    &maxfilesperproc, 0, "Maximum files allowed open per process");

SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
    &maxfiles, 0, "Maximum number of files");

SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
    &nfiles, 0, "System-wide number of open files");

/* Create /dev/fd/{0,1,2} and the stdin/stdout/stderr aliases at boot. */
static void
fildesc_drvinit(void *unused)
{
	dev_t dev;

	dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0");
	make_dev_alias(dev, "stdin");
	dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1");
	make_dev_alias(dev, "stdout");
	dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2");
	make_dev_alias(dev, "stderr");
}

/*
 * Default fileops installed by falloc(); every entry point fails (or
 * reports no events) until the file is fully initialized and real
 * fileops are installed.
 */
static fo_rdwr_t	badfo_readwrite;
static fo_ioctl_t	badfo_ioctl;
static fo_poll_t	badfo_poll;
static fo_kqfilter_t	badfo_kqfilter;
static fo_stat_t	badfo_stat;
static fo_close_t	badfo_close;

struct fileops badfileops = {
	.fo_read = badfo_readwrite,
	.fo_write = badfo_readwrite,
	.fo_ioctl = badfo_ioctl,
	.fo_poll = badfo_poll,
	.fo_kqfilter = badfo_kqfilter,
	.fo_stat = badfo_stat,
	.fo_close = badfo_close,
};

/*
 * K&R declarations below may list parameters in a different order than
 * the parameter list; that is legal and preserved as-is.
 */
static int
badfo_readwrite(fp, uio, active_cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *active_cred;
	struct thread *td;
	int flags;
{

	return (EBADF);
}

static int
badfo_ioctl(fp, com, data, active_cred, td)
	struct file *fp;
	u_long com;
	void *data;
	struct ucred *active_cred;
	struct thread *td;
{

	return (EBADF);
}

static int
badfo_poll(fp, events, active_cred, td)
	struct file *fp;
	int events;
	struct ucred *active_cred;
	struct thread *td;
{

	return (0);
}

static int
badfo_kqfilter(fp, kn)
	struct file *fp;
	struct knote *kn;
{

	return (0);
}

static int
badfo_stat(fp, sb, active_cred, td)
	struct file *fp;
	struct stat *sb;
	struct ucred *active_cred;
	struct thread *td;
{

	return (EBADF);
}

static int
badfo_close(fp, td)
	struct file *fp;
	struct thread *td;
{

	return (EBADF);
}

SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
	fildesc_drvinit,NULL)

static void filelistinit(void *);
SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)

/* ARGSUSED*/
/* One-time setup of the file zone and the global file-list/sigio locks. */
static void
filelistinit(dummy)
	void *dummy;
{

	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);
	sx_init(&filelist_lock, "filelist lock");
	mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
}