1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_compat.h" 41 #include "opt_ddb.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 46 #include <sys/conf.h> 47 #include <sys/fcntl.h> 48 #include <sys/file.h> 49 #include <sys/filedesc.h> 50 #include <sys/filio.h> 51 #include <sys/jail.h> 52 #include <sys/kernel.h> 53 #include <sys/limits.h> 54 #include <sys/lock.h> 55 #include <sys/malloc.h> 56 #include <sys/mount.h> 57 #include <sys/mqueue.h> 58 #include <sys/mutex.h> 59 #include <sys/namei.h> 60 #include <sys/priv.h> 61 #include <sys/proc.h> 62 #include <sys/resourcevar.h> 63 #include <sys/signalvar.h> 64 #include <sys/socketvar.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/syscallsubr.h> 68 #include <sys/sysctl.h> 69 #include <sys/sysproto.h> 70 #include <sys/unistd.h> 71 #include <sys/vnode.h> 72 73 #include <security/audit/audit.h> 74 75 #include <vm/uma.h> 76 77 #include <ddb/ddb.h> 78 79 static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table"); 80 static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader", 81 "file desc to leader structures"); 82 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); 83 84 static uma_zone_t file_zone; 85 86 87 /* How to treat 'new' parameter when allocating a fd for do_dup(). */ 88 enum dup_type { DUP_VARIABLE, DUP_FIXED }; 89 90 static int do_dup(struct thread *td, enum dup_type type, int old, int new, 91 register_t *retval); 92 static int fd_first_free(struct filedesc *, int, int); 93 static int fd_last_used(struct filedesc *, int, int); 94 static void fdgrowtable(struct filedesc *, int); 95 static int fdrop_locked(struct file *fp, struct thread *td); 96 static void fdunused(struct filedesc *fdp, int fd); 97 static void fdused(struct filedesc *fdp, int fd); 98 99 /* 100 * A process is initially started out with NDFILE descriptors stored within 101 * this structure, selected to be enough for typical applications based on 102 * the historical limit of 20 open files (and the usage of descriptors by 103 * shells). If these descriptors are exhausted, a larger descriptor table 104 * may be allocated, up to a process' resource limit; the internal arrays 105 * are then unused. 106 */ 107 #define NDFILE 20 108 #define NDSLOTSIZE sizeof(NDSLOTTYPE) 109 #define NDENTRIES (NDSLOTSIZE * __CHAR_BIT) 110 #define NDSLOT(x) ((x) / NDENTRIES) 111 #define NDBIT(x) ((NDSLOTTYPE)1 << ((x) % NDENTRIES)) 112 #define NDSLOTS(x) (((x) + NDENTRIES - 1) / NDENTRIES) 113 114 /* 115 * Storage required per open file descriptor. 116 */ 117 #define OFILESIZE (sizeof(struct file *) + sizeof(char)) 118 119 /* 120 * Basic allocation of descriptors: 121 * one of the above, plus arrays for NDFILE descriptors. 122 */ 123 struct filedesc0 { 124 struct filedesc fd_fd; 125 /* 126 * These arrays are used when the number of open files is 127 * <= NDFILE, and are then pointed to by the pointers above. 128 */ 129 struct file *fd_dfiles[NDFILE]; 130 char fd_dfileflags[NDFILE]; 131 NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)]; 132 }; 133 134 /* 135 * Descriptor management. 136 */ 137 struct filelist filehead; /* head of list of open files */ 138 int openfiles; /* actual number of open files */ 139 struct sx filelist_lock; /* sx to protect filelist */ 140 struct mtx sigio_lock; /* mtx to protect pointers to sigio */ 141 void (*mq_fdclose)(struct thread *td, int fd, struct file *fp); 142 143 /* A mutex to protect the association between a proc and filedesc. */ 144 static struct mtx fdesc_mtx; 145 146 /* 147 * Find the first zero bit in the given bitmap, starting at low and not 148 * exceeding size - 1. 149 */ 150 static int 151 fd_first_free(struct filedesc *fdp, int low, int size) 152 { 153 NDSLOTTYPE *map = fdp->fd_map; 154 NDSLOTTYPE mask; 155 int off, maxoff; 156 157 if (low >= size) 158 return (low); 159 160 off = NDSLOT(low); 161 if (low % NDENTRIES) { 162 mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES))); 163 if ((mask &= ~map[off]) != 0UL) 164 return (off * NDENTRIES + ffsl(mask) - 1); 165 ++off; 166 } 167 for (maxoff = NDSLOTS(size); off < maxoff; ++off) 168 if (map[off] != ~0UL) 169 return (off * NDENTRIES + ffsl(~map[off]) - 1); 170 return (size); 171 } 172 173 /* 174 * Find the highest non-zero bit in the given bitmap, starting at low and 175 * not exceeding size - 1. 176 */ 177 static int 178 fd_last_used(struct filedesc *fdp, int low, int size) 179 { 180 NDSLOTTYPE *map = fdp->fd_map; 181 NDSLOTTYPE mask; 182 int off, minoff; 183 184 if (low >= size) 185 return (-1); 186 187 off = NDSLOT(size); 188 if (size % NDENTRIES) { 189 mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES)); 190 if ((mask &= map[off]) != 0) 191 return (off * NDENTRIES + flsl(mask) - 1); 192 --off; 193 } 194 for (minoff = NDSLOT(low); off >= minoff; --off) 195 if (map[off] != 0) 196 return (off * NDENTRIES + flsl(map[off]) - 1); 197 return (low - 1); 198 } 199 200 static int 201 fdisused(struct filedesc *fdp, int fd) 202 { 203 KASSERT(fd >= 0 && fd < fdp->fd_nfiles, 204 ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles)); 205 return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0); 206 } 207 208 /* 209 * Mark a file descriptor as used. 210 */ 211 static void 212 fdused(struct filedesc *fdp, int fd) 213 { 214 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 215 KASSERT(!fdisused(fdp, fd), 216 ("fd already used")); 217 fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd); 218 if (fd > fdp->fd_lastfile) 219 fdp->fd_lastfile = fd; 220 if (fd == fdp->fd_freefile) 221 fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles); 222 } 223 224 /* 225 * Mark a file descriptor as unused. 226 */ 227 static void 228 fdunused(struct filedesc *fdp, int fd) 229 { 230 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 231 KASSERT(fdisused(fdp, fd), 232 ("fd is already unused")); 233 KASSERT(fdp->fd_ofiles[fd] == NULL, 234 ("fd is still in use")); 235 fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); 236 if (fd < fdp->fd_freefile) 237 fdp->fd_freefile = fd; 238 if (fd == fdp->fd_lastfile) 239 fdp->fd_lastfile = fd_last_used(fdp, 0, fd); 240 } 241 242 /* 243 * System calls on descriptors. 244 */ 245 #ifndef _SYS_SYSPROTO_H_ 246 struct getdtablesize_args { 247 int dummy; 248 }; 249 #endif 250 /* 251 * MPSAFE 252 */ 253 /* ARGSUSED */ 254 int 255 getdtablesize(struct thread *td, struct getdtablesize_args *uap) 256 { 257 struct proc *p = td->td_proc; 258 259 PROC_LOCK(p); 260 td->td_retval[0] = 261 min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 262 PROC_UNLOCK(p); 263 return (0); 264 } 265 266 /* 267 * Duplicate a file descriptor to a particular value. 268 * 269 * note: keep in mind that a potential race condition exists when closing 270 * descriptors from a shared descriptor table (via rfork). 271 */ 272 #ifndef _SYS_SYSPROTO_H_ 273 struct dup2_args { 274 u_int from; 275 u_int to; 276 }; 277 #endif 278 /* 279 * MPSAFE 280 */ 281 /* ARGSUSED */ 282 int 283 dup2(struct thread *td, struct dup2_args *uap) 284 { 285 286 return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to, 287 td->td_retval)); 288 } 289 290 /* 291 * Duplicate a file descriptor. 292 */ 293 #ifndef _SYS_SYSPROTO_H_ 294 struct dup_args { 295 u_int fd; 296 }; 297 #endif 298 /* 299 * MPSAFE 300 */ 301 /* ARGSUSED */ 302 int 303 dup(struct thread *td, struct dup_args *uap) 304 { 305 306 return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval)); 307 } 308 309 /* 310 * The file control system call. 311 */ 312 #ifndef _SYS_SYSPROTO_H_ 313 struct fcntl_args { 314 int fd; 315 int cmd; 316 long arg; 317 }; 318 #endif 319 /* 320 * MPSAFE 321 */ 322 /* ARGSUSED */ 323 int 324 fcntl(struct thread *td, struct fcntl_args *uap) 325 { 326 struct flock fl; 327 intptr_t arg; 328 int error; 329 330 error = 0; 331 switch (uap->cmd) { 332 case F_GETLK: 333 case F_SETLK: 334 case F_SETLKW: 335 error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl)); 336 arg = (intptr_t)&fl; 337 break; 338 default: 339 arg = uap->arg; 340 break; 341 } 342 if (error) 343 return (error); 344 error = kern_fcntl(td, uap->fd, uap->cmd, arg); 345 if (error) 346 return (error); 347 if (uap->cmd == F_GETLK) 348 error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl)); 349 return (error); 350 } 351 352 int 353 kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) 354 { 355 struct filedesc *fdp; 356 struct flock *flp; 357 struct file *fp; 358 struct proc *p; 359 char *pop; 360 struct vnode *vp; 361 u_int newmin; 362 int error, flg, tmp; 363 int giant_locked; 364 365 /* 366 * XXXRW: Some fcntl() calls require Giant -- others don't. Try to 367 * avoid grabbing Giant for calls we know don't need it. 368 */ 369 switch (cmd) { 370 case F_DUPFD: 371 case F_GETFD: 372 case F_SETFD: 373 case F_GETFL: 374 giant_locked = 0; 375 break; 376 377 default: 378 giant_locked = 1; 379 mtx_lock(&Giant); 380 } 381 382 error = 0; 383 flg = F_POSIX; 384 p = td->td_proc; 385 fdp = p->p_fd; 386 FILEDESC_LOCK(fdp); 387 if ((unsigned)fd >= fdp->fd_nfiles || 388 (fp = fdp->fd_ofiles[fd]) == NULL) { 389 FILEDESC_UNLOCK(fdp); 390 error = EBADF; 391 goto done2; 392 } 393 pop = &fdp->fd_ofileflags[fd]; 394 395 switch (cmd) { 396 case F_DUPFD: 397 /* mtx_assert(&Giant, MA_NOTOWNED); */ 398 FILEDESC_UNLOCK(fdp); 399 newmin = arg; 400 PROC_LOCK(p); 401 if (newmin >= lim_cur(p, RLIMIT_NOFILE) || 402 newmin >= maxfilesperproc) { 403 PROC_UNLOCK(p); 404 error = EINVAL; 405 break; 406 } 407 PROC_UNLOCK(p); 408 error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval); 409 break; 410 411 case F_GETFD: 412 /* mtx_assert(&Giant, MA_NOTOWNED); */ 413 td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0; 414 FILEDESC_UNLOCK(fdp); 415 break; 416 417 case F_SETFD: 418 /* mtx_assert(&Giant, MA_NOTOWNED); */ 419 *pop = (*pop &~ UF_EXCLOSE) | 420 (arg & FD_CLOEXEC ? UF_EXCLOSE : 0); 421 FILEDESC_UNLOCK(fdp); 422 break; 423 424 case F_GETFL: 425 /* mtx_assert(&Giant, MA_NOTOWNED); */ 426 FILE_LOCK(fp); 427 td->td_retval[0] = OFLAGS(fp->f_flag); 428 FILE_UNLOCK(fp); 429 FILEDESC_UNLOCK(fdp); 430 break; 431 432 case F_SETFL: 433 mtx_assert(&Giant, MA_OWNED); 434 FILE_LOCK(fp); 435 fhold_locked(fp); 436 fp->f_flag &= ~FCNTLFLAGS; 437 fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS; 438 FILE_UNLOCK(fp); 439 FILEDESC_UNLOCK(fdp); 440 tmp = fp->f_flag & FNONBLOCK; 441 error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); 442 if (error) { 443 fdrop(fp, td); 444 break; 445 } 446 tmp = fp->f_flag & FASYNC; 447 error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td); 448 if (error == 0) { 449 fdrop(fp, td); 450 break; 451 } 452 FILE_LOCK(fp); 453 fp->f_flag &= ~FNONBLOCK; 454 FILE_UNLOCK(fp); 455 tmp = 0; 456 (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); 457 fdrop(fp, td); 458 break; 459 460 case F_GETOWN: 461 mtx_assert(&Giant, MA_OWNED); 462 fhold(fp); 463 FILEDESC_UNLOCK(fdp); 464 error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); 465 if (error == 0) 466 td->td_retval[0] = tmp; 467 fdrop(fp, td); 468 break; 469 470 case F_SETOWN: 471 mtx_assert(&Giant, MA_OWNED); 472 fhold(fp); 473 FILEDESC_UNLOCK(fdp); 474 tmp = arg; 475 error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td); 476 fdrop(fp, td); 477 break; 478 479 case F_SETLKW: 480 mtx_assert(&Giant, MA_OWNED); 481 flg |= F_WAIT; 482 /* FALLTHROUGH F_SETLK */ 483 484 case F_SETLK: 485 mtx_assert(&Giant, MA_OWNED); 486 if (fp->f_type != DTYPE_VNODE) { 487 FILEDESC_UNLOCK(fdp); 488 error = EBADF; 489 break; 490 } 491 492 flp = (struct flock *)arg; 493 if (flp->l_whence == SEEK_CUR) { 494 if (fp->f_offset < 0 || 495 (flp->l_start > 0 && 496 fp->f_offset > OFF_MAX - flp->l_start)) { 497 FILEDESC_UNLOCK(fdp); 498 error = EOVERFLOW; 499 break; 500 } 501 flp->l_start += fp->f_offset; 502 } 503 504 /* 505 * VOP_ADVLOCK() may block. 506 */ 507 fhold(fp); 508 FILEDESC_UNLOCK(fdp); 509 vp = fp->f_vnode; 510 511 switch (flp->l_type) { 512 case F_RDLCK: 513 if ((fp->f_flag & FREAD) == 0) { 514 error = EBADF; 515 break; 516 } 517 PROC_LOCK(p->p_leader); 518 p->p_leader->p_flag |= P_ADVLOCK; 519 PROC_UNLOCK(p->p_leader); 520 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 521 flp, flg); 522 break; 523 case F_WRLCK: 524 if ((fp->f_flag & FWRITE) == 0) { 525 error = EBADF; 526 break; 527 } 528 PROC_LOCK(p->p_leader); 529 p->p_leader->p_flag |= P_ADVLOCK; 530 PROC_UNLOCK(p->p_leader); 531 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 532 flp, flg); 533 break; 534 case F_UNLCK: 535 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, 536 flp, F_POSIX); 537 break; 538 default: 539 error = EINVAL; 540 break; 541 } 542 /* Check for race with close */ 543 FILEDESC_LOCK_FAST(fdp); 544 if ((unsigned) fd >= fdp->fd_nfiles || 545 fp != fdp->fd_ofiles[fd]) { 546 FILEDESC_UNLOCK_FAST(fdp); 547 flp->l_whence = SEEK_SET; 548 flp->l_start = 0; 549 flp->l_len = 0; 550 flp->l_type = F_UNLCK; 551 (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, 552 F_UNLCK, flp, F_POSIX); 553 } else 554 FILEDESC_UNLOCK_FAST(fdp); 555 fdrop(fp, td); 556 break; 557 558 case F_GETLK: 559 mtx_assert(&Giant, MA_OWNED); 560 if (fp->f_type != DTYPE_VNODE) { 561 FILEDESC_UNLOCK(fdp); 562 error = EBADF; 563 break; 564 } 565 flp = (struct flock *)arg; 566 if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && 567 flp->l_type != F_UNLCK) { 568 FILEDESC_UNLOCK(fdp); 569 error = EINVAL; 570 break; 571 } 572 if (flp->l_whence == SEEK_CUR) { 573 if ((flp->l_start > 0 && 574 fp->f_offset > OFF_MAX - flp->l_start) || 575 (flp->l_start < 0 && 576 fp->f_offset < OFF_MIN - flp->l_start)) { 577 FILEDESC_UNLOCK(fdp); 578 error = EOVERFLOW; 579 break; 580 } 581 flp->l_start += fp->f_offset; 582 } 583 /* 584 * VOP_ADVLOCK() may block. 585 */ 586 fhold(fp); 587 FILEDESC_UNLOCK(fdp); 588 vp = fp->f_vnode; 589 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, 590 F_POSIX); 591 fdrop(fp, td); 592 break; 593 default: 594 FILEDESC_UNLOCK(fdp); 595 error = EINVAL; 596 break; 597 } 598 done2: 599 if (giant_locked) 600 mtx_unlock(&Giant); 601 return (error); 602 } 603 604 /* 605 * Common code for dup, dup2, and fcntl(F_DUPFD). 606 */ 607 static int 608 do_dup(struct thread *td, enum dup_type type, int old, int new, register_t *retval) 609 { 610 struct filedesc *fdp; 611 struct proc *p; 612 struct file *fp; 613 struct file *delfp; 614 int error, holdleaders, maxfd; 615 616 KASSERT((type == DUP_VARIABLE || type == DUP_FIXED), 617 ("invalid dup type %d", type)); 618 619 p = td->td_proc; 620 fdp = p->p_fd; 621 622 /* 623 * Verify we have a valid descriptor to dup from and possibly to 624 * dup to. 625 */ 626 if (old < 0 || new < 0) 627 return (EBADF); 628 PROC_LOCK(p); 629 maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 630 PROC_UNLOCK(p); 631 if (new >= maxfd) 632 return (EMFILE); 633 634 FILEDESC_LOCK(fdp); 635 if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) { 636 FILEDESC_UNLOCK(fdp); 637 return (EBADF); 638 } 639 if (type == DUP_FIXED && old == new) { 640 *retval = new; 641 FILEDESC_UNLOCK(fdp); 642 return (0); 643 } 644 fp = fdp->fd_ofiles[old]; 645 fhold(fp); 646 647 /* 648 * If the caller specified a file descriptor, make sure the file 649 * table is large enough to hold it, and grab it. Otherwise, just 650 * allocate a new descriptor the usual way. Since the filedesc 651 * lock may be temporarily dropped in the process, we have to look 652 * out for a race. 653 */ 654 if (type == DUP_FIXED) { 655 if (new >= fdp->fd_nfiles) 656 fdgrowtable(fdp, new + 1); 657 if (fdp->fd_ofiles[new] == NULL) 658 fdused(fdp, new); 659 } else { 660 if ((error = fdalloc(td, new, &new)) != 0) { 661 FILEDESC_UNLOCK(fdp); 662 fdrop(fp, td); 663 return (error); 664 } 665 } 666 667 /* 668 * If the old file changed out from under us then treat it as a 669 * bad file descriptor. Userland should do its own locking to 670 * avoid this case. 671 */ 672 if (fdp->fd_ofiles[old] != fp) { 673 /* we've allocated a descriptor which we won't use */ 674 if (fdp->fd_ofiles[new] == NULL) 675 fdunused(fdp, new); 676 FILEDESC_UNLOCK(fdp); 677 fdrop(fp, td); 678 return (EBADF); 679 } 680 KASSERT(old != new, 681 ("new fd is same as old")); 682 683 /* 684 * Save info on the descriptor being overwritten. We cannot close 685 * it without introducing an ownership race for the slot, since we 686 * need to drop the filedesc lock to call closef(). 687 * 688 * XXX this duplicates parts of close(). 689 */ 690 delfp = fdp->fd_ofiles[new]; 691 holdleaders = 0; 692 if (delfp != NULL) { 693 if (td->td_proc->p_fdtol != NULL) { 694 /* 695 * Ask fdfree() to sleep to ensure that all relevant 696 * process leaders can be traversed in closef(). 697 */ 698 fdp->fd_holdleaderscount++; 699 holdleaders = 1; 700 } 701 } 702 703 /* 704 * Duplicate the source descriptor 705 */ 706 fdp->fd_ofiles[new] = fp; 707 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; 708 if (new > fdp->fd_lastfile) 709 fdp->fd_lastfile = new; 710 *retval = new; 711 712 /* 713 * If we dup'd over a valid file, we now own the reference to it 714 * and must dispose of it using closef() semantics (as if a 715 * close() were performed on it). 716 * 717 * XXX this duplicates parts of close(). 718 */ 719 if (delfp != NULL) { 720 knote_fdclose(td, new); 721 if (delfp->f_type == DTYPE_MQUEUE) 722 mq_fdclose(td, new, delfp); 723 FILEDESC_UNLOCK(fdp); 724 (void) closef(delfp, td); 725 if (holdleaders) { 726 FILEDESC_LOCK_FAST(fdp); 727 fdp->fd_holdleaderscount--; 728 if (fdp->fd_holdleaderscount == 0 && 729 fdp->fd_holdleaderswakeup != 0) { 730 fdp->fd_holdleaderswakeup = 0; 731 wakeup(&fdp->fd_holdleaderscount); 732 } 733 FILEDESC_UNLOCK_FAST(fdp); 734 } 735 } else { 736 FILEDESC_UNLOCK(fdp); 737 } 738 return (0); 739 } 740 741 /* 742 * If sigio is on the list associated with a process or process group, 743 * disable signalling from the device, remove sigio from the list and 744 * free sigio. 745 */ 746 void 747 funsetown(struct sigio **sigiop) 748 { 749 struct sigio *sigio; 750 751 SIGIO_LOCK(); 752 sigio = *sigiop; 753 if (sigio == NULL) { 754 SIGIO_UNLOCK(); 755 return; 756 } 757 *(sigio->sio_myref) = NULL; 758 if ((sigio)->sio_pgid < 0) { 759 struct pgrp *pg = (sigio)->sio_pgrp; 760 PGRP_LOCK(pg); 761 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio, 762 sigio, sio_pgsigio); 763 PGRP_UNLOCK(pg); 764 } else { 765 struct proc *p = (sigio)->sio_proc; 766 PROC_LOCK(p); 767 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio, 768 sigio, sio_pgsigio); 769 PROC_UNLOCK(p); 770 } 771 SIGIO_UNLOCK(); 772 crfree(sigio->sio_ucred); 773 FREE(sigio, M_SIGIO); 774 } 775 776 /* 777 * Free a list of sigio structures. 778 * We only need to lock the SIGIO_LOCK because we have made ourselves 779 * inaccessible to callers of fsetown and therefore do not need to lock 780 * the proc or pgrp struct for the list manipulation. 781 */ 782 void 783 funsetownlst(struct sigiolst *sigiolst) 784 { 785 struct proc *p; 786 struct pgrp *pg; 787 struct sigio *sigio; 788 789 sigio = SLIST_FIRST(sigiolst); 790 if (sigio == NULL) 791 return; 792 p = NULL; 793 pg = NULL; 794 795 /* 796 * Every entry of the list should belong 797 * to a single proc or pgrp. 798 */ 799 if (sigio->sio_pgid < 0) { 800 pg = sigio->sio_pgrp; 801 PGRP_LOCK_ASSERT(pg, MA_NOTOWNED); 802 } else /* if (sigio->sio_pgid > 0) */ { 803 p = sigio->sio_proc; 804 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 805 } 806 807 SIGIO_LOCK(); 808 while ((sigio = SLIST_FIRST(sigiolst)) != NULL) { 809 *(sigio->sio_myref) = NULL; 810 if (pg != NULL) { 811 KASSERT(sigio->sio_pgid < 0, 812 ("Proc sigio in pgrp sigio list")); 813 KASSERT(sigio->sio_pgrp == pg, 814 ("Bogus pgrp in sigio list")); 815 PGRP_LOCK(pg); 816 SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, 817 sio_pgsigio); 818 PGRP_UNLOCK(pg); 819 } else /* if (p != NULL) */ { 820 KASSERT(sigio->sio_pgid > 0, 821 ("Pgrp sigio in proc sigio list")); 822 KASSERT(sigio->sio_proc == p, 823 ("Bogus proc in sigio list")); 824 PROC_LOCK(p); 825 SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, 826 sio_pgsigio); 827 PROC_UNLOCK(p); 828 } 829 SIGIO_UNLOCK(); 830 crfree(sigio->sio_ucred); 831 FREE(sigio, M_SIGIO); 832 SIGIO_LOCK(); 833 } 834 SIGIO_UNLOCK(); 835 } 836 837 /* 838 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg). 839 * 840 * After permission checking, add a sigio structure to the sigio list for 841 * the process or process group. 842 */ 843 int 844 fsetown(pid_t pgid, struct sigio **sigiop) 845 { 846 struct proc *proc; 847 struct pgrp *pgrp; 848 struct sigio *sigio; 849 int ret; 850 851 if (pgid == 0) { 852 funsetown(sigiop); 853 return (0); 854 } 855 856 ret = 0; 857 858 /* Allocate and fill in the new sigio out of locks. */ 859 MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK); 860 sigio->sio_pgid = pgid; 861 sigio->sio_ucred = crhold(curthread->td_ucred); 862 sigio->sio_myref = sigiop; 863 864 sx_slock(&proctree_lock); 865 if (pgid > 0) { 866 proc = pfind(pgid); 867 if (proc == NULL) { 868 ret = ESRCH; 869 goto fail; 870 } 871 872 /* 873 * Policy - Don't allow a process to FSETOWN a process 874 * in another session. 875 * 876 * Remove this test to allow maximum flexibility or 877 * restrict FSETOWN to the current process or process 878 * group for maximum safety. 879 */ 880 PROC_UNLOCK(proc); 881 if (proc->p_session != curthread->td_proc->p_session) { 882 ret = EPERM; 883 goto fail; 884 } 885 886 pgrp = NULL; 887 } else /* if (pgid < 0) */ { 888 pgrp = pgfind(-pgid); 889 if (pgrp == NULL) { 890 ret = ESRCH; 891 goto fail; 892 } 893 PGRP_UNLOCK(pgrp); 894 895 /* 896 * Policy - Don't allow a process to FSETOWN a process 897 * in another session. 898 * 899 * Remove this test to allow maximum flexibility or 900 * restrict FSETOWN to the current process or process 901 * group for maximum safety. 902 */ 903 if (pgrp->pg_session != curthread->td_proc->p_session) { 904 ret = EPERM; 905 goto fail; 906 } 907 908 proc = NULL; 909 } 910 funsetown(sigiop); 911 if (pgid > 0) { 912 PROC_LOCK(proc); 913 /* 914 * Since funsetownlst() is called without the proctree 915 * locked, we need to check for P_WEXIT. 916 * XXX: is ESRCH correct? 917 */ 918 if ((proc->p_flag & P_WEXIT) != 0) { 919 PROC_UNLOCK(proc); 920 ret = ESRCH; 921 goto fail; 922 } 923 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio); 924 sigio->sio_proc = proc; 925 PROC_UNLOCK(proc); 926 } else { 927 PGRP_LOCK(pgrp); 928 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio); 929 sigio->sio_pgrp = pgrp; 930 PGRP_UNLOCK(pgrp); 931 } 932 sx_sunlock(&proctree_lock); 933 SIGIO_LOCK(); 934 *sigiop = sigio; 935 SIGIO_UNLOCK(); 936 return (0); 937 938 fail: 939 sx_sunlock(&proctree_lock); 940 crfree(sigio->sio_ucred); 941 FREE(sigio, M_SIGIO); 942 return (ret); 943 } 944 945 /* 946 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg). 947 */ 948 pid_t 949 fgetown(sigiop) 950 struct sigio **sigiop; 951 { 952 pid_t pgid; 953 954 SIGIO_LOCK(); 955 pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0; 956 SIGIO_UNLOCK(); 957 return (pgid); 958 } 959 960 /* 961 * Close a file descriptor. 962 */ 963 #ifndef _SYS_SYSPROTO_H_ 964 struct close_args { 965 int fd; 966 }; 967 #endif 968 /* 969 * MPSAFE 970 */ 971 /* ARGSUSED */ 972 int 973 close(td, uap) 974 struct thread *td; 975 struct close_args *uap; 976 { 977 978 return (kern_close(td, uap->fd)); 979 } 980 981 int 982 kern_close(td, fd) 983 struct thread *td; 984 int fd; 985 { 986 struct filedesc *fdp; 987 struct file *fp; 988 int error; 989 int holdleaders; 990 991 error = 0; 992 holdleaders = 0; 993 fdp = td->td_proc->p_fd; 994 995 AUDIT_SYSCLOSE(td, fd); 996 997 FILEDESC_LOCK(fdp); 998 if ((unsigned)fd >= fdp->fd_nfiles || 999 (fp = fdp->fd_ofiles[fd]) == NULL) { 1000 FILEDESC_UNLOCK(fdp); 1001 return (EBADF); 1002 } 1003 fdp->fd_ofiles[fd] = NULL; 1004 fdp->fd_ofileflags[fd] = 0; 1005 fdunused(fdp, fd); 1006 if (td->td_proc->p_fdtol != NULL) { 1007 /* 1008 * Ask fdfree() to sleep to ensure that all relevant 1009 * process leaders can be traversed in closef(). 1010 */ 1011 fdp->fd_holdleaderscount++; 1012 holdleaders = 1; 1013 } 1014 1015 /* 1016 * We now hold the fp reference that used to be owned by the descriptor 1017 * array. 1018 * We have to unlock the FILEDESC *AFTER* knote_fdclose to prevent a 1019 * race of the fd getting opened, a knote added, and deleteing a knote 1020 * for the new fd. 1021 */ 1022 knote_fdclose(td, fd); 1023 if (fp->f_type == DTYPE_MQUEUE) 1024 mq_fdclose(td, fd, fp); 1025 FILEDESC_UNLOCK(fdp); 1026 1027 error = closef(fp, td); 1028 if (holdleaders) { 1029 FILEDESC_LOCK_FAST(fdp); 1030 fdp->fd_holdleaderscount--; 1031 if (fdp->fd_holdleaderscount == 0 && 1032 fdp->fd_holdleaderswakeup != 0) { 1033 fdp->fd_holdleaderswakeup = 0; 1034 wakeup(&fdp->fd_holdleaderscount); 1035 } 1036 FILEDESC_UNLOCK_FAST(fdp); 1037 } 1038 return (error); 1039 } 1040 1041 #if defined(COMPAT_43) 1042 /* 1043 * Return status information about a file descriptor. 1044 */ 1045 #ifndef _SYS_SYSPROTO_H_ 1046 struct ofstat_args { 1047 int fd; 1048 struct ostat *sb; 1049 }; 1050 #endif 1051 /* 1052 * MPSAFE 1053 */ 1054 /* ARGSUSED */ 1055 int 1056 ofstat(struct thread *td, struct ofstat_args *uap) 1057 { 1058 struct ostat oub; 1059 struct stat ub; 1060 int error; 1061 1062 error = kern_fstat(td, uap->fd, &ub); 1063 if (error == 0) { 1064 cvtstat(&ub, &oub); 1065 error = copyout(&oub, uap->sb, sizeof(oub)); 1066 } 1067 return (error); 1068 } 1069 #endif /* COMPAT_43 */ 1070 1071 /* 1072 * Return status information about a file descriptor. 1073 */ 1074 #ifndef _SYS_SYSPROTO_H_ 1075 struct fstat_args { 1076 int fd; 1077 struct stat *sb; 1078 }; 1079 #endif 1080 /* 1081 * MPSAFE 1082 */ 1083 /* ARGSUSED */ 1084 int 1085 fstat(struct thread *td, struct fstat_args *uap) 1086 { 1087 struct stat ub; 1088 int error; 1089 1090 error = kern_fstat(td, uap->fd, &ub); 1091 if (error == 0) 1092 error = copyout(&ub, uap->sb, sizeof(ub)); 1093 return (error); 1094 } 1095 1096 int 1097 kern_fstat(struct thread *td, int fd, struct stat *sbp) 1098 { 1099 struct file *fp; 1100 int error; 1101 1102 AUDIT_ARG(fd, fd); 1103 1104 if ((error = fget(td, fd, &fp)) != 0) 1105 return (error); 1106 1107 AUDIT_ARG(file, td->td_proc, fp); 1108 1109 error = fo_stat(fp, sbp, td->td_ucred, td); 1110 fdrop(fp, td); 1111 return (error); 1112 } 1113 1114 /* 1115 * Return status information about a file descriptor. 1116 */ 1117 #ifndef _SYS_SYSPROTO_H_ 1118 struct nfstat_args { 1119 int fd; 1120 struct nstat *sb; 1121 }; 1122 #endif 1123 /* 1124 * MPSAFE 1125 */ 1126 /* ARGSUSED */ 1127 int 1128 nfstat(struct thread *td, struct nfstat_args *uap) 1129 { 1130 struct nstat nub; 1131 struct stat ub; 1132 int error; 1133 1134 error = kern_fstat(td, uap->fd, &ub); 1135 if (error == 0) { 1136 cvtnstat(&ub, &nub); 1137 error = copyout(&nub, uap->sb, sizeof(nub)); 1138 } 1139 return (error); 1140 } 1141 1142 /* 1143 * Return pathconf information about a file descriptor. 1144 */ 1145 #ifndef _SYS_SYSPROTO_H_ 1146 struct fpathconf_args { 1147 int fd; 1148 int name; 1149 }; 1150 #endif 1151 /* 1152 * MPSAFE 1153 */ 1154 /* ARGSUSED */ 1155 int 1156 fpathconf(struct thread *td, struct fpathconf_args *uap) 1157 { 1158 struct file *fp; 1159 struct vnode *vp; 1160 int error; 1161 1162 if ((error = fget(td, uap->fd, &fp)) != 0) 1163 return (error); 1164 1165 /* If asynchronous I/O is available, it works for all descriptors. */ 1166 if (uap->name == _PC_ASYNC_IO) { 1167 td->td_retval[0] = async_io_version; 1168 goto out; 1169 } 1170 vp = fp->f_vnode; 1171 if (vp != NULL) { 1172 int vfslocked; 1173 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1174 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1175 error = VOP_PATHCONF(vp, uap->name, td->td_retval); 1176 VOP_UNLOCK(vp, 0, td); 1177 VFS_UNLOCK_GIANT(vfslocked); 1178 } else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { 1179 if (uap->name != _PC_PIPE_BUF) { 1180 error = EINVAL; 1181 } else { 1182 td->td_retval[0] = PIPE_BUF; 1183 error = 0; 1184 } 1185 } else { 1186 error = EOPNOTSUPP; 1187 } 1188 out: 1189 fdrop(fp, td); 1190 return (error); 1191 } 1192 1193 /* 1194 * Grow the file table to accomodate (at least) nfd descriptors. This may 1195 * block and drop the filedesc lock, but it will reacquire it before 1196 * returning. 1197 */ 1198 static void 1199 fdgrowtable(struct filedesc *fdp, int nfd) 1200 { 1201 struct file **ntable; 1202 char *nfileflags; 1203 int nnfiles, onfiles; 1204 NDSLOTTYPE *nmap; 1205 1206 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 1207 1208 KASSERT(fdp->fd_nfiles > 0, 1209 ("zero-length file table")); 1210 1211 /* compute the size of the new table */ 1212 onfiles = fdp->fd_nfiles; 1213 nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */ 1214 if (nnfiles <= onfiles) 1215 /* the table is already large enough */ 1216 return; 1217 1218 /* allocate a new table and (if required) new bitmaps */ 1219 FILEDESC_UNLOCK(fdp); 1220 MALLOC(ntable, struct file **, nnfiles * OFILESIZE, 1221 M_FILEDESC, M_ZERO | M_WAITOK); 1222 nfileflags = (char *)&ntable[nnfiles]; 1223 if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) 1224 MALLOC(nmap, NDSLOTTYPE *, NDSLOTS(nnfiles) * NDSLOTSIZE, 1225 M_FILEDESC, M_ZERO | M_WAITOK); 1226 else 1227 nmap = NULL; 1228 FILEDESC_LOCK(fdp); 1229 1230 /* 1231 * We now have new tables ready to go. Since we dropped the 1232 * filedesc lock to call malloc(), watch out for a race. 1233 */ 1234 onfiles = fdp->fd_nfiles; 1235 if (onfiles >= nnfiles) { 1236 /* we lost the race, but that's OK */ 1237 free(ntable, M_FILEDESC); 1238 if (nmap != NULL) 1239 free(nmap, M_FILEDESC); 1240 return; 1241 } 1242 bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable)); 1243 bcopy(fdp->fd_ofileflags, nfileflags, onfiles); 1244 if (onfiles > NDFILE) 1245 free(fdp->fd_ofiles, M_FILEDESC); 1246 fdp->fd_ofiles = ntable; 1247 fdp->fd_ofileflags = nfileflags; 1248 if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) { 1249 bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap)); 1250 if (NDSLOTS(onfiles) > NDSLOTS(NDFILE)) 1251 free(fdp->fd_map, M_FILEDESC); 1252 fdp->fd_map = nmap; 1253 } 1254 fdp->fd_nfiles = nnfiles; 1255 } 1256 1257 /* 1258 * Allocate a file descriptor for the process. 1259 */ 1260 int 1261 fdalloc(struct thread *td, int minfd, int *result) 1262 { 1263 struct proc *p = td->td_proc; 1264 struct filedesc *fdp = p->p_fd; 1265 int fd = -1, maxfd; 1266 1267 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 1268 1269 if (fdp->fd_freefile > minfd) 1270 minfd = fdp->fd_freefile; 1271 1272 PROC_LOCK(p); 1273 maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 1274 PROC_UNLOCK(p); 1275 1276 /* 1277 * Search the bitmap for a free descriptor. If none is found, try 1278 * to grow the file table. Keep at it until we either get a file 1279 * descriptor or run into process or system limits; fdgrowtable() 1280 * may drop the filedesc lock, so we're in a race. 1281 */ 1282 for (;;) { 1283 fd = fd_first_free(fdp, minfd, fdp->fd_nfiles); 1284 if (fd >= maxfd) 1285 return (EMFILE); 1286 if (fd < fdp->fd_nfiles) 1287 break; 1288 fdgrowtable(fdp, min(fdp->fd_nfiles * 2, maxfd)); 1289 } 1290 1291 /* 1292 * Perform some sanity checks, then mark the file descriptor as 1293 * used and return it to the caller. 1294 */ 1295 KASSERT(!fdisused(fdp, fd), 1296 ("fd_first_free() returned non-free descriptor")); 1297 KASSERT(fdp->fd_ofiles[fd] == NULL, 1298 ("free descriptor isn't")); 1299 fdp->fd_ofileflags[fd] = 0; /* XXX needed? */ 1300 fdused(fdp, fd); 1301 *result = fd; 1302 return (0); 1303 } 1304 1305 /* 1306 * Check to see whether n user file descriptors 1307 * are available to the process p. 1308 */ 1309 int 1310 fdavail(struct thread *td, int n) 1311 { 1312 struct proc *p = td->td_proc; 1313 struct filedesc *fdp = td->td_proc->p_fd; 1314 struct file **fpp; 1315 int i, lim, last; 1316 1317 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); 1318 1319 PROC_LOCK(p); 1320 lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 1321 PROC_UNLOCK(p); 1322 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) 1323 return (1); 1324 last = min(fdp->fd_nfiles, lim); 1325 fpp = &fdp->fd_ofiles[fdp->fd_freefile]; 1326 for (i = last - fdp->fd_freefile; --i >= 0; fpp++) { 1327 if (*fpp == NULL && --n <= 0) 1328 return (1); 1329 } 1330 return (0); 1331 } 1332 1333 /* 1334 * Create a new open file structure and allocate 1335 * a file decriptor for the process that refers to it. 1336 * We add one reference to the file for the descriptor table 1337 * and one reference for resultfp. This is to prevent us being 1338 * preempted and the entry in the descriptor table closed after 1339 * we release the FILEDESC lock. 1340 */ 1341 int 1342 falloc(struct thread *td, struct file **resultfp, int *resultfd) 1343 { 1344 struct proc *p = td->td_proc; 1345 struct file *fp, *fq; 1346 int error, i; 1347 int maxuserfiles = maxfiles - (maxfiles / 20); 1348 static struct timeval lastfail; 1349 static int curfail; 1350 1351 fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO); 1352 sx_xlock(&filelist_lock); 1353 1354 if ((openfiles >= maxuserfiles && 1355 priv_check_cred(td->td_ucred, PRIV_MAXFILES, SUSER_RUID) != 0) || 1356 openfiles >= maxfiles) { 1357 if (ppsratecheck(&lastfail, &curfail, 1)) { 1358 printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n", 1359 td->td_ucred->cr_ruid); 1360 } 1361 sx_xunlock(&filelist_lock); 1362 uma_zfree(file_zone, fp); 1363 return (ENFILE); 1364 } 1365 openfiles++; 1366 1367 /* 1368 * If the process has file descriptor zero open, add the new file 1369 * descriptor to the list of open files at that point, otherwise 1370 * put it at the front of the list of open files. 1371 */ 1372 fp->f_mtxp = mtx_pool_alloc(mtxpool_sleep); 1373 fp->f_count = 1; 1374 if (resultfp) 1375 fp->f_count++; 1376 fp->f_cred = crhold(td->td_ucred); 1377 fp->f_ops = &badfileops; 1378 fp->f_data = NULL; 1379 fp->f_vnode = NULL; 1380 FILEDESC_LOCK(p->p_fd); 1381 if ((fq = p->p_fd->fd_ofiles[0])) { 1382 LIST_INSERT_AFTER(fq, fp, f_list); 1383 } else { 1384 LIST_INSERT_HEAD(&filehead, fp, f_list); 1385 } 1386 sx_xunlock(&filelist_lock); 1387 if ((error = fdalloc(td, 0, &i))) { 1388 FILEDESC_UNLOCK(p->p_fd); 1389 fdrop(fp, td); 1390 if (resultfp) 1391 fdrop(fp, td); 1392 return (error); 1393 } 1394 p->p_fd->fd_ofiles[i] = fp; 1395 FILEDESC_UNLOCK(p->p_fd); 1396 if (resultfp) 1397 *resultfp = fp; 1398 if (resultfd) 1399 *resultfd = i; 1400 return (0); 1401 } 1402 1403 /* 1404 * Build a new filedesc structure from another. 1405 * Copy the current, root, and jail root vnode references. 1406 */ 1407 struct filedesc * 1408 fdinit(struct filedesc *fdp) 1409 { 1410 struct filedesc0 *newfdp; 1411 1412 newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO); 1413 mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF); 1414 if (fdp != NULL) { 1415 FILEDESC_LOCK(fdp); 1416 newfdp->fd_fd.fd_cdir = fdp->fd_cdir; 1417 if (newfdp->fd_fd.fd_cdir) 1418 VREF(newfdp->fd_fd.fd_cdir); 1419 newfdp->fd_fd.fd_rdir = fdp->fd_rdir; 1420 if (newfdp->fd_fd.fd_rdir) 1421 VREF(newfdp->fd_fd.fd_rdir); 1422 newfdp->fd_fd.fd_jdir = fdp->fd_jdir; 1423 if (newfdp->fd_fd.fd_jdir) 1424 VREF(newfdp->fd_fd.fd_jdir); 1425 FILEDESC_UNLOCK(fdp); 1426 } 1427 1428 /* Create the file descriptor table. */ 1429 newfdp->fd_fd.fd_refcnt = 1; 1430 newfdp->fd_fd.fd_holdcnt = 1; 1431 newfdp->fd_fd.fd_cmask = CMASK; 1432 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 1433 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 1434 newfdp->fd_fd.fd_nfiles = NDFILE; 1435 newfdp->fd_fd.fd_map = newfdp->fd_dmap; 1436 newfdp->fd_fd.fd_lastfile = -1; 1437 return (&newfdp->fd_fd); 1438 } 1439 1440 static struct filedesc * 1441 fdhold(struct proc *p) 1442 { 1443 struct filedesc *fdp; 1444 1445 mtx_lock(&fdesc_mtx); 1446 fdp = p->p_fd; 1447 if (fdp != NULL) 1448 fdp->fd_holdcnt++; 1449 mtx_unlock(&fdesc_mtx); 1450 return (fdp); 1451 } 1452 1453 static void 1454 fddrop(struct filedesc *fdp) 1455 { 1456 int i; 1457 1458 mtx_lock(&fdesc_mtx); 1459 i = --fdp->fd_holdcnt; 1460 mtx_unlock(&fdesc_mtx); 1461 if (i > 0) 1462 return; 1463 1464 mtx_destroy(&fdp->fd_mtx); 1465 FREE(fdp, M_FILEDESC); 1466 } 1467 1468 /* 1469 * Share a filedesc structure. 1470 */ 1471 struct filedesc * 1472 fdshare(struct filedesc *fdp) 1473 { 1474 FILEDESC_LOCK_FAST(fdp); 1475 fdp->fd_refcnt++; 1476 FILEDESC_UNLOCK_FAST(fdp); 1477 return (fdp); 1478 } 1479 1480 /* 1481 * Unshare a filedesc structure, if necessary by making a copy 1482 */ 1483 void 1484 fdunshare(struct proc *p, struct thread *td) 1485 { 1486 1487 FILEDESC_LOCK_FAST(p->p_fd); 1488 if (p->p_fd->fd_refcnt > 1) { 1489 struct filedesc *tmp; 1490 1491 FILEDESC_UNLOCK_FAST(p->p_fd); 1492 tmp = fdcopy(p->p_fd); 1493 fdfree(td); 1494 p->p_fd = tmp; 1495 } else 1496 FILEDESC_UNLOCK_FAST(p->p_fd); 1497 } 1498 1499 /* 1500 * Copy a filedesc structure. 1501 * A NULL pointer in returns a NULL reference, this is to ease callers, 1502 * not catch errors. 1503 */ 1504 struct filedesc * 1505 fdcopy(struct filedesc *fdp) 1506 { 1507 struct filedesc *newfdp; 1508 int i; 1509 1510 /* Certain daemons might not have file descriptors. */ 1511 if (fdp == NULL) 1512 return (NULL); 1513 1514 newfdp = fdinit(fdp); 1515 FILEDESC_LOCK_FAST(fdp); 1516 while (fdp->fd_lastfile >= newfdp->fd_nfiles) { 1517 FILEDESC_UNLOCK_FAST(fdp); 1518 FILEDESC_LOCK(newfdp); 1519 fdgrowtable(newfdp, fdp->fd_lastfile + 1); 1520 FILEDESC_UNLOCK(newfdp); 1521 FILEDESC_LOCK_FAST(fdp); 1522 } 1523 /* copy everything except kqueue descriptors */ 1524 newfdp->fd_freefile = -1; 1525 for (i = 0; i <= fdp->fd_lastfile; ++i) { 1526 if (fdisused(fdp, i) && 1527 fdp->fd_ofiles[i]->f_type != DTYPE_KQUEUE) { 1528 newfdp->fd_ofiles[i] = fdp->fd_ofiles[i]; 1529 newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i]; 1530 fhold(newfdp->fd_ofiles[i]); 1531 newfdp->fd_lastfile = i; 1532 } else { 1533 if (newfdp->fd_freefile == -1) 1534 newfdp->fd_freefile = i; 1535 } 1536 } 1537 FILEDESC_UNLOCK_FAST(fdp); 1538 FILEDESC_LOCK(newfdp); 1539 for (i = 0; i <= newfdp->fd_lastfile; ++i) 1540 if (newfdp->fd_ofiles[i] != NULL) 1541 fdused(newfdp, i); 1542 FILEDESC_UNLOCK(newfdp); 1543 FILEDESC_LOCK_FAST(fdp); 1544 if (newfdp->fd_freefile == -1) 1545 newfdp->fd_freefile = i; 1546 newfdp->fd_cmask = fdp->fd_cmask; 1547 FILEDESC_UNLOCK_FAST(fdp); 1548 return (newfdp); 1549 } 1550 1551 /* 1552 * Release a filedesc structure. 1553 */ 1554 void 1555 fdfree(struct thread *td) 1556 { 1557 struct filedesc *fdp; 1558 struct file **fpp; 1559 int i, locked; 1560 struct filedesc_to_leader *fdtol; 1561 struct file *fp; 1562 struct vnode *cdir, *jdir, *rdir, *vp; 1563 struct flock lf; 1564 1565 /* Certain daemons might not have file descriptors. */ 1566 fdp = td->td_proc->p_fd; 1567 if (fdp == NULL) 1568 return; 1569 1570 /* Check for special need to clear POSIX style locks */ 1571 fdtol = td->td_proc->p_fdtol; 1572 if (fdtol != NULL) { 1573 FILEDESC_LOCK(fdp); 1574 KASSERT(fdtol->fdl_refcount > 0, 1575 ("filedesc_to_refcount botch: fdl_refcount=%d", 1576 fdtol->fdl_refcount)); 1577 if (fdtol->fdl_refcount == 1 && 1578 (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1579 for (i = 0, fpp = fdp->fd_ofiles; 1580 i <= fdp->fd_lastfile; 1581 i++, fpp++) { 1582 if (*fpp == NULL || 1583 (*fpp)->f_type != DTYPE_VNODE) 1584 continue; 1585 fp = *fpp; 1586 fhold(fp); 1587 FILEDESC_UNLOCK(fdp); 1588 lf.l_whence = SEEK_SET; 1589 lf.l_start = 0; 1590 lf.l_len = 0; 1591 lf.l_type = F_UNLCK; 1592 vp = fp->f_vnode; 1593 locked = VFS_LOCK_GIANT(vp->v_mount); 1594 (void) VOP_ADVLOCK(vp, 1595 (caddr_t)td->td_proc-> 1596 p_leader, 1597 F_UNLCK, 1598 &lf, 1599 F_POSIX); 1600 VFS_UNLOCK_GIANT(locked); 1601 FILEDESC_LOCK(fdp); 1602 fdrop(fp, td); 1603 fpp = fdp->fd_ofiles + i; 1604 } 1605 } 1606 retry: 1607 if (fdtol->fdl_refcount == 1) { 1608 if (fdp->fd_holdleaderscount > 0 && 1609 (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1610 /* 1611 * close() or do_dup() has cleared a reference 1612 * in a shared file descriptor table. 1613 */ 1614 fdp->fd_holdleaderswakeup = 1; 1615 msleep(&fdp->fd_holdleaderscount, &fdp->fd_mtx, 1616 PLOCK, "fdlhold", 0); 1617 goto retry; 1618 } 1619 if (fdtol->fdl_holdcount > 0) { 1620 /* 1621 * Ensure that fdtol->fdl_leader 1622 * remains valid in closef(). 1623 */ 1624 fdtol->fdl_wakeup = 1; 1625 msleep(fdtol, &fdp->fd_mtx, 1626 PLOCK, "fdlhold", 0); 1627 goto retry; 1628 } 1629 } 1630 fdtol->fdl_refcount--; 1631 if (fdtol->fdl_refcount == 0 && 1632 fdtol->fdl_holdcount == 0) { 1633 fdtol->fdl_next->fdl_prev = fdtol->fdl_prev; 1634 fdtol->fdl_prev->fdl_next = fdtol->fdl_next; 1635 } else 1636 fdtol = NULL; 1637 td->td_proc->p_fdtol = NULL; 1638 FILEDESC_UNLOCK(fdp); 1639 if (fdtol != NULL) 1640 FREE(fdtol, M_FILEDESC_TO_LEADER); 1641 } 1642 FILEDESC_LOCK_FAST(fdp); 1643 i = --fdp->fd_refcnt; 1644 FILEDESC_UNLOCK_FAST(fdp); 1645 if (i > 0) 1646 return; 1647 /* 1648 * We are the last reference to the structure, so we can 1649 * safely assume it will not change out from under us. 1650 */ 1651 fpp = fdp->fd_ofiles; 1652 for (i = fdp->fd_lastfile; i-- >= 0; fpp++) { 1653 if (*fpp) 1654 (void) closef(*fpp, td); 1655 } 1656 FILEDESC_LOCK(fdp); 1657 1658 /* XXX This should happen earlier. */ 1659 mtx_lock(&fdesc_mtx); 1660 td->td_proc->p_fd = NULL; 1661 mtx_unlock(&fdesc_mtx); 1662 1663 if (fdp->fd_nfiles > NDFILE) 1664 FREE(fdp->fd_ofiles, M_FILEDESC); 1665 if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE)) 1666 FREE(fdp->fd_map, M_FILEDESC); 1667 1668 fdp->fd_nfiles = 0; 1669 1670 cdir = fdp->fd_cdir; 1671 fdp->fd_cdir = NULL; 1672 rdir = fdp->fd_rdir; 1673 fdp->fd_rdir = NULL; 1674 jdir = fdp->fd_jdir; 1675 fdp->fd_jdir = NULL; 1676 FILEDESC_UNLOCK(fdp); 1677 1678 if (cdir) { 1679 locked = VFS_LOCK_GIANT(cdir->v_mount); 1680 vrele(cdir); 1681 VFS_UNLOCK_GIANT(locked); 1682 } 1683 if (rdir) { 1684 locked = VFS_LOCK_GIANT(rdir->v_mount); 1685 vrele(rdir); 1686 VFS_UNLOCK_GIANT(locked); 1687 } 1688 if (jdir) { 1689 locked = VFS_LOCK_GIANT(jdir->v_mount); 1690 vrele(jdir); 1691 VFS_UNLOCK_GIANT(locked); 1692 } 1693 1694 fddrop(fdp); 1695 } 1696 1697 /* 1698 * For setugid programs, we don't want to people to use that setugidness 1699 * to generate error messages which write to a file which otherwise would 1700 * otherwise be off-limits to the process. We check for filesystems where 1701 * the vnode can change out from under us after execve (like [lin]procfs). 1702 * 1703 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is 1704 * sufficient. We also don't check for setugidness since we know we are. 1705 */ 1706 static int 1707 is_unsafe(struct file *fp) 1708 { 1709 if (fp->f_type == DTYPE_VNODE) { 1710 struct vnode *vp = fp->f_vnode; 1711 1712 if ((vp->v_vflag & VV_PROCDEP) != 0) 1713 return (1); 1714 } 1715 return (0); 1716 } 1717 1718 /* 1719 * Make this setguid thing safe, if at all possible. 1720 */ 1721 void 1722 setugidsafety(struct thread *td) 1723 { 1724 struct filedesc *fdp; 1725 int i; 1726 1727 /* Certain daemons might not have file descriptors. */ 1728 fdp = td->td_proc->p_fd; 1729 if (fdp == NULL) 1730 return; 1731 1732 /* 1733 * Note: fdp->fd_ofiles may be reallocated out from under us while 1734 * we are blocked in a close. Be careful! 1735 */ 1736 FILEDESC_LOCK(fdp); 1737 for (i = 0; i <= fdp->fd_lastfile; i++) { 1738 if (i > 2) 1739 break; 1740 if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) { 1741 struct file *fp; 1742 1743 knote_fdclose(td, i); 1744 /* 1745 * NULL-out descriptor prior to close to avoid 1746 * a race while close blocks. 1747 */ 1748 fp = fdp->fd_ofiles[i]; 1749 fdp->fd_ofiles[i] = NULL; 1750 fdp->fd_ofileflags[i] = 0; 1751 fdunused(fdp, i); 1752 FILEDESC_UNLOCK(fdp); 1753 (void) closef(fp, td); 1754 FILEDESC_LOCK(fdp); 1755 } 1756 } 1757 FILEDESC_UNLOCK(fdp); 1758 } 1759 1760 /* 1761 * If a specific file object occupies a specific file descriptor, 1762 * close the file descriptor entry and drop a reference on the file 1763 * object. This is a convenience function to handle a subsequent 1764 * error in a function that calls falloc() that handles the race that 1765 * another thread might have closed the file descriptor out from under 1766 * the thread creating the file object. 1767 */ 1768 void 1769 fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td) 1770 { 1771 1772 FILEDESC_LOCK(fdp); 1773 if (fdp->fd_ofiles[idx] == fp) { 1774 fdp->fd_ofiles[idx] = NULL; 1775 fdunused(fdp, idx); 1776 FILEDESC_UNLOCK(fdp); 1777 fdrop(fp, td); 1778 } else { 1779 FILEDESC_UNLOCK(fdp); 1780 } 1781 } 1782 1783 /* 1784 * Close any files on exec? 1785 */ 1786 void 1787 fdcloseexec(struct thread *td) 1788 { 1789 struct filedesc *fdp; 1790 int i; 1791 1792 /* Certain daemons might not have file descriptors. */ 1793 fdp = td->td_proc->p_fd; 1794 if (fdp == NULL) 1795 return; 1796 1797 FILEDESC_LOCK(fdp); 1798 1799 /* 1800 * We cannot cache fd_ofiles or fd_ofileflags since operations 1801 * may block and rip them out from under us. 1802 */ 1803 for (i = 0; i <= fdp->fd_lastfile; i++) { 1804 if (fdp->fd_ofiles[i] != NULL && 1805 (fdp->fd_ofiles[i]->f_type == DTYPE_MQUEUE || 1806 (fdp->fd_ofileflags[i] & UF_EXCLOSE))) { 1807 struct file *fp; 1808 1809 knote_fdclose(td, i); 1810 /* 1811 * NULL-out descriptor prior to close to avoid 1812 * a race while close blocks. 1813 */ 1814 fp = fdp->fd_ofiles[i]; 1815 fdp->fd_ofiles[i] = NULL; 1816 fdp->fd_ofileflags[i] = 0; 1817 fdunused(fdp, i); 1818 if (fp->f_type == DTYPE_MQUEUE) 1819 mq_fdclose(td, i, fp); 1820 FILEDESC_UNLOCK(fdp); 1821 (void) closef(fp, td); 1822 FILEDESC_LOCK(fdp); 1823 } 1824 } 1825 FILEDESC_UNLOCK(fdp); 1826 } 1827 1828 /* 1829 * It is unsafe for set[ug]id processes to be started with file 1830 * descriptors 0..2 closed, as these descriptors are given implicit 1831 * significance in the Standard C library. fdcheckstd() will create a 1832 * descriptor referencing /dev/null for each of stdin, stdout, and 1833 * stderr that is not already open. 1834 */ 1835 int 1836 fdcheckstd(struct thread *td) 1837 { 1838 struct nameidata nd; 1839 struct filedesc *fdp; 1840 struct file *fp; 1841 register_t retval; 1842 int fd, i, error, flags, devnull; 1843 1844 fdp = td->td_proc->p_fd; 1845 if (fdp == NULL) 1846 return (0); 1847 KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared")); 1848 devnull = -1; 1849 error = 0; 1850 for (i = 0; i < 3; i++) { 1851 if (fdp->fd_ofiles[i] != NULL) 1852 continue; 1853 if (devnull < 0) { 1854 int vfslocked; 1855 error = falloc(td, &fp, &fd); 1856 if (error != 0) 1857 break; 1858 /* Note extra ref on `fp' held for us by falloc(). */ 1859 KASSERT(fd == i, ("oof, we didn't get our fd")); 1860 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, 1861 "/dev/null", td); 1862 flags = FREAD | FWRITE; 1863 error = vn_open(&nd, &flags, 0, fd); 1864 if (error != 0) { 1865 /* 1866 * Someone may have closed the entry in the 1867 * file descriptor table, so check it hasn't 1868 * changed before dropping the reference count. 1869 */ 1870 FILEDESC_LOCK(fdp); 1871 KASSERT(fdp->fd_ofiles[fd] == fp, 1872 ("table not shared, how did it change?")); 1873 fdp->fd_ofiles[fd] = NULL; 1874 fdunused(fdp, fd); 1875 FILEDESC_UNLOCK(fdp); 1876 fdrop(fp, td); 1877 fdrop(fp, td); 1878 break; 1879 } 1880 vfslocked = NDHASGIANT(&nd); 1881 NDFREE(&nd, NDF_ONLY_PNBUF); 1882 fp->f_flag = flags; 1883 fp->f_vnode = nd.ni_vp; 1884 if (fp->f_data == NULL) 1885 fp->f_data = nd.ni_vp; 1886 if (fp->f_ops == &badfileops) 1887 fp->f_ops = &vnops; 1888 fp->f_type = DTYPE_VNODE; 1889 VOP_UNLOCK(nd.ni_vp, 0, td); 1890 VFS_UNLOCK_GIANT(vfslocked); 1891 devnull = fd; 1892 fdrop(fp, td); 1893 } else { 1894 error = do_dup(td, DUP_FIXED, devnull, i, &retval); 1895 if (error != 0) 1896 break; 1897 } 1898 } 1899 return (error); 1900 } 1901 1902 /* 1903 * Internal form of close. 1904 * Decrement reference count on file structure. 1905 * Note: td may be NULL when closing a file that was being passed in a 1906 * message. 1907 * 1908 * XXXRW: Giant is not required for the caller, but often will be held; this 1909 * makes it moderately likely the Giant will be recursed in the VFS case. 1910 */ 1911 int 1912 closef(struct file *fp, struct thread *td) 1913 { 1914 struct vnode *vp; 1915 struct flock lf; 1916 struct filedesc_to_leader *fdtol; 1917 struct filedesc *fdp; 1918 1919 /* 1920 * POSIX record locking dictates that any close releases ALL 1921 * locks owned by this process. This is handled by setting 1922 * a flag in the unlock to free ONLY locks obeying POSIX 1923 * semantics, and not to free BSD-style file locks. 1924 * If the descriptor was in a message, POSIX-style locks 1925 * aren't passed with the descriptor, and the thread pointer 1926 * will be NULL. Callers should be careful only to pass a 1927 * NULL thread pointer when there really is no owning 1928 * context that might have locks, or the locks will be 1929 * leaked. 1930 */ 1931 if (fp->f_type == DTYPE_VNODE && td != NULL) { 1932 int vfslocked; 1933 1934 vp = fp->f_vnode; 1935 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1936 if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1937 lf.l_whence = SEEK_SET; 1938 lf.l_start = 0; 1939 lf.l_len = 0; 1940 lf.l_type = F_UNLCK; 1941 (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader, 1942 F_UNLCK, &lf, F_POSIX); 1943 } 1944 fdtol = td->td_proc->p_fdtol; 1945 if (fdtol != NULL) { 1946 /* 1947 * Handle special case where file descriptor table 1948 * is shared between multiple process leaders. 1949 */ 1950 fdp = td->td_proc->p_fd; 1951 FILEDESC_LOCK(fdp); 1952 for (fdtol = fdtol->fdl_next; 1953 fdtol != td->td_proc->p_fdtol; 1954 fdtol = fdtol->fdl_next) { 1955 if ((fdtol->fdl_leader->p_flag & 1956 P_ADVLOCK) == 0) 1957 continue; 1958 fdtol->fdl_holdcount++; 1959 FILEDESC_UNLOCK(fdp); 1960 lf.l_whence = SEEK_SET; 1961 lf.l_start = 0; 1962 lf.l_len = 0; 1963 lf.l_type = F_UNLCK; 1964 vp = fp->f_vnode; 1965 (void) VOP_ADVLOCK(vp, 1966 (caddr_t)fdtol->fdl_leader, 1967 F_UNLCK, &lf, F_POSIX); 1968 FILEDESC_LOCK(fdp); 1969 fdtol->fdl_holdcount--; 1970 if (fdtol->fdl_holdcount == 0 && 1971 fdtol->fdl_wakeup != 0) { 1972 fdtol->fdl_wakeup = 0; 1973 wakeup(fdtol); 1974 } 1975 } 1976 FILEDESC_UNLOCK(fdp); 1977 } 1978 VFS_UNLOCK_GIANT(vfslocked); 1979 } 1980 return (fdrop(fp, td)); 1981 } 1982 1983 /* 1984 * Extract the file pointer associated with the specified descriptor for 1985 * the current user process. 1986 * 1987 * If the descriptor doesn't exist, EBADF is returned. 1988 * 1989 * If the descriptor exists but doesn't match 'flags' then 1990 * return EBADF for read attempts and EINVAL for write attempts. 1991 * 1992 * If 'hold' is set (non-zero) the file's refcount will be bumped on return. 1993 * It should be dropped with fdrop(). 1994 * If it is not set, then the refcount will not be bumped however the 1995 * thread's filedesc struct will be returned locked (for fgetsock). 1996 * 1997 * If an error occured the non-zero error is returned and *fpp is set to NULL. 1998 * Otherwise *fpp is set and zero is returned. 1999 */ 2000 static __inline int 2001 _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold) 2002 { 2003 struct filedesc *fdp; 2004 struct file *fp; 2005 2006 *fpp = NULL; 2007 if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) 2008 return (EBADF); 2009 FILEDESC_LOCK(fdp); 2010 if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) { 2011 FILEDESC_UNLOCK(fdp); 2012 return (EBADF); 2013 } 2014 2015 /* 2016 * FREAD and FWRITE failure return EBADF as per POSIX. 2017 * 2018 * Only one flag, or 0, may be specified. 2019 */ 2020 if (flags == FREAD && (fp->f_flag & FREAD) == 0) { 2021 FILEDESC_UNLOCK(fdp); 2022 return (EBADF); 2023 } 2024 if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) { 2025 FILEDESC_UNLOCK(fdp); 2026 return (EBADF); 2027 } 2028 if (hold) { 2029 fhold(fp); 2030 FILEDESC_UNLOCK(fdp); 2031 } 2032 *fpp = fp; 2033 return (0); 2034 } 2035 2036 int 2037 fget(struct thread *td, int fd, struct file **fpp) 2038 { 2039 2040 return(_fget(td, fd, fpp, 0, 1)); 2041 } 2042 2043 int 2044 fget_read(struct thread *td, int fd, struct file **fpp) 2045 { 2046 2047 return(_fget(td, fd, fpp, FREAD, 1)); 2048 } 2049 2050 int 2051 fget_write(struct thread *td, int fd, struct file **fpp) 2052 { 2053 2054 return(_fget(td, fd, fpp, FWRITE, 1)); 2055 } 2056 2057 /* 2058 * Like fget() but loads the underlying vnode, or returns an error if 2059 * the descriptor does not represent a vnode. Note that pipes use vnodes 2060 * but never have VM objects. The returned vnode will be vref()d. 2061 * 2062 * XXX: what about the unused flags ? 2063 */ 2064 static __inline int 2065 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags) 2066 { 2067 struct file *fp; 2068 int error; 2069 2070 *vpp = NULL; 2071 if ((error = _fget(td, fd, &fp, 0, 0)) != 0) 2072 return (error); 2073 if (fp->f_vnode == NULL) { 2074 error = EINVAL; 2075 } else { 2076 *vpp = fp->f_vnode; 2077 vref(*vpp); 2078 } 2079 FILEDESC_UNLOCK(td->td_proc->p_fd); 2080 return (error); 2081 } 2082 2083 int 2084 fgetvp(struct thread *td, int fd, struct vnode **vpp) 2085 { 2086 2087 return (_fgetvp(td, fd, vpp, 0)); 2088 } 2089 2090 int 2091 fgetvp_read(struct thread *td, int fd, struct vnode **vpp) 2092 { 2093 2094 return (_fgetvp(td, fd, vpp, FREAD)); 2095 } 2096 2097 #ifdef notyet 2098 int 2099 fgetvp_write(struct thread *td, int fd, struct vnode **vpp) 2100 { 2101 2102 return (_fgetvp(td, fd, vpp, FWRITE)); 2103 } 2104 #endif 2105 2106 /* 2107 * Like fget() but loads the underlying socket, or returns an error if 2108 * the descriptor does not represent a socket. 2109 * 2110 * We bump the ref count on the returned socket. XXX Also obtain the SX 2111 * lock in the future. 2112 * 2113 * XXXRW: fgetsock() and fputsock() are deprecated, as consumers should rely 2114 * on their file descriptor reference to prevent the socket from being 2115 * freed during use. 2116 */ 2117 int 2118 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp) 2119 { 2120 struct file *fp; 2121 int error; 2122 2123 NET_ASSERT_GIANT(); 2124 2125 *spp = NULL; 2126 if (fflagp != NULL) 2127 *fflagp = 0; 2128 if ((error = _fget(td, fd, &fp, 0, 0)) != 0) 2129 return (error); 2130 if (fp->f_type != DTYPE_SOCKET) { 2131 error = ENOTSOCK; 2132 } else { 2133 *spp = fp->f_data; 2134 if (fflagp) 2135 *fflagp = fp->f_flag; 2136 SOCK_LOCK(*spp); 2137 soref(*spp); 2138 SOCK_UNLOCK(*spp); 2139 } 2140 FILEDESC_UNLOCK(td->td_proc->p_fd); 2141 return (error); 2142 } 2143 2144 /* 2145 * Drop the reference count on the socket and XXX release the SX lock in the 2146 * future. The last reference closes the socket. 2147 * 2148 * XXXRW: fputsock() is deprecated, see comment for fgetsock(). 2149 */ 2150 void 2151 fputsock(struct socket *so) 2152 { 2153 2154 NET_ASSERT_GIANT(); 2155 ACCEPT_LOCK(); 2156 SOCK_LOCK(so); 2157 sorele(so); 2158 } 2159 2160 int 2161 fdrop(struct file *fp, struct thread *td) 2162 { 2163 2164 FILE_LOCK(fp); 2165 return (fdrop_locked(fp, td)); 2166 } 2167 2168 /* 2169 * Drop reference on struct file passed in, may call closef if the 2170 * reference hits zero. 2171 * Expects struct file locked, and will unlock it. 2172 */ 2173 static int 2174 fdrop_locked(struct file *fp, struct thread *td) 2175 { 2176 int error; 2177 2178 FILE_LOCK_ASSERT(fp, MA_OWNED); 2179 2180 if (--fp->f_count > 0) { 2181 FILE_UNLOCK(fp); 2182 return (0); 2183 } 2184 2185 /* 2186 * We might have just dropped the last reference to a file 2187 * object that is for a UNIX domain socket whose message 2188 * buffers are being examined in unp_gc(). If that is the 2189 * case, FWAIT will be set in f_gcflag and we need to wait for 2190 * unp_gc() to finish its scan. 2191 */ 2192 while (fp->f_gcflag & FWAIT) 2193 msleep(&fp->f_gcflag, fp->f_mtxp, 0, "fpdrop", 0); 2194 2195 /* We have the last ref so we can proceed without the file lock. */ 2196 FILE_UNLOCK(fp); 2197 if (fp->f_count < 0) 2198 panic("fdrop: count < 0"); 2199 if (fp->f_ops != &badfileops) 2200 error = fo_close(fp, td); 2201 else 2202 error = 0; 2203 2204 sx_xlock(&filelist_lock); 2205 LIST_REMOVE(fp, f_list); 2206 openfiles--; 2207 sx_xunlock(&filelist_lock); 2208 crfree(fp->f_cred); 2209 uma_zfree(file_zone, fp); 2210 2211 return (error); 2212 } 2213 2214 /* 2215 * Apply an advisory lock on a file descriptor. 2216 * 2217 * Just attempt to get a record lock of the requested type on 2218 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 2219 */ 2220 #ifndef _SYS_SYSPROTO_H_ 2221 struct flock_args { 2222 int fd; 2223 int how; 2224 }; 2225 #endif 2226 /* 2227 * MPSAFE 2228 */ 2229 /* ARGSUSED */ 2230 int 2231 flock(struct thread *td, struct flock_args *uap) 2232 { 2233 struct file *fp; 2234 struct vnode *vp; 2235 struct flock lf; 2236 int error; 2237 2238 if ((error = fget(td, uap->fd, &fp)) != 0) 2239 return (error); 2240 if (fp->f_type != DTYPE_VNODE) { 2241 fdrop(fp, td); 2242 return (EOPNOTSUPP); 2243 } 2244 2245 mtx_lock(&Giant); 2246 vp = fp->f_vnode; 2247 lf.l_whence = SEEK_SET; 2248 lf.l_start = 0; 2249 lf.l_len = 0; 2250 if (uap->how & LOCK_UN) { 2251 lf.l_type = F_UNLCK; 2252 FILE_LOCK(fp); 2253 fp->f_flag &= ~FHASLOCK; 2254 FILE_UNLOCK(fp); 2255 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 2256 goto done2; 2257 } 2258 if (uap->how & LOCK_EX) 2259 lf.l_type = F_WRLCK; 2260 else if (uap->how & LOCK_SH) 2261 lf.l_type = F_RDLCK; 2262 else { 2263 error = EBADF; 2264 goto done2; 2265 } 2266 FILE_LOCK(fp); 2267 fp->f_flag |= FHASLOCK; 2268 FILE_UNLOCK(fp); 2269 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, 2270 (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT); 2271 done2: 2272 fdrop(fp, td); 2273 mtx_unlock(&Giant); 2274 return (error); 2275 } 2276 /* 2277 * Duplicate the specified descriptor to a free descriptor. 2278 */ 2279 int 2280 dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error) 2281 { 2282 struct file *wfp; 2283 struct file *fp; 2284 2285 /* 2286 * If the to-be-dup'd fd number is greater than the allowed number 2287 * of file descriptors, or the fd to be dup'd has already been 2288 * closed, then reject. 2289 */ 2290 FILEDESC_LOCK(fdp); 2291 if (dfd < 0 || dfd >= fdp->fd_nfiles || 2292 (wfp = fdp->fd_ofiles[dfd]) == NULL) { 2293 FILEDESC_UNLOCK(fdp); 2294 return (EBADF); 2295 } 2296 2297 /* 2298 * There are two cases of interest here. 2299 * 2300 * For ENODEV simply dup (dfd) to file descriptor 2301 * (indx) and return. 2302 * 2303 * For ENXIO steal away the file structure from (dfd) and 2304 * store it in (indx). (dfd) is effectively closed by 2305 * this operation. 2306 * 2307 * Any other error code is just returned. 2308 */ 2309 switch (error) { 2310 case ENODEV: 2311 /* 2312 * Check that the mode the file is being opened for is a 2313 * subset of the mode of the existing descriptor. 2314 */ 2315 FILE_LOCK(wfp); 2316 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { 2317 FILE_UNLOCK(wfp); 2318 FILEDESC_UNLOCK(fdp); 2319 return (EACCES); 2320 } 2321 fp = fdp->fd_ofiles[indx]; 2322 fdp->fd_ofiles[indx] = wfp; 2323 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 2324 if (fp == NULL) 2325 fdused(fdp, indx); 2326 fhold_locked(wfp); 2327 FILE_UNLOCK(wfp); 2328 FILEDESC_UNLOCK(fdp); 2329 if (fp != NULL) { 2330 /* 2331 * We now own the reference to fp that the ofiles[] 2332 * array used to own. Release it. 2333 */ 2334 FILE_LOCK(fp); 2335 fdrop_locked(fp, td); 2336 } 2337 return (0); 2338 2339 case ENXIO: 2340 /* 2341 * Steal away the file pointer from dfd and stuff it into indx. 2342 */ 2343 fp = fdp->fd_ofiles[indx]; 2344 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; 2345 fdp->fd_ofiles[dfd] = NULL; 2346 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 2347 fdp->fd_ofileflags[dfd] = 0; 2348 fdunused(fdp, dfd); 2349 if (fp == NULL) 2350 fdused(fdp, indx); 2351 if (fp != NULL) 2352 FILE_LOCK(fp); 2353 2354 /* 2355 * We now own the reference to fp that the ofiles[] array 2356 * used to own. Release it. 2357 */ 2358 if (fp != NULL) 2359 fdrop_locked(fp, td); 2360 2361 FILEDESC_UNLOCK(fdp); 2362 2363 return (0); 2364 2365 default: 2366 FILEDESC_UNLOCK(fdp); 2367 return (error); 2368 } 2369 /* NOTREACHED */ 2370 } 2371 2372 /* 2373 * Scan all active processes to see if any of them have a current 2374 * or root directory of `olddp'. If so, replace them with the new 2375 * mount point. 2376 */ 2377 void 2378 mountcheckdirs(struct vnode *olddp, struct vnode *newdp) 2379 { 2380 struct filedesc *fdp; 2381 struct proc *p; 2382 int nrele; 2383 2384 if (vrefcnt(olddp) == 1) 2385 return; 2386 sx_slock(&allproc_lock); 2387 FOREACH_PROC_IN_SYSTEM(p) { 2388 fdp = fdhold(p); 2389 if (fdp == NULL) 2390 continue; 2391 nrele = 0; 2392 FILEDESC_LOCK_FAST(fdp); 2393 if (fdp->fd_cdir == olddp) { 2394 vref(newdp); 2395 fdp->fd_cdir = newdp; 2396 nrele++; 2397 } 2398 if (fdp->fd_rdir == olddp) { 2399 vref(newdp); 2400 fdp->fd_rdir = newdp; 2401 nrele++; 2402 } 2403 FILEDESC_UNLOCK_FAST(fdp); 2404 fddrop(fdp); 2405 while (nrele--) 2406 vrele(olddp); 2407 } 2408 sx_sunlock(&allproc_lock); 2409 if (rootvnode == olddp) { 2410 vrele(rootvnode); 2411 vref(newdp); 2412 rootvnode = newdp; 2413 } 2414 } 2415 2416 struct filedesc_to_leader * 2417 filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader) 2418 { 2419 struct filedesc_to_leader *fdtol; 2420 2421 MALLOC(fdtol, struct filedesc_to_leader *, 2422 sizeof(struct filedesc_to_leader), 2423 M_FILEDESC_TO_LEADER, 2424 M_WAITOK); 2425 fdtol->fdl_refcount = 1; 2426 fdtol->fdl_holdcount = 0; 2427 fdtol->fdl_wakeup = 0; 2428 fdtol->fdl_leader = leader; 2429 if (old != NULL) { 2430 FILEDESC_LOCK(fdp); 2431 fdtol->fdl_next = old->fdl_next; 2432 fdtol->fdl_prev = old; 2433 old->fdl_next = fdtol; 2434 fdtol->fdl_next->fdl_prev = fdtol; 2435 FILEDESC_UNLOCK(fdp); 2436 } else { 2437 fdtol->fdl_next = fdtol; 2438 fdtol->fdl_prev = fdtol; 2439 } 2440 return (fdtol); 2441 } 2442 2443 /* 2444 * Get file structures. 2445 */ 2446 static int 2447 sysctl_kern_file(SYSCTL_HANDLER_ARGS) 2448 { 2449 struct xfile xf; 2450 struct filedesc *fdp; 2451 struct file *fp; 2452 struct proc *p; 2453 int error, n; 2454 2455 /* 2456 * Note: because the number of file descriptors is calculated 2457 * in different ways for sizing vs returning the data, 2458 * there is information leakage from the first loop. However, 2459 * it is of a similar order of magnitude to the leakage from 2460 * global system statistics such as kern.openfiles. 2461 */ 2462 error = sysctl_wire_old_buffer(req, 0); 2463 if (error != 0) 2464 return (error); 2465 if (req->oldptr == NULL) { 2466 n = 16; /* A slight overestimate. */ 2467 sx_slock(&filelist_lock); 2468 LIST_FOREACH(fp, &filehead, f_list) { 2469 /* 2470 * We should grab the lock, but this is an 2471 * estimate, so does it really matter? 2472 */ 2473 /* mtx_lock(fp->f_mtxp); */ 2474 n += fp->f_count; 2475 /* mtx_unlock(f->f_mtxp); */ 2476 } 2477 sx_sunlock(&filelist_lock); 2478 return (SYSCTL_OUT(req, 0, n * sizeof(xf))); 2479 } 2480 error = 0; 2481 bzero(&xf, sizeof(xf)); 2482 xf.xf_size = sizeof(xf); 2483 sx_slock(&allproc_lock); 2484 FOREACH_PROC_IN_SYSTEM(p) { 2485 if (p->p_state == PRS_NEW) 2486 continue; 2487 PROC_LOCK(p); 2488 if (p_cansee(req->td, p) != 0) { 2489 PROC_UNLOCK(p); 2490 continue; 2491 } 2492 xf.xf_pid = p->p_pid; 2493 xf.xf_uid = p->p_ucred->cr_uid; 2494 PROC_UNLOCK(p); 2495 fdp = fdhold(p); 2496 if (fdp == NULL) 2497 continue; 2498 FILEDESC_LOCK_FAST(fdp); 2499 for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) { 2500 if ((fp = fdp->fd_ofiles[n]) == NULL) 2501 continue; 2502 xf.xf_fd = n; 2503 xf.xf_file = fp; 2504 xf.xf_data = fp->f_data; 2505 xf.xf_vnode = fp->f_vnode; 2506 xf.xf_type = fp->f_type; 2507 xf.xf_count = fp->f_count; 2508 xf.xf_msgcount = fp->f_msgcount; 2509 xf.xf_offset = fp->f_offset; 2510 xf.xf_flag = fp->f_flag; 2511 error = SYSCTL_OUT(req, &xf, sizeof(xf)); 2512 if (error) 2513 break; 2514 } 2515 FILEDESC_UNLOCK_FAST(fdp); 2516 fddrop(fdp); 2517 if (error) 2518 break; 2519 } 2520 sx_sunlock(&allproc_lock); 2521 return (error); 2522 } 2523 2524 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD, 2525 0, 0, sysctl_kern_file, "S,xfile", "Entire file table"); 2526 2527 #ifdef DDB 2528 /* 2529 * For the purposes of debugging, generate a human-readable string for the 2530 * file type. 2531 */ 2532 static const char * 2533 file_type_to_name(short type) 2534 { 2535 2536 switch (type) { 2537 case 0: 2538 return ("zero"); 2539 case DTYPE_VNODE: 2540 return ("vnod"); 2541 case DTYPE_SOCKET: 2542 return ("sock"); 2543 case DTYPE_PIPE: 2544 return ("pipe"); 2545 case DTYPE_FIFO: 2546 return ("fifo"); 2547 case DTYPE_KQUEUE: 2548 return ("kque"); 2549 case DTYPE_CRYPTO: 2550 return ("crpt"); 2551 case DTYPE_MQUEUE: 2552 return ("mque"); 2553 default: 2554 return ("unkn"); 2555 } 2556 } 2557 2558 /* 2559 * For the purposes of debugging, identify a process (if any, perhaps one of 2560 * many) that references the passed file in its file descriptor array. Return 2561 * NULL if none. 2562 */ 2563 static struct proc * 2564 file_to_first_proc(struct file *fp) 2565 { 2566 struct filedesc *fdp; 2567 struct proc *p; 2568 int n; 2569 2570 FOREACH_PROC_IN_SYSTEM(p) { 2571 if (p->p_state == PRS_NEW) 2572 continue; 2573 fdp = p->p_fd; 2574 if (fdp == NULL) 2575 continue; 2576 for (n = 0; n < fdp->fd_nfiles; n++) { 2577 if (fp == fdp->fd_ofiles[n]) 2578 return (p); 2579 } 2580 } 2581 return (NULL); 2582 } 2583 2584 static void 2585 db_print_file(struct file *fp, int header) 2586 { 2587 struct proc *p; 2588 2589 if (header) 2590 db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n", 2591 "File", "Type", "Data", "Flag", "GCFl", "Count", 2592 "MCount", "Vnode", "FPID", "FCmd"); 2593 p = file_to_first_proc(fp); 2594 db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp, 2595 file_type_to_name(fp->f_type), fp->f_data, fp->f_flag, 2596 fp->f_gcflag, fp->f_count, fp->f_msgcount, fp->f_vnode, 2597 p != NULL ? p->p_pid : -1, p != NULL ? p->p_comm : "-"); 2598 } 2599 2600 DB_SHOW_COMMAND(file, db_show_file) 2601 { 2602 struct file *fp; 2603 2604 if (!have_addr) { 2605 db_printf("usage: show file <addr>\n"); 2606 return; 2607 } 2608 fp = (struct file *)addr; 2609 db_print_file(fp, 1); 2610 } 2611 2612 DB_SHOW_COMMAND(files, db_show_files) 2613 { 2614 struct file *fp; 2615 int header; 2616 2617 header = 1; 2618 LIST_FOREACH(fp, &filehead, f_list) { 2619 db_print_file(fp, header); 2620 header = 0; 2621 } 2622 } 2623 #endif 2624 2625 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, 2626 &maxfilesperproc, 0, "Maximum files allowed open per process"); 2627 2628 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, 2629 &maxfiles, 0, "Maximum number of files"); 2630 2631 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD, 2632 &openfiles, 0, "System-wide number of open files"); 2633 2634 /* ARGSUSED*/ 2635 static void 2636 filelistinit(void *dummy) 2637 { 2638 2639 file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL, 2640 NULL, NULL, UMA_ALIGN_PTR, 0); 2641 sx_init(&filelist_lock, "filelist lock"); 2642 mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); 2643 mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF); 2644 } 2645 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL) 2646 2647 /*-------------------------------------------------------------------*/ 2648 2649 static int 2650 badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) 2651 { 2652 2653 return (EBADF); 2654 } 2655 2656 static int 2657 badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td) 2658 { 2659 2660 return (EBADF); 2661 } 2662 2663 static int 2664 badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) 2665 { 2666 2667 return (0); 2668 } 2669 2670 static int 2671 badfo_kqfilter(struct file *fp, struct knote *kn) 2672 { 2673 2674 return (EBADF); 2675 } 2676 2677 static int 2678 badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td) 2679 { 2680 2681 return (EBADF); 2682 } 2683 2684 static int 2685 badfo_close(struct file *fp, struct thread *td) 2686 { 2687 2688 return (EBADF); 2689 } 2690 2691 struct fileops badfileops = { 2692 .fo_read = badfo_readwrite, 2693 .fo_write = badfo_readwrite, 2694 .fo_ioctl = badfo_ioctl, 2695 .fo_poll = badfo_poll, 2696 .fo_kqfilter = badfo_kqfilter, 2697 .fo_stat = badfo_stat, 2698 .fo_close = badfo_close, 2699 }; 2700 2701 2702 /*-------------------------------------------------------------------*/ 2703 2704 /* 2705 * File Descriptor pseudo-device driver (/dev/fd/). 2706 * 2707 * Opening minor device N dup()s the file (if any) connected to file 2708 * descriptor N belonging to the calling process. Note that this driver 2709 * consists of only the ``open()'' routine, because all subsequent 2710 * references to this file will be direct to the other driver. 2711 * 2712 * XXX: we could give this one a cloning event handler if necessary. 2713 */ 2714 2715 /* ARGSUSED */ 2716 static int 2717 fdopen(struct cdev *dev, int mode, int type, struct thread *td) 2718 { 2719 2720 /* 2721 * XXX Kludge: set curthread->td_dupfd to contain the value of the 2722 * the file descriptor being sought for duplication. The error 2723 * return ensures that the vnode for this device will be released 2724 * by vn_open. Open will detect this special error and take the 2725 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 2726 * will simply report the error. 2727 */ 2728 td->td_dupfd = dev2unit(dev); 2729 return (ENODEV); 2730 } 2731 2732 static struct cdevsw fildesc_cdevsw = { 2733 .d_version = D_VERSION, 2734 .d_flags = D_NEEDGIANT, 2735 .d_open = fdopen, 2736 .d_name = "FD", 2737 }; 2738 2739 static void 2740 fildesc_drvinit(void *unused) 2741 { 2742 struct cdev *dev; 2743 2744 dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0"); 2745 make_dev_alias(dev, "stdin"); 2746 dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1"); 2747 make_dev_alias(dev, "stdout"); 2748 dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2"); 2749 make_dev_alias(dev, "stderr"); 2750 } 2751 2752 SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL) 2753