1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ddb.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 48 #include <sys/capsicum.h> 49 #include <sys/conf.h> 50 #include <sys/fcntl.h> 51 #include <sys/file.h> 52 #include <sys/filedesc.h> 53 #include <sys/filio.h> 54 #include <sys/jail.h> 55 #include <sys/kernel.h> 56 #include <sys/limits.h> 57 #include <sys/lock.h> 58 #include <sys/malloc.h> 59 #include <sys/mount.h> 60 #include <sys/mutex.h> 61 #include <sys/namei.h> 62 #include <sys/selinfo.h> 63 #include <sys/priv.h> 64 #include <sys/proc.h> 65 #include <sys/protosw.h> 66 #include <sys/racct.h> 67 #include <sys/resourcevar.h> 68 #include <sys/sbuf.h> 69 #include <sys/signalvar.h> 70 #include <sys/kdb.h> 71 #include <sys/stat.h> 72 #include <sys/sx.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #include <sys/sysproto.h> 76 #include <sys/unistd.h> 77 #include <sys/user.h> 78 #include <sys/vnode.h> 79 #ifdef KTRACE 80 #include <sys/ktrace.h> 81 #endif 82 83 #include <net/vnet.h> 84 85 #include <security/audit/audit.h> 86 87 #include <vm/uma.h> 88 #include <vm/vm.h> 89 90 #include <ddb/ddb.h> 91 92 static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table"); 93 static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader", 94 "file desc to leader structures"); 95 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); 96 MALLOC_DEFINE(M_FILECAPS, "filecaps", "descriptor capabilities"); 97 98 MALLOC_DECLARE(M_FADVISE); 99 100 static __read_mostly uma_zone_t file_zone; 101 static __read_mostly uma_zone_t filedesc0_zone; 102 103 static int closefp(struct filedesc *fdp, int fd, struct file *fp, 104 struct thread *td, int holdleaders); 105 static int fd_first_free(struct filedesc *fdp, int low, int size); 106 static int fd_last_used(struct filedesc *fdp, int 
size); 107 static void fdgrowtable(struct filedesc *fdp, int nfd); 108 static void fdgrowtable_exp(struct filedesc *fdp, int nfd); 109 static void fdunused(struct filedesc *fdp, int fd); 110 static void fdused(struct filedesc *fdp, int fd); 111 static int getmaxfd(struct thread *td); 112 113 /* 114 * Each process has: 115 * 116 * - An array of open file descriptors (fd_ofiles) 117 * - An array of file flags (fd_ofileflags) 118 * - A bitmap recording which descriptors are in use (fd_map) 119 * 120 * A process starts out with NDFILE descriptors. The value of NDFILE has 121 * been selected based the historical limit of 20 open files, and an 122 * assumption that the majority of processes, especially short-lived 123 * processes like shells, will never need more. 124 * 125 * If this initial allocation is exhausted, a larger descriptor table and 126 * map are allocated dynamically, and the pointers in the process's struct 127 * filedesc are updated to point to those. This is repeated every time 128 * the process runs out of file descriptors (provided it hasn't hit its 129 * resource limit). 130 * 131 * Since threads may hold references to individual descriptor table 132 * entries, the tables are never freed. Instead, they are placed on a 133 * linked list and freed only when the struct filedesc is released. 134 */ 135 #define NDFILE 20 136 #define NDSLOTSIZE sizeof(NDSLOTTYPE) 137 #define NDENTRIES (NDSLOTSIZE * __CHAR_BIT) 138 #define NDSLOT(x) ((x) / NDENTRIES) 139 #define NDBIT(x) ((NDSLOTTYPE)1 << ((x) % NDENTRIES)) 140 #define NDSLOTS(x) (((x) + NDENTRIES - 1) / NDENTRIES) 141 142 /* 143 * SLIST entry used to keep track of ofiles which must be reclaimed when 144 * the process exits. 145 */ 146 struct freetable { 147 struct fdescenttbl *ft_table; 148 SLIST_ENTRY(freetable) ft_next; 149 }; 150 151 /* 152 * Initial allocation: a filedesc structure + the head of SLIST used to 153 * keep track of old ofiles + enough space for NDFILE descriptors. 
 */

/*
 * Statically-sized initial descriptor table embedded in struct filedesc0.
 */
struct fdescenttbl0 {
	int	fdt_nfiles;
	struct	filedescent fdt_ofiles[NDFILE];
};

/*
 * Per-process descriptor bookkeeping: the generic filedesc, the list head
 * for retired tables, and the embedded initial table and use map.
 */
struct filedesc0 {
	struct filedesc fd_fd;
	SLIST_HEAD(, freetable) fd_free;
	struct	fdescenttbl0 fd_dfiles;
	NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];
};

/*
 * Descriptor management.
 */
volatile int __exclusive_cache_line openfiles; /* actual number of open files */
struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
void __read_mostly (*mq_fdclose)(struct thread *td, int fd, struct file *fp);

/*
 * If low >= size, just return low. Otherwise find the first zero bit in the
 * given bitmap, starting at low and not exceeding size - 1. Return size if
 * not found.
 */
static int
fd_first_free(struct filedesc *fdp, int low, int size)
{
	NDSLOTTYPE *map = fdp->fd_map;
	NDSLOTTYPE mask;
	int off, maxoff;

	if (low >= size)
		return (low);

	off = NDSLOT(low);
	if (low % NDENTRIES) {
		/* Mask off the bits below 'low' in the first slot. */
		mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES)));
		if ((mask &= ~map[off]) != 0UL)
			return (off * NDENTRIES + ffsl(mask) - 1);
		++off;
	}
	for (maxoff = NDSLOTS(size); off < maxoff; ++off)
		if (map[off] != ~0UL)
			return (off * NDENTRIES + ffsl(~map[off]) - 1);
	return (size);
}

/*
 * Find the highest non-zero bit in the given bitmap, starting at 0 and
 * not exceeding size - 1. Return -1 if not found.
 */
static int
fd_last_used(struct filedesc *fdp, int size)
{
	NDSLOTTYPE *map = fdp->fd_map;
	NDSLOTTYPE mask;
	int off, minoff;

	off = NDSLOT(size);
	if (size % NDENTRIES) {
		/* Mask off the bits at and above 'size' in the last slot. */
		mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES));
		if ((mask &= map[off]) != 0)
			return (off * NDENTRIES + flsl(mask) - 1);
		--off;
	}
	for (minoff = NDSLOT(0); off >= minoff; --off)
		if (map[off] != 0)
			return (off * NDENTRIES + flsl(map[off]) - 1);
	return (-1);
}

/*
 * Return non-zero if descriptor 'fd' is marked used in the bitmap.
 */
static int
fdisused(struct filedesc *fdp, int fd)
{

	KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
	    ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles));

	return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0);
}

/*
 * Mark a file descriptor as used.
 */
static void
fdused_init(struct filedesc *fdp, int fd)
{

	KASSERT(!fdisused(fdp, fd), ("fd=%d is already used", fd));

	fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd);
}

/*
 * Mark 'fd' used and update the cached last-used/first-free hints.
 */
static void
fdused(struct filedesc *fdp, int fd)
{

	FILEDESC_XLOCK_ASSERT(fdp);

	fdused_init(fdp, fd);
	if (fd > fdp->fd_lastfile)
		fdp->fd_lastfile = fd;
	if (fd == fdp->fd_freefile)
		fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles);
}

/*
 * Mark a file descriptor as unused.
 */
static void
fdunused(struct filedesc *fdp, int fd)
{

	FILEDESC_XLOCK_ASSERT(fdp);

	KASSERT(fdisused(fdp, fd), ("fd=%d is already unused", fd));
	KASSERT(fdp->fd_ofiles[fd].fde_file == NULL,
	    ("fd=%d is still in use", fd));

	fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd);
	if (fd < fdp->fd_freefile)
		fdp->fd_freefile = fd;
	if (fd == fdp->fd_lastfile)
		fdp->fd_lastfile = fd_last_used(fdp, fd);
}

/*
 * Free a file descriptor.
 *
 * Avoid some work if fdp is about to be destroyed.
 */
static inline void
fdefree_last(struct filedescent *fde)
{

	filecaps_free(&fde->fde_caps);
}

static inline void
fdfree(struct filedesc *fdp, int fd)
{
	struct filedescent *fde;

	fde = &fdp->fd_ofiles[fd];
#ifdef CAPABILITIES
	/* Sequence counter lets lockless readers detect the update. */
	seq_write_begin(&fde->fde_seq);
#endif
	fdefree_last(fde);
	fde->fde_file = NULL;
	fdunused(fdp, fd);
#ifdef CAPABILITIES
	seq_write_end(&fde->fde_seq);
#endif
}

/*
 * If the current or root directory of the calling process's filedesc is
 * unset, point it at the root vnode (taking a reference).
 */
void
pwd_ensure_dirs(void)
{
	struct filedesc *fdp;

	fdp = curproc->p_fd;
	FILEDESC_XLOCK(fdp);
	if (fdp->fd_cdir == NULL) {
		fdp->fd_cdir = rootvnode;
		vrefact(rootvnode);
	}
	if (fdp->fd_rdir == NULL) {
		fdp->fd_rdir = rootvnode;
		vrefact(rootvnode);
	}
	FILEDESC_XUNLOCK(fdp);
}

/*
 * System calls on descriptors.
 */
#ifndef _SYS_SYSPROTO_H_
struct getdtablesize_args {
	int	dummy;
};
#endif
/* ARGSUSED */
int
sys_getdtablesize(struct thread *td, struct getdtablesize_args *uap)
{
#ifdef	RACCT
	uint64_t lim;
#endif

	td->td_retval[0] =
	    min((int)lim_cur(td, RLIMIT_NOFILE), maxfilesperproc);
#ifdef	RACCT
	PROC_LOCK(td->td_proc);
	lim = racct_get_limit(td->td_proc, RACCT_NOFILE);
	PROC_UNLOCK(td->td_proc);
	/* Clamp further by the RACCT per-process file limit, if lower. */
	if (lim < td->td_retval[0])
		td->td_retval[0] = lim;
#endif
	return (0);
}

/*
 * Duplicate a file descriptor to a particular value.
 *
 * Note: keep in mind that a potential race condition exists when closing
 * descriptors from a shared descriptor table (via rfork).
 */
#ifndef _SYS_SYSPROTO_H_
struct dup2_args {
	u_int	from;
	u_int	to;
};
#endif
/* ARGSUSED */
int
sys_dup2(struct thread *td, struct dup2_args *uap)
{

	return (kern_dup(td, FDDUP_FIXED, 0, (int)uap->from, (int)uap->to));
}

/*
 * Duplicate a file descriptor.
379 */ 380 #ifndef _SYS_SYSPROTO_H_ 381 struct dup_args { 382 u_int fd; 383 }; 384 #endif 385 /* ARGSUSED */ 386 int 387 sys_dup(struct thread *td, struct dup_args *uap) 388 { 389 390 return (kern_dup(td, FDDUP_NORMAL, 0, (int)uap->fd, 0)); 391 } 392 393 /* 394 * The file control system call. 395 */ 396 #ifndef _SYS_SYSPROTO_H_ 397 struct fcntl_args { 398 int fd; 399 int cmd; 400 long arg; 401 }; 402 #endif 403 /* ARGSUSED */ 404 int 405 sys_fcntl(struct thread *td, struct fcntl_args *uap) 406 { 407 408 return (kern_fcntl_freebsd(td, uap->fd, uap->cmd, uap->arg)); 409 } 410 411 int 412 kern_fcntl_freebsd(struct thread *td, int fd, int cmd, long arg) 413 { 414 struct flock fl; 415 struct __oflock ofl; 416 intptr_t arg1; 417 int error, newcmd; 418 419 error = 0; 420 newcmd = cmd; 421 switch (cmd) { 422 case F_OGETLK: 423 case F_OSETLK: 424 case F_OSETLKW: 425 /* 426 * Convert old flock structure to new. 427 */ 428 error = copyin((void *)(intptr_t)arg, &ofl, sizeof(ofl)); 429 fl.l_start = ofl.l_start; 430 fl.l_len = ofl.l_len; 431 fl.l_pid = ofl.l_pid; 432 fl.l_type = ofl.l_type; 433 fl.l_whence = ofl.l_whence; 434 fl.l_sysid = 0; 435 436 switch (cmd) { 437 case F_OGETLK: 438 newcmd = F_GETLK; 439 break; 440 case F_OSETLK: 441 newcmd = F_SETLK; 442 break; 443 case F_OSETLKW: 444 newcmd = F_SETLKW; 445 break; 446 } 447 arg1 = (intptr_t)&fl; 448 break; 449 case F_GETLK: 450 case F_SETLK: 451 case F_SETLKW: 452 case F_SETLK_REMOTE: 453 error = copyin((void *)(intptr_t)arg, &fl, sizeof(fl)); 454 arg1 = (intptr_t)&fl; 455 break; 456 default: 457 arg1 = arg; 458 break; 459 } 460 if (error) 461 return (error); 462 error = kern_fcntl(td, fd, newcmd, arg1); 463 if (error) 464 return (error); 465 if (cmd == F_OGETLK) { 466 ofl.l_start = fl.l_start; 467 ofl.l_len = fl.l_len; 468 ofl.l_pid = fl.l_pid; 469 ofl.l_type = fl.l_type; 470 ofl.l_whence = fl.l_whence; 471 error = copyout(&ofl, (void *)(intptr_t)arg, sizeof(ofl)); 472 } else if (cmd == F_GETLK) { 473 error = copyout(&fl, 
(void *)(intptr_t)arg, sizeof(fl)); 474 } 475 return (error); 476 } 477 478 int 479 kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) 480 { 481 struct filedesc *fdp; 482 struct flock *flp; 483 struct file *fp, *fp2; 484 struct filedescent *fde; 485 struct proc *p; 486 struct vnode *vp; 487 cap_rights_t rights; 488 int error, flg, tmp; 489 uint64_t bsize; 490 off_t foffset; 491 492 error = 0; 493 flg = F_POSIX; 494 p = td->td_proc; 495 fdp = p->p_fd; 496 497 AUDIT_ARG_FD(cmd); 498 AUDIT_ARG_CMD(cmd); 499 switch (cmd) { 500 case F_DUPFD: 501 tmp = arg; 502 error = kern_dup(td, FDDUP_FCNTL, 0, fd, tmp); 503 break; 504 505 case F_DUPFD_CLOEXEC: 506 tmp = arg; 507 error = kern_dup(td, FDDUP_FCNTL, FDDUP_FLAG_CLOEXEC, fd, tmp); 508 break; 509 510 case F_DUP2FD: 511 tmp = arg; 512 error = kern_dup(td, FDDUP_FIXED, 0, fd, tmp); 513 break; 514 515 case F_DUP2FD_CLOEXEC: 516 tmp = arg; 517 error = kern_dup(td, FDDUP_FIXED, FDDUP_FLAG_CLOEXEC, fd, tmp); 518 break; 519 520 case F_GETFD: 521 error = EBADF; 522 FILEDESC_SLOCK(fdp); 523 fde = fdeget_locked(fdp, fd); 524 if (fde != NULL) { 525 td->td_retval[0] = 526 (fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0; 527 error = 0; 528 } 529 FILEDESC_SUNLOCK(fdp); 530 break; 531 532 case F_SETFD: 533 error = EBADF; 534 FILEDESC_XLOCK(fdp); 535 fde = fdeget_locked(fdp, fd); 536 if (fde != NULL) { 537 fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) | 538 (arg & FD_CLOEXEC ? 
UF_EXCLOSE : 0); 539 error = 0; 540 } 541 FILEDESC_XUNLOCK(fdp); 542 break; 543 544 case F_GETFL: 545 error = fget_fcntl(td, fd, 546 cap_rights_init(&rights, CAP_FCNTL), F_GETFL, &fp); 547 if (error != 0) 548 break; 549 td->td_retval[0] = OFLAGS(fp->f_flag); 550 fdrop(fp, td); 551 break; 552 553 case F_SETFL: 554 error = fget_fcntl(td, fd, 555 cap_rights_init(&rights, CAP_FCNTL), F_SETFL, &fp); 556 if (error != 0) 557 break; 558 do { 559 tmp = flg = fp->f_flag; 560 tmp &= ~FCNTLFLAGS; 561 tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS; 562 } while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0); 563 tmp = fp->f_flag & FNONBLOCK; 564 error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); 565 if (error != 0) { 566 fdrop(fp, td); 567 break; 568 } 569 tmp = fp->f_flag & FASYNC; 570 error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td); 571 if (error == 0) { 572 fdrop(fp, td); 573 break; 574 } 575 atomic_clear_int(&fp->f_flag, FNONBLOCK); 576 tmp = 0; 577 (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); 578 fdrop(fp, td); 579 break; 580 581 case F_GETOWN: 582 error = fget_fcntl(td, fd, 583 cap_rights_init(&rights, CAP_FCNTL), F_GETOWN, &fp); 584 if (error != 0) 585 break; 586 error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); 587 if (error == 0) 588 td->td_retval[0] = tmp; 589 fdrop(fp, td); 590 break; 591 592 case F_SETOWN: 593 error = fget_fcntl(td, fd, 594 cap_rights_init(&rights, CAP_FCNTL), F_SETOWN, &fp); 595 if (error != 0) 596 break; 597 tmp = arg; 598 error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td); 599 fdrop(fp, td); 600 break; 601 602 case F_SETLK_REMOTE: 603 error = priv_check(td, PRIV_NFS_LOCKD); 604 if (error) 605 return (error); 606 flg = F_REMOTE; 607 goto do_setlk; 608 609 case F_SETLKW: 610 flg |= F_WAIT; 611 /* FALLTHROUGH F_SETLK */ 612 613 case F_SETLK: 614 do_setlk: 615 cap_rights_init(&rights, CAP_FLOCK); 616 error = fget_unlocked(fdp, fd, &rights, &fp, NULL); 617 if (error != 0) 618 break; 619 if (fp->f_type != DTYPE_VNODE) { 620 
error = EBADF; 621 fdrop(fp, td); 622 break; 623 } 624 625 flp = (struct flock *)arg; 626 if (flp->l_whence == SEEK_CUR) { 627 foffset = foffset_get(fp); 628 if (foffset < 0 || 629 (flp->l_start > 0 && 630 foffset > OFF_MAX - flp->l_start)) { 631 error = EOVERFLOW; 632 fdrop(fp, td); 633 break; 634 } 635 flp->l_start += foffset; 636 } 637 638 vp = fp->f_vnode; 639 switch (flp->l_type) { 640 case F_RDLCK: 641 if ((fp->f_flag & FREAD) == 0) { 642 error = EBADF; 643 break; 644 } 645 PROC_LOCK(p->p_leader); 646 p->p_leader->p_flag |= P_ADVLOCK; 647 PROC_UNLOCK(p->p_leader); 648 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 649 flp, flg); 650 break; 651 case F_WRLCK: 652 if ((fp->f_flag & FWRITE) == 0) { 653 error = EBADF; 654 break; 655 } 656 PROC_LOCK(p->p_leader); 657 p->p_leader->p_flag |= P_ADVLOCK; 658 PROC_UNLOCK(p->p_leader); 659 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, 660 flp, flg); 661 break; 662 case F_UNLCK: 663 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, 664 flp, flg); 665 break; 666 case F_UNLCKSYS: 667 /* 668 * Temporary api for testing remote lock 669 * infrastructure. 670 */ 671 if (flg != F_REMOTE) { 672 error = EINVAL; 673 break; 674 } 675 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, 676 F_UNLCKSYS, flp, flg); 677 break; 678 default: 679 error = EINVAL; 680 break; 681 } 682 if (error != 0 || flp->l_type == F_UNLCK || 683 flp->l_type == F_UNLCKSYS) { 684 fdrop(fp, td); 685 break; 686 } 687 688 /* 689 * Check for a race with close. 690 * 691 * The vnode is now advisory locked (or unlocked, but this case 692 * is not really important) as the caller requested. 693 * We had to drop the filedesc lock, so we need to recheck if 694 * the descriptor is still valid, because if it was closed 695 * in the meantime we need to remove advisory lock from the 696 * vnode - close on any descriptor leading to an advisory 697 * locked vnode, removes that lock. 
698 * We will return 0 on purpose in that case, as the result of 699 * successful advisory lock might have been externally visible 700 * already. This is fine - effectively we pretend to the caller 701 * that the closing thread was a bit slower and that the 702 * advisory lock succeeded before the close. 703 */ 704 error = fget_unlocked(fdp, fd, &rights, &fp2, NULL); 705 if (error != 0) { 706 fdrop(fp, td); 707 break; 708 } 709 if (fp != fp2) { 710 flp->l_whence = SEEK_SET; 711 flp->l_start = 0; 712 flp->l_len = 0; 713 flp->l_type = F_UNLCK; 714 (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, 715 F_UNLCK, flp, F_POSIX); 716 } 717 fdrop(fp, td); 718 fdrop(fp2, td); 719 break; 720 721 case F_GETLK: 722 error = fget_unlocked(fdp, fd, 723 cap_rights_init(&rights, CAP_FLOCK), &fp, NULL); 724 if (error != 0) 725 break; 726 if (fp->f_type != DTYPE_VNODE) { 727 error = EBADF; 728 fdrop(fp, td); 729 break; 730 } 731 flp = (struct flock *)arg; 732 if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && 733 flp->l_type != F_UNLCK) { 734 error = EINVAL; 735 fdrop(fp, td); 736 break; 737 } 738 if (flp->l_whence == SEEK_CUR) { 739 foffset = foffset_get(fp); 740 if ((flp->l_start > 0 && 741 foffset > OFF_MAX - flp->l_start) || 742 (flp->l_start < 0 && 743 foffset < OFF_MIN - flp->l_start)) { 744 error = EOVERFLOW; 745 fdrop(fp, td); 746 break; 747 } 748 flp->l_start += foffset; 749 } 750 vp = fp->f_vnode; 751 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, 752 F_POSIX); 753 fdrop(fp, td); 754 break; 755 756 case F_RDAHEAD: 757 arg = arg ? 128 * 1024: 0; 758 /* FALLTHROUGH */ 759 case F_READAHEAD: 760 error = fget_unlocked(fdp, fd, 761 cap_rights_init(&rights), &fp, NULL); 762 if (error != 0) 763 break; 764 if (fp->f_type != DTYPE_VNODE) { 765 fdrop(fp, td); 766 error = EBADF; 767 break; 768 } 769 vp = fp->f_vnode; 770 /* 771 * Exclusive lock synchronizes against f_seqcount reads and 772 * writes in sequential_heuristic(). 
773 */ 774 error = vn_lock(vp, LK_EXCLUSIVE); 775 if (error != 0) { 776 fdrop(fp, td); 777 break; 778 } 779 if (arg >= 0) { 780 bsize = fp->f_vnode->v_mount->mnt_stat.f_iosize; 781 fp->f_seqcount = (arg + bsize - 1) / bsize; 782 atomic_set_int(&fp->f_flag, FRDAHEAD); 783 } else { 784 atomic_clear_int(&fp->f_flag, FRDAHEAD); 785 } 786 VOP_UNLOCK(vp, 0); 787 fdrop(fp, td); 788 break; 789 790 default: 791 error = EINVAL; 792 break; 793 } 794 return (error); 795 } 796 797 static int 798 getmaxfd(struct thread *td) 799 { 800 801 return (min((int)lim_cur(td, RLIMIT_NOFILE), maxfilesperproc)); 802 } 803 804 /* 805 * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD). 806 */ 807 int 808 kern_dup(struct thread *td, u_int mode, int flags, int old, int new) 809 { 810 struct filedesc *fdp; 811 struct filedescent *oldfde, *newfde; 812 struct proc *p; 813 struct file *delfp; 814 int error, maxfd; 815 816 p = td->td_proc; 817 fdp = p->p_fd; 818 819 MPASS((flags & ~(FDDUP_FLAG_CLOEXEC)) == 0); 820 MPASS(mode < FDDUP_LASTMODE); 821 822 AUDIT_ARG_FD(old); 823 /* XXXRW: if (flags & FDDUP_FIXED) AUDIT_ARG_FD2(new); */ 824 825 /* 826 * Verify we have a valid descriptor to dup from and possibly to 827 * dup to. Unlike dup() and dup2(), fcntl()'s F_DUPFD should 828 * return EINVAL when the new descriptor is out of bounds. 829 */ 830 if (old < 0) 831 return (EBADF); 832 if (new < 0) 833 return (mode == FDDUP_FCNTL ? EINVAL : EBADF); 834 maxfd = getmaxfd(td); 835 if (new >= maxfd) 836 return (mode == FDDUP_FCNTL ? 
EINVAL : EBADF); 837 838 error = EBADF; 839 FILEDESC_XLOCK(fdp); 840 if (fget_locked(fdp, old) == NULL) 841 goto unlock; 842 if ((mode == FDDUP_FIXED || mode == FDDUP_MUSTREPLACE) && old == new) { 843 td->td_retval[0] = new; 844 if (flags & FDDUP_FLAG_CLOEXEC) 845 fdp->fd_ofiles[new].fde_flags |= UF_EXCLOSE; 846 error = 0; 847 goto unlock; 848 } 849 850 /* 851 * If the caller specified a file descriptor, make sure the file 852 * table is large enough to hold it, and grab it. Otherwise, just 853 * allocate a new descriptor the usual way. 854 */ 855 switch (mode) { 856 case FDDUP_NORMAL: 857 case FDDUP_FCNTL: 858 if ((error = fdalloc(td, new, &new)) != 0) 859 goto unlock; 860 break; 861 case FDDUP_MUSTREPLACE: 862 /* Target file descriptor must exist. */ 863 if (fget_locked(fdp, new) == NULL) 864 goto unlock; 865 break; 866 case FDDUP_FIXED: 867 if (new >= fdp->fd_nfiles) { 868 /* 869 * The resource limits are here instead of e.g. 870 * fdalloc(), because the file descriptor table may be 871 * shared between processes, so we can't really use 872 * racct_add()/racct_sub(). Instead of counting the 873 * number of actually allocated descriptors, just put 874 * the limit on the size of the file descriptor table. 875 */ 876 #ifdef RACCT 877 if (racct_enable) { 878 PROC_LOCK(p); 879 error = racct_set(p, RACCT_NOFILE, new + 1); 880 PROC_UNLOCK(p); 881 if (error != 0) { 882 error = EMFILE; 883 goto unlock; 884 } 885 } 886 #endif 887 fdgrowtable_exp(fdp, new + 1); 888 } 889 if (!fdisused(fdp, new)) 890 fdused(fdp, new); 891 break; 892 default: 893 KASSERT(0, ("%s unsupported mode %d", __func__, mode)); 894 } 895 896 KASSERT(old != new, ("new fd is same as old")); 897 898 oldfde = &fdp->fd_ofiles[old]; 899 fhold(oldfde->fde_file); 900 newfde = &fdp->fd_ofiles[new]; 901 delfp = newfde->fde_file; 902 903 /* 904 * Duplicate the source descriptor. 
905 */ 906 #ifdef CAPABILITIES 907 seq_write_begin(&newfde->fde_seq); 908 #endif 909 filecaps_free(&newfde->fde_caps); 910 memcpy(newfde, oldfde, fde_change_size); 911 filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps, true); 912 if ((flags & FDDUP_FLAG_CLOEXEC) != 0) 913 newfde->fde_flags = oldfde->fde_flags | UF_EXCLOSE; 914 else 915 newfde->fde_flags = oldfde->fde_flags & ~UF_EXCLOSE; 916 #ifdef CAPABILITIES 917 seq_write_end(&newfde->fde_seq); 918 #endif 919 td->td_retval[0] = new; 920 921 error = 0; 922 923 if (delfp != NULL) { 924 (void) closefp(fdp, new, delfp, td, 1); 925 FILEDESC_UNLOCK_ASSERT(fdp); 926 } else { 927 unlock: 928 FILEDESC_XUNLOCK(fdp); 929 } 930 931 return (error); 932 } 933 934 /* 935 * If sigio is on the list associated with a process or process group, 936 * disable signalling from the device, remove sigio from the list and 937 * free sigio. 938 */ 939 void 940 funsetown(struct sigio **sigiop) 941 { 942 struct sigio *sigio; 943 944 if (*sigiop == NULL) 945 return; 946 SIGIO_LOCK(); 947 sigio = *sigiop; 948 if (sigio == NULL) { 949 SIGIO_UNLOCK(); 950 return; 951 } 952 *(sigio->sio_myref) = NULL; 953 if ((sigio)->sio_pgid < 0) { 954 struct pgrp *pg = (sigio)->sio_pgrp; 955 PGRP_LOCK(pg); 956 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio, 957 sigio, sio_pgsigio); 958 PGRP_UNLOCK(pg); 959 } else { 960 struct proc *p = (sigio)->sio_proc; 961 PROC_LOCK(p); 962 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio, 963 sigio, sio_pgsigio); 964 PROC_UNLOCK(p); 965 } 966 SIGIO_UNLOCK(); 967 crfree(sigio->sio_ucred); 968 free(sigio, M_SIGIO); 969 } 970 971 /* 972 * Free a list of sigio structures. 973 * We only need to lock the SIGIO_LOCK because we have made ourselves 974 * inaccessible to callers of fsetown and therefore do not need to lock 975 * the proc or pgrp struct for the list manipulation. 
976 */ 977 void 978 funsetownlst(struct sigiolst *sigiolst) 979 { 980 struct proc *p; 981 struct pgrp *pg; 982 struct sigio *sigio; 983 984 sigio = SLIST_FIRST(sigiolst); 985 if (sigio == NULL) 986 return; 987 p = NULL; 988 pg = NULL; 989 990 /* 991 * Every entry of the list should belong 992 * to a single proc or pgrp. 993 */ 994 if (sigio->sio_pgid < 0) { 995 pg = sigio->sio_pgrp; 996 PGRP_LOCK_ASSERT(pg, MA_NOTOWNED); 997 } else /* if (sigio->sio_pgid > 0) */ { 998 p = sigio->sio_proc; 999 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 1000 } 1001 1002 SIGIO_LOCK(); 1003 while ((sigio = SLIST_FIRST(sigiolst)) != NULL) { 1004 *(sigio->sio_myref) = NULL; 1005 if (pg != NULL) { 1006 KASSERT(sigio->sio_pgid < 0, 1007 ("Proc sigio in pgrp sigio list")); 1008 KASSERT(sigio->sio_pgrp == pg, 1009 ("Bogus pgrp in sigio list")); 1010 PGRP_LOCK(pg); 1011 SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, 1012 sio_pgsigio); 1013 PGRP_UNLOCK(pg); 1014 } else /* if (p != NULL) */ { 1015 KASSERT(sigio->sio_pgid > 0, 1016 ("Pgrp sigio in proc sigio list")); 1017 KASSERT(sigio->sio_proc == p, 1018 ("Bogus proc in sigio list")); 1019 PROC_LOCK(p); 1020 SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, 1021 sio_pgsigio); 1022 PROC_UNLOCK(p); 1023 } 1024 SIGIO_UNLOCK(); 1025 crfree(sigio->sio_ucred); 1026 free(sigio, M_SIGIO); 1027 SIGIO_LOCK(); 1028 } 1029 SIGIO_UNLOCK(); 1030 } 1031 1032 /* 1033 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg). 1034 * 1035 * After permission checking, add a sigio structure to the sigio list for 1036 * the process or process group. 1037 */ 1038 int 1039 fsetown(pid_t pgid, struct sigio **sigiop) 1040 { 1041 struct proc *proc; 1042 struct pgrp *pgrp; 1043 struct sigio *sigio; 1044 int ret; 1045 1046 if (pgid == 0) { 1047 funsetown(sigiop); 1048 return (0); 1049 } 1050 1051 ret = 0; 1052 1053 /* Allocate and fill in the new sigio out of locks. 
*/ 1054 sigio = malloc(sizeof(struct sigio), M_SIGIO, M_WAITOK); 1055 sigio->sio_pgid = pgid; 1056 sigio->sio_ucred = crhold(curthread->td_ucred); 1057 sigio->sio_myref = sigiop; 1058 1059 sx_slock(&proctree_lock); 1060 if (pgid > 0) { 1061 proc = pfind(pgid); 1062 if (proc == NULL) { 1063 ret = ESRCH; 1064 goto fail; 1065 } 1066 1067 /* 1068 * Policy - Don't allow a process to FSETOWN a process 1069 * in another session. 1070 * 1071 * Remove this test to allow maximum flexibility or 1072 * restrict FSETOWN to the current process or process 1073 * group for maximum safety. 1074 */ 1075 PROC_UNLOCK(proc); 1076 if (proc->p_session != curthread->td_proc->p_session) { 1077 ret = EPERM; 1078 goto fail; 1079 } 1080 1081 pgrp = NULL; 1082 } else /* if (pgid < 0) */ { 1083 pgrp = pgfind(-pgid); 1084 if (pgrp == NULL) { 1085 ret = ESRCH; 1086 goto fail; 1087 } 1088 PGRP_UNLOCK(pgrp); 1089 1090 /* 1091 * Policy - Don't allow a process to FSETOWN a process 1092 * in another session. 1093 * 1094 * Remove this test to allow maximum flexibility or 1095 * restrict FSETOWN to the current process or process 1096 * group for maximum safety. 1097 */ 1098 if (pgrp->pg_session != curthread->td_proc->p_session) { 1099 ret = EPERM; 1100 goto fail; 1101 } 1102 1103 proc = NULL; 1104 } 1105 funsetown(sigiop); 1106 if (pgid > 0) { 1107 PROC_LOCK(proc); 1108 /* 1109 * Since funsetownlst() is called without the proctree 1110 * locked, we need to check for P_WEXIT. 1111 * XXX: is ESRCH correct? 
1112 */ 1113 if ((proc->p_flag & P_WEXIT) != 0) { 1114 PROC_UNLOCK(proc); 1115 ret = ESRCH; 1116 goto fail; 1117 } 1118 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio); 1119 sigio->sio_proc = proc; 1120 PROC_UNLOCK(proc); 1121 } else { 1122 PGRP_LOCK(pgrp); 1123 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio); 1124 sigio->sio_pgrp = pgrp; 1125 PGRP_UNLOCK(pgrp); 1126 } 1127 sx_sunlock(&proctree_lock); 1128 SIGIO_LOCK(); 1129 *sigiop = sigio; 1130 SIGIO_UNLOCK(); 1131 return (0); 1132 1133 fail: 1134 sx_sunlock(&proctree_lock); 1135 crfree(sigio->sio_ucred); 1136 free(sigio, M_SIGIO); 1137 return (ret); 1138 } 1139 1140 /* 1141 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg). 1142 */ 1143 pid_t 1144 fgetown(sigiop) 1145 struct sigio **sigiop; 1146 { 1147 pid_t pgid; 1148 1149 SIGIO_LOCK(); 1150 pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0; 1151 SIGIO_UNLOCK(); 1152 return (pgid); 1153 } 1154 1155 /* 1156 * Function drops the filedesc lock on return. 1157 */ 1158 static int 1159 closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, 1160 int holdleaders) 1161 { 1162 int error; 1163 1164 FILEDESC_XLOCK_ASSERT(fdp); 1165 1166 if (holdleaders) { 1167 if (td->td_proc->p_fdtol != NULL) { 1168 /* 1169 * Ask fdfree() to sleep to ensure that all relevant 1170 * process leaders can be traversed in closef(). 1171 */ 1172 fdp->fd_holdleaderscount++; 1173 } else { 1174 holdleaders = 0; 1175 } 1176 } 1177 1178 /* 1179 * We now hold the fp reference that used to be owned by the 1180 * descriptor array. We have to unlock the FILEDESC *AFTER* 1181 * knote_fdclose to prevent a race of the fd getting opened, a knote 1182 * added, and deleteing a knote for the new fd. 1183 */ 1184 knote_fdclose(td, fd); 1185 1186 /* 1187 * We need to notify mqueue if the object is of type mqueue. 
1188 */ 1189 if (fp->f_type == DTYPE_MQUEUE) 1190 mq_fdclose(td, fd, fp); 1191 FILEDESC_XUNLOCK(fdp); 1192 1193 error = closef(fp, td); 1194 if (holdleaders) { 1195 FILEDESC_XLOCK(fdp); 1196 fdp->fd_holdleaderscount--; 1197 if (fdp->fd_holdleaderscount == 0 && 1198 fdp->fd_holdleaderswakeup != 0) { 1199 fdp->fd_holdleaderswakeup = 0; 1200 wakeup(&fdp->fd_holdleaderscount); 1201 } 1202 FILEDESC_XUNLOCK(fdp); 1203 } 1204 return (error); 1205 } 1206 1207 /* 1208 * Close a file descriptor. 1209 */ 1210 #ifndef _SYS_SYSPROTO_H_ 1211 struct close_args { 1212 int fd; 1213 }; 1214 #endif 1215 /* ARGSUSED */ 1216 int 1217 sys_close(struct thread *td, struct close_args *uap) 1218 { 1219 1220 return (kern_close(td, uap->fd)); 1221 } 1222 1223 int 1224 kern_close(struct thread *td, int fd) 1225 { 1226 struct filedesc *fdp; 1227 struct file *fp; 1228 1229 fdp = td->td_proc->p_fd; 1230 1231 AUDIT_SYSCLOSE(td, fd); 1232 1233 FILEDESC_XLOCK(fdp); 1234 if ((fp = fget_locked(fdp, fd)) == NULL) { 1235 FILEDESC_XUNLOCK(fdp); 1236 return (EBADF); 1237 } 1238 fdfree(fdp, fd); 1239 1240 /* closefp() drops the FILEDESC lock for us. */ 1241 return (closefp(fdp, fd, fp, td, 1)); 1242 } 1243 1244 /* 1245 * Close open file descriptors. 1246 */ 1247 #ifndef _SYS_SYSPROTO_H_ 1248 struct closefrom_args { 1249 int lowfd; 1250 }; 1251 #endif 1252 /* ARGSUSED */ 1253 int 1254 sys_closefrom(struct thread *td, struct closefrom_args *uap) 1255 { 1256 struct filedesc *fdp; 1257 int fd; 1258 1259 fdp = td->td_proc->p_fd; 1260 AUDIT_ARG_FD(uap->lowfd); 1261 1262 /* 1263 * Treat negative starting file descriptor values identical to 1264 * closefrom(0) which closes all files. 
 */
	if (uap->lowfd < 0)
		uap->lowfd = 0;
	FILEDESC_SLOCK(fdp);
	for (fd = uap->lowfd; fd <= fdp->fd_lastfile; fd++) {
		if (fdp->fd_ofiles[fd].fde_file != NULL) {
			/*
			 * kern_close() takes the exclusive table lock itself,
			 * so drop our shared lock around the call and
			 * re-acquire it to continue the scan.
			 */
			FILEDESC_SUNLOCK(fdp);
			(void)kern_close(td, fd);
			FILEDESC_SLOCK(fdp);
		}
	}
	FILEDESC_SUNLOCK(fdp);
	return (0);
}

#if defined(COMPAT_43)
/*
 * Return status information about a file descriptor.
 * 4.3BSD compatibility: converts to the old "struct ostat" layout.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/* ARGSUSED */
int
ofstat(struct thread *td, struct ofstat_args *uap)
{
	struct ostat oub;
	struct stat ub;
	int error;

	error = kern_fstat(td, uap->fd, &ub);
	if (error == 0) {
		cvtstat(&ub, &oub);
		error = copyout(&oub, uap->sb, sizeof(oub));
	}
	return (error);
}
#endif /* COMPAT_43 */

#if defined(COMPAT_FREEBSD11)
/*
 * FreeBSD 11 compatible fstat(2): stats via kern_fstat() and converts to
 * the pre-ino64 "struct freebsd11_stat" layout before copying out.
 */
int
freebsd11_fstat(struct thread *td, struct freebsd11_fstat_args *uap)
{
	struct stat sb;
	struct freebsd11_stat osb;
	int error;

	error = kern_fstat(td, uap->fd, &sb);
	if (error != 0)
		return (error);
	error = freebsd11_cvtstat(&sb, &osb);
	if (error == 0)
		error = copyout(&osb, uap->sb, sizeof(osb));
	return (error);
}
#endif /* COMPAT_FREEBSD11 */

/*
 * Return status information about a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct fstat_args {
	int	fd;
	struct	stat *sb;
};
#endif
/* ARGSUSED */
int
sys_fstat(struct thread *td, struct fstat_args *uap)
{
	struct stat ub;
	int error;

	error = kern_fstat(td, uap->fd, &ub);
	if (error == 0)
		error = copyout(&ub, uap->sb, sizeof(ub));
	return (error);
}

/*
 * Common fstat(2) backend: looks up the file (requires CAP_FSTAT) and
 * fills *sbp via the file's fo_stat method.  Returns 0 or an errno.
 */
int
kern_fstat(struct thread *td, int fd, struct stat *sbp)
{
	struct file *fp;
	cap_rights_t rights;
	int error;

	AUDIT_ARG_FD(fd);

	error = fget(td, fd, cap_rights_init(&rights, CAP_FSTAT), &fp);
	if (error != 0)
		return (error);

	AUDIT_ARG_FILE(td->td_proc, fp);

	error = fo_stat(fp, sbp, td->td_ucred, td);
	fdrop(fp, td);
#ifdef __STAT_TIME_T_EXT
	/* Zero the spare time_t extension fields so no stack garbage leaks. */
	if (error == 0) {
		sbp->st_atim_ext = 0;
		sbp->st_mtim_ext = 0;
		sbp->st_ctim_ext = 0;
		sbp->st_btim_ext = 0;
	}
#endif
#ifdef KTRACE
	if (error == 0 && KTRPOINT(td, KTR_STRUCT))
		ktrstat(sbp);
#endif
	return (error);
}

#if defined(COMPAT_FREEBSD11)
/*
 * Return status information about a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct freebsd11_nfstat_args {
	int	fd;
	struct	nstat *sb;
};
#endif
/* ARGSUSED */
int
freebsd11_nfstat(struct thread *td, struct freebsd11_nfstat_args *uap)
{
	struct nstat nub;
	struct stat ub;
	int error;

	error = kern_fstat(td, uap->fd, &ub);
	if (error == 0) {
		freebsd11_cvtnstat(&ub, &nub);
		error = copyout(&nub, uap->sb, sizeof(nub));
	}
	return (error);
}
#endif /* COMPAT_FREEBSD11 */

/*
 * Return pathconf information about a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct fpathconf_args {
	int	fd;
	int	name;
};
#endif
/* ARGSUSED */
int
sys_fpathconf(struct thread *td, struct fpathconf_args *uap)
{
	struct file *fp;
	struct vnode *vp;
	cap_rights_t rights;
	int error;

	error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FPATHCONF), &fp);
	if (error != 0)
		return (error);

	/* _PC_ASYNC_IO is answered generically, without consulting the file. */
	if (uap->name == _PC_ASYNC_IO) {
		td->td_retval[0] = _POSIX_ASYNCHRONOUS_IO;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp != NULL) {
		vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
		VOP_UNLOCK(vp, 0);
	} else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
		/* Pipes and sockets only support the _PC_PIPE_BUF query. */
		if (uap->name != _PC_PIPE_BUF) {
			error = EINVAL;
		} else {
			td->td_retval[0] = PIPE_BUF;
			error = 0;
		}
	} else {
		error = EOPNOTSUPP;
	}
out:
	fdrop(fp, td);
	return (error);
}

/*
 * Initialize filecaps structure.
 * fc_nioctls == -1 means "all ioctls allowed" (no ioctl list present).
 */
void
filecaps_init(struct filecaps *fcaps)
{

	bzero(fcaps, sizeof(*fcaps));
	fcaps->fc_nioctls = -1;
}

/*
 * Copy filecaps structure allocating memory for ioctls array if needed.
 *
 * The last parameter indicates whether the fdtable is locked. If it is not and
 * ioctls are encountered, copying fails and the caller must lock the table.
 *
 * Note that if the table was not locked, the caller has to check the relevant
 * sequence counter to determine whether the operation was successful.
1471 */ 1472 int 1473 filecaps_copy(const struct filecaps *src, struct filecaps *dst, bool locked) 1474 { 1475 size_t size; 1476 1477 *dst = *src; 1478 if (src->fc_ioctls == NULL) 1479 return (0); 1480 if (!locked) 1481 return (1); 1482 1483 KASSERT(src->fc_nioctls > 0, 1484 ("fc_ioctls != NULL, but fc_nioctls=%hd", src->fc_nioctls)); 1485 1486 size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls; 1487 dst->fc_ioctls = malloc(size, M_FILECAPS, M_WAITOK); 1488 bcopy(src->fc_ioctls, dst->fc_ioctls, size); 1489 return (0); 1490 } 1491 1492 /* 1493 * Move filecaps structure to the new place and clear the old place. 1494 */ 1495 void 1496 filecaps_move(struct filecaps *src, struct filecaps *dst) 1497 { 1498 1499 *dst = *src; 1500 bzero(src, sizeof(*src)); 1501 } 1502 1503 /* 1504 * Fill the given filecaps structure with full rights. 1505 */ 1506 static void 1507 filecaps_fill(struct filecaps *fcaps) 1508 { 1509 1510 CAP_ALL(&fcaps->fc_rights); 1511 fcaps->fc_ioctls = NULL; 1512 fcaps->fc_nioctls = -1; 1513 fcaps->fc_fcntls = CAP_FCNTL_ALL; 1514 } 1515 1516 /* 1517 * Free memory allocated within filecaps structure. 1518 */ 1519 void 1520 filecaps_free(struct filecaps *fcaps) 1521 { 1522 1523 free(fcaps->fc_ioctls, M_FILECAPS); 1524 bzero(fcaps, sizeof(*fcaps)); 1525 } 1526 1527 /* 1528 * Validate the given filecaps structure. 1529 */ 1530 static void 1531 filecaps_validate(const struct filecaps *fcaps, const char *func) 1532 { 1533 1534 KASSERT(cap_rights_is_valid(&fcaps->fc_rights), 1535 ("%s: invalid rights", func)); 1536 KASSERT((fcaps->fc_fcntls & ~CAP_FCNTL_ALL) == 0, 1537 ("%s: invalid fcntls", func)); 1538 KASSERT(fcaps->fc_fcntls == 0 || 1539 cap_rights_is_set(&fcaps->fc_rights, CAP_FCNTL), 1540 ("%s: fcntls without CAP_FCNTL", func)); 1541 KASSERT(fcaps->fc_ioctls != NULL ? 
fcaps->fc_nioctls > 0 : 1542 (fcaps->fc_nioctls == -1 || fcaps->fc_nioctls == 0), 1543 ("%s: invalid ioctls", func)); 1544 KASSERT(fcaps->fc_nioctls == 0 || 1545 cap_rights_is_set(&fcaps->fc_rights, CAP_IOCTL), 1546 ("%s: ioctls without CAP_IOCTL", func)); 1547 } 1548 1549 static void 1550 fdgrowtable_exp(struct filedesc *fdp, int nfd) 1551 { 1552 int nfd1; 1553 1554 FILEDESC_XLOCK_ASSERT(fdp); 1555 1556 nfd1 = fdp->fd_nfiles * 2; 1557 if (nfd1 < nfd) 1558 nfd1 = nfd; 1559 fdgrowtable(fdp, nfd1); 1560 } 1561 1562 /* 1563 * Grow the file table to accommodate (at least) nfd descriptors. 1564 */ 1565 static void 1566 fdgrowtable(struct filedesc *fdp, int nfd) 1567 { 1568 struct filedesc0 *fdp0; 1569 struct freetable *ft; 1570 struct fdescenttbl *ntable; 1571 struct fdescenttbl *otable; 1572 int nnfiles, onfiles; 1573 NDSLOTTYPE *nmap, *omap; 1574 1575 /* 1576 * If lastfile is -1 this struct filedesc was just allocated and we are 1577 * growing it to accommodate for the one we are going to copy from. There 1578 * is no need to have a lock on this one as it's not visible to anyone. 1579 */ 1580 if (fdp->fd_lastfile != -1) 1581 FILEDESC_XLOCK_ASSERT(fdp); 1582 1583 KASSERT(fdp->fd_nfiles > 0, ("zero-length file table")); 1584 1585 /* save old values */ 1586 onfiles = fdp->fd_nfiles; 1587 otable = fdp->fd_files; 1588 omap = fdp->fd_map; 1589 1590 /* compute the size of the new table */ 1591 nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */ 1592 if (nnfiles <= onfiles) 1593 /* the table is already large enough */ 1594 return; 1595 1596 /* 1597 * Allocate a new table. We need enough space for the number of 1598 * entries, file entries themselves and the struct freetable we will use 1599 * when we decommission the table and place it on the freelist. 1600 * We place the struct freetable in the middle so we don't have 1601 * to worry about padding. 
 */
	ntable = malloc(offsetof(struct fdescenttbl, fdt_ofiles) +
	    nnfiles * sizeof(ntable->fdt_ofiles[0]) +
	    sizeof(struct freetable),
	    M_FILEDESC, M_ZERO | M_WAITOK);
	/* copy the old data */
	ntable->fdt_nfiles = nnfiles;
	memcpy(ntable->fdt_ofiles, otable->fdt_ofiles,
	    onfiles * sizeof(ntable->fdt_ofiles[0]));

	/*
	 * Allocate a new map only if the old is not large enough.  It will
	 * grow at a slower rate than the table as it can map more
	 * entries than the table can hold.
	 */
	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
		nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE, M_FILEDESC,
		    M_ZERO | M_WAITOK);
		/* copy over the old data and update the pointer */
		memcpy(nmap, omap, NDSLOTS(onfiles) * sizeof(*omap));
		fdp->fd_map = nmap;
	}

	/*
	 * Make sure that ntable is correctly initialized before we replace
	 * fd_files pointer.  Otherwise fget_unlocked() may see inconsistent
	 * data.
	 */
	atomic_store_rel_ptr((volatile void *)&fdp->fd_files, (uintptr_t)ntable);

	/*
	 * Do not free the old file table, as some threads may still
	 * reference entries within it.  Instead, place it on a freelist
	 * which will be processed when the struct filedesc is released.
	 *
	 * Note that if onfiles == NDFILE, we're dealing with the original
	 * static allocation contained within (struct filedesc0 *)fdp,
	 * which must not be freed.
	 */
	if (onfiles > NDFILE) {
		/* The freetable slot was reserved past the entries above. */
		ft = (struct freetable *)&otable->fdt_ofiles[onfiles];
		fdp0 = (struct filedesc0 *)fdp;
		ft->ft_table = otable;
		SLIST_INSERT_HEAD(&fdp0->fd_free, ft, ft_next);
	}
	/*
	 * The map does not have the same possibility of threads still
	 * holding references to it.  So always free it as long as it
	 * does not reference the original static allocation.
	 */
	if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
		free(omap, M_FILEDESC);
}

/*
 * Allocate a file descriptor for the process.
 */
int
fdalloc(struct thread *td, int minfd, int *result)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	int fd, maxfd, allocfd;
#ifdef RACCT
	int error;
#endif

	FILEDESC_XLOCK_ASSERT(fdp);

	if (fdp->fd_freefile > minfd)
		minfd = fdp->fd_freefile;

	maxfd = getmaxfd(td);

	/*
	 * Search the bitmap for a free descriptor starting at minfd.
	 * If none is found, grow the file table.
	 */
	fd = fd_first_free(fdp, minfd, fdp->fd_nfiles);
	if (fd >= maxfd)
		return (EMFILE);
	if (fd >= fdp->fd_nfiles) {
		allocfd = min(fd * 2, maxfd);
#ifdef RACCT
		/* Charge the enlarged table against the process's racct. */
		if (racct_enable) {
			PROC_LOCK(p);
			error = racct_set(p, RACCT_NOFILE, allocfd);
			PROC_UNLOCK(p);
			if (error != 0)
				return (EMFILE);
		}
#endif
		/*
		 * fd is already equal to first free descriptor >= minfd, so
		 * we only need to grow the table and we are done.
		 */
		fdgrowtable_exp(fdp, allocfd);
	}

	/*
	 * Perform some sanity checks, then mark the file descriptor as
	 * used and return it to the caller.
	 */
	KASSERT(fd >= 0 && fd < min(maxfd, fdp->fd_nfiles),
	    ("invalid descriptor %d", fd));
	KASSERT(!fdisused(fdp, fd),
	    ("fd_first_free() returned non-free descriptor"));
	KASSERT(fdp->fd_ofiles[fd].fde_file == NULL,
	    ("file descriptor isn't free"));
	fdused(fdp, fd);
	*result = fd;
	return (0);
}

/*
 * Allocate n file descriptors for the process.
 */
int
fdallocn(struct thread *td, int minfd, int *fds, int n)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	int i;

	FILEDESC_XLOCK_ASSERT(fdp);

	for (i = 0; i < n; i++)
		if (fdalloc(td, 0, &fds[i]) != 0)
			break;

	/* On partial failure roll back the descriptors already reserved. */
	if (i < n) {
		for (i--; i >= 0; i--)
			fdunused(fdp, fds[i]);
		return (EMFILE);
	}

	return (0);
}

/*
 * Create a new open file structure and allocate a file descriptor for the
 * process that refers to it.  We add one reference to the file for the
 * descriptor table and one reference for resultfp.  This is to prevent us
 * being preempted and the entry in the descriptor table closed after we
 * release the FILEDESC lock.
 */
int
falloc_caps(struct thread *td, struct file **resultfp, int *resultfd, int flags,
    struct filecaps *fcaps)
{
	struct file *fp;
	int error, fd;

	error = falloc_noinstall(td, &fp);
	if (error)
		return (error);		/* no reference held on error */

	error = finstall(td, fp, &fd, flags, fcaps);
	if (error) {
		fdrop(fp, td);		/* one reference (fp only) */
		return (error);
	}

	if (resultfp != NULL)
		*resultfp = fp;		/* copy out result */
	else
		fdrop(fp, td);		/* release local reference */

	if (resultfd != NULL)
		*resultfd = fd;

	return (0);
}

/*
 * Create a new open file structure without allocating a file descriptor.
 */
int
falloc_noinstall(struct thread *td, struct file **resultfp)
{
	struct file *fp;
	int maxuserfiles = maxfiles - (maxfiles / 20);
	int openfiles_new;
	static struct timeval lastfail;
	static int curfail;

	KASSERT(resultfp != NULL, ("%s: resultfp == NULL", __func__));

	/*
	 * Reserve a slot in the global open-file count first; back it out
	 * if the limit check fails.  Unprivileged users are cut off 5%
	 * below the absolute kern.maxfiles limit.
	 */
	openfiles_new = atomic_fetchadd_int(&openfiles, 1) + 1;
	if ((openfiles_new >= maxuserfiles &&
	    priv_check(td, PRIV_MAXFILES) != 0) ||
	    openfiles_new >= maxfiles) {
		atomic_subtract_int(&openfiles, 1);
		/* Rate-limit the console warning to once per second. */
		if (ppsratecheck(&lastfail, &curfail, 1)) {
			printf("kern.maxfiles limit exceeded by uid %i, (%s) "
			    "please see tuning(7).\n", td->td_ucred->cr_ruid, td->td_proc->p_comm);
		}
		return (ENFILE);
	}
	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
	refcount_init(&fp->f_count, 1);
	fp->f_cred = crhold(td->td_ucred);
	fp->f_ops = &badfileops;
	*resultfp = fp;
	return (0);
}

/*
 * Install a file in a file descriptor table.
 * Caller holds the table exclusively locked; fd must already be reserved.
 */
void
_finstall(struct filedesc *fdp, struct file *fp, int fd, int flags,
    struct filecaps *fcaps)
{
	struct filedescent *fde;

	MPASS(fp != NULL);
	if (fcaps != NULL)
		filecaps_validate(fcaps, __func__);
	FILEDESC_XLOCK_ASSERT(fdp);

	fde = &fdp->fd_ofiles[fd];
#ifdef CAPABILITIES
	/* Bump the seq counter so lockless readers can detect the update. */
	seq_write_begin(&fde->fde_seq);
#endif
	fde->fde_file = fp;
	fde->fde_flags = (flags & O_CLOEXEC) != 0 ? UF_EXCLOSE : 0;
	if (fcaps != NULL)
		filecaps_move(fcaps, &fde->fde_caps);
	else
		filecaps_fill(&fde->fde_caps);
#ifdef CAPABILITIES
	seq_write_end(&fde->fde_seq);
#endif
}

/*
 * Allocate a descriptor and install fp into it, taking a table reference
 * on fp.  Returns 0 with *fd set, or an errno from fdalloc().
 */
int
finstall(struct thread *td, struct file *fp, int *fd, int flags,
    struct filecaps *fcaps)
{
	struct filedesc *fdp = td->td_proc->p_fd;
	int error;

	MPASS(fd != NULL);

	FILEDESC_XLOCK(fdp);
	if ((error = fdalloc(td, 0, fd))) {
		FILEDESC_XUNLOCK(fdp);
		return (error);
	}
	fhold(fp);
	_finstall(fdp, fp, *fd, flags, fcaps);
	FILEDESC_XUNLOCK(fdp);
	return (0);
}

/*
 * Build a new filedesc structure from another.
 * Copy the current, root, and jail root vnode references.
 *
 * If fdp is not NULL, return with it shared locked.
 */
struct filedesc *
fdinit(struct filedesc *fdp, bool prepfiles)
{
	struct filedesc0 *newfdp0;
	struct filedesc *newfdp;

	newfdp0 = uma_zalloc(filedesc0_zone, M_WAITOK | M_ZERO);
	newfdp = &newfdp0->fd_fd;

	/* Create the file descriptor table.
 */
	FILEDESC_LOCK_INIT(newfdp);
	refcount_init(&newfdp->fd_refcnt, 1);
	refcount_init(&newfdp->fd_holdcnt, 1);
	newfdp->fd_cmask = CMASK;
	newfdp->fd_map = newfdp0->fd_dmap;
	newfdp->fd_lastfile = -1;
	/* Start with the small static table embedded in filedesc0. */
	newfdp->fd_files = (struct fdescenttbl *)&newfdp0->fd_dfiles;
	newfdp->fd_files->fdt_nfiles = NDFILE;

	if (fdp == NULL)
		return (newfdp);

	/* Pre-grow before taking the lock; newfdp is not yet visible. */
	if (prepfiles && fdp->fd_lastfile >= newfdp->fd_nfiles)
		fdgrowtable(newfdp, fdp->fd_lastfile + 1);

	FILEDESC_SLOCK(fdp);
	newfdp->fd_cdir = fdp->fd_cdir;
	if (newfdp->fd_cdir)
		vrefact(newfdp->fd_cdir);
	newfdp->fd_rdir = fdp->fd_rdir;
	if (newfdp->fd_rdir)
		vrefact(newfdp->fd_rdir);
	newfdp->fd_jdir = fdp->fd_jdir;
	if (newfdp->fd_jdir)
		vrefact(newfdp->fd_jdir);

	if (!prepfiles) {
		FILEDESC_SUNLOCK(fdp);
	} else {
		/*
		 * fdp may have grown while we were growing newfdp above;
		 * retry until newfdp is large enough.  Returns with fdp
		 * still shared locked.
		 */
		while (fdp->fd_lastfile >= newfdp->fd_nfiles) {
			FILEDESC_SUNLOCK(fdp);
			fdgrowtable(newfdp, fdp->fd_lastfile + 1);
			FILEDESC_SLOCK(fdp);
		}
	}

	return (newfdp);
}

/*
 * Acquire a hold on a filedesc structure; prevents it from being freed
 * (but not from being torn down).  Returns NULL if the process has no
 * descriptor table.
 */
static struct filedesc *
fdhold(struct proc *p)
{
	struct filedesc *fdp;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	fdp = p->p_fd;
	if (fdp != NULL)
		refcount_acquire(&fdp->fd_holdcnt);
	return (fdp);
}

/*
 * Drop a hold; the last dropper destroys the lock and frees the structure.
 */
static void
fddrop(struct filedesc *fdp)
{

	/* holdcnt == 1 means we are the last holder: skip the atomic. */
	if (fdp->fd_holdcnt > 1) {
		if (refcount_release(&fdp->fd_holdcnt) == 0)
			return;
	}

	FILEDESC_LOCK_DESTROY(fdp);
	uma_zfree(filedesc0_zone, fdp);
}

/*
 * Share a filedesc structure.
 */
struct filedesc *
fdshare(struct filedesc *fdp)
{

	refcount_acquire(&fdp->fd_refcnt);
	return (fdp);
}

/*
 * Unshare a filedesc structure, if necessary by making a copy
 */
void
fdunshare(struct thread *td)
{
	struct filedesc *tmp;
	struct proc *p = td->td_proc;

	if (p->p_fd->fd_refcnt == 1)
		return;

	tmp = fdcopy(p->p_fd);
	fdescfree(td);
	p->p_fd = tmp;
}

/*
 * Replace the process's descriptor table with an already-built one
 * (used with fdcopy_remapped()); the old table reference is released.
 */
void
fdinstall_remapped(struct thread *td, struct filedesc *fdp)
{

	fdescfree(td);
	td->td_proc->p_fd = fdp;
}

/*
 * Copy a filedesc structure.  fdp must not be NULL (asserted below).
 * Only passable descriptors (DFLAG_PASSABLE, i.e. not kqueues) are
 * copied; other slots are left free in the new table.
 */
struct filedesc *
fdcopy(struct filedesc *fdp)
{
	struct filedesc *newfdp;
	struct filedescent *nfde, *ofde;
	int i;

	MPASS(fdp != NULL);

	/* fdinit() returns with fdp shared locked. */
	newfdp = fdinit(fdp, true);
	/* copy all passable descriptors (i.e. not kqueue) */
	newfdp->fd_freefile = -1;
	for (i = 0; i <= fdp->fd_lastfile; ++i) {
		ofde = &fdp->fd_ofiles[i];
		if (ofde->fde_file == NULL ||
		    (ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0) {
			if (newfdp->fd_freefile == -1)
				newfdp->fd_freefile = i;
			continue;
		}
		nfde = &newfdp->fd_ofiles[i];
		*nfde = *ofde;
		filecaps_copy(&ofde->fde_caps, &nfde->fde_caps, true);
		fhold(nfde->fde_file);
		fdused_init(newfdp, i);
		newfdp->fd_lastfile = i;
	}
	if (newfdp->fd_freefile == -1)
		newfdp->fd_freefile = i;
	newfdp->fd_cmask = fdp->fd_cmask;
	FILEDESC_SUNLOCK(fdp);
	return (newfdp);
}

/*
 * Copies a filedesc structure, while remapping all file descriptors
 * stored inside using a translation table.
 *
 * File descriptors are copied over to the new file descriptor table,
 * regardless of whether the close-on-exec flag is set.
 */
int
fdcopy_remapped(struct filedesc *fdp, const int *fds, size_t nfds,
    struct filedesc **ret)
{
	struct filedesc *newfdp;
	struct filedescent *nfde, *ofde;
	int error, i;

	MPASS(fdp != NULL);

	/* fdinit() returns with fdp shared locked. */
	newfdp = fdinit(fdp, true);
	if (nfds > fdp->fd_lastfile + 1) {
		/* New table cannot be larger than the old one. */
		error = E2BIG;
		goto bad;
	}
	/* Copy all passable descriptors (i.e. not kqueue). */
	newfdp->fd_freefile = nfds;
	for (i = 0; i < nfds; ++i) {
		if (fds[i] < 0 || fds[i] > fdp->fd_lastfile) {
			/* File descriptor out of bounds. */
			error = EBADF;
			goto bad;
		}
		ofde = &fdp->fd_ofiles[fds[i]];
		if (ofde->fde_file == NULL) {
			/* Unused file descriptor. */
			error = EBADF;
			goto bad;
		}
		if ((ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0) {
			/* File descriptor cannot be passed. */
			error = EINVAL;
			goto bad;
		}
		nfde = &newfdp->fd_ofiles[i];
		*nfde = *ofde;
		filecaps_copy(&ofde->fde_caps, &nfde->fde_caps, true);
		fhold(nfde->fde_file);
		fdused_init(newfdp, i);
		newfdp->fd_lastfile = i;
	}
	newfdp->fd_cmask = fdp->fd_cmask;
	FILEDESC_SUNLOCK(fdp);
	*ret = newfdp;
	return (0);
bad:
	FILEDESC_SUNLOCK(fdp);
	fdescfree_remapped(newfdp);
	return (error);
}

/*
 * Clear POSIX style locks.  This is only used when fdp loses a reference (i.e.
 * one of processes using it exits) and the table used to be shared.
 */
static void
fdclearlocks(struct thread *td)
{
	struct filedesc *fdp;
	struct filedesc_to_leader *fdtol;
	struct flock lf;
	struct file *fp;
	struct proc *p;
	struct vnode *vp;
	int i;

	p = td->td_proc;
	fdp = p->p_fd;
	fdtol = p->p_fdtol;
	MPASS(fdtol != NULL);

	FILEDESC_XLOCK(fdp);
	KASSERT(fdtol->fdl_refcount > 0,
	    ("filedesc_to_refcount botch: fdl_refcount=%d",
	    fdtol->fdl_refcount));
	if (fdtol->fdl_refcount == 1 &&
	    (p->p_leader->p_flag & P_ADVLOCK) != 0) {
		/* Release the leader's POSIX locks on every open vnode. */
		for (i = 0; i <= fdp->fd_lastfile; i++) {
			fp = fdp->fd_ofiles[i].fde_file;
			if (fp == NULL || fp->f_type != DTYPE_VNODE)
				continue;
			/* Hold fp across the unlocked VOP_ADVLOCK() call. */
			fhold(fp);
			FILEDESC_XUNLOCK(fdp);
			lf.l_whence = SEEK_SET;
			lf.l_start = 0;
			lf.l_len = 0;
			lf.l_type = F_UNLCK;
			vp = fp->f_vnode;
			(void) VOP_ADVLOCK(vp,
			    (caddr_t)p->p_leader, F_UNLCK,
			    &lf, F_POSIX);
			FILEDESC_XLOCK(fdp);
			fdrop(fp, td);
		}
	}
retry:
	if (fdtol->fdl_refcount == 1) {
		if (fdp->fd_holdleaderscount > 0 &&
		    (p->p_leader->p_flag & P_ADVLOCK) != 0) {
			/*
			 * close() or kern_dup() has cleared a reference
			 * in a shared file descriptor table.
			 */
			fdp->fd_holdleaderswakeup = 1;
			sx_sleep(&fdp->fd_holdleaderscount,
			    FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0);
			goto retry;
		}
		if (fdtol->fdl_holdcount > 0) {
			/*
			 * Ensure that fdtol->fdl_leader remains
			 * valid in closef().
 */
			fdtol->fdl_wakeup = 1;
			sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK,
			    "fdlhold", 0);
			goto retry;
		}
	}
	fdtol->fdl_refcount--;
	if (fdtol->fdl_refcount == 0 &&
	    fdtol->fdl_holdcount == 0) {
		/* Last reference: unlink from the leader ring and free. */
		fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
		fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
	} else
		fdtol = NULL;
	p->p_fdtol = NULL;
	FILEDESC_XUNLOCK(fdp);
	if (fdtol != NULL)
		free(fdtol, M_FILEDESC_TO_LEADER);
}

/*
 * Release a filedesc structure.
 */
static void
fdescfree_fds(struct thread *td, struct filedesc *fdp, bool needclose)
{
	struct filedesc0 *fdp0;
	struct freetable *ft, *tft;
	struct filedescent *fde;
	struct file *fp;
	int i;

	for (i = 0; i <= fdp->fd_lastfile; i++) {
		fde = &fdp->fd_ofiles[i];
		fp = fde->fde_file;
		if (fp != NULL) {
			fdefree_last(fde);
			/* needclose: run full close semantics, else just drop. */
			if (needclose)
				(void) closef(fp, td);
			else
				fdrop(fp, td);
		}
	}

	if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE))
		free(fdp->fd_map, M_FILEDESC);
	if (fdp->fd_nfiles > NDFILE)
		free(fdp->fd_files, M_FILEDESC);

	/* Free tables retired by fdgrowtable() (see its freelist comment). */
	fdp0 = (struct filedesc0 *)fdp;
	SLIST_FOREACH_SAFE(ft, &fdp0->fd_free, ft_next, tft)
		free(ft->ft_table, M_FILEDESC);

	fddrop(fdp);
}

/*
 * Release the calling thread's process reference on its descriptor table;
 * the last releaser closes all files and frees the table.
 */
void
fdescfree(struct thread *td)
{
	struct proc *p;
	struct filedesc *fdp;
	struct vnode *cdir, *jdir, *rdir;

	p = td->td_proc;
	fdp = p->p_fd;
	MPASS(fdp != NULL);

#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(p);
		racct_set(p, RACCT_NOFILE, 0);
		PROC_UNLOCK(p);
	}
#endif

	if (p->p_fdtol != NULL)
		fdclearlocks(td);

	PROC_LOCK(p);
	p->p_fd = NULL;
	PROC_UNLOCK(p);

	if (refcount_release(&fdp->fd_refcnt) == 0)
		return;

	FILEDESC_XLOCK(fdp);
	cdir = fdp->fd_cdir;
	fdp->fd_cdir = NULL;
	rdir = fdp->fd_rdir;
	fdp->fd_rdir = NULL;
	jdir = fdp->fd_jdir;
	fdp->fd_jdir = NULL;
	FILEDESC_XUNLOCK(fdp);

	/* Release vnode references outside the table lock. */
	if (cdir != NULL)
		vrele(cdir);
	if (rdir != NULL)
		vrele(rdir);
	if (jdir != NULL)
		vrele(jdir);

	fdescfree_fds(td, fdp, 1);
}

/*
 * Free a remapped table that was never installed (error path of
 * fdcopy_remapped()); its files are dropped, not closed.
 */
void
fdescfree_remapped(struct filedesc *fdp)
{

	if (fdp->fd_cdir != NULL)
		vrele(fdp->fd_cdir);
	if (fdp->fd_rdir != NULL)
		vrele(fdp->fd_rdir);
	if (fdp->fd_jdir != NULL)
		vrele(fdp->fd_jdir);

	fdescfree_fds(curthread, fdp, 0);
}

/*
 * For setugid programs, we don't want people to use that setugidness
 * to generate error messages which write to a file which would otherwise
 * be off-limits to the process.  We check for filesystems where
 * the vnode can change out from under us after execve (like [lin]procfs).
 *
 * Since fdsetugidsafety calls this only for fd 0, 1 and 2, this check is
 * sufficient.  We also don't check for setugidness since we know we are.
 */
static bool
is_unsafe(struct file *fp)
{
	struct vnode *vp;

	if (fp->f_type != DTYPE_VNODE)
		return (false);

	vp = fp->f_vnode;
	return ((vp->v_vflag & VV_PROCDEP) != 0);
}

/*
 * Make this setguid thing safe, if at all possible.
 */
void
fdsetugidsafety(struct thread *td)
{
	struct filedesc *fdp;
	struct file *fp;
	int i;

	fdp = td->td_proc->p_fd;
	KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared"));
	MPASS(fdp->fd_nfiles >= 3);
	for (i = 0; i <= 2; i++) {
		fp = fdp->fd_ofiles[i].fde_file;
		if (fp != NULL && is_unsafe(fp)) {
			FILEDESC_XLOCK(fdp);
			knote_fdclose(td, i);
			/*
			 * NULL-out descriptor prior to close to avoid
			 * a race while close blocks.
			 */
			fdfree(fdp, i);
			FILEDESC_XUNLOCK(fdp);
			(void) closef(fp, td);
		}
	}
}

/*
 * If a specific file object occupies a specific file descriptor, close the
 * file descriptor entry and drop a reference on the file object.  This is a
 * convenience function to handle a subsequent error in a function that calls
 * falloc() that handles the race that another thread might have closed the
 * file descriptor out from under the thread creating the file object.
 */
void
fdclose(struct thread *td, struct file *fp, int idx)
{
	struct filedesc *fdp = td->td_proc->p_fd;

	FILEDESC_XLOCK(fdp);
	/* Only tear down the slot if it still refers to our file. */
	if (fdp->fd_ofiles[idx].fde_file == fp) {
		fdfree(fdp, idx);
		FILEDESC_XUNLOCK(fdp);
		fdrop(fp, td);
	} else
		FILEDESC_XUNLOCK(fdp);
}

/*
 * Close any files on exec?
 * Closes descriptors marked UF_EXCLOSE and all mqueue descriptors.
 */
void
fdcloseexec(struct thread *td)
{
	struct filedesc *fdp;
	struct filedescent *fde;
	struct file *fp;
	int i;

	fdp = td->td_proc->p_fd;
	KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared"));
	for (i = 0; i <= fdp->fd_lastfile; i++) {
		fde = &fdp->fd_ofiles[i];
		fp = fde->fde_file;
		if (fp != NULL && (fp->f_type == DTYPE_MQUEUE ||
		    (fde->fde_flags & UF_EXCLOSE))) {
			FILEDESC_XLOCK(fdp);
			fdfree(fdp, i);
			/* closefp() drops the lock on return. */
			(void) closefp(fdp, i, fp, td, 0);
			FILEDESC_UNLOCK_ASSERT(fdp);
		}
	}
}

/*
 * It is unsafe for set[ug]id processes to be started with file
 * descriptors 0..2 closed, as these descriptors are given implicit
 * significance in the Standard C library.  fdcheckstd() will create a
 * descriptor referencing /dev/null for each of stdin, stdout, and
 * stderr that is not already open.
 */
int
fdcheckstd(struct thread *td)
{
	struct filedesc *fdp;
	register_t save;
	int i, error, devnull;

	fdp = td->td_proc->p_fd;
	KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared"));
	MPASS(fdp->fd_nfiles >= 3);
	devnull = -1;
	for (i = 0; i <= 2; i++) {
		if (fdp->fd_ofiles[i].fde_file != NULL)
			continue;

		/* kern_openat()/kern_dup() clobber td_retval; preserve it. */
		save = td->td_retval[0];
		if (devnull != -1) {
			/* /dev/null is already open; dup it into slot i. */
			error = kern_dup(td, FDDUP_FIXED, 0, devnull, i);
		} else {
			error = kern_openat(td, AT_FDCWD, "/dev/null",
			    UIO_SYSSPACE, O_RDWR, 0);
			if (error == 0) {
				devnull = td->td_retval[0];
				KASSERT(devnull == i, ("we didn't get our fd"));
			}
		}
		td->td_retval[0] = save;
		if (error != 0)
			return (error);
	}
	return (0);
}

/*
 * Internal form of close.  Decrement reference count on file structure.
 * Note: td may be NULL when closing a file that was being passed in a
 * message.
 *
 * XXXRW: Giant is not required for the caller, but often will be held; this
 * makes it moderately likely the Giant will be recursed in the VFS case.
 */
int
closef(struct file *fp, struct thread *td)
{
	struct vnode *vp;
	struct flock lf;
	struct filedesc_to_leader *fdtol;
	struct filedesc *fdp;

	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process.  This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor, and the thread pointer
	 * will be NULL.  Callers should be careful only to pass a
	 * NULL thread pointer when there really is no owning
	 * context that might have locks, or the locks will be
	 * leaked.
	 */
	if (fp->f_type == DTYPE_VNODE && td != NULL) {
		vp = fp->f_vnode;
		if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
			lf.l_whence = SEEK_SET;
			lf.l_start = 0;
			lf.l_len = 0;
			lf.l_type = F_UNLCK;
			(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
			    F_UNLCK, &lf, F_POSIX);
		}
		fdtol = td->td_proc->p_fdtol;
		if (fdtol != NULL) {
			/*
			 * Handle special case where file descriptor table is
			 * shared between multiple process leaders.
			 */
			fdp = td->td_proc->p_fd;
			FILEDESC_XLOCK(fdp);
			for (fdtol = fdtol->fdl_next;
			    fdtol != td->td_proc->p_fdtol;
			    fdtol = fdtol->fdl_next) {
				if ((fdtol->fdl_leader->p_flag &
				    P_ADVLOCK) == 0)
					continue;
				/*
				 * fdl_holdcount keeps fdtol alive while we
				 * drop the table lock for VOP_ADVLOCK();
				 * fdclearlocks() sleeps on it.
				 */
				fdtol->fdl_holdcount++;
				FILEDESC_XUNLOCK(fdp);
				lf.l_whence = SEEK_SET;
				lf.l_start = 0;
				lf.l_len = 0;
				lf.l_type = F_UNLCK;
				vp = fp->f_vnode;
				(void) VOP_ADVLOCK(vp,
				    (caddr_t)fdtol->fdl_leader, F_UNLCK, &lf,
				    F_POSIX);
				FILEDESC_XLOCK(fdp);
				fdtol->fdl_holdcount--;
				if (fdtol->fdl_holdcount == 0 &&
				    fdtol->fdl_wakeup != 0) {
					fdtol->fdl_wakeup = 0;
					wakeup(fdtol);
				}
			}
			FILEDESC_XUNLOCK(fdp);
		}
	}
	return (fdrop(fp, td));
}

/*
 * Initialize the file pointer with the specified properties.
 *
 * The ops are set with release semantics to be certain that the flags, type,
 * and data are visible when ops is.  This is to prevent ops methods from being
 * called with bad data.
 */
void
finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops)
{
	fp->f_data = data;
	fp->f_flag = flag;
	fp->f_type = type;
	/* Release store: publish data/flag/type before f_ops is visible. */
	atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops);
}

/*
 * Look up fd in fdp, check it against the needed rights and, on success,
 * return its file pointer and (if havecapsp != NULL) a copy of its
 * capabilities.  The caller must hold the filedesc lock (asserted below);
 * no new reference is taken on the returned file.
 */
int
fget_cap_locked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,
    struct file **fpp, struct filecaps *havecapsp)
{
	struct filedescent *fde;
	int error;

	FILEDESC_LOCK_ASSERT(fdp);

	fde = fdeget_locked(fdp, fd);
	if (fde == NULL) {
		error = EBADF;
		goto out;
	}

#ifdef CAPABILITIES
	error = cap_check(cap_rights_fde(fde), needrightsp);
	if (error != 0)
		goto out;
#endif

	if (havecapsp != NULL)
		filecaps_copy(&fde->fde_caps, havecapsp, true);

	*fpp = fde->fde_file;

	error = 0;
out:
	return (error);
}

/*
 * Lockless counterpart of fget_cap_locked(): obtain a held reference on
 * the file for fd, optionally copying out its capabilities.  Falls back
 * to a locked lookup when filecaps_copy() cannot complete locklessly.
 */
int
fget_cap(struct thread *td, int fd, cap_rights_t *needrightsp,
    struct file **fpp, struct filecaps *havecapsp)
{
	struct filedesc *fdp = td->td_proc->p_fd;
	int error;
#ifndef CAPABILITIES
	error = fget_unlocked(fdp, fd, needrightsp, fpp, NULL);
	if (error == 0 && havecapsp != NULL)
		filecaps_fill(havecapsp);
#else
	struct file *fp;
	seq_t seq;

	for (;;) {
		error = fget_unlocked(fdp, fd, needrightsp, &fp, &seq);
		if (error != 0)
			return (error);

		if (havecapsp != NULL) {
			/*
			 * NOTE(review): the 'false' argument presumably
			 * requests a non-sleeping copy; on failure we
			 * retry under the lock -- verify the
			 * filecaps_copy() contract.
			 */
			if (!filecaps_copy(&fdp->fd_ofiles[fd].fde_caps,
			    havecapsp, false)) {
				fdrop(fp, td);
				goto get_locked;
			}
		}

		/* Re-validate that the descriptor did not change under us. */
		if (!fd_modified(fdp, fd, seq))
			break;
		fdrop(fp, td);
	}

	*fpp = fp;
	return (0);

get_locked:
	FILEDESC_SLOCK(fdp);
	error = fget_cap_locked(fdp, fd, needrightsp, fpp, havecapsp);
	if (error == 0)
		fhold(*fpp);
	FILEDESC_SUNLOCK(fdp);
#endif
	return (error);
}

int
fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,
    struct file **fpp, seq_t *seqp)
{
#ifdef CAPABILITIES
	struct filedescent *fde;
#endif
	struct fdescenttbl *fdt;
	struct file *fp;
	u_int count;
#ifdef CAPABILITIES
	seq_t seq;
	cap_rights_t haverights;
	int error;
#endif

	fdt = fdp->fd_files;
	if ((u_int)fd >= fdt->fdt_nfiles)
		return (EBADF);
	/*
	 * Fetch the descriptor locklessly.  We avoid fdrop() races by
	 * never raising a refcount above 0.  To accomplish this we have
	 * to use a cmpset loop rather than an atomic_add.  The descriptor
	 * must be re-verified once we acquire a reference to be certain
	 * that the identity is still correct and we did not lose a race
	 * due to preemption.
	 */
	for (;;) {
#ifdef CAPABILITIES
		seq = seq_read(fd_seq(fdt, fd));
		fde = &fdt->fdt_ofiles[fd];
		haverights = *cap_rights_fde(fde);
		fp = fde->fde_file;
		if (!seq_consistent(fd_seq(fdt, fd), seq))
			continue;
#else
		fp = fdt->fdt_ofiles[fd].fde_file;
#endif
		if (fp == NULL)
			return (EBADF);
#ifdef CAPABILITIES
		error = cap_check(&haverights, needrightsp);
		if (error != 0)
			return (error);
#endif
		count = fp->f_count;
	retry:
		if (count == 0) {
			/*
			 * Force a reload. Other thread could reallocate the
			 * table before this fd was closed, so it possible that
			 * there is a stale fp pointer in cached version.
			 */
			fdt = *(struct fdescenttbl * volatile *)&(fdp->fd_files);
			continue;
		}
		/*
		 * Use an acquire barrier to force re-reading of fdt so it is
		 * refreshed for verification.
		 */
		if (atomic_fcmpset_acq_int(&fp->f_count, &count, count + 1) == 0)
			goto retry;
		fdt = fdp->fd_files;
#ifdef CAPABILITIES
		if (seq_consistent_nomb(fd_seq(fdt, fd), seq))
#else
		if (fp == fdt->fdt_ofiles[fd].fde_file)
#endif
			break;
		/* Identity changed while acquiring the reference; retry. */
		fdrop(fp, curthread);
	}
	*fpp = fp;
	if (seqp != NULL) {
#ifdef CAPABILITIES
		/* Note: without CAPABILITIES *seqp is left unmodified. */
		*seqp = seq;
#endif
	}
	return (0);
}

/*
 * Extract the file pointer associated with the specified descriptor for the
 * current user process.
 *
 * If the descriptor doesn't exist or doesn't match 'flags', EBADF is
 * returned.
 *
 * File's rights will be checked against the capability rights mask.
 *
 * If an error occurred the non-zero error is returned and *fpp is set to
 * NULL.  Otherwise *fpp is held and set and zero is returned.  Caller is
 * responsible for fdrop().
 */
static __inline int
_fget(struct thread *td, int fd, struct file **fpp, int flags,
    cap_rights_t *needrightsp, seq_t *seqp)
{
	struct filedesc *fdp;
	struct file *fp;
	int error;

	*fpp = NULL;
	fdp = td->td_proc->p_fd;
	error = fget_unlocked(fdp, fd, needrightsp, &fp, seqp);
	if (error != 0)
		return (error);
	if (fp->f_ops == &badfileops) {
		fdrop(fp, td);
		return (EBADF);
	}

	/*
	 * FREAD and FWRITE failure return EBADF as per POSIX.
	 */
	error = 0;
	switch (flags) {
	case FREAD:
	case FWRITE:
		if ((fp->f_flag & flags) == 0)
			error = EBADF;
		break;
	case FEXEC:
		/* FEXEC: must be readable or executable, and not writable. */
		if ((fp->f_flag & (FREAD | FEXEC)) == 0 ||
		    ((fp->f_flag & FWRITE) != 0))
			error = EBADF;
		break;
	case 0:
		break;
	default:
		KASSERT(0, ("wrong flags"));
	}

	if (error != 0) {
		fdrop(fp, td);
		return (error);
	}

	*fpp = fp;
	return (0);
}

/*
 * Acquire a held reference on the file for fd with no access-mode check.
 */
int
fget(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
{

	return (_fget(td, fd, fpp, 0, rightsp, NULL));
}

/*
 * Like fget(), additionally returning in *maxprotp the maximum mmap
 * protection derived from the descriptor's capability rights
 * (VM_PROT_ALL without CAPABILITIES).
 */
int
fget_mmap(struct thread *td, int fd, cap_rights_t *rightsp, u_char *maxprotp,
    struct file **fpp)
{
	int error;
#ifndef CAPABILITIES
	error = _fget(td, fd, fpp, 0, rightsp, NULL);
	if (maxprotp != NULL)
		*maxprotp = VM_PROT_ALL;
#else
	struct filedesc *fdp = td->td_proc->p_fd;
	seq_t seq;

	MPASS(cap_rights_is_set(rightsp, CAP_MMAP));
	for (;;) {
		error = _fget(td, fd, fpp, 0, rightsp, &seq);
		if (error != 0)
			return (error);
		/*
		 * If requested, convert capability rights to access flags.
		 */
		if (maxprotp != NULL)
			*maxprotp = cap_rights_to_vmprot(cap_rights(fdp, fd));
		/* Retry if the descriptor changed while unlocked. */
		if (!fd_modified(fdp, fd, seq))
			break;
		fdrop(*fpp, td);
	}
#endif
	return (error);
}

/*
 * Like fget(), but the file must be open for reading (FREAD).
 */
int
fget_read(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
{

	return (_fget(td, fd, fpp, FREAD, rightsp, NULL));
}

/*
 * Like fget(), but the file must be open for writing (FWRITE).
 */
int
fget_write(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
{

	return (_fget(td, fd, fpp, FWRITE, rightsp, NULL));
}

/*
 * Like fget(), additionally checking (under CAPABILITIES) that the fcntl
 * command 'needfcntl' is permitted on the descriptor.
 */
int
fget_fcntl(struct thread *td, int fd, cap_rights_t *rightsp, int needfcntl,
    struct file **fpp)
{
	struct filedesc *fdp = td->td_proc->p_fd;
#ifndef CAPABILITIES
	return (fget_unlocked(fdp, fd, rightsp, fpp, NULL));
#else
	int error;
	seq_t seq;

	MPASS(cap_rights_is_set(rightsp, CAP_FCNTL));
	for (;;) {
		error = fget_unlocked(fdp, fd, rightsp, fpp, &seq);
		if (error != 0)
			return (error);
		error = cap_fcntl_check(fdp, fd, needfcntl);
		if (!fd_modified(fdp, fd, seq))
			break;
		fdrop(*fpp, td);
	}
	if (error != 0) {
		fdrop(*fpp, td);
		*fpp = NULL;
	}
	return (error);
#endif
}

/*
 * Like fget() but loads the underlying vnode, or returns an error if the
 * descriptor does not represent a vnode.  Note that pipes use vnodes but
 * never have VM objects.  The returned vnode will be vref()'d.
 *
 * XXX: what about the unused flags ?
 */
static __inline int
_fgetvp(struct thread *td, int fd, int flags, cap_rights_t *needrightsp,
    struct vnode **vpp)
{
	struct file *fp;
	int error;

	*vpp = NULL;
	error = _fget(td, fd, &fp, flags, needrightsp, NULL);
	if (error != 0)
		return (error);
	if (fp->f_vnode == NULL) {
		error = EINVAL;
	} else {
		*vpp = fp->f_vnode;
		/* Take our own vnode reference before dropping the file. */
		vrefact(*vpp);
	}
	fdrop(fp, td);

	return (error);
}

int
fgetvp(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp)
{

	return (_fgetvp(td, fd, 0, rightsp, vpp));
}

/*
 * Like fgetvp(), additionally returning a copy of the descriptor's
 * capabilities in *havecaps.
 *
 * NOTE(review): this calls fget_cap_locked(), which asserts that the
 * filedesc lock is held, yet no lock is taken here -- verify that all
 * callers enter with the lock held, or the assertion can fire.
 */
int
fgetvp_rights(struct thread *td, int fd, cap_rights_t *needrightsp,
    struct filecaps *havecaps, struct vnode **vpp)
{
	struct filedesc *fdp;
	struct filecaps caps;
	struct file *fp;
	int error;

	fdp = td->td_proc->p_fd;
	error = fget_cap_locked(fdp, fd, needrightsp, &fp, &caps);
	if (error != 0)
		return (error);
	if (fp->f_ops == &badfileops) {
		error = EBADF;
		goto out;
	}
	if (fp->f_vnode == NULL) {
		error = EINVAL;
		goto out;
	}

	*havecaps = caps;
	*vpp = fp->f_vnode;
	vrefact(*vpp);

	return (0);
out:
	/* Error path: release the capability copy we took above. */
	filecaps_free(&caps);
	return (error);
}

int
fgetvp_read(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp)
{

	return (_fgetvp(td, fd, FREAD, rightsp, vpp));
}

int
fgetvp_exec(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp)
{

	return (_fgetvp(td, fd, FEXEC, rightsp, vpp));
}

#ifdef notyet
int
fgetvp_write(struct thread *td, int fd, cap_rights_t *rightsp,
    struct vnode **vpp)
{

	return (_fgetvp(td, fd, FWRITE, rightsp, vpp));
}
#endif

/*
 * Handle the last reference to a file being closed.
 */
int
_fdrop(struct file *fp, struct thread *td)
{
	int error;

	/* The reference count must already have dropped to zero. */
	if (fp->f_count != 0)
		panic("fdrop: count %d", fp->f_count);
	error = fo_close(fp, td);
	atomic_subtract_int(&openfiles, 1);
	crfree(fp->f_cred);
	free(fp->f_advice, M_FADVISE);
	uma_zfree(file_zone, fp);

	return (error);
}

/*
 * Apply an advisory lock on a file descriptor.
 *
 * Just attempt to get a record lock of the requested type on the entire file
 * (l_whence = SEEK_SET, l_start = 0, l_len = 0).
 */
#ifndef _SYS_SYSPROTO_H_
struct flock_args {
	int	fd;
	int	how;
};
#endif
/* ARGSUSED */
int
sys_flock(struct thread *td, struct flock_args *uap)
{
	struct file *fp;
	struct vnode *vp;
	struct flock lf;
	cap_rights_t rights;
	int error;

	error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FLOCK), &fp);
	if (error != 0)
		return (error);
	/* flock(2) only operates on vnode-backed descriptors. */
	if (fp->f_type != DTYPE_VNODE) {
		fdrop(fp, td);
		return (EOPNOTSUPP);
	}

	vp = fp->f_vnode;
	lf.l_whence = SEEK_SET;
	lf.l_start = 0;
	lf.l_len = 0;
	if (uap->how & LOCK_UN) {
		lf.l_type = F_UNLCK;
		atomic_clear_int(&fp->f_flag, FHASLOCK);
		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
		goto done2;
	}
	if (uap->how & LOCK_EX)
		lf.l_type = F_WRLCK;
	else if (uap->how & LOCK_SH)
		lf.l_type = F_RDLCK;
	else {
		error = EBADF;
		goto done2;
	}
	atomic_set_int(&fp->f_flag, FHASLOCK);
	/* LOCK_NB requests a non-blocking attempt (no F_WAIT). */
	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
done2:
	fdrop(fp, td);
	return (error);
}
/*
 * Duplicate the specified descriptor to a free descriptor.
 */
int
dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode,
    int openerror, int *indxp)
{
	struct filedescent *newfde, *oldfde;
	struct file *fp;
	int error, indx;

	KASSERT(openerror == ENODEV || openerror == ENXIO,
	    ("unexpected error %d in %s", openerror, __func__));

	/*
	 * If the to-be-dup'd fd number is greater than the allowed number
	 * of file descriptors, or the fd to be dup'd has already been
	 * closed, then reject.
	 */
	FILEDESC_XLOCK(fdp);
	if ((fp = fget_locked(fdp, dfd)) == NULL) {
		FILEDESC_XUNLOCK(fdp);
		return (EBADF);
	}

	error = fdalloc(td, 0, &indx);
	if (error != 0) {
		FILEDESC_XUNLOCK(fdp);
		return (error);
	}

	/*
	 * There are two cases of interest here.
	 *
	 * For ENODEV simply dup (dfd) to file descriptor (indx) and return.
	 *
	 * For ENXIO steal away the file structure from (dfd) and store it in
	 * (indx).  (dfd) is effectively closed by this operation.
	 */
	switch (openerror) {
	case ENODEV:
		/*
		 * Check that the mode the file is being opened for is a
		 * subset of the mode of the existing descriptor.
		 */
		if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) {
			fdunused(fdp, indx);
			FILEDESC_XUNLOCK(fdp);
			return (EACCES);
		}
		fhold(fp);
		newfde = &fdp->fd_ofiles[indx];
		oldfde = &fdp->fd_ofiles[dfd];
#ifdef CAPABILITIES
		/* Publish the new entry atomically w.r.t. lockless readers. */
		seq_write_begin(&newfde->fde_seq);
#endif
		memcpy(newfde, oldfde, fde_change_size);
		filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps, true);
#ifdef CAPABILITIES
		seq_write_end(&newfde->fde_seq);
#endif
		break;
	case ENXIO:
		/*
		 * Steal away the file pointer from dfd and stuff it into indx.
		 */
		newfde = &fdp->fd_ofiles[indx];
		oldfde = &fdp->fd_ofiles[dfd];
#ifdef CAPABILITIES
		seq_write_begin(&newfde->fde_seq);
#endif
		memcpy(newfde, oldfde, fde_change_size);
		oldfde->fde_file = NULL;
		fdunused(fdp, dfd);
#ifdef CAPABILITIES
		seq_write_end(&newfde->fde_seq);
#endif
		break;
	}
	FILEDESC_XUNLOCK(fdp);
	*indxp = indx;
	return (0);
}

/*
 * This sysctl determines if we will allow a process to chroot(2) if it
 * has a directory open:
 *	0: disallowed for all processes.
 *	1: allowed for processes that were not already chroot(2)'ed.
 *	2: allowed for all processes.
 */

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
    &chroot_allow_open_directories, 0,
    "Allow a process to chroot(2) if it has a directory open");

/*
 * Helper function for raised chroot(2) security function:  Refuse if
 * any filedescriptors are open directories.
 */
static int
chroot_refuse_vdir_fds(struct filedesc *fdp)
{
	struct vnode *vp;
	struct file *fp;
	int fd;

	FILEDESC_LOCK_ASSERT(fdp);

	for (fd = 0; fd <= fdp->fd_lastfile; fd++) {
		fp = fget_locked(fdp, fd);
		if (fp == NULL)
			continue;
		if (fp->f_type == DTYPE_VNODE) {
			vp = fp->f_vnode;
			if (vp->v_type == VDIR)
				return (EPERM);
		}
	}
	return (0);
}

/*
 * Common routine for kern_chroot() and jail_attach().  The caller is
 * responsible for invoking priv_check() and mac_vnode_check_chroot() to
 * authorize this operation.
 */
int
pwd_chroot(struct thread *td, struct vnode *vp)
{
	struct filedesc *fdp;
	struct vnode *oldvp;
	int error;

	fdp = td->td_proc->p_fd;
	FILEDESC_XLOCK(fdp);
	if (chroot_allow_open_directories == 0 ||
	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
		error = chroot_refuse_vdir_fds(fdp);
		if (error != 0) {
			FILEDESC_XUNLOCK(fdp);
			return (error);
		}
	}
	oldvp = fdp->fd_rdir;
	vrefact(vp);
	fdp->fd_rdir = vp;
	if (fdp->fd_jdir == NULL) {
		/* First chroot: the jail directory inherits the new root. */
		vrefact(vp);
		fdp->fd_jdir = vp;
	}
	FILEDESC_XUNLOCK(fdp);
	/* Release the old root vnode outside the filedesc lock. */
	vrele(oldvp);
	return (0);
}

/*
 * Install vp as the current working directory and release the reference
 * on the previous one.  No reference is taken on vp here, so the caller's
 * reference is presumably donated -- callers must not vrele() it.
 */
void
pwd_chdir(struct thread *td, struct vnode *vp)
{
	struct filedesc *fdp;
	struct vnode *oldvp;

	fdp = td->td_proc->p_fd;
	FILEDESC_XLOCK(fdp);
	VNASSERT(vp->v_usecount > 0, vp,
	    ("chdir to a vnode with zero usecount"));
	oldvp = fdp->fd_cdir;
	fdp->fd_cdir = vp;
	FILEDESC_XUNLOCK(fdp);
	vrele(oldvp);
}

/*
 * Scan all active processes and prisons to see if any of them have a current
 * or root directory of `olddp'.  If so, replace them with the new mount point.
 */
void
mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
{
	struct filedesc *fdp;
	struct prison *pr;
	struct proc *p;
	int nrele;

	/* Only the caller's reference exists; nothing can point at olddp. */
	if (vrefcnt(olddp) == 1)
		return;
	nrele = 0;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		fdp = fdhold(p);
		PROC_UNLOCK(p);
		if (fdp == NULL)
			continue;
		FILEDESC_XLOCK(fdp);
		if (fdp->fd_cdir == olddp) {
			vrefact(newdp);
			fdp->fd_cdir = newdp;
			nrele++;
		}
		if (fdp->fd_rdir == olddp) {
			vrefact(newdp);
			fdp->fd_rdir = newdp;
			nrele++;
		}
		if (fdp->fd_jdir == olddp) {
			vrefact(newdp);
			fdp->fd_jdir = newdp;
			nrele++;
		}
		FILEDESC_XUNLOCK(fdp);
		fddrop(fdp);
	}
	sx_sunlock(&allproc_lock);
	if (rootvnode == olddp) {
		vrefact(newdp);
		rootvnode = newdp;
		nrele++;
	}
	mtx_lock(&prison0.pr_mtx);
	if (prison0.pr_root == olddp) {
		vrefact(newdp);
		prison0.pr_root = newdp;
		nrele++;
	}
	mtx_unlock(&prison0.pr_mtx);
	sx_slock(&allprison_lock);
	TAILQ_FOREACH(pr, &allprison, pr_list) {
		mtx_lock(&pr->pr_mtx);
		if (pr->pr_root == olddp) {
			vrefact(newdp);
			pr->pr_root = newdp;
			nrele++;
		}
		mtx_unlock(&pr->pr_mtx);
	}
	sx_sunlock(&allprison_lock);
	/* Drop one olddp reference for every pointer we replaced. */
	while (nrele--)
		vrele(olddp);
}

/*
 * Allocate a filedesc_to_leader structure for 'leader', linking it into
 * old's circular list when old != NULL (the filedesc lock protects the
 * list manipulation).
 */
struct filedesc_to_leader *
filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader)
{
	struct filedesc_to_leader *fdtol;

	fdtol = malloc(sizeof(struct filedesc_to_leader),
	    M_FILEDESC_TO_LEADER, M_WAITOK);
	fdtol->fdl_refcount = 1;
	fdtol->fdl_holdcount = 0;
	fdtol->fdl_wakeup = 0;
	fdtol->fdl_leader = leader;
	if (old != NULL) {
		FILEDESC_XLOCK(fdp);
		fdtol->fdl_next = old->fdl_next;
		fdtol->fdl_prev = old;
		old->fdl_next = fdtol;
		fdtol->fdl_next->fdl_prev = fdtol;
		FILEDESC_XUNLOCK(fdp);
	} else {
		/* First entry: a singleton circular list. */
		fdtol->fdl_next = fdtol;
		fdtol->fdl_prev = fdtol;
	}
	return (fdtol);
}

/*
 * Sysctl handler returning the number of file descriptors open in the
 * current process, computed from the fd allocation bitmap.
 */
static int
sysctl_kern_proc_nfds(SYSCTL_HANDLER_ARGS)
{
	struct filedesc *fdp;
	int i, count, slots;

	/* Reject any name argument other than 0. */
	if (*(int *)arg1 != 0)
		return (EINVAL);

	fdp = curproc->p_fd;
	count = 0;
	FILEDESC_SLOCK(fdp);
	slots = NDSLOTS(fdp->fd_lastfile + 1);
	for (i = 0; i < slots; i++)
		count += bitcountl(fdp->fd_map[i]);
	FILEDESC_SUNLOCK(fdp);

	return (SYSCTL_OUT(req, &count, sizeof(count)));
}

static SYSCTL_NODE(_kern_proc, KERN_PROC_NFDS, nfds,
    CTLFLAG_RD|CTLFLAG_CAPRD|CTLFLAG_MPSAFE, sysctl_kern_proc_nfds,
    "Number of open file descriptors");

/*
 * Get file structures globally.
 */
static int
sysctl_kern_file(SYSCTL_HANDLER_ARGS)
{
	struct xfile xf;
	struct filedesc *fdp;
	struct file *fp;
	struct proc *p;
	int error, n;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	if (req->oldptr == NULL) {
		/* Size probe: report an estimate of the required buffer. */
		n = 0;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NEW) {
				PROC_UNLOCK(p);
				continue;
			}
			fdp = fdhold(p);
			PROC_UNLOCK(p);
			if (fdp == NULL)
				continue;
			/* overestimates sparse tables.
 */
			if (fdp->fd_lastfile > 0)
				n += fdp->fd_lastfile;
			fddrop(fdp);
		}
		sx_sunlock(&allproc_lock);
		return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
	}
	error = 0;
	bzero(&xf, sizeof(xf));
	xf.xf_size = sizeof(xf);
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		if (p->p_state == PRS_NEW) {
			PROC_UNLOCK(p);
			continue;
		}
		/* Skip processes the requesting thread may not inspect. */
		if (p_cansee(req->td, p) != 0) {
			PROC_UNLOCK(p);
			continue;
		}
		xf.xf_pid = p->p_pid;
		xf.xf_uid = p->p_ucred->cr_uid;
		fdp = fdhold(p);
		PROC_UNLOCK(p);
		if (fdp == NULL)
			continue;
		FILEDESC_SLOCK(fdp);
		/* fd_refcnt is re-checked each step in case the table dies. */
		for (n = 0; fdp->fd_refcnt > 0 && n <= fdp->fd_lastfile; ++n) {
			if ((fp = fdp->fd_ofiles[n].fde_file) == NULL)
				continue;
			xf.xf_fd = n;
			xf.xf_file = fp;
			xf.xf_data = fp->f_data;
			xf.xf_vnode = fp->f_vnode;
			xf.xf_type = fp->f_type;
			xf.xf_count = fp->f_count;
			xf.xf_msgcount = 0;
			xf.xf_offset = foffset_get(fp);
			xf.xf_flag = fp->f_flag;
			error = SYSCTL_OUT(req, &xf, sizeof(xf));
			if (error)
				break;
		}
		FILEDESC_SUNLOCK(fdp);
		fddrop(fdp);
		if (error)
			break;
	}
	sx_sunlock(&allproc_lock);
	return (error);
}

SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_file, "S,xfile", "Entire file table");

#ifdef KINFO_FILE_SIZE
CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE);
#endif

/*
 * Translate open-file f_flag/oflag bits into their KF_FLAG_* equivalents.
 */
static int
xlate_fflags(int fflags)
{
	static const struct {
		int	fflag;
		int	kf_fflag;
	} fflags_table[] = {
		{ FAPPEND, KF_FLAG_APPEND },
		{ FASYNC, KF_FLAG_ASYNC },
		{ FFSYNC, KF_FLAG_FSYNC },
		{ FHASLOCK, KF_FLAG_HASLOCK },
		{ FNONBLOCK, KF_FLAG_NONBLOCK },
		{ FREAD, KF_FLAG_READ },
		{ FWRITE, KF_FLAG_WRITE },
		{ O_CREAT, KF_FLAG_CREAT },
		{ O_DIRECT, KF_FLAG_DIRECT },
		{ O_EXCL, KF_FLAG_EXCL },
		{ O_EXEC, KF_FLAG_EXEC },
		{ O_EXLOCK, KF_FLAG_EXLOCK },
		{ O_NOFOLLOW, KF_FLAG_NOFOLLOW },
		{ O_SHLOCK, KF_FLAG_SHLOCK },
		{ O_TRUNC, KF_FLAG_TRUNC }
	};
	unsigned int i;
	int kflags;

	kflags = 0;
	for (i = 0; i < nitems(fflags_table); i++)
		if (fflags & fflags_table[i].fflag)
			kflags |= fflags_table[i].kf_fflag;
	return (kflags);
}

/* Trim unused data from kf_path by truncating the structure size. */
static void
pack_kinfo(struct kinfo_file *kif)
{

	kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
	    strlen(kif->kf_path) + 1;
	kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t));
}

/*
 * Fill *kif from fp.  May drop and re-take the filedesc lock via
 * fo_fill_kinfo(), so 'fp' must not be used after the call.
 */
static void
export_file_to_kinfo(struct file *fp, int fd, cap_rights_t *rightsp,
    struct kinfo_file *kif, struct filedesc *fdp, int flags)
{
	int error;

	bzero(kif, sizeof(*kif));

	/* Set a default type to allow for empty fill_kinfo() methods. */
	kif->kf_type = KF_TYPE_UNKNOWN;
	kif->kf_flags = xlate_fflags(fp->f_flag);
	if (rightsp != NULL)
		kif->kf_cap_rights = *rightsp;
	else
		cap_rights_init(&kif->kf_cap_rights);
	kif->kf_fd = fd;
	kif->kf_ref_count = fp->f_count;
	kif->kf_offset = foffset_get(fp);

	/*
	 * This may drop the filedesc lock, so the 'fp' cannot be
	 * accessed after this call.
	 */
	error = fo_fill_kinfo(fp, kif, fdp);
	if (error == 0)
		kif->kf_status |= KF_ATTR_VALID;
	if ((flags & KERN_FILEDESC_PACK_KINFO) != 0)
		pack_kinfo(kif);
	else
		kif->kf_structsize = roundup2(sizeof(*kif), sizeof(uint64_t));
}

/*
 * Fill *kif for a vnode that is not installed in a descriptor table
 * (cwd/root/jail/tracing/text vnodes).  Consumes the caller's reference
 * on 'vp' (vrele() at the end).
 */
static void
export_vnode_to_kinfo(struct vnode *vp, int fd, int fflags,
    struct kinfo_file *kif, int flags)
{
	int error;

	bzero(kif, sizeof(*kif));

	kif->kf_type = KF_TYPE_VNODE;
	error = vn_fill_kinfo_vnode(vp, kif);
	if (error == 0)
		kif->kf_status |= KF_ATTR_VALID;
	kif->kf_flags = xlate_fflags(fflags);
	cap_rights_init(&kif->kf_cap_rights);
	kif->kf_fd = fd;
	kif->kf_ref_count = -1;
	kif->kf_offset = -1;
	if ((flags & KERN_FILEDESC_PACK_KINFO) != 0)
		pack_kinfo(kif);
	else
		kif->kf_structsize = roundup2(sizeof(*kif), sizeof(uint64_t));
	vrele(vp);
}

/* State carried across the per-descriptor export calls. */
struct export_fd_buf {
	struct filedesc		*fdp;
	struct sbuf 		*sb;
	ssize_t			remainder;
	struct kinfo_file	kif;
	int			flags;
};

/*
 * Append the staged kinfo_file record to the sbuf, honoring the byte
 * budget in 'remainder' (-1 means unlimited).
 */
static int
export_kinfo_to_sb(struct export_fd_buf *efbuf)
{
	struct kinfo_file *kif;

	kif = &efbuf->kif;
	if (efbuf->remainder != -1) {
		if (efbuf->remainder < kif->kf_structsize) {
			/* Terminate export. */
			efbuf->remainder = 0;
			return (0);
		}
		efbuf->remainder -= kif->kf_structsize;
	}
	return (sbuf_bcat(efbuf->sb, kif, kif->kf_structsize) == 0 ? 0 : ENOMEM);
}

static int
export_file_to_sb(struct file *fp, int fd, cap_rights_t *rightsp,
    struct export_fd_buf *efbuf)
{
	int error;

	if (efbuf->remainder == 0)
		return (0);
	export_file_to_kinfo(fp, fd, rightsp, &efbuf->kif, efbuf->fdp,
	    efbuf->flags);
	/* Drop the filedesc lock while copying out; the caller re-validates. */
	FILEDESC_SUNLOCK(efbuf->fdp);
	error = export_kinfo_to_sb(efbuf);
	FILEDESC_SLOCK(efbuf->fdp);
	return (error);
}

static int
export_vnode_to_sb(struct vnode *vp, int fd, int fflags,
    struct export_fd_buf *efbuf)
{
	int error;

	if (efbuf->remainder == 0)
		return (0);
	if (efbuf->fdp != NULL)
		FILEDESC_SUNLOCK(efbuf->fdp);
	export_vnode_to_kinfo(vp, fd, fflags, &efbuf->kif, efbuf->flags);
	error = export_kinfo_to_sb(efbuf);
	if (efbuf->fdp != NULL)
		FILEDESC_SLOCK(efbuf->fdp);
	return (error);
}

/*
 * Store a process file descriptor information to sbuf.
 *
 * Takes a locked proc as argument, and returns with the proc unlocked.
 */
int
kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
    int flags)
{
	struct file *fp;
	struct filedesc *fdp;
	struct export_fd_buf *efbuf;
	struct vnode *cttyvp, *textvp, *tracevp;
	int error, i;
	cap_rights_t rights;

	PROC_LOCK_ASSERT(p, MA_OWNED);

	/* ktrace vnode */
	tracevp = p->p_tracevp;
	if (tracevp != NULL)
		vrefact(tracevp);
	/* text vnode */
	textvp = p->p_textvp;
	if (textvp != NULL)
		vrefact(textvp);
	/* Controlling tty.
	 */
	cttyvp = NULL;
	if (p->p_pgrp != NULL && p->p_pgrp->pg_session != NULL) {
		cttyvp = p->p_pgrp->pg_session->s_ttyvp;
		if (cttyvp != NULL)
			vrefact(cttyvp);
	}
	fdp = fdhold(p);
	PROC_UNLOCK(p);
	efbuf = malloc(sizeof(*efbuf), M_TEMP, M_WAITOK);
	efbuf->fdp = NULL;
	efbuf->sb = sb;
	efbuf->remainder = maxlen;
	efbuf->flags = flags;
	/* Export the special (non-descriptor) vnodes first. */
	if (tracevp != NULL)
		export_vnode_to_sb(tracevp, KF_FD_TYPE_TRACE, FREAD | FWRITE,
		    efbuf);
	if (textvp != NULL)
		export_vnode_to_sb(textvp, KF_FD_TYPE_TEXT, FREAD, efbuf);
	if (cttyvp != NULL)
		export_vnode_to_sb(cttyvp, KF_FD_TYPE_CTTY, FREAD | FWRITE,
		    efbuf);
	error = 0;
	if (fdp == NULL)
		goto fail;
	efbuf->fdp = fdp;
	FILEDESC_SLOCK(fdp);
	/* working directory */
	if (fdp->fd_cdir != NULL) {
		vrefact(fdp->fd_cdir);
		export_vnode_to_sb(fdp->fd_cdir, KF_FD_TYPE_CWD, FREAD, efbuf);
	}
	/* root directory */
	if (fdp->fd_rdir != NULL) {
		vrefact(fdp->fd_rdir);
		export_vnode_to_sb(fdp->fd_rdir, KF_FD_TYPE_ROOT, FREAD, efbuf);
	}
	/* jail directory */
	if (fdp->fd_jdir != NULL) {
		vrefact(fdp->fd_jdir);
		export_vnode_to_sb(fdp->fd_jdir, KF_FD_TYPE_JAIL, FREAD, efbuf);
	}
	for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) {
		if ((fp = fdp->fd_ofiles[i].fde_file) == NULL)
			continue;
#ifdef CAPABILITIES
		rights = *cap_rights(fdp, i);
#else /* !CAPABILITIES */
		cap_rights_init(&rights);
#endif
		/*
		 * Create sysctl entry.  It is OK to drop the filedesc
		 * lock inside of export_file_to_sb() as we will
		 * re-validate and re-evaluate its properties when the
		 * loop continues.
		 */
		error = export_file_to_sb(fp, i, &rights, efbuf);
		if (error != 0 || efbuf->remainder == 0)
			break;
	}
	FILEDESC_SUNLOCK(fdp);
	fddrop(fdp);
fail:
	free(efbuf, M_TEMP);
	return (error);
}

#define FILEDESC_SBUF_SIZE	(sizeof(struct kinfo_file) * 5)

/*
 * Get per-process file descriptors for use by procstat(1), et al.
 */
static int
sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sb;
	struct proc *p;
	ssize_t maxlen;
	int error, error2, *name;

	name = (int *)arg1;

	sbuf_new_for_sysctl(&sb, NULL, FILEDESC_SBUF_SIZE, req);
	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
	error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
	if (error != 0) {
		sbuf_delete(&sb);
		return (error);
	}
	/* -1 (no byte limit) when this is only a size probe. */
	maxlen = req->oldptr != NULL ? req->oldlen : -1;
	error = kern_proc_filedesc_out(p, &sb, maxlen,
	    KERN_FILEDESC_PACK_KINFO);
	error2 = sbuf_finish(&sb);
	sbuf_delete(&sb);
	return (error != 0 ? error : error2);
}

#ifdef COMPAT_FREEBSD7
#ifdef KINFO_OFILE_SIZE
CTASSERT(sizeof(struct kinfo_ofile) == KINFO_OFILE_SIZE);
#endif

/*
 * Convert a new-style kinfo_file record to the FreeBSD 7 kinfo_ofile
 * layout used by the compat sysctl below.
 */
static void
kinfo_to_okinfo(struct kinfo_file *kif, struct kinfo_ofile *okif)
{

	okif->kf_structsize = sizeof(*okif);
	okif->kf_type = kif->kf_type;
	okif->kf_fd = kif->kf_fd;
	okif->kf_ref_count = kif->kf_ref_count;
	okif->kf_flags = kif->kf_flags & (KF_FLAG_READ | KF_FLAG_WRITE |
	    KF_FLAG_APPEND | KF_FLAG_ASYNC | KF_FLAG_FSYNC | KF_FLAG_NONBLOCK |
	    KF_FLAG_DIRECT | KF_FLAG_HASLOCK);
	okif->kf_offset = kif->kf_offset;
	if (kif->kf_type == KF_TYPE_VNODE)
		okif->kf_vnode_type = kif->kf_un.kf_file.kf_file_type;
	else
		okif->kf_vnode_type = KF_VTYPE_VNON;
	strlcpy(okif->kf_path, kif->kf_path, sizeof(okif->kf_path));
	if (kif->kf_type == KF_TYPE_SOCKET) {
		okif->kf_sock_domain = kif->kf_un.kf_sock.kf_sock_domain0;
		okif->kf_sock_type = kif->kf_un.kf_sock.kf_sock_type0;
		okif->kf_sock_protocol = kif->kf_un.kf_sock.kf_sock_protocol0;
		okif->kf_sa_local = kif->kf_un.kf_sock.kf_sa_local;
		okif->kf_sa_peer = kif->kf_un.kf_sock.kf_sa_peer;
	} else {
		okif->kf_sa_local.ss_family = AF_UNSPEC;
		okif->kf_sa_peer.ss_family = AF_UNSPEC;
	}
}

static int
export_vnode_for_osysctl(struct vnode *vp, int type, struct kinfo_file *kif,
    struct kinfo_ofile *okif, struct filedesc *fdp, struct sysctl_req *req)
{
	int error;

	vrefact(vp);
	/* Drop the filedesc lock across the copyout. */
	FILEDESC_SUNLOCK(fdp);
	export_vnode_to_kinfo(vp, type, 0, kif, KERN_FILEDESC_PACK_KINFO);
	kinfo_to_okinfo(kif, okif);
	error = SYSCTL_OUT(req, okif, sizeof(*okif));
	FILEDESC_SLOCK(fdp);
	return (error);
}

/*
 * Get per-process file descriptors for use by procstat(1), et al.
3655 */ 3656 static int 3657 sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS) 3658 { 3659 struct kinfo_ofile *okif; 3660 struct kinfo_file *kif; 3661 struct filedesc *fdp; 3662 int error, i, *name; 3663 struct file *fp; 3664 struct proc *p; 3665 3666 name = (int *)arg1; 3667 error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p); 3668 if (error != 0) 3669 return (error); 3670 fdp = fdhold(p); 3671 PROC_UNLOCK(p); 3672 if (fdp == NULL) 3673 return (ENOENT); 3674 kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK); 3675 okif = malloc(sizeof(*okif), M_TEMP, M_WAITOK); 3676 FILEDESC_SLOCK(fdp); 3677 if (fdp->fd_cdir != NULL) 3678 export_vnode_for_osysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif, 3679 okif, fdp, req); 3680 if (fdp->fd_rdir != NULL) 3681 export_vnode_for_osysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif, 3682 okif, fdp, req); 3683 if (fdp->fd_jdir != NULL) 3684 export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif, 3685 okif, fdp, req); 3686 for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) { 3687 if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) 3688 continue; 3689 export_file_to_kinfo(fp, i, NULL, kif, fdp, 3690 KERN_FILEDESC_PACK_KINFO); 3691 FILEDESC_SUNLOCK(fdp); 3692 kinfo_to_okinfo(kif, okif); 3693 error = SYSCTL_OUT(req, okif, sizeof(*okif)); 3694 FILEDESC_SLOCK(fdp); 3695 if (error) 3696 break; 3697 } 3698 FILEDESC_SUNLOCK(fdp); 3699 fddrop(fdp); 3700 free(kif, M_TEMP); 3701 free(okif, M_TEMP); 3702 return (0); 3703 } 3704 3705 static SYSCTL_NODE(_kern_proc, KERN_PROC_OFILEDESC, ofiledesc, 3706 CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_ofiledesc, 3707 "Process ofiledesc entries"); 3708 #endif /* COMPAT_FREEBSD7 */ 3709 3710 int 3711 vntype_to_kinfo(int vtype) 3712 { 3713 struct { 3714 int vtype; 3715 int kf_vtype; 3716 } vtypes_table[] = { 3717 { VBAD, KF_VTYPE_VBAD }, 3718 { VBLK, KF_VTYPE_VBLK }, 3719 { VCHR, KF_VTYPE_VCHR }, 3720 { VDIR, KF_VTYPE_VDIR }, 3721 { VFIFO, KF_VTYPE_VFIFO }, 3722 { VLNK, KF_VTYPE_VLNK }, 3723 { VNON, 
 KF_VTYPE_VNON },
                { VREG, KF_VTYPE_VREG },
                { VSOCK, KF_VTYPE_VSOCK }
        };
        unsigned int i;

        /*
         * Perform vtype translation via a linear table scan; the table
         * is small enough that this is not a concern.
         */
        for (i = 0; i < nitems(vtypes_table); i++)
                if (vtypes_table[i].vtype == vtype)
                        return (vtypes_table[i].kf_vtype);

        return (KF_VTYPE_UNKNOWN);
}

static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc,
    CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_filedesc,
    "Process filedesc entries");

/*
 * Store a process current working directory information to sbuf.
 *
 * Takes a locked proc as argument, and returns with the proc unlocked.
 * Returns EINVAL if the process has no fd table or no current
 * directory; otherwise returns the result of the export.
 */
int
kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen)
{
        struct filedesc *fdp;
        struct export_fd_buf *efbuf;
        int error;

        PROC_LOCK_ASSERT(p, MA_OWNED);

        /* Pin the fd table, then release the process lock. */
        fdp = fdhold(p);
        PROC_UNLOCK(p);
        if (fdp == NULL)
                return (EINVAL);

        efbuf = malloc(sizeof(*efbuf), M_TEMP, M_WAITOK);
        efbuf->fdp = fdp;
        efbuf->sb = sb;
        efbuf->remainder = maxlen;

        FILEDESC_SLOCK(fdp);
        if (fdp->fd_cdir == NULL)
                error = EINVAL;
        else {
                vrefact(fdp->fd_cdir);
                error = export_vnode_to_sb(fdp->fd_cdir, KF_FD_TYPE_CWD,
                    FREAD, efbuf);
        }
        FILEDESC_SUNLOCK(fdp);
        fddrop(fdp);
        free(efbuf, M_TEMP);
        return (error);
}

/*
 * Get per-process current working directory.
 */
static int
sysctl_kern_proc_cwd(SYSCTL_HANDLER_ARGS)
{
        struct sbuf sb;
        struct proc *p;
        ssize_t maxlen;
        int error, error2, *name;

        /* arg1 is the sysctl name vector; name[0] is the target pid. */
        name = (int *)arg1;

        sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_file), req);
        sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
        error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
        if (error != 0) {
                sbuf_delete(&sb);
                return (error);
        }
        /* -1 means "no limit" when this is only a size probe. */
        maxlen = req->oldptr != NULL ?
 req->oldlen : -1;
        error = kern_proc_cwd_out(p, &sb, maxlen);
        error2 = sbuf_finish(&sb);
        sbuf_delete(&sb);
        /* The export error takes precedence over the sbuf status. */
        return (error != 0 ? error : error2);
}

static SYSCTL_NODE(_kern_proc, KERN_PROC_CWD, cwd, CTLFLAG_RD|CTLFLAG_MPSAFE,
    sysctl_kern_proc_cwd, "Process current working directory");

#ifdef DDB
/*
 * For the purposes of debugging, generate a human-readable string for the
 * file type.  Unrecognized types are reported as "unkn".
 */
static const char *
file_type_to_name(short type)
{

        switch (type) {
        case 0:
                return ("zero");
        case DTYPE_VNODE:
                return ("vnode");
        case DTYPE_SOCKET:
                return ("socket");
        case DTYPE_PIPE:
                return ("pipe");
        case DTYPE_FIFO:
                return ("fifo");
        case DTYPE_KQUEUE:
                return ("kqueue");
        case DTYPE_CRYPTO:
                return ("crypto");
        case DTYPE_MQUEUE:
                return ("mqueue");
        case DTYPE_SHM:
                return ("shm");
        case DTYPE_SEM:
                return ("ksem");
        case DTYPE_PTS:
                return ("pts");
        case DTYPE_DEV:
                return ("dev");
        case DTYPE_PROCDESC:
                return ("proc");
        case DTYPE_LINUXEFD:
                return ("levent");
        case DTYPE_LINUXTFD:
                return ("ltimer");
        default:
                return ("unkn");
        }
}

/*
 * For the purposes of debugging, identify a process (if any, perhaps one of
 * many) that references the passed file in its file descriptor array.  Return
 * NULL if none.
 */
static struct proc *
file_to_first_proc(struct file *fp)
{
        struct filedesc *fdp;
        struct proc *p;
        int n;

        /*
         * NOTE(review): walks the process list without locks; this is a
         * DDB-only helper run with the system stopped, where that is
         * conventional.
         */
        FOREACH_PROC_IN_SYSTEM(p) {
                /* Skip embryonic processes with no usable state. */
                if (p->p_state == PRS_NEW)
                        continue;
                fdp = p->p_fd;
                if (fdp == NULL)
                        continue;
                for (n = 0; n <= fdp->fd_lastfile; n++) {
                        if (fp == fdp->fd_ofiles[n].fde_file)
                                return (p);
                }
        }
        return (NULL);
}

/*
 * Print one struct file for the DDB "show file(s)" commands, with an
 * optional column-header line.
 */
static void
db_print_file(struct file *fp, int header)
{
/* Width, in hex digits, of a pointer on this architecture. */
#define XPTRWIDTH ((int)howmany(sizeof(void *) * NBBY, 4))
        struct proc *p;

        if (header)
                db_printf("%*s %6s %*s %8s %4s %5s %6s %*s %5s %s\n",
                    XPTRWIDTH, "File", "Type", XPTRWIDTH, "Data", "Flag",
                    "GCFl", "Count", "MCount", XPTRWIDTH, "Vnode", "FPID",
                    "FCmd");
        p = file_to_first_proc(fp);
        db_printf("%*p %6s %*p %08x %04x %5d %6d %*p %5d %s\n", XPTRWIDTH,
            fp, file_type_to_name(fp->f_type), XPTRWIDTH, fp->f_data,
            fp->f_flag, 0, fp->f_count, 0, XPTRWIDTH, fp->f_vnode,
            p != NULL ? p->p_pid : -1, p != NULL ?
 p->p_comm : "-");

#undef XPTRWIDTH
}

/* DDB command: "show file <addr>" -- dump a single struct file. */
DB_SHOW_COMMAND(file, db_show_file)
{
        struct file *fp;

        if (!have_addr) {
                db_printf("usage: show file <addr>\n");
                return;
        }
        fp = (struct file *)addr;
        db_print_file(fp, 1);
}

/* DDB command: "show files" -- dump every open file in every process. */
DB_SHOW_COMMAND(files, db_show_files)
{
        struct filedesc *fdp;
        struct file *fp;
        struct proc *p;
        int header;
        int n;

        /* Print the column header once, before the first entry. */
        header = 1;
        FOREACH_PROC_IN_SYSTEM(p) {
                if (p->p_state == PRS_NEW)
                        continue;
                if ((fdp = p->p_fd) == NULL)
                        continue;
                for (n = 0; n <= fdp->fd_lastfile; ++n) {
                        if ((fp = fdp->fd_ofiles[n].fde_file) == NULL)
                                continue;
                        db_print_file(fp, header);
                        header = 0;
                }
        }
}
#endif

SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
    &maxfilesperproc, 0, "Maximum files allowed open per process");

SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
    &maxfiles, 0, "Maximum number of files");

SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
    __DEVOLATILE(int *, &openfiles), 0, "System-wide number of open files");

/*
 * Create the UMA zones backing struct file and struct filedesc0
 * allocations and initialize the sigio lock.  Runs once at boot via
 * SYSINIT below.
 */
/* ARGSUSED*/
static void
filelistinit(void *dummy)
{

        /*
         * NOTE(review): the file zone is UMA_ZONE_NOFREE -- its slabs
         * are never returned to the VM; presumably required by
         * unlocked struct file access patterns -- confirm.
         */
        file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
            NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
        filedesc0_zone = uma_zcreate("filedesc0", sizeof(struct filedesc0),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
        mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
}
/* NOTE(review): SYSINIT is historically named "select" here, not "file". */
SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL);

/*-------------------------------------------------------------------*/

/*
 * badfileops: fileops for a descriptor slot that is not backed by a
 * real object.  Most operations fail with EBADF; close and poll are
 * harmless no-ops.
 */
static int
badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{

        return (EBADF);
}

static int
badfo_truncate(struct file *fp, off_t length, struct ucred
 *active_cred,
    struct thread *td)
{

        return (EINVAL);
}

static int
badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred,
    struct thread *td)
{

        return (EBADF);
}

/* Polling a bad descriptor reports no events rather than an error. */
static int
badfo_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{

        return (0);
}

static int
badfo_kqfilter(struct file *fp, struct knote *kn)
{

        return (EBADF);
}

static int
badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
    struct thread *td)
{

        return (EBADF);
}

/* Closing a bad descriptor always succeeds; nothing to release. */
static int
badfo_close(struct file *fp, struct thread *td)
{

        return (0);
}

static int
badfo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
    struct thread *td)
{

        return (EBADF);
}

static int
badfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
    struct thread *td)
{

        return (EBADF);
}

static int
badfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
    struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
    struct thread *td)
{

        return (EBADF);
}

/* Export no information for a bad descriptor; report success. */
static int
badfo_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
{

        return (0);
}

struct fileops badfileops = {
        .fo_read = badfo_readwrite,
        .fo_write = badfo_readwrite,
        .fo_truncate = badfo_truncate,
        .fo_ioctl = badfo_ioctl,
        .fo_poll = badfo_poll,
        .fo_kqfilter = badfo_kqfilter,
        .fo_stat = badfo_stat,
        .fo_close = badfo_close,
        .fo_chmod = badfo_chmod,
        .fo_chown = badfo_chown,
        .fo_sendfile = badfo_sendfile,
        .fo_fill_kinfo = badfo_fill_kinfo,
};

/*
 * invfo_*: shared fileops stubs for file types that do not support a
 * given operation.  Unlike badfo_*, the descriptor itself is valid, so
 * these return operation-specific errors (EOPNOTSUPP/ENOTTY/EINVAL)
 * rather than EBADF.
 */
int
invfo_rdwr(struct file *fp, struct uio *uio,
 struct ucred *active_cred,
    int flags, struct thread *td)
{

        return (EOPNOTSUPP);
}

int
invfo_truncate(struct file *fp, off_t length, struct ucred *active_cred,
    struct thread *td)
{

        return (EINVAL);
}

int
invfo_ioctl(struct file *fp, u_long com, void *data,
    struct ucred *active_cred, struct thread *td)
{

        return (ENOTTY);
}

/* Delegate to the generic "always ready" poll fallback. */
int
invfo_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{

        return (poll_no_poll(events));
}

int
invfo_kqfilter(struct file *fp, struct knote *kn)
{

        return (EINVAL);
}

int
invfo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
    struct thread *td)
{

        return (EINVAL);
}

int
invfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
    struct thread *td)
{

        return (EINVAL);
}

int
invfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
    struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
    struct thread *td)
{

        return (EINVAL);
}

/*-------------------------------------------------------------------*/

/*
 * File Descriptor pseudo-device driver (/dev/fd/).
 *
 * Opening minor device N dup()s the file (if any) connected to file
 * descriptor N belonging to the calling process.  Note that this driver
 * consists of only the ``open()'' routine, because all subsequent
 * references to this file will be direct to the other driver.
 *
 * XXX: we could give this one a cloning event handler if necessary.
 */

/* ARGSUSED */
static int
fdopen(struct cdev *dev, int mode, int type, struct thread *td)
{

        /*
         * XXX Kludge: set curthread->td_dupfd to contain the value of the
         * the file descriptor being sought for duplication. The error
         * return ensures that the vnode for this device will be released
         * by vn_open. Open will detect this special error and take the
         * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
         * will simply report the error.
         */
        td->td_dupfd = dev2unit(dev);
        return (ENODEV);
}

/* Character-device switch for /dev/fd/N; open is the only entry point. */
static struct cdevsw fildesc_cdevsw = {
        .d_version =    D_VERSION,
        .d_open =       fdopen,
        .d_name =       "FD",
};

/*
 * Create /dev/fd/0, /dev/fd/1 and /dev/fd/2 plus the conventional
 * stdin/stdout/stderr aliases.  Runs once at boot via SYSINIT below.
 */
static void
fildesc_drvinit(void *unused)
{
        struct cdev *dev;

        dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 0, NULL,
            UID_ROOT, GID_WHEEL, 0666, "fd/0");
        make_dev_alias(dev, "stdin");
        dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 1, NULL,
            UID_ROOT, GID_WHEEL, 0666, "fd/1");
        make_dev_alias(dev, "stdout");
        dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 2, NULL,
            UID_ROOT, GID_WHEEL, 0666, "fd/2");
        make_dev_alias(dev, "stderr");
}

SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL);