/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_capsicum.h"
#include "opt_ktrace.h"

#define	EXTERR_CATEGORY	EXTERR_CAT_FILEDESC
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/capsicum.h>
#include <sys/exterrvar.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/inotify.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/protosw.h>
#include <sys/socketvar.h>
#include <sys/uio.h>
#include <sys/eventfd.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/resourcevar.h>
#include <sys/selinfo.h>
#include <sys/sleepqueue.h>
#include <sys/specialfd.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/vnode.h>
#include <sys/unistd.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/condvar.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <security/audit/audit.h>

/*
 * The following macro defines how many bytes will be allocated on the
 * stack, instead of with malloc(), when passing ioctl data structures
 * between userspace and the kernel.  Some ioctls with small data
 * structures are used very frequently, and this small on-stack buffer
 * gives a significant speedup for those requests.  The value of this
 * define should be greater than or equal to 64 bytes and should also
 * be a power of two.  The data structure is currently hard-aligned to
 * an 8-byte boundary on the stack.  This should currently be
 * sufficient for all supported platforms.
 */
#define	SYS_IOCTL_SMALL_SIZE	128	/* bytes */
#define	SYS_IOCTL_SMALL_ALIGN	8	/* bytes */

#ifdef __LP64__
static int iosize_max_clamp = 0;
SYSCTL_INT(_debug, OID_AUTO, iosize_max_clamp, CTLFLAG_RW,
    &iosize_max_clamp, 0, "Clamp max i/o size to INT_MAX");
static int devfs_iosize_max_clamp = 1;
SYSCTL_INT(_debug, OID_AUTO, devfs_iosize_max_clamp, CTLFLAG_RW,
    &devfs_iosize_max_clamp, 0, "Clamp max i/o size to INT_MAX for devices");
#endif

/*
 * Assert that the return value of read(2) and write(2) syscalls fits
 * into a register.  If not, an architecture will need to provide the
 * usermode wrappers to reconstruct the result.
 */
CTASSERT(sizeof(register_t) >= sizeof(size_t));

static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
MALLOC_DEFINE(M_IOV, "iov", "large iov's");

static int	pollout(struct thread *, struct pollfd *, struct pollfd *,
		    u_int);
static int	pollscan(struct thread *, struct pollfd *, u_int);
static int	pollrescan(struct thread *);
static int	selscan(struct thread *, fd_mask **, fd_mask **, int);
static int	selrescan(struct thread *, fd_mask **, fd_mask **);
static void	selfdalloc(struct thread *, void *);
static void	selfdfree(struct seltd *, struct selfd *);
static int	dofileread(struct thread *, int, struct file *, struct uio *,
		    off_t, int);
static int	dofilewrite(struct thread *, int, struct file *, struct uio *,
		    off_t, int);
static void	doselwakeup(struct selinfo *, int);
static void	seltdinit(struct thread *);
static int	seltdwait(struct thread *, sbintime_t, sbintime_t);
static void	seltdclear(struct thread *);

/*
 * One seltd per-thread allocated on demand as needed.
 *
 *	t - protected by st_mtx
 *	k - Only accessed by curthread or read-only
 */
struct seltd {
	STAILQ_HEAD(, selfd)	st_selq;	/* (k) List of selfds. */
	struct selfd		*st_free1;	/* (k) free fd for read set. */
	struct selfd		*st_free2;	/* (k) free fd for write set. */
	struct mtx		st_mtx;		/* Protects struct seltd */
	struct cv		st_wait;	/* (t) Wait channel. */
	int			st_flags;	/* (t) SELTD_ flags. */
};

#define	SELTD_PENDING	0x0001			/* We have pending events. */
#define	SELTD_RESCAN	0x0002			/* Doing a rescan. */

/*
 * One selfd allocated per-thread per-file-descriptor.
 *	f - protected by sf_mtx
 */
struct selfd {
	STAILQ_ENTRY(selfd)	sf_link;	/* (k) fds owned by this td. */
	TAILQ_ENTRY(selfd)	sf_threads;	/* (f) fds on this selinfo. */
	struct selinfo		*sf_si;		/* (f) selinfo when linked. */
	struct mtx		*sf_mtx;	/* Pointer to selinfo mtx. */
	struct seltd		*sf_td;		/* (k) owning seltd. */
	void			*sf_cookie;	/* (k) fd or pollfd. */
};

MALLOC_DEFINE(M_SELFD, "selfd", "selfd");
static struct mtx_pool *mtxpool_select;

#ifdef __LP64__
size_t
devfs_iosize_max(void)
{

	return (devfs_iosize_max_clamp || SV_CURPROC_FLAG(SV_ILP32) ?
	    INT_MAX : SSIZE_MAX);
}

size_t
iosize_max(void)
{

	return (iosize_max_clamp || SV_CURPROC_FLAG(SV_ILP32) ?
	    INT_MAX : SSIZE_MAX);
}
#endif
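
/*
 * Illustrative only (not compiled into the kernel): on 64-bit
 * platforms the two clamp knobs above are reachable from userspace
 * via sysctl(3).  A minimal sketch of flipping the clamp with
 * sysctlbyname(3):
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int newval = 1, oldval;
 *		size_t oldlen = sizeof(oldval);
 *
 *		// Read the old value and set the new one in a single call.
 *		if (sysctlbyname("debug.iosize_max_clamp", &oldval, &oldlen,
 *		    &newval, sizeof(newval)) == -1)
 *			perror("sysctlbyname");
 *		else
 *			printf("was %d, now 1\n", oldval);
 *		return (0);
 *	}
 */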

#ifndef _SYS_SYSPROTO_H_
struct read_args {
	int	fd;
	void	*buf;
	size_t	nbyte;
};
#endif
int
sys_read(struct thread *td, struct read_args *uap)
{
	struct uio auio;
	struct iovec aiov;
	int error;

	if (uap->nbyte > IOSIZE_MAX)
		return (EINVAL);
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = uap->nbyte;
	auio.uio_segflg = UIO_USERSPACE;
	error = kern_readv(td, uap->fd, &auio);
	return (error);
}

/*
 * Positioned read system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct pread_args {
	int	fd;
	void	*buf;
	size_t	nbyte;
	int	pad;
	off_t	offset;
};
#endif
int
sys_pread(struct thread *td, struct pread_args *uap)
{

	return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, uap->offset));
}

int
kern_pread(struct thread *td, int fd, void *buf, size_t nbyte, off_t offset)
{
	struct uio auio;
	struct iovec aiov;
	int error;

	if (nbyte > IOSIZE_MAX)
		return (EINVAL);
	aiov.iov_base = buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_segflg = UIO_USERSPACE;
	error = kern_preadv(td, fd, &auio, offset);
	return (error);
}

#if defined(COMPAT_FREEBSD6)
int
freebsd6_pread(struct thread *td, struct freebsd6_pread_args *uap)
{

	return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, uap->offset));
}
#endif

/*
 * Scatter read system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct readv_args {
	int	fd;
	struct iovec *iovp;
	u_int	iovcnt;
};
#endif
int
sys_readv(struct thread *td, struct readv_args *uap)
{
	struct uio *auio;
	int error;

	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
	if (error)
		return (error);
	error = kern_readv(td, uap->fd, auio);
	freeuio(auio);
	return (error);
}

int
kern_readv(struct thread *td, int fd, struct uio *auio)
{
	struct file *fp;
	int error;

	error = fget_read(td, fd, &cap_read_rights, &fp);
	if (error)
		return (error);
	error = dofileread(td, fd, fp, auio, (off_t)-1, 0);
	fdrop(fp, td);
	return (error);
}

/*
 * Scatter positioned read system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct preadv_args {
	int	fd;
	struct iovec *iovp;
	u_int	iovcnt;
	off_t	offset;
};
#endif
int
sys_preadv(struct thread *td, struct preadv_args *uap)
{
	struct uio *auio;
	int error;

	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
	if (error)
		return (error);
	error = kern_preadv(td, uap->fd, auio, uap->offset);
	freeuio(auio);
	return (error);
}

int
kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset)
{
	struct file *fp;
	int error;

	error = fget_read(td, fd, &cap_pread_rights, &fp);
	if (error)
		return (error);
	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
		error = ESPIPE;
	else if (offset < 0 &&
	    (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR))
		error = EINVAL;
	else
		error = dofileread(td, fd, fp, auio, offset, FOF_OFFSET);
	fdrop(fp, td);
	return (error);
}
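
/*
 * Illustrative only (not compiled into the kernel): as kern_preadv()
 * above enforces, positioned reads need a seekable descriptor, so
 * pread(2) on a pipe fails with ESPIPE.  A minimal userspace sketch:
 *
 *	#include <errno.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		char buf[16];
 *		int pfd[2];
 *
 *		pipe(pfd);
 *		// Pipes are not seekable: expect -1 with errno == ESPIPE.
 *		if (pread(pfd[0], buf, sizeof(buf), 0) == -1)
 *			printf("pread: errno %d (ESPIPE is %d)\n",
 *			    errno, ESPIPE);
 *		return (0);
 *	}
 */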

/*
 * Common code for readv and preadv that reads data in
 * from a file using the passed in uio, offset, and flags.
 */
static int
dofileread(struct thread *td, int fd, struct file *fp, struct uio *auio,
    off_t offset, int flags)
{
	ssize_t cnt;
	int error;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	AUDIT_ARG_FD(fd);

	/* Finish zero length reads right here */
	if (auio->uio_resid == 0) {
		td->td_retval[0] = 0;
		return (0);
	}
	auio->uio_rw = UIO_READ;
	auio->uio_offset = offset;
	auio->uio_td = td;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(auio);
#endif
	cnt = auio->uio_resid;
	if ((error = fo_read(fp, auio, td->td_ucred, flags, td))) {
		if (auio->uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
	cnt -= auio->uio_resid;
#ifdef KTRACE
	if (ktruio != NULL) {
		ktruio->uio_resid = cnt;
		ktrgenio(fd, UIO_READ, ktruio, error);
	}
#endif
	td->td_retval[0] = cnt;
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct write_args {
	int	fd;
	const void *buf;
	size_t	nbyte;
};
#endif
int
sys_write(struct thread *td, struct write_args *uap)
{
	struct uio auio;
	struct iovec aiov;
	int error;

	if (uap->nbyte > IOSIZE_MAX)
		return (EINVAL);
	aiov.iov_base = (void *)(uintptr_t)uap->buf;
	aiov.iov_len = uap->nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = uap->nbyte;
	auio.uio_segflg = UIO_USERSPACE;
	error = kern_writev(td, uap->fd, &auio);
	return (error);
}

/*
 * Positioned write system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct pwrite_args {
	int	fd;
	const void *buf;
	size_t	nbyte;
	int	pad;
	off_t	offset;
};
#endif
int
sys_pwrite(struct thread *td, struct pwrite_args *uap)
{

	return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, uap->offset));
}

int
kern_pwrite(struct thread *td, int fd, const void *buf, size_t nbyte,
    off_t offset)
{
	struct uio auio;
	struct iovec aiov;
	int error;

	if (nbyte > IOSIZE_MAX)
		return (EINVAL);
	aiov.iov_base = (void *)(uintptr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_segflg = UIO_USERSPACE;
	error = kern_pwritev(td, fd, &auio, offset);
	return (error);
}

#if defined(COMPAT_FREEBSD6)
int
freebsd6_pwrite(struct thread *td, struct freebsd6_pwrite_args *uap)
{

	return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, uap->offset));
}
#endif

/*
 * Gather write system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct writev_args {
	int	fd;
	struct iovec *iovp;
	u_int	iovcnt;
};
#endif
int
sys_writev(struct thread *td, struct writev_args *uap)
{
	struct uio *auio;
	int error;

	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
	if (error)
		return (error);
	error = kern_writev(td, uap->fd, auio);
	freeuio(auio);
	return (error);
}

int
kern_writev(struct thread *td, int fd, struct uio *auio)
{
	struct file *fp;
	int error;

	error = fget_write(td, fd, &cap_write_rights, &fp);
	if (error)
		return (error);
	error = dofilewrite(td, fd, fp, auio, (off_t)-1, 0);
	fdrop(fp, td);
	return (error);
}
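
/*
 * Illustrative only (not compiled into the kernel): the gather-write
 * entry points above collect multiple user buffers into a single
 * write.  A minimal userspace sketch of writev(2):
 *
 *	#include <sys/uio.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct iovec iov[2] = {
 *			{ .iov_base = "hello, ", .iov_len = 7 },
 *			{ .iov_base = "world\n", .iov_len = 6 },
 *		};
 *
 *		// Both buffers are written with one system call.
 *		(void)writev(STDOUT_FILENO, iov, 2);
 *		return (0);
 *	}
 */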

/*
 * Gather positioned write system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct pwritev_args {
	int	fd;
	struct iovec *iovp;
	u_int	iovcnt;
	off_t	offset;
};
#endif
int
sys_pwritev(struct thread *td, struct pwritev_args *uap)
{
	struct uio *auio;
	int error;

	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
	if (error)
		return (error);
	error = kern_pwritev(td, uap->fd, auio, uap->offset);
	freeuio(auio);
	return (error);
}

int
kern_pwritev(struct thread *td, int fd, struct uio *auio, off_t offset)
{
	struct file *fp;
	int error;

	error = fget_write(td, fd, &cap_pwrite_rights, &fp);
	if (error)
		return (error);
	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
		error = ESPIPE;
	else if (offset < 0 &&
	    (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR))
		error = EINVAL;
	else
		error = dofilewrite(td, fd, fp, auio, offset, FOF_OFFSET);
	fdrop(fp, td);
	return (error);
}

/*
 * Common code for writev and pwritev that writes data to
 * a file using the passed in uio, offset, and flags.
 */
static int
dofilewrite(struct thread *td, int fd, struct file *fp, struct uio *auio,
    off_t offset, int flags)
{
	ssize_t cnt;
	int error;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	AUDIT_ARG_FD(fd);
	auio->uio_rw = UIO_WRITE;
	auio->uio_td = td;
	auio->uio_offset = offset;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(auio);
#endif
	cnt = auio->uio_resid;
	error = fo_write(fp, auio, td->td_ucred, flags, td);
	/*
	 * Socket layer is responsible for special error handling,
	 * see sousrsend().
	 */
	if (error != 0 && fp->f_type != DTYPE_SOCKET) {
		if (auio->uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE) {
			PROC_LOCK(td->td_proc);
			tdsignal(td, SIGPIPE);
			PROC_UNLOCK(td->td_proc);
		}
	}
	cnt -= auio->uio_resid;
#ifdef KTRACE
	if (ktruio != NULL) {
		if (error == 0)
			ktruio->uio_resid = cnt;
		ktrgenio(fd, UIO_WRITE, ktruio, error);
	}
#endif
	td->td_retval[0] = cnt;
	return (error);
}
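
/*
 * Illustrative only (not compiled into the kernel): as the comment on
 * kern_ftruncate() below explains, truncating through a descriptor
 * that is valid but not writable fails with EINVAL, not EBADF.  A
 * minimal userspace sketch:
 *
 *	#include <errno.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		int fd = open("/etc/motd", O_RDONLY);
 *
 *		// The fd is valid, just not writable: expect EINVAL.
 *		if (fd != -1 && ftruncate(fd, 0) == -1)
 *			printf("ftruncate: errno %d (EINVAL is %d)\n",
 *			    errno, EINVAL);
 *		return (0);
 *	}
 */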

/*
 * Truncate a file given a file descriptor.
 *
 * Can't use fget_write() here, since we must return EINVAL and not EBADF
 * if the descriptor isn't writable.
 */
int
kern_ftruncate(struct thread *td, int fd, off_t length)
{
	struct file *fp;
	int error;

	AUDIT_ARG_FD(fd);
	if (length < 0)
		return (EINVAL);
	error = fget(td, fd, &cap_ftruncate_rights, &fp);
	if (error)
		return (error);
	AUDIT_ARG_FILE(td->td_proc, fp);
	if (!(fp->f_flag & FWRITE)) {
		fdrop(fp, td);
		return (EINVAL);
	}
	error = fo_truncate(fp, length, td->td_ucred, td);
	fdrop(fp, td);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct ftruncate_args {
	int	fd;
	int	pad;
	off_t	length;
};
#endif
int
sys_ftruncate(struct thread *td, struct ftruncate_args *uap)
{

	return (kern_ftruncate(td, uap->fd, uap->length));
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct oftruncate_args {
	int	fd;
	long	length;
};
#endif
int
oftruncate(struct thread *td, struct oftruncate_args *uap)
{

	return (kern_ftruncate(td, uap->fd, uap->length));
}
#endif /* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct ioctl_args {
	int	fd;
	u_long	com;
	caddr_t	data;
};
#endif
/* ARGSUSED */
int
sys_ioctl(struct thread *td, struct ioctl_args *uap)
{
	u_char smalldata[SYS_IOCTL_SMALL_SIZE] __aligned(SYS_IOCTL_SMALL_ALIGN);
	uint32_t com;
	int arg, error;
	u_int size;
	caddr_t data;

#ifdef INVARIANTS
	if (uap->com > 0xffffffff) {
		printf(
		    "WARNING pid %d (%s): ioctl sign-extension ioctl %lx\n",
		    td->td_proc->p_pid, td->td_name, uap->com);
	}
#endif
	com = (uint32_t)uap->com;

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if ((size > IOCPARM_MAX) ||
	    ((com & (IOC_VOID | IOC_IN | IOC_OUT)) == 0) ||
#if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
	    ((com & IOC_OUT) && size == 0) ||
#else
	    ((com & (IOC_IN | IOC_OUT)) && size == 0) ||
#endif
	    ((com & IOC_VOID) && size > 0 && size != sizeof(int)))
		return (ENOTTY);

	if (size > 0) {
		if (com & IOC_VOID) {
			/* Integer argument. */
			arg = (intptr_t)uap->data;
			data = (void *)&arg;
			size = 0;
		} else {
			if (size > SYS_IOCTL_SMALL_SIZE)
				data = malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
			else
				data = smalldata;
		}
	} else
		data = (void *)&uap->data;
	if (com & IOC_IN) {
		error = copyin(uap->data, data, (u_int)size);
		if (error != 0)
			goto out;
	} else if (com & IOC_OUT) {
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	}

	error = kern_ioctl(td, uap->fd, com, data);

	if (error == 0 && (com & IOC_OUT))
		error = copyout(data, uap->data, (u_int)size);

out:
	if (size > SYS_IOCTL_SMALL_SIZE)
		free(data, M_IOCTLOPS);
	return (error);
}

int
kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data)
{
	struct file *fp;
	struct filedesc *fdp;
	int error, tmp, locked;

	AUDIT_ARG_FD(fd);
	AUDIT_ARG_CMD(com);

	fdp = td->td_proc->p_fd;

	switch (com) {
	case FIONCLEX:
	case FIOCLEX:
		FILEDESC_XLOCK(fdp);
		locked = LA_XLOCKED;
		break;
	default:
#ifdef CAPABILITIES
		FILEDESC_SLOCK(fdp);
		locked = LA_SLOCKED;
#else
		locked = LA_UNLOCKED;
#endif
		break;
	}

#ifdef CAPABILITIES
	if ((fp = fget_noref(fdp, fd)) == NULL) {
		error = EBADF;
		goto out;
	}
	if ((error = cap_ioctl_check(fdp, fd, com)) != 0) {
		fp = NULL;	/* fhold() was not called yet */
		goto out;
	}
	if (!fhold(fp)) {
		error = EBADF;
		fp = NULL;
		goto out;
	}
	if (locked == LA_SLOCKED) {
		FILEDESC_SUNLOCK(fdp);
		locked = LA_UNLOCKED;
	}
#else
	error = fget(td, fd, &cap_ioctl_rights, &fp);
	if (error != 0) {
		fp = NULL;
		goto out;
	}
#endif
	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	switch (com) {
	case FIONCLEX:
		fdp->fd_ofiles[fd].fde_flags &= ~UF_EXCLOSE;
		goto out;
	case FIOCLEX:
		fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE;
		goto out;
	case FIONBIO:
		if ((tmp = *(int *)data))
			atomic_set_int(&fp->f_flag, FNONBLOCK);
		else
			atomic_clear_int(&fp->f_flag, FNONBLOCK);
		data = (void *)&tmp;
		break;
	case FIOASYNC:
		if ((tmp = *(int *)data))
			atomic_set_int(&fp->f_flag, FASYNC);
		else
			atomic_clear_int(&fp->f_flag, FASYNC);
		data = (void *)&tmp;
		break;
	}

	error = fo_ioctl(fp, com, data, td->td_ucred, td);
out:
	switch (locked) {
	case LA_XLOCKED:
		FILEDESC_XUNLOCK(fdp);
		break;
#ifdef CAPABILITIES
	case LA_SLOCKED:
		FILEDESC_SUNLOCK(fdp);
		break;
#endif
	default:
		FILEDESC_UNLOCK_ASSERT(fdp);
		break;
	}
	if (fp != NULL)
		fdrop(fp, td);
	return (error);
}
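
/*
 * Illustrative only (not compiled into the kernel): FIONBIO and
 * FIOASYNC are handled generically above by toggling f_flag bits, so
 * they work on any file type.  A minimal userspace sketch of putting
 * a descriptor into non-blocking mode with ioctl(2):
 *
 *	#include <sys/ioctl.h>
 *	#include <sys/filio.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		int on = 1;
 *
 *		// Equivalent to fcntl(fd, F_SETFL, ... | O_NONBLOCK).
 *		(void)ioctl(STDIN_FILENO, FIONBIO, &on);
 *		return (0);
 *	}
 */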

int
sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
{
	int error;

	error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len);
	return (kern_posix_error(td, error));
}

int
kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
{
	struct file *fp;
	int error;

	AUDIT_ARG_FD(fd);
	if (offset < 0 || len <= 0)
		return (EINVAL);
	/* Check for wrap. */
	if (offset > OFF_MAX - len)
		return (EFBIG);
	AUDIT_ARG_FD(fd);
	error = fget(td, fd, &cap_pwrite_rights, &fp);
	if (error != 0)
		return (error);
	AUDIT_ARG_FILE(td->td_proc, fp);
	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
		error = ESPIPE;
		goto out;
	}
	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}

	error = fo_fallocate(fp, offset, len, td);
out:
	fdrop(fp, td);
	return (error);
}

int
sys_fspacectl(struct thread *td, struct fspacectl_args *uap)
{
	struct spacectl_range rqsr, rmsr;
	int error, cerror;

	error = copyin(uap->rqsr, &rqsr, sizeof(rqsr));
	if (error != 0)
		return (error);

	error = kern_fspacectl(td, uap->fd, uap->cmd, &rqsr, uap->flags,
	    &rmsr);
	if (uap->rmsr != NULL) {
		cerror = copyout(&rmsr, uap->rmsr, sizeof(rmsr));
		if (error == 0)
			error = cerror;
	}
	return (error);
}

int
kern_fspacectl(struct thread *td, int fd, int cmd,
    const struct spacectl_range *rqsr, int flags, struct spacectl_range *rmsrp)
{
	struct file *fp;
	struct spacectl_range rmsr;
	int error;

	AUDIT_ARG_FD(fd);
	AUDIT_ARG_CMD(cmd);
	AUDIT_ARG_FFLAGS(flags);

	if (rqsr == NULL)
		return (EINVAL);
	rmsr = *rqsr;
	if (rmsrp != NULL)
		*rmsrp = rmsr;

	if (cmd != SPACECTL_DEALLOC ||
	    rqsr->r_offset < 0 || rqsr->r_len <= 0 ||
	    rqsr->r_offset > OFF_MAX - rqsr->r_len ||
	    (flags & ~SPACECTL_F_SUPPORTED) != 0)
		return (EINVAL);

	error = fget_write(td, fd, &cap_pwrite_rights, &fp);
	if (error != 0)
		return (error);
	AUDIT_ARG_FILE(td->td_proc, fp);
	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
		error = ESPIPE;
		goto out;
	}
	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}

	error = fo_fspacectl(fp, cmd, &rmsr.r_offset, &rmsr.r_len, flags,
	    td->td_ucred, td);
	/* fspacectl is not restarted after signals if the file is modified. */
	if (rmsr.r_len != rqsr->r_len && (error == ERESTART ||
	    error == EINTR || error == EWOULDBLOCK))
		error = 0;
	if (rmsrp != NULL)
		*rmsrp = rmsr;
out:
	fdrop(fp, td);
	return (error);
}

int
kern_specialfd(struct thread *td, int type, void *arg)
{
	struct file *fp;
	int error, fd, fflags;

	fflags = 0;
	error = falloc_noinstall(td, &fp);
	if (error != 0)
		return (error);

	switch (type) {
	case SPECIALFD_EVENTFD: {
		struct specialfd_eventfd *ae;

		ae = arg;
		if ((ae->flags & EFD_CLOEXEC) != 0)
			fflags |= O_CLOEXEC;
		error = eventfd_create_file(td, fp, ae->initval, ae->flags);
		break;
	}
	case SPECIALFD_INOTIFY: {
		struct specialfd_inotify *si;

		si = arg;
		error = inotify_create_file(td, fp, si->flags, &fflags);
		break;
	}
	default:
		error = EINVAL;
		break;
	}

	if (error == 0)
		error = finstall(td, fp, &fd, fflags, NULL);
	fdrop(fp, td);
	if (error == 0)
		td->td_retval[0] = fd;
	return (error);
}

int
sys___specialfd(struct thread *td, struct __specialfd_args *args)
{
	int error;

	switch (args->type) {
	case SPECIALFD_EVENTFD: {
		struct specialfd_eventfd ae;

		if (args->len != sizeof(struct specialfd_eventfd)) {
			error = EINVAL;
			break;
		}
		error = copyin(args->req, &ae, sizeof(ae));
		if (error != 0)
			break;
		if ((ae.flags & ~(EFD_CLOEXEC | EFD_NONBLOCK |
		    EFD_SEMAPHORE)) != 0) {
			error = EINVAL;
			break;
		}
		error = kern_specialfd(td, args->type, &ae);
		break;
	}
	case SPECIALFD_INOTIFY: {
		struct specialfd_inotify si;

		if (args->len != sizeof(si)) {
			error = EINVAL;
			break;
		}
		error = copyin(args->req, &si, sizeof(si));
		if (error != 0)
			break;
		error = kern_specialfd(td, args->type, &si);
		break;
	}
	default:
		error = EINVAL;
		break;
	}
	return (error);
}
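
/*
 * Illustrative only (not compiled into the kernel): __specialfd is
 * the backend for the eventfd(2) libc wrapper.  A minimal userspace
 * sketch using the wrapper:
 *
 *	#include <sys/eventfd.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		uint64_t val;
 *		int fd = eventfd(3, EFD_CLOEXEC);
 *
 *		// Reads drain the 64-bit counter set at creation time.
 *		if (fd != -1 && read(fd, &val, sizeof(val)) == sizeof(val))
 *			printf("counter: %ju\n", (uintmax_t)val);
 *		return (0);
 *	}
 */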
1033 */ 1034 if (events & ~POLLSTANDARD) 1035 return (POLLNVAL); 1036 1037 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 1038 } 1039 1040 int 1041 sys_pselect(struct thread *td, struct pselect_args *uap) 1042 { 1043 struct timespec ts; 1044 struct timeval tv, *tvp; 1045 sigset_t set, *uset; 1046 int error; 1047 1048 if (uap->ts != NULL) { 1049 error = copyin(uap->ts, &ts, sizeof(ts)); 1050 if (error != 0) 1051 return (error); 1052 TIMESPEC_TO_TIMEVAL(&tv, &ts); 1053 tvp = &tv; 1054 } else 1055 tvp = NULL; 1056 if (uap->sm != NULL) { 1057 error = copyin(uap->sm, &set, sizeof(set)); 1058 if (error != 0) 1059 return (error); 1060 uset = &set; 1061 } else 1062 uset = NULL; 1063 return (kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, 1064 uset, NFDBITS)); 1065 } 1066 1067 int 1068 kern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou, fd_set *ex, 1069 struct timeval *tvp, sigset_t *uset, int abi_nfdbits) 1070 { 1071 int error; 1072 1073 if (uset != NULL) { 1074 error = kern_sigprocmask(td, SIG_SETMASK, uset, 1075 &td->td_oldsigmask, 0); 1076 if (error != 0) 1077 return (error); 1078 td->td_pflags |= TDP_OLDMASK; 1079 } 1080 error = kern_select(td, nd, in, ou, ex, tvp, abi_nfdbits); 1081 if (uset != NULL) { 1082 /* 1083 * Make sure that ast() is called on return to 1084 * usermode and TDP_OLDMASK is cleared, restoring old 1085 * sigmask. If we didn't get interrupted, then the caller is 1086 * likely not expecting a signal to hit that should normally be 1087 * blocked by its signal mask, so we restore the mask before 1088 * any signals could be delivered. 1089 */ 1090 if (error == EINTR) { 1091 ast_sched(td, TDA_SIGSUSPEND); 1092 } else { 1093 /* *select(2) should never restart. */ 1094 MPASS(error != ERESTART); 1095 ast_sched(td, TDA_PSELECT); 1096 } 1097 } 1098 1099 return (error); 1100 } 1101 1102 #ifndef _SYS_SYSPROTO_H_ 1103 struct select_args { 1104 int nd; 1105 fd_set *in, *ou, *ex; 1106 struct timeval *tv; 1107 }; 1108 #endif 1109 int 1110 sys_select(struct thread *td, struct select_args *uap) 1111 { 1112 struct timeval tv, *tvp; 1113 int error; 1114 1115 if (uap->tv != NULL) { 1116 error = copyin(uap->tv, &tv, sizeof(tv)); 1117 if (error) 1118 return (error); 1119 tvp = &tv; 1120 } else 1121 tvp = NULL; 1122 1123 return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, 1124 NFDBITS)); 1125 } 1126 1127 /* 1128 * In the unlikely case when user specified n greater then the last 1129 * open file descriptor, check that no bits are set after the last 1130 * valid fd. We must return EBADF if any is set. 1131 * 1132 * There are applications that rely on the behaviour. 1133 * 1134 * nd is fd_nfiles. 

/*
 * In the unlikely case when the user specified nd greater than the last
 * open file descriptor, check that no bits are set after the last
 * valid fd.  We must return EBADF if any are set.
 *
 * There are applications that rely on the behaviour.
 *
 * nd is fd_nfiles.
 */
static int
select_check_badfd(fd_set *fd_in, int nd, int ndu, int abi_nfdbits)
{
	char *addr, *oaddr;
	int b, i, res;
	uint8_t bits;

	if (nd >= ndu || fd_in == NULL)
		return (0);

	oaddr = NULL;
	bits = 0; /* silence gcc */
	for (i = nd; i < ndu; i++) {
		b = i / NBBY;
#if BYTE_ORDER == LITTLE_ENDIAN
		addr = (char *)fd_in + b;
#else
		addr = (char *)fd_in;
		if (abi_nfdbits == NFDBITS) {
			addr += rounddown(b, sizeof(fd_mask)) +
			    sizeof(fd_mask) - 1 - b % sizeof(fd_mask);
		} else {
			addr += rounddown(b, sizeof(uint32_t)) +
			    sizeof(uint32_t) - 1 - b % sizeof(uint32_t);
		}
#endif
		if (addr != oaddr) {
			res = fubyte(addr);
			if (res == -1)
				return (EFAULT);
			oaddr = addr;
			bits = res;
		}
		if ((bits & (1 << (i % NBBY))) != 0)
			return (EBADF);
	}
	return (0);
}

int
kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
    fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits)
{
	struct filedesc *fdp;
	/*
	 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
	 * infds with the new FD_SETSIZE of 1024, and more than enough for
	 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
	 * of 256.
	 */
	fd_mask s_selbits[howmany(2048, NFDBITS)];
	fd_mask *ibits[3], *obits[3], *selbits, *sbp;
	struct timeval rtv;
	sbintime_t asbt, precision, rsbt;
	u_int nbufbytes, ncpbytes, ncpubytes, nfdbits;
	int error, lf, ndu;

	if (nd < 0)
		return (EINVAL);
	fdp = td->td_proc->p_fd;
	ndu = nd;
	lf = fdp->fd_nfiles;
	if (nd > lf)
		nd = lf;

	error = select_check_badfd(fd_in, nd, ndu, abi_nfdbits);
	if (error != 0)
		return (error);
	error = select_check_badfd(fd_ou, nd, ndu, abi_nfdbits);
	if (error != 0)
		return (error);
	error = select_check_badfd(fd_ex, nd, ndu, abi_nfdbits);
	if (error != 0)
		return (error);

	/*
	 * Allocate just enough bits for the non-null fd_sets.  Use the
	 * preallocated auto buffer if possible.
	 */
	nfdbits = roundup(nd, NFDBITS);
	ncpbytes = nfdbits / NBBY;
	ncpubytes = roundup(nd, abi_nfdbits) / NBBY;
	nbufbytes = 0;
	if (fd_in != NULL)
		nbufbytes += 2 * ncpbytes;
	if (fd_ou != NULL)
		nbufbytes += 2 * ncpbytes;
	if (fd_ex != NULL)
		nbufbytes += 2 * ncpbytes;
	if (nbufbytes <= sizeof s_selbits)
		selbits = &s_selbits[0];
	else
		selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);

	/*
	 * Assign pointers into the bit buffers and fetch the input bits.
	 * Put the output buffers together so that they can be bzeroed
	 * together.
1234 */ 1235 sbp = selbits; 1236 #define getbits(name, x) \ 1237 do { \ 1238 if (name == NULL) { \ 1239 ibits[x] = NULL; \ 1240 obits[x] = NULL; \ 1241 } else { \ 1242 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ 1243 obits[x] = sbp; \ 1244 sbp += ncpbytes / sizeof *sbp; \ 1245 error = copyin(name, ibits[x], ncpubytes); \ 1246 if (error != 0) \ 1247 goto done; \ 1248 if (ncpbytes != ncpubytes) \ 1249 bzero((char *)ibits[x] + ncpubytes, \ 1250 ncpbytes - ncpubytes); \ 1251 } \ 1252 } while (0) 1253 getbits(fd_in, 0); 1254 getbits(fd_ou, 1); 1255 getbits(fd_ex, 2); 1256 #undef getbits 1257 1258 #if BYTE_ORDER == BIG_ENDIAN && defined(__LP64__) 1259 /* 1260 * XXX: swizzle_fdset assumes that if abi_nfdbits != NFDBITS, 1261 * we are running under 32-bit emulation. This should be more 1262 * generic. 1263 */ 1264 #define swizzle_fdset(bits) \ 1265 if (abi_nfdbits != NFDBITS && bits != NULL) { \ 1266 int i; \ 1267 for (i = 0; i < ncpbytes / sizeof *sbp; i++) \ 1268 bits[i] = (bits[i] >> 32) | (bits[i] << 32); \ 1269 } 1270 #else 1271 #define swizzle_fdset(bits) 1272 #endif 1273 1274 /* Make sure the bit order makes it through an ABI transition */ 1275 swizzle_fdset(ibits[0]); 1276 swizzle_fdset(ibits[1]); 1277 swizzle_fdset(ibits[2]); 1278 1279 if (nbufbytes != 0) 1280 bzero(selbits, nbufbytes / 2); 1281 1282 precision = 0; 1283 if (tvp != NULL) { 1284 rtv = *tvp; 1285 if (rtv.tv_sec < 0 || rtv.tv_usec < 0 || 1286 rtv.tv_usec >= 1000000) { 1287 error = EINVAL; 1288 goto done; 1289 } 1290 if (!timevalisset(&rtv)) 1291 asbt = 0; 1292 else if (rtv.tv_sec <= INT32_MAX) { 1293 rsbt = tvtosbt(rtv); 1294 precision = rsbt; 1295 precision >>= tc_precexp; 1296 if (TIMESEL(&asbt, rsbt)) 1297 asbt += tc_tick_sbt; 1298 if (asbt <= SBT_MAX - rsbt) 1299 asbt += rsbt; 1300 else 1301 asbt = -1; 1302 } else 1303 asbt = -1; 1304 } else 1305 asbt = -1; 1306 seltdinit(td); 1307 /* Iterate until the timeout expires or descriptors become ready. */ 1308 for (;;) { 1309 error = selscan(td, ibits, obits, nd); 1310 if (error || td->td_retval[0] != 0) 1311 break; 1312 error = seltdwait(td, asbt, precision); 1313 if (error) 1314 break; 1315 error = selrescan(td, ibits, obits); 1316 if (error || td->td_retval[0] != 0) 1317 break; 1318 } 1319 seltdclear(td); 1320 1321 done: 1322 /* select is not restarted after signals... */ 1323 if (error == ERESTART) 1324 error = EINTR; 1325 if (error == EWOULDBLOCK) 1326 error = 0; 1327 1328 /* swizzle bit order back, if necessary */ 1329 swizzle_fdset(obits[0]); 1330 swizzle_fdset(obits[1]); 1331 swizzle_fdset(obits[2]); 1332 #undef swizzle_fdset 1333 1334 #define putbits(name, x) \ 1335 if (name && (error2 = copyout(obits[x], name, ncpubytes))) \ 1336 error = error2; 1337 if (error == 0) { 1338 int error2; 1339 1340 putbits(fd_in, 0); 1341 putbits(fd_ou, 1); 1342 putbits(fd_ex, 2); 1343 #undef putbits 1344 } 1345 if (selbits != &s_selbits[0]) 1346 free(selbits, M_SELECT); 1347 1348 return (error); 1349 } 1350 /* 1351 * Convert a select bit set to poll flags. 1352 * 1353 * The backend always returns POLLHUP/POLLERR if appropriate and we 1354 * return this as a set bit in any set. 1355 */ 1356 static const int select_flags[3] = { 1357 POLLRDNORM | POLLHUP | POLLERR, 1358 POLLWRNORM | POLLHUP | POLLERR, 1359 POLLRDBAND | POLLERR 1360 }; 1361 1362 /* 1363 * Compute the fo_poll flags required for a fd given by the index and 1364 * bit position in the fd_mask array. 

/*
 * Compute the fo_poll flags required for a fd given by the index and
 * bit position in the fd_mask array.
 */
static __inline int
selflags(fd_mask **ibits, int idx, fd_mask bit)
{
	int flags;
	int msk;

	flags = 0;
	for (msk = 0; msk < 3; msk++) {
		if (ibits[msk] == NULL)
			continue;
		if ((ibits[msk][idx] & bit) == 0)
			continue;
		flags |= select_flags[msk];
	}
	return (flags);
}

/*
 * Set the appropriate output bits given a mask of fired events and the
 * input bits originally requested.
 */
static __inline int
selsetbits(fd_mask **ibits, fd_mask **obits, int idx, fd_mask bit, int events)
{
	int msk;
	int n;

	n = 0;
	for (msk = 0; msk < 3; msk++) {
		if ((events & select_flags[msk]) == 0)
			continue;
		if (ibits[msk] == NULL)
			continue;
		if ((ibits[msk][idx] & bit) == 0)
			continue;
		/*
		 * XXX Check for a duplicate set.  This can occur because a
		 * socket calls selrecord() twice for each poll() call
		 * resulting in two selfds per real fd.  selrescan() will
		 * call selsetbits twice as a result.
		 */
		if ((obits[msk][idx] & bit) != 0)
			continue;
		obits[msk][idx] |= bit;
		n++;
	}

	return (n);
}

/*
 * Traverse the list of fds attached to this thread's seltd and check for
 * completion.
 */
static int
selrescan(struct thread *td, fd_mask **ibits, fd_mask **obits)
{
	struct filedesc *fdp;
	struct selinfo *si;
	struct seltd *stp;
	struct selfd *sfp;
	struct selfd *sfn;
	struct file *fp;
	fd_mask bit;
	int fd, ev, n, idx;
	int error;
	bool only_user;

	fdp = td->td_proc->p_fd;
	stp = td->td_sel;
	n = 0;
	only_user = FILEDESC_IS_ONLY_USER(fdp);
	STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) {
		fd = (int)(uintptr_t)sfp->sf_cookie;
		si = sfp->sf_si;
		selfdfree(stp, sfp);
		/* If the selinfo wasn't cleared the event didn't fire. */
		if (si != NULL)
			continue;
		if (only_user)
			error = fget_only_user(fdp, fd, &cap_event_rights, &fp);
		else
			error = fget_unlocked(td, fd, &cap_event_rights, &fp);
		if (__predict_false(error != 0))
			return (error);
		idx = fd / NFDBITS;
		bit = (fd_mask)1 << (fd % NFDBITS);
		ev = fo_poll(fp, selflags(ibits, idx, bit), td->td_ucred, td);
		if (only_user)
			fput_only_user(fdp, fp);
		else
			fdrop(fp, td);
		if (ev != 0)
			n += selsetbits(ibits, obits, idx, bit, ev);
	}
	stp->st_flags = 0;
	td->td_retval[0] = n;
	return (0);
}

/*
 * Perform the initial file descriptor scan and register ourselves with
 * each selinfo.
 */
static int
selscan(struct thread *td, fd_mask **ibits, fd_mask **obits, int nfd)
{
	struct filedesc *fdp;
	struct file *fp;
	fd_mask bit;
	int ev, flags, end, fd;
	int n, idx;
	int error;
	bool only_user;

	fdp = td->td_proc->p_fd;
	n = 0;
	only_user = FILEDESC_IS_ONLY_USER(fdp);
	for (idx = 0, fd = 0; fd < nfd; idx++) {
		end = imin(fd + NFDBITS, nfd);
		for (bit = 1; fd < end; bit <<= 1, fd++) {
			/* Compute the list of events we're interested in. */
			flags = selflags(ibits, idx, bit);
			if (flags == 0)
				continue;
			if (only_user)
				error = fget_only_user(fdp, fd, &cap_event_rights, &fp);
			else
				error = fget_unlocked(td, fd, &cap_event_rights, &fp);
			if (__predict_false(error != 0))
				return (error);
			selfdalloc(td, (void *)(uintptr_t)fd);
			ev = fo_poll(fp, flags, td->td_ucred, td);
			if (only_user)
				fput_only_user(fdp, fp);
			else
				fdrop(fp, td);
			if (ev != 0)
				n += selsetbits(ibits, obits, idx, bit, ev);
		}
	}

	td->td_retval[0] = n;
	return (0);
}

int
sys_poll(struct thread *td, struct poll_args *uap)
{
	struct timespec ts, *tsp;

	if (uap->timeout != INFTIM) {
		if (uap->timeout < 0)
			return (EINVAL);
		ts.tv_sec = uap->timeout / 1000;
		ts.tv_nsec = (uap->timeout % 1000) * 1000000;
		tsp = &ts;
	} else
		tsp = NULL;

	return (kern_poll(td, uap->fds, uap->nfds, tsp, NULL));
}

/*
 * kfds points to an array in the kernel.
 */
int
kern_poll_kfds(struct thread *td, struct pollfd *kfds, u_int nfds,
    struct timespec *tsp, sigset_t *uset)
{
	sbintime_t sbt, precision, tmp;
	time_t over;
	struct timespec ts;
	int error;

	precision = 0;
	if (tsp != NULL) {
		if (!timespecvalid_interval(tsp))
			return (EINVAL);
		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
			sbt = 0;
		else {
			ts = *tsp;
			if (ts.tv_sec > INT32_MAX / 2) {
				over = ts.tv_sec - INT32_MAX / 2;
				ts.tv_sec -= over;
			} else
				over = 0;
			tmp = tstosbt(ts);
			precision = tmp;
			precision >>= tc_precexp;
			if (TIMESEL(&sbt, tmp))
				sbt += tc_tick_sbt;
			sbt += tmp;
		}
	} else
		sbt = -1;

	if (uset != NULL) {
		error = kern_sigprocmask(td, SIG_SETMASK, uset,
		    &td->td_oldsigmask, 0);
		if (error)
			return (error);
		td->td_pflags |= TDP_OLDMASK;
	}

	seltdinit(td);
	/* Iterate until the timeout expires or descriptors become ready. */
	for (;;) {
		error = pollscan(td, kfds, nfds);
		if (error || td->td_retval[0] != 0)
			break;
		error = seltdwait(td, sbt, precision);
		if (error)
			break;
		error = pollrescan(td);
		if (error || td->td_retval[0] != 0)
			break;
	}
	seltdclear(td);

	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;

	if (uset != NULL) {
		/*
		 * Make sure that ast() is called on return to
		 * usermode and TDP_OLDMASK is cleared, restoring old
		 * sigmask.  If we didn't get interrupted, then the caller is
		 * likely not expecting a signal to hit that should normally be
		 * blocked by its signal mask, so we restore the mask before
		 * any signals could be delivered.
		 */
		if (error == EINTR)
			ast_sched(td, TDA_SIGSUSPEND);
		else
			ast_sched(td, TDA_PSELECT);
	}

	return (error);
}

int
sys_ppoll(struct thread *td, struct ppoll_args *uap)
{
	struct timespec ts, *tsp;
	sigset_t set, *ssp;
	int error;

	if (uap->ts != NULL) {
		error = copyin(uap->ts, &ts, sizeof(ts));
		if (error)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;
	if (uap->set != NULL) {
		error = copyin(uap->set, &set, sizeof(set));
		if (error)
			return (error);
		ssp = &set;
	} else
		ssp = NULL;
	return (kern_poll(td, uap->fds, uap->nfds, tsp, ssp));
}

/*
 * ufds points to an array in user space.
 */
int
kern_poll(struct thread *td, struct pollfd *ufds, u_int nfds,
    struct timespec *tsp, sigset_t *set)
{
	struct pollfd *kfds;
	struct pollfd stackfds[32];
	int error;

	if (kern_poll_maxfds(nfds))
		return (EINVAL);
	if (nfds > nitems(stackfds))
		kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK);
	else
		kfds = stackfds;
	error = copyin(ufds, kfds, nfds * sizeof(*kfds));
	if (error != 0)
		goto out;

	error = kern_poll_kfds(td, kfds, nfds, tsp, set);
	if (error == 0)
		error = pollout(td, kfds, ufds, nfds);
#ifdef KTRACE
	if (error == 0 && KTRPOINT(td, KTR_STRUCT_ARRAY))
		ktrstructarray("pollfd", UIO_USERSPACE, ufds, nfds,
		    sizeof(*ufds));
#endif

out:
	if (nfds > nitems(stackfds))
		free(kfds, M_TEMP);
	return (error);
}

bool
kern_poll_maxfds(u_int nfds)
{

	/*
	 * This is kinda bogus.  We have fd limits, but that is not
	 * really related to the size of the pollfd array.  Make sure
	 * we let the process use at least FD_SETSIZE entries and at
	 * least enough for the system-wide limits.  We want to be reasonably
	 * safe, but not overly restrictive.
	 */
	return (nfds > maxfilesperproc && nfds > FD_SETSIZE);
}

static int
pollrescan(struct thread *td)
{
	struct seltd *stp;
	struct selfd *sfp;
	struct selfd *sfn;
	struct selinfo *si;
	struct filedesc *fdp;
	struct file *fp;
	struct pollfd *fd;
	int n, error;
	bool only_user;

	n = 0;
	fdp = td->td_proc->p_fd;
	stp = td->td_sel;
	only_user = FILEDESC_IS_ONLY_USER(fdp);
	STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) {
		fd = (struct pollfd *)sfp->sf_cookie;
		si = sfp->sf_si;
		selfdfree(stp, sfp);
		/* If the selinfo wasn't cleared the event didn't fire. */
		if (si != NULL)
			continue;
		if (only_user)
			error = fget_only_user(fdp, fd->fd, &cap_event_rights, &fp);
		else
			error = fget_unlocked(td, fd->fd, &cap_event_rights, &fp);
		if (__predict_false(error != 0)) {
			fd->revents = POLLNVAL;
			n++;
			continue;
		}
		/*
		 * Note: backend also returns POLLHUP and
		 * POLLERR if appropriate.
1721 */ 1722 fd->revents = fo_poll(fp, fd->events, td->td_ucred, td); 1723 if (only_user) 1724 fput_only_user(fdp, fp); 1725 else 1726 fdrop(fp, td); 1727 if (fd->revents != 0) 1728 n++; 1729 } 1730 stp->st_flags = 0; 1731 td->td_retval[0] = n; 1732 return (0); 1733 } 1734 1735 static int 1736 pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) 1737 { 1738 int error = 0; 1739 u_int i = 0; 1740 u_int n = 0; 1741 1742 for (i = 0; i < nfd; i++) { 1743 error = copyout(&fds->revents, &ufds->revents, 1744 sizeof(ufds->revents)); 1745 if (error) 1746 return (error); 1747 if (fds->revents != 0) 1748 n++; 1749 fds++; 1750 ufds++; 1751 } 1752 td->td_retval[0] = n; 1753 return (0); 1754 } 1755 1756 static int 1757 pollscan(struct thread *td, struct pollfd *fds, u_int nfd) 1758 { 1759 struct filedesc *fdp; 1760 struct file *fp; 1761 int i, n, error; 1762 bool only_user; 1763 1764 n = 0; 1765 fdp = td->td_proc->p_fd; 1766 only_user = FILEDESC_IS_ONLY_USER(fdp); 1767 for (i = 0; i < nfd; i++, fds++) { 1768 if (fds->fd < 0) { 1769 fds->revents = 0; 1770 continue; 1771 } 1772 if (only_user) 1773 error = fget_only_user(fdp, fds->fd, &cap_event_rights, &fp); 1774 else 1775 error = fget_unlocked(td, fds->fd, &cap_event_rights, &fp); 1776 if (__predict_false(error != 0)) { 1777 fds->revents = POLLNVAL; 1778 n++; 1779 continue; 1780 } 1781 /* 1782 * Note: backend also returns POLLHUP and 1783 * POLLERR if appropriate. 1784 */ 1785 selfdalloc(td, fds); 1786 fds->revents = fo_poll(fp, fds->events, 1787 td->td_ucred, td); 1788 if (only_user) 1789 fput_only_user(fdp, fp); 1790 else 1791 fdrop(fp, td); 1792 /* 1793 * POSIX requires POLLOUT to be never 1794 * set simultaneously with POLLHUP. 1795 */ 1796 if ((fds->revents & POLLHUP) != 0) 1797 fds->revents &= ~POLLOUT; 1798 1799 if (fds->revents != 0) 1800 n++; 1801 } 1802 td->td_retval[0] = n; 1803 return (0); 1804 } 1805 1806 /* 1807 * XXX This was created specifically to support netncp and netsmb. This 1808 * allows the caller to specify a socket to wait for events on. It returns 1809 * 0 if any events matched and an error otherwise. There is no way to 1810 * determine which events fired. 1811 */ 1812 int 1813 selsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td) 1814 { 1815 struct timeval rtv; 1816 sbintime_t asbt, precision, rsbt; 1817 int error; 1818 1819 precision = 0; /* stupid gcc! */ 1820 if (tvp != NULL) { 1821 rtv = *tvp; 1822 if (rtv.tv_sec < 0 || rtv.tv_usec < 0 || 1823 rtv.tv_usec >= 1000000) 1824 return (EINVAL); 1825 if (!timevalisset(&rtv)) 1826 asbt = 0; 1827 else if (rtv.tv_sec <= INT32_MAX) { 1828 rsbt = tvtosbt(rtv); 1829 precision = rsbt; 1830 precision >>= tc_precexp; 1831 if (TIMESEL(&asbt, rsbt)) 1832 asbt += tc_tick_sbt; 1833 if (asbt <= SBT_MAX - rsbt) 1834 asbt += rsbt; 1835 else 1836 asbt = -1; 1837 } else 1838 asbt = -1; 1839 } else 1840 asbt = -1; 1841 seltdinit(td); 1842 /* 1843 * Iterate until the timeout expires or the socket becomes ready. 1844 */ 1845 for (;;) { 1846 selfdalloc(td, NULL); 1847 if (so->so_proto->pr_sopoll(so, events, td) != 0) { 1848 error = 0; 1849 break; 1850 } 1851 error = seltdwait(td, asbt, precision); 1852 if (error) 1853 break; 1854 } 1855 seltdclear(td); 1856 /* XXX Duplicates ncp/smb behavior. */ 1857 if (error == ERESTART) 1858 error = 0; 1859 return (error); 1860 } 1861 1862 /* 1863 * Preallocate two selfds associated with 'cookie'. Some fo_poll routines 1864 * have two select sets, one for read and another for write. 

/*
 * Preallocate two selfds associated with 'cookie'.  Some fo_poll routines
 * have two select sets, one for read and another for write.
 */
static void
selfdalloc(struct thread *td, void *cookie)
{
	struct seltd *stp;

	stp = td->td_sel;
	if (stp->st_free1 == NULL)
		stp->st_free1 = malloc(sizeof(*stp->st_free1), M_SELFD, M_WAITOK|M_ZERO);
	stp->st_free1->sf_td = stp;
	stp->st_free1->sf_cookie = cookie;
	if (stp->st_free2 == NULL)
		stp->st_free2 = malloc(sizeof(*stp->st_free2), M_SELFD, M_WAITOK|M_ZERO);
	stp->st_free2->sf_td = stp;
	stp->st_free2->sf_cookie = cookie;
}

static void
selfdfree(struct seltd *stp, struct selfd *sfp)
{
	STAILQ_REMOVE(&stp->st_selq, sfp, selfd, sf_link);
	/*
	 * Paired with doselwakeup.
	 */
	if (atomic_load_acq_ptr((uintptr_t *)&sfp->sf_si) != (uintptr_t)NULL) {
		mtx_lock(sfp->sf_mtx);
		if (sfp->sf_si != NULL) {
			TAILQ_REMOVE(&sfp->sf_si->si_tdlist, sfp, sf_threads);
		}
		mtx_unlock(sfp->sf_mtx);
	}
	free(sfp, M_SELFD);
}

/* Drain the waiters tied to all the selfd belonging to the specified selinfo. */
void
seldrain(struct selinfo *sip)
{

	/*
	 * This functionality is already provided by doselwakeup(), thus
	 * it is enough to call it.  Eventually, the caller should take
	 * care to avoid races between a thread calling select()/poll()
	 * and file descriptor detaching, but, again, those races are
	 * just the same as for selwakeup().
	 */
	doselwakeup(sip, -1);
}

/*
 * Record a select request.
 */
void
selrecord(struct thread *selector, struct selinfo *sip)
{
	struct selfd *sfp;
	struct seltd *stp;
	struct mtx *mtxp;

	stp = selector->td_sel;
	/*
	 * Don't record when doing a rescan.
	 */
	if (stp->st_flags & SELTD_RESCAN)
		return;
	/*
	 * Grab one of the preallocated descriptors.
	 */
	sfp = NULL;
	if ((sfp = stp->st_free1) != NULL)
		stp->st_free1 = NULL;
	else if ((sfp = stp->st_free2) != NULL)
		stp->st_free2 = NULL;
	else
		panic("selrecord: No free selfd on selq");
	mtxp = sip->si_mtx;
	if (mtxp == NULL)
		mtxp = mtx_pool_find(mtxpool_select, sip);
	/*
	 * Initialize the sfp and queue it in the thread.
	 */
	sfp->sf_si = sip;
	sfp->sf_mtx = mtxp;
	STAILQ_INSERT_TAIL(&stp->st_selq, sfp, sf_link);
	/*
	 * Now that we've locked the sip, check for initialization.
	 */
	mtx_lock(mtxp);
	if (sip->si_mtx == NULL) {
		sip->si_mtx = mtxp;
		TAILQ_INIT(&sip->si_tdlist);
	}
	/*
	 * Add this thread to the list of selfds listening on this selinfo.
	 */
	TAILQ_INSERT_TAIL(&sip->si_tdlist, sfp, sf_threads);
	mtx_unlock(sip->si_mtx);
}

/* Wake up a selecting thread. */
void
selwakeup(struct selinfo *sip)
{
	doselwakeup(sip, -1);
}

/* Wake up a selecting thread, and set its priority. */
void
selwakeuppri(struct selinfo *sip, int pri)
{
	doselwakeup(sip, pri);
}
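
/*
 * Illustrative only (not part of this file): the usual consumers of
 * selrecord()/selwakeup() are a driver's d_poll method and its data
 * arrival path.  A hypothetical sketch, assuming a driver softc "sc"
 * with an embedded struct selinfo sc_rsel, a mutex sc_mtx, and a
 * readiness flag sc_ready (all names invented for illustration):
 *
 *	static int
 *	foo_poll(struct cdev *dev, int events, struct thread *td)
 *	{
 *		struct foo_softc *sc = dev->si_drv1;
 *		int revents = 0;
 *
 *		mtx_lock(&sc->sc_mtx);
 *		if (events & (POLLIN | POLLRDNORM)) {
 *			if (sc->sc_ready)
 *				revents |= events & (POLLIN | POLLRDNORM);
 *			else
 *				// Not ready: register for a wakeup.
 *				selrecord(td, &sc->sc_rsel);
 *		}
 *		mtx_unlock(&sc->sc_mtx);
 *		return (revents);
 *	}
 *
 *	// Interrupt/data path, after setting sc_ready:
 *	//	selwakeuppri(&sc->sc_rsel, PZERO + 1);
 */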

/*
 * Do a wakeup when a selectable event occurs.
 */
static void
doselwakeup(struct selinfo *sip, int pri)
{
	struct selfd *sfp;
	struct selfd *sfn;
	struct seltd *stp;

	/* If it's not initialized there can't be any waiters. */
	if (sip->si_mtx == NULL)
		return;
	/*
	 * Locking the selinfo locks all selfds associated with it.
	 */
	mtx_lock(sip->si_mtx);
	TAILQ_FOREACH_SAFE(sfp, &sip->si_tdlist, sf_threads, sfn) {
		/*
		 * Once we remove this sfp from the list and clear the
		 * sf_si, seltdclear will know to ignore this si.
		 */
		TAILQ_REMOVE(&sip->si_tdlist, sfp, sf_threads);
		stp = sfp->sf_td;
		mtx_lock(&stp->st_mtx);
		stp->st_flags |= SELTD_PENDING;
		cv_broadcastpri(&stp->st_wait, pri);
		mtx_unlock(&stp->st_mtx);
		/*
		 * Paired with selfdfree.
		 *
		 * Storing this only after the wakeup provides an invariant that
		 * stp is not used after selfdfree returns.
		 */
		atomic_store_rel_ptr((uintptr_t *)&sfp->sf_si, (uintptr_t)NULL);
	}
	mtx_unlock(sip->si_mtx);
}

static void
seltdinit(struct thread *td)
{
	struct seltd *stp;

	stp = td->td_sel;
	if (stp != NULL) {
		MPASS(stp->st_flags == 0);
		MPASS(STAILQ_EMPTY(&stp->st_selq));
		return;
	}
	stp = malloc(sizeof(*stp), M_SELECT, M_WAITOK|M_ZERO);
	mtx_init(&stp->st_mtx, "sellck", NULL, MTX_DEF);
	cv_init(&stp->st_wait, "select");
	stp->st_flags = 0;
	STAILQ_INIT(&stp->st_selq);
	td->td_sel = stp;
}

static int
seltdwait(struct thread *td, sbintime_t sbt, sbintime_t precision)
{
	struct seltd *stp;
	int error;

	stp = td->td_sel;
	/*
	 * An event of interest may occur while we do not hold the seltd
	 * locked so check the pending flag before we sleep.
	 */
	mtx_lock(&stp->st_mtx);
	/*
	 * Any further calls to selrecord will be a rescan.
	 */
	stp->st_flags |= SELTD_RESCAN;
	if (stp->st_flags & SELTD_PENDING) {
		mtx_unlock(&stp->st_mtx);
		return (0);
	}
	if (sbt == 0)
		error = EWOULDBLOCK;
	else if (sbt != -1)
		error = cv_timedwait_sig_sbt(&stp->st_wait, &stp->st_mtx,
		    sbt, precision, C_ABSOLUTE);
	else
		error = cv_wait_sig(&stp->st_wait, &stp->st_mtx);
	mtx_unlock(&stp->st_mtx);

	return (error);
}

void
seltdfini(struct thread *td)
{
	struct seltd *stp;

	stp = td->td_sel;
	if (stp == NULL)
		return;
	MPASS(stp->st_flags == 0);
	MPASS(STAILQ_EMPTY(&stp->st_selq));
	if (stp->st_free1)
		free(stp->st_free1, M_SELFD);
	if (stp->st_free2)
		free(stp->st_free2, M_SELFD);
	td->td_sel = NULL;
	cv_destroy(&stp->st_wait);
	mtx_destroy(&stp->st_mtx);
	free(stp, M_SELECT);
}

/*
 * Remove the references to the thread from all of the objects we were
 * polling.
 */
static void
seltdclear(struct thread *td)
{
	struct seltd *stp;
	struct selfd *sfp;
	struct selfd *sfn;

	stp = td->td_sel;
	STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn)
		selfdfree(stp, sfp);
	stp->st_flags = 0;
}

static void selectinit(void *);
SYSINIT(select, SI_SUB_SYSCALLS, SI_ORDER_ANY, selectinit, NULL);
static void
selectinit(void *dummy __unused)
{

	mtxpool_select = mtx_pool_create("select mtxpool", 128, MTX_DEF);
}

/*
 * Set up a syscall return value that follows the convention specified for
 * posix_* functions.
2118 */ 2119 int 2120 kern_posix_error(struct thread *td, int error) 2121 { 2122 2123 if (error <= 0) 2124 return (error); 2125 td->td_errno = error; 2126 td->td_pflags |= TDP_NERRNO; 2127 td->td_retval[0] = error; 2128 return (0); 2129 } 2130 2131 int 2132 kcmp_cmp(uintptr_t a, uintptr_t b) 2133 { 2134 if (a == b) 2135 return (0); 2136 else if (a < b) 2137 return (1); 2138 return (2); 2139 } 2140 2141 static int 2142 kcmp_pget(struct thread *td, pid_t pid, struct proc **pp) 2143 { 2144 int error; 2145 2146 if (pid == td->td_proc->p_pid) { 2147 *pp = td->td_proc; 2148 return (0); 2149 } 2150 error = pget(pid, PGET_NOTID | PGET_CANDEBUG | PGET_NOTWEXIT | 2151 PGET_HOLD, pp); 2152 MPASS(*pp != td->td_proc); 2153 return (error); 2154 } 2155 2156 int 2157 kern_kcmp(struct thread *td, pid_t pid1, pid_t pid2, int type, 2158 uintptr_t idx1, uintptr_t idx2) 2159 { 2160 struct proc *p1, *p2; 2161 struct file *fp1, *fp2; 2162 int error, res; 2163 2164 res = -1; 2165 p1 = p2 = NULL; 2166 error = kcmp_pget(td, pid1, &p1); 2167 if (error == 0) 2168 error = kcmp_pget(td, pid2, &p2); 2169 if (error != 0) 2170 goto out; 2171 2172 switch (type) { 2173 case KCMP_FILE: 2174 case KCMP_FILEOBJ: 2175 error = fget_remote(td, p1, idx1, &fp1); 2176 if (error == 0) { 2177 error = fget_remote(td, p2, idx2, &fp2); 2178 if (error == 0) { 2179 if (type == KCMP_FILEOBJ) 2180 res = fo_cmp(fp1, fp2, td); 2181 else 2182 res = kcmp_cmp((uintptr_t)fp1, 2183 (uintptr_t)fp2); 2184 fdrop(fp2, td); 2185 } 2186 fdrop(fp1, td); 2187 } 2188 break; 2189 case KCMP_FILES: 2190 res = kcmp_cmp((uintptr_t)p1->p_fd, (uintptr_t)p2->p_fd); 2191 break; 2192 case KCMP_SIGHAND: 2193 res = kcmp_cmp((uintptr_t)p1->p_sigacts, 2194 (uintptr_t)p2->p_sigacts); 2195 break; 2196 case KCMP_VM: 2197 res = kcmp_cmp((uintptr_t)p1->p_vmspace, 2198 (uintptr_t)p2->p_vmspace); 2199 break; 2200 default: 2201 error = EINVAL; 2202 break; 2203 } 2204 2205 out: 2206 if (p1 != NULL && p1 != td->td_proc) 2207 PRELE(p1); 2208 if (p2 != NULL && p2 != td->td_proc) 2209 PRELE(p2); 2210 2211 td->td_retval[0] = res; 2212 return (error); 2213 } 2214 2215 int 2216 sys_kcmp(struct thread *td, struct kcmp_args *uap) 2217 { 2218 return (kern_kcmp(td, uap->pid1, uap->pid2, uap->type, 2219 uap->idx1, uap->idx2)); 2220 } 2221 2222 int 2223 file_kcmp_generic(struct file *fp1, struct file *fp2, struct thread *td) 2224 { 2225 if (fp1->f_type != fp2->f_type) 2226 return (3); 2227 return (kcmp_cmp((uintptr_t)fp1->f_data, (uintptr_t)fp2->f_data)); 2228 } 2229 2230 int 2231 exterr_to_ue(struct thread *td, struct uexterror *ue) 2232 { 2233 if ((td->td_pflags2 & TDP2_EXTERR) == 0) 2234 return (ENOENT); 2235 2236 memset(ue, 0, sizeof(*ue)); 2237 ue->error = td->td_kexterr.error; 2238 ue->cat = td->td_kexterr.cat; 2239 ue->src_line = td->td_kexterr.src_line; 2240 ue->p1 = td->td_kexterr.p1; 2241 ue->p2 = td->td_kexterr.p2; 2242 if (td->td_kexterr.msg != NULL) 2243 strlcpy(ue->msg, td->td_kexterr.msg, sizeof(ue->msg)); 2244 return (0); 2245 } 2246 2247 void 2248 exterr_copyout(struct thread *td) 2249 { 2250 struct uexterror ue; 2251 ksiginfo_t ksi; 2252 void *uloc; 2253 size_t sz; 2254 int error; 2255 2256 MPASS((td->td_pflags2 & TDP2_UEXTERR) != 0); 2257 2258 uloc = (char *)td->td_exterr_ptr + __offsetof(struct uexterror, 2259 error); 2260 error = exterr_to_ue(td, &ue); 2261 if (error != 0) { 2262 ue.error = 0; 2263 sz = sizeof(ue.error); 2264 } else { 2265 sz = sizeof(ue) - __offsetof(struct uexterror, error); 2266 } 2267 error = copyout(&ue.error, uloc, sz); 2268 if (error != 0) { 2269 

void
exterr_copyout(struct thread *td)
{
	struct uexterror ue;
	ksiginfo_t ksi;
	void *uloc;
	size_t sz;
	int error;

	MPASS((td->td_pflags2 & TDP2_UEXTERR) != 0);

	uloc = (char *)td->td_exterr_ptr + __offsetof(struct uexterror,
	    error);
	error = exterr_to_ue(td, &ue);
	if (error != 0) {
		ue.error = 0;
		sz = sizeof(ue.error);
	} else {
		sz = sizeof(ue) - __offsetof(struct uexterror, error);
	}
	error = copyout(&ue.error, uloc, sz);
	if (error != 0) {
		td->td_pflags2 &= ~TDP2_UEXTERR;
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGSEGV;
		ksi.ksi_code = SEGV_ACCERR;
		ksi.ksi_addr = uloc;
		trapsignal(td, &ksi);
	}
}

int
sys_exterrctl(struct thread *td, struct exterrctl_args *uap)
{
	uint32_t ver;
	int error;

	if ((uap->flags & ~(EXTERRCTLF_FORCE)) != 0)
		return (EINVAL);
	switch (uap->op) {
	case EXTERRCTL_ENABLE:
		if ((td->td_pflags2 & TDP2_UEXTERR) != 0 &&
		    (uap->flags & EXTERRCTLF_FORCE) == 0)
			return (EBUSY);
		td->td_pflags2 &= ~TDP2_UEXTERR;
		error = copyin(uap->ptr, &ver, sizeof(ver));
		if (error != 0)
			return (error);
		if (ver != UEXTERROR_VER)
			return (EINVAL);
		td->td_pflags2 |= TDP2_UEXTERR;
		td->td_exterr_ptr = uap->ptr;
		return (0);
	case EXTERRCTL_DISABLE:
		if ((td->td_pflags2 & TDP2_UEXTERR) == 0)
			return (EINVAL);
		td->td_pflags2 &= ~TDP2_UEXTERR;
		return (0);
	default:
		return (EINVAL);
	}
}

int
exterr_set(int eerror, int category, const char *mmsg, uintptr_t pp1,
    uintptr_t pp2, int line)
{
	struct thread *td;

	td = curthread;
	if ((td->td_pflags2 & TDP2_UEXTERR) != 0) {
		td->td_pflags2 |= TDP2_EXTERR;
		td->td_kexterr.error = eerror;
		td->td_kexterr.cat = category;
		td->td_kexterr.msg = mmsg;
		td->td_kexterr.p1 = pp1;
		td->td_kexterr.p2 = pp2;
		td->td_kexterr.src_line = line;
		ktrexterr(td);
	}
	return (eerror);
}