1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_capsicum.h" 39 #include "opt_inet.h" 40 #include "opt_inet6.h" 41 #include "opt_sctp.h" 42 #include "opt_compat.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/capability.h> 48 #include <sys/condvar.h> 49 #include <sys/kernel.h> 50 #include <sys/lock.h> 51 #include <sys/mutex.h> 52 #include <sys/sysproto.h> 53 #include <sys/malloc.h> 54 #include <sys/filedesc.h> 55 #include <sys/event.h> 56 #include <sys/proc.h> 57 #include <sys/fcntl.h> 58 #include <sys/file.h> 59 #include <sys/filio.h> 60 #include <sys/jail.h> 61 #include <sys/mman.h> 62 #include <sys/mount.h> 63 #include <sys/mbuf.h> 64 #include <sys/protosw.h> 65 #include <sys/rwlock.h> 66 #include <sys/sf_buf.h> 67 #include <sys/sf_sync.h> 68 #include <sys/sysent.h> 69 #include <sys/socket.h> 70 #include <sys/socketvar.h> 71 #include <sys/signalvar.h> 72 #include <sys/syscallsubr.h> 73 #include <sys/sysctl.h> 74 #include <sys/uio.h> 75 #include <sys/vnode.h> 76 #ifdef KTRACE 77 #include <sys/ktrace.h> 78 #endif 79 #ifdef COMPAT_FREEBSD32 80 #include <compat/freebsd32/freebsd32_util.h> 81 #endif 82 83 #include <net/vnet.h> 84 85 #include <security/audit/audit.h> 86 #include <security/mac/mac_framework.h> 87 88 #include <vm/vm.h> 89 #include <vm/vm_param.h> 90 #include <vm/vm_object.h> 91 #include <vm/vm_page.h> 92 #include <vm/vm_pager.h> 93 #include <vm/vm_kern.h> 94 #include <vm/vm_extern.h> 95 96 #if defined(INET) || defined(INET6) 97 #ifdef SCTP 98 #include <netinet/sctp.h> 99 #include <netinet/sctp_peeloff.h> 100 #endif /* SCTP */ 101 #endif /* INET || INET6 */ 102 103 /* 104 * Flags for accept1() and kern_accept4(), in addition to SOCK_CLOEXEC 105 * and SOCK_NONBLOCK. 106 */ 107 #define ACCEPT4_INHERIT 0x1 108 #define ACCEPT4_COMPAT 0x2 109 110 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 111 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 112 113 static int accept1(struct thread *td, int s, struct sockaddr *uname, 114 socklen_t *anamelen, int flags); 115 static int do_sendfile(struct thread *td, struct sendfile_args *uap, 116 int compat); 117 static int getsockname1(struct thread *td, struct getsockname_args *uap, 118 int compat); 119 static int getpeername1(struct thread *td, struct getpeername_args *uap, 120 int compat); 121 122 counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)]; 123 124 /* 125 * sendfile(2)-related variables and associated sysctls 126 */ 127 static SYSCTL_NODE(_kern_ipc, OID_AUTO, sendfile, CTLFLAG_RW, 0, 128 "sendfile(2) tunables"); 129 static int sfreadahead = 1; 130 SYSCTL_INT(_kern_ipc_sendfile, OID_AUTO, readahead, CTLFLAG_RW, 131 &sfreadahead, 0, "Number of sendfile(2) read-ahead MAXBSIZE blocks"); 132 133 134 static void 135 sfstat_init(const void *unused) 136 { 137 138 COUNTER_ARRAY_ALLOC(sfstat, sizeof(struct sfstat) / sizeof(uint64_t), 139 M_WAITOK); 140 } 141 SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL); 142 143 static int 144 sfstat_sysctl(SYSCTL_HANDLER_ARGS) 145 { 146 struct sfstat s; 147 148 COUNTER_ARRAY_COPY(sfstat, &s, sizeof(s) / sizeof(uint64_t)); 149 if (req->newptr) 150 COUNTER_ARRAY_ZERO(sfstat, sizeof(s) / sizeof(uint64_t)); 151 return (SYSCTL_OUT(req, &s, sizeof(s))); 152 } 153 SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW, 154 NULL, 0, sfstat_sysctl, "I", "sendfile statistics"); 155 156 /* 157 * Convert a user file descriptor to a kernel file entry and check if required 158 * capability rights are present. 159 * A reference on the file entry is held upon returning. 160 */ 161 static int 162 getsock_cap(struct filedesc *fdp, int fd, cap_rights_t *rightsp, 163 struct file **fpp, u_int *fflagp) 164 { 165 struct file *fp; 166 int error; 167 168 error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL); 169 if (error != 0) 170 return (error); 171 if (fp->f_type != DTYPE_SOCKET) { 172 fdrop(fp, curthread); 173 return (ENOTSOCK); 174 } 175 if (fflagp != NULL) 176 *fflagp = fp->f_flag; 177 *fpp = fp; 178 return (0); 179 } 180 181 /* 182 * System call interface to the socket abstraction. 183 */ 184 #if defined(COMPAT_43) 185 #define COMPAT_OLDSOCK 186 #endif 187 188 int 189 sys_socket(td, uap) 190 struct thread *td; 191 struct socket_args /* { 192 int domain; 193 int type; 194 int protocol; 195 } */ *uap; 196 { 197 struct socket *so; 198 struct file *fp; 199 int fd, error, type, oflag, fflag; 200 201 AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol); 202 203 type = uap->type; 204 oflag = 0; 205 fflag = 0; 206 if ((type & SOCK_CLOEXEC) != 0) { 207 type &= ~SOCK_CLOEXEC; 208 oflag |= O_CLOEXEC; 209 } 210 if ((type & SOCK_NONBLOCK) != 0) { 211 type &= ~SOCK_NONBLOCK; 212 fflag |= FNONBLOCK; 213 } 214 215 #ifdef MAC 216 error = mac_socket_check_create(td->td_ucred, uap->domain, type, 217 uap->protocol); 218 if (error != 0) 219 return (error); 220 #endif 221 error = falloc(td, &fp, &fd, oflag); 222 if (error != 0) 223 return (error); 224 /* An extra reference on `fp' has been held for us by falloc(). */ 225 error = socreate(uap->domain, &so, type, uap->protocol, 226 td->td_ucred, td); 227 if (error != 0) { 228 fdclose(td->td_proc->p_fd, fp, fd, td); 229 } else { 230 finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops); 231 if ((fflag & FNONBLOCK) != 0) 232 (void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td); 233 td->td_retval[0] = fd; 234 } 235 fdrop(fp, td); 236 return (error); 237 } 238 239 /* ARGSUSED */ 240 int 241 sys_bind(td, uap) 242 struct thread *td; 243 struct bind_args /* { 244 int s; 245 caddr_t name; 246 int namelen; 247 } */ *uap; 248 { 249 struct sockaddr *sa; 250 int error; 251 252 error = getsockaddr(&sa, uap->name, uap->namelen); 253 if (error == 0) { 254 error = kern_bind(td, uap->s, sa); 255 free(sa, M_SONAME); 256 } 257 return (error); 258 } 259 260 static int 261 kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) 262 { 263 struct socket *so; 264 struct file *fp; 265 cap_rights_t rights; 266 int error; 267 268 AUDIT_ARG_FD(fd); 269 AUDIT_ARG_SOCKADDR(td, dirfd, sa); 270 error = getsock_cap(td->td_proc->p_fd, fd, 271 cap_rights_init(&rights, CAP_BIND), &fp, NULL); 272 if (error != 0) 273 return (error); 274 so = fp->f_data; 275 #ifdef KTRACE 276 if (KTRPOINT(td, KTR_STRUCT)) 277 ktrsockaddr(sa); 278 #endif 279 #ifdef MAC 280 error = mac_socket_check_bind(td->td_ucred, so, sa); 281 if (error == 0) { 282 #endif 283 if (dirfd == AT_FDCWD) 284 error = sobind(so, sa, td); 285 else 286 error = sobindat(dirfd, so, sa, td); 287 #ifdef MAC 288 } 289 #endif 290 fdrop(fp, td); 291 return (error); 292 } 293 294 int 295 kern_bind(struct thread *td, int fd, struct sockaddr *sa) 296 { 297 298 return (kern_bindat(td, AT_FDCWD, fd, sa)); 299 } 300 301 /* ARGSUSED */ 302 int 303 sys_bindat(td, uap) 304 struct thread *td; 305 struct bindat_args /* { 306 int fd; 307 int s; 308 caddr_t name; 309 int namelen; 310 } */ *uap; 311 { 312 struct sockaddr *sa; 313 int error; 314 315 error = getsockaddr(&sa, uap->name, uap->namelen); 316 if (error == 0) { 317 error = kern_bindat(td, uap->fd, uap->s, sa); 318 free(sa, M_SONAME); 319 } 320 return (error); 321 } 322 323 /* ARGSUSED */ 324 int 325 sys_listen(td, uap) 326 struct thread *td; 327 struct listen_args /* { 328 int s; 329 int backlog; 330 } */ *uap; 331 { 332 struct socket *so; 333 struct file *fp; 334 cap_rights_t rights; 335 int error; 336 337 AUDIT_ARG_FD(uap->s); 338 error = getsock_cap(td->td_proc->p_fd, uap->s, 339 cap_rights_init(&rights, CAP_LISTEN), &fp, NULL); 340 if (error == 0) { 341 so = fp->f_data; 342 #ifdef MAC 343 error = mac_socket_check_listen(td->td_ucred, so); 344 if (error == 0) 345 #endif 346 error = solisten(so, uap->backlog, td); 347 fdrop(fp, td); 348 } 349 return(error); 350 } 351 352 /* 353 * accept1() 354 */ 355 static int 356 accept1(td, s, uname, anamelen, flags) 357 struct thread *td; 358 int s; 359 struct sockaddr *uname; 360 socklen_t *anamelen; 361 int flags; 362 { 363 struct sockaddr *name; 364 socklen_t namelen; 365 struct file *fp; 366 int error; 367 368 if (uname == NULL) 369 return (kern_accept4(td, s, NULL, NULL, flags, NULL)); 370 371 error = copyin(anamelen, &namelen, sizeof (namelen)); 372 if (error != 0) 373 return (error); 374 375 error = kern_accept4(td, s, &name, &namelen, flags, &fp); 376 377 /* 378 * return a namelen of zero for older code which might 379 * ignore the return value from accept. 380 */ 381 if (error != 0) { 382 (void) copyout(&namelen, anamelen, sizeof(*anamelen)); 383 return (error); 384 } 385 386 if (error == 0 && uname != NULL) { 387 #ifdef COMPAT_OLDSOCK 388 if (flags & ACCEPT4_COMPAT) 389 ((struct osockaddr *)name)->sa_family = 390 name->sa_family; 391 #endif 392 error = copyout(name, uname, namelen); 393 } 394 if (error == 0) 395 error = copyout(&namelen, anamelen, 396 sizeof(namelen)); 397 if (error != 0) 398 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td); 399 fdrop(fp, td); 400 free(name, M_SONAME); 401 return (error); 402 } 403 404 int 405 kern_accept(struct thread *td, int s, struct sockaddr **name, 406 socklen_t *namelen, struct file **fp) 407 { 408 return (kern_accept4(td, s, name, namelen, ACCEPT4_INHERIT, fp)); 409 } 410 411 int 412 kern_accept4(struct thread *td, int s, struct sockaddr **name, 413 socklen_t *namelen, int flags, struct file **fp) 414 { 415 struct filedesc *fdp; 416 struct file *headfp, *nfp = NULL; 417 struct sockaddr *sa = NULL; 418 struct socket *head, *so; 419 cap_rights_t rights; 420 u_int fflag; 421 pid_t pgid; 422 int error, fd, tmp; 423 424 if (name != NULL) 425 *name = NULL; 426 427 AUDIT_ARG_FD(s); 428 fdp = td->td_proc->p_fd; 429 error = getsock_cap(fdp, s, cap_rights_init(&rights, CAP_ACCEPT), 430 &headfp, &fflag); 431 if (error != 0) 432 return (error); 433 head = headfp->f_data; 434 if ((head->so_options & SO_ACCEPTCONN) == 0) { 435 error = EINVAL; 436 goto done; 437 } 438 #ifdef MAC 439 error = mac_socket_check_accept(td->td_ucred, head); 440 if (error != 0) 441 goto done; 442 #endif 443 error = falloc(td, &nfp, &fd, (flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0); 444 if (error != 0) 445 goto done; 446 ACCEPT_LOCK(); 447 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 448 ACCEPT_UNLOCK(); 449 error = EWOULDBLOCK; 450 goto noconnection; 451 } 452 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 453 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 454 head->so_error = ECONNABORTED; 455 break; 456 } 457 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 458 "accept", 0); 459 if (error != 0) { 460 ACCEPT_UNLOCK(); 461 goto noconnection; 462 } 463 } 464 if (head->so_error) { 465 error = head->so_error; 466 head->so_error = 0; 467 ACCEPT_UNLOCK(); 468 goto noconnection; 469 } 470 so = TAILQ_FIRST(&head->so_comp); 471 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 472 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 473 474 /* 475 * Before changing the flags on the socket, we have to bump the 476 * reference count. Otherwise, if the protocol calls sofree(), 477 * the socket will be released due to a zero refcount. 478 */ 479 SOCK_LOCK(so); /* soref() and so_state update */ 480 soref(so); /* file descriptor reference */ 481 482 TAILQ_REMOVE(&head->so_comp, so, so_list); 483 head->so_qlen--; 484 if (flags & ACCEPT4_INHERIT) 485 so->so_state |= (head->so_state & SS_NBIO); 486 else 487 so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0; 488 so->so_qstate &= ~SQ_COMP; 489 so->so_head = NULL; 490 491 SOCK_UNLOCK(so); 492 ACCEPT_UNLOCK(); 493 494 /* An extra reference on `nfp' has been held for us by falloc(). */ 495 td->td_retval[0] = fd; 496 497 /* connection has been removed from the listen queue */ 498 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 499 500 if (flags & ACCEPT4_INHERIT) { 501 pgid = fgetown(&head->so_sigio); 502 if (pgid != 0) 503 fsetown(pgid, &so->so_sigio); 504 } else { 505 fflag &= ~(FNONBLOCK | FASYNC); 506 if (flags & SOCK_NONBLOCK) 507 fflag |= FNONBLOCK; 508 } 509 510 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 511 /* Sync socket nonblocking/async state with file flags */ 512 tmp = fflag & FNONBLOCK; 513 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 514 tmp = fflag & FASYNC; 515 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 516 sa = 0; 517 error = soaccept(so, &sa); 518 if (error != 0) { 519 /* 520 * return a namelen of zero for older code which might 521 * ignore the return value from accept. 522 */ 523 if (name) 524 *namelen = 0; 525 goto noconnection; 526 } 527 if (sa == NULL) { 528 if (name) 529 *namelen = 0; 530 goto done; 531 } 532 AUDIT_ARG_SOCKADDR(td, AT_FDCWD, sa); 533 if (name) { 534 /* check sa_len before it is destroyed */ 535 if (*namelen > sa->sa_len) 536 *namelen = sa->sa_len; 537 #ifdef KTRACE 538 if (KTRPOINT(td, KTR_STRUCT)) 539 ktrsockaddr(sa); 540 #endif 541 *name = sa; 542 sa = NULL; 543 } 544 noconnection: 545 free(sa, M_SONAME); 546 547 /* 548 * close the new descriptor, assuming someone hasn't ripped it 549 * out from under us. 550 */ 551 if (error != 0) 552 fdclose(fdp, nfp, fd, td); 553 554 /* 555 * Release explicitly held references before returning. We return 556 * a reference on nfp to the caller on success if they request it. 557 */ 558 done: 559 if (fp != NULL) { 560 if (error == 0) { 561 *fp = nfp; 562 nfp = NULL; 563 } else 564 *fp = NULL; 565 } 566 if (nfp != NULL) 567 fdrop(nfp, td); 568 fdrop(headfp, td); 569 return (error); 570 } 571 572 int 573 sys_accept(td, uap) 574 struct thread *td; 575 struct accept_args *uap; 576 { 577 578 return (accept1(td, uap->s, uap->name, uap->anamelen, ACCEPT4_INHERIT)); 579 } 580 581 int 582 sys_accept4(td, uap) 583 struct thread *td; 584 struct accept4_args *uap; 585 { 586 587 if (uap->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 588 return (EINVAL); 589 590 return (accept1(td, uap->s, uap->name, uap->anamelen, uap->flags)); 591 } 592 593 #ifdef COMPAT_OLDSOCK 594 int 595 oaccept(td, uap) 596 struct thread *td; 597 struct accept_args *uap; 598 { 599 600 return (accept1(td, uap->s, uap->name, uap->anamelen, 601 ACCEPT4_INHERIT | ACCEPT4_COMPAT)); 602 } 603 #endif /* COMPAT_OLDSOCK */ 604 605 /* ARGSUSED */ 606 int 607 sys_connect(td, uap) 608 struct thread *td; 609 struct connect_args /* { 610 int s; 611 caddr_t name; 612 int namelen; 613 } */ *uap; 614 { 615 struct sockaddr *sa; 616 int error; 617 618 error = getsockaddr(&sa, uap->name, uap->namelen); 619 if (error == 0) { 620 error = kern_connect(td, uap->s, sa); 621 free(sa, M_SONAME); 622 } 623 return (error); 624 } 625 626 static int 627 kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) 628 { 629 struct socket *so; 630 struct file *fp; 631 cap_rights_t rights; 632 int error, interrupted = 0; 633 634 AUDIT_ARG_FD(fd); 635 AUDIT_ARG_SOCKADDR(td, dirfd, sa); 636 error = getsock_cap(td->td_proc->p_fd, fd, 637 cap_rights_init(&rights, CAP_CONNECT), &fp, NULL); 638 if (error != 0) 639 return (error); 640 so = fp->f_data; 641 if (so->so_state & SS_ISCONNECTING) { 642 error = EALREADY; 643 goto done1; 644 } 645 #ifdef KTRACE 646 if (KTRPOINT(td, KTR_STRUCT)) 647 ktrsockaddr(sa); 648 #endif 649 #ifdef MAC 650 error = mac_socket_check_connect(td->td_ucred, so, sa); 651 if (error != 0) 652 goto bad; 653 #endif 654 if (dirfd == AT_FDCWD) 655 error = soconnect(so, sa, td); 656 else 657 error = soconnectat(dirfd, so, sa, td); 658 if (error != 0) 659 goto bad; 660 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 661 error = EINPROGRESS; 662 goto done1; 663 } 664 SOCK_LOCK(so); 665 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 666 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 667 "connec", 0); 668 if (error != 0) { 669 if (error == EINTR || error == ERESTART) 670 interrupted = 1; 671 break; 672 } 673 } 674 if (error == 0) { 675 error = so->so_error; 676 so->so_error = 0; 677 } 678 SOCK_UNLOCK(so); 679 bad: 680 if (!interrupted) 681 so->so_state &= ~SS_ISCONNECTING; 682 if (error == ERESTART) 683 error = EINTR; 684 done1: 685 fdrop(fp, td); 686 return (error); 687 } 688 689 int 690 kern_connect(struct thread *td, int fd, struct sockaddr *sa) 691 { 692 693 return (kern_connectat(td, AT_FDCWD, fd, sa)); 694 } 695 696 /* ARGSUSED */ 697 int 698 sys_connectat(td, uap) 699 struct thread *td; 700 struct connectat_args /* { 701 int fd; 702 int s; 703 caddr_t name; 704 int namelen; 705 } */ *uap; 706 { 707 struct sockaddr *sa; 708 int error; 709 710 error = getsockaddr(&sa, uap->name, uap->namelen); 711 if (error == 0) { 712 error = kern_connectat(td, uap->fd, uap->s, sa); 713 free(sa, M_SONAME); 714 } 715 return (error); 716 } 717 718 int 719 kern_socketpair(struct thread *td, int domain, int type, int protocol, 720 int *rsv) 721 { 722 struct filedesc *fdp = td->td_proc->p_fd; 723 struct file *fp1, *fp2; 724 struct socket *so1, *so2; 725 int fd, error, oflag, fflag; 726 727 AUDIT_ARG_SOCKET(domain, type, protocol); 728 729 oflag = 0; 730 fflag = 0; 731 if ((type & SOCK_CLOEXEC) != 0) { 732 type &= ~SOCK_CLOEXEC; 733 oflag |= O_CLOEXEC; 734 } 735 if ((type & SOCK_NONBLOCK) != 0) { 736 type &= ~SOCK_NONBLOCK; 737 fflag |= FNONBLOCK; 738 } 739 #ifdef MAC 740 /* We might want to have a separate check for socket pairs. */ 741 error = mac_socket_check_create(td->td_ucred, domain, type, 742 protocol); 743 if (error != 0) 744 return (error); 745 #endif 746 error = socreate(domain, &so1, type, protocol, td->td_ucred, td); 747 if (error != 0) 748 return (error); 749 error = socreate(domain, &so2, type, protocol, td->td_ucred, td); 750 if (error != 0) 751 goto free1; 752 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 753 error = falloc(td, &fp1, &fd, oflag); 754 if (error != 0) 755 goto free2; 756 rsv[0] = fd; 757 fp1->f_data = so1; /* so1 already has ref count */ 758 error = falloc(td, &fp2, &fd, oflag); 759 if (error != 0) 760 goto free3; 761 fp2->f_data = so2; /* so2 already has ref count */ 762 rsv[1] = fd; 763 error = soconnect2(so1, so2); 764 if (error != 0) 765 goto free4; 766 if (type == SOCK_DGRAM) { 767 /* 768 * Datagram socket connection is asymmetric. 769 */ 770 error = soconnect2(so2, so1); 771 if (error != 0) 772 goto free4; 773 } 774 finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data, 775 &socketops); 776 finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data, 777 &socketops); 778 if ((fflag & FNONBLOCK) != 0) { 779 (void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td); 780 (void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td); 781 } 782 fdrop(fp1, td); 783 fdrop(fp2, td); 784 return (0); 785 free4: 786 fdclose(fdp, fp2, rsv[1], td); 787 fdrop(fp2, td); 788 free3: 789 fdclose(fdp, fp1, rsv[0], td); 790 fdrop(fp1, td); 791 free2: 792 if (so2 != NULL) 793 (void)soclose(so2); 794 free1: 795 if (so1 != NULL) 796 (void)soclose(so1); 797 return (error); 798 } 799 800 int 801 sys_socketpair(struct thread *td, struct socketpair_args *uap) 802 { 803 int error, sv[2]; 804 805 error = kern_socketpair(td, uap->domain, uap->type, 806 uap->protocol, sv); 807 if (error != 0) 808 return (error); 809 error = copyout(sv, uap->rsv, 2 * sizeof(int)); 810 if (error != 0) { 811 (void)kern_close(td, sv[0]); 812 (void)kern_close(td, sv[1]); 813 } 814 return (error); 815 } 816 817 static int 818 sendit(td, s, mp, flags) 819 struct thread *td; 820 int s; 821 struct msghdr *mp; 822 int flags; 823 { 824 struct mbuf *control; 825 struct sockaddr *to; 826 int error; 827 828 #ifdef CAPABILITY_MODE 829 if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL)) 830 return (ECAPMODE); 831 #endif 832 833 if (mp->msg_name != NULL) { 834 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 835 if (error != 0) { 836 to = NULL; 837 goto bad; 838 } 839 mp->msg_name = to; 840 } else { 841 to = NULL; 842 } 843 844 if (mp->msg_control) { 845 if (mp->msg_controllen < sizeof(struct cmsghdr) 846 #ifdef COMPAT_OLDSOCK 847 && mp->msg_flags != MSG_COMPAT 848 #endif 849 ) { 850 error = EINVAL; 851 goto bad; 852 } 853 error = sockargs(&control, mp->msg_control, 854 mp->msg_controllen, MT_CONTROL); 855 if (error != 0) 856 goto bad; 857 #ifdef COMPAT_OLDSOCK 858 if (mp->msg_flags == MSG_COMPAT) { 859 struct cmsghdr *cm; 860 861 M_PREPEND(control, sizeof(*cm), M_WAITOK); 862 cm = mtod(control, struct cmsghdr *); 863 cm->cmsg_len = control->m_len; 864 cm->cmsg_level = SOL_SOCKET; 865 cm->cmsg_type = SCM_RIGHTS; 866 } 867 #endif 868 } else { 869 control = NULL; 870 } 871 872 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 873 874 bad: 875 free(to, M_SONAME); 876 return (error); 877 } 878 879 int 880 kern_sendit(td, s, mp, flags, control, segflg) 881 struct thread *td; 882 int s; 883 struct msghdr *mp; 884 int flags; 885 struct mbuf *control; 886 enum uio_seg segflg; 887 { 888 struct file *fp; 889 struct uio auio; 890 struct iovec *iov; 891 struct socket *so; 892 cap_rights_t rights; 893 #ifdef KTRACE 894 struct uio *ktruio = NULL; 895 #endif 896 ssize_t len; 897 int i, error; 898 899 AUDIT_ARG_FD(s); 900 cap_rights_init(&rights, CAP_SEND); 901 if (mp->msg_name != NULL) { 902 AUDIT_ARG_SOCKADDR(td, AT_FDCWD, mp->msg_name); 903 cap_rights_set(&rights, CAP_CONNECT); 904 } 905 error = getsock_cap(td->td_proc->p_fd, s, &rights, &fp, NULL); 906 if (error != 0) 907 return (error); 908 so = (struct socket *)fp->f_data; 909 910 #ifdef KTRACE 911 if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT)) 912 ktrsockaddr(mp->msg_name); 913 #endif 914 #ifdef MAC 915 if (mp->msg_name != NULL) { 916 error = mac_socket_check_connect(td->td_ucred, so, 917 mp->msg_name); 918 if (error != 0) 919 goto bad; 920 } 921 error = mac_socket_check_send(td->td_ucred, so); 922 if (error != 0) 923 goto bad; 924 #endif 925 926 auio.uio_iov = mp->msg_iov; 927 auio.uio_iovcnt = mp->msg_iovlen; 928 auio.uio_segflg = segflg; 929 auio.uio_rw = UIO_WRITE; 930 auio.uio_td = td; 931 auio.uio_offset = 0; /* XXX */ 932 auio.uio_resid = 0; 933 iov = mp->msg_iov; 934 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 935 if ((auio.uio_resid += iov->iov_len) < 0) { 936 error = EINVAL; 937 goto bad; 938 } 939 } 940 #ifdef KTRACE 941 if (KTRPOINT(td, KTR_GENIO)) 942 ktruio = cloneuio(&auio); 943 #endif 944 len = auio.uio_resid; 945 error = sosend(so, mp->msg_name, &auio, 0, control, flags, td); 946 if (error != 0) { 947 if (auio.uio_resid != len && (error == ERESTART || 948 error == EINTR || error == EWOULDBLOCK)) 949 error = 0; 950 /* Generation of SIGPIPE can be controlled per socket */ 951 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 952 !(flags & MSG_NOSIGNAL)) { 953 PROC_LOCK(td->td_proc); 954 tdsignal(td, SIGPIPE); 955 PROC_UNLOCK(td->td_proc); 956 } 957 } 958 if (error == 0) 959 td->td_retval[0] = len - auio.uio_resid; 960 #ifdef KTRACE 961 if (ktruio != NULL) { 962 ktruio->uio_resid = td->td_retval[0]; 963 ktrgenio(s, UIO_WRITE, ktruio, error); 964 } 965 #endif 966 bad: 967 fdrop(fp, td); 968 return (error); 969 } 970 971 int 972 sys_sendto(td, uap) 973 struct thread *td; 974 struct sendto_args /* { 975 int s; 976 caddr_t buf; 977 size_t len; 978 int flags; 979 caddr_t to; 980 int tolen; 981 } */ *uap; 982 { 983 struct msghdr msg; 984 struct iovec aiov; 985 986 msg.msg_name = uap->to; 987 msg.msg_namelen = uap->tolen; 988 msg.msg_iov = &aiov; 989 msg.msg_iovlen = 1; 990 msg.msg_control = 0; 991 #ifdef COMPAT_OLDSOCK 992 msg.msg_flags = 0; 993 #endif 994 aiov.iov_base = uap->buf; 995 aiov.iov_len = uap->len; 996 return (sendit(td, uap->s, &msg, uap->flags)); 997 } 998 999 #ifdef COMPAT_OLDSOCK 1000 int 1001 osend(td, uap) 1002 struct thread *td; 1003 struct osend_args /* { 1004 int s; 1005 caddr_t buf; 1006 int len; 1007 int flags; 1008 } */ *uap; 1009 { 1010 struct msghdr msg; 1011 struct iovec aiov; 1012 1013 msg.msg_name = 0; 1014 msg.msg_namelen = 0; 1015 msg.msg_iov = &aiov; 1016 msg.msg_iovlen = 1; 1017 aiov.iov_base = uap->buf; 1018 aiov.iov_len = uap->len; 1019 msg.msg_control = 0; 1020 msg.msg_flags = 0; 1021 return (sendit(td, uap->s, &msg, uap->flags)); 1022 } 1023 1024 int 1025 osendmsg(td, uap) 1026 struct thread *td; 1027 struct osendmsg_args /* { 1028 int s; 1029 caddr_t msg; 1030 int flags; 1031 } */ *uap; 1032 { 1033 struct msghdr msg; 1034 struct iovec *iov; 1035 int error; 1036 1037 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1038 if (error != 0) 1039 return (error); 1040 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1041 if (error != 0) 1042 return (error); 1043 msg.msg_iov = iov; 1044 msg.msg_flags = MSG_COMPAT; 1045 error = sendit(td, uap->s, &msg, uap->flags); 1046 free(iov, M_IOV); 1047 return (error); 1048 } 1049 #endif 1050 1051 int 1052 sys_sendmsg(td, uap) 1053 struct thread *td; 1054 struct sendmsg_args /* { 1055 int s; 1056 caddr_t msg; 1057 int flags; 1058 } */ *uap; 1059 { 1060 struct msghdr msg; 1061 struct iovec *iov; 1062 int error; 1063 1064 error = copyin(uap->msg, &msg, sizeof (msg)); 1065 if (error != 0) 1066 return (error); 1067 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1068 if (error != 0) 1069 return (error); 1070 msg.msg_iov = iov; 1071 #ifdef COMPAT_OLDSOCK 1072 msg.msg_flags = 0; 1073 #endif 1074 error = sendit(td, uap->s, &msg, uap->flags); 1075 free(iov, M_IOV); 1076 return (error); 1077 } 1078 1079 int 1080 kern_recvit(td, s, mp, fromseg, controlp) 1081 struct thread *td; 1082 int s; 1083 struct msghdr *mp; 1084 enum uio_seg fromseg; 1085 struct mbuf **controlp; 1086 { 1087 struct uio auio; 1088 struct iovec *iov; 1089 struct mbuf *m, *control = NULL; 1090 caddr_t ctlbuf; 1091 struct file *fp; 1092 struct socket *so; 1093 struct sockaddr *fromsa = NULL; 1094 cap_rights_t rights; 1095 #ifdef KTRACE 1096 struct uio *ktruio = NULL; 1097 #endif 1098 ssize_t len; 1099 int error, i; 1100 1101 if (controlp != NULL) 1102 *controlp = NULL; 1103 1104 AUDIT_ARG_FD(s); 1105 error = getsock_cap(td->td_proc->p_fd, s, 1106 cap_rights_init(&rights, CAP_RECV), &fp, NULL); 1107 if (error != 0) 1108 return (error); 1109 so = fp->f_data; 1110 1111 #ifdef MAC 1112 error = mac_socket_check_receive(td->td_ucred, so); 1113 if (error != 0) { 1114 fdrop(fp, td); 1115 return (error); 1116 } 1117 #endif 1118 1119 auio.uio_iov = mp->msg_iov; 1120 auio.uio_iovcnt = mp->msg_iovlen; 1121 auio.uio_segflg = UIO_USERSPACE; 1122 auio.uio_rw = UIO_READ; 1123 auio.uio_td = td; 1124 auio.uio_offset = 0; /* XXX */ 1125 auio.uio_resid = 0; 1126 iov = mp->msg_iov; 1127 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 1128 if ((auio.uio_resid += iov->iov_len) < 0) { 1129 fdrop(fp, td); 1130 return (EINVAL); 1131 } 1132 } 1133 #ifdef KTRACE 1134 if (KTRPOINT(td, KTR_GENIO)) 1135 ktruio = cloneuio(&auio); 1136 #endif 1137 len = auio.uio_resid; 1138 error = soreceive(so, &fromsa, &auio, NULL, 1139 (mp->msg_control || controlp) ? &control : NULL, 1140 &mp->msg_flags); 1141 if (error != 0) { 1142 if (auio.uio_resid != len && (error == ERESTART || 1143 error == EINTR || error == EWOULDBLOCK)) 1144 error = 0; 1145 } 1146 if (fromsa != NULL) 1147 AUDIT_ARG_SOCKADDR(td, AT_FDCWD, fromsa); 1148 #ifdef KTRACE 1149 if (ktruio != NULL) { 1150 ktruio->uio_resid = len - auio.uio_resid; 1151 ktrgenio(s, UIO_READ, ktruio, error); 1152 } 1153 #endif 1154 if (error != 0) 1155 goto out; 1156 td->td_retval[0] = len - auio.uio_resid; 1157 if (mp->msg_name) { 1158 len = mp->msg_namelen; 1159 if (len <= 0 || fromsa == NULL) 1160 len = 0; 1161 else { 1162 /* save sa_len before it is destroyed by MSG_COMPAT */ 1163 len = MIN(len, fromsa->sa_len); 1164 #ifdef COMPAT_OLDSOCK 1165 if (mp->msg_flags & MSG_COMPAT) 1166 ((struct osockaddr *)fromsa)->sa_family = 1167 fromsa->sa_family; 1168 #endif 1169 if (fromseg == UIO_USERSPACE) { 1170 error = copyout(fromsa, mp->msg_name, 1171 (unsigned)len); 1172 if (error != 0) 1173 goto out; 1174 } else 1175 bcopy(fromsa, mp->msg_name, len); 1176 } 1177 mp->msg_namelen = len; 1178 } 1179 if (mp->msg_control && controlp == NULL) { 1180 #ifdef COMPAT_OLDSOCK 1181 /* 1182 * We assume that old recvmsg calls won't receive access 1183 * rights and other control info, esp. as control info 1184 * is always optional and those options didn't exist in 4.3. 1185 * If we receive rights, trim the cmsghdr; anything else 1186 * is tossed. 1187 */ 1188 if (control && mp->msg_flags & MSG_COMPAT) { 1189 if (mtod(control, struct cmsghdr *)->cmsg_level != 1190 SOL_SOCKET || 1191 mtod(control, struct cmsghdr *)->cmsg_type != 1192 SCM_RIGHTS) { 1193 mp->msg_controllen = 0; 1194 goto out; 1195 } 1196 control->m_len -= sizeof (struct cmsghdr); 1197 control->m_data += sizeof (struct cmsghdr); 1198 } 1199 #endif 1200 len = mp->msg_controllen; 1201 m = control; 1202 mp->msg_controllen = 0; 1203 ctlbuf = mp->msg_control; 1204 1205 while (m && len > 0) { 1206 unsigned int tocopy; 1207 1208 if (len >= m->m_len) 1209 tocopy = m->m_len; 1210 else { 1211 mp->msg_flags |= MSG_CTRUNC; 1212 tocopy = len; 1213 } 1214 1215 if ((error = copyout(mtod(m, caddr_t), 1216 ctlbuf, tocopy)) != 0) 1217 goto out; 1218 1219 ctlbuf += tocopy; 1220 len -= tocopy; 1221 m = m->m_next; 1222 } 1223 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1224 } 1225 out: 1226 fdrop(fp, td); 1227 #ifdef KTRACE 1228 if (fromsa && KTRPOINT(td, KTR_STRUCT)) 1229 ktrsockaddr(fromsa); 1230 #endif 1231 free(fromsa, M_SONAME); 1232 1233 if (error == 0 && controlp != NULL) 1234 *controlp = control; 1235 else if (control) 1236 m_freem(control); 1237 1238 return (error); 1239 } 1240 1241 static int 1242 recvit(td, s, mp, namelenp) 1243 struct thread *td; 1244 int s; 1245 struct msghdr *mp; 1246 void *namelenp; 1247 { 1248 int error; 1249 1250 error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL); 1251 if (error != 0) 1252 return (error); 1253 if (namelenp != NULL) { 1254 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t)); 1255 #ifdef COMPAT_OLDSOCK 1256 if (mp->msg_flags & MSG_COMPAT) 1257 error = 0; /* old recvfrom didn't check */ 1258 #endif 1259 } 1260 return (error); 1261 } 1262 1263 int 1264 sys_recvfrom(td, uap) 1265 struct thread *td; 1266 struct recvfrom_args /* { 1267 int s; 1268 caddr_t buf; 1269 size_t len; 1270 int flags; 1271 struct sockaddr * __restrict from; 1272 socklen_t * __restrict fromlenaddr; 1273 } */ *uap; 1274 { 1275 struct msghdr msg; 1276 struct iovec aiov; 1277 int error; 1278 1279 if (uap->fromlenaddr) { 1280 error = copyin(uap->fromlenaddr, 1281 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1282 if (error != 0) 1283 goto done2; 1284 } else { 1285 msg.msg_namelen = 0; 1286 } 1287 msg.msg_name = uap->from; 1288 msg.msg_iov = &aiov; 1289 msg.msg_iovlen = 1; 1290 aiov.iov_base = uap->buf; 1291 aiov.iov_len = uap->len; 1292 msg.msg_control = 0; 1293 msg.msg_flags = uap->flags; 1294 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1295 done2: 1296 return (error); 1297 } 1298 1299 #ifdef COMPAT_OLDSOCK 1300 int 1301 orecvfrom(td, uap) 1302 struct thread *td; 1303 struct recvfrom_args *uap; 1304 { 1305 1306 uap->flags |= MSG_COMPAT; 1307 return (sys_recvfrom(td, uap)); 1308 } 1309 #endif 1310 1311 #ifdef COMPAT_OLDSOCK 1312 int 1313 orecv(td, uap) 1314 struct thread *td; 1315 struct orecv_args /* { 1316 int s; 1317 caddr_t buf; 1318 int len; 1319 int flags; 1320 } */ *uap; 1321 { 1322 struct msghdr msg; 1323 struct iovec aiov; 1324 1325 msg.msg_name = 0; 1326 msg.msg_namelen = 0; 1327 msg.msg_iov = &aiov; 1328 msg.msg_iovlen = 1; 1329 aiov.iov_base = uap->buf; 1330 aiov.iov_len = uap->len; 1331 msg.msg_control = 0; 1332 msg.msg_flags = uap->flags; 1333 return (recvit(td, uap->s, &msg, NULL)); 1334 } 1335 1336 /* 1337 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1338 * overlays the new one, missing only the flags, and with the (old) access 1339 * rights where the control fields are now. 1340 */ 1341 int 1342 orecvmsg(td, uap) 1343 struct thread *td; 1344 struct orecvmsg_args /* { 1345 int s; 1346 struct omsghdr *msg; 1347 int flags; 1348 } */ *uap; 1349 { 1350 struct msghdr msg; 1351 struct iovec *iov; 1352 int error; 1353 1354 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1355 if (error != 0) 1356 return (error); 1357 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1358 if (error != 0) 1359 return (error); 1360 msg.msg_flags = uap->flags | MSG_COMPAT; 1361 msg.msg_iov = iov; 1362 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1363 if (msg.msg_controllen && error == 0) 1364 error = copyout(&msg.msg_controllen, 1365 &uap->msg->msg_accrightslen, sizeof (int)); 1366 free(iov, M_IOV); 1367 return (error); 1368 } 1369 #endif 1370 1371 int 1372 sys_recvmsg(td, uap) 1373 struct thread *td; 1374 struct recvmsg_args /* { 1375 int s; 1376 struct msghdr *msg; 1377 int flags; 1378 } */ *uap; 1379 { 1380 struct msghdr msg; 1381 struct iovec *uiov, *iov; 1382 int error; 1383 1384 error = copyin(uap->msg, &msg, sizeof (msg)); 1385 if (error != 0) 1386 return (error); 1387 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1388 if (error != 0) 1389 return (error); 1390 msg.msg_flags = uap->flags; 1391 #ifdef COMPAT_OLDSOCK 1392 msg.msg_flags &= ~MSG_COMPAT; 1393 #endif 1394 uiov = msg.msg_iov; 1395 msg.msg_iov = iov; 1396 error = recvit(td, uap->s, &msg, NULL); 1397 if (error == 0) { 1398 msg.msg_iov = uiov; 1399 error = copyout(&msg, uap->msg, sizeof(msg)); 1400 } 1401 free(iov, M_IOV); 1402 return (error); 1403 } 1404 1405 /* ARGSUSED */ 1406 int 1407 sys_shutdown(td, uap) 1408 struct thread *td; 1409 struct shutdown_args /* { 1410 int s; 1411 int how; 1412 } */ *uap; 1413 { 1414 struct socket *so; 1415 struct file *fp; 1416 cap_rights_t rights; 1417 int error; 1418 1419 AUDIT_ARG_FD(uap->s); 1420 error = getsock_cap(td->td_proc->p_fd, uap->s, 1421 cap_rights_init(&rights, CAP_SHUTDOWN), &fp, NULL); 1422 if (error == 0) { 1423 so = fp->f_data; 1424 error = soshutdown(so, uap->how); 1425 fdrop(fp, td); 1426 } 1427 return (error); 1428 } 1429 1430 /* ARGSUSED */ 1431 int 1432 sys_setsockopt(td, uap) 1433 struct thread *td; 1434 struct setsockopt_args /* { 1435 int s; 1436 int level; 1437 int name; 1438 caddr_t val; 1439 int valsize; 1440 } */ *uap; 1441 { 1442 1443 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1444 uap->val, UIO_USERSPACE, uap->valsize)); 1445 } 1446 1447 int 1448 kern_setsockopt(td, s, level, name, val, valseg, valsize) 1449 struct thread *td; 1450 int s; 1451 int level; 1452 int name; 1453 void *val; 1454 enum uio_seg valseg; 1455 socklen_t valsize; 1456 { 1457 struct socket *so; 1458 struct file *fp; 1459 struct sockopt sopt; 1460 cap_rights_t rights; 1461 int error; 1462 1463 if (val == NULL && valsize != 0) 1464 return (EFAULT); 1465 if ((int)valsize < 0) 1466 return (EINVAL); 1467 1468 sopt.sopt_dir = SOPT_SET; 1469 sopt.sopt_level = level; 1470 sopt.sopt_name = name; 1471 sopt.sopt_val = val; 1472 sopt.sopt_valsize = valsize; 1473 switch (valseg) { 1474 case UIO_USERSPACE: 1475 sopt.sopt_td = td; 1476 break; 1477 case UIO_SYSSPACE: 1478 sopt.sopt_td = NULL; 1479 break; 1480 default: 1481 panic("kern_setsockopt called with bad valseg"); 1482 } 1483 1484 AUDIT_ARG_FD(s); 1485 error = getsock_cap(td->td_proc->p_fd, s, 1486 cap_rights_init(&rights, CAP_SETSOCKOPT), &fp, NULL); 1487 if (error == 0) { 1488 so = fp->f_data; 1489 error = sosetopt(so, &sopt); 1490 fdrop(fp, td); 1491 } 1492 return(error); 1493 } 1494 1495 /* ARGSUSED */ 1496 int 1497 sys_getsockopt(td, uap) 1498 struct thread *td; 1499 struct getsockopt_args /* { 1500 int s; 1501 int level; 1502 int name; 1503 void * __restrict val; 1504 socklen_t * __restrict avalsize; 1505 } */ *uap; 1506 { 1507 socklen_t valsize; 1508 int error; 1509 1510 if (uap->val) { 1511 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1512 if (error != 0) 1513 return (error); 1514 } 1515 1516 error = kern_getsockopt(td, uap->s, uap->level, uap->name, 1517 uap->val, UIO_USERSPACE, &valsize); 1518 1519 if (error == 0) 1520 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1521 return (error); 1522 } 1523 1524 /* 1525 * Kernel version of getsockopt. 1526 * optval can be a userland or userspace. optlen is always a kernel pointer. 1527 */ 1528 int 1529 kern_getsockopt(td, s, level, name, val, valseg, valsize) 1530 struct thread *td; 1531 int s; 1532 int level; 1533 int name; 1534 void *val; 1535 enum uio_seg valseg; 1536 socklen_t *valsize; 1537 { 1538 struct socket *so; 1539 struct file *fp; 1540 struct sockopt sopt; 1541 cap_rights_t rights; 1542 int error; 1543 1544 if (val == NULL) 1545 *valsize = 0; 1546 if ((int)*valsize < 0) 1547 return (EINVAL); 1548 1549 sopt.sopt_dir = SOPT_GET; 1550 sopt.sopt_level = level; 1551 sopt.sopt_name = name; 1552 sopt.sopt_val = val; 1553 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1554 switch (valseg) { 1555 case UIO_USERSPACE: 1556 sopt.sopt_td = td; 1557 break; 1558 case UIO_SYSSPACE: 1559 sopt.sopt_td = NULL; 1560 break; 1561 default: 1562 panic("kern_getsockopt called with bad valseg"); 1563 } 1564 1565 AUDIT_ARG_FD(s); 1566 error = getsock_cap(td->td_proc->p_fd, s, 1567 cap_rights_init(&rights, CAP_GETSOCKOPT), &fp, NULL); 1568 if (error == 0) { 1569 so = fp->f_data; 1570 error = sogetopt(so, &sopt); 1571 *valsize = sopt.sopt_valsize; 1572 fdrop(fp, td); 1573 } 1574 return (error); 1575 } 1576 1577 /* 1578 * getsockname1() - Get socket name. 1579 */ 1580 /* ARGSUSED */ 1581 static int 1582 getsockname1(td, uap, compat) 1583 struct thread *td; 1584 struct getsockname_args /* { 1585 int fdes; 1586 struct sockaddr * __restrict asa; 1587 socklen_t * __restrict alen; 1588 } */ *uap; 1589 int compat; 1590 { 1591 struct sockaddr *sa; 1592 socklen_t len; 1593 int error; 1594 1595 error = copyin(uap->alen, &len, sizeof(len)); 1596 if (error != 0) 1597 return (error); 1598 1599 error = kern_getsockname(td, uap->fdes, &sa, &len); 1600 if (error != 0) 1601 return (error); 1602 1603 if (len != 0) { 1604 #ifdef COMPAT_OLDSOCK 1605 if (compat) 1606 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1607 #endif 1608 error = copyout(sa, uap->asa, (u_int)len); 1609 } 1610 free(sa, M_SONAME); 1611 if (error == 0) 1612 error = copyout(&len, uap->alen, sizeof(len)); 1613 return (error); 1614 } 1615 1616 int 1617 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, 1618 socklen_t *alen) 1619 { 1620 struct socket *so; 1621 struct file *fp; 1622 cap_rights_t rights; 1623 socklen_t len; 1624 int error; 1625 1626 AUDIT_ARG_FD(fd); 1627 error = getsock_cap(td->td_proc->p_fd, fd, 1628 cap_rights_init(&rights, CAP_GETSOCKNAME), &fp, NULL); 1629 if (error != 0) 1630 return (error); 1631 so = fp->f_data; 1632 *sa = NULL; 1633 CURVNET_SET(so->so_vnet); 1634 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa); 1635 CURVNET_RESTORE(); 1636 if (error != 0) 1637 goto bad; 1638 if (*sa == NULL) 1639 len = 0; 1640 else 1641 len = MIN(*alen, (*sa)->sa_len); 1642 *alen = len; 1643 #ifdef KTRACE 1644 if (KTRPOINT(td, KTR_STRUCT)) 1645 ktrsockaddr(*sa); 1646 #endif 1647 bad: 1648 fdrop(fp, td); 1649 if (error != 0 && *sa != NULL) { 1650 free(*sa, M_SONAME); 1651 *sa = NULL; 1652 } 1653 return (error); 1654 } 1655 1656 int 1657 sys_getsockname(td, uap) 1658 struct thread *td; 1659 struct getsockname_args *uap; 1660 { 1661 1662 return (getsockname1(td, uap, 0)); 1663 } 1664 1665 #ifdef COMPAT_OLDSOCK 1666 int 1667 ogetsockname(td, uap) 1668 struct thread *td; 1669 struct getsockname_args *uap; 1670 { 1671 1672 return (getsockname1(td, uap, 1)); 1673 } 1674 #endif /* COMPAT_OLDSOCK */ 1675 1676 /* 1677 * getpeername1() - Get name of peer for connected socket. 1678 */ 1679 /* ARGSUSED */ 1680 static int 1681 getpeername1(td, uap, compat) 1682 struct thread *td; 1683 struct getpeername_args /* { 1684 int fdes; 1685 struct sockaddr * __restrict asa; 1686 socklen_t * __restrict alen; 1687 } */ *uap; 1688 int compat; 1689 { 1690 struct sockaddr *sa; 1691 socklen_t len; 1692 int error; 1693 1694 error = copyin(uap->alen, &len, sizeof (len)); 1695 if (error != 0) 1696 return (error); 1697 1698 error = kern_getpeername(td, uap->fdes, &sa, &len); 1699 if (error != 0) 1700 return (error); 1701 1702 if (len != 0) { 1703 #ifdef COMPAT_OLDSOCK 1704 if (compat) 1705 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1706 #endif 1707 error = copyout(sa, uap->asa, (u_int)len); 1708 } 1709 free(sa, M_SONAME); 1710 if (error == 0) 1711 error = copyout(&len, uap->alen, sizeof(len)); 1712 return (error); 1713 } 1714 1715 int 1716 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, 1717 socklen_t *alen) 1718 { 1719 struct socket *so; 1720 struct file *fp; 1721 cap_rights_t rights; 1722 socklen_t len; 1723 int error; 1724 1725 AUDIT_ARG_FD(fd); 1726 error = getsock_cap(td->td_proc->p_fd, fd, 1727 cap_rights_init(&rights, CAP_GETPEERNAME), &fp, NULL); 1728 if (error != 0) 1729 return (error); 1730 so = fp->f_data; 1731 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1732 error = ENOTCONN; 1733 goto done; 1734 } 1735 *sa = NULL; 1736 CURVNET_SET(so->so_vnet); 1737 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa); 1738 CURVNET_RESTORE(); 1739 if (error != 0) 1740 goto bad; 1741 if (*sa == NULL) 1742 len = 0; 1743 else 1744 len = MIN(*alen, (*sa)->sa_len); 1745 *alen = len; 1746 #ifdef KTRACE 1747 if (KTRPOINT(td, KTR_STRUCT)) 1748 ktrsockaddr(*sa); 1749 #endif 1750 bad: 1751 if (error != 0 && *sa != NULL) { 1752 free(*sa, M_SONAME); 1753 *sa = NULL; 1754 } 1755 done: 1756 fdrop(fp, td); 1757 return (error); 1758 } 1759 1760 int 1761 sys_getpeername(td, uap) 1762 struct thread *td; 1763 struct getpeername_args *uap; 1764 { 1765 1766 return (getpeername1(td, uap, 0)); 1767 } 1768 1769 #ifdef COMPAT_OLDSOCK 1770 int 1771 ogetpeername(td, uap) 1772 struct thread *td; 1773 struct ogetpeername_args *uap; 1774 { 1775 1776 /* XXX uap should have type `getpeername_args *' to begin with. */ 1777 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1778 } 1779 #endif /* COMPAT_OLDSOCK */ 1780 1781 int 1782 sockargs(mp, buf, buflen, type) 1783 struct mbuf **mp; 1784 caddr_t buf; 1785 int buflen, type; 1786 { 1787 struct sockaddr *sa; 1788 struct mbuf *m; 1789 int error; 1790 1791 if (buflen > MLEN) { 1792 #ifdef COMPAT_OLDSOCK 1793 if (type == MT_SONAME && buflen <= 112) 1794 buflen = MLEN; /* unix domain compat. hack */ 1795 else 1796 #endif 1797 if (buflen > MCLBYTES) 1798 return (EINVAL); 1799 } 1800 m = m_get2(buflen, M_WAITOK, type, 0); 1801 m->m_len = buflen; 1802 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1803 if (error != 0) 1804 (void) m_free(m); 1805 else { 1806 *mp = m; 1807 if (type == MT_SONAME) { 1808 sa = mtod(m, struct sockaddr *); 1809 1810 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1811 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1812 sa->sa_family = sa->sa_len; 1813 #endif 1814 sa->sa_len = buflen; 1815 } 1816 } 1817 return (error); 1818 } 1819 1820 int 1821 getsockaddr(namp, uaddr, len) 1822 struct sockaddr **namp; 1823 caddr_t uaddr; 1824 size_t len; 1825 { 1826 struct sockaddr *sa; 1827 int error; 1828 1829 if (len > SOCK_MAXADDRLEN) 1830 return (ENAMETOOLONG); 1831 if (len < offsetof(struct sockaddr, sa_data[0])) 1832 return (EINVAL); 1833 sa = malloc(len, M_SONAME, M_WAITOK); 1834 error = copyin(uaddr, sa, len); 1835 if (error != 0) { 1836 free(sa, M_SONAME); 1837 } else { 1838 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1839 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1840 sa->sa_family = sa->sa_len; 1841 #endif 1842 sa->sa_len = len; 1843 *namp = sa; 1844 } 1845 return (error); 1846 } 1847 1848 /* 1849 * Detach mapped page and release resources back to the system. 1850 */ 1851 int 1852 sf_buf_mext(struct mbuf *mb, void *addr, void *args) 1853 { 1854 vm_page_t m; 1855 struct sendfile_sync *sfs; 1856 1857 m = sf_buf_page(args); 1858 sf_buf_free(args); 1859 vm_page_lock(m); 1860 vm_page_unwire(m, 0); 1861 /* 1862 * Check for the object going away on us. This can 1863 * happen since we don't hold a reference to it. 1864 * If so, we're responsible for freeing the page. 1865 */ 1866 if (m->wire_count == 0 && m->object == NULL) 1867 vm_page_free(m); 1868 vm_page_unlock(m); 1869 if (addr != NULL) { 1870 sfs = addr; 1871 sf_sync_deref(sfs); 1872 } 1873 return (EXT_FREE_OK); 1874 } 1875 1876 void 1877 sf_sync_deref(struct sendfile_sync *sfs) 1878 { 1879 1880 if (sfs == NULL) 1881 return; 1882 1883 mtx_lock(&sfs->mtx); 1884 KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0")); 1885 if (--sfs->count == 0) 1886 cv_signal(&sfs->cv); 1887 mtx_unlock(&sfs->mtx); 1888 } 1889 1890 /* 1891 * Allocate a sendfile_sync state structure. 1892 * 1893 * For now this only knows about the "sleep" sync, but later it will 1894 * grow various other personalities. 1895 */ 1896 struct sendfile_sync * 1897 sf_sync_alloc(uint32_t flags) 1898 { 1899 struct sendfile_sync *sfs; 1900 1901 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO); 1902 mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); 1903 cv_init(&sfs->cv, "sendfile"); 1904 sfs->flags = flags; 1905 1906 return (sfs); 1907 } 1908 1909 /* 1910 * Take a reference to a sfsync instance. 1911 * 1912 * This has to map 1:1 to free calls coming in via sf_buf_mext(), 1913 * so typically this will be referenced once for each mbuf allocated. 1914 */ 1915 void 1916 sf_sync_ref(struct sendfile_sync *sfs) 1917 { 1918 1919 if (sfs == NULL) 1920 return; 1921 1922 mtx_lock(&sfs->mtx); 1923 sfs->count++; 1924 mtx_unlock(&sfs->mtx); 1925 } 1926 1927 void 1928 sf_sync_syscall_wait(struct sendfile_sync *sfs) 1929 { 1930 1931 if (sfs == NULL) 1932 return; 1933 1934 mtx_lock(&sfs->mtx); 1935 if (sfs->count != 0) 1936 cv_wait(&sfs->cv, &sfs->mtx); 1937 KASSERT(sfs->count == 0, ("sendfile sync still busy")); 1938 mtx_unlock(&sfs->mtx); 1939 } 1940 1941 void 1942 sf_sync_free(struct sendfile_sync *sfs) 1943 { 1944 1945 if (sfs == NULL) 1946 return; 1947 1948 /* 1949 * XXX we should ensure that nothing else has this 1950 * locked before freeing. 1951 */ 1952 mtx_lock(&sfs->mtx); 1953 KASSERT(sfs->count == 0, ("sendfile sync still busy")); 1954 cv_destroy(&sfs->cv); 1955 mtx_destroy(&sfs->mtx); 1956 free(sfs, M_TEMP); 1957 } 1958 1959 /* 1960 * sendfile(2) 1961 * 1962 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1963 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1964 * 1965 * Send a file specified by 'fd' and starting at 'offset' to a socket 1966 * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes == 1967 * 0. Optionally add a header and/or trailer to the socket output. If 1968 * specified, write the total number of bytes sent into *sbytes. 1969 */ 1970 int 1971 sys_sendfile(struct thread *td, struct sendfile_args *uap) 1972 { 1973 1974 return (do_sendfile(td, uap, 0)); 1975 } 1976 1977 static int 1978 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1979 { 1980 struct sf_hdtr hdtr; 1981 struct uio *hdr_uio, *trl_uio; 1982 struct file *fp; 1983 cap_rights_t rights; 1984 int error; 1985 off_t sbytes; 1986 struct sendfile_sync *sfs; 1987 1988 /* 1989 * File offset must be positive. If it goes beyond EOF 1990 * we send only the header/trailer and no payload data. 1991 */ 1992 if (uap->offset < 0) 1993 return (EINVAL); 1994 1995 hdr_uio = trl_uio = NULL; 1996 sfs = NULL; 1997 1998 if (uap->hdtr != NULL) { 1999 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 2000 if (error != 0) 2001 goto out; 2002 if (hdtr.headers != NULL) { 2003 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 2004 if (error != 0) 2005 goto out; 2006 } 2007 if (hdtr.trailers != NULL) { 2008 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); 2009 if (error != 0) 2010 goto out; 2011 2012 } 2013 } 2014 2015 AUDIT_ARG_FD(uap->fd); 2016 2017 /* 2018 * sendfile(2) can start at any offset within a file so we require 2019 * CAP_READ+CAP_SEEK = CAP_PREAD. 2020 */ 2021 if ((error = fget_read(td, uap->fd, 2022 cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) { 2023 goto out; 2024 } 2025 2026 /* 2027 * If we need to wait for completion, initialise the sfsync 2028 * state here. 2029 */ 2030 if (uap->flags & SF_SYNC) 2031 sfs = sf_sync_alloc(uap->flags & SF_SYNC); 2032 2033 error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset, 2034 uap->nbytes, &sbytes, uap->flags, compat ? SFK_COMPAT : 0, sfs, td); 2035 2036 /* 2037 * If appropriate, do the wait and free here. 2038 */ 2039 if (sfs != NULL) { 2040 sf_sync_syscall_wait(sfs); 2041 sf_sync_free(sfs); 2042 } 2043 2044 /* 2045 * XXX Should we wait until the send has completed before freeing the source 2046 * file handle? It's the previous behaviour, sure, but is it required? 2047 * We've wired down the page references after all. 2048 */ 2049 fdrop(fp, td); 2050 2051 if (uap->sbytes != NULL) { 2052 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2053 } 2054 out: 2055 free(hdr_uio, M_IOV); 2056 free(trl_uio, M_IOV); 2057 return (error); 2058 } 2059 2060 #ifdef COMPAT_FREEBSD4 2061 int 2062 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 2063 { 2064 struct sendfile_args args; 2065 2066 args.fd = uap->fd; 2067 args.s = uap->s; 2068 args.offset = uap->offset; 2069 args.nbytes = uap->nbytes; 2070 args.hdtr = uap->hdtr; 2071 args.sbytes = uap->sbytes; 2072 args.flags = uap->flags; 2073 2074 return (do_sendfile(td, &args, 1)); 2075 } 2076 #endif /* COMPAT_FREEBSD4 */ 2077 2078 static int 2079 sendfile_readpage(vm_object_t obj, struct vnode *vp, int nd, 2080 off_t off, int xfsize, int bsize, struct thread *td, vm_page_t *res) 2081 { 2082 vm_page_t m; 2083 vm_pindex_t pindex; 2084 ssize_t resid; 2085 int error, readahead, rv; 2086 2087 pindex = OFF_TO_IDX(off); 2088 VM_OBJECT_WLOCK(obj); 2089 m = vm_page_grab(obj, pindex, (vp != NULL ? VM_ALLOC_NOBUSY | 2090 VM_ALLOC_IGN_SBUSY : 0) | VM_ALLOC_WIRED | VM_ALLOC_NORMAL); 2091 2092 /* 2093 * Check if page is valid for what we need, otherwise initiate I/O. 2094 * 2095 * The non-zero nd argument prevents disk I/O, instead we 2096 * return the caller what he specified in nd. In particular, 2097 * if we already turned some pages into mbufs, nd == EAGAIN 2098 * and the main function send them the pages before we come 2099 * here again and block. 2100 */ 2101 if (m->valid != 0 && vm_page_is_valid(m, off & PAGE_MASK, xfsize)) { 2102 if (vp == NULL) 2103 vm_page_xunbusy(m); 2104 VM_OBJECT_WUNLOCK(obj); 2105 *res = m; 2106 return (0); 2107 } else if (nd != 0) { 2108 if (vp == NULL) 2109 vm_page_xunbusy(m); 2110 error = nd; 2111 goto free_page; 2112 } 2113 2114 /* 2115 * Get the page from backing store. 2116 */ 2117 error = 0; 2118 if (vp != NULL) { 2119 VM_OBJECT_WUNLOCK(obj); 2120 readahead = sfreadahead * MAXBSIZE; 2121 2122 /* 2123 * Use vn_rdwr() instead of the pager interface for 2124 * the vnode, to allow the read-ahead. 2125 * 2126 * XXXMAC: Because we don't have fp->f_cred here, we 2127 * pass in NOCRED. This is probably wrong, but is 2128 * consistent with our original implementation. 2129 */ 2130 error = vn_rdwr(UIO_READ, vp, NULL, readahead, trunc_page(off), 2131 UIO_NOCOPY, IO_NODELOCKED | IO_VMIO | ((readahead / 2132 bsize) << IO_SEQSHIFT), td->td_ucred, NOCRED, &resid, td); 2133 SFSTAT_INC(sf_iocnt); 2134 VM_OBJECT_WLOCK(obj); 2135 } else { 2136 if (vm_pager_has_page(obj, pindex, NULL, NULL)) { 2137 rv = vm_pager_get_pages(obj, &m, 1, 0); 2138 SFSTAT_INC(sf_iocnt); 2139 m = vm_page_lookup(obj, pindex); 2140 if (m == NULL) 2141 error = EIO; 2142 else if (rv != VM_PAGER_OK) { 2143 vm_page_lock(m); 2144 vm_page_free(m); 2145 vm_page_unlock(m); 2146 m = NULL; 2147 error = EIO; 2148 } 2149 } else { 2150 pmap_zero_page(m); 2151 m->valid = VM_PAGE_BITS_ALL; 2152 m->dirty = 0; 2153 } 2154 if (m != NULL) 2155 vm_page_xunbusy(m); 2156 } 2157 if (error == 0) { 2158 *res = m; 2159 } else if (m != NULL) { 2160 free_page: 2161 vm_page_lock(m); 2162 vm_page_unwire(m, 0); 2163 2164 /* 2165 * See if anyone else might know about this page. If 2166 * not and it is not valid, then free it. 2167 */ 2168 if (m->wire_count == 0 && m->valid == 0 && !vm_page_busied(m)) 2169 vm_page_free(m); 2170 vm_page_unlock(m); 2171 } 2172 KASSERT(error != 0 || (m->wire_count > 0 && 2173 vm_page_is_valid(m, off & PAGE_MASK, xfsize)), 2174 ("wrong page state m %p off %#jx xfsize %d", m, (uintmax_t)off, 2175 xfsize)); 2176 VM_OBJECT_WUNLOCK(obj); 2177 return (error); 2178 } 2179 2180 static int 2181 sendfile_getobj(struct thread *td, struct file *fp, vm_object_t *obj_res, 2182 struct vnode **vp_res, struct shmfd **shmfd_res, off_t *obj_size, 2183 int *bsize) 2184 { 2185 struct vattr va; 2186 vm_object_t obj; 2187 struct vnode *vp; 2188 struct shmfd *shmfd; 2189 int error; 2190 2191 vp = *vp_res = NULL; 2192 obj = NULL; 2193 shmfd = *shmfd_res = NULL; 2194 *bsize = 0; 2195 2196 /* 2197 * The file descriptor must be a regular file and have a 2198 * backing VM object. 2199 */ 2200 if (fp->f_type == DTYPE_VNODE) { 2201 vp = fp->f_vnode; 2202 vn_lock(vp, LK_SHARED | LK_RETRY); 2203 if (vp->v_type != VREG) { 2204 error = EINVAL; 2205 goto out; 2206 } 2207 *bsize = vp->v_mount->mnt_stat.f_iosize; 2208 error = VOP_GETATTR(vp, &va, td->td_ucred); 2209 if (error != 0) 2210 goto out; 2211 *obj_size = va.va_size; 2212 obj = vp->v_object; 2213 if (obj == NULL) { 2214 error = EINVAL; 2215 goto out; 2216 } 2217 } else if (fp->f_type == DTYPE_SHM) { 2218 shmfd = fp->f_data; 2219 obj = shmfd->shm_object; 2220 *obj_size = shmfd->shm_size; 2221 } else { 2222 error = EINVAL; 2223 goto out; 2224 } 2225 2226 VM_OBJECT_WLOCK(obj); 2227 if ((obj->flags & OBJ_DEAD) != 0) { 2228 VM_OBJECT_WUNLOCK(obj); 2229 error = EBADF; 2230 goto out; 2231 } 2232 2233 /* 2234 * Temporarily increase the backing VM object's reference 2235 * count so that a forced reclamation of its vnode does not 2236 * immediately destroy it. 2237 */ 2238 vm_object_reference_locked(obj); 2239 VM_OBJECT_WUNLOCK(obj); 2240 *obj_res = obj; 2241 *vp_res = vp; 2242 *shmfd_res = shmfd; 2243 2244 out: 2245 if (vp != NULL) 2246 VOP_UNLOCK(vp, 0); 2247 return (error); 2248 } 2249 2250 static int 2251 kern_sendfile_getsock(struct thread *td, int s, struct file **sock_fp, 2252 struct socket **so) 2253 { 2254 cap_rights_t rights; 2255 int error; 2256 2257 *sock_fp = NULL; 2258 *so = NULL; 2259 2260 /* 2261 * The socket must be a stream socket and connected. 2262 */ 2263 error = getsock_cap(td->td_proc->p_fd, s, cap_rights_init(&rights, 2264 CAP_SEND), sock_fp, NULL); 2265 if (error != 0) 2266 return (error); 2267 *so = (*sock_fp)->f_data; 2268 if ((*so)->so_type != SOCK_STREAM) 2269 return (EINVAL); 2270 if (((*so)->so_state & SS_ISCONNECTED) == 0) 2271 return (ENOTCONN); 2272 return (0); 2273 } 2274 2275 int 2276 vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, 2277 struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, 2278 int kflags, struct sendfile_sync *sfs, struct thread *td) 2279 { 2280 struct file *sock_fp; 2281 struct vnode *vp; 2282 struct vm_object *obj; 2283 struct socket *so; 2284 struct mbuf *m; 2285 struct sf_buf *sf; 2286 struct vm_page *pg; 2287 struct shmfd *shmfd; 2288 struct vattr va; 2289 off_t off, xfsize, fsbytes, sbytes, rem, obj_size; 2290 int error, bsize, nd, hdrlen, mnw; 2291 2292 pg = NULL; 2293 obj = NULL; 2294 so = NULL; 2295 m = NULL; 2296 fsbytes = sbytes = 0; 2297 hdrlen = mnw = 0; 2298 rem = nbytes; 2299 obj_size = 0; 2300 2301 error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize); 2302 if (error != 0) 2303 return (error); 2304 if (rem == 0) 2305 rem = obj_size; 2306 2307 error = kern_sendfile_getsock(td, sockfd, &sock_fp, &so); 2308 if (error != 0) 2309 goto out; 2310 2311 /* 2312 * Do not wait on memory allocations but return ENOMEM for 2313 * caller to retry later. 2314 * XXX: Experimental. 2315 */ 2316 if (flags & SF_MNOWAIT) 2317 mnw = 1; 2318 2319 #ifdef MAC 2320 error = mac_socket_check_send(td->td_ucred, so); 2321 if (error != 0) 2322 goto out; 2323 #endif 2324 2325 /* If headers are specified copy them into mbufs. */ 2326 if (hdr_uio != NULL) { 2327 hdr_uio->uio_td = td; 2328 hdr_uio->uio_rw = UIO_WRITE; 2329 if (hdr_uio->uio_resid > 0) { 2330 /* 2331 * In FBSD < 5.0 the nbytes to send also included 2332 * the header. If compat is specified subtract the 2333 * header size from nbytes. 2334 */ 2335 if (kflags & SFK_COMPAT) { 2336 if (nbytes > hdr_uio->uio_resid) 2337 nbytes -= hdr_uio->uio_resid; 2338 else 2339 nbytes = 0; 2340 } 2341 m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK), 2342 0, 0, 0); 2343 if (m == NULL) { 2344 error = mnw ? EAGAIN : ENOBUFS; 2345 goto out; 2346 } 2347 hdrlen = m_length(m, NULL); 2348 } 2349 } 2350 2351 /* 2352 * Protect against multiple writers to the socket. 2353 * 2354 * XXXRW: Historically this has assumed non-interruptibility, so now 2355 * we implement that, but possibly shouldn't. 2356 */ 2357 (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR); 2358 2359 /* 2360 * Loop through the pages of the file, starting with the requested 2361 * offset. Get a file page (do I/O if necessary), map the file page 2362 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 2363 * it on the socket. 2364 * This is done in two loops. The inner loop turns as many pages 2365 * as it can, up to available socket buffer space, without blocking 2366 * into mbufs to have it bulk delivered into the socket send buffer. 2367 * The outer loop checks the state and available space of the socket 2368 * and takes care of the overall progress. 2369 */ 2370 for (off = offset; ; ) { 2371 struct mbuf *mtail; 2372 int loopbytes; 2373 int space; 2374 int done; 2375 2376 if ((nbytes != 0 && nbytes == fsbytes) || 2377 (nbytes == 0 && obj_size == fsbytes)) 2378 break; 2379 2380 mtail = NULL; 2381 loopbytes = 0; 2382 space = 0; 2383 done = 0; 2384 2385 /* 2386 * Check the socket state for ongoing connection, 2387 * no errors and space in socket buffer. 2388 * If space is low allow for the remainder of the 2389 * file to be processed if it fits the socket buffer. 2390 * Otherwise block in waiting for sufficient space 2391 * to proceed, or if the socket is nonblocking, return 2392 * to userland with EAGAIN while reporting how far 2393 * we've come. 2394 * We wait until the socket buffer has significant free 2395 * space to do bulk sends. This makes good use of file 2396 * system read ahead and allows packet segmentation 2397 * offloading hardware to take over lots of work. If 2398 * we were not careful here we would send off only one 2399 * sfbuf at a time. 2400 */ 2401 SOCKBUF_LOCK(&so->so_snd); 2402 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2) 2403 so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; 2404 retry_space: 2405 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2406 error = EPIPE; 2407 SOCKBUF_UNLOCK(&so->so_snd); 2408 goto done; 2409 } else if (so->so_error) { 2410 error = so->so_error; 2411 so->so_error = 0; 2412 SOCKBUF_UNLOCK(&so->so_snd); 2413 goto done; 2414 } 2415 space = sbspace(&so->so_snd); 2416 if (space < rem && 2417 (space <= 0 || 2418 space < so->so_snd.sb_lowat)) { 2419 if (so->so_state & SS_NBIO) { 2420 SOCKBUF_UNLOCK(&so->so_snd); 2421 error = EAGAIN; 2422 goto done; 2423 } 2424 /* 2425 * sbwait drops the lock while sleeping. 2426 * When we loop back to retry_space the 2427 * state may have changed and we retest 2428 * for it. 2429 */ 2430 error = sbwait(&so->so_snd); 2431 /* 2432 * An error from sbwait usually indicates that we've 2433 * been interrupted by a signal. If we've sent anything 2434 * then return bytes sent, otherwise return the error. 2435 */ 2436 if (error != 0) { 2437 SOCKBUF_UNLOCK(&so->so_snd); 2438 goto done; 2439 } 2440 goto retry_space; 2441 } 2442 SOCKBUF_UNLOCK(&so->so_snd); 2443 2444 /* 2445 * Reduce space in the socket buffer by the size of 2446 * the header mbuf chain. 2447 * hdrlen is set to 0 after the first loop. 2448 */ 2449 space -= hdrlen; 2450 2451 if (vp != NULL) { 2452 error = vn_lock(vp, LK_SHARED); 2453 if (error != 0) 2454 goto done; 2455 error = VOP_GETATTR(vp, &va, td->td_ucred); 2456 if (error != 0 || off >= va.va_size) { 2457 VOP_UNLOCK(vp, 0); 2458 goto done; 2459 } 2460 obj_size = va.va_size; 2461 } 2462 2463 /* 2464 * Loop and construct maximum sized mbuf chain to be bulk 2465 * dumped into socket buffer. 2466 */ 2467 while (space > loopbytes) { 2468 vm_offset_t pgoff; 2469 struct mbuf *m0; 2470 2471 /* 2472 * Calculate the amount to transfer. 2473 * Not to exceed a page, the EOF, 2474 * or the passed in nbytes. 2475 */ 2476 pgoff = (vm_offset_t)(off & PAGE_MASK); 2477 rem = obj_size - offset; 2478 if (nbytes != 0) 2479 rem = omin(rem, nbytes); 2480 rem -= fsbytes + loopbytes; 2481 xfsize = omin(PAGE_SIZE - pgoff, rem); 2482 xfsize = omin(space - loopbytes, xfsize); 2483 if (xfsize <= 0) { 2484 done = 1; /* all data sent */ 2485 break; 2486 } 2487 2488 /* 2489 * Attempt to look up the page. Allocate 2490 * if not found or wait and loop if busy. 2491 */ 2492 if (m != NULL) 2493 nd = EAGAIN; /* send what we already got */ 2494 else if ((flags & SF_NODISKIO) != 0) 2495 nd = EBUSY; 2496 else 2497 nd = 0; 2498 error = sendfile_readpage(obj, vp, nd, off, 2499 xfsize, bsize, td, &pg); 2500 if (error != 0) { 2501 if (error == EAGAIN) 2502 error = 0; /* not a real error */ 2503 break; 2504 } 2505 2506 /* 2507 * Get a sendfile buf. When allocating the 2508 * first buffer for mbuf chain, we usually 2509 * wait as long as necessary, but this wait 2510 * can be interrupted. For consequent 2511 * buffers, do not sleep, since several 2512 * threads might exhaust the buffers and then 2513 * deadlock. 2514 */ 2515 sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT : 2516 SFB_CATCH); 2517 if (sf == NULL) { 2518 SFSTAT_INC(sf_allocfail); 2519 vm_page_lock(pg); 2520 vm_page_unwire(pg, 0); 2521 KASSERT(pg->object != NULL, 2522 ("%s: object disappeared", __func__)); 2523 vm_page_unlock(pg); 2524 if (m == NULL) 2525 error = (mnw ? EAGAIN : EINTR); 2526 break; 2527 } 2528 2529 /* 2530 * Get an mbuf and set it up as having 2531 * external storage. 2532 */ 2533 m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); 2534 if (m0 == NULL) { 2535 error = (mnw ? EAGAIN : ENOBUFS); 2536 (void)sf_buf_mext(NULL, NULL, sf); 2537 break; 2538 } 2539 if (m_extadd(m0, (caddr_t )sf_buf_kva(sf), PAGE_SIZE, 2540 sf_buf_mext, sfs, sf, M_RDONLY, EXT_SFBUF, 2541 (mnw ? M_NOWAIT : M_WAITOK)) != 0) { 2542 error = (mnw ? EAGAIN : ENOBUFS); 2543 (void)sf_buf_mext(NULL, NULL, sf); 2544 m_freem(m0); 2545 break; 2546 } 2547 m0->m_data = (char *)sf_buf_kva(sf) + pgoff; 2548 m0->m_len = xfsize; 2549 2550 /* Append to mbuf chain. */ 2551 if (mtail != NULL) 2552 mtail->m_next = m0; 2553 else if (m != NULL) 2554 m_last(m)->m_next = m0; 2555 else 2556 m = m0; 2557 mtail = m0; 2558 2559 /* Keep track of bits processed. */ 2560 loopbytes += xfsize; 2561 off += xfsize; 2562 2563 /* 2564 * XXX eventually this should be a sfsync 2565 * method call! 2566 */ 2567 if (sfs != NULL) 2568 sf_sync_ref(sfs); 2569 } 2570 2571 if (vp != NULL) 2572 VOP_UNLOCK(vp, 0); 2573 2574 /* Add the buffer chain to the socket buffer. */ 2575 if (m != NULL) { 2576 int mlen, err; 2577 2578 mlen = m_length(m, NULL); 2579 SOCKBUF_LOCK(&so->so_snd); 2580 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2581 error = EPIPE; 2582 SOCKBUF_UNLOCK(&so->so_snd); 2583 goto done; 2584 } 2585 SOCKBUF_UNLOCK(&so->so_snd); 2586 CURVNET_SET(so->so_vnet); 2587 /* Avoid error aliasing. */ 2588 err = (*so->so_proto->pr_usrreqs->pru_send) 2589 (so, 0, m, NULL, NULL, td); 2590 CURVNET_RESTORE(); 2591 if (err == 0) { 2592 /* 2593 * We need two counters to get the 2594 * file offset and nbytes to send 2595 * right: 2596 * - sbytes contains the total amount 2597 * of bytes sent, including headers. 2598 * - fsbytes contains the total amount 2599 * of bytes sent from the file. 2600 */ 2601 sbytes += mlen; 2602 fsbytes += mlen; 2603 if (hdrlen) { 2604 fsbytes -= hdrlen; 2605 hdrlen = 0; 2606 } 2607 } else if (error == 0) 2608 error = err; 2609 m = NULL; /* pru_send always consumes */ 2610 } 2611 2612 /* Quit outer loop on error or when we're done. */ 2613 if (done) 2614 break; 2615 if (error != 0) 2616 goto done; 2617 } 2618 2619 /* 2620 * Send trailers. Wimp out and use writev(2). 2621 */ 2622 if (trl_uio != NULL) { 2623 sbunlock(&so->so_snd); 2624 error = kern_writev(td, sockfd, trl_uio); 2625 if (error == 0) 2626 sbytes += td->td_retval[0]; 2627 goto out; 2628 } 2629 2630 done: 2631 sbunlock(&so->so_snd); 2632 out: 2633 /* 2634 * If there was no error we have to clear td->td_retval[0] 2635 * because it may have been set by writev. 2636 */ 2637 if (error == 0) { 2638 td->td_retval[0] = 0; 2639 } 2640 if (sent != NULL) { 2641 (*sent) = sbytes; 2642 } 2643 if (obj != NULL) 2644 vm_object_deallocate(obj); 2645 if (so) 2646 fdrop(sock_fp, td); 2647 if (m) 2648 m_freem(m); 2649 2650 if (error == ERESTART) 2651 error = EINTR; 2652 2653 return (error); 2654 } 2655 2656 /* 2657 * SCTP syscalls. 2658 * Functionality only compiled in if SCTP is defined in the kernel Makefile, 2659 * otherwise all return EOPNOTSUPP. 2660 * XXX: We should make this loadable one day. 2661 */ 2662 int 2663 sys_sctp_peeloff(td, uap) 2664 struct thread *td; 2665 struct sctp_peeloff_args /* { 2666 int sd; 2667 caddr_t name; 2668 } */ *uap; 2669 { 2670 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2671 struct file *nfp = NULL; 2672 struct socket *head, *so; 2673 cap_rights_t rights; 2674 u_int fflag; 2675 int error, fd; 2676 2677 AUDIT_ARG_FD(uap->sd); 2678 error = fgetsock(td, uap->sd, cap_rights_init(&rights, CAP_PEELOFF), 2679 &head, &fflag); 2680 if (error != 0) 2681 goto done2; 2682 if (head->so_proto->pr_protocol != IPPROTO_SCTP) { 2683 error = EOPNOTSUPP; 2684 goto done; 2685 } 2686 error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name); 2687 if (error != 0) 2688 goto done; 2689 /* 2690 * At this point we know we do have a assoc to pull 2691 * we proceed to get the fd setup. This may block 2692 * but that is ok. 2693 */ 2694 2695 error = falloc(td, &nfp, &fd, 0); 2696 if (error != 0) 2697 goto done; 2698 td->td_retval[0] = fd; 2699 2700 CURVNET_SET(head->so_vnet); 2701 so = sonewconn(head, SS_ISCONNECTED); 2702 if (so == NULL) { 2703 error = ENOMEM; 2704 goto noconnection; 2705 } 2706 /* 2707 * Before changing the flags on the socket, we have to bump the 2708 * reference count. Otherwise, if the protocol calls sofree(), 2709 * the socket will be released due to a zero refcount. 2710 */ 2711 SOCK_LOCK(so); 2712 soref(so); /* file descriptor reference */ 2713 SOCK_UNLOCK(so); 2714 2715 ACCEPT_LOCK(); 2716 2717 TAILQ_REMOVE(&head->so_comp, so, so_list); 2718 head->so_qlen--; 2719 so->so_state |= (head->so_state & SS_NBIO); 2720 so->so_state &= ~SS_NOFDREF; 2721 so->so_qstate &= ~SQ_COMP; 2722 so->so_head = NULL; 2723 ACCEPT_UNLOCK(); 2724 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 2725 error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name); 2726 if (error != 0) 2727 goto noconnection; 2728 if (head->so_sigio != NULL) 2729 fsetown(fgetown(&head->so_sigio), &so->so_sigio); 2730 2731 noconnection: 2732 /* 2733 * close the new descriptor, assuming someone hasn't ripped it 2734 * out from under us. 2735 */ 2736 if (error != 0) 2737 fdclose(td->td_proc->p_fd, nfp, fd, td); 2738 2739 /* 2740 * Release explicitly held references before returning. 2741 */ 2742 CURVNET_RESTORE(); 2743 done: 2744 if (nfp != NULL) 2745 fdrop(nfp, td); 2746 fputsock(head); 2747 done2: 2748 return (error); 2749 #else /* SCTP */ 2750 return (EOPNOTSUPP); 2751 #endif /* SCTP */ 2752 } 2753 2754 int 2755 sys_sctp_generic_sendmsg (td, uap) 2756 struct thread *td; 2757 struct sctp_generic_sendmsg_args /* { 2758 int sd, 2759 caddr_t msg, 2760 int mlen, 2761 caddr_t to, 2762 __socklen_t tolen, 2763 struct sctp_sndrcvinfo *sinfo, 2764 int flags 2765 } */ *uap; 2766 { 2767 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2768 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2769 struct socket *so; 2770 struct file *fp = NULL; 2771 struct sockaddr *to = NULL; 2772 #ifdef KTRACE 2773 struct uio *ktruio = NULL; 2774 #endif 2775 struct uio auio; 2776 struct iovec iov[1]; 2777 cap_rights_t rights; 2778 int error = 0, len; 2779 2780 if (uap->sinfo != NULL) { 2781 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2782 if (error != 0) 2783 return (error); 2784 u_sinfo = &sinfo; 2785 } 2786 2787 cap_rights_init(&rights, CAP_SEND); 2788 if (uap->tolen != 0) { 2789 error = getsockaddr(&to, uap->to, uap->tolen); 2790 if (error != 0) { 2791 to = NULL; 2792 goto sctp_bad2; 2793 } 2794 cap_rights_set(&rights, CAP_CONNECT); 2795 } 2796 2797 AUDIT_ARG_FD(uap->sd); 2798 error = getsock_cap(td->td_proc->p_fd, uap->sd, &rights, &fp, NULL); 2799 if (error != 0) 2800 goto sctp_bad; 2801 #ifdef KTRACE 2802 if (to && (KTRPOINT(td, KTR_STRUCT))) 2803 ktrsockaddr(to); 2804 #endif 2805 2806 iov[0].iov_base = uap->msg; 2807 iov[0].iov_len = uap->mlen; 2808 2809 so = (struct socket *)fp->f_data; 2810 if (so->so_proto->pr_protocol != IPPROTO_SCTP) { 2811 error = EOPNOTSUPP; 2812 goto sctp_bad; 2813 } 2814 #ifdef MAC 2815 error = mac_socket_check_send(td->td_ucred, so); 2816 if (error != 0) 2817 goto sctp_bad; 2818 #endif /* MAC */ 2819 2820 auio.uio_iov = iov; 2821 auio.uio_iovcnt = 1; 2822 auio.uio_segflg = UIO_USERSPACE; 2823 auio.uio_rw = UIO_WRITE; 2824 auio.uio_td = td; 2825 auio.uio_offset = 0; /* XXX */ 2826 auio.uio_resid = 0; 2827 len = auio.uio_resid = uap->mlen; 2828 CURVNET_SET(so->so_vnet); 2829 error = sctp_lower_sosend(so, to, &auio, (struct mbuf *)NULL, 2830 (struct mbuf *)NULL, uap->flags, u_sinfo, td); 2831 CURVNET_RESTORE(); 2832 if (error != 0) { 2833 if (auio.uio_resid != len && (error == ERESTART || 2834 error == EINTR || error == EWOULDBLOCK)) 2835 error = 0; 2836 /* Generation of SIGPIPE can be controlled per socket. */ 2837 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2838 !(uap->flags & MSG_NOSIGNAL)) { 2839 PROC_LOCK(td->td_proc); 2840 tdsignal(td, SIGPIPE); 2841 PROC_UNLOCK(td->td_proc); 2842 } 2843 } 2844 if (error == 0) 2845 td->td_retval[0] = len - auio.uio_resid; 2846 #ifdef KTRACE 2847 if (ktruio != NULL) { 2848 ktruio->uio_resid = td->td_retval[0]; 2849 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2850 } 2851 #endif /* KTRACE */ 2852 sctp_bad: 2853 if (fp != NULL) 2854 fdrop(fp, td); 2855 sctp_bad2: 2856 free(to, M_SONAME); 2857 return (error); 2858 #else /* SCTP */ 2859 return (EOPNOTSUPP); 2860 #endif /* SCTP */ 2861 } 2862 2863 int 2864 sys_sctp_generic_sendmsg_iov(td, uap) 2865 struct thread *td; 2866 struct sctp_generic_sendmsg_iov_args /* { 2867 int sd, 2868 struct iovec *iov, 2869 int iovlen, 2870 caddr_t to, 2871 __socklen_t tolen, 2872 struct sctp_sndrcvinfo *sinfo, 2873 int flags 2874 } */ *uap; 2875 { 2876 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2877 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2878 struct socket *so; 2879 struct file *fp = NULL; 2880 struct sockaddr *to = NULL; 2881 #ifdef KTRACE 2882 struct uio *ktruio = NULL; 2883 #endif 2884 struct uio auio; 2885 struct iovec *iov, *tiov; 2886 cap_rights_t rights; 2887 ssize_t len; 2888 int error, i; 2889 2890 if (uap->sinfo != NULL) { 2891 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2892 if (error != 0) 2893 return (error); 2894 u_sinfo = &sinfo; 2895 } 2896 cap_rights_init(&rights, CAP_SEND); 2897 if (uap->tolen != 0) { 2898 error = getsockaddr(&to, uap->to, uap->tolen); 2899 if (error != 0) { 2900 to = NULL; 2901 goto sctp_bad2; 2902 } 2903 cap_rights_set(&rights, CAP_CONNECT); 2904 } 2905 2906 AUDIT_ARG_FD(uap->sd); 2907 error = getsock_cap(td->td_proc->p_fd, uap->sd, &rights, &fp, NULL); 2908 if (error != 0) 2909 goto sctp_bad1; 2910 2911 #ifdef COMPAT_FREEBSD32 2912 if (SV_CURPROC_FLAG(SV_ILP32)) 2913 error = freebsd32_copyiniov((struct iovec32 *)uap->iov, 2914 uap->iovlen, &iov, EMSGSIZE); 2915 else 2916 #endif 2917 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2918 if (error != 0) 2919 goto sctp_bad1; 2920 #ifdef KTRACE 2921 if (to && (KTRPOINT(td, KTR_STRUCT))) 2922 ktrsockaddr(to); 2923 #endif 2924 2925 so = (struct socket *)fp->f_data; 2926 if (so->so_proto->pr_protocol != IPPROTO_SCTP) { 2927 error = EOPNOTSUPP; 2928 goto sctp_bad; 2929 } 2930 #ifdef MAC 2931 error = mac_socket_check_send(td->td_ucred, so); 2932 if (error != 0) 2933 goto sctp_bad; 2934 #endif /* MAC */ 2935 2936 auio.uio_iov = iov; 2937 auio.uio_iovcnt = uap->iovlen; 2938 auio.uio_segflg = UIO_USERSPACE; 2939 auio.uio_rw = UIO_WRITE; 2940 auio.uio_td = td; 2941 auio.uio_offset = 0; /* XXX */ 2942 auio.uio_resid = 0; 2943 tiov = iov; 2944 for (i = 0; i <uap->iovlen; i++, tiov++) { 2945 if ((auio.uio_resid += tiov->iov_len) < 0) { 2946 error = EINVAL; 2947 goto sctp_bad; 2948 } 2949 } 2950 len = auio.uio_resid; 2951 CURVNET_SET(so->so_vnet); 2952 error = sctp_lower_sosend(so, to, &auio, 2953 (struct mbuf *)NULL, (struct mbuf *)NULL, 2954 uap->flags, u_sinfo, td); 2955 CURVNET_RESTORE(); 2956 if (error != 0) { 2957 if (auio.uio_resid != len && (error == ERESTART || 2958 error == EINTR || error == EWOULDBLOCK)) 2959 error = 0; 2960 /* Generation of SIGPIPE can be controlled per socket */ 2961 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2962 !(uap->flags & MSG_NOSIGNAL)) { 2963 PROC_LOCK(td->td_proc); 2964 tdsignal(td, SIGPIPE); 2965 PROC_UNLOCK(td->td_proc); 2966 } 2967 } 2968 if (error == 0) 2969 td->td_retval[0] = len - auio.uio_resid; 2970 #ifdef KTRACE 2971 if (ktruio != NULL) { 2972 ktruio->uio_resid = td->td_retval[0]; 2973 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2974 } 2975 #endif /* KTRACE */ 2976 sctp_bad: 2977 free(iov, M_IOV); 2978 sctp_bad1: 2979 if (fp != NULL) 2980 fdrop(fp, td); 2981 sctp_bad2: 2982 free(to, M_SONAME); 2983 return (error); 2984 #else /* SCTP */ 2985 return (EOPNOTSUPP); 2986 #endif /* SCTP */ 2987 } 2988 2989 int 2990 sys_sctp_generic_recvmsg(td, uap) 2991 struct thread *td; 2992 struct sctp_generic_recvmsg_args /* { 2993 int sd, 2994 struct iovec *iov, 2995 int iovlen, 2996 struct sockaddr *from, 2997 __socklen_t *fromlenaddr, 2998 struct sctp_sndrcvinfo *sinfo, 2999 int *msg_flags 3000 } */ *uap; 3001 { 3002 #if (defined(INET) || defined(INET6)) && defined(SCTP) 3003 uint8_t sockbufstore[256]; 3004 struct uio auio; 3005 struct iovec *iov, *tiov; 3006 struct sctp_sndrcvinfo sinfo; 3007 struct socket *so; 3008 struct file *fp = NULL; 3009 struct sockaddr *fromsa; 3010 cap_rights_t rights; 3011 #ifdef KTRACE 3012 struct uio *ktruio = NULL; 3013 #endif 3014 ssize_t len; 3015 int error, fromlen, i, msg_flags; 3016 3017 AUDIT_ARG_FD(uap->sd); 3018 error = getsock_cap(td->td_proc->p_fd, uap->sd, 3019 cap_rights_init(&rights, CAP_RECV), &fp, NULL); 3020 if (error != 0) 3021 return (error); 3022 #ifdef COMPAT_FREEBSD32 3023 if (SV_CURPROC_FLAG(SV_ILP32)) 3024 error = freebsd32_copyiniov((struct iovec32 *)uap->iov, 3025 uap->iovlen, &iov, EMSGSIZE); 3026 else 3027 #endif 3028 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 3029 if (error != 0) 3030 goto out1; 3031 3032 so = fp->f_data; 3033 if (so->so_proto->pr_protocol != IPPROTO_SCTP) { 3034 error = EOPNOTSUPP; 3035 goto out; 3036 } 3037 #ifdef MAC 3038 error = mac_socket_check_receive(td->td_ucred, so); 3039 if (error != 0) 3040 goto out; 3041 #endif /* MAC */ 3042 3043 if (uap->fromlenaddr != NULL) { 3044 error = copyin(uap->fromlenaddr, &fromlen, sizeof (fromlen)); 3045 if (error != 0) 3046 goto out; 3047 } else { 3048 fromlen = 0; 3049 } 3050 if (uap->msg_flags) { 3051 error = copyin(uap->msg_flags, &msg_flags, sizeof (int)); 3052 if (error != 0) 3053 goto out; 3054 } else { 3055 msg_flags = 0; 3056 } 3057 auio.uio_iov = iov; 3058 auio.uio_iovcnt = uap->iovlen; 3059 auio.uio_segflg = UIO_USERSPACE; 3060 auio.uio_rw = UIO_READ; 3061 auio.uio_td = td; 3062 auio.uio_offset = 0; /* XXX */ 3063 auio.uio_resid = 0; 3064 tiov = iov; 3065 for (i = 0; i <uap->iovlen; i++, tiov++) { 3066 if ((auio.uio_resid += tiov->iov_len) < 0) { 3067 error = EINVAL; 3068 goto out; 3069 } 3070 } 3071 len = auio.uio_resid; 3072 fromsa = (struct sockaddr *)sockbufstore; 3073 3074 #ifdef KTRACE 3075 if (KTRPOINT(td, KTR_GENIO)) 3076 ktruio = cloneuio(&auio); 3077 #endif /* KTRACE */ 3078 memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo)); 3079 CURVNET_SET(so->so_vnet); 3080 error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL, 3081 fromsa, fromlen, &msg_flags, 3082 (struct sctp_sndrcvinfo *)&sinfo, 1); 3083 CURVNET_RESTORE(); 3084 if (error != 0) { 3085 if (auio.uio_resid != len && (error == ERESTART || 3086 error == EINTR || error == EWOULDBLOCK)) 3087 error = 0; 3088 } else { 3089 if (uap->sinfo) 3090 error = copyout(&sinfo, uap->sinfo, sizeof (sinfo)); 3091 } 3092 #ifdef KTRACE 3093 if (ktruio != NULL) { 3094 ktruio->uio_resid = len - auio.uio_resid; 3095 ktrgenio(uap->sd, UIO_READ, ktruio, error); 3096 } 3097 #endif /* KTRACE */ 3098 if (error != 0) 3099 goto out; 3100 td->td_retval[0] = len - auio.uio_resid; 3101 3102 if (fromlen && uap->from) { 3103 len = fromlen; 3104 if (len <= 0 || fromsa == 0) 3105 len = 0; 3106 else { 3107 len = MIN(len, fromsa->sa_len); 3108 error = copyout(fromsa, uap->from, (size_t)len); 3109 if (error != 0) 3110 goto out; 3111 } 3112 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t)); 3113 if (error != 0) 3114 goto out; 3115 } 3116 #ifdef KTRACE 3117 if (KTRPOINT(td, KTR_STRUCT)) 3118 ktrsockaddr(fromsa); 3119 #endif 3120 if (uap->msg_flags) { 3121 error = copyout(&msg_flags, uap->msg_flags, sizeof (int)); 3122 if (error != 0) 3123 goto out; 3124 } 3125 out: 3126 free(iov, M_IOV); 3127 out1: 3128 if (fp != NULL) 3129 fdrop(fp, td); 3130 3131 return (error); 3132 #else /* SCTP */ 3133 return (EOPNOTSUPP); 3134 #endif /* SCTP */ 3135 } 3136