1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_capsicum.h" 39 #include "opt_inet.h" 40 #include "opt_inet6.h" 41 #include "opt_sctp.h" 42 #include "opt_compat.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/capability.h> 48 #include <sys/kernel.h> 49 #include <sys/lock.h> 50 #include <sys/mutex.h> 51 #include <sys/sysproto.h> 52 #include <sys/malloc.h> 53 #include <sys/filedesc.h> 54 #include <sys/event.h> 55 #include <sys/proc.h> 56 #include <sys/fcntl.h> 57 #include <sys/file.h> 58 #include <sys/filio.h> 59 #include <sys/jail.h> 60 #include <sys/mount.h> 61 #include <sys/mbuf.h> 62 #include <sys/protosw.h> 63 #include <sys/rwlock.h> 64 #include <sys/sf_buf.h> 65 #include <sys/sysent.h> 66 #include <sys/socket.h> 67 #include <sys/socketvar.h> 68 #include <sys/signalvar.h> 69 #include <sys/syscallsubr.h> 70 #include <sys/sysctl.h> 71 #include <sys/uio.h> 72 #include <sys/vnode.h> 73 #ifdef KTRACE 74 #include <sys/ktrace.h> 75 #endif 76 #ifdef COMPAT_FREEBSD32 77 #include <compat/freebsd32/freebsd32_util.h> 78 #endif 79 80 #include <net/vnet.h> 81 82 #include <security/audit/audit.h> 83 #include <security/mac/mac_framework.h> 84 85 #include <vm/vm.h> 86 #include <vm/vm_param.h> 87 #include <vm/vm_object.h> 88 #include <vm/vm_page.h> 89 #include <vm/vm_pageout.h> 90 #include <vm/vm_kern.h> 91 #include <vm/vm_extern.h> 92 93 #if defined(INET) || defined(INET6) 94 #ifdef SCTP 95 #include <netinet/sctp.h> 96 #include <netinet/sctp_peeloff.h> 97 #endif /* SCTP */ 98 #endif /* INET || INET6 */ 99 100 /* 101 * Flags for accept1() and kern_accept4(), in addition to SOCK_CLOEXEC 102 * and SOCK_NONBLOCK. 103 */ 104 #define ACCEPT4_INHERIT 0x1 105 #define ACCEPT4_COMPAT 0x2 106 107 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 108 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 109 110 static int accept1(struct thread *td, int s, struct sockaddr *uname, 111 socklen_t *anamelen, int flags); 112 static int do_sendfile(struct thread *td, struct sendfile_args *uap, 113 int compat); 114 static int getsockname1(struct thread *td, struct getsockname_args *uap, 115 int compat); 116 static int getpeername1(struct thread *td, struct getpeername_args *uap, 117 int compat); 118 119 counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)]; 120 121 /* 122 * sendfile(2)-related variables and associated sysctls 123 */ 124 int nsfbufs; 125 int nsfbufspeak; 126 int nsfbufsused; 127 static int sfreadahead = 1; 128 129 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 130 "Maximum number of sendfile(2) sf_bufs available"); 131 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 132 "Number of sendfile(2) sf_bufs at peak usage"); 133 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 134 "Number of sendfile(2) sf_bufs in use"); 135 SYSCTL_INT(_kern_ipc, OID_AUTO, sfreadahead, CTLFLAG_RW, &sfreadahead, 0, 136 "Number of sendfile(2) read-ahead MAXBSIZE blocks"); 137 138 139 static void 140 sfstat_init(const void *unused) 141 { 142 143 COUNTER_ARRAY_ALLOC(sfstat, sizeof(struct sfstat) / sizeof(uint64_t), 144 M_WAITOK); 145 } 146 SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL); 147 148 static int 149 sfstat_sysctl(SYSCTL_HANDLER_ARGS) 150 { 151 struct sfstat s; 152 153 COUNTER_ARRAY_COPY(sfstat, &s, sizeof(s) / sizeof(uint64_t)); 154 if (req->newptr) 155 COUNTER_ARRAY_ZERO(sfstat, sizeof(s) / sizeof(uint64_t)); 156 return (SYSCTL_OUT(req, &s, sizeof(s))); 157 } 158 SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW, 159 NULL, 0, sfstat_sysctl, "I", "sendfile statistics"); 160 161 /* 162 * Convert a user file descriptor to a kernel file entry and check if required 163 * capability rights are present. 164 * A reference on the file entry is held upon returning. 165 */ 166 static int 167 getsock_cap(struct filedesc *fdp, int fd, cap_rights_t *rightsp, 168 struct file **fpp, u_int *fflagp) 169 { 170 struct file *fp; 171 int error; 172 173 error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL); 174 if (error != 0) 175 return (error); 176 if (fp->f_type != DTYPE_SOCKET) { 177 fdrop(fp, curthread); 178 return (ENOTSOCK); 179 } 180 if (fflagp != NULL) 181 *fflagp = fp->f_flag; 182 *fpp = fp; 183 return (0); 184 } 185 186 /* 187 * System call interface to the socket abstraction. 188 */ 189 #if defined(COMPAT_43) 190 #define COMPAT_OLDSOCK 191 #endif 192 193 int 194 sys_socket(td, uap) 195 struct thread *td; 196 struct socket_args /* { 197 int domain; 198 int type; 199 int protocol; 200 } */ *uap; 201 { 202 struct socket *so; 203 struct file *fp; 204 int fd, error, type, oflag, fflag; 205 206 AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol); 207 208 type = uap->type; 209 oflag = 0; 210 fflag = 0; 211 if ((type & SOCK_CLOEXEC) != 0) { 212 type &= ~SOCK_CLOEXEC; 213 oflag |= O_CLOEXEC; 214 } 215 if ((type & SOCK_NONBLOCK) != 0) { 216 type &= ~SOCK_NONBLOCK; 217 fflag |= FNONBLOCK; 218 } 219 220 #ifdef MAC 221 error = mac_socket_check_create(td->td_ucred, uap->domain, type, 222 uap->protocol); 223 if (error != 0) 224 return (error); 225 #endif 226 error = falloc(td, &fp, &fd, oflag); 227 if (error != 0) 228 return (error); 229 /* An extra reference on `fp' has been held for us by falloc(). */ 230 error = socreate(uap->domain, &so, type, uap->protocol, 231 td->td_ucred, td); 232 if (error != 0) { 233 fdclose(td->td_proc->p_fd, fp, fd, td); 234 } else { 235 finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops); 236 if ((fflag & FNONBLOCK) != 0) 237 (void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td); 238 td->td_retval[0] = fd; 239 } 240 fdrop(fp, td); 241 return (error); 242 } 243 244 /* ARGSUSED */ 245 int 246 sys_bind(td, uap) 247 struct thread *td; 248 struct bind_args /* { 249 int s; 250 caddr_t name; 251 int namelen; 252 } */ *uap; 253 { 254 struct sockaddr *sa; 255 int error; 256 257 error = getsockaddr(&sa, uap->name, uap->namelen); 258 if (error == 0) { 259 error = kern_bind(td, uap->s, sa); 260 free(sa, M_SONAME); 261 } 262 return (error); 263 } 264 265 static int 266 kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) 267 { 268 struct socket *so; 269 struct file *fp; 270 cap_rights_t rights; 271 int error; 272 273 AUDIT_ARG_FD(fd); 274 AUDIT_ARG_SOCKADDR(td, dirfd, sa); 275 error = getsock_cap(td->td_proc->p_fd, fd, 276 cap_rights_init(&rights, CAP_BIND), &fp, NULL); 277 if (error != 0) 278 return (error); 279 so = fp->f_data; 280 #ifdef KTRACE 281 if (KTRPOINT(td, KTR_STRUCT)) 282 ktrsockaddr(sa); 283 #endif 284 #ifdef MAC 285 error = mac_socket_check_bind(td->td_ucred, so, sa); 286 if (error == 0) { 287 #endif 288 if (dirfd == AT_FDCWD) 289 error = sobind(so, sa, td); 290 else 291 error = sobindat(dirfd, so, sa, td); 292 #ifdef MAC 293 } 294 #endif 295 fdrop(fp, td); 296 return (error); 297 } 298 299 int 300 kern_bind(struct thread *td, int fd, struct sockaddr *sa) 301 { 302 303 return (kern_bindat(td, AT_FDCWD, fd, sa)); 304 } 305 306 /* ARGSUSED */ 307 int 308 sys_bindat(td, uap) 309 struct thread *td; 310 struct bindat_args /* { 311 int fd; 312 int s; 313 caddr_t name; 314 int namelen; 315 } */ *uap; 316 { 317 struct sockaddr *sa; 318 int error; 319 320 error = getsockaddr(&sa, uap->name, uap->namelen); 321 if (error == 0) { 322 error = kern_bindat(td, uap->fd, uap->s, sa); 323 free(sa, M_SONAME); 324 } 325 return (error); 326 } 327 328 /* ARGSUSED */ 329 int 330 sys_listen(td, uap) 331 struct thread *td; 332 struct listen_args /* { 333 int s; 334 int backlog; 335 } */ *uap; 336 { 337 struct socket *so; 338 struct file *fp; 339 cap_rights_t rights; 340 int error; 341 342 AUDIT_ARG_FD(uap->s); 343 error = getsock_cap(td->td_proc->p_fd, uap->s, 344 cap_rights_init(&rights, CAP_LISTEN), &fp, NULL); 345 if (error == 0) { 346 so = fp->f_data; 347 #ifdef MAC 348 error = mac_socket_check_listen(td->td_ucred, so); 349 if (error == 0) 350 #endif 351 error = solisten(so, uap->backlog, td); 352 fdrop(fp, td); 353 } 354 return(error); 355 } 356 357 /* 358 * accept1() 359 */ 360 static int 361 accept1(td, s, uname, anamelen, flags) 362 struct thread *td; 363 int s; 364 struct sockaddr *uname; 365 socklen_t *anamelen; 366 int flags; 367 { 368 struct sockaddr *name; 369 socklen_t namelen; 370 struct file *fp; 371 int error; 372 373 if (uname == NULL) 374 return (kern_accept4(td, s, NULL, NULL, flags, NULL)); 375 376 error = copyin(anamelen, &namelen, sizeof (namelen)); 377 if (error != 0) 378 return (error); 379 380 error = kern_accept4(td, s, &name, &namelen, flags, &fp); 381 382 /* 383 * return a namelen of zero for older code which might 384 * ignore the return value from accept. 385 */ 386 if (error != 0) { 387 (void) copyout(&namelen, anamelen, sizeof(*anamelen)); 388 return (error); 389 } 390 391 if (error == 0 && uname != NULL) { 392 #ifdef COMPAT_OLDSOCK 393 if (flags & ACCEPT4_COMPAT) 394 ((struct osockaddr *)name)->sa_family = 395 name->sa_family; 396 #endif 397 error = copyout(name, uname, namelen); 398 } 399 if (error == 0) 400 error = copyout(&namelen, anamelen, 401 sizeof(namelen)); 402 if (error != 0) 403 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td); 404 fdrop(fp, td); 405 free(name, M_SONAME); 406 return (error); 407 } 408 409 int 410 kern_accept(struct thread *td, int s, struct sockaddr **name, 411 socklen_t *namelen, struct file **fp) 412 { 413 return (kern_accept4(td, s, name, namelen, ACCEPT4_INHERIT, fp)); 414 } 415 416 int 417 kern_accept4(struct thread *td, int s, struct sockaddr **name, 418 socklen_t *namelen, int flags, struct file **fp) 419 { 420 struct filedesc *fdp; 421 struct file *headfp, *nfp = NULL; 422 struct sockaddr *sa = NULL; 423 struct socket *head, *so; 424 cap_rights_t rights; 425 u_int fflag; 426 pid_t pgid; 427 int error, fd, tmp; 428 429 if (name != NULL) 430 *name = NULL; 431 432 AUDIT_ARG_FD(s); 433 fdp = td->td_proc->p_fd; 434 error = getsock_cap(fdp, s, cap_rights_init(&rights, CAP_ACCEPT), 435 &headfp, &fflag); 436 if (error != 0) 437 return (error); 438 head = headfp->f_data; 439 if ((head->so_options & SO_ACCEPTCONN) == 0) { 440 error = EINVAL; 441 goto done; 442 } 443 #ifdef MAC 444 error = mac_socket_check_accept(td->td_ucred, head); 445 if (error != 0) 446 goto done; 447 #endif 448 error = falloc(td, &nfp, &fd, (flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0); 449 if (error != 0) 450 goto done; 451 ACCEPT_LOCK(); 452 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 453 ACCEPT_UNLOCK(); 454 error = EWOULDBLOCK; 455 goto noconnection; 456 } 457 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 458 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 459 head->so_error = ECONNABORTED; 460 break; 461 } 462 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 463 "accept", 0); 464 if (error != 0) { 465 ACCEPT_UNLOCK(); 466 goto noconnection; 467 } 468 } 469 if (head->so_error) { 470 error = head->so_error; 471 head->so_error = 0; 472 ACCEPT_UNLOCK(); 473 goto noconnection; 474 } 475 so = TAILQ_FIRST(&head->so_comp); 476 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 477 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 478 479 /* 480 * Before changing the flags on the socket, we have to bump the 481 * reference count. Otherwise, if the protocol calls sofree(), 482 * the socket will be released due to a zero refcount. 483 */ 484 SOCK_LOCK(so); /* soref() and so_state update */ 485 soref(so); /* file descriptor reference */ 486 487 TAILQ_REMOVE(&head->so_comp, so, so_list); 488 head->so_qlen--; 489 if (flags & ACCEPT4_INHERIT) 490 so->so_state |= (head->so_state & SS_NBIO); 491 else 492 so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0; 493 so->so_qstate &= ~SQ_COMP; 494 so->so_head = NULL; 495 496 SOCK_UNLOCK(so); 497 ACCEPT_UNLOCK(); 498 499 /* An extra reference on `nfp' has been held for us by falloc(). */ 500 td->td_retval[0] = fd; 501 502 /* connection has been removed from the listen queue */ 503 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 504 505 if (flags & ACCEPT4_INHERIT) { 506 pgid = fgetown(&head->so_sigio); 507 if (pgid != 0) 508 fsetown(pgid, &so->so_sigio); 509 } else { 510 fflag &= ~(FNONBLOCK | FASYNC); 511 if (flags & SOCK_NONBLOCK) 512 fflag |= FNONBLOCK; 513 } 514 515 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 516 /* Sync socket nonblocking/async state with file flags */ 517 tmp = fflag & FNONBLOCK; 518 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 519 tmp = fflag & FASYNC; 520 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 521 sa = 0; 522 error = soaccept(so, &sa); 523 if (error != 0) { 524 /* 525 * return a namelen of zero for older code which might 526 * ignore the return value from accept. 527 */ 528 if (name) 529 *namelen = 0; 530 goto noconnection; 531 } 532 if (sa == NULL) { 533 if (name) 534 *namelen = 0; 535 goto done; 536 } 537 AUDIT_ARG_SOCKADDR(td, AT_FDCWD, sa); 538 if (name) { 539 /* check sa_len before it is destroyed */ 540 if (*namelen > sa->sa_len) 541 *namelen = sa->sa_len; 542 #ifdef KTRACE 543 if (KTRPOINT(td, KTR_STRUCT)) 544 ktrsockaddr(sa); 545 #endif 546 *name = sa; 547 sa = NULL; 548 } 549 noconnection: 550 free(sa, M_SONAME); 551 552 /* 553 * close the new descriptor, assuming someone hasn't ripped it 554 * out from under us. 555 */ 556 if (error != 0) 557 fdclose(fdp, nfp, fd, td); 558 559 /* 560 * Release explicitly held references before returning. We return 561 * a reference on nfp to the caller on success if they request it. 562 */ 563 done: 564 if (fp != NULL) { 565 if (error == 0) { 566 *fp = nfp; 567 nfp = NULL; 568 } else 569 *fp = NULL; 570 } 571 if (nfp != NULL) 572 fdrop(nfp, td); 573 fdrop(headfp, td); 574 return (error); 575 } 576 577 int 578 sys_accept(td, uap) 579 struct thread *td; 580 struct accept_args *uap; 581 { 582 583 return (accept1(td, uap->s, uap->name, uap->anamelen, ACCEPT4_INHERIT)); 584 } 585 586 int 587 sys_accept4(td, uap) 588 struct thread *td; 589 struct accept4_args *uap; 590 { 591 592 if (uap->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 593 return (EINVAL); 594 595 return (accept1(td, uap->s, uap->name, uap->anamelen, uap->flags)); 596 } 597 598 #ifdef COMPAT_OLDSOCK 599 int 600 oaccept(td, uap) 601 struct thread *td; 602 struct accept_args *uap; 603 { 604 605 return (accept1(td, uap->s, uap->name, uap->anamelen, 606 ACCEPT4_INHERIT | ACCEPT4_COMPAT)); 607 } 608 #endif /* COMPAT_OLDSOCK */ 609 610 /* ARGSUSED */ 611 int 612 sys_connect(td, uap) 613 struct thread *td; 614 struct connect_args /* { 615 int s; 616 caddr_t name; 617 int namelen; 618 } */ *uap; 619 { 620 struct sockaddr *sa; 621 int error; 622 623 error = getsockaddr(&sa, uap->name, uap->namelen); 624 if (error == 0) { 625 error = kern_connect(td, uap->s, sa); 626 free(sa, M_SONAME); 627 } 628 return (error); 629 } 630 631 static int 632 kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) 633 { 634 struct socket *so; 635 struct file *fp; 636 cap_rights_t rights; 637 int error, interrupted = 0; 638 639 AUDIT_ARG_FD(fd); 640 AUDIT_ARG_SOCKADDR(td, dirfd, sa); 641 error = getsock_cap(td->td_proc->p_fd, fd, 642 cap_rights_init(&rights, CAP_CONNECT), &fp, NULL); 643 if (error != 0) 644 return (error); 645 so = fp->f_data; 646 if (so->so_state & SS_ISCONNECTING) { 647 error = EALREADY; 648 goto done1; 649 } 650 #ifdef KTRACE 651 if (KTRPOINT(td, KTR_STRUCT)) 652 ktrsockaddr(sa); 653 #endif 654 #ifdef MAC 655 error = mac_socket_check_connect(td->td_ucred, so, sa); 656 if (error != 0) 657 goto bad; 658 #endif 659 if (dirfd == AT_FDCWD) 660 error = soconnect(so, sa, td); 661 else 662 error = soconnectat(dirfd, so, sa, td); 663 if (error != 0) 664 goto bad; 665 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 666 error = EINPROGRESS; 667 goto done1; 668 } 669 SOCK_LOCK(so); 670 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 671 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 672 "connec", 0); 673 if (error != 0) { 674 if (error == EINTR || error == ERESTART) 675 interrupted = 1; 676 break; 677 } 678 } 679 if (error == 0) { 680 error = so->so_error; 681 so->so_error = 0; 682 } 683 SOCK_UNLOCK(so); 684 bad: 685 if (!interrupted) 686 so->so_state &= ~SS_ISCONNECTING; 687 if (error == ERESTART) 688 error = EINTR; 689 done1: 690 fdrop(fp, td); 691 return (error); 692 } 693 694 int 695 kern_connect(struct thread *td, int fd, struct sockaddr *sa) 696 { 697 698 return (kern_connectat(td, AT_FDCWD, fd, sa)); 699 } 700 701 /* ARGSUSED */ 702 int 703 sys_connectat(td, uap) 704 struct thread *td; 705 struct connectat_args /* { 706 int fd; 707 int s; 708 caddr_t name; 709 int namelen; 710 } */ *uap; 711 { 712 struct sockaddr *sa; 713 int error; 714 715 error = getsockaddr(&sa, uap->name, uap->namelen); 716 if (error == 0) { 717 error = kern_connectat(td, uap->fd, uap->s, sa); 718 free(sa, M_SONAME); 719 } 720 return (error); 721 } 722 723 int 724 kern_socketpair(struct thread *td, int domain, int type, int protocol, 725 int *rsv) 726 { 727 struct filedesc *fdp = td->td_proc->p_fd; 728 struct file *fp1, *fp2; 729 struct socket *so1, *so2; 730 int fd, error, oflag, fflag; 731 732 AUDIT_ARG_SOCKET(domain, type, protocol); 733 734 oflag = 0; 735 fflag = 0; 736 if ((type & SOCK_CLOEXEC) != 0) { 737 type &= ~SOCK_CLOEXEC; 738 oflag |= O_CLOEXEC; 739 } 740 if ((type & SOCK_NONBLOCK) != 0) { 741 type &= ~SOCK_NONBLOCK; 742 fflag |= FNONBLOCK; 743 } 744 #ifdef MAC 745 /* We might want to have a separate check for socket pairs. */ 746 error = mac_socket_check_create(td->td_ucred, domain, type, 747 protocol); 748 if (error != 0) 749 return (error); 750 #endif 751 error = socreate(domain, &so1, type, protocol, td->td_ucred, td); 752 if (error != 0) 753 return (error); 754 error = socreate(domain, &so2, type, protocol, td->td_ucred, td); 755 if (error != 0) 756 goto free1; 757 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 758 error = falloc(td, &fp1, &fd, oflag); 759 if (error != 0) 760 goto free2; 761 rsv[0] = fd; 762 fp1->f_data = so1; /* so1 already has ref count */ 763 error = falloc(td, &fp2, &fd, oflag); 764 if (error != 0) 765 goto free3; 766 fp2->f_data = so2; /* so2 already has ref count */ 767 rsv[1] = fd; 768 error = soconnect2(so1, so2); 769 if (error != 0) 770 goto free4; 771 if (type == SOCK_DGRAM) { 772 /* 773 * Datagram socket connection is asymmetric. 774 */ 775 error = soconnect2(so2, so1); 776 if (error != 0) 777 goto free4; 778 } 779 finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data, 780 &socketops); 781 finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data, 782 &socketops); 783 if ((fflag & FNONBLOCK) != 0) { 784 (void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td); 785 (void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td); 786 } 787 fdrop(fp1, td); 788 fdrop(fp2, td); 789 return (0); 790 free4: 791 fdclose(fdp, fp2, rsv[1], td); 792 fdrop(fp2, td); 793 free3: 794 fdclose(fdp, fp1, rsv[0], td); 795 fdrop(fp1, td); 796 free2: 797 if (so2 != NULL) 798 (void)soclose(so2); 799 free1: 800 if (so1 != NULL) 801 (void)soclose(so1); 802 return (error); 803 } 804 805 int 806 sys_socketpair(struct thread *td, struct socketpair_args *uap) 807 { 808 int error, sv[2]; 809 810 error = kern_socketpair(td, uap->domain, uap->type, 811 uap->protocol, sv); 812 if (error != 0) 813 return (error); 814 error = copyout(sv, uap->rsv, 2 * sizeof(int)); 815 if (error != 0) { 816 (void)kern_close(td, sv[0]); 817 (void)kern_close(td, sv[1]); 818 } 819 return (error); 820 } 821 822 static int 823 sendit(td, s, mp, flags) 824 struct thread *td; 825 int s; 826 struct msghdr *mp; 827 int flags; 828 { 829 struct mbuf *control; 830 struct sockaddr *to; 831 int error; 832 833 #ifdef CAPABILITY_MODE 834 if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL)) 835 return (ECAPMODE); 836 #endif 837 838 if (mp->msg_name != NULL) { 839 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 840 if (error != 0) { 841 to = NULL; 842 goto bad; 843 } 844 mp->msg_name = to; 845 } else { 846 to = NULL; 847 } 848 849 if (mp->msg_control) { 850 if (mp->msg_controllen < sizeof(struct cmsghdr) 851 #ifdef COMPAT_OLDSOCK 852 && mp->msg_flags != MSG_COMPAT 853 #endif 854 ) { 855 error = EINVAL; 856 goto bad; 857 } 858 error = sockargs(&control, mp->msg_control, 859 mp->msg_controllen, MT_CONTROL); 860 if (error != 0) 861 goto bad; 862 #ifdef COMPAT_OLDSOCK 863 if (mp->msg_flags == MSG_COMPAT) { 864 struct cmsghdr *cm; 865 866 M_PREPEND(control, sizeof(*cm), M_WAITOK); 867 cm = mtod(control, struct cmsghdr *); 868 cm->cmsg_len = control->m_len; 869 cm->cmsg_level = SOL_SOCKET; 870 cm->cmsg_type = SCM_RIGHTS; 871 } 872 #endif 873 } else { 874 control = NULL; 875 } 876 877 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 878 879 bad: 880 free(to, M_SONAME); 881 return (error); 882 } 883 884 int 885 kern_sendit(td, s, mp, flags, control, segflg) 886 struct thread *td; 887 int s; 888 struct msghdr *mp; 889 int flags; 890 struct mbuf *control; 891 enum uio_seg segflg; 892 { 893 struct file *fp; 894 struct uio auio; 895 struct iovec *iov; 896 struct socket *so; 897 cap_rights_t rights; 898 #ifdef KTRACE 899 struct uio *ktruio = NULL; 900 #endif 901 ssize_t len; 902 int i, error; 903 904 AUDIT_ARG_FD(s); 905 cap_rights_init(&rights, CAP_SEND); 906 if (mp->msg_name != NULL) { 907 AUDIT_ARG_SOCKADDR(td, AT_FDCWD, mp->msg_name); 908 cap_rights_set(&rights, CAP_CONNECT); 909 } 910 error = getsock_cap(td->td_proc->p_fd, s, &rights, &fp, NULL); 911 if (error != 0) 912 return (error); 913 so = (struct socket *)fp->f_data; 914 915 #ifdef KTRACE 916 if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT)) 917 ktrsockaddr(mp->msg_name); 918 #endif 919 #ifdef MAC 920 if (mp->msg_name != NULL) { 921 error = mac_socket_check_connect(td->td_ucred, so, 922 mp->msg_name); 923 if (error != 0) 924 goto bad; 925 } 926 error = mac_socket_check_send(td->td_ucred, so); 927 if (error != 0) 928 goto bad; 929 #endif 930 931 auio.uio_iov = mp->msg_iov; 932 auio.uio_iovcnt = mp->msg_iovlen; 933 auio.uio_segflg = segflg; 934 auio.uio_rw = UIO_WRITE; 935 auio.uio_td = td; 936 auio.uio_offset = 0; /* XXX */ 937 auio.uio_resid = 0; 938 iov = mp->msg_iov; 939 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 940 if ((auio.uio_resid += iov->iov_len) < 0) { 941 error = EINVAL; 942 goto bad; 943 } 944 } 945 #ifdef KTRACE 946 if (KTRPOINT(td, KTR_GENIO)) 947 ktruio = cloneuio(&auio); 948 #endif 949 len = auio.uio_resid; 950 error = sosend(so, mp->msg_name, &auio, 0, control, flags, td); 951 if (error != 0) { 952 if (auio.uio_resid != len && (error == ERESTART || 953 error == EINTR || error == EWOULDBLOCK)) 954 error = 0; 955 /* Generation of SIGPIPE can be controlled per socket */ 956 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 957 !(flags & MSG_NOSIGNAL)) { 958 PROC_LOCK(td->td_proc); 959 tdsignal(td, SIGPIPE); 960 PROC_UNLOCK(td->td_proc); 961 } 962 } 963 if (error == 0) 964 td->td_retval[0] = len - auio.uio_resid; 965 #ifdef KTRACE 966 if (ktruio != NULL) { 967 ktruio->uio_resid = td->td_retval[0]; 968 ktrgenio(s, UIO_WRITE, ktruio, error); 969 } 970 #endif 971 bad: 972 fdrop(fp, td); 973 return (error); 974 } 975 976 int 977 sys_sendto(td, uap) 978 struct thread *td; 979 struct sendto_args /* { 980 int s; 981 caddr_t buf; 982 size_t len; 983 int flags; 984 caddr_t to; 985 int tolen; 986 } */ *uap; 987 { 988 struct msghdr msg; 989 struct iovec aiov; 990 991 msg.msg_name = uap->to; 992 msg.msg_namelen = uap->tolen; 993 msg.msg_iov = &aiov; 994 msg.msg_iovlen = 1; 995 msg.msg_control = 0; 996 #ifdef COMPAT_OLDSOCK 997 msg.msg_flags = 0; 998 #endif 999 aiov.iov_base = uap->buf; 1000 aiov.iov_len = uap->len; 1001 return (sendit(td, uap->s, &msg, uap->flags)); 1002 } 1003 1004 #ifdef COMPAT_OLDSOCK 1005 int 1006 osend(td, uap) 1007 struct thread *td; 1008 struct osend_args /* { 1009 int s; 1010 caddr_t buf; 1011 int len; 1012 int flags; 1013 } */ *uap; 1014 { 1015 struct msghdr msg; 1016 struct iovec aiov; 1017 1018 msg.msg_name = 0; 1019 msg.msg_namelen = 0; 1020 msg.msg_iov = &aiov; 1021 msg.msg_iovlen = 1; 1022 aiov.iov_base = uap->buf; 1023 aiov.iov_len = uap->len; 1024 msg.msg_control = 0; 1025 msg.msg_flags = 0; 1026 return (sendit(td, uap->s, &msg, uap->flags)); 1027 } 1028 1029 int 1030 osendmsg(td, uap) 1031 struct thread *td; 1032 struct osendmsg_args /* { 1033 int s; 1034 caddr_t msg; 1035 int flags; 1036 } */ *uap; 1037 { 1038 struct msghdr msg; 1039 struct iovec *iov; 1040 int error; 1041 1042 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1043 if (error != 0) 1044 return (error); 1045 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1046 if (error != 0) 1047 return (error); 1048 msg.msg_iov = iov; 1049 msg.msg_flags = MSG_COMPAT; 1050 error = sendit(td, uap->s, &msg, uap->flags); 1051 free(iov, M_IOV); 1052 return (error); 1053 } 1054 #endif 1055 1056 int 1057 sys_sendmsg(td, uap) 1058 struct thread *td; 1059 struct sendmsg_args /* { 1060 int s; 1061 caddr_t msg; 1062 int flags; 1063 } */ *uap; 1064 { 1065 struct msghdr msg; 1066 struct iovec *iov; 1067 int error; 1068 1069 error = copyin(uap->msg, &msg, sizeof (msg)); 1070 if (error != 0) 1071 return (error); 1072 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1073 if (error != 0) 1074 return (error); 1075 msg.msg_iov = iov; 1076 #ifdef COMPAT_OLDSOCK 1077 msg.msg_flags = 0; 1078 #endif 1079 error = sendit(td, uap->s, &msg, uap->flags); 1080 free(iov, M_IOV); 1081 return (error); 1082 } 1083 1084 int 1085 kern_recvit(td, s, mp, fromseg, controlp) 1086 struct thread *td; 1087 int s; 1088 struct msghdr *mp; 1089 enum uio_seg fromseg; 1090 struct mbuf **controlp; 1091 { 1092 struct uio auio; 1093 struct iovec *iov; 1094 struct mbuf *m, *control = NULL; 1095 caddr_t ctlbuf; 1096 struct file *fp; 1097 struct socket *so; 1098 struct sockaddr *fromsa = NULL; 1099 cap_rights_t rights; 1100 #ifdef KTRACE 1101 struct uio *ktruio = NULL; 1102 #endif 1103 ssize_t len; 1104 int error, i; 1105 1106 if (controlp != NULL) 1107 *controlp = NULL; 1108 1109 AUDIT_ARG_FD(s); 1110 error = getsock_cap(td->td_proc->p_fd, s, 1111 cap_rights_init(&rights, CAP_RECV), &fp, NULL); 1112 if (error != 0) 1113 return (error); 1114 so = fp->f_data; 1115 1116 #ifdef MAC 1117 error = mac_socket_check_receive(td->td_ucred, so); 1118 if (error != 0) { 1119 fdrop(fp, td); 1120 return (error); 1121 } 1122 #endif 1123 1124 auio.uio_iov = mp->msg_iov; 1125 auio.uio_iovcnt = mp->msg_iovlen; 1126 auio.uio_segflg = UIO_USERSPACE; 1127 auio.uio_rw = UIO_READ; 1128 auio.uio_td = td; 1129 auio.uio_offset = 0; /* XXX */ 1130 auio.uio_resid = 0; 1131 iov = mp->msg_iov; 1132 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 1133 if ((auio.uio_resid += iov->iov_len) < 0) { 1134 fdrop(fp, td); 1135 return (EINVAL); 1136 } 1137 } 1138 #ifdef KTRACE 1139 if (KTRPOINT(td, KTR_GENIO)) 1140 ktruio = cloneuio(&auio); 1141 #endif 1142 len = auio.uio_resid; 1143 error = soreceive(so, &fromsa, &auio, NULL, 1144 (mp->msg_control || controlp) ? &control : NULL, 1145 &mp->msg_flags); 1146 if (error != 0) { 1147 if (auio.uio_resid != len && (error == ERESTART || 1148 error == EINTR || error == EWOULDBLOCK)) 1149 error = 0; 1150 } 1151 if (fromsa != NULL) 1152 AUDIT_ARG_SOCKADDR(td, AT_FDCWD, fromsa); 1153 #ifdef KTRACE 1154 if (ktruio != NULL) { 1155 ktruio->uio_resid = len - auio.uio_resid; 1156 ktrgenio(s, UIO_READ, ktruio, error); 1157 } 1158 #endif 1159 if (error != 0) 1160 goto out; 1161 td->td_retval[0] = len - auio.uio_resid; 1162 if (mp->msg_name) { 1163 len = mp->msg_namelen; 1164 if (len <= 0 || fromsa == NULL) 1165 len = 0; 1166 else { 1167 /* save sa_len before it is destroyed by MSG_COMPAT */ 1168 len = MIN(len, fromsa->sa_len); 1169 #ifdef COMPAT_OLDSOCK 1170 if (mp->msg_flags & MSG_COMPAT) 1171 ((struct osockaddr *)fromsa)->sa_family = 1172 fromsa->sa_family; 1173 #endif 1174 if (fromseg == UIO_USERSPACE) { 1175 error = copyout(fromsa, mp->msg_name, 1176 (unsigned)len); 1177 if (error != 0) 1178 goto out; 1179 } else 1180 bcopy(fromsa, mp->msg_name, len); 1181 } 1182 mp->msg_namelen = len; 1183 } 1184 if (mp->msg_control && controlp == NULL) { 1185 #ifdef COMPAT_OLDSOCK 1186 /* 1187 * We assume that old recvmsg calls won't receive access 1188 * rights and other control info, esp. as control info 1189 * is always optional and those options didn't exist in 4.3. 1190 * If we receive rights, trim the cmsghdr; anything else 1191 * is tossed. 1192 */ 1193 if (control && mp->msg_flags & MSG_COMPAT) { 1194 if (mtod(control, struct cmsghdr *)->cmsg_level != 1195 SOL_SOCKET || 1196 mtod(control, struct cmsghdr *)->cmsg_type != 1197 SCM_RIGHTS) { 1198 mp->msg_controllen = 0; 1199 goto out; 1200 } 1201 control->m_len -= sizeof (struct cmsghdr); 1202 control->m_data += sizeof (struct cmsghdr); 1203 } 1204 #endif 1205 len = mp->msg_controllen; 1206 m = control; 1207 mp->msg_controllen = 0; 1208 ctlbuf = mp->msg_control; 1209 1210 while (m && len > 0) { 1211 unsigned int tocopy; 1212 1213 if (len >= m->m_len) 1214 tocopy = m->m_len; 1215 else { 1216 mp->msg_flags |= MSG_CTRUNC; 1217 tocopy = len; 1218 } 1219 1220 if ((error = copyout(mtod(m, caddr_t), 1221 ctlbuf, tocopy)) != 0) 1222 goto out; 1223 1224 ctlbuf += tocopy; 1225 len -= tocopy; 1226 m = m->m_next; 1227 } 1228 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1229 } 1230 out: 1231 fdrop(fp, td); 1232 #ifdef KTRACE 1233 if (fromsa && KTRPOINT(td, KTR_STRUCT)) 1234 ktrsockaddr(fromsa); 1235 #endif 1236 free(fromsa, M_SONAME); 1237 1238 if (error == 0 && controlp != NULL) 1239 *controlp = control; 1240 else if (control) 1241 m_freem(control); 1242 1243 return (error); 1244 } 1245 1246 static int 1247 recvit(td, s, mp, namelenp) 1248 struct thread *td; 1249 int s; 1250 struct msghdr *mp; 1251 void *namelenp; 1252 { 1253 int error; 1254 1255 error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL); 1256 if (error != 0) 1257 return (error); 1258 if (namelenp != NULL) { 1259 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t)); 1260 #ifdef COMPAT_OLDSOCK 1261 if (mp->msg_flags & MSG_COMPAT) 1262 error = 0; /* old recvfrom didn't check */ 1263 #endif 1264 } 1265 return (error); 1266 } 1267 1268 int 1269 sys_recvfrom(td, uap) 1270 struct thread *td; 1271 struct recvfrom_args /* { 1272 int s; 1273 caddr_t buf; 1274 size_t len; 1275 int flags; 1276 struct sockaddr * __restrict from; 1277 socklen_t * __restrict fromlenaddr; 1278 } */ *uap; 1279 { 1280 struct msghdr msg; 1281 struct iovec aiov; 1282 int error; 1283 1284 if (uap->fromlenaddr) { 1285 error = copyin(uap->fromlenaddr, 1286 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1287 if (error != 0) 1288 goto done2; 1289 } else { 1290 msg.msg_namelen = 0; 1291 } 1292 msg.msg_name = uap->from; 1293 msg.msg_iov = &aiov; 1294 msg.msg_iovlen = 1; 1295 aiov.iov_base = uap->buf; 1296 aiov.iov_len = uap->len; 1297 msg.msg_control = 0; 1298 msg.msg_flags = uap->flags; 1299 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1300 done2: 1301 return (error); 1302 } 1303 1304 #ifdef COMPAT_OLDSOCK 1305 int 1306 orecvfrom(td, uap) 1307 struct thread *td; 1308 struct recvfrom_args *uap; 1309 { 1310 1311 uap->flags |= MSG_COMPAT; 1312 return (sys_recvfrom(td, uap)); 1313 } 1314 #endif 1315 1316 #ifdef COMPAT_OLDSOCK 1317 int 1318 orecv(td, uap) 1319 struct thread *td; 1320 struct orecv_args /* { 1321 int s; 1322 caddr_t buf; 1323 int len; 1324 int flags; 1325 } */ *uap; 1326 { 1327 struct msghdr msg; 1328 struct iovec aiov; 1329 1330 msg.msg_name = 0; 1331 msg.msg_namelen = 0; 1332 msg.msg_iov = &aiov; 1333 msg.msg_iovlen = 1; 1334 aiov.iov_base = uap->buf; 1335 aiov.iov_len = uap->len; 1336 msg.msg_control = 0; 1337 msg.msg_flags = uap->flags; 1338 return (recvit(td, uap->s, &msg, NULL)); 1339 } 1340 1341 /* 1342 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1343 * overlays the new one, missing only the flags, and with the (old) access 1344 * rights where the control fields are now. 1345 */ 1346 int 1347 orecvmsg(td, uap) 1348 struct thread *td; 1349 struct orecvmsg_args /* { 1350 int s; 1351 struct omsghdr *msg; 1352 int flags; 1353 } */ *uap; 1354 { 1355 struct msghdr msg; 1356 struct iovec *iov; 1357 int error; 1358 1359 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1360 if (error != 0) 1361 return (error); 1362 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1363 if (error != 0) 1364 return (error); 1365 msg.msg_flags = uap->flags | MSG_COMPAT; 1366 msg.msg_iov = iov; 1367 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1368 if (msg.msg_controllen && error == 0) 1369 error = copyout(&msg.msg_controllen, 1370 &uap->msg->msg_accrightslen, sizeof (int)); 1371 free(iov, M_IOV); 1372 return (error); 1373 } 1374 #endif 1375 1376 int 1377 sys_recvmsg(td, uap) 1378 struct thread *td; 1379 struct recvmsg_args /* { 1380 int s; 1381 struct msghdr *msg; 1382 int flags; 1383 } */ *uap; 1384 { 1385 struct msghdr msg; 1386 struct iovec *uiov, *iov; 1387 int error; 1388 1389 error = copyin(uap->msg, &msg, sizeof (msg)); 1390 if (error != 0) 1391 return (error); 1392 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1393 if (error != 0) 1394 return (error); 1395 msg.msg_flags = uap->flags; 1396 #ifdef COMPAT_OLDSOCK 1397 msg.msg_flags &= ~MSG_COMPAT; 1398 #endif 1399 uiov = msg.msg_iov; 1400 msg.msg_iov = iov; 1401 error = recvit(td, uap->s, &msg, NULL); 1402 if (error == 0) { 1403 msg.msg_iov = uiov; 1404 error = copyout(&msg, uap->msg, sizeof(msg)); 1405 } 1406 free(iov, M_IOV); 1407 return (error); 1408 } 1409 1410 /* ARGSUSED */ 1411 int 1412 sys_shutdown(td, uap) 1413 struct thread *td; 1414 struct shutdown_args /* { 1415 int s; 1416 int how; 1417 } */ *uap; 1418 { 1419 struct socket *so; 1420 struct file *fp; 1421 cap_rights_t rights; 1422 int error; 1423 1424 AUDIT_ARG_FD(uap->s); 1425 error = getsock_cap(td->td_proc->p_fd, uap->s, 1426 cap_rights_init(&rights, CAP_SHUTDOWN), &fp, NULL); 1427 if (error == 0) { 1428 so = fp->f_data; 1429 error = soshutdown(so, uap->how); 1430 fdrop(fp, td); 1431 } 1432 return (error); 1433 } 1434 1435 /* ARGSUSED */ 1436 int 1437 sys_setsockopt(td, uap) 1438 struct thread *td; 1439 struct setsockopt_args /* { 1440 int s; 1441 int level; 1442 int name; 1443 caddr_t val; 1444 int valsize; 1445 } */ *uap; 1446 { 1447 1448 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1449 uap->val, UIO_USERSPACE, uap->valsize)); 1450 } 1451 1452 int 1453 kern_setsockopt(td, s, level, name, val, valseg, valsize) 1454 struct thread *td; 1455 int s; 1456 int level; 1457 int name; 1458 void *val; 1459 enum uio_seg valseg; 1460 socklen_t valsize; 1461 { 1462 struct socket *so; 1463 struct file *fp; 1464 struct sockopt sopt; 1465 cap_rights_t rights; 1466 int error; 1467 1468 if (val == NULL && valsize != 0) 1469 return (EFAULT); 1470 if ((int)valsize < 0) 1471 return (EINVAL); 1472 1473 sopt.sopt_dir = SOPT_SET; 1474 sopt.sopt_level = level; 1475 sopt.sopt_name = name; 1476 sopt.sopt_val = val; 1477 sopt.sopt_valsize = valsize; 1478 switch (valseg) { 1479 case UIO_USERSPACE: 1480 sopt.sopt_td = td; 1481 break; 1482 case UIO_SYSSPACE: 1483 sopt.sopt_td = NULL; 1484 break; 1485 default: 1486 panic("kern_setsockopt called with bad valseg"); 1487 } 1488 1489 AUDIT_ARG_FD(s); 1490 error = getsock_cap(td->td_proc->p_fd, s, 1491 cap_rights_init(&rights, CAP_SETSOCKOPT), &fp, NULL); 1492 if (error == 0) { 1493 so = fp->f_data; 1494 error = sosetopt(so, &sopt); 1495 fdrop(fp, td); 1496 } 1497 return(error); 1498 } 1499 1500 /* ARGSUSED */ 1501 int 1502 sys_getsockopt(td, uap) 1503 struct thread *td; 1504 struct getsockopt_args /* { 1505 int s; 1506 int level; 1507 int name; 1508 void * __restrict val; 1509 socklen_t * __restrict avalsize; 1510 } */ *uap; 1511 { 1512 socklen_t valsize; 1513 int error; 1514 1515 if (uap->val) { 1516 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1517 if (error != 0) 1518 return (error); 1519 } 1520 1521 error = kern_getsockopt(td, uap->s, uap->level, uap->name, 1522 uap->val, UIO_USERSPACE, &valsize); 1523 1524 if (error == 0) 1525 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1526 return (error); 1527 } 1528 1529 /* 1530 * Kernel version of getsockopt. 1531 * optval can be a userland or userspace. optlen is always a kernel pointer. 1532 */ 1533 int 1534 kern_getsockopt(td, s, level, name, val, valseg, valsize) 1535 struct thread *td; 1536 int s; 1537 int level; 1538 int name; 1539 void *val; 1540 enum uio_seg valseg; 1541 socklen_t *valsize; 1542 { 1543 struct socket *so; 1544 struct file *fp; 1545 struct sockopt sopt; 1546 cap_rights_t rights; 1547 int error; 1548 1549 if (val == NULL) 1550 *valsize = 0; 1551 if ((int)*valsize < 0) 1552 return (EINVAL); 1553 1554 sopt.sopt_dir = SOPT_GET; 1555 sopt.sopt_level = level; 1556 sopt.sopt_name = name; 1557 sopt.sopt_val = val; 1558 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1559 switch (valseg) { 1560 case UIO_USERSPACE: 1561 sopt.sopt_td = td; 1562 break; 1563 case UIO_SYSSPACE: 1564 sopt.sopt_td = NULL; 1565 break; 1566 default: 1567 panic("kern_getsockopt called with bad valseg"); 1568 } 1569 1570 AUDIT_ARG_FD(s); 1571 error = getsock_cap(td->td_proc->p_fd, s, 1572 cap_rights_init(&rights, CAP_GETSOCKOPT), &fp, NULL); 1573 if (error == 0) { 1574 so = fp->f_data; 1575 error = sogetopt(so, &sopt); 1576 *valsize = sopt.sopt_valsize; 1577 fdrop(fp, td); 1578 } 1579 return (error); 1580 } 1581 1582 /* 1583 * getsockname1() - Get socket name. 1584 */ 1585 /* ARGSUSED */ 1586 static int 1587 getsockname1(td, uap, compat) 1588 struct thread *td; 1589 struct getsockname_args /* { 1590 int fdes; 1591 struct sockaddr * __restrict asa; 1592 socklen_t * __restrict alen; 1593 } */ *uap; 1594 int compat; 1595 { 1596 struct sockaddr *sa; 1597 socklen_t len; 1598 int error; 1599 1600 error = copyin(uap->alen, &len, sizeof(len)); 1601 if (error != 0) 1602 return (error); 1603 1604 error = kern_getsockname(td, uap->fdes, &sa, &len); 1605 if (error != 0) 1606 return (error); 1607 1608 if (len != 0) { 1609 #ifdef COMPAT_OLDSOCK 1610 if (compat) 1611 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1612 #endif 1613 error = copyout(sa, uap->asa, (u_int)len); 1614 } 1615 free(sa, M_SONAME); 1616 if (error == 0) 1617 error = copyout(&len, uap->alen, sizeof(len)); 1618 return (error); 1619 } 1620 1621 int 1622 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, 1623 socklen_t *alen) 1624 { 1625 struct socket *so; 1626 struct file *fp; 1627 cap_rights_t rights; 1628 socklen_t len; 1629 int error; 1630 1631 AUDIT_ARG_FD(fd); 1632 error = getsock_cap(td->td_proc->p_fd, fd, 1633 cap_rights_init(&rights, CAP_GETSOCKNAME), &fp, NULL); 1634 if (error != 0) 1635 return (error); 1636 so = fp->f_data; 1637 *sa = NULL; 1638 CURVNET_SET(so->so_vnet); 1639 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa); 1640 CURVNET_RESTORE(); 1641 if (error != 0) 1642 goto bad; 1643 if (*sa == NULL) 1644 len = 0; 1645 else 1646 len = MIN(*alen, (*sa)->sa_len); 1647 *alen = len; 1648 #ifdef KTRACE 1649 if (KTRPOINT(td, KTR_STRUCT)) 1650 ktrsockaddr(*sa); 1651 #endif 1652 bad: 1653 fdrop(fp, td); 1654 if (error != 0 && *sa != NULL) { 1655 free(*sa, M_SONAME); 1656 *sa = NULL; 1657 } 1658 return (error); 1659 } 1660 1661 int 1662 sys_getsockname(td, uap) 1663 struct thread *td; 1664 struct getsockname_args *uap; 1665 { 1666 1667 return (getsockname1(td, uap, 0)); 1668 } 1669 1670 #ifdef COMPAT_OLDSOCK 1671 int 1672 ogetsockname(td, uap) 1673 struct thread *td; 1674 struct getsockname_args *uap; 1675 { 1676 1677 return (getsockname1(td, uap, 1)); 1678 } 1679 #endif /* COMPAT_OLDSOCK */ 1680 1681 /* 1682 * getpeername1() - Get name of peer for connected socket. 1683 */ 1684 /* ARGSUSED */ 1685 static int 1686 getpeername1(td, uap, compat) 1687 struct thread *td; 1688 struct getpeername_args /* { 1689 int fdes; 1690 struct sockaddr * __restrict asa; 1691 socklen_t * __restrict alen; 1692 } */ *uap; 1693 int compat; 1694 { 1695 struct sockaddr *sa; 1696 socklen_t len; 1697 int error; 1698 1699 error = copyin(uap->alen, &len, sizeof (len)); 1700 if (error != 0) 1701 return (error); 1702 1703 error = kern_getpeername(td, uap->fdes, &sa, &len); 1704 if (error != 0) 1705 return (error); 1706 1707 if (len != 0) { 1708 #ifdef COMPAT_OLDSOCK 1709 if (compat) 1710 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1711 #endif 1712 error = copyout(sa, uap->asa, (u_int)len); 1713 } 1714 free(sa, M_SONAME); 1715 if (error == 0) 1716 error = copyout(&len, uap->alen, sizeof(len)); 1717 return (error); 1718 } 1719 1720 int 1721 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, 1722 socklen_t *alen) 1723 { 1724 struct socket *so; 1725 struct file *fp; 1726 cap_rights_t rights; 1727 socklen_t len; 1728 int error; 1729 1730 AUDIT_ARG_FD(fd); 1731 error = getsock_cap(td->td_proc->p_fd, fd, 1732 cap_rights_init(&rights, CAP_GETPEERNAME), &fp, NULL); 1733 if (error != 0) 1734 return (error); 1735 so = fp->f_data; 1736 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1737 error = ENOTCONN; 1738 goto done; 1739 } 1740 *sa = NULL; 1741 CURVNET_SET(so->so_vnet); 1742 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa); 1743 CURVNET_RESTORE(); 1744 if (error != 0) 1745 goto bad; 1746 if (*sa == NULL) 1747 len = 0; 1748 else 1749 len = MIN(*alen, (*sa)->sa_len); 1750 *alen = len; 1751 #ifdef KTRACE 1752 if (KTRPOINT(td, KTR_STRUCT)) 1753 ktrsockaddr(*sa); 1754 #endif 1755 bad: 1756 if (error != 0 && *sa != NULL) { 1757 free(*sa, M_SONAME); 1758 *sa = NULL; 1759 } 1760 done: 1761 fdrop(fp, td); 1762 return (error); 1763 } 1764 1765 int 1766 sys_getpeername(td, uap) 1767 struct thread *td; 1768 struct getpeername_args *uap; 1769 { 1770 1771 return (getpeername1(td, uap, 0)); 1772 } 1773 1774 #ifdef COMPAT_OLDSOCK 1775 int 1776 ogetpeername(td, uap) 1777 struct thread *td; 1778 struct ogetpeername_args *uap; 1779 { 1780 1781 /* XXX uap should have type `getpeername_args *' to begin with. */ 1782 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1783 } 1784 #endif /* COMPAT_OLDSOCK */ 1785 1786 int 1787 sockargs(mp, buf, buflen, type) 1788 struct mbuf **mp; 1789 caddr_t buf; 1790 int buflen, type; 1791 { 1792 struct sockaddr *sa; 1793 struct mbuf *m; 1794 int error; 1795 1796 if (buflen > MLEN) { 1797 #ifdef COMPAT_OLDSOCK 1798 if (type == MT_SONAME && buflen <= 112) 1799 buflen = MLEN; /* unix domain compat. hack */ 1800 else 1801 #endif 1802 if (buflen > MCLBYTES) 1803 return (EINVAL); 1804 } 1805 m = m_get2(buflen, M_WAITOK, type, 0); 1806 m->m_len = buflen; 1807 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1808 if (error != 0) 1809 (void) m_free(m); 1810 else { 1811 *mp = m; 1812 if (type == MT_SONAME) { 1813 sa = mtod(m, struct sockaddr *); 1814 1815 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1816 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1817 sa->sa_family = sa->sa_len; 1818 #endif 1819 sa->sa_len = buflen; 1820 } 1821 } 1822 return (error); 1823 } 1824 1825 int 1826 getsockaddr(namp, uaddr, len) 1827 struct sockaddr **namp; 1828 caddr_t uaddr; 1829 size_t len; 1830 { 1831 struct sockaddr *sa; 1832 int error; 1833 1834 if (len > SOCK_MAXADDRLEN) 1835 return (ENAMETOOLONG); 1836 if (len < offsetof(struct sockaddr, sa_data[0])) 1837 return (EINVAL); 1838 sa = malloc(len, M_SONAME, M_WAITOK); 1839 error = copyin(uaddr, sa, len); 1840 if (error != 0) { 1841 free(sa, M_SONAME); 1842 } else { 1843 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1844 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1845 sa->sa_family = sa->sa_len; 1846 #endif 1847 sa->sa_len = len; 1848 *namp = sa; 1849 } 1850 return (error); 1851 } 1852 1853 #include <sys/condvar.h> 1854 1855 struct sendfile_sync { 1856 struct mtx mtx; 1857 struct cv cv; 1858 unsigned count; 1859 }; 1860 1861 /* 1862 * Detach mapped page and release resources back to the system. 1863 */ 1864 int 1865 sf_buf_mext(struct mbuf *mb, void *addr, void *args) 1866 { 1867 vm_page_t m; 1868 struct sendfile_sync *sfs; 1869 1870 m = sf_buf_page(args); 1871 sf_buf_free(args); 1872 vm_page_lock(m); 1873 vm_page_unwire(m, 0); 1874 /* 1875 * Check for the object going away on us. This can 1876 * happen since we don't hold a reference to it. 1877 * If so, we're responsible for freeing the page. 1878 */ 1879 if (m->wire_count == 0 && m->object == NULL) 1880 vm_page_free(m); 1881 vm_page_unlock(m); 1882 if (addr == NULL) 1883 return (EXT_FREE_OK); 1884 sfs = addr; 1885 mtx_lock(&sfs->mtx); 1886 KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0")); 1887 if (--sfs->count == 0) 1888 cv_signal(&sfs->cv); 1889 mtx_unlock(&sfs->mtx); 1890 return (EXT_FREE_OK); 1891 } 1892 1893 /* 1894 * sendfile(2) 1895 * 1896 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1897 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1898 * 1899 * Send a file specified by 'fd' and starting at 'offset' to a socket 1900 * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes == 1901 * 0. Optionally add a header and/or trailer to the socket output. If 1902 * specified, write the total number of bytes sent into *sbytes. 1903 */ 1904 int 1905 sys_sendfile(struct thread *td, struct sendfile_args *uap) 1906 { 1907 1908 return (do_sendfile(td, uap, 0)); 1909 } 1910 1911 static int 1912 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1913 { 1914 struct sf_hdtr hdtr; 1915 struct uio *hdr_uio, *trl_uio; 1916 struct file *fp; 1917 cap_rights_t rights; 1918 int error; 1919 1920 if (uap->offset < 0) 1921 return (EINVAL); 1922 1923 hdr_uio = trl_uio = NULL; 1924 1925 if (uap->hdtr != NULL) { 1926 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1927 if (error != 0) 1928 goto out; 1929 if (hdtr.headers != NULL) { 1930 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1931 if (error != 0) 1932 goto out; 1933 } 1934 if (hdtr.trailers != NULL) { 1935 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); 1936 if (error != 0) 1937 goto out; 1938 1939 } 1940 } 1941 1942 AUDIT_ARG_FD(uap->fd); 1943 1944 /* 1945 * sendfile(2) can start at any offset within a file so we require 1946 * CAP_READ+CAP_SEEK = CAP_PREAD. 1947 */ 1948 if ((error = fget_read(td, uap->fd, 1949 cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) { 1950 goto out; 1951 } 1952 1953 error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset, 1954 uap->nbytes, uap->sbytes, uap->flags, compat ? SFK_COMPAT : 0, td); 1955 fdrop(fp, td); 1956 1957 out: 1958 free(hdr_uio, M_IOV); 1959 free(trl_uio, M_IOV); 1960 return (error); 1961 } 1962 1963 #ifdef COMPAT_FREEBSD4 1964 int 1965 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1966 { 1967 struct sendfile_args args; 1968 1969 args.fd = uap->fd; 1970 args.s = uap->s; 1971 args.offset = uap->offset; 1972 args.nbytes = uap->nbytes; 1973 args.hdtr = uap->hdtr; 1974 args.sbytes = uap->sbytes; 1975 args.flags = uap->flags; 1976 1977 return (do_sendfile(td, &args, 1)); 1978 } 1979 #endif /* COMPAT_FREEBSD4 */ 1980 1981 int 1982 vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, 1983 struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, 1984 int kflags, struct thread *td) 1985 { 1986 struct vnode *vp = fp->f_vnode; 1987 struct file *sock_fp; 1988 struct vm_object *obj = NULL; 1989 struct socket *so = NULL; 1990 struct mbuf *m = NULL; 1991 struct sf_buf *sf; 1992 struct vm_page *pg; 1993 struct vattr va; 1994 struct sendfile_sync *sfs = NULL; 1995 cap_rights_t rights; 1996 off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0; 1997 int bsize, error, hdrlen = 0, mnw = 0; 1998 1999 vn_lock(vp, LK_SHARED | LK_RETRY); 2000 if (vp->v_type == VREG) { 2001 bsize = vp->v_mount->mnt_stat.f_iosize; 2002 if (nbytes == 0) { 2003 error = VOP_GETATTR(vp, &va, td->td_ucred); 2004 if (error != 0) { 2005 VOP_UNLOCK(vp, 0); 2006 obj = NULL; 2007 goto out; 2008 } 2009 rem = va.va_size; 2010 } else 2011 rem = nbytes; 2012 obj = vp->v_object; 2013 if (obj != NULL) { 2014 /* 2015 * Temporarily increase the backing VM 2016 * object's reference count so that a forced 2017 * reclamation of its vnode does not 2018 * immediately destroy it. 2019 */ 2020 VM_OBJECT_WLOCK(obj); 2021 if ((obj->flags & OBJ_DEAD) == 0) { 2022 vm_object_reference_locked(obj); 2023 VM_OBJECT_WUNLOCK(obj); 2024 } else { 2025 VM_OBJECT_WUNLOCK(obj); 2026 obj = NULL; 2027 } 2028 } 2029 } else 2030 bsize = 0; /* silence gcc */ 2031 VOP_UNLOCK(vp, 0); 2032 if (obj == NULL) { 2033 error = EINVAL; 2034 goto out; 2035 } 2036 2037 /* 2038 * The socket must be a stream socket and connected. 2039 * Remember if it a blocking or non-blocking socket. 2040 */ 2041 error = getsock_cap(td->td_proc->p_fd, sockfd, 2042 cap_rights_init(&rights, CAP_SEND), &sock_fp, NULL); 2043 if (error != 0) 2044 goto out; 2045 so = sock_fp->f_data; 2046 if (so->so_type != SOCK_STREAM) { 2047 error = EINVAL; 2048 goto out; 2049 } 2050 if ((so->so_state & SS_ISCONNECTED) == 0) { 2051 error = ENOTCONN; 2052 goto out; 2053 } 2054 /* 2055 * Do not wait on memory allocations but return ENOMEM for 2056 * caller to retry later. 2057 * XXX: Experimental. 2058 */ 2059 if (flags & SF_MNOWAIT) 2060 mnw = 1; 2061 2062 if (flags & SF_SYNC) { 2063 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO); 2064 mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); 2065 cv_init(&sfs->cv, "sendfile"); 2066 } 2067 2068 #ifdef MAC 2069 error = mac_socket_check_send(td->td_ucred, so); 2070 if (error != 0) 2071 goto out; 2072 #endif 2073 2074 /* If headers are specified copy them into mbufs. */ 2075 if (hdr_uio != NULL) { 2076 hdr_uio->uio_td = td; 2077 hdr_uio->uio_rw = UIO_WRITE; 2078 if (hdr_uio->uio_resid > 0) { 2079 /* 2080 * In FBSD < 5.0 the nbytes to send also included 2081 * the header. If compat is specified subtract the 2082 * header size from nbytes. 2083 */ 2084 if (kflags & SFK_COMPAT) { 2085 if (nbytes > hdr_uio->uio_resid) 2086 nbytes -= hdr_uio->uio_resid; 2087 else 2088 nbytes = 0; 2089 } 2090 m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK), 2091 0, 0, 0); 2092 if (m == NULL) { 2093 error = mnw ? EAGAIN : ENOBUFS; 2094 goto out; 2095 } 2096 hdrlen = m_length(m, NULL); 2097 } 2098 } 2099 2100 /* 2101 * Protect against multiple writers to the socket. 2102 * 2103 * XXXRW: Historically this has assumed non-interruptibility, so now 2104 * we implement that, but possibly shouldn't. 2105 */ 2106 (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR); 2107 2108 /* 2109 * Loop through the pages of the file, starting with the requested 2110 * offset. Get a file page (do I/O if necessary), map the file page 2111 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 2112 * it on the socket. 2113 * This is done in two loops. The inner loop turns as many pages 2114 * as it can, up to available socket buffer space, without blocking 2115 * into mbufs to have it bulk delivered into the socket send buffer. 2116 * The outer loop checks the state and available space of the socket 2117 * and takes care of the overall progress. 2118 */ 2119 for (off = offset; ; ) { 2120 struct mbuf *mtail; 2121 int loopbytes; 2122 int space; 2123 int done; 2124 2125 if ((nbytes != 0 && nbytes == fsbytes) || 2126 (nbytes == 0 && va.va_size == fsbytes)) 2127 break; 2128 2129 mtail = NULL; 2130 loopbytes = 0; 2131 space = 0; 2132 done = 0; 2133 2134 /* 2135 * Check the socket state for ongoing connection, 2136 * no errors and space in socket buffer. 2137 * If space is low allow for the remainder of the 2138 * file to be processed if it fits the socket buffer. 2139 * Otherwise block in waiting for sufficient space 2140 * to proceed, or if the socket is nonblocking, return 2141 * to userland with EAGAIN while reporting how far 2142 * we've come. 2143 * We wait until the socket buffer has significant free 2144 * space to do bulk sends. This makes good use of file 2145 * system read ahead and allows packet segmentation 2146 * offloading hardware to take over lots of work. If 2147 * we were not careful here we would send off only one 2148 * sfbuf at a time. 2149 */ 2150 SOCKBUF_LOCK(&so->so_snd); 2151 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2) 2152 so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; 2153 retry_space: 2154 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2155 error = EPIPE; 2156 SOCKBUF_UNLOCK(&so->so_snd); 2157 goto done; 2158 } else if (so->so_error) { 2159 error = so->so_error; 2160 so->so_error = 0; 2161 SOCKBUF_UNLOCK(&so->so_snd); 2162 goto done; 2163 } 2164 space = sbspace(&so->so_snd); 2165 if (space < rem && 2166 (space <= 0 || 2167 space < so->so_snd.sb_lowat)) { 2168 if (so->so_state & SS_NBIO) { 2169 SOCKBUF_UNLOCK(&so->so_snd); 2170 error = EAGAIN; 2171 goto done; 2172 } 2173 /* 2174 * sbwait drops the lock while sleeping. 2175 * When we loop back to retry_space the 2176 * state may have changed and we retest 2177 * for it. 2178 */ 2179 error = sbwait(&so->so_snd); 2180 /* 2181 * An error from sbwait usually indicates that we've 2182 * been interrupted by a signal. If we've sent anything 2183 * then return bytes sent, otherwise return the error. 2184 */ 2185 if (error != 0) { 2186 SOCKBUF_UNLOCK(&so->so_snd); 2187 goto done; 2188 } 2189 goto retry_space; 2190 } 2191 SOCKBUF_UNLOCK(&so->so_snd); 2192 2193 /* 2194 * Reduce space in the socket buffer by the size of 2195 * the header mbuf chain. 2196 * hdrlen is set to 0 after the first loop. 2197 */ 2198 space -= hdrlen; 2199 2200 error = vn_lock(vp, LK_SHARED); 2201 if (error != 0) 2202 goto done; 2203 error = VOP_GETATTR(vp, &va, td->td_ucred); 2204 if (error != 0 || off >= va.va_size) { 2205 VOP_UNLOCK(vp, 0); 2206 goto done; 2207 } 2208 2209 /* 2210 * Loop and construct maximum sized mbuf chain to be bulk 2211 * dumped into socket buffer. 2212 */ 2213 while (space > loopbytes) { 2214 vm_pindex_t pindex; 2215 vm_offset_t pgoff; 2216 struct mbuf *m0; 2217 2218 /* 2219 * Calculate the amount to transfer. 2220 * Not to exceed a page, the EOF, 2221 * or the passed in nbytes. 2222 */ 2223 pgoff = (vm_offset_t)(off & PAGE_MASK); 2224 if (nbytes) 2225 rem = (nbytes - fsbytes - loopbytes); 2226 else 2227 rem = va.va_size - 2228 offset - fsbytes - loopbytes; 2229 xfsize = omin(PAGE_SIZE - pgoff, rem); 2230 xfsize = omin(space - loopbytes, xfsize); 2231 if (xfsize <= 0) { 2232 done = 1; /* all data sent */ 2233 break; 2234 } 2235 2236 /* 2237 * Attempt to look up the page. Allocate 2238 * if not found or wait and loop if busy. 2239 */ 2240 pindex = OFF_TO_IDX(off); 2241 VM_OBJECT_WLOCK(obj); 2242 pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY | 2243 VM_ALLOC_IGN_SBUSY | VM_ALLOC_NORMAL | 2244 VM_ALLOC_WIRED); 2245 2246 /* 2247 * Check if page is valid for what we need, 2248 * otherwise initiate I/O. 2249 * If we already turned some pages into mbufs, 2250 * send them off before we come here again and 2251 * block. 2252 */ 2253 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) 2254 VM_OBJECT_WUNLOCK(obj); 2255 else if (m != NULL) 2256 error = EAGAIN; /* send what we already got */ 2257 else if (flags & SF_NODISKIO) 2258 error = EBUSY; 2259 else { 2260 ssize_t resid; 2261 int readahead = sfreadahead * MAXBSIZE; 2262 2263 VM_OBJECT_WUNLOCK(obj); 2264 2265 /* 2266 * Get the page from backing store. 2267 * XXXMAC: Because we don't have fp->f_cred 2268 * here, we pass in NOCRED. This is probably 2269 * wrong, but is consistent with our original 2270 * implementation. 2271 */ 2272 error = vn_rdwr(UIO_READ, vp, NULL, readahead, 2273 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 2274 IO_VMIO | ((readahead / bsize) << IO_SEQSHIFT), 2275 td->td_ucred, NOCRED, &resid, td); 2276 SFSTAT_INC(sf_iocnt); 2277 if (error != 0) 2278 VM_OBJECT_WLOCK(obj); 2279 } 2280 if (error != 0) { 2281 vm_page_lock(pg); 2282 vm_page_unwire(pg, 0); 2283 /* 2284 * See if anyone else might know about 2285 * this page. If not and it is not valid, 2286 * then free it. 2287 */ 2288 if (pg->wire_count == 0 && pg->valid == 0 && 2289 !vm_page_busied(pg)) 2290 vm_page_free(pg); 2291 vm_page_unlock(pg); 2292 VM_OBJECT_WUNLOCK(obj); 2293 if (error == EAGAIN) 2294 error = 0; /* not a real error */ 2295 break; 2296 } 2297 2298 /* 2299 * Get a sendfile buf. When allocating the 2300 * first buffer for mbuf chain, we usually 2301 * wait as long as necessary, but this wait 2302 * can be interrupted. For consequent 2303 * buffers, do not sleep, since several 2304 * threads might exhaust the buffers and then 2305 * deadlock. 2306 */ 2307 sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT : 2308 SFB_CATCH); 2309 if (sf == NULL) { 2310 SFSTAT_INC(sf_allocfail); 2311 vm_page_lock(pg); 2312 vm_page_unwire(pg, 0); 2313 KASSERT(pg->object != NULL, 2314 ("%s: object disappeared", __func__)); 2315 vm_page_unlock(pg); 2316 if (m == NULL) 2317 error = (mnw ? EAGAIN : EINTR); 2318 break; 2319 } 2320 2321 /* 2322 * Get an mbuf and set it up as having 2323 * external storage. 2324 */ 2325 m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); 2326 if (m0 == NULL) { 2327 error = (mnw ? EAGAIN : ENOBUFS); 2328 (void)sf_buf_mext(NULL, NULL, sf); 2329 break; 2330 } 2331 if (m_extadd(m0, (caddr_t )sf_buf_kva(sf), PAGE_SIZE, 2332 sf_buf_mext, sfs, sf, M_RDONLY, EXT_SFBUF, 2333 (mnw ? M_NOWAIT : M_WAITOK)) != 0) { 2334 error = (mnw ? EAGAIN : ENOBUFS); 2335 (void)sf_buf_mext(NULL, NULL, sf); 2336 m_freem(m0); 2337 break; 2338 } 2339 m0->m_data = (char *)sf_buf_kva(sf) + pgoff; 2340 m0->m_len = xfsize; 2341 2342 /* Append to mbuf chain. */ 2343 if (mtail != NULL) 2344 mtail->m_next = m0; 2345 else if (m != NULL) 2346 m_last(m)->m_next = m0; 2347 else 2348 m = m0; 2349 mtail = m0; 2350 2351 /* Keep track of bits processed. */ 2352 loopbytes += xfsize; 2353 off += xfsize; 2354 2355 if (sfs != NULL) { 2356 mtx_lock(&sfs->mtx); 2357 sfs->count++; 2358 mtx_unlock(&sfs->mtx); 2359 } 2360 } 2361 2362 VOP_UNLOCK(vp, 0); 2363 2364 /* Add the buffer chain to the socket buffer. */ 2365 if (m != NULL) { 2366 int mlen, err; 2367 2368 mlen = m_length(m, NULL); 2369 SOCKBUF_LOCK(&so->so_snd); 2370 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2371 error = EPIPE; 2372 SOCKBUF_UNLOCK(&so->so_snd); 2373 goto done; 2374 } 2375 SOCKBUF_UNLOCK(&so->so_snd); 2376 CURVNET_SET(so->so_vnet); 2377 /* Avoid error aliasing. */ 2378 err = (*so->so_proto->pr_usrreqs->pru_send) 2379 (so, 0, m, NULL, NULL, td); 2380 CURVNET_RESTORE(); 2381 if (err == 0) { 2382 /* 2383 * We need two counters to get the 2384 * file offset and nbytes to send 2385 * right: 2386 * - sbytes contains the total amount 2387 * of bytes sent, including headers. 2388 * - fsbytes contains the total amount 2389 * of bytes sent from the file. 2390 */ 2391 sbytes += mlen; 2392 fsbytes += mlen; 2393 if (hdrlen) { 2394 fsbytes -= hdrlen; 2395 hdrlen = 0; 2396 } 2397 } else if (error == 0) 2398 error = err; 2399 m = NULL; /* pru_send always consumes */ 2400 } 2401 2402 /* Quit outer loop on error or when we're done. */ 2403 if (done) 2404 break; 2405 if (error != 0) 2406 goto done; 2407 } 2408 2409 /* 2410 * Send trailers. Wimp out and use writev(2). 2411 */ 2412 if (trl_uio != NULL) { 2413 sbunlock(&so->so_snd); 2414 error = kern_writev(td, sockfd, trl_uio); 2415 if (error == 0) 2416 sbytes += td->td_retval[0]; 2417 goto out; 2418 } 2419 2420 done: 2421 sbunlock(&so->so_snd); 2422 out: 2423 /* 2424 * If there was no error we have to clear td->td_retval[0] 2425 * because it may have been set by writev. 2426 */ 2427 if (error == 0) { 2428 td->td_retval[0] = 0; 2429 } 2430 if (sent != NULL) { 2431 copyout(&sbytes, sent, sizeof(off_t)); 2432 } 2433 if (obj != NULL) 2434 vm_object_deallocate(obj); 2435 if (so) 2436 fdrop(sock_fp, td); 2437 if (m) 2438 m_freem(m); 2439 2440 if (sfs != NULL) { 2441 mtx_lock(&sfs->mtx); 2442 if (sfs->count != 0) 2443 cv_wait(&sfs->cv, &sfs->mtx); 2444 KASSERT(sfs->count == 0, ("sendfile sync still busy")); 2445 cv_destroy(&sfs->cv); 2446 mtx_destroy(&sfs->mtx); 2447 free(sfs, M_TEMP); 2448 } 2449 2450 if (error == ERESTART) 2451 error = EINTR; 2452 2453 return (error); 2454 } 2455 2456 /* 2457 * SCTP syscalls. 2458 * Functionality only compiled in if SCTP is defined in the kernel Makefile, 2459 * otherwise all return EOPNOTSUPP. 2460 * XXX: We should make this loadable one day. 2461 */ 2462 int 2463 sys_sctp_peeloff(td, uap) 2464 struct thread *td; 2465 struct sctp_peeloff_args /* { 2466 int sd; 2467 caddr_t name; 2468 } */ *uap; 2469 { 2470 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2471 struct file *nfp = NULL; 2472 struct socket *head, *so; 2473 cap_rights_t rights; 2474 u_int fflag; 2475 int error, fd; 2476 2477 AUDIT_ARG_FD(uap->sd); 2478 error = fgetsock(td, uap->sd, cap_rights_init(&rights, CAP_PEELOFF), 2479 &head, &fflag); 2480 if (error != 0) 2481 goto done2; 2482 if (head->so_proto->pr_protocol != IPPROTO_SCTP) { 2483 error = EOPNOTSUPP; 2484 goto done; 2485 } 2486 error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name); 2487 if (error != 0) 2488 goto done; 2489 /* 2490 * At this point we know we do have a assoc to pull 2491 * we proceed to get the fd setup. This may block 2492 * but that is ok. 2493 */ 2494 2495 error = falloc(td, &nfp, &fd, 0); 2496 if (error != 0) 2497 goto done; 2498 td->td_retval[0] = fd; 2499 2500 CURVNET_SET(head->so_vnet); 2501 so = sonewconn(head, SS_ISCONNECTED); 2502 if (so == NULL) { 2503 error = ENOMEM; 2504 goto noconnection; 2505 } 2506 /* 2507 * Before changing the flags on the socket, we have to bump the 2508 * reference count. Otherwise, if the protocol calls sofree(), 2509 * the socket will be released due to a zero refcount. 2510 */ 2511 SOCK_LOCK(so); 2512 soref(so); /* file descriptor reference */ 2513 SOCK_UNLOCK(so); 2514 2515 ACCEPT_LOCK(); 2516 2517 TAILQ_REMOVE(&head->so_comp, so, so_list); 2518 head->so_qlen--; 2519 so->so_state |= (head->so_state & SS_NBIO); 2520 so->so_state &= ~SS_NOFDREF; 2521 so->so_qstate &= ~SQ_COMP; 2522 so->so_head = NULL; 2523 ACCEPT_UNLOCK(); 2524 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 2525 error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name); 2526 if (error != 0) 2527 goto noconnection; 2528 if (head->so_sigio != NULL) 2529 fsetown(fgetown(&head->so_sigio), &so->so_sigio); 2530 2531 noconnection: 2532 /* 2533 * close the new descriptor, assuming someone hasn't ripped it 2534 * out from under us. 2535 */ 2536 if (error != 0) 2537 fdclose(td->td_proc->p_fd, nfp, fd, td); 2538 2539 /* 2540 * Release explicitly held references before returning. 2541 */ 2542 CURVNET_RESTORE(); 2543 done: 2544 if (nfp != NULL) 2545 fdrop(nfp, td); 2546 fputsock(head); 2547 done2: 2548 return (error); 2549 #else /* SCTP */ 2550 return (EOPNOTSUPP); 2551 #endif /* SCTP */ 2552 } 2553 2554 int 2555 sys_sctp_generic_sendmsg (td, uap) 2556 struct thread *td; 2557 struct sctp_generic_sendmsg_args /* { 2558 int sd, 2559 caddr_t msg, 2560 int mlen, 2561 caddr_t to, 2562 __socklen_t tolen, 2563 struct sctp_sndrcvinfo *sinfo, 2564 int flags 2565 } */ *uap; 2566 { 2567 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2568 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2569 struct socket *so; 2570 struct file *fp = NULL; 2571 struct sockaddr *to = NULL; 2572 #ifdef KTRACE 2573 struct uio *ktruio = NULL; 2574 #endif 2575 struct uio auio; 2576 struct iovec iov[1]; 2577 cap_rights_t rights; 2578 int error = 0, len; 2579 2580 if (uap->sinfo != NULL) { 2581 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2582 if (error != 0) 2583 return (error); 2584 u_sinfo = &sinfo; 2585 } 2586 2587 cap_rights_init(&rights, CAP_SEND); 2588 if (uap->tolen != 0) { 2589 error = getsockaddr(&to, uap->to, uap->tolen); 2590 if (error != 0) { 2591 to = NULL; 2592 goto sctp_bad2; 2593 } 2594 cap_rights_set(&rights, CAP_CONNECT); 2595 } 2596 2597 AUDIT_ARG_FD(uap->sd); 2598 error = getsock_cap(td->td_proc->p_fd, uap->sd, &rights, &fp, NULL); 2599 if (error != 0) 2600 goto sctp_bad; 2601 #ifdef KTRACE 2602 if (to && (KTRPOINT(td, KTR_STRUCT))) 2603 ktrsockaddr(to); 2604 #endif 2605 2606 iov[0].iov_base = uap->msg; 2607 iov[0].iov_len = uap->mlen; 2608 2609 so = (struct socket *)fp->f_data; 2610 if (so->so_proto->pr_protocol != IPPROTO_SCTP) { 2611 error = EOPNOTSUPP; 2612 goto sctp_bad; 2613 } 2614 #ifdef MAC 2615 error = mac_socket_check_send(td->td_ucred, so); 2616 if (error != 0) 2617 goto sctp_bad; 2618 #endif /* MAC */ 2619 2620 auio.uio_iov = iov; 2621 auio.uio_iovcnt = 1; 2622 auio.uio_segflg = UIO_USERSPACE; 2623 auio.uio_rw = UIO_WRITE; 2624 auio.uio_td = td; 2625 auio.uio_offset = 0; /* XXX */ 2626 auio.uio_resid = 0; 2627 len = auio.uio_resid = uap->mlen; 2628 CURVNET_SET(so->so_vnet); 2629 error = sctp_lower_sosend(so, to, &auio, (struct mbuf *)NULL, 2630 (struct mbuf *)NULL, uap->flags, u_sinfo, td); 2631 CURVNET_RESTORE(); 2632 if (error != 0) { 2633 if (auio.uio_resid != len && (error == ERESTART || 2634 error == EINTR || error == EWOULDBLOCK)) 2635 error = 0; 2636 /* Generation of SIGPIPE can be controlled per socket. */ 2637 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2638 !(uap->flags & MSG_NOSIGNAL)) { 2639 PROC_LOCK(td->td_proc); 2640 tdsignal(td, SIGPIPE); 2641 PROC_UNLOCK(td->td_proc); 2642 } 2643 } 2644 if (error == 0) 2645 td->td_retval[0] = len - auio.uio_resid; 2646 #ifdef KTRACE 2647 if (ktruio != NULL) { 2648 ktruio->uio_resid = td->td_retval[0]; 2649 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2650 } 2651 #endif /* KTRACE */ 2652 sctp_bad: 2653 if (fp != NULL) 2654 fdrop(fp, td); 2655 sctp_bad2: 2656 free(to, M_SONAME); 2657 return (error); 2658 #else /* SCTP */ 2659 return (EOPNOTSUPP); 2660 #endif /* SCTP */ 2661 } 2662 2663 int 2664 sys_sctp_generic_sendmsg_iov(td, uap) 2665 struct thread *td; 2666 struct sctp_generic_sendmsg_iov_args /* { 2667 int sd, 2668 struct iovec *iov, 2669 int iovlen, 2670 caddr_t to, 2671 __socklen_t tolen, 2672 struct sctp_sndrcvinfo *sinfo, 2673 int flags 2674 } */ *uap; 2675 { 2676 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2677 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2678 struct socket *so; 2679 struct file *fp = NULL; 2680 struct sockaddr *to = NULL; 2681 #ifdef KTRACE 2682 struct uio *ktruio = NULL; 2683 #endif 2684 struct uio auio; 2685 struct iovec *iov, *tiov; 2686 cap_rights_t rights; 2687 ssize_t len; 2688 int error, i; 2689 2690 if (uap->sinfo != NULL) { 2691 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2692 if (error != 0) 2693 return (error); 2694 u_sinfo = &sinfo; 2695 } 2696 cap_rights_init(&rights, CAP_SEND); 2697 if (uap->tolen != 0) { 2698 error = getsockaddr(&to, uap->to, uap->tolen); 2699 if (error != 0) { 2700 to = NULL; 2701 goto sctp_bad2; 2702 } 2703 cap_rights_set(&rights, CAP_CONNECT); 2704 } 2705 2706 AUDIT_ARG_FD(uap->sd); 2707 error = getsock_cap(td->td_proc->p_fd, uap->sd, &rights, &fp, NULL); 2708 if (error != 0) 2709 goto sctp_bad1; 2710 2711 #ifdef COMPAT_FREEBSD32 2712 if (SV_CURPROC_FLAG(SV_ILP32)) 2713 error = freebsd32_copyiniov((struct iovec32 *)uap->iov, 2714 uap->iovlen, &iov, EMSGSIZE); 2715 else 2716 #endif 2717 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2718 if (error != 0) 2719 goto sctp_bad1; 2720 #ifdef KTRACE 2721 if (to && (KTRPOINT(td, KTR_STRUCT))) 2722 ktrsockaddr(to); 2723 #endif 2724 2725 so = (struct socket *)fp->f_data; 2726 if (so->so_proto->pr_protocol != IPPROTO_SCTP) { 2727 error = EOPNOTSUPP; 2728 goto sctp_bad; 2729 } 2730 #ifdef MAC 2731 error = mac_socket_check_send(td->td_ucred, so); 2732 if (error != 0) 2733 goto sctp_bad; 2734 #endif /* MAC */ 2735 2736 auio.uio_iov = iov; 2737 auio.uio_iovcnt = uap->iovlen; 2738 auio.uio_segflg = UIO_USERSPACE; 2739 auio.uio_rw = UIO_WRITE; 2740 auio.uio_td = td; 2741 auio.uio_offset = 0; /* XXX */ 2742 auio.uio_resid = 0; 2743 tiov = iov; 2744 for (i = 0; i <uap->iovlen; i++, tiov++) { 2745 if ((auio.uio_resid += tiov->iov_len) < 0) { 2746 error = EINVAL; 2747 goto sctp_bad; 2748 } 2749 } 2750 len = auio.uio_resid; 2751 CURVNET_SET(so->so_vnet); 2752 error = sctp_lower_sosend(so, to, &auio, 2753 (struct mbuf *)NULL, (struct mbuf *)NULL, 2754 uap->flags, u_sinfo, td); 2755 CURVNET_RESTORE(); 2756 if (error != 0) { 2757 if (auio.uio_resid != len && (error == ERESTART || 2758 error == EINTR || error == EWOULDBLOCK)) 2759 error = 0; 2760 /* Generation of SIGPIPE can be controlled per socket */ 2761 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2762 !(uap->flags & MSG_NOSIGNAL)) { 2763 PROC_LOCK(td->td_proc); 2764 tdsignal(td, SIGPIPE); 2765 PROC_UNLOCK(td->td_proc); 2766 } 2767 } 2768 if (error == 0) 2769 td->td_retval[0] = len - auio.uio_resid; 2770 #ifdef KTRACE 2771 if (ktruio != NULL) { 2772 ktruio->uio_resid = td->td_retval[0]; 2773 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2774 } 2775 #endif /* KTRACE */ 2776 sctp_bad: 2777 free(iov, M_IOV); 2778 sctp_bad1: 2779 if (fp != NULL) 2780 fdrop(fp, td); 2781 sctp_bad2: 2782 free(to, M_SONAME); 2783 return (error); 2784 #else /* SCTP */ 2785 return (EOPNOTSUPP); 2786 #endif /* SCTP */ 2787 } 2788 2789 int 2790 sys_sctp_generic_recvmsg(td, uap) 2791 struct thread *td; 2792 struct sctp_generic_recvmsg_args /* { 2793 int sd, 2794 struct iovec *iov, 2795 int iovlen, 2796 struct sockaddr *from, 2797 __socklen_t *fromlenaddr, 2798 struct sctp_sndrcvinfo *sinfo, 2799 int *msg_flags 2800 } */ *uap; 2801 { 2802 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2803 uint8_t sockbufstore[256]; 2804 struct uio auio; 2805 struct iovec *iov, *tiov; 2806 struct sctp_sndrcvinfo sinfo; 2807 struct socket *so; 2808 struct file *fp = NULL; 2809 struct sockaddr *fromsa; 2810 cap_rights_t rights; 2811 #ifdef KTRACE 2812 struct uio *ktruio = NULL; 2813 #endif 2814 ssize_t len; 2815 int error, fromlen, i, msg_flags; 2816 2817 AUDIT_ARG_FD(uap->sd); 2818 error = getsock_cap(td->td_proc->p_fd, uap->sd, 2819 cap_rights_init(&rights, CAP_RECV), &fp, NULL); 2820 if (error != 0) 2821 return (error); 2822 #ifdef COMPAT_FREEBSD32 2823 if (SV_CURPROC_FLAG(SV_ILP32)) 2824 error = freebsd32_copyiniov((struct iovec32 *)uap->iov, 2825 uap->iovlen, &iov, EMSGSIZE); 2826 else 2827 #endif 2828 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2829 if (error != 0) 2830 goto out1; 2831 2832 so = fp->f_data; 2833 if (so->so_proto->pr_protocol != IPPROTO_SCTP) { 2834 error = EOPNOTSUPP; 2835 goto out; 2836 } 2837 #ifdef MAC 2838 error = mac_socket_check_receive(td->td_ucred, so); 2839 if (error != 0) 2840 goto out; 2841 #endif /* MAC */ 2842 2843 if (uap->fromlenaddr != NULL) { 2844 error = copyin(uap->fromlenaddr, &fromlen, sizeof (fromlen)); 2845 if (error != 0) 2846 goto out; 2847 } else { 2848 fromlen = 0; 2849 } 2850 if (uap->msg_flags) { 2851 error = copyin(uap->msg_flags, &msg_flags, sizeof (int)); 2852 if (error != 0) 2853 goto out; 2854 } else { 2855 msg_flags = 0; 2856 } 2857 auio.uio_iov = iov; 2858 auio.uio_iovcnt = uap->iovlen; 2859 auio.uio_segflg = UIO_USERSPACE; 2860 auio.uio_rw = UIO_READ; 2861 auio.uio_td = td; 2862 auio.uio_offset = 0; /* XXX */ 2863 auio.uio_resid = 0; 2864 tiov = iov; 2865 for (i = 0; i <uap->iovlen; i++, tiov++) { 2866 if ((auio.uio_resid += tiov->iov_len) < 0) { 2867 error = EINVAL; 2868 goto out; 2869 } 2870 } 2871 len = auio.uio_resid; 2872 fromsa = (struct sockaddr *)sockbufstore; 2873 2874 #ifdef KTRACE 2875 if (KTRPOINT(td, KTR_GENIO)) 2876 ktruio = cloneuio(&auio); 2877 #endif /* KTRACE */ 2878 memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo)); 2879 CURVNET_SET(so->so_vnet); 2880 error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL, 2881 fromsa, fromlen, &msg_flags, 2882 (struct sctp_sndrcvinfo *)&sinfo, 1); 2883 CURVNET_RESTORE(); 2884 if (error != 0) { 2885 if (auio.uio_resid != len && (error == ERESTART || 2886 error == EINTR || error == EWOULDBLOCK)) 2887 error = 0; 2888 } else { 2889 if (uap->sinfo) 2890 error = copyout(&sinfo, uap->sinfo, sizeof (sinfo)); 2891 } 2892 #ifdef KTRACE 2893 if (ktruio != NULL) { 2894 ktruio->uio_resid = len - auio.uio_resid; 2895 ktrgenio(uap->sd, UIO_READ, ktruio, error); 2896 } 2897 #endif /* KTRACE */ 2898 if (error != 0) 2899 goto out; 2900 td->td_retval[0] = len - auio.uio_resid; 2901 2902 if (fromlen && uap->from) { 2903 len = fromlen; 2904 if (len <= 0 || fromsa == 0) 2905 len = 0; 2906 else { 2907 len = MIN(len, fromsa->sa_len); 2908 error = copyout(fromsa, uap->from, (size_t)len); 2909 if (error != 0) 2910 goto out; 2911 } 2912 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t)); 2913 if (error != 0) 2914 goto out; 2915 } 2916 #ifdef KTRACE 2917 if (KTRPOINT(td, KTR_STRUCT)) 2918 ktrsockaddr(fromsa); 2919 #endif 2920 if (uap->msg_flags) { 2921 error = copyout(&msg_flags, uap->msg_flags, sizeof (int)); 2922 if (error != 0) 2923 goto out; 2924 } 2925 out: 2926 free(iov, M_IOV); 2927 out1: 2928 if (fp != NULL) 2929 fdrop(fp, td); 2930 2931 return (error); 2932 #else /* SCTP */ 2933 return (EOPNOTSUPP); 2934 #endif /* SCTP */ 2935 } 2936