1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_inet.h" 39 #include "opt_inet6.h" 40 #include "opt_sctp.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/mutex.h> 49 #include <sys/sysproto.h> 50 #include <sys/malloc.h> 51 #include <sys/filedesc.h> 52 #include <sys/event.h> 53 #include <sys/proc.h> 54 #include <sys/fcntl.h> 55 #include <sys/file.h> 56 #include <sys/filio.h> 57 #include <sys/jail.h> 58 #include <sys/mount.h> 59 #include <sys/mbuf.h> 60 #include <sys/protosw.h> 61 #include <sys/sf_buf.h> 62 #include <sys/sysent.h> 63 #include <sys/socket.h> 64 #include <sys/socketvar.h> 65 #include <sys/signalvar.h> 66 #include <sys/syscallsubr.h> 67 #include <sys/sysctl.h> 68 #include <sys/uio.h> 69 #include <sys/vnode.h> 70 #ifdef KTRACE 71 #include <sys/ktrace.h> 72 #endif 73 #ifdef COMPAT_FREEBSD32 74 #include <compat/freebsd32/freebsd32_util.h> 75 #endif 76 77 #include <net/vnet.h> 78 79 #include <security/audit/audit.h> 80 #include <security/mac/mac_framework.h> 81 82 #include <vm/vm.h> 83 #include <vm/vm_object.h> 84 #include <vm/vm_page.h> 85 #include <vm/vm_pageout.h> 86 #include <vm/vm_kern.h> 87 #include <vm/vm_extern.h> 88 89 #if defined(INET) || defined(INET6) 90 #ifdef SCTP 91 #include <netinet/sctp.h> 92 #include <netinet/sctp_peeloff.h> 93 #endif /* SCTP */ 94 #endif /* INET || INET6 */ 95 96 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 97 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 98 99 static int accept1(struct thread *td, struct accept_args *uap, int compat); 100 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 101 static int getsockname1(struct thread *td, struct getsockname_args *uap, 102 int compat); 103 static int getpeername1(struct thread *td, struct getpeername_args *uap, 104 int compat); 105 106 /* 107 * NSFBUFS-related variables and associated sysctls 108 */ 109 int nsfbufs; 110 int nsfbufspeak; 111 int nsfbufsused; 112 113 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 114 "Maximum number of sendfile(2) sf_bufs available"); 115 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 116 "Number of sendfile(2) sf_bufs at peak usage"); 117 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 118 "Number of sendfile(2) sf_bufs in use"); 119 120 /* 121 * Convert a user file descriptor to a kernel file entry. A reference on the 122 * file entry is held upon returning. This is lighter weight than 123 * fgetsock(), which bumps the socket reference drops the file reference 124 * count instead, as this approach avoids several additional mutex operations 125 * associated with the additional reference count. If requested, return the 126 * open file flags. 127 */ 128 static int 129 getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp) 130 { 131 struct file *fp; 132 int error; 133 134 fp = NULL; 135 if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) { 136 error = EBADF; 137 } else if (fp->f_type != DTYPE_SOCKET) { 138 fdrop(fp, curthread); 139 fp = NULL; 140 error = ENOTSOCK; 141 } else { 142 if (fflagp != NULL) 143 *fflagp = fp->f_flag; 144 error = 0; 145 } 146 *fpp = fp; 147 return (error); 148 } 149 150 /* 151 * System call interface to the socket abstraction. 152 */ 153 #if defined(COMPAT_43) 154 #define COMPAT_OLDSOCK 155 #endif 156 157 int 158 socket(td, uap) 159 struct thread *td; 160 struct socket_args /* { 161 int domain; 162 int type; 163 int protocol; 164 } */ *uap; 165 { 166 struct filedesc *fdp; 167 struct socket *so; 168 struct file *fp; 169 int fd, error; 170 171 AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol); 172 #ifdef MAC 173 error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type, 174 uap->protocol); 175 if (error) 176 return (error); 177 #endif 178 fdp = td->td_proc->p_fd; 179 error = falloc(td, &fp, &fd); 180 if (error) 181 return (error); 182 /* An extra reference on `fp' has been held for us by falloc(). */ 183 error = socreate(uap->domain, &so, uap->type, uap->protocol, 184 td->td_ucred, td); 185 if (error) { 186 fdclose(fdp, fp, fd, td); 187 } else { 188 finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops); 189 td->td_retval[0] = fd; 190 } 191 fdrop(fp, td); 192 return (error); 193 } 194 195 /* ARGSUSED */ 196 int 197 bind(td, uap) 198 struct thread *td; 199 struct bind_args /* { 200 int s; 201 caddr_t name; 202 int namelen; 203 } */ *uap; 204 { 205 struct sockaddr *sa; 206 int error; 207 208 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 209 return (error); 210 211 error = kern_bind(td, uap->s, sa); 212 free(sa, M_SONAME); 213 return (error); 214 } 215 216 int 217 kern_bind(td, fd, sa) 218 struct thread *td; 219 int fd; 220 struct sockaddr *sa; 221 { 222 struct socket *so; 223 struct file *fp; 224 int error; 225 226 AUDIT_ARG_FD(fd); 227 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 228 if (error) 229 return (error); 230 so = fp->f_data; 231 #ifdef KTRACE 232 if (KTRPOINT(td, KTR_STRUCT)) 233 ktrsockaddr(sa); 234 #endif 235 #ifdef MAC 236 error = mac_socket_check_bind(td->td_ucred, so, sa); 237 if (error == 0) 238 #endif 239 error = sobind(so, sa, td); 240 fdrop(fp, td); 241 return (error); 242 } 243 244 /* ARGSUSED */ 245 int 246 listen(td, uap) 247 struct thread *td; 248 struct listen_args /* { 249 int s; 250 int backlog; 251 } */ *uap; 252 { 253 struct socket *so; 254 struct file *fp; 255 int error; 256 257 AUDIT_ARG_FD(uap->s); 258 error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL); 259 if (error == 0) { 260 so = fp->f_data; 261 #ifdef MAC 262 error = mac_socket_check_listen(td->td_ucred, so); 263 if (error == 0) { 264 #endif 265 CURVNET_SET(so->so_vnet); 266 error = solisten(so, uap->backlog, td); 267 CURVNET_RESTORE(); 268 #ifdef MAC 269 } 270 #endif 271 fdrop(fp, td); 272 } 273 return(error); 274 } 275 276 /* 277 * accept1() 278 */ 279 static int 280 accept1(td, uap, compat) 281 struct thread *td; 282 struct accept_args /* { 283 int s; 284 struct sockaddr * __restrict name; 285 socklen_t * __restrict anamelen; 286 } */ *uap; 287 int compat; 288 { 289 struct sockaddr *name; 290 socklen_t namelen; 291 struct file *fp; 292 int error; 293 294 if (uap->name == NULL) 295 return (kern_accept(td, uap->s, NULL, NULL, NULL)); 296 297 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 298 if (error) 299 return (error); 300 301 error = kern_accept(td, uap->s, &name, &namelen, &fp); 302 303 /* 304 * return a namelen of zero for older code which might 305 * ignore the return value from accept. 306 */ 307 if (error) { 308 (void) copyout(&namelen, 309 uap->anamelen, sizeof(*uap->anamelen)); 310 return (error); 311 } 312 313 if (error == 0 && name != NULL) { 314 #ifdef COMPAT_OLDSOCK 315 if (compat) 316 ((struct osockaddr *)name)->sa_family = 317 name->sa_family; 318 #endif 319 error = copyout(name, uap->name, namelen); 320 } 321 if (error == 0) 322 error = copyout(&namelen, uap->anamelen, 323 sizeof(namelen)); 324 if (error) 325 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td); 326 fdrop(fp, td); 327 free(name, M_SONAME); 328 return (error); 329 } 330 331 int 332 kern_accept(struct thread *td, int s, struct sockaddr **name, 333 socklen_t *namelen, struct file **fp) 334 { 335 struct filedesc *fdp; 336 struct file *headfp, *nfp = NULL; 337 struct sockaddr *sa = NULL; 338 int error; 339 struct socket *head, *so; 340 int fd; 341 u_int fflag; 342 pid_t pgid; 343 int tmp; 344 345 if (name) { 346 *name = NULL; 347 if (*namelen < 0) 348 return (EINVAL); 349 } 350 351 AUDIT_ARG_FD(s); 352 fdp = td->td_proc->p_fd; 353 error = getsock(fdp, s, &headfp, &fflag); 354 if (error) 355 return (error); 356 head = headfp->f_data; 357 if ((head->so_options & SO_ACCEPTCONN) == 0) { 358 error = EINVAL; 359 goto done; 360 } 361 #ifdef MAC 362 error = mac_socket_check_accept(td->td_ucred, head); 363 if (error != 0) 364 goto done; 365 #endif 366 error = falloc(td, &nfp, &fd); 367 if (error) 368 goto done; 369 ACCEPT_LOCK(); 370 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 371 ACCEPT_UNLOCK(); 372 error = EWOULDBLOCK; 373 goto noconnection; 374 } 375 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 376 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 377 head->so_error = ECONNABORTED; 378 break; 379 } 380 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 381 "accept", 0); 382 if (error) { 383 ACCEPT_UNLOCK(); 384 goto noconnection; 385 } 386 } 387 if (head->so_error) { 388 error = head->so_error; 389 head->so_error = 0; 390 ACCEPT_UNLOCK(); 391 goto noconnection; 392 } 393 so = TAILQ_FIRST(&head->so_comp); 394 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 395 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 396 397 /* 398 * Before changing the flags on the socket, we have to bump the 399 * reference count. Otherwise, if the protocol calls sofree(), 400 * the socket will be released due to a zero refcount. 401 */ 402 SOCK_LOCK(so); /* soref() and so_state update */ 403 soref(so); /* file descriptor reference */ 404 405 TAILQ_REMOVE(&head->so_comp, so, so_list); 406 head->so_qlen--; 407 so->so_state |= (head->so_state & SS_NBIO); 408 so->so_qstate &= ~SQ_COMP; 409 so->so_head = NULL; 410 411 SOCK_UNLOCK(so); 412 ACCEPT_UNLOCK(); 413 414 /* An extra reference on `nfp' has been held for us by falloc(). */ 415 td->td_retval[0] = fd; 416 417 /* connection has been removed from the listen queue */ 418 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 419 420 pgid = fgetown(&head->so_sigio); 421 if (pgid != 0) 422 fsetown(pgid, &so->so_sigio); 423 424 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 425 /* Sync socket nonblocking/async state with file flags */ 426 tmp = fflag & FNONBLOCK; 427 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 428 tmp = fflag & FASYNC; 429 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 430 sa = 0; 431 CURVNET_SET(so->so_vnet); 432 error = soaccept(so, &sa); 433 CURVNET_RESTORE(); 434 if (error) { 435 /* 436 * return a namelen of zero for older code which might 437 * ignore the return value from accept. 438 */ 439 if (name) 440 *namelen = 0; 441 goto noconnection; 442 } 443 if (sa == NULL) { 444 if (name) 445 *namelen = 0; 446 goto done; 447 } 448 if (name) { 449 /* check sa_len before it is destroyed */ 450 if (*namelen > sa->sa_len) 451 *namelen = sa->sa_len; 452 #ifdef KTRACE 453 if (KTRPOINT(td, KTR_STRUCT)) 454 ktrsockaddr(sa); 455 #endif 456 *name = sa; 457 sa = NULL; 458 } 459 noconnection: 460 if (sa) 461 free(sa, M_SONAME); 462 463 /* 464 * close the new descriptor, assuming someone hasn't ripped it 465 * out from under us. 466 */ 467 if (error) 468 fdclose(fdp, nfp, fd, td); 469 470 /* 471 * Release explicitly held references before returning. We return 472 * a reference on nfp to the caller on success if they request it. 473 */ 474 done: 475 if (fp != NULL) { 476 if (error == 0) { 477 *fp = nfp; 478 nfp = NULL; 479 } else 480 *fp = NULL; 481 } 482 if (nfp != NULL) 483 fdrop(nfp, td); 484 fdrop(headfp, td); 485 return (error); 486 } 487 488 int 489 accept(td, uap) 490 struct thread *td; 491 struct accept_args *uap; 492 { 493 494 return (accept1(td, uap, 0)); 495 } 496 497 #ifdef COMPAT_OLDSOCK 498 int 499 oaccept(td, uap) 500 struct thread *td; 501 struct accept_args *uap; 502 { 503 504 return (accept1(td, uap, 1)); 505 } 506 #endif /* COMPAT_OLDSOCK */ 507 508 /* ARGSUSED */ 509 int 510 connect(td, uap) 511 struct thread *td; 512 struct connect_args /* { 513 int s; 514 caddr_t name; 515 int namelen; 516 } */ *uap; 517 { 518 struct sockaddr *sa; 519 int error; 520 521 error = getsockaddr(&sa, uap->name, uap->namelen); 522 if (error) 523 return (error); 524 525 error = kern_connect(td, uap->s, sa); 526 free(sa, M_SONAME); 527 return (error); 528 } 529 530 531 int 532 kern_connect(td, fd, sa) 533 struct thread *td; 534 int fd; 535 struct sockaddr *sa; 536 { 537 struct socket *so; 538 struct file *fp; 539 int error; 540 int interrupted = 0; 541 542 AUDIT_ARG_FD(fd); 543 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 544 if (error) 545 return (error); 546 so = fp->f_data; 547 if (so->so_state & SS_ISCONNECTING) { 548 error = EALREADY; 549 goto done1; 550 } 551 #ifdef KTRACE 552 if (KTRPOINT(td, KTR_STRUCT)) 553 ktrsockaddr(sa); 554 #endif 555 #ifdef MAC 556 error = mac_socket_check_connect(td->td_ucred, so, sa); 557 if (error) 558 goto bad; 559 #endif 560 error = soconnect(so, sa, td); 561 if (error) 562 goto bad; 563 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 564 error = EINPROGRESS; 565 goto done1; 566 } 567 SOCK_LOCK(so); 568 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 569 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 570 "connec", 0); 571 if (error) { 572 if (error == EINTR || error == ERESTART) 573 interrupted = 1; 574 break; 575 } 576 } 577 if (error == 0) { 578 error = so->so_error; 579 so->so_error = 0; 580 } 581 SOCK_UNLOCK(so); 582 bad: 583 if (!interrupted) 584 so->so_state &= ~SS_ISCONNECTING; 585 if (error == ERESTART) 586 error = EINTR; 587 done1: 588 fdrop(fp, td); 589 return (error); 590 } 591 592 int 593 kern_socketpair(struct thread *td, int domain, int type, int protocol, 594 int *rsv) 595 { 596 struct filedesc *fdp = td->td_proc->p_fd; 597 struct file *fp1, *fp2; 598 struct socket *so1, *so2; 599 int fd, error; 600 601 AUDIT_ARG_SOCKET(domain, type, protocol); 602 #ifdef MAC 603 /* We might want to have a separate check for socket pairs. */ 604 error = mac_socket_check_create(td->td_ucred, domain, type, 605 protocol); 606 if (error) 607 return (error); 608 #endif 609 error = socreate(domain, &so1, type, protocol, td->td_ucred, td); 610 if (error) 611 return (error); 612 error = socreate(domain, &so2, type, protocol, td->td_ucred, td); 613 if (error) 614 goto free1; 615 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 616 error = falloc(td, &fp1, &fd); 617 if (error) 618 goto free2; 619 rsv[0] = fd; 620 fp1->f_data = so1; /* so1 already has ref count */ 621 error = falloc(td, &fp2, &fd); 622 if (error) 623 goto free3; 624 fp2->f_data = so2; /* so2 already has ref count */ 625 rsv[1] = fd; 626 error = soconnect2(so1, so2); 627 if (error) 628 goto free4; 629 if (type == SOCK_DGRAM) { 630 /* 631 * Datagram socket connection is asymmetric. 632 */ 633 error = soconnect2(so2, so1); 634 if (error) 635 goto free4; 636 } 637 finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops); 638 finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops); 639 fdrop(fp1, td); 640 fdrop(fp2, td); 641 return (0); 642 free4: 643 fdclose(fdp, fp2, rsv[1], td); 644 fdrop(fp2, td); 645 free3: 646 fdclose(fdp, fp1, rsv[0], td); 647 fdrop(fp1, td); 648 free2: 649 if (so2 != NULL) 650 (void)soclose(so2); 651 free1: 652 if (so1 != NULL) 653 (void)soclose(so1); 654 return (error); 655 } 656 657 int 658 socketpair(struct thread *td, struct socketpair_args *uap) 659 { 660 int error, sv[2]; 661 662 error = kern_socketpair(td, uap->domain, uap->type, 663 uap->protocol, sv); 664 if (error) 665 return (error); 666 error = copyout(sv, uap->rsv, 2 * sizeof(int)); 667 if (error) { 668 (void)kern_close(td, sv[0]); 669 (void)kern_close(td, sv[1]); 670 } 671 return (error); 672 } 673 674 static int 675 sendit(td, s, mp, flags) 676 struct thread *td; 677 int s; 678 struct msghdr *mp; 679 int flags; 680 { 681 struct mbuf *control; 682 struct sockaddr *to; 683 int error; 684 685 if (mp->msg_name != NULL) { 686 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 687 if (error) { 688 to = NULL; 689 goto bad; 690 } 691 mp->msg_name = to; 692 } else { 693 to = NULL; 694 } 695 696 if (mp->msg_control) { 697 if (mp->msg_controllen < sizeof(struct cmsghdr) 698 #ifdef COMPAT_OLDSOCK 699 && mp->msg_flags != MSG_COMPAT 700 #endif 701 ) { 702 error = EINVAL; 703 goto bad; 704 } 705 error = sockargs(&control, mp->msg_control, 706 mp->msg_controllen, MT_CONTROL); 707 if (error) 708 goto bad; 709 #ifdef COMPAT_OLDSOCK 710 if (mp->msg_flags == MSG_COMPAT) { 711 struct cmsghdr *cm; 712 713 M_PREPEND(control, sizeof(*cm), M_WAIT); 714 cm = mtod(control, struct cmsghdr *); 715 cm->cmsg_len = control->m_len; 716 cm->cmsg_level = SOL_SOCKET; 717 cm->cmsg_type = SCM_RIGHTS; 718 } 719 #endif 720 } else { 721 control = NULL; 722 } 723 724 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 725 726 bad: 727 if (to) 728 free(to, M_SONAME); 729 return (error); 730 } 731 732 int 733 kern_sendit(td, s, mp, flags, control, segflg) 734 struct thread *td; 735 int s; 736 struct msghdr *mp; 737 int flags; 738 struct mbuf *control; 739 enum uio_seg segflg; 740 { 741 struct file *fp; 742 struct uio auio; 743 struct iovec *iov; 744 struct socket *so; 745 int i; 746 int len, error; 747 #ifdef KTRACE 748 struct uio *ktruio = NULL; 749 #endif 750 751 AUDIT_ARG_FD(s); 752 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 753 if (error) 754 return (error); 755 so = (struct socket *)fp->f_data; 756 757 #ifdef MAC 758 if (mp->msg_name != NULL) { 759 error = mac_socket_check_connect(td->td_ucred, so, 760 mp->msg_name); 761 if (error) 762 goto bad; 763 } 764 error = mac_socket_check_send(td->td_ucred, so); 765 if (error) 766 goto bad; 767 #endif 768 769 auio.uio_iov = mp->msg_iov; 770 auio.uio_iovcnt = mp->msg_iovlen; 771 auio.uio_segflg = segflg; 772 auio.uio_rw = UIO_WRITE; 773 auio.uio_td = td; 774 auio.uio_offset = 0; /* XXX */ 775 auio.uio_resid = 0; 776 iov = mp->msg_iov; 777 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 778 if ((auio.uio_resid += iov->iov_len) < 0) { 779 error = EINVAL; 780 goto bad; 781 } 782 } 783 #ifdef KTRACE 784 if (KTRPOINT(td, KTR_GENIO)) 785 ktruio = cloneuio(&auio); 786 #endif 787 len = auio.uio_resid; 788 error = sosend(so, mp->msg_name, &auio, 0, control, flags, td); 789 if (error) { 790 if (auio.uio_resid != len && (error == ERESTART || 791 error == EINTR || error == EWOULDBLOCK)) 792 error = 0; 793 /* Generation of SIGPIPE can be controlled per socket */ 794 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 795 !(flags & MSG_NOSIGNAL)) { 796 PROC_LOCK(td->td_proc); 797 tdsignal(td, SIGPIPE); 798 PROC_UNLOCK(td->td_proc); 799 } 800 } 801 if (error == 0) 802 td->td_retval[0] = len - auio.uio_resid; 803 #ifdef KTRACE 804 if (ktruio != NULL) { 805 ktruio->uio_resid = td->td_retval[0]; 806 ktrgenio(s, UIO_WRITE, ktruio, error); 807 } 808 #endif 809 bad: 810 fdrop(fp, td); 811 return (error); 812 } 813 814 int 815 sendto(td, uap) 816 struct thread *td; 817 struct sendto_args /* { 818 int s; 819 caddr_t buf; 820 size_t len; 821 int flags; 822 caddr_t to; 823 int tolen; 824 } */ *uap; 825 { 826 struct msghdr msg; 827 struct iovec aiov; 828 int error; 829 830 msg.msg_name = uap->to; 831 msg.msg_namelen = uap->tolen; 832 msg.msg_iov = &aiov; 833 msg.msg_iovlen = 1; 834 msg.msg_control = 0; 835 #ifdef COMPAT_OLDSOCK 836 msg.msg_flags = 0; 837 #endif 838 aiov.iov_base = uap->buf; 839 aiov.iov_len = uap->len; 840 error = sendit(td, uap->s, &msg, uap->flags); 841 return (error); 842 } 843 844 #ifdef COMPAT_OLDSOCK 845 int 846 osend(td, uap) 847 struct thread *td; 848 struct osend_args /* { 849 int s; 850 caddr_t buf; 851 int len; 852 int flags; 853 } */ *uap; 854 { 855 struct msghdr msg; 856 struct iovec aiov; 857 int error; 858 859 msg.msg_name = 0; 860 msg.msg_namelen = 0; 861 msg.msg_iov = &aiov; 862 msg.msg_iovlen = 1; 863 aiov.iov_base = uap->buf; 864 aiov.iov_len = uap->len; 865 msg.msg_control = 0; 866 msg.msg_flags = 0; 867 error = sendit(td, uap->s, &msg, uap->flags); 868 return (error); 869 } 870 871 int 872 osendmsg(td, uap) 873 struct thread *td; 874 struct osendmsg_args /* { 875 int s; 876 caddr_t msg; 877 int flags; 878 } */ *uap; 879 { 880 struct msghdr msg; 881 struct iovec *iov; 882 int error; 883 884 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 885 if (error) 886 return (error); 887 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 888 if (error) 889 return (error); 890 msg.msg_iov = iov; 891 msg.msg_flags = MSG_COMPAT; 892 error = sendit(td, uap->s, &msg, uap->flags); 893 free(iov, M_IOV); 894 return (error); 895 } 896 #endif 897 898 int 899 sendmsg(td, uap) 900 struct thread *td; 901 struct sendmsg_args /* { 902 int s; 903 caddr_t msg; 904 int flags; 905 } */ *uap; 906 { 907 struct msghdr msg; 908 struct iovec *iov; 909 int error; 910 911 error = copyin(uap->msg, &msg, sizeof (msg)); 912 if (error) 913 return (error); 914 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 915 if (error) 916 return (error); 917 msg.msg_iov = iov; 918 #ifdef COMPAT_OLDSOCK 919 msg.msg_flags = 0; 920 #endif 921 error = sendit(td, uap->s, &msg, uap->flags); 922 free(iov, M_IOV); 923 return (error); 924 } 925 926 int 927 kern_recvit(td, s, mp, fromseg, controlp) 928 struct thread *td; 929 int s; 930 struct msghdr *mp; 931 enum uio_seg fromseg; 932 struct mbuf **controlp; 933 { 934 struct uio auio; 935 struct iovec *iov; 936 int i; 937 socklen_t len; 938 int error; 939 struct mbuf *m, *control = 0; 940 caddr_t ctlbuf; 941 struct file *fp; 942 struct socket *so; 943 struct sockaddr *fromsa = 0; 944 #ifdef KTRACE 945 struct uio *ktruio = NULL; 946 #endif 947 948 if (controlp != NULL) 949 *controlp = NULL; 950 951 AUDIT_ARG_FD(s); 952 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 953 if (error) 954 return (error); 955 so = fp->f_data; 956 957 #ifdef MAC 958 error = mac_socket_check_receive(td->td_ucred, so); 959 if (error) { 960 fdrop(fp, td); 961 return (error); 962 } 963 #endif 964 965 auio.uio_iov = mp->msg_iov; 966 auio.uio_iovcnt = mp->msg_iovlen; 967 auio.uio_segflg = UIO_USERSPACE; 968 auio.uio_rw = UIO_READ; 969 auio.uio_td = td; 970 auio.uio_offset = 0; /* XXX */ 971 auio.uio_resid = 0; 972 iov = mp->msg_iov; 973 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 974 if ((auio.uio_resid += iov->iov_len) < 0) { 975 fdrop(fp, td); 976 return (EINVAL); 977 } 978 } 979 #ifdef KTRACE 980 if (KTRPOINT(td, KTR_GENIO)) 981 ktruio = cloneuio(&auio); 982 #endif 983 len = auio.uio_resid; 984 CURVNET_SET(so->so_vnet); 985 error = soreceive(so, &fromsa, &auio, (struct mbuf **)0, 986 (mp->msg_control || controlp) ? &control : (struct mbuf **)0, 987 &mp->msg_flags); 988 CURVNET_RESTORE(); 989 if (error) { 990 if (auio.uio_resid != (int)len && (error == ERESTART || 991 error == EINTR || error == EWOULDBLOCK)) 992 error = 0; 993 } 994 #ifdef KTRACE 995 if (ktruio != NULL) { 996 ktruio->uio_resid = (int)len - auio.uio_resid; 997 ktrgenio(s, UIO_READ, ktruio, error); 998 } 999 #endif 1000 if (error) 1001 goto out; 1002 td->td_retval[0] = (int)len - auio.uio_resid; 1003 if (mp->msg_name) { 1004 len = mp->msg_namelen; 1005 if (len <= 0 || fromsa == 0) 1006 len = 0; 1007 else { 1008 /* save sa_len before it is destroyed by MSG_COMPAT */ 1009 len = MIN(len, fromsa->sa_len); 1010 #ifdef COMPAT_OLDSOCK 1011 if (mp->msg_flags & MSG_COMPAT) 1012 ((struct osockaddr *)fromsa)->sa_family = 1013 fromsa->sa_family; 1014 #endif 1015 if (fromseg == UIO_USERSPACE) { 1016 error = copyout(fromsa, mp->msg_name, 1017 (unsigned)len); 1018 if (error) 1019 goto out; 1020 } else 1021 bcopy(fromsa, mp->msg_name, len); 1022 } 1023 mp->msg_namelen = len; 1024 } 1025 if (mp->msg_control && controlp == NULL) { 1026 #ifdef COMPAT_OLDSOCK 1027 /* 1028 * We assume that old recvmsg calls won't receive access 1029 * rights and other control info, esp. as control info 1030 * is always optional and those options didn't exist in 4.3. 1031 * If we receive rights, trim the cmsghdr; anything else 1032 * is tossed. 1033 */ 1034 if (control && mp->msg_flags & MSG_COMPAT) { 1035 if (mtod(control, struct cmsghdr *)->cmsg_level != 1036 SOL_SOCKET || 1037 mtod(control, struct cmsghdr *)->cmsg_type != 1038 SCM_RIGHTS) { 1039 mp->msg_controllen = 0; 1040 goto out; 1041 } 1042 control->m_len -= sizeof (struct cmsghdr); 1043 control->m_data += sizeof (struct cmsghdr); 1044 } 1045 #endif 1046 len = mp->msg_controllen; 1047 m = control; 1048 mp->msg_controllen = 0; 1049 ctlbuf = mp->msg_control; 1050 1051 while (m && len > 0) { 1052 unsigned int tocopy; 1053 1054 if (len >= m->m_len) 1055 tocopy = m->m_len; 1056 else { 1057 mp->msg_flags |= MSG_CTRUNC; 1058 tocopy = len; 1059 } 1060 1061 if ((error = copyout(mtod(m, caddr_t), 1062 ctlbuf, tocopy)) != 0) 1063 goto out; 1064 1065 ctlbuf += tocopy; 1066 len -= tocopy; 1067 m = m->m_next; 1068 } 1069 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1070 } 1071 out: 1072 fdrop(fp, td); 1073 #ifdef KTRACE 1074 if (fromsa && KTRPOINT(td, KTR_STRUCT)) 1075 ktrsockaddr(fromsa); 1076 #endif 1077 if (fromsa) 1078 free(fromsa, M_SONAME); 1079 1080 if (error == 0 && controlp != NULL) 1081 *controlp = control; 1082 else if (control) 1083 m_freem(control); 1084 1085 return (error); 1086 } 1087 1088 static int 1089 recvit(td, s, mp, namelenp) 1090 struct thread *td; 1091 int s; 1092 struct msghdr *mp; 1093 void *namelenp; 1094 { 1095 int error; 1096 1097 error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL); 1098 if (error) 1099 return (error); 1100 if (namelenp) { 1101 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t)); 1102 #ifdef COMPAT_OLDSOCK 1103 if (mp->msg_flags & MSG_COMPAT) 1104 error = 0; /* old recvfrom didn't check */ 1105 #endif 1106 } 1107 return (error); 1108 } 1109 1110 int 1111 recvfrom(td, uap) 1112 struct thread *td; 1113 struct recvfrom_args /* { 1114 int s; 1115 caddr_t buf; 1116 size_t len; 1117 int flags; 1118 struct sockaddr * __restrict from; 1119 socklen_t * __restrict fromlenaddr; 1120 } */ *uap; 1121 { 1122 struct msghdr msg; 1123 struct iovec aiov; 1124 int error; 1125 1126 if (uap->fromlenaddr) { 1127 error = copyin(uap->fromlenaddr, 1128 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1129 if (error) 1130 goto done2; 1131 } else { 1132 msg.msg_namelen = 0; 1133 } 1134 msg.msg_name = uap->from; 1135 msg.msg_iov = &aiov; 1136 msg.msg_iovlen = 1; 1137 aiov.iov_base = uap->buf; 1138 aiov.iov_len = uap->len; 1139 msg.msg_control = 0; 1140 msg.msg_flags = uap->flags; 1141 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1142 done2: 1143 return(error); 1144 } 1145 1146 #ifdef COMPAT_OLDSOCK 1147 int 1148 orecvfrom(td, uap) 1149 struct thread *td; 1150 struct recvfrom_args *uap; 1151 { 1152 1153 uap->flags |= MSG_COMPAT; 1154 return (recvfrom(td, uap)); 1155 } 1156 #endif 1157 1158 #ifdef COMPAT_OLDSOCK 1159 int 1160 orecv(td, uap) 1161 struct thread *td; 1162 struct orecv_args /* { 1163 int s; 1164 caddr_t buf; 1165 int len; 1166 int flags; 1167 } */ *uap; 1168 { 1169 struct msghdr msg; 1170 struct iovec aiov; 1171 int error; 1172 1173 msg.msg_name = 0; 1174 msg.msg_namelen = 0; 1175 msg.msg_iov = &aiov; 1176 msg.msg_iovlen = 1; 1177 aiov.iov_base = uap->buf; 1178 aiov.iov_len = uap->len; 1179 msg.msg_control = 0; 1180 msg.msg_flags = uap->flags; 1181 error = recvit(td, uap->s, &msg, NULL); 1182 return (error); 1183 } 1184 1185 /* 1186 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1187 * overlays the new one, missing only the flags, and with the (old) access 1188 * rights where the control fields are now. 1189 */ 1190 int 1191 orecvmsg(td, uap) 1192 struct thread *td; 1193 struct orecvmsg_args /* { 1194 int s; 1195 struct omsghdr *msg; 1196 int flags; 1197 } */ *uap; 1198 { 1199 struct msghdr msg; 1200 struct iovec *iov; 1201 int error; 1202 1203 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1204 if (error) 1205 return (error); 1206 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1207 if (error) 1208 return (error); 1209 msg.msg_flags = uap->flags | MSG_COMPAT; 1210 msg.msg_iov = iov; 1211 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1212 if (msg.msg_controllen && error == 0) 1213 error = copyout(&msg.msg_controllen, 1214 &uap->msg->msg_accrightslen, sizeof (int)); 1215 free(iov, M_IOV); 1216 return (error); 1217 } 1218 #endif 1219 1220 int 1221 recvmsg(td, uap) 1222 struct thread *td; 1223 struct recvmsg_args /* { 1224 int s; 1225 struct msghdr *msg; 1226 int flags; 1227 } */ *uap; 1228 { 1229 struct msghdr msg; 1230 struct iovec *uiov, *iov; 1231 int error; 1232 1233 error = copyin(uap->msg, &msg, sizeof (msg)); 1234 if (error) 1235 return (error); 1236 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1237 if (error) 1238 return (error); 1239 msg.msg_flags = uap->flags; 1240 #ifdef COMPAT_OLDSOCK 1241 msg.msg_flags &= ~MSG_COMPAT; 1242 #endif 1243 uiov = msg.msg_iov; 1244 msg.msg_iov = iov; 1245 error = recvit(td, uap->s, &msg, NULL); 1246 if (error == 0) { 1247 msg.msg_iov = uiov; 1248 error = copyout(&msg, uap->msg, sizeof(msg)); 1249 } 1250 free(iov, M_IOV); 1251 return (error); 1252 } 1253 1254 /* ARGSUSED */ 1255 int 1256 shutdown(td, uap) 1257 struct thread *td; 1258 struct shutdown_args /* { 1259 int s; 1260 int how; 1261 } */ *uap; 1262 { 1263 struct socket *so; 1264 struct file *fp; 1265 int error; 1266 1267 AUDIT_ARG_FD(uap->s); 1268 error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL); 1269 if (error == 0) { 1270 so = fp->f_data; 1271 error = soshutdown(so, uap->how); 1272 fdrop(fp, td); 1273 } 1274 return (error); 1275 } 1276 1277 /* ARGSUSED */ 1278 int 1279 setsockopt(td, uap) 1280 struct thread *td; 1281 struct setsockopt_args /* { 1282 int s; 1283 int level; 1284 int name; 1285 caddr_t val; 1286 int valsize; 1287 } */ *uap; 1288 { 1289 1290 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1291 uap->val, UIO_USERSPACE, uap->valsize)); 1292 } 1293 1294 int 1295 kern_setsockopt(td, s, level, name, val, valseg, valsize) 1296 struct thread *td; 1297 int s; 1298 int level; 1299 int name; 1300 void *val; 1301 enum uio_seg valseg; 1302 socklen_t valsize; 1303 { 1304 int error; 1305 struct socket *so; 1306 struct file *fp; 1307 struct sockopt sopt; 1308 1309 if (val == NULL && valsize != 0) 1310 return (EFAULT); 1311 if ((int)valsize < 0) 1312 return (EINVAL); 1313 1314 sopt.sopt_dir = SOPT_SET; 1315 sopt.sopt_level = level; 1316 sopt.sopt_name = name; 1317 sopt.sopt_val = val; 1318 sopt.sopt_valsize = valsize; 1319 switch (valseg) { 1320 case UIO_USERSPACE: 1321 sopt.sopt_td = td; 1322 break; 1323 case UIO_SYSSPACE: 1324 sopt.sopt_td = NULL; 1325 break; 1326 default: 1327 panic("kern_setsockopt called with bad valseg"); 1328 } 1329 1330 AUDIT_ARG_FD(s); 1331 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 1332 if (error == 0) { 1333 so = fp->f_data; 1334 CURVNET_SET(so->so_vnet); 1335 error = sosetopt(so, &sopt); 1336 CURVNET_RESTORE(); 1337 fdrop(fp, td); 1338 } 1339 return(error); 1340 } 1341 1342 /* ARGSUSED */ 1343 int 1344 getsockopt(td, uap) 1345 struct thread *td; 1346 struct getsockopt_args /* { 1347 int s; 1348 int level; 1349 int name; 1350 void * __restrict val; 1351 socklen_t * __restrict avalsize; 1352 } */ *uap; 1353 { 1354 socklen_t valsize; 1355 int error; 1356 1357 if (uap->val) { 1358 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1359 if (error) 1360 return (error); 1361 } 1362 1363 error = kern_getsockopt(td, uap->s, uap->level, uap->name, 1364 uap->val, UIO_USERSPACE, &valsize); 1365 1366 if (error == 0) 1367 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1368 return (error); 1369 } 1370 1371 /* 1372 * Kernel version of getsockopt. 1373 * optval can be a userland or userspace. optlen is always a kernel pointer. 1374 */ 1375 int 1376 kern_getsockopt(td, s, level, name, val, valseg, valsize) 1377 struct thread *td; 1378 int s; 1379 int level; 1380 int name; 1381 void *val; 1382 enum uio_seg valseg; 1383 socklen_t *valsize; 1384 { 1385 int error; 1386 struct socket *so; 1387 struct file *fp; 1388 struct sockopt sopt; 1389 1390 if (val == NULL) 1391 *valsize = 0; 1392 if ((int)*valsize < 0) 1393 return (EINVAL); 1394 1395 sopt.sopt_dir = SOPT_GET; 1396 sopt.sopt_level = level; 1397 sopt.sopt_name = name; 1398 sopt.sopt_val = val; 1399 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1400 switch (valseg) { 1401 case UIO_USERSPACE: 1402 sopt.sopt_td = td; 1403 break; 1404 case UIO_SYSSPACE: 1405 sopt.sopt_td = NULL; 1406 break; 1407 default: 1408 panic("kern_getsockopt called with bad valseg"); 1409 } 1410 1411 AUDIT_ARG_FD(s); 1412 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 1413 if (error == 0) { 1414 so = fp->f_data; 1415 CURVNET_SET(so->so_vnet); 1416 error = sogetopt(so, &sopt); 1417 CURVNET_RESTORE(); 1418 *valsize = sopt.sopt_valsize; 1419 fdrop(fp, td); 1420 } 1421 return (error); 1422 } 1423 1424 /* 1425 * getsockname1() - Get socket name. 1426 */ 1427 /* ARGSUSED */ 1428 static int 1429 getsockname1(td, uap, compat) 1430 struct thread *td; 1431 struct getsockname_args /* { 1432 int fdes; 1433 struct sockaddr * __restrict asa; 1434 socklen_t * __restrict alen; 1435 } */ *uap; 1436 int compat; 1437 { 1438 struct sockaddr *sa; 1439 socklen_t len; 1440 int error; 1441 1442 error = copyin(uap->alen, &len, sizeof(len)); 1443 if (error) 1444 return (error); 1445 1446 error = kern_getsockname(td, uap->fdes, &sa, &len); 1447 if (error) 1448 return (error); 1449 1450 if (len != 0) { 1451 #ifdef COMPAT_OLDSOCK 1452 if (compat) 1453 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1454 #endif 1455 error = copyout(sa, uap->asa, (u_int)len); 1456 } 1457 free(sa, M_SONAME); 1458 if (error == 0) 1459 error = copyout(&len, uap->alen, sizeof(len)); 1460 return (error); 1461 } 1462 1463 int 1464 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, 1465 socklen_t *alen) 1466 { 1467 struct socket *so; 1468 struct file *fp; 1469 socklen_t len; 1470 int error; 1471 1472 if (*alen < 0) 1473 return (EINVAL); 1474 1475 AUDIT_ARG_FD(fd); 1476 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 1477 if (error) 1478 return (error); 1479 so = fp->f_data; 1480 *sa = NULL; 1481 CURVNET_SET(so->so_vnet); 1482 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa); 1483 CURVNET_RESTORE(); 1484 if (error) 1485 goto bad; 1486 if (*sa == NULL) 1487 len = 0; 1488 else 1489 len = MIN(*alen, (*sa)->sa_len); 1490 *alen = len; 1491 #ifdef KTRACE 1492 if (KTRPOINT(td, KTR_STRUCT)) 1493 ktrsockaddr(*sa); 1494 #endif 1495 bad: 1496 fdrop(fp, td); 1497 if (error && *sa) { 1498 free(*sa, M_SONAME); 1499 *sa = NULL; 1500 } 1501 return (error); 1502 } 1503 1504 int 1505 getsockname(td, uap) 1506 struct thread *td; 1507 struct getsockname_args *uap; 1508 { 1509 1510 return (getsockname1(td, uap, 0)); 1511 } 1512 1513 #ifdef COMPAT_OLDSOCK 1514 int 1515 ogetsockname(td, uap) 1516 struct thread *td; 1517 struct getsockname_args *uap; 1518 { 1519 1520 return (getsockname1(td, uap, 1)); 1521 } 1522 #endif /* COMPAT_OLDSOCK */ 1523 1524 /* 1525 * getpeername1() - Get name of peer for connected socket. 1526 */ 1527 /* ARGSUSED */ 1528 static int 1529 getpeername1(td, uap, compat) 1530 struct thread *td; 1531 struct getpeername_args /* { 1532 int fdes; 1533 struct sockaddr * __restrict asa; 1534 socklen_t * __restrict alen; 1535 } */ *uap; 1536 int compat; 1537 { 1538 struct sockaddr *sa; 1539 socklen_t len; 1540 int error; 1541 1542 error = copyin(uap->alen, &len, sizeof (len)); 1543 if (error) 1544 return (error); 1545 1546 error = kern_getpeername(td, uap->fdes, &sa, &len); 1547 if (error) 1548 return (error); 1549 1550 if (len != 0) { 1551 #ifdef COMPAT_OLDSOCK 1552 if (compat) 1553 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1554 #endif 1555 error = copyout(sa, uap->asa, (u_int)len); 1556 } 1557 free(sa, M_SONAME); 1558 if (error == 0) 1559 error = copyout(&len, uap->alen, sizeof(len)); 1560 return (error); 1561 } 1562 1563 int 1564 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, 1565 socklen_t *alen) 1566 { 1567 struct socket *so; 1568 struct file *fp; 1569 socklen_t len; 1570 int error; 1571 1572 if (*alen < 0) 1573 return (EINVAL); 1574 1575 AUDIT_ARG_FD(fd); 1576 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 1577 if (error) 1578 return (error); 1579 so = fp->f_data; 1580 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1581 error = ENOTCONN; 1582 goto done; 1583 } 1584 *sa = NULL; 1585 CURVNET_SET(so->so_vnet); 1586 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa); 1587 CURVNET_RESTORE(); 1588 if (error) 1589 goto bad; 1590 if (*sa == NULL) 1591 len = 0; 1592 else 1593 len = MIN(*alen, (*sa)->sa_len); 1594 *alen = len; 1595 #ifdef KTRACE 1596 if (KTRPOINT(td, KTR_STRUCT)) 1597 ktrsockaddr(*sa); 1598 #endif 1599 bad: 1600 if (error && *sa) { 1601 free(*sa, M_SONAME); 1602 *sa = NULL; 1603 } 1604 done: 1605 fdrop(fp, td); 1606 return (error); 1607 } 1608 1609 int 1610 getpeername(td, uap) 1611 struct thread *td; 1612 struct getpeername_args *uap; 1613 { 1614 1615 return (getpeername1(td, uap, 0)); 1616 } 1617 1618 #ifdef COMPAT_OLDSOCK 1619 int 1620 ogetpeername(td, uap) 1621 struct thread *td; 1622 struct ogetpeername_args *uap; 1623 { 1624 1625 /* XXX uap should have type `getpeername_args *' to begin with. */ 1626 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1627 } 1628 #endif /* COMPAT_OLDSOCK */ 1629 1630 int 1631 sockargs(mp, buf, buflen, type) 1632 struct mbuf **mp; 1633 caddr_t buf; 1634 int buflen, type; 1635 { 1636 struct sockaddr *sa; 1637 struct mbuf *m; 1638 int error; 1639 1640 if ((u_int)buflen > MLEN) { 1641 #ifdef COMPAT_OLDSOCK 1642 if (type == MT_SONAME && (u_int)buflen <= 112) 1643 buflen = MLEN; /* unix domain compat. hack */ 1644 else 1645 #endif 1646 if ((u_int)buflen > MCLBYTES) 1647 return (EINVAL); 1648 } 1649 m = m_get(M_WAIT, type); 1650 if ((u_int)buflen > MLEN) 1651 MCLGET(m, M_WAIT); 1652 m->m_len = buflen; 1653 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1654 if (error) 1655 (void) m_free(m); 1656 else { 1657 *mp = m; 1658 if (type == MT_SONAME) { 1659 sa = mtod(m, struct sockaddr *); 1660 1661 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1662 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1663 sa->sa_family = sa->sa_len; 1664 #endif 1665 sa->sa_len = buflen; 1666 } 1667 } 1668 return (error); 1669 } 1670 1671 int 1672 getsockaddr(namp, uaddr, len) 1673 struct sockaddr **namp; 1674 caddr_t uaddr; 1675 size_t len; 1676 { 1677 struct sockaddr *sa; 1678 int error; 1679 1680 if (len > SOCK_MAXADDRLEN) 1681 return (ENAMETOOLONG); 1682 if (len < offsetof(struct sockaddr, sa_data[0])) 1683 return (EINVAL); 1684 sa = malloc(len, M_SONAME, M_WAITOK); 1685 error = copyin(uaddr, sa, len); 1686 if (error) { 1687 free(sa, M_SONAME); 1688 } else { 1689 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1690 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1691 sa->sa_family = sa->sa_len; 1692 #endif 1693 sa->sa_len = len; 1694 *namp = sa; 1695 } 1696 return (error); 1697 } 1698 1699 #include <sys/condvar.h> 1700 1701 struct sendfile_sync { 1702 struct mtx mtx; 1703 struct cv cv; 1704 unsigned count; 1705 }; 1706 1707 /* 1708 * Detach mapped page and release resources back to the system. 1709 */ 1710 void 1711 sf_buf_mext(void *addr, void *args) 1712 { 1713 vm_page_t m; 1714 struct sendfile_sync *sfs; 1715 1716 m = sf_buf_page(args); 1717 sf_buf_free(args); 1718 vm_page_lock(m); 1719 vm_page_unwire(m, 0); 1720 /* 1721 * Check for the object going away on us. This can 1722 * happen since we don't hold a reference to it. 1723 * If so, we're responsible for freeing the page. 1724 */ 1725 if (m->wire_count == 0 && m->object == NULL) 1726 vm_page_free(m); 1727 vm_page_unlock(m); 1728 if (addr == NULL) 1729 return; 1730 sfs = addr; 1731 mtx_lock(&sfs->mtx); 1732 KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0")); 1733 if (--sfs->count == 0) 1734 cv_signal(&sfs->cv); 1735 mtx_unlock(&sfs->mtx); 1736 } 1737 1738 /* 1739 * sendfile(2) 1740 * 1741 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1742 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1743 * 1744 * Send a file specified by 'fd' and starting at 'offset' to a socket 1745 * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes == 1746 * 0. Optionally add a header and/or trailer to the socket output. If 1747 * specified, write the total number of bytes sent into *sbytes. 1748 */ 1749 int 1750 sendfile(struct thread *td, struct sendfile_args *uap) 1751 { 1752 1753 return (do_sendfile(td, uap, 0)); 1754 } 1755 1756 static int 1757 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1758 { 1759 struct sf_hdtr hdtr; 1760 struct uio *hdr_uio, *trl_uio; 1761 int error; 1762 1763 hdr_uio = trl_uio = NULL; 1764 1765 if (uap->hdtr != NULL) { 1766 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1767 if (error) 1768 goto out; 1769 if (hdtr.headers != NULL) { 1770 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1771 if (error) 1772 goto out; 1773 } 1774 if (hdtr.trailers != NULL) { 1775 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); 1776 if (error) 1777 goto out; 1778 1779 } 1780 } 1781 1782 error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat); 1783 out: 1784 if (hdr_uio) 1785 free(hdr_uio, M_IOV); 1786 if (trl_uio) 1787 free(trl_uio, M_IOV); 1788 return (error); 1789 } 1790 1791 #ifdef COMPAT_FREEBSD4 1792 int 1793 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1794 { 1795 struct sendfile_args args; 1796 1797 args.fd = uap->fd; 1798 args.s = uap->s; 1799 args.offset = uap->offset; 1800 args.nbytes = uap->nbytes; 1801 args.hdtr = uap->hdtr; 1802 args.sbytes = uap->sbytes; 1803 args.flags = uap->flags; 1804 1805 return (do_sendfile(td, &args, 1)); 1806 } 1807 #endif /* COMPAT_FREEBSD4 */ 1808 1809 int 1810 kern_sendfile(struct thread *td, struct sendfile_args *uap, 1811 struct uio *hdr_uio, struct uio *trl_uio, int compat) 1812 { 1813 struct file *sock_fp; 1814 struct vnode *vp; 1815 struct vm_object *obj = NULL; 1816 struct socket *so = NULL; 1817 struct mbuf *m = NULL; 1818 struct sf_buf *sf; 1819 struct vm_page *pg; 1820 off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0; 1821 int error, hdrlen = 0, mnw = 0; 1822 int vfslocked; 1823 struct sendfile_sync *sfs = NULL; 1824 1825 /* 1826 * The file descriptor must be a regular file and have a 1827 * backing VM object. 1828 * File offset must be positive. If it goes beyond EOF 1829 * we send only the header/trailer and no payload data. 1830 */ 1831 AUDIT_ARG_FD(uap->fd); 1832 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1833 goto out; 1834 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1835 vn_lock(vp, LK_SHARED | LK_RETRY); 1836 if (vp->v_type == VREG) { 1837 obj = vp->v_object; 1838 if (obj != NULL) { 1839 /* 1840 * Temporarily increase the backing VM 1841 * object's reference count so that a forced 1842 * reclamation of its vnode does not 1843 * immediately destroy it. 1844 */ 1845 VM_OBJECT_LOCK(obj); 1846 if ((obj->flags & OBJ_DEAD) == 0) { 1847 vm_object_reference_locked(obj); 1848 VM_OBJECT_UNLOCK(obj); 1849 } else { 1850 VM_OBJECT_UNLOCK(obj); 1851 obj = NULL; 1852 } 1853 } 1854 } 1855 VOP_UNLOCK(vp, 0); 1856 VFS_UNLOCK_GIANT(vfslocked); 1857 if (obj == NULL) { 1858 error = EINVAL; 1859 goto out; 1860 } 1861 if (uap->offset < 0) { 1862 error = EINVAL; 1863 goto out; 1864 } 1865 1866 /* 1867 * The socket must be a stream socket and connected. 1868 * Remember if it a blocking or non-blocking socket. 1869 */ 1870 if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp, 1871 NULL)) != 0) 1872 goto out; 1873 so = sock_fp->f_data; 1874 if (so->so_type != SOCK_STREAM) { 1875 error = EINVAL; 1876 goto out; 1877 } 1878 if ((so->so_state & SS_ISCONNECTED) == 0) { 1879 error = ENOTCONN; 1880 goto out; 1881 } 1882 /* 1883 * Do not wait on memory allocations but return ENOMEM for 1884 * caller to retry later. 1885 * XXX: Experimental. 1886 */ 1887 if (uap->flags & SF_MNOWAIT) 1888 mnw = 1; 1889 1890 if (uap->flags & SF_SYNC) { 1891 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK); 1892 memset(sfs, 0, sizeof *sfs); 1893 mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); 1894 cv_init(&sfs->cv, "sendfile"); 1895 } 1896 1897 #ifdef MAC 1898 error = mac_socket_check_send(td->td_ucred, so); 1899 if (error) 1900 goto out; 1901 #endif 1902 1903 /* If headers are specified copy them into mbufs. */ 1904 if (hdr_uio != NULL) { 1905 hdr_uio->uio_td = td; 1906 hdr_uio->uio_rw = UIO_WRITE; 1907 if (hdr_uio->uio_resid > 0) { 1908 /* 1909 * In FBSD < 5.0 the nbytes to send also included 1910 * the header. If compat is specified subtract the 1911 * header size from nbytes. 1912 */ 1913 if (compat) { 1914 if (uap->nbytes > hdr_uio->uio_resid) 1915 uap->nbytes -= hdr_uio->uio_resid; 1916 else 1917 uap->nbytes = 0; 1918 } 1919 m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK), 1920 0, 0, 0); 1921 if (m == NULL) { 1922 error = mnw ? EAGAIN : ENOBUFS; 1923 goto out; 1924 } 1925 hdrlen = m_length(m, NULL); 1926 } 1927 } 1928 1929 /* 1930 * Protect against multiple writers to the socket. 1931 * 1932 * XXXRW: Historically this has assumed non-interruptibility, so now 1933 * we implement that, but possibly shouldn't. 1934 */ 1935 (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR); 1936 1937 /* 1938 * Loop through the pages of the file, starting with the requested 1939 * offset. Get a file page (do I/O if necessary), map the file page 1940 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1941 * it on the socket. 1942 * This is done in two loops. The inner loop turns as many pages 1943 * as it can, up to available socket buffer space, without blocking 1944 * into mbufs to have it bulk delivered into the socket send buffer. 1945 * The outer loop checks the state and available space of the socket 1946 * and takes care of the overall progress. 1947 */ 1948 for (off = uap->offset, rem = uap->nbytes; ; ) { 1949 int loopbytes = 0; 1950 int space = 0; 1951 int done = 0; 1952 1953 /* 1954 * Check the socket state for ongoing connection, 1955 * no errors and space in socket buffer. 1956 * If space is low allow for the remainder of the 1957 * file to be processed if it fits the socket buffer. 1958 * Otherwise block in waiting for sufficient space 1959 * to proceed, or if the socket is nonblocking, return 1960 * to userland with EAGAIN while reporting how far 1961 * we've come. 1962 * We wait until the socket buffer has significant free 1963 * space to do bulk sends. This makes good use of file 1964 * system read ahead and allows packet segmentation 1965 * offloading hardware to take over lots of work. If 1966 * we were not careful here we would send off only one 1967 * sfbuf at a time. 1968 */ 1969 SOCKBUF_LOCK(&so->so_snd); 1970 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2) 1971 so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; 1972 retry_space: 1973 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 1974 error = EPIPE; 1975 SOCKBUF_UNLOCK(&so->so_snd); 1976 goto done; 1977 } else if (so->so_error) { 1978 error = so->so_error; 1979 so->so_error = 0; 1980 SOCKBUF_UNLOCK(&so->so_snd); 1981 goto done; 1982 } 1983 space = sbspace(&so->so_snd); 1984 if (space < rem && 1985 (space <= 0 || 1986 space < so->so_snd.sb_lowat)) { 1987 if (so->so_state & SS_NBIO) { 1988 SOCKBUF_UNLOCK(&so->so_snd); 1989 error = EAGAIN; 1990 goto done; 1991 } 1992 /* 1993 * sbwait drops the lock while sleeping. 1994 * When we loop back to retry_space the 1995 * state may have changed and we retest 1996 * for it. 1997 */ 1998 error = sbwait(&so->so_snd); 1999 /* 2000 * An error from sbwait usually indicates that we've 2001 * been interrupted by a signal. If we've sent anything 2002 * then return bytes sent, otherwise return the error. 2003 */ 2004 if (error) { 2005 SOCKBUF_UNLOCK(&so->so_snd); 2006 goto done; 2007 } 2008 goto retry_space; 2009 } 2010 SOCKBUF_UNLOCK(&so->so_snd); 2011 2012 /* 2013 * Reduce space in the socket buffer by the size of 2014 * the header mbuf chain. 2015 * hdrlen is set to 0 after the first loop. 2016 */ 2017 space -= hdrlen; 2018 2019 /* 2020 * Loop and construct maximum sized mbuf chain to be bulk 2021 * dumped into socket buffer. 2022 */ 2023 while (space > loopbytes) { 2024 vm_pindex_t pindex; 2025 vm_offset_t pgoff; 2026 struct mbuf *m0; 2027 2028 VM_OBJECT_LOCK(obj); 2029 /* 2030 * Calculate the amount to transfer. 2031 * Not to exceed a page, the EOF, 2032 * or the passed in nbytes. 2033 */ 2034 pgoff = (vm_offset_t)(off & PAGE_MASK); 2035 xfsize = omin(PAGE_SIZE - pgoff, 2036 obj->un_pager.vnp.vnp_size - uap->offset - 2037 fsbytes - loopbytes); 2038 if (uap->nbytes) 2039 rem = (uap->nbytes - fsbytes - loopbytes); 2040 else 2041 rem = obj->un_pager.vnp.vnp_size - 2042 uap->offset - fsbytes - loopbytes; 2043 xfsize = omin(rem, xfsize); 2044 xfsize = omin(space - loopbytes, xfsize); 2045 if (xfsize <= 0) { 2046 VM_OBJECT_UNLOCK(obj); 2047 done = 1; /* all data sent */ 2048 break; 2049 } 2050 2051 /* 2052 * Attempt to look up the page. Allocate 2053 * if not found or wait and loop if busy. 2054 */ 2055 pindex = OFF_TO_IDX(off); 2056 pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY | 2057 VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY); 2058 2059 /* 2060 * Check if page is valid for what we need, 2061 * otherwise initiate I/O. 2062 * If we already turned some pages into mbufs, 2063 * send them off before we come here again and 2064 * block. 2065 */ 2066 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) 2067 VM_OBJECT_UNLOCK(obj); 2068 else if (m != NULL) 2069 error = EAGAIN; /* send what we already got */ 2070 else if (uap->flags & SF_NODISKIO) 2071 error = EBUSY; 2072 else { 2073 int bsize, resid; 2074 2075 /* 2076 * Ensure that our page is still around 2077 * when the I/O completes. 2078 */ 2079 vm_page_io_start(pg); 2080 VM_OBJECT_UNLOCK(obj); 2081 2082 /* 2083 * Get the page from backing store. 2084 */ 2085 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2086 error = vn_lock(vp, LK_SHARED); 2087 if (error != 0) 2088 goto after_read; 2089 bsize = vp->v_mount->mnt_stat.f_iosize; 2090 2091 /* 2092 * XXXMAC: Because we don't have fp->f_cred 2093 * here, we pass in NOCRED. This is probably 2094 * wrong, but is consistent with our original 2095 * implementation. 2096 */ 2097 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 2098 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 2099 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 2100 td->td_ucred, NOCRED, &resid, td); 2101 VOP_UNLOCK(vp, 0); 2102 after_read: 2103 VFS_UNLOCK_GIANT(vfslocked); 2104 VM_OBJECT_LOCK(obj); 2105 vm_page_io_finish(pg); 2106 if (!error) 2107 VM_OBJECT_UNLOCK(obj); 2108 mbstat.sf_iocnt++; 2109 } 2110 if (error) { 2111 vm_page_lock(pg); 2112 vm_page_unwire(pg, 0); 2113 /* 2114 * See if anyone else might know about 2115 * this page. If not and it is not valid, 2116 * then free it. 2117 */ 2118 if (pg->wire_count == 0 && pg->valid == 0 && 2119 pg->busy == 0 && !(pg->oflags & VPO_BUSY) && 2120 pg->hold_count == 0) 2121 vm_page_free(pg); 2122 vm_page_unlock(pg); 2123 VM_OBJECT_UNLOCK(obj); 2124 if (error == EAGAIN) 2125 error = 0; /* not a real error */ 2126 break; 2127 } 2128 2129 /* 2130 * Get a sendfile buf. We usually wait as long 2131 * as necessary, but this wait can be interrupted. 2132 */ 2133 if ((sf = sf_buf_alloc(pg, 2134 (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) { 2135 mbstat.sf_allocfail++; 2136 vm_page_lock(pg); 2137 vm_page_unwire(pg, 0); 2138 KASSERT(pg->object != NULL, 2139 ("kern_sendfile: object disappeared")); 2140 vm_page_unlock(pg); 2141 error = (mnw ? EAGAIN : EINTR); 2142 break; 2143 } 2144 2145 /* 2146 * Get an mbuf and set it up as having 2147 * external storage. 2148 */ 2149 m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); 2150 if (m0 == NULL) { 2151 error = (mnw ? EAGAIN : ENOBUFS); 2152 sf_buf_mext((void *)sf_buf_kva(sf), sf); 2153 break; 2154 } 2155 MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, 2156 sfs, sf, M_RDONLY, EXT_SFBUF); 2157 m0->m_data = (char *)sf_buf_kva(sf) + pgoff; 2158 m0->m_len = xfsize; 2159 2160 /* Append to mbuf chain. */ 2161 if (m != NULL) 2162 m_cat(m, m0); 2163 else 2164 m = m0; 2165 2166 /* Keep track of bits processed. */ 2167 loopbytes += xfsize; 2168 off += xfsize; 2169 2170 if (sfs != NULL) { 2171 mtx_lock(&sfs->mtx); 2172 sfs->count++; 2173 mtx_unlock(&sfs->mtx); 2174 } 2175 } 2176 2177 /* Add the buffer chain to the socket buffer. */ 2178 if (m != NULL) { 2179 int mlen, err; 2180 2181 mlen = m_length(m, NULL); 2182 SOCKBUF_LOCK(&so->so_snd); 2183 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2184 error = EPIPE; 2185 SOCKBUF_UNLOCK(&so->so_snd); 2186 goto done; 2187 } 2188 SOCKBUF_UNLOCK(&so->so_snd); 2189 CURVNET_SET(so->so_vnet); 2190 /* Avoid error aliasing. */ 2191 err = (*so->so_proto->pr_usrreqs->pru_send) 2192 (so, 0, m, NULL, NULL, td); 2193 CURVNET_RESTORE(); 2194 if (err == 0) { 2195 /* 2196 * We need two counters to get the 2197 * file offset and nbytes to send 2198 * right: 2199 * - sbytes contains the total amount 2200 * of bytes sent, including headers. 2201 * - fsbytes contains the total amount 2202 * of bytes sent from the file. 2203 */ 2204 sbytes += mlen; 2205 fsbytes += mlen; 2206 if (hdrlen) { 2207 fsbytes -= hdrlen; 2208 hdrlen = 0; 2209 } 2210 } else if (error == 0) 2211 error = err; 2212 m = NULL; /* pru_send always consumes */ 2213 } 2214 2215 /* Quit outer loop on error or when we're done. */ 2216 if (done) 2217 break; 2218 if (error) 2219 goto done; 2220 } 2221 2222 /* 2223 * Send trailers. Wimp out and use writev(2). 2224 */ 2225 if (trl_uio != NULL) { 2226 sbunlock(&so->so_snd); 2227 error = kern_writev(td, uap->s, trl_uio); 2228 if (error == 0) 2229 sbytes += td->td_retval[0]; 2230 goto out; 2231 } 2232 2233 done: 2234 sbunlock(&so->so_snd); 2235 out: 2236 /* 2237 * If there was no error we have to clear td->td_retval[0] 2238 * because it may have been set by writev. 2239 */ 2240 if (error == 0) { 2241 td->td_retval[0] = 0; 2242 } 2243 if (uap->sbytes != NULL) { 2244 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2245 } 2246 if (obj != NULL) 2247 vm_object_deallocate(obj); 2248 if (vp != NULL) { 2249 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2250 vrele(vp); 2251 VFS_UNLOCK_GIANT(vfslocked); 2252 } 2253 if (so) 2254 fdrop(sock_fp, td); 2255 if (m) 2256 m_freem(m); 2257 2258 if (sfs != NULL) { 2259 mtx_lock(&sfs->mtx); 2260 if (sfs->count != 0) 2261 cv_wait(&sfs->cv, &sfs->mtx); 2262 KASSERT(sfs->count == 0, ("sendfile sync still busy")); 2263 cv_destroy(&sfs->cv); 2264 mtx_destroy(&sfs->mtx); 2265 free(sfs, M_TEMP); 2266 } 2267 2268 if (error == ERESTART) 2269 error = EINTR; 2270 2271 return (error); 2272 } 2273 2274 /* 2275 * SCTP syscalls. 2276 * Functionality only compiled in if SCTP is defined in the kernel Makefile, 2277 * otherwise all return EOPNOTSUPP. 2278 * XXX: We should make this loadable one day. 2279 */ 2280 int 2281 sctp_peeloff(td, uap) 2282 struct thread *td; 2283 struct sctp_peeloff_args /* { 2284 int sd; 2285 caddr_t name; 2286 } */ *uap; 2287 { 2288 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2289 struct filedesc *fdp; 2290 struct file *nfp = NULL; 2291 int error; 2292 struct socket *head, *so; 2293 int fd; 2294 u_int fflag; 2295 2296 fdp = td->td_proc->p_fd; 2297 AUDIT_ARG_FD(uap->sd); 2298 error = fgetsock(td, uap->sd, &head, &fflag); 2299 if (error) 2300 goto done2; 2301 error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name); 2302 if (error) 2303 goto done2; 2304 /* 2305 * At this point we know we do have a assoc to pull 2306 * we proceed to get the fd setup. This may block 2307 * but that is ok. 2308 */ 2309 2310 error = falloc(td, &nfp, &fd); 2311 if (error) 2312 goto done; 2313 td->td_retval[0] = fd; 2314 2315 CURVNET_SET(head->so_vnet); 2316 so = sonewconn(head, SS_ISCONNECTED); 2317 if (so == NULL) 2318 goto noconnection; 2319 /* 2320 * Before changing the flags on the socket, we have to bump the 2321 * reference count. Otherwise, if the protocol calls sofree(), 2322 * the socket will be released due to a zero refcount. 2323 */ 2324 SOCK_LOCK(so); 2325 soref(so); /* file descriptor reference */ 2326 SOCK_UNLOCK(so); 2327 2328 ACCEPT_LOCK(); 2329 2330 TAILQ_REMOVE(&head->so_comp, so, so_list); 2331 head->so_qlen--; 2332 so->so_state |= (head->so_state & SS_NBIO); 2333 so->so_state &= ~SS_NOFDREF; 2334 so->so_qstate &= ~SQ_COMP; 2335 so->so_head = NULL; 2336 ACCEPT_UNLOCK(); 2337 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 2338 error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name); 2339 if (error) 2340 goto noconnection; 2341 if (head->so_sigio != NULL) 2342 fsetown(fgetown(&head->so_sigio), &so->so_sigio); 2343 2344 noconnection: 2345 /* 2346 * close the new descriptor, assuming someone hasn't ripped it 2347 * out from under us. 2348 */ 2349 if (error) 2350 fdclose(fdp, nfp, fd, td); 2351 2352 /* 2353 * Release explicitly held references before returning. 2354 */ 2355 CURVNET_RESTORE(); 2356 done: 2357 if (nfp != NULL) 2358 fdrop(nfp, td); 2359 fputsock(head); 2360 done2: 2361 return (error); 2362 #else /* SCTP */ 2363 return (EOPNOTSUPP); 2364 #endif /* SCTP */ 2365 } 2366 2367 int 2368 sctp_generic_sendmsg (td, uap) 2369 struct thread *td; 2370 struct sctp_generic_sendmsg_args /* { 2371 int sd, 2372 caddr_t msg, 2373 int mlen, 2374 caddr_t to, 2375 __socklen_t tolen, 2376 struct sctp_sndrcvinfo *sinfo, 2377 int flags 2378 } */ *uap; 2379 { 2380 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2381 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2382 struct socket *so; 2383 struct file *fp = NULL; 2384 int use_rcvinfo = 1; 2385 int error = 0, len; 2386 struct sockaddr *to = NULL; 2387 #ifdef KTRACE 2388 struct uio *ktruio = NULL; 2389 #endif 2390 struct uio auio; 2391 struct iovec iov[1]; 2392 2393 if (uap->sinfo) { 2394 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2395 if (error) 2396 return (error); 2397 u_sinfo = &sinfo; 2398 } 2399 if (uap->tolen) { 2400 error = getsockaddr(&to, uap->to, uap->tolen); 2401 if (error) { 2402 to = NULL; 2403 goto sctp_bad2; 2404 } 2405 } 2406 2407 AUDIT_ARG_FD(uap->sd); 2408 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); 2409 if (error) 2410 goto sctp_bad; 2411 #ifdef KTRACE 2412 if (to && (KTRPOINT(td, KTR_STRUCT))) 2413 ktrsockaddr(to); 2414 #endif 2415 2416 iov[0].iov_base = uap->msg; 2417 iov[0].iov_len = uap->mlen; 2418 2419 so = (struct socket *)fp->f_data; 2420 #ifdef MAC 2421 error = mac_socket_check_send(td->td_ucred, so); 2422 if (error) 2423 goto sctp_bad; 2424 #endif /* MAC */ 2425 2426 auio.uio_iov = iov; 2427 auio.uio_iovcnt = 1; 2428 auio.uio_segflg = UIO_USERSPACE; 2429 auio.uio_rw = UIO_WRITE; 2430 auio.uio_td = td; 2431 auio.uio_offset = 0; /* XXX */ 2432 auio.uio_resid = 0; 2433 len = auio.uio_resid = uap->mlen; 2434 CURVNET_SET(so->so_vnet); 2435 error = sctp_lower_sosend(so, to, &auio, 2436 (struct mbuf *)NULL, (struct mbuf *)NULL, 2437 uap->flags, use_rcvinfo, u_sinfo, td); 2438 CURVNET_RESTORE(); 2439 if (error) { 2440 if (auio.uio_resid != len && (error == ERESTART || 2441 error == EINTR || error == EWOULDBLOCK)) 2442 error = 0; 2443 /* Generation of SIGPIPE can be controlled per socket. */ 2444 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2445 !(uap->flags & MSG_NOSIGNAL)) { 2446 PROC_LOCK(td->td_proc); 2447 tdsignal(td, SIGPIPE); 2448 PROC_UNLOCK(td->td_proc); 2449 } 2450 } 2451 if (error == 0) 2452 td->td_retval[0] = len - auio.uio_resid; 2453 #ifdef KTRACE 2454 if (ktruio != NULL) { 2455 ktruio->uio_resid = td->td_retval[0]; 2456 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2457 } 2458 #endif /* KTRACE */ 2459 sctp_bad: 2460 if (fp) 2461 fdrop(fp, td); 2462 sctp_bad2: 2463 if (to) 2464 free(to, M_SONAME); 2465 return (error); 2466 #else /* SCTP */ 2467 return (EOPNOTSUPP); 2468 #endif /* SCTP */ 2469 } 2470 2471 int 2472 sctp_generic_sendmsg_iov(td, uap) 2473 struct thread *td; 2474 struct sctp_generic_sendmsg_iov_args /* { 2475 int sd, 2476 struct iovec *iov, 2477 int iovlen, 2478 caddr_t to, 2479 __socklen_t tolen, 2480 struct sctp_sndrcvinfo *sinfo, 2481 int flags 2482 } */ *uap; 2483 { 2484 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2485 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2486 struct socket *so; 2487 struct file *fp = NULL; 2488 int use_rcvinfo = 1; 2489 int error=0, len, i; 2490 struct sockaddr *to = NULL; 2491 #ifdef KTRACE 2492 struct uio *ktruio = NULL; 2493 #endif 2494 struct uio auio; 2495 struct iovec *iov, *tiov; 2496 2497 if (uap->sinfo) { 2498 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2499 if (error) 2500 return (error); 2501 u_sinfo = &sinfo; 2502 } 2503 if (uap->tolen) { 2504 error = getsockaddr(&to, uap->to, uap->tolen); 2505 if (error) { 2506 to = NULL; 2507 goto sctp_bad2; 2508 } 2509 } 2510 2511 AUDIT_ARG_FD(uap->sd); 2512 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); 2513 if (error) 2514 goto sctp_bad1; 2515 2516 #ifdef COMPAT_FREEBSD32 2517 if (SV_CURPROC_FLAG(SV_ILP32)) 2518 error = freebsd32_copyiniov((struct iovec32 *)uap->iov, 2519 uap->iovlen, &iov, EMSGSIZE); 2520 else 2521 #endif 2522 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2523 if (error) 2524 goto sctp_bad1; 2525 #ifdef KTRACE 2526 if (to && (KTRPOINT(td, KTR_STRUCT))) 2527 ktrsockaddr(to); 2528 #endif 2529 2530 so = (struct socket *)fp->f_data; 2531 #ifdef MAC 2532 error = mac_socket_check_send(td->td_ucred, so); 2533 if (error) 2534 goto sctp_bad; 2535 #endif /* MAC */ 2536 2537 auio.uio_iov = iov; 2538 auio.uio_iovcnt = uap->iovlen; 2539 auio.uio_segflg = UIO_USERSPACE; 2540 auio.uio_rw = UIO_WRITE; 2541 auio.uio_td = td; 2542 auio.uio_offset = 0; /* XXX */ 2543 auio.uio_resid = 0; 2544 tiov = iov; 2545 for (i = 0; i <uap->iovlen; i++, tiov++) { 2546 if ((auio.uio_resid += tiov->iov_len) < 0) { 2547 error = EINVAL; 2548 goto sctp_bad; 2549 } 2550 } 2551 len = auio.uio_resid; 2552 CURVNET_SET(so->so_vnet); 2553 error = sctp_lower_sosend(so, to, &auio, 2554 (struct mbuf *)NULL, (struct mbuf *)NULL, 2555 uap->flags, use_rcvinfo, u_sinfo, td); 2556 CURVNET_RESTORE(); 2557 if (error) { 2558 if (auio.uio_resid != len && (error == ERESTART || 2559 error == EINTR || error == EWOULDBLOCK)) 2560 error = 0; 2561 /* Generation of SIGPIPE can be controlled per socket */ 2562 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2563 !(uap->flags & MSG_NOSIGNAL)) { 2564 PROC_LOCK(td->td_proc); 2565 tdsignal(td, SIGPIPE); 2566 PROC_UNLOCK(td->td_proc); 2567 } 2568 } 2569 if (error == 0) 2570 td->td_retval[0] = len - auio.uio_resid; 2571 #ifdef KTRACE 2572 if (ktruio != NULL) { 2573 ktruio->uio_resid = td->td_retval[0]; 2574 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2575 } 2576 #endif /* KTRACE */ 2577 sctp_bad: 2578 free(iov, M_IOV); 2579 sctp_bad1: 2580 if (fp) 2581 fdrop(fp, td); 2582 sctp_bad2: 2583 if (to) 2584 free(to, M_SONAME); 2585 return (error); 2586 #else /* SCTP */ 2587 return (EOPNOTSUPP); 2588 #endif /* SCTP */ 2589 } 2590 2591 int 2592 sctp_generic_recvmsg(td, uap) 2593 struct thread *td; 2594 struct sctp_generic_recvmsg_args /* { 2595 int sd, 2596 struct iovec *iov, 2597 int iovlen, 2598 struct sockaddr *from, 2599 __socklen_t *fromlenaddr, 2600 struct sctp_sndrcvinfo *sinfo, 2601 int *msg_flags 2602 } */ *uap; 2603 { 2604 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2605 uint8_t sockbufstore[256]; 2606 struct uio auio; 2607 struct iovec *iov, *tiov; 2608 struct sctp_sndrcvinfo sinfo; 2609 struct socket *so; 2610 struct file *fp = NULL; 2611 struct sockaddr *fromsa; 2612 int fromlen; 2613 int len, i, msg_flags; 2614 int error = 0; 2615 #ifdef KTRACE 2616 struct uio *ktruio = NULL; 2617 #endif 2618 2619 AUDIT_ARG_FD(uap->sd); 2620 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); 2621 if (error) { 2622 return (error); 2623 } 2624 #ifdef COMPAT_FREEBSD32 2625 if (SV_CURPROC_FLAG(SV_ILP32)) 2626 error = freebsd32_copyiniov((struct iovec32 *)uap->iov, 2627 uap->iovlen, &iov, EMSGSIZE); 2628 else 2629 #endif 2630 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2631 if (error) 2632 goto out1; 2633 2634 so = fp->f_data; 2635 #ifdef MAC 2636 error = mac_socket_check_receive(td->td_ucred, so); 2637 if (error) { 2638 goto out; 2639 } 2640 #endif /* MAC */ 2641 2642 if (uap->fromlenaddr) { 2643 error = copyin(uap->fromlenaddr, 2644 &fromlen, sizeof (fromlen)); 2645 if (error) { 2646 goto out; 2647 } 2648 } else { 2649 fromlen = 0; 2650 } 2651 if (uap->msg_flags) { 2652 error = copyin(uap->msg_flags, &msg_flags, sizeof (int)); 2653 if (error) { 2654 goto out; 2655 } 2656 } else { 2657 msg_flags = 0; 2658 } 2659 auio.uio_iov = iov; 2660 auio.uio_iovcnt = uap->iovlen; 2661 auio.uio_segflg = UIO_USERSPACE; 2662 auio.uio_rw = UIO_READ; 2663 auio.uio_td = td; 2664 auio.uio_offset = 0; /* XXX */ 2665 auio.uio_resid = 0; 2666 tiov = iov; 2667 for (i = 0; i <uap->iovlen; i++, tiov++) { 2668 if ((auio.uio_resid += tiov->iov_len) < 0) { 2669 error = EINVAL; 2670 goto out; 2671 } 2672 } 2673 len = auio.uio_resid; 2674 fromsa = (struct sockaddr *)sockbufstore; 2675 2676 #ifdef KTRACE 2677 if (KTRPOINT(td, KTR_GENIO)) 2678 ktruio = cloneuio(&auio); 2679 #endif /* KTRACE */ 2680 memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo)); 2681 CURVNET_SET(so->so_vnet); 2682 error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL, 2683 fromsa, fromlen, &msg_flags, 2684 (struct sctp_sndrcvinfo *)&sinfo, 1); 2685 CURVNET_RESTORE(); 2686 if (error) { 2687 if (auio.uio_resid != (int)len && (error == ERESTART || 2688 error == EINTR || error == EWOULDBLOCK)) 2689 error = 0; 2690 } else { 2691 if (uap->sinfo) 2692 error = copyout(&sinfo, uap->sinfo, sizeof (sinfo)); 2693 } 2694 #ifdef KTRACE 2695 if (ktruio != NULL) { 2696 ktruio->uio_resid = (int)len - auio.uio_resid; 2697 ktrgenio(uap->sd, UIO_READ, ktruio, error); 2698 } 2699 #endif /* KTRACE */ 2700 if (error) 2701 goto out; 2702 td->td_retval[0] = (int)len - auio.uio_resid; 2703 2704 if (fromlen && uap->from) { 2705 len = fromlen; 2706 if (len <= 0 || fromsa == 0) 2707 len = 0; 2708 else { 2709 len = MIN(len, fromsa->sa_len); 2710 error = copyout(fromsa, uap->from, (unsigned)len); 2711 if (error) 2712 goto out; 2713 } 2714 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t)); 2715 if (error) { 2716 goto out; 2717 } 2718 } 2719 #ifdef KTRACE 2720 if (KTRPOINT(td, KTR_STRUCT)) 2721 ktrsockaddr(fromsa); 2722 #endif 2723 if (uap->msg_flags) { 2724 error = copyout(&msg_flags, uap->msg_flags, sizeof (int)); 2725 if (error) { 2726 goto out; 2727 } 2728 } 2729 out: 2730 free(iov, M_IOV); 2731 out1: 2732 if (fp) 2733 fdrop(fp, td); 2734 2735 return (error); 2736 #else /* SCTP */ 2737 return (EOPNOTSUPP); 2738 #endif /* SCTP */ 2739 } 2740