1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_sctp.h" 39 #include "opt_compat.h" 40 #include "opt_ktrace.h" 41 #include "opt_mac.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/lock.h> 47 #include <sys/mutex.h> 48 #include <sys/sysproto.h> 49 #include <sys/malloc.h> 50 #include <sys/filedesc.h> 51 #include <sys/event.h> 52 #include <sys/proc.h> 53 #include <sys/fcntl.h> 54 #include <sys/file.h> 55 #include <sys/filio.h> 56 #include <sys/mount.h> 57 #include <sys/mbuf.h> 58 #include <sys/protosw.h> 59 #include <sys/sf_buf.h> 60 #include <sys/socket.h> 61 #include <sys/socketvar.h> 62 #include <sys/signalvar.h> 63 #include <sys/syscallsubr.h> 64 #include <sys/sysctl.h> 65 #include <sys/uio.h> 66 #include <sys/vimage.h> 67 #include <sys/vnode.h> 68 #ifdef KTRACE 69 #include <sys/ktrace.h> 70 #endif 71 72 #include <security/mac/mac_framework.h> 73 74 #include <vm/vm.h> 75 #include <vm/vm_object.h> 76 #include <vm/vm_page.h> 77 #include <vm/vm_pageout.h> 78 #include <vm/vm_kern.h> 79 #include <vm/vm_extern.h> 80 81 #ifdef SCTP 82 #include <netinet/sctp.h> 83 #include <netinet/sctp_peeloff.h> 84 #endif /* SCTP */ 85 86 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 87 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 88 89 static int accept1(struct thread *td, struct accept_args *uap, int compat); 90 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 91 static int getsockname1(struct thread *td, struct getsockname_args *uap, 92 int compat); 93 static int getpeername1(struct thread *td, struct getpeername_args *uap, 94 int compat); 95 96 /* 97 * NSFBUFS-related variables and associated sysctls 98 */ 99 int nsfbufs; 100 int nsfbufspeak; 101 int nsfbufsused; 102 103 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 104 "Maximum number of sendfile(2) sf_bufs available"); 105 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 106 "Number of sendfile(2) sf_bufs at peak usage"); 107 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 108 "Number of sendfile(2) sf_bufs in use"); 109 110 /* 111 * Convert a user file descriptor to a kernel file entry. A reference on the 112 * file entry is held upon returning. This is lighter weight than 113 * fgetsock(), which bumps the socket reference drops the file reference 114 * count instead, as this approach avoids several additional mutex operations 115 * associated with the additional reference count. If requested, return the 116 * open file flags. 117 */ 118 static int 119 getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp) 120 { 121 struct file *fp; 122 int error; 123 124 fp = NULL; 125 if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) { 126 error = EBADF; 127 } else if (fp->f_type != DTYPE_SOCKET) { 128 fdrop(fp, curthread); 129 fp = NULL; 130 error = ENOTSOCK; 131 } else { 132 if (fflagp != NULL) 133 *fflagp = fp->f_flag; 134 error = 0; 135 } 136 *fpp = fp; 137 return (error); 138 } 139 140 /* 141 * System call interface to the socket abstraction. 142 */ 143 #if defined(COMPAT_43) 144 #define COMPAT_OLDSOCK 145 #endif 146 147 int 148 socket(td, uap) 149 struct thread *td; 150 struct socket_args /* { 151 int domain; 152 int type; 153 int protocol; 154 } */ *uap; 155 { 156 struct filedesc *fdp; 157 struct socket *so; 158 struct file *fp; 159 int fd, error; 160 161 #ifdef MAC 162 error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type, 163 uap->protocol); 164 if (error) 165 return (error); 166 #endif 167 fdp = td->td_proc->p_fd; 168 error = falloc(td, &fp, &fd); 169 if (error) 170 return (error); 171 /* An extra reference on `fp' has been held for us by falloc(). */ 172 error = socreate(uap->domain, &so, uap->type, uap->protocol, 173 td->td_ucred, td); 174 if (error) { 175 fdclose(fdp, fp, fd, td); 176 } else { 177 finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops); 178 td->td_retval[0] = fd; 179 } 180 fdrop(fp, td); 181 return (error); 182 } 183 184 /* ARGSUSED */ 185 int 186 bind(td, uap) 187 struct thread *td; 188 struct bind_args /* { 189 int s; 190 caddr_t name; 191 int namelen; 192 } */ *uap; 193 { 194 struct sockaddr *sa; 195 int error; 196 197 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 198 return (error); 199 200 error = kern_bind(td, uap->s, sa); 201 free(sa, M_SONAME); 202 return (error); 203 } 204 205 int 206 kern_bind(td, fd, sa) 207 struct thread *td; 208 int fd; 209 struct sockaddr *sa; 210 { 211 struct socket *so; 212 struct file *fp; 213 int error; 214 215 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 216 if (error) 217 return (error); 218 so = fp->f_data; 219 #ifdef KTRACE 220 if (KTRPOINT(td, KTR_STRUCT)) 221 ktrsockaddr(sa); 222 #endif 223 #ifdef MAC 224 error = mac_socket_check_bind(td->td_ucred, so, sa); 225 if (error == 0) 226 #endif 227 error = sobind(so, sa, td); 228 fdrop(fp, td); 229 return (error); 230 } 231 232 /* ARGSUSED */ 233 int 234 listen(td, uap) 235 struct thread *td; 236 struct listen_args /* { 237 int s; 238 int backlog; 239 } */ *uap; 240 { 241 struct socket *so; 242 struct file *fp; 243 int error; 244 245 error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL); 246 if (error == 0) { 247 so = fp->f_data; 248 #ifdef MAC 249 error = mac_socket_check_listen(td->td_ucred, so); 250 if (error == 0) { 251 #endif 252 CURVNET_SET(so->so_vnet); 253 error = solisten(so, uap->backlog, td); 254 CURVNET_RESTORE(); 255 #ifdef MAC 256 } 257 #endif 258 fdrop(fp, td); 259 } 260 return(error); 261 } 262 263 /* 264 * accept1() 265 */ 266 static int 267 accept1(td, uap, compat) 268 struct thread *td; 269 struct accept_args /* { 270 int s; 271 struct sockaddr * __restrict name; 272 socklen_t * __restrict anamelen; 273 } */ *uap; 274 int compat; 275 { 276 struct sockaddr *name; 277 socklen_t namelen; 278 struct file *fp; 279 int error; 280 281 if (uap->name == NULL) 282 return (kern_accept(td, uap->s, NULL, NULL, NULL)); 283 284 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 285 if (error) 286 return (error); 287 288 error = kern_accept(td, uap->s, &name, &namelen, &fp); 289 290 /* 291 * return a namelen of zero for older code which might 292 * ignore the return value from accept. 293 */ 294 if (error) { 295 (void) copyout(&namelen, 296 uap->anamelen, sizeof(*uap->anamelen)); 297 return (error); 298 } 299 300 if (error == 0 && name != NULL) { 301 #ifdef COMPAT_OLDSOCK 302 if (compat) 303 ((struct osockaddr *)name)->sa_family = 304 name->sa_family; 305 #endif 306 error = copyout(name, uap->name, namelen); 307 } 308 if (error == 0) 309 error = copyout(&namelen, uap->anamelen, 310 sizeof(namelen)); 311 if (error) 312 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td); 313 fdrop(fp, td); 314 free(name, M_SONAME); 315 return (error); 316 } 317 318 int 319 kern_accept(struct thread *td, int s, struct sockaddr **name, 320 socklen_t *namelen, struct file **fp) 321 { 322 struct filedesc *fdp; 323 struct file *headfp, *nfp = NULL; 324 struct sockaddr *sa = NULL; 325 int error; 326 struct socket *head, *so; 327 int fd; 328 u_int fflag; 329 pid_t pgid; 330 int tmp; 331 332 if (name) { 333 *name = NULL; 334 if (*namelen < 0) 335 return (EINVAL); 336 } 337 338 fdp = td->td_proc->p_fd; 339 error = getsock(fdp, s, &headfp, &fflag); 340 if (error) 341 return (error); 342 head = headfp->f_data; 343 if ((head->so_options & SO_ACCEPTCONN) == 0) { 344 error = EINVAL; 345 goto done; 346 } 347 #ifdef MAC 348 error = mac_socket_check_accept(td->td_ucred, head); 349 if (error != 0) 350 goto done; 351 #endif 352 error = falloc(td, &nfp, &fd); 353 if (error) 354 goto done; 355 ACCEPT_LOCK(); 356 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 357 ACCEPT_UNLOCK(); 358 error = EWOULDBLOCK; 359 goto noconnection; 360 } 361 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 362 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 363 head->so_error = ECONNABORTED; 364 break; 365 } 366 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 367 "accept", 0); 368 if (error) { 369 ACCEPT_UNLOCK(); 370 goto noconnection; 371 } 372 } 373 if (head->so_error) { 374 error = head->so_error; 375 head->so_error = 0; 376 ACCEPT_UNLOCK(); 377 goto noconnection; 378 } 379 so = TAILQ_FIRST(&head->so_comp); 380 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 381 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 382 383 /* 384 * Before changing the flags on the socket, we have to bump the 385 * reference count. Otherwise, if the protocol calls sofree(), 386 * the socket will be released due to a zero refcount. 387 */ 388 SOCK_LOCK(so); /* soref() and so_state update */ 389 soref(so); /* file descriptor reference */ 390 391 TAILQ_REMOVE(&head->so_comp, so, so_list); 392 head->so_qlen--; 393 so->so_state |= (head->so_state & SS_NBIO); 394 so->so_qstate &= ~SQ_COMP; 395 so->so_head = NULL; 396 397 SOCK_UNLOCK(so); 398 ACCEPT_UNLOCK(); 399 400 /* An extra reference on `nfp' has been held for us by falloc(). */ 401 td->td_retval[0] = fd; 402 403 /* connection has been removed from the listen queue */ 404 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 405 406 pgid = fgetown(&head->so_sigio); 407 if (pgid != 0) 408 fsetown(pgid, &so->so_sigio); 409 410 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 411 /* Sync socket nonblocking/async state with file flags */ 412 tmp = fflag & FNONBLOCK; 413 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 414 tmp = fflag & FASYNC; 415 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 416 sa = 0; 417 CURVNET_SET(so->so_vnet); 418 error = soaccept(so, &sa); 419 CURVNET_RESTORE(); 420 if (error) { 421 /* 422 * return a namelen of zero for older code which might 423 * ignore the return value from accept. 424 */ 425 if (name) 426 *namelen = 0; 427 goto noconnection; 428 } 429 if (sa == NULL) { 430 if (name) 431 *namelen = 0; 432 goto done; 433 } 434 if (name) { 435 /* check sa_len before it is destroyed */ 436 if (*namelen > sa->sa_len) 437 *namelen = sa->sa_len; 438 #ifdef KTRACE 439 if (KTRPOINT(td, KTR_STRUCT)) 440 ktrsockaddr(sa); 441 #endif 442 *name = sa; 443 sa = NULL; 444 } 445 noconnection: 446 if (sa) 447 free(sa, M_SONAME); 448 449 /* 450 * close the new descriptor, assuming someone hasn't ripped it 451 * out from under us. 452 */ 453 if (error) 454 fdclose(fdp, nfp, fd, td); 455 456 /* 457 * Release explicitly held references before returning. We return 458 * a reference on nfp to the caller on success if they request it. 459 */ 460 done: 461 if (fp != NULL) { 462 if (error == 0) { 463 *fp = nfp; 464 nfp = NULL; 465 } else 466 *fp = NULL; 467 } 468 if (nfp != NULL) 469 fdrop(nfp, td); 470 fdrop(headfp, td); 471 return (error); 472 } 473 474 int 475 accept(td, uap) 476 struct thread *td; 477 struct accept_args *uap; 478 { 479 480 return (accept1(td, uap, 0)); 481 } 482 483 #ifdef COMPAT_OLDSOCK 484 int 485 oaccept(td, uap) 486 struct thread *td; 487 struct accept_args *uap; 488 { 489 490 return (accept1(td, uap, 1)); 491 } 492 #endif /* COMPAT_OLDSOCK */ 493 494 /* ARGSUSED */ 495 int 496 connect(td, uap) 497 struct thread *td; 498 struct connect_args /* { 499 int s; 500 caddr_t name; 501 int namelen; 502 } */ *uap; 503 { 504 struct sockaddr *sa; 505 int error; 506 507 error = getsockaddr(&sa, uap->name, uap->namelen); 508 if (error) 509 return (error); 510 511 error = kern_connect(td, uap->s, sa); 512 free(sa, M_SONAME); 513 return (error); 514 } 515 516 517 int 518 kern_connect(td, fd, sa) 519 struct thread *td; 520 int fd; 521 struct sockaddr *sa; 522 { 523 struct socket *so; 524 struct file *fp; 525 int error; 526 int interrupted = 0; 527 528 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 529 if (error) 530 return (error); 531 so = fp->f_data; 532 if (so->so_state & SS_ISCONNECTING) { 533 error = EALREADY; 534 goto done1; 535 } 536 #ifdef KTRACE 537 if (KTRPOINT(td, KTR_STRUCT)) 538 ktrsockaddr(sa); 539 #endif 540 #ifdef MAC 541 error = mac_socket_check_connect(td->td_ucred, so, sa); 542 if (error) 543 goto bad; 544 #endif 545 error = soconnect(so, sa, td); 546 if (error) 547 goto bad; 548 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 549 error = EINPROGRESS; 550 goto done1; 551 } 552 SOCK_LOCK(so); 553 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 554 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 555 "connec", 0); 556 if (error) { 557 if (error == EINTR || error == ERESTART) 558 interrupted = 1; 559 break; 560 } 561 } 562 if (error == 0) { 563 error = so->so_error; 564 so->so_error = 0; 565 } 566 SOCK_UNLOCK(so); 567 bad: 568 if (!interrupted) 569 so->so_state &= ~SS_ISCONNECTING; 570 if (error == ERESTART) 571 error = EINTR; 572 done1: 573 fdrop(fp, td); 574 return (error); 575 } 576 577 int 578 kern_socketpair(struct thread *td, int domain, int type, int protocol, 579 int *rsv) 580 { 581 struct filedesc *fdp = td->td_proc->p_fd; 582 struct file *fp1, *fp2; 583 struct socket *so1, *so2; 584 int fd, error; 585 586 #ifdef MAC 587 /* We might want to have a separate check for socket pairs. */ 588 error = mac_socket_check_create(td->td_ucred, domain, type, 589 protocol); 590 if (error) 591 return (error); 592 #endif 593 error = socreate(domain, &so1, type, protocol, td->td_ucred, td); 594 if (error) 595 return (error); 596 error = socreate(domain, &so2, type, protocol, td->td_ucred, td); 597 if (error) 598 goto free1; 599 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 600 error = falloc(td, &fp1, &fd); 601 if (error) 602 goto free2; 603 rsv[0] = fd; 604 fp1->f_data = so1; /* so1 already has ref count */ 605 error = falloc(td, &fp2, &fd); 606 if (error) 607 goto free3; 608 fp2->f_data = so2; /* so2 already has ref count */ 609 rsv[1] = fd; 610 error = soconnect2(so1, so2); 611 if (error) 612 goto free4; 613 if (type == SOCK_DGRAM) { 614 /* 615 * Datagram socket connection is asymmetric. 616 */ 617 error = soconnect2(so2, so1); 618 if (error) 619 goto free4; 620 } 621 finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops); 622 finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops); 623 fdrop(fp1, td); 624 fdrop(fp2, td); 625 return (0); 626 free4: 627 fdclose(fdp, fp2, rsv[1], td); 628 fdrop(fp2, td); 629 free3: 630 fdclose(fdp, fp1, rsv[0], td); 631 fdrop(fp1, td); 632 free2: 633 if (so2 != NULL) 634 (void)soclose(so2); 635 free1: 636 if (so1 != NULL) 637 (void)soclose(so1); 638 return (error); 639 } 640 641 int 642 socketpair(struct thread *td, struct socketpair_args *uap) 643 { 644 int error, sv[2]; 645 646 error = kern_socketpair(td, uap->domain, uap->type, 647 uap->protocol, sv); 648 if (error) 649 return (error); 650 error = copyout(sv, uap->rsv, 2 * sizeof(int)); 651 if (error) { 652 (void)kern_close(td, sv[0]); 653 (void)kern_close(td, sv[1]); 654 } 655 return (error); 656 } 657 658 static int 659 sendit(td, s, mp, flags) 660 struct thread *td; 661 int s; 662 struct msghdr *mp; 663 int flags; 664 { 665 struct mbuf *control; 666 struct sockaddr *to; 667 int error; 668 669 if (mp->msg_name != NULL) { 670 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 671 if (error) { 672 to = NULL; 673 goto bad; 674 } 675 mp->msg_name = to; 676 } else { 677 to = NULL; 678 } 679 680 if (mp->msg_control) { 681 if (mp->msg_controllen < sizeof(struct cmsghdr) 682 #ifdef COMPAT_OLDSOCK 683 && mp->msg_flags != MSG_COMPAT 684 #endif 685 ) { 686 error = EINVAL; 687 goto bad; 688 } 689 error = sockargs(&control, mp->msg_control, 690 mp->msg_controllen, MT_CONTROL); 691 if (error) 692 goto bad; 693 #ifdef COMPAT_OLDSOCK 694 if (mp->msg_flags == MSG_COMPAT) { 695 struct cmsghdr *cm; 696 697 M_PREPEND(control, sizeof(*cm), M_WAIT); 698 cm = mtod(control, struct cmsghdr *); 699 cm->cmsg_len = control->m_len; 700 cm->cmsg_level = SOL_SOCKET; 701 cm->cmsg_type = SCM_RIGHTS; 702 } 703 #endif 704 } else { 705 control = NULL; 706 } 707 708 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 709 710 bad: 711 if (to) 712 free(to, M_SONAME); 713 return (error); 714 } 715 716 int 717 kern_sendit(td, s, mp, flags, control, segflg) 718 struct thread *td; 719 int s; 720 struct msghdr *mp; 721 int flags; 722 struct mbuf *control; 723 enum uio_seg segflg; 724 { 725 struct file *fp; 726 struct uio auio; 727 struct iovec *iov; 728 struct socket *so; 729 int i; 730 int len, error; 731 #ifdef KTRACE 732 struct uio *ktruio = NULL; 733 #endif 734 735 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 736 if (error) 737 return (error); 738 so = (struct socket *)fp->f_data; 739 740 #ifdef MAC 741 if (mp->msg_name != NULL) { 742 error = mac_socket_check_connect(td->td_ucred, so, 743 mp->msg_name); 744 if (error) 745 goto bad; 746 } 747 error = mac_socket_check_send(td->td_ucred, so); 748 if (error) 749 goto bad; 750 #endif 751 752 auio.uio_iov = mp->msg_iov; 753 auio.uio_iovcnt = mp->msg_iovlen; 754 auio.uio_segflg = segflg; 755 auio.uio_rw = UIO_WRITE; 756 auio.uio_td = td; 757 auio.uio_offset = 0; /* XXX */ 758 auio.uio_resid = 0; 759 iov = mp->msg_iov; 760 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 761 if ((auio.uio_resid += iov->iov_len) < 0) { 762 error = EINVAL; 763 goto bad; 764 } 765 } 766 #ifdef KTRACE 767 if (KTRPOINT(td, KTR_GENIO)) 768 ktruio = cloneuio(&auio); 769 #endif 770 len = auio.uio_resid; 771 error = sosend(so, mp->msg_name, &auio, 0, control, flags, td); 772 if (error) { 773 if (auio.uio_resid != len && (error == ERESTART || 774 error == EINTR || error == EWOULDBLOCK)) 775 error = 0; 776 /* Generation of SIGPIPE can be controlled per socket */ 777 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 778 !(flags & MSG_NOSIGNAL)) { 779 PROC_LOCK(td->td_proc); 780 psignal(td->td_proc, SIGPIPE); 781 PROC_UNLOCK(td->td_proc); 782 } 783 } 784 if (error == 0) 785 td->td_retval[0] = len - auio.uio_resid; 786 #ifdef KTRACE 787 if (ktruio != NULL) { 788 ktruio->uio_resid = td->td_retval[0]; 789 ktrgenio(s, UIO_WRITE, ktruio, error); 790 } 791 #endif 792 bad: 793 fdrop(fp, td); 794 return (error); 795 } 796 797 int 798 sendto(td, uap) 799 struct thread *td; 800 struct sendto_args /* { 801 int s; 802 caddr_t buf; 803 size_t len; 804 int flags; 805 caddr_t to; 806 int tolen; 807 } */ *uap; 808 { 809 struct msghdr msg; 810 struct iovec aiov; 811 int error; 812 813 msg.msg_name = uap->to; 814 msg.msg_namelen = uap->tolen; 815 msg.msg_iov = &aiov; 816 msg.msg_iovlen = 1; 817 msg.msg_control = 0; 818 #ifdef COMPAT_OLDSOCK 819 msg.msg_flags = 0; 820 #endif 821 aiov.iov_base = uap->buf; 822 aiov.iov_len = uap->len; 823 error = sendit(td, uap->s, &msg, uap->flags); 824 return (error); 825 } 826 827 #ifdef COMPAT_OLDSOCK 828 int 829 osend(td, uap) 830 struct thread *td; 831 struct osend_args /* { 832 int s; 833 caddr_t buf; 834 int len; 835 int flags; 836 } */ *uap; 837 { 838 struct msghdr msg; 839 struct iovec aiov; 840 int error; 841 842 msg.msg_name = 0; 843 msg.msg_namelen = 0; 844 msg.msg_iov = &aiov; 845 msg.msg_iovlen = 1; 846 aiov.iov_base = uap->buf; 847 aiov.iov_len = uap->len; 848 msg.msg_control = 0; 849 msg.msg_flags = 0; 850 error = sendit(td, uap->s, &msg, uap->flags); 851 return (error); 852 } 853 854 int 855 osendmsg(td, uap) 856 struct thread *td; 857 struct osendmsg_args /* { 858 int s; 859 caddr_t msg; 860 int flags; 861 } */ *uap; 862 { 863 struct msghdr msg; 864 struct iovec *iov; 865 int error; 866 867 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 868 if (error) 869 return (error); 870 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 871 if (error) 872 return (error); 873 msg.msg_iov = iov; 874 msg.msg_flags = MSG_COMPAT; 875 error = sendit(td, uap->s, &msg, uap->flags); 876 free(iov, M_IOV); 877 return (error); 878 } 879 #endif 880 881 int 882 sendmsg(td, uap) 883 struct thread *td; 884 struct sendmsg_args /* { 885 int s; 886 caddr_t msg; 887 int flags; 888 } */ *uap; 889 { 890 struct msghdr msg; 891 struct iovec *iov; 892 int error; 893 894 error = copyin(uap->msg, &msg, sizeof (msg)); 895 if (error) 896 return (error); 897 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 898 if (error) 899 return (error); 900 msg.msg_iov = iov; 901 #ifdef COMPAT_OLDSOCK 902 msg.msg_flags = 0; 903 #endif 904 error = sendit(td, uap->s, &msg, uap->flags); 905 free(iov, M_IOV); 906 return (error); 907 } 908 909 int 910 kern_recvit(td, s, mp, fromseg, controlp) 911 struct thread *td; 912 int s; 913 struct msghdr *mp; 914 enum uio_seg fromseg; 915 struct mbuf **controlp; 916 { 917 struct uio auio; 918 struct iovec *iov; 919 int i; 920 socklen_t len; 921 int error; 922 struct mbuf *m, *control = 0; 923 caddr_t ctlbuf; 924 struct file *fp; 925 struct socket *so; 926 struct sockaddr *fromsa = 0; 927 #ifdef KTRACE 928 struct uio *ktruio = NULL; 929 #endif 930 931 if(controlp != NULL) 932 *controlp = 0; 933 934 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 935 if (error) 936 return (error); 937 so = fp->f_data; 938 939 #ifdef MAC 940 error = mac_socket_check_receive(td->td_ucred, so); 941 if (error) { 942 fdrop(fp, td); 943 return (error); 944 } 945 #endif 946 947 auio.uio_iov = mp->msg_iov; 948 auio.uio_iovcnt = mp->msg_iovlen; 949 auio.uio_segflg = UIO_USERSPACE; 950 auio.uio_rw = UIO_READ; 951 auio.uio_td = td; 952 auio.uio_offset = 0; /* XXX */ 953 auio.uio_resid = 0; 954 iov = mp->msg_iov; 955 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 956 if ((auio.uio_resid += iov->iov_len) < 0) { 957 fdrop(fp, td); 958 return (EINVAL); 959 } 960 } 961 #ifdef KTRACE 962 if (KTRPOINT(td, KTR_GENIO)) 963 ktruio = cloneuio(&auio); 964 #endif 965 len = auio.uio_resid; 966 CURVNET_SET(so->so_vnet); 967 error = soreceive(so, &fromsa, &auio, (struct mbuf **)0, 968 (mp->msg_control || controlp) ? &control : (struct mbuf **)0, 969 &mp->msg_flags); 970 CURVNET_RESTORE(); 971 if (error) { 972 if (auio.uio_resid != (int)len && (error == ERESTART || 973 error == EINTR || error == EWOULDBLOCK)) 974 error = 0; 975 } 976 #ifdef KTRACE 977 if (ktruio != NULL) { 978 ktruio->uio_resid = (int)len - auio.uio_resid; 979 ktrgenio(s, UIO_READ, ktruio, error); 980 } 981 #endif 982 if (error) 983 goto out; 984 td->td_retval[0] = (int)len - auio.uio_resid; 985 if (mp->msg_name) { 986 len = mp->msg_namelen; 987 if (len <= 0 || fromsa == 0) 988 len = 0; 989 else { 990 /* save sa_len before it is destroyed by MSG_COMPAT */ 991 len = MIN(len, fromsa->sa_len); 992 #ifdef COMPAT_OLDSOCK 993 if (mp->msg_flags & MSG_COMPAT) 994 ((struct osockaddr *)fromsa)->sa_family = 995 fromsa->sa_family; 996 #endif 997 if (fromseg == UIO_USERSPACE) { 998 error = copyout(fromsa, mp->msg_name, 999 (unsigned)len); 1000 if (error) 1001 goto out; 1002 } else 1003 bcopy(fromsa, mp->msg_name, len); 1004 } 1005 mp->msg_namelen = len; 1006 } 1007 if (mp->msg_control && controlp == NULL) { 1008 #ifdef COMPAT_OLDSOCK 1009 /* 1010 * We assume that old recvmsg calls won't receive access 1011 * rights and other control info, esp. as control info 1012 * is always optional and those options didn't exist in 4.3. 1013 * If we receive rights, trim the cmsghdr; anything else 1014 * is tossed. 1015 */ 1016 if (control && mp->msg_flags & MSG_COMPAT) { 1017 if (mtod(control, struct cmsghdr *)->cmsg_level != 1018 SOL_SOCKET || 1019 mtod(control, struct cmsghdr *)->cmsg_type != 1020 SCM_RIGHTS) { 1021 mp->msg_controllen = 0; 1022 goto out; 1023 } 1024 control->m_len -= sizeof (struct cmsghdr); 1025 control->m_data += sizeof (struct cmsghdr); 1026 } 1027 #endif 1028 len = mp->msg_controllen; 1029 m = control; 1030 mp->msg_controllen = 0; 1031 ctlbuf = mp->msg_control; 1032 1033 while (m && len > 0) { 1034 unsigned int tocopy; 1035 1036 if (len >= m->m_len) 1037 tocopy = m->m_len; 1038 else { 1039 mp->msg_flags |= MSG_CTRUNC; 1040 tocopy = len; 1041 } 1042 1043 if ((error = copyout(mtod(m, caddr_t), 1044 ctlbuf, tocopy)) != 0) 1045 goto out; 1046 1047 ctlbuf += tocopy; 1048 len -= tocopy; 1049 m = m->m_next; 1050 } 1051 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1052 } 1053 out: 1054 fdrop(fp, td); 1055 #ifdef KTRACE 1056 if (fromsa && KTRPOINT(td, KTR_STRUCT)) 1057 ktrsockaddr(fromsa); 1058 #endif 1059 if (fromsa) 1060 free(fromsa, M_SONAME); 1061 1062 if (error == 0 && controlp != NULL) 1063 *controlp = control; 1064 else if (control) 1065 m_freem(control); 1066 1067 return (error); 1068 } 1069 1070 static int 1071 recvit(td, s, mp, namelenp) 1072 struct thread *td; 1073 int s; 1074 struct msghdr *mp; 1075 void *namelenp; 1076 { 1077 int error; 1078 1079 error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL); 1080 if (error) 1081 return (error); 1082 if (namelenp) { 1083 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t)); 1084 #ifdef COMPAT_OLDSOCK 1085 if (mp->msg_flags & MSG_COMPAT) 1086 error = 0; /* old recvfrom didn't check */ 1087 #endif 1088 } 1089 return (error); 1090 } 1091 1092 int 1093 recvfrom(td, uap) 1094 struct thread *td; 1095 struct recvfrom_args /* { 1096 int s; 1097 caddr_t buf; 1098 size_t len; 1099 int flags; 1100 struct sockaddr * __restrict from; 1101 socklen_t * __restrict fromlenaddr; 1102 } */ *uap; 1103 { 1104 struct msghdr msg; 1105 struct iovec aiov; 1106 int error; 1107 1108 if (uap->fromlenaddr) { 1109 error = copyin(uap->fromlenaddr, 1110 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1111 if (error) 1112 goto done2; 1113 } else { 1114 msg.msg_namelen = 0; 1115 } 1116 msg.msg_name = uap->from; 1117 msg.msg_iov = &aiov; 1118 msg.msg_iovlen = 1; 1119 aiov.iov_base = uap->buf; 1120 aiov.iov_len = uap->len; 1121 msg.msg_control = 0; 1122 msg.msg_flags = uap->flags; 1123 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1124 done2: 1125 return(error); 1126 } 1127 1128 #ifdef COMPAT_OLDSOCK 1129 int 1130 orecvfrom(td, uap) 1131 struct thread *td; 1132 struct recvfrom_args *uap; 1133 { 1134 1135 uap->flags |= MSG_COMPAT; 1136 return (recvfrom(td, uap)); 1137 } 1138 #endif 1139 1140 #ifdef COMPAT_OLDSOCK 1141 int 1142 orecv(td, uap) 1143 struct thread *td; 1144 struct orecv_args /* { 1145 int s; 1146 caddr_t buf; 1147 int len; 1148 int flags; 1149 } */ *uap; 1150 { 1151 struct msghdr msg; 1152 struct iovec aiov; 1153 int error; 1154 1155 msg.msg_name = 0; 1156 msg.msg_namelen = 0; 1157 msg.msg_iov = &aiov; 1158 msg.msg_iovlen = 1; 1159 aiov.iov_base = uap->buf; 1160 aiov.iov_len = uap->len; 1161 msg.msg_control = 0; 1162 msg.msg_flags = uap->flags; 1163 error = recvit(td, uap->s, &msg, NULL); 1164 return (error); 1165 } 1166 1167 /* 1168 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1169 * overlays the new one, missing only the flags, and with the (old) access 1170 * rights where the control fields are now. 1171 */ 1172 int 1173 orecvmsg(td, uap) 1174 struct thread *td; 1175 struct orecvmsg_args /* { 1176 int s; 1177 struct omsghdr *msg; 1178 int flags; 1179 } */ *uap; 1180 { 1181 struct msghdr msg; 1182 struct iovec *iov; 1183 int error; 1184 1185 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1186 if (error) 1187 return (error); 1188 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1189 if (error) 1190 return (error); 1191 msg.msg_flags = uap->flags | MSG_COMPAT; 1192 msg.msg_iov = iov; 1193 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1194 if (msg.msg_controllen && error == 0) 1195 error = copyout(&msg.msg_controllen, 1196 &uap->msg->msg_accrightslen, sizeof (int)); 1197 free(iov, M_IOV); 1198 return (error); 1199 } 1200 #endif 1201 1202 int 1203 recvmsg(td, uap) 1204 struct thread *td; 1205 struct recvmsg_args /* { 1206 int s; 1207 struct msghdr *msg; 1208 int flags; 1209 } */ *uap; 1210 { 1211 struct msghdr msg; 1212 struct iovec *uiov, *iov; 1213 int error; 1214 1215 error = copyin(uap->msg, &msg, sizeof (msg)); 1216 if (error) 1217 return (error); 1218 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1219 if (error) 1220 return (error); 1221 msg.msg_flags = uap->flags; 1222 #ifdef COMPAT_OLDSOCK 1223 msg.msg_flags &= ~MSG_COMPAT; 1224 #endif 1225 uiov = msg.msg_iov; 1226 msg.msg_iov = iov; 1227 error = recvit(td, uap->s, &msg, NULL); 1228 if (error == 0) { 1229 msg.msg_iov = uiov; 1230 error = copyout(&msg, uap->msg, sizeof(msg)); 1231 } 1232 free(iov, M_IOV); 1233 return (error); 1234 } 1235 1236 /* ARGSUSED */ 1237 int 1238 shutdown(td, uap) 1239 struct thread *td; 1240 struct shutdown_args /* { 1241 int s; 1242 int how; 1243 } */ *uap; 1244 { 1245 struct socket *so; 1246 struct file *fp; 1247 int error; 1248 1249 error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL); 1250 if (error == 0) { 1251 so = fp->f_data; 1252 error = soshutdown(so, uap->how); 1253 fdrop(fp, td); 1254 } 1255 return (error); 1256 } 1257 1258 /* ARGSUSED */ 1259 int 1260 setsockopt(td, uap) 1261 struct thread *td; 1262 struct setsockopt_args /* { 1263 int s; 1264 int level; 1265 int name; 1266 caddr_t val; 1267 int valsize; 1268 } */ *uap; 1269 { 1270 1271 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1272 uap->val, UIO_USERSPACE, uap->valsize)); 1273 } 1274 1275 int 1276 kern_setsockopt(td, s, level, name, val, valseg, valsize) 1277 struct thread *td; 1278 int s; 1279 int level; 1280 int name; 1281 void *val; 1282 enum uio_seg valseg; 1283 socklen_t valsize; 1284 { 1285 int error; 1286 struct socket *so; 1287 struct file *fp; 1288 struct sockopt sopt; 1289 1290 if (val == NULL && valsize != 0) 1291 return (EFAULT); 1292 if ((int)valsize < 0) 1293 return (EINVAL); 1294 1295 sopt.sopt_dir = SOPT_SET; 1296 sopt.sopt_level = level; 1297 sopt.sopt_name = name; 1298 sopt.sopt_val = val; 1299 sopt.sopt_valsize = valsize; 1300 switch (valseg) { 1301 case UIO_USERSPACE: 1302 sopt.sopt_td = td; 1303 break; 1304 case UIO_SYSSPACE: 1305 sopt.sopt_td = NULL; 1306 break; 1307 default: 1308 panic("kern_setsockopt called with bad valseg"); 1309 } 1310 1311 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 1312 if (error == 0) { 1313 so = fp->f_data; 1314 CURVNET_SET(so->so_vnet); 1315 error = sosetopt(so, &sopt); 1316 CURVNET_RESTORE(); 1317 fdrop(fp, td); 1318 } 1319 return(error); 1320 } 1321 1322 /* ARGSUSED */ 1323 int 1324 getsockopt(td, uap) 1325 struct thread *td; 1326 struct getsockopt_args /* { 1327 int s; 1328 int level; 1329 int name; 1330 void * __restrict val; 1331 socklen_t * __restrict avalsize; 1332 } */ *uap; 1333 { 1334 socklen_t valsize; 1335 int error; 1336 1337 if (uap->val) { 1338 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1339 if (error) 1340 return (error); 1341 } 1342 1343 error = kern_getsockopt(td, uap->s, uap->level, uap->name, 1344 uap->val, UIO_USERSPACE, &valsize); 1345 1346 if (error == 0) 1347 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1348 return (error); 1349 } 1350 1351 /* 1352 * Kernel version of getsockopt. 1353 * optval can be a userland or userspace. optlen is always a kernel pointer. 1354 */ 1355 int 1356 kern_getsockopt(td, s, level, name, val, valseg, valsize) 1357 struct thread *td; 1358 int s; 1359 int level; 1360 int name; 1361 void *val; 1362 enum uio_seg valseg; 1363 socklen_t *valsize; 1364 { 1365 int error; 1366 struct socket *so; 1367 struct file *fp; 1368 struct sockopt sopt; 1369 1370 if (val == NULL) 1371 *valsize = 0; 1372 if ((int)*valsize < 0) 1373 return (EINVAL); 1374 1375 sopt.sopt_dir = SOPT_GET; 1376 sopt.sopt_level = level; 1377 sopt.sopt_name = name; 1378 sopt.sopt_val = val; 1379 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1380 switch (valseg) { 1381 case UIO_USERSPACE: 1382 sopt.sopt_td = td; 1383 break; 1384 case UIO_SYSSPACE: 1385 sopt.sopt_td = NULL; 1386 break; 1387 default: 1388 panic("kern_getsockopt called with bad valseg"); 1389 } 1390 1391 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 1392 if (error == 0) { 1393 so = fp->f_data; 1394 CURVNET_SET(so->so_vnet); 1395 error = sogetopt(so, &sopt); 1396 CURVNET_RESTORE(); 1397 *valsize = sopt.sopt_valsize; 1398 fdrop(fp, td); 1399 } 1400 return (error); 1401 } 1402 1403 /* 1404 * getsockname1() - Get socket name. 1405 */ 1406 /* ARGSUSED */ 1407 static int 1408 getsockname1(td, uap, compat) 1409 struct thread *td; 1410 struct getsockname_args /* { 1411 int fdes; 1412 struct sockaddr * __restrict asa; 1413 socklen_t * __restrict alen; 1414 } */ *uap; 1415 int compat; 1416 { 1417 struct sockaddr *sa; 1418 socklen_t len; 1419 int error; 1420 1421 error = copyin(uap->alen, &len, sizeof(len)); 1422 if (error) 1423 return (error); 1424 1425 error = kern_getsockname(td, uap->fdes, &sa, &len); 1426 if (error) 1427 return (error); 1428 1429 if (len != 0) { 1430 #ifdef COMPAT_OLDSOCK 1431 if (compat) 1432 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1433 #endif 1434 error = copyout(sa, uap->asa, (u_int)len); 1435 } 1436 free(sa, M_SONAME); 1437 if (error == 0) 1438 error = copyout(&len, uap->alen, sizeof(len)); 1439 return (error); 1440 } 1441 1442 int 1443 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, 1444 socklen_t *alen) 1445 { 1446 struct socket *so; 1447 struct file *fp; 1448 socklen_t len; 1449 int error; 1450 1451 if (*alen < 0) 1452 return (EINVAL); 1453 1454 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 1455 if (error) 1456 return (error); 1457 so = fp->f_data; 1458 *sa = NULL; 1459 CURVNET_SET(so->so_vnet); 1460 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa); 1461 CURVNET_RESTORE(); 1462 if (error) 1463 goto bad; 1464 if (*sa == NULL) 1465 len = 0; 1466 else 1467 len = MIN(*alen, (*sa)->sa_len); 1468 *alen = len; 1469 #ifdef KTRACE 1470 if (KTRPOINT(td, KTR_STRUCT)) 1471 ktrsockaddr(*sa); 1472 #endif 1473 bad: 1474 fdrop(fp, td); 1475 if (error && *sa) { 1476 free(*sa, M_SONAME); 1477 *sa = NULL; 1478 } 1479 return (error); 1480 } 1481 1482 int 1483 getsockname(td, uap) 1484 struct thread *td; 1485 struct getsockname_args *uap; 1486 { 1487 1488 return (getsockname1(td, uap, 0)); 1489 } 1490 1491 #ifdef COMPAT_OLDSOCK 1492 int 1493 ogetsockname(td, uap) 1494 struct thread *td; 1495 struct getsockname_args *uap; 1496 { 1497 1498 return (getsockname1(td, uap, 1)); 1499 } 1500 #endif /* COMPAT_OLDSOCK */ 1501 1502 /* 1503 * getpeername1() - Get name of peer for connected socket. 1504 */ 1505 /* ARGSUSED */ 1506 static int 1507 getpeername1(td, uap, compat) 1508 struct thread *td; 1509 struct getpeername_args /* { 1510 int fdes; 1511 struct sockaddr * __restrict asa; 1512 socklen_t * __restrict alen; 1513 } */ *uap; 1514 int compat; 1515 { 1516 struct sockaddr *sa; 1517 socklen_t len; 1518 int error; 1519 1520 error = copyin(uap->alen, &len, sizeof (len)); 1521 if (error) 1522 return (error); 1523 1524 error = kern_getpeername(td, uap->fdes, &sa, &len); 1525 if (error) 1526 return (error); 1527 1528 if (len != 0) { 1529 #ifdef COMPAT_OLDSOCK 1530 if (compat) 1531 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1532 #endif 1533 error = copyout(sa, uap->asa, (u_int)len); 1534 } 1535 free(sa, M_SONAME); 1536 if (error == 0) 1537 error = copyout(&len, uap->alen, sizeof(len)); 1538 return (error); 1539 } 1540 1541 int 1542 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, 1543 socklen_t *alen) 1544 { 1545 struct socket *so; 1546 struct file *fp; 1547 socklen_t len; 1548 int error; 1549 1550 if (*alen < 0) 1551 return (EINVAL); 1552 1553 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 1554 if (error) 1555 return (error); 1556 so = fp->f_data; 1557 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1558 error = ENOTCONN; 1559 goto done; 1560 } 1561 *sa = NULL; 1562 CURVNET_SET(so->so_vnet); 1563 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa); 1564 CURVNET_RESTORE(); 1565 if (error) 1566 goto bad; 1567 if (*sa == NULL) 1568 len = 0; 1569 else 1570 len = MIN(*alen, (*sa)->sa_len); 1571 *alen = len; 1572 #ifdef KTRACE 1573 if (KTRPOINT(td, KTR_STRUCT)) 1574 ktrsockaddr(*sa); 1575 #endif 1576 bad: 1577 if (error && *sa) { 1578 free(*sa, M_SONAME); 1579 *sa = NULL; 1580 } 1581 done: 1582 fdrop(fp, td); 1583 return (error); 1584 } 1585 1586 int 1587 getpeername(td, uap) 1588 struct thread *td; 1589 struct getpeername_args *uap; 1590 { 1591 1592 return (getpeername1(td, uap, 0)); 1593 } 1594 1595 #ifdef COMPAT_OLDSOCK 1596 int 1597 ogetpeername(td, uap) 1598 struct thread *td; 1599 struct ogetpeername_args *uap; 1600 { 1601 1602 /* XXX uap should have type `getpeername_args *' to begin with. */ 1603 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1604 } 1605 #endif /* COMPAT_OLDSOCK */ 1606 1607 int 1608 sockargs(mp, buf, buflen, type) 1609 struct mbuf **mp; 1610 caddr_t buf; 1611 int buflen, type; 1612 { 1613 struct sockaddr *sa; 1614 struct mbuf *m; 1615 int error; 1616 1617 if ((u_int)buflen > MLEN) { 1618 #ifdef COMPAT_OLDSOCK 1619 if (type == MT_SONAME && (u_int)buflen <= 112) 1620 buflen = MLEN; /* unix domain compat. hack */ 1621 else 1622 #endif 1623 if ((u_int)buflen > MCLBYTES) 1624 return (EINVAL); 1625 } 1626 m = m_get(M_WAIT, type); 1627 if ((u_int)buflen > MLEN) 1628 MCLGET(m, M_WAIT); 1629 m->m_len = buflen; 1630 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1631 if (error) 1632 (void) m_free(m); 1633 else { 1634 *mp = m; 1635 if (type == MT_SONAME) { 1636 sa = mtod(m, struct sockaddr *); 1637 1638 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1639 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1640 sa->sa_family = sa->sa_len; 1641 #endif 1642 sa->sa_len = buflen; 1643 } 1644 } 1645 return (error); 1646 } 1647 1648 int 1649 getsockaddr(namp, uaddr, len) 1650 struct sockaddr **namp; 1651 caddr_t uaddr; 1652 size_t len; 1653 { 1654 struct sockaddr *sa; 1655 int error; 1656 1657 if (len > SOCK_MAXADDRLEN) 1658 return (ENAMETOOLONG); 1659 if (len < offsetof(struct sockaddr, sa_data[0])) 1660 return (EINVAL); 1661 sa = malloc(len, M_SONAME, M_WAITOK); 1662 error = copyin(uaddr, sa, len); 1663 if (error) { 1664 free(sa, M_SONAME); 1665 } else { 1666 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1667 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1668 sa->sa_family = sa->sa_len; 1669 #endif 1670 sa->sa_len = len; 1671 *namp = sa; 1672 } 1673 return (error); 1674 } 1675 1676 #include <sys/condvar.h> 1677 1678 struct sendfile_sync { 1679 struct mtx mtx; 1680 struct cv cv; 1681 unsigned count; 1682 }; 1683 1684 /* 1685 * Detach mapped page and release resources back to the system. 1686 */ 1687 void 1688 sf_buf_mext(void *addr, void *args) 1689 { 1690 vm_page_t m; 1691 struct sendfile_sync *sfs; 1692 1693 m = sf_buf_page(args); 1694 sf_buf_free(args); 1695 vm_page_lock_queues(); 1696 vm_page_unwire(m, 0); 1697 /* 1698 * Check for the object going away on us. This can 1699 * happen since we don't hold a reference to it. 1700 * If so, we're responsible for freeing the page. 1701 */ 1702 if (m->wire_count == 0 && m->object == NULL) 1703 vm_page_free(m); 1704 vm_page_unlock_queues(); 1705 if (addr == NULL) 1706 return; 1707 sfs = addr; 1708 mtx_lock(&sfs->mtx); 1709 KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0")); 1710 if (--sfs->count == 0) 1711 cv_signal(&sfs->cv); 1712 mtx_unlock(&sfs->mtx); 1713 } 1714 1715 /* 1716 * sendfile(2) 1717 * 1718 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1719 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1720 * 1721 * Send a file specified by 'fd' and starting at 'offset' to a socket 1722 * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes == 1723 * 0. Optionally add a header and/or trailer to the socket output. If 1724 * specified, write the total number of bytes sent into *sbytes. 1725 */ 1726 int 1727 sendfile(struct thread *td, struct sendfile_args *uap) 1728 { 1729 1730 return (do_sendfile(td, uap, 0)); 1731 } 1732 1733 static int 1734 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1735 { 1736 struct sf_hdtr hdtr; 1737 struct uio *hdr_uio, *trl_uio; 1738 int error; 1739 1740 hdr_uio = trl_uio = NULL; 1741 1742 if (uap->hdtr != NULL) { 1743 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1744 if (error) 1745 goto out; 1746 if (hdtr.headers != NULL) { 1747 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1748 if (error) 1749 goto out; 1750 } 1751 if (hdtr.trailers != NULL) { 1752 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); 1753 if (error) 1754 goto out; 1755 1756 } 1757 } 1758 1759 error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat); 1760 out: 1761 if (hdr_uio) 1762 free(hdr_uio, M_IOV); 1763 if (trl_uio) 1764 free(trl_uio, M_IOV); 1765 return (error); 1766 } 1767 1768 #ifdef COMPAT_FREEBSD4 1769 int 1770 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1771 { 1772 struct sendfile_args args; 1773 1774 args.fd = uap->fd; 1775 args.s = uap->s; 1776 args.offset = uap->offset; 1777 args.nbytes = uap->nbytes; 1778 args.hdtr = uap->hdtr; 1779 args.sbytes = uap->sbytes; 1780 args.flags = uap->flags; 1781 1782 return (do_sendfile(td, &args, 1)); 1783 } 1784 #endif /* COMPAT_FREEBSD4 */ 1785 1786 int 1787 kern_sendfile(struct thread *td, struct sendfile_args *uap, 1788 struct uio *hdr_uio, struct uio *trl_uio, int compat) 1789 { 1790 struct file *sock_fp; 1791 struct vnode *vp; 1792 struct vm_object *obj = NULL; 1793 struct socket *so = NULL; 1794 struct mbuf *m = NULL; 1795 struct sf_buf *sf; 1796 struct vm_page *pg; 1797 off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0; 1798 int error, hdrlen = 0, mnw = 0; 1799 int vfslocked; 1800 struct sendfile_sync *sfs = NULL; 1801 1802 /* 1803 * The file descriptor must be a regular file and have a 1804 * backing VM object. 1805 * File offset must be positive. If it goes beyond EOF 1806 * we send only the header/trailer and no payload data. 1807 */ 1808 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1809 goto out; 1810 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1811 vn_lock(vp, LK_SHARED | LK_RETRY); 1812 if (vp->v_type == VREG) { 1813 obj = vp->v_object; 1814 if (obj != NULL) { 1815 /* 1816 * Temporarily increase the backing VM 1817 * object's reference count so that a forced 1818 * reclamation of its vnode does not 1819 * immediately destroy it. 1820 */ 1821 VM_OBJECT_LOCK(obj); 1822 if ((obj->flags & OBJ_DEAD) == 0) { 1823 vm_object_reference_locked(obj); 1824 VM_OBJECT_UNLOCK(obj); 1825 } else { 1826 VM_OBJECT_UNLOCK(obj); 1827 obj = NULL; 1828 } 1829 } 1830 } 1831 VOP_UNLOCK(vp, 0); 1832 VFS_UNLOCK_GIANT(vfslocked); 1833 if (obj == NULL) { 1834 error = EINVAL; 1835 goto out; 1836 } 1837 if (uap->offset < 0) { 1838 error = EINVAL; 1839 goto out; 1840 } 1841 1842 /* 1843 * The socket must be a stream socket and connected. 1844 * Remember if it a blocking or non-blocking socket. 1845 */ 1846 if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp, 1847 NULL)) != 0) 1848 goto out; 1849 so = sock_fp->f_data; 1850 if (so->so_type != SOCK_STREAM) { 1851 error = EINVAL; 1852 goto out; 1853 } 1854 if ((so->so_state & SS_ISCONNECTED) == 0) { 1855 error = ENOTCONN; 1856 goto out; 1857 } 1858 /* 1859 * Do not wait on memory allocations but return ENOMEM for 1860 * caller to retry later. 1861 * XXX: Experimental. 1862 */ 1863 if (uap->flags & SF_MNOWAIT) 1864 mnw = 1; 1865 1866 if (uap->flags & SF_SYNC) { 1867 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK); 1868 memset(sfs, 0, sizeof *sfs); 1869 mtx_init(&sfs->mtx, "sendfile", MTX_DEF, 0); 1870 cv_init(&sfs->cv, "sendfile"); 1871 } 1872 1873 #ifdef MAC 1874 error = mac_socket_check_send(td->td_ucred, so); 1875 if (error) 1876 goto out; 1877 #endif 1878 1879 /* If headers are specified copy them into mbufs. */ 1880 if (hdr_uio != NULL) { 1881 hdr_uio->uio_td = td; 1882 hdr_uio->uio_rw = UIO_WRITE; 1883 if (hdr_uio->uio_resid > 0) { 1884 /* 1885 * In FBSD < 5.0 the nbytes to send also included 1886 * the header. If compat is specified subtract the 1887 * header size from nbytes. 1888 */ 1889 if (compat) { 1890 if (uap->nbytes > hdr_uio->uio_resid) 1891 uap->nbytes -= hdr_uio->uio_resid; 1892 else 1893 uap->nbytes = 0; 1894 } 1895 m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK), 1896 0, 0, 0); 1897 if (m == NULL) { 1898 error = mnw ? EAGAIN : ENOBUFS; 1899 goto out; 1900 } 1901 hdrlen = m_length(m, NULL); 1902 } 1903 } 1904 1905 /* 1906 * Protect against multiple writers to the socket. 1907 * 1908 * XXXRW: Historically this has assumed non-interruptibility, so now 1909 * we implement that, but possibly shouldn't. 1910 */ 1911 (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR); 1912 1913 /* 1914 * Loop through the pages of the file, starting with the requested 1915 * offset. Get a file page (do I/O if necessary), map the file page 1916 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1917 * it on the socket. 1918 * This is done in two loops. The inner loop turns as many pages 1919 * as it can, up to available socket buffer space, without blocking 1920 * into mbufs to have it bulk delivered into the socket send buffer. 1921 * The outer loop checks the state and available space of the socket 1922 * and takes care of the overall progress. 1923 */ 1924 for (off = uap->offset, rem = uap->nbytes; ; ) { 1925 int loopbytes = 0; 1926 int space = 0; 1927 int done = 0; 1928 1929 /* 1930 * Check the socket state for ongoing connection, 1931 * no errors and space in socket buffer. 1932 * If space is low allow for the remainder of the 1933 * file to be processed if it fits the socket buffer. 1934 * Otherwise block in waiting for sufficient space 1935 * to proceed, or if the socket is nonblocking, return 1936 * to userland with EAGAIN while reporting how far 1937 * we've come. 1938 * We wait until the socket buffer has significant free 1939 * space to do bulk sends. This makes good use of file 1940 * system read ahead and allows packet segmentation 1941 * offloading hardware to take over lots of work. If 1942 * we were not careful here we would send off only one 1943 * sfbuf at a time. 1944 */ 1945 SOCKBUF_LOCK(&so->so_snd); 1946 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2) 1947 so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; 1948 retry_space: 1949 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 1950 error = EPIPE; 1951 SOCKBUF_UNLOCK(&so->so_snd); 1952 goto done; 1953 } else if (so->so_error) { 1954 error = so->so_error; 1955 so->so_error = 0; 1956 SOCKBUF_UNLOCK(&so->so_snd); 1957 goto done; 1958 } 1959 space = sbspace(&so->so_snd); 1960 if (space < rem && 1961 (space <= 0 || 1962 space < so->so_snd.sb_lowat)) { 1963 if (so->so_state & SS_NBIO) { 1964 SOCKBUF_UNLOCK(&so->so_snd); 1965 error = EAGAIN; 1966 goto done; 1967 } 1968 /* 1969 * sbwait drops the lock while sleeping. 1970 * When we loop back to retry_space the 1971 * state may have changed and we retest 1972 * for it. 1973 */ 1974 error = sbwait(&so->so_snd); 1975 /* 1976 * An error from sbwait usually indicates that we've 1977 * been interrupted by a signal. If we've sent anything 1978 * then return bytes sent, otherwise return the error. 1979 */ 1980 if (error) { 1981 SOCKBUF_UNLOCK(&so->so_snd); 1982 goto done; 1983 } 1984 goto retry_space; 1985 } 1986 SOCKBUF_UNLOCK(&so->so_snd); 1987 1988 /* 1989 * Reduce space in the socket buffer by the size of 1990 * the header mbuf chain. 1991 * hdrlen is set to 0 after the first loop. 1992 */ 1993 space -= hdrlen; 1994 1995 /* 1996 * Loop and construct maximum sized mbuf chain to be bulk 1997 * dumped into socket buffer. 1998 */ 1999 while(space > loopbytes) { 2000 vm_pindex_t pindex; 2001 vm_offset_t pgoff; 2002 struct mbuf *m0; 2003 2004 VM_OBJECT_LOCK(obj); 2005 /* 2006 * Calculate the amount to transfer. 2007 * Not to exceed a page, the EOF, 2008 * or the passed in nbytes. 2009 */ 2010 pgoff = (vm_offset_t)(off & PAGE_MASK); 2011 xfsize = omin(PAGE_SIZE - pgoff, 2012 obj->un_pager.vnp.vnp_size - uap->offset - 2013 fsbytes - loopbytes); 2014 if (uap->nbytes) 2015 rem = (uap->nbytes - fsbytes - loopbytes); 2016 else 2017 rem = obj->un_pager.vnp.vnp_size - 2018 uap->offset - fsbytes - loopbytes; 2019 xfsize = omin(rem, xfsize); 2020 if (xfsize <= 0) { 2021 VM_OBJECT_UNLOCK(obj); 2022 done = 1; /* all data sent */ 2023 break; 2024 } 2025 /* 2026 * Don't overflow the send buffer. 2027 * Stop here and send out what we've 2028 * already got. 2029 */ 2030 if (space < loopbytes + xfsize) { 2031 VM_OBJECT_UNLOCK(obj); 2032 break; 2033 } 2034 2035 /* 2036 * Attempt to look up the page. Allocate 2037 * if not found or wait and loop if busy. 2038 */ 2039 pindex = OFF_TO_IDX(off); 2040 pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY | 2041 VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY); 2042 2043 /* 2044 * Check if page is valid for what we need, 2045 * otherwise initiate I/O. 2046 * If we already turned some pages into mbufs, 2047 * send them off before we come here again and 2048 * block. 2049 */ 2050 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) 2051 VM_OBJECT_UNLOCK(obj); 2052 else if (m != NULL) 2053 error = EAGAIN; /* send what we already got */ 2054 else if (uap->flags & SF_NODISKIO) 2055 error = EBUSY; 2056 else { 2057 int bsize, resid; 2058 2059 /* 2060 * Ensure that our page is still around 2061 * when the I/O completes. 2062 */ 2063 vm_page_io_start(pg); 2064 VM_OBJECT_UNLOCK(obj); 2065 2066 /* 2067 * Get the page from backing store. 2068 */ 2069 bsize = vp->v_mount->mnt_stat.f_iosize; 2070 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2071 vn_lock(vp, LK_SHARED | LK_RETRY); 2072 2073 /* 2074 * XXXMAC: Because we don't have fp->f_cred 2075 * here, we pass in NOCRED. This is probably 2076 * wrong, but is consistent with our original 2077 * implementation. 2078 */ 2079 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 2080 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 2081 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 2082 td->td_ucred, NOCRED, &resid, td); 2083 VOP_UNLOCK(vp, 0); 2084 VFS_UNLOCK_GIANT(vfslocked); 2085 VM_OBJECT_LOCK(obj); 2086 vm_page_io_finish(pg); 2087 if (!error) 2088 VM_OBJECT_UNLOCK(obj); 2089 mbstat.sf_iocnt++; 2090 } 2091 if (error) { 2092 vm_page_lock_queues(); 2093 vm_page_unwire(pg, 0); 2094 /* 2095 * See if anyone else might know about 2096 * this page. If not and it is not valid, 2097 * then free it. 2098 */ 2099 if (pg->wire_count == 0 && pg->valid == 0 && 2100 pg->busy == 0 && !(pg->oflags & VPO_BUSY) && 2101 pg->hold_count == 0) { 2102 vm_page_free(pg); 2103 } 2104 vm_page_unlock_queues(); 2105 VM_OBJECT_UNLOCK(obj); 2106 if (error == EAGAIN) 2107 error = 0; /* not a real error */ 2108 break; 2109 } 2110 2111 /* 2112 * Get a sendfile buf. We usually wait as long 2113 * as necessary, but this wait can be interrupted. 2114 */ 2115 if ((sf = sf_buf_alloc(pg, 2116 (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) { 2117 mbstat.sf_allocfail++; 2118 vm_page_lock_queues(); 2119 vm_page_unwire(pg, 0); 2120 /* 2121 * XXX: Not same check as above!? 2122 */ 2123 if (pg->wire_count == 0 && pg->object == NULL) 2124 vm_page_free(pg); 2125 vm_page_unlock_queues(); 2126 error = (mnw ? EAGAIN : EINTR); 2127 break; 2128 } 2129 2130 /* 2131 * Get an mbuf and set it up as having 2132 * external storage. 2133 */ 2134 m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); 2135 if (m0 == NULL) { 2136 error = (mnw ? EAGAIN : ENOBUFS); 2137 sf_buf_mext((void *)sf_buf_kva(sf), sf); 2138 break; 2139 } 2140 MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, 2141 sfs, sf, M_RDONLY, EXT_SFBUF); 2142 m0->m_data = (char *)sf_buf_kva(sf) + pgoff; 2143 m0->m_len = xfsize; 2144 2145 /* Append to mbuf chain. */ 2146 if (m != NULL) 2147 m_cat(m, m0); 2148 else 2149 m = m0; 2150 2151 /* Keep track of bits processed. */ 2152 loopbytes += xfsize; 2153 off += xfsize; 2154 2155 if (sfs != NULL) { 2156 mtx_lock(&sfs->mtx); 2157 sfs->count++; 2158 mtx_unlock(&sfs->mtx); 2159 } 2160 } 2161 2162 /* Add the buffer chain to the socket buffer. */ 2163 if (m != NULL) { 2164 int mlen, err; 2165 2166 mlen = m_length(m, NULL); 2167 SOCKBUF_LOCK(&so->so_snd); 2168 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2169 error = EPIPE; 2170 SOCKBUF_UNLOCK(&so->so_snd); 2171 goto done; 2172 } 2173 SOCKBUF_UNLOCK(&so->so_snd); 2174 CURVNET_SET(so->so_vnet); 2175 /* Avoid error aliasing. */ 2176 err = (*so->so_proto->pr_usrreqs->pru_send) 2177 (so, 0, m, NULL, NULL, td); 2178 CURVNET_RESTORE(); 2179 if (err == 0) { 2180 /* 2181 * We need two counters to get the 2182 * file offset and nbytes to send 2183 * right: 2184 * - sbytes contains the total amount 2185 * of bytes sent, including headers. 2186 * - fsbytes contains the total amount 2187 * of bytes sent from the file. 2188 */ 2189 sbytes += mlen; 2190 fsbytes += mlen; 2191 if (hdrlen) { 2192 fsbytes -= hdrlen; 2193 hdrlen = 0; 2194 } 2195 } else if (error == 0) 2196 error = err; 2197 m = NULL; /* pru_send always consumes */ 2198 } 2199 2200 /* Quit outer loop on error or when we're done. */ 2201 if (done) 2202 break; 2203 if (error) 2204 goto done; 2205 } 2206 2207 /* 2208 * Send trailers. Wimp out and use writev(2). 2209 */ 2210 if (trl_uio != NULL) { 2211 sbunlock(&so->so_snd); 2212 error = kern_writev(td, uap->s, trl_uio); 2213 if (error == 0) 2214 sbytes += td->td_retval[0]; 2215 goto out; 2216 } 2217 2218 done: 2219 sbunlock(&so->so_snd); 2220 out: 2221 /* 2222 * If there was no error we have to clear td->td_retval[0] 2223 * because it may have been set by writev. 2224 */ 2225 if (error == 0) { 2226 td->td_retval[0] = 0; 2227 } 2228 if (uap->sbytes != NULL) { 2229 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2230 } 2231 if (obj != NULL) 2232 vm_object_deallocate(obj); 2233 if (vp != NULL) { 2234 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2235 vrele(vp); 2236 VFS_UNLOCK_GIANT(vfslocked); 2237 } 2238 if (so) 2239 fdrop(sock_fp, td); 2240 if (m) 2241 m_freem(m); 2242 2243 if (sfs != NULL) { 2244 mtx_lock(&sfs->mtx); 2245 if (sfs->count != 0) 2246 cv_wait(&sfs->cv, &sfs->mtx); 2247 KASSERT(sfs->count == 0, ("sendfile sync still busy")); 2248 cv_destroy(&sfs->cv); 2249 mtx_destroy(&sfs->mtx); 2250 free(sfs, M_TEMP); 2251 } 2252 2253 if (error == ERESTART) 2254 error = EINTR; 2255 2256 return (error); 2257 } 2258 2259 /* 2260 * SCTP syscalls. 2261 * Functionality only compiled in if SCTP is defined in the kernel Makefile, 2262 * otherwise all return EOPNOTSUPP. 2263 * XXX: We should make this loadable one day. 2264 */ 2265 int 2266 sctp_peeloff(td, uap) 2267 struct thread *td; 2268 struct sctp_peeloff_args /* { 2269 int sd; 2270 caddr_t name; 2271 } */ *uap; 2272 { 2273 #ifdef SCTP 2274 struct filedesc *fdp; 2275 struct file *nfp = NULL; 2276 int error; 2277 struct socket *head, *so; 2278 int fd; 2279 u_int fflag; 2280 2281 fdp = td->td_proc->p_fd; 2282 error = fgetsock(td, uap->sd, &head, &fflag); 2283 if (error) 2284 goto done2; 2285 error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name); 2286 if (error) 2287 goto done2; 2288 /* 2289 * At this point we know we do have a assoc to pull 2290 * we proceed to get the fd setup. This may block 2291 * but that is ok. 2292 */ 2293 2294 error = falloc(td, &nfp, &fd); 2295 if (error) 2296 goto done; 2297 td->td_retval[0] = fd; 2298 2299 so = sonewconn(head, SS_ISCONNECTED); 2300 if (so == NULL) 2301 goto noconnection; 2302 /* 2303 * Before changing the flags on the socket, we have to bump the 2304 * reference count. Otherwise, if the protocol calls sofree(), 2305 * the socket will be released due to a zero refcount. 2306 */ 2307 SOCK_LOCK(so); 2308 soref(so); /* file descriptor reference */ 2309 SOCK_UNLOCK(so); 2310 2311 ACCEPT_LOCK(); 2312 2313 TAILQ_REMOVE(&head->so_comp, so, so_list); 2314 head->so_qlen--; 2315 so->so_state |= (head->so_state & SS_NBIO); 2316 so->so_state &= ~SS_NOFDREF; 2317 so->so_qstate &= ~SQ_COMP; 2318 so->so_head = NULL; 2319 ACCEPT_UNLOCK(); 2320 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 2321 error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name); 2322 if (error) 2323 goto noconnection; 2324 if (head->so_sigio != NULL) 2325 fsetown(fgetown(&head->so_sigio), &so->so_sigio); 2326 2327 noconnection: 2328 /* 2329 * close the new descriptor, assuming someone hasn't ripped it 2330 * out from under us. 2331 */ 2332 if (error) 2333 fdclose(fdp, nfp, fd, td); 2334 2335 /* 2336 * Release explicitly held references before returning. 2337 */ 2338 done: 2339 if (nfp != NULL) 2340 fdrop(nfp, td); 2341 fputsock(head); 2342 done2: 2343 return (error); 2344 #else /* SCTP */ 2345 return (EOPNOTSUPP); 2346 #endif /* SCTP */ 2347 } 2348 2349 int 2350 sctp_generic_sendmsg (td, uap) 2351 struct thread *td; 2352 struct sctp_generic_sendmsg_args /* { 2353 int sd, 2354 caddr_t msg, 2355 int mlen, 2356 caddr_t to, 2357 __socklen_t tolen, 2358 struct sctp_sndrcvinfo *sinfo, 2359 int flags 2360 } */ *uap; 2361 { 2362 #ifdef SCTP 2363 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2364 struct socket *so; 2365 struct file *fp = NULL; 2366 int use_rcvinfo = 1; 2367 int error = 0, len; 2368 struct sockaddr *to = NULL; 2369 #ifdef KTRACE 2370 struct uio *ktruio = NULL; 2371 #endif 2372 struct uio auio; 2373 struct iovec iov[1]; 2374 2375 if (uap->sinfo) { 2376 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2377 if (error) 2378 return (error); 2379 u_sinfo = &sinfo; 2380 } 2381 if (uap->tolen) { 2382 error = getsockaddr(&to, uap->to, uap->tolen); 2383 if (error) { 2384 to = NULL; 2385 goto sctp_bad2; 2386 } 2387 } 2388 2389 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); 2390 if (error) 2391 goto sctp_bad; 2392 #ifdef KTRACE 2393 if (KTRPOINT(td, KTR_STRUCT)) 2394 ktrsockaddr(to); 2395 #endif 2396 2397 iov[0].iov_base = uap->msg; 2398 iov[0].iov_len = uap->mlen; 2399 2400 so = (struct socket *)fp->f_data; 2401 #ifdef MAC 2402 error = mac_socket_check_send(td->td_ucred, so); 2403 if (error) 2404 goto sctp_bad; 2405 #endif /* MAC */ 2406 2407 auio.uio_iov = iov; 2408 auio.uio_iovcnt = 1; 2409 auio.uio_segflg = UIO_USERSPACE; 2410 auio.uio_rw = UIO_WRITE; 2411 auio.uio_td = td; 2412 auio.uio_offset = 0; /* XXX */ 2413 auio.uio_resid = 0; 2414 len = auio.uio_resid = uap->mlen; 2415 error = sctp_lower_sosend(so, to, &auio, 2416 (struct mbuf *)NULL, (struct mbuf *)NULL, 2417 uap->flags, use_rcvinfo, u_sinfo, td); 2418 if (error) { 2419 if (auio.uio_resid != len && (error == ERESTART || 2420 error == EINTR || error == EWOULDBLOCK)) 2421 error = 0; 2422 /* Generation of SIGPIPE can be controlled per socket. */ 2423 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2424 !(uap->flags & MSG_NOSIGNAL)) { 2425 PROC_LOCK(td->td_proc); 2426 psignal(td->td_proc, SIGPIPE); 2427 PROC_UNLOCK(td->td_proc); 2428 } 2429 } 2430 if (error == 0) 2431 td->td_retval[0] = len - auio.uio_resid; 2432 #ifdef KTRACE 2433 if (ktruio != NULL) { 2434 ktruio->uio_resid = td->td_retval[0]; 2435 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2436 } 2437 #endif /* KTRACE */ 2438 sctp_bad: 2439 if (fp) 2440 fdrop(fp, td); 2441 sctp_bad2: 2442 if (to) 2443 free(to, M_SONAME); 2444 return (error); 2445 #else /* SCTP */ 2446 return (EOPNOTSUPP); 2447 #endif /* SCTP */ 2448 } 2449 2450 int 2451 sctp_generic_sendmsg_iov(td, uap) 2452 struct thread *td; 2453 struct sctp_generic_sendmsg_iov_args /* { 2454 int sd, 2455 struct iovec *iov, 2456 int iovlen, 2457 caddr_t to, 2458 __socklen_t tolen, 2459 struct sctp_sndrcvinfo *sinfo, 2460 int flags 2461 } */ *uap; 2462 { 2463 #ifdef SCTP 2464 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2465 struct socket *so; 2466 struct file *fp = NULL; 2467 int use_rcvinfo = 1; 2468 int error=0, len, i; 2469 struct sockaddr *to = NULL; 2470 #ifdef KTRACE 2471 struct uio *ktruio = NULL; 2472 #endif 2473 struct uio auio; 2474 struct iovec *iov, *tiov; 2475 2476 if (uap->sinfo) { 2477 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2478 if (error) 2479 return (error); 2480 u_sinfo = &sinfo; 2481 } 2482 if (uap->tolen) { 2483 error = getsockaddr(&to, uap->to, uap->tolen); 2484 if (error) { 2485 to = NULL; 2486 goto sctp_bad2; 2487 } 2488 } 2489 2490 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); 2491 if (error) 2492 goto sctp_bad1; 2493 2494 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2495 if (error) 2496 goto sctp_bad1; 2497 #ifdef KTRACE 2498 if (KTRPOINT(td, KTR_STRUCT)) 2499 ktrsockaddr(to); 2500 #endif 2501 2502 so = (struct socket *)fp->f_data; 2503 #ifdef MAC 2504 error = mac_socket_check_send(td->td_ucred, so); 2505 if (error) 2506 goto sctp_bad; 2507 #endif /* MAC */ 2508 2509 auio.uio_iov = iov; 2510 auio.uio_iovcnt = uap->iovlen; 2511 auio.uio_segflg = UIO_USERSPACE; 2512 auio.uio_rw = UIO_WRITE; 2513 auio.uio_td = td; 2514 auio.uio_offset = 0; /* XXX */ 2515 auio.uio_resid = 0; 2516 tiov = iov; 2517 for (i = 0; i <uap->iovlen; i++, tiov++) { 2518 if ((auio.uio_resid += tiov->iov_len) < 0) { 2519 error = EINVAL; 2520 goto sctp_bad; 2521 } 2522 } 2523 len = auio.uio_resid; 2524 error = sctp_lower_sosend(so, to, &auio, 2525 (struct mbuf *)NULL, (struct mbuf *)NULL, 2526 uap->flags, use_rcvinfo, u_sinfo, td); 2527 if (error) { 2528 if (auio.uio_resid != len && (error == ERESTART || 2529 error == EINTR || error == EWOULDBLOCK)) 2530 error = 0; 2531 /* Generation of SIGPIPE can be controlled per socket */ 2532 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2533 !(uap->flags & MSG_NOSIGNAL)) { 2534 PROC_LOCK(td->td_proc); 2535 psignal(td->td_proc, SIGPIPE); 2536 PROC_UNLOCK(td->td_proc); 2537 } 2538 } 2539 if (error == 0) 2540 td->td_retval[0] = len - auio.uio_resid; 2541 #ifdef KTRACE 2542 if (ktruio != NULL) { 2543 ktruio->uio_resid = td->td_retval[0]; 2544 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2545 } 2546 #endif /* KTRACE */ 2547 sctp_bad: 2548 free(iov, M_IOV); 2549 sctp_bad1: 2550 if (fp) 2551 fdrop(fp, td); 2552 sctp_bad2: 2553 if (to) 2554 free(to, M_SONAME); 2555 return (error); 2556 #else /* SCTP */ 2557 return (EOPNOTSUPP); 2558 #endif /* SCTP */ 2559 } 2560 2561 int 2562 sctp_generic_recvmsg(td, uap) 2563 struct thread *td; 2564 struct sctp_generic_recvmsg_args /* { 2565 int sd, 2566 struct iovec *iov, 2567 int iovlen, 2568 struct sockaddr *from, 2569 __socklen_t *fromlenaddr, 2570 struct sctp_sndrcvinfo *sinfo, 2571 int *msg_flags 2572 } */ *uap; 2573 { 2574 #ifdef SCTP 2575 u_int8_t sockbufstore[256]; 2576 struct uio auio; 2577 struct iovec *iov, *tiov; 2578 struct sctp_sndrcvinfo sinfo; 2579 struct socket *so; 2580 struct file *fp = NULL; 2581 struct sockaddr *fromsa; 2582 int fromlen; 2583 int len, i, msg_flags; 2584 int error = 0; 2585 #ifdef KTRACE 2586 struct uio *ktruio = NULL; 2587 #endif 2588 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); 2589 if (error) { 2590 return (error); 2591 } 2592 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2593 if (error) { 2594 goto out1; 2595 } 2596 2597 so = fp->f_data; 2598 #ifdef MAC 2599 error = mac_socket_check_receive(td->td_ucred, so); 2600 if (error) { 2601 goto out; 2602 return (error); 2603 } 2604 #endif /* MAC */ 2605 2606 if (uap->fromlenaddr) { 2607 error = copyin(uap->fromlenaddr, 2608 &fromlen, sizeof (fromlen)); 2609 if (error) { 2610 goto out; 2611 } 2612 } else { 2613 fromlen = 0; 2614 } 2615 if(uap->msg_flags) { 2616 error = copyin(uap->msg_flags, &msg_flags, sizeof (int)); 2617 if (error) { 2618 goto out; 2619 } 2620 } else { 2621 msg_flags = 0; 2622 } 2623 auio.uio_iov = iov; 2624 auio.uio_iovcnt = uap->iovlen; 2625 auio.uio_segflg = UIO_USERSPACE; 2626 auio.uio_rw = UIO_READ; 2627 auio.uio_td = td; 2628 auio.uio_offset = 0; /* XXX */ 2629 auio.uio_resid = 0; 2630 tiov = iov; 2631 for (i = 0; i <uap->iovlen; i++, tiov++) { 2632 if ((auio.uio_resid += tiov->iov_len) < 0) { 2633 error = EINVAL; 2634 goto out; 2635 } 2636 } 2637 len = auio.uio_resid; 2638 fromsa = (struct sockaddr *)sockbufstore; 2639 2640 #ifdef KTRACE 2641 if (KTRPOINT(td, KTR_GENIO)) 2642 ktruio = cloneuio(&auio); 2643 #endif /* KTRACE */ 2644 error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL, 2645 fromsa, fromlen, &msg_flags, 2646 (struct sctp_sndrcvinfo *)&sinfo, 1); 2647 if (error) { 2648 if (auio.uio_resid != (int)len && (error == ERESTART || 2649 error == EINTR || error == EWOULDBLOCK)) 2650 error = 0; 2651 } else { 2652 if (uap->sinfo) 2653 error = copyout(&sinfo, uap->sinfo, sizeof (sinfo)); 2654 } 2655 #ifdef KTRACE 2656 if (ktruio != NULL) { 2657 ktruio->uio_resid = (int)len - auio.uio_resid; 2658 ktrgenio(uap->sd, UIO_READ, ktruio, error); 2659 } 2660 #endif /* KTRACE */ 2661 if (error) 2662 goto out; 2663 td->td_retval[0] = (int)len - auio.uio_resid; 2664 2665 if (fromlen && uap->from) { 2666 len = fromlen; 2667 if (len <= 0 || fromsa == 0) 2668 len = 0; 2669 else { 2670 len = MIN(len, fromsa->sa_len); 2671 error = copyout(fromsa, uap->from, (unsigned)len); 2672 if (error) 2673 goto out; 2674 } 2675 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t)); 2676 if (error) { 2677 goto out; 2678 } 2679 } 2680 #ifdef KTRACE 2681 if (KTRPOINT(td, KTR_STRUCT)) 2682 ktrsockaddr(fromsa); 2683 #endif 2684 if (uap->msg_flags) { 2685 error = copyout(&msg_flags, uap->msg_flags, sizeof (int)); 2686 if (error) { 2687 goto out; 2688 } 2689 } 2690 out: 2691 free(iov, M_IOV); 2692 out1: 2693 if (fp) 2694 fdrop(fp, td); 2695 2696 return (error); 2697 #else /* SCTP */ 2698 return (EOPNOTSUPP); 2699 #endif /* SCTP */ 2700 } 2701