1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_inet.h" 39 #include "opt_inet6.h" 40 #include "opt_sctp.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/mutex.h> 49 #include <sys/sysproto.h> 50 #include <sys/malloc.h> 51 #include <sys/filedesc.h> 52 #include <sys/event.h> 53 #include <sys/proc.h> 54 #include <sys/fcntl.h> 55 #include <sys/file.h> 56 #include <sys/filio.h> 57 #include <sys/mount.h> 58 #include <sys/mbuf.h> 59 #include <sys/protosw.h> 60 #include <sys/sf_buf.h> 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 #include <sys/signalvar.h> 64 #include <sys/syscallsubr.h> 65 #include <sys/sysctl.h> 66 #include <sys/uio.h> 67 #include <sys/vimage.h> 68 #include <sys/vnode.h> 69 #ifdef KTRACE 70 #include <sys/ktrace.h> 71 #endif 72 73 #include <security/audit/audit.h> 74 #include <security/mac/mac_framework.h> 75 76 #include <vm/vm.h> 77 #include <vm/vm_object.h> 78 #include <vm/vm_page.h> 79 #include <vm/vm_pageout.h> 80 #include <vm/vm_kern.h> 81 #include <vm/vm_extern.h> 82 83 #if defined(INET) || defined(INET6) 84 #ifdef SCTP 85 #include <netinet/sctp.h> 86 #include <netinet/sctp_peeloff.h> 87 #endif /* SCTP */ 88 #endif /* INET || INET6 */ 89 90 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 91 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 92 93 static int accept1(struct thread *td, struct accept_args *uap, int compat); 94 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 95 static int getsockname1(struct thread *td, struct getsockname_args *uap, 96 int compat); 97 static int getpeername1(struct thread *td, struct getpeername_args *uap, 98 int compat); 99 100 /* 101 * NSFBUFS-related variables and associated sysctls 102 */ 103 int 
nsfbufs; 104 int nsfbufspeak; 105 int nsfbufsused; 106 107 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 108 "Maximum number of sendfile(2) sf_bufs available"); 109 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 110 "Number of sendfile(2) sf_bufs at peak usage"); 111 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 112 "Number of sendfile(2) sf_bufs in use"); 113 114 /* 115 * Convert a user file descriptor to a kernel file entry. A reference on the 116 * file entry is held upon returning. This is lighter weight than 117 * fgetsock(), which bumps the socket reference drops the file reference 118 * count instead, as this approach avoids several additional mutex operations 119 * associated with the additional reference count. If requested, return the 120 * open file flags. 121 */ 122 static int 123 getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp) 124 { 125 struct file *fp; 126 int error; 127 128 fp = NULL; 129 if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) { 130 error = EBADF; 131 } else if (fp->f_type != DTYPE_SOCKET) { 132 fdrop(fp, curthread); 133 fp = NULL; 134 error = ENOTSOCK; 135 } else { 136 if (fflagp != NULL) 137 *fflagp = fp->f_flag; 138 error = 0; 139 } 140 *fpp = fp; 141 return (error); 142 } 143 144 /* 145 * System call interface to the socket abstraction. 
*/
#if defined(COMPAT_43)
#define COMPAT_OLDSOCK
#endif

/*
 * socket(2): allocate a file descriptor and create a socket of the
 * requested domain/type/protocol behind it.  On success the new descriptor
 * number is returned in td_retval[0]; if socreate() fails the freshly
 * allocated descriptor is closed again.
 */
int
socket(td, uap)
	struct thread *td;
	struct socket_args /* {
		int	domain;
		int	type;
		int	protocol;
	} */ *uap;
{
	struct filedesc *fdp;
	struct socket *so;
	struct file *fp;
	int fd, error;

	AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol);
#ifdef MAC
	error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
	    uap->protocol);
	if (error)
		return (error);
#endif
	fdp = td->td_proc->p_fd;
	error = falloc(td, &fp, &fd);
	if (error)
		return (error);
	/* An extra reference on `fp' has been held for us by falloc(). */
	error = socreate(uap->domain, &so, uap->type, uap->protocol,
	    td->td_ucred, td);
	if (error) {
		fdclose(fdp, fp, fd, td);
	} else {
		finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
		td->td_retval[0] = fd;
	}
	/* Drop the extra falloc() reference on both paths. */
	fdrop(fp, td);
	return (error);
}

/*
 * bind(2): copy the address in from the caller with getsockaddr() and
 * hand it to kern_bind().  The kernel copy is freed before returning.
 */
/* ARGSUSED */
int
bind(td, uap)
	struct thread *td;
	struct bind_args /* {
		int	s;
		caddr_t	name;
		int	namelen;
	} */ *uap;
{
	struct sockaddr *sa;
	int error;

	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
		return (error);

	error = kern_bind(td, uap->s, sa);
	free(sa, M_SONAME);
	return (error);
}

/*
 * Bind socket descriptor `fd' to the address `sa'.  `sa' is not freed
 * here; it remains owned by the caller.  When MAC is configured, the bind
 * is attempted only if mac_socket_check_bind() allows it.
 */
int
kern_bind(td, fd, sa)
	struct thread *td;
	int fd;
	struct sockaddr *sa;
{
	struct socket *so;
	struct file *fp;
	int error;

	AUDIT_ARG_FD(fd);
	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
	if (error)
		return (error);
	so = fp->f_data;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_STRUCT))
		ktrsockaddr(sa);
#endif
#ifdef MAC
	error = mac_socket_check_bind(td->td_ucred, so, sa);
	if (error == 0)
#endif
		error = sobind(so, sa, td);
	fdrop(fp, td);
	return (error);
}

/*
 * listen(2): mark socket `s' as accepting connections with the given
 * backlog.  When MAC is configured, solisten() is called only if
 * mac_socket_check_listen() allows it.
 */
/* ARGSUSED */
int
listen(td, uap)
	struct thread *td;
	struct listen_args /* {
		int	s;
		int	backlog;
	} */ *uap;
{
	struct socket *so;
	struct file *fp;
	int error;

	AUDIT_ARG_FD(uap->s);
	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
	if (error == 0) {
		so = fp->f_data;
#ifdef MAC
		error = mac_socket_check_listen(td->td_ucred, so);
		if (error == 0) {
#endif
			CURVNET_SET(so->so_vnet);
			error = solisten(so, uap->backlog, td);
			CURVNET_RESTORE();
#ifdef MAC
		}
#endif
		fdrop(fp, td);
	}
	return(error);
}

/*
 * accept1() - user-space front end to kern_accept().  `compat' selects
 * the old sockaddr layout on copy-out when COMPAT_OLDSOCK is defined.
 */
static int
accept1(td, uap, compat)
	struct thread *td;
	struct accept_args /* {
		int	s;
		struct sockaddr	* __restrict name;
		socklen_t	* __restrict anamelen;
	} */ *uap;
	int compat;
{
	struct sockaddr *name;
	socklen_t namelen;
	struct file *fp;
	int error;

	/* Caller does not want the peer address: nothing to copy in or out. */
	if (uap->name == NULL)
		return (kern_accept(td, uap->s, NULL, NULL, NULL));

	error = copyin(uap->anamelen, &namelen, sizeof (namelen));
	if (error)
		return (error);

	error = kern_accept(td, uap->s, &name, &namelen, &fp);

	/*
	 * return a namelen of zero for older code which might
	 * ignore the return value from accept.
*/
	if (error) {
		(void) copyout(&namelen,
		    uap->anamelen, sizeof(*uap->anamelen));
		return (error);
	}

	if (error == 0 && name != NULL) {
#ifdef COMPAT_OLDSOCK
		if (compat)
			((struct osockaddr *)name)->sa_family =
			    name->sa_family;
#endif
		error = copyout(name, uap->name, namelen);
	}
	if (error == 0)
		error = copyout(&namelen, uap->anamelen,
		    sizeof(namelen));
	/* A failed copy-out undoes the accept: close the new descriptor. */
	if (error)
		fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
	fdrop(fp, td);
	free(name, M_SONAME);
	return (error);
}

/*
 * Accept a pending connection on listening socket descriptor `s'.
 *
 * A new descriptor is allocated for the connection and its number is
 * stored in td_retval[0].  If `name' is not NULL, *name receives the
 * address returned by soaccept() (it stays NULL when there is none) and
 * *namelen is clamped to that address's sa_len, or set to 0 when no
 * address is returned.  A non-NULL *name is handed over to the caller;
 * accept1() releases it with free(..., M_SONAME).  If `fp' is not NULL,
 * *fp receives a held reference on the new file on success and NULL on
 * failure.
 *
 * Fails with EINVAL if the socket is not accepting connections, and with
 * EWOULDBLOCK if the socket is non-blocking and no connection is queued.
 */
int
kern_accept(struct thread *td, int s, struct sockaddr **name,
    socklen_t *namelen, struct file **fp)
{
	struct filedesc *fdp;
	struct file *headfp, *nfp = NULL;
	struct sockaddr *sa = NULL;
	int error;
	struct socket *head, *so;
	int fd;
	u_int fflag;
	pid_t pgid;
	int tmp;

	if (name) {
		*name = NULL;
		/*
		 * NOTE(review): *namelen is a socklen_t; if that type is
		 * unsigned this test can never be true -- confirm.
		 */
		if (*namelen < 0)
			return (EINVAL);
	}

	AUDIT_ARG_FD(s);
	fdp = td->td_proc->p_fd;
	error = getsock(fdp, s, &headfp, &fflag);
	if (error)
		return (error);
	head = headfp->f_data;
	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		error = EINVAL;
		goto done;
	}
#ifdef MAC
	error = mac_socket_check_accept(td->td_ucred, head);
	if (error != 0)
		goto done;
#endif
	error = falloc(td, &nfp, &fd);
	if (error)
		goto done;
	ACCEPT_LOCK();
	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
		ACCEPT_UNLOCK();
		error = EWOULDBLOCK;
		goto noconnection;
	}
	/* Sleep until a completed connection is queued or an error is set. */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
		    "accept", 0);
		if (error) {
			ACCEPT_UNLOCK();
			goto noconnection;
		}
	}
	if (head->so_error) {
		/* Report the pending error once and clear it. */
		error = head->so_error;
		head->so_error = 0;
		ACCEPT_UNLOCK();
		goto noconnection;
	}
	so = TAILQ_FIRST(&head->so_comp);
	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));

	/*
	 * Before changing the flags on the socket, we have to bump the
	 * reference count.  Otherwise, if the protocol calls sofree(),
	 * the socket will be released due to a zero refcount.
	 */
	SOCK_LOCK(so);			/* soref() and so_state update */
	soref(so);			/* file descriptor reference */

	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;
	so->so_state |= (head->so_state & SS_NBIO);
	so->so_qstate &= ~SQ_COMP;
	so->so_head = NULL;

	SOCK_UNLOCK(so);
	ACCEPT_UNLOCK();

	/* An extra reference on `nfp' has been held for us by falloc(). */
	td->td_retval[0] = fd;

	/* connection has been removed from the listen queue */
	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);

	/* Propagate the listening socket's SIGIO owner, if it has one. */
	pgid = fgetown(&head->so_sigio);
	if (pgid != 0)
		fsetown(pgid, &so->so_sigio);

	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
	/* Sync socket nonblocking/async state with file flags */
	tmp = fflag & FNONBLOCK;
	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
	tmp = fflag & FASYNC;
	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
	sa = 0;
	CURVNET_SET(so->so_vnet);
	error = soaccept(so, &sa);
	CURVNET_RESTORE();
	if (error) {
		/*
		 * return a namelen of zero for older code which might
		 * ignore the return value from accept.
		 */
		if (name)
			*namelen = 0;
		goto noconnection;
	}
	if (sa == NULL) {
		/* Success, but the protocol supplied no address. */
		if (name)
			*namelen = 0;
		goto done;
	}
	if (name) {
		/* check sa_len before it is destroyed */
		if (*namelen > sa->sa_len)
			*namelen = sa->sa_len;
#ifdef KTRACE
		if (KTRPOINT(td, KTR_STRUCT))
			ktrsockaddr(sa);
#endif
		/* Ownership of the address moves to the caller. */
		*name = sa;
		sa = NULL;
	}
noconnection:
	if (sa)
		free(sa, M_SONAME);

	/*
	 * close the new descriptor, assuming someone hasn't ripped it
	 * out from under us.
	 */
	if (error)
		fdclose(fdp, nfp, fd, td);

	/*
	 * Release explicitly held references before returning.  We return
	 * a reference on nfp to the caller on success if they request it.
	 */
done:
	if (fp != NULL) {
		if (error == 0) {
			*fp = nfp;
			nfp = NULL;
		} else
			*fp = NULL;
	}
	if (nfp != NULL)
		fdrop(nfp, td);
	fdrop(headfp, td);
	return (error);
}

/*
 * accept(2): accept1() without the old-sockaddr compatibility handling.
 */
int
accept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{

	return (accept1(td, uap, 0));
}

#ifdef COMPAT_OLDSOCK
/*
 * Old accept: accept1() with the old-sockaddr compatibility handling.
 */
int
oaccept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{

	return (accept1(td, uap, 1));
}
#endif /* COMPAT_OLDSOCK */

/*
 * connect(2): copy the address in from the caller with getsockaddr() and
 * hand it to kern_connect().  The kernel copy is freed before returning.
 */
/* ARGSUSED */
int
connect(td, uap)
	struct thread *td;
	struct connect_args /* {
		int	s;
		caddr_t	name;
		int	namelen;
	} */ *uap;
{
	struct sockaddr *sa;
	int error;

	error = getsockaddr(&sa, uap->name, uap->namelen);
	if (error)
		return (error);

	error = kern_connect(td, uap->s, sa);
	free(sa, M_SONAME);
	return (error);
}


/*
 * Connect socket descriptor `fd' to the address `sa'.  `sa' is not freed
 * here; it remains owned by the caller.
 */
int
kern_connect(td, fd, sa)
	struct thread *td;
	int fd;
	struct sockaddr *sa;
{
	struct socket *so;
	struct file *fp;
	int error;
	int interrupted = 0;

	AUDIT_ARG_FD(fd);
	error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
	if (error)
		return (error);
540 so = fp->f_data; 541 if (so->so_state & SS_ISCONNECTING) { 542 error = EALREADY; 543 goto done1; 544 } 545 #ifdef KTRACE 546 if (KTRPOINT(td, KTR_STRUCT)) 547 ktrsockaddr(sa); 548 #endif 549 #ifdef MAC 550 error = mac_socket_check_connect(td->td_ucred, so, sa); 551 if (error) 552 goto bad; 553 #endif 554 error = soconnect(so, sa, td); 555 if (error) 556 goto bad; 557 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 558 error = EINPROGRESS; 559 goto done1; 560 } 561 SOCK_LOCK(so); 562 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 563 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 564 "connec", 0); 565 if (error) { 566 if (error == EINTR || error == ERESTART) 567 interrupted = 1; 568 break; 569 } 570 } 571 if (error == 0) { 572 error = so->so_error; 573 so->so_error = 0; 574 } 575 SOCK_UNLOCK(so); 576 bad: 577 if (!interrupted) 578 so->so_state &= ~SS_ISCONNECTING; 579 if (error == ERESTART) 580 error = EINTR; 581 done1: 582 fdrop(fp, td); 583 return (error); 584 } 585 586 int 587 kern_socketpair(struct thread *td, int domain, int type, int protocol, 588 int *rsv) 589 { 590 struct filedesc *fdp = td->td_proc->p_fd; 591 struct file *fp1, *fp2; 592 struct socket *so1, *so2; 593 int fd, error; 594 595 AUDIT_ARG_SOCKET(domain, type, protocol); 596 #ifdef MAC 597 /* We might want to have a separate check for socket pairs. */ 598 error = mac_socket_check_create(td->td_ucred, domain, type, 599 protocol); 600 if (error) 601 return (error); 602 #endif 603 error = socreate(domain, &so1, type, protocol, td->td_ucred, td); 604 if (error) 605 return (error); 606 error = socreate(domain, &so2, type, protocol, td->td_ucred, td); 607 if (error) 608 goto free1; 609 /* On success extra reference to `fp1' and 'fp2' is set by falloc. 
*/ 610 error = falloc(td, &fp1, &fd); 611 if (error) 612 goto free2; 613 rsv[0] = fd; 614 fp1->f_data = so1; /* so1 already has ref count */ 615 error = falloc(td, &fp2, &fd); 616 if (error) 617 goto free3; 618 fp2->f_data = so2; /* so2 already has ref count */ 619 rsv[1] = fd; 620 error = soconnect2(so1, so2); 621 if (error) 622 goto free4; 623 if (type == SOCK_DGRAM) { 624 /* 625 * Datagram socket connection is asymmetric. 626 */ 627 error = soconnect2(so2, so1); 628 if (error) 629 goto free4; 630 } 631 finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops); 632 finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops); 633 fdrop(fp1, td); 634 fdrop(fp2, td); 635 return (0); 636 free4: 637 fdclose(fdp, fp2, rsv[1], td); 638 fdrop(fp2, td); 639 free3: 640 fdclose(fdp, fp1, rsv[0], td); 641 fdrop(fp1, td); 642 free2: 643 if (so2 != NULL) 644 (void)soclose(so2); 645 free1: 646 if (so1 != NULL) 647 (void)soclose(so1); 648 return (error); 649 } 650 651 int 652 socketpair(struct thread *td, struct socketpair_args *uap) 653 { 654 int error, sv[2]; 655 656 error = kern_socketpair(td, uap->domain, uap->type, 657 uap->protocol, sv); 658 if (error) 659 return (error); 660 error = copyout(sv, uap->rsv, 2 * sizeof(int)); 661 if (error) { 662 (void)kern_close(td, sv[0]); 663 (void)kern_close(td, sv[1]); 664 } 665 return (error); 666 } 667 668 static int 669 sendit(td, s, mp, flags) 670 struct thread *td; 671 int s; 672 struct msghdr *mp; 673 int flags; 674 { 675 struct mbuf *control; 676 struct sockaddr *to; 677 int error; 678 679 if (mp->msg_name != NULL) { 680 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 681 if (error) { 682 to = NULL; 683 goto bad; 684 } 685 mp->msg_name = to; 686 } else { 687 to = NULL; 688 } 689 690 if (mp->msg_control) { 691 if (mp->msg_controllen < sizeof(struct cmsghdr) 692 #ifdef COMPAT_OLDSOCK 693 && mp->msg_flags != MSG_COMPAT 694 #endif 695 ) { 696 error = EINVAL; 697 goto bad; 698 } 699 error = sockargs(&control, 
mp->msg_control, 700 mp->msg_controllen, MT_CONTROL); 701 if (error) 702 goto bad; 703 #ifdef COMPAT_OLDSOCK 704 if (mp->msg_flags == MSG_COMPAT) { 705 struct cmsghdr *cm; 706 707 M_PREPEND(control, sizeof(*cm), M_WAIT); 708 cm = mtod(control, struct cmsghdr *); 709 cm->cmsg_len = control->m_len; 710 cm->cmsg_level = SOL_SOCKET; 711 cm->cmsg_type = SCM_RIGHTS; 712 } 713 #endif 714 } else { 715 control = NULL; 716 } 717 718 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 719 720 bad: 721 if (to) 722 free(to, M_SONAME); 723 return (error); 724 } 725 726 int 727 kern_sendit(td, s, mp, flags, control, segflg) 728 struct thread *td; 729 int s; 730 struct msghdr *mp; 731 int flags; 732 struct mbuf *control; 733 enum uio_seg segflg; 734 { 735 struct file *fp; 736 struct uio auio; 737 struct iovec *iov; 738 struct socket *so; 739 int i; 740 int len, error; 741 #ifdef KTRACE 742 struct uio *ktruio = NULL; 743 #endif 744 745 AUDIT_ARG_FD(s); 746 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 747 if (error) 748 return (error); 749 so = (struct socket *)fp->f_data; 750 751 #ifdef MAC 752 if (mp->msg_name != NULL) { 753 error = mac_socket_check_connect(td->td_ucred, so, 754 mp->msg_name); 755 if (error) 756 goto bad; 757 } 758 error = mac_socket_check_send(td->td_ucred, so); 759 if (error) 760 goto bad; 761 #endif 762 763 auio.uio_iov = mp->msg_iov; 764 auio.uio_iovcnt = mp->msg_iovlen; 765 auio.uio_segflg = segflg; 766 auio.uio_rw = UIO_WRITE; 767 auio.uio_td = td; 768 auio.uio_offset = 0; /* XXX */ 769 auio.uio_resid = 0; 770 iov = mp->msg_iov; 771 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 772 if ((auio.uio_resid += iov->iov_len) < 0) { 773 error = EINVAL; 774 goto bad; 775 } 776 } 777 #ifdef KTRACE 778 if (KTRPOINT(td, KTR_GENIO)) 779 ktruio = cloneuio(&auio); 780 #endif 781 len = auio.uio_resid; 782 error = sosend(so, mp->msg_name, &auio, 0, control, flags, td); 783 if (error) { 784 if (auio.uio_resid != len && (error == ERESTART || 785 error == EINTR 
|| error == EWOULDBLOCK)) 786 error = 0; 787 /* Generation of SIGPIPE can be controlled per socket */ 788 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 789 !(flags & MSG_NOSIGNAL)) { 790 PROC_LOCK(td->td_proc); 791 psignal(td->td_proc, SIGPIPE); 792 PROC_UNLOCK(td->td_proc); 793 } 794 } 795 if (error == 0) 796 td->td_retval[0] = len - auio.uio_resid; 797 #ifdef KTRACE 798 if (ktruio != NULL) { 799 ktruio->uio_resid = td->td_retval[0]; 800 ktrgenio(s, UIO_WRITE, ktruio, error); 801 } 802 #endif 803 bad: 804 fdrop(fp, td); 805 return (error); 806 } 807 808 int 809 sendto(td, uap) 810 struct thread *td; 811 struct sendto_args /* { 812 int s; 813 caddr_t buf; 814 size_t len; 815 int flags; 816 caddr_t to; 817 int tolen; 818 } */ *uap; 819 { 820 struct msghdr msg; 821 struct iovec aiov; 822 int error; 823 824 msg.msg_name = uap->to; 825 msg.msg_namelen = uap->tolen; 826 msg.msg_iov = &aiov; 827 msg.msg_iovlen = 1; 828 msg.msg_control = 0; 829 #ifdef COMPAT_OLDSOCK 830 msg.msg_flags = 0; 831 #endif 832 aiov.iov_base = uap->buf; 833 aiov.iov_len = uap->len; 834 error = sendit(td, uap->s, &msg, uap->flags); 835 return (error); 836 } 837 838 #ifdef COMPAT_OLDSOCK 839 int 840 osend(td, uap) 841 struct thread *td; 842 struct osend_args /* { 843 int s; 844 caddr_t buf; 845 int len; 846 int flags; 847 } */ *uap; 848 { 849 struct msghdr msg; 850 struct iovec aiov; 851 int error; 852 853 msg.msg_name = 0; 854 msg.msg_namelen = 0; 855 msg.msg_iov = &aiov; 856 msg.msg_iovlen = 1; 857 aiov.iov_base = uap->buf; 858 aiov.iov_len = uap->len; 859 msg.msg_control = 0; 860 msg.msg_flags = 0; 861 error = sendit(td, uap->s, &msg, uap->flags); 862 return (error); 863 } 864 865 int 866 osendmsg(td, uap) 867 struct thread *td; 868 struct osendmsg_args /* { 869 int s; 870 caddr_t msg; 871 int flags; 872 } */ *uap; 873 { 874 struct msghdr msg; 875 struct iovec *iov; 876 int error; 877 878 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 879 if (error) 880 return (error); 881 
error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
	if (error)
		return (error);
	msg.msg_iov = iov;
	/* MSG_COMPAT tells sendit() the control data is in the old layout. */
	msg.msg_flags = MSG_COMPAT;
	error = sendit(td, uap->s, &msg, uap->flags);
	free(iov, M_IOV);
	return (error);
}
#endif

/*
 * sendmsg(2): copy in the msghdr and its iovec array, then send through
 * sendit().  The kernel copy of the iovec array is freed before returning.
 */
int
sendmsg(td, uap)
	struct thread *td;
	struct sendmsg_args /* {
		int	s;
		caddr_t	msg;
		int	flags;
	} */ *uap;
{
	struct msghdr msg;
	struct iovec *iov;
	int error;

	error = copyin(uap->msg, &msg, sizeof (msg));
	if (error)
		return (error);
	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
	if (error)
		return (error);
	msg.msg_iov = iov;
#ifdef COMPAT_OLDSOCK
	msg.msg_flags = 0;
#endif
	error = sendit(td, uap->s, &msg, uap->flags);
	free(iov, M_IOV);
	return (error);
}

/*
 * Receive a message on socket descriptor `s' into the buffers described
 * by `mp'.
 *
 * The data iovecs are always treated as user-space addresses; `fromseg'
 * only selects how the source address is delivered to mp->msg_name
 * (copyout() for UIO_USERSPACE, bcopy() otherwise).  On success the
 * number of bytes received is stored in td_retval[0], mp->msg_namelen is
 * updated to the length of the address delivered, and mp->msg_flags holds
 * the flags reported by soreceive().
 *
 * Control data is handled one of two ways: if `controlp' is not NULL the
 * mbuf chain is returned through it on success (and *controlp is NULL on
 * failure); otherwise, if mp->msg_control is set, the chain is copied out
 * to that user buffer, truncated with MSG_CTRUNC if it does not fit, and
 * then freed.
 */
int
kern_recvit(td, s, mp, fromseg, controlp)
	struct thread *td;
	int s;
	struct msghdr *mp;
	enum uio_seg fromseg;
	struct mbuf **controlp;
{
	struct uio auio;
	struct iovec *iov;
	int i;
	socklen_t len;
	int error;
	struct mbuf *m, *control = 0;
	caddr_t ctlbuf;
	struct file *fp;
	struct socket *so;
	struct sockaddr *fromsa = 0;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	if(controlp != NULL)
		*controlp = 0;

	AUDIT_ARG_FD(s);
	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
	if (error)
		return (error);
	so = fp->f_data;

#ifdef MAC
	error = mac_socket_check_receive(td->td_ucred, so);
	if (error) {
		fdrop(fp, td);
		return (error);
	}
#endif

	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	iov = mp->msg_iov;
	/* Total the request size, rejecting a sum that goes negative. */
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
		if ((auio.uio_resid += iov->iov_len) < 0) {
			fdrop(fp, td);
			return (EINVAL);
		}
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(&auio);
#endif
	len = auio.uio_resid;
	CURVNET_SET(so->so_vnet);
	error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
	    (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
	    &mp->msg_flags);
	CURVNET_RESTORE();
	if (error) {
		/* Some data arrived before the interruption: short read. */
		if (auio.uio_resid != (int)len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
#ifdef KTRACE
	if (ktruio != NULL) {
		ktruio->uio_resid = (int)len - auio.uio_resid;
		ktrgenio(s, UIO_READ, ktruio, error);
	}
#endif
	if (error)
		goto out;
	td->td_retval[0] = (int)len - auio.uio_resid;
	if (mp->msg_name) {
		/* `len' is reused from here on for the address length. */
		len = mp->msg_namelen;
		if (len <= 0 || fromsa == 0)
			len = 0;
		else {
			/* save sa_len before it is destroyed by MSG_COMPAT */
			len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				((struct osockaddr *)fromsa)->sa_family =
				    fromsa->sa_family;
#endif
			if (fromseg == UIO_USERSPACE) {
				error = copyout(fromsa, mp->msg_name,
				    (unsigned)len);
				if (error)
					goto out;
			} else
				bcopy(fromsa, mp->msg_name, len);
		}
		mp->msg_namelen = len;
	}
	if (mp->msg_control && controlp == NULL) {
#ifdef COMPAT_OLDSOCK
		/*
		 * We assume that old recvmsg calls won't receive access
		 * rights and other control info, esp. as control info
		 * is always optional and those options didn't exist in 4.3.
		 * If we receive rights, trim the cmsghdr; anything else
		 * is tossed.
		 */
		if (control && mp->msg_flags & MSG_COMPAT) {
			if (mtod(control, struct cmsghdr *)->cmsg_level !=
			    SOL_SOCKET ||
			    mtod(control, struct cmsghdr *)->cmsg_type !=
			    SCM_RIGHTS) {
				mp->msg_controllen = 0;
				goto out;
			}
			control->m_len -= sizeof (struct cmsghdr);
			control->m_data += sizeof (struct cmsghdr);
		}
#endif
		/* `len' now tracks the space left in the control buffer. */
		len = mp->msg_controllen;
		m = control;
		mp->msg_controllen = 0;
		ctlbuf = mp->msg_control;

		while (m && len > 0) {
			unsigned int tocopy;

			if (len >= m->m_len)
				tocopy = m->m_len;
			else {
				mp->msg_flags |= MSG_CTRUNC;
				tocopy = len;
			}

			if ((error = copyout(mtod(m, caddr_t),
			    ctlbuf, tocopy)) != 0)
				goto out;

			ctlbuf += tocopy;
			len -= tocopy;
			m = m->m_next;
		}
		/* Report how many control bytes were actually copied out. */
		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
	}
out:
	fdrop(fp, td);
#ifdef KTRACE
	if (fromsa && KTRPOINT(td, KTR_STRUCT))
		ktrsockaddr(fromsa);
#endif
	if (fromsa)
		free(fromsa, M_SONAME);

	/* Hand the control chain to the caller, or release it. */
	if (error == 0 && controlp != NULL)
		*controlp = control;
	else if (control)
		m_freem(control);

	return (error);
}

/*
 * Common user-space front end for the receive family.  Calls
 * kern_recvit() and, if `namelenp' is not NULL, copies the resulting
 * address length out to it.  With COMPAT_OLDSOCK, a failure of that
 * copy-out is ignored for MSG_COMPAT callers.
 */
static int
recvit(td, s, mp, namelenp)
	struct thread *td;
	int s;
	struct msghdr *mp;
	void *namelenp;
{
	int error;

	error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
	if (error)
		return (error);
	if (namelenp) {
		error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
#ifdef COMPAT_OLDSOCK
		if (mp->msg_flags & MSG_COMPAT)
			error = 0;	/* old recvfrom didn't check */
#endif
	}
	return (error);
}

/*
 * recvfrom(2): receive into a single buffer, optionally returning the
 * source address and its length.
 */
int
recvfrom(td, uap)
	struct thread *td;
	struct recvfrom_args /* {
		int	s;
		caddr_t	buf;
		size_t	len;
		int	flags;
		struct sockaddr * __restrict	from;
		socklen_t * __restrict fromlenaddr;
	} */ *uap;
{
1116 struct msghdr msg; 1117 struct iovec aiov; 1118 int error; 1119 1120 if (uap->fromlenaddr) { 1121 error = copyin(uap->fromlenaddr, 1122 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1123 if (error) 1124 goto done2; 1125 } else { 1126 msg.msg_namelen = 0; 1127 } 1128 msg.msg_name = uap->from; 1129 msg.msg_iov = &aiov; 1130 msg.msg_iovlen = 1; 1131 aiov.iov_base = uap->buf; 1132 aiov.iov_len = uap->len; 1133 msg.msg_control = 0; 1134 msg.msg_flags = uap->flags; 1135 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1136 done2: 1137 return(error); 1138 } 1139 1140 #ifdef COMPAT_OLDSOCK 1141 int 1142 orecvfrom(td, uap) 1143 struct thread *td; 1144 struct recvfrom_args *uap; 1145 { 1146 1147 uap->flags |= MSG_COMPAT; 1148 return (recvfrom(td, uap)); 1149 } 1150 #endif 1151 1152 #ifdef COMPAT_OLDSOCK 1153 int 1154 orecv(td, uap) 1155 struct thread *td; 1156 struct orecv_args /* { 1157 int s; 1158 caddr_t buf; 1159 int len; 1160 int flags; 1161 } */ *uap; 1162 { 1163 struct msghdr msg; 1164 struct iovec aiov; 1165 int error; 1166 1167 msg.msg_name = 0; 1168 msg.msg_namelen = 0; 1169 msg.msg_iov = &aiov; 1170 msg.msg_iovlen = 1; 1171 aiov.iov_base = uap->buf; 1172 aiov.iov_len = uap->len; 1173 msg.msg_control = 0; 1174 msg.msg_flags = uap->flags; 1175 error = recvit(td, uap->s, &msg, NULL); 1176 return (error); 1177 } 1178 1179 /* 1180 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1181 * overlays the new one, missing only the flags, and with the (old) access 1182 * rights where the control fields are now. 
*/
int
orecvmsg(td, uap)
	struct thread *td;
	struct orecvmsg_args /* {
		int	s;
		struct	omsghdr *msg;
		int	flags;
	} */ *uap;
{
	struct msghdr msg;
	struct iovec *iov;
	int error;

	/* Only the old, shorter header is copied in. */
	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
	if (error)
		return (error);
	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
	if (error)
		return (error);
	msg.msg_flags = uap->flags | MSG_COMPAT;
	msg.msg_iov = iov;
	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
	/* Report the length of any received rights in the old field. */
	if (msg.msg_controllen && error == 0)
		error = copyout(&msg.msg_controllen,
		    &uap->msg->msg_accrightslen, sizeof (int));
	free(iov, M_IOV);
	return (error);
}
#endif

/*
 * recvmsg(2): copy in the msghdr and its iovec array, receive through
 * recvit(), and on success copy the updated msghdr back out with the
 * caller's original iovec pointer restored.  The kernel copy of the iovec
 * array is freed before returning.
 */
int
recvmsg(td, uap)
	struct thread *td;
	struct recvmsg_args /* {
		int	s;
		struct	msghdr *msg;
		int	flags;
	} */ *uap;
{
	struct msghdr msg;
	struct iovec *uiov, *iov;
	int error;

	error = copyin(uap->msg, &msg, sizeof (msg));
	if (error)
		return (error);
	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
	if (error)
		return (error);
	msg.msg_flags = uap->flags;
#ifdef COMPAT_OLDSOCK
	/* New-style callers may not request the compatibility behaviour. */
	msg.msg_flags &= ~MSG_COMPAT;
#endif
	/* Remember the user's iovec pointer so it can be copied back out. */
	uiov = msg.msg_iov;
	msg.msg_iov = iov;
	error = recvit(td, uap->s, &msg, NULL);
	if (error == 0) {
		msg.msg_iov = uiov;
		error = copyout(&msg, uap->msg, sizeof(msg));
	}
	free(iov, M_IOV);
	return (error);
}

/*
 * shutdown(2): shut down part or all of the connection on socket `s',
 * as selected by `how'.
 */
/* ARGSUSED */
int
shutdown(td, uap)
	struct thread *td;
	struct shutdown_args /* {
		int	s;
		int	how;
	} */ *uap;
{
	struct socket *so;
	struct file *fp;
	int error;

	AUDIT_ARG_FD(uap->s);
	error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
	if (error == 0) {
		so = fp->f_data;
		error = soshutdown(so, uap->how);
		fdrop(fp, td);
	}
return (error);
}

/*
 * setsockopt(2): thin wrapper passing the user-space option value to
 * kern_setsockopt().
 */
/* ARGSUSED */
int
setsockopt(td, uap)
	struct thread *td;
	struct setsockopt_args /* {
		int	s;
		int	level;
		int	name;
		caddr_t	val;
		int	valsize;
	} */ *uap;
{

	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
	    uap->val, UIO_USERSPACE, uap->valsize));
}

/*
 * Kernel version of setsockopt.
 * `val' can be a userland or kernel pointer, as selected by `valseg'.
 * Returns EFAULT for a NULL value with a non-zero size and EINVAL for a
 * size that is negative when viewed as an int; panics on any other
 * `valseg'.
 */
int
kern_setsockopt(td, s, level, name, val, valseg, valsize)
	struct thread *td;
	int s;
	int level;
	int name;
	void *val;
	enum uio_seg valseg;
	socklen_t valsize;
{
	int error;
	struct socket *so;
	struct file *fp;
	struct sockopt sopt;

	if (val == NULL && valsize != 0)
		return (EFAULT);
	if ((int)valsize < 0)
		return (EINVAL);

	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = level;
	sopt.sopt_name = name;
	sopt.sopt_val = val;
	sopt.sopt_valsize = valsize;
	/* sopt_td is set for user-space values and NULL for kernel ones. */
	switch (valseg) {
	case UIO_USERSPACE:
		sopt.sopt_td = td;
		break;
	case UIO_SYSSPACE:
		sopt.sopt_td = NULL;
		break;
	default:
		panic("kern_setsockopt called with bad valseg");
	}

	AUDIT_ARG_FD(s);
	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
	if (error == 0) {
		so = fp->f_data;
		CURVNET_SET(so->so_vnet);
		error = sosetopt(so, &sopt);
		CURVNET_RESTORE();
		fdrop(fp, td);
	}
	return(error);
}

/*
 * getsockopt(2): copy in the caller's buffer size (only when a value
 * buffer was supplied), query the option through kern_getsockopt(), and
 * copy the resulting size back out.
 */
/* ARGSUSED */
int
getsockopt(td, uap)
	struct thread *td;
	struct getsockopt_args /* {
		int	s;
		int	level;
		int	name;
		void * __restrict	val;
		socklen_t * __restrict	avalsize;
	} */ *uap;
{
	socklen_t valsize;
	int error;

	/*
	 * When uap->val is NULL valsize is left unset here;
	 * kern_getsockopt() zeroes it in that case before using it.
	 */
	if (uap->val) {
		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
		if (error)
			return (error);
	}

	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
	    uap->val, UIO_USERSPACE, &valsize);

	if (error == 0)
		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
	return (error);
}

/*
 * Kernel version of getsockopt.
 * optval can be a userland or kernel pointer, as selected by `valseg'.
 * optlen is always a kernel pointer; on entry it holds the buffer size
 * and, once the socket has been looked up, it is updated with the size
 * reported by sogetopt().
 */
int
kern_getsockopt(td, s, level, name, val, valseg, valsize)
	struct thread *td;
	int s;
	int level;
	int name;
	void *val;
	enum uio_seg valseg;
	socklen_t *valsize;
{
	int error;
	struct socket *so;
	struct file *fp;
	struct sockopt sopt;

	/* No value buffer means no room, whatever the caller passed. */
	if (val == NULL)
		*valsize = 0;
	if ((int)*valsize < 0)
		return (EINVAL);

	sopt.sopt_dir = SOPT_GET;
	sopt.sopt_level = level;
	sopt.sopt_name = name;
	sopt.sopt_val = val;
	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
	/* sopt_td is set for user-space values and NULL for kernel ones. */
	switch (valseg) {
	case UIO_USERSPACE:
		sopt.sopt_td = td;
		break;
	case UIO_SYSSPACE:
		sopt.sopt_td = NULL;
		break;
	default:
		panic("kern_getsockopt called with bad valseg");
	}

	AUDIT_ARG_FD(s);
	error = getsock(td->td_proc->p_fd, s, &fp, NULL);
	if (error == 0) {
		so = fp->f_data;
		CURVNET_SET(so->so_vnet);
		error = sogetopt(so, &sopt);
		CURVNET_RESTORE();
		*valsize = sopt.sopt_valsize;
		fdrop(fp, td);
	}
	return (error);
}

/*
 * getsockname1() - Get socket name.
1420 */ 1421 /* ARGSUSED */ 1422 static int 1423 getsockname1(td, uap, compat) 1424 struct thread *td; 1425 struct getsockname_args /* { 1426 int fdes; 1427 struct sockaddr * __restrict asa; 1428 socklen_t * __restrict alen; 1429 } */ *uap; 1430 int compat; 1431 { 1432 struct sockaddr *sa; 1433 socklen_t len; 1434 int error; 1435 1436 error = copyin(uap->alen, &len, sizeof(len)); 1437 if (error) 1438 return (error); 1439 1440 error = kern_getsockname(td, uap->fdes, &sa, &len); 1441 if (error) 1442 return (error); 1443 1444 if (len != 0) { 1445 #ifdef COMPAT_OLDSOCK 1446 if (compat) 1447 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1448 #endif 1449 error = copyout(sa, uap->asa, (u_int)len); 1450 } 1451 free(sa, M_SONAME); 1452 if (error == 0) 1453 error = copyout(&len, uap->alen, sizeof(len)); 1454 return (error); 1455 } 1456 1457 int 1458 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, 1459 socklen_t *alen) 1460 { 1461 struct socket *so; 1462 struct file *fp; 1463 socklen_t len; 1464 int error; 1465 1466 if (*alen < 0) 1467 return (EINVAL); 1468 1469 AUDIT_ARG_FD(fd); 1470 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 1471 if (error) 1472 return (error); 1473 so = fp->f_data; 1474 *sa = NULL; 1475 CURVNET_SET(so->so_vnet); 1476 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa); 1477 CURVNET_RESTORE(); 1478 if (error) 1479 goto bad; 1480 if (*sa == NULL) 1481 len = 0; 1482 else 1483 len = MIN(*alen, (*sa)->sa_len); 1484 *alen = len; 1485 #ifdef KTRACE 1486 if (KTRPOINT(td, KTR_STRUCT)) 1487 ktrsockaddr(*sa); 1488 #endif 1489 bad: 1490 fdrop(fp, td); 1491 if (error && *sa) { 1492 free(*sa, M_SONAME); 1493 *sa = NULL; 1494 } 1495 return (error); 1496 } 1497 1498 int 1499 getsockname(td, uap) 1500 struct thread *td; 1501 struct getsockname_args *uap; 1502 { 1503 1504 return (getsockname1(td, uap, 0)); 1505 } 1506 1507 #ifdef COMPAT_OLDSOCK 1508 int 1509 ogetsockname(td, uap) 1510 struct thread *td; 1511 struct getsockname_args 
*uap; 1512 { 1513 1514 return (getsockname1(td, uap, 1)); 1515 } 1516 #endif /* COMPAT_OLDSOCK */ 1517 1518 /* 1519 * getpeername1() - Get name of peer for connected socket. 1520 */ 1521 /* ARGSUSED */ 1522 static int 1523 getpeername1(td, uap, compat) 1524 struct thread *td; 1525 struct getpeername_args /* { 1526 int fdes; 1527 struct sockaddr * __restrict asa; 1528 socklen_t * __restrict alen; 1529 } */ *uap; 1530 int compat; 1531 { 1532 struct sockaddr *sa; 1533 socklen_t len; 1534 int error; 1535 1536 error = copyin(uap->alen, &len, sizeof (len)); 1537 if (error) 1538 return (error); 1539 1540 error = kern_getpeername(td, uap->fdes, &sa, &len); 1541 if (error) 1542 return (error); 1543 1544 if (len != 0) { 1545 #ifdef COMPAT_OLDSOCK 1546 if (compat) 1547 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1548 #endif 1549 error = copyout(sa, uap->asa, (u_int)len); 1550 } 1551 free(sa, M_SONAME); 1552 if (error == 0) 1553 error = copyout(&len, uap->alen, sizeof(len)); 1554 return (error); 1555 } 1556 1557 int 1558 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, 1559 socklen_t *alen) 1560 { 1561 struct socket *so; 1562 struct file *fp; 1563 socklen_t len; 1564 int error; 1565 1566 if (*alen < 0) 1567 return (EINVAL); 1568 1569 AUDIT_ARG_FD(fd); 1570 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 1571 if (error) 1572 return (error); 1573 so = fp->f_data; 1574 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1575 error = ENOTCONN; 1576 goto done; 1577 } 1578 *sa = NULL; 1579 CURVNET_SET(so->so_vnet); 1580 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa); 1581 CURVNET_RESTORE(); 1582 if (error) 1583 goto bad; 1584 if (*sa == NULL) 1585 len = 0; 1586 else 1587 len = MIN(*alen, (*sa)->sa_len); 1588 *alen = len; 1589 #ifdef KTRACE 1590 if (KTRPOINT(td, KTR_STRUCT)) 1591 ktrsockaddr(*sa); 1592 #endif 1593 bad: 1594 if (error && *sa) { 1595 free(*sa, M_SONAME); 1596 *sa = NULL; 1597 } 1598 done: 1599 fdrop(fp, td); 1600 return 
(error); 1601 } 1602 1603 int 1604 getpeername(td, uap) 1605 struct thread *td; 1606 struct getpeername_args *uap; 1607 { 1608 1609 return (getpeername1(td, uap, 0)); 1610 } 1611 1612 #ifdef COMPAT_OLDSOCK 1613 int 1614 ogetpeername(td, uap) 1615 struct thread *td; 1616 struct ogetpeername_args *uap; 1617 { 1618 1619 /* XXX uap should have type `getpeername_args *' to begin with. */ 1620 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1621 } 1622 #endif /* COMPAT_OLDSOCK */ 1623 1624 int 1625 sockargs(mp, buf, buflen, type) 1626 struct mbuf **mp; 1627 caddr_t buf; 1628 int buflen, type; 1629 { 1630 struct sockaddr *sa; 1631 struct mbuf *m; 1632 int error; 1633 1634 if ((u_int)buflen > MLEN) { 1635 #ifdef COMPAT_OLDSOCK 1636 if (type == MT_SONAME && (u_int)buflen <= 112) 1637 buflen = MLEN; /* unix domain compat. hack */ 1638 else 1639 #endif 1640 if ((u_int)buflen > MCLBYTES) 1641 return (EINVAL); 1642 } 1643 m = m_get(M_WAIT, type); 1644 if ((u_int)buflen > MLEN) 1645 MCLGET(m, M_WAIT); 1646 m->m_len = buflen; 1647 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1648 if (error) 1649 (void) m_free(m); 1650 else { 1651 *mp = m; 1652 if (type == MT_SONAME) { 1653 sa = mtod(m, struct sockaddr *); 1654 1655 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1656 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1657 sa->sa_family = sa->sa_len; 1658 #endif 1659 sa->sa_len = buflen; 1660 } 1661 } 1662 return (error); 1663 } 1664 1665 int 1666 getsockaddr(namp, uaddr, len) 1667 struct sockaddr **namp; 1668 caddr_t uaddr; 1669 size_t len; 1670 { 1671 struct sockaddr *sa; 1672 int error; 1673 1674 if (len > SOCK_MAXADDRLEN) 1675 return (ENAMETOOLONG); 1676 if (len < offsetof(struct sockaddr, sa_data[0])) 1677 return (EINVAL); 1678 sa = malloc(len, M_SONAME, M_WAITOK); 1679 error = copyin(uaddr, sa, len); 1680 if (error) { 1681 free(sa, M_SONAME); 1682 } else { 1683 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1684 if (sa->sa_family == 0 && 
sa->sa_len < AF_MAX) 1685 sa->sa_family = sa->sa_len; 1686 #endif 1687 sa->sa_len = len; 1688 *namp = sa; 1689 } 1690 return (error); 1691 } 1692 1693 #include <sys/condvar.h> 1694 1695 struct sendfile_sync { 1696 struct mtx mtx; 1697 struct cv cv; 1698 unsigned count; 1699 }; 1700 1701 /* 1702 * Detach mapped page and release resources back to the system. 1703 */ 1704 void 1705 sf_buf_mext(void *addr, void *args) 1706 { 1707 vm_page_t m; 1708 struct sendfile_sync *sfs; 1709 1710 m = sf_buf_page(args); 1711 sf_buf_free(args); 1712 vm_page_lock_queues(); 1713 vm_page_unwire(m, 0); 1714 /* 1715 * Check for the object going away on us. This can 1716 * happen since we don't hold a reference to it. 1717 * If so, we're responsible for freeing the page. 1718 */ 1719 if (m->wire_count == 0 && m->object == NULL) 1720 vm_page_free(m); 1721 vm_page_unlock_queues(); 1722 if (addr == NULL) 1723 return; 1724 sfs = addr; 1725 mtx_lock(&sfs->mtx); 1726 KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0")); 1727 if (--sfs->count == 0) 1728 cv_signal(&sfs->cv); 1729 mtx_unlock(&sfs->mtx); 1730 } 1731 1732 /* 1733 * sendfile(2) 1734 * 1735 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1736 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1737 * 1738 * Send a file specified by 'fd' and starting at 'offset' to a socket 1739 * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes == 1740 * 0. Optionally add a header and/or trailer to the socket output. If 1741 * specified, write the total number of bytes sent into *sbytes. 
1742 */ 1743 int 1744 sendfile(struct thread *td, struct sendfile_args *uap) 1745 { 1746 1747 return (do_sendfile(td, uap, 0)); 1748 } 1749 1750 static int 1751 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1752 { 1753 struct sf_hdtr hdtr; 1754 struct uio *hdr_uio, *trl_uio; 1755 int error; 1756 1757 hdr_uio = trl_uio = NULL; 1758 1759 if (uap->hdtr != NULL) { 1760 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1761 if (error) 1762 goto out; 1763 if (hdtr.headers != NULL) { 1764 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1765 if (error) 1766 goto out; 1767 } 1768 if (hdtr.trailers != NULL) { 1769 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); 1770 if (error) 1771 goto out; 1772 1773 } 1774 } 1775 1776 error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat); 1777 out: 1778 if (hdr_uio) 1779 free(hdr_uio, M_IOV); 1780 if (trl_uio) 1781 free(trl_uio, M_IOV); 1782 return (error); 1783 } 1784 1785 #ifdef COMPAT_FREEBSD4 1786 int 1787 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1788 { 1789 struct sendfile_args args; 1790 1791 args.fd = uap->fd; 1792 args.s = uap->s; 1793 args.offset = uap->offset; 1794 args.nbytes = uap->nbytes; 1795 args.hdtr = uap->hdtr; 1796 args.sbytes = uap->sbytes; 1797 args.flags = uap->flags; 1798 1799 return (do_sendfile(td, &args, 1)); 1800 } 1801 #endif /* COMPAT_FREEBSD4 */ 1802 1803 int 1804 kern_sendfile(struct thread *td, struct sendfile_args *uap, 1805 struct uio *hdr_uio, struct uio *trl_uio, int compat) 1806 { 1807 struct file *sock_fp; 1808 struct vnode *vp; 1809 struct vm_object *obj = NULL; 1810 struct socket *so = NULL; 1811 struct mbuf *m = NULL; 1812 struct sf_buf *sf; 1813 struct vm_page *pg; 1814 off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0; 1815 int error, hdrlen = 0, mnw = 0; 1816 int vfslocked; 1817 struct sendfile_sync *sfs = NULL; 1818 1819 /* 1820 * The file descriptor must be a regular file and have a 1821 * backing VM object. 
1822 * File offset must be positive. If it goes beyond EOF 1823 * we send only the header/trailer and no payload data. 1824 */ 1825 AUDIT_ARG_FD(uap->fd); 1826 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1827 goto out; 1828 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1829 vn_lock(vp, LK_SHARED | LK_RETRY); 1830 if (vp->v_type == VREG) { 1831 obj = vp->v_object; 1832 if (obj != NULL) { 1833 /* 1834 * Temporarily increase the backing VM 1835 * object's reference count so that a forced 1836 * reclamation of its vnode does not 1837 * immediately destroy it. 1838 */ 1839 VM_OBJECT_LOCK(obj); 1840 if ((obj->flags & OBJ_DEAD) == 0) { 1841 vm_object_reference_locked(obj); 1842 VM_OBJECT_UNLOCK(obj); 1843 } else { 1844 VM_OBJECT_UNLOCK(obj); 1845 obj = NULL; 1846 } 1847 } 1848 } 1849 VOP_UNLOCK(vp, 0); 1850 VFS_UNLOCK_GIANT(vfslocked); 1851 if (obj == NULL) { 1852 error = EINVAL; 1853 goto out; 1854 } 1855 if (uap->offset < 0) { 1856 error = EINVAL; 1857 goto out; 1858 } 1859 1860 /* 1861 * The socket must be a stream socket and connected. 1862 * Remember if it a blocking or non-blocking socket. 1863 */ 1864 if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp, 1865 NULL)) != 0) 1866 goto out; 1867 so = sock_fp->f_data; 1868 if (so->so_type != SOCK_STREAM) { 1869 error = EINVAL; 1870 goto out; 1871 } 1872 if ((so->so_state & SS_ISCONNECTED) == 0) { 1873 error = ENOTCONN; 1874 goto out; 1875 } 1876 /* 1877 * Do not wait on memory allocations but return ENOMEM for 1878 * caller to retry later. 1879 * XXX: Experimental. 1880 */ 1881 if (uap->flags & SF_MNOWAIT) 1882 mnw = 1; 1883 1884 if (uap->flags & SF_SYNC) { 1885 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK); 1886 memset(sfs, 0, sizeof *sfs); 1887 mtx_init(&sfs->mtx, "sendfile", MTX_DEF, 0); 1888 cv_init(&sfs->cv, "sendfile"); 1889 } 1890 1891 #ifdef MAC 1892 error = mac_socket_check_send(td->td_ucred, so); 1893 if (error) 1894 goto out; 1895 #endif 1896 1897 /* If headers are specified copy them into mbufs. 
*/ 1898 if (hdr_uio != NULL) { 1899 hdr_uio->uio_td = td; 1900 hdr_uio->uio_rw = UIO_WRITE; 1901 if (hdr_uio->uio_resid > 0) { 1902 /* 1903 * In FBSD < 5.0 the nbytes to send also included 1904 * the header. If compat is specified subtract the 1905 * header size from nbytes. 1906 */ 1907 if (compat) { 1908 if (uap->nbytes > hdr_uio->uio_resid) 1909 uap->nbytes -= hdr_uio->uio_resid; 1910 else 1911 uap->nbytes = 0; 1912 } 1913 m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK), 1914 0, 0, 0); 1915 if (m == NULL) { 1916 error = mnw ? EAGAIN : ENOBUFS; 1917 goto out; 1918 } 1919 hdrlen = m_length(m, NULL); 1920 } 1921 } 1922 1923 /* 1924 * Protect against multiple writers to the socket. 1925 * 1926 * XXXRW: Historically this has assumed non-interruptibility, so now 1927 * we implement that, but possibly shouldn't. 1928 */ 1929 (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR); 1930 1931 /* 1932 * Loop through the pages of the file, starting with the requested 1933 * offset. Get a file page (do I/O if necessary), map the file page 1934 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1935 * it on the socket. 1936 * This is done in two loops. The inner loop turns as many pages 1937 * as it can, up to available socket buffer space, without blocking 1938 * into mbufs to have it bulk delivered into the socket send buffer. 1939 * The outer loop checks the state and available space of the socket 1940 * and takes care of the overall progress. 1941 */ 1942 for (off = uap->offset, rem = uap->nbytes; ; ) { 1943 int loopbytes = 0; 1944 int space = 0; 1945 int done = 0; 1946 1947 /* 1948 * Check the socket state for ongoing connection, 1949 * no errors and space in socket buffer. 1950 * If space is low allow for the remainder of the 1951 * file to be processed if it fits the socket buffer. 
1952 * Otherwise block in waiting for sufficient space 1953 * to proceed, or if the socket is nonblocking, return 1954 * to userland with EAGAIN while reporting how far 1955 * we've come. 1956 * We wait until the socket buffer has significant free 1957 * space to do bulk sends. This makes good use of file 1958 * system read ahead and allows packet segmentation 1959 * offloading hardware to take over lots of work. If 1960 * we were not careful here we would send off only one 1961 * sfbuf at a time. 1962 */ 1963 SOCKBUF_LOCK(&so->so_snd); 1964 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2) 1965 so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; 1966 retry_space: 1967 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 1968 error = EPIPE; 1969 SOCKBUF_UNLOCK(&so->so_snd); 1970 goto done; 1971 } else if (so->so_error) { 1972 error = so->so_error; 1973 so->so_error = 0; 1974 SOCKBUF_UNLOCK(&so->so_snd); 1975 goto done; 1976 } 1977 space = sbspace(&so->so_snd); 1978 if (space < rem && 1979 (space <= 0 || 1980 space < so->so_snd.sb_lowat)) { 1981 if (so->so_state & SS_NBIO) { 1982 SOCKBUF_UNLOCK(&so->so_snd); 1983 error = EAGAIN; 1984 goto done; 1985 } 1986 /* 1987 * sbwait drops the lock while sleeping. 1988 * When we loop back to retry_space the 1989 * state may have changed and we retest 1990 * for it. 1991 */ 1992 error = sbwait(&so->so_snd); 1993 /* 1994 * An error from sbwait usually indicates that we've 1995 * been interrupted by a signal. If we've sent anything 1996 * then return bytes sent, otherwise return the error. 1997 */ 1998 if (error) { 1999 SOCKBUF_UNLOCK(&so->so_snd); 2000 goto done; 2001 } 2002 goto retry_space; 2003 } 2004 SOCKBUF_UNLOCK(&so->so_snd); 2005 2006 /* 2007 * Reduce space in the socket buffer by the size of 2008 * the header mbuf chain. 2009 * hdrlen is set to 0 after the first loop. 2010 */ 2011 space -= hdrlen; 2012 2013 /* 2014 * Loop and construct maximum sized mbuf chain to be bulk 2015 * dumped into socket buffer. 
2016 */ 2017 while(space > loopbytes) { 2018 vm_pindex_t pindex; 2019 vm_offset_t pgoff; 2020 struct mbuf *m0; 2021 2022 VM_OBJECT_LOCK(obj); 2023 /* 2024 * Calculate the amount to transfer. 2025 * Not to exceed a page, the EOF, 2026 * or the passed in nbytes. 2027 */ 2028 pgoff = (vm_offset_t)(off & PAGE_MASK); 2029 xfsize = omin(PAGE_SIZE - pgoff, 2030 obj->un_pager.vnp.vnp_size - uap->offset - 2031 fsbytes - loopbytes); 2032 if (uap->nbytes) 2033 rem = (uap->nbytes - fsbytes - loopbytes); 2034 else 2035 rem = obj->un_pager.vnp.vnp_size - 2036 uap->offset - fsbytes - loopbytes; 2037 xfsize = omin(rem, xfsize); 2038 if (xfsize <= 0) { 2039 VM_OBJECT_UNLOCK(obj); 2040 done = 1; /* all data sent */ 2041 break; 2042 } 2043 /* 2044 * Don't overflow the send buffer. 2045 * Stop here and send out what we've 2046 * already got. 2047 */ 2048 if (space < loopbytes + xfsize) { 2049 VM_OBJECT_UNLOCK(obj); 2050 break; 2051 } 2052 2053 /* 2054 * Attempt to look up the page. Allocate 2055 * if not found or wait and loop if busy. 2056 */ 2057 pindex = OFF_TO_IDX(off); 2058 pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY | 2059 VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY); 2060 2061 /* 2062 * Check if page is valid for what we need, 2063 * otherwise initiate I/O. 2064 * If we already turned some pages into mbufs, 2065 * send them off before we come here again and 2066 * block. 2067 */ 2068 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) 2069 VM_OBJECT_UNLOCK(obj); 2070 else if (m != NULL) 2071 error = EAGAIN; /* send what we already got */ 2072 else if (uap->flags & SF_NODISKIO) 2073 error = EBUSY; 2074 else { 2075 int bsize, resid; 2076 2077 /* 2078 * Ensure that our page is still around 2079 * when the I/O completes. 2080 */ 2081 vm_page_io_start(pg); 2082 VM_OBJECT_UNLOCK(obj); 2083 2084 /* 2085 * Get the page from backing store. 
2086 */ 2087 bsize = vp->v_mount->mnt_stat.f_iosize; 2088 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2089 vn_lock(vp, LK_SHARED | LK_RETRY); 2090 2091 /* 2092 * XXXMAC: Because we don't have fp->f_cred 2093 * here, we pass in NOCRED. This is probably 2094 * wrong, but is consistent with our original 2095 * implementation. 2096 */ 2097 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 2098 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 2099 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 2100 td->td_ucred, NOCRED, &resid, td); 2101 VOP_UNLOCK(vp, 0); 2102 VFS_UNLOCK_GIANT(vfslocked); 2103 VM_OBJECT_LOCK(obj); 2104 vm_page_io_finish(pg); 2105 if (!error) 2106 VM_OBJECT_UNLOCK(obj); 2107 mbstat.sf_iocnt++; 2108 } 2109 if (error) { 2110 vm_page_lock_queues(); 2111 vm_page_unwire(pg, 0); 2112 /* 2113 * See if anyone else might know about 2114 * this page. If not and it is not valid, 2115 * then free it. 2116 */ 2117 if (pg->wire_count == 0 && pg->valid == 0 && 2118 pg->busy == 0 && !(pg->oflags & VPO_BUSY) && 2119 pg->hold_count == 0) { 2120 vm_page_free(pg); 2121 } 2122 vm_page_unlock_queues(); 2123 VM_OBJECT_UNLOCK(obj); 2124 if (error == EAGAIN) 2125 error = 0; /* not a real error */ 2126 break; 2127 } 2128 2129 /* 2130 * Get a sendfile buf. We usually wait as long 2131 * as necessary, but this wait can be interrupted. 2132 */ 2133 if ((sf = sf_buf_alloc(pg, 2134 (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) { 2135 mbstat.sf_allocfail++; 2136 vm_page_lock_queues(); 2137 vm_page_unwire(pg, 0); 2138 /* 2139 * XXX: Not same check as above!? 2140 */ 2141 if (pg->wire_count == 0 && pg->object == NULL) 2142 vm_page_free(pg); 2143 vm_page_unlock_queues(); 2144 error = (mnw ? EAGAIN : EINTR); 2145 break; 2146 } 2147 2148 /* 2149 * Get an mbuf and set it up as having 2150 * external storage. 2151 */ 2152 m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); 2153 if (m0 == NULL) { 2154 error = (mnw ? 
EAGAIN : ENOBUFS); 2155 sf_buf_mext((void *)sf_buf_kva(sf), sf); 2156 break; 2157 } 2158 MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, 2159 sfs, sf, M_RDONLY, EXT_SFBUF); 2160 m0->m_data = (char *)sf_buf_kva(sf) + pgoff; 2161 m0->m_len = xfsize; 2162 2163 /* Append to mbuf chain. */ 2164 if (m != NULL) 2165 m_cat(m, m0); 2166 else 2167 m = m0; 2168 2169 /* Keep track of bits processed. */ 2170 loopbytes += xfsize; 2171 off += xfsize; 2172 2173 if (sfs != NULL) { 2174 mtx_lock(&sfs->mtx); 2175 sfs->count++; 2176 mtx_unlock(&sfs->mtx); 2177 } 2178 } 2179 2180 /* Add the buffer chain to the socket buffer. */ 2181 if (m != NULL) { 2182 int mlen, err; 2183 2184 mlen = m_length(m, NULL); 2185 SOCKBUF_LOCK(&so->so_snd); 2186 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2187 error = EPIPE; 2188 SOCKBUF_UNLOCK(&so->so_snd); 2189 goto done; 2190 } 2191 SOCKBUF_UNLOCK(&so->so_snd); 2192 CURVNET_SET(so->so_vnet); 2193 /* Avoid error aliasing. */ 2194 err = (*so->so_proto->pr_usrreqs->pru_send) 2195 (so, 0, m, NULL, NULL, td); 2196 CURVNET_RESTORE(); 2197 if (err == 0) { 2198 /* 2199 * We need two counters to get the 2200 * file offset and nbytes to send 2201 * right: 2202 * - sbytes contains the total amount 2203 * of bytes sent, including headers. 2204 * - fsbytes contains the total amount 2205 * of bytes sent from the file. 2206 */ 2207 sbytes += mlen; 2208 fsbytes += mlen; 2209 if (hdrlen) { 2210 fsbytes -= hdrlen; 2211 hdrlen = 0; 2212 } 2213 } else if (error == 0) 2214 error = err; 2215 m = NULL; /* pru_send always consumes */ 2216 } 2217 2218 /* Quit outer loop on error or when we're done. */ 2219 if (done) 2220 break; 2221 if (error) 2222 goto done; 2223 } 2224 2225 /* 2226 * Send trailers. Wimp out and use writev(2). 
2227 */ 2228 if (trl_uio != NULL) { 2229 sbunlock(&so->so_snd); 2230 error = kern_writev(td, uap->s, trl_uio); 2231 if (error == 0) 2232 sbytes += td->td_retval[0]; 2233 goto out; 2234 } 2235 2236 done: 2237 sbunlock(&so->so_snd); 2238 out: 2239 /* 2240 * If there was no error we have to clear td->td_retval[0] 2241 * because it may have been set by writev. 2242 */ 2243 if (error == 0) { 2244 td->td_retval[0] = 0; 2245 } 2246 if (uap->sbytes != NULL) { 2247 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2248 } 2249 if (obj != NULL) 2250 vm_object_deallocate(obj); 2251 if (vp != NULL) { 2252 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2253 vrele(vp); 2254 VFS_UNLOCK_GIANT(vfslocked); 2255 } 2256 if (so) 2257 fdrop(sock_fp, td); 2258 if (m) 2259 m_freem(m); 2260 2261 if (sfs != NULL) { 2262 mtx_lock(&sfs->mtx); 2263 if (sfs->count != 0) 2264 cv_wait(&sfs->cv, &sfs->mtx); 2265 KASSERT(sfs->count == 0, ("sendfile sync still busy")); 2266 cv_destroy(&sfs->cv); 2267 mtx_destroy(&sfs->mtx); 2268 free(sfs, M_TEMP); 2269 } 2270 2271 if (error == ERESTART) 2272 error = EINTR; 2273 2274 return (error); 2275 } 2276 2277 /* 2278 * SCTP syscalls. 2279 * Functionality only compiled in if SCTP is defined in the kernel Makefile, 2280 * otherwise all return EOPNOTSUPP. 2281 * XXX: We should make this loadable one day. 
2282 */ 2283 int 2284 sctp_peeloff(td, uap) 2285 struct thread *td; 2286 struct sctp_peeloff_args /* { 2287 int sd; 2288 caddr_t name; 2289 } */ *uap; 2290 { 2291 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2292 struct filedesc *fdp; 2293 struct file *nfp = NULL; 2294 int error; 2295 struct socket *head, *so; 2296 int fd; 2297 u_int fflag; 2298 2299 fdp = td->td_proc->p_fd; 2300 AUDIT_ARG_FD(uap->sd); 2301 error = fgetsock(td, uap->sd, &head, &fflag); 2302 if (error) 2303 goto done2; 2304 error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name); 2305 if (error) 2306 goto done2; 2307 /* 2308 * At this point we know we do have a assoc to pull 2309 * we proceed to get the fd setup. This may block 2310 * but that is ok. 2311 */ 2312 2313 error = falloc(td, &nfp, &fd); 2314 if (error) 2315 goto done; 2316 td->td_retval[0] = fd; 2317 2318 so = sonewconn(head, SS_ISCONNECTED); 2319 if (so == NULL) 2320 goto noconnection; 2321 /* 2322 * Before changing the flags on the socket, we have to bump the 2323 * reference count. Otherwise, if the protocol calls sofree(), 2324 * the socket will be released due to a zero refcount. 2325 */ 2326 SOCK_LOCK(so); 2327 soref(so); /* file descriptor reference */ 2328 SOCK_UNLOCK(so); 2329 2330 ACCEPT_LOCK(); 2331 2332 TAILQ_REMOVE(&head->so_comp, so, so_list); 2333 head->so_qlen--; 2334 so->so_state |= (head->so_state & SS_NBIO); 2335 so->so_state &= ~SS_NOFDREF; 2336 so->so_qstate &= ~SQ_COMP; 2337 so->so_head = NULL; 2338 ACCEPT_UNLOCK(); 2339 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 2340 error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name); 2341 if (error) 2342 goto noconnection; 2343 if (head->so_sigio != NULL) 2344 fsetown(fgetown(&head->so_sigio), &so->so_sigio); 2345 2346 noconnection: 2347 /* 2348 * close the new descriptor, assuming someone hasn't ripped it 2349 * out from under us. 
2350 */ 2351 if (error) 2352 fdclose(fdp, nfp, fd, td); 2353 2354 /* 2355 * Release explicitly held references before returning. 2356 */ 2357 done: 2358 if (nfp != NULL) 2359 fdrop(nfp, td); 2360 fputsock(head); 2361 done2: 2362 return (error); 2363 #else /* SCTP */ 2364 return (EOPNOTSUPP); 2365 #endif /* SCTP */ 2366 } 2367 2368 int 2369 sctp_generic_sendmsg (td, uap) 2370 struct thread *td; 2371 struct sctp_generic_sendmsg_args /* { 2372 int sd, 2373 caddr_t msg, 2374 int mlen, 2375 caddr_t to, 2376 __socklen_t tolen, 2377 struct sctp_sndrcvinfo *sinfo, 2378 int flags 2379 } */ *uap; 2380 { 2381 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2382 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2383 struct socket *so; 2384 struct file *fp = NULL; 2385 int use_rcvinfo = 1; 2386 int error = 0, len; 2387 struct sockaddr *to = NULL; 2388 #ifdef KTRACE 2389 struct uio *ktruio = NULL; 2390 #endif 2391 struct uio auio; 2392 struct iovec iov[1]; 2393 2394 if (uap->sinfo) { 2395 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2396 if (error) 2397 return (error); 2398 u_sinfo = &sinfo; 2399 } 2400 if (uap->tolen) { 2401 error = getsockaddr(&to, uap->to, uap->tolen); 2402 if (error) { 2403 to = NULL; 2404 goto sctp_bad2; 2405 } 2406 } 2407 2408 AUDIT_ARG_FD(uap->sd); 2409 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); 2410 if (error) 2411 goto sctp_bad; 2412 #ifdef KTRACE 2413 if (KTRPOINT(td, KTR_STRUCT)) 2414 ktrsockaddr(to); 2415 #endif 2416 2417 iov[0].iov_base = uap->msg; 2418 iov[0].iov_len = uap->mlen; 2419 2420 so = (struct socket *)fp->f_data; 2421 #ifdef MAC 2422 error = mac_socket_check_send(td->td_ucred, so); 2423 if (error) 2424 goto sctp_bad; 2425 #endif /* MAC */ 2426 2427 auio.uio_iov = iov; 2428 auio.uio_iovcnt = 1; 2429 auio.uio_segflg = UIO_USERSPACE; 2430 auio.uio_rw = UIO_WRITE; 2431 auio.uio_td = td; 2432 auio.uio_offset = 0; /* XXX */ 2433 auio.uio_resid = 0; 2434 len = auio.uio_resid = uap->mlen; 2435 error = sctp_lower_sosend(so, 
to, &auio, 2436 (struct mbuf *)NULL, (struct mbuf *)NULL, 2437 uap->flags, use_rcvinfo, u_sinfo, td); 2438 if (error) { 2439 if (auio.uio_resid != len && (error == ERESTART || 2440 error == EINTR || error == EWOULDBLOCK)) 2441 error = 0; 2442 /* Generation of SIGPIPE can be controlled per socket. */ 2443 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2444 !(uap->flags & MSG_NOSIGNAL)) { 2445 PROC_LOCK(td->td_proc); 2446 psignal(td->td_proc, SIGPIPE); 2447 PROC_UNLOCK(td->td_proc); 2448 } 2449 } 2450 if (error == 0) 2451 td->td_retval[0] = len - auio.uio_resid; 2452 #ifdef KTRACE 2453 if (ktruio != NULL) { 2454 ktruio->uio_resid = td->td_retval[0]; 2455 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2456 } 2457 #endif /* KTRACE */ 2458 sctp_bad: 2459 if (fp) 2460 fdrop(fp, td); 2461 sctp_bad2: 2462 if (to) 2463 free(to, M_SONAME); 2464 return (error); 2465 #else /* SCTP */ 2466 return (EOPNOTSUPP); 2467 #endif /* SCTP */ 2468 } 2469 2470 int 2471 sctp_generic_sendmsg_iov(td, uap) 2472 struct thread *td; 2473 struct sctp_generic_sendmsg_iov_args /* { 2474 int sd, 2475 struct iovec *iov, 2476 int iovlen, 2477 caddr_t to, 2478 __socklen_t tolen, 2479 struct sctp_sndrcvinfo *sinfo, 2480 int flags 2481 } */ *uap; 2482 { 2483 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2484 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2485 struct socket *so; 2486 struct file *fp = NULL; 2487 int use_rcvinfo = 1; 2488 int error=0, len, i; 2489 struct sockaddr *to = NULL; 2490 #ifdef KTRACE 2491 struct uio *ktruio = NULL; 2492 #endif 2493 struct uio auio; 2494 struct iovec *iov, *tiov; 2495 2496 if (uap->sinfo) { 2497 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2498 if (error) 2499 return (error); 2500 u_sinfo = &sinfo; 2501 } 2502 if (uap->tolen) { 2503 error = getsockaddr(&to, uap->to, uap->tolen); 2504 if (error) { 2505 to = NULL; 2506 goto sctp_bad2; 2507 } 2508 } 2509 2510 AUDIT_ARG_FD(uap->sd); 2511 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); 
2512 if (error) 2513 goto sctp_bad1; 2514 2515 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2516 if (error) 2517 goto sctp_bad1; 2518 #ifdef KTRACE 2519 if (KTRPOINT(td, KTR_STRUCT)) 2520 ktrsockaddr(to); 2521 #endif 2522 2523 so = (struct socket *)fp->f_data; 2524 #ifdef MAC 2525 error = mac_socket_check_send(td->td_ucred, so); 2526 if (error) 2527 goto sctp_bad; 2528 #endif /* MAC */ 2529 2530 auio.uio_iov = iov; 2531 auio.uio_iovcnt = uap->iovlen; 2532 auio.uio_segflg = UIO_USERSPACE; 2533 auio.uio_rw = UIO_WRITE; 2534 auio.uio_td = td; 2535 auio.uio_offset = 0; /* XXX */ 2536 auio.uio_resid = 0; 2537 tiov = iov; 2538 for (i = 0; i <uap->iovlen; i++, tiov++) { 2539 if ((auio.uio_resid += tiov->iov_len) < 0) { 2540 error = EINVAL; 2541 goto sctp_bad; 2542 } 2543 } 2544 len = auio.uio_resid; 2545 error = sctp_lower_sosend(so, to, &auio, 2546 (struct mbuf *)NULL, (struct mbuf *)NULL, 2547 uap->flags, use_rcvinfo, u_sinfo, td); 2548 if (error) { 2549 if (auio.uio_resid != len && (error == ERESTART || 2550 error == EINTR || error == EWOULDBLOCK)) 2551 error = 0; 2552 /* Generation of SIGPIPE can be controlled per socket */ 2553 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2554 !(uap->flags & MSG_NOSIGNAL)) { 2555 PROC_LOCK(td->td_proc); 2556 psignal(td->td_proc, SIGPIPE); 2557 PROC_UNLOCK(td->td_proc); 2558 } 2559 } 2560 if (error == 0) 2561 td->td_retval[0] = len - auio.uio_resid; 2562 #ifdef KTRACE 2563 if (ktruio != NULL) { 2564 ktruio->uio_resid = td->td_retval[0]; 2565 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2566 } 2567 #endif /* KTRACE */ 2568 sctp_bad: 2569 free(iov, M_IOV); 2570 sctp_bad1: 2571 if (fp) 2572 fdrop(fp, td); 2573 sctp_bad2: 2574 if (to) 2575 free(to, M_SONAME); 2576 return (error); 2577 #else /* SCTP */ 2578 return (EOPNOTSUPP); 2579 #endif /* SCTP */ 2580 } 2581 2582 int 2583 sctp_generic_recvmsg(td, uap) 2584 struct thread *td; 2585 struct sctp_generic_recvmsg_args /* { 2586 int sd, 2587 struct iovec *iov, 2588 
int iovlen, 2589 struct sockaddr *from, 2590 __socklen_t *fromlenaddr, 2591 struct sctp_sndrcvinfo *sinfo, 2592 int *msg_flags 2593 } */ *uap; 2594 { 2595 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2596 u_int8_t sockbufstore[256]; 2597 struct uio auio; 2598 struct iovec *iov, *tiov; 2599 struct sctp_sndrcvinfo sinfo; 2600 struct socket *so; 2601 struct file *fp = NULL; 2602 struct sockaddr *fromsa; 2603 int fromlen; 2604 int len, i, msg_flags; 2605 int error = 0; 2606 #ifdef KTRACE 2607 struct uio *ktruio = NULL; 2608 #endif 2609 2610 AUDIT_ARG_FD(uap->sd); 2611 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); 2612 if (error) { 2613 return (error); 2614 } 2615 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2616 if (error) { 2617 goto out1; 2618 } 2619 2620 so = fp->f_data; 2621 #ifdef MAC 2622 error = mac_socket_check_receive(td->td_ucred, so); 2623 if (error) { 2624 goto out; 2625 return (error); 2626 } 2627 #endif /* MAC */ 2628 2629 if (uap->fromlenaddr) { 2630 error = copyin(uap->fromlenaddr, 2631 &fromlen, sizeof (fromlen)); 2632 if (error) { 2633 goto out; 2634 } 2635 } else { 2636 fromlen = 0; 2637 } 2638 if(uap->msg_flags) { 2639 error = copyin(uap->msg_flags, &msg_flags, sizeof (int)); 2640 if (error) { 2641 goto out; 2642 } 2643 } else { 2644 msg_flags = 0; 2645 } 2646 auio.uio_iov = iov; 2647 auio.uio_iovcnt = uap->iovlen; 2648 auio.uio_segflg = UIO_USERSPACE; 2649 auio.uio_rw = UIO_READ; 2650 auio.uio_td = td; 2651 auio.uio_offset = 0; /* XXX */ 2652 auio.uio_resid = 0; 2653 tiov = iov; 2654 for (i = 0; i <uap->iovlen; i++, tiov++) { 2655 if ((auio.uio_resid += tiov->iov_len) < 0) { 2656 error = EINVAL; 2657 goto out; 2658 } 2659 } 2660 len = auio.uio_resid; 2661 fromsa = (struct sockaddr *)sockbufstore; 2662 2663 #ifdef KTRACE 2664 if (KTRPOINT(td, KTR_GENIO)) 2665 ktruio = cloneuio(&auio); 2666 #endif /* KTRACE */ 2667 error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL, 2668 fromsa, fromlen, &msg_flags, 2669 (struct 
sctp_sndrcvinfo *)&sinfo, 1); 2670 if (error) { 2671 if (auio.uio_resid != (int)len && (error == ERESTART || 2672 error == EINTR || error == EWOULDBLOCK)) 2673 error = 0; 2674 } else { 2675 if (uap->sinfo) 2676 error = copyout(&sinfo, uap->sinfo, sizeof (sinfo)); 2677 } 2678 #ifdef KTRACE 2679 if (ktruio != NULL) { 2680 ktruio->uio_resid = (int)len - auio.uio_resid; 2681 ktrgenio(uap->sd, UIO_READ, ktruio, error); 2682 } 2683 #endif /* KTRACE */ 2684 if (error) 2685 goto out; 2686 td->td_retval[0] = (int)len - auio.uio_resid; 2687 2688 if (fromlen && uap->from) { 2689 len = fromlen; 2690 if (len <= 0 || fromsa == 0) 2691 len = 0; 2692 else { 2693 len = MIN(len, fromsa->sa_len); 2694 error = copyout(fromsa, uap->from, (unsigned)len); 2695 if (error) 2696 goto out; 2697 } 2698 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t)); 2699 if (error) { 2700 goto out; 2701 } 2702 } 2703 #ifdef KTRACE 2704 if (KTRPOINT(td, KTR_STRUCT)) 2705 ktrsockaddr(fromsa); 2706 #endif 2707 if (uap->msg_flags) { 2708 error = copyout(&msg_flags, uap->msg_flags, sizeof (int)); 2709 if (error) { 2710 goto out; 2711 } 2712 } 2713 out: 2714 free(iov, M_IOV); 2715 out1: 2716 if (fp) 2717 fdrop(fp, td); 2718 2719 return (error); 2720 #else /* SCTP */ 2721 return (EOPNOTSUPP); 2722 #endif /* SCTP */ 2723 } 2724