1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_inet.h" 39 #include "opt_inet6.h" 40 #include "opt_sctp.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/mutex.h> 49 #include <sys/sysproto.h> 50 #include <sys/malloc.h> 51 #include <sys/filedesc.h> 52 #include <sys/event.h> 53 #include <sys/proc.h> 54 #include <sys/fcntl.h> 55 #include <sys/file.h> 56 #include <sys/filio.h> 57 #include <sys/jail.h> 58 #include <sys/mount.h> 59 #include <sys/mbuf.h> 60 #include <sys/protosw.h> 61 #include <sys/sf_buf.h> 62 #include <sys/socket.h> 63 #include <sys/socketvar.h> 64 #include <sys/signalvar.h> 65 #include <sys/syscallsubr.h> 66 #include <sys/sysctl.h> 67 #include <sys/uio.h> 68 #include <sys/vnode.h> 69 #ifdef KTRACE 70 #include <sys/ktrace.h> 71 #endif 72 73 #include <net/vnet.h> 74 75 #include <security/audit/audit.h> 76 #include <security/mac/mac_framework.h> 77 78 #include <vm/vm.h> 79 #include <vm/vm_object.h> 80 #include <vm/vm_page.h> 81 #include <vm/vm_pageout.h> 82 #include <vm/vm_kern.h> 83 #include <vm/vm_extern.h> 84 85 #if defined(INET) || defined(INET6) 86 #ifdef SCTP 87 #include <netinet/sctp.h> 88 #include <netinet/sctp_peeloff.h> 89 #endif /* SCTP */ 90 #endif /* INET || INET6 */ 91 92 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 93 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 94 95 static int accept1(struct thread *td, struct accept_args *uap, int compat); 96 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 97 static int getsockname1(struct thread *td, struct getsockname_args *uap, 98 int compat); 99 static int getpeername1(struct thread *td, struct getpeername_args *uap, 100 int compat); 101 102 /* 103 * NSFBUFS-related variables and associated sysctls 104 */ 105 int nsfbufs; 106 int nsfbufspeak; 107 int nsfbufsused; 108 109 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 110 "Maximum number of sendfile(2) sf_bufs available"); 111 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 112 "Number of sendfile(2) sf_bufs at peak usage"); 113 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 114 "Number of sendfile(2) sf_bufs in use"); 115 116 /* 117 * Convert a user file descriptor to a kernel file entry. A reference on the 118 * file entry is held upon returning. This is lighter weight than 119 * fgetsock(), which bumps the socket reference drops the file reference 120 * count instead, as this approach avoids several additional mutex operations 121 * associated with the additional reference count. If requested, return the 122 * open file flags. 123 */ 124 static int 125 getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp) 126 { 127 struct file *fp; 128 int error; 129 130 fp = NULL; 131 if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) { 132 error = EBADF; 133 } else if (fp->f_type != DTYPE_SOCKET) { 134 fdrop(fp, curthread); 135 fp = NULL; 136 error = ENOTSOCK; 137 } else { 138 if (fflagp != NULL) 139 *fflagp = fp->f_flag; 140 error = 0; 141 } 142 *fpp = fp; 143 return (error); 144 } 145 146 /* 147 * System call interface to the socket abstraction. 148 */ 149 #if defined(COMPAT_43) 150 #define COMPAT_OLDSOCK 151 #endif 152 153 int 154 socket(td, uap) 155 struct thread *td; 156 struct socket_args /* { 157 int domain; 158 int type; 159 int protocol; 160 } */ *uap; 161 { 162 struct filedesc *fdp; 163 struct socket *so; 164 struct file *fp; 165 int fd, error; 166 167 AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol); 168 #ifdef MAC 169 error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type, 170 uap->protocol); 171 if (error) 172 return (error); 173 #endif 174 fdp = td->td_proc->p_fd; 175 error = falloc(td, &fp, &fd); 176 if (error) 177 return (error); 178 /* An extra reference on `fp' has been held for us by falloc(). */ 179 error = socreate(uap->domain, &so, uap->type, uap->protocol, 180 td->td_ucred, td); 181 if (error) { 182 fdclose(fdp, fp, fd, td); 183 } else { 184 finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops); 185 td->td_retval[0] = fd; 186 } 187 fdrop(fp, td); 188 return (error); 189 } 190 191 /* ARGSUSED */ 192 int 193 bind(td, uap) 194 struct thread *td; 195 struct bind_args /* { 196 int s; 197 caddr_t name; 198 int namelen; 199 } */ *uap; 200 { 201 struct sockaddr *sa; 202 int error; 203 204 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 205 return (error); 206 207 error = kern_bind(td, uap->s, sa); 208 free(sa, M_SONAME); 209 return (error); 210 } 211 212 int 213 kern_bind(td, fd, sa) 214 struct thread *td; 215 int fd; 216 struct sockaddr *sa; 217 { 218 struct socket *so; 219 struct file *fp; 220 int error; 221 222 AUDIT_ARG_FD(fd); 223 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 224 if (error) 225 return (error); 226 so = fp->f_data; 227 #ifdef KTRACE 228 if (KTRPOINT(td, KTR_STRUCT)) 229 ktrsockaddr(sa); 230 #endif 231 #ifdef MAC 232 error = mac_socket_check_bind(td->td_ucred, so, sa); 233 if (error == 0) 234 #endif 235 error = sobind(so, sa, td); 236 fdrop(fp, td); 237 return (error); 238 } 239 240 /* ARGSUSED */ 241 int 242 listen(td, uap) 243 struct thread *td; 244 struct listen_args /* { 245 int s; 246 int backlog; 247 } */ *uap; 248 { 249 struct socket *so; 250 struct file *fp; 251 int error; 252 253 AUDIT_ARG_FD(uap->s); 254 error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL); 255 if (error == 0) { 256 so = fp->f_data; 257 #ifdef MAC 258 error = mac_socket_check_listen(td->td_ucred, so); 259 if (error == 0) { 260 #endif 261 CURVNET_SET(so->so_vnet); 262 error = solisten(so, uap->backlog, td); 263 CURVNET_RESTORE(); 264 #ifdef MAC 265 } 266 #endif 267 fdrop(fp, td); 268 } 269 return(error); 270 } 271 272 /* 273 * accept1() 274 */ 275 static int 276 accept1(td, uap, compat) 277 struct thread *td; 278 struct accept_args /* { 279 int s; 280 struct sockaddr * __restrict name; 281 socklen_t * __restrict anamelen; 282 } */ *uap; 283 int compat; 284 { 285 struct sockaddr *name; 286 socklen_t namelen; 287 struct file *fp; 288 int error; 289 290 if (uap->name == NULL) 291 return (kern_accept(td, uap->s, NULL, NULL, NULL)); 292 293 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 294 if (error) 295 return (error); 296 297 error = kern_accept(td, uap->s, &name, &namelen, &fp); 298 299 /* 300 * return a namelen of zero for older code which might 301 * ignore the return value from accept. 302 */ 303 if (error) { 304 (void) copyout(&namelen, 305 uap->anamelen, sizeof(*uap->anamelen)); 306 return (error); 307 } 308 309 if (error == 0 && name != NULL) { 310 #ifdef COMPAT_OLDSOCK 311 if (compat) 312 ((struct osockaddr *)name)->sa_family = 313 name->sa_family; 314 #endif 315 error = copyout(name, uap->name, namelen); 316 } 317 if (error == 0) 318 error = copyout(&namelen, uap->anamelen, 319 sizeof(namelen)); 320 if (error) 321 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td); 322 fdrop(fp, td); 323 free(name, M_SONAME); 324 return (error); 325 } 326 327 int 328 kern_accept(struct thread *td, int s, struct sockaddr **name, 329 socklen_t *namelen, struct file **fp) 330 { 331 struct filedesc *fdp; 332 struct file *headfp, *nfp = NULL; 333 struct sockaddr *sa = NULL; 334 int error; 335 struct socket *head, *so; 336 int fd; 337 u_int fflag; 338 pid_t pgid; 339 int tmp; 340 341 if (name) { 342 *name = NULL; 343 if (*namelen < 0) 344 return (EINVAL); 345 } 346 347 AUDIT_ARG_FD(s); 348 fdp = td->td_proc->p_fd; 349 error = getsock(fdp, s, &headfp, &fflag); 350 if (error) 351 return (error); 352 head = headfp->f_data; 353 if ((head->so_options & SO_ACCEPTCONN) == 0) { 354 error = EINVAL; 355 goto done; 356 } 357 #ifdef MAC 358 error = mac_socket_check_accept(td->td_ucred, head); 359 if (error != 0) 360 goto done; 361 #endif 362 error = falloc(td, &nfp, &fd); 363 if (error) 364 goto done; 365 ACCEPT_LOCK(); 366 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 367 ACCEPT_UNLOCK(); 368 error = EWOULDBLOCK; 369 goto noconnection; 370 } 371 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 372 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 373 head->so_error = ECONNABORTED; 374 break; 375 } 376 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 377 "accept", 0); 378 if (error) { 379 ACCEPT_UNLOCK(); 380 goto noconnection; 381 } 382 } 383 if (head->so_error) { 384 error = head->so_error; 385 head->so_error = 0; 386 ACCEPT_UNLOCK(); 387 goto noconnection; 388 } 389 so = TAILQ_FIRST(&head->so_comp); 390 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 391 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 392 393 /* 394 * Before changing the flags on the socket, we have to bump the 395 * reference count. Otherwise, if the protocol calls sofree(), 396 * the socket will be released due to a zero refcount. 397 */ 398 SOCK_LOCK(so); /* soref() and so_state update */ 399 soref(so); /* file descriptor reference */ 400 401 TAILQ_REMOVE(&head->so_comp, so, so_list); 402 head->so_qlen--; 403 so->so_state |= (head->so_state & SS_NBIO); 404 so->so_qstate &= ~SQ_COMP; 405 so->so_head = NULL; 406 407 SOCK_UNLOCK(so); 408 ACCEPT_UNLOCK(); 409 410 /* An extra reference on `nfp' has been held for us by falloc(). */ 411 td->td_retval[0] = fd; 412 413 /* connection has been removed from the listen queue */ 414 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 415 416 pgid = fgetown(&head->so_sigio); 417 if (pgid != 0) 418 fsetown(pgid, &so->so_sigio); 419 420 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 421 /* Sync socket nonblocking/async state with file flags */ 422 tmp = fflag & FNONBLOCK; 423 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 424 tmp = fflag & FASYNC; 425 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 426 sa = 0; 427 CURVNET_SET(so->so_vnet); 428 error = soaccept(so, &sa); 429 CURVNET_RESTORE(); 430 if (error) { 431 /* 432 * return a namelen of zero for older code which might 433 * ignore the return value from accept. 434 */ 435 if (name) 436 *namelen = 0; 437 goto noconnection; 438 } 439 if (sa == NULL) { 440 if (name) 441 *namelen = 0; 442 goto done; 443 } 444 if (name) { 445 /* check sa_len before it is destroyed */ 446 if (*namelen > sa->sa_len) 447 *namelen = sa->sa_len; 448 #ifdef KTRACE 449 if (KTRPOINT(td, KTR_STRUCT)) 450 ktrsockaddr(sa); 451 #endif 452 *name = sa; 453 sa = NULL; 454 } 455 noconnection: 456 if (sa) 457 free(sa, M_SONAME); 458 459 /* 460 * close the new descriptor, assuming someone hasn't ripped it 461 * out from under us. 462 */ 463 if (error) 464 fdclose(fdp, nfp, fd, td); 465 466 /* 467 * Release explicitly held references before returning. We return 468 * a reference on nfp to the caller on success if they request it. 469 */ 470 done: 471 if (fp != NULL) { 472 if (error == 0) { 473 *fp = nfp; 474 nfp = NULL; 475 } else 476 *fp = NULL; 477 } 478 if (nfp != NULL) 479 fdrop(nfp, td); 480 fdrop(headfp, td); 481 return (error); 482 } 483 484 int 485 accept(td, uap) 486 struct thread *td; 487 struct accept_args *uap; 488 { 489 490 return (accept1(td, uap, 0)); 491 } 492 493 #ifdef COMPAT_OLDSOCK 494 int 495 oaccept(td, uap) 496 struct thread *td; 497 struct accept_args *uap; 498 { 499 500 return (accept1(td, uap, 1)); 501 } 502 #endif /* COMPAT_OLDSOCK */ 503 504 /* ARGSUSED */ 505 int 506 connect(td, uap) 507 struct thread *td; 508 struct connect_args /* { 509 int s; 510 caddr_t name; 511 int namelen; 512 } */ *uap; 513 { 514 struct sockaddr *sa; 515 int error; 516 517 error = getsockaddr(&sa, uap->name, uap->namelen); 518 if (error) 519 return (error); 520 521 error = kern_connect(td, uap->s, sa); 522 free(sa, M_SONAME); 523 return (error); 524 } 525 526 527 int 528 kern_connect(td, fd, sa) 529 struct thread *td; 530 int fd; 531 struct sockaddr *sa; 532 { 533 struct socket *so; 534 struct file *fp; 535 int error; 536 int interrupted = 0; 537 538 AUDIT_ARG_FD(fd); 539 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 540 if (error) 541 return (error); 542 so = fp->f_data; 543 if (so->so_state & SS_ISCONNECTING) { 544 error = EALREADY; 545 goto done1; 546 } 547 #ifdef KTRACE 548 if (KTRPOINT(td, KTR_STRUCT)) 549 ktrsockaddr(sa); 550 #endif 551 #ifdef MAC 552 error = mac_socket_check_connect(td->td_ucred, so, sa); 553 if (error) 554 goto bad; 555 #endif 556 error = soconnect(so, sa, td); 557 if (error) 558 goto bad; 559 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 560 error = EINPROGRESS; 561 goto done1; 562 } 563 SOCK_LOCK(so); 564 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 565 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 566 "connec", 0); 567 if (error) { 568 if (error == EINTR || error == ERESTART) 569 interrupted = 1; 570 break; 571 } 572 } 573 if (error == 0) { 574 error = so->so_error; 575 so->so_error = 0; 576 } 577 SOCK_UNLOCK(so); 578 bad: 579 if (!interrupted) 580 so->so_state &= ~SS_ISCONNECTING; 581 if (error == ERESTART) 582 error = EINTR; 583 done1: 584 fdrop(fp, td); 585 return (error); 586 } 587 588 int 589 kern_socketpair(struct thread *td, int domain, int type, int protocol, 590 int *rsv) 591 { 592 struct filedesc *fdp = td->td_proc->p_fd; 593 struct file *fp1, *fp2; 594 struct socket *so1, *so2; 595 int fd, error; 596 597 AUDIT_ARG_SOCKET(domain, type, protocol); 598 #ifdef MAC 599 /* We might want to have a separate check for socket pairs. */ 600 error = mac_socket_check_create(td->td_ucred, domain, type, 601 protocol); 602 if (error) 603 return (error); 604 #endif 605 error = socreate(domain, &so1, type, protocol, td->td_ucred, td); 606 if (error) 607 return (error); 608 error = socreate(domain, &so2, type, protocol, td->td_ucred, td); 609 if (error) 610 goto free1; 611 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 612 error = falloc(td, &fp1, &fd); 613 if (error) 614 goto free2; 615 rsv[0] = fd; 616 fp1->f_data = so1; /* so1 already has ref count */ 617 error = falloc(td, &fp2, &fd); 618 if (error) 619 goto free3; 620 fp2->f_data = so2; /* so2 already has ref count */ 621 rsv[1] = fd; 622 error = soconnect2(so1, so2); 623 if (error) 624 goto free4; 625 if (type == SOCK_DGRAM) { 626 /* 627 * Datagram socket connection is asymmetric. 628 */ 629 error = soconnect2(so2, so1); 630 if (error) 631 goto free4; 632 } 633 finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops); 634 finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops); 635 fdrop(fp1, td); 636 fdrop(fp2, td); 637 return (0); 638 free4: 639 fdclose(fdp, fp2, rsv[1], td); 640 fdrop(fp2, td); 641 free3: 642 fdclose(fdp, fp1, rsv[0], td); 643 fdrop(fp1, td); 644 free2: 645 if (so2 != NULL) 646 (void)soclose(so2); 647 free1: 648 if (so1 != NULL) 649 (void)soclose(so1); 650 return (error); 651 } 652 653 int 654 socketpair(struct thread *td, struct socketpair_args *uap) 655 { 656 int error, sv[2]; 657 658 error = kern_socketpair(td, uap->domain, uap->type, 659 uap->protocol, sv); 660 if (error) 661 return (error); 662 error = copyout(sv, uap->rsv, 2 * sizeof(int)); 663 if (error) { 664 (void)kern_close(td, sv[0]); 665 (void)kern_close(td, sv[1]); 666 } 667 return (error); 668 } 669 670 static int 671 sendit(td, s, mp, flags) 672 struct thread *td; 673 int s; 674 struct msghdr *mp; 675 int flags; 676 { 677 struct mbuf *control; 678 struct sockaddr *to; 679 int error; 680 681 if (mp->msg_name != NULL) { 682 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 683 if (error) { 684 to = NULL; 685 goto bad; 686 } 687 mp->msg_name = to; 688 } else { 689 to = NULL; 690 } 691 692 if (mp->msg_control) { 693 if (mp->msg_controllen < sizeof(struct cmsghdr) 694 #ifdef COMPAT_OLDSOCK 695 && mp->msg_flags != MSG_COMPAT 696 #endif 697 ) { 698 error = EINVAL; 699 goto bad; 700 } 701 error = sockargs(&control, mp->msg_control, 702 mp->msg_controllen, MT_CONTROL); 703 if (error) 704 goto bad; 705 #ifdef COMPAT_OLDSOCK 706 if (mp->msg_flags == MSG_COMPAT) { 707 struct cmsghdr *cm; 708 709 M_PREPEND(control, sizeof(*cm), M_WAIT); 710 cm = mtod(control, struct cmsghdr *); 711 cm->cmsg_len = control->m_len; 712 cm->cmsg_level = SOL_SOCKET; 713 cm->cmsg_type = SCM_RIGHTS; 714 } 715 #endif 716 } else { 717 control = NULL; 718 } 719 720 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 721 722 bad: 723 if (to) 724 free(to, M_SONAME); 725 return (error); 726 } 727 728 int 729 kern_sendit(td, s, mp, flags, control, segflg) 730 struct thread *td; 731 int s; 732 struct msghdr *mp; 733 int flags; 734 struct mbuf *control; 735 enum uio_seg segflg; 736 { 737 struct file *fp; 738 struct uio auio; 739 struct iovec *iov; 740 struct socket *so; 741 int i; 742 int len, error; 743 #ifdef KTRACE 744 struct uio *ktruio = NULL; 745 #endif 746 747 AUDIT_ARG_FD(s); 748 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 749 if (error) 750 return (error); 751 so = (struct socket *)fp->f_data; 752 753 #ifdef MAC 754 if (mp->msg_name != NULL) { 755 error = mac_socket_check_connect(td->td_ucred, so, 756 mp->msg_name); 757 if (error) 758 goto bad; 759 } 760 error = mac_socket_check_send(td->td_ucred, so); 761 if (error) 762 goto bad; 763 #endif 764 765 auio.uio_iov = mp->msg_iov; 766 auio.uio_iovcnt = mp->msg_iovlen; 767 auio.uio_segflg = segflg; 768 auio.uio_rw = UIO_WRITE; 769 auio.uio_td = td; 770 auio.uio_offset = 0; /* XXX */ 771 auio.uio_resid = 0; 772 iov = mp->msg_iov; 773 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 774 if ((auio.uio_resid += iov->iov_len) < 0) { 775 error = EINVAL; 776 goto bad; 777 } 778 } 779 #ifdef KTRACE 780 if (KTRPOINT(td, KTR_GENIO)) 781 ktruio = cloneuio(&auio); 782 #endif 783 len = auio.uio_resid; 784 error = sosend(so, mp->msg_name, &auio, 0, control, flags, td); 785 if (error) { 786 if (auio.uio_resid != len && (error == ERESTART || 787 error == EINTR || error == EWOULDBLOCK)) 788 error = 0; 789 /* Generation of SIGPIPE can be controlled per socket */ 790 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 791 !(flags & MSG_NOSIGNAL)) { 792 PROC_LOCK(td->td_proc); 793 psignal(td->td_proc, SIGPIPE); 794 PROC_UNLOCK(td->td_proc); 795 } 796 } 797 if (error == 0) 798 td->td_retval[0] = len - auio.uio_resid; 799 #ifdef KTRACE 800 if (ktruio != NULL) { 801 ktruio->uio_resid = td->td_retval[0]; 802 ktrgenio(s, UIO_WRITE, ktruio, error); 803 } 804 #endif 805 bad: 806 fdrop(fp, td); 807 return (error); 808 } 809 810 int 811 sendto(td, uap) 812 struct thread *td; 813 struct sendto_args /* { 814 int s; 815 caddr_t buf; 816 size_t len; 817 int flags; 818 caddr_t to; 819 int tolen; 820 } */ *uap; 821 { 822 struct msghdr msg; 823 struct iovec aiov; 824 int error; 825 826 msg.msg_name = uap->to; 827 msg.msg_namelen = uap->tolen; 828 msg.msg_iov = &aiov; 829 msg.msg_iovlen = 1; 830 msg.msg_control = 0; 831 #ifdef COMPAT_OLDSOCK 832 msg.msg_flags = 0; 833 #endif 834 aiov.iov_base = uap->buf; 835 aiov.iov_len = uap->len; 836 error = sendit(td, uap->s, &msg, uap->flags); 837 return (error); 838 } 839 840 #ifdef COMPAT_OLDSOCK 841 int 842 osend(td, uap) 843 struct thread *td; 844 struct osend_args /* { 845 int s; 846 caddr_t buf; 847 int len; 848 int flags; 849 } */ *uap; 850 { 851 struct msghdr msg; 852 struct iovec aiov; 853 int error; 854 855 msg.msg_name = 0; 856 msg.msg_namelen = 0; 857 msg.msg_iov = &aiov; 858 msg.msg_iovlen = 1; 859 aiov.iov_base = uap->buf; 860 aiov.iov_len = uap->len; 861 msg.msg_control = 0; 862 msg.msg_flags = 0; 863 error = sendit(td, uap->s, &msg, uap->flags); 864 return (error); 865 } 866 867 int 868 osendmsg(td, uap) 869 struct thread *td; 870 struct osendmsg_args /* { 871 int s; 872 caddr_t msg; 873 int flags; 874 } */ *uap; 875 { 876 struct msghdr msg; 877 struct iovec *iov; 878 int error; 879 880 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 881 if (error) 882 return (error); 883 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 884 if (error) 885 return (error); 886 msg.msg_iov = iov; 887 msg.msg_flags = MSG_COMPAT; 888 error = sendit(td, uap->s, &msg, uap->flags); 889 free(iov, M_IOV); 890 return (error); 891 } 892 #endif 893 894 int 895 sendmsg(td, uap) 896 struct thread *td; 897 struct sendmsg_args /* { 898 int s; 899 caddr_t msg; 900 int flags; 901 } */ *uap; 902 { 903 struct msghdr msg; 904 struct iovec *iov; 905 int error; 906 907 error = copyin(uap->msg, &msg, sizeof (msg)); 908 if (error) 909 return (error); 910 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 911 if (error) 912 return (error); 913 msg.msg_iov = iov; 914 #ifdef COMPAT_OLDSOCK 915 msg.msg_flags = 0; 916 #endif 917 error = sendit(td, uap->s, &msg, uap->flags); 918 free(iov, M_IOV); 919 return (error); 920 } 921 922 int 923 kern_recvit(td, s, mp, fromseg, controlp) 924 struct thread *td; 925 int s; 926 struct msghdr *mp; 927 enum uio_seg fromseg; 928 struct mbuf **controlp; 929 { 930 struct uio auio; 931 struct iovec *iov; 932 int i; 933 socklen_t len; 934 int error; 935 struct mbuf *m, *control = 0; 936 caddr_t ctlbuf; 937 struct file *fp; 938 struct socket *so; 939 struct sockaddr *fromsa = 0; 940 #ifdef KTRACE 941 struct uio *ktruio = NULL; 942 #endif 943 944 if(controlp != NULL) 945 *controlp = 0; 946 947 AUDIT_ARG_FD(s); 948 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 949 if (error) 950 return (error); 951 so = fp->f_data; 952 953 #ifdef MAC 954 error = mac_socket_check_receive(td->td_ucred, so); 955 if (error) { 956 fdrop(fp, td); 957 return (error); 958 } 959 #endif 960 961 auio.uio_iov = mp->msg_iov; 962 auio.uio_iovcnt = mp->msg_iovlen; 963 auio.uio_segflg = UIO_USERSPACE; 964 auio.uio_rw = UIO_READ; 965 auio.uio_td = td; 966 auio.uio_offset = 0; /* XXX */ 967 auio.uio_resid = 0; 968 iov = mp->msg_iov; 969 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 970 if ((auio.uio_resid += iov->iov_len) < 0) { 971 fdrop(fp, td); 972 return (EINVAL); 973 } 974 } 975 #ifdef KTRACE 976 if (KTRPOINT(td, KTR_GENIO)) 977 ktruio = cloneuio(&auio); 978 #endif 979 len = auio.uio_resid; 980 CURVNET_SET(so->so_vnet); 981 error = soreceive(so, &fromsa, &auio, (struct mbuf **)0, 982 (mp->msg_control || controlp) ? &control : (struct mbuf **)0, 983 &mp->msg_flags); 984 CURVNET_RESTORE(); 985 if (error) { 986 if (auio.uio_resid != (int)len && (error == ERESTART || 987 error == EINTR || error == EWOULDBLOCK)) 988 error = 0; 989 } 990 #ifdef KTRACE 991 if (ktruio != NULL) { 992 ktruio->uio_resid = (int)len - auio.uio_resid; 993 ktrgenio(s, UIO_READ, ktruio, error); 994 } 995 #endif 996 if (error) 997 goto out; 998 td->td_retval[0] = (int)len - auio.uio_resid; 999 if (mp->msg_name) { 1000 len = mp->msg_namelen; 1001 if (len <= 0 || fromsa == 0) 1002 len = 0; 1003 else { 1004 /* save sa_len before it is destroyed by MSG_COMPAT */ 1005 len = MIN(len, fromsa->sa_len); 1006 #ifdef COMPAT_OLDSOCK 1007 if (mp->msg_flags & MSG_COMPAT) 1008 ((struct osockaddr *)fromsa)->sa_family = 1009 fromsa->sa_family; 1010 #endif 1011 if (fromseg == UIO_USERSPACE) { 1012 error = copyout(fromsa, mp->msg_name, 1013 (unsigned)len); 1014 if (error) 1015 goto out; 1016 } else 1017 bcopy(fromsa, mp->msg_name, len); 1018 } 1019 mp->msg_namelen = len; 1020 } 1021 if (mp->msg_control && controlp == NULL) { 1022 #ifdef COMPAT_OLDSOCK 1023 /* 1024 * We assume that old recvmsg calls won't receive access 1025 * rights and other control info, esp. as control info 1026 * is always optional and those options didn't exist in 4.3. 1027 * If we receive rights, trim the cmsghdr; anything else 1028 * is tossed. 1029 */ 1030 if (control && mp->msg_flags & MSG_COMPAT) { 1031 if (mtod(control, struct cmsghdr *)->cmsg_level != 1032 SOL_SOCKET || 1033 mtod(control, struct cmsghdr *)->cmsg_type != 1034 SCM_RIGHTS) { 1035 mp->msg_controllen = 0; 1036 goto out; 1037 } 1038 control->m_len -= sizeof (struct cmsghdr); 1039 control->m_data += sizeof (struct cmsghdr); 1040 } 1041 #endif 1042 len = mp->msg_controllen; 1043 m = control; 1044 mp->msg_controllen = 0; 1045 ctlbuf = mp->msg_control; 1046 1047 while (m && len > 0) { 1048 unsigned int tocopy; 1049 1050 if (len >= m->m_len) 1051 tocopy = m->m_len; 1052 else { 1053 mp->msg_flags |= MSG_CTRUNC; 1054 tocopy = len; 1055 } 1056 1057 if ((error = copyout(mtod(m, caddr_t), 1058 ctlbuf, tocopy)) != 0) 1059 goto out; 1060 1061 ctlbuf += tocopy; 1062 len -= tocopy; 1063 m = m->m_next; 1064 } 1065 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1066 } 1067 out: 1068 fdrop(fp, td); 1069 #ifdef KTRACE 1070 if (fromsa && KTRPOINT(td, KTR_STRUCT)) 1071 ktrsockaddr(fromsa); 1072 #endif 1073 if (fromsa) 1074 free(fromsa, M_SONAME); 1075 1076 if (error == 0 && controlp != NULL) 1077 *controlp = control; 1078 else if (control) 1079 m_freem(control); 1080 1081 return (error); 1082 } 1083 1084 static int 1085 recvit(td, s, mp, namelenp) 1086 struct thread *td; 1087 int s; 1088 struct msghdr *mp; 1089 void *namelenp; 1090 { 1091 int error; 1092 1093 error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL); 1094 if (error) 1095 return (error); 1096 if (namelenp) { 1097 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t)); 1098 #ifdef COMPAT_OLDSOCK 1099 if (mp->msg_flags & MSG_COMPAT) 1100 error = 0; /* old recvfrom didn't check */ 1101 #endif 1102 } 1103 return (error); 1104 } 1105 1106 int 1107 recvfrom(td, uap) 1108 struct thread *td; 1109 struct recvfrom_args /* { 1110 int s; 1111 caddr_t buf; 1112 size_t len; 1113 int flags; 1114 struct sockaddr * __restrict from; 1115 socklen_t * __restrict fromlenaddr; 1116 } */ *uap; 1117 { 1118 struct msghdr msg; 1119 struct iovec aiov; 1120 int error; 1121 1122 if (uap->fromlenaddr) { 1123 error = copyin(uap->fromlenaddr, 1124 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1125 if (error) 1126 goto done2; 1127 } else { 1128 msg.msg_namelen = 0; 1129 } 1130 msg.msg_name = uap->from; 1131 msg.msg_iov = &aiov; 1132 msg.msg_iovlen = 1; 1133 aiov.iov_base = uap->buf; 1134 aiov.iov_len = uap->len; 1135 msg.msg_control = 0; 1136 msg.msg_flags = uap->flags; 1137 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1138 done2: 1139 return(error); 1140 } 1141 1142 #ifdef COMPAT_OLDSOCK 1143 int 1144 orecvfrom(td, uap) 1145 struct thread *td; 1146 struct recvfrom_args *uap; 1147 { 1148 1149 uap->flags |= MSG_COMPAT; 1150 return (recvfrom(td, uap)); 1151 } 1152 #endif 1153 1154 #ifdef COMPAT_OLDSOCK 1155 int 1156 orecv(td, uap) 1157 struct thread *td; 1158 struct orecv_args /* { 1159 int s; 1160 caddr_t buf; 1161 int len; 1162 int flags; 1163 } */ *uap; 1164 { 1165 struct msghdr msg; 1166 struct iovec aiov; 1167 int error; 1168 1169 msg.msg_name = 0; 1170 msg.msg_namelen = 0; 1171 msg.msg_iov = &aiov; 1172 msg.msg_iovlen = 1; 1173 aiov.iov_base = uap->buf; 1174 aiov.iov_len = uap->len; 1175 msg.msg_control = 0; 1176 msg.msg_flags = uap->flags; 1177 error = recvit(td, uap->s, &msg, NULL); 1178 return (error); 1179 } 1180 1181 /* 1182 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1183 * overlays the new one, missing only the flags, and with the (old) access 1184 * rights where the control fields are now. 1185 */ 1186 int 1187 orecvmsg(td, uap) 1188 struct thread *td; 1189 struct orecvmsg_args /* { 1190 int s; 1191 struct omsghdr *msg; 1192 int flags; 1193 } */ *uap; 1194 { 1195 struct msghdr msg; 1196 struct iovec *iov; 1197 int error; 1198 1199 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1200 if (error) 1201 return (error); 1202 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1203 if (error) 1204 return (error); 1205 msg.msg_flags = uap->flags | MSG_COMPAT; 1206 msg.msg_iov = iov; 1207 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1208 if (msg.msg_controllen && error == 0) 1209 error = copyout(&msg.msg_controllen, 1210 &uap->msg->msg_accrightslen, sizeof (int)); 1211 free(iov, M_IOV); 1212 return (error); 1213 } 1214 #endif 1215 1216 int 1217 recvmsg(td, uap) 1218 struct thread *td; 1219 struct recvmsg_args /* { 1220 int s; 1221 struct msghdr *msg; 1222 int flags; 1223 } */ *uap; 1224 { 1225 struct msghdr msg; 1226 struct iovec *uiov, *iov; 1227 int error; 1228 1229 error = copyin(uap->msg, &msg, sizeof (msg)); 1230 if (error) 1231 return (error); 1232 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1233 if (error) 1234 return (error); 1235 msg.msg_flags = uap->flags; 1236 #ifdef COMPAT_OLDSOCK 1237 msg.msg_flags &= ~MSG_COMPAT; 1238 #endif 1239 uiov = msg.msg_iov; 1240 msg.msg_iov = iov; 1241 error = recvit(td, uap->s, &msg, NULL); 1242 if (error == 0) { 1243 msg.msg_iov = uiov; 1244 error = copyout(&msg, uap->msg, sizeof(msg)); 1245 } 1246 free(iov, M_IOV); 1247 return (error); 1248 } 1249 1250 /* ARGSUSED */ 1251 int 1252 shutdown(td, uap) 1253 struct thread *td; 1254 struct shutdown_args /* { 1255 int s; 1256 int how; 1257 } */ *uap; 1258 { 1259 struct socket *so; 1260 struct file *fp; 1261 int error; 1262 1263 AUDIT_ARG_FD(uap->s); 1264 error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL); 1265 if (error == 0) { 1266 so = fp->f_data; 1267 error = soshutdown(so, uap->how); 1268 fdrop(fp, td); 1269 } 1270 return (error); 1271 } 1272 1273 /* ARGSUSED */ 1274 int 1275 setsockopt(td, uap) 1276 struct thread *td; 1277 struct setsockopt_args /* { 1278 int s; 1279 int level; 1280 int name; 1281 caddr_t val; 1282 int valsize; 1283 } */ *uap; 1284 { 1285 1286 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1287 uap->val, UIO_USERSPACE, uap->valsize)); 1288 } 1289 1290 int 1291 kern_setsockopt(td, s, level, name, val, valseg, valsize) 1292 struct thread *td; 1293 int s; 1294 int level; 1295 int name; 1296 void *val; 1297 enum uio_seg valseg; 1298 socklen_t valsize; 1299 { 1300 int error; 1301 struct socket *so; 1302 struct file *fp; 1303 struct sockopt sopt; 1304 1305 if (val == NULL && valsize != 0) 1306 return (EFAULT); 1307 if ((int)valsize < 0) 1308 return (EINVAL); 1309 1310 sopt.sopt_dir = SOPT_SET; 1311 sopt.sopt_level = level; 1312 sopt.sopt_name = name; 1313 sopt.sopt_val = val; 1314 sopt.sopt_valsize = valsize; 1315 switch (valseg) { 1316 case UIO_USERSPACE: 1317 sopt.sopt_td = td; 1318 break; 1319 case UIO_SYSSPACE: 1320 sopt.sopt_td = NULL; 1321 break; 1322 default: 1323 panic("kern_setsockopt called with bad valseg"); 1324 } 1325 1326 AUDIT_ARG_FD(s); 1327 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 1328 if (error == 0) { 1329 so = fp->f_data; 1330 CURVNET_SET(so->so_vnet); 1331 error = sosetopt(so, &sopt); 1332 CURVNET_RESTORE(); 1333 fdrop(fp, td); 1334 } 1335 return(error); 1336 } 1337 1338 /* ARGSUSED */ 1339 int 1340 getsockopt(td, uap) 1341 struct thread *td; 1342 struct getsockopt_args /* { 1343 int s; 1344 int level; 1345 int name; 1346 void * __restrict val; 1347 socklen_t * __restrict avalsize; 1348 } */ *uap; 1349 { 1350 socklen_t valsize; 1351 int error; 1352 1353 if (uap->val) { 1354 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1355 if (error) 1356 return (error); 1357 } 1358 1359 error = kern_getsockopt(td, uap->s, uap->level, uap->name, 1360 uap->val, UIO_USERSPACE, &valsize); 1361 1362 if (error == 0) 1363 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1364 return (error); 1365 } 1366 1367 /* 1368 * Kernel version of getsockopt. 1369 * optval can be a userland or userspace. optlen is always a kernel pointer. 1370 */ 1371 int 1372 kern_getsockopt(td, s, level, name, val, valseg, valsize) 1373 struct thread *td; 1374 int s; 1375 int level; 1376 int name; 1377 void *val; 1378 enum uio_seg valseg; 1379 socklen_t *valsize; 1380 { 1381 int error; 1382 struct socket *so; 1383 struct file *fp; 1384 struct sockopt sopt; 1385 1386 if (val == NULL) 1387 *valsize = 0; 1388 if ((int)*valsize < 0) 1389 return (EINVAL); 1390 1391 sopt.sopt_dir = SOPT_GET; 1392 sopt.sopt_level = level; 1393 sopt.sopt_name = name; 1394 sopt.sopt_val = val; 1395 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1396 switch (valseg) { 1397 case UIO_USERSPACE: 1398 sopt.sopt_td = td; 1399 break; 1400 case UIO_SYSSPACE: 1401 sopt.sopt_td = NULL; 1402 break; 1403 default: 1404 panic("kern_getsockopt called with bad valseg"); 1405 } 1406 1407 AUDIT_ARG_FD(s); 1408 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 1409 if (error == 0) { 1410 so = fp->f_data; 1411 CURVNET_SET(so->so_vnet); 1412 error = sogetopt(so, &sopt); 1413 CURVNET_RESTORE(); 1414 *valsize = sopt.sopt_valsize; 1415 fdrop(fp, td); 1416 } 1417 return (error); 1418 } 1419 1420 /* 1421 * getsockname1() - Get socket name. 1422 */ 1423 /* ARGSUSED */ 1424 static int 1425 getsockname1(td, uap, compat) 1426 struct thread *td; 1427 struct getsockname_args /* { 1428 int fdes; 1429 struct sockaddr * __restrict asa; 1430 socklen_t * __restrict alen; 1431 } */ *uap; 1432 int compat; 1433 { 1434 struct sockaddr *sa; 1435 socklen_t len; 1436 int error; 1437 1438 error = copyin(uap->alen, &len, sizeof(len)); 1439 if (error) 1440 return (error); 1441 1442 error = kern_getsockname(td, uap->fdes, &sa, &len); 1443 if (error) 1444 return (error); 1445 1446 if (len != 0) { 1447 #ifdef COMPAT_OLDSOCK 1448 if (compat) 1449 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1450 #endif 1451 error = copyout(sa, uap->asa, (u_int)len); 1452 } 1453 free(sa, M_SONAME); 1454 if (error == 0) 1455 error = copyout(&len, uap->alen, sizeof(len)); 1456 return (error); 1457 } 1458 1459 int 1460 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, 1461 socklen_t *alen) 1462 { 1463 struct socket *so; 1464 struct file *fp; 1465 socklen_t len; 1466 int error; 1467 1468 if (*alen < 0) 1469 return (EINVAL); 1470 1471 AUDIT_ARG_FD(fd); 1472 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 1473 if (error) 1474 return (error); 1475 so = fp->f_data; 1476 *sa = NULL; 1477 CURVNET_SET(so->so_vnet); 1478 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa); 1479 CURVNET_RESTORE(); 1480 if (error) 1481 goto bad; 1482 if (*sa == NULL) 1483 len = 0; 1484 else 1485 len = MIN(*alen, (*sa)->sa_len); 1486 *alen = len; 1487 #ifdef KTRACE 1488 if (KTRPOINT(td, KTR_STRUCT)) 1489 ktrsockaddr(*sa); 1490 #endif 1491 bad: 1492 fdrop(fp, td); 1493 if (error && *sa) { 1494 free(*sa, M_SONAME); 1495 *sa = NULL; 1496 } 1497 return (error); 1498 } 1499 1500 int 1501 getsockname(td, uap) 1502 struct thread *td; 1503 struct getsockname_args *uap; 1504 { 1505 1506 return (getsockname1(td, uap, 0)); 1507 } 1508 1509 #ifdef COMPAT_OLDSOCK 1510 int 1511 ogetsockname(td, uap) 1512 struct thread *td; 1513 struct getsockname_args *uap; 1514 { 1515 1516 return (getsockname1(td, uap, 1)); 1517 } 1518 #endif /* COMPAT_OLDSOCK */ 1519 1520 /* 1521 * getpeername1() - Get name of peer for connected socket. 1522 */ 1523 /* ARGSUSED */ 1524 static int 1525 getpeername1(td, uap, compat) 1526 struct thread *td; 1527 struct getpeername_args /* { 1528 int fdes; 1529 struct sockaddr * __restrict asa; 1530 socklen_t * __restrict alen; 1531 } */ *uap; 1532 int compat; 1533 { 1534 struct sockaddr *sa; 1535 socklen_t len; 1536 int error; 1537 1538 error = copyin(uap->alen, &len, sizeof (len)); 1539 if (error) 1540 return (error); 1541 1542 error = kern_getpeername(td, uap->fdes, &sa, &len); 1543 if (error) 1544 return (error); 1545 1546 if (len != 0) { 1547 #ifdef COMPAT_OLDSOCK 1548 if (compat) 1549 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1550 #endif 1551 error = copyout(sa, uap->asa, (u_int)len); 1552 } 1553 free(sa, M_SONAME); 1554 if (error == 0) 1555 error = copyout(&len, uap->alen, sizeof(len)); 1556 return (error); 1557 } 1558 1559 int 1560 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, 1561 socklen_t *alen) 1562 { 1563 struct socket *so; 1564 struct file *fp; 1565 socklen_t len; 1566 int error; 1567 1568 if (*alen < 0) 1569 return (EINVAL); 1570 1571 AUDIT_ARG_FD(fd); 1572 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 1573 if (error) 1574 return (error); 1575 so = fp->f_data; 1576 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1577 error = ENOTCONN; 1578 goto done; 1579 } 1580 *sa = NULL; 1581 CURVNET_SET(so->so_vnet); 1582 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa); 1583 CURVNET_RESTORE(); 1584 if (error) 1585 goto bad; 1586 if (*sa == NULL) 1587 len = 0; 1588 else 1589 len = MIN(*alen, (*sa)->sa_len); 1590 *alen = len; 1591 #ifdef KTRACE 1592 if (KTRPOINT(td, KTR_STRUCT)) 1593 ktrsockaddr(*sa); 1594 #endif 1595 bad: 1596 if (error && *sa) { 1597 free(*sa, M_SONAME); 1598 *sa = NULL; 1599 } 1600 done: 1601 fdrop(fp, td); 1602 return (error); 1603 } 1604 1605 int 1606 getpeername(td, uap) 1607 struct thread *td; 1608 struct getpeername_args *uap; 1609 { 1610 1611 return (getpeername1(td, uap, 0)); 1612 } 1613 1614 #ifdef COMPAT_OLDSOCK 1615 int 1616 ogetpeername(td, uap) 1617 struct thread *td; 1618 struct ogetpeername_args *uap; 1619 { 1620 1621 /* XXX uap should have type `getpeername_args *' to begin with. */ 1622 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1623 } 1624 #endif /* COMPAT_OLDSOCK */ 1625 1626 int 1627 sockargs(mp, buf, buflen, type) 1628 struct mbuf **mp; 1629 caddr_t buf; 1630 int buflen, type; 1631 { 1632 struct sockaddr *sa; 1633 struct mbuf *m; 1634 int error; 1635 1636 if ((u_int)buflen > MLEN) { 1637 #ifdef COMPAT_OLDSOCK 1638 if (type == MT_SONAME && (u_int)buflen <= 112) 1639 buflen = MLEN; /* unix domain compat. hack */ 1640 else 1641 #endif 1642 if ((u_int)buflen > MCLBYTES) 1643 return (EINVAL); 1644 } 1645 m = m_get(M_WAIT, type); 1646 if ((u_int)buflen > MLEN) 1647 MCLGET(m, M_WAIT); 1648 m->m_len = buflen; 1649 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1650 if (error) 1651 (void) m_free(m); 1652 else { 1653 *mp = m; 1654 if (type == MT_SONAME) { 1655 sa = mtod(m, struct sockaddr *); 1656 1657 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1658 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1659 sa->sa_family = sa->sa_len; 1660 #endif 1661 sa->sa_len = buflen; 1662 } 1663 } 1664 return (error); 1665 } 1666 1667 int 1668 getsockaddr(namp, uaddr, len) 1669 struct sockaddr **namp; 1670 caddr_t uaddr; 1671 size_t len; 1672 { 1673 struct sockaddr *sa; 1674 int error; 1675 1676 if (len > SOCK_MAXADDRLEN) 1677 return (ENAMETOOLONG); 1678 if (len < offsetof(struct sockaddr, sa_data[0])) 1679 return (EINVAL); 1680 sa = malloc(len, M_SONAME, M_WAITOK); 1681 error = copyin(uaddr, sa, len); 1682 if (error) { 1683 free(sa, M_SONAME); 1684 } else { 1685 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1686 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1687 sa->sa_family = sa->sa_len; 1688 #endif 1689 sa->sa_len = len; 1690 *namp = sa; 1691 } 1692 return (error); 1693 } 1694 1695 #include <sys/condvar.h> 1696 1697 struct sendfile_sync { 1698 struct mtx mtx; 1699 struct cv cv; 1700 unsigned count; 1701 }; 1702 1703 /* 1704 * Detach mapped page and release resources back to the system. 1705 */ 1706 void 1707 sf_buf_mext(void *addr, void *args) 1708 { 1709 vm_page_t m; 1710 struct sendfile_sync *sfs; 1711 1712 m = sf_buf_page(args); 1713 sf_buf_free(args); 1714 vm_page_lock_queues(); 1715 vm_page_unwire(m, 0); 1716 /* 1717 * Check for the object going away on us. This can 1718 * happen since we don't hold a reference to it. 1719 * If so, we're responsible for freeing the page. 1720 */ 1721 if (m->wire_count == 0 && m->object == NULL) 1722 vm_page_free(m); 1723 vm_page_unlock_queues(); 1724 if (addr == NULL) 1725 return; 1726 sfs = addr; 1727 mtx_lock(&sfs->mtx); 1728 KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0")); 1729 if (--sfs->count == 0) 1730 cv_signal(&sfs->cv); 1731 mtx_unlock(&sfs->mtx); 1732 } 1733 1734 /* 1735 * sendfile(2) 1736 * 1737 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1738 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1739 * 1740 * Send a file specified by 'fd' and starting at 'offset' to a socket 1741 * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes == 1742 * 0. Optionally add a header and/or trailer to the socket output. If 1743 * specified, write the total number of bytes sent into *sbytes. 1744 */ 1745 int 1746 sendfile(struct thread *td, struct sendfile_args *uap) 1747 { 1748 1749 return (do_sendfile(td, uap, 0)); 1750 } 1751 1752 static int 1753 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1754 { 1755 struct sf_hdtr hdtr; 1756 struct uio *hdr_uio, *trl_uio; 1757 int error; 1758 1759 hdr_uio = trl_uio = NULL; 1760 1761 if (uap->hdtr != NULL) { 1762 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1763 if (error) 1764 goto out; 1765 if (hdtr.headers != NULL) { 1766 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1767 if (error) 1768 goto out; 1769 } 1770 if (hdtr.trailers != NULL) { 1771 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); 1772 if (error) 1773 goto out; 1774 1775 } 1776 } 1777 1778 error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat); 1779 out: 1780 if (hdr_uio) 1781 free(hdr_uio, M_IOV); 1782 if (trl_uio) 1783 free(trl_uio, M_IOV); 1784 return (error); 1785 } 1786 1787 #ifdef COMPAT_FREEBSD4 1788 int 1789 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1790 { 1791 struct sendfile_args args; 1792 1793 args.fd = uap->fd; 1794 args.s = uap->s; 1795 args.offset = uap->offset; 1796 args.nbytes = uap->nbytes; 1797 args.hdtr = uap->hdtr; 1798 args.sbytes = uap->sbytes; 1799 args.flags = uap->flags; 1800 1801 return (do_sendfile(td, &args, 1)); 1802 } 1803 #endif /* COMPAT_FREEBSD4 */ 1804 1805 int 1806 kern_sendfile(struct thread *td, struct sendfile_args *uap, 1807 struct uio *hdr_uio, struct uio *trl_uio, int compat) 1808 { 1809 struct file *sock_fp; 1810 struct vnode *vp; 1811 struct vm_object *obj = NULL; 1812 struct socket *so = NULL; 1813 struct mbuf *m = NULL; 1814 struct sf_buf *sf; 1815 struct vm_page *pg; 1816 off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0; 1817 int error, hdrlen = 0, mnw = 0; 1818 int vfslocked; 1819 struct sendfile_sync *sfs = NULL; 1820 1821 /* 1822 * The file descriptor must be a regular file and have a 1823 * backing VM object. 1824 * File offset must be positive. If it goes beyond EOF 1825 * we send only the header/trailer and no payload data. 1826 */ 1827 AUDIT_ARG_FD(uap->fd); 1828 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1829 goto out; 1830 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1831 vn_lock(vp, LK_SHARED | LK_RETRY); 1832 if (vp->v_type == VREG) { 1833 obj = vp->v_object; 1834 if (obj != NULL) { 1835 /* 1836 * Temporarily increase the backing VM 1837 * object's reference count so that a forced 1838 * reclamation of its vnode does not 1839 * immediately destroy it. 1840 */ 1841 VM_OBJECT_LOCK(obj); 1842 if ((obj->flags & OBJ_DEAD) == 0) { 1843 vm_object_reference_locked(obj); 1844 VM_OBJECT_UNLOCK(obj); 1845 } else { 1846 VM_OBJECT_UNLOCK(obj); 1847 obj = NULL; 1848 } 1849 } 1850 } 1851 VOP_UNLOCK(vp, 0); 1852 VFS_UNLOCK_GIANT(vfslocked); 1853 if (obj == NULL) { 1854 error = EINVAL; 1855 goto out; 1856 } 1857 if (uap->offset < 0) { 1858 error = EINVAL; 1859 goto out; 1860 } 1861 1862 /* 1863 * The socket must be a stream socket and connected. 1864 * Remember if it a blocking or non-blocking socket. 1865 */ 1866 if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp, 1867 NULL)) != 0) 1868 goto out; 1869 so = sock_fp->f_data; 1870 if (so->so_type != SOCK_STREAM) { 1871 error = EINVAL; 1872 goto out; 1873 } 1874 if ((so->so_state & SS_ISCONNECTED) == 0) { 1875 error = ENOTCONN; 1876 goto out; 1877 } 1878 /* 1879 * Do not wait on memory allocations but return ENOMEM for 1880 * caller to retry later. 1881 * XXX: Experimental. 1882 */ 1883 if (uap->flags & SF_MNOWAIT) 1884 mnw = 1; 1885 1886 if (uap->flags & SF_SYNC) { 1887 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK); 1888 memset(sfs, 0, sizeof *sfs); 1889 mtx_init(&sfs->mtx, "sendfile", MTX_DEF, 0); 1890 cv_init(&sfs->cv, "sendfile"); 1891 } 1892 1893 #ifdef MAC 1894 error = mac_socket_check_send(td->td_ucred, so); 1895 if (error) 1896 goto out; 1897 #endif 1898 1899 /* If headers are specified copy them into mbufs. */ 1900 if (hdr_uio != NULL) { 1901 hdr_uio->uio_td = td; 1902 hdr_uio->uio_rw = UIO_WRITE; 1903 if (hdr_uio->uio_resid > 0) { 1904 /* 1905 * In FBSD < 5.0 the nbytes to send also included 1906 * the header. If compat is specified subtract the 1907 * header size from nbytes. 1908 */ 1909 if (compat) { 1910 if (uap->nbytes > hdr_uio->uio_resid) 1911 uap->nbytes -= hdr_uio->uio_resid; 1912 else 1913 uap->nbytes = 0; 1914 } 1915 m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK), 1916 0, 0, 0); 1917 if (m == NULL) { 1918 error = mnw ? EAGAIN : ENOBUFS; 1919 goto out; 1920 } 1921 hdrlen = m_length(m, NULL); 1922 } 1923 } 1924 1925 /* 1926 * Protect against multiple writers to the socket. 1927 * 1928 * XXXRW: Historically this has assumed non-interruptibility, so now 1929 * we implement that, but possibly shouldn't. 1930 */ 1931 (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR); 1932 1933 /* 1934 * Loop through the pages of the file, starting with the requested 1935 * offset. Get a file page (do I/O if necessary), map the file page 1936 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1937 * it on the socket. 1938 * This is done in two loops. The inner loop turns as many pages 1939 * as it can, up to available socket buffer space, without blocking 1940 * into mbufs to have it bulk delivered into the socket send buffer. 1941 * The outer loop checks the state and available space of the socket 1942 * and takes care of the overall progress. 1943 */ 1944 for (off = uap->offset, rem = uap->nbytes; ; ) { 1945 int loopbytes = 0; 1946 int space = 0; 1947 int done = 0; 1948 1949 /* 1950 * Check the socket state for ongoing connection, 1951 * no errors and space in socket buffer. 1952 * If space is low allow for the remainder of the 1953 * file to be processed if it fits the socket buffer. 1954 * Otherwise block in waiting for sufficient space 1955 * to proceed, or if the socket is nonblocking, return 1956 * to userland with EAGAIN while reporting how far 1957 * we've come. 1958 * We wait until the socket buffer has significant free 1959 * space to do bulk sends. This makes good use of file 1960 * system read ahead and allows packet segmentation 1961 * offloading hardware to take over lots of work. If 1962 * we were not careful here we would send off only one 1963 * sfbuf at a time. 1964 */ 1965 SOCKBUF_LOCK(&so->so_snd); 1966 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2) 1967 so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; 1968 retry_space: 1969 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 1970 error = EPIPE; 1971 SOCKBUF_UNLOCK(&so->so_snd); 1972 goto done; 1973 } else if (so->so_error) { 1974 error = so->so_error; 1975 so->so_error = 0; 1976 SOCKBUF_UNLOCK(&so->so_snd); 1977 goto done; 1978 } 1979 space = sbspace(&so->so_snd); 1980 if (space < rem && 1981 (space <= 0 || 1982 space < so->so_snd.sb_lowat)) { 1983 if (so->so_state & SS_NBIO) { 1984 SOCKBUF_UNLOCK(&so->so_snd); 1985 error = EAGAIN; 1986 goto done; 1987 } 1988 /* 1989 * sbwait drops the lock while sleeping. 1990 * When we loop back to retry_space the 1991 * state may have changed and we retest 1992 * for it. 1993 */ 1994 error = sbwait(&so->so_snd); 1995 /* 1996 * An error from sbwait usually indicates that we've 1997 * been interrupted by a signal. If we've sent anything 1998 * then return bytes sent, otherwise return the error. 1999 */ 2000 if (error) { 2001 SOCKBUF_UNLOCK(&so->so_snd); 2002 goto done; 2003 } 2004 goto retry_space; 2005 } 2006 SOCKBUF_UNLOCK(&so->so_snd); 2007 2008 /* 2009 * Reduce space in the socket buffer by the size of 2010 * the header mbuf chain. 2011 * hdrlen is set to 0 after the first loop. 2012 */ 2013 space -= hdrlen; 2014 2015 /* 2016 * Loop and construct maximum sized mbuf chain to be bulk 2017 * dumped into socket buffer. 2018 */ 2019 while (space > loopbytes) { 2020 vm_pindex_t pindex; 2021 vm_offset_t pgoff; 2022 struct mbuf *m0; 2023 2024 VM_OBJECT_LOCK(obj); 2025 /* 2026 * Calculate the amount to transfer. 2027 * Not to exceed a page, the EOF, 2028 * or the passed in nbytes. 2029 */ 2030 pgoff = (vm_offset_t)(off & PAGE_MASK); 2031 xfsize = omin(PAGE_SIZE - pgoff, 2032 obj->un_pager.vnp.vnp_size - uap->offset - 2033 fsbytes - loopbytes); 2034 if (uap->nbytes) 2035 rem = (uap->nbytes - fsbytes - loopbytes); 2036 else 2037 rem = obj->un_pager.vnp.vnp_size - 2038 uap->offset - fsbytes - loopbytes; 2039 xfsize = omin(rem, xfsize); 2040 xfsize = omin(space - loopbytes, xfsize); 2041 if (xfsize <= 0) { 2042 VM_OBJECT_UNLOCK(obj); 2043 done = 1; /* all data sent */ 2044 break; 2045 } 2046 2047 /* 2048 * Attempt to look up the page. Allocate 2049 * if not found or wait and loop if busy. 2050 */ 2051 pindex = OFF_TO_IDX(off); 2052 pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY | 2053 VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY); 2054 2055 /* 2056 * Check if page is valid for what we need, 2057 * otherwise initiate I/O. 2058 * If we already turned some pages into mbufs, 2059 * send them off before we come here again and 2060 * block. 2061 */ 2062 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) 2063 VM_OBJECT_UNLOCK(obj); 2064 else if (m != NULL) 2065 error = EAGAIN; /* send what we already got */ 2066 else if (uap->flags & SF_NODISKIO) 2067 error = EBUSY; 2068 else { 2069 int bsize, resid; 2070 2071 /* 2072 * Ensure that our page is still around 2073 * when the I/O completes. 2074 */ 2075 vm_page_io_start(pg); 2076 VM_OBJECT_UNLOCK(obj); 2077 2078 /* 2079 * Get the page from backing store. 2080 */ 2081 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2082 error = vn_lock(vp, LK_SHARED); 2083 if (error != 0) 2084 goto after_read; 2085 bsize = vp->v_mount->mnt_stat.f_iosize; 2086 2087 /* 2088 * XXXMAC: Because we don't have fp->f_cred 2089 * here, we pass in NOCRED. This is probably 2090 * wrong, but is consistent with our original 2091 * implementation. 2092 */ 2093 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 2094 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 2095 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 2096 td->td_ucred, NOCRED, &resid, td); 2097 VOP_UNLOCK(vp, 0); 2098 after_read: 2099 VFS_UNLOCK_GIANT(vfslocked); 2100 VM_OBJECT_LOCK(obj); 2101 vm_page_io_finish(pg); 2102 if (!error) 2103 VM_OBJECT_UNLOCK(obj); 2104 mbstat.sf_iocnt++; 2105 } 2106 if (error) { 2107 vm_page_lock_queues(); 2108 vm_page_unwire(pg, 0); 2109 /* 2110 * See if anyone else might know about 2111 * this page. If not and it is not valid, 2112 * then free it. 2113 */ 2114 if (pg->wire_count == 0 && pg->valid == 0 && 2115 pg->busy == 0 && !(pg->oflags & VPO_BUSY) && 2116 pg->hold_count == 0) { 2117 vm_page_free(pg); 2118 } 2119 vm_page_unlock_queues(); 2120 VM_OBJECT_UNLOCK(obj); 2121 if (error == EAGAIN) 2122 error = 0; /* not a real error */ 2123 break; 2124 } 2125 2126 /* 2127 * Get a sendfile buf. We usually wait as long 2128 * as necessary, but this wait can be interrupted. 2129 */ 2130 if ((sf = sf_buf_alloc(pg, 2131 (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) { 2132 mbstat.sf_allocfail++; 2133 vm_page_lock_queues(); 2134 vm_page_unwire(pg, 0); 2135 /* 2136 * XXX: Not same check as above!? 2137 */ 2138 if (pg->wire_count == 0 && pg->object == NULL) 2139 vm_page_free(pg); 2140 vm_page_unlock_queues(); 2141 error = (mnw ? EAGAIN : EINTR); 2142 break; 2143 } 2144 2145 /* 2146 * Get an mbuf and set it up as having 2147 * external storage. 2148 */ 2149 m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); 2150 if (m0 == NULL) { 2151 error = (mnw ? EAGAIN : ENOBUFS); 2152 sf_buf_mext((void *)sf_buf_kva(sf), sf); 2153 break; 2154 } 2155 MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, 2156 sfs, sf, M_RDONLY, EXT_SFBUF); 2157 m0->m_data = (char *)sf_buf_kva(sf) + pgoff; 2158 m0->m_len = xfsize; 2159 2160 /* Append to mbuf chain. */ 2161 if (m != NULL) 2162 m_cat(m, m0); 2163 else 2164 m = m0; 2165 2166 /* Keep track of bits processed. */ 2167 loopbytes += xfsize; 2168 off += xfsize; 2169 2170 if (sfs != NULL) { 2171 mtx_lock(&sfs->mtx); 2172 sfs->count++; 2173 mtx_unlock(&sfs->mtx); 2174 } 2175 } 2176 2177 /* Add the buffer chain to the socket buffer. */ 2178 if (m != NULL) { 2179 int mlen, err; 2180 2181 mlen = m_length(m, NULL); 2182 SOCKBUF_LOCK(&so->so_snd); 2183 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2184 error = EPIPE; 2185 SOCKBUF_UNLOCK(&so->so_snd); 2186 goto done; 2187 } 2188 SOCKBUF_UNLOCK(&so->so_snd); 2189 CURVNET_SET(so->so_vnet); 2190 /* Avoid error aliasing. */ 2191 err = (*so->so_proto->pr_usrreqs->pru_send) 2192 (so, 0, m, NULL, NULL, td); 2193 CURVNET_RESTORE(); 2194 if (err == 0) { 2195 /* 2196 * We need two counters to get the 2197 * file offset and nbytes to send 2198 * right: 2199 * - sbytes contains the total amount 2200 * of bytes sent, including headers. 2201 * - fsbytes contains the total amount 2202 * of bytes sent from the file. 2203 */ 2204 sbytes += mlen; 2205 fsbytes += mlen; 2206 if (hdrlen) { 2207 fsbytes -= hdrlen; 2208 hdrlen = 0; 2209 } 2210 } else if (error == 0) 2211 error = err; 2212 m = NULL; /* pru_send always consumes */ 2213 } 2214 2215 /* Quit outer loop on error or when we're done. */ 2216 if (done) 2217 break; 2218 if (error) 2219 goto done; 2220 } 2221 2222 /* 2223 * Send trailers. Wimp out and use writev(2). 2224 */ 2225 if (trl_uio != NULL) { 2226 sbunlock(&so->so_snd); 2227 error = kern_writev(td, uap->s, trl_uio); 2228 if (error == 0) 2229 sbytes += td->td_retval[0]; 2230 goto out; 2231 } 2232 2233 done: 2234 sbunlock(&so->so_snd); 2235 out: 2236 /* 2237 * If there was no error we have to clear td->td_retval[0] 2238 * because it may have been set by writev. 2239 */ 2240 if (error == 0) { 2241 td->td_retval[0] = 0; 2242 } 2243 if (uap->sbytes != NULL) { 2244 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2245 } 2246 if (obj != NULL) 2247 vm_object_deallocate(obj); 2248 if (vp != NULL) { 2249 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2250 vrele(vp); 2251 VFS_UNLOCK_GIANT(vfslocked); 2252 } 2253 if (so) 2254 fdrop(sock_fp, td); 2255 if (m) 2256 m_freem(m); 2257 2258 if (sfs != NULL) { 2259 mtx_lock(&sfs->mtx); 2260 if (sfs->count != 0) 2261 cv_wait(&sfs->cv, &sfs->mtx); 2262 KASSERT(sfs->count == 0, ("sendfile sync still busy")); 2263 cv_destroy(&sfs->cv); 2264 mtx_destroy(&sfs->mtx); 2265 free(sfs, M_TEMP); 2266 } 2267 2268 if (error == ERESTART) 2269 error = EINTR; 2270 2271 return (error); 2272 } 2273 2274 /* 2275 * SCTP syscalls. 2276 * Functionality only compiled in if SCTP is defined in the kernel Makefile, 2277 * otherwise all return EOPNOTSUPP. 2278 * XXX: We should make this loadable one day. 2279 */ 2280 int 2281 sctp_peeloff(td, uap) 2282 struct thread *td; 2283 struct sctp_peeloff_args /* { 2284 int sd; 2285 caddr_t name; 2286 } */ *uap; 2287 { 2288 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2289 struct filedesc *fdp; 2290 struct file *nfp = NULL; 2291 int error; 2292 struct socket *head, *so; 2293 int fd; 2294 u_int fflag; 2295 2296 fdp = td->td_proc->p_fd; 2297 AUDIT_ARG_FD(uap->sd); 2298 error = fgetsock(td, uap->sd, &head, &fflag); 2299 if (error) 2300 goto done2; 2301 error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name); 2302 if (error) 2303 goto done2; 2304 /* 2305 * At this point we know we do have a assoc to pull 2306 * we proceed to get the fd setup. This may block 2307 * but that is ok. 2308 */ 2309 2310 error = falloc(td, &nfp, &fd); 2311 if (error) 2312 goto done; 2313 td->td_retval[0] = fd; 2314 2315 CURVNET_SET(head->so_vnet); 2316 so = sonewconn(head, SS_ISCONNECTED); 2317 if (so == NULL) 2318 goto noconnection; 2319 /* 2320 * Before changing the flags on the socket, we have to bump the 2321 * reference count. Otherwise, if the protocol calls sofree(), 2322 * the socket will be released due to a zero refcount. 2323 */ 2324 SOCK_LOCK(so); 2325 soref(so); /* file descriptor reference */ 2326 SOCK_UNLOCK(so); 2327 2328 ACCEPT_LOCK(); 2329 2330 TAILQ_REMOVE(&head->so_comp, so, so_list); 2331 head->so_qlen--; 2332 so->so_state |= (head->so_state & SS_NBIO); 2333 so->so_state &= ~SS_NOFDREF; 2334 so->so_qstate &= ~SQ_COMP; 2335 so->so_head = NULL; 2336 ACCEPT_UNLOCK(); 2337 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 2338 error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name); 2339 if (error) 2340 goto noconnection; 2341 if (head->so_sigio != NULL) 2342 fsetown(fgetown(&head->so_sigio), &so->so_sigio); 2343 2344 noconnection: 2345 /* 2346 * close the new descriptor, assuming someone hasn't ripped it 2347 * out from under us. 2348 */ 2349 if (error) 2350 fdclose(fdp, nfp, fd, td); 2351 2352 /* 2353 * Release explicitly held references before returning. 2354 */ 2355 CURVNET_RESTORE(); 2356 done: 2357 if (nfp != NULL) 2358 fdrop(nfp, td); 2359 fputsock(head); 2360 done2: 2361 return (error); 2362 #else /* SCTP */ 2363 return (EOPNOTSUPP); 2364 #endif /* SCTP */ 2365 } 2366 2367 int 2368 sctp_generic_sendmsg (td, uap) 2369 struct thread *td; 2370 struct sctp_generic_sendmsg_args /* { 2371 int sd, 2372 caddr_t msg, 2373 int mlen, 2374 caddr_t to, 2375 __socklen_t tolen, 2376 struct sctp_sndrcvinfo *sinfo, 2377 int flags 2378 } */ *uap; 2379 { 2380 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2381 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2382 struct socket *so; 2383 struct file *fp = NULL; 2384 int use_rcvinfo = 1; 2385 int error = 0, len; 2386 struct sockaddr *to = NULL; 2387 #ifdef KTRACE 2388 struct uio *ktruio = NULL; 2389 #endif 2390 struct uio auio; 2391 struct iovec iov[1]; 2392 2393 if (uap->sinfo) { 2394 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2395 if (error) 2396 return (error); 2397 u_sinfo = &sinfo; 2398 } 2399 if (uap->tolen) { 2400 error = getsockaddr(&to, uap->to, uap->tolen); 2401 if (error) { 2402 to = NULL; 2403 goto sctp_bad2; 2404 } 2405 } 2406 2407 AUDIT_ARG_FD(uap->sd); 2408 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); 2409 if (error) 2410 goto sctp_bad; 2411 #ifdef KTRACE 2412 if (KTRPOINT(td, KTR_STRUCT)) 2413 ktrsockaddr(to); 2414 #endif 2415 2416 iov[0].iov_base = uap->msg; 2417 iov[0].iov_len = uap->mlen; 2418 2419 so = (struct socket *)fp->f_data; 2420 #ifdef MAC 2421 error = mac_socket_check_send(td->td_ucred, so); 2422 if (error) 2423 goto sctp_bad; 2424 #endif /* MAC */ 2425 2426 auio.uio_iov = iov; 2427 auio.uio_iovcnt = 1; 2428 auio.uio_segflg = UIO_USERSPACE; 2429 auio.uio_rw = UIO_WRITE; 2430 auio.uio_td = td; 2431 auio.uio_offset = 0; /* XXX */ 2432 auio.uio_resid = 0; 2433 len = auio.uio_resid = uap->mlen; 2434 CURVNET_SET(so->so_vnet); 2435 error = sctp_lower_sosend(so, to, &auio, 2436 (struct mbuf *)NULL, (struct mbuf *)NULL, 2437 uap->flags, use_rcvinfo, u_sinfo, td); 2438 CURVNET_RESTORE(); 2439 if (error) { 2440 if (auio.uio_resid != len && (error == ERESTART || 2441 error == EINTR || error == EWOULDBLOCK)) 2442 error = 0; 2443 /* Generation of SIGPIPE can be controlled per socket. */ 2444 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2445 !(uap->flags & MSG_NOSIGNAL)) { 2446 PROC_LOCK(td->td_proc); 2447 psignal(td->td_proc, SIGPIPE); 2448 PROC_UNLOCK(td->td_proc); 2449 } 2450 } 2451 if (error == 0) 2452 td->td_retval[0] = len - auio.uio_resid; 2453 #ifdef KTRACE 2454 if (ktruio != NULL) { 2455 ktruio->uio_resid = td->td_retval[0]; 2456 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2457 } 2458 #endif /* KTRACE */ 2459 sctp_bad: 2460 if (fp) 2461 fdrop(fp, td); 2462 sctp_bad2: 2463 if (to) 2464 free(to, M_SONAME); 2465 return (error); 2466 #else /* SCTP */ 2467 return (EOPNOTSUPP); 2468 #endif /* SCTP */ 2469 } 2470 2471 int 2472 sctp_generic_sendmsg_iov(td, uap) 2473 struct thread *td; 2474 struct sctp_generic_sendmsg_iov_args /* { 2475 int sd, 2476 struct iovec *iov, 2477 int iovlen, 2478 caddr_t to, 2479 __socklen_t tolen, 2480 struct sctp_sndrcvinfo *sinfo, 2481 int flags 2482 } */ *uap; 2483 { 2484 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2485 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2486 struct socket *so; 2487 struct file *fp = NULL; 2488 int use_rcvinfo = 1; 2489 int error=0, len, i; 2490 struct sockaddr *to = NULL; 2491 #ifdef KTRACE 2492 struct uio *ktruio = NULL; 2493 #endif 2494 struct uio auio; 2495 struct iovec *iov, *tiov; 2496 2497 if (uap->sinfo) { 2498 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2499 if (error) 2500 return (error); 2501 u_sinfo = &sinfo; 2502 } 2503 if (uap->tolen) { 2504 error = getsockaddr(&to, uap->to, uap->tolen); 2505 if (error) { 2506 to = NULL; 2507 goto sctp_bad2; 2508 } 2509 } 2510 2511 AUDIT_ARG_FD(uap->sd); 2512 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); 2513 if (error) 2514 goto sctp_bad1; 2515 2516 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2517 if (error) 2518 goto sctp_bad1; 2519 #ifdef KTRACE 2520 if (KTRPOINT(td, KTR_STRUCT)) 2521 ktrsockaddr(to); 2522 #endif 2523 2524 so = (struct socket *)fp->f_data; 2525 #ifdef MAC 2526 error = mac_socket_check_send(td->td_ucred, so); 2527 if (error) 2528 goto sctp_bad; 2529 #endif /* MAC */ 2530 2531 auio.uio_iov = iov; 2532 auio.uio_iovcnt = uap->iovlen; 2533 auio.uio_segflg = UIO_USERSPACE; 2534 auio.uio_rw = UIO_WRITE; 2535 auio.uio_td = td; 2536 auio.uio_offset = 0; /* XXX */ 2537 auio.uio_resid = 0; 2538 tiov = iov; 2539 for (i = 0; i <uap->iovlen; i++, tiov++) { 2540 if ((auio.uio_resid += tiov->iov_len) < 0) { 2541 error = EINVAL; 2542 goto sctp_bad; 2543 } 2544 } 2545 len = auio.uio_resid; 2546 CURVNET_SET(so->so_vnet); 2547 error = sctp_lower_sosend(so, to, &auio, 2548 (struct mbuf *)NULL, (struct mbuf *)NULL, 2549 uap->flags, use_rcvinfo, u_sinfo, td); 2550 CURVNET_RESTORE(); 2551 if (error) { 2552 if (auio.uio_resid != len && (error == ERESTART || 2553 error == EINTR || error == EWOULDBLOCK)) 2554 error = 0; 2555 /* Generation of SIGPIPE can be controlled per socket */ 2556 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2557 !(uap->flags & MSG_NOSIGNAL)) { 2558 PROC_LOCK(td->td_proc); 2559 psignal(td->td_proc, SIGPIPE); 2560 PROC_UNLOCK(td->td_proc); 2561 } 2562 } 2563 if (error == 0) 2564 td->td_retval[0] = len - auio.uio_resid; 2565 #ifdef KTRACE 2566 if (ktruio != NULL) { 2567 ktruio->uio_resid = td->td_retval[0]; 2568 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2569 } 2570 #endif /* KTRACE */ 2571 sctp_bad: 2572 free(iov, M_IOV); 2573 sctp_bad1: 2574 if (fp) 2575 fdrop(fp, td); 2576 sctp_bad2: 2577 if (to) 2578 free(to, M_SONAME); 2579 return (error); 2580 #else /* SCTP */ 2581 return (EOPNOTSUPP); 2582 #endif /* SCTP */ 2583 } 2584 2585 int 2586 sctp_generic_recvmsg(td, uap) 2587 struct thread *td; 2588 struct sctp_generic_recvmsg_args /* { 2589 int sd, 2590 struct iovec *iov, 2591 int iovlen, 2592 struct sockaddr *from, 2593 __socklen_t *fromlenaddr, 2594 struct sctp_sndrcvinfo *sinfo, 2595 int *msg_flags 2596 } */ *uap; 2597 { 2598 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2599 u_int8_t sockbufstore[256]; 2600 struct uio auio; 2601 struct iovec *iov, *tiov; 2602 struct sctp_sndrcvinfo sinfo; 2603 struct socket *so; 2604 struct file *fp = NULL; 2605 struct sockaddr *fromsa; 2606 int fromlen; 2607 int len, i, msg_flags; 2608 int error = 0; 2609 #ifdef KTRACE 2610 struct uio *ktruio = NULL; 2611 #endif 2612 2613 AUDIT_ARG_FD(uap->sd); 2614 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); 2615 if (error) { 2616 return (error); 2617 } 2618 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2619 if (error) { 2620 goto out1; 2621 } 2622 2623 so = fp->f_data; 2624 #ifdef MAC 2625 error = mac_socket_check_receive(td->td_ucred, so); 2626 if (error) { 2627 goto out; 2628 return (error); 2629 } 2630 #endif /* MAC */ 2631 2632 if (uap->fromlenaddr) { 2633 error = copyin(uap->fromlenaddr, 2634 &fromlen, sizeof (fromlen)); 2635 if (error) { 2636 goto out; 2637 } 2638 } else { 2639 fromlen = 0; 2640 } 2641 if(uap->msg_flags) { 2642 error = copyin(uap->msg_flags, &msg_flags, sizeof (int)); 2643 if (error) { 2644 goto out; 2645 } 2646 } else { 2647 msg_flags = 0; 2648 } 2649 auio.uio_iov = iov; 2650 auio.uio_iovcnt = uap->iovlen; 2651 auio.uio_segflg = UIO_USERSPACE; 2652 auio.uio_rw = UIO_READ; 2653 auio.uio_td = td; 2654 auio.uio_offset = 0; /* XXX */ 2655 auio.uio_resid = 0; 2656 tiov = iov; 2657 for (i = 0; i <uap->iovlen; i++, tiov++) { 2658 if ((auio.uio_resid += tiov->iov_len) < 0) { 2659 error = EINVAL; 2660 goto out; 2661 } 2662 } 2663 len = auio.uio_resid; 2664 fromsa = (struct sockaddr *)sockbufstore; 2665 2666 #ifdef KTRACE 2667 if (KTRPOINT(td, KTR_GENIO)) 2668 ktruio = cloneuio(&auio); 2669 #endif /* KTRACE */ 2670 CURVNET_SET(so->so_vnet); 2671 error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL, 2672 fromsa, fromlen, &msg_flags, 2673 (struct sctp_sndrcvinfo *)&sinfo, 1); 2674 CURVNET_RESTORE(); 2675 if (error) { 2676 if (auio.uio_resid != (int)len && (error == ERESTART || 2677 error == EINTR || error == EWOULDBLOCK)) 2678 error = 0; 2679 } else { 2680 if (uap->sinfo) 2681 error = copyout(&sinfo, uap->sinfo, sizeof (sinfo)); 2682 } 2683 #ifdef KTRACE 2684 if (ktruio != NULL) { 2685 ktruio->uio_resid = (int)len - auio.uio_resid; 2686 ktrgenio(uap->sd, UIO_READ, ktruio, error); 2687 } 2688 #endif /* KTRACE */ 2689 if (error) 2690 goto out; 2691 td->td_retval[0] = (int)len - auio.uio_resid; 2692 2693 if (fromlen && uap->from) { 2694 len = fromlen; 2695 if (len <= 0 || fromsa == 0) 2696 len = 0; 2697 else { 2698 len = MIN(len, fromsa->sa_len); 2699 error = copyout(fromsa, uap->from, (unsigned)len); 2700 if (error) 2701 goto out; 2702 } 2703 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t)); 2704 if (error) { 2705 goto out; 2706 } 2707 } 2708 #ifdef KTRACE 2709 if (KTRPOINT(td, KTR_STRUCT)) 2710 ktrsockaddr(fromsa); 2711 #endif 2712 if (uap->msg_flags) { 2713 error = copyout(&msg_flags, uap->msg_flags, sizeof (int)); 2714 if (error) { 2715 goto out; 2716 } 2717 } 2718 out: 2719 free(iov, M_IOV); 2720 out1: 2721 if (fp) 2722 fdrop(fp, td); 2723 2724 return (error); 2725 #else /* SCTP */ 2726 return (EOPNOTSUPP); 2727 #endif /* SCTP */ 2728 } 2729