1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_inet.h" 39 #include "opt_inet6.h" 40 #include "opt_sctp.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/mutex.h> 49 #include <sys/sysproto.h> 50 #include <sys/malloc.h> 51 #include <sys/filedesc.h> 52 #include <sys/event.h> 53 #include <sys/proc.h> 54 #include <sys/fcntl.h> 55 #include <sys/file.h> 56 #include <sys/filio.h> 57 #include <sys/jail.h> 58 #include <sys/mount.h> 59 #include <sys/mbuf.h> 60 #include <sys/protosw.h> 61 #include <sys/sf_buf.h> 62 #include <sys/sysent.h> 63 #include <sys/socket.h> 64 #include <sys/socketvar.h> 65 #include <sys/signalvar.h> 66 #include <sys/syscallsubr.h> 67 #include <sys/sysctl.h> 68 #include <sys/uio.h> 69 #include <sys/vnode.h> 70 #ifdef KTRACE 71 #include <sys/ktrace.h> 72 #endif 73 #ifdef COMPAT_FREEBSD32 74 #include <compat/freebsd32/freebsd32_util.h> 75 #endif 76 77 #include <net/vnet.h> 78 79 #include <security/audit/audit.h> 80 #include <security/mac/mac_framework.h> 81 82 #include <vm/vm.h> 83 #include <vm/vm_object.h> 84 #include <vm/vm_page.h> 85 #include <vm/vm_pageout.h> 86 #include <vm/vm_kern.h> 87 #include <vm/vm_extern.h> 88 89 #if defined(INET) || defined(INET6) 90 #ifdef SCTP 91 #include <netinet/sctp.h> 92 #include <netinet/sctp_peeloff.h> 93 #endif /* SCTP */ 94 #endif /* INET || INET6 */ 95 96 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 97 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 98 99 static int accept1(struct thread *td, struct accept_args *uap, int compat); 100 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 101 static int getsockname1(struct thread *td, struct getsockname_args *uap, 102 int compat); 103 static int getpeername1(struct thread *td, struct getpeername_args *uap, 104 int compat); 105 106 /* 107 * NSFBUFS-related variables and associated sysctls 108 */ 109 int nsfbufs; 110 int nsfbufspeak; 111 int nsfbufsused; 112 113 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 114 "Maximum number of sendfile(2) sf_bufs available"); 115 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 116 "Number of sendfile(2) sf_bufs at peak usage"); 117 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 118 "Number of sendfile(2) sf_bufs in use"); 119 120 /* 121 * Convert a user file descriptor to a kernel file entry. A reference on the 122 * file entry is held upon returning. This is lighter weight than 123 * fgetsock(), which bumps the socket reference drops the file reference 124 * count instead, as this approach avoids several additional mutex operations 125 * associated with the additional reference count. If requested, return the 126 * open file flags. 127 */ 128 static int 129 getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp) 130 { 131 struct file *fp; 132 int error; 133 134 fp = NULL; 135 if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) { 136 error = EBADF; 137 } else if (fp->f_type != DTYPE_SOCKET) { 138 fdrop(fp, curthread); 139 fp = NULL; 140 error = ENOTSOCK; 141 } else { 142 if (fflagp != NULL) 143 *fflagp = fp->f_flag; 144 error = 0; 145 } 146 *fpp = fp; 147 return (error); 148 } 149 150 /* 151 * System call interface to the socket abstraction. 152 */ 153 #if defined(COMPAT_43) 154 #define COMPAT_OLDSOCK 155 #endif 156 157 int 158 socket(td, uap) 159 struct thread *td; 160 struct socket_args /* { 161 int domain; 162 int type; 163 int protocol; 164 } */ *uap; 165 { 166 struct filedesc *fdp; 167 struct socket *so; 168 struct file *fp; 169 int fd, error; 170 171 AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol); 172 #ifdef MAC 173 error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type, 174 uap->protocol); 175 if (error) 176 return (error); 177 #endif 178 fdp = td->td_proc->p_fd; 179 error = falloc(td, &fp, &fd, 0); 180 if (error) 181 return (error); 182 /* An extra reference on `fp' has been held for us by falloc(). */ 183 error = socreate(uap->domain, &so, uap->type, uap->protocol, 184 td->td_ucred, td); 185 if (error) { 186 fdclose(fdp, fp, fd, td); 187 } else { 188 finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops); 189 td->td_retval[0] = fd; 190 } 191 fdrop(fp, td); 192 return (error); 193 } 194 195 /* ARGSUSED */ 196 int 197 bind(td, uap) 198 struct thread *td; 199 struct bind_args /* { 200 int s; 201 caddr_t name; 202 int namelen; 203 } */ *uap; 204 { 205 struct sockaddr *sa; 206 int error; 207 208 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 209 return (error); 210 211 error = kern_bind(td, uap->s, sa); 212 free(sa, M_SONAME); 213 return (error); 214 } 215 216 int 217 kern_bind(td, fd, sa) 218 struct thread *td; 219 int fd; 220 struct sockaddr *sa; 221 { 222 struct socket *so; 223 struct file *fp; 224 int error; 225 226 AUDIT_ARG_FD(fd); 227 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 228 if (error) 229 return (error); 230 so = fp->f_data; 231 #ifdef KTRACE 232 if (KTRPOINT(td, KTR_STRUCT)) 233 ktrsockaddr(sa); 234 #endif 235 #ifdef MAC 236 error = mac_socket_check_bind(td->td_ucred, so, sa); 237 if (error == 0) 238 #endif 239 error = sobind(so, sa, td); 240 fdrop(fp, td); 241 return (error); 242 } 243 244 /* ARGSUSED */ 245 int 246 listen(td, uap) 247 struct thread *td; 248 struct listen_args /* { 249 int s; 250 int backlog; 251 } */ *uap; 252 { 253 struct socket *so; 254 struct file *fp; 255 int error; 256 257 AUDIT_ARG_FD(uap->s); 258 error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL); 259 if (error == 0) { 260 so = fp->f_data; 261 #ifdef MAC 262 error = mac_socket_check_listen(td->td_ucred, so); 263 if (error == 0) 264 #endif 265 error = solisten(so, uap->backlog, td); 266 fdrop(fp, td); 267 } 268 return(error); 269 } 270 271 /* 272 * accept1() 273 */ 274 static int 275 accept1(td, uap, compat) 276 struct thread *td; 277 struct accept_args /* { 278 int s; 279 struct sockaddr * __restrict name; 280 socklen_t * __restrict anamelen; 281 } */ *uap; 282 int compat; 283 { 284 struct sockaddr *name; 285 socklen_t namelen; 286 struct file *fp; 287 int error; 288 289 if (uap->name == NULL) 290 return (kern_accept(td, uap->s, NULL, NULL, NULL)); 291 292 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 293 if (error) 294 return (error); 295 296 error = kern_accept(td, uap->s, &name, &namelen, &fp); 297 298 /* 299 * return a namelen of zero for older code which might 300 * ignore the return value from accept. 301 */ 302 if (error) { 303 (void) copyout(&namelen, 304 uap->anamelen, sizeof(*uap->anamelen)); 305 return (error); 306 } 307 308 if (error == 0 && name != NULL) { 309 #ifdef COMPAT_OLDSOCK 310 if (compat) 311 ((struct osockaddr *)name)->sa_family = 312 name->sa_family; 313 #endif 314 error = copyout(name, uap->name, namelen); 315 } 316 if (error == 0) 317 error = copyout(&namelen, uap->anamelen, 318 sizeof(namelen)); 319 if (error) 320 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td); 321 fdrop(fp, td); 322 free(name, M_SONAME); 323 return (error); 324 } 325 326 int 327 kern_accept(struct thread *td, int s, struct sockaddr **name, 328 socklen_t *namelen, struct file **fp) 329 { 330 struct filedesc *fdp; 331 struct file *headfp, *nfp = NULL; 332 struct sockaddr *sa = NULL; 333 int error; 334 struct socket *head, *so; 335 int fd; 336 u_int fflag; 337 pid_t pgid; 338 int tmp; 339 340 if (name) { 341 *name = NULL; 342 if (*namelen < 0) 343 return (EINVAL); 344 } 345 346 AUDIT_ARG_FD(s); 347 fdp = td->td_proc->p_fd; 348 error = getsock(fdp, s, &headfp, &fflag); 349 if (error) 350 return (error); 351 head = headfp->f_data; 352 if ((head->so_options & SO_ACCEPTCONN) == 0) { 353 error = EINVAL; 354 goto done; 355 } 356 #ifdef MAC 357 error = mac_socket_check_accept(td->td_ucred, head); 358 if (error != 0) 359 goto done; 360 #endif 361 error = falloc(td, &nfp, &fd, 0); 362 if (error) 363 goto done; 364 ACCEPT_LOCK(); 365 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 366 ACCEPT_UNLOCK(); 367 error = EWOULDBLOCK; 368 goto noconnection; 369 } 370 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 371 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 372 head->so_error = ECONNABORTED; 373 break; 374 } 375 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 376 "accept", 0); 377 if (error) { 378 ACCEPT_UNLOCK(); 379 goto noconnection; 380 } 381 } 382 if (head->so_error) { 383 error = head->so_error; 384 head->so_error = 0; 385 ACCEPT_UNLOCK(); 386 goto noconnection; 387 } 388 so = TAILQ_FIRST(&head->so_comp); 389 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 390 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 391 392 /* 393 * Before changing the flags on the socket, we have to bump the 394 * reference count. Otherwise, if the protocol calls sofree(), 395 * the socket will be released due to a zero refcount. 396 */ 397 SOCK_LOCK(so); /* soref() and so_state update */ 398 soref(so); /* file descriptor reference */ 399 400 TAILQ_REMOVE(&head->so_comp, so, so_list); 401 head->so_qlen--; 402 so->so_state |= (head->so_state & SS_NBIO); 403 so->so_qstate &= ~SQ_COMP; 404 so->so_head = NULL; 405 406 SOCK_UNLOCK(so); 407 ACCEPT_UNLOCK(); 408 409 /* An extra reference on `nfp' has been held for us by falloc(). */ 410 td->td_retval[0] = fd; 411 412 /* connection has been removed from the listen queue */ 413 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 414 415 pgid = fgetown(&head->so_sigio); 416 if (pgid != 0) 417 fsetown(pgid, &so->so_sigio); 418 419 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 420 /* Sync socket nonblocking/async state with file flags */ 421 tmp = fflag & FNONBLOCK; 422 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 423 tmp = fflag & FASYNC; 424 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 425 sa = 0; 426 error = soaccept(so, &sa); 427 if (error) { 428 /* 429 * return a namelen of zero for older code which might 430 * ignore the return value from accept. 431 */ 432 if (name) 433 *namelen = 0; 434 goto noconnection; 435 } 436 if (sa == NULL) { 437 if (name) 438 *namelen = 0; 439 goto done; 440 } 441 if (name) { 442 /* check sa_len before it is destroyed */ 443 if (*namelen > sa->sa_len) 444 *namelen = sa->sa_len; 445 #ifdef KTRACE 446 if (KTRPOINT(td, KTR_STRUCT)) 447 ktrsockaddr(sa); 448 #endif 449 *name = sa; 450 sa = NULL; 451 } 452 noconnection: 453 if (sa) 454 free(sa, M_SONAME); 455 456 /* 457 * close the new descriptor, assuming someone hasn't ripped it 458 * out from under us. 459 */ 460 if (error) 461 fdclose(fdp, nfp, fd, td); 462 463 /* 464 * Release explicitly held references before returning. We return 465 * a reference on nfp to the caller on success if they request it. 466 */ 467 done: 468 if (fp != NULL) { 469 if (error == 0) { 470 *fp = nfp; 471 nfp = NULL; 472 } else 473 *fp = NULL; 474 } 475 if (nfp != NULL) 476 fdrop(nfp, td); 477 fdrop(headfp, td); 478 return (error); 479 } 480 481 int 482 accept(td, uap) 483 struct thread *td; 484 struct accept_args *uap; 485 { 486 487 return (accept1(td, uap, 0)); 488 } 489 490 #ifdef COMPAT_OLDSOCK 491 int 492 oaccept(td, uap) 493 struct thread *td; 494 struct accept_args *uap; 495 { 496 497 return (accept1(td, uap, 1)); 498 } 499 #endif /* COMPAT_OLDSOCK */ 500 501 /* ARGSUSED */ 502 int 503 connect(td, uap) 504 struct thread *td; 505 struct connect_args /* { 506 int s; 507 caddr_t name; 508 int namelen; 509 } */ *uap; 510 { 511 struct sockaddr *sa; 512 int error; 513 514 error = getsockaddr(&sa, uap->name, uap->namelen); 515 if (error) 516 return (error); 517 518 error = kern_connect(td, uap->s, sa); 519 free(sa, M_SONAME); 520 return (error); 521 } 522 523 524 int 525 kern_connect(td, fd, sa) 526 struct thread *td; 527 int fd; 528 struct sockaddr *sa; 529 { 530 struct socket *so; 531 struct file *fp; 532 int error; 533 int interrupted = 0; 534 535 AUDIT_ARG_FD(fd); 536 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 537 if (error) 538 return (error); 539 so = fp->f_data; 540 if (so->so_state & SS_ISCONNECTING) { 541 error = EALREADY; 542 goto done1; 543 } 544 #ifdef KTRACE 545 if (KTRPOINT(td, KTR_STRUCT)) 546 ktrsockaddr(sa); 547 #endif 548 #ifdef MAC 549 error = mac_socket_check_connect(td->td_ucred, so, sa); 550 if (error) 551 goto bad; 552 #endif 553 error = soconnect(so, sa, td); 554 if (error) 555 goto bad; 556 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 557 error = EINPROGRESS; 558 goto done1; 559 } 560 SOCK_LOCK(so); 561 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 562 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 563 "connec", 0); 564 if (error) { 565 if (error == EINTR || error == ERESTART) 566 interrupted = 1; 567 break; 568 } 569 } 570 if (error == 0) { 571 error = so->so_error; 572 so->so_error = 0; 573 } 574 SOCK_UNLOCK(so); 575 bad: 576 if (!interrupted) 577 so->so_state &= ~SS_ISCONNECTING; 578 if (error == ERESTART) 579 error = EINTR; 580 done1: 581 fdrop(fp, td); 582 return (error); 583 } 584 585 int 586 kern_socketpair(struct thread *td, int domain, int type, int protocol, 587 int *rsv) 588 { 589 struct filedesc *fdp = td->td_proc->p_fd; 590 struct file *fp1, *fp2; 591 struct socket *so1, *so2; 592 int fd, error; 593 594 AUDIT_ARG_SOCKET(domain, type, protocol); 595 #ifdef MAC 596 /* We might want to have a separate check for socket pairs. */ 597 error = mac_socket_check_create(td->td_ucred, domain, type, 598 protocol); 599 if (error) 600 return (error); 601 #endif 602 error = socreate(domain, &so1, type, protocol, td->td_ucred, td); 603 if (error) 604 return (error); 605 error = socreate(domain, &so2, type, protocol, td->td_ucred, td); 606 if (error) 607 goto free1; 608 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 609 error = falloc(td, &fp1, &fd, 0); 610 if (error) 611 goto free2; 612 rsv[0] = fd; 613 fp1->f_data = so1; /* so1 already has ref count */ 614 error = falloc(td, &fp2, &fd, 0); 615 if (error) 616 goto free3; 617 fp2->f_data = so2; /* so2 already has ref count */ 618 rsv[1] = fd; 619 error = soconnect2(so1, so2); 620 if (error) 621 goto free4; 622 if (type == SOCK_DGRAM) { 623 /* 624 * Datagram socket connection is asymmetric. 625 */ 626 error = soconnect2(so2, so1); 627 if (error) 628 goto free4; 629 } 630 finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops); 631 finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops); 632 fdrop(fp1, td); 633 fdrop(fp2, td); 634 return (0); 635 free4: 636 fdclose(fdp, fp2, rsv[1], td); 637 fdrop(fp2, td); 638 free3: 639 fdclose(fdp, fp1, rsv[0], td); 640 fdrop(fp1, td); 641 free2: 642 if (so2 != NULL) 643 (void)soclose(so2); 644 free1: 645 if (so1 != NULL) 646 (void)soclose(so1); 647 return (error); 648 } 649 650 int 651 socketpair(struct thread *td, struct socketpair_args *uap) 652 { 653 int error, sv[2]; 654 655 error = kern_socketpair(td, uap->domain, uap->type, 656 uap->protocol, sv); 657 if (error) 658 return (error); 659 error = copyout(sv, uap->rsv, 2 * sizeof(int)); 660 if (error) { 661 (void)kern_close(td, sv[0]); 662 (void)kern_close(td, sv[1]); 663 } 664 return (error); 665 } 666 667 static int 668 sendit(td, s, mp, flags) 669 struct thread *td; 670 int s; 671 struct msghdr *mp; 672 int flags; 673 { 674 struct mbuf *control; 675 struct sockaddr *to; 676 int error; 677 678 if (mp->msg_name != NULL) { 679 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 680 if (error) { 681 to = NULL; 682 goto bad; 683 } 684 mp->msg_name = to; 685 } else { 686 to = NULL; 687 } 688 689 if (mp->msg_control) { 690 if (mp->msg_controllen < sizeof(struct cmsghdr) 691 #ifdef COMPAT_OLDSOCK 692 && mp->msg_flags != MSG_COMPAT 693 #endif 694 ) { 695 error = EINVAL; 696 goto bad; 697 } 698 error = sockargs(&control, mp->msg_control, 699 mp->msg_controllen, MT_CONTROL); 700 if (error) 701 goto bad; 702 #ifdef COMPAT_OLDSOCK 703 if (mp->msg_flags == MSG_COMPAT) { 704 struct cmsghdr *cm; 705 706 M_PREPEND(control, sizeof(*cm), M_WAIT); 707 cm = mtod(control, struct cmsghdr *); 708 cm->cmsg_len = control->m_len; 709 cm->cmsg_level = SOL_SOCKET; 710 cm->cmsg_type = SCM_RIGHTS; 711 } 712 #endif 713 } else { 714 control = NULL; 715 } 716 717 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 718 719 bad: 720 if (to) 721 free(to, M_SONAME); 722 return (error); 723 } 724 725 int 726 kern_sendit(td, s, mp, flags, control, segflg) 727 struct thread *td; 728 int s; 729 struct msghdr *mp; 730 int flags; 731 struct mbuf *control; 732 enum uio_seg segflg; 733 { 734 struct file *fp; 735 struct uio auio; 736 struct iovec *iov; 737 struct socket *so; 738 int i; 739 int len, error; 740 #ifdef KTRACE 741 struct uio *ktruio = NULL; 742 #endif 743 744 AUDIT_ARG_FD(s); 745 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 746 if (error) 747 return (error); 748 so = (struct socket *)fp->f_data; 749 750 #ifdef MAC 751 if (mp->msg_name != NULL) { 752 error = mac_socket_check_connect(td->td_ucred, so, 753 mp->msg_name); 754 if (error) 755 goto bad; 756 } 757 error = mac_socket_check_send(td->td_ucred, so); 758 if (error) 759 goto bad; 760 #endif 761 762 auio.uio_iov = mp->msg_iov; 763 auio.uio_iovcnt = mp->msg_iovlen; 764 auio.uio_segflg = segflg; 765 auio.uio_rw = UIO_WRITE; 766 auio.uio_td = td; 767 auio.uio_offset = 0; /* XXX */ 768 auio.uio_resid = 0; 769 iov = mp->msg_iov; 770 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 771 if ((auio.uio_resid += iov->iov_len) < 0) { 772 error = EINVAL; 773 goto bad; 774 } 775 } 776 #ifdef KTRACE 777 if (KTRPOINT(td, KTR_GENIO)) 778 ktruio = cloneuio(&auio); 779 #endif 780 len = auio.uio_resid; 781 error = sosend(so, mp->msg_name, &auio, 0, control, flags, td); 782 if (error) { 783 if (auio.uio_resid != len && (error == ERESTART || 784 error == EINTR || error == EWOULDBLOCK)) 785 error = 0; 786 /* Generation of SIGPIPE can be controlled per socket */ 787 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 788 !(flags & MSG_NOSIGNAL)) { 789 PROC_LOCK(td->td_proc); 790 tdsignal(td, SIGPIPE); 791 PROC_UNLOCK(td->td_proc); 792 } 793 } 794 if (error == 0) 795 td->td_retval[0] = len - auio.uio_resid; 796 #ifdef KTRACE 797 if (ktruio != NULL) { 798 ktruio->uio_resid = td->td_retval[0]; 799 ktrgenio(s, UIO_WRITE, ktruio, error); 800 } 801 #endif 802 bad: 803 fdrop(fp, td); 804 return (error); 805 } 806 807 int 808 sendto(td, uap) 809 struct thread *td; 810 struct sendto_args /* { 811 int s; 812 caddr_t buf; 813 size_t len; 814 int flags; 815 caddr_t to; 816 int tolen; 817 } */ *uap; 818 { 819 struct msghdr msg; 820 struct iovec aiov; 821 int error; 822 823 msg.msg_name = uap->to; 824 msg.msg_namelen = uap->tolen; 825 msg.msg_iov = &aiov; 826 msg.msg_iovlen = 1; 827 msg.msg_control = 0; 828 #ifdef COMPAT_OLDSOCK 829 msg.msg_flags = 0; 830 #endif 831 aiov.iov_base = uap->buf; 832 aiov.iov_len = uap->len; 833 error = sendit(td, uap->s, &msg, uap->flags); 834 return (error); 835 } 836 837 #ifdef COMPAT_OLDSOCK 838 int 839 osend(td, uap) 840 struct thread *td; 841 struct osend_args /* { 842 int s; 843 caddr_t buf; 844 int len; 845 int flags; 846 } */ *uap; 847 { 848 struct msghdr msg; 849 struct iovec aiov; 850 int error; 851 852 msg.msg_name = 0; 853 msg.msg_namelen = 0; 854 msg.msg_iov = &aiov; 855 msg.msg_iovlen = 1; 856 aiov.iov_base = uap->buf; 857 aiov.iov_len = uap->len; 858 msg.msg_control = 0; 859 msg.msg_flags = 0; 860 error = sendit(td, uap->s, &msg, uap->flags); 861 return (error); 862 } 863 864 int 865 osendmsg(td, uap) 866 struct thread *td; 867 struct osendmsg_args /* { 868 int s; 869 caddr_t msg; 870 int flags; 871 } */ *uap; 872 { 873 struct msghdr msg; 874 struct iovec *iov; 875 int error; 876 877 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 878 if (error) 879 return (error); 880 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 881 if (error) 882 return (error); 883 msg.msg_iov = iov; 884 msg.msg_flags = MSG_COMPAT; 885 error = sendit(td, uap->s, &msg, uap->flags); 886 free(iov, M_IOV); 887 return (error); 888 } 889 #endif 890 891 int 892 sendmsg(td, uap) 893 struct thread *td; 894 struct sendmsg_args /* { 895 int s; 896 caddr_t msg; 897 int flags; 898 } */ *uap; 899 { 900 struct msghdr msg; 901 struct iovec *iov; 902 int error; 903 904 error = copyin(uap->msg, &msg, sizeof (msg)); 905 if (error) 906 return (error); 907 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 908 if (error) 909 return (error); 910 msg.msg_iov = iov; 911 #ifdef COMPAT_OLDSOCK 912 msg.msg_flags = 0; 913 #endif 914 error = sendit(td, uap->s, &msg, uap->flags); 915 free(iov, M_IOV); 916 return (error); 917 } 918 919 int 920 kern_recvit(td, s, mp, fromseg, controlp) 921 struct thread *td; 922 int s; 923 struct msghdr *mp; 924 enum uio_seg fromseg; 925 struct mbuf **controlp; 926 { 927 struct uio auio; 928 struct iovec *iov; 929 int i; 930 socklen_t len; 931 int error; 932 struct mbuf *m, *control = 0; 933 caddr_t ctlbuf; 934 struct file *fp; 935 struct socket *so; 936 struct sockaddr *fromsa = 0; 937 #ifdef KTRACE 938 struct uio *ktruio = NULL; 939 #endif 940 941 if (controlp != NULL) 942 *controlp = NULL; 943 944 AUDIT_ARG_FD(s); 945 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 946 if (error) 947 return (error); 948 so = fp->f_data; 949 950 #ifdef MAC 951 error = mac_socket_check_receive(td->td_ucred, so); 952 if (error) { 953 fdrop(fp, td); 954 return (error); 955 } 956 #endif 957 958 auio.uio_iov = mp->msg_iov; 959 auio.uio_iovcnt = mp->msg_iovlen; 960 auio.uio_segflg = UIO_USERSPACE; 961 auio.uio_rw = UIO_READ; 962 auio.uio_td = td; 963 auio.uio_offset = 0; /* XXX */ 964 auio.uio_resid = 0; 965 iov = mp->msg_iov; 966 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 967 if ((auio.uio_resid += iov->iov_len) < 0) { 968 fdrop(fp, td); 969 return (EINVAL); 970 } 971 } 972 #ifdef KTRACE 973 if (KTRPOINT(td, KTR_GENIO)) 974 ktruio = cloneuio(&auio); 975 #endif 976 len = auio.uio_resid; 977 error = soreceive(so, &fromsa, &auio, (struct mbuf **)0, 978 (mp->msg_control || controlp) ? &control : (struct mbuf **)0, 979 &mp->msg_flags); 980 if (error) { 981 if (auio.uio_resid != (int)len && (error == ERESTART || 982 error == EINTR || error == EWOULDBLOCK)) 983 error = 0; 984 } 985 #ifdef KTRACE 986 if (ktruio != NULL) { 987 ktruio->uio_resid = (int)len - auio.uio_resid; 988 ktrgenio(s, UIO_READ, ktruio, error); 989 } 990 #endif 991 if (error) 992 goto out; 993 td->td_retval[0] = (int)len - auio.uio_resid; 994 if (mp->msg_name) { 995 len = mp->msg_namelen; 996 if (len <= 0 || fromsa == 0) 997 len = 0; 998 else { 999 /* save sa_len before it is destroyed by MSG_COMPAT */ 1000 len = MIN(len, fromsa->sa_len); 1001 #ifdef COMPAT_OLDSOCK 1002 if (mp->msg_flags & MSG_COMPAT) 1003 ((struct osockaddr *)fromsa)->sa_family = 1004 fromsa->sa_family; 1005 #endif 1006 if (fromseg == UIO_USERSPACE) { 1007 error = copyout(fromsa, mp->msg_name, 1008 (unsigned)len); 1009 if (error) 1010 goto out; 1011 } else 1012 bcopy(fromsa, mp->msg_name, len); 1013 } 1014 mp->msg_namelen = len; 1015 } 1016 if (mp->msg_control && controlp == NULL) { 1017 #ifdef COMPAT_OLDSOCK 1018 /* 1019 * We assume that old recvmsg calls won't receive access 1020 * rights and other control info, esp. as control info 1021 * is always optional and those options didn't exist in 4.3. 1022 * If we receive rights, trim the cmsghdr; anything else 1023 * is tossed. 1024 */ 1025 if (control && mp->msg_flags & MSG_COMPAT) { 1026 if (mtod(control, struct cmsghdr *)->cmsg_level != 1027 SOL_SOCKET || 1028 mtod(control, struct cmsghdr *)->cmsg_type != 1029 SCM_RIGHTS) { 1030 mp->msg_controllen = 0; 1031 goto out; 1032 } 1033 control->m_len -= sizeof (struct cmsghdr); 1034 control->m_data += sizeof (struct cmsghdr); 1035 } 1036 #endif 1037 len = mp->msg_controllen; 1038 m = control; 1039 mp->msg_controllen = 0; 1040 ctlbuf = mp->msg_control; 1041 1042 while (m && len > 0) { 1043 unsigned int tocopy; 1044 1045 if (len >= m->m_len) 1046 tocopy = m->m_len; 1047 else { 1048 mp->msg_flags |= MSG_CTRUNC; 1049 tocopy = len; 1050 } 1051 1052 if ((error = copyout(mtod(m, caddr_t), 1053 ctlbuf, tocopy)) != 0) 1054 goto out; 1055 1056 ctlbuf += tocopy; 1057 len -= tocopy; 1058 m = m->m_next; 1059 } 1060 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1061 } 1062 out: 1063 fdrop(fp, td); 1064 #ifdef KTRACE 1065 if (fromsa && KTRPOINT(td, KTR_STRUCT)) 1066 ktrsockaddr(fromsa); 1067 #endif 1068 if (fromsa) 1069 free(fromsa, M_SONAME); 1070 1071 if (error == 0 && controlp != NULL) 1072 *controlp = control; 1073 else if (control) 1074 m_freem(control); 1075 1076 return (error); 1077 } 1078 1079 static int 1080 recvit(td, s, mp, namelenp) 1081 struct thread *td; 1082 int s; 1083 struct msghdr *mp; 1084 void *namelenp; 1085 { 1086 int error; 1087 1088 error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL); 1089 if (error) 1090 return (error); 1091 if (namelenp) { 1092 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t)); 1093 #ifdef COMPAT_OLDSOCK 1094 if (mp->msg_flags & MSG_COMPAT) 1095 error = 0; /* old recvfrom didn't check */ 1096 #endif 1097 } 1098 return (error); 1099 } 1100 1101 int 1102 recvfrom(td, uap) 1103 struct thread *td; 1104 struct recvfrom_args /* { 1105 int s; 1106 caddr_t buf; 1107 size_t len; 1108 int flags; 1109 struct sockaddr * __restrict from; 1110 socklen_t * __restrict fromlenaddr; 1111 } */ *uap; 1112 { 1113 struct msghdr msg; 1114 struct iovec aiov; 1115 int error; 1116 1117 if (uap->fromlenaddr) { 1118 error = copyin(uap->fromlenaddr, 1119 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1120 if (error) 1121 goto done2; 1122 } else { 1123 msg.msg_namelen = 0; 1124 } 1125 msg.msg_name = uap->from; 1126 msg.msg_iov = &aiov; 1127 msg.msg_iovlen = 1; 1128 aiov.iov_base = uap->buf; 1129 aiov.iov_len = uap->len; 1130 msg.msg_control = 0; 1131 msg.msg_flags = uap->flags; 1132 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1133 done2: 1134 return(error); 1135 } 1136 1137 #ifdef COMPAT_OLDSOCK 1138 int 1139 orecvfrom(td, uap) 1140 struct thread *td; 1141 struct recvfrom_args *uap; 1142 { 1143 1144 uap->flags |= MSG_COMPAT; 1145 return (recvfrom(td, uap)); 1146 } 1147 #endif 1148 1149 #ifdef COMPAT_OLDSOCK 1150 int 1151 orecv(td, uap) 1152 struct thread *td; 1153 struct orecv_args /* { 1154 int s; 1155 caddr_t buf; 1156 int len; 1157 int flags; 1158 } */ *uap; 1159 { 1160 struct msghdr msg; 1161 struct iovec aiov; 1162 int error; 1163 1164 msg.msg_name = 0; 1165 msg.msg_namelen = 0; 1166 msg.msg_iov = &aiov; 1167 msg.msg_iovlen = 1; 1168 aiov.iov_base = uap->buf; 1169 aiov.iov_len = uap->len; 1170 msg.msg_control = 0; 1171 msg.msg_flags = uap->flags; 1172 error = recvit(td, uap->s, &msg, NULL); 1173 return (error); 1174 } 1175 1176 /* 1177 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1178 * overlays the new one, missing only the flags, and with the (old) access 1179 * rights where the control fields are now. 1180 */ 1181 int 1182 orecvmsg(td, uap) 1183 struct thread *td; 1184 struct orecvmsg_args /* { 1185 int s; 1186 struct omsghdr *msg; 1187 int flags; 1188 } */ *uap; 1189 { 1190 struct msghdr msg; 1191 struct iovec *iov; 1192 int error; 1193 1194 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1195 if (error) 1196 return (error); 1197 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1198 if (error) 1199 return (error); 1200 msg.msg_flags = uap->flags | MSG_COMPAT; 1201 msg.msg_iov = iov; 1202 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1203 if (msg.msg_controllen && error == 0) 1204 error = copyout(&msg.msg_controllen, 1205 &uap->msg->msg_accrightslen, sizeof (int)); 1206 free(iov, M_IOV); 1207 return (error); 1208 } 1209 #endif 1210 1211 int 1212 recvmsg(td, uap) 1213 struct thread *td; 1214 struct recvmsg_args /* { 1215 int s; 1216 struct msghdr *msg; 1217 int flags; 1218 } */ *uap; 1219 { 1220 struct msghdr msg; 1221 struct iovec *uiov, *iov; 1222 int error; 1223 1224 error = copyin(uap->msg, &msg, sizeof (msg)); 1225 if (error) 1226 return (error); 1227 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1228 if (error) 1229 return (error); 1230 msg.msg_flags = uap->flags; 1231 #ifdef COMPAT_OLDSOCK 1232 msg.msg_flags &= ~MSG_COMPAT; 1233 #endif 1234 uiov = msg.msg_iov; 1235 msg.msg_iov = iov; 1236 error = recvit(td, uap->s, &msg, NULL); 1237 if (error == 0) { 1238 msg.msg_iov = uiov; 1239 error = copyout(&msg, uap->msg, sizeof(msg)); 1240 } 1241 free(iov, M_IOV); 1242 return (error); 1243 } 1244 1245 /* ARGSUSED */ 1246 int 1247 shutdown(td, uap) 1248 struct thread *td; 1249 struct shutdown_args /* { 1250 int s; 1251 int how; 1252 } */ *uap; 1253 { 1254 struct socket *so; 1255 struct file *fp; 1256 int error; 1257 1258 AUDIT_ARG_FD(uap->s); 1259 error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL); 1260 if (error == 0) { 1261 so = fp->f_data; 1262 error = soshutdown(so, uap->how); 1263 fdrop(fp, td); 1264 } 1265 return (error); 1266 } 1267 1268 /* ARGSUSED */ 1269 int 1270 setsockopt(td, uap) 1271 struct thread *td; 1272 struct setsockopt_args /* { 1273 int s; 1274 int level; 1275 int name; 1276 caddr_t val; 1277 int valsize; 1278 } */ *uap; 1279 { 1280 1281 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1282 uap->val, UIO_USERSPACE, uap->valsize)); 1283 } 1284 1285 int 1286 kern_setsockopt(td, s, level, name, val, valseg, valsize) 1287 struct thread *td; 1288 int s; 1289 int level; 1290 int name; 1291 void *val; 1292 enum uio_seg valseg; 1293 socklen_t valsize; 1294 { 1295 int error; 1296 struct socket *so; 1297 struct file *fp; 1298 struct sockopt sopt; 1299 1300 if (val == NULL && valsize != 0) 1301 return (EFAULT); 1302 if ((int)valsize < 0) 1303 return (EINVAL); 1304 1305 sopt.sopt_dir = SOPT_SET; 1306 sopt.sopt_level = level; 1307 sopt.sopt_name = name; 1308 sopt.sopt_val = val; 1309 sopt.sopt_valsize = valsize; 1310 switch (valseg) { 1311 case UIO_USERSPACE: 1312 sopt.sopt_td = td; 1313 break; 1314 case UIO_SYSSPACE: 1315 sopt.sopt_td = NULL; 1316 break; 1317 default: 1318 panic("kern_setsockopt called with bad valseg"); 1319 } 1320 1321 AUDIT_ARG_FD(s); 1322 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 1323 if (error == 0) { 1324 so = fp->f_data; 1325 error = sosetopt(so, &sopt); 1326 fdrop(fp, td); 1327 } 1328 return(error); 1329 } 1330 1331 /* ARGSUSED */ 1332 int 1333 getsockopt(td, uap) 1334 struct thread *td; 1335 struct getsockopt_args /* { 1336 int s; 1337 int level; 1338 int name; 1339 void * __restrict val; 1340 socklen_t * __restrict avalsize; 1341 } */ *uap; 1342 { 1343 socklen_t valsize; 1344 int error; 1345 1346 if (uap->val) { 1347 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1348 if (error) 1349 return (error); 1350 } 1351 1352 error = kern_getsockopt(td, uap->s, uap->level, uap->name, 1353 uap->val, UIO_USERSPACE, &valsize); 1354 1355 if (error == 0) 1356 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1357 return (error); 1358 } 1359 1360 /* 1361 * Kernel version of getsockopt. 1362 * optval can be a userland or userspace. optlen is always a kernel pointer. 1363 */ 1364 int 1365 kern_getsockopt(td, s, level, name, val, valseg, valsize) 1366 struct thread *td; 1367 int s; 1368 int level; 1369 int name; 1370 void *val; 1371 enum uio_seg valseg; 1372 socklen_t *valsize; 1373 { 1374 int error; 1375 struct socket *so; 1376 struct file *fp; 1377 struct sockopt sopt; 1378 1379 if (val == NULL) 1380 *valsize = 0; 1381 if ((int)*valsize < 0) 1382 return (EINVAL); 1383 1384 sopt.sopt_dir = SOPT_GET; 1385 sopt.sopt_level = level; 1386 sopt.sopt_name = name; 1387 sopt.sopt_val = val; 1388 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1389 switch (valseg) { 1390 case UIO_USERSPACE: 1391 sopt.sopt_td = td; 1392 break; 1393 case UIO_SYSSPACE: 1394 sopt.sopt_td = NULL; 1395 break; 1396 default: 1397 panic("kern_getsockopt called with bad valseg"); 1398 } 1399 1400 AUDIT_ARG_FD(s); 1401 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 1402 if (error == 0) { 1403 so = fp->f_data; 1404 error = sogetopt(so, &sopt); 1405 *valsize = sopt.sopt_valsize; 1406 fdrop(fp, td); 1407 } 1408 return (error); 1409 } 1410 1411 /* 1412 * getsockname1() - Get socket name. 1413 */ 1414 /* ARGSUSED */ 1415 static int 1416 getsockname1(td, uap, compat) 1417 struct thread *td; 1418 struct getsockname_args /* { 1419 int fdes; 1420 struct sockaddr * __restrict asa; 1421 socklen_t * __restrict alen; 1422 } */ *uap; 1423 int compat; 1424 { 1425 struct sockaddr *sa; 1426 socklen_t len; 1427 int error; 1428 1429 error = copyin(uap->alen, &len, sizeof(len)); 1430 if (error) 1431 return (error); 1432 1433 error = kern_getsockname(td, uap->fdes, &sa, &len); 1434 if (error) 1435 return (error); 1436 1437 if (len != 0) { 1438 #ifdef COMPAT_OLDSOCK 1439 if (compat) 1440 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1441 #endif 1442 error = copyout(sa, uap->asa, (u_int)len); 1443 } 1444 free(sa, M_SONAME); 1445 if (error == 0) 1446 error = copyout(&len, uap->alen, sizeof(len)); 1447 return (error); 1448 } 1449 1450 int 1451 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, 1452 socklen_t *alen) 1453 { 1454 struct socket *so; 1455 struct file *fp; 1456 socklen_t len; 1457 int error; 1458 1459 if (*alen < 0) 1460 return (EINVAL); 1461 1462 AUDIT_ARG_FD(fd); 1463 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 1464 if (error) 1465 return (error); 1466 so = fp->f_data; 1467 *sa = NULL; 1468 CURVNET_SET(so->so_vnet); 1469 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa); 1470 CURVNET_RESTORE(); 1471 if (error) 1472 goto bad; 1473 if (*sa == NULL) 1474 len = 0; 1475 else 1476 len = MIN(*alen, (*sa)->sa_len); 1477 *alen = len; 1478 #ifdef KTRACE 1479 if (KTRPOINT(td, KTR_STRUCT)) 1480 ktrsockaddr(*sa); 1481 #endif 1482 bad: 1483 fdrop(fp, td); 1484 if (error && *sa) { 1485 free(*sa, M_SONAME); 1486 *sa = NULL; 1487 } 1488 return (error); 1489 } 1490 1491 int 1492 getsockname(td, uap) 1493 struct thread *td; 1494 struct getsockname_args *uap; 1495 { 1496 1497 return (getsockname1(td, uap, 0)); 1498 } 1499 1500 #ifdef COMPAT_OLDSOCK 1501 int 1502 ogetsockname(td, uap) 1503 struct thread *td; 1504 struct getsockname_args *uap; 1505 { 1506 1507 return (getsockname1(td, uap, 1)); 1508 } 1509 #endif /* COMPAT_OLDSOCK */ 1510 1511 /* 1512 * getpeername1() - Get name of peer for connected socket. 1513 */ 1514 /* ARGSUSED */ 1515 static int 1516 getpeername1(td, uap, compat) 1517 struct thread *td; 1518 struct getpeername_args /* { 1519 int fdes; 1520 struct sockaddr * __restrict asa; 1521 socklen_t * __restrict alen; 1522 } */ *uap; 1523 int compat; 1524 { 1525 struct sockaddr *sa; 1526 socklen_t len; 1527 int error; 1528 1529 error = copyin(uap->alen, &len, sizeof (len)); 1530 if (error) 1531 return (error); 1532 1533 error = kern_getpeername(td, uap->fdes, &sa, &len); 1534 if (error) 1535 return (error); 1536 1537 if (len != 0) { 1538 #ifdef COMPAT_OLDSOCK 1539 if (compat) 1540 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1541 #endif 1542 error = copyout(sa, uap->asa, (u_int)len); 1543 } 1544 free(sa, M_SONAME); 1545 if (error == 0) 1546 error = copyout(&len, uap->alen, sizeof(len)); 1547 return (error); 1548 } 1549 1550 int 1551 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, 1552 socklen_t *alen) 1553 { 1554 struct socket *so; 1555 struct file *fp; 1556 socklen_t len; 1557 int error; 1558 1559 if (*alen < 0) 1560 return (EINVAL); 1561 1562 AUDIT_ARG_FD(fd); 1563 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 1564 if (error) 1565 return (error); 1566 so = fp->f_data; 1567 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1568 error = ENOTCONN; 1569 goto done; 1570 } 1571 *sa = NULL; 1572 CURVNET_SET(so->so_vnet); 1573 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa); 1574 CURVNET_RESTORE(); 1575 if (error) 1576 goto bad; 1577 if (*sa == NULL) 1578 len = 0; 1579 else 1580 len = MIN(*alen, (*sa)->sa_len); 1581 *alen = len; 1582 #ifdef KTRACE 1583 if (KTRPOINT(td, KTR_STRUCT)) 1584 ktrsockaddr(*sa); 1585 #endif 1586 bad: 1587 if (error && *sa) { 1588 free(*sa, M_SONAME); 1589 *sa = NULL; 1590 } 1591 done: 1592 fdrop(fp, td); 1593 return (error); 1594 } 1595 1596 int 1597 getpeername(td, uap) 1598 struct thread *td; 1599 struct getpeername_args *uap; 1600 { 1601 1602 return (getpeername1(td, uap, 0)); 1603 } 1604 1605 #ifdef COMPAT_OLDSOCK 1606 int 1607 ogetpeername(td, uap) 1608 struct thread *td; 1609 struct ogetpeername_args *uap; 1610 { 1611 1612 /* XXX uap should have type `getpeername_args *' to begin with. */ 1613 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1614 } 1615 #endif /* COMPAT_OLDSOCK */ 1616 1617 int 1618 sockargs(mp, buf, buflen, type) 1619 struct mbuf **mp; 1620 caddr_t buf; 1621 int buflen, type; 1622 { 1623 struct sockaddr *sa; 1624 struct mbuf *m; 1625 int error; 1626 1627 if ((u_int)buflen > MLEN) { 1628 #ifdef COMPAT_OLDSOCK 1629 if (type == MT_SONAME && (u_int)buflen <= 112) 1630 buflen = MLEN; /* unix domain compat. hack */ 1631 else 1632 #endif 1633 if ((u_int)buflen > MCLBYTES) 1634 return (EINVAL); 1635 } 1636 m = m_get(M_WAIT, type); 1637 if ((u_int)buflen > MLEN) 1638 MCLGET(m, M_WAIT); 1639 m->m_len = buflen; 1640 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1641 if (error) 1642 (void) m_free(m); 1643 else { 1644 *mp = m; 1645 if (type == MT_SONAME) { 1646 sa = mtod(m, struct sockaddr *); 1647 1648 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1649 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1650 sa->sa_family = sa->sa_len; 1651 #endif 1652 sa->sa_len = buflen; 1653 } 1654 } 1655 return (error); 1656 } 1657 1658 int 1659 getsockaddr(namp, uaddr, len) 1660 struct sockaddr **namp; 1661 caddr_t uaddr; 1662 size_t len; 1663 { 1664 struct sockaddr *sa; 1665 int error; 1666 1667 if (len > SOCK_MAXADDRLEN) 1668 return (ENAMETOOLONG); 1669 if (len < offsetof(struct sockaddr, sa_data[0])) 1670 return (EINVAL); 1671 sa = malloc(len, M_SONAME, M_WAITOK); 1672 error = copyin(uaddr, sa, len); 1673 if (error) { 1674 free(sa, M_SONAME); 1675 } else { 1676 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1677 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1678 sa->sa_family = sa->sa_len; 1679 #endif 1680 sa->sa_len = len; 1681 *namp = sa; 1682 } 1683 return (error); 1684 } 1685 1686 #include <sys/condvar.h> 1687 1688 struct sendfile_sync { 1689 struct mtx mtx; 1690 struct cv cv; 1691 unsigned count; 1692 }; 1693 1694 /* 1695 * Detach mapped page and release resources back to the system. 1696 */ 1697 void 1698 sf_buf_mext(void *addr, void *args) 1699 { 1700 vm_page_t m; 1701 struct sendfile_sync *sfs; 1702 1703 m = sf_buf_page(args); 1704 sf_buf_free(args); 1705 vm_page_lock(m); 1706 vm_page_unwire(m, 0); 1707 /* 1708 * Check for the object going away on us. This can 1709 * happen since we don't hold a reference to it. 1710 * If so, we're responsible for freeing the page. 1711 */ 1712 if (m->wire_count == 0 && m->object == NULL) 1713 vm_page_free(m); 1714 vm_page_unlock(m); 1715 if (addr == NULL) 1716 return; 1717 sfs = addr; 1718 mtx_lock(&sfs->mtx); 1719 KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0")); 1720 if (--sfs->count == 0) 1721 cv_signal(&sfs->cv); 1722 mtx_unlock(&sfs->mtx); 1723 } 1724 1725 /* 1726 * sendfile(2) 1727 * 1728 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1729 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1730 * 1731 * Send a file specified by 'fd' and starting at 'offset' to a socket 1732 * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes == 1733 * 0. Optionally add a header and/or trailer to the socket output. If 1734 * specified, write the total number of bytes sent into *sbytes. 1735 */ 1736 int 1737 sendfile(struct thread *td, struct sendfile_args *uap) 1738 { 1739 1740 return (do_sendfile(td, uap, 0)); 1741 } 1742 1743 static int 1744 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1745 { 1746 struct sf_hdtr hdtr; 1747 struct uio *hdr_uio, *trl_uio; 1748 int error; 1749 1750 hdr_uio = trl_uio = NULL; 1751 1752 if (uap->hdtr != NULL) { 1753 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1754 if (error) 1755 goto out; 1756 if (hdtr.headers != NULL) { 1757 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1758 if (error) 1759 goto out; 1760 } 1761 if (hdtr.trailers != NULL) { 1762 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); 1763 if (error) 1764 goto out; 1765 1766 } 1767 } 1768 1769 error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat); 1770 out: 1771 if (hdr_uio) 1772 free(hdr_uio, M_IOV); 1773 if (trl_uio) 1774 free(trl_uio, M_IOV); 1775 return (error); 1776 } 1777 1778 #ifdef COMPAT_FREEBSD4 1779 int 1780 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1781 { 1782 struct sendfile_args args; 1783 1784 args.fd = uap->fd; 1785 args.s = uap->s; 1786 args.offset = uap->offset; 1787 args.nbytes = uap->nbytes; 1788 args.hdtr = uap->hdtr; 1789 args.sbytes = uap->sbytes; 1790 args.flags = uap->flags; 1791 1792 return (do_sendfile(td, &args, 1)); 1793 } 1794 #endif /* COMPAT_FREEBSD4 */ 1795 1796 int 1797 kern_sendfile(struct thread *td, struct sendfile_args *uap, 1798 struct uio *hdr_uio, struct uio *trl_uio, int compat) 1799 { 1800 struct file *sock_fp; 1801 struct vnode *vp; 1802 struct vm_object *obj = NULL; 1803 struct socket *so = NULL; 1804 struct mbuf *m = NULL; 1805 struct sf_buf *sf; 1806 struct vm_page *pg; 1807 off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0; 1808 int error, hdrlen = 0, mnw = 0; 1809 int vfslocked; 1810 struct sendfile_sync *sfs = NULL; 1811 1812 /* 1813 * The file descriptor must be a regular file and have a 1814 * backing VM object. 1815 * File offset must be positive. If it goes beyond EOF 1816 * we send only the header/trailer and no payload data. 1817 */ 1818 AUDIT_ARG_FD(uap->fd); 1819 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1820 goto out; 1821 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1822 vn_lock(vp, LK_SHARED | LK_RETRY); 1823 if (vp->v_type == VREG) { 1824 obj = vp->v_object; 1825 if (obj != NULL) { 1826 /* 1827 * Temporarily increase the backing VM 1828 * object's reference count so that a forced 1829 * reclamation of its vnode does not 1830 * immediately destroy it. 1831 */ 1832 VM_OBJECT_LOCK(obj); 1833 if ((obj->flags & OBJ_DEAD) == 0) { 1834 vm_object_reference_locked(obj); 1835 VM_OBJECT_UNLOCK(obj); 1836 } else { 1837 VM_OBJECT_UNLOCK(obj); 1838 obj = NULL; 1839 } 1840 } 1841 } 1842 VOP_UNLOCK(vp, 0); 1843 VFS_UNLOCK_GIANT(vfslocked); 1844 if (obj == NULL) { 1845 error = EINVAL; 1846 goto out; 1847 } 1848 if (uap->offset < 0) { 1849 error = EINVAL; 1850 goto out; 1851 } 1852 1853 /* 1854 * The socket must be a stream socket and connected. 1855 * Remember if it a blocking or non-blocking socket. 1856 */ 1857 if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp, 1858 NULL)) != 0) 1859 goto out; 1860 so = sock_fp->f_data; 1861 if (so->so_type != SOCK_STREAM) { 1862 error = EINVAL; 1863 goto out; 1864 } 1865 if ((so->so_state & SS_ISCONNECTED) == 0) { 1866 error = ENOTCONN; 1867 goto out; 1868 } 1869 /* 1870 * Do not wait on memory allocations but return ENOMEM for 1871 * caller to retry later. 1872 * XXX: Experimental. 1873 */ 1874 if (uap->flags & SF_MNOWAIT) 1875 mnw = 1; 1876 1877 if (uap->flags & SF_SYNC) { 1878 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO); 1879 mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); 1880 cv_init(&sfs->cv, "sendfile"); 1881 } 1882 1883 #ifdef MAC 1884 error = mac_socket_check_send(td->td_ucred, so); 1885 if (error) 1886 goto out; 1887 #endif 1888 1889 /* If headers are specified copy them into mbufs. */ 1890 if (hdr_uio != NULL) { 1891 hdr_uio->uio_td = td; 1892 hdr_uio->uio_rw = UIO_WRITE; 1893 if (hdr_uio->uio_resid > 0) { 1894 /* 1895 * In FBSD < 5.0 the nbytes to send also included 1896 * the header. If compat is specified subtract the 1897 * header size from nbytes. 1898 */ 1899 if (compat) { 1900 if (uap->nbytes > hdr_uio->uio_resid) 1901 uap->nbytes -= hdr_uio->uio_resid; 1902 else 1903 uap->nbytes = 0; 1904 } 1905 m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK), 1906 0, 0, 0); 1907 if (m == NULL) { 1908 error = mnw ? EAGAIN : ENOBUFS; 1909 goto out; 1910 } 1911 hdrlen = m_length(m, NULL); 1912 } 1913 } 1914 1915 /* 1916 * Protect against multiple writers to the socket. 1917 * 1918 * XXXRW: Historically this has assumed non-interruptibility, so now 1919 * we implement that, but possibly shouldn't. 1920 */ 1921 (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR); 1922 1923 /* 1924 * Loop through the pages of the file, starting with the requested 1925 * offset. Get a file page (do I/O if necessary), map the file page 1926 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1927 * it on the socket. 1928 * This is done in two loops. The inner loop turns as many pages 1929 * as it can, up to available socket buffer space, without blocking 1930 * into mbufs to have it bulk delivered into the socket send buffer. 1931 * The outer loop checks the state and available space of the socket 1932 * and takes care of the overall progress. 1933 */ 1934 for (off = uap->offset, rem = uap->nbytes; ; ) { 1935 int loopbytes = 0; 1936 int space = 0; 1937 int done = 0; 1938 1939 /* 1940 * Check the socket state for ongoing connection, 1941 * no errors and space in socket buffer. 1942 * If space is low allow for the remainder of the 1943 * file to be processed if it fits the socket buffer. 1944 * Otherwise block in waiting for sufficient space 1945 * to proceed, or if the socket is nonblocking, return 1946 * to userland with EAGAIN while reporting how far 1947 * we've come. 1948 * We wait until the socket buffer has significant free 1949 * space to do bulk sends. This makes good use of file 1950 * system read ahead and allows packet segmentation 1951 * offloading hardware to take over lots of work. If 1952 * we were not careful here we would send off only one 1953 * sfbuf at a time. 1954 */ 1955 SOCKBUF_LOCK(&so->so_snd); 1956 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2) 1957 so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; 1958 retry_space: 1959 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 1960 error = EPIPE; 1961 SOCKBUF_UNLOCK(&so->so_snd); 1962 goto done; 1963 } else if (so->so_error) { 1964 error = so->so_error; 1965 so->so_error = 0; 1966 SOCKBUF_UNLOCK(&so->so_snd); 1967 goto done; 1968 } 1969 space = sbspace(&so->so_snd); 1970 if (space < rem && 1971 (space <= 0 || 1972 space < so->so_snd.sb_lowat)) { 1973 if (so->so_state & SS_NBIO) { 1974 SOCKBUF_UNLOCK(&so->so_snd); 1975 error = EAGAIN; 1976 goto done; 1977 } 1978 /* 1979 * sbwait drops the lock while sleeping. 1980 * When we loop back to retry_space the 1981 * state may have changed and we retest 1982 * for it. 1983 */ 1984 error = sbwait(&so->so_snd); 1985 /* 1986 * An error from sbwait usually indicates that we've 1987 * been interrupted by a signal. If we've sent anything 1988 * then return bytes sent, otherwise return the error. 1989 */ 1990 if (error) { 1991 SOCKBUF_UNLOCK(&so->so_snd); 1992 goto done; 1993 } 1994 goto retry_space; 1995 } 1996 SOCKBUF_UNLOCK(&so->so_snd); 1997 1998 /* 1999 * Reduce space in the socket buffer by the size of 2000 * the header mbuf chain. 2001 * hdrlen is set to 0 after the first loop. 2002 */ 2003 space -= hdrlen; 2004 2005 /* 2006 * Loop and construct maximum sized mbuf chain to be bulk 2007 * dumped into socket buffer. 2008 */ 2009 while (space > loopbytes) { 2010 vm_pindex_t pindex; 2011 vm_offset_t pgoff; 2012 struct mbuf *m0; 2013 2014 VM_OBJECT_LOCK(obj); 2015 /* 2016 * Calculate the amount to transfer. 2017 * Not to exceed a page, the EOF, 2018 * or the passed in nbytes. 2019 */ 2020 pgoff = (vm_offset_t)(off & PAGE_MASK); 2021 xfsize = omin(PAGE_SIZE - pgoff, 2022 obj->un_pager.vnp.vnp_size - uap->offset - 2023 fsbytes - loopbytes); 2024 if (uap->nbytes) 2025 rem = (uap->nbytes - fsbytes - loopbytes); 2026 else 2027 rem = obj->un_pager.vnp.vnp_size - 2028 uap->offset - fsbytes - loopbytes; 2029 xfsize = omin(rem, xfsize); 2030 xfsize = omin(space - loopbytes, xfsize); 2031 if (xfsize <= 0) { 2032 VM_OBJECT_UNLOCK(obj); 2033 done = 1; /* all data sent */ 2034 break; 2035 } 2036 2037 /* 2038 * Attempt to look up the page. Allocate 2039 * if not found or wait and loop if busy. 2040 */ 2041 pindex = OFF_TO_IDX(off); 2042 pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY | 2043 VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY); 2044 2045 /* 2046 * Check if page is valid for what we need, 2047 * otherwise initiate I/O. 2048 * If we already turned some pages into mbufs, 2049 * send them off before we come here again and 2050 * block. 2051 */ 2052 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) 2053 VM_OBJECT_UNLOCK(obj); 2054 else if (m != NULL) 2055 error = EAGAIN; /* send what we already got */ 2056 else if (uap->flags & SF_NODISKIO) 2057 error = EBUSY; 2058 else { 2059 int bsize, resid; 2060 2061 /* 2062 * Ensure that our page is still around 2063 * when the I/O completes. 2064 */ 2065 vm_page_io_start(pg); 2066 VM_OBJECT_UNLOCK(obj); 2067 2068 /* 2069 * Get the page from backing store. 2070 */ 2071 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2072 error = vn_lock(vp, LK_SHARED); 2073 if (error != 0) 2074 goto after_read; 2075 bsize = vp->v_mount->mnt_stat.f_iosize; 2076 2077 /* 2078 * XXXMAC: Because we don't have fp->f_cred 2079 * here, we pass in NOCRED. This is probably 2080 * wrong, but is consistent with our original 2081 * implementation. 2082 */ 2083 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 2084 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 2085 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 2086 td->td_ucred, NOCRED, &resid, td); 2087 VOP_UNLOCK(vp, 0); 2088 after_read: 2089 VFS_UNLOCK_GIANT(vfslocked); 2090 VM_OBJECT_LOCK(obj); 2091 vm_page_io_finish(pg); 2092 if (!error) 2093 VM_OBJECT_UNLOCK(obj); 2094 mbstat.sf_iocnt++; 2095 } 2096 if (error) { 2097 vm_page_lock(pg); 2098 vm_page_unwire(pg, 0); 2099 /* 2100 * See if anyone else might know about 2101 * this page. If not and it is not valid, 2102 * then free it. 2103 */ 2104 if (pg->wire_count == 0 && pg->valid == 0 && 2105 pg->busy == 0 && !(pg->oflags & VPO_BUSY)) 2106 vm_page_free(pg); 2107 vm_page_unlock(pg); 2108 VM_OBJECT_UNLOCK(obj); 2109 if (error == EAGAIN) 2110 error = 0; /* not a real error */ 2111 break; 2112 } 2113 2114 /* 2115 * Get a sendfile buf. When allocating the 2116 * first buffer for mbuf chain, we usually 2117 * wait as long as necessary, but this wait 2118 * can be interrupted. For consequent 2119 * buffers, do not sleep, since several 2120 * threads might exhaust the buffers and then 2121 * deadlock. 2122 */ 2123 sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT : 2124 SFB_CATCH); 2125 if (sf == NULL) { 2126 mbstat.sf_allocfail++; 2127 vm_page_lock(pg); 2128 vm_page_unwire(pg, 0); 2129 KASSERT(pg->object != NULL, 2130 ("kern_sendfile: object disappeared")); 2131 vm_page_unlock(pg); 2132 if (m == NULL) 2133 error = (mnw ? EAGAIN : EINTR); 2134 break; 2135 } 2136 2137 /* 2138 * Get an mbuf and set it up as having 2139 * external storage. 2140 */ 2141 m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); 2142 if (m0 == NULL) { 2143 error = (mnw ? EAGAIN : ENOBUFS); 2144 sf_buf_mext((void *)sf_buf_kva(sf), sf); 2145 break; 2146 } 2147 MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, 2148 sfs, sf, M_RDONLY, EXT_SFBUF); 2149 m0->m_data = (char *)sf_buf_kva(sf) + pgoff; 2150 m0->m_len = xfsize; 2151 2152 /* Append to mbuf chain. */ 2153 if (m != NULL) 2154 m_cat(m, m0); 2155 else 2156 m = m0; 2157 2158 /* Keep track of bits processed. */ 2159 loopbytes += xfsize; 2160 off += xfsize; 2161 2162 if (sfs != NULL) { 2163 mtx_lock(&sfs->mtx); 2164 sfs->count++; 2165 mtx_unlock(&sfs->mtx); 2166 } 2167 } 2168 2169 /* Add the buffer chain to the socket buffer. */ 2170 if (m != NULL) { 2171 int mlen, err; 2172 2173 mlen = m_length(m, NULL); 2174 SOCKBUF_LOCK(&so->so_snd); 2175 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2176 error = EPIPE; 2177 SOCKBUF_UNLOCK(&so->so_snd); 2178 goto done; 2179 } 2180 SOCKBUF_UNLOCK(&so->so_snd); 2181 CURVNET_SET(so->so_vnet); 2182 /* Avoid error aliasing. */ 2183 err = (*so->so_proto->pr_usrreqs->pru_send) 2184 (so, 0, m, NULL, NULL, td); 2185 CURVNET_RESTORE(); 2186 if (err == 0) { 2187 /* 2188 * We need two counters to get the 2189 * file offset and nbytes to send 2190 * right: 2191 * - sbytes contains the total amount 2192 * of bytes sent, including headers. 2193 * - fsbytes contains the total amount 2194 * of bytes sent from the file. 2195 */ 2196 sbytes += mlen; 2197 fsbytes += mlen; 2198 if (hdrlen) { 2199 fsbytes -= hdrlen; 2200 hdrlen = 0; 2201 } 2202 } else if (error == 0) 2203 error = err; 2204 m = NULL; /* pru_send always consumes */ 2205 } 2206 2207 /* Quit outer loop on error or when we're done. */ 2208 if (done) 2209 break; 2210 if (error) 2211 goto done; 2212 } 2213 2214 /* 2215 * Send trailers. Wimp out and use writev(2). 2216 */ 2217 if (trl_uio != NULL) { 2218 sbunlock(&so->so_snd); 2219 error = kern_writev(td, uap->s, trl_uio); 2220 if (error == 0) 2221 sbytes += td->td_retval[0]; 2222 goto out; 2223 } 2224 2225 done: 2226 sbunlock(&so->so_snd); 2227 out: 2228 /* 2229 * If there was no error we have to clear td->td_retval[0] 2230 * because it may have been set by writev. 2231 */ 2232 if (error == 0) { 2233 td->td_retval[0] = 0; 2234 } 2235 if (uap->sbytes != NULL) { 2236 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2237 } 2238 if (obj != NULL) 2239 vm_object_deallocate(obj); 2240 if (vp != NULL) { 2241 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2242 vrele(vp); 2243 VFS_UNLOCK_GIANT(vfslocked); 2244 } 2245 if (so) 2246 fdrop(sock_fp, td); 2247 if (m) 2248 m_freem(m); 2249 2250 if (sfs != NULL) { 2251 mtx_lock(&sfs->mtx); 2252 if (sfs->count != 0) 2253 cv_wait(&sfs->cv, &sfs->mtx); 2254 KASSERT(sfs->count == 0, ("sendfile sync still busy")); 2255 cv_destroy(&sfs->cv); 2256 mtx_destroy(&sfs->mtx); 2257 free(sfs, M_TEMP); 2258 } 2259 2260 if (error == ERESTART) 2261 error = EINTR; 2262 2263 return (error); 2264 } 2265 2266 /* 2267 * SCTP syscalls. 2268 * Functionality only compiled in if SCTP is defined in the kernel Makefile, 2269 * otherwise all return EOPNOTSUPP. 2270 * XXX: We should make this loadable one day. 2271 */ 2272 int 2273 sctp_peeloff(td, uap) 2274 struct thread *td; 2275 struct sctp_peeloff_args /* { 2276 int sd; 2277 caddr_t name; 2278 } */ *uap; 2279 { 2280 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2281 struct filedesc *fdp; 2282 struct file *nfp = NULL; 2283 int error; 2284 struct socket *head, *so; 2285 int fd; 2286 u_int fflag; 2287 2288 fdp = td->td_proc->p_fd; 2289 AUDIT_ARG_FD(uap->sd); 2290 error = fgetsock(td, uap->sd, &head, &fflag); 2291 if (error) 2292 goto done2; 2293 error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name); 2294 if (error) 2295 goto done2; 2296 /* 2297 * At this point we know we do have a assoc to pull 2298 * we proceed to get the fd setup. This may block 2299 * but that is ok. 2300 */ 2301 2302 error = falloc(td, &nfp, &fd, 0); 2303 if (error) 2304 goto done; 2305 td->td_retval[0] = fd; 2306 2307 CURVNET_SET(head->so_vnet); 2308 so = sonewconn(head, SS_ISCONNECTED); 2309 if (so == NULL) 2310 goto noconnection; 2311 /* 2312 * Before changing the flags on the socket, we have to bump the 2313 * reference count. Otherwise, if the protocol calls sofree(), 2314 * the socket will be released due to a zero refcount. 2315 */ 2316 SOCK_LOCK(so); 2317 soref(so); /* file descriptor reference */ 2318 SOCK_UNLOCK(so); 2319 2320 ACCEPT_LOCK(); 2321 2322 TAILQ_REMOVE(&head->so_comp, so, so_list); 2323 head->so_qlen--; 2324 so->so_state |= (head->so_state & SS_NBIO); 2325 so->so_state &= ~SS_NOFDREF; 2326 so->so_qstate &= ~SQ_COMP; 2327 so->so_head = NULL; 2328 ACCEPT_UNLOCK(); 2329 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 2330 error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name); 2331 if (error) 2332 goto noconnection; 2333 if (head->so_sigio != NULL) 2334 fsetown(fgetown(&head->so_sigio), &so->so_sigio); 2335 2336 noconnection: 2337 /* 2338 * close the new descriptor, assuming someone hasn't ripped it 2339 * out from under us. 2340 */ 2341 if (error) 2342 fdclose(fdp, nfp, fd, td); 2343 2344 /* 2345 * Release explicitly held references before returning. 2346 */ 2347 CURVNET_RESTORE(); 2348 done: 2349 if (nfp != NULL) 2350 fdrop(nfp, td); 2351 fputsock(head); 2352 done2: 2353 return (error); 2354 #else /* SCTP */ 2355 return (EOPNOTSUPP); 2356 #endif /* SCTP */ 2357 } 2358 2359 int 2360 sctp_generic_sendmsg (td, uap) 2361 struct thread *td; 2362 struct sctp_generic_sendmsg_args /* { 2363 int sd, 2364 caddr_t msg, 2365 int mlen, 2366 caddr_t to, 2367 __socklen_t tolen, 2368 struct sctp_sndrcvinfo *sinfo, 2369 int flags 2370 } */ *uap; 2371 { 2372 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2373 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2374 struct socket *so; 2375 struct file *fp = NULL; 2376 int error = 0, len; 2377 struct sockaddr *to = NULL; 2378 #ifdef KTRACE 2379 struct uio *ktruio = NULL; 2380 #endif 2381 struct uio auio; 2382 struct iovec iov[1]; 2383 2384 if (uap->sinfo) { 2385 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2386 if (error) 2387 return (error); 2388 u_sinfo = &sinfo; 2389 } 2390 if (uap->tolen) { 2391 error = getsockaddr(&to, uap->to, uap->tolen); 2392 if (error) { 2393 to = NULL; 2394 goto sctp_bad2; 2395 } 2396 } 2397 2398 AUDIT_ARG_FD(uap->sd); 2399 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); 2400 if (error) 2401 goto sctp_bad; 2402 #ifdef KTRACE 2403 if (to && (KTRPOINT(td, KTR_STRUCT))) 2404 ktrsockaddr(to); 2405 #endif 2406 2407 iov[0].iov_base = uap->msg; 2408 iov[0].iov_len = uap->mlen; 2409 2410 so = (struct socket *)fp->f_data; 2411 #ifdef MAC 2412 error = mac_socket_check_send(td->td_ucred, so); 2413 if (error) 2414 goto sctp_bad; 2415 #endif /* MAC */ 2416 2417 auio.uio_iov = iov; 2418 auio.uio_iovcnt = 1; 2419 auio.uio_segflg = UIO_USERSPACE; 2420 auio.uio_rw = UIO_WRITE; 2421 auio.uio_td = td; 2422 auio.uio_offset = 0; /* XXX */ 2423 auio.uio_resid = 0; 2424 len = auio.uio_resid = uap->mlen; 2425 CURVNET_SET(so->so_vnet); 2426 error = sctp_lower_sosend(so, to, &auio, 2427 (struct mbuf *)NULL, (struct mbuf *)NULL, 2428 uap->flags, u_sinfo, td); 2429 CURVNET_RESTORE(); 2430 if (error) { 2431 if (auio.uio_resid != len && (error == ERESTART || 2432 error == EINTR || error == EWOULDBLOCK)) 2433 error = 0; 2434 /* Generation of SIGPIPE can be controlled per socket. */ 2435 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2436 !(uap->flags & MSG_NOSIGNAL)) { 2437 PROC_LOCK(td->td_proc); 2438 tdsignal(td, SIGPIPE); 2439 PROC_UNLOCK(td->td_proc); 2440 } 2441 } 2442 if (error == 0) 2443 td->td_retval[0] = len - auio.uio_resid; 2444 #ifdef KTRACE 2445 if (ktruio != NULL) { 2446 ktruio->uio_resid = td->td_retval[0]; 2447 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2448 } 2449 #endif /* KTRACE */ 2450 sctp_bad: 2451 if (fp) 2452 fdrop(fp, td); 2453 sctp_bad2: 2454 if (to) 2455 free(to, M_SONAME); 2456 return (error); 2457 #else /* SCTP */ 2458 return (EOPNOTSUPP); 2459 #endif /* SCTP */ 2460 } 2461 2462 int 2463 sctp_generic_sendmsg_iov(td, uap) 2464 struct thread *td; 2465 struct sctp_generic_sendmsg_iov_args /* { 2466 int sd, 2467 struct iovec *iov, 2468 int iovlen, 2469 caddr_t to, 2470 __socklen_t tolen, 2471 struct sctp_sndrcvinfo *sinfo, 2472 int flags 2473 } */ *uap; 2474 { 2475 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2476 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2477 struct socket *so; 2478 struct file *fp = NULL; 2479 int error=0, len, i; 2480 struct sockaddr *to = NULL; 2481 #ifdef KTRACE 2482 struct uio *ktruio = NULL; 2483 #endif 2484 struct uio auio; 2485 struct iovec *iov, *tiov; 2486 2487 if (uap->sinfo) { 2488 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2489 if (error) 2490 return (error); 2491 u_sinfo = &sinfo; 2492 } 2493 if (uap->tolen) { 2494 error = getsockaddr(&to, uap->to, uap->tolen); 2495 if (error) { 2496 to = NULL; 2497 goto sctp_bad2; 2498 } 2499 } 2500 2501 AUDIT_ARG_FD(uap->sd); 2502 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); 2503 if (error) 2504 goto sctp_bad1; 2505 2506 #ifdef COMPAT_FREEBSD32 2507 if (SV_CURPROC_FLAG(SV_ILP32)) 2508 error = freebsd32_copyiniov((struct iovec32 *)uap->iov, 2509 uap->iovlen, &iov, EMSGSIZE); 2510 else 2511 #endif 2512 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2513 if (error) 2514 goto sctp_bad1; 2515 #ifdef KTRACE 2516 if (to && (KTRPOINT(td, KTR_STRUCT))) 2517 ktrsockaddr(to); 2518 #endif 2519 2520 so = (struct socket *)fp->f_data; 2521 #ifdef MAC 2522 error = mac_socket_check_send(td->td_ucred, so); 2523 if (error) 2524 goto sctp_bad; 2525 #endif /* MAC */ 2526 2527 auio.uio_iov = iov; 2528 auio.uio_iovcnt = uap->iovlen; 2529 auio.uio_segflg = UIO_USERSPACE; 2530 auio.uio_rw = UIO_WRITE; 2531 auio.uio_td = td; 2532 auio.uio_offset = 0; /* XXX */ 2533 auio.uio_resid = 0; 2534 tiov = iov; 2535 for (i = 0; i <uap->iovlen; i++, tiov++) { 2536 if ((auio.uio_resid += tiov->iov_len) < 0) { 2537 error = EINVAL; 2538 goto sctp_bad; 2539 } 2540 } 2541 len = auio.uio_resid; 2542 CURVNET_SET(so->so_vnet); 2543 error = sctp_lower_sosend(so, to, &auio, 2544 (struct mbuf *)NULL, (struct mbuf *)NULL, 2545 uap->flags, u_sinfo, td); 2546 CURVNET_RESTORE(); 2547 if (error) { 2548 if (auio.uio_resid != len && (error == ERESTART || 2549 error == EINTR || error == EWOULDBLOCK)) 2550 error = 0; 2551 /* Generation of SIGPIPE can be controlled per socket */ 2552 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2553 !(uap->flags & MSG_NOSIGNAL)) { 2554 PROC_LOCK(td->td_proc); 2555 tdsignal(td, SIGPIPE); 2556 PROC_UNLOCK(td->td_proc); 2557 } 2558 } 2559 if (error == 0) 2560 td->td_retval[0] = len - auio.uio_resid; 2561 #ifdef KTRACE 2562 if (ktruio != NULL) { 2563 ktruio->uio_resid = td->td_retval[0]; 2564 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2565 } 2566 #endif /* KTRACE */ 2567 sctp_bad: 2568 free(iov, M_IOV); 2569 sctp_bad1: 2570 if (fp) 2571 fdrop(fp, td); 2572 sctp_bad2: 2573 if (to) 2574 free(to, M_SONAME); 2575 return (error); 2576 #else /* SCTP */ 2577 return (EOPNOTSUPP); 2578 #endif /* SCTP */ 2579 } 2580 2581 int 2582 sctp_generic_recvmsg(td, uap) 2583 struct thread *td; 2584 struct sctp_generic_recvmsg_args /* { 2585 int sd, 2586 struct iovec *iov, 2587 int iovlen, 2588 struct sockaddr *from, 2589 __socklen_t *fromlenaddr, 2590 struct sctp_sndrcvinfo *sinfo, 2591 int *msg_flags 2592 } */ *uap; 2593 { 2594 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2595 uint8_t sockbufstore[256]; 2596 struct uio auio; 2597 struct iovec *iov, *tiov; 2598 struct sctp_sndrcvinfo sinfo; 2599 struct socket *so; 2600 struct file *fp = NULL; 2601 struct sockaddr *fromsa; 2602 int fromlen; 2603 int len, i, msg_flags; 2604 int error = 0; 2605 #ifdef KTRACE 2606 struct uio *ktruio = NULL; 2607 #endif 2608 2609 AUDIT_ARG_FD(uap->sd); 2610 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); 2611 if (error) { 2612 return (error); 2613 } 2614 #ifdef COMPAT_FREEBSD32 2615 if (SV_CURPROC_FLAG(SV_ILP32)) 2616 error = freebsd32_copyiniov((struct iovec32 *)uap->iov, 2617 uap->iovlen, &iov, EMSGSIZE); 2618 else 2619 #endif 2620 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2621 if (error) 2622 goto out1; 2623 2624 so = fp->f_data; 2625 #ifdef MAC 2626 error = mac_socket_check_receive(td->td_ucred, so); 2627 if (error) { 2628 goto out; 2629 } 2630 #endif /* MAC */ 2631 2632 if (uap->fromlenaddr) { 2633 error = copyin(uap->fromlenaddr, 2634 &fromlen, sizeof (fromlen)); 2635 if (error) { 2636 goto out; 2637 } 2638 } else { 2639 fromlen = 0; 2640 } 2641 if (uap->msg_flags) { 2642 error = copyin(uap->msg_flags, &msg_flags, sizeof (int)); 2643 if (error) { 2644 goto out; 2645 } 2646 } else { 2647 msg_flags = 0; 2648 } 2649 auio.uio_iov = iov; 2650 auio.uio_iovcnt = uap->iovlen; 2651 auio.uio_segflg = UIO_USERSPACE; 2652 auio.uio_rw = UIO_READ; 2653 auio.uio_td = td; 2654 auio.uio_offset = 0; /* XXX */ 2655 auio.uio_resid = 0; 2656 tiov = iov; 2657 for (i = 0; i <uap->iovlen; i++, tiov++) { 2658 if ((auio.uio_resid += tiov->iov_len) < 0) { 2659 error = EINVAL; 2660 goto out; 2661 } 2662 } 2663 len = auio.uio_resid; 2664 fromsa = (struct sockaddr *)sockbufstore; 2665 2666 #ifdef KTRACE 2667 if (KTRPOINT(td, KTR_GENIO)) 2668 ktruio = cloneuio(&auio); 2669 #endif /* KTRACE */ 2670 memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo)); 2671 CURVNET_SET(so->so_vnet); 2672 error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL, 2673 fromsa, fromlen, &msg_flags, 2674 (struct sctp_sndrcvinfo *)&sinfo, 1); 2675 CURVNET_RESTORE(); 2676 if (error) { 2677 if (auio.uio_resid != (int)len && (error == ERESTART || 2678 error == EINTR || error == EWOULDBLOCK)) 2679 error = 0; 2680 } else { 2681 if (uap->sinfo) 2682 error = copyout(&sinfo, uap->sinfo, sizeof (sinfo)); 2683 } 2684 #ifdef KTRACE 2685 if (ktruio != NULL) { 2686 ktruio->uio_resid = (int)len - auio.uio_resid; 2687 ktrgenio(uap->sd, UIO_READ, ktruio, error); 2688 } 2689 #endif /* KTRACE */ 2690 if (error) 2691 goto out; 2692 td->td_retval[0] = (int)len - auio.uio_resid; 2693 2694 if (fromlen && uap->from) { 2695 len = fromlen; 2696 if (len <= 0 || fromsa == 0) 2697 len = 0; 2698 else { 2699 len = MIN(len, fromsa->sa_len); 2700 error = copyout(fromsa, uap->from, (unsigned)len); 2701 if (error) 2702 goto out; 2703 } 2704 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t)); 2705 if (error) { 2706 goto out; 2707 } 2708 } 2709 #ifdef KTRACE 2710 if (KTRPOINT(td, KTR_STRUCT)) 2711 ktrsockaddr(fromsa); 2712 #endif 2713 if (uap->msg_flags) { 2714 error = copyout(&msg_flags, uap->msg_flags, sizeof (int)); 2715 if (error) { 2716 goto out; 2717 } 2718 } 2719 out: 2720 free(iov, M_IOV); 2721 out1: 2722 if (fp) 2723 fdrop(fp, td); 2724 2725 return (error); 2726 #else /* SCTP */ 2727 return (EOPNOTSUPP); 2728 #endif /* SCTP */ 2729 } 2730