1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_inet.h" 39 #include "opt_inet6.h" 40 #include "opt_sctp.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/mutex.h> 49 #include <sys/sysproto.h> 50 #include <sys/malloc.h> 51 #include <sys/filedesc.h> 52 #include <sys/event.h> 53 #include <sys/proc.h> 54 #include <sys/fcntl.h> 55 #include <sys/file.h> 56 #include <sys/filio.h> 57 #include <sys/jail.h> 58 #include <sys/mount.h> 59 #include <sys/mbuf.h> 60 #include <sys/protosw.h> 61 #include <sys/sf_buf.h> 62 #include <sys/sysent.h> 63 #include <sys/socket.h> 64 #include <sys/socketvar.h> 65 #include <sys/signalvar.h> 66 #include <sys/syscallsubr.h> 67 #include <sys/sysctl.h> 68 #include <sys/uio.h> 69 #include <sys/vnode.h> 70 #ifdef KTRACE 71 #include <sys/ktrace.h> 72 #endif 73 #ifdef COMPAT_FREEBSD32 74 #include <compat/freebsd32/freebsd32_util.h> 75 #endif 76 77 #include <net/vnet.h> 78 79 #include <security/audit/audit.h> 80 #include <security/mac/mac_framework.h> 81 82 #include <vm/vm.h> 83 #include <vm/vm_object.h> 84 #include <vm/vm_page.h> 85 #include <vm/vm_pageout.h> 86 #include <vm/vm_kern.h> 87 #include <vm/vm_extern.h> 88 89 #if defined(INET) || defined(INET6) 90 #ifdef SCTP 91 #include <netinet/sctp.h> 92 #include <netinet/sctp_peeloff.h> 93 #endif /* SCTP */ 94 #endif /* INET || INET6 */ 95 96 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 97 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 98 99 static int accept1(struct thread *td, struct accept_args *uap, int compat); 100 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 101 static int getsockname1(struct thread *td, struct getsockname_args *uap, 102 int compat); 103 static int getpeername1(struct thread *td, struct getpeername_args *uap, 104 int compat); 105 106 /* 107 * NSFBUFS-related variables and associated sysctls 108 */ 109 int nsfbufs; 110 int nsfbufspeak; 111 int nsfbufsused; 112 113 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 114 "Maximum number of sendfile(2) sf_bufs available"); 115 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 116 "Number of sendfile(2) sf_bufs at peak usage"); 117 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 118 "Number of sendfile(2) sf_bufs in use"); 119 120 /* 121 * Convert a user file descriptor to a kernel file entry. A reference on the 122 * file entry is held upon returning. This is lighter weight than 123 * fgetsock(), which bumps the socket reference drops the file reference 124 * count instead, as this approach avoids several additional mutex operations 125 * associated with the additional reference count. If requested, return the 126 * open file flags. 127 */ 128 static int 129 getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp) 130 { 131 struct file *fp; 132 int error; 133 134 fp = NULL; 135 if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) { 136 error = EBADF; 137 } else if (fp->f_type != DTYPE_SOCKET) { 138 fdrop(fp, curthread); 139 fp = NULL; 140 error = ENOTSOCK; 141 } else { 142 if (fflagp != NULL) 143 *fflagp = fp->f_flag; 144 error = 0; 145 } 146 *fpp = fp; 147 return (error); 148 } 149 150 /* 151 * System call interface to the socket abstraction. 152 */ 153 #if defined(COMPAT_43) 154 #define COMPAT_OLDSOCK 155 #endif 156 157 int 158 socket(td, uap) 159 struct thread *td; 160 struct socket_args /* { 161 int domain; 162 int type; 163 int protocol; 164 } */ *uap; 165 { 166 struct filedesc *fdp; 167 struct socket *so; 168 struct file *fp; 169 int fd, error; 170 171 AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol); 172 #ifdef MAC 173 error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type, 174 uap->protocol); 175 if (error) 176 return (error); 177 #endif 178 fdp = td->td_proc->p_fd; 179 error = falloc(td, &fp, &fd); 180 if (error) 181 return (error); 182 /* An extra reference on `fp' has been held for us by falloc(). */ 183 error = socreate(uap->domain, &so, uap->type, uap->protocol, 184 td->td_ucred, td); 185 if (error) { 186 fdclose(fdp, fp, fd, td); 187 } else { 188 finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops); 189 td->td_retval[0] = fd; 190 } 191 fdrop(fp, td); 192 return (error); 193 } 194 195 /* ARGSUSED */ 196 int 197 bind(td, uap) 198 struct thread *td; 199 struct bind_args /* { 200 int s; 201 caddr_t name; 202 int namelen; 203 } */ *uap; 204 { 205 struct sockaddr *sa; 206 int error; 207 208 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 209 return (error); 210 211 error = kern_bind(td, uap->s, sa); 212 free(sa, M_SONAME); 213 return (error); 214 } 215 216 int 217 kern_bind(td, fd, sa) 218 struct thread *td; 219 int fd; 220 struct sockaddr *sa; 221 { 222 struct socket *so; 223 struct file *fp; 224 int error; 225 226 AUDIT_ARG_FD(fd); 227 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 228 if (error) 229 return (error); 230 so = fp->f_data; 231 #ifdef KTRACE 232 if (KTRPOINT(td, KTR_STRUCT)) 233 ktrsockaddr(sa); 234 #endif 235 #ifdef MAC 236 error = mac_socket_check_bind(td->td_ucred, so, sa); 237 if (error == 0) 238 #endif 239 error = sobind(so, sa, td); 240 fdrop(fp, td); 241 return (error); 242 } 243 244 /* ARGSUSED */ 245 int 246 listen(td, uap) 247 struct thread *td; 248 struct listen_args /* { 249 int s; 250 int backlog; 251 } */ *uap; 252 { 253 struct socket *so; 254 struct file *fp; 255 int error; 256 257 AUDIT_ARG_FD(uap->s); 258 error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL); 259 if (error == 0) { 260 so = fp->f_data; 261 #ifdef MAC 262 error = mac_socket_check_listen(td->td_ucred, so); 263 if (error == 0) { 264 #endif 265 CURVNET_SET(so->so_vnet); 266 error = solisten(so, uap->backlog, td); 267 CURVNET_RESTORE(); 268 #ifdef MAC 269 } 270 #endif 271 fdrop(fp, td); 272 } 273 return(error); 274 } 275 276 /* 277 * accept1() 278 */ 279 static int 280 accept1(td, uap, compat) 281 struct thread *td; 282 struct accept_args /* { 283 int s; 284 struct sockaddr * __restrict name; 285 socklen_t * __restrict anamelen; 286 } */ *uap; 287 int compat; 288 { 289 struct sockaddr *name; 290 socklen_t namelen; 291 struct file *fp; 292 int error; 293 294 if (uap->name == NULL) 295 return (kern_accept(td, uap->s, NULL, NULL, NULL)); 296 297 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 298 if (error) 299 return (error); 300 301 error = kern_accept(td, uap->s, &name, &namelen, &fp); 302 303 /* 304 * return a namelen of zero for older code which might 305 * ignore the return value from accept. 306 */ 307 if (error) { 308 (void) copyout(&namelen, 309 uap->anamelen, sizeof(*uap->anamelen)); 310 return (error); 311 } 312 313 if (error == 0 && name != NULL) { 314 #ifdef COMPAT_OLDSOCK 315 if (compat) 316 ((struct osockaddr *)name)->sa_family = 317 name->sa_family; 318 #endif 319 error = copyout(name, uap->name, namelen); 320 } 321 if (error == 0) 322 error = copyout(&namelen, uap->anamelen, 323 sizeof(namelen)); 324 if (error) 325 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td); 326 fdrop(fp, td); 327 free(name, M_SONAME); 328 return (error); 329 } 330 331 int 332 kern_accept(struct thread *td, int s, struct sockaddr **name, 333 socklen_t *namelen, struct file **fp) 334 { 335 struct filedesc *fdp; 336 struct file *headfp, *nfp = NULL; 337 struct sockaddr *sa = NULL; 338 int error; 339 struct socket *head, *so; 340 int fd; 341 u_int fflag; 342 pid_t pgid; 343 int tmp; 344 345 if (name) { 346 *name = NULL; 347 if (*namelen < 0) 348 return (EINVAL); 349 } 350 351 AUDIT_ARG_FD(s); 352 fdp = td->td_proc->p_fd; 353 error = getsock(fdp, s, &headfp, &fflag); 354 if (error) 355 return (error); 356 head = headfp->f_data; 357 if ((head->so_options & SO_ACCEPTCONN) == 0) { 358 error = EINVAL; 359 goto done; 360 } 361 #ifdef MAC 362 error = mac_socket_check_accept(td->td_ucred, head); 363 if (error != 0) 364 goto done; 365 #endif 366 error = falloc(td, &nfp, &fd); 367 if (error) 368 goto done; 369 ACCEPT_LOCK(); 370 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 371 ACCEPT_UNLOCK(); 372 error = EWOULDBLOCK; 373 goto noconnection; 374 } 375 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 376 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 377 head->so_error = ECONNABORTED; 378 break; 379 } 380 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 381 "accept", 0); 382 if (error) { 383 ACCEPT_UNLOCK(); 384 goto noconnection; 385 } 386 } 387 if (head->so_error) { 388 error = head->so_error; 389 head->so_error = 0; 390 ACCEPT_UNLOCK(); 391 goto noconnection; 392 } 393 so = TAILQ_FIRST(&head->so_comp); 394 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 395 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 396 397 /* 398 * Before changing the flags on the socket, we have to bump the 399 * reference count. Otherwise, if the protocol calls sofree(), 400 * the socket will be released due to a zero refcount. 401 */ 402 SOCK_LOCK(so); /* soref() and so_state update */ 403 soref(so); /* file descriptor reference */ 404 405 TAILQ_REMOVE(&head->so_comp, so, so_list); 406 head->so_qlen--; 407 so->so_state |= (head->so_state & SS_NBIO); 408 so->so_qstate &= ~SQ_COMP; 409 so->so_head = NULL; 410 411 SOCK_UNLOCK(so); 412 ACCEPT_UNLOCK(); 413 414 /* An extra reference on `nfp' has been held for us by falloc(). */ 415 td->td_retval[0] = fd; 416 417 /* connection has been removed from the listen queue */ 418 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 419 420 pgid = fgetown(&head->so_sigio); 421 if (pgid != 0) 422 fsetown(pgid, &so->so_sigio); 423 424 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 425 /* Sync socket nonblocking/async state with file flags */ 426 tmp = fflag & FNONBLOCK; 427 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 428 tmp = fflag & FASYNC; 429 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 430 sa = 0; 431 CURVNET_SET(so->so_vnet); 432 error = soaccept(so, &sa); 433 CURVNET_RESTORE(); 434 if (error) { 435 /* 436 * return a namelen of zero for older code which might 437 * ignore the return value from accept. 438 */ 439 if (name) 440 *namelen = 0; 441 goto noconnection; 442 } 443 if (sa == NULL) { 444 if (name) 445 *namelen = 0; 446 goto done; 447 } 448 if (name) { 449 /* check sa_len before it is destroyed */ 450 if (*namelen > sa->sa_len) 451 *namelen = sa->sa_len; 452 #ifdef KTRACE 453 if (KTRPOINT(td, KTR_STRUCT)) 454 ktrsockaddr(sa); 455 #endif 456 *name = sa; 457 sa = NULL; 458 } 459 noconnection: 460 if (sa) 461 free(sa, M_SONAME); 462 463 /* 464 * close the new descriptor, assuming someone hasn't ripped it 465 * out from under us. 466 */ 467 if (error) 468 fdclose(fdp, nfp, fd, td); 469 470 /* 471 * Release explicitly held references before returning. We return 472 * a reference on nfp to the caller on success if they request it. 473 */ 474 done: 475 if (fp != NULL) { 476 if (error == 0) { 477 *fp = nfp; 478 nfp = NULL; 479 } else 480 *fp = NULL; 481 } 482 if (nfp != NULL) 483 fdrop(nfp, td); 484 fdrop(headfp, td); 485 return (error); 486 } 487 488 int 489 accept(td, uap) 490 struct thread *td; 491 struct accept_args *uap; 492 { 493 494 return (accept1(td, uap, 0)); 495 } 496 497 #ifdef COMPAT_OLDSOCK 498 int 499 oaccept(td, uap) 500 struct thread *td; 501 struct accept_args *uap; 502 { 503 504 return (accept1(td, uap, 1)); 505 } 506 #endif /* COMPAT_OLDSOCK */ 507 508 /* ARGSUSED */ 509 int 510 connect(td, uap) 511 struct thread *td; 512 struct connect_args /* { 513 int s; 514 caddr_t name; 515 int namelen; 516 } */ *uap; 517 { 518 struct sockaddr *sa; 519 int error; 520 521 error = getsockaddr(&sa, uap->name, uap->namelen); 522 if (error) 523 return (error); 524 525 error = kern_connect(td, uap->s, sa); 526 free(sa, M_SONAME); 527 return (error); 528 } 529 530 531 int 532 kern_connect(td, fd, sa) 533 struct thread *td; 534 int fd; 535 struct sockaddr *sa; 536 { 537 struct socket *so; 538 struct file *fp; 539 int error; 540 int interrupted = 0; 541 542 AUDIT_ARG_FD(fd); 543 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 544 if (error) 545 return (error); 546 so = fp->f_data; 547 if (so->so_state & SS_ISCONNECTING) { 548 error = EALREADY; 549 goto done1; 550 } 551 #ifdef KTRACE 552 if (KTRPOINT(td, KTR_STRUCT)) 553 ktrsockaddr(sa); 554 #endif 555 #ifdef MAC 556 error = mac_socket_check_connect(td->td_ucred, so, sa); 557 if (error) 558 goto bad; 559 #endif 560 error = soconnect(so, sa, td); 561 if (error) 562 goto bad; 563 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 564 error = EINPROGRESS; 565 goto done1; 566 } 567 SOCK_LOCK(so); 568 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 569 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 570 "connec", 0); 571 if (error) { 572 if (error == EINTR || error == ERESTART) 573 interrupted = 1; 574 break; 575 } 576 } 577 if (error == 0) { 578 error = so->so_error; 579 so->so_error = 0; 580 } 581 SOCK_UNLOCK(so); 582 bad: 583 if (!interrupted) 584 so->so_state &= ~SS_ISCONNECTING; 585 if (error == ERESTART) 586 error = EINTR; 587 done1: 588 fdrop(fp, td); 589 return (error); 590 } 591 592 int 593 kern_socketpair(struct thread *td, int domain, int type, int protocol, 594 int *rsv) 595 { 596 struct filedesc *fdp = td->td_proc->p_fd; 597 struct file *fp1, *fp2; 598 struct socket *so1, *so2; 599 int fd, error; 600 601 AUDIT_ARG_SOCKET(domain, type, protocol); 602 #ifdef MAC 603 /* We might want to have a separate check for socket pairs. */ 604 error = mac_socket_check_create(td->td_ucred, domain, type, 605 protocol); 606 if (error) 607 return (error); 608 #endif 609 error = socreate(domain, &so1, type, protocol, td->td_ucred, td); 610 if (error) 611 return (error); 612 error = socreate(domain, &so2, type, protocol, td->td_ucred, td); 613 if (error) 614 goto free1; 615 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 616 error = falloc(td, &fp1, &fd); 617 if (error) 618 goto free2; 619 rsv[0] = fd; 620 fp1->f_data = so1; /* so1 already has ref count */ 621 error = falloc(td, &fp2, &fd); 622 if (error) 623 goto free3; 624 fp2->f_data = so2; /* so2 already has ref count */ 625 rsv[1] = fd; 626 error = soconnect2(so1, so2); 627 if (error) 628 goto free4; 629 if (type == SOCK_DGRAM) { 630 /* 631 * Datagram socket connection is asymmetric. 632 */ 633 error = soconnect2(so2, so1); 634 if (error) 635 goto free4; 636 } 637 finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops); 638 finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops); 639 fdrop(fp1, td); 640 fdrop(fp2, td); 641 return (0); 642 free4: 643 fdclose(fdp, fp2, rsv[1], td); 644 fdrop(fp2, td); 645 free3: 646 fdclose(fdp, fp1, rsv[0], td); 647 fdrop(fp1, td); 648 free2: 649 if (so2 != NULL) 650 (void)soclose(so2); 651 free1: 652 if (so1 != NULL) 653 (void)soclose(so1); 654 return (error); 655 } 656 657 int 658 socketpair(struct thread *td, struct socketpair_args *uap) 659 { 660 int error, sv[2]; 661 662 error = kern_socketpair(td, uap->domain, uap->type, 663 uap->protocol, sv); 664 if (error) 665 return (error); 666 error = copyout(sv, uap->rsv, 2 * sizeof(int)); 667 if (error) { 668 (void)kern_close(td, sv[0]); 669 (void)kern_close(td, sv[1]); 670 } 671 return (error); 672 } 673 674 static int 675 sendit(td, s, mp, flags) 676 struct thread *td; 677 int s; 678 struct msghdr *mp; 679 int flags; 680 { 681 struct mbuf *control; 682 struct sockaddr *to; 683 int error; 684 685 if (mp->msg_name != NULL) { 686 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 687 if (error) { 688 to = NULL; 689 goto bad; 690 } 691 mp->msg_name = to; 692 } else { 693 to = NULL; 694 } 695 696 if (mp->msg_control) { 697 if (mp->msg_controllen < sizeof(struct cmsghdr) 698 #ifdef COMPAT_OLDSOCK 699 && mp->msg_flags != MSG_COMPAT 700 #endif 701 ) { 702 error = EINVAL; 703 goto bad; 704 } 705 error = sockargs(&control, mp->msg_control, 706 mp->msg_controllen, MT_CONTROL); 707 if (error) 708 goto bad; 709 #ifdef COMPAT_OLDSOCK 710 if (mp->msg_flags == MSG_COMPAT) { 711 struct cmsghdr *cm; 712 713 M_PREPEND(control, sizeof(*cm), M_WAIT); 714 cm = mtod(control, struct cmsghdr *); 715 cm->cmsg_len = control->m_len; 716 cm->cmsg_level = SOL_SOCKET; 717 cm->cmsg_type = SCM_RIGHTS; 718 } 719 #endif 720 } else { 721 control = NULL; 722 } 723 724 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 725 726 bad: 727 if (to) 728 free(to, M_SONAME); 729 return (error); 730 } 731 732 int 733 kern_sendit(td, s, mp, flags, control, segflg) 734 struct thread *td; 735 int s; 736 struct msghdr *mp; 737 int flags; 738 struct mbuf *control; 739 enum uio_seg segflg; 740 { 741 struct file *fp; 742 struct uio auio; 743 struct iovec *iov; 744 struct socket *so; 745 int i; 746 int len, error; 747 #ifdef KTRACE 748 struct uio *ktruio = NULL; 749 #endif 750 751 AUDIT_ARG_FD(s); 752 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 753 if (error) 754 return (error); 755 so = (struct socket *)fp->f_data; 756 757 #ifdef MAC 758 if (mp->msg_name != NULL) { 759 error = mac_socket_check_connect(td->td_ucred, so, 760 mp->msg_name); 761 if (error) 762 goto bad; 763 } 764 error = mac_socket_check_send(td->td_ucred, so); 765 if (error) 766 goto bad; 767 #endif 768 769 auio.uio_iov = mp->msg_iov; 770 auio.uio_iovcnt = mp->msg_iovlen; 771 auio.uio_segflg = segflg; 772 auio.uio_rw = UIO_WRITE; 773 auio.uio_td = td; 774 auio.uio_offset = 0; /* XXX */ 775 auio.uio_resid = 0; 776 iov = mp->msg_iov; 777 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 778 if ((auio.uio_resid += iov->iov_len) < 0) { 779 error = EINVAL; 780 goto bad; 781 } 782 } 783 #ifdef KTRACE 784 if (KTRPOINT(td, KTR_GENIO)) 785 ktruio = cloneuio(&auio); 786 #endif 787 len = auio.uio_resid; 788 error = sosend(so, mp->msg_name, &auio, 0, control, flags, td); 789 if (error) { 790 if (auio.uio_resid != len && (error == ERESTART || 791 error == EINTR || error == EWOULDBLOCK)) 792 error = 0; 793 /* Generation of SIGPIPE can be controlled per socket */ 794 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 795 !(flags & MSG_NOSIGNAL)) { 796 PROC_LOCK(td->td_proc); 797 psignal(td->td_proc, SIGPIPE); 798 PROC_UNLOCK(td->td_proc); 799 } 800 } 801 if (error == 0) 802 td->td_retval[0] = len - auio.uio_resid; 803 #ifdef KTRACE 804 if (ktruio != NULL) { 805 ktruio->uio_resid = td->td_retval[0]; 806 ktrgenio(s, UIO_WRITE, ktruio, error); 807 } 808 #endif 809 bad: 810 fdrop(fp, td); 811 return (error); 812 } 813 814 int 815 sendto(td, uap) 816 struct thread *td; 817 struct sendto_args /* { 818 int s; 819 caddr_t buf; 820 size_t len; 821 int flags; 822 caddr_t to; 823 int tolen; 824 } */ *uap; 825 { 826 struct msghdr msg; 827 struct iovec aiov; 828 int error; 829 830 msg.msg_name = uap->to; 831 msg.msg_namelen = uap->tolen; 832 msg.msg_iov = &aiov; 833 msg.msg_iovlen = 1; 834 msg.msg_control = 0; 835 #ifdef COMPAT_OLDSOCK 836 msg.msg_flags = 0; 837 #endif 838 aiov.iov_base = uap->buf; 839 aiov.iov_len = uap->len; 840 error = sendit(td, uap->s, &msg, uap->flags); 841 return (error); 842 } 843 844 #ifdef COMPAT_OLDSOCK 845 int 846 osend(td, uap) 847 struct thread *td; 848 struct osend_args /* { 849 int s; 850 caddr_t buf; 851 int len; 852 int flags; 853 } */ *uap; 854 { 855 struct msghdr msg; 856 struct iovec aiov; 857 int error; 858 859 msg.msg_name = 0; 860 msg.msg_namelen = 0; 861 msg.msg_iov = &aiov; 862 msg.msg_iovlen = 1; 863 aiov.iov_base = uap->buf; 864 aiov.iov_len = uap->len; 865 msg.msg_control = 0; 866 msg.msg_flags = 0; 867 error = sendit(td, uap->s, &msg, uap->flags); 868 return (error); 869 } 870 871 int 872 osendmsg(td, uap) 873 struct thread *td; 874 struct osendmsg_args /* { 875 int s; 876 caddr_t msg; 877 int flags; 878 } */ *uap; 879 { 880 struct msghdr msg; 881 struct iovec *iov; 882 int error; 883 884 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 885 if (error) 886 return (error); 887 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 888 if (error) 889 return (error); 890 msg.msg_iov = iov; 891 msg.msg_flags = MSG_COMPAT; 892 error = sendit(td, uap->s, &msg, uap->flags); 893 free(iov, M_IOV); 894 return (error); 895 } 896 #endif 897 898 int 899 sendmsg(td, uap) 900 struct thread *td; 901 struct sendmsg_args /* { 902 int s; 903 caddr_t msg; 904 int flags; 905 } */ *uap; 906 { 907 struct msghdr msg; 908 struct iovec *iov; 909 int error; 910 911 error = copyin(uap->msg, &msg, sizeof (msg)); 912 if (error) 913 return (error); 914 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 915 if (error) 916 return (error); 917 msg.msg_iov = iov; 918 #ifdef COMPAT_OLDSOCK 919 msg.msg_flags = 0; 920 #endif 921 error = sendit(td, uap->s, &msg, uap->flags); 922 free(iov, M_IOV); 923 return (error); 924 } 925 926 int 927 kern_recvit(td, s, mp, fromseg, controlp) 928 struct thread *td; 929 int s; 930 struct msghdr *mp; 931 enum uio_seg fromseg; 932 struct mbuf **controlp; 933 { 934 struct uio auio; 935 struct iovec *iov; 936 int i; 937 socklen_t len; 938 int error; 939 struct mbuf *m, *control = 0; 940 caddr_t ctlbuf; 941 struct file *fp; 942 struct socket *so; 943 struct sockaddr *fromsa = 0; 944 #ifdef KTRACE 945 struct uio *ktruio = NULL; 946 #endif 947 948 if (controlp != NULL) 949 *controlp = NULL; 950 951 AUDIT_ARG_FD(s); 952 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 953 if (error) 954 return (error); 955 so = fp->f_data; 956 957 #ifdef MAC 958 error = mac_socket_check_receive(td->td_ucred, so); 959 if (error) { 960 fdrop(fp, td); 961 return (error); 962 } 963 #endif 964 965 auio.uio_iov = mp->msg_iov; 966 auio.uio_iovcnt = mp->msg_iovlen; 967 auio.uio_segflg = UIO_USERSPACE; 968 auio.uio_rw = UIO_READ; 969 auio.uio_td = td; 970 auio.uio_offset = 0; /* XXX */ 971 auio.uio_resid = 0; 972 iov = mp->msg_iov; 973 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 974 if ((auio.uio_resid += iov->iov_len) < 0) { 975 fdrop(fp, td); 976 return (EINVAL); 977 } 978 } 979 #ifdef KTRACE 980 if (KTRPOINT(td, KTR_GENIO)) 981 ktruio = cloneuio(&auio); 982 #endif 983 len = auio.uio_resid; 984 CURVNET_SET(so->so_vnet); 985 error = soreceive(so, &fromsa, &auio, (struct mbuf **)0, 986 (mp->msg_control || controlp) ? &control : (struct mbuf **)0, 987 &mp->msg_flags); 988 CURVNET_RESTORE(); 989 if (error) { 990 if (auio.uio_resid != (int)len && (error == ERESTART || 991 error == EINTR || error == EWOULDBLOCK)) 992 error = 0; 993 } 994 #ifdef KTRACE 995 if (ktruio != NULL) { 996 ktruio->uio_resid = (int)len - auio.uio_resid; 997 ktrgenio(s, UIO_READ, ktruio, error); 998 } 999 #endif 1000 if (error) 1001 goto out; 1002 td->td_retval[0] = (int)len - auio.uio_resid; 1003 if (mp->msg_name) { 1004 len = mp->msg_namelen; 1005 if (len <= 0 || fromsa == 0) 1006 len = 0; 1007 else { 1008 /* save sa_len before it is destroyed by MSG_COMPAT */ 1009 len = MIN(len, fromsa->sa_len); 1010 #ifdef COMPAT_OLDSOCK 1011 if (mp->msg_flags & MSG_COMPAT) 1012 ((struct osockaddr *)fromsa)->sa_family = 1013 fromsa->sa_family; 1014 #endif 1015 if (fromseg == UIO_USERSPACE) { 1016 error = copyout(fromsa, mp->msg_name, 1017 (unsigned)len); 1018 if (error) 1019 goto out; 1020 } else 1021 bcopy(fromsa, mp->msg_name, len); 1022 } 1023 mp->msg_namelen = len; 1024 } 1025 if (mp->msg_control && controlp == NULL) { 1026 #ifdef COMPAT_OLDSOCK 1027 /* 1028 * We assume that old recvmsg calls won't receive access 1029 * rights and other control info, esp. as control info 1030 * is always optional and those options didn't exist in 4.3. 1031 * If we receive rights, trim the cmsghdr; anything else 1032 * is tossed. 1033 */ 1034 if (control && mp->msg_flags & MSG_COMPAT) { 1035 if (mtod(control, struct cmsghdr *)->cmsg_level != 1036 SOL_SOCKET || 1037 mtod(control, struct cmsghdr *)->cmsg_type != 1038 SCM_RIGHTS) { 1039 mp->msg_controllen = 0; 1040 goto out; 1041 } 1042 control->m_len -= sizeof (struct cmsghdr); 1043 control->m_data += sizeof (struct cmsghdr); 1044 } 1045 #endif 1046 len = mp->msg_controllen; 1047 m = control; 1048 mp->msg_controllen = 0; 1049 ctlbuf = mp->msg_control; 1050 1051 while (m && len > 0) { 1052 unsigned int tocopy; 1053 1054 if (len >= m->m_len) 1055 tocopy = m->m_len; 1056 else { 1057 mp->msg_flags |= MSG_CTRUNC; 1058 tocopy = len; 1059 } 1060 1061 if ((error = copyout(mtod(m, caddr_t), 1062 ctlbuf, tocopy)) != 0) 1063 goto out; 1064 1065 ctlbuf += tocopy; 1066 len -= tocopy; 1067 m = m->m_next; 1068 } 1069 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1070 } 1071 out: 1072 fdrop(fp, td); 1073 #ifdef KTRACE 1074 if (fromsa && KTRPOINT(td, KTR_STRUCT)) 1075 ktrsockaddr(fromsa); 1076 #endif 1077 if (fromsa) 1078 free(fromsa, M_SONAME); 1079 1080 if (error == 0 && controlp != NULL) 1081 *controlp = control; 1082 else if (control) 1083 m_freem(control); 1084 1085 return (error); 1086 } 1087 1088 static int 1089 recvit(td, s, mp, namelenp) 1090 struct thread *td; 1091 int s; 1092 struct msghdr *mp; 1093 void *namelenp; 1094 { 1095 int error; 1096 1097 error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL); 1098 if (error) 1099 return (error); 1100 if (namelenp) { 1101 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t)); 1102 #ifdef COMPAT_OLDSOCK 1103 if (mp->msg_flags & MSG_COMPAT) 1104 error = 0; /* old recvfrom didn't check */ 1105 #endif 1106 } 1107 return (error); 1108 } 1109 1110 int 1111 recvfrom(td, uap) 1112 struct thread *td; 1113 struct recvfrom_args /* { 1114 int s; 1115 caddr_t buf; 1116 size_t len; 1117 int flags; 1118 struct sockaddr * __restrict from; 1119 socklen_t * __restrict fromlenaddr; 1120 } */ *uap; 1121 { 1122 struct msghdr msg; 1123 struct iovec aiov; 1124 int error; 1125 1126 if (uap->fromlenaddr) { 1127 error = copyin(uap->fromlenaddr, 1128 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1129 if (error) 1130 goto done2; 1131 } else { 1132 msg.msg_namelen = 0; 1133 } 1134 msg.msg_name = uap->from; 1135 msg.msg_iov = &aiov; 1136 msg.msg_iovlen = 1; 1137 aiov.iov_base = uap->buf; 1138 aiov.iov_len = uap->len; 1139 msg.msg_control = 0; 1140 msg.msg_flags = uap->flags; 1141 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1142 done2: 1143 return(error); 1144 } 1145 1146 #ifdef COMPAT_OLDSOCK 1147 int 1148 orecvfrom(td, uap) 1149 struct thread *td; 1150 struct recvfrom_args *uap; 1151 { 1152 1153 uap->flags |= MSG_COMPAT; 1154 return (recvfrom(td, uap)); 1155 } 1156 #endif 1157 1158 #ifdef COMPAT_OLDSOCK 1159 int 1160 orecv(td, uap) 1161 struct thread *td; 1162 struct orecv_args /* { 1163 int s; 1164 caddr_t buf; 1165 int len; 1166 int flags; 1167 } */ *uap; 1168 { 1169 struct msghdr msg; 1170 struct iovec aiov; 1171 int error; 1172 1173 msg.msg_name = 0; 1174 msg.msg_namelen = 0; 1175 msg.msg_iov = &aiov; 1176 msg.msg_iovlen = 1; 1177 aiov.iov_base = uap->buf; 1178 aiov.iov_len = uap->len; 1179 msg.msg_control = 0; 1180 msg.msg_flags = uap->flags; 1181 error = recvit(td, uap->s, &msg, NULL); 1182 return (error); 1183 } 1184 1185 /* 1186 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1187 * overlays the new one, missing only the flags, and with the (old) access 1188 * rights where the control fields are now. 1189 */ 1190 int 1191 orecvmsg(td, uap) 1192 struct thread *td; 1193 struct orecvmsg_args /* { 1194 int s; 1195 struct omsghdr *msg; 1196 int flags; 1197 } */ *uap; 1198 { 1199 struct msghdr msg; 1200 struct iovec *iov; 1201 int error; 1202 1203 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1204 if (error) 1205 return (error); 1206 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1207 if (error) 1208 return (error); 1209 msg.msg_flags = uap->flags | MSG_COMPAT; 1210 msg.msg_iov = iov; 1211 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1212 if (msg.msg_controllen && error == 0) 1213 error = copyout(&msg.msg_controllen, 1214 &uap->msg->msg_accrightslen, sizeof (int)); 1215 free(iov, M_IOV); 1216 return (error); 1217 } 1218 #endif 1219 1220 int 1221 recvmsg(td, uap) 1222 struct thread *td; 1223 struct recvmsg_args /* { 1224 int s; 1225 struct msghdr *msg; 1226 int flags; 1227 } */ *uap; 1228 { 1229 struct msghdr msg; 1230 struct iovec *uiov, *iov; 1231 int error; 1232 1233 error = copyin(uap->msg, &msg, sizeof (msg)); 1234 if (error) 1235 return (error); 1236 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1237 if (error) 1238 return (error); 1239 msg.msg_flags = uap->flags; 1240 #ifdef COMPAT_OLDSOCK 1241 msg.msg_flags &= ~MSG_COMPAT; 1242 #endif 1243 uiov = msg.msg_iov; 1244 msg.msg_iov = iov; 1245 error = recvit(td, uap->s, &msg, NULL); 1246 if (error == 0) { 1247 msg.msg_iov = uiov; 1248 error = copyout(&msg, uap->msg, sizeof(msg)); 1249 } 1250 free(iov, M_IOV); 1251 return (error); 1252 } 1253 1254 /* ARGSUSED */ 1255 int 1256 shutdown(td, uap) 1257 struct thread *td; 1258 struct shutdown_args /* { 1259 int s; 1260 int how; 1261 } */ *uap; 1262 { 1263 struct socket *so; 1264 struct file *fp; 1265 int error; 1266 1267 AUDIT_ARG_FD(uap->s); 1268 error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL); 1269 if (error == 0) { 1270 so = fp->f_data; 1271 error = soshutdown(so, uap->how); 1272 fdrop(fp, td); 1273 } 1274 return (error); 1275 } 1276 1277 /* ARGSUSED */ 1278 int 1279 setsockopt(td, uap) 1280 struct thread *td; 1281 struct setsockopt_args /* { 1282 int s; 1283 int level; 1284 int name; 1285 caddr_t val; 1286 int valsize; 1287 } */ *uap; 1288 { 1289 1290 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1291 uap->val, UIO_USERSPACE, uap->valsize)); 1292 } 1293 1294 int 1295 kern_setsockopt(td, s, level, name, val, valseg, valsize) 1296 struct thread *td; 1297 int s; 1298 int level; 1299 int name; 1300 void *val; 1301 enum uio_seg valseg; 1302 socklen_t valsize; 1303 { 1304 int error; 1305 struct socket *so; 1306 struct file *fp; 1307 struct sockopt sopt; 1308 1309 if (val == NULL && valsize != 0) 1310 return (EFAULT); 1311 if ((int)valsize < 0) 1312 return (EINVAL); 1313 1314 sopt.sopt_dir = SOPT_SET; 1315 sopt.sopt_level = level; 1316 sopt.sopt_name = name; 1317 sopt.sopt_val = val; 1318 sopt.sopt_valsize = valsize; 1319 switch (valseg) { 1320 case UIO_USERSPACE: 1321 sopt.sopt_td = td; 1322 break; 1323 case UIO_SYSSPACE: 1324 sopt.sopt_td = NULL; 1325 break; 1326 default: 1327 panic("kern_setsockopt called with bad valseg"); 1328 } 1329 1330 AUDIT_ARG_FD(s); 1331 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 1332 if (error == 0) { 1333 so = fp->f_data; 1334 CURVNET_SET(so->so_vnet); 1335 error = sosetopt(so, &sopt); 1336 CURVNET_RESTORE(); 1337 fdrop(fp, td); 1338 } 1339 return(error); 1340 } 1341 1342 /* ARGSUSED */ 1343 int 1344 getsockopt(td, uap) 1345 struct thread *td; 1346 struct getsockopt_args /* { 1347 int s; 1348 int level; 1349 int name; 1350 void * __restrict val; 1351 socklen_t * __restrict avalsize; 1352 } */ *uap; 1353 { 1354 socklen_t valsize; 1355 int error; 1356 1357 if (uap->val) { 1358 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1359 if (error) 1360 return (error); 1361 } 1362 1363 error = kern_getsockopt(td, uap->s, uap->level, uap->name, 1364 uap->val, UIO_USERSPACE, &valsize); 1365 1366 if (error == 0) 1367 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1368 return (error); 1369 } 1370 1371 /* 1372 * Kernel version of getsockopt. 1373 * optval can be a userland or userspace. optlen is always a kernel pointer. 1374 */ 1375 int 1376 kern_getsockopt(td, s, level, name, val, valseg, valsize) 1377 struct thread *td; 1378 int s; 1379 int level; 1380 int name; 1381 void *val; 1382 enum uio_seg valseg; 1383 socklen_t *valsize; 1384 { 1385 int error; 1386 struct socket *so; 1387 struct file *fp; 1388 struct sockopt sopt; 1389 1390 if (val == NULL) 1391 *valsize = 0; 1392 if ((int)*valsize < 0) 1393 return (EINVAL); 1394 1395 sopt.sopt_dir = SOPT_GET; 1396 sopt.sopt_level = level; 1397 sopt.sopt_name = name; 1398 sopt.sopt_val = val; 1399 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1400 switch (valseg) { 1401 case UIO_USERSPACE: 1402 sopt.sopt_td = td; 1403 break; 1404 case UIO_SYSSPACE: 1405 sopt.sopt_td = NULL; 1406 break; 1407 default: 1408 panic("kern_getsockopt called with bad valseg"); 1409 } 1410 1411 AUDIT_ARG_FD(s); 1412 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 1413 if (error == 0) { 1414 so = fp->f_data; 1415 CURVNET_SET(so->so_vnet); 1416 error = sogetopt(so, &sopt); 1417 CURVNET_RESTORE(); 1418 *valsize = sopt.sopt_valsize; 1419 fdrop(fp, td); 1420 } 1421 return (error); 1422 } 1423 1424 /* 1425 * getsockname1() - Get socket name. 1426 */ 1427 /* ARGSUSED */ 1428 static int 1429 getsockname1(td, uap, compat) 1430 struct thread *td; 1431 struct getsockname_args /* { 1432 int fdes; 1433 struct sockaddr * __restrict asa; 1434 socklen_t * __restrict alen; 1435 } */ *uap; 1436 int compat; 1437 { 1438 struct sockaddr *sa; 1439 socklen_t len; 1440 int error; 1441 1442 error = copyin(uap->alen, &len, sizeof(len)); 1443 if (error) 1444 return (error); 1445 1446 error = kern_getsockname(td, uap->fdes, &sa, &len); 1447 if (error) 1448 return (error); 1449 1450 if (len != 0) { 1451 #ifdef COMPAT_OLDSOCK 1452 if (compat) 1453 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1454 #endif 1455 error = copyout(sa, uap->asa, (u_int)len); 1456 } 1457 free(sa, M_SONAME); 1458 if (error == 0) 1459 error = copyout(&len, uap->alen, sizeof(len)); 1460 return (error); 1461 } 1462 1463 int 1464 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, 1465 socklen_t *alen) 1466 { 1467 struct socket *so; 1468 struct file *fp; 1469 socklen_t len; 1470 int error; 1471 1472 if (*alen < 0) 1473 return (EINVAL); 1474 1475 AUDIT_ARG_FD(fd); 1476 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 1477 if (error) 1478 return (error); 1479 so = fp->f_data; 1480 *sa = NULL; 1481 CURVNET_SET(so->so_vnet); 1482 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa); 1483 CURVNET_RESTORE(); 1484 if (error) 1485 goto bad; 1486 if (*sa == NULL) 1487 len = 0; 1488 else 1489 len = MIN(*alen, (*sa)->sa_len); 1490 *alen = len; 1491 #ifdef KTRACE 1492 if (KTRPOINT(td, KTR_STRUCT)) 1493 ktrsockaddr(*sa); 1494 #endif 1495 bad: 1496 fdrop(fp, td); 1497 if (error && *sa) { 1498 free(*sa, M_SONAME); 1499 *sa = NULL; 1500 } 1501 return (error); 1502 } 1503 1504 int 1505 getsockname(td, uap) 1506 struct thread *td; 1507 struct getsockname_args *uap; 1508 { 1509 1510 return (getsockname1(td, uap, 0)); 1511 } 1512 1513 #ifdef COMPAT_OLDSOCK 1514 int 1515 ogetsockname(td, uap) 1516 struct thread *td; 1517 struct getsockname_args *uap; 1518 { 1519 1520 return (getsockname1(td, uap, 1)); 1521 } 1522 #endif /* COMPAT_OLDSOCK */ 1523 1524 /* 1525 * getpeername1() - Get name of peer for connected socket. 1526 */ 1527 /* ARGSUSED */ 1528 static int 1529 getpeername1(td, uap, compat) 1530 struct thread *td; 1531 struct getpeername_args /* { 1532 int fdes; 1533 struct sockaddr * __restrict asa; 1534 socklen_t * __restrict alen; 1535 } */ *uap; 1536 int compat; 1537 { 1538 struct sockaddr *sa; 1539 socklen_t len; 1540 int error; 1541 1542 error = copyin(uap->alen, &len, sizeof (len)); 1543 if (error) 1544 return (error); 1545 1546 error = kern_getpeername(td, uap->fdes, &sa, &len); 1547 if (error) 1548 return (error); 1549 1550 if (len != 0) { 1551 #ifdef COMPAT_OLDSOCK 1552 if (compat) 1553 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1554 #endif 1555 error = copyout(sa, uap->asa, (u_int)len); 1556 } 1557 free(sa, M_SONAME); 1558 if (error == 0) 1559 error = copyout(&len, uap->alen, sizeof(len)); 1560 return (error); 1561 } 1562 1563 int 1564 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, 1565 socklen_t *alen) 1566 { 1567 struct socket *so; 1568 struct file *fp; 1569 socklen_t len; 1570 int error; 1571 1572 if (*alen < 0) 1573 return (EINVAL); 1574 1575 AUDIT_ARG_FD(fd); 1576 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 1577 if (error) 1578 return (error); 1579 so = fp->f_data; 1580 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1581 error = ENOTCONN; 1582 goto done; 1583 } 1584 *sa = NULL; 1585 CURVNET_SET(so->so_vnet); 1586 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa); 1587 CURVNET_RESTORE(); 1588 if (error) 1589 goto bad; 1590 if (*sa == NULL) 1591 len = 0; 1592 else 1593 len = MIN(*alen, (*sa)->sa_len); 1594 *alen = len; 1595 #ifdef KTRACE 1596 if (KTRPOINT(td, KTR_STRUCT)) 1597 ktrsockaddr(*sa); 1598 #endif 1599 bad: 1600 if (error && *sa) { 1601 free(*sa, M_SONAME); 1602 *sa = NULL; 1603 } 1604 done: 1605 fdrop(fp, td); 1606 return (error); 1607 } 1608 1609 int 1610 getpeername(td, uap) 1611 struct thread *td; 1612 struct getpeername_args *uap; 1613 { 1614 1615 return (getpeername1(td, uap, 0)); 1616 } 1617 1618 #ifdef COMPAT_OLDSOCK 1619 int 1620 ogetpeername(td, uap) 1621 struct thread *td; 1622 struct ogetpeername_args *uap; 1623 { 1624 1625 /* XXX uap should have type `getpeername_args *' to begin with. */ 1626 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1627 } 1628 #endif /* COMPAT_OLDSOCK */ 1629 1630 int 1631 sockargs(mp, buf, buflen, type) 1632 struct mbuf **mp; 1633 caddr_t buf; 1634 int buflen, type; 1635 { 1636 struct sockaddr *sa; 1637 struct mbuf *m; 1638 int error; 1639 1640 if ((u_int)buflen > MLEN) { 1641 #ifdef COMPAT_OLDSOCK 1642 if (type == MT_SONAME && (u_int)buflen <= 112) 1643 buflen = MLEN; /* unix domain compat. hack */ 1644 else 1645 #endif 1646 if ((u_int)buflen > MCLBYTES) 1647 return (EINVAL); 1648 } 1649 m = m_get(M_WAIT, type); 1650 if ((u_int)buflen > MLEN) 1651 MCLGET(m, M_WAIT); 1652 m->m_len = buflen; 1653 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1654 if (error) 1655 (void) m_free(m); 1656 else { 1657 *mp = m; 1658 if (type == MT_SONAME) { 1659 sa = mtod(m, struct sockaddr *); 1660 1661 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1662 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1663 sa->sa_family = sa->sa_len; 1664 #endif 1665 sa->sa_len = buflen; 1666 } 1667 } 1668 return (error); 1669 } 1670 1671 int 1672 getsockaddr(namp, uaddr, len) 1673 struct sockaddr **namp; 1674 caddr_t uaddr; 1675 size_t len; 1676 { 1677 struct sockaddr *sa; 1678 int error; 1679 1680 if (len > SOCK_MAXADDRLEN) 1681 return (ENAMETOOLONG); 1682 if (len < offsetof(struct sockaddr, sa_data[0])) 1683 return (EINVAL); 1684 sa = malloc(len, M_SONAME, M_WAITOK); 1685 error = copyin(uaddr, sa, len); 1686 if (error) { 1687 free(sa, M_SONAME); 1688 } else { 1689 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1690 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1691 sa->sa_family = sa->sa_len; 1692 #endif 1693 sa->sa_len = len; 1694 *namp = sa; 1695 } 1696 return (error); 1697 } 1698 1699 #include <sys/condvar.h> 1700 1701 struct sendfile_sync { 1702 struct mtx mtx; 1703 struct cv cv; 1704 unsigned count; 1705 }; 1706 1707 /* 1708 * Detach mapped page and release resources back to the system. 1709 */ 1710 void 1711 sf_buf_mext(void *addr, void *args) 1712 { 1713 vm_page_t m; 1714 struct sendfile_sync *sfs; 1715 1716 m = sf_buf_page(args); 1717 sf_buf_free(args); 1718 vm_page_lock_queues(); 1719 vm_page_unwire(m, 0); 1720 /* 1721 * Check for the object going away on us. This can 1722 * happen since we don't hold a reference to it. 1723 * If so, we're responsible for freeing the page. 1724 */ 1725 if (m->wire_count == 0 && m->object == NULL) 1726 vm_page_free(m); 1727 vm_page_unlock_queues(); 1728 if (addr == NULL) 1729 return; 1730 sfs = addr; 1731 mtx_lock(&sfs->mtx); 1732 KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0")); 1733 if (--sfs->count == 0) 1734 cv_signal(&sfs->cv); 1735 mtx_unlock(&sfs->mtx); 1736 } 1737 1738 /* 1739 * sendfile(2) 1740 * 1741 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1742 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1743 * 1744 * Send a file specified by 'fd' and starting at 'offset' to a socket 1745 * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes == 1746 * 0. Optionally add a header and/or trailer to the socket output. If 1747 * specified, write the total number of bytes sent into *sbytes. 1748 */ 1749 int 1750 sendfile(struct thread *td, struct sendfile_args *uap) 1751 { 1752 1753 return (do_sendfile(td, uap, 0)); 1754 } 1755 1756 static int 1757 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1758 { 1759 struct sf_hdtr hdtr; 1760 struct uio *hdr_uio, *trl_uio; 1761 int error; 1762 1763 hdr_uio = trl_uio = NULL; 1764 1765 if (uap->hdtr != NULL) { 1766 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1767 if (error) 1768 goto out; 1769 if (hdtr.headers != NULL) { 1770 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1771 if (error) 1772 goto out; 1773 } 1774 if (hdtr.trailers != NULL) { 1775 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); 1776 if (error) 1777 goto out; 1778 1779 } 1780 } 1781 1782 error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat); 1783 out: 1784 if (hdr_uio) 1785 free(hdr_uio, M_IOV); 1786 if (trl_uio) 1787 free(trl_uio, M_IOV); 1788 return (error); 1789 } 1790 1791 #ifdef COMPAT_FREEBSD4 1792 int 1793 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1794 { 1795 struct sendfile_args args; 1796 1797 args.fd = uap->fd; 1798 args.s = uap->s; 1799 args.offset = uap->offset; 1800 args.nbytes = uap->nbytes; 1801 args.hdtr = uap->hdtr; 1802 args.sbytes = uap->sbytes; 1803 args.flags = uap->flags; 1804 1805 return (do_sendfile(td, &args, 1)); 1806 } 1807 #endif /* COMPAT_FREEBSD4 */ 1808 1809 int 1810 kern_sendfile(struct thread *td, struct sendfile_args *uap, 1811 struct uio *hdr_uio, struct uio *trl_uio, int compat) 1812 { 1813 struct file *sock_fp; 1814 struct vnode *vp; 1815 struct vm_object *obj = NULL; 1816 struct socket *so = NULL; 1817 struct mbuf *m = NULL; 1818 struct sf_buf *sf; 1819 struct vm_page *pg; 1820 off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0; 1821 int error, hdrlen = 0, mnw = 0; 1822 int vfslocked; 1823 struct sendfile_sync *sfs = NULL; 1824 1825 /* 1826 * The file descriptor must be a regular file and have a 1827 * backing VM object. 1828 * File offset must be positive. If it goes beyond EOF 1829 * we send only the header/trailer and no payload data. 1830 */ 1831 AUDIT_ARG_FD(uap->fd); 1832 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1833 goto out; 1834 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1835 vn_lock(vp, LK_SHARED | LK_RETRY); 1836 if (vp->v_type == VREG) { 1837 obj = vp->v_object; 1838 if (obj != NULL) { 1839 /* 1840 * Temporarily increase the backing VM 1841 * object's reference count so that a forced 1842 * reclamation of its vnode does not 1843 * immediately destroy it. 1844 */ 1845 VM_OBJECT_LOCK(obj); 1846 if ((obj->flags & OBJ_DEAD) == 0) { 1847 vm_object_reference_locked(obj); 1848 VM_OBJECT_UNLOCK(obj); 1849 } else { 1850 VM_OBJECT_UNLOCK(obj); 1851 obj = NULL; 1852 } 1853 } 1854 } 1855 VOP_UNLOCK(vp, 0); 1856 VFS_UNLOCK_GIANT(vfslocked); 1857 if (obj == NULL) { 1858 error = EINVAL; 1859 goto out; 1860 } 1861 if (uap->offset < 0) { 1862 error = EINVAL; 1863 goto out; 1864 } 1865 1866 /* 1867 * The socket must be a stream socket and connected. 1868 * Remember if it a blocking or non-blocking socket. 1869 */ 1870 if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp, 1871 NULL)) != 0) 1872 goto out; 1873 so = sock_fp->f_data; 1874 if (so->so_type != SOCK_STREAM) { 1875 error = EINVAL; 1876 goto out; 1877 } 1878 if ((so->so_state & SS_ISCONNECTED) == 0) { 1879 error = ENOTCONN; 1880 goto out; 1881 } 1882 /* 1883 * Do not wait on memory allocations but return ENOMEM for 1884 * caller to retry later. 1885 * XXX: Experimental. 1886 */ 1887 if (uap->flags & SF_MNOWAIT) 1888 mnw = 1; 1889 1890 if (uap->flags & SF_SYNC) { 1891 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK); 1892 memset(sfs, 0, sizeof *sfs); 1893 mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); 1894 cv_init(&sfs->cv, "sendfile"); 1895 } 1896 1897 #ifdef MAC 1898 error = mac_socket_check_send(td->td_ucred, so); 1899 if (error) 1900 goto out; 1901 #endif 1902 1903 /* If headers are specified copy them into mbufs. */ 1904 if (hdr_uio != NULL) { 1905 hdr_uio->uio_td = td; 1906 hdr_uio->uio_rw = UIO_WRITE; 1907 if (hdr_uio->uio_resid > 0) { 1908 /* 1909 * In FBSD < 5.0 the nbytes to send also included 1910 * the header. If compat is specified subtract the 1911 * header size from nbytes. 1912 */ 1913 if (compat) { 1914 if (uap->nbytes > hdr_uio->uio_resid) 1915 uap->nbytes -= hdr_uio->uio_resid; 1916 else 1917 uap->nbytes = 0; 1918 } 1919 m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK), 1920 0, 0, 0); 1921 if (m == NULL) { 1922 error = mnw ? EAGAIN : ENOBUFS; 1923 goto out; 1924 } 1925 hdrlen = m_length(m, NULL); 1926 } 1927 } 1928 1929 /* 1930 * Protect against multiple writers to the socket. 1931 * 1932 * XXXRW: Historically this has assumed non-interruptibility, so now 1933 * we implement that, but possibly shouldn't. 1934 */ 1935 (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR); 1936 1937 /* 1938 * Loop through the pages of the file, starting with the requested 1939 * offset. Get a file page (do I/O if necessary), map the file page 1940 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1941 * it on the socket. 1942 * This is done in two loops. The inner loop turns as many pages 1943 * as it can, up to available socket buffer space, without blocking 1944 * into mbufs to have it bulk delivered into the socket send buffer. 1945 * The outer loop checks the state and available space of the socket 1946 * and takes care of the overall progress. 1947 */ 1948 for (off = uap->offset, rem = uap->nbytes; ; ) { 1949 int loopbytes = 0; 1950 int space = 0; 1951 int done = 0; 1952 1953 /* 1954 * Check the socket state for ongoing connection, 1955 * no errors and space in socket buffer. 1956 * If space is low allow for the remainder of the 1957 * file to be processed if it fits the socket buffer. 1958 * Otherwise block in waiting for sufficient space 1959 * to proceed, or if the socket is nonblocking, return 1960 * to userland with EAGAIN while reporting how far 1961 * we've come. 1962 * We wait until the socket buffer has significant free 1963 * space to do bulk sends. This makes good use of file 1964 * system read ahead and allows packet segmentation 1965 * offloading hardware to take over lots of work. If 1966 * we were not careful here we would send off only one 1967 * sfbuf at a time. 1968 */ 1969 SOCKBUF_LOCK(&so->so_snd); 1970 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2) 1971 so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; 1972 retry_space: 1973 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 1974 error = EPIPE; 1975 SOCKBUF_UNLOCK(&so->so_snd); 1976 goto done; 1977 } else if (so->so_error) { 1978 error = so->so_error; 1979 so->so_error = 0; 1980 SOCKBUF_UNLOCK(&so->so_snd); 1981 goto done; 1982 } 1983 space = sbspace(&so->so_snd); 1984 if (space < rem && 1985 (space <= 0 || 1986 space < so->so_snd.sb_lowat)) { 1987 if (so->so_state & SS_NBIO) { 1988 SOCKBUF_UNLOCK(&so->so_snd); 1989 error = EAGAIN; 1990 goto done; 1991 } 1992 /* 1993 * sbwait drops the lock while sleeping. 1994 * When we loop back to retry_space the 1995 * state may have changed and we retest 1996 * for it. 1997 */ 1998 error = sbwait(&so->so_snd); 1999 /* 2000 * An error from sbwait usually indicates that we've 2001 * been interrupted by a signal. If we've sent anything 2002 * then return bytes sent, otherwise return the error. 2003 */ 2004 if (error) { 2005 SOCKBUF_UNLOCK(&so->so_snd); 2006 goto done; 2007 } 2008 goto retry_space; 2009 } 2010 SOCKBUF_UNLOCK(&so->so_snd); 2011 2012 /* 2013 * Reduce space in the socket buffer by the size of 2014 * the header mbuf chain. 2015 * hdrlen is set to 0 after the first loop. 2016 */ 2017 space -= hdrlen; 2018 2019 /* 2020 * Loop and construct maximum sized mbuf chain to be bulk 2021 * dumped into socket buffer. 2022 */ 2023 while (space > loopbytes) { 2024 vm_pindex_t pindex; 2025 vm_offset_t pgoff; 2026 struct mbuf *m0; 2027 2028 VM_OBJECT_LOCK(obj); 2029 /* 2030 * Calculate the amount to transfer. 2031 * Not to exceed a page, the EOF, 2032 * or the passed in nbytes. 2033 */ 2034 pgoff = (vm_offset_t)(off & PAGE_MASK); 2035 xfsize = omin(PAGE_SIZE - pgoff, 2036 obj->un_pager.vnp.vnp_size - uap->offset - 2037 fsbytes - loopbytes); 2038 if (uap->nbytes) 2039 rem = (uap->nbytes - fsbytes - loopbytes); 2040 else 2041 rem = obj->un_pager.vnp.vnp_size - 2042 uap->offset - fsbytes - loopbytes; 2043 xfsize = omin(rem, xfsize); 2044 xfsize = omin(space - loopbytes, xfsize); 2045 if (xfsize <= 0) { 2046 VM_OBJECT_UNLOCK(obj); 2047 done = 1; /* all data sent */ 2048 break; 2049 } 2050 2051 /* 2052 * Attempt to look up the page. Allocate 2053 * if not found or wait and loop if busy. 2054 */ 2055 pindex = OFF_TO_IDX(off); 2056 pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY | 2057 VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY); 2058 2059 /* 2060 * Check if page is valid for what we need, 2061 * otherwise initiate I/O. 2062 * If we already turned some pages into mbufs, 2063 * send them off before we come here again and 2064 * block. 2065 */ 2066 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) 2067 VM_OBJECT_UNLOCK(obj); 2068 else if (m != NULL) 2069 error = EAGAIN; /* send what we already got */ 2070 else if (uap->flags & SF_NODISKIO) 2071 error = EBUSY; 2072 else { 2073 int bsize, resid; 2074 2075 /* 2076 * Ensure that our page is still around 2077 * when the I/O completes. 2078 */ 2079 vm_page_io_start(pg); 2080 VM_OBJECT_UNLOCK(obj); 2081 2082 /* 2083 * Get the page from backing store. 2084 */ 2085 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2086 error = vn_lock(vp, LK_SHARED); 2087 if (error != 0) 2088 goto after_read; 2089 bsize = vp->v_mount->mnt_stat.f_iosize; 2090 2091 /* 2092 * XXXMAC: Because we don't have fp->f_cred 2093 * here, we pass in NOCRED. This is probably 2094 * wrong, but is consistent with our original 2095 * implementation. 2096 */ 2097 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 2098 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 2099 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 2100 td->td_ucred, NOCRED, &resid, td); 2101 VOP_UNLOCK(vp, 0); 2102 after_read: 2103 VFS_UNLOCK_GIANT(vfslocked); 2104 VM_OBJECT_LOCK(obj); 2105 vm_page_io_finish(pg); 2106 if (!error) 2107 VM_OBJECT_UNLOCK(obj); 2108 mbstat.sf_iocnt++; 2109 } 2110 if (error) { 2111 vm_page_lock_queues(); 2112 vm_page_unwire(pg, 0); 2113 /* 2114 * See if anyone else might know about 2115 * this page. If not and it is not valid, 2116 * then free it. 2117 */ 2118 if (pg->wire_count == 0 && pg->valid == 0 && 2119 pg->busy == 0 && !(pg->oflags & VPO_BUSY) && 2120 pg->hold_count == 0) { 2121 vm_page_free(pg); 2122 } 2123 vm_page_unlock_queues(); 2124 VM_OBJECT_UNLOCK(obj); 2125 if (error == EAGAIN) 2126 error = 0; /* not a real error */ 2127 break; 2128 } 2129 2130 /* 2131 * Get a sendfile buf. We usually wait as long 2132 * as necessary, but this wait can be interrupted. 2133 */ 2134 if ((sf = sf_buf_alloc(pg, 2135 (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) { 2136 mbstat.sf_allocfail++; 2137 vm_page_lock_queues(); 2138 vm_page_unwire(pg, 0); 2139 /* 2140 * XXX: Not same check as above!? 2141 */ 2142 if (pg->wire_count == 0 && pg->object == NULL) 2143 vm_page_free(pg); 2144 vm_page_unlock_queues(); 2145 error = (mnw ? EAGAIN : EINTR); 2146 break; 2147 } 2148 2149 /* 2150 * Get an mbuf and set it up as having 2151 * external storage. 2152 */ 2153 m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); 2154 if (m0 == NULL) { 2155 error = (mnw ? EAGAIN : ENOBUFS); 2156 sf_buf_mext((void *)sf_buf_kva(sf), sf); 2157 break; 2158 } 2159 MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, 2160 sfs, sf, M_RDONLY, EXT_SFBUF); 2161 m0->m_data = (char *)sf_buf_kva(sf) + pgoff; 2162 m0->m_len = xfsize; 2163 2164 /* Append to mbuf chain. */ 2165 if (m != NULL) 2166 m_cat(m, m0); 2167 else 2168 m = m0; 2169 2170 /* Keep track of bits processed. */ 2171 loopbytes += xfsize; 2172 off += xfsize; 2173 2174 if (sfs != NULL) { 2175 mtx_lock(&sfs->mtx); 2176 sfs->count++; 2177 mtx_unlock(&sfs->mtx); 2178 } 2179 } 2180 2181 /* Add the buffer chain to the socket buffer. */ 2182 if (m != NULL) { 2183 int mlen, err; 2184 2185 mlen = m_length(m, NULL); 2186 SOCKBUF_LOCK(&so->so_snd); 2187 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2188 error = EPIPE; 2189 SOCKBUF_UNLOCK(&so->so_snd); 2190 goto done; 2191 } 2192 SOCKBUF_UNLOCK(&so->so_snd); 2193 CURVNET_SET(so->so_vnet); 2194 /* Avoid error aliasing. */ 2195 err = (*so->so_proto->pr_usrreqs->pru_send) 2196 (so, 0, m, NULL, NULL, td); 2197 CURVNET_RESTORE(); 2198 if (err == 0) { 2199 /* 2200 * We need two counters to get the 2201 * file offset and nbytes to send 2202 * right: 2203 * - sbytes contains the total amount 2204 * of bytes sent, including headers. 2205 * - fsbytes contains the total amount 2206 * of bytes sent from the file. 2207 */ 2208 sbytes += mlen; 2209 fsbytes += mlen; 2210 if (hdrlen) { 2211 fsbytes -= hdrlen; 2212 hdrlen = 0; 2213 } 2214 } else if (error == 0) 2215 error = err; 2216 m = NULL; /* pru_send always consumes */ 2217 } 2218 2219 /* Quit outer loop on error or when we're done. */ 2220 if (done) 2221 break; 2222 if (error) 2223 goto done; 2224 } 2225 2226 /* 2227 * Send trailers. Wimp out and use writev(2). 2228 */ 2229 if (trl_uio != NULL) { 2230 sbunlock(&so->so_snd); 2231 error = kern_writev(td, uap->s, trl_uio); 2232 if (error == 0) 2233 sbytes += td->td_retval[0]; 2234 goto out; 2235 } 2236 2237 done: 2238 sbunlock(&so->so_snd); 2239 out: 2240 /* 2241 * If there was no error we have to clear td->td_retval[0] 2242 * because it may have been set by writev. 2243 */ 2244 if (error == 0) { 2245 td->td_retval[0] = 0; 2246 } 2247 if (uap->sbytes != NULL) { 2248 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2249 } 2250 if (obj != NULL) 2251 vm_object_deallocate(obj); 2252 if (vp != NULL) { 2253 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2254 vrele(vp); 2255 VFS_UNLOCK_GIANT(vfslocked); 2256 } 2257 if (so) 2258 fdrop(sock_fp, td); 2259 if (m) 2260 m_freem(m); 2261 2262 if (sfs != NULL) { 2263 mtx_lock(&sfs->mtx); 2264 if (sfs->count != 0) 2265 cv_wait(&sfs->cv, &sfs->mtx); 2266 KASSERT(sfs->count == 0, ("sendfile sync still busy")); 2267 cv_destroy(&sfs->cv); 2268 mtx_destroy(&sfs->mtx); 2269 free(sfs, M_TEMP); 2270 } 2271 2272 if (error == ERESTART) 2273 error = EINTR; 2274 2275 return (error); 2276 } 2277 2278 /* 2279 * SCTP syscalls. 2280 * Functionality only compiled in if SCTP is defined in the kernel Makefile, 2281 * otherwise all return EOPNOTSUPP. 2282 * XXX: We should make this loadable one day. 2283 */ 2284 int 2285 sctp_peeloff(td, uap) 2286 struct thread *td; 2287 struct sctp_peeloff_args /* { 2288 int sd; 2289 caddr_t name; 2290 } */ *uap; 2291 { 2292 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2293 struct filedesc *fdp; 2294 struct file *nfp = NULL; 2295 int error; 2296 struct socket *head, *so; 2297 int fd; 2298 u_int fflag; 2299 2300 fdp = td->td_proc->p_fd; 2301 AUDIT_ARG_FD(uap->sd); 2302 error = fgetsock(td, uap->sd, &head, &fflag); 2303 if (error) 2304 goto done2; 2305 error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name); 2306 if (error) 2307 goto done2; 2308 /* 2309 * At this point we know we do have a assoc to pull 2310 * we proceed to get the fd setup. This may block 2311 * but that is ok. 2312 */ 2313 2314 error = falloc(td, &nfp, &fd); 2315 if (error) 2316 goto done; 2317 td->td_retval[0] = fd; 2318 2319 CURVNET_SET(head->so_vnet); 2320 so = sonewconn(head, SS_ISCONNECTED); 2321 if (so == NULL) 2322 goto noconnection; 2323 /* 2324 * Before changing the flags on the socket, we have to bump the 2325 * reference count. Otherwise, if the protocol calls sofree(), 2326 * the socket will be released due to a zero refcount. 2327 */ 2328 SOCK_LOCK(so); 2329 soref(so); /* file descriptor reference */ 2330 SOCK_UNLOCK(so); 2331 2332 ACCEPT_LOCK(); 2333 2334 TAILQ_REMOVE(&head->so_comp, so, so_list); 2335 head->so_qlen--; 2336 so->so_state |= (head->so_state & SS_NBIO); 2337 so->so_state &= ~SS_NOFDREF; 2338 so->so_qstate &= ~SQ_COMP; 2339 so->so_head = NULL; 2340 ACCEPT_UNLOCK(); 2341 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 2342 error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name); 2343 if (error) 2344 goto noconnection; 2345 if (head->so_sigio != NULL) 2346 fsetown(fgetown(&head->so_sigio), &so->so_sigio); 2347 2348 noconnection: 2349 /* 2350 * close the new descriptor, assuming someone hasn't ripped it 2351 * out from under us. 2352 */ 2353 if (error) 2354 fdclose(fdp, nfp, fd, td); 2355 2356 /* 2357 * Release explicitly held references before returning. 2358 */ 2359 CURVNET_RESTORE(); 2360 done: 2361 if (nfp != NULL) 2362 fdrop(nfp, td); 2363 fputsock(head); 2364 done2: 2365 return (error); 2366 #else /* SCTP */ 2367 return (EOPNOTSUPP); 2368 #endif /* SCTP */ 2369 } 2370 2371 int 2372 sctp_generic_sendmsg (td, uap) 2373 struct thread *td; 2374 struct sctp_generic_sendmsg_args /* { 2375 int sd, 2376 caddr_t msg, 2377 int mlen, 2378 caddr_t to, 2379 __socklen_t tolen, 2380 struct sctp_sndrcvinfo *sinfo, 2381 int flags 2382 } */ *uap; 2383 { 2384 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2385 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2386 struct socket *so; 2387 struct file *fp = NULL; 2388 int use_rcvinfo = 1; 2389 int error = 0, len; 2390 struct sockaddr *to = NULL; 2391 #ifdef KTRACE 2392 struct uio *ktruio = NULL; 2393 #endif 2394 struct uio auio; 2395 struct iovec iov[1]; 2396 2397 if (uap->sinfo) { 2398 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2399 if (error) 2400 return (error); 2401 u_sinfo = &sinfo; 2402 } 2403 if (uap->tolen) { 2404 error = getsockaddr(&to, uap->to, uap->tolen); 2405 if (error) { 2406 to = NULL; 2407 goto sctp_bad2; 2408 } 2409 } 2410 2411 AUDIT_ARG_FD(uap->sd); 2412 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); 2413 if (error) 2414 goto sctp_bad; 2415 #ifdef KTRACE 2416 if (KTRPOINT(td, KTR_STRUCT)) 2417 ktrsockaddr(to); 2418 #endif 2419 2420 iov[0].iov_base = uap->msg; 2421 iov[0].iov_len = uap->mlen; 2422 2423 so = (struct socket *)fp->f_data; 2424 #ifdef MAC 2425 error = mac_socket_check_send(td->td_ucred, so); 2426 if (error) 2427 goto sctp_bad; 2428 #endif /* MAC */ 2429 2430 auio.uio_iov = iov; 2431 auio.uio_iovcnt = 1; 2432 auio.uio_segflg = UIO_USERSPACE; 2433 auio.uio_rw = UIO_WRITE; 2434 auio.uio_td = td; 2435 auio.uio_offset = 0; /* XXX */ 2436 auio.uio_resid = 0; 2437 len = auio.uio_resid = uap->mlen; 2438 CURVNET_SET(so->so_vnet); 2439 error = sctp_lower_sosend(so, to, &auio, 2440 (struct mbuf *)NULL, (struct mbuf *)NULL, 2441 uap->flags, use_rcvinfo, u_sinfo, td); 2442 CURVNET_RESTORE(); 2443 if (error) { 2444 if (auio.uio_resid != len && (error == ERESTART || 2445 error == EINTR || error == EWOULDBLOCK)) 2446 error = 0; 2447 /* Generation of SIGPIPE can be controlled per socket. */ 2448 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2449 !(uap->flags & MSG_NOSIGNAL)) { 2450 PROC_LOCK(td->td_proc); 2451 psignal(td->td_proc, SIGPIPE); 2452 PROC_UNLOCK(td->td_proc); 2453 } 2454 } 2455 if (error == 0) 2456 td->td_retval[0] = len - auio.uio_resid; 2457 #ifdef KTRACE 2458 if (ktruio != NULL) { 2459 ktruio->uio_resid = td->td_retval[0]; 2460 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2461 } 2462 #endif /* KTRACE */ 2463 sctp_bad: 2464 if (fp) 2465 fdrop(fp, td); 2466 sctp_bad2: 2467 if (to) 2468 free(to, M_SONAME); 2469 return (error); 2470 #else /* SCTP */ 2471 return (EOPNOTSUPP); 2472 #endif /* SCTP */ 2473 } 2474 2475 int 2476 sctp_generic_sendmsg_iov(td, uap) 2477 struct thread *td; 2478 struct sctp_generic_sendmsg_iov_args /* { 2479 int sd, 2480 struct iovec *iov, 2481 int iovlen, 2482 caddr_t to, 2483 __socklen_t tolen, 2484 struct sctp_sndrcvinfo *sinfo, 2485 int flags 2486 } */ *uap; 2487 { 2488 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2489 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2490 struct socket *so; 2491 struct file *fp = NULL; 2492 int use_rcvinfo = 1; 2493 int error=0, len, i; 2494 struct sockaddr *to = NULL; 2495 #ifdef KTRACE 2496 struct uio *ktruio = NULL; 2497 #endif 2498 struct uio auio; 2499 struct iovec *iov, *tiov; 2500 2501 if (uap->sinfo) { 2502 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2503 if (error) 2504 return (error); 2505 u_sinfo = &sinfo; 2506 } 2507 if (uap->tolen) { 2508 error = getsockaddr(&to, uap->to, uap->tolen); 2509 if (error) { 2510 to = NULL; 2511 goto sctp_bad2; 2512 } 2513 } 2514 2515 AUDIT_ARG_FD(uap->sd); 2516 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); 2517 if (error) 2518 goto sctp_bad1; 2519 2520 #ifdef COMPAT_FREEBSD32 2521 if (SV_CURPROC_FLAG(SV_ILP32)) 2522 error = freebsd32_copyiniov((struct iovec32 *)uap->iov, 2523 uap->iovlen, &iov, EMSGSIZE); 2524 else 2525 #endif 2526 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2527 if (error) 2528 goto sctp_bad1; 2529 #ifdef KTRACE 2530 if (KTRPOINT(td, KTR_STRUCT)) 2531 ktrsockaddr(to); 2532 #endif 2533 2534 so = (struct socket *)fp->f_data; 2535 #ifdef MAC 2536 error = mac_socket_check_send(td->td_ucred, so); 2537 if (error) 2538 goto sctp_bad; 2539 #endif /* MAC */ 2540 2541 auio.uio_iov = iov; 2542 auio.uio_iovcnt = uap->iovlen; 2543 auio.uio_segflg = UIO_USERSPACE; 2544 auio.uio_rw = UIO_WRITE; 2545 auio.uio_td = td; 2546 auio.uio_offset = 0; /* XXX */ 2547 auio.uio_resid = 0; 2548 tiov = iov; 2549 for (i = 0; i <uap->iovlen; i++, tiov++) { 2550 if ((auio.uio_resid += tiov->iov_len) < 0) { 2551 error = EINVAL; 2552 goto sctp_bad; 2553 } 2554 } 2555 len = auio.uio_resid; 2556 CURVNET_SET(so->so_vnet); 2557 error = sctp_lower_sosend(so, to, &auio, 2558 (struct mbuf *)NULL, (struct mbuf *)NULL, 2559 uap->flags, use_rcvinfo, u_sinfo, td); 2560 CURVNET_RESTORE(); 2561 if (error) { 2562 if (auio.uio_resid != len && (error == ERESTART || 2563 error == EINTR || error == EWOULDBLOCK)) 2564 error = 0; 2565 /* Generation of SIGPIPE can be controlled per socket */ 2566 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2567 !(uap->flags & MSG_NOSIGNAL)) { 2568 PROC_LOCK(td->td_proc); 2569 psignal(td->td_proc, SIGPIPE); 2570 PROC_UNLOCK(td->td_proc); 2571 } 2572 } 2573 if (error == 0) 2574 td->td_retval[0] = len - auio.uio_resid; 2575 #ifdef KTRACE 2576 if (ktruio != NULL) { 2577 ktruio->uio_resid = td->td_retval[0]; 2578 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2579 } 2580 #endif /* KTRACE */ 2581 sctp_bad: 2582 free(iov, M_IOV); 2583 sctp_bad1: 2584 if (fp) 2585 fdrop(fp, td); 2586 sctp_bad2: 2587 if (to) 2588 free(to, M_SONAME); 2589 return (error); 2590 #else /* SCTP */ 2591 return (EOPNOTSUPP); 2592 #endif /* SCTP */ 2593 } 2594 2595 int 2596 sctp_generic_recvmsg(td, uap) 2597 struct thread *td; 2598 struct sctp_generic_recvmsg_args /* { 2599 int sd, 2600 struct iovec *iov, 2601 int iovlen, 2602 struct sockaddr *from, 2603 __socklen_t *fromlenaddr, 2604 struct sctp_sndrcvinfo *sinfo, 2605 int *msg_flags 2606 } */ *uap; 2607 { 2608 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2609 u_int8_t sockbufstore[256]; 2610 struct uio auio; 2611 struct iovec *iov, *tiov; 2612 struct sctp_sndrcvinfo sinfo; 2613 struct socket *so; 2614 struct file *fp = NULL; 2615 struct sockaddr *fromsa; 2616 int fromlen; 2617 int len, i, msg_flags; 2618 int error = 0; 2619 #ifdef KTRACE 2620 struct uio *ktruio = NULL; 2621 #endif 2622 2623 AUDIT_ARG_FD(uap->sd); 2624 error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); 2625 if (error) { 2626 return (error); 2627 } 2628 #ifdef COMPAT_FREEBSD32 2629 if (SV_CURPROC_FLAG(SV_ILP32)) 2630 error = freebsd32_copyiniov((struct iovec32 *)uap->iov, 2631 uap->iovlen, &iov, EMSGSIZE); 2632 else 2633 #endif 2634 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2635 if (error) 2636 goto out1; 2637 2638 so = fp->f_data; 2639 #ifdef MAC 2640 error = mac_socket_check_receive(td->td_ucred, so); 2641 if (error) { 2642 goto out; 2643 } 2644 #endif /* MAC */ 2645 2646 if (uap->fromlenaddr) { 2647 error = copyin(uap->fromlenaddr, 2648 &fromlen, sizeof (fromlen)); 2649 if (error) { 2650 goto out; 2651 } 2652 } else { 2653 fromlen = 0; 2654 } 2655 if (uap->msg_flags) { 2656 error = copyin(uap->msg_flags, &msg_flags, sizeof (int)); 2657 if (error) { 2658 goto out; 2659 } 2660 } else { 2661 msg_flags = 0; 2662 } 2663 auio.uio_iov = iov; 2664 auio.uio_iovcnt = uap->iovlen; 2665 auio.uio_segflg = UIO_USERSPACE; 2666 auio.uio_rw = UIO_READ; 2667 auio.uio_td = td; 2668 auio.uio_offset = 0; /* XXX */ 2669 auio.uio_resid = 0; 2670 tiov = iov; 2671 for (i = 0; i <uap->iovlen; i++, tiov++) { 2672 if ((auio.uio_resid += tiov->iov_len) < 0) { 2673 error = EINVAL; 2674 goto out; 2675 } 2676 } 2677 len = auio.uio_resid; 2678 fromsa = (struct sockaddr *)sockbufstore; 2679 2680 #ifdef KTRACE 2681 if (KTRPOINT(td, KTR_GENIO)) 2682 ktruio = cloneuio(&auio); 2683 #endif /* KTRACE */ 2684 CURVNET_SET(so->so_vnet); 2685 error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL, 2686 fromsa, fromlen, &msg_flags, 2687 (struct sctp_sndrcvinfo *)&sinfo, 1); 2688 CURVNET_RESTORE(); 2689 if (error) { 2690 if (auio.uio_resid != (int)len && (error == ERESTART || 2691 error == EINTR || error == EWOULDBLOCK)) 2692 error = 0; 2693 } else { 2694 if (uap->sinfo) 2695 error = copyout(&sinfo, uap->sinfo, sizeof (sinfo)); 2696 } 2697 #ifdef KTRACE 2698 if (ktruio != NULL) { 2699 ktruio->uio_resid = (int)len - auio.uio_resid; 2700 ktrgenio(uap->sd, UIO_READ, ktruio, error); 2701 } 2702 #endif /* KTRACE */ 2703 if (error) 2704 goto out; 2705 td->td_retval[0] = (int)len - auio.uio_resid; 2706 2707 if (fromlen && uap->from) { 2708 len = fromlen; 2709 if (len <= 0 || fromsa == 0) 2710 len = 0; 2711 else { 2712 len = MIN(len, fromsa->sa_len); 2713 error = copyout(fromsa, uap->from, (unsigned)len); 2714 if (error) 2715 goto out; 2716 } 2717 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t)); 2718 if (error) { 2719 goto out; 2720 } 2721 } 2722 #ifdef KTRACE 2723 if (KTRPOINT(td, KTR_STRUCT)) 2724 ktrsockaddr(fromsa); 2725 #endif 2726 if (uap->msg_flags) { 2727 error = copyout(&msg_flags, uap->msg_flags, sizeof (int)); 2728 if (error) { 2729 goto out; 2730 } 2731 } 2732 out: 2733 free(iov, M_IOV); 2734 out1: 2735 if (fp) 2736 fdrop(fp, td); 2737 2738 return (error); 2739 #else /* SCTP */ 2740 return (EOPNOTSUPP); 2741 #endif /* SCTP */ 2742 } 2743