/*
 * Copyright (c) 1982, 1986, 1989, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * sendfile(2) and related extensions:
 * Copyright (c) 1998, David Greenman. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 * $FreeBSD$ 38 */ 39 40 #include "opt_compat.h" 41 #include "opt_ktrace.h" 42 #include "opt_mac.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/mac.h> 49 #include <sys/mutex.h> 50 #include <sys/sysproto.h> 51 #include <sys/malloc.h> 52 #include <sys/filedesc.h> 53 #include <sys/event.h> 54 #include <sys/proc.h> 55 #include <sys/fcntl.h> 56 #include <sys/file.h> 57 #include <sys/filio.h> 58 #include <sys/mount.h> 59 #include <sys/mbuf.h> 60 #include <sys/protosw.h> 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 #include <sys/signalvar.h> 64 #include <sys/syscallsubr.h> 65 #include <sys/uio.h> 66 #include <sys/vnode.h> 67 #ifdef KTRACE 68 #include <sys/ktrace.h> 69 #endif 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 #include <vm/vm_pageout.h> 75 #include <vm/vm_kern.h> 76 #include <vm/vm_extern.h> 77 78 static void sf_buf_init(void *arg); 79 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) 80 81 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 82 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 83 84 static int accept1(struct thread *td, struct accept_args *uap, int compat); 85 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 86 static int getsockname1(struct thread *td, struct getsockname_args *uap, 87 int compat); 88 static int getpeername1(struct thread *td, struct getpeername_args *uap, 89 int compat); 90 91 /* 92 * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the 93 * sf_freelist head with the sf_lock mutex. 94 */ 95 static struct { 96 SLIST_HEAD(, sf_buf) sf_head; 97 struct mtx sf_lock; 98 } sf_freelist; 99 100 static u_int sf_buf_alloc_want; 101 102 /* 103 * System call interface to the socket abstraction. 
104 */ 105 #if defined(COMPAT_43) || defined(COMPAT_SUNOS) 106 #define COMPAT_OLDSOCK 107 #endif 108 109 /* 110 * MPSAFE 111 */ 112 int 113 socket(td, uap) 114 struct thread *td; 115 register struct socket_args /* { 116 int domain; 117 int type; 118 int protocol; 119 } */ *uap; 120 { 121 struct filedesc *fdp; 122 struct socket *so; 123 struct file *fp; 124 int fd, error; 125 126 mtx_lock(&Giant); 127 fdp = td->td_proc->p_fd; 128 error = falloc(td, &fp, &fd); 129 if (error) 130 goto done2; 131 fhold(fp); 132 error = socreate(uap->domain, &so, uap->type, uap->protocol, 133 td->td_ucred, td); 134 FILEDESC_LOCK(fdp); 135 if (error) { 136 if (fdp->fd_ofiles[fd] == fp) { 137 fdp->fd_ofiles[fd] = NULL; 138 FILEDESC_UNLOCK(fdp); 139 fdrop(fp, td); 140 } else 141 FILEDESC_UNLOCK(fdp); 142 } else { 143 fp->f_data = so; /* already has ref count */ 144 fp->f_flag = FREAD|FWRITE; 145 fp->f_ops = &socketops; 146 fp->f_type = DTYPE_SOCKET; 147 FILEDESC_UNLOCK(fdp); 148 td->td_retval[0] = fd; 149 } 150 fdrop(fp, td); 151 done2: 152 mtx_unlock(&Giant); 153 return (error); 154 } 155 156 /* 157 * MPSAFE 158 */ 159 /* ARGSUSED */ 160 int 161 bind(td, uap) 162 struct thread *td; 163 register struct bind_args /* { 164 int s; 165 caddr_t name; 166 int namelen; 167 } */ *uap; 168 { 169 struct sockaddr *sa; 170 int error; 171 172 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 173 return (error); 174 175 return (kern_bind(td, uap->s, sa)); 176 } 177 178 int 179 kern_bind(td, fd, sa) 180 struct thread *td; 181 int fd; 182 struct sockaddr *sa; 183 { 184 struct socket *so; 185 int error; 186 187 mtx_lock(&Giant); 188 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 189 goto done2; 190 #ifdef MAC 191 error = mac_check_socket_bind(td->td_ucred, so, sa); 192 if (error) 193 goto done1; 194 #endif 195 error = sobind(so, sa, td); 196 #ifdef MAC 197 done1: 198 #endif 199 fputsock(so); 200 done2: 201 mtx_unlock(&Giant); 202 FREE(sa, M_SONAME); 203 return (error); 204 } 205 206 /* 207 * 
MPSAFE 208 */ 209 /* ARGSUSED */ 210 int 211 listen(td, uap) 212 struct thread *td; 213 register struct listen_args /* { 214 int s; 215 int backlog; 216 } */ *uap; 217 { 218 struct socket *so; 219 int error; 220 221 mtx_lock(&Giant); 222 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 223 #ifdef MAC 224 error = mac_check_socket_listen(td->td_ucred, so); 225 if (error) 226 goto done; 227 #endif 228 error = solisten(so, uap->backlog, td); 229 #ifdef MAC 230 done: 231 #endif 232 fputsock(so); 233 } 234 mtx_unlock(&Giant); 235 return(error); 236 } 237 238 /* 239 * accept1() 240 * MPSAFE 241 */ 242 static int 243 accept1(td, uap, compat) 244 struct thread *td; 245 register struct accept_args /* { 246 int s; 247 caddr_t name; 248 int *anamelen; 249 } */ *uap; 250 int compat; 251 { 252 struct filedesc *fdp; 253 struct file *nfp = NULL; 254 struct sockaddr *sa; 255 int namelen, error, s; 256 struct socket *head, *so; 257 int fd; 258 u_int fflag; 259 pid_t pgid; 260 int tmp; 261 262 mtx_lock(&Giant); 263 fdp = td->td_proc->p_fd; 264 if (uap->name) { 265 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 266 if(error) 267 goto done2; 268 if (namelen < 0) { 269 error = EINVAL; 270 goto done2; 271 } 272 } 273 error = fgetsock(td, uap->s, &head, &fflag); 274 if (error) 275 goto done2; 276 s = splnet(); 277 if ((head->so_options & SO_ACCEPTCONN) == 0) { 278 splx(s); 279 error = EINVAL; 280 goto done; 281 } 282 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 283 if (head->so_state & SS_CANTRCVMORE) { 284 head->so_error = ECONNABORTED; 285 break; 286 } 287 if ((head->so_state & SS_NBIO) != 0) { 288 head->so_error = EWOULDBLOCK; 289 break; 290 } 291 error = tsleep(&head->so_timeo, PSOCK | PCATCH, 292 "accept", 0); 293 if (error) { 294 splx(s); 295 goto done; 296 } 297 } 298 if (head->so_error) { 299 error = head->so_error; 300 head->so_error = 0; 301 splx(s); 302 goto done; 303 } 304 305 /* 306 * At this point we know that there is at least one connection 
307 * ready to be accepted. Remove it from the queue prior to 308 * allocating the file descriptor for it since falloc() may 309 * block allowing another process to accept the connection 310 * instead. 311 */ 312 so = TAILQ_FIRST(&head->so_comp); 313 TAILQ_REMOVE(&head->so_comp, so, so_list); 314 head->so_qlen--; 315 316 error = falloc(td, &nfp, &fd); 317 if (error) { 318 /* 319 * Probably ran out of file descriptors. Put the 320 * unaccepted connection back onto the queue and 321 * do another wakeup so some other process might 322 * have a chance at it. 323 */ 324 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 325 head->so_qlen++; 326 wakeup_one(&head->so_timeo); 327 splx(s); 328 goto done; 329 } 330 fhold(nfp); 331 td->td_retval[0] = fd; 332 333 /* connection has been removed from the listen queue */ 334 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 335 336 so->so_state &= ~SS_COMP; 337 so->so_head = NULL; 338 pgid = fgetown(&head->so_sigio); 339 if (pgid != 0) 340 fsetown(pgid, &so->so_sigio); 341 342 FILE_LOCK(nfp); 343 soref(so); /* file descriptor reference */ 344 nfp->f_data = so; /* nfp has ref count from falloc */ 345 nfp->f_flag = fflag; 346 nfp->f_ops = &socketops; 347 nfp->f_type = DTYPE_SOCKET; 348 FILE_UNLOCK(nfp); 349 /* Sync socket nonblocking/async state with file flags */ 350 tmp = fflag & FNONBLOCK; 351 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 352 tmp = fflag & FASYNC; 353 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 354 sa = 0; 355 error = soaccept(so, &sa); 356 if (error) { 357 /* 358 * return a namelen of zero for older code which might 359 * ignore the return value from accept. 
360 */ 361 if (uap->name != NULL) { 362 namelen = 0; 363 (void) copyout(&namelen, 364 uap->anamelen, sizeof(*uap->anamelen)); 365 } 366 goto noconnection; 367 } 368 if (sa == NULL) { 369 namelen = 0; 370 if (uap->name) 371 goto gotnoname; 372 splx(s); 373 error = 0; 374 goto done; 375 } 376 if (uap->name) { 377 /* check sa_len before it is destroyed */ 378 if (namelen > sa->sa_len) 379 namelen = sa->sa_len; 380 #ifdef COMPAT_OLDSOCK 381 if (compat) 382 ((struct osockaddr *)sa)->sa_family = 383 sa->sa_family; 384 #endif 385 error = copyout(sa, uap->name, (u_int)namelen); 386 if (!error) 387 gotnoname: 388 error = copyout(&namelen, 389 uap->anamelen, sizeof (*uap->anamelen)); 390 } 391 noconnection: 392 if (sa) 393 FREE(sa, M_SONAME); 394 395 /* 396 * close the new descriptor, assuming someone hasn't ripped it 397 * out from under us. 398 */ 399 if (error) { 400 FILEDESC_LOCK(fdp); 401 if (fdp->fd_ofiles[fd] == nfp) { 402 fdp->fd_ofiles[fd] = NULL; 403 FILEDESC_UNLOCK(fdp); 404 fdrop(nfp, td); 405 } else { 406 FILEDESC_UNLOCK(fdp); 407 } 408 } 409 splx(s); 410 411 /* 412 * Release explicitly held references before returning. 
413 */ 414 done: 415 if (nfp != NULL) 416 fdrop(nfp, td); 417 fputsock(head); 418 done2: 419 mtx_unlock(&Giant); 420 return (error); 421 } 422 423 /* 424 * MPSAFE (accept1() is MPSAFE) 425 */ 426 int 427 accept(td, uap) 428 struct thread *td; 429 struct accept_args *uap; 430 { 431 432 return (accept1(td, uap, 0)); 433 } 434 435 #ifdef COMPAT_OLDSOCK 436 /* 437 * MPSAFE (accept1() is MPSAFE) 438 */ 439 int 440 oaccept(td, uap) 441 struct thread *td; 442 struct accept_args *uap; 443 { 444 445 return (accept1(td, uap, 1)); 446 } 447 #endif /* COMPAT_OLDSOCK */ 448 449 /* 450 * MPSAFE 451 */ 452 /* ARGSUSED */ 453 int 454 connect(td, uap) 455 struct thread *td; 456 register struct connect_args /* { 457 int s; 458 caddr_t name; 459 int namelen; 460 } */ *uap; 461 { 462 struct sockaddr *sa; 463 int error; 464 465 error = getsockaddr(&sa, uap->name, uap->namelen); 466 if (error) 467 return error; 468 469 return (kern_connect(td, uap->s, sa)); 470 } 471 472 473 int 474 kern_connect(td, fd, sa) 475 struct thread *td; 476 int fd; 477 struct sockaddr *sa; 478 { 479 struct socket *so; 480 int error, s; 481 482 mtx_lock(&Giant); 483 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 484 goto done2; 485 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 486 error = EALREADY; 487 goto done1; 488 } 489 #ifdef MAC 490 error = mac_check_socket_connect(td->td_ucred, so, sa); 491 if (error) 492 goto bad; 493 #endif 494 error = soconnect(so, sa, td); 495 if (error) 496 goto bad; 497 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 498 error = EINPROGRESS; 499 goto done1; 500 } 501 s = splnet(); 502 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 503 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0); 504 if (error) 505 break; 506 } 507 if (error == 0) { 508 error = so->so_error; 509 so->so_error = 0; 510 } 511 splx(s); 512 bad: 513 so->so_state &= ~SS_ISCONNECTING; 514 if (error == ERESTART) 515 error = EINTR; 516 done1: 517 
fputsock(so); 518 done2: 519 mtx_unlock(&Giant); 520 FREE(sa, M_SONAME); 521 return (error); 522 } 523 524 /* 525 * MPSAFE 526 */ 527 int 528 socketpair(td, uap) 529 struct thread *td; 530 register struct socketpair_args /* { 531 int domain; 532 int type; 533 int protocol; 534 int *rsv; 535 } */ *uap; 536 { 537 register struct filedesc *fdp = td->td_proc->p_fd; 538 struct file *fp1, *fp2; 539 struct socket *so1, *so2; 540 int fd, error, sv[2]; 541 542 mtx_lock(&Giant); 543 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 544 td->td_ucred, td); 545 if (error) 546 goto done2; 547 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 548 td->td_ucred, td); 549 if (error) 550 goto free1; 551 error = falloc(td, &fp1, &fd); 552 if (error) 553 goto free2; 554 fhold(fp1); 555 sv[0] = fd; 556 fp1->f_data = so1; /* so1 already has ref count */ 557 error = falloc(td, &fp2, &fd); 558 if (error) 559 goto free3; 560 fhold(fp2); 561 fp2->f_data = so2; /* so2 already has ref count */ 562 sv[1] = fd; 563 error = soconnect2(so1, so2); 564 if (error) 565 goto free4; 566 if (uap->type == SOCK_DGRAM) { 567 /* 568 * Datagram socket connection is asymmetric. 
569 */ 570 error = soconnect2(so2, so1); 571 if (error) 572 goto free4; 573 } 574 FILE_LOCK(fp1); 575 fp1->f_flag = FREAD|FWRITE; 576 fp1->f_ops = &socketops; 577 fp1->f_type = DTYPE_SOCKET; 578 FILE_UNLOCK(fp1); 579 FILE_LOCK(fp2); 580 fp2->f_flag = FREAD|FWRITE; 581 fp2->f_ops = &socketops; 582 fp2->f_type = DTYPE_SOCKET; 583 FILE_UNLOCK(fp2); 584 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 585 fdrop(fp1, td); 586 fdrop(fp2, td); 587 goto done2; 588 free4: 589 FILEDESC_LOCK(fdp); 590 if (fdp->fd_ofiles[sv[1]] == fp2) { 591 fdp->fd_ofiles[sv[1]] = NULL; 592 FILEDESC_UNLOCK(fdp); 593 fdrop(fp2, td); 594 } else 595 FILEDESC_UNLOCK(fdp); 596 fdrop(fp2, td); 597 free3: 598 FILEDESC_LOCK(fdp); 599 if (fdp->fd_ofiles[sv[0]] == fp1) { 600 fdp->fd_ofiles[sv[0]] = NULL; 601 FILEDESC_UNLOCK(fdp); 602 fdrop(fp1, td); 603 } else 604 FILEDESC_UNLOCK(fdp); 605 fdrop(fp1, td); 606 free2: 607 (void)soclose(so2); 608 free1: 609 (void)soclose(so1); 610 done2: 611 mtx_unlock(&Giant); 612 return (error); 613 } 614 615 static int 616 sendit(td, s, mp, flags) 617 register struct thread *td; 618 int s; 619 register struct msghdr *mp; 620 int flags; 621 { 622 struct uio auio; 623 register struct iovec *iov; 624 register int i; 625 struct mbuf *control; 626 struct sockaddr *to = NULL; 627 int len, error; 628 struct socket *so; 629 #ifdef KTRACE 630 struct iovec *ktriov = NULL; 631 struct uio ktruio; 632 int iovlen; 633 #endif 634 635 if ((error = fgetsock(td, s, &so, NULL)) != 0) 636 return (error); 637 638 #ifdef MAC 639 error = mac_check_socket_send(td->td_ucred, so); 640 if (error) 641 goto bad; 642 #endif 643 644 auio.uio_iov = mp->msg_iov; 645 auio.uio_iovcnt = mp->msg_iovlen; 646 auio.uio_segflg = UIO_USERSPACE; 647 auio.uio_rw = UIO_WRITE; 648 auio.uio_td = td; 649 auio.uio_offset = 0; /* XXX */ 650 auio.uio_resid = 0; 651 iov = mp->msg_iov; 652 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 653 if ((auio.uio_resid += iov->iov_len) < 0) { 654 error = EINVAL; 655 goto bad; 656 } 
657 } 658 if (mp->msg_name) { 659 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 660 if (error) 661 goto bad; 662 } 663 if (mp->msg_control) { 664 if (mp->msg_controllen < sizeof(struct cmsghdr) 665 #ifdef COMPAT_OLDSOCK 666 && mp->msg_flags != MSG_COMPAT 667 #endif 668 ) { 669 error = EINVAL; 670 goto bad; 671 } 672 error = sockargs(&control, mp->msg_control, 673 mp->msg_controllen, MT_CONTROL); 674 if (error) 675 goto bad; 676 #ifdef COMPAT_OLDSOCK 677 if (mp->msg_flags == MSG_COMPAT) { 678 register struct cmsghdr *cm; 679 680 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 681 if (control == 0) { 682 error = ENOBUFS; 683 goto bad; 684 } else { 685 cm = mtod(control, struct cmsghdr *); 686 cm->cmsg_len = control->m_len; 687 cm->cmsg_level = SOL_SOCKET; 688 cm->cmsg_type = SCM_RIGHTS; 689 } 690 } 691 #endif 692 } else { 693 control = 0; 694 } 695 #ifdef KTRACE 696 if (KTRPOINT(td, KTR_GENIO)) { 697 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 698 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 699 bcopy(auio.uio_iov, ktriov, iovlen); 700 ktruio = auio; 701 } 702 #endif 703 len = auio.uio_resid; 704 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control, 705 flags, td); 706 if (error) { 707 if (auio.uio_resid != len && (error == ERESTART || 708 error == EINTR || error == EWOULDBLOCK)) 709 error = 0; 710 /* Generation of SIGPIPE can be controlled per socket */ 711 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 712 PROC_LOCK(td->td_proc); 713 psignal(td->td_proc, SIGPIPE); 714 PROC_UNLOCK(td->td_proc); 715 } 716 } 717 if (error == 0) 718 td->td_retval[0] = len - auio.uio_resid; 719 #ifdef KTRACE 720 if (ktriov != NULL) { 721 if (error == 0) { 722 ktruio.uio_iov = ktriov; 723 ktruio.uio_resid = td->td_retval[0]; 724 ktrgenio(s, UIO_WRITE, &ktruio, error); 725 } 726 FREE(ktriov, M_TEMP); 727 } 728 #endif 729 bad: 730 fputsock(so); 731 if (to) 732 FREE(to, M_SONAME); 733 return (error); 734 } 735 736 /* 737 * MPSAFE 738 */ 
739 int 740 sendto(td, uap) 741 struct thread *td; 742 register struct sendto_args /* { 743 int s; 744 caddr_t buf; 745 size_t len; 746 int flags; 747 caddr_t to; 748 int tolen; 749 } */ *uap; 750 { 751 struct msghdr msg; 752 struct iovec aiov; 753 int error; 754 755 msg.msg_name = uap->to; 756 msg.msg_namelen = uap->tolen; 757 msg.msg_iov = &aiov; 758 msg.msg_iovlen = 1; 759 msg.msg_control = 0; 760 #ifdef COMPAT_OLDSOCK 761 msg.msg_flags = 0; 762 #endif 763 aiov.iov_base = uap->buf; 764 aiov.iov_len = uap->len; 765 mtx_lock(&Giant); 766 error = sendit(td, uap->s, &msg, uap->flags); 767 mtx_unlock(&Giant); 768 return (error); 769 } 770 771 #ifdef COMPAT_OLDSOCK 772 /* 773 * MPSAFE 774 */ 775 int 776 osend(td, uap) 777 struct thread *td; 778 register struct osend_args /* { 779 int s; 780 caddr_t buf; 781 int len; 782 int flags; 783 } */ *uap; 784 { 785 struct msghdr msg; 786 struct iovec aiov; 787 int error; 788 789 msg.msg_name = 0; 790 msg.msg_namelen = 0; 791 msg.msg_iov = &aiov; 792 msg.msg_iovlen = 1; 793 aiov.iov_base = uap->buf; 794 aiov.iov_len = uap->len; 795 msg.msg_control = 0; 796 msg.msg_flags = 0; 797 mtx_lock(&Giant); 798 error = sendit(td, uap->s, &msg, uap->flags); 799 mtx_unlock(&Giant); 800 return (error); 801 } 802 803 /* 804 * MPSAFE 805 */ 806 int 807 osendmsg(td, uap) 808 struct thread *td; 809 register struct osendmsg_args /* { 810 int s; 811 caddr_t msg; 812 int flags; 813 } */ *uap; 814 { 815 struct msghdr msg; 816 struct iovec aiov[UIO_SMALLIOV], *iov; 817 int error; 818 819 mtx_lock(&Giant); 820 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 821 if (error) 822 goto done2; 823 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 824 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 825 error = EMSGSIZE; 826 goto done2; 827 } 828 MALLOC(iov, struct iovec *, 829 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 830 M_WAITOK); 831 } else { 832 iov = aiov; 833 } 834 error = copyin(msg.msg_iov, iov, 835 (unsigned)(msg.msg_iovlen * sizeof (struct 
iovec))); 836 if (error) 837 goto done; 838 msg.msg_flags = MSG_COMPAT; 839 msg.msg_iov = iov; 840 error = sendit(td, uap->s, &msg, uap->flags); 841 done: 842 if (iov != aiov) 843 FREE(iov, M_IOV); 844 done2: 845 mtx_unlock(&Giant); 846 return (error); 847 } 848 #endif 849 850 /* 851 * MPSAFE 852 */ 853 int 854 sendmsg(td, uap) 855 struct thread *td; 856 register struct sendmsg_args /* { 857 int s; 858 caddr_t msg; 859 int flags; 860 } */ *uap; 861 { 862 struct msghdr msg; 863 struct iovec aiov[UIO_SMALLIOV], *iov; 864 int error; 865 866 mtx_lock(&Giant); 867 error = copyin(uap->msg, &msg, sizeof (msg)); 868 if (error) 869 goto done2; 870 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 871 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 872 error = EMSGSIZE; 873 goto done2; 874 } 875 MALLOC(iov, struct iovec *, 876 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 877 M_WAITOK); 878 } else { 879 iov = aiov; 880 } 881 if (msg.msg_iovlen && 882 (error = copyin(msg.msg_iov, iov, 883 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 884 goto done; 885 msg.msg_iov = iov; 886 #ifdef COMPAT_OLDSOCK 887 msg.msg_flags = 0; 888 #endif 889 error = sendit(td, uap->s, &msg, uap->flags); 890 done: 891 if (iov != aiov) 892 FREE(iov, M_IOV); 893 done2: 894 mtx_unlock(&Giant); 895 return (error); 896 } 897 898 static int 899 recvit(td, s, mp, namelenp) 900 register struct thread *td; 901 int s; 902 register struct msghdr *mp; 903 void *namelenp; 904 { 905 struct uio auio; 906 register struct iovec *iov; 907 register int i; 908 int len, error; 909 struct mbuf *m, *control = 0; 910 caddr_t ctlbuf; 911 struct socket *so; 912 struct sockaddr *fromsa = 0; 913 #ifdef KTRACE 914 struct iovec *ktriov = NULL; 915 struct uio ktruio; 916 int iovlen; 917 #endif 918 919 if ((error = fgetsock(td, s, &so, NULL)) != 0) 920 return (error); 921 922 #ifdef MAC 923 error = mac_check_socket_receive(td->td_ucred, so); 924 if (error) { 925 fputsock(so); 926 return (error); 927 } 928 #endif 929 930 
auio.uio_iov = mp->msg_iov; 931 auio.uio_iovcnt = mp->msg_iovlen; 932 auio.uio_segflg = UIO_USERSPACE; 933 auio.uio_rw = UIO_READ; 934 auio.uio_td = td; 935 auio.uio_offset = 0; /* XXX */ 936 auio.uio_resid = 0; 937 iov = mp->msg_iov; 938 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 939 if ((auio.uio_resid += iov->iov_len) < 0) { 940 fputsock(so); 941 return (EINVAL); 942 } 943 } 944 #ifdef KTRACE 945 if (KTRPOINT(td, KTR_GENIO)) { 946 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 947 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 948 bcopy(auio.uio_iov, ktriov, iovlen); 949 ktruio = auio; 950 } 951 #endif 952 len = auio.uio_resid; 953 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 954 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 955 &mp->msg_flags); 956 if (error) { 957 if (auio.uio_resid != len && (error == ERESTART || 958 error == EINTR || error == EWOULDBLOCK)) 959 error = 0; 960 } 961 #ifdef KTRACE 962 if (ktriov != NULL) { 963 if (error == 0) { 964 ktruio.uio_iov = ktriov; 965 ktruio.uio_resid = len - auio.uio_resid; 966 ktrgenio(s, UIO_READ, &ktruio, error); 967 } 968 FREE(ktriov, M_TEMP); 969 } 970 #endif 971 if (error) 972 goto out; 973 td->td_retval[0] = len - auio.uio_resid; 974 if (mp->msg_name) { 975 len = mp->msg_namelen; 976 if (len <= 0 || fromsa == 0) 977 len = 0; 978 else { 979 /* save sa_len before it is destroyed by MSG_COMPAT */ 980 len = MIN(len, fromsa->sa_len); 981 #ifdef COMPAT_OLDSOCK 982 if (mp->msg_flags & MSG_COMPAT) 983 ((struct osockaddr *)fromsa)->sa_family = 984 fromsa->sa_family; 985 #endif 986 error = copyout(fromsa, mp->msg_name, (unsigned)len); 987 if (error) 988 goto out; 989 } 990 mp->msg_namelen = len; 991 if (namelenp && 992 (error = copyout(&len, namelenp, sizeof (int)))) { 993 #ifdef COMPAT_OLDSOCK 994 if (mp->msg_flags & MSG_COMPAT) 995 error = 0; /* old recvfrom didn't check */ 996 else 997 #endif 998 goto out; 999 } 1000 } 1001 if (mp->msg_control) { 1002 #ifdef 
COMPAT_OLDSOCK 1003 /* 1004 * We assume that old recvmsg calls won't receive access 1005 * rights and other control info, esp. as control info 1006 * is always optional and those options didn't exist in 4.3. 1007 * If we receive rights, trim the cmsghdr; anything else 1008 * is tossed. 1009 */ 1010 if (control && mp->msg_flags & MSG_COMPAT) { 1011 if (mtod(control, struct cmsghdr *)->cmsg_level != 1012 SOL_SOCKET || 1013 mtod(control, struct cmsghdr *)->cmsg_type != 1014 SCM_RIGHTS) { 1015 mp->msg_controllen = 0; 1016 goto out; 1017 } 1018 control->m_len -= sizeof (struct cmsghdr); 1019 control->m_data += sizeof (struct cmsghdr); 1020 } 1021 #endif 1022 len = mp->msg_controllen; 1023 m = control; 1024 mp->msg_controllen = 0; 1025 ctlbuf = mp->msg_control; 1026 1027 while (m && len > 0) { 1028 unsigned int tocopy; 1029 1030 if (len >= m->m_len) 1031 tocopy = m->m_len; 1032 else { 1033 mp->msg_flags |= MSG_CTRUNC; 1034 tocopy = len; 1035 } 1036 1037 if ((error = copyout(mtod(m, caddr_t), 1038 ctlbuf, tocopy)) != 0) 1039 goto out; 1040 1041 ctlbuf += tocopy; 1042 len -= tocopy; 1043 m = m->m_next; 1044 } 1045 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1046 } 1047 out: 1048 fputsock(so); 1049 if (fromsa) 1050 FREE(fromsa, M_SONAME); 1051 if (control) 1052 m_freem(control); 1053 return (error); 1054 } 1055 1056 /* 1057 * MPSAFE 1058 */ 1059 int 1060 recvfrom(td, uap) 1061 struct thread *td; 1062 register struct recvfrom_args /* { 1063 int s; 1064 caddr_t buf; 1065 size_t len; 1066 int flags; 1067 caddr_t from; 1068 int *fromlenaddr; 1069 } */ *uap; 1070 { 1071 struct msghdr msg; 1072 struct iovec aiov; 1073 int error; 1074 1075 mtx_lock(&Giant); 1076 if (uap->fromlenaddr) { 1077 error = copyin(uap->fromlenaddr, 1078 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1079 if (error) 1080 goto done2; 1081 } else { 1082 msg.msg_namelen = 0; 1083 } 1084 msg.msg_name = uap->from; 1085 msg.msg_iov = &aiov; 1086 msg.msg_iovlen = 1; 1087 aiov.iov_base = uap->buf; 1088 
aiov.iov_len = uap->len; 1089 msg.msg_control = 0; 1090 msg.msg_flags = uap->flags; 1091 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1092 done2: 1093 mtx_unlock(&Giant); 1094 return(error); 1095 } 1096 1097 #ifdef COMPAT_OLDSOCK 1098 /* 1099 * MPSAFE 1100 */ 1101 int 1102 orecvfrom(td, uap) 1103 struct thread *td; 1104 struct recvfrom_args *uap; 1105 { 1106 1107 uap->flags |= MSG_COMPAT; 1108 return (recvfrom(td, uap)); 1109 } 1110 #endif 1111 1112 1113 #ifdef COMPAT_OLDSOCK 1114 /* 1115 * MPSAFE 1116 */ 1117 int 1118 orecv(td, uap) 1119 struct thread *td; 1120 register struct orecv_args /* { 1121 int s; 1122 caddr_t buf; 1123 int len; 1124 int flags; 1125 } */ *uap; 1126 { 1127 struct msghdr msg; 1128 struct iovec aiov; 1129 int error; 1130 1131 mtx_lock(&Giant); 1132 msg.msg_name = 0; 1133 msg.msg_namelen = 0; 1134 msg.msg_iov = &aiov; 1135 msg.msg_iovlen = 1; 1136 aiov.iov_base = uap->buf; 1137 aiov.iov_len = uap->len; 1138 msg.msg_control = 0; 1139 msg.msg_flags = uap->flags; 1140 error = recvit(td, uap->s, &msg, NULL); 1141 mtx_unlock(&Giant); 1142 return (error); 1143 } 1144 1145 /* 1146 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1147 * overlays the new one, missing only the flags, and with the (old) access 1148 * rights where the control fields are now. 
1149 * 1150 * MPSAFE 1151 */ 1152 int 1153 orecvmsg(td, uap) 1154 struct thread *td; 1155 register struct orecvmsg_args /* { 1156 int s; 1157 struct omsghdr *msg; 1158 int flags; 1159 } */ *uap; 1160 { 1161 struct msghdr msg; 1162 struct iovec aiov[UIO_SMALLIOV], *iov; 1163 int error; 1164 1165 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1166 if (error) 1167 return (error); 1168 1169 mtx_lock(&Giant); 1170 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1171 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1172 error = EMSGSIZE; 1173 goto done2; 1174 } 1175 MALLOC(iov, struct iovec *, 1176 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1177 M_WAITOK); 1178 } else { 1179 iov = aiov; 1180 } 1181 msg.msg_flags = uap->flags | MSG_COMPAT; 1182 error = copyin(msg.msg_iov, iov, 1183 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1184 if (error) 1185 goto done; 1186 msg.msg_iov = iov; 1187 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1188 1189 if (msg.msg_controllen && error == 0) 1190 error = copyout(&msg.msg_controllen, 1191 &uap->msg->msg_accrightslen, sizeof (int)); 1192 done: 1193 if (iov != aiov) 1194 FREE(iov, M_IOV); 1195 done2: 1196 mtx_unlock(&Giant); 1197 return (error); 1198 } 1199 #endif 1200 1201 /* 1202 * MPSAFE 1203 */ 1204 int 1205 recvmsg(td, uap) 1206 struct thread *td; 1207 register struct recvmsg_args /* { 1208 int s; 1209 struct msghdr *msg; 1210 int flags; 1211 } */ *uap; 1212 { 1213 struct msghdr msg; 1214 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1215 register int error; 1216 1217 mtx_lock(&Giant); 1218 error = copyin(uap->msg, &msg, sizeof (msg)); 1219 if (error) 1220 goto done2; 1221 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1222 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1223 error = EMSGSIZE; 1224 goto done2; 1225 } 1226 MALLOC(iov, struct iovec *, 1227 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1228 M_WAITOK); 1229 } else { 1230 iov = aiov; 1231 } 1232 #ifdef COMPAT_OLDSOCK 1233 msg.msg_flags = 
uap->flags &~ MSG_COMPAT; 1234 #else 1235 msg.msg_flags = uap->flags; 1236 #endif 1237 uiov = msg.msg_iov; 1238 msg.msg_iov = iov; 1239 error = copyin(uiov, iov, 1240 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1241 if (error) 1242 goto done; 1243 error = recvit(td, uap->s, &msg, NULL); 1244 if (!error) { 1245 msg.msg_iov = uiov; 1246 error = copyout(&msg, uap->msg, sizeof(msg)); 1247 } 1248 done: 1249 if (iov != aiov) 1250 FREE(iov, M_IOV); 1251 done2: 1252 mtx_unlock(&Giant); 1253 return (error); 1254 } 1255 1256 /* 1257 * MPSAFE 1258 */ 1259 /* ARGSUSED */ 1260 int 1261 shutdown(td, uap) 1262 struct thread *td; 1263 register struct shutdown_args /* { 1264 int s; 1265 int how; 1266 } */ *uap; 1267 { 1268 struct socket *so; 1269 int error; 1270 1271 mtx_lock(&Giant); 1272 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1273 error = soshutdown(so, uap->how); 1274 fputsock(so); 1275 } 1276 mtx_unlock(&Giant); 1277 return(error); 1278 } 1279 1280 /* 1281 * MPSAFE 1282 */ 1283 /* ARGSUSED */ 1284 int 1285 setsockopt(td, uap) 1286 struct thread *td; 1287 register struct setsockopt_args /* { 1288 int s; 1289 int level; 1290 int name; 1291 caddr_t val; 1292 int valsize; 1293 } */ *uap; 1294 { 1295 struct socket *so; 1296 struct sockopt sopt; 1297 int error; 1298 1299 if (uap->val == 0 && uap->valsize != 0) 1300 return (EFAULT); 1301 if (uap->valsize < 0) 1302 return (EINVAL); 1303 1304 mtx_lock(&Giant); 1305 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1306 sopt.sopt_dir = SOPT_SET; 1307 sopt.sopt_level = uap->level; 1308 sopt.sopt_name = uap->name; 1309 sopt.sopt_val = uap->val; 1310 sopt.sopt_valsize = uap->valsize; 1311 sopt.sopt_td = td; 1312 error = sosetopt(so, &sopt); 1313 fputsock(so); 1314 } 1315 mtx_unlock(&Giant); 1316 return(error); 1317 } 1318 1319 /* 1320 * MPSAFE 1321 */ 1322 /* ARGSUSED */ 1323 int 1324 getsockopt(td, uap) 1325 struct thread *td; 1326 register struct getsockopt_args /* { 1327 int s; 1328 int level; 1329 int name; 
1330 caddr_t val; 1331 int *avalsize; 1332 } */ *uap; 1333 { 1334 int valsize, error; 1335 struct socket *so; 1336 struct sockopt sopt; 1337 1338 mtx_lock(&Giant); 1339 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1340 goto done2; 1341 if (uap->val) { 1342 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1343 if (error) 1344 goto done1; 1345 if (valsize < 0) { 1346 error = EINVAL; 1347 goto done1; 1348 } 1349 } else { 1350 valsize = 0; 1351 } 1352 1353 sopt.sopt_dir = SOPT_GET; 1354 sopt.sopt_level = uap->level; 1355 sopt.sopt_name = uap->name; 1356 sopt.sopt_val = uap->val; 1357 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1358 sopt.sopt_td = td; 1359 1360 error = sogetopt(so, &sopt); 1361 if (error == 0) { 1362 valsize = sopt.sopt_valsize; 1363 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1364 } 1365 done1: 1366 fputsock(so); 1367 done2: 1368 mtx_unlock(&Giant); 1369 return (error); 1370 } 1371 1372 /* 1373 * getsockname1() - Get socket name. 
1374 * 1375 * MPSAFE 1376 */ 1377 /* ARGSUSED */ 1378 static int 1379 getsockname1(td, uap, compat) 1380 struct thread *td; 1381 register struct getsockname_args /* { 1382 int fdes; 1383 caddr_t asa; 1384 int *alen; 1385 } */ *uap; 1386 int compat; 1387 { 1388 struct socket *so; 1389 struct sockaddr *sa; 1390 int len, error; 1391 1392 mtx_lock(&Giant); 1393 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1394 goto done2; 1395 error = copyin(uap->alen, &len, sizeof (len)); 1396 if (error) 1397 goto done1; 1398 if (len < 0) { 1399 error = EINVAL; 1400 goto done1; 1401 } 1402 sa = 0; 1403 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1404 if (error) 1405 goto bad; 1406 if (sa == 0) { 1407 len = 0; 1408 goto gotnothing; 1409 } 1410 1411 len = MIN(len, sa->sa_len); 1412 #ifdef COMPAT_OLDSOCK 1413 if (compat) 1414 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1415 #endif 1416 error = copyout(sa, uap->asa, (u_int)len); 1417 if (error == 0) 1418 gotnothing: 1419 error = copyout(&len, uap->alen, sizeof (len)); 1420 bad: 1421 if (sa) 1422 FREE(sa, M_SONAME); 1423 done1: 1424 fputsock(so); 1425 done2: 1426 mtx_unlock(&Giant); 1427 return (error); 1428 } 1429 1430 /* 1431 * MPSAFE 1432 */ 1433 int 1434 getsockname(td, uap) 1435 struct thread *td; 1436 struct getsockname_args *uap; 1437 { 1438 1439 return (getsockname1(td, uap, 0)); 1440 } 1441 1442 #ifdef COMPAT_OLDSOCK 1443 /* 1444 * MPSAFE 1445 */ 1446 int 1447 ogetsockname(td, uap) 1448 struct thread *td; 1449 struct getsockname_args *uap; 1450 { 1451 1452 return (getsockname1(td, uap, 1)); 1453 } 1454 #endif /* COMPAT_OLDSOCK */ 1455 1456 /* 1457 * getpeername1() - Get name of peer for connected socket. 
1458 * 1459 * MPSAFE 1460 */ 1461 /* ARGSUSED */ 1462 static int 1463 getpeername1(td, uap, compat) 1464 struct thread *td; 1465 register struct getpeername_args /* { 1466 int fdes; 1467 caddr_t asa; 1468 int *alen; 1469 } */ *uap; 1470 int compat; 1471 { 1472 struct socket *so; 1473 struct sockaddr *sa; 1474 int len, error; 1475 1476 mtx_lock(&Giant); 1477 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1478 goto done2; 1479 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1480 error = ENOTCONN; 1481 goto done1; 1482 } 1483 error = copyin(uap->alen, &len, sizeof (len)); 1484 if (error) 1485 goto done1; 1486 if (len < 0) { 1487 error = EINVAL; 1488 goto done1; 1489 } 1490 sa = 0; 1491 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1492 if (error) 1493 goto bad; 1494 if (sa == 0) { 1495 len = 0; 1496 goto gotnothing; 1497 } 1498 len = MIN(len, sa->sa_len); 1499 #ifdef COMPAT_OLDSOCK 1500 if (compat) 1501 ((struct osockaddr *)sa)->sa_family = 1502 sa->sa_family; 1503 #endif 1504 error = copyout(sa, uap->asa, (u_int)len); 1505 if (error) 1506 goto bad; 1507 gotnothing: 1508 error = copyout(&len, uap->alen, sizeof (len)); 1509 bad: 1510 if (sa) 1511 FREE(sa, M_SONAME); 1512 done1: 1513 fputsock(so); 1514 done2: 1515 mtx_unlock(&Giant); 1516 return (error); 1517 } 1518 1519 /* 1520 * MPSAFE 1521 */ 1522 int 1523 getpeername(td, uap) 1524 struct thread *td; 1525 struct getpeername_args *uap; 1526 { 1527 1528 return (getpeername1(td, uap, 0)); 1529 } 1530 1531 #ifdef COMPAT_OLDSOCK 1532 /* 1533 * MPSAFE 1534 */ 1535 int 1536 ogetpeername(td, uap) 1537 struct thread *td; 1538 struct ogetpeername_args *uap; 1539 { 1540 1541 /* XXX uap should have type `getpeername_args *' to begin with. 
*/ 1542 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1543 } 1544 #endif /* COMPAT_OLDSOCK */ 1545 1546 int 1547 sockargs(mp, buf, buflen, type) 1548 struct mbuf **mp; 1549 caddr_t buf; 1550 int buflen, type; 1551 { 1552 register struct sockaddr *sa; 1553 register struct mbuf *m; 1554 int error; 1555 1556 if ((u_int)buflen > MLEN) { 1557 #ifdef COMPAT_OLDSOCK 1558 if (type == MT_SONAME && (u_int)buflen <= 112) 1559 buflen = MLEN; /* unix domain compat. hack */ 1560 else 1561 #endif 1562 return (EINVAL); 1563 } 1564 m = m_get(M_TRYWAIT, type); 1565 if (m == NULL) 1566 return (ENOBUFS); 1567 m->m_len = buflen; 1568 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1569 if (error) 1570 (void) m_free(m); 1571 else { 1572 *mp = m; 1573 if (type == MT_SONAME) { 1574 sa = mtod(m, struct sockaddr *); 1575 1576 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1577 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1578 sa->sa_family = sa->sa_len; 1579 #endif 1580 sa->sa_len = buflen; 1581 } 1582 } 1583 return (error); 1584 } 1585 1586 int 1587 getsockaddr(namp, uaddr, len) 1588 struct sockaddr **namp; 1589 caddr_t uaddr; 1590 size_t len; 1591 { 1592 struct sockaddr *sa; 1593 int error; 1594 1595 if (len > SOCK_MAXADDRLEN) 1596 return ENAMETOOLONG; 1597 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1598 error = copyin(uaddr, sa, len); 1599 if (error) { 1600 FREE(sa, M_SONAME); 1601 } else { 1602 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1603 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1604 sa->sa_family = sa->sa_len; 1605 #endif 1606 sa->sa_len = len; 1607 *namp = sa; 1608 } 1609 return error; 1610 } 1611 1612 /* 1613 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. 
:-)) 1614 */ 1615 static void 1616 sf_buf_init(void *arg) 1617 { 1618 struct sf_buf *sf_bufs; 1619 vm_offset_t sf_base; 1620 int i; 1621 1622 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF); 1623 mtx_lock(&sf_freelist.sf_lock); 1624 SLIST_INIT(&sf_freelist.sf_head); 1625 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE); 1626 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, 1627 M_NOWAIT | M_ZERO); 1628 for (i = 0; i < nsfbufs; i++) { 1629 sf_bufs[i].kva = sf_base + i * PAGE_SIZE; 1630 SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list); 1631 } 1632 sf_buf_alloc_want = 0; 1633 mtx_unlock(&sf_freelist.sf_lock); 1634 } 1635 1636 /* 1637 * Get an sf_buf from the freelist. Will block if none are available. 1638 */ 1639 struct sf_buf * 1640 sf_buf_alloc(struct vm_page *m) 1641 { 1642 struct sf_buf *sf; 1643 int error; 1644 1645 mtx_lock(&sf_freelist.sf_lock); 1646 while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) { 1647 sf_buf_alloc_want++; 1648 error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH, 1649 "sfbufa", 0); 1650 sf_buf_alloc_want--; 1651 1652 /* 1653 * If we got a signal, don't risk going back to sleep. 1654 */ 1655 if (error) 1656 break; 1657 } 1658 if (sf != NULL) { 1659 SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list); 1660 sf->m = m; 1661 pmap_qenter(sf->kva, &sf->m, 1); 1662 } 1663 mtx_unlock(&sf_freelist.sf_lock); 1664 return (sf); 1665 } 1666 1667 /* 1668 * Detatch mapped page and release resources back to the system. 1669 */ 1670 void 1671 sf_buf_free(void *addr, void *args) 1672 { 1673 struct sf_buf *sf; 1674 struct vm_page *m; 1675 1676 sf = args; 1677 pmap_qremove((vm_offset_t)addr, 1); 1678 m = sf->m; 1679 vm_page_lock_queues(); 1680 vm_page_unwire(m, 0); 1681 /* 1682 * Check for the object going away on us. This can 1683 * happen since we don't hold a reference to it. 1684 * If so, we're responsible for freeing the page. 
1685 */ 1686 if (m->wire_count == 0 && m->object == NULL) 1687 vm_page_free(m); 1688 vm_page_unlock_queues(); 1689 sf->m = NULL; 1690 mtx_lock(&sf_freelist.sf_lock); 1691 SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list); 1692 if (sf_buf_alloc_want > 0) 1693 wakeup_one(&sf_freelist); 1694 mtx_unlock(&sf_freelist.sf_lock); 1695 } 1696 1697 /* 1698 * sendfile(2) 1699 * 1700 * MPSAFE 1701 * 1702 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1703 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1704 * 1705 * Send a file specified by 'fd' and starting at 'offset' to a socket 1706 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1707 * nbytes == 0. Optionally add a header and/or trailer to the socket 1708 * output. If specified, write the total number of bytes sent into *sbytes. 1709 * 1710 */ 1711 int 1712 sendfile(struct thread *td, struct sendfile_args *uap) 1713 { 1714 1715 return (do_sendfile(td, uap, 0)); 1716 } 1717 1718 #ifdef COMPAT_FREEBSD4 1719 int 1720 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1721 { 1722 struct sendfile_args args; 1723 1724 args.fd = uap->fd; 1725 args.s = uap->s; 1726 args.offset = uap->offset; 1727 args.nbytes = uap->nbytes; 1728 args.hdtr = uap->hdtr; 1729 args.sbytes = uap->sbytes; 1730 args.flags = uap->flags; 1731 1732 return (do_sendfile(td, &args, 1)); 1733 } 1734 #endif /* COMPAT_FREEBSD4 */ 1735 1736 static int 1737 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1738 { 1739 struct vnode *vp; 1740 struct vm_object *obj; 1741 struct socket *so = NULL; 1742 struct mbuf *m; 1743 struct sf_buf *sf; 1744 struct vm_page *pg; 1745 struct writev_args nuap; 1746 struct sf_hdtr hdtr; 1747 off_t off, xfsize, hdtr_size, sbytes = 0; 1748 int error, s; 1749 1750 mtx_lock(&Giant); 1751 1752 hdtr_size = 0; 1753 1754 /* 1755 * The descriptor must be a regular file and have a backing VM object. 
1756 */ 1757 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1758 goto done; 1759 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1760 error = EINVAL; 1761 goto done; 1762 } 1763 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1764 goto done; 1765 if (so->so_type != SOCK_STREAM) { 1766 error = EINVAL; 1767 goto done; 1768 } 1769 if ((so->so_state & SS_ISCONNECTED) == 0) { 1770 error = ENOTCONN; 1771 goto done; 1772 } 1773 if (uap->offset < 0) { 1774 error = EINVAL; 1775 goto done; 1776 } 1777 1778 #ifdef MAC 1779 error = mac_check_socket_send(td->td_ucred, so); 1780 if (error) 1781 goto done; 1782 #endif 1783 1784 /* 1785 * If specified, get the pointer to the sf_hdtr struct for 1786 * any headers/trailers. 1787 */ 1788 if (uap->hdtr != NULL) { 1789 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1790 if (error) 1791 goto done; 1792 /* 1793 * Send any headers. Wimp out and use writev(2). 1794 */ 1795 if (hdtr.headers != NULL) { 1796 nuap.fd = uap->s; 1797 nuap.iovp = hdtr.headers; 1798 nuap.iovcnt = hdtr.hdr_cnt; 1799 error = writev(td, &nuap); 1800 if (error) 1801 goto done; 1802 if (compat) 1803 sbytes += td->td_retval[0]; 1804 else 1805 hdtr_size += td->td_retval[0]; 1806 } 1807 } 1808 1809 /* 1810 * Protect against multiple writers to the socket. 1811 */ 1812 (void) sblock(&so->so_snd, M_WAITOK); 1813 1814 /* 1815 * Loop through the pages in the file, starting with the requested 1816 * offset. Get a file page (do I/O if necessary), map the file page 1817 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1818 * it on the socket. 1819 */ 1820 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1821 vm_pindex_t pindex; 1822 vm_offset_t pgoff; 1823 1824 pindex = OFF_TO_IDX(off); 1825 retry_lookup: 1826 /* 1827 * Calculate the amount to transfer. Not to exceed a page, 1828 * the EOF, or the passed in nbytes. 
1829 */ 1830 xfsize = obj->un_pager.vnp.vnp_size - off; 1831 if (xfsize > PAGE_SIZE) 1832 xfsize = PAGE_SIZE; 1833 pgoff = (vm_offset_t)(off & PAGE_MASK); 1834 if (PAGE_SIZE - pgoff < xfsize) 1835 xfsize = PAGE_SIZE - pgoff; 1836 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1837 xfsize = uap->nbytes - sbytes; 1838 if (xfsize <= 0) 1839 break; 1840 /* 1841 * Optimize the non-blocking case by looking at the socket space 1842 * before going to the extra work of constituting the sf_buf. 1843 */ 1844 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1845 if (so->so_state & SS_CANTSENDMORE) 1846 error = EPIPE; 1847 else 1848 error = EAGAIN; 1849 sbunlock(&so->so_snd); 1850 goto done; 1851 } 1852 /* 1853 * Attempt to look up the page. 1854 * 1855 * Allocate if not found 1856 * 1857 * Wait and loop if busy. 1858 */ 1859 pg = vm_page_lookup(obj, pindex); 1860 1861 if (pg == NULL) { 1862 pg = vm_page_alloc(obj, pindex, 1863 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1864 if (pg == NULL) { 1865 VM_WAIT; 1866 goto retry_lookup; 1867 } 1868 vm_page_lock_queues(); 1869 vm_page_wakeup(pg); 1870 } else { 1871 vm_page_lock_queues(); 1872 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1873 goto retry_lookup; 1874 /* 1875 * Wire the page so it does not get ripped out from 1876 * under us. 1877 */ 1878 vm_page_wire(pg); 1879 } 1880 1881 /* 1882 * If page is not valid for what we need, initiate I/O 1883 */ 1884 1885 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1886 int bsize, resid; 1887 1888 /* 1889 * Ensure that our page is still around when the I/O 1890 * completes. 1891 */ 1892 vm_page_io_start(pg); 1893 vm_page_unlock_queues(); 1894 1895 /* 1896 * Get the page from backing store. 1897 */ 1898 bsize = vp->v_mount->mnt_stat.f_iosize; 1899 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1900 /* 1901 * XXXMAC: Because we don't have fp->f_cred here, 1902 * we pass in NOCRED. This is probably wrong, but 1903 * is consistent with our original implementation. 
1904 */ 1905 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1906 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1907 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1908 td->td_ucred, NOCRED, &resid, td); 1909 VOP_UNLOCK(vp, 0, td); 1910 vm_page_lock_queues(); 1911 vm_page_flag_clear(pg, PG_ZERO); 1912 vm_page_io_finish(pg); 1913 if (error) { 1914 vm_page_unwire(pg, 0); 1915 /* 1916 * See if anyone else might know about this page. 1917 * If not and it is not valid, then free it. 1918 */ 1919 if (pg->wire_count == 0 && pg->valid == 0 && 1920 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1921 pg->hold_count == 0) { 1922 vm_page_busy(pg); 1923 vm_page_free(pg); 1924 } 1925 vm_page_unlock_queues(); 1926 sbunlock(&so->so_snd); 1927 goto done; 1928 } 1929 } 1930 vm_page_unlock_queues(); 1931 1932 /* 1933 * Get a sendfile buf. We usually wait as long as necessary, 1934 * but this wait can be interrupted. 1935 */ 1936 if ((sf = sf_buf_alloc(pg)) == NULL) { 1937 vm_page_lock_queues(); 1938 vm_page_unwire(pg, 0); 1939 if (pg->wire_count == 0 && pg->object == NULL) 1940 vm_page_free(pg); 1941 vm_page_unlock_queues(); 1942 sbunlock(&so->so_snd); 1943 error = EINTR; 1944 goto done; 1945 } 1946 1947 /* 1948 * Get an mbuf header and set it up as having external storage. 1949 */ 1950 MGETHDR(m, M_TRYWAIT, MT_DATA); 1951 if (m == NULL) { 1952 error = ENOBUFS; 1953 sf_buf_free((void *)sf->kva, sf); 1954 sbunlock(&so->so_snd); 1955 goto done; 1956 } 1957 /* 1958 * Setup external storage for mbuf. 1959 */ 1960 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, sf, M_RDONLY, 1961 EXT_SFBUF); 1962 m->m_data = (char *) sf->kva + pgoff; 1963 m->m_pkthdr.len = m->m_len = xfsize; 1964 /* 1965 * Add the buffer to the socket buffer chain. 1966 */ 1967 s = splnet(); 1968 retry_space: 1969 /* 1970 * Make sure that the socket is still able to take more data. 1971 * CANTSENDMORE being true usually means that the connection 1972 * was closed. so_error is true when an error was sensed after 1973 * a previous send. 
1974 * The state is checked after the page mapping and buffer 1975 * allocation above since those operations may block and make 1976 * any socket checks stale. From this point forward, nothing 1977 * blocks before the pru_send (or more accurately, any blocking 1978 * results in a loop back to here to re-check). 1979 */ 1980 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1981 if (so->so_state & SS_CANTSENDMORE) { 1982 error = EPIPE; 1983 } else { 1984 error = so->so_error; 1985 so->so_error = 0; 1986 } 1987 m_freem(m); 1988 sbunlock(&so->so_snd); 1989 splx(s); 1990 goto done; 1991 } 1992 /* 1993 * Wait for socket space to become available. We do this just 1994 * after checking the connection state above in order to avoid 1995 * a race condition with sbwait(). 1996 */ 1997 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1998 if (so->so_state & SS_NBIO) { 1999 m_freem(m); 2000 sbunlock(&so->so_snd); 2001 splx(s); 2002 error = EAGAIN; 2003 goto done; 2004 } 2005 error = sbwait(&so->so_snd); 2006 /* 2007 * An error from sbwait usually indicates that we've 2008 * been interrupted by a signal. If we've sent anything 2009 * then return bytes sent, otherwise return the error. 2010 */ 2011 if (error) { 2012 m_freem(m); 2013 sbunlock(&so->so_snd); 2014 splx(s); 2015 goto done; 2016 } 2017 goto retry_space; 2018 } 2019 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2020 splx(s); 2021 if (error) { 2022 sbunlock(&so->so_snd); 2023 goto done; 2024 } 2025 } 2026 sbunlock(&so->so_snd); 2027 2028 /* 2029 * Send trailers. Wimp out and use writev(2). 
2030 */ 2031 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 2032 nuap.fd = uap->s; 2033 nuap.iovp = hdtr.trailers; 2034 nuap.iovcnt = hdtr.trl_cnt; 2035 error = writev(td, &nuap); 2036 if (error) 2037 goto done; 2038 if (compat) 2039 sbytes += td->td_retval[0]; 2040 else 2041 hdtr_size += td->td_retval[0]; 2042 } 2043 2044 done: 2045 /* 2046 * If there was no error we have to clear td->td_retval[0] 2047 * because it may have been set by writev. 2048 */ 2049 if (error == 0) { 2050 td->td_retval[0] = 0; 2051 } 2052 if (uap->sbytes != NULL) { 2053 if (!compat) 2054 sbytes += hdtr_size; 2055 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2056 } 2057 if (vp) 2058 vrele(vp); 2059 if (so) 2060 fputsock(so); 2061 mtx_unlock(&Giant); 2062 return (error); 2063 } 2064