1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_compat.h" 43 #include "opt_ktrace.h" 44 #include "opt_mac.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/kernel.h> 49 #include <sys/lock.h> 50 #include <sys/mac.h> 51 #include <sys/mutex.h> 52 #include <sys/sysproto.h> 53 #include <sys/malloc.h> 54 #include <sys/filedesc.h> 55 #include <sys/event.h> 56 #include <sys/proc.h> 57 #include <sys/fcntl.h> 58 #include <sys/file.h> 59 #include <sys/filio.h> 60 #include <sys/mount.h> 61 #include <sys/mbuf.h> 62 #include <sys/protosw.h> 63 #include <sys/socket.h> 64 #include <sys/socketvar.h> 65 #include <sys/signalvar.h> 66 #include <sys/syscallsubr.h> 67 #include <sys/uio.h> 68 #include <sys/vnode.h> 69 #ifdef KTRACE 70 #include <sys/ktrace.h> 71 #endif 72 73 #include <vm/vm.h> 74 #include <vm/vm_object.h> 75 #include <vm/vm_page.h> 76 #include <vm/vm_pageout.h> 77 #include <vm/vm_kern.h> 78 #include <vm/vm_extern.h> 79 80 static void sf_buf_init(void *arg); 81 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) 82 83 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 84 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 85 86 static int accept1(struct thread *td, struct accept_args *uap, int compat); 87 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 88 static int getsockname1(struct thread *td, struct getsockname_args *uap, 89 int compat); 90 static int getpeername1(struct thread *td, struct getpeername_args *uap, 91 int compat); 92 93 /* 94 * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the 95 * sf_freelist head with the sf_lock mutex. 96 */ 97 static struct { 98 SLIST_HEAD(, sf_buf) sf_head; 99 struct mtx sf_lock; 100 } sf_freelist; 101 102 static u_int sf_buf_alloc_want; 103 104 /* 105 * System call interface to the socket abstraction. 106 */ 107 #if defined(COMPAT_43) || defined(COMPAT_SUNOS) 108 #define COMPAT_OLDSOCK 109 #endif 110 111 /* 112 * MPSAFE 113 */ 114 int 115 socket(td, uap) 116 struct thread *td; 117 register struct socket_args /* { 118 int domain; 119 int type; 120 int protocol; 121 } */ *uap; 122 { 123 struct filedesc *fdp; 124 struct socket *so; 125 struct file *fp; 126 int fd, error; 127 128 mtx_lock(&Giant); 129 fdp = td->td_proc->p_fd; 130 error = falloc(td, &fp, &fd); 131 if (error) 132 goto done2; 133 fhold(fp); 134 error = socreate(uap->domain, &so, uap->type, uap->protocol, 135 td->td_ucred, td); 136 FILEDESC_LOCK(fdp); 137 if (error) { 138 if (fdp->fd_ofiles[fd] == fp) { 139 fdp->fd_ofiles[fd] = NULL; 140 FILEDESC_UNLOCK(fdp); 141 fdrop(fp, td); 142 } else 143 FILEDESC_UNLOCK(fdp); 144 } else { 145 fp->f_data = so; /* already has ref count */ 146 fp->f_flag = FREAD|FWRITE; 147 fp->f_ops = &socketops; 148 fp->f_type = DTYPE_SOCKET; 149 FILEDESC_UNLOCK(fdp); 150 td->td_retval[0] = fd; 151 } 152 fdrop(fp, td); 153 done2: 154 mtx_unlock(&Giant); 155 return (error); 156 } 157 158 /* 159 * MPSAFE 160 */ 161 /* ARGSUSED */ 162 int 163 bind(td, uap) 164 struct thread *td; 165 register struct bind_args /* { 166 int s; 167 caddr_t name; 168 int namelen; 169 } */ *uap; 170 { 171 struct sockaddr *sa; 172 int error; 173 174 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 175 return (error); 176 177 return (kern_bind(td, uap->s, sa)); 178 } 179 180 int 181 kern_bind(td, fd, sa) 182 struct thread *td; 183 int fd; 184 struct sockaddr *sa; 185 { 186 struct socket *so; 187 int error; 188 189 mtx_lock(&Giant); 190 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 191 goto done2; 192 #ifdef MAC 193 error = mac_check_socket_bind(td->td_ucred, so, sa); 194 if (error) 195 goto done1; 196 #endif 197 error = sobind(so, sa, td); 198 #ifdef MAC 199 done1: 200 #endif 201 fputsock(so); 202 done2: 203 mtx_unlock(&Giant); 204 FREE(sa, M_SONAME); 205 return (error); 206 } 207 208 /* 209 * MPSAFE 210 */ 211 /* ARGSUSED */ 212 int 213 listen(td, uap) 214 struct thread *td; 215 register struct listen_args /* { 216 int s; 217 int backlog; 218 } */ *uap; 219 { 220 struct socket *so; 221 int error; 222 223 mtx_lock(&Giant); 224 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 225 #ifdef MAC 226 error = mac_check_socket_listen(td->td_ucred, so); 227 if (error) 228 goto done; 229 #endif 230 error = solisten(so, uap->backlog, td); 231 #ifdef MAC 232 done: 233 #endif 234 fputsock(so); 235 } 236 mtx_unlock(&Giant); 237 return(error); 238 } 239 240 /* 241 * accept1() 242 * MPSAFE 243 */ 244 static int 245 accept1(td, uap, compat) 246 struct thread *td; 247 register struct accept_args /* { 248 int s; 249 caddr_t name; 250 int *anamelen; 251 } */ *uap; 252 int compat; 253 { 254 struct filedesc *fdp; 255 struct file *nfp = NULL; 256 struct sockaddr *sa; 257 int namelen, error, s; 258 struct socket *head, *so; 259 int fd; 260 u_int fflag; 261 pid_t pgid; 262 int tmp; 263 264 fdp = td->td_proc->p_fd; 265 if (uap->name) { 266 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 267 if(error) 268 goto done3; 269 if (namelen < 0) { 270 error = EINVAL; 271 goto done3; 272 } 273 } 274 mtx_lock(&Giant); 275 error = fgetsock(td, uap->s, &head, &fflag); 276 if (error) 277 goto done2; 278 s = splnet(); 279 if ((head->so_options & SO_ACCEPTCONN) == 0) { 280 splx(s); 281 error = EINVAL; 282 goto done; 283 } 284 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 285 if (head->so_state & SS_CANTRCVMORE) { 286 head->so_error = ECONNABORTED; 287 break; 288 } 289 if ((head->so_state & SS_NBIO) != 0) { 290 head->so_error = EWOULDBLOCK; 291 break; 292 } 293 error = tsleep(&head->so_timeo, PSOCK | PCATCH, 294 "accept", 0); 295 if (error) { 296 splx(s); 297 goto done; 298 } 299 } 300 if (head->so_error) { 301 error = head->so_error; 302 head->so_error = 0; 303 splx(s); 304 goto done; 305 } 306 307 /* 308 * At this point we know that there is at least one connection 309 * ready to be accepted. Remove it from the queue prior to 310 * allocating the file descriptor for it since falloc() may 311 * block allowing another process to accept the connection 312 * instead. 313 */ 314 so = TAILQ_FIRST(&head->so_comp); 315 TAILQ_REMOVE(&head->so_comp, so, so_list); 316 head->so_qlen--; 317 318 error = falloc(td, &nfp, &fd); 319 if (error) { 320 /* 321 * Probably ran out of file descriptors. Put the 322 * unaccepted connection back onto the queue and 323 * do another wakeup so some other process might 324 * have a chance at it. 325 */ 326 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 327 head->so_qlen++; 328 wakeup_one(&head->so_timeo); 329 splx(s); 330 goto done; 331 } 332 fhold(nfp); 333 td->td_retval[0] = fd; 334 335 /* connection has been removed from the listen queue */ 336 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 337 338 so->so_state &= ~SS_COMP; 339 so->so_head = NULL; 340 pgid = fgetown(&head->so_sigio); 341 if (pgid != 0) 342 fsetown(pgid, &so->so_sigio); 343 344 FILE_LOCK(nfp); 345 soref(so); /* file descriptor reference */ 346 nfp->f_data = so; /* nfp has ref count from falloc */ 347 nfp->f_flag = fflag; 348 nfp->f_ops = &socketops; 349 nfp->f_type = DTYPE_SOCKET; 350 FILE_UNLOCK(nfp); 351 /* Sync socket nonblocking/async state with file flags */ 352 tmp = fflag & FNONBLOCK; 353 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 354 tmp = fflag & FASYNC; 355 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 356 sa = 0; 357 error = soaccept(so, &sa); 358 if (error) { 359 /* 360 * return a namelen of zero for older code which might 361 * ignore the return value from accept. 362 */ 363 if (uap->name != NULL) { 364 namelen = 0; 365 (void) copyout(&namelen, 366 uap->anamelen, sizeof(*uap->anamelen)); 367 } 368 goto noconnection; 369 } 370 if (sa == NULL) { 371 namelen = 0; 372 if (uap->name) 373 goto gotnoname; 374 splx(s); 375 error = 0; 376 goto done; 377 } 378 if (uap->name) { 379 /* check sa_len before it is destroyed */ 380 if (namelen > sa->sa_len) 381 namelen = sa->sa_len; 382 #ifdef COMPAT_OLDSOCK 383 if (compat) 384 ((struct osockaddr *)sa)->sa_family = 385 sa->sa_family; 386 #endif 387 error = copyout(sa, uap->name, (u_int)namelen); 388 if (!error) 389 gotnoname: 390 error = copyout(&namelen, 391 uap->anamelen, sizeof (*uap->anamelen)); 392 } 393 noconnection: 394 if (sa) 395 FREE(sa, M_SONAME); 396 397 /* 398 * close the new descriptor, assuming someone hasn't ripped it 399 * out from under us. 400 */ 401 if (error) { 402 FILEDESC_LOCK(fdp); 403 if (fdp->fd_ofiles[fd] == nfp) { 404 fdp->fd_ofiles[fd] = NULL; 405 FILEDESC_UNLOCK(fdp); 406 fdrop(nfp, td); 407 } else { 408 FILEDESC_UNLOCK(fdp); 409 } 410 } 411 splx(s); 412 413 /* 414 * Release explicitly held references before returning. 415 */ 416 done: 417 if (nfp != NULL) 418 fdrop(nfp, td); 419 fputsock(head); 420 done2: 421 mtx_unlock(&Giant); 422 done3: 423 return (error); 424 } 425 426 /* 427 * MPSAFE (accept1() is MPSAFE) 428 */ 429 int 430 accept(td, uap) 431 struct thread *td; 432 struct accept_args *uap; 433 { 434 435 return (accept1(td, uap, 0)); 436 } 437 438 #ifdef COMPAT_OLDSOCK 439 /* 440 * MPSAFE (accept1() is MPSAFE) 441 */ 442 int 443 oaccept(td, uap) 444 struct thread *td; 445 struct accept_args *uap; 446 { 447 448 return (accept1(td, uap, 1)); 449 } 450 #endif /* COMPAT_OLDSOCK */ 451 452 /* 453 * MPSAFE 454 */ 455 /* ARGSUSED */ 456 int 457 connect(td, uap) 458 struct thread *td; 459 register struct connect_args /* { 460 int s; 461 caddr_t name; 462 int namelen; 463 } */ *uap; 464 { 465 struct sockaddr *sa; 466 int error; 467 468 error = getsockaddr(&sa, uap->name, uap->namelen); 469 if (error) 470 return error; 471 472 return (kern_connect(td, uap->s, sa)); 473 } 474 475 476 int 477 kern_connect(td, fd, sa) 478 struct thread *td; 479 int fd; 480 struct sockaddr *sa; 481 { 482 struct socket *so; 483 int error, s; 484 int interrupted = 0; 485 486 mtx_lock(&Giant); 487 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 488 goto done2; 489 if (so->so_state & SS_ISCONNECTING) { 490 error = EALREADY; 491 goto done1; 492 } 493 #ifdef MAC 494 error = mac_check_socket_connect(td->td_ucred, so, sa); 495 if (error) 496 goto bad; 497 #endif 498 error = soconnect(so, sa, td); 499 if (error) 500 goto bad; 501 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 502 error = EINPROGRESS; 503 goto done1; 504 } 505 s = splnet(); 506 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 507 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0); 508 if (error) { 509 if (error == EINTR || error == ERESTART) 510 interrupted = 1; 511 break; 512 } 513 } 514 if (error == 0) { 515 error = so->so_error; 516 so->so_error = 0; 517 } 518 splx(s); 519 bad: 520 if (!interrupted) 521 so->so_state &= ~SS_ISCONNECTING; 522 if (error == ERESTART) 523 error = EINTR; 524 done1: 525 fputsock(so); 526 done2: 527 mtx_unlock(&Giant); 528 FREE(sa, M_SONAME); 529 return (error); 530 } 531 532 /* 533 * MPSAFE 534 */ 535 int 536 socketpair(td, uap) 537 struct thread *td; 538 register struct socketpair_args /* { 539 int domain; 540 int type; 541 int protocol; 542 int *rsv; 543 } */ *uap; 544 { 545 register struct filedesc *fdp = td->td_proc->p_fd; 546 struct file *fp1, *fp2; 547 struct socket *so1, *so2; 548 int fd, error, sv[2]; 549 550 mtx_lock(&Giant); 551 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 552 td->td_ucred, td); 553 if (error) 554 goto done2; 555 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 556 td->td_ucred, td); 557 if (error) 558 goto free1; 559 error = falloc(td, &fp1, &fd); 560 if (error) 561 goto free2; 562 fhold(fp1); 563 sv[0] = fd; 564 fp1->f_data = so1; /* so1 already has ref count */ 565 error = falloc(td, &fp2, &fd); 566 if (error) 567 goto free3; 568 fhold(fp2); 569 fp2->f_data = so2; /* so2 already has ref count */ 570 sv[1] = fd; 571 error = soconnect2(so1, so2); 572 if (error) 573 goto free4; 574 if (uap->type == SOCK_DGRAM) { 575 /* 576 * Datagram socket connection is asymmetric. 577 */ 578 error = soconnect2(so2, so1); 579 if (error) 580 goto free4; 581 } 582 FILE_LOCK(fp1); 583 fp1->f_flag = FREAD|FWRITE; 584 fp1->f_ops = &socketops; 585 fp1->f_type = DTYPE_SOCKET; 586 FILE_UNLOCK(fp1); 587 FILE_LOCK(fp2); 588 fp2->f_flag = FREAD|FWRITE; 589 fp2->f_ops = &socketops; 590 fp2->f_type = DTYPE_SOCKET; 591 FILE_UNLOCK(fp2); 592 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 593 fdrop(fp1, td); 594 fdrop(fp2, td); 595 goto done2; 596 free4: 597 FILEDESC_LOCK(fdp); 598 if (fdp->fd_ofiles[sv[1]] == fp2) { 599 fdp->fd_ofiles[sv[1]] = NULL; 600 FILEDESC_UNLOCK(fdp); 601 fdrop(fp2, td); 602 } else 603 FILEDESC_UNLOCK(fdp); 604 fdrop(fp2, td); 605 free3: 606 FILEDESC_LOCK(fdp); 607 if (fdp->fd_ofiles[sv[0]] == fp1) { 608 fdp->fd_ofiles[sv[0]] = NULL; 609 FILEDESC_UNLOCK(fdp); 610 fdrop(fp1, td); 611 } else 612 FILEDESC_UNLOCK(fdp); 613 fdrop(fp1, td); 614 free2: 615 (void)soclose(so2); 616 free1: 617 (void)soclose(so1); 618 done2: 619 mtx_unlock(&Giant); 620 return (error); 621 } 622 623 static int 624 sendit(td, s, mp, flags) 625 register struct thread *td; 626 int s; 627 register struct msghdr *mp; 628 int flags; 629 { 630 struct mbuf *control; 631 struct sockaddr *to; 632 int error; 633 634 if (mp->msg_name != NULL) { 635 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 636 if (error) { 637 to = NULL; 638 goto bad; 639 } 640 mp->msg_name = to; 641 } else 642 to = NULL; 643 644 if (mp->msg_control) { 645 if (mp->msg_controllen < sizeof(struct cmsghdr) 646 #ifdef COMPAT_OLDSOCK 647 && mp->msg_flags != MSG_COMPAT 648 #endif 649 ) { 650 error = EINVAL; 651 goto bad; 652 } 653 error = sockargs(&control, mp->msg_control, 654 mp->msg_controllen, MT_CONTROL); 655 if (error) 656 goto bad; 657 #ifdef COMPAT_OLDSOCK 658 if (mp->msg_flags == MSG_COMPAT) { 659 register struct cmsghdr *cm; 660 661 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 662 if (control == 0) { 663 error = ENOBUFS; 664 goto bad; 665 } else { 666 cm = mtod(control, struct cmsghdr *); 667 cm->cmsg_len = control->m_len; 668 cm->cmsg_level = SOL_SOCKET; 669 cm->cmsg_type = SCM_RIGHTS; 670 } 671 } 672 #endif 673 } else { 674 control = NULL; 675 } 676 677 error = kern_sendit(td, s, mp, flags, control); 678 679 bad: 680 if (to) 681 FREE(to, M_SONAME); 682 return (error); 683 } 684 685 int 686 kern_sendit(td, s, mp, flags, control) 687 struct thread *td; 688 int s; 689 struct msghdr *mp; 690 int flags; 691 struct mbuf *control; 692 { 693 struct uio auio; 694 struct iovec *iov; 695 struct socket *so; 696 int i; 697 int len, error; 698 #ifdef KTRACE 699 struct iovec *ktriov = NULL; 700 struct uio ktruio; 701 int iovlen; 702 #endif 703 704 mtx_lock(&Giant); 705 if ((error = fgetsock(td, s, &so, NULL)) != 0) 706 goto bad2; 707 708 #ifdef MAC 709 error = mac_check_socket_send(td->td_ucred, so); 710 if (error) 711 goto bad; 712 #endif 713 714 auio.uio_iov = mp->msg_iov; 715 auio.uio_iovcnt = mp->msg_iovlen; 716 auio.uio_segflg = UIO_USERSPACE; 717 auio.uio_rw = UIO_WRITE; 718 auio.uio_td = td; 719 auio.uio_offset = 0; /* XXX */ 720 auio.uio_resid = 0; 721 iov = mp->msg_iov; 722 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 723 if ((auio.uio_resid += iov->iov_len) < 0) { 724 error = EINVAL; 725 goto bad; 726 } 727 } 728 #ifdef KTRACE 729 if (KTRPOINT(td, KTR_GENIO)) { 730 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 731 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 732 bcopy(auio.uio_iov, ktriov, iovlen); 733 ktruio = auio; 734 } 735 #endif 736 len = auio.uio_resid; 737 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 738 0, control, flags, td); 739 if (error) { 740 if (auio.uio_resid != len && (error == ERESTART || 741 error == EINTR || error == EWOULDBLOCK)) 742 error = 0; 743 /* Generation of SIGPIPE can be controlled per socket */ 744 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 745 PROC_LOCK(td->td_proc); 746 psignal(td->td_proc, SIGPIPE); 747 PROC_UNLOCK(td->td_proc); 748 } 749 } 750 if (error == 0) 751 td->td_retval[0] = len - auio.uio_resid; 752 #ifdef KTRACE 753 if (ktriov != NULL) { 754 if (error == 0) { 755 ktruio.uio_iov = ktriov; 756 ktruio.uio_resid = td->td_retval[0]; 757 ktrgenio(s, UIO_WRITE, &ktruio, error); 758 } 759 FREE(ktriov, M_TEMP); 760 } 761 #endif 762 bad: 763 fputsock(so); 764 bad2: 765 mtx_unlock(&Giant); 766 return (error); 767 } 768 769 /* 770 * MPSAFE 771 */ 772 int 773 sendto(td, uap) 774 struct thread *td; 775 register struct sendto_args /* { 776 int s; 777 caddr_t buf; 778 size_t len; 779 int flags; 780 caddr_t to; 781 int tolen; 782 } */ *uap; 783 { 784 struct msghdr msg; 785 struct iovec aiov; 786 int error; 787 788 msg.msg_name = uap->to; 789 msg.msg_namelen = uap->tolen; 790 msg.msg_iov = &aiov; 791 msg.msg_iovlen = 1; 792 msg.msg_control = 0; 793 #ifdef COMPAT_OLDSOCK 794 msg.msg_flags = 0; 795 #endif 796 aiov.iov_base = uap->buf; 797 aiov.iov_len = uap->len; 798 error = sendit(td, uap->s, &msg, uap->flags); 799 return (error); 800 } 801 802 #ifdef COMPAT_OLDSOCK 803 /* 804 * MPSAFE 805 */ 806 int 807 osend(td, uap) 808 struct thread *td; 809 register struct osend_args /* { 810 int s; 811 caddr_t buf; 812 int len; 813 int flags; 814 } */ *uap; 815 { 816 struct msghdr msg; 817 struct iovec aiov; 818 int error; 819 820 msg.msg_name = 0; 821 msg.msg_namelen = 0; 822 msg.msg_iov = &aiov; 823 msg.msg_iovlen = 1; 824 aiov.iov_base = uap->buf; 825 aiov.iov_len = uap->len; 826 msg.msg_control = 0; 827 msg.msg_flags = 0; 828 error = sendit(td, uap->s, &msg, uap->flags); 829 return (error); 830 } 831 832 /* 833 * MPSAFE 834 */ 835 int 836 osendmsg(td, uap) 837 struct thread *td; 838 register struct osendmsg_args /* { 839 int s; 840 caddr_t msg; 841 int flags; 842 } */ *uap; 843 { 844 struct msghdr msg; 845 struct iovec aiov[UIO_SMALLIOV], *iov; 846 int error; 847 848 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 849 if (error) 850 goto done2; 851 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 852 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 853 error = EMSGSIZE; 854 goto done2; 855 } 856 MALLOC(iov, struct iovec *, 857 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 858 M_WAITOK); 859 } else { 860 iov = aiov; 861 } 862 error = copyin(msg.msg_iov, iov, 863 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 864 if (error) 865 goto done; 866 msg.msg_flags = MSG_COMPAT; 867 msg.msg_iov = iov; 868 error = sendit(td, uap->s, &msg, uap->flags); 869 done: 870 if (iov != aiov) 871 FREE(iov, M_IOV); 872 done2: 873 return (error); 874 } 875 #endif 876 877 /* 878 * MPSAFE 879 */ 880 int 881 sendmsg(td, uap) 882 struct thread *td; 883 register struct sendmsg_args /* { 884 int s; 885 caddr_t msg; 886 int flags; 887 } */ *uap; 888 { 889 struct msghdr msg; 890 struct iovec aiov[UIO_SMALLIOV], *iov; 891 int error; 892 893 error = copyin(uap->msg, &msg, sizeof (msg)); 894 if (error) 895 goto done2; 896 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 897 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 898 error = EMSGSIZE; 899 goto done2; 900 } 901 MALLOC(iov, struct iovec *, 902 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 903 M_WAITOK); 904 } else { 905 iov = aiov; 906 } 907 if (msg.msg_iovlen && 908 (error = copyin(msg.msg_iov, iov, 909 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 910 goto done; 911 msg.msg_iov = iov; 912 #ifdef COMPAT_OLDSOCK 913 msg.msg_flags = 0; 914 #endif 915 error = sendit(td, uap->s, &msg, uap->flags); 916 done: 917 if (iov != aiov) 918 FREE(iov, M_IOV); 919 done2: 920 return (error); 921 } 922 923 static int 924 recvit(td, s, mp, namelenp) 925 register struct thread *td; 926 int s; 927 register struct msghdr *mp; 928 void *namelenp; 929 { 930 struct uio auio; 931 register struct iovec *iov; 932 register int i; 933 int len, error; 934 struct mbuf *m, *control = 0; 935 caddr_t ctlbuf; 936 struct socket *so; 937 struct sockaddr *fromsa = 0; 938 #ifdef KTRACE 939 struct iovec *ktriov = NULL; 940 struct uio ktruio; 941 int iovlen; 942 #endif 943 944 mtx_lock(&Giant); 945 if ((error = fgetsock(td, s, &so, NULL)) != 0) { 946 mtx_unlock(&Giant); 947 return (error); 948 } 949 950 #ifdef MAC 951 error = mac_check_socket_receive(td->td_ucred, so); 952 if (error) { 953 fputsock(so); 954 mtx_unlock(&Giant); 955 return (error); 956 } 957 #endif 958 959 auio.uio_iov = mp->msg_iov; 960 auio.uio_iovcnt = mp->msg_iovlen; 961 auio.uio_segflg = UIO_USERSPACE; 962 auio.uio_rw = UIO_READ; 963 auio.uio_td = td; 964 auio.uio_offset = 0; /* XXX */ 965 auio.uio_resid = 0; 966 iov = mp->msg_iov; 967 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 968 if ((auio.uio_resid += iov->iov_len) < 0) { 969 fputsock(so); 970 return (EINVAL); 971 } 972 } 973 #ifdef KTRACE 974 if (KTRPOINT(td, KTR_GENIO)) { 975 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 976 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 977 bcopy(auio.uio_iov, ktriov, iovlen); 978 ktruio = auio; 979 } 980 #endif 981 len = auio.uio_resid; 982 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 983 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 984 &mp->msg_flags); 985 if (error) { 986 if (auio.uio_resid != len && (error == ERESTART || 987 error == EINTR || error == EWOULDBLOCK)) 988 error = 0; 989 } 990 #ifdef KTRACE 991 if (ktriov != NULL) { 992 if (error == 0) { 993 ktruio.uio_iov = ktriov; 994 ktruio.uio_resid = len - auio.uio_resid; 995 ktrgenio(s, UIO_READ, &ktruio, error); 996 } 997 FREE(ktriov, M_TEMP); 998 } 999 #endif 1000 if (error) 1001 goto out; 1002 td->td_retval[0] = len - auio.uio_resid; 1003 if (mp->msg_name) { 1004 len = mp->msg_namelen; 1005 if (len <= 0 || fromsa == 0) 1006 len = 0; 1007 else { 1008 /* save sa_len before it is destroyed by MSG_COMPAT */ 1009 len = MIN(len, fromsa->sa_len); 1010 #ifdef COMPAT_OLDSOCK 1011 if (mp->msg_flags & MSG_COMPAT) 1012 ((struct osockaddr *)fromsa)->sa_family = 1013 fromsa->sa_family; 1014 #endif 1015 error = copyout(fromsa, mp->msg_name, (unsigned)len); 1016 if (error) 1017 goto out; 1018 } 1019 mp->msg_namelen = len; 1020 if (namelenp && 1021 (error = copyout(&len, namelenp, sizeof (int)))) { 1022 #ifdef COMPAT_OLDSOCK 1023 if (mp->msg_flags & MSG_COMPAT) 1024 error = 0; /* old recvfrom didn't check */ 1025 else 1026 #endif 1027 goto out; 1028 } 1029 } 1030 if (mp->msg_control) { 1031 #ifdef COMPAT_OLDSOCK 1032 /* 1033 * We assume that old recvmsg calls won't receive access 1034 * rights and other control info, esp. as control info 1035 * is always optional and those options didn't exist in 4.3. 1036 * If we receive rights, trim the cmsghdr; anything else 1037 * is tossed. 1038 */ 1039 if (control && mp->msg_flags & MSG_COMPAT) { 1040 if (mtod(control, struct cmsghdr *)->cmsg_level != 1041 SOL_SOCKET || 1042 mtod(control, struct cmsghdr *)->cmsg_type != 1043 SCM_RIGHTS) { 1044 mp->msg_controllen = 0; 1045 goto out; 1046 } 1047 control->m_len -= sizeof (struct cmsghdr); 1048 control->m_data += sizeof (struct cmsghdr); 1049 } 1050 #endif 1051 len = mp->msg_controllen; 1052 m = control; 1053 mp->msg_controllen = 0; 1054 ctlbuf = mp->msg_control; 1055 1056 while (m && len > 0) { 1057 unsigned int tocopy; 1058 1059 if (len >= m->m_len) 1060 tocopy = m->m_len; 1061 else { 1062 mp->msg_flags |= MSG_CTRUNC; 1063 tocopy = len; 1064 } 1065 1066 if ((error = copyout(mtod(m, caddr_t), 1067 ctlbuf, tocopy)) != 0) 1068 goto out; 1069 1070 ctlbuf += tocopy; 1071 len -= tocopy; 1072 m = m->m_next; 1073 } 1074 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1075 } 1076 out: 1077 fputsock(so); 1078 mtx_unlock(&Giant); 1079 if (fromsa) 1080 FREE(fromsa, M_SONAME); 1081 if (control) 1082 m_freem(control); 1083 return (error); 1084 } 1085 1086 /* 1087 * MPSAFE 1088 */ 1089 int 1090 recvfrom(td, uap) 1091 struct thread *td; 1092 register struct recvfrom_args /* { 1093 int s; 1094 caddr_t buf; 1095 size_t len; 1096 int flags; 1097 caddr_t from; 1098 int *fromlenaddr; 1099 } */ *uap; 1100 { 1101 struct msghdr msg; 1102 struct iovec aiov; 1103 int error; 1104 1105 if (uap->fromlenaddr) { 1106 error = copyin(uap->fromlenaddr, 1107 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1108 if (error) 1109 goto done2; 1110 } else { 1111 msg.msg_namelen = 0; 1112 } 1113 msg.msg_name = uap->from; 1114 msg.msg_iov = &aiov; 1115 msg.msg_iovlen = 1; 1116 aiov.iov_base = uap->buf; 1117 aiov.iov_len = uap->len; 1118 msg.msg_control = 0; 1119 msg.msg_flags = uap->flags; 1120 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1121 done2: 1122 return(error); 1123 } 1124 1125 #ifdef COMPAT_OLDSOCK 1126 /* 1127 * MPSAFE 1128 */ 1129 int 1130 orecvfrom(td, uap) 1131 struct thread *td; 1132 struct recvfrom_args *uap; 1133 { 1134 1135 uap->flags |= MSG_COMPAT; 1136 return (recvfrom(td, uap)); 1137 } 1138 #endif 1139 1140 1141 #ifdef COMPAT_OLDSOCK 1142 /* 1143 * MPSAFE 1144 */ 1145 int 1146 orecv(td, uap) 1147 struct thread *td; 1148 register struct orecv_args /* { 1149 int s; 1150 caddr_t buf; 1151 int len; 1152 int flags; 1153 } */ *uap; 1154 { 1155 struct msghdr msg; 1156 struct iovec aiov; 1157 int error; 1158 1159 msg.msg_name = 0; 1160 msg.msg_namelen = 0; 1161 msg.msg_iov = &aiov; 1162 msg.msg_iovlen = 1; 1163 aiov.iov_base = uap->buf; 1164 aiov.iov_len = uap->len; 1165 msg.msg_control = 0; 1166 msg.msg_flags = uap->flags; 1167 error = recvit(td, uap->s, &msg, NULL); 1168 return (error); 1169 } 1170 1171 /* 1172 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1173 * overlays the new one, missing only the flags, and with the (old) access 1174 * rights where the control fields are now. 1175 * 1176 * MPSAFE 1177 */ 1178 int 1179 orecvmsg(td, uap) 1180 struct thread *td; 1181 register struct orecvmsg_args /* { 1182 int s; 1183 struct omsghdr *msg; 1184 int flags; 1185 } */ *uap; 1186 { 1187 struct msghdr msg; 1188 struct iovec aiov[UIO_SMALLIOV], *iov; 1189 int error; 1190 1191 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1192 if (error) 1193 return (error); 1194 1195 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1196 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1197 error = EMSGSIZE; 1198 goto done2; 1199 } 1200 MALLOC(iov, struct iovec *, 1201 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1202 M_WAITOK); 1203 } else { 1204 iov = aiov; 1205 } 1206 msg.msg_flags = uap->flags | MSG_COMPAT; 1207 error = copyin(msg.msg_iov, iov, 1208 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1209 if (error) 1210 goto done; 1211 msg.msg_iov = iov; 1212 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1213 1214 if (msg.msg_controllen && error == 0) 1215 error = copyout(&msg.msg_controllen, 1216 &uap->msg->msg_accrightslen, sizeof (int)); 1217 done: 1218 if (iov != aiov) 1219 FREE(iov, M_IOV); 1220 done2: 1221 return (error); 1222 } 1223 #endif 1224 1225 /* 1226 * MPSAFE 1227 */ 1228 int 1229 recvmsg(td, uap) 1230 struct thread *td; 1231 register struct recvmsg_args /* { 1232 int s; 1233 struct msghdr *msg; 1234 int flags; 1235 } */ *uap; 1236 { 1237 struct msghdr msg; 1238 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1239 register int error; 1240 1241 error = copyin(uap->msg, &msg, sizeof (msg)); 1242 if (error) 1243 goto done2; 1244 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1245 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1246 error = EMSGSIZE; 1247 goto done2; 1248 } 1249 MALLOC(iov, struct iovec *, 1250 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1251 M_WAITOK); 1252 } else { 1253 iov = aiov; 1254 } 1255 #ifdef COMPAT_OLDSOCK 1256 msg.msg_flags = uap->flags &~ MSG_COMPAT; 1257 #else 1258 msg.msg_flags = uap->flags; 1259 #endif 1260 uiov = msg.msg_iov; 1261 msg.msg_iov = iov; 1262 error = copyin(uiov, iov, 1263 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1264 if (error) 1265 goto done; 1266 error = recvit(td, uap->s, &msg, NULL); 1267 if (!error) { 1268 msg.msg_iov = uiov; 1269 error = copyout(&msg, uap->msg, sizeof(msg)); 1270 } 1271 done: 1272 if (iov != aiov) 1273 FREE(iov, M_IOV); 1274 done2: 1275 return (error); 1276 } 1277 1278 /* 1279 * MPSAFE 1280 */ 1281 /* ARGSUSED */ 1282 int 1283 shutdown(td, uap) 1284 struct thread *td; 1285 register struct shutdown_args /* { 1286 int s; 1287 int how; 1288 } */ *uap; 1289 { 1290 struct socket *so; 1291 int error; 1292 1293 mtx_lock(&Giant); 1294 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1295 error = soshutdown(so, uap->how); 1296 fputsock(so); 1297 } 1298 mtx_unlock(&Giant); 1299 return(error); 1300 } 1301 1302 /* 1303 * MPSAFE 1304 */ 1305 /* ARGSUSED */ 1306 int 1307 setsockopt(td, uap) 1308 struct thread *td; 1309 register struct setsockopt_args /* { 1310 int s; 1311 int level; 1312 int name; 1313 caddr_t val; 1314 int valsize; 1315 } */ *uap; 1316 { 1317 struct socket *so; 1318 struct sockopt sopt; 1319 int error; 1320 1321 if (uap->val == 0 && uap->valsize != 0) 1322 return (EFAULT); 1323 if (uap->valsize < 0) 1324 return (EINVAL); 1325 1326 mtx_lock(&Giant); 1327 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1328 sopt.sopt_dir = SOPT_SET; 1329 sopt.sopt_level = uap->level; 1330 sopt.sopt_name = uap->name; 1331 sopt.sopt_val = uap->val; 1332 sopt.sopt_valsize = uap->valsize; 1333 sopt.sopt_td = td; 1334 error = sosetopt(so, &sopt); 1335 fputsock(so); 1336 } 1337 mtx_unlock(&Giant); 1338 return(error); 1339 } 1340 1341 /* 1342 * MPSAFE 1343 */ 1344 /* ARGSUSED */ 1345 int 1346 getsockopt(td, uap) 1347 struct thread *td; 1348 register struct getsockopt_args /* { 1349 int s; 1350 int level; 1351 int name; 1352 caddr_t val; 1353 int *avalsize; 1354 } */ *uap; 1355 { 1356 int valsize, error; 1357 struct socket *so; 1358 struct sockopt sopt; 1359 1360 mtx_lock(&Giant); 1361 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1362 goto done2; 1363 if (uap->val) { 1364 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1365 if (error) 1366 goto done1; 1367 if (valsize < 0) { 1368 error = EINVAL; 1369 goto done1; 1370 } 1371 } else { 1372 valsize = 0; 1373 } 1374 1375 sopt.sopt_dir = SOPT_GET; 1376 sopt.sopt_level = uap->level; 1377 sopt.sopt_name = uap->name; 1378 sopt.sopt_val = uap->val; 1379 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1380 sopt.sopt_td = td; 1381 1382 error = sogetopt(so, &sopt); 1383 if (error == 0) { 1384 valsize = sopt.sopt_valsize; 1385 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1386 } 1387 done1: 1388 fputsock(so); 1389 done2: 1390 mtx_unlock(&Giant); 1391 return (error); 1392 } 1393 1394 /* 1395 * getsockname1() - Get socket name. 1396 * 1397 * MPSAFE 1398 */ 1399 /* ARGSUSED */ 1400 static int 1401 getsockname1(td, uap, compat) 1402 struct thread *td; 1403 register struct getsockname_args /* { 1404 int fdes; 1405 caddr_t asa; 1406 int *alen; 1407 } */ *uap; 1408 int compat; 1409 { 1410 struct socket *so; 1411 struct sockaddr *sa; 1412 int len, error; 1413 1414 mtx_lock(&Giant); 1415 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1416 goto done2; 1417 error = copyin(uap->alen, &len, sizeof (len)); 1418 if (error) 1419 goto done1; 1420 if (len < 0) { 1421 error = EINVAL; 1422 goto done1; 1423 } 1424 sa = 0; 1425 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1426 if (error) 1427 goto bad; 1428 if (sa == 0) { 1429 len = 0; 1430 goto gotnothing; 1431 } 1432 1433 len = MIN(len, sa->sa_len); 1434 #ifdef COMPAT_OLDSOCK 1435 if (compat) 1436 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1437 #endif 1438 error = copyout(sa, uap->asa, (u_int)len); 1439 if (error == 0) 1440 gotnothing: 1441 error = copyout(&len, uap->alen, sizeof (len)); 1442 bad: 1443 if (sa) 1444 FREE(sa, M_SONAME); 1445 done1: 1446 fputsock(so); 1447 done2: 1448 mtx_unlock(&Giant); 1449 return (error); 1450 } 1451 1452 /* 1453 * MPSAFE 1454 */ 1455 int 1456 getsockname(td, uap) 1457 struct thread *td; 1458 struct getsockname_args *uap; 1459 { 1460 1461 return (getsockname1(td, uap, 0)); 1462 } 1463 1464 #ifdef COMPAT_OLDSOCK 1465 /* 1466 * MPSAFE 1467 */ 1468 int 1469 ogetsockname(td, uap) 1470 struct thread *td; 1471 struct getsockname_args *uap; 1472 { 1473 1474 return (getsockname1(td, uap, 1)); 1475 } 1476 #endif /* COMPAT_OLDSOCK */ 1477 1478 /* 1479 * getpeername1() - Get name of peer for connected socket. 1480 * 1481 * MPSAFE 1482 */ 1483 /* ARGSUSED */ 1484 static int 1485 getpeername1(td, uap, compat) 1486 struct thread *td; 1487 register struct getpeername_args /* { 1488 int fdes; 1489 caddr_t asa; 1490 int *alen; 1491 } */ *uap; 1492 int compat; 1493 { 1494 struct socket *so; 1495 struct sockaddr *sa; 1496 int len, error; 1497 1498 mtx_lock(&Giant); 1499 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1500 goto done2; 1501 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1502 error = ENOTCONN; 1503 goto done1; 1504 } 1505 error = copyin(uap->alen, &len, sizeof (len)); 1506 if (error) 1507 goto done1; 1508 if (len < 0) { 1509 error = EINVAL; 1510 goto done1; 1511 } 1512 sa = 0; 1513 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1514 if (error) 1515 goto bad; 1516 if (sa == 0) { 1517 len = 0; 1518 goto gotnothing; 1519 } 1520 len = MIN(len, sa->sa_len); 1521 #ifdef COMPAT_OLDSOCK 1522 if (compat) 1523 ((struct osockaddr *)sa)->sa_family = 1524 sa->sa_family; 1525 #endif 1526 error = copyout(sa, uap->asa, (u_int)len); 1527 if (error) 1528 goto bad; 1529 gotnothing: 1530 error = copyout(&len, uap->alen, sizeof (len)); 1531 bad: 1532 if (sa) 1533 FREE(sa, M_SONAME); 1534 done1: 1535 fputsock(so); 1536 done2: 1537 mtx_unlock(&Giant); 1538 return (error); 1539 } 1540 1541 /* 1542 * MPSAFE 1543 */ 1544 int 1545 getpeername(td, uap) 1546 struct thread *td; 1547 struct getpeername_args *uap; 1548 { 1549 1550 return (getpeername1(td, uap, 0)); 1551 } 1552 1553 #ifdef COMPAT_OLDSOCK 1554 /* 1555 * MPSAFE 1556 */ 1557 int 1558 ogetpeername(td, uap) 1559 struct thread *td; 1560 struct ogetpeername_args *uap; 1561 { 1562 1563 /* XXX uap should have type `getpeername_args *' to begin with. */ 1564 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1565 } 1566 #endif /* COMPAT_OLDSOCK */ 1567 1568 int 1569 sockargs(mp, buf, buflen, type) 1570 struct mbuf **mp; 1571 caddr_t buf; 1572 int buflen, type; 1573 { 1574 register struct sockaddr *sa; 1575 register struct mbuf *m; 1576 int error; 1577 1578 if ((u_int)buflen > MLEN) { 1579 #ifdef COMPAT_OLDSOCK 1580 if (type == MT_SONAME && (u_int)buflen <= 112) 1581 buflen = MLEN; /* unix domain compat. hack */ 1582 else 1583 #endif 1584 return (EINVAL); 1585 } 1586 m = m_get(M_TRYWAIT, type); 1587 if (m == NULL) 1588 return (ENOBUFS); 1589 m->m_len = buflen; 1590 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1591 if (error) 1592 (void) m_free(m); 1593 else { 1594 *mp = m; 1595 if (type == MT_SONAME) { 1596 sa = mtod(m, struct sockaddr *); 1597 1598 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1599 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1600 sa->sa_family = sa->sa_len; 1601 #endif 1602 sa->sa_len = buflen; 1603 } 1604 } 1605 return (error); 1606 } 1607 1608 int 1609 getsockaddr(namp, uaddr, len) 1610 struct sockaddr **namp; 1611 caddr_t uaddr; 1612 size_t len; 1613 { 1614 struct sockaddr *sa; 1615 int error; 1616 1617 if (len > SOCK_MAXADDRLEN) 1618 return ENAMETOOLONG; 1619 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1620 error = copyin(uaddr, sa, len); 1621 if (error) { 1622 FREE(sa, M_SONAME); 1623 } else { 1624 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1625 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1626 sa->sa_family = sa->sa_len; 1627 #endif 1628 sa->sa_len = len; 1629 *namp = sa; 1630 } 1631 return error; 1632 } 1633 1634 /* 1635 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) 1636 */ 1637 static void 1638 sf_buf_init(void *arg) 1639 { 1640 struct sf_buf *sf_bufs; 1641 vm_offset_t sf_base; 1642 int i; 1643 1644 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF); 1645 mtx_lock(&sf_freelist.sf_lock); 1646 SLIST_INIT(&sf_freelist.sf_head); 1647 sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE); 1648 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, 1649 M_NOWAIT | M_ZERO); 1650 for (i = 0; i < nsfbufs; i++) { 1651 sf_bufs[i].kva = sf_base + i * PAGE_SIZE; 1652 SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list); 1653 } 1654 sf_buf_alloc_want = 0; 1655 mtx_unlock(&sf_freelist.sf_lock); 1656 } 1657 1658 /* 1659 * Get an sf_buf from the freelist. Will block if none are available. 1660 */ 1661 struct sf_buf * 1662 sf_buf_alloc(struct vm_page *m) 1663 { 1664 struct sf_buf *sf; 1665 int error; 1666 1667 mtx_lock(&sf_freelist.sf_lock); 1668 while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) { 1669 sf_buf_alloc_want++; 1670 error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH, 1671 "sfbufa", 0); 1672 sf_buf_alloc_want--; 1673 1674 /* 1675 * If we got a signal, don't risk going back to sleep. 1676 */ 1677 if (error) 1678 break; 1679 } 1680 if (sf != NULL) { 1681 SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list); 1682 sf->m = m; 1683 pmap_qenter(sf->kva, &sf->m, 1); 1684 } 1685 mtx_unlock(&sf_freelist.sf_lock); 1686 return (sf); 1687 } 1688 1689 /* 1690 * Detatch mapped page and release resources back to the system. 1691 */ 1692 void 1693 sf_buf_free(void *addr, void *args) 1694 { 1695 struct sf_buf *sf; 1696 struct vm_page *m; 1697 1698 sf = args; 1699 pmap_qremove((vm_offset_t)addr, 1); 1700 m = sf->m; 1701 vm_page_lock_queues(); 1702 vm_page_unwire(m, 0); 1703 /* 1704 * Check for the object going away on us. This can 1705 * happen since we don't hold a reference to it. 1706 * If so, we're responsible for freeing the page. 1707 */ 1708 if (m->wire_count == 0 && m->object == NULL) 1709 vm_page_free(m); 1710 vm_page_unlock_queues(); 1711 sf->m = NULL; 1712 mtx_lock(&sf_freelist.sf_lock); 1713 SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list); 1714 if (sf_buf_alloc_want > 0) 1715 wakeup_one(&sf_freelist); 1716 mtx_unlock(&sf_freelist.sf_lock); 1717 } 1718 1719 /* 1720 * sendfile(2) 1721 * 1722 * MPSAFE 1723 * 1724 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1725 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1726 * 1727 * Send a file specified by 'fd' and starting at 'offset' to a socket 1728 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1729 * nbytes == 0. Optionally add a header and/or trailer to the socket 1730 * output. If specified, write the total number of bytes sent into *sbytes. 1731 * 1732 */ 1733 int 1734 sendfile(struct thread *td, struct sendfile_args *uap) 1735 { 1736 1737 return (do_sendfile(td, uap, 0)); 1738 } 1739 1740 #ifdef COMPAT_FREEBSD4 1741 int 1742 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1743 { 1744 struct sendfile_args args; 1745 1746 args.fd = uap->fd; 1747 args.s = uap->s; 1748 args.offset = uap->offset; 1749 args.nbytes = uap->nbytes; 1750 args.hdtr = uap->hdtr; 1751 args.sbytes = uap->sbytes; 1752 args.flags = uap->flags; 1753 1754 return (do_sendfile(td, &args, 1)); 1755 } 1756 #endif /* COMPAT_FREEBSD4 */ 1757 1758 static int 1759 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1760 { 1761 struct vnode *vp; 1762 struct vm_object *obj; 1763 struct socket *so = NULL; 1764 struct mbuf *m; 1765 struct sf_buf *sf; 1766 struct vm_page *pg; 1767 struct writev_args nuap; 1768 struct sf_hdtr hdtr; 1769 off_t off, xfsize, hdtr_size, sbytes = 0; 1770 int error, s; 1771 1772 mtx_lock(&Giant); 1773 1774 hdtr_size = 0; 1775 1776 /* 1777 * The descriptor must be a regular file and have a backing VM object. 1778 */ 1779 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1780 goto done; 1781 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1782 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1783 error = EINVAL; 1784 VOP_UNLOCK(vp, 0, td); 1785 goto done; 1786 } 1787 VOP_UNLOCK(vp, 0, td); 1788 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1789 goto done; 1790 if (so->so_type != SOCK_STREAM) { 1791 error = EINVAL; 1792 goto done; 1793 } 1794 if ((so->so_state & SS_ISCONNECTED) == 0) { 1795 error = ENOTCONN; 1796 goto done; 1797 } 1798 if (uap->offset < 0) { 1799 error = EINVAL; 1800 goto done; 1801 } 1802 1803 #ifdef MAC 1804 error = mac_check_socket_send(td->td_ucred, so); 1805 if (error) 1806 goto done; 1807 #endif 1808 1809 /* 1810 * If specified, get the pointer to the sf_hdtr struct for 1811 * any headers/trailers. 1812 */ 1813 if (uap->hdtr != NULL) { 1814 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1815 if (error) 1816 goto done; 1817 /* 1818 * Send any headers. Wimp out and use writev(2). 1819 */ 1820 if (hdtr.headers != NULL) { 1821 nuap.fd = uap->s; 1822 nuap.iovp = hdtr.headers; 1823 nuap.iovcnt = hdtr.hdr_cnt; 1824 error = writev(td, &nuap); 1825 if (error) 1826 goto done; 1827 if (compat) 1828 sbytes += td->td_retval[0]; 1829 else 1830 hdtr_size += td->td_retval[0]; 1831 } 1832 } 1833 1834 /* 1835 * Protect against multiple writers to the socket. 1836 */ 1837 (void) sblock(&so->so_snd, M_WAITOK); 1838 1839 /* 1840 * Loop through the pages in the file, starting with the requested 1841 * offset. Get a file page (do I/O if necessary), map the file page 1842 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1843 * it on the socket. 1844 */ 1845 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1846 vm_pindex_t pindex; 1847 vm_offset_t pgoff; 1848 1849 pindex = OFF_TO_IDX(off); 1850 VM_OBJECT_LOCK(obj); 1851 retry_lookup: 1852 /* 1853 * Calculate the amount to transfer. Not to exceed a page, 1854 * the EOF, or the passed in nbytes. 1855 */ 1856 xfsize = obj->un_pager.vnp.vnp_size - off; 1857 VM_OBJECT_UNLOCK(obj); 1858 if (xfsize > PAGE_SIZE) 1859 xfsize = PAGE_SIZE; 1860 pgoff = (vm_offset_t)(off & PAGE_MASK); 1861 if (PAGE_SIZE - pgoff < xfsize) 1862 xfsize = PAGE_SIZE - pgoff; 1863 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1864 xfsize = uap->nbytes - sbytes; 1865 if (xfsize <= 0) 1866 break; 1867 /* 1868 * Optimize the non-blocking case by looking at the socket space 1869 * before going to the extra work of constituting the sf_buf. 1870 */ 1871 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1872 if (so->so_state & SS_CANTSENDMORE) 1873 error = EPIPE; 1874 else 1875 error = EAGAIN; 1876 sbunlock(&so->so_snd); 1877 goto done; 1878 } 1879 VM_OBJECT_LOCK(obj); 1880 /* 1881 * Attempt to look up the page. 1882 * 1883 * Allocate if not found 1884 * 1885 * Wait and loop if busy. 1886 */ 1887 pg = vm_page_lookup(obj, pindex); 1888 1889 if (pg == NULL) { 1890 pg = vm_page_alloc(obj, pindex, 1891 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1892 if (pg == NULL) { 1893 VM_OBJECT_UNLOCK(obj); 1894 VM_WAIT; 1895 VM_OBJECT_LOCK(obj); 1896 goto retry_lookup; 1897 } 1898 vm_page_lock_queues(); 1899 vm_page_wakeup(pg); 1900 } else { 1901 vm_page_lock_queues(); 1902 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1903 goto retry_lookup; 1904 /* 1905 * Wire the page so it does not get ripped out from 1906 * under us. 1907 */ 1908 vm_page_wire(pg); 1909 } 1910 1911 /* 1912 * If page is not valid for what we need, initiate I/O 1913 */ 1914 1915 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1916 int bsize, resid; 1917 1918 /* 1919 * Ensure that our page is still around when the I/O 1920 * completes. 1921 */ 1922 vm_page_io_start(pg); 1923 vm_page_unlock_queues(); 1924 VM_OBJECT_UNLOCK(obj); 1925 1926 /* 1927 * Get the page from backing store. 1928 */ 1929 bsize = vp->v_mount->mnt_stat.f_iosize; 1930 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1931 /* 1932 * XXXMAC: Because we don't have fp->f_cred here, 1933 * we pass in NOCRED. This is probably wrong, but 1934 * is consistent with our original implementation. 1935 */ 1936 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1937 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1938 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1939 td->td_ucred, NOCRED, &resid, td); 1940 VOP_UNLOCK(vp, 0, td); 1941 if (error) 1942 VM_OBJECT_LOCK(obj); 1943 vm_page_lock_queues(); 1944 vm_page_flag_clear(pg, PG_ZERO); 1945 vm_page_io_finish(pg); 1946 if (error) { 1947 vm_page_unwire(pg, 0); 1948 /* 1949 * See if anyone else might know about this page. 1950 * If not and it is not valid, then free it. 1951 */ 1952 if (pg->wire_count == 0 && pg->valid == 0 && 1953 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1954 pg->hold_count == 0) { 1955 vm_page_busy(pg); 1956 vm_page_free(pg); 1957 } 1958 vm_page_unlock_queues(); 1959 VM_OBJECT_UNLOCK(obj); 1960 sbunlock(&so->so_snd); 1961 goto done; 1962 } 1963 } else 1964 VM_OBJECT_UNLOCK(obj); 1965 vm_page_unlock_queues(); 1966 1967 /* 1968 * Get a sendfile buf. We usually wait as long as necessary, 1969 * but this wait can be interrupted. 1970 */ 1971 if ((sf = sf_buf_alloc(pg)) == NULL) { 1972 vm_page_lock_queues(); 1973 vm_page_unwire(pg, 0); 1974 if (pg->wire_count == 0 && pg->object == NULL) 1975 vm_page_free(pg); 1976 vm_page_unlock_queues(); 1977 sbunlock(&so->so_snd); 1978 error = EINTR; 1979 goto done; 1980 } 1981 1982 /* 1983 * Get an mbuf header and set it up as having external storage. 1984 */ 1985 MGETHDR(m, M_TRYWAIT, MT_DATA); 1986 if (m == NULL) { 1987 error = ENOBUFS; 1988 sf_buf_free((void *)sf->kva, sf); 1989 sbunlock(&so->so_snd); 1990 goto done; 1991 } 1992 /* 1993 * Setup external storage for mbuf. 1994 */ 1995 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, sf, M_RDONLY, 1996 EXT_SFBUF); 1997 m->m_data = (char *) sf->kva + pgoff; 1998 m->m_pkthdr.len = m->m_len = xfsize; 1999 /* 2000 * Add the buffer to the socket buffer chain. 2001 */ 2002 s = splnet(); 2003 retry_space: 2004 /* 2005 * Make sure that the socket is still able to take more data. 2006 * CANTSENDMORE being true usually means that the connection 2007 * was closed. so_error is true when an error was sensed after 2008 * a previous send. 2009 * The state is checked after the page mapping and buffer 2010 * allocation above since those operations may block and make 2011 * any socket checks stale. From this point forward, nothing 2012 * blocks before the pru_send (or more accurately, any blocking 2013 * results in a loop back to here to re-check). 2014 */ 2015 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 2016 if (so->so_state & SS_CANTSENDMORE) { 2017 error = EPIPE; 2018 } else { 2019 error = so->so_error; 2020 so->so_error = 0; 2021 } 2022 m_freem(m); 2023 sbunlock(&so->so_snd); 2024 splx(s); 2025 goto done; 2026 } 2027 /* 2028 * Wait for socket space to become available. We do this just 2029 * after checking the connection state above in order to avoid 2030 * a race condition with sbwait(). 2031 */ 2032 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 2033 if (so->so_state & SS_NBIO) { 2034 m_freem(m); 2035 sbunlock(&so->so_snd); 2036 splx(s); 2037 error = EAGAIN; 2038 goto done; 2039 } 2040 error = sbwait(&so->so_snd); 2041 /* 2042 * An error from sbwait usually indicates that we've 2043 * been interrupted by a signal. If we've sent anything 2044 * then return bytes sent, otherwise return the error. 2045 */ 2046 if (error) { 2047 m_freem(m); 2048 sbunlock(&so->so_snd); 2049 splx(s); 2050 goto done; 2051 } 2052 goto retry_space; 2053 } 2054 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2055 splx(s); 2056 if (error) { 2057 sbunlock(&so->so_snd); 2058 goto done; 2059 } 2060 } 2061 sbunlock(&so->so_snd); 2062 2063 /* 2064 * Send trailers. Wimp out and use writev(2). 2065 */ 2066 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 2067 nuap.fd = uap->s; 2068 nuap.iovp = hdtr.trailers; 2069 nuap.iovcnt = hdtr.trl_cnt; 2070 error = writev(td, &nuap); 2071 if (error) 2072 goto done; 2073 if (compat) 2074 sbytes += td->td_retval[0]; 2075 else 2076 hdtr_size += td->td_retval[0]; 2077 } 2078 2079 done: 2080 /* 2081 * If there was no error we have to clear td->td_retval[0] 2082 * because it may have been set by writev. 2083 */ 2084 if (error == 0) { 2085 td->td_retval[0] = 0; 2086 } 2087 if (uap->sbytes != NULL) { 2088 if (!compat) 2089 sbytes += hdtr_size; 2090 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2091 } 2092 if (vp) 2093 vrele(vp); 2094 if (so) 2095 fputsock(so); 2096 mtx_unlock(&Giant); 2097 return (error); 2098 } 2099