1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 * $FreeBSD$ 38 */ 39 40 #include "opt_compat.h" 41 #include "opt_ktrace.h" 42 #include "opt_mac.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/mac.h> 49 #include <sys/mutex.h> 50 #include <sys/sysproto.h> 51 #include <sys/malloc.h> 52 #include <sys/filedesc.h> 53 #include <sys/event.h> 54 #include <sys/proc.h> 55 #include <sys/fcntl.h> 56 #include <sys/file.h> 57 #include <sys/filio.h> 58 #include <sys/mount.h> 59 #include <sys/mbuf.h> 60 #include <sys/protosw.h> 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 #include <sys/signalvar.h> 64 #include <sys/syscallsubr.h> 65 #include <sys/uio.h> 66 #include <sys/vnode.h> 67 #ifdef KTRACE 68 #include <sys/ktrace.h> 69 #endif 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 #include <vm/vm_pageout.h> 75 #include <vm/vm_kern.h> 76 #include <vm/vm_extern.h> 77 78 static void sf_buf_init(void *arg); 79 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) 80 81 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 82 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 83 84 static int accept1(struct thread *td, struct accept_args *uap, int compat); 85 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 86 static int getsockname1(struct thread *td, struct getsockname_args *uap, 87 int compat); 88 static int getpeername1(struct thread *td, struct getpeername_args *uap, 89 int compat); 90 91 /* 92 * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the 93 * sf_freelist head with the sf_lock mutex. 94 */ 95 static struct { 96 SLIST_HEAD(, sf_buf) sf_head; 97 struct mtx sf_lock; 98 } sf_freelist; 99 100 static u_int sf_buf_alloc_want; 101 102 /* 103 * System call interface to the socket abstraction. 104 */ 105 #if defined(COMPAT_43) || defined(COMPAT_SUNOS) 106 #define COMPAT_OLDSOCK 107 #endif 108 109 /* 110 * MPSAFE 111 */ 112 int 113 socket(td, uap) 114 struct thread *td; 115 register struct socket_args /* { 116 int domain; 117 int type; 118 int protocol; 119 } */ *uap; 120 { 121 struct filedesc *fdp; 122 struct socket *so; 123 struct file *fp; 124 int fd, error; 125 126 mtx_lock(&Giant); 127 fdp = td->td_proc->p_fd; 128 error = falloc(td, &fp, &fd); 129 if (error) 130 goto done2; 131 fhold(fp); 132 error = socreate(uap->domain, &so, uap->type, uap->protocol, 133 td->td_ucred, td); 134 FILEDESC_LOCK(fdp); 135 if (error) { 136 if (fdp->fd_ofiles[fd] == fp) { 137 fdp->fd_ofiles[fd] = NULL; 138 FILEDESC_UNLOCK(fdp); 139 fdrop(fp, td); 140 } else 141 FILEDESC_UNLOCK(fdp); 142 } else { 143 fp->f_data = so; /* already has ref count */ 144 fp->f_flag = FREAD|FWRITE; 145 fp->f_ops = &socketops; 146 fp->f_type = DTYPE_SOCKET; 147 FILEDESC_UNLOCK(fdp); 148 td->td_retval[0] = fd; 149 } 150 fdrop(fp, td); 151 done2: 152 mtx_unlock(&Giant); 153 return (error); 154 } 155 156 /* 157 * MPSAFE 158 */ 159 /* ARGSUSED */ 160 int 161 bind(td, uap) 162 struct thread *td; 163 register struct bind_args /* { 164 int s; 165 caddr_t name; 166 int namelen; 167 } */ *uap; 168 { 169 struct sockaddr *sa; 170 int error; 171 172 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 173 return (error); 174 175 return (kern_bind(td, uap->s, sa)); 176 } 177 178 int 179 kern_bind(td, fd, sa) 180 struct thread *td; 181 int fd; 182 struct sockaddr *sa; 183 { 184 struct socket *so; 185 int error; 186 187 mtx_lock(&Giant); 188 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 189 goto done2; 190 #ifdef MAC 191 error = mac_check_socket_bind(td->td_ucred, so, sa); 192 if (error) 193 goto done1; 194 #endif 195 error = sobind(so, sa, td); 196 #ifdef MAC 197 done1: 198 #endif 199 fputsock(so); 200 done2: 201 mtx_unlock(&Giant); 202 FREE(sa, M_SONAME); 203 return (error); 204 } 205 206 /* 207 * MPSAFE 208 */ 209 /* ARGSUSED */ 210 int 211 listen(td, uap) 212 struct thread *td; 213 register struct listen_args /* { 214 int s; 215 int backlog; 216 } */ *uap; 217 { 218 struct socket *so; 219 int error; 220 221 mtx_lock(&Giant); 222 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 223 #ifdef MAC 224 error = mac_check_socket_listen(td->td_ucred, so); 225 if (error) 226 goto done; 227 #endif 228 error = solisten(so, uap->backlog, td); 229 #ifdef MAC 230 done: 231 #endif 232 fputsock(so); 233 } 234 mtx_unlock(&Giant); 235 return(error); 236 } 237 238 /* 239 * accept1() 240 * MPSAFE 241 */ 242 static int 243 accept1(td, uap, compat) 244 struct thread *td; 245 register struct accept_args /* { 246 int s; 247 caddr_t name; 248 int *anamelen; 249 } */ *uap; 250 int compat; 251 { 252 struct filedesc *fdp; 253 struct file *nfp = NULL; 254 struct sockaddr *sa; 255 int namelen, error, s; 256 struct socket *head, *so; 257 int fd; 258 u_int fflag; 259 pid_t pgid; 260 int tmp; 261 262 mtx_lock(&Giant); 263 fdp = td->td_proc->p_fd; 264 if (uap->name) { 265 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 266 if(error) 267 goto done2; 268 if (namelen < 0) { 269 error = EINVAL; 270 goto done2; 271 } 272 } 273 error = fgetsock(td, uap->s, &head, &fflag); 274 if (error) 275 goto done2; 276 s = splnet(); 277 if ((head->so_options & SO_ACCEPTCONN) == 0) { 278 splx(s); 279 error = EINVAL; 280 goto done; 281 } 282 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 283 if (head->so_state & SS_CANTRCVMORE) { 284 head->so_error = ECONNABORTED; 285 break; 286 } 287 if ((head->so_state & SS_NBIO) != 0) { 288 head->so_error = EWOULDBLOCK; 289 break; 290 } 291 error = tsleep(&head->so_timeo, PSOCK | PCATCH, 292 "accept", 0); 293 if (error) { 294 splx(s); 295 goto done; 296 } 297 } 298 if (head->so_error) { 299 error = head->so_error; 300 head->so_error = 0; 301 splx(s); 302 goto done; 303 } 304 305 /* 306 * At this point we know that there is at least one connection 307 * ready to be accepted. Remove it from the queue prior to 308 * allocating the file descriptor for it since falloc() may 309 * block allowing another process to accept the connection 310 * instead. 311 */ 312 so = TAILQ_FIRST(&head->so_comp); 313 TAILQ_REMOVE(&head->so_comp, so, so_list); 314 head->so_qlen--; 315 316 error = falloc(td, &nfp, &fd); 317 if (error) { 318 /* 319 * Probably ran out of file descriptors. Put the 320 * unaccepted connection back onto the queue and 321 * do another wakeup so some other process might 322 * have a chance at it. 323 */ 324 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 325 head->so_qlen++; 326 wakeup_one(&head->so_timeo); 327 splx(s); 328 goto done; 329 } 330 fhold(nfp); 331 td->td_retval[0] = fd; 332 333 /* connection has been removed from the listen queue */ 334 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 335 336 so->so_state &= ~SS_COMP; 337 so->so_head = NULL; 338 pgid = fgetown(&head->so_sigio); 339 if (pgid != 0) 340 fsetown(pgid, &so->so_sigio); 341 342 FILE_LOCK(nfp); 343 soref(so); /* file descriptor reference */ 344 nfp->f_data = so; /* nfp has ref count from falloc */ 345 nfp->f_flag = fflag; 346 nfp->f_ops = &socketops; 347 nfp->f_type = DTYPE_SOCKET; 348 FILE_UNLOCK(nfp); 349 /* Sync socket nonblocking/async state with file flags */ 350 tmp = fflag & FNONBLOCK; 351 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 352 tmp = fflag & FASYNC; 353 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 354 sa = 0; 355 error = soaccept(so, &sa); 356 if (error) { 357 /* 358 * return a namelen of zero for older code which might 359 * ignore the return value from accept. 360 */ 361 if (uap->name != NULL) { 362 namelen = 0; 363 (void) copyout(&namelen, 364 uap->anamelen, sizeof(*uap->anamelen)); 365 } 366 goto noconnection; 367 } 368 if (sa == NULL) { 369 namelen = 0; 370 if (uap->name) 371 goto gotnoname; 372 splx(s); 373 error = 0; 374 goto done; 375 } 376 if (uap->name) { 377 /* check sa_len before it is destroyed */ 378 if (namelen > sa->sa_len) 379 namelen = sa->sa_len; 380 #ifdef COMPAT_OLDSOCK 381 if (compat) 382 ((struct osockaddr *)sa)->sa_family = 383 sa->sa_family; 384 #endif 385 error = copyout(sa, uap->name, (u_int)namelen); 386 if (!error) 387 gotnoname: 388 error = copyout(&namelen, 389 uap->anamelen, sizeof (*uap->anamelen)); 390 } 391 noconnection: 392 if (sa) 393 FREE(sa, M_SONAME); 394 395 /* 396 * close the new descriptor, assuming someone hasn't ripped it 397 * out from under us. 398 */ 399 if (error) { 400 FILEDESC_LOCK(fdp); 401 if (fdp->fd_ofiles[fd] == nfp) { 402 fdp->fd_ofiles[fd] = NULL; 403 FILEDESC_UNLOCK(fdp); 404 fdrop(nfp, td); 405 } else { 406 FILEDESC_UNLOCK(fdp); 407 } 408 } 409 splx(s); 410 411 /* 412 * Release explicitly held references before returning. 413 */ 414 done: 415 if (nfp != NULL) 416 fdrop(nfp, td); 417 fputsock(head); 418 done2: 419 mtx_unlock(&Giant); 420 return (error); 421 } 422 423 /* 424 * MPSAFE (accept1() is MPSAFE) 425 */ 426 int 427 accept(td, uap) 428 struct thread *td; 429 struct accept_args *uap; 430 { 431 432 return (accept1(td, uap, 0)); 433 } 434 435 #ifdef COMPAT_OLDSOCK 436 /* 437 * MPSAFE (accept1() is MPSAFE) 438 */ 439 int 440 oaccept(td, uap) 441 struct thread *td; 442 struct accept_args *uap; 443 { 444 445 return (accept1(td, uap, 1)); 446 } 447 #endif /* COMPAT_OLDSOCK */ 448 449 /* 450 * MPSAFE 451 */ 452 /* ARGSUSED */ 453 int 454 connect(td, uap) 455 struct thread *td; 456 register struct connect_args /* { 457 int s; 458 caddr_t name; 459 int namelen; 460 } */ *uap; 461 { 462 struct sockaddr *sa; 463 int error; 464 465 error = getsockaddr(&sa, uap->name, uap->namelen); 466 if (error) 467 return error; 468 469 return (kern_connect(td, uap->s, sa)); 470 } 471 472 473 int 474 kern_connect(td, fd, sa) 475 struct thread *td; 476 int fd; 477 struct sockaddr *sa; 478 { 479 struct socket *so; 480 int error, s; 481 482 mtx_lock(&Giant); 483 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 484 goto done2; 485 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 486 error = EALREADY; 487 goto done1; 488 } 489 #ifdef MAC 490 error = mac_check_socket_connect(td->td_ucred, so, sa); 491 if (error) 492 goto bad; 493 #endif 494 error = soconnect(so, sa, td); 495 if (error) 496 goto bad; 497 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 498 error = EINPROGRESS; 499 goto done1; 500 } 501 s = splnet(); 502 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 503 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0); 504 if (error) 505 break; 506 } 507 if (error == 0) { 508 error = so->so_error; 509 so->so_error = 0; 510 } 511 splx(s); 512 bad: 513 so->so_state &= ~SS_ISCONNECTING; 514 if (error == ERESTART) 515 error = EINTR; 516 done1: 517 fputsock(so); 518 done2: 519 mtx_unlock(&Giant); 520 FREE(sa, M_SONAME); 521 return (error); 522 } 523 524 /* 525 * MPSAFE 526 */ 527 int 528 socketpair(td, uap) 529 struct thread *td; 530 register struct socketpair_args /* { 531 int domain; 532 int type; 533 int protocol; 534 int *rsv; 535 } */ *uap; 536 { 537 register struct filedesc *fdp = td->td_proc->p_fd; 538 struct file *fp1, *fp2; 539 struct socket *so1, *so2; 540 int fd, error, sv[2]; 541 542 mtx_lock(&Giant); 543 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 544 td->td_ucred, td); 545 if (error) 546 goto done2; 547 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 548 td->td_ucred, td); 549 if (error) 550 goto free1; 551 error = falloc(td, &fp1, &fd); 552 if (error) 553 goto free2; 554 fhold(fp1); 555 sv[0] = fd; 556 fp1->f_data = so1; /* so1 already has ref count */ 557 error = falloc(td, &fp2, &fd); 558 if (error) 559 goto free3; 560 fhold(fp2); 561 fp2->f_data = so2; /* so2 already has ref count */ 562 sv[1] = fd; 563 error = soconnect2(so1, so2); 564 if (error) 565 goto free4; 566 if (uap->type == SOCK_DGRAM) { 567 /* 568 * Datagram socket connection is asymmetric. 569 */ 570 error = soconnect2(so2, so1); 571 if (error) 572 goto free4; 573 } 574 FILE_LOCK(fp1); 575 fp1->f_flag = FREAD|FWRITE; 576 fp1->f_ops = &socketops; 577 fp1->f_type = DTYPE_SOCKET; 578 FILE_UNLOCK(fp1); 579 FILE_LOCK(fp2); 580 fp2->f_flag = FREAD|FWRITE; 581 fp2->f_ops = &socketops; 582 fp2->f_type = DTYPE_SOCKET; 583 FILE_UNLOCK(fp2); 584 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 585 fdrop(fp1, td); 586 fdrop(fp2, td); 587 goto done2; 588 free4: 589 FILEDESC_LOCK(fdp); 590 if (fdp->fd_ofiles[sv[1]] == fp2) { 591 fdp->fd_ofiles[sv[1]] = NULL; 592 FILEDESC_UNLOCK(fdp); 593 fdrop(fp2, td); 594 } else 595 FILEDESC_UNLOCK(fdp); 596 fdrop(fp2, td); 597 free3: 598 FILEDESC_LOCK(fdp); 599 if (fdp->fd_ofiles[sv[0]] == fp1) { 600 fdp->fd_ofiles[sv[0]] = NULL; 601 FILEDESC_UNLOCK(fdp); 602 fdrop(fp1, td); 603 } else 604 FILEDESC_UNLOCK(fdp); 605 fdrop(fp1, td); 606 free2: 607 (void)soclose(so2); 608 free1: 609 (void)soclose(so1); 610 done2: 611 mtx_unlock(&Giant); 612 return (error); 613 } 614 615 static int 616 sendit(td, s, mp, flags) 617 register struct thread *td; 618 int s; 619 register struct msghdr *mp; 620 int flags; 621 { 622 struct mbuf *control; 623 struct sockaddr *to; 624 int error; 625 626 mtx_lock(&Giant); 627 if (mp->msg_name != NULL) { 628 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 629 if (error) { 630 to = NULL; 631 goto bad; 632 } 633 mp->msg_name = to; 634 } else 635 to = NULL; 636 637 if (mp->msg_control) { 638 if (mp->msg_controllen < sizeof(struct cmsghdr) 639 #ifdef COMPAT_OLDSOCK 640 && mp->msg_flags != MSG_COMPAT 641 #endif 642 ) { 643 error = EINVAL; 644 goto bad; 645 } 646 error = sockargs(&control, mp->msg_control, 647 mp->msg_controllen, MT_CONTROL); 648 if (error) 649 goto bad; 650 #ifdef COMPAT_OLDSOCK 651 if (mp->msg_flags == MSG_COMPAT) { 652 register struct cmsghdr *cm; 653 654 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 655 if (control == 0) { 656 error = ENOBUFS; 657 goto bad; 658 } else { 659 cm = mtod(control, struct cmsghdr *); 660 cm->cmsg_len = control->m_len; 661 cm->cmsg_level = SOL_SOCKET; 662 cm->cmsg_type = SCM_RIGHTS; 663 } 664 } 665 #endif 666 } else { 667 control = NULL; 668 } 669 670 error = kern_sendit(td, s, mp, flags, control); 671 672 bad: 673 if (to) 674 FREE(to, M_SONAME); 675 mtx_unlock(&Giant); 676 return (error); 677 } 678 679 int 680 kern_sendit(td, s, mp, flags, control) 681 struct thread *td; 682 int s; 683 struct msghdr *mp; 684 int flags; 685 struct mbuf *control; 686 { 687 struct uio auio; 688 struct iovec *iov; 689 struct socket *so; 690 int i; 691 int len, error; 692 #ifdef KTRACE 693 struct iovec *ktriov = NULL; 694 struct uio ktruio; 695 int iovlen; 696 #endif 697 698 if ((error = fgetsock(td, s, &so, NULL)) != 0) 699 goto bad2; 700 701 #ifdef MAC 702 error = mac_check_socket_send(td->td_ucred, so); 703 if (error) 704 goto bad; 705 #endif 706 707 auio.uio_iov = mp->msg_iov; 708 auio.uio_iovcnt = mp->msg_iovlen; 709 auio.uio_segflg = UIO_USERSPACE; 710 auio.uio_rw = UIO_WRITE; 711 auio.uio_td = td; 712 auio.uio_offset = 0; /* XXX */ 713 auio.uio_resid = 0; 714 iov = mp->msg_iov; 715 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 716 if ((auio.uio_resid += iov->iov_len) < 0) { 717 error = EINVAL; 718 goto bad; 719 } 720 } 721 #ifdef KTRACE 722 if (KTRPOINT(td, KTR_GENIO)) { 723 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 724 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 725 bcopy(auio.uio_iov, ktriov, iovlen); 726 ktruio = auio; 727 } 728 #endif 729 len = auio.uio_resid; 730 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 731 0, control, flags, td); 732 if (error) { 733 if (auio.uio_resid != len && (error == ERESTART || 734 error == EINTR || error == EWOULDBLOCK)) 735 error = 0; 736 /* Generation of SIGPIPE can be controlled per socket */ 737 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 738 PROC_LOCK(td->td_proc); 739 psignal(td->td_proc, SIGPIPE); 740 PROC_UNLOCK(td->td_proc); 741 } 742 } 743 if (error == 0) 744 td->td_retval[0] = len - auio.uio_resid; 745 #ifdef KTRACE 746 if (ktriov != NULL) { 747 if (error == 0) { 748 ktruio.uio_iov = ktriov; 749 ktruio.uio_resid = td->td_retval[0]; 750 ktrgenio(s, UIO_WRITE, &ktruio, error); 751 } 752 FREE(ktriov, M_TEMP); 753 } 754 #endif 755 bad: 756 fputsock(so); 757 bad2: 758 return (error); 759 } 760 761 /* 762 * MPSAFE 763 */ 764 int 765 sendto(td, uap) 766 struct thread *td; 767 register struct sendto_args /* { 768 int s; 769 caddr_t buf; 770 size_t len; 771 int flags; 772 caddr_t to; 773 int tolen; 774 } */ *uap; 775 { 776 struct msghdr msg; 777 struct iovec aiov; 778 int error; 779 780 msg.msg_name = uap->to; 781 msg.msg_namelen = uap->tolen; 782 msg.msg_iov = &aiov; 783 msg.msg_iovlen = 1; 784 msg.msg_control = 0; 785 #ifdef COMPAT_OLDSOCK 786 msg.msg_flags = 0; 787 #endif 788 aiov.iov_base = uap->buf; 789 aiov.iov_len = uap->len; 790 error = sendit(td, uap->s, &msg, uap->flags); 791 return (error); 792 } 793 794 #ifdef COMPAT_OLDSOCK 795 /* 796 * MPSAFE 797 */ 798 int 799 osend(td, uap) 800 struct thread *td; 801 register struct osend_args /* { 802 int s; 803 caddr_t buf; 804 int len; 805 int flags; 806 } */ *uap; 807 { 808 struct msghdr msg; 809 struct iovec aiov; 810 int error; 811 812 msg.msg_name = 0; 813 msg.msg_namelen = 0; 814 msg.msg_iov = &aiov; 815 msg.msg_iovlen = 1; 816 aiov.iov_base = uap->buf; 817 aiov.iov_len = uap->len; 818 msg.msg_control = 0; 819 msg.msg_flags = 0; 820 error = sendit(td, uap->s, &msg, uap->flags); 821 return (error); 822 } 823 824 /* 825 * MPSAFE 826 */ 827 int 828 osendmsg(td, uap) 829 struct thread *td; 830 register struct osendmsg_args /* { 831 int s; 832 caddr_t msg; 833 int flags; 834 } */ *uap; 835 { 836 struct msghdr msg; 837 struct iovec aiov[UIO_SMALLIOV], *iov; 838 int error; 839 840 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 841 if (error) 842 goto done2; 843 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 844 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 845 error = EMSGSIZE; 846 goto done2; 847 } 848 MALLOC(iov, struct iovec *, 849 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 850 M_WAITOK); 851 } else { 852 iov = aiov; 853 } 854 error = copyin(msg.msg_iov, iov, 855 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 856 if (error) 857 goto done; 858 msg.msg_flags = MSG_COMPAT; 859 msg.msg_iov = iov; 860 error = sendit(td, uap->s, &msg, uap->flags); 861 done: 862 if (iov != aiov) 863 FREE(iov, M_IOV); 864 done2: 865 return (error); 866 } 867 #endif 868 869 /* 870 * MPSAFE 871 */ 872 int 873 sendmsg(td, uap) 874 struct thread *td; 875 register struct sendmsg_args /* { 876 int s; 877 caddr_t msg; 878 int flags; 879 } */ *uap; 880 { 881 struct msghdr msg; 882 struct iovec aiov[UIO_SMALLIOV], *iov; 883 int error; 884 885 error = copyin(uap->msg, &msg, sizeof (msg)); 886 if (error) 887 goto done2; 888 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 889 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 890 error = EMSGSIZE; 891 goto done2; 892 } 893 MALLOC(iov, struct iovec *, 894 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 895 M_WAITOK); 896 } else { 897 iov = aiov; 898 } 899 if (msg.msg_iovlen && 900 (error = copyin(msg.msg_iov, iov, 901 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 902 goto done; 903 msg.msg_iov = iov; 904 #ifdef COMPAT_OLDSOCK 905 msg.msg_flags = 0; 906 #endif 907 error = sendit(td, uap->s, &msg, uap->flags); 908 done: 909 if (iov != aiov) 910 FREE(iov, M_IOV); 911 done2: 912 return (error); 913 } 914 915 static int 916 recvit(td, s, mp, namelenp) 917 register struct thread *td; 918 int s; 919 register struct msghdr *mp; 920 void *namelenp; 921 { 922 struct uio auio; 923 register struct iovec *iov; 924 register int i; 925 int len, error; 926 struct mbuf *m, *control = 0; 927 caddr_t ctlbuf; 928 struct socket *so; 929 struct sockaddr *fromsa = 0; 930 #ifdef KTRACE 931 struct iovec *ktriov = NULL; 932 struct uio ktruio; 933 int iovlen; 934 #endif 935 936 if ((error = fgetsock(td, s, &so, NULL)) != 0) 937 return (error); 938 939 #ifdef MAC 940 error = mac_check_socket_receive(td->td_ucred, so); 941 if (error) { 942 fputsock(so); 943 return (error); 944 } 945 #endif 946 947 auio.uio_iov = mp->msg_iov; 948 auio.uio_iovcnt = mp->msg_iovlen; 949 auio.uio_segflg = UIO_USERSPACE; 950 auio.uio_rw = UIO_READ; 951 auio.uio_td = td; 952 auio.uio_offset = 0; /* XXX */ 953 auio.uio_resid = 0; 954 iov = mp->msg_iov; 955 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 956 if ((auio.uio_resid += iov->iov_len) < 0) { 957 fputsock(so); 958 return (EINVAL); 959 } 960 } 961 #ifdef KTRACE 962 if (KTRPOINT(td, KTR_GENIO)) { 963 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 964 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 965 bcopy(auio.uio_iov, ktriov, iovlen); 966 ktruio = auio; 967 } 968 #endif 969 len = auio.uio_resid; 970 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 971 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 972 &mp->msg_flags); 973 if (error) { 974 if (auio.uio_resid != len && (error == ERESTART || 975 error == EINTR || error == EWOULDBLOCK)) 976 error = 0; 977 } 978 #ifdef KTRACE 979 if (ktriov != NULL) { 980 if (error == 0) { 981 ktruio.uio_iov = ktriov; 982 ktruio.uio_resid = len - auio.uio_resid; 983 ktrgenio(s, UIO_READ, &ktruio, error); 984 } 985 FREE(ktriov, M_TEMP); 986 } 987 #endif 988 if (error) 989 goto out; 990 td->td_retval[0] = len - auio.uio_resid; 991 if (mp->msg_name) { 992 len = mp->msg_namelen; 993 if (len <= 0 || fromsa == 0) 994 len = 0; 995 else { 996 /* save sa_len before it is destroyed by MSG_COMPAT */ 997 len = MIN(len, fromsa->sa_len); 998 #ifdef COMPAT_OLDSOCK 999 if (mp->msg_flags & MSG_COMPAT) 1000 ((struct osockaddr *)fromsa)->sa_family = 1001 fromsa->sa_family; 1002 #endif 1003 error = copyout(fromsa, mp->msg_name, (unsigned)len); 1004 if (error) 1005 goto out; 1006 } 1007 mp->msg_namelen = len; 1008 if (namelenp && 1009 (error = copyout(&len, namelenp, sizeof (int)))) { 1010 #ifdef COMPAT_OLDSOCK 1011 if (mp->msg_flags & MSG_COMPAT) 1012 error = 0; /* old recvfrom didn't check */ 1013 else 1014 #endif 1015 goto out; 1016 } 1017 } 1018 if (mp->msg_control) { 1019 #ifdef COMPAT_OLDSOCK 1020 /* 1021 * We assume that old recvmsg calls won't receive access 1022 * rights and other control info, esp. as control info 1023 * is always optional and those options didn't exist in 4.3. 1024 * If we receive rights, trim the cmsghdr; anything else 1025 * is tossed. 1026 */ 1027 if (control && mp->msg_flags & MSG_COMPAT) { 1028 if (mtod(control, struct cmsghdr *)->cmsg_level != 1029 SOL_SOCKET || 1030 mtod(control, struct cmsghdr *)->cmsg_type != 1031 SCM_RIGHTS) { 1032 mp->msg_controllen = 0; 1033 goto out; 1034 } 1035 control->m_len -= sizeof (struct cmsghdr); 1036 control->m_data += sizeof (struct cmsghdr); 1037 } 1038 #endif 1039 len = mp->msg_controllen; 1040 m = control; 1041 mp->msg_controllen = 0; 1042 ctlbuf = mp->msg_control; 1043 1044 while (m && len > 0) { 1045 unsigned int tocopy; 1046 1047 if (len >= m->m_len) 1048 tocopy = m->m_len; 1049 else { 1050 mp->msg_flags |= MSG_CTRUNC; 1051 tocopy = len; 1052 } 1053 1054 if ((error = copyout(mtod(m, caddr_t), 1055 ctlbuf, tocopy)) != 0) 1056 goto out; 1057 1058 ctlbuf += tocopy; 1059 len -= tocopy; 1060 m = m->m_next; 1061 } 1062 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1063 } 1064 out: 1065 fputsock(so); 1066 if (fromsa) 1067 FREE(fromsa, M_SONAME); 1068 if (control) 1069 m_freem(control); 1070 return (error); 1071 } 1072 1073 /* 1074 * MPSAFE 1075 */ 1076 int 1077 recvfrom(td, uap) 1078 struct thread *td; 1079 register struct recvfrom_args /* { 1080 int s; 1081 caddr_t buf; 1082 size_t len; 1083 int flags; 1084 caddr_t from; 1085 int *fromlenaddr; 1086 } */ *uap; 1087 { 1088 struct msghdr msg; 1089 struct iovec aiov; 1090 int error; 1091 1092 mtx_lock(&Giant); 1093 if (uap->fromlenaddr) { 1094 error = copyin(uap->fromlenaddr, 1095 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1096 if (error) 1097 goto done2; 1098 } else { 1099 msg.msg_namelen = 0; 1100 } 1101 msg.msg_name = uap->from; 1102 msg.msg_iov = &aiov; 1103 msg.msg_iovlen = 1; 1104 aiov.iov_base = uap->buf; 1105 aiov.iov_len = uap->len; 1106 msg.msg_control = 0; 1107 msg.msg_flags = uap->flags; 1108 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1109 done2: 1110 mtx_unlock(&Giant); 1111 return(error); 1112 } 1113 1114 #ifdef COMPAT_OLDSOCK 1115 /* 1116 * MPSAFE 1117 */ 1118 int 1119 orecvfrom(td, uap) 1120 struct thread *td; 1121 struct recvfrom_args *uap; 1122 { 1123 1124 uap->flags |= MSG_COMPAT; 1125 return (recvfrom(td, uap)); 1126 } 1127 #endif 1128 1129 1130 #ifdef COMPAT_OLDSOCK 1131 /* 1132 * MPSAFE 1133 */ 1134 int 1135 orecv(td, uap) 1136 struct thread *td; 1137 register struct orecv_args /* { 1138 int s; 1139 caddr_t buf; 1140 int len; 1141 int flags; 1142 } */ *uap; 1143 { 1144 struct msghdr msg; 1145 struct iovec aiov; 1146 int error; 1147 1148 mtx_lock(&Giant); 1149 msg.msg_name = 0; 1150 msg.msg_namelen = 0; 1151 msg.msg_iov = &aiov; 1152 msg.msg_iovlen = 1; 1153 aiov.iov_base = uap->buf; 1154 aiov.iov_len = uap->len; 1155 msg.msg_control = 0; 1156 msg.msg_flags = uap->flags; 1157 error = recvit(td, uap->s, &msg, NULL); 1158 mtx_unlock(&Giant); 1159 return (error); 1160 } 1161 1162 /* 1163 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1164 * overlays the new one, missing only the flags, and with the (old) access 1165 * rights where the control fields are now. 1166 * 1167 * MPSAFE 1168 */ 1169 int 1170 orecvmsg(td, uap) 1171 struct thread *td; 1172 register struct orecvmsg_args /* { 1173 int s; 1174 struct omsghdr *msg; 1175 int flags; 1176 } */ *uap; 1177 { 1178 struct msghdr msg; 1179 struct iovec aiov[UIO_SMALLIOV], *iov; 1180 int error; 1181 1182 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1183 if (error) 1184 return (error); 1185 1186 mtx_lock(&Giant); 1187 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1188 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1189 error = EMSGSIZE; 1190 goto done2; 1191 } 1192 MALLOC(iov, struct iovec *, 1193 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1194 M_WAITOK); 1195 } else { 1196 iov = aiov; 1197 } 1198 msg.msg_flags = uap->flags | MSG_COMPAT; 1199 error = copyin(msg.msg_iov, iov, 1200 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1201 if (error) 1202 goto done; 1203 msg.msg_iov = iov; 1204 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1205 1206 if (msg.msg_controllen && error == 0) 1207 error = copyout(&msg.msg_controllen, 1208 &uap->msg->msg_accrightslen, sizeof (int)); 1209 done: 1210 if (iov != aiov) 1211 FREE(iov, M_IOV); 1212 done2: 1213 mtx_unlock(&Giant); 1214 return (error); 1215 } 1216 #endif 1217 1218 /* 1219 * MPSAFE 1220 */ 1221 int 1222 recvmsg(td, uap) 1223 struct thread *td; 1224 register struct recvmsg_args /* { 1225 int s; 1226 struct msghdr *msg; 1227 int flags; 1228 } */ *uap; 1229 { 1230 struct msghdr msg; 1231 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1232 register int error; 1233 1234 mtx_lock(&Giant); 1235 error = copyin(uap->msg, &msg, sizeof (msg)); 1236 if (error) 1237 goto done2; 1238 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1239 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1240 error = EMSGSIZE; 1241 goto done2; 1242 } 1243 MALLOC(iov, struct iovec *, 1244 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1245 M_WAITOK); 1246 } else { 1247 iov = aiov; 1248 } 1249 #ifdef COMPAT_OLDSOCK 1250 msg.msg_flags = uap->flags &~ MSG_COMPAT; 1251 #else 1252 msg.msg_flags = uap->flags; 1253 #endif 1254 uiov = msg.msg_iov; 1255 msg.msg_iov = iov; 1256 error = copyin(uiov, iov, 1257 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1258 if (error) 1259 goto done; 1260 error = recvit(td, uap->s, &msg, NULL); 1261 if (!error) { 1262 msg.msg_iov = uiov; 1263 error = copyout(&msg, uap->msg, sizeof(msg)); 1264 } 1265 done: 1266 if (iov != aiov) 1267 FREE(iov, M_IOV); 1268 done2: 1269 mtx_unlock(&Giant); 1270 return (error); 1271 } 1272 1273 /* 1274 * MPSAFE 1275 */ 1276 /* ARGSUSED */ 1277 int 1278 shutdown(td, uap) 1279 struct thread *td; 1280 register struct shutdown_args /* { 1281 int s; 1282 int how; 1283 } */ *uap; 1284 { 1285 struct socket *so; 1286 int error; 1287 1288 mtx_lock(&Giant); 1289 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1290 error = soshutdown(so, uap->how); 1291 fputsock(so); 1292 } 1293 mtx_unlock(&Giant); 1294 return(error); 1295 } 1296 1297 /* 1298 * MPSAFE 1299 */ 1300 /* ARGSUSED */ 1301 int 1302 setsockopt(td, uap) 1303 struct thread *td; 1304 register struct setsockopt_args /* { 1305 int s; 1306 int level; 1307 int name; 1308 caddr_t val; 1309 int valsize; 1310 } */ *uap; 1311 { 1312 struct socket *so; 1313 struct sockopt sopt; 1314 int error; 1315 1316 if (uap->val == 0 && uap->valsize != 0) 1317 return (EFAULT); 1318 if (uap->valsize < 0) 1319 return (EINVAL); 1320 1321 mtx_lock(&Giant); 1322 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1323 sopt.sopt_dir = SOPT_SET; 1324 sopt.sopt_level = uap->level; 1325 sopt.sopt_name = uap->name; 1326 sopt.sopt_val = uap->val; 1327 sopt.sopt_valsize = uap->valsize; 1328 sopt.sopt_td = td; 1329 error = sosetopt(so, &sopt); 1330 fputsock(so); 1331 } 1332 mtx_unlock(&Giant); 1333 return(error); 1334 } 1335 1336 /* 1337 * MPSAFE 1338 */ 1339 /* ARGSUSED */ 1340 int 1341 getsockopt(td, uap) 1342 struct thread *td; 1343 register struct getsockopt_args /* { 1344 int s; 1345 int level; 1346 int name; 1347 caddr_t val; 1348 int *avalsize; 1349 } */ *uap; 1350 { 1351 int valsize, error; 1352 struct socket *so; 1353 struct sockopt sopt; 1354 1355 mtx_lock(&Giant); 1356 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1357 goto done2; 1358 if (uap->val) { 1359 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1360 if (error) 1361 goto done1; 1362 if (valsize < 0) { 1363 error = EINVAL; 1364 goto done1; 1365 } 1366 } else { 1367 valsize = 0; 1368 } 1369 1370 sopt.sopt_dir = SOPT_GET; 1371 sopt.sopt_level = uap->level; 1372 sopt.sopt_name = uap->name; 1373 sopt.sopt_val = uap->val; 1374 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1375 sopt.sopt_td = td; 1376 1377 error = sogetopt(so, &sopt); 1378 if (error == 0) { 1379 valsize = sopt.sopt_valsize; 1380 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1381 } 1382 done1: 1383 fputsock(so); 1384 done2: 1385 mtx_unlock(&Giant); 1386 return (error); 1387 } 1388 1389 /* 1390 * getsockname1() - Get socket name. 1391 * 1392 * MPSAFE 1393 */ 1394 /* ARGSUSED */ 1395 static int 1396 getsockname1(td, uap, compat) 1397 struct thread *td; 1398 register struct getsockname_args /* { 1399 int fdes; 1400 caddr_t asa; 1401 int *alen; 1402 } */ *uap; 1403 int compat; 1404 { 1405 struct socket *so; 1406 struct sockaddr *sa; 1407 int len, error; 1408 1409 mtx_lock(&Giant); 1410 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1411 goto done2; 1412 error = copyin(uap->alen, &len, sizeof (len)); 1413 if (error) 1414 goto done1; 1415 if (len < 0) { 1416 error = EINVAL; 1417 goto done1; 1418 } 1419 sa = 0; 1420 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1421 if (error) 1422 goto bad; 1423 if (sa == 0) { 1424 len = 0; 1425 goto gotnothing; 1426 } 1427 1428 len = MIN(len, sa->sa_len); 1429 #ifdef COMPAT_OLDSOCK 1430 if (compat) 1431 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1432 #endif 1433 error = copyout(sa, uap->asa, (u_int)len); 1434 if (error == 0) 1435 gotnothing: 1436 error = copyout(&len, uap->alen, sizeof (len)); 1437 bad: 1438 if (sa) 1439 FREE(sa, M_SONAME); 1440 done1: 1441 fputsock(so); 1442 done2: 1443 mtx_unlock(&Giant); 1444 return (error); 1445 } 1446 1447 /* 1448 * MPSAFE 1449 */ 1450 int 1451 getsockname(td, uap) 1452 struct thread *td; 1453 struct getsockname_args *uap; 1454 { 1455 1456 return (getsockname1(td, uap, 0)); 1457 } 1458 1459 #ifdef COMPAT_OLDSOCK 1460 /* 1461 * MPSAFE 1462 */ 1463 int 1464 ogetsockname(td, uap) 1465 struct thread *td; 1466 struct getsockname_args *uap; 1467 { 1468 1469 return (getsockname1(td, uap, 1)); 1470 } 1471 #endif /* COMPAT_OLDSOCK */ 1472 1473 /* 1474 * getpeername1() - Get name of peer for connected socket. 1475 * 1476 * MPSAFE 1477 */ 1478 /* ARGSUSED */ 1479 static int 1480 getpeername1(td, uap, compat) 1481 struct thread *td; 1482 register struct getpeername_args /* { 1483 int fdes; 1484 caddr_t asa; 1485 int *alen; 1486 } */ *uap; 1487 int compat; 1488 { 1489 struct socket *so; 1490 struct sockaddr *sa; 1491 int len, error; 1492 1493 mtx_lock(&Giant); 1494 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1495 goto done2; 1496 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1497 error = ENOTCONN; 1498 goto done1; 1499 } 1500 error = copyin(uap->alen, &len, sizeof (len)); 1501 if (error) 1502 goto done1; 1503 if (len < 0) { 1504 error = EINVAL; 1505 goto done1; 1506 } 1507 sa = 0; 1508 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1509 if (error) 1510 goto bad; 1511 if (sa == 0) { 1512 len = 0; 1513 goto gotnothing; 1514 } 1515 len = MIN(len, sa->sa_len); 1516 #ifdef COMPAT_OLDSOCK 1517 if (compat) 1518 ((struct osockaddr *)sa)->sa_family = 1519 sa->sa_family; 1520 #endif 1521 error = copyout(sa, uap->asa, (u_int)len); 1522 if (error) 1523 goto bad; 1524 gotnothing: 1525 error = copyout(&len, uap->alen, sizeof (len)); 1526 bad: 1527 if (sa) 1528 FREE(sa, M_SONAME); 1529 done1: 1530 fputsock(so); 1531 done2: 1532 mtx_unlock(&Giant); 1533 return (error); 1534 } 1535 1536 /* 1537 * MPSAFE 1538 */ 1539 int 1540 getpeername(td, uap) 1541 struct thread *td; 1542 struct getpeername_args *uap; 1543 { 1544 1545 return (getpeername1(td, uap, 0)); 1546 } 1547 1548 #ifdef COMPAT_OLDSOCK 1549 /* 1550 * MPSAFE 1551 */ 1552 int 1553 ogetpeername(td, uap) 1554 struct thread *td; 1555 struct ogetpeername_args *uap; 1556 { 1557 1558 /* XXX uap should have type `getpeername_args *' to begin with. */ 1559 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1560 } 1561 #endif /* COMPAT_OLDSOCK */ 1562 1563 int 1564 sockargs(mp, buf, buflen, type) 1565 struct mbuf **mp; 1566 caddr_t buf; 1567 int buflen, type; 1568 { 1569 register struct sockaddr *sa; 1570 register struct mbuf *m; 1571 int error; 1572 1573 if ((u_int)buflen > MLEN) { 1574 #ifdef COMPAT_OLDSOCK 1575 if (type == MT_SONAME && (u_int)buflen <= 112) 1576 buflen = MLEN; /* unix domain compat. hack */ 1577 else 1578 #endif 1579 return (EINVAL); 1580 } 1581 m = m_get(M_TRYWAIT, type); 1582 if (m == NULL) 1583 return (ENOBUFS); 1584 m->m_len = buflen; 1585 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1586 if (error) 1587 (void) m_free(m); 1588 else { 1589 *mp = m; 1590 if (type == MT_SONAME) { 1591 sa = mtod(m, struct sockaddr *); 1592 1593 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1594 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1595 sa->sa_family = sa->sa_len; 1596 #endif 1597 sa->sa_len = buflen; 1598 } 1599 } 1600 return (error); 1601 } 1602 1603 int 1604 getsockaddr(namp, uaddr, len) 1605 struct sockaddr **namp; 1606 caddr_t uaddr; 1607 size_t len; 1608 { 1609 struct sockaddr *sa; 1610 int error; 1611 1612 if (len > SOCK_MAXADDRLEN) 1613 return ENAMETOOLONG; 1614 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1615 error = copyin(uaddr, sa, len); 1616 if (error) { 1617 FREE(sa, M_SONAME); 1618 } else { 1619 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1620 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1621 sa->sa_family = sa->sa_len; 1622 #endif 1623 sa->sa_len = len; 1624 *namp = sa; 1625 } 1626 return error; 1627 } 1628 1629 /* 1630 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) 1631 */ 1632 static void 1633 sf_buf_init(void *arg) 1634 { 1635 struct sf_buf *sf_bufs; 1636 vm_offset_t sf_base; 1637 int i; 1638 1639 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF); 1640 mtx_lock(&sf_freelist.sf_lock); 1641 SLIST_INIT(&sf_freelist.sf_head); 1642 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE); 1643 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, 1644 M_NOWAIT | M_ZERO); 1645 for (i = 0; i < nsfbufs; i++) { 1646 sf_bufs[i].kva = sf_base + i * PAGE_SIZE; 1647 SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list); 1648 } 1649 sf_buf_alloc_want = 0; 1650 mtx_unlock(&sf_freelist.sf_lock); 1651 } 1652 1653 /* 1654 * Get an sf_buf from the freelist. Will block if none are available. 1655 */ 1656 struct sf_buf * 1657 sf_buf_alloc(struct vm_page *m) 1658 { 1659 struct sf_buf *sf; 1660 int error; 1661 1662 mtx_lock(&sf_freelist.sf_lock); 1663 while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) { 1664 sf_buf_alloc_want++; 1665 error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH, 1666 "sfbufa", 0); 1667 sf_buf_alloc_want--; 1668 1669 /* 1670 * If we got a signal, don't risk going back to sleep. 1671 */ 1672 if (error) 1673 break; 1674 } 1675 if (sf != NULL) { 1676 SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list); 1677 sf->m = m; 1678 pmap_qenter(sf->kva, &sf->m, 1); 1679 } 1680 mtx_unlock(&sf_freelist.sf_lock); 1681 return (sf); 1682 } 1683 1684 /* 1685 * Detatch mapped page and release resources back to the system. 1686 */ 1687 void 1688 sf_buf_free(void *addr, void *args) 1689 { 1690 struct sf_buf *sf; 1691 struct vm_page *m; 1692 1693 sf = args; 1694 pmap_qremove((vm_offset_t)addr, 1); 1695 m = sf->m; 1696 vm_page_lock_queues(); 1697 vm_page_unwire(m, 0); 1698 /* 1699 * Check for the object going away on us. This can 1700 * happen since we don't hold a reference to it. 1701 * If so, we're responsible for freeing the page. 1702 */ 1703 if (m->wire_count == 0 && m->object == NULL) 1704 vm_page_free(m); 1705 vm_page_unlock_queues(); 1706 sf->m = NULL; 1707 mtx_lock(&sf_freelist.sf_lock); 1708 SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list); 1709 if (sf_buf_alloc_want > 0) 1710 wakeup_one(&sf_freelist); 1711 mtx_unlock(&sf_freelist.sf_lock); 1712 } 1713 1714 /* 1715 * sendfile(2) 1716 * 1717 * MPSAFE 1718 * 1719 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1720 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1721 * 1722 * Send a file specified by 'fd' and starting at 'offset' to a socket 1723 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1724 * nbytes == 0. Optionally add a header and/or trailer to the socket 1725 * output. If specified, write the total number of bytes sent into *sbytes. 1726 * 1727 */ 1728 int 1729 sendfile(struct thread *td, struct sendfile_args *uap) 1730 { 1731 1732 return (do_sendfile(td, uap, 0)); 1733 } 1734 1735 #ifdef COMPAT_FREEBSD4 1736 int 1737 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1738 { 1739 struct sendfile_args args; 1740 1741 args.fd = uap->fd; 1742 args.s = uap->s; 1743 args.offset = uap->offset; 1744 args.nbytes = uap->nbytes; 1745 args.hdtr = uap->hdtr; 1746 args.sbytes = uap->sbytes; 1747 args.flags = uap->flags; 1748 1749 return (do_sendfile(td, &args, 1)); 1750 } 1751 #endif /* COMPAT_FREEBSD4 */ 1752 1753 static int 1754 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1755 { 1756 struct vnode *vp; 1757 struct vm_object *obj; 1758 struct socket *so = NULL; 1759 struct mbuf *m; 1760 struct sf_buf *sf; 1761 struct vm_page *pg; 1762 struct writev_args nuap; 1763 struct sf_hdtr hdtr; 1764 off_t off, xfsize, hdtr_size, sbytes = 0; 1765 int error, s; 1766 1767 mtx_lock(&Giant); 1768 1769 hdtr_size = 0; 1770 1771 /* 1772 * The descriptor must be a regular file and have a backing VM object. 1773 */ 1774 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1775 goto done; 1776 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1777 error = EINVAL; 1778 goto done; 1779 } 1780 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1781 goto done; 1782 if (so->so_type != SOCK_STREAM) { 1783 error = EINVAL; 1784 goto done; 1785 } 1786 if ((so->so_state & SS_ISCONNECTED) == 0) { 1787 error = ENOTCONN; 1788 goto done; 1789 } 1790 if (uap->offset < 0) { 1791 error = EINVAL; 1792 goto done; 1793 } 1794 1795 #ifdef MAC 1796 error = mac_check_socket_send(td->td_ucred, so); 1797 if (error) 1798 goto done; 1799 #endif 1800 1801 /* 1802 * If specified, get the pointer to the sf_hdtr struct for 1803 * any headers/trailers. 1804 */ 1805 if (uap->hdtr != NULL) { 1806 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1807 if (error) 1808 goto done; 1809 /* 1810 * Send any headers. Wimp out and use writev(2). 1811 */ 1812 if (hdtr.headers != NULL) { 1813 nuap.fd = uap->s; 1814 nuap.iovp = hdtr.headers; 1815 nuap.iovcnt = hdtr.hdr_cnt; 1816 error = writev(td, &nuap); 1817 if (error) 1818 goto done; 1819 if (compat) 1820 sbytes += td->td_retval[0]; 1821 else 1822 hdtr_size += td->td_retval[0]; 1823 } 1824 } 1825 1826 /* 1827 * Protect against multiple writers to the socket. 1828 */ 1829 (void) sblock(&so->so_snd, M_WAITOK); 1830 1831 /* 1832 * Loop through the pages in the file, starting with the requested 1833 * offset. Get a file page (do I/O if necessary), map the file page 1834 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1835 * it on the socket. 1836 */ 1837 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1838 vm_pindex_t pindex; 1839 vm_offset_t pgoff; 1840 1841 pindex = OFF_TO_IDX(off); 1842 retry_lookup: 1843 /* 1844 * Calculate the amount to transfer. Not to exceed a page, 1845 * the EOF, or the passed in nbytes. 1846 */ 1847 xfsize = obj->un_pager.vnp.vnp_size - off; 1848 if (xfsize > PAGE_SIZE) 1849 xfsize = PAGE_SIZE; 1850 pgoff = (vm_offset_t)(off & PAGE_MASK); 1851 if (PAGE_SIZE - pgoff < xfsize) 1852 xfsize = PAGE_SIZE - pgoff; 1853 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1854 xfsize = uap->nbytes - sbytes; 1855 if (xfsize <= 0) 1856 break; 1857 /* 1858 * Optimize the non-blocking case by looking at the socket space 1859 * before going to the extra work of constituting the sf_buf. 1860 */ 1861 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1862 if (so->so_state & SS_CANTSENDMORE) 1863 error = EPIPE; 1864 else 1865 error = EAGAIN; 1866 sbunlock(&so->so_snd); 1867 goto done; 1868 } 1869 /* 1870 * Attempt to look up the page. 1871 * 1872 * Allocate if not found 1873 * 1874 * Wait and loop if busy. 1875 */ 1876 pg = vm_page_lookup(obj, pindex); 1877 1878 if (pg == NULL) { 1879 pg = vm_page_alloc(obj, pindex, 1880 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1881 if (pg == NULL) { 1882 VM_WAIT; 1883 goto retry_lookup; 1884 } 1885 vm_page_lock_queues(); 1886 vm_page_wakeup(pg); 1887 } else { 1888 vm_page_lock_queues(); 1889 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1890 goto retry_lookup; 1891 /* 1892 * Wire the page so it does not get ripped out from 1893 * under us. 1894 */ 1895 vm_page_wire(pg); 1896 } 1897 1898 /* 1899 * If page is not valid for what we need, initiate I/O 1900 */ 1901 1902 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1903 int bsize, resid; 1904 1905 /* 1906 * Ensure that our page is still around when the I/O 1907 * completes. 1908 */ 1909 vm_page_io_start(pg); 1910 vm_page_unlock_queues(); 1911 1912 /* 1913 * Get the page from backing store. 1914 */ 1915 bsize = vp->v_mount->mnt_stat.f_iosize; 1916 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1917 /* 1918 * XXXMAC: Because we don't have fp->f_cred here, 1919 * we pass in NOCRED. This is probably wrong, but 1920 * is consistent with our original implementation. 1921 */ 1922 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1923 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1924 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1925 td->td_ucred, NOCRED, &resid, td); 1926 VOP_UNLOCK(vp, 0, td); 1927 vm_page_lock_queues(); 1928 vm_page_flag_clear(pg, PG_ZERO); 1929 vm_page_io_finish(pg); 1930 if (error) { 1931 vm_page_unwire(pg, 0); 1932 /* 1933 * See if anyone else might know about this page. 1934 * If not and it is not valid, then free it. 1935 */ 1936 if (pg->wire_count == 0 && pg->valid == 0 && 1937 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1938 pg->hold_count == 0) { 1939 vm_page_busy(pg); 1940 vm_page_free(pg); 1941 } 1942 vm_page_unlock_queues(); 1943 sbunlock(&so->so_snd); 1944 goto done; 1945 } 1946 } 1947 vm_page_unlock_queues(); 1948 1949 /* 1950 * Get a sendfile buf. We usually wait as long as necessary, 1951 * but this wait can be interrupted. 1952 */ 1953 if ((sf = sf_buf_alloc(pg)) == NULL) { 1954 vm_page_lock_queues(); 1955 vm_page_unwire(pg, 0); 1956 if (pg->wire_count == 0 && pg->object == NULL) 1957 vm_page_free(pg); 1958 vm_page_unlock_queues(); 1959 sbunlock(&so->so_snd); 1960 error = EINTR; 1961 goto done; 1962 } 1963 1964 /* 1965 * Get an mbuf header and set it up as having external storage. 1966 */ 1967 MGETHDR(m, M_TRYWAIT, MT_DATA); 1968 if (m == NULL) { 1969 error = ENOBUFS; 1970 sf_buf_free((void *)sf->kva, sf); 1971 sbunlock(&so->so_snd); 1972 goto done; 1973 } 1974 /* 1975 * Setup external storage for mbuf. 1976 */ 1977 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, sf, M_RDONLY, 1978 EXT_SFBUF); 1979 m->m_data = (char *) sf->kva + pgoff; 1980 m->m_pkthdr.len = m->m_len = xfsize; 1981 /* 1982 * Add the buffer to the socket buffer chain. 1983 */ 1984 s = splnet(); 1985 retry_space: 1986 /* 1987 * Make sure that the socket is still able to take more data. 1988 * CANTSENDMORE being true usually means that the connection 1989 * was closed. so_error is true when an error was sensed after 1990 * a previous send. 1991 * The state is checked after the page mapping and buffer 1992 * allocation above since those operations may block and make 1993 * any socket checks stale. From this point forward, nothing 1994 * blocks before the pru_send (or more accurately, any blocking 1995 * results in a loop back to here to re-check). 1996 */ 1997 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1998 if (so->so_state & SS_CANTSENDMORE) { 1999 error = EPIPE; 2000 } else { 2001 error = so->so_error; 2002 so->so_error = 0; 2003 } 2004 m_freem(m); 2005 sbunlock(&so->so_snd); 2006 splx(s); 2007 goto done; 2008 } 2009 /* 2010 * Wait for socket space to become available. We do this just 2011 * after checking the connection state above in order to avoid 2012 * a race condition with sbwait(). 2013 */ 2014 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 2015 if (so->so_state & SS_NBIO) { 2016 m_freem(m); 2017 sbunlock(&so->so_snd); 2018 splx(s); 2019 error = EAGAIN; 2020 goto done; 2021 } 2022 error = sbwait(&so->so_snd); 2023 /* 2024 * An error from sbwait usually indicates that we've 2025 * been interrupted by a signal. If we've sent anything 2026 * then return bytes sent, otherwise return the error. 2027 */ 2028 if (error) { 2029 m_freem(m); 2030 sbunlock(&so->so_snd); 2031 splx(s); 2032 goto done; 2033 } 2034 goto retry_space; 2035 } 2036 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2037 splx(s); 2038 if (error) { 2039 sbunlock(&so->so_snd); 2040 goto done; 2041 } 2042 } 2043 sbunlock(&so->so_snd); 2044 2045 /* 2046 * Send trailers. Wimp out and use writev(2). 2047 */ 2048 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 2049 nuap.fd = uap->s; 2050 nuap.iovp = hdtr.trailers; 2051 nuap.iovcnt = hdtr.trl_cnt; 2052 error = writev(td, &nuap); 2053 if (error) 2054 goto done; 2055 if (compat) 2056 sbytes += td->td_retval[0]; 2057 else 2058 hdtr_size += td->td_retval[0]; 2059 } 2060 2061 done: 2062 /* 2063 * If there was no error we have to clear td->td_retval[0] 2064 * because it may have been set by writev. 2065 */ 2066 if (error == 0) { 2067 td->td_retval[0] = 0; 2068 } 2069 if (uap->sbytes != NULL) { 2070 if (!compat) 2071 sbytes += hdtr_size; 2072 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2073 } 2074 if (vp) 2075 vrele(vp); 2076 if (so) 2077 fputsock(so); 2078 mtx_unlock(&Giant); 2079 return (error); 2080 } 2081