1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 * $FreeBSD$ 38 */ 39 40 #include "opt_compat.h" 41 #include "opt_ktrace.h" 42 #include "opt_mac.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/mac.h> 49 #include <sys/mutex.h> 50 #include <sys/sysproto.h> 51 #include <sys/malloc.h> 52 #include <sys/filedesc.h> 53 #include <sys/event.h> 54 #include <sys/proc.h> 55 #include <sys/fcntl.h> 56 #include <sys/file.h> 57 #include <sys/lock.h> 58 #include <sys/mount.h> 59 #include <sys/mbuf.h> 60 #include <sys/protosw.h> 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 #include <sys/signalvar.h> 64 #include <sys/uio.h> 65 #include <sys/vnode.h> 66 #ifdef KTRACE 67 #include <sys/ktrace.h> 68 #endif 69 70 #include <vm/vm.h> 71 #include <vm/vm_object.h> 72 #include <vm/vm_page.h> 73 #include <vm/vm_pageout.h> 74 #include <vm/vm_kern.h> 75 #include <vm/vm_extern.h> 76 77 static void sf_buf_init(void *arg); 78 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) 79 80 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 81 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 82 83 static int accept1(struct thread *td, struct accept_args *uap, int compat); 84 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 85 static int getsockname1(struct thread *td, struct getsockname_args *uap, 86 int compat); 87 static int getpeername1(struct thread *td, struct getpeername_args *uap, 88 int compat); 89 90 /* 91 * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the 92 * sf_freelist head with the sf_lock mutex. 93 */ 94 static struct { 95 SLIST_HEAD(, sf_buf) sf_head; 96 struct mtx sf_lock; 97 } sf_freelist; 98 99 vm_offset_t sf_base; 100 struct sf_buf *sf_bufs; 101 u_int sf_buf_alloc_want; 102 103 /* 104 * System call interface to the socket abstraction. 
 */
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#define COMPAT_OLDSOCK
#endif

/*
 * socket(2): create a new socket of the requested domain/type/protocol
 * and return its descriptor in td_retval[0].
 *
 * MPSAFE
 */
int
socket(td, uap)
	struct thread *td;
	register struct socket_args /* {
		int	domain;
		int	type;
		int	protocol;
	} */ *uap;
{
	struct filedesc *fdp;
	struct socket *so;
	struct file *fp;
	int fd, error;

	mtx_lock(&Giant);
	fdp = td->td_proc->p_fd;
	error = falloc(td, &fp, &fd);
	if (error)
		goto done2;
	/* Hold an extra reference so fp stays valid across socreate(). */
	fhold(fp);
	error = socreate(uap->domain, &so, uap->type, uap->protocol,
	    td->td_ucred, td);
	FILEDESC_LOCK(fdp);
	if (error) {
		/*
		 * socreate() failed: remove the descriptor table entry
		 * (if nobody else has already replaced it) and drop the
		 * table's reference.
		 */
		if (fdp->fd_ofiles[fd] == fp) {
			fdp->fd_ofiles[fd] = NULL;
			FILEDESC_UNLOCK(fdp);
			fdrop(fp, td);
		} else
			FILEDESC_UNLOCK(fdp);
	} else {
		fp->f_data = so;	/* already has ref count */
		fp->f_flag = FREAD|FWRITE;
		fp->f_ops = &socketops;
		fp->f_type = DTYPE_SOCKET;
		FILEDESC_UNLOCK(fdp);
		td->td_retval[0] = fd;
	}
	/* Release the hold taken above. */
	fdrop(fp, td);
done2:
	mtx_unlock(&Giant);
	return (error);
}

/*
 * bind(2): copy in the user-supplied address and bind the socket to it.
 *
 * MPSAFE
 */
/* ARGSUSED */
int
bind(td, uap)
	struct thread *td;
	register struct bind_args /* {
		int	s;
		caddr_t	name;
		int	namelen;
	} */ *uap;
{
	struct socket *so;
	struct sockaddr *sa;
	int error;

	mtx_lock(&Giant);
	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
		goto done2;
	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
		goto done1;
#ifdef MAC
	error = mac_check_socket_bind(td->td_ucred, so, sa);
	if (error) {
		FREE(sa, M_SONAME);
		goto done1;
	}
#endif
	error = sobind(so, sa, td);
	FREE(sa, M_SONAME);
done1:
	fputsock(so);
done2:
	mtx_unlock(&Giant);
	return (error);
}

/*
 * listen(2): mark the socket as accepting connections.
 *
 * MPSAFE
 */
/* ARGSUSED */
int
listen(td, uap)
	struct thread *td;
	register struct listen_args /* {
		int	s;
		int	backlog;
	} */ *uap;
{
	struct socket *so;
	int error;

	mtx_lock(&Giant);
	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
#ifdef MAC
		error = mac_check_socket_listen(td->td_ucred, so);
		if (error)
			goto done;
#endif
		error = solisten(so, uap->backlog, td);
#ifdef MAC
done:
#endif
		fputsock(so);
	}
	mtx_unlock(&Giant);
	return(error);
}

/*
 * accept1()
 *
 * Common code for accept(2) and the 4.3BSD-compat oaccept(); "compat"
 * selects the old struct osockaddr layout on copyout.
 *
 * MPSAFE
 */
static int
accept1(td, uap, compat)
	struct thread *td;
	register struct accept_args /* {
		int	s;
		caddr_t	name;
		int	*anamelen;
	} */ *uap;
	int compat;
{
	struct filedesc *fdp;
	struct file *nfp = NULL;
	struct sockaddr *sa;
	int namelen, error, s;
	struct socket *head, *so;
	int fd;
	u_int fflag;
	pid_t pgid;

	mtx_lock(&Giant);
	fdp = td->td_proc->p_fd;
	if (uap->name) {
		error = copyin(uap->anamelen, &namelen, sizeof (namelen));
		if(error)
			goto done2;
		if (namelen < 0) {
			error = EINVAL;
			goto done2;
		}
	}
	error = fgetsock(td, uap->s, &head, &fflag);
	if (error)
		goto done2;
	s = splnet();
	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		splx(s);
		error = EINVAL;
		goto done;
	}
	/* Wait for a completed connection (or an error) to show up. */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_state & SS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		if ((head->so_state & SS_NBIO) != 0) {
			head->so_error = EWOULDBLOCK;
			break;
		}
		error = tsleep(&head->so_timeo, PSOCK | PCATCH,
		    "accept", 0);
		if (error) {
			splx(s);
			goto done;
		}
	}
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		splx(s);
		goto done;
	}

	/*
	 * At this point we know that there is at least one connection
	 * ready to be accepted. Remove it from the queue prior to
	 * allocating the file descriptor for it since falloc() may
	 * block allowing another process to accept the connection
	 * instead.
	 */
	so = TAILQ_FIRST(&head->so_comp);
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;

	error = falloc(td, &nfp, &fd);
	if (error) {
		/*
		 * Probably ran out of file descriptors. Put the
		 * unaccepted connection back onto the queue and
		 * do another wakeup so some other process might
		 * have a chance at it.
		 */
		TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
		head->so_qlen++;
		wakeup_one(&head->so_timeo);
		splx(s);
		goto done;
	}
	fhold(nfp);
	td->td_retval[0] = fd;

	/* connection has been removed from the listen queue */
	KNOTE(&head->so_rcv.sb_sel.si_note, 0);

	so->so_state &= ~SS_COMP;
	so->so_head = NULL;
	/* Inherit the listening socket's SIGIO recipient, if any. */
	pgid = fgetown(&head->so_sigio);
	if (pgid != 0)
		fsetown(pgid, &so->so_sigio);

	FILE_LOCK(nfp);
	soref(so);			/* file descriptor reference */
	nfp->f_data = so;	/* nfp has ref count from falloc */
	nfp->f_flag = fflag;
	nfp->f_ops = &socketops;
	nfp->f_type = DTYPE_SOCKET;
	FILE_UNLOCK(nfp);
	sa = 0;
	error = soaccept(so, &sa);
	if (error) {
		/*
		 * return a namelen of zero for older code which might
		 * ignore the return value from accept.
		 */
		if (uap->name != NULL) {
			namelen = 0;
			(void) copyout(&namelen,
			    uap->anamelen, sizeof(*uap->anamelen));
		}
		goto noconnection;
	}
	if (sa == NULL) {
		namelen = 0;
		if (uap->name)
			goto gotnoname;
		splx(s);
		error = 0;
		goto done;
	}
	if (uap->name) {
		/* check sa_len before it is destroyed */
		if (namelen > sa->sa_len)
			namelen = sa->sa_len;
#ifdef COMPAT_OLDSOCK
		if (compat)
			((struct osockaddr *)sa)->sa_family =
			    sa->sa_family;
#endif
		error = copyout(sa, uap->name, (u_int)namelen);
		if (!error)
gotnoname:
			error = copyout(&namelen,
			    uap->anamelen, sizeof (*uap->anamelen));
	}
noconnection:
	if (sa)
		FREE(sa, M_SONAME);

	/*
	 * close the new descriptor, assuming someone hasn't ripped it
	 * out from under us.
	 */
	if (error) {
		FILEDESC_LOCK(fdp);
		if (fdp->fd_ofiles[fd] == nfp) {
			fdp->fd_ofiles[fd] = NULL;
			FILEDESC_UNLOCK(fdp);
			fdrop(nfp, td);
		} else {
			FILEDESC_UNLOCK(fdp);
		}
	}
	splx(s);

	/*
	 * Release explicitly held references before returning.
	 */
done:
	if (nfp != NULL)
		fdrop(nfp, td);
	fputsock(head);
done2:
	mtx_unlock(&Giant);
	return (error);
}

/*
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{

	return (accept1(td, uap, 0));
}

#ifdef COMPAT_OLDSOCK
/*
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{

	return (accept1(td, uap, 1));
}
#endif /* COMPAT_OLDSOCK */

/*
 * connect(2): initiate a connection and, for blocking sockets, sleep
 * until it completes or fails.
 *
 * MPSAFE
 */
/* ARGSUSED */
int
connect(td, uap)
	struct thread *td;
	register struct connect_args /* {
		int	s;
		caddr_t	name;
		int	namelen;
	} */ *uap;
{
	struct socket *so;
	struct sockaddr *sa;
	int error, s;

	mtx_lock(&Giant);
	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
		goto done2;
	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
		error = EALREADY;
		goto done1;
	}
	error = getsockaddr(&sa, uap->name, uap->namelen);
	if (error)
		goto done1;
#ifdef MAC
	error = mac_check_socket_connect(td->td_ucred, so, sa);
	if (error)
		goto bad;
#endif
	error = soconnect(so, sa, td);
	if (error)
		goto bad;
	/* Non-blocking connect in progress: report EINPROGRESS. */
	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
		FREE(sa, M_SONAME);
		error = EINPROGRESS;
		goto done1;
	}
	s = splnet();
	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
		error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0);
		if (error)
			break;
	}
	if (error == 0) {
		error = so->so_error;
		so->so_error = 0;
	}
	splx(s);
bad:
	so->so_state &= ~SS_ISCONNECTING;
	FREE(sa, M_SONAME);
	if (error == ERESTART)
		error = EINTR;
done1:
	fputsock(so);
done2:
	mtx_unlock(&Giant);
	return (error);
}

/*
 * socketpair(2): create two connected sockets and copy their
 * descriptors out to uap->rsv.
 *
 * MPSAFE
 */
int
socketpair(td, uap)
	struct thread *td;
	register struct socketpair_args /* {
		int	domain;
		int	type;
		int	protocol;
		int	*rsv;
	} */ *uap;
{
	register struct filedesc *fdp = td->td_proc->p_fd;
	struct file *fp1, *fp2;
	struct socket *so1, *so2;
	int fd, error, sv[2];

	mtx_lock(&Giant);
	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
	    td->td_ucred, td);
	if (error)
		goto done2;
	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
	    td->td_ucred, td);
	if (error)
		goto free1;
	error = falloc(td, &fp1, &fd);
	if (error)
		goto free2;
	fhold(fp1);
	sv[0] = fd;
	fp1->f_data = so1;	/* so1 already has ref count */
	error = falloc(td, &fp2, &fd);
	if (error)
		goto free3;
	fhold(fp2);
	fp2->f_data = so2;	/* so2 already has ref count */
	sv[1] = fd;
	error = soconnect2(so1, so2);
	if (error)
		goto free4;
	if (uap->type == SOCK_DGRAM) {
		/*
		 * Datagram socket connection is asymmetric.
		 */
		 error = soconnect2(so2, so1);
		 if (error)
			goto free4;
	}
	FILE_LOCK(fp1);
	fp1->f_flag = FREAD|FWRITE;
	fp1->f_ops = &socketops;
	fp1->f_type = DTYPE_SOCKET;
	FILE_UNLOCK(fp1);
	FILE_LOCK(fp2);
	fp2->f_flag = FREAD|FWRITE;
	fp2->f_ops = &socketops;
	fp2->f_type = DTYPE_SOCKET;
	FILE_UNLOCK(fp2);
	error = copyout(sv, uap->rsv, 2 * sizeof (int));
	/* Drop the holds taken with fhold() above. */
	fdrop(fp1, td);
	fdrop(fp2, td);
	goto done2;
free4:
	/*
	 * Two fdrop()s per file: one for the descriptor-table entry
	 * being removed, one for the explicit fhold() above.
	 */
	FILEDESC_LOCK(fdp);
	if (fdp->fd_ofiles[sv[1]] == fp2) {
		fdp->fd_ofiles[sv[1]] = NULL;
		FILEDESC_UNLOCK(fdp);
		fdrop(fp2, td);
	} else
		FILEDESC_UNLOCK(fdp);
	fdrop(fp2, td);
free3:
	FILEDESC_LOCK(fdp);
	if (fdp->fd_ofiles[sv[0]] == fp1) {
		fdp->fd_ofiles[sv[0]] = NULL;
		FILEDESC_UNLOCK(fdp);
		fdrop(fp1, td);
	} else
		FILEDESC_UNLOCK(fdp);
	fdrop(fp1, td);
free2:
	(void)soclose(so2);
free1:
	(void)soclose(so1);
done2:
	mtx_unlock(&Giant);
	return (error);
}

/*
 * sendit(): common back end for sendto(2)/sendmsg(2) and the 4.3BSD
 * compat wrappers.  Builds a uio from the msghdr, copies in the
 * destination address and control data, and hands off to pru_sosend.
 */
static int
sendit(td, s, mp, flags)
	register struct thread *td;
	int s;
	register struct msghdr *mp;
	int flags;
{
	struct uio auio;
	register struct iovec *iov;
	register int i;
	struct mbuf *control;
	struct sockaddr *to = NULL;
	int len, error;
	struct socket *so;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
	struct uio ktruio;
	int iovlen;
#endif

	if ((error = fgetsock(td, s, &so, NULL)) != 0)
		return (error);

#ifdef MAC
	error = mac_check_socket_send(td->td_ucred, so);
	if (error)
		goto bad;
#endif

	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	iov = mp->msg_iov;
	/* Total the iovec lengths; a negative sum means overflow. */
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
		if ((auio.uio_resid += iov->iov_len) < 0) {
			error = EINVAL;
			goto bad;
		}
	}
	if (mp->msg_name) {
		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
		if (error)
			goto bad;
	}
	if (mp->msg_control) {
		if (mp->msg_controllen < sizeof(struct cmsghdr)
#ifdef COMPAT_OLDSOCK
		    && mp->msg_flags != MSG_COMPAT
#endif
		) {
			error = EINVAL;
			goto bad;
		}
		error = sockargs(&control, mp->msg_control,
		    mp->msg_controllen, MT_CONTROL);
		if (error)
			goto bad;
#ifdef COMPAT_OLDSOCK
		if (mp->msg_flags == MSG_COMPAT) {
			register struct cmsghdr *cm;

			/* Old msghdr carried bare access rights; wrap
			 * them in a modern cmsghdr. */
			M_PREPEND(control, sizeof(*cm), 0);
			if (control == 0) {
				error = ENOBUFS;
				goto bad;
			} else {
				cm = mtod(control, struct cmsghdr *);
				cm->cmsg_len = control->m_len;
				cm->cmsg_level = SOL_SOCKET;
				cm->cmsg_type = SCM_RIGHTS;
			}
		}
#endif
	} else {
		control = 0;
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO)) {
		/* Snapshot the iovec; pru_sosend consumes auio. */
		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, 0);
		bcopy(auio.uio_iov, ktriov, iovlen);
		ktruio = auio;
	}
#endif
	len = auio.uio_resid;
	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
	    flags, td);
	if (error) {
		/* A partial write before interruption counts as success. */
		if (auio.uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* Generation of SIGPIPE can be controlled per socket */
		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) {
			PROC_LOCK(td->td_proc);
			psignal(td->td_proc, SIGPIPE);
			PROC_UNLOCK(td->td_proc);
		}
	}
	if (error == 0)
		td->td_retval[0] = len - auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0) {
			ktruio.uio_iov = ktriov;
			ktruio.uio_resid = td->td_retval[0];
			ktrgenio(s, UIO_WRITE, &ktruio, error);
		}
		FREE(ktriov, M_TEMP);
	}
#endif
bad:
	fputsock(so);
	if (to)
		FREE(to, M_SONAME);
	return (error);
}

/*
 * sendto(2): single-buffer send with an explicit destination.
 *
 * MPSAFE
 */
int
sendto(td, uap)
	struct thread *td;
	register struct sendto_args /* {
		int	s;
		caddr_t	buf;
		size_t	len;
		int	flags;
		caddr_t	to;
		int	tolen;
	} */ *uap;
{
	struct msghdr msg;
	struct iovec aiov;
	int error;

	msg.msg_name = uap->to;
	msg.msg_namelen = uap->tolen;
	msg.msg_iov = &aiov;
	msg.msg_iovlen = 1;
	msg.msg_control = 0;
#ifdef COMPAT_OLDSOCK
	msg.msg_flags = 0;
#endif
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->len;
	mtx_lock(&Giant);
	error = sendit(td, uap->s, &msg, uap->flags);
	mtx_unlock(&Giant);
	return (error);
}

#ifdef COMPAT_OLDSOCK
/*
 * Old send(2): like sendto(2) without a destination address.
 *
 * MPSAFE
 */
int
osend(td, uap)
	struct thread *td;
	register struct osend_args /* {
		int	s;
		caddr_t	buf;
		int	len;
		int	flags;
	} */ *uap;
{
	struct msghdr msg;
	struct iovec aiov;
	int error;

	msg.msg_name = 0;
	msg.msg_namelen = 0;
	msg.msg_iov = &aiov;
	msg.msg_iovlen = 1;
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->len;
	msg.msg_control = 0;
	msg.msg_flags = 0;
	mtx_lock(&Giant);
	error = sendit(td, uap->s, &msg, uap->flags);
	mtx_unlock(&Giant);
	return (error);
}

/*
 * Old sendmsg(2): copy in an omsghdr and forward with MSG_COMPAT set.
 *
 * MPSAFE
 */
int
osendmsg(td, uap)
	struct thread *td;
	register struct osendmsg_args /* {
		int	s;
		caddr_t	msg;
		int	flags;
	} */ *uap;
{
	struct msghdr msg;
	struct iovec aiov[UIO_SMALLIOV], *iov;
	int error;

	mtx_lock(&Giant);
	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
	if (error)
		goto done2;
	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
			error = EMSGSIZE;
			goto done2;
		}
		MALLOC(iov, struct iovec *,
		    sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
		    0);
	} else {
		iov = aiov;
	}
	/*
	 * NOTE(review): unlike sendmsg(), this copyin is not guarded by
	 * msg.msg_iovlen != 0 — confirm copyin of 0 bytes is benign here.
	 */
	error = copyin(msg.msg_iov, iov,
	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
	if (error)
		goto done;
	msg.msg_flags = MSG_COMPAT;
	msg.msg_iov = iov;
	error = sendit(td, uap->s, &msg, uap->flags);
done:
	if (iov != aiov)
		FREE(iov, M_IOV);
done2:
	mtx_unlock(&Giant);
	return (error);
}
#endif

/*
 * sendmsg(2): scatter/gather send described by a user msghdr.
 *
 * MPSAFE
 */
int
sendmsg(td, uap)
	struct thread *td;
	register struct sendmsg_args /* {
		int	s;
		caddr_t	msg;
		int	flags;
	} */ *uap;
{
	struct msghdr msg;
	struct iovec aiov[UIO_SMALLIOV], *iov;
	int error;

	mtx_lock(&Giant);
	error = copyin(uap->msg, &msg, sizeof (msg));
	if (error)
		goto done2;
	/* Use the stack iovec array when it is big enough. */
	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
			error = EMSGSIZE;
			goto done2;
		}
		MALLOC(iov, struct iovec *,
		    sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
		    0);
	} else {
		iov = aiov;
	}
	if (msg.msg_iovlen &&
	    (error = copyin(msg.msg_iov, iov,
	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
		goto done;
	msg.msg_iov = iov;
#ifdef COMPAT_OLDSOCK
	msg.msg_flags = 0;
#endif
	error = sendit(td, uap->s, &msg, uap->flags);
done:
	if (iov != aiov)
		FREE(iov, M_IOV);
done2:
	mtx_unlock(&Giant);
	return (error);
}

/*
 * recvit(): common back end for the recv*(2) family.  Receives into the
 * msghdr's iovec via pru_soreceive, then copies out the source address
 * and any control data; namelenp, if non-NULL, receives the name length.
 */
static int
recvit(td, s, mp, namelenp)
	register struct thread *td;
	int s;
	register struct msghdr *mp;
	void *namelenp;
{
	struct uio auio;
	register struct iovec *iov;
	register int i;
	int len, error;
	struct mbuf *m, *control = 0;
	caddr_t ctlbuf;
	struct socket *so;
	struct sockaddr *fromsa = 0;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
	struct uio ktruio;
	int iovlen;
#endif

	if ((error = fgetsock(td, s, &so, NULL)) != 0)
		return (error);

#ifdef MAC
	error = mac_check_socket_receive(td->td_ucred, so);
	if (error) {
		fputsock(so);
		return (error);
	}
#endif

	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	iov = mp->msg_iov;
	/* Total the iovec lengths; a negative sum means overflow. */
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
		if ((auio.uio_resid += iov->iov_len) < 0) {
			fputsock(so);
			return (EINVAL);
		}
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO)) {
		/* Snapshot the iovec; pru_soreceive consumes auio. */
		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, 0);
		bcopy(auio.uio_iov, ktriov, iovlen);
		ktruio = auio;
	}
#endif
	len = auio.uio_resid;
	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
	    &mp->msg_flags);
	if (error) {
		/* Partial receive before interruption counts as success. */
		if (auio.uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0) {
			ktruio.uio_iov = ktriov;
			ktruio.uio_resid = len - auio.uio_resid;
			ktrgenio(s, UIO_READ, &ktruio, error);
		}
		FREE(ktriov, M_TEMP);
	}
#endif
	if (error)
		goto out;
	td->td_retval[0] = len - auio.uio_resid;
	if (mp->msg_name) {
		len = mp->msg_namelen;
		if (len <= 0 || fromsa == 0)
			len = 0;
		else {
#ifndef	MIN
#define	MIN(a,b) ((a)>(b)?(b):(a))
#endif
			/* save sa_len before it is destroyed by MSG_COMPAT */
			len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				((struct osockaddr *)fromsa)->sa_family =
				    fromsa->sa_family;
#endif
			error = copyout(fromsa, mp->msg_name, (unsigned)len);
			if (error)
				goto out;
		}
		mp->msg_namelen = len;
		if (namelenp &&
		    (error = copyout(&len, namelenp, sizeof (int)))) {
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				error = 0;	/* old recvfrom didn't check */
			else
#endif
				goto out;
		}
	}
	if (mp->msg_control) {
#ifdef COMPAT_OLDSOCK
		/*
		 * We assume that old recvmsg calls won't receive access
		 * rights and other control info, esp. as control info
		 * is always optional and those options didn't exist in 4.3.
		 * If we receive rights, trim the cmsghdr; anything else
		 * is tossed.
		 */
		if (control && mp->msg_flags & MSG_COMPAT) {
			if (mtod(control, struct cmsghdr *)->cmsg_level !=
			    SOL_SOCKET ||
			    mtod(control, struct cmsghdr *)->cmsg_type !=
			    SCM_RIGHTS) {
				mp->msg_controllen = 0;
				goto out;
			}
			control->m_len -= sizeof (struct cmsghdr);
			control->m_data += sizeof (struct cmsghdr);
		}
#endif
		len = mp->msg_controllen;
		m = control;
		mp->msg_controllen = 0;
		ctlbuf = mp->msg_control;

		/* Copy out the control mbuf chain, truncating if needed. */
		while (m && len > 0) {
			unsigned int tocopy;

			if (len >= m->m_len)
				tocopy = m->m_len;
			else {
				mp->msg_flags |= MSG_CTRUNC;
				tocopy = len;
			}

			if ((error = copyout(mtod(m, caddr_t),
			    ctlbuf, tocopy)) != 0)
				goto out;

			ctlbuf += tocopy;
			len -= tocopy;
			m = m->m_next;
		}
		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
	}
out:
	fputsock(so);
	if (fromsa)
		FREE(fromsa, M_SONAME);
	if (control)
		m_freem(control);
	return (error);
}

/*
 * recvfrom(2): single-buffer receive, optionally reporting the source
 * address.
 *
 * MPSAFE
 */
int
recvfrom(td, uap)
	struct thread *td;
	register struct recvfrom_args /* {
		int	s;
		caddr_t	buf;
		size_t	len;
		int	flags;
		caddr_t	from;
		int	*fromlenaddr;
	} */ *uap;
{
	struct msghdr msg;
	struct iovec aiov;
	int error;

	mtx_lock(&Giant);
	if (uap->fromlenaddr) {
		error = copyin(uap->fromlenaddr,
		    &msg.msg_namelen, sizeof (msg.msg_namelen));
		if (error)
			goto done2;
	} else {
		msg.msg_namelen = 0;
	}
	msg.msg_name = uap->from;
	msg.msg_iov = &aiov;
	msg.msg_iovlen = 1;
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->len;
	msg.msg_control = 0;
	msg.msg_flags = uap->flags;
	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
done2:
	mtx_unlock(&Giant);
	return(error);
}

#ifdef COMPAT_OLDSOCK
/*
 * Old recvfrom(2): same as recvfrom(2) with MSG_COMPAT forced on.
 *
 * MPSAFE
 */
int
orecvfrom(td, uap)
	struct thread *td;
	struct recvfrom_args *uap;
{

	uap->flags |= MSG_COMPAT;
	return (recvfrom(td, uap));
}
#endif


#ifdef COMPAT_OLDSOCK
/*
 * Old recv(2): single-buffer receive with no source address.
 *
 * MPSAFE
 */
int
orecv(td, uap)
	struct thread *td;
	register struct orecv_args /* {
		int	s;
		caddr_t	buf;
		int	len;
		int	flags;
	} */ *uap;
{
	struct msghdr msg;
	struct iovec aiov;
	int error;

	mtx_lock(&Giant);
	msg.msg_name = 0;
	msg.msg_namelen = 0;
	msg.msg_iov = &aiov;
	msg.msg_iovlen = 1;
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->len;
	msg.msg_control = 0;
	msg.msg_flags = uap->flags;
	error = recvit(td, uap->s, &msg, NULL);
	mtx_unlock(&Giant);
	return (error);
}

/*
 * Old recvmsg. This code takes advantage of the fact that the old msghdr
 * overlays the new one, missing only the flags, and with the (old) access
 * rights where the control fields are now.
 *
 * MPSAFE
 */
int
orecvmsg(td, uap)
	struct thread *td;
	register struct orecvmsg_args /* {
		int	s;
		struct	omsghdr *msg;
		int	flags;
	} */ *uap;
{
	struct msghdr msg;
	struct iovec aiov[UIO_SMALLIOV], *iov;
	int error;

	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
	if (error)
		return (error);

	mtx_lock(&Giant);
	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
			error = EMSGSIZE;
			goto done2;
		}
		MALLOC(iov, struct iovec *,
		    sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
		    0);
	} else {
		iov = aiov;
	}
	msg.msg_flags = uap->flags | MSG_COMPAT;
	error = copyin(msg.msg_iov, iov,
	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
	if (error)
		goto done;
	msg.msg_iov = iov;
	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);

	/* Old msghdr reports control data via msg_accrightslen. */
	if (msg.msg_controllen && error == 0)
		error = copyout(&msg.msg_controllen,
		    &uap->msg->msg_accrightslen, sizeof (int));
done:
	if (iov != aiov)
		FREE(iov, M_IOV);
done2:
	mtx_unlock(&Giant);
	return (error);
}
#endif

/*
 * recvmsg(2): scatter/gather receive described by a user msghdr; the
 * updated msghdr (flags, lengths) is copied back out on success.
 *
 * MPSAFE
 */
int
recvmsg(td, uap)
	struct thread *td;
	register struct recvmsg_args /* {
		int	s;
		struct	msghdr *msg;
		int	flags;
	} */ *uap;
{
	struct msghdr msg;
	struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
	register int error;

	mtx_lock(&Giant);
	error = copyin(uap->msg, &msg, sizeof (msg));
	if (error)
		goto done2;
	if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
		if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
			error = EMSGSIZE;
			goto done2;
		}
		MALLOC(iov, struct iovec *,
		    sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
		    0);
	} else {
		iov = aiov;
	}
#ifdef COMPAT_OLDSOCK
	msg.msg_flags = uap->flags &~ MSG_COMPAT;
#else
	msg.msg_flags = uap->flags;
#endif
	/* Remember the user iovec pointer for the copyout below. */
	uiov = msg.msg_iov;
	msg.msg_iov = iov;
	error = copyin(uiov, iov,
	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
	if (error)
		goto done;
	error = recvit(td, uap->s, &msg, NULL);
	if (!error) {
		msg.msg_iov = uiov;
		error = copyout(&msg, uap->msg, sizeof(msg));
	}
done:
	if (iov != aiov)
		FREE(iov, M_IOV);
done2:
	mtx_unlock(&Giant);
	return (error);
}

/*
 * shutdown(2): disable further sends and/or receives on the socket.
 *
 * MPSAFE
 */
/* ARGSUSED */
int
shutdown(td, uap)
	struct thread *td;
	register struct shutdown_args /* {
		int	s;
		int	how;
	} */ *uap;
{
	struct socket *so;
	int error;

	mtx_lock(&Giant);
	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
		error = soshutdown(so, uap->how);
		fputsock(so);
	}
	mtx_unlock(&Giant);
	return(error);
}

/*
 * setsockopt(2): set a socket option from a user buffer.
 *
 * MPSAFE
 */
/* ARGSUSED */
int
setsockopt(td, uap)
	struct thread *td;
	register struct setsockopt_args /* {
		int	s;
		int	level;
		int	name;
		caddr_t	val;
		int	valsize;
	} */ *uap;
{
	struct socket *so;
	struct sockopt sopt;
	int error;

	if (uap->val == 0 && uap->valsize != 0)
		return (EFAULT);
	if (uap->valsize < 0)
		return (EINVAL);

	mtx_lock(&Giant);
	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
		sopt.sopt_dir = SOPT_SET;
		sopt.sopt_level = uap->level;
		sopt.sopt_name = uap->name;
		sopt.sopt_val = uap->val;
		sopt.sopt_valsize = uap->valsize;
		sopt.sopt_td = td;
		error = sosetopt(so, &sopt);
		fputsock(so);
	}
	mtx_unlock(&Giant);
	return(error);
}

/*
 * getsockopt(2): fetch a socket option into a user buffer and report
 * the resulting length through *avalsize.
 *
 * MPSAFE
 */
/* ARGSUSED */
int
getsockopt(td, uap)
	struct thread *td;
	register struct getsockopt_args /* {
		int	s;
		int	level;
		int	name;
		caddr_t	val;
		int	*avalsize;
	} */ *uap;
{
	int	valsize, error;
	struct	socket *so;
	struct	sockopt sopt;

	mtx_lock(&Giant);
	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
		goto done2;
	if (uap->val) {
		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
		if (error)
			goto done1;
		if (valsize < 0) {
			error = EINVAL;
			goto done1;
		}
	} else {
		valsize = 0;
	}

	sopt.sopt_dir = SOPT_GET;
	sopt.sopt_level = uap->level;
	sopt.sopt_name = uap->name;
	sopt.sopt_val = uap->val;
	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
	sopt.sopt_td = td;

	error = sogetopt(so, &sopt);
	if (error == 0) {
		valsize = sopt.sopt_valsize;
		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
	}
done1:
	fputsock(so);
done2:
	mtx_unlock(&Giant);
	return (error);
}

/*
 * getsockname1() - Get socket name.
 *
 * Common code for getsockname(2) and the 4.3BSD-compat ogetsockname();
 * "compat" selects the old struct osockaddr layout on copyout.
 *
 * MPSAFE
 */
/* ARGSUSED */
static int
getsockname1(td, uap, compat)
	struct thread *td;
	register struct getsockname_args /* {
		int	fdes;
		caddr_t	asa;
		int	*alen;
	} */ *uap;
	int compat;
{
	struct socket *so;
	struct sockaddr *sa;
	int len, error;

	mtx_lock(&Giant);
	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
		goto done2;
	error = copyin(uap->alen, &len, sizeof (len));
	if (error)
		goto done1;
	if (len < 0) {
		error = EINVAL;
		goto done1;
	}
	sa = 0;
	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
	if (error)
		goto bad;
	if (sa == 0) {
		len = 0;
		goto gotnothing;
	}

	len = MIN(len, sa->sa_len);
#ifdef COMPAT_OLDSOCK
	if (compat)
		((struct osockaddr *)sa)->sa_family = sa->sa_family;
#endif
	error = copyout(sa, uap->asa, (u_int)len);
	if (error == 0)
gotnothing:
		error = copyout(&len, uap->alen, sizeof (len));
bad:
	if (sa)
		FREE(sa, M_SONAME);
done1:
	fputsock(so);
done2:
	mtx_unlock(&Giant);
	return (error);
}

/*
 * MPSAFE
 */
int
getsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{

	return (getsockname1(td, uap, 0));
}

#ifdef COMPAT_OLDSOCK
/*
 * MPSAFE
 */
int
ogetsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{

	return (getsockname1(td, uap, 1));
}
#endif /* COMPAT_OLDSOCK */

/*
 * getpeername1() - Get name of peer for connected socket.
 *
 * Common code for getpeername(2) and the 4.3BSD-compat ogetpeername();
 * "compat" selects the old struct osockaddr layout on copyout.
 *
 * MPSAFE
 */
/* ARGSUSED */
static int
getpeername1(td, uap, compat)
	struct thread *td;
	register struct getpeername_args /* {
		int	fdes;
		caddr_t	asa;
		int	*alen;
	} */ *uap;
	int compat;
{
	struct socket *so;
	struct sockaddr *sa;
	int len, error;

	mtx_lock(&Giant);
	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
		goto done2;
	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
		error = ENOTCONN;
		goto done1;
	}
	error = copyin(uap->alen, &len, sizeof (len));
	if (error)
		goto done1;
	if (len < 0) {
		error = EINVAL;
		goto done1;
	}
	sa = 0;
	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
	if (error)
		goto bad;
	if (sa == 0) {
		len = 0;
		goto gotnothing;
	}
	len = MIN(len, sa->sa_len);
#ifdef COMPAT_OLDSOCK
	if (compat)
		((struct osockaddr *)sa)->sa_family =
		    sa->sa_family;
#endif
	error = copyout(sa, uap->asa, (u_int)len);
	if (error)
		goto bad;
gotnothing:
	error = copyout(&len, uap->alen, sizeof (len));
bad:
	if (sa)
		FREE(sa, M_SONAME);
done1:
	fputsock(so);
done2:
	mtx_unlock(&Giant);
	return (error);
}

/*
 * MPSAFE
 */
int
getpeername(td, uap)
	struct thread *td;
	struct getpeername_args *uap;
{

	return (getpeername1(td, uap, 0));
}

#ifdef COMPAT_OLDSOCK
/*
 * MPSAFE
 */
int
ogetpeername(td, uap)
	struct thread *td;
	struct ogetpeername_args *uap;
{

	/* XXX uap should have type `getpeername_args *' to begin with. */
	return (getpeername1(td, (struct getpeername_args *)uap, 1));
}
#endif /* COMPAT_OLDSOCK */

/*
 * sockargs(): copy a user buffer (address or control data) into a
 * freshly allocated mbuf of the given type.
 */
int
sockargs(mp, buf, buflen, type)
	struct mbuf **mp;
	caddr_t buf;
	int buflen, type;
{
	register struct sockaddr *sa;
	register struct mbuf *m;
	int error;

	if ((u_int)buflen > MLEN) {
#ifdef COMPAT_OLDSOCK
		if (type == MT_SONAME && (u_int)buflen <= 112)
			buflen = MLEN;		/* unix domain compat. hack */
		else
#endif
		return (EINVAL);
	}
	m = m_get(0, type);
	if (m == NULL)
		return (ENOBUFS);
	m->m_len = buflen;
	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
	if (error)
		(void) m_free(m);
	else {
		*mp = m;
		if (type == MT_SONAME) {
			sa = mtod(m, struct sockaddr *);

#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
			/* Old 4.3 addresses put the family where sa_len
			 * now lives; translate in place. */
			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
				sa->sa_family = sa->sa_len;
#endif
			sa->sa_len = buflen;
		}
	}
	return (error);
}

/*
 * getsockaddr(): copy a user sockaddr into a malloc'd M_SONAME buffer;
 * on success the caller owns *namp and must FREE() it.
 */
int
getsockaddr(namp, uaddr, len)
	struct sockaddr **namp;
	caddr_t uaddr;
	size_t len;
{
	struct sockaddr *sa;
	int error;

	if (len > SOCK_MAXADDRLEN)
		return ENAMETOOLONG;
	MALLOC(sa, struct sockaddr *, len, M_SONAME, 0);
	error = copyin(uaddr, sa, len);
	if (error) {
		FREE(sa, M_SONAME);
	} else {
#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
		/* Old 4.3 addresses put the family where sa_len
		 * now lives; translate in place. */
		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
			sa->sa_family = sa->sa_len;
#endif
		sa->sa_len = len;
		*namp = sa;
	}
	return error;
}

/*
 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer.
:-)) 1588 */ 1589 static void 1590 sf_buf_init(void *arg) 1591 { 1592 int i; 1593 1594 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF); 1595 mtx_lock(&sf_freelist.sf_lock); 1596 SLIST_INIT(&sf_freelist.sf_head); 1597 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE); 1598 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, 1599 M_NOWAIT | M_ZERO); 1600 for (i = 0; i < nsfbufs; i++) { 1601 sf_bufs[i].kva = sf_base + i * PAGE_SIZE; 1602 SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list); 1603 } 1604 sf_buf_alloc_want = 0; 1605 mtx_unlock(&sf_freelist.sf_lock); 1606 } 1607 1608 /* 1609 * Get an sf_buf from the freelist. Will block if none are available. 1610 */ 1611 struct sf_buf * 1612 sf_buf_alloc() 1613 { 1614 struct sf_buf *sf; 1615 int error; 1616 1617 mtx_lock(&sf_freelist.sf_lock); 1618 while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) { 1619 sf_buf_alloc_want++; 1620 error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH, 1621 "sfbufa", 0); 1622 sf_buf_alloc_want--; 1623 1624 /* 1625 * If we got a signal, don't risk going back to sleep. 1626 */ 1627 if (error) 1628 break; 1629 } 1630 if (sf != NULL) 1631 SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list); 1632 mtx_unlock(&sf_freelist.sf_lock); 1633 return (sf); 1634 } 1635 1636 #define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT]) 1637 1638 /* 1639 * Detatch mapped page and release resources back to the system. 1640 */ 1641 void 1642 sf_buf_free(void *addr, void *args) 1643 { 1644 struct sf_buf *sf; 1645 struct vm_page *m; 1646 1647 GIANT_REQUIRED; 1648 1649 sf = dtosf(addr); 1650 pmap_qremove((vm_offset_t)addr, 1); 1651 m = sf->m; 1652 vm_page_lock_queues(); 1653 vm_page_unwire(m, 0); 1654 /* 1655 * Check for the object going away on us. This can 1656 * happen since we don't hold a reference to it. 1657 * If so, we're responsible for freeing the page. 
1658 */ 1659 if (m->wire_count == 0 && m->object == NULL) 1660 vm_page_free(m); 1661 vm_page_unlock_queues(); 1662 sf->m = NULL; 1663 mtx_lock(&sf_freelist.sf_lock); 1664 SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list); 1665 if (sf_buf_alloc_want > 0) 1666 wakeup_one(&sf_freelist); 1667 mtx_unlock(&sf_freelist.sf_lock); 1668 } 1669 1670 /* 1671 * sendfile(2) 1672 * 1673 * MPSAFE 1674 * 1675 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1676 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1677 * 1678 * Send a file specified by 'fd' and starting at 'offset' to a socket 1679 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1680 * nbytes == 0. Optionally add a header and/or trailer to the socket 1681 * output. If specified, write the total number of bytes sent into *sbytes. 1682 * 1683 */ 1684 int 1685 sendfile(struct thread *td, struct sendfile_args *uap) 1686 { 1687 1688 return (do_sendfile(td, uap, 0)); 1689 } 1690 1691 #ifdef COMPAT_FREEBSD4 1692 int 1693 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1694 { 1695 struct sendfile_args args; 1696 1697 args.fd = uap->fd; 1698 args.s = uap->s; 1699 args.offset = uap->offset; 1700 args.nbytes = uap->nbytes; 1701 args.hdtr = uap->hdtr; 1702 args.sbytes = uap->sbytes; 1703 args.flags = uap->flags; 1704 1705 return (do_sendfile(td, &args, 1)); 1706 } 1707 #endif /* COMPAT_FREEBSD4 */ 1708 1709 static int 1710 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1711 { 1712 struct vnode *vp; 1713 struct vm_object *obj; 1714 struct socket *so = NULL; 1715 struct mbuf *m; 1716 struct sf_buf *sf; 1717 struct vm_page *pg; 1718 struct writev_args nuap; 1719 struct sf_hdtr hdtr; 1720 off_t off, xfsize, hdtr_size, sbytes = 0; 1721 int error, s; 1722 1723 mtx_lock(&Giant); 1724 1725 hdtr_size = 0; 1726 1727 /* 1728 * The descriptor must be a regular file and have a backing VM object. 
1729 */ 1730 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1731 goto done; 1732 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1733 error = EINVAL; 1734 goto done; 1735 } 1736 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1737 goto done; 1738 if (so->so_type != SOCK_STREAM) { 1739 error = EINVAL; 1740 goto done; 1741 } 1742 if ((so->so_state & SS_ISCONNECTED) == 0) { 1743 error = ENOTCONN; 1744 goto done; 1745 } 1746 if (uap->offset < 0) { 1747 error = EINVAL; 1748 goto done; 1749 } 1750 1751 #ifdef MAC 1752 error = mac_check_socket_send(td->td_ucred, so); 1753 if (error) 1754 goto done; 1755 #endif 1756 1757 /* 1758 * If specified, get the pointer to the sf_hdtr struct for 1759 * any headers/trailers. 1760 */ 1761 if (uap->hdtr != NULL) { 1762 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1763 if (error) 1764 goto done; 1765 /* 1766 * Send any headers. Wimp out and use writev(2). 1767 */ 1768 if (hdtr.headers != NULL) { 1769 nuap.fd = uap->s; 1770 nuap.iovp = hdtr.headers; 1771 nuap.iovcnt = hdtr.hdr_cnt; 1772 error = writev(td, &nuap); 1773 if (error) 1774 goto done; 1775 if (compat) 1776 sbytes += td->td_retval[0]; 1777 else 1778 hdtr_size += td->td_retval[0]; 1779 } 1780 } 1781 1782 /* 1783 * Protect against multiple writers to the socket. 1784 */ 1785 (void) sblock(&so->so_snd, 0); 1786 1787 /* 1788 * Loop through the pages in the file, starting with the requested 1789 * offset. Get a file page (do I/O if necessary), map the file page 1790 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1791 * it on the socket. 1792 */ 1793 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1794 vm_pindex_t pindex; 1795 vm_offset_t pgoff; 1796 1797 pindex = OFF_TO_IDX(off); 1798 retry_lookup: 1799 /* 1800 * Calculate the amount to transfer. Not to exceed a page, 1801 * the EOF, or the passed in nbytes. 
1802 */ 1803 xfsize = obj->un_pager.vnp.vnp_size - off; 1804 if (xfsize > PAGE_SIZE) 1805 xfsize = PAGE_SIZE; 1806 pgoff = (vm_offset_t)(off & PAGE_MASK); 1807 if (PAGE_SIZE - pgoff < xfsize) 1808 xfsize = PAGE_SIZE - pgoff; 1809 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1810 xfsize = uap->nbytes - sbytes; 1811 if (xfsize <= 0) 1812 break; 1813 /* 1814 * Optimize the non-blocking case by looking at the socket space 1815 * before going to the extra work of constituting the sf_buf. 1816 */ 1817 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1818 if (so->so_state & SS_CANTSENDMORE) 1819 error = EPIPE; 1820 else 1821 error = EAGAIN; 1822 sbunlock(&so->so_snd); 1823 goto done; 1824 } 1825 /* 1826 * Attempt to look up the page. 1827 * 1828 * Allocate if not found 1829 * 1830 * Wait and loop if busy. 1831 */ 1832 pg = vm_page_lookup(obj, pindex); 1833 1834 if (pg == NULL) { 1835 pg = vm_page_alloc(obj, pindex, 1836 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1837 if (pg == NULL) { 1838 VM_WAIT; 1839 goto retry_lookup; 1840 } 1841 vm_page_lock_queues(); 1842 vm_page_wakeup(pg); 1843 } else { 1844 vm_page_lock_queues(); 1845 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1846 goto retry_lookup; 1847 /* 1848 * Wire the page so it does not get ripped out from 1849 * under us. 1850 */ 1851 vm_page_wire(pg); 1852 } 1853 1854 /* 1855 * If page is not valid for what we need, initiate I/O 1856 */ 1857 1858 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1859 int bsize, resid; 1860 1861 /* 1862 * Ensure that our page is still around when the I/O 1863 * completes. 1864 */ 1865 vm_page_io_start(pg); 1866 vm_page_unlock_queues(); 1867 1868 /* 1869 * Get the page from backing store. 1870 */ 1871 bsize = vp->v_mount->mnt_stat.f_iosize; 1872 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1873 /* 1874 * XXXMAC: Because we don't have fp->f_cred here, 1875 * we pass in NOCRED. This is probably wrong, but 1876 * is consistent with our original implementation. 
1877 */ 1878 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1879 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1880 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1881 td->td_ucred, NOCRED, &resid, td); 1882 VOP_UNLOCK(vp, 0, td); 1883 vm_page_lock_queues(); 1884 vm_page_flag_clear(pg, PG_ZERO); 1885 vm_page_io_finish(pg); 1886 if (error) { 1887 vm_page_unwire(pg, 0); 1888 /* 1889 * See if anyone else might know about this page. 1890 * If not and it is not valid, then free it. 1891 */ 1892 if (pg->wire_count == 0 && pg->valid == 0 && 1893 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1894 pg->hold_count == 0) { 1895 vm_page_busy(pg); 1896 vm_page_free(pg); 1897 } 1898 vm_page_unlock_queues(); 1899 sbunlock(&so->so_snd); 1900 goto done; 1901 } 1902 } 1903 vm_page_unlock_queues(); 1904 1905 /* 1906 * Get a sendfile buf. We usually wait as long as necessary, 1907 * but this wait can be interrupted. 1908 */ 1909 if ((sf = sf_buf_alloc()) == NULL) { 1910 vm_page_lock_queues(); 1911 vm_page_unwire(pg, 0); 1912 if (pg->wire_count == 0 && pg->object == NULL) 1913 vm_page_free(pg); 1914 vm_page_unlock_queues(); 1915 sbunlock(&so->so_snd); 1916 error = EINTR; 1917 goto done; 1918 } 1919 1920 /* 1921 * Allocate a kernel virtual page and insert the physical page 1922 * into it. 1923 */ 1924 sf->m = pg; 1925 pmap_qenter(sf->kva, &pg, 1); 1926 /* 1927 * Get an mbuf header and set it up as having external storage. 1928 */ 1929 MGETHDR(m, 0, MT_DATA); 1930 if (m == NULL) { 1931 error = ENOBUFS; 1932 sf_buf_free((void *)sf->kva, NULL); 1933 sbunlock(&so->so_snd); 1934 goto done; 1935 } 1936 /* 1937 * Setup external storage for mbuf. 1938 */ 1939 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL, M_RDONLY, 1940 EXT_SFBUF); 1941 m->m_data = (char *) sf->kva + pgoff; 1942 m->m_pkthdr.len = m->m_len = xfsize; 1943 /* 1944 * Add the buffer to the socket buffer chain. 1945 */ 1946 s = splnet(); 1947 retry_space: 1948 /* 1949 * Make sure that the socket is still able to take more data. 
1950 * CANTSENDMORE being true usually means that the connection 1951 * was closed. so_error is true when an error was sensed after 1952 * a previous send. 1953 * The state is checked after the page mapping and buffer 1954 * allocation above since those operations may block and make 1955 * any socket checks stale. From this point forward, nothing 1956 * blocks before the pru_send (or more accurately, any blocking 1957 * results in a loop back to here to re-check). 1958 */ 1959 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1960 if (so->so_state & SS_CANTSENDMORE) { 1961 error = EPIPE; 1962 } else { 1963 error = so->so_error; 1964 so->so_error = 0; 1965 } 1966 m_freem(m); 1967 sbunlock(&so->so_snd); 1968 splx(s); 1969 goto done; 1970 } 1971 /* 1972 * Wait for socket space to become available. We do this just 1973 * after checking the connection state above in order to avoid 1974 * a race condition with sbwait(). 1975 */ 1976 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1977 if (so->so_state & SS_NBIO) { 1978 m_freem(m); 1979 sbunlock(&so->so_snd); 1980 splx(s); 1981 error = EAGAIN; 1982 goto done; 1983 } 1984 error = sbwait(&so->so_snd); 1985 /* 1986 * An error from sbwait usually indicates that we've 1987 * been interrupted by a signal. If we've sent anything 1988 * then return bytes sent, otherwise return the error. 1989 */ 1990 if (error) { 1991 m_freem(m); 1992 sbunlock(&so->so_snd); 1993 splx(s); 1994 goto done; 1995 } 1996 goto retry_space; 1997 } 1998 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 1999 splx(s); 2000 if (error) { 2001 sbunlock(&so->so_snd); 2002 goto done; 2003 } 2004 } 2005 sbunlock(&so->so_snd); 2006 2007 /* 2008 * Send trailers. Wimp out and use writev(2). 
2009 */ 2010 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 2011 nuap.fd = uap->s; 2012 nuap.iovp = hdtr.trailers; 2013 nuap.iovcnt = hdtr.trl_cnt; 2014 error = writev(td, &nuap); 2015 if (error) 2016 goto done; 2017 if (compat) 2018 sbytes += td->td_retval[0]; 2019 else 2020 hdtr_size += td->td_retval[0]; 2021 } 2022 2023 done: 2024 /* 2025 * If there was no error we have to clear td->td_retval[0] 2026 * because it may have been set by writev. 2027 */ 2028 if (error == 0) { 2029 td->td_retval[0] = 0; 2030 } 2031 if (uap->sbytes != NULL) { 2032 if (!compat) 2033 sbytes += hdtr_size; 2034 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2035 } 2036 if (vp) 2037 vrele(vp); 2038 if (so) 2039 fputsock(so); 2040 mtx_unlock(&Giant); 2041 return (error); 2042 } 2043