1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 * $FreeBSD$ 38 */ 39 40 #include "opt_compat.h" 41 #include "opt_ktrace.h" 42 #include "opt_mac.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/mac.h> 49 #include <sys/mutex.h> 50 #include <sys/sysproto.h> 51 #include <sys/malloc.h> 52 #include <sys/filedesc.h> 53 #include <sys/event.h> 54 #include <sys/proc.h> 55 #include <sys/fcntl.h> 56 #include <sys/file.h> 57 #include <sys/filio.h> 58 #include <sys/mount.h> 59 #include <sys/mbuf.h> 60 #include <sys/protosw.h> 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 #include <sys/signalvar.h> 64 #include <sys/syscallsubr.h> 65 #include <sys/uio.h> 66 #include <sys/vnode.h> 67 #ifdef KTRACE 68 #include <sys/ktrace.h> 69 #endif 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 #include <vm/vm_pageout.h> 75 #include <vm/vm_kern.h> 76 #include <vm/vm_extern.h> 77 78 static void sf_buf_init(void *arg); 79 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) 80 81 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 82 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 83 84 static int accept1(struct thread *td, struct accept_args *uap, int compat); 85 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 86 static int getsockname1(struct thread *td, struct getsockname_args *uap, 87 int compat); 88 static int getpeername1(struct thread *td, struct getpeername_args *uap, 89 int compat); 90 91 /* 92 * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise: the list 93 * head (sf_head) bundled in one structure with the sf_lock mutex. 94 */ 95 static struct { 96 SLIST_HEAD(, sf_buf) sf_head; 97 struct mtx sf_lock; 98 } sf_freelist; 99 100 vm_offset_t sf_base; 101 struct sf_buf *sf_bufs; 102 u_int sf_buf_alloc_want; 103 104 /* 105 * System call interface to the socket abstraction. 
106 */ 107 #if defined(COMPAT_43) || defined(COMPAT_SUNOS) 108 #define COMPAT_OLDSOCK 109 #endif 110 111 /* 112 * MPSAFE 113 */ 114 int 115 socket(td, uap) 116 struct thread *td; 117 register struct socket_args /* { 118 int domain; 119 int type; 120 int protocol; 121 } */ *uap; 122 { 123 struct filedesc *fdp; 124 struct socket *so; 125 struct file *fp; 126 int fd, error; 127 128 mtx_lock(&Giant); 129 fdp = td->td_proc->p_fd; 130 error = falloc(td, &fp, &fd); 131 if (error) 132 goto done2; 133 fhold(fp); 134 error = socreate(uap->domain, &so, uap->type, uap->protocol, 135 td->td_ucred, td); 136 FILEDESC_LOCK(fdp); 137 if (error) { 138 if (fdp->fd_ofiles[fd] == fp) { 139 fdp->fd_ofiles[fd] = NULL; 140 FILEDESC_UNLOCK(fdp); 141 fdrop(fp, td); 142 } else 143 FILEDESC_UNLOCK(fdp); 144 } else { 145 fp->f_data = so; /* already has ref count */ 146 fp->f_flag = FREAD|FWRITE; 147 fp->f_ops = &socketops; 148 fp->f_type = DTYPE_SOCKET; 149 FILEDESC_UNLOCK(fdp); 150 td->td_retval[0] = fd; 151 } 152 fdrop(fp, td); 153 done2: 154 mtx_unlock(&Giant); 155 return (error); 156 } 157 158 /* 159 * MPSAFE 160 */ 161 /* ARGSUSED */ 162 int 163 bind(td, uap) 164 struct thread *td; 165 register struct bind_args /* { 166 int s; 167 caddr_t name; 168 int namelen; 169 } */ *uap; 170 { 171 struct sockaddr *sa; 172 int error; 173 174 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 175 return (error); 176 177 return (kern_bind(td, uap->s, sa)); 178 } 179 180 int 181 kern_bind(td, fd, sa) 182 struct thread *td; 183 int fd; 184 struct sockaddr *sa; 185 { 186 struct socket *so; 187 int error; 188 189 mtx_lock(&Giant); 190 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 191 goto done2; 192 #ifdef MAC 193 error = mac_check_socket_bind(td->td_ucred, so, sa); 194 if (error) 195 goto done1; 196 #endif 197 error = sobind(so, sa, td); 198 #ifdef MAC 199 done1: 200 #endif 201 fputsock(so); 202 done2: 203 mtx_unlock(&Giant); 204 FREE(sa, M_SONAME); 205 return (error); 206 } 207 208 /* 209 * 
MPSAFE 210 */ 211 /* ARGSUSED */ 212 int 213 listen(td, uap) 214 struct thread *td; 215 register struct listen_args /* { 216 int s; 217 int backlog; 218 } */ *uap; 219 { 220 struct socket *so; 221 int error; 222 223 mtx_lock(&Giant); 224 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 225 #ifdef MAC 226 error = mac_check_socket_listen(td->td_ucred, so); 227 if (error) 228 goto done; 229 #endif 230 error = solisten(so, uap->backlog, td); 231 #ifdef MAC 232 done: 233 #endif 234 fputsock(so); 235 } 236 mtx_unlock(&Giant); 237 return(error); 238 } 239 240 /* 241 * accept1() 242 * MPSAFE 243 */ 244 static int 245 accept1(td, uap, compat) 246 struct thread *td; 247 register struct accept_args /* { 248 int s; 249 caddr_t name; 250 int *anamelen; 251 } */ *uap; 252 int compat; 253 { 254 struct filedesc *fdp; 255 struct file *nfp = NULL; 256 struct sockaddr *sa; 257 int namelen, error, s; 258 struct socket *head, *so; 259 int fd; 260 u_int fflag; 261 pid_t pgid; 262 int tmp; 263 264 mtx_lock(&Giant); 265 fdp = td->td_proc->p_fd; 266 if (uap->name) { 267 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 268 if(error) 269 goto done2; 270 if (namelen < 0) { 271 error = EINVAL; 272 goto done2; 273 } 274 } 275 error = fgetsock(td, uap->s, &head, &fflag); 276 if (error) 277 goto done2; 278 s = splnet(); 279 if ((head->so_options & SO_ACCEPTCONN) == 0) { 280 splx(s); 281 error = EINVAL; 282 goto done; 283 } 284 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 285 if (head->so_state & SS_CANTRCVMORE) { 286 head->so_error = ECONNABORTED; 287 break; 288 } 289 if ((head->so_state & SS_NBIO) != 0) { 290 head->so_error = EWOULDBLOCK; 291 break; 292 } 293 error = tsleep(&head->so_timeo, PSOCK | PCATCH, 294 "accept", 0); 295 if (error) { 296 splx(s); 297 goto done; 298 } 299 } 300 if (head->so_error) { 301 error = head->so_error; 302 head->so_error = 0; 303 splx(s); 304 goto done; 305 } 306 307 /* 308 * At this point we know that there is at least one connection 
309 * ready to be accepted. Remove it from the queue prior to 310 * allocating the file descriptor for it since falloc() may 311 * block allowing another process to accept the connection 312 * instead. 313 */ 314 so = TAILQ_FIRST(&head->so_comp); 315 TAILQ_REMOVE(&head->so_comp, so, so_list); 316 head->so_qlen--; 317 318 error = falloc(td, &nfp, &fd); 319 if (error) { 320 /* 321 * Probably ran out of file descriptors. Put the 322 * unaccepted connection back onto the queue and 323 * do another wakeup so some other process might 324 * have a chance at it. 325 */ 326 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 327 head->so_qlen++; 328 wakeup_one(&head->so_timeo); 329 splx(s); 330 goto done; 331 } 332 fhold(nfp); 333 td->td_retval[0] = fd; 334 335 /* connection has been removed from the listen queue */ 336 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 337 338 so->so_state &= ~SS_COMP; 339 so->so_head = NULL; 340 pgid = fgetown(&head->so_sigio); 341 if (pgid != 0) 342 fsetown(pgid, &so->so_sigio); 343 344 FILE_LOCK(nfp); 345 soref(so); /* file descriptor reference */ 346 nfp->f_data = so; /* nfp has ref count from falloc */ 347 nfp->f_flag = fflag; 348 nfp->f_ops = &socketops; 349 nfp->f_type = DTYPE_SOCKET; 350 FILE_UNLOCK(nfp); 351 /* Sync socket nonblocking/async state with file flags */ 352 tmp = fflag & FNONBLOCK; 353 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 354 tmp = fflag & FASYNC; 355 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 356 sa = 0; 357 error = soaccept(so, &sa); 358 if (error) { 359 /* 360 * return a namelen of zero for older code which might 361 * ignore the return value from accept. 
362 */ 363 if (uap->name != NULL) { 364 namelen = 0; 365 (void) copyout(&namelen, 366 uap->anamelen, sizeof(*uap->anamelen)); 367 } 368 goto noconnection; 369 } 370 if (sa == NULL) { 371 namelen = 0; 372 if (uap->name) 373 goto gotnoname; 374 splx(s); 375 error = 0; 376 goto done; 377 } 378 if (uap->name) { 379 /* check sa_len before it is destroyed */ 380 if (namelen > sa->sa_len) 381 namelen = sa->sa_len; 382 #ifdef COMPAT_OLDSOCK 383 if (compat) 384 ((struct osockaddr *)sa)->sa_family = 385 sa->sa_family; 386 #endif 387 error = copyout(sa, uap->name, (u_int)namelen); 388 if (!error) 389 gotnoname: 390 error = copyout(&namelen, 391 uap->anamelen, sizeof (*uap->anamelen)); 392 } 393 noconnection: 394 if (sa) 395 FREE(sa, M_SONAME); 396 397 /* 398 * close the new descriptor, assuming someone hasn't ripped it 399 * out from under us. 400 */ 401 if (error) { 402 FILEDESC_LOCK(fdp); 403 if (fdp->fd_ofiles[fd] == nfp) { 404 fdp->fd_ofiles[fd] = NULL; 405 FILEDESC_UNLOCK(fdp); 406 fdrop(nfp, td); 407 } else { 408 FILEDESC_UNLOCK(fdp); 409 } 410 } 411 splx(s); 412 413 /* 414 * Release explicitly held references before returning. 
415 */ 416 done: 417 if (nfp != NULL) 418 fdrop(nfp, td); 419 fputsock(head); 420 done2: 421 mtx_unlock(&Giant); 422 return (error); 423 } 424 425 /* 426 * MPSAFE (accept1() is MPSAFE) 427 */ 428 int 429 accept(td, uap) 430 struct thread *td; 431 struct accept_args *uap; 432 { 433 434 return (accept1(td, uap, 0)); 435 } 436 437 #ifdef COMPAT_OLDSOCK 438 /* 439 * MPSAFE (accept1() is MPSAFE) 440 */ 441 int 442 oaccept(td, uap) 443 struct thread *td; 444 struct accept_args *uap; 445 { 446 447 return (accept1(td, uap, 1)); 448 } 449 #endif /* COMPAT_OLDSOCK */ 450 451 /* 452 * MPSAFE 453 */ 454 /* ARGSUSED */ 455 int 456 connect(td, uap) 457 struct thread *td; 458 register struct connect_args /* { 459 int s; 460 caddr_t name; 461 int namelen; 462 } */ *uap; 463 { 464 struct sockaddr *sa; 465 int error; 466 467 error = getsockaddr(&sa, uap->name, uap->namelen); 468 if (error) 469 return error; 470 471 return (kern_connect(td, uap->s, sa)); 472 } 473 474 475 int 476 kern_connect(td, fd, sa) 477 struct thread *td; 478 int fd; 479 struct sockaddr *sa; 480 { 481 struct socket *so; 482 int error, s; 483 484 mtx_lock(&Giant); 485 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 486 goto done2; 487 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 488 error = EALREADY; 489 goto done1; 490 } 491 #ifdef MAC 492 error = mac_check_socket_connect(td->td_ucred, so, sa); 493 if (error) 494 goto bad; 495 #endif 496 error = soconnect(so, sa, td); 497 if (error) 498 goto bad; 499 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 500 error = EINPROGRESS; 501 goto done1; 502 } 503 s = splnet(); 504 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 505 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0); 506 if (error) 507 break; 508 } 509 if (error == 0) { 510 error = so->so_error; 511 so->so_error = 0; 512 } 513 splx(s); 514 bad: 515 so->so_state &= ~SS_ISCONNECTING; 516 if (error == ERESTART) 517 error = EINTR; 518 done1: 519 
fputsock(so); 520 done2: 521 mtx_unlock(&Giant); 522 FREE(sa, M_SONAME); 523 return (error); 524 } 525 526 /* 527 * MPSAFE 528 */ 529 int 530 socketpair(td, uap) 531 struct thread *td; 532 register struct socketpair_args /* { 533 int domain; 534 int type; 535 int protocol; 536 int *rsv; 537 } */ *uap; 538 { 539 register struct filedesc *fdp = td->td_proc->p_fd; 540 struct file *fp1, *fp2; 541 struct socket *so1, *so2; 542 int fd, error, sv[2]; 543 544 mtx_lock(&Giant); 545 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 546 td->td_ucred, td); 547 if (error) 548 goto done2; 549 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 550 td->td_ucred, td); 551 if (error) 552 goto free1; 553 error = falloc(td, &fp1, &fd); 554 if (error) 555 goto free2; 556 fhold(fp1); 557 sv[0] = fd; 558 fp1->f_data = so1; /* so1 already has ref count */ 559 error = falloc(td, &fp2, &fd); 560 if (error) 561 goto free3; 562 fhold(fp2); 563 fp2->f_data = so2; /* so2 already has ref count */ 564 sv[1] = fd; 565 error = soconnect2(so1, so2); 566 if (error) 567 goto free4; 568 if (uap->type == SOCK_DGRAM) { 569 /* 570 * Datagram socket connection is asymmetric. 
571 */ 572 error = soconnect2(so2, so1); 573 if (error) 574 goto free4; 575 } 576 FILE_LOCK(fp1); 577 fp1->f_flag = FREAD|FWRITE; 578 fp1->f_ops = &socketops; 579 fp1->f_type = DTYPE_SOCKET; 580 FILE_UNLOCK(fp1); 581 FILE_LOCK(fp2); 582 fp2->f_flag = FREAD|FWRITE; 583 fp2->f_ops = &socketops; 584 fp2->f_type = DTYPE_SOCKET; 585 FILE_UNLOCK(fp2); 586 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 587 fdrop(fp1, td); 588 fdrop(fp2, td); 589 goto done2; 590 free4: 591 FILEDESC_LOCK(fdp); 592 if (fdp->fd_ofiles[sv[1]] == fp2) { 593 fdp->fd_ofiles[sv[1]] = NULL; 594 FILEDESC_UNLOCK(fdp); 595 fdrop(fp2, td); 596 } else 597 FILEDESC_UNLOCK(fdp); 598 fdrop(fp2, td); 599 free3: 600 FILEDESC_LOCK(fdp); 601 if (fdp->fd_ofiles[sv[0]] == fp1) { 602 fdp->fd_ofiles[sv[0]] = NULL; 603 FILEDESC_UNLOCK(fdp); 604 fdrop(fp1, td); 605 } else 606 FILEDESC_UNLOCK(fdp); 607 fdrop(fp1, td); 608 free2: 609 (void)soclose(so2); 610 free1: 611 (void)soclose(so1); 612 done2: 613 mtx_unlock(&Giant); 614 return (error); 615 } 616 617 static int 618 sendit(td, s, mp, flags) 619 register struct thread *td; 620 int s; 621 register struct msghdr *mp; 622 int flags; 623 { 624 struct uio auio; 625 register struct iovec *iov; 626 register int i; 627 struct mbuf *control; 628 struct sockaddr *to = NULL; 629 int len, error; 630 struct socket *so; 631 #ifdef KTRACE 632 struct iovec *ktriov = NULL; 633 struct uio ktruio; 634 int iovlen; 635 #endif 636 637 if ((error = fgetsock(td, s, &so, NULL)) != 0) 638 return (error); 639 640 #ifdef MAC 641 error = mac_check_socket_send(td->td_ucred, so); 642 if (error) 643 goto bad; 644 #endif 645 646 auio.uio_iov = mp->msg_iov; 647 auio.uio_iovcnt = mp->msg_iovlen; 648 auio.uio_segflg = UIO_USERSPACE; 649 auio.uio_rw = UIO_WRITE; 650 auio.uio_td = td; 651 auio.uio_offset = 0; /* XXX */ 652 auio.uio_resid = 0; 653 iov = mp->msg_iov; 654 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 655 if ((auio.uio_resid += iov->iov_len) < 0) { 656 error = EINVAL; 657 goto bad; 658 } 
659 } 660 if (mp->msg_name) { 661 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 662 if (error) 663 goto bad; 664 } 665 if (mp->msg_control) { 666 if (mp->msg_controllen < sizeof(struct cmsghdr) 667 #ifdef COMPAT_OLDSOCK 668 && mp->msg_flags != MSG_COMPAT 669 #endif 670 ) { 671 error = EINVAL; 672 goto bad; 673 } 674 error = sockargs(&control, mp->msg_control, 675 mp->msg_controllen, MT_CONTROL); 676 if (error) 677 goto bad; 678 #ifdef COMPAT_OLDSOCK 679 if (mp->msg_flags == MSG_COMPAT) { 680 register struct cmsghdr *cm; 681 682 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 683 if (control == 0) { 684 error = ENOBUFS; 685 goto bad; 686 } else { 687 cm = mtod(control, struct cmsghdr *); 688 cm->cmsg_len = control->m_len; 689 cm->cmsg_level = SOL_SOCKET; 690 cm->cmsg_type = SCM_RIGHTS; 691 } 692 } 693 #endif 694 } else { 695 control = 0; 696 } 697 #ifdef KTRACE 698 if (KTRPOINT(td, KTR_GENIO)) { 699 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 700 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 701 bcopy(auio.uio_iov, ktriov, iovlen); 702 ktruio = auio; 703 } 704 #endif 705 len = auio.uio_resid; 706 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control, 707 flags, td); 708 if (error) { 709 if (auio.uio_resid != len && (error == ERESTART || 710 error == EINTR || error == EWOULDBLOCK)) 711 error = 0; 712 /* Generation of SIGPIPE can be controlled per socket */ 713 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 714 PROC_LOCK(td->td_proc); 715 psignal(td->td_proc, SIGPIPE); 716 PROC_UNLOCK(td->td_proc); 717 } 718 } 719 if (error == 0) 720 td->td_retval[0] = len - auio.uio_resid; 721 #ifdef KTRACE 722 if (ktriov != NULL) { 723 if (error == 0) { 724 ktruio.uio_iov = ktriov; 725 ktruio.uio_resid = td->td_retval[0]; 726 ktrgenio(s, UIO_WRITE, &ktruio, error); 727 } 728 FREE(ktriov, M_TEMP); 729 } 730 #endif 731 bad: 732 fputsock(so); 733 if (to) 734 FREE(to, M_SONAME); 735 return (error); 736 } 737 738 /* 739 * MPSAFE 740 */ 
741 int 742 sendto(td, uap) 743 struct thread *td; 744 register struct sendto_args /* { 745 int s; 746 caddr_t buf; 747 size_t len; 748 int flags; 749 caddr_t to; 750 int tolen; 751 } */ *uap; 752 { 753 struct msghdr msg; 754 struct iovec aiov; 755 int error; 756 757 msg.msg_name = uap->to; 758 msg.msg_namelen = uap->tolen; 759 msg.msg_iov = &aiov; 760 msg.msg_iovlen = 1; 761 msg.msg_control = 0; 762 #ifdef COMPAT_OLDSOCK 763 msg.msg_flags = 0; 764 #endif 765 aiov.iov_base = uap->buf; 766 aiov.iov_len = uap->len; 767 mtx_lock(&Giant); 768 error = sendit(td, uap->s, &msg, uap->flags); 769 mtx_unlock(&Giant); 770 return (error); 771 } 772 773 #ifdef COMPAT_OLDSOCK 774 /* 775 * MPSAFE 776 */ 777 int 778 osend(td, uap) 779 struct thread *td; 780 register struct osend_args /* { 781 int s; 782 caddr_t buf; 783 int len; 784 int flags; 785 } */ *uap; 786 { 787 struct msghdr msg; 788 struct iovec aiov; 789 int error; 790 791 msg.msg_name = 0; 792 msg.msg_namelen = 0; 793 msg.msg_iov = &aiov; 794 msg.msg_iovlen = 1; 795 aiov.iov_base = uap->buf; 796 aiov.iov_len = uap->len; 797 msg.msg_control = 0; 798 msg.msg_flags = 0; 799 mtx_lock(&Giant); 800 error = sendit(td, uap->s, &msg, uap->flags); 801 mtx_unlock(&Giant); 802 return (error); 803 } 804 805 /* 806 * MPSAFE 807 */ 808 int 809 osendmsg(td, uap) 810 struct thread *td; 811 register struct osendmsg_args /* { 812 int s; 813 caddr_t msg; 814 int flags; 815 } */ *uap; 816 { 817 struct msghdr msg; 818 struct iovec aiov[UIO_SMALLIOV], *iov; 819 int error; 820 821 mtx_lock(&Giant); 822 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 823 if (error) 824 goto done2; 825 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 826 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 827 error = EMSGSIZE; 828 goto done2; 829 } 830 MALLOC(iov, struct iovec *, 831 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 832 M_WAITOK); 833 } else { 834 iov = aiov; 835 } 836 error = copyin(msg.msg_iov, iov, 837 (unsigned)(msg.msg_iovlen * sizeof (struct 
iovec))); 838 if (error) 839 goto done; 840 msg.msg_flags = MSG_COMPAT; 841 msg.msg_iov = iov; 842 error = sendit(td, uap->s, &msg, uap->flags); 843 done: 844 if (iov != aiov) 845 FREE(iov, M_IOV); 846 done2: 847 mtx_unlock(&Giant); 848 return (error); 849 } 850 #endif 851 852 /* 853 * MPSAFE 854 */ 855 int 856 sendmsg(td, uap) 857 struct thread *td; 858 register struct sendmsg_args /* { 859 int s; 860 caddr_t msg; 861 int flags; 862 } */ *uap; 863 { 864 struct msghdr msg; 865 struct iovec aiov[UIO_SMALLIOV], *iov; 866 int error; 867 868 mtx_lock(&Giant); 869 error = copyin(uap->msg, &msg, sizeof (msg)); 870 if (error) 871 goto done2; 872 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 873 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 874 error = EMSGSIZE; 875 goto done2; 876 } 877 MALLOC(iov, struct iovec *, 878 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 879 M_WAITOK); 880 } else { 881 iov = aiov; 882 } 883 if (msg.msg_iovlen && 884 (error = copyin(msg.msg_iov, iov, 885 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 886 goto done; 887 msg.msg_iov = iov; 888 #ifdef COMPAT_OLDSOCK 889 msg.msg_flags = 0; 890 #endif 891 error = sendit(td, uap->s, &msg, uap->flags); 892 done: 893 if (iov != aiov) 894 FREE(iov, M_IOV); 895 done2: 896 mtx_unlock(&Giant); 897 return (error); 898 } 899 900 static int 901 recvit(td, s, mp, namelenp) 902 register struct thread *td; 903 int s; 904 register struct msghdr *mp; 905 void *namelenp; 906 { 907 struct uio auio; 908 register struct iovec *iov; 909 register int i; 910 int len, error; 911 struct mbuf *m, *control = 0; 912 caddr_t ctlbuf; 913 struct socket *so; 914 struct sockaddr *fromsa = 0; 915 #ifdef KTRACE 916 struct iovec *ktriov = NULL; 917 struct uio ktruio; 918 int iovlen; 919 #endif 920 921 if ((error = fgetsock(td, s, &so, NULL)) != 0) 922 return (error); 923 924 #ifdef MAC 925 error = mac_check_socket_receive(td->td_ucred, so); 926 if (error) { 927 fputsock(so); 928 return (error); 929 } 930 #endif 931 932 
auio.uio_iov = mp->msg_iov; 933 auio.uio_iovcnt = mp->msg_iovlen; 934 auio.uio_segflg = UIO_USERSPACE; 935 auio.uio_rw = UIO_READ; 936 auio.uio_td = td; 937 auio.uio_offset = 0; /* XXX */ 938 auio.uio_resid = 0; 939 iov = mp->msg_iov; 940 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 941 if ((auio.uio_resid += iov->iov_len) < 0) { 942 fputsock(so); 943 return (EINVAL); 944 } 945 } 946 #ifdef KTRACE 947 if (KTRPOINT(td, KTR_GENIO)) { 948 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 949 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 950 bcopy(auio.uio_iov, ktriov, iovlen); 951 ktruio = auio; 952 } 953 #endif 954 len = auio.uio_resid; 955 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 956 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 957 &mp->msg_flags); 958 if (error) { 959 if (auio.uio_resid != len && (error == ERESTART || 960 error == EINTR || error == EWOULDBLOCK)) 961 error = 0; 962 } 963 #ifdef KTRACE 964 if (ktriov != NULL) { 965 if (error == 0) { 966 ktruio.uio_iov = ktriov; 967 ktruio.uio_resid = len - auio.uio_resid; 968 ktrgenio(s, UIO_READ, &ktruio, error); 969 } 970 FREE(ktriov, M_TEMP); 971 } 972 #endif 973 if (error) 974 goto out; 975 td->td_retval[0] = len - auio.uio_resid; 976 if (mp->msg_name) { 977 len = mp->msg_namelen; 978 if (len <= 0 || fromsa == 0) 979 len = 0; 980 else { 981 /* save sa_len before it is destroyed by MSG_COMPAT */ 982 len = MIN(len, fromsa->sa_len); 983 #ifdef COMPAT_OLDSOCK 984 if (mp->msg_flags & MSG_COMPAT) 985 ((struct osockaddr *)fromsa)->sa_family = 986 fromsa->sa_family; 987 #endif 988 error = copyout(fromsa, mp->msg_name, (unsigned)len); 989 if (error) 990 goto out; 991 } 992 mp->msg_namelen = len; 993 if (namelenp && 994 (error = copyout(&len, namelenp, sizeof (int)))) { 995 #ifdef COMPAT_OLDSOCK 996 if (mp->msg_flags & MSG_COMPAT) 997 error = 0; /* old recvfrom didn't check */ 998 else 999 #endif 1000 goto out; 1001 } 1002 } 1003 if (mp->msg_control) { 1004 
#ifdef COMPAT_OLDSOCK 1005 /* 1006 * We assume that old recvmsg calls won't receive access 1007 * rights and other control info, esp. as control info 1008 * is always optional and those options didn't exist in 4.3. 1009 * If we receive rights, trim the cmsghdr; anything else 1010 * is tossed. 1011 */ 1012 if (control && mp->msg_flags & MSG_COMPAT) { 1013 if (mtod(control, struct cmsghdr *)->cmsg_level != 1014 SOL_SOCKET || 1015 mtod(control, struct cmsghdr *)->cmsg_type != 1016 SCM_RIGHTS) { 1017 mp->msg_controllen = 0; 1018 goto out; 1019 } 1020 control->m_len -= sizeof (struct cmsghdr); 1021 control->m_data += sizeof (struct cmsghdr); 1022 } 1023 #endif 1024 len = mp->msg_controllen; 1025 m = control; 1026 mp->msg_controllen = 0; 1027 ctlbuf = mp->msg_control; 1028 1029 while (m && len > 0) { 1030 unsigned int tocopy; 1031 1032 if (len >= m->m_len) 1033 tocopy = m->m_len; 1034 else { 1035 mp->msg_flags |= MSG_CTRUNC; 1036 tocopy = len; 1037 } 1038 1039 if ((error = copyout(mtod(m, caddr_t), 1040 ctlbuf, tocopy)) != 0) 1041 goto out; 1042 1043 ctlbuf += tocopy; 1044 len -= tocopy; 1045 m = m->m_next; 1046 } 1047 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1048 } 1049 out: 1050 fputsock(so); 1051 if (fromsa) 1052 FREE(fromsa, M_SONAME); 1053 if (control) 1054 m_freem(control); 1055 return (error); 1056 } 1057 1058 /* 1059 * MPSAFE 1060 */ 1061 int 1062 recvfrom(td, uap) 1063 struct thread *td; 1064 register struct recvfrom_args /* { 1065 int s; 1066 caddr_t buf; 1067 size_t len; 1068 int flags; 1069 caddr_t from; 1070 int *fromlenaddr; 1071 } */ *uap; 1072 { 1073 struct msghdr msg; 1074 struct iovec aiov; 1075 int error; 1076 1077 mtx_lock(&Giant); 1078 if (uap->fromlenaddr) { 1079 error = copyin(uap->fromlenaddr, 1080 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1081 if (error) 1082 goto done2; 1083 } else { 1084 msg.msg_namelen = 0; 1085 } 1086 msg.msg_name = uap->from; 1087 msg.msg_iov = &aiov; 1088 msg.msg_iovlen = 1; 1089 aiov.iov_base = uap->buf; 
1090 aiov.iov_len = uap->len; 1091 msg.msg_control = 0; 1092 msg.msg_flags = uap->flags; 1093 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1094 done2: 1095 mtx_unlock(&Giant); 1096 return(error); 1097 } 1098 1099 #ifdef COMPAT_OLDSOCK 1100 /* 1101 * MPSAFE 1102 */ 1103 int 1104 orecvfrom(td, uap) 1105 struct thread *td; 1106 struct recvfrom_args *uap; 1107 { 1108 1109 uap->flags |= MSG_COMPAT; 1110 return (recvfrom(td, uap)); 1111 } 1112 #endif 1113 1114 1115 #ifdef COMPAT_OLDSOCK 1116 /* 1117 * MPSAFE 1118 */ 1119 int 1120 orecv(td, uap) 1121 struct thread *td; 1122 register struct orecv_args /* { 1123 int s; 1124 caddr_t buf; 1125 int len; 1126 int flags; 1127 } */ *uap; 1128 { 1129 struct msghdr msg; 1130 struct iovec aiov; 1131 int error; 1132 1133 mtx_lock(&Giant); 1134 msg.msg_name = 0; 1135 msg.msg_namelen = 0; 1136 msg.msg_iov = &aiov; 1137 msg.msg_iovlen = 1; 1138 aiov.iov_base = uap->buf; 1139 aiov.iov_len = uap->len; 1140 msg.msg_control = 0; 1141 msg.msg_flags = uap->flags; 1142 error = recvit(td, uap->s, &msg, NULL); 1143 mtx_unlock(&Giant); 1144 return (error); 1145 } 1146 1147 /* 1148 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1149 * overlays the new one, missing only the flags, and with the (old) access 1150 * rights where the control fields are now. 
1151 * 1152 * MPSAFE 1153 */ 1154 int 1155 orecvmsg(td, uap) 1156 struct thread *td; 1157 register struct orecvmsg_args /* { 1158 int s; 1159 struct omsghdr *msg; 1160 int flags; 1161 } */ *uap; 1162 { 1163 struct msghdr msg; 1164 struct iovec aiov[UIO_SMALLIOV], *iov; 1165 int error; 1166 1167 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1168 if (error) 1169 return (error); 1170 1171 mtx_lock(&Giant); 1172 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1173 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1174 error = EMSGSIZE; 1175 goto done2; 1176 } 1177 MALLOC(iov, struct iovec *, 1178 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1179 M_WAITOK); 1180 } else { 1181 iov = aiov; 1182 } 1183 msg.msg_flags = uap->flags | MSG_COMPAT; 1184 error = copyin(msg.msg_iov, iov, 1185 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1186 if (error) 1187 goto done; 1188 msg.msg_iov = iov; 1189 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1190 1191 if (msg.msg_controllen && error == 0) 1192 error = copyout(&msg.msg_controllen, 1193 &uap->msg->msg_accrightslen, sizeof (int)); 1194 done: 1195 if (iov != aiov) 1196 FREE(iov, M_IOV); 1197 done2: 1198 mtx_unlock(&Giant); 1199 return (error); 1200 } 1201 #endif 1202 1203 /* 1204 * MPSAFE 1205 */ 1206 int 1207 recvmsg(td, uap) 1208 struct thread *td; 1209 register struct recvmsg_args /* { 1210 int s; 1211 struct msghdr *msg; 1212 int flags; 1213 } */ *uap; 1214 { 1215 struct msghdr msg; 1216 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1217 register int error; 1218 1219 mtx_lock(&Giant); 1220 error = copyin(uap->msg, &msg, sizeof (msg)); 1221 if (error) 1222 goto done2; 1223 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1224 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1225 error = EMSGSIZE; 1226 goto done2; 1227 } 1228 MALLOC(iov, struct iovec *, 1229 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1230 M_WAITOK); 1231 } else { 1232 iov = aiov; 1233 } 1234 #ifdef COMPAT_OLDSOCK 1235 msg.msg_flags = 
uap->flags &~ MSG_COMPAT; 1236 #else 1237 msg.msg_flags = uap->flags; 1238 #endif 1239 uiov = msg.msg_iov; 1240 msg.msg_iov = iov; 1241 error = copyin(uiov, iov, 1242 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1243 if (error) 1244 goto done; 1245 error = recvit(td, uap->s, &msg, NULL); 1246 if (!error) { 1247 msg.msg_iov = uiov; 1248 error = copyout(&msg, uap->msg, sizeof(msg)); 1249 } 1250 done: 1251 if (iov != aiov) 1252 FREE(iov, M_IOV); 1253 done2: 1254 mtx_unlock(&Giant); 1255 return (error); 1256 } 1257 1258 /* 1259 * MPSAFE 1260 */ 1261 /* ARGSUSED */ 1262 int 1263 shutdown(td, uap) 1264 struct thread *td; 1265 register struct shutdown_args /* { 1266 int s; 1267 int how; 1268 } */ *uap; 1269 { 1270 struct socket *so; 1271 int error; 1272 1273 mtx_lock(&Giant); 1274 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1275 error = soshutdown(so, uap->how); 1276 fputsock(so); 1277 } 1278 mtx_unlock(&Giant); 1279 return(error); 1280 } 1281 1282 /* 1283 * MPSAFE 1284 */ 1285 /* ARGSUSED */ 1286 int 1287 setsockopt(td, uap) 1288 struct thread *td; 1289 register struct setsockopt_args /* { 1290 int s; 1291 int level; 1292 int name; 1293 caddr_t val; 1294 int valsize; 1295 } */ *uap; 1296 { 1297 struct socket *so; 1298 struct sockopt sopt; 1299 int error; 1300 1301 if (uap->val == 0 && uap->valsize != 0) 1302 return (EFAULT); 1303 if (uap->valsize < 0) 1304 return (EINVAL); 1305 1306 mtx_lock(&Giant); 1307 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1308 sopt.sopt_dir = SOPT_SET; 1309 sopt.sopt_level = uap->level; 1310 sopt.sopt_name = uap->name; 1311 sopt.sopt_val = uap->val; 1312 sopt.sopt_valsize = uap->valsize; 1313 sopt.sopt_td = td; 1314 error = sosetopt(so, &sopt); 1315 fputsock(so); 1316 } 1317 mtx_unlock(&Giant); 1318 return(error); 1319 } 1320 1321 /* 1322 * MPSAFE 1323 */ 1324 /* ARGSUSED */ 1325 int 1326 getsockopt(td, uap) 1327 struct thread *td; 1328 register struct getsockopt_args /* { 1329 int s; 1330 int level; 1331 int name; 
1332 caddr_t val; 1333 int *avalsize; 1334 } */ *uap; 1335 { 1336 int valsize, error; 1337 struct socket *so; 1338 struct sockopt sopt; 1339 1340 mtx_lock(&Giant); 1341 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1342 goto done2; 1343 if (uap->val) { 1344 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1345 if (error) 1346 goto done1; 1347 if (valsize < 0) { 1348 error = EINVAL; 1349 goto done1; 1350 } 1351 } else { 1352 valsize = 0; 1353 } 1354 1355 sopt.sopt_dir = SOPT_GET; 1356 sopt.sopt_level = uap->level; 1357 sopt.sopt_name = uap->name; 1358 sopt.sopt_val = uap->val; 1359 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1360 sopt.sopt_td = td; 1361 1362 error = sogetopt(so, &sopt); 1363 if (error == 0) { 1364 valsize = sopt.sopt_valsize; 1365 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1366 } 1367 done1: 1368 fputsock(so); 1369 done2: 1370 mtx_unlock(&Giant); 1371 return (error); 1372 } 1373 1374 /* 1375 * getsockname1() - Get socket name. 
1376 * 1377 * MPSAFE 1378 */ 1379 /* ARGSUSED */ 1380 static int 1381 getsockname1(td, uap, compat) 1382 struct thread *td; 1383 register struct getsockname_args /* { 1384 int fdes; 1385 caddr_t asa; 1386 int *alen; 1387 } */ *uap; 1388 int compat; 1389 { 1390 struct socket *so; 1391 struct sockaddr *sa; 1392 int len, error; 1393 1394 mtx_lock(&Giant); 1395 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1396 goto done2; 1397 error = copyin(uap->alen, &len, sizeof (len)); 1398 if (error) 1399 goto done1; 1400 if (len < 0) { 1401 error = EINVAL; 1402 goto done1; 1403 } 1404 sa = 0; 1405 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1406 if (error) 1407 goto bad; 1408 if (sa == 0) { 1409 len = 0; 1410 goto gotnothing; 1411 } 1412 1413 len = MIN(len, sa->sa_len); 1414 #ifdef COMPAT_OLDSOCK 1415 if (compat) 1416 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1417 #endif 1418 error = copyout(sa, uap->asa, (u_int)len); 1419 if (error == 0) 1420 gotnothing: 1421 error = copyout(&len, uap->alen, sizeof (len)); 1422 bad: 1423 if (sa) 1424 FREE(sa, M_SONAME); 1425 done1: 1426 fputsock(so); 1427 done2: 1428 mtx_unlock(&Giant); 1429 return (error); 1430 } 1431 1432 /* 1433 * MPSAFE 1434 */ 1435 int 1436 getsockname(td, uap) 1437 struct thread *td; 1438 struct getsockname_args *uap; 1439 { 1440 1441 return (getsockname1(td, uap, 0)); 1442 } 1443 1444 #ifdef COMPAT_OLDSOCK 1445 /* 1446 * MPSAFE 1447 */ 1448 int 1449 ogetsockname(td, uap) 1450 struct thread *td; 1451 struct getsockname_args *uap; 1452 { 1453 1454 return (getsockname1(td, uap, 1)); 1455 } 1456 #endif /* COMPAT_OLDSOCK */ 1457 1458 /* 1459 * getpeername1() - Get name of peer for connected socket. 
1460 * 1461 * MPSAFE 1462 */ 1463 /* ARGSUSED */ 1464 static int 1465 getpeername1(td, uap, compat) 1466 struct thread *td; 1467 register struct getpeername_args /* { 1468 int fdes; 1469 caddr_t asa; 1470 int *alen; 1471 } */ *uap; 1472 int compat; 1473 { 1474 struct socket *so; 1475 struct sockaddr *sa; 1476 int len, error; 1477 1478 mtx_lock(&Giant); 1479 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1480 goto done2; 1481 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1482 error = ENOTCONN; 1483 goto done1; 1484 } 1485 error = copyin(uap->alen, &len, sizeof (len)); 1486 if (error) 1487 goto done1; 1488 if (len < 0) { 1489 error = EINVAL; 1490 goto done1; 1491 } 1492 sa = 0; 1493 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1494 if (error) 1495 goto bad; 1496 if (sa == 0) { 1497 len = 0; 1498 goto gotnothing; 1499 } 1500 len = MIN(len, sa->sa_len); 1501 #ifdef COMPAT_OLDSOCK 1502 if (compat) 1503 ((struct osockaddr *)sa)->sa_family = 1504 sa->sa_family; 1505 #endif 1506 error = copyout(sa, uap->asa, (u_int)len); 1507 if (error) 1508 goto bad; 1509 gotnothing: 1510 error = copyout(&len, uap->alen, sizeof (len)); 1511 bad: 1512 if (sa) 1513 FREE(sa, M_SONAME); 1514 done1: 1515 fputsock(so); 1516 done2: 1517 mtx_unlock(&Giant); 1518 return (error); 1519 } 1520 1521 /* 1522 * MPSAFE 1523 */ 1524 int 1525 getpeername(td, uap) 1526 struct thread *td; 1527 struct getpeername_args *uap; 1528 { 1529 1530 return (getpeername1(td, uap, 0)); 1531 } 1532 1533 #ifdef COMPAT_OLDSOCK 1534 /* 1535 * MPSAFE 1536 */ 1537 int 1538 ogetpeername(td, uap) 1539 struct thread *td; 1540 struct ogetpeername_args *uap; 1541 { 1542 1543 /* XXX uap should have type `getpeername_args *' to begin with. 
*/ 1544 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1545 } 1546 #endif /* COMPAT_OLDSOCK */ 1547 1548 int 1549 sockargs(mp, buf, buflen, type) 1550 struct mbuf **mp; 1551 caddr_t buf; 1552 int buflen, type; 1553 { 1554 register struct sockaddr *sa; 1555 register struct mbuf *m; 1556 int error; 1557 1558 if ((u_int)buflen > MLEN) { 1559 #ifdef COMPAT_OLDSOCK 1560 if (type == MT_SONAME && (u_int)buflen <= 112) 1561 buflen = MLEN; /* unix domain compat. hack */ 1562 else 1563 #endif 1564 return (EINVAL); 1565 } 1566 m = m_get(M_TRYWAIT, type); 1567 if (m == NULL) 1568 return (ENOBUFS); 1569 m->m_len = buflen; 1570 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1571 if (error) 1572 (void) m_free(m); 1573 else { 1574 *mp = m; 1575 if (type == MT_SONAME) { 1576 sa = mtod(m, struct sockaddr *); 1577 1578 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1579 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1580 sa->sa_family = sa->sa_len; 1581 #endif 1582 sa->sa_len = buflen; 1583 } 1584 } 1585 return (error); 1586 } 1587 1588 int 1589 getsockaddr(namp, uaddr, len) 1590 struct sockaddr **namp; 1591 caddr_t uaddr; 1592 size_t len; 1593 { 1594 struct sockaddr *sa; 1595 int error; 1596 1597 if (len > SOCK_MAXADDRLEN) 1598 return ENAMETOOLONG; 1599 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1600 error = copyin(uaddr, sa, len); 1601 if (error) { 1602 FREE(sa, M_SONAME); 1603 } else { 1604 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1605 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1606 sa->sa_family = sa->sa_len; 1607 #endif 1608 sa->sa_len = len; 1609 *namp = sa; 1610 } 1611 return error; 1612 } 1613 1614 /* 1615 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. 
:-)) 1616 */ 1617 static void 1618 sf_buf_init(void *arg) 1619 { 1620 int i; 1621 1622 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF); 1623 mtx_lock(&sf_freelist.sf_lock); 1624 SLIST_INIT(&sf_freelist.sf_head); 1625 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE); 1626 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, 1627 M_NOWAIT | M_ZERO); 1628 for (i = 0; i < nsfbufs; i++) { 1629 sf_bufs[i].kva = sf_base + i * PAGE_SIZE; 1630 SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list); 1631 } 1632 sf_buf_alloc_want = 0; 1633 mtx_unlock(&sf_freelist.sf_lock); 1634 } 1635 1636 /* 1637 * Get an sf_buf from the freelist. Will block if none are available. 1638 */ 1639 struct sf_buf * 1640 sf_buf_alloc() 1641 { 1642 struct sf_buf *sf; 1643 int error; 1644 1645 mtx_lock(&sf_freelist.sf_lock); 1646 while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) { 1647 sf_buf_alloc_want++; 1648 error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH, 1649 "sfbufa", 0); 1650 sf_buf_alloc_want--; 1651 1652 /* 1653 * If we got a signal, don't risk going back to sleep. 1654 */ 1655 if (error) 1656 break; 1657 } 1658 if (sf != NULL) 1659 SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list); 1660 mtx_unlock(&sf_freelist.sf_lock); 1661 return (sf); 1662 } 1663 1664 #define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT]) 1665 1666 /* 1667 * Detatch mapped page and release resources back to the system. 1668 */ 1669 void 1670 sf_buf_free(void *addr, void *args) 1671 { 1672 struct sf_buf *sf; 1673 struct vm_page *m; 1674 1675 GIANT_REQUIRED; 1676 1677 sf = dtosf(addr); 1678 pmap_qremove((vm_offset_t)addr, 1); 1679 m = sf->m; 1680 vm_page_lock_queues(); 1681 vm_page_unwire(m, 0); 1682 /* 1683 * Check for the object going away on us. This can 1684 * happen since we don't hold a reference to it. 1685 * If so, we're responsible for freeing the page. 
1686 */ 1687 if (m->wire_count == 0 && m->object == NULL) 1688 vm_page_free(m); 1689 vm_page_unlock_queues(); 1690 sf->m = NULL; 1691 mtx_lock(&sf_freelist.sf_lock); 1692 SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list); 1693 if (sf_buf_alloc_want > 0) 1694 wakeup_one(&sf_freelist); 1695 mtx_unlock(&sf_freelist.sf_lock); 1696 } 1697 1698 /* 1699 * sendfile(2) 1700 * 1701 * MPSAFE 1702 * 1703 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1704 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1705 * 1706 * Send a file specified by 'fd' and starting at 'offset' to a socket 1707 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1708 * nbytes == 0. Optionally add a header and/or trailer to the socket 1709 * output. If specified, write the total number of bytes sent into *sbytes. 1710 * 1711 */ 1712 int 1713 sendfile(struct thread *td, struct sendfile_args *uap) 1714 { 1715 1716 return (do_sendfile(td, uap, 0)); 1717 } 1718 1719 #ifdef COMPAT_FREEBSD4 1720 int 1721 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1722 { 1723 struct sendfile_args args; 1724 1725 args.fd = uap->fd; 1726 args.s = uap->s; 1727 args.offset = uap->offset; 1728 args.nbytes = uap->nbytes; 1729 args.hdtr = uap->hdtr; 1730 args.sbytes = uap->sbytes; 1731 args.flags = uap->flags; 1732 1733 return (do_sendfile(td, &args, 1)); 1734 } 1735 #endif /* COMPAT_FREEBSD4 */ 1736 1737 static int 1738 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1739 { 1740 struct vnode *vp; 1741 struct vm_object *obj; 1742 struct socket *so = NULL; 1743 struct mbuf *m; 1744 struct sf_buf *sf; 1745 struct vm_page *pg; 1746 struct writev_args nuap; 1747 struct sf_hdtr hdtr; 1748 off_t off, xfsize, hdtr_size, sbytes = 0; 1749 int error, s; 1750 1751 mtx_lock(&Giant); 1752 1753 hdtr_size = 0; 1754 1755 /* 1756 * The descriptor must be a regular file and have a backing VM object. 
1757 */ 1758 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1759 goto done; 1760 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1761 error = EINVAL; 1762 goto done; 1763 } 1764 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1765 goto done; 1766 if (so->so_type != SOCK_STREAM) { 1767 error = EINVAL; 1768 goto done; 1769 } 1770 if ((so->so_state & SS_ISCONNECTED) == 0) { 1771 error = ENOTCONN; 1772 goto done; 1773 } 1774 if (uap->offset < 0) { 1775 error = EINVAL; 1776 goto done; 1777 } 1778 1779 #ifdef MAC 1780 error = mac_check_socket_send(td->td_ucred, so); 1781 if (error) 1782 goto done; 1783 #endif 1784 1785 /* 1786 * If specified, get the pointer to the sf_hdtr struct for 1787 * any headers/trailers. 1788 */ 1789 if (uap->hdtr != NULL) { 1790 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1791 if (error) 1792 goto done; 1793 /* 1794 * Send any headers. Wimp out and use writev(2). 1795 */ 1796 if (hdtr.headers != NULL) { 1797 nuap.fd = uap->s; 1798 nuap.iovp = hdtr.headers; 1799 nuap.iovcnt = hdtr.hdr_cnt; 1800 error = writev(td, &nuap); 1801 if (error) 1802 goto done; 1803 if (compat) 1804 sbytes += td->td_retval[0]; 1805 else 1806 hdtr_size += td->td_retval[0]; 1807 } 1808 } 1809 1810 /* 1811 * Protect against multiple writers to the socket. 1812 */ 1813 (void) sblock(&so->so_snd, M_WAITOK); 1814 1815 /* 1816 * Loop through the pages in the file, starting with the requested 1817 * offset. Get a file page (do I/O if necessary), map the file page 1818 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1819 * it on the socket. 1820 */ 1821 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1822 vm_pindex_t pindex; 1823 vm_offset_t pgoff; 1824 1825 pindex = OFF_TO_IDX(off); 1826 retry_lookup: 1827 /* 1828 * Calculate the amount to transfer. Not to exceed a page, 1829 * the EOF, or the passed in nbytes. 
1830 */ 1831 xfsize = obj->un_pager.vnp.vnp_size - off; 1832 if (xfsize > PAGE_SIZE) 1833 xfsize = PAGE_SIZE; 1834 pgoff = (vm_offset_t)(off & PAGE_MASK); 1835 if (PAGE_SIZE - pgoff < xfsize) 1836 xfsize = PAGE_SIZE - pgoff; 1837 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1838 xfsize = uap->nbytes - sbytes; 1839 if (xfsize <= 0) 1840 break; 1841 /* 1842 * Optimize the non-blocking case by looking at the socket space 1843 * before going to the extra work of constituting the sf_buf. 1844 */ 1845 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1846 if (so->so_state & SS_CANTSENDMORE) 1847 error = EPIPE; 1848 else 1849 error = EAGAIN; 1850 sbunlock(&so->so_snd); 1851 goto done; 1852 } 1853 /* 1854 * Attempt to look up the page. 1855 * 1856 * Allocate if not found 1857 * 1858 * Wait and loop if busy. 1859 */ 1860 pg = vm_page_lookup(obj, pindex); 1861 1862 if (pg == NULL) { 1863 pg = vm_page_alloc(obj, pindex, 1864 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1865 if (pg == NULL) { 1866 VM_WAIT; 1867 goto retry_lookup; 1868 } 1869 vm_page_lock_queues(); 1870 vm_page_wakeup(pg); 1871 } else { 1872 vm_page_lock_queues(); 1873 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1874 goto retry_lookup; 1875 /* 1876 * Wire the page so it does not get ripped out from 1877 * under us. 1878 */ 1879 vm_page_wire(pg); 1880 } 1881 1882 /* 1883 * If page is not valid for what we need, initiate I/O 1884 */ 1885 1886 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1887 int bsize, resid; 1888 1889 /* 1890 * Ensure that our page is still around when the I/O 1891 * completes. 1892 */ 1893 vm_page_io_start(pg); 1894 vm_page_unlock_queues(); 1895 1896 /* 1897 * Get the page from backing store. 1898 */ 1899 bsize = vp->v_mount->mnt_stat.f_iosize; 1900 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1901 /* 1902 * XXXMAC: Because we don't have fp->f_cred here, 1903 * we pass in NOCRED. This is probably wrong, but 1904 * is consistent with our original implementation. 
1905 */ 1906 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1907 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1908 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1909 td->td_ucred, NOCRED, &resid, td); 1910 VOP_UNLOCK(vp, 0, td); 1911 vm_page_lock_queues(); 1912 vm_page_flag_clear(pg, PG_ZERO); 1913 vm_page_io_finish(pg); 1914 if (error) { 1915 vm_page_unwire(pg, 0); 1916 /* 1917 * See if anyone else might know about this page. 1918 * If not and it is not valid, then free it. 1919 */ 1920 if (pg->wire_count == 0 && pg->valid == 0 && 1921 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1922 pg->hold_count == 0) { 1923 vm_page_busy(pg); 1924 vm_page_free(pg); 1925 } 1926 vm_page_unlock_queues(); 1927 sbunlock(&so->so_snd); 1928 goto done; 1929 } 1930 } 1931 vm_page_unlock_queues(); 1932 1933 /* 1934 * Get a sendfile buf. We usually wait as long as necessary, 1935 * but this wait can be interrupted. 1936 */ 1937 if ((sf = sf_buf_alloc()) == NULL) { 1938 vm_page_lock_queues(); 1939 vm_page_unwire(pg, 0); 1940 if (pg->wire_count == 0 && pg->object == NULL) 1941 vm_page_free(pg); 1942 vm_page_unlock_queues(); 1943 sbunlock(&so->so_snd); 1944 error = EINTR; 1945 goto done; 1946 } 1947 1948 /* 1949 * Allocate a kernel virtual page and insert the physical page 1950 * into it. 1951 */ 1952 sf->m = pg; 1953 pmap_qenter(sf->kva, &pg, 1); 1954 /* 1955 * Get an mbuf header and set it up as having external storage. 1956 */ 1957 MGETHDR(m, M_TRYWAIT, MT_DATA); 1958 if (m == NULL) { 1959 error = ENOBUFS; 1960 sf_buf_free((void *)sf->kva, NULL); 1961 sbunlock(&so->so_snd); 1962 goto done; 1963 } 1964 /* 1965 * Setup external storage for mbuf. 1966 */ 1967 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL, M_RDONLY, 1968 EXT_SFBUF); 1969 m->m_data = (char *) sf->kva + pgoff; 1970 m->m_pkthdr.len = m->m_len = xfsize; 1971 /* 1972 * Add the buffer to the socket buffer chain. 1973 */ 1974 s = splnet(); 1975 retry_space: 1976 /* 1977 * Make sure that the socket is still able to take more data. 
1978 * CANTSENDMORE being true usually means that the connection 1979 * was closed. so_error is true when an error was sensed after 1980 * a previous send. 1981 * The state is checked after the page mapping and buffer 1982 * allocation above since those operations may block and make 1983 * any socket checks stale. From this point forward, nothing 1984 * blocks before the pru_send (or more accurately, any blocking 1985 * results in a loop back to here to re-check). 1986 */ 1987 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1988 if (so->so_state & SS_CANTSENDMORE) { 1989 error = EPIPE; 1990 } else { 1991 error = so->so_error; 1992 so->so_error = 0; 1993 } 1994 m_freem(m); 1995 sbunlock(&so->so_snd); 1996 splx(s); 1997 goto done; 1998 } 1999 /* 2000 * Wait for socket space to become available. We do this just 2001 * after checking the connection state above in order to avoid 2002 * a race condition with sbwait(). 2003 */ 2004 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 2005 if (so->so_state & SS_NBIO) { 2006 m_freem(m); 2007 sbunlock(&so->so_snd); 2008 splx(s); 2009 error = EAGAIN; 2010 goto done; 2011 } 2012 error = sbwait(&so->so_snd); 2013 /* 2014 * An error from sbwait usually indicates that we've 2015 * been interrupted by a signal. If we've sent anything 2016 * then return bytes sent, otherwise return the error. 2017 */ 2018 if (error) { 2019 m_freem(m); 2020 sbunlock(&so->so_snd); 2021 splx(s); 2022 goto done; 2023 } 2024 goto retry_space; 2025 } 2026 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2027 splx(s); 2028 if (error) { 2029 sbunlock(&so->so_snd); 2030 goto done; 2031 } 2032 } 2033 sbunlock(&so->so_snd); 2034 2035 /* 2036 * Send trailers. Wimp out and use writev(2). 
2037 */ 2038 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 2039 nuap.fd = uap->s; 2040 nuap.iovp = hdtr.trailers; 2041 nuap.iovcnt = hdtr.trl_cnt; 2042 error = writev(td, &nuap); 2043 if (error) 2044 goto done; 2045 if (compat) 2046 sbytes += td->td_retval[0]; 2047 else 2048 hdtr_size += td->td_retval[0]; 2049 } 2050 2051 done: 2052 /* 2053 * If there was no error we have to clear td->td_retval[0] 2054 * because it may have been set by writev. 2055 */ 2056 if (error == 0) { 2057 td->td_retval[0] = 0; 2058 } 2059 if (uap->sbytes != NULL) { 2060 if (!compat) 2061 sbytes += hdtr_size; 2062 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2063 } 2064 if (vp) 2065 vrele(vp); 2066 if (so) 2067 fputsock(so); 2068 mtx_unlock(&Giant); 2069 return (error); 2070 } 2071