1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_compat.h" 43 #include "opt_ktrace.h" 44 #include "opt_mac.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/kernel.h> 49 #include <sys/lock.h> 50 #include <sys/mac.h> 51 #include <sys/mutex.h> 52 #include <sys/sysproto.h> 53 #include <sys/malloc.h> 54 #include <sys/filedesc.h> 55 #include <sys/event.h> 56 #include <sys/proc.h> 57 #include <sys/fcntl.h> 58 #include <sys/file.h> 59 #include <sys/filio.h> 60 #include <sys/mount.h> 61 #include <sys/mbuf.h> 62 #include <sys/protosw.h> 63 #include <sys/socket.h> 64 #include <sys/socketvar.h> 65 #include <sys/signalvar.h> 66 #include <sys/syscallsubr.h> 67 #include <sys/uio.h> 68 #include <sys/vnode.h> 69 #ifdef KTRACE 70 #include <sys/ktrace.h> 71 #endif 72 73 #include <vm/vm.h> 74 #include <vm/vm_object.h> 75 #include <vm/vm_page.h> 76 #include <vm/vm_pageout.h> 77 #include <vm/vm_kern.h> 78 #include <vm/vm_extern.h> 79 80 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 81 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 82 83 static int accept1(struct thread *td, struct accept_args *uap, int compat); 84 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 85 static int getsockname1(struct thread *td, struct getsockname_args *uap, 86 int compat); 87 static int getpeername1(struct thread *td, struct getpeername_args *uap, 88 int compat); 89 90 /* 91 * System call interface to the socket abstraction. 92 */ 93 #if defined(COMPAT_43) || defined(COMPAT_SUNOS) 94 #define COMPAT_OLDSOCK 95 #endif 96 97 /* 98 * MPSAFE 99 */ 100 int 101 socket(td, uap) 102 struct thread *td; 103 register struct socket_args /* { 104 int domain; 105 int type; 106 int protocol; 107 } */ *uap; 108 { 109 struct filedesc *fdp; 110 struct socket *so; 111 struct file *fp; 112 int fd, error; 113 114 mtx_lock(&Giant); 115 fdp = td->td_proc->p_fd; 116 error = falloc(td, &fp, &fd); 117 if (error) 118 goto done2; 119 fhold(fp); 120 error = socreate(uap->domain, &so, uap->type, uap->protocol, 121 td->td_ucred, td); 122 FILEDESC_LOCK(fdp); 123 if (error) { 124 if (fdp->fd_ofiles[fd] == fp) { 125 fdp->fd_ofiles[fd] = NULL; 126 FILEDESC_UNLOCK(fdp); 127 fdrop(fp, td); 128 } else 129 FILEDESC_UNLOCK(fdp); 130 } else { 131 fp->f_data = so; /* already has ref count */ 132 fp->f_flag = FREAD|FWRITE; 133 fp->f_ops = &socketops; 134 fp->f_type = DTYPE_SOCKET; 135 FILEDESC_UNLOCK(fdp); 136 td->td_retval[0] = fd; 137 } 138 fdrop(fp, td); 139 done2: 140 mtx_unlock(&Giant); 141 return (error); 142 } 143 144 /* 145 * MPSAFE 146 */ 147 /* ARGSUSED */ 148 int 149 bind(td, uap) 150 struct thread *td; 151 register struct bind_args /* { 152 int s; 153 caddr_t name; 154 int namelen; 155 } */ *uap; 156 { 157 struct sockaddr *sa; 158 int error; 159 160 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 161 return (error); 162 163 return (kern_bind(td, uap->s, sa)); 164 } 165 166 int 167 kern_bind(td, fd, sa) 168 struct thread *td; 169 int fd; 170 struct sockaddr *sa; 171 { 172 struct socket *so; 173 int error; 174 175 mtx_lock(&Giant); 176 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 177 goto done2; 178 #ifdef MAC 179 error = mac_check_socket_bind(td->td_ucred, so, sa); 180 if (error) 181 goto done1; 182 #endif 183 error = sobind(so, sa, td); 184 #ifdef MAC 185 done1: 186 #endif 187 fputsock(so); 188 done2: 189 mtx_unlock(&Giant); 190 FREE(sa, M_SONAME); 191 return (error); 192 } 193 194 /* 195 * MPSAFE 196 */ 197 /* ARGSUSED */ 198 int 199 listen(td, uap) 200 struct thread *td; 201 register struct listen_args /* { 202 int s; 203 int backlog; 204 } */ *uap; 205 { 206 struct socket *so; 207 int error; 208 209 mtx_lock(&Giant); 210 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 211 #ifdef MAC 212 error = mac_check_socket_listen(td->td_ucred, so); 213 if (error) 214 goto done; 215 #endif 216 error = solisten(so, uap->backlog, td); 217 #ifdef MAC 218 done: 219 #endif 220 fputsock(so); 221 } 222 mtx_unlock(&Giant); 223 return(error); 224 } 225 226 /* 227 * accept1() 228 * MPSAFE 229 */ 230 static int 231 accept1(td, uap, compat) 232 struct thread *td; 233 register struct accept_args /* { 234 int s; 235 caddr_t name; 236 int *anamelen; 237 } */ *uap; 238 int compat; 239 { 240 struct filedesc *fdp; 241 struct file *nfp = NULL; 242 struct sockaddr *sa; 243 int namelen, error, s; 244 struct socket *head, *so; 245 int fd; 246 u_int fflag; 247 pid_t pgid; 248 int tmp; 249 250 fdp = td->td_proc->p_fd; 251 if (uap->name) { 252 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 253 if(error) 254 goto done3; 255 if (namelen < 0) { 256 error = EINVAL; 257 goto done3; 258 } 259 } 260 mtx_lock(&Giant); 261 error = fgetsock(td, uap->s, &head, &fflag); 262 if (error) 263 goto done2; 264 s = splnet(); 265 if ((head->so_options & SO_ACCEPTCONN) == 0) { 266 splx(s); 267 error = EINVAL; 268 goto done; 269 } 270 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 271 if (head->so_state & SS_CANTRCVMORE) { 272 head->so_error = ECONNABORTED; 273 break; 274 } 275 if ((head->so_state & SS_NBIO) != 0) { 276 head->so_error = EWOULDBLOCK; 277 break; 278 } 279 error = tsleep(&head->so_timeo, PSOCK | PCATCH, 280 "accept", 0); 281 if (error) { 282 splx(s); 283 goto done; 284 } 285 } 286 if (head->so_error) { 287 error = head->so_error; 288 head->so_error = 0; 289 splx(s); 290 goto done; 291 } 292 293 /* 294 * At this point we know that there is at least one connection 295 * ready to be accepted. Remove it from the queue prior to 296 * allocating the file descriptor for it since falloc() may 297 * block allowing another process to accept the connection 298 * instead. 299 */ 300 so = TAILQ_FIRST(&head->so_comp); 301 TAILQ_REMOVE(&head->so_comp, so, so_list); 302 head->so_qlen--; 303 304 error = falloc(td, &nfp, &fd); 305 if (error) { 306 /* 307 * Probably ran out of file descriptors. Put the 308 * unaccepted connection back onto the queue and 309 * do another wakeup so some other process might 310 * have a chance at it. 311 */ 312 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 313 head->so_qlen++; 314 wakeup_one(&head->so_timeo); 315 splx(s); 316 goto done; 317 } 318 fhold(nfp); 319 td->td_retval[0] = fd; 320 321 /* connection has been removed from the listen queue */ 322 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 323 324 so->so_state &= ~SS_COMP; 325 so->so_head = NULL; 326 pgid = fgetown(&head->so_sigio); 327 if (pgid != 0) 328 fsetown(pgid, &so->so_sigio); 329 330 FILE_LOCK(nfp); 331 soref(so); /* file descriptor reference */ 332 nfp->f_data = so; /* nfp has ref count from falloc */ 333 nfp->f_flag = fflag; 334 nfp->f_ops = &socketops; 335 nfp->f_type = DTYPE_SOCKET; 336 FILE_UNLOCK(nfp); 337 /* Sync socket nonblocking/async state with file flags */ 338 tmp = fflag & FNONBLOCK; 339 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 340 tmp = fflag & FASYNC; 341 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 342 sa = 0; 343 error = soaccept(so, &sa); 344 if (error) { 345 /* 346 * return a namelen of zero for older code which might 347 * ignore the return value from accept. 348 */ 349 if (uap->name != NULL) { 350 namelen = 0; 351 (void) copyout(&namelen, 352 uap->anamelen, sizeof(*uap->anamelen)); 353 } 354 goto noconnection; 355 } 356 if (sa == NULL) { 357 namelen = 0; 358 if (uap->name) 359 goto gotnoname; 360 splx(s); 361 error = 0; 362 goto done; 363 } 364 if (uap->name) { 365 /* check sa_len before it is destroyed */ 366 if (namelen > sa->sa_len) 367 namelen = sa->sa_len; 368 #ifdef COMPAT_OLDSOCK 369 if (compat) 370 ((struct osockaddr *)sa)->sa_family = 371 sa->sa_family; 372 #endif 373 error = copyout(sa, uap->name, (u_int)namelen); 374 if (!error) 375 gotnoname: 376 error = copyout(&namelen, 377 uap->anamelen, sizeof (*uap->anamelen)); 378 } 379 noconnection: 380 if (sa) 381 FREE(sa, M_SONAME); 382 383 /* 384 * close the new descriptor, assuming someone hasn't ripped it 385 * out from under us. 386 */ 387 if (error) { 388 FILEDESC_LOCK(fdp); 389 if (fdp->fd_ofiles[fd] == nfp) { 390 fdp->fd_ofiles[fd] = NULL; 391 FILEDESC_UNLOCK(fdp); 392 fdrop(nfp, td); 393 } else { 394 FILEDESC_UNLOCK(fdp); 395 } 396 } 397 splx(s); 398 399 /* 400 * Release explicitly held references before returning. 401 */ 402 done: 403 if (nfp != NULL) 404 fdrop(nfp, td); 405 fputsock(head); 406 done2: 407 mtx_unlock(&Giant); 408 done3: 409 return (error); 410 } 411 412 /* 413 * MPSAFE (accept1() is MPSAFE) 414 */ 415 int 416 accept(td, uap) 417 struct thread *td; 418 struct accept_args *uap; 419 { 420 421 return (accept1(td, uap, 0)); 422 } 423 424 #ifdef COMPAT_OLDSOCK 425 /* 426 * MPSAFE (accept1() is MPSAFE) 427 */ 428 int 429 oaccept(td, uap) 430 struct thread *td; 431 struct accept_args *uap; 432 { 433 434 return (accept1(td, uap, 1)); 435 } 436 #endif /* COMPAT_OLDSOCK */ 437 438 /* 439 * MPSAFE 440 */ 441 /* ARGSUSED */ 442 int 443 connect(td, uap) 444 struct thread *td; 445 register struct connect_args /* { 446 int s; 447 caddr_t name; 448 int namelen; 449 } */ *uap; 450 { 451 struct sockaddr *sa; 452 int error; 453 454 error = getsockaddr(&sa, uap->name, uap->namelen); 455 if (error) 456 return error; 457 458 return (kern_connect(td, uap->s, sa)); 459 } 460 461 462 int 463 kern_connect(td, fd, sa) 464 struct thread *td; 465 int fd; 466 struct sockaddr *sa; 467 { 468 struct socket *so; 469 int error, s; 470 int interrupted = 0; 471 472 mtx_lock(&Giant); 473 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 474 goto done2; 475 if (so->so_state & SS_ISCONNECTING) { 476 error = EALREADY; 477 goto done1; 478 } 479 #ifdef MAC 480 error = mac_check_socket_connect(td->td_ucred, so, sa); 481 if (error) 482 goto bad; 483 #endif 484 error = soconnect(so, sa, td); 485 if (error) 486 goto bad; 487 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 488 error = EINPROGRESS; 489 goto done1; 490 } 491 s = splnet(); 492 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 493 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0); 494 if (error) { 495 if (error == EINTR || error == ERESTART) 496 interrupted = 1; 497 break; 498 } 499 } 500 if (error == 0) { 501 error = so->so_error; 502 so->so_error = 0; 503 } 504 splx(s); 505 bad: 506 if (!interrupted) 507 so->so_state &= ~SS_ISCONNECTING; 508 if (error == ERESTART) 509 error = EINTR; 510 done1: 511 fputsock(so); 512 done2: 513 mtx_unlock(&Giant); 514 FREE(sa, M_SONAME); 515 return (error); 516 } 517 518 /* 519 * MPSAFE 520 */ 521 int 522 socketpair(td, uap) 523 struct thread *td; 524 register struct socketpair_args /* { 525 int domain; 526 int type; 527 int protocol; 528 int *rsv; 529 } */ *uap; 530 { 531 register struct filedesc *fdp = td->td_proc->p_fd; 532 struct file *fp1, *fp2; 533 struct socket *so1, *so2; 534 int fd, error, sv[2]; 535 536 mtx_lock(&Giant); 537 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 538 td->td_ucred, td); 539 if (error) 540 goto done2; 541 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 542 td->td_ucred, td); 543 if (error) 544 goto free1; 545 error = falloc(td, &fp1, &fd); 546 if (error) 547 goto free2; 548 fhold(fp1); 549 sv[0] = fd; 550 fp1->f_data = so1; /* so1 already has ref count */ 551 error = falloc(td, &fp2, &fd); 552 if (error) 553 goto free3; 554 fhold(fp2); 555 fp2->f_data = so2; /* so2 already has ref count */ 556 sv[1] = fd; 557 error = soconnect2(so1, so2); 558 if (error) 559 goto free4; 560 if (uap->type == SOCK_DGRAM) { 561 /* 562 * Datagram socket connection is asymmetric. 563 */ 564 error = soconnect2(so2, so1); 565 if (error) 566 goto free4; 567 } 568 FILE_LOCK(fp1); 569 fp1->f_flag = FREAD|FWRITE; 570 fp1->f_ops = &socketops; 571 fp1->f_type = DTYPE_SOCKET; 572 FILE_UNLOCK(fp1); 573 FILE_LOCK(fp2); 574 fp2->f_flag = FREAD|FWRITE; 575 fp2->f_ops = &socketops; 576 fp2->f_type = DTYPE_SOCKET; 577 FILE_UNLOCK(fp2); 578 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 579 fdrop(fp1, td); 580 fdrop(fp2, td); 581 goto done2; 582 free4: 583 FILEDESC_LOCK(fdp); 584 if (fdp->fd_ofiles[sv[1]] == fp2) { 585 fdp->fd_ofiles[sv[1]] = NULL; 586 FILEDESC_UNLOCK(fdp); 587 fdrop(fp2, td); 588 } else 589 FILEDESC_UNLOCK(fdp); 590 fdrop(fp2, td); 591 free3: 592 FILEDESC_LOCK(fdp); 593 if (fdp->fd_ofiles[sv[0]] == fp1) { 594 fdp->fd_ofiles[sv[0]] = NULL; 595 FILEDESC_UNLOCK(fdp); 596 fdrop(fp1, td); 597 } else 598 FILEDESC_UNLOCK(fdp); 599 fdrop(fp1, td); 600 free2: 601 (void)soclose(so2); 602 free1: 603 (void)soclose(so1); 604 done2: 605 mtx_unlock(&Giant); 606 return (error); 607 } 608 609 static int 610 sendit(td, s, mp, flags) 611 register struct thread *td; 612 int s; 613 register struct msghdr *mp; 614 int flags; 615 { 616 struct mbuf *control; 617 struct sockaddr *to; 618 int error; 619 620 if (mp->msg_name != NULL) { 621 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 622 if (error) { 623 to = NULL; 624 goto bad; 625 } 626 mp->msg_name = to; 627 } else 628 to = NULL; 629 630 if (mp->msg_control) { 631 if (mp->msg_controllen < sizeof(struct cmsghdr) 632 #ifdef COMPAT_OLDSOCK 633 && mp->msg_flags != MSG_COMPAT 634 #endif 635 ) { 636 error = EINVAL; 637 goto bad; 638 } 639 error = sockargs(&control, mp->msg_control, 640 mp->msg_controllen, MT_CONTROL); 641 if (error) 642 goto bad; 643 #ifdef COMPAT_OLDSOCK 644 if (mp->msg_flags == MSG_COMPAT) { 645 register struct cmsghdr *cm; 646 647 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 648 if (control == 0) { 649 error = ENOBUFS; 650 goto bad; 651 } else { 652 cm = mtod(control, struct cmsghdr *); 653 cm->cmsg_len = control->m_len; 654 cm->cmsg_level = SOL_SOCKET; 655 cm->cmsg_type = SCM_RIGHTS; 656 } 657 } 658 #endif 659 } else { 660 control = NULL; 661 } 662 663 error = kern_sendit(td, s, mp, flags, control); 664 665 bad: 666 if (to) 667 FREE(to, M_SONAME); 668 return (error); 669 } 670 671 int 672 kern_sendit(td, s, mp, flags, control) 673 struct thread *td; 674 int s; 675 struct msghdr *mp; 676 int flags; 677 struct mbuf *control; 678 { 679 struct uio auio; 680 struct iovec *iov; 681 struct socket *so; 682 int i; 683 int len, error; 684 #ifdef KTRACE 685 struct iovec *ktriov = NULL; 686 struct uio ktruio; 687 int iovlen; 688 #endif 689 690 mtx_lock(&Giant); 691 if ((error = fgetsock(td, s, &so, NULL)) != 0) 692 goto bad2; 693 694 #ifdef MAC 695 error = mac_check_socket_send(td->td_ucred, so); 696 if (error) 697 goto bad; 698 #endif 699 700 auio.uio_iov = mp->msg_iov; 701 auio.uio_iovcnt = mp->msg_iovlen; 702 auio.uio_segflg = UIO_USERSPACE; 703 auio.uio_rw = UIO_WRITE; 704 auio.uio_td = td; 705 auio.uio_offset = 0; /* XXX */ 706 auio.uio_resid = 0; 707 iov = mp->msg_iov; 708 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 709 if ((auio.uio_resid += iov->iov_len) < 0) { 710 error = EINVAL; 711 goto bad; 712 } 713 } 714 #ifdef KTRACE 715 if (KTRPOINT(td, KTR_GENIO)) { 716 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 717 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 718 bcopy(auio.uio_iov, ktriov, iovlen); 719 ktruio = auio; 720 } 721 #endif 722 len = auio.uio_resid; 723 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 724 0, control, flags, td); 725 if (error) { 726 if (auio.uio_resid != len && (error == ERESTART || 727 error == EINTR || error == EWOULDBLOCK)) 728 error = 0; 729 /* Generation of SIGPIPE can be controlled per socket */ 730 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 731 PROC_LOCK(td->td_proc); 732 psignal(td->td_proc, SIGPIPE); 733 PROC_UNLOCK(td->td_proc); 734 } 735 } 736 if (error == 0) 737 td->td_retval[0] = len - auio.uio_resid; 738 #ifdef KTRACE 739 if (ktriov != NULL) { 740 if (error == 0) { 741 ktruio.uio_iov = ktriov; 742 ktruio.uio_resid = td->td_retval[0]; 743 ktrgenio(s, UIO_WRITE, &ktruio, error); 744 } 745 FREE(ktriov, M_TEMP); 746 } 747 #endif 748 bad: 749 fputsock(so); 750 bad2: 751 mtx_unlock(&Giant); 752 return (error); 753 } 754 755 /* 756 * MPSAFE 757 */ 758 int 759 sendto(td, uap) 760 struct thread *td; 761 register struct sendto_args /* { 762 int s; 763 caddr_t buf; 764 size_t len; 765 int flags; 766 caddr_t to; 767 int tolen; 768 } */ *uap; 769 { 770 struct msghdr msg; 771 struct iovec aiov; 772 int error; 773 774 msg.msg_name = uap->to; 775 msg.msg_namelen = uap->tolen; 776 msg.msg_iov = &aiov; 777 msg.msg_iovlen = 1; 778 msg.msg_control = 0; 779 #ifdef COMPAT_OLDSOCK 780 msg.msg_flags = 0; 781 #endif 782 aiov.iov_base = uap->buf; 783 aiov.iov_len = uap->len; 784 error = sendit(td, uap->s, &msg, uap->flags); 785 return (error); 786 } 787 788 #ifdef COMPAT_OLDSOCK 789 /* 790 * MPSAFE 791 */ 792 int 793 osend(td, uap) 794 struct thread *td; 795 register struct osend_args /* { 796 int s; 797 caddr_t buf; 798 int len; 799 int flags; 800 } */ *uap; 801 { 802 struct msghdr msg; 803 struct iovec aiov; 804 int error; 805 806 msg.msg_name = 0; 807 msg.msg_namelen = 0; 808 msg.msg_iov = &aiov; 809 msg.msg_iovlen = 1; 810 aiov.iov_base = uap->buf; 811 aiov.iov_len = uap->len; 812 msg.msg_control = 0; 813 msg.msg_flags = 0; 814 error = sendit(td, uap->s, &msg, uap->flags); 815 return (error); 816 } 817 818 /* 819 * MPSAFE 820 */ 821 int 822 osendmsg(td, uap) 823 struct thread *td; 824 register struct osendmsg_args /* { 825 int s; 826 caddr_t msg; 827 int flags; 828 } */ *uap; 829 { 830 struct msghdr msg; 831 struct iovec aiov[UIO_SMALLIOV], *iov; 832 int error; 833 834 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 835 if (error) 836 goto done2; 837 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 838 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 839 error = EMSGSIZE; 840 goto done2; 841 } 842 MALLOC(iov, struct iovec *, 843 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 844 M_WAITOK); 845 } else { 846 iov = aiov; 847 } 848 error = copyin(msg.msg_iov, iov, 849 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 850 if (error) 851 goto done; 852 msg.msg_flags = MSG_COMPAT; 853 msg.msg_iov = iov; 854 error = sendit(td, uap->s, &msg, uap->flags); 855 done: 856 if (iov != aiov) 857 FREE(iov, M_IOV); 858 done2: 859 return (error); 860 } 861 #endif 862 863 /* 864 * MPSAFE 865 */ 866 int 867 sendmsg(td, uap) 868 struct thread *td; 869 register struct sendmsg_args /* { 870 int s; 871 caddr_t msg; 872 int flags; 873 } */ *uap; 874 { 875 struct msghdr msg; 876 struct iovec aiov[UIO_SMALLIOV], *iov; 877 int error; 878 879 error = copyin(uap->msg, &msg, sizeof (msg)); 880 if (error) 881 goto done2; 882 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 883 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 884 error = EMSGSIZE; 885 goto done2; 886 } 887 MALLOC(iov, struct iovec *, 888 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 889 M_WAITOK); 890 } else { 891 iov = aiov; 892 } 893 if (msg.msg_iovlen && 894 (error = copyin(msg.msg_iov, iov, 895 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 896 goto done; 897 msg.msg_iov = iov; 898 #ifdef COMPAT_OLDSOCK 899 msg.msg_flags = 0; 900 #endif 901 error = sendit(td, uap->s, &msg, uap->flags); 902 done: 903 if (iov != aiov) 904 FREE(iov, M_IOV); 905 done2: 906 return (error); 907 } 908 909 static int 910 recvit(td, s, mp, namelenp) 911 register struct thread *td; 912 int s; 913 register struct msghdr *mp; 914 void *namelenp; 915 { 916 struct uio auio; 917 register struct iovec *iov; 918 register int i; 919 int len, error; 920 struct mbuf *m, *control = 0; 921 caddr_t ctlbuf; 922 struct socket *so; 923 struct sockaddr *fromsa = 0; 924 #ifdef KTRACE 925 struct iovec *ktriov = NULL; 926 struct uio ktruio; 927 int iovlen; 928 #endif 929 930 mtx_lock(&Giant); 931 if ((error = fgetsock(td, s, &so, NULL)) != 0) { 932 mtx_unlock(&Giant); 933 return (error); 934 } 935 936 #ifdef MAC 937 error = mac_check_socket_receive(td->td_ucred, so); 938 if (error) { 939 fputsock(so); 940 mtx_unlock(&Giant); 941 return (error); 942 } 943 #endif 944 945 auio.uio_iov = mp->msg_iov; 946 auio.uio_iovcnt = mp->msg_iovlen; 947 auio.uio_segflg = UIO_USERSPACE; 948 auio.uio_rw = UIO_READ; 949 auio.uio_td = td; 950 auio.uio_offset = 0; /* XXX */ 951 auio.uio_resid = 0; 952 iov = mp->msg_iov; 953 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 954 if ((auio.uio_resid += iov->iov_len) < 0) { 955 fputsock(so); 956 return (EINVAL); 957 } 958 } 959 #ifdef KTRACE 960 if (KTRPOINT(td, KTR_GENIO)) { 961 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 962 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 963 bcopy(auio.uio_iov, ktriov, iovlen); 964 ktruio = auio; 965 } 966 #endif 967 len = auio.uio_resid; 968 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 969 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 970 &mp->msg_flags); 971 if (error) { 972 if (auio.uio_resid != len && (error == ERESTART || 973 error == EINTR || error == EWOULDBLOCK)) 974 error = 0; 975 } 976 #ifdef KTRACE 977 if (ktriov != NULL) { 978 if (error == 0) { 979 ktruio.uio_iov = ktriov; 980 ktruio.uio_resid = len - auio.uio_resid; 981 ktrgenio(s, UIO_READ, &ktruio, error); 982 } 983 FREE(ktriov, M_TEMP); 984 } 985 #endif 986 if (error) 987 goto out; 988 td->td_retval[0] = len - auio.uio_resid; 989 if (mp->msg_name) { 990 len = mp->msg_namelen; 991 if (len <= 0 || fromsa == 0) 992 len = 0; 993 else { 994 /* save sa_len before it is destroyed by MSG_COMPAT */ 995 len = MIN(len, fromsa->sa_len); 996 #ifdef COMPAT_OLDSOCK 997 if (mp->msg_flags & MSG_COMPAT) 998 ((struct osockaddr *)fromsa)->sa_family = 999 fromsa->sa_family; 1000 #endif 1001 error = copyout(fromsa, mp->msg_name, (unsigned)len); 1002 if (error) 1003 goto out; 1004 } 1005 mp->msg_namelen = len; 1006 if (namelenp && 1007 (error = copyout(&len, namelenp, sizeof (int)))) { 1008 #ifdef COMPAT_OLDSOCK 1009 if (mp->msg_flags & MSG_COMPAT) 1010 error = 0; /* old recvfrom didn't check */ 1011 else 1012 #endif 1013 goto out; 1014 } 1015 } 1016 if (mp->msg_control) { 1017 #ifdef COMPAT_OLDSOCK 1018 /* 1019 * We assume that old recvmsg calls won't receive access 1020 * rights and other control info, esp. as control info 1021 * is always optional and those options didn't exist in 4.3. 1022 * If we receive rights, trim the cmsghdr; anything else 1023 * is tossed. 1024 */ 1025 if (control && mp->msg_flags & MSG_COMPAT) { 1026 if (mtod(control, struct cmsghdr *)->cmsg_level != 1027 SOL_SOCKET || 1028 mtod(control, struct cmsghdr *)->cmsg_type != 1029 SCM_RIGHTS) { 1030 mp->msg_controllen = 0; 1031 goto out; 1032 } 1033 control->m_len -= sizeof (struct cmsghdr); 1034 control->m_data += sizeof (struct cmsghdr); 1035 } 1036 #endif 1037 len = mp->msg_controllen; 1038 m = control; 1039 mp->msg_controllen = 0; 1040 ctlbuf = mp->msg_control; 1041 1042 while (m && len > 0) { 1043 unsigned int tocopy; 1044 1045 if (len >= m->m_len) 1046 tocopy = m->m_len; 1047 else { 1048 mp->msg_flags |= MSG_CTRUNC; 1049 tocopy = len; 1050 } 1051 1052 if ((error = copyout(mtod(m, caddr_t), 1053 ctlbuf, tocopy)) != 0) 1054 goto out; 1055 1056 ctlbuf += tocopy; 1057 len -= tocopy; 1058 m = m->m_next; 1059 } 1060 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1061 } 1062 out: 1063 fputsock(so); 1064 mtx_unlock(&Giant); 1065 if (fromsa) 1066 FREE(fromsa, M_SONAME); 1067 if (control) 1068 m_freem(control); 1069 return (error); 1070 } 1071 1072 /* 1073 * MPSAFE 1074 */ 1075 int 1076 recvfrom(td, uap) 1077 struct thread *td; 1078 register struct recvfrom_args /* { 1079 int s; 1080 caddr_t buf; 1081 size_t len; 1082 int flags; 1083 caddr_t from; 1084 int *fromlenaddr; 1085 } */ *uap; 1086 { 1087 struct msghdr msg; 1088 struct iovec aiov; 1089 int error; 1090 1091 if (uap->fromlenaddr) { 1092 error = copyin(uap->fromlenaddr, 1093 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1094 if (error) 1095 goto done2; 1096 } else { 1097 msg.msg_namelen = 0; 1098 } 1099 msg.msg_name = uap->from; 1100 msg.msg_iov = &aiov; 1101 msg.msg_iovlen = 1; 1102 aiov.iov_base = uap->buf; 1103 aiov.iov_len = uap->len; 1104 msg.msg_control = 0; 1105 msg.msg_flags = uap->flags; 1106 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1107 done2: 1108 return(error); 1109 } 1110 1111 #ifdef COMPAT_OLDSOCK 1112 /* 1113 * MPSAFE 1114 */ 1115 int 1116 orecvfrom(td, uap) 1117 struct thread *td; 1118 struct recvfrom_args *uap; 1119 { 1120 1121 uap->flags |= MSG_COMPAT; 1122 return (recvfrom(td, uap)); 1123 } 1124 #endif 1125 1126 1127 #ifdef COMPAT_OLDSOCK 1128 /* 1129 * MPSAFE 1130 */ 1131 int 1132 orecv(td, uap) 1133 struct thread *td; 1134 register struct orecv_args /* { 1135 int s; 1136 caddr_t buf; 1137 int len; 1138 int flags; 1139 } */ *uap; 1140 { 1141 struct msghdr msg; 1142 struct iovec aiov; 1143 int error; 1144 1145 msg.msg_name = 0; 1146 msg.msg_namelen = 0; 1147 msg.msg_iov = &aiov; 1148 msg.msg_iovlen = 1; 1149 aiov.iov_base = uap->buf; 1150 aiov.iov_len = uap->len; 1151 msg.msg_control = 0; 1152 msg.msg_flags = uap->flags; 1153 error = recvit(td, uap->s, &msg, NULL); 1154 return (error); 1155 } 1156 1157 /* 1158 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1159 * overlays the new one, missing only the flags, and with the (old) access 1160 * rights where the control fields are now. 1161 * 1162 * MPSAFE 1163 */ 1164 int 1165 orecvmsg(td, uap) 1166 struct thread *td; 1167 register struct orecvmsg_args /* { 1168 int s; 1169 struct omsghdr *msg; 1170 int flags; 1171 } */ *uap; 1172 { 1173 struct msghdr msg; 1174 struct iovec aiov[UIO_SMALLIOV], *iov; 1175 int error; 1176 1177 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1178 if (error) 1179 return (error); 1180 1181 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1182 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1183 error = EMSGSIZE; 1184 goto done2; 1185 } 1186 MALLOC(iov, struct iovec *, 1187 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1188 M_WAITOK); 1189 } else { 1190 iov = aiov; 1191 } 1192 msg.msg_flags = uap->flags | MSG_COMPAT; 1193 error = copyin(msg.msg_iov, iov, 1194 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1195 if (error) 1196 goto done; 1197 msg.msg_iov = iov; 1198 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1199 1200 if (msg.msg_controllen && error == 0) 1201 error = copyout(&msg.msg_controllen, 1202 &uap->msg->msg_accrightslen, sizeof (int)); 1203 done: 1204 if (iov != aiov) 1205 FREE(iov, M_IOV); 1206 done2: 1207 return (error); 1208 } 1209 #endif 1210 1211 /* 1212 * MPSAFE 1213 */ 1214 int 1215 recvmsg(td, uap) 1216 struct thread *td; 1217 register struct recvmsg_args /* { 1218 int s; 1219 struct msghdr *msg; 1220 int flags; 1221 } */ *uap; 1222 { 1223 struct msghdr msg; 1224 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1225 register int error; 1226 1227 error = copyin(uap->msg, &msg, sizeof (msg)); 1228 if (error) 1229 goto done2; 1230 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1231 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1232 error = EMSGSIZE; 1233 goto done2; 1234 } 1235 MALLOC(iov, struct iovec *, 1236 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1237 M_WAITOK); 1238 } else { 1239 iov = aiov; 1240 } 1241 #ifdef COMPAT_OLDSOCK 1242 msg.msg_flags = uap->flags &~ MSG_COMPAT; 1243 #else 1244 msg.msg_flags = uap->flags; 1245 #endif 1246 uiov = msg.msg_iov; 1247 msg.msg_iov = iov; 1248 error = copyin(uiov, iov, 1249 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1250 if (error) 1251 goto done; 1252 error = recvit(td, uap->s, &msg, NULL); 1253 if (!error) { 1254 msg.msg_iov = uiov; 1255 error = copyout(&msg, uap->msg, sizeof(msg)); 1256 } 1257 done: 1258 if (iov != aiov) 1259 FREE(iov, M_IOV); 1260 done2: 1261 return (error); 1262 } 1263 1264 /* 1265 * MPSAFE 1266 */ 1267 /* ARGSUSED */ 1268 int 1269 shutdown(td, uap) 1270 struct thread *td; 1271 register struct shutdown_args /* { 1272 int s; 1273 int how; 1274 } */ *uap; 1275 { 1276 struct socket *so; 1277 int error; 1278 1279 mtx_lock(&Giant); 1280 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1281 error = soshutdown(so, uap->how); 1282 fputsock(so); 1283 } 1284 mtx_unlock(&Giant); 1285 return(error); 1286 } 1287 1288 /* 1289 * MPSAFE 1290 */ 1291 /* ARGSUSED */ 1292 int 1293 setsockopt(td, uap) 1294 struct thread *td; 1295 register struct setsockopt_args /* { 1296 int s; 1297 int level; 1298 int name; 1299 caddr_t val; 1300 int valsize; 1301 } */ *uap; 1302 { 1303 struct socket *so; 1304 struct sockopt sopt; 1305 int error; 1306 1307 if (uap->val == 0 && uap->valsize != 0) 1308 return (EFAULT); 1309 if (uap->valsize < 0) 1310 return (EINVAL); 1311 1312 mtx_lock(&Giant); 1313 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1314 sopt.sopt_dir = SOPT_SET; 1315 sopt.sopt_level = uap->level; 1316 sopt.sopt_name = uap->name; 1317 sopt.sopt_val = uap->val; 1318 sopt.sopt_valsize = uap->valsize; 1319 sopt.sopt_td = td; 1320 error = sosetopt(so, &sopt); 1321 fputsock(so); 1322 } 1323 mtx_unlock(&Giant); 1324 return(error); 1325 } 1326 1327 /* 1328 * MPSAFE 1329 */ 1330 /* ARGSUSED */ 1331 int 1332 getsockopt(td, uap) 1333 struct thread *td; 1334 register struct getsockopt_args /* { 1335 int s; 1336 int level; 1337 int name; 1338 caddr_t val; 1339 int *avalsize; 1340 } */ *uap; 1341 { 1342 int valsize, error; 1343 struct socket *so; 1344 struct sockopt sopt; 1345 1346 mtx_lock(&Giant); 1347 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1348 goto done2; 1349 if (uap->val) { 1350 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1351 if (error) 1352 goto done1; 1353 if (valsize < 0) { 1354 error = EINVAL; 1355 goto done1; 1356 } 1357 } else { 1358 valsize = 0; 1359 } 1360 1361 sopt.sopt_dir = SOPT_GET; 1362 sopt.sopt_level = uap->level; 1363 sopt.sopt_name = uap->name; 1364 sopt.sopt_val = uap->val; 1365 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1366 sopt.sopt_td = td; 1367 1368 error = sogetopt(so, &sopt); 1369 if (error == 0) { 1370 valsize = sopt.sopt_valsize; 1371 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1372 } 1373 done1: 1374 fputsock(so); 1375 done2: 1376 mtx_unlock(&Giant); 1377 return (error); 1378 } 1379 1380 /* 1381 * getsockname1() - Get socket name. 1382 * 1383 * MPSAFE 1384 */ 1385 /* ARGSUSED */ 1386 static int 1387 getsockname1(td, uap, compat) 1388 struct thread *td; 1389 register struct getsockname_args /* { 1390 int fdes; 1391 caddr_t asa; 1392 int *alen; 1393 } */ *uap; 1394 int compat; 1395 { 1396 struct socket *so; 1397 struct sockaddr *sa; 1398 int len, error; 1399 1400 mtx_lock(&Giant); 1401 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1402 goto done2; 1403 error = copyin(uap->alen, &len, sizeof (len)); 1404 if (error) 1405 goto done1; 1406 if (len < 0) { 1407 error = EINVAL; 1408 goto done1; 1409 } 1410 sa = 0; 1411 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1412 if (error) 1413 goto bad; 1414 if (sa == 0) { 1415 len = 0; 1416 goto gotnothing; 1417 } 1418 1419 len = MIN(len, sa->sa_len); 1420 #ifdef COMPAT_OLDSOCK 1421 if (compat) 1422 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1423 #endif 1424 error = copyout(sa, uap->asa, (u_int)len); 1425 if (error == 0) 1426 gotnothing: 1427 error = copyout(&len, uap->alen, sizeof (len)); 1428 bad: 1429 if (sa) 1430 FREE(sa, M_SONAME); 1431 done1: 1432 fputsock(so); 1433 done2: 1434 mtx_unlock(&Giant); 1435 return (error); 1436 } 1437 1438 /* 1439 * MPSAFE 1440 */ 1441 int 1442 getsockname(td, uap) 1443 struct thread *td; 1444 struct getsockname_args *uap; 1445 { 1446 1447 return (getsockname1(td, uap, 0)); 1448 } 1449 1450 #ifdef COMPAT_OLDSOCK 1451 /* 1452 * MPSAFE 1453 */ 1454 int 1455 ogetsockname(td, uap) 1456 struct thread *td; 1457 struct getsockname_args *uap; 1458 { 1459 1460 return (getsockname1(td, uap, 1)); 1461 } 1462 #endif /* COMPAT_OLDSOCK */ 1463 1464 /* 1465 * getpeername1() - Get name of peer for connected socket. 1466 * 1467 * MPSAFE 1468 */ 1469 /* ARGSUSED */ 1470 static int 1471 getpeername1(td, uap, compat) 1472 struct thread *td; 1473 register struct getpeername_args /* { 1474 int fdes; 1475 caddr_t asa; 1476 int *alen; 1477 } */ *uap; 1478 int compat; 1479 { 1480 struct socket *so; 1481 struct sockaddr *sa; 1482 int len, error; 1483 1484 mtx_lock(&Giant); 1485 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1486 goto done2; 1487 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1488 error = ENOTCONN; 1489 goto done1; 1490 } 1491 error = copyin(uap->alen, &len, sizeof (len)); 1492 if (error) 1493 goto done1; 1494 if (len < 0) { 1495 error = EINVAL; 1496 goto done1; 1497 } 1498 sa = 0; 1499 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1500 if (error) 1501 goto bad; 1502 if (sa == 0) { 1503 len = 0; 1504 goto gotnothing; 1505 } 1506 len = MIN(len, sa->sa_len); 1507 #ifdef COMPAT_OLDSOCK 1508 if (compat) 1509 ((struct osockaddr *)sa)->sa_family = 1510 sa->sa_family; 1511 #endif 1512 error = copyout(sa, uap->asa, (u_int)len); 1513 if (error) 1514 goto bad; 1515 gotnothing: 1516 error = copyout(&len, uap->alen, sizeof (len)); 1517 bad: 1518 if (sa) 1519 FREE(sa, M_SONAME); 1520 done1: 1521 fputsock(so); 1522 done2: 1523 mtx_unlock(&Giant); 1524 return (error); 1525 } 1526 1527 /* 1528 * MPSAFE 1529 */ 1530 int 1531 getpeername(td, uap) 1532 struct thread *td; 1533 struct getpeername_args *uap; 1534 { 1535 1536 return (getpeername1(td, uap, 0)); 1537 } 1538 1539 #ifdef COMPAT_OLDSOCK 1540 /* 1541 * MPSAFE 1542 */ 1543 int 1544 ogetpeername(td, uap) 1545 struct thread *td; 1546 struct ogetpeername_args *uap; 1547 { 1548 1549 /* XXX uap should have type `getpeername_args *' to begin with. */ 1550 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1551 } 1552 #endif /* COMPAT_OLDSOCK */ 1553 1554 int 1555 sockargs(mp, buf, buflen, type) 1556 struct mbuf **mp; 1557 caddr_t buf; 1558 int buflen, type; 1559 { 1560 register struct sockaddr *sa; 1561 register struct mbuf *m; 1562 int error; 1563 1564 if ((u_int)buflen > MLEN) { 1565 #ifdef COMPAT_OLDSOCK 1566 if (type == MT_SONAME && (u_int)buflen <= 112) 1567 buflen = MLEN; /* unix domain compat. hack */ 1568 else 1569 #endif 1570 return (EINVAL); 1571 } 1572 m = m_get(M_TRYWAIT, type); 1573 if (m == NULL) 1574 return (ENOBUFS); 1575 m->m_len = buflen; 1576 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1577 if (error) 1578 (void) m_free(m); 1579 else { 1580 *mp = m; 1581 if (type == MT_SONAME) { 1582 sa = mtod(m, struct sockaddr *); 1583 1584 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1585 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1586 sa->sa_family = sa->sa_len; 1587 #endif 1588 sa->sa_len = buflen; 1589 } 1590 } 1591 return (error); 1592 } 1593 1594 int 1595 getsockaddr(namp, uaddr, len) 1596 struct sockaddr **namp; 1597 caddr_t uaddr; 1598 size_t len; 1599 { 1600 struct sockaddr *sa; 1601 int error; 1602 1603 if (len > SOCK_MAXADDRLEN) 1604 return ENAMETOOLONG; 1605 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1606 error = copyin(uaddr, sa, len); 1607 if (error) { 1608 FREE(sa, M_SONAME); 1609 } else { 1610 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1611 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1612 sa->sa_family = sa->sa_len; 1613 #endif 1614 sa->sa_len = len; 1615 *namp = sa; 1616 } 1617 return error; 1618 } 1619 1620 /* 1621 * sendfile(2) 1622 * 1623 * MPSAFE 1624 * 1625 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1626 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1627 * 1628 * Send a file specified by 'fd' and starting at 'offset' to a socket 1629 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1630 * nbytes == 0. Optionally add a header and/or trailer to the socket 1631 * output. If specified, write the total number of bytes sent into *sbytes. 1632 * 1633 */ 1634 int 1635 sendfile(struct thread *td, struct sendfile_args *uap) 1636 { 1637 1638 return (do_sendfile(td, uap, 0)); 1639 } 1640 1641 #ifdef COMPAT_FREEBSD4 1642 int 1643 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1644 { 1645 struct sendfile_args args; 1646 1647 args.fd = uap->fd; 1648 args.s = uap->s; 1649 args.offset = uap->offset; 1650 args.nbytes = uap->nbytes; 1651 args.hdtr = uap->hdtr; 1652 args.sbytes = uap->sbytes; 1653 args.flags = uap->flags; 1654 1655 return (do_sendfile(td, &args, 1)); 1656 } 1657 #endif /* COMPAT_FREEBSD4 */ 1658 1659 static int 1660 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1661 { 1662 struct vnode *vp; 1663 struct vm_object *obj; 1664 struct socket *so = NULL; 1665 struct mbuf *m; 1666 struct sf_buf *sf; 1667 struct vm_page *pg; 1668 struct writev_args nuap; 1669 struct sf_hdtr hdtr; 1670 off_t off, xfsize, hdtr_size, sbytes = 0; 1671 int error, s; 1672 1673 mtx_lock(&Giant); 1674 1675 hdtr_size = 0; 1676 1677 /* 1678 * The descriptor must be a regular file and have a backing VM object. 1679 */ 1680 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1681 goto done; 1682 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1683 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1684 error = EINVAL; 1685 VOP_UNLOCK(vp, 0, td); 1686 goto done; 1687 } 1688 VOP_UNLOCK(vp, 0, td); 1689 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1690 goto done; 1691 if (so->so_type != SOCK_STREAM) { 1692 error = EINVAL; 1693 goto done; 1694 } 1695 if ((so->so_state & SS_ISCONNECTED) == 0) { 1696 error = ENOTCONN; 1697 goto done; 1698 } 1699 if (uap->offset < 0) { 1700 error = EINVAL; 1701 goto done; 1702 } 1703 1704 #ifdef MAC 1705 error = mac_check_socket_send(td->td_ucred, so); 1706 if (error) 1707 goto done; 1708 #endif 1709 1710 /* 1711 * If specified, get the pointer to the sf_hdtr struct for 1712 * any headers/trailers. 1713 */ 1714 if (uap->hdtr != NULL) { 1715 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1716 if (error) 1717 goto done; 1718 /* 1719 * Send any headers. Wimp out and use writev(2). 1720 */ 1721 if (hdtr.headers != NULL) { 1722 nuap.fd = uap->s; 1723 nuap.iovp = hdtr.headers; 1724 nuap.iovcnt = hdtr.hdr_cnt; 1725 error = writev(td, &nuap); 1726 if (error) 1727 goto done; 1728 if (compat) 1729 sbytes += td->td_retval[0]; 1730 else 1731 hdtr_size += td->td_retval[0]; 1732 } 1733 } 1734 1735 /* 1736 * Protect against multiple writers to the socket. 1737 */ 1738 (void) sblock(&so->so_snd, M_WAITOK); 1739 1740 /* 1741 * Loop through the pages in the file, starting with the requested 1742 * offset. Get a file page (do I/O if necessary), map the file page 1743 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1744 * it on the socket. 1745 */ 1746 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1747 vm_pindex_t pindex; 1748 vm_offset_t pgoff; 1749 1750 pindex = OFF_TO_IDX(off); 1751 VM_OBJECT_LOCK(obj); 1752 retry_lookup: 1753 /* 1754 * Calculate the amount to transfer. Not to exceed a page, 1755 * the EOF, or the passed in nbytes. 1756 */ 1757 xfsize = obj->un_pager.vnp.vnp_size - off; 1758 VM_OBJECT_UNLOCK(obj); 1759 if (xfsize > PAGE_SIZE) 1760 xfsize = PAGE_SIZE; 1761 pgoff = (vm_offset_t)(off & PAGE_MASK); 1762 if (PAGE_SIZE - pgoff < xfsize) 1763 xfsize = PAGE_SIZE - pgoff; 1764 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1765 xfsize = uap->nbytes - sbytes; 1766 if (xfsize <= 0) 1767 break; 1768 /* 1769 * Optimize the non-blocking case by looking at the socket space 1770 * before going to the extra work of constituting the sf_buf. 1771 */ 1772 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1773 if (so->so_state & SS_CANTSENDMORE) 1774 error = EPIPE; 1775 else 1776 error = EAGAIN; 1777 sbunlock(&so->so_snd); 1778 goto done; 1779 } 1780 VM_OBJECT_LOCK(obj); 1781 /* 1782 * Attempt to look up the page. 1783 * 1784 * Allocate if not found 1785 * 1786 * Wait and loop if busy. 1787 */ 1788 pg = vm_page_lookup(obj, pindex); 1789 1790 if (pg == NULL) { 1791 pg = vm_page_alloc(obj, pindex, 1792 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1793 if (pg == NULL) { 1794 VM_OBJECT_UNLOCK(obj); 1795 VM_WAIT; 1796 VM_OBJECT_LOCK(obj); 1797 goto retry_lookup; 1798 } 1799 vm_page_lock_queues(); 1800 vm_page_wakeup(pg); 1801 } else { 1802 vm_page_lock_queues(); 1803 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1804 goto retry_lookup; 1805 /* 1806 * Wire the page so it does not get ripped out from 1807 * under us. 1808 */ 1809 vm_page_wire(pg); 1810 } 1811 1812 /* 1813 * If page is not valid for what we need, initiate I/O 1814 */ 1815 1816 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1817 int bsize, resid; 1818 1819 /* 1820 * Ensure that our page is still around when the I/O 1821 * completes. 1822 */ 1823 vm_page_io_start(pg); 1824 vm_page_unlock_queues(); 1825 VM_OBJECT_UNLOCK(obj); 1826 1827 /* 1828 * Get the page from backing store. 1829 */ 1830 bsize = vp->v_mount->mnt_stat.f_iosize; 1831 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1832 /* 1833 * XXXMAC: Because we don't have fp->f_cred here, 1834 * we pass in NOCRED. This is probably wrong, but 1835 * is consistent with our original implementation. 1836 */ 1837 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1838 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1839 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1840 td->td_ucred, NOCRED, &resid, td); 1841 VOP_UNLOCK(vp, 0, td); 1842 if (error) 1843 VM_OBJECT_LOCK(obj); 1844 vm_page_lock_queues(); 1845 vm_page_flag_clear(pg, PG_ZERO); 1846 vm_page_io_finish(pg); 1847 if (error) { 1848 vm_page_unwire(pg, 0); 1849 /* 1850 * See if anyone else might know about this page. 1851 * If not and it is not valid, then free it. 1852 */ 1853 if (pg->wire_count == 0 && pg->valid == 0 && 1854 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1855 pg->hold_count == 0) { 1856 vm_page_busy(pg); 1857 vm_page_free(pg); 1858 } 1859 vm_page_unlock_queues(); 1860 VM_OBJECT_UNLOCK(obj); 1861 sbunlock(&so->so_snd); 1862 goto done; 1863 } 1864 } else 1865 VM_OBJECT_UNLOCK(obj); 1866 vm_page_unlock_queues(); 1867 1868 /* 1869 * Get a sendfile buf. We usually wait as long as necessary, 1870 * but this wait can be interrupted. 1871 */ 1872 if ((sf = sf_buf_alloc(pg)) == NULL) { 1873 vm_page_lock_queues(); 1874 vm_page_unwire(pg, 0); 1875 if (pg->wire_count == 0 && pg->object == NULL) 1876 vm_page_free(pg); 1877 vm_page_unlock_queues(); 1878 sbunlock(&so->so_snd); 1879 error = EINTR; 1880 goto done; 1881 } 1882 1883 /* 1884 * Get an mbuf header and set it up as having external storage. 1885 */ 1886 MGETHDR(m, M_TRYWAIT, MT_DATA); 1887 if (m == NULL) { 1888 error = ENOBUFS; 1889 sf_buf_free((void *)sf->kva, sf); 1890 sbunlock(&so->so_snd); 1891 goto done; 1892 } 1893 /* 1894 * Setup external storage for mbuf. 1895 */ 1896 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, sf, M_RDONLY, 1897 EXT_SFBUF); 1898 m->m_data = (char *) sf->kva + pgoff; 1899 m->m_pkthdr.len = m->m_len = xfsize; 1900 /* 1901 * Add the buffer to the socket buffer chain. 1902 */ 1903 s = splnet(); 1904 retry_space: 1905 /* 1906 * Make sure that the socket is still able to take more data. 1907 * CANTSENDMORE being true usually means that the connection 1908 * was closed. so_error is true when an error was sensed after 1909 * a previous send. 1910 * The state is checked after the page mapping and buffer 1911 * allocation above since those operations may block and make 1912 * any socket checks stale. From this point forward, nothing 1913 * blocks before the pru_send (or more accurately, any blocking 1914 * results in a loop back to here to re-check). 1915 */ 1916 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1917 if (so->so_state & SS_CANTSENDMORE) { 1918 error = EPIPE; 1919 } else { 1920 error = so->so_error; 1921 so->so_error = 0; 1922 } 1923 m_freem(m); 1924 sbunlock(&so->so_snd); 1925 splx(s); 1926 goto done; 1927 } 1928 /* 1929 * Wait for socket space to become available. We do this just 1930 * after checking the connection state above in order to avoid 1931 * a race condition with sbwait(). 1932 */ 1933 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1934 if (so->so_state & SS_NBIO) { 1935 m_freem(m); 1936 sbunlock(&so->so_snd); 1937 splx(s); 1938 error = EAGAIN; 1939 goto done; 1940 } 1941 error = sbwait(&so->so_snd); 1942 /* 1943 * An error from sbwait usually indicates that we've 1944 * been interrupted by a signal. If we've sent anything 1945 * then return bytes sent, otherwise return the error. 1946 */ 1947 if (error) { 1948 m_freem(m); 1949 sbunlock(&so->so_snd); 1950 splx(s); 1951 goto done; 1952 } 1953 goto retry_space; 1954 } 1955 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 1956 splx(s); 1957 if (error) { 1958 sbunlock(&so->so_snd); 1959 goto done; 1960 } 1961 } 1962 sbunlock(&so->so_snd); 1963 1964 /* 1965 * Send trailers. Wimp out and use writev(2). 1966 */ 1967 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 1968 nuap.fd = uap->s; 1969 nuap.iovp = hdtr.trailers; 1970 nuap.iovcnt = hdtr.trl_cnt; 1971 error = writev(td, &nuap); 1972 if (error) 1973 goto done; 1974 if (compat) 1975 sbytes += td->td_retval[0]; 1976 else 1977 hdtr_size += td->td_retval[0]; 1978 } 1979 1980 done: 1981 /* 1982 * If there was no error we have to clear td->td_retval[0] 1983 * because it may have been set by writev. 1984 */ 1985 if (error == 0) { 1986 td->td_retval[0] = 0; 1987 } 1988 if (uap->sbytes != NULL) { 1989 if (!compat) 1990 sbytes += hdtr_size; 1991 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 1992 } 1993 if (vp) 1994 vrele(vp); 1995 if (so) 1996 fputsock(so); 1997 mtx_unlock(&Giant); 1998 return (error); 1999 } 2000