1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_compat.h" 43 #include "opt_ktrace.h" 44 #include "opt_mac.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/kernel.h> 49 #include <sys/lock.h> 50 #include <sys/mac.h> 51 #include <sys/mutex.h> 52 #include <sys/sysproto.h> 53 #include <sys/malloc.h> 54 #include <sys/filedesc.h> 55 #include <sys/event.h> 56 #include <sys/proc.h> 57 #include <sys/fcntl.h> 58 #include <sys/file.h> 59 #include <sys/filio.h> 60 #include <sys/mount.h> 61 #include <sys/mbuf.h> 62 #include <sys/protosw.h> 63 #include <sys/sf_buf.h> 64 #include <sys/socket.h> 65 #include <sys/socketvar.h> 66 #include <sys/signalvar.h> 67 #include <sys/syscallsubr.h> 68 #include <sys/uio.h> 69 #include <sys/vnode.h> 70 #ifdef KTRACE 71 #include <sys/ktrace.h> 72 #endif 73 74 #include <vm/vm.h> 75 #include <vm/vm_object.h> 76 #include <vm/vm_page.h> 77 #include <vm/vm_pageout.h> 78 #include <vm/vm_kern.h> 79 #include <vm/vm_extern.h> 80 81 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 82 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 83 84 static int accept1(struct thread *td, struct accept_args *uap, int compat); 85 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 86 static int getsockname1(struct thread *td, struct getsockname_args *uap, 87 int compat); 88 static int getpeername1(struct thread *td, struct getpeername_args *uap, 89 int compat); 90 91 /* 92 * System call interface to the socket abstraction. 
93 */ 94 #if defined(COMPAT_43) || defined(COMPAT_SUNOS) 95 #define COMPAT_OLDSOCK 96 #endif 97 98 /* 99 * MPSAFE 100 */ 101 int 102 socket(td, uap) 103 struct thread *td; 104 register struct socket_args /* { 105 int domain; 106 int type; 107 int protocol; 108 } */ *uap; 109 { 110 struct filedesc *fdp; 111 struct socket *so; 112 struct file *fp; 113 int fd, error; 114 115 mtx_lock(&Giant); 116 fdp = td->td_proc->p_fd; 117 error = falloc(td, &fp, &fd); 118 if (error) 119 goto done2; 120 /* An extra reference on `fp' has been held for us by falloc(). */ 121 error = socreate(uap->domain, &so, uap->type, uap->protocol, 122 td->td_ucred, td); 123 FILEDESC_LOCK(fdp); 124 if (error) { 125 if (fdp->fd_ofiles[fd] == fp) { 126 fdp->fd_ofiles[fd] = NULL; 127 FILEDESC_UNLOCK(fdp); 128 fdrop(fp, td); 129 } else 130 FILEDESC_UNLOCK(fdp); 131 } else { 132 fp->f_data = so; /* already has ref count */ 133 fp->f_flag = FREAD|FWRITE; 134 fp->f_ops = &socketops; 135 fp->f_type = DTYPE_SOCKET; 136 FILEDESC_UNLOCK(fdp); 137 td->td_retval[0] = fd; 138 } 139 fdrop(fp, td); 140 done2: 141 mtx_unlock(&Giant); 142 return (error); 143 } 144 145 /* 146 * MPSAFE 147 */ 148 /* ARGSUSED */ 149 int 150 bind(td, uap) 151 struct thread *td; 152 register struct bind_args /* { 153 int s; 154 caddr_t name; 155 int namelen; 156 } */ *uap; 157 { 158 struct sockaddr *sa; 159 int error; 160 161 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 162 return (error); 163 164 return (kern_bind(td, uap->s, sa)); 165 } 166 167 int 168 kern_bind(td, fd, sa) 169 struct thread *td; 170 int fd; 171 struct sockaddr *sa; 172 { 173 struct socket *so; 174 int error; 175 176 mtx_lock(&Giant); 177 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 178 goto done2; 179 #ifdef MAC 180 error = mac_check_socket_bind(td->td_ucred, so, sa); 181 if (error) 182 goto done1; 183 #endif 184 error = sobind(so, sa, td); 185 #ifdef MAC 186 done1: 187 #endif 188 fputsock(so); 189 done2: 190 mtx_unlock(&Giant); 191 FREE(sa, 
M_SONAME); 192 return (error); 193 } 194 195 /* 196 * MPSAFE 197 */ 198 /* ARGSUSED */ 199 int 200 listen(td, uap) 201 struct thread *td; 202 register struct listen_args /* { 203 int s; 204 int backlog; 205 } */ *uap; 206 { 207 struct socket *so; 208 int error; 209 210 mtx_lock(&Giant); 211 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 212 #ifdef MAC 213 error = mac_check_socket_listen(td->td_ucred, so); 214 if (error) 215 goto done; 216 #endif 217 error = solisten(so, uap->backlog, td); 218 #ifdef MAC 219 done: 220 #endif 221 fputsock(so); 222 } 223 mtx_unlock(&Giant); 224 return(error); 225 } 226 227 /* 228 * accept1() 229 * MPSAFE 230 */ 231 static int 232 accept1(td, uap, compat) 233 struct thread *td; 234 register struct accept_args /* { 235 int s; 236 caddr_t name; 237 int *anamelen; 238 } */ *uap; 239 int compat; 240 { 241 struct filedesc *fdp; 242 struct file *nfp = NULL; 243 struct sockaddr *sa; 244 int namelen, error, s; 245 struct socket *head, *so; 246 int fd; 247 u_int fflag; 248 pid_t pgid; 249 int tmp; 250 251 fdp = td->td_proc->p_fd; 252 if (uap->name) { 253 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 254 if(error) 255 goto done3; 256 if (namelen < 0) { 257 error = EINVAL; 258 goto done3; 259 } 260 } 261 mtx_lock(&Giant); 262 error = fgetsock(td, uap->s, &head, &fflag); 263 if (error) 264 goto done2; 265 s = splnet(); 266 if ((head->so_options & SO_ACCEPTCONN) == 0) { 267 splx(s); 268 error = EINVAL; 269 goto done; 270 } 271 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 272 if (head->so_state & SS_CANTRCVMORE) { 273 head->so_error = ECONNABORTED; 274 break; 275 } 276 if ((head->so_state & SS_NBIO) != 0) { 277 head->so_error = EWOULDBLOCK; 278 break; 279 } 280 error = tsleep(&head->so_timeo, PSOCK | PCATCH, 281 "accept", 0); 282 if (error) { 283 splx(s); 284 goto done; 285 } 286 } 287 if (head->so_error) { 288 error = head->so_error; 289 head->so_error = 0; 290 splx(s); 291 goto done; 292 } 293 294 /* 295 * At 
this point we know that there is at least one connection 296 * ready to be accepted. Remove it from the queue prior to 297 * allocating the file descriptor for it since falloc() may 298 * block allowing another process to accept the connection 299 * instead. 300 */ 301 so = TAILQ_FIRST(&head->so_comp); 302 TAILQ_REMOVE(&head->so_comp, so, so_list); 303 head->so_qlen--; 304 305 error = falloc(td, &nfp, &fd); 306 if (error) { 307 /* 308 * Probably ran out of file descriptors. Put the 309 * unaccepted connection back onto the queue and 310 * do another wakeup so some other process might 311 * have a chance at it. 312 */ 313 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 314 head->so_qlen++; 315 wakeup_one(&head->so_timeo); 316 splx(s); 317 goto done; 318 } 319 /* An extra reference on `nfp' has been held for us by falloc(). */ 320 td->td_retval[0] = fd; 321 322 /* connection has been removed from the listen queue */ 323 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 324 325 so->so_state &= ~SS_COMP; 326 so->so_head = NULL; 327 pgid = fgetown(&head->so_sigio); 328 if (pgid != 0) 329 fsetown(pgid, &so->so_sigio); 330 331 FILE_LOCK(nfp); 332 soref(so); /* file descriptor reference */ 333 nfp->f_data = so; /* nfp has ref count from falloc */ 334 nfp->f_flag = fflag; 335 nfp->f_ops = &socketops; 336 nfp->f_type = DTYPE_SOCKET; 337 FILE_UNLOCK(nfp); 338 /* Sync socket nonblocking/async state with file flags */ 339 tmp = fflag & FNONBLOCK; 340 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 341 tmp = fflag & FASYNC; 342 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 343 sa = 0; 344 error = soaccept(so, &sa); 345 if (error) { 346 /* 347 * return a namelen of zero for older code which might 348 * ignore the return value from accept. 
349 */ 350 if (uap->name != NULL) { 351 namelen = 0; 352 (void) copyout(&namelen, 353 uap->anamelen, sizeof(*uap->anamelen)); 354 } 355 goto noconnection; 356 } 357 if (sa == NULL) { 358 namelen = 0; 359 if (uap->name) 360 goto gotnoname; 361 splx(s); 362 error = 0; 363 goto done; 364 } 365 if (uap->name) { 366 /* check sa_len before it is destroyed */ 367 if (namelen > sa->sa_len) 368 namelen = sa->sa_len; 369 #ifdef COMPAT_OLDSOCK 370 if (compat) 371 ((struct osockaddr *)sa)->sa_family = 372 sa->sa_family; 373 #endif 374 error = copyout(sa, uap->name, (u_int)namelen); 375 if (!error) 376 gotnoname: 377 error = copyout(&namelen, 378 uap->anamelen, sizeof (*uap->anamelen)); 379 } 380 noconnection: 381 if (sa) 382 FREE(sa, M_SONAME); 383 384 /* 385 * close the new descriptor, assuming someone hasn't ripped it 386 * out from under us. 387 */ 388 if (error) { 389 FILEDESC_LOCK(fdp); 390 if (fdp->fd_ofiles[fd] == nfp) { 391 fdp->fd_ofiles[fd] = NULL; 392 FILEDESC_UNLOCK(fdp); 393 fdrop(nfp, td); 394 } else { 395 FILEDESC_UNLOCK(fdp); 396 } 397 } 398 splx(s); 399 400 /* 401 * Release explicitly held references before returning. 
402 */ 403 done: 404 if (nfp != NULL) 405 fdrop(nfp, td); 406 fputsock(head); 407 done2: 408 mtx_unlock(&Giant); 409 done3: 410 return (error); 411 } 412 413 /* 414 * MPSAFE (accept1() is MPSAFE) 415 */ 416 int 417 accept(td, uap) 418 struct thread *td; 419 struct accept_args *uap; 420 { 421 422 return (accept1(td, uap, 0)); 423 } 424 425 #ifdef COMPAT_OLDSOCK 426 /* 427 * MPSAFE (accept1() is MPSAFE) 428 */ 429 int 430 oaccept(td, uap) 431 struct thread *td; 432 struct accept_args *uap; 433 { 434 435 return (accept1(td, uap, 1)); 436 } 437 #endif /* COMPAT_OLDSOCK */ 438 439 /* 440 * MPSAFE 441 */ 442 /* ARGSUSED */ 443 int 444 connect(td, uap) 445 struct thread *td; 446 register struct connect_args /* { 447 int s; 448 caddr_t name; 449 int namelen; 450 } */ *uap; 451 { 452 struct sockaddr *sa; 453 int error; 454 455 error = getsockaddr(&sa, uap->name, uap->namelen); 456 if (error) 457 return error; 458 459 return (kern_connect(td, uap->s, sa)); 460 } 461 462 463 int 464 kern_connect(td, fd, sa) 465 struct thread *td; 466 int fd; 467 struct sockaddr *sa; 468 { 469 struct socket *so; 470 int error, s; 471 int interrupted = 0; 472 473 mtx_lock(&Giant); 474 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 475 goto done2; 476 if (so->so_state & SS_ISCONNECTING) { 477 error = EALREADY; 478 goto done1; 479 } 480 #ifdef MAC 481 error = mac_check_socket_connect(td->td_ucred, so, sa); 482 if (error) 483 goto bad; 484 #endif 485 error = soconnect(so, sa, td); 486 if (error) 487 goto bad; 488 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 489 error = EINPROGRESS; 490 goto done1; 491 } 492 s = splnet(); 493 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 494 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0); 495 if (error) { 496 if (error == EINTR || error == ERESTART) 497 interrupted = 1; 498 break; 499 } 500 } 501 if (error == 0) { 502 error = so->so_error; 503 so->so_error = 0; 504 } 505 splx(s); 506 bad: 507 if (!interrupted) 
508 so->so_state &= ~SS_ISCONNECTING; 509 if (error == ERESTART) 510 error = EINTR; 511 done1: 512 fputsock(so); 513 done2: 514 mtx_unlock(&Giant); 515 FREE(sa, M_SONAME); 516 return (error); 517 } 518 519 /* 520 * MPSAFE 521 */ 522 int 523 socketpair(td, uap) 524 struct thread *td; 525 register struct socketpair_args /* { 526 int domain; 527 int type; 528 int protocol; 529 int *rsv; 530 } */ *uap; 531 { 532 register struct filedesc *fdp = td->td_proc->p_fd; 533 struct file *fp1, *fp2; 534 struct socket *so1, *so2; 535 int fd, error, sv[2]; 536 537 mtx_lock(&Giant); 538 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 539 td->td_ucred, td); 540 if (error) 541 goto done2; 542 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 543 td->td_ucred, td); 544 if (error) 545 goto free1; 546 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 547 error = falloc(td, &fp1, &fd); 548 if (error) 549 goto free2; 550 sv[0] = fd; 551 fp1->f_data = so1; /* so1 already has ref count */ 552 error = falloc(td, &fp2, &fd); 553 if (error) 554 goto free3; 555 fp2->f_data = so2; /* so2 already has ref count */ 556 sv[1] = fd; 557 error = soconnect2(so1, so2); 558 if (error) 559 goto free4; 560 if (uap->type == SOCK_DGRAM) { 561 /* 562 * Datagram socket connection is asymmetric. 
563 */ 564 error = soconnect2(so2, so1); 565 if (error) 566 goto free4; 567 } 568 FILE_LOCK(fp1); 569 fp1->f_flag = FREAD|FWRITE; 570 fp1->f_ops = &socketops; 571 fp1->f_type = DTYPE_SOCKET; 572 FILE_UNLOCK(fp1); 573 FILE_LOCK(fp2); 574 fp2->f_flag = FREAD|FWRITE; 575 fp2->f_ops = &socketops; 576 fp2->f_type = DTYPE_SOCKET; 577 FILE_UNLOCK(fp2); 578 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 579 fdrop(fp1, td); 580 fdrop(fp2, td); 581 goto done2; 582 free4: 583 FILEDESC_LOCK(fdp); 584 if (fdp->fd_ofiles[sv[1]] == fp2) { 585 fdp->fd_ofiles[sv[1]] = NULL; 586 FILEDESC_UNLOCK(fdp); 587 fdrop(fp2, td); 588 } else 589 FILEDESC_UNLOCK(fdp); 590 fdrop(fp2, td); 591 free3: 592 FILEDESC_LOCK(fdp); 593 if (fdp->fd_ofiles[sv[0]] == fp1) { 594 fdp->fd_ofiles[sv[0]] = NULL; 595 FILEDESC_UNLOCK(fdp); 596 fdrop(fp1, td); 597 } else 598 FILEDESC_UNLOCK(fdp); 599 fdrop(fp1, td); 600 free2: 601 (void)soclose(so2); 602 free1: 603 (void)soclose(so1); 604 done2: 605 mtx_unlock(&Giant); 606 return (error); 607 } 608 609 static int 610 sendit(td, s, mp, flags) 611 register struct thread *td; 612 int s; 613 register struct msghdr *mp; 614 int flags; 615 { 616 struct mbuf *control; 617 struct sockaddr *to; 618 int error; 619 620 if (mp->msg_name != NULL) { 621 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 622 if (error) { 623 to = NULL; 624 goto bad; 625 } 626 mp->msg_name = to; 627 } else 628 to = NULL; 629 630 if (mp->msg_control) { 631 if (mp->msg_controllen < sizeof(struct cmsghdr) 632 #ifdef COMPAT_OLDSOCK 633 && mp->msg_flags != MSG_COMPAT 634 #endif 635 ) { 636 error = EINVAL; 637 goto bad; 638 } 639 error = sockargs(&control, mp->msg_control, 640 mp->msg_controllen, MT_CONTROL); 641 if (error) 642 goto bad; 643 #ifdef COMPAT_OLDSOCK 644 if (mp->msg_flags == MSG_COMPAT) { 645 register struct cmsghdr *cm; 646 647 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 648 if (control == 0) { 649 error = ENOBUFS; 650 goto bad; 651 } else { 652 cm = mtod(control, struct cmsghdr 
*); 653 cm->cmsg_len = control->m_len; 654 cm->cmsg_level = SOL_SOCKET; 655 cm->cmsg_type = SCM_RIGHTS; 656 } 657 } 658 #endif 659 } else { 660 control = NULL; 661 } 662 663 error = kern_sendit(td, s, mp, flags, control); 664 665 bad: 666 if (to) 667 FREE(to, M_SONAME); 668 return (error); 669 } 670 671 int 672 kern_sendit(td, s, mp, flags, control) 673 struct thread *td; 674 int s; 675 struct msghdr *mp; 676 int flags; 677 struct mbuf *control; 678 { 679 struct uio auio; 680 struct iovec *iov; 681 struct socket *so; 682 int i; 683 int len, error; 684 #ifdef KTRACE 685 struct iovec *ktriov = NULL; 686 struct uio ktruio; 687 int iovlen; 688 #endif 689 690 mtx_lock(&Giant); 691 if ((error = fgetsock(td, s, &so, NULL)) != 0) 692 goto bad2; 693 694 #ifdef MAC 695 error = mac_check_socket_send(td->td_ucred, so); 696 if (error) 697 goto bad; 698 #endif 699 700 auio.uio_iov = mp->msg_iov; 701 auio.uio_iovcnt = mp->msg_iovlen; 702 auio.uio_segflg = UIO_USERSPACE; 703 auio.uio_rw = UIO_WRITE; 704 auio.uio_td = td; 705 auio.uio_offset = 0; /* XXX */ 706 auio.uio_resid = 0; 707 iov = mp->msg_iov; 708 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 709 if ((auio.uio_resid += iov->iov_len) < 0) { 710 error = EINVAL; 711 goto bad; 712 } 713 } 714 #ifdef KTRACE 715 if (KTRPOINT(td, KTR_GENIO)) { 716 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 717 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 718 bcopy(auio.uio_iov, ktriov, iovlen); 719 ktruio = auio; 720 } 721 #endif 722 len = auio.uio_resid; 723 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 724 0, control, flags, td); 725 if (error) { 726 if (auio.uio_resid != len && (error == ERESTART || 727 error == EINTR || error == EWOULDBLOCK)) 728 error = 0; 729 /* Generation of SIGPIPE can be controlled per socket */ 730 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 731 PROC_LOCK(td->td_proc); 732 psignal(td->td_proc, SIGPIPE); 733 PROC_UNLOCK(td->td_proc); 734 } 735 } 736 if (error == 
0) 737 td->td_retval[0] = len - auio.uio_resid; 738 #ifdef KTRACE 739 if (ktriov != NULL) { 740 if (error == 0) { 741 ktruio.uio_iov = ktriov; 742 ktruio.uio_resid = td->td_retval[0]; 743 ktrgenio(s, UIO_WRITE, &ktruio, error); 744 } 745 FREE(ktriov, M_TEMP); 746 } 747 #endif 748 bad: 749 fputsock(so); 750 bad2: 751 mtx_unlock(&Giant); 752 return (error); 753 } 754 755 /* 756 * MPSAFE 757 */ 758 int 759 sendto(td, uap) 760 struct thread *td; 761 register struct sendto_args /* { 762 int s; 763 caddr_t buf; 764 size_t len; 765 int flags; 766 caddr_t to; 767 int tolen; 768 } */ *uap; 769 { 770 struct msghdr msg; 771 struct iovec aiov; 772 int error; 773 774 msg.msg_name = uap->to; 775 msg.msg_namelen = uap->tolen; 776 msg.msg_iov = &aiov; 777 msg.msg_iovlen = 1; 778 msg.msg_control = 0; 779 #ifdef COMPAT_OLDSOCK 780 msg.msg_flags = 0; 781 #endif 782 aiov.iov_base = uap->buf; 783 aiov.iov_len = uap->len; 784 error = sendit(td, uap->s, &msg, uap->flags); 785 return (error); 786 } 787 788 #ifdef COMPAT_OLDSOCK 789 /* 790 * MPSAFE 791 */ 792 int 793 osend(td, uap) 794 struct thread *td; 795 register struct osend_args /* { 796 int s; 797 caddr_t buf; 798 int len; 799 int flags; 800 } */ *uap; 801 { 802 struct msghdr msg; 803 struct iovec aiov; 804 int error; 805 806 msg.msg_name = 0; 807 msg.msg_namelen = 0; 808 msg.msg_iov = &aiov; 809 msg.msg_iovlen = 1; 810 aiov.iov_base = uap->buf; 811 aiov.iov_len = uap->len; 812 msg.msg_control = 0; 813 msg.msg_flags = 0; 814 error = sendit(td, uap->s, &msg, uap->flags); 815 return (error); 816 } 817 818 /* 819 * MPSAFE 820 */ 821 int 822 osendmsg(td, uap) 823 struct thread *td; 824 register struct osendmsg_args /* { 825 int s; 826 caddr_t msg; 827 int flags; 828 } */ *uap; 829 { 830 struct msghdr msg; 831 struct iovec aiov[UIO_SMALLIOV], *iov; 832 int error; 833 834 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 835 if (error) 836 goto done2; 837 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 838 if ((u_int)msg.msg_iovlen >= 
UIO_MAXIOV) { 839 error = EMSGSIZE; 840 goto done2; 841 } 842 MALLOC(iov, struct iovec *, 843 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 844 M_WAITOK); 845 } else { 846 iov = aiov; 847 } 848 error = copyin(msg.msg_iov, iov, 849 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 850 if (error) 851 goto done; 852 msg.msg_flags = MSG_COMPAT; 853 msg.msg_iov = iov; 854 error = sendit(td, uap->s, &msg, uap->flags); 855 done: 856 if (iov != aiov) 857 FREE(iov, M_IOV); 858 done2: 859 return (error); 860 } 861 #endif 862 863 /* 864 * MPSAFE 865 */ 866 int 867 sendmsg(td, uap) 868 struct thread *td; 869 register struct sendmsg_args /* { 870 int s; 871 caddr_t msg; 872 int flags; 873 } */ *uap; 874 { 875 struct msghdr msg; 876 struct iovec aiov[UIO_SMALLIOV], *iov; 877 int error; 878 879 error = copyin(uap->msg, &msg, sizeof (msg)); 880 if (error) 881 goto done2; 882 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 883 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 884 error = EMSGSIZE; 885 goto done2; 886 } 887 MALLOC(iov, struct iovec *, 888 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 889 M_WAITOK); 890 } else { 891 iov = aiov; 892 } 893 if (msg.msg_iovlen && 894 (error = copyin(msg.msg_iov, iov, 895 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 896 goto done; 897 msg.msg_iov = iov; 898 #ifdef COMPAT_OLDSOCK 899 msg.msg_flags = 0; 900 #endif 901 error = sendit(td, uap->s, &msg, uap->flags); 902 done: 903 if (iov != aiov) 904 FREE(iov, M_IOV); 905 done2: 906 return (error); 907 } 908 909 static int 910 recvit(td, s, mp, namelenp) 911 register struct thread *td; 912 int s; 913 register struct msghdr *mp; 914 void *namelenp; 915 { 916 struct uio auio; 917 register struct iovec *iov; 918 register int i; 919 int len, error; 920 struct mbuf *m, *control = 0; 921 caddr_t ctlbuf; 922 struct socket *so; 923 struct sockaddr *fromsa = 0; 924 #ifdef KTRACE 925 struct iovec *ktriov = NULL; 926 struct uio ktruio; 927 int iovlen; 928 #endif 929 930 mtx_lock(&Giant); 
931 if ((error = fgetsock(td, s, &so, NULL)) != 0) { 932 mtx_unlock(&Giant); 933 return (error); 934 } 935 936 #ifdef MAC 937 error = mac_check_socket_receive(td->td_ucred, so); 938 if (error) { 939 fputsock(so); 940 mtx_unlock(&Giant); 941 return (error); 942 } 943 #endif 944 945 auio.uio_iov = mp->msg_iov; 946 auio.uio_iovcnt = mp->msg_iovlen; 947 auio.uio_segflg = UIO_USERSPACE; 948 auio.uio_rw = UIO_READ; 949 auio.uio_td = td; 950 auio.uio_offset = 0; /* XXX */ 951 auio.uio_resid = 0; 952 iov = mp->msg_iov; 953 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 954 if ((auio.uio_resid += iov->iov_len) < 0) { 955 fputsock(so); 956 return (EINVAL); 957 } 958 } 959 #ifdef KTRACE 960 if (KTRPOINT(td, KTR_GENIO)) { 961 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 962 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 963 bcopy(auio.uio_iov, ktriov, iovlen); 964 ktruio = auio; 965 } 966 #endif 967 len = auio.uio_resid; 968 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 969 (struct mbuf **)0, mp->msg_control ? 
&control : (struct mbuf **)0, 970 &mp->msg_flags); 971 if (error) { 972 if (auio.uio_resid != len && (error == ERESTART || 973 error == EINTR || error == EWOULDBLOCK)) 974 error = 0; 975 } 976 #ifdef KTRACE 977 if (ktriov != NULL) { 978 if (error == 0) { 979 ktruio.uio_iov = ktriov; 980 ktruio.uio_resid = len - auio.uio_resid; 981 ktrgenio(s, UIO_READ, &ktruio, error); 982 } 983 FREE(ktriov, M_TEMP); 984 } 985 #endif 986 if (error) 987 goto out; 988 td->td_retval[0] = len - auio.uio_resid; 989 if (mp->msg_name) { 990 len = mp->msg_namelen; 991 if (len <= 0 || fromsa == 0) 992 len = 0; 993 else { 994 /* save sa_len before it is destroyed by MSG_COMPAT */ 995 len = MIN(len, fromsa->sa_len); 996 #ifdef COMPAT_OLDSOCK 997 if (mp->msg_flags & MSG_COMPAT) 998 ((struct osockaddr *)fromsa)->sa_family = 999 fromsa->sa_family; 1000 #endif 1001 error = copyout(fromsa, mp->msg_name, (unsigned)len); 1002 if (error) 1003 goto out; 1004 } 1005 mp->msg_namelen = len; 1006 if (namelenp && 1007 (error = copyout(&len, namelenp, sizeof (int)))) { 1008 #ifdef COMPAT_OLDSOCK 1009 if (mp->msg_flags & MSG_COMPAT) 1010 error = 0; /* old recvfrom didn't check */ 1011 else 1012 #endif 1013 goto out; 1014 } 1015 } 1016 if (mp->msg_control) { 1017 #ifdef COMPAT_OLDSOCK 1018 /* 1019 * We assume that old recvmsg calls won't receive access 1020 * rights and other control info, esp. as control info 1021 * is always optional and those options didn't exist in 4.3. 1022 * If we receive rights, trim the cmsghdr; anything else 1023 * is tossed. 
1024 */ 1025 if (control && mp->msg_flags & MSG_COMPAT) { 1026 if (mtod(control, struct cmsghdr *)->cmsg_level != 1027 SOL_SOCKET || 1028 mtod(control, struct cmsghdr *)->cmsg_type != 1029 SCM_RIGHTS) { 1030 mp->msg_controllen = 0; 1031 goto out; 1032 } 1033 control->m_len -= sizeof (struct cmsghdr); 1034 control->m_data += sizeof (struct cmsghdr); 1035 } 1036 #endif 1037 len = mp->msg_controllen; 1038 m = control; 1039 mp->msg_controllen = 0; 1040 ctlbuf = mp->msg_control; 1041 1042 while (m && len > 0) { 1043 unsigned int tocopy; 1044 1045 if (len >= m->m_len) 1046 tocopy = m->m_len; 1047 else { 1048 mp->msg_flags |= MSG_CTRUNC; 1049 tocopy = len; 1050 } 1051 1052 if ((error = copyout(mtod(m, caddr_t), 1053 ctlbuf, tocopy)) != 0) 1054 goto out; 1055 1056 ctlbuf += tocopy; 1057 len -= tocopy; 1058 m = m->m_next; 1059 } 1060 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1061 } 1062 out: 1063 fputsock(so); 1064 mtx_unlock(&Giant); 1065 if (fromsa) 1066 FREE(fromsa, M_SONAME); 1067 if (control) 1068 m_freem(control); 1069 return (error); 1070 } 1071 1072 /* 1073 * MPSAFE 1074 */ 1075 int 1076 recvfrom(td, uap) 1077 struct thread *td; 1078 register struct recvfrom_args /* { 1079 int s; 1080 caddr_t buf; 1081 size_t len; 1082 int flags; 1083 caddr_t from; 1084 int *fromlenaddr; 1085 } */ *uap; 1086 { 1087 struct msghdr msg; 1088 struct iovec aiov; 1089 int error; 1090 1091 if (uap->fromlenaddr) { 1092 error = copyin(uap->fromlenaddr, 1093 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1094 if (error) 1095 goto done2; 1096 } else { 1097 msg.msg_namelen = 0; 1098 } 1099 msg.msg_name = uap->from; 1100 msg.msg_iov = &aiov; 1101 msg.msg_iovlen = 1; 1102 aiov.iov_base = uap->buf; 1103 aiov.iov_len = uap->len; 1104 msg.msg_control = 0; 1105 msg.msg_flags = uap->flags; 1106 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1107 done2: 1108 return(error); 1109 } 1110 1111 #ifdef COMPAT_OLDSOCK 1112 /* 1113 * MPSAFE 1114 */ 1115 int 1116 orecvfrom(td, uap) 1117 struct 
thread *td; 1118 struct recvfrom_args *uap; 1119 { 1120 1121 uap->flags |= MSG_COMPAT; 1122 return (recvfrom(td, uap)); 1123 } 1124 #endif 1125 1126 1127 #ifdef COMPAT_OLDSOCK 1128 /* 1129 * MPSAFE 1130 */ 1131 int 1132 orecv(td, uap) 1133 struct thread *td; 1134 register struct orecv_args /* { 1135 int s; 1136 caddr_t buf; 1137 int len; 1138 int flags; 1139 } */ *uap; 1140 { 1141 struct msghdr msg; 1142 struct iovec aiov; 1143 int error; 1144 1145 msg.msg_name = 0; 1146 msg.msg_namelen = 0; 1147 msg.msg_iov = &aiov; 1148 msg.msg_iovlen = 1; 1149 aiov.iov_base = uap->buf; 1150 aiov.iov_len = uap->len; 1151 msg.msg_control = 0; 1152 msg.msg_flags = uap->flags; 1153 error = recvit(td, uap->s, &msg, NULL); 1154 return (error); 1155 } 1156 1157 /* 1158 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1159 * overlays the new one, missing only the flags, and with the (old) access 1160 * rights where the control fields are now. 1161 * 1162 * MPSAFE 1163 */ 1164 int 1165 orecvmsg(td, uap) 1166 struct thread *td; 1167 register struct orecvmsg_args /* { 1168 int s; 1169 struct omsghdr *msg; 1170 int flags; 1171 } */ *uap; 1172 { 1173 struct msghdr msg; 1174 struct iovec aiov[UIO_SMALLIOV], *iov; 1175 int error; 1176 1177 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1178 if (error) 1179 return (error); 1180 1181 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1182 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1183 error = EMSGSIZE; 1184 goto done2; 1185 } 1186 MALLOC(iov, struct iovec *, 1187 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1188 M_WAITOK); 1189 } else { 1190 iov = aiov; 1191 } 1192 msg.msg_flags = uap->flags | MSG_COMPAT; 1193 error = copyin(msg.msg_iov, iov, 1194 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1195 if (error) 1196 goto done; 1197 msg.msg_iov = iov; 1198 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1199 1200 if (msg.msg_controllen && error == 0) 1201 error = copyout(&msg.msg_controllen, 1202 
&uap->msg->msg_accrightslen, sizeof (int)); 1203 done: 1204 if (iov != aiov) 1205 FREE(iov, M_IOV); 1206 done2: 1207 return (error); 1208 } 1209 #endif 1210 1211 /* 1212 * MPSAFE 1213 */ 1214 int 1215 recvmsg(td, uap) 1216 struct thread *td; 1217 register struct recvmsg_args /* { 1218 int s; 1219 struct msghdr *msg; 1220 int flags; 1221 } */ *uap; 1222 { 1223 struct msghdr msg; 1224 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1225 register int error; 1226 1227 error = copyin(uap->msg, &msg, sizeof (msg)); 1228 if (error) 1229 goto done2; 1230 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1231 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1232 error = EMSGSIZE; 1233 goto done2; 1234 } 1235 MALLOC(iov, struct iovec *, 1236 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1237 M_WAITOK); 1238 } else { 1239 iov = aiov; 1240 } 1241 #ifdef COMPAT_OLDSOCK 1242 msg.msg_flags = uap->flags &~ MSG_COMPAT; 1243 #else 1244 msg.msg_flags = uap->flags; 1245 #endif 1246 uiov = msg.msg_iov; 1247 msg.msg_iov = iov; 1248 error = copyin(uiov, iov, 1249 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1250 if (error) 1251 goto done; 1252 error = recvit(td, uap->s, &msg, NULL); 1253 if (!error) { 1254 msg.msg_iov = uiov; 1255 error = copyout(&msg, uap->msg, sizeof(msg)); 1256 } 1257 done: 1258 if (iov != aiov) 1259 FREE(iov, M_IOV); 1260 done2: 1261 return (error); 1262 } 1263 1264 /* 1265 * MPSAFE 1266 */ 1267 /* ARGSUSED */ 1268 int 1269 shutdown(td, uap) 1270 struct thread *td; 1271 register struct shutdown_args /* { 1272 int s; 1273 int how; 1274 } */ *uap; 1275 { 1276 struct socket *so; 1277 int error; 1278 1279 mtx_lock(&Giant); 1280 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1281 error = soshutdown(so, uap->how); 1282 fputsock(so); 1283 } 1284 mtx_unlock(&Giant); 1285 return(error); 1286 } 1287 1288 /* 1289 * MPSAFE 1290 */ 1291 /* ARGSUSED */ 1292 int 1293 setsockopt(td, uap) 1294 struct thread *td; 1295 register struct setsockopt_args /* { 1296 int s; 1297 int 
level; 1298 int name; 1299 caddr_t val; 1300 int valsize; 1301 } */ *uap; 1302 { 1303 struct socket *so; 1304 struct sockopt sopt; 1305 int error; 1306 1307 if (uap->val == 0 && uap->valsize != 0) 1308 return (EFAULT); 1309 if (uap->valsize < 0) 1310 return (EINVAL); 1311 1312 mtx_lock(&Giant); 1313 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1314 sopt.sopt_dir = SOPT_SET; 1315 sopt.sopt_level = uap->level; 1316 sopt.sopt_name = uap->name; 1317 sopt.sopt_val = uap->val; 1318 sopt.sopt_valsize = uap->valsize; 1319 sopt.sopt_td = td; 1320 error = sosetopt(so, &sopt); 1321 fputsock(so); 1322 } 1323 mtx_unlock(&Giant); 1324 return(error); 1325 } 1326 1327 /* 1328 * MPSAFE 1329 */ 1330 /* ARGSUSED */ 1331 int 1332 getsockopt(td, uap) 1333 struct thread *td; 1334 register struct getsockopt_args /* { 1335 int s; 1336 int level; 1337 int name; 1338 caddr_t val; 1339 int *avalsize; 1340 } */ *uap; 1341 { 1342 int valsize, error; 1343 struct socket *so; 1344 struct sockopt sopt; 1345 1346 mtx_lock(&Giant); 1347 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1348 goto done2; 1349 if (uap->val) { 1350 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1351 if (error) 1352 goto done1; 1353 if (valsize < 0) { 1354 error = EINVAL; 1355 goto done1; 1356 } 1357 } else { 1358 valsize = 0; 1359 } 1360 1361 sopt.sopt_dir = SOPT_GET; 1362 sopt.sopt_level = uap->level; 1363 sopt.sopt_name = uap->name; 1364 sopt.sopt_val = uap->val; 1365 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1366 sopt.sopt_td = td; 1367 1368 error = sogetopt(so, &sopt); 1369 if (error == 0) { 1370 valsize = sopt.sopt_valsize; 1371 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1372 } 1373 done1: 1374 fputsock(so); 1375 done2: 1376 mtx_unlock(&Giant); 1377 return (error); 1378 } 1379 1380 /* 1381 * getsockname1() - Get socket name. 
1382 * 1383 * MPSAFE 1384 */ 1385 /* ARGSUSED */ 1386 static int 1387 getsockname1(td, uap, compat) 1388 struct thread *td; 1389 register struct getsockname_args /* { 1390 int fdes; 1391 caddr_t asa; 1392 int *alen; 1393 } */ *uap; 1394 int compat; 1395 { 1396 struct socket *so; 1397 struct sockaddr *sa; 1398 int len, error; 1399 1400 mtx_lock(&Giant); 1401 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1402 goto done2; 1403 error = copyin(uap->alen, &len, sizeof (len)); 1404 if (error) 1405 goto done1; 1406 if (len < 0) { 1407 error = EINVAL; 1408 goto done1; 1409 } 1410 sa = 0; 1411 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1412 if (error) 1413 goto bad; 1414 if (sa == 0) { 1415 len = 0; 1416 goto gotnothing; 1417 } 1418 1419 len = MIN(len, sa->sa_len); 1420 #ifdef COMPAT_OLDSOCK 1421 if (compat) 1422 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1423 #endif 1424 error = copyout(sa, uap->asa, (u_int)len); 1425 if (error == 0) 1426 gotnothing: 1427 error = copyout(&len, uap->alen, sizeof (len)); 1428 bad: 1429 if (sa) 1430 FREE(sa, M_SONAME); 1431 done1: 1432 fputsock(so); 1433 done2: 1434 mtx_unlock(&Giant); 1435 return (error); 1436 } 1437 1438 /* 1439 * MPSAFE 1440 */ 1441 int 1442 getsockname(td, uap) 1443 struct thread *td; 1444 struct getsockname_args *uap; 1445 { 1446 1447 return (getsockname1(td, uap, 0)); 1448 } 1449 1450 #ifdef COMPAT_OLDSOCK 1451 /* 1452 * MPSAFE 1453 */ 1454 int 1455 ogetsockname(td, uap) 1456 struct thread *td; 1457 struct getsockname_args *uap; 1458 { 1459 1460 return (getsockname1(td, uap, 1)); 1461 } 1462 #endif /* COMPAT_OLDSOCK */ 1463 1464 /* 1465 * getpeername1() - Get name of peer for connected socket. 
1466 * 1467 * MPSAFE 1468 */ 1469 /* ARGSUSED */ 1470 static int 1471 getpeername1(td, uap, compat) 1472 struct thread *td; 1473 register struct getpeername_args /* { 1474 int fdes; 1475 caddr_t asa; 1476 int *alen; 1477 } */ *uap; 1478 int compat; 1479 { 1480 struct socket *so; 1481 struct sockaddr *sa; 1482 int len, error; 1483 1484 mtx_lock(&Giant); 1485 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1486 goto done2; 1487 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1488 error = ENOTCONN; 1489 goto done1; 1490 } 1491 error = copyin(uap->alen, &len, sizeof (len)); 1492 if (error) 1493 goto done1; 1494 if (len < 0) { 1495 error = EINVAL; 1496 goto done1; 1497 } 1498 sa = 0; 1499 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1500 if (error) 1501 goto bad; 1502 if (sa == 0) { 1503 len = 0; 1504 goto gotnothing; 1505 } 1506 len = MIN(len, sa->sa_len); 1507 #ifdef COMPAT_OLDSOCK 1508 if (compat) 1509 ((struct osockaddr *)sa)->sa_family = 1510 sa->sa_family; 1511 #endif 1512 error = copyout(sa, uap->asa, (u_int)len); 1513 if (error) 1514 goto bad; 1515 gotnothing: 1516 error = copyout(&len, uap->alen, sizeof (len)); 1517 bad: 1518 if (sa) 1519 FREE(sa, M_SONAME); 1520 done1: 1521 fputsock(so); 1522 done2: 1523 mtx_unlock(&Giant); 1524 return (error); 1525 } 1526 1527 /* 1528 * MPSAFE 1529 */ 1530 int 1531 getpeername(td, uap) 1532 struct thread *td; 1533 struct getpeername_args *uap; 1534 { 1535 1536 return (getpeername1(td, uap, 0)); 1537 } 1538 1539 #ifdef COMPAT_OLDSOCK 1540 /* 1541 * MPSAFE 1542 */ 1543 int 1544 ogetpeername(td, uap) 1545 struct thread *td; 1546 struct ogetpeername_args *uap; 1547 { 1548 1549 /* XXX uap should have type `getpeername_args *' to begin with. 
*/ 1550 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1551 } 1552 #endif /* COMPAT_OLDSOCK */ 1553 1554 int 1555 sockargs(mp, buf, buflen, type) 1556 struct mbuf **mp; 1557 caddr_t buf; 1558 int buflen, type; 1559 { 1560 register struct sockaddr *sa; 1561 register struct mbuf *m; 1562 int error; 1563 1564 if ((u_int)buflen > MLEN) { 1565 #ifdef COMPAT_OLDSOCK 1566 if (type == MT_SONAME && (u_int)buflen <= 112) 1567 buflen = MLEN; /* unix domain compat. hack */ 1568 else 1569 #endif 1570 return (EINVAL); 1571 } 1572 m = m_get(M_TRYWAIT, type); 1573 if (m == NULL) 1574 return (ENOBUFS); 1575 m->m_len = buflen; 1576 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1577 if (error) 1578 (void) m_free(m); 1579 else { 1580 *mp = m; 1581 if (type == MT_SONAME) { 1582 sa = mtod(m, struct sockaddr *); 1583 1584 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1585 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1586 sa->sa_family = sa->sa_len; 1587 #endif 1588 sa->sa_len = buflen; 1589 } 1590 } 1591 return (error); 1592 } 1593 1594 int 1595 getsockaddr(namp, uaddr, len) 1596 struct sockaddr **namp; 1597 caddr_t uaddr; 1598 size_t len; 1599 { 1600 struct sockaddr *sa; 1601 int error; 1602 1603 if (len > SOCK_MAXADDRLEN) 1604 return ENAMETOOLONG; 1605 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1606 error = copyin(uaddr, sa, len); 1607 if (error) { 1608 FREE(sa, M_SONAME); 1609 } else { 1610 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1611 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1612 sa->sa_family = sa->sa_len; 1613 #endif 1614 sa->sa_len = len; 1615 *namp = sa; 1616 } 1617 return error; 1618 } 1619 1620 /* 1621 * sendfile(2) 1622 * 1623 * MPSAFE 1624 * 1625 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1626 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1627 * 1628 * Send a file specified by 'fd' and starting at 'offset' to a socket 1629 * specified by 's'. 
Send only 'nbytes' of the file or until EOF if
 * nbytes == 0. Optionally add a header and/or trailer to the socket
 * output. If specified, write the total number of bytes sent into *sbytes.
 *
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{

	/* Current ABI: compat == 0. */
	return (do_sendfile(td, uap, 0));
}

#ifdef COMPAT_FREEBSD4
/*
 * freebsd4_sendfile() - FreeBSD 4 compatibility entry point.
 *
 * Repackages the freebsd4_sendfile_args fields into a sendfile_args and
 * calls do_sendfile() with compat == 1.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args args;

	args.fd = uap->fd;
	args.s = uap->s;
	args.offset = uap->offset;
	args.nbytes = uap->nbytes;
	args.hdtr = uap->hdtr;
	args.sbytes = uap->sbytes;
	args.flags = uap->flags;

	return (do_sendfile(td, &args, 1));
}
#endif /* COMPAT_FREEBSD4 */

/*
 * do_sendfile() - common implementation behind sendfile() and
 * freebsd4_sendfile().
 *
 * The whole operation runs with Giant held.  Optional headers and
 * trailers are sent with writev(); the file data is sent one page (or
 * partial page) at a time by wrapping each file page in an sf_buf-backed
 * mbuf and passing it to the protocol's pru_send handler.
 *
 * `compat' selects how header/trailer bytes are accounted:
 *   - non-zero: they are added straight into sbytes, so header bytes also
 *     count against uap->nbytes in the page loop;
 *   - zero: they are accumulated in hdtr_size and only folded into sbytes
 *     at the end, so uap->nbytes limits file data alone.
 *
 * Errors set in this function: EINVAL (not a regular file with a VM
 * object, socket not SOCK_STREAM, negative offset), ENOTCONN, EPIPE,
 * EAGAIN, EINTR (no sf_buf obtained), ENOBUFS, plus whatever the called
 * routines return.  If uap->sbytes is non-NULL the byte count is copied
 * out on every exit path, success or failure; the copyout() result is
 * ignored.
 */
static int
do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
{
	struct vnode *vp;
	struct vm_object *obj;
	struct socket *so = NULL;
	struct mbuf *m;
	struct sf_buf *sf;
	struct vm_page *pg;
	struct writev_args nuap;
	struct sf_hdtr hdtr;
	off_t off, xfsize, hdtr_size, sbytes = 0;
	int error, s;

	mtx_lock(&Giant);

	hdtr_size = 0;

	/*
	 * The descriptor must be a regular file and have a backing VM object.
	 *
	 * NOTE(review): vp is tested at `done' even when fgetvp_read()
	 * fails; this presumably relies on fgetvp_read() storing NULL in
	 * vp on failure - confirm against its implementation.
	 */
	if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
		goto done;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
		error = EINVAL;
		VOP_UNLOCK(vp, 0, td);
		goto done;
	}
	VOP_UNLOCK(vp, 0, td);
	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
		goto done;
	if (so->so_type != SOCK_STREAM) {
		error = EINVAL;
		goto done;
	}
	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto done;
	}
	if (uap->offset < 0) {
		error = EINVAL;
		goto done;
	}

#ifdef MAC
	error = mac_check_socket_send(td->td_ucred, so);
	if (error)
		goto done;
#endif

	/*
	 * If specified, get the pointer to the sf_hdtr struct for
	 * any headers/trailers.
	 */
	if (uap->hdtr != NULL) {
		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
		if (error)
			goto done;
		/*
		 * Send any headers. Wimp out and use writev(2).
		 */
		if (hdtr.headers != NULL) {
			nuap.fd = uap->s;
			nuap.iovp = hdtr.headers;
			nuap.iovcnt = hdtr.hdr_cnt;
			error = writev(td, &nuap);
			if (error)
				goto done;
			/* writev() leaves its byte count in td_retval[0]. */
			if (compat)
				sbytes += td->td_retval[0];
			else
				hdtr_size += td->td_retval[0];
		}
	}

	/*
	 * Protect against multiple writers to the socket.
	 * Every exit from the loop below pairs this with sbunlock().
	 */
	(void) sblock(&so->so_snd, M_WAITOK);

	/*
	 * Loop through the pages in the file, starting with the requested
	 * offset. Get a file page (do I/O if necessary), map the file page
	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
	 * it on the socket.
	 */
	for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
		vm_pindex_t pindex;
		vm_offset_t pgoff;

		pindex = OFF_TO_IDX(off);
		VM_OBJECT_LOCK(obj);
retry_lookup:
		/*
		 * Calculate the amount to transfer. Not to exceed a page,
		 * the EOF, or the passed in nbytes.
		 */
		xfsize = obj->un_pager.vnp.vnp_size - off;
		VM_OBJECT_UNLOCK(obj);
		if (xfsize > PAGE_SIZE)
			xfsize = PAGE_SIZE;
		pgoff = (vm_offset_t)(off & PAGE_MASK);
		if (PAGE_SIZE - pgoff < xfsize)
			xfsize = PAGE_SIZE - pgoff;
		if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
			xfsize = uap->nbytes - sbytes;
		/* Past EOF or nbytes satisfied: the only normal loop exit. */
		if (xfsize <= 0)
			break;
		/*
		 * Optimize the non-blocking case by looking at the socket space
		 * before going to the extra work of constituting the sf_buf.
		 */
		if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
			if (so->so_state & SS_CANTSENDMORE)
				error = EPIPE;
			else
				error = EAGAIN;
			sbunlock(&so->so_snd);
			goto done;
		}
		VM_OBJECT_LOCK(obj);
		/*
		 * Attempt to look up the page.
		 *
		 *	Allocate if not found
		 *
		 *	Wait and loop if busy.
		 */
		pg = vm_page_lookup(obj, pindex);

		if (pg == NULL) {
			/* VM_ALLOC_WIRED: the new page comes back wired. */
			pg = vm_page_alloc(obj, pindex,
			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
			if (pg == NULL) {
				VM_OBJECT_UNLOCK(obj);
				VM_WAIT;
				VM_OBJECT_LOCK(obj);
				goto retry_lookup;
			}
			vm_page_lock_queues();
			vm_page_wakeup(pg);
		} else {
			vm_page_lock_queues();
			if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
				goto retry_lookup;
			/*
			 * Wire the page so it does not get ripped out from
			 * under us.
			 */
			vm_page_wire(pg);
		}

		/*
		 * If page is not valid for what we need, initiate I/O
		 */

		if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
			int bsize, resid;

			/*
			 * Ensure that our page is still around when the I/O
			 * completes.
			 */
			vm_page_io_start(pg);
			vm_page_unlock_queues();
			VM_OBJECT_UNLOCK(obj);

			/*
			 * Get the page from backing store.
			 */
			bsize = vp->v_mount->mnt_stat.f_iosize;
			vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
			/*
			 * XXXMAC: Because we don't have fp->f_cred here,
			 * we pass in NOCRED.  This is probably wrong, but
			 * is consistent with our original implementation.
			 */
			error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
			    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
			    IO_VMIO | ((MAXBSIZE / bsize) << 16),
			    td->td_ucred, NOCRED, &resid, td);
			VOP_UNLOCK(vp, 0, td);
			/*
			 * Only the error path below retakes the object lock;
			 * it ends with a matching VM_OBJECT_UNLOCK().
			 */
			if (error)
				VM_OBJECT_LOCK(obj);
			vm_page_lock_queues();
			vm_page_flag_clear(pg, PG_ZERO);
			vm_page_io_finish(pg);
			if (error) {
				vm_page_unwire(pg, 0);
				/*
				 * See if anyone else might know about this page.
				 * If not and it is not valid, then free it.
				 */
				if (pg->wire_count == 0 && pg->valid == 0 &&
				    pg->busy == 0 && !(pg->flags & PG_BUSY) &&
				    pg->hold_count == 0) {
					vm_page_busy(pg);
					vm_page_free(pg);
				}
				vm_page_unlock_queues();
				VM_OBJECT_UNLOCK(obj);
				sbunlock(&so->so_snd);
				goto done;
			}
		} else
			VM_OBJECT_UNLOCK(obj);
		/* Both branches above arrive here holding the page queues lock. */
		vm_page_unlock_queues();

		/*
		 * Get a sendfile buf. We usually wait as long as necessary,
		 * but this wait can be interrupted.
		 */
		if ((sf = sf_buf_alloc(pg)) == NULL) {
			vm_page_lock_queues();
			vm_page_unwire(pg, 0);
			if (pg->wire_count == 0 && pg->object == NULL)
				vm_page_free(pg);
			vm_page_unlock_queues();
			sbunlock(&so->so_snd);
			error = EINTR;
			goto done;
		}

		/*
		 * Get an mbuf header and set it up as having external storage.
		 */
		MGETHDR(m, M_TRYWAIT, MT_DATA);
		if (m == NULL) {
			error = ENOBUFS;
			sf_buf_free((void *)sf_buf_kva(sf), sf);
			sbunlock(&so->so_snd);
			goto done;
		}
		/*
		 * Setup external storage for mbuf.
		 * sf_buf_free is registered as the external-storage free
		 * routine, so the error paths below release the sf_buf via
		 * m_freem(m) rather than calling sf_buf_free() directly.
		 */
		MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_free, sf, M_RDONLY,
		    EXT_SFBUF);
		m->m_data = (char *)sf_buf_kva(sf) + pgoff;
		m->m_pkthdr.len = m->m_len = xfsize;
		/*
		 * Add the buffer to the socket buffer chain.
		 */
		s = splnet();
retry_space:
		/*
		 * Make sure that the socket is still able to take more data.
		 * CANTSENDMORE being true usually means that the connection
		 * was closed. so_error is true when an error was sensed after
		 * a previous send.
		 * The state is checked after the page mapping and buffer
		 * allocation above since those operations may block and make
		 * any socket checks stale.  From this point forward, nothing
		 * blocks before the pru_send (or more accurately, any blocking
		 * results in a loop back to here to re-check).
		 */
		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
			if (so->so_state & SS_CANTSENDMORE) {
				error = EPIPE;
			} else {
				/* Report the pending error and clear it. */
				error = so->so_error;
				so->so_error = 0;
			}
			m_freem(m);
			sbunlock(&so->so_snd);
			splx(s);
			goto done;
		}
		/*
		 * Wait for socket space to become available.  We do this just
		 * after checking the connection state above in order to avoid
		 * a race condition with sbwait().
		 */
		if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
			if (so->so_state & SS_NBIO) {
				m_freem(m);
				sbunlock(&so->so_snd);
				splx(s);
				error = EAGAIN;
				goto done;
			}
			error = sbwait(&so->so_snd);
			/*
			 * An error from sbwait usually indicates that we've
			 * been interrupted by a signal. If we've sent anything
			 * then return bytes sent, otherwise return the error.
			 *
			 * NOTE(review): as written, the error is returned
			 * unconditionally; the bytes sent so far are reported
			 * only through *sbytes at `done'.
			 */
			if (error) {
				m_freem(m);
				sbunlock(&so->so_snd);
				splx(s);
				goto done;
			}
			goto retry_space;
		}
		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
		splx(s);
		if (error) {
			sbunlock(&so->so_snd);
			goto done;
		}
	}
	sbunlock(&so->so_snd);

	/*
	 * Send trailers. Wimp out and use writev(2).
	 */
	if (uap->hdtr != NULL && hdtr.trailers != NULL) {
		nuap.fd = uap->s;
		nuap.iovp = hdtr.trailers;
		nuap.iovcnt = hdtr.trl_cnt;
		error = writev(td, &nuap);
		if (error)
			goto done;
		if (compat)
			sbytes += td->td_retval[0];
		else
			hdtr_size += td->td_retval[0];
	}

done:
	/*
	 * If there was no error we have to clear td->td_retval[0]
	 * because it may have been set by writev.
	 */
	if (error == 0) {
		td->td_retval[0] = 0;
	}
	if (uap->sbytes != NULL) {
		/* Non-compat mode: fold header/trailer bytes in only now. */
		if (!compat)
			sbytes += hdtr_size;
		/* Best effort: a copyout() failure is not reported. */
		copyout(&sbytes, uap->sbytes, sizeof(off_t));
	}
	if (vp)
		vrele(vp);
	if (so)
		fputsock(so);
	mtx_unlock(&Giant);
	return (error);
}