1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
 *
 *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_ktrace.h"
#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mac.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/malloc.h>
#include <sys/filedesc.h>
#include <sys/event.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/mount.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/uio.h>
#include <sys/vnode.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

/* sf_buf_init() is registered below to run once at boot via SYSINIT. */
static void sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)

/* Common back ends shared by the send*() and recv*() system calls. */
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);

/*
 * Shared implementations; the `compat' argument selects the old (4.3BSD)
 * sockaddr layout when COMPAT_OLDSOCK is defined.
 */
static int accept1(struct thread *td, struct accept_args *uap, int compat);
static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
			int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
			int compat);

/*
 * Expanded sf_freelist head.  Really an SLIST_HEAD() in disguise: the list
 * head of sf_bufs is bundled together with the sf_lock mutex in a single
 * object.
 * NOTE(review): sf_lock presumably protects sf_head; the users of this
 * structure are not in this part of the file -- confirm.
 */
static struct {
	SLIST_HEAD(, sf_buf) sf_head;
	struct mtx sf_lock;
} sf_freelist;

/*
 * NOTE(review): presumably the number of threads waiting for an sf_buf;
 * confirm against sf_buf_alloc()/sf_buf_free().
 */
static u_int sf_buf_alloc_want;

/*
 * System call interface to the socket abstraction.
106 */ 107 #if defined(COMPAT_43) || defined(COMPAT_SUNOS) 108 #define COMPAT_OLDSOCK 109 #endif 110 111 /* 112 * MPSAFE 113 */ 114 int 115 socket(td, uap) 116 struct thread *td; 117 register struct socket_args /* { 118 int domain; 119 int type; 120 int protocol; 121 } */ *uap; 122 { 123 struct filedesc *fdp; 124 struct socket *so; 125 struct file *fp; 126 int fd, error; 127 128 mtx_lock(&Giant); 129 fdp = td->td_proc->p_fd; 130 error = falloc(td, &fp, &fd); 131 if (error) 132 goto done2; 133 fhold(fp); 134 error = socreate(uap->domain, &so, uap->type, uap->protocol, 135 td->td_ucred, td); 136 FILEDESC_LOCK(fdp); 137 if (error) { 138 if (fdp->fd_ofiles[fd] == fp) { 139 fdp->fd_ofiles[fd] = NULL; 140 FILEDESC_UNLOCK(fdp); 141 fdrop(fp, td); 142 } else 143 FILEDESC_UNLOCK(fdp); 144 } else { 145 fp->f_data = so; /* already has ref count */ 146 fp->f_flag = FREAD|FWRITE; 147 fp->f_ops = &socketops; 148 fp->f_type = DTYPE_SOCKET; 149 FILEDESC_UNLOCK(fdp); 150 td->td_retval[0] = fd; 151 } 152 fdrop(fp, td); 153 done2: 154 mtx_unlock(&Giant); 155 return (error); 156 } 157 158 /* 159 * MPSAFE 160 */ 161 /* ARGSUSED */ 162 int 163 bind(td, uap) 164 struct thread *td; 165 register struct bind_args /* { 166 int s; 167 caddr_t name; 168 int namelen; 169 } */ *uap; 170 { 171 struct sockaddr *sa; 172 int error; 173 174 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 175 return (error); 176 177 return (kern_bind(td, uap->s, sa)); 178 } 179 180 int 181 kern_bind(td, fd, sa) 182 struct thread *td; 183 int fd; 184 struct sockaddr *sa; 185 { 186 struct socket *so; 187 int error; 188 189 mtx_lock(&Giant); 190 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 191 goto done2; 192 #ifdef MAC 193 error = mac_check_socket_bind(td->td_ucred, so, sa); 194 if (error) 195 goto done1; 196 #endif 197 error = sobind(so, sa, td); 198 #ifdef MAC 199 done1: 200 #endif 201 fputsock(so); 202 done2: 203 mtx_unlock(&Giant); 204 FREE(sa, M_SONAME); 205 return (error); 206 } 207 208 /* 209 * 
MPSAFE 210 */ 211 /* ARGSUSED */ 212 int 213 listen(td, uap) 214 struct thread *td; 215 register struct listen_args /* { 216 int s; 217 int backlog; 218 } */ *uap; 219 { 220 struct socket *so; 221 int error; 222 223 mtx_lock(&Giant); 224 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 225 #ifdef MAC 226 error = mac_check_socket_listen(td->td_ucred, so); 227 if (error) 228 goto done; 229 #endif 230 error = solisten(so, uap->backlog, td); 231 #ifdef MAC 232 done: 233 #endif 234 fputsock(so); 235 } 236 mtx_unlock(&Giant); 237 return(error); 238 } 239 240 /* 241 * accept1() 242 * MPSAFE 243 */ 244 static int 245 accept1(td, uap, compat) 246 struct thread *td; 247 register struct accept_args /* { 248 int s; 249 caddr_t name; 250 int *anamelen; 251 } */ *uap; 252 int compat; 253 { 254 struct filedesc *fdp; 255 struct file *nfp = NULL; 256 struct sockaddr *sa; 257 int namelen, error, s; 258 struct socket *head, *so; 259 int fd; 260 u_int fflag; 261 pid_t pgid; 262 int tmp; 263 264 mtx_lock(&Giant); 265 fdp = td->td_proc->p_fd; 266 if (uap->name) { 267 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 268 if(error) 269 goto done2; 270 if (namelen < 0) { 271 error = EINVAL; 272 goto done2; 273 } 274 } 275 error = fgetsock(td, uap->s, &head, &fflag); 276 if (error) 277 goto done2; 278 s = splnet(); 279 if ((head->so_options & SO_ACCEPTCONN) == 0) { 280 splx(s); 281 error = EINVAL; 282 goto done; 283 } 284 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 285 if (head->so_state & SS_CANTRCVMORE) { 286 head->so_error = ECONNABORTED; 287 break; 288 } 289 if ((head->so_state & SS_NBIO) != 0) { 290 head->so_error = EWOULDBLOCK; 291 break; 292 } 293 error = tsleep(&head->so_timeo, PSOCK | PCATCH, 294 "accept", 0); 295 if (error) { 296 splx(s); 297 goto done; 298 } 299 } 300 if (head->so_error) { 301 error = head->so_error; 302 head->so_error = 0; 303 splx(s); 304 goto done; 305 } 306 307 /* 308 * At this point we know that there is at least one connection 
309 * ready to be accepted. Remove it from the queue prior to 310 * allocating the file descriptor for it since falloc() may 311 * block allowing another process to accept the connection 312 * instead. 313 */ 314 so = TAILQ_FIRST(&head->so_comp); 315 TAILQ_REMOVE(&head->so_comp, so, so_list); 316 head->so_qlen--; 317 318 error = falloc(td, &nfp, &fd); 319 if (error) { 320 /* 321 * Probably ran out of file descriptors. Put the 322 * unaccepted connection back onto the queue and 323 * do another wakeup so some other process might 324 * have a chance at it. 325 */ 326 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 327 head->so_qlen++; 328 wakeup_one(&head->so_timeo); 329 splx(s); 330 goto done; 331 } 332 fhold(nfp); 333 td->td_retval[0] = fd; 334 335 /* connection has been removed from the listen queue */ 336 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 337 338 so->so_state &= ~SS_COMP; 339 so->so_head = NULL; 340 pgid = fgetown(&head->so_sigio); 341 if (pgid != 0) 342 fsetown(pgid, &so->so_sigio); 343 344 FILE_LOCK(nfp); 345 soref(so); /* file descriptor reference */ 346 nfp->f_data = so; /* nfp has ref count from falloc */ 347 nfp->f_flag = fflag; 348 nfp->f_ops = &socketops; 349 nfp->f_type = DTYPE_SOCKET; 350 FILE_UNLOCK(nfp); 351 /* Sync socket nonblocking/async state with file flags */ 352 tmp = fflag & FNONBLOCK; 353 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 354 tmp = fflag & FASYNC; 355 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 356 sa = 0; 357 error = soaccept(so, &sa); 358 if (error) { 359 /* 360 * return a namelen of zero for older code which might 361 * ignore the return value from accept. 
362 */ 363 if (uap->name != NULL) { 364 namelen = 0; 365 (void) copyout(&namelen, 366 uap->anamelen, sizeof(*uap->anamelen)); 367 } 368 goto noconnection; 369 } 370 if (sa == NULL) { 371 namelen = 0; 372 if (uap->name) 373 goto gotnoname; 374 splx(s); 375 error = 0; 376 goto done; 377 } 378 if (uap->name) { 379 /* check sa_len before it is destroyed */ 380 if (namelen > sa->sa_len) 381 namelen = sa->sa_len; 382 #ifdef COMPAT_OLDSOCK 383 if (compat) 384 ((struct osockaddr *)sa)->sa_family = 385 sa->sa_family; 386 #endif 387 error = copyout(sa, uap->name, (u_int)namelen); 388 if (!error) 389 gotnoname: 390 error = copyout(&namelen, 391 uap->anamelen, sizeof (*uap->anamelen)); 392 } 393 noconnection: 394 if (sa) 395 FREE(sa, M_SONAME); 396 397 /* 398 * close the new descriptor, assuming someone hasn't ripped it 399 * out from under us. 400 */ 401 if (error) { 402 FILEDESC_LOCK(fdp); 403 if (fdp->fd_ofiles[fd] == nfp) { 404 fdp->fd_ofiles[fd] = NULL; 405 FILEDESC_UNLOCK(fdp); 406 fdrop(nfp, td); 407 } else { 408 FILEDESC_UNLOCK(fdp); 409 } 410 } 411 splx(s); 412 413 /* 414 * Release explicitly held references before returning. 
415 */ 416 done: 417 if (nfp != NULL) 418 fdrop(nfp, td); 419 fputsock(head); 420 done2: 421 mtx_unlock(&Giant); 422 return (error); 423 } 424 425 /* 426 * MPSAFE (accept1() is MPSAFE) 427 */ 428 int 429 accept(td, uap) 430 struct thread *td; 431 struct accept_args *uap; 432 { 433 434 return (accept1(td, uap, 0)); 435 } 436 437 #ifdef COMPAT_OLDSOCK 438 /* 439 * MPSAFE (accept1() is MPSAFE) 440 */ 441 int 442 oaccept(td, uap) 443 struct thread *td; 444 struct accept_args *uap; 445 { 446 447 return (accept1(td, uap, 1)); 448 } 449 #endif /* COMPAT_OLDSOCK */ 450 451 /* 452 * MPSAFE 453 */ 454 /* ARGSUSED */ 455 int 456 connect(td, uap) 457 struct thread *td; 458 register struct connect_args /* { 459 int s; 460 caddr_t name; 461 int namelen; 462 } */ *uap; 463 { 464 struct sockaddr *sa; 465 int error; 466 467 error = getsockaddr(&sa, uap->name, uap->namelen); 468 if (error) 469 return error; 470 471 return (kern_connect(td, uap->s, sa)); 472 } 473 474 475 int 476 kern_connect(td, fd, sa) 477 struct thread *td; 478 int fd; 479 struct sockaddr *sa; 480 { 481 struct socket *so; 482 int error, s; 483 484 mtx_lock(&Giant); 485 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 486 goto done2; 487 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 488 error = EALREADY; 489 goto done1; 490 } 491 #ifdef MAC 492 error = mac_check_socket_connect(td->td_ucred, so, sa); 493 if (error) 494 goto bad; 495 #endif 496 error = soconnect(so, sa, td); 497 if (error) 498 goto bad; 499 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 500 error = EINPROGRESS; 501 goto done1; 502 } 503 s = splnet(); 504 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 505 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0); 506 if (error) 507 break; 508 } 509 if (error == 0) { 510 error = so->so_error; 511 so->so_error = 0; 512 } 513 splx(s); 514 bad: 515 so->so_state &= ~SS_ISCONNECTING; 516 if (error == ERESTART) 517 error = EINTR; 518 done1: 519 
fputsock(so); 520 done2: 521 mtx_unlock(&Giant); 522 FREE(sa, M_SONAME); 523 return (error); 524 } 525 526 /* 527 * MPSAFE 528 */ 529 int 530 socketpair(td, uap) 531 struct thread *td; 532 register struct socketpair_args /* { 533 int domain; 534 int type; 535 int protocol; 536 int *rsv; 537 } */ *uap; 538 { 539 register struct filedesc *fdp = td->td_proc->p_fd; 540 struct file *fp1, *fp2; 541 struct socket *so1, *so2; 542 int fd, error, sv[2]; 543 544 mtx_lock(&Giant); 545 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 546 td->td_ucred, td); 547 if (error) 548 goto done2; 549 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 550 td->td_ucred, td); 551 if (error) 552 goto free1; 553 error = falloc(td, &fp1, &fd); 554 if (error) 555 goto free2; 556 fhold(fp1); 557 sv[0] = fd; 558 fp1->f_data = so1; /* so1 already has ref count */ 559 error = falloc(td, &fp2, &fd); 560 if (error) 561 goto free3; 562 fhold(fp2); 563 fp2->f_data = so2; /* so2 already has ref count */ 564 sv[1] = fd; 565 error = soconnect2(so1, so2); 566 if (error) 567 goto free4; 568 if (uap->type == SOCK_DGRAM) { 569 /* 570 * Datagram socket connection is asymmetric. 
571 */ 572 error = soconnect2(so2, so1); 573 if (error) 574 goto free4; 575 } 576 FILE_LOCK(fp1); 577 fp1->f_flag = FREAD|FWRITE; 578 fp1->f_ops = &socketops; 579 fp1->f_type = DTYPE_SOCKET; 580 FILE_UNLOCK(fp1); 581 FILE_LOCK(fp2); 582 fp2->f_flag = FREAD|FWRITE; 583 fp2->f_ops = &socketops; 584 fp2->f_type = DTYPE_SOCKET; 585 FILE_UNLOCK(fp2); 586 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 587 fdrop(fp1, td); 588 fdrop(fp2, td); 589 goto done2; 590 free4: 591 FILEDESC_LOCK(fdp); 592 if (fdp->fd_ofiles[sv[1]] == fp2) { 593 fdp->fd_ofiles[sv[1]] = NULL; 594 FILEDESC_UNLOCK(fdp); 595 fdrop(fp2, td); 596 } else 597 FILEDESC_UNLOCK(fdp); 598 fdrop(fp2, td); 599 free3: 600 FILEDESC_LOCK(fdp); 601 if (fdp->fd_ofiles[sv[0]] == fp1) { 602 fdp->fd_ofiles[sv[0]] = NULL; 603 FILEDESC_UNLOCK(fdp); 604 fdrop(fp1, td); 605 } else 606 FILEDESC_UNLOCK(fdp); 607 fdrop(fp1, td); 608 free2: 609 (void)soclose(so2); 610 free1: 611 (void)soclose(so1); 612 done2: 613 mtx_unlock(&Giant); 614 return (error); 615 } 616 617 static int 618 sendit(td, s, mp, flags) 619 register struct thread *td; 620 int s; 621 register struct msghdr *mp; 622 int flags; 623 { 624 struct mbuf *control; 625 struct sockaddr *to; 626 int error; 627 628 mtx_lock(&Giant); 629 if (mp->msg_name != NULL) { 630 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 631 if (error) { 632 to = NULL; 633 goto bad; 634 } 635 mp->msg_name = to; 636 } else 637 to = NULL; 638 639 if (mp->msg_control) { 640 if (mp->msg_controllen < sizeof(struct cmsghdr) 641 #ifdef COMPAT_OLDSOCK 642 && mp->msg_flags != MSG_COMPAT 643 #endif 644 ) { 645 error = EINVAL; 646 goto bad; 647 } 648 error = sockargs(&control, mp->msg_control, 649 mp->msg_controllen, MT_CONTROL); 650 if (error) 651 goto bad; 652 #ifdef COMPAT_OLDSOCK 653 if (mp->msg_flags == MSG_COMPAT) { 654 register struct cmsghdr *cm; 655 656 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 657 if (control == 0) { 658 error = ENOBUFS; 659 goto bad; 660 } else { 661 cm = 
mtod(control, struct cmsghdr *); 662 cm->cmsg_len = control->m_len; 663 cm->cmsg_level = SOL_SOCKET; 664 cm->cmsg_type = SCM_RIGHTS; 665 } 666 } 667 #endif 668 } else { 669 control = NULL; 670 } 671 672 error = kern_sendit(td, s, mp, flags, control); 673 674 bad: 675 if (to) 676 FREE(to, M_SONAME); 677 mtx_unlock(&Giant); 678 return (error); 679 } 680 681 int 682 kern_sendit(td, s, mp, flags, control) 683 struct thread *td; 684 int s; 685 struct msghdr *mp; 686 int flags; 687 struct mbuf *control; 688 { 689 struct uio auio; 690 struct iovec *iov; 691 struct socket *so; 692 int i; 693 int len, error; 694 #ifdef KTRACE 695 struct iovec *ktriov = NULL; 696 struct uio ktruio; 697 int iovlen; 698 #endif 699 700 if ((error = fgetsock(td, s, &so, NULL)) != 0) 701 goto bad2; 702 703 #ifdef MAC 704 error = mac_check_socket_send(td->td_ucred, so); 705 if (error) 706 goto bad; 707 #endif 708 709 auio.uio_iov = mp->msg_iov; 710 auio.uio_iovcnt = mp->msg_iovlen; 711 auio.uio_segflg = UIO_USERSPACE; 712 auio.uio_rw = UIO_WRITE; 713 auio.uio_td = td; 714 auio.uio_offset = 0; /* XXX */ 715 auio.uio_resid = 0; 716 iov = mp->msg_iov; 717 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 718 if ((auio.uio_resid += iov->iov_len) < 0) { 719 error = EINVAL; 720 goto bad; 721 } 722 } 723 #ifdef KTRACE 724 if (KTRPOINT(td, KTR_GENIO)) { 725 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 726 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 727 bcopy(auio.uio_iov, ktriov, iovlen); 728 ktruio = auio; 729 } 730 #endif 731 len = auio.uio_resid; 732 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 733 0, control, flags, td); 734 if (error) { 735 if (auio.uio_resid != len && (error == ERESTART || 736 error == EINTR || error == EWOULDBLOCK)) 737 error = 0; 738 /* Generation of SIGPIPE can be controlled per socket */ 739 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 740 PROC_LOCK(td->td_proc); 741 psignal(td->td_proc, SIGPIPE); 742 PROC_UNLOCK(td->td_proc); 
743 } 744 } 745 if (error == 0) 746 td->td_retval[0] = len - auio.uio_resid; 747 #ifdef KTRACE 748 if (ktriov != NULL) { 749 if (error == 0) { 750 ktruio.uio_iov = ktriov; 751 ktruio.uio_resid = td->td_retval[0]; 752 ktrgenio(s, UIO_WRITE, &ktruio, error); 753 } 754 FREE(ktriov, M_TEMP); 755 } 756 #endif 757 bad: 758 fputsock(so); 759 bad2: 760 return (error); 761 } 762 763 /* 764 * MPSAFE 765 */ 766 int 767 sendto(td, uap) 768 struct thread *td; 769 register struct sendto_args /* { 770 int s; 771 caddr_t buf; 772 size_t len; 773 int flags; 774 caddr_t to; 775 int tolen; 776 } */ *uap; 777 { 778 struct msghdr msg; 779 struct iovec aiov; 780 int error; 781 782 msg.msg_name = uap->to; 783 msg.msg_namelen = uap->tolen; 784 msg.msg_iov = &aiov; 785 msg.msg_iovlen = 1; 786 msg.msg_control = 0; 787 #ifdef COMPAT_OLDSOCK 788 msg.msg_flags = 0; 789 #endif 790 aiov.iov_base = uap->buf; 791 aiov.iov_len = uap->len; 792 error = sendit(td, uap->s, &msg, uap->flags); 793 return (error); 794 } 795 796 #ifdef COMPAT_OLDSOCK 797 /* 798 * MPSAFE 799 */ 800 int 801 osend(td, uap) 802 struct thread *td; 803 register struct osend_args /* { 804 int s; 805 caddr_t buf; 806 int len; 807 int flags; 808 } */ *uap; 809 { 810 struct msghdr msg; 811 struct iovec aiov; 812 int error; 813 814 msg.msg_name = 0; 815 msg.msg_namelen = 0; 816 msg.msg_iov = &aiov; 817 msg.msg_iovlen = 1; 818 aiov.iov_base = uap->buf; 819 aiov.iov_len = uap->len; 820 msg.msg_control = 0; 821 msg.msg_flags = 0; 822 error = sendit(td, uap->s, &msg, uap->flags); 823 return (error); 824 } 825 826 /* 827 * MPSAFE 828 */ 829 int 830 osendmsg(td, uap) 831 struct thread *td; 832 register struct osendmsg_args /* { 833 int s; 834 caddr_t msg; 835 int flags; 836 } */ *uap; 837 { 838 struct msghdr msg; 839 struct iovec aiov[UIO_SMALLIOV], *iov; 840 int error; 841 842 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 843 if (error) 844 goto done2; 845 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 846 if 
((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 847 error = EMSGSIZE; 848 goto done2; 849 } 850 MALLOC(iov, struct iovec *, 851 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 852 M_WAITOK); 853 } else { 854 iov = aiov; 855 } 856 error = copyin(msg.msg_iov, iov, 857 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 858 if (error) 859 goto done; 860 msg.msg_flags = MSG_COMPAT; 861 msg.msg_iov = iov; 862 error = sendit(td, uap->s, &msg, uap->flags); 863 done: 864 if (iov != aiov) 865 FREE(iov, M_IOV); 866 done2: 867 return (error); 868 } 869 #endif 870 871 /* 872 * MPSAFE 873 */ 874 int 875 sendmsg(td, uap) 876 struct thread *td; 877 register struct sendmsg_args /* { 878 int s; 879 caddr_t msg; 880 int flags; 881 } */ *uap; 882 { 883 struct msghdr msg; 884 struct iovec aiov[UIO_SMALLIOV], *iov; 885 int error; 886 887 error = copyin(uap->msg, &msg, sizeof (msg)); 888 if (error) 889 goto done2; 890 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 891 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 892 error = EMSGSIZE; 893 goto done2; 894 } 895 MALLOC(iov, struct iovec *, 896 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 897 M_WAITOK); 898 } else { 899 iov = aiov; 900 } 901 if (msg.msg_iovlen && 902 (error = copyin(msg.msg_iov, iov, 903 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 904 goto done; 905 msg.msg_iov = iov; 906 #ifdef COMPAT_OLDSOCK 907 msg.msg_flags = 0; 908 #endif 909 error = sendit(td, uap->s, &msg, uap->flags); 910 done: 911 if (iov != aiov) 912 FREE(iov, M_IOV); 913 done2: 914 return (error); 915 } 916 917 static int 918 recvit(td, s, mp, namelenp) 919 register struct thread *td; 920 int s; 921 register struct msghdr *mp; 922 void *namelenp; 923 { 924 struct uio auio; 925 register struct iovec *iov; 926 register int i; 927 int len, error; 928 struct mbuf *m, *control = 0; 929 caddr_t ctlbuf; 930 struct socket *so; 931 struct sockaddr *fromsa = 0; 932 #ifdef KTRACE 933 struct iovec *ktriov = NULL; 934 struct uio ktruio; 935 int iovlen; 936 #endif 
937 938 if ((error = fgetsock(td, s, &so, NULL)) != 0) 939 return (error); 940 941 #ifdef MAC 942 error = mac_check_socket_receive(td->td_ucred, so); 943 if (error) { 944 fputsock(so); 945 return (error); 946 } 947 #endif 948 949 auio.uio_iov = mp->msg_iov; 950 auio.uio_iovcnt = mp->msg_iovlen; 951 auio.uio_segflg = UIO_USERSPACE; 952 auio.uio_rw = UIO_READ; 953 auio.uio_td = td; 954 auio.uio_offset = 0; /* XXX */ 955 auio.uio_resid = 0; 956 iov = mp->msg_iov; 957 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 958 if ((auio.uio_resid += iov->iov_len) < 0) { 959 fputsock(so); 960 return (EINVAL); 961 } 962 } 963 #ifdef KTRACE 964 if (KTRPOINT(td, KTR_GENIO)) { 965 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 966 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 967 bcopy(auio.uio_iov, ktriov, iovlen); 968 ktruio = auio; 969 } 970 #endif 971 len = auio.uio_resid; 972 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 973 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 974 &mp->msg_flags); 975 if (error) { 976 if (auio.uio_resid != len && (error == ERESTART || 977 error == EINTR || error == EWOULDBLOCK)) 978 error = 0; 979 } 980 #ifdef KTRACE 981 if (ktriov != NULL) { 982 if (error == 0) { 983 ktruio.uio_iov = ktriov; 984 ktruio.uio_resid = len - auio.uio_resid; 985 ktrgenio(s, UIO_READ, &ktruio, error); 986 } 987 FREE(ktriov, M_TEMP); 988 } 989 #endif 990 if (error) 991 goto out; 992 td->td_retval[0] = len - auio.uio_resid; 993 if (mp->msg_name) { 994 len = mp->msg_namelen; 995 if (len <= 0 || fromsa == 0) 996 len = 0; 997 else { 998 /* save sa_len before it is destroyed by MSG_COMPAT */ 999 len = MIN(len, fromsa->sa_len); 1000 #ifdef COMPAT_OLDSOCK 1001 if (mp->msg_flags & MSG_COMPAT) 1002 ((struct osockaddr *)fromsa)->sa_family = 1003 fromsa->sa_family; 1004 #endif 1005 error = copyout(fromsa, mp->msg_name, (unsigned)len); 1006 if (error) 1007 goto out; 1008 } 1009 mp->msg_namelen = len; 1010 if (namelenp && 1011 
(error = copyout(&len, namelenp, sizeof (int)))) { 1012 #ifdef COMPAT_OLDSOCK 1013 if (mp->msg_flags & MSG_COMPAT) 1014 error = 0; /* old recvfrom didn't check */ 1015 else 1016 #endif 1017 goto out; 1018 } 1019 } 1020 if (mp->msg_control) { 1021 #ifdef COMPAT_OLDSOCK 1022 /* 1023 * We assume that old recvmsg calls won't receive access 1024 * rights and other control info, esp. as control info 1025 * is always optional and those options didn't exist in 4.3. 1026 * If we receive rights, trim the cmsghdr; anything else 1027 * is tossed. 1028 */ 1029 if (control && mp->msg_flags & MSG_COMPAT) { 1030 if (mtod(control, struct cmsghdr *)->cmsg_level != 1031 SOL_SOCKET || 1032 mtod(control, struct cmsghdr *)->cmsg_type != 1033 SCM_RIGHTS) { 1034 mp->msg_controllen = 0; 1035 goto out; 1036 } 1037 control->m_len -= sizeof (struct cmsghdr); 1038 control->m_data += sizeof (struct cmsghdr); 1039 } 1040 #endif 1041 len = mp->msg_controllen; 1042 m = control; 1043 mp->msg_controllen = 0; 1044 ctlbuf = mp->msg_control; 1045 1046 while (m && len > 0) { 1047 unsigned int tocopy; 1048 1049 if (len >= m->m_len) 1050 tocopy = m->m_len; 1051 else { 1052 mp->msg_flags |= MSG_CTRUNC; 1053 tocopy = len; 1054 } 1055 1056 if ((error = copyout(mtod(m, caddr_t), 1057 ctlbuf, tocopy)) != 0) 1058 goto out; 1059 1060 ctlbuf += tocopy; 1061 len -= tocopy; 1062 m = m->m_next; 1063 } 1064 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1065 } 1066 out: 1067 fputsock(so); 1068 if (fromsa) 1069 FREE(fromsa, M_SONAME); 1070 if (control) 1071 m_freem(control); 1072 return (error); 1073 } 1074 1075 /* 1076 * MPSAFE 1077 */ 1078 int 1079 recvfrom(td, uap) 1080 struct thread *td; 1081 register struct recvfrom_args /* { 1082 int s; 1083 caddr_t buf; 1084 size_t len; 1085 int flags; 1086 caddr_t from; 1087 int *fromlenaddr; 1088 } */ *uap; 1089 { 1090 struct msghdr msg; 1091 struct iovec aiov; 1092 int error; 1093 1094 mtx_lock(&Giant); 1095 if (uap->fromlenaddr) { 1096 error = 
copyin(uap->fromlenaddr, 1097 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1098 if (error) 1099 goto done2; 1100 } else { 1101 msg.msg_namelen = 0; 1102 } 1103 msg.msg_name = uap->from; 1104 msg.msg_iov = &aiov; 1105 msg.msg_iovlen = 1; 1106 aiov.iov_base = uap->buf; 1107 aiov.iov_len = uap->len; 1108 msg.msg_control = 0; 1109 msg.msg_flags = uap->flags; 1110 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1111 done2: 1112 mtx_unlock(&Giant); 1113 return(error); 1114 } 1115 1116 #ifdef COMPAT_OLDSOCK 1117 /* 1118 * MPSAFE 1119 */ 1120 int 1121 orecvfrom(td, uap) 1122 struct thread *td; 1123 struct recvfrom_args *uap; 1124 { 1125 1126 uap->flags |= MSG_COMPAT; 1127 return (recvfrom(td, uap)); 1128 } 1129 #endif 1130 1131 1132 #ifdef COMPAT_OLDSOCK 1133 /* 1134 * MPSAFE 1135 */ 1136 int 1137 orecv(td, uap) 1138 struct thread *td; 1139 register struct orecv_args /* { 1140 int s; 1141 caddr_t buf; 1142 int len; 1143 int flags; 1144 } */ *uap; 1145 { 1146 struct msghdr msg; 1147 struct iovec aiov; 1148 int error; 1149 1150 mtx_lock(&Giant); 1151 msg.msg_name = 0; 1152 msg.msg_namelen = 0; 1153 msg.msg_iov = &aiov; 1154 msg.msg_iovlen = 1; 1155 aiov.iov_base = uap->buf; 1156 aiov.iov_len = uap->len; 1157 msg.msg_control = 0; 1158 msg.msg_flags = uap->flags; 1159 error = recvit(td, uap->s, &msg, NULL); 1160 mtx_unlock(&Giant); 1161 return (error); 1162 } 1163 1164 /* 1165 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1166 * overlays the new one, missing only the flags, and with the (old) access 1167 * rights where the control fields are now. 
1168 * 1169 * MPSAFE 1170 */ 1171 int 1172 orecvmsg(td, uap) 1173 struct thread *td; 1174 register struct orecvmsg_args /* { 1175 int s; 1176 struct omsghdr *msg; 1177 int flags; 1178 } */ *uap; 1179 { 1180 struct msghdr msg; 1181 struct iovec aiov[UIO_SMALLIOV], *iov; 1182 int error; 1183 1184 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1185 if (error) 1186 return (error); 1187 1188 mtx_lock(&Giant); 1189 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1190 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1191 error = EMSGSIZE; 1192 goto done2; 1193 } 1194 MALLOC(iov, struct iovec *, 1195 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1196 M_WAITOK); 1197 } else { 1198 iov = aiov; 1199 } 1200 msg.msg_flags = uap->flags | MSG_COMPAT; 1201 error = copyin(msg.msg_iov, iov, 1202 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1203 if (error) 1204 goto done; 1205 msg.msg_iov = iov; 1206 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1207 1208 if (msg.msg_controllen && error == 0) 1209 error = copyout(&msg.msg_controllen, 1210 &uap->msg->msg_accrightslen, sizeof (int)); 1211 done: 1212 if (iov != aiov) 1213 FREE(iov, M_IOV); 1214 done2: 1215 mtx_unlock(&Giant); 1216 return (error); 1217 } 1218 #endif 1219 1220 /* 1221 * MPSAFE 1222 */ 1223 int 1224 recvmsg(td, uap) 1225 struct thread *td; 1226 register struct recvmsg_args /* { 1227 int s; 1228 struct msghdr *msg; 1229 int flags; 1230 } */ *uap; 1231 { 1232 struct msghdr msg; 1233 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1234 register int error; 1235 1236 mtx_lock(&Giant); 1237 error = copyin(uap->msg, &msg, sizeof (msg)); 1238 if (error) 1239 goto done2; 1240 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1241 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1242 error = EMSGSIZE; 1243 goto done2; 1244 } 1245 MALLOC(iov, struct iovec *, 1246 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1247 M_WAITOK); 1248 } else { 1249 iov = aiov; 1250 } 1251 #ifdef COMPAT_OLDSOCK 1252 msg.msg_flags = 
uap->flags &~ MSG_COMPAT; 1253 #else 1254 msg.msg_flags = uap->flags; 1255 #endif 1256 uiov = msg.msg_iov; 1257 msg.msg_iov = iov; 1258 error = copyin(uiov, iov, 1259 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1260 if (error) 1261 goto done; 1262 error = recvit(td, uap->s, &msg, NULL); 1263 if (!error) { 1264 msg.msg_iov = uiov; 1265 error = copyout(&msg, uap->msg, sizeof(msg)); 1266 } 1267 done: 1268 if (iov != aiov) 1269 FREE(iov, M_IOV); 1270 done2: 1271 mtx_unlock(&Giant); 1272 return (error); 1273 } 1274 1275 /* 1276 * MPSAFE 1277 */ 1278 /* ARGSUSED */ 1279 int 1280 shutdown(td, uap) 1281 struct thread *td; 1282 register struct shutdown_args /* { 1283 int s; 1284 int how; 1285 } */ *uap; 1286 { 1287 struct socket *so; 1288 int error; 1289 1290 mtx_lock(&Giant); 1291 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1292 error = soshutdown(so, uap->how); 1293 fputsock(so); 1294 } 1295 mtx_unlock(&Giant); 1296 return(error); 1297 } 1298 1299 /* 1300 * MPSAFE 1301 */ 1302 /* ARGSUSED */ 1303 int 1304 setsockopt(td, uap) 1305 struct thread *td; 1306 register struct setsockopt_args /* { 1307 int s; 1308 int level; 1309 int name; 1310 caddr_t val; 1311 int valsize; 1312 } */ *uap; 1313 { 1314 struct socket *so; 1315 struct sockopt sopt; 1316 int error; 1317 1318 if (uap->val == 0 && uap->valsize != 0) 1319 return (EFAULT); 1320 if (uap->valsize < 0) 1321 return (EINVAL); 1322 1323 mtx_lock(&Giant); 1324 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1325 sopt.sopt_dir = SOPT_SET; 1326 sopt.sopt_level = uap->level; 1327 sopt.sopt_name = uap->name; 1328 sopt.sopt_val = uap->val; 1329 sopt.sopt_valsize = uap->valsize; 1330 sopt.sopt_td = td; 1331 error = sosetopt(so, &sopt); 1332 fputsock(so); 1333 } 1334 mtx_unlock(&Giant); 1335 return(error); 1336 } 1337 1338 /* 1339 * MPSAFE 1340 */ 1341 /* ARGSUSED */ 1342 int 1343 getsockopt(td, uap) 1344 struct thread *td; 1345 register struct getsockopt_args /* { 1346 int s; 1347 int level; 1348 int name; 
1349 caddr_t val; 1350 int *avalsize; 1351 } */ *uap; 1352 { 1353 int valsize, error; 1354 struct socket *so; 1355 struct sockopt sopt; 1356 1357 mtx_lock(&Giant); 1358 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1359 goto done2; 1360 if (uap->val) { 1361 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1362 if (error) 1363 goto done1; 1364 if (valsize < 0) { 1365 error = EINVAL; 1366 goto done1; 1367 } 1368 } else { 1369 valsize = 0; 1370 } 1371 1372 sopt.sopt_dir = SOPT_GET; 1373 sopt.sopt_level = uap->level; 1374 sopt.sopt_name = uap->name; 1375 sopt.sopt_val = uap->val; 1376 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1377 sopt.sopt_td = td; 1378 1379 error = sogetopt(so, &sopt); 1380 if (error == 0) { 1381 valsize = sopt.sopt_valsize; 1382 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1383 } 1384 done1: 1385 fputsock(so); 1386 done2: 1387 mtx_unlock(&Giant); 1388 return (error); 1389 } 1390 1391 /* 1392 * getsockname1() - Get socket name. 
1393 * 1394 * MPSAFE 1395 */ 1396 /* ARGSUSED */ 1397 static int 1398 getsockname1(td, uap, compat) 1399 struct thread *td; 1400 register struct getsockname_args /* { 1401 int fdes; 1402 caddr_t asa; 1403 int *alen; 1404 } */ *uap; 1405 int compat; 1406 { 1407 struct socket *so; 1408 struct sockaddr *sa; 1409 int len, error; 1410 1411 mtx_lock(&Giant); 1412 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1413 goto done2; 1414 error = copyin(uap->alen, &len, sizeof (len)); 1415 if (error) 1416 goto done1; 1417 if (len < 0) { 1418 error = EINVAL; 1419 goto done1; 1420 } 1421 sa = 0; 1422 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1423 if (error) 1424 goto bad; 1425 if (sa == 0) { 1426 len = 0; 1427 goto gotnothing; 1428 } 1429 1430 len = MIN(len, sa->sa_len); 1431 #ifdef COMPAT_OLDSOCK 1432 if (compat) 1433 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1434 #endif 1435 error = copyout(sa, uap->asa, (u_int)len); 1436 if (error == 0) 1437 gotnothing: 1438 error = copyout(&len, uap->alen, sizeof (len)); 1439 bad: 1440 if (sa) 1441 FREE(sa, M_SONAME); 1442 done1: 1443 fputsock(so); 1444 done2: 1445 mtx_unlock(&Giant); 1446 return (error); 1447 } 1448 1449 /* 1450 * MPSAFE 1451 */ 1452 int 1453 getsockname(td, uap) 1454 struct thread *td; 1455 struct getsockname_args *uap; 1456 { 1457 1458 return (getsockname1(td, uap, 0)); 1459 } 1460 1461 #ifdef COMPAT_OLDSOCK 1462 /* 1463 * MPSAFE 1464 */ 1465 int 1466 ogetsockname(td, uap) 1467 struct thread *td; 1468 struct getsockname_args *uap; 1469 { 1470 1471 return (getsockname1(td, uap, 1)); 1472 } 1473 #endif /* COMPAT_OLDSOCK */ 1474 1475 /* 1476 * getpeername1() - Get name of peer for connected socket. 
1477 * 1478 * MPSAFE 1479 */ 1480 /* ARGSUSED */ 1481 static int 1482 getpeername1(td, uap, compat) 1483 struct thread *td; 1484 register struct getpeername_args /* { 1485 int fdes; 1486 caddr_t asa; 1487 int *alen; 1488 } */ *uap; 1489 int compat; 1490 { 1491 struct socket *so; 1492 struct sockaddr *sa; 1493 int len, error; 1494 1495 mtx_lock(&Giant); 1496 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1497 goto done2; 1498 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1499 error = ENOTCONN; 1500 goto done1; 1501 } 1502 error = copyin(uap->alen, &len, sizeof (len)); 1503 if (error) 1504 goto done1; 1505 if (len < 0) { 1506 error = EINVAL; 1507 goto done1; 1508 } 1509 sa = 0; 1510 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1511 if (error) 1512 goto bad; 1513 if (sa == 0) { 1514 len = 0; 1515 goto gotnothing; 1516 } 1517 len = MIN(len, sa->sa_len); 1518 #ifdef COMPAT_OLDSOCK 1519 if (compat) 1520 ((struct osockaddr *)sa)->sa_family = 1521 sa->sa_family; 1522 #endif 1523 error = copyout(sa, uap->asa, (u_int)len); 1524 if (error) 1525 goto bad; 1526 gotnothing: 1527 error = copyout(&len, uap->alen, sizeof (len)); 1528 bad: 1529 if (sa) 1530 FREE(sa, M_SONAME); 1531 done1: 1532 fputsock(so); 1533 done2: 1534 mtx_unlock(&Giant); 1535 return (error); 1536 } 1537 1538 /* 1539 * MPSAFE 1540 */ 1541 int 1542 getpeername(td, uap) 1543 struct thread *td; 1544 struct getpeername_args *uap; 1545 { 1546 1547 return (getpeername1(td, uap, 0)); 1548 } 1549 1550 #ifdef COMPAT_OLDSOCK 1551 /* 1552 * MPSAFE 1553 */ 1554 int 1555 ogetpeername(td, uap) 1556 struct thread *td; 1557 struct ogetpeername_args *uap; 1558 { 1559 1560 /* XXX uap should have type `getpeername_args *' to begin with. 
*/ 1561 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1562 } 1563 #endif /* COMPAT_OLDSOCK */ 1564 1565 int 1566 sockargs(mp, buf, buflen, type) 1567 struct mbuf **mp; 1568 caddr_t buf; 1569 int buflen, type; 1570 { 1571 register struct sockaddr *sa; 1572 register struct mbuf *m; 1573 int error; 1574 1575 if ((u_int)buflen > MLEN) { 1576 #ifdef COMPAT_OLDSOCK 1577 if (type == MT_SONAME && (u_int)buflen <= 112) 1578 buflen = MLEN; /* unix domain compat. hack */ 1579 else 1580 #endif 1581 return (EINVAL); 1582 } 1583 m = m_get(M_TRYWAIT, type); 1584 if (m == NULL) 1585 return (ENOBUFS); 1586 m->m_len = buflen; 1587 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1588 if (error) 1589 (void) m_free(m); 1590 else { 1591 *mp = m; 1592 if (type == MT_SONAME) { 1593 sa = mtod(m, struct sockaddr *); 1594 1595 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1596 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1597 sa->sa_family = sa->sa_len; 1598 #endif 1599 sa->sa_len = buflen; 1600 } 1601 } 1602 return (error); 1603 } 1604 1605 int 1606 getsockaddr(namp, uaddr, len) 1607 struct sockaddr **namp; 1608 caddr_t uaddr; 1609 size_t len; 1610 { 1611 struct sockaddr *sa; 1612 int error; 1613 1614 if (len > SOCK_MAXADDRLEN) 1615 return ENAMETOOLONG; 1616 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1617 error = copyin(uaddr, sa, len); 1618 if (error) { 1619 FREE(sa, M_SONAME); 1620 } else { 1621 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1622 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1623 sa->sa_family = sa->sa_len; 1624 #endif 1625 sa->sa_len = len; 1626 *namp = sa; 1627 } 1628 return error; 1629 } 1630 1631 /* 1632 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. 
:-)) 1633 */ 1634 static void 1635 sf_buf_init(void *arg) 1636 { 1637 struct sf_buf *sf_bufs; 1638 vm_offset_t sf_base; 1639 int i; 1640 1641 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF); 1642 mtx_lock(&sf_freelist.sf_lock); 1643 SLIST_INIT(&sf_freelist.sf_head); 1644 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE); 1645 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, 1646 M_NOWAIT | M_ZERO); 1647 for (i = 0; i < nsfbufs; i++) { 1648 sf_bufs[i].kva = sf_base + i * PAGE_SIZE; 1649 SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list); 1650 } 1651 sf_buf_alloc_want = 0; 1652 mtx_unlock(&sf_freelist.sf_lock); 1653 } 1654 1655 /* 1656 * Get an sf_buf from the freelist. Will block if none are available. 1657 */ 1658 struct sf_buf * 1659 sf_buf_alloc(struct vm_page *m) 1660 { 1661 struct sf_buf *sf; 1662 int error; 1663 1664 mtx_lock(&sf_freelist.sf_lock); 1665 while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) { 1666 sf_buf_alloc_want++; 1667 error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH, 1668 "sfbufa", 0); 1669 sf_buf_alloc_want--; 1670 1671 /* 1672 * If we got a signal, don't risk going back to sleep. 1673 */ 1674 if (error) 1675 break; 1676 } 1677 if (sf != NULL) { 1678 SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list); 1679 sf->m = m; 1680 pmap_qenter(sf->kva, &sf->m, 1); 1681 } 1682 mtx_unlock(&sf_freelist.sf_lock); 1683 return (sf); 1684 } 1685 1686 /* 1687 * Detatch mapped page and release resources back to the system. 1688 */ 1689 void 1690 sf_buf_free(void *addr, void *args) 1691 { 1692 struct sf_buf *sf; 1693 struct vm_page *m; 1694 1695 sf = args; 1696 pmap_qremove((vm_offset_t)addr, 1); 1697 m = sf->m; 1698 vm_page_lock_queues(); 1699 vm_page_unwire(m, 0); 1700 /* 1701 * Check for the object going away on us. This can 1702 * happen since we don't hold a reference to it. 1703 * If so, we're responsible for freeing the page. 
1704 */ 1705 if (m->wire_count == 0 && m->object == NULL) 1706 vm_page_free(m); 1707 vm_page_unlock_queues(); 1708 sf->m = NULL; 1709 mtx_lock(&sf_freelist.sf_lock); 1710 SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list); 1711 if (sf_buf_alloc_want > 0) 1712 wakeup_one(&sf_freelist); 1713 mtx_unlock(&sf_freelist.sf_lock); 1714 } 1715 1716 /* 1717 * sendfile(2) 1718 * 1719 * MPSAFE 1720 * 1721 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1722 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1723 * 1724 * Send a file specified by 'fd' and starting at 'offset' to a socket 1725 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1726 * nbytes == 0. Optionally add a header and/or trailer to the socket 1727 * output. If specified, write the total number of bytes sent into *sbytes. 1728 * 1729 */ 1730 int 1731 sendfile(struct thread *td, struct sendfile_args *uap) 1732 { 1733 1734 return (do_sendfile(td, uap, 0)); 1735 } 1736 1737 #ifdef COMPAT_FREEBSD4 1738 int 1739 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1740 { 1741 struct sendfile_args args; 1742 1743 args.fd = uap->fd; 1744 args.s = uap->s; 1745 args.offset = uap->offset; 1746 args.nbytes = uap->nbytes; 1747 args.hdtr = uap->hdtr; 1748 args.sbytes = uap->sbytes; 1749 args.flags = uap->flags; 1750 1751 return (do_sendfile(td, &args, 1)); 1752 } 1753 #endif /* COMPAT_FREEBSD4 */ 1754 1755 static int 1756 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1757 { 1758 struct vnode *vp; 1759 struct vm_object *obj; 1760 struct socket *so = NULL; 1761 struct mbuf *m; 1762 struct sf_buf *sf; 1763 struct vm_page *pg; 1764 struct writev_args nuap; 1765 struct sf_hdtr hdtr; 1766 off_t off, xfsize, hdtr_size, sbytes = 0; 1767 int error, s; 1768 1769 mtx_lock(&Giant); 1770 1771 hdtr_size = 0; 1772 1773 /* 1774 * The descriptor must be a regular file and have a backing VM object. 
1775 */ 1776 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1777 goto done; 1778 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1779 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1780 error = EINVAL; 1781 VOP_UNLOCK(vp, 0, td); 1782 goto done; 1783 } 1784 VOP_UNLOCK(vp, 0, td); 1785 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1786 goto done; 1787 if (so->so_type != SOCK_STREAM) { 1788 error = EINVAL; 1789 goto done; 1790 } 1791 if ((so->so_state & SS_ISCONNECTED) == 0) { 1792 error = ENOTCONN; 1793 goto done; 1794 } 1795 if (uap->offset < 0) { 1796 error = EINVAL; 1797 goto done; 1798 } 1799 1800 #ifdef MAC 1801 error = mac_check_socket_send(td->td_ucred, so); 1802 if (error) 1803 goto done; 1804 #endif 1805 1806 /* 1807 * If specified, get the pointer to the sf_hdtr struct for 1808 * any headers/trailers. 1809 */ 1810 if (uap->hdtr != NULL) { 1811 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1812 if (error) 1813 goto done; 1814 /* 1815 * Send any headers. Wimp out and use writev(2). 1816 */ 1817 if (hdtr.headers != NULL) { 1818 nuap.fd = uap->s; 1819 nuap.iovp = hdtr.headers; 1820 nuap.iovcnt = hdtr.hdr_cnt; 1821 error = writev(td, &nuap); 1822 if (error) 1823 goto done; 1824 if (compat) 1825 sbytes += td->td_retval[0]; 1826 else 1827 hdtr_size += td->td_retval[0]; 1828 } 1829 } 1830 1831 /* 1832 * Protect against multiple writers to the socket. 1833 */ 1834 (void) sblock(&so->so_snd, M_WAITOK); 1835 1836 /* 1837 * Loop through the pages in the file, starting with the requested 1838 * offset. Get a file page (do I/O if necessary), map the file page 1839 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1840 * it on the socket. 1841 */ 1842 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1843 vm_pindex_t pindex; 1844 vm_offset_t pgoff; 1845 1846 pindex = OFF_TO_IDX(off); 1847 VM_OBJECT_LOCK(obj); 1848 retry_lookup: 1849 /* 1850 * Calculate the amount to transfer. 
Not to exceed a page, 1851 * the EOF, or the passed in nbytes. 1852 */ 1853 xfsize = obj->un_pager.vnp.vnp_size - off; 1854 VM_OBJECT_UNLOCK(obj); 1855 if (xfsize > PAGE_SIZE) 1856 xfsize = PAGE_SIZE; 1857 pgoff = (vm_offset_t)(off & PAGE_MASK); 1858 if (PAGE_SIZE - pgoff < xfsize) 1859 xfsize = PAGE_SIZE - pgoff; 1860 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1861 xfsize = uap->nbytes - sbytes; 1862 if (xfsize <= 0) 1863 break; 1864 /* 1865 * Optimize the non-blocking case by looking at the socket space 1866 * before going to the extra work of constituting the sf_buf. 1867 */ 1868 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1869 if (so->so_state & SS_CANTSENDMORE) 1870 error = EPIPE; 1871 else 1872 error = EAGAIN; 1873 sbunlock(&so->so_snd); 1874 goto done; 1875 } 1876 VM_OBJECT_LOCK(obj); 1877 /* 1878 * Attempt to look up the page. 1879 * 1880 * Allocate if not found 1881 * 1882 * Wait and loop if busy. 1883 */ 1884 pg = vm_page_lookup(obj, pindex); 1885 1886 if (pg == NULL) { 1887 pg = vm_page_alloc(obj, pindex, 1888 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1889 if (pg == NULL) { 1890 VM_OBJECT_UNLOCK(obj); 1891 VM_WAIT; 1892 VM_OBJECT_LOCK(obj); 1893 goto retry_lookup; 1894 } 1895 vm_page_lock_queues(); 1896 vm_page_wakeup(pg); 1897 } else { 1898 vm_page_lock_queues(); 1899 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1900 goto retry_lookup; 1901 /* 1902 * Wire the page so it does not get ripped out from 1903 * under us. 1904 */ 1905 vm_page_wire(pg); 1906 } 1907 1908 /* 1909 * If page is not valid for what we need, initiate I/O 1910 */ 1911 1912 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1913 int bsize, resid; 1914 1915 /* 1916 * Ensure that our page is still around when the I/O 1917 * completes. 1918 */ 1919 vm_page_io_start(pg); 1920 vm_page_unlock_queues(); 1921 VM_OBJECT_UNLOCK(obj); 1922 1923 /* 1924 * Get the page from backing store. 
1925 */ 1926 bsize = vp->v_mount->mnt_stat.f_iosize; 1927 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1928 /* 1929 * XXXMAC: Because we don't have fp->f_cred here, 1930 * we pass in NOCRED. This is probably wrong, but 1931 * is consistent with our original implementation. 1932 */ 1933 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1934 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1935 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1936 td->td_ucred, NOCRED, &resid, td); 1937 VOP_UNLOCK(vp, 0, td); 1938 if (error) 1939 VM_OBJECT_LOCK(obj); 1940 vm_page_lock_queues(); 1941 vm_page_flag_clear(pg, PG_ZERO); 1942 vm_page_io_finish(pg); 1943 if (error) { 1944 vm_page_unwire(pg, 0); 1945 /* 1946 * See if anyone else might know about this page. 1947 * If not and it is not valid, then free it. 1948 */ 1949 if (pg->wire_count == 0 && pg->valid == 0 && 1950 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1951 pg->hold_count == 0) { 1952 vm_page_busy(pg); 1953 vm_page_free(pg); 1954 } 1955 vm_page_unlock_queues(); 1956 VM_OBJECT_UNLOCK(obj); 1957 sbunlock(&so->so_snd); 1958 goto done; 1959 } 1960 } else 1961 VM_OBJECT_UNLOCK(obj); 1962 vm_page_unlock_queues(); 1963 1964 /* 1965 * Get a sendfile buf. We usually wait as long as necessary, 1966 * but this wait can be interrupted. 1967 */ 1968 if ((sf = sf_buf_alloc(pg)) == NULL) { 1969 vm_page_lock_queues(); 1970 vm_page_unwire(pg, 0); 1971 if (pg->wire_count == 0 && pg->object == NULL) 1972 vm_page_free(pg); 1973 vm_page_unlock_queues(); 1974 sbunlock(&so->so_snd); 1975 error = EINTR; 1976 goto done; 1977 } 1978 1979 /* 1980 * Get an mbuf header and set it up as having external storage. 1981 */ 1982 MGETHDR(m, M_TRYWAIT, MT_DATA); 1983 if (m == NULL) { 1984 error = ENOBUFS; 1985 sf_buf_free((void *)sf->kva, sf); 1986 sbunlock(&so->so_snd); 1987 goto done; 1988 } 1989 /* 1990 * Setup external storage for mbuf. 
1991 */ 1992 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, sf, M_RDONLY, 1993 EXT_SFBUF); 1994 m->m_data = (char *) sf->kva + pgoff; 1995 m->m_pkthdr.len = m->m_len = xfsize; 1996 /* 1997 * Add the buffer to the socket buffer chain. 1998 */ 1999 s = splnet(); 2000 retry_space: 2001 /* 2002 * Make sure that the socket is still able to take more data. 2003 * CANTSENDMORE being true usually means that the connection 2004 * was closed. so_error is true when an error was sensed after 2005 * a previous send. 2006 * The state is checked after the page mapping and buffer 2007 * allocation above since those operations may block and make 2008 * any socket checks stale. From this point forward, nothing 2009 * blocks before the pru_send (or more accurately, any blocking 2010 * results in a loop back to here to re-check). 2011 */ 2012 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 2013 if (so->so_state & SS_CANTSENDMORE) { 2014 error = EPIPE; 2015 } else { 2016 error = so->so_error; 2017 so->so_error = 0; 2018 } 2019 m_freem(m); 2020 sbunlock(&so->so_snd); 2021 splx(s); 2022 goto done; 2023 } 2024 /* 2025 * Wait for socket space to become available. We do this just 2026 * after checking the connection state above in order to avoid 2027 * a race condition with sbwait(). 2028 */ 2029 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 2030 if (so->so_state & SS_NBIO) { 2031 m_freem(m); 2032 sbunlock(&so->so_snd); 2033 splx(s); 2034 error = EAGAIN; 2035 goto done; 2036 } 2037 error = sbwait(&so->so_snd); 2038 /* 2039 * An error from sbwait usually indicates that we've 2040 * been interrupted by a signal. If we've sent anything 2041 * then return bytes sent, otherwise return the error. 
2042 */ 2043 if (error) { 2044 m_freem(m); 2045 sbunlock(&so->so_snd); 2046 splx(s); 2047 goto done; 2048 } 2049 goto retry_space; 2050 } 2051 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2052 splx(s); 2053 if (error) { 2054 sbunlock(&so->so_snd); 2055 goto done; 2056 } 2057 } 2058 sbunlock(&so->so_snd); 2059 2060 /* 2061 * Send trailers. Wimp out and use writev(2). 2062 */ 2063 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 2064 nuap.fd = uap->s; 2065 nuap.iovp = hdtr.trailers; 2066 nuap.iovcnt = hdtr.trl_cnt; 2067 error = writev(td, &nuap); 2068 if (error) 2069 goto done; 2070 if (compat) 2071 sbytes += td->td_retval[0]; 2072 else 2073 hdtr_size += td->td_retval[0]; 2074 } 2075 2076 done: 2077 /* 2078 * If there was no error we have to clear td->td_retval[0] 2079 * because it may have been set by writev. 2080 */ 2081 if (error == 0) { 2082 td->td_retval[0] = 0; 2083 } 2084 if (uap->sbytes != NULL) { 2085 if (!compat) 2086 sbytes += hdtr_size; 2087 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2088 } 2089 if (vp) 2090 vrele(vp); 2091 if (so) 2092 fputsock(so); 2093 mtx_unlock(&Giant); 2094 return (error); 2095 } 2096