1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 * $FreeBSD$ 38 */ 39 40 #include "opt_compat.h" 41 #include "opt_ktrace.h" 42 #include "opt_mac.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/mac.h> 49 #include <sys/mutex.h> 50 #include <sys/sysproto.h> 51 #include <sys/malloc.h> 52 #include <sys/filedesc.h> 53 #include <sys/event.h> 54 #include <sys/proc.h> 55 #include <sys/fcntl.h> 56 #include <sys/file.h> 57 #include <sys/lock.h> 58 #include <sys/mount.h> 59 #include <sys/mbuf.h> 60 #include <sys/protosw.h> 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 #include <sys/signalvar.h> 64 #include <sys/uio.h> 65 #include <sys/vnode.h> 66 #ifdef KTRACE 67 #include <sys/ktrace.h> 68 #endif 69 70 #include <vm/vm.h> 71 #include <vm/vm_object.h> 72 #include <vm/vm_page.h> 73 #include <vm/vm_pageout.h> 74 #include <vm/vm_kern.h> 75 #include <vm/vm_extern.h> 76 77 static void sf_buf_init(void *arg); 78 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) 79 80 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 81 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 82 83 static int accept1(struct thread *td, struct accept_args *uap, int compat); 84 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 85 static int getsockname1(struct thread *td, struct getsockname_args *uap, 86 int compat); 87 static int getpeername1(struct thread *td, struct getpeername_args *uap, 88 int compat); 89 90 /* 91 * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the 92 * sf_freelist head with the sf_lock mutex. 93 */ 94 static struct { 95 SLIST_HEAD(, sf_buf) sf_head; 96 struct mtx sf_lock; 97 } sf_freelist; 98 99 vm_offset_t sf_base; 100 struct sf_buf *sf_bufs; 101 u_int sf_buf_alloc_want; 102 103 /* 104 * System call interface to the socket abstraction. 105 */ 106 #if defined(COMPAT_43) || defined(COMPAT_SUNOS) 107 #define COMPAT_OLDSOCK 108 #endif 109 110 extern struct fileops socketops; 111 112 /* 113 * MPSAFE 114 */ 115 int 116 socket(td, uap) 117 struct thread *td; 118 register struct socket_args /* { 119 int domain; 120 int type; 121 int protocol; 122 } */ *uap; 123 { 124 struct filedesc *fdp; 125 struct socket *so; 126 struct file *fp; 127 int fd, error; 128 129 mtx_lock(&Giant); 130 fdp = td->td_proc->p_fd; 131 error = falloc(td, &fp, &fd); 132 if (error) 133 goto done2; 134 fhold(fp); 135 error = socreate(uap->domain, &so, uap->type, uap->protocol, 136 td->td_ucred, td); 137 FILEDESC_LOCK(fdp); 138 if (error) { 139 if (fdp->fd_ofiles[fd] == fp) { 140 fdp->fd_ofiles[fd] = NULL; 141 FILEDESC_UNLOCK(fdp); 142 fdrop(fp, td); 143 } else 144 FILEDESC_UNLOCK(fdp); 145 } else { 146 fp->f_data = so; /* already has ref count */ 147 fp->f_flag = FREAD|FWRITE; 148 fp->f_ops = &socketops; 149 fp->f_type = DTYPE_SOCKET; 150 FILEDESC_UNLOCK(fdp); 151 td->td_retval[0] = fd; 152 } 153 fdrop(fp, td); 154 done2: 155 mtx_unlock(&Giant); 156 return (error); 157 } 158 159 /* 160 * MPSAFE 161 */ 162 /* ARGSUSED */ 163 int 164 bind(td, uap) 165 struct thread *td; 166 register struct bind_args /* { 167 int s; 168 caddr_t name; 169 int namelen; 170 } */ *uap; 171 { 172 struct socket *so; 173 struct sockaddr *sa; 174 int error; 175 176 mtx_lock(&Giant); 177 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 178 goto done2; 179 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 180 goto done1; 181 #ifdef MAC 182 error = mac_check_socket_bind(td->td_ucred, so, sa); 183 if (error) { 184 FREE(sa, M_SONAME); 185 goto done1; 186 } 187 #endif 188 error = sobind(so, sa, td); 189 FREE(sa, M_SONAME); 190 done1: 191 fputsock(so); 192 done2: 193 mtx_unlock(&Giant); 194 return (error); 195 } 196 197 /* 198 * MPSAFE 199 */ 200 /* ARGSUSED */ 201 int 202 listen(td, uap) 203 struct thread *td; 204 register struct listen_args /* { 205 int s; 206 int backlog; 207 } */ *uap; 208 { 209 struct socket *so; 210 int error; 211 212 mtx_lock(&Giant); 213 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 214 #ifdef MAC 215 error = mac_check_socket_listen(td->td_ucred, so); 216 if (error) 217 goto done; 218 #endif 219 error = solisten(so, uap->backlog, td); 220 #ifdef MAC 221 done: 222 #endif 223 fputsock(so); 224 } 225 mtx_unlock(&Giant); 226 return(error); 227 } 228 229 /* 230 * accept1() 231 * MPSAFE 232 */ 233 static int 234 accept1(td, uap, compat) 235 struct thread *td; 236 register struct accept_args /* { 237 int s; 238 caddr_t name; 239 int *anamelen; 240 } */ *uap; 241 int compat; 242 { 243 struct filedesc *fdp; 244 struct file *nfp = NULL; 245 struct sockaddr *sa; 246 int namelen, error, s; 247 struct socket *head, *so; 248 int fd; 249 u_int fflag; 250 pid_t pgid; 251 252 mtx_lock(&Giant); 253 fdp = td->td_proc->p_fd; 254 if (uap->name) { 255 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 256 if(error) 257 goto done2; 258 if (namelen < 0) { 259 error = EINVAL; 260 goto done2; 261 } 262 } 263 error = fgetsock(td, uap->s, &head, &fflag); 264 if (error) 265 goto done2; 266 s = splnet(); 267 if ((head->so_options & SO_ACCEPTCONN) == 0) { 268 splx(s); 269 error = EINVAL; 270 goto done; 271 } 272 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 273 if (head->so_state & SS_CANTRCVMORE) { 274 head->so_error = ECONNABORTED; 275 break; 276 } 277 if ((head->so_state & SS_NBIO) != 0) { 278 head->so_error = EWOULDBLOCK; 279 break; 280 } 281 error = tsleep(&head->so_timeo, PSOCK | PCATCH, 282 "accept", 0); 283 if (error) { 284 splx(s); 285 goto done; 286 } 287 } 288 if (head->so_error) { 289 error = head->so_error; 290 head->so_error = 0; 291 splx(s); 292 goto done; 293 } 294 295 /* 296 * At this point we know that there is at least one connection 297 * ready to be accepted. Remove it from the queue prior to 298 * allocating the file descriptor for it since falloc() may 299 * block allowing another process to accept the connection 300 * instead. 301 */ 302 so = TAILQ_FIRST(&head->so_comp); 303 TAILQ_REMOVE(&head->so_comp, so, so_list); 304 head->so_qlen--; 305 306 error = falloc(td, &nfp, &fd); 307 if (error) { 308 /* 309 * Probably ran out of file descriptors. Put the 310 * unaccepted connection back onto the queue and 311 * do another wakeup so some other process might 312 * have a chance at it. 313 */ 314 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 315 head->so_qlen++; 316 wakeup_one(&head->so_timeo); 317 splx(s); 318 goto done; 319 } 320 fhold(nfp); 321 td->td_retval[0] = fd; 322 323 /* connection has been removed from the listen queue */ 324 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 325 326 so->so_state &= ~SS_COMP; 327 so->so_head = NULL; 328 pgid = fgetown(&head->so_sigio); 329 if (pgid != 0) 330 fsetown(pgid, &so->so_sigio); 331 332 FILE_LOCK(nfp); 333 soref(so); /* file descriptor reference */ 334 nfp->f_data = so; /* nfp has ref count from falloc */ 335 nfp->f_flag = fflag; 336 nfp->f_ops = &socketops; 337 nfp->f_type = DTYPE_SOCKET; 338 FILE_UNLOCK(nfp); 339 sa = 0; 340 error = soaccept(so, &sa); 341 if (error) { 342 /* 343 * return a namelen of zero for older code which might 344 * ignore the return value from accept. 345 */ 346 if (uap->name != NULL) { 347 namelen = 0; 348 (void) copyout(&namelen, 349 uap->anamelen, sizeof(*uap->anamelen)); 350 } 351 goto noconnection; 352 } 353 if (sa == NULL) { 354 namelen = 0; 355 if (uap->name) 356 goto gotnoname; 357 splx(s); 358 error = 0; 359 goto done; 360 } 361 if (uap->name) { 362 /* check sa_len before it is destroyed */ 363 if (namelen > sa->sa_len) 364 namelen = sa->sa_len; 365 #ifdef COMPAT_OLDSOCK 366 if (compat) 367 ((struct osockaddr *)sa)->sa_family = 368 sa->sa_family; 369 #endif 370 error = copyout(sa, uap->name, (u_int)namelen); 371 if (!error) 372 gotnoname: 373 error = copyout(&namelen, 374 uap->anamelen, sizeof (*uap->anamelen)); 375 } 376 noconnection: 377 if (sa) 378 FREE(sa, M_SONAME); 379 380 /* 381 * close the new descriptor, assuming someone hasn't ripped it 382 * out from under us. 383 */ 384 if (error) { 385 FILEDESC_LOCK(fdp); 386 if (fdp->fd_ofiles[fd] == nfp) { 387 fdp->fd_ofiles[fd] = NULL; 388 FILEDESC_UNLOCK(fdp); 389 fdrop(nfp, td); 390 } else { 391 FILEDESC_UNLOCK(fdp); 392 } 393 } 394 splx(s); 395 396 /* 397 * Release explicitly held references before returning. 398 */ 399 done: 400 if (nfp != NULL) 401 fdrop(nfp, td); 402 fputsock(head); 403 done2: 404 mtx_unlock(&Giant); 405 return (error); 406 } 407 408 /* 409 * MPSAFE (accept1() is MPSAFE) 410 */ 411 int 412 accept(td, uap) 413 struct thread *td; 414 struct accept_args *uap; 415 { 416 417 return (accept1(td, uap, 0)); 418 } 419 420 #ifdef COMPAT_OLDSOCK 421 /* 422 * MPSAFE (accept1() is MPSAFE) 423 */ 424 int 425 oaccept(td, uap) 426 struct thread *td; 427 struct accept_args *uap; 428 { 429 430 return (accept1(td, uap, 1)); 431 } 432 #endif /* COMPAT_OLDSOCK */ 433 434 /* 435 * MPSAFE 436 */ 437 /* ARGSUSED */ 438 int 439 connect(td, uap) 440 struct thread *td; 441 register struct connect_args /* { 442 int s; 443 caddr_t name; 444 int namelen; 445 } */ *uap; 446 { 447 struct socket *so; 448 struct sockaddr *sa; 449 int error, s; 450 451 mtx_lock(&Giant); 452 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 453 goto done2; 454 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 455 error = EALREADY; 456 goto done1; 457 } 458 error = getsockaddr(&sa, uap->name, uap->namelen); 459 if (error) 460 goto done1; 461 #ifdef MAC 462 error = mac_check_socket_connect(td->td_ucred, so, sa); 463 if (error) 464 goto bad; 465 #endif 466 error = soconnect(so, sa, td); 467 if (error) 468 goto bad; 469 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 470 FREE(sa, M_SONAME); 471 error = EINPROGRESS; 472 goto done1; 473 } 474 s = splnet(); 475 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 476 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0); 477 if (error) 478 break; 479 } 480 if (error == 0) { 481 error = so->so_error; 482 so->so_error = 0; 483 } 484 splx(s); 485 bad: 486 so->so_state &= ~SS_ISCONNECTING; 487 FREE(sa, M_SONAME); 488 if (error == ERESTART) 489 error = EINTR; 490 done1: 491 fputsock(so); 492 done2: 493 mtx_unlock(&Giant); 494 return (error); 495 } 496 497 /* 498 * MPSAFE 499 */ 500 int 501 socketpair(td, uap) 502 struct thread *td; 503 register struct socketpair_args /* { 504 int domain; 505 int type; 506 int protocol; 507 int *rsv; 508 } */ *uap; 509 { 510 register struct filedesc *fdp = td->td_proc->p_fd; 511 struct file *fp1, *fp2; 512 struct socket *so1, *so2; 513 int fd, error, sv[2]; 514 515 mtx_lock(&Giant); 516 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 517 td->td_ucred, td); 518 if (error) 519 goto done2; 520 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 521 td->td_ucred, td); 522 if (error) 523 goto free1; 524 error = falloc(td, &fp1, &fd); 525 if (error) 526 goto free2; 527 fhold(fp1); 528 sv[0] = fd; 529 fp1->f_data = so1; /* so1 already has ref count */ 530 error = falloc(td, &fp2, &fd); 531 if (error) 532 goto free3; 533 fhold(fp2); 534 fp2->f_data = so2; /* so2 already has ref count */ 535 sv[1] = fd; 536 error = soconnect2(so1, so2); 537 if (error) 538 goto free4; 539 if (uap->type == SOCK_DGRAM) { 540 /* 541 * Datagram socket connection is asymmetric. 542 */ 543 error = soconnect2(so2, so1); 544 if (error) 545 goto free4; 546 } 547 FILE_LOCK(fp1); 548 fp1->f_flag = FREAD|FWRITE; 549 fp1->f_ops = &socketops; 550 fp1->f_type = DTYPE_SOCKET; 551 FILE_UNLOCK(fp1); 552 FILE_LOCK(fp2); 553 fp2->f_flag = FREAD|FWRITE; 554 fp2->f_ops = &socketops; 555 fp2->f_type = DTYPE_SOCKET; 556 FILE_UNLOCK(fp2); 557 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 558 fdrop(fp1, td); 559 fdrop(fp2, td); 560 goto done2; 561 free4: 562 FILEDESC_LOCK(fdp); 563 if (fdp->fd_ofiles[sv[1]] == fp2) { 564 fdp->fd_ofiles[sv[1]] = NULL; 565 FILEDESC_UNLOCK(fdp); 566 fdrop(fp2, td); 567 } else 568 FILEDESC_UNLOCK(fdp); 569 fdrop(fp2, td); 570 free3: 571 FILEDESC_LOCK(fdp); 572 if (fdp->fd_ofiles[sv[0]] == fp1) { 573 fdp->fd_ofiles[sv[0]] = NULL; 574 FILEDESC_UNLOCK(fdp); 575 fdrop(fp1, td); 576 } else 577 FILEDESC_UNLOCK(fdp); 578 fdrop(fp1, td); 579 free2: 580 (void)soclose(so2); 581 free1: 582 (void)soclose(so1); 583 done2: 584 mtx_unlock(&Giant); 585 return (error); 586 } 587 588 static int 589 sendit(td, s, mp, flags) 590 register struct thread *td; 591 int s; 592 register struct msghdr *mp; 593 int flags; 594 { 595 struct uio auio; 596 register struct iovec *iov; 597 register int i; 598 struct mbuf *control; 599 struct sockaddr *to = NULL; 600 int len, error; 601 struct socket *so; 602 #ifdef KTRACE 603 struct iovec *ktriov = NULL; 604 struct uio ktruio; 605 int iovlen; 606 #endif 607 608 if ((error = fgetsock(td, s, &so, NULL)) != 0) 609 return (error); 610 611 #ifdef MAC 612 error = mac_check_socket_send(td->td_ucred, so); 613 if (error) 614 goto bad; 615 #endif 616 617 auio.uio_iov = mp->msg_iov; 618 auio.uio_iovcnt = mp->msg_iovlen; 619 auio.uio_segflg = UIO_USERSPACE; 620 auio.uio_rw = UIO_WRITE; 621 auio.uio_td = td; 622 auio.uio_offset = 0; /* XXX */ 623 auio.uio_resid = 0; 624 iov = mp->msg_iov; 625 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 626 if ((auio.uio_resid += iov->iov_len) < 0) { 627 error = EINVAL; 628 goto bad; 629 } 630 } 631 if (mp->msg_name) { 632 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 633 if (error) 634 goto bad; 635 } 636 if (mp->msg_control) { 637 if (mp->msg_controllen < sizeof(struct cmsghdr) 638 #ifdef COMPAT_OLDSOCK 639 && mp->msg_flags != MSG_COMPAT 640 #endif 641 ) { 642 error = EINVAL; 643 goto bad; 644 } 645 error = sockargs(&control, mp->msg_control, 646 mp->msg_controllen, MT_CONTROL); 647 if (error) 648 goto bad; 649 #ifdef COMPAT_OLDSOCK 650 if (mp->msg_flags == MSG_COMPAT) { 651 register struct cmsghdr *cm; 652 653 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 654 if (control == 0) { 655 error = ENOBUFS; 656 goto bad; 657 } else { 658 cm = mtod(control, struct cmsghdr *); 659 cm->cmsg_len = control->m_len; 660 cm->cmsg_level = SOL_SOCKET; 661 cm->cmsg_type = SCM_RIGHTS; 662 } 663 } 664 #endif 665 } else { 666 control = 0; 667 } 668 #ifdef KTRACE 669 if (KTRPOINT(td, KTR_GENIO)) { 670 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 671 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 672 bcopy(auio.uio_iov, ktriov, iovlen); 673 ktruio = auio; 674 } 675 #endif 676 len = auio.uio_resid; 677 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control, 678 flags, td); 679 if (error) { 680 if (auio.uio_resid != len && (error == ERESTART || 681 error == EINTR || error == EWOULDBLOCK)) 682 error = 0; 683 /* Generation of SIGPIPE can be controlled per socket */ 684 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 685 PROC_LOCK(td->td_proc); 686 psignal(td->td_proc, SIGPIPE); 687 PROC_UNLOCK(td->td_proc); 688 } 689 } 690 if (error == 0) 691 td->td_retval[0] = len - auio.uio_resid; 692 #ifdef KTRACE 693 if (ktriov != NULL) { 694 if (error == 0) { 695 ktruio.uio_iov = ktriov; 696 ktruio.uio_resid = td->td_retval[0]; 697 ktrgenio(s, UIO_WRITE, &ktruio, error); 698 } 699 FREE(ktriov, M_TEMP); 700 } 701 #endif 702 bad: 703 fputsock(so); 704 if (to) 705 FREE(to, M_SONAME); 706 return (error); 707 } 708 709 /* 710 * MPSAFE 711 */ 712 int 713 sendto(td, uap) 714 struct thread *td; 715 register struct sendto_args /* { 716 int s; 717 caddr_t buf; 718 size_t len; 719 int flags; 720 caddr_t to; 721 int tolen; 722 } */ *uap; 723 { 724 struct msghdr msg; 725 struct iovec aiov; 726 int error; 727 728 msg.msg_name = uap->to; 729 msg.msg_namelen = uap->tolen; 730 msg.msg_iov = &aiov; 731 msg.msg_iovlen = 1; 732 msg.msg_control = 0; 733 #ifdef COMPAT_OLDSOCK 734 msg.msg_flags = 0; 735 #endif 736 aiov.iov_base = uap->buf; 737 aiov.iov_len = uap->len; 738 mtx_lock(&Giant); 739 error = sendit(td, uap->s, &msg, uap->flags); 740 mtx_unlock(&Giant); 741 return (error); 742 } 743 744 #ifdef COMPAT_OLDSOCK 745 /* 746 * MPSAFE 747 */ 748 int 749 osend(td, uap) 750 struct thread *td; 751 register struct osend_args /* { 752 int s; 753 caddr_t buf; 754 int len; 755 int flags; 756 } */ *uap; 757 { 758 struct msghdr msg; 759 struct iovec aiov; 760 int error; 761 762 msg.msg_name = 0; 763 msg.msg_namelen = 0; 764 msg.msg_iov = &aiov; 765 msg.msg_iovlen = 1; 766 aiov.iov_base = uap->buf; 767 aiov.iov_len = uap->len; 768 msg.msg_control = 0; 769 msg.msg_flags = 0; 770 mtx_lock(&Giant); 771 error = sendit(td, uap->s, &msg, uap->flags); 772 mtx_unlock(&Giant); 773 return (error); 774 } 775 776 /* 777 * MPSAFE 778 */ 779 int 780 osendmsg(td, uap) 781 struct thread *td; 782 register struct osendmsg_args /* { 783 int s; 784 caddr_t msg; 785 int flags; 786 } */ *uap; 787 { 788 struct msghdr msg; 789 struct iovec aiov[UIO_SMALLIOV], *iov; 790 int error; 791 792 mtx_lock(&Giant); 793 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 794 if (error) 795 goto done2; 796 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 797 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 798 error = EMSGSIZE; 799 goto done2; 800 } 801 MALLOC(iov, struct iovec *, 802 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 803 M_WAITOK); 804 } else { 805 iov = aiov; 806 } 807 error = copyin(msg.msg_iov, iov, 808 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 809 if (error) 810 goto done; 811 msg.msg_flags = MSG_COMPAT; 812 msg.msg_iov = iov; 813 error = sendit(td, uap->s, &msg, uap->flags); 814 done: 815 if (iov != aiov) 816 FREE(iov, M_IOV); 817 done2: 818 mtx_unlock(&Giant); 819 return (error); 820 } 821 #endif 822 823 /* 824 * MPSAFE 825 */ 826 int 827 sendmsg(td, uap) 828 struct thread *td; 829 register struct sendmsg_args /* { 830 int s; 831 caddr_t msg; 832 int flags; 833 } */ *uap; 834 { 835 struct msghdr msg; 836 struct iovec aiov[UIO_SMALLIOV], *iov; 837 int error; 838 839 mtx_lock(&Giant); 840 error = copyin(uap->msg, &msg, sizeof (msg)); 841 if (error) 842 goto done2; 843 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 844 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 845 error = EMSGSIZE; 846 goto done2; 847 } 848 MALLOC(iov, struct iovec *, 849 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 850 M_WAITOK); 851 } else { 852 iov = aiov; 853 } 854 if (msg.msg_iovlen && 855 (error = copyin(msg.msg_iov, iov, 856 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 857 goto done; 858 msg.msg_iov = iov; 859 #ifdef COMPAT_OLDSOCK 860 msg.msg_flags = 0; 861 #endif 862 error = sendit(td, uap->s, &msg, uap->flags); 863 done: 864 if (iov != aiov) 865 FREE(iov, M_IOV); 866 done2: 867 mtx_unlock(&Giant); 868 return (error); 869 } 870 871 static int 872 recvit(td, s, mp, namelenp) 873 register struct thread *td; 874 int s; 875 register struct msghdr *mp; 876 void *namelenp; 877 { 878 struct uio auio; 879 register struct iovec *iov; 880 register int i; 881 int len, error; 882 struct mbuf *m, *control = 0; 883 caddr_t ctlbuf; 884 struct socket *so; 885 struct sockaddr *fromsa = 0; 886 #ifdef KTRACE 887 struct iovec *ktriov = NULL; 888 struct uio ktruio; 889 int iovlen; 890 #endif 891 892 if ((error = fgetsock(td, s, &so, NULL)) != 0) 893 return (error); 894 895 #ifdef MAC 896 error = mac_check_socket_receive(td->td_ucred, so); 897 if (error) { 898 fputsock(so); 899 return (error); 900 } 901 #endif 902 903 auio.uio_iov = mp->msg_iov; 904 auio.uio_iovcnt = mp->msg_iovlen; 905 auio.uio_segflg = UIO_USERSPACE; 906 auio.uio_rw = UIO_READ; 907 auio.uio_td = td; 908 auio.uio_offset = 0; /* XXX */ 909 auio.uio_resid = 0; 910 iov = mp->msg_iov; 911 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 912 if ((auio.uio_resid += iov->iov_len) < 0) { 913 fputsock(so); 914 return (EINVAL); 915 } 916 } 917 #ifdef KTRACE 918 if (KTRPOINT(td, KTR_GENIO)) { 919 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 920 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 921 bcopy(auio.uio_iov, ktriov, iovlen); 922 ktruio = auio; 923 } 924 #endif 925 len = auio.uio_resid; 926 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 927 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 928 &mp->msg_flags); 929 if (error) { 930 if (auio.uio_resid != len && (error == ERESTART || 931 error == EINTR || error == EWOULDBLOCK)) 932 error = 0; 933 } 934 #ifdef KTRACE 935 if (ktriov != NULL) { 936 if (error == 0) { 937 ktruio.uio_iov = ktriov; 938 ktruio.uio_resid = len - auio.uio_resid; 939 ktrgenio(s, UIO_READ, &ktruio, error); 940 } 941 FREE(ktriov, M_TEMP); 942 } 943 #endif 944 if (error) 945 goto out; 946 td->td_retval[0] = len - auio.uio_resid; 947 if (mp->msg_name) { 948 len = mp->msg_namelen; 949 if (len <= 0 || fromsa == 0) 950 len = 0; 951 else { 952 #ifndef MIN 953 #define MIN(a,b) ((a)>(b)?(b):(a)) 954 #endif 955 /* save sa_len before it is destroyed by MSG_COMPAT */ 956 len = MIN(len, fromsa->sa_len); 957 #ifdef COMPAT_OLDSOCK 958 if (mp->msg_flags & MSG_COMPAT) 959 ((struct osockaddr *)fromsa)->sa_family = 960 fromsa->sa_family; 961 #endif 962 error = copyout(fromsa, mp->msg_name, (unsigned)len); 963 if (error) 964 goto out; 965 } 966 mp->msg_namelen = len; 967 if (namelenp && 968 (error = copyout(&len, namelenp, sizeof (int)))) { 969 #ifdef COMPAT_OLDSOCK 970 if (mp->msg_flags & MSG_COMPAT) 971 error = 0; /* old recvfrom didn't check */ 972 else 973 #endif 974 goto out; 975 } 976 } 977 if (mp->msg_control) { 978 #ifdef COMPAT_OLDSOCK 979 /* 980 * We assume that old recvmsg calls won't receive access 981 * rights and other control info, esp. as control info 982 * is always optional and those options didn't exist in 4.3. 983 * If we receive rights, trim the cmsghdr; anything else 984 * is tossed. 985 */ 986 if (control && mp->msg_flags & MSG_COMPAT) { 987 if (mtod(control, struct cmsghdr *)->cmsg_level != 988 SOL_SOCKET || 989 mtod(control, struct cmsghdr *)->cmsg_type != 990 SCM_RIGHTS) { 991 mp->msg_controllen = 0; 992 goto out; 993 } 994 control->m_len -= sizeof (struct cmsghdr); 995 control->m_data += sizeof (struct cmsghdr); 996 } 997 #endif 998 len = mp->msg_controllen; 999 m = control; 1000 mp->msg_controllen = 0; 1001 ctlbuf = mp->msg_control; 1002 1003 while (m && len > 0) { 1004 unsigned int tocopy; 1005 1006 if (len >= m->m_len) 1007 tocopy = m->m_len; 1008 else { 1009 mp->msg_flags |= MSG_CTRUNC; 1010 tocopy = len; 1011 } 1012 1013 if ((error = copyout(mtod(m, caddr_t), 1014 ctlbuf, tocopy)) != 0) 1015 goto out; 1016 1017 ctlbuf += tocopy; 1018 len -= tocopy; 1019 m = m->m_next; 1020 } 1021 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1022 } 1023 out: 1024 fputsock(so); 1025 if (fromsa) 1026 FREE(fromsa, M_SONAME); 1027 if (control) 1028 m_freem(control); 1029 return (error); 1030 } 1031 1032 /* 1033 * MPSAFE 1034 */ 1035 int 1036 recvfrom(td, uap) 1037 struct thread *td; 1038 register struct recvfrom_args /* { 1039 int s; 1040 caddr_t buf; 1041 size_t len; 1042 int flags; 1043 caddr_t from; 1044 int *fromlenaddr; 1045 } */ *uap; 1046 { 1047 struct msghdr msg; 1048 struct iovec aiov; 1049 int error; 1050 1051 mtx_lock(&Giant); 1052 if (uap->fromlenaddr) { 1053 error = copyin(uap->fromlenaddr, 1054 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1055 if (error) 1056 goto done2; 1057 } else { 1058 msg.msg_namelen = 0; 1059 } 1060 msg.msg_name = uap->from; 1061 msg.msg_iov = &aiov; 1062 msg.msg_iovlen = 1; 1063 aiov.iov_base = uap->buf; 1064 aiov.iov_len = uap->len; 1065 msg.msg_control = 0; 1066 msg.msg_flags = uap->flags; 1067 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1068 done2: 1069 mtx_unlock(&Giant); 1070 return(error); 1071 } 1072 1073 #ifdef COMPAT_OLDSOCK 1074 /* 1075 * MPSAFE 1076 */ 1077 int 1078 orecvfrom(td, uap) 1079 struct thread *td; 1080 struct recvfrom_args *uap; 1081 { 1082 1083 uap->flags |= MSG_COMPAT; 1084 return (recvfrom(td, uap)); 1085 } 1086 #endif 1087 1088 1089 #ifdef COMPAT_OLDSOCK 1090 /* 1091 * MPSAFE 1092 */ 1093 int 1094 orecv(td, uap) 1095 struct thread *td; 1096 register struct orecv_args /* { 1097 int s; 1098 caddr_t buf; 1099 int len; 1100 int flags; 1101 } */ *uap; 1102 { 1103 struct msghdr msg; 1104 struct iovec aiov; 1105 int error; 1106 1107 mtx_lock(&Giant); 1108 msg.msg_name = 0; 1109 msg.msg_namelen = 0; 1110 msg.msg_iov = &aiov; 1111 msg.msg_iovlen = 1; 1112 aiov.iov_base = uap->buf; 1113 aiov.iov_len = uap->len; 1114 msg.msg_control = 0; 1115 msg.msg_flags = uap->flags; 1116 error = recvit(td, uap->s, &msg, NULL); 1117 mtx_unlock(&Giant); 1118 return (error); 1119 } 1120 1121 /* 1122 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1123 * overlays the new one, missing only the flags, and with the (old) access 1124 * rights where the control fields are now. 1125 * 1126 * MPSAFE 1127 */ 1128 int 1129 orecvmsg(td, uap) 1130 struct thread *td; 1131 register struct orecvmsg_args /* { 1132 int s; 1133 struct omsghdr *msg; 1134 int flags; 1135 } */ *uap; 1136 { 1137 struct msghdr msg; 1138 struct iovec aiov[UIO_SMALLIOV], *iov; 1139 int error; 1140 1141 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1142 if (error) 1143 return (error); 1144 1145 mtx_lock(&Giant); 1146 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1147 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1148 error = EMSGSIZE; 1149 goto done2; 1150 } 1151 MALLOC(iov, struct iovec *, 1152 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1153 M_WAITOK); 1154 } else { 1155 iov = aiov; 1156 } 1157 msg.msg_flags = uap->flags | MSG_COMPAT; 1158 error = copyin(msg.msg_iov, iov, 1159 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1160 if (error) 1161 goto done; 1162 msg.msg_iov = iov; 1163 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1164 1165 if (msg.msg_controllen && error == 0) 1166 error = copyout(&msg.msg_controllen, 1167 &uap->msg->msg_accrightslen, sizeof (int)); 1168 done: 1169 if (iov != aiov) 1170 FREE(iov, M_IOV); 1171 done2: 1172 mtx_unlock(&Giant); 1173 return (error); 1174 } 1175 #endif 1176 1177 /* 1178 * MPSAFE 1179 */ 1180 int 1181 recvmsg(td, uap) 1182 struct thread *td; 1183 register struct recvmsg_args /* { 1184 int s; 1185 struct msghdr *msg; 1186 int flags; 1187 } */ *uap; 1188 { 1189 struct msghdr msg; 1190 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1191 register int error; 1192 1193 mtx_lock(&Giant); 1194 error = copyin(uap->msg, &msg, sizeof (msg)); 1195 if (error) 1196 goto done2; 1197 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1198 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1199 error = EMSGSIZE; 1200 goto done2; 1201 } 1202 MALLOC(iov, struct iovec *, 1203 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1204 M_WAITOK); 1205 } else { 1206 iov = aiov; 1207 } 1208 #ifdef COMPAT_OLDSOCK 1209 msg.msg_flags = uap->flags &~ MSG_COMPAT; 1210 #else 1211 msg.msg_flags = uap->flags; 1212 #endif 1213 uiov = msg.msg_iov; 1214 msg.msg_iov = iov; 1215 error = copyin(uiov, iov, 1216 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1217 if (error) 1218 goto done; 1219 error = recvit(td, uap->s, &msg, NULL); 1220 if (!error) { 1221 msg.msg_iov = uiov; 1222 error = copyout(&msg, uap->msg, sizeof(msg)); 1223 } 1224 done: 1225 if (iov != aiov) 1226 FREE(iov, M_IOV); 1227 done2: 1228 mtx_unlock(&Giant); 1229 return (error); 1230 } 1231 1232 /* 1233 * MPSAFE 1234 */ 1235 /* ARGSUSED */ 1236 int 1237 shutdown(td, uap) 1238 struct thread *td; 1239 register struct shutdown_args /* { 1240 int s; 1241 int how; 1242 } */ *uap; 1243 { 1244 struct socket *so; 1245 int error; 1246 1247 mtx_lock(&Giant); 1248 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1249 error = soshutdown(so, uap->how); 1250 fputsock(so); 1251 } 1252 mtx_unlock(&Giant); 1253 return(error); 1254 } 1255 1256 /* 1257 * MPSAFE 1258 */ 1259 /* ARGSUSED */ 1260 int 1261 setsockopt(td, uap) 1262 struct thread *td; 1263 register struct setsockopt_args /* { 1264 int s; 1265 int level; 1266 int name; 1267 caddr_t val; 1268 int valsize; 1269 } */ *uap; 1270 { 1271 struct socket *so; 1272 struct sockopt sopt; 1273 int error; 1274 1275 if (uap->val == 0 && uap->valsize != 0) 1276 return (EFAULT); 1277 if (uap->valsize < 0) 1278 return (EINVAL); 1279 1280 mtx_lock(&Giant); 1281 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1282 sopt.sopt_dir = SOPT_SET; 1283 sopt.sopt_level = uap->level; 1284 sopt.sopt_name = uap->name; 1285 sopt.sopt_val = uap->val; 1286 sopt.sopt_valsize = uap->valsize; 1287 sopt.sopt_td = td; 1288 error = sosetopt(so, &sopt); 1289 fputsock(so); 1290 } 1291 mtx_unlock(&Giant); 1292 return(error); 1293 } 1294 1295 /* 1296 * MPSAFE 1297 */ 1298 /* ARGSUSED */ 1299 int 1300 getsockopt(td, uap) 1301 struct thread *td; 1302 register struct getsockopt_args /* { 1303 int s; 1304 int level; 1305 int name; 1306 caddr_t val; 1307 int *avalsize; 1308 } */ *uap; 1309 { 1310 int valsize, error; 1311 struct socket *so; 1312 struct sockopt sopt; 1313 1314 mtx_lock(&Giant); 1315 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1316 goto done2; 1317 if (uap->val) { 1318 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1319 if (error) 1320 goto done1; 1321 if (valsize < 0) { 1322 error = EINVAL; 1323 goto done1; 1324 } 1325 } else { 1326 valsize = 0; 1327 } 1328 1329 sopt.sopt_dir = SOPT_GET; 1330 sopt.sopt_level = uap->level; 1331 sopt.sopt_name = uap->name; 1332 sopt.sopt_val = uap->val; 1333 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1334 sopt.sopt_td = td; 1335 1336 error = sogetopt(so, &sopt); 1337 if (error == 0) { 1338 valsize = sopt.sopt_valsize; 1339 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1340 } 1341 done1: 1342 fputsock(so); 1343 done2: 1344 mtx_unlock(&Giant); 1345 return (error); 1346 } 1347 1348 /* 1349 * getsockname1() - Get socket name. 1350 * 1351 * MPSAFE 1352 */ 1353 /* ARGSUSED */ 1354 static int 1355 getsockname1(td, uap, compat) 1356 struct thread *td; 1357 register struct getsockname_args /* { 1358 int fdes; 1359 caddr_t asa; 1360 int *alen; 1361 } */ *uap; 1362 int compat; 1363 { 1364 struct socket *so; 1365 struct sockaddr *sa; 1366 int len, error; 1367 1368 mtx_lock(&Giant); 1369 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1370 goto done2; 1371 error = copyin(uap->alen, &len, sizeof (len)); 1372 if (error) 1373 goto done1; 1374 if (len < 0) { 1375 error = EINVAL; 1376 goto done1; 1377 } 1378 sa = 0; 1379 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1380 if (error) 1381 goto bad; 1382 if (sa == 0) { 1383 len = 0; 1384 goto gotnothing; 1385 } 1386 1387 len = MIN(len, sa->sa_len); 1388 #ifdef COMPAT_OLDSOCK 1389 if (compat) 1390 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1391 #endif 1392 error = copyout(sa, uap->asa, (u_int)len); 1393 if (error == 0) 1394 gotnothing: 1395 error = copyout(&len, uap->alen, sizeof (len)); 1396 bad: 1397 if (sa) 1398 FREE(sa, M_SONAME); 1399 done1: 1400 fputsock(so); 1401 done2: 1402 mtx_unlock(&Giant); 1403 return (error); 1404 } 1405 1406 /* 1407 * MPSAFE 1408 */ 1409 int 1410 getsockname(td, uap) 1411 struct thread *td; 1412 struct getsockname_args *uap; 1413 { 1414 1415 return (getsockname1(td, uap, 0)); 1416 } 1417 1418 #ifdef COMPAT_OLDSOCK 1419 /* 1420 * MPSAFE 1421 */ 1422 int 1423 ogetsockname(td, uap) 1424 struct thread *td; 1425 struct getsockname_args *uap; 1426 { 1427 1428 return (getsockname1(td, uap, 1)); 1429 } 1430 #endif /* COMPAT_OLDSOCK */ 1431 1432 /* 1433 * getpeername1() - Get name of peer for connected socket. 1434 * 1435 * MPSAFE 1436 */ 1437 /* ARGSUSED */ 1438 static int 1439 getpeername1(td, uap, compat) 1440 struct thread *td; 1441 register struct getpeername_args /* { 1442 int fdes; 1443 caddr_t asa; 1444 int *alen; 1445 } */ *uap; 1446 int compat; 1447 { 1448 struct socket *so; 1449 struct sockaddr *sa; 1450 int len, error; 1451 1452 mtx_lock(&Giant); 1453 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1454 goto done2; 1455 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1456 error = ENOTCONN; 1457 goto done1; 1458 } 1459 error = copyin(uap->alen, &len, sizeof (len)); 1460 if (error) 1461 goto done1; 1462 if (len < 0) { 1463 error = EINVAL; 1464 goto done1; 1465 } 1466 sa = 0; 1467 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1468 if (error) 1469 goto bad; 1470 if (sa == 0) { 1471 len = 0; 1472 goto gotnothing; 1473 } 1474 len = MIN(len, sa->sa_len); 1475 #ifdef COMPAT_OLDSOCK 1476 if (compat) 1477 ((struct osockaddr *)sa)->sa_family = 1478 sa->sa_family; 1479 #endif 1480 error = copyout(sa, uap->asa, (u_int)len); 1481 if (error) 1482 goto bad; 1483 gotnothing: 1484 error = copyout(&len, uap->alen, sizeof (len)); 1485 bad: 1486 if (sa) 1487 FREE(sa, M_SONAME); 1488 done1: 1489 fputsock(so); 1490 done2: 1491 mtx_unlock(&Giant); 1492 return (error); 1493 } 1494 1495 /* 1496 * MPSAFE 1497 */ 1498 int 1499 getpeername(td, uap) 1500 struct thread *td; 1501 struct getpeername_args *uap; 1502 { 1503 1504 return (getpeername1(td, uap, 0)); 1505 } 1506 1507 #ifdef COMPAT_OLDSOCK 1508 /* 1509 * MPSAFE 1510 */ 1511 int 1512 ogetpeername(td, uap) 1513 struct thread *td; 1514 struct ogetpeername_args *uap; 1515 { 1516 1517 /* XXX uap should have type `getpeername_args *' to begin with. */ 1518 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1519 } 1520 #endif /* COMPAT_OLDSOCK */ 1521 1522 int 1523 sockargs(mp, buf, buflen, type) 1524 struct mbuf **mp; 1525 caddr_t buf; 1526 int buflen, type; 1527 { 1528 register struct sockaddr *sa; 1529 register struct mbuf *m; 1530 int error; 1531 1532 if ((u_int)buflen > MLEN) { 1533 #ifdef COMPAT_OLDSOCK 1534 if (type == MT_SONAME && (u_int)buflen <= 112) 1535 buflen = MLEN; /* unix domain compat. hack */ 1536 else 1537 #endif 1538 return (EINVAL); 1539 } 1540 m = m_get(M_TRYWAIT, type); 1541 if (m == NULL) 1542 return (ENOBUFS); 1543 m->m_len = buflen; 1544 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1545 if (error) 1546 (void) m_free(m); 1547 else { 1548 *mp = m; 1549 if (type == MT_SONAME) { 1550 sa = mtod(m, struct sockaddr *); 1551 1552 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1553 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1554 sa->sa_family = sa->sa_len; 1555 #endif 1556 sa->sa_len = buflen; 1557 } 1558 } 1559 return (error); 1560 } 1561 1562 int 1563 getsockaddr(namp, uaddr, len) 1564 struct sockaddr **namp; 1565 caddr_t uaddr; 1566 size_t len; 1567 { 1568 struct sockaddr *sa; 1569 int error; 1570 1571 if (len > SOCK_MAXADDRLEN) 1572 return ENAMETOOLONG; 1573 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1574 error = copyin(uaddr, sa, len); 1575 if (error) { 1576 FREE(sa, M_SONAME); 1577 } else { 1578 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1579 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1580 sa->sa_family = sa->sa_len; 1581 #endif 1582 sa->sa_len = len; 1583 *namp = sa; 1584 } 1585 return error; 1586 } 1587 1588 /* 1589 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) 1590 */ 1591 static void 1592 sf_buf_init(void *arg) 1593 { 1594 int i; 1595 1596 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF); 1597 mtx_lock(&sf_freelist.sf_lock); 1598 SLIST_INIT(&sf_freelist.sf_head); 1599 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE); 1600 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, 1601 M_NOWAIT | M_ZERO); 1602 for (i = 0; i < nsfbufs; i++) { 1603 sf_bufs[i].kva = sf_base + i * PAGE_SIZE; 1604 SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list); 1605 } 1606 sf_buf_alloc_want = 0; 1607 mtx_unlock(&sf_freelist.sf_lock); 1608 } 1609 1610 /* 1611 * Get an sf_buf from the freelist. Will block if none are available. 1612 */ 1613 struct sf_buf * 1614 sf_buf_alloc() 1615 { 1616 struct sf_buf *sf; 1617 int error; 1618 1619 mtx_lock(&sf_freelist.sf_lock); 1620 while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) { 1621 sf_buf_alloc_want++; 1622 error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH, 1623 "sfbufa", 0); 1624 sf_buf_alloc_want--; 1625 1626 /* 1627 * If we got a signal, don't risk going back to sleep. 1628 */ 1629 if (error) 1630 break; 1631 } 1632 if (sf != NULL) 1633 SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list); 1634 mtx_unlock(&sf_freelist.sf_lock); 1635 return (sf); 1636 } 1637 1638 #define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT]) 1639 1640 /* 1641 * Detatch mapped page and release resources back to the system. 1642 */ 1643 void 1644 sf_buf_free(void *addr, void *args) 1645 { 1646 struct sf_buf *sf; 1647 struct vm_page *m; 1648 1649 GIANT_REQUIRED; 1650 1651 sf = dtosf(addr); 1652 pmap_qremove((vm_offset_t)addr, 1); 1653 m = sf->m; 1654 vm_page_lock_queues(); 1655 vm_page_unwire(m, 0); 1656 /* 1657 * Check for the object going away on us. This can 1658 * happen since we don't hold a reference to it. 1659 * If so, we're responsible for freeing the page. 1660 */ 1661 if (m->wire_count == 0 && m->object == NULL) 1662 vm_page_free(m); 1663 vm_page_unlock_queues(); 1664 sf->m = NULL; 1665 mtx_lock(&sf_freelist.sf_lock); 1666 SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list); 1667 if (sf_buf_alloc_want > 0) 1668 wakeup_one(&sf_freelist); 1669 mtx_unlock(&sf_freelist.sf_lock); 1670 } 1671 1672 /* 1673 * sendfile(2) 1674 * 1675 * MPSAFE 1676 * 1677 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1678 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1679 * 1680 * Send a file specified by 'fd' and starting at 'offset' to a socket 1681 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1682 * nbytes == 0. Optionally add a header and/or trailer to the socket 1683 * output. If specified, write the total number of bytes sent into *sbytes. 1684 * 1685 */ 1686 int 1687 sendfile(struct thread *td, struct sendfile_args *uap) 1688 { 1689 1690 return (do_sendfile(td, uap, 0)); 1691 } 1692 1693 #ifdef COMPAT_FREEBSD4 1694 int 1695 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1696 { 1697 struct sendfile_args args; 1698 1699 args.fd = uap->fd; 1700 args.s = uap->s; 1701 args.offset = uap->offset; 1702 args.nbytes = uap->nbytes; 1703 args.hdtr = uap->hdtr; 1704 args.sbytes = uap->sbytes; 1705 args.flags = uap->flags; 1706 1707 return (do_sendfile(td, &args, 1)); 1708 } 1709 #endif /* COMPAT_FREEBSD4 */ 1710 1711 static int 1712 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1713 { 1714 struct vnode *vp; 1715 struct vm_object *obj; 1716 struct socket *so = NULL; 1717 struct mbuf *m; 1718 struct sf_buf *sf; 1719 struct vm_page *pg; 1720 struct writev_args nuap; 1721 struct sf_hdtr hdtr; 1722 off_t off, xfsize, hdtr_size, sbytes = 0; 1723 int error, s; 1724 1725 mtx_lock(&Giant); 1726 1727 hdtr_size = 0; 1728 1729 /* 1730 * The descriptor must be a regular file and have a backing VM object. 1731 */ 1732 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1733 goto done; 1734 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1735 error = EINVAL; 1736 goto done; 1737 } 1738 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1739 goto done; 1740 if (so->so_type != SOCK_STREAM) { 1741 error = EINVAL; 1742 goto done; 1743 } 1744 if ((so->so_state & SS_ISCONNECTED) == 0) { 1745 error = ENOTCONN; 1746 goto done; 1747 } 1748 if (uap->offset < 0) { 1749 error = EINVAL; 1750 goto done; 1751 } 1752 1753 #ifdef MAC 1754 error = mac_check_socket_send(td->td_ucred, so); 1755 if (error) 1756 goto done; 1757 #endif 1758 1759 /* 1760 * If specified, get the pointer to the sf_hdtr struct for 1761 * any headers/trailers. 1762 */ 1763 if (uap->hdtr != NULL) { 1764 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1765 if (error) 1766 goto done; 1767 /* 1768 * Send any headers. Wimp out and use writev(2). 1769 */ 1770 if (hdtr.headers != NULL) { 1771 nuap.fd = uap->s; 1772 nuap.iovp = hdtr.headers; 1773 nuap.iovcnt = hdtr.hdr_cnt; 1774 error = writev(td, &nuap); 1775 if (error) 1776 goto done; 1777 if (compat) 1778 sbytes += td->td_retval[0]; 1779 else 1780 hdtr_size += td->td_retval[0]; 1781 } 1782 } 1783 1784 /* 1785 * Protect against multiple writers to the socket. 1786 */ 1787 (void) sblock(&so->so_snd, M_WAITOK); 1788 1789 /* 1790 * Loop through the pages in the file, starting with the requested 1791 * offset. Get a file page (do I/O if necessary), map the file page 1792 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1793 * it on the socket. 1794 */ 1795 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1796 vm_pindex_t pindex; 1797 vm_offset_t pgoff; 1798 1799 pindex = OFF_TO_IDX(off); 1800 retry_lookup: 1801 /* 1802 * Calculate the amount to transfer. Not to exceed a page, 1803 * the EOF, or the passed in nbytes. 1804 */ 1805 xfsize = obj->un_pager.vnp.vnp_size - off; 1806 if (xfsize > PAGE_SIZE) 1807 xfsize = PAGE_SIZE; 1808 pgoff = (vm_offset_t)(off & PAGE_MASK); 1809 if (PAGE_SIZE - pgoff < xfsize) 1810 xfsize = PAGE_SIZE - pgoff; 1811 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1812 xfsize = uap->nbytes - sbytes; 1813 if (xfsize <= 0) 1814 break; 1815 /* 1816 * Optimize the non-blocking case by looking at the socket space 1817 * before going to the extra work of constituting the sf_buf. 1818 */ 1819 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1820 if (so->so_state & SS_CANTSENDMORE) 1821 error = EPIPE; 1822 else 1823 error = EAGAIN; 1824 sbunlock(&so->so_snd); 1825 goto done; 1826 } 1827 /* 1828 * Attempt to look up the page. 1829 * 1830 * Allocate if not found 1831 * 1832 * Wait and loop if busy. 1833 */ 1834 pg = vm_page_lookup(obj, pindex); 1835 1836 if (pg == NULL) { 1837 pg = vm_page_alloc(obj, pindex, 1838 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1839 if (pg == NULL) { 1840 VM_WAIT; 1841 goto retry_lookup; 1842 } 1843 vm_page_lock_queues(); 1844 vm_page_wakeup(pg); 1845 } else { 1846 vm_page_lock_queues(); 1847 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1848 goto retry_lookup; 1849 /* 1850 * Wire the page so it does not get ripped out from 1851 * under us. 1852 */ 1853 vm_page_wire(pg); 1854 } 1855 1856 /* 1857 * If page is not valid for what we need, initiate I/O 1858 */ 1859 1860 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1861 int bsize, resid; 1862 1863 /* 1864 * Ensure that our page is still around when the I/O 1865 * completes. 1866 */ 1867 vm_page_io_start(pg); 1868 vm_page_unlock_queues(); 1869 1870 /* 1871 * Get the page from backing store. 1872 */ 1873 bsize = vp->v_mount->mnt_stat.f_iosize; 1874 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1875 /* 1876 * XXXMAC: Because we don't have fp->f_cred here, 1877 * we pass in NOCRED. This is probably wrong, but 1878 * is consistent with our original implementation. 1879 */ 1880 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1881 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1882 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1883 td->td_ucred, NOCRED, &resid, td); 1884 VOP_UNLOCK(vp, 0, td); 1885 vm_page_lock_queues(); 1886 vm_page_flag_clear(pg, PG_ZERO); 1887 vm_page_io_finish(pg); 1888 if (error) { 1889 vm_page_unwire(pg, 0); 1890 /* 1891 * See if anyone else might know about this page. 1892 * If not and it is not valid, then free it. 1893 */ 1894 if (pg->wire_count == 0 && pg->valid == 0 && 1895 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1896 pg->hold_count == 0) { 1897 vm_page_busy(pg); 1898 vm_page_free(pg); 1899 } 1900 vm_page_unlock_queues(); 1901 sbunlock(&so->so_snd); 1902 goto done; 1903 } 1904 } 1905 vm_page_unlock_queues(); 1906 1907 /* 1908 * Get a sendfile buf. We usually wait as long as necessary, 1909 * but this wait can be interrupted. 1910 */ 1911 if ((sf = sf_buf_alloc()) == NULL) { 1912 vm_page_lock_queues(); 1913 vm_page_unwire(pg, 0); 1914 if (pg->wire_count == 0 && pg->object == NULL) 1915 vm_page_free(pg); 1916 vm_page_unlock_queues(); 1917 sbunlock(&so->so_snd); 1918 error = EINTR; 1919 goto done; 1920 } 1921 1922 /* 1923 * Allocate a kernel virtual page and insert the physical page 1924 * into it. 1925 */ 1926 sf->m = pg; 1927 pmap_qenter(sf->kva, &pg, 1); 1928 /* 1929 * Get an mbuf header and set it up as having external storage. 1930 */ 1931 MGETHDR(m, M_TRYWAIT, MT_DATA); 1932 if (m == NULL) { 1933 error = ENOBUFS; 1934 sf_buf_free((void *)sf->kva, NULL); 1935 sbunlock(&so->so_snd); 1936 goto done; 1937 } 1938 /* 1939 * Setup external storage for mbuf. 1940 */ 1941 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL, M_RDONLY, 1942 EXT_SFBUF); 1943 m->m_data = (char *) sf->kva + pgoff; 1944 m->m_pkthdr.len = m->m_len = xfsize; 1945 /* 1946 * Add the buffer to the socket buffer chain. 1947 */ 1948 s = splnet(); 1949 retry_space: 1950 /* 1951 * Make sure that the socket is still able to take more data. 1952 * CANTSENDMORE being true usually means that the connection 1953 * was closed. so_error is true when an error was sensed after 1954 * a previous send. 1955 * The state is checked after the page mapping and buffer 1956 * allocation above since those operations may block and make 1957 * any socket checks stale. From this point forward, nothing 1958 * blocks before the pru_send (or more accurately, any blocking 1959 * results in a loop back to here to re-check). 1960 */ 1961 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1962 if (so->so_state & SS_CANTSENDMORE) { 1963 error = EPIPE; 1964 } else { 1965 error = so->so_error; 1966 so->so_error = 0; 1967 } 1968 m_freem(m); 1969 sbunlock(&so->so_snd); 1970 splx(s); 1971 goto done; 1972 } 1973 /* 1974 * Wait for socket space to become available. We do this just 1975 * after checking the connection state above in order to avoid 1976 * a race condition with sbwait(). 1977 */ 1978 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1979 if (so->so_state & SS_NBIO) { 1980 m_freem(m); 1981 sbunlock(&so->so_snd); 1982 splx(s); 1983 error = EAGAIN; 1984 goto done; 1985 } 1986 error = sbwait(&so->so_snd); 1987 /* 1988 * An error from sbwait usually indicates that we've 1989 * been interrupted by a signal. If we've sent anything 1990 * then return bytes sent, otherwise return the error. 1991 */ 1992 if (error) { 1993 m_freem(m); 1994 sbunlock(&so->so_snd); 1995 splx(s); 1996 goto done; 1997 } 1998 goto retry_space; 1999 } 2000 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2001 splx(s); 2002 if (error) { 2003 sbunlock(&so->so_snd); 2004 goto done; 2005 } 2006 } 2007 sbunlock(&so->so_snd); 2008 2009 /* 2010 * Send trailers. Wimp out and use writev(2). 2011 */ 2012 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 2013 nuap.fd = uap->s; 2014 nuap.iovp = hdtr.trailers; 2015 nuap.iovcnt = hdtr.trl_cnt; 2016 error = writev(td, &nuap); 2017 if (error) 2018 goto done; 2019 if (compat) 2020 sbytes += td->td_retval[0]; 2021 else 2022 hdtr_size += td->td_retval[0]; 2023 } 2024 2025 done: 2026 /* 2027 * If there was no error we have to clear td->td_retval[0] 2028 * because it may have been set by writev. 2029 */ 2030 if (error == 0) { 2031 td->td_retval[0] = 0; 2032 } 2033 if (uap->sbytes != NULL) { 2034 if (!compat) 2035 sbytes += hdtr_size; 2036 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2037 } 2038 if (vp) 2039 vrele(vp); 2040 if (so) 2041 fputsock(so); 2042 mtx_unlock(&Giant); 2043 return (error); 2044 } 2045