/*
 * Copyright (c) 1982, 1986, 1989, 1990, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * sendfile(2) and related extensions:
 * Copyright (c) 1998, David Greenman. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)uipc_syscalls.c     8.4 (Berkeley) 2/21/94
 * $FreeBSD$
 */

#include "opt_compat.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/malloc.h>
#include <sys/filedesc.h>
#include <sys/event.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mount.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/vnode.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

static void sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
static struct sf_buf *sf_buf_alloc(void);
static void sf_buf_free(caddr_t addr, void *args);

static int sendit __P((struct thread *td, int s, struct msghdr *mp, int flags));
static int recvit __P((struct thread *td, int s, struct msghdr *mp,
            caddr_t namelenp));

static int accept1 __P((struct thread *td, struct accept_args *uap, int compat));
static int getsockname1 __P((struct thread *td, struct getsockname_args *uap,
            int compat));
static int getpeername1 __P((struct thread *td, struct getpeername_args *uap,
            int compat));

/*
 * Expanded sf_freelist head.  Really an SLIST_HEAD() in disguise, with the
 * sf_freelist head protected by the sf_lock mutex.
 */
static struct {
        SLIST_HEAD(, sf_buf) sf_head;
        struct mtx sf_lock;
} sf_freelist;

static vm_offset_t sf_base;
static struct sf_buf *sf_bufs;
static u_int sf_buf_alloc_want;

/*
 * System call interface to the socket abstraction.
 */
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#define COMPAT_OLDSOCK
#endif

extern struct fileops socketops;

/*
 * MPSAFE
 */
int
socket(td, uap)
        struct thread *td;
        register struct socket_args /* {
                int     domain;
                int     type;
                int     protocol;
        } */ *uap;
{
        struct filedesc *fdp;
        struct socket *so;
        struct file *fp;
        int fd, error;

        mtx_lock(&Giant);
        fdp = td->td_proc->p_fd;
        error = falloc(td, &fp, &fd);
        if (error)
                goto done2;
        fhold(fp);
        error = socreate(uap->domain, &so, uap->type, uap->protocol,
            td->td_proc->p_ucred, td);
        FILEDESC_LOCK(fdp);
        if (error) {
                if (fdp->fd_ofiles[fd] == fp) {
                        fdp->fd_ofiles[fd] = NULL;
                        FILEDESC_UNLOCK(fdp);
                        fdrop(fp, td);
                } else
                        FILEDESC_UNLOCK(fdp);
        } else {
                fp->f_data = (caddr_t)so;       /* already has ref count */
                fp->f_flag = FREAD|FWRITE;
                fp->f_ops = &socketops;
                fp->f_type = DTYPE_SOCKET;
                FILEDESC_UNLOCK(fdp);
                td->td_retval[0] = fd;
        }
        fdrop(fp, td);
done2:
        mtx_unlock(&Giant);
        return (error);
}

/*
 * MPSAFE
 */
/* ARGSUSED */
int
bind(td, uap)
        struct thread *td;
        register struct bind_args /* {
                int     s;
                caddr_t name;
                int     namelen;
        } */ *uap;
{
        struct socket *so;
        struct sockaddr *sa;
        int error;

        mtx_lock(&Giant);
        if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
                goto done2;
        if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
                goto done1;
        error = sobind(so, sa, td);
        FREE(sa, M_SONAME);
done1:
        fputsock(so);
done2:
        mtx_unlock(&Giant);
        return (error);
}

/*
 * MPSAFE
 */
/* ARGSUSED */
int
listen(td, uap)
        struct thread *td;
        register struct listen_args /* {
                int     s;
                int     backlog;
        } */ *uap;
{
        struct socket *so;
        int error;

        mtx_lock(&Giant);
        if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
                error = solisten(so, uap->backlog, td);
                fputsock(so);
        }
        mtx_unlock(&Giant);
        return(error);
}

/*
 * accept1()
 * MPSAFE
 */
static int
accept1(td, uap, compat)
        struct thread *td;
        register struct accept_args /* {
                int     s;
                caddr_t name;
                int     *anamelen;
        } */ *uap;
        int compat;
{
        struct filedesc *fdp;
        struct file *nfp = NULL;
        struct sockaddr *sa;
        int namelen, error, s;
        struct socket *head, *so;
        int fd;
        u_int fflag;

        mtx_lock(&Giant);
        fdp = td->td_proc->p_fd;
        if (uap->name) {
                error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen,
                    sizeof (namelen));
                if (error)
                        goto done2;
        }
        error = fgetsock(td, uap->s, &head, &fflag);
        if (error)
                goto done2;
        s = splnet();
        if ((head->so_options & SO_ACCEPTCONN) == 0) {
                splx(s);
                error = EINVAL;
                goto done;
        }
        if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
                splx(s);
                error = EWOULDBLOCK;
                goto done;
        }
        while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
                if (head->so_state & SS_CANTRCVMORE) {
                        head->so_error = ECONNABORTED;
                        break;
                }
                error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH,
                    "accept", 0);
                if (error) {
                        splx(s);
                        goto done;
                }
        }
        if (head->so_error) {
                error = head->so_error;
                head->so_error = 0;
                splx(s);
                goto done;
        }

        /*
         * At this point we know that there is at least one connection
         * ready to be accepted.  Remove it from the queue prior to
         * allocating the file descriptor for it since falloc() may
         * block allowing another process to accept the connection
         * instead.
         */
        so = TAILQ_FIRST(&head->so_comp);
        TAILQ_REMOVE(&head->so_comp, so, so_list);
        head->so_qlen--;

        error = falloc(td, &nfp, &fd);
        if (error) {
                /*
                 * Probably ran out of file descriptors.  Put the
                 * unaccepted connection back onto the queue and
                 * do another wakeup so some other process might
                 * have a chance at it.
                 */
                TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
                head->so_qlen++;
                wakeup_one(&head->so_timeo);
                splx(s);
                goto done;
        }
        fhold(nfp);
        td->td_retval[0] = fd;

        /* connection has been removed from the listen queue */
        KNOTE(&head->so_rcv.sb_sel.si_note, 0);

        so->so_state &= ~SS_COMP;
        so->so_head = NULL;
        if (head->so_sigio != NULL)
                fsetown(fgetown(head->so_sigio), &so->so_sigio);

        FILE_LOCK(nfp);
        soref(so);                      /* file descriptor reference */
        nfp->f_data = (caddr_t)so;      /* nfp has ref count from falloc */
        nfp->f_flag = fflag;
        nfp->f_ops = &socketops;
        nfp->f_type = DTYPE_SOCKET;
        FILE_UNLOCK(nfp);
        sa = 0;
        error = soaccept(so, &sa);
        if (error) {
                /*
                 * return a namelen of zero for older code which might
                 * ignore the return value from accept.
                 */
                if (uap->name != NULL) {
                        namelen = 0;
                        (void) copyout((caddr_t)&namelen,
                            (caddr_t)uap->anamelen, sizeof(*uap->anamelen));
                }
                goto noconnection;
        }
        if (sa == NULL) {
                namelen = 0;
                if (uap->name)
                        goto gotnoname;
                splx(s);
                error = 0;
                goto done;
        }
        if (uap->name) {
                /* check sa_len before it is destroyed */
                if (namelen > sa->sa_len)
                        namelen = sa->sa_len;
#ifdef COMPAT_OLDSOCK
                if (compat)
                        ((struct osockaddr *)sa)->sa_family =
                            sa->sa_family;
#endif
                error = copyout(sa, (caddr_t)uap->name, (u_int)namelen);
                if (!error)
gotnoname:
                        error = copyout((caddr_t)&namelen,
                            (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
        }
noconnection:
        if (sa)
                FREE(sa, M_SONAME);

        /*
         * close the new descriptor, assuming someone hasn't ripped it
         * out from under us.
         */
        if (error) {
                FILEDESC_LOCK(fdp);
                if (fdp->fd_ofiles[fd] == nfp) {
                        fdp->fd_ofiles[fd] = NULL;
                        FILEDESC_UNLOCK(fdp);
                        fdrop(nfp, td);
                } else {
                        FILEDESC_UNLOCK(fdp);
                }
        }
        splx(s);

        /*
         * Release explicitly held references before returning.
         */
done:
        if (nfp != NULL)
                fdrop(nfp, td);
        fputsock(head);
done2:
        mtx_unlock(&Giant);
        return (error);
}

/*
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(td, uap)
        struct thread *td;
        struct accept_args *uap;
{

        return (accept1(td, uap, 0));
}

#ifdef COMPAT_OLDSOCK
/*
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(td, uap)
        struct thread *td;
        struct accept_args *uap;
{

        return (accept1(td, uap, 1));
}
#endif /* COMPAT_OLDSOCK */

/*
 * MPSAFE
 */
/* ARGSUSED */
int
connect(td, uap)
        struct thread *td;
        register struct connect_args /* {
                int     s;
                caddr_t name;
                int     namelen;
        } */ *uap;
{
        struct socket *so;
        struct sockaddr *sa;
        int error, s;

        mtx_lock(&Giant);
        if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
                goto done2;
        if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
                error = EALREADY;
                goto done1;
        }
        error = getsockaddr(&sa, uap->name, uap->namelen);
        if (error)
                goto done1;
        error = soconnect(so, sa, td);
        if (error)
                goto bad;
        if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
                FREE(sa, M_SONAME);
                error = EINPROGRESS;
                goto done1;
        }
        s = splnet();
        while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
                error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, "connec", 0);
                if (error)
                        break;
        }
        if (error == 0) {
                error = so->so_error;
                so->so_error = 0;
        }
        splx(s);
bad:
        so->so_state &= ~SS_ISCONNECTING;
        FREE(sa, M_SONAME);
        if (error == ERESTART)
                error = EINTR;
done1:
        fputsock(so);
done2:
        mtx_unlock(&Giant);
        return (error);
}

/*
 * MPSAFE
 */
int
socketpair(td, uap)
        struct thread *td;
        register struct socketpair_args /* {
                int     domain;
                int     type;
                int     protocol;
                int     *rsv;
        } */ *uap;
{
        register struct filedesc *fdp = td->td_proc->p_fd;
        struct file *fp1, *fp2;
        struct socket *so1, *so2;
        int fd, error, sv[2];

        mtx_lock(&Giant);
        error = socreate(uap->domain, &so1, uap->type, uap->protocol,
            td->td_proc->p_ucred, td);
        if (error)
                goto done2;
        error = socreate(uap->domain, &so2, uap->type, uap->protocol,
            td->td_proc->p_ucred, td);
        if (error)
                goto free1;
        error = falloc(td, &fp1, &fd);
        if (error)
                goto free2;
        fhold(fp1);
        sv[0] = fd;
        fp1->f_data = (caddr_t)so1;     /* so1 already has ref count */
        error = falloc(td, &fp2, &fd);
        if (error)
                goto free3;
        fhold(fp2);
        fp2->f_data = (caddr_t)so2;     /* so2 already has ref count */
        sv[1] = fd;
        error = soconnect2(so1, so2);
        if (error)
                goto free4;
        if (uap->type == SOCK_DGRAM) {
                /*
                 * Datagram socket connection is asymmetric.
                 */
                error = soconnect2(so2, so1);
                if (error)
                        goto free4;
        }
        FILE_LOCK(fp1);
        fp1->f_flag = FREAD|FWRITE;
        fp1->f_ops = &socketops;
        fp1->f_type = DTYPE_SOCKET;
        FILE_UNLOCK(fp1);
        FILE_LOCK(fp2);
        fp2->f_flag = FREAD|FWRITE;
        fp2->f_ops = &socketops;
        fp2->f_type = DTYPE_SOCKET;
        FILE_UNLOCK(fp2);
        error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int));
        fdrop(fp1, td);
        fdrop(fp2, td);
        goto done2;
free4:
        FILEDESC_LOCK(fdp);
        if (fdp->fd_ofiles[sv[1]] == fp2) {
                fdp->fd_ofiles[sv[1]] = NULL;
                FILEDESC_UNLOCK(fdp);
                fdrop(fp2, td);
        } else
                FILEDESC_UNLOCK(fdp);
        fdrop(fp2, td);
free3:
        FILEDESC_LOCK(fdp);
        if (fdp->fd_ofiles[sv[0]] == fp1) {
                fdp->fd_ofiles[sv[0]] = NULL;
                FILEDESC_UNLOCK(fdp);
                fdrop(fp1, td);
        } else
                FILEDESC_UNLOCK(fdp);
        fdrop(fp1, td);
free2:
        (void)soclose(so2);
free1:
        (void)soclose(so1);
done2:
        mtx_unlock(&Giant);
        return (error);
}

static int
sendit(td, s, mp, flags)
        register struct thread *td;
        int s;
        register struct msghdr *mp;
        int flags;
{
        struct uio auio;
        register struct iovec *iov;
        register int i;
        struct mbuf *control;
        struct sockaddr *to = NULL;
        int len, error;
        struct socket *so;
#ifdef KTRACE
        struct iovec *ktriov = NULL;
        struct uio ktruio;
#endif

        if ((error = fgetsock(td, s, &so, NULL)) != 0)
                return (error);
        auio.uio_iov = mp->msg_iov;
        auio.uio_iovcnt = mp->msg_iovlen;
        auio.uio_segflg = UIO_USERSPACE;
        auio.uio_rw = UIO_WRITE;
        auio.uio_td = td;
        auio.uio_offset = 0;                    /* XXX */
        auio.uio_resid = 0;
        iov = mp->msg_iov;
        for (i = 0; i < mp->msg_iovlen; i++, iov++) {
                if ((auio.uio_resid += iov->iov_len) < 0) {
                        error = EINVAL;
                        goto bad;
                }
        }
        if (mp->msg_name) {
                error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
                if (error)
                        goto bad;
        }
        if (mp->msg_control) {
                if (mp->msg_controllen < sizeof(struct cmsghdr)
#ifdef COMPAT_OLDSOCK
                    && mp->msg_flags != MSG_COMPAT
#endif
                ) {
                        error = EINVAL;
                        goto bad;
                }
                error = sockargs(&control, mp->msg_control,
                    mp->msg_controllen, MT_CONTROL);
                if (error)
                        goto bad;
#ifdef COMPAT_OLDSOCK
                if (mp->msg_flags == MSG_COMPAT) {
                        register struct cmsghdr *cm;

                        M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
                        if (control == 0) {
                                error = ENOBUFS;
                                goto bad;
                        } else {
                                cm = mtod(control, struct cmsghdr *);
                                cm->cmsg_len = control->m_len;
                                cm->cmsg_level = SOL_SOCKET;
                                cm->cmsg_type = SCM_RIGHTS;
                        }
                }
#endif
        } else {
                control = 0;
        }
#ifdef KTRACE
        if (KTRPOINT(td->td_proc, KTR_GENIO)) {
                int iovlen = auio.uio_iovcnt * sizeof (struct iovec);

                MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
                bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
                ktruio = auio;
        }
#endif
        len = auio.uio_resid;
        error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
            flags, td);
        if (error) {
                if (auio.uio_resid != len && (error == ERESTART ||
                    error == EINTR || error == EWOULDBLOCK))
                        error = 0;
                if (error == EPIPE) {
                        PROC_LOCK(td->td_proc);
                        psignal(td->td_proc, SIGPIPE);
                        PROC_UNLOCK(td->td_proc);
                }
        }
        if (error == 0)
                td->td_retval[0] = len - auio.uio_resid;
#ifdef KTRACE
        if (ktriov != NULL) {
                if (error == 0) {
                        ktruio.uio_iov = ktriov;
                        ktruio.uio_resid = td->td_retval[0];
                        ktrgenio(td->td_proc->p_tracep, s, UIO_WRITE, &ktruio, error);
                }
                FREE(ktriov, M_TEMP);
        }
#endif
bad:
        fputsock(so);
        if (to)
                FREE(to, M_SONAME);
        return (error);
}

/*
 * MPSAFE
 */
int
sendto(td, uap)
        struct thread *td;
        register struct sendto_args /* {
                int     s;
                caddr_t buf;
                size_t  len;
                int     flags;
                caddr_t to;
                int     tolen;
        } */ *uap;
{
        struct msghdr msg;
        struct iovec aiov;
        int error;

        msg.msg_name = uap->to;
        msg.msg_namelen = uap->tolen;
        msg.msg_iov = &aiov;
        msg.msg_iovlen = 1;
        msg.msg_control = 0;
#ifdef COMPAT_OLDSOCK
        msg.msg_flags = 0;
#endif
        aiov.iov_base = uap->buf;
        aiov.iov_len = uap->len;
        mtx_lock(&Giant);
        error = sendit(td, uap->s, &msg, uap->flags);
        mtx_unlock(&Giant);
        return (error);
}

#ifdef COMPAT_OLDSOCK
/*
 * MPSAFE
 */
int
osend(td, uap)
        struct thread *td;
        register struct osend_args /* {
                int     s;
                caddr_t buf;
                int     len;
                int     flags;
        } */ *uap;
{
        struct msghdr msg;
        struct iovec aiov;
        int error;

        msg.msg_name = 0;
        msg.msg_namelen = 0;
        msg.msg_iov = &aiov;
        msg.msg_iovlen = 1;
        aiov.iov_base = uap->buf;
        aiov.iov_len = uap->len;
        msg.msg_control = 0;
        msg.msg_flags = 0;
        mtx_lock(&Giant);
        error = sendit(td, uap->s, &msg, uap->flags);
        mtx_unlock(&Giant);
        return (error);
}

/*
 * MPSAFE
 */
int
osendmsg(td, uap)
        struct thread *td;
        register struct osendmsg_args /* {
                int     s;
                caddr_t msg;
                int     flags;
        } */ *uap;
{
        struct msghdr msg;
        struct iovec aiov[UIO_SMALLIOV], *iov;
        int error;

        mtx_lock(&Giant);
        error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
        if (error)
                goto done2;
        if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
                if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
                        error = EMSGSIZE;
                        goto done2;
                }
                MALLOC(iov, struct iovec *,
                    sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
                    M_WAITOK);
        } else {
                iov = aiov;
        }
        error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
            (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
        if (error)
                goto done;
        msg.msg_flags = MSG_COMPAT;
        msg.msg_iov = iov;
        error = sendit(td, uap->s, &msg, uap->flags);
done:
        if (iov != aiov)
                FREE(iov, M_IOV);
done2:
        mtx_unlock(&Giant);
        return (error);
}
#endif

/*
 * MPSAFE
 */
int
sendmsg(td, uap)
        struct thread *td;
        register struct sendmsg_args /* {
                int     s;
                caddr_t msg;
                int     flags;
        } */ *uap;
{
        struct msghdr msg;
        struct iovec aiov[UIO_SMALLIOV], *iov;
        int error;

        mtx_lock(&Giant);
        error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg));
        if (error)
                goto done2;
        if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
                if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
                        error = EMSGSIZE;
                        goto done2;
                }
                MALLOC(iov, struct iovec *,
                    sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
                    M_WAITOK);
        } else {
                iov = aiov;
        }
        if (msg.msg_iovlen &&
            (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
            (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
                goto done;
        msg.msg_iov = iov;
#ifdef COMPAT_OLDSOCK
        msg.msg_flags = 0;
#endif
        error = sendit(td, uap->s, &msg, uap->flags);
done:
        if (iov != aiov)
                FREE(iov, M_IOV);
done2:
        mtx_unlock(&Giant);
        return (error);
}

static int
recvit(td, s, mp, namelenp)
        register struct thread *td;
        int s;
        register struct msghdr *mp;
        caddr_t namelenp;
{
        struct uio auio;
        register struct iovec *iov;
        register int i;
        int len, error;
        struct mbuf *m, *control = 0;
        caddr_t ctlbuf;
        struct socket *so;
        struct sockaddr *fromsa = 0;
#ifdef KTRACE
        struct iovec *ktriov = NULL;
        struct uio ktruio;
#endif

        if ((error = fgetsock(td, s, &so, NULL)) != 0)
                return (error);
        auio.uio_iov = mp->msg_iov;
        auio.uio_iovcnt = mp->msg_iovlen;
        auio.uio_segflg = UIO_USERSPACE;
        auio.uio_rw = UIO_READ;
        auio.uio_td = td;
        auio.uio_offset = 0;                    /* XXX */
        auio.uio_resid = 0;
        iov = mp->msg_iov;
        for (i = 0; i < mp->msg_iovlen; i++, iov++) {
                if ((auio.uio_resid += iov->iov_len) < 0) {
                        fputsock(so);
                        return (EINVAL);
                }
        }
#ifdef KTRACE
        if (KTRPOINT(td->td_proc, KTR_GENIO)) {
                int iovlen = auio.uio_iovcnt * sizeof (struct iovec);

                MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
                bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
                ktruio = auio;
        }
#endif
        len = auio.uio_resid;
        error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
            (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
            &mp->msg_flags);
        if (error) {
                if (auio.uio_resid != len && (error == ERESTART ||
                    error == EINTR || error == EWOULDBLOCK))
                        error = 0;
        }
#ifdef KTRACE
        if (ktriov != NULL) {
                if (error == 0) {
                        ktruio.uio_iov = ktriov;
                        ktruio.uio_resid = len - auio.uio_resid;
                        ktrgenio(td->td_proc->p_tracep, s, UIO_READ, &ktruio, error);
                }
                FREE(ktriov, M_TEMP);
        }
#endif
        if (error)
                goto out;
        td->td_retval[0] = len - auio.uio_resid;
        if (mp->msg_name) {
                len = mp->msg_namelen;
                if (len <= 0 || fromsa == 0)
                        len = 0;
                else {
#ifndef MIN
#define MIN(a,b) ((a)>(b)?(b):(a))
#endif
                        /* save sa_len before it is destroyed by MSG_COMPAT */
                        len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
                        if (mp->msg_flags & MSG_COMPAT)
                                ((struct osockaddr *)fromsa)->sa_family =
                                    fromsa->sa_family;
#endif
                        error = copyout(fromsa,
                            (caddr_t)mp->msg_name, (unsigned)len);
                        if (error)
                                goto out;
                }
                mp->msg_namelen = len;
                if (namelenp &&
                    (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
#ifdef COMPAT_OLDSOCK
                        if (mp->msg_flags & MSG_COMPAT)
                                error = 0;      /* old recvfrom didn't check */
                        else
#endif
                                goto out;
                }
        }
        if (mp->msg_control) {
#ifdef COMPAT_OLDSOCK
                /*
                 * We assume that old recvmsg calls won't receive access
                 * rights and other control info, esp. as control info
                 * is always optional and those options didn't exist in 4.3.
                 * If we receive rights, trim the cmsghdr; anything else
                 * is tossed.
                 */
                if (control && mp->msg_flags & MSG_COMPAT) {
                        if (mtod(control, struct cmsghdr *)->cmsg_level !=
                            SOL_SOCKET ||
                            mtod(control, struct cmsghdr *)->cmsg_type !=
                            SCM_RIGHTS) {
                                mp->msg_controllen = 0;
                                goto out;
                        }
                        control->m_len -= sizeof (struct cmsghdr);
                        control->m_data += sizeof (struct cmsghdr);
                }
#endif
                len = mp->msg_controllen;
                m = control;
                mp->msg_controllen = 0;
                ctlbuf = (caddr_t) mp->msg_control;

                while (m && len > 0) {
                        unsigned int tocopy;

                        if (len >= m->m_len)
                                tocopy = m->m_len;
                        else {
                                mp->msg_flags |= MSG_CTRUNC;
                                tocopy = len;
                        }

                        if ((error = copyout((caddr_t)mtod(m, caddr_t),
                            ctlbuf, tocopy)) != 0)
                                goto out;

                        ctlbuf += tocopy;
                        len -= tocopy;
                        m = m->m_next;
                }
                mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
        }
out:
        fputsock(so);
        if (fromsa)
                FREE(fromsa, M_SONAME);
        if (control)
                m_freem(control);
        return (error);
}

/*
 * MPSAFE
 */
int
recvfrom(td, uap)
        struct thread *td;
        register struct recvfrom_args /* {
                int     s;
                caddr_t buf;
                size_t  len;
                int     flags;
                caddr_t from;
                int     *fromlenaddr;
        } */ *uap;
{
        struct msghdr msg;
        struct iovec aiov;
        int error;

        mtx_lock(&Giant);
        if (uap->fromlenaddr) {
                error = copyin((caddr_t)uap->fromlenaddr,
                    (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
                if (error)
                        goto done2;
        } else {
                msg.msg_namelen = 0;
        }
        msg.msg_name = uap->from;
        msg.msg_iov = &aiov;
        msg.msg_iovlen = 1;
        aiov.iov_base = uap->buf;
        aiov.iov_len = uap->len;
        msg.msg_control = 0;
        msg.msg_flags = uap->flags;
        error = recvit(td, uap->s, &msg, (caddr_t)uap->fromlenaddr);
done2:
        mtx_unlock(&Giant);
        return(error);
}

#ifdef COMPAT_OLDSOCK
/*
 * MPSAFE
 */
int
orecvfrom(td, uap)
        struct thread *td;
        struct recvfrom_args *uap;
{

        uap->flags |= MSG_COMPAT;
        return (recvfrom(td, uap));
}
#endif


#ifdef COMPAT_OLDSOCK
/*
 * MPSAFE
 */
int
orecv(td, uap)
        struct thread *td;
        register struct orecv_args /* {
                int     s;
                caddr_t buf;
                int     len;
                int     flags;
        } */ *uap;
{
        struct msghdr msg;
        struct iovec aiov;
        int error;

        mtx_lock(&Giant);
        msg.msg_name = 0;
        msg.msg_namelen = 0;
        msg.msg_iov = &aiov;
        msg.msg_iovlen = 1;
        aiov.iov_base = uap->buf;
        aiov.iov_len = uap->len;
        msg.msg_control = 0;
        msg.msg_flags = uap->flags;
        error = recvit(td, uap->s, &msg, (caddr_t)0);
        mtx_unlock(&Giant);
        return (error);
}

/*
 * Old recvmsg.  This code takes advantage of the fact that the old msghdr
 * overlays the new one, missing only the flags, and with the (old) access
 * rights where the control fields are now.
 *
 * MPSAFE
 */
int
orecvmsg(td, uap)
        struct thread *td;
        register struct orecvmsg_args /* {
                int     s;
                struct  omsghdr *msg;
                int     flags;
        } */ *uap;
{
        struct msghdr msg;
        struct iovec aiov[UIO_SMALLIOV], *iov;
        int error;

        error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
            sizeof (struct omsghdr));
        if (error)
                return (error);

        mtx_lock(&Giant);
        if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
                if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
                        error = EMSGSIZE;
                        goto done2;
                }
                MALLOC(iov, struct iovec *,
                    sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
                    M_WAITOK);
        } else {
                iov = aiov;
        }
        msg.msg_flags = uap->flags | MSG_COMPAT;
        error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
            (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
        if (error)
                goto done;
        msg.msg_iov = iov;
        error = recvit(td, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen);

        if (msg.msg_controllen && error == 0)
                error = copyout((caddr_t)&msg.msg_controllen,
                    (caddr_t)&uap->msg->msg_accrightslen, sizeof (int));
done:
        if (iov != aiov)
                FREE(iov, M_IOV);
done2:
        mtx_unlock(&Giant);
        return (error);
}
#endif

/*
 * MPSAFE
 */
int
recvmsg(td, uap)
        struct thread *td;
        register struct recvmsg_args /* {
                int     s;
                struct  msghdr *msg;
                int     flags;
        } */ *uap;
{
        struct msghdr msg;
        struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
        register int error;

        mtx_lock(&Giant);
        error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg));
        if (error)
                goto done2;
        if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
                if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
                        error = EMSGSIZE;
                        goto done2;
                }
                MALLOC(iov, struct iovec *,
                    sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
                    M_WAITOK);
        } else {
                iov = aiov;
        }
#ifdef COMPAT_OLDSOCK
        msg.msg_flags = uap->flags &~ MSG_COMPAT;
#else
        msg.msg_flags = uap->flags;
#endif
        uiov = msg.msg_iov;
        msg.msg_iov = iov;
        error = copyin((caddr_t)uiov, (caddr_t)iov,
            (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
        if (error)
                goto done;
        error = recvit(td, uap->s, &msg, (caddr_t)0);
        if (!error) {
                msg.msg_iov = uiov;
                error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg));
        }
done:
        if (iov != aiov)
                FREE(iov, M_IOV);
done2:
        mtx_unlock(&Giant);
        return (error);
}

/*
 * MPSAFE
 */
/* ARGSUSED */
int
shutdown(td, uap)
        struct thread *td;
        register struct shutdown_args /* {
                int     s;
                int     how;
        } */ *uap;
{
        struct socket *so;
        int error;

        mtx_lock(&Giant);
        if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
                error = soshutdown(so, uap->how);
                fputsock(so);
        }
        mtx_unlock(&Giant);
        return(error);
}

/*
 * MPSAFE
 */
/* ARGSUSED */
int
setsockopt(td, uap)
        struct thread *td;
        register struct setsockopt_args /* {
                int     s;
                int     level;
                int     name;
                caddr_t val;
                int     valsize;
        } */ *uap;
{
        struct socket *so;
        struct sockopt sopt;
        int error;

        if (uap->val == 0 && uap->valsize != 0)
                return (EFAULT);
        if (uap->valsize < 0)
                return (EINVAL);

        mtx_lock(&Giant);
        if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
                sopt.sopt_dir = SOPT_SET;
                sopt.sopt_level = uap->level;
                sopt.sopt_name = uap->name;
                sopt.sopt_val = uap->val;
                sopt.sopt_valsize = uap->valsize;
                sopt.sopt_td = td;
                error = sosetopt(so, &sopt);
                fputsock(so);
        }
        mtx_unlock(&Giant);
        return(error);
}

/*
 * MPSAFE
 */
/* ARGSUSED */
int
getsockopt(td, uap)
        struct thread *td;
        register struct getsockopt_args /* {
                int     s;
                int     level;
                int     name;
                caddr_t val;
                int     *avalsize;
        } */ *uap;
{
        int valsize, error;
        struct socket *so;
        struct sockopt sopt;

        mtx_lock(&Giant);
        if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
                goto done2;
        if (uap->val) {
                error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
                    sizeof (valsize));
                if (error)
                        goto done1;
                if (valsize < 0) {
                        error = EINVAL;
                        goto done1;
                }
        } else {
                valsize = 0;
        }

        sopt.sopt_dir = SOPT_GET;
        sopt.sopt_level = uap->level;
        sopt.sopt_name = uap->name;
        sopt.sopt_val = uap->val;
        sopt.sopt_valsize = (size_t)valsize;    /* checked non-negative above */
        sopt.sopt_td = td;

        error = sogetopt(so, &sopt);
        if (error == 0) {
                valsize = sopt.sopt_valsize;
                error = copyout((caddr_t)&valsize,
                    (caddr_t)uap->avalsize, sizeof (valsize));
        }
done1:
        fputsock(so);
done2:
        mtx_unlock(&Giant);
        return (error);
}

/*
 * getsockname1() - Get socket name.
 *
 * MPSAFE
 */
/* ARGSUSED */
static int
getsockname1(td, uap, compat)
        struct thread *td;
        register struct getsockname_args /* {
                int     fdes;
                caddr_t asa;
                int     *alen;
        } */ *uap;
        int compat;
{
        struct socket *so;
        struct sockaddr *sa;
        int len, error;

        mtx_lock(&Giant);
        if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
                goto done2;
        error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
        if (error)
                goto done1;
        sa = 0;
        error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
        if (error)
                goto bad;
        if (sa == 0) {
                len = 0;
                goto gotnothing;
        }

        len = MIN(len, sa->sa_len);
#ifdef COMPAT_OLDSOCK
        if (compat)
                ((struct osockaddr *)sa)->sa_family = sa->sa_family;
#endif
        error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
        if (error == 0)
gotnothing:
                error = copyout((caddr_t)&len, (caddr_t)uap->alen,
                    sizeof (len));
bad:
        if (sa)
                FREE(sa, M_SONAME);
done1:
        fputsock(so);
done2:
        mtx_unlock(&Giant);
        return (error);
}

/*
 * MPSAFE
 */
int
getsockname(td, uap)
        struct thread *td;
        struct getsockname_args *uap;
{

        return (getsockname1(td, uap, 0));
}

#ifdef COMPAT_OLDSOCK
/*
 * MPSAFE
 */
int
ogetsockname(td, uap)
        struct thread *td;
        struct getsockname_args *uap;
{

        return (getsockname1(td, uap, 1));
}
#endif /* COMPAT_OLDSOCK */

/*
 * getpeername1() - Get name of peer for connected socket.
 *
 * MPSAFE
 */
/* ARGSUSED */
static int
getpeername1(td, uap, compat)
        struct thread *td;
        register struct getpeername_args /* {
                int     fdes;
                caddr_t asa;
                int     *alen;
        } */ *uap;
        int compat;
{
        struct socket *so;
        struct sockaddr *sa;
        int len, error;

        mtx_lock(&Giant);
        if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
                goto done2;
        if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
                error = ENOTCONN;
                goto done1;
        }
        error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
        if (error)
                goto done1;
        sa = 0;
        error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
        if (error)
                goto bad;
        if (sa == 0) {
                len = 0;
                goto gotnothing;
        }
        len = MIN(len, sa->sa_len);
#ifdef COMPAT_OLDSOCK
        if (compat)
                ((struct osockaddr *)sa)->sa_family =
                    sa->sa_family;
#endif
        error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
        if (error)
                goto bad;
gotnothing:
        error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
bad:
        if (sa)
                FREE(sa, M_SONAME);
done1:
        fputsock(so);
done2:
        mtx_unlock(&Giant);
        return (error);
}

/*
 * MPSAFE
 */
int
getpeername(td, uap)
        struct thread *td;
        struct getpeername_args *uap;
{

        return (getpeername1(td, uap, 0));
}

#ifdef COMPAT_OLDSOCK
/*
 * MPSAFE
 */
int
ogetpeername(td, uap)
        struct thread *td;
        struct ogetpeername_args *uap;
{

        /* XXX uap should have type `getpeername_args *' to begin with. */
        return (getpeername1(td, (struct getpeername_args *)uap, 1));
}
#endif /* COMPAT_OLDSOCK */

int
sockargs(mp, buf, buflen, type)
        struct mbuf **mp;
        caddr_t buf;
        int buflen, type;
{
        register struct sockaddr *sa;
        register struct mbuf *m;
        int error;

        if ((u_int)buflen > MLEN) {
#ifdef COMPAT_OLDSOCK
                if (type == MT_SONAME && (u_int)buflen <= 112)
                        buflen = MLEN;          /* unix domain compat. hack */
                else
#endif
                        return (EINVAL);
        }
        m = m_get(M_TRYWAIT, type);
        if (m == NULL)
                return (ENOBUFS);
        m->m_len = buflen;
        error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
        if (error)
                (void) m_free(m);
        else {
                *mp = m;
                if (type == MT_SONAME) {
                        sa = mtod(m, struct sockaddr *);

#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
                        if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
                                sa->sa_family = sa->sa_len;
#endif
                        sa->sa_len = buflen;
                }
        }
        return (error);
}

int
getsockaddr(namp, uaddr, len)
        struct sockaddr **namp;
        caddr_t uaddr;
        size_t len;
{
        struct sockaddr *sa;
        int error;

        if (len > SOCK_MAXADDRLEN)
                return ENAMETOOLONG;
        MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
        error = copyin(uaddr, sa, len);
        if (error) {
                FREE(sa, M_SONAME);
        } else {
#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
                if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
                        sa->sa_family = sa->sa_len;
#endif
                sa->sa_len = len;
                *namp = sa;
        }
        return error;
}

/*
 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
 * XXX - The sf_buf functions are currently private to sendfile(2), so have
 * been made static, but may be useful in the future for doing zero-copy in
 * other parts of the networking code.
 */
static void
sf_buf_init(void *arg)
{
        int i;

        mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", MTX_DEF);
        mtx_lock(&sf_freelist.sf_lock);
        SLIST_INIT(&sf_freelist.sf_head);
        sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
        sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
            M_NOWAIT | M_ZERO);
        for (i = 0; i < nsfbufs; i++) {
                sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
                SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
        }
        sf_buf_alloc_want = 0;
        mtx_unlock(&sf_freelist.sf_lock);
}

/*
 * Get an sf_buf from the freelist.  Will block if none are available.
 */
static struct sf_buf *
sf_buf_alloc()
{
        struct sf_buf *sf;
        int error;

        mtx_lock(&sf_freelist.sf_lock);
        while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
                sf_buf_alloc_want++;
                error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
                    "sfbufa", 0);
                sf_buf_alloc_want--;

                /*
                 * If we got a signal, don't risk going back to sleep.
                 */
                if (error)
                        break;
        }
        if (sf != NULL)
                SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
        mtx_unlock(&sf_freelist.sf_lock);
        return (sf);
}

#define dtosf(x)        (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])

/*
 * Detach mapped page and release resources back to the system.
 */
static void
sf_buf_free(caddr_t addr, void *args)
{
        struct sf_buf *sf;
        struct vm_page *m;

        GIANT_REQUIRED;

        sf = dtosf(addr);
        pmap_qremove((vm_offset_t)addr, 1);
        m = sf->m;
        vm_page_unwire(m, 0);
        /*
         * Check for the object going away on us.  This can
         * happen since we don't hold a reference to it.
         * If so, we're responsible for freeing the page.
         */
        if (m->wire_count == 0 && m->object == NULL)
                vm_page_free(m);
        sf->m = NULL;
        mtx_lock(&sf_freelist.sf_lock);
        SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
        if (sf_buf_alloc_want > 0)
                wakeup_one(&sf_freelist);
        mtx_unlock(&sf_freelist.sf_lock);
}

/*
 * sendfile(2)
 *
 * MPSAFE
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *       struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send a file specified by 'fd' and starting at 'offset' to a socket
 * specified by 's'.  Send only 'nbytes' of the file or until EOF if
 * nbytes == 0.  Optionally add a header and/or trailer to the socket
 * output.  If specified, write the total number of bytes sent into *sbytes.
 *
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
        struct vnode *vp;
        struct vm_object *obj;
        struct socket *so = NULL;
        struct mbuf *m;
        struct sf_buf *sf;
        struct vm_page *pg;
        struct writev_args nuap;
        struct sf_hdtr hdtr;
        off_t off, xfsize, hdtr_size, sbytes = 0;
        int error, s;

        mtx_lock(&Giant);

        hdtr_size = 0;

        /*
         * The descriptor must be a regular file and have a backing VM object.
         */
        if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
                goto done;
        if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
                error = EINVAL;
                goto done;
        }
        if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
                goto done;
        if (so->so_type != SOCK_STREAM) {
                error = EINVAL;
                goto done;
        }
        if ((so->so_state & SS_ISCONNECTED) == 0) {
                error = ENOTCONN;
                goto done;
        }
        if (uap->offset < 0) {
                error = EINVAL;
                goto done;
        }

        /*
         * If specified, get the pointer to the sf_hdtr struct for
         * any headers/trailers.
         */
        if (uap->hdtr != NULL) {
                error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
                if (error)
                        goto done;
                /*
                 * Send any headers.  Wimp out and use writev(2).
                 */
                if (hdtr.headers != NULL) {
                        nuap.fd = uap->s;
                        nuap.iovp = hdtr.headers;
                        nuap.iovcnt = hdtr.hdr_cnt;
                        error = writev(td, &nuap);
                        if (error)
                                goto done;
                        hdtr_size += td->td_retval[0];
                }
        }

        /*
         * Protect against multiple writers to the socket.
         */
        (void) sblock(&so->so_snd, M_WAITOK);

        /*
         * Loop through the pages in the file, starting with the requested
         * offset. Get a file page (do I/O if necessary), map the file page
         * into an sf_buf, attach an mbuf header to the sf_buf, and queue
         * it on the socket.
         */
        for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
                vm_pindex_t pindex;
                vm_offset_t pgoff;

                pindex = OFF_TO_IDX(off);
retry_lookup:
                /*
                 * Calculate the amount to transfer. Not to exceed a page,
                 * the EOF, or the passed in nbytes.
                 */
                xfsize = obj->un_pager.vnp.vnp_size - off;
                if (xfsize > PAGE_SIZE)
                        xfsize = PAGE_SIZE;
                pgoff = (vm_offset_t)(off & PAGE_MASK);
                if (PAGE_SIZE - pgoff < xfsize)
                        xfsize = PAGE_SIZE - pgoff;
                if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
                        xfsize = uap->nbytes - sbytes;
                if (xfsize <= 0)
                        break;
                /*
                 * Optimize the non-blocking case by looking at the socket space
                 * before going to the extra work of constituting the sf_buf.
                 */
                if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
                        if (so->so_state & SS_CANTSENDMORE)
                                error = EPIPE;
                        else
                                error = EAGAIN;
                        sbunlock(&so->so_snd);
                        goto done;
                }
                /*
                 * Attempt to look up the page.
                 *
                 *      Allocate if not found
                 *
                 *      Wait and loop if busy.
                 */
                pg = vm_page_lookup(obj, pindex);

                if (pg == NULL) {
                        pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
                        if (pg == NULL) {
                                VM_WAIT;
                                goto retry_lookup;
                        }
                        vm_page_wakeup(pg);
                } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) {
                        goto retry_lookup;
                }

                /*
                 * Wire the page so it does not get ripped out from under
                 * us.
                 */

                vm_page_wire(pg);

                /*
                 * If page is not valid for what we need, initiate I/O
                 */

                if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
                        struct uio auio;
                        struct iovec aiov;
                        int bsize;

                        /*
                         * Ensure that our page is still around when the I/O
                         * completes.
                         */
                        vm_page_io_start(pg);

                        /*
                         * Get the page from backing store.
                         */
                        bsize = vp->v_mount->mnt_stat.f_iosize;
                        auio.uio_iov = &aiov;
                        auio.uio_iovcnt = 1;
                        aiov.iov_base = 0;
                        aiov.iov_len = MAXBSIZE;
                        auio.uio_resid = MAXBSIZE;
                        auio.uio_offset = trunc_page(off);
                        auio.uio_segflg = UIO_NOCOPY;
                        auio.uio_rw = UIO_READ;
                        auio.uio_td = td;
                        vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
                        error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
                            td->td_proc->p_ucred);
                        VOP_UNLOCK(vp, 0, td);
                        vm_page_flag_clear(pg, PG_ZERO);
                        vm_page_io_finish(pg);
                        if (error) {
                                vm_page_unwire(pg, 0);
                                /*
                                 * See if anyone else might know about this page.
                                 * If not and it is not valid, then free it.
                                 */
                                if (pg->wire_count == 0 && pg->valid == 0 &&
                                    pg->busy == 0 && !(pg->flags & PG_BUSY) &&
                                    pg->hold_count == 0) {
                                        vm_page_busy(pg);
                                        vm_page_free(pg);
                                }
                                sbunlock(&so->so_snd);
                                goto done;
                        }
                }


                /*
                 * Get a sendfile buf. We usually wait as long as necessary,
                 * but this wait can be interrupted.
                 */
                if ((sf = sf_buf_alloc()) == NULL) {
                        vm_page_unwire(pg, 0);
                        if (pg->wire_count == 0 && pg->object == NULL)
                                vm_page_free(pg);
                        sbunlock(&so->so_snd);
                        error = EINTR;
                        goto done;
                }

                /*
                 * Allocate a kernel virtual page and insert the physical page
                 * into it.
                 */
                sf->m = pg;
                pmap_qenter(sf->kva, &pg, 1);
                /*
                 * Get an mbuf header and set it up as having external storage.
                 */
                MGETHDR(m, M_TRYWAIT, MT_DATA);
                if (m == NULL) {
                        error = ENOBUFS;
                        sf_buf_free((void *)sf->kva, NULL);
                        sbunlock(&so->so_snd);
                        goto done;
                }
                /*
                 * Setup external storage for mbuf.
                 */
                MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL, M_RDONLY,
                    EXT_SFBUF);
                m->m_data = (char *) sf->kva + pgoff;
                m->m_pkthdr.len = m->m_len = xfsize;
                /*
                 * Add the buffer to the socket buffer chain.
                 */
                s = splnet();
retry_space:
                /*
                 * Make sure that the socket is still able to take more data.
                 * CANTSENDMORE being true usually means that the connection
                 * was closed. so_error is true when an error was sensed after
                 * a previous send.
                 * The state is checked after the page mapping and buffer
                 * allocation above since those operations may block and make
                 * any socket checks stale. From this point forward, nothing
                 * blocks before the pru_send (or more accurately, any blocking
                 * results in a loop back to here to re-check).
                 */
                if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
                        if (so->so_state & SS_CANTSENDMORE) {
                                error = EPIPE;
                        } else {
                                error = so->so_error;
                                so->so_error = 0;
                        }
                        m_freem(m);
                        sbunlock(&so->so_snd);
                        splx(s);
                        goto done;
                }
                /*
                 * Wait for socket space to become available. We do this just
                 * after checking the connection state above in order to avoid
                 * a race condition with sbwait().
                 */
                if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
                        if (so->so_state & SS_NBIO) {
                                m_freem(m);
                                sbunlock(&so->so_snd);
                                splx(s);
                                error = EAGAIN;
                                goto done;
                        }
                        error = sbwait(&so->so_snd);
                        /*
                         * An error from sbwait usually indicates that we've
                         * been interrupted by a signal. If we've sent anything
                         * then return bytes sent, otherwise return the error.
                         */
                        if (error) {
                                m_freem(m);
                                sbunlock(&so->so_snd);
                                splx(s);
                                goto done;
                        }
                        goto retry_space;
                }
                error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
                splx(s);
                if (error) {
                        sbunlock(&so->so_snd);
                        goto done;
                }
        }
        sbunlock(&so->so_snd);

        /*
         * Send trailers. Wimp out and use writev(2).
         */
        if (uap->hdtr != NULL && hdtr.trailers != NULL) {
                nuap.fd = uap->s;
                nuap.iovp = hdtr.trailers;
                nuap.iovcnt = hdtr.trl_cnt;
                error = writev(td, &nuap);
                if (error)
                        goto done;
                hdtr_size += td->td_retval[0];
        }

done:
        /*
         * If there was no error we have to clear td->td_retval[0]
         * because it may have been set by writev.
         */
        if (error == 0) {
                td->td_retval[0] = 0;
        }
        if (uap->sbytes != NULL) {
                sbytes += hdtr_size;
                copyout(&sbytes, uap->sbytes, sizeof(off_t));
        }
        if (vp)
                vrele(vp);
        if (so)
                fputsock(so);
        mtx_unlock(&Giant);
        return (error);
}
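
/*
 * Illustrative userland usage sketch (kept inside a comment; it is not part
 * of the kernel source above).  It shows how a caller might drive the
 * sendfile(2) interface documented in the block comment before sendfile():
 * the file name, descriptor handling, and error strategy below are
 * assumptions made for the example, only the syscall signature comes from
 * that comment.
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *	#include <sys/uio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	// 'sock' is assumed to be a connected SOCK_STREAM descriptor.
 *	static int
 *	send_whole_file(int sock, const char *path)
 *	{
 *		off_t sbytes = 0;
 *		int fd, error;
 *
 *		fd = open(path, O_RDONLY);
 *		if (fd == -1)
 *			return (-1);
 *		// nbytes == 0 means "send until EOF"; no headers/trailers.
 *		error = sendfile(fd, sock, 0, 0, NULL, &sbytes, 0);
 *		close(fd);
 *		return (error == 0 ? 0 : -1);
 *	}
 */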