1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 * $FreeBSD$ 38 */ 39 40 #include "opt_compat.h" 41 #include "opt_ktrace.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/lock.h> 47 #include <sys/mutex.h> 48 #include <sys/sysproto.h> 49 #include <sys/malloc.h> 50 #include <sys/filedesc.h> 51 #include <sys/event.h> 52 #include <sys/proc.h> 53 #include <sys/fcntl.h> 54 #include <sys/file.h> 55 #include <sys/lock.h> 56 #include <sys/mount.h> 57 #include <sys/mbuf.h> 58 #include <sys/protosw.h> 59 #include <sys/socket.h> 60 #include <sys/socketvar.h> 61 #include <sys/signalvar.h> 62 #include <sys/uio.h> 63 #include <sys/vnode.h> 64 #ifdef KTRACE 65 #include <sys/ktrace.h> 66 #endif 67 68 #include <vm/vm.h> 69 #include <vm/vm_object.h> 70 #include <vm/vm_page.h> 71 #include <vm/vm_pageout.h> 72 #include <vm/vm_kern.h> 73 #include <vm/vm_extern.h> 74 75 static void sf_buf_init(void *arg); 76 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) 77 struct sf_buf *sf_buf_alloc(void); 78 void sf_buf_free(caddr_t addr, void *args); 79 80 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 81 static int recvit(struct thread *td, int s, struct msghdr *mp, 82 caddr_t namelenp); 83 84 static int accept1(struct thread *td, struct accept_args *uap, int compat); 85 static int getsockname1(struct thread *td, struct getsockname_args *uap, 86 int compat); 87 static int getpeername1(struct thread *td, struct getpeername_args *uap, 88 int compat); 89 90 /* 91 * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the 92 * sf_freelist head with the sf_lock mutex. 93 */ 94 static struct { 95 SLIST_HEAD(, sf_buf) sf_head; 96 struct mtx sf_lock; 97 } sf_freelist; 98 99 vm_offset_t sf_base; 100 struct sf_buf *sf_bufs; 101 u_int sf_buf_alloc_want; 102 103 /* 104 * System call interface to the socket abstraction. 105 */ 106 #if defined(COMPAT_43) || defined(COMPAT_SUNOS) 107 #define COMPAT_OLDSOCK 108 #endif 109 110 extern struct fileops socketops; 111 112 /* 113 * MPSAFE 114 */ 115 int 116 socket(td, uap) 117 struct thread *td; 118 register struct socket_args /* { 119 int domain; 120 int type; 121 int protocol; 122 } */ *uap; 123 { 124 struct filedesc *fdp; 125 struct socket *so; 126 struct file *fp; 127 int fd, error; 128 129 mtx_lock(&Giant); 130 fdp = td->td_proc->p_fd; 131 error = falloc(td, &fp, &fd); 132 if (error) 133 goto done2; 134 fhold(fp); 135 error = socreate(uap->domain, &so, uap->type, uap->protocol, 136 td->td_ucred, td); 137 FILEDESC_LOCK(fdp); 138 if (error) { 139 if (fdp->fd_ofiles[fd] == fp) { 140 fdp->fd_ofiles[fd] = NULL; 141 FILEDESC_UNLOCK(fdp); 142 fdrop(fp, td); 143 } else 144 FILEDESC_UNLOCK(fdp); 145 } else { 146 fp->f_data = (caddr_t)so; /* already has ref count */ 147 fp->f_flag = FREAD|FWRITE; 148 fp->f_ops = &socketops; 149 fp->f_type = DTYPE_SOCKET; 150 FILEDESC_UNLOCK(fdp); 151 td->td_retval[0] = fd; 152 } 153 fdrop(fp, td); 154 done2: 155 mtx_unlock(&Giant); 156 return (error); 157 } 158 159 /* 160 * MPSAFE 161 */ 162 /* ARGSUSED */ 163 int 164 bind(td, uap) 165 struct thread *td; 166 register struct bind_args /* { 167 int s; 168 caddr_t name; 169 int namelen; 170 } */ *uap; 171 { 172 struct socket *so; 173 struct sockaddr *sa; 174 int error; 175 176 mtx_lock(&Giant); 177 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 178 goto done2; 179 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 180 goto done1; 181 error = sobind(so, sa, td); 182 FREE(sa, M_SONAME); 183 done1: 184 fputsock(so); 185 done2: 186 mtx_unlock(&Giant); 187 return (error); 188 } 189 190 /* 191 * MPSAFE 192 */ 193 /* ARGSUSED */ 194 int 195 listen(td, uap) 196 struct thread *td; 197 register struct listen_args /* { 198 int s; 199 int backlog; 200 } */ *uap; 201 { 202 struct socket *so; 203 int error; 204 205 mtx_lock(&Giant); 206 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 207 error = solisten(so, uap->backlog, td); 208 fputsock(so); 209 } 210 mtx_unlock(&Giant); 211 return(error); 212 } 213 214 /* 215 * accept1() 216 * MPSAFE 217 */ 218 static int 219 accept1(td, uap, compat) 220 struct thread *td; 221 register struct accept_args /* { 222 int s; 223 caddr_t name; 224 int *anamelen; 225 } */ *uap; 226 int compat; 227 { 228 struct filedesc *fdp; 229 struct file *nfp = NULL; 230 struct sockaddr *sa; 231 int namelen, error, s; 232 struct socket *head, *so; 233 int fd; 234 u_int fflag; 235 236 mtx_lock(&Giant); 237 fdp = td->td_proc->p_fd; 238 if (uap->name) { 239 error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen, 240 sizeof (namelen)); 241 if(error) 242 goto done2; 243 } 244 error = fgetsock(td, uap->s, &head, &fflag); 245 if (error) 246 goto done2; 247 s = splnet(); 248 if ((head->so_options & SO_ACCEPTCONN) == 0) { 249 splx(s); 250 error = EINVAL; 251 goto done; 252 } 253 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 254 splx(s); 255 error = EWOULDBLOCK; 256 goto done; 257 } 258 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 259 if (head->so_state & SS_CANTRCVMORE) { 260 head->so_error = ECONNABORTED; 261 break; 262 } 263 error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH, 264 "accept", 0); 265 if (error) { 266 splx(s); 267 goto done; 268 } 269 } 270 if (head->so_error) { 271 error = head->so_error; 272 head->so_error = 0; 273 splx(s); 274 goto done; 275 } 276 277 /* 278 * At this point we know that there is at least one connection 279 * ready to be accepted. Remove it from the queue prior to 280 * allocating the file descriptor for it since falloc() may 281 * block allowing another process to accept the connection 282 * instead. 283 */ 284 so = TAILQ_FIRST(&head->so_comp); 285 TAILQ_REMOVE(&head->so_comp, so, so_list); 286 head->so_qlen--; 287 288 error = falloc(td, &nfp, &fd); 289 if (error) { 290 /* 291 * Probably ran out of file descriptors. Put the 292 * unaccepted connection back onto the queue and 293 * do another wakeup so some other process might 294 * have a chance at it. 295 */ 296 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 297 head->so_qlen++; 298 wakeup_one(&head->so_timeo); 299 splx(s); 300 goto done; 301 } 302 fhold(nfp); 303 td->td_retval[0] = fd; 304 305 /* connection has been removed from the listen queue */ 306 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 307 308 so->so_state &= ~SS_COMP; 309 so->so_head = NULL; 310 if (head->so_sigio != NULL) 311 fsetown(fgetown(head->so_sigio), &so->so_sigio); 312 313 FILE_LOCK(nfp); 314 soref(so); /* file descriptor reference */ 315 nfp->f_data = (caddr_t)so; /* nfp has ref count from falloc */ 316 nfp->f_flag = fflag; 317 nfp->f_ops = &socketops; 318 nfp->f_type = DTYPE_SOCKET; 319 FILE_UNLOCK(nfp); 320 sa = 0; 321 error = soaccept(so, &sa); 322 if (error) { 323 /* 324 * return a namelen of zero for older code which might 325 * ignore the return value from accept. 326 */ 327 if (uap->name != NULL) { 328 namelen = 0; 329 (void) copyout((caddr_t)&namelen, 330 (caddr_t)uap->anamelen, sizeof(*uap->anamelen)); 331 } 332 goto noconnection; 333 } 334 if (sa == NULL) { 335 namelen = 0; 336 if (uap->name) 337 goto gotnoname; 338 splx(s); 339 error = 0; 340 goto done; 341 } 342 if (uap->name) { 343 /* check sa_len before it is destroyed */ 344 if (namelen > sa->sa_len) 345 namelen = sa->sa_len; 346 #ifdef COMPAT_OLDSOCK 347 if (compat) 348 ((struct osockaddr *)sa)->sa_family = 349 sa->sa_family; 350 #endif 351 error = copyout(sa, (caddr_t)uap->name, (u_int)namelen); 352 if (!error) 353 gotnoname: 354 error = copyout((caddr_t)&namelen, 355 (caddr_t)uap->anamelen, sizeof (*uap->anamelen)); 356 } 357 noconnection: 358 if (sa) 359 FREE(sa, M_SONAME); 360 361 /* 362 * close the new descriptor, assuming someone hasn't ripped it 363 * out from under us. 364 */ 365 if (error) { 366 FILEDESC_LOCK(fdp); 367 if (fdp->fd_ofiles[fd] == nfp) { 368 fdp->fd_ofiles[fd] = NULL; 369 FILEDESC_UNLOCK(fdp); 370 fdrop(nfp, td); 371 } else { 372 FILEDESC_UNLOCK(fdp); 373 } 374 } 375 splx(s); 376 377 /* 378 * Release explicitly held references before returning. 379 */ 380 done: 381 if (nfp != NULL) 382 fdrop(nfp, td); 383 fputsock(head); 384 done2: 385 mtx_unlock(&Giant); 386 return (error); 387 } 388 389 /* 390 * MPSAFE (accept1() is MPSAFE) 391 */ 392 int 393 accept(td, uap) 394 struct thread *td; 395 struct accept_args *uap; 396 { 397 398 return (accept1(td, uap, 0)); 399 } 400 401 #ifdef COMPAT_OLDSOCK 402 /* 403 * MPSAFE (accept1() is MPSAFE) 404 */ 405 int 406 oaccept(td, uap) 407 struct thread *td; 408 struct accept_args *uap; 409 { 410 411 return (accept1(td, uap, 1)); 412 } 413 #endif /* COMPAT_OLDSOCK */ 414 415 /* 416 * MPSAFE 417 */ 418 /* ARGSUSED */ 419 int 420 connect(td, uap) 421 struct thread *td; 422 register struct connect_args /* { 423 int s; 424 caddr_t name; 425 int namelen; 426 } */ *uap; 427 { 428 struct socket *so; 429 struct sockaddr *sa; 430 int error, s; 431 432 mtx_lock(&Giant); 433 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 434 goto done2; 435 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 436 error = EALREADY; 437 goto done1; 438 } 439 error = getsockaddr(&sa, uap->name, uap->namelen); 440 if (error) 441 goto done1; 442 error = soconnect(so, sa, td); 443 if (error) 444 goto bad; 445 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 446 FREE(sa, M_SONAME); 447 error = EINPROGRESS; 448 goto done1; 449 } 450 s = splnet(); 451 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 452 error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, "connec", 0); 453 if (error) 454 break; 455 } 456 if (error == 0) { 457 error = so->so_error; 458 so->so_error = 0; 459 } 460 splx(s); 461 bad: 462 so->so_state &= ~SS_ISCONNECTING; 463 FREE(sa, M_SONAME); 464 if (error == ERESTART) 465 error = EINTR; 466 done1: 467 fputsock(so); 468 done2: 469 mtx_unlock(&Giant); 470 return (error); 471 } 472 473 /* 474 * MPSAFE 475 */ 476 int 477 socketpair(td, uap) 478 struct thread *td; 479 register struct socketpair_args /* { 480 int domain; 481 int type; 482 int protocol; 483 int *rsv; 484 } */ *uap; 485 { 486 register struct filedesc *fdp = td->td_proc->p_fd; 487 struct file *fp1, *fp2; 488 struct socket *so1, *so2; 489 int fd, error, sv[2]; 490 491 mtx_lock(&Giant); 492 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 493 td->td_ucred, td); 494 if (error) 495 goto done2; 496 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 497 td->td_ucred, td); 498 if (error) 499 goto free1; 500 error = falloc(td, &fp1, &fd); 501 if (error) 502 goto free2; 503 fhold(fp1); 504 sv[0] = fd; 505 fp1->f_data = (caddr_t)so1; /* so1 already has ref count */ 506 error = falloc(td, &fp2, &fd); 507 if (error) 508 goto free3; 509 fhold(fp2); 510 fp2->f_data = (caddr_t)so2; /* so2 already has ref count */ 511 sv[1] = fd; 512 error = soconnect2(so1, so2); 513 if (error) 514 goto free4; 515 if (uap->type == SOCK_DGRAM) { 516 /* 517 * Datagram socket connection is asymmetric. 518 */ 519 error = soconnect2(so2, so1); 520 if (error) 521 goto free4; 522 } 523 FILE_LOCK(fp1); 524 fp1->f_flag = FREAD|FWRITE; 525 fp1->f_ops = &socketops; 526 fp1->f_type = DTYPE_SOCKET; 527 FILE_UNLOCK(fp1); 528 FILE_LOCK(fp2); 529 fp2->f_flag = FREAD|FWRITE; 530 fp2->f_ops = &socketops; 531 fp2->f_type = DTYPE_SOCKET; 532 FILE_UNLOCK(fp2); 533 error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int)); 534 fdrop(fp1, td); 535 fdrop(fp2, td); 536 goto done2; 537 free4: 538 FILEDESC_LOCK(fdp); 539 if (fdp->fd_ofiles[sv[1]] == fp2) { 540 fdp->fd_ofiles[sv[1]] = NULL; 541 FILEDESC_UNLOCK(fdp); 542 fdrop(fp2, td); 543 } else 544 FILEDESC_UNLOCK(fdp); 545 fdrop(fp2, td); 546 free3: 547 FILEDESC_LOCK(fdp); 548 if (fdp->fd_ofiles[sv[0]] == fp1) { 549 fdp->fd_ofiles[sv[0]] = NULL; 550 FILEDESC_UNLOCK(fdp); 551 fdrop(fp1, td); 552 } else 553 FILEDESC_UNLOCK(fdp); 554 fdrop(fp1, td); 555 free2: 556 (void)soclose(so2); 557 free1: 558 (void)soclose(so1); 559 done2: 560 mtx_unlock(&Giant); 561 return (error); 562 } 563 564 static int 565 sendit(td, s, mp, flags) 566 register struct thread *td; 567 int s; 568 register struct msghdr *mp; 569 int flags; 570 { 571 struct uio auio; 572 register struct iovec *iov; 573 register int i; 574 struct mbuf *control; 575 struct sockaddr *to = NULL; 576 int len, error; 577 struct socket *so; 578 #ifdef KTRACE 579 struct iovec *ktriov = NULL; 580 struct uio ktruio; 581 int iovlen; 582 #endif 583 584 if ((error = fgetsock(td, s, &so, NULL)) != 0) 585 return (error); 586 auio.uio_iov = mp->msg_iov; 587 auio.uio_iovcnt = mp->msg_iovlen; 588 auio.uio_segflg = UIO_USERSPACE; 589 auio.uio_rw = UIO_WRITE; 590 auio.uio_td = td; 591 auio.uio_offset = 0; /* XXX */ 592 auio.uio_resid = 0; 593 iov = mp->msg_iov; 594 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 595 if ((auio.uio_resid += iov->iov_len) < 0) { 596 error = EINVAL; 597 goto bad; 598 } 599 } 600 if (mp->msg_name) { 601 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 602 if (error) 603 goto bad; 604 } 605 if (mp->msg_control) { 606 if (mp->msg_controllen < sizeof(struct cmsghdr) 607 #ifdef COMPAT_OLDSOCK 608 && mp->msg_flags != MSG_COMPAT 609 #endif 610 ) { 611 error = EINVAL; 612 goto bad; 613 } 614 error = sockargs(&control, mp->msg_control, 615 mp->msg_controllen, MT_CONTROL); 616 if (error) 617 goto bad; 618 #ifdef COMPAT_OLDSOCK 619 if (mp->msg_flags == MSG_COMPAT) { 620 register struct cmsghdr *cm; 621 622 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 623 if (control == 0) { 624 error = ENOBUFS; 625 goto bad; 626 } else { 627 cm = mtod(control, struct cmsghdr *); 628 cm->cmsg_len = control->m_len; 629 cm->cmsg_level = SOL_SOCKET; 630 cm->cmsg_type = SCM_RIGHTS; 631 } 632 } 633 #endif 634 } else { 635 control = 0; 636 } 637 #ifdef KTRACE 638 if (KTRPOINT(td, KTR_GENIO)) { 639 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 640 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 641 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 642 ktruio = auio; 643 } 644 #endif 645 len = auio.uio_resid; 646 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control, 647 flags, td); 648 if (error) { 649 if (auio.uio_resid != len && (error == ERESTART || 650 error == EINTR || error == EWOULDBLOCK)) 651 error = 0; 652 /* Generation of SIGPIPE can be controlled per socket */ 653 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 654 PROC_LOCK(td->td_proc); 655 psignal(td->td_proc, SIGPIPE); 656 PROC_UNLOCK(td->td_proc); 657 } 658 } 659 if (error == 0) 660 td->td_retval[0] = len - auio.uio_resid; 661 #ifdef KTRACE 662 if (ktriov != NULL) { 663 if (error == 0) { 664 ktruio.uio_iov = ktriov; 665 ktruio.uio_resid = td->td_retval[0]; 666 ktrgenio(s, UIO_WRITE, &ktruio, error); 667 } 668 FREE(ktriov, M_TEMP); 669 } 670 #endif 671 bad: 672 fputsock(so); 673 if (to) 674 FREE(to, M_SONAME); 675 return (error); 676 } 677 678 /* 679 * MPSAFE 680 */ 681 int 682 sendto(td, uap) 683 struct thread *td; 684 register struct sendto_args /* { 685 int s; 686 caddr_t buf; 687 size_t len; 688 int flags; 689 caddr_t to; 690 int tolen; 691 } */ *uap; 692 { 693 struct msghdr msg; 694 struct iovec aiov; 695 int error; 696 697 msg.msg_name = uap->to; 698 msg.msg_namelen = uap->tolen; 699 msg.msg_iov = &aiov; 700 msg.msg_iovlen = 1; 701 msg.msg_control = 0; 702 #ifdef COMPAT_OLDSOCK 703 msg.msg_flags = 0; 704 #endif 705 aiov.iov_base = uap->buf; 706 aiov.iov_len = uap->len; 707 mtx_lock(&Giant); 708 error = sendit(td, uap->s, &msg, uap->flags); 709 mtx_unlock(&Giant); 710 return (error); 711 } 712 713 #ifdef COMPAT_OLDSOCK 714 /* 715 * MPSAFE 716 */ 717 int 718 osend(td, uap) 719 struct thread *td; 720 register struct osend_args /* { 721 int s; 722 caddr_t buf; 723 int len; 724 int flags; 725 } */ *uap; 726 { 727 struct msghdr msg; 728 struct iovec aiov; 729 int error; 730 731 msg.msg_name = 0; 732 msg.msg_namelen = 0; 733 msg.msg_iov = &aiov; 734 msg.msg_iovlen = 1; 735 aiov.iov_base = uap->buf; 736 aiov.iov_len = uap->len; 737 msg.msg_control = 0; 738 msg.msg_flags = 0; 739 mtx_lock(&Giant); 740 error = sendit(td, uap->s, &msg, uap->flags); 741 mtx_unlock(&Giant); 742 return (error); 743 } 744 745 /* 746 * MPSAFE 747 */ 748 int 749 osendmsg(td, uap) 750 struct thread *td; 751 register struct osendmsg_args /* { 752 int s; 753 caddr_t msg; 754 int flags; 755 } */ *uap; 756 { 757 struct msghdr msg; 758 struct iovec aiov[UIO_SMALLIOV], *iov; 759 int error; 760 761 mtx_lock(&Giant); 762 error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr)); 763 if (error) 764 goto done2; 765 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 766 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 767 error = EMSGSIZE; 768 goto done2; 769 } 770 MALLOC(iov, struct iovec *, 771 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 772 M_WAITOK); 773 } else { 774 iov = aiov; 775 } 776 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 777 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 778 if (error) 779 goto done; 780 msg.msg_flags = MSG_COMPAT; 781 msg.msg_iov = iov; 782 error = sendit(td, uap->s, &msg, uap->flags); 783 done: 784 if (iov != aiov) 785 FREE(iov, M_IOV); 786 done2: 787 mtx_unlock(&Giant); 788 return (error); 789 } 790 #endif 791 792 /* 793 * MPSAFE 794 */ 795 int 796 sendmsg(td, uap) 797 struct thread *td; 798 register struct sendmsg_args /* { 799 int s; 800 caddr_t msg; 801 int flags; 802 } */ *uap; 803 { 804 struct msghdr msg; 805 struct iovec aiov[UIO_SMALLIOV], *iov; 806 int error; 807 808 mtx_lock(&Giant); 809 error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg)); 810 if (error) 811 goto done2; 812 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 813 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 814 error = EMSGSIZE; 815 goto done2; 816 } 817 MALLOC(iov, struct iovec *, 818 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 819 M_WAITOK); 820 } else { 821 iov = aiov; 822 } 823 if (msg.msg_iovlen && 824 (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 825 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 826 goto done; 827 msg.msg_iov = iov; 828 #ifdef COMPAT_OLDSOCK 829 msg.msg_flags = 0; 830 #endif 831 error = sendit(td, uap->s, &msg, uap->flags); 832 done: 833 if (iov != aiov) 834 FREE(iov, M_IOV); 835 done2: 836 mtx_unlock(&Giant); 837 return (error); 838 } 839 840 static int 841 recvit(td, s, mp, namelenp) 842 register struct thread *td; 843 int s; 844 register struct msghdr *mp; 845 caddr_t namelenp; 846 { 847 struct uio auio; 848 register struct iovec *iov; 849 register int i; 850 int len, error; 851 struct mbuf *m, *control = 0; 852 caddr_t ctlbuf; 853 struct socket *so; 854 struct sockaddr *fromsa = 0; 855 #ifdef KTRACE 856 struct iovec *ktriov = NULL; 857 struct uio ktruio; 858 int iovlen; 859 #endif 860 861 if ((error = fgetsock(td, s, &so, NULL)) != 0) 862 return (error); 863 auio.uio_iov = mp->msg_iov; 864 auio.uio_iovcnt = mp->msg_iovlen; 865 auio.uio_segflg = UIO_USERSPACE; 866 auio.uio_rw = UIO_READ; 867 auio.uio_td = td; 868 auio.uio_offset = 0; /* XXX */ 869 auio.uio_resid = 0; 870 iov = mp->msg_iov; 871 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 872 if ((auio.uio_resid += iov->iov_len) < 0) { 873 fputsock(so); 874 return (EINVAL); 875 } 876 } 877 #ifdef KTRACE 878 if (KTRPOINT(td, KTR_GENIO)) { 879 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 880 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 881 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 882 ktruio = auio; 883 } 884 #endif 885 len = auio.uio_resid; 886 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 887 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 888 &mp->msg_flags); 889 if (error) { 890 if (auio.uio_resid != len && (error == ERESTART || 891 error == EINTR || error == EWOULDBLOCK)) 892 error = 0; 893 } 894 #ifdef KTRACE 895 if (ktriov != NULL) { 896 if (error == 0) { 897 ktruio.uio_iov = ktriov; 898 ktruio.uio_resid = len - auio.uio_resid; 899 ktrgenio(s, UIO_READ, &ktruio, error); 900 } 901 FREE(ktriov, M_TEMP); 902 } 903 #endif 904 if (error) 905 goto out; 906 td->td_retval[0] = len - auio.uio_resid; 907 if (mp->msg_name) { 908 len = mp->msg_namelen; 909 if (len <= 0 || fromsa == 0) 910 len = 0; 911 else { 912 #ifndef MIN 913 #define MIN(a,b) ((a)>(b)?(b):(a)) 914 #endif 915 /* save sa_len before it is destroyed by MSG_COMPAT */ 916 len = MIN(len, fromsa->sa_len); 917 #ifdef COMPAT_OLDSOCK 918 if (mp->msg_flags & MSG_COMPAT) 919 ((struct osockaddr *)fromsa)->sa_family = 920 fromsa->sa_family; 921 #endif 922 error = copyout(fromsa, 923 (caddr_t)mp->msg_name, (unsigned)len); 924 if (error) 925 goto out; 926 } 927 mp->msg_namelen = len; 928 if (namelenp && 929 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) { 930 #ifdef COMPAT_OLDSOCK 931 if (mp->msg_flags & MSG_COMPAT) 932 error = 0; /* old recvfrom didn't check */ 933 else 934 #endif 935 goto out; 936 } 937 } 938 if (mp->msg_control) { 939 #ifdef COMPAT_OLDSOCK 940 /* 941 * We assume that old recvmsg calls won't receive access 942 * rights and other control info, esp. as control info 943 * is always optional and those options didn't exist in 4.3. 944 * If we receive rights, trim the cmsghdr; anything else 945 * is tossed. 946 */ 947 if (control && mp->msg_flags & MSG_COMPAT) { 948 if (mtod(control, struct cmsghdr *)->cmsg_level != 949 SOL_SOCKET || 950 mtod(control, struct cmsghdr *)->cmsg_type != 951 SCM_RIGHTS) { 952 mp->msg_controllen = 0; 953 goto out; 954 } 955 control->m_len -= sizeof (struct cmsghdr); 956 control->m_data += sizeof (struct cmsghdr); 957 } 958 #endif 959 len = mp->msg_controllen; 960 m = control; 961 mp->msg_controllen = 0; 962 ctlbuf = (caddr_t) mp->msg_control; 963 964 while (m && len > 0) { 965 unsigned int tocopy; 966 967 if (len >= m->m_len) 968 tocopy = m->m_len; 969 else { 970 mp->msg_flags |= MSG_CTRUNC; 971 tocopy = len; 972 } 973 974 if ((error = copyout((caddr_t)mtod(m, caddr_t), 975 ctlbuf, tocopy)) != 0) 976 goto out; 977 978 ctlbuf += tocopy; 979 len -= tocopy; 980 m = m->m_next; 981 } 982 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 983 } 984 out: 985 fputsock(so); 986 if (fromsa) 987 FREE(fromsa, M_SONAME); 988 if (control) 989 m_freem(control); 990 return (error); 991 } 992 993 /* 994 * MPSAFE 995 */ 996 int 997 recvfrom(td, uap) 998 struct thread *td; 999 register struct recvfrom_args /* { 1000 int s; 1001 caddr_t buf; 1002 size_t len; 1003 int flags; 1004 caddr_t from; 1005 int *fromlenaddr; 1006 } */ *uap; 1007 { 1008 struct msghdr msg; 1009 struct iovec aiov; 1010 int error; 1011 1012 mtx_lock(&Giant); 1013 if (uap->fromlenaddr) { 1014 error = copyin((caddr_t)uap->fromlenaddr, 1015 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen)); 1016 if (error) 1017 goto done2; 1018 } else { 1019 msg.msg_namelen = 0; 1020 } 1021 msg.msg_name = uap->from; 1022 msg.msg_iov = &aiov; 1023 msg.msg_iovlen = 1; 1024 aiov.iov_base = uap->buf; 1025 aiov.iov_len = uap->len; 1026 msg.msg_control = 0; 1027 msg.msg_flags = uap->flags; 1028 error = recvit(td, uap->s, &msg, (caddr_t)uap->fromlenaddr); 1029 done2: 1030 mtx_unlock(&Giant); 1031 return(error); 1032 } 1033 1034 #ifdef COMPAT_OLDSOCK 1035 /* 1036 * MPSAFE 1037 */ 1038 int 1039 orecvfrom(td, uap) 1040 struct thread *td; 1041 struct recvfrom_args *uap; 1042 { 1043 1044 uap->flags |= MSG_COMPAT; 1045 return (recvfrom(td, uap)); 1046 } 1047 #endif 1048 1049 1050 #ifdef COMPAT_OLDSOCK 1051 /* 1052 * MPSAFE 1053 */ 1054 int 1055 orecv(td, uap) 1056 struct thread *td; 1057 register struct orecv_args /* { 1058 int s; 1059 caddr_t buf; 1060 int len; 1061 int flags; 1062 } */ *uap; 1063 { 1064 struct msghdr msg; 1065 struct iovec aiov; 1066 int error; 1067 1068 mtx_lock(&Giant); 1069 msg.msg_name = 0; 1070 msg.msg_namelen = 0; 1071 msg.msg_iov = &aiov; 1072 msg.msg_iovlen = 1; 1073 aiov.iov_base = uap->buf; 1074 aiov.iov_len = uap->len; 1075 msg.msg_control = 0; 1076 msg.msg_flags = uap->flags; 1077 error = recvit(td, uap->s, &msg, (caddr_t)0); 1078 mtx_unlock(&Giant); 1079 return (error); 1080 } 1081 1082 /* 1083 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1084 * overlays the new one, missing only the flags, and with the (old) access 1085 * rights where the control fields are now. 1086 * 1087 * MPSAFE 1088 */ 1089 int 1090 orecvmsg(td, uap) 1091 struct thread *td; 1092 register struct orecvmsg_args /* { 1093 int s; 1094 struct omsghdr *msg; 1095 int flags; 1096 } */ *uap; 1097 { 1098 struct msghdr msg; 1099 struct iovec aiov[UIO_SMALLIOV], *iov; 1100 int error; 1101 1102 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, 1103 sizeof (struct omsghdr)); 1104 if (error) 1105 return (error); 1106 1107 mtx_lock(&Giant); 1108 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1109 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1110 error = EMSGSIZE; 1111 goto done2; 1112 } 1113 MALLOC(iov, struct iovec *, 1114 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1115 M_WAITOK); 1116 } else { 1117 iov = aiov; 1118 } 1119 msg.msg_flags = uap->flags | MSG_COMPAT; 1120 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 1121 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1122 if (error) 1123 goto done; 1124 msg.msg_iov = iov; 1125 error = recvit(td, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen); 1126 1127 if (msg.msg_controllen && error == 0) 1128 error = copyout((caddr_t)&msg.msg_controllen, 1129 (caddr_t)&uap->msg->msg_accrightslen, sizeof (int)); 1130 done: 1131 if (iov != aiov) 1132 FREE(iov, M_IOV); 1133 done2: 1134 mtx_unlock(&Giant); 1135 return (error); 1136 } 1137 #endif 1138 1139 /* 1140 * MPSAFE 1141 */ 1142 int 1143 recvmsg(td, uap) 1144 struct thread *td; 1145 register struct recvmsg_args /* { 1146 int s; 1147 struct msghdr *msg; 1148 int flags; 1149 } */ *uap; 1150 { 1151 struct msghdr msg; 1152 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1153 register int error; 1154 1155 mtx_lock(&Giant); 1156 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg)); 1157 if (error) 1158 goto done2; 1159 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1160 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1161 error = EMSGSIZE; 1162 goto done2; 1163 } 1164 MALLOC(iov, struct iovec *, 1165 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1166 M_WAITOK); 1167 } else { 1168 iov = aiov; 1169 } 1170 #ifdef COMPAT_OLDSOCK 1171 msg.msg_flags = uap->flags &~ MSG_COMPAT; 1172 #else 1173 msg.msg_flags = uap->flags; 1174 #endif 1175 uiov = msg.msg_iov; 1176 msg.msg_iov = iov; 1177 error = copyin((caddr_t)uiov, (caddr_t)iov, 1178 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1179 if (error) 1180 goto done; 1181 error = recvit(td, uap->s, &msg, (caddr_t)0); 1182 if (!error) { 1183 msg.msg_iov = uiov; 1184 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg)); 1185 } 1186 done: 1187 if (iov != aiov) 1188 FREE(iov, M_IOV); 1189 done2: 1190 mtx_unlock(&Giant); 1191 return (error); 1192 } 1193 1194 /* 1195 * MPSAFE 1196 */ 1197 /* ARGSUSED */ 1198 int 1199 shutdown(td, uap) 1200 struct thread *td; 1201 register struct shutdown_args /* { 1202 int s; 1203 int how; 1204 } */ *uap; 1205 { 1206 struct socket *so; 1207 int error; 1208 1209 mtx_lock(&Giant); 1210 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1211 error = soshutdown(so, uap->how); 1212 fputsock(so); 1213 } 1214 mtx_unlock(&Giant); 1215 return(error); 1216 } 1217 1218 /* 1219 * MPSAFE 1220 */ 1221 /* ARGSUSED */ 1222 int 1223 setsockopt(td, uap) 1224 struct thread *td; 1225 register struct setsockopt_args /* { 1226 int s; 1227 int level; 1228 int name; 1229 caddr_t val; 1230 int valsize; 1231 } */ *uap; 1232 { 1233 struct socket *so; 1234 struct sockopt sopt; 1235 int error; 1236 1237 if (uap->val == 0 && uap->valsize != 0) 1238 return (EFAULT); 1239 if (uap->valsize < 0) 1240 return (EINVAL); 1241 1242 mtx_lock(&Giant); 1243 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1244 sopt.sopt_dir = SOPT_SET; 1245 sopt.sopt_level = uap->level; 1246 sopt.sopt_name = uap->name; 1247 sopt.sopt_val = uap->val; 1248 sopt.sopt_valsize = uap->valsize; 1249 sopt.sopt_td = td; 1250 error = sosetopt(so, &sopt); 1251 fputsock(so); 1252 } 1253 mtx_unlock(&Giant); 1254 return(error); 1255 } 1256 1257 /* 1258 * MPSAFE 1259 */ 1260 /* ARGSUSED */ 1261 int 1262 getsockopt(td, uap) 1263 struct thread *td; 1264 register struct getsockopt_args /* { 1265 int s; 1266 int level; 1267 int name; 1268 caddr_t val; 1269 int *avalsize; 1270 } */ *uap; 1271 { 1272 int valsize, error; 1273 struct socket *so; 1274 struct sockopt sopt; 1275 1276 mtx_lock(&Giant); 1277 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1278 goto done2; 1279 if (uap->val) { 1280 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize, 1281 sizeof (valsize)); 1282 if (error) 1283 goto done1; 1284 if (valsize < 0) { 1285 error = EINVAL; 1286 goto done1; 1287 } 1288 } else { 1289 valsize = 0; 1290 } 1291 1292 sopt.sopt_dir = SOPT_GET; 1293 sopt.sopt_level = uap->level; 1294 sopt.sopt_name = uap->name; 1295 sopt.sopt_val = uap->val; 1296 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1297 sopt.sopt_td = td; 1298 1299 error = sogetopt(so, &sopt); 1300 if (error == 0) { 1301 valsize = sopt.sopt_valsize; 1302 error = copyout((caddr_t)&valsize, 1303 (caddr_t)uap->avalsize, sizeof (valsize)); 1304 } 1305 done1: 1306 fputsock(so); 1307 done2: 1308 mtx_unlock(&Giant); 1309 return (error); 1310 } 1311 1312 /* 1313 * getsockname1() - Get socket name. 1314 * 1315 * MPSAFE 1316 */ 1317 /* ARGSUSED */ 1318 static int 1319 getsockname1(td, uap, compat) 1320 struct thread *td; 1321 register struct getsockname_args /* { 1322 int fdes; 1323 caddr_t asa; 1324 int *alen; 1325 } */ *uap; 1326 int compat; 1327 { 1328 struct socket *so; 1329 struct sockaddr *sa; 1330 int len, error; 1331 1332 mtx_lock(&Giant); 1333 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1334 goto done2; 1335 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); 1336 if (error) 1337 goto done1; 1338 sa = 0; 1339 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1340 if (error) 1341 goto bad; 1342 if (sa == 0) { 1343 len = 0; 1344 goto gotnothing; 1345 } 1346 1347 len = MIN(len, sa->sa_len); 1348 #ifdef COMPAT_OLDSOCK 1349 if (compat) 1350 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1351 #endif 1352 error = copyout(sa, (caddr_t)uap->asa, (u_int)len); 1353 if (error == 0) 1354 gotnothing: 1355 error = copyout((caddr_t)&len, (caddr_t)uap->alen, 1356 sizeof (len)); 1357 bad: 1358 if (sa) 1359 FREE(sa, M_SONAME); 1360 done1: 1361 fputsock(so); 1362 done2: 1363 mtx_unlock(&Giant); 1364 return (error); 1365 } 1366 1367 /* 1368 * MPSAFE 1369 */ 1370 int 1371 getsockname(td, uap) 1372 struct thread *td; 1373 struct getsockname_args *uap; 1374 { 1375 1376 return (getsockname1(td, uap, 0)); 1377 } 1378 1379 #ifdef COMPAT_OLDSOCK 1380 /* 1381 * MPSAFE 1382 */ 1383 int 1384 ogetsockname(td, uap) 1385 struct thread *td; 1386 struct getsockname_args *uap; 1387 { 1388 1389 return (getsockname1(td, uap, 1)); 1390 } 1391 #endif /* COMPAT_OLDSOCK */ 1392 1393 /* 1394 * getpeername1() - Get name of peer for connected socket. 1395 * 1396 * MPSAFE 1397 */ 1398 /* ARGSUSED */ 1399 static int 1400 getpeername1(td, uap, compat) 1401 struct thread *td; 1402 register struct getpeername_args /* { 1403 int fdes; 1404 caddr_t asa; 1405 int *alen; 1406 } */ *uap; 1407 int compat; 1408 { 1409 struct socket *so; 1410 struct sockaddr *sa; 1411 int len, error; 1412 1413 mtx_lock(&Giant); 1414 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1415 goto done2; 1416 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1417 error = ENOTCONN; 1418 goto done1; 1419 } 1420 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); 1421 if (error) 1422 goto done1; 1423 sa = 0; 1424 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1425 if (error) 1426 goto bad; 1427 if (sa == 0) { 1428 len = 0; 1429 goto gotnothing; 1430 } 1431 len = MIN(len, sa->sa_len); 1432 #ifdef COMPAT_OLDSOCK 1433 if (compat) 1434 ((struct osockaddr *)sa)->sa_family = 1435 sa->sa_family; 1436 #endif 1437 error = copyout(sa, (caddr_t)uap->asa, (u_int)len); 1438 if (error) 1439 goto bad; 1440 gotnothing: 1441 error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len)); 1442 bad: 1443 if (sa) 1444 FREE(sa, M_SONAME); 1445 done1: 1446 fputsock(so); 1447 done2: 1448 mtx_unlock(&Giant); 1449 return (error); 1450 } 1451 1452 /* 1453 * MPSAFE 1454 */ 1455 int 1456 getpeername(td, uap) 1457 struct thread *td; 1458 struct getpeername_args *uap; 1459 { 1460 1461 return (getpeername1(td, uap, 0)); 1462 } 1463 1464 #ifdef COMPAT_OLDSOCK 1465 /* 1466 * MPSAFE 1467 */ 1468 int 1469 ogetpeername(td, uap) 1470 struct thread *td; 1471 struct ogetpeername_args *uap; 1472 { 1473 1474 /* XXX uap should have type `getpeername_args *' to begin with. */ 1475 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1476 } 1477 #endif /* COMPAT_OLDSOCK */ 1478 1479 int 1480 sockargs(mp, buf, buflen, type) 1481 struct mbuf **mp; 1482 caddr_t buf; 1483 int buflen, type; 1484 { 1485 register struct sockaddr *sa; 1486 register struct mbuf *m; 1487 int error; 1488 1489 if ((u_int)buflen > MLEN) { 1490 #ifdef COMPAT_OLDSOCK 1491 if (type == MT_SONAME && (u_int)buflen <= 112) 1492 buflen = MLEN; /* unix domain compat. hack */ 1493 else 1494 #endif 1495 return (EINVAL); 1496 } 1497 m = m_get(M_TRYWAIT, type); 1498 if (m == NULL) 1499 return (ENOBUFS); 1500 m->m_len = buflen; 1501 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1502 if (error) 1503 (void) m_free(m); 1504 else { 1505 *mp = m; 1506 if (type == MT_SONAME) { 1507 sa = mtod(m, struct sockaddr *); 1508 1509 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1510 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1511 sa->sa_family = sa->sa_len; 1512 #endif 1513 sa->sa_len = buflen; 1514 } 1515 } 1516 return (error); 1517 } 1518 1519 int 1520 getsockaddr(namp, uaddr, len) 1521 struct sockaddr **namp; 1522 caddr_t uaddr; 1523 size_t len; 1524 { 1525 struct sockaddr *sa; 1526 int error; 1527 1528 if (len > SOCK_MAXADDRLEN) 1529 return ENAMETOOLONG; 1530 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1531 error = copyin(uaddr, sa, len); 1532 if (error) { 1533 FREE(sa, M_SONAME); 1534 } else { 1535 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1536 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1537 sa->sa_family = sa->sa_len; 1538 #endif 1539 sa->sa_len = len; 1540 *namp = sa; 1541 } 1542 return error; 1543 } 1544 1545 /* 1546 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) 1547 * XXX - The sf_buf functions are currently private to sendfile(2), so have 1548 * been made static, but may be useful in the future for doing zero-copy in 1549 * other parts of the networking code. 1550 */ 1551 static void 1552 sf_buf_init(void *arg) 1553 { 1554 int i; 1555 1556 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF); 1557 mtx_lock(&sf_freelist.sf_lock); 1558 SLIST_INIT(&sf_freelist.sf_head); 1559 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE); 1560 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, 1561 M_NOWAIT | M_ZERO); 1562 for (i = 0; i < nsfbufs; i++) { 1563 sf_bufs[i].kva = sf_base + i * PAGE_SIZE; 1564 SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list); 1565 } 1566 sf_buf_alloc_want = 0; 1567 mtx_unlock(&sf_freelist.sf_lock); 1568 } 1569 1570 /* 1571 * Get an sf_buf from the freelist. Will block if none are available. 1572 */ 1573 struct sf_buf * 1574 sf_buf_alloc() 1575 { 1576 struct sf_buf *sf; 1577 int error; 1578 1579 mtx_lock(&sf_freelist.sf_lock); 1580 while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) { 1581 sf_buf_alloc_want++; 1582 error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH, 1583 "sfbufa", 0); 1584 sf_buf_alloc_want--; 1585 1586 /* 1587 * If we got a signal, don't risk going back to sleep. 1588 */ 1589 if (error) 1590 break; 1591 } 1592 if (sf != NULL) 1593 SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list); 1594 mtx_unlock(&sf_freelist.sf_lock); 1595 return (sf); 1596 } 1597 1598 #define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT]) 1599 1600 /* 1601 * Detatch mapped page and release resources back to the system. 1602 */ 1603 void 1604 sf_buf_free(caddr_t addr, void *args) 1605 { 1606 struct sf_buf *sf; 1607 struct vm_page *m; 1608 1609 GIANT_REQUIRED; 1610 1611 sf = dtosf(addr); 1612 pmap_qremove((vm_offset_t)addr, 1); 1613 m = sf->m; 1614 vm_page_unwire(m, 0); 1615 /* 1616 * Check for the object going away on us. This can 1617 * happen since we don't hold a reference to it. 1618 * If so, we're responsible for freeing the page. 1619 */ 1620 if (m->wire_count == 0 && m->object == NULL) 1621 vm_page_free(m); 1622 sf->m = NULL; 1623 mtx_lock(&sf_freelist.sf_lock); 1624 SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list); 1625 if (sf_buf_alloc_want > 0) 1626 wakeup_one(&sf_freelist); 1627 mtx_unlock(&sf_freelist.sf_lock); 1628 } 1629 1630 /* 1631 * sendfile(2) 1632 * 1633 * MPSAFE 1634 * 1635 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1636 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1637 * 1638 * Send a file specified by 'fd' and starting at 'offset' to a socket 1639 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1640 * nbytes == 0. Optionally add a header and/or trailer to the socket 1641 * output. If specified, write the total number of bytes sent into *sbytes. 1642 * 1643 */ 1644 int 1645 sendfile(struct thread *td, struct sendfile_args *uap) 1646 { 1647 struct vnode *vp; 1648 struct vm_object *obj; 1649 struct socket *so = NULL; 1650 struct mbuf *m; 1651 struct sf_buf *sf; 1652 struct vm_page *pg; 1653 struct writev_args nuap; 1654 struct sf_hdtr hdtr; 1655 off_t off, xfsize, hdtr_size, sbytes = 0; 1656 int error, s; 1657 1658 mtx_lock(&Giant); 1659 1660 hdtr_size = 0; 1661 1662 /* 1663 * The descriptor must be a regular file and have a backing VM object. 1664 */ 1665 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1666 goto done; 1667 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1668 error = EINVAL; 1669 goto done; 1670 } 1671 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1672 goto done; 1673 if (so->so_type != SOCK_STREAM) { 1674 error = EINVAL; 1675 goto done; 1676 } 1677 if ((so->so_state & SS_ISCONNECTED) == 0) { 1678 error = ENOTCONN; 1679 goto done; 1680 } 1681 if (uap->offset < 0) { 1682 error = EINVAL; 1683 goto done; 1684 } 1685 1686 /* 1687 * If specified, get the pointer to the sf_hdtr struct for 1688 * any headers/trailers. 1689 */ 1690 if (uap->hdtr != NULL) { 1691 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1692 if (error) 1693 goto done; 1694 /* 1695 * Send any headers. Wimp out and use writev(2). 1696 */ 1697 if (hdtr.headers != NULL) { 1698 nuap.fd = uap->s; 1699 nuap.iovp = hdtr.headers; 1700 nuap.iovcnt = hdtr.hdr_cnt; 1701 error = writev(td, &nuap); 1702 if (error) 1703 goto done; 1704 hdtr_size += td->td_retval[0]; 1705 } 1706 } 1707 1708 /* 1709 * Protect against multiple writers to the socket. 1710 */ 1711 (void) sblock(&so->so_snd, M_WAITOK); 1712 1713 /* 1714 * Loop through the pages in the file, starting with the requested 1715 * offset. Get a file page (do I/O if necessary), map the file page 1716 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1717 * it on the socket. 1718 */ 1719 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1720 vm_pindex_t pindex; 1721 vm_offset_t pgoff; 1722 1723 pindex = OFF_TO_IDX(off); 1724 retry_lookup: 1725 /* 1726 * Calculate the amount to transfer. Not to exceed a page, 1727 * the EOF, or the passed in nbytes. 1728 */ 1729 xfsize = obj->un_pager.vnp.vnp_size - off; 1730 if (xfsize > PAGE_SIZE) 1731 xfsize = PAGE_SIZE; 1732 pgoff = (vm_offset_t)(off & PAGE_MASK); 1733 if (PAGE_SIZE - pgoff < xfsize) 1734 xfsize = PAGE_SIZE - pgoff; 1735 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1736 xfsize = uap->nbytes - sbytes; 1737 if (xfsize <= 0) 1738 break; 1739 /* 1740 * Optimize the non-blocking case by looking at the socket space 1741 * before going to the extra work of constituting the sf_buf. 1742 */ 1743 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1744 if (so->so_state & SS_CANTSENDMORE) 1745 error = EPIPE; 1746 else 1747 error = EAGAIN; 1748 sbunlock(&so->so_snd); 1749 goto done; 1750 } 1751 /* 1752 * Attempt to look up the page. 1753 * 1754 * Allocate if not found 1755 * 1756 * Wait and loop if busy. 1757 */ 1758 pg = vm_page_lookup(obj, pindex); 1759 1760 if (pg == NULL) { 1761 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL); 1762 if (pg == NULL) { 1763 VM_WAIT; 1764 goto retry_lookup; 1765 } 1766 vm_page_wakeup(pg); 1767 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) { 1768 goto retry_lookup; 1769 } 1770 1771 /* 1772 * Wire the page so it does not get ripped out from under 1773 * us. 1774 */ 1775 1776 vm_page_wire(pg); 1777 1778 /* 1779 * If page is not valid for what we need, initiate I/O 1780 */ 1781 1782 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1783 int bsize; 1784 1785 /* 1786 * Ensure that our page is still around when the I/O 1787 * completes. 1788 */ 1789 vm_page_io_start(pg); 1790 1791 /* 1792 * Get the page from backing store. 1793 */ 1794 bsize = vp->v_mount->mnt_stat.f_iosize; 1795 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1796 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1797 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1798 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1799 td->td_ucred, NULL, td); 1800 VOP_UNLOCK(vp, 0, td); 1801 vm_page_flag_clear(pg, PG_ZERO); 1802 vm_page_io_finish(pg); 1803 if (error) { 1804 vm_page_unwire(pg, 0); 1805 /* 1806 * See if anyone else might know about this page. 1807 * If not and it is not valid, then free it. 1808 */ 1809 if (pg->wire_count == 0 && pg->valid == 0 && 1810 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1811 pg->hold_count == 0) { 1812 vm_page_busy(pg); 1813 vm_page_free(pg); 1814 } 1815 sbunlock(&so->so_snd); 1816 goto done; 1817 } 1818 } 1819 1820 1821 /* 1822 * Get a sendfile buf. We usually wait as long as necessary, 1823 * but this wait can be interrupted. 1824 */ 1825 if ((sf = sf_buf_alloc()) == NULL) { 1826 vm_page_unwire(pg, 0); 1827 if (pg->wire_count == 0 && pg->object == NULL) 1828 vm_page_free(pg); 1829 sbunlock(&so->so_snd); 1830 error = EINTR; 1831 goto done; 1832 } 1833 1834 /* 1835 * Allocate a kernel virtual page and insert the physical page 1836 * into it. 1837 */ 1838 sf->m = pg; 1839 pmap_qenter(sf->kva, &pg, 1); 1840 /* 1841 * Get an mbuf header and set it up as having external storage. 1842 */ 1843 MGETHDR(m, M_TRYWAIT, MT_DATA); 1844 if (m == NULL) { 1845 error = ENOBUFS; 1846 sf_buf_free((void *)sf->kva, NULL); 1847 sbunlock(&so->so_snd); 1848 goto done; 1849 } 1850 /* 1851 * Setup external storage for mbuf. 1852 */ 1853 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL, M_RDONLY, 1854 EXT_SFBUF); 1855 m->m_data = (char *) sf->kva + pgoff; 1856 m->m_pkthdr.len = m->m_len = xfsize; 1857 /* 1858 * Add the buffer to the socket buffer chain. 1859 */ 1860 s = splnet(); 1861 retry_space: 1862 /* 1863 * Make sure that the socket is still able to take more data. 1864 * CANTSENDMORE being true usually means that the connection 1865 * was closed. so_error is true when an error was sensed after 1866 * a previous send. 1867 * The state is checked after the page mapping and buffer 1868 * allocation above since those operations may block and make 1869 * any socket checks stale. From this point forward, nothing 1870 * blocks before the pru_send (or more accurately, any blocking 1871 * results in a loop back to here to re-check). 1872 */ 1873 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1874 if (so->so_state & SS_CANTSENDMORE) { 1875 error = EPIPE; 1876 } else { 1877 error = so->so_error; 1878 so->so_error = 0; 1879 } 1880 m_freem(m); 1881 sbunlock(&so->so_snd); 1882 splx(s); 1883 goto done; 1884 } 1885 /* 1886 * Wait for socket space to become available. We do this just 1887 * after checking the connection state above in order to avoid 1888 * a race condition with sbwait(). 1889 */ 1890 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1891 if (so->so_state & SS_NBIO) { 1892 m_freem(m); 1893 sbunlock(&so->so_snd); 1894 splx(s); 1895 error = EAGAIN; 1896 goto done; 1897 } 1898 error = sbwait(&so->so_snd); 1899 /* 1900 * An error from sbwait usually indicates that we've 1901 * been interrupted by a signal. If we've sent anything 1902 * then return bytes sent, otherwise return the error. 1903 */ 1904 if (error) { 1905 m_freem(m); 1906 sbunlock(&so->so_snd); 1907 splx(s); 1908 goto done; 1909 } 1910 goto retry_space; 1911 } 1912 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 1913 splx(s); 1914 if (error) { 1915 sbunlock(&so->so_snd); 1916 goto done; 1917 } 1918 } 1919 sbunlock(&so->so_snd); 1920 1921 /* 1922 * Send trailers. Wimp out and use writev(2). 1923 */ 1924 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 1925 nuap.fd = uap->s; 1926 nuap.iovp = hdtr.trailers; 1927 nuap.iovcnt = hdtr.trl_cnt; 1928 error = writev(td, &nuap); 1929 if (error) 1930 goto done; 1931 hdtr_size += td->td_retval[0]; 1932 } 1933 1934 done: 1935 /* 1936 * If there was no error we have to clear td->td_retval[0] 1937 * because it may have been set by writev. 1938 */ 1939 if (error == 0) { 1940 td->td_retval[0] = 0; 1941 } 1942 if (uap->sbytes != NULL) { 1943 sbytes += hdtr_size; 1944 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 1945 } 1946 if (vp) 1947 vrele(vp); 1948 if (so) 1949 fputsock(so); 1950 mtx_unlock(&Giant); 1951 return (error); 1952 } 1953