1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 * $FreeBSD$ 38 */ 39 40 #include "opt_compat.h" 41 #include "opt_ktrace.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/lock.h> 47 #include <sys/mutex.h> 48 #include <sys/sysproto.h> 49 #include <sys/malloc.h> 50 #include <sys/filedesc.h> 51 #include <sys/event.h> 52 #include <sys/proc.h> 53 #include <sys/fcntl.h> 54 #include <sys/file.h> 55 #include <sys/lock.h> 56 #include <sys/mount.h> 57 #include <sys/mbuf.h> 58 #include <sys/protosw.h> 59 #include <sys/socket.h> 60 #include <sys/socketvar.h> 61 #include <sys/signalvar.h> 62 #include <sys/uio.h> 63 #include <sys/vnode.h> 64 #ifdef KTRACE 65 #include <sys/ktrace.h> 66 #endif 67 68 #include <vm/vm.h> 69 #include <vm/vm_object.h> 70 #include <vm/vm_page.h> 71 #include <vm/vm_pageout.h> 72 #include <vm/vm_kern.h> 73 #include <vm/vm_extern.h> 74 75 static void sf_buf_init(void *arg); 76 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) 77 static struct sf_buf *sf_buf_alloc(void); 78 static void sf_buf_free(caddr_t addr, void *args); 79 80 static int sendit __P((struct thread *td, int s, struct msghdr *mp, int flags)); 81 static int recvit __P((struct thread *td, int s, struct msghdr *mp, 82 caddr_t namelenp)); 83 84 static int accept1 __P((struct thread *td, struct accept_args *uap, int compat)); 85 static int getsockname1 __P((struct thread *td, struct getsockname_args *uap, 86 int compat)); 87 static int getpeername1 __P((struct thread *td, struct getpeername_args *uap, 88 int compat)); 89 90 /* 91 * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the 92 * sf_freelist head with the sf_lock mutex. 93 */ 94 static struct { 95 SLIST_HEAD(, sf_buf) sf_head; 96 struct mtx sf_lock; 97 } sf_freelist; 98 99 static vm_offset_t sf_base; 100 static struct sf_buf *sf_bufs; 101 static u_int sf_buf_alloc_want; 102 103 /* 104 * System call interface to the socket abstraction. 105 */ 106 #if defined(COMPAT_43) || defined(COMPAT_SUNOS) 107 #define COMPAT_OLDSOCK 108 #endif 109 110 extern struct fileops socketops; 111 112 /* 113 * MPSAFE 114 */ 115 int 116 socket(td, uap) 117 struct thread *td; 118 register struct socket_args /* { 119 int domain; 120 int type; 121 int protocol; 122 } */ *uap; 123 { 124 struct filedesc *fdp; 125 struct socket *so; 126 struct file *fp; 127 int fd, error; 128 129 mtx_lock(&Giant); 130 fdp = td->td_proc->p_fd; 131 error = falloc(td, &fp, &fd); 132 if (error) 133 goto done2; 134 fhold(fp); 135 error = socreate(uap->domain, &so, uap->type, uap->protocol, td); 136 if (error) { 137 if (fdp->fd_ofiles[fd] == fp) { 138 fdp->fd_ofiles[fd] = NULL; 139 fdrop(fp, td); 140 } 141 } else { 142 fp->f_data = (caddr_t)so; 143 fp->f_flag = FREAD|FWRITE; 144 fp->f_ops = &socketops; 145 fp->f_type = DTYPE_SOCKET; 146 td->td_retval[0] = fd; 147 } 148 fdrop(fp, td); 149 done2: 150 mtx_unlock(&Giant); 151 return (error); 152 } 153 154 /* 155 * MPSAFE 156 */ 157 /* ARGSUSED */ 158 int 159 bind(td, uap) 160 struct thread *td; 161 register struct bind_args /* { 162 int s; 163 caddr_t name; 164 int namelen; 165 } */ *uap; 166 { 167 struct file *fp; 168 struct sockaddr *sa; 169 int error; 170 171 mtx_lock(&Giant); 172 error = holdsock(td->td_proc->p_fd, uap->s, &fp); 173 if (error) 174 goto done2; 175 error = getsockaddr(&sa, uap->name, uap->namelen); 176 if (error) { 177 fdrop(fp, td); 178 goto done2; 179 } 180 error = sobind((struct socket *)fp->f_data, sa, td); 181 FREE(sa, M_SONAME); 182 fdrop(fp, td); 183 done2: 184 mtx_unlock(&Giant); 185 return (error); 186 } 187 188 /* 189 * MPSAFE 190 */ 191 /* ARGSUSED */ 192 int 193 listen(td, uap) 194 struct thread *td; 195 register struct listen_args /* { 196 int s; 197 int backlog; 198 } */ *uap; 199 { 200 struct file *fp; 201 int error; 202 203 mtx_lock(&Giant); 204 error = holdsock(td->td_proc->p_fd, uap->s, &fp); 205 if (error == 0) { 206 error = solisten((struct socket *)fp->f_data, uap->backlog, td); 207 fdrop(fp, td); 208 } 209 mtx_unlock(&Giant); 210 return(error); 211 } 212 213 /* 214 * accept1() 215 * MPSAFE 216 */ 217 static int 218 accept1(td, uap, compat) 219 struct thread *td; 220 register struct accept_args /* { 221 int s; 222 caddr_t name; 223 int *anamelen; 224 } */ *uap; 225 int compat; 226 { 227 struct filedesc *fdp; 228 struct file *lfp = NULL; 229 struct file *nfp = NULL; 230 struct sockaddr *sa; 231 int namelen, error, s; 232 struct socket *head, *so; 233 int fd; 234 short fflag; /* type must match fp->f_flag */ 235 236 mtx_lock(&Giant); 237 fdp = td->td_proc->p_fd; 238 if (uap->name) { 239 error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen, 240 sizeof (namelen)); 241 if(error) 242 goto done2; 243 } 244 error = holdsock(fdp, uap->s, &lfp); 245 if (error) 246 goto done2; 247 s = splnet(); 248 head = (struct socket *)lfp->f_data; 249 if ((head->so_options & SO_ACCEPTCONN) == 0) { 250 splx(s); 251 error = EINVAL; 252 goto done; 253 } 254 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 255 splx(s); 256 error = EWOULDBLOCK; 257 goto done; 258 } 259 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 260 if (head->so_state & SS_CANTRCVMORE) { 261 head->so_error = ECONNABORTED; 262 break; 263 } 264 error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH, 265 "accept", 0); 266 if (error) { 267 splx(s); 268 goto done; 269 } 270 } 271 if (head->so_error) { 272 error = head->so_error; 273 head->so_error = 0; 274 splx(s); 275 goto done; 276 } 277 278 /* 279 * At this point we know that there is at least one connection 280 * ready to be accepted. Remove it from the queue prior to 281 * allocating the file descriptor for it since falloc() may 282 * block allowing another process to accept the connection 283 * instead. 284 */ 285 so = TAILQ_FIRST(&head->so_comp); 286 TAILQ_REMOVE(&head->so_comp, so, so_list); 287 head->so_qlen--; 288 289 fflag = lfp->f_flag; 290 error = falloc(td, &nfp, &fd); 291 if (error) { 292 /* 293 * Probably ran out of file descriptors. Put the 294 * unaccepted connection back onto the queue and 295 * do another wakeup so some other process might 296 * have a chance at it. 297 */ 298 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 299 head->so_qlen++; 300 wakeup_one(&head->so_timeo); 301 splx(s); 302 goto done; 303 } 304 fhold(nfp); 305 td->td_retval[0] = fd; 306 307 /* connection has been removed from the listen queue */ 308 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 309 310 so->so_state &= ~SS_COMP; 311 so->so_head = NULL; 312 if (head->so_sigio != NULL) 313 fsetown(fgetown(head->so_sigio), &so->so_sigio); 314 315 nfp->f_data = (caddr_t)so; 316 nfp->f_flag = fflag; 317 nfp->f_ops = &socketops; 318 nfp->f_type = DTYPE_SOCKET; 319 sa = 0; 320 error = soaccept(so, &sa); 321 if (error) { 322 /* 323 * return a namelen of zero for older code which might 324 * ignore the return value from accept. 325 */ 326 if (uap->name != NULL) { 327 namelen = 0; 328 (void) copyout((caddr_t)&namelen, 329 (caddr_t)uap->anamelen, sizeof(*uap->anamelen)); 330 } 331 goto noconnection; 332 } 333 if (sa == NULL) { 334 namelen = 0; 335 if (uap->name) 336 goto gotnoname; 337 splx(s); 338 error = 0; 339 goto done; 340 } 341 if (uap->name) { 342 /* check sa_len before it is destroyed */ 343 if (namelen > sa->sa_len) 344 namelen = sa->sa_len; 345 #ifdef COMPAT_OLDSOCK 346 if (compat) 347 ((struct osockaddr *)sa)->sa_family = 348 sa->sa_family; 349 #endif 350 error = copyout(sa, (caddr_t)uap->name, (u_int)namelen); 351 if (!error) 352 gotnoname: 353 error = copyout((caddr_t)&namelen, 354 (caddr_t)uap->anamelen, sizeof (*uap->anamelen)); 355 } 356 noconnection: 357 if (sa) 358 FREE(sa, M_SONAME); 359 360 /* 361 * close the new descriptor, assuming someone hasn't ripped it 362 * out from under us. 363 */ 364 if (error) { 365 if (fdp->fd_ofiles[fd] == nfp) { 366 fdp->fd_ofiles[fd] = NULL; 367 fdrop(nfp, td); 368 } 369 } 370 splx(s); 371 372 /* 373 * Release explicitly held references before returning. 374 */ 375 done: 376 if (nfp != NULL) 377 fdrop(nfp, td); 378 fdrop(lfp, td); 379 done2: 380 mtx_unlock(&Giant); 381 return (error); 382 } 383 384 /* 385 * MPSAFE (accept1() is MPSAFE) 386 */ 387 int 388 accept(td, uap) 389 struct thread *td; 390 struct accept_args *uap; 391 { 392 393 return (accept1(td, uap, 0)); 394 } 395 396 #ifdef COMPAT_OLDSOCK 397 /* 398 * MPSAFE (accept1() is MPSAFE) 399 */ 400 int 401 oaccept(td, uap) 402 struct thread *td; 403 struct accept_args *uap; 404 { 405 406 return (accept1(td, uap, 1)); 407 } 408 #endif /* COMPAT_OLDSOCK */ 409 410 /* 411 * MPSAFE 412 */ 413 /* ARGSUSED */ 414 int 415 connect(td, uap) 416 struct thread *td; 417 register struct connect_args /* { 418 int s; 419 caddr_t name; 420 int namelen; 421 } */ *uap; 422 { 423 struct file *fp; 424 register struct socket *so; 425 struct sockaddr *sa; 426 int error, s; 427 428 mtx_lock(&Giant); 429 error = holdsock(td->td_proc->p_fd, uap->s, &fp); 430 if (error) 431 goto done2; 432 so = (struct socket *)fp->f_data; 433 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 434 error = EALREADY; 435 goto done; 436 } 437 error = getsockaddr(&sa, uap->name, uap->namelen); 438 if (error) 439 goto done; 440 error = soconnect(so, sa, td); 441 if (error) 442 goto bad; 443 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 444 FREE(sa, M_SONAME); 445 error = EINPROGRESS; 446 goto done; 447 } 448 s = splnet(); 449 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 450 error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, 451 "connec", 0); 452 if (error) 453 break; 454 } 455 if (error == 0) { 456 error = so->so_error; 457 so->so_error = 0; 458 } 459 splx(s); 460 bad: 461 so->so_state &= ~SS_ISCONNECTING; 462 FREE(sa, M_SONAME); 463 if (error == ERESTART) 464 error = EINTR; 465 done: 466 fdrop(fp, td); 467 done2: 468 mtx_unlock(&Giant); 469 return (error); 470 } 471 472 /* 473 * MPSAFE 474 */ 475 int 476 socketpair(td, uap) 477 struct thread *td; 478 register struct socketpair_args /* { 479 int domain; 480 int type; 481 int protocol; 482 int *rsv; 483 } */ *uap; 484 { 485 register struct filedesc *fdp = td->td_proc->p_fd; 486 struct file *fp1, *fp2; 487 struct socket *so1, *so2; 488 int fd, error, sv[2]; 489 490 mtx_lock(&Giant); 491 error = socreate(uap->domain, &so1, uap->type, uap->protocol, td); 492 if (error) 493 goto done2; 494 error = socreate(uap->domain, &so2, uap->type, uap->protocol, td); 495 if (error) 496 goto free1; 497 error = falloc(td, &fp1, &fd); 498 if (error) 499 goto free2; 500 fhold(fp1); 501 sv[0] = fd; 502 fp1->f_data = (caddr_t)so1; 503 error = falloc(td, &fp2, &fd); 504 if (error) 505 goto free3; 506 fhold(fp2); 507 fp2->f_data = (caddr_t)so2; 508 sv[1] = fd; 509 error = soconnect2(so1, so2); 510 if (error) 511 goto free4; 512 if (uap->type == SOCK_DGRAM) { 513 /* 514 * Datagram socket connection is asymmetric. 515 */ 516 error = soconnect2(so2, so1); 517 if (error) 518 goto free4; 519 } 520 fp1->f_flag = fp2->f_flag = FREAD|FWRITE; 521 fp1->f_ops = fp2->f_ops = &socketops; 522 fp1->f_type = fp2->f_type = DTYPE_SOCKET; 523 error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int)); 524 fdrop(fp1, td); 525 fdrop(fp2, td); 526 goto done2; 527 free4: 528 if (fdp->fd_ofiles[sv[1]] == fp2) { 529 fdp->fd_ofiles[sv[1]] = NULL; 530 fdrop(fp2, td); 531 } 532 fdrop(fp2, td); 533 free3: 534 if (fdp->fd_ofiles[sv[0]] == fp1) { 535 fdp->fd_ofiles[sv[0]] = NULL; 536 fdrop(fp1, td); 537 } 538 fdrop(fp1, td); 539 free2: 540 (void)soclose(so2); 541 free1: 542 (void)soclose(so1); 543 done2: 544 mtx_unlock(&Giant); 545 return (error); 546 } 547 548 static int 549 sendit(td, s, mp, flags) 550 register struct thread *td; 551 int s; 552 register struct msghdr *mp; 553 int flags; 554 { 555 struct file *fp; 556 struct uio auio; 557 register struct iovec *iov; 558 register int i; 559 struct mbuf *control; 560 struct sockaddr *to; 561 int len, error; 562 struct socket *so; 563 #ifdef KTRACE 564 struct iovec *ktriov = NULL; 565 struct uio ktruio; 566 #endif 567 568 error = holdsock(td->td_proc->p_fd, s, &fp); 569 if (error) 570 return (error); 571 auio.uio_iov = mp->msg_iov; 572 auio.uio_iovcnt = mp->msg_iovlen; 573 auio.uio_segflg = UIO_USERSPACE; 574 auio.uio_rw = UIO_WRITE; 575 auio.uio_td = td; 576 auio.uio_offset = 0; /* XXX */ 577 auio.uio_resid = 0; 578 iov = mp->msg_iov; 579 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 580 if ((auio.uio_resid += iov->iov_len) < 0) { 581 fdrop(fp, td); 582 return (EINVAL); 583 } 584 } 585 if (mp->msg_name) { 586 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 587 if (error) { 588 fdrop(fp, td); 589 return (error); 590 } 591 } else { 592 to = 0; 593 } 594 if (mp->msg_control) { 595 if (mp->msg_controllen < sizeof(struct cmsghdr) 596 #ifdef COMPAT_OLDSOCK 597 && mp->msg_flags != MSG_COMPAT 598 #endif 599 ) { 600 error = EINVAL; 601 goto bad; 602 } 603 error = sockargs(&control, mp->msg_control, 604 mp->msg_controllen, MT_CONTROL); 605 if (error) 606 goto bad; 607 #ifdef COMPAT_OLDSOCK 608 if (mp->msg_flags == MSG_COMPAT) { 609 register struct cmsghdr *cm; 610 611 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 612 if (control == 0) { 613 error = ENOBUFS; 614 goto bad; 615 } else { 616 cm = mtod(control, struct cmsghdr *); 617 cm->cmsg_len = control->m_len; 618 cm->cmsg_level = SOL_SOCKET; 619 cm->cmsg_type = SCM_RIGHTS; 620 } 621 } 622 #endif 623 } else { 624 control = 0; 625 } 626 #ifdef KTRACE 627 if (KTRPOINT(td->td_proc, KTR_GENIO)) { 628 int iovlen = auio.uio_iovcnt * sizeof (struct iovec); 629 630 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 631 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 632 ktruio = auio; 633 } 634 #endif 635 len = auio.uio_resid; 636 so = (struct socket *)fp->f_data; 637 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control, 638 flags, td); 639 if (error) { 640 if (auio.uio_resid != len && (error == ERESTART || 641 error == EINTR || error == EWOULDBLOCK)) 642 error = 0; 643 if (error == EPIPE) { 644 PROC_LOCK(td->td_proc); 645 psignal(td->td_proc, SIGPIPE); 646 PROC_UNLOCK(td->td_proc); 647 } 648 } 649 if (error == 0) 650 td->td_retval[0] = len - auio.uio_resid; 651 #ifdef KTRACE 652 if (ktriov != NULL) { 653 if (error == 0) { 654 ktruio.uio_iov = ktriov; 655 ktruio.uio_resid = td->td_retval[0]; 656 ktrgenio(td->td_proc->p_tracep, s, UIO_WRITE, &ktruio, error); 657 } 658 FREE(ktriov, M_TEMP); 659 } 660 #endif 661 bad: 662 fdrop(fp, td); 663 if (to) 664 FREE(to, M_SONAME); 665 return (error); 666 } 667 668 /* 669 * MPSAFE 670 */ 671 int 672 sendto(td, uap) 673 struct thread *td; 674 register struct sendto_args /* { 675 int s; 676 caddr_t buf; 677 size_t len; 678 int flags; 679 caddr_t to; 680 int tolen; 681 } */ *uap; 682 { 683 struct msghdr msg; 684 struct iovec aiov; 685 int error; 686 687 msg.msg_name = uap->to; 688 msg.msg_namelen = uap->tolen; 689 msg.msg_iov = &aiov; 690 msg.msg_iovlen = 1; 691 msg.msg_control = 0; 692 #ifdef COMPAT_OLDSOCK 693 msg.msg_flags = 0; 694 #endif 695 aiov.iov_base = uap->buf; 696 aiov.iov_len = uap->len; 697 mtx_lock(&Giant); 698 error = sendit(td, uap->s, &msg, uap->flags); 699 mtx_unlock(&Giant); 700 return (error); 701 } 702 703 #ifdef COMPAT_OLDSOCK 704 /* 705 * MPSAFE 706 */ 707 int 708 osend(td, uap) 709 struct thread *td; 710 register struct osend_args /* { 711 int s; 712 caddr_t buf; 713 int len; 714 int flags; 715 } */ *uap; 716 { 717 struct msghdr msg; 718 struct iovec aiov; 719 int error; 720 721 msg.msg_name = 0; 722 msg.msg_namelen = 0; 723 msg.msg_iov = &aiov; 724 msg.msg_iovlen = 1; 725 aiov.iov_base = uap->buf; 726 aiov.iov_len = uap->len; 727 msg.msg_control = 0; 728 msg.msg_flags = 0; 729 mtx_lock(&Giant); 730 error = sendit(td, uap->s, &msg, uap->flags); 731 mtx_unlock(&Giant); 732 return (error); 733 } 734 735 /* 736 * MPSAFE 737 */ 738 int 739 osendmsg(td, uap) 740 struct thread *td; 741 register struct osendmsg_args /* { 742 int s; 743 caddr_t msg; 744 int flags; 745 } */ *uap; 746 { 747 struct msghdr msg; 748 struct iovec aiov[UIO_SMALLIOV], *iov; 749 int error; 750 751 mtx_lock(&Giant); 752 error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr)); 753 if (error) 754 goto done2; 755 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 756 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 757 error = EMSGSIZE; 758 goto done2; 759 } 760 MALLOC(iov, struct iovec *, 761 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 762 M_WAITOK); 763 } else { 764 iov = aiov; 765 } 766 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 767 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 768 if (error) 769 goto done; 770 msg.msg_flags = MSG_COMPAT; 771 msg.msg_iov = iov; 772 error = sendit(td, uap->s, &msg, uap->flags); 773 done: 774 if (iov != aiov) 775 FREE(iov, M_IOV); 776 done2: 777 mtx_unlock(&Giant); 778 return (error); 779 } 780 #endif 781 782 /* 783 * MPSAFE 784 */ 785 int 786 sendmsg(td, uap) 787 struct thread *td; 788 register struct sendmsg_args /* { 789 int s; 790 caddr_t msg; 791 int flags; 792 } */ *uap; 793 { 794 struct msghdr msg; 795 struct iovec aiov[UIO_SMALLIOV], *iov; 796 int error; 797 798 mtx_lock(&Giant); 799 error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg)); 800 if (error) 801 goto done2; 802 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 803 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 804 error = EMSGSIZE; 805 goto done2; 806 } 807 MALLOC(iov, struct iovec *, 808 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 809 M_WAITOK); 810 } else { 811 iov = aiov; 812 } 813 if (msg.msg_iovlen && 814 (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 815 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 816 goto done; 817 msg.msg_iov = iov; 818 #ifdef COMPAT_OLDSOCK 819 msg.msg_flags = 0; 820 #endif 821 error = sendit(td, uap->s, &msg, uap->flags); 822 done: 823 if (iov != aiov) 824 FREE(iov, M_IOV); 825 done2: 826 mtx_unlock(&Giant); 827 return (error); 828 } 829 830 static int 831 recvit(td, s, mp, namelenp) 832 register struct thread *td; 833 int s; 834 register struct msghdr *mp; 835 caddr_t namelenp; 836 { 837 struct file *fp; 838 struct uio auio; 839 register struct iovec *iov; 840 register int i; 841 int len, error; 842 struct mbuf *m, *control = 0; 843 caddr_t ctlbuf; 844 struct socket *so; 845 struct sockaddr *fromsa = 0; 846 #ifdef KTRACE 847 struct iovec *ktriov = NULL; 848 struct uio ktruio; 849 #endif 850 851 error = holdsock(td->td_proc->p_fd, s, &fp); 852 if (error) 853 return (error); 854 auio.uio_iov = mp->msg_iov; 855 auio.uio_iovcnt = mp->msg_iovlen; 856 auio.uio_segflg = UIO_USERSPACE; 857 auio.uio_rw = UIO_READ; 858 auio.uio_td = td; 859 auio.uio_offset = 0; /* XXX */ 860 auio.uio_resid = 0; 861 iov = mp->msg_iov; 862 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 863 if ((auio.uio_resid += iov->iov_len) < 0) { 864 fdrop(fp, td); 865 return (EINVAL); 866 } 867 } 868 #ifdef KTRACE 869 if (KTRPOINT(td->td_proc, KTR_GENIO)) { 870 int iovlen = auio.uio_iovcnt * sizeof (struct iovec); 871 872 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 873 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 874 ktruio = auio; 875 } 876 #endif 877 len = auio.uio_resid; 878 so = (struct socket *)fp->f_data; 879 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 880 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 881 &mp->msg_flags); 882 if (error) { 883 if (auio.uio_resid != len && (error == ERESTART || 884 error == EINTR || error == EWOULDBLOCK)) 885 error = 0; 886 } 887 #ifdef KTRACE 888 if (ktriov != NULL) { 889 if (error == 0) { 890 ktruio.uio_iov = ktriov; 891 ktruio.uio_resid = len - auio.uio_resid; 892 ktrgenio(td->td_proc->p_tracep, s, UIO_READ, &ktruio, error); 893 } 894 FREE(ktriov, M_TEMP); 895 } 896 #endif 897 if (error) 898 goto out; 899 td->td_retval[0] = len - auio.uio_resid; 900 if (mp->msg_name) { 901 len = mp->msg_namelen; 902 if (len <= 0 || fromsa == 0) 903 len = 0; 904 else { 905 #ifndef MIN 906 #define MIN(a,b) ((a)>(b)?(b):(a)) 907 #endif 908 /* save sa_len before it is destroyed by MSG_COMPAT */ 909 len = MIN(len, fromsa->sa_len); 910 #ifdef COMPAT_OLDSOCK 911 if (mp->msg_flags & MSG_COMPAT) 912 ((struct osockaddr *)fromsa)->sa_family = 913 fromsa->sa_family; 914 #endif 915 error = copyout(fromsa, 916 (caddr_t)mp->msg_name, (unsigned)len); 917 if (error) 918 goto out; 919 } 920 mp->msg_namelen = len; 921 if (namelenp && 922 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) { 923 #ifdef COMPAT_OLDSOCK 924 if (mp->msg_flags & MSG_COMPAT) 925 error = 0; /* old recvfrom didn't check */ 926 else 927 #endif 928 goto out; 929 } 930 } 931 if (mp->msg_control) { 932 #ifdef COMPAT_OLDSOCK 933 /* 934 * We assume that old recvmsg calls won't receive access 935 * rights and other control info, esp. as control info 936 * is always optional and those options didn't exist in 4.3. 937 * If we receive rights, trim the cmsghdr; anything else 938 * is tossed. 939 */ 940 if (control && mp->msg_flags & MSG_COMPAT) { 941 if (mtod(control, struct cmsghdr *)->cmsg_level != 942 SOL_SOCKET || 943 mtod(control, struct cmsghdr *)->cmsg_type != 944 SCM_RIGHTS) { 945 mp->msg_controllen = 0; 946 goto out; 947 } 948 control->m_len -= sizeof (struct cmsghdr); 949 control->m_data += sizeof (struct cmsghdr); 950 } 951 #endif 952 len = mp->msg_controllen; 953 m = control; 954 mp->msg_controllen = 0; 955 ctlbuf = (caddr_t) mp->msg_control; 956 957 while (m && len > 0) { 958 unsigned int tocopy; 959 960 if (len >= m->m_len) 961 tocopy = m->m_len; 962 else { 963 mp->msg_flags |= MSG_CTRUNC; 964 tocopy = len; 965 } 966 967 if ((error = copyout((caddr_t)mtod(m, caddr_t), 968 ctlbuf, tocopy)) != 0) 969 goto out; 970 971 ctlbuf += tocopy; 972 len -= tocopy; 973 m = m->m_next; 974 } 975 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 976 } 977 out: 978 fdrop(fp, td); 979 if (fromsa) 980 FREE(fromsa, M_SONAME); 981 if (control) 982 m_freem(control); 983 return (error); 984 } 985 986 /* 987 * MPSAFE 988 */ 989 int 990 recvfrom(td, uap) 991 struct thread *td; 992 register struct recvfrom_args /* { 993 int s; 994 caddr_t buf; 995 size_t len; 996 int flags; 997 caddr_t from; 998 int *fromlenaddr; 999 } */ *uap; 1000 { 1001 struct msghdr msg; 1002 struct iovec aiov; 1003 int error; 1004 1005 mtx_lock(&Giant); 1006 if (uap->fromlenaddr) { 1007 error = copyin((caddr_t)uap->fromlenaddr, 1008 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen)); 1009 if (error) 1010 goto done2; 1011 } else { 1012 msg.msg_namelen = 0; 1013 } 1014 msg.msg_name = uap->from; 1015 msg.msg_iov = &aiov; 1016 msg.msg_iovlen = 1; 1017 aiov.iov_base = uap->buf; 1018 aiov.iov_len = uap->len; 1019 msg.msg_control = 0; 1020 msg.msg_flags = uap->flags; 1021 error = recvit(td, uap->s, &msg, (caddr_t)uap->fromlenaddr); 1022 done2: 1023 mtx_unlock(&Giant); 1024 return(error); 1025 } 1026 1027 #ifdef COMPAT_OLDSOCK 1028 /* 1029 * MPSAFE 1030 */ 1031 int 1032 orecvfrom(td, uap) 1033 struct thread *td; 1034 struct recvfrom_args *uap; 1035 { 1036 1037 uap->flags |= MSG_COMPAT; 1038 return (recvfrom(td, uap)); 1039 } 1040 #endif 1041 1042 1043 #ifdef COMPAT_OLDSOCK 1044 /* 1045 * MPSAFE 1046 */ 1047 int 1048 orecv(td, uap) 1049 struct thread *td; 1050 register struct orecv_args /* { 1051 int s; 1052 caddr_t buf; 1053 int len; 1054 int flags; 1055 } */ *uap; 1056 { 1057 struct msghdr msg; 1058 struct iovec aiov; 1059 int error; 1060 1061 mtx_lock(&Giant); 1062 msg.msg_name = 0; 1063 msg.msg_namelen = 0; 1064 msg.msg_iov = &aiov; 1065 msg.msg_iovlen = 1; 1066 aiov.iov_base = uap->buf; 1067 aiov.iov_len = uap->len; 1068 msg.msg_control = 0; 1069 msg.msg_flags = uap->flags; 1070 error = recvit(td, uap->s, &msg, (caddr_t)0); 1071 mtx_unlock(&Giant); 1072 return (error); 1073 } 1074 1075 /* 1076 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1077 * overlays the new one, missing only the flags, and with the (old) access 1078 * rights where the control fields are now. 1079 * 1080 * MPSAFE 1081 */ 1082 int 1083 orecvmsg(td, uap) 1084 struct thread *td; 1085 register struct orecvmsg_args /* { 1086 int s; 1087 struct omsghdr *msg; 1088 int flags; 1089 } */ *uap; 1090 { 1091 struct msghdr msg; 1092 struct iovec aiov[UIO_SMALLIOV], *iov; 1093 int error; 1094 1095 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, 1096 sizeof (struct omsghdr)); 1097 if (error) 1098 return (error); 1099 1100 mtx_lock(&Giant); 1101 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1102 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1103 error = EMSGSIZE; 1104 goto done2; 1105 } 1106 MALLOC(iov, struct iovec *, 1107 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1108 M_WAITOK); 1109 } else { 1110 iov = aiov; 1111 } 1112 msg.msg_flags = uap->flags | MSG_COMPAT; 1113 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 1114 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1115 if (error) 1116 goto done; 1117 msg.msg_iov = iov; 1118 error = recvit(td, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen); 1119 1120 if (msg.msg_controllen && error == 0) 1121 error = copyout((caddr_t)&msg.msg_controllen, 1122 (caddr_t)&uap->msg->msg_accrightslen, sizeof (int)); 1123 done: 1124 if (iov != aiov) 1125 FREE(iov, M_IOV); 1126 done2: 1127 mtx_unlock(&Giant); 1128 return (error); 1129 } 1130 #endif 1131 1132 /* 1133 * MPSAFE 1134 */ 1135 int 1136 recvmsg(td, uap) 1137 struct thread *td; 1138 register struct recvmsg_args /* { 1139 int s; 1140 struct msghdr *msg; 1141 int flags; 1142 } */ *uap; 1143 { 1144 struct msghdr msg; 1145 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1146 register int error; 1147 1148 mtx_lock(&Giant); 1149 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg)); 1150 if (error) 1151 goto done2; 1152 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1153 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1154 error = EMSGSIZE; 1155 goto done2; 1156 } 1157 MALLOC(iov, struct iovec *, 1158 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1159 M_WAITOK); 1160 } else { 1161 iov = aiov; 1162 } 1163 #ifdef COMPAT_OLDSOCK 1164 msg.msg_flags = uap->flags &~ MSG_COMPAT; 1165 #else 1166 msg.msg_flags = uap->flags; 1167 #endif 1168 uiov = msg.msg_iov; 1169 msg.msg_iov = iov; 1170 error = copyin((caddr_t)uiov, (caddr_t)iov, 1171 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1172 if (error) 1173 goto done; 1174 error = recvit(td, uap->s, &msg, (caddr_t)0); 1175 if (!error) { 1176 msg.msg_iov = uiov; 1177 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg)); 1178 } 1179 done: 1180 if (iov != aiov) 1181 FREE(iov, M_IOV); 1182 done2: 1183 mtx_unlock(&Giant); 1184 return (error); 1185 } 1186 1187 /* 1188 * MPSAFE 1189 */ 1190 /* ARGSUSED */ 1191 int 1192 shutdown(td, uap) 1193 struct thread *td; 1194 register struct shutdown_args /* { 1195 int s; 1196 int how; 1197 } */ *uap; 1198 { 1199 struct file *fp; 1200 int error; 1201 1202 mtx_lock(&Giant); 1203 error = holdsock(td->td_proc->p_fd, uap->s, &fp); 1204 if (error == 0) { 1205 error = soshutdown((struct socket *)fp->f_data, uap->how); 1206 fdrop(fp, td); 1207 } 1208 mtx_unlock(&Giant); 1209 return(error); 1210 } 1211 1212 /* 1213 * MPSAFE 1214 */ 1215 /* ARGSUSED */ 1216 int 1217 setsockopt(td, uap) 1218 struct thread *td; 1219 register struct setsockopt_args /* { 1220 int s; 1221 int level; 1222 int name; 1223 caddr_t val; 1224 int valsize; 1225 } */ *uap; 1226 { 1227 struct file *fp; 1228 struct sockopt sopt; 1229 int error; 1230 1231 if (uap->val == 0 && uap->valsize != 0) 1232 return (EFAULT); 1233 if (uap->valsize < 0) 1234 return (EINVAL); 1235 1236 mtx_lock(&Giant); 1237 error = holdsock(td->td_proc->p_fd, uap->s, &fp); 1238 if (error == 0) { 1239 sopt.sopt_dir = SOPT_SET; 1240 sopt.sopt_level = uap->level; 1241 sopt.sopt_name = uap->name; 1242 sopt.sopt_val = uap->val; 1243 sopt.sopt_valsize = uap->valsize; 1244 sopt.sopt_td = td; 1245 error = sosetopt((struct socket *)fp->f_data, &sopt); 1246 fdrop(fp, td); 1247 } 1248 mtx_unlock(&Giant); 1249 return(error); 1250 } 1251 1252 /* 1253 * MPSAFE 1254 */ 1255 /* ARGSUSED */ 1256 int 1257 getsockopt(td, uap) 1258 struct thread *td; 1259 register struct getsockopt_args /* { 1260 int s; 1261 int level; 1262 int name; 1263 caddr_t val; 1264 int *avalsize; 1265 } */ *uap; 1266 { 1267 int valsize, error; 1268 struct file *fp; 1269 struct sockopt sopt; 1270 1271 mtx_lock(&Giant); 1272 error = holdsock(td->td_proc->p_fd, uap->s, &fp); 1273 if (error) 1274 goto done2; 1275 if (uap->val) { 1276 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize, 1277 sizeof (valsize)); 1278 if (error) { 1279 fdrop(fp, td); 1280 goto done2; 1281 } 1282 if (valsize < 0) { 1283 fdrop(fp, td); 1284 error = EINVAL; 1285 goto done2; 1286 } 1287 } else { 1288 valsize = 0; 1289 } 1290 1291 sopt.sopt_dir = SOPT_GET; 1292 sopt.sopt_level = uap->level; 1293 sopt.sopt_name = uap->name; 1294 sopt.sopt_val = uap->val; 1295 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1296 sopt.sopt_td = td; 1297 1298 error = sogetopt((struct socket *)fp->f_data, &sopt); 1299 if (error == 0) { 1300 valsize = sopt.sopt_valsize; 1301 error = copyout((caddr_t)&valsize, 1302 (caddr_t)uap->avalsize, sizeof (valsize)); 1303 } 1304 fdrop(fp, td); 1305 done2: 1306 mtx_unlock(&Giant); 1307 return (error); 1308 } 1309 1310 /* 1311 * getsockname1() - Get socket name. 1312 * 1313 * MPSAFE 1314 */ 1315 /* ARGSUSED */ 1316 static int 1317 getsockname1(td, uap, compat) 1318 struct thread *td; 1319 register struct getsockname_args /* { 1320 int fdes; 1321 caddr_t asa; 1322 int *alen; 1323 } */ *uap; 1324 int compat; 1325 { 1326 struct file *fp; 1327 register struct socket *so; 1328 struct sockaddr *sa; 1329 int len, error; 1330 1331 mtx_lock(&Giant); 1332 error = holdsock(td->td_proc->p_fd, uap->fdes, &fp); 1333 if (error) 1334 goto done2; 1335 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); 1336 if (error) { 1337 fdrop(fp, td); 1338 goto done2; 1339 } 1340 so = (struct socket *)fp->f_data; 1341 sa = 0; 1342 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1343 if (error) 1344 goto bad; 1345 if (sa == 0) { 1346 len = 0; 1347 goto gotnothing; 1348 } 1349 1350 len = MIN(len, sa->sa_len); 1351 #ifdef COMPAT_OLDSOCK 1352 if (compat) 1353 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1354 #endif 1355 error = copyout(sa, (caddr_t)uap->asa, (u_int)len); 1356 if (error == 0) 1357 gotnothing: 1358 error = copyout((caddr_t)&len, (caddr_t)uap->alen, 1359 sizeof (len)); 1360 bad: 1361 if (sa) 1362 FREE(sa, M_SONAME); 1363 fdrop(fp, td); 1364 done2: 1365 mtx_unlock(&Giant); 1366 return (error); 1367 } 1368 1369 /* 1370 * MPSAFE 1371 */ 1372 int 1373 getsockname(td, uap) 1374 struct thread *td; 1375 struct getsockname_args *uap; 1376 { 1377 1378 return (getsockname1(td, uap, 0)); 1379 } 1380 1381 #ifdef COMPAT_OLDSOCK 1382 /* 1383 * MPSAFE 1384 */ 1385 int 1386 ogetsockname(td, uap) 1387 struct thread *td; 1388 struct getsockname_args *uap; 1389 { 1390 1391 return (getsockname1(td, uap, 1)); 1392 } 1393 #endif /* COMPAT_OLDSOCK */ 1394 1395 /* 1396 * getpeername1() - Get name of peer for connected socket. 1397 * 1398 * MPSAFE 1399 */ 1400 /* ARGSUSED */ 1401 static int 1402 getpeername1(td, uap, compat) 1403 struct thread *td; 1404 register struct getpeername_args /* { 1405 int fdes; 1406 caddr_t asa; 1407 int *alen; 1408 } */ *uap; 1409 int compat; 1410 { 1411 struct file *fp; 1412 register struct socket *so; 1413 struct sockaddr *sa; 1414 int len, error; 1415 1416 mtx_lock(&Giant); 1417 error = holdsock(td->td_proc->p_fd, uap->fdes, &fp); 1418 if (error) 1419 goto done2; 1420 so = (struct socket *)fp->f_data; 1421 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1422 fdrop(fp, td); 1423 error = ENOTCONN; 1424 goto done2; 1425 } 1426 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); 1427 if (error) { 1428 fdrop(fp, td); 1429 goto done2; 1430 } 1431 sa = 0; 1432 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1433 if (error) 1434 goto bad; 1435 if (sa == 0) { 1436 len = 0; 1437 goto gotnothing; 1438 } 1439 len = MIN(len, sa->sa_len); 1440 #ifdef COMPAT_OLDSOCK 1441 if (compat) 1442 ((struct osockaddr *)sa)->sa_family = 1443 sa->sa_family; 1444 #endif 1445 error = copyout(sa, (caddr_t)uap->asa, (u_int)len); 1446 if (error) 1447 goto bad; 1448 gotnothing: 1449 error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len)); 1450 bad: 1451 if (sa) 1452 FREE(sa, M_SONAME); 1453 fdrop(fp, td); 1454 done2: 1455 mtx_unlock(&Giant); 1456 return (error); 1457 } 1458 1459 /* 1460 * MPSAFE 1461 */ 1462 int 1463 getpeername(td, uap) 1464 struct thread *td; 1465 struct getpeername_args *uap; 1466 { 1467 1468 return (getpeername1(td, uap, 0)); 1469 } 1470 1471 #ifdef COMPAT_OLDSOCK 1472 /* 1473 * MPSAFE 1474 */ 1475 int 1476 ogetpeername(td, uap) 1477 struct thread *td; 1478 struct ogetpeername_args *uap; 1479 { 1480 1481 /* XXX uap should have type `getpeername_args *' to begin with. */ 1482 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1483 } 1484 #endif /* COMPAT_OLDSOCK */ 1485 1486 int 1487 sockargs(mp, buf, buflen, type) 1488 struct mbuf **mp; 1489 caddr_t buf; 1490 int buflen, type; 1491 { 1492 register struct sockaddr *sa; 1493 register struct mbuf *m; 1494 int error; 1495 1496 if ((u_int)buflen > MLEN) { 1497 #ifdef COMPAT_OLDSOCK 1498 if (type == MT_SONAME && (u_int)buflen <= 112) 1499 buflen = MLEN; /* unix domain compat. hack */ 1500 else 1501 #endif 1502 return (EINVAL); 1503 } 1504 m = m_get(M_TRYWAIT, type); 1505 if (m == NULL) 1506 return (ENOBUFS); 1507 m->m_len = buflen; 1508 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1509 if (error) 1510 (void) m_free(m); 1511 else { 1512 *mp = m; 1513 if (type == MT_SONAME) { 1514 sa = mtod(m, struct sockaddr *); 1515 1516 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1517 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1518 sa->sa_family = sa->sa_len; 1519 #endif 1520 sa->sa_len = buflen; 1521 } 1522 } 1523 return (error); 1524 } 1525 1526 int 1527 getsockaddr(namp, uaddr, len) 1528 struct sockaddr **namp; 1529 caddr_t uaddr; 1530 size_t len; 1531 { 1532 struct sockaddr *sa; 1533 int error; 1534 1535 if (len > SOCK_MAXADDRLEN) 1536 return ENAMETOOLONG; 1537 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1538 error = copyin(uaddr, sa, len); 1539 if (error) { 1540 FREE(sa, M_SONAME); 1541 } else { 1542 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1543 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1544 sa->sa_family = sa->sa_len; 1545 #endif 1546 sa->sa_len = len; 1547 *namp = sa; 1548 } 1549 return error; 1550 } 1551 1552 /* 1553 * holdsock() - load the struct file pointer associated 1554 * with a socket into *fpp. If an error occurs, non-zero 1555 * will be returned and *fpp will be set to NULL. 1556 */ 1557 int 1558 holdsock(fdp, fdes, fpp) 1559 struct filedesc *fdp; 1560 int fdes; 1561 struct file **fpp; 1562 { 1563 register struct file *fp = NULL; 1564 int error = 0; 1565 1566 if ((unsigned)fdes >= fdp->fd_nfiles || 1567 (fp = fdp->fd_ofiles[fdes]) == NULL) { 1568 error = EBADF; 1569 } else if (fp->f_type != DTYPE_SOCKET) { 1570 error = ENOTSOCK; 1571 fp = NULL; 1572 } else { 1573 fhold(fp); 1574 } 1575 *fpp = fp; 1576 return(error); 1577 } 1578 1579 /* 1580 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) 1581 * XXX - The sf_buf functions are currently private to sendfile(2), so have 1582 * been made static, but may be useful in the future for doing zero-copy in 1583 * other parts of the networking code. 1584 */ 1585 static void 1586 sf_buf_init(void *arg) 1587 { 1588 int i; 1589 1590 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", MTX_DEF); 1591 mtx_lock(&sf_freelist.sf_lock); 1592 SLIST_INIT(&sf_freelist.sf_head); 1593 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE); 1594 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, 1595 M_NOWAIT | M_ZERO); 1596 for (i = 0; i < nsfbufs; i++) { 1597 sf_bufs[i].kva = sf_base + i * PAGE_SIZE; 1598 SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list); 1599 } 1600 sf_buf_alloc_want = 0; 1601 mtx_unlock(&sf_freelist.sf_lock); 1602 } 1603 1604 /* 1605 * Get an sf_buf from the freelist. Will block if none are available. 1606 */ 1607 static struct sf_buf * 1608 sf_buf_alloc() 1609 { 1610 struct sf_buf *sf; 1611 int error; 1612 1613 mtx_lock(&sf_freelist.sf_lock); 1614 while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) { 1615 sf_buf_alloc_want++; 1616 error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH, 1617 "sfbufa", 0); 1618 sf_buf_alloc_want--; 1619 1620 /* 1621 * If we got a signal, don't risk going back to sleep. 1622 */ 1623 if (error) 1624 break; 1625 } 1626 if (sf != NULL) 1627 SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list); 1628 mtx_unlock(&sf_freelist.sf_lock); 1629 return (sf); 1630 } 1631 1632 #define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT]) 1633 1634 /* 1635 * Detatch mapped page and release resources back to the system. 1636 */ 1637 static void 1638 sf_buf_free(caddr_t addr, void *args) 1639 { 1640 struct sf_buf *sf; 1641 struct vm_page *m; 1642 1643 GIANT_REQUIRED; 1644 1645 sf = dtosf(addr); 1646 pmap_qremove((vm_offset_t)addr, 1); 1647 m = sf->m; 1648 vm_page_unwire(m, 0); 1649 /* 1650 * Check for the object going away on us. This can 1651 * happen since we don't hold a reference to it. 1652 * If so, we're responsible for freeing the page. 1653 */ 1654 if (m->wire_count == 0 && m->object == NULL) 1655 vm_page_free(m); 1656 sf->m = NULL; 1657 mtx_lock(&sf_freelist.sf_lock); 1658 SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list); 1659 if (sf_buf_alloc_want > 0) 1660 wakeup_one(&sf_freelist); 1661 mtx_unlock(&sf_freelist.sf_lock); 1662 } 1663 1664 /* 1665 * sendfile(2) 1666 * 1667 * MPSAFE 1668 * 1669 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1670 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1671 * 1672 * Send a file specified by 'fd' and starting at 'offset' to a socket 1673 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1674 * nbytes == 0. Optionally add a header and/or trailer to the socket 1675 * output. If specified, write the total number of bytes sent into *sbytes. 1676 * 1677 */ 1678 int 1679 sendfile(struct thread *td, struct sendfile_args *uap) 1680 { 1681 struct file *fp; 1682 struct filedesc *fdp = td->td_proc->p_fd; 1683 struct vnode *vp; 1684 struct vm_object *obj; 1685 struct socket *so; 1686 struct mbuf *m; 1687 struct sf_buf *sf; 1688 struct vm_page *pg; 1689 struct writev_args nuap; 1690 struct sf_hdtr hdtr; 1691 off_t off, xfsize, sbytes = 0; 1692 int error = 0, s; 1693 1694 mtx_lock(&Giant); 1695 vp = NULL; 1696 /* 1697 * Do argument checking. Must be a regular file in, stream 1698 * type and connected socket out, positive offset. 1699 */ 1700 fp = holdfp(fdp, uap->fd, FREAD); 1701 if (fp == NULL) { 1702 error = EBADF; 1703 goto done; 1704 } 1705 if (fp->f_type != DTYPE_VNODE) { 1706 error = EINVAL; 1707 goto done; 1708 } 1709 vp = (struct vnode *)fp->f_data; 1710 vref(vp); 1711 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1712 error = EINVAL; 1713 goto done; 1714 } 1715 fdrop(fp, td); 1716 error = holdsock(td->td_proc->p_fd, uap->s, &fp); 1717 if (error) 1718 goto done; 1719 so = (struct socket *)fp->f_data; 1720 if (so->so_type != SOCK_STREAM) { 1721 error = EINVAL; 1722 goto done; 1723 } 1724 if ((so->so_state & SS_ISCONNECTED) == 0) { 1725 error = ENOTCONN; 1726 goto done; 1727 } 1728 if (uap->offset < 0) { 1729 error = EINVAL; 1730 goto done; 1731 } 1732 1733 /* 1734 * If specified, get the pointer to the sf_hdtr struct for 1735 * any headers/trailers. 1736 */ 1737 if (uap->hdtr != NULL) { 1738 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1739 if (error) 1740 goto done; 1741 /* 1742 * Send any headers. Wimp out and use writev(2). 1743 */ 1744 if (hdtr.headers != NULL) { 1745 nuap.fd = uap->s; 1746 nuap.iovp = hdtr.headers; 1747 nuap.iovcnt = hdtr.hdr_cnt; 1748 error = writev(td, &nuap); 1749 if (error) 1750 goto done; 1751 sbytes += td->td_retval[0]; 1752 } 1753 } 1754 1755 /* 1756 * Protect against multiple writers to the socket. 1757 */ 1758 (void) sblock(&so->so_snd, M_WAITOK); 1759 1760 /* 1761 * Loop through the pages in the file, starting with the requested 1762 * offset. Get a file page (do I/O if necessary), map the file page 1763 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1764 * it on the socket. 1765 */ 1766 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1767 vm_pindex_t pindex; 1768 vm_offset_t pgoff; 1769 1770 pindex = OFF_TO_IDX(off); 1771 retry_lookup: 1772 /* 1773 * Calculate the amount to transfer. Not to exceed a page, 1774 * the EOF, or the passed in nbytes. 1775 */ 1776 xfsize = obj->un_pager.vnp.vnp_size - off; 1777 if (xfsize > PAGE_SIZE) 1778 xfsize = PAGE_SIZE; 1779 pgoff = (vm_offset_t)(off & PAGE_MASK); 1780 if (PAGE_SIZE - pgoff < xfsize) 1781 xfsize = PAGE_SIZE - pgoff; 1782 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1783 xfsize = uap->nbytes - sbytes; 1784 if (xfsize <= 0) 1785 break; 1786 /* 1787 * Optimize the non-blocking case by looking at the socket space 1788 * before going to the extra work of constituting the sf_buf. 1789 */ 1790 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1791 if (so->so_state & SS_CANTSENDMORE) 1792 error = EPIPE; 1793 else 1794 error = EAGAIN; 1795 sbunlock(&so->so_snd); 1796 goto done; 1797 } 1798 /* 1799 * Attempt to look up the page. 1800 * 1801 * Allocate if not found 1802 * 1803 * Wait and loop if busy. 1804 */ 1805 pg = vm_page_lookup(obj, pindex); 1806 1807 if (pg == NULL) { 1808 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL); 1809 if (pg == NULL) { 1810 VM_WAIT; 1811 goto retry_lookup; 1812 } 1813 vm_page_wakeup(pg); 1814 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) { 1815 goto retry_lookup; 1816 } 1817 1818 /* 1819 * Wire the page so it does not get ripped out from under 1820 * us. 1821 */ 1822 1823 vm_page_wire(pg); 1824 1825 /* 1826 * If page is not valid for what we need, initiate I/O 1827 */ 1828 1829 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1830 struct uio auio; 1831 struct iovec aiov; 1832 int bsize; 1833 1834 /* 1835 * Ensure that our page is still around when the I/O 1836 * completes. 1837 */ 1838 vm_page_io_start(pg); 1839 1840 /* 1841 * Get the page from backing store. 1842 */ 1843 bsize = vp->v_mount->mnt_stat.f_iosize; 1844 auio.uio_iov = &aiov; 1845 auio.uio_iovcnt = 1; 1846 aiov.iov_base = 0; 1847 aiov.iov_len = MAXBSIZE; 1848 auio.uio_resid = MAXBSIZE; 1849 auio.uio_offset = trunc_page(off); 1850 auio.uio_segflg = UIO_NOCOPY; 1851 auio.uio_rw = UIO_READ; 1852 auio.uio_td = td; 1853 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1854 error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16), 1855 td->td_proc->p_ucred); 1856 VOP_UNLOCK(vp, 0, td); 1857 vm_page_flag_clear(pg, PG_ZERO); 1858 vm_page_io_finish(pg); 1859 if (error) { 1860 vm_page_unwire(pg, 0); 1861 /* 1862 * See if anyone else might know about this page. 1863 * If not and it is not valid, then free it. 1864 */ 1865 if (pg->wire_count == 0 && pg->valid == 0 && 1866 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1867 pg->hold_count == 0) { 1868 vm_page_busy(pg); 1869 vm_page_free(pg); 1870 } 1871 sbunlock(&so->so_snd); 1872 goto done; 1873 } 1874 } 1875 1876 1877 /* 1878 * Get a sendfile buf. We usually wait as long as necessary, 1879 * but this wait can be interrupted. 1880 */ 1881 if ((sf = sf_buf_alloc()) == NULL) { 1882 vm_page_unwire(pg, 0); 1883 if (pg->wire_count == 0 && pg->object == NULL) 1884 vm_page_free(pg); 1885 sbunlock(&so->so_snd); 1886 error = EINTR; 1887 goto done; 1888 } 1889 1890 /* 1891 * Allocate a kernel virtual page and insert the physical page 1892 * into it. 1893 */ 1894 sf->m = pg; 1895 pmap_qenter(sf->kva, &pg, 1); 1896 /* 1897 * Get an mbuf header and set it up as having external storage. 1898 */ 1899 MGETHDR(m, M_TRYWAIT, MT_DATA); 1900 if (m == NULL) { 1901 error = ENOBUFS; 1902 sf_buf_free((void *)sf->kva, NULL); 1903 sbunlock(&so->so_snd); 1904 goto done; 1905 } 1906 /* 1907 * Setup external storage for mbuf. 1908 */ 1909 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL, M_RDONLY, 1910 EXT_SFBUF); 1911 m->m_data = (char *) sf->kva + pgoff; 1912 m->m_pkthdr.len = m->m_len = xfsize; 1913 /* 1914 * Add the buffer to the socket buffer chain. 1915 */ 1916 s = splnet(); 1917 retry_space: 1918 /* 1919 * Make sure that the socket is still able to take more data. 1920 * CANTSENDMORE being true usually means that the connection 1921 * was closed. so_error is true when an error was sensed after 1922 * a previous send. 1923 * The state is checked after the page mapping and buffer 1924 * allocation above since those operations may block and make 1925 * any socket checks stale. From this point forward, nothing 1926 * blocks before the pru_send (or more accurately, any blocking 1927 * results in a loop back to here to re-check). 1928 */ 1929 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1930 if (so->so_state & SS_CANTSENDMORE) { 1931 error = EPIPE; 1932 } else { 1933 error = so->so_error; 1934 so->so_error = 0; 1935 } 1936 m_freem(m); 1937 sbunlock(&so->so_snd); 1938 splx(s); 1939 goto done; 1940 } 1941 /* 1942 * Wait for socket space to become available. We do this just 1943 * after checking the connection state above in order to avoid 1944 * a race condition with sbwait(). 1945 */ 1946 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1947 if (so->so_state & SS_NBIO) { 1948 m_freem(m); 1949 sbunlock(&so->so_snd); 1950 splx(s); 1951 error = EAGAIN; 1952 goto done; 1953 } 1954 error = sbwait(&so->so_snd); 1955 /* 1956 * An error from sbwait usually indicates that we've 1957 * been interrupted by a signal. If we've sent anything 1958 * then return bytes sent, otherwise return the error. 1959 */ 1960 if (error) { 1961 m_freem(m); 1962 sbunlock(&so->so_snd); 1963 splx(s); 1964 goto done; 1965 } 1966 goto retry_space; 1967 } 1968 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 1969 splx(s); 1970 if (error) { 1971 sbunlock(&so->so_snd); 1972 goto done; 1973 } 1974 } 1975 sbunlock(&so->so_snd); 1976 1977 /* 1978 * Send trailers. Wimp out and use writev(2). 1979 */ 1980 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 1981 nuap.fd = uap->s; 1982 nuap.iovp = hdtr.trailers; 1983 nuap.iovcnt = hdtr.trl_cnt; 1984 error = writev(td, &nuap); 1985 if (error) 1986 goto done; 1987 sbytes += td->td_retval[0]; 1988 } 1989 1990 done: 1991 /* 1992 * If there was no error we have to clear td->td_retval[0] 1993 * because it may have been set by writev. 1994 */ 1995 if (error == 0) { 1996 td->td_retval[0] = 0; 1997 } 1998 if (uap->sbytes != NULL) { 1999 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2000 } 2001 if (vp) 2002 vrele(vp); 2003 if (fp) 2004 fdrop(fp, td); 2005 mtx_unlock(&Giant); 2006 return (error); 2007 } 2008