1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 * $FreeBSD$ 38 */ 39 40 #include "opt_compat.h" 41 #include "opt_ktrace.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/sysproto.h> 47 #include <sys/malloc.h> 48 #include <sys/filedesc.h> 49 #include <sys/event.h> 50 #include <sys/proc.h> 51 #include <sys/fcntl.h> 52 #include <sys/file.h> 53 #include <sys/mbuf.h> 54 #include <sys/protosw.h> 55 #include <sys/socket.h> 56 #include <sys/socketvar.h> 57 #include <sys/signalvar.h> 58 #include <sys/uio.h> 59 #include <sys/vnode.h> 60 #include <sys/lock.h> 61 #include <sys/mount.h> 62 #ifdef KTRACE 63 #include <sys/ktrace.h> 64 #endif 65 #include <vm/vm.h> 66 #include <vm/vm_object.h> 67 #include <vm/vm_page.h> 68 #include <vm/vm_pageout.h> 69 #include <vm/vm_kern.h> 70 #include <vm/vm_extern.h> 71 72 static void sf_buf_init(void *arg); 73 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) 74 static struct sf_buf *sf_buf_alloc(void); 75 static void sf_buf_free(caddr_t addr, void *args); 76 77 static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags)); 78 static int recvit __P((struct proc *p, int s, struct msghdr *mp, 79 caddr_t namelenp)); 80 81 static int accept1 __P((struct proc *p, struct accept_args *uap, int compat)); 82 static int getsockname1 __P((struct proc *p, struct getsockname_args *uap, 83 int compat)); 84 static int getpeername1 __P((struct proc *p, struct getpeername_args *uap, 85 int compat)); 86 87 /* 88 * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the 89 * additional sf_lock mutex. 90 */ 91 static struct { 92 struct sf_buf *slh_first; 93 struct mtx sf_lock; 94 } sf_freelist; 95 96 static vm_offset_t sf_base; 97 static struct sf_buf *sf_bufs; 98 static u_int sf_buf_alloc_want; 99 100 /* 101 * System call interface to the socket abstraction. 102 */ 103 #if defined(COMPAT_43) || defined(COMPAT_SUNOS) 104 #define COMPAT_OLDSOCK 105 #endif 106 107 extern struct fileops socketops; 108 109 int 110 socket(p, uap) 111 struct proc *p; 112 register struct socket_args /* { 113 int domain; 114 int type; 115 int protocol; 116 } */ *uap; 117 { 118 struct filedesc *fdp = p->p_fd; 119 struct socket *so; 120 struct file *fp; 121 int fd, error; 122 123 error = falloc(p, &fp, &fd); 124 if (error) 125 return (error); 126 fhold(fp); 127 error = socreate(uap->domain, &so, uap->type, uap->protocol, p); 128 if (error) { 129 if (fdp->fd_ofiles[fd] == fp) { 130 fdp->fd_ofiles[fd] = NULL; 131 fdrop(fp, p); 132 } 133 } else { 134 fp->f_data = (caddr_t)so; 135 fp->f_flag = FREAD|FWRITE; 136 fp->f_ops = &socketops; 137 fp->f_type = DTYPE_SOCKET; 138 p->p_retval[0] = fd; 139 } 140 fdrop(fp, p); 141 return (error); 142 } 143 144 /* ARGSUSED */ 145 int 146 bind(p, uap) 147 struct proc *p; 148 register struct bind_args /* { 149 int s; 150 caddr_t name; 151 int namelen; 152 } */ *uap; 153 { 154 struct file *fp; 155 struct sockaddr *sa; 156 int error; 157 158 error = holdsock(p->p_fd, uap->s, &fp); 159 if (error) 160 return (error); 161 error = getsockaddr(&sa, uap->name, uap->namelen); 162 if (error) { 163 fdrop(fp, p); 164 return (error); 165 } 166 error = sobind((struct socket *)fp->f_data, sa, p); 167 FREE(sa, M_SONAME); 168 fdrop(fp, p); 169 return (error); 170 } 171 172 /* ARGSUSED */ 173 int 174 listen(p, uap) 175 struct proc *p; 176 register struct listen_args /* { 177 int s; 178 int backlog; 179 } */ *uap; 180 { 181 struct file *fp; 182 int error; 183 184 error = holdsock(p->p_fd, uap->s, &fp); 185 if (error) 186 return (error); 187 error = solisten((struct socket *)fp->f_data, uap->backlog, p); 188 fdrop(fp, p); 189 return(error); 190 } 191 192 static int 193 accept1(p, uap, compat) 194 struct proc *p; 195 register struct accept_args /* { 196 int s; 197 caddr_t name; 198 int *anamelen; 199 } */ *uap; 200 int compat; 201 { 202 struct filedesc *fdp = p->p_fd; 203 struct file *lfp = NULL; 204 struct file *nfp = NULL; 205 struct sockaddr *sa; 206 int namelen, error, s; 207 struct socket *head, *so; 208 int fd; 209 short fflag; /* type must match fp->f_flag */ 210 211 if (uap->name) { 212 error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen, 213 sizeof (namelen)); 214 if(error) 215 return (error); 216 } 217 error = holdsock(fdp, uap->s, &lfp); 218 if (error) 219 return (error); 220 s = splnet(); 221 head = (struct socket *)lfp->f_data; 222 if ((head->so_options & SO_ACCEPTCONN) == 0) { 223 splx(s); 224 error = EINVAL; 225 goto done; 226 } 227 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 228 splx(s); 229 error = EWOULDBLOCK; 230 goto done; 231 } 232 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 233 if (head->so_state & SS_CANTRCVMORE) { 234 head->so_error = ECONNABORTED; 235 break; 236 } 237 error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH, 238 "accept", 0); 239 if (error) { 240 splx(s); 241 goto done; 242 } 243 } 244 if (head->so_error) { 245 error = head->so_error; 246 head->so_error = 0; 247 splx(s); 248 goto done; 249 } 250 251 /* 252 * At this point we know that there is at least one connection 253 * ready to be accepted. Remove it from the queue prior to 254 * allocating the file descriptor for it since falloc() may 255 * block allowing another process to accept the connection 256 * instead. 257 */ 258 so = TAILQ_FIRST(&head->so_comp); 259 TAILQ_REMOVE(&head->so_comp, so, so_list); 260 head->so_qlen--; 261 262 fflag = lfp->f_flag; 263 error = falloc(p, &nfp, &fd); 264 if (error) { 265 /* 266 * Probably ran out of file descriptors. Put the 267 * unaccepted connection back onto the queue and 268 * do another wakeup so some other process might 269 * have a chance at it. 270 */ 271 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 272 head->so_qlen++; 273 wakeup_one(&head->so_timeo); 274 splx(s); 275 goto done; 276 } 277 fhold(nfp); 278 p->p_retval[0] = fd; 279 280 /* connection has been removed from the listen queue */ 281 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 282 283 so->so_state &= ~SS_COMP; 284 so->so_head = NULL; 285 if (head->so_sigio != NULL) 286 fsetown(fgetown(head->so_sigio), &so->so_sigio); 287 288 nfp->f_data = (caddr_t)so; 289 nfp->f_flag = fflag; 290 nfp->f_ops = &socketops; 291 nfp->f_type = DTYPE_SOCKET; 292 sa = 0; 293 (void) soaccept(so, &sa); 294 if (sa == NULL) { 295 namelen = 0; 296 if (uap->name) 297 goto gotnoname; 298 splx(s); 299 error = 0; 300 goto done; 301 } 302 if (uap->name) { 303 /* check sa_len before it is destroyed */ 304 if (namelen > sa->sa_len) 305 namelen = sa->sa_len; 306 #ifdef COMPAT_OLDSOCK 307 if (compat) 308 ((struct osockaddr *)sa)->sa_family = 309 sa->sa_family; 310 #endif 311 error = copyout(sa, (caddr_t)uap->name, (u_int)namelen); 312 if (!error) 313 gotnoname: 314 error = copyout((caddr_t)&namelen, 315 (caddr_t)uap->anamelen, sizeof (*uap->anamelen)); 316 } 317 if (sa) 318 FREE(sa, M_SONAME); 319 320 /* 321 * close the new descriptor, assuming someone hasn't ripped it 322 * out from under us. 323 */ 324 if (error) { 325 if (fdp->fd_ofiles[fd] == nfp) { 326 fdp->fd_ofiles[fd] = NULL; 327 fdrop(nfp, p); 328 } 329 } 330 splx(s); 331 332 /* 333 * Release explicitly held references before returning. 334 */ 335 done: 336 if (nfp != NULL) 337 fdrop(nfp, p); 338 fdrop(lfp, p); 339 return (error); 340 } 341 342 int 343 accept(p, uap) 344 struct proc *p; 345 struct accept_args *uap; 346 { 347 348 return (accept1(p, uap, 0)); 349 } 350 351 #ifdef COMPAT_OLDSOCK 352 int 353 oaccept(p, uap) 354 struct proc *p; 355 struct accept_args *uap; 356 { 357 358 return (accept1(p, uap, 1)); 359 } 360 #endif /* COMPAT_OLDSOCK */ 361 362 /* ARGSUSED */ 363 int 364 connect(p, uap) 365 struct proc *p; 366 register struct connect_args /* { 367 int s; 368 caddr_t name; 369 int namelen; 370 } */ *uap; 371 { 372 struct file *fp; 373 register struct socket *so; 374 struct sockaddr *sa; 375 int error, s; 376 377 error = holdsock(p->p_fd, uap->s, &fp); 378 if (error) 379 return (error); 380 so = (struct socket *)fp->f_data; 381 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 382 error = EALREADY; 383 goto done; 384 } 385 error = getsockaddr(&sa, uap->name, uap->namelen); 386 if (error) 387 goto done; 388 error = soconnect(so, sa, p); 389 if (error) 390 goto bad; 391 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 392 FREE(sa, M_SONAME); 393 error = EINPROGRESS; 394 goto done; 395 } 396 s = splnet(); 397 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 398 error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, 399 "connec", 0); 400 if (error) 401 break; 402 } 403 if (error == 0) { 404 error = so->so_error; 405 so->so_error = 0; 406 } 407 splx(s); 408 bad: 409 so->so_state &= ~SS_ISCONNECTING; 410 FREE(sa, M_SONAME); 411 if (error == ERESTART) 412 error = EINTR; 413 done: 414 fdrop(fp, p); 415 return (error); 416 } 417 418 int 419 socketpair(p, uap) 420 struct proc *p; 421 register struct socketpair_args /* { 422 int domain; 423 int type; 424 int protocol; 425 int *rsv; 426 } */ *uap; 427 { 428 register struct filedesc *fdp = p->p_fd; 429 struct file *fp1, *fp2; 430 struct socket *so1, *so2; 431 int fd, error, sv[2]; 432 433 error = socreate(uap->domain, &so1, uap->type, uap->protocol, p); 434 if (error) 435 return (error); 436 error = socreate(uap->domain, &so2, uap->type, uap->protocol, p); 437 if (error) 438 goto free1; 439 error = falloc(p, &fp1, &fd); 440 if (error) 441 goto free2; 442 fhold(fp1); 443 sv[0] = fd; 444 fp1->f_data = (caddr_t)so1; 445 error = falloc(p, &fp2, &fd); 446 if (error) 447 goto free3; 448 fhold(fp2); 449 fp2->f_data = (caddr_t)so2; 450 sv[1] = fd; 451 error = soconnect2(so1, so2); 452 if (error) 453 goto free4; 454 if (uap->type == SOCK_DGRAM) { 455 /* 456 * Datagram socket connection is asymmetric. 457 */ 458 error = soconnect2(so2, so1); 459 if (error) 460 goto free4; 461 } 462 fp1->f_flag = fp2->f_flag = FREAD|FWRITE; 463 fp1->f_ops = fp2->f_ops = &socketops; 464 fp1->f_type = fp2->f_type = DTYPE_SOCKET; 465 error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int)); 466 fdrop(fp1, p); 467 fdrop(fp2, p); 468 return (error); 469 free4: 470 if (fdp->fd_ofiles[sv[1]] == fp2) { 471 fdp->fd_ofiles[sv[1]] = NULL; 472 fdrop(fp2, p); 473 } 474 fdrop(fp2, p); 475 free3: 476 if (fdp->fd_ofiles[sv[0]] == fp1) { 477 fdp->fd_ofiles[sv[0]] = NULL; 478 fdrop(fp1, p); 479 } 480 fdrop(fp1, p); 481 free2: 482 (void)soclose(so2); 483 free1: 484 (void)soclose(so1); 485 return (error); 486 } 487 488 static int 489 sendit(p, s, mp, flags) 490 register struct proc *p; 491 int s; 492 register struct msghdr *mp; 493 int flags; 494 { 495 struct file *fp; 496 struct uio auio; 497 register struct iovec *iov; 498 register int i; 499 struct mbuf *control; 500 struct sockaddr *to; 501 int len, error; 502 struct socket *so; 503 #ifdef KTRACE 504 struct iovec *ktriov = NULL; 505 struct uio ktruio; 506 #endif 507 508 error = holdsock(p->p_fd, s, &fp); 509 if (error) 510 return (error); 511 auio.uio_iov = mp->msg_iov; 512 auio.uio_iovcnt = mp->msg_iovlen; 513 auio.uio_segflg = UIO_USERSPACE; 514 auio.uio_rw = UIO_WRITE; 515 auio.uio_procp = p; 516 auio.uio_offset = 0; /* XXX */ 517 auio.uio_resid = 0; 518 iov = mp->msg_iov; 519 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 520 if ((auio.uio_resid += iov->iov_len) < 0) { 521 fdrop(fp, p); 522 return (EINVAL); 523 } 524 } 525 if (mp->msg_name) { 526 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 527 if (error) { 528 fdrop(fp, p); 529 return (error); 530 } 531 } else { 532 to = 0; 533 } 534 if (mp->msg_control) { 535 if (mp->msg_controllen < sizeof(struct cmsghdr) 536 #ifdef COMPAT_OLDSOCK 537 && mp->msg_flags != MSG_COMPAT 538 #endif 539 ) { 540 error = EINVAL; 541 goto bad; 542 } 543 error = sockargs(&control, mp->msg_control, 544 mp->msg_controllen, MT_CONTROL); 545 if (error) 546 goto bad; 547 #ifdef COMPAT_OLDSOCK 548 if (mp->msg_flags == MSG_COMPAT) { 549 register struct cmsghdr *cm; 550 551 M_PREPEND(control, sizeof(*cm), M_WAIT); 552 if (control == 0) { 553 error = ENOBUFS; 554 goto bad; 555 } else { 556 cm = mtod(control, struct cmsghdr *); 557 cm->cmsg_len = control->m_len; 558 cm->cmsg_level = SOL_SOCKET; 559 cm->cmsg_type = SCM_RIGHTS; 560 } 561 } 562 #endif 563 } else { 564 control = 0; 565 } 566 #ifdef KTRACE 567 if (KTRPOINT(p, KTR_GENIO)) { 568 int iovlen = auio.uio_iovcnt * sizeof (struct iovec); 569 570 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 571 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 572 ktruio = auio; 573 } 574 #endif 575 len = auio.uio_resid; 576 so = (struct socket *)fp->f_data; 577 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control, 578 flags, p); 579 if (error) { 580 if (auio.uio_resid != len && (error == ERESTART || 581 error == EINTR || error == EWOULDBLOCK)) 582 error = 0; 583 if (error == EPIPE) 584 psignal(p, SIGPIPE); 585 } 586 if (error == 0) 587 p->p_retval[0] = len - auio.uio_resid; 588 #ifdef KTRACE 589 if (ktriov != NULL) { 590 if (error == 0) { 591 ktruio.uio_iov = ktriov; 592 ktruio.uio_resid = p->p_retval[0]; 593 ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error); 594 } 595 FREE(ktriov, M_TEMP); 596 } 597 #endif 598 bad: 599 fdrop(fp, p); 600 if (to) 601 FREE(to, M_SONAME); 602 return (error); 603 } 604 605 int 606 sendto(p, uap) 607 struct proc *p; 608 register struct sendto_args /* { 609 int s; 610 caddr_t buf; 611 size_t len; 612 int flags; 613 caddr_t to; 614 int tolen; 615 } */ *uap; 616 { 617 struct msghdr msg; 618 struct iovec aiov; 619 620 msg.msg_name = uap->to; 621 msg.msg_namelen = uap->tolen; 622 msg.msg_iov = &aiov; 623 msg.msg_iovlen = 1; 624 msg.msg_control = 0; 625 #ifdef COMPAT_OLDSOCK 626 msg.msg_flags = 0; 627 #endif 628 aiov.iov_base = uap->buf; 629 aiov.iov_len = uap->len; 630 return (sendit(p, uap->s, &msg, uap->flags)); 631 } 632 633 #ifdef COMPAT_OLDSOCK 634 int 635 osend(p, uap) 636 struct proc *p; 637 register struct osend_args /* { 638 int s; 639 caddr_t buf; 640 int len; 641 int flags; 642 } */ *uap; 643 { 644 struct msghdr msg; 645 struct iovec aiov; 646 647 msg.msg_name = 0; 648 msg.msg_namelen = 0; 649 msg.msg_iov = &aiov; 650 msg.msg_iovlen = 1; 651 aiov.iov_base = uap->buf; 652 aiov.iov_len = uap->len; 653 msg.msg_control = 0; 654 msg.msg_flags = 0; 655 return (sendit(p, uap->s, &msg, uap->flags)); 656 } 657 658 int 659 osendmsg(p, uap) 660 struct proc *p; 661 register struct osendmsg_args /* { 662 int s; 663 caddr_t msg; 664 int flags; 665 } */ *uap; 666 { 667 struct msghdr msg; 668 struct iovec aiov[UIO_SMALLIOV], *iov; 669 int error; 670 671 error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr)); 672 if (error) 673 return (error); 674 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 675 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 676 return (EMSGSIZE); 677 MALLOC(iov, struct iovec *, 678 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 679 M_WAITOK); 680 } else 681 iov = aiov; 682 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 683 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 684 if (error) 685 goto done; 686 msg.msg_flags = MSG_COMPAT; 687 msg.msg_iov = iov; 688 error = sendit(p, uap->s, &msg, uap->flags); 689 done: 690 if (iov != aiov) 691 FREE(iov, M_IOV); 692 return (error); 693 } 694 #endif 695 696 int 697 sendmsg(p, uap) 698 struct proc *p; 699 register struct sendmsg_args /* { 700 int s; 701 caddr_t msg; 702 int flags; 703 } */ *uap; 704 { 705 struct msghdr msg; 706 struct iovec aiov[UIO_SMALLIOV], *iov; 707 int error; 708 709 error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg)); 710 if (error) 711 return (error); 712 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 713 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 714 return (EMSGSIZE); 715 MALLOC(iov, struct iovec *, 716 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 717 M_WAITOK); 718 } else 719 iov = aiov; 720 if (msg.msg_iovlen && 721 (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 722 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 723 goto done; 724 msg.msg_iov = iov; 725 #ifdef COMPAT_OLDSOCK 726 msg.msg_flags = 0; 727 #endif 728 error = sendit(p, uap->s, &msg, uap->flags); 729 done: 730 if (iov != aiov) 731 FREE(iov, M_IOV); 732 return (error); 733 } 734 735 static int 736 recvit(p, s, mp, namelenp) 737 register struct proc *p; 738 int s; 739 register struct msghdr *mp; 740 caddr_t namelenp; 741 { 742 struct file *fp; 743 struct uio auio; 744 register struct iovec *iov; 745 register int i; 746 int len, error; 747 struct mbuf *m, *control = 0; 748 caddr_t ctlbuf; 749 struct socket *so; 750 struct sockaddr *fromsa = 0; 751 #ifdef KTRACE 752 struct iovec *ktriov = NULL; 753 struct uio ktruio; 754 #endif 755 756 error = holdsock(p->p_fd, s, &fp); 757 if (error) 758 return (error); 759 auio.uio_iov = mp->msg_iov; 760 auio.uio_iovcnt = mp->msg_iovlen; 761 auio.uio_segflg = UIO_USERSPACE; 762 auio.uio_rw = UIO_READ; 763 auio.uio_procp = p; 764 auio.uio_offset = 0; /* XXX */ 765 auio.uio_resid = 0; 766 iov = mp->msg_iov; 767 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 768 if ((auio.uio_resid += iov->iov_len) < 0) { 769 fdrop(fp, p); 770 return (EINVAL); 771 } 772 } 773 #ifdef KTRACE 774 if (KTRPOINT(p, KTR_GENIO)) { 775 int iovlen = auio.uio_iovcnt * sizeof (struct iovec); 776 777 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 778 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 779 ktruio = auio; 780 } 781 #endif 782 len = auio.uio_resid; 783 so = (struct socket *)fp->f_data; 784 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 785 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 786 &mp->msg_flags); 787 if (error) { 788 if (auio.uio_resid != len && (error == ERESTART || 789 error == EINTR || error == EWOULDBLOCK)) 790 error = 0; 791 } 792 #ifdef KTRACE 793 if (ktriov != NULL) { 794 if (error == 0) { 795 ktruio.uio_iov = ktriov; 796 ktruio.uio_resid = len - auio.uio_resid; 797 ktrgenio(p->p_tracep, s, UIO_READ, &ktruio, error); 798 } 799 FREE(ktriov, M_TEMP); 800 } 801 #endif 802 if (error) 803 goto out; 804 p->p_retval[0] = len - auio.uio_resid; 805 if (mp->msg_name) { 806 len = mp->msg_namelen; 807 if (len <= 0 || fromsa == 0) 808 len = 0; 809 else { 810 #ifndef MIN 811 #define MIN(a,b) ((a)>(b)?(b):(a)) 812 #endif 813 /* save sa_len before it is destroyed by MSG_COMPAT */ 814 len = MIN(len, fromsa->sa_len); 815 #ifdef COMPAT_OLDSOCK 816 if (mp->msg_flags & MSG_COMPAT) 817 ((struct osockaddr *)fromsa)->sa_family = 818 fromsa->sa_family; 819 #endif 820 error = copyout(fromsa, 821 (caddr_t)mp->msg_name, (unsigned)len); 822 if (error) 823 goto out; 824 } 825 mp->msg_namelen = len; 826 if (namelenp && 827 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) { 828 #ifdef COMPAT_OLDSOCK 829 if (mp->msg_flags & MSG_COMPAT) 830 error = 0; /* old recvfrom didn't check */ 831 else 832 #endif 833 goto out; 834 } 835 } 836 if (mp->msg_control) { 837 #ifdef COMPAT_OLDSOCK 838 /* 839 * We assume that old recvmsg calls won't receive access 840 * rights and other control info, esp. as control info 841 * is always optional and those options didn't exist in 4.3. 842 * If we receive rights, trim the cmsghdr; anything else 843 * is tossed. 844 */ 845 if (control && mp->msg_flags & MSG_COMPAT) { 846 if (mtod(control, struct cmsghdr *)->cmsg_level != 847 SOL_SOCKET || 848 mtod(control, struct cmsghdr *)->cmsg_type != 849 SCM_RIGHTS) { 850 mp->msg_controllen = 0; 851 goto out; 852 } 853 control->m_len -= sizeof (struct cmsghdr); 854 control->m_data += sizeof (struct cmsghdr); 855 } 856 #endif 857 len = mp->msg_controllen; 858 m = control; 859 mp->msg_controllen = 0; 860 ctlbuf = (caddr_t) mp->msg_control; 861 862 while (m && len > 0) { 863 unsigned int tocopy; 864 865 if (len >= m->m_len) 866 tocopy = m->m_len; 867 else { 868 mp->msg_flags |= MSG_CTRUNC; 869 tocopy = len; 870 } 871 872 if ((error = copyout((caddr_t)mtod(m, caddr_t), 873 ctlbuf, tocopy)) != 0) 874 goto out; 875 876 ctlbuf += tocopy; 877 len -= tocopy; 878 m = m->m_next; 879 } 880 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 881 } 882 out: 883 fdrop(fp, p); 884 if (fromsa) 885 FREE(fromsa, M_SONAME); 886 if (control) 887 m_freem(control); 888 return (error); 889 } 890 891 int 892 recvfrom(p, uap) 893 struct proc *p; 894 register struct recvfrom_args /* { 895 int s; 896 caddr_t buf; 897 size_t len; 898 int flags; 899 caddr_t from; 900 int *fromlenaddr; 901 } */ *uap; 902 { 903 struct msghdr msg; 904 struct iovec aiov; 905 int error; 906 907 if (uap->fromlenaddr) { 908 error = copyin((caddr_t)uap->fromlenaddr, 909 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen)); 910 if (error) 911 return (error); 912 } else 913 msg.msg_namelen = 0; 914 msg.msg_name = uap->from; 915 msg.msg_iov = &aiov; 916 msg.msg_iovlen = 1; 917 aiov.iov_base = uap->buf; 918 aiov.iov_len = uap->len; 919 msg.msg_control = 0; 920 msg.msg_flags = uap->flags; 921 return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr)); 922 } 923 924 #ifdef COMPAT_OLDSOCK 925 int 926 orecvfrom(p, uap) 927 struct proc *p; 928 struct recvfrom_args *uap; 929 { 930 931 uap->flags |= MSG_COMPAT; 932 return (recvfrom(p, uap)); 933 } 934 #endif 935 936 937 #ifdef COMPAT_OLDSOCK 938 int 939 orecv(p, uap) 940 struct proc *p; 941 register struct orecv_args /* { 942 int s; 943 caddr_t buf; 944 int len; 945 int flags; 946 } */ *uap; 947 { 948 struct msghdr msg; 949 struct iovec aiov; 950 951 msg.msg_name = 0; 952 msg.msg_namelen = 0; 953 msg.msg_iov = &aiov; 954 msg.msg_iovlen = 1; 955 aiov.iov_base = uap->buf; 956 aiov.iov_len = uap->len; 957 msg.msg_control = 0; 958 msg.msg_flags = uap->flags; 959 return (recvit(p, uap->s, &msg, (caddr_t)0)); 960 } 961 962 /* 963 * Old recvmsg. This code takes advantage of the fact that the old msghdr 964 * overlays the new one, missing only the flags, and with the (old) access 965 * rights where the control fields are now. 966 */ 967 int 968 orecvmsg(p, uap) 969 struct proc *p; 970 register struct orecvmsg_args /* { 971 int s; 972 struct omsghdr *msg; 973 int flags; 974 } */ *uap; 975 { 976 struct msghdr msg; 977 struct iovec aiov[UIO_SMALLIOV], *iov; 978 int error; 979 980 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, 981 sizeof (struct omsghdr)); 982 if (error) 983 return (error); 984 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 985 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 986 return (EMSGSIZE); 987 MALLOC(iov, struct iovec *, 988 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 989 M_WAITOK); 990 } else 991 iov = aiov; 992 msg.msg_flags = uap->flags | MSG_COMPAT; 993 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 994 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 995 if (error) 996 goto done; 997 msg.msg_iov = iov; 998 error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen); 999 1000 if (msg.msg_controllen && error == 0) 1001 error = copyout((caddr_t)&msg.msg_controllen, 1002 (caddr_t)&uap->msg->msg_accrightslen, sizeof (int)); 1003 done: 1004 if (iov != aiov) 1005 FREE(iov, M_IOV); 1006 return (error); 1007 } 1008 #endif 1009 1010 int 1011 recvmsg(p, uap) 1012 struct proc *p; 1013 register struct recvmsg_args /* { 1014 int s; 1015 struct msghdr *msg; 1016 int flags; 1017 } */ *uap; 1018 { 1019 struct msghdr msg; 1020 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1021 register int error; 1022 1023 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg)); 1024 if (error) 1025 return (error); 1026 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1027 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 1028 return (EMSGSIZE); 1029 MALLOC(iov, struct iovec *, 1030 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1031 M_WAITOK); 1032 } else 1033 iov = aiov; 1034 #ifdef COMPAT_OLDSOCK 1035 msg.msg_flags = uap->flags &~ MSG_COMPAT; 1036 #else 1037 msg.msg_flags = uap->flags; 1038 #endif 1039 uiov = msg.msg_iov; 1040 msg.msg_iov = iov; 1041 error = copyin((caddr_t)uiov, (caddr_t)iov, 1042 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1043 if (error) 1044 goto done; 1045 error = recvit(p, uap->s, &msg, (caddr_t)0); 1046 if (!error) { 1047 msg.msg_iov = uiov; 1048 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg)); 1049 } 1050 done: 1051 if (iov != aiov) 1052 FREE(iov, M_IOV); 1053 return (error); 1054 } 1055 1056 /* ARGSUSED */ 1057 int 1058 shutdown(p, uap) 1059 struct proc *p; 1060 register struct shutdown_args /* { 1061 int s; 1062 int how; 1063 } */ *uap; 1064 { 1065 struct file *fp; 1066 int error; 1067 1068 error = holdsock(p->p_fd, uap->s, &fp); 1069 if (error) 1070 return (error); 1071 error = soshutdown((struct socket *)fp->f_data, uap->how); 1072 fdrop(fp, p); 1073 return(error); 1074 } 1075 1076 /* ARGSUSED */ 1077 int 1078 setsockopt(p, uap) 1079 struct proc *p; 1080 register struct setsockopt_args /* { 1081 int s; 1082 int level; 1083 int name; 1084 caddr_t val; 1085 int valsize; 1086 } */ *uap; 1087 { 1088 struct file *fp; 1089 struct sockopt sopt; 1090 int error; 1091 1092 if (uap->val == 0 && uap->valsize != 0) 1093 return (EFAULT); 1094 if (uap->valsize < 0) 1095 return (EINVAL); 1096 1097 error = holdsock(p->p_fd, uap->s, &fp); 1098 if (error) 1099 return (error); 1100 1101 sopt.sopt_dir = SOPT_SET; 1102 sopt.sopt_level = uap->level; 1103 sopt.sopt_name = uap->name; 1104 sopt.sopt_val = uap->val; 1105 sopt.sopt_valsize = uap->valsize; 1106 sopt.sopt_p = p; 1107 error = sosetopt((struct socket *)fp->f_data, &sopt); 1108 fdrop(fp, p); 1109 return(error); 1110 } 1111 1112 /* ARGSUSED */ 1113 int 1114 getsockopt(p, uap) 1115 struct proc *p; 1116 register struct getsockopt_args /* { 1117 int s; 1118 int level; 1119 int name; 1120 caddr_t val; 1121 int *avalsize; 1122 } */ *uap; 1123 { 1124 int valsize, error; 1125 struct file *fp; 1126 struct sockopt sopt; 1127 1128 error = holdsock(p->p_fd, uap->s, &fp); 1129 if (error) 1130 return (error); 1131 if (uap->val) { 1132 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize, 1133 sizeof (valsize)); 1134 if (error) { 1135 fdrop(fp, p); 1136 return (error); 1137 } 1138 if (valsize < 0) { 1139 fdrop(fp, p); 1140 return (EINVAL); 1141 } 1142 } else { 1143 valsize = 0; 1144 } 1145 1146 sopt.sopt_dir = SOPT_GET; 1147 sopt.sopt_level = uap->level; 1148 sopt.sopt_name = uap->name; 1149 sopt.sopt_val = uap->val; 1150 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1151 sopt.sopt_p = p; 1152 1153 error = sogetopt((struct socket *)fp->f_data, &sopt); 1154 if (error == 0) { 1155 valsize = sopt.sopt_valsize; 1156 error = copyout((caddr_t)&valsize, 1157 (caddr_t)uap->avalsize, sizeof (valsize)); 1158 } 1159 fdrop(fp, p); 1160 return (error); 1161 } 1162 1163 /* 1164 * Get socket name. 1165 */ 1166 /* ARGSUSED */ 1167 static int 1168 getsockname1(p, uap, compat) 1169 struct proc *p; 1170 register struct getsockname_args /* { 1171 int fdes; 1172 caddr_t asa; 1173 int *alen; 1174 } */ *uap; 1175 int compat; 1176 { 1177 struct file *fp; 1178 register struct socket *so; 1179 struct sockaddr *sa; 1180 int len, error; 1181 1182 error = holdsock(p->p_fd, uap->fdes, &fp); 1183 if (error) 1184 return (error); 1185 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); 1186 if (error) { 1187 fdrop(fp, p); 1188 return (error); 1189 } 1190 so = (struct socket *)fp->f_data; 1191 sa = 0; 1192 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1193 if (error) 1194 goto bad; 1195 if (sa == 0) { 1196 len = 0; 1197 goto gotnothing; 1198 } 1199 1200 len = MIN(len, sa->sa_len); 1201 #ifdef COMPAT_OLDSOCK 1202 if (compat) 1203 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1204 #endif 1205 error = copyout(sa, (caddr_t)uap->asa, (u_int)len); 1206 if (error == 0) 1207 gotnothing: 1208 error = copyout((caddr_t)&len, (caddr_t)uap->alen, 1209 sizeof (len)); 1210 bad: 1211 if (sa) 1212 FREE(sa, M_SONAME); 1213 fdrop(fp, p); 1214 return (error); 1215 } 1216 1217 int 1218 getsockname(p, uap) 1219 struct proc *p; 1220 struct getsockname_args *uap; 1221 { 1222 1223 return (getsockname1(p, uap, 0)); 1224 } 1225 1226 #ifdef COMPAT_OLDSOCK 1227 int 1228 ogetsockname(p, uap) 1229 struct proc *p; 1230 struct getsockname_args *uap; 1231 { 1232 1233 return (getsockname1(p, uap, 1)); 1234 } 1235 #endif /* COMPAT_OLDSOCK */ 1236 1237 /* 1238 * Get name of peer for connected socket. 1239 */ 1240 /* ARGSUSED */ 1241 static int 1242 getpeername1(p, uap, compat) 1243 struct proc *p; 1244 register struct getpeername_args /* { 1245 int fdes; 1246 caddr_t asa; 1247 int *alen; 1248 } */ *uap; 1249 int compat; 1250 { 1251 struct file *fp; 1252 register struct socket *so; 1253 struct sockaddr *sa; 1254 int len, error; 1255 1256 error = holdsock(p->p_fd, uap->fdes, &fp); 1257 if (error) 1258 return (error); 1259 so = (struct socket *)fp->f_data; 1260 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1261 fdrop(fp, p); 1262 return (ENOTCONN); 1263 } 1264 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); 1265 if (error) { 1266 fdrop(fp, p); 1267 return (error); 1268 } 1269 sa = 0; 1270 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1271 if (error) 1272 goto bad; 1273 if (sa == 0) { 1274 len = 0; 1275 goto gotnothing; 1276 } 1277 len = MIN(len, sa->sa_len); 1278 #ifdef COMPAT_OLDSOCK 1279 if (compat) 1280 ((struct osockaddr *)sa)->sa_family = 1281 sa->sa_family; 1282 #endif 1283 error = copyout(sa, (caddr_t)uap->asa, (u_int)len); 1284 if (error) 1285 goto bad; 1286 gotnothing: 1287 error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len)); 1288 bad: 1289 if (sa) 1290 FREE(sa, M_SONAME); 1291 fdrop(fp, p); 1292 return (error); 1293 } 1294 1295 int 1296 getpeername(p, uap) 1297 struct proc *p; 1298 struct getpeername_args *uap; 1299 { 1300 1301 return (getpeername1(p, uap, 0)); 1302 } 1303 1304 #ifdef COMPAT_OLDSOCK 1305 int 1306 ogetpeername(p, uap) 1307 struct proc *p; 1308 struct ogetpeername_args *uap; 1309 { 1310 1311 /* XXX uap should have type `getpeername_args *' to begin with. */ 1312 return (getpeername1(p, (struct getpeername_args *)uap, 1)); 1313 } 1314 #endif /* COMPAT_OLDSOCK */ 1315 1316 int 1317 sockargs(mp, buf, buflen, type) 1318 struct mbuf **mp; 1319 caddr_t buf; 1320 int buflen, type; 1321 { 1322 register struct sockaddr *sa; 1323 register struct mbuf *m; 1324 int error; 1325 1326 if ((u_int)buflen > MLEN) { 1327 #ifdef COMPAT_OLDSOCK 1328 if (type == MT_SONAME && (u_int)buflen <= 112) 1329 buflen = MLEN; /* unix domain compat. hack */ 1330 else 1331 #endif 1332 return (EINVAL); 1333 } 1334 m = m_get(M_WAIT, type); 1335 if (m == NULL) 1336 return (ENOBUFS); 1337 m->m_len = buflen; 1338 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1339 if (error) 1340 (void) m_free(m); 1341 else { 1342 *mp = m; 1343 if (type == MT_SONAME) { 1344 sa = mtod(m, struct sockaddr *); 1345 1346 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1347 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1348 sa->sa_family = sa->sa_len; 1349 #endif 1350 sa->sa_len = buflen; 1351 } 1352 } 1353 return (error); 1354 } 1355 1356 int 1357 getsockaddr(namp, uaddr, len) 1358 struct sockaddr **namp; 1359 caddr_t uaddr; 1360 size_t len; 1361 { 1362 struct sockaddr *sa; 1363 int error; 1364 1365 if (len > SOCK_MAXADDRLEN) 1366 return ENAMETOOLONG; 1367 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1368 error = copyin(uaddr, sa, len); 1369 if (error) { 1370 FREE(sa, M_SONAME); 1371 } else { 1372 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1373 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1374 sa->sa_family = sa->sa_len; 1375 #endif 1376 sa->sa_len = len; 1377 *namp = sa; 1378 } 1379 return error; 1380 } 1381 1382 /* 1383 * holdsock() - load the struct file pointer associated 1384 * with a socket into *fpp. If an error occurs, non-zero 1385 * will be returned and *fpp will be set to NULL. 1386 */ 1387 int 1388 holdsock(fdp, fdes, fpp) 1389 struct filedesc *fdp; 1390 int fdes; 1391 struct file **fpp; 1392 { 1393 register struct file *fp = NULL; 1394 int error = 0; 1395 1396 if ((unsigned)fdes >= fdp->fd_nfiles || 1397 (fp = fdp->fd_ofiles[fdes]) == NULL) { 1398 error = EBADF; 1399 } else if (fp->f_type != DTYPE_SOCKET) { 1400 error = ENOTSOCK; 1401 fp = NULL; 1402 } else { 1403 fhold(fp); 1404 } 1405 *fpp = fp; 1406 return(error); 1407 } 1408 1409 /* 1410 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) 1411 * XXX - The sf_buf functions are currently private to sendfile(2), so have 1412 * been made static, but may be useful in the future for doing zero-copy in 1413 * other parts of the networking code. 1414 */ 1415 static void 1416 sf_buf_init(void *arg) 1417 { 1418 int i; 1419 1420 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", MTX_DEF); 1421 mtx_enter(&sf_freelist.sf_lock, MTX_DEF); 1422 SLIST_INIT(&sf_freelist); 1423 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE); 1424 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT); 1425 bzero(sf_bufs, nsfbufs * sizeof(struct sf_buf)); 1426 for (i = 0; i < nsfbufs; i++) { 1427 sf_bufs[i].kva = sf_base + i * PAGE_SIZE; 1428 SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list); 1429 } 1430 sf_buf_alloc_want = 0; 1431 mtx_exit(&sf_freelist.sf_lock, MTX_DEF); 1432 } 1433 1434 /* 1435 * Get an sf_buf from the freelist. Will block if none are available. 1436 */ 1437 static struct sf_buf * 1438 sf_buf_alloc() 1439 { 1440 struct sf_buf *sf; 1441 1442 mtx_enter(&sf_freelist.sf_lock, MTX_DEF); 1443 while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) { 1444 sf_buf_alloc_want++; 1445 msleep(&sf_freelist, &sf_freelist.sf_lock, PVM, "sfbufa", 0); 1446 } 1447 SLIST_REMOVE_HEAD(&sf_freelist, free_list); 1448 mtx_exit(&sf_freelist.sf_lock, MTX_DEF); 1449 return (sf); 1450 } 1451 1452 #define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT]) 1453 1454 /* 1455 * Detatch mapped page and release resources back to the system. 1456 */ 1457 static void 1458 sf_buf_free(caddr_t addr, void *args) 1459 { 1460 struct sf_buf *sf; 1461 struct vm_page *m; 1462 int s; 1463 1464 sf = dtosf(addr); 1465 pmap_qremove((vm_offset_t)addr, 1); 1466 m = sf->m; 1467 s = splvm(); 1468 vm_page_unwire(m, 0); 1469 /* 1470 * Check for the object going away on us. This can 1471 * happen since we don't hold a reference to it. 1472 * If so, we're responsible for freeing the page. 1473 */ 1474 if (m->wire_count == 0 && m->object == NULL) 1475 vm_page_free(m); 1476 splx(s); 1477 sf->m = NULL; 1478 mtx_enter(&sf_freelist.sf_lock, MTX_DEF); 1479 SLIST_INSERT_HEAD(&sf_freelist, sf, free_list); 1480 if (sf_buf_alloc_want) { 1481 sf_buf_alloc_want--; 1482 wakeup_one(&sf_freelist); 1483 } 1484 mtx_exit(&sf_freelist.sf_lock, MTX_DEF); 1485 } 1486 1487 /* 1488 * sendfile(2) 1489 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1490 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1491 * 1492 * Send a file specified by 'fd' and starting at 'offset' to a socket 1493 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1494 * nbytes == 0. Optionally add a header and/or trailer to the socket 1495 * output. If specified, write the total number of bytes sent into *sbytes. 1496 */ 1497 int 1498 sendfile(struct proc *p, struct sendfile_args *uap) 1499 { 1500 struct file *fp; 1501 struct filedesc *fdp = p->p_fd; 1502 struct vnode *vp; 1503 struct vm_object *obj; 1504 struct socket *so; 1505 struct mbuf *m; 1506 struct sf_buf *sf; 1507 struct vm_page *pg; 1508 struct writev_args nuap; 1509 struct sf_hdtr hdtr; 1510 off_t off, xfsize, sbytes = 0; 1511 int error = 0, s; 1512 1513 vp = NULL; 1514 /* 1515 * Do argument checking. Must be a regular file in, stream 1516 * type and connected socket out, positive offset. 1517 */ 1518 fp = holdfp(fdp, uap->fd, FREAD); 1519 if (fp == NULL) { 1520 error = EBADF; 1521 goto done; 1522 } 1523 if (fp->f_type != DTYPE_VNODE) { 1524 error = EINVAL; 1525 goto done; 1526 } 1527 vp = (struct vnode *)fp->f_data; 1528 vref(vp); 1529 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1530 error = EINVAL; 1531 goto done; 1532 } 1533 fdrop(fp, p); 1534 error = holdsock(p->p_fd, uap->s, &fp); 1535 if (error) 1536 goto done; 1537 so = (struct socket *)fp->f_data; 1538 if (so->so_type != SOCK_STREAM) { 1539 error = EINVAL; 1540 goto done; 1541 } 1542 if ((so->so_state & SS_ISCONNECTED) == 0) { 1543 error = ENOTCONN; 1544 goto done; 1545 } 1546 if (uap->offset < 0) { 1547 error = EINVAL; 1548 goto done; 1549 } 1550 1551 /* 1552 * If specified, get the pointer to the sf_hdtr struct for 1553 * any headers/trailers. 1554 */ 1555 if (uap->hdtr != NULL) { 1556 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1557 if (error) 1558 goto done; 1559 /* 1560 * Send any headers. Wimp out and use writev(2). 1561 */ 1562 if (hdtr.headers != NULL) { 1563 nuap.fd = uap->s; 1564 nuap.iovp = hdtr.headers; 1565 nuap.iovcnt = hdtr.hdr_cnt; 1566 error = writev(p, &nuap); 1567 if (error) 1568 goto done; 1569 sbytes += p->p_retval[0]; 1570 } 1571 } 1572 1573 /* 1574 * Protect against multiple writers to the socket. 1575 */ 1576 (void) sblock(&so->so_snd, M_WAITOK); 1577 1578 /* 1579 * Loop through the pages in the file, starting with the requested 1580 * offset. Get a file page (do I/O if necessary), map the file page 1581 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1582 * it on the socket. 1583 */ 1584 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1585 vm_pindex_t pindex; 1586 vm_offset_t pgoff; 1587 1588 pindex = OFF_TO_IDX(off); 1589 retry_lookup: 1590 /* 1591 * Calculate the amount to transfer. Not to exceed a page, 1592 * the EOF, or the passed in nbytes. 1593 */ 1594 xfsize = obj->un_pager.vnp.vnp_size - off; 1595 if (xfsize > PAGE_SIZE) 1596 xfsize = PAGE_SIZE; 1597 pgoff = (vm_offset_t)(off & PAGE_MASK); 1598 if (PAGE_SIZE - pgoff < xfsize) 1599 xfsize = PAGE_SIZE - pgoff; 1600 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1601 xfsize = uap->nbytes - sbytes; 1602 if (xfsize <= 0) 1603 break; 1604 /* 1605 * Optimize the non-blocking case by looking at the socket space 1606 * before going to the extra work of constituting the sf_buf. 1607 */ 1608 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1609 if (so->so_state & SS_CANTSENDMORE) 1610 error = EPIPE; 1611 else 1612 error = EAGAIN; 1613 sbunlock(&so->so_snd); 1614 goto done; 1615 } 1616 /* 1617 * Attempt to look up the page. 1618 * 1619 * Allocate if not found 1620 * 1621 * Wait and loop if busy. 1622 */ 1623 pg = vm_page_lookup(obj, pindex); 1624 1625 if (pg == NULL) { 1626 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL); 1627 if (pg == NULL) { 1628 VM_WAIT; 1629 goto retry_lookup; 1630 } 1631 vm_page_wakeup(pg); 1632 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) { 1633 goto retry_lookup; 1634 } 1635 1636 /* 1637 * Wire the page so it does not get ripped out from under 1638 * us. 1639 */ 1640 1641 vm_page_wire(pg); 1642 1643 /* 1644 * If page is not valid for what we need, initiate I/O 1645 */ 1646 1647 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1648 struct uio auio; 1649 struct iovec aiov; 1650 int bsize; 1651 1652 /* 1653 * Ensure that our page is still around when the I/O 1654 * completes. 1655 */ 1656 vm_page_io_start(pg); 1657 1658 /* 1659 * Get the page from backing store. 1660 */ 1661 bsize = vp->v_mount->mnt_stat.f_iosize; 1662 auio.uio_iov = &aiov; 1663 auio.uio_iovcnt = 1; 1664 aiov.iov_base = 0; 1665 aiov.iov_len = MAXBSIZE; 1666 auio.uio_resid = MAXBSIZE; 1667 auio.uio_offset = trunc_page(off); 1668 auio.uio_segflg = UIO_NOCOPY; 1669 auio.uio_rw = UIO_READ; 1670 auio.uio_procp = p; 1671 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p); 1672 error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16), 1673 p->p_ucred); 1674 VOP_UNLOCK(vp, 0, p); 1675 vm_page_flag_clear(pg, PG_ZERO); 1676 vm_page_io_finish(pg); 1677 if (error) { 1678 vm_page_unwire(pg, 0); 1679 /* 1680 * See if anyone else might know about this page. 1681 * If not and it is not valid, then free it. 1682 */ 1683 if (pg->wire_count == 0 && pg->valid == 0 && 1684 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1685 pg->hold_count == 0) { 1686 vm_page_busy(pg); 1687 vm_page_free(pg); 1688 } 1689 sbunlock(&so->so_snd); 1690 goto done; 1691 } 1692 } 1693 1694 /* 1695 * Allocate a kernel virtual page and insert the physical page 1696 * into it. 1697 */ 1698 1699 sf = sf_buf_alloc(); 1700 sf->m = pg; 1701 pmap_qenter(sf->kva, &pg, 1); 1702 /* 1703 * Get an mbuf header and set it up as having external storage. 1704 */ 1705 MGETHDR(m, M_WAIT, MT_DATA); 1706 if (m == NULL) { 1707 error = ENOBUFS; 1708 goto done; 1709 } 1710 /* 1711 * Setup external storage for mbuf. 1712 */ 1713 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL, M_RDONLY, 1714 EXT_SFBUF); 1715 m->m_data = (char *) sf->kva + pgoff; 1716 m->m_pkthdr.len = m->m_len = xfsize; 1717 /* 1718 * Add the buffer to the socket buffer chain. 1719 */ 1720 s = splnet(); 1721 retry_space: 1722 /* 1723 * Make sure that the socket is still able to take more data. 1724 * CANTSENDMORE being true usually means that the connection 1725 * was closed. so_error is true when an error was sensed after 1726 * a previous send. 1727 * The state is checked after the page mapping and buffer 1728 * allocation above since those operations may block and make 1729 * any socket checks stale. From this point forward, nothing 1730 * blocks before the pru_send (or more accurately, any blocking 1731 * results in a loop back to here to re-check). 1732 */ 1733 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1734 if (so->so_state & SS_CANTSENDMORE) { 1735 error = EPIPE; 1736 } else { 1737 error = so->so_error; 1738 so->so_error = 0; 1739 } 1740 m_freem(m); 1741 sbunlock(&so->so_snd); 1742 splx(s); 1743 goto done; 1744 } 1745 /* 1746 * Wait for socket space to become available. We do this just 1747 * after checking the connection state above in order to avoid 1748 * a race condition with sbwait(). 1749 */ 1750 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1751 if (so->so_state & SS_NBIO) { 1752 m_freem(m); 1753 sbunlock(&so->so_snd); 1754 splx(s); 1755 error = EAGAIN; 1756 goto done; 1757 } 1758 error = sbwait(&so->so_snd); 1759 /* 1760 * An error from sbwait usually indicates that we've 1761 * been interrupted by a signal. If we've sent anything 1762 * then return bytes sent, otherwise return the error. 1763 */ 1764 if (error) { 1765 m_freem(m); 1766 sbunlock(&so->so_snd); 1767 splx(s); 1768 goto done; 1769 } 1770 goto retry_space; 1771 } 1772 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p); 1773 splx(s); 1774 if (error) { 1775 sbunlock(&so->so_snd); 1776 goto done; 1777 } 1778 } 1779 sbunlock(&so->so_snd); 1780 1781 /* 1782 * Send trailers. Wimp out and use writev(2). 1783 */ 1784 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 1785 nuap.fd = uap->s; 1786 nuap.iovp = hdtr.trailers; 1787 nuap.iovcnt = hdtr.trl_cnt; 1788 error = writev(p, &nuap); 1789 if (error) 1790 goto done; 1791 sbytes += p->p_retval[0]; 1792 } 1793 1794 done: 1795 if (uap->sbytes != NULL) { 1796 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 1797 } 1798 if (vp) 1799 vrele(vp); 1800 if (fp) 1801 fdrop(fp, p); 1802 return (error); 1803 } 1804