1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 * $FreeBSD$ 38 */ 39 40 #include "opt_compat.h" 41 #include "opt_ktrace.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/sysproto.h> 47 #include <sys/malloc.h> 48 #include <sys/filedesc.h> 49 #include <sys/proc.h> 50 #include <sys/fcntl.h> 51 #include <sys/file.h> 52 #include <sys/mbuf.h> 53 #include <sys/protosw.h> 54 #include <sys/socket.h> 55 #include <sys/socketvar.h> 56 #include <sys/signalvar.h> 57 #include <sys/uio.h> 58 #include <sys/vnode.h> 59 #include <sys/lock.h> 60 #include <sys/mount.h> 61 #ifdef KTRACE 62 #include <sys/ktrace.h> 63 #endif 64 #include <vm/vm.h> 65 #include <vm/vm_prot.h> 66 #include <vm/vm_object.h> 67 #include <vm/vm_page.h> 68 #include <vm/vm_pageout.h> 69 #include <vm/vm_kern.h> 70 #include <vm/vm_extern.h> 71 72 static void sf_buf_init(void *arg); 73 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) 74 static struct sf_buf *sf_buf_alloc(void); 75 static void sf_buf_ref(caddr_t addr, u_int size); 76 static void sf_buf_free(caddr_t addr, u_int size); 77 78 static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags)); 79 static int recvit __P((struct proc *p, int s, struct msghdr *mp, 80 caddr_t namelenp)); 81 82 static int accept1 __P((struct proc *p, struct accept_args *uap, int compat)); 83 static int getsockname1 __P((struct proc *p, struct getsockname_args *uap, 84 int compat)); 85 static int getpeername1 __P((struct proc *p, struct getpeername_args *uap, 86 int compat)); 87 88 static SLIST_HEAD(, sf_buf) sf_freelist; 89 static vm_offset_t sf_base; 90 static struct sf_buf *sf_bufs; 91 static int sf_buf_alloc_want; 92 93 /* 94 * System call interface to the socket abstraction. 95 */ 96 #if defined(COMPAT_43) || defined(COMPAT_SUNOS) 97 #define COMPAT_OLDSOCK 98 #endif 99 100 extern struct fileops socketops; 101 102 int 103 socket(p, uap) 104 struct proc *p; 105 register struct socket_args /* { 106 int domain; 107 int type; 108 int protocol; 109 } */ *uap; 110 { 111 struct filedesc *fdp = p->p_fd; 112 struct socket *so; 113 struct file *fp; 114 int fd, error; 115 116 error = falloc(p, &fp, &fd); 117 if (error) 118 return (error); 119 error = socreate(uap->domain, &so, uap->type, uap->protocol, p); 120 if (error) { 121 fdp->fd_ofiles[fd] = 0; 122 ffree(fp); 123 } else { 124 fp->f_data = (caddr_t)so; 125 fp->f_flag = FREAD|FWRITE; 126 fp->f_ops = &socketops; 127 fp->f_type = DTYPE_SOCKET; 128 p->p_retval[0] = fd; 129 } 130 return (error); 131 } 132 133 /* ARGSUSED */ 134 int 135 bind(p, uap) 136 struct proc *p; 137 register struct bind_args /* { 138 int s; 139 caddr_t name; 140 int namelen; 141 } */ *uap; 142 { 143 struct file *fp; 144 struct sockaddr *sa; 145 int error; 146 147 error = getsock(p->p_fd, uap->s, &fp); 148 if (error) 149 return (error); 150 error = getsockaddr(&sa, uap->name, uap->namelen); 151 if (error) 152 return (error); 153 error = sobind((struct socket *)fp->f_data, sa, p); 154 FREE(sa, M_SONAME); 155 return (error); 156 } 157 158 /* ARGSUSED */ 159 int 160 listen(p, uap) 161 struct proc *p; 162 register struct listen_args /* { 163 int s; 164 int backlog; 165 } */ *uap; 166 { 167 struct file *fp; 168 int error; 169 170 error = getsock(p->p_fd, uap->s, &fp); 171 if (error) 172 return (error); 173 return (solisten((struct socket *)fp->f_data, uap->backlog, p)); 174 } 175 176 static int 177 accept1(p, uap, compat) 178 struct proc *p; 179 register struct accept_args /* { 180 int s; 181 caddr_t name; 182 int *anamelen; 183 } */ *uap; 184 int compat; 185 { 186 struct filedesc *fdp = p->p_fd; 187 struct file *fp; 188 struct sockaddr *sa; 189 int namelen, error, s; 190 struct socket *head, *so; 191 int fd; 192 short fflag; /* type must match fp->f_flag */ 193 194 if (uap->name) { 195 error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen, 196 sizeof (namelen)); 197 if(error) 198 return (error); 199 } 200 error = getsock(fdp, uap->s, &fp); 201 if (error) 202 return (error); 203 s = splnet(); 204 head = (struct socket *)fp->f_data; 205 if ((head->so_options & SO_ACCEPTCONN) == 0) { 206 splx(s); 207 return (EINVAL); 208 } 209 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) { 210 splx(s); 211 return (EWOULDBLOCK); 212 } 213 while (head->so_comp.tqh_first == NULL && head->so_error == 0) { 214 if (head->so_state & SS_CANTRCVMORE) { 215 head->so_error = ECONNABORTED; 216 break; 217 } 218 error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH, 219 "accept", 0); 220 if (error) { 221 splx(s); 222 return (error); 223 } 224 } 225 if (head->so_error) { 226 error = head->so_error; 227 head->so_error = 0; 228 splx(s); 229 return (error); 230 } 231 232 /* 233 * At this point we know that there is at least one connection 234 * ready to be accepted. Remove it from the queue prior to 235 * allocating the file descriptor for it since falloc() may 236 * block allowing another process to accept the connection 237 * instead. 238 */ 239 so = head->so_comp.tqh_first; 240 TAILQ_REMOVE(&head->so_comp, so, so_list); 241 head->so_qlen--; 242 243 fflag = fp->f_flag; 244 error = falloc(p, &fp, &fd); 245 if (error) { 246 /* 247 * Probably ran out of file descriptors. Put the 248 * unaccepted connection back onto the queue and 249 * do another wakeup so some other process might 250 * have a chance at it. 251 */ 252 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 253 head->so_qlen++; 254 wakeup_one(&head->so_timeo); 255 splx(s); 256 return (error); 257 } else 258 p->p_retval[0] = fd; 259 260 so->so_state &= ~SS_COMP; 261 so->so_head = NULL; 262 if (head->so_sigio != NULL) 263 fsetown(fgetown(head->so_sigio), &so->so_sigio); 264 265 fp->f_data = (caddr_t)so; 266 fp->f_flag = fflag; 267 fp->f_ops = &socketops; 268 fp->f_type = DTYPE_SOCKET; 269 sa = 0; 270 (void) soaccept(so, &sa); 271 if (sa == 0) { 272 namelen = 0; 273 if (uap->name) 274 goto gotnoname; 275 splx(s); 276 return 0; 277 } 278 if (uap->name) { 279 /* check sa_len before it is destroyed */ 280 if (namelen > sa->sa_len) 281 namelen = sa->sa_len; 282 #ifdef COMPAT_OLDSOCK 283 if (compat) 284 ((struct osockaddr *)sa)->sa_family = 285 sa->sa_family; 286 #endif 287 error = copyout(sa, (caddr_t)uap->name, (u_int)namelen); 288 if (!error) 289 gotnoname: 290 error = copyout((caddr_t)&namelen, 291 (caddr_t)uap->anamelen, sizeof (*uap->anamelen)); 292 } 293 if (sa) 294 FREE(sa, M_SONAME); 295 if (error) { 296 fdp->fd_ofiles[fd] = 0; 297 ffree(fp); 298 } 299 splx(s); 300 return (error); 301 } 302 303 int 304 accept(p, uap) 305 struct proc *p; 306 struct accept_args *uap; 307 { 308 309 return (accept1(p, uap, 0)); 310 } 311 312 #ifdef COMPAT_OLDSOCK 313 int 314 oaccept(p, uap) 315 struct proc *p; 316 struct accept_args *uap; 317 { 318 319 return (accept1(p, uap, 1)); 320 } 321 #endif /* COMPAT_OLDSOCK */ 322 323 /* ARGSUSED */ 324 int 325 connect(p, uap) 326 struct proc *p; 327 register struct connect_args /* { 328 int s; 329 caddr_t name; 330 int namelen; 331 } */ *uap; 332 { 333 struct file *fp; 334 register struct socket *so; 335 struct sockaddr *sa; 336 int error, s; 337 338 error = getsock(p->p_fd, uap->s, &fp); 339 if (error) 340 return (error); 341 so = (struct socket *)fp->f_data; 342 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) 343 return (EALREADY); 344 error = getsockaddr(&sa, uap->name, uap->namelen); 345 if (error) 346 return (error); 347 error = soconnect(so, sa, p); 348 if (error) 349 goto bad; 350 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 351 FREE(sa, M_SONAME); 352 return (EINPROGRESS); 353 } 354 s = splnet(); 355 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 356 error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, 357 "connec", 0); 358 if (error) 359 break; 360 } 361 if (error == 0) { 362 error = so->so_error; 363 so->so_error = 0; 364 } 365 splx(s); 366 bad: 367 so->so_state &= ~SS_ISCONNECTING; 368 FREE(sa, M_SONAME); 369 if (error == ERESTART) 370 error = EINTR; 371 return (error); 372 } 373 374 int 375 socketpair(p, uap) 376 struct proc *p; 377 register struct socketpair_args /* { 378 int domain; 379 int type; 380 int protocol; 381 int *rsv; 382 } */ *uap; 383 { 384 register struct filedesc *fdp = p->p_fd; 385 struct file *fp1, *fp2; 386 struct socket *so1, *so2; 387 int fd, error, sv[2]; 388 389 error = socreate(uap->domain, &so1, uap->type, uap->protocol, p); 390 if (error) 391 return (error); 392 error = socreate(uap->domain, &so2, uap->type, uap->protocol, p); 393 if (error) 394 goto free1; 395 error = falloc(p, &fp1, &fd); 396 if (error) 397 goto free2; 398 sv[0] = fd; 399 fp1->f_data = (caddr_t)so1; 400 error = falloc(p, &fp2, &fd); 401 if (error) 402 goto free3; 403 fp2->f_data = (caddr_t)so2; 404 sv[1] = fd; 405 error = soconnect2(so1, so2); 406 if (error) 407 goto free4; 408 if (uap->type == SOCK_DGRAM) { 409 /* 410 * Datagram socket connection is asymmetric. 411 */ 412 error = soconnect2(so2, so1); 413 if (error) 414 goto free4; 415 } 416 fp1->f_flag = fp2->f_flag = FREAD|FWRITE; 417 fp1->f_ops = fp2->f_ops = &socketops; 418 fp1->f_type = fp2->f_type = DTYPE_SOCKET; 419 error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int)); 420 return (error); 421 free4: 422 fdp->fd_ofiles[sv[1]] = 0; 423 ffree(fp2); 424 free3: 425 fdp->fd_ofiles[sv[0]] = 0; 426 ffree(fp1); 427 free2: 428 (void)soclose(so2); 429 free1: 430 (void)soclose(so1); 431 return (error); 432 } 433 434 static int 435 sendit(p, s, mp, flags) 436 register struct proc *p; 437 int s; 438 register struct msghdr *mp; 439 int flags; 440 { 441 struct file *fp; 442 struct uio auio; 443 register struct iovec *iov; 444 register int i; 445 struct mbuf *control; 446 struct sockaddr *to; 447 int len, error; 448 struct socket *so; 449 #ifdef KTRACE 450 struct iovec *ktriov = NULL; 451 #endif 452 453 error = getsock(p->p_fd, s, &fp); 454 if (error) 455 return (error); 456 auio.uio_iov = mp->msg_iov; 457 auio.uio_iovcnt = mp->msg_iovlen; 458 auio.uio_segflg = UIO_USERSPACE; 459 auio.uio_rw = UIO_WRITE; 460 auio.uio_procp = p; 461 auio.uio_offset = 0; /* XXX */ 462 auio.uio_resid = 0; 463 iov = mp->msg_iov; 464 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 465 if ((auio.uio_resid += iov->iov_len) < 0) 466 return (EINVAL); 467 } 468 if (mp->msg_name) { 469 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 470 if (error) 471 return (error); 472 } else 473 to = 0; 474 if (mp->msg_control) { 475 if (mp->msg_controllen < sizeof(struct cmsghdr) 476 #ifdef COMPAT_OLDSOCK 477 && mp->msg_flags != MSG_COMPAT 478 #endif 479 ) { 480 error = EINVAL; 481 goto bad; 482 } 483 error = sockargs(&control, mp->msg_control, 484 mp->msg_controllen, MT_CONTROL); 485 if (error) 486 goto bad; 487 #ifdef COMPAT_OLDSOCK 488 if (mp->msg_flags == MSG_COMPAT) { 489 register struct cmsghdr *cm; 490 491 M_PREPEND(control, sizeof(*cm), M_WAIT); 492 if (control == 0) { 493 error = ENOBUFS; 494 goto bad; 495 } else { 496 cm = mtod(control, struct cmsghdr *); 497 cm->cmsg_len = control->m_len; 498 cm->cmsg_level = SOL_SOCKET; 499 cm->cmsg_type = SCM_RIGHTS; 500 } 501 } 502 #endif 503 } else 504 control = 0; 505 #ifdef KTRACE 506 if (KTRPOINT(p, KTR_GENIO)) { 507 int iovlen = auio.uio_iovcnt * sizeof (struct iovec); 508 509 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 510 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 511 } 512 #endif 513 len = auio.uio_resid; 514 so = (struct socket *)fp->f_data; 515 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control, 516 flags, p); 517 if (error) { 518 if (auio.uio_resid != len && (error == ERESTART || 519 error == EINTR || error == EWOULDBLOCK)) 520 error = 0; 521 if (error == EPIPE) 522 psignal(p, SIGPIPE); 523 } 524 if (error == 0) 525 p->p_retval[0] = len - auio.uio_resid; 526 #ifdef KTRACE 527 if (ktriov != NULL) { 528 if (error == 0) 529 ktrgenio(p->p_tracep, s, UIO_WRITE, 530 ktriov, p->p_retval[0], error); 531 FREE(ktriov, M_TEMP); 532 } 533 #endif 534 bad: 535 if (to) 536 FREE(to, M_SONAME); 537 return (error); 538 } 539 540 int 541 sendto(p, uap) 542 struct proc *p; 543 register struct sendto_args /* { 544 int s; 545 caddr_t buf; 546 size_t len; 547 int flags; 548 caddr_t to; 549 int tolen; 550 } */ *uap; 551 { 552 struct msghdr msg; 553 struct iovec aiov; 554 555 msg.msg_name = uap->to; 556 msg.msg_namelen = uap->tolen; 557 msg.msg_iov = &aiov; 558 msg.msg_iovlen = 1; 559 msg.msg_control = 0; 560 #ifdef COMPAT_OLDSOCK 561 msg.msg_flags = 0; 562 #endif 563 aiov.iov_base = uap->buf; 564 aiov.iov_len = uap->len; 565 return (sendit(p, uap->s, &msg, uap->flags)); 566 } 567 568 #ifdef COMPAT_OLDSOCK 569 int 570 osend(p, uap) 571 struct proc *p; 572 register struct osend_args /* { 573 int s; 574 caddr_t buf; 575 int len; 576 int flags; 577 } */ *uap; 578 { 579 struct msghdr msg; 580 struct iovec aiov; 581 582 msg.msg_name = 0; 583 msg.msg_namelen = 0; 584 msg.msg_iov = &aiov; 585 msg.msg_iovlen = 1; 586 aiov.iov_base = uap->buf; 587 aiov.iov_len = uap->len; 588 msg.msg_control = 0; 589 msg.msg_flags = 0; 590 return (sendit(p, uap->s, &msg, uap->flags)); 591 } 592 593 int 594 osendmsg(p, uap) 595 struct proc *p; 596 register struct osendmsg_args /* { 597 int s; 598 caddr_t msg; 599 int flags; 600 } */ *uap; 601 { 602 struct msghdr msg; 603 struct iovec aiov[UIO_SMALLIOV], *iov; 604 int error; 605 606 error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr)); 607 if (error) 608 return (error); 609 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 610 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 611 return (EMSGSIZE); 612 MALLOC(iov, struct iovec *, 613 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 614 M_WAITOK); 615 } else 616 iov = aiov; 617 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 618 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 619 if (error) 620 goto done; 621 msg.msg_flags = MSG_COMPAT; 622 msg.msg_iov = iov; 623 error = sendit(p, uap->s, &msg, uap->flags); 624 done: 625 if (iov != aiov) 626 FREE(iov, M_IOV); 627 return (error); 628 } 629 #endif 630 631 int 632 sendmsg(p, uap) 633 struct proc *p; 634 register struct sendmsg_args /* { 635 int s; 636 caddr_t msg; 637 int flags; 638 } */ *uap; 639 { 640 struct msghdr msg; 641 struct iovec aiov[UIO_SMALLIOV], *iov; 642 int error; 643 644 error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg)); 645 if (error) 646 return (error); 647 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 648 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 649 return (EMSGSIZE); 650 MALLOC(iov, struct iovec *, 651 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 652 M_WAITOK); 653 } else 654 iov = aiov; 655 if (msg.msg_iovlen && 656 (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 657 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 658 goto done; 659 msg.msg_iov = iov; 660 #ifdef COMPAT_OLDSOCK 661 msg.msg_flags = 0; 662 #endif 663 error = sendit(p, uap->s, &msg, uap->flags); 664 done: 665 if (iov != aiov) 666 FREE(iov, M_IOV); 667 return (error); 668 } 669 670 static int 671 recvit(p, s, mp, namelenp) 672 register struct proc *p; 673 int s; 674 register struct msghdr *mp; 675 caddr_t namelenp; 676 { 677 struct file *fp; 678 struct uio auio; 679 register struct iovec *iov; 680 register int i; 681 int len, error; 682 struct mbuf *m, *control = 0; 683 caddr_t ctlbuf; 684 struct socket *so; 685 struct sockaddr *fromsa = 0; 686 #ifdef KTRACE 687 struct iovec *ktriov = NULL; 688 #endif 689 690 error = getsock(p->p_fd, s, &fp); 691 if (error) 692 return (error); 693 auio.uio_iov = mp->msg_iov; 694 auio.uio_iovcnt = mp->msg_iovlen; 695 auio.uio_segflg = UIO_USERSPACE; 696 auio.uio_rw = UIO_READ; 697 auio.uio_procp = p; 698 auio.uio_offset = 0; /* XXX */ 699 auio.uio_resid = 0; 700 iov = mp->msg_iov; 701 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 702 if ((auio.uio_resid += iov->iov_len) < 0) 703 return (EINVAL); 704 } 705 #ifdef KTRACE 706 if (KTRPOINT(p, KTR_GENIO)) { 707 int iovlen = auio.uio_iovcnt * sizeof (struct iovec); 708 709 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 710 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 711 } 712 #endif 713 len = auio.uio_resid; 714 so = (struct socket *)fp->f_data; 715 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 716 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 717 &mp->msg_flags); 718 if (error) { 719 if (auio.uio_resid != len && (error == ERESTART || 720 error == EINTR || error == EWOULDBLOCK)) 721 error = 0; 722 } 723 #ifdef KTRACE 724 if (ktriov != NULL) { 725 if (error == 0) 726 ktrgenio(p->p_tracep, s, UIO_READ, 727 ktriov, len - auio.uio_resid, error); 728 FREE(ktriov, M_TEMP); 729 } 730 #endif 731 if (error) 732 goto out; 733 p->p_retval[0] = len - auio.uio_resid; 734 if (mp->msg_name) { 735 len = mp->msg_namelen; 736 if (len <= 0 || fromsa == 0) 737 len = 0; 738 else { 739 #ifndef MIN 740 #define MIN(a,b) ((a)>(b)?(b):(a)) 741 #endif 742 /* save sa_len before it is destroyed by MSG_COMPAT */ 743 len = MIN(len, fromsa->sa_len); 744 #ifdef COMPAT_OLDSOCK 745 if (mp->msg_flags & MSG_COMPAT) 746 ((struct osockaddr *)fromsa)->sa_family = 747 fromsa->sa_family; 748 #endif 749 error = copyout(fromsa, 750 (caddr_t)mp->msg_name, (unsigned)len); 751 if (error) 752 goto out; 753 } 754 mp->msg_namelen = len; 755 if (namelenp && 756 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) { 757 #ifdef COMPAT_OLDSOCK 758 if (mp->msg_flags & MSG_COMPAT) 759 error = 0; /* old recvfrom didn't check */ 760 else 761 #endif 762 goto out; 763 } 764 } 765 if (mp->msg_control) { 766 #ifdef COMPAT_OLDSOCK 767 /* 768 * We assume that old recvmsg calls won't receive access 769 * rights and other control info, esp. as control info 770 * is always optional and those options didn't exist in 4.3. 771 * If we receive rights, trim the cmsghdr; anything else 772 * is tossed. 773 */ 774 if (control && mp->msg_flags & MSG_COMPAT) { 775 if (mtod(control, struct cmsghdr *)->cmsg_level != 776 SOL_SOCKET || 777 mtod(control, struct cmsghdr *)->cmsg_type != 778 SCM_RIGHTS) { 779 mp->msg_controllen = 0; 780 goto out; 781 } 782 control->m_len -= sizeof (struct cmsghdr); 783 control->m_data += sizeof (struct cmsghdr); 784 } 785 #endif 786 len = mp->msg_controllen; 787 m = control; 788 mp->msg_controllen = 0; 789 ctlbuf = (caddr_t) mp->msg_control; 790 791 while (m && len > 0) { 792 unsigned int tocopy; 793 794 if (len >= m->m_len) 795 tocopy = m->m_len; 796 else { 797 mp->msg_flags |= MSG_CTRUNC; 798 tocopy = len; 799 } 800 801 if ((error = copyout((caddr_t)mtod(m, caddr_t), 802 ctlbuf, tocopy)) != 0) 803 goto out; 804 805 ctlbuf += tocopy; 806 len -= tocopy; 807 m = m->m_next; 808 } 809 mp->msg_controllen = ctlbuf - mp->msg_control; 810 } 811 out: 812 if (fromsa) 813 FREE(fromsa, M_SONAME); 814 if (control) 815 m_freem(control); 816 return (error); 817 } 818 819 int 820 recvfrom(p, uap) 821 struct proc *p; 822 register struct recvfrom_args /* { 823 int s; 824 caddr_t buf; 825 size_t len; 826 int flags; 827 caddr_t from; 828 int *fromlenaddr; 829 } */ *uap; 830 { 831 struct msghdr msg; 832 struct iovec aiov; 833 int error; 834 835 if (uap->fromlenaddr) { 836 error = copyin((caddr_t)uap->fromlenaddr, 837 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen)); 838 if (error) 839 return (error); 840 } else 841 msg.msg_namelen = 0; 842 msg.msg_name = uap->from; 843 msg.msg_iov = &aiov; 844 msg.msg_iovlen = 1; 845 aiov.iov_base = uap->buf; 846 aiov.iov_len = uap->len; 847 msg.msg_control = 0; 848 msg.msg_flags = uap->flags; 849 return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr)); 850 } 851 852 #ifdef COMPAT_OLDSOCK 853 int 854 orecvfrom(p, uap) 855 struct proc *p; 856 struct recvfrom_args *uap; 857 { 858 859 uap->flags |= MSG_COMPAT; 860 return (recvfrom(p, uap)); 861 } 862 #endif 863 864 865 #ifdef COMPAT_OLDSOCK 866 int 867 orecv(p, uap) 868 struct proc *p; 869 register struct orecv_args /* { 870 int s; 871 caddr_t buf; 872 int len; 873 int flags; 874 } */ *uap; 875 { 876 struct msghdr msg; 877 struct iovec aiov; 878 879 msg.msg_name = 0; 880 msg.msg_namelen = 0; 881 msg.msg_iov = &aiov; 882 msg.msg_iovlen = 1; 883 aiov.iov_base = uap->buf; 884 aiov.iov_len = uap->len; 885 msg.msg_control = 0; 886 msg.msg_flags = uap->flags; 887 return (recvit(p, uap->s, &msg, (caddr_t)0)); 888 } 889 890 /* 891 * Old recvmsg. This code takes advantage of the fact that the old msghdr 892 * overlays the new one, missing only the flags, and with the (old) access 893 * rights where the control fields are now. 894 */ 895 int 896 orecvmsg(p, uap) 897 struct proc *p; 898 register struct orecvmsg_args /* { 899 int s; 900 struct omsghdr *msg; 901 int flags; 902 } */ *uap; 903 { 904 struct msghdr msg; 905 struct iovec aiov[UIO_SMALLIOV], *iov; 906 int error; 907 908 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, 909 sizeof (struct omsghdr)); 910 if (error) 911 return (error); 912 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 913 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 914 return (EMSGSIZE); 915 MALLOC(iov, struct iovec *, 916 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 917 M_WAITOK); 918 } else 919 iov = aiov; 920 msg.msg_flags = uap->flags | MSG_COMPAT; 921 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 922 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 923 if (error) 924 goto done; 925 msg.msg_iov = iov; 926 error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen); 927 928 if (msg.msg_controllen && error == 0) 929 error = copyout((caddr_t)&msg.msg_controllen, 930 (caddr_t)&uap->msg->msg_accrightslen, sizeof (int)); 931 done: 932 if (iov != aiov) 933 FREE(iov, M_IOV); 934 return (error); 935 } 936 #endif 937 938 int 939 recvmsg(p, uap) 940 struct proc *p; 941 register struct recvmsg_args /* { 942 int s; 943 struct msghdr *msg; 944 int flags; 945 } */ *uap; 946 { 947 struct msghdr msg; 948 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 949 register int error; 950 951 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg)); 952 if (error) 953 return (error); 954 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 955 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 956 return (EMSGSIZE); 957 MALLOC(iov, struct iovec *, 958 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 959 M_WAITOK); 960 } else 961 iov = aiov; 962 #ifdef COMPAT_OLDSOCK 963 msg.msg_flags = uap->flags &~ MSG_COMPAT; 964 #else 965 msg.msg_flags = uap->flags; 966 #endif 967 uiov = msg.msg_iov; 968 msg.msg_iov = iov; 969 error = copyin((caddr_t)uiov, (caddr_t)iov, 970 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 971 if (error) 972 goto done; 973 error = recvit(p, uap->s, &msg, (caddr_t)0); 974 if (!error) { 975 msg.msg_iov = uiov; 976 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg)); 977 } 978 done: 979 if (iov != aiov) 980 FREE(iov, M_IOV); 981 return (error); 982 } 983 984 /* ARGSUSED */ 985 int 986 shutdown(p, uap) 987 struct proc *p; 988 register struct shutdown_args /* { 989 int s; 990 int how; 991 } */ *uap; 992 { 993 struct file *fp; 994 int error; 995 996 error = getsock(p->p_fd, uap->s, &fp); 997 if (error) 998 return (error); 999 return (soshutdown((struct socket *)fp->f_data, uap->how)); 1000 } 1001 1002 /* ARGSUSED */ 1003 int 1004 setsockopt(p, uap) 1005 struct proc *p; 1006 register struct setsockopt_args /* { 1007 int s; 1008 int level; 1009 int name; 1010 caddr_t val; 1011 int valsize; 1012 } */ *uap; 1013 { 1014 struct file *fp; 1015 struct sockopt sopt; 1016 int error; 1017 1018 if (uap->val == 0 && uap->valsize != 0) 1019 return (EFAULT); 1020 if (uap->valsize < 0) 1021 return (EINVAL); 1022 1023 error = getsock(p->p_fd, uap->s, &fp); 1024 if (error) 1025 return (error); 1026 1027 sopt.sopt_dir = SOPT_SET; 1028 sopt.sopt_level = uap->level; 1029 sopt.sopt_name = uap->name; 1030 sopt.sopt_val = uap->val; 1031 sopt.sopt_valsize = uap->valsize; 1032 sopt.sopt_p = p; 1033 1034 return (sosetopt((struct socket *)fp->f_data, &sopt)); 1035 } 1036 1037 /* ARGSUSED */ 1038 int 1039 getsockopt(p, uap) 1040 struct proc *p; 1041 register struct getsockopt_args /* { 1042 int s; 1043 int level; 1044 int name; 1045 caddr_t val; 1046 int *avalsize; 1047 } */ *uap; 1048 { 1049 int valsize, error; 1050 struct file *fp; 1051 struct sockopt sopt; 1052 1053 error = getsock(p->p_fd, uap->s, &fp); 1054 if (error) 1055 return (error); 1056 if (uap->val) { 1057 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize, 1058 sizeof (valsize)); 1059 if (error) 1060 return (error); 1061 if (valsize < 0) 1062 return (EINVAL); 1063 } else 1064 valsize = 0; 1065 1066 sopt.sopt_dir = SOPT_GET; 1067 sopt.sopt_level = uap->level; 1068 sopt.sopt_name = uap->name; 1069 sopt.sopt_val = uap->val; 1070 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1071 sopt.sopt_p = p; 1072 1073 error = sogetopt((struct socket *)fp->f_data, &sopt); 1074 if (error == 0) { 1075 valsize = sopt.sopt_valsize; 1076 error = copyout((caddr_t)&valsize, 1077 (caddr_t)uap->avalsize, sizeof (valsize)); 1078 } 1079 return (error); 1080 } 1081 1082 /* 1083 * Get socket name. 1084 */ 1085 /* ARGSUSED */ 1086 static int 1087 getsockname1(p, uap, compat) 1088 struct proc *p; 1089 register struct getsockname_args /* { 1090 int fdes; 1091 caddr_t asa; 1092 int *alen; 1093 } */ *uap; 1094 int compat; 1095 { 1096 struct file *fp; 1097 register struct socket *so; 1098 struct sockaddr *sa; 1099 int len, error; 1100 1101 error = getsock(p->p_fd, uap->fdes, &fp); 1102 if (error) 1103 return (error); 1104 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); 1105 if (error) 1106 return (error); 1107 so = (struct socket *)fp->f_data; 1108 sa = 0; 1109 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1110 if (error) 1111 goto bad; 1112 if (sa == 0) { 1113 len = 0; 1114 goto gotnothing; 1115 } 1116 1117 len = MIN(len, sa->sa_len); 1118 #ifdef COMPAT_OLDSOCK 1119 if (compat) 1120 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1121 #endif 1122 error = copyout(sa, (caddr_t)uap->asa, (u_int)len); 1123 if (error == 0) 1124 gotnothing: 1125 error = copyout((caddr_t)&len, (caddr_t)uap->alen, 1126 sizeof (len)); 1127 bad: 1128 if (sa) 1129 FREE(sa, M_SONAME); 1130 return (error); 1131 } 1132 1133 int 1134 getsockname(p, uap) 1135 struct proc *p; 1136 struct getsockname_args *uap; 1137 { 1138 1139 return (getsockname1(p, uap, 0)); 1140 } 1141 1142 #ifdef COMPAT_OLDSOCK 1143 int 1144 ogetsockname(p, uap) 1145 struct proc *p; 1146 struct getsockname_args *uap; 1147 { 1148 1149 return (getsockname1(p, uap, 1)); 1150 } 1151 #endif /* COMPAT_OLDSOCK */ 1152 1153 /* 1154 * Get name of peer for connected socket. 1155 */ 1156 /* ARGSUSED */ 1157 static int 1158 getpeername1(p, uap, compat) 1159 struct proc *p; 1160 register struct getpeername_args /* { 1161 int fdes; 1162 caddr_t asa; 1163 int *alen; 1164 } */ *uap; 1165 int compat; 1166 { 1167 struct file *fp; 1168 register struct socket *so; 1169 struct sockaddr *sa; 1170 int len, error; 1171 1172 error = getsock(p->p_fd, uap->fdes, &fp); 1173 if (error) 1174 return (error); 1175 so = (struct socket *)fp->f_data; 1176 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) 1177 return (ENOTCONN); 1178 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); 1179 if (error) 1180 return (error); 1181 sa = 0; 1182 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1183 if (error) 1184 goto bad; 1185 if (sa == 0) { 1186 len = 0; 1187 goto gotnothing; 1188 } 1189 len = MIN(len, sa->sa_len); 1190 #ifdef COMPAT_OLDSOCK 1191 if (compat) 1192 ((struct osockaddr *)sa)->sa_family = 1193 sa->sa_family; 1194 #endif 1195 error = copyout(sa, (caddr_t)uap->asa, (u_int)len); 1196 if (error) 1197 goto bad; 1198 gotnothing: 1199 error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len)); 1200 bad: 1201 if (sa) FREE(sa, M_SONAME); 1202 return (error); 1203 } 1204 1205 int 1206 getpeername(p, uap) 1207 struct proc *p; 1208 struct getpeername_args *uap; 1209 { 1210 1211 return (getpeername1(p, uap, 0)); 1212 } 1213 1214 #ifdef COMPAT_OLDSOCK 1215 int 1216 ogetpeername(p, uap) 1217 struct proc *p; 1218 struct ogetpeername_args *uap; 1219 { 1220 1221 /* XXX uap should have type `getpeername_args *' to begin with. */ 1222 return (getpeername1(p, (struct getpeername_args *)uap, 1)); 1223 } 1224 #endif /* COMPAT_OLDSOCK */ 1225 1226 int 1227 sockargs(mp, buf, buflen, type) 1228 struct mbuf **mp; 1229 caddr_t buf; 1230 int buflen, type; 1231 { 1232 register struct sockaddr *sa; 1233 register struct mbuf *m; 1234 int error; 1235 1236 if ((u_int)buflen > MLEN) { 1237 #ifdef COMPAT_OLDSOCK 1238 if (type == MT_SONAME && (u_int)buflen <= 112) 1239 buflen = MLEN; /* unix domain compat. hack */ 1240 else 1241 #endif 1242 return (EINVAL); 1243 } 1244 m = m_get(M_WAIT, type); 1245 if (m == NULL) 1246 return (ENOBUFS); 1247 m->m_len = buflen; 1248 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1249 if (error) 1250 (void) m_free(m); 1251 else { 1252 *mp = m; 1253 if (type == MT_SONAME) { 1254 sa = mtod(m, struct sockaddr *); 1255 1256 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1257 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1258 sa->sa_family = sa->sa_len; 1259 #endif 1260 sa->sa_len = buflen; 1261 } 1262 } 1263 return (error); 1264 } 1265 1266 int 1267 getsockaddr(namp, uaddr, len) 1268 struct sockaddr **namp; 1269 caddr_t uaddr; 1270 size_t len; 1271 { 1272 struct sockaddr *sa; 1273 int error; 1274 1275 if (len > SOCK_MAXADDRLEN) 1276 return ENAMETOOLONG; 1277 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1278 error = copyin(uaddr, sa, len); 1279 if (error) { 1280 FREE(sa, M_SONAME); 1281 } else { 1282 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1283 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1284 sa->sa_family = sa->sa_len; 1285 #endif 1286 sa->sa_len = len; 1287 *namp = sa; 1288 } 1289 return error; 1290 } 1291 1292 int 1293 getsock(fdp, fdes, fpp) 1294 struct filedesc *fdp; 1295 int fdes; 1296 struct file **fpp; 1297 { 1298 register struct file *fp; 1299 1300 if ((unsigned)fdes >= fdp->fd_nfiles || 1301 (fp = fdp->fd_ofiles[fdes]) == NULL) 1302 return (EBADF); 1303 if (fp->f_type != DTYPE_SOCKET) 1304 return (ENOTSOCK); 1305 *fpp = fp; 1306 return (0); 1307 } 1308 1309 /* 1310 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) 1311 * XXX - The sf_buf functions are currently private to sendfile(2), so have 1312 * been made static, but may be useful in the future for doing zero-copy in 1313 * other parts of the networking code. 1314 */ 1315 static void 1316 sf_buf_init(void *arg) 1317 { 1318 int i; 1319 1320 SLIST_INIT(&sf_freelist); 1321 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE); 1322 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT); 1323 bzero(sf_bufs, nsfbufs * sizeof(struct sf_buf)); 1324 for (i = 0; i < nsfbufs; i++) { 1325 sf_bufs[i].kva = sf_base + i * PAGE_SIZE; 1326 SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list); 1327 } 1328 } 1329 1330 /* 1331 * Get an sf_buf from the freelist. Will block if none are available. 1332 */ 1333 static struct sf_buf * 1334 sf_buf_alloc() 1335 { 1336 struct sf_buf *sf; 1337 int s; 1338 1339 s = splimp(); 1340 while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) { 1341 sf_buf_alloc_want = 1; 1342 tsleep(&sf_freelist, PVM, "sfbufa", 0); 1343 } 1344 SLIST_REMOVE_HEAD(&sf_freelist, free_list); 1345 splx(s); 1346 sf->refcnt = 1; 1347 return (sf); 1348 } 1349 1350 #define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT]) 1351 static void 1352 sf_buf_ref(caddr_t addr, u_int size) 1353 { 1354 struct sf_buf *sf; 1355 1356 sf = dtosf(addr); 1357 if (sf->refcnt == 0) 1358 panic("sf_buf_ref: referencing a free sf_buf"); 1359 sf->refcnt++; 1360 } 1361 1362 /* 1363 * Lose a reference to an sf_buf. When none left, detach mapped page 1364 * and release resources back to the system. 1365 * 1366 * Must be called at splimp. 1367 */ 1368 static void 1369 sf_buf_free(caddr_t addr, u_int size) 1370 { 1371 struct sf_buf *sf; 1372 struct vm_page *m; 1373 int s; 1374 1375 sf = dtosf(addr); 1376 if (sf->refcnt == 0) 1377 panic("sf_buf_free: freeing free sf_buf"); 1378 sf->refcnt--; 1379 if (sf->refcnt == 0) { 1380 pmap_qremove((vm_offset_t)addr, 1); 1381 m = sf->m; 1382 s = splvm(); 1383 vm_page_unwire(m, 0); 1384 /* 1385 * Check for the object going away on us. This can 1386 * happen since we don't hold a reference to it. 1387 * If so, we're responsible for freeing the page. 1388 */ 1389 if (m->wire_count == 0 && m->object == NULL) 1390 vm_page_free(m); 1391 splx(s); 1392 sf->m = NULL; 1393 SLIST_INSERT_HEAD(&sf_freelist, sf, free_list); 1394 if (sf_buf_alloc_want) { 1395 sf_buf_alloc_want = 0; 1396 wakeup(&sf_freelist); 1397 } 1398 } 1399 } 1400 1401 /* 1402 * sendfile(2). 1403 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1404 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1405 * 1406 * Send a file specified by 'fd' and starting at 'offset' to a socket 1407 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1408 * nbytes == 0. Optionally add a header and/or trailer to the socket 1409 * output. If specified, write the total number of bytes sent into *sbytes. 1410 */ 1411 int 1412 sendfile(struct proc *p, struct sendfile_args *uap) 1413 { 1414 struct file *fp; 1415 struct filedesc *fdp = p->p_fd; 1416 struct vnode *vp; 1417 struct vm_object *obj; 1418 struct socket *so; 1419 struct mbuf *m; 1420 struct sf_buf *sf; 1421 struct vm_page *pg; 1422 struct writev_args nuap; 1423 struct sf_hdtr hdtr; 1424 off_t off, xfsize, sbytes = 0; 1425 int error = 0, s; 1426 1427 vp = NULL; 1428 /* 1429 * Do argument checking. Must be a regular file in, stream 1430 * type and connected socket out, positive offset. 1431 */ 1432 if (((u_int)uap->fd) >= fdp->fd_nfiles || 1433 (fp = fdp->fd_ofiles[uap->fd]) == NULL || 1434 (fp->f_flag & FREAD) == 0) { 1435 error = EBADF; 1436 goto done; 1437 } 1438 if (fp->f_type != DTYPE_VNODE) { 1439 error = EINVAL; 1440 goto done; 1441 } 1442 vp = (struct vnode *)fp->f_data; 1443 vref(vp); 1444 obj = vp->v_object; 1445 if (vp->v_type != VREG || obj == NULL) { 1446 error = EINVAL; 1447 goto done; 1448 } 1449 error = getsock(p->p_fd, uap->s, &fp); 1450 if (error) 1451 goto done; 1452 so = (struct socket *)fp->f_data; 1453 if (so->so_type != SOCK_STREAM) { 1454 error = EINVAL; 1455 goto done; 1456 } 1457 if ((so->so_state & SS_ISCONNECTED) == 0) { 1458 error = ENOTCONN; 1459 goto done; 1460 } 1461 if (uap->offset < 0) { 1462 error = EINVAL; 1463 goto done; 1464 } 1465 1466 /* 1467 * If specified, get the pointer to the sf_hdtr struct for 1468 * any headers/trailers. 1469 */ 1470 if (uap->hdtr != NULL) { 1471 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1472 if (error) 1473 goto done; 1474 /* 1475 * Send any headers. Wimp out and use writev(2). 1476 */ 1477 if (hdtr.headers != NULL) { 1478 nuap.fd = uap->s; 1479 nuap.iovp = hdtr.headers; 1480 nuap.iovcnt = hdtr.hdr_cnt; 1481 error = writev(p, &nuap); 1482 if (error) 1483 goto done; 1484 sbytes += p->p_retval[0]; 1485 } 1486 } 1487 1488 /* 1489 * Protect against multiple writers to the socket. 1490 */ 1491 (void) sblock(&so->so_snd, M_WAITOK); 1492 1493 /* 1494 * Loop through the pages in the file, starting with the requested 1495 * offset. Get a file page (do I/O if necessary), map the file page 1496 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1497 * it on the socket. 1498 */ 1499 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1500 vm_pindex_t pindex; 1501 vm_offset_t pgoff; 1502 1503 pindex = OFF_TO_IDX(off); 1504 retry_lookup: 1505 /* 1506 * Calculate the amount to transfer. Not to exceed a page, 1507 * the EOF, or the passed in nbytes. 1508 */ 1509 xfsize = obj->un_pager.vnp.vnp_size - off; 1510 if (xfsize > PAGE_SIZE) 1511 xfsize = PAGE_SIZE; 1512 pgoff = (vm_offset_t)(off & PAGE_MASK); 1513 if (PAGE_SIZE - pgoff < xfsize) 1514 xfsize = PAGE_SIZE - pgoff; 1515 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1516 xfsize = uap->nbytes - sbytes; 1517 if (xfsize <= 0) 1518 break; 1519 /* 1520 * Optimize the non-blocking case by looking at the socket space 1521 * before going to the extra work of constituting the sf_buf. 1522 */ 1523 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1524 if (so->so_state & SS_CANTSENDMORE) 1525 error = EPIPE; 1526 else 1527 error = EAGAIN; 1528 sbunlock(&so->so_snd); 1529 goto done; 1530 } 1531 /* 1532 * Attempt to look up the page. 1533 * 1534 * Allocate if not found 1535 * 1536 * Wait and loop if busy. 1537 */ 1538 pg = vm_page_lookup(obj, pindex); 1539 1540 if (pg == NULL) { 1541 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL); 1542 if (pg == NULL) { 1543 VM_WAIT; 1544 goto retry_lookup; 1545 } 1546 vm_page_wakeup(pg); 1547 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) { 1548 goto retry_lookup; 1549 } 1550 1551 /* 1552 * Wire the page so it does not get ripped out from under 1553 * us. 1554 */ 1555 1556 vm_page_wire(pg); 1557 1558 /* 1559 * If page is not valid for what we need, initiate I/O 1560 */ 1561 1562 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1563 struct uio auio; 1564 struct iovec aiov; 1565 int bsize; 1566 1567 /* 1568 * Ensure that our page is still around when the I/O 1569 * completes. 1570 */ 1571 vm_page_io_start(pg); 1572 1573 /* 1574 * Get the page from backing store. 1575 */ 1576 bsize = vp->v_mount->mnt_stat.f_iosize; 1577 auio.uio_iov = &aiov; 1578 auio.uio_iovcnt = 1; 1579 aiov.iov_base = 0; 1580 aiov.iov_len = MAXBSIZE; 1581 auio.uio_resid = MAXBSIZE; 1582 auio.uio_offset = trunc_page(off); 1583 auio.uio_segflg = UIO_NOCOPY; 1584 auio.uio_rw = UIO_READ; 1585 auio.uio_procp = p; 1586 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p); 1587 error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16), 1588 p->p_ucred); 1589 VOP_UNLOCK(vp, 0, p); 1590 vm_page_flag_clear(pg, PG_ZERO); 1591 vm_page_io_finish(pg); 1592 if (error) { 1593 vm_page_unwire(pg, 0); 1594 /* 1595 * See if anyone else might know about this page. 1596 * If not and it is not valid, then free it. 1597 */ 1598 if (pg->wire_count == 0 && pg->valid == 0 && 1599 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1600 pg->hold_count == 0) 1601 vm_page_free(pg); 1602 sbunlock(&so->so_snd); 1603 goto done; 1604 } 1605 } 1606 1607 /* 1608 * Allocate a kernel virtual page and insert the physical page 1609 * into it. 1610 */ 1611 1612 sf = sf_buf_alloc(); 1613 sf->m = pg; 1614 pmap_qenter(sf->kva, &pg, 1); 1615 /* 1616 * Get an mbuf header and set it up as having external storage. 1617 */ 1618 MGETHDR(m, M_WAIT, MT_DATA); 1619 m->m_ext.ext_free = sf_buf_free; 1620 m->m_ext.ext_ref = sf_buf_ref; 1621 m->m_ext.ext_buf = (void *)sf->kva; 1622 m->m_ext.ext_size = PAGE_SIZE; 1623 m->m_data = (char *) sf->kva + pgoff; 1624 m->m_flags |= M_EXT; 1625 m->m_pkthdr.len = m->m_len = xfsize; 1626 /* 1627 * Add the buffer to the socket buffer chain. 1628 */ 1629 s = splnet(); 1630 retry_space: 1631 /* 1632 * Make sure that the socket is still able to take more data. 1633 * CANTSENDMORE being true usually means that the connection 1634 * was closed. so_error is true when an error was sensed after 1635 * a previous send. 1636 * The state is checked after the page mapping and buffer 1637 * allocation above since those operations may block and make 1638 * any socket checks stale. From this point forward, nothing 1639 * blocks before the pru_send (or more accurately, any blocking 1640 * results in a loop back to here to re-check). 1641 */ 1642 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1643 if (so->so_state & SS_CANTSENDMORE) { 1644 error = EPIPE; 1645 } else { 1646 error = so->so_error; 1647 so->so_error = 0; 1648 } 1649 m_freem(m); 1650 sbunlock(&so->so_snd); 1651 splx(s); 1652 goto done; 1653 } 1654 /* 1655 * Wait for socket space to become available. We do this just 1656 * after checking the connection state above in order to avoid 1657 * a race condition with sbwait(). 1658 */ 1659 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1660 if (so->so_state & SS_NBIO) { 1661 m_freem(m); 1662 sbunlock(&so->so_snd); 1663 splx(s); 1664 error = EAGAIN; 1665 goto done; 1666 } 1667 error = sbwait(&so->so_snd); 1668 /* 1669 * An error from sbwait usually indicates that we've 1670 * been interrupted by a signal. If we've sent anything 1671 * then return bytes sent, otherwise return the error. 1672 */ 1673 if (error) { 1674 m_freem(m); 1675 sbunlock(&so->so_snd); 1676 splx(s); 1677 goto done; 1678 } 1679 goto retry_space; 1680 } 1681 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p); 1682 splx(s); 1683 if (error) { 1684 sbunlock(&so->so_snd); 1685 goto done; 1686 } 1687 } 1688 sbunlock(&so->so_snd); 1689 1690 /* 1691 * Send trailers. Wimp out and use writev(2). 1692 */ 1693 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 1694 nuap.fd = uap->s; 1695 nuap.iovp = hdtr.trailers; 1696 nuap.iovcnt = hdtr.trl_cnt; 1697 error = writev(p, &nuap); 1698 if (error) 1699 goto done; 1700 sbytes += p->p_retval[0]; 1701 } 1702 1703 done: 1704 if (uap->sbytes != NULL) { 1705 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 1706 } 1707 if (vp) 1708 vrele(vp); 1709 return (error); 1710 } 1711