1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 * $FreeBSD$ 38 */ 39 40 #include "opt_compat.h" 41 #include "opt_ktrace.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/sysproto.h> 47 #include <sys/malloc.h> 48 #include <sys/filedesc.h> 49 #include <sys/proc.h> 50 #include <sys/fcntl.h> 51 #include <sys/file.h> 52 #include <sys/mbuf.h> 53 #include <sys/protosw.h> 54 #include <sys/socket.h> 55 #include <sys/socketvar.h> 56 #include <sys/signalvar.h> 57 #include <sys/uio.h> 58 #include <sys/vnode.h> 59 #include <sys/lock.h> 60 #include <sys/mount.h> 61 #ifdef KTRACE 62 #include <sys/ktrace.h> 63 #endif 64 #include <vm/vm.h> 65 #include <vm/vm_object.h> 66 #include <vm/vm_page.h> 67 #include <vm/vm_pageout.h> 68 #include <vm/vm_kern.h> 69 #include <vm/vm_extern.h> 70 71 static void sf_buf_init(void *arg); 72 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) 73 static struct sf_buf *sf_buf_alloc(void); 74 static void sf_buf_ref(caddr_t addr, u_int size); 75 static void sf_buf_free(caddr_t addr, u_int size); 76 77 static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags)); 78 static int recvit __P((struct proc *p, int s, struct msghdr *mp, 79 caddr_t namelenp)); 80 81 static int accept1 __P((struct proc *p, struct accept_args *uap, int compat)); 82 static int getsockname1 __P((struct proc *p, struct getsockname_args *uap, 83 int compat)); 84 static int getpeername1 __P((struct proc *p, struct getpeername_args *uap, 85 int compat)); 86 87 static SLIST_HEAD(, sf_buf) sf_freelist; 88 static vm_offset_t sf_base; 89 static struct sf_buf *sf_bufs; 90 static int sf_buf_alloc_want; 91 92 /* 93 * System call interface to the socket abstraction. 94 */ 95 #if defined(COMPAT_43) || defined(COMPAT_SUNOS) 96 #define COMPAT_OLDSOCK 97 #endif 98 99 extern struct fileops socketops; 100 101 int 102 socket(p, uap) 103 struct proc *p; 104 register struct socket_args /* { 105 int domain; 106 int type; 107 int protocol; 108 } */ *uap; 109 { 110 struct filedesc *fdp = p->p_fd; 111 struct socket *so; 112 struct file *fp; 113 int fd, error; 114 115 error = falloc(p, &fp, &fd); 116 if (error) 117 return (error); 118 error = socreate(uap->domain, &so, uap->type, uap->protocol, p); 119 if (error) { 120 fdp->fd_ofiles[fd] = 0; 121 ffree(fp); 122 } else { 123 fp->f_data = (caddr_t)so; 124 fp->f_flag = FREAD|FWRITE; 125 fp->f_ops = &socketops; 126 fp->f_type = DTYPE_SOCKET; 127 p->p_retval[0] = fd; 128 } 129 return (error); 130 } 131 132 /* ARGSUSED */ 133 int 134 bind(p, uap) 135 struct proc *p; 136 register struct bind_args /* { 137 int s; 138 caddr_t name; 139 int namelen; 140 } */ *uap; 141 { 142 struct file *fp; 143 struct sockaddr *sa; 144 int error; 145 146 error = getsock(p->p_fd, uap->s, &fp); 147 if (error) 148 return (error); 149 error = getsockaddr(&sa, uap->name, uap->namelen); 150 if (error) 151 return (error); 152 error = sobind((struct socket *)fp->f_data, sa, p); 153 FREE(sa, M_SONAME); 154 return (error); 155 } 156 157 /* ARGSUSED */ 158 int 159 listen(p, uap) 160 struct proc *p; 161 register struct listen_args /* { 162 int s; 163 int backlog; 164 } */ *uap; 165 { 166 struct file *fp; 167 int error; 168 169 error = getsock(p->p_fd, uap->s, &fp); 170 if (error) 171 return (error); 172 return (solisten((struct socket *)fp->f_data, uap->backlog, p)); 173 } 174 175 static int 176 accept1(p, uap, compat) 177 struct proc *p; 178 register struct accept_args /* { 179 int s; 180 caddr_t name; 181 int *anamelen; 182 } */ *uap; 183 int compat; 184 { 185 struct filedesc *fdp = p->p_fd; 186 struct file *fp; 187 struct sockaddr *sa; 188 int namelen, error, s; 189 struct socket *head, *so; 190 int fd; 191 short fflag; /* type must match fp->f_flag */ 192 193 if (uap->name) { 194 error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen, 195 sizeof (namelen)); 196 if(error) 197 return (error); 198 } 199 error = getsock(fdp, uap->s, &fp); 200 if (error) 201 return (error); 202 s = splnet(); 203 head = (struct socket *)fp->f_data; 204 if ((head->so_options & SO_ACCEPTCONN) == 0) { 205 splx(s); 206 return (EINVAL); 207 } 208 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) { 209 splx(s); 210 return (EWOULDBLOCK); 211 } 212 while (head->so_comp.tqh_first == NULL && head->so_error == 0) { 213 if (head->so_state & SS_CANTRCVMORE) { 214 head->so_error = ECONNABORTED; 215 break; 216 } 217 error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH, 218 "accept", 0); 219 if (error) { 220 splx(s); 221 return (error); 222 } 223 } 224 if (head->so_error) { 225 error = head->so_error; 226 head->so_error = 0; 227 splx(s); 228 return (error); 229 } 230 231 /* 232 * At this point we know that there is at least one connection 233 * ready to be accepted. Remove it from the queue prior to 234 * allocating the file descriptor for it since falloc() may 235 * block allowing another process to accept the connection 236 * instead. 237 */ 238 so = head->so_comp.tqh_first; 239 TAILQ_REMOVE(&head->so_comp, so, so_list); 240 head->so_qlen--; 241 242 fflag = fp->f_flag; 243 error = falloc(p, &fp, &fd); 244 if (error) { 245 /* 246 * Probably ran out of file descriptors. Put the 247 * unaccepted connection back onto the queue and 248 * do another wakeup so some other process might 249 * have a chance at it. 250 */ 251 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 252 head->so_qlen++; 253 wakeup_one(&head->so_timeo); 254 splx(s); 255 return (error); 256 } else 257 p->p_retval[0] = fd; 258 259 so->so_state &= ~SS_COMP; 260 so->so_head = NULL; 261 if (head->so_sigio != NULL) 262 fsetown(fgetown(head->so_sigio), &so->so_sigio); 263 264 fp->f_data = (caddr_t)so; 265 fp->f_flag = fflag; 266 fp->f_ops = &socketops; 267 fp->f_type = DTYPE_SOCKET; 268 sa = 0; 269 (void) soaccept(so, &sa); 270 if (sa == 0) { 271 namelen = 0; 272 if (uap->name) 273 goto gotnoname; 274 splx(s); 275 return 0; 276 } 277 if (uap->name) { 278 /* check sa_len before it is destroyed */ 279 if (namelen > sa->sa_len) 280 namelen = sa->sa_len; 281 #ifdef COMPAT_OLDSOCK 282 if (compat) 283 ((struct osockaddr *)sa)->sa_family = 284 sa->sa_family; 285 #endif 286 error = copyout(sa, (caddr_t)uap->name, (u_int)namelen); 287 if (!error) 288 gotnoname: 289 error = copyout((caddr_t)&namelen, 290 (caddr_t)uap->anamelen, sizeof (*uap->anamelen)); 291 } 292 if (sa) 293 FREE(sa, M_SONAME); 294 if (error) { 295 fdp->fd_ofiles[fd] = 0; 296 ffree(fp); 297 } 298 splx(s); 299 return (error); 300 } 301 302 int 303 accept(p, uap) 304 struct proc *p; 305 struct accept_args *uap; 306 { 307 308 return (accept1(p, uap, 0)); 309 } 310 311 #ifdef COMPAT_OLDSOCK 312 int 313 oaccept(p, uap) 314 struct proc *p; 315 struct accept_args *uap; 316 { 317 318 return (accept1(p, uap, 1)); 319 } 320 #endif /* COMPAT_OLDSOCK */ 321 322 /* ARGSUSED */ 323 int 324 connect(p, uap) 325 struct proc *p; 326 register struct connect_args /* { 327 int s; 328 caddr_t name; 329 int namelen; 330 } */ *uap; 331 { 332 struct file *fp; 333 register struct socket *so; 334 struct sockaddr *sa; 335 int error, s; 336 337 error = getsock(p->p_fd, uap->s, &fp); 338 if (error) 339 return (error); 340 so = (struct socket *)fp->f_data; 341 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) 342 return (EALREADY); 343 error = getsockaddr(&sa, uap->name, uap->namelen); 344 if (error) 345 return (error); 346 error = soconnect(so, sa, p); 347 if (error) 348 goto bad; 349 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 350 FREE(sa, M_SONAME); 351 return (EINPROGRESS); 352 } 353 s = splnet(); 354 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 355 error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, 356 "connec", 0); 357 if (error) 358 break; 359 } 360 if (error == 0) { 361 error = so->so_error; 362 so->so_error = 0; 363 } 364 splx(s); 365 bad: 366 so->so_state &= ~SS_ISCONNECTING; 367 FREE(sa, M_SONAME); 368 if (error == ERESTART) 369 error = EINTR; 370 return (error); 371 } 372 373 int 374 socketpair(p, uap) 375 struct proc *p; 376 register struct socketpair_args /* { 377 int domain; 378 int type; 379 int protocol; 380 int *rsv; 381 } */ *uap; 382 { 383 register struct filedesc *fdp = p->p_fd; 384 struct file *fp1, *fp2; 385 struct socket *so1, *so2; 386 int fd, error, sv[2]; 387 388 error = socreate(uap->domain, &so1, uap->type, uap->protocol, p); 389 if (error) 390 return (error); 391 error = socreate(uap->domain, &so2, uap->type, uap->protocol, p); 392 if (error) 393 goto free1; 394 error = falloc(p, &fp1, &fd); 395 if (error) 396 goto free2; 397 sv[0] = fd; 398 fp1->f_data = (caddr_t)so1; 399 error = falloc(p, &fp2, &fd); 400 if (error) 401 goto free3; 402 fp2->f_data = (caddr_t)so2; 403 sv[1] = fd; 404 error = soconnect2(so1, so2); 405 if (error) 406 goto free4; 407 if (uap->type == SOCK_DGRAM) { 408 /* 409 * Datagram socket connection is asymmetric. 410 */ 411 error = soconnect2(so2, so1); 412 if (error) 413 goto free4; 414 } 415 fp1->f_flag = fp2->f_flag = FREAD|FWRITE; 416 fp1->f_ops = fp2->f_ops = &socketops; 417 fp1->f_type = fp2->f_type = DTYPE_SOCKET; 418 error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int)); 419 return (error); 420 free4: 421 fdp->fd_ofiles[sv[1]] = 0; 422 ffree(fp2); 423 free3: 424 fdp->fd_ofiles[sv[0]] = 0; 425 ffree(fp1); 426 free2: 427 (void)soclose(so2); 428 free1: 429 (void)soclose(so1); 430 return (error); 431 } 432 433 static int 434 sendit(p, s, mp, flags) 435 register struct proc *p; 436 int s; 437 register struct msghdr *mp; 438 int flags; 439 { 440 struct file *fp; 441 struct uio auio; 442 register struct iovec *iov; 443 register int i; 444 struct mbuf *control; 445 struct sockaddr *to; 446 int len, error; 447 struct socket *so; 448 #ifdef KTRACE 449 struct iovec *ktriov = NULL; 450 #endif 451 452 error = getsock(p->p_fd, s, &fp); 453 if (error) 454 return (error); 455 auio.uio_iov = mp->msg_iov; 456 auio.uio_iovcnt = mp->msg_iovlen; 457 auio.uio_segflg = UIO_USERSPACE; 458 auio.uio_rw = UIO_WRITE; 459 auio.uio_procp = p; 460 auio.uio_offset = 0; /* XXX */ 461 auio.uio_resid = 0; 462 iov = mp->msg_iov; 463 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 464 if ((auio.uio_resid += iov->iov_len) < 0) 465 return (EINVAL); 466 } 467 if (mp->msg_name) { 468 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 469 if (error) 470 return (error); 471 } else 472 to = 0; 473 if (mp->msg_control) { 474 if (mp->msg_controllen < sizeof(struct cmsghdr) 475 #ifdef COMPAT_OLDSOCK 476 && mp->msg_flags != MSG_COMPAT 477 #endif 478 ) { 479 error = EINVAL; 480 goto bad; 481 } 482 error = sockargs(&control, mp->msg_control, 483 mp->msg_controllen, MT_CONTROL); 484 if (error) 485 goto bad; 486 #ifdef COMPAT_OLDSOCK 487 if (mp->msg_flags == MSG_COMPAT) { 488 register struct cmsghdr *cm; 489 490 M_PREPEND(control, sizeof(*cm), M_WAIT); 491 if (control == 0) { 492 error = ENOBUFS; 493 goto bad; 494 } else { 495 cm = mtod(control, struct cmsghdr *); 496 cm->cmsg_len = control->m_len; 497 cm->cmsg_level = SOL_SOCKET; 498 cm->cmsg_type = SCM_RIGHTS; 499 } 500 } 501 #endif 502 } else 503 control = 0; 504 #ifdef KTRACE 505 if (KTRPOINT(p, KTR_GENIO)) { 506 int iovlen = auio.uio_iovcnt * sizeof (struct iovec); 507 508 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 509 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 510 } 511 #endif 512 len = auio.uio_resid; 513 so = (struct socket *)fp->f_data; 514 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control, 515 flags, p); 516 if (error) { 517 if (auio.uio_resid != len && (error == ERESTART || 518 error == EINTR || error == EWOULDBLOCK)) 519 error = 0; 520 if (error == EPIPE) 521 psignal(p, SIGPIPE); 522 } 523 if (error == 0) 524 p->p_retval[0] = len - auio.uio_resid; 525 #ifdef KTRACE 526 if (ktriov != NULL) { 527 if (error == 0) 528 ktrgenio(p->p_tracep, s, UIO_WRITE, 529 ktriov, p->p_retval[0], error); 530 FREE(ktriov, M_TEMP); 531 } 532 #endif 533 bad: 534 if (to) 535 FREE(to, M_SONAME); 536 return (error); 537 } 538 539 int 540 sendto(p, uap) 541 struct proc *p; 542 register struct sendto_args /* { 543 int s; 544 caddr_t buf; 545 size_t len; 546 int flags; 547 caddr_t to; 548 int tolen; 549 } */ *uap; 550 { 551 struct msghdr msg; 552 struct iovec aiov; 553 554 msg.msg_name = uap->to; 555 msg.msg_namelen = uap->tolen; 556 msg.msg_iov = &aiov; 557 msg.msg_iovlen = 1; 558 msg.msg_control = 0; 559 #ifdef COMPAT_OLDSOCK 560 msg.msg_flags = 0; 561 #endif 562 aiov.iov_base = uap->buf; 563 aiov.iov_len = uap->len; 564 return (sendit(p, uap->s, &msg, uap->flags)); 565 } 566 567 #ifdef COMPAT_OLDSOCK 568 int 569 osend(p, uap) 570 struct proc *p; 571 register struct osend_args /* { 572 int s; 573 caddr_t buf; 574 int len; 575 int flags; 576 } */ *uap; 577 { 578 struct msghdr msg; 579 struct iovec aiov; 580 581 msg.msg_name = 0; 582 msg.msg_namelen = 0; 583 msg.msg_iov = &aiov; 584 msg.msg_iovlen = 1; 585 aiov.iov_base = uap->buf; 586 aiov.iov_len = uap->len; 587 msg.msg_control = 0; 588 msg.msg_flags = 0; 589 return (sendit(p, uap->s, &msg, uap->flags)); 590 } 591 592 int 593 osendmsg(p, uap) 594 struct proc *p; 595 register struct osendmsg_args /* { 596 int s; 597 caddr_t msg; 598 int flags; 599 } */ *uap; 600 { 601 struct msghdr msg; 602 struct iovec aiov[UIO_SMALLIOV], *iov; 603 int error; 604 605 error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr)); 606 if (error) 607 return (error); 608 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 609 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 610 return (EMSGSIZE); 611 MALLOC(iov, struct iovec *, 612 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 613 M_WAITOK); 614 } else 615 iov = aiov; 616 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 617 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 618 if (error) 619 goto done; 620 msg.msg_flags = MSG_COMPAT; 621 msg.msg_iov = iov; 622 error = sendit(p, uap->s, &msg, uap->flags); 623 done: 624 if (iov != aiov) 625 FREE(iov, M_IOV); 626 return (error); 627 } 628 #endif 629 630 int 631 sendmsg(p, uap) 632 struct proc *p; 633 register struct sendmsg_args /* { 634 int s; 635 caddr_t msg; 636 int flags; 637 } */ *uap; 638 { 639 struct msghdr msg; 640 struct iovec aiov[UIO_SMALLIOV], *iov; 641 int error; 642 643 error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg)); 644 if (error) 645 return (error); 646 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 647 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 648 return (EMSGSIZE); 649 MALLOC(iov, struct iovec *, 650 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 651 M_WAITOK); 652 } else 653 iov = aiov; 654 if (msg.msg_iovlen && 655 (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 656 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 657 goto done; 658 msg.msg_iov = iov; 659 #ifdef COMPAT_OLDSOCK 660 msg.msg_flags = 0; 661 #endif 662 error = sendit(p, uap->s, &msg, uap->flags); 663 done: 664 if (iov != aiov) 665 FREE(iov, M_IOV); 666 return (error); 667 } 668 669 static int 670 recvit(p, s, mp, namelenp) 671 register struct proc *p; 672 int s; 673 register struct msghdr *mp; 674 caddr_t namelenp; 675 { 676 struct file *fp; 677 struct uio auio; 678 register struct iovec *iov; 679 register int i; 680 int len, error; 681 struct mbuf *m, *control = 0; 682 caddr_t ctlbuf; 683 struct socket *so; 684 struct sockaddr *fromsa = 0; 685 #ifdef KTRACE 686 struct iovec *ktriov = NULL; 687 #endif 688 689 error = getsock(p->p_fd, s, &fp); 690 if (error) 691 return (error); 692 auio.uio_iov = mp->msg_iov; 693 auio.uio_iovcnt = mp->msg_iovlen; 694 auio.uio_segflg = UIO_USERSPACE; 695 auio.uio_rw = UIO_READ; 696 auio.uio_procp = p; 697 auio.uio_offset = 0; /* XXX */ 698 auio.uio_resid = 0; 699 iov = mp->msg_iov; 700 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 701 if ((auio.uio_resid += iov->iov_len) < 0) 702 return (EINVAL); 703 } 704 #ifdef KTRACE 705 if (KTRPOINT(p, KTR_GENIO)) { 706 int iovlen = auio.uio_iovcnt * sizeof (struct iovec); 707 708 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 709 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 710 } 711 #endif 712 len = auio.uio_resid; 713 so = (struct socket *)fp->f_data; 714 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 715 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 716 &mp->msg_flags); 717 if (error) { 718 if (auio.uio_resid != len && (error == ERESTART || 719 error == EINTR || error == EWOULDBLOCK)) 720 error = 0; 721 } 722 #ifdef KTRACE 723 if (ktriov != NULL) { 724 if (error == 0) 725 ktrgenio(p->p_tracep, s, UIO_READ, 726 ktriov, len - auio.uio_resid, error); 727 FREE(ktriov, M_TEMP); 728 } 729 #endif 730 if (error) 731 goto out; 732 p->p_retval[0] = len - auio.uio_resid; 733 if (mp->msg_name) { 734 len = mp->msg_namelen; 735 if (len <= 0 || fromsa == 0) 736 len = 0; 737 else { 738 #ifndef MIN 739 #define MIN(a,b) ((a)>(b)?(b):(a)) 740 #endif 741 /* save sa_len before it is destroyed by MSG_COMPAT */ 742 len = MIN(len, fromsa->sa_len); 743 #ifdef COMPAT_OLDSOCK 744 if (mp->msg_flags & MSG_COMPAT) 745 ((struct osockaddr *)fromsa)->sa_family = 746 fromsa->sa_family; 747 #endif 748 error = copyout(fromsa, 749 (caddr_t)mp->msg_name, (unsigned)len); 750 if (error) 751 goto out; 752 } 753 mp->msg_namelen = len; 754 if (namelenp && 755 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) { 756 #ifdef COMPAT_OLDSOCK 757 if (mp->msg_flags & MSG_COMPAT) 758 error = 0; /* old recvfrom didn't check */ 759 else 760 #endif 761 goto out; 762 } 763 } 764 if (mp->msg_control) { 765 #ifdef COMPAT_OLDSOCK 766 /* 767 * We assume that old recvmsg calls won't receive access 768 * rights and other control info, esp. as control info 769 * is always optional and those options didn't exist in 4.3. 770 * If we receive rights, trim the cmsghdr; anything else 771 * is tossed. 772 */ 773 if (control && mp->msg_flags & MSG_COMPAT) { 774 if (mtod(control, struct cmsghdr *)->cmsg_level != 775 SOL_SOCKET || 776 mtod(control, struct cmsghdr *)->cmsg_type != 777 SCM_RIGHTS) { 778 mp->msg_controllen = 0; 779 goto out; 780 } 781 control->m_len -= sizeof (struct cmsghdr); 782 control->m_data += sizeof (struct cmsghdr); 783 } 784 #endif 785 len = mp->msg_controllen; 786 m = control; 787 mp->msg_controllen = 0; 788 ctlbuf = (caddr_t) mp->msg_control; 789 790 while (m && len > 0) { 791 unsigned int tocopy; 792 793 if (len >= m->m_len) 794 tocopy = m->m_len; 795 else { 796 mp->msg_flags |= MSG_CTRUNC; 797 tocopy = len; 798 } 799 800 if ((error = copyout((caddr_t)mtod(m, caddr_t), 801 ctlbuf, tocopy)) != 0) 802 goto out; 803 804 ctlbuf += tocopy; 805 len -= tocopy; 806 m = m->m_next; 807 } 808 mp->msg_controllen = ctlbuf - mp->msg_control; 809 } 810 out: 811 if (fromsa) 812 FREE(fromsa, M_SONAME); 813 if (control) 814 m_freem(control); 815 return (error); 816 } 817 818 int 819 recvfrom(p, uap) 820 struct proc *p; 821 register struct recvfrom_args /* { 822 int s; 823 caddr_t buf; 824 size_t len; 825 int flags; 826 caddr_t from; 827 int *fromlenaddr; 828 } */ *uap; 829 { 830 struct msghdr msg; 831 struct iovec aiov; 832 int error; 833 834 if (uap->fromlenaddr) { 835 error = copyin((caddr_t)uap->fromlenaddr, 836 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen)); 837 if (error) 838 return (error); 839 } else 840 msg.msg_namelen = 0; 841 msg.msg_name = uap->from; 842 msg.msg_iov = &aiov; 843 msg.msg_iovlen = 1; 844 aiov.iov_base = uap->buf; 845 aiov.iov_len = uap->len; 846 msg.msg_control = 0; 847 msg.msg_flags = uap->flags; 848 return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr)); 849 } 850 851 #ifdef COMPAT_OLDSOCK 852 int 853 orecvfrom(p, uap) 854 struct proc *p; 855 struct recvfrom_args *uap; 856 { 857 858 uap->flags |= MSG_COMPAT; 859 return (recvfrom(p, uap)); 860 } 861 #endif 862 863 864 #ifdef COMPAT_OLDSOCK 865 int 866 orecv(p, uap) 867 struct proc *p; 868 register struct orecv_args /* { 869 int s; 870 caddr_t buf; 871 int len; 872 int flags; 873 } */ *uap; 874 { 875 struct msghdr msg; 876 struct iovec aiov; 877 878 msg.msg_name = 0; 879 msg.msg_namelen = 0; 880 msg.msg_iov = &aiov; 881 msg.msg_iovlen = 1; 882 aiov.iov_base = uap->buf; 883 aiov.iov_len = uap->len; 884 msg.msg_control = 0; 885 msg.msg_flags = uap->flags; 886 return (recvit(p, uap->s, &msg, (caddr_t)0)); 887 } 888 889 /* 890 * Old recvmsg. This code takes advantage of the fact that the old msghdr 891 * overlays the new one, missing only the flags, and with the (old) access 892 * rights where the control fields are now. 893 */ 894 int 895 orecvmsg(p, uap) 896 struct proc *p; 897 register struct orecvmsg_args /* { 898 int s; 899 struct omsghdr *msg; 900 int flags; 901 } */ *uap; 902 { 903 struct msghdr msg; 904 struct iovec aiov[UIO_SMALLIOV], *iov; 905 int error; 906 907 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, 908 sizeof (struct omsghdr)); 909 if (error) 910 return (error); 911 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 912 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 913 return (EMSGSIZE); 914 MALLOC(iov, struct iovec *, 915 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 916 M_WAITOK); 917 } else 918 iov = aiov; 919 msg.msg_flags = uap->flags | MSG_COMPAT; 920 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 921 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 922 if (error) 923 goto done; 924 msg.msg_iov = iov; 925 error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen); 926 927 if (msg.msg_controllen && error == 0) 928 error = copyout((caddr_t)&msg.msg_controllen, 929 (caddr_t)&uap->msg->msg_accrightslen, sizeof (int)); 930 done: 931 if (iov != aiov) 932 FREE(iov, M_IOV); 933 return (error); 934 } 935 #endif 936 937 int 938 recvmsg(p, uap) 939 struct proc *p; 940 register struct recvmsg_args /* { 941 int s; 942 struct msghdr *msg; 943 int flags; 944 } */ *uap; 945 { 946 struct msghdr msg; 947 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 948 register int error; 949 950 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg)); 951 if (error) 952 return (error); 953 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 954 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 955 return (EMSGSIZE); 956 MALLOC(iov, struct iovec *, 957 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 958 M_WAITOK); 959 } else 960 iov = aiov; 961 #ifdef COMPAT_OLDSOCK 962 msg.msg_flags = uap->flags &~ MSG_COMPAT; 963 #else 964 msg.msg_flags = uap->flags; 965 #endif 966 uiov = msg.msg_iov; 967 msg.msg_iov = iov; 968 error = copyin((caddr_t)uiov, (caddr_t)iov, 969 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 970 if (error) 971 goto done; 972 error = recvit(p, uap->s, &msg, (caddr_t)0); 973 if (!error) { 974 msg.msg_iov = uiov; 975 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg)); 976 } 977 done: 978 if (iov != aiov) 979 FREE(iov, M_IOV); 980 return (error); 981 } 982 983 /* ARGSUSED */ 984 int 985 shutdown(p, uap) 986 struct proc *p; 987 register struct shutdown_args /* { 988 int s; 989 int how; 990 } */ *uap; 991 { 992 struct file *fp; 993 int error; 994 995 error = getsock(p->p_fd, uap->s, &fp); 996 if (error) 997 return (error); 998 return (soshutdown((struct socket *)fp->f_data, uap->how)); 999 } 1000 1001 /* ARGSUSED */ 1002 int 1003 setsockopt(p, uap) 1004 struct proc *p; 1005 register struct setsockopt_args /* { 1006 int s; 1007 int level; 1008 int name; 1009 caddr_t val; 1010 int valsize; 1011 } */ *uap; 1012 { 1013 struct file *fp; 1014 struct sockopt sopt; 1015 int error; 1016 1017 if (uap->val == 0 && uap->valsize != 0) 1018 return (EFAULT); 1019 if (uap->valsize < 0) 1020 return (EINVAL); 1021 1022 error = getsock(p->p_fd, uap->s, &fp); 1023 if (error) 1024 return (error); 1025 1026 sopt.sopt_dir = SOPT_SET; 1027 sopt.sopt_level = uap->level; 1028 sopt.sopt_name = uap->name; 1029 sopt.sopt_val = uap->val; 1030 sopt.sopt_valsize = uap->valsize; 1031 sopt.sopt_p = p; 1032 1033 return (sosetopt((struct socket *)fp->f_data, &sopt)); 1034 } 1035 1036 /* ARGSUSED */ 1037 int 1038 getsockopt(p, uap) 1039 struct proc *p; 1040 register struct getsockopt_args /* { 1041 int s; 1042 int level; 1043 int name; 1044 caddr_t val; 1045 int *avalsize; 1046 } */ *uap; 1047 { 1048 int valsize, error; 1049 struct file *fp; 1050 struct sockopt sopt; 1051 1052 error = getsock(p->p_fd, uap->s, &fp); 1053 if (error) 1054 return (error); 1055 if (uap->val) { 1056 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize, 1057 sizeof (valsize)); 1058 if (error) 1059 return (error); 1060 if (valsize < 0) 1061 return (EINVAL); 1062 } else 1063 valsize = 0; 1064 1065 sopt.sopt_dir = SOPT_GET; 1066 sopt.sopt_level = uap->level; 1067 sopt.sopt_name = uap->name; 1068 sopt.sopt_val = uap->val; 1069 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1070 sopt.sopt_p = p; 1071 1072 error = sogetopt((struct socket *)fp->f_data, &sopt); 1073 if (error == 0) { 1074 valsize = sopt.sopt_valsize; 1075 error = copyout((caddr_t)&valsize, 1076 (caddr_t)uap->avalsize, sizeof (valsize)); 1077 } 1078 return (error); 1079 } 1080 1081 /* 1082 * Get socket name. 1083 */ 1084 /* ARGSUSED */ 1085 static int 1086 getsockname1(p, uap, compat) 1087 struct proc *p; 1088 register struct getsockname_args /* { 1089 int fdes; 1090 caddr_t asa; 1091 int *alen; 1092 } */ *uap; 1093 int compat; 1094 { 1095 struct file *fp; 1096 register struct socket *so; 1097 struct sockaddr *sa; 1098 int len, error; 1099 1100 error = getsock(p->p_fd, uap->fdes, &fp); 1101 if (error) 1102 return (error); 1103 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); 1104 if (error) 1105 return (error); 1106 so = (struct socket *)fp->f_data; 1107 sa = 0; 1108 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1109 if (error) 1110 goto bad; 1111 if (sa == 0) { 1112 len = 0; 1113 goto gotnothing; 1114 } 1115 1116 len = MIN(len, sa->sa_len); 1117 #ifdef COMPAT_OLDSOCK 1118 if (compat) 1119 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1120 #endif 1121 error = copyout(sa, (caddr_t)uap->asa, (u_int)len); 1122 if (error == 0) 1123 gotnothing: 1124 error = copyout((caddr_t)&len, (caddr_t)uap->alen, 1125 sizeof (len)); 1126 bad: 1127 if (sa) 1128 FREE(sa, M_SONAME); 1129 return (error); 1130 } 1131 1132 int 1133 getsockname(p, uap) 1134 struct proc *p; 1135 struct getsockname_args *uap; 1136 { 1137 1138 return (getsockname1(p, uap, 0)); 1139 } 1140 1141 #ifdef COMPAT_OLDSOCK 1142 int 1143 ogetsockname(p, uap) 1144 struct proc *p; 1145 struct getsockname_args *uap; 1146 { 1147 1148 return (getsockname1(p, uap, 1)); 1149 } 1150 #endif /* COMPAT_OLDSOCK */ 1151 1152 /* 1153 * Get name of peer for connected socket. 1154 */ 1155 /* ARGSUSED */ 1156 static int 1157 getpeername1(p, uap, compat) 1158 struct proc *p; 1159 register struct getpeername_args /* { 1160 int fdes; 1161 caddr_t asa; 1162 int *alen; 1163 } */ *uap; 1164 int compat; 1165 { 1166 struct file *fp; 1167 register struct socket *so; 1168 struct sockaddr *sa; 1169 int len, error; 1170 1171 error = getsock(p->p_fd, uap->fdes, &fp); 1172 if (error) 1173 return (error); 1174 so = (struct socket *)fp->f_data; 1175 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) 1176 return (ENOTCONN); 1177 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); 1178 if (error) 1179 return (error); 1180 sa = 0; 1181 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1182 if (error) 1183 goto bad; 1184 if (sa == 0) { 1185 len = 0; 1186 goto gotnothing; 1187 } 1188 len = MIN(len, sa->sa_len); 1189 #ifdef COMPAT_OLDSOCK 1190 if (compat) 1191 ((struct osockaddr *)sa)->sa_family = 1192 sa->sa_family; 1193 #endif 1194 error = copyout(sa, (caddr_t)uap->asa, (u_int)len); 1195 if (error) 1196 goto bad; 1197 gotnothing: 1198 error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len)); 1199 bad: 1200 if (sa) FREE(sa, M_SONAME); 1201 return (error); 1202 } 1203 1204 int 1205 getpeername(p, uap) 1206 struct proc *p; 1207 struct getpeername_args *uap; 1208 { 1209 1210 return (getpeername1(p, uap, 0)); 1211 } 1212 1213 #ifdef COMPAT_OLDSOCK 1214 int 1215 ogetpeername(p, uap) 1216 struct proc *p; 1217 struct ogetpeername_args *uap; 1218 { 1219 1220 /* XXX uap should have type `getpeername_args *' to begin with. */ 1221 return (getpeername1(p, (struct getpeername_args *)uap, 1)); 1222 } 1223 #endif /* COMPAT_OLDSOCK */ 1224 1225 int 1226 sockargs(mp, buf, buflen, type) 1227 struct mbuf **mp; 1228 caddr_t buf; 1229 int buflen, type; 1230 { 1231 register struct sockaddr *sa; 1232 register struct mbuf *m; 1233 int error; 1234 1235 if ((u_int)buflen > MLEN) { 1236 #ifdef COMPAT_OLDSOCK 1237 if (type == MT_SONAME && (u_int)buflen <= 112) 1238 buflen = MLEN; /* unix domain compat. hack */ 1239 else 1240 #endif 1241 return (EINVAL); 1242 } 1243 m = m_get(M_WAIT, type); 1244 if (m == NULL) 1245 return (ENOBUFS); 1246 m->m_len = buflen; 1247 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1248 if (error) 1249 (void) m_free(m); 1250 else { 1251 *mp = m; 1252 if (type == MT_SONAME) { 1253 sa = mtod(m, struct sockaddr *); 1254 1255 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1256 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1257 sa->sa_family = sa->sa_len; 1258 #endif 1259 sa->sa_len = buflen; 1260 } 1261 } 1262 return (error); 1263 } 1264 1265 int 1266 getsockaddr(namp, uaddr, len) 1267 struct sockaddr **namp; 1268 caddr_t uaddr; 1269 size_t len; 1270 { 1271 struct sockaddr *sa; 1272 int error; 1273 1274 if (len > SOCK_MAXADDRLEN) 1275 return ENAMETOOLONG; 1276 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1277 error = copyin(uaddr, sa, len); 1278 if (error) { 1279 FREE(sa, M_SONAME); 1280 } else { 1281 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1282 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1283 sa->sa_family = sa->sa_len; 1284 #endif 1285 sa->sa_len = len; 1286 *namp = sa; 1287 } 1288 return error; 1289 } 1290 1291 int 1292 getsock(fdp, fdes, fpp) 1293 struct filedesc *fdp; 1294 int fdes; 1295 struct file **fpp; 1296 { 1297 register struct file *fp; 1298 1299 if ((unsigned)fdes >= fdp->fd_nfiles || 1300 (fp = fdp->fd_ofiles[fdes]) == NULL) 1301 return (EBADF); 1302 if (fp->f_type != DTYPE_SOCKET) 1303 return (ENOTSOCK); 1304 *fpp = fp; 1305 return (0); 1306 } 1307 1308 /* 1309 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) 1310 * XXX - The sf_buf functions are currently private to sendfile(2), so have 1311 * been made static, but may be useful in the future for doing zero-copy in 1312 * other parts of the networking code. 1313 */ 1314 static void 1315 sf_buf_init(void *arg) 1316 { 1317 int i; 1318 1319 SLIST_INIT(&sf_freelist); 1320 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE); 1321 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT); 1322 bzero(sf_bufs, nsfbufs * sizeof(struct sf_buf)); 1323 for (i = 0; i < nsfbufs; i++) { 1324 sf_bufs[i].kva = sf_base + i * PAGE_SIZE; 1325 SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list); 1326 } 1327 } 1328 1329 /* 1330 * Get an sf_buf from the freelist. Will block if none are available. 1331 */ 1332 static struct sf_buf * 1333 sf_buf_alloc() 1334 { 1335 struct sf_buf *sf; 1336 int s; 1337 1338 s = splimp(); 1339 while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) { 1340 sf_buf_alloc_want = 1; 1341 tsleep(&sf_freelist, PVM, "sfbufa", 0); 1342 } 1343 SLIST_REMOVE_HEAD(&sf_freelist, free_list); 1344 splx(s); 1345 sf->refcnt = 1; 1346 return (sf); 1347 } 1348 1349 #define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT]) 1350 static void 1351 sf_buf_ref(caddr_t addr, u_int size) 1352 { 1353 struct sf_buf *sf; 1354 1355 sf = dtosf(addr); 1356 if (sf->refcnt == 0) 1357 panic("sf_buf_ref: referencing a free sf_buf"); 1358 sf->refcnt++; 1359 } 1360 1361 /* 1362 * Lose a reference to an sf_buf. When none left, detach mapped page 1363 * and release resources back to the system. 1364 * 1365 * Must be called at splimp. 1366 */ 1367 static void 1368 sf_buf_free(caddr_t addr, u_int size) 1369 { 1370 struct sf_buf *sf; 1371 struct vm_page *m; 1372 int s; 1373 1374 sf = dtosf(addr); 1375 if (sf->refcnt == 0) 1376 panic("sf_buf_free: freeing free sf_buf"); 1377 sf->refcnt--; 1378 if (sf->refcnt == 0) { 1379 pmap_qremove((vm_offset_t)addr, 1); 1380 m = sf->m; 1381 s = splvm(); 1382 vm_page_unwire(m, 0); 1383 /* 1384 * Check for the object going away on us. This can 1385 * happen since we don't hold a reference to it. 1386 * If so, we're responsible for freeing the page. 1387 */ 1388 if (m->wire_count == 0 && m->object == NULL) 1389 vm_page_free(m); 1390 splx(s); 1391 sf->m = NULL; 1392 SLIST_INSERT_HEAD(&sf_freelist, sf, free_list); 1393 if (sf_buf_alloc_want) { 1394 sf_buf_alloc_want = 0; 1395 wakeup(&sf_freelist); 1396 } 1397 } 1398 } 1399 1400 /* 1401 * sendfile(2). 1402 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1403 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1404 * 1405 * Send a file specified by 'fd' and starting at 'offset' to a socket 1406 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1407 * nbytes == 0. Optionally add a header and/or trailer to the socket 1408 * output. If specified, write the total number of bytes sent into *sbytes. 1409 */ 1410 int 1411 sendfile(struct proc *p, struct sendfile_args *uap) 1412 { 1413 struct file *fp; 1414 struct filedesc *fdp = p->p_fd; 1415 struct vnode *vp; 1416 struct vm_object *obj; 1417 struct socket *so; 1418 struct mbuf *m; 1419 struct sf_buf *sf; 1420 struct vm_page *pg; 1421 struct writev_args nuap; 1422 struct sf_hdtr hdtr; 1423 off_t off, xfsize, sbytes = 0; 1424 int error = 0, s; 1425 1426 vp = NULL; 1427 /* 1428 * Do argument checking. Must be a regular file in, stream 1429 * type and connected socket out, positive offset. 1430 */ 1431 if (((u_int)uap->fd) >= fdp->fd_nfiles || 1432 (fp = fdp->fd_ofiles[uap->fd]) == NULL || 1433 (fp->f_flag & FREAD) == 0) { 1434 error = EBADF; 1435 goto done; 1436 } 1437 if (fp->f_type != DTYPE_VNODE) { 1438 error = EINVAL; 1439 goto done; 1440 } 1441 vp = (struct vnode *)fp->f_data; 1442 vref(vp); 1443 obj = vp->v_object; 1444 if (vp->v_type != VREG || obj == NULL) { 1445 error = EINVAL; 1446 goto done; 1447 } 1448 error = getsock(p->p_fd, uap->s, &fp); 1449 if (error) 1450 goto done; 1451 so = (struct socket *)fp->f_data; 1452 if (so->so_type != SOCK_STREAM) { 1453 error = EINVAL; 1454 goto done; 1455 } 1456 if ((so->so_state & SS_ISCONNECTED) == 0) { 1457 error = ENOTCONN; 1458 goto done; 1459 } 1460 if (uap->offset < 0) { 1461 error = EINVAL; 1462 goto done; 1463 } 1464 1465 /* 1466 * If specified, get the pointer to the sf_hdtr struct for 1467 * any headers/trailers. 1468 */ 1469 if (uap->hdtr != NULL) { 1470 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1471 if (error) 1472 goto done; 1473 /* 1474 * Send any headers. Wimp out and use writev(2). 1475 */ 1476 if (hdtr.headers != NULL) { 1477 nuap.fd = uap->s; 1478 nuap.iovp = hdtr.headers; 1479 nuap.iovcnt = hdtr.hdr_cnt; 1480 error = writev(p, &nuap); 1481 if (error) 1482 goto done; 1483 sbytes += p->p_retval[0]; 1484 } 1485 } 1486 1487 /* 1488 * Protect against multiple writers to the socket. 1489 */ 1490 (void) sblock(&so->so_snd, M_WAITOK); 1491 1492 /* 1493 * Loop through the pages in the file, starting with the requested 1494 * offset. Get a file page (do I/O if necessary), map the file page 1495 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1496 * it on the socket. 1497 */ 1498 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1499 vm_pindex_t pindex; 1500 vm_offset_t pgoff; 1501 1502 pindex = OFF_TO_IDX(off); 1503 retry_lookup: 1504 /* 1505 * Calculate the amount to transfer. Not to exceed a page, 1506 * the EOF, or the passed in nbytes. 1507 */ 1508 xfsize = obj->un_pager.vnp.vnp_size - off; 1509 if (xfsize > PAGE_SIZE) 1510 xfsize = PAGE_SIZE; 1511 pgoff = (vm_offset_t)(off & PAGE_MASK); 1512 if (PAGE_SIZE - pgoff < xfsize) 1513 xfsize = PAGE_SIZE - pgoff; 1514 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1515 xfsize = uap->nbytes - sbytes; 1516 if (xfsize <= 0) 1517 break; 1518 /* 1519 * Optimize the non-blocking case by looking at the socket space 1520 * before going to the extra work of constituting the sf_buf. 1521 */ 1522 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1523 if (so->so_state & SS_CANTSENDMORE) 1524 error = EPIPE; 1525 else 1526 error = EAGAIN; 1527 sbunlock(&so->so_snd); 1528 goto done; 1529 } 1530 /* 1531 * Attempt to look up the page. 1532 * 1533 * Allocate if not found 1534 * 1535 * Wait and loop if busy. 1536 */ 1537 pg = vm_page_lookup(obj, pindex); 1538 1539 if (pg == NULL) { 1540 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL); 1541 if (pg == NULL) { 1542 VM_WAIT; 1543 goto retry_lookup; 1544 } 1545 vm_page_wakeup(pg); 1546 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) { 1547 goto retry_lookup; 1548 } 1549 1550 /* 1551 * Wire the page so it does not get ripped out from under 1552 * us. 1553 */ 1554 1555 vm_page_wire(pg); 1556 1557 /* 1558 * If page is not valid for what we need, initiate I/O 1559 */ 1560 1561 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1562 struct uio auio; 1563 struct iovec aiov; 1564 int bsize; 1565 1566 /* 1567 * Ensure that our page is still around when the I/O 1568 * completes. 1569 */ 1570 vm_page_io_start(pg); 1571 1572 /* 1573 * Get the page from backing store. 1574 */ 1575 bsize = vp->v_mount->mnt_stat.f_iosize; 1576 auio.uio_iov = &aiov; 1577 auio.uio_iovcnt = 1; 1578 aiov.iov_base = 0; 1579 aiov.iov_len = MAXBSIZE; 1580 auio.uio_resid = MAXBSIZE; 1581 auio.uio_offset = trunc_page(off); 1582 auio.uio_segflg = UIO_NOCOPY; 1583 auio.uio_rw = UIO_READ; 1584 auio.uio_procp = p; 1585 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p); 1586 error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16), 1587 p->p_ucred); 1588 VOP_UNLOCK(vp, 0, p); 1589 vm_page_flag_clear(pg, PG_ZERO); 1590 vm_page_io_finish(pg); 1591 if (error) { 1592 vm_page_unwire(pg, 0); 1593 /* 1594 * See if anyone else might know about this page. 1595 * If not and it is not valid, then free it. 1596 */ 1597 if (pg->wire_count == 0 && pg->valid == 0 && 1598 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1599 pg->hold_count == 0) 1600 vm_page_free(pg); 1601 sbunlock(&so->so_snd); 1602 goto done; 1603 } 1604 } 1605 1606 /* 1607 * Allocate a kernel virtual page and insert the physical page 1608 * into it. 1609 */ 1610 1611 sf = sf_buf_alloc(); 1612 sf->m = pg; 1613 pmap_qenter(sf->kva, &pg, 1); 1614 /* 1615 * Get an mbuf header and set it up as having external storage. 1616 */ 1617 MGETHDR(m, M_WAIT, MT_DATA); 1618 m->m_ext.ext_free = sf_buf_free; 1619 m->m_ext.ext_ref = sf_buf_ref; 1620 m->m_ext.ext_buf = (void *)sf->kva; 1621 m->m_ext.ext_size = PAGE_SIZE; 1622 m->m_data = (char *) sf->kva + pgoff; 1623 m->m_flags |= M_EXT; 1624 m->m_pkthdr.len = m->m_len = xfsize; 1625 /* 1626 * Add the buffer to the socket buffer chain. 1627 */ 1628 s = splnet(); 1629 retry_space: 1630 /* 1631 * Make sure that the socket is still able to take more data. 1632 * CANTSENDMORE being true usually means that the connection 1633 * was closed. so_error is true when an error was sensed after 1634 * a previous send. 1635 * The state is checked after the page mapping and buffer 1636 * allocation above since those operations may block and make 1637 * any socket checks stale. From this point forward, nothing 1638 * blocks before the pru_send (or more accurately, any blocking 1639 * results in a loop back to here to re-check). 1640 */ 1641 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1642 if (so->so_state & SS_CANTSENDMORE) { 1643 error = EPIPE; 1644 } else { 1645 error = so->so_error; 1646 so->so_error = 0; 1647 } 1648 m_freem(m); 1649 sbunlock(&so->so_snd); 1650 splx(s); 1651 goto done; 1652 } 1653 /* 1654 * Wait for socket space to become available. We do this just 1655 * after checking the connection state above in order to avoid 1656 * a race condition with sbwait(). 1657 */ 1658 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1659 if (so->so_state & SS_NBIO) { 1660 m_freem(m); 1661 sbunlock(&so->so_snd); 1662 splx(s); 1663 error = EAGAIN; 1664 goto done; 1665 } 1666 error = sbwait(&so->so_snd); 1667 /* 1668 * An error from sbwait usually indicates that we've 1669 * been interrupted by a signal. If we've sent anything 1670 * then return bytes sent, otherwise return the error. 1671 */ 1672 if (error) { 1673 m_freem(m); 1674 sbunlock(&so->so_snd); 1675 splx(s); 1676 goto done; 1677 } 1678 goto retry_space; 1679 } 1680 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p); 1681 splx(s); 1682 if (error) { 1683 sbunlock(&so->so_snd); 1684 goto done; 1685 } 1686 } 1687 sbunlock(&so->so_snd); 1688 1689 /* 1690 * Send trailers. Wimp out and use writev(2). 1691 */ 1692 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 1693 nuap.fd = uap->s; 1694 nuap.iovp = hdtr.trailers; 1695 nuap.iovcnt = hdtr.trl_cnt; 1696 error = writev(p, &nuap); 1697 if (error) 1698 goto done; 1699 sbytes += p->p_retval[0]; 1700 } 1701 1702 done: 1703 if (uap->sbytes != NULL) { 1704 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 1705 } 1706 if (vp) 1707 vrele(vp); 1708 return (error); 1709 } 1710