1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_compat.h" 39 #include "opt_ktrace.h" 40 #include "opt_mac.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/kernel.h> 45 #include <sys/lock.h> 46 #include <sys/mac.h> 47 #include <sys/mutex.h> 48 #include <sys/sysproto.h> 49 #include <sys/malloc.h> 50 #include <sys/filedesc.h> 51 #include <sys/event.h> 52 #include <sys/proc.h> 53 #include <sys/fcntl.h> 54 #include <sys/file.h> 55 #include <sys/filio.h> 56 #include <sys/mount.h> 57 #include <sys/mbuf.h> 58 #include <sys/protosw.h> 59 #include <sys/sf_buf.h> 60 #include <sys/socket.h> 61 #include <sys/socketvar.h> 62 #include <sys/signalvar.h> 63 #include <sys/syscallsubr.h> 64 #include <sys/sysctl.h> 65 #include <sys/uio.h> 66 #include <sys/vnode.h> 67 #ifdef KTRACE 68 #include <sys/ktrace.h> 69 #endif 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 #include <vm/vm_pageout.h> 75 #include <vm/vm_kern.h> 76 #include <vm/vm_extern.h> 77 78 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 79 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 80 81 static int accept1(struct thread *td, struct accept_args *uap, int compat); 82 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 83 static int getsockname1(struct thread *td, struct getsockname_args *uap, 84 int compat); 85 static int getpeername1(struct thread *td, struct getpeername_args *uap, 86 int compat); 87 88 /* 89 * NSFBUFS-related variables and associated sysctls 90 */ 91 int nsfbufs; 92 int nsfbufspeak; 93 int nsfbufsused; 94 95 SYSCTL_DECL(_kern_ipc); 96 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 97 "Maximum number of sendfile(2) sf_bufs available"); 98 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 99 "Number of sendfile(2) sf_bufs at peak usage"); 100 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 101 "Number of sendfile(2) sf_bufs in use"); 102 103 /* 104 * System call interface to the socket abstraction. 105 */ 106 #if defined(COMPAT_43) 107 #define COMPAT_OLDSOCK 108 #endif 109 110 /* 111 * MPSAFE 112 */ 113 int 114 socket(td, uap) 115 struct thread *td; 116 register struct socket_args /* { 117 int domain; 118 int type; 119 int protocol; 120 } */ *uap; 121 { 122 struct filedesc *fdp; 123 struct socket *so; 124 struct file *fp; 125 int fd, error; 126 127 fdp = td->td_proc->p_fd; 128 error = falloc(td, &fp, &fd); 129 if (error) 130 return (error); 131 /* An extra reference on `fp' has been held for us by falloc(). */ 132 NET_LOCK_GIANT(); 133 error = socreate(uap->domain, &so, uap->type, uap->protocol, 134 td->td_ucred, td); 135 NET_UNLOCK_GIANT(); 136 FILEDESC_LOCK(fdp); 137 if (error) { 138 if (fdp->fd_ofiles[fd] == fp) { 139 fdp->fd_ofiles[fd] = NULL; 140 fdunused(fdp, fd); 141 FILEDESC_UNLOCK(fdp); 142 fdrop(fp, td); 143 } else { 144 FILEDESC_UNLOCK(fdp); 145 } 146 } else { 147 fp->f_data = so; /* already has ref count */ 148 fp->f_flag = FREAD|FWRITE; 149 fp->f_ops = &socketops; 150 fp->f_type = DTYPE_SOCKET; 151 FILEDESC_UNLOCK(fdp); 152 td->td_retval[0] = fd; 153 } 154 fdrop(fp, td); 155 return (error); 156 } 157 158 /* 159 * MPSAFE 160 */ 161 /* ARGSUSED */ 162 int 163 bind(td, uap) 164 struct thread *td; 165 register struct bind_args /* { 166 int s; 167 caddr_t name; 168 int namelen; 169 } */ *uap; 170 { 171 struct sockaddr *sa; 172 int error; 173 174 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 175 return (error); 176 177 return (kern_bind(td, uap->s, sa)); 178 } 179 180 int 181 kern_bind(td, fd, sa) 182 struct thread *td; 183 int fd; 184 struct sockaddr *sa; 185 { 186 struct socket *so; 187 int error; 188 189 NET_LOCK_GIANT(); 190 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 191 goto done2; 192 #ifdef MAC 193 SOCK_LOCK(so); 194 error = mac_check_socket_bind(td->td_ucred, so, sa); 195 SOCK_UNLOCK(so); 196 if (error) 197 goto done1; 198 #endif 199 error = sobind(so, sa, td); 200 #ifdef MAC 201 done1: 202 #endif 203 fputsock(so); 204 done2: 205 NET_UNLOCK_GIANT(); 206 FREE(sa, M_SONAME); 207 return (error); 208 } 209 210 /* 211 * MPSAFE 212 */ 213 /* ARGSUSED */ 214 int 215 listen(td, uap) 216 struct thread *td; 217 register struct listen_args /* { 218 int s; 219 int backlog; 220 } */ *uap; 221 { 222 struct socket *so; 223 int error; 224 225 NET_LOCK_GIANT(); 226 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 227 #ifdef MAC 228 SOCK_LOCK(so); 229 error = mac_check_socket_listen(td->td_ucred, so); 230 SOCK_UNLOCK(so); 231 if (error) 232 goto done; 233 #endif 234 error = solisten(so, uap->backlog, td); 235 #ifdef MAC 236 done: 237 #endif 238 fputsock(so); 239 } 240 NET_UNLOCK_GIANT(); 241 return(error); 242 } 243 244 /* 245 * accept1() 246 * MPSAFE 247 */ 248 static int 249 accept1(td, uap, compat) 250 struct thread *td; 251 register struct accept_args /* { 252 int s; 253 struct sockaddr * __restrict name; 254 socklen_t * __restrict anamelen; 255 } */ *uap; 256 int compat; 257 { 258 struct filedesc *fdp; 259 struct file *nfp = NULL; 260 struct sockaddr *sa = NULL; 261 socklen_t namelen; 262 int error; 263 struct socket *head, *so; 264 int fd; 265 u_int fflag; 266 pid_t pgid; 267 int tmp; 268 269 fdp = td->td_proc->p_fd; 270 if (uap->name) { 271 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 272 if(error) 273 return (error); 274 if (namelen < 0) 275 return (EINVAL); 276 } 277 NET_LOCK_GIANT(); 278 error = fgetsock(td, uap->s, &head, &fflag); 279 if (error) 280 goto done2; 281 if ((head->so_options & SO_ACCEPTCONN) == 0) { 282 error = EINVAL; 283 goto done; 284 } 285 error = falloc(td, &nfp, &fd); 286 if (error) 287 goto done; 288 ACCEPT_LOCK(); 289 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 290 ACCEPT_UNLOCK(); 291 error = EWOULDBLOCK; 292 goto noconnection; 293 } 294 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 295 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 296 head->so_error = ECONNABORTED; 297 break; 298 } 299 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 300 "accept", 0); 301 if (error) { 302 ACCEPT_UNLOCK(); 303 goto noconnection; 304 } 305 } 306 if (head->so_error) { 307 error = head->so_error; 308 head->so_error = 0; 309 ACCEPT_UNLOCK(); 310 goto noconnection; 311 } 312 so = TAILQ_FIRST(&head->so_comp); 313 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 314 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 315 316 /* 317 * Before changing the flags on the socket, we have to bump the 318 * reference count. Otherwise, if the protocol calls sofree(), 319 * the socket will be released due to a zero refcount. 320 */ 321 SOCK_LOCK(so); 322 soref(so); /* file descriptor reference */ 323 SOCK_UNLOCK(so); 324 325 TAILQ_REMOVE(&head->so_comp, so, so_list); 326 head->so_qlen--; 327 so->so_state |= (head->so_state & SS_NBIO); 328 so->so_qstate &= ~SQ_COMP; 329 so->so_head = NULL; 330 331 ACCEPT_UNLOCK(); 332 333 /* An extra reference on `nfp' has been held for us by falloc(). */ 334 td->td_retval[0] = fd; 335 336 /* connection has been removed from the listen queue */ 337 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 338 339 pgid = fgetown(&head->so_sigio); 340 if (pgid != 0) 341 fsetown(pgid, &so->so_sigio); 342 343 FILE_LOCK(nfp); 344 nfp->f_data = so; /* nfp has ref count from falloc */ 345 nfp->f_flag = fflag; 346 nfp->f_ops = &socketops; 347 nfp->f_type = DTYPE_SOCKET; 348 FILE_UNLOCK(nfp); 349 /* Sync socket nonblocking/async state with file flags */ 350 tmp = fflag & FNONBLOCK; 351 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 352 tmp = fflag & FASYNC; 353 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 354 sa = 0; 355 error = soaccept(so, &sa); 356 if (error) { 357 /* 358 * return a namelen of zero for older code which might 359 * ignore the return value from accept. 360 */ 361 if (uap->name != NULL) { 362 namelen = 0; 363 (void) copyout(&namelen, 364 uap->anamelen, sizeof(*uap->anamelen)); 365 } 366 goto noconnection; 367 } 368 if (sa == NULL) { 369 namelen = 0; 370 if (uap->name) 371 goto gotnoname; 372 error = 0; 373 goto done; 374 } 375 if (uap->name) { 376 /* check sa_len before it is destroyed */ 377 if (namelen > sa->sa_len) 378 namelen = sa->sa_len; 379 #ifdef COMPAT_OLDSOCK 380 if (compat) 381 ((struct osockaddr *)sa)->sa_family = 382 sa->sa_family; 383 #endif 384 error = copyout(sa, uap->name, (u_int)namelen); 385 if (!error) 386 gotnoname: 387 error = copyout(&namelen, 388 uap->anamelen, sizeof (*uap->anamelen)); 389 } 390 noconnection: 391 if (sa) 392 FREE(sa, M_SONAME); 393 394 /* 395 * close the new descriptor, assuming someone hasn't ripped it 396 * out from under us. 397 */ 398 if (error) { 399 FILEDESC_LOCK(fdp); 400 if (fdp->fd_ofiles[fd] == nfp) { 401 fdp->fd_ofiles[fd] = NULL; 402 fdunused(fdp, fd); 403 FILEDESC_UNLOCK(fdp); 404 fdrop(nfp, td); 405 } else { 406 FILEDESC_UNLOCK(fdp); 407 } 408 } 409 410 /* 411 * Release explicitly held references before returning. 412 */ 413 done: 414 if (nfp != NULL) 415 fdrop(nfp, td); 416 fputsock(head); 417 done2: 418 NET_UNLOCK_GIANT(); 419 return (error); 420 } 421 422 /* 423 * MPSAFE (accept1() is MPSAFE) 424 */ 425 int 426 accept(td, uap) 427 struct thread *td; 428 struct accept_args *uap; 429 { 430 431 return (accept1(td, uap, 0)); 432 } 433 434 #ifdef COMPAT_OLDSOCK 435 /* 436 * MPSAFE (accept1() is MPSAFE) 437 */ 438 int 439 oaccept(td, uap) 440 struct thread *td; 441 struct accept_args *uap; 442 { 443 444 return (accept1(td, uap, 1)); 445 } 446 #endif /* COMPAT_OLDSOCK */ 447 448 /* 449 * MPSAFE 450 */ 451 /* ARGSUSED */ 452 int 453 connect(td, uap) 454 struct thread *td; 455 register struct connect_args /* { 456 int s; 457 caddr_t name; 458 int namelen; 459 } */ *uap; 460 { 461 struct sockaddr *sa; 462 int error; 463 464 error = getsockaddr(&sa, uap->name, uap->namelen); 465 if (error) 466 return (error); 467 468 return (kern_connect(td, uap->s, sa)); 469 } 470 471 472 int 473 kern_connect(td, fd, sa) 474 struct thread *td; 475 int fd; 476 struct sockaddr *sa; 477 { 478 struct socket *so; 479 int error, s; 480 int interrupted = 0; 481 482 NET_LOCK_GIANT(); 483 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 484 goto done2; 485 if (so->so_state & SS_ISCONNECTING) { 486 error = EALREADY; 487 goto done1; 488 } 489 #ifdef MAC 490 SOCK_LOCK(so); 491 error = mac_check_socket_connect(td->td_ucred, so, sa); 492 SOCK_UNLOCK(so); 493 if (error) 494 goto bad; 495 #endif 496 error = soconnect(so, sa, td); 497 if (error) 498 goto bad; 499 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 500 error = EINPROGRESS; 501 goto done1; 502 } 503 s = splnet(); 504 SOCK_LOCK(so); 505 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 506 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 507 "connec", 0); 508 if (error) { 509 if (error == EINTR || error == ERESTART) 510 interrupted = 1; 511 break; 512 } 513 } 514 if (error == 0) { 515 error = so->so_error; 516 so->so_error = 0; 517 } 518 SOCK_UNLOCK(so); 519 splx(s); 520 bad: 521 if (!interrupted) 522 so->so_state &= ~SS_ISCONNECTING; 523 if (error == ERESTART) 524 error = EINTR; 525 done1: 526 fputsock(so); 527 done2: 528 NET_UNLOCK_GIANT(); 529 FREE(sa, M_SONAME); 530 return (error); 531 } 532 533 /* 534 * MPSAFE 535 */ 536 int 537 socketpair(td, uap) 538 struct thread *td; 539 register struct socketpair_args /* { 540 int domain; 541 int type; 542 int protocol; 543 int *rsv; 544 } */ *uap; 545 { 546 register struct filedesc *fdp = td->td_proc->p_fd; 547 struct file *fp1, *fp2; 548 struct socket *so1, *so2; 549 int fd, error, sv[2]; 550 551 NET_LOCK_GIANT(); 552 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 553 td->td_ucred, td); 554 if (error) 555 goto done2; 556 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 557 td->td_ucred, td); 558 if (error) 559 goto free1; 560 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 561 error = falloc(td, &fp1, &fd); 562 if (error) 563 goto free2; 564 sv[0] = fd; 565 fp1->f_data = so1; /* so1 already has ref count */ 566 error = falloc(td, &fp2, &fd); 567 if (error) 568 goto free3; 569 fp2->f_data = so2; /* so2 already has ref count */ 570 sv[1] = fd; 571 error = soconnect2(so1, so2); 572 if (error) 573 goto free4; 574 if (uap->type == SOCK_DGRAM) { 575 /* 576 * Datagram socket connection is asymmetric. 577 */ 578 error = soconnect2(so2, so1); 579 if (error) 580 goto free4; 581 } 582 FILE_LOCK(fp1); 583 fp1->f_flag = FREAD|FWRITE; 584 fp1->f_ops = &socketops; 585 fp1->f_type = DTYPE_SOCKET; 586 FILE_UNLOCK(fp1); 587 FILE_LOCK(fp2); 588 fp2->f_flag = FREAD|FWRITE; 589 fp2->f_ops = &socketops; 590 fp2->f_type = DTYPE_SOCKET; 591 FILE_UNLOCK(fp2); 592 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 593 fdrop(fp1, td); 594 fdrop(fp2, td); 595 goto done2; 596 free4: 597 FILEDESC_LOCK(fdp); 598 if (fdp->fd_ofiles[sv[1]] == fp2) { 599 fdp->fd_ofiles[sv[1]] = NULL; 600 fdunused(fdp, sv[1]); 601 FILEDESC_UNLOCK(fdp); 602 fdrop(fp2, td); 603 } else { 604 FILEDESC_UNLOCK(fdp); 605 } 606 fdrop(fp2, td); 607 free3: 608 FILEDESC_LOCK(fdp); 609 if (fdp->fd_ofiles[sv[0]] == fp1) { 610 fdp->fd_ofiles[sv[0]] = NULL; 611 fdunused(fdp, sv[0]); 612 FILEDESC_UNLOCK(fdp); 613 fdrop(fp1, td); 614 } else { 615 FILEDESC_UNLOCK(fdp); 616 } 617 fdrop(fp1, td); 618 free2: 619 (void)soclose(so2); 620 free1: 621 (void)soclose(so1); 622 done2: 623 NET_UNLOCK_GIANT(); 624 return (error); 625 } 626 627 static int 628 sendit(td, s, mp, flags) 629 register struct thread *td; 630 int s; 631 register struct msghdr *mp; 632 int flags; 633 { 634 struct mbuf *control; 635 struct sockaddr *to; 636 int error; 637 638 if (mp->msg_name != NULL) { 639 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 640 if (error) { 641 to = NULL; 642 goto bad; 643 } 644 mp->msg_name = to; 645 } else { 646 to = NULL; 647 } 648 649 if (mp->msg_control) { 650 if (mp->msg_controllen < sizeof(struct cmsghdr) 651 #ifdef COMPAT_OLDSOCK 652 && mp->msg_flags != MSG_COMPAT 653 #endif 654 ) { 655 error = EINVAL; 656 goto bad; 657 } 658 error = sockargs(&control, mp->msg_control, 659 mp->msg_controllen, MT_CONTROL); 660 if (error) 661 goto bad; 662 #ifdef COMPAT_OLDSOCK 663 if (mp->msg_flags == MSG_COMPAT) { 664 register struct cmsghdr *cm; 665 666 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 667 if (control == 0) { 668 error = ENOBUFS; 669 goto bad; 670 } else { 671 cm = mtod(control, struct cmsghdr *); 672 cm->cmsg_len = control->m_len; 673 cm->cmsg_level = SOL_SOCKET; 674 cm->cmsg_type = SCM_RIGHTS; 675 } 676 } 677 #endif 678 } else { 679 control = NULL; 680 } 681 682 error = kern_sendit(td, s, mp, flags, control); 683 684 bad: 685 if (to) 686 FREE(to, M_SONAME); 687 return (error); 688 } 689 690 int 691 kern_sendit(td, s, mp, flags, control) 692 struct thread *td; 693 int s; 694 struct msghdr *mp; 695 int flags; 696 struct mbuf *control; 697 { 698 struct uio auio; 699 struct iovec *iov; 700 struct socket *so; 701 int i; 702 int len, error; 703 #ifdef KTRACE 704 struct uio *ktruio = NULL; 705 #endif 706 707 NET_LOCK_GIANT(); 708 if ((error = fgetsock(td, s, &so, NULL)) != 0) 709 goto bad2; 710 711 #ifdef MAC 712 SOCK_LOCK(so); 713 error = mac_check_socket_send(td->td_ucred, so); 714 SOCK_UNLOCK(so); 715 if (error) 716 goto bad; 717 #endif 718 719 auio.uio_iov = mp->msg_iov; 720 auio.uio_iovcnt = mp->msg_iovlen; 721 auio.uio_segflg = UIO_USERSPACE; 722 auio.uio_rw = UIO_WRITE; 723 auio.uio_td = td; 724 auio.uio_offset = 0; /* XXX */ 725 auio.uio_resid = 0; 726 iov = mp->msg_iov; 727 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 728 if ((auio.uio_resid += iov->iov_len) < 0) { 729 error = EINVAL; 730 goto bad; 731 } 732 } 733 #ifdef KTRACE 734 if (KTRPOINT(td, KTR_GENIO)) 735 ktruio = cloneuio(&auio); 736 #endif 737 len = auio.uio_resid; 738 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 739 0, control, flags, td); 740 if (error) { 741 if (auio.uio_resid != len && (error == ERESTART || 742 error == EINTR || error == EWOULDBLOCK)) 743 error = 0; 744 /* Generation of SIGPIPE can be controlled per socket */ 745 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 746 PROC_LOCK(td->td_proc); 747 psignal(td->td_proc, SIGPIPE); 748 PROC_UNLOCK(td->td_proc); 749 } 750 } 751 if (error == 0) 752 td->td_retval[0] = len - auio.uio_resid; 753 #ifdef KTRACE 754 if (ktruio != NULL) { 755 ktruio->uio_resid = td->td_retval[0]; 756 ktrgenio(s, UIO_WRITE, ktruio, error); 757 } 758 #endif 759 bad: 760 fputsock(so); 761 bad2: 762 NET_UNLOCK_GIANT(); 763 return (error); 764 } 765 766 /* 767 * MPSAFE 768 */ 769 int 770 sendto(td, uap) 771 struct thread *td; 772 register struct sendto_args /* { 773 int s; 774 caddr_t buf; 775 size_t len; 776 int flags; 777 caddr_t to; 778 int tolen; 779 } */ *uap; 780 { 781 struct msghdr msg; 782 struct iovec aiov; 783 int error; 784 785 msg.msg_name = uap->to; 786 msg.msg_namelen = uap->tolen; 787 msg.msg_iov = &aiov; 788 msg.msg_iovlen = 1; 789 msg.msg_control = 0; 790 #ifdef COMPAT_OLDSOCK 791 msg.msg_flags = 0; 792 #endif 793 aiov.iov_base = uap->buf; 794 aiov.iov_len = uap->len; 795 error = sendit(td, uap->s, &msg, uap->flags); 796 return (error); 797 } 798 799 #ifdef COMPAT_OLDSOCK 800 /* 801 * MPSAFE 802 */ 803 int 804 osend(td, uap) 805 struct thread *td; 806 register struct osend_args /* { 807 int s; 808 caddr_t buf; 809 int len; 810 int flags; 811 } */ *uap; 812 { 813 struct msghdr msg; 814 struct iovec aiov; 815 int error; 816 817 msg.msg_name = 0; 818 msg.msg_namelen = 0; 819 msg.msg_iov = &aiov; 820 msg.msg_iovlen = 1; 821 aiov.iov_base = uap->buf; 822 aiov.iov_len = uap->len; 823 msg.msg_control = 0; 824 msg.msg_flags = 0; 825 error = sendit(td, uap->s, &msg, uap->flags); 826 return (error); 827 } 828 829 /* 830 * MPSAFE 831 */ 832 int 833 osendmsg(td, uap) 834 struct thread *td; 835 struct osendmsg_args /* { 836 int s; 837 caddr_t msg; 838 int flags; 839 } */ *uap; 840 { 841 struct msghdr msg; 842 struct iovec *iov; 843 int error; 844 845 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 846 if (error) 847 return (error); 848 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 849 if (error) 850 return (error); 851 msg.msg_iov = iov; 852 msg.msg_flags = MSG_COMPAT; 853 error = sendit(td, uap->s, &msg, uap->flags); 854 free(iov, M_IOV); 855 return (error); 856 } 857 #endif 858 859 /* 860 * MPSAFE 861 */ 862 int 863 sendmsg(td, uap) 864 struct thread *td; 865 struct sendmsg_args /* { 866 int s; 867 caddr_t msg; 868 int flags; 869 } */ *uap; 870 { 871 struct msghdr msg; 872 struct iovec *iov; 873 int error; 874 875 error = copyin(uap->msg, &msg, sizeof (msg)); 876 if (error) 877 return (error); 878 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 879 if (error) 880 return (error); 881 msg.msg_iov = iov; 882 #ifdef COMPAT_OLDSOCK 883 msg.msg_flags = 0; 884 #endif 885 error = sendit(td, uap->s, &msg, uap->flags); 886 free(iov, M_IOV); 887 return (error); 888 } 889 890 static int 891 recvit(td, s, mp, namelenp) 892 struct thread *td; 893 int s; 894 struct msghdr *mp; 895 void *namelenp; 896 { 897 struct uio auio; 898 struct iovec *iov; 899 int i; 900 socklen_t len; 901 int error; 902 struct mbuf *m, *control = 0; 903 caddr_t ctlbuf; 904 struct socket *so; 905 struct sockaddr *fromsa = 0; 906 #ifdef KTRACE 907 struct uio *ktruio = NULL; 908 #endif 909 910 NET_LOCK_GIANT(); 911 if ((error = fgetsock(td, s, &so, NULL)) != 0) { 912 NET_UNLOCK_GIANT(); 913 return (error); 914 } 915 916 #ifdef MAC 917 SOCK_LOCK(so); 918 error = mac_check_socket_receive(td->td_ucred, so); 919 SOCK_UNLOCK(so); 920 if (error) { 921 fputsock(so); 922 NET_UNLOCK_GIANT(); 923 return (error); 924 } 925 #endif 926 927 auio.uio_iov = mp->msg_iov; 928 auio.uio_iovcnt = mp->msg_iovlen; 929 auio.uio_segflg = UIO_USERSPACE; 930 auio.uio_rw = UIO_READ; 931 auio.uio_td = td; 932 auio.uio_offset = 0; /* XXX */ 933 auio.uio_resid = 0; 934 iov = mp->msg_iov; 935 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 936 if ((auio.uio_resid += iov->iov_len) < 0) { 937 fputsock(so); 938 NET_UNLOCK_GIANT(); 939 return (EINVAL); 940 } 941 } 942 #ifdef KTRACE 943 if (KTRPOINT(td, KTR_GENIO)) 944 ktruio = cloneuio(&auio); 945 #endif 946 len = auio.uio_resid; 947 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 948 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 949 &mp->msg_flags); 950 if (error) { 951 if (auio.uio_resid != (int)len && (error == ERESTART || 952 error == EINTR || error == EWOULDBLOCK)) 953 error = 0; 954 } 955 #ifdef KTRACE 956 if (ktruio != NULL) { 957 ktruio->uio_resid = (int)len - auio.uio_resid; 958 ktrgenio(s, UIO_READ, ktruio, error); 959 } 960 #endif 961 if (error) 962 goto out; 963 td->td_retval[0] = (int)len - auio.uio_resid; 964 if (mp->msg_name) { 965 len = mp->msg_namelen; 966 if (len <= 0 || fromsa == 0) 967 len = 0; 968 else { 969 /* save sa_len before it is destroyed by MSG_COMPAT */ 970 len = MIN(len, fromsa->sa_len); 971 #ifdef COMPAT_OLDSOCK 972 if (mp->msg_flags & MSG_COMPAT) 973 ((struct osockaddr *)fromsa)->sa_family = 974 fromsa->sa_family; 975 #endif 976 error = copyout(fromsa, mp->msg_name, (unsigned)len); 977 if (error) 978 goto out; 979 } 980 mp->msg_namelen = len; 981 if (namelenp && 982 (error = copyout(&len, namelenp, sizeof (socklen_t)))) { 983 #ifdef COMPAT_OLDSOCK 984 if (mp->msg_flags & MSG_COMPAT) 985 error = 0; /* old recvfrom didn't check */ 986 else 987 #endif 988 goto out; 989 } 990 } 991 if (mp->msg_control) { 992 #ifdef COMPAT_OLDSOCK 993 /* 994 * We assume that old recvmsg calls won't receive access 995 * rights and other control info, esp. as control info 996 * is always optional and those options didn't exist in 4.3. 997 * If we receive rights, trim the cmsghdr; anything else 998 * is tossed. 999 */ 1000 if (control && mp->msg_flags & MSG_COMPAT) { 1001 if (mtod(control, struct cmsghdr *)->cmsg_level != 1002 SOL_SOCKET || 1003 mtod(control, struct cmsghdr *)->cmsg_type != 1004 SCM_RIGHTS) { 1005 mp->msg_controllen = 0; 1006 goto out; 1007 } 1008 control->m_len -= sizeof (struct cmsghdr); 1009 control->m_data += sizeof (struct cmsghdr); 1010 } 1011 #endif 1012 len = mp->msg_controllen; 1013 m = control; 1014 mp->msg_controllen = 0; 1015 ctlbuf = mp->msg_control; 1016 1017 while (m && len > 0) { 1018 unsigned int tocopy; 1019 1020 if (len >= m->m_len) 1021 tocopy = m->m_len; 1022 else { 1023 mp->msg_flags |= MSG_CTRUNC; 1024 tocopy = len; 1025 } 1026 1027 if ((error = copyout(mtod(m, caddr_t), 1028 ctlbuf, tocopy)) != 0) 1029 goto out; 1030 1031 ctlbuf += tocopy; 1032 len -= tocopy; 1033 m = m->m_next; 1034 } 1035 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1036 } 1037 out: 1038 fputsock(so); 1039 NET_UNLOCK_GIANT(); 1040 if (fromsa) 1041 FREE(fromsa, M_SONAME); 1042 if (control) 1043 m_freem(control); 1044 return (error); 1045 } 1046 1047 /* 1048 * MPSAFE 1049 */ 1050 int 1051 recvfrom(td, uap) 1052 struct thread *td; 1053 register struct recvfrom_args /* { 1054 int s; 1055 caddr_t buf; 1056 size_t len; 1057 int flags; 1058 struct sockaddr * __restrict from; 1059 socklen_t * __restrict fromlenaddr; 1060 } */ *uap; 1061 { 1062 struct msghdr msg; 1063 struct iovec aiov; 1064 int error; 1065 1066 if (uap->fromlenaddr) { 1067 error = copyin(uap->fromlenaddr, 1068 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1069 if (error) 1070 goto done2; 1071 } else { 1072 msg.msg_namelen = 0; 1073 } 1074 msg.msg_name = uap->from; 1075 msg.msg_iov = &aiov; 1076 msg.msg_iovlen = 1; 1077 aiov.iov_base = uap->buf; 1078 aiov.iov_len = uap->len; 1079 msg.msg_control = 0; 1080 msg.msg_flags = uap->flags; 1081 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1082 done2: 1083 return(error); 1084 } 1085 1086 #ifdef COMPAT_OLDSOCK 1087 /* 1088 * MPSAFE 1089 */ 1090 int 1091 orecvfrom(td, uap) 1092 struct thread *td; 1093 struct recvfrom_args *uap; 1094 { 1095 1096 uap->flags |= MSG_COMPAT; 1097 return (recvfrom(td, uap)); 1098 } 1099 #endif 1100 1101 1102 #ifdef COMPAT_OLDSOCK 1103 /* 1104 * MPSAFE 1105 */ 1106 int 1107 orecv(td, uap) 1108 struct thread *td; 1109 register struct orecv_args /* { 1110 int s; 1111 caddr_t buf; 1112 int len; 1113 int flags; 1114 } */ *uap; 1115 { 1116 struct msghdr msg; 1117 struct iovec aiov; 1118 int error; 1119 1120 msg.msg_name = 0; 1121 msg.msg_namelen = 0; 1122 msg.msg_iov = &aiov; 1123 msg.msg_iovlen = 1; 1124 aiov.iov_base = uap->buf; 1125 aiov.iov_len = uap->len; 1126 msg.msg_control = 0; 1127 msg.msg_flags = uap->flags; 1128 error = recvit(td, uap->s, &msg, NULL); 1129 return (error); 1130 } 1131 1132 /* 1133 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1134 * overlays the new one, missing only the flags, and with the (old) access 1135 * rights where the control fields are now. 1136 * 1137 * MPSAFE 1138 */ 1139 int 1140 orecvmsg(td, uap) 1141 struct thread *td; 1142 struct orecvmsg_args /* { 1143 int s; 1144 struct omsghdr *msg; 1145 int flags; 1146 } */ *uap; 1147 { 1148 struct msghdr msg; 1149 struct iovec *iov; 1150 int error; 1151 1152 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1153 if (error) 1154 return (error); 1155 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1156 if (error) 1157 return (error); 1158 msg.msg_flags = uap->flags | MSG_COMPAT; 1159 msg.msg_iov = iov; 1160 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1161 if (msg.msg_controllen && error == 0) 1162 error = copyout(&msg.msg_controllen, 1163 &uap->msg->msg_accrightslen, sizeof (int)); 1164 free(iov, M_IOV); 1165 return (error); 1166 } 1167 #endif 1168 1169 /* 1170 * MPSAFE 1171 */ 1172 int 1173 recvmsg(td, uap) 1174 struct thread *td; 1175 struct recvmsg_args /* { 1176 int s; 1177 struct msghdr *msg; 1178 int flags; 1179 } */ *uap; 1180 { 1181 struct msghdr msg; 1182 struct iovec *uiov, *iov; 1183 int error; 1184 1185 error = copyin(uap->msg, &msg, sizeof (msg)); 1186 if (error) 1187 return (error); 1188 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1189 if (error) 1190 return (error); 1191 msg.msg_flags = uap->flags; 1192 #ifdef COMPAT_OLDSOCK 1193 msg.msg_flags &= ~MSG_COMPAT; 1194 #endif 1195 uiov = msg.msg_iov; 1196 msg.msg_iov = iov; 1197 error = recvit(td, uap->s, &msg, NULL); 1198 if (error == 0) { 1199 msg.msg_iov = uiov; 1200 error = copyout(&msg, uap->msg, sizeof(msg)); 1201 } 1202 free(iov, M_IOV); 1203 return (error); 1204 } 1205 1206 /* 1207 * MPSAFE 1208 */ 1209 /* ARGSUSED */ 1210 int 1211 shutdown(td, uap) 1212 struct thread *td; 1213 register struct shutdown_args /* { 1214 int s; 1215 int how; 1216 } */ *uap; 1217 { 1218 struct socket *so; 1219 int error; 1220 1221 NET_LOCK_GIANT(); 1222 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1223 error = soshutdown(so, uap->how); 1224 fputsock(so); 1225 } 1226 NET_UNLOCK_GIANT(); 1227 return(error); 1228 } 1229 1230 /* 1231 * MPSAFE 1232 */ 1233 /* ARGSUSED */ 1234 int 1235 setsockopt(td, uap) 1236 struct thread *td; 1237 register struct setsockopt_args /* { 1238 int s; 1239 int level; 1240 int name; 1241 caddr_t val; 1242 int valsize; 1243 } */ *uap; 1244 { 1245 struct socket *so; 1246 struct sockopt sopt; 1247 int error; 1248 1249 if (uap->val == 0 && uap->valsize != 0) 1250 return (EFAULT); 1251 if (uap->valsize < 0) 1252 return (EINVAL); 1253 1254 NET_LOCK_GIANT(); 1255 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1256 sopt.sopt_dir = SOPT_SET; 1257 sopt.sopt_level = uap->level; 1258 sopt.sopt_name = uap->name; 1259 sopt.sopt_val = uap->val; 1260 sopt.sopt_valsize = uap->valsize; 1261 sopt.sopt_td = td; 1262 error = sosetopt(so, &sopt); 1263 fputsock(so); 1264 } 1265 NET_UNLOCK_GIANT(); 1266 return(error); 1267 } 1268 1269 /* 1270 * MPSAFE 1271 */ 1272 /* ARGSUSED */ 1273 int 1274 getsockopt(td, uap) 1275 struct thread *td; 1276 register struct getsockopt_args /* { 1277 int s; 1278 int level; 1279 int name; 1280 void * __restrict val; 1281 socklen_t * __restrict avalsize; 1282 } */ *uap; 1283 { 1284 socklen_t valsize; 1285 int error; 1286 struct socket *so; 1287 struct sockopt sopt; 1288 1289 NET_LOCK_GIANT(); 1290 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1291 goto done2; 1292 if (uap->val) { 1293 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1294 if (error) 1295 goto done1; 1296 if (valsize < 0) { 1297 error = EINVAL; 1298 goto done1; 1299 } 1300 } else { 1301 valsize = 0; 1302 } 1303 1304 sopt.sopt_dir = SOPT_GET; 1305 sopt.sopt_level = uap->level; 1306 sopt.sopt_name = uap->name; 1307 sopt.sopt_val = uap->val; 1308 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1309 sopt.sopt_td = td; 1310 1311 error = sogetopt(so, &sopt); 1312 if (error == 0) { 1313 valsize = sopt.sopt_valsize; 1314 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1315 } 1316 done1: 1317 fputsock(so); 1318 done2: 1319 NET_UNLOCK_GIANT(); 1320 return (error); 1321 } 1322 1323 /* 1324 * getsockname1() - Get socket name. 1325 * 1326 * MPSAFE 1327 */ 1328 /* ARGSUSED */ 1329 static int 1330 getsockname1(td, uap, compat) 1331 struct thread *td; 1332 register struct getsockname_args /* { 1333 int fdes; 1334 struct sockaddr * __restrict asa; 1335 socklen_t * __restrict alen; 1336 } */ *uap; 1337 int compat; 1338 { 1339 struct socket *so; 1340 struct sockaddr *sa; 1341 socklen_t len; 1342 int error; 1343 1344 NET_LOCK_GIANT(); 1345 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1346 goto done2; 1347 error = copyin(uap->alen, &len, sizeof (len)); 1348 if (error) 1349 goto done1; 1350 if (len < 0) { 1351 error = EINVAL; 1352 goto done1; 1353 } 1354 sa = 0; 1355 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1356 if (error) 1357 goto bad; 1358 if (sa == 0) { 1359 len = 0; 1360 goto gotnothing; 1361 } 1362 1363 len = MIN(len, sa->sa_len); 1364 #ifdef COMPAT_OLDSOCK 1365 if (compat) 1366 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1367 #endif 1368 error = copyout(sa, uap->asa, (u_int)len); 1369 if (error == 0) 1370 gotnothing: 1371 error = copyout(&len, uap->alen, sizeof (len)); 1372 bad: 1373 if (sa) 1374 FREE(sa, M_SONAME); 1375 done1: 1376 fputsock(so); 1377 done2: 1378 NET_UNLOCK_GIANT(); 1379 return (error); 1380 } 1381 1382 /* 1383 * MPSAFE 1384 */ 1385 int 1386 getsockname(td, uap) 1387 struct thread *td; 1388 struct getsockname_args *uap; 1389 { 1390 1391 return (getsockname1(td, uap, 0)); 1392 } 1393 1394 #ifdef COMPAT_OLDSOCK 1395 /* 1396 * MPSAFE 1397 */ 1398 int 1399 ogetsockname(td, uap) 1400 struct thread *td; 1401 struct getsockname_args *uap; 1402 { 1403 1404 return (getsockname1(td, uap, 1)); 1405 } 1406 #endif /* COMPAT_OLDSOCK */ 1407 1408 /* 1409 * getpeername1() - Get name of peer for connected socket. 1410 * 1411 * MPSAFE 1412 */ 1413 /* ARGSUSED */ 1414 static int 1415 getpeername1(td, uap, compat) 1416 struct thread *td; 1417 register struct getpeername_args /* { 1418 int fdes; 1419 struct sockaddr * __restrict asa; 1420 socklen_t * __restrict alen; 1421 } */ *uap; 1422 int compat; 1423 { 1424 struct socket *so; 1425 struct sockaddr *sa; 1426 socklen_t len; 1427 int error; 1428 1429 NET_LOCK_GIANT(); 1430 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1431 goto done2; 1432 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1433 error = ENOTCONN; 1434 goto done1; 1435 } 1436 error = copyin(uap->alen, &len, sizeof (len)); 1437 if (error) 1438 goto done1; 1439 if (len < 0) { 1440 error = EINVAL; 1441 goto done1; 1442 } 1443 sa = 0; 1444 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1445 if (error) 1446 goto bad; 1447 if (sa == 0) { 1448 len = 0; 1449 goto gotnothing; 1450 } 1451 len = MIN(len, sa->sa_len); 1452 #ifdef COMPAT_OLDSOCK 1453 if (compat) 1454 ((struct osockaddr *)sa)->sa_family = 1455 sa->sa_family; 1456 #endif 1457 error = copyout(sa, uap->asa, (u_int)len); 1458 if (error) 1459 goto bad; 1460 gotnothing: 1461 error = copyout(&len, uap->alen, sizeof (len)); 1462 bad: 1463 if (sa) 1464 FREE(sa, M_SONAME); 1465 done1: 1466 fputsock(so); 1467 done2: 1468 NET_UNLOCK_GIANT(); 1469 return (error); 1470 } 1471 1472 /* 1473 * MPSAFE 1474 */ 1475 int 1476 getpeername(td, uap) 1477 struct thread *td; 1478 struct getpeername_args *uap; 1479 { 1480 1481 return (getpeername1(td, uap, 0)); 1482 } 1483 1484 #ifdef COMPAT_OLDSOCK 1485 /* 1486 * MPSAFE 1487 */ 1488 int 1489 ogetpeername(td, uap) 1490 struct thread *td; 1491 struct ogetpeername_args *uap; 1492 { 1493 1494 /* XXX uap should have type `getpeername_args *' to begin with. */ 1495 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1496 } 1497 #endif /* COMPAT_OLDSOCK */ 1498 1499 int 1500 sockargs(mp, buf, buflen, type) 1501 struct mbuf **mp; 1502 caddr_t buf; 1503 int buflen, type; 1504 { 1505 register struct sockaddr *sa; 1506 register struct mbuf *m; 1507 int error; 1508 1509 if ((u_int)buflen > MLEN) { 1510 #ifdef COMPAT_OLDSOCK 1511 if (type == MT_SONAME && (u_int)buflen <= 112) 1512 buflen = MLEN; /* unix domain compat. hack */ 1513 else 1514 #endif 1515 if ((u_int)buflen > MCLBYTES) 1516 return (EINVAL); 1517 } 1518 m = m_get(M_TRYWAIT, type); 1519 if (m == NULL) 1520 return (ENOBUFS); 1521 if ((u_int)buflen > MLEN) { 1522 MCLGET(m, M_TRYWAIT); 1523 if ((m->m_flags & M_EXT) == 0) { 1524 m_free(m); 1525 return (ENOBUFS); 1526 } 1527 } 1528 m->m_len = buflen; 1529 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1530 if (error) 1531 (void) m_free(m); 1532 else { 1533 *mp = m; 1534 if (type == MT_SONAME) { 1535 sa = mtod(m, struct sockaddr *); 1536 1537 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1538 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1539 sa->sa_family = sa->sa_len; 1540 #endif 1541 sa->sa_len = buflen; 1542 } 1543 } 1544 return (error); 1545 } 1546 1547 int 1548 getsockaddr(namp, uaddr, len) 1549 struct sockaddr **namp; 1550 caddr_t uaddr; 1551 size_t len; 1552 { 1553 struct sockaddr *sa; 1554 int error; 1555 1556 if (len > SOCK_MAXADDRLEN) 1557 return (ENAMETOOLONG); 1558 if (len < offsetof(struct sockaddr, sa_data[0])) 1559 return (EINVAL); 1560 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1561 error = copyin(uaddr, sa, len); 1562 if (error) { 1563 FREE(sa, M_SONAME); 1564 } else { 1565 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1566 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1567 sa->sa_family = sa->sa_len; 1568 #endif 1569 sa->sa_len = len; 1570 *namp = sa; 1571 } 1572 return (error); 1573 } 1574 1575 /* 1576 * Detach mapped page and release resources back to the system. 1577 */ 1578 void 1579 sf_buf_mext(void *addr, void *args) 1580 { 1581 vm_page_t m; 1582 1583 m = sf_buf_page(args); 1584 sf_buf_free(args); 1585 vm_page_lock_queues(); 1586 vm_page_unwire(m, 0); 1587 /* 1588 * Check for the object going away on us. This can 1589 * happen since we don't hold a reference to it. 1590 * If so, we're responsible for freeing the page. 1591 */ 1592 if (m->wire_count == 0 && m->object == NULL) 1593 vm_page_free(m); 1594 vm_page_unlock_queues(); 1595 } 1596 1597 /* 1598 * sendfile(2) 1599 * 1600 * MPSAFE 1601 * 1602 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1603 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1604 * 1605 * Send a file specified by 'fd' and starting at 'offset' to a socket 1606 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1607 * nbytes == 0. Optionally add a header and/or trailer to the socket 1608 * output. If specified, write the total number of bytes sent into *sbytes. 1609 * 1610 */ 1611 int 1612 sendfile(struct thread *td, struct sendfile_args *uap) 1613 { 1614 1615 return (do_sendfile(td, uap, 0)); 1616 } 1617 1618 #ifdef COMPAT_FREEBSD4 1619 int 1620 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1621 { 1622 struct sendfile_args args; 1623 1624 args.fd = uap->fd; 1625 args.s = uap->s; 1626 args.offset = uap->offset; 1627 args.nbytes = uap->nbytes; 1628 args.hdtr = uap->hdtr; 1629 args.sbytes = uap->sbytes; 1630 args.flags = uap->flags; 1631 1632 return (do_sendfile(td, &args, 1)); 1633 } 1634 #endif /* COMPAT_FREEBSD4 */ 1635 1636 static int 1637 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1638 { 1639 struct vnode *vp; 1640 struct vm_object *obj; 1641 struct socket *so = NULL; 1642 struct mbuf *m, *m_header = NULL; 1643 struct sf_buf *sf; 1644 struct vm_page *pg; 1645 struct writev_args nuap; 1646 struct sf_hdtr hdtr; 1647 struct uio *hdr_uio = NULL; 1648 off_t off, xfsize, hdtr_size, sbytes = 0; 1649 int error, headersize = 0, headersent = 0; 1650 1651 mtx_lock(&Giant); 1652 1653 hdtr_size = 0; 1654 1655 /* 1656 * The descriptor must be a regular file and have a backing VM object. 1657 */ 1658 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1659 goto done; 1660 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1661 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1662 error = EINVAL; 1663 VOP_UNLOCK(vp, 0, td); 1664 goto done; 1665 } 1666 VOP_UNLOCK(vp, 0, td); 1667 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1668 goto done; 1669 if (so->so_type != SOCK_STREAM) { 1670 error = EINVAL; 1671 goto done; 1672 } 1673 if ((so->so_state & SS_ISCONNECTED) == 0) { 1674 error = ENOTCONN; 1675 goto done; 1676 } 1677 if (uap->offset < 0) { 1678 error = EINVAL; 1679 goto done; 1680 } 1681 1682 #ifdef MAC 1683 SOCK_LOCK(so); 1684 error = mac_check_socket_send(td->td_ucred, so); 1685 SOCK_UNLOCK(so); 1686 if (error) 1687 goto done; 1688 #endif 1689 1690 /* 1691 * If specified, get the pointer to the sf_hdtr struct for 1692 * any headers/trailers. 1693 */ 1694 if (uap->hdtr != NULL) { 1695 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1696 if (error) 1697 goto done; 1698 /* 1699 * Send any headers. 1700 */ 1701 if (hdtr.headers != NULL) { 1702 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1703 if (error) 1704 goto done; 1705 hdr_uio->uio_td = td; 1706 hdr_uio->uio_rw = UIO_WRITE; 1707 if (hdr_uio->uio_resid > 0) { 1708 m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0); 1709 if (m_header == NULL) 1710 goto done; 1711 headersize = m_header->m_pkthdr.len; 1712 if (compat) 1713 sbytes += headersize; 1714 } 1715 } 1716 } 1717 1718 /* 1719 * Protect against multiple writers to the socket. 1720 */ 1721 SOCKBUF_LOCK(&so->so_snd); 1722 (void) sblock(&so->so_snd, M_WAITOK); 1723 SOCKBUF_UNLOCK(&so->so_snd); 1724 1725 /* 1726 * Loop through the pages in the file, starting with the requested 1727 * offset. Get a file page (do I/O if necessary), map the file page 1728 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1729 * it on the socket. 1730 */ 1731 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1732 vm_pindex_t pindex; 1733 vm_offset_t pgoff; 1734 1735 pindex = OFF_TO_IDX(off); 1736 VM_OBJECT_LOCK(obj); 1737 retry_lookup: 1738 /* 1739 * Calculate the amount to transfer. Not to exceed a page, 1740 * the EOF, or the passed in nbytes. 1741 */ 1742 xfsize = obj->un_pager.vnp.vnp_size - off; 1743 VM_OBJECT_UNLOCK(obj); 1744 if (xfsize > PAGE_SIZE) 1745 xfsize = PAGE_SIZE; 1746 pgoff = (vm_offset_t)(off & PAGE_MASK); 1747 if (PAGE_SIZE - pgoff < xfsize) 1748 xfsize = PAGE_SIZE - pgoff; 1749 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1750 xfsize = uap->nbytes - sbytes; 1751 if (xfsize <= 0) { 1752 if (m_header != NULL) { 1753 m = m_header; 1754 m_header = NULL; 1755 goto retry_space; 1756 } else 1757 break; 1758 } 1759 /* 1760 * Optimize the non-blocking case by looking at the socket space 1761 * before going to the extra work of constituting the sf_buf. 1762 */ 1763 SOCKBUF_LOCK(&so->so_snd); 1764 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1765 if (so->so_snd.sb_state & SBS_CANTSENDMORE) 1766 error = EPIPE; 1767 else 1768 error = EAGAIN; 1769 sbunlock(&so->so_snd); 1770 SOCKBUF_UNLOCK(&so->so_snd); 1771 goto done; 1772 } 1773 SOCKBUF_UNLOCK(&so->so_snd); 1774 VM_OBJECT_LOCK(obj); 1775 /* 1776 * Attempt to look up the page. 1777 * 1778 * Allocate if not found 1779 * 1780 * Wait and loop if busy. 1781 */ 1782 pg = vm_page_lookup(obj, pindex); 1783 1784 if (pg == NULL) { 1785 pg = vm_page_alloc(obj, pindex, 1786 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1787 if (pg == NULL) { 1788 VM_OBJECT_UNLOCK(obj); 1789 VM_WAIT; 1790 VM_OBJECT_LOCK(obj); 1791 goto retry_lookup; 1792 } 1793 vm_page_lock_queues(); 1794 vm_page_wakeup(pg); 1795 } else { 1796 vm_page_lock_queues(); 1797 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1798 goto retry_lookup; 1799 /* 1800 * Wire the page so it does not get ripped out from 1801 * under us. 1802 */ 1803 vm_page_wire(pg); 1804 } 1805 1806 /* 1807 * If page is not valid for what we need, initiate I/O 1808 */ 1809 1810 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) { 1811 VM_OBJECT_UNLOCK(obj); 1812 } else if (uap->flags & SF_NODISKIO) { 1813 error = EBUSY; 1814 } else { 1815 int bsize, resid; 1816 1817 /* 1818 * Ensure that our page is still around when the I/O 1819 * completes. 1820 */ 1821 vm_page_io_start(pg); 1822 vm_page_unlock_queues(); 1823 VM_OBJECT_UNLOCK(obj); 1824 1825 /* 1826 * Get the page from backing store. 1827 */ 1828 bsize = vp->v_mount->mnt_stat.f_iosize; 1829 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1830 /* 1831 * XXXMAC: Because we don't have fp->f_cred here, 1832 * we pass in NOCRED. This is probably wrong, but 1833 * is consistent with our original implementation. 1834 */ 1835 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1836 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1837 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 1838 td->td_ucred, NOCRED, &resid, td); 1839 VOP_UNLOCK(vp, 0, td); 1840 if (error) 1841 VM_OBJECT_LOCK(obj); 1842 vm_page_lock_queues(); 1843 vm_page_io_finish(pg); 1844 mbstat.sf_iocnt++; 1845 } 1846 1847 if (error) { 1848 vm_page_unwire(pg, 0); 1849 /* 1850 * See if anyone else might know about this page. 1851 * If not and it is not valid, then free it. 1852 */ 1853 if (pg->wire_count == 0 && pg->valid == 0 && 1854 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1855 pg->hold_count == 0) { 1856 vm_page_busy(pg); 1857 vm_page_free(pg); 1858 } 1859 vm_page_unlock_queues(); 1860 VM_OBJECT_UNLOCK(obj); 1861 SOCKBUF_LOCK(&so->so_snd); 1862 sbunlock(&so->so_snd); 1863 SOCKBUF_UNLOCK(&so->so_snd); 1864 goto done; 1865 } 1866 vm_page_unlock_queues(); 1867 1868 /* 1869 * Get a sendfile buf. We usually wait as long as necessary, 1870 * but this wait can be interrupted. 1871 */ 1872 if ((sf = sf_buf_alloc(pg, PCATCH)) == NULL) { 1873 mbstat.sf_allocfail++; 1874 vm_page_lock_queues(); 1875 vm_page_unwire(pg, 0); 1876 if (pg->wire_count == 0 && pg->object == NULL) 1877 vm_page_free(pg); 1878 vm_page_unlock_queues(); 1879 SOCKBUF_LOCK(&so->so_snd); 1880 sbunlock(&so->so_snd); 1881 SOCKBUF_UNLOCK(&so->so_snd); 1882 error = EINTR; 1883 goto done; 1884 } 1885 1886 /* 1887 * Get an mbuf header and set it up as having external storage. 1888 */ 1889 if (m_header) 1890 MGET(m, M_TRYWAIT, MT_DATA); 1891 else 1892 MGETHDR(m, M_TRYWAIT, MT_DATA); 1893 if (m == NULL) { 1894 error = ENOBUFS; 1895 sf_buf_mext((void *)sf_buf_kva(sf), sf); 1896 SOCKBUF_LOCK(&so->so_snd); 1897 sbunlock(&so->so_snd); 1898 SOCKBUF_UNLOCK(&so->so_snd); 1899 goto done; 1900 } 1901 /* 1902 * Setup external storage for mbuf. 1903 */ 1904 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY, 1905 EXT_SFBUF); 1906 m->m_data = (char *)sf_buf_kva(sf) + pgoff; 1907 m->m_pkthdr.len = m->m_len = xfsize; 1908 1909 if (m_header) { 1910 m_cat(m_header, m); 1911 m = m_header; 1912 m_header = NULL; 1913 m_fixhdr(m); 1914 } 1915 1916 /* 1917 * Add the buffer to the socket buffer chain. 1918 */ 1919 SOCKBUF_LOCK(&so->so_snd); 1920 retry_space: 1921 /* 1922 * Make sure that the socket is still able to take more data. 1923 * CANTSENDMORE being true usually means that the connection 1924 * was closed. so_error is true when an error was sensed after 1925 * a previous send. 1926 * The state is checked after the page mapping and buffer 1927 * allocation above since those operations may block and make 1928 * any socket checks stale. From this point forward, nothing 1929 * blocks before the pru_send (or more accurately, any blocking 1930 * results in a loop back to here to re-check). 1931 */ 1932 SOCKBUF_LOCK_ASSERT(&so->so_snd); 1933 if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) { 1934 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 1935 error = EPIPE; 1936 } else { 1937 error = so->so_error; 1938 so->so_error = 0; 1939 } 1940 m_freem(m); 1941 sbunlock(&so->so_snd); 1942 SOCKBUF_UNLOCK(&so->so_snd); 1943 goto done; 1944 } 1945 /* 1946 * Wait for socket space to become available. We do this just 1947 * after checking the connection state above in order to avoid 1948 * a race condition with sbwait(). 1949 */ 1950 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1951 if (so->so_state & SS_NBIO) { 1952 m_freem(m); 1953 sbunlock(&so->so_snd); 1954 SOCKBUF_UNLOCK(&so->so_snd); 1955 error = EAGAIN; 1956 goto done; 1957 } 1958 error = sbwait(&so->so_snd); 1959 /* 1960 * An error from sbwait usually indicates that we've 1961 * been interrupted by a signal. If we've sent anything 1962 * then return bytes sent, otherwise return the error. 1963 */ 1964 if (error) { 1965 m_freem(m); 1966 sbunlock(&so->so_snd); 1967 SOCKBUF_UNLOCK(&so->so_snd); 1968 goto done; 1969 } 1970 goto retry_space; 1971 } 1972 SOCKBUF_UNLOCK(&so->so_snd); 1973 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 1974 if (error) { 1975 SOCKBUF_LOCK(&so->so_snd); 1976 sbunlock(&so->so_snd); 1977 SOCKBUF_UNLOCK(&so->so_snd); 1978 goto done; 1979 } 1980 headersent = 1; 1981 } 1982 SOCKBUF_LOCK(&so->so_snd); 1983 sbunlock(&so->so_snd); 1984 SOCKBUF_UNLOCK(&so->so_snd); 1985 1986 /* 1987 * Send trailers. Wimp out and use writev(2). 1988 */ 1989 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 1990 nuap.fd = uap->s; 1991 nuap.iovp = hdtr.trailers; 1992 nuap.iovcnt = hdtr.trl_cnt; 1993 error = writev(td, &nuap); 1994 if (error) 1995 goto done; 1996 if (compat) 1997 sbytes += td->td_retval[0]; 1998 else 1999 hdtr_size += td->td_retval[0]; 2000 } 2001 2002 done: 2003 if (headersent) { 2004 if (!compat) 2005 hdtr_size += headersize; 2006 } else { 2007 if (compat) 2008 sbytes -= headersize; 2009 } 2010 /* 2011 * If there was no error we have to clear td->td_retval[0] 2012 * because it may have been set by writev. 2013 */ 2014 if (error == 0) { 2015 td->td_retval[0] = 0; 2016 } 2017 if (uap->sbytes != NULL) { 2018 if (!compat) 2019 sbytes += hdtr_size; 2020 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2021 } 2022 if (vp) 2023 vrele(vp); 2024 if (so) 2025 fputsock(so); 2026 if (hdr_uio != NULL) 2027 free(hdr_uio, M_IOV); 2028 if (m_header) 2029 m_freem(m_header); 2030 2031 mtx_unlock(&Giant); 2032 2033 if (error == ERESTART) 2034 error = EINTR; 2035 2036 return (error); 2037 } 2038