1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_compat.h" 39 #include "opt_ktrace.h" 40 #include "opt_mac.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/kernel.h> 45 #include <sys/lock.h> 46 #include <sys/mac.h> 47 #include <sys/mutex.h> 48 #include <sys/sysproto.h> 49 #include <sys/malloc.h> 50 #include <sys/filedesc.h> 51 #include <sys/event.h> 52 #include <sys/proc.h> 53 #include <sys/fcntl.h> 54 #include <sys/file.h> 55 #include <sys/filio.h> 56 #include <sys/mount.h> 57 #include <sys/mbuf.h> 58 #include <sys/protosw.h> 59 #include <sys/sf_buf.h> 60 #include <sys/socket.h> 61 #include <sys/socketvar.h> 62 #include <sys/signalvar.h> 63 #include <sys/syscallsubr.h> 64 #include <sys/uio.h> 65 #include <sys/vnode.h> 66 #ifdef KTRACE 67 #include <sys/ktrace.h> 68 #endif 69 70 #include <vm/vm.h> 71 #include <vm/vm_object.h> 72 #include <vm/vm_page.h> 73 #include <vm/vm_pageout.h> 74 #include <vm/vm_kern.h> 75 #include <vm/vm_extern.h> 76 77 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 78 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 79 80 static int accept1(struct thread *td, struct accept_args *uap, int compat); 81 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 82 static int getsockname1(struct thread *td, struct getsockname_args *uap, 83 int compat); 84 static int getpeername1(struct thread *td, struct getpeername_args *uap, 85 int compat); 86 87 /* 88 * System call interface to the socket abstraction. 89 */ 90 #if defined(COMPAT_43) || defined(COMPAT_SUNOS) 91 #define COMPAT_OLDSOCK 92 #endif 93 94 /* 95 * MPSAFE 96 */ 97 int 98 socket(td, uap) 99 struct thread *td; 100 register struct socket_args /* { 101 int domain; 102 int type; 103 int protocol; 104 } */ *uap; 105 { 106 struct filedesc *fdp; 107 struct socket *so; 108 struct file *fp; 109 int fd, error; 110 111 fdp = td->td_proc->p_fd; 112 error = falloc(td, &fp, &fd); 113 if (error) 114 return (error); 115 /* An extra reference on `fp' has been held for us by falloc(). */ 116 NET_LOCK_GIANT(); 117 error = socreate(uap->domain, &so, uap->type, uap->protocol, 118 td->td_ucred, td); 119 NET_UNLOCK_GIANT(); 120 FILEDESC_LOCK(fdp); 121 if (error) { 122 if (fdp->fd_ofiles[fd] == fp) { 123 fdp->fd_ofiles[fd] = NULL; 124 fdunused(fdp, fd); 125 FILEDESC_UNLOCK(fdp); 126 fdrop(fp, td); 127 } else { 128 FILEDESC_UNLOCK(fdp); 129 } 130 } else { 131 fp->f_data = so; /* already has ref count */ 132 fp->f_flag = FREAD|FWRITE; 133 fp->f_ops = &socketops; 134 fp->f_type = DTYPE_SOCKET; 135 FILEDESC_UNLOCK(fdp); 136 td->td_retval[0] = fd; 137 } 138 fdrop(fp, td); 139 return (error); 140 } 141 142 /* 143 * MPSAFE 144 */ 145 /* ARGSUSED */ 146 int 147 bind(td, uap) 148 struct thread *td; 149 register struct bind_args /* { 150 int s; 151 caddr_t name; 152 int namelen; 153 } */ *uap; 154 { 155 struct sockaddr *sa; 156 int error; 157 158 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 159 return (error); 160 161 return (kern_bind(td, uap->s, sa)); 162 } 163 164 int 165 kern_bind(td, fd, sa) 166 struct thread *td; 167 int fd; 168 struct sockaddr *sa; 169 { 170 struct socket *so; 171 int error; 172 173 NET_LOCK_GIANT(); 174 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 175 goto done2; 176 #ifdef MAC 177 error = mac_check_socket_bind(td->td_ucred, so, sa); 178 if (error) 179 goto done1; 180 #endif 181 error = sobind(so, sa, td); 182 #ifdef MAC 183 done1: 184 #endif 185 fputsock(so); 186 done2: 187 NET_UNLOCK_GIANT(); 188 FREE(sa, M_SONAME); 189 return (error); 190 } 191 192 /* 193 * MPSAFE 194 */ 195 /* ARGSUSED */ 196 int 197 listen(td, uap) 198 struct thread *td; 199 register struct listen_args /* { 200 int s; 201 int backlog; 202 } */ *uap; 203 { 204 struct socket *so; 205 int error; 206 207 NET_LOCK_GIANT(); 208 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 209 #ifdef MAC 210 error = mac_check_socket_listen(td->td_ucred, so); 211 if (error) 212 goto done; 213 #endif 214 error = solisten(so, uap->backlog, td); 215 #ifdef MAC 216 done: 217 #endif 218 fputsock(so); 219 } 220 NET_UNLOCK_GIANT(); 221 return(error); 222 } 223 224 /* 225 * accept1() 226 * MPSAFE 227 */ 228 static int 229 accept1(td, uap, compat) 230 struct thread *td; 231 register struct accept_args /* { 232 int s; 233 struct sockaddr * __restrict name; 234 socklen_t * __restrict anamelen; 235 } */ *uap; 236 int compat; 237 { 238 struct filedesc *fdp; 239 struct file *nfp = NULL; 240 struct sockaddr *sa; 241 socklen_t namelen; 242 int error, s; 243 struct socket *head, *so; 244 int fd; 245 u_int fflag; 246 pid_t pgid; 247 int tmp; 248 249 fdp = td->td_proc->p_fd; 250 if (uap->name) { 251 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 252 if(error) 253 goto done3; 254 if (namelen < 0) { 255 error = EINVAL; 256 goto done3; 257 } 258 } 259 NET_LOCK_GIANT(); 260 error = fgetsock(td, uap->s, &head, &fflag); 261 if (error) 262 goto done2; 263 s = splnet(); 264 if ((head->so_options & SO_ACCEPTCONN) == 0) { 265 splx(s); 266 error = EINVAL; 267 goto done; 268 } 269 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 270 if (head->so_state & SS_CANTRCVMORE) { 271 head->so_error = ECONNABORTED; 272 break; 273 } 274 if ((head->so_state & SS_NBIO) != 0) { 275 head->so_error = EWOULDBLOCK; 276 break; 277 } 278 error = tsleep(&head->so_timeo, PSOCK | PCATCH, 279 "accept", 0); 280 if (error) { 281 splx(s); 282 goto done; 283 } 284 } 285 if (head->so_error) { 286 error = head->so_error; 287 head->so_error = 0; 288 splx(s); 289 goto done; 290 } 291 292 /* 293 * At this point we know that there is at least one connection 294 * ready to be accepted. Remove it from the queue prior to 295 * allocating the file descriptor for it since falloc() may 296 * block allowing another process to accept the connection 297 * instead. 298 */ 299 so = TAILQ_FIRST(&head->so_comp); 300 TAILQ_REMOVE(&head->so_comp, so, so_list); 301 head->so_qlen--; 302 303 error = falloc(td, &nfp, &fd); 304 if (error) { 305 /* 306 * Probably ran out of file descriptors. Put the 307 * unaccepted connection back onto the queue and 308 * do another wakeup so some other process might 309 * have a chance at it. 310 */ 311 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 312 head->so_qlen++; 313 wakeup_one(&head->so_timeo); 314 splx(s); 315 goto done; 316 } 317 /* An extra reference on `nfp' has been held for us by falloc(). */ 318 td->td_retval[0] = fd; 319 320 /* connection has been removed from the listen queue */ 321 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 322 323 so->so_state &= ~SS_COMP; 324 so->so_head = NULL; 325 pgid = fgetown(&head->so_sigio); 326 if (pgid != 0) 327 fsetown(pgid, &so->so_sigio); 328 329 FILE_LOCK(nfp); 330 soref(so); /* file descriptor reference */ 331 nfp->f_data = so; /* nfp has ref count from falloc */ 332 nfp->f_flag = fflag; 333 nfp->f_ops = &socketops; 334 nfp->f_type = DTYPE_SOCKET; 335 FILE_UNLOCK(nfp); 336 /* Sync socket nonblocking/async state with file flags */ 337 tmp = fflag & FNONBLOCK; 338 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 339 tmp = fflag & FASYNC; 340 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 341 sa = 0; 342 error = soaccept(so, &sa); 343 if (error) { 344 /* 345 * return a namelen of zero for older code which might 346 * ignore the return value from accept. 347 */ 348 if (uap->name != NULL) { 349 namelen = 0; 350 (void) copyout(&namelen, 351 uap->anamelen, sizeof(*uap->anamelen)); 352 } 353 goto noconnection; 354 } 355 if (sa == NULL) { 356 namelen = 0; 357 if (uap->name) 358 goto gotnoname; 359 splx(s); 360 error = 0; 361 goto done; 362 } 363 if (uap->name) { 364 /* check sa_len before it is destroyed */ 365 if (namelen > sa->sa_len) 366 namelen = sa->sa_len; 367 #ifdef COMPAT_OLDSOCK 368 if (compat) 369 ((struct osockaddr *)sa)->sa_family = 370 sa->sa_family; 371 #endif 372 error = copyout(sa, uap->name, (u_int)namelen); 373 if (!error) 374 gotnoname: 375 error = copyout(&namelen, 376 uap->anamelen, sizeof (*uap->anamelen)); 377 } 378 noconnection: 379 if (sa) 380 FREE(sa, M_SONAME); 381 382 /* 383 * close the new descriptor, assuming someone hasn't ripped it 384 * out from under us. 385 */ 386 if (error) { 387 FILEDESC_LOCK(fdp); 388 if (fdp->fd_ofiles[fd] == nfp) { 389 fdp->fd_ofiles[fd] = NULL; 390 fdunused(fdp, fd); 391 FILEDESC_UNLOCK(fdp); 392 fdrop(nfp, td); 393 } else { 394 FILEDESC_UNLOCK(fdp); 395 } 396 } 397 splx(s); 398 399 /* 400 * Release explicitly held references before returning. 401 */ 402 done: 403 if (nfp != NULL) 404 fdrop(nfp, td); 405 fputsock(head); 406 done2: 407 NET_UNLOCK_GIANT(); 408 done3: 409 return (error); 410 } 411 412 /* 413 * MPSAFE (accept1() is MPSAFE) 414 */ 415 int 416 accept(td, uap) 417 struct thread *td; 418 struct accept_args *uap; 419 { 420 421 return (accept1(td, uap, 0)); 422 } 423 424 #ifdef COMPAT_OLDSOCK 425 /* 426 * MPSAFE (accept1() is MPSAFE) 427 */ 428 int 429 oaccept(td, uap) 430 struct thread *td; 431 struct accept_args *uap; 432 { 433 434 return (accept1(td, uap, 1)); 435 } 436 #endif /* COMPAT_OLDSOCK */ 437 438 /* 439 * MPSAFE 440 */ 441 /* ARGSUSED */ 442 int 443 connect(td, uap) 444 struct thread *td; 445 register struct connect_args /* { 446 int s; 447 caddr_t name; 448 int namelen; 449 } */ *uap; 450 { 451 struct sockaddr *sa; 452 int error; 453 454 error = getsockaddr(&sa, uap->name, uap->namelen); 455 if (error) 456 return (error); 457 458 return (kern_connect(td, uap->s, sa)); 459 } 460 461 462 int 463 kern_connect(td, fd, sa) 464 struct thread *td; 465 int fd; 466 struct sockaddr *sa; 467 { 468 struct socket *so; 469 int error, s; 470 int interrupted = 0; 471 472 NET_LOCK_GIANT(); 473 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 474 goto done2; 475 if (so->so_state & SS_ISCONNECTING) { 476 error = EALREADY; 477 goto done1; 478 } 479 #ifdef MAC 480 error = mac_check_socket_connect(td->td_ucred, so, sa); 481 if (error) 482 goto bad; 483 #endif 484 error = soconnect(so, sa, td); 485 if (error) 486 goto bad; 487 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 488 error = EINPROGRESS; 489 goto done1; 490 } 491 s = splnet(); 492 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 493 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0); 494 if (error) { 495 if (error == EINTR || error == ERESTART) 496 interrupted = 1; 497 break; 498 } 499 } 500 if (error == 0) { 501 error = so->so_error; 502 so->so_error = 0; 503 } 504 splx(s); 505 bad: 506 if (!interrupted) 507 so->so_state &= ~SS_ISCONNECTING; 508 if (error == ERESTART) 509 error = EINTR; 510 done1: 511 fputsock(so); 512 done2: 513 NET_UNLOCK_GIANT(); 514 FREE(sa, M_SONAME); 515 return (error); 516 } 517 518 /* 519 * MPSAFE 520 */ 521 int 522 socketpair(td, uap) 523 struct thread *td; 524 register struct socketpair_args /* { 525 int domain; 526 int type; 527 int protocol; 528 int *rsv; 529 } */ *uap; 530 { 531 register struct filedesc *fdp = td->td_proc->p_fd; 532 struct file *fp1, *fp2; 533 struct socket *so1, *so2; 534 int fd, error, sv[2]; 535 536 NET_LOCK_GIANT(); 537 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 538 td->td_ucred, td); 539 if (error) 540 goto done2; 541 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 542 td->td_ucred, td); 543 if (error) 544 goto free1; 545 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 546 error = falloc(td, &fp1, &fd); 547 if (error) 548 goto free2; 549 sv[0] = fd; 550 fp1->f_data = so1; /* so1 already has ref count */ 551 error = falloc(td, &fp2, &fd); 552 if (error) 553 goto free3; 554 fp2->f_data = so2; /* so2 already has ref count */ 555 sv[1] = fd; 556 error = soconnect2(so1, so2); 557 if (error) 558 goto free4; 559 if (uap->type == SOCK_DGRAM) { 560 /* 561 * Datagram socket connection is asymmetric. 562 */ 563 error = soconnect2(so2, so1); 564 if (error) 565 goto free4; 566 } 567 FILE_LOCK(fp1); 568 fp1->f_flag = FREAD|FWRITE; 569 fp1->f_ops = &socketops; 570 fp1->f_type = DTYPE_SOCKET; 571 FILE_UNLOCK(fp1); 572 FILE_LOCK(fp2); 573 fp2->f_flag = FREAD|FWRITE; 574 fp2->f_ops = &socketops; 575 fp2->f_type = DTYPE_SOCKET; 576 FILE_UNLOCK(fp2); 577 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 578 fdrop(fp1, td); 579 fdrop(fp2, td); 580 goto done2; 581 free4: 582 FILEDESC_LOCK(fdp); 583 if (fdp->fd_ofiles[sv[1]] == fp2) { 584 fdp->fd_ofiles[sv[1]] = NULL; 585 fdunused(fdp, sv[1]); 586 FILEDESC_UNLOCK(fdp); 587 fdrop(fp2, td); 588 } else { 589 FILEDESC_UNLOCK(fdp); 590 } 591 fdrop(fp2, td); 592 free3: 593 FILEDESC_LOCK(fdp); 594 if (fdp->fd_ofiles[sv[0]] == fp1) { 595 fdp->fd_ofiles[sv[0]] = NULL; 596 fdunused(fdp, sv[0]); 597 FILEDESC_UNLOCK(fdp); 598 fdrop(fp1, td); 599 } else { 600 FILEDESC_UNLOCK(fdp); 601 } 602 fdrop(fp1, td); 603 free2: 604 (void)soclose(so2); 605 free1: 606 (void)soclose(so1); 607 done2: 608 NET_UNLOCK_GIANT(); 609 return (error); 610 } 611 612 static int 613 sendit(td, s, mp, flags) 614 register struct thread *td; 615 int s; 616 register struct msghdr *mp; 617 int flags; 618 { 619 struct mbuf *control; 620 struct sockaddr *to; 621 int error; 622 623 if (mp->msg_name != NULL) { 624 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 625 if (error) { 626 to = NULL; 627 goto bad; 628 } 629 mp->msg_name = to; 630 } else { 631 to = NULL; 632 } 633 634 if (mp->msg_control) { 635 if (mp->msg_controllen < sizeof(struct cmsghdr) 636 #ifdef COMPAT_OLDSOCK 637 && mp->msg_flags != MSG_COMPAT 638 #endif 639 ) { 640 error = EINVAL; 641 goto bad; 642 } 643 error = sockargs(&control, mp->msg_control, 644 mp->msg_controllen, MT_CONTROL); 645 if (error) 646 goto bad; 647 #ifdef COMPAT_OLDSOCK 648 if (mp->msg_flags == MSG_COMPAT) { 649 register struct cmsghdr *cm; 650 651 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 652 if (control == 0) { 653 error = ENOBUFS; 654 goto bad; 655 } else { 656 cm = mtod(control, struct cmsghdr *); 657 cm->cmsg_len = control->m_len; 658 cm->cmsg_level = SOL_SOCKET; 659 cm->cmsg_type = SCM_RIGHTS; 660 } 661 } 662 #endif 663 } else { 664 control = NULL; 665 } 666 667 error = kern_sendit(td, s, mp, flags, control); 668 669 bad: 670 if (to) 671 FREE(to, M_SONAME); 672 return (error); 673 } 674 675 int 676 kern_sendit(td, s, mp, flags, control) 677 struct thread *td; 678 int s; 679 struct msghdr *mp; 680 int flags; 681 struct mbuf *control; 682 { 683 struct uio auio; 684 struct iovec *iov; 685 struct socket *so; 686 int i; 687 int len, error; 688 #ifdef KTRACE 689 struct iovec *ktriov = NULL; 690 struct uio ktruio; 691 int iovlen; 692 #endif 693 694 NET_LOCK_GIANT(); 695 if ((error = fgetsock(td, s, &so, NULL)) != 0) 696 goto bad2; 697 698 #ifdef MAC 699 error = mac_check_socket_send(td->td_ucred, so); 700 if (error) 701 goto bad; 702 #endif 703 704 auio.uio_iov = mp->msg_iov; 705 auio.uio_iovcnt = mp->msg_iovlen; 706 auio.uio_segflg = UIO_USERSPACE; 707 auio.uio_rw = UIO_WRITE; 708 auio.uio_td = td; 709 auio.uio_offset = 0; /* XXX */ 710 auio.uio_resid = 0; 711 iov = mp->msg_iov; 712 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 713 if ((auio.uio_resid += iov->iov_len) < 0) { 714 error = EINVAL; 715 goto bad; 716 } 717 } 718 #ifdef KTRACE 719 if (KTRPOINT(td, KTR_GENIO)) { 720 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 721 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 722 bcopy(auio.uio_iov, ktriov, iovlen); 723 ktruio = auio; 724 } 725 #endif 726 len = auio.uio_resid; 727 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 728 0, control, flags, td); 729 if (error) { 730 if (auio.uio_resid != len && (error == ERESTART || 731 error == EINTR || error == EWOULDBLOCK)) 732 error = 0; 733 /* Generation of SIGPIPE can be controlled per socket */ 734 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 735 PROC_LOCK(td->td_proc); 736 psignal(td->td_proc, SIGPIPE); 737 PROC_UNLOCK(td->td_proc); 738 } 739 } 740 if (error == 0) 741 td->td_retval[0] = len - auio.uio_resid; 742 #ifdef KTRACE 743 if (ktriov != NULL) { 744 if (error == 0) { 745 ktruio.uio_iov = ktriov; 746 ktruio.uio_resid = td->td_retval[0]; 747 ktrgenio(s, UIO_WRITE, &ktruio, error); 748 } 749 FREE(ktriov, M_TEMP); 750 } 751 #endif 752 bad: 753 fputsock(so); 754 bad2: 755 NET_UNLOCK_GIANT(); 756 return (error); 757 } 758 759 /* 760 * MPSAFE 761 */ 762 int 763 sendto(td, uap) 764 struct thread *td; 765 register struct sendto_args /* { 766 int s; 767 caddr_t buf; 768 size_t len; 769 int flags; 770 caddr_t to; 771 int tolen; 772 } */ *uap; 773 { 774 struct msghdr msg; 775 struct iovec aiov; 776 int error; 777 778 msg.msg_name = uap->to; 779 msg.msg_namelen = uap->tolen; 780 msg.msg_iov = &aiov; 781 msg.msg_iovlen = 1; 782 msg.msg_control = 0; 783 #ifdef COMPAT_OLDSOCK 784 msg.msg_flags = 0; 785 #endif 786 aiov.iov_base = uap->buf; 787 aiov.iov_len = uap->len; 788 error = sendit(td, uap->s, &msg, uap->flags); 789 return (error); 790 } 791 792 #ifdef COMPAT_OLDSOCK 793 /* 794 * MPSAFE 795 */ 796 int 797 osend(td, uap) 798 struct thread *td; 799 register struct osend_args /* { 800 int s; 801 caddr_t buf; 802 int len; 803 int flags; 804 } */ *uap; 805 { 806 struct msghdr msg; 807 struct iovec aiov; 808 int error; 809 810 msg.msg_name = 0; 811 msg.msg_namelen = 0; 812 msg.msg_iov = &aiov; 813 msg.msg_iovlen = 1; 814 aiov.iov_base = uap->buf; 815 aiov.iov_len = uap->len; 816 msg.msg_control = 0; 817 msg.msg_flags = 0; 818 error = sendit(td, uap->s, &msg, uap->flags); 819 return (error); 820 } 821 822 /* 823 * MPSAFE 824 */ 825 int 826 osendmsg(td, uap) 827 struct thread *td; 828 register struct osendmsg_args /* { 829 int s; 830 caddr_t msg; 831 int flags; 832 } */ *uap; 833 { 834 struct msghdr msg; 835 struct iovec aiov[UIO_SMALLIOV], *iov; 836 int error; 837 838 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 839 if (error) 840 goto done2; 841 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 842 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 843 error = EMSGSIZE; 844 goto done2; 845 } 846 MALLOC(iov, struct iovec *, 847 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 848 M_WAITOK); 849 } else { 850 iov = aiov; 851 } 852 error = copyin(msg.msg_iov, iov, 853 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 854 if (error) 855 goto done; 856 msg.msg_flags = MSG_COMPAT; 857 msg.msg_iov = iov; 858 error = sendit(td, uap->s, &msg, uap->flags); 859 done: 860 if (iov != aiov) 861 FREE(iov, M_IOV); 862 done2: 863 return (error); 864 } 865 #endif 866 867 /* 868 * MPSAFE 869 */ 870 int 871 sendmsg(td, uap) 872 struct thread *td; 873 register struct sendmsg_args /* { 874 int s; 875 caddr_t msg; 876 int flags; 877 } */ *uap; 878 { 879 struct msghdr msg; 880 struct iovec aiov[UIO_SMALLIOV], *iov; 881 int error; 882 883 error = copyin(uap->msg, &msg, sizeof (msg)); 884 if (error) 885 goto done2; 886 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 887 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 888 error = EMSGSIZE; 889 goto done2; 890 } 891 MALLOC(iov, struct iovec *, 892 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 893 M_WAITOK); 894 } else { 895 iov = aiov; 896 } 897 if (msg.msg_iovlen && 898 (error = copyin(msg.msg_iov, iov, 899 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 900 goto done; 901 msg.msg_iov = iov; 902 #ifdef COMPAT_OLDSOCK 903 msg.msg_flags = 0; 904 #endif 905 error = sendit(td, uap->s, &msg, uap->flags); 906 done: 907 if (iov != aiov) 908 FREE(iov, M_IOV); 909 done2: 910 return (error); 911 } 912 913 static int 914 recvit(td, s, mp, namelenp) 915 register struct thread *td; 916 int s; 917 register struct msghdr *mp; 918 void *namelenp; 919 { 920 struct uio auio; 921 register struct iovec *iov; 922 register int i; 923 socklen_t len; 924 int error; 925 struct mbuf *m, *control = 0; 926 caddr_t ctlbuf; 927 struct socket *so; 928 struct sockaddr *fromsa = 0; 929 #ifdef KTRACE 930 struct iovec *ktriov = NULL; 931 struct uio ktruio; 932 int iovlen; 933 #endif 934 935 NET_LOCK_GIANT(); 936 if ((error = fgetsock(td, s, &so, NULL)) != 0) { 937 NET_UNLOCK_GIANT(); 938 return (error); 939 } 940 941 #ifdef MAC 942 error = mac_check_socket_receive(td->td_ucred, so); 943 if (error) { 944 fputsock(so); 945 NET_UNLOCK_GIANT(); 946 return (error); 947 } 948 #endif 949 950 auio.uio_iov = mp->msg_iov; 951 auio.uio_iovcnt = mp->msg_iovlen; 952 auio.uio_segflg = UIO_USERSPACE; 953 auio.uio_rw = UIO_READ; 954 auio.uio_td = td; 955 auio.uio_offset = 0; /* XXX */ 956 auio.uio_resid = 0; 957 iov = mp->msg_iov; 958 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 959 if ((auio.uio_resid += iov->iov_len) < 0) { 960 fputsock(so); 961 NET_UNLOCK_GIANT(); 962 return (EINVAL); 963 } 964 } 965 #ifdef KTRACE 966 if (KTRPOINT(td, KTR_GENIO)) { 967 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 968 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 969 bcopy(auio.uio_iov, ktriov, iovlen); 970 ktruio = auio; 971 } 972 #endif 973 len = auio.uio_resid; 974 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 975 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 976 &mp->msg_flags); 977 if (error) { 978 if (auio.uio_resid != (int)len && (error == ERESTART || 979 error == EINTR || error == EWOULDBLOCK)) 980 error = 0; 981 } 982 #ifdef KTRACE 983 if (ktriov != NULL) { 984 if (error == 0) { 985 ktruio.uio_iov = ktriov; 986 ktruio.uio_resid = (int)len - auio.uio_resid; 987 ktrgenio(s, UIO_READ, &ktruio, error); 988 } 989 FREE(ktriov, M_TEMP); 990 } 991 #endif 992 if (error) 993 goto out; 994 td->td_retval[0] = (int)len - auio.uio_resid; 995 if (mp->msg_name) { 996 len = mp->msg_namelen; 997 if (len <= 0 || fromsa == 0) 998 len = 0; 999 else { 1000 /* save sa_len before it is destroyed by MSG_COMPAT */ 1001 len = MIN(len, fromsa->sa_len); 1002 #ifdef COMPAT_OLDSOCK 1003 if (mp->msg_flags & MSG_COMPAT) 1004 ((struct osockaddr *)fromsa)->sa_family = 1005 fromsa->sa_family; 1006 #endif 1007 error = copyout(fromsa, mp->msg_name, (unsigned)len); 1008 if (error) 1009 goto out; 1010 } 1011 mp->msg_namelen = len; 1012 if (namelenp && 1013 (error = copyout(&len, namelenp, sizeof (socklen_t)))) { 1014 #ifdef COMPAT_OLDSOCK 1015 if (mp->msg_flags & MSG_COMPAT) 1016 error = 0; /* old recvfrom didn't check */ 1017 else 1018 #endif 1019 goto out; 1020 } 1021 } 1022 if (mp->msg_control) { 1023 #ifdef COMPAT_OLDSOCK 1024 /* 1025 * We assume that old recvmsg calls won't receive access 1026 * rights and other control info, esp. as control info 1027 * is always optional and those options didn't exist in 4.3. 1028 * If we receive rights, trim the cmsghdr; anything else 1029 * is tossed. 1030 */ 1031 if (control && mp->msg_flags & MSG_COMPAT) { 1032 if (mtod(control, struct cmsghdr *)->cmsg_level != 1033 SOL_SOCKET || 1034 mtod(control, struct cmsghdr *)->cmsg_type != 1035 SCM_RIGHTS) { 1036 mp->msg_controllen = 0; 1037 goto out; 1038 } 1039 control->m_len -= sizeof (struct cmsghdr); 1040 control->m_data += sizeof (struct cmsghdr); 1041 } 1042 #endif 1043 len = mp->msg_controllen; 1044 m = control; 1045 mp->msg_controllen = 0; 1046 ctlbuf = mp->msg_control; 1047 1048 while (m && len > 0) { 1049 unsigned int tocopy; 1050 1051 if (len >= m->m_len) 1052 tocopy = m->m_len; 1053 else { 1054 mp->msg_flags |= MSG_CTRUNC; 1055 tocopy = len; 1056 } 1057 1058 if ((error = copyout(mtod(m, caddr_t), 1059 ctlbuf, tocopy)) != 0) 1060 goto out; 1061 1062 ctlbuf += tocopy; 1063 len -= tocopy; 1064 m = m->m_next; 1065 } 1066 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1067 } 1068 out: 1069 fputsock(so); 1070 NET_UNLOCK_GIANT(); 1071 if (fromsa) 1072 FREE(fromsa, M_SONAME); 1073 if (control) 1074 m_freem(control); 1075 return (error); 1076 } 1077 1078 /* 1079 * MPSAFE 1080 */ 1081 int 1082 recvfrom(td, uap) 1083 struct thread *td; 1084 register struct recvfrom_args /* { 1085 int s; 1086 caddr_t buf; 1087 size_t len; 1088 int flags; 1089 struct sockaddr * __restrict from; 1090 socklen_t * __restrict fromlenaddr; 1091 } */ *uap; 1092 { 1093 struct msghdr msg; 1094 struct iovec aiov; 1095 int error; 1096 1097 if (uap->fromlenaddr) { 1098 error = copyin(uap->fromlenaddr, 1099 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1100 if (error) 1101 goto done2; 1102 } else { 1103 msg.msg_namelen = 0; 1104 } 1105 msg.msg_name = uap->from; 1106 msg.msg_iov = &aiov; 1107 msg.msg_iovlen = 1; 1108 aiov.iov_base = uap->buf; 1109 aiov.iov_len = uap->len; 1110 msg.msg_control = 0; 1111 msg.msg_flags = uap->flags; 1112 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1113 done2: 1114 return(error); 1115 } 1116 1117 #ifdef COMPAT_OLDSOCK 1118 /* 1119 * MPSAFE 1120 */ 1121 int 1122 orecvfrom(td, uap) 1123 struct thread *td; 1124 struct recvfrom_args *uap; 1125 { 1126 1127 uap->flags |= MSG_COMPAT; 1128 return (recvfrom(td, uap)); 1129 } 1130 #endif 1131 1132 1133 #ifdef COMPAT_OLDSOCK 1134 /* 1135 * MPSAFE 1136 */ 1137 int 1138 orecv(td, uap) 1139 struct thread *td; 1140 register struct orecv_args /* { 1141 int s; 1142 caddr_t buf; 1143 int len; 1144 int flags; 1145 } */ *uap; 1146 { 1147 struct msghdr msg; 1148 struct iovec aiov; 1149 int error; 1150 1151 msg.msg_name = 0; 1152 msg.msg_namelen = 0; 1153 msg.msg_iov = &aiov; 1154 msg.msg_iovlen = 1; 1155 aiov.iov_base = uap->buf; 1156 aiov.iov_len = uap->len; 1157 msg.msg_control = 0; 1158 msg.msg_flags = uap->flags; 1159 error = recvit(td, uap->s, &msg, NULL); 1160 return (error); 1161 } 1162 1163 /* 1164 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1165 * overlays the new one, missing only the flags, and with the (old) access 1166 * rights where the control fields are now. 1167 * 1168 * MPSAFE 1169 */ 1170 int 1171 orecvmsg(td, uap) 1172 struct thread *td; 1173 register struct orecvmsg_args /* { 1174 int s; 1175 struct omsghdr *msg; 1176 int flags; 1177 } */ *uap; 1178 { 1179 struct msghdr msg; 1180 struct iovec aiov[UIO_SMALLIOV], *iov; 1181 int error; 1182 1183 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1184 if (error) 1185 return (error); 1186 1187 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1188 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1189 error = EMSGSIZE; 1190 goto done2; 1191 } 1192 MALLOC(iov, struct iovec *, 1193 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1194 M_WAITOK); 1195 } else { 1196 iov = aiov; 1197 } 1198 msg.msg_flags = uap->flags | MSG_COMPAT; 1199 error = copyin(msg.msg_iov, iov, 1200 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1201 if (error) 1202 goto done; 1203 msg.msg_iov = iov; 1204 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1205 1206 if (msg.msg_controllen && error == 0) 1207 error = copyout(&msg.msg_controllen, 1208 &uap->msg->msg_accrightslen, sizeof (int)); 1209 done: 1210 if (iov != aiov) 1211 FREE(iov, M_IOV); 1212 done2: 1213 return (error); 1214 } 1215 #endif 1216 1217 /* 1218 * MPSAFE 1219 */ 1220 int 1221 recvmsg(td, uap) 1222 struct thread *td; 1223 register struct recvmsg_args /* { 1224 int s; 1225 struct msghdr *msg; 1226 int flags; 1227 } */ *uap; 1228 { 1229 struct msghdr msg; 1230 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1231 register int error; 1232 1233 error = copyin(uap->msg, &msg, sizeof (msg)); 1234 if (error) 1235 goto done2; 1236 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1237 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1238 error = EMSGSIZE; 1239 goto done2; 1240 } 1241 MALLOC(iov, struct iovec *, 1242 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1243 M_WAITOK); 1244 } else { 1245 iov = aiov; 1246 } 1247 #ifdef COMPAT_OLDSOCK 1248 msg.msg_flags = uap->flags &~ MSG_COMPAT; 1249 #else 1250 msg.msg_flags = uap->flags; 1251 #endif 1252 uiov = msg.msg_iov; 1253 msg.msg_iov = iov; 1254 error = copyin(uiov, iov, 1255 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1256 if (error) 1257 goto done; 1258 error = recvit(td, uap->s, &msg, NULL); 1259 if (!error) { 1260 msg.msg_iov = uiov; 1261 error = copyout(&msg, uap->msg, sizeof(msg)); 1262 } 1263 done: 1264 if (iov != aiov) 1265 FREE(iov, M_IOV); 1266 done2: 1267 return (error); 1268 } 1269 1270 /* 1271 * MPSAFE 1272 */ 1273 /* ARGSUSED */ 1274 int 1275 shutdown(td, uap) 1276 struct thread *td; 1277 register struct shutdown_args /* { 1278 int s; 1279 int how; 1280 } */ *uap; 1281 { 1282 struct socket *so; 1283 int error; 1284 1285 NET_LOCK_GIANT(); 1286 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1287 error = soshutdown(so, uap->how); 1288 fputsock(so); 1289 } 1290 NET_UNLOCK_GIANT(); 1291 return(error); 1292 } 1293 1294 /* 1295 * MPSAFE 1296 */ 1297 /* ARGSUSED */ 1298 int 1299 setsockopt(td, uap) 1300 struct thread *td; 1301 register struct setsockopt_args /* { 1302 int s; 1303 int level; 1304 int name; 1305 caddr_t val; 1306 int valsize; 1307 } */ *uap; 1308 { 1309 struct socket *so; 1310 struct sockopt sopt; 1311 int error; 1312 1313 if (uap->val == 0 && uap->valsize != 0) 1314 return (EFAULT); 1315 if (uap->valsize < 0) 1316 return (EINVAL); 1317 1318 NET_LOCK_GIANT(); 1319 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1320 sopt.sopt_dir = SOPT_SET; 1321 sopt.sopt_level = uap->level; 1322 sopt.sopt_name = uap->name; 1323 sopt.sopt_val = uap->val; 1324 sopt.sopt_valsize = uap->valsize; 1325 sopt.sopt_td = td; 1326 error = sosetopt(so, &sopt); 1327 fputsock(so); 1328 } 1329 NET_UNLOCK_GIANT(); 1330 return(error); 1331 } 1332 1333 /* 1334 * MPSAFE 1335 */ 1336 /* ARGSUSED */ 1337 int 1338 getsockopt(td, uap) 1339 struct thread *td; 1340 register struct getsockopt_args /* { 1341 int s; 1342 int level; 1343 int name; 1344 void * __restrict val; 1345 socklen_t * __restrict avalsize; 1346 } */ *uap; 1347 { 1348 socklen_t valsize; 1349 int error; 1350 struct socket *so; 1351 struct sockopt sopt; 1352 1353 NET_LOCK_GIANT(); 1354 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1355 goto done2; 1356 if (uap->val) { 1357 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1358 if (error) 1359 goto done1; 1360 if (valsize < 0) { 1361 error = EINVAL; 1362 goto done1; 1363 } 1364 } else { 1365 valsize = 0; 1366 } 1367 1368 sopt.sopt_dir = SOPT_GET; 1369 sopt.sopt_level = uap->level; 1370 sopt.sopt_name = uap->name; 1371 sopt.sopt_val = uap->val; 1372 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1373 sopt.sopt_td = td; 1374 1375 error = sogetopt(so, &sopt); 1376 if (error == 0) { 1377 valsize = sopt.sopt_valsize; 1378 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1379 } 1380 done1: 1381 fputsock(so); 1382 done2: 1383 NET_UNLOCK_GIANT(); 1384 return (error); 1385 } 1386 1387 /* 1388 * getsockname1() - Get socket name. 1389 * 1390 * MPSAFE 1391 */ 1392 /* ARGSUSED */ 1393 static int 1394 getsockname1(td, uap, compat) 1395 struct thread *td; 1396 register struct getsockname_args /* { 1397 int fdes; 1398 struct sockaddr * __restrict asa; 1399 socklen_t * __restrict alen; 1400 } */ *uap; 1401 int compat; 1402 { 1403 struct socket *so; 1404 struct sockaddr *sa; 1405 socklen_t len; 1406 int error; 1407 1408 NET_LOCK_GIANT(); 1409 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1410 goto done2; 1411 error = copyin(uap->alen, &len, sizeof (len)); 1412 if (error) 1413 goto done1; 1414 if (len < 0) { 1415 error = EINVAL; 1416 goto done1; 1417 } 1418 sa = 0; 1419 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1420 if (error) 1421 goto bad; 1422 if (sa == 0) { 1423 len = 0; 1424 goto gotnothing; 1425 } 1426 1427 len = MIN(len, sa->sa_len); 1428 #ifdef COMPAT_OLDSOCK 1429 if (compat) 1430 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1431 #endif 1432 error = copyout(sa, uap->asa, (u_int)len); 1433 if (error == 0) 1434 gotnothing: 1435 error = copyout(&len, uap->alen, sizeof (len)); 1436 bad: 1437 if (sa) 1438 FREE(sa, M_SONAME); 1439 done1: 1440 fputsock(so); 1441 done2: 1442 NET_UNLOCK_GIANT(); 1443 return (error); 1444 } 1445 1446 /* 1447 * MPSAFE 1448 */ 1449 int 1450 getsockname(td, uap) 1451 struct thread *td; 1452 struct getsockname_args *uap; 1453 { 1454 1455 return (getsockname1(td, uap, 0)); 1456 } 1457 1458 #ifdef COMPAT_OLDSOCK 1459 /* 1460 * MPSAFE 1461 */ 1462 int 1463 ogetsockname(td, uap) 1464 struct thread *td; 1465 struct getsockname_args *uap; 1466 { 1467 1468 return (getsockname1(td, uap, 1)); 1469 } 1470 #endif /* COMPAT_OLDSOCK */ 1471 1472 /* 1473 * getpeername1() - Get name of peer for connected socket. 1474 * 1475 * MPSAFE 1476 */ 1477 /* ARGSUSED */ 1478 static int 1479 getpeername1(td, uap, compat) 1480 struct thread *td; 1481 register struct getpeername_args /* { 1482 int fdes; 1483 struct sockaddr * __restrict asa; 1484 socklen_t * __restrict alen; 1485 } */ *uap; 1486 int compat; 1487 { 1488 struct socket *so; 1489 struct sockaddr *sa; 1490 socklen_t len; 1491 int error; 1492 1493 NET_LOCK_GIANT(); 1494 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1495 goto done2; 1496 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1497 error = ENOTCONN; 1498 goto done1; 1499 } 1500 error = copyin(uap->alen, &len, sizeof (len)); 1501 if (error) 1502 goto done1; 1503 if (len < 0) { 1504 error = EINVAL; 1505 goto done1; 1506 } 1507 sa = 0; 1508 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1509 if (error) 1510 goto bad; 1511 if (sa == 0) { 1512 len = 0; 1513 goto gotnothing; 1514 } 1515 len = MIN(len, sa->sa_len); 1516 #ifdef COMPAT_OLDSOCK 1517 if (compat) 1518 ((struct osockaddr *)sa)->sa_family = 1519 sa->sa_family; 1520 #endif 1521 error = copyout(sa, uap->asa, (u_int)len); 1522 if (error) 1523 goto bad; 1524 gotnothing: 1525 error = copyout(&len, uap->alen, sizeof (len)); 1526 bad: 1527 if (sa) 1528 FREE(sa, M_SONAME); 1529 done1: 1530 fputsock(so); 1531 done2: 1532 NET_UNLOCK_GIANT(); 1533 return (error); 1534 } 1535 1536 /* 1537 * MPSAFE 1538 */ 1539 int 1540 getpeername(td, uap) 1541 struct thread *td; 1542 struct getpeername_args *uap; 1543 { 1544 1545 return (getpeername1(td, uap, 0)); 1546 } 1547 1548 #ifdef COMPAT_OLDSOCK 1549 /* 1550 * MPSAFE 1551 */ 1552 int 1553 ogetpeername(td, uap) 1554 struct thread *td; 1555 struct ogetpeername_args *uap; 1556 { 1557 1558 /* XXX uap should have type `getpeername_args *' to begin with. */ 1559 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1560 } 1561 #endif /* COMPAT_OLDSOCK */ 1562 1563 int 1564 sockargs(mp, buf, buflen, type) 1565 struct mbuf **mp; 1566 caddr_t buf; 1567 int buflen, type; 1568 { 1569 register struct sockaddr *sa; 1570 register struct mbuf *m; 1571 int error; 1572 1573 if ((u_int)buflen > MLEN) { 1574 #ifdef COMPAT_OLDSOCK 1575 if (type == MT_SONAME && (u_int)buflen <= 112) 1576 buflen = MLEN; /* unix domain compat. hack */ 1577 else 1578 #endif 1579 return (EINVAL); 1580 } 1581 m = m_get(M_TRYWAIT, type); 1582 if (m == NULL) 1583 return (ENOBUFS); 1584 m->m_len = buflen; 1585 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1586 if (error) 1587 (void) m_free(m); 1588 else { 1589 *mp = m; 1590 if (type == MT_SONAME) { 1591 sa = mtod(m, struct sockaddr *); 1592 1593 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1594 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1595 sa->sa_family = sa->sa_len; 1596 #endif 1597 sa->sa_len = buflen; 1598 } 1599 } 1600 return (error); 1601 } 1602 1603 int 1604 getsockaddr(namp, uaddr, len) 1605 struct sockaddr **namp; 1606 caddr_t uaddr; 1607 size_t len; 1608 { 1609 struct sockaddr *sa; 1610 int error; 1611 1612 if (len > SOCK_MAXADDRLEN) 1613 return (ENAMETOOLONG); 1614 if (len < offsetof(struct sockaddr, sa_data[0])) 1615 return (EINVAL); 1616 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1617 error = copyin(uaddr, sa, len); 1618 if (error) { 1619 FREE(sa, M_SONAME); 1620 } else { 1621 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1622 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1623 sa->sa_family = sa->sa_len; 1624 #endif 1625 sa->sa_len = len; 1626 *namp = sa; 1627 } 1628 return (error); 1629 } 1630 1631 /* 1632 * Detach mapped page and release resources back to the system. 1633 */ 1634 void 1635 sf_buf_mext(void *addr, void *args) 1636 { 1637 vm_page_t m; 1638 1639 m = sf_buf_page(args); 1640 sf_buf_free(args); 1641 vm_page_lock_queues(); 1642 vm_page_unwire(m, 0); 1643 /* 1644 * Check for the object going away on us. This can 1645 * happen since we don't hold a reference to it. 1646 * If so, we're responsible for freeing the page. 1647 */ 1648 if (m->wire_count == 0 && m->object == NULL) 1649 vm_page_free(m); 1650 vm_page_unlock_queues(); 1651 } 1652 1653 /* 1654 * sendfile(2) 1655 * 1656 * MPSAFE 1657 * 1658 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1659 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1660 * 1661 * Send a file specified by 'fd' and starting at 'offset' to a socket 1662 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1663 * nbytes == 0. Optionally add a header and/or trailer to the socket 1664 * output. If specified, write the total number of bytes sent into *sbytes. 1665 * 1666 */ 1667 int 1668 sendfile(struct thread *td, struct sendfile_args *uap) 1669 { 1670 1671 return (do_sendfile(td, uap, 0)); 1672 } 1673 1674 #ifdef COMPAT_FREEBSD4 1675 int 1676 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1677 { 1678 struct sendfile_args args; 1679 1680 args.fd = uap->fd; 1681 args.s = uap->s; 1682 args.offset = uap->offset; 1683 args.nbytes = uap->nbytes; 1684 args.hdtr = uap->hdtr; 1685 args.sbytes = uap->sbytes; 1686 args.flags = uap->flags; 1687 1688 return (do_sendfile(td, &args, 1)); 1689 } 1690 #endif /* COMPAT_FREEBSD4 */ 1691 1692 static int 1693 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1694 { 1695 struct vnode *vp; 1696 struct vm_object *obj; 1697 struct socket *so = NULL; 1698 struct mbuf *m, *m_header = NULL; 1699 struct sf_buf *sf; 1700 struct vm_page *pg; 1701 struct writev_args nuap; 1702 struct sf_hdtr hdtr; 1703 struct uio hdr_uio; 1704 off_t off, xfsize, hdtr_size, sbytes = 0; 1705 int error, s, headersize = 0, headersent = 0; 1706 struct iovec *hdr_iov = NULL; 1707 1708 mtx_lock(&Giant); 1709 1710 hdtr_size = 0; 1711 1712 /* 1713 * The descriptor must be a regular file and have a backing VM object. 1714 */ 1715 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1716 goto done; 1717 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1718 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1719 error = EINVAL; 1720 VOP_UNLOCK(vp, 0, td); 1721 goto done; 1722 } 1723 VOP_UNLOCK(vp, 0, td); 1724 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1725 goto done; 1726 if (so->so_type != SOCK_STREAM) { 1727 error = EINVAL; 1728 goto done; 1729 } 1730 if ((so->so_state & SS_ISCONNECTED) == 0) { 1731 error = ENOTCONN; 1732 goto done; 1733 } 1734 if (uap->offset < 0) { 1735 error = EINVAL; 1736 goto done; 1737 } 1738 1739 #ifdef MAC 1740 error = mac_check_socket_send(td->td_ucred, so); 1741 if (error) 1742 goto done; 1743 #endif 1744 1745 /* 1746 * If specified, get the pointer to the sf_hdtr struct for 1747 * any headers/trailers. 1748 */ 1749 if (uap->hdtr != NULL) { 1750 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1751 if (error) 1752 goto done; 1753 /* 1754 * Send any headers. 1755 */ 1756 if (hdtr.headers != NULL) { 1757 hdr_uio.uio_td = td; 1758 hdr_uio.uio_rw = UIO_WRITE; 1759 error = uiofromiov(hdtr.headers, hdtr.hdr_cnt, 1760 &hdr_uio); 1761 if (error) 1762 goto done; 1763 /* Cache hdr_iov, m_uiotombuf may change it. */ 1764 hdr_iov = hdr_uio.uio_iov; 1765 if (hdr_uio.uio_resid > 0) { 1766 m_header = m_uiotombuf(&hdr_uio, M_DONTWAIT, 0); 1767 if (m_header == NULL) 1768 goto done; 1769 headersize = m_header->m_pkthdr.len; 1770 if (compat) 1771 sbytes += headersize; 1772 } 1773 } 1774 } 1775 1776 /* 1777 * Protect against multiple writers to the socket. 1778 */ 1779 (void) sblock(&so->so_snd, M_WAITOK); 1780 1781 /* 1782 * Loop through the pages in the file, starting with the requested 1783 * offset. Get a file page (do I/O if necessary), map the file page 1784 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1785 * it on the socket. 1786 */ 1787 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1788 vm_pindex_t pindex; 1789 vm_offset_t pgoff; 1790 1791 pindex = OFF_TO_IDX(off); 1792 VM_OBJECT_LOCK(obj); 1793 retry_lookup: 1794 /* 1795 * Calculate the amount to transfer. Not to exceed a page, 1796 * the EOF, or the passed in nbytes. 1797 */ 1798 xfsize = obj->un_pager.vnp.vnp_size - off; 1799 VM_OBJECT_UNLOCK(obj); 1800 if (xfsize > PAGE_SIZE) 1801 xfsize = PAGE_SIZE; 1802 pgoff = (vm_offset_t)(off & PAGE_MASK); 1803 if (PAGE_SIZE - pgoff < xfsize) 1804 xfsize = PAGE_SIZE - pgoff; 1805 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1806 xfsize = uap->nbytes - sbytes; 1807 if (xfsize <= 0) { 1808 if (m_header != NULL) { 1809 m = m_header; 1810 m_header = NULL; 1811 goto retry_space; 1812 } else 1813 break; 1814 } 1815 /* 1816 * Optimize the non-blocking case by looking at the socket space 1817 * before going to the extra work of constituting the sf_buf. 1818 */ 1819 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1820 if (so->so_state & SS_CANTSENDMORE) 1821 error = EPIPE; 1822 else 1823 error = EAGAIN; 1824 sbunlock(&so->so_snd); 1825 goto done; 1826 } 1827 VM_OBJECT_LOCK(obj); 1828 /* 1829 * Attempt to look up the page. 1830 * 1831 * Allocate if not found 1832 * 1833 * Wait and loop if busy. 1834 */ 1835 pg = vm_page_lookup(obj, pindex); 1836 1837 if (pg == NULL) { 1838 pg = vm_page_alloc(obj, pindex, 1839 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1840 if (pg == NULL) { 1841 VM_OBJECT_UNLOCK(obj); 1842 VM_WAIT; 1843 VM_OBJECT_LOCK(obj); 1844 goto retry_lookup; 1845 } 1846 vm_page_lock_queues(); 1847 vm_page_wakeup(pg); 1848 } else { 1849 vm_page_lock_queues(); 1850 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1851 goto retry_lookup; 1852 /* 1853 * Wire the page so it does not get ripped out from 1854 * under us. 1855 */ 1856 vm_page_wire(pg); 1857 } 1858 1859 /* 1860 * If page is not valid for what we need, initiate I/O 1861 */ 1862 1863 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) { 1864 VM_OBJECT_UNLOCK(obj); 1865 } else if (uap->flags & SF_NODISKIO) { 1866 error = EBUSY; 1867 } else { 1868 int bsize, resid; 1869 1870 /* 1871 * Ensure that our page is still around when the I/O 1872 * completes. 1873 */ 1874 vm_page_io_start(pg); 1875 vm_page_unlock_queues(); 1876 VM_OBJECT_UNLOCK(obj); 1877 1878 /* 1879 * Get the page from backing store. 1880 */ 1881 bsize = vp->v_mount->mnt_stat.f_iosize; 1882 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1883 /* 1884 * XXXMAC: Because we don't have fp->f_cred here, 1885 * we pass in NOCRED. This is probably wrong, but 1886 * is consistent with our original implementation. 1887 */ 1888 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1889 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1890 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 1891 td->td_ucred, NOCRED, &resid, td); 1892 VOP_UNLOCK(vp, 0, td); 1893 if (error) 1894 VM_OBJECT_LOCK(obj); 1895 vm_page_lock_queues(); 1896 vm_page_io_finish(pg); 1897 mbstat.sf_iocnt++; 1898 } 1899 1900 if (error) { 1901 vm_page_unwire(pg, 0); 1902 /* 1903 * See if anyone else might know about this page. 1904 * If not and it is not valid, then free it. 1905 */ 1906 if (pg->wire_count == 0 && pg->valid == 0 && 1907 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1908 pg->hold_count == 0) { 1909 vm_page_busy(pg); 1910 vm_page_free(pg); 1911 } 1912 vm_page_unlock_queues(); 1913 VM_OBJECT_UNLOCK(obj); 1914 sbunlock(&so->so_snd); 1915 goto done; 1916 } 1917 vm_page_unlock_queues(); 1918 1919 /* 1920 * Get a sendfile buf. We usually wait as long as necessary, 1921 * but this wait can be interrupted. 1922 */ 1923 if ((sf = sf_buf_alloc(pg, PCATCH)) == NULL) { 1924 mbstat.sf_allocfail++; 1925 vm_page_lock_queues(); 1926 vm_page_unwire(pg, 0); 1927 if (pg->wire_count == 0 && pg->object == NULL) 1928 vm_page_free(pg); 1929 vm_page_unlock_queues(); 1930 sbunlock(&so->so_snd); 1931 error = EINTR; 1932 goto done; 1933 } 1934 1935 /* 1936 * Get an mbuf header and set it up as having external storage. 1937 */ 1938 if (m_header) 1939 MGET(m, M_TRYWAIT, MT_DATA); 1940 else 1941 MGETHDR(m, M_TRYWAIT, MT_DATA); 1942 if (m == NULL) { 1943 error = ENOBUFS; 1944 sf_buf_mext((void *)sf_buf_kva(sf), sf); 1945 sbunlock(&so->so_snd); 1946 goto done; 1947 } 1948 /* 1949 * Setup external storage for mbuf. 1950 */ 1951 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY, 1952 EXT_SFBUF); 1953 m->m_data = (char *)sf_buf_kva(sf) + pgoff; 1954 m->m_pkthdr.len = m->m_len = xfsize; 1955 1956 if (m_header) { 1957 m_cat(m_header, m); 1958 m = m_header; 1959 m_header = NULL; 1960 m_fixhdr(m); 1961 } 1962 1963 /* 1964 * Add the buffer to the socket buffer chain. 1965 */ 1966 s = splnet(); 1967 retry_space: 1968 /* 1969 * Make sure that the socket is still able to take more data. 1970 * CANTSENDMORE being true usually means that the connection 1971 * was closed. so_error is true when an error was sensed after 1972 * a previous send. 1973 * The state is checked after the page mapping and buffer 1974 * allocation above since those operations may block and make 1975 * any socket checks stale. From this point forward, nothing 1976 * blocks before the pru_send (or more accurately, any blocking 1977 * results in a loop back to here to re-check). 1978 */ 1979 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1980 if (so->so_state & SS_CANTSENDMORE) { 1981 error = EPIPE; 1982 } else { 1983 error = so->so_error; 1984 so->so_error = 0; 1985 } 1986 m_freem(m); 1987 sbunlock(&so->so_snd); 1988 splx(s); 1989 goto done; 1990 } 1991 /* 1992 * Wait for socket space to become available. We do this just 1993 * after checking the connection state above in order to avoid 1994 * a race condition with sbwait(). 1995 */ 1996 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1997 if (so->so_state & SS_NBIO) { 1998 m_freem(m); 1999 sbunlock(&so->so_snd); 2000 splx(s); 2001 error = EAGAIN; 2002 goto done; 2003 } 2004 error = sbwait(&so->so_snd); 2005 /* 2006 * An error from sbwait usually indicates that we've 2007 * been interrupted by a signal. If we've sent anything 2008 * then return bytes sent, otherwise return the error. 2009 */ 2010 if (error) { 2011 m_freem(m); 2012 sbunlock(&so->so_snd); 2013 splx(s); 2014 goto done; 2015 } 2016 goto retry_space; 2017 } 2018 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2019 splx(s); 2020 if (error) { 2021 sbunlock(&so->so_snd); 2022 goto done; 2023 } 2024 headersent = 1; 2025 } 2026 sbunlock(&so->so_snd); 2027 2028 /* 2029 * Send trailers. Wimp out and use writev(2). 2030 */ 2031 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 2032 nuap.fd = uap->s; 2033 nuap.iovp = hdtr.trailers; 2034 nuap.iovcnt = hdtr.trl_cnt; 2035 error = writev(td, &nuap); 2036 if (error) 2037 goto done; 2038 if (compat) 2039 sbytes += td->td_retval[0]; 2040 else 2041 hdtr_size += td->td_retval[0]; 2042 } 2043 2044 done: 2045 if (headersent) { 2046 if (!compat) 2047 hdtr_size += headersize; 2048 } else { 2049 if (compat) 2050 sbytes -= headersize; 2051 } 2052 /* 2053 * If there was no error we have to clear td->td_retval[0] 2054 * because it may have been set by writev. 2055 */ 2056 if (error == 0) { 2057 td->td_retval[0] = 0; 2058 } 2059 if (uap->sbytes != NULL) { 2060 if (!compat) 2061 sbytes += hdtr_size; 2062 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2063 } 2064 if (vp) 2065 vrele(vp); 2066 if (so) 2067 fputsock(so); 2068 if (hdr_iov) 2069 FREE(hdr_iov, M_IOV); 2070 if (m_header) 2071 m_freem(m_header); 2072 2073 mtx_unlock(&Giant); 2074 2075 if (error == ERESTART) 2076 error = EINTR; 2077 2078 return (error); 2079 } 2080