1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_compat.h" 39 #include "opt_ktrace.h" 40 #include "opt_mac.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/kernel.h> 45 #include <sys/lock.h> 46 #include <sys/mac.h> 47 #include <sys/mutex.h> 48 #include <sys/sysproto.h> 49 #include <sys/malloc.h> 50 #include <sys/filedesc.h> 51 #include <sys/event.h> 52 #include <sys/proc.h> 53 #include <sys/fcntl.h> 54 #include <sys/file.h> 55 #include <sys/filio.h> 56 #include <sys/mount.h> 57 #include <sys/mbuf.h> 58 #include <sys/protosw.h> 59 #include <sys/sf_buf.h> 60 #include <sys/socket.h> 61 #include <sys/socketvar.h> 62 #include <sys/signalvar.h> 63 #include <sys/syscallsubr.h> 64 #include <sys/sysctl.h> 65 #include <sys/uio.h> 66 #include <sys/vnode.h> 67 #ifdef KTRACE 68 #include <sys/ktrace.h> 69 #endif 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 #include <vm/vm_pageout.h> 75 #include <vm/vm_kern.h> 76 #include <vm/vm_extern.h> 77 78 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 79 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 80 81 static int accept1(struct thread *td, struct accept_args *uap, int compat); 82 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 83 static int getsockname1(struct thread *td, struct getsockname_args *uap, 84 int compat); 85 static int getpeername1(struct thread *td, struct getpeername_args *uap, 86 int compat); 87 88 /* 89 * NSFBUFS-related variables and associated sysctls 90 */ 91 int nsfbufs; 92 int nsfbufspeak; 93 int nsfbufsused; 94 95 SYSCTL_DECL(_kern_ipc); 96 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 97 "Maximum number of sendfile(2) sf_bufs available"); 98 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 99 "Number of sendfile(2) sf_bufs at peak usage"); 100 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 101 "Number of sendfile(2) sf_bufs in use"); 102 103 /* 104 * System call interface to the socket abstraction. 105 */ 106 #if defined(COMPAT_43) 107 #define COMPAT_OLDSOCK 108 #endif 109 110 /* 111 * MPSAFE 112 */ 113 int 114 socket(td, uap) 115 struct thread *td; 116 register struct socket_args /* { 117 int domain; 118 int type; 119 int protocol; 120 } */ *uap; 121 { 122 struct filedesc *fdp; 123 struct socket *so; 124 struct file *fp; 125 int fd, error; 126 127 fdp = td->td_proc->p_fd; 128 error = falloc(td, &fp, &fd); 129 if (error) 130 return (error); 131 /* An extra reference on `fp' has been held for us by falloc(). */ 132 NET_LOCK_GIANT(); 133 error = socreate(uap->domain, &so, uap->type, uap->protocol, 134 td->td_ucred, td); 135 NET_UNLOCK_GIANT(); 136 FILEDESC_LOCK(fdp); 137 if (error) { 138 if (fdp->fd_ofiles[fd] == fp) { 139 fdp->fd_ofiles[fd] = NULL; 140 fdunused(fdp, fd); 141 FILEDESC_UNLOCK(fdp); 142 fdrop(fp, td); 143 } else { 144 FILEDESC_UNLOCK(fdp); 145 } 146 } else { 147 fp->f_data = so; /* already has ref count */ 148 fp->f_flag = FREAD|FWRITE; 149 fp->f_ops = &socketops; 150 fp->f_type = DTYPE_SOCKET; 151 FILEDESC_UNLOCK(fdp); 152 td->td_retval[0] = fd; 153 } 154 fdrop(fp, td); 155 return (error); 156 } 157 158 /* 159 * MPSAFE 160 */ 161 /* ARGSUSED */ 162 int 163 bind(td, uap) 164 struct thread *td; 165 register struct bind_args /* { 166 int s; 167 caddr_t name; 168 int namelen; 169 } */ *uap; 170 { 171 struct sockaddr *sa; 172 int error; 173 174 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 175 return (error); 176 177 return (kern_bind(td, uap->s, sa)); 178 } 179 180 int 181 kern_bind(td, fd, sa) 182 struct thread *td; 183 int fd; 184 struct sockaddr *sa; 185 { 186 struct socket *so; 187 int error; 188 189 NET_LOCK_GIANT(); 190 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 191 goto done2; 192 #ifdef MAC 193 SOCK_LOCK(so); 194 error = mac_check_socket_bind(td->td_ucred, so, sa); 195 SOCK_UNLOCK(so); 196 if (error) 197 goto done1; 198 #endif 199 error = sobind(so, sa, td); 200 #ifdef MAC 201 done1: 202 #endif 203 fputsock(so); 204 done2: 205 NET_UNLOCK_GIANT(); 206 FREE(sa, M_SONAME); 207 return (error); 208 } 209 210 /* 211 * MPSAFE 212 */ 213 /* ARGSUSED */ 214 int 215 listen(td, uap) 216 struct thread *td; 217 register struct listen_args /* { 218 int s; 219 int backlog; 220 } */ *uap; 221 { 222 struct socket *so; 223 int error; 224 225 NET_LOCK_GIANT(); 226 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 227 #ifdef MAC 228 SOCK_LOCK(so); 229 error = mac_check_socket_listen(td->td_ucred, so); 230 SOCK_UNLOCK(so); 231 if (error) 232 goto done; 233 #endif 234 error = solisten(so, uap->backlog, td); 235 #ifdef MAC 236 done: 237 #endif 238 fputsock(so); 239 } 240 NET_UNLOCK_GIANT(); 241 return(error); 242 } 243 244 /* 245 * accept1() 246 * MPSAFE 247 */ 248 static int 249 accept1(td, uap, compat) 250 struct thread *td; 251 register struct accept_args /* { 252 int s; 253 struct sockaddr * __restrict name; 254 socklen_t * __restrict anamelen; 255 } */ *uap; 256 int compat; 257 { 258 struct filedesc *fdp; 259 struct file *nfp = NULL; 260 struct sockaddr *sa = NULL; 261 socklen_t namelen; 262 int error; 263 struct socket *head, *so; 264 int fd; 265 u_int fflag; 266 pid_t pgid; 267 int tmp; 268 269 fdp = td->td_proc->p_fd; 270 if (uap->name) { 271 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 272 if(error) 273 return (error); 274 if (namelen < 0) 275 return (EINVAL); 276 } 277 NET_LOCK_GIANT(); 278 error = fgetsock(td, uap->s, &head, &fflag); 279 if (error) 280 goto done2; 281 if ((head->so_options & SO_ACCEPTCONN) == 0) { 282 error = EINVAL; 283 goto done; 284 } 285 error = falloc(td, &nfp, &fd); 286 if (error) 287 goto done; 288 ACCEPT_LOCK(); 289 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 290 ACCEPT_UNLOCK(); 291 error = EWOULDBLOCK; 292 goto noconnection; 293 } 294 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 295 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 296 head->so_error = ECONNABORTED; 297 break; 298 } 299 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 300 "accept", 0); 301 if (error) { 302 ACCEPT_UNLOCK(); 303 goto noconnection; 304 } 305 } 306 if (head->so_error) { 307 error = head->so_error; 308 head->so_error = 0; 309 ACCEPT_UNLOCK(); 310 goto noconnection; 311 } 312 so = TAILQ_FIRST(&head->so_comp); 313 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 314 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 315 316 /* 317 * Before changing the flags on the socket, we have to bump the 318 * reference count. Otherwise, if the protocol calls sofree(), 319 * the socket will be released due to a zero refcount. 320 */ 321 SOCK_LOCK(so); 322 soref(so); /* file descriptor reference */ 323 SOCK_UNLOCK(so); 324 325 TAILQ_REMOVE(&head->so_comp, so, so_list); 326 head->so_qlen--; 327 so->so_state |= (head->so_state & SS_NBIO); 328 so->so_qstate &= ~SQ_COMP; 329 so->so_head = NULL; 330 331 ACCEPT_UNLOCK(); 332 333 /* An extra reference on `nfp' has been held for us by falloc(). */ 334 td->td_retval[0] = fd; 335 336 /* connection has been removed from the listen queue */ 337 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 338 339 pgid = fgetown(&head->so_sigio); 340 if (pgid != 0) 341 fsetown(pgid, &so->so_sigio); 342 343 FILE_LOCK(nfp); 344 nfp->f_data = so; /* nfp has ref count from falloc */ 345 nfp->f_flag = fflag; 346 nfp->f_ops = &socketops; 347 nfp->f_type = DTYPE_SOCKET; 348 FILE_UNLOCK(nfp); 349 /* Sync socket nonblocking/async state with file flags */ 350 tmp = fflag & FNONBLOCK; 351 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 352 tmp = fflag & FASYNC; 353 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 354 sa = 0; 355 error = soaccept(so, &sa); 356 if (error) { 357 /* 358 * return a namelen of zero for older code which might 359 * ignore the return value from accept. 360 */ 361 if (uap->name != NULL) { 362 namelen = 0; 363 (void) copyout(&namelen, 364 uap->anamelen, sizeof(*uap->anamelen)); 365 } 366 goto noconnection; 367 } 368 if (sa == NULL) { 369 namelen = 0; 370 if (uap->name) 371 goto gotnoname; 372 error = 0; 373 goto done; 374 } 375 if (uap->name) { 376 /* check sa_len before it is destroyed */ 377 if (namelen > sa->sa_len) 378 namelen = sa->sa_len; 379 #ifdef COMPAT_OLDSOCK 380 if (compat) 381 ((struct osockaddr *)sa)->sa_family = 382 sa->sa_family; 383 #endif 384 error = copyout(sa, uap->name, (u_int)namelen); 385 if (!error) 386 gotnoname: 387 error = copyout(&namelen, 388 uap->anamelen, sizeof (*uap->anamelen)); 389 } 390 noconnection: 391 if (sa) 392 FREE(sa, M_SONAME); 393 394 /* 395 * close the new descriptor, assuming someone hasn't ripped it 396 * out from under us. 397 */ 398 if (error) { 399 FILEDESC_LOCK(fdp); 400 if (fdp->fd_ofiles[fd] == nfp) { 401 fdp->fd_ofiles[fd] = NULL; 402 fdunused(fdp, fd); 403 FILEDESC_UNLOCK(fdp); 404 fdrop(nfp, td); 405 } else { 406 FILEDESC_UNLOCK(fdp); 407 } 408 } 409 410 /* 411 * Release explicitly held references before returning. 412 */ 413 done: 414 if (nfp != NULL) 415 fdrop(nfp, td); 416 fputsock(head); 417 done2: 418 NET_UNLOCK_GIANT(); 419 return (error); 420 } 421 422 /* 423 * MPSAFE (accept1() is MPSAFE) 424 */ 425 int 426 accept(td, uap) 427 struct thread *td; 428 struct accept_args *uap; 429 { 430 431 return (accept1(td, uap, 0)); 432 } 433 434 #ifdef COMPAT_OLDSOCK 435 /* 436 * MPSAFE (accept1() is MPSAFE) 437 */ 438 int 439 oaccept(td, uap) 440 struct thread *td; 441 struct accept_args *uap; 442 { 443 444 return (accept1(td, uap, 1)); 445 } 446 #endif /* COMPAT_OLDSOCK */ 447 448 /* 449 * MPSAFE 450 */ 451 /* ARGSUSED */ 452 int 453 connect(td, uap) 454 struct thread *td; 455 register struct connect_args /* { 456 int s; 457 caddr_t name; 458 int namelen; 459 } */ *uap; 460 { 461 struct sockaddr *sa; 462 int error; 463 464 error = getsockaddr(&sa, uap->name, uap->namelen); 465 if (error) 466 return (error); 467 468 return (kern_connect(td, uap->s, sa)); 469 } 470 471 472 int 473 kern_connect(td, fd, sa) 474 struct thread *td; 475 int fd; 476 struct sockaddr *sa; 477 { 478 struct socket *so; 479 int error, s; 480 int interrupted = 0; 481 482 NET_LOCK_GIANT(); 483 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 484 goto done2; 485 if (so->so_state & SS_ISCONNECTING) { 486 error = EALREADY; 487 goto done1; 488 } 489 #ifdef MAC 490 SOCK_LOCK(so); 491 error = mac_check_socket_connect(td->td_ucred, so, sa); 492 SOCK_UNLOCK(so); 493 if (error) 494 goto bad; 495 #endif 496 error = soconnect(so, sa, td); 497 if (error) 498 goto bad; 499 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 500 error = EINPROGRESS; 501 goto done1; 502 } 503 s = splnet(); 504 SOCK_LOCK(so); 505 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 506 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 507 "connec", 0); 508 if (error) { 509 if (error == EINTR || error == ERESTART) 510 interrupted = 1; 511 break; 512 } 513 } 514 if (error == 0) { 515 error = so->so_error; 516 so->so_error = 0; 517 } 518 SOCK_UNLOCK(so); 519 splx(s); 520 bad: 521 if (!interrupted) 522 so->so_state &= ~SS_ISCONNECTING; 523 if (error == ERESTART) 524 error = EINTR; 525 done1: 526 fputsock(so); 527 done2: 528 NET_UNLOCK_GIANT(); 529 FREE(sa, M_SONAME); 530 return (error); 531 } 532 533 /* 534 * MPSAFE 535 */ 536 int 537 socketpair(td, uap) 538 struct thread *td; 539 register struct socketpair_args /* { 540 int domain; 541 int type; 542 int protocol; 543 int *rsv; 544 } */ *uap; 545 { 546 register struct filedesc *fdp = td->td_proc->p_fd; 547 struct file *fp1, *fp2; 548 struct socket *so1, *so2; 549 int fd, error, sv[2]; 550 551 NET_LOCK_GIANT(); 552 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 553 td->td_ucred, td); 554 if (error) 555 goto done2; 556 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 557 td->td_ucred, td); 558 if (error) 559 goto free1; 560 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 561 error = falloc(td, &fp1, &fd); 562 if (error) 563 goto free2; 564 sv[0] = fd; 565 fp1->f_data = so1; /* so1 already has ref count */ 566 error = falloc(td, &fp2, &fd); 567 if (error) 568 goto free3; 569 fp2->f_data = so2; /* so2 already has ref count */ 570 sv[1] = fd; 571 error = soconnect2(so1, so2); 572 if (error) 573 goto free4; 574 if (uap->type == SOCK_DGRAM) { 575 /* 576 * Datagram socket connection is asymmetric. 577 */ 578 error = soconnect2(so2, so1); 579 if (error) 580 goto free4; 581 } 582 FILE_LOCK(fp1); 583 fp1->f_flag = FREAD|FWRITE; 584 fp1->f_ops = &socketops; 585 fp1->f_type = DTYPE_SOCKET; 586 FILE_UNLOCK(fp1); 587 FILE_LOCK(fp2); 588 fp2->f_flag = FREAD|FWRITE; 589 fp2->f_ops = &socketops; 590 fp2->f_type = DTYPE_SOCKET; 591 FILE_UNLOCK(fp2); 592 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 593 fdrop(fp1, td); 594 fdrop(fp2, td); 595 goto done2; 596 free4: 597 FILEDESC_LOCK(fdp); 598 if (fdp->fd_ofiles[sv[1]] == fp2) { 599 fdp->fd_ofiles[sv[1]] = NULL; 600 fdunused(fdp, sv[1]); 601 FILEDESC_UNLOCK(fdp); 602 fdrop(fp2, td); 603 } else { 604 FILEDESC_UNLOCK(fdp); 605 } 606 fdrop(fp2, td); 607 free3: 608 FILEDESC_LOCK(fdp); 609 if (fdp->fd_ofiles[sv[0]] == fp1) { 610 fdp->fd_ofiles[sv[0]] = NULL; 611 fdunused(fdp, sv[0]); 612 FILEDESC_UNLOCK(fdp); 613 fdrop(fp1, td); 614 } else { 615 FILEDESC_UNLOCK(fdp); 616 } 617 fdrop(fp1, td); 618 free2: 619 (void)soclose(so2); 620 free1: 621 (void)soclose(so1); 622 done2: 623 NET_UNLOCK_GIANT(); 624 return (error); 625 } 626 627 static int 628 sendit(td, s, mp, flags) 629 register struct thread *td; 630 int s; 631 register struct msghdr *mp; 632 int flags; 633 { 634 struct mbuf *control; 635 struct sockaddr *to; 636 int error; 637 638 if (mp->msg_name != NULL) { 639 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 640 if (error) { 641 to = NULL; 642 goto bad; 643 } 644 mp->msg_name = to; 645 } else { 646 to = NULL; 647 } 648 649 if (mp->msg_control) { 650 if (mp->msg_controllen < sizeof(struct cmsghdr) 651 #ifdef COMPAT_OLDSOCK 652 && mp->msg_flags != MSG_COMPAT 653 #endif 654 ) { 655 error = EINVAL; 656 goto bad; 657 } 658 error = sockargs(&control, mp->msg_control, 659 mp->msg_controllen, MT_CONTROL); 660 if (error) 661 goto bad; 662 #ifdef COMPAT_OLDSOCK 663 if (mp->msg_flags == MSG_COMPAT) { 664 register struct cmsghdr *cm; 665 666 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 667 if (control == 0) { 668 error = ENOBUFS; 669 goto bad; 670 } else { 671 cm = mtod(control, struct cmsghdr *); 672 cm->cmsg_len = control->m_len; 673 cm->cmsg_level = SOL_SOCKET; 674 cm->cmsg_type = SCM_RIGHTS; 675 } 676 } 677 #endif 678 } else { 679 control = NULL; 680 } 681 682 error = kern_sendit(td, s, mp, flags, control); 683 684 bad: 685 if (to) 686 FREE(to, M_SONAME); 687 return (error); 688 } 689 690 int 691 kern_sendit(td, s, mp, flags, control) 692 struct thread *td; 693 int s; 694 struct msghdr *mp; 695 int flags; 696 struct mbuf *control; 697 { 698 struct uio auio; 699 struct iovec *iov; 700 struct socket *so; 701 int i; 702 int len, error; 703 #ifdef KTRACE 704 struct uio *ktruio = NULL; 705 #endif 706 707 NET_LOCK_GIANT(); 708 if ((error = fgetsock(td, s, &so, NULL)) != 0) 709 goto bad2; 710 711 #ifdef MAC 712 SOCK_LOCK(so); 713 error = mac_check_socket_send(td->td_ucred, so); 714 SOCK_UNLOCK(so); 715 if (error) 716 goto bad; 717 #endif 718 719 auio.uio_iov = mp->msg_iov; 720 auio.uio_iovcnt = mp->msg_iovlen; 721 auio.uio_segflg = UIO_USERSPACE; 722 auio.uio_rw = UIO_WRITE; 723 auio.uio_td = td; 724 auio.uio_offset = 0; /* XXX */ 725 auio.uio_resid = 0; 726 iov = mp->msg_iov; 727 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 728 if ((auio.uio_resid += iov->iov_len) < 0) { 729 error = EINVAL; 730 goto bad; 731 } 732 } 733 #ifdef KTRACE 734 if (KTRPOINT(td, KTR_GENIO)) 735 ktruio = cloneuio(&auio); 736 #endif 737 len = auio.uio_resid; 738 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 739 0, control, flags, td); 740 if (error) { 741 if (auio.uio_resid != len && (error == ERESTART || 742 error == EINTR || error == EWOULDBLOCK)) 743 error = 0; 744 /* Generation of SIGPIPE can be controlled per socket */ 745 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 746 PROC_LOCK(td->td_proc); 747 psignal(td->td_proc, SIGPIPE); 748 PROC_UNLOCK(td->td_proc); 749 } 750 } 751 if (error == 0) 752 td->td_retval[0] = len - auio.uio_resid; 753 #ifdef KTRACE 754 if (ktruio != NULL) { 755 ktruio->uio_resid = td->td_retval[0]; 756 ktrgenio(s, UIO_WRITE, ktruio, error); 757 } 758 #endif 759 bad: 760 fputsock(so); 761 bad2: 762 NET_UNLOCK_GIANT(); 763 return (error); 764 } 765 766 /* 767 * MPSAFE 768 */ 769 int 770 sendto(td, uap) 771 struct thread *td; 772 register struct sendto_args /* { 773 int s; 774 caddr_t buf; 775 size_t len; 776 int flags; 777 caddr_t to; 778 int tolen; 779 } */ *uap; 780 { 781 struct msghdr msg; 782 struct iovec aiov; 783 int error; 784 785 msg.msg_name = uap->to; 786 msg.msg_namelen = uap->tolen; 787 msg.msg_iov = &aiov; 788 msg.msg_iovlen = 1; 789 msg.msg_control = 0; 790 #ifdef COMPAT_OLDSOCK 791 msg.msg_flags = 0; 792 #endif 793 aiov.iov_base = uap->buf; 794 aiov.iov_len = uap->len; 795 error = sendit(td, uap->s, &msg, uap->flags); 796 return (error); 797 } 798 799 #ifdef COMPAT_OLDSOCK 800 /* 801 * MPSAFE 802 */ 803 int 804 osend(td, uap) 805 struct thread *td; 806 register struct osend_args /* { 807 int s; 808 caddr_t buf; 809 int len; 810 int flags; 811 } */ *uap; 812 { 813 struct msghdr msg; 814 struct iovec aiov; 815 int error; 816 817 msg.msg_name = 0; 818 msg.msg_namelen = 0; 819 msg.msg_iov = &aiov; 820 msg.msg_iovlen = 1; 821 aiov.iov_base = uap->buf; 822 aiov.iov_len = uap->len; 823 msg.msg_control = 0; 824 msg.msg_flags = 0; 825 error = sendit(td, uap->s, &msg, uap->flags); 826 return (error); 827 } 828 829 /* 830 * MPSAFE 831 */ 832 int 833 osendmsg(td, uap) 834 struct thread *td; 835 struct osendmsg_args /* { 836 int s; 837 caddr_t msg; 838 int flags; 839 } */ *uap; 840 { 841 struct msghdr msg; 842 struct iovec *iov; 843 int error; 844 845 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 846 if (error) 847 return (error); 848 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 849 if (error) 850 return (error); 851 msg.msg_iov = iov; 852 msg.msg_flags = MSG_COMPAT; 853 error = sendit(td, uap->s, &msg, uap->flags); 854 free(iov, M_IOV); 855 return (error); 856 } 857 #endif 858 859 /* 860 * MPSAFE 861 */ 862 int 863 sendmsg(td, uap) 864 struct thread *td; 865 struct sendmsg_args /* { 866 int s; 867 caddr_t msg; 868 int flags; 869 } */ *uap; 870 { 871 struct msghdr msg; 872 struct iovec *iov; 873 int error; 874 875 error = copyin(uap->msg, &msg, sizeof (msg)); 876 if (error) 877 return (error); 878 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 879 if (error) 880 return (error); 881 msg.msg_iov = iov; 882 #ifdef COMPAT_OLDSOCK 883 msg.msg_flags = 0; 884 #endif 885 error = sendit(td, uap->s, &msg, uap->flags); 886 free(iov, M_IOV); 887 return (error); 888 } 889 890 static int 891 recvit(td, s, mp, namelenp) 892 struct thread *td; 893 int s; 894 struct msghdr *mp; 895 void *namelenp; 896 { 897 struct uio auio; 898 struct iovec *iov; 899 int i; 900 socklen_t len; 901 int error; 902 struct mbuf *m, *control = 0; 903 caddr_t ctlbuf; 904 struct socket *so; 905 struct sockaddr *fromsa = 0; 906 #ifdef KTRACE 907 struct uio *ktruio = NULL; 908 #endif 909 910 NET_LOCK_GIANT(); 911 if ((error = fgetsock(td, s, &so, NULL)) != 0) { 912 NET_UNLOCK_GIANT(); 913 return (error); 914 } 915 916 #ifdef MAC 917 SOCK_LOCK(so); 918 error = mac_check_socket_receive(td->td_ucred, so); 919 SOCK_UNLOCK(so); 920 if (error) { 921 fputsock(so); 922 NET_UNLOCK_GIANT(); 923 return (error); 924 } 925 #endif 926 927 auio.uio_iov = mp->msg_iov; 928 auio.uio_iovcnt = mp->msg_iovlen; 929 auio.uio_segflg = UIO_USERSPACE; 930 auio.uio_rw = UIO_READ; 931 auio.uio_td = td; 932 auio.uio_offset = 0; /* XXX */ 933 auio.uio_resid = 0; 934 iov = mp->msg_iov; 935 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 936 if ((auio.uio_resid += iov->iov_len) < 0) { 937 fputsock(so); 938 NET_UNLOCK_GIANT(); 939 return (EINVAL); 940 } 941 } 942 #ifdef KTRACE 943 if (KTRPOINT(td, KTR_GENIO)) 944 ktruio = cloneuio(&auio); 945 #endif 946 len = auio.uio_resid; 947 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 948 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 949 &mp->msg_flags); 950 if (error) { 951 if (auio.uio_resid != (int)len && (error == ERESTART || 952 error == EINTR || error == EWOULDBLOCK)) 953 error = 0; 954 } 955 #ifdef KTRACE 956 if (ktruio != NULL) { 957 ktruio->uio_resid = (int)len - auio.uio_resid; 958 ktrgenio(s, UIO_READ, ktruio, error); 959 } 960 #endif 961 if (error) 962 goto out; 963 td->td_retval[0] = (int)len - auio.uio_resid; 964 if (mp->msg_name) { 965 len = mp->msg_namelen; 966 if (len <= 0 || fromsa == 0) 967 len = 0; 968 else { 969 /* save sa_len before it is destroyed by MSG_COMPAT */ 970 len = MIN(len, fromsa->sa_len); 971 #ifdef COMPAT_OLDSOCK 972 if (mp->msg_flags & MSG_COMPAT) 973 ((struct osockaddr *)fromsa)->sa_family = 974 fromsa->sa_family; 975 #endif 976 error = copyout(fromsa, mp->msg_name, (unsigned)len); 977 if (error) 978 goto out; 979 } 980 mp->msg_namelen = len; 981 if (namelenp && 982 (error = copyout(&len, namelenp, sizeof (socklen_t)))) { 983 #ifdef COMPAT_OLDSOCK 984 if (mp->msg_flags & MSG_COMPAT) 985 error = 0; /* old recvfrom didn't check */ 986 else 987 #endif 988 goto out; 989 } 990 } 991 if (mp->msg_control) { 992 #ifdef COMPAT_OLDSOCK 993 /* 994 * We assume that old recvmsg calls won't receive access 995 * rights and other control info, esp. as control info 996 * is always optional and those options didn't exist in 4.3. 997 * If we receive rights, trim the cmsghdr; anything else 998 * is tossed. 999 */ 1000 if (control && mp->msg_flags & MSG_COMPAT) { 1001 if (mtod(control, struct cmsghdr *)->cmsg_level != 1002 SOL_SOCKET || 1003 mtod(control, struct cmsghdr *)->cmsg_type != 1004 SCM_RIGHTS) { 1005 mp->msg_controllen = 0; 1006 goto out; 1007 } 1008 control->m_len -= sizeof (struct cmsghdr); 1009 control->m_data += sizeof (struct cmsghdr); 1010 } 1011 #endif 1012 len = mp->msg_controllen; 1013 m = control; 1014 mp->msg_controllen = 0; 1015 ctlbuf = mp->msg_control; 1016 1017 while (m && len > 0) { 1018 unsigned int tocopy; 1019 1020 if (len >= m->m_len) 1021 tocopy = m->m_len; 1022 else { 1023 mp->msg_flags |= MSG_CTRUNC; 1024 tocopy = len; 1025 } 1026 1027 if ((error = copyout(mtod(m, caddr_t), 1028 ctlbuf, tocopy)) != 0) 1029 goto out; 1030 1031 ctlbuf += tocopy; 1032 len -= tocopy; 1033 m = m->m_next; 1034 } 1035 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1036 } 1037 out: 1038 fputsock(so); 1039 NET_UNLOCK_GIANT(); 1040 if (fromsa) 1041 FREE(fromsa, M_SONAME); 1042 if (control) 1043 m_freem(control); 1044 return (error); 1045 } 1046 1047 /* 1048 * MPSAFE 1049 */ 1050 int 1051 recvfrom(td, uap) 1052 struct thread *td; 1053 register struct recvfrom_args /* { 1054 int s; 1055 caddr_t buf; 1056 size_t len; 1057 int flags; 1058 struct sockaddr * __restrict from; 1059 socklen_t * __restrict fromlenaddr; 1060 } */ *uap; 1061 { 1062 struct msghdr msg; 1063 struct iovec aiov; 1064 int error; 1065 1066 if (uap->fromlenaddr) { 1067 error = copyin(uap->fromlenaddr, 1068 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1069 if (error) 1070 goto done2; 1071 } else { 1072 msg.msg_namelen = 0; 1073 } 1074 msg.msg_name = uap->from; 1075 msg.msg_iov = &aiov; 1076 msg.msg_iovlen = 1; 1077 aiov.iov_base = uap->buf; 1078 aiov.iov_len = uap->len; 1079 msg.msg_control = 0; 1080 msg.msg_flags = uap->flags; 1081 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1082 done2: 1083 return(error); 1084 } 1085 1086 #ifdef COMPAT_OLDSOCK 1087 /* 1088 * MPSAFE 1089 */ 1090 int 1091 orecvfrom(td, uap) 1092 struct thread *td; 1093 struct recvfrom_args *uap; 1094 { 1095 1096 uap->flags |= MSG_COMPAT; 1097 return (recvfrom(td, uap)); 1098 } 1099 #endif 1100 1101 1102 #ifdef COMPAT_OLDSOCK 1103 /* 1104 * MPSAFE 1105 */ 1106 int 1107 orecv(td, uap) 1108 struct thread *td; 1109 register struct orecv_args /* { 1110 int s; 1111 caddr_t buf; 1112 int len; 1113 int flags; 1114 } */ *uap; 1115 { 1116 struct msghdr msg; 1117 struct iovec aiov; 1118 int error; 1119 1120 msg.msg_name = 0; 1121 msg.msg_namelen = 0; 1122 msg.msg_iov = &aiov; 1123 msg.msg_iovlen = 1; 1124 aiov.iov_base = uap->buf; 1125 aiov.iov_len = uap->len; 1126 msg.msg_control = 0; 1127 msg.msg_flags = uap->flags; 1128 error = recvit(td, uap->s, &msg, NULL); 1129 return (error); 1130 } 1131 1132 /* 1133 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1134 * overlays the new one, missing only the flags, and with the (old) access 1135 * rights where the control fields are now. 1136 * 1137 * MPSAFE 1138 */ 1139 int 1140 orecvmsg(td, uap) 1141 struct thread *td; 1142 struct orecvmsg_args /* { 1143 int s; 1144 struct omsghdr *msg; 1145 int flags; 1146 } */ *uap; 1147 { 1148 struct msghdr msg; 1149 struct iovec *iov; 1150 int error; 1151 1152 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1153 if (error) 1154 return (error); 1155 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1156 if (error) 1157 return (error); 1158 msg.msg_flags = uap->flags | MSG_COMPAT; 1159 msg.msg_iov = iov; 1160 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1161 if (msg.msg_controllen && error == 0) 1162 error = copyout(&msg.msg_controllen, 1163 &uap->msg->msg_accrightslen, sizeof (int)); 1164 free(iov, M_IOV); 1165 return (error); 1166 } 1167 #endif 1168 1169 /* 1170 * MPSAFE 1171 */ 1172 int 1173 recvmsg(td, uap) 1174 struct thread *td; 1175 struct recvmsg_args /* { 1176 int s; 1177 struct msghdr *msg; 1178 int flags; 1179 } */ *uap; 1180 { 1181 struct msghdr msg; 1182 struct iovec *uiov, *iov; 1183 int error; 1184 1185 error = copyin(uap->msg, &msg, sizeof (msg)); 1186 if (error) 1187 return (error); 1188 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1189 if (error) 1190 return (error); 1191 msg.msg_flags = uap->flags; 1192 #ifdef COMPAT_OLDSOCK 1193 msg.msg_flags &= ~MSG_COMPAT; 1194 #endif 1195 uiov = msg.msg_iov; 1196 msg.msg_iov = iov; 1197 error = recvit(td, uap->s, &msg, NULL); 1198 if (error == 0) { 1199 msg.msg_iov = uiov; 1200 error = copyout(&msg, uap->msg, sizeof(msg)); 1201 } 1202 free(iov, M_IOV); 1203 return (error); 1204 } 1205 1206 /* 1207 * MPSAFE 1208 */ 1209 /* ARGSUSED */ 1210 int 1211 shutdown(td, uap) 1212 struct thread *td; 1213 register struct shutdown_args /* { 1214 int s; 1215 int how; 1216 } */ *uap; 1217 { 1218 struct socket *so; 1219 int error; 1220 1221 NET_LOCK_GIANT(); 1222 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1223 error = soshutdown(so, uap->how); 1224 fputsock(so); 1225 } 1226 NET_UNLOCK_GIANT(); 1227 return(error); 1228 } 1229 1230 /* 1231 * MPSAFE 1232 */ 1233 /* ARGSUSED */ 1234 int 1235 setsockopt(td, uap) 1236 struct thread *td; 1237 register struct setsockopt_args /* { 1238 int s; 1239 int level; 1240 int name; 1241 caddr_t val; 1242 int valsize; 1243 } */ *uap; 1244 { 1245 1246 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1247 uap->val, UIO_USERSPACE, uap->valsize)); 1248 } 1249 1250 int 1251 kern_setsockopt(td, s, level, name, val, valseg, valsize) 1252 struct thread *td; 1253 int s; 1254 int level; 1255 int name; 1256 void *val; 1257 enum uio_seg valseg; 1258 socklen_t valsize; 1259 { 1260 int error; 1261 struct socket *so; 1262 struct sockopt sopt; 1263 1264 if (val == NULL && valsize != 0) 1265 return (EFAULT); 1266 if (valsize < 0) 1267 return (EINVAL); 1268 1269 sopt.sopt_dir = SOPT_SET; 1270 sopt.sopt_level = level; 1271 sopt.sopt_name = name; 1272 sopt.sopt_val = val; 1273 sopt.sopt_valsize = valsize; 1274 switch (valseg) { 1275 case UIO_USERSPACE: 1276 sopt.sopt_td = td; 1277 break; 1278 case UIO_SYSSPACE: 1279 sopt.sopt_td = NULL; 1280 break; 1281 default: 1282 panic("kern_setsockopt called with bad valseg"); 1283 } 1284 1285 NET_LOCK_GIANT(); 1286 if ((error = fgetsock(td, s, &so, NULL)) == 0) { 1287 error = sosetopt(so, &sopt); 1288 fputsock(so); 1289 } 1290 NET_UNLOCK_GIANT(); 1291 return(error); 1292 } 1293 1294 /* 1295 * MPSAFE 1296 */ 1297 /* ARGSUSED */ 1298 int 1299 getsockopt(td, uap) 1300 struct thread *td; 1301 register struct getsockopt_args /* { 1302 int s; 1303 int level; 1304 int name; 1305 void * __restrict val; 1306 socklen_t * __restrict avalsize; 1307 } */ *uap; 1308 { 1309 socklen_t valsize; 1310 int error; 1311 1312 if (uap->val) { 1313 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1314 if (error) 1315 return (error); 1316 } 1317 1318 error = kern_getsockopt(td, uap->s, uap->level, uap->name, 1319 uap->val, UIO_USERSPACE, &valsize); 1320 1321 if (error == 0) 1322 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1323 return (error); 1324 } 1325 1326 /* 1327 * Kernel version of getsockopt. 1328 * optval can be a userland or userspace. optlen is always a kernel pointer. 1329 */ 1330 int 1331 kern_getsockopt(td, s, level, name, val, valseg, valsize) 1332 struct thread *td; 1333 int s; 1334 int level; 1335 int name; 1336 void *val; 1337 enum uio_seg valseg; 1338 socklen_t *valsize; 1339 { 1340 int error; 1341 struct socket *so; 1342 struct sockopt sopt; 1343 1344 if (val == NULL) 1345 *valsize = 0; 1346 if (*valsize < 0) 1347 return (EINVAL); 1348 1349 sopt.sopt_dir = SOPT_GET; 1350 sopt.sopt_level = level; 1351 sopt.sopt_name = name; 1352 sopt.sopt_val = val; 1353 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1354 switch (valseg) { 1355 case UIO_USERSPACE: 1356 sopt.sopt_td = td; 1357 break; 1358 case UIO_SYSSPACE: 1359 sopt.sopt_td = NULL; 1360 break; 1361 default: 1362 panic("kern_getsockopt called with bad valseg"); 1363 } 1364 1365 NET_LOCK_GIANT(); 1366 if ((error = fgetsock(td, s, &so, NULL)) == 0) { 1367 error = sogetopt(so, &sopt); 1368 *valsize = sopt.sopt_valsize; 1369 fputsock(so); 1370 } 1371 NET_UNLOCK_GIANT(); 1372 return (error); 1373 } 1374 1375 /* 1376 * getsockname1() - Get socket name. 1377 * 1378 * MPSAFE 1379 */ 1380 /* ARGSUSED */ 1381 static int 1382 getsockname1(td, uap, compat) 1383 struct thread *td; 1384 register struct getsockname_args /* { 1385 int fdes; 1386 struct sockaddr * __restrict asa; 1387 socklen_t * __restrict alen; 1388 } */ *uap; 1389 int compat; 1390 { 1391 struct socket *so; 1392 struct sockaddr *sa; 1393 socklen_t len; 1394 int error; 1395 1396 NET_LOCK_GIANT(); 1397 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1398 goto done2; 1399 error = copyin(uap->alen, &len, sizeof (len)); 1400 if (error) 1401 goto done1; 1402 if (len < 0) { 1403 error = EINVAL; 1404 goto done1; 1405 } 1406 sa = 0; 1407 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1408 if (error) 1409 goto bad; 1410 if (sa == 0) { 1411 len = 0; 1412 goto gotnothing; 1413 } 1414 1415 len = MIN(len, sa->sa_len); 1416 #ifdef COMPAT_OLDSOCK 1417 if (compat) 1418 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1419 #endif 1420 error = copyout(sa, uap->asa, (u_int)len); 1421 if (error == 0) 1422 gotnothing: 1423 error = copyout(&len, uap->alen, sizeof (len)); 1424 bad: 1425 if (sa) 1426 FREE(sa, M_SONAME); 1427 done1: 1428 fputsock(so); 1429 done2: 1430 NET_UNLOCK_GIANT(); 1431 return (error); 1432 } 1433 1434 /* 1435 * MPSAFE 1436 */ 1437 int 1438 getsockname(td, uap) 1439 struct thread *td; 1440 struct getsockname_args *uap; 1441 { 1442 1443 return (getsockname1(td, uap, 0)); 1444 } 1445 1446 #ifdef COMPAT_OLDSOCK 1447 /* 1448 * MPSAFE 1449 */ 1450 int 1451 ogetsockname(td, uap) 1452 struct thread *td; 1453 struct getsockname_args *uap; 1454 { 1455 1456 return (getsockname1(td, uap, 1)); 1457 } 1458 #endif /* COMPAT_OLDSOCK */ 1459 1460 /* 1461 * getpeername1() - Get name of peer for connected socket. 1462 * 1463 * MPSAFE 1464 */ 1465 /* ARGSUSED */ 1466 static int 1467 getpeername1(td, uap, compat) 1468 struct thread *td; 1469 register struct getpeername_args /* { 1470 int fdes; 1471 struct sockaddr * __restrict asa; 1472 socklen_t * __restrict alen; 1473 } */ *uap; 1474 int compat; 1475 { 1476 struct socket *so; 1477 struct sockaddr *sa; 1478 socklen_t len; 1479 int error; 1480 1481 NET_LOCK_GIANT(); 1482 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1483 goto done2; 1484 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1485 error = ENOTCONN; 1486 goto done1; 1487 } 1488 error = copyin(uap->alen, &len, sizeof (len)); 1489 if (error) 1490 goto done1; 1491 if (len < 0) { 1492 error = EINVAL; 1493 goto done1; 1494 } 1495 sa = 0; 1496 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1497 if (error) 1498 goto bad; 1499 if (sa == 0) { 1500 len = 0; 1501 goto gotnothing; 1502 } 1503 len = MIN(len, sa->sa_len); 1504 #ifdef COMPAT_OLDSOCK 1505 if (compat) 1506 ((struct osockaddr *)sa)->sa_family = 1507 sa->sa_family; 1508 #endif 1509 error = copyout(sa, uap->asa, (u_int)len); 1510 if (error) 1511 goto bad; 1512 gotnothing: 1513 error = copyout(&len, uap->alen, sizeof (len)); 1514 bad: 1515 if (sa) 1516 FREE(sa, M_SONAME); 1517 done1: 1518 fputsock(so); 1519 done2: 1520 NET_UNLOCK_GIANT(); 1521 return (error); 1522 } 1523 1524 /* 1525 * MPSAFE 1526 */ 1527 int 1528 getpeername(td, uap) 1529 struct thread *td; 1530 struct getpeername_args *uap; 1531 { 1532 1533 return (getpeername1(td, uap, 0)); 1534 } 1535 1536 #ifdef COMPAT_OLDSOCK 1537 /* 1538 * MPSAFE 1539 */ 1540 int 1541 ogetpeername(td, uap) 1542 struct thread *td; 1543 struct ogetpeername_args *uap; 1544 { 1545 1546 /* XXX uap should have type `getpeername_args *' to begin with. */ 1547 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1548 } 1549 #endif /* COMPAT_OLDSOCK */ 1550 1551 int 1552 sockargs(mp, buf, buflen, type) 1553 struct mbuf **mp; 1554 caddr_t buf; 1555 int buflen, type; 1556 { 1557 register struct sockaddr *sa; 1558 register struct mbuf *m; 1559 int error; 1560 1561 if ((u_int)buflen > MLEN) { 1562 #ifdef COMPAT_OLDSOCK 1563 if (type == MT_SONAME && (u_int)buflen <= 112) 1564 buflen = MLEN; /* unix domain compat. hack */ 1565 else 1566 #endif 1567 if ((u_int)buflen > MCLBYTES) 1568 return (EINVAL); 1569 } 1570 m = m_get(M_TRYWAIT, type); 1571 if (m == NULL) 1572 return (ENOBUFS); 1573 if ((u_int)buflen > MLEN) { 1574 MCLGET(m, M_TRYWAIT); 1575 if ((m->m_flags & M_EXT) == 0) { 1576 m_free(m); 1577 return (ENOBUFS); 1578 } 1579 } 1580 m->m_len = buflen; 1581 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1582 if (error) 1583 (void) m_free(m); 1584 else { 1585 *mp = m; 1586 if (type == MT_SONAME) { 1587 sa = mtod(m, struct sockaddr *); 1588 1589 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1590 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1591 sa->sa_family = sa->sa_len; 1592 #endif 1593 sa->sa_len = buflen; 1594 } 1595 } 1596 return (error); 1597 } 1598 1599 int 1600 getsockaddr(namp, uaddr, len) 1601 struct sockaddr **namp; 1602 caddr_t uaddr; 1603 size_t len; 1604 { 1605 struct sockaddr *sa; 1606 int error; 1607 1608 if (len > SOCK_MAXADDRLEN) 1609 return (ENAMETOOLONG); 1610 if (len < offsetof(struct sockaddr, sa_data[0])) 1611 return (EINVAL); 1612 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1613 error = copyin(uaddr, sa, len); 1614 if (error) { 1615 FREE(sa, M_SONAME); 1616 } else { 1617 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1618 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1619 sa->sa_family = sa->sa_len; 1620 #endif 1621 sa->sa_len = len; 1622 *namp = sa; 1623 } 1624 return (error); 1625 } 1626 1627 /* 1628 * Detach mapped page and release resources back to the system. 1629 */ 1630 void 1631 sf_buf_mext(void *addr, void *args) 1632 { 1633 vm_page_t m; 1634 1635 m = sf_buf_page(args); 1636 sf_buf_free(args); 1637 vm_page_lock_queues(); 1638 vm_page_unwire(m, 0); 1639 /* 1640 * Check for the object going away on us. This can 1641 * happen since we don't hold a reference to it. 1642 * If so, we're responsible for freeing the page. 1643 */ 1644 if (m->wire_count == 0 && m->object == NULL) 1645 vm_page_free(m); 1646 vm_page_unlock_queues(); 1647 } 1648 1649 /* 1650 * sendfile(2) 1651 * 1652 * MPSAFE 1653 * 1654 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1655 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1656 * 1657 * Send a file specified by 'fd' and starting at 'offset' to a socket 1658 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1659 * nbytes == 0. Optionally add a header and/or trailer to the socket 1660 * output. If specified, write the total number of bytes sent into *sbytes. 1661 * 1662 */ 1663 int 1664 sendfile(struct thread *td, struct sendfile_args *uap) 1665 { 1666 1667 return (do_sendfile(td, uap, 0)); 1668 } 1669 1670 #ifdef COMPAT_FREEBSD4 1671 int 1672 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1673 { 1674 struct sendfile_args args; 1675 1676 args.fd = uap->fd; 1677 args.s = uap->s; 1678 args.offset = uap->offset; 1679 args.nbytes = uap->nbytes; 1680 args.hdtr = uap->hdtr; 1681 args.sbytes = uap->sbytes; 1682 args.flags = uap->flags; 1683 1684 return (do_sendfile(td, &args, 1)); 1685 } 1686 #endif /* COMPAT_FREEBSD4 */ 1687 1688 static int 1689 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1690 { 1691 struct vnode *vp; 1692 struct vm_object *obj; 1693 struct socket *so = NULL; 1694 struct mbuf *m, *m_header = NULL; 1695 struct sf_buf *sf; 1696 struct vm_page *pg; 1697 struct writev_args nuap; 1698 struct sf_hdtr hdtr; 1699 struct uio *hdr_uio = NULL; 1700 off_t off, xfsize, hdtr_size, sbytes = 0; 1701 int error, headersize = 0, headersent = 0; 1702 1703 mtx_lock(&Giant); 1704 1705 hdtr_size = 0; 1706 1707 /* 1708 * The descriptor must be a regular file and have a backing VM object. 1709 */ 1710 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1711 goto done; 1712 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1713 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1714 error = EINVAL; 1715 VOP_UNLOCK(vp, 0, td); 1716 goto done; 1717 } 1718 VOP_UNLOCK(vp, 0, td); 1719 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1720 goto done; 1721 if (so->so_type != SOCK_STREAM) { 1722 error = EINVAL; 1723 goto done; 1724 } 1725 if ((so->so_state & SS_ISCONNECTED) == 0) { 1726 error = ENOTCONN; 1727 goto done; 1728 } 1729 if (uap->offset < 0) { 1730 error = EINVAL; 1731 goto done; 1732 } 1733 1734 #ifdef MAC 1735 SOCK_LOCK(so); 1736 error = mac_check_socket_send(td->td_ucred, so); 1737 SOCK_UNLOCK(so); 1738 if (error) 1739 goto done; 1740 #endif 1741 1742 /* 1743 * If specified, get the pointer to the sf_hdtr struct for 1744 * any headers/trailers. 1745 */ 1746 if (uap->hdtr != NULL) { 1747 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1748 if (error) 1749 goto done; 1750 /* 1751 * Send any headers. 1752 */ 1753 if (hdtr.headers != NULL) { 1754 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1755 if (error) 1756 goto done; 1757 hdr_uio->uio_td = td; 1758 hdr_uio->uio_rw = UIO_WRITE; 1759 if (hdr_uio->uio_resid > 0) { 1760 m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0); 1761 if (m_header == NULL) 1762 goto done; 1763 headersize = m_header->m_pkthdr.len; 1764 if (compat) 1765 sbytes += headersize; 1766 } 1767 } 1768 } 1769 1770 /* 1771 * Protect against multiple writers to the socket. 1772 */ 1773 SOCKBUF_LOCK(&so->so_snd); 1774 (void) sblock(&so->so_snd, M_WAITOK); 1775 SOCKBUF_UNLOCK(&so->so_snd); 1776 1777 /* 1778 * Loop through the pages in the file, starting with the requested 1779 * offset. Get a file page (do I/O if necessary), map the file page 1780 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1781 * it on the socket. 1782 */ 1783 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1784 vm_pindex_t pindex; 1785 vm_offset_t pgoff; 1786 1787 pindex = OFF_TO_IDX(off); 1788 VM_OBJECT_LOCK(obj); 1789 retry_lookup: 1790 /* 1791 * Calculate the amount to transfer. Not to exceed a page, 1792 * the EOF, or the passed in nbytes. 1793 */ 1794 xfsize = obj->un_pager.vnp.vnp_size - off; 1795 VM_OBJECT_UNLOCK(obj); 1796 if (xfsize > PAGE_SIZE) 1797 xfsize = PAGE_SIZE; 1798 pgoff = (vm_offset_t)(off & PAGE_MASK); 1799 if (PAGE_SIZE - pgoff < xfsize) 1800 xfsize = PAGE_SIZE - pgoff; 1801 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1802 xfsize = uap->nbytes - sbytes; 1803 if (xfsize <= 0) { 1804 if (m_header != NULL) { 1805 m = m_header; 1806 m_header = NULL; 1807 goto retry_space; 1808 } else 1809 break; 1810 } 1811 /* 1812 * Optimize the non-blocking case by looking at the socket space 1813 * before going to the extra work of constituting the sf_buf. 1814 */ 1815 SOCKBUF_LOCK(&so->so_snd); 1816 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1817 if (so->so_snd.sb_state & SBS_CANTSENDMORE) 1818 error = EPIPE; 1819 else 1820 error = EAGAIN; 1821 sbunlock(&so->so_snd); 1822 SOCKBUF_UNLOCK(&so->so_snd); 1823 goto done; 1824 } 1825 SOCKBUF_UNLOCK(&so->so_snd); 1826 VM_OBJECT_LOCK(obj); 1827 /* 1828 * Attempt to look up the page. 1829 * 1830 * Allocate if not found 1831 * 1832 * Wait and loop if busy. 1833 */ 1834 pg = vm_page_lookup(obj, pindex); 1835 1836 if (pg == NULL) { 1837 pg = vm_page_alloc(obj, pindex, 1838 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1839 if (pg == NULL) { 1840 VM_OBJECT_UNLOCK(obj); 1841 VM_WAIT; 1842 VM_OBJECT_LOCK(obj); 1843 goto retry_lookup; 1844 } 1845 vm_page_lock_queues(); 1846 vm_page_wakeup(pg); 1847 } else { 1848 vm_page_lock_queues(); 1849 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1850 goto retry_lookup; 1851 /* 1852 * Wire the page so it does not get ripped out from 1853 * under us. 1854 */ 1855 vm_page_wire(pg); 1856 } 1857 1858 /* 1859 * If page is not valid for what we need, initiate I/O 1860 */ 1861 1862 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) { 1863 VM_OBJECT_UNLOCK(obj); 1864 } else if (uap->flags & SF_NODISKIO) { 1865 error = EBUSY; 1866 } else { 1867 int bsize, resid; 1868 1869 /* 1870 * Ensure that our page is still around when the I/O 1871 * completes. 1872 */ 1873 vm_page_io_start(pg); 1874 vm_page_unlock_queues(); 1875 VM_OBJECT_UNLOCK(obj); 1876 1877 /* 1878 * Get the page from backing store. 1879 */ 1880 bsize = vp->v_mount->mnt_stat.f_iosize; 1881 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1882 /* 1883 * XXXMAC: Because we don't have fp->f_cred here, 1884 * we pass in NOCRED. This is probably wrong, but 1885 * is consistent with our original implementation. 1886 */ 1887 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1888 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1889 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 1890 td->td_ucred, NOCRED, &resid, td); 1891 VOP_UNLOCK(vp, 0, td); 1892 if (error) 1893 VM_OBJECT_LOCK(obj); 1894 vm_page_lock_queues(); 1895 vm_page_io_finish(pg); 1896 mbstat.sf_iocnt++; 1897 } 1898 1899 if (error) { 1900 vm_page_unwire(pg, 0); 1901 /* 1902 * See if anyone else might know about this page. 1903 * If not and it is not valid, then free it. 1904 */ 1905 if (pg->wire_count == 0 && pg->valid == 0 && 1906 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1907 pg->hold_count == 0) { 1908 vm_page_busy(pg); 1909 vm_page_free(pg); 1910 } 1911 vm_page_unlock_queues(); 1912 VM_OBJECT_UNLOCK(obj); 1913 SOCKBUF_LOCK(&so->so_snd); 1914 sbunlock(&so->so_snd); 1915 SOCKBUF_UNLOCK(&so->so_snd); 1916 goto done; 1917 } 1918 vm_page_unlock_queues(); 1919 1920 /* 1921 * Get a sendfile buf. We usually wait as long as necessary, 1922 * but this wait can be interrupted. 1923 */ 1924 if ((sf = sf_buf_alloc(pg, PCATCH)) == NULL) { 1925 mbstat.sf_allocfail++; 1926 vm_page_lock_queues(); 1927 vm_page_unwire(pg, 0); 1928 if (pg->wire_count == 0 && pg->object == NULL) 1929 vm_page_free(pg); 1930 vm_page_unlock_queues(); 1931 SOCKBUF_LOCK(&so->so_snd); 1932 sbunlock(&so->so_snd); 1933 SOCKBUF_UNLOCK(&so->so_snd); 1934 error = EINTR; 1935 goto done; 1936 } 1937 1938 /* 1939 * Get an mbuf header and set it up as having external storage. 1940 */ 1941 if (m_header) 1942 MGET(m, M_TRYWAIT, MT_DATA); 1943 else 1944 MGETHDR(m, M_TRYWAIT, MT_DATA); 1945 if (m == NULL) { 1946 error = ENOBUFS; 1947 sf_buf_mext((void *)sf_buf_kva(sf), sf); 1948 SOCKBUF_LOCK(&so->so_snd); 1949 sbunlock(&so->so_snd); 1950 SOCKBUF_UNLOCK(&so->so_snd); 1951 goto done; 1952 } 1953 /* 1954 * Setup external storage for mbuf. 1955 */ 1956 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY, 1957 EXT_SFBUF); 1958 m->m_data = (char *)sf_buf_kva(sf) + pgoff; 1959 m->m_pkthdr.len = m->m_len = xfsize; 1960 1961 if (m_header) { 1962 m_cat(m_header, m); 1963 m = m_header; 1964 m_header = NULL; 1965 m_fixhdr(m); 1966 } 1967 1968 /* 1969 * Add the buffer to the socket buffer chain. 1970 */ 1971 SOCKBUF_LOCK(&so->so_snd); 1972 retry_space: 1973 /* 1974 * Make sure that the socket is still able to take more data. 1975 * CANTSENDMORE being true usually means that the connection 1976 * was closed. so_error is true when an error was sensed after 1977 * a previous send. 1978 * The state is checked after the page mapping and buffer 1979 * allocation above since those operations may block and make 1980 * any socket checks stale. From this point forward, nothing 1981 * blocks before the pru_send (or more accurately, any blocking 1982 * results in a loop back to here to re-check). 1983 */ 1984 SOCKBUF_LOCK_ASSERT(&so->so_snd); 1985 if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) { 1986 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 1987 error = EPIPE; 1988 } else { 1989 error = so->so_error; 1990 so->so_error = 0; 1991 } 1992 m_freem(m); 1993 sbunlock(&so->so_snd); 1994 SOCKBUF_UNLOCK(&so->so_snd); 1995 goto done; 1996 } 1997 /* 1998 * Wait for socket space to become available. We do this just 1999 * after checking the connection state above in order to avoid 2000 * a race condition with sbwait(). 2001 */ 2002 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 2003 if (so->so_state & SS_NBIO) { 2004 m_freem(m); 2005 sbunlock(&so->so_snd); 2006 SOCKBUF_UNLOCK(&so->so_snd); 2007 error = EAGAIN; 2008 goto done; 2009 } 2010 error = sbwait(&so->so_snd); 2011 /* 2012 * An error from sbwait usually indicates that we've 2013 * been interrupted by a signal. If we've sent anything 2014 * then return bytes sent, otherwise return the error. 2015 */ 2016 if (error) { 2017 m_freem(m); 2018 sbunlock(&so->so_snd); 2019 SOCKBUF_UNLOCK(&so->so_snd); 2020 goto done; 2021 } 2022 goto retry_space; 2023 } 2024 SOCKBUF_UNLOCK(&so->so_snd); 2025 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2026 if (error) { 2027 SOCKBUF_LOCK(&so->so_snd); 2028 sbunlock(&so->so_snd); 2029 SOCKBUF_UNLOCK(&so->so_snd); 2030 goto done; 2031 } 2032 headersent = 1; 2033 } 2034 SOCKBUF_LOCK(&so->so_snd); 2035 sbunlock(&so->so_snd); 2036 SOCKBUF_UNLOCK(&so->so_snd); 2037 2038 /* 2039 * Send trailers. Wimp out and use writev(2). 2040 */ 2041 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 2042 nuap.fd = uap->s; 2043 nuap.iovp = hdtr.trailers; 2044 nuap.iovcnt = hdtr.trl_cnt; 2045 error = writev(td, &nuap); 2046 if (error) 2047 goto done; 2048 if (compat) 2049 sbytes += td->td_retval[0]; 2050 else 2051 hdtr_size += td->td_retval[0]; 2052 } 2053 2054 done: 2055 if (headersent) { 2056 if (!compat) 2057 hdtr_size += headersize; 2058 } else { 2059 if (compat) 2060 sbytes -= headersize; 2061 } 2062 /* 2063 * If there was no error we have to clear td->td_retval[0] 2064 * because it may have been set by writev. 2065 */ 2066 if (error == 0) { 2067 td->td_retval[0] = 0; 2068 } 2069 if (uap->sbytes != NULL) { 2070 if (!compat) 2071 sbytes += hdtr_size; 2072 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2073 } 2074 if (vp) 2075 vrele(vp); 2076 if (so) 2077 fputsock(so); 2078 if (hdr_uio != NULL) 2079 free(hdr_uio, M_IOV); 2080 if (m_header) 2081 m_freem(m_header); 2082 2083 mtx_unlock(&Giant); 2084 2085 if (error == ERESTART) 2086 error = EINTR; 2087 2088 return (error); 2089 } 2090