1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_compat.h" 39 #include "opt_ktrace.h" 40 #include "opt_mac.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/kernel.h> 45 #include <sys/lock.h> 46 #include <sys/mac.h> 47 #include <sys/mutex.h> 48 #include <sys/sysproto.h> 49 #include <sys/malloc.h> 50 #include <sys/filedesc.h> 51 #include <sys/event.h> 52 #include <sys/proc.h> 53 #include <sys/fcntl.h> 54 #include <sys/file.h> 55 #include <sys/filio.h> 56 #include <sys/mount.h> 57 #include <sys/mbuf.h> 58 #include <sys/protosw.h> 59 #include <sys/sf_buf.h> 60 #include <sys/socket.h> 61 #include <sys/socketvar.h> 62 #include <sys/signalvar.h> 63 #include <sys/syscallsubr.h> 64 #include <sys/sysctl.h> 65 #include <sys/uio.h> 66 #include <sys/vnode.h> 67 #ifdef KTRACE 68 #include <sys/ktrace.h> 69 #endif 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 #include <vm/vm_pageout.h> 75 #include <vm/vm_kern.h> 76 #include <vm/vm_extern.h> 77 78 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 79 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 80 81 static int accept1(struct thread *td, struct accept_args *uap, int compat); 82 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 83 static int getsockname1(struct thread *td, struct getsockname_args *uap, 84 int compat); 85 static int getpeername1(struct thread *td, struct getpeername_args *uap, 86 int compat); 87 88 /* 89 * NSFBUFS-related variables and associated sysctls 90 */ 91 int nsfbufs; 92 int nsfbufspeak; 93 int nsfbufsused; 94 95 SYSCTL_DECL(_kern_ipc); 96 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, 
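    /*
     * CTLFLAG_RDTUN: read-only through sysctl(8) at run time, but the
     * initial value may be set via the kern.ipc.nsfbufs loader tunable.
     */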
    CTLFLAG_RDTUN, &nsfbufs, 0,
    "Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
    "Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
    "Number of sendfile(2) sf_bufs in use");

/*
 * Convert a user file descriptor to a kernel file entry.  A reference on the
 * file entry is held upon returning.  This is lighter weight than fgetsock(),
 * which bumps the socket reference count and drops the file reference count
 * instead; this approach avoids several additional mutex operations
 * associated with the additional reference count.
 */
static int
getsock(struct filedesc *fdp, int fd, struct file **fpp)
{
	struct file *fp;
	int error;

	fp = NULL;
	if (fdp == NULL)
		error = EBADF;
	else {
		FILEDESC_LOCK_FAST(fdp);
		fp = fget_locked(fdp, fd);
		if (fp == NULL)
			error = EBADF;
		else if (fp->f_type != DTYPE_SOCKET) {
			fp = NULL;
			error = ENOTSOCK;
		} else {
			fhold(fp);
			error = 0;
		}
		FILEDESC_UNLOCK_FAST(fdp);
	}
	*fpp = fp;
	return (error);
}

/*
 * System call interface to the socket abstraction.
 */
#if defined(COMPAT_43)
#define COMPAT_OLDSOCK
#endif

/*
 * MPSAFE
 */
int
socket(td, uap)
	struct thread *td;
	register struct socket_args /* {
		int	domain;
		int	type;
		int	protocol;
	} */ *uap;
{
	struct filedesc *fdp;
	struct socket *so;
	struct file *fp;
	int fd, error;

	fdp = td->td_proc->p_fd;
	error = falloc(td, &fp, &fd);
	if (error)
		return (error);
	/* An extra reference on `fp' has been held for us by falloc().
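	 * That reference is dropped again by the fdrop() at the end of this
	 * function, once the descriptor has either been returned to the
	 * caller or closed on error.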
*/ 166 NET_LOCK_GIANT(); 167 error = socreate(uap->domain, &so, uap->type, uap->protocol, 168 td->td_ucred, td); 169 NET_UNLOCK_GIANT(); 170 if (error) { 171 fdclose(fdp, fp, fd, td); 172 } else { 173 FILEDESC_LOCK_FAST(fdp); 174 fp->f_data = so; /* already has ref count */ 175 fp->f_flag = FREAD|FWRITE; 176 fp->f_ops = &socketops; 177 fp->f_type = DTYPE_SOCKET; 178 FILEDESC_UNLOCK_FAST(fdp); 179 td->td_retval[0] = fd; 180 } 181 fdrop(fp, td); 182 return (error); 183 } 184 185 /* 186 * MPSAFE 187 */ 188 /* ARGSUSED */ 189 int 190 bind(td, uap) 191 struct thread *td; 192 register struct bind_args /* { 193 int s; 194 caddr_t name; 195 int namelen; 196 } */ *uap; 197 { 198 struct sockaddr *sa; 199 int error; 200 201 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 202 return (error); 203 204 return (kern_bind(td, uap->s, sa)); 205 } 206 207 int 208 kern_bind(td, fd, sa) 209 struct thread *td; 210 int fd; 211 struct sockaddr *sa; 212 { 213 struct socket *so; 214 struct file *fp; 215 int error; 216 217 NET_LOCK_GIANT(); 218 error = getsock(td->td_proc->p_fd, fd, &fp); 219 if (error) 220 goto done2; 221 so = fp->f_data; 222 #ifdef MAC 223 SOCK_LOCK(so); 224 error = mac_check_socket_bind(td->td_ucred, so, sa); 225 SOCK_UNLOCK(so); 226 if (error) 227 goto done1; 228 #endif 229 error = sobind(so, sa, td); 230 #ifdef MAC 231 done1: 232 #endif 233 fdrop(fp, td); 234 done2: 235 NET_UNLOCK_GIANT(); 236 FREE(sa, M_SONAME); 237 return (error); 238 } 239 240 /* 241 * MPSAFE 242 */ 243 /* ARGSUSED */ 244 int 245 listen(td, uap) 246 struct thread *td; 247 register struct listen_args /* { 248 int s; 249 int backlog; 250 } */ *uap; 251 { 252 struct socket *so; 253 struct file *fp; 254 int error; 255 256 NET_LOCK_GIANT(); 257 error = getsock(td->td_proc->p_fd, uap->s, &fp); 258 if (error == 0) { 259 so = fp->f_data; 260 #ifdef MAC 261 SOCK_LOCK(so); 262 error = mac_check_socket_listen(td->td_ucred, so); 263 SOCK_UNLOCK(so); 264 if (error) 265 goto done; 266 #endif 267 error = solisten(so, uap->backlog, td); 268 #ifdef MAC 269 done: 270 #endif 271 fdrop(fp, td); 272 } 273 NET_UNLOCK_GIANT(); 274 return(error); 275 } 276 277 /* 278 * accept1() 279 * MPSAFE 280 */ 281 static int 282 accept1(td, uap, compat) 283 struct thread *td; 284 register struct accept_args /* { 285 int s; 286 struct sockaddr * __restrict name; 287 socklen_t * __restrict anamelen; 288 } */ *uap; 289 int compat; 290 { 291 struct filedesc *fdp; 292 struct file *nfp = NULL; 293 struct sockaddr *sa = NULL; 294 socklen_t namelen; 295 int error; 296 struct socket *head, *so; 297 int fd; 298 u_int fflag; 299 pid_t pgid; 300 int tmp; 301 302 fdp = td->td_proc->p_fd; 303 if (uap->name) { 304 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 305 if(error) 306 return (error); 307 if (namelen < 0) 308 return (EINVAL); 309 } 310 NET_LOCK_GIANT(); 311 error = fgetsock(td, uap->s, &head, &fflag); 312 if (error) 313 goto done2; 314 if ((head->so_options & SO_ACCEPTCONN) == 0) { 315 error = EINVAL; 316 goto done; 317 } 318 error = falloc(td, &nfp, &fd); 319 if (error) 320 goto done; 321 ACCEPT_LOCK(); 322 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 323 ACCEPT_UNLOCK(); 324 error = EWOULDBLOCK; 325 goto noconnection; 326 } 327 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 328 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 329 head->so_error = ECONNABORTED; 330 break; 331 } 332 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 333 "accept", 0); 334 if (error) { 335 ACCEPT_UNLOCK(); 336 goto 
noconnection; 337 } 338 } 339 if (head->so_error) { 340 error = head->so_error; 341 head->so_error = 0; 342 ACCEPT_UNLOCK(); 343 goto noconnection; 344 } 345 so = TAILQ_FIRST(&head->so_comp); 346 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 347 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 348 349 /* 350 * Before changing the flags on the socket, we have to bump the 351 * reference count. Otherwise, if the protocol calls sofree(), 352 * the socket will be released due to a zero refcount. 353 */ 354 SOCK_LOCK(so); 355 soref(so); /* file descriptor reference */ 356 SOCK_UNLOCK(so); 357 358 TAILQ_REMOVE(&head->so_comp, so, so_list); 359 head->so_qlen--; 360 so->so_state |= (head->so_state & SS_NBIO); 361 so->so_qstate &= ~SQ_COMP; 362 so->so_head = NULL; 363 364 ACCEPT_UNLOCK(); 365 366 /* An extra reference on `nfp' has been held for us by falloc(). */ 367 td->td_retval[0] = fd; 368 369 /* connection has been removed from the listen queue */ 370 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 371 372 pgid = fgetown(&head->so_sigio); 373 if (pgid != 0) 374 fsetown(pgid, &so->so_sigio); 375 376 FILE_LOCK(nfp); 377 nfp->f_data = so; /* nfp has ref count from falloc */ 378 nfp->f_flag = fflag; 379 nfp->f_ops = &socketops; 380 nfp->f_type = DTYPE_SOCKET; 381 FILE_UNLOCK(nfp); 382 /* Sync socket nonblocking/async state with file flags */ 383 tmp = fflag & FNONBLOCK; 384 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 385 tmp = fflag & FASYNC; 386 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 387 sa = 0; 388 error = soaccept(so, &sa); 389 if (error) { 390 /* 391 * return a namelen of zero for older code which might 392 * ignore the return value from accept. 393 */ 394 if (uap->name != NULL) { 395 namelen = 0; 396 (void) copyout(&namelen, 397 uap->anamelen, sizeof(*uap->anamelen)); 398 } 399 goto noconnection; 400 } 401 if (sa == NULL) { 402 namelen = 0; 403 if (uap->name) 404 goto gotnoname; 405 error = 0; 406 goto done; 407 } 408 if (uap->name) { 409 /* check sa_len before it is destroyed */ 410 if (namelen > sa->sa_len) 411 namelen = sa->sa_len; 412 #ifdef COMPAT_OLDSOCK 413 if (compat) 414 ((struct osockaddr *)sa)->sa_family = 415 sa->sa_family; 416 #endif 417 error = copyout(sa, uap->name, (u_int)namelen); 418 if (!error) 419 gotnoname: 420 error = copyout(&namelen, 421 uap->anamelen, sizeof (*uap->anamelen)); 422 } 423 noconnection: 424 if (sa) 425 FREE(sa, M_SONAME); 426 427 /* 428 * close the new descriptor, assuming someone hasn't ripped it 429 * out from under us. 430 */ 431 if (error) 432 fdclose(fdp, nfp, fd, td); 433 434 /* 435 * Release explicitly held references before returning. 
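	 * These are the file reference on `nfp' taken by falloc() and the
	 * socket reference on `head' taken by fgetsock() above.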
436 */ 437 done: 438 if (nfp != NULL) 439 fdrop(nfp, td); 440 fputsock(head); 441 done2: 442 NET_UNLOCK_GIANT(); 443 return (error); 444 } 445 446 /* 447 * MPSAFE (accept1() is MPSAFE) 448 */ 449 int 450 accept(td, uap) 451 struct thread *td; 452 struct accept_args *uap; 453 { 454 455 return (accept1(td, uap, 0)); 456 } 457 458 #ifdef COMPAT_OLDSOCK 459 /* 460 * MPSAFE (accept1() is MPSAFE) 461 */ 462 int 463 oaccept(td, uap) 464 struct thread *td; 465 struct accept_args *uap; 466 { 467 468 return (accept1(td, uap, 1)); 469 } 470 #endif /* COMPAT_OLDSOCK */ 471 472 /* 473 * MPSAFE 474 */ 475 /* ARGSUSED */ 476 int 477 connect(td, uap) 478 struct thread *td; 479 register struct connect_args /* { 480 int s; 481 caddr_t name; 482 int namelen; 483 } */ *uap; 484 { 485 struct sockaddr *sa; 486 int error; 487 488 error = getsockaddr(&sa, uap->name, uap->namelen); 489 if (error) 490 return (error); 491 492 return (kern_connect(td, uap->s, sa)); 493 } 494 495 496 int 497 kern_connect(td, fd, sa) 498 struct thread *td; 499 int fd; 500 struct sockaddr *sa; 501 { 502 struct socket *so; 503 struct file *fp; 504 int error, s; 505 int interrupted = 0; 506 507 NET_LOCK_GIANT(); 508 error = getsock(td->td_proc->p_fd, fd, &fp); 509 if (error) 510 goto done2; 511 so = fp->f_data; 512 if (so->so_state & SS_ISCONNECTING) { 513 error = EALREADY; 514 goto done1; 515 } 516 #ifdef MAC 517 SOCK_LOCK(so); 518 error = mac_check_socket_connect(td->td_ucred, so, sa); 519 SOCK_UNLOCK(so); 520 if (error) 521 goto bad; 522 #endif 523 error = soconnect(so, sa, td); 524 if (error) 525 goto bad; 526 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 527 error = EINPROGRESS; 528 goto done1; 529 } 530 s = splnet(); 531 SOCK_LOCK(so); 532 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 533 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 534 "connec", 0); 535 if (error) { 536 if (error == EINTR || error == ERESTART) 537 interrupted = 1; 538 break; 539 } 540 } 541 if (error == 0) { 542 error = so->so_error; 543 so->so_error = 0; 544 } 545 SOCK_UNLOCK(so); 546 splx(s); 547 bad: 548 if (!interrupted) 549 so->so_state &= ~SS_ISCONNECTING; 550 if (error == ERESTART) 551 error = EINTR; 552 done1: 553 fdrop(fp, td); 554 done2: 555 NET_UNLOCK_GIANT(); 556 FREE(sa, M_SONAME); 557 return (error); 558 } 559 560 /* 561 * MPSAFE 562 */ 563 int 564 socketpair(td, uap) 565 struct thread *td; 566 register struct socketpair_args /* { 567 int domain; 568 int type; 569 int protocol; 570 int *rsv; 571 } */ *uap; 572 { 573 register struct filedesc *fdp = td->td_proc->p_fd; 574 struct file *fp1, *fp2; 575 struct socket *so1, *so2; 576 int fd, error, sv[2]; 577 578 NET_LOCK_GIANT(); 579 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 580 td->td_ucred, td); 581 if (error) 582 goto done2; 583 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 584 td->td_ucred, td); 585 if (error) 586 goto free1; 587 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 588 error = falloc(td, &fp1, &fd); 589 if (error) 590 goto free2; 591 sv[0] = fd; 592 fp1->f_data = so1; /* so1 already has ref count */ 593 error = falloc(td, &fp2, &fd); 594 if (error) 595 goto free3; 596 fp2->f_data = so2; /* so2 already has ref count */ 597 sv[1] = fd; 598 error = soconnect2(so1, so2); 599 if (error) 600 goto free4; 601 if (uap->type == SOCK_DGRAM) { 602 /* 603 * Datagram socket connection is asymmetric. 
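		 * so connect the second socket back to the first as well;
		 * otherwise only so1 would be connected to so2.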
604 */ 605 error = soconnect2(so2, so1); 606 if (error) 607 goto free4; 608 } 609 FILE_LOCK(fp1); 610 fp1->f_flag = FREAD|FWRITE; 611 fp1->f_ops = &socketops; 612 fp1->f_type = DTYPE_SOCKET; 613 FILE_UNLOCK(fp1); 614 FILE_LOCK(fp2); 615 fp2->f_flag = FREAD|FWRITE; 616 fp2->f_ops = &socketops; 617 fp2->f_type = DTYPE_SOCKET; 618 FILE_UNLOCK(fp2); 619 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 620 fdrop(fp1, td); 621 fdrop(fp2, td); 622 goto done2; 623 free4: 624 fdclose(fdp, fp2, sv[1], td); 625 fdrop(fp2, td); 626 free3: 627 fdclose(fdp, fp1, sv[0], td); 628 fdrop(fp1, td); 629 free2: 630 (void)soclose(so2); 631 free1: 632 (void)soclose(so1); 633 done2: 634 NET_UNLOCK_GIANT(); 635 return (error); 636 } 637 638 static int 639 sendit(td, s, mp, flags) 640 register struct thread *td; 641 int s; 642 register struct msghdr *mp; 643 int flags; 644 { 645 struct mbuf *control; 646 struct sockaddr *to; 647 int error; 648 649 if (mp->msg_name != NULL) { 650 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 651 if (error) { 652 to = NULL; 653 goto bad; 654 } 655 mp->msg_name = to; 656 } else { 657 to = NULL; 658 } 659 660 if (mp->msg_control) { 661 if (mp->msg_controllen < sizeof(struct cmsghdr) 662 #ifdef COMPAT_OLDSOCK 663 && mp->msg_flags != MSG_COMPAT 664 #endif 665 ) { 666 error = EINVAL; 667 goto bad; 668 } 669 error = sockargs(&control, mp->msg_control, 670 mp->msg_controllen, MT_CONTROL); 671 if (error) 672 goto bad; 673 #ifdef COMPAT_OLDSOCK 674 if (mp->msg_flags == MSG_COMPAT) { 675 register struct cmsghdr *cm; 676 677 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 678 if (control == 0) { 679 error = ENOBUFS; 680 goto bad; 681 } else { 682 cm = mtod(control, struct cmsghdr *); 683 cm->cmsg_len = control->m_len; 684 cm->cmsg_level = SOL_SOCKET; 685 cm->cmsg_type = SCM_RIGHTS; 686 } 687 } 688 #endif 689 } else { 690 control = NULL; 691 } 692 693 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 694 695 bad: 696 if (to) 697 FREE(to, M_SONAME); 698 return (error); 699 } 700 701 int 702 kern_sendit(td, s, mp, flags, control, segflg) 703 struct thread *td; 704 int s; 705 struct msghdr *mp; 706 int flags; 707 struct mbuf *control; 708 enum uio_seg segflg; 709 { 710 struct file *fp; 711 struct uio auio; 712 struct iovec *iov; 713 struct socket *so; 714 int i; 715 int len, error; 716 #ifdef KTRACE 717 struct uio *ktruio = NULL; 718 #endif 719 720 NET_LOCK_GIANT(); 721 error = getsock(td->td_proc->p_fd, s, &fp); 722 if (error) 723 goto bad2; 724 so = (struct socket *)fp->f_data; 725 726 #ifdef MAC 727 SOCK_LOCK(so); 728 error = mac_check_socket_send(td->td_ucred, so); 729 SOCK_UNLOCK(so); 730 if (error) 731 goto bad; 732 #endif 733 734 auio.uio_iov = mp->msg_iov; 735 auio.uio_iovcnt = mp->msg_iovlen; 736 auio.uio_segflg = segflg; 737 auio.uio_rw = UIO_WRITE; 738 auio.uio_td = td; 739 auio.uio_offset = 0; /* XXX */ 740 auio.uio_resid = 0; 741 iov = mp->msg_iov; 742 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 743 if ((auio.uio_resid += iov->iov_len) < 0) { 744 error = EINVAL; 745 goto bad; 746 } 747 } 748 #ifdef KTRACE 749 if (KTRPOINT(td, KTR_GENIO)) 750 ktruio = cloneuio(&auio); 751 #endif 752 len = auio.uio_resid; 753 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 754 0, control, flags, td); 755 if (error) { 756 if (auio.uio_resid != len && (error == ERESTART || 757 error == EINTR || error == EWOULDBLOCK)) 758 error = 0; 759 /* Generation of SIGPIPE can be controlled per socket */ 760 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 761 
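			/*
			 * Post SIGPIPE to the sending process, as a failed
			 * write(2) to a broken pipe would; SO_NOSIGPIPE on
			 * the socket suppresses the signal.
			 */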
PROC_LOCK(td->td_proc); 762 psignal(td->td_proc, SIGPIPE); 763 PROC_UNLOCK(td->td_proc); 764 } 765 } 766 if (error == 0) 767 td->td_retval[0] = len - auio.uio_resid; 768 #ifdef KTRACE 769 if (ktruio != NULL) { 770 ktruio->uio_resid = td->td_retval[0]; 771 ktrgenio(s, UIO_WRITE, ktruio, error); 772 } 773 #endif 774 bad: 775 fdrop(fp, td); 776 bad2: 777 NET_UNLOCK_GIANT(); 778 return (error); 779 } 780 781 /* 782 * MPSAFE 783 */ 784 int 785 sendto(td, uap) 786 struct thread *td; 787 register struct sendto_args /* { 788 int s; 789 caddr_t buf; 790 size_t len; 791 int flags; 792 caddr_t to; 793 int tolen; 794 } */ *uap; 795 { 796 struct msghdr msg; 797 struct iovec aiov; 798 int error; 799 800 msg.msg_name = uap->to; 801 msg.msg_namelen = uap->tolen; 802 msg.msg_iov = &aiov; 803 msg.msg_iovlen = 1; 804 msg.msg_control = 0; 805 #ifdef COMPAT_OLDSOCK 806 msg.msg_flags = 0; 807 #endif 808 aiov.iov_base = uap->buf; 809 aiov.iov_len = uap->len; 810 error = sendit(td, uap->s, &msg, uap->flags); 811 return (error); 812 } 813 814 #ifdef COMPAT_OLDSOCK 815 /* 816 * MPSAFE 817 */ 818 int 819 osend(td, uap) 820 struct thread *td; 821 register struct osend_args /* { 822 int s; 823 caddr_t buf; 824 int len; 825 int flags; 826 } */ *uap; 827 { 828 struct msghdr msg; 829 struct iovec aiov; 830 int error; 831 832 msg.msg_name = 0; 833 msg.msg_namelen = 0; 834 msg.msg_iov = &aiov; 835 msg.msg_iovlen = 1; 836 aiov.iov_base = uap->buf; 837 aiov.iov_len = uap->len; 838 msg.msg_control = 0; 839 msg.msg_flags = 0; 840 error = sendit(td, uap->s, &msg, uap->flags); 841 return (error); 842 } 843 844 /* 845 * MPSAFE 846 */ 847 int 848 osendmsg(td, uap) 849 struct thread *td; 850 struct osendmsg_args /* { 851 int s; 852 caddr_t msg; 853 int flags; 854 } */ *uap; 855 { 856 struct msghdr msg; 857 struct iovec *iov; 858 int error; 859 860 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 861 if (error) 862 return (error); 863 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 864 if (error) 865 return (error); 866 msg.msg_iov = iov; 867 msg.msg_flags = MSG_COMPAT; 868 error = sendit(td, uap->s, &msg, uap->flags); 869 free(iov, M_IOV); 870 return (error); 871 } 872 #endif 873 874 /* 875 * MPSAFE 876 */ 877 int 878 sendmsg(td, uap) 879 struct thread *td; 880 struct sendmsg_args /* { 881 int s; 882 caddr_t msg; 883 int flags; 884 } */ *uap; 885 { 886 struct msghdr msg; 887 struct iovec *iov; 888 int error; 889 890 error = copyin(uap->msg, &msg, sizeof (msg)); 891 if (error) 892 return (error); 893 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 894 if (error) 895 return (error); 896 msg.msg_iov = iov; 897 #ifdef COMPAT_OLDSOCK 898 msg.msg_flags = 0; 899 #endif 900 error = sendit(td, uap->s, &msg, uap->flags); 901 free(iov, M_IOV); 902 return (error); 903 } 904 905 static int 906 recvit(td, s, mp, namelenp) 907 struct thread *td; 908 int s; 909 struct msghdr *mp; 910 void *namelenp; 911 { 912 struct uio auio; 913 struct iovec *iov; 914 int i; 915 socklen_t len; 916 int error; 917 struct mbuf *m, *control = 0; 918 caddr_t ctlbuf; 919 struct file *fp; 920 struct socket *so; 921 struct sockaddr *fromsa = 0; 922 #ifdef KTRACE 923 struct uio *ktruio = NULL; 924 #endif 925 926 NET_LOCK_GIANT(); 927 error = getsock(td->td_proc->p_fd, s, &fp); 928 if (error) { 929 NET_UNLOCK_GIANT(); 930 return (error); 931 } 932 so = fp->f_data; 933 934 #ifdef MAC 935 SOCK_LOCK(so); 936 error = mac_check_socket_receive(td->td_ucred, so); 937 SOCK_UNLOCK(so); 938 if (error) { 939 fdrop(fp, td); 940 
NET_UNLOCK_GIANT(); 941 return (error); 942 } 943 #endif 944 945 auio.uio_iov = mp->msg_iov; 946 auio.uio_iovcnt = mp->msg_iovlen; 947 auio.uio_segflg = UIO_USERSPACE; 948 auio.uio_rw = UIO_READ; 949 auio.uio_td = td; 950 auio.uio_offset = 0; /* XXX */ 951 auio.uio_resid = 0; 952 iov = mp->msg_iov; 953 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 954 if ((auio.uio_resid += iov->iov_len) < 0) { 955 fdrop(fp, td); 956 NET_UNLOCK_GIANT(); 957 return (EINVAL); 958 } 959 } 960 #ifdef KTRACE 961 if (KTRPOINT(td, KTR_GENIO)) 962 ktruio = cloneuio(&auio); 963 #endif 964 len = auio.uio_resid; 965 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 966 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 967 &mp->msg_flags); 968 if (error) { 969 if (auio.uio_resid != (int)len && (error == ERESTART || 970 error == EINTR || error == EWOULDBLOCK)) 971 error = 0; 972 } 973 #ifdef KTRACE 974 if (ktruio != NULL) { 975 ktruio->uio_resid = (int)len - auio.uio_resid; 976 ktrgenio(s, UIO_READ, ktruio, error); 977 } 978 #endif 979 if (error) 980 goto out; 981 td->td_retval[0] = (int)len - auio.uio_resid; 982 if (mp->msg_name) { 983 len = mp->msg_namelen; 984 if (len <= 0 || fromsa == 0) 985 len = 0; 986 else { 987 /* save sa_len before it is destroyed by MSG_COMPAT */ 988 len = MIN(len, fromsa->sa_len); 989 #ifdef COMPAT_OLDSOCK 990 if (mp->msg_flags & MSG_COMPAT) 991 ((struct osockaddr *)fromsa)->sa_family = 992 fromsa->sa_family; 993 #endif 994 error = copyout(fromsa, mp->msg_name, (unsigned)len); 995 if (error) 996 goto out; 997 } 998 mp->msg_namelen = len; 999 if (namelenp && 1000 (error = copyout(&len, namelenp, sizeof (socklen_t)))) { 1001 #ifdef COMPAT_OLDSOCK 1002 if (mp->msg_flags & MSG_COMPAT) 1003 error = 0; /* old recvfrom didn't check */ 1004 else 1005 #endif 1006 goto out; 1007 } 1008 } 1009 if (mp->msg_control) { 1010 #ifdef COMPAT_OLDSOCK 1011 /* 1012 * We assume that old recvmsg calls won't receive access 1013 * rights and other control info, esp. as control info 1014 * is always optional and those options didn't exist in 4.3. 1015 * If we receive rights, trim the cmsghdr; anything else 1016 * is tossed. 
1017 */ 1018 if (control && mp->msg_flags & MSG_COMPAT) { 1019 if (mtod(control, struct cmsghdr *)->cmsg_level != 1020 SOL_SOCKET || 1021 mtod(control, struct cmsghdr *)->cmsg_type != 1022 SCM_RIGHTS) { 1023 mp->msg_controllen = 0; 1024 goto out; 1025 } 1026 control->m_len -= sizeof (struct cmsghdr); 1027 control->m_data += sizeof (struct cmsghdr); 1028 } 1029 #endif 1030 len = mp->msg_controllen; 1031 m = control; 1032 mp->msg_controllen = 0; 1033 ctlbuf = mp->msg_control; 1034 1035 while (m && len > 0) { 1036 unsigned int tocopy; 1037 1038 if (len >= m->m_len) 1039 tocopy = m->m_len; 1040 else { 1041 mp->msg_flags |= MSG_CTRUNC; 1042 tocopy = len; 1043 } 1044 1045 if ((error = copyout(mtod(m, caddr_t), 1046 ctlbuf, tocopy)) != 0) 1047 goto out; 1048 1049 ctlbuf += tocopy; 1050 len -= tocopy; 1051 m = m->m_next; 1052 } 1053 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1054 } 1055 out: 1056 fdrop(fp, td); 1057 NET_UNLOCK_GIANT(); 1058 if (fromsa) 1059 FREE(fromsa, M_SONAME); 1060 if (control) 1061 m_freem(control); 1062 return (error); 1063 } 1064 1065 /* 1066 * MPSAFE 1067 */ 1068 int 1069 recvfrom(td, uap) 1070 struct thread *td; 1071 register struct recvfrom_args /* { 1072 int s; 1073 caddr_t buf; 1074 size_t len; 1075 int flags; 1076 struct sockaddr * __restrict from; 1077 socklen_t * __restrict fromlenaddr; 1078 } */ *uap; 1079 { 1080 struct msghdr msg; 1081 struct iovec aiov; 1082 int error; 1083 1084 if (uap->fromlenaddr) { 1085 error = copyin(uap->fromlenaddr, 1086 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1087 if (error) 1088 goto done2; 1089 } else { 1090 msg.msg_namelen = 0; 1091 } 1092 msg.msg_name = uap->from; 1093 msg.msg_iov = &aiov; 1094 msg.msg_iovlen = 1; 1095 aiov.iov_base = uap->buf; 1096 aiov.iov_len = uap->len; 1097 msg.msg_control = 0; 1098 msg.msg_flags = uap->flags; 1099 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1100 done2: 1101 return(error); 1102 } 1103 1104 #ifdef COMPAT_OLDSOCK 1105 /* 1106 * MPSAFE 1107 */ 1108 int 1109 orecvfrom(td, uap) 1110 struct thread *td; 1111 struct recvfrom_args *uap; 1112 { 1113 1114 uap->flags |= MSG_COMPAT; 1115 return (recvfrom(td, uap)); 1116 } 1117 #endif 1118 1119 1120 #ifdef COMPAT_OLDSOCK 1121 /* 1122 * MPSAFE 1123 */ 1124 int 1125 orecv(td, uap) 1126 struct thread *td; 1127 register struct orecv_args /* { 1128 int s; 1129 caddr_t buf; 1130 int len; 1131 int flags; 1132 } */ *uap; 1133 { 1134 struct msghdr msg; 1135 struct iovec aiov; 1136 int error; 1137 1138 msg.msg_name = 0; 1139 msg.msg_namelen = 0; 1140 msg.msg_iov = &aiov; 1141 msg.msg_iovlen = 1; 1142 aiov.iov_base = uap->buf; 1143 aiov.iov_len = uap->len; 1144 msg.msg_control = 0; 1145 msg.msg_flags = uap->flags; 1146 error = recvit(td, uap->s, &msg, NULL); 1147 return (error); 1148 } 1149 1150 /* 1151 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1152 * overlays the new one, missing only the flags, and with the (old) access 1153 * rights where the control fields are now. 
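 * (The old struct omsghdr carries msg_accrights/msg_accrightslen where the
 * new msghdr has msg_control/msg_controllen, and it has no msg_flags field.)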
1154 * 1155 * MPSAFE 1156 */ 1157 int 1158 orecvmsg(td, uap) 1159 struct thread *td; 1160 struct orecvmsg_args /* { 1161 int s; 1162 struct omsghdr *msg; 1163 int flags; 1164 } */ *uap; 1165 { 1166 struct msghdr msg; 1167 struct iovec *iov; 1168 int error; 1169 1170 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1171 if (error) 1172 return (error); 1173 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1174 if (error) 1175 return (error); 1176 msg.msg_flags = uap->flags | MSG_COMPAT; 1177 msg.msg_iov = iov; 1178 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1179 if (msg.msg_controllen && error == 0) 1180 error = copyout(&msg.msg_controllen, 1181 &uap->msg->msg_accrightslen, sizeof (int)); 1182 free(iov, M_IOV); 1183 return (error); 1184 } 1185 #endif 1186 1187 /* 1188 * MPSAFE 1189 */ 1190 int 1191 recvmsg(td, uap) 1192 struct thread *td; 1193 struct recvmsg_args /* { 1194 int s; 1195 struct msghdr *msg; 1196 int flags; 1197 } */ *uap; 1198 { 1199 struct msghdr msg; 1200 struct iovec *uiov, *iov; 1201 int error; 1202 1203 error = copyin(uap->msg, &msg, sizeof (msg)); 1204 if (error) 1205 return (error); 1206 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1207 if (error) 1208 return (error); 1209 msg.msg_flags = uap->flags; 1210 #ifdef COMPAT_OLDSOCK 1211 msg.msg_flags &= ~MSG_COMPAT; 1212 #endif 1213 uiov = msg.msg_iov; 1214 msg.msg_iov = iov; 1215 error = recvit(td, uap->s, &msg, NULL); 1216 if (error == 0) { 1217 msg.msg_iov = uiov; 1218 error = copyout(&msg, uap->msg, sizeof(msg)); 1219 } 1220 free(iov, M_IOV); 1221 return (error); 1222 } 1223 1224 /* 1225 * MPSAFE 1226 */ 1227 /* ARGSUSED */ 1228 int 1229 shutdown(td, uap) 1230 struct thread *td; 1231 register struct shutdown_args /* { 1232 int s; 1233 int how; 1234 } */ *uap; 1235 { 1236 struct socket *so; 1237 struct file *fp; 1238 int error; 1239 1240 NET_LOCK_GIANT(); 1241 error = getsock(td->td_proc->p_fd, uap->s, &fp); 1242 if (error == 0) { 1243 so = fp->f_data; 1244 error = soshutdown(so, uap->how); 1245 fdrop(fp, td); 1246 } 1247 NET_UNLOCK_GIANT(); 1248 return (error); 1249 } 1250 1251 /* 1252 * MPSAFE 1253 */ 1254 /* ARGSUSED */ 1255 int 1256 setsockopt(td, uap) 1257 struct thread *td; 1258 register struct setsockopt_args /* { 1259 int s; 1260 int level; 1261 int name; 1262 caddr_t val; 1263 int valsize; 1264 } */ *uap; 1265 { 1266 1267 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1268 uap->val, UIO_USERSPACE, uap->valsize)); 1269 } 1270 1271 int 1272 kern_setsockopt(td, s, level, name, val, valseg, valsize) 1273 struct thread *td; 1274 int s; 1275 int level; 1276 int name; 1277 void *val; 1278 enum uio_seg valseg; 1279 socklen_t valsize; 1280 { 1281 int error; 1282 struct socket *so; 1283 struct file *fp; 1284 struct sockopt sopt; 1285 1286 if (val == NULL && valsize != 0) 1287 return (EFAULT); 1288 if (valsize < 0) 1289 return (EINVAL); 1290 1291 sopt.sopt_dir = SOPT_SET; 1292 sopt.sopt_level = level; 1293 sopt.sopt_name = name; 1294 sopt.sopt_val = val; 1295 sopt.sopt_valsize = valsize; 1296 switch (valseg) { 1297 case UIO_USERSPACE: 1298 sopt.sopt_td = td; 1299 break; 1300 case UIO_SYSSPACE: 1301 sopt.sopt_td = NULL; 1302 break; 1303 default: 1304 panic("kern_setsockopt called with bad valseg"); 1305 } 1306 1307 NET_LOCK_GIANT(); 1308 error = getsock(td->td_proc->p_fd, s, &fp); 1309 if (error == 0) { 1310 so = fp->f_data; 1311 error = sosetopt(so, &sopt); 1312 fdrop(fp, td); 1313 } 1314 NET_UNLOCK_GIANT(); 1315 return(error); 1316 } 1317 1318 /* 1319 * 
MPSAFE
 */
/* ARGSUSED */
int
getsockopt(td, uap)
	struct thread *td;
	register struct getsockopt_args /* {
		int	s;
		int	level;
		int	name;
		void * __restrict	val;
		socklen_t * __restrict avalsize;
	} */ *uap;
{
	socklen_t valsize;
	int error;

	if (uap->val) {
		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
		if (error)
			return (error);
	}

	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
	    uap->val, UIO_USERSPACE, &valsize);

	if (error == 0)
		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
	return (error);
}

/*
 * Kernel version of getsockopt.
 * optval can be a userland or kernel address.  optlen is always a kernel
 * pointer.
 */
int
kern_getsockopt(td, s, level, name, val, valseg, valsize)
	struct thread *td;
	int s;
	int level;
	int name;
	void *val;
	enum uio_seg valseg;
	socklen_t *valsize;
{
	int error;
	struct socket *so;
	struct file *fp;
	struct sockopt sopt;

	if (val == NULL)
		*valsize = 0;
	if (*valsize < 0)
		return (EINVAL);

	sopt.sopt_dir = SOPT_GET;
	sopt.sopt_level = level;
	sopt.sopt_name = name;
	sopt.sopt_val = val;
	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
	switch (valseg) {
	case UIO_USERSPACE:
		sopt.sopt_td = td;
		break;
	case UIO_SYSSPACE:
		sopt.sopt_td = NULL;
		break;
	default:
		panic("kern_getsockopt called with bad valseg");
	}

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, s, &fp);
	if (error == 0) {
		so = fp->f_data;
		error = sogetopt(so, &sopt);
		*valsize = sopt.sopt_valsize;
		fdrop(fp, td);
	}
	NET_UNLOCK_GIANT();
	return (error);
}

/*
 * getsockname1() - Get socket name.
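 * Copies the local address of the socket out to *asa, truncated to the
 * length the caller supplied in *alen; *alen is updated to the number of
 * bytes actually copied.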
1404 * 1405 * MPSAFE 1406 */ 1407 /* ARGSUSED */ 1408 static int 1409 getsockname1(td, uap, compat) 1410 struct thread *td; 1411 register struct getsockname_args /* { 1412 int fdes; 1413 struct sockaddr * __restrict asa; 1414 socklen_t * __restrict alen; 1415 } */ *uap; 1416 int compat; 1417 { 1418 struct socket *so; 1419 struct sockaddr *sa; 1420 struct file *fp; 1421 socklen_t len; 1422 int error; 1423 1424 NET_LOCK_GIANT(); 1425 error = getsock(td->td_proc->p_fd, uap->fdes, &fp); 1426 if (error) 1427 goto done2; 1428 so = fp->f_data; 1429 error = copyin(uap->alen, &len, sizeof (len)); 1430 if (error) 1431 goto done1; 1432 if (len < 0) { 1433 error = EINVAL; 1434 goto done1; 1435 } 1436 sa = 0; 1437 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1438 if (error) 1439 goto bad; 1440 if (sa == 0) { 1441 len = 0; 1442 goto gotnothing; 1443 } 1444 1445 len = MIN(len, sa->sa_len); 1446 #ifdef COMPAT_OLDSOCK 1447 if (compat) 1448 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1449 #endif 1450 error = copyout(sa, uap->asa, (u_int)len); 1451 if (error == 0) 1452 gotnothing: 1453 error = copyout(&len, uap->alen, sizeof (len)); 1454 bad: 1455 if (sa) 1456 FREE(sa, M_SONAME); 1457 done1: 1458 fdrop(fp, td); 1459 done2: 1460 NET_UNLOCK_GIANT(); 1461 return (error); 1462 } 1463 1464 /* 1465 * MPSAFE 1466 */ 1467 int 1468 getsockname(td, uap) 1469 struct thread *td; 1470 struct getsockname_args *uap; 1471 { 1472 1473 return (getsockname1(td, uap, 0)); 1474 } 1475 1476 #ifdef COMPAT_OLDSOCK 1477 /* 1478 * MPSAFE 1479 */ 1480 int 1481 ogetsockname(td, uap) 1482 struct thread *td; 1483 struct getsockname_args *uap; 1484 { 1485 1486 return (getsockname1(td, uap, 1)); 1487 } 1488 #endif /* COMPAT_OLDSOCK */ 1489 1490 /* 1491 * getpeername1() - Get name of peer for connected socket. 
1492 * 1493 * MPSAFE 1494 */ 1495 /* ARGSUSED */ 1496 static int 1497 getpeername1(td, uap, compat) 1498 struct thread *td; 1499 register struct getpeername_args /* { 1500 int fdes; 1501 struct sockaddr * __restrict asa; 1502 socklen_t * __restrict alen; 1503 } */ *uap; 1504 int compat; 1505 { 1506 struct socket *so; 1507 struct sockaddr *sa; 1508 struct file *fp; 1509 socklen_t len; 1510 int error; 1511 1512 NET_LOCK_GIANT(); 1513 error = getsock(td->td_proc->p_fd, uap->fdes, &fp); 1514 if (error) 1515 goto done2; 1516 so = fp->f_data; 1517 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1518 error = ENOTCONN; 1519 goto done1; 1520 } 1521 error = copyin(uap->alen, &len, sizeof (len)); 1522 if (error) 1523 goto done1; 1524 if (len < 0) { 1525 error = EINVAL; 1526 goto done1; 1527 } 1528 sa = 0; 1529 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1530 if (error) 1531 goto bad; 1532 if (sa == 0) { 1533 len = 0; 1534 goto gotnothing; 1535 } 1536 len = MIN(len, sa->sa_len); 1537 #ifdef COMPAT_OLDSOCK 1538 if (compat) 1539 ((struct osockaddr *)sa)->sa_family = 1540 sa->sa_family; 1541 #endif 1542 error = copyout(sa, uap->asa, (u_int)len); 1543 if (error) 1544 goto bad; 1545 gotnothing: 1546 error = copyout(&len, uap->alen, sizeof (len)); 1547 bad: 1548 if (sa) 1549 FREE(sa, M_SONAME); 1550 done1: 1551 fdrop(fp, td); 1552 done2: 1553 NET_UNLOCK_GIANT(); 1554 return (error); 1555 } 1556 1557 /* 1558 * MPSAFE 1559 */ 1560 int 1561 getpeername(td, uap) 1562 struct thread *td; 1563 struct getpeername_args *uap; 1564 { 1565 1566 return (getpeername1(td, uap, 0)); 1567 } 1568 1569 #ifdef COMPAT_OLDSOCK 1570 /* 1571 * MPSAFE 1572 */ 1573 int 1574 ogetpeername(td, uap) 1575 struct thread *td; 1576 struct ogetpeername_args *uap; 1577 { 1578 1579 /* XXX uap should have type `getpeername_args *' to begin with. */ 1580 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1581 } 1582 #endif /* COMPAT_OLDSOCK */ 1583 1584 int 1585 sockargs(mp, buf, buflen, type) 1586 struct mbuf **mp; 1587 caddr_t buf; 1588 int buflen, type; 1589 { 1590 register struct sockaddr *sa; 1591 register struct mbuf *m; 1592 int error; 1593 1594 if ((u_int)buflen > MLEN) { 1595 #ifdef COMPAT_OLDSOCK 1596 if (type == MT_SONAME && (u_int)buflen <= 112) 1597 buflen = MLEN; /* unix domain compat. 
hack */ 1598 else 1599 #endif 1600 if ((u_int)buflen > MCLBYTES) 1601 return (EINVAL); 1602 } 1603 m = m_get(M_TRYWAIT, type); 1604 if (m == NULL) 1605 return (ENOBUFS); 1606 if ((u_int)buflen > MLEN) { 1607 MCLGET(m, M_TRYWAIT); 1608 if ((m->m_flags & M_EXT) == 0) { 1609 m_free(m); 1610 return (ENOBUFS); 1611 } 1612 } 1613 m->m_len = buflen; 1614 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1615 if (error) 1616 (void) m_free(m); 1617 else { 1618 *mp = m; 1619 if (type == MT_SONAME) { 1620 sa = mtod(m, struct sockaddr *); 1621 1622 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1623 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1624 sa->sa_family = sa->sa_len; 1625 #endif 1626 sa->sa_len = buflen; 1627 } 1628 } 1629 return (error); 1630 } 1631 1632 int 1633 getsockaddr(namp, uaddr, len) 1634 struct sockaddr **namp; 1635 caddr_t uaddr; 1636 size_t len; 1637 { 1638 struct sockaddr *sa; 1639 int error; 1640 1641 if (len > SOCK_MAXADDRLEN) 1642 return (ENAMETOOLONG); 1643 if (len < offsetof(struct sockaddr, sa_data[0])) 1644 return (EINVAL); 1645 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1646 error = copyin(uaddr, sa, len); 1647 if (error) { 1648 FREE(sa, M_SONAME); 1649 } else { 1650 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1651 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1652 sa->sa_family = sa->sa_len; 1653 #endif 1654 sa->sa_len = len; 1655 *namp = sa; 1656 } 1657 return (error); 1658 } 1659 1660 /* 1661 * Detach mapped page and release resources back to the system. 1662 */ 1663 void 1664 sf_buf_mext(void *addr, void *args) 1665 { 1666 vm_page_t m; 1667 1668 m = sf_buf_page(args); 1669 sf_buf_free(args); 1670 vm_page_lock_queues(); 1671 vm_page_unwire(m, 0); 1672 /* 1673 * Check for the object going away on us. This can 1674 * happen since we don't hold a reference to it. 1675 * If so, we're responsible for freeing the page. 1676 */ 1677 if (m->wire_count == 0 && m->object == NULL) 1678 vm_page_free(m); 1679 vm_page_unlock_queues(); 1680 } 1681 1682 /* 1683 * sendfile(2) 1684 * 1685 * MPSAFE 1686 * 1687 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1688 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1689 * 1690 * Send a file specified by 'fd' and starting at 'offset' to a socket 1691 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1692 * nbytes == 0. Optionally add a header and/or trailer to the socket 1693 * output. If specified, write the total number of bytes sent into *sbytes. 
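 *
 * A minimal userland sketch (illustrative only, not part of this file;
 * assumes "filefd" is an open regular file and "sockfd" a connected
 * SOCK_STREAM socket):
 *
 *	off_t sbytes;
 *
 *	if (sendfile(filefd, sockfd, 0, 0, NULL, &sbytes, 0) == -1)
 *		err(1, "sendfile");
 *	printf("sent %jd bytes\n", (intmax_t)sbytes);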
1694 * 1695 */ 1696 int 1697 sendfile(struct thread *td, struct sendfile_args *uap) 1698 { 1699 1700 return (do_sendfile(td, uap, 0)); 1701 } 1702 1703 #ifdef COMPAT_FREEBSD4 1704 int 1705 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1706 { 1707 struct sendfile_args args; 1708 1709 args.fd = uap->fd; 1710 args.s = uap->s; 1711 args.offset = uap->offset; 1712 args.nbytes = uap->nbytes; 1713 args.hdtr = uap->hdtr; 1714 args.sbytes = uap->sbytes; 1715 args.flags = uap->flags; 1716 1717 return (do_sendfile(td, &args, 1)); 1718 } 1719 #endif /* COMPAT_FREEBSD4 */ 1720 1721 static int 1722 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1723 { 1724 struct vnode *vp; 1725 struct vm_object *obj; 1726 struct socket *so = NULL; 1727 struct mbuf *m, *m_header = NULL; 1728 struct sf_buf *sf; 1729 struct vm_page *pg; 1730 struct writev_args nuap; 1731 struct sf_hdtr hdtr; 1732 struct uio *hdr_uio = NULL; 1733 off_t off, xfsize, hdtr_size, sbytes = 0; 1734 int error, headersize = 0, headersent = 0; 1735 1736 mtx_lock(&Giant); 1737 1738 hdtr_size = 0; 1739 1740 /* 1741 * The descriptor must be a regular file and have a backing VM object. 1742 */ 1743 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1744 goto done; 1745 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1746 obj = vp->v_object; 1747 VOP_UNLOCK(vp, 0, td); 1748 if (obj == NULL) { 1749 error = EINVAL; 1750 goto done; 1751 } 1752 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1753 goto done; 1754 if (so->so_type != SOCK_STREAM) { 1755 error = EINVAL; 1756 goto done; 1757 } 1758 if ((so->so_state & SS_ISCONNECTED) == 0) { 1759 error = ENOTCONN; 1760 goto done; 1761 } 1762 if (uap->offset < 0) { 1763 error = EINVAL; 1764 goto done; 1765 } 1766 1767 #ifdef MAC 1768 SOCK_LOCK(so); 1769 error = mac_check_socket_send(td->td_ucred, so); 1770 SOCK_UNLOCK(so); 1771 if (error) 1772 goto done; 1773 #endif 1774 1775 /* 1776 * If specified, get the pointer to the sf_hdtr struct for 1777 * any headers/trailers. 1778 */ 1779 if (uap->hdtr != NULL) { 1780 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1781 if (error) 1782 goto done; 1783 /* 1784 * Send any headers. 1785 */ 1786 if (hdtr.headers != NULL) { 1787 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1788 if (error) 1789 goto done; 1790 hdr_uio->uio_td = td; 1791 hdr_uio->uio_rw = UIO_WRITE; 1792 if (hdr_uio->uio_resid > 0) { 1793 m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0); 1794 if (m_header == NULL) 1795 goto done; 1796 headersize = m_header->m_pkthdr.len; 1797 if (compat) 1798 sbytes += headersize; 1799 } 1800 } 1801 } 1802 1803 /* 1804 * Protect against multiple writers to the socket. 1805 */ 1806 SOCKBUF_LOCK(&so->so_snd); 1807 (void) sblock(&so->so_snd, M_WAITOK); 1808 SOCKBUF_UNLOCK(&so->so_snd); 1809 1810 /* 1811 * Loop through the pages in the file, starting with the requested 1812 * offset. Get a file page (do I/O if necessary), map the file page 1813 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1814 * it on the socket. 1815 */ 1816 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1817 vm_pindex_t pindex; 1818 vm_offset_t pgoff; 1819 1820 pindex = OFF_TO_IDX(off); 1821 VM_OBJECT_LOCK(obj); 1822 retry_lookup: 1823 /* 1824 * Calculate the amount to transfer. Not to exceed a page, 1825 * the EOF, or the passed in nbytes. 
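		 * For example, if off points 100 bytes into a page, at most
		 * PAGE_SIZE - 100 bytes can come from this page.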
1826 */ 1827 xfsize = obj->un_pager.vnp.vnp_size - off; 1828 VM_OBJECT_UNLOCK(obj); 1829 if (xfsize > PAGE_SIZE) 1830 xfsize = PAGE_SIZE; 1831 pgoff = (vm_offset_t)(off & PAGE_MASK); 1832 if (PAGE_SIZE - pgoff < xfsize) 1833 xfsize = PAGE_SIZE - pgoff; 1834 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1835 xfsize = uap->nbytes - sbytes; 1836 if (xfsize <= 0) { 1837 if (m_header != NULL) { 1838 m = m_header; 1839 m_header = NULL; 1840 SOCKBUF_LOCK(&so->so_snd); 1841 goto retry_space; 1842 } else 1843 break; 1844 } 1845 /* 1846 * Optimize the non-blocking case by looking at the socket space 1847 * before going to the extra work of constituting the sf_buf. 1848 */ 1849 SOCKBUF_LOCK(&so->so_snd); 1850 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1851 if (so->so_snd.sb_state & SBS_CANTSENDMORE) 1852 error = EPIPE; 1853 else 1854 error = EAGAIN; 1855 sbunlock(&so->so_snd); 1856 SOCKBUF_UNLOCK(&so->so_snd); 1857 goto done; 1858 } 1859 SOCKBUF_UNLOCK(&so->so_snd); 1860 VM_OBJECT_LOCK(obj); 1861 /* 1862 * Attempt to look up the page. 1863 * 1864 * Allocate if not found 1865 * 1866 * Wait and loop if busy. 1867 */ 1868 pg = vm_page_lookup(obj, pindex); 1869 1870 if (pg == NULL) { 1871 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY | 1872 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1873 if (pg == NULL) { 1874 VM_OBJECT_UNLOCK(obj); 1875 VM_WAIT; 1876 VM_OBJECT_LOCK(obj); 1877 goto retry_lookup; 1878 } 1879 vm_page_lock_queues(); 1880 } else { 1881 vm_page_lock_queues(); 1882 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1883 goto retry_lookup; 1884 /* 1885 * Wire the page so it does not get ripped out from 1886 * under us. 1887 */ 1888 vm_page_wire(pg); 1889 } 1890 1891 /* 1892 * If page is not valid for what we need, initiate I/O 1893 */ 1894 1895 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) { 1896 VM_OBJECT_UNLOCK(obj); 1897 } else if (uap->flags & SF_NODISKIO) { 1898 error = EBUSY; 1899 } else { 1900 int bsize, resid; 1901 1902 /* 1903 * Ensure that our page is still around when the I/O 1904 * completes. 1905 */ 1906 vm_page_io_start(pg); 1907 vm_page_unlock_queues(); 1908 VM_OBJECT_UNLOCK(obj); 1909 1910 /* 1911 * Get the page from backing store. 1912 */ 1913 bsize = vp->v_mount->mnt_stat.f_iosize; 1914 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1915 /* 1916 * XXXMAC: Because we don't have fp->f_cred here, 1917 * we pass in NOCRED. This is probably wrong, but 1918 * is consistent with our original implementation. 1919 */ 1920 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1921 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1922 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 1923 td->td_ucred, NOCRED, &resid, td); 1924 VOP_UNLOCK(vp, 0, td); 1925 VM_OBJECT_LOCK(obj); 1926 vm_page_lock_queues(); 1927 vm_page_io_finish(pg); 1928 if (!error) 1929 VM_OBJECT_UNLOCK(obj); 1930 mbstat.sf_iocnt++; 1931 } 1932 1933 if (error) { 1934 vm_page_unwire(pg, 0); 1935 /* 1936 * See if anyone else might know about this page. 1937 * If not and it is not valid, then free it. 1938 */ 1939 if (pg->wire_count == 0 && pg->valid == 0 && 1940 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1941 pg->hold_count == 0) { 1942 vm_page_free(pg); 1943 } 1944 vm_page_unlock_queues(); 1945 VM_OBJECT_UNLOCK(obj); 1946 SOCKBUF_LOCK(&so->so_snd); 1947 sbunlock(&so->so_snd); 1948 SOCKBUF_UNLOCK(&so->so_snd); 1949 goto done; 1950 } 1951 vm_page_unlock_queues(); 1952 1953 /* 1954 * Get a sendfile buf. We usually wait as long as necessary, 1955 * but this wait can be interrupted. 
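		 * (SFB_CATCH makes sf_buf_alloc() return NULL if a signal
		 * arrives while waiting; that case is turned into EINTR
		 * below.)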
1956 */ 1957 if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) { 1958 mbstat.sf_allocfail++; 1959 vm_page_lock_queues(); 1960 vm_page_unwire(pg, 0); 1961 if (pg->wire_count == 0 && pg->object == NULL) 1962 vm_page_free(pg); 1963 vm_page_unlock_queues(); 1964 SOCKBUF_LOCK(&so->so_snd); 1965 sbunlock(&so->so_snd); 1966 SOCKBUF_UNLOCK(&so->so_snd); 1967 error = EINTR; 1968 goto done; 1969 } 1970 1971 /* 1972 * Get an mbuf header and set it up as having external storage. 1973 */ 1974 if (m_header) 1975 MGET(m, M_TRYWAIT, MT_DATA); 1976 else 1977 MGETHDR(m, M_TRYWAIT, MT_DATA); 1978 if (m == NULL) { 1979 error = ENOBUFS; 1980 sf_buf_mext((void *)sf_buf_kva(sf), sf); 1981 SOCKBUF_LOCK(&so->so_snd); 1982 sbunlock(&so->so_snd); 1983 SOCKBUF_UNLOCK(&so->so_snd); 1984 goto done; 1985 } 1986 /* 1987 * Setup external storage for mbuf. 1988 */ 1989 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY, 1990 EXT_SFBUF); 1991 m->m_data = (char *)sf_buf_kva(sf) + pgoff; 1992 m->m_pkthdr.len = m->m_len = xfsize; 1993 1994 if (m_header) { 1995 m_cat(m_header, m); 1996 m = m_header; 1997 m_header = NULL; 1998 m_fixhdr(m); 1999 } 2000 2001 /* 2002 * Add the buffer to the socket buffer chain. 2003 */ 2004 SOCKBUF_LOCK(&so->so_snd); 2005 retry_space: 2006 /* 2007 * Make sure that the socket is still able to take more data. 2008 * CANTSENDMORE being true usually means that the connection 2009 * was closed. so_error is true when an error was sensed after 2010 * a previous send. 2011 * The state is checked after the page mapping and buffer 2012 * allocation above since those operations may block and make 2013 * any socket checks stale. From this point forward, nothing 2014 * blocks before the pru_send (or more accurately, any blocking 2015 * results in a loop back to here to re-check). 2016 */ 2017 SOCKBUF_LOCK_ASSERT(&so->so_snd); 2018 if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) { 2019 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2020 error = EPIPE; 2021 } else { 2022 error = so->so_error; 2023 so->so_error = 0; 2024 } 2025 m_freem(m); 2026 sbunlock(&so->so_snd); 2027 SOCKBUF_UNLOCK(&so->so_snd); 2028 goto done; 2029 } 2030 /* 2031 * Wait for socket space to become available. We do this just 2032 * after checking the connection state above in order to avoid 2033 * a race condition with sbwait(). 2034 */ 2035 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 2036 if (so->so_state & SS_NBIO) { 2037 m_freem(m); 2038 sbunlock(&so->so_snd); 2039 SOCKBUF_UNLOCK(&so->so_snd); 2040 error = EAGAIN; 2041 goto done; 2042 } 2043 error = sbwait(&so->so_snd); 2044 /* 2045 * An error from sbwait usually indicates that we've 2046 * been interrupted by a signal. If we've sent anything 2047 * then return bytes sent, otherwise return the error. 2048 */ 2049 if (error) { 2050 m_freem(m); 2051 sbunlock(&so->so_snd); 2052 SOCKBUF_UNLOCK(&so->so_snd); 2053 goto done; 2054 } 2055 goto retry_space; 2056 } 2057 SOCKBUF_UNLOCK(&so->so_snd); 2058 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2059 if (error) { 2060 SOCKBUF_LOCK(&so->so_snd); 2061 sbunlock(&so->so_snd); 2062 SOCKBUF_UNLOCK(&so->so_snd); 2063 goto done; 2064 } 2065 headersent = 1; 2066 } 2067 SOCKBUF_LOCK(&so->so_snd); 2068 sbunlock(&so->so_snd); 2069 SOCKBUF_UNLOCK(&so->so_snd); 2070 2071 /* 2072 * Send trailers. Wimp out and use writev(2). 
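	 * (Whatever byte count writev() reports in td->td_retval[0] is
	 * folded into the *sbytes total below.)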
2073 */ 2074 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 2075 nuap.fd = uap->s; 2076 nuap.iovp = hdtr.trailers; 2077 nuap.iovcnt = hdtr.trl_cnt; 2078 error = writev(td, &nuap); 2079 if (error) 2080 goto done; 2081 if (compat) 2082 sbytes += td->td_retval[0]; 2083 else 2084 hdtr_size += td->td_retval[0]; 2085 } 2086 2087 done: 2088 if (headersent) { 2089 if (!compat) 2090 hdtr_size += headersize; 2091 } else { 2092 if (compat) 2093 sbytes -= headersize; 2094 } 2095 /* 2096 * If there was no error we have to clear td->td_retval[0] 2097 * because it may have been set by writev. 2098 */ 2099 if (error == 0) { 2100 td->td_retval[0] = 0; 2101 } 2102 if (uap->sbytes != NULL) { 2103 if (!compat) 2104 sbytes += hdtr_size; 2105 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2106 } 2107 if (vp) 2108 vrele(vp); 2109 if (so) 2110 fputsock(so); 2111 if (hdr_uio != NULL) 2112 free(hdr_uio, M_IOV); 2113 if (m_header) 2114 m_freem(m_header); 2115 2116 mtx_unlock(&Giant); 2117 2118 if (error == ERESTART) 2119 error = EINTR; 2120 2121 return (error); 2122 } 2123