1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_compat.h" 39 #include "opt_ktrace.h" 40 #include "opt_mac.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/kernel.h> 45 #include <sys/lock.h> 46 #include <sys/mac.h> 47 #include <sys/mutex.h> 48 #include <sys/sysproto.h> 49 #include <sys/malloc.h> 50 #include <sys/filedesc.h> 51 #include <sys/event.h> 52 #include <sys/proc.h> 53 #include <sys/fcntl.h> 54 #include <sys/file.h> 55 #include <sys/filio.h> 56 #include <sys/mount.h> 57 #include <sys/mbuf.h> 58 #include <sys/protosw.h> 59 #include <sys/sf_buf.h> 60 #include <sys/socket.h> 61 #include <sys/socketvar.h> 62 #include <sys/signalvar.h> 63 #include <sys/syscallsubr.h> 64 #include <sys/sysctl.h> 65 #include <sys/uio.h> 66 #include <sys/vnode.h> 67 #ifdef KTRACE 68 #include <sys/ktrace.h> 69 #endif 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 #include <vm/vm_pageout.h> 75 #include <vm/vm_kern.h> 76 #include <vm/vm_extern.h> 77 78 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 79 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 80 81 static int accept1(struct thread *td, struct accept_args *uap, int compat); 82 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 83 static int getsockname1(struct thread *td, struct getsockname_args *uap, 84 int compat); 85 static int getpeername1(struct thread *td, struct getpeername_args *uap, 86 int compat); 87 88 /* 89 * NSFBUFS-related variables and associated sysctls 90 */ 91 int nsfbufs; 92 int nsfbufspeak; 93 int nsfbufsused; 94 95 SYSCTL_DECL(_kern_ipc); 96 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 97 "Maximum number of sendfile(2) sf_bufs available"); 98 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 99 "Number of sendfile(2) sf_bufs at peak usage"); 100 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 101 "Number of sendfile(2) sf_bufs in use"); 102 103 /* 104 * Convert a user file descriptor to a kernel file entry. A reference on the 105 * file entry is held upon returning. This is lighter weight than 106 * fgetsock(), which bumps the socket reference drops the file reference 107 * count instead, as this approach avoids several additional mutex operations 108 * associated with the additional reference count. 109 */ 110 static int 111 getsock(struct filedesc *fdp, int fd, struct file **fpp) 112 { 113 struct file *fp; 114 int error; 115 116 fp = NULL; 117 if (fdp == NULL) 118 error = EBADF; 119 else { 120 FILEDESC_LOCK_FAST(fdp); 121 fp = fget_locked(fdp, fd); 122 if (fp == NULL) 123 error = EBADF; 124 else if (fp->f_type != DTYPE_SOCKET) { 125 fp = NULL; 126 error = ENOTSOCK; 127 } else { 128 fhold(fp); 129 error = 0; 130 } 131 FILEDESC_UNLOCK_FAST(fdp); 132 } 133 *fpp = fp; 134 return (error); 135 } 136 137 /* 138 * System call interface to the socket abstraction. 139 */ 140 #if defined(COMPAT_43) 141 #define COMPAT_OLDSOCK 142 #endif 143 144 /* 145 * MPSAFE 146 */ 147 int 148 socket(td, uap) 149 struct thread *td; 150 register struct socket_args /* { 151 int domain; 152 int type; 153 int protocol; 154 } */ *uap; 155 { 156 struct filedesc *fdp; 157 struct socket *so; 158 struct file *fp; 159 int fd, error; 160 161 fdp = td->td_proc->p_fd; 162 error = falloc(td, &fp, &fd); 163 if (error) 164 return (error); 165 /* An extra reference on `fp' has been held for us by falloc(). */ 166 NET_LOCK_GIANT(); 167 error = socreate(uap->domain, &so, uap->type, uap->protocol, 168 td->td_ucred, td); 169 NET_UNLOCK_GIANT(); 170 if (error) { 171 fdclose(fdp, fp, fd, td); 172 } else { 173 FILEDESC_LOCK_FAST(fdp); 174 fp->f_data = so; /* already has ref count */ 175 fp->f_flag = FREAD|FWRITE; 176 fp->f_ops = &socketops; 177 fp->f_type = DTYPE_SOCKET; 178 FILEDESC_UNLOCK_FAST(fdp); 179 td->td_retval[0] = fd; 180 } 181 fdrop(fp, td); 182 return (error); 183 } 184 185 /* 186 * MPSAFE 187 */ 188 /* ARGSUSED */ 189 int 190 bind(td, uap) 191 struct thread *td; 192 register struct bind_args /* { 193 int s; 194 caddr_t name; 195 int namelen; 196 } */ *uap; 197 { 198 struct sockaddr *sa; 199 int error; 200 201 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 202 return (error); 203 204 return (kern_bind(td, uap->s, sa)); 205 } 206 207 int 208 kern_bind(td, fd, sa) 209 struct thread *td; 210 int fd; 211 struct sockaddr *sa; 212 { 213 struct socket *so; 214 struct file *fp; 215 int error; 216 217 NET_LOCK_GIANT(); 218 error = getsock(td->td_proc->p_fd, fd, &fp); 219 if (error) 220 goto done2; 221 so = fp->f_data; 222 #ifdef MAC 223 SOCK_LOCK(so); 224 error = mac_check_socket_bind(td->td_ucred, so, sa); 225 SOCK_UNLOCK(so); 226 if (error) 227 goto done1; 228 #endif 229 error = sobind(so, sa, td); 230 #ifdef MAC 231 done1: 232 #endif 233 fdrop(fp, td); 234 done2: 235 NET_UNLOCK_GIANT(); 236 FREE(sa, M_SONAME); 237 return (error); 238 } 239 240 /* 241 * MPSAFE 242 */ 243 /* ARGSUSED */ 244 int 245 listen(td, uap) 246 struct thread *td; 247 register struct listen_args /* { 248 int s; 249 int backlog; 250 } */ *uap; 251 { 252 struct socket *so; 253 struct file *fp; 254 int error; 255 256 NET_LOCK_GIANT(); 257 error = getsock(td->td_proc->p_fd, uap->s, &fp); 258 if (error == 0) { 259 so = fp->f_data; 260 #ifdef MAC 261 SOCK_LOCK(so); 262 error = mac_check_socket_listen(td->td_ucred, so); 263 SOCK_UNLOCK(so); 264 if (error) 265 goto done; 266 #endif 267 error = solisten(so, uap->backlog, td); 268 #ifdef MAC 269 done: 270 #endif 271 fdrop(fp, td); 272 } 273 NET_UNLOCK_GIANT(); 274 return(error); 275 } 276 277 /* 278 * accept1() 279 * MPSAFE 280 */ 281 static int 282 accept1(td, uap, compat) 283 struct thread *td; 284 register struct accept_args /* { 285 int s; 286 struct sockaddr * __restrict name; 287 socklen_t * __restrict anamelen; 288 } */ *uap; 289 int compat; 290 { 291 struct filedesc *fdp; 292 struct file *nfp = NULL; 293 struct sockaddr *sa = NULL; 294 socklen_t namelen; 295 int error; 296 struct socket *head, *so; 297 int fd; 298 u_int fflag; 299 pid_t pgid; 300 int tmp; 301 302 fdp = td->td_proc->p_fd; 303 if (uap->name) { 304 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 305 if(error) 306 return (error); 307 if (namelen < 0) 308 return (EINVAL); 309 } 310 NET_LOCK_GIANT(); 311 error = fgetsock(td, uap->s, &head, &fflag); 312 if (error) 313 goto done2; 314 if ((head->so_options & SO_ACCEPTCONN) == 0) { 315 error = EINVAL; 316 goto done; 317 } 318 #ifdef MAC 319 SOCK_LOCK(head); 320 error = mac_check_socket_accept(td->td_ucred, head); 321 SOCK_UNLOCK(head); 322 if (error != 0) 323 goto done; 324 #endif 325 error = falloc(td, &nfp, &fd); 326 if (error) 327 goto done; 328 ACCEPT_LOCK(); 329 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 330 ACCEPT_UNLOCK(); 331 error = EWOULDBLOCK; 332 goto noconnection; 333 } 334 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 335 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 336 head->so_error = ECONNABORTED; 337 break; 338 } 339 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 340 "accept", 0); 341 if (error) { 342 ACCEPT_UNLOCK(); 343 goto noconnection; 344 } 345 } 346 if (head->so_error) { 347 error = head->so_error; 348 head->so_error = 0; 349 ACCEPT_UNLOCK(); 350 goto noconnection; 351 } 352 so = TAILQ_FIRST(&head->so_comp); 353 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 354 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 355 356 /* 357 * Before changing the flags on the socket, we have to bump the 358 * reference count. Otherwise, if the protocol calls sofree(), 359 * the socket will be released due to a zero refcount. 360 */ 361 SOCK_LOCK(so); /* soref() and so_state update */ 362 soref(so); /* file descriptor reference */ 363 364 TAILQ_REMOVE(&head->so_comp, so, so_list); 365 head->so_qlen--; 366 so->so_state |= (head->so_state & SS_NBIO); 367 so->so_qstate &= ~SQ_COMP; 368 so->so_head = NULL; 369 370 SOCK_UNLOCK(so); 371 ACCEPT_UNLOCK(); 372 373 /* An extra reference on `nfp' has been held for us by falloc(). */ 374 td->td_retval[0] = fd; 375 376 /* connection has been removed from the listen queue */ 377 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 378 379 pgid = fgetown(&head->so_sigio); 380 if (pgid != 0) 381 fsetown(pgid, &so->so_sigio); 382 383 FILE_LOCK(nfp); 384 nfp->f_data = so; /* nfp has ref count from falloc */ 385 nfp->f_flag = fflag; 386 nfp->f_ops = &socketops; 387 nfp->f_type = DTYPE_SOCKET; 388 FILE_UNLOCK(nfp); 389 /* Sync socket nonblocking/async state with file flags */ 390 tmp = fflag & FNONBLOCK; 391 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 392 tmp = fflag & FASYNC; 393 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 394 sa = 0; 395 error = soaccept(so, &sa); 396 if (error) { 397 /* 398 * return a namelen of zero for older code which might 399 * ignore the return value from accept. 400 */ 401 if (uap->name != NULL) { 402 namelen = 0; 403 (void) copyout(&namelen, 404 uap->anamelen, sizeof(*uap->anamelen)); 405 } 406 goto noconnection; 407 } 408 if (sa == NULL) { 409 namelen = 0; 410 if (uap->name) 411 goto gotnoname; 412 error = 0; 413 goto done; 414 } 415 if (uap->name) { 416 /* check sa_len before it is destroyed */ 417 if (namelen > sa->sa_len) 418 namelen = sa->sa_len; 419 #ifdef COMPAT_OLDSOCK 420 if (compat) 421 ((struct osockaddr *)sa)->sa_family = 422 sa->sa_family; 423 #endif 424 error = copyout(sa, uap->name, (u_int)namelen); 425 if (!error) 426 gotnoname: 427 error = copyout(&namelen, 428 uap->anamelen, sizeof (*uap->anamelen)); 429 } 430 noconnection: 431 if (sa) 432 FREE(sa, M_SONAME); 433 434 /* 435 * close the new descriptor, assuming someone hasn't ripped it 436 * out from under us. 437 */ 438 if (error) 439 fdclose(fdp, nfp, fd, td); 440 441 /* 442 * Release explicitly held references before returning. 443 */ 444 done: 445 if (nfp != NULL) 446 fdrop(nfp, td); 447 fputsock(head); 448 done2: 449 NET_UNLOCK_GIANT(); 450 return (error); 451 } 452 453 /* 454 * MPSAFE (accept1() is MPSAFE) 455 */ 456 int 457 accept(td, uap) 458 struct thread *td; 459 struct accept_args *uap; 460 { 461 462 return (accept1(td, uap, 0)); 463 } 464 465 #ifdef COMPAT_OLDSOCK 466 /* 467 * MPSAFE (accept1() is MPSAFE) 468 */ 469 int 470 oaccept(td, uap) 471 struct thread *td; 472 struct accept_args *uap; 473 { 474 475 return (accept1(td, uap, 1)); 476 } 477 #endif /* COMPAT_OLDSOCK */ 478 479 /* 480 * MPSAFE 481 */ 482 /* ARGSUSED */ 483 int 484 connect(td, uap) 485 struct thread *td; 486 register struct connect_args /* { 487 int s; 488 caddr_t name; 489 int namelen; 490 } */ *uap; 491 { 492 struct sockaddr *sa; 493 int error; 494 495 error = getsockaddr(&sa, uap->name, uap->namelen); 496 if (error) 497 return (error); 498 499 return (kern_connect(td, uap->s, sa)); 500 } 501 502 503 int 504 kern_connect(td, fd, sa) 505 struct thread *td; 506 int fd; 507 struct sockaddr *sa; 508 { 509 struct socket *so; 510 struct file *fp; 511 int error; 512 int interrupted = 0; 513 514 NET_LOCK_GIANT(); 515 error = getsock(td->td_proc->p_fd, fd, &fp); 516 if (error) 517 goto done2; 518 so = fp->f_data; 519 if (so->so_state & SS_ISCONNECTING) { 520 error = EALREADY; 521 goto done1; 522 } 523 #ifdef MAC 524 SOCK_LOCK(so); 525 error = mac_check_socket_connect(td->td_ucred, so, sa); 526 SOCK_UNLOCK(so); 527 if (error) 528 goto bad; 529 #endif 530 error = soconnect(so, sa, td); 531 if (error) 532 goto bad; 533 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 534 error = EINPROGRESS; 535 goto done1; 536 } 537 SOCK_LOCK(so); 538 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 539 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 540 "connec", 0); 541 if (error) { 542 if (error == EINTR || error == ERESTART) 543 interrupted = 1; 544 break; 545 } 546 } 547 if (error == 0) { 548 error = so->so_error; 549 so->so_error = 0; 550 } 551 SOCK_UNLOCK(so); 552 bad: 553 if (!interrupted) 554 so->so_state &= ~SS_ISCONNECTING; 555 if (error == ERESTART) 556 error = EINTR; 557 done1: 558 fdrop(fp, td); 559 done2: 560 NET_UNLOCK_GIANT(); 561 FREE(sa, M_SONAME); 562 return (error); 563 } 564 565 /* 566 * MPSAFE 567 */ 568 int 569 socketpair(td, uap) 570 struct thread *td; 571 register struct socketpair_args /* { 572 int domain; 573 int type; 574 int protocol; 575 int *rsv; 576 } */ *uap; 577 { 578 register struct filedesc *fdp = td->td_proc->p_fd; 579 struct file *fp1, *fp2; 580 struct socket *so1, *so2; 581 int fd, error, sv[2]; 582 583 NET_LOCK_GIANT(); 584 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 585 td->td_ucred, td); 586 if (error) 587 goto done2; 588 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 589 td->td_ucred, td); 590 if (error) 591 goto free1; 592 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 593 error = falloc(td, &fp1, &fd); 594 if (error) 595 goto free2; 596 sv[0] = fd; 597 fp1->f_data = so1; /* so1 already has ref count */ 598 error = falloc(td, &fp2, &fd); 599 if (error) 600 goto free3; 601 fp2->f_data = so2; /* so2 already has ref count */ 602 sv[1] = fd; 603 error = soconnect2(so1, so2); 604 if (error) 605 goto free4; 606 if (uap->type == SOCK_DGRAM) { 607 /* 608 * Datagram socket connection is asymmetric. 609 */ 610 error = soconnect2(so2, so1); 611 if (error) 612 goto free4; 613 } 614 FILE_LOCK(fp1); 615 fp1->f_flag = FREAD|FWRITE; 616 fp1->f_ops = &socketops; 617 fp1->f_type = DTYPE_SOCKET; 618 FILE_UNLOCK(fp1); 619 FILE_LOCK(fp2); 620 fp2->f_flag = FREAD|FWRITE; 621 fp2->f_ops = &socketops; 622 fp2->f_type = DTYPE_SOCKET; 623 FILE_UNLOCK(fp2); 624 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 625 fdrop(fp1, td); 626 fdrop(fp2, td); 627 goto done2; 628 free4: 629 fdclose(fdp, fp2, sv[1], td); 630 fdrop(fp2, td); 631 free3: 632 fdclose(fdp, fp1, sv[0], td); 633 fdrop(fp1, td); 634 free2: 635 (void)soclose(so2); 636 free1: 637 (void)soclose(so1); 638 done2: 639 NET_UNLOCK_GIANT(); 640 return (error); 641 } 642 643 static int 644 sendit(td, s, mp, flags) 645 register struct thread *td; 646 int s; 647 register struct msghdr *mp; 648 int flags; 649 { 650 struct mbuf *control; 651 struct sockaddr *to; 652 int error; 653 654 if (mp->msg_name != NULL) { 655 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 656 if (error) { 657 to = NULL; 658 goto bad; 659 } 660 mp->msg_name = to; 661 } else { 662 to = NULL; 663 } 664 665 if (mp->msg_control) { 666 if (mp->msg_controllen < sizeof(struct cmsghdr) 667 #ifdef COMPAT_OLDSOCK 668 && mp->msg_flags != MSG_COMPAT 669 #endif 670 ) { 671 error = EINVAL; 672 goto bad; 673 } 674 error = sockargs(&control, mp->msg_control, 675 mp->msg_controllen, MT_CONTROL); 676 if (error) 677 goto bad; 678 #ifdef COMPAT_OLDSOCK 679 if (mp->msg_flags == MSG_COMPAT) { 680 register struct cmsghdr *cm; 681 682 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 683 if (control == 0) { 684 error = ENOBUFS; 685 goto bad; 686 } else { 687 cm = mtod(control, struct cmsghdr *); 688 cm->cmsg_len = control->m_len; 689 cm->cmsg_level = SOL_SOCKET; 690 cm->cmsg_type = SCM_RIGHTS; 691 } 692 } 693 #endif 694 } else { 695 control = NULL; 696 } 697 698 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 699 700 bad: 701 if (to) 702 FREE(to, M_SONAME); 703 return (error); 704 } 705 706 int 707 kern_sendit(td, s, mp, flags, control, segflg) 708 struct thread *td; 709 int s; 710 struct msghdr *mp; 711 int flags; 712 struct mbuf *control; 713 enum uio_seg segflg; 714 { 715 struct file *fp; 716 struct uio auio; 717 struct iovec *iov; 718 struct socket *so; 719 int i; 720 int len, error; 721 #ifdef KTRACE 722 struct uio *ktruio = NULL; 723 #endif 724 725 NET_LOCK_GIANT(); 726 error = getsock(td->td_proc->p_fd, s, &fp); 727 if (error) 728 goto bad2; 729 so = (struct socket *)fp->f_data; 730 731 #ifdef MAC 732 SOCK_LOCK(so); 733 error = mac_check_socket_send(td->td_ucred, so); 734 SOCK_UNLOCK(so); 735 if (error) 736 goto bad; 737 #endif 738 739 auio.uio_iov = mp->msg_iov; 740 auio.uio_iovcnt = mp->msg_iovlen; 741 auio.uio_segflg = segflg; 742 auio.uio_rw = UIO_WRITE; 743 auio.uio_td = td; 744 auio.uio_offset = 0; /* XXX */ 745 auio.uio_resid = 0; 746 iov = mp->msg_iov; 747 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 748 if ((auio.uio_resid += iov->iov_len) < 0) { 749 error = EINVAL; 750 goto bad; 751 } 752 } 753 #ifdef KTRACE 754 if (KTRPOINT(td, KTR_GENIO)) 755 ktruio = cloneuio(&auio); 756 #endif 757 len = auio.uio_resid; 758 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 759 0, control, flags, td); 760 if (error) { 761 if (auio.uio_resid != len && (error == ERESTART || 762 error == EINTR || error == EWOULDBLOCK)) 763 error = 0; 764 /* Generation of SIGPIPE can be controlled per socket */ 765 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 766 !(flags & MSG_NOSIGNAL)) { 767 PROC_LOCK(td->td_proc); 768 psignal(td->td_proc, SIGPIPE); 769 PROC_UNLOCK(td->td_proc); 770 } 771 } 772 if (error == 0) 773 td->td_retval[0] = len - auio.uio_resid; 774 #ifdef KTRACE 775 if (ktruio != NULL) { 776 ktruio->uio_resid = td->td_retval[0]; 777 ktrgenio(s, UIO_WRITE, ktruio, error); 778 } 779 #endif 780 bad: 781 fdrop(fp, td); 782 bad2: 783 NET_UNLOCK_GIANT(); 784 return (error); 785 } 786 787 /* 788 * MPSAFE 789 */ 790 int 791 sendto(td, uap) 792 struct thread *td; 793 register struct sendto_args /* { 794 int s; 795 caddr_t buf; 796 size_t len; 797 int flags; 798 caddr_t to; 799 int tolen; 800 } */ *uap; 801 { 802 struct msghdr msg; 803 struct iovec aiov; 804 int error; 805 806 msg.msg_name = uap->to; 807 msg.msg_namelen = uap->tolen; 808 msg.msg_iov = &aiov; 809 msg.msg_iovlen = 1; 810 msg.msg_control = 0; 811 #ifdef COMPAT_OLDSOCK 812 msg.msg_flags = 0; 813 #endif 814 aiov.iov_base = uap->buf; 815 aiov.iov_len = uap->len; 816 error = sendit(td, uap->s, &msg, uap->flags); 817 return (error); 818 } 819 820 #ifdef COMPAT_OLDSOCK 821 /* 822 * MPSAFE 823 */ 824 int 825 osend(td, uap) 826 struct thread *td; 827 register struct osend_args /* { 828 int s; 829 caddr_t buf; 830 int len; 831 int flags; 832 } */ *uap; 833 { 834 struct msghdr msg; 835 struct iovec aiov; 836 int error; 837 838 msg.msg_name = 0; 839 msg.msg_namelen = 0; 840 msg.msg_iov = &aiov; 841 msg.msg_iovlen = 1; 842 aiov.iov_base = uap->buf; 843 aiov.iov_len = uap->len; 844 msg.msg_control = 0; 845 msg.msg_flags = 0; 846 error = sendit(td, uap->s, &msg, uap->flags); 847 return (error); 848 } 849 850 /* 851 * MPSAFE 852 */ 853 int 854 osendmsg(td, uap) 855 struct thread *td; 856 struct osendmsg_args /* { 857 int s; 858 caddr_t msg; 859 int flags; 860 } */ *uap; 861 { 862 struct msghdr msg; 863 struct iovec *iov; 864 int error; 865 866 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 867 if (error) 868 return (error); 869 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 870 if (error) 871 return (error); 872 msg.msg_iov = iov; 873 msg.msg_flags = MSG_COMPAT; 874 error = sendit(td, uap->s, &msg, uap->flags); 875 free(iov, M_IOV); 876 return (error); 877 } 878 #endif 879 880 /* 881 * MPSAFE 882 */ 883 int 884 sendmsg(td, uap) 885 struct thread *td; 886 struct sendmsg_args /* { 887 int s; 888 caddr_t msg; 889 int flags; 890 } */ *uap; 891 { 892 struct msghdr msg; 893 struct iovec *iov; 894 int error; 895 896 error = copyin(uap->msg, &msg, sizeof (msg)); 897 if (error) 898 return (error); 899 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 900 if (error) 901 return (error); 902 msg.msg_iov = iov; 903 #ifdef COMPAT_OLDSOCK 904 msg.msg_flags = 0; 905 #endif 906 error = sendit(td, uap->s, &msg, uap->flags); 907 free(iov, M_IOV); 908 return (error); 909 } 910 911 static int 912 recvit(td, s, mp, namelenp) 913 struct thread *td; 914 int s; 915 struct msghdr *mp; 916 void *namelenp; 917 { 918 struct uio auio; 919 struct iovec *iov; 920 int i; 921 socklen_t len; 922 int error; 923 struct mbuf *m, *control = 0; 924 caddr_t ctlbuf; 925 struct file *fp; 926 struct socket *so; 927 struct sockaddr *fromsa = 0; 928 #ifdef KTRACE 929 struct uio *ktruio = NULL; 930 #endif 931 932 NET_LOCK_GIANT(); 933 error = getsock(td->td_proc->p_fd, s, &fp); 934 if (error) { 935 NET_UNLOCK_GIANT(); 936 return (error); 937 } 938 so = fp->f_data; 939 940 #ifdef MAC 941 SOCK_LOCK(so); 942 error = mac_check_socket_receive(td->td_ucred, so); 943 SOCK_UNLOCK(so); 944 if (error) { 945 fdrop(fp, td); 946 NET_UNLOCK_GIANT(); 947 return (error); 948 } 949 #endif 950 951 auio.uio_iov = mp->msg_iov; 952 auio.uio_iovcnt = mp->msg_iovlen; 953 auio.uio_segflg = UIO_USERSPACE; 954 auio.uio_rw = UIO_READ; 955 auio.uio_td = td; 956 auio.uio_offset = 0; /* XXX */ 957 auio.uio_resid = 0; 958 iov = mp->msg_iov; 959 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 960 if ((auio.uio_resid += iov->iov_len) < 0) { 961 fdrop(fp, td); 962 NET_UNLOCK_GIANT(); 963 return (EINVAL); 964 } 965 } 966 #ifdef KTRACE 967 if (KTRPOINT(td, KTR_GENIO)) 968 ktruio = cloneuio(&auio); 969 #endif 970 len = auio.uio_resid; 971 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 972 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 973 &mp->msg_flags); 974 if (error) { 975 if (auio.uio_resid != (int)len && (error == ERESTART || 976 error == EINTR || error == EWOULDBLOCK)) 977 error = 0; 978 } 979 #ifdef KTRACE 980 if (ktruio != NULL) { 981 ktruio->uio_resid = (int)len - auio.uio_resid; 982 ktrgenio(s, UIO_READ, ktruio, error); 983 } 984 #endif 985 if (error) 986 goto out; 987 td->td_retval[0] = (int)len - auio.uio_resid; 988 if (mp->msg_name) { 989 len = mp->msg_namelen; 990 if (len <= 0 || fromsa == 0) 991 len = 0; 992 else { 993 /* save sa_len before it is destroyed by MSG_COMPAT */ 994 len = MIN(len, fromsa->sa_len); 995 #ifdef COMPAT_OLDSOCK 996 if (mp->msg_flags & MSG_COMPAT) 997 ((struct osockaddr *)fromsa)->sa_family = 998 fromsa->sa_family; 999 #endif 1000 error = copyout(fromsa, mp->msg_name, (unsigned)len); 1001 if (error) 1002 goto out; 1003 } 1004 mp->msg_namelen = len; 1005 if (namelenp && 1006 (error = copyout(&len, namelenp, sizeof (socklen_t)))) { 1007 #ifdef COMPAT_OLDSOCK 1008 if (mp->msg_flags & MSG_COMPAT) 1009 error = 0; /* old recvfrom didn't check */ 1010 else 1011 #endif 1012 goto out; 1013 } 1014 } 1015 if (mp->msg_control) { 1016 #ifdef COMPAT_OLDSOCK 1017 /* 1018 * We assume that old recvmsg calls won't receive access 1019 * rights and other control info, esp. as control info 1020 * is always optional and those options didn't exist in 4.3. 1021 * If we receive rights, trim the cmsghdr; anything else 1022 * is tossed. 1023 */ 1024 if (control && mp->msg_flags & MSG_COMPAT) { 1025 if (mtod(control, struct cmsghdr *)->cmsg_level != 1026 SOL_SOCKET || 1027 mtod(control, struct cmsghdr *)->cmsg_type != 1028 SCM_RIGHTS) { 1029 mp->msg_controllen = 0; 1030 goto out; 1031 } 1032 control->m_len -= sizeof (struct cmsghdr); 1033 control->m_data += sizeof (struct cmsghdr); 1034 } 1035 #endif 1036 len = mp->msg_controllen; 1037 m = control; 1038 mp->msg_controllen = 0; 1039 ctlbuf = mp->msg_control; 1040 1041 while (m && len > 0) { 1042 unsigned int tocopy; 1043 1044 if (len >= m->m_len) 1045 tocopy = m->m_len; 1046 else { 1047 mp->msg_flags |= MSG_CTRUNC; 1048 tocopy = len; 1049 } 1050 1051 if ((error = copyout(mtod(m, caddr_t), 1052 ctlbuf, tocopy)) != 0) 1053 goto out; 1054 1055 ctlbuf += tocopy; 1056 len -= tocopy; 1057 m = m->m_next; 1058 } 1059 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1060 } 1061 out: 1062 fdrop(fp, td); 1063 NET_UNLOCK_GIANT(); 1064 if (fromsa) 1065 FREE(fromsa, M_SONAME); 1066 if (control) 1067 m_freem(control); 1068 return (error); 1069 } 1070 1071 /* 1072 * MPSAFE 1073 */ 1074 int 1075 recvfrom(td, uap) 1076 struct thread *td; 1077 register struct recvfrom_args /* { 1078 int s; 1079 caddr_t buf; 1080 size_t len; 1081 int flags; 1082 struct sockaddr * __restrict from; 1083 socklen_t * __restrict fromlenaddr; 1084 } */ *uap; 1085 { 1086 struct msghdr msg; 1087 struct iovec aiov; 1088 int error; 1089 1090 if (uap->fromlenaddr) { 1091 error = copyin(uap->fromlenaddr, 1092 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1093 if (error) 1094 goto done2; 1095 } else { 1096 msg.msg_namelen = 0; 1097 } 1098 msg.msg_name = uap->from; 1099 msg.msg_iov = &aiov; 1100 msg.msg_iovlen = 1; 1101 aiov.iov_base = uap->buf; 1102 aiov.iov_len = uap->len; 1103 msg.msg_control = 0; 1104 msg.msg_flags = uap->flags; 1105 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1106 done2: 1107 return(error); 1108 } 1109 1110 #ifdef COMPAT_OLDSOCK 1111 /* 1112 * MPSAFE 1113 */ 1114 int 1115 orecvfrom(td, uap) 1116 struct thread *td; 1117 struct recvfrom_args *uap; 1118 { 1119 1120 uap->flags |= MSG_COMPAT; 1121 return (recvfrom(td, uap)); 1122 } 1123 #endif 1124 1125 1126 #ifdef COMPAT_OLDSOCK 1127 /* 1128 * MPSAFE 1129 */ 1130 int 1131 orecv(td, uap) 1132 struct thread *td; 1133 register struct orecv_args /* { 1134 int s; 1135 caddr_t buf; 1136 int len; 1137 int flags; 1138 } */ *uap; 1139 { 1140 struct msghdr msg; 1141 struct iovec aiov; 1142 int error; 1143 1144 msg.msg_name = 0; 1145 msg.msg_namelen = 0; 1146 msg.msg_iov = &aiov; 1147 msg.msg_iovlen = 1; 1148 aiov.iov_base = uap->buf; 1149 aiov.iov_len = uap->len; 1150 msg.msg_control = 0; 1151 msg.msg_flags = uap->flags; 1152 error = recvit(td, uap->s, &msg, NULL); 1153 return (error); 1154 } 1155 1156 /* 1157 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1158 * overlays the new one, missing only the flags, and with the (old) access 1159 * rights where the control fields are now. 1160 * 1161 * MPSAFE 1162 */ 1163 int 1164 orecvmsg(td, uap) 1165 struct thread *td; 1166 struct orecvmsg_args /* { 1167 int s; 1168 struct omsghdr *msg; 1169 int flags; 1170 } */ *uap; 1171 { 1172 struct msghdr msg; 1173 struct iovec *iov; 1174 int error; 1175 1176 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1177 if (error) 1178 return (error); 1179 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1180 if (error) 1181 return (error); 1182 msg.msg_flags = uap->flags | MSG_COMPAT; 1183 msg.msg_iov = iov; 1184 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1185 if (msg.msg_controllen && error == 0) 1186 error = copyout(&msg.msg_controllen, 1187 &uap->msg->msg_accrightslen, sizeof (int)); 1188 free(iov, M_IOV); 1189 return (error); 1190 } 1191 #endif 1192 1193 /* 1194 * MPSAFE 1195 */ 1196 int 1197 recvmsg(td, uap) 1198 struct thread *td; 1199 struct recvmsg_args /* { 1200 int s; 1201 struct msghdr *msg; 1202 int flags; 1203 } */ *uap; 1204 { 1205 struct msghdr msg; 1206 struct iovec *uiov, *iov; 1207 int error; 1208 1209 error = copyin(uap->msg, &msg, sizeof (msg)); 1210 if (error) 1211 return (error); 1212 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1213 if (error) 1214 return (error); 1215 msg.msg_flags = uap->flags; 1216 #ifdef COMPAT_OLDSOCK 1217 msg.msg_flags &= ~MSG_COMPAT; 1218 #endif 1219 uiov = msg.msg_iov; 1220 msg.msg_iov = iov; 1221 error = recvit(td, uap->s, &msg, NULL); 1222 if (error == 0) { 1223 msg.msg_iov = uiov; 1224 error = copyout(&msg, uap->msg, sizeof(msg)); 1225 } 1226 free(iov, M_IOV); 1227 return (error); 1228 } 1229 1230 /* 1231 * MPSAFE 1232 */ 1233 /* ARGSUSED */ 1234 int 1235 shutdown(td, uap) 1236 struct thread *td; 1237 register struct shutdown_args /* { 1238 int s; 1239 int how; 1240 } */ *uap; 1241 { 1242 struct socket *so; 1243 struct file *fp; 1244 int error; 1245 1246 NET_LOCK_GIANT(); 1247 error = getsock(td->td_proc->p_fd, uap->s, &fp); 1248 if (error == 0) { 1249 so = fp->f_data; 1250 error = soshutdown(so, uap->how); 1251 fdrop(fp, td); 1252 } 1253 NET_UNLOCK_GIANT(); 1254 return (error); 1255 } 1256 1257 /* 1258 * MPSAFE 1259 */ 1260 /* ARGSUSED */ 1261 int 1262 setsockopt(td, uap) 1263 struct thread *td; 1264 register struct setsockopt_args /* { 1265 int s; 1266 int level; 1267 int name; 1268 caddr_t val; 1269 int valsize; 1270 } */ *uap; 1271 { 1272 1273 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1274 uap->val, UIO_USERSPACE, uap->valsize)); 1275 } 1276 1277 int 1278 kern_setsockopt(td, s, level, name, val, valseg, valsize) 1279 struct thread *td; 1280 int s; 1281 int level; 1282 int name; 1283 void *val; 1284 enum uio_seg valseg; 1285 socklen_t valsize; 1286 { 1287 int error; 1288 struct socket *so; 1289 struct file *fp; 1290 struct sockopt sopt; 1291 1292 if (val == NULL && valsize != 0) 1293 return (EFAULT); 1294 if (valsize < 0) 1295 return (EINVAL); 1296 1297 sopt.sopt_dir = SOPT_SET; 1298 sopt.sopt_level = level; 1299 sopt.sopt_name = name; 1300 sopt.sopt_val = val; 1301 sopt.sopt_valsize = valsize; 1302 switch (valseg) { 1303 case UIO_USERSPACE: 1304 sopt.sopt_td = td; 1305 break; 1306 case UIO_SYSSPACE: 1307 sopt.sopt_td = NULL; 1308 break; 1309 default: 1310 panic("kern_setsockopt called with bad valseg"); 1311 } 1312 1313 NET_LOCK_GIANT(); 1314 error = getsock(td->td_proc->p_fd, s, &fp); 1315 if (error == 0) { 1316 so = fp->f_data; 1317 error = sosetopt(so, &sopt); 1318 fdrop(fp, td); 1319 } 1320 NET_UNLOCK_GIANT(); 1321 return(error); 1322 } 1323 1324 /* 1325 * MPSAFE 1326 */ 1327 /* ARGSUSED */ 1328 int 1329 getsockopt(td, uap) 1330 struct thread *td; 1331 register struct getsockopt_args /* { 1332 int s; 1333 int level; 1334 int name; 1335 void * __restrict val; 1336 socklen_t * __restrict avalsize; 1337 } */ *uap; 1338 { 1339 socklen_t valsize; 1340 int error; 1341 1342 if (uap->val) { 1343 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1344 if (error) 1345 return (error); 1346 } 1347 1348 error = kern_getsockopt(td, uap->s, uap->level, uap->name, 1349 uap->val, UIO_USERSPACE, &valsize); 1350 1351 if (error == 0) 1352 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1353 return (error); 1354 } 1355 1356 /* 1357 * Kernel version of getsockopt. 1358 * optval can be a userland or userspace. optlen is always a kernel pointer. 1359 */ 1360 int 1361 kern_getsockopt(td, s, level, name, val, valseg, valsize) 1362 struct thread *td; 1363 int s; 1364 int level; 1365 int name; 1366 void *val; 1367 enum uio_seg valseg; 1368 socklen_t *valsize; 1369 { 1370 int error; 1371 struct socket *so; 1372 struct file *fp; 1373 struct sockopt sopt; 1374 1375 if (val == NULL) 1376 *valsize = 0; 1377 if (*valsize < 0) 1378 return (EINVAL); 1379 1380 sopt.sopt_dir = SOPT_GET; 1381 sopt.sopt_level = level; 1382 sopt.sopt_name = name; 1383 sopt.sopt_val = val; 1384 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1385 switch (valseg) { 1386 case UIO_USERSPACE: 1387 sopt.sopt_td = td; 1388 break; 1389 case UIO_SYSSPACE: 1390 sopt.sopt_td = NULL; 1391 break; 1392 default: 1393 panic("kern_getsockopt called with bad valseg"); 1394 } 1395 1396 NET_LOCK_GIANT(); 1397 error = getsock(td->td_proc->p_fd, s, &fp); 1398 if (error == 0) { 1399 so = fp->f_data; 1400 error = sogetopt(so, &sopt); 1401 *valsize = sopt.sopt_valsize; 1402 fdrop(fp, td); 1403 } 1404 NET_UNLOCK_GIANT(); 1405 return (error); 1406 } 1407 1408 /* 1409 * getsockname1() - Get socket name. 1410 * 1411 * MPSAFE 1412 */ 1413 /* ARGSUSED */ 1414 static int 1415 getsockname1(td, uap, compat) 1416 struct thread *td; 1417 register struct getsockname_args /* { 1418 int fdes; 1419 struct sockaddr * __restrict asa; 1420 socklen_t * __restrict alen; 1421 } */ *uap; 1422 int compat; 1423 { 1424 struct socket *so; 1425 struct sockaddr *sa; 1426 struct file *fp; 1427 socklen_t len; 1428 int error; 1429 1430 NET_LOCK_GIANT(); 1431 error = getsock(td->td_proc->p_fd, uap->fdes, &fp); 1432 if (error) 1433 goto done2; 1434 so = fp->f_data; 1435 error = copyin(uap->alen, &len, sizeof (len)); 1436 if (error) 1437 goto done1; 1438 if (len < 0) { 1439 error = EINVAL; 1440 goto done1; 1441 } 1442 sa = 0; 1443 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1444 if (error) 1445 goto bad; 1446 if (sa == 0) { 1447 len = 0; 1448 goto gotnothing; 1449 } 1450 1451 len = MIN(len, sa->sa_len); 1452 #ifdef COMPAT_OLDSOCK 1453 if (compat) 1454 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1455 #endif 1456 error = copyout(sa, uap->asa, (u_int)len); 1457 if (error == 0) 1458 gotnothing: 1459 error = copyout(&len, uap->alen, sizeof (len)); 1460 bad: 1461 if (sa) 1462 FREE(sa, M_SONAME); 1463 done1: 1464 fdrop(fp, td); 1465 done2: 1466 NET_UNLOCK_GIANT(); 1467 return (error); 1468 } 1469 1470 /* 1471 * MPSAFE 1472 */ 1473 int 1474 getsockname(td, uap) 1475 struct thread *td; 1476 struct getsockname_args *uap; 1477 { 1478 1479 return (getsockname1(td, uap, 0)); 1480 } 1481 1482 #ifdef COMPAT_OLDSOCK 1483 /* 1484 * MPSAFE 1485 */ 1486 int 1487 ogetsockname(td, uap) 1488 struct thread *td; 1489 struct getsockname_args *uap; 1490 { 1491 1492 return (getsockname1(td, uap, 1)); 1493 } 1494 #endif /* COMPAT_OLDSOCK */ 1495 1496 /* 1497 * getpeername1() - Get name of peer for connected socket. 1498 * 1499 * MPSAFE 1500 */ 1501 /* ARGSUSED */ 1502 static int 1503 getpeername1(td, uap, compat) 1504 struct thread *td; 1505 register struct getpeername_args /* { 1506 int fdes; 1507 struct sockaddr * __restrict asa; 1508 socklen_t * __restrict alen; 1509 } */ *uap; 1510 int compat; 1511 { 1512 struct socket *so; 1513 struct sockaddr *sa; 1514 struct file *fp; 1515 socklen_t len; 1516 int error; 1517 1518 NET_LOCK_GIANT(); 1519 error = getsock(td->td_proc->p_fd, uap->fdes, &fp); 1520 if (error) 1521 goto done2; 1522 so = fp->f_data; 1523 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1524 error = ENOTCONN; 1525 goto done1; 1526 } 1527 error = copyin(uap->alen, &len, sizeof (len)); 1528 if (error) 1529 goto done1; 1530 if (len < 0) { 1531 error = EINVAL; 1532 goto done1; 1533 } 1534 sa = 0; 1535 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1536 if (error) 1537 goto bad; 1538 if (sa == 0) { 1539 len = 0; 1540 goto gotnothing; 1541 } 1542 len = MIN(len, sa->sa_len); 1543 #ifdef COMPAT_OLDSOCK 1544 if (compat) 1545 ((struct osockaddr *)sa)->sa_family = 1546 sa->sa_family; 1547 #endif 1548 error = copyout(sa, uap->asa, (u_int)len); 1549 if (error) 1550 goto bad; 1551 gotnothing: 1552 error = copyout(&len, uap->alen, sizeof (len)); 1553 bad: 1554 if (sa) 1555 FREE(sa, M_SONAME); 1556 done1: 1557 fdrop(fp, td); 1558 done2: 1559 NET_UNLOCK_GIANT(); 1560 return (error); 1561 } 1562 1563 /* 1564 * MPSAFE 1565 */ 1566 int 1567 getpeername(td, uap) 1568 struct thread *td; 1569 struct getpeername_args *uap; 1570 { 1571 1572 return (getpeername1(td, uap, 0)); 1573 } 1574 1575 #ifdef COMPAT_OLDSOCK 1576 /* 1577 * MPSAFE 1578 */ 1579 int 1580 ogetpeername(td, uap) 1581 struct thread *td; 1582 struct ogetpeername_args *uap; 1583 { 1584 1585 /* XXX uap should have type `getpeername_args *' to begin with. */ 1586 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1587 } 1588 #endif /* COMPAT_OLDSOCK */ 1589 1590 int 1591 sockargs(mp, buf, buflen, type) 1592 struct mbuf **mp; 1593 caddr_t buf; 1594 int buflen, type; 1595 { 1596 register struct sockaddr *sa; 1597 register struct mbuf *m; 1598 int error; 1599 1600 if ((u_int)buflen > MLEN) { 1601 #ifdef COMPAT_OLDSOCK 1602 if (type == MT_SONAME && (u_int)buflen <= 112) 1603 buflen = MLEN; /* unix domain compat. hack */ 1604 else 1605 #endif 1606 if ((u_int)buflen > MCLBYTES) 1607 return (EINVAL); 1608 } 1609 m = m_get(M_TRYWAIT, type); 1610 if (m == NULL) 1611 return (ENOBUFS); 1612 if ((u_int)buflen > MLEN) { 1613 MCLGET(m, M_TRYWAIT); 1614 if ((m->m_flags & M_EXT) == 0) { 1615 m_free(m); 1616 return (ENOBUFS); 1617 } 1618 } 1619 m->m_len = buflen; 1620 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1621 if (error) 1622 (void) m_free(m); 1623 else { 1624 *mp = m; 1625 if (type == MT_SONAME) { 1626 sa = mtod(m, struct sockaddr *); 1627 1628 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1629 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1630 sa->sa_family = sa->sa_len; 1631 #endif 1632 sa->sa_len = buflen; 1633 } 1634 } 1635 return (error); 1636 } 1637 1638 int 1639 getsockaddr(namp, uaddr, len) 1640 struct sockaddr **namp; 1641 caddr_t uaddr; 1642 size_t len; 1643 { 1644 struct sockaddr *sa; 1645 int error; 1646 1647 if (len > SOCK_MAXADDRLEN) 1648 return (ENAMETOOLONG); 1649 if (len < offsetof(struct sockaddr, sa_data[0])) 1650 return (EINVAL); 1651 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1652 error = copyin(uaddr, sa, len); 1653 if (error) { 1654 FREE(sa, M_SONAME); 1655 } else { 1656 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1657 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1658 sa->sa_family = sa->sa_len; 1659 #endif 1660 sa->sa_len = len; 1661 *namp = sa; 1662 } 1663 return (error); 1664 } 1665 1666 /* 1667 * Detach mapped page and release resources back to the system. 1668 */ 1669 void 1670 sf_buf_mext(void *addr, void *args) 1671 { 1672 vm_page_t m; 1673 1674 m = sf_buf_page(args); 1675 sf_buf_free(args); 1676 vm_page_lock_queues(); 1677 vm_page_unwire(m, 0); 1678 /* 1679 * Check for the object going away on us. This can 1680 * happen since we don't hold a reference to it. 1681 * If so, we're responsible for freeing the page. 1682 */ 1683 if (m->wire_count == 0 && m->object == NULL) 1684 vm_page_free(m); 1685 vm_page_unlock_queues(); 1686 } 1687 1688 /* 1689 * sendfile(2) 1690 * 1691 * MPSAFE 1692 * 1693 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1694 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1695 * 1696 * Send a file specified by 'fd' and starting at 'offset' to a socket 1697 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1698 * nbytes == 0. Optionally add a header and/or trailer to the socket 1699 * output. If specified, write the total number of bytes sent into *sbytes. 1700 * 1701 */ 1702 int 1703 sendfile(struct thread *td, struct sendfile_args *uap) 1704 { 1705 1706 return (do_sendfile(td, uap, 0)); 1707 } 1708 1709 #ifdef COMPAT_FREEBSD4 1710 int 1711 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1712 { 1713 struct sendfile_args args; 1714 1715 args.fd = uap->fd; 1716 args.s = uap->s; 1717 args.offset = uap->offset; 1718 args.nbytes = uap->nbytes; 1719 args.hdtr = uap->hdtr; 1720 args.sbytes = uap->sbytes; 1721 args.flags = uap->flags; 1722 1723 return (do_sendfile(td, &args, 1)); 1724 } 1725 #endif /* COMPAT_FREEBSD4 */ 1726 1727 static int 1728 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1729 { 1730 struct vnode *vp; 1731 struct vm_object *obj; 1732 struct socket *so = NULL; 1733 struct mbuf *m, *m_header = NULL; 1734 struct sf_buf *sf; 1735 struct vm_page *pg; 1736 struct writev_args nuap; 1737 struct sf_hdtr hdtr; 1738 struct uio *hdr_uio = NULL; 1739 off_t off, xfsize, hdtr_size, sbytes = 0; 1740 int error, headersize = 0, headersent = 0; 1741 1742 mtx_lock(&Giant); 1743 1744 hdtr_size = 0; 1745 1746 /* 1747 * The descriptor must be a regular file and have a backing VM object. 1748 */ 1749 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1750 goto done; 1751 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1752 obj = vp->v_object; 1753 VOP_UNLOCK(vp, 0, td); 1754 if (obj == NULL) { 1755 error = EINVAL; 1756 goto done; 1757 } 1758 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1759 goto done; 1760 if (so->so_type != SOCK_STREAM) { 1761 error = EINVAL; 1762 goto done; 1763 } 1764 if ((so->so_state & SS_ISCONNECTED) == 0) { 1765 error = ENOTCONN; 1766 goto done; 1767 } 1768 if (uap->offset < 0) { 1769 error = EINVAL; 1770 goto done; 1771 } 1772 1773 #ifdef MAC 1774 SOCK_LOCK(so); 1775 error = mac_check_socket_send(td->td_ucred, so); 1776 SOCK_UNLOCK(so); 1777 if (error) 1778 goto done; 1779 #endif 1780 1781 /* 1782 * If specified, get the pointer to the sf_hdtr struct for 1783 * any headers/trailers. 1784 */ 1785 if (uap->hdtr != NULL) { 1786 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1787 if (error) 1788 goto done; 1789 /* 1790 * Send any headers. 1791 */ 1792 if (hdtr.headers != NULL) { 1793 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1794 if (error) 1795 goto done; 1796 hdr_uio->uio_td = td; 1797 hdr_uio->uio_rw = UIO_WRITE; 1798 if (hdr_uio->uio_resid > 0) { 1799 m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0); 1800 if (m_header == NULL) 1801 goto done; 1802 headersize = m_header->m_pkthdr.len; 1803 if (compat) 1804 sbytes += headersize; 1805 } 1806 } 1807 } 1808 1809 /* 1810 * Protect against multiple writers to the socket. 1811 */ 1812 SOCKBUF_LOCK(&so->so_snd); 1813 (void) sblock(&so->so_snd, M_WAITOK); 1814 SOCKBUF_UNLOCK(&so->so_snd); 1815 1816 /* 1817 * Loop through the pages in the file, starting with the requested 1818 * offset. Get a file page (do I/O if necessary), map the file page 1819 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1820 * it on the socket. 1821 */ 1822 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1823 vm_pindex_t pindex; 1824 vm_offset_t pgoff; 1825 1826 pindex = OFF_TO_IDX(off); 1827 VM_OBJECT_LOCK(obj); 1828 retry_lookup: 1829 /* 1830 * Calculate the amount to transfer. Not to exceed a page, 1831 * the EOF, or the passed in nbytes. 1832 */ 1833 xfsize = obj->un_pager.vnp.vnp_size - off; 1834 VM_OBJECT_UNLOCK(obj); 1835 if (xfsize > PAGE_SIZE) 1836 xfsize = PAGE_SIZE; 1837 pgoff = (vm_offset_t)(off & PAGE_MASK); 1838 if (PAGE_SIZE - pgoff < xfsize) 1839 xfsize = PAGE_SIZE - pgoff; 1840 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1841 xfsize = uap->nbytes - sbytes; 1842 if (xfsize <= 0) { 1843 if (m_header != NULL) { 1844 m = m_header; 1845 m_header = NULL; 1846 SOCKBUF_LOCK(&so->so_snd); 1847 goto retry_space; 1848 } else 1849 break; 1850 } 1851 /* 1852 * Optimize the non-blocking case by looking at the socket space 1853 * before going to the extra work of constituting the sf_buf. 1854 */ 1855 SOCKBUF_LOCK(&so->so_snd); 1856 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1857 if (so->so_snd.sb_state & SBS_CANTSENDMORE) 1858 error = EPIPE; 1859 else 1860 error = EAGAIN; 1861 sbunlock(&so->so_snd); 1862 SOCKBUF_UNLOCK(&so->so_snd); 1863 goto done; 1864 } 1865 SOCKBUF_UNLOCK(&so->so_snd); 1866 VM_OBJECT_LOCK(obj); 1867 /* 1868 * Attempt to look up the page. 1869 * 1870 * Allocate if not found 1871 * 1872 * Wait and loop if busy. 1873 */ 1874 pg = vm_page_lookup(obj, pindex); 1875 1876 if (pg == NULL) { 1877 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY | 1878 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1879 if (pg == NULL) { 1880 VM_OBJECT_UNLOCK(obj); 1881 VM_WAIT; 1882 VM_OBJECT_LOCK(obj); 1883 goto retry_lookup; 1884 } 1885 vm_page_lock_queues(); 1886 } else { 1887 vm_page_lock_queues(); 1888 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1889 goto retry_lookup; 1890 /* 1891 * Wire the page so it does not get ripped out from 1892 * under us. 1893 */ 1894 vm_page_wire(pg); 1895 } 1896 1897 /* 1898 * If page is not valid for what we need, initiate I/O 1899 */ 1900 1901 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) { 1902 VM_OBJECT_UNLOCK(obj); 1903 } else if (uap->flags & SF_NODISKIO) { 1904 error = EBUSY; 1905 } else { 1906 int bsize, resid; 1907 1908 /* 1909 * Ensure that our page is still around when the I/O 1910 * completes. 1911 */ 1912 vm_page_io_start(pg); 1913 vm_page_unlock_queues(); 1914 VM_OBJECT_UNLOCK(obj); 1915 1916 /* 1917 * Get the page from backing store. 1918 */ 1919 bsize = vp->v_mount->mnt_stat.f_iosize; 1920 vn_lock(vp, LK_SHARED | LK_RETRY, td); 1921 /* 1922 * XXXMAC: Because we don't have fp->f_cred here, 1923 * we pass in NOCRED. This is probably wrong, but 1924 * is consistent with our original implementation. 1925 */ 1926 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1927 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1928 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 1929 td->td_ucred, NOCRED, &resid, td); 1930 VOP_UNLOCK(vp, 0, td); 1931 VM_OBJECT_LOCK(obj); 1932 vm_page_lock_queues(); 1933 vm_page_io_finish(pg); 1934 if (!error) 1935 VM_OBJECT_UNLOCK(obj); 1936 mbstat.sf_iocnt++; 1937 } 1938 1939 if (error) { 1940 vm_page_unwire(pg, 0); 1941 /* 1942 * See if anyone else might know about this page. 1943 * If not and it is not valid, then free it. 1944 */ 1945 if (pg->wire_count == 0 && pg->valid == 0 && 1946 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1947 pg->hold_count == 0) { 1948 vm_page_free(pg); 1949 } 1950 vm_page_unlock_queues(); 1951 VM_OBJECT_UNLOCK(obj); 1952 SOCKBUF_LOCK(&so->so_snd); 1953 sbunlock(&so->so_snd); 1954 SOCKBUF_UNLOCK(&so->so_snd); 1955 goto done; 1956 } 1957 vm_page_unlock_queues(); 1958 1959 /* 1960 * Get a sendfile buf. We usually wait as long as necessary, 1961 * but this wait can be interrupted. 1962 */ 1963 if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) { 1964 mbstat.sf_allocfail++; 1965 vm_page_lock_queues(); 1966 vm_page_unwire(pg, 0); 1967 if (pg->wire_count == 0 && pg->object == NULL) 1968 vm_page_free(pg); 1969 vm_page_unlock_queues(); 1970 SOCKBUF_LOCK(&so->so_snd); 1971 sbunlock(&so->so_snd); 1972 SOCKBUF_UNLOCK(&so->so_snd); 1973 error = EINTR; 1974 goto done; 1975 } 1976 1977 /* 1978 * Get an mbuf header and set it up as having external storage. 1979 */ 1980 if (m_header) 1981 MGET(m, M_TRYWAIT, MT_DATA); 1982 else 1983 MGETHDR(m, M_TRYWAIT, MT_DATA); 1984 if (m == NULL) { 1985 error = ENOBUFS; 1986 sf_buf_mext((void *)sf_buf_kva(sf), sf); 1987 SOCKBUF_LOCK(&so->so_snd); 1988 sbunlock(&so->so_snd); 1989 SOCKBUF_UNLOCK(&so->so_snd); 1990 goto done; 1991 } 1992 /* 1993 * Setup external storage for mbuf. 1994 */ 1995 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY, 1996 EXT_SFBUF); 1997 m->m_data = (char *)sf_buf_kva(sf) + pgoff; 1998 m->m_pkthdr.len = m->m_len = xfsize; 1999 2000 if (m_header) { 2001 m_cat(m_header, m); 2002 m = m_header; 2003 m_header = NULL; 2004 m_fixhdr(m); 2005 } 2006 2007 /* 2008 * Add the buffer to the socket buffer chain. 2009 */ 2010 SOCKBUF_LOCK(&so->so_snd); 2011 retry_space: 2012 /* 2013 * Make sure that the socket is still able to take more data. 2014 * CANTSENDMORE being true usually means that the connection 2015 * was closed. so_error is true when an error was sensed after 2016 * a previous send. 2017 * The state is checked after the page mapping and buffer 2018 * allocation above since those operations may block and make 2019 * any socket checks stale. From this point forward, nothing 2020 * blocks before the pru_send (or more accurately, any blocking 2021 * results in a loop back to here to re-check). 2022 */ 2023 SOCKBUF_LOCK_ASSERT(&so->so_snd); 2024 if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) { 2025 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2026 error = EPIPE; 2027 } else { 2028 error = so->so_error; 2029 so->so_error = 0; 2030 } 2031 m_freem(m); 2032 sbunlock(&so->so_snd); 2033 SOCKBUF_UNLOCK(&so->so_snd); 2034 goto done; 2035 } 2036 /* 2037 * Wait for socket space to become available. We do this just 2038 * after checking the connection state above in order to avoid 2039 * a race condition with sbwait(). 2040 */ 2041 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 2042 if (so->so_state & SS_NBIO) { 2043 m_freem(m); 2044 sbunlock(&so->so_snd); 2045 SOCKBUF_UNLOCK(&so->so_snd); 2046 error = EAGAIN; 2047 goto done; 2048 } 2049 error = sbwait(&so->so_snd); 2050 /* 2051 * An error from sbwait usually indicates that we've 2052 * been interrupted by a signal. If we've sent anything 2053 * then return bytes sent, otherwise return the error. 2054 */ 2055 if (error) { 2056 m_freem(m); 2057 sbunlock(&so->so_snd); 2058 SOCKBUF_UNLOCK(&so->so_snd); 2059 goto done; 2060 } 2061 goto retry_space; 2062 } 2063 SOCKBUF_UNLOCK(&so->so_snd); 2064 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2065 if (error) { 2066 SOCKBUF_LOCK(&so->so_snd); 2067 sbunlock(&so->so_snd); 2068 SOCKBUF_UNLOCK(&so->so_snd); 2069 goto done; 2070 } 2071 headersent = 1; 2072 } 2073 SOCKBUF_LOCK(&so->so_snd); 2074 sbunlock(&so->so_snd); 2075 SOCKBUF_UNLOCK(&so->so_snd); 2076 2077 /* 2078 * Send trailers. Wimp out and use writev(2). 2079 */ 2080 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 2081 nuap.fd = uap->s; 2082 nuap.iovp = hdtr.trailers; 2083 nuap.iovcnt = hdtr.trl_cnt; 2084 error = writev(td, &nuap); 2085 if (error) 2086 goto done; 2087 if (compat) 2088 sbytes += td->td_retval[0]; 2089 else 2090 hdtr_size += td->td_retval[0]; 2091 } 2092 2093 done: 2094 if (headersent) { 2095 if (!compat) 2096 hdtr_size += headersize; 2097 } else { 2098 if (compat) 2099 sbytes -= headersize; 2100 } 2101 /* 2102 * If there was no error we have to clear td->td_retval[0] 2103 * because it may have been set by writev. 2104 */ 2105 if (error == 0) { 2106 td->td_retval[0] = 0; 2107 } 2108 if (uap->sbytes != NULL) { 2109 if (!compat) 2110 sbytes += hdtr_size; 2111 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2112 } 2113 if (vp) 2114 vrele(vp); 2115 if (so) 2116 fputsock(so); 2117 if (hdr_uio != NULL) 2118 free(hdr_uio, M_IOV); 2119 if (m_header) 2120 m_freem(m_header); 2121 2122 mtx_unlock(&Giant); 2123 2124 if (error == ERESTART) 2125 error = EINTR; 2126 2127 return (error); 2128 } 2129