1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_compat.h" 39 #include "opt_ktrace.h" 40 #include "opt_mac.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/kernel.h> 45 #include <sys/lock.h> 46 #include <sys/mac.h> 47 #include <sys/mutex.h> 48 #include <sys/sysproto.h> 49 #include <sys/malloc.h> 50 #include <sys/filedesc.h> 51 #include <sys/event.h> 52 #include <sys/proc.h> 53 #include <sys/fcntl.h> 54 #include <sys/file.h> 55 #include <sys/filio.h> 56 #include <sys/mount.h> 57 #include <sys/mbuf.h> 58 #include <sys/protosw.h> 59 #include <sys/sf_buf.h> 60 #include <sys/socket.h> 61 #include <sys/socketvar.h> 62 #include <sys/signalvar.h> 63 #include <sys/syscallsubr.h> 64 #include <sys/sysctl.h> 65 #include <sys/uio.h> 66 #include <sys/vnode.h> 67 #ifdef KTRACE 68 #include <sys/ktrace.h> 69 #endif 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 #include <vm/vm_pageout.h> 75 #include <vm/vm_kern.h> 76 #include <vm/vm_extern.h> 77 78 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 79 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 80 81 static int accept1(struct thread *td, struct accept_args *uap, int compat); 82 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 83 static int getsockname1(struct thread *td, struct getsockname_args *uap, 84 int compat); 85 static int getpeername1(struct thread *td, struct getpeername_args *uap, 86 int compat); 87 88 /* 89 * NSFBUFS-related variables and associated sysctls 90 */ 91 int nsfbufs; 92 int nsfbufspeak; 93 int nsfbufsused; 94 95 SYSCTL_DECL(_kern_ipc); 96 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 97 "Maximum number of sendfile(2) sf_bufs available"); 98 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 99 "Number of sendfile(2) sf_bufs at peak usage"); 100 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 101 "Number of sendfile(2) sf_bufs in use"); 102 103 /* 104 * Convert a user file descriptor to a kernel file entry. A reference on the 105 * file entry is held upon returning. This is lighter weight than 106 * fgetsock(), which bumps the socket reference drops the file reference 107 * count instead, as this approach avoids several additional mutex operations 108 * associated with the additional reference count. 109 */ 110 static int 111 getsock(struct filedesc *fdp, int fd, struct file **fpp) 112 { 113 struct file *fp; 114 int error; 115 116 fp = NULL; 117 if (fdp == NULL) 118 error = EBADF; 119 else { 120 FILEDESC_LOCK_FAST(fdp); 121 fp = fget_locked(fdp, fd); 122 if (fp == NULL) 123 error = EBADF; 124 else if (fp->f_type != DTYPE_SOCKET) { 125 fp = NULL; 126 error = ENOTSOCK; 127 } else { 128 fhold(fp); 129 error = 0; 130 } 131 FILEDESC_UNLOCK_FAST(fdp); 132 } 133 *fpp = fp; 134 return (error); 135 } 136 137 /* 138 * System call interface to the socket abstraction. 139 */ 140 #if defined(COMPAT_43) 141 #define COMPAT_OLDSOCK 142 #endif 143 144 /* 145 * MPSAFE 146 */ 147 int 148 socket(td, uap) 149 struct thread *td; 150 register struct socket_args /* { 151 int domain; 152 int type; 153 int protocol; 154 } */ *uap; 155 { 156 struct filedesc *fdp; 157 struct socket *so; 158 struct file *fp; 159 int fd, error; 160 161 #ifdef MAC 162 error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type, 163 uap->protocol); 164 if (error) 165 return (error); 166 #endif 167 fdp = td->td_proc->p_fd; 168 error = falloc(td, &fp, &fd); 169 if (error) 170 return (error); 171 /* An extra reference on `fp' has been held for us by falloc(). */ 172 NET_LOCK_GIANT(); 173 error = socreate(uap->domain, &so, uap->type, uap->protocol, 174 td->td_ucred, td); 175 NET_UNLOCK_GIANT(); 176 if (error) { 177 fdclose(fdp, fp, fd, td); 178 } else { 179 FILEDESC_LOCK_FAST(fdp); 180 fp->f_data = so; /* already has ref count */ 181 fp->f_flag = FREAD|FWRITE; 182 fp->f_ops = &socketops; 183 fp->f_type = DTYPE_SOCKET; 184 FILEDESC_UNLOCK_FAST(fdp); 185 td->td_retval[0] = fd; 186 } 187 fdrop(fp, td); 188 return (error); 189 } 190 191 /* 192 * MPSAFE 193 */ 194 /* ARGSUSED */ 195 int 196 bind(td, uap) 197 struct thread *td; 198 register struct bind_args /* { 199 int s; 200 caddr_t name; 201 int namelen; 202 } */ *uap; 203 { 204 struct sockaddr *sa; 205 int error; 206 207 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 208 return (error); 209 210 return (kern_bind(td, uap->s, sa)); 211 } 212 213 int 214 kern_bind(td, fd, sa) 215 struct thread *td; 216 int fd; 217 struct sockaddr *sa; 218 { 219 struct socket *so; 220 struct file *fp; 221 int error; 222 223 NET_LOCK_GIANT(); 224 error = getsock(td->td_proc->p_fd, fd, &fp); 225 if (error) 226 goto done2; 227 so = fp->f_data; 228 #ifdef MAC 229 SOCK_LOCK(so); 230 error = mac_check_socket_bind(td->td_ucred, so, sa); 231 SOCK_UNLOCK(so); 232 if (error) 233 goto done1; 234 #endif 235 error = sobind(so, sa, td); 236 #ifdef MAC 237 done1: 238 #endif 239 fdrop(fp, td); 240 done2: 241 NET_UNLOCK_GIANT(); 242 FREE(sa, M_SONAME); 243 return (error); 244 } 245 246 /* 247 * MPSAFE 248 */ 249 /* ARGSUSED */ 250 int 251 listen(td, uap) 252 struct thread *td; 253 register struct listen_args /* { 254 int s; 255 int backlog; 256 } */ *uap; 257 { 258 struct socket *so; 259 struct file *fp; 260 int error; 261 262 NET_LOCK_GIANT(); 263 error = getsock(td->td_proc->p_fd, uap->s, &fp); 264 if (error == 0) { 265 so = fp->f_data; 266 #ifdef MAC 267 SOCK_LOCK(so); 268 error = mac_check_socket_listen(td->td_ucred, so); 269 SOCK_UNLOCK(so); 270 if (error) 271 goto done; 272 #endif 273 error = solisten(so, uap->backlog, td); 274 #ifdef MAC 275 done: 276 #endif 277 fdrop(fp, td); 278 } 279 NET_UNLOCK_GIANT(); 280 return(error); 281 } 282 283 /* 284 * accept1() 285 * MPSAFE 286 * 287 * XXXRW: Use getsock() instead of fgetsock() here to avoid additional mutex 288 * operations due to soref()/sorele(). 289 */ 290 static int 291 accept1(td, uap, compat) 292 struct thread *td; 293 register struct accept_args /* { 294 int s; 295 struct sockaddr * __restrict name; 296 socklen_t * __restrict anamelen; 297 } */ *uap; 298 int compat; 299 { 300 struct filedesc *fdp; 301 struct file *nfp = NULL; 302 struct sockaddr *sa = NULL; 303 socklen_t namelen; 304 int error; 305 struct socket *head, *so; 306 int fd; 307 u_int fflag; 308 pid_t pgid; 309 int tmp; 310 311 fdp = td->td_proc->p_fd; 312 if (uap->name) { 313 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 314 if(error) 315 return (error); 316 if (namelen < 0) 317 return (EINVAL); 318 } 319 NET_LOCK_GIANT(); 320 error = fgetsock(td, uap->s, &head, &fflag); 321 if (error) 322 goto done2; 323 if ((head->so_options & SO_ACCEPTCONN) == 0) { 324 error = EINVAL; 325 goto done; 326 } 327 #ifdef MAC 328 SOCK_LOCK(head); 329 error = mac_check_socket_accept(td->td_ucred, head); 330 SOCK_UNLOCK(head); 331 if (error != 0) 332 goto done; 333 #endif 334 error = falloc(td, &nfp, &fd); 335 if (error) 336 goto done; 337 ACCEPT_LOCK(); 338 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 339 ACCEPT_UNLOCK(); 340 error = EWOULDBLOCK; 341 goto noconnection; 342 } 343 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 344 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 345 head->so_error = ECONNABORTED; 346 break; 347 } 348 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 349 "accept", 0); 350 if (error) { 351 ACCEPT_UNLOCK(); 352 goto noconnection; 353 } 354 } 355 if (head->so_error) { 356 error = head->so_error; 357 head->so_error = 0; 358 ACCEPT_UNLOCK(); 359 goto noconnection; 360 } 361 so = TAILQ_FIRST(&head->so_comp); 362 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 363 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 364 365 /* 366 * Before changing the flags on the socket, we have to bump the 367 * reference count. Otherwise, if the protocol calls sofree(), 368 * the socket will be released due to a zero refcount. 369 */ 370 SOCK_LOCK(so); /* soref() and so_state update */ 371 soref(so); /* file descriptor reference */ 372 373 TAILQ_REMOVE(&head->so_comp, so, so_list); 374 head->so_qlen--; 375 so->so_state |= (head->so_state & SS_NBIO); 376 so->so_qstate &= ~SQ_COMP; 377 so->so_head = NULL; 378 379 SOCK_UNLOCK(so); 380 ACCEPT_UNLOCK(); 381 382 /* An extra reference on `nfp' has been held for us by falloc(). */ 383 td->td_retval[0] = fd; 384 385 /* connection has been removed from the listen queue */ 386 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 387 388 pgid = fgetown(&head->so_sigio); 389 if (pgid != 0) 390 fsetown(pgid, &so->so_sigio); 391 392 FILE_LOCK(nfp); 393 nfp->f_data = so; /* nfp has ref count from falloc */ 394 nfp->f_flag = fflag; 395 nfp->f_ops = &socketops; 396 nfp->f_type = DTYPE_SOCKET; 397 FILE_UNLOCK(nfp); 398 /* Sync socket nonblocking/async state with file flags */ 399 tmp = fflag & FNONBLOCK; 400 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 401 tmp = fflag & FASYNC; 402 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 403 sa = 0; 404 error = soaccept(so, &sa); 405 if (error) { 406 /* 407 * return a namelen of zero for older code which might 408 * ignore the return value from accept. 409 */ 410 if (uap->name != NULL) { 411 namelen = 0; 412 (void) copyout(&namelen, 413 uap->anamelen, sizeof(*uap->anamelen)); 414 } 415 goto noconnection; 416 } 417 if (sa == NULL) { 418 namelen = 0; 419 if (uap->name) 420 goto gotnoname; 421 error = 0; 422 goto done; 423 } 424 if (uap->name) { 425 /* check sa_len before it is destroyed */ 426 if (namelen > sa->sa_len) 427 namelen = sa->sa_len; 428 #ifdef COMPAT_OLDSOCK 429 if (compat) 430 ((struct osockaddr *)sa)->sa_family = 431 sa->sa_family; 432 #endif 433 error = copyout(sa, uap->name, (u_int)namelen); 434 if (!error) 435 gotnoname: 436 error = copyout(&namelen, 437 uap->anamelen, sizeof (*uap->anamelen)); 438 } 439 noconnection: 440 if (sa) 441 FREE(sa, M_SONAME); 442 443 /* 444 * close the new descriptor, assuming someone hasn't ripped it 445 * out from under us. 446 */ 447 if (error) 448 fdclose(fdp, nfp, fd, td); 449 450 /* 451 * Release explicitly held references before returning. 452 */ 453 done: 454 if (nfp != NULL) 455 fdrop(nfp, td); 456 fputsock(head); 457 done2: 458 NET_UNLOCK_GIANT(); 459 return (error); 460 } 461 462 /* 463 * MPSAFE (accept1() is MPSAFE) 464 */ 465 int 466 accept(td, uap) 467 struct thread *td; 468 struct accept_args *uap; 469 { 470 471 return (accept1(td, uap, 0)); 472 } 473 474 #ifdef COMPAT_OLDSOCK 475 /* 476 * MPSAFE (accept1() is MPSAFE) 477 */ 478 int 479 oaccept(td, uap) 480 struct thread *td; 481 struct accept_args *uap; 482 { 483 484 return (accept1(td, uap, 1)); 485 } 486 #endif /* COMPAT_OLDSOCK */ 487 488 /* 489 * MPSAFE 490 */ 491 /* ARGSUSED */ 492 int 493 connect(td, uap) 494 struct thread *td; 495 register struct connect_args /* { 496 int s; 497 caddr_t name; 498 int namelen; 499 } */ *uap; 500 { 501 struct sockaddr *sa; 502 int error; 503 504 error = getsockaddr(&sa, uap->name, uap->namelen); 505 if (error) 506 return (error); 507 508 return (kern_connect(td, uap->s, sa)); 509 } 510 511 512 int 513 kern_connect(td, fd, sa) 514 struct thread *td; 515 int fd; 516 struct sockaddr *sa; 517 { 518 struct socket *so; 519 struct file *fp; 520 int error; 521 int interrupted = 0; 522 523 NET_LOCK_GIANT(); 524 error = getsock(td->td_proc->p_fd, fd, &fp); 525 if (error) 526 goto done2; 527 so = fp->f_data; 528 if (so->so_state & SS_ISCONNECTING) { 529 error = EALREADY; 530 goto done1; 531 } 532 #ifdef MAC 533 SOCK_LOCK(so); 534 error = mac_check_socket_connect(td->td_ucred, so, sa); 535 SOCK_UNLOCK(so); 536 if (error) 537 goto bad; 538 #endif 539 error = soconnect(so, sa, td); 540 if (error) 541 goto bad; 542 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 543 error = EINPROGRESS; 544 goto done1; 545 } 546 SOCK_LOCK(so); 547 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 548 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 549 "connec", 0); 550 if (error) { 551 if (error == EINTR || error == ERESTART) 552 interrupted = 1; 553 break; 554 } 555 } 556 if (error == 0) { 557 error = so->so_error; 558 so->so_error = 0; 559 } 560 SOCK_UNLOCK(so); 561 bad: 562 if (!interrupted) 563 so->so_state &= ~SS_ISCONNECTING; 564 if (error == ERESTART) 565 error = EINTR; 566 done1: 567 fdrop(fp, td); 568 done2: 569 NET_UNLOCK_GIANT(); 570 FREE(sa, M_SONAME); 571 return (error); 572 } 573 574 /* 575 * MPSAFE 576 */ 577 int 578 socketpair(td, uap) 579 struct thread *td; 580 register struct socketpair_args /* { 581 int domain; 582 int type; 583 int protocol; 584 int *rsv; 585 } */ *uap; 586 { 587 register struct filedesc *fdp = td->td_proc->p_fd; 588 struct file *fp1, *fp2; 589 struct socket *so1, *so2; 590 int fd, error, sv[2]; 591 592 #ifdef MAC 593 /* We might want to have a separate check for socket pairs. */ 594 error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type, 595 uap->protocol); 596 if (error) 597 return (error); 598 #endif 599 600 NET_LOCK_GIANT(); 601 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 602 td->td_ucred, td); 603 if (error) 604 goto done2; 605 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 606 td->td_ucred, td); 607 if (error) 608 goto free1; 609 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 610 error = falloc(td, &fp1, &fd); 611 if (error) 612 goto free2; 613 sv[0] = fd; 614 fp1->f_data = so1; /* so1 already has ref count */ 615 error = falloc(td, &fp2, &fd); 616 if (error) 617 goto free3; 618 fp2->f_data = so2; /* so2 already has ref count */ 619 sv[1] = fd; 620 error = soconnect2(so1, so2); 621 if (error) 622 goto free4; 623 if (uap->type == SOCK_DGRAM) { 624 /* 625 * Datagram socket connection is asymmetric. 626 */ 627 error = soconnect2(so2, so1); 628 if (error) 629 goto free4; 630 } 631 FILE_LOCK(fp1); 632 fp1->f_flag = FREAD|FWRITE; 633 fp1->f_ops = &socketops; 634 fp1->f_type = DTYPE_SOCKET; 635 FILE_UNLOCK(fp1); 636 FILE_LOCK(fp2); 637 fp2->f_flag = FREAD|FWRITE; 638 fp2->f_ops = &socketops; 639 fp2->f_type = DTYPE_SOCKET; 640 FILE_UNLOCK(fp2); 641 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 642 fdrop(fp1, td); 643 fdrop(fp2, td); 644 goto done2; 645 free4: 646 fdclose(fdp, fp2, sv[1], td); 647 fdrop(fp2, td); 648 free3: 649 fdclose(fdp, fp1, sv[0], td); 650 fdrop(fp1, td); 651 free2: 652 (void)soclose(so2); 653 free1: 654 (void)soclose(so1); 655 done2: 656 NET_UNLOCK_GIANT(); 657 return (error); 658 } 659 660 static int 661 sendit(td, s, mp, flags) 662 register struct thread *td; 663 int s; 664 register struct msghdr *mp; 665 int flags; 666 { 667 struct mbuf *control; 668 struct sockaddr *to; 669 int error; 670 671 if (mp->msg_name != NULL) { 672 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 673 if (error) { 674 to = NULL; 675 goto bad; 676 } 677 mp->msg_name = to; 678 } else { 679 to = NULL; 680 } 681 682 if (mp->msg_control) { 683 if (mp->msg_controllen < sizeof(struct cmsghdr) 684 #ifdef COMPAT_OLDSOCK 685 && mp->msg_flags != MSG_COMPAT 686 #endif 687 ) { 688 error = EINVAL; 689 goto bad; 690 } 691 error = sockargs(&control, mp->msg_control, 692 mp->msg_controllen, MT_CONTROL); 693 if (error) 694 goto bad; 695 #ifdef COMPAT_OLDSOCK 696 if (mp->msg_flags == MSG_COMPAT) { 697 register struct cmsghdr *cm; 698 699 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 700 if (control == 0) { 701 error = ENOBUFS; 702 goto bad; 703 } else { 704 cm = mtod(control, struct cmsghdr *); 705 cm->cmsg_len = control->m_len; 706 cm->cmsg_level = SOL_SOCKET; 707 cm->cmsg_type = SCM_RIGHTS; 708 } 709 } 710 #endif 711 } else { 712 control = NULL; 713 } 714 715 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 716 717 bad: 718 if (to) 719 FREE(to, M_SONAME); 720 return (error); 721 } 722 723 int 724 kern_sendit(td, s, mp, flags, control, segflg) 725 struct thread *td; 726 int s; 727 struct msghdr *mp; 728 int flags; 729 struct mbuf *control; 730 enum uio_seg segflg; 731 { 732 struct file *fp; 733 struct uio auio; 734 struct iovec *iov; 735 struct socket *so; 736 int i; 737 int len, error; 738 #ifdef KTRACE 739 struct uio *ktruio = NULL; 740 #endif 741 742 NET_LOCK_GIANT(); 743 error = getsock(td->td_proc->p_fd, s, &fp); 744 if (error) 745 goto bad2; 746 so = (struct socket *)fp->f_data; 747 748 #ifdef MAC 749 SOCK_LOCK(so); 750 error = mac_check_socket_send(td->td_ucred, so); 751 SOCK_UNLOCK(so); 752 if (error) 753 goto bad; 754 #endif 755 756 auio.uio_iov = mp->msg_iov; 757 auio.uio_iovcnt = mp->msg_iovlen; 758 auio.uio_segflg = segflg; 759 auio.uio_rw = UIO_WRITE; 760 auio.uio_td = td; 761 auio.uio_offset = 0; /* XXX */ 762 auio.uio_resid = 0; 763 iov = mp->msg_iov; 764 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 765 if ((auio.uio_resid += iov->iov_len) < 0) { 766 error = EINVAL; 767 goto bad; 768 } 769 } 770 #ifdef KTRACE 771 if (KTRPOINT(td, KTR_GENIO)) 772 ktruio = cloneuio(&auio); 773 #endif 774 len = auio.uio_resid; 775 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 776 0, control, flags, td); 777 if (error) { 778 if (auio.uio_resid != len && (error == ERESTART || 779 error == EINTR || error == EWOULDBLOCK)) 780 error = 0; 781 /* Generation of SIGPIPE can be controlled per socket */ 782 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 783 !(flags & MSG_NOSIGNAL)) { 784 PROC_LOCK(td->td_proc); 785 psignal(td->td_proc, SIGPIPE); 786 PROC_UNLOCK(td->td_proc); 787 } 788 } 789 if (error == 0) 790 td->td_retval[0] = len - auio.uio_resid; 791 #ifdef KTRACE 792 if (ktruio != NULL) { 793 ktruio->uio_resid = td->td_retval[0]; 794 ktrgenio(s, UIO_WRITE, ktruio, error); 795 } 796 #endif 797 bad: 798 fdrop(fp, td); 799 bad2: 800 NET_UNLOCK_GIANT(); 801 return (error); 802 } 803 804 /* 805 * MPSAFE 806 */ 807 int 808 sendto(td, uap) 809 struct thread *td; 810 register struct sendto_args /* { 811 int s; 812 caddr_t buf; 813 size_t len; 814 int flags; 815 caddr_t to; 816 int tolen; 817 } */ *uap; 818 { 819 struct msghdr msg; 820 struct iovec aiov; 821 int error; 822 823 msg.msg_name = uap->to; 824 msg.msg_namelen = uap->tolen; 825 msg.msg_iov = &aiov; 826 msg.msg_iovlen = 1; 827 msg.msg_control = 0; 828 #ifdef COMPAT_OLDSOCK 829 msg.msg_flags = 0; 830 #endif 831 aiov.iov_base = uap->buf; 832 aiov.iov_len = uap->len; 833 error = sendit(td, uap->s, &msg, uap->flags); 834 return (error); 835 } 836 837 #ifdef COMPAT_OLDSOCK 838 /* 839 * MPSAFE 840 */ 841 int 842 osend(td, uap) 843 struct thread *td; 844 register struct osend_args /* { 845 int s; 846 caddr_t buf; 847 int len; 848 int flags; 849 } */ *uap; 850 { 851 struct msghdr msg; 852 struct iovec aiov; 853 int error; 854 855 msg.msg_name = 0; 856 msg.msg_namelen = 0; 857 msg.msg_iov = &aiov; 858 msg.msg_iovlen = 1; 859 aiov.iov_base = uap->buf; 860 aiov.iov_len = uap->len; 861 msg.msg_control = 0; 862 msg.msg_flags = 0; 863 error = sendit(td, uap->s, &msg, uap->flags); 864 return (error); 865 } 866 867 /* 868 * MPSAFE 869 */ 870 int 871 osendmsg(td, uap) 872 struct thread *td; 873 struct osendmsg_args /* { 874 int s; 875 caddr_t msg; 876 int flags; 877 } */ *uap; 878 { 879 struct msghdr msg; 880 struct iovec *iov; 881 int error; 882 883 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 884 if (error) 885 return (error); 886 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 887 if (error) 888 return (error); 889 msg.msg_iov = iov; 890 msg.msg_flags = MSG_COMPAT; 891 error = sendit(td, uap->s, &msg, uap->flags); 892 free(iov, M_IOV); 893 return (error); 894 } 895 #endif 896 897 /* 898 * MPSAFE 899 */ 900 int 901 sendmsg(td, uap) 902 struct thread *td; 903 struct sendmsg_args /* { 904 int s; 905 caddr_t msg; 906 int flags; 907 } */ *uap; 908 { 909 struct msghdr msg; 910 struct iovec *iov; 911 int error; 912 913 error = copyin(uap->msg, &msg, sizeof (msg)); 914 if (error) 915 return (error); 916 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 917 if (error) 918 return (error); 919 msg.msg_iov = iov; 920 #ifdef COMPAT_OLDSOCK 921 msg.msg_flags = 0; 922 #endif 923 error = sendit(td, uap->s, &msg, uap->flags); 924 free(iov, M_IOV); 925 return (error); 926 } 927 928 int 929 kern_recvit(td, s, mp, namelenp, segflg, controlp) 930 struct thread *td; 931 int s; 932 struct msghdr *mp; 933 void *namelenp; 934 enum uio_seg segflg; 935 struct mbuf **controlp; 936 { 937 struct uio auio; 938 struct iovec *iov; 939 int i; 940 socklen_t len; 941 int error; 942 struct mbuf *m, *control = 0; 943 caddr_t ctlbuf; 944 struct file *fp; 945 struct socket *so; 946 struct sockaddr *fromsa = 0; 947 #ifdef KTRACE 948 struct uio *ktruio = NULL; 949 #endif 950 951 if(controlp != NULL) 952 *controlp = 0; 953 954 NET_LOCK_GIANT(); 955 error = getsock(td->td_proc->p_fd, s, &fp); 956 if (error) { 957 NET_UNLOCK_GIANT(); 958 return (error); 959 } 960 so = fp->f_data; 961 962 #ifdef MAC 963 SOCK_LOCK(so); 964 error = mac_check_socket_receive(td->td_ucred, so); 965 SOCK_UNLOCK(so); 966 if (error) { 967 fdrop(fp, td); 968 NET_UNLOCK_GIANT(); 969 return (error); 970 } 971 #endif 972 973 auio.uio_iov = mp->msg_iov; 974 auio.uio_iovcnt = mp->msg_iovlen; 975 auio.uio_segflg = segflg; 976 auio.uio_rw = UIO_READ; 977 auio.uio_td = td; 978 auio.uio_offset = 0; /* XXX */ 979 auio.uio_resid = 0; 980 iov = mp->msg_iov; 981 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 982 if ((auio.uio_resid += iov->iov_len) < 0) { 983 fdrop(fp, td); 984 NET_UNLOCK_GIANT(); 985 return (EINVAL); 986 } 987 } 988 #ifdef KTRACE 989 if (KTRPOINT(td, KTR_GENIO)) 990 ktruio = cloneuio(&auio); 991 #endif 992 len = auio.uio_resid; 993 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 994 (struct mbuf **)0, 995 (mp->msg_control || controlp) ? &control : (struct mbuf **)0, 996 &mp->msg_flags); 997 if (error) { 998 if (auio.uio_resid != (int)len && (error == ERESTART || 999 error == EINTR || error == EWOULDBLOCK)) 1000 error = 0; 1001 } 1002 #ifdef KTRACE 1003 if (ktruio != NULL) { 1004 ktruio->uio_resid = (int)len - auio.uio_resid; 1005 ktrgenio(s, UIO_READ, ktruio, error); 1006 } 1007 #endif 1008 if (error) 1009 goto out; 1010 td->td_retval[0] = (int)len - auio.uio_resid; 1011 if (mp->msg_name) { 1012 len = mp->msg_namelen; 1013 if (len <= 0 || fromsa == 0) 1014 len = 0; 1015 else { 1016 /* save sa_len before it is destroyed by MSG_COMPAT */ 1017 len = MIN(len, fromsa->sa_len); 1018 #ifdef COMPAT_OLDSOCK 1019 if (mp->msg_flags & MSG_COMPAT) 1020 ((struct osockaddr *)fromsa)->sa_family = 1021 fromsa->sa_family; 1022 #endif 1023 error = copyout(fromsa, mp->msg_name, (unsigned)len); 1024 if (error) 1025 goto out; 1026 } 1027 mp->msg_namelen = len; 1028 if (namelenp && 1029 (error = copyout(&len, namelenp, sizeof (socklen_t)))) { 1030 #ifdef COMPAT_OLDSOCK 1031 if (mp->msg_flags & MSG_COMPAT) 1032 error = 0; /* old recvfrom didn't check */ 1033 else 1034 #endif 1035 goto out; 1036 } 1037 } 1038 if (mp->msg_control && controlp == NULL) { 1039 #ifdef COMPAT_OLDSOCK 1040 /* 1041 * We assume that old recvmsg calls won't receive access 1042 * rights and other control info, esp. as control info 1043 * is always optional and those options didn't exist in 4.3. 1044 * If we receive rights, trim the cmsghdr; anything else 1045 * is tossed. 1046 */ 1047 if (control && mp->msg_flags & MSG_COMPAT) { 1048 if (mtod(control, struct cmsghdr *)->cmsg_level != 1049 SOL_SOCKET || 1050 mtod(control, struct cmsghdr *)->cmsg_type != 1051 SCM_RIGHTS) { 1052 mp->msg_controllen = 0; 1053 goto out; 1054 } 1055 control->m_len -= sizeof (struct cmsghdr); 1056 control->m_data += sizeof (struct cmsghdr); 1057 } 1058 #endif 1059 len = mp->msg_controllen; 1060 m = control; 1061 mp->msg_controllen = 0; 1062 ctlbuf = mp->msg_control; 1063 1064 while (m && len > 0) { 1065 unsigned int tocopy; 1066 1067 if (len >= m->m_len) 1068 tocopy = m->m_len; 1069 else { 1070 mp->msg_flags |= MSG_CTRUNC; 1071 tocopy = len; 1072 } 1073 1074 if ((error = copyout(mtod(m, caddr_t), 1075 ctlbuf, tocopy)) != 0) 1076 goto out; 1077 1078 ctlbuf += tocopy; 1079 len -= tocopy; 1080 m = m->m_next; 1081 } 1082 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1083 } 1084 out: 1085 fdrop(fp, td); 1086 NET_UNLOCK_GIANT(); 1087 if (fromsa) 1088 FREE(fromsa, M_SONAME); 1089 1090 if (error == 0 && controlp != NULL) 1091 *controlp = control; 1092 else if (control) 1093 m_freem(control); 1094 1095 return (error); 1096 } 1097 1098 static int 1099 recvit(td, s, mp, namelenp) 1100 struct thread *td; 1101 int s; 1102 struct msghdr *mp; 1103 void *namelenp; 1104 { 1105 1106 return (kern_recvit(td, s, mp, namelenp, UIO_USERSPACE, NULL)); 1107 } 1108 1109 /* 1110 * MPSAFE 1111 */ 1112 int 1113 recvfrom(td, uap) 1114 struct thread *td; 1115 register struct recvfrom_args /* { 1116 int s; 1117 caddr_t buf; 1118 size_t len; 1119 int flags; 1120 struct sockaddr * __restrict from; 1121 socklen_t * __restrict fromlenaddr; 1122 } */ *uap; 1123 { 1124 struct msghdr msg; 1125 struct iovec aiov; 1126 int error; 1127 1128 if (uap->fromlenaddr) { 1129 error = copyin(uap->fromlenaddr, 1130 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1131 if (error) 1132 goto done2; 1133 } else { 1134 msg.msg_namelen = 0; 1135 } 1136 msg.msg_name = uap->from; 1137 msg.msg_iov = &aiov; 1138 msg.msg_iovlen = 1; 1139 aiov.iov_base = uap->buf; 1140 aiov.iov_len = uap->len; 1141 msg.msg_control = 0; 1142 msg.msg_flags = uap->flags; 1143 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1144 done2: 1145 return(error); 1146 } 1147 1148 #ifdef COMPAT_OLDSOCK 1149 /* 1150 * MPSAFE 1151 */ 1152 int 1153 orecvfrom(td, uap) 1154 struct thread *td; 1155 struct recvfrom_args *uap; 1156 { 1157 1158 uap->flags |= MSG_COMPAT; 1159 return (recvfrom(td, uap)); 1160 } 1161 #endif 1162 1163 1164 #ifdef COMPAT_OLDSOCK 1165 /* 1166 * MPSAFE 1167 */ 1168 int 1169 orecv(td, uap) 1170 struct thread *td; 1171 register struct orecv_args /* { 1172 int s; 1173 caddr_t buf; 1174 int len; 1175 int flags; 1176 } */ *uap; 1177 { 1178 struct msghdr msg; 1179 struct iovec aiov; 1180 int error; 1181 1182 msg.msg_name = 0; 1183 msg.msg_namelen = 0; 1184 msg.msg_iov = &aiov; 1185 msg.msg_iovlen = 1; 1186 aiov.iov_base = uap->buf; 1187 aiov.iov_len = uap->len; 1188 msg.msg_control = 0; 1189 msg.msg_flags = uap->flags; 1190 error = recvit(td, uap->s, &msg, NULL); 1191 return (error); 1192 } 1193 1194 /* 1195 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1196 * overlays the new one, missing only the flags, and with the (old) access 1197 * rights where the control fields are now. 1198 * 1199 * MPSAFE 1200 */ 1201 int 1202 orecvmsg(td, uap) 1203 struct thread *td; 1204 struct orecvmsg_args /* { 1205 int s; 1206 struct omsghdr *msg; 1207 int flags; 1208 } */ *uap; 1209 { 1210 struct msghdr msg; 1211 struct iovec *iov; 1212 int error; 1213 1214 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1215 if (error) 1216 return (error); 1217 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1218 if (error) 1219 return (error); 1220 msg.msg_flags = uap->flags | MSG_COMPAT; 1221 msg.msg_iov = iov; 1222 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1223 if (msg.msg_controllen && error == 0) 1224 error = copyout(&msg.msg_controllen, 1225 &uap->msg->msg_accrightslen, sizeof (int)); 1226 free(iov, M_IOV); 1227 return (error); 1228 } 1229 #endif 1230 1231 /* 1232 * MPSAFE 1233 */ 1234 int 1235 recvmsg(td, uap) 1236 struct thread *td; 1237 struct recvmsg_args /* { 1238 int s; 1239 struct msghdr *msg; 1240 int flags; 1241 } */ *uap; 1242 { 1243 struct msghdr msg; 1244 struct iovec *uiov, *iov; 1245 int error; 1246 1247 error = copyin(uap->msg, &msg, sizeof (msg)); 1248 if (error) 1249 return (error); 1250 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1251 if (error) 1252 return (error); 1253 msg.msg_flags = uap->flags; 1254 #ifdef COMPAT_OLDSOCK 1255 msg.msg_flags &= ~MSG_COMPAT; 1256 #endif 1257 uiov = msg.msg_iov; 1258 msg.msg_iov = iov; 1259 error = recvit(td, uap->s, &msg, NULL); 1260 if (error == 0) { 1261 msg.msg_iov = uiov; 1262 error = copyout(&msg, uap->msg, sizeof(msg)); 1263 } 1264 free(iov, M_IOV); 1265 return (error); 1266 } 1267 1268 /* 1269 * MPSAFE 1270 */ 1271 /* ARGSUSED */ 1272 int 1273 shutdown(td, uap) 1274 struct thread *td; 1275 register struct shutdown_args /* { 1276 int s; 1277 int how; 1278 } */ *uap; 1279 { 1280 struct socket *so; 1281 struct file *fp; 1282 int error; 1283 1284 NET_LOCK_GIANT(); 1285 error = getsock(td->td_proc->p_fd, uap->s, &fp); 1286 if (error == 0) { 1287 so = fp->f_data; 1288 error = soshutdown(so, uap->how); 1289 fdrop(fp, td); 1290 } 1291 NET_UNLOCK_GIANT(); 1292 return (error); 1293 } 1294 1295 /* 1296 * MPSAFE 1297 */ 1298 /* ARGSUSED */ 1299 int 1300 setsockopt(td, uap) 1301 struct thread *td; 1302 register struct setsockopt_args /* { 1303 int s; 1304 int level; 1305 int name; 1306 caddr_t val; 1307 int valsize; 1308 } */ *uap; 1309 { 1310 1311 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1312 uap->val, UIO_USERSPACE, uap->valsize)); 1313 } 1314 1315 int 1316 kern_setsockopt(td, s, level, name, val, valseg, valsize) 1317 struct thread *td; 1318 int s; 1319 int level; 1320 int name; 1321 void *val; 1322 enum uio_seg valseg; 1323 socklen_t valsize; 1324 { 1325 int error; 1326 struct socket *so; 1327 struct file *fp; 1328 struct sockopt sopt; 1329 1330 if (val == NULL && valsize != 0) 1331 return (EFAULT); 1332 if (valsize < 0) 1333 return (EINVAL); 1334 1335 sopt.sopt_dir = SOPT_SET; 1336 sopt.sopt_level = level; 1337 sopt.sopt_name = name; 1338 sopt.sopt_val = val; 1339 sopt.sopt_valsize = valsize; 1340 switch (valseg) { 1341 case UIO_USERSPACE: 1342 sopt.sopt_td = td; 1343 break; 1344 case UIO_SYSSPACE: 1345 sopt.sopt_td = NULL; 1346 break; 1347 default: 1348 panic("kern_setsockopt called with bad valseg"); 1349 } 1350 1351 NET_LOCK_GIANT(); 1352 error = getsock(td->td_proc->p_fd, s, &fp); 1353 if (error == 0) { 1354 so = fp->f_data; 1355 error = sosetopt(so, &sopt); 1356 fdrop(fp, td); 1357 } 1358 NET_UNLOCK_GIANT(); 1359 return(error); 1360 } 1361 1362 /* 1363 * MPSAFE 1364 */ 1365 /* ARGSUSED */ 1366 int 1367 getsockopt(td, uap) 1368 struct thread *td; 1369 register struct getsockopt_args /* { 1370 int s; 1371 int level; 1372 int name; 1373 void * __restrict val; 1374 socklen_t * __restrict avalsize; 1375 } */ *uap; 1376 { 1377 socklen_t valsize; 1378 int error; 1379 1380 if (uap->val) { 1381 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1382 if (error) 1383 return (error); 1384 } 1385 1386 error = kern_getsockopt(td, uap->s, uap->level, uap->name, 1387 uap->val, UIO_USERSPACE, &valsize); 1388 1389 if (error == 0) 1390 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1391 return (error); 1392 } 1393 1394 /* 1395 * Kernel version of getsockopt. 1396 * optval can be a userland or userspace. optlen is always a kernel pointer. 1397 */ 1398 int 1399 kern_getsockopt(td, s, level, name, val, valseg, valsize) 1400 struct thread *td; 1401 int s; 1402 int level; 1403 int name; 1404 void *val; 1405 enum uio_seg valseg; 1406 socklen_t *valsize; 1407 { 1408 int error; 1409 struct socket *so; 1410 struct file *fp; 1411 struct sockopt sopt; 1412 1413 if (val == NULL) 1414 *valsize = 0; 1415 if (*valsize < 0) 1416 return (EINVAL); 1417 1418 sopt.sopt_dir = SOPT_GET; 1419 sopt.sopt_level = level; 1420 sopt.sopt_name = name; 1421 sopt.sopt_val = val; 1422 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1423 switch (valseg) { 1424 case UIO_USERSPACE: 1425 sopt.sopt_td = td; 1426 break; 1427 case UIO_SYSSPACE: 1428 sopt.sopt_td = NULL; 1429 break; 1430 default: 1431 panic("kern_getsockopt called with bad valseg"); 1432 } 1433 1434 NET_LOCK_GIANT(); 1435 error = getsock(td->td_proc->p_fd, s, &fp); 1436 if (error == 0) { 1437 so = fp->f_data; 1438 error = sogetopt(so, &sopt); 1439 *valsize = sopt.sopt_valsize; 1440 fdrop(fp, td); 1441 } 1442 NET_UNLOCK_GIANT(); 1443 return (error); 1444 } 1445 1446 /* 1447 * getsockname1() - Get socket name. 1448 * 1449 * MPSAFE 1450 */ 1451 /* ARGSUSED */ 1452 static int 1453 getsockname1(td, uap, compat) 1454 struct thread *td; 1455 register struct getsockname_args /* { 1456 int fdes; 1457 struct sockaddr * __restrict asa; 1458 socklen_t * __restrict alen; 1459 } */ *uap; 1460 int compat; 1461 { 1462 struct socket *so; 1463 struct sockaddr *sa; 1464 struct file *fp; 1465 socklen_t len; 1466 int error; 1467 1468 NET_LOCK_GIANT(); 1469 error = getsock(td->td_proc->p_fd, uap->fdes, &fp); 1470 if (error) 1471 goto done2; 1472 so = fp->f_data; 1473 error = copyin(uap->alen, &len, sizeof (len)); 1474 if (error) 1475 goto done1; 1476 if (len < 0) { 1477 error = EINVAL; 1478 goto done1; 1479 } 1480 sa = 0; 1481 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1482 if (error) 1483 goto bad; 1484 if (sa == 0) { 1485 len = 0; 1486 goto gotnothing; 1487 } 1488 1489 len = MIN(len, sa->sa_len); 1490 #ifdef COMPAT_OLDSOCK 1491 if (compat) 1492 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1493 #endif 1494 error = copyout(sa, uap->asa, (u_int)len); 1495 if (error == 0) 1496 gotnothing: 1497 error = copyout(&len, uap->alen, sizeof (len)); 1498 bad: 1499 if (sa) 1500 FREE(sa, M_SONAME); 1501 done1: 1502 fdrop(fp, td); 1503 done2: 1504 NET_UNLOCK_GIANT(); 1505 return (error); 1506 } 1507 1508 /* 1509 * MPSAFE 1510 */ 1511 int 1512 getsockname(td, uap) 1513 struct thread *td; 1514 struct getsockname_args *uap; 1515 { 1516 1517 return (getsockname1(td, uap, 0)); 1518 } 1519 1520 #ifdef COMPAT_OLDSOCK 1521 /* 1522 * MPSAFE 1523 */ 1524 int 1525 ogetsockname(td, uap) 1526 struct thread *td; 1527 struct getsockname_args *uap; 1528 { 1529 1530 return (getsockname1(td, uap, 1)); 1531 } 1532 #endif /* COMPAT_OLDSOCK */ 1533 1534 /* 1535 * getpeername1() - Get name of peer for connected socket. 1536 * 1537 * MPSAFE 1538 */ 1539 /* ARGSUSED */ 1540 static int 1541 getpeername1(td, uap, compat) 1542 struct thread *td; 1543 register struct getpeername_args /* { 1544 int fdes; 1545 struct sockaddr * __restrict asa; 1546 socklen_t * __restrict alen; 1547 } */ *uap; 1548 int compat; 1549 { 1550 struct socket *so; 1551 struct sockaddr *sa; 1552 struct file *fp; 1553 socklen_t len; 1554 int error; 1555 1556 NET_LOCK_GIANT(); 1557 error = getsock(td->td_proc->p_fd, uap->fdes, &fp); 1558 if (error) 1559 goto done2; 1560 so = fp->f_data; 1561 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1562 error = ENOTCONN; 1563 goto done1; 1564 } 1565 error = copyin(uap->alen, &len, sizeof (len)); 1566 if (error) 1567 goto done1; 1568 if (len < 0) { 1569 error = EINVAL; 1570 goto done1; 1571 } 1572 sa = 0; 1573 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1574 if (error) 1575 goto bad; 1576 if (sa == 0) { 1577 len = 0; 1578 goto gotnothing; 1579 } 1580 len = MIN(len, sa->sa_len); 1581 #ifdef COMPAT_OLDSOCK 1582 if (compat) 1583 ((struct osockaddr *)sa)->sa_family = 1584 sa->sa_family; 1585 #endif 1586 error = copyout(sa, uap->asa, (u_int)len); 1587 if (error) 1588 goto bad; 1589 gotnothing: 1590 error = copyout(&len, uap->alen, sizeof (len)); 1591 bad: 1592 if (sa) 1593 FREE(sa, M_SONAME); 1594 done1: 1595 fdrop(fp, td); 1596 done2: 1597 NET_UNLOCK_GIANT(); 1598 return (error); 1599 } 1600 1601 /* 1602 * MPSAFE 1603 */ 1604 int 1605 getpeername(td, uap) 1606 struct thread *td; 1607 struct getpeername_args *uap; 1608 { 1609 1610 return (getpeername1(td, uap, 0)); 1611 } 1612 1613 #ifdef COMPAT_OLDSOCK 1614 /* 1615 * MPSAFE 1616 */ 1617 int 1618 ogetpeername(td, uap) 1619 struct thread *td; 1620 struct ogetpeername_args *uap; 1621 { 1622 1623 /* XXX uap should have type `getpeername_args *' to begin with. */ 1624 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1625 } 1626 #endif /* COMPAT_OLDSOCK */ 1627 1628 int 1629 sockargs(mp, buf, buflen, type) 1630 struct mbuf **mp; 1631 caddr_t buf; 1632 int buflen, type; 1633 { 1634 register struct sockaddr *sa; 1635 register struct mbuf *m; 1636 int error; 1637 1638 if ((u_int)buflen > MLEN) { 1639 #ifdef COMPAT_OLDSOCK 1640 if (type == MT_SONAME && (u_int)buflen <= 112) 1641 buflen = MLEN; /* unix domain compat. hack */ 1642 else 1643 #endif 1644 if ((u_int)buflen > MCLBYTES) 1645 return (EINVAL); 1646 } 1647 m = m_get(M_TRYWAIT, type); 1648 if (m == NULL) 1649 return (ENOBUFS); 1650 if ((u_int)buflen > MLEN) { 1651 MCLGET(m, M_TRYWAIT); 1652 if ((m->m_flags & M_EXT) == 0) { 1653 m_free(m); 1654 return (ENOBUFS); 1655 } 1656 } 1657 m->m_len = buflen; 1658 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1659 if (error) 1660 (void) m_free(m); 1661 else { 1662 *mp = m; 1663 if (type == MT_SONAME) { 1664 sa = mtod(m, struct sockaddr *); 1665 1666 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1667 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1668 sa->sa_family = sa->sa_len; 1669 #endif 1670 sa->sa_len = buflen; 1671 } 1672 } 1673 return (error); 1674 } 1675 1676 int 1677 getsockaddr(namp, uaddr, len) 1678 struct sockaddr **namp; 1679 caddr_t uaddr; 1680 size_t len; 1681 { 1682 struct sockaddr *sa; 1683 int error; 1684 1685 if (len > SOCK_MAXADDRLEN) 1686 return (ENAMETOOLONG); 1687 if (len < offsetof(struct sockaddr, sa_data[0])) 1688 return (EINVAL); 1689 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1690 error = copyin(uaddr, sa, len); 1691 if (error) { 1692 FREE(sa, M_SONAME); 1693 } else { 1694 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1695 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1696 sa->sa_family = sa->sa_len; 1697 #endif 1698 sa->sa_len = len; 1699 *namp = sa; 1700 } 1701 return (error); 1702 } 1703 1704 /* 1705 * Detach mapped page and release resources back to the system. 1706 */ 1707 void 1708 sf_buf_mext(void *addr, void *args) 1709 { 1710 vm_page_t m; 1711 1712 m = sf_buf_page(args); 1713 sf_buf_free(args); 1714 vm_page_lock_queues(); 1715 vm_page_unwire(m, 0); 1716 /* 1717 * Check for the object going away on us. This can 1718 * happen since we don't hold a reference to it. 1719 * If so, we're responsible for freeing the page. 1720 */ 1721 if (m->wire_count == 0 && m->object == NULL) 1722 vm_page_free(m); 1723 vm_page_unlock_queues(); 1724 } 1725 1726 /* 1727 * sendfile(2) 1728 * 1729 * MPSAFE 1730 * 1731 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1732 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1733 * 1734 * Send a file specified by 'fd' and starting at 'offset' to a socket 1735 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1736 * nbytes == 0. Optionally add a header and/or trailer to the socket 1737 * output. If specified, write the total number of bytes sent into *sbytes. 1738 * 1739 */ 1740 int 1741 sendfile(struct thread *td, struct sendfile_args *uap) 1742 { 1743 1744 return (do_sendfile(td, uap, 0)); 1745 } 1746 1747 static int 1748 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1749 { 1750 struct sf_hdtr hdtr; 1751 struct uio *hdr_uio, *trl_uio; 1752 int error; 1753 1754 hdr_uio = trl_uio = NULL; 1755 1756 if (uap->hdtr != NULL) { 1757 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1758 if (error) 1759 goto out; 1760 if (hdtr.headers != NULL) { 1761 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1762 if (error) 1763 goto out; 1764 } 1765 if (hdtr.trailers != NULL) { 1766 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); 1767 if (error) 1768 goto out; 1769 1770 } 1771 } 1772 1773 error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat); 1774 out: 1775 if (hdr_uio) 1776 free(hdr_uio, M_IOV); 1777 if (trl_uio) 1778 free(trl_uio, M_IOV); 1779 return (error); 1780 } 1781 1782 #ifdef COMPAT_FREEBSD4 1783 int 1784 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1785 { 1786 struct sendfile_args args; 1787 1788 args.fd = uap->fd; 1789 args.s = uap->s; 1790 args.offset = uap->offset; 1791 args.nbytes = uap->nbytes; 1792 args.hdtr = uap->hdtr; 1793 args.sbytes = uap->sbytes; 1794 args.flags = uap->flags; 1795 1796 return (do_sendfile(td, &args, 1)); 1797 } 1798 #endif /* COMPAT_FREEBSD4 */ 1799 1800 int 1801 kern_sendfile(struct thread *td, struct sendfile_args *uap, 1802 struct uio *hdr_uio, struct uio *trl_uio, int compat) 1803 { 1804 struct vnode *vp; 1805 struct vm_object *obj = NULL; 1806 struct socket *so = NULL; 1807 struct mbuf *m, *m_header = NULL; 1808 struct sf_buf *sf; 1809 struct vm_page *pg; 1810 off_t off, xfsize, hdtr_size, sbytes = 0; 1811 int error, headersize = 0, headersent = 0; 1812 int vfslocked; 1813 1814 NET_LOCK_GIANT(); 1815 1816 hdtr_size = 0; 1817 1818 /* 1819 * The descriptor must be a regular file and have a backing VM object. 1820 */ 1821 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1822 goto done; 1823 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1824 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1825 obj = vp->v_object; 1826 if (obj != NULL) { 1827 /* 1828 * Temporarily increase the backing VM object's reference 1829 * count so that a forced reclamation of its vnode does not 1830 * immediately destroy it. 1831 */ 1832 VM_OBJECT_LOCK(obj); 1833 if ((obj->flags & OBJ_DEAD) == 0) { 1834 vm_object_reference_locked(obj); 1835 VM_OBJECT_UNLOCK(obj); 1836 } else { 1837 VM_OBJECT_UNLOCK(obj); 1838 obj = NULL; 1839 } 1840 } 1841 VOP_UNLOCK(vp, 0, td); 1842 VFS_UNLOCK_GIANT(vfslocked); 1843 if (obj == NULL) { 1844 error = EINVAL; 1845 goto done; 1846 } 1847 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1848 goto done; 1849 if (so->so_type != SOCK_STREAM) { 1850 error = EINVAL; 1851 goto done; 1852 } 1853 if ((so->so_state & SS_ISCONNECTED) == 0) { 1854 error = ENOTCONN; 1855 goto done; 1856 } 1857 if (uap->offset < 0) { 1858 error = EINVAL; 1859 goto done; 1860 } 1861 1862 #ifdef MAC 1863 SOCK_LOCK(so); 1864 error = mac_check_socket_send(td->td_ucred, so); 1865 SOCK_UNLOCK(so); 1866 if (error) 1867 goto done; 1868 #endif 1869 1870 /* 1871 * If specified, get the pointer to the sf_hdtr struct for 1872 * any headers/trailers. 1873 */ 1874 if (hdr_uio != NULL) { 1875 hdr_uio->uio_td = td; 1876 hdr_uio->uio_rw = UIO_WRITE; 1877 if (hdr_uio->uio_resid > 0) { 1878 m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0, 0); 1879 if (m_header == NULL) 1880 goto done; 1881 headersize = m_header->m_pkthdr.len; 1882 if (compat) 1883 sbytes += headersize; 1884 } 1885 } 1886 1887 /* 1888 * Protect against multiple writers to the socket. 1889 */ 1890 SOCKBUF_LOCK(&so->so_snd); 1891 (void) sblock(&so->so_snd, M_WAITOK); 1892 SOCKBUF_UNLOCK(&so->so_snd); 1893 1894 /* 1895 * Loop through the pages in the file, starting with the requested 1896 * offset. Get a file page (do I/O if necessary), map the file page 1897 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1898 * it on the socket. 1899 */ 1900 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1901 vm_pindex_t pindex; 1902 vm_offset_t pgoff; 1903 1904 pindex = OFF_TO_IDX(off); 1905 VM_OBJECT_LOCK(obj); 1906 retry_lookup: 1907 /* 1908 * Calculate the amount to transfer. Not to exceed a page, 1909 * the EOF, or the passed in nbytes. 1910 */ 1911 xfsize = obj->un_pager.vnp.vnp_size - off; 1912 VM_OBJECT_UNLOCK(obj); 1913 if (xfsize > PAGE_SIZE) 1914 xfsize = PAGE_SIZE; 1915 pgoff = (vm_offset_t)(off & PAGE_MASK); 1916 if (PAGE_SIZE - pgoff < xfsize) 1917 xfsize = PAGE_SIZE - pgoff; 1918 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1919 xfsize = uap->nbytes - sbytes; 1920 if (xfsize <= 0) { 1921 if (m_header != NULL) { 1922 m = m_header; 1923 m_header = NULL; 1924 SOCKBUF_LOCK(&so->so_snd); 1925 goto retry_space; 1926 } else 1927 break; 1928 } 1929 /* 1930 * Optimize the non-blocking case by looking at the socket space 1931 * before going to the extra work of constituting the sf_buf. 1932 */ 1933 SOCKBUF_LOCK(&so->so_snd); 1934 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1935 if (so->so_snd.sb_state & SBS_CANTSENDMORE) 1936 error = EPIPE; 1937 else 1938 error = EAGAIN; 1939 sbunlock(&so->so_snd); 1940 SOCKBUF_UNLOCK(&so->so_snd); 1941 goto done; 1942 } 1943 SOCKBUF_UNLOCK(&so->so_snd); 1944 VM_OBJECT_LOCK(obj); 1945 /* 1946 * Attempt to look up the page. 1947 * 1948 * Allocate if not found 1949 * 1950 * Wait and loop if busy. 1951 */ 1952 pg = vm_page_lookup(obj, pindex); 1953 1954 if (pg == NULL) { 1955 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY | 1956 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1957 if (pg == NULL) { 1958 VM_OBJECT_UNLOCK(obj); 1959 VM_WAIT; 1960 VM_OBJECT_LOCK(obj); 1961 goto retry_lookup; 1962 } 1963 vm_page_lock_queues(); 1964 } else { 1965 vm_page_lock_queues(); 1966 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1967 goto retry_lookup; 1968 /* 1969 * Wire the page so it does not get ripped out from 1970 * under us. 1971 */ 1972 vm_page_wire(pg); 1973 } 1974 1975 /* 1976 * If page is not valid for what we need, initiate I/O 1977 */ 1978 1979 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) { 1980 VM_OBJECT_UNLOCK(obj); 1981 } else if (uap->flags & SF_NODISKIO) { 1982 error = EBUSY; 1983 } else { 1984 int bsize, resid; 1985 1986 /* 1987 * Ensure that our page is still around when the I/O 1988 * completes. 1989 */ 1990 vm_page_io_start(pg); 1991 vm_page_unlock_queues(); 1992 VM_OBJECT_UNLOCK(obj); 1993 1994 /* 1995 * Get the page from backing store. 1996 */ 1997 bsize = vp->v_mount->mnt_stat.f_iosize; 1998 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1999 vn_lock(vp, LK_SHARED | LK_RETRY, td); 2000 /* 2001 * XXXMAC: Because we don't have fp->f_cred here, 2002 * we pass in NOCRED. This is probably wrong, but 2003 * is consistent with our original implementation. 2004 */ 2005 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 2006 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 2007 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 2008 td->td_ucred, NOCRED, &resid, td); 2009 VOP_UNLOCK(vp, 0, td); 2010 VFS_UNLOCK_GIANT(vfslocked); 2011 VM_OBJECT_LOCK(obj); 2012 vm_page_lock_queues(); 2013 vm_page_io_finish(pg); 2014 if (!error) 2015 VM_OBJECT_UNLOCK(obj); 2016 mbstat.sf_iocnt++; 2017 } 2018 2019 if (error) { 2020 vm_page_unwire(pg, 0); 2021 /* 2022 * See if anyone else might know about this page. 2023 * If not and it is not valid, then free it. 2024 */ 2025 if (pg->wire_count == 0 && pg->valid == 0 && 2026 pg->busy == 0 && !(pg->flags & PG_BUSY) && 2027 pg->hold_count == 0) { 2028 vm_page_free(pg); 2029 } 2030 vm_page_unlock_queues(); 2031 VM_OBJECT_UNLOCK(obj); 2032 SOCKBUF_LOCK(&so->so_snd); 2033 sbunlock(&so->so_snd); 2034 SOCKBUF_UNLOCK(&so->so_snd); 2035 goto done; 2036 } 2037 vm_page_unlock_queues(); 2038 2039 /* 2040 * Get a sendfile buf. We usually wait as long as necessary, 2041 * but this wait can be interrupted. 2042 */ 2043 if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) { 2044 mbstat.sf_allocfail++; 2045 vm_page_lock_queues(); 2046 vm_page_unwire(pg, 0); 2047 if (pg->wire_count == 0 && pg->object == NULL) 2048 vm_page_free(pg); 2049 vm_page_unlock_queues(); 2050 SOCKBUF_LOCK(&so->so_snd); 2051 sbunlock(&so->so_snd); 2052 SOCKBUF_UNLOCK(&so->so_snd); 2053 error = EINTR; 2054 goto done; 2055 } 2056 2057 /* 2058 * Get an mbuf header and set it up as having external storage. 2059 */ 2060 if (m_header) 2061 MGET(m, M_TRYWAIT, MT_DATA); 2062 else 2063 MGETHDR(m, M_TRYWAIT, MT_DATA); 2064 if (m == NULL) { 2065 error = ENOBUFS; 2066 sf_buf_mext((void *)sf_buf_kva(sf), sf); 2067 SOCKBUF_LOCK(&so->so_snd); 2068 sbunlock(&so->so_snd); 2069 SOCKBUF_UNLOCK(&so->so_snd); 2070 goto done; 2071 } 2072 /* 2073 * Setup external storage for mbuf. 2074 */ 2075 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY, 2076 EXT_SFBUF); 2077 m->m_data = (char *)sf_buf_kva(sf) + pgoff; 2078 m->m_pkthdr.len = m->m_len = xfsize; 2079 2080 if (m_header) { 2081 m_cat(m_header, m); 2082 m = m_header; 2083 m_header = NULL; 2084 m_fixhdr(m); 2085 } 2086 2087 /* 2088 * Add the buffer to the socket buffer chain. 2089 */ 2090 SOCKBUF_LOCK(&so->so_snd); 2091 retry_space: 2092 /* 2093 * Make sure that the socket is still able to take more data. 2094 * CANTSENDMORE being true usually means that the connection 2095 * was closed. so_error is true when an error was sensed after 2096 * a previous send. 2097 * The state is checked after the page mapping and buffer 2098 * allocation above since those operations may block and make 2099 * any socket checks stale. From this point forward, nothing 2100 * blocks before the pru_send (or more accurately, any blocking 2101 * results in a loop back to here to re-check). 2102 */ 2103 SOCKBUF_LOCK_ASSERT(&so->so_snd); 2104 if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) { 2105 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2106 error = EPIPE; 2107 } else { 2108 error = so->so_error; 2109 so->so_error = 0; 2110 } 2111 m_freem(m); 2112 sbunlock(&so->so_snd); 2113 SOCKBUF_UNLOCK(&so->so_snd); 2114 goto done; 2115 } 2116 /* 2117 * Wait for socket space to become available. We do this just 2118 * after checking the connection state above in order to avoid 2119 * a race condition with sbwait(). 2120 */ 2121 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 2122 if (so->so_state & SS_NBIO) { 2123 m_freem(m); 2124 sbunlock(&so->so_snd); 2125 SOCKBUF_UNLOCK(&so->so_snd); 2126 error = EAGAIN; 2127 goto done; 2128 } 2129 error = sbwait(&so->so_snd); 2130 /* 2131 * An error from sbwait usually indicates that we've 2132 * been interrupted by a signal. If we've sent anything 2133 * then return bytes sent, otherwise return the error. 2134 */ 2135 if (error) { 2136 m_freem(m); 2137 sbunlock(&so->so_snd); 2138 SOCKBUF_UNLOCK(&so->so_snd); 2139 goto done; 2140 } 2141 goto retry_space; 2142 } 2143 SOCKBUF_UNLOCK(&so->so_snd); 2144 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2145 if (error) { 2146 SOCKBUF_LOCK(&so->so_snd); 2147 sbunlock(&so->so_snd); 2148 SOCKBUF_UNLOCK(&so->so_snd); 2149 goto done; 2150 } 2151 headersent = 1; 2152 } 2153 SOCKBUF_LOCK(&so->so_snd); 2154 sbunlock(&so->so_snd); 2155 SOCKBUF_UNLOCK(&so->so_snd); 2156 2157 /* 2158 * Send trailers. Wimp out and use writev(2). 2159 */ 2160 if (trl_uio != NULL) { 2161 error = kern_writev(td, uap->s, trl_uio); 2162 if (error) 2163 goto done; 2164 if (compat) 2165 sbytes += td->td_retval[0]; 2166 else 2167 hdtr_size += td->td_retval[0]; 2168 } 2169 2170 done: 2171 if (headersent) { 2172 if (!compat) 2173 hdtr_size += headersize; 2174 } else { 2175 if (compat) 2176 sbytes -= headersize; 2177 } 2178 /* 2179 * If there was no error we have to clear td->td_retval[0] 2180 * because it may have been set by writev. 2181 */ 2182 if (error == 0) { 2183 td->td_retval[0] = 0; 2184 } 2185 if (uap->sbytes != NULL) { 2186 if (!compat) 2187 sbytes += hdtr_size; 2188 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2189 } 2190 if (obj != NULL) 2191 vm_object_deallocate(obj); 2192 if (vp != NULL) { 2193 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2194 vrele(vp); 2195 VFS_UNLOCK_GIANT(vfslocked); 2196 } 2197 if (so) 2198 fputsock(so); 2199 if (m_header) 2200 m_freem(m_header); 2201 2202 NET_UNLOCK_GIANT(); 2203 2204 if (error == ERESTART) 2205 error = EINTR; 2206 2207 return (error); 2208 } 2209