/*-
 * Copyright (c) 1982, 1986, 1989, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * sendfile(2) and related extensions:
 * Copyright (c) 1998, David Greenman.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_ktrace.h"
#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mac.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/malloc.h>
#include <sys/filedesc.h>
#include <sys/event.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/mount.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/sf_buf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/vnode.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);

static int accept1(struct thread *td, struct accept_args *uap, int compat);
static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
            int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
            int compat);

/*
 * NSFBUFS-related variables and associated sysctls
 */
int nsfbufs;
int nsfbufspeak;
int nsfbufsused;

SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
    "Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
    "Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
    "Number of sendfile(2) sf_bufs in use");
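
/*
 * Illustrative usage sketch (not part of this file): because nsfbufs is
 * declared CTLFLAG_RDTUN, it can only be sized at boot, typically from
 * /boot/loader.conf, while the peak/used counters can be inspected at
 * runtime with sysctl(8).  The value below is an assumption for the
 * example, not a recommendation:
 *
 *	kern.ipc.nsfbufs="16384"		(loader.conf)
 *	sysctl kern.ipc.nsfbufsused kern.ipc.nsfbufspeak
 */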
"Maximum number of sendfile(2) sf_bufs available"); 97 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 98 "Number of sendfile(2) sf_bufs at peak usage"); 99 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 100 "Number of sendfile(2) sf_bufs in use"); 101 102 /* 103 * Convert a user file descriptor to a kernel file entry. A reference on the 104 * file entry is held upon returning. This is lighter weight than 105 * fgetsock(), which bumps the socket reference drops the file reference 106 * count instead, as this approach avoids several additional mutex operations 107 * associated with the additional reference count. If requested, return the 108 * open file flags. 109 */ 110 static int 111 getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp) 112 { 113 struct file *fp; 114 int error; 115 116 fp = NULL; 117 if (fdp == NULL) 118 error = EBADF; 119 else { 120 FILEDESC_LOCK_FAST(fdp); 121 fp = fget_locked(fdp, fd); 122 if (fp == NULL) 123 error = EBADF; 124 else if (fp->f_type != DTYPE_SOCKET) { 125 fp = NULL; 126 error = ENOTSOCK; 127 } else { 128 fhold(fp); 129 if (fflagp != NULL) 130 *fflagp = fp->f_flag; 131 error = 0; 132 } 133 FILEDESC_UNLOCK_FAST(fdp); 134 } 135 *fpp = fp; 136 return (error); 137 } 138 139 /* 140 * System call interface to the socket abstraction. 141 */ 142 #if defined(COMPAT_43) 143 #define COMPAT_OLDSOCK 144 #endif 145 146 /* 147 * MPSAFE 148 */ 149 int 150 socket(td, uap) 151 struct thread *td; 152 register struct socket_args /* { 153 int domain; 154 int type; 155 int protocol; 156 } */ *uap; 157 { 158 struct filedesc *fdp; 159 struct socket *so; 160 struct file *fp; 161 int fd, error; 162 163 #ifdef MAC 164 error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type, 165 uap->protocol); 166 if (error) 167 return (error); 168 #endif 169 fdp = td->td_proc->p_fd; 170 error = falloc(td, &fp, &fd); 171 if (error) 172 return (error); 173 /* An extra reference on `fp' has been held for us by falloc(). 
        NET_LOCK_GIANT();
        error = socreate(uap->domain, &so, uap->type, uap->protocol,
            td->td_ucred, td);
        NET_UNLOCK_GIANT();
        if (error) {
                fdclose(fdp, fp, fd, td);
        } else {
                FILEDESC_LOCK_FAST(fdp);
                fp->f_data = so;        /* already has ref count */
                fp->f_flag = FREAD|FWRITE;
                fp->f_ops = &socketops;
                fp->f_type = DTYPE_SOCKET;
                FILEDESC_UNLOCK_FAST(fdp);
                td->td_retval[0] = fd;
        }
        fdrop(fp, td);
        return (error);
}

/*
 * MPSAFE
 */
/* ARGSUSED */
int
bind(td, uap)
        struct thread *td;
        register struct bind_args /* {
                int     s;
                caddr_t name;
                int     namelen;
        } */ *uap;
{
        struct sockaddr *sa;
        int error;

        if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
                return (error);

        return (kern_bind(td, uap->s, sa));
}

int
kern_bind(td, fd, sa)
        struct thread *td;
        int fd;
        struct sockaddr *sa;
{
        struct socket *so;
        struct file *fp;
        int error;

        NET_LOCK_GIANT();
        error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
        if (error)
                goto done2;
        so = fp->f_data;
#ifdef MAC
        SOCK_LOCK(so);
        error = mac_check_socket_bind(td->td_ucred, so, sa);
        SOCK_UNLOCK(so);
        if (error)
                goto done1;
#endif
        error = sobind(so, sa, td);
#ifdef MAC
done1:
#endif
        fdrop(fp, td);
done2:
        NET_UNLOCK_GIANT();
        FREE(sa, M_SONAME);
        return (error);
}

/*
 * MPSAFE
 */
/* ARGSUSED */
int
listen(td, uap)
        struct thread *td;
        register struct listen_args /* {
                int     s;
                int     backlog;
        } */ *uap;
{
        struct socket *so;
        struct file *fp;
        int error;

        NET_LOCK_GIANT();
        error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
        if (error == 0) {
                so = fp->f_data;
#ifdef MAC
                SOCK_LOCK(so);
                error = mac_check_socket_listen(td->td_ucred, so);
                SOCK_UNLOCK(so);
                if (error)
                        goto done;
#endif
                error = solisten(so, uap->backlog, td);
#ifdef MAC
done:
#endif
                fdrop(fp, td);
        }
        NET_UNLOCK_GIANT();
        return(error);
}

/*
 * accept1()
 * MPSAFE
 */
static int
accept1(td, uap, compat)
        struct thread *td;
        register struct accept_args /* {
                int     s;
                struct sockaddr * __restrict name;
                socklen_t * __restrict anamelen;
        } */ *uap;
        int compat;
{
        struct sockaddr *name;
        socklen_t namelen;
        int error;

        if (uap->name == NULL)
                return (kern_accept(td, uap->s, NULL, NULL));

        error = copyin(uap->anamelen, &namelen, sizeof (namelen));
        if (error)
                return (error);

        error = kern_accept(td, uap->s, &name, &namelen);

        /*
         * return a namelen of zero for older code which might
         * ignore the return value from accept.
         */
        if (error) {
                (void) copyout(&namelen,
                    uap->anamelen, sizeof(*uap->anamelen));
                return (error);
        }

        if (error == 0 && name != NULL) {
#ifdef COMPAT_OLDSOCK
                if (compat)
                        ((struct osockaddr *)name)->sa_family =
                            name->sa_family;
#endif
                error = copyout(name, uap->name, namelen);
        }
        if (error == 0)
                error = copyout(&namelen, uap->anamelen,
                    sizeof(namelen));
        if (error)
                kern_close(td, td->td_retval[0]);
        free(name, M_SONAME);
        return (error);
}

int
kern_accept(struct thread *td, int s, struct sockaddr **name,
    socklen_t *namelen)
{
        struct filedesc *fdp;
        struct file *headfp, *nfp = NULL;
        struct sockaddr *sa = NULL;
        int error;
        struct socket *head, *so;
        int fd;
        u_int fflag;
        pid_t pgid;
        int tmp;

        if (name) {
                *name = NULL;
                if (*namelen < 0)
                        return (EINVAL);
        }

        fdp = td->td_proc->p_fd;
        NET_LOCK_GIANT();
        error = getsock(fdp, s, &headfp, &fflag);
        if (error)
                goto done2;
        head = headfp->f_data;
        if ((head->so_options & SO_ACCEPTCONN) == 0) {
                error = EINVAL;
                goto done;
        }
#ifdef MAC
        SOCK_LOCK(head);
        error = mac_check_socket_accept(td->td_ucred, head);
        SOCK_UNLOCK(head);
        if (error != 0)
                goto done;
#endif
        error = falloc(td, &nfp, &fd);
        if (error)
                goto done;
        ACCEPT_LOCK();
        if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
                ACCEPT_UNLOCK();
                error = EWOULDBLOCK;
                goto noconnection;
        }
        while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
                if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
                        head->so_error = ECONNABORTED;
                        break;
                }
                error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
                    "accept", 0);
                if (error) {
                        ACCEPT_UNLOCK();
                        goto noconnection;
                }
        }
        if (head->so_error) {
                error = head->so_error;
                head->so_error = 0;
                ACCEPT_UNLOCK();
                goto noconnection;
        }
        so = TAILQ_FIRST(&head->so_comp);
        KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
        KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));

        /*
         * Before changing the flags on the socket, we have to bump the
         * reference count.  Otherwise, if the protocol calls sofree(),
         * the socket will be released due to a zero refcount.
         */
        SOCK_LOCK(so);                  /* soref() and so_state update */
        soref(so);                      /* file descriptor reference */

        TAILQ_REMOVE(&head->so_comp, so, so_list);
        head->so_qlen--;
        so->so_state |= (head->so_state & SS_NBIO);
        so->so_qstate &= ~SQ_COMP;
        so->so_head = NULL;

        SOCK_UNLOCK(so);
        ACCEPT_UNLOCK();

        /* An extra reference on `nfp' has been held for us by falloc(). */
        td->td_retval[0] = fd;

        /* connection has been removed from the listen queue */
        KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);

        pgid = fgetown(&head->so_sigio);
        if (pgid != 0)
                fsetown(pgid, &so->so_sigio);

        FILE_LOCK(nfp);
        nfp->f_data = so;       /* nfp has ref count from falloc */
        nfp->f_flag = fflag;
        nfp->f_ops = &socketops;
        nfp->f_type = DTYPE_SOCKET;
        FILE_UNLOCK(nfp);
        /* Sync socket nonblocking/async state with file flags */
        tmp = fflag & FNONBLOCK;
        (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
        tmp = fflag & FASYNC;
        (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
        sa = 0;
        error = soaccept(so, &sa);
        if (error) {
                /*
                 * return a namelen of zero for older code which might
                 * ignore the return value from accept.
                 */
                if (name)
                        *namelen = 0;
                goto noconnection;
        }
        if (sa == NULL) {
                if (name)
                        *namelen = 0;
                goto done;
        }
        if (name) {
                /* check sa_len before it is destroyed */
                if (*namelen > sa->sa_len)
                        *namelen = sa->sa_len;
                *name = sa;
                sa = NULL;
        }
noconnection:
        if (sa)
                FREE(sa, M_SONAME);

        /*
         * close the new descriptor, assuming someone hasn't ripped it
         * out from under us.
         */
        if (error)
                fdclose(fdp, nfp, fd, td);

        /*
         * Release explicitly held references before returning.
         */
done:
        if (nfp != NULL)
                fdrop(nfp, td);
        fdrop(headfp, td);
done2:
        NET_UNLOCK_GIANT();
        return (error);
}

/*
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(td, uap)
        struct thread *td;
        struct accept_args *uap;
{

        return (accept1(td, uap, 0));
}

#ifdef COMPAT_OLDSOCK
/*
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(td, uap)
        struct thread *td;
        struct accept_args *uap;
{

        return (accept1(td, uap, 1));
}
#endif /* COMPAT_OLDSOCK */

/*
 * MPSAFE
 */
/* ARGSUSED */
int
connect(td, uap)
        struct thread *td;
        register struct connect_args /* {
                int     s;
                caddr_t name;
                int     namelen;
        } */ *uap;
{
        struct sockaddr *sa;
        int error;

        error = getsockaddr(&sa, uap->name, uap->namelen);
        if (error)
                return (error);

        return (kern_connect(td, uap->s, sa));
}


int
kern_connect(td, fd, sa)
        struct thread *td;
        int fd;
        struct sockaddr *sa;
{
        struct socket *so;
        struct file *fp;
        int error;
        int interrupted = 0;

        NET_LOCK_GIANT();
        error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
        if (error)
                goto done2;
        so = fp->f_data;
        if (so->so_state & SS_ISCONNECTING) {
                error = EALREADY;
                goto done1;
        }
#ifdef MAC
        SOCK_LOCK(so);
        error = mac_check_socket_connect(td->td_ucred, so, sa);
        SOCK_UNLOCK(so);
        if (error)
                goto bad;
#endif
        error = soconnect(so, sa, td);
        if (error)
                goto bad;
        if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
                error = EINPROGRESS;
                goto done1;
        }
        SOCK_LOCK(so);
        while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
                error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
                    "connec", 0);
                if (error) {
                        if (error == EINTR || error == ERESTART)
                                interrupted = 1;
                        break;
                }
        }
        if (error == 0) {
                error = so->so_error;
                so->so_error = 0;
        }
        SOCK_UNLOCK(so);
bad:
        if (!interrupted)
                so->so_state &= ~SS_ISCONNECTING;
        if (error == ERESTART)
                error = EINTR;
done1:
        fdrop(fp, td);
done2:
        NET_UNLOCK_GIANT();
        FREE(sa, M_SONAME);
        return (error);
}

/*
 * MPSAFE
 */
int
socketpair(td, uap)
        struct thread *td;
        register struct socketpair_args /* {
                int     domain;
                int     type;
                int     protocol;
                int     *rsv;
        } */ *uap;
{
        register struct filedesc *fdp = td->td_proc->p_fd;
        struct file *fp1, *fp2;
        struct socket *so1, *so2;
        int fd, error, sv[2];

#ifdef MAC
        /* We might want to have a separate check for socket pairs. */
        error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
            uap->protocol);
        if (error)
                return (error);
#endif

        NET_LOCK_GIANT();
        error = socreate(uap->domain, &so1, uap->type, uap->protocol,
            td->td_ucred, td);
        if (error)
                goto done2;
        error = socreate(uap->domain, &so2, uap->type, uap->protocol,
            td->td_ucred, td);
        if (error)
                goto free1;
        /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
        error = falloc(td, &fp1, &fd);
        if (error)
                goto free2;
        sv[0] = fd;
        fp1->f_data = so1;      /* so1 already has ref count */
        error = falloc(td, &fp2, &fd);
        if (error)
                goto free3;
        fp2->f_data = so2;      /* so2 already has ref count */
        sv[1] = fd;
        error = soconnect2(so1, so2);
        if (error)
                goto free4;
        if (uap->type == SOCK_DGRAM) {
                /*
                 * Datagram socket connection is asymmetric.
                 */
                error = soconnect2(so2, so1);
                if (error)
                        goto free4;
        }
        FILE_LOCK(fp1);
        fp1->f_flag = FREAD|FWRITE;
        fp1->f_ops = &socketops;
        fp1->f_type = DTYPE_SOCKET;
        FILE_UNLOCK(fp1);
        FILE_LOCK(fp2);
        fp2->f_flag = FREAD|FWRITE;
        fp2->f_ops = &socketops;
        fp2->f_type = DTYPE_SOCKET;
        FILE_UNLOCK(fp2);
        error = copyout(sv, uap->rsv, 2 * sizeof (int));
        fdrop(fp1, td);
        fdrop(fp2, td);
        goto done2;
free4:
        fdclose(fdp, fp2, sv[1], td);
        fdrop(fp2, td);
free3:
        fdclose(fdp, fp1, sv[0], td);
        fdrop(fp1, td);
free2:
        (void)soclose(so2);
free1:
        (void)soclose(so1);
done2:
        NET_UNLOCK_GIANT();
        return (error);
}

static int
sendit(td, s, mp, flags)
        register struct thread *td;
        int s;
        register struct msghdr *mp;
        int flags;
{
        struct mbuf *control;
        struct sockaddr *to;
        int error;

        if (mp->msg_name != NULL) {
                error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
                if (error) {
                        to = NULL;
                        goto bad;
                }
                mp->msg_name = to;
        } else {
                to = NULL;
        }

        if (mp->msg_control) {
                if (mp->msg_controllen < sizeof(struct cmsghdr)
#ifdef COMPAT_OLDSOCK
                    && mp->msg_flags != MSG_COMPAT
#endif
                ) {
                        error = EINVAL;
                        goto bad;
                }
                error = sockargs(&control, mp->msg_control,
                    mp->msg_controllen, MT_CONTROL);
                if (error)
                        goto bad;
#ifdef COMPAT_OLDSOCK
                if (mp->msg_flags == MSG_COMPAT) {
                        register struct cmsghdr *cm;

                        M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
                        if (control == 0) {
                                error = ENOBUFS;
                                goto bad;
                        } else {
                                cm = mtod(control, struct cmsghdr *);
                                cm->cmsg_len = control->m_len;
                                cm->cmsg_level = SOL_SOCKET;
                                cm->cmsg_type = SCM_RIGHTS;
                        }
                }
#endif
        } else {
                control = NULL;
        }

        error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);

bad:
        if (to)
                FREE(to, M_SONAME);
        return (error);
}

int
kern_sendit(td, s, mp, flags, control, segflg)
        struct thread *td;
        int s;
        struct msghdr *mp;
        int flags;
        struct mbuf *control;
        enum uio_seg segflg;
{
        struct file *fp;
        struct uio auio;
        struct iovec *iov;
        struct socket *so;
        int i;
        int len, error;
#ifdef KTRACE
        struct uio *ktruio = NULL;
#endif

        NET_LOCK_GIANT();
        error = getsock(td->td_proc->p_fd, s, &fp, NULL);
        if (error)
                goto bad2;
        so = (struct socket *)fp->f_data;

#ifdef MAC
        SOCK_LOCK(so);
        error = mac_check_socket_send(td->td_ucred, so);
        SOCK_UNLOCK(so);
        if (error)
                goto bad;
#endif

        auio.uio_iov = mp->msg_iov;
        auio.uio_iovcnt = mp->msg_iovlen;
        auio.uio_segflg = segflg;
        auio.uio_rw = UIO_WRITE;
        auio.uio_td = td;
        auio.uio_offset = 0;                    /* XXX */
        auio.uio_resid = 0;
        iov = mp->msg_iov;
        for (i = 0; i < mp->msg_iovlen; i++, iov++) {
                if ((auio.uio_resid += iov->iov_len) < 0) {
                        error = EINVAL;
                        goto bad;
                }
        }
#ifdef KTRACE
        if (KTRPOINT(td, KTR_GENIO))
                ktruio = cloneuio(&auio);
#endif
        len = auio.uio_resid;
        error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
            0, control, flags, td);
        if (error) {
                if (auio.uio_resid != len && (error == ERESTART ||
                    error == EINTR || error == EWOULDBLOCK))
                        error = 0;
                /* Generation of SIGPIPE can be controlled per socket */
                if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
                    !(flags & MSG_NOSIGNAL)) {
                        PROC_LOCK(td->td_proc);
                        psignal(td->td_proc, SIGPIPE);
                        PROC_UNLOCK(td->td_proc);
                }
        }
        if (error == 0)
                td->td_retval[0] = len - auio.uio_resid;
#ifdef KTRACE
        if (ktruio != NULL) {
                ktruio->uio_resid = td->td_retval[0];
                ktrgenio(s, UIO_WRITE, ktruio, error);
        }
#endif
bad:
        fdrop(fp, td);
bad2:
        NET_UNLOCK_GIANT();
        return (error);
}

/*
 * MPSAFE
 */
int
sendto(td, uap)
        struct thread *td;
        register struct sendto_args /* {
                int     s;
                caddr_t buf;
                size_t  len;
                int     flags;
                caddr_t to;
                int     tolen;
        } */ *uap;
{
        struct msghdr msg;
        struct iovec aiov;
        int error;

        msg.msg_name = uap->to;
        msg.msg_namelen = uap->tolen;
        msg.msg_iov = &aiov;
        msg.msg_iovlen = 1;
        msg.msg_control = 0;
#ifdef COMPAT_OLDSOCK
        msg.msg_flags = 0;
#endif
        aiov.iov_base = uap->buf;
        aiov.iov_len = uap->len;
        error = sendit(td, uap->s, &msg, uap->flags);
        return (error);
}

#ifdef COMPAT_OLDSOCK
/*
 * MPSAFE
 */
int
osend(td, uap)
        struct thread *td;
        register struct osend_args /* {
                int     s;
                caddr_t buf;
                int     len;
                int     flags;
        } */ *uap;
{
        struct msghdr msg;
        struct iovec aiov;
        int error;

        msg.msg_name = 0;
        msg.msg_namelen = 0;
        msg.msg_iov = &aiov;
        msg.msg_iovlen = 1;
        aiov.iov_base = uap->buf;
        aiov.iov_len = uap->len;
        msg.msg_control = 0;
        msg.msg_flags = 0;
        error = sendit(td, uap->s, &msg, uap->flags);
        return (error);
}

/*
 * MPSAFE
 */
int
osendmsg(td, uap)
        struct thread *td;
        struct osendmsg_args /* {
                int     s;
                caddr_t msg;
                int     flags;
        } */ *uap;
{
        struct msghdr msg;
        struct iovec *iov;
        int error;

        error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
        if (error)
                return (error);
        error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
        if (error)
                return (error);
        msg.msg_iov = iov;
        msg.msg_flags = MSG_COMPAT;
        error = sendit(td, uap->s, &msg, uap->flags);
        free(iov, M_IOV);
        return (error);
}
#endif

/*
 * MPSAFE
 */
int
sendmsg(td, uap)
        struct thread *td;
        struct sendmsg_args /* {
                int     s;
                caddr_t msg;
                int     flags;
        } */ *uap;
{
        struct msghdr msg;
        struct iovec *iov;
        int error;

        error = copyin(uap->msg, &msg, sizeof (msg));
        if (error)
                return (error);
        error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
        if (error)
                return (error);
        msg.msg_iov = iov;
#ifdef COMPAT_OLDSOCK
        msg.msg_flags = 0;
#endif
        error = sendit(td, uap->s, &msg, uap->flags);
        free(iov, M_IOV);
        return (error);
}

int
kern_recvit(td, s, mp, fromseg, controlp)
        struct thread *td;
        int s;
        struct msghdr *mp;
        enum uio_seg fromseg;
        struct mbuf **controlp;
{
        struct uio auio;
        struct iovec *iov;
        int i;
        socklen_t len;
        int error;
        struct mbuf *m, *control = 0;
        caddr_t ctlbuf;
        struct file *fp;
        struct socket *so;
        struct sockaddr *fromsa = 0;
#ifdef KTRACE
        struct uio *ktruio = NULL;
#endif

        if (controlp != NULL)
                *controlp = 0;

        NET_LOCK_GIANT();
        error = getsock(td->td_proc->p_fd, s, &fp, NULL);
        if (error) {
                NET_UNLOCK_GIANT();
                return (error);
        }
        so = fp->f_data;

#ifdef MAC
        SOCK_LOCK(so);
        error = mac_check_socket_receive(td->td_ucred, so);
        SOCK_UNLOCK(so);
        if (error) {
                fdrop(fp, td);
                NET_UNLOCK_GIANT();
                return (error);
        }
#endif

        auio.uio_iov = mp->msg_iov;
        auio.uio_iovcnt = mp->msg_iovlen;
        auio.uio_segflg = UIO_USERSPACE;
        auio.uio_rw = UIO_READ;
        auio.uio_td = td;
        auio.uio_offset = 0;                    /* XXX */
        auio.uio_resid = 0;
        iov = mp->msg_iov;
        for (i = 0; i < mp->msg_iovlen; i++, iov++) {
                if ((auio.uio_resid += iov->iov_len) < 0) {
                        fdrop(fp, td);
                        NET_UNLOCK_GIANT();
                        return (EINVAL);
                }
        }
#ifdef KTRACE
        if (KTRPOINT(td, KTR_GENIO))
                ktruio = cloneuio(&auio);
#endif
        len = auio.uio_resid;
        error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
            (struct mbuf **)0,
            (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
            &mp->msg_flags);
        if (error) {
                if (auio.uio_resid != (int)len && (error == ERESTART ||
                    error == EINTR || error == EWOULDBLOCK))
                        error = 0;
        }
#ifdef KTRACE
        if (ktruio != NULL) {
                ktruio->uio_resid = (int)len - auio.uio_resid;
                ktrgenio(s, UIO_READ, ktruio, error);
        }
#endif
        if (error)
                goto out;
        td->td_retval[0] = (int)len - auio.uio_resid;
        if (mp->msg_name) {
                len = mp->msg_namelen;
                if (len <= 0 || fromsa == 0)
                        len = 0;
                else {
                        /* save sa_len before it is destroyed by MSG_COMPAT */
                        len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
                        if (mp->msg_flags & MSG_COMPAT)
                                ((struct osockaddr *)fromsa)->sa_family =
                                    fromsa->sa_family;
#endif
                        if (fromseg == UIO_USERSPACE) {
                                error = copyout(fromsa, mp->msg_name,
                                    (unsigned)len);
                                if (error)
                                        goto out;
                        } else
                                bcopy(fromsa, mp->msg_name, len);
                }
                mp->msg_namelen = len;
        }
        if (mp->msg_control && controlp == NULL) {
#ifdef COMPAT_OLDSOCK
                /*
                 * We assume that old recvmsg calls won't receive access
                 * rights and other control info, esp. as control info
                 * is always optional and those options didn't exist in 4.3.
                 * If we receive rights, trim the cmsghdr; anything else
                 * is tossed.
                 */
                if (control && mp->msg_flags & MSG_COMPAT) {
                        if (mtod(control, struct cmsghdr *)->cmsg_level !=
                            SOL_SOCKET ||
                            mtod(control, struct cmsghdr *)->cmsg_type !=
                            SCM_RIGHTS) {
                                mp->msg_controllen = 0;
                                goto out;
                        }
                        control->m_len -= sizeof (struct cmsghdr);
                        control->m_data += sizeof (struct cmsghdr);
                }
#endif
                len = mp->msg_controllen;
                m = control;
                mp->msg_controllen = 0;
                ctlbuf = mp->msg_control;

                while (m && len > 0) {
                        unsigned int tocopy;

                        if (len >= m->m_len)
                                tocopy = m->m_len;
                        else {
                                mp->msg_flags |= MSG_CTRUNC;
                                tocopy = len;
                        }

                        if ((error = copyout(mtod(m, caddr_t),
                            ctlbuf, tocopy)) != 0)
                                goto out;

                        ctlbuf += tocopy;
                        len -= tocopy;
                        m = m->m_next;
                }
                mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
        }
out:
        fdrop(fp, td);
        NET_UNLOCK_GIANT();
        if (fromsa)
                FREE(fromsa, M_SONAME);

        if (error == 0 && controlp != NULL)
                *controlp = control;
        else if (control)
                m_freem(control);

        return (error);
}

static int
recvit(td, s, mp, namelenp)
        struct thread *td;
        int s;
        struct msghdr *mp;
        void *namelenp;
{
        int error;

        error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
        if (error)
                return (error);
        if (namelenp) {
                error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
#ifdef COMPAT_OLDSOCK
                if (mp->msg_flags & MSG_COMPAT)
                        error = 0;      /* old recvfrom didn't check */
#endif
        }
        return (error);
}

/*
 * MPSAFE
 */
int
recvfrom(td, uap)
        struct thread *td;
        register struct recvfrom_args /* {
                int     s;
                caddr_t buf;
                size_t  len;
                int     flags;
                struct sockaddr * __restrict from;
                socklen_t * __restrict fromlenaddr;
        } */ *uap;
{
        struct msghdr msg;
        struct iovec aiov;
        int error;

        if (uap->fromlenaddr) {
                error = copyin(uap->fromlenaddr,
                    &msg.msg_namelen, sizeof (msg.msg_namelen));
                if (error)
                        goto done2;
        } else {
                msg.msg_namelen = 0;
        }
        msg.msg_name = uap->from;
        msg.msg_iov = &aiov;
        msg.msg_iovlen = 1;
        aiov.iov_base = uap->buf;
        aiov.iov_len = uap->len;
        msg.msg_control = 0;
        msg.msg_flags = uap->flags;
        error = recvit(td, uap->s, &msg, uap->fromlenaddr);
done2:
        return(error);
}

#ifdef COMPAT_OLDSOCK
/*
 * MPSAFE
 */
int
orecvfrom(td, uap)
        struct thread *td;
        struct recvfrom_args *uap;
{

        uap->flags |= MSG_COMPAT;
        return (recvfrom(td, uap));
}
#endif


#ifdef COMPAT_OLDSOCK
/*
 * MPSAFE
 */
int
orecv(td, uap)
        struct thread *td;
        register struct orecv_args /* {
                int     s;
                caddr_t buf;
                int     len;
                int     flags;
        } */ *uap;
{
        struct msghdr msg;
        struct iovec aiov;
        int error;

        msg.msg_name = 0;
        msg.msg_namelen = 0;
        msg.msg_iov = &aiov;
        msg.msg_iovlen = 1;
        aiov.iov_base = uap->buf;
        aiov.iov_len = uap->len;
        msg.msg_control = 0;
        msg.msg_flags = uap->flags;
        error = recvit(td, uap->s, &msg, NULL);
        return (error);
}

/*
 * Old recvmsg.  This code takes advantage of the fact that the old msghdr
 * overlays the new one, missing only the flags, and with the (old) access
 * rights where the control fields are now.
 *
 * MPSAFE
 */
int
orecvmsg(td, uap)
        struct thread *td;
        struct orecvmsg_args /* {
                int     s;
                struct omsghdr *msg;
                int     flags;
        } */ *uap;
{
        struct msghdr msg;
        struct iovec *iov;
        int error;

        error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
        if (error)
                return (error);
        error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
        if (error)
                return (error);
        msg.msg_flags = uap->flags | MSG_COMPAT;
        msg.msg_iov = iov;
        error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
        if (msg.msg_controllen && error == 0)
                error = copyout(&msg.msg_controllen,
                    &uap->msg->msg_accrightslen, sizeof (int));
        free(iov, M_IOV);
        return (error);
}
#endif

/*
 * MPSAFE
 */
int
recvmsg(td, uap)
        struct thread *td;
        struct recvmsg_args /* {
                int     s;
                struct  msghdr *msg;
                int     flags;
        } */ *uap;
{
        struct msghdr msg;
        struct iovec *uiov, *iov;
        int error;

        error = copyin(uap->msg, &msg, sizeof (msg));
        if (error)
                return (error);
        error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
        if (error)
                return (error);
        msg.msg_flags = uap->flags;
#ifdef COMPAT_OLDSOCK
        msg.msg_flags &= ~MSG_COMPAT;
#endif
        uiov = msg.msg_iov;
        msg.msg_iov = iov;
        error = recvit(td, uap->s, &msg, NULL);
        if (error == 0) {
                msg.msg_iov = uiov;
                error = copyout(&msg, uap->msg, sizeof(msg));
        }
        free(iov, M_IOV);
        return (error);
}

/*
 * MPSAFE
 */
/* ARGSUSED */
int
shutdown(td, uap)
        struct thread *td;
        register struct shutdown_args /* {
                int     s;
                int     how;
        } */ *uap;
{
        struct socket *so;
        struct file *fp;
        int error;

        NET_LOCK_GIANT();
        error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
        if (error == 0) {
                so = fp->f_data;
                error = soshutdown(so, uap->how);
                fdrop(fp, td);
        }
        NET_UNLOCK_GIANT();
        return (error);
}

/*
 * MPSAFE
 */
/* ARGSUSED */
int
setsockopt(td, uap)
        struct thread *td;
        register struct setsockopt_args /* {
                int     s;
                int     level;
                int     name;
                caddr_t val;
                int     valsize;
        } */ *uap;
{

        return (kern_setsockopt(td, uap->s, uap->level, uap->name,
            uap->val, UIO_USERSPACE, uap->valsize));
}

int
kern_setsockopt(td, s, level, name, val, valseg, valsize)
        struct thread *td;
        int s;
        int level;
        int name;
        void *val;
        enum uio_seg valseg;
        socklen_t valsize;
{
        int error;
        struct socket *so;
        struct file *fp;
        struct sockopt sopt;

        if (val == NULL && valsize != 0)
                return (EFAULT);
        if ((int)valsize < 0)
                return (EINVAL);

        sopt.sopt_dir = SOPT_SET;
        sopt.sopt_level = level;
        sopt.sopt_name = name;
        sopt.sopt_val = val;
        sopt.sopt_valsize = valsize;
        switch (valseg) {
        case UIO_USERSPACE:
                sopt.sopt_td = td;
                break;
        case UIO_SYSSPACE:
                sopt.sopt_td = NULL;
                break;
        default:
                panic("kern_setsockopt called with bad valseg");
        }

        NET_LOCK_GIANT();
        error = getsock(td->td_proc->p_fd, s, &fp, NULL);
        if (error == 0) {
                so = fp->f_data;
                error = sosetopt(so, &sopt);
                fdrop(fp, td);
        }
        NET_UNLOCK_GIANT();
        return(error);
}

/*
 * MPSAFE
 */
/* ARGSUSED */
int
getsockopt(td, uap)
        struct thread *td;
        register struct getsockopt_args /* {
                int     s;
                int     level;
                int     name;
                void * __restrict val;
                socklen_t * __restrict avalsize;
        } */ *uap;
{
        socklen_t valsize;
        int error;

        if (uap->val) {
                error = copyin(uap->avalsize, &valsize, sizeof (valsize));
                if (error)
                        return (error);
        }

        error = kern_getsockopt(td, uap->s, uap->level, uap->name,
            uap->val, UIO_USERSPACE, &valsize);

        if (error == 0)
                error = copyout(&valsize, uap->avalsize, sizeof (valsize));
        return (error);
}

/*
 * Kernel version of getsockopt.
 * optval can be a userland or a kernel address.  optlen is always a kernel
 * pointer.
 */
int
kern_getsockopt(td, s, level, name, val, valseg, valsize)
        struct thread *td;
        int s;
        int level;
        int name;
        void *val;
        enum uio_seg valseg;
        socklen_t *valsize;
{
        int error;
        struct socket *so;
        struct file *fp;
        struct sockopt sopt;

        if (val == NULL)
                *valsize = 0;
        if ((int)*valsize < 0)
                return (EINVAL);

        sopt.sopt_dir = SOPT_GET;
        sopt.sopt_level = level;
        sopt.sopt_name = name;
        sopt.sopt_val = val;
        sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
        switch (valseg) {
        case UIO_USERSPACE:
                sopt.sopt_td = td;
                break;
        case UIO_SYSSPACE:
                sopt.sopt_td = NULL;
                break;
        default:
                panic("kern_getsockopt called with bad valseg");
        }

        NET_LOCK_GIANT();
        error = getsock(td->td_proc->p_fd, s, &fp, NULL);
        if (error == 0) {
                so = fp->f_data;
                error = sogetopt(so, &sopt);
                *valsize = sopt.sopt_valsize;
                fdrop(fp, td);
        }
        NET_UNLOCK_GIANT();
        return (error);
}
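
/*
 * Illustrative in-kernel usage sketch (not part of this file; the descriptor
 * `fd' and the option chosen are assumptions for the example).  A kernel
 * caller passes UIO_SYSSPACE so the option value is treated as a kernel
 * address and no copyin is performed:
 *
 *	int on = 1;
 *
 *	error = kern_setsockopt(td, fd, SOL_SOCKET, SO_REUSEADDR,
 *	    &on, UIO_SYSSPACE, sizeof(on));
 */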

/*
 * getsockname1() - Get socket name.
 *
 * MPSAFE
 */
/* ARGSUSED */
static int
getsockname1(td, uap, compat)
        struct thread *td;
        register struct getsockname_args /* {
                int     fdes;
                struct sockaddr * __restrict asa;
                socklen_t * __restrict alen;
        } */ *uap;
        int compat;
{
        struct sockaddr *sa;
        socklen_t len;
        int error;

        error = copyin(uap->alen, &len, sizeof(len));
        if (error)
                return (error);

        error = kern_getsockname(td, uap->fdes, &sa, &len);
        if (error)
                return (error);

        if (len != 0) {
#ifdef COMPAT_OLDSOCK
                if (compat)
                        ((struct osockaddr *)sa)->sa_family = sa->sa_family;
#endif
                error = copyout(sa, uap->asa, (u_int)len);
        }
        free(sa, M_SONAME);
        if (error == 0)
                error = copyout(&len, uap->alen, sizeof(len));
        return (error);
}

int
kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
    socklen_t *alen)
{
        struct socket *so;
        struct file *fp;
        socklen_t len;
        int error;

        if (*alen < 0)
                return (EINVAL);

        NET_LOCK_GIANT();
        error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
        if (error)
                goto done;
        so = fp->f_data;
        *sa = NULL;
        error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
        if (error)
                goto bad;
        if (*sa == NULL)
                len = 0;
        else
                len = MIN(*alen, (*sa)->sa_len);
        *alen = len;
bad:
        fdrop(fp, td);
        if (error && *sa) {
                free(*sa, M_SONAME);
                *sa = NULL;
        }
done:
        NET_UNLOCK_GIANT();
        return (error);
}

/*
 * MPSAFE
 */
int
getsockname(td, uap)
        struct thread *td;
        struct getsockname_args *uap;
{

        return (getsockname1(td, uap, 0));
}

#ifdef COMPAT_OLDSOCK
/*
 * MPSAFE
 */
int
ogetsockname(td, uap)
        struct thread *td;
        struct getsockname_args *uap;
{

        return (getsockname1(td, uap, 1));
}
#endif /* COMPAT_OLDSOCK */

/*
 * getpeername1() - Get name of peer for connected socket.
 *
 * MPSAFE
 */
/* ARGSUSED */
static int
getpeername1(td, uap, compat)
        struct thread *td;
        register struct getpeername_args /* {
                int     fdes;
                struct sockaddr * __restrict asa;
                socklen_t * __restrict alen;
        } */ *uap;
        int compat;
{
        struct sockaddr *sa;
        socklen_t len;
        int error;

        error = copyin(uap->alen, &len, sizeof (len));
        if (error)
                return (error);

        error = kern_getpeername(td, uap->fdes, &sa, &len);
        if (error)
                return (error);

        if (len != 0) {
#ifdef COMPAT_OLDSOCK
                if (compat)
                        ((struct osockaddr *)sa)->sa_family = sa->sa_family;
#endif
                error = copyout(sa, uap->asa, (u_int)len);
        }
        free(sa, M_SONAME);
        if (error == 0)
                error = copyout(&len, uap->alen, sizeof(len));
        return (error);
}

int
kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
    socklen_t *alen)
{
        struct socket *so;
        struct file *fp;
        socklen_t len;
        int error;

        if (*alen < 0)
                return (EINVAL);

        NET_LOCK_GIANT();
        error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
        if (error)
                goto done2;
        so = fp->f_data;
        if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
                error = ENOTCONN;
                goto done1;
        }
        *sa = NULL;
        error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
        if (error)
                goto bad;
        if (*sa == NULL)
                len = 0;
        else
                len = MIN(*alen, (*sa)->sa_len);
        *alen = len;
bad:
        if (error && *sa) {
                free(*sa, M_SONAME);
                *sa = NULL;
        }
done1:
        fdrop(fp, td);
done2:
        NET_UNLOCK_GIANT();
        return (error);
}

/*
 * MPSAFE
 */
int
getpeername(td, uap)
        struct thread *td;
        struct getpeername_args *uap;
{

        return (getpeername1(td, uap, 0));
}

#ifdef COMPAT_OLDSOCK
/*
 * MPSAFE
 */
int
ogetpeername(td, uap)
        struct thread *td;
        struct ogetpeername_args *uap;
{

        /* XXX uap should have type `getpeername_args *' to begin with. */
        return (getpeername1(td, (struct getpeername_args *)uap, 1));
}
#endif /* COMPAT_OLDSOCK */

int
sockargs(mp, buf, buflen, type)
        struct mbuf **mp;
        caddr_t buf;
        int buflen, type;
{
        register struct sockaddr *sa;
        register struct mbuf *m;
        int error;

        if ((u_int)buflen > MLEN) {
#ifdef COMPAT_OLDSOCK
                if (type == MT_SONAME && (u_int)buflen <= 112)
                        buflen = MLEN;          /* unix domain compat. hack */
                else
#endif
                if ((u_int)buflen > MCLBYTES)
                        return (EINVAL);
        }
        m = m_get(M_TRYWAIT, type);
        if (m == NULL)
                return (ENOBUFS);
        if ((u_int)buflen > MLEN) {
                MCLGET(m, M_TRYWAIT);
                if ((m->m_flags & M_EXT) == 0) {
                        m_free(m);
                        return (ENOBUFS);
                }
        }
        m->m_len = buflen;
        error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
        if (error)
                (void) m_free(m);
        else {
                *mp = m;
                if (type == MT_SONAME) {
                        sa = mtod(m, struct sockaddr *);

#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
                        if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
                                sa->sa_family = sa->sa_len;
#endif
                        sa->sa_len = buflen;
                }
        }
        return (error);
}

int
getsockaddr(namp, uaddr, len)
        struct sockaddr **namp;
        caddr_t uaddr;
        size_t len;
{
        struct sockaddr *sa;
        int error;

        if (len > SOCK_MAXADDRLEN)
                return (ENAMETOOLONG);
        if (len < offsetof(struct sockaddr, sa_data[0]))
                return (EINVAL);
        MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
        error = copyin(uaddr, sa, len);
        if (error) {
                FREE(sa, M_SONAME);
        } else {
#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
                if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
                        sa->sa_family = sa->sa_len;
#endif
                sa->sa_len = len;
                *namp = sa;
        }
        return (error);
}

/*
 * Detach mapped page and release resources back to the system.
 */
void
sf_buf_mext(void *addr, void *args)
{
        vm_page_t m;

        m = sf_buf_page(args);
        sf_buf_free(args);
        vm_page_lock_queues();
        vm_page_unwire(m, 0);
        /*
         * Check for the object going away on us.  This can
         * happen since we don't hold a reference to it.
         * If so, we're responsible for freeing the page.
         */
        if (m->wire_count == 0 && m->object == NULL)
                vm_page_free(m);
        vm_page_unlock_queues();
}

/*
 * sendfile(2)
 *
 * MPSAFE
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send a file specified by 'fd' and starting at 'offset' to a socket
 * specified by 's'.  Send only 'nbytes' of the file or until EOF if
 * nbytes == 0.  Optionally add a header and/or trailer to the socket
 * output.  If specified, write the total number of bytes sent into *sbytes.
 *
 */
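/*
 * Illustrative userland usage sketch (not part of the kernel source; the
 * descriptor names and the path below are assumptions for the example).
 * Here `s' is a connected SOCK_STREAM socket and the whole file is sent:
 *
 *	off_t sbytes;
 *	int fd = open("/path/to/file", O_RDONLY);
 *
 *	if (sendfile(fd, s, 0, 0, NULL, &sbytes, 0) == -1)
 *		err(1, "sendfile");
 */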
int
sendfile(struct thread *td, struct sendfile_args *uap)
{

        return (do_sendfile(td, uap, 0));
}

static int
do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
{
        struct sf_hdtr hdtr;
        struct uio *hdr_uio, *trl_uio;
        int error;

        hdr_uio = trl_uio = NULL;

        if (uap->hdtr != NULL) {
                error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
                if (error)
                        goto out;
                if (hdtr.headers != NULL) {
                        error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
                        if (error)
                                goto out;
                }
                if (hdtr.trailers != NULL) {
                        error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
                        if (error)
                                goto out;

                }
        }

        error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
out:
        if (hdr_uio)
                free(hdr_uio, M_IOV);
        if (trl_uio)
                free(trl_uio, M_IOV);
        return (error);
}

#ifdef COMPAT_FREEBSD4
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
        struct sendfile_args args;

        args.fd = uap->fd;
        args.s = uap->s;
        args.offset = uap->offset;
        args.nbytes = uap->nbytes;
        args.hdtr = uap->hdtr;
        args.sbytes = uap->sbytes;
        args.flags = uap->flags;

        return (do_sendfile(td, &args, 1));
}
#endif /* COMPAT_FREEBSD4 */

int
kern_sendfile(struct thread *td, struct sendfile_args *uap,
    struct uio *hdr_uio, struct uio *trl_uio, int compat)
{
        struct file *sock_fp;
        struct vnode *vp;
        struct vm_object *obj = NULL;
        struct socket *so = NULL;
        struct mbuf *m, *m_header = NULL;
        struct sf_buf *sf;
        struct vm_page *pg;
        off_t off, xfsize, hdtr_size, sbytes = 0;
        int error, headersize = 0, headersent = 0;
        int vfslocked;

        NET_LOCK_GIANT();

        hdtr_size = 0;

        /*
         * The descriptor must be a regular file and have a backing VM object.
         */
        if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
                goto done;
        vfslocked = VFS_LOCK_GIANT(vp->v_mount);
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
        obj = vp->v_object;
        if (obj != NULL) {
                /*
                 * Temporarily increase the backing VM object's reference
                 * count so that a forced reclamation of its vnode does not
                 * immediately destroy it.
                 */
                VM_OBJECT_LOCK(obj);
                if ((obj->flags & OBJ_DEAD) == 0) {
                        vm_object_reference_locked(obj);
                        VM_OBJECT_UNLOCK(obj);
                } else {
                        VM_OBJECT_UNLOCK(obj);
                        obj = NULL;
                }
        }
        VOP_UNLOCK(vp, 0, td);
        VFS_UNLOCK_GIANT(vfslocked);
        if (obj == NULL) {
                error = EINVAL;
                goto done;
        }
        if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp, NULL)) != 0)
                goto done;
        so = sock_fp->f_data;
        if (so->so_type != SOCK_STREAM) {
                error = EINVAL;
                goto done;
        }
        if ((so->so_state & SS_ISCONNECTED) == 0) {
                error = ENOTCONN;
                goto done;
        }
        if (uap->offset < 0) {
                error = EINVAL;
                goto done;
        }

#ifdef MAC
        SOCK_LOCK(so);
        error = mac_check_socket_send(td->td_ucred, so);
        SOCK_UNLOCK(so);
        if (error)
                goto done;
#endif

        /*
         * If specified, get the pointer to the sf_hdtr struct for
         * any headers/trailers.
         */
        if (hdr_uio != NULL) {
                hdr_uio->uio_td = td;
                hdr_uio->uio_rw = UIO_WRITE;
                if (hdr_uio->uio_resid > 0) {
                        m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0, 0);
                        if (m_header == NULL)
                                goto done;
                        headersize = m_header->m_pkthdr.len;
                        if (compat)
                                sbytes += headersize;
                }
        }

        /*
         * Protect against multiple writers to the socket.
         */
        SOCKBUF_LOCK(&so->so_snd);
        (void) sblock(&so->so_snd, M_WAITOK);
        SOCKBUF_UNLOCK(&so->so_snd);

        /*
         * Loop through the pages in the file, starting with the requested
         * offset. Get a file page (do I/O if necessary), map the file page
         * into an sf_buf, attach an mbuf header to the sf_buf, and queue
         * it on the socket.
         */
        for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
                vm_pindex_t pindex;
                vm_offset_t pgoff;

                pindex = OFF_TO_IDX(off);
                VM_OBJECT_LOCK(obj);
retry_lookup:
                /*
                 * Calculate the amount to transfer. Not to exceed a page,
                 * the EOF, or the passed in nbytes.
                 */
                xfsize = obj->un_pager.vnp.vnp_size - off;
                VM_OBJECT_UNLOCK(obj);
                if (xfsize > PAGE_SIZE)
                        xfsize = PAGE_SIZE;
                pgoff = (vm_offset_t)(off & PAGE_MASK);
                if (PAGE_SIZE - pgoff < xfsize)
                        xfsize = PAGE_SIZE - pgoff;
                if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
                        xfsize = uap->nbytes - sbytes;
                if (xfsize <= 0) {
                        if (m_header != NULL) {
                                m = m_header;
                                m_header = NULL;
                                SOCKBUF_LOCK(&so->so_snd);
                                goto retry_space;
                        } else
                                break;
                }
                /*
                 * Optimize the non-blocking case by looking at the socket space
                 * before going to the extra work of constituting the sf_buf.
                 */
                SOCKBUF_LOCK(&so->so_snd);
                if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
                        if (so->so_snd.sb_state & SBS_CANTSENDMORE)
                                error = EPIPE;
                        else
                                error = EAGAIN;
                        sbunlock(&so->so_snd);
                        SOCKBUF_UNLOCK(&so->so_snd);
                        goto done;
                }
                SOCKBUF_UNLOCK(&so->so_snd);
                VM_OBJECT_LOCK(obj);
                /*
                 * Attempt to look up the page.
                 *
                 *	Allocate if not found
                 *
                 *	Wait and loop if busy.
                 */
                pg = vm_page_lookup(obj, pindex);

                if (pg == NULL) {
                        pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY |
                            VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
                        if (pg == NULL) {
                                VM_OBJECT_UNLOCK(obj);
                                VM_WAIT;
                                VM_OBJECT_LOCK(obj);
                                goto retry_lookup;
                        }
                        vm_page_lock_queues();
                } else {
                        vm_page_lock_queues();
                        if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
                                goto retry_lookup;
                        /*
                         * Wire the page so it does not get ripped out from
                         * under us.
                         */
                        vm_page_wire(pg);
                }

                /*
                 * If page is not valid for what we need, initiate I/O
                 */

                if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
                        VM_OBJECT_UNLOCK(obj);
                } else if (uap->flags & SF_NODISKIO) {
                        error = EBUSY;
                } else {
                        int bsize, resid;

                        /*
                         * Ensure that our page is still around when the I/O
                         * completes.
                         */
                        vm_page_io_start(pg);
                        vm_page_unlock_queues();
                        VM_OBJECT_UNLOCK(obj);

                        /*
                         * Get the page from backing store.
                         */
                        bsize = vp->v_mount->mnt_stat.f_iosize;
                        vfslocked = VFS_LOCK_GIANT(vp->v_mount);
                        vn_lock(vp, LK_SHARED | LK_RETRY, td);
                        /*
                         * XXXMAC: Because we don't have fp->f_cred here,
                         * we pass in NOCRED.  This is probably wrong, but
                         * is consistent with our original implementation.
                         */
                        error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
                            trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
                            IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
                            td->td_ucred, NOCRED, &resid, td);
                        VOP_UNLOCK(vp, 0, td);
                        VFS_UNLOCK_GIANT(vfslocked);
                        VM_OBJECT_LOCK(obj);
                        vm_page_lock_queues();
                        vm_page_io_finish(pg);
                        if (!error)
                                VM_OBJECT_UNLOCK(obj);
                        mbstat.sf_iocnt++;
                }

                if (error) {
                        vm_page_unwire(pg, 0);
                        /*
                         * See if anyone else might know about this page.
                         * If not and it is not valid, then free it.
                         */
                        if (pg->wire_count == 0 && pg->valid == 0 &&
                            pg->busy == 0 && !(pg->flags & PG_BUSY) &&
                            pg->hold_count == 0) {
                                vm_page_free(pg);
                        }
                        vm_page_unlock_queues();
                        VM_OBJECT_UNLOCK(obj);
                        SOCKBUF_LOCK(&so->so_snd);
                        sbunlock(&so->so_snd);
                        SOCKBUF_UNLOCK(&so->so_snd);
                        goto done;
                }
                vm_page_unlock_queues();

                /*
                 * Get a sendfile buf.  We usually wait as long as necessary,
                 * but this wait can be interrupted.
                 */
                if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) {
                        mbstat.sf_allocfail++;
                        vm_page_lock_queues();
                        vm_page_unwire(pg, 0);
                        if (pg->wire_count == 0 && pg->object == NULL)
                                vm_page_free(pg);
                        vm_page_unlock_queues();
                        SOCKBUF_LOCK(&so->so_snd);
                        sbunlock(&so->so_snd);
                        SOCKBUF_UNLOCK(&so->so_snd);
                        error = EINTR;
                        goto done;
                }

                /*
                 * Get an mbuf header and set it up as having external storage.
                 */
                if (m_header)
                        MGET(m, M_TRYWAIT, MT_DATA);
                else
                        MGETHDR(m, M_TRYWAIT, MT_DATA);
                if (m == NULL) {
                        error = ENOBUFS;
                        sf_buf_mext((void *)sf_buf_kva(sf), sf);
                        SOCKBUF_LOCK(&so->so_snd);
                        sbunlock(&so->so_snd);
                        SOCKBUF_UNLOCK(&so->so_snd);
                        goto done;
                }
                /*
                 * Setup external storage for mbuf.
                 */
                MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY,
                    EXT_SFBUF);
                m->m_data = (char *)sf_buf_kva(sf) + pgoff;
                m->m_pkthdr.len = m->m_len = xfsize;

                if (m_header) {
                        m_cat(m_header, m);
                        m = m_header;
                        m_header = NULL;
                        m_fixhdr(m);
                }

                /*
                 * Add the buffer to the socket buffer chain.
                 */
                SOCKBUF_LOCK(&so->so_snd);
retry_space:
                /*
                 * Make sure that the socket is still able to take more data.
                 * CANTSENDMORE being true usually means that the connection
                 * was closed. so_error is true when an error was sensed after
                 * a previous send.
                 * The state is checked after the page mapping and buffer
                 * allocation above since those operations may block and make
                 * any socket checks stale. From this point forward, nothing
                 * blocks before the pru_send (or more accurately, any blocking
                 * results in a loop back to here to re-check).
                 */
                SOCKBUF_LOCK_ASSERT(&so->so_snd);
                if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) {
                        if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
                                error = EPIPE;
                        } else {
                                error = so->so_error;
                                so->so_error = 0;
                        }
                        m_freem(m);
                        sbunlock(&so->so_snd);
                        SOCKBUF_UNLOCK(&so->so_snd);
                        goto done;
                }
                /*
                 * Wait for socket space to become available. We do this just
                 * after checking the connection state above in order to avoid
                 * a race condition with sbwait().
                 */
                if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
                        if (so->so_state & SS_NBIO) {
                                m_freem(m);
                                sbunlock(&so->so_snd);
                                SOCKBUF_UNLOCK(&so->so_snd);
                                error = EAGAIN;
                                goto done;
                        }
                        error = sbwait(&so->so_snd);
                        /*
                         * An error from sbwait usually indicates that we've
                         * been interrupted by a signal. If we've sent anything
                         * then return bytes sent, otherwise return the error.
                         */
                        if (error) {
                                m_freem(m);
                                sbunlock(&so->so_snd);
                                SOCKBUF_UNLOCK(&so->so_snd);
                                goto done;
                        }
                        goto retry_space;
                }
                SOCKBUF_UNLOCK(&so->so_snd);
                error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
                if (error) {
                        SOCKBUF_LOCK(&so->so_snd);
                        sbunlock(&so->so_snd);
                        SOCKBUF_UNLOCK(&so->so_snd);
                        goto done;
                }
                headersent = 1;
        }
        SOCKBUF_LOCK(&so->so_snd);
        sbunlock(&so->so_snd);
        SOCKBUF_UNLOCK(&so->so_snd);

        /*
         * Send trailers. Wimp out and use writev(2).
         */
        if (trl_uio != NULL) {
                error = kern_writev(td, uap->s, trl_uio);
                if (error)
                        goto done;
                if (compat)
                        sbytes += td->td_retval[0];
                else
                        hdtr_size += td->td_retval[0];
        }

done:
        if (headersent) {
                if (!compat)
                        hdtr_size += headersize;
        } else {
                if (compat)
                        sbytes -= headersize;
        }
        /*
         * If there was no error we have to clear td->td_retval[0]
         * because it may have been set by writev.
         */
        if (error == 0) {
                td->td_retval[0] = 0;
        }
        if (uap->sbytes != NULL) {
                if (!compat)
                        sbytes += hdtr_size;
                copyout(&sbytes, uap->sbytes, sizeof(off_t));
        }
        if (obj != NULL)
                vm_object_deallocate(obj);
        if (vp != NULL) {
                vfslocked = VFS_LOCK_GIANT(vp->v_mount);
                vrele(vp);
                VFS_UNLOCK_GIANT(vfslocked);
        }
        if (so)
                fdrop(sock_fp, td);
        if (m_header)
                m_freem(m_header);

        NET_UNLOCK_GIANT();

        if (error == ERESTART)
                error = EINTR;

        return (error);
}