1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_compat.h" 39 #include "opt_ktrace.h" 40 #include "opt_mac.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/kernel.h> 45 #include <sys/lock.h> 46 #include <sys/mac.h> 47 #include <sys/mutex.h> 48 #include <sys/sysproto.h> 49 #include <sys/malloc.h> 50 #include <sys/filedesc.h> 51 #include <sys/event.h> 52 #include <sys/proc.h> 53 #include <sys/fcntl.h> 54 #include <sys/file.h> 55 #include <sys/filio.h> 56 #include <sys/mount.h> 57 #include <sys/mbuf.h> 58 #include <sys/protosw.h> 59 #include <sys/sf_buf.h> 60 #include <sys/socket.h> 61 #include <sys/socketvar.h> 62 #include <sys/signalvar.h> 63 #include <sys/syscallsubr.h> 64 #include <sys/sysctl.h> 65 #include <sys/uio.h> 66 #include <sys/vnode.h> 67 #ifdef KTRACE 68 #include <sys/ktrace.h> 69 #endif 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 #include <vm/vm_pageout.h> 75 #include <vm/vm_kern.h> 76 #include <vm/vm_extern.h> 77 78 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 79 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 80 81 static int accept1(struct thread *td, struct accept_args *uap, int compat); 82 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 83 static int getsockname1(struct thread *td, struct getsockname_args *uap, 84 int compat); 85 static int getpeername1(struct thread *td, struct getpeername_args *uap, 86 int compat); 87 88 /* 89 * NSFBUFS-related variables and associated sysctls 90 */ 91 int nsfbufs; 92 int nsfbufspeak; 93 int nsfbufsused; 94 95 SYSCTL_DECL(_kern_ipc); 96 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 97 "Maximum number of sendfile(2) sf_bufs available"); 98 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 99 "Number of sendfile(2) sf_bufs at peak usage"); 100 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 101 "Number of sendfile(2) sf_bufs in use"); 102 103 /* 104 * Convert a user file descriptor to a kernel file entry. A reference on the 105 * file entry is held upon returning. This is lighter weight than 106 * fgetsock(), which bumps the socket reference drops the file reference 107 * count instead, as this approach avoids several additional mutex operations 108 * associated with the additional reference count. 109 */ 110 static int 111 getsock(struct filedesc *fdp, int fd, struct file **fpp) 112 { 113 struct file *fp; 114 int error; 115 116 fp = NULL; 117 if (fdp == NULL) 118 error = EBADF; 119 else { 120 FILEDESC_LOCK_FAST(fdp); 121 fp = fget_locked(fdp, fd); 122 if (fp == NULL) 123 error = EBADF; 124 else if (fp->f_type != DTYPE_SOCKET) { 125 fp = NULL; 126 error = ENOTSOCK; 127 } else { 128 fhold(fp); 129 error = 0; 130 } 131 FILEDESC_UNLOCK_FAST(fdp); 132 } 133 *fpp = fp; 134 return (error); 135 } 136 137 /* 138 * System call interface to the socket abstraction. 139 */ 140 #if defined(COMPAT_43) 141 #define COMPAT_OLDSOCK 142 #endif 143 144 /* 145 * MPSAFE 146 */ 147 int 148 socket(td, uap) 149 struct thread *td; 150 register struct socket_args /* { 151 int domain; 152 int type; 153 int protocol; 154 } */ *uap; 155 { 156 struct filedesc *fdp; 157 struct socket *so; 158 struct file *fp; 159 int fd, error; 160 161 #ifdef MAC 162 error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type, 163 uap->protocol); 164 if (error) 165 return (error); 166 #endif 167 fdp = td->td_proc->p_fd; 168 error = falloc(td, &fp, &fd); 169 if (error) 170 return (error); 171 /* An extra reference on `fp' has been held for us by falloc(). */ 172 NET_LOCK_GIANT(); 173 error = socreate(uap->domain, &so, uap->type, uap->protocol, 174 td->td_ucred, td); 175 NET_UNLOCK_GIANT(); 176 if (error) { 177 fdclose(fdp, fp, fd, td); 178 } else { 179 FILEDESC_LOCK_FAST(fdp); 180 fp->f_data = so; /* already has ref count */ 181 fp->f_flag = FREAD|FWRITE; 182 fp->f_ops = &socketops; 183 fp->f_type = DTYPE_SOCKET; 184 FILEDESC_UNLOCK_FAST(fdp); 185 td->td_retval[0] = fd; 186 } 187 fdrop(fp, td); 188 return (error); 189 } 190 191 /* 192 * MPSAFE 193 */ 194 /* ARGSUSED */ 195 int 196 bind(td, uap) 197 struct thread *td; 198 register struct bind_args /* { 199 int s; 200 caddr_t name; 201 int namelen; 202 } */ *uap; 203 { 204 struct sockaddr *sa; 205 int error; 206 207 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 208 return (error); 209 210 return (kern_bind(td, uap->s, sa)); 211 } 212 213 int 214 kern_bind(td, fd, sa) 215 struct thread *td; 216 int fd; 217 struct sockaddr *sa; 218 { 219 struct socket *so; 220 struct file *fp; 221 int error; 222 223 NET_LOCK_GIANT(); 224 error = getsock(td->td_proc->p_fd, fd, &fp); 225 if (error) 226 goto done2; 227 so = fp->f_data; 228 #ifdef MAC 229 SOCK_LOCK(so); 230 error = mac_check_socket_bind(td->td_ucred, so, sa); 231 SOCK_UNLOCK(so); 232 if (error) 233 goto done1; 234 #endif 235 error = sobind(so, sa, td); 236 #ifdef MAC 237 done1: 238 #endif 239 fdrop(fp, td); 240 done2: 241 NET_UNLOCK_GIANT(); 242 FREE(sa, M_SONAME); 243 return (error); 244 } 245 246 /* 247 * MPSAFE 248 */ 249 /* ARGSUSED */ 250 int 251 listen(td, uap) 252 struct thread *td; 253 register struct listen_args /* { 254 int s; 255 int backlog; 256 } */ *uap; 257 { 258 struct socket *so; 259 struct file *fp; 260 int error; 261 262 NET_LOCK_GIANT(); 263 error = getsock(td->td_proc->p_fd, uap->s, &fp); 264 if (error == 0) { 265 so = fp->f_data; 266 #ifdef MAC 267 SOCK_LOCK(so); 268 error = mac_check_socket_listen(td->td_ucred, so); 269 SOCK_UNLOCK(so); 270 if (error) 271 goto done; 272 #endif 273 error = solisten(so, uap->backlog, td); 274 #ifdef MAC 275 done: 276 #endif 277 fdrop(fp, td); 278 } 279 NET_UNLOCK_GIANT(); 280 return(error); 281 } 282 283 /* 284 * accept1() 285 * MPSAFE 286 */ 287 static int 288 accept1(td, uap, compat) 289 struct thread *td; 290 register struct accept_args /* { 291 int s; 292 struct sockaddr * __restrict name; 293 socklen_t * __restrict anamelen; 294 } */ *uap; 295 int compat; 296 { 297 struct filedesc *fdp; 298 struct file *nfp = NULL; 299 struct sockaddr *sa = NULL; 300 socklen_t namelen; 301 int error; 302 struct socket *head, *so; 303 int fd; 304 u_int fflag; 305 pid_t pgid; 306 int tmp; 307 308 fdp = td->td_proc->p_fd; 309 if (uap->name) { 310 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 311 if(error) 312 return (error); 313 if (namelen < 0) 314 return (EINVAL); 315 } 316 NET_LOCK_GIANT(); 317 error = fgetsock(td, uap->s, &head, &fflag); 318 if (error) 319 goto done2; 320 if ((head->so_options & SO_ACCEPTCONN) == 0) { 321 error = EINVAL; 322 goto done; 323 } 324 #ifdef MAC 325 SOCK_LOCK(head); 326 error = mac_check_socket_accept(td->td_ucred, head); 327 SOCK_UNLOCK(head); 328 if (error != 0) 329 goto done; 330 #endif 331 error = falloc(td, &nfp, &fd); 332 if (error) 333 goto done; 334 ACCEPT_LOCK(); 335 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 336 ACCEPT_UNLOCK(); 337 error = EWOULDBLOCK; 338 goto noconnection; 339 } 340 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 341 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 342 head->so_error = ECONNABORTED; 343 break; 344 } 345 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 346 "accept", 0); 347 if (error) { 348 ACCEPT_UNLOCK(); 349 goto noconnection; 350 } 351 } 352 if (head->so_error) { 353 error = head->so_error; 354 head->so_error = 0; 355 ACCEPT_UNLOCK(); 356 goto noconnection; 357 } 358 so = TAILQ_FIRST(&head->so_comp); 359 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 360 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 361 362 /* 363 * Before changing the flags on the socket, we have to bump the 364 * reference count. Otherwise, if the protocol calls sofree(), 365 * the socket will be released due to a zero refcount. 366 */ 367 SOCK_LOCK(so); /* soref() and so_state update */ 368 soref(so); /* file descriptor reference */ 369 370 TAILQ_REMOVE(&head->so_comp, so, so_list); 371 head->so_qlen--; 372 so->so_state |= (head->so_state & SS_NBIO); 373 so->so_qstate &= ~SQ_COMP; 374 so->so_head = NULL; 375 376 SOCK_UNLOCK(so); 377 ACCEPT_UNLOCK(); 378 379 /* An extra reference on `nfp' has been held for us by falloc(). */ 380 td->td_retval[0] = fd; 381 382 /* connection has been removed from the listen queue */ 383 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 384 385 pgid = fgetown(&head->so_sigio); 386 if (pgid != 0) 387 fsetown(pgid, &so->so_sigio); 388 389 FILE_LOCK(nfp); 390 nfp->f_data = so; /* nfp has ref count from falloc */ 391 nfp->f_flag = fflag; 392 nfp->f_ops = &socketops; 393 nfp->f_type = DTYPE_SOCKET; 394 FILE_UNLOCK(nfp); 395 /* Sync socket nonblocking/async state with file flags */ 396 tmp = fflag & FNONBLOCK; 397 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 398 tmp = fflag & FASYNC; 399 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 400 sa = 0; 401 error = soaccept(so, &sa); 402 if (error) { 403 /* 404 * return a namelen of zero for older code which might 405 * ignore the return value from accept. 406 */ 407 if (uap->name != NULL) { 408 namelen = 0; 409 (void) copyout(&namelen, 410 uap->anamelen, sizeof(*uap->anamelen)); 411 } 412 goto noconnection; 413 } 414 if (sa == NULL) { 415 namelen = 0; 416 if (uap->name) 417 goto gotnoname; 418 error = 0; 419 goto done; 420 } 421 if (uap->name) { 422 /* check sa_len before it is destroyed */ 423 if (namelen > sa->sa_len) 424 namelen = sa->sa_len; 425 #ifdef COMPAT_OLDSOCK 426 if (compat) 427 ((struct osockaddr *)sa)->sa_family = 428 sa->sa_family; 429 #endif 430 error = copyout(sa, uap->name, (u_int)namelen); 431 if (!error) 432 gotnoname: 433 error = copyout(&namelen, 434 uap->anamelen, sizeof (*uap->anamelen)); 435 } 436 noconnection: 437 if (sa) 438 FREE(sa, M_SONAME); 439 440 /* 441 * close the new descriptor, assuming someone hasn't ripped it 442 * out from under us. 443 */ 444 if (error) 445 fdclose(fdp, nfp, fd, td); 446 447 /* 448 * Release explicitly held references before returning. 449 */ 450 done: 451 if (nfp != NULL) 452 fdrop(nfp, td); 453 fputsock(head); 454 done2: 455 NET_UNLOCK_GIANT(); 456 return (error); 457 } 458 459 /* 460 * MPSAFE (accept1() is MPSAFE) 461 */ 462 int 463 accept(td, uap) 464 struct thread *td; 465 struct accept_args *uap; 466 { 467 468 return (accept1(td, uap, 0)); 469 } 470 471 #ifdef COMPAT_OLDSOCK 472 /* 473 * MPSAFE (accept1() is MPSAFE) 474 */ 475 int 476 oaccept(td, uap) 477 struct thread *td; 478 struct accept_args *uap; 479 { 480 481 return (accept1(td, uap, 1)); 482 } 483 #endif /* COMPAT_OLDSOCK */ 484 485 /* 486 * MPSAFE 487 */ 488 /* ARGSUSED */ 489 int 490 connect(td, uap) 491 struct thread *td; 492 register struct connect_args /* { 493 int s; 494 caddr_t name; 495 int namelen; 496 } */ *uap; 497 { 498 struct sockaddr *sa; 499 int error; 500 501 error = getsockaddr(&sa, uap->name, uap->namelen); 502 if (error) 503 return (error); 504 505 return (kern_connect(td, uap->s, sa)); 506 } 507 508 509 int 510 kern_connect(td, fd, sa) 511 struct thread *td; 512 int fd; 513 struct sockaddr *sa; 514 { 515 struct socket *so; 516 struct file *fp; 517 int error; 518 int interrupted = 0; 519 520 NET_LOCK_GIANT(); 521 error = getsock(td->td_proc->p_fd, fd, &fp); 522 if (error) 523 goto done2; 524 so = fp->f_data; 525 if (so->so_state & SS_ISCONNECTING) { 526 error = EALREADY; 527 goto done1; 528 } 529 #ifdef MAC 530 SOCK_LOCK(so); 531 error = mac_check_socket_connect(td->td_ucred, so, sa); 532 SOCK_UNLOCK(so); 533 if (error) 534 goto bad; 535 #endif 536 error = soconnect(so, sa, td); 537 if (error) 538 goto bad; 539 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 540 error = EINPROGRESS; 541 goto done1; 542 } 543 SOCK_LOCK(so); 544 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 545 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 546 "connec", 0); 547 if (error) { 548 if (error == EINTR || error == ERESTART) 549 interrupted = 1; 550 break; 551 } 552 } 553 if (error == 0) { 554 error = so->so_error; 555 so->so_error = 0; 556 } 557 SOCK_UNLOCK(so); 558 bad: 559 if (!interrupted) 560 so->so_state &= ~SS_ISCONNECTING; 561 if (error == ERESTART) 562 error = EINTR; 563 done1: 564 fdrop(fp, td); 565 done2: 566 NET_UNLOCK_GIANT(); 567 FREE(sa, M_SONAME); 568 return (error); 569 } 570 571 /* 572 * MPSAFE 573 */ 574 int 575 socketpair(td, uap) 576 struct thread *td; 577 register struct socketpair_args /* { 578 int domain; 579 int type; 580 int protocol; 581 int *rsv; 582 } */ *uap; 583 { 584 register struct filedesc *fdp = td->td_proc->p_fd; 585 struct file *fp1, *fp2; 586 struct socket *so1, *so2; 587 int fd, error, sv[2]; 588 589 #ifdef MAC 590 /* We might want to have a separate check for socket pairs. */ 591 error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type, 592 uap->protocol); 593 if (error) 594 return (error); 595 #endif 596 597 NET_LOCK_GIANT(); 598 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 599 td->td_ucred, td); 600 if (error) 601 goto done2; 602 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 603 td->td_ucred, td); 604 if (error) 605 goto free1; 606 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 607 error = falloc(td, &fp1, &fd); 608 if (error) 609 goto free2; 610 sv[0] = fd; 611 fp1->f_data = so1; /* so1 already has ref count */ 612 error = falloc(td, &fp2, &fd); 613 if (error) 614 goto free3; 615 fp2->f_data = so2; /* so2 already has ref count */ 616 sv[1] = fd; 617 error = soconnect2(so1, so2); 618 if (error) 619 goto free4; 620 if (uap->type == SOCK_DGRAM) { 621 /* 622 * Datagram socket connection is asymmetric. 623 */ 624 error = soconnect2(so2, so1); 625 if (error) 626 goto free4; 627 } 628 FILE_LOCK(fp1); 629 fp1->f_flag = FREAD|FWRITE; 630 fp1->f_ops = &socketops; 631 fp1->f_type = DTYPE_SOCKET; 632 FILE_UNLOCK(fp1); 633 FILE_LOCK(fp2); 634 fp2->f_flag = FREAD|FWRITE; 635 fp2->f_ops = &socketops; 636 fp2->f_type = DTYPE_SOCKET; 637 FILE_UNLOCK(fp2); 638 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 639 fdrop(fp1, td); 640 fdrop(fp2, td); 641 goto done2; 642 free4: 643 fdclose(fdp, fp2, sv[1], td); 644 fdrop(fp2, td); 645 free3: 646 fdclose(fdp, fp1, sv[0], td); 647 fdrop(fp1, td); 648 free2: 649 (void)soclose(so2); 650 free1: 651 (void)soclose(so1); 652 done2: 653 NET_UNLOCK_GIANT(); 654 return (error); 655 } 656 657 static int 658 sendit(td, s, mp, flags) 659 register struct thread *td; 660 int s; 661 register struct msghdr *mp; 662 int flags; 663 { 664 struct mbuf *control; 665 struct sockaddr *to; 666 int error; 667 668 if (mp->msg_name != NULL) { 669 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 670 if (error) { 671 to = NULL; 672 goto bad; 673 } 674 mp->msg_name = to; 675 } else { 676 to = NULL; 677 } 678 679 if (mp->msg_control) { 680 if (mp->msg_controllen < sizeof(struct cmsghdr) 681 #ifdef COMPAT_OLDSOCK 682 && mp->msg_flags != MSG_COMPAT 683 #endif 684 ) { 685 error = EINVAL; 686 goto bad; 687 } 688 error = sockargs(&control, mp->msg_control, 689 mp->msg_controllen, MT_CONTROL); 690 if (error) 691 goto bad; 692 #ifdef COMPAT_OLDSOCK 693 if (mp->msg_flags == MSG_COMPAT) { 694 register struct cmsghdr *cm; 695 696 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 697 if (control == 0) { 698 error = ENOBUFS; 699 goto bad; 700 } else { 701 cm = mtod(control, struct cmsghdr *); 702 cm->cmsg_len = control->m_len; 703 cm->cmsg_level = SOL_SOCKET; 704 cm->cmsg_type = SCM_RIGHTS; 705 } 706 } 707 #endif 708 } else { 709 control = NULL; 710 } 711 712 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 713 714 bad: 715 if (to) 716 FREE(to, M_SONAME); 717 return (error); 718 } 719 720 int 721 kern_sendit(td, s, mp, flags, control, segflg) 722 struct thread *td; 723 int s; 724 struct msghdr *mp; 725 int flags; 726 struct mbuf *control; 727 enum uio_seg segflg; 728 { 729 struct file *fp; 730 struct uio auio; 731 struct iovec *iov; 732 struct socket *so; 733 int i; 734 int len, error; 735 #ifdef KTRACE 736 struct uio *ktruio = NULL; 737 #endif 738 739 NET_LOCK_GIANT(); 740 error = getsock(td->td_proc->p_fd, s, &fp); 741 if (error) 742 goto bad2; 743 so = (struct socket *)fp->f_data; 744 745 #ifdef MAC 746 SOCK_LOCK(so); 747 error = mac_check_socket_send(td->td_ucred, so); 748 SOCK_UNLOCK(so); 749 if (error) 750 goto bad; 751 #endif 752 753 auio.uio_iov = mp->msg_iov; 754 auio.uio_iovcnt = mp->msg_iovlen; 755 auio.uio_segflg = segflg; 756 auio.uio_rw = UIO_WRITE; 757 auio.uio_td = td; 758 auio.uio_offset = 0; /* XXX */ 759 auio.uio_resid = 0; 760 iov = mp->msg_iov; 761 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 762 if ((auio.uio_resid += iov->iov_len) < 0) { 763 error = EINVAL; 764 goto bad; 765 } 766 } 767 #ifdef KTRACE 768 if (KTRPOINT(td, KTR_GENIO)) 769 ktruio = cloneuio(&auio); 770 #endif 771 len = auio.uio_resid; 772 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 773 0, control, flags, td); 774 if (error) { 775 if (auio.uio_resid != len && (error == ERESTART || 776 error == EINTR || error == EWOULDBLOCK)) 777 error = 0; 778 /* Generation of SIGPIPE can be controlled per socket */ 779 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 780 !(flags & MSG_NOSIGNAL)) { 781 PROC_LOCK(td->td_proc); 782 psignal(td->td_proc, SIGPIPE); 783 PROC_UNLOCK(td->td_proc); 784 } 785 } 786 if (error == 0) 787 td->td_retval[0] = len - auio.uio_resid; 788 #ifdef KTRACE 789 if (ktruio != NULL) { 790 ktruio->uio_resid = td->td_retval[0]; 791 ktrgenio(s, UIO_WRITE, ktruio, error); 792 } 793 #endif 794 bad: 795 fdrop(fp, td); 796 bad2: 797 NET_UNLOCK_GIANT(); 798 return (error); 799 } 800 801 /* 802 * MPSAFE 803 */ 804 int 805 sendto(td, uap) 806 struct thread *td; 807 register struct sendto_args /* { 808 int s; 809 caddr_t buf; 810 size_t len; 811 int flags; 812 caddr_t to; 813 int tolen; 814 } */ *uap; 815 { 816 struct msghdr msg; 817 struct iovec aiov; 818 int error; 819 820 msg.msg_name = uap->to; 821 msg.msg_namelen = uap->tolen; 822 msg.msg_iov = &aiov; 823 msg.msg_iovlen = 1; 824 msg.msg_control = 0; 825 #ifdef COMPAT_OLDSOCK 826 msg.msg_flags = 0; 827 #endif 828 aiov.iov_base = uap->buf; 829 aiov.iov_len = uap->len; 830 error = sendit(td, uap->s, &msg, uap->flags); 831 return (error); 832 } 833 834 #ifdef COMPAT_OLDSOCK 835 /* 836 * MPSAFE 837 */ 838 int 839 osend(td, uap) 840 struct thread *td; 841 register struct osend_args /* { 842 int s; 843 caddr_t buf; 844 int len; 845 int flags; 846 } */ *uap; 847 { 848 struct msghdr msg; 849 struct iovec aiov; 850 int error; 851 852 msg.msg_name = 0; 853 msg.msg_namelen = 0; 854 msg.msg_iov = &aiov; 855 msg.msg_iovlen = 1; 856 aiov.iov_base = uap->buf; 857 aiov.iov_len = uap->len; 858 msg.msg_control = 0; 859 msg.msg_flags = 0; 860 error = sendit(td, uap->s, &msg, uap->flags); 861 return (error); 862 } 863 864 /* 865 * MPSAFE 866 */ 867 int 868 osendmsg(td, uap) 869 struct thread *td; 870 struct osendmsg_args /* { 871 int s; 872 caddr_t msg; 873 int flags; 874 } */ *uap; 875 { 876 struct msghdr msg; 877 struct iovec *iov; 878 int error; 879 880 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 881 if (error) 882 return (error); 883 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 884 if (error) 885 return (error); 886 msg.msg_iov = iov; 887 msg.msg_flags = MSG_COMPAT; 888 error = sendit(td, uap->s, &msg, uap->flags); 889 free(iov, M_IOV); 890 return (error); 891 } 892 #endif 893 894 /* 895 * MPSAFE 896 */ 897 int 898 sendmsg(td, uap) 899 struct thread *td; 900 struct sendmsg_args /* { 901 int s; 902 caddr_t msg; 903 int flags; 904 } */ *uap; 905 { 906 struct msghdr msg; 907 struct iovec *iov; 908 int error; 909 910 error = copyin(uap->msg, &msg, sizeof (msg)); 911 if (error) 912 return (error); 913 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 914 if (error) 915 return (error); 916 msg.msg_iov = iov; 917 #ifdef COMPAT_OLDSOCK 918 msg.msg_flags = 0; 919 #endif 920 error = sendit(td, uap->s, &msg, uap->flags); 921 free(iov, M_IOV); 922 return (error); 923 } 924 925 int 926 kern_recvit(td, s, mp, namelenp, segflg, controlp) 927 struct thread *td; 928 int s; 929 struct msghdr *mp; 930 void *namelenp; 931 enum uio_seg segflg; 932 struct mbuf **controlp; 933 { 934 struct uio auio; 935 struct iovec *iov; 936 int i; 937 socklen_t len; 938 int error; 939 struct mbuf *m, *control = 0; 940 caddr_t ctlbuf; 941 struct file *fp; 942 struct socket *so; 943 struct sockaddr *fromsa = 0; 944 #ifdef KTRACE 945 struct uio *ktruio = NULL; 946 #endif 947 948 if(controlp != NULL) 949 *controlp = 0; 950 951 NET_LOCK_GIANT(); 952 error = getsock(td->td_proc->p_fd, s, &fp); 953 if (error) { 954 NET_UNLOCK_GIANT(); 955 return (error); 956 } 957 so = fp->f_data; 958 959 #ifdef MAC 960 SOCK_LOCK(so); 961 error = mac_check_socket_receive(td->td_ucred, so); 962 SOCK_UNLOCK(so); 963 if (error) { 964 fdrop(fp, td); 965 NET_UNLOCK_GIANT(); 966 return (error); 967 } 968 #endif 969 970 auio.uio_iov = mp->msg_iov; 971 auio.uio_iovcnt = mp->msg_iovlen; 972 auio.uio_segflg = segflg; 973 auio.uio_rw = UIO_READ; 974 auio.uio_td = td; 975 auio.uio_offset = 0; /* XXX */ 976 auio.uio_resid = 0; 977 iov = mp->msg_iov; 978 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 979 if ((auio.uio_resid += iov->iov_len) < 0) { 980 fdrop(fp, td); 981 NET_UNLOCK_GIANT(); 982 return (EINVAL); 983 } 984 } 985 #ifdef KTRACE 986 if (KTRPOINT(td, KTR_GENIO)) 987 ktruio = cloneuio(&auio); 988 #endif 989 len = auio.uio_resid; 990 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 991 (struct mbuf **)0, 992 (mp->msg_control || controlp) ? &control : (struct mbuf **)0, 993 &mp->msg_flags); 994 if (error) { 995 if (auio.uio_resid != (int)len && (error == ERESTART || 996 error == EINTR || error == EWOULDBLOCK)) 997 error = 0; 998 } 999 #ifdef KTRACE 1000 if (ktruio != NULL) { 1001 ktruio->uio_resid = (int)len - auio.uio_resid; 1002 ktrgenio(s, UIO_READ, ktruio, error); 1003 } 1004 #endif 1005 if (error) 1006 goto out; 1007 td->td_retval[0] = (int)len - auio.uio_resid; 1008 if (mp->msg_name) { 1009 len = mp->msg_namelen; 1010 if (len <= 0 || fromsa == 0) 1011 len = 0; 1012 else { 1013 /* save sa_len before it is destroyed by MSG_COMPAT */ 1014 len = MIN(len, fromsa->sa_len); 1015 #ifdef COMPAT_OLDSOCK 1016 if (mp->msg_flags & MSG_COMPAT) 1017 ((struct osockaddr *)fromsa)->sa_family = 1018 fromsa->sa_family; 1019 #endif 1020 error = copyout(fromsa, mp->msg_name, (unsigned)len); 1021 if (error) 1022 goto out; 1023 } 1024 mp->msg_namelen = len; 1025 if (namelenp && 1026 (error = copyout(&len, namelenp, sizeof (socklen_t)))) { 1027 #ifdef COMPAT_OLDSOCK 1028 if (mp->msg_flags & MSG_COMPAT) 1029 error = 0; /* old recvfrom didn't check */ 1030 else 1031 #endif 1032 goto out; 1033 } 1034 } 1035 if (mp->msg_control && controlp == NULL) { 1036 #ifdef COMPAT_OLDSOCK 1037 /* 1038 * We assume that old recvmsg calls won't receive access 1039 * rights and other control info, esp. as control info 1040 * is always optional and those options didn't exist in 4.3. 1041 * If we receive rights, trim the cmsghdr; anything else 1042 * is tossed. 1043 */ 1044 if (control && mp->msg_flags & MSG_COMPAT) { 1045 if (mtod(control, struct cmsghdr *)->cmsg_level != 1046 SOL_SOCKET || 1047 mtod(control, struct cmsghdr *)->cmsg_type != 1048 SCM_RIGHTS) { 1049 mp->msg_controllen = 0; 1050 goto out; 1051 } 1052 control->m_len -= sizeof (struct cmsghdr); 1053 control->m_data += sizeof (struct cmsghdr); 1054 } 1055 #endif 1056 len = mp->msg_controllen; 1057 m = control; 1058 mp->msg_controllen = 0; 1059 ctlbuf = mp->msg_control; 1060 1061 while (m && len > 0) { 1062 unsigned int tocopy; 1063 1064 if (len >= m->m_len) 1065 tocopy = m->m_len; 1066 else { 1067 mp->msg_flags |= MSG_CTRUNC; 1068 tocopy = len; 1069 } 1070 1071 if ((error = copyout(mtod(m, caddr_t), 1072 ctlbuf, tocopy)) != 0) 1073 goto out; 1074 1075 ctlbuf += tocopy; 1076 len -= tocopy; 1077 m = m->m_next; 1078 } 1079 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1080 } 1081 out: 1082 fdrop(fp, td); 1083 NET_UNLOCK_GIANT(); 1084 if (fromsa) 1085 FREE(fromsa, M_SONAME); 1086 1087 if (error == 0 && controlp != NULL) 1088 *controlp = control; 1089 else if (control) 1090 m_freem(control); 1091 1092 return (error); 1093 } 1094 1095 static int 1096 recvit(td, s, mp, namelenp) 1097 struct thread *td; 1098 int s; 1099 struct msghdr *mp; 1100 void *namelenp; 1101 { 1102 1103 return (kern_recvit(td, s, mp, namelenp, UIO_USERSPACE, NULL)); 1104 } 1105 1106 /* 1107 * MPSAFE 1108 */ 1109 int 1110 recvfrom(td, uap) 1111 struct thread *td; 1112 register struct recvfrom_args /* { 1113 int s; 1114 caddr_t buf; 1115 size_t len; 1116 int flags; 1117 struct sockaddr * __restrict from; 1118 socklen_t * __restrict fromlenaddr; 1119 } */ *uap; 1120 { 1121 struct msghdr msg; 1122 struct iovec aiov; 1123 int error; 1124 1125 if (uap->fromlenaddr) { 1126 error = copyin(uap->fromlenaddr, 1127 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1128 if (error) 1129 goto done2; 1130 } else { 1131 msg.msg_namelen = 0; 1132 } 1133 msg.msg_name = uap->from; 1134 msg.msg_iov = &aiov; 1135 msg.msg_iovlen = 1; 1136 aiov.iov_base = uap->buf; 1137 aiov.iov_len = uap->len; 1138 msg.msg_control = 0; 1139 msg.msg_flags = uap->flags; 1140 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1141 done2: 1142 return(error); 1143 } 1144 1145 #ifdef COMPAT_OLDSOCK 1146 /* 1147 * MPSAFE 1148 */ 1149 int 1150 orecvfrom(td, uap) 1151 struct thread *td; 1152 struct recvfrom_args *uap; 1153 { 1154 1155 uap->flags |= MSG_COMPAT; 1156 return (recvfrom(td, uap)); 1157 } 1158 #endif 1159 1160 1161 #ifdef COMPAT_OLDSOCK 1162 /* 1163 * MPSAFE 1164 */ 1165 int 1166 orecv(td, uap) 1167 struct thread *td; 1168 register struct orecv_args /* { 1169 int s; 1170 caddr_t buf; 1171 int len; 1172 int flags; 1173 } */ *uap; 1174 { 1175 struct msghdr msg; 1176 struct iovec aiov; 1177 int error; 1178 1179 msg.msg_name = 0; 1180 msg.msg_namelen = 0; 1181 msg.msg_iov = &aiov; 1182 msg.msg_iovlen = 1; 1183 aiov.iov_base = uap->buf; 1184 aiov.iov_len = uap->len; 1185 msg.msg_control = 0; 1186 msg.msg_flags = uap->flags; 1187 error = recvit(td, uap->s, &msg, NULL); 1188 return (error); 1189 } 1190 1191 /* 1192 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1193 * overlays the new one, missing only the flags, and with the (old) access 1194 * rights where the control fields are now. 1195 * 1196 * MPSAFE 1197 */ 1198 int 1199 orecvmsg(td, uap) 1200 struct thread *td; 1201 struct orecvmsg_args /* { 1202 int s; 1203 struct omsghdr *msg; 1204 int flags; 1205 } */ *uap; 1206 { 1207 struct msghdr msg; 1208 struct iovec *iov; 1209 int error; 1210 1211 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1212 if (error) 1213 return (error); 1214 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1215 if (error) 1216 return (error); 1217 msg.msg_flags = uap->flags | MSG_COMPAT; 1218 msg.msg_iov = iov; 1219 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1220 if (msg.msg_controllen && error == 0) 1221 error = copyout(&msg.msg_controllen, 1222 &uap->msg->msg_accrightslen, sizeof (int)); 1223 free(iov, M_IOV); 1224 return (error); 1225 } 1226 #endif 1227 1228 /* 1229 * MPSAFE 1230 */ 1231 int 1232 recvmsg(td, uap) 1233 struct thread *td; 1234 struct recvmsg_args /* { 1235 int s; 1236 struct msghdr *msg; 1237 int flags; 1238 } */ *uap; 1239 { 1240 struct msghdr msg; 1241 struct iovec *uiov, *iov; 1242 int error; 1243 1244 error = copyin(uap->msg, &msg, sizeof (msg)); 1245 if (error) 1246 return (error); 1247 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1248 if (error) 1249 return (error); 1250 msg.msg_flags = uap->flags; 1251 #ifdef COMPAT_OLDSOCK 1252 msg.msg_flags &= ~MSG_COMPAT; 1253 #endif 1254 uiov = msg.msg_iov; 1255 msg.msg_iov = iov; 1256 error = recvit(td, uap->s, &msg, NULL); 1257 if (error == 0) { 1258 msg.msg_iov = uiov; 1259 error = copyout(&msg, uap->msg, sizeof(msg)); 1260 } 1261 free(iov, M_IOV); 1262 return (error); 1263 } 1264 1265 /* 1266 * MPSAFE 1267 */ 1268 /* ARGSUSED */ 1269 int 1270 shutdown(td, uap) 1271 struct thread *td; 1272 register struct shutdown_args /* { 1273 int s; 1274 int how; 1275 } */ *uap; 1276 { 1277 struct socket *so; 1278 struct file *fp; 1279 int error; 1280 1281 NET_LOCK_GIANT(); 1282 error = getsock(td->td_proc->p_fd, uap->s, &fp); 1283 if (error == 0) { 1284 so = fp->f_data; 1285 error = soshutdown(so, uap->how); 1286 fdrop(fp, td); 1287 } 1288 NET_UNLOCK_GIANT(); 1289 return (error); 1290 } 1291 1292 /* 1293 * MPSAFE 1294 */ 1295 /* ARGSUSED */ 1296 int 1297 setsockopt(td, uap) 1298 struct thread *td; 1299 register struct setsockopt_args /* { 1300 int s; 1301 int level; 1302 int name; 1303 caddr_t val; 1304 int valsize; 1305 } */ *uap; 1306 { 1307 1308 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1309 uap->val, UIO_USERSPACE, uap->valsize)); 1310 } 1311 1312 int 1313 kern_setsockopt(td, s, level, name, val, valseg, valsize) 1314 struct thread *td; 1315 int s; 1316 int level; 1317 int name; 1318 void *val; 1319 enum uio_seg valseg; 1320 socklen_t valsize; 1321 { 1322 int error; 1323 struct socket *so; 1324 struct file *fp; 1325 struct sockopt sopt; 1326 1327 if (val == NULL && valsize != 0) 1328 return (EFAULT); 1329 if (valsize < 0) 1330 return (EINVAL); 1331 1332 sopt.sopt_dir = SOPT_SET; 1333 sopt.sopt_level = level; 1334 sopt.sopt_name = name; 1335 sopt.sopt_val = val; 1336 sopt.sopt_valsize = valsize; 1337 switch (valseg) { 1338 case UIO_USERSPACE: 1339 sopt.sopt_td = td; 1340 break; 1341 case UIO_SYSSPACE: 1342 sopt.sopt_td = NULL; 1343 break; 1344 default: 1345 panic("kern_setsockopt called with bad valseg"); 1346 } 1347 1348 NET_LOCK_GIANT(); 1349 error = getsock(td->td_proc->p_fd, s, &fp); 1350 if (error == 0) { 1351 so = fp->f_data; 1352 error = sosetopt(so, &sopt); 1353 fdrop(fp, td); 1354 } 1355 NET_UNLOCK_GIANT(); 1356 return(error); 1357 } 1358 1359 /* 1360 * MPSAFE 1361 */ 1362 /* ARGSUSED */ 1363 int 1364 getsockopt(td, uap) 1365 struct thread *td; 1366 register struct getsockopt_args /* { 1367 int s; 1368 int level; 1369 int name; 1370 void * __restrict val; 1371 socklen_t * __restrict avalsize; 1372 } */ *uap; 1373 { 1374 socklen_t valsize; 1375 int error; 1376 1377 if (uap->val) { 1378 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1379 if (error) 1380 return (error); 1381 } 1382 1383 error = kern_getsockopt(td, uap->s, uap->level, uap->name, 1384 uap->val, UIO_USERSPACE, &valsize); 1385 1386 if (error == 0) 1387 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1388 return (error); 1389 } 1390 1391 /* 1392 * Kernel version of getsockopt. 1393 * optval can be a userland or userspace. optlen is always a kernel pointer. 1394 */ 1395 int 1396 kern_getsockopt(td, s, level, name, val, valseg, valsize) 1397 struct thread *td; 1398 int s; 1399 int level; 1400 int name; 1401 void *val; 1402 enum uio_seg valseg; 1403 socklen_t *valsize; 1404 { 1405 int error; 1406 struct socket *so; 1407 struct file *fp; 1408 struct sockopt sopt; 1409 1410 if (val == NULL) 1411 *valsize = 0; 1412 if (*valsize < 0) 1413 return (EINVAL); 1414 1415 sopt.sopt_dir = SOPT_GET; 1416 sopt.sopt_level = level; 1417 sopt.sopt_name = name; 1418 sopt.sopt_val = val; 1419 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1420 switch (valseg) { 1421 case UIO_USERSPACE: 1422 sopt.sopt_td = td; 1423 break; 1424 case UIO_SYSSPACE: 1425 sopt.sopt_td = NULL; 1426 break; 1427 default: 1428 panic("kern_getsockopt called with bad valseg"); 1429 } 1430 1431 NET_LOCK_GIANT(); 1432 error = getsock(td->td_proc->p_fd, s, &fp); 1433 if (error == 0) { 1434 so = fp->f_data; 1435 error = sogetopt(so, &sopt); 1436 *valsize = sopt.sopt_valsize; 1437 fdrop(fp, td); 1438 } 1439 NET_UNLOCK_GIANT(); 1440 return (error); 1441 } 1442 1443 /* 1444 * getsockname1() - Get socket name. 1445 * 1446 * MPSAFE 1447 */ 1448 /* ARGSUSED */ 1449 static int 1450 getsockname1(td, uap, compat) 1451 struct thread *td; 1452 register struct getsockname_args /* { 1453 int fdes; 1454 struct sockaddr * __restrict asa; 1455 socklen_t * __restrict alen; 1456 } */ *uap; 1457 int compat; 1458 { 1459 struct socket *so; 1460 struct sockaddr *sa; 1461 struct file *fp; 1462 socklen_t len; 1463 int error; 1464 1465 NET_LOCK_GIANT(); 1466 error = getsock(td->td_proc->p_fd, uap->fdes, &fp); 1467 if (error) 1468 goto done2; 1469 so = fp->f_data; 1470 error = copyin(uap->alen, &len, sizeof (len)); 1471 if (error) 1472 goto done1; 1473 if (len < 0) { 1474 error = EINVAL; 1475 goto done1; 1476 } 1477 sa = 0; 1478 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1479 if (error) 1480 goto bad; 1481 if (sa == 0) { 1482 len = 0; 1483 goto gotnothing; 1484 } 1485 1486 len = MIN(len, sa->sa_len); 1487 #ifdef COMPAT_OLDSOCK 1488 if (compat) 1489 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1490 #endif 1491 error = copyout(sa, uap->asa, (u_int)len); 1492 if (error == 0) 1493 gotnothing: 1494 error = copyout(&len, uap->alen, sizeof (len)); 1495 bad: 1496 if (sa) 1497 FREE(sa, M_SONAME); 1498 done1: 1499 fdrop(fp, td); 1500 done2: 1501 NET_UNLOCK_GIANT(); 1502 return (error); 1503 } 1504 1505 /* 1506 * MPSAFE 1507 */ 1508 int 1509 getsockname(td, uap) 1510 struct thread *td; 1511 struct getsockname_args *uap; 1512 { 1513 1514 return (getsockname1(td, uap, 0)); 1515 } 1516 1517 #ifdef COMPAT_OLDSOCK 1518 /* 1519 * MPSAFE 1520 */ 1521 int 1522 ogetsockname(td, uap) 1523 struct thread *td; 1524 struct getsockname_args *uap; 1525 { 1526 1527 return (getsockname1(td, uap, 1)); 1528 } 1529 #endif /* COMPAT_OLDSOCK */ 1530 1531 /* 1532 * getpeername1() - Get name of peer for connected socket. 1533 * 1534 * MPSAFE 1535 */ 1536 /* ARGSUSED */ 1537 static int 1538 getpeername1(td, uap, compat) 1539 struct thread *td; 1540 register struct getpeername_args /* { 1541 int fdes; 1542 struct sockaddr * __restrict asa; 1543 socklen_t * __restrict alen; 1544 } */ *uap; 1545 int compat; 1546 { 1547 struct socket *so; 1548 struct sockaddr *sa; 1549 struct file *fp; 1550 socklen_t len; 1551 int error; 1552 1553 NET_LOCK_GIANT(); 1554 error = getsock(td->td_proc->p_fd, uap->fdes, &fp); 1555 if (error) 1556 goto done2; 1557 so = fp->f_data; 1558 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1559 error = ENOTCONN; 1560 goto done1; 1561 } 1562 error = copyin(uap->alen, &len, sizeof (len)); 1563 if (error) 1564 goto done1; 1565 if (len < 0) { 1566 error = EINVAL; 1567 goto done1; 1568 } 1569 sa = 0; 1570 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1571 if (error) 1572 goto bad; 1573 if (sa == 0) { 1574 len = 0; 1575 goto gotnothing; 1576 } 1577 len = MIN(len, sa->sa_len); 1578 #ifdef COMPAT_OLDSOCK 1579 if (compat) 1580 ((struct osockaddr *)sa)->sa_family = 1581 sa->sa_family; 1582 #endif 1583 error = copyout(sa, uap->asa, (u_int)len); 1584 if (error) 1585 goto bad; 1586 gotnothing: 1587 error = copyout(&len, uap->alen, sizeof (len)); 1588 bad: 1589 if (sa) 1590 FREE(sa, M_SONAME); 1591 done1: 1592 fdrop(fp, td); 1593 done2: 1594 NET_UNLOCK_GIANT(); 1595 return (error); 1596 } 1597 1598 /* 1599 * MPSAFE 1600 */ 1601 int 1602 getpeername(td, uap) 1603 struct thread *td; 1604 struct getpeername_args *uap; 1605 { 1606 1607 return (getpeername1(td, uap, 0)); 1608 } 1609 1610 #ifdef COMPAT_OLDSOCK 1611 /* 1612 * MPSAFE 1613 */ 1614 int 1615 ogetpeername(td, uap) 1616 struct thread *td; 1617 struct ogetpeername_args *uap; 1618 { 1619 1620 /* XXX uap should have type `getpeername_args *' to begin with. */ 1621 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1622 } 1623 #endif /* COMPAT_OLDSOCK */ 1624 1625 int 1626 sockargs(mp, buf, buflen, type) 1627 struct mbuf **mp; 1628 caddr_t buf; 1629 int buflen, type; 1630 { 1631 register struct sockaddr *sa; 1632 register struct mbuf *m; 1633 int error; 1634 1635 if ((u_int)buflen > MLEN) { 1636 #ifdef COMPAT_OLDSOCK 1637 if (type == MT_SONAME && (u_int)buflen <= 112) 1638 buflen = MLEN; /* unix domain compat. hack */ 1639 else 1640 #endif 1641 if ((u_int)buflen > MCLBYTES) 1642 return (EINVAL); 1643 } 1644 m = m_get(M_TRYWAIT, type); 1645 if (m == NULL) 1646 return (ENOBUFS); 1647 if ((u_int)buflen > MLEN) { 1648 MCLGET(m, M_TRYWAIT); 1649 if ((m->m_flags & M_EXT) == 0) { 1650 m_free(m); 1651 return (ENOBUFS); 1652 } 1653 } 1654 m->m_len = buflen; 1655 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1656 if (error) 1657 (void) m_free(m); 1658 else { 1659 *mp = m; 1660 if (type == MT_SONAME) { 1661 sa = mtod(m, struct sockaddr *); 1662 1663 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1664 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1665 sa->sa_family = sa->sa_len; 1666 #endif 1667 sa->sa_len = buflen; 1668 } 1669 } 1670 return (error); 1671 } 1672 1673 int 1674 getsockaddr(namp, uaddr, len) 1675 struct sockaddr **namp; 1676 caddr_t uaddr; 1677 size_t len; 1678 { 1679 struct sockaddr *sa; 1680 int error; 1681 1682 if (len > SOCK_MAXADDRLEN) 1683 return (ENAMETOOLONG); 1684 if (len < offsetof(struct sockaddr, sa_data[0])) 1685 return (EINVAL); 1686 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1687 error = copyin(uaddr, sa, len); 1688 if (error) { 1689 FREE(sa, M_SONAME); 1690 } else { 1691 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1692 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1693 sa->sa_family = sa->sa_len; 1694 #endif 1695 sa->sa_len = len; 1696 *namp = sa; 1697 } 1698 return (error); 1699 } 1700 1701 /* 1702 * Detach mapped page and release resources back to the system. 1703 */ 1704 void 1705 sf_buf_mext(void *addr, void *args) 1706 { 1707 vm_page_t m; 1708 1709 m = sf_buf_page(args); 1710 sf_buf_free(args); 1711 vm_page_lock_queues(); 1712 vm_page_unwire(m, 0); 1713 /* 1714 * Check for the object going away on us. This can 1715 * happen since we don't hold a reference to it. 1716 * If so, we're responsible for freeing the page. 1717 */ 1718 if (m->wire_count == 0 && m->object == NULL) 1719 vm_page_free(m); 1720 vm_page_unlock_queues(); 1721 } 1722 1723 /* 1724 * sendfile(2) 1725 * 1726 * MPSAFE 1727 * 1728 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1729 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1730 * 1731 * Send a file specified by 'fd' and starting at 'offset' to a socket 1732 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1733 * nbytes == 0. Optionally add a header and/or trailer to the socket 1734 * output. If specified, write the total number of bytes sent into *sbytes. 1735 * 1736 */ 1737 int 1738 sendfile(struct thread *td, struct sendfile_args *uap) 1739 { 1740 1741 return (do_sendfile(td, uap, 0)); 1742 } 1743 1744 #ifdef COMPAT_FREEBSD4 1745 int 1746 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1747 { 1748 struct sendfile_args args; 1749 1750 args.fd = uap->fd; 1751 args.s = uap->s; 1752 args.offset = uap->offset; 1753 args.nbytes = uap->nbytes; 1754 args.hdtr = uap->hdtr; 1755 args.sbytes = uap->sbytes; 1756 args.flags = uap->flags; 1757 1758 return (do_sendfile(td, &args, 1)); 1759 } 1760 #endif /* COMPAT_FREEBSD4 */ 1761 1762 static int 1763 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1764 { 1765 struct vnode *vp; 1766 struct vm_object *obj; 1767 struct socket *so = NULL; 1768 struct mbuf *m, *m_header = NULL; 1769 struct sf_buf *sf; 1770 struct vm_page *pg; 1771 struct writev_args nuap; 1772 struct sf_hdtr hdtr; 1773 struct uio *hdr_uio = NULL; 1774 off_t off, xfsize, hdtr_size, sbytes = 0; 1775 int error, headersize = 0, headersent = 0; 1776 1777 mtx_lock(&Giant); 1778 1779 hdtr_size = 0; 1780 1781 /* 1782 * The descriptor must be a regular file and have a backing VM object. 1783 */ 1784 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1785 goto done; 1786 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1787 obj = vp->v_object; 1788 VOP_UNLOCK(vp, 0, td); 1789 if (obj == NULL) { 1790 error = EINVAL; 1791 goto done; 1792 } 1793 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1794 goto done; 1795 if (so->so_type != SOCK_STREAM) { 1796 error = EINVAL; 1797 goto done; 1798 } 1799 if ((so->so_state & SS_ISCONNECTED) == 0) { 1800 error = ENOTCONN; 1801 goto done; 1802 } 1803 if (uap->offset < 0) { 1804 error = EINVAL; 1805 goto done; 1806 } 1807 1808 #ifdef MAC 1809 SOCK_LOCK(so); 1810 error = mac_check_socket_send(td->td_ucred, so); 1811 SOCK_UNLOCK(so); 1812 if (error) 1813 goto done; 1814 #endif 1815 1816 /* 1817 * If specified, get the pointer to the sf_hdtr struct for 1818 * any headers/trailers. 1819 */ 1820 if (uap->hdtr != NULL) { 1821 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1822 if (error) 1823 goto done; 1824 /* 1825 * Send any headers. 1826 */ 1827 if (hdtr.headers != NULL) { 1828 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1829 if (error) 1830 goto done; 1831 hdr_uio->uio_td = td; 1832 hdr_uio->uio_rw = UIO_WRITE; 1833 if (hdr_uio->uio_resid > 0) { 1834 m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0, 0); 1835 if (m_header == NULL) 1836 goto done; 1837 headersize = m_header->m_pkthdr.len; 1838 if (compat) 1839 sbytes += headersize; 1840 } 1841 } 1842 } 1843 1844 /* 1845 * Protect against multiple writers to the socket. 1846 */ 1847 SOCKBUF_LOCK(&so->so_snd); 1848 (void) sblock(&so->so_snd, M_WAITOK); 1849 SOCKBUF_UNLOCK(&so->so_snd); 1850 1851 /* 1852 * Loop through the pages in the file, starting with the requested 1853 * offset. Get a file page (do I/O if necessary), map the file page 1854 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1855 * it on the socket. 1856 */ 1857 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1858 vm_pindex_t pindex; 1859 vm_offset_t pgoff; 1860 1861 pindex = OFF_TO_IDX(off); 1862 VM_OBJECT_LOCK(obj); 1863 retry_lookup: 1864 /* 1865 * Calculate the amount to transfer. Not to exceed a page, 1866 * the EOF, or the passed in nbytes. 1867 */ 1868 xfsize = obj->un_pager.vnp.vnp_size - off; 1869 VM_OBJECT_UNLOCK(obj); 1870 if (xfsize > PAGE_SIZE) 1871 xfsize = PAGE_SIZE; 1872 pgoff = (vm_offset_t)(off & PAGE_MASK); 1873 if (PAGE_SIZE - pgoff < xfsize) 1874 xfsize = PAGE_SIZE - pgoff; 1875 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1876 xfsize = uap->nbytes - sbytes; 1877 if (xfsize <= 0) { 1878 if (m_header != NULL) { 1879 m = m_header; 1880 m_header = NULL; 1881 SOCKBUF_LOCK(&so->so_snd); 1882 goto retry_space; 1883 } else 1884 break; 1885 } 1886 /* 1887 * Optimize the non-blocking case by looking at the socket space 1888 * before going to the extra work of constituting the sf_buf. 1889 */ 1890 SOCKBUF_LOCK(&so->so_snd); 1891 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1892 if (so->so_snd.sb_state & SBS_CANTSENDMORE) 1893 error = EPIPE; 1894 else 1895 error = EAGAIN; 1896 sbunlock(&so->so_snd); 1897 SOCKBUF_UNLOCK(&so->so_snd); 1898 goto done; 1899 } 1900 SOCKBUF_UNLOCK(&so->so_snd); 1901 VM_OBJECT_LOCK(obj); 1902 /* 1903 * Attempt to look up the page. 1904 * 1905 * Allocate if not found 1906 * 1907 * Wait and loop if busy. 1908 */ 1909 pg = vm_page_lookup(obj, pindex); 1910 1911 if (pg == NULL) { 1912 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY | 1913 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1914 if (pg == NULL) { 1915 VM_OBJECT_UNLOCK(obj); 1916 VM_WAIT; 1917 VM_OBJECT_LOCK(obj); 1918 goto retry_lookup; 1919 } 1920 vm_page_lock_queues(); 1921 } else { 1922 vm_page_lock_queues(); 1923 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1924 goto retry_lookup; 1925 /* 1926 * Wire the page so it does not get ripped out from 1927 * under us. 1928 */ 1929 vm_page_wire(pg); 1930 } 1931 1932 /* 1933 * If page is not valid for what we need, initiate I/O 1934 */ 1935 1936 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) { 1937 VM_OBJECT_UNLOCK(obj); 1938 } else if (uap->flags & SF_NODISKIO) { 1939 error = EBUSY; 1940 } else { 1941 int bsize, resid; 1942 1943 /* 1944 * Ensure that our page is still around when the I/O 1945 * completes. 1946 */ 1947 vm_page_io_start(pg); 1948 vm_page_unlock_queues(); 1949 VM_OBJECT_UNLOCK(obj); 1950 1951 /* 1952 * Get the page from backing store. 1953 */ 1954 bsize = vp->v_mount->mnt_stat.f_iosize; 1955 vn_lock(vp, LK_SHARED | LK_RETRY, td); 1956 /* 1957 * XXXMAC: Because we don't have fp->f_cred here, 1958 * we pass in NOCRED. This is probably wrong, but 1959 * is consistent with our original implementation. 1960 */ 1961 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1962 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1963 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 1964 td->td_ucred, NOCRED, &resid, td); 1965 VOP_UNLOCK(vp, 0, td); 1966 VM_OBJECT_LOCK(obj); 1967 vm_page_lock_queues(); 1968 vm_page_io_finish(pg); 1969 if (!error) 1970 VM_OBJECT_UNLOCK(obj); 1971 mbstat.sf_iocnt++; 1972 } 1973 1974 if (error) { 1975 vm_page_unwire(pg, 0); 1976 /* 1977 * See if anyone else might know about this page. 1978 * If not and it is not valid, then free it. 1979 */ 1980 if (pg->wire_count == 0 && pg->valid == 0 && 1981 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1982 pg->hold_count == 0) { 1983 vm_page_free(pg); 1984 } 1985 vm_page_unlock_queues(); 1986 VM_OBJECT_UNLOCK(obj); 1987 SOCKBUF_LOCK(&so->so_snd); 1988 sbunlock(&so->so_snd); 1989 SOCKBUF_UNLOCK(&so->so_snd); 1990 goto done; 1991 } 1992 vm_page_unlock_queues(); 1993 1994 /* 1995 * Get a sendfile buf. We usually wait as long as necessary, 1996 * but this wait can be interrupted. 1997 */ 1998 if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) { 1999 mbstat.sf_allocfail++; 2000 vm_page_lock_queues(); 2001 vm_page_unwire(pg, 0); 2002 if (pg->wire_count == 0 && pg->object == NULL) 2003 vm_page_free(pg); 2004 vm_page_unlock_queues(); 2005 SOCKBUF_LOCK(&so->so_snd); 2006 sbunlock(&so->so_snd); 2007 SOCKBUF_UNLOCK(&so->so_snd); 2008 error = EINTR; 2009 goto done; 2010 } 2011 2012 /* 2013 * Get an mbuf header and set it up as having external storage. 2014 */ 2015 if (m_header) 2016 MGET(m, M_TRYWAIT, MT_DATA); 2017 else 2018 MGETHDR(m, M_TRYWAIT, MT_DATA); 2019 if (m == NULL) { 2020 error = ENOBUFS; 2021 sf_buf_mext((void *)sf_buf_kva(sf), sf); 2022 SOCKBUF_LOCK(&so->so_snd); 2023 sbunlock(&so->so_snd); 2024 SOCKBUF_UNLOCK(&so->so_snd); 2025 goto done; 2026 } 2027 /* 2028 * Setup external storage for mbuf. 2029 */ 2030 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY, 2031 EXT_SFBUF); 2032 m->m_data = (char *)sf_buf_kva(sf) + pgoff; 2033 m->m_pkthdr.len = m->m_len = xfsize; 2034 2035 if (m_header) { 2036 m_cat(m_header, m); 2037 m = m_header; 2038 m_header = NULL; 2039 m_fixhdr(m); 2040 } 2041 2042 /* 2043 * Add the buffer to the socket buffer chain. 2044 */ 2045 SOCKBUF_LOCK(&so->so_snd); 2046 retry_space: 2047 /* 2048 * Make sure that the socket is still able to take more data. 2049 * CANTSENDMORE being true usually means that the connection 2050 * was closed. so_error is true when an error was sensed after 2051 * a previous send. 2052 * The state is checked after the page mapping and buffer 2053 * allocation above since those operations may block and make 2054 * any socket checks stale. From this point forward, nothing 2055 * blocks before the pru_send (or more accurately, any blocking 2056 * results in a loop back to here to re-check). 2057 */ 2058 SOCKBUF_LOCK_ASSERT(&so->so_snd); 2059 if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) { 2060 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2061 error = EPIPE; 2062 } else { 2063 error = so->so_error; 2064 so->so_error = 0; 2065 } 2066 m_freem(m); 2067 sbunlock(&so->so_snd); 2068 SOCKBUF_UNLOCK(&so->so_snd); 2069 goto done; 2070 } 2071 /* 2072 * Wait for socket space to become available. We do this just 2073 * after checking the connection state above in order to avoid 2074 * a race condition with sbwait(). 2075 */ 2076 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 2077 if (so->so_state & SS_NBIO) { 2078 m_freem(m); 2079 sbunlock(&so->so_snd); 2080 SOCKBUF_UNLOCK(&so->so_snd); 2081 error = EAGAIN; 2082 goto done; 2083 } 2084 error = sbwait(&so->so_snd); 2085 /* 2086 * An error from sbwait usually indicates that we've 2087 * been interrupted by a signal. If we've sent anything 2088 * then return bytes sent, otherwise return the error. 2089 */ 2090 if (error) { 2091 m_freem(m); 2092 sbunlock(&so->so_snd); 2093 SOCKBUF_UNLOCK(&so->so_snd); 2094 goto done; 2095 } 2096 goto retry_space; 2097 } 2098 SOCKBUF_UNLOCK(&so->so_snd); 2099 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2100 if (error) { 2101 SOCKBUF_LOCK(&so->so_snd); 2102 sbunlock(&so->so_snd); 2103 SOCKBUF_UNLOCK(&so->so_snd); 2104 goto done; 2105 } 2106 headersent = 1; 2107 } 2108 SOCKBUF_LOCK(&so->so_snd); 2109 sbunlock(&so->so_snd); 2110 SOCKBUF_UNLOCK(&so->so_snd); 2111 2112 /* 2113 * Send trailers. Wimp out and use writev(2). 2114 */ 2115 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 2116 nuap.fd = uap->s; 2117 nuap.iovp = hdtr.trailers; 2118 nuap.iovcnt = hdtr.trl_cnt; 2119 error = writev(td, &nuap); 2120 if (error) 2121 goto done; 2122 if (compat) 2123 sbytes += td->td_retval[0]; 2124 else 2125 hdtr_size += td->td_retval[0]; 2126 } 2127 2128 done: 2129 if (headersent) { 2130 if (!compat) 2131 hdtr_size += headersize; 2132 } else { 2133 if (compat) 2134 sbytes -= headersize; 2135 } 2136 /* 2137 * If there was no error we have to clear td->td_retval[0] 2138 * because it may have been set by writev. 2139 */ 2140 if (error == 0) { 2141 td->td_retval[0] = 0; 2142 } 2143 if (uap->sbytes != NULL) { 2144 if (!compat) 2145 sbytes += hdtr_size; 2146 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2147 } 2148 if (vp) 2149 vrele(vp); 2150 if (so) 2151 fputsock(so); 2152 if (hdr_uio != NULL) 2153 free(hdr_uio, M_IOV); 2154 if (m_header) 2155 m_freem(m_header); 2156 2157 mtx_unlock(&Giant); 2158 2159 if (error == ERESTART) 2160 error = EINTR; 2161 2162 return (error); 2163 } 2164