1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_capsicum.h" 39 #include "opt_inet.h" 40 #include "opt_inet6.h" 41 #include "opt_sctp.h" 42 #include "opt_compat.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/capability.h> 48 #include <sys/kernel.h> 49 #include <sys/lock.h> 50 #include <sys/mutex.h> 51 #include <sys/sysproto.h> 52 #include <sys/malloc.h> 53 #include <sys/filedesc.h> 54 #include <sys/event.h> 55 #include <sys/proc.h> 56 #include <sys/fcntl.h> 57 #include <sys/file.h> 58 #include <sys/filio.h> 59 #include <sys/jail.h> 60 #include <sys/mount.h> 61 #include <sys/mbuf.h> 62 #include <sys/protosw.h> 63 #include <sys/rwlock.h> 64 #include <sys/sf_buf.h> 65 #include <sys/sysent.h> 66 #include <sys/socket.h> 67 #include <sys/socketvar.h> 68 #include <sys/signalvar.h> 69 #include <sys/syscallsubr.h> 70 #include <sys/sysctl.h> 71 #include <sys/uio.h> 72 #include <sys/vnode.h> 73 #ifdef KTRACE 74 #include <sys/ktrace.h> 75 #endif 76 #ifdef COMPAT_FREEBSD32 77 #include <compat/freebsd32/freebsd32_util.h> 78 #endif 79 80 #include <net/vnet.h> 81 82 #include <security/audit/audit.h> 83 #include <security/mac/mac_framework.h> 84 85 #include <vm/vm.h> 86 #include <vm/vm_param.h> 87 #include <vm/vm_object.h> 88 #include <vm/vm_page.h> 89 #include <vm/vm_pageout.h> 90 #include <vm/vm_kern.h> 91 #include <vm/vm_extern.h> 92 93 #if defined(INET) || defined(INET6) 94 #ifdef SCTP 95 #include <netinet/sctp.h> 96 #include <netinet/sctp_peeloff.h> 97 #endif /* SCTP */ 98 #endif /* INET || INET6 */ 99 100 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 101 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 102 103 static int accept1(struct thread *td, struct accept_args *uap, int compat); 104 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 105 static int getsockname1(struct thread *td, struct getsockname_args *uap, 106 int compat); 107 static int getpeername1(struct thread *td, struct getpeername_args *uap, 108 int compat); 109 110 /* 111 * NSFBUFS-related variables and associated sysctls 112 */ 113 int nsfbufs; 114 int nsfbufspeak; 115 int nsfbufsused; 116 117 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 118 "Maximum number of sendfile(2) sf_bufs available"); 119 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 120 "Number of sendfile(2) sf_bufs at peak usage"); 121 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 122 "Number of sendfile(2) sf_bufs in use"); 123 124 /* 125 * Convert a user file descriptor to a kernel file entry and check if required 126 * capability rights are present. 127 * A reference on the file entry is held upon returning. 128 */ 129 static int 130 getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights, 131 struct file **fpp, u_int *fflagp) 132 { 133 struct file *fp; 134 int error; 135 136 error = fget_unlocked(fdp, fd, rights, 0, &fp, NULL); 137 if (error != 0) 138 return (error); 139 if (fp->f_type != DTYPE_SOCKET) { 140 fdrop(fp, curthread); 141 return (ENOTSOCK); 142 } 143 if (fflagp != NULL) 144 *fflagp = fp->f_flag; 145 *fpp = fp; 146 return (0); 147 } 148 149 /* 150 * System call interface to the socket abstraction. 151 */ 152 #if defined(COMPAT_43) 153 #define COMPAT_OLDSOCK 154 #endif 155 156 int 157 sys_socket(td, uap) 158 struct thread *td; 159 struct socket_args /* { 160 int domain; 161 int type; 162 int protocol; 163 } */ *uap; 164 { 165 struct socket *so; 166 struct file *fp; 167 int fd, error, type, oflag, fflag; 168 169 AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol); 170 171 type = uap->type; 172 oflag = 0; 173 fflag = 0; 174 if ((type & SOCK_CLOEXEC) != 0) { 175 type &= ~SOCK_CLOEXEC; 176 oflag |= O_CLOEXEC; 177 } 178 if ((type & SOCK_NONBLOCK) != 0) { 179 type &= ~SOCK_NONBLOCK; 180 fflag |= FNONBLOCK; 181 } 182 183 #ifdef MAC 184 error = mac_socket_check_create(td->td_ucred, uap->domain, type, 185 uap->protocol); 186 if (error) 187 return (error); 188 #endif 189 error = falloc(td, &fp, &fd, oflag); 190 if (error) 191 return (error); 192 /* An extra reference on `fp' has been held for us by falloc(). */ 193 error = socreate(uap->domain, &so, type, uap->protocol, 194 td->td_ucred, td); 195 if (error) { 196 fdclose(td->td_proc->p_fd, fp, fd, td); 197 } else { 198 finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops); 199 if ((fflag & FNONBLOCK) != 0) 200 (void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td); 201 td->td_retval[0] = fd; 202 } 203 fdrop(fp, td); 204 return (error); 205 } 206 207 /* ARGSUSED */ 208 int 209 sys_bind(td, uap) 210 struct thread *td; 211 struct bind_args /* { 212 int s; 213 caddr_t name; 214 int namelen; 215 } */ *uap; 216 { 217 struct sockaddr *sa; 218 int error; 219 220 error = getsockaddr(&sa, uap->name, uap->namelen); 221 if (error == 0) { 222 error = kern_bind(td, uap->s, sa); 223 free(sa, M_SONAME); 224 } 225 return (error); 226 } 227 228 static int 229 kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) 230 { 231 struct socket *so; 232 struct file *fp; 233 int error; 234 235 AUDIT_ARG_FD(fd); 236 AUDIT_ARG_SOCKADDR(td, dirfd, sa); 237 error = getsock_cap(td->td_proc->p_fd, fd, CAP_BIND, &fp, NULL); 238 if (error) 239 return (error); 240 so = fp->f_data; 241 #ifdef KTRACE 242 if (KTRPOINT(td, KTR_STRUCT)) 243 ktrsockaddr(sa); 244 #endif 245 #ifdef MAC 246 error = mac_socket_check_bind(td->td_ucred, so, sa); 247 if (error == 0) { 248 #endif 249 if (dirfd == AT_FDCWD) 250 error = sobind(so, sa, td); 251 else 252 error = sobindat(dirfd, so, sa, td); 253 #ifdef MAC 254 } 255 #endif 256 fdrop(fp, td); 257 return (error); 258 } 259 260 int 261 kern_bind(struct thread *td, int fd, struct sockaddr *sa) 262 { 263 264 return (kern_bindat(td, AT_FDCWD, fd, sa)); 265 } 266 267 /* ARGSUSED */ 268 int 269 sys_bindat(td, uap) 270 struct thread *td; 271 struct bindat_args /* { 272 int fd; 273 int s; 274 caddr_t name; 275 int namelen; 276 } */ *uap; 277 { 278 struct sockaddr *sa; 279 int error; 280 281 error = getsockaddr(&sa, uap->name, uap->namelen); 282 if (error == 0) { 283 error = kern_bindat(td, uap->fd, uap->s, sa); 284 free(sa, M_SONAME); 285 } 286 return (error); 287 } 288 289 /* ARGSUSED */ 290 int 291 sys_listen(td, uap) 292 struct thread *td; 293 struct listen_args /* { 294 int s; 295 int backlog; 296 } */ *uap; 297 { 298 struct socket *so; 299 struct file *fp; 300 int error; 301 302 AUDIT_ARG_FD(uap->s); 303 error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_LISTEN, &fp, NULL); 304 if (error == 0) { 305 so = fp->f_data; 306 #ifdef MAC 307 error = mac_socket_check_listen(td->td_ucred, so); 308 if (error == 0) 309 #endif 310 error = solisten(so, uap->backlog, td); 311 fdrop(fp, td); 312 } 313 return(error); 314 } 315 316 /* 317 * accept1() 318 */ 319 static int 320 accept1(td, uap, compat) 321 struct thread *td; 322 struct accept_args /* { 323 int s; 324 struct sockaddr * __restrict name; 325 socklen_t * __restrict anamelen; 326 } */ *uap; 327 int compat; 328 { 329 struct sockaddr *name; 330 socklen_t namelen; 331 struct file *fp; 332 int error; 333 334 if (uap->name == NULL) 335 return (kern_accept(td, uap->s, NULL, NULL, NULL)); 336 337 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 338 if (error) 339 return (error); 340 341 error = kern_accept(td, uap->s, &name, &namelen, &fp); 342 343 /* 344 * return a namelen of zero for older code which might 345 * ignore the return value from accept. 346 */ 347 if (error) { 348 (void) copyout(&namelen, 349 uap->anamelen, sizeof(*uap->anamelen)); 350 return (error); 351 } 352 353 if (error == 0 && name != NULL) { 354 #ifdef COMPAT_OLDSOCK 355 if (compat) 356 ((struct osockaddr *)name)->sa_family = 357 name->sa_family; 358 #endif 359 error = copyout(name, uap->name, namelen); 360 } 361 if (error == 0) 362 error = copyout(&namelen, uap->anamelen, 363 sizeof(namelen)); 364 if (error) 365 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td); 366 fdrop(fp, td); 367 free(name, M_SONAME); 368 return (error); 369 } 370 371 int 372 kern_accept(struct thread *td, int s, struct sockaddr **name, 373 socklen_t *namelen, struct file **fp) 374 { 375 struct filedesc *fdp; 376 struct file *headfp, *nfp = NULL; 377 struct sockaddr *sa = NULL; 378 int error; 379 struct socket *head, *so; 380 int fd; 381 u_int fflag; 382 pid_t pgid; 383 int tmp; 384 385 if (name) { 386 *name = NULL; 387 if (*namelen < 0) 388 return (EINVAL); 389 } 390 391 AUDIT_ARG_FD(s); 392 fdp = td->td_proc->p_fd; 393 error = getsock_cap(fdp, s, CAP_ACCEPT, &headfp, &fflag); 394 if (error) 395 return (error); 396 head = headfp->f_data; 397 if ((head->so_options & SO_ACCEPTCONN) == 0) { 398 error = EINVAL; 399 goto done; 400 } 401 #ifdef MAC 402 error = mac_socket_check_accept(td->td_ucred, head); 403 if (error != 0) 404 goto done; 405 #endif 406 error = falloc(td, &nfp, &fd, 0); 407 if (error) 408 goto done; 409 ACCEPT_LOCK(); 410 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 411 ACCEPT_UNLOCK(); 412 error = EWOULDBLOCK; 413 goto noconnection; 414 } 415 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 416 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 417 head->so_error = ECONNABORTED; 418 break; 419 } 420 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 421 "accept", 0); 422 if (error) { 423 ACCEPT_UNLOCK(); 424 goto noconnection; 425 } 426 } 427 if (head->so_error) { 428 error = head->so_error; 429 head->so_error = 0; 430 ACCEPT_UNLOCK(); 431 goto noconnection; 432 } 433 so = TAILQ_FIRST(&head->so_comp); 434 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 435 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 436 437 /* 438 * Before changing the flags on the socket, we have to bump the 439 * reference count. Otherwise, if the protocol calls sofree(), 440 * the socket will be released due to a zero refcount. 441 */ 442 SOCK_LOCK(so); /* soref() and so_state update */ 443 soref(so); /* file descriptor reference */ 444 445 TAILQ_REMOVE(&head->so_comp, so, so_list); 446 head->so_qlen--; 447 so->so_state |= (head->so_state & SS_NBIO); 448 so->so_qstate &= ~SQ_COMP; 449 so->so_head = NULL; 450 451 SOCK_UNLOCK(so); 452 ACCEPT_UNLOCK(); 453 454 /* An extra reference on `nfp' has been held for us by falloc(). */ 455 td->td_retval[0] = fd; 456 457 /* connection has been removed from the listen queue */ 458 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 459 460 pgid = fgetown(&head->so_sigio); 461 if (pgid != 0) 462 fsetown(pgid, &so->so_sigio); 463 464 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 465 /* Sync socket nonblocking/async state with file flags */ 466 tmp = fflag & FNONBLOCK; 467 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 468 tmp = fflag & FASYNC; 469 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 470 sa = 0; 471 error = soaccept(so, &sa); 472 if (error) { 473 /* 474 * return a namelen of zero for older code which might 475 * ignore the return value from accept. 476 */ 477 if (name) 478 *namelen = 0; 479 goto noconnection; 480 } 481 if (sa == NULL) { 482 if (name) 483 *namelen = 0; 484 goto done; 485 } 486 AUDIT_ARG_SOCKADDR(td, AT_FDCWD, sa); 487 if (name) { 488 /* check sa_len before it is destroyed */ 489 if (*namelen > sa->sa_len) 490 *namelen = sa->sa_len; 491 #ifdef KTRACE 492 if (KTRPOINT(td, KTR_STRUCT)) 493 ktrsockaddr(sa); 494 #endif 495 *name = sa; 496 sa = NULL; 497 } 498 noconnection: 499 if (sa) 500 free(sa, M_SONAME); 501 502 /* 503 * close the new descriptor, assuming someone hasn't ripped it 504 * out from under us. 505 */ 506 if (error) 507 fdclose(fdp, nfp, fd, td); 508 509 /* 510 * Release explicitly held references before returning. We return 511 * a reference on nfp to the caller on success if they request it. 512 */ 513 done: 514 if (fp != NULL) { 515 if (error == 0) { 516 *fp = nfp; 517 nfp = NULL; 518 } else 519 *fp = NULL; 520 } 521 if (nfp != NULL) 522 fdrop(nfp, td); 523 fdrop(headfp, td); 524 return (error); 525 } 526 527 int 528 sys_accept(td, uap) 529 struct thread *td; 530 struct accept_args *uap; 531 { 532 533 return (accept1(td, uap, 0)); 534 } 535 536 #ifdef COMPAT_OLDSOCK 537 int 538 oaccept(td, uap) 539 struct thread *td; 540 struct accept_args *uap; 541 { 542 543 return (accept1(td, uap, 1)); 544 } 545 #endif /* COMPAT_OLDSOCK */ 546 547 /* ARGSUSED */ 548 int 549 sys_connect(td, uap) 550 struct thread *td; 551 struct connect_args /* { 552 int s; 553 caddr_t name; 554 int namelen; 555 } */ *uap; 556 { 557 struct sockaddr *sa; 558 int error; 559 560 error = getsockaddr(&sa, uap->name, uap->namelen); 561 if (error == 0) { 562 error = kern_connect(td, uap->s, sa); 563 free(sa, M_SONAME); 564 } 565 return (error); 566 } 567 568 static int 569 kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) 570 { 571 struct socket *so; 572 struct file *fp; 573 int error; 574 int interrupted = 0; 575 576 AUDIT_ARG_FD(fd); 577 AUDIT_ARG_SOCKADDR(td, dirfd, sa); 578 error = getsock_cap(td->td_proc->p_fd, fd, CAP_CONNECT, &fp, NULL); 579 if (error) 580 return (error); 581 so = fp->f_data; 582 if (so->so_state & SS_ISCONNECTING) { 583 error = EALREADY; 584 goto done1; 585 } 586 #ifdef KTRACE 587 if (KTRPOINT(td, KTR_STRUCT)) 588 ktrsockaddr(sa); 589 #endif 590 #ifdef MAC 591 error = mac_socket_check_connect(td->td_ucred, so, sa); 592 if (error) 593 goto bad; 594 #endif 595 if (dirfd == AT_FDCWD) 596 error = soconnect(so, sa, td); 597 else 598 error = soconnectat(dirfd, so, sa, td); 599 if (error) 600 goto bad; 601 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 602 error = EINPROGRESS; 603 goto done1; 604 } 605 SOCK_LOCK(so); 606 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 607 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 608 "connec", 0); 609 if (error) { 610 if (error == EINTR || error == ERESTART) 611 interrupted = 1; 612 break; 613 } 614 } 615 if (error == 0) { 616 error = so->so_error; 617 so->so_error = 0; 618 } 619 SOCK_UNLOCK(so); 620 bad: 621 if (!interrupted) 622 so->so_state &= ~SS_ISCONNECTING; 623 if (error == ERESTART) 624 error = EINTR; 625 done1: 626 fdrop(fp, td); 627 return (error); 628 } 629 630 int 631 kern_connect(struct thread *td, int fd, struct sockaddr *sa) 632 { 633 634 return (kern_connectat(td, AT_FDCWD, fd, sa)); 635 } 636 637 /* ARGSUSED */ 638 int 639 sys_connectat(td, uap) 640 struct thread *td; 641 struct connectat_args /* { 642 int fd; 643 int s; 644 caddr_t name; 645 int namelen; 646 } */ *uap; 647 { 648 struct sockaddr *sa; 649 int error; 650 651 error = getsockaddr(&sa, uap->name, uap->namelen); 652 if (error == 0) { 653 error = kern_connectat(td, uap->fd, uap->s, sa); 654 free(sa, M_SONAME); 655 } 656 return (error); 657 } 658 659 int 660 kern_socketpair(struct thread *td, int domain, int type, int protocol, 661 int *rsv) 662 { 663 struct filedesc *fdp = td->td_proc->p_fd; 664 struct file *fp1, *fp2; 665 struct socket *so1, *so2; 666 int fd, error, oflag, fflag; 667 668 AUDIT_ARG_SOCKET(domain, type, protocol); 669 670 oflag = 0; 671 fflag = 0; 672 if ((type & SOCK_CLOEXEC) != 0) { 673 type &= ~SOCK_CLOEXEC; 674 oflag |= O_CLOEXEC; 675 } 676 if ((type & SOCK_NONBLOCK) != 0) { 677 type &= ~SOCK_NONBLOCK; 678 fflag |= FNONBLOCK; 679 } 680 #ifdef MAC 681 /* We might want to have a separate check for socket pairs. */ 682 error = mac_socket_check_create(td->td_ucred, domain, type, 683 protocol); 684 if (error) 685 return (error); 686 #endif 687 error = socreate(domain, &so1, type, protocol, td->td_ucred, td); 688 if (error) 689 return (error); 690 error = socreate(domain, &so2, type, protocol, td->td_ucred, td); 691 if (error) 692 goto free1; 693 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 694 error = falloc(td, &fp1, &fd, oflag); 695 if (error) 696 goto free2; 697 rsv[0] = fd; 698 fp1->f_data = so1; /* so1 already has ref count */ 699 error = falloc(td, &fp2, &fd, oflag); 700 if (error) 701 goto free3; 702 fp2->f_data = so2; /* so2 already has ref count */ 703 rsv[1] = fd; 704 error = soconnect2(so1, so2); 705 if (error) 706 goto free4; 707 if (type == SOCK_DGRAM) { 708 /* 709 * Datagram socket connection is asymmetric. 710 */ 711 error = soconnect2(so2, so1); 712 if (error) 713 goto free4; 714 } 715 finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data, 716 &socketops); 717 finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data, 718 &socketops); 719 if ((fflag & FNONBLOCK) != 0) { 720 (void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td); 721 (void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td); 722 } 723 fdrop(fp1, td); 724 fdrop(fp2, td); 725 return (0); 726 free4: 727 fdclose(fdp, fp2, rsv[1], td); 728 fdrop(fp2, td); 729 free3: 730 fdclose(fdp, fp1, rsv[0], td); 731 fdrop(fp1, td); 732 free2: 733 if (so2 != NULL) 734 (void)soclose(so2); 735 free1: 736 if (so1 != NULL) 737 (void)soclose(so1); 738 return (error); 739 } 740 741 int 742 sys_socketpair(struct thread *td, struct socketpair_args *uap) 743 { 744 int error, sv[2]; 745 746 error = kern_socketpair(td, uap->domain, uap->type, 747 uap->protocol, sv); 748 if (error) 749 return (error); 750 error = copyout(sv, uap->rsv, 2 * sizeof(int)); 751 if (error) { 752 (void)kern_close(td, sv[0]); 753 (void)kern_close(td, sv[1]); 754 } 755 return (error); 756 } 757 758 static int 759 sendit(td, s, mp, flags) 760 struct thread *td; 761 int s; 762 struct msghdr *mp; 763 int flags; 764 { 765 struct mbuf *control; 766 struct sockaddr *to; 767 int error; 768 769 #ifdef CAPABILITY_MODE 770 if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL)) 771 return (ECAPMODE); 772 #endif 773 774 if (mp->msg_name != NULL) { 775 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 776 if (error) { 777 to = NULL; 778 goto bad; 779 } 780 mp->msg_name = to; 781 } else { 782 to = NULL; 783 } 784 785 if (mp->msg_control) { 786 if (mp->msg_controllen < sizeof(struct cmsghdr) 787 #ifdef COMPAT_OLDSOCK 788 && mp->msg_flags != MSG_COMPAT 789 #endif 790 ) { 791 error = EINVAL; 792 goto bad; 793 } 794 error = sockargs(&control, mp->msg_control, 795 mp->msg_controllen, MT_CONTROL); 796 if (error) 797 goto bad; 798 #ifdef COMPAT_OLDSOCK 799 if (mp->msg_flags == MSG_COMPAT) { 800 struct cmsghdr *cm; 801 802 M_PREPEND(control, sizeof(*cm), M_WAITOK); 803 cm = mtod(control, struct cmsghdr *); 804 cm->cmsg_len = control->m_len; 805 cm->cmsg_level = SOL_SOCKET; 806 cm->cmsg_type = SCM_RIGHTS; 807 } 808 #endif 809 } else { 810 control = NULL; 811 } 812 813 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 814 815 bad: 816 if (to) 817 free(to, M_SONAME); 818 return (error); 819 } 820 821 int 822 kern_sendit(td, s, mp, flags, control, segflg) 823 struct thread *td; 824 int s; 825 struct msghdr *mp; 826 int flags; 827 struct mbuf *control; 828 enum uio_seg segflg; 829 { 830 struct file *fp; 831 struct uio auio; 832 struct iovec *iov; 833 struct socket *so; 834 int i, error; 835 ssize_t len; 836 cap_rights_t rights; 837 #ifdef KTRACE 838 struct uio *ktruio = NULL; 839 #endif 840 841 AUDIT_ARG_FD(s); 842 rights = CAP_SEND; 843 if (mp->msg_name != NULL) { 844 AUDIT_ARG_SOCKADDR(td, AT_FDCWD, mp->msg_name); 845 rights |= CAP_CONNECT; 846 } 847 error = getsock_cap(td->td_proc->p_fd, s, rights, &fp, NULL); 848 if (error) 849 return (error); 850 so = (struct socket *)fp->f_data; 851 852 #ifdef KTRACE 853 if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT)) 854 ktrsockaddr(mp->msg_name); 855 #endif 856 #ifdef MAC 857 if (mp->msg_name != NULL) { 858 error = mac_socket_check_connect(td->td_ucred, so, 859 mp->msg_name); 860 if (error) 861 goto bad; 862 } 863 error = mac_socket_check_send(td->td_ucred, so); 864 if (error) 865 goto bad; 866 #endif 867 868 auio.uio_iov = mp->msg_iov; 869 auio.uio_iovcnt = mp->msg_iovlen; 870 auio.uio_segflg = segflg; 871 auio.uio_rw = UIO_WRITE; 872 auio.uio_td = td; 873 auio.uio_offset = 0; /* XXX */ 874 auio.uio_resid = 0; 875 iov = mp->msg_iov; 876 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 877 if ((auio.uio_resid += iov->iov_len) < 0) { 878 error = EINVAL; 879 goto bad; 880 } 881 } 882 #ifdef KTRACE 883 if (KTRPOINT(td, KTR_GENIO)) 884 ktruio = cloneuio(&auio); 885 #endif 886 len = auio.uio_resid; 887 error = sosend(so, mp->msg_name, &auio, 0, control, flags, td); 888 if (error) { 889 if (auio.uio_resid != len && (error == ERESTART || 890 error == EINTR || error == EWOULDBLOCK)) 891 error = 0; 892 /* Generation of SIGPIPE can be controlled per socket */ 893 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 894 !(flags & MSG_NOSIGNAL)) { 895 PROC_LOCK(td->td_proc); 896 tdsignal(td, SIGPIPE); 897 PROC_UNLOCK(td->td_proc); 898 } 899 } 900 if (error == 0) 901 td->td_retval[0] = len - auio.uio_resid; 902 #ifdef KTRACE 903 if (ktruio != NULL) { 904 ktruio->uio_resid = td->td_retval[0]; 905 ktrgenio(s, UIO_WRITE, ktruio, error); 906 } 907 #endif 908 bad: 909 fdrop(fp, td); 910 return (error); 911 } 912 913 int 914 sys_sendto(td, uap) 915 struct thread *td; 916 struct sendto_args /* { 917 int s; 918 caddr_t buf; 919 size_t len; 920 int flags; 921 caddr_t to; 922 int tolen; 923 } */ *uap; 924 { 925 struct msghdr msg; 926 struct iovec aiov; 927 int error; 928 929 msg.msg_name = uap->to; 930 msg.msg_namelen = uap->tolen; 931 msg.msg_iov = &aiov; 932 msg.msg_iovlen = 1; 933 msg.msg_control = 0; 934 #ifdef COMPAT_OLDSOCK 935 msg.msg_flags = 0; 936 #endif 937 aiov.iov_base = uap->buf; 938 aiov.iov_len = uap->len; 939 error = sendit(td, uap->s, &msg, uap->flags); 940 return (error); 941 } 942 943 #ifdef COMPAT_OLDSOCK 944 int 945 osend(td, uap) 946 struct thread *td; 947 struct osend_args /* { 948 int s; 949 caddr_t buf; 950 int len; 951 int flags; 952 } */ *uap; 953 { 954 struct msghdr msg; 955 struct iovec aiov; 956 int error; 957 958 msg.msg_name = 0; 959 msg.msg_namelen = 0; 960 msg.msg_iov = &aiov; 961 msg.msg_iovlen = 1; 962 aiov.iov_base = uap->buf; 963 aiov.iov_len = uap->len; 964 msg.msg_control = 0; 965 msg.msg_flags = 0; 966 error = sendit(td, uap->s, &msg, uap->flags); 967 return (error); 968 } 969 970 int 971 osendmsg(td, uap) 972 struct thread *td; 973 struct osendmsg_args /* { 974 int s; 975 caddr_t msg; 976 int flags; 977 } */ *uap; 978 { 979 struct msghdr msg; 980 struct iovec *iov; 981 int error; 982 983 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 984 if (error) 985 return (error); 986 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 987 if (error) 988 return (error); 989 msg.msg_iov = iov; 990 msg.msg_flags = MSG_COMPAT; 991 error = sendit(td, uap->s, &msg, uap->flags); 992 free(iov, M_IOV); 993 return (error); 994 } 995 #endif 996 997 int 998 sys_sendmsg(td, uap) 999 struct thread *td; 1000 struct sendmsg_args /* { 1001 int s; 1002 caddr_t msg; 1003 int flags; 1004 } */ *uap; 1005 { 1006 struct msghdr msg; 1007 struct iovec *iov; 1008 int error; 1009 1010 error = copyin(uap->msg, &msg, sizeof (msg)); 1011 if (error) 1012 return (error); 1013 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1014 if (error) 1015 return (error); 1016 msg.msg_iov = iov; 1017 #ifdef COMPAT_OLDSOCK 1018 msg.msg_flags = 0; 1019 #endif 1020 error = sendit(td, uap->s, &msg, uap->flags); 1021 free(iov, M_IOV); 1022 return (error); 1023 } 1024 1025 int 1026 kern_recvit(td, s, mp, fromseg, controlp) 1027 struct thread *td; 1028 int s; 1029 struct msghdr *mp; 1030 enum uio_seg fromseg; 1031 struct mbuf **controlp; 1032 { 1033 struct uio auio; 1034 struct iovec *iov; 1035 int i; 1036 ssize_t len; 1037 int error; 1038 struct mbuf *m, *control = NULL; 1039 caddr_t ctlbuf; 1040 struct file *fp; 1041 struct socket *so; 1042 struct sockaddr *fromsa = NULL; 1043 #ifdef KTRACE 1044 struct uio *ktruio = NULL; 1045 #endif 1046 1047 if (controlp != NULL) 1048 *controlp = NULL; 1049 1050 AUDIT_ARG_FD(s); 1051 error = getsock_cap(td->td_proc->p_fd, s, CAP_RECV, &fp, NULL); 1052 if (error) 1053 return (error); 1054 so = fp->f_data; 1055 1056 #ifdef MAC 1057 error = mac_socket_check_receive(td->td_ucred, so); 1058 if (error) { 1059 fdrop(fp, td); 1060 return (error); 1061 } 1062 #endif 1063 1064 auio.uio_iov = mp->msg_iov; 1065 auio.uio_iovcnt = mp->msg_iovlen; 1066 auio.uio_segflg = UIO_USERSPACE; 1067 auio.uio_rw = UIO_READ; 1068 auio.uio_td = td; 1069 auio.uio_offset = 0; /* XXX */ 1070 auio.uio_resid = 0; 1071 iov = mp->msg_iov; 1072 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 1073 if ((auio.uio_resid += iov->iov_len) < 0) { 1074 fdrop(fp, td); 1075 return (EINVAL); 1076 } 1077 } 1078 #ifdef KTRACE 1079 if (KTRPOINT(td, KTR_GENIO)) 1080 ktruio = cloneuio(&auio); 1081 #endif 1082 len = auio.uio_resid; 1083 error = soreceive(so, &fromsa, &auio, NULL, 1084 (mp->msg_control || controlp) ? &control : NULL, 1085 &mp->msg_flags); 1086 if (error) { 1087 if (auio.uio_resid != len && (error == ERESTART || 1088 error == EINTR || error == EWOULDBLOCK)) 1089 error = 0; 1090 } 1091 if (fromsa != NULL) 1092 AUDIT_ARG_SOCKADDR(td, AT_FDCWD, fromsa); 1093 #ifdef KTRACE 1094 if (ktruio != NULL) { 1095 ktruio->uio_resid = len - auio.uio_resid; 1096 ktrgenio(s, UIO_READ, ktruio, error); 1097 } 1098 #endif 1099 if (error) 1100 goto out; 1101 td->td_retval[0] = len - auio.uio_resid; 1102 if (mp->msg_name) { 1103 len = mp->msg_namelen; 1104 if (len <= 0 || fromsa == NULL) 1105 len = 0; 1106 else { 1107 /* save sa_len before it is destroyed by MSG_COMPAT */ 1108 len = MIN(len, fromsa->sa_len); 1109 #ifdef COMPAT_OLDSOCK 1110 if (mp->msg_flags & MSG_COMPAT) 1111 ((struct osockaddr *)fromsa)->sa_family = 1112 fromsa->sa_family; 1113 #endif 1114 if (fromseg == UIO_USERSPACE) { 1115 error = copyout(fromsa, mp->msg_name, 1116 (unsigned)len); 1117 if (error) 1118 goto out; 1119 } else 1120 bcopy(fromsa, mp->msg_name, len); 1121 } 1122 mp->msg_namelen = len; 1123 } 1124 if (mp->msg_control && controlp == NULL) { 1125 #ifdef COMPAT_OLDSOCK 1126 /* 1127 * We assume that old recvmsg calls won't receive access 1128 * rights and other control info, esp. as control info 1129 * is always optional and those options didn't exist in 4.3. 1130 * If we receive rights, trim the cmsghdr; anything else 1131 * is tossed. 1132 */ 1133 if (control && mp->msg_flags & MSG_COMPAT) { 1134 if (mtod(control, struct cmsghdr *)->cmsg_level != 1135 SOL_SOCKET || 1136 mtod(control, struct cmsghdr *)->cmsg_type != 1137 SCM_RIGHTS) { 1138 mp->msg_controllen = 0; 1139 goto out; 1140 } 1141 control->m_len -= sizeof (struct cmsghdr); 1142 control->m_data += sizeof (struct cmsghdr); 1143 } 1144 #endif 1145 len = mp->msg_controllen; 1146 m = control; 1147 mp->msg_controllen = 0; 1148 ctlbuf = mp->msg_control; 1149 1150 while (m && len > 0) { 1151 unsigned int tocopy; 1152 1153 if (len >= m->m_len) 1154 tocopy = m->m_len; 1155 else { 1156 mp->msg_flags |= MSG_CTRUNC; 1157 tocopy = len; 1158 } 1159 1160 if ((error = copyout(mtod(m, caddr_t), 1161 ctlbuf, tocopy)) != 0) 1162 goto out; 1163 1164 ctlbuf += tocopy; 1165 len -= tocopy; 1166 m = m->m_next; 1167 } 1168 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1169 } 1170 out: 1171 fdrop(fp, td); 1172 #ifdef KTRACE 1173 if (fromsa && KTRPOINT(td, KTR_STRUCT)) 1174 ktrsockaddr(fromsa); 1175 #endif 1176 if (fromsa) 1177 free(fromsa, M_SONAME); 1178 1179 if (error == 0 && controlp != NULL) 1180 *controlp = control; 1181 else if (control) 1182 m_freem(control); 1183 1184 return (error); 1185 } 1186 1187 static int 1188 recvit(td, s, mp, namelenp) 1189 struct thread *td; 1190 int s; 1191 struct msghdr *mp; 1192 void *namelenp; 1193 { 1194 int error; 1195 1196 error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL); 1197 if (error) 1198 return (error); 1199 if (namelenp) { 1200 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t)); 1201 #ifdef COMPAT_OLDSOCK 1202 if (mp->msg_flags & MSG_COMPAT) 1203 error = 0; /* old recvfrom didn't check */ 1204 #endif 1205 } 1206 return (error); 1207 } 1208 1209 int 1210 sys_recvfrom(td, uap) 1211 struct thread *td; 1212 struct recvfrom_args /* { 1213 int s; 1214 caddr_t buf; 1215 size_t len; 1216 int flags; 1217 struct sockaddr * __restrict from; 1218 socklen_t * __restrict fromlenaddr; 1219 } */ *uap; 1220 { 1221 struct msghdr msg; 1222 struct iovec aiov; 1223 int error; 1224 1225 if (uap->fromlenaddr) { 1226 error = copyin(uap->fromlenaddr, 1227 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1228 if (error) 1229 goto done2; 1230 } else { 1231 msg.msg_namelen = 0; 1232 } 1233 msg.msg_name = uap->from; 1234 msg.msg_iov = &aiov; 1235 msg.msg_iovlen = 1; 1236 aiov.iov_base = uap->buf; 1237 aiov.iov_len = uap->len; 1238 msg.msg_control = 0; 1239 msg.msg_flags = uap->flags; 1240 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1241 done2: 1242 return(error); 1243 } 1244 1245 #ifdef COMPAT_OLDSOCK 1246 int 1247 orecvfrom(td, uap) 1248 struct thread *td; 1249 struct recvfrom_args *uap; 1250 { 1251 1252 uap->flags |= MSG_COMPAT; 1253 return (sys_recvfrom(td, uap)); 1254 } 1255 #endif 1256 1257 #ifdef COMPAT_OLDSOCK 1258 int 1259 orecv(td, uap) 1260 struct thread *td; 1261 struct orecv_args /* { 1262 int s; 1263 caddr_t buf; 1264 int len; 1265 int flags; 1266 } */ *uap; 1267 { 1268 struct msghdr msg; 1269 struct iovec aiov; 1270 int error; 1271 1272 msg.msg_name = 0; 1273 msg.msg_namelen = 0; 1274 msg.msg_iov = &aiov; 1275 msg.msg_iovlen = 1; 1276 aiov.iov_base = uap->buf; 1277 aiov.iov_len = uap->len; 1278 msg.msg_control = 0; 1279 msg.msg_flags = uap->flags; 1280 error = recvit(td, uap->s, &msg, NULL); 1281 return (error); 1282 } 1283 1284 /* 1285 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1286 * overlays the new one, missing only the flags, and with the (old) access 1287 * rights where the control fields are now. 1288 */ 1289 int 1290 orecvmsg(td, uap) 1291 struct thread *td; 1292 struct orecvmsg_args /* { 1293 int s; 1294 struct omsghdr *msg; 1295 int flags; 1296 } */ *uap; 1297 { 1298 struct msghdr msg; 1299 struct iovec *iov; 1300 int error; 1301 1302 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1303 if (error) 1304 return (error); 1305 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1306 if (error) 1307 return (error); 1308 msg.msg_flags = uap->flags | MSG_COMPAT; 1309 msg.msg_iov = iov; 1310 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1311 if (msg.msg_controllen && error == 0) 1312 error = copyout(&msg.msg_controllen, 1313 &uap->msg->msg_accrightslen, sizeof (int)); 1314 free(iov, M_IOV); 1315 return (error); 1316 } 1317 #endif 1318 1319 int 1320 sys_recvmsg(td, uap) 1321 struct thread *td; 1322 struct recvmsg_args /* { 1323 int s; 1324 struct msghdr *msg; 1325 int flags; 1326 } */ *uap; 1327 { 1328 struct msghdr msg; 1329 struct iovec *uiov, *iov; 1330 int error; 1331 1332 error = copyin(uap->msg, &msg, sizeof (msg)); 1333 if (error) 1334 return (error); 1335 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1336 if (error) 1337 return (error); 1338 msg.msg_flags = uap->flags; 1339 #ifdef COMPAT_OLDSOCK 1340 msg.msg_flags &= ~MSG_COMPAT; 1341 #endif 1342 uiov = msg.msg_iov; 1343 msg.msg_iov = iov; 1344 error = recvit(td, uap->s, &msg, NULL); 1345 if (error == 0) { 1346 msg.msg_iov = uiov; 1347 error = copyout(&msg, uap->msg, sizeof(msg)); 1348 } 1349 free(iov, M_IOV); 1350 return (error); 1351 } 1352 1353 /* ARGSUSED */ 1354 int 1355 sys_shutdown(td, uap) 1356 struct thread *td; 1357 struct shutdown_args /* { 1358 int s; 1359 int how; 1360 } */ *uap; 1361 { 1362 struct socket *so; 1363 struct file *fp; 1364 int error; 1365 1366 AUDIT_ARG_FD(uap->s); 1367 error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SHUTDOWN, &fp, 1368 NULL); 1369 if (error == 0) { 1370 so = fp->f_data; 1371 error = soshutdown(so, uap->how); 1372 fdrop(fp, td); 1373 } 1374 return (error); 1375 } 1376 1377 /* ARGSUSED */ 1378 int 1379 sys_setsockopt(td, uap) 1380 struct thread *td; 1381 struct setsockopt_args /* { 1382 int s; 1383 int level; 1384 int name; 1385 caddr_t val; 1386 int valsize; 1387 } */ *uap; 1388 { 1389 1390 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1391 uap->val, UIO_USERSPACE, uap->valsize)); 1392 } 1393 1394 int 1395 kern_setsockopt(td, s, level, name, val, valseg, valsize) 1396 struct thread *td; 1397 int s; 1398 int level; 1399 int name; 1400 void *val; 1401 enum uio_seg valseg; 1402 socklen_t valsize; 1403 { 1404 int error; 1405 struct socket *so; 1406 struct file *fp; 1407 struct sockopt sopt; 1408 1409 if (val == NULL && valsize != 0) 1410 return (EFAULT); 1411 if ((int)valsize < 0) 1412 return (EINVAL); 1413 1414 sopt.sopt_dir = SOPT_SET; 1415 sopt.sopt_level = level; 1416 sopt.sopt_name = name; 1417 sopt.sopt_val = val; 1418 sopt.sopt_valsize = valsize; 1419 switch (valseg) { 1420 case UIO_USERSPACE: 1421 sopt.sopt_td = td; 1422 break; 1423 case UIO_SYSSPACE: 1424 sopt.sopt_td = NULL; 1425 break; 1426 default: 1427 panic("kern_setsockopt called with bad valseg"); 1428 } 1429 1430 AUDIT_ARG_FD(s); 1431 error = getsock_cap(td->td_proc->p_fd, s, CAP_SETSOCKOPT, &fp, NULL); 1432 if (error == 0) { 1433 so = fp->f_data; 1434 error = sosetopt(so, &sopt); 1435 fdrop(fp, td); 1436 } 1437 return(error); 1438 } 1439 1440 /* ARGSUSED */ 1441 int 1442 sys_getsockopt(td, uap) 1443 struct thread *td; 1444 struct getsockopt_args /* { 1445 int s; 1446 int level; 1447 int name; 1448 void * __restrict val; 1449 socklen_t * __restrict avalsize; 1450 } */ *uap; 1451 { 1452 socklen_t valsize; 1453 int error; 1454 1455 if (uap->val) { 1456 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1457 if (error) 1458 return (error); 1459 } 1460 1461 error = kern_getsockopt(td, uap->s, uap->level, uap->name, 1462 uap->val, UIO_USERSPACE, &valsize); 1463 1464 if (error == 0) 1465 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1466 return (error); 1467 } 1468 1469 /* 1470 * Kernel version of getsockopt. 1471 * optval can be a userland or userspace. optlen is always a kernel pointer. 1472 */ 1473 int 1474 kern_getsockopt(td, s, level, name, val, valseg, valsize) 1475 struct thread *td; 1476 int s; 1477 int level; 1478 int name; 1479 void *val; 1480 enum uio_seg valseg; 1481 socklen_t *valsize; 1482 { 1483 int error; 1484 struct socket *so; 1485 struct file *fp; 1486 struct sockopt sopt; 1487 1488 if (val == NULL) 1489 *valsize = 0; 1490 if ((int)*valsize < 0) 1491 return (EINVAL); 1492 1493 sopt.sopt_dir = SOPT_GET; 1494 sopt.sopt_level = level; 1495 sopt.sopt_name = name; 1496 sopt.sopt_val = val; 1497 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1498 switch (valseg) { 1499 case UIO_USERSPACE: 1500 sopt.sopt_td = td; 1501 break; 1502 case UIO_SYSSPACE: 1503 sopt.sopt_td = NULL; 1504 break; 1505 default: 1506 panic("kern_getsockopt called with bad valseg"); 1507 } 1508 1509 AUDIT_ARG_FD(s); 1510 error = getsock_cap(td->td_proc->p_fd, s, CAP_GETSOCKOPT, &fp, NULL); 1511 if (error == 0) { 1512 so = fp->f_data; 1513 error = sogetopt(so, &sopt); 1514 *valsize = sopt.sopt_valsize; 1515 fdrop(fp, td); 1516 } 1517 return (error); 1518 } 1519 1520 /* 1521 * getsockname1() - Get socket name. 1522 */ 1523 /* ARGSUSED */ 1524 static int 1525 getsockname1(td, uap, compat) 1526 struct thread *td; 1527 struct getsockname_args /* { 1528 int fdes; 1529 struct sockaddr * __restrict asa; 1530 socklen_t * __restrict alen; 1531 } */ *uap; 1532 int compat; 1533 { 1534 struct sockaddr *sa; 1535 socklen_t len; 1536 int error; 1537 1538 error = copyin(uap->alen, &len, sizeof(len)); 1539 if (error) 1540 return (error); 1541 1542 error = kern_getsockname(td, uap->fdes, &sa, &len); 1543 if (error) 1544 return (error); 1545 1546 if (len != 0) { 1547 #ifdef COMPAT_OLDSOCK 1548 if (compat) 1549 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1550 #endif 1551 error = copyout(sa, uap->asa, (u_int)len); 1552 } 1553 free(sa, M_SONAME); 1554 if (error == 0) 1555 error = copyout(&len, uap->alen, sizeof(len)); 1556 return (error); 1557 } 1558 1559 int 1560 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, 1561 socklen_t *alen) 1562 { 1563 struct socket *so; 1564 struct file *fp; 1565 socklen_t len; 1566 int error; 1567 1568 if (*alen < 0) 1569 return (EINVAL); 1570 1571 AUDIT_ARG_FD(fd); 1572 error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETSOCKNAME, &fp, NULL); 1573 if (error) 1574 return (error); 1575 so = fp->f_data; 1576 *sa = NULL; 1577 CURVNET_SET(so->so_vnet); 1578 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa); 1579 CURVNET_RESTORE(); 1580 if (error) 1581 goto bad; 1582 if (*sa == NULL) 1583 len = 0; 1584 else 1585 len = MIN(*alen, (*sa)->sa_len); 1586 *alen = len; 1587 #ifdef KTRACE 1588 if (KTRPOINT(td, KTR_STRUCT)) 1589 ktrsockaddr(*sa); 1590 #endif 1591 bad: 1592 fdrop(fp, td); 1593 if (error && *sa) { 1594 free(*sa, M_SONAME); 1595 *sa = NULL; 1596 } 1597 return (error); 1598 } 1599 1600 int 1601 sys_getsockname(td, uap) 1602 struct thread *td; 1603 struct getsockname_args *uap; 1604 { 1605 1606 return (getsockname1(td, uap, 0)); 1607 } 1608 1609 #ifdef COMPAT_OLDSOCK 1610 int 1611 ogetsockname(td, uap) 1612 struct thread *td; 1613 struct getsockname_args *uap; 1614 { 1615 1616 return (getsockname1(td, uap, 1)); 1617 } 1618 #endif /* COMPAT_OLDSOCK */ 1619 1620 /* 1621 * getpeername1() - Get name of peer for connected socket. 1622 */ 1623 /* ARGSUSED */ 1624 static int 1625 getpeername1(td, uap, compat) 1626 struct thread *td; 1627 struct getpeername_args /* { 1628 int fdes; 1629 struct sockaddr * __restrict asa; 1630 socklen_t * __restrict alen; 1631 } */ *uap; 1632 int compat; 1633 { 1634 struct sockaddr *sa; 1635 socklen_t len; 1636 int error; 1637 1638 error = copyin(uap->alen, &len, sizeof (len)); 1639 if (error) 1640 return (error); 1641 1642 error = kern_getpeername(td, uap->fdes, &sa, &len); 1643 if (error) 1644 return (error); 1645 1646 if (len != 0) { 1647 #ifdef COMPAT_OLDSOCK 1648 if (compat) 1649 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1650 #endif 1651 error = copyout(sa, uap->asa, (u_int)len); 1652 } 1653 free(sa, M_SONAME); 1654 if (error == 0) 1655 error = copyout(&len, uap->alen, sizeof(len)); 1656 return (error); 1657 } 1658 1659 int 1660 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, 1661 socklen_t *alen) 1662 { 1663 struct socket *so; 1664 struct file *fp; 1665 socklen_t len; 1666 int error; 1667 1668 if (*alen < 0) 1669 return (EINVAL); 1670 1671 AUDIT_ARG_FD(fd); 1672 error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETPEERNAME, &fp, NULL); 1673 if (error) 1674 return (error); 1675 so = fp->f_data; 1676 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1677 error = ENOTCONN; 1678 goto done; 1679 } 1680 *sa = NULL; 1681 CURVNET_SET(so->so_vnet); 1682 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa); 1683 CURVNET_RESTORE(); 1684 if (error) 1685 goto bad; 1686 if (*sa == NULL) 1687 len = 0; 1688 else 1689 len = MIN(*alen, (*sa)->sa_len); 1690 *alen = len; 1691 #ifdef KTRACE 1692 if (KTRPOINT(td, KTR_STRUCT)) 1693 ktrsockaddr(*sa); 1694 #endif 1695 bad: 1696 if (error && *sa) { 1697 free(*sa, M_SONAME); 1698 *sa = NULL; 1699 } 1700 done: 1701 fdrop(fp, td); 1702 return (error); 1703 } 1704 1705 int 1706 sys_getpeername(td, uap) 1707 struct thread *td; 1708 struct getpeername_args *uap; 1709 { 1710 1711 return (getpeername1(td, uap, 0)); 1712 } 1713 1714 #ifdef COMPAT_OLDSOCK 1715 int 1716 ogetpeername(td, uap) 1717 struct thread *td; 1718 struct ogetpeername_args *uap; 1719 { 1720 1721 /* XXX uap should have type `getpeername_args *' to begin with. */ 1722 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1723 } 1724 #endif /* COMPAT_OLDSOCK */ 1725 1726 int 1727 sockargs(mp, buf, buflen, type) 1728 struct mbuf **mp; 1729 caddr_t buf; 1730 int buflen, type; 1731 { 1732 struct sockaddr *sa; 1733 struct mbuf *m; 1734 int error; 1735 1736 if (buflen > MLEN) { 1737 #ifdef COMPAT_OLDSOCK 1738 if (type == MT_SONAME && buflen <= 112) 1739 buflen = MLEN; /* unix domain compat. hack */ 1740 else 1741 #endif 1742 if (buflen > MCLBYTES) 1743 return (EINVAL); 1744 } 1745 m = m_get2(buflen, M_WAITOK, type, 0); 1746 m->m_len = buflen; 1747 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1748 if (error) 1749 (void) m_free(m); 1750 else { 1751 *mp = m; 1752 if (type == MT_SONAME) { 1753 sa = mtod(m, struct sockaddr *); 1754 1755 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1756 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1757 sa->sa_family = sa->sa_len; 1758 #endif 1759 sa->sa_len = buflen; 1760 } 1761 } 1762 return (error); 1763 } 1764 1765 int 1766 getsockaddr(namp, uaddr, len) 1767 struct sockaddr **namp; 1768 caddr_t uaddr; 1769 size_t len; 1770 { 1771 struct sockaddr *sa; 1772 int error; 1773 1774 if (len > SOCK_MAXADDRLEN) 1775 return (ENAMETOOLONG); 1776 if (len < offsetof(struct sockaddr, sa_data[0])) 1777 return (EINVAL); 1778 sa = malloc(len, M_SONAME, M_WAITOK); 1779 error = copyin(uaddr, sa, len); 1780 if (error) { 1781 free(sa, M_SONAME); 1782 } else { 1783 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1784 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1785 sa->sa_family = sa->sa_len; 1786 #endif 1787 sa->sa_len = len; 1788 *namp = sa; 1789 } 1790 return (error); 1791 } 1792 1793 #include <sys/condvar.h> 1794 1795 struct sendfile_sync { 1796 struct mtx mtx; 1797 struct cv cv; 1798 unsigned count; 1799 }; 1800 1801 /* 1802 * Detach mapped page and release resources back to the system. 1803 */ 1804 void 1805 sf_buf_mext(void *addr, void *args) 1806 { 1807 vm_page_t m; 1808 struct sendfile_sync *sfs; 1809 1810 m = sf_buf_page(args); 1811 sf_buf_free(args); 1812 vm_page_lock(m); 1813 vm_page_unwire(m, 0); 1814 /* 1815 * Check for the object going away on us. This can 1816 * happen since we don't hold a reference to it. 1817 * If so, we're responsible for freeing the page. 1818 */ 1819 if (m->wire_count == 0 && m->object == NULL) 1820 vm_page_free(m); 1821 vm_page_unlock(m); 1822 if (addr == NULL) 1823 return; 1824 sfs = addr; 1825 mtx_lock(&sfs->mtx); 1826 KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0")); 1827 if (--sfs->count == 0) 1828 cv_signal(&sfs->cv); 1829 mtx_unlock(&sfs->mtx); 1830 } 1831 1832 /* 1833 * sendfile(2) 1834 * 1835 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1836 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1837 * 1838 * Send a file specified by 'fd' and starting at 'offset' to a socket 1839 * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes == 1840 * 0. Optionally add a header and/or trailer to the socket output. If 1841 * specified, write the total number of bytes sent into *sbytes. 1842 */ 1843 int 1844 sys_sendfile(struct thread *td, struct sendfile_args *uap) 1845 { 1846 1847 return (do_sendfile(td, uap, 0)); 1848 } 1849 1850 static int 1851 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1852 { 1853 struct sf_hdtr hdtr; 1854 struct uio *hdr_uio, *trl_uio; 1855 int error; 1856 1857 hdr_uio = trl_uio = NULL; 1858 1859 if (uap->hdtr != NULL) { 1860 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1861 if (error) 1862 goto out; 1863 if (hdtr.headers != NULL) { 1864 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1865 if (error) 1866 goto out; 1867 } 1868 if (hdtr.trailers != NULL) { 1869 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); 1870 if (error) 1871 goto out; 1872 1873 } 1874 } 1875 1876 error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat); 1877 out: 1878 if (hdr_uio) 1879 free(hdr_uio, M_IOV); 1880 if (trl_uio) 1881 free(trl_uio, M_IOV); 1882 return (error); 1883 } 1884 1885 #ifdef COMPAT_FREEBSD4 1886 int 1887 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1888 { 1889 struct sendfile_args args; 1890 1891 args.fd = uap->fd; 1892 args.s = uap->s; 1893 args.offset = uap->offset; 1894 args.nbytes = uap->nbytes; 1895 args.hdtr = uap->hdtr; 1896 args.sbytes = uap->sbytes; 1897 args.flags = uap->flags; 1898 1899 return (do_sendfile(td, &args, 1)); 1900 } 1901 #endif /* COMPAT_FREEBSD4 */ 1902 1903 int 1904 kern_sendfile(struct thread *td, struct sendfile_args *uap, 1905 struct uio *hdr_uio, struct uio *trl_uio, int compat) 1906 { 1907 struct file *sock_fp; 1908 struct vnode *vp; 1909 struct vm_object *obj = NULL; 1910 struct socket *so = NULL; 1911 struct mbuf *m = NULL; 1912 struct sf_buf *sf; 1913 struct vm_page *pg; 1914 off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0; 1915 int error, hdrlen = 0, mnw = 0; 1916 struct sendfile_sync *sfs = NULL; 1917 1918 /* 1919 * The file descriptor must be a regular file and have a 1920 * backing VM object. 1921 * File offset must be positive. If it goes beyond EOF 1922 * we send only the header/trailer and no payload data. 1923 */ 1924 AUDIT_ARG_FD(uap->fd); 1925 /* 1926 * sendfile(2) can start at any offset within a file so we require 1927 * CAP_READ+CAP_SEEK = CAP_PREAD. 1928 */ 1929 if ((error = fgetvp_read(td, uap->fd, CAP_PREAD, &vp)) != 0) 1930 goto out; 1931 vn_lock(vp, LK_SHARED | LK_RETRY); 1932 if (vp->v_type == VREG) { 1933 obj = vp->v_object; 1934 if (obj != NULL) { 1935 /* 1936 * Temporarily increase the backing VM 1937 * object's reference count so that a forced 1938 * reclamation of its vnode does not 1939 * immediately destroy it. 1940 */ 1941 VM_OBJECT_WLOCK(obj); 1942 if ((obj->flags & OBJ_DEAD) == 0) { 1943 vm_object_reference_locked(obj); 1944 VM_OBJECT_WUNLOCK(obj); 1945 } else { 1946 VM_OBJECT_WUNLOCK(obj); 1947 obj = NULL; 1948 } 1949 } 1950 } 1951 VOP_UNLOCK(vp, 0); 1952 if (obj == NULL) { 1953 error = EINVAL; 1954 goto out; 1955 } 1956 if (uap->offset < 0) { 1957 error = EINVAL; 1958 goto out; 1959 } 1960 1961 /* 1962 * The socket must be a stream socket and connected. 1963 * Remember if it a blocking or non-blocking socket. 1964 */ 1965 if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SEND, 1966 &sock_fp, NULL)) != 0) 1967 goto out; 1968 so = sock_fp->f_data; 1969 if (so->so_type != SOCK_STREAM) { 1970 error = EINVAL; 1971 goto out; 1972 } 1973 if ((so->so_state & SS_ISCONNECTED) == 0) { 1974 error = ENOTCONN; 1975 goto out; 1976 } 1977 /* 1978 * Do not wait on memory allocations but return ENOMEM for 1979 * caller to retry later. 1980 * XXX: Experimental. 1981 */ 1982 if (uap->flags & SF_MNOWAIT) 1983 mnw = 1; 1984 1985 if (uap->flags & SF_SYNC) { 1986 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO); 1987 mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); 1988 cv_init(&sfs->cv, "sendfile"); 1989 } 1990 1991 #ifdef MAC 1992 error = mac_socket_check_send(td->td_ucred, so); 1993 if (error) 1994 goto out; 1995 #endif 1996 1997 /* If headers are specified copy them into mbufs. */ 1998 if (hdr_uio != NULL) { 1999 hdr_uio->uio_td = td; 2000 hdr_uio->uio_rw = UIO_WRITE; 2001 if (hdr_uio->uio_resid > 0) { 2002 /* 2003 * In FBSD < 5.0 the nbytes to send also included 2004 * the header. If compat is specified subtract the 2005 * header size from nbytes. 2006 */ 2007 if (compat) { 2008 if (uap->nbytes > hdr_uio->uio_resid) 2009 uap->nbytes -= hdr_uio->uio_resid; 2010 else 2011 uap->nbytes = 0; 2012 } 2013 m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK), 2014 0, 0, 0); 2015 if (m == NULL) { 2016 error = mnw ? EAGAIN : ENOBUFS; 2017 goto out; 2018 } 2019 hdrlen = m_length(m, NULL); 2020 } 2021 } 2022 2023 /* 2024 * Protect against multiple writers to the socket. 2025 * 2026 * XXXRW: Historically this has assumed non-interruptibility, so now 2027 * we implement that, but possibly shouldn't. 2028 */ 2029 (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR); 2030 2031 /* 2032 * Loop through the pages of the file, starting with the requested 2033 * offset. Get a file page (do I/O if necessary), map the file page 2034 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 2035 * it on the socket. 2036 * This is done in two loops. The inner loop turns as many pages 2037 * as it can, up to available socket buffer space, without blocking 2038 * into mbufs to have it bulk delivered into the socket send buffer. 2039 * The outer loop checks the state and available space of the socket 2040 * and takes care of the overall progress. 2041 */ 2042 for (off = uap->offset, rem = uap->nbytes; ; ) { 2043 struct mbuf *mtail = NULL; 2044 int loopbytes = 0; 2045 int space = 0; 2046 int done = 0; 2047 2048 /* 2049 * Check the socket state for ongoing connection, 2050 * no errors and space in socket buffer. 2051 * If space is low allow for the remainder of the 2052 * file to be processed if it fits the socket buffer. 2053 * Otherwise block in waiting for sufficient space 2054 * to proceed, or if the socket is nonblocking, return 2055 * to userland with EAGAIN while reporting how far 2056 * we've come. 2057 * We wait until the socket buffer has significant free 2058 * space to do bulk sends. This makes good use of file 2059 * system read ahead and allows packet segmentation 2060 * offloading hardware to take over lots of work. If 2061 * we were not careful here we would send off only one 2062 * sfbuf at a time. 2063 */ 2064 SOCKBUF_LOCK(&so->so_snd); 2065 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2) 2066 so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; 2067 retry_space: 2068 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2069 error = EPIPE; 2070 SOCKBUF_UNLOCK(&so->so_snd); 2071 goto done; 2072 } else if (so->so_error) { 2073 error = so->so_error; 2074 so->so_error = 0; 2075 SOCKBUF_UNLOCK(&so->so_snd); 2076 goto done; 2077 } 2078 space = sbspace(&so->so_snd); 2079 if (space < rem && 2080 (space <= 0 || 2081 space < so->so_snd.sb_lowat)) { 2082 if (so->so_state & SS_NBIO) { 2083 SOCKBUF_UNLOCK(&so->so_snd); 2084 error = EAGAIN; 2085 goto done; 2086 } 2087 /* 2088 * sbwait drops the lock while sleeping. 2089 * When we loop back to retry_space the 2090 * state may have changed and we retest 2091 * for it. 2092 */ 2093 error = sbwait(&so->so_snd); 2094 /* 2095 * An error from sbwait usually indicates that we've 2096 * been interrupted by a signal. If we've sent anything 2097 * then return bytes sent, otherwise return the error. 2098 */ 2099 if (error) { 2100 SOCKBUF_UNLOCK(&so->so_snd); 2101 goto done; 2102 } 2103 goto retry_space; 2104 } 2105 SOCKBUF_UNLOCK(&so->so_snd); 2106 2107 /* 2108 * Reduce space in the socket buffer by the size of 2109 * the header mbuf chain. 2110 * hdrlen is set to 0 after the first loop. 2111 */ 2112 space -= hdrlen; 2113 2114 /* 2115 * Loop and construct maximum sized mbuf chain to be bulk 2116 * dumped into socket buffer. 2117 */ 2118 while (1) { 2119 vm_pindex_t pindex; 2120 vm_offset_t pgoff; 2121 struct mbuf *m0; 2122 2123 VM_OBJECT_WLOCK(obj); 2124 /* 2125 * Calculate the amount to transfer. 2126 * Not to exceed a page, the EOF, 2127 * or the passed in nbytes. 2128 */ 2129 pgoff = (vm_offset_t)(off & PAGE_MASK); 2130 if (uap->nbytes) 2131 rem = (uap->nbytes - fsbytes - loopbytes); 2132 else 2133 rem = obj->un_pager.vnp.vnp_size - 2134 uap->offset - fsbytes - loopbytes; 2135 xfsize = omin(PAGE_SIZE - pgoff, rem); 2136 xfsize = omin(space - loopbytes, xfsize); 2137 if (xfsize <= 0) { 2138 VM_OBJECT_WUNLOCK(obj); 2139 done = 1; /* all data sent */ 2140 break; 2141 } 2142 2143 /* 2144 * We've already overfilled the socket. 2145 * Let the outer loop figure out how to handle it. 2146 */ 2147 if (space <= loopbytes) { 2148 VM_OBJECT_WUNLOCK(obj); 2149 done = 0; 2150 break; 2151 } 2152 2153 /* 2154 * Attempt to look up the page. Allocate 2155 * if not found or wait and loop if busy. 2156 */ 2157 pindex = OFF_TO_IDX(off); 2158 pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY | 2159 VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY); 2160 2161 /* 2162 * Check if page is valid for what we need, 2163 * otherwise initiate I/O. 2164 * If we already turned some pages into mbufs, 2165 * send them off before we come here again and 2166 * block. 2167 */ 2168 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) 2169 VM_OBJECT_WUNLOCK(obj); 2170 else if (m != NULL) 2171 error = EAGAIN; /* send what we already got */ 2172 else if (uap->flags & SF_NODISKIO) 2173 error = EBUSY; 2174 else { 2175 int bsize; 2176 ssize_t resid; 2177 2178 /* 2179 * Ensure that our page is still around 2180 * when the I/O completes. 2181 */ 2182 vm_page_io_start(pg); 2183 VM_OBJECT_WUNLOCK(obj); 2184 2185 /* 2186 * Get the page from backing store. 2187 */ 2188 error = vn_lock(vp, LK_SHARED); 2189 if (error != 0) 2190 goto after_read; 2191 bsize = vp->v_mount->mnt_stat.f_iosize; 2192 2193 /* 2194 * XXXMAC: Because we don't have fp->f_cred 2195 * here, we pass in NOCRED. This is probably 2196 * wrong, but is consistent with our original 2197 * implementation. 2198 */ 2199 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 2200 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 2201 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 2202 td->td_ucred, NOCRED, &resid, td); 2203 VOP_UNLOCK(vp, 0); 2204 after_read: 2205 VM_OBJECT_WLOCK(obj); 2206 vm_page_io_finish(pg); 2207 if (!error) 2208 VM_OBJECT_WUNLOCK(obj); 2209 mbstat.sf_iocnt++; 2210 } 2211 if (error) { 2212 vm_page_lock(pg); 2213 vm_page_unwire(pg, 0); 2214 /* 2215 * See if anyone else might know about 2216 * this page. If not and it is not valid, 2217 * then free it. 2218 */ 2219 if (pg->wire_count == 0 && pg->valid == 0 && 2220 pg->busy == 0 && !(pg->oflags & VPO_BUSY)) 2221 vm_page_free(pg); 2222 vm_page_unlock(pg); 2223 VM_OBJECT_WUNLOCK(obj); 2224 if (error == EAGAIN) 2225 error = 0; /* not a real error */ 2226 break; 2227 } 2228 2229 /* 2230 * Get a sendfile buf. When allocating the 2231 * first buffer for mbuf chain, we usually 2232 * wait as long as necessary, but this wait 2233 * can be interrupted. For consequent 2234 * buffers, do not sleep, since several 2235 * threads might exhaust the buffers and then 2236 * deadlock. 2237 */ 2238 sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT : 2239 SFB_CATCH); 2240 if (sf == NULL) { 2241 mbstat.sf_allocfail++; 2242 vm_page_lock(pg); 2243 vm_page_unwire(pg, 0); 2244 KASSERT(pg->object != NULL, 2245 ("kern_sendfile: object disappeared")); 2246 vm_page_unlock(pg); 2247 if (m == NULL) 2248 error = (mnw ? EAGAIN : EINTR); 2249 break; 2250 } 2251 2252 /* 2253 * Get an mbuf and set it up as having 2254 * external storage. 2255 */ 2256 m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); 2257 if (m0 == NULL) { 2258 error = (mnw ? EAGAIN : ENOBUFS); 2259 sf_buf_mext(NULL, sf); 2260 break; 2261 } 2262 if (m_extadd(m0, (caddr_t )sf_buf_kva(sf), PAGE_SIZE, 2263 sf_buf_mext, sfs, sf, M_RDONLY, EXT_SFBUF, 2264 (mnw ? M_NOWAIT : M_WAITOK)) != 0) { 2265 error = (mnw ? EAGAIN : ENOBUFS); 2266 sf_buf_mext(NULL, sf); 2267 m_freem(m0); 2268 break; 2269 } 2270 m0->m_data = (char *)sf_buf_kva(sf) + pgoff; 2271 m0->m_len = xfsize; 2272 2273 /* Append to mbuf chain. */ 2274 if (mtail != NULL) 2275 mtail->m_next = m0; 2276 else if (m != NULL) 2277 m_last(m)->m_next = m0; 2278 else 2279 m = m0; 2280 mtail = m0; 2281 2282 /* Keep track of bits processed. */ 2283 loopbytes += xfsize; 2284 off += xfsize; 2285 2286 if (sfs != NULL) { 2287 mtx_lock(&sfs->mtx); 2288 sfs->count++; 2289 mtx_unlock(&sfs->mtx); 2290 } 2291 } 2292 2293 /* Add the buffer chain to the socket buffer. */ 2294 if (m != NULL) { 2295 int mlen, err; 2296 2297 mlen = m_length(m, NULL); 2298 SOCKBUF_LOCK(&so->so_snd); 2299 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2300 error = EPIPE; 2301 SOCKBUF_UNLOCK(&so->so_snd); 2302 goto done; 2303 } 2304 SOCKBUF_UNLOCK(&so->so_snd); 2305 CURVNET_SET(so->so_vnet); 2306 /* Avoid error aliasing. */ 2307 err = (*so->so_proto->pr_usrreqs->pru_send) 2308 (so, 0, m, NULL, NULL, td); 2309 CURVNET_RESTORE(); 2310 if (err == 0) { 2311 /* 2312 * We need two counters to get the 2313 * file offset and nbytes to send 2314 * right: 2315 * - sbytes contains the total amount 2316 * of bytes sent, including headers. 2317 * - fsbytes contains the total amount 2318 * of bytes sent from the file. 2319 */ 2320 sbytes += mlen; 2321 fsbytes += mlen; 2322 if (hdrlen) { 2323 fsbytes -= hdrlen; 2324 hdrlen = 0; 2325 } 2326 } else if (error == 0) 2327 error = err; 2328 m = NULL; /* pru_send always consumes */ 2329 } 2330 2331 /* Quit outer loop on error or when we're done. */ 2332 if (done) 2333 break; 2334 if (error) 2335 goto done; 2336 } 2337 2338 /* 2339 * Send trailers. Wimp out and use writev(2). 2340 */ 2341 if (trl_uio != NULL) { 2342 sbunlock(&so->so_snd); 2343 error = kern_writev(td, uap->s, trl_uio); 2344 if (error == 0) 2345 sbytes += td->td_retval[0]; 2346 goto out; 2347 } 2348 2349 done: 2350 sbunlock(&so->so_snd); 2351 out: 2352 /* 2353 * If there was no error we have to clear td->td_retval[0] 2354 * because it may have been set by writev. 2355 */ 2356 if (error == 0) { 2357 td->td_retval[0] = 0; 2358 } 2359 if (uap->sbytes != NULL) { 2360 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2361 } 2362 if (obj != NULL) 2363 vm_object_deallocate(obj); 2364 if (vp != NULL) 2365 vrele(vp); 2366 if (so) 2367 fdrop(sock_fp, td); 2368 if (m) 2369 m_freem(m); 2370 2371 if (sfs != NULL) { 2372 mtx_lock(&sfs->mtx); 2373 if (sfs->count != 0) 2374 cv_wait(&sfs->cv, &sfs->mtx); 2375 KASSERT(sfs->count == 0, ("sendfile sync still busy")); 2376 cv_destroy(&sfs->cv); 2377 mtx_destroy(&sfs->mtx); 2378 free(sfs, M_TEMP); 2379 } 2380 2381 if (error == ERESTART) 2382 error = EINTR; 2383 2384 return (error); 2385 } 2386 2387 /* 2388 * SCTP syscalls. 2389 * Functionality only compiled in if SCTP is defined in the kernel Makefile, 2390 * otherwise all return EOPNOTSUPP. 2391 * XXX: We should make this loadable one day. 2392 */ 2393 int 2394 sys_sctp_peeloff(td, uap) 2395 struct thread *td; 2396 struct sctp_peeloff_args /* { 2397 int sd; 2398 caddr_t name; 2399 } */ *uap; 2400 { 2401 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2402 struct file *nfp = NULL; 2403 int error; 2404 struct socket *head, *so; 2405 int fd; 2406 u_int fflag; 2407 2408 AUDIT_ARG_FD(uap->sd); 2409 error = fgetsock(td, uap->sd, CAP_PEELOFF, &head, &fflag); 2410 if (error) 2411 goto done2; 2412 if (head->so_proto->pr_protocol != IPPROTO_SCTP) { 2413 error = EOPNOTSUPP; 2414 goto done; 2415 } 2416 error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name); 2417 if (error) 2418 goto done; 2419 /* 2420 * At this point we know we do have a assoc to pull 2421 * we proceed to get the fd setup. This may block 2422 * but that is ok. 2423 */ 2424 2425 error = falloc(td, &nfp, &fd, 0); 2426 if (error) 2427 goto done; 2428 td->td_retval[0] = fd; 2429 2430 CURVNET_SET(head->so_vnet); 2431 so = sonewconn(head, SS_ISCONNECTED); 2432 if (so == NULL) { 2433 error = ENOMEM; 2434 goto noconnection; 2435 } 2436 /* 2437 * Before changing the flags on the socket, we have to bump the 2438 * reference count. Otherwise, if the protocol calls sofree(), 2439 * the socket will be released due to a zero refcount. 2440 */ 2441 SOCK_LOCK(so); 2442 soref(so); /* file descriptor reference */ 2443 SOCK_UNLOCK(so); 2444 2445 ACCEPT_LOCK(); 2446 2447 TAILQ_REMOVE(&head->so_comp, so, so_list); 2448 head->so_qlen--; 2449 so->so_state |= (head->so_state & SS_NBIO); 2450 so->so_state &= ~SS_NOFDREF; 2451 so->so_qstate &= ~SQ_COMP; 2452 so->so_head = NULL; 2453 ACCEPT_UNLOCK(); 2454 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 2455 error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name); 2456 if (error) 2457 goto noconnection; 2458 if (head->so_sigio != NULL) 2459 fsetown(fgetown(&head->so_sigio), &so->so_sigio); 2460 2461 noconnection: 2462 /* 2463 * close the new descriptor, assuming someone hasn't ripped it 2464 * out from under us. 2465 */ 2466 if (error) 2467 fdclose(td->td_proc->p_fd, nfp, fd, td); 2468 2469 /* 2470 * Release explicitly held references before returning. 2471 */ 2472 CURVNET_RESTORE(); 2473 done: 2474 if (nfp != NULL) 2475 fdrop(nfp, td); 2476 fputsock(head); 2477 done2: 2478 return (error); 2479 #else /* SCTP */ 2480 return (EOPNOTSUPP); 2481 #endif /* SCTP */ 2482 } 2483 2484 int 2485 sys_sctp_generic_sendmsg (td, uap) 2486 struct thread *td; 2487 struct sctp_generic_sendmsg_args /* { 2488 int sd, 2489 caddr_t msg, 2490 int mlen, 2491 caddr_t to, 2492 __socklen_t tolen, 2493 struct sctp_sndrcvinfo *sinfo, 2494 int flags 2495 } */ *uap; 2496 { 2497 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2498 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2499 struct socket *so; 2500 struct file *fp = NULL; 2501 int error = 0, len; 2502 struct sockaddr *to = NULL; 2503 #ifdef KTRACE 2504 struct uio *ktruio = NULL; 2505 #endif 2506 struct uio auio; 2507 struct iovec iov[1]; 2508 cap_rights_t rights; 2509 2510 if (uap->sinfo) { 2511 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2512 if (error) 2513 return (error); 2514 u_sinfo = &sinfo; 2515 } 2516 2517 rights = CAP_SEND; 2518 if (uap->tolen) { 2519 error = getsockaddr(&to, uap->to, uap->tolen); 2520 if (error) { 2521 to = NULL; 2522 goto sctp_bad2; 2523 } 2524 rights |= CAP_CONNECT; 2525 } 2526 2527 AUDIT_ARG_FD(uap->sd); 2528 error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL); 2529 if (error) 2530 goto sctp_bad; 2531 #ifdef KTRACE 2532 if (to && (KTRPOINT(td, KTR_STRUCT))) 2533 ktrsockaddr(to); 2534 #endif 2535 2536 iov[0].iov_base = uap->msg; 2537 iov[0].iov_len = uap->mlen; 2538 2539 so = (struct socket *)fp->f_data; 2540 if (so->so_proto->pr_protocol != IPPROTO_SCTP) { 2541 error = EOPNOTSUPP; 2542 goto sctp_bad; 2543 } 2544 #ifdef MAC 2545 error = mac_socket_check_send(td->td_ucred, so); 2546 if (error) 2547 goto sctp_bad; 2548 #endif /* MAC */ 2549 2550 auio.uio_iov = iov; 2551 auio.uio_iovcnt = 1; 2552 auio.uio_segflg = UIO_USERSPACE; 2553 auio.uio_rw = UIO_WRITE; 2554 auio.uio_td = td; 2555 auio.uio_offset = 0; /* XXX */ 2556 auio.uio_resid = 0; 2557 len = auio.uio_resid = uap->mlen; 2558 CURVNET_SET(so->so_vnet); 2559 error = sctp_lower_sosend(so, to, &auio, 2560 (struct mbuf *)NULL, (struct mbuf *)NULL, 2561 uap->flags, u_sinfo, td); 2562 CURVNET_RESTORE(); 2563 if (error) { 2564 if (auio.uio_resid != len && (error == ERESTART || 2565 error == EINTR || error == EWOULDBLOCK)) 2566 error = 0; 2567 /* Generation of SIGPIPE can be controlled per socket. */ 2568 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2569 !(uap->flags & MSG_NOSIGNAL)) { 2570 PROC_LOCK(td->td_proc); 2571 tdsignal(td, SIGPIPE); 2572 PROC_UNLOCK(td->td_proc); 2573 } 2574 } 2575 if (error == 0) 2576 td->td_retval[0] = len - auio.uio_resid; 2577 #ifdef KTRACE 2578 if (ktruio != NULL) { 2579 ktruio->uio_resid = td->td_retval[0]; 2580 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2581 } 2582 #endif /* KTRACE */ 2583 sctp_bad: 2584 if (fp) 2585 fdrop(fp, td); 2586 sctp_bad2: 2587 if (to) 2588 free(to, M_SONAME); 2589 return (error); 2590 #else /* SCTP */ 2591 return (EOPNOTSUPP); 2592 #endif /* SCTP */ 2593 } 2594 2595 int 2596 sys_sctp_generic_sendmsg_iov(td, uap) 2597 struct thread *td; 2598 struct sctp_generic_sendmsg_iov_args /* { 2599 int sd, 2600 struct iovec *iov, 2601 int iovlen, 2602 caddr_t to, 2603 __socklen_t tolen, 2604 struct sctp_sndrcvinfo *sinfo, 2605 int flags 2606 } */ *uap; 2607 { 2608 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2609 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2610 struct socket *so; 2611 struct file *fp = NULL; 2612 int error=0, i; 2613 ssize_t len; 2614 struct sockaddr *to = NULL; 2615 #ifdef KTRACE 2616 struct uio *ktruio = NULL; 2617 #endif 2618 struct uio auio; 2619 struct iovec *iov, *tiov; 2620 cap_rights_t rights; 2621 2622 if (uap->sinfo) { 2623 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2624 if (error) 2625 return (error); 2626 u_sinfo = &sinfo; 2627 } 2628 rights = CAP_SEND; 2629 if (uap->tolen) { 2630 error = getsockaddr(&to, uap->to, uap->tolen); 2631 if (error) { 2632 to = NULL; 2633 goto sctp_bad2; 2634 } 2635 rights |= CAP_CONNECT; 2636 } 2637 2638 AUDIT_ARG_FD(uap->sd); 2639 error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL); 2640 if (error) 2641 goto sctp_bad1; 2642 2643 #ifdef COMPAT_FREEBSD32 2644 if (SV_CURPROC_FLAG(SV_ILP32)) 2645 error = freebsd32_copyiniov((struct iovec32 *)uap->iov, 2646 uap->iovlen, &iov, EMSGSIZE); 2647 else 2648 #endif 2649 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2650 if (error) 2651 goto sctp_bad1; 2652 #ifdef KTRACE 2653 if (to && (KTRPOINT(td, KTR_STRUCT))) 2654 ktrsockaddr(to); 2655 #endif 2656 2657 so = (struct socket *)fp->f_data; 2658 if (so->so_proto->pr_protocol != IPPROTO_SCTP) { 2659 error = EOPNOTSUPP; 2660 goto sctp_bad; 2661 } 2662 #ifdef MAC 2663 error = mac_socket_check_send(td->td_ucred, so); 2664 if (error) 2665 goto sctp_bad; 2666 #endif /* MAC */ 2667 2668 auio.uio_iov = iov; 2669 auio.uio_iovcnt = uap->iovlen; 2670 auio.uio_segflg = UIO_USERSPACE; 2671 auio.uio_rw = UIO_WRITE; 2672 auio.uio_td = td; 2673 auio.uio_offset = 0; /* XXX */ 2674 auio.uio_resid = 0; 2675 tiov = iov; 2676 for (i = 0; i <uap->iovlen; i++, tiov++) { 2677 if ((auio.uio_resid += tiov->iov_len) < 0) { 2678 error = EINVAL; 2679 goto sctp_bad; 2680 } 2681 } 2682 len = auio.uio_resid; 2683 CURVNET_SET(so->so_vnet); 2684 error = sctp_lower_sosend(so, to, &auio, 2685 (struct mbuf *)NULL, (struct mbuf *)NULL, 2686 uap->flags, u_sinfo, td); 2687 CURVNET_RESTORE(); 2688 if (error) { 2689 if (auio.uio_resid != len && (error == ERESTART || 2690 error == EINTR || error == EWOULDBLOCK)) 2691 error = 0; 2692 /* Generation of SIGPIPE can be controlled per socket */ 2693 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2694 !(uap->flags & MSG_NOSIGNAL)) { 2695 PROC_LOCK(td->td_proc); 2696 tdsignal(td, SIGPIPE); 2697 PROC_UNLOCK(td->td_proc); 2698 } 2699 } 2700 if (error == 0) 2701 td->td_retval[0] = len - auio.uio_resid; 2702 #ifdef KTRACE 2703 if (ktruio != NULL) { 2704 ktruio->uio_resid = td->td_retval[0]; 2705 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2706 } 2707 #endif /* KTRACE */ 2708 sctp_bad: 2709 free(iov, M_IOV); 2710 sctp_bad1: 2711 if (fp) 2712 fdrop(fp, td); 2713 sctp_bad2: 2714 if (to) 2715 free(to, M_SONAME); 2716 return (error); 2717 #else /* SCTP */ 2718 return (EOPNOTSUPP); 2719 #endif /* SCTP */ 2720 } 2721 2722 int 2723 sys_sctp_generic_recvmsg(td, uap) 2724 struct thread *td; 2725 struct sctp_generic_recvmsg_args /* { 2726 int sd, 2727 struct iovec *iov, 2728 int iovlen, 2729 struct sockaddr *from, 2730 __socklen_t *fromlenaddr, 2731 struct sctp_sndrcvinfo *sinfo, 2732 int *msg_flags 2733 } */ *uap; 2734 { 2735 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2736 uint8_t sockbufstore[256]; 2737 struct uio auio; 2738 struct iovec *iov, *tiov; 2739 struct sctp_sndrcvinfo sinfo; 2740 struct socket *so; 2741 struct file *fp = NULL; 2742 struct sockaddr *fromsa; 2743 int fromlen; 2744 ssize_t len; 2745 int i, msg_flags; 2746 int error = 0; 2747 #ifdef KTRACE 2748 struct uio *ktruio = NULL; 2749 #endif 2750 2751 AUDIT_ARG_FD(uap->sd); 2752 error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_RECV, &fp, NULL); 2753 if (error) { 2754 return (error); 2755 } 2756 #ifdef COMPAT_FREEBSD32 2757 if (SV_CURPROC_FLAG(SV_ILP32)) 2758 error = freebsd32_copyiniov((struct iovec32 *)uap->iov, 2759 uap->iovlen, &iov, EMSGSIZE); 2760 else 2761 #endif 2762 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2763 if (error) 2764 goto out1; 2765 2766 so = fp->f_data; 2767 if (so->so_proto->pr_protocol != IPPROTO_SCTP) { 2768 error = EOPNOTSUPP; 2769 goto out; 2770 } 2771 #ifdef MAC 2772 error = mac_socket_check_receive(td->td_ucred, so); 2773 if (error) { 2774 goto out; 2775 } 2776 #endif /* MAC */ 2777 2778 if (uap->fromlenaddr) { 2779 error = copyin(uap->fromlenaddr, 2780 &fromlen, sizeof (fromlen)); 2781 if (error) { 2782 goto out; 2783 } 2784 } else { 2785 fromlen = 0; 2786 } 2787 if (uap->msg_flags) { 2788 error = copyin(uap->msg_flags, &msg_flags, sizeof (int)); 2789 if (error) { 2790 goto out; 2791 } 2792 } else { 2793 msg_flags = 0; 2794 } 2795 auio.uio_iov = iov; 2796 auio.uio_iovcnt = uap->iovlen; 2797 auio.uio_segflg = UIO_USERSPACE; 2798 auio.uio_rw = UIO_READ; 2799 auio.uio_td = td; 2800 auio.uio_offset = 0; /* XXX */ 2801 auio.uio_resid = 0; 2802 tiov = iov; 2803 for (i = 0; i <uap->iovlen; i++, tiov++) { 2804 if ((auio.uio_resid += tiov->iov_len) < 0) { 2805 error = EINVAL; 2806 goto out; 2807 } 2808 } 2809 len = auio.uio_resid; 2810 fromsa = (struct sockaddr *)sockbufstore; 2811 2812 #ifdef KTRACE 2813 if (KTRPOINT(td, KTR_GENIO)) 2814 ktruio = cloneuio(&auio); 2815 #endif /* KTRACE */ 2816 memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo)); 2817 CURVNET_SET(so->so_vnet); 2818 error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL, 2819 fromsa, fromlen, &msg_flags, 2820 (struct sctp_sndrcvinfo *)&sinfo, 1); 2821 CURVNET_RESTORE(); 2822 if (error) { 2823 if (auio.uio_resid != len && (error == ERESTART || 2824 error == EINTR || error == EWOULDBLOCK)) 2825 error = 0; 2826 } else { 2827 if (uap->sinfo) 2828 error = copyout(&sinfo, uap->sinfo, sizeof (sinfo)); 2829 } 2830 #ifdef KTRACE 2831 if (ktruio != NULL) { 2832 ktruio->uio_resid = len - auio.uio_resid; 2833 ktrgenio(uap->sd, UIO_READ, ktruio, error); 2834 } 2835 #endif /* KTRACE */ 2836 if (error) 2837 goto out; 2838 td->td_retval[0] = len - auio.uio_resid; 2839 2840 if (fromlen && uap->from) { 2841 len = fromlen; 2842 if (len <= 0 || fromsa == 0) 2843 len = 0; 2844 else { 2845 len = MIN(len, fromsa->sa_len); 2846 error = copyout(fromsa, uap->from, (size_t)len); 2847 if (error) 2848 goto out; 2849 } 2850 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t)); 2851 if (error) { 2852 goto out; 2853 } 2854 } 2855 #ifdef KTRACE 2856 if (KTRPOINT(td, KTR_STRUCT)) 2857 ktrsockaddr(fromsa); 2858 #endif 2859 if (uap->msg_flags) { 2860 error = copyout(&msg_flags, uap->msg_flags, sizeof (int)); 2861 if (error) { 2862 goto out; 2863 } 2864 } 2865 out: 2866 free(iov, M_IOV); 2867 out1: 2868 if (fp) 2869 fdrop(fp, td); 2870 2871 return (error); 2872 #else /* SCTP */ 2873 return (EOPNOTSUPP); 2874 #endif /* SCTP */ 2875 } 2876