1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_capsicum.h" 39 #include "opt_inet.h" 40 #include "opt_inet6.h" 41 #include "opt_sctp.h" 42 #include "opt_compat.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/capability.h> 48 #include <sys/condvar.h> 49 #include <sys/kernel.h> 50 #include <sys/lock.h> 51 #include <sys/mutex.h> 52 #include <sys/sysproto.h> 53 #include <sys/malloc.h> 54 #include <sys/filedesc.h> 55 #include <sys/event.h> 56 #include <sys/proc.h> 57 #include <sys/fcntl.h> 58 #include <sys/file.h> 59 #include <sys/filio.h> 60 #include <sys/jail.h> 61 #include <sys/mman.h> 62 #include <sys/mount.h> 63 #include <sys/mbuf.h> 64 #include <sys/protosw.h> 65 #include <sys/rwlock.h> 66 #include <sys/sf_buf.h> 67 #include <sys/sysent.h> 68 #include <sys/socket.h> 69 #include <sys/socketvar.h> 70 #include <sys/signalvar.h> 71 #include <sys/syscallsubr.h> 72 #include <sys/sysctl.h> 73 #include <sys/uio.h> 74 #include <sys/vnode.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 #ifdef COMPAT_FREEBSD32 79 #include <compat/freebsd32/freebsd32_util.h> 80 #endif 81 82 #include <net/vnet.h> 83 84 #include <security/audit/audit.h> 85 #include <security/mac/mac_framework.h> 86 87 #include <vm/vm.h> 88 #include <vm/vm_param.h> 89 #include <vm/vm_object.h> 90 #include <vm/vm_page.h> 91 #include <vm/vm_pager.h> 92 #include <vm/vm_kern.h> 93 #include <vm/vm_extern.h> 94 95 #if defined(INET) || defined(INET6) 96 #ifdef SCTP 97 #include <netinet/sctp.h> 98 #include <netinet/sctp_peeloff.h> 99 #endif /* SCTP */ 100 #endif /* INET || INET6 */ 101 102 /* 103 * Flags for accept1() and kern_accept4(), in addition to SOCK_CLOEXEC 104 * and SOCK_NONBLOCK. 105 */ 106 #define ACCEPT4_INHERIT 0x1 107 #define ACCEPT4_COMPAT 0x2 108 109 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 110 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 111 112 static int accept1(struct thread *td, int s, struct sockaddr *uname, 113 socklen_t *anamelen, int flags); 114 static int do_sendfile(struct thread *td, struct sendfile_args *uap, 115 int compat); 116 static int getsockname1(struct thread *td, struct getsockname_args *uap, 117 int compat); 118 static int getpeername1(struct thread *td, struct getpeername_args *uap, 119 int compat); 120 121 counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)]; 122 123 /* 124 * sendfile(2)-related variables and associated sysctls 125 */ 126 int nsfbufs; 127 int nsfbufspeak; 128 int nsfbufsused; 129 static int sfreadahead = 1; 130 131 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 132 "Maximum number of sendfile(2) sf_bufs available"); 133 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 134 "Number of sendfile(2) sf_bufs at peak usage"); 135 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 136 "Number of sendfile(2) sf_bufs in use"); 137 SYSCTL_INT(_kern_ipc, OID_AUTO, sfreadahead, CTLFLAG_RW, &sfreadahead, 0, 138 "Number of sendfile(2) read-ahead MAXBSIZE blocks"); 139 140 141 static void 142 sfstat_init(const void *unused) 143 { 144 145 COUNTER_ARRAY_ALLOC(sfstat, sizeof(struct sfstat) / sizeof(uint64_t), 146 M_WAITOK); 147 } 148 SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL); 149 150 static int 151 sfstat_sysctl(SYSCTL_HANDLER_ARGS) 152 { 153 struct sfstat s; 154 155 COUNTER_ARRAY_COPY(sfstat, &s, sizeof(s) / sizeof(uint64_t)); 156 if (req->newptr) 157 COUNTER_ARRAY_ZERO(sfstat, sizeof(s) / sizeof(uint64_t)); 158 return (SYSCTL_OUT(req, &s, sizeof(s))); 159 } 160 SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW, 161 NULL, 0, sfstat_sysctl, "I", "sendfile statistics"); 162 163 /* 164 * Convert a user file descriptor to a kernel file entry and check if required 165 * capability rights are present. 166 * A reference on the file entry is held upon returning. 167 */ 168 static int 169 getsock_cap(struct filedesc *fdp, int fd, cap_rights_t *rightsp, 170 struct file **fpp, u_int *fflagp) 171 { 172 struct file *fp; 173 int error; 174 175 error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL); 176 if (error != 0) 177 return (error); 178 if (fp->f_type != DTYPE_SOCKET) { 179 fdrop(fp, curthread); 180 return (ENOTSOCK); 181 } 182 if (fflagp != NULL) 183 *fflagp = fp->f_flag; 184 *fpp = fp; 185 return (0); 186 } 187 188 /* 189 * System call interface to the socket abstraction. 190 */ 191 #if defined(COMPAT_43) 192 #define COMPAT_OLDSOCK 193 #endif 194 195 int 196 sys_socket(td, uap) 197 struct thread *td; 198 struct socket_args /* { 199 int domain; 200 int type; 201 int protocol; 202 } */ *uap; 203 { 204 struct socket *so; 205 struct file *fp; 206 int fd, error, type, oflag, fflag; 207 208 AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol); 209 210 type = uap->type; 211 oflag = 0; 212 fflag = 0; 213 if ((type & SOCK_CLOEXEC) != 0) { 214 type &= ~SOCK_CLOEXEC; 215 oflag |= O_CLOEXEC; 216 } 217 if ((type & SOCK_NONBLOCK) != 0) { 218 type &= ~SOCK_NONBLOCK; 219 fflag |= FNONBLOCK; 220 } 221 222 #ifdef MAC 223 error = mac_socket_check_create(td->td_ucred, uap->domain, type, 224 uap->protocol); 225 if (error != 0) 226 return (error); 227 #endif 228 error = falloc(td, &fp, &fd, oflag); 229 if (error != 0) 230 return (error); 231 /* An extra reference on `fp' has been held for us by falloc(). */ 232 error = socreate(uap->domain, &so, type, uap->protocol, 233 td->td_ucred, td); 234 if (error != 0) { 235 fdclose(td->td_proc->p_fd, fp, fd, td); 236 } else { 237 finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops); 238 if ((fflag & FNONBLOCK) != 0) 239 (void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td); 240 td->td_retval[0] = fd; 241 } 242 fdrop(fp, td); 243 return (error); 244 } 245 246 /* ARGSUSED */ 247 int 248 sys_bind(td, uap) 249 struct thread *td; 250 struct bind_args /* { 251 int s; 252 caddr_t name; 253 int namelen; 254 } */ *uap; 255 { 256 struct sockaddr *sa; 257 int error; 258 259 error = getsockaddr(&sa, uap->name, uap->namelen); 260 if (error == 0) { 261 error = kern_bind(td, uap->s, sa); 262 free(sa, M_SONAME); 263 } 264 return (error); 265 } 266 267 static int 268 kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) 269 { 270 struct socket *so; 271 struct file *fp; 272 cap_rights_t rights; 273 int error; 274 275 AUDIT_ARG_FD(fd); 276 AUDIT_ARG_SOCKADDR(td, dirfd, sa); 277 error = getsock_cap(td->td_proc->p_fd, fd, 278 cap_rights_init(&rights, CAP_BIND), &fp, NULL); 279 if (error != 0) 280 return (error); 281 so = fp->f_data; 282 #ifdef KTRACE 283 if (KTRPOINT(td, KTR_STRUCT)) 284 ktrsockaddr(sa); 285 #endif 286 #ifdef MAC 287 error = mac_socket_check_bind(td->td_ucred, so, sa); 288 if (error == 0) { 289 #endif 290 if (dirfd == AT_FDCWD) 291 error = sobind(so, sa, td); 292 else 293 error = sobindat(dirfd, so, sa, td); 294 #ifdef MAC 295 } 296 #endif 297 fdrop(fp, td); 298 return (error); 299 } 300 301 int 302 kern_bind(struct thread *td, int fd, struct sockaddr *sa) 303 { 304 305 return (kern_bindat(td, AT_FDCWD, fd, sa)); 306 } 307 308 /* ARGSUSED */ 309 int 310 sys_bindat(td, uap) 311 struct thread *td; 312 struct bindat_args /* { 313 int fd; 314 int s; 315 caddr_t name; 316 int namelen; 317 } */ *uap; 318 { 319 struct sockaddr *sa; 320 int error; 321 322 error = getsockaddr(&sa, uap->name, uap->namelen); 323 if (error == 0) { 324 error = kern_bindat(td, uap->fd, uap->s, sa); 325 free(sa, M_SONAME); 326 } 327 return (error); 328 } 329 330 /* ARGSUSED */ 331 int 332 sys_listen(td, uap) 333 struct thread *td; 334 struct listen_args /* { 335 int s; 336 int backlog; 337 } */ *uap; 338 { 339 struct socket *so; 340 struct file *fp; 341 cap_rights_t rights; 342 int error; 343 344 AUDIT_ARG_FD(uap->s); 345 error = getsock_cap(td->td_proc->p_fd, uap->s, 346 cap_rights_init(&rights, CAP_LISTEN), &fp, NULL); 347 if (error == 0) { 348 so = fp->f_data; 349 #ifdef MAC 350 error = mac_socket_check_listen(td->td_ucred, so); 351 if (error == 0) 352 #endif 353 error = solisten(so, uap->backlog, td); 354 fdrop(fp, td); 355 } 356 return(error); 357 } 358 359 /* 360 * accept1() 361 */ 362 static int 363 accept1(td, s, uname, anamelen, flags) 364 struct thread *td; 365 int s; 366 struct sockaddr *uname; 367 socklen_t *anamelen; 368 int flags; 369 { 370 struct sockaddr *name; 371 socklen_t namelen; 372 struct file *fp; 373 int error; 374 375 if (uname == NULL) 376 return (kern_accept4(td, s, NULL, NULL, flags, NULL)); 377 378 error = copyin(anamelen, &namelen, sizeof (namelen)); 379 if (error != 0) 380 return (error); 381 382 error = kern_accept4(td, s, &name, &namelen, flags, &fp); 383 384 /* 385 * return a namelen of zero for older code which might 386 * ignore the return value from accept. 387 */ 388 if (error != 0) { 389 (void) copyout(&namelen, anamelen, sizeof(*anamelen)); 390 return (error); 391 } 392 393 if (error == 0 && uname != NULL) { 394 #ifdef COMPAT_OLDSOCK 395 if (flags & ACCEPT4_COMPAT) 396 ((struct osockaddr *)name)->sa_family = 397 name->sa_family; 398 #endif 399 error = copyout(name, uname, namelen); 400 } 401 if (error == 0) 402 error = copyout(&namelen, anamelen, 403 sizeof(namelen)); 404 if (error != 0) 405 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td); 406 fdrop(fp, td); 407 free(name, M_SONAME); 408 return (error); 409 } 410 411 int 412 kern_accept(struct thread *td, int s, struct sockaddr **name, 413 socklen_t *namelen, struct file **fp) 414 { 415 return (kern_accept4(td, s, name, namelen, ACCEPT4_INHERIT, fp)); 416 } 417 418 int 419 kern_accept4(struct thread *td, int s, struct sockaddr **name, 420 socklen_t *namelen, int flags, struct file **fp) 421 { 422 struct filedesc *fdp; 423 struct file *headfp, *nfp = NULL; 424 struct sockaddr *sa = NULL; 425 struct socket *head, *so; 426 cap_rights_t rights; 427 u_int fflag; 428 pid_t pgid; 429 int error, fd, tmp; 430 431 if (name != NULL) 432 *name = NULL; 433 434 AUDIT_ARG_FD(s); 435 fdp = td->td_proc->p_fd; 436 error = getsock_cap(fdp, s, cap_rights_init(&rights, CAP_ACCEPT), 437 &headfp, &fflag); 438 if (error != 0) 439 return (error); 440 head = headfp->f_data; 441 if ((head->so_options & SO_ACCEPTCONN) == 0) { 442 error = EINVAL; 443 goto done; 444 } 445 #ifdef MAC 446 error = mac_socket_check_accept(td->td_ucred, head); 447 if (error != 0) 448 goto done; 449 #endif 450 error = falloc(td, &nfp, &fd, (flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0); 451 if (error != 0) 452 goto done; 453 ACCEPT_LOCK(); 454 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 455 ACCEPT_UNLOCK(); 456 error = EWOULDBLOCK; 457 goto noconnection; 458 } 459 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 460 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 461 head->so_error = ECONNABORTED; 462 break; 463 } 464 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 465 "accept", 0); 466 if (error != 0) { 467 ACCEPT_UNLOCK(); 468 goto noconnection; 469 } 470 } 471 if (head->so_error) { 472 error = head->so_error; 473 head->so_error = 0; 474 ACCEPT_UNLOCK(); 475 goto noconnection; 476 } 477 so = TAILQ_FIRST(&head->so_comp); 478 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 479 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 480 481 /* 482 * Before changing the flags on the socket, we have to bump the 483 * reference count. Otherwise, if the protocol calls sofree(), 484 * the socket will be released due to a zero refcount. 485 */ 486 SOCK_LOCK(so); /* soref() and so_state update */ 487 soref(so); /* file descriptor reference */ 488 489 TAILQ_REMOVE(&head->so_comp, so, so_list); 490 head->so_qlen--; 491 if (flags & ACCEPT4_INHERIT) 492 so->so_state |= (head->so_state & SS_NBIO); 493 else 494 so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0; 495 so->so_qstate &= ~SQ_COMP; 496 so->so_head = NULL; 497 498 SOCK_UNLOCK(so); 499 ACCEPT_UNLOCK(); 500 501 /* An extra reference on `nfp' has been held for us by falloc(). */ 502 td->td_retval[0] = fd; 503 504 /* connection has been removed from the listen queue */ 505 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 506 507 if (flags & ACCEPT4_INHERIT) { 508 pgid = fgetown(&head->so_sigio); 509 if (pgid != 0) 510 fsetown(pgid, &so->so_sigio); 511 } else { 512 fflag &= ~(FNONBLOCK | FASYNC); 513 if (flags & SOCK_NONBLOCK) 514 fflag |= FNONBLOCK; 515 } 516 517 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 518 /* Sync socket nonblocking/async state with file flags */ 519 tmp = fflag & FNONBLOCK; 520 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 521 tmp = fflag & FASYNC; 522 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 523 sa = 0; 524 error = soaccept(so, &sa); 525 if (error != 0) { 526 /* 527 * return a namelen of zero for older code which might 528 * ignore the return value from accept. 529 */ 530 if (name) 531 *namelen = 0; 532 goto noconnection; 533 } 534 if (sa == NULL) { 535 if (name) 536 *namelen = 0; 537 goto done; 538 } 539 AUDIT_ARG_SOCKADDR(td, AT_FDCWD, sa); 540 if (name) { 541 /* check sa_len before it is destroyed */ 542 if (*namelen > sa->sa_len) 543 *namelen = sa->sa_len; 544 #ifdef KTRACE 545 if (KTRPOINT(td, KTR_STRUCT)) 546 ktrsockaddr(sa); 547 #endif 548 *name = sa; 549 sa = NULL; 550 } 551 noconnection: 552 free(sa, M_SONAME); 553 554 /* 555 * close the new descriptor, assuming someone hasn't ripped it 556 * out from under us. 557 */ 558 if (error != 0) 559 fdclose(fdp, nfp, fd, td); 560 561 /* 562 * Release explicitly held references before returning. We return 563 * a reference on nfp to the caller on success if they request it. 564 */ 565 done: 566 if (fp != NULL) { 567 if (error == 0) { 568 *fp = nfp; 569 nfp = NULL; 570 } else 571 *fp = NULL; 572 } 573 if (nfp != NULL) 574 fdrop(nfp, td); 575 fdrop(headfp, td); 576 return (error); 577 } 578 579 int 580 sys_accept(td, uap) 581 struct thread *td; 582 struct accept_args *uap; 583 { 584 585 return (accept1(td, uap->s, uap->name, uap->anamelen, ACCEPT4_INHERIT)); 586 } 587 588 int 589 sys_accept4(td, uap) 590 struct thread *td; 591 struct accept4_args *uap; 592 { 593 594 if (uap->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 595 return (EINVAL); 596 597 return (accept1(td, uap->s, uap->name, uap->anamelen, uap->flags)); 598 } 599 600 #ifdef COMPAT_OLDSOCK 601 int 602 oaccept(td, uap) 603 struct thread *td; 604 struct accept_args *uap; 605 { 606 607 return (accept1(td, uap->s, uap->name, uap->anamelen, 608 ACCEPT4_INHERIT | ACCEPT4_COMPAT)); 609 } 610 #endif /* COMPAT_OLDSOCK */ 611 612 /* ARGSUSED */ 613 int 614 sys_connect(td, uap) 615 struct thread *td; 616 struct connect_args /* { 617 int s; 618 caddr_t name; 619 int namelen; 620 } */ *uap; 621 { 622 struct sockaddr *sa; 623 int error; 624 625 error = getsockaddr(&sa, uap->name, uap->namelen); 626 if (error == 0) { 627 error = kern_connect(td, uap->s, sa); 628 free(sa, M_SONAME); 629 } 630 return (error); 631 } 632 633 static int 634 kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) 635 { 636 struct socket *so; 637 struct file *fp; 638 cap_rights_t rights; 639 int error, interrupted = 0; 640 641 AUDIT_ARG_FD(fd); 642 AUDIT_ARG_SOCKADDR(td, dirfd, sa); 643 error = getsock_cap(td->td_proc->p_fd, fd, 644 cap_rights_init(&rights, CAP_CONNECT), &fp, NULL); 645 if (error != 0) 646 return (error); 647 so = fp->f_data; 648 if (so->so_state & SS_ISCONNECTING) { 649 error = EALREADY; 650 goto done1; 651 } 652 #ifdef KTRACE 653 if (KTRPOINT(td, KTR_STRUCT)) 654 ktrsockaddr(sa); 655 #endif 656 #ifdef MAC 657 error = mac_socket_check_connect(td->td_ucred, so, sa); 658 if (error != 0) 659 goto bad; 660 #endif 661 if (dirfd == AT_FDCWD) 662 error = soconnect(so, sa, td); 663 else 664 error = soconnectat(dirfd, so, sa, td); 665 if (error != 0) 666 goto bad; 667 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 668 error = EINPROGRESS; 669 goto done1; 670 } 671 SOCK_LOCK(so); 672 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 673 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 674 "connec", 0); 675 if (error != 0) { 676 if (error == EINTR || error == ERESTART) 677 interrupted = 1; 678 break; 679 } 680 } 681 if (error == 0) { 682 error = so->so_error; 683 so->so_error = 0; 684 } 685 SOCK_UNLOCK(so); 686 bad: 687 if (!interrupted) 688 so->so_state &= ~SS_ISCONNECTING; 689 if (error == ERESTART) 690 error = EINTR; 691 done1: 692 fdrop(fp, td); 693 return (error); 694 } 695 696 int 697 kern_connect(struct thread *td, int fd, struct sockaddr *sa) 698 { 699 700 return (kern_connectat(td, AT_FDCWD, fd, sa)); 701 } 702 703 /* ARGSUSED */ 704 int 705 sys_connectat(td, uap) 706 struct thread *td; 707 struct connectat_args /* { 708 int fd; 709 int s; 710 caddr_t name; 711 int namelen; 712 } */ *uap; 713 { 714 struct sockaddr *sa; 715 int error; 716 717 error = getsockaddr(&sa, uap->name, uap->namelen); 718 if (error == 0) { 719 error = kern_connectat(td, uap->fd, uap->s, sa); 720 free(sa, M_SONAME); 721 } 722 return (error); 723 } 724 725 int 726 kern_socketpair(struct thread *td, int domain, int type, int protocol, 727 int *rsv) 728 { 729 struct filedesc *fdp = td->td_proc->p_fd; 730 struct file *fp1, *fp2; 731 struct socket *so1, *so2; 732 int fd, error, oflag, fflag; 733 734 AUDIT_ARG_SOCKET(domain, type, protocol); 735 736 oflag = 0; 737 fflag = 0; 738 if ((type & SOCK_CLOEXEC) != 0) { 739 type &= ~SOCK_CLOEXEC; 740 oflag |= O_CLOEXEC; 741 } 742 if ((type & SOCK_NONBLOCK) != 0) { 743 type &= ~SOCK_NONBLOCK; 744 fflag |= FNONBLOCK; 745 } 746 #ifdef MAC 747 /* We might want to have a separate check for socket pairs. */ 748 error = mac_socket_check_create(td->td_ucred, domain, type, 749 protocol); 750 if (error != 0) 751 return (error); 752 #endif 753 error = socreate(domain, &so1, type, protocol, td->td_ucred, td); 754 if (error != 0) 755 return (error); 756 error = socreate(domain, &so2, type, protocol, td->td_ucred, td); 757 if (error != 0) 758 goto free1; 759 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 760 error = falloc(td, &fp1, &fd, oflag); 761 if (error != 0) 762 goto free2; 763 rsv[0] = fd; 764 fp1->f_data = so1; /* so1 already has ref count */ 765 error = falloc(td, &fp2, &fd, oflag); 766 if (error != 0) 767 goto free3; 768 fp2->f_data = so2; /* so2 already has ref count */ 769 rsv[1] = fd; 770 error = soconnect2(so1, so2); 771 if (error != 0) 772 goto free4; 773 if (type == SOCK_DGRAM) { 774 /* 775 * Datagram socket connection is asymmetric. 776 */ 777 error = soconnect2(so2, so1); 778 if (error != 0) 779 goto free4; 780 } 781 finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data, 782 &socketops); 783 finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data, 784 &socketops); 785 if ((fflag & FNONBLOCK) != 0) { 786 (void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td); 787 (void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td); 788 } 789 fdrop(fp1, td); 790 fdrop(fp2, td); 791 return (0); 792 free4: 793 fdclose(fdp, fp2, rsv[1], td); 794 fdrop(fp2, td); 795 free3: 796 fdclose(fdp, fp1, rsv[0], td); 797 fdrop(fp1, td); 798 free2: 799 if (so2 != NULL) 800 (void)soclose(so2); 801 free1: 802 if (so1 != NULL) 803 (void)soclose(so1); 804 return (error); 805 } 806 807 int 808 sys_socketpair(struct thread *td, struct socketpair_args *uap) 809 { 810 int error, sv[2]; 811 812 error = kern_socketpair(td, uap->domain, uap->type, 813 uap->protocol, sv); 814 if (error != 0) 815 return (error); 816 error = copyout(sv, uap->rsv, 2 * sizeof(int)); 817 if (error != 0) { 818 (void)kern_close(td, sv[0]); 819 (void)kern_close(td, sv[1]); 820 } 821 return (error); 822 } 823 824 static int 825 sendit(td, s, mp, flags) 826 struct thread *td; 827 int s; 828 struct msghdr *mp; 829 int flags; 830 { 831 struct mbuf *control; 832 struct sockaddr *to; 833 int error; 834 835 #ifdef CAPABILITY_MODE 836 if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL)) 837 return (ECAPMODE); 838 #endif 839 840 if (mp->msg_name != NULL) { 841 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 842 if (error != 0) { 843 to = NULL; 844 goto bad; 845 } 846 mp->msg_name = to; 847 } else { 848 to = NULL; 849 } 850 851 if (mp->msg_control) { 852 if (mp->msg_controllen < sizeof(struct cmsghdr) 853 #ifdef COMPAT_OLDSOCK 854 && mp->msg_flags != MSG_COMPAT 855 #endif 856 ) { 857 error = EINVAL; 858 goto bad; 859 } 860 error = sockargs(&control, mp->msg_control, 861 mp->msg_controllen, MT_CONTROL); 862 if (error != 0) 863 goto bad; 864 #ifdef COMPAT_OLDSOCK 865 if (mp->msg_flags == MSG_COMPAT) { 866 struct cmsghdr *cm; 867 868 M_PREPEND(control, sizeof(*cm), M_WAITOK); 869 cm = mtod(control, struct cmsghdr *); 870 cm->cmsg_len = control->m_len; 871 cm->cmsg_level = SOL_SOCKET; 872 cm->cmsg_type = SCM_RIGHTS; 873 } 874 #endif 875 } else { 876 control = NULL; 877 } 878 879 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 880 881 bad: 882 free(to, M_SONAME); 883 return (error); 884 } 885 886 int 887 kern_sendit(td, s, mp, flags, control, segflg) 888 struct thread *td; 889 int s; 890 struct msghdr *mp; 891 int flags; 892 struct mbuf *control; 893 enum uio_seg segflg; 894 { 895 struct file *fp; 896 struct uio auio; 897 struct iovec *iov; 898 struct socket *so; 899 cap_rights_t rights; 900 #ifdef KTRACE 901 struct uio *ktruio = NULL; 902 #endif 903 ssize_t len; 904 int i, error; 905 906 AUDIT_ARG_FD(s); 907 cap_rights_init(&rights, CAP_SEND); 908 if (mp->msg_name != NULL) { 909 AUDIT_ARG_SOCKADDR(td, AT_FDCWD, mp->msg_name); 910 cap_rights_set(&rights, CAP_CONNECT); 911 } 912 error = getsock_cap(td->td_proc->p_fd, s, &rights, &fp, NULL); 913 if (error != 0) 914 return (error); 915 so = (struct socket *)fp->f_data; 916 917 #ifdef KTRACE 918 if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT)) 919 ktrsockaddr(mp->msg_name); 920 #endif 921 #ifdef MAC 922 if (mp->msg_name != NULL) { 923 error = mac_socket_check_connect(td->td_ucred, so, 924 mp->msg_name); 925 if (error != 0) 926 goto bad; 927 } 928 error = mac_socket_check_send(td->td_ucred, so); 929 if (error != 0) 930 goto bad; 931 #endif 932 933 auio.uio_iov = mp->msg_iov; 934 auio.uio_iovcnt = mp->msg_iovlen; 935 auio.uio_segflg = segflg; 936 auio.uio_rw = UIO_WRITE; 937 auio.uio_td = td; 938 auio.uio_offset = 0; /* XXX */ 939 auio.uio_resid = 0; 940 iov = mp->msg_iov; 941 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 942 if ((auio.uio_resid += iov->iov_len) < 0) { 943 error = EINVAL; 944 goto bad; 945 } 946 } 947 #ifdef KTRACE 948 if (KTRPOINT(td, KTR_GENIO)) 949 ktruio = cloneuio(&auio); 950 #endif 951 len = auio.uio_resid; 952 error = sosend(so, mp->msg_name, &auio, 0, control, flags, td); 953 if (error != 0) { 954 if (auio.uio_resid != len && (error == ERESTART || 955 error == EINTR || error == EWOULDBLOCK)) 956 error = 0; 957 /* Generation of SIGPIPE can be controlled per socket */ 958 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 959 !(flags & MSG_NOSIGNAL)) { 960 PROC_LOCK(td->td_proc); 961 tdsignal(td, SIGPIPE); 962 PROC_UNLOCK(td->td_proc); 963 } 964 } 965 if (error == 0) 966 td->td_retval[0] = len - auio.uio_resid; 967 #ifdef KTRACE 968 if (ktruio != NULL) { 969 ktruio->uio_resid = td->td_retval[0]; 970 ktrgenio(s, UIO_WRITE, ktruio, error); 971 } 972 #endif 973 bad: 974 fdrop(fp, td); 975 return (error); 976 } 977 978 int 979 sys_sendto(td, uap) 980 struct thread *td; 981 struct sendto_args /* { 982 int s; 983 caddr_t buf; 984 size_t len; 985 int flags; 986 caddr_t to; 987 int tolen; 988 } */ *uap; 989 { 990 struct msghdr msg; 991 struct iovec aiov; 992 993 msg.msg_name = uap->to; 994 msg.msg_namelen = uap->tolen; 995 msg.msg_iov = &aiov; 996 msg.msg_iovlen = 1; 997 msg.msg_control = 0; 998 #ifdef COMPAT_OLDSOCK 999 msg.msg_flags = 0; 1000 #endif 1001 aiov.iov_base = uap->buf; 1002 aiov.iov_len = uap->len; 1003 return (sendit(td, uap->s, &msg, uap->flags)); 1004 } 1005 1006 #ifdef COMPAT_OLDSOCK 1007 int 1008 osend(td, uap) 1009 struct thread *td; 1010 struct osend_args /* { 1011 int s; 1012 caddr_t buf; 1013 int len; 1014 int flags; 1015 } */ *uap; 1016 { 1017 struct msghdr msg; 1018 struct iovec aiov; 1019 1020 msg.msg_name = 0; 1021 msg.msg_namelen = 0; 1022 msg.msg_iov = &aiov; 1023 msg.msg_iovlen = 1; 1024 aiov.iov_base = uap->buf; 1025 aiov.iov_len = uap->len; 1026 msg.msg_control = 0; 1027 msg.msg_flags = 0; 1028 return (sendit(td, uap->s, &msg, uap->flags)); 1029 } 1030 1031 int 1032 osendmsg(td, uap) 1033 struct thread *td; 1034 struct osendmsg_args /* { 1035 int s; 1036 caddr_t msg; 1037 int flags; 1038 } */ *uap; 1039 { 1040 struct msghdr msg; 1041 struct iovec *iov; 1042 int error; 1043 1044 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1045 if (error != 0) 1046 return (error); 1047 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1048 if (error != 0) 1049 return (error); 1050 msg.msg_iov = iov; 1051 msg.msg_flags = MSG_COMPAT; 1052 error = sendit(td, uap->s, &msg, uap->flags); 1053 free(iov, M_IOV); 1054 return (error); 1055 } 1056 #endif 1057 1058 int 1059 sys_sendmsg(td, uap) 1060 struct thread *td; 1061 struct sendmsg_args /* { 1062 int s; 1063 caddr_t msg; 1064 int flags; 1065 } */ *uap; 1066 { 1067 struct msghdr msg; 1068 struct iovec *iov; 1069 int error; 1070 1071 error = copyin(uap->msg, &msg, sizeof (msg)); 1072 if (error != 0) 1073 return (error); 1074 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1075 if (error != 0) 1076 return (error); 1077 msg.msg_iov = iov; 1078 #ifdef COMPAT_OLDSOCK 1079 msg.msg_flags = 0; 1080 #endif 1081 error = sendit(td, uap->s, &msg, uap->flags); 1082 free(iov, M_IOV); 1083 return (error); 1084 } 1085 1086 int 1087 kern_recvit(td, s, mp, fromseg, controlp) 1088 struct thread *td; 1089 int s; 1090 struct msghdr *mp; 1091 enum uio_seg fromseg; 1092 struct mbuf **controlp; 1093 { 1094 struct uio auio; 1095 struct iovec *iov; 1096 struct mbuf *m, *control = NULL; 1097 caddr_t ctlbuf; 1098 struct file *fp; 1099 struct socket *so; 1100 struct sockaddr *fromsa = NULL; 1101 cap_rights_t rights; 1102 #ifdef KTRACE 1103 struct uio *ktruio = NULL; 1104 #endif 1105 ssize_t len; 1106 int error, i; 1107 1108 if (controlp != NULL) 1109 *controlp = NULL; 1110 1111 AUDIT_ARG_FD(s); 1112 error = getsock_cap(td->td_proc->p_fd, s, 1113 cap_rights_init(&rights, CAP_RECV), &fp, NULL); 1114 if (error != 0) 1115 return (error); 1116 so = fp->f_data; 1117 1118 #ifdef MAC 1119 error = mac_socket_check_receive(td->td_ucred, so); 1120 if (error != 0) { 1121 fdrop(fp, td); 1122 return (error); 1123 } 1124 #endif 1125 1126 auio.uio_iov = mp->msg_iov; 1127 auio.uio_iovcnt = mp->msg_iovlen; 1128 auio.uio_segflg = UIO_USERSPACE; 1129 auio.uio_rw = UIO_READ; 1130 auio.uio_td = td; 1131 auio.uio_offset = 0; /* XXX */ 1132 auio.uio_resid = 0; 1133 iov = mp->msg_iov; 1134 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 1135 if ((auio.uio_resid += iov->iov_len) < 0) { 1136 fdrop(fp, td); 1137 return (EINVAL); 1138 } 1139 } 1140 #ifdef KTRACE 1141 if (KTRPOINT(td, KTR_GENIO)) 1142 ktruio = cloneuio(&auio); 1143 #endif 1144 len = auio.uio_resid; 1145 error = soreceive(so, &fromsa, &auio, NULL, 1146 (mp->msg_control || controlp) ? &control : NULL, 1147 &mp->msg_flags); 1148 if (error != 0) { 1149 if (auio.uio_resid != len && (error == ERESTART || 1150 error == EINTR || error == EWOULDBLOCK)) 1151 error = 0; 1152 } 1153 if (fromsa != NULL) 1154 AUDIT_ARG_SOCKADDR(td, AT_FDCWD, fromsa); 1155 #ifdef KTRACE 1156 if (ktruio != NULL) { 1157 ktruio->uio_resid = len - auio.uio_resid; 1158 ktrgenio(s, UIO_READ, ktruio, error); 1159 } 1160 #endif 1161 if (error != 0) 1162 goto out; 1163 td->td_retval[0] = len - auio.uio_resid; 1164 if (mp->msg_name) { 1165 len = mp->msg_namelen; 1166 if (len <= 0 || fromsa == NULL) 1167 len = 0; 1168 else { 1169 /* save sa_len before it is destroyed by MSG_COMPAT */ 1170 len = MIN(len, fromsa->sa_len); 1171 #ifdef COMPAT_OLDSOCK 1172 if (mp->msg_flags & MSG_COMPAT) 1173 ((struct osockaddr *)fromsa)->sa_family = 1174 fromsa->sa_family; 1175 #endif 1176 if (fromseg == UIO_USERSPACE) { 1177 error = copyout(fromsa, mp->msg_name, 1178 (unsigned)len); 1179 if (error != 0) 1180 goto out; 1181 } else 1182 bcopy(fromsa, mp->msg_name, len); 1183 } 1184 mp->msg_namelen = len; 1185 } 1186 if (mp->msg_control && controlp == NULL) { 1187 #ifdef COMPAT_OLDSOCK 1188 /* 1189 * We assume that old recvmsg calls won't receive access 1190 * rights and other control info, esp. as control info 1191 * is always optional and those options didn't exist in 4.3. 1192 * If we receive rights, trim the cmsghdr; anything else 1193 * is tossed. 1194 */ 1195 if (control && mp->msg_flags & MSG_COMPAT) { 1196 if (mtod(control, struct cmsghdr *)->cmsg_level != 1197 SOL_SOCKET || 1198 mtod(control, struct cmsghdr *)->cmsg_type != 1199 SCM_RIGHTS) { 1200 mp->msg_controllen = 0; 1201 goto out; 1202 } 1203 control->m_len -= sizeof (struct cmsghdr); 1204 control->m_data += sizeof (struct cmsghdr); 1205 } 1206 #endif 1207 len = mp->msg_controllen; 1208 m = control; 1209 mp->msg_controllen = 0; 1210 ctlbuf = mp->msg_control; 1211 1212 while (m && len > 0) { 1213 unsigned int tocopy; 1214 1215 if (len >= m->m_len) 1216 tocopy = m->m_len; 1217 else { 1218 mp->msg_flags |= MSG_CTRUNC; 1219 tocopy = len; 1220 } 1221 1222 if ((error = copyout(mtod(m, caddr_t), 1223 ctlbuf, tocopy)) != 0) 1224 goto out; 1225 1226 ctlbuf += tocopy; 1227 len -= tocopy; 1228 m = m->m_next; 1229 } 1230 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1231 } 1232 out: 1233 fdrop(fp, td); 1234 #ifdef KTRACE 1235 if (fromsa && KTRPOINT(td, KTR_STRUCT)) 1236 ktrsockaddr(fromsa); 1237 #endif 1238 free(fromsa, M_SONAME); 1239 1240 if (error == 0 && controlp != NULL) 1241 *controlp = control; 1242 else if (control) 1243 m_freem(control); 1244 1245 return (error); 1246 } 1247 1248 static int 1249 recvit(td, s, mp, namelenp) 1250 struct thread *td; 1251 int s; 1252 struct msghdr *mp; 1253 void *namelenp; 1254 { 1255 int error; 1256 1257 error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL); 1258 if (error != 0) 1259 return (error); 1260 if (namelenp != NULL) { 1261 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t)); 1262 #ifdef COMPAT_OLDSOCK 1263 if (mp->msg_flags & MSG_COMPAT) 1264 error = 0; /* old recvfrom didn't check */ 1265 #endif 1266 } 1267 return (error); 1268 } 1269 1270 int 1271 sys_recvfrom(td, uap) 1272 struct thread *td; 1273 struct recvfrom_args /* { 1274 int s; 1275 caddr_t buf; 1276 size_t len; 1277 int flags; 1278 struct sockaddr * __restrict from; 1279 socklen_t * __restrict fromlenaddr; 1280 } */ *uap; 1281 { 1282 struct msghdr msg; 1283 struct iovec aiov; 1284 int error; 1285 1286 if (uap->fromlenaddr) { 1287 error = copyin(uap->fromlenaddr, 1288 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1289 if (error != 0) 1290 goto done2; 1291 } else { 1292 msg.msg_namelen = 0; 1293 } 1294 msg.msg_name = uap->from; 1295 msg.msg_iov = &aiov; 1296 msg.msg_iovlen = 1; 1297 aiov.iov_base = uap->buf; 1298 aiov.iov_len = uap->len; 1299 msg.msg_control = 0; 1300 msg.msg_flags = uap->flags; 1301 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1302 done2: 1303 return (error); 1304 } 1305 1306 #ifdef COMPAT_OLDSOCK 1307 int 1308 orecvfrom(td, uap) 1309 struct thread *td; 1310 struct recvfrom_args *uap; 1311 { 1312 1313 uap->flags |= MSG_COMPAT; 1314 return (sys_recvfrom(td, uap)); 1315 } 1316 #endif 1317 1318 #ifdef COMPAT_OLDSOCK 1319 int 1320 orecv(td, uap) 1321 struct thread *td; 1322 struct orecv_args /* { 1323 int s; 1324 caddr_t buf; 1325 int len; 1326 int flags; 1327 } */ *uap; 1328 { 1329 struct msghdr msg; 1330 struct iovec aiov; 1331 1332 msg.msg_name = 0; 1333 msg.msg_namelen = 0; 1334 msg.msg_iov = &aiov; 1335 msg.msg_iovlen = 1; 1336 aiov.iov_base = uap->buf; 1337 aiov.iov_len = uap->len; 1338 msg.msg_control = 0; 1339 msg.msg_flags = uap->flags; 1340 return (recvit(td, uap->s, &msg, NULL)); 1341 } 1342 1343 /* 1344 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1345 * overlays the new one, missing only the flags, and with the (old) access 1346 * rights where the control fields are now. 1347 */ 1348 int 1349 orecvmsg(td, uap) 1350 struct thread *td; 1351 struct orecvmsg_args /* { 1352 int s; 1353 struct omsghdr *msg; 1354 int flags; 1355 } */ *uap; 1356 { 1357 struct msghdr msg; 1358 struct iovec *iov; 1359 int error; 1360 1361 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1362 if (error != 0) 1363 return (error); 1364 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1365 if (error != 0) 1366 return (error); 1367 msg.msg_flags = uap->flags | MSG_COMPAT; 1368 msg.msg_iov = iov; 1369 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1370 if (msg.msg_controllen && error == 0) 1371 error = copyout(&msg.msg_controllen, 1372 &uap->msg->msg_accrightslen, sizeof (int)); 1373 free(iov, M_IOV); 1374 return (error); 1375 } 1376 #endif 1377 1378 int 1379 sys_recvmsg(td, uap) 1380 struct thread *td; 1381 struct recvmsg_args /* { 1382 int s; 1383 struct msghdr *msg; 1384 int flags; 1385 } */ *uap; 1386 { 1387 struct msghdr msg; 1388 struct iovec *uiov, *iov; 1389 int error; 1390 1391 error = copyin(uap->msg, &msg, sizeof (msg)); 1392 if (error != 0) 1393 return (error); 1394 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1395 if (error != 0) 1396 return (error); 1397 msg.msg_flags = uap->flags; 1398 #ifdef COMPAT_OLDSOCK 1399 msg.msg_flags &= ~MSG_COMPAT; 1400 #endif 1401 uiov = msg.msg_iov; 1402 msg.msg_iov = iov; 1403 error = recvit(td, uap->s, &msg, NULL); 1404 if (error == 0) { 1405 msg.msg_iov = uiov; 1406 error = copyout(&msg, uap->msg, sizeof(msg)); 1407 } 1408 free(iov, M_IOV); 1409 return (error); 1410 } 1411 1412 /* ARGSUSED */ 1413 int 1414 sys_shutdown(td, uap) 1415 struct thread *td; 1416 struct shutdown_args /* { 1417 int s; 1418 int how; 1419 } */ *uap; 1420 { 1421 struct socket *so; 1422 struct file *fp; 1423 cap_rights_t rights; 1424 int error; 1425 1426 AUDIT_ARG_FD(uap->s); 1427 error = getsock_cap(td->td_proc->p_fd, uap->s, 1428 cap_rights_init(&rights, CAP_SHUTDOWN), &fp, NULL); 1429 if (error == 0) { 1430 so = fp->f_data; 1431 error = soshutdown(so, uap->how); 1432 fdrop(fp, td); 1433 } 1434 return (error); 1435 } 1436 1437 /* ARGSUSED */ 1438 int 1439 sys_setsockopt(td, uap) 1440 struct thread *td; 1441 struct setsockopt_args /* { 1442 int s; 1443 int level; 1444 int name; 1445 caddr_t val; 1446 int valsize; 1447 } */ *uap; 1448 { 1449 1450 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1451 uap->val, UIO_USERSPACE, uap->valsize)); 1452 } 1453 1454 int 1455 kern_setsockopt(td, s, level, name, val, valseg, valsize) 1456 struct thread *td; 1457 int s; 1458 int level; 1459 int name; 1460 void *val; 1461 enum uio_seg valseg; 1462 socklen_t valsize; 1463 { 1464 struct socket *so; 1465 struct file *fp; 1466 struct sockopt sopt; 1467 cap_rights_t rights; 1468 int error; 1469 1470 if (val == NULL && valsize != 0) 1471 return (EFAULT); 1472 if ((int)valsize < 0) 1473 return (EINVAL); 1474 1475 sopt.sopt_dir = SOPT_SET; 1476 sopt.sopt_level = level; 1477 sopt.sopt_name = name; 1478 sopt.sopt_val = val; 1479 sopt.sopt_valsize = valsize; 1480 switch (valseg) { 1481 case UIO_USERSPACE: 1482 sopt.sopt_td = td; 1483 break; 1484 case UIO_SYSSPACE: 1485 sopt.sopt_td = NULL; 1486 break; 1487 default: 1488 panic("kern_setsockopt called with bad valseg"); 1489 } 1490 1491 AUDIT_ARG_FD(s); 1492 error = getsock_cap(td->td_proc->p_fd, s, 1493 cap_rights_init(&rights, CAP_SETSOCKOPT), &fp, NULL); 1494 if (error == 0) { 1495 so = fp->f_data; 1496 error = sosetopt(so, &sopt); 1497 fdrop(fp, td); 1498 } 1499 return(error); 1500 } 1501 1502 /* ARGSUSED */ 1503 int 1504 sys_getsockopt(td, uap) 1505 struct thread *td; 1506 struct getsockopt_args /* { 1507 int s; 1508 int level; 1509 int name; 1510 void * __restrict val; 1511 socklen_t * __restrict avalsize; 1512 } */ *uap; 1513 { 1514 socklen_t valsize; 1515 int error; 1516 1517 if (uap->val) { 1518 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1519 if (error != 0) 1520 return (error); 1521 } 1522 1523 error = kern_getsockopt(td, uap->s, uap->level, uap->name, 1524 uap->val, UIO_USERSPACE, &valsize); 1525 1526 if (error == 0) 1527 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1528 return (error); 1529 } 1530 1531 /* 1532 * Kernel version of getsockopt. 1533 * optval can be a userland or userspace. optlen is always a kernel pointer. 1534 */ 1535 int 1536 kern_getsockopt(td, s, level, name, val, valseg, valsize) 1537 struct thread *td; 1538 int s; 1539 int level; 1540 int name; 1541 void *val; 1542 enum uio_seg valseg; 1543 socklen_t *valsize; 1544 { 1545 struct socket *so; 1546 struct file *fp; 1547 struct sockopt sopt; 1548 cap_rights_t rights; 1549 int error; 1550 1551 if (val == NULL) 1552 *valsize = 0; 1553 if ((int)*valsize < 0) 1554 return (EINVAL); 1555 1556 sopt.sopt_dir = SOPT_GET; 1557 sopt.sopt_level = level; 1558 sopt.sopt_name = name; 1559 sopt.sopt_val = val; 1560 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1561 switch (valseg) { 1562 case UIO_USERSPACE: 1563 sopt.sopt_td = td; 1564 break; 1565 case UIO_SYSSPACE: 1566 sopt.sopt_td = NULL; 1567 break; 1568 default: 1569 panic("kern_getsockopt called with bad valseg"); 1570 } 1571 1572 AUDIT_ARG_FD(s); 1573 error = getsock_cap(td->td_proc->p_fd, s, 1574 cap_rights_init(&rights, CAP_GETSOCKOPT), &fp, NULL); 1575 if (error == 0) { 1576 so = fp->f_data; 1577 error = sogetopt(so, &sopt); 1578 *valsize = sopt.sopt_valsize; 1579 fdrop(fp, td); 1580 } 1581 return (error); 1582 } 1583 1584 /* 1585 * getsockname1() - Get socket name. 1586 */ 1587 /* ARGSUSED */ 1588 static int 1589 getsockname1(td, uap, compat) 1590 struct thread *td; 1591 struct getsockname_args /* { 1592 int fdes; 1593 struct sockaddr * __restrict asa; 1594 socklen_t * __restrict alen; 1595 } */ *uap; 1596 int compat; 1597 { 1598 struct sockaddr *sa; 1599 socklen_t len; 1600 int error; 1601 1602 error = copyin(uap->alen, &len, sizeof(len)); 1603 if (error != 0) 1604 return (error); 1605 1606 error = kern_getsockname(td, uap->fdes, &sa, &len); 1607 if (error != 0) 1608 return (error); 1609 1610 if (len != 0) { 1611 #ifdef COMPAT_OLDSOCK 1612 if (compat) 1613 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1614 #endif 1615 error = copyout(sa, uap->asa, (u_int)len); 1616 } 1617 free(sa, M_SONAME); 1618 if (error == 0) 1619 error = copyout(&len, uap->alen, sizeof(len)); 1620 return (error); 1621 } 1622 1623 int 1624 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, 1625 socklen_t *alen) 1626 { 1627 struct socket *so; 1628 struct file *fp; 1629 cap_rights_t rights; 1630 socklen_t len; 1631 int error; 1632 1633 AUDIT_ARG_FD(fd); 1634 error = getsock_cap(td->td_proc->p_fd, fd, 1635 cap_rights_init(&rights, CAP_GETSOCKNAME), &fp, NULL); 1636 if (error != 0) 1637 return (error); 1638 so = fp->f_data; 1639 *sa = NULL; 1640 CURVNET_SET(so->so_vnet); 1641 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa); 1642 CURVNET_RESTORE(); 1643 if (error != 0) 1644 goto bad; 1645 if (*sa == NULL) 1646 len = 0; 1647 else 1648 len = MIN(*alen, (*sa)->sa_len); 1649 *alen = len; 1650 #ifdef KTRACE 1651 if (KTRPOINT(td, KTR_STRUCT)) 1652 ktrsockaddr(*sa); 1653 #endif 1654 bad: 1655 fdrop(fp, td); 1656 if (error != 0 && *sa != NULL) { 1657 free(*sa, M_SONAME); 1658 *sa = NULL; 1659 } 1660 return (error); 1661 } 1662 1663 int 1664 sys_getsockname(td, uap) 1665 struct thread *td; 1666 struct getsockname_args *uap; 1667 { 1668 1669 return (getsockname1(td, uap, 0)); 1670 } 1671 1672 #ifdef COMPAT_OLDSOCK 1673 int 1674 ogetsockname(td, uap) 1675 struct thread *td; 1676 struct getsockname_args *uap; 1677 { 1678 1679 return (getsockname1(td, uap, 1)); 1680 } 1681 #endif /* COMPAT_OLDSOCK */ 1682 1683 /* 1684 * getpeername1() - Get name of peer for connected socket. 1685 */ 1686 /* ARGSUSED */ 1687 static int 1688 getpeername1(td, uap, compat) 1689 struct thread *td; 1690 struct getpeername_args /* { 1691 int fdes; 1692 struct sockaddr * __restrict asa; 1693 socklen_t * __restrict alen; 1694 } */ *uap; 1695 int compat; 1696 { 1697 struct sockaddr *sa; 1698 socklen_t len; 1699 int error; 1700 1701 error = copyin(uap->alen, &len, sizeof (len)); 1702 if (error != 0) 1703 return (error); 1704 1705 error = kern_getpeername(td, uap->fdes, &sa, &len); 1706 if (error != 0) 1707 return (error); 1708 1709 if (len != 0) { 1710 #ifdef COMPAT_OLDSOCK 1711 if (compat) 1712 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1713 #endif 1714 error = copyout(sa, uap->asa, (u_int)len); 1715 } 1716 free(sa, M_SONAME); 1717 if (error == 0) 1718 error = copyout(&len, uap->alen, sizeof(len)); 1719 return (error); 1720 } 1721 1722 int 1723 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, 1724 socklen_t *alen) 1725 { 1726 struct socket *so; 1727 struct file *fp; 1728 cap_rights_t rights; 1729 socklen_t len; 1730 int error; 1731 1732 AUDIT_ARG_FD(fd); 1733 error = getsock_cap(td->td_proc->p_fd, fd, 1734 cap_rights_init(&rights, CAP_GETPEERNAME), &fp, NULL); 1735 if (error != 0) 1736 return (error); 1737 so = fp->f_data; 1738 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1739 error = ENOTCONN; 1740 goto done; 1741 } 1742 *sa = NULL; 1743 CURVNET_SET(so->so_vnet); 1744 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa); 1745 CURVNET_RESTORE(); 1746 if (error != 0) 1747 goto bad; 1748 if (*sa == NULL) 1749 len = 0; 1750 else 1751 len = MIN(*alen, (*sa)->sa_len); 1752 *alen = len; 1753 #ifdef KTRACE 1754 if (KTRPOINT(td, KTR_STRUCT)) 1755 ktrsockaddr(*sa); 1756 #endif 1757 bad: 1758 if (error != 0 && *sa != NULL) { 1759 free(*sa, M_SONAME); 1760 *sa = NULL; 1761 } 1762 done: 1763 fdrop(fp, td); 1764 return (error); 1765 } 1766 1767 int 1768 sys_getpeername(td, uap) 1769 struct thread *td; 1770 struct getpeername_args *uap; 1771 { 1772 1773 return (getpeername1(td, uap, 0)); 1774 } 1775 1776 #ifdef COMPAT_OLDSOCK 1777 int 1778 ogetpeername(td, uap) 1779 struct thread *td; 1780 struct ogetpeername_args *uap; 1781 { 1782 1783 /* XXX uap should have type `getpeername_args *' to begin with. */ 1784 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1785 } 1786 #endif /* COMPAT_OLDSOCK */ 1787 1788 int 1789 sockargs(mp, buf, buflen, type) 1790 struct mbuf **mp; 1791 caddr_t buf; 1792 int buflen, type; 1793 { 1794 struct sockaddr *sa; 1795 struct mbuf *m; 1796 int error; 1797 1798 if (buflen > MLEN) { 1799 #ifdef COMPAT_OLDSOCK 1800 if (type == MT_SONAME && buflen <= 112) 1801 buflen = MLEN; /* unix domain compat. hack */ 1802 else 1803 #endif 1804 if (buflen > MCLBYTES) 1805 return (EINVAL); 1806 } 1807 m = m_get2(buflen, M_WAITOK, type, 0); 1808 m->m_len = buflen; 1809 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1810 if (error != 0) 1811 (void) m_free(m); 1812 else { 1813 *mp = m; 1814 if (type == MT_SONAME) { 1815 sa = mtod(m, struct sockaddr *); 1816 1817 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1818 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1819 sa->sa_family = sa->sa_len; 1820 #endif 1821 sa->sa_len = buflen; 1822 } 1823 } 1824 return (error); 1825 } 1826 1827 int 1828 getsockaddr(namp, uaddr, len) 1829 struct sockaddr **namp; 1830 caddr_t uaddr; 1831 size_t len; 1832 { 1833 struct sockaddr *sa; 1834 int error; 1835 1836 if (len > SOCK_MAXADDRLEN) 1837 return (ENAMETOOLONG); 1838 if (len < offsetof(struct sockaddr, sa_data[0])) 1839 return (EINVAL); 1840 sa = malloc(len, M_SONAME, M_WAITOK); 1841 error = copyin(uaddr, sa, len); 1842 if (error != 0) { 1843 free(sa, M_SONAME); 1844 } else { 1845 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1846 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1847 sa->sa_family = sa->sa_len; 1848 #endif 1849 sa->sa_len = len; 1850 *namp = sa; 1851 } 1852 return (error); 1853 } 1854 1855 struct sendfile_sync { 1856 struct mtx mtx; 1857 struct cv cv; 1858 unsigned count; 1859 }; 1860 1861 /* 1862 * Detach mapped page and release resources back to the system. 1863 */ 1864 int 1865 sf_buf_mext(struct mbuf *mb, void *addr, void *args) 1866 { 1867 vm_page_t m; 1868 struct sendfile_sync *sfs; 1869 1870 m = sf_buf_page(args); 1871 sf_buf_free(args); 1872 vm_page_lock(m); 1873 vm_page_unwire(m, 0); 1874 /* 1875 * Check for the object going away on us. This can 1876 * happen since we don't hold a reference to it. 1877 * If so, we're responsible for freeing the page. 1878 */ 1879 if (m->wire_count == 0 && m->object == NULL) 1880 vm_page_free(m); 1881 vm_page_unlock(m); 1882 if (addr == NULL) 1883 return (EXT_FREE_OK); 1884 sfs = addr; 1885 mtx_lock(&sfs->mtx); 1886 KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0")); 1887 if (--sfs->count == 0) 1888 cv_signal(&sfs->cv); 1889 mtx_unlock(&sfs->mtx); 1890 return (EXT_FREE_OK); 1891 } 1892 1893 /* 1894 * sendfile(2) 1895 * 1896 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1897 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1898 * 1899 * Send a file specified by 'fd' and starting at 'offset' to a socket 1900 * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes == 1901 * 0. Optionally add a header and/or trailer to the socket output. If 1902 * specified, write the total number of bytes sent into *sbytes. 1903 */ 1904 int 1905 sys_sendfile(struct thread *td, struct sendfile_args *uap) 1906 { 1907 1908 return (do_sendfile(td, uap, 0)); 1909 } 1910 1911 static int 1912 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1913 { 1914 struct sf_hdtr hdtr; 1915 struct uio *hdr_uio, *trl_uio; 1916 struct file *fp; 1917 cap_rights_t rights; 1918 int error; 1919 1920 /* 1921 * File offset must be positive. If it goes beyond EOF 1922 * we send only the header/trailer and no payload data. 1923 */ 1924 if (uap->offset < 0) 1925 return (EINVAL); 1926 1927 hdr_uio = trl_uio = NULL; 1928 1929 if (uap->hdtr != NULL) { 1930 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1931 if (error != 0) 1932 goto out; 1933 if (hdtr.headers != NULL) { 1934 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1935 if (error != 0) 1936 goto out; 1937 } 1938 if (hdtr.trailers != NULL) { 1939 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); 1940 if (error != 0) 1941 goto out; 1942 1943 } 1944 } 1945 1946 AUDIT_ARG_FD(uap->fd); 1947 1948 /* 1949 * sendfile(2) can start at any offset within a file so we require 1950 * CAP_READ+CAP_SEEK = CAP_PREAD. 1951 */ 1952 if ((error = fget_read(td, uap->fd, 1953 cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) { 1954 goto out; 1955 } 1956 1957 error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset, 1958 uap->nbytes, uap->sbytes, uap->flags, compat ? SFK_COMPAT : 0, td); 1959 fdrop(fp, td); 1960 1961 out: 1962 free(hdr_uio, M_IOV); 1963 free(trl_uio, M_IOV); 1964 return (error); 1965 } 1966 1967 #ifdef COMPAT_FREEBSD4 1968 int 1969 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1970 { 1971 struct sendfile_args args; 1972 1973 args.fd = uap->fd; 1974 args.s = uap->s; 1975 args.offset = uap->offset; 1976 args.nbytes = uap->nbytes; 1977 args.hdtr = uap->hdtr; 1978 args.sbytes = uap->sbytes; 1979 args.flags = uap->flags; 1980 1981 return (do_sendfile(td, &args, 1)); 1982 } 1983 #endif /* COMPAT_FREEBSD4 */ 1984 1985 static int 1986 sendfile_readpage(vm_object_t obj, struct vnode *vp, int nd, 1987 off_t off, int xfsize, int bsize, struct thread *td, vm_page_t *res) 1988 { 1989 vm_page_t m; 1990 vm_pindex_t pindex; 1991 ssize_t resid; 1992 int error, readahead, rv; 1993 1994 pindex = OFF_TO_IDX(off); 1995 VM_OBJECT_WLOCK(obj); 1996 m = vm_page_grab(obj, pindex, (vp != NULL ? VM_ALLOC_NOBUSY | 1997 VM_ALLOC_IGN_SBUSY : 0) | VM_ALLOC_WIRED | VM_ALLOC_NORMAL); 1998 1999 /* 2000 * Check if page is valid for what we need, otherwise initiate I/O. 2001 * 2002 * The non-zero nd argument prevents disk I/O, instead we 2003 * return the caller what he specified in nd. In particular, 2004 * if we already turned some pages into mbufs, nd == EAGAIN 2005 * and the main function send them the pages before we come 2006 * here again and block. 2007 */ 2008 if (m->valid != 0 && vm_page_is_valid(m, off & PAGE_MASK, xfsize)) { 2009 if (vp == NULL) 2010 vm_page_xunbusy(m); 2011 VM_OBJECT_WUNLOCK(obj); 2012 *res = m; 2013 return (0); 2014 } else if (nd != 0) { 2015 if (vp == NULL) 2016 vm_page_xunbusy(m); 2017 error = nd; 2018 goto free_page; 2019 } 2020 2021 /* 2022 * Get the page from backing store. 2023 */ 2024 error = 0; 2025 if (vp != NULL) { 2026 VM_OBJECT_WUNLOCK(obj); 2027 readahead = sfreadahead * MAXBSIZE; 2028 2029 /* 2030 * Use vn_rdwr() instead of the pager interface for 2031 * the vnode, to allow the read-ahead. 2032 * 2033 * XXXMAC: Because we don't have fp->f_cred here, we 2034 * pass in NOCRED. This is probably wrong, but is 2035 * consistent with our original implementation. 2036 */ 2037 error = vn_rdwr(UIO_READ, vp, NULL, readahead, trunc_page(off), 2038 UIO_NOCOPY, IO_NODELOCKED | IO_VMIO | ((readahead / 2039 bsize) << IO_SEQSHIFT), td->td_ucred, NOCRED, &resid, td); 2040 SFSTAT_INC(sf_iocnt); 2041 VM_OBJECT_WLOCK(obj); 2042 } else { 2043 if (vm_pager_has_page(obj, pindex, NULL, NULL)) { 2044 rv = vm_pager_get_pages(obj, &m, 1, 0); 2045 SFSTAT_INC(sf_iocnt); 2046 m = vm_page_lookup(obj, pindex); 2047 if (m == NULL) 2048 error = EIO; 2049 else if (rv != VM_PAGER_OK) { 2050 vm_page_lock(m); 2051 vm_page_free(m); 2052 vm_page_unlock(m); 2053 m = NULL; 2054 error = EIO; 2055 } 2056 } else { 2057 pmap_zero_page(m); 2058 m->valid = VM_PAGE_BITS_ALL; 2059 m->dirty = 0; 2060 } 2061 if (m != NULL) 2062 vm_page_xunbusy(m); 2063 } 2064 if (error == 0) { 2065 *res = m; 2066 } else if (m != NULL) { 2067 free_page: 2068 vm_page_lock(m); 2069 vm_page_unwire(m, 0); 2070 2071 /* 2072 * See if anyone else might know about this page. If 2073 * not and it is not valid, then free it. 2074 */ 2075 if (m->wire_count == 0 && m->valid == 0 && !vm_page_busied(m)) 2076 vm_page_free(m); 2077 vm_page_unlock(m); 2078 } 2079 KASSERT(error != 0 || (m->wire_count > 0 && 2080 vm_page_is_valid(m, off & PAGE_MASK, xfsize)), 2081 ("wrong page state m %p", m)); 2082 VM_OBJECT_WUNLOCK(obj); 2083 return (error); 2084 } 2085 2086 static int 2087 sendfile_getobj(struct thread *td, struct file *fp, vm_object_t *obj_res, 2088 struct vnode **vp_res, struct shmfd **shmfd_res, off_t *obj_size, 2089 int *bsize) 2090 { 2091 struct vattr va; 2092 vm_object_t obj; 2093 struct vnode *vp; 2094 struct shmfd *shmfd; 2095 int error; 2096 2097 vp = *vp_res = NULL; 2098 obj = NULL; 2099 shmfd = *shmfd_res = NULL; 2100 *bsize = 0; 2101 2102 /* 2103 * The file descriptor must be a regular file and have a 2104 * backing VM object. 2105 */ 2106 if (fp->f_type == DTYPE_VNODE) { 2107 vp = fp->f_vnode; 2108 vn_lock(vp, LK_SHARED | LK_RETRY); 2109 if (vp->v_type != VREG) { 2110 error = EINVAL; 2111 goto out; 2112 } 2113 *bsize = vp->v_mount->mnt_stat.f_iosize; 2114 error = VOP_GETATTR(vp, &va, td->td_ucred); 2115 if (error != 0) 2116 goto out; 2117 *obj_size = va.va_size; 2118 obj = vp->v_object; 2119 if (obj == NULL) { 2120 error = EINVAL; 2121 goto out; 2122 } 2123 } else if (fp->f_type == DTYPE_SHM) { 2124 shmfd = fp->f_data; 2125 obj = shmfd->shm_object; 2126 *obj_size = shmfd->shm_size; 2127 } else { 2128 error = EINVAL; 2129 goto out; 2130 } 2131 2132 VM_OBJECT_WLOCK(obj); 2133 if ((obj->flags & OBJ_DEAD) != 0) { 2134 VM_OBJECT_WUNLOCK(obj); 2135 error = EBADF; 2136 goto out; 2137 } 2138 2139 /* 2140 * Temporarily increase the backing VM object's reference 2141 * count so that a forced reclamation of its vnode does not 2142 * immediately destroy it. 2143 */ 2144 vm_object_reference_locked(obj); 2145 VM_OBJECT_WUNLOCK(obj); 2146 *obj_res = obj; 2147 *vp_res = vp; 2148 *shmfd_res = shmfd; 2149 2150 out: 2151 if (vp != NULL) 2152 VOP_UNLOCK(vp, 0); 2153 return (error); 2154 } 2155 2156 static int 2157 kern_sendfile_getsock(struct thread *td, int s, struct file **sock_fp, 2158 struct socket **so) 2159 { 2160 cap_rights_t rights; 2161 int error; 2162 2163 *sock_fp = NULL; 2164 *so = NULL; 2165 2166 /* 2167 * The socket must be a stream socket and connected. 2168 */ 2169 error = getsock_cap(td->td_proc->p_fd, s, cap_rights_init(&rights, 2170 CAP_SEND), sock_fp, NULL); 2171 if (error != 0) 2172 return (error); 2173 *so = (*sock_fp)->f_data; 2174 if ((*so)->so_type != SOCK_STREAM) 2175 return (EINVAL); 2176 if (((*so)->so_state & SS_ISCONNECTED) == 0) 2177 return (ENOTCONN); 2178 return (0); 2179 } 2180 2181 int 2182 vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, 2183 struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, 2184 int kflags, struct thread *td) 2185 { 2186 struct file *sock_fp; 2187 struct vnode *vp; 2188 struct vm_object *obj; 2189 struct socket *so; 2190 struct mbuf *m; 2191 struct sf_buf *sf; 2192 struct vm_page *pg; 2193 struct shmfd *shmfd; 2194 struct sendfile_sync *sfs; 2195 struct vattr va; 2196 off_t off, xfsize, fsbytes, sbytes, rem, obj_size; 2197 int error, bsize, nd, hdrlen, mnw; 2198 bool inflight_called; 2199 2200 pg = NULL; 2201 obj = NULL; 2202 so = NULL; 2203 m = NULL; 2204 sfs = NULL; 2205 fsbytes = sbytes = 0; 2206 hdrlen = mnw = 0; 2207 rem = nbytes; 2208 obj_size = 0; 2209 inflight_called = false; 2210 2211 error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize); 2212 if (error != 0) 2213 return (error); 2214 if (rem == 0) 2215 rem = obj_size; 2216 2217 error = kern_sendfile_getsock(td, sockfd, &sock_fp, &so); 2218 if (error != 0) 2219 goto out; 2220 2221 /* 2222 * Do not wait on memory allocations but return ENOMEM for 2223 * caller to retry later. 2224 * XXX: Experimental. 2225 */ 2226 if (flags & SF_MNOWAIT) 2227 mnw = 1; 2228 2229 if (flags & SF_SYNC) { 2230 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO); 2231 mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); 2232 cv_init(&sfs->cv, "sendfile"); 2233 } 2234 2235 #ifdef MAC 2236 error = mac_socket_check_send(td->td_ucred, so); 2237 if (error != 0) 2238 goto out; 2239 #endif 2240 2241 /* If headers are specified copy them into mbufs. */ 2242 if (hdr_uio != NULL) { 2243 hdr_uio->uio_td = td; 2244 hdr_uio->uio_rw = UIO_WRITE; 2245 if (hdr_uio->uio_resid > 0) { 2246 /* 2247 * In FBSD < 5.0 the nbytes to send also included 2248 * the header. If compat is specified subtract the 2249 * header size from nbytes. 2250 */ 2251 if (kflags & SFK_COMPAT) { 2252 if (nbytes > hdr_uio->uio_resid) 2253 nbytes -= hdr_uio->uio_resid; 2254 else 2255 nbytes = 0; 2256 } 2257 m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK), 2258 0, 0, 0); 2259 if (m == NULL) { 2260 error = mnw ? EAGAIN : ENOBUFS; 2261 goto out; 2262 } 2263 hdrlen = m_length(m, NULL); 2264 } 2265 } 2266 2267 /* 2268 * Protect against multiple writers to the socket. 2269 * 2270 * XXXRW: Historically this has assumed non-interruptibility, so now 2271 * we implement that, but possibly shouldn't. 2272 */ 2273 (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR); 2274 2275 /* 2276 * Loop through the pages of the file, starting with the requested 2277 * offset. Get a file page (do I/O if necessary), map the file page 2278 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 2279 * it on the socket. 2280 * This is done in two loops. The inner loop turns as many pages 2281 * as it can, up to available socket buffer space, without blocking 2282 * into mbufs to have it bulk delivered into the socket send buffer. 2283 * The outer loop checks the state and available space of the socket 2284 * and takes care of the overall progress. 2285 */ 2286 for (off = offset; ; ) { 2287 struct mbuf *mtail; 2288 int loopbytes; 2289 int space; 2290 int done; 2291 2292 if ((nbytes != 0 && nbytes == fsbytes) || 2293 (nbytes == 0 && obj_size == fsbytes)) 2294 break; 2295 2296 mtail = NULL; 2297 loopbytes = 0; 2298 space = 0; 2299 done = 0; 2300 2301 /* 2302 * Check the socket state for ongoing connection, 2303 * no errors and space in socket buffer. 2304 * If space is low allow for the remainder of the 2305 * file to be processed if it fits the socket buffer. 2306 * Otherwise block in waiting for sufficient space 2307 * to proceed, or if the socket is nonblocking, return 2308 * to userland with EAGAIN while reporting how far 2309 * we've come. 2310 * We wait until the socket buffer has significant free 2311 * space to do bulk sends. This makes good use of file 2312 * system read ahead and allows packet segmentation 2313 * offloading hardware to take over lots of work. If 2314 * we were not careful here we would send off only one 2315 * sfbuf at a time. 2316 */ 2317 SOCKBUF_LOCK(&so->so_snd); 2318 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2) 2319 so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; 2320 retry_space: 2321 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2322 error = EPIPE; 2323 SOCKBUF_UNLOCK(&so->so_snd); 2324 goto done; 2325 } else if (so->so_error) { 2326 error = so->so_error; 2327 so->so_error = 0; 2328 SOCKBUF_UNLOCK(&so->so_snd); 2329 goto done; 2330 } 2331 space = sbspace(&so->so_snd); 2332 if (space < rem && 2333 (space <= 0 || 2334 space < so->so_snd.sb_lowat)) { 2335 if (so->so_state & SS_NBIO) { 2336 SOCKBUF_UNLOCK(&so->so_snd); 2337 error = EAGAIN; 2338 goto done; 2339 } 2340 /* 2341 * sbwait drops the lock while sleeping. 2342 * When we loop back to retry_space the 2343 * state may have changed and we retest 2344 * for it. 2345 */ 2346 error = sbwait(&so->so_snd); 2347 /* 2348 * An error from sbwait usually indicates that we've 2349 * been interrupted by a signal. If we've sent anything 2350 * then return bytes sent, otherwise return the error. 2351 */ 2352 if (error != 0) { 2353 SOCKBUF_UNLOCK(&so->so_snd); 2354 goto done; 2355 } 2356 goto retry_space; 2357 } 2358 SOCKBUF_UNLOCK(&so->so_snd); 2359 2360 /* 2361 * Reduce space in the socket buffer by the size of 2362 * the header mbuf chain. 2363 * hdrlen is set to 0 after the first loop. 2364 */ 2365 space -= hdrlen; 2366 2367 if (vp != NULL) { 2368 error = vn_lock(vp, LK_SHARED); 2369 if (error != 0) 2370 goto done; 2371 error = VOP_GETATTR(vp, &va, td->td_ucred); 2372 if (error != 0 || off >= va.va_size) { 2373 VOP_UNLOCK(vp, 0); 2374 goto done; 2375 } 2376 obj_size = va.va_size; 2377 } 2378 2379 /* 2380 * Loop and construct maximum sized mbuf chain to be bulk 2381 * dumped into socket buffer. 2382 */ 2383 while (space > loopbytes) { 2384 vm_offset_t pgoff; 2385 struct mbuf *m0; 2386 2387 /* 2388 * Calculate the amount to transfer. 2389 * Not to exceed a page, the EOF, 2390 * or the passed in nbytes. 2391 */ 2392 pgoff = (vm_offset_t)(off & PAGE_MASK); 2393 rem = obj_size - offset; 2394 if (nbytes != 0) 2395 rem = omin(rem, nbytes); 2396 rem -= fsbytes + loopbytes; 2397 xfsize = omin(PAGE_SIZE - pgoff, rem); 2398 xfsize = omin(space - loopbytes, xfsize); 2399 if (xfsize <= 0) { 2400 done = 1; /* all data sent */ 2401 break; 2402 } 2403 2404 /* 2405 * Attempt to look up the page. Allocate 2406 * if not found or wait and loop if busy. 2407 */ 2408 if (m != NULL) 2409 nd = EAGAIN; /* send what we already got */ 2410 else if ((flags & SF_NODISKIO) != 0) 2411 nd = EBUSY; 2412 else 2413 nd = 0; 2414 error = sendfile_readpage(obj, vp, nd, off, 2415 xfsize, bsize, td, &pg); 2416 if (error != 0) { 2417 if (error == EAGAIN) 2418 error = 0; /* not a real error */ 2419 break; 2420 } 2421 2422 /* 2423 * Get a sendfile buf. When allocating the 2424 * first buffer for mbuf chain, we usually 2425 * wait as long as necessary, but this wait 2426 * can be interrupted. For consequent 2427 * buffers, do not sleep, since several 2428 * threads might exhaust the buffers and then 2429 * deadlock. 2430 */ 2431 sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT : 2432 SFB_CATCH); 2433 if (sf == NULL) { 2434 SFSTAT_INC(sf_allocfail); 2435 vm_page_lock(pg); 2436 vm_page_unwire(pg, 0); 2437 KASSERT(pg->object != NULL, 2438 ("%s: object disappeared", __func__)); 2439 vm_page_unlock(pg); 2440 if (m == NULL) 2441 error = (mnw ? EAGAIN : EINTR); 2442 break; 2443 } 2444 2445 /* 2446 * Get an mbuf and set it up as having 2447 * external storage. 2448 */ 2449 m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); 2450 if (m0 == NULL) { 2451 error = (mnw ? EAGAIN : ENOBUFS); 2452 (void)sf_buf_mext(NULL, NULL, sf); 2453 break; 2454 } 2455 if (m_extadd(m0, (caddr_t )sf_buf_kva(sf), PAGE_SIZE, 2456 sf_buf_mext, sfs, sf, M_RDONLY, EXT_SFBUF, 2457 (mnw ? M_NOWAIT : M_WAITOK)) != 0) { 2458 error = (mnw ? EAGAIN : ENOBUFS); 2459 (void)sf_buf_mext(NULL, NULL, sf); 2460 m_freem(m0); 2461 break; 2462 } 2463 m0->m_data = (char *)sf_buf_kva(sf) + pgoff; 2464 m0->m_len = xfsize; 2465 2466 /* Append to mbuf chain. */ 2467 if (mtail != NULL) 2468 mtail->m_next = m0; 2469 else if (m != NULL) 2470 m_last(m)->m_next = m0; 2471 else 2472 m = m0; 2473 mtail = m0; 2474 2475 /* Keep track of bits processed. */ 2476 loopbytes += xfsize; 2477 off += xfsize; 2478 2479 if (sfs != NULL) { 2480 mtx_lock(&sfs->mtx); 2481 sfs->count++; 2482 mtx_unlock(&sfs->mtx); 2483 } 2484 } 2485 2486 if (vp != NULL) 2487 VOP_UNLOCK(vp, 0); 2488 2489 /* Add the buffer chain to the socket buffer. */ 2490 if (m != NULL) { 2491 int mlen, err; 2492 2493 mlen = m_length(m, NULL); 2494 SOCKBUF_LOCK(&so->so_snd); 2495 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2496 error = EPIPE; 2497 SOCKBUF_UNLOCK(&so->so_snd); 2498 goto done; 2499 } 2500 SOCKBUF_UNLOCK(&so->so_snd); 2501 CURVNET_SET(so->so_vnet); 2502 /* Avoid error aliasing. */ 2503 err = (*so->so_proto->pr_usrreqs->pru_send) 2504 (so, 0, m, NULL, NULL, td); 2505 CURVNET_RESTORE(); 2506 if (err == 0) { 2507 /* 2508 * We need two counters to get the 2509 * file offset and nbytes to send 2510 * right: 2511 * - sbytes contains the total amount 2512 * of bytes sent, including headers. 2513 * - fsbytes contains the total amount 2514 * of bytes sent from the file. 2515 */ 2516 sbytes += mlen; 2517 fsbytes += mlen; 2518 if (hdrlen) { 2519 fsbytes -= hdrlen; 2520 hdrlen = 0; 2521 } 2522 } else if (error == 0) 2523 error = err; 2524 m = NULL; /* pru_send always consumes */ 2525 } 2526 2527 /* Quit outer loop on error or when we're done. */ 2528 if (done) 2529 break; 2530 if (error != 0) 2531 goto done; 2532 } 2533 2534 /* 2535 * Send trailers. Wimp out and use writev(2). 2536 */ 2537 if (trl_uio != NULL) { 2538 sbunlock(&so->so_snd); 2539 error = kern_writev(td, sockfd, trl_uio); 2540 if (error == 0) 2541 sbytes += td->td_retval[0]; 2542 goto out; 2543 } 2544 2545 done: 2546 sbunlock(&so->so_snd); 2547 out: 2548 /* 2549 * If there was no error we have to clear td->td_retval[0] 2550 * because it may have been set by writev. 2551 */ 2552 if (error == 0) { 2553 td->td_retval[0] = 0; 2554 } 2555 if (sent != NULL) { 2556 copyout(&sbytes, sent, sizeof(off_t)); 2557 } 2558 if (obj != NULL) 2559 vm_object_deallocate(obj); 2560 if (so) 2561 fdrop(sock_fp, td); 2562 if (m) 2563 m_freem(m); 2564 2565 if (sfs != NULL) { 2566 mtx_lock(&sfs->mtx); 2567 if (sfs->count != 0) 2568 cv_wait(&sfs->cv, &sfs->mtx); 2569 KASSERT(sfs->count == 0, ("sendfile sync still busy")); 2570 cv_destroy(&sfs->cv); 2571 mtx_destroy(&sfs->mtx); 2572 free(sfs, M_TEMP); 2573 } 2574 2575 if (error == ERESTART) 2576 error = EINTR; 2577 2578 return (error); 2579 } 2580 2581 /* 2582 * SCTP syscalls. 2583 * Functionality only compiled in if SCTP is defined in the kernel Makefile, 2584 * otherwise all return EOPNOTSUPP. 2585 * XXX: We should make this loadable one day. 2586 */ 2587 int 2588 sys_sctp_peeloff(td, uap) 2589 struct thread *td; 2590 struct sctp_peeloff_args /* { 2591 int sd; 2592 caddr_t name; 2593 } */ *uap; 2594 { 2595 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2596 struct file *nfp = NULL; 2597 struct socket *head, *so; 2598 cap_rights_t rights; 2599 u_int fflag; 2600 int error, fd; 2601 2602 AUDIT_ARG_FD(uap->sd); 2603 error = fgetsock(td, uap->sd, cap_rights_init(&rights, CAP_PEELOFF), 2604 &head, &fflag); 2605 if (error != 0) 2606 goto done2; 2607 if (head->so_proto->pr_protocol != IPPROTO_SCTP) { 2608 error = EOPNOTSUPP; 2609 goto done; 2610 } 2611 error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name); 2612 if (error != 0) 2613 goto done; 2614 /* 2615 * At this point we know we do have a assoc to pull 2616 * we proceed to get the fd setup. This may block 2617 * but that is ok. 2618 */ 2619 2620 error = falloc(td, &nfp, &fd, 0); 2621 if (error != 0) 2622 goto done; 2623 td->td_retval[0] = fd; 2624 2625 CURVNET_SET(head->so_vnet); 2626 so = sonewconn(head, SS_ISCONNECTED); 2627 if (so == NULL) { 2628 error = ENOMEM; 2629 goto noconnection; 2630 } 2631 /* 2632 * Before changing the flags on the socket, we have to bump the 2633 * reference count. Otherwise, if the protocol calls sofree(), 2634 * the socket will be released due to a zero refcount. 2635 */ 2636 SOCK_LOCK(so); 2637 soref(so); /* file descriptor reference */ 2638 SOCK_UNLOCK(so); 2639 2640 ACCEPT_LOCK(); 2641 2642 TAILQ_REMOVE(&head->so_comp, so, so_list); 2643 head->so_qlen--; 2644 so->so_state |= (head->so_state & SS_NBIO); 2645 so->so_state &= ~SS_NOFDREF; 2646 so->so_qstate &= ~SQ_COMP; 2647 so->so_head = NULL; 2648 ACCEPT_UNLOCK(); 2649 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 2650 error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name); 2651 if (error != 0) 2652 goto noconnection; 2653 if (head->so_sigio != NULL) 2654 fsetown(fgetown(&head->so_sigio), &so->so_sigio); 2655 2656 noconnection: 2657 /* 2658 * close the new descriptor, assuming someone hasn't ripped it 2659 * out from under us. 2660 */ 2661 if (error != 0) 2662 fdclose(td->td_proc->p_fd, nfp, fd, td); 2663 2664 /* 2665 * Release explicitly held references before returning. 2666 */ 2667 CURVNET_RESTORE(); 2668 done: 2669 if (nfp != NULL) 2670 fdrop(nfp, td); 2671 fputsock(head); 2672 done2: 2673 return (error); 2674 #else /* SCTP */ 2675 return (EOPNOTSUPP); 2676 #endif /* SCTP */ 2677 } 2678 2679 int 2680 sys_sctp_generic_sendmsg (td, uap) 2681 struct thread *td; 2682 struct sctp_generic_sendmsg_args /* { 2683 int sd, 2684 caddr_t msg, 2685 int mlen, 2686 caddr_t to, 2687 __socklen_t tolen, 2688 struct sctp_sndrcvinfo *sinfo, 2689 int flags 2690 } */ *uap; 2691 { 2692 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2693 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2694 struct socket *so; 2695 struct file *fp = NULL; 2696 struct sockaddr *to = NULL; 2697 #ifdef KTRACE 2698 struct uio *ktruio = NULL; 2699 #endif 2700 struct uio auio; 2701 struct iovec iov[1]; 2702 cap_rights_t rights; 2703 int error = 0, len; 2704 2705 if (uap->sinfo != NULL) { 2706 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2707 if (error != 0) 2708 return (error); 2709 u_sinfo = &sinfo; 2710 } 2711 2712 cap_rights_init(&rights, CAP_SEND); 2713 if (uap->tolen != 0) { 2714 error = getsockaddr(&to, uap->to, uap->tolen); 2715 if (error != 0) { 2716 to = NULL; 2717 goto sctp_bad2; 2718 } 2719 cap_rights_set(&rights, CAP_CONNECT); 2720 } 2721 2722 AUDIT_ARG_FD(uap->sd); 2723 error = getsock_cap(td->td_proc->p_fd, uap->sd, &rights, &fp, NULL); 2724 if (error != 0) 2725 goto sctp_bad; 2726 #ifdef KTRACE 2727 if (to && (KTRPOINT(td, KTR_STRUCT))) 2728 ktrsockaddr(to); 2729 #endif 2730 2731 iov[0].iov_base = uap->msg; 2732 iov[0].iov_len = uap->mlen; 2733 2734 so = (struct socket *)fp->f_data; 2735 if (so->so_proto->pr_protocol != IPPROTO_SCTP) { 2736 error = EOPNOTSUPP; 2737 goto sctp_bad; 2738 } 2739 #ifdef MAC 2740 error = mac_socket_check_send(td->td_ucred, so); 2741 if (error != 0) 2742 goto sctp_bad; 2743 #endif /* MAC */ 2744 2745 auio.uio_iov = iov; 2746 auio.uio_iovcnt = 1; 2747 auio.uio_segflg = UIO_USERSPACE; 2748 auio.uio_rw = UIO_WRITE; 2749 auio.uio_td = td; 2750 auio.uio_offset = 0; /* XXX */ 2751 auio.uio_resid = 0; 2752 len = auio.uio_resid = uap->mlen; 2753 CURVNET_SET(so->so_vnet); 2754 error = sctp_lower_sosend(so, to, &auio, (struct mbuf *)NULL, 2755 (struct mbuf *)NULL, uap->flags, u_sinfo, td); 2756 CURVNET_RESTORE(); 2757 if (error != 0) { 2758 if (auio.uio_resid != len && (error == ERESTART || 2759 error == EINTR || error == EWOULDBLOCK)) 2760 error = 0; 2761 /* Generation of SIGPIPE can be controlled per socket. */ 2762 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2763 !(uap->flags & MSG_NOSIGNAL)) { 2764 PROC_LOCK(td->td_proc); 2765 tdsignal(td, SIGPIPE); 2766 PROC_UNLOCK(td->td_proc); 2767 } 2768 } 2769 if (error == 0) 2770 td->td_retval[0] = len - auio.uio_resid; 2771 #ifdef KTRACE 2772 if (ktruio != NULL) { 2773 ktruio->uio_resid = td->td_retval[0]; 2774 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2775 } 2776 #endif /* KTRACE */ 2777 sctp_bad: 2778 if (fp != NULL) 2779 fdrop(fp, td); 2780 sctp_bad2: 2781 free(to, M_SONAME); 2782 return (error); 2783 #else /* SCTP */ 2784 return (EOPNOTSUPP); 2785 #endif /* SCTP */ 2786 } 2787 2788 int 2789 sys_sctp_generic_sendmsg_iov(td, uap) 2790 struct thread *td; 2791 struct sctp_generic_sendmsg_iov_args /* { 2792 int sd, 2793 struct iovec *iov, 2794 int iovlen, 2795 caddr_t to, 2796 __socklen_t tolen, 2797 struct sctp_sndrcvinfo *sinfo, 2798 int flags 2799 } */ *uap; 2800 { 2801 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2802 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2803 struct socket *so; 2804 struct file *fp = NULL; 2805 struct sockaddr *to = NULL; 2806 #ifdef KTRACE 2807 struct uio *ktruio = NULL; 2808 #endif 2809 struct uio auio; 2810 struct iovec *iov, *tiov; 2811 cap_rights_t rights; 2812 ssize_t len; 2813 int error, i; 2814 2815 if (uap->sinfo != NULL) { 2816 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2817 if (error != 0) 2818 return (error); 2819 u_sinfo = &sinfo; 2820 } 2821 cap_rights_init(&rights, CAP_SEND); 2822 if (uap->tolen != 0) { 2823 error = getsockaddr(&to, uap->to, uap->tolen); 2824 if (error != 0) { 2825 to = NULL; 2826 goto sctp_bad2; 2827 } 2828 cap_rights_set(&rights, CAP_CONNECT); 2829 } 2830 2831 AUDIT_ARG_FD(uap->sd); 2832 error = getsock_cap(td->td_proc->p_fd, uap->sd, &rights, &fp, NULL); 2833 if (error != 0) 2834 goto sctp_bad1; 2835 2836 #ifdef COMPAT_FREEBSD32 2837 if (SV_CURPROC_FLAG(SV_ILP32)) 2838 error = freebsd32_copyiniov((struct iovec32 *)uap->iov, 2839 uap->iovlen, &iov, EMSGSIZE); 2840 else 2841 #endif 2842 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2843 if (error != 0) 2844 goto sctp_bad1; 2845 #ifdef KTRACE 2846 if (to && (KTRPOINT(td, KTR_STRUCT))) 2847 ktrsockaddr(to); 2848 #endif 2849 2850 so = (struct socket *)fp->f_data; 2851 if (so->so_proto->pr_protocol != IPPROTO_SCTP) { 2852 error = EOPNOTSUPP; 2853 goto sctp_bad; 2854 } 2855 #ifdef MAC 2856 error = mac_socket_check_send(td->td_ucred, so); 2857 if (error != 0) 2858 goto sctp_bad; 2859 #endif /* MAC */ 2860 2861 auio.uio_iov = iov; 2862 auio.uio_iovcnt = uap->iovlen; 2863 auio.uio_segflg = UIO_USERSPACE; 2864 auio.uio_rw = UIO_WRITE; 2865 auio.uio_td = td; 2866 auio.uio_offset = 0; /* XXX */ 2867 auio.uio_resid = 0; 2868 tiov = iov; 2869 for (i = 0; i <uap->iovlen; i++, tiov++) { 2870 if ((auio.uio_resid += tiov->iov_len) < 0) { 2871 error = EINVAL; 2872 goto sctp_bad; 2873 } 2874 } 2875 len = auio.uio_resid; 2876 CURVNET_SET(so->so_vnet); 2877 error = sctp_lower_sosend(so, to, &auio, 2878 (struct mbuf *)NULL, (struct mbuf *)NULL, 2879 uap->flags, u_sinfo, td); 2880 CURVNET_RESTORE(); 2881 if (error != 0) { 2882 if (auio.uio_resid != len && (error == ERESTART || 2883 error == EINTR || error == EWOULDBLOCK)) 2884 error = 0; 2885 /* Generation of SIGPIPE can be controlled per socket */ 2886 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2887 !(uap->flags & MSG_NOSIGNAL)) { 2888 PROC_LOCK(td->td_proc); 2889 tdsignal(td, SIGPIPE); 2890 PROC_UNLOCK(td->td_proc); 2891 } 2892 } 2893 if (error == 0) 2894 td->td_retval[0] = len - auio.uio_resid; 2895 #ifdef KTRACE 2896 if (ktruio != NULL) { 2897 ktruio->uio_resid = td->td_retval[0]; 2898 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2899 } 2900 #endif /* KTRACE */ 2901 sctp_bad: 2902 free(iov, M_IOV); 2903 sctp_bad1: 2904 if (fp != NULL) 2905 fdrop(fp, td); 2906 sctp_bad2: 2907 free(to, M_SONAME); 2908 return (error); 2909 #else /* SCTP */ 2910 return (EOPNOTSUPP); 2911 #endif /* SCTP */ 2912 } 2913 2914 int 2915 sys_sctp_generic_recvmsg(td, uap) 2916 struct thread *td; 2917 struct sctp_generic_recvmsg_args /* { 2918 int sd, 2919 struct iovec *iov, 2920 int iovlen, 2921 struct sockaddr *from, 2922 __socklen_t *fromlenaddr, 2923 struct sctp_sndrcvinfo *sinfo, 2924 int *msg_flags 2925 } */ *uap; 2926 { 2927 #if (defined(INET) || defined(INET6)) && defined(SCTP) 2928 uint8_t sockbufstore[256]; 2929 struct uio auio; 2930 struct iovec *iov, *tiov; 2931 struct sctp_sndrcvinfo sinfo; 2932 struct socket *so; 2933 struct file *fp = NULL; 2934 struct sockaddr *fromsa; 2935 cap_rights_t rights; 2936 #ifdef KTRACE 2937 struct uio *ktruio = NULL; 2938 #endif 2939 ssize_t len; 2940 int error, fromlen, i, msg_flags; 2941 2942 AUDIT_ARG_FD(uap->sd); 2943 error = getsock_cap(td->td_proc->p_fd, uap->sd, 2944 cap_rights_init(&rights, CAP_RECV), &fp, NULL); 2945 if (error != 0) 2946 return (error); 2947 #ifdef COMPAT_FREEBSD32 2948 if (SV_CURPROC_FLAG(SV_ILP32)) 2949 error = freebsd32_copyiniov((struct iovec32 *)uap->iov, 2950 uap->iovlen, &iov, EMSGSIZE); 2951 else 2952 #endif 2953 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2954 if (error != 0) 2955 goto out1; 2956 2957 so = fp->f_data; 2958 if (so->so_proto->pr_protocol != IPPROTO_SCTP) { 2959 error = EOPNOTSUPP; 2960 goto out; 2961 } 2962 #ifdef MAC 2963 error = mac_socket_check_receive(td->td_ucred, so); 2964 if (error != 0) 2965 goto out; 2966 #endif /* MAC */ 2967 2968 if (uap->fromlenaddr != NULL) { 2969 error = copyin(uap->fromlenaddr, &fromlen, sizeof (fromlen)); 2970 if (error != 0) 2971 goto out; 2972 } else { 2973 fromlen = 0; 2974 } 2975 if (uap->msg_flags) { 2976 error = copyin(uap->msg_flags, &msg_flags, sizeof (int)); 2977 if (error != 0) 2978 goto out; 2979 } else { 2980 msg_flags = 0; 2981 } 2982 auio.uio_iov = iov; 2983 auio.uio_iovcnt = uap->iovlen; 2984 auio.uio_segflg = UIO_USERSPACE; 2985 auio.uio_rw = UIO_READ; 2986 auio.uio_td = td; 2987 auio.uio_offset = 0; /* XXX */ 2988 auio.uio_resid = 0; 2989 tiov = iov; 2990 for (i = 0; i <uap->iovlen; i++, tiov++) { 2991 if ((auio.uio_resid += tiov->iov_len) < 0) { 2992 error = EINVAL; 2993 goto out; 2994 } 2995 } 2996 len = auio.uio_resid; 2997 fromsa = (struct sockaddr *)sockbufstore; 2998 2999 #ifdef KTRACE 3000 if (KTRPOINT(td, KTR_GENIO)) 3001 ktruio = cloneuio(&auio); 3002 #endif /* KTRACE */ 3003 memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo)); 3004 CURVNET_SET(so->so_vnet); 3005 error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL, 3006 fromsa, fromlen, &msg_flags, 3007 (struct sctp_sndrcvinfo *)&sinfo, 1); 3008 CURVNET_RESTORE(); 3009 if (error != 0) { 3010 if (auio.uio_resid != len && (error == ERESTART || 3011 error == EINTR || error == EWOULDBLOCK)) 3012 error = 0; 3013 } else { 3014 if (uap->sinfo) 3015 error = copyout(&sinfo, uap->sinfo, sizeof (sinfo)); 3016 } 3017 #ifdef KTRACE 3018 if (ktruio != NULL) { 3019 ktruio->uio_resid = len - auio.uio_resid; 3020 ktrgenio(uap->sd, UIO_READ, ktruio, error); 3021 } 3022 #endif /* KTRACE */ 3023 if (error != 0) 3024 goto out; 3025 td->td_retval[0] = len - auio.uio_resid; 3026 3027 if (fromlen && uap->from) { 3028 len = fromlen; 3029 if (len <= 0 || fromsa == 0) 3030 len = 0; 3031 else { 3032 len = MIN(len, fromsa->sa_len); 3033 error = copyout(fromsa, uap->from, (size_t)len); 3034 if (error != 0) 3035 goto out; 3036 } 3037 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t)); 3038 if (error != 0) 3039 goto out; 3040 } 3041 #ifdef KTRACE 3042 if (KTRPOINT(td, KTR_STRUCT)) 3043 ktrsockaddr(fromsa); 3044 #endif 3045 if (uap->msg_flags) { 3046 error = copyout(&msg_flags, uap->msg_flags, sizeof (int)); 3047 if (error != 0) 3048 goto out; 3049 } 3050 out: 3051 free(iov, M_IOV); 3052 out1: 3053 if (fp != NULL) 3054 fdrop(fp, td); 3055 3056 return (error); 3057 #else /* SCTP */ 3058 return (EOPNOTSUPP); 3059 #endif /* SCTP */ 3060 } 3061