1 /*- 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2006 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 32 * $FreeBSD$ 33 */ 34 35 #include "opt_inet.h" 36 #include "opt_inet6.h" 37 #include "opt_tcpdebug.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/malloc.h> 42 #include <sys/kernel.h> 43 #include <sys/sysctl.h> 44 #include <sys/mbuf.h> 45 #ifdef INET6 46 #include <sys/domain.h> 47 #endif /* INET6 */ 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/protosw.h> 51 #include <sys/proc.h> 52 #include <sys/jail.h> 53 54 #include <net/if.h> 55 #include <net/route.h> 56 57 #include <netinet/in.h> 58 #include <netinet/in_systm.h> 59 #ifdef INET6 60 #include <netinet/ip6.h> 61 #endif 62 #include <netinet/in_pcb.h> 63 #ifdef INET6 64 #include <netinet6/in6_pcb.h> 65 #endif 66 #include <netinet/in_var.h> 67 #include <netinet/ip_var.h> 68 #ifdef INET6 69 #include <netinet6/ip6_var.h> 70 #include <netinet6/scope6_var.h> 71 #endif 72 #include <netinet/tcp.h> 73 #include <netinet/tcp_fsm.h> 74 #include <netinet/tcp_seq.h> 75 #include <netinet/tcp_timer.h> 76 #include <netinet/tcp_var.h> 77 #include <netinet/tcpip.h> 78 #ifdef TCPDEBUG 79 #include <netinet/tcp_debug.h> 80 #endif 81 82 /* 83 * TCP protocol interface to socket abstraction. 84 */ 85 extern char *tcpstates[]; /* XXX ??? */ 86 87 static int tcp_attach(struct socket *); 88 static int tcp_connect(struct tcpcb *, struct sockaddr *, 89 struct thread *td); 90 #ifdef INET6 91 static int tcp6_connect(struct tcpcb *, struct sockaddr *, 92 struct thread *td); 93 #endif /* INET6 */ 94 static void tcp_disconnect(struct tcpcb *); 95 static void tcp_usrclosed(struct tcpcb *); 96 static void tcp_fill_info(struct tcpcb *, struct tcp_info *); 97 98 #ifdef TCPDEBUG 99 #define TCPDEBUG0 int ostate = 0 100 #define TCPDEBUG1() ostate = tp ? tp->t_state : 0 101 #define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ 102 tcp_trace(TA_USER, ostate, tp, 0, 0, req) 103 #else 104 #define TCPDEBUG0 105 #define TCPDEBUG1() 106 #define TCPDEBUG2(req) 107 #endif 108 109 /* 110 * TCP attaches to socket via pru_attach(), reserving space, 111 * and an internet control block. 112 */ 113 static int 114 tcp_usr_attach(struct socket *so, int proto, struct thread *td) 115 { 116 struct inpcb *inp; 117 struct tcpcb *tp = NULL; 118 int error; 119 TCPDEBUG0; 120 121 inp = sotoinpcb(so); 122 KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL")); 123 INP_INFO_WLOCK(&tcbinfo); 124 TCPDEBUG1(); 125 126 error = tcp_attach(so); 127 if (error) 128 goto out; 129 130 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 131 so->so_linger = TCP_LINGERTIME; 132 133 inp = sotoinpcb(so); 134 tp = intotcpcb(inp); 135 out: 136 TCPDEBUG2(PRU_ATTACH); 137 INP_INFO_WUNLOCK(&tcbinfo); 138 return error; 139 } 140 141 /* 142 * pru_detach() detaches the TCP protocol from the socket. 143 * If the protocol state is non-embryonic, then can't 144 * do this directly: have to initiate a pru_disconnect(), 145 * which may finish later; embryonic TCB's can just 146 * be discarded here. 147 */ 148 static void 149 tcp_usr_detach(struct socket *so) 150 { 151 struct inpcb *inp; 152 struct tcpcb *tp; 153 #ifdef INET6 154 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0; 155 #endif 156 TCPDEBUG0; 157 158 inp = sotoinpcb(so); 159 KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL")); 160 INP_INFO_WLOCK(&tcbinfo); 161 INP_LOCK(inp); 162 KASSERT(inp->inp_socket != NULL, 163 ("tcp_usr_detach: inp_socket == NULL")); 164 TCPDEBUG1(); 165 166 /* 167 * First, if we still have full TCP state, and we're not dropped, 168 * initiate a disconnect. 169 */ 170 if (!(inp->inp_vflag & INP_TIMEWAIT) && 171 !(inp->inp_vflag & INP_DROPPED)) { 172 tp = intotcpcb(inp); 173 tcp_disconnect(tp); 174 } 175 176 /* 177 * Second, release any protocol state that we can reasonably release. 178 * Note that the call to tcp_disconnect() may actually have changed 179 * the TCP state, so we have to re-evaluate INP_TIMEWAIT and 180 * INP_DROPPED. 181 */ 182 if (inp->inp_vflag & INP_TIMEWAIT) { 183 if (inp->inp_vflag & INP_DROPPED) { 184 /* 185 * Connection was in time wait and has been dropped; 186 * the calling path is either via tcp_twclose(), or 187 * as a result of an eventual soclose() after 188 * tcp_twclose() has been called. In either case, 189 * tcp_twclose() has detached the tcptw from the 190 * inpcb, so we just detach and free the inpcb. 191 * 192 * XXXRW: Would it be cleaner to free the tcptw 193 * here? 194 */ 195 #ifdef INET6 196 if (isipv6) { 197 in6_pcbdetach(inp); 198 in6_pcbfree(inp); 199 } else { 200 #endif 201 in_pcbdetach(inp); 202 in_pcbfree(inp); 203 #ifdef INET6 204 } 205 #endif 206 } else { 207 /* 208 * Connection is in time wait and has not yet been 209 * dropped; allow the socket to be discarded, but 210 * need to keep inpcb until end of time wait. 211 */ 212 #ifdef INET6 213 if (isipv6) 214 in6_pcbdetach(inp); 215 else 216 #endif 217 in_pcbdetach(inp); 218 INP_UNLOCK(inp); 219 } 220 } else { 221 /* 222 * If not in timewait, there are two possible paths. First, 223 * the TCP connection is either embryonic or done, in which 224 * case we tear down all state. Second, it may still be 225 * active, in which case we acquire a reference to the socket 226 * and will free it later when TCP is done. 227 */ 228 tp = intotcpcb(inp); 229 if (inp->inp_vflag & INP_DROPPED || 230 tp->t_state < TCPS_SYN_SENT) { 231 tcp_discardcb(tp); 232 #ifdef INET6 233 if (isipv6) { 234 in_pcbdetach(inp); 235 in_pcbfree(inp); 236 } else { 237 #endif 238 in_pcbdetach(inp); 239 in_pcbfree(inp); 240 #ifdef INET6 241 } 242 #endif 243 } else { 244 SOCK_LOCK(so); 245 so->so_state |= SS_PROTOREF; 246 SOCK_UNLOCK(so); 247 inp->inp_vflag |= INP_SOCKREF; 248 INP_UNLOCK(inp); 249 } 250 } 251 tp = NULL; 252 TCPDEBUG2(PRU_DETACH); 253 INP_INFO_WUNLOCK(&tcbinfo); 254 } 255 256 /* 257 * Give the socket an address. 258 */ 259 static int 260 tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 261 { 262 int error = 0; 263 struct inpcb *inp; 264 struct tcpcb *tp = NULL; 265 struct sockaddr_in *sinp; 266 267 sinp = (struct sockaddr_in *)nam; 268 if (nam->sa_len != sizeof (*sinp)) 269 return (EINVAL); 270 /* 271 * Must check for multicast addresses and disallow binding 272 * to them. 273 */ 274 if (sinp->sin_family == AF_INET && 275 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) 276 return (EAFNOSUPPORT); 277 278 TCPDEBUG0; 279 INP_INFO_WLOCK(&tcbinfo); 280 inp = sotoinpcb(so); 281 KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL")); 282 INP_LOCK(inp); 283 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 284 error = EINVAL; 285 goto out; 286 } 287 tp = intotcpcb(inp); 288 TCPDEBUG1(); 289 error = in_pcbbind(inp, nam, td->td_ucred); 290 out: 291 TCPDEBUG2(PRU_BIND); 292 INP_UNLOCK(inp); 293 INP_INFO_WUNLOCK(&tcbinfo); 294 295 return (error); 296 } 297 298 #ifdef INET6 299 static int 300 tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 301 { 302 int error = 0; 303 struct inpcb *inp; 304 struct tcpcb *tp = NULL; 305 struct sockaddr_in6 *sin6p; 306 307 sin6p = (struct sockaddr_in6 *)nam; 308 if (nam->sa_len != sizeof (*sin6p)) 309 return (EINVAL); 310 /* 311 * Must check for multicast addresses and disallow binding 312 * to them. 313 */ 314 if (sin6p->sin6_family == AF_INET6 && 315 IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) 316 return (EAFNOSUPPORT); 317 318 TCPDEBUG0; 319 INP_INFO_WLOCK(&tcbinfo); 320 inp = sotoinpcb(so); 321 KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL")); 322 INP_LOCK(inp); 323 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 324 error = EINVAL; 325 goto out; 326 } 327 tp = intotcpcb(inp); 328 TCPDEBUG1(); 329 inp->inp_vflag &= ~INP_IPV4; 330 inp->inp_vflag |= INP_IPV6; 331 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { 332 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) 333 inp->inp_vflag |= INP_IPV4; 334 else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 335 struct sockaddr_in sin; 336 337 in6_sin6_2_sin(&sin, sin6p); 338 inp->inp_vflag |= INP_IPV4; 339 inp->inp_vflag &= ~INP_IPV6; 340 error = in_pcbbind(inp, (struct sockaddr *)&sin, 341 td->td_ucred); 342 goto out; 343 } 344 } 345 error = in6_pcbbind(inp, nam, td->td_ucred); 346 out: 347 TCPDEBUG2(PRU_BIND); 348 INP_UNLOCK(inp); 349 INP_INFO_WUNLOCK(&tcbinfo); 350 return (error); 351 } 352 #endif /* INET6 */ 353 354 /* 355 * Prepare to accept connections. 356 */ 357 static int 358 tcp_usr_listen(struct socket *so, int backlog, struct thread *td) 359 { 360 int error = 0; 361 struct inpcb *inp; 362 struct tcpcb *tp = NULL; 363 364 TCPDEBUG0; 365 INP_INFO_WLOCK(&tcbinfo); 366 inp = sotoinpcb(so); 367 KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL")); 368 INP_LOCK(inp); 369 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 370 error = EINVAL; 371 goto out; 372 } 373 tp = intotcpcb(inp); 374 TCPDEBUG1(); 375 SOCK_LOCK(so); 376 error = solisten_proto_check(so); 377 if (error == 0 && inp->inp_lport == 0) 378 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 379 if (error == 0) { 380 tp->t_state = TCPS_LISTEN; 381 solisten_proto(so, backlog); 382 } 383 SOCK_UNLOCK(so); 384 385 out: 386 TCPDEBUG2(PRU_LISTEN); 387 INP_UNLOCK(inp); 388 INP_INFO_WUNLOCK(&tcbinfo); 389 return (error); 390 } 391 392 #ifdef INET6 393 static int 394 tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) 395 { 396 int error = 0; 397 struct inpcb *inp; 398 struct tcpcb *tp = NULL; 399 400 TCPDEBUG0; 401 INP_INFO_WLOCK(&tcbinfo); 402 inp = sotoinpcb(so); 403 KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL")); 404 INP_LOCK(inp); 405 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 406 error = EINVAL; 407 goto out; 408 } 409 tp = intotcpcb(inp); 410 TCPDEBUG1(); 411 SOCK_LOCK(so); 412 error = solisten_proto_check(so); 413 if (error == 0 && inp->inp_lport == 0) { 414 inp->inp_vflag &= ~INP_IPV4; 415 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) 416 inp->inp_vflag |= INP_IPV4; 417 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 418 } 419 if (error == 0) { 420 tp->t_state = TCPS_LISTEN; 421 solisten_proto(so, backlog); 422 } 423 SOCK_UNLOCK(so); 424 425 out: 426 TCPDEBUG2(PRU_LISTEN); 427 INP_UNLOCK(inp); 428 INP_INFO_WUNLOCK(&tcbinfo); 429 return (error); 430 } 431 #endif /* INET6 */ 432 433 /* 434 * Initiate connection to peer. 435 * Create a template for use in transmissions on this connection. 436 * Enter SYN_SENT state, and mark socket as connecting. 437 * Start keep-alive timer, and seed output sequence space. 438 * Send initial segment on connection. 439 */ 440 static int 441 tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 442 { 443 int error = 0; 444 struct inpcb *inp; 445 struct tcpcb *tp = NULL; 446 struct sockaddr_in *sinp; 447 448 sinp = (struct sockaddr_in *)nam; 449 if (nam->sa_len != sizeof (*sinp)) 450 return (EINVAL); 451 /* 452 * Must disallow TCP ``connections'' to multicast addresses. 453 */ 454 if (sinp->sin_family == AF_INET 455 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) 456 return (EAFNOSUPPORT); 457 if (jailed(td->td_ucred)) 458 prison_remote_ip(td->td_ucred, 0, &sinp->sin_addr.s_addr); 459 460 TCPDEBUG0; 461 INP_INFO_WLOCK(&tcbinfo); 462 inp = sotoinpcb(so); 463 KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL")); 464 INP_LOCK(inp); 465 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 466 error = EINVAL; 467 goto out; 468 } 469 tp = intotcpcb(inp); 470 TCPDEBUG1(); 471 if ((error = tcp_connect(tp, nam, td)) != 0) 472 goto out; 473 error = tcp_output(tp); 474 out: 475 TCPDEBUG2(PRU_CONNECT); 476 INP_UNLOCK(inp); 477 INP_INFO_WUNLOCK(&tcbinfo); 478 return (error); 479 } 480 481 #ifdef INET6 482 static int 483 tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 484 { 485 int error = 0; 486 struct inpcb *inp; 487 struct tcpcb *tp = NULL; 488 struct sockaddr_in6 *sin6p; 489 490 TCPDEBUG0; 491 492 sin6p = (struct sockaddr_in6 *)nam; 493 if (nam->sa_len != sizeof (*sin6p)) 494 return (EINVAL); 495 /* 496 * Must disallow TCP ``connections'' to multicast addresses. 497 */ 498 if (sin6p->sin6_family == AF_INET6 499 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) 500 return (EAFNOSUPPORT); 501 502 INP_INFO_WLOCK(&tcbinfo); 503 inp = sotoinpcb(so); 504 KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL")); 505 INP_LOCK(inp); 506 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 507 error = EINVAL; 508 goto out; 509 } 510 tp = intotcpcb(inp); 511 TCPDEBUG1(); 512 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 513 struct sockaddr_in sin; 514 515 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { 516 error = EINVAL; 517 goto out; 518 } 519 520 in6_sin6_2_sin(&sin, sin6p); 521 inp->inp_vflag |= INP_IPV4; 522 inp->inp_vflag &= ~INP_IPV6; 523 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0) 524 goto out; 525 error = tcp_output(tp); 526 goto out; 527 } 528 inp->inp_vflag &= ~INP_IPV4; 529 inp->inp_vflag |= INP_IPV6; 530 inp->inp_inc.inc_isipv6 = 1; 531 if ((error = tcp6_connect(tp, nam, td)) != 0) 532 goto out; 533 error = tcp_output(tp); 534 535 out: 536 TCPDEBUG2(PRU_CONNECT); 537 INP_UNLOCK(inp); 538 INP_INFO_WUNLOCK(&tcbinfo); 539 return (error); 540 } 541 #endif /* INET6 */ 542 543 /* 544 * Initiate disconnect from peer. 545 * If connection never passed embryonic stage, just drop; 546 * else if don't need to let data drain, then can just drop anyways, 547 * else have to begin TCP shutdown process: mark socket disconnecting, 548 * drain unread data, state switch to reflect user close, and 549 * send segment (e.g. FIN) to peer. Socket will be really disconnected 550 * when peer sends FIN and acks ours. 551 * 552 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 553 */ 554 static int 555 tcp_usr_disconnect(struct socket *so) 556 { 557 struct inpcb *inp; 558 struct tcpcb *tp = NULL; 559 int error = 0; 560 561 TCPDEBUG0; 562 INP_INFO_WLOCK(&tcbinfo); 563 inp = sotoinpcb(so); 564 KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL")); 565 INP_LOCK(inp); 566 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 567 error = EINVAL; 568 goto out; 569 } 570 tp = intotcpcb(inp); 571 TCPDEBUG1(); 572 tcp_disconnect(tp); 573 out: 574 TCPDEBUG2(PRU_DISCONNECT); 575 INP_UNLOCK(inp); 576 INP_INFO_WUNLOCK(&tcbinfo); 577 return (error); 578 } 579 580 /* 581 * Accept a connection. Essentially all the work is 582 * done at higher levels; just return the address 583 * of the peer, storing through addr. 584 */ 585 static int 586 tcp_usr_accept(struct socket *so, struct sockaddr **nam) 587 { 588 int error = 0; 589 struct inpcb *inp = NULL; 590 struct tcpcb *tp = NULL; 591 struct in_addr addr; 592 in_port_t port = 0; 593 TCPDEBUG0; 594 595 if (so->so_state & SS_ISDISCONNECTED) 596 return (ECONNABORTED); 597 598 inp = sotoinpcb(so); 599 KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL")); 600 INP_LOCK(inp); 601 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 602 error = ECONNABORTED; 603 goto out; 604 } 605 tp = intotcpcb(inp); 606 TCPDEBUG1(); 607 608 /* 609 * We inline in_setpeeraddr and COMMON_END here, so that we can 610 * copy the data of interest and defer the malloc until after we 611 * release the lock. 612 */ 613 port = inp->inp_fport; 614 addr = inp->inp_faddr; 615 616 out: 617 TCPDEBUG2(PRU_ACCEPT); 618 INP_UNLOCK(inp); 619 if (error == 0) 620 *nam = in_sockaddr(port, &addr); 621 return error; 622 } 623 624 #ifdef INET6 625 static int 626 tcp6_usr_accept(struct socket *so, struct sockaddr **nam) 627 { 628 struct inpcb *inp = NULL; 629 int error = 0; 630 struct tcpcb *tp = NULL; 631 struct in_addr addr; 632 struct in6_addr addr6; 633 in_port_t port = 0; 634 int v4 = 0; 635 TCPDEBUG0; 636 637 if (so->so_state & SS_ISDISCONNECTED) { 638 error = ECONNABORTED; 639 goto out; 640 } 641 642 inp = sotoinpcb(so); 643 KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL")); 644 INP_LOCK(inp); 645 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 646 error = EINVAL; 647 goto out; 648 } 649 tp = intotcpcb(inp); 650 TCPDEBUG1(); 651 652 /* 653 * We inline in6_mapped_peeraddr and COMMON_END here, so that we can 654 * copy the data of interest and defer the malloc until after we 655 * release the lock. 656 */ 657 if (inp->inp_vflag & INP_IPV4) { 658 v4 = 1; 659 port = inp->inp_fport; 660 addr = inp->inp_faddr; 661 } else { 662 port = inp->inp_fport; 663 addr6 = inp->in6p_faddr; 664 } 665 666 out: 667 TCPDEBUG2(PRU_ACCEPT); 668 INP_UNLOCK(inp); 669 if (error == 0) { 670 if (v4) 671 *nam = in6_v4mapsin6_sockaddr(port, &addr); 672 else 673 *nam = in6_sockaddr(port, &addr6); 674 } 675 return error; 676 } 677 #endif /* INET6 */ 678 679 /* 680 * This is the wrapper function for in_setsockaddr. We just pass down 681 * the pcbinfo for in_setsockaddr to lock. We don't want to do the locking 682 * here because in_setsockaddr will call malloc and can block. 683 */ 684 static int 685 tcp_sockaddr(struct socket *so, struct sockaddr **nam) 686 { 687 return (in_setsockaddr(so, nam, &tcbinfo)); 688 } 689 690 /* 691 * This is the wrapper function for in_setpeeraddr. We just pass down 692 * the pcbinfo for in_setpeeraddr to lock. 693 */ 694 static int 695 tcp_peeraddr(struct socket *so, struct sockaddr **nam) 696 { 697 return (in_setpeeraddr(so, nam, &tcbinfo)); 698 } 699 700 /* 701 * Mark the connection as being incapable of further output. 702 */ 703 static int 704 tcp_usr_shutdown(struct socket *so) 705 { 706 int error = 0; 707 struct inpcb *inp; 708 struct tcpcb *tp = NULL; 709 710 TCPDEBUG0; 711 INP_INFO_WLOCK(&tcbinfo); 712 inp = sotoinpcb(so); 713 KASSERT(inp != NULL, ("inp == NULL")); 714 INP_LOCK(inp); 715 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 716 error = EINVAL; 717 goto out; 718 } 719 tp = intotcpcb(inp); 720 TCPDEBUG1(); 721 socantsendmore(so); 722 tcp_usrclosed(tp); 723 error = tcp_output(tp); 724 725 out: 726 TCPDEBUG2(PRU_SHUTDOWN); 727 INP_UNLOCK(inp); 728 INP_INFO_WUNLOCK(&tcbinfo); 729 730 return (error); 731 } 732 733 /* 734 * After a receive, possibly send window update to peer. 735 */ 736 static int 737 tcp_usr_rcvd(struct socket *so, int flags) 738 { 739 struct inpcb *inp; 740 struct tcpcb *tp = NULL; 741 int error = 0; 742 743 TCPDEBUG0; 744 inp = sotoinpcb(so); 745 KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL")); 746 INP_LOCK(inp); 747 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 748 error = EINVAL; 749 goto out; 750 } 751 tp = intotcpcb(inp); 752 TCPDEBUG1(); 753 tcp_output(tp); 754 755 out: 756 TCPDEBUG2(PRU_RCVD); 757 INP_UNLOCK(inp); 758 return (error); 759 } 760 761 /* 762 * Do a send by putting data in output queue and updating urgent 763 * marker if URG set. Possibly send more data. Unlike the other 764 * pru_*() routines, the mbuf chains are our responsibility. We 765 * must either enqueue them or free them. The other pru_* routines 766 * generally are caller-frees. 767 */ 768 static int 769 tcp_usr_send(struct socket *so, int flags, struct mbuf *m, 770 struct sockaddr *nam, struct mbuf *control, struct thread *td) 771 { 772 int error = 0; 773 struct inpcb *inp; 774 struct tcpcb *tp = NULL; 775 int headlocked = 0; 776 #ifdef INET6 777 int isipv6; 778 #endif 779 TCPDEBUG0; 780 781 /* 782 * We require the pcbinfo lock in two cases: 783 * 784 * (1) An implied connect is taking place, which can result in 785 * binding IPs and ports and hence modification of the pcb hash 786 * chains. 787 * 788 * (2) PRUS_EOF is set, resulting in explicit close on the send. 789 */ 790 if ((nam != NULL) || (flags & PRUS_EOF)) { 791 INP_INFO_WLOCK(&tcbinfo); 792 headlocked = 1; 793 } 794 inp = sotoinpcb(so); 795 KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL")); 796 INP_LOCK(inp); 797 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 798 error = EINVAL; 799 goto out; 800 } 801 #ifdef INET6 802 isipv6 = nam && nam->sa_family == AF_INET6; 803 #endif /* INET6 */ 804 tp = intotcpcb(inp); 805 TCPDEBUG1(); 806 if (control) { 807 /* TCP doesn't do control messages (rights, creds, etc) */ 808 if (control->m_len) { 809 m_freem(control); 810 if (m) 811 m_freem(m); 812 error = EINVAL; 813 goto out; 814 } 815 m_freem(control); /* empty control, just free it */ 816 } 817 if (!(flags & PRUS_OOB)) { 818 sbappendstream(&so->so_snd, m); 819 if (nam && tp->t_state < TCPS_SYN_SENT) { 820 /* 821 * Do implied connect if not yet connected, 822 * initialize window to default value, and 823 * initialize maxseg/maxopd using peer's cached 824 * MSS. 825 */ 826 INP_INFO_WLOCK_ASSERT(&tcbinfo); 827 #ifdef INET6 828 if (isipv6) 829 error = tcp6_connect(tp, nam, td); 830 else 831 #endif /* INET6 */ 832 error = tcp_connect(tp, nam, td); 833 if (error) 834 goto out; 835 tp->snd_wnd = TTCP_CLIENT_SND_WND; 836 tcp_mss(tp, -1); 837 } 838 if (flags & PRUS_EOF) { 839 /* 840 * Close the send side of the connection after 841 * the data is sent. 842 */ 843 INP_INFO_WLOCK_ASSERT(&tcbinfo); 844 socantsendmore(so); 845 tcp_usrclosed(tp); 846 } 847 if (headlocked) { 848 INP_INFO_WUNLOCK(&tcbinfo); 849 headlocked = 0; 850 } 851 if (tp != NULL) { 852 if (flags & PRUS_MORETOCOME) 853 tp->t_flags |= TF_MORETOCOME; 854 error = tcp_output(tp); 855 if (flags & PRUS_MORETOCOME) 856 tp->t_flags &= ~TF_MORETOCOME; 857 } 858 } else { 859 /* 860 * XXXRW: PRUS_EOF not implemented with PRUS_OOB? 861 */ 862 SOCKBUF_LOCK(&so->so_snd); 863 if (sbspace(&so->so_snd) < -512) { 864 SOCKBUF_UNLOCK(&so->so_snd); 865 m_freem(m); 866 error = ENOBUFS; 867 goto out; 868 } 869 /* 870 * According to RFC961 (Assigned Protocols), 871 * the urgent pointer points to the last octet 872 * of urgent data. We continue, however, 873 * to consider it to indicate the first octet 874 * of data past the urgent section. 875 * Otherwise, snd_up should be one lower. 876 */ 877 sbappendstream_locked(&so->so_snd, m); 878 SOCKBUF_UNLOCK(&so->so_snd); 879 if (nam && tp->t_state < TCPS_SYN_SENT) { 880 /* 881 * Do implied connect if not yet connected, 882 * initialize window to default value, and 883 * initialize maxseg/maxopd using peer's cached 884 * MSS. 885 */ 886 INP_INFO_WLOCK_ASSERT(&tcbinfo); 887 #ifdef INET6 888 if (isipv6) 889 error = tcp6_connect(tp, nam, td); 890 else 891 #endif /* INET6 */ 892 error = tcp_connect(tp, nam, td); 893 if (error) 894 goto out; 895 tp->snd_wnd = TTCP_CLIENT_SND_WND; 896 tcp_mss(tp, -1); 897 INP_INFO_WUNLOCK(&tcbinfo); 898 headlocked = 0; 899 } else if (nam) { 900 INP_INFO_WUNLOCK(&tcbinfo); 901 headlocked = 0; 902 } 903 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 904 tp->t_flags |= TF_FORCEDATA; 905 error = tcp_output(tp); 906 tp->t_flags &= ~TF_FORCEDATA; 907 } 908 out: 909 TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB : 910 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); 911 INP_UNLOCK(inp); 912 if (headlocked) 913 INP_INFO_WUNLOCK(&tcbinfo); 914 return (error); 915 } 916 917 /* 918 * Abort the TCP. 919 */ 920 static void 921 tcp_usr_abort(struct socket *so) 922 { 923 #if 0 924 struct inpcb *inp; 925 struct tcpcb *tp; 926 #endif 927 928 /* 929 * XXXRW: This is not really quite the same, as we want to tcp_drop() 930 * rather than tcp_disconnect(), I think, but for now I'll avoid 931 * replicating all the tear-down logic here. 932 */ 933 tcp_usr_detach(so); 934 935 #if 0 936 TCPDEBUG0; 937 INP_INFO_WLOCK(&tcbinfo); 938 inp = sotoinpcb(so); 939 INP_LOCK(inp); 940 /* 941 * Do we need to handle timewait here? Aborted connections should 942 * never generate a FIN? 943 */ 944 KASSERT((inp->inp_vflag & INP_TIMEWAIT) == 0, 945 ("tcp_usr_abort: timewait")); 946 tp = intotcpcb(inp); 947 TCPDEBUG1(); 948 tp = tcp_drop(tp, ECONNABORTED); 949 TCPDEBUG2(PRU_ABORT); 950 if (tp != NULL) 951 INP_UNLOCK(inp); 952 INP_INFO_WUNLOCK(&tcbinfo); 953 #endif 954 } 955 956 /* 957 * Receive out-of-band data. 958 */ 959 static int 960 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) 961 { 962 int error = 0; 963 struct inpcb *inp; 964 struct tcpcb *tp = NULL; 965 966 TCPDEBUG0; 967 inp = sotoinpcb(so); 968 KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL")); 969 INP_LOCK(inp); 970 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 971 error = EINVAL; 972 goto out; 973 } 974 tp = intotcpcb(inp); 975 TCPDEBUG1(); 976 if ((so->so_oobmark == 0 && 977 (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) || 978 so->so_options & SO_OOBINLINE || 979 tp->t_oobflags & TCPOOB_HADDATA) { 980 error = EINVAL; 981 goto out; 982 } 983 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 984 error = EWOULDBLOCK; 985 goto out; 986 } 987 m->m_len = 1; 988 *mtod(m, caddr_t) = tp->t_iobc; 989 if ((flags & MSG_PEEK) == 0) 990 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 991 992 out: 993 TCPDEBUG2(PRU_RCVOOB); 994 INP_UNLOCK(inp); 995 return (error); 996 } 997 998 struct pr_usrreqs tcp_usrreqs = { 999 .pru_abort = tcp_usr_abort, 1000 .pru_accept = tcp_usr_accept, 1001 .pru_attach = tcp_usr_attach, 1002 .pru_bind = tcp_usr_bind, 1003 .pru_connect = tcp_usr_connect, 1004 .pru_control = in_control, 1005 .pru_detach = tcp_usr_detach, 1006 .pru_disconnect = tcp_usr_disconnect, 1007 .pru_listen = tcp_usr_listen, 1008 .pru_peeraddr = tcp_peeraddr, 1009 .pru_rcvd = tcp_usr_rcvd, 1010 .pru_rcvoob = tcp_usr_rcvoob, 1011 .pru_send = tcp_usr_send, 1012 .pru_shutdown = tcp_usr_shutdown, 1013 .pru_sockaddr = tcp_sockaddr, 1014 .pru_sosetlabel = in_pcbsosetlabel 1015 }; 1016 1017 #ifdef INET6 1018 struct pr_usrreqs tcp6_usrreqs = { 1019 .pru_abort = tcp_usr_abort, 1020 .pru_accept = tcp6_usr_accept, 1021 .pru_attach = tcp_usr_attach, 1022 .pru_bind = tcp6_usr_bind, 1023 .pru_connect = tcp6_usr_connect, 1024 .pru_control = in6_control, 1025 .pru_detach = tcp_usr_detach, 1026 .pru_disconnect = tcp_usr_disconnect, 1027 .pru_listen = tcp6_usr_listen, 1028 .pru_peeraddr = in6_mapped_peeraddr, 1029 .pru_rcvd = tcp_usr_rcvd, 1030 .pru_rcvoob = tcp_usr_rcvoob, 1031 .pru_send = tcp_usr_send, 1032 .pru_shutdown = tcp_usr_shutdown, 1033 .pru_sockaddr = in6_mapped_sockaddr, 1034 .pru_sosetlabel = in_pcbsosetlabel 1035 }; 1036 #endif /* INET6 */ 1037 1038 /* 1039 * Common subroutine to open a TCP connection to remote host specified 1040 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local 1041 * port number if needed. Call in_pcbconnect_setup to do the routing and 1042 * to choose a local host address (interface). If there is an existing 1043 * incarnation of the same connection in TIME-WAIT state and if the remote 1044 * host was sending CC options and if the connection duration was < MSL, then 1045 * truncate the previous TIME-WAIT state and proceed. 1046 * Initialize connection parameters and enter SYN-SENT state. 1047 */ 1048 static int 1049 tcp_connect(tp, nam, td) 1050 register struct tcpcb *tp; 1051 struct sockaddr *nam; 1052 struct thread *td; 1053 { 1054 struct inpcb *inp = tp->t_inpcb, *oinp; 1055 struct socket *so = inp->inp_socket; 1056 struct in_addr laddr; 1057 u_short lport; 1058 int error; 1059 1060 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1061 INP_LOCK_ASSERT(inp); 1062 1063 if (inp->inp_lport == 0) { 1064 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 1065 if (error) 1066 return error; 1067 } 1068 1069 /* 1070 * Cannot simply call in_pcbconnect, because there might be an 1071 * earlier incarnation of this same connection still in 1072 * TIME_WAIT state, creating an ADDRINUSE error. 1073 */ 1074 laddr = inp->inp_laddr; 1075 lport = inp->inp_lport; 1076 error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport, 1077 &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred); 1078 if (error && oinp == NULL) 1079 return error; 1080 if (oinp) 1081 return EADDRINUSE; 1082 inp->inp_laddr = laddr; 1083 in_pcbrehash(inp); 1084 1085 /* Compute window scaling to request. */ 1086 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 1087 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 1088 tp->request_r_scale++; 1089 1090 soisconnecting(so); 1091 tcpstat.tcps_connattempt++; 1092 tp->t_state = TCPS_SYN_SENT; 1093 callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); 1094 tp->iss = tcp_new_isn(tp); 1095 tp->t_bw_rtseq = tp->iss; 1096 tcp_sendseqinit(tp); 1097 1098 return 0; 1099 } 1100 1101 #ifdef INET6 1102 static int 1103 tcp6_connect(tp, nam, td) 1104 register struct tcpcb *tp; 1105 struct sockaddr *nam; 1106 struct thread *td; 1107 { 1108 struct inpcb *inp = tp->t_inpcb, *oinp; 1109 struct socket *so = inp->inp_socket; 1110 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 1111 struct in6_addr *addr6; 1112 int error; 1113 1114 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1115 INP_LOCK_ASSERT(inp); 1116 1117 if (inp->inp_lport == 0) { 1118 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 1119 if (error) 1120 return error; 1121 } 1122 1123 /* 1124 * Cannot simply call in_pcbconnect, because there might be an 1125 * earlier incarnation of this same connection still in 1126 * TIME_WAIT state, creating an ADDRINUSE error. 1127 * in6_pcbladdr() also handles scope zone IDs. 1128 */ 1129 error = in6_pcbladdr(inp, nam, &addr6); 1130 if (error) 1131 return error; 1132 oinp = in6_pcblookup_hash(inp->inp_pcbinfo, 1133 &sin6->sin6_addr, sin6->sin6_port, 1134 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) 1135 ? addr6 1136 : &inp->in6p_laddr, 1137 inp->inp_lport, 0, NULL); 1138 if (oinp) 1139 return EADDRINUSE; 1140 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) 1141 inp->in6p_laddr = *addr6; 1142 inp->in6p_faddr = sin6->sin6_addr; 1143 inp->inp_fport = sin6->sin6_port; 1144 /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ 1145 inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; 1146 if (inp->in6p_flags & IN6P_AUTOFLOWLABEL) 1147 inp->in6p_flowinfo |= 1148 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); 1149 in_pcbrehash(inp); 1150 1151 /* Compute window scaling to request. */ 1152 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 1153 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 1154 tp->request_r_scale++; 1155 1156 soisconnecting(so); 1157 tcpstat.tcps_connattempt++; 1158 tp->t_state = TCPS_SYN_SENT; 1159 callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); 1160 tp->iss = tcp_new_isn(tp); 1161 tp->t_bw_rtseq = tp->iss; 1162 tcp_sendseqinit(tp); 1163 1164 return 0; 1165 } 1166 #endif /* INET6 */ 1167 1168 /* 1169 * Export TCP internal state information via a struct tcp_info, based on the 1170 * Linux 2.6 API. Not ABI compatible as our constants are mapped differently 1171 * (TCP state machine, etc). We export all information using FreeBSD-native 1172 * constants -- for example, the numeric values for tcpi_state will differ 1173 * from Linux. 1174 */ 1175 static void 1176 tcp_fill_info(tp, ti) 1177 struct tcpcb *tp; 1178 struct tcp_info *ti; 1179 { 1180 1181 INP_LOCK_ASSERT(tp->t_inpcb); 1182 bzero(ti, sizeof(*ti)); 1183 1184 ti->tcpi_state = tp->t_state; 1185 if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) 1186 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS; 1187 if (tp->sack_enable) 1188 ti->tcpi_options |= TCPI_OPT_SACK; 1189 if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) { 1190 ti->tcpi_options |= TCPI_OPT_WSCALE; 1191 ti->tcpi_snd_wscale = tp->snd_scale; 1192 ti->tcpi_rcv_wscale = tp->rcv_scale; 1193 } 1194 ti->tcpi_snd_ssthresh = tp->snd_ssthresh; 1195 ti->tcpi_snd_cwnd = tp->snd_cwnd; 1196 1197 /* 1198 * FreeBSD-specific extension fields for tcp_info. 1199 */ 1200 ti->tcpi_rcv_space = tp->rcv_wnd; 1201 ti->tcpi_snd_wnd = tp->snd_wnd; 1202 ti->tcpi_snd_bwnd = tp->snd_bwnd; 1203 } 1204 1205 /* 1206 * The new sockopt interface makes it possible for us to block in the 1207 * copyin/out step (if we take a page fault). Taking a page fault at 1208 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now 1209 * use TSM, there probably isn't any need for this function to run at 1210 * splnet() any more. This needs more examination.) 1211 * 1212 * XXXRW: The locking here is wrong; we may take a page fault while holding 1213 * the inpcb lock. 1214 */ 1215 int 1216 tcp_ctloutput(so, sopt) 1217 struct socket *so; 1218 struct sockopt *sopt; 1219 { 1220 int error, opt, optval; 1221 struct inpcb *inp; 1222 struct tcpcb *tp; 1223 struct tcp_info ti; 1224 1225 error = 0; 1226 inp = sotoinpcb(so); 1227 KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL")); 1228 INP_LOCK(inp); 1229 if (sopt->sopt_level != IPPROTO_TCP) { 1230 INP_UNLOCK(inp); 1231 #ifdef INET6 1232 if (INP_CHECK_SOCKAF(so, AF_INET6)) 1233 error = ip6_ctloutput(so, sopt); 1234 else 1235 #endif /* INET6 */ 1236 error = ip_ctloutput(so, sopt); 1237 return (error); 1238 } 1239 if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { 1240 error = ECONNRESET; 1241 goto out; 1242 } 1243 tp = intotcpcb(inp); 1244 1245 switch (sopt->sopt_dir) { 1246 case SOPT_SET: 1247 switch (sopt->sopt_name) { 1248 #ifdef TCP_SIGNATURE 1249 case TCP_MD5SIG: 1250 error = sooptcopyin(sopt, &optval, sizeof optval, 1251 sizeof optval); 1252 if (error) 1253 break; 1254 1255 if (optval > 0) 1256 tp->t_flags |= TF_SIGNATURE; 1257 else 1258 tp->t_flags &= ~TF_SIGNATURE; 1259 break; 1260 #endif /* TCP_SIGNATURE */ 1261 case TCP_NODELAY: 1262 case TCP_NOOPT: 1263 error = sooptcopyin(sopt, &optval, sizeof optval, 1264 sizeof optval); 1265 if (error) 1266 break; 1267 1268 switch (sopt->sopt_name) { 1269 case TCP_NODELAY: 1270 opt = TF_NODELAY; 1271 break; 1272 case TCP_NOOPT: 1273 opt = TF_NOOPT; 1274 break; 1275 default: 1276 opt = 0; /* dead code to fool gcc */ 1277 break; 1278 } 1279 1280 if (optval) 1281 tp->t_flags |= opt; 1282 else 1283 tp->t_flags &= ~opt; 1284 break; 1285 1286 case TCP_NOPUSH: 1287 error = sooptcopyin(sopt, &optval, sizeof optval, 1288 sizeof optval); 1289 if (error) 1290 break; 1291 1292 if (optval) 1293 tp->t_flags |= TF_NOPUSH; 1294 else { 1295 tp->t_flags &= ~TF_NOPUSH; 1296 error = tcp_output(tp); 1297 } 1298 break; 1299 1300 case TCP_MAXSEG: 1301 error = sooptcopyin(sopt, &optval, sizeof optval, 1302 sizeof optval); 1303 if (error) 1304 break; 1305 1306 if (optval > 0 && optval <= tp->t_maxseg && 1307 optval + 40 >= tcp_minmss) 1308 tp->t_maxseg = optval; 1309 else 1310 error = EINVAL; 1311 break; 1312 1313 case TCP_INFO: 1314 error = EINVAL; 1315 break; 1316 1317 default: 1318 error = ENOPROTOOPT; 1319 break; 1320 } 1321 break; 1322 1323 case SOPT_GET: 1324 switch (sopt->sopt_name) { 1325 #ifdef TCP_SIGNATURE 1326 case TCP_MD5SIG: 1327 optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 1328 error = sooptcopyout(sopt, &optval, sizeof optval); 1329 break; 1330 #endif 1331 case TCP_NODELAY: 1332 optval = tp->t_flags & TF_NODELAY; 1333 error = sooptcopyout(sopt, &optval, sizeof optval); 1334 break; 1335 case TCP_MAXSEG: 1336 optval = tp->t_maxseg; 1337 error = sooptcopyout(sopt, &optval, sizeof optval); 1338 break; 1339 case TCP_NOOPT: 1340 optval = tp->t_flags & TF_NOOPT; 1341 error = sooptcopyout(sopt, &optval, sizeof optval); 1342 break; 1343 case TCP_NOPUSH: 1344 optval = tp->t_flags & TF_NOPUSH; 1345 error = sooptcopyout(sopt, &optval, sizeof optval); 1346 break; 1347 case TCP_INFO: 1348 tcp_fill_info(tp, &ti); 1349 error = sooptcopyout(sopt, &ti, sizeof ti); 1350 break; 1351 default: 1352 error = ENOPROTOOPT; 1353 break; 1354 } 1355 break; 1356 } 1357 out: 1358 INP_UNLOCK(inp); 1359 return (error); 1360 } 1361 1362 /* 1363 * tcp_sendspace and tcp_recvspace are the default send and receive window 1364 * sizes, respectively. These are obsolescent (this information should 1365 * be set by the route). 1366 */ 1367 u_long tcp_sendspace = 1024*32; 1368 SYSCTL_ULONG(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW, 1369 &tcp_sendspace , 0, "Maximum outgoing TCP datagram size"); 1370 u_long tcp_recvspace = 1024*64; 1371 SYSCTL_ULONG(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1372 &tcp_recvspace , 0, "Maximum incoming TCP datagram size"); 1373 1374 /* 1375 * Attach TCP protocol to socket, allocating 1376 * internet protocol control block, tcp control block, 1377 * bufer space, and entering LISTEN state if to accept connections. 1378 */ 1379 static int 1380 tcp_attach(so) 1381 struct socket *so; 1382 { 1383 register struct tcpcb *tp; 1384 struct inpcb *inp; 1385 int error; 1386 #ifdef INET6 1387 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0; 1388 #endif 1389 1390 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1391 1392 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 1393 error = soreserve(so, tcp_sendspace, tcp_recvspace); 1394 if (error) 1395 return (error); 1396 } 1397 error = in_pcballoc(so, &tcbinfo, "tcpinp"); 1398 if (error) 1399 return (error); 1400 inp = sotoinpcb(so); 1401 #ifdef INET6 1402 if (isipv6) { 1403 inp->inp_vflag |= INP_IPV6; 1404 inp->in6p_hops = -1; /* use kernel default */ 1405 } 1406 else 1407 #endif 1408 inp->inp_vflag |= INP_IPV4; 1409 tp = tcp_newtcpcb(inp); 1410 if (tp == NULL) { 1411 INP_LOCK(inp); 1412 #ifdef INET6 1413 if (isipv6) { 1414 in6_pcbdetach(inp); 1415 in6_pcbfree(inp); 1416 } else { 1417 #endif 1418 in_pcbdetach(inp); 1419 in_pcbfree(inp); 1420 #ifdef INET6 1421 } 1422 #endif 1423 return (ENOBUFS); 1424 } 1425 tp->t_state = TCPS_CLOSED; 1426 return (0); 1427 } 1428 1429 /* 1430 * Initiate (or continue) disconnect. 1431 * If embryonic state, just send reset (once). 1432 * If in ``let data drain'' option and linger null, just drop. 1433 * Otherwise (hard), mark socket disconnecting and drop 1434 * current input data; switch states based on user close, and 1435 * send segment to peer (with FIN). 1436 */ 1437 static void 1438 tcp_disconnect(tp) 1439 register struct tcpcb *tp; 1440 { 1441 struct inpcb *inp = tp->t_inpcb; 1442 struct socket *so = inp->inp_socket; 1443 1444 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1445 INP_LOCK_ASSERT(inp); 1446 1447 /* 1448 * Neither tcp_close() nor tcp_drop() should return NULL, as the 1449 * socket is still open. 1450 */ 1451 if (tp->t_state < TCPS_ESTABLISHED) { 1452 tp = tcp_close(tp); 1453 KASSERT(tp != NULL, 1454 ("tcp_disconnect: tcp_close() returned NULL")); 1455 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) { 1456 tp = tcp_drop(tp, 0); 1457 KASSERT(tp != NULL, 1458 ("tcp_disconnect: tcp_drop() returned NULL")); 1459 } else { 1460 soisdisconnecting(so); 1461 sbflush(&so->so_rcv); 1462 tcp_usrclosed(tp); 1463 if (!(inp->inp_vflag & INP_DROPPED)) 1464 tcp_output(tp); 1465 } 1466 } 1467 1468 /* 1469 * User issued close, and wish to trail through shutdown states: 1470 * if never received SYN, just forget it. If got a SYN from peer, 1471 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1472 * If already got a FIN from peer, then almost done; go to LAST_ACK 1473 * state. In all other cases, have already sent FIN to peer (e.g. 1474 * after PRU_SHUTDOWN), and just have to play tedious game waiting 1475 * for peer to send FIN or not respond to keep-alives, etc. 1476 * We can let the user exit from the close as soon as the FIN is acked. 1477 */ 1478 static void 1479 tcp_usrclosed(tp) 1480 register struct tcpcb *tp; 1481 { 1482 1483 INP_INFO_WLOCK_ASSERT(&tcbinfo); 1484 INP_LOCK_ASSERT(tp->t_inpcb); 1485 1486 switch (tp->t_state) { 1487 1488 case TCPS_CLOSED: 1489 case TCPS_LISTEN: 1490 tp->t_state = TCPS_CLOSED; 1491 tp = tcp_close(tp); 1492 /* 1493 * tcp_close() should never return NULL here as the socket is 1494 * still open. 1495 */ 1496 KASSERT(tp != NULL, 1497 ("tcp_usrclosed: tcp_close() returned NULL")); 1498 break; 1499 1500 case TCPS_SYN_SENT: 1501 case TCPS_SYN_RECEIVED: 1502 tp->t_flags |= TF_NEEDFIN; 1503 break; 1504 1505 case TCPS_ESTABLISHED: 1506 tp->t_state = TCPS_FIN_WAIT_1; 1507 break; 1508 1509 case TCPS_CLOSE_WAIT: 1510 tp->t_state = TCPS_LAST_ACK; 1511 break; 1512 } 1513 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 1514 soisdisconnected(tp->t_inpcb->inp_socket); 1515 /* To prevent the connection hanging in FIN_WAIT_2 forever. */ 1516 if (tp->t_state == TCPS_FIN_WAIT_2) 1517 callout_reset(tp->tt_2msl, tcp_maxidle, 1518 tcp_timer_2msl, tp); 1519 } 1520 } 1521