1 /* 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 34 * $FreeBSD$ 35 */ 36 37 #include "opt_ipsec.h" 38 #include "opt_inet6.h" 39 #include "opt_tcpdebug.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/kernel.h> 44 #include <sys/sysctl.h> 45 #include <sys/mbuf.h> 46 #ifdef INET6 47 #include <sys/domain.h> 48 #endif /* INET6 */ 49 #include <sys/socket.h> 50 #include <sys/socketvar.h> 51 #include <sys/protosw.h> 52 #include <sys/proc.h> 53 #include <sys/jail.h> 54 55 #include <net/if.h> 56 #include <net/route.h> 57 58 #include <netinet/in.h> 59 #include <netinet/in_systm.h> 60 #ifdef INET6 61 #include <netinet/ip6.h> 62 #endif 63 #include <netinet/in_pcb.h> 64 #ifdef INET6 65 #include <netinet6/in6_pcb.h> 66 #endif 67 #include <netinet/in_var.h> 68 #include <netinet/ip_var.h> 69 #ifdef INET6 70 #include <netinet6/ip6_var.h> 71 #endif 72 #include <netinet/tcp.h> 73 #include <netinet/tcp_fsm.h> 74 #include <netinet/tcp_seq.h> 75 #include <netinet/tcp_timer.h> 76 #include <netinet/tcp_var.h> 77 #include <netinet/tcpip.h> 78 #ifdef TCPDEBUG 79 #include <netinet/tcp_debug.h> 80 #endif 81 82 #ifdef IPSEC 83 #include <netinet6/ipsec.h> 84 #endif /*IPSEC*/ 85 86 /* 87 * TCP protocol interface to socket abstraction. 88 */ 89 extern char *tcpstates[]; /* XXX ??? */ 90 91 static int tcp_attach __P((struct socket *, struct thread *td)); 92 static int tcp_connect __P((struct tcpcb *, struct sockaddr *, 93 struct thread *td)); 94 #ifdef INET6 95 static int tcp6_connect __P((struct tcpcb *, struct sockaddr *, 96 struct thread *td)); 97 #endif /* INET6 */ 98 static struct tcpcb * 99 tcp_disconnect __P((struct tcpcb *)); 100 static struct tcpcb * 101 tcp_usrclosed __P((struct tcpcb *)); 102 103 #ifdef TCPDEBUG 104 #define TCPDEBUG0 int ostate = 0 105 #define TCPDEBUG1() ostate = tp ? tp->t_state : 0 106 #define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ 107 tcp_trace(TA_USER, ostate, tp, 0, 0, req) 108 #else 109 #define TCPDEBUG0 110 #define TCPDEBUG1() 111 #define TCPDEBUG2(req) 112 #endif 113 114 /* 115 * TCP attaches to socket via pru_attach(), reserving space, 116 * and an internet control block. 117 */ 118 static int 119 tcp_usr_attach(struct socket *so, int proto, struct thread *td) 120 { 121 int s = splnet(); 122 int error; 123 struct inpcb *inp = sotoinpcb(so); 124 struct tcpcb *tp = 0; 125 TCPDEBUG0; 126 127 TCPDEBUG1(); 128 if (inp) { 129 error = EISCONN; 130 goto out; 131 } 132 133 error = tcp_attach(so, td); 134 if (error) 135 goto out; 136 137 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 138 so->so_linger = TCP_LINGERTIME; 139 tp = sototcpcb(so); 140 out: 141 TCPDEBUG2(PRU_ATTACH); 142 splx(s); 143 return error; 144 } 145 146 /* 147 * pru_detach() detaches the TCP protocol from the socket. 148 * If the protocol state is non-embryonic, then can't 149 * do this directly: have to initiate a pru_disconnect(), 150 * which may finish later; embryonic TCB's can just 151 * be discarded here. 152 */ 153 static int 154 tcp_usr_detach(struct socket *so) 155 { 156 int s = splnet(); 157 int error = 0; 158 struct inpcb *inp = sotoinpcb(so); 159 struct tcpcb *tp; 160 TCPDEBUG0; 161 162 if (inp == 0) { 163 splx(s); 164 return EINVAL; /* XXX */ 165 } 166 tp = intotcpcb(inp); 167 TCPDEBUG1(); 168 tp = tcp_disconnect(tp); 169 170 TCPDEBUG2(PRU_DETACH); 171 splx(s); 172 return error; 173 } 174 175 #define COMMON_START() TCPDEBUG0; \ 176 do { \ 177 if (inp == 0) { \ 178 splx(s); \ 179 return EINVAL; \ 180 } \ 181 tp = intotcpcb(inp); \ 182 TCPDEBUG1(); \ 183 } while(0) 184 185 #define COMMON_END(req) out: TCPDEBUG2(req); splx(s); return error; goto out 186 187 188 /* 189 * Give the socket an address. 190 */ 191 static int 192 tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 193 { 194 int s = splnet(); 195 int error = 0; 196 struct inpcb *inp = sotoinpcb(so); 197 struct tcpcb *tp; 198 struct sockaddr_in *sinp; 199 200 COMMON_START(); 201 202 /* 203 * Must check for multicast addresses and disallow binding 204 * to them. 205 */ 206 sinp = (struct sockaddr_in *)nam; 207 if (sinp->sin_family == AF_INET && 208 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { 209 error = EAFNOSUPPORT; 210 goto out; 211 } 212 error = in_pcbbind(inp, nam, td); 213 if (error) 214 goto out; 215 COMMON_END(PRU_BIND); 216 217 } 218 219 #ifdef INET6 220 static int 221 tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 222 { 223 int s = splnet(); 224 int error = 0; 225 struct inpcb *inp = sotoinpcb(so); 226 struct tcpcb *tp; 227 struct sockaddr_in6 *sin6p; 228 229 COMMON_START(); 230 231 /* 232 * Must check for multicast addresses and disallow binding 233 * to them. 234 */ 235 sin6p = (struct sockaddr_in6 *)nam; 236 if (sin6p->sin6_family == AF_INET6 && 237 IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) { 238 error = EAFNOSUPPORT; 239 goto out; 240 } 241 inp->inp_vflag &= ~INP_IPV4; 242 inp->inp_vflag |= INP_IPV6; 243 if (ip6_mapped_addr_on && (inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { 244 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) 245 inp->inp_vflag |= INP_IPV4; 246 else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 247 struct sockaddr_in sin; 248 249 in6_sin6_2_sin(&sin, sin6p); 250 inp->inp_vflag |= INP_IPV4; 251 inp->inp_vflag &= ~INP_IPV6; 252 error = in_pcbbind(inp, (struct sockaddr *)&sin, td); 253 goto out; 254 } 255 } 256 error = in6_pcbbind(inp, nam, td); 257 if (error) 258 goto out; 259 COMMON_END(PRU_BIND); 260 } 261 #endif /* INET6 */ 262 263 /* 264 * Prepare to accept connections. 265 */ 266 static int 267 tcp_usr_listen(struct socket *so, struct thread *td) 268 { 269 int s = splnet(); 270 int error = 0; 271 struct inpcb *inp = sotoinpcb(so); 272 struct tcpcb *tp; 273 274 COMMON_START(); 275 if (inp->inp_lport == 0) 276 error = in_pcbbind(inp, (struct sockaddr *)0, td); 277 if (error == 0) 278 tp->t_state = TCPS_LISTEN; 279 COMMON_END(PRU_LISTEN); 280 } 281 282 #ifdef INET6 283 static int 284 tcp6_usr_listen(struct socket *so, struct thread *td) 285 { 286 int s = splnet(); 287 int error = 0; 288 struct inpcb *inp = sotoinpcb(so); 289 struct tcpcb *tp; 290 291 COMMON_START(); 292 if (inp->inp_lport == 0) { 293 inp->inp_vflag &= ~INP_IPV4; 294 if (ip6_mapped_addr_on && 295 (inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) 296 inp->inp_vflag |= INP_IPV4; 297 error = in6_pcbbind(inp, (struct sockaddr *)0, td); 298 } 299 if (error == 0) 300 tp->t_state = TCPS_LISTEN; 301 COMMON_END(PRU_LISTEN); 302 } 303 #endif /* INET6 */ 304 305 /* 306 * Initiate connection to peer. 307 * Create a template for use in transmissions on this connection. 308 * Enter SYN_SENT state, and mark socket as connecting. 309 * Start keep-alive timer, and seed output sequence space. 310 * Send initial segment on connection. 311 */ 312 static int 313 tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 314 { 315 int s = splnet(); 316 int error = 0; 317 struct inpcb *inp = sotoinpcb(so); 318 struct tcpcb *tp; 319 struct sockaddr_in *sinp; 320 321 COMMON_START(); 322 323 /* 324 * Must disallow TCP ``connections'' to multicast addresses. 325 */ 326 sinp = (struct sockaddr_in *)nam; 327 if (sinp->sin_family == AF_INET 328 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { 329 error = EAFNOSUPPORT; 330 goto out; 331 } 332 333 if (td && jailed(td->td_proc->p_ucred)) 334 prison_remote_ip(td->td_proc->p_ucred, 0, &sinp->sin_addr.s_addr); 335 336 if ((error = tcp_connect(tp, nam, td)) != 0) 337 goto out; 338 error = tcp_output(tp); 339 COMMON_END(PRU_CONNECT); 340 } 341 342 #ifdef INET6 343 static int 344 tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 345 { 346 int s = splnet(); 347 int error = 0; 348 struct inpcb *inp = sotoinpcb(so); 349 struct tcpcb *tp; 350 struct sockaddr_in6 *sin6p; 351 352 COMMON_START(); 353 354 /* 355 * Must disallow TCP ``connections'' to multicast addresses. 356 */ 357 sin6p = (struct sockaddr_in6 *)nam; 358 if (sin6p->sin6_family == AF_INET6 359 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) { 360 error = EAFNOSUPPORT; 361 goto out; 362 } 363 364 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 365 struct sockaddr_in sin; 366 367 if (!ip6_mapped_addr_on || 368 (inp->inp_flags & IN6P_IPV6_V6ONLY)) 369 return(EINVAL); 370 371 in6_sin6_2_sin(&sin, sin6p); 372 inp->inp_vflag |= INP_IPV4; 373 inp->inp_vflag &= ~INP_IPV6; 374 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0) 375 goto out; 376 error = tcp_output(tp); 377 goto out; 378 } 379 inp->inp_vflag &= ~INP_IPV4; 380 inp->inp_vflag |= INP_IPV6; 381 if ((error = tcp6_connect(tp, nam, td)) != 0) 382 goto out; 383 error = tcp_output(tp); 384 COMMON_END(PRU_CONNECT); 385 } 386 #endif /* INET6 */ 387 388 /* 389 * Initiate disconnect from peer. 390 * If connection never passed embryonic stage, just drop; 391 * else if don't need to let data drain, then can just drop anyways, 392 * else have to begin TCP shutdown process: mark socket disconnecting, 393 * drain unread data, state switch to reflect user close, and 394 * send segment (e.g. FIN) to peer. Socket will be really disconnected 395 * when peer sends FIN and acks ours. 396 * 397 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 398 */ 399 static int 400 tcp_usr_disconnect(struct socket *so) 401 { 402 int s = splnet(); 403 int error = 0; 404 struct inpcb *inp = sotoinpcb(so); 405 struct tcpcb *tp; 406 407 COMMON_START(); 408 tp = tcp_disconnect(tp); 409 COMMON_END(PRU_DISCONNECT); 410 } 411 412 /* 413 * Accept a connection. Essentially all the work is 414 * done at higher levels; just return the address 415 * of the peer, storing through addr. 416 */ 417 static int 418 tcp_usr_accept(struct socket *so, struct sockaddr **nam) 419 { 420 int s = splnet(); 421 int error = 0; 422 struct inpcb *inp = sotoinpcb(so); 423 struct tcpcb *tp = NULL; 424 TCPDEBUG0; 425 426 if (so->so_state & SS_ISDISCONNECTED) { 427 error = ECONNABORTED; 428 goto out; 429 } 430 if (inp == 0) { 431 splx(s); 432 return (EINVAL); 433 } 434 tp = intotcpcb(inp); 435 TCPDEBUG1(); 436 in_setpeeraddr(so, nam); 437 COMMON_END(PRU_ACCEPT); 438 } 439 440 #ifdef INET6 441 static int 442 tcp6_usr_accept(struct socket *so, struct sockaddr **nam) 443 { 444 int s = splnet(); 445 int error = 0; 446 struct inpcb *inp = sotoinpcb(so); 447 struct tcpcb *tp = NULL; 448 TCPDEBUG0; 449 450 if (so->so_state & SS_ISDISCONNECTED) { 451 error = ECONNABORTED; 452 goto out; 453 } 454 if (inp == 0) { 455 splx(s); 456 return (EINVAL); 457 } 458 tp = intotcpcb(inp); 459 TCPDEBUG1(); 460 in6_mapped_peeraddr(so, nam); 461 COMMON_END(PRU_ACCEPT); 462 } 463 #endif /* INET6 */ 464 /* 465 * Mark the connection as being incapable of further output. 466 */ 467 static int 468 tcp_usr_shutdown(struct socket *so) 469 { 470 int s = splnet(); 471 int error = 0; 472 struct inpcb *inp = sotoinpcb(so); 473 struct tcpcb *tp; 474 475 COMMON_START(); 476 socantsendmore(so); 477 tp = tcp_usrclosed(tp); 478 if (tp) 479 error = tcp_output(tp); 480 COMMON_END(PRU_SHUTDOWN); 481 } 482 483 /* 484 * After a receive, possibly send window update to peer. 485 */ 486 static int 487 tcp_usr_rcvd(struct socket *so, int flags) 488 { 489 int s = splnet(); 490 int error = 0; 491 struct inpcb *inp = sotoinpcb(so); 492 struct tcpcb *tp; 493 494 COMMON_START(); 495 tcp_output(tp); 496 COMMON_END(PRU_RCVD); 497 } 498 499 /* 500 * Do a send by putting data in output queue and updating urgent 501 * marker if URG set. Possibly send more data. Unlike the other 502 * pru_*() routines, the mbuf chains are our responsibility. We 503 * must either enqueue them or free them. The other pru_* routines 504 * generally are caller-frees. 505 */ 506 static int 507 tcp_usr_send(struct socket *so, int flags, struct mbuf *m, 508 struct sockaddr *nam, struct mbuf *control, struct thread *td) 509 { 510 int s = splnet(); 511 int error = 0; 512 struct inpcb *inp = sotoinpcb(so); 513 struct tcpcb *tp; 514 #ifdef INET6 515 int isipv6; 516 #endif 517 TCPDEBUG0; 518 519 if (inp == NULL) { 520 /* 521 * OOPS! we lost a race, the TCP session got reset after 522 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a 523 * network interrupt in the non-splnet() section of sosend(). 524 */ 525 if (m) 526 m_freem(m); 527 if (control) 528 m_freem(control); 529 error = ECONNRESET; /* XXX EPIPE? */ 530 tp = NULL; 531 TCPDEBUG1(); 532 goto out; 533 } 534 #ifdef INET6 535 isipv6 = nam && nam->sa_family == AF_INET6; 536 #endif /* INET6 */ 537 tp = intotcpcb(inp); 538 TCPDEBUG1(); 539 if (control) { 540 /* TCP doesn't do control messages (rights, creds, etc) */ 541 if (control->m_len) { 542 m_freem(control); 543 if (m) 544 m_freem(m); 545 error = EINVAL; 546 goto out; 547 } 548 m_freem(control); /* empty control, just free it */ 549 } 550 if(!(flags & PRUS_OOB)) { 551 sbappend(&so->so_snd, m); 552 if (nam && tp->t_state < TCPS_SYN_SENT) { 553 /* 554 * Do implied connect if not yet connected, 555 * initialize window to default value, and 556 * initialize maxseg/maxopd using peer's cached 557 * MSS. 558 */ 559 #ifdef INET6 560 if (isipv6) 561 error = tcp6_connect(tp, nam, td); 562 else 563 #endif /* INET6 */ 564 error = tcp_connect(tp, nam, td); 565 if (error) 566 goto out; 567 tp->snd_wnd = TTCP_CLIENT_SND_WND; 568 tcp_mss(tp, -1); 569 } 570 571 if (flags & PRUS_EOF) { 572 /* 573 * Close the send side of the connection after 574 * the data is sent. 575 */ 576 socantsendmore(so); 577 tp = tcp_usrclosed(tp); 578 } 579 if (tp != NULL) { 580 if (flags & PRUS_MORETOCOME) 581 tp->t_flags |= TF_MORETOCOME; 582 error = tcp_output(tp); 583 if (flags & PRUS_MORETOCOME) 584 tp->t_flags &= ~TF_MORETOCOME; 585 } 586 } else { 587 if (sbspace(&so->so_snd) < -512) { 588 m_freem(m); 589 error = ENOBUFS; 590 goto out; 591 } 592 /* 593 * According to RFC961 (Assigned Protocols), 594 * the urgent pointer points to the last octet 595 * of urgent data. We continue, however, 596 * to consider it to indicate the first octet 597 * of data past the urgent section. 598 * Otherwise, snd_up should be one lower. 599 */ 600 sbappend(&so->so_snd, m); 601 if (nam && tp->t_state < TCPS_SYN_SENT) { 602 /* 603 * Do implied connect if not yet connected, 604 * initialize window to default value, and 605 * initialize maxseg/maxopd using peer's cached 606 * MSS. 607 */ 608 #ifdef INET6 609 if (isipv6) 610 error = tcp6_connect(tp, nam, td); 611 else 612 #endif /* INET6 */ 613 error = tcp_connect(tp, nam, td); 614 if (error) 615 goto out; 616 tp->snd_wnd = TTCP_CLIENT_SND_WND; 617 tcp_mss(tp, -1); 618 } 619 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 620 tp->t_force = 1; 621 error = tcp_output(tp); 622 tp->t_force = 0; 623 } 624 COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB : 625 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); 626 } 627 628 /* 629 * Abort the TCP. 630 */ 631 static int 632 tcp_usr_abort(struct socket *so) 633 { 634 int s = splnet(); 635 int error = 0; 636 struct inpcb *inp = sotoinpcb(so); 637 struct tcpcb *tp; 638 639 COMMON_START(); 640 tp = tcp_drop(tp, ECONNABORTED); 641 COMMON_END(PRU_ABORT); 642 } 643 644 /* 645 * Receive out-of-band data. 646 */ 647 static int 648 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) 649 { 650 int s = splnet(); 651 int error = 0; 652 struct inpcb *inp = sotoinpcb(so); 653 struct tcpcb *tp; 654 655 COMMON_START(); 656 if ((so->so_oobmark == 0 && 657 (so->so_state & SS_RCVATMARK) == 0) || 658 so->so_options & SO_OOBINLINE || 659 tp->t_oobflags & TCPOOB_HADDATA) { 660 error = EINVAL; 661 goto out; 662 } 663 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 664 error = EWOULDBLOCK; 665 goto out; 666 } 667 m->m_len = 1; 668 *mtod(m, caddr_t) = tp->t_iobc; 669 if ((flags & MSG_PEEK) == 0) 670 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 671 COMMON_END(PRU_RCVOOB); 672 } 673 674 /* xxx - should be const */ 675 struct pr_usrreqs tcp_usrreqs = { 676 tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind, 677 tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach, 678 tcp_usr_disconnect, tcp_usr_listen, in_setpeeraddr, tcp_usr_rcvd, 679 tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown, 680 in_setsockaddr, sosend, soreceive, sopoll 681 }; 682 683 #ifdef INET6 684 struct pr_usrreqs tcp6_usrreqs = { 685 tcp_usr_abort, tcp6_usr_accept, tcp_usr_attach, tcp6_usr_bind, 686 tcp6_usr_connect, pru_connect2_notsupp, in6_control, tcp_usr_detach, 687 tcp_usr_disconnect, tcp6_usr_listen, in6_mapped_peeraddr, tcp_usr_rcvd, 688 tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown, 689 in6_mapped_sockaddr, sosend, soreceive, sopoll 690 }; 691 #endif /* INET6 */ 692 693 /* 694 * Common subroutine to open a TCP connection to remote host specified 695 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local 696 * port number if needed. Call in_pcbladdr to do the routing and to choose 697 * a local host address (interface). If there is an existing incarnation 698 * of the same connection in TIME-WAIT state and if the remote host was 699 * sending CC options and if the connection duration was < MSL, then 700 * truncate the previous TIME-WAIT state and proceed. 701 * Initialize connection parameters and enter SYN-SENT state. 702 */ 703 static int 704 tcp_connect(tp, nam, td) 705 register struct tcpcb *tp; 706 struct sockaddr *nam; 707 struct thread *td; 708 { 709 struct inpcb *inp = tp->t_inpcb, *oinp; 710 struct socket *so = inp->inp_socket; 711 struct tcpcb *otp; 712 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 713 struct sockaddr_in *ifaddr; 714 struct rmxp_tao *taop; 715 struct rmxp_tao tao_noncached; 716 int error; 717 718 if (inp->inp_lport == 0) { 719 error = in_pcbbind(inp, (struct sockaddr *)0, td); 720 if (error) 721 return error; 722 } 723 724 /* 725 * Cannot simply call in_pcbconnect, because there might be an 726 * earlier incarnation of this same connection still in 727 * TIME_WAIT state, creating an ADDRINUSE error. 728 */ 729 error = in_pcbladdr(inp, nam, &ifaddr); 730 if (error) 731 return error; 732 oinp = in_pcblookup_hash(inp->inp_pcbinfo, 733 sin->sin_addr, sin->sin_port, 734 inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr 735 : ifaddr->sin_addr, 736 inp->inp_lport, 0, NULL); 737 if (oinp) { 738 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL && 739 otp->t_state == TCPS_TIME_WAIT && 740 (ticks - otp->t_starttime) < tcp_msl && 741 (otp->t_flags & TF_RCVD_CC)) 742 otp = tcp_close(otp); 743 else 744 return EADDRINUSE; 745 } 746 if (inp->inp_laddr.s_addr == INADDR_ANY) 747 inp->inp_laddr = ifaddr->sin_addr; 748 inp->inp_faddr = sin->sin_addr; 749 inp->inp_fport = sin->sin_port; 750 in_pcbrehash(inp); 751 752 /* Compute window scaling to request. */ 753 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 754 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 755 tp->request_r_scale++; 756 757 soisconnecting(so); 758 tcpstat.tcps_connattempt++; 759 tp->t_state = TCPS_SYN_SENT; 760 callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); 761 tp->iss = tcp_new_isn(tp); 762 tcp_sendseqinit(tp); 763 764 /* 765 * Generate a CC value for this connection and 766 * check whether CC or CCnew should be used. 767 */ 768 if ((taop = tcp_gettaocache(&tp->t_inpcb->inp_inc)) == NULL) { 769 taop = &tao_noncached; 770 bzero(taop, sizeof(*taop)); 771 } 772 773 tp->cc_send = CC_INC(tcp_ccgen); 774 if (taop->tao_ccsent != 0 && 775 CC_GEQ(tp->cc_send, taop->tao_ccsent)) { 776 taop->tao_ccsent = tp->cc_send; 777 } else { 778 taop->tao_ccsent = 0; 779 tp->t_flags |= TF_SENDCCNEW; 780 } 781 782 return 0; 783 } 784 785 #ifdef INET6 786 static int 787 tcp6_connect(tp, nam, td) 788 register struct tcpcb *tp; 789 struct sockaddr *nam; 790 struct thread *td; 791 { 792 struct inpcb *inp = tp->t_inpcb, *oinp; 793 struct socket *so = inp->inp_socket; 794 struct tcpcb *otp; 795 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 796 struct in6_addr *addr6; 797 struct rmxp_tao *taop; 798 struct rmxp_tao tao_noncached; 799 int error; 800 801 if (inp->inp_lport == 0) { 802 error = in6_pcbbind(inp, (struct sockaddr *)0, td); 803 if (error) 804 return error; 805 } 806 807 /* 808 * Cannot simply call in_pcbconnect, because there might be an 809 * earlier incarnation of this same connection still in 810 * TIME_WAIT state, creating an ADDRINUSE error. 811 */ 812 error = in6_pcbladdr(inp, nam, &addr6); 813 if (error) 814 return error; 815 oinp = in6_pcblookup_hash(inp->inp_pcbinfo, 816 &sin6->sin6_addr, sin6->sin6_port, 817 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) 818 ? addr6 819 : &inp->in6p_laddr, 820 inp->inp_lport, 0, NULL); 821 if (oinp) { 822 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL && 823 otp->t_state == TCPS_TIME_WAIT && 824 (ticks - otp->t_starttime) < tcp_msl && 825 (otp->t_flags & TF_RCVD_CC)) 826 otp = tcp_close(otp); 827 else 828 return EADDRINUSE; 829 } 830 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) 831 inp->in6p_laddr = *addr6; 832 inp->in6p_faddr = sin6->sin6_addr; 833 inp->inp_fport = sin6->sin6_port; 834 if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != NULL) 835 inp->in6p_flowinfo = sin6->sin6_flowinfo; 836 in_pcbrehash(inp); 837 838 /* Compute window scaling to request. */ 839 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 840 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 841 tp->request_r_scale++; 842 843 soisconnecting(so); 844 tcpstat.tcps_connattempt++; 845 tp->t_state = TCPS_SYN_SENT; 846 callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); 847 tp->iss = tcp_new_isn(tp); 848 tcp_sendseqinit(tp); 849 850 /* 851 * Generate a CC value for this connection and 852 * check whether CC or CCnew should be used. 853 */ 854 if ((taop = tcp_gettaocache(&tp->t_inpcb->inp_inc)) == NULL) { 855 taop = &tao_noncached; 856 bzero(taop, sizeof(*taop)); 857 } 858 859 tp->cc_send = CC_INC(tcp_ccgen); 860 if (taop->tao_ccsent != 0 && 861 CC_GEQ(tp->cc_send, taop->tao_ccsent)) { 862 taop->tao_ccsent = tp->cc_send; 863 } else { 864 taop->tao_ccsent = 0; 865 tp->t_flags |= TF_SENDCCNEW; 866 } 867 868 return 0; 869 } 870 #endif /* INET6 */ 871 872 /* 873 * The new sockopt interface makes it possible for us to block in the 874 * copyin/out step (if we take a page fault). Taking a page fault at 875 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now 876 * use TSM, there probably isn't any need for this function to run at 877 * splnet() any more. This needs more examination.) 878 */ 879 int 880 tcp_ctloutput(so, sopt) 881 struct socket *so; 882 struct sockopt *sopt; 883 { 884 int error, opt, optval, s; 885 struct inpcb *inp; 886 struct tcpcb *tp; 887 888 error = 0; 889 s = splnet(); /* XXX */ 890 inp = sotoinpcb(so); 891 if (inp == NULL) { 892 splx(s); 893 return (ECONNRESET); 894 } 895 if (sopt->sopt_level != IPPROTO_TCP) { 896 #ifdef INET6 897 if (INP_CHECK_SOCKAF(so, AF_INET6)) 898 error = ip6_ctloutput(so, sopt); 899 else 900 #endif /* INET6 */ 901 error = ip_ctloutput(so, sopt); 902 splx(s); 903 return (error); 904 } 905 tp = intotcpcb(inp); 906 907 switch (sopt->sopt_dir) { 908 case SOPT_SET: 909 switch (sopt->sopt_name) { 910 case TCP_NODELAY: 911 case TCP_NOOPT: 912 error = sooptcopyin(sopt, &optval, sizeof optval, 913 sizeof optval); 914 if (error) 915 break; 916 917 switch (sopt->sopt_name) { 918 case TCP_NODELAY: 919 opt = TF_NODELAY; 920 break; 921 case TCP_NOOPT: 922 opt = TF_NOOPT; 923 break; 924 default: 925 opt = 0; /* dead code to fool gcc */ 926 break; 927 } 928 929 if (optval) 930 tp->t_flags |= opt; 931 else 932 tp->t_flags &= ~opt; 933 break; 934 935 case TCP_NOPUSH: 936 error = sooptcopyin(sopt, &optval, sizeof optval, 937 sizeof optval); 938 if (error) 939 break; 940 941 if (optval) 942 tp->t_flags |= TF_NOPUSH; 943 else { 944 tp->t_flags &= ~TF_NOPUSH; 945 error = tcp_output(tp); 946 } 947 break; 948 949 case TCP_MAXSEG: 950 error = sooptcopyin(sopt, &optval, sizeof optval, 951 sizeof optval); 952 if (error) 953 break; 954 955 if (optval > 0 && optval <= tp->t_maxseg) 956 tp->t_maxseg = optval; 957 else 958 error = EINVAL; 959 break; 960 961 default: 962 error = ENOPROTOOPT; 963 break; 964 } 965 break; 966 967 case SOPT_GET: 968 switch (sopt->sopt_name) { 969 case TCP_NODELAY: 970 optval = tp->t_flags & TF_NODELAY; 971 break; 972 case TCP_MAXSEG: 973 optval = tp->t_maxseg; 974 break; 975 case TCP_NOOPT: 976 optval = tp->t_flags & TF_NOOPT; 977 break; 978 case TCP_NOPUSH: 979 optval = tp->t_flags & TF_NOPUSH; 980 break; 981 default: 982 error = ENOPROTOOPT; 983 break; 984 } 985 if (error == 0) 986 error = sooptcopyout(sopt, &optval, sizeof optval); 987 break; 988 } 989 splx(s); 990 return (error); 991 } 992 993 /* 994 * tcp_sendspace and tcp_recvspace are the default send and receive window 995 * sizes, respectively. These are obsolescent (this information should 996 * be set by the route). 997 */ 998 u_long tcp_sendspace = 1024*32; 999 SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW, 1000 &tcp_sendspace , 0, "Maximum outgoing TCP datagram size"); 1001 u_long tcp_recvspace = 1024*64; 1002 SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1003 &tcp_recvspace , 0, "Maximum incoming TCP datagram size"); 1004 1005 /* 1006 * Attach TCP protocol to socket, allocating 1007 * internet protocol control block, tcp control block, 1008 * bufer space, and entering LISTEN state if to accept connections. 1009 */ 1010 static int 1011 tcp_attach(so, td) 1012 struct socket *so; 1013 struct thread *td; 1014 { 1015 register struct tcpcb *tp; 1016 struct inpcb *inp; 1017 int error; 1018 #ifdef INET6 1019 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != NULL; 1020 #endif 1021 1022 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 1023 error = soreserve(so, tcp_sendspace, tcp_recvspace); 1024 if (error) 1025 return (error); 1026 } 1027 error = in_pcballoc(so, &tcbinfo, td); 1028 if (error) 1029 return (error); 1030 inp = sotoinpcb(so); 1031 #ifdef INET6 1032 if (isipv6) { 1033 inp->inp_vflag |= INP_IPV6; 1034 inp->in6p_hops = -1; /* use kernel default */ 1035 } 1036 else 1037 #endif 1038 inp->inp_vflag |= INP_IPV4; 1039 tp = tcp_newtcpcb(inp); 1040 if (tp == 0) { 1041 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 1042 1043 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 1044 #ifdef INET6 1045 if (isipv6) 1046 in6_pcbdetach(inp); 1047 else 1048 #endif 1049 in_pcbdetach(inp); 1050 so->so_state |= nofd; 1051 return (ENOBUFS); 1052 } 1053 tp->t_state = TCPS_CLOSED; 1054 return (0); 1055 } 1056 1057 /* 1058 * Initiate (or continue) disconnect. 1059 * If embryonic state, just send reset (once). 1060 * If in ``let data drain'' option and linger null, just drop. 1061 * Otherwise (hard), mark socket disconnecting and drop 1062 * current input data; switch states based on user close, and 1063 * send segment to peer (with FIN). 1064 */ 1065 static struct tcpcb * 1066 tcp_disconnect(tp) 1067 register struct tcpcb *tp; 1068 { 1069 struct socket *so = tp->t_inpcb->inp_socket; 1070 1071 if (tp->t_state < TCPS_ESTABLISHED) 1072 tp = tcp_close(tp); 1073 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 1074 tp = tcp_drop(tp, 0); 1075 else { 1076 soisdisconnecting(so); 1077 sbflush(&so->so_rcv); 1078 tp = tcp_usrclosed(tp); 1079 if (tp) 1080 (void) tcp_output(tp); 1081 } 1082 return (tp); 1083 } 1084 1085 /* 1086 * User issued close, and wish to trail through shutdown states: 1087 * if never received SYN, just forget it. If got a SYN from peer, 1088 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1089 * If already got a FIN from peer, then almost done; go to LAST_ACK 1090 * state. In all other cases, have already sent FIN to peer (e.g. 1091 * after PRU_SHUTDOWN), and just have to play tedious game waiting 1092 * for peer to send FIN or not respond to keep-alives, etc. 1093 * We can let the user exit from the close as soon as the FIN is acked. 1094 */ 1095 static struct tcpcb * 1096 tcp_usrclosed(tp) 1097 register struct tcpcb *tp; 1098 { 1099 1100 switch (tp->t_state) { 1101 1102 case TCPS_CLOSED: 1103 case TCPS_LISTEN: 1104 tp->t_state = TCPS_CLOSED; 1105 tp = tcp_close(tp); 1106 break; 1107 1108 case TCPS_SYN_SENT: 1109 case TCPS_SYN_RECEIVED: 1110 tp->t_flags |= TF_NEEDFIN; 1111 break; 1112 1113 case TCPS_ESTABLISHED: 1114 tp->t_state = TCPS_FIN_WAIT_1; 1115 break; 1116 1117 case TCPS_CLOSE_WAIT: 1118 tp->t_state = TCPS_LAST_ACK; 1119 break; 1120 } 1121 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 1122 soisdisconnected(tp->t_inpcb->inp_socket); 1123 /* To prevent the connection hanging in FIN_WAIT_2 forever. */ 1124 if (tp->t_state == TCPS_FIN_WAIT_2) 1125 callout_reset(tp->tt_2msl, tcp_maxidle, 1126 tcp_timer_2msl, tp); 1127 } 1128 return (tp); 1129 } 1130 1131