1 /*- 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2006-2007 Robert N. M. Watson 5 * Copyright (c) 2010-2011 Juniper Networks, Inc. 6 * All rights reserved. 7 * 8 * Portions of this software were developed by Robert N. M. Watson under 9 * contract to Juniper Networks, Inc. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 36 */ 37 38 #include <sys/cdefs.h> 39 __FBSDID("$FreeBSD$"); 40 41 #include "opt_ddb.h" 42 #include "opt_inet.h" 43 #include "opt_inet6.h" 44 #include "opt_tcpdebug.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/limits.h> 49 #include <sys/malloc.h> 50 #include <sys/refcount.h> 51 #include <sys/kernel.h> 52 #include <sys/sysctl.h> 53 #include <sys/mbuf.h> 54 #ifdef INET6 55 #include <sys/domain.h> 56 #endif /* INET6 */ 57 #include <sys/socket.h> 58 #include <sys/socketvar.h> 59 #include <sys/protosw.h> 60 #include <sys/proc.h> 61 #include <sys/jail.h> 62 63 #ifdef DDB 64 #include <ddb/ddb.h> 65 #endif 66 67 #include <net/if.h> 68 #include <net/if_var.h> 69 #include <net/route.h> 70 #include <net/vnet.h> 71 72 #include <netinet/cc.h> 73 #include <netinet/in.h> 74 #include <netinet/in_kdtrace.h> 75 #include <netinet/in_pcb.h> 76 #include <netinet/in_systm.h> 77 #include <netinet/in_var.h> 78 #include <netinet/ip_var.h> 79 #ifdef INET6 80 #include <netinet/ip6.h> 81 #include <netinet6/in6_pcb.h> 82 #include <netinet6/ip6_var.h> 83 #include <netinet6/scope6_var.h> 84 #endif 85 #include <netinet/tcp_fsm.h> 86 #include <netinet/tcp_seq.h> 87 #include <netinet/tcp_timer.h> 88 #include <netinet/tcp_var.h> 89 #include <netinet/tcpip.h> 90 #ifdef TCPPCAP 91 #include <netinet/tcp_pcap.h> 92 #endif 93 #ifdef TCPDEBUG 94 #include <netinet/tcp_debug.h> 95 #endif 96 #ifdef TCP_OFFLOAD 97 #include <netinet/tcp_offload.h> 98 #endif 99 100 /* 101 * TCP protocol interface to socket abstraction. 102 */ 103 static int tcp_attach(struct socket *); 104 #ifdef INET 105 static int tcp_connect(struct tcpcb *, struct sockaddr *, 106 struct thread *td); 107 #endif /* INET */ 108 #ifdef INET6 109 static int tcp6_connect(struct tcpcb *, struct sockaddr *, 110 struct thread *td); 111 #endif /* INET6 */ 112 static void tcp_disconnect(struct tcpcb *); 113 static void tcp_usrclosed(struct tcpcb *); 114 static void tcp_fill_info(struct tcpcb *, struct tcp_info *); 115 116 #ifdef TCPDEBUG 117 #define TCPDEBUG0 int ostate = 0 118 #define TCPDEBUG1() ostate = tp ? tp->t_state : 0 119 #define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ 120 tcp_trace(TA_USER, ostate, tp, 0, 0, req) 121 #else 122 #define TCPDEBUG0 123 #define TCPDEBUG1() 124 #define TCPDEBUG2(req) 125 #endif 126 127 /* 128 * TCP attaches to socket via pru_attach(), reserving space, 129 * and an internet control block. 130 */ 131 static int 132 tcp_usr_attach(struct socket *so, int proto, struct thread *td) 133 { 134 struct inpcb *inp; 135 struct tcpcb *tp = NULL; 136 int error; 137 TCPDEBUG0; 138 139 inp = sotoinpcb(so); 140 KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL")); 141 TCPDEBUG1(); 142 143 error = tcp_attach(so); 144 if (error) 145 goto out; 146 147 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 148 so->so_linger = TCP_LINGERTIME; 149 150 inp = sotoinpcb(so); 151 tp = intotcpcb(inp); 152 out: 153 TCPDEBUG2(PRU_ATTACH); 154 TCP_PROBE2(debug__user, tp, PRU_ATTACH); 155 return error; 156 } 157 158 /* 159 * tcp_detach is called when the socket layer loses its final reference 160 * to the socket, be it a file descriptor reference, a reference from TCP, 161 * etc. At this point, there is only one case in which we will keep around 162 * inpcb state: time wait. 163 * 164 * This function can probably be re-absorbed back into tcp_usr_detach() now 165 * that there is a single detach path. 166 */ 167 static void 168 tcp_detach(struct socket *so, struct inpcb *inp) 169 { 170 struct tcpcb *tp; 171 172 INP_INFO_LOCK_ASSERT(&V_tcbinfo); 173 INP_WLOCK_ASSERT(inp); 174 175 KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp")); 176 KASSERT(inp->inp_socket == so, ("tcp_detach: inp_socket != so")); 177 178 tp = intotcpcb(inp); 179 180 if (inp->inp_flags & INP_TIMEWAIT) { 181 /* 182 * There are two cases to handle: one in which the time wait 183 * state is being discarded (INP_DROPPED), and one in which 184 * this connection will remain in timewait. In the former, 185 * it is time to discard all state (except tcptw, which has 186 * already been discarded by the timewait close code, which 187 * should be further up the call stack somewhere). In the 188 * latter case, we detach from the socket, but leave the pcb 189 * present until timewait ends. 190 * 191 * XXXRW: Would it be cleaner to free the tcptw here? 192 * 193 * Astute question indeed, from twtcp perspective there are 194 * three cases to consider: 195 * 196 * #1 tcp_detach is called at tcptw creation time by 197 * tcp_twstart, then do not discard the newly created tcptw 198 * and leave inpcb present until timewait ends 199 * #2 tcp_detach is called at timewait end (or reuse) by 200 * tcp_twclose, then the tcptw has already been discarded 201 * (or reused) and inpcb is freed here 202 * #3 tcp_detach is called() after timewait ends (or reuse) 203 * (e.g. by soclose), then tcptw has already been discarded 204 * (or reused) and inpcb is freed here 205 * 206 * In all three cases the tcptw should not be freed here. 207 */ 208 if (inp->inp_flags & INP_DROPPED) { 209 KASSERT(tp == NULL, ("tcp_detach: INP_TIMEWAIT && " 210 "INP_DROPPED && tp != NULL")); 211 in_pcbdetach(inp); 212 in_pcbfree(inp); 213 } else { 214 in_pcbdetach(inp); 215 INP_WUNLOCK(inp); 216 } 217 } else { 218 /* 219 * If the connection is not in timewait, we consider two 220 * two conditions: one in which no further processing is 221 * necessary (dropped || embryonic), and one in which TCP is 222 * not yet done, but no longer requires the socket, so the 223 * pcb will persist for the time being. 224 * 225 * XXXRW: Does the second case still occur? 226 */ 227 if (inp->inp_flags & INP_DROPPED || 228 tp->t_state < TCPS_SYN_SENT) { 229 tcp_discardcb(tp); 230 in_pcbdetach(inp); 231 in_pcbfree(inp); 232 } else { 233 in_pcbdetach(inp); 234 INP_WUNLOCK(inp); 235 } 236 } 237 } 238 239 /* 240 * pru_detach() detaches the TCP protocol from the socket. 241 * If the protocol state is non-embryonic, then can't 242 * do this directly: have to initiate a pru_disconnect(), 243 * which may finish later; embryonic TCB's can just 244 * be discarded here. 245 */ 246 static void 247 tcp_usr_detach(struct socket *so) 248 { 249 struct inpcb *inp; 250 int rlock = 0; 251 252 inp = sotoinpcb(so); 253 KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL")); 254 if (!INP_INFO_WLOCKED(&V_tcbinfo)) { 255 INP_INFO_RLOCK(&V_tcbinfo); 256 rlock = 1; 257 } 258 INP_WLOCK(inp); 259 KASSERT(inp->inp_socket != NULL, 260 ("tcp_usr_detach: inp_socket == NULL")); 261 tcp_detach(so, inp); 262 if (rlock) 263 INP_INFO_RUNLOCK(&V_tcbinfo); 264 } 265 266 #ifdef INET 267 /* 268 * Give the socket an address. 269 */ 270 static int 271 tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 272 { 273 int error = 0; 274 struct inpcb *inp; 275 struct tcpcb *tp = NULL; 276 struct sockaddr_in *sinp; 277 278 sinp = (struct sockaddr_in *)nam; 279 if (nam->sa_len != sizeof (*sinp)) 280 return (EINVAL); 281 /* 282 * Must check for multicast addresses and disallow binding 283 * to them. 284 */ 285 if (sinp->sin_family == AF_INET && 286 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) 287 return (EAFNOSUPPORT); 288 289 TCPDEBUG0; 290 inp = sotoinpcb(so); 291 KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL")); 292 INP_WLOCK(inp); 293 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 294 error = EINVAL; 295 goto out; 296 } 297 tp = intotcpcb(inp); 298 TCPDEBUG1(); 299 INP_HASH_WLOCK(&V_tcbinfo); 300 error = in_pcbbind(inp, nam, td->td_ucred); 301 INP_HASH_WUNLOCK(&V_tcbinfo); 302 out: 303 TCPDEBUG2(PRU_BIND); 304 TCP_PROBE2(debug__user, tp, PRU_BIND); 305 INP_WUNLOCK(inp); 306 307 return (error); 308 } 309 #endif /* INET */ 310 311 #ifdef INET6 312 static int 313 tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 314 { 315 int error = 0; 316 struct inpcb *inp; 317 struct tcpcb *tp = NULL; 318 struct sockaddr_in6 *sin6p; 319 320 sin6p = (struct sockaddr_in6 *)nam; 321 if (nam->sa_len != sizeof (*sin6p)) 322 return (EINVAL); 323 /* 324 * Must check for multicast addresses and disallow binding 325 * to them. 326 */ 327 if (sin6p->sin6_family == AF_INET6 && 328 IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) 329 return (EAFNOSUPPORT); 330 331 TCPDEBUG0; 332 inp = sotoinpcb(so); 333 KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL")); 334 INP_WLOCK(inp); 335 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 336 error = EINVAL; 337 goto out; 338 } 339 tp = intotcpcb(inp); 340 TCPDEBUG1(); 341 INP_HASH_WLOCK(&V_tcbinfo); 342 inp->inp_vflag &= ~INP_IPV4; 343 inp->inp_vflag |= INP_IPV6; 344 #ifdef INET 345 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { 346 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) 347 inp->inp_vflag |= INP_IPV4; 348 else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 349 struct sockaddr_in sin; 350 351 in6_sin6_2_sin(&sin, sin6p); 352 inp->inp_vflag |= INP_IPV4; 353 inp->inp_vflag &= ~INP_IPV6; 354 error = in_pcbbind(inp, (struct sockaddr *)&sin, 355 td->td_ucred); 356 INP_HASH_WUNLOCK(&V_tcbinfo); 357 goto out; 358 } 359 } 360 #endif 361 error = in6_pcbbind(inp, nam, td->td_ucred); 362 INP_HASH_WUNLOCK(&V_tcbinfo); 363 out: 364 TCPDEBUG2(PRU_BIND); 365 TCP_PROBE2(debug__user, tp, PRU_BIND); 366 INP_WUNLOCK(inp); 367 return (error); 368 } 369 #endif /* INET6 */ 370 371 #ifdef INET 372 /* 373 * Prepare to accept connections. 374 */ 375 static int 376 tcp_usr_listen(struct socket *so, int backlog, struct thread *td) 377 { 378 int error = 0; 379 struct inpcb *inp; 380 struct tcpcb *tp = NULL; 381 382 TCPDEBUG0; 383 inp = sotoinpcb(so); 384 KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL")); 385 INP_WLOCK(inp); 386 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 387 error = EINVAL; 388 goto out; 389 } 390 tp = intotcpcb(inp); 391 TCPDEBUG1(); 392 SOCK_LOCK(so); 393 error = solisten_proto_check(so); 394 INP_HASH_WLOCK(&V_tcbinfo); 395 if (error == 0 && inp->inp_lport == 0) 396 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 397 INP_HASH_WUNLOCK(&V_tcbinfo); 398 if (error == 0) { 399 tcp_state_change(tp, TCPS_LISTEN); 400 solisten_proto(so, backlog); 401 #ifdef TCP_OFFLOAD 402 if ((so->so_options & SO_NO_OFFLOAD) == 0) 403 tcp_offload_listen_start(tp); 404 #endif 405 } 406 SOCK_UNLOCK(so); 407 408 out: 409 TCPDEBUG2(PRU_LISTEN); 410 TCP_PROBE2(debug__user, tp, PRU_LISTEN); 411 INP_WUNLOCK(inp); 412 return (error); 413 } 414 #endif /* INET */ 415 416 #ifdef INET6 417 static int 418 tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) 419 { 420 int error = 0; 421 struct inpcb *inp; 422 struct tcpcb *tp = NULL; 423 424 TCPDEBUG0; 425 inp = sotoinpcb(so); 426 KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL")); 427 INP_WLOCK(inp); 428 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 429 error = EINVAL; 430 goto out; 431 } 432 tp = intotcpcb(inp); 433 TCPDEBUG1(); 434 SOCK_LOCK(so); 435 error = solisten_proto_check(so); 436 INP_HASH_WLOCK(&V_tcbinfo); 437 if (error == 0 && inp->inp_lport == 0) { 438 inp->inp_vflag &= ~INP_IPV4; 439 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) 440 inp->inp_vflag |= INP_IPV4; 441 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 442 } 443 INP_HASH_WUNLOCK(&V_tcbinfo); 444 if (error == 0) { 445 tcp_state_change(tp, TCPS_LISTEN); 446 solisten_proto(so, backlog); 447 #ifdef TCP_OFFLOAD 448 if ((so->so_options & SO_NO_OFFLOAD) == 0) 449 tcp_offload_listen_start(tp); 450 #endif 451 } 452 SOCK_UNLOCK(so); 453 454 out: 455 TCPDEBUG2(PRU_LISTEN); 456 TCP_PROBE2(debug__user, tp, PRU_LISTEN); 457 INP_WUNLOCK(inp); 458 return (error); 459 } 460 #endif /* INET6 */ 461 462 #ifdef INET 463 /* 464 * Initiate connection to peer. 465 * Create a template for use in transmissions on this connection. 466 * Enter SYN_SENT state, and mark socket as connecting. 467 * Start keep-alive timer, and seed output sequence space. 468 * Send initial segment on connection. 469 */ 470 static int 471 tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 472 { 473 int error = 0; 474 struct inpcb *inp; 475 struct tcpcb *tp = NULL; 476 struct sockaddr_in *sinp; 477 478 sinp = (struct sockaddr_in *)nam; 479 if (nam->sa_len != sizeof (*sinp)) 480 return (EINVAL); 481 /* 482 * Must disallow TCP ``connections'' to multicast addresses. 483 */ 484 if (sinp->sin_family == AF_INET 485 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) 486 return (EAFNOSUPPORT); 487 if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr)) != 0) 488 return (error); 489 490 TCPDEBUG0; 491 inp = sotoinpcb(so); 492 KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL")); 493 INP_WLOCK(inp); 494 if (inp->inp_flags & INP_TIMEWAIT) { 495 error = EADDRINUSE; 496 goto out; 497 } 498 if (inp->inp_flags & INP_DROPPED) { 499 error = ECONNREFUSED; 500 goto out; 501 } 502 tp = intotcpcb(inp); 503 TCPDEBUG1(); 504 if ((error = tcp_connect(tp, nam, td)) != 0) 505 goto out; 506 #ifdef TCP_OFFLOAD 507 if (registered_toedevs > 0 && 508 (so->so_options & SO_NO_OFFLOAD) == 0 && 509 (error = tcp_offload_connect(so, nam)) == 0) 510 goto out; 511 #endif 512 tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); 513 error = tp->t_fb->tfb_tcp_output(tp); 514 out: 515 TCPDEBUG2(PRU_CONNECT); 516 INP_WUNLOCK(inp); 517 return (error); 518 } 519 #endif /* INET */ 520 521 #ifdef INET6 522 static int 523 tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 524 { 525 int error = 0; 526 struct inpcb *inp; 527 struct tcpcb *tp = NULL; 528 struct sockaddr_in6 *sin6p; 529 530 TCPDEBUG0; 531 532 sin6p = (struct sockaddr_in6 *)nam; 533 if (nam->sa_len != sizeof (*sin6p)) 534 return (EINVAL); 535 /* 536 * Must disallow TCP ``connections'' to multicast addresses. 537 */ 538 if (sin6p->sin6_family == AF_INET6 539 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) 540 return (EAFNOSUPPORT); 541 542 inp = sotoinpcb(so); 543 KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL")); 544 INP_WLOCK(inp); 545 if (inp->inp_flags & INP_TIMEWAIT) { 546 error = EADDRINUSE; 547 goto out; 548 } 549 if (inp->inp_flags & INP_DROPPED) { 550 error = ECONNREFUSED; 551 goto out; 552 } 553 tp = intotcpcb(inp); 554 TCPDEBUG1(); 555 #ifdef INET 556 /* 557 * XXXRW: Some confusion: V4/V6 flags relate to binding, and 558 * therefore probably require the hash lock, which isn't held here. 559 * Is this a significant problem? 560 */ 561 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 562 struct sockaddr_in sin; 563 564 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { 565 error = EINVAL; 566 goto out; 567 } 568 569 in6_sin6_2_sin(&sin, sin6p); 570 inp->inp_vflag |= INP_IPV4; 571 inp->inp_vflag &= ~INP_IPV6; 572 if ((error = prison_remote_ip4(td->td_ucred, 573 &sin.sin_addr)) != 0) 574 goto out; 575 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0) 576 goto out; 577 #ifdef TCP_OFFLOAD 578 if (registered_toedevs > 0 && 579 (so->so_options & SO_NO_OFFLOAD) == 0 && 580 (error = tcp_offload_connect(so, nam)) == 0) 581 goto out; 582 #endif 583 error = tp->t_fb->tfb_tcp_output(tp); 584 goto out; 585 } 586 #endif 587 inp->inp_vflag &= ~INP_IPV4; 588 inp->inp_vflag |= INP_IPV6; 589 inp->inp_inc.inc_flags |= INC_ISIPV6; 590 if ((error = prison_remote_ip6(td->td_ucred, &sin6p->sin6_addr)) != 0) 591 goto out; 592 if ((error = tcp6_connect(tp, nam, td)) != 0) 593 goto out; 594 #ifdef TCP_OFFLOAD 595 if (registered_toedevs > 0 && 596 (so->so_options & SO_NO_OFFLOAD) == 0 && 597 (error = tcp_offload_connect(so, nam)) == 0) 598 goto out; 599 #endif 600 tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); 601 error = tp->t_fb->tfb_tcp_output(tp); 602 603 out: 604 TCPDEBUG2(PRU_CONNECT); 605 TCP_PROBE2(debug__user, tp, PRU_CONNECT); 606 INP_WUNLOCK(inp); 607 return (error); 608 } 609 #endif /* INET6 */ 610 611 /* 612 * Initiate disconnect from peer. 613 * If connection never passed embryonic stage, just drop; 614 * else if don't need to let data drain, then can just drop anyways, 615 * else have to begin TCP shutdown process: mark socket disconnecting, 616 * drain unread data, state switch to reflect user close, and 617 * send segment (e.g. FIN) to peer. Socket will be really disconnected 618 * when peer sends FIN and acks ours. 619 * 620 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 621 */ 622 static int 623 tcp_usr_disconnect(struct socket *so) 624 { 625 struct inpcb *inp; 626 struct tcpcb *tp = NULL; 627 int error = 0; 628 629 TCPDEBUG0; 630 INP_INFO_RLOCK(&V_tcbinfo); 631 inp = sotoinpcb(so); 632 KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL")); 633 INP_WLOCK(inp); 634 if (inp->inp_flags & INP_TIMEWAIT) 635 goto out; 636 if (inp->inp_flags & INP_DROPPED) { 637 error = ECONNRESET; 638 goto out; 639 } 640 tp = intotcpcb(inp); 641 TCPDEBUG1(); 642 tcp_disconnect(tp); 643 out: 644 TCPDEBUG2(PRU_DISCONNECT); 645 TCP_PROBE2(debug__user, tp, PRU_DISCONNECT); 646 INP_WUNLOCK(inp); 647 INP_INFO_RUNLOCK(&V_tcbinfo); 648 return (error); 649 } 650 651 #ifdef INET 652 /* 653 * Accept a connection. Essentially all the work is done at higher levels; 654 * just return the address of the peer, storing through addr. 655 */ 656 static int 657 tcp_usr_accept(struct socket *so, struct sockaddr **nam) 658 { 659 int error = 0; 660 struct inpcb *inp = NULL; 661 struct tcpcb *tp = NULL; 662 struct in_addr addr; 663 in_port_t port = 0; 664 TCPDEBUG0; 665 666 if (so->so_state & SS_ISDISCONNECTED) 667 return (ECONNABORTED); 668 669 inp = sotoinpcb(so); 670 KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL")); 671 INP_WLOCK(inp); 672 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 673 error = ECONNABORTED; 674 goto out; 675 } 676 tp = intotcpcb(inp); 677 TCPDEBUG1(); 678 679 /* 680 * We inline in_getpeeraddr and COMMON_END here, so that we can 681 * copy the data of interest and defer the malloc until after we 682 * release the lock. 683 */ 684 port = inp->inp_fport; 685 addr = inp->inp_faddr; 686 687 out: 688 TCPDEBUG2(PRU_ACCEPT); 689 TCP_PROBE2(debug__user, tp, PRU_ACCEPT); 690 INP_WUNLOCK(inp); 691 if (error == 0) 692 *nam = in_sockaddr(port, &addr); 693 return error; 694 } 695 #endif /* INET */ 696 697 #ifdef INET6 698 static int 699 tcp6_usr_accept(struct socket *so, struct sockaddr **nam) 700 { 701 struct inpcb *inp = NULL; 702 int error = 0; 703 struct tcpcb *tp = NULL; 704 struct in_addr addr; 705 struct in6_addr addr6; 706 in_port_t port = 0; 707 int v4 = 0; 708 TCPDEBUG0; 709 710 if (so->so_state & SS_ISDISCONNECTED) 711 return (ECONNABORTED); 712 713 inp = sotoinpcb(so); 714 KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL")); 715 INP_INFO_RLOCK(&V_tcbinfo); 716 INP_WLOCK(inp); 717 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 718 error = ECONNABORTED; 719 goto out; 720 } 721 tp = intotcpcb(inp); 722 TCPDEBUG1(); 723 724 /* 725 * We inline in6_mapped_peeraddr and COMMON_END here, so that we can 726 * copy the data of interest and defer the malloc until after we 727 * release the lock. 728 */ 729 if (inp->inp_vflag & INP_IPV4) { 730 v4 = 1; 731 port = inp->inp_fport; 732 addr = inp->inp_faddr; 733 } else { 734 port = inp->inp_fport; 735 addr6 = inp->in6p_faddr; 736 } 737 738 out: 739 TCPDEBUG2(PRU_ACCEPT); 740 TCP_PROBE2(debug__user, tp, PRU_ACCEPT); 741 INP_WUNLOCK(inp); 742 INP_INFO_RUNLOCK(&V_tcbinfo); 743 if (error == 0) { 744 if (v4) 745 *nam = in6_v4mapsin6_sockaddr(port, &addr); 746 else 747 *nam = in6_sockaddr(port, &addr6); 748 } 749 return error; 750 } 751 #endif /* INET6 */ 752 753 /* 754 * Mark the connection as being incapable of further output. 755 */ 756 static int 757 tcp_usr_shutdown(struct socket *so) 758 { 759 int error = 0; 760 struct inpcb *inp; 761 struct tcpcb *tp = NULL; 762 763 TCPDEBUG0; 764 INP_INFO_RLOCK(&V_tcbinfo); 765 inp = sotoinpcb(so); 766 KASSERT(inp != NULL, ("inp == NULL")); 767 INP_WLOCK(inp); 768 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 769 error = ECONNRESET; 770 goto out; 771 } 772 tp = intotcpcb(inp); 773 TCPDEBUG1(); 774 socantsendmore(so); 775 tcp_usrclosed(tp); 776 if (!(inp->inp_flags & INP_DROPPED)) 777 error = tp->t_fb->tfb_tcp_output(tp); 778 779 out: 780 TCPDEBUG2(PRU_SHUTDOWN); 781 TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN); 782 INP_WUNLOCK(inp); 783 INP_INFO_RUNLOCK(&V_tcbinfo); 784 785 return (error); 786 } 787 788 /* 789 * After a receive, possibly send window update to peer. 790 */ 791 static int 792 tcp_usr_rcvd(struct socket *so, int flags) 793 { 794 struct inpcb *inp; 795 struct tcpcb *tp = NULL; 796 int error = 0; 797 798 TCPDEBUG0; 799 inp = sotoinpcb(so); 800 KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL")); 801 INP_WLOCK(inp); 802 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 803 error = ECONNRESET; 804 goto out; 805 } 806 tp = intotcpcb(inp); 807 TCPDEBUG1(); 808 #ifdef TCP_OFFLOAD 809 if (tp->t_flags & TF_TOE) 810 tcp_offload_rcvd(tp); 811 else 812 #endif 813 tp->t_fb->tfb_tcp_output(tp); 814 815 out: 816 TCPDEBUG2(PRU_RCVD); 817 TCP_PROBE2(debug__user, tp, PRU_RCVD); 818 INP_WUNLOCK(inp); 819 return (error); 820 } 821 822 /* 823 * Do a send by putting data in output queue and updating urgent 824 * marker if URG set. Possibly send more data. Unlike the other 825 * pru_*() routines, the mbuf chains are our responsibility. We 826 * must either enqueue them or free them. The other pru_* routines 827 * generally are caller-frees. 828 */ 829 static int 830 tcp_usr_send(struct socket *so, int flags, struct mbuf *m, 831 struct sockaddr *nam, struct mbuf *control, struct thread *td) 832 { 833 int error = 0; 834 struct inpcb *inp; 835 struct tcpcb *tp = NULL; 836 #ifdef INET6 837 int isipv6; 838 #endif 839 TCPDEBUG0; 840 841 /* 842 * We require the pcbinfo lock if we will close the socket as part of 843 * this call. 844 */ 845 if (flags & PRUS_EOF) 846 INP_INFO_RLOCK(&V_tcbinfo); 847 inp = sotoinpcb(so); 848 KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL")); 849 INP_WLOCK(inp); 850 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 851 if (control) 852 m_freem(control); 853 /* 854 * In case of PRUS_NOTREADY, tcp_usr_ready() is responsible 855 * for freeing memory. 856 */ 857 if (m && (flags & PRUS_NOTREADY) == 0) 858 m_freem(m); 859 error = ECONNRESET; 860 goto out; 861 } 862 #ifdef INET6 863 isipv6 = nam && nam->sa_family == AF_INET6; 864 #endif /* INET6 */ 865 tp = intotcpcb(inp); 866 TCPDEBUG1(); 867 if (control) { 868 /* TCP doesn't do control messages (rights, creds, etc) */ 869 if (control->m_len) { 870 m_freem(control); 871 if (m) 872 m_freem(m); 873 error = EINVAL; 874 goto out; 875 } 876 m_freem(control); /* empty control, just free it */ 877 } 878 if (!(flags & PRUS_OOB)) { 879 sbappendstream(&so->so_snd, m, flags); 880 if (nam && tp->t_state < TCPS_SYN_SENT) { 881 /* 882 * Do implied connect if not yet connected, 883 * initialize window to default value, and 884 * initialize maxseg/maxopd using peer's cached 885 * MSS. 886 */ 887 #ifdef INET6 888 if (isipv6) 889 error = tcp6_connect(tp, nam, td); 890 #endif /* INET6 */ 891 #if defined(INET6) && defined(INET) 892 else 893 #endif 894 #ifdef INET 895 error = tcp_connect(tp, nam, td); 896 #endif 897 if (error) 898 goto out; 899 tp->snd_wnd = TTCP_CLIENT_SND_WND; 900 tcp_mss(tp, -1); 901 } 902 if (flags & PRUS_EOF) { 903 /* 904 * Close the send side of the connection after 905 * the data is sent. 906 */ 907 INP_INFO_RLOCK_ASSERT(&V_tcbinfo); 908 socantsendmore(so); 909 tcp_usrclosed(tp); 910 } 911 if (!(inp->inp_flags & INP_DROPPED) && 912 !(flags & PRUS_NOTREADY)) { 913 if (flags & PRUS_MORETOCOME) 914 tp->t_flags |= TF_MORETOCOME; 915 error = tp->t_fb->tfb_tcp_output(tp); 916 if (flags & PRUS_MORETOCOME) 917 tp->t_flags &= ~TF_MORETOCOME; 918 } 919 } else { 920 /* 921 * XXXRW: PRUS_EOF not implemented with PRUS_OOB? 922 */ 923 SOCKBUF_LOCK(&so->so_snd); 924 if (sbspace(&so->so_snd) < -512) { 925 SOCKBUF_UNLOCK(&so->so_snd); 926 m_freem(m); 927 error = ENOBUFS; 928 goto out; 929 } 930 /* 931 * According to RFC961 (Assigned Protocols), 932 * the urgent pointer points to the last octet 933 * of urgent data. We continue, however, 934 * to consider it to indicate the first octet 935 * of data past the urgent section. 936 * Otherwise, snd_up should be one lower. 937 */ 938 sbappendstream_locked(&so->so_snd, m, flags); 939 SOCKBUF_UNLOCK(&so->so_snd); 940 if (nam && tp->t_state < TCPS_SYN_SENT) { 941 /* 942 * Do implied connect if not yet connected, 943 * initialize window to default value, and 944 * initialize maxseg/maxopd using peer's cached 945 * MSS. 946 */ 947 #ifdef INET6 948 if (isipv6) 949 error = tcp6_connect(tp, nam, td); 950 #endif /* INET6 */ 951 #if defined(INET6) && defined(INET) 952 else 953 #endif 954 #ifdef INET 955 error = tcp_connect(tp, nam, td); 956 #endif 957 if (error) 958 goto out; 959 tp->snd_wnd = TTCP_CLIENT_SND_WND; 960 tcp_mss(tp, -1); 961 } 962 tp->snd_up = tp->snd_una + sbavail(&so->so_snd); 963 if (!(flags & PRUS_NOTREADY)) { 964 tp->t_flags |= TF_FORCEDATA; 965 error = tp->t_fb->tfb_tcp_output(tp); 966 tp->t_flags &= ~TF_FORCEDATA; 967 } 968 } 969 out: 970 TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB : 971 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); 972 TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB : 973 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); 974 INP_WUNLOCK(inp); 975 if (flags & PRUS_EOF) 976 INP_INFO_RUNLOCK(&V_tcbinfo); 977 return (error); 978 } 979 980 static int 981 tcp_usr_ready(struct socket *so, struct mbuf *m, int count) 982 { 983 struct inpcb *inp; 984 struct tcpcb *tp; 985 int error; 986 987 inp = sotoinpcb(so); 988 INP_WLOCK(inp); 989 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 990 INP_WUNLOCK(inp); 991 for (int i = 0; i < count; i++) 992 m = m_free(m); 993 return (ECONNRESET); 994 } 995 tp = intotcpcb(inp); 996 997 SOCKBUF_LOCK(&so->so_snd); 998 error = sbready(&so->so_snd, m, count); 999 SOCKBUF_UNLOCK(&so->so_snd); 1000 if (error == 0) 1001 error = tp->t_fb->tfb_tcp_output(tp); 1002 INP_WUNLOCK(inp); 1003 1004 return (error); 1005 } 1006 1007 /* 1008 * Abort the TCP. Drop the connection abruptly. 1009 */ 1010 static void 1011 tcp_usr_abort(struct socket *so) 1012 { 1013 struct inpcb *inp; 1014 struct tcpcb *tp = NULL; 1015 TCPDEBUG0; 1016 1017 inp = sotoinpcb(so); 1018 KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL")); 1019 1020 INP_INFO_RLOCK(&V_tcbinfo); 1021 INP_WLOCK(inp); 1022 KASSERT(inp->inp_socket != NULL, 1023 ("tcp_usr_abort: inp_socket == NULL")); 1024 1025 /* 1026 * If we still have full TCP state, and we're not dropped, drop. 1027 */ 1028 if (!(inp->inp_flags & INP_TIMEWAIT) && 1029 !(inp->inp_flags & INP_DROPPED)) { 1030 tp = intotcpcb(inp); 1031 TCPDEBUG1(); 1032 tcp_drop(tp, ECONNABORTED); 1033 TCPDEBUG2(PRU_ABORT); 1034 TCP_PROBE2(debug__user, tp, PRU_ABORT); 1035 } 1036 if (!(inp->inp_flags & INP_DROPPED)) { 1037 SOCK_LOCK(so); 1038 so->so_state |= SS_PROTOREF; 1039 SOCK_UNLOCK(so); 1040 inp->inp_flags |= INP_SOCKREF; 1041 } 1042 INP_WUNLOCK(inp); 1043 INP_INFO_RUNLOCK(&V_tcbinfo); 1044 } 1045 1046 /* 1047 * TCP socket is closed. Start friendly disconnect. 1048 */ 1049 static void 1050 tcp_usr_close(struct socket *so) 1051 { 1052 struct inpcb *inp; 1053 struct tcpcb *tp = NULL; 1054 TCPDEBUG0; 1055 1056 inp = sotoinpcb(so); 1057 KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL")); 1058 1059 INP_INFO_RLOCK(&V_tcbinfo); 1060 INP_WLOCK(inp); 1061 KASSERT(inp->inp_socket != NULL, 1062 ("tcp_usr_close: inp_socket == NULL")); 1063 1064 /* 1065 * If we still have full TCP state, and we're not dropped, initiate 1066 * a disconnect. 1067 */ 1068 if (!(inp->inp_flags & INP_TIMEWAIT) && 1069 !(inp->inp_flags & INP_DROPPED)) { 1070 tp = intotcpcb(inp); 1071 TCPDEBUG1(); 1072 tcp_disconnect(tp); 1073 TCPDEBUG2(PRU_CLOSE); 1074 TCP_PROBE2(debug__user, tp, PRU_CLOSE); 1075 } 1076 if (!(inp->inp_flags & INP_DROPPED)) { 1077 SOCK_LOCK(so); 1078 so->so_state |= SS_PROTOREF; 1079 SOCK_UNLOCK(so); 1080 inp->inp_flags |= INP_SOCKREF; 1081 } 1082 INP_WUNLOCK(inp); 1083 INP_INFO_RUNLOCK(&V_tcbinfo); 1084 } 1085 1086 /* 1087 * Receive out-of-band data. 1088 */ 1089 static int 1090 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) 1091 { 1092 int error = 0; 1093 struct inpcb *inp; 1094 struct tcpcb *tp = NULL; 1095 1096 TCPDEBUG0; 1097 inp = sotoinpcb(so); 1098 KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL")); 1099 INP_WLOCK(inp); 1100 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 1101 error = ECONNRESET; 1102 goto out; 1103 } 1104 tp = intotcpcb(inp); 1105 TCPDEBUG1(); 1106 if ((so->so_oobmark == 0 && 1107 (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) || 1108 so->so_options & SO_OOBINLINE || 1109 tp->t_oobflags & TCPOOB_HADDATA) { 1110 error = EINVAL; 1111 goto out; 1112 } 1113 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 1114 error = EWOULDBLOCK; 1115 goto out; 1116 } 1117 m->m_len = 1; 1118 *mtod(m, caddr_t) = tp->t_iobc; 1119 if ((flags & MSG_PEEK) == 0) 1120 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 1121 1122 out: 1123 TCPDEBUG2(PRU_RCVOOB); 1124 TCP_PROBE2(debug__user, tp, PRU_RCVOOB); 1125 INP_WUNLOCK(inp); 1126 return (error); 1127 } 1128 1129 #ifdef INET 1130 struct pr_usrreqs tcp_usrreqs = { 1131 .pru_abort = tcp_usr_abort, 1132 .pru_accept = tcp_usr_accept, 1133 .pru_attach = tcp_usr_attach, 1134 .pru_bind = tcp_usr_bind, 1135 .pru_connect = tcp_usr_connect, 1136 .pru_control = in_control, 1137 .pru_detach = tcp_usr_detach, 1138 .pru_disconnect = tcp_usr_disconnect, 1139 .pru_listen = tcp_usr_listen, 1140 .pru_peeraddr = in_getpeeraddr, 1141 .pru_rcvd = tcp_usr_rcvd, 1142 .pru_rcvoob = tcp_usr_rcvoob, 1143 .pru_send = tcp_usr_send, 1144 .pru_ready = tcp_usr_ready, 1145 .pru_shutdown = tcp_usr_shutdown, 1146 .pru_sockaddr = in_getsockaddr, 1147 .pru_sosetlabel = in_pcbsosetlabel, 1148 .pru_close = tcp_usr_close, 1149 }; 1150 #endif /* INET */ 1151 1152 #ifdef INET6 1153 struct pr_usrreqs tcp6_usrreqs = { 1154 .pru_abort = tcp_usr_abort, 1155 .pru_accept = tcp6_usr_accept, 1156 .pru_attach = tcp_usr_attach, 1157 .pru_bind = tcp6_usr_bind, 1158 .pru_connect = tcp6_usr_connect, 1159 .pru_control = in6_control, 1160 .pru_detach = tcp_usr_detach, 1161 .pru_disconnect = tcp_usr_disconnect, 1162 .pru_listen = tcp6_usr_listen, 1163 .pru_peeraddr = in6_mapped_peeraddr, 1164 .pru_rcvd = tcp_usr_rcvd, 1165 .pru_rcvoob = tcp_usr_rcvoob, 1166 .pru_send = tcp_usr_send, 1167 .pru_ready = tcp_usr_ready, 1168 .pru_shutdown = tcp_usr_shutdown, 1169 .pru_sockaddr = in6_mapped_sockaddr, 1170 .pru_sosetlabel = in_pcbsosetlabel, 1171 .pru_close = tcp_usr_close, 1172 }; 1173 #endif /* INET6 */ 1174 1175 #ifdef INET 1176 /* 1177 * Common subroutine to open a TCP connection to remote host specified 1178 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local 1179 * port number if needed. Call in_pcbconnect_setup to do the routing and 1180 * to choose a local host address (interface). If there is an existing 1181 * incarnation of the same connection in TIME-WAIT state and if the remote 1182 * host was sending CC options and if the connection duration was < MSL, then 1183 * truncate the previous TIME-WAIT state and proceed. 1184 * Initialize connection parameters and enter SYN-SENT state. 1185 */ 1186 static int 1187 tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) 1188 { 1189 struct inpcb *inp = tp->t_inpcb, *oinp; 1190 struct socket *so = inp->inp_socket; 1191 struct in_addr laddr; 1192 u_short lport; 1193 int error; 1194 1195 INP_WLOCK_ASSERT(inp); 1196 INP_HASH_WLOCK(&V_tcbinfo); 1197 1198 if (inp->inp_lport == 0) { 1199 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 1200 if (error) 1201 goto out; 1202 } 1203 1204 /* 1205 * Cannot simply call in_pcbconnect, because there might be an 1206 * earlier incarnation of this same connection still in 1207 * TIME_WAIT state, creating an ADDRINUSE error. 1208 */ 1209 laddr = inp->inp_laddr; 1210 lport = inp->inp_lport; 1211 error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport, 1212 &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred); 1213 if (error && oinp == NULL) 1214 goto out; 1215 if (oinp) { 1216 error = EADDRINUSE; 1217 goto out; 1218 } 1219 inp->inp_laddr = laddr; 1220 in_pcbrehash(inp); 1221 INP_HASH_WUNLOCK(&V_tcbinfo); 1222 1223 /* 1224 * Compute window scaling to request: 1225 * Scale to fit into sweet spot. See tcp_syncache.c. 1226 * XXX: This should move to tcp_output(). 1227 */ 1228 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 1229 (TCP_MAXWIN << tp->request_r_scale) < sb_max) 1230 tp->request_r_scale++; 1231 1232 soisconnecting(so); 1233 TCPSTAT_INC(tcps_connattempt); 1234 tcp_state_change(tp, TCPS_SYN_SENT); 1235 tp->iss = tcp_new_isn(tp); 1236 tcp_sendseqinit(tp); 1237 1238 return 0; 1239 1240 out: 1241 INP_HASH_WUNLOCK(&V_tcbinfo); 1242 return (error); 1243 } 1244 #endif /* INET */ 1245 1246 #ifdef INET6 1247 static int 1248 tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) 1249 { 1250 struct inpcb *inp = tp->t_inpcb; 1251 int error; 1252 1253 INP_WLOCK_ASSERT(inp); 1254 INP_HASH_WLOCK(&V_tcbinfo); 1255 1256 if (inp->inp_lport == 0) { 1257 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 1258 if (error) 1259 goto out; 1260 } 1261 error = in6_pcbconnect(inp, nam, td->td_ucred); 1262 if (error != 0) 1263 goto out; 1264 INP_HASH_WUNLOCK(&V_tcbinfo); 1265 1266 /* Compute window scaling to request. */ 1267 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 1268 (TCP_MAXWIN << tp->request_r_scale) < sb_max) 1269 tp->request_r_scale++; 1270 1271 soisconnecting(inp->inp_socket); 1272 TCPSTAT_INC(tcps_connattempt); 1273 tcp_state_change(tp, TCPS_SYN_SENT); 1274 tp->iss = tcp_new_isn(tp); 1275 tcp_sendseqinit(tp); 1276 1277 return 0; 1278 1279 out: 1280 INP_HASH_WUNLOCK(&V_tcbinfo); 1281 return error; 1282 } 1283 #endif /* INET6 */ 1284 1285 /* 1286 * Export TCP internal state information via a struct tcp_info, based on the 1287 * Linux 2.6 API. Not ABI compatible as our constants are mapped differently 1288 * (TCP state machine, etc). We export all information using FreeBSD-native 1289 * constants -- for example, the numeric values for tcpi_state will differ 1290 * from Linux. 1291 */ 1292 static void 1293 tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti) 1294 { 1295 1296 INP_WLOCK_ASSERT(tp->t_inpcb); 1297 bzero(ti, sizeof(*ti)); 1298 1299 ti->tcpi_state = tp->t_state; 1300 if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) 1301 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS; 1302 if (tp->t_flags & TF_SACK_PERMIT) 1303 ti->tcpi_options |= TCPI_OPT_SACK; 1304 if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) { 1305 ti->tcpi_options |= TCPI_OPT_WSCALE; 1306 ti->tcpi_snd_wscale = tp->snd_scale; 1307 ti->tcpi_rcv_wscale = tp->rcv_scale; 1308 } 1309 1310 ti->tcpi_rto = tp->t_rxtcur * tick; 1311 ti->tcpi_last_data_recv = (long)(ticks - (int)tp->t_rcvtime) * tick; 1312 ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT; 1313 ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT; 1314 1315 ti->tcpi_snd_ssthresh = tp->snd_ssthresh; 1316 ti->tcpi_snd_cwnd = tp->snd_cwnd; 1317 1318 /* 1319 * FreeBSD-specific extension fields for tcp_info. 1320 */ 1321 ti->tcpi_rcv_space = tp->rcv_wnd; 1322 ti->tcpi_rcv_nxt = tp->rcv_nxt; 1323 ti->tcpi_snd_wnd = tp->snd_wnd; 1324 ti->tcpi_snd_bwnd = 0; /* Unused, kept for compat. */ 1325 ti->tcpi_snd_nxt = tp->snd_nxt; 1326 ti->tcpi_snd_mss = tp->t_maxseg; 1327 ti->tcpi_rcv_mss = tp->t_maxseg; 1328 if (tp->t_flags & TF_TOE) 1329 ti->tcpi_options |= TCPI_OPT_TOE; 1330 ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack; 1331 ti->tcpi_rcv_ooopack = tp->t_rcvoopack; 1332 ti->tcpi_snd_zerowin = tp->t_sndzerowin; 1333 } 1334 1335 /* 1336 * tcp_ctloutput() must drop the inpcb lock before performing copyin on 1337 * socket option arguments. When it re-acquires the lock after the copy, it 1338 * has to revalidate that the connection is still valid for the socket 1339 * option. 1340 */ 1341 #define INP_WLOCK_RECHECK(inp) do { \ 1342 INP_WLOCK(inp); \ 1343 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { \ 1344 INP_WUNLOCK(inp); \ 1345 return (ECONNRESET); \ 1346 } \ 1347 tp = intotcpcb(inp); \ 1348 } while(0) 1349 1350 int 1351 tcp_ctloutput(struct socket *so, struct sockopt *sopt) 1352 { 1353 int error; 1354 struct inpcb *inp; 1355 struct tcpcb *tp; 1356 struct tcp_function_block *blk; 1357 struct tcp_function_set fsn; 1358 1359 error = 0; 1360 inp = sotoinpcb(so); 1361 KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL")); 1362 INP_WLOCK(inp); 1363 if (sopt->sopt_level != IPPROTO_TCP) { 1364 #ifdef INET6 1365 if (inp->inp_vflag & INP_IPV6PROTO) { 1366 INP_WUNLOCK(inp); 1367 error = ip6_ctloutput(so, sopt); 1368 } 1369 #endif /* INET6 */ 1370 #if defined(INET6) && defined(INET) 1371 else 1372 #endif 1373 #ifdef INET 1374 { 1375 INP_WUNLOCK(inp); 1376 error = ip_ctloutput(so, sopt); 1377 } 1378 #endif 1379 return (error); 1380 } 1381 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 1382 INP_WUNLOCK(inp); 1383 return (ECONNRESET); 1384 } 1385 tp = intotcpcb(inp); 1386 /* 1387 * Protect the TCP option TCP_FUNCTION_BLK so 1388 * that a sub-function can *never* overwrite this. 1389 */ 1390 if ((sopt->sopt_dir == SOPT_SET) && 1391 (sopt->sopt_name == TCP_FUNCTION_BLK)) { 1392 INP_WUNLOCK(inp); 1393 error = sooptcopyin(sopt, &fsn, sizeof fsn, 1394 sizeof fsn); 1395 if (error) 1396 return (error); 1397 INP_WLOCK_RECHECK(inp); 1398 if (tp->t_state != TCPS_CLOSED) { 1399 /* 1400 * The user has advanced the state 1401 * past the initial point, we can't 1402 * switch since we are down the road 1403 * and a new set of functions may 1404 * not be compatibile. 1405 */ 1406 INP_WUNLOCK(inp); 1407 return(EINVAL); 1408 } 1409 blk = find_and_ref_tcp_functions(&fsn); 1410 if (blk == NULL) { 1411 INP_WUNLOCK(inp); 1412 return (ENOENT); 1413 } 1414 if (tp->t_fb != blk) { 1415 if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) { 1416 refcount_release(&blk->tfb_refcnt); 1417 INP_WUNLOCK(inp); 1418 return (ENOENT); 1419 } 1420 /* 1421 * Release the old refcnt, the 1422 * lookup acquires a ref on the 1423 * new one. 1424 */ 1425 if (tp->t_fb->tfb_tcp_fb_fini) 1426 (*tp->t_fb->tfb_tcp_fb_fini)(tp); 1427 refcount_release(&tp->t_fb->tfb_refcnt); 1428 tp->t_fb = blk; 1429 if (tp->t_fb->tfb_tcp_fb_init) { 1430 (*tp->t_fb->tfb_tcp_fb_init)(tp); 1431 } 1432 } 1433 #ifdef TCP_OFFLOAD 1434 if (tp->t_flags & TF_TOE) { 1435 tcp_offload_ctloutput(tp, sopt->sopt_dir, 1436 sopt->sopt_name); 1437 } 1438 #endif 1439 INP_WUNLOCK(inp); 1440 return (error); 1441 } else if ((sopt->sopt_dir == SOPT_GET) && 1442 (sopt->sopt_name == TCP_FUNCTION_BLK)) { 1443 strcpy(fsn.function_set_name, tp->t_fb->tfb_tcp_block_name); 1444 fsn.pcbcnt = tp->t_fb->tfb_refcnt; 1445 INP_WUNLOCK(inp); 1446 error = sooptcopyout(sopt, &fsn, sizeof fsn); 1447 return (error); 1448 } 1449 /* Pass in the INP locked, called must unlock it */ 1450 return (tp->t_fb->tfb_tcp_ctloutput(so, sopt, inp, tp)); 1451 } 1452 1453 int 1454 tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp) 1455 { 1456 int error, opt, optval; 1457 u_int ui; 1458 struct tcp_info ti; 1459 struct cc_algo *algo; 1460 char buf[TCP_CA_NAME_MAX]; 1461 1462 switch (sopt->sopt_dir) { 1463 case SOPT_SET: 1464 switch (sopt->sopt_name) { 1465 #ifdef TCP_SIGNATURE 1466 case TCP_MD5SIG: 1467 INP_WUNLOCK(inp); 1468 error = sooptcopyin(sopt, &optval, sizeof optval, 1469 sizeof optval); 1470 if (error) 1471 return (error); 1472 1473 INP_WLOCK_RECHECK(inp); 1474 if (optval > 0) 1475 tp->t_flags |= TF_SIGNATURE; 1476 else 1477 tp->t_flags &= ~TF_SIGNATURE; 1478 goto unlock_and_done; 1479 #endif /* TCP_SIGNATURE */ 1480 1481 case TCP_NODELAY: 1482 case TCP_NOOPT: 1483 INP_WUNLOCK(inp); 1484 error = sooptcopyin(sopt, &optval, sizeof optval, 1485 sizeof optval); 1486 if (error) 1487 return (error); 1488 1489 INP_WLOCK_RECHECK(inp); 1490 switch (sopt->sopt_name) { 1491 case TCP_NODELAY: 1492 opt = TF_NODELAY; 1493 break; 1494 case TCP_NOOPT: 1495 opt = TF_NOOPT; 1496 break; 1497 default: 1498 opt = 0; /* dead code to fool gcc */ 1499 break; 1500 } 1501 1502 if (optval) 1503 tp->t_flags |= opt; 1504 else 1505 tp->t_flags &= ~opt; 1506 unlock_and_done: 1507 #ifdef TCP_OFFLOAD 1508 if (tp->t_flags & TF_TOE) { 1509 tcp_offload_ctloutput(tp, sopt->sopt_dir, 1510 sopt->sopt_name); 1511 } 1512 #endif 1513 INP_WUNLOCK(inp); 1514 break; 1515 1516 case TCP_NOPUSH: 1517 INP_WUNLOCK(inp); 1518 error = sooptcopyin(sopt, &optval, sizeof optval, 1519 sizeof optval); 1520 if (error) 1521 return (error); 1522 1523 INP_WLOCK_RECHECK(inp); 1524 if (optval) 1525 tp->t_flags |= TF_NOPUSH; 1526 else if (tp->t_flags & TF_NOPUSH) { 1527 tp->t_flags &= ~TF_NOPUSH; 1528 if (TCPS_HAVEESTABLISHED(tp->t_state)) 1529 error = tp->t_fb->tfb_tcp_output(tp); 1530 } 1531 goto unlock_and_done; 1532 1533 case TCP_MAXSEG: 1534 INP_WUNLOCK(inp); 1535 error = sooptcopyin(sopt, &optval, sizeof optval, 1536 sizeof optval); 1537 if (error) 1538 return (error); 1539 1540 INP_WLOCK_RECHECK(inp); 1541 if (optval > 0 && optval <= tp->t_maxseg && 1542 optval + 40 >= V_tcp_minmss) 1543 tp->t_maxseg = optval; 1544 else 1545 error = EINVAL; 1546 goto unlock_and_done; 1547 1548 case TCP_INFO: 1549 INP_WUNLOCK(inp); 1550 error = EINVAL; 1551 break; 1552 1553 case TCP_CONGESTION: 1554 INP_WUNLOCK(inp); 1555 bzero(buf, sizeof(buf)); 1556 error = sooptcopyin(sopt, &buf, sizeof(buf), 1); 1557 if (error) 1558 break; 1559 INP_WLOCK_RECHECK(inp); 1560 /* 1561 * Return EINVAL if we can't find the requested cc algo. 1562 */ 1563 error = EINVAL; 1564 CC_LIST_RLOCK(); 1565 STAILQ_FOREACH(algo, &cc_list, entries) { 1566 if (strncmp(buf, algo->name, TCP_CA_NAME_MAX) 1567 == 0) { 1568 /* We've found the requested algo. */ 1569 error = 0; 1570 /* 1571 * We hold a write lock over the tcb 1572 * so it's safe to do these things 1573 * without ordering concerns. 1574 */ 1575 if (CC_ALGO(tp)->cb_destroy != NULL) 1576 CC_ALGO(tp)->cb_destroy(tp->ccv); 1577 CC_ALGO(tp) = algo; 1578 /* 1579 * If something goes pear shaped 1580 * initialising the new algo, 1581 * fall back to newreno (which 1582 * does not require initialisation). 1583 */ 1584 if (algo->cb_init != NULL) 1585 if (algo->cb_init(tp->ccv) > 0) { 1586 CC_ALGO(tp) = &newreno_cc_algo; 1587 /* 1588 * The only reason init 1589 * should fail is 1590 * because of malloc. 1591 */ 1592 error = ENOMEM; 1593 } 1594 break; /* Break the STAILQ_FOREACH. */ 1595 } 1596 } 1597 CC_LIST_RUNLOCK(); 1598 goto unlock_and_done; 1599 1600 case TCP_KEEPIDLE: 1601 case TCP_KEEPINTVL: 1602 case TCP_KEEPINIT: 1603 INP_WUNLOCK(inp); 1604 error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui)); 1605 if (error) 1606 return (error); 1607 1608 if (ui > (UINT_MAX / hz)) { 1609 error = EINVAL; 1610 break; 1611 } 1612 ui *= hz; 1613 1614 INP_WLOCK_RECHECK(inp); 1615 switch (sopt->sopt_name) { 1616 case TCP_KEEPIDLE: 1617 tp->t_keepidle = ui; 1618 /* 1619 * XXX: better check current remaining 1620 * timeout and "merge" it with new value. 1621 */ 1622 if ((tp->t_state > TCPS_LISTEN) && 1623 (tp->t_state <= TCPS_CLOSING)) 1624 tcp_timer_activate(tp, TT_KEEP, 1625 TP_KEEPIDLE(tp)); 1626 break; 1627 case TCP_KEEPINTVL: 1628 tp->t_keepintvl = ui; 1629 if ((tp->t_state == TCPS_FIN_WAIT_2) && 1630 (TP_MAXIDLE(tp) > 0)) 1631 tcp_timer_activate(tp, TT_2MSL, 1632 TP_MAXIDLE(tp)); 1633 break; 1634 case TCP_KEEPINIT: 1635 tp->t_keepinit = ui; 1636 if (tp->t_state == TCPS_SYN_RECEIVED || 1637 tp->t_state == TCPS_SYN_SENT) 1638 tcp_timer_activate(tp, TT_KEEP, 1639 TP_KEEPINIT(tp)); 1640 break; 1641 } 1642 goto unlock_and_done; 1643 1644 case TCP_KEEPCNT: 1645 INP_WUNLOCK(inp); 1646 error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui)); 1647 if (error) 1648 return (error); 1649 1650 INP_WLOCK_RECHECK(inp); 1651 tp->t_keepcnt = ui; 1652 if ((tp->t_state == TCPS_FIN_WAIT_2) && 1653 (TP_MAXIDLE(tp) > 0)) 1654 tcp_timer_activate(tp, TT_2MSL, 1655 TP_MAXIDLE(tp)); 1656 goto unlock_and_done; 1657 1658 #ifdef TCPPCAP 1659 case TCP_PCAP_OUT: 1660 case TCP_PCAP_IN: 1661 INP_WUNLOCK(inp); 1662 error = sooptcopyin(sopt, &optval, sizeof optval, 1663 sizeof optval); 1664 if (error) 1665 return (error); 1666 1667 INP_WLOCK_RECHECK(inp); 1668 if (optval >= 0) 1669 tcp_pcap_set_sock_max(TCP_PCAP_OUT ? 1670 &(tp->t_outpkts) : &(tp->t_inpkts), 1671 optval); 1672 else 1673 error = EINVAL; 1674 goto unlock_and_done; 1675 #endif 1676 1677 default: 1678 INP_WUNLOCK(inp); 1679 error = ENOPROTOOPT; 1680 break; 1681 } 1682 break; 1683 1684 case SOPT_GET: 1685 tp = intotcpcb(inp); 1686 switch (sopt->sopt_name) { 1687 #ifdef TCP_SIGNATURE 1688 case TCP_MD5SIG: 1689 optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 1690 INP_WUNLOCK(inp); 1691 error = sooptcopyout(sopt, &optval, sizeof optval); 1692 break; 1693 #endif 1694 1695 case TCP_NODELAY: 1696 optval = tp->t_flags & TF_NODELAY; 1697 INP_WUNLOCK(inp); 1698 error = sooptcopyout(sopt, &optval, sizeof optval); 1699 break; 1700 case TCP_MAXSEG: 1701 optval = tp->t_maxseg; 1702 INP_WUNLOCK(inp); 1703 error = sooptcopyout(sopt, &optval, sizeof optval); 1704 break; 1705 case TCP_NOOPT: 1706 optval = tp->t_flags & TF_NOOPT; 1707 INP_WUNLOCK(inp); 1708 error = sooptcopyout(sopt, &optval, sizeof optval); 1709 break; 1710 case TCP_NOPUSH: 1711 optval = tp->t_flags & TF_NOPUSH; 1712 INP_WUNLOCK(inp); 1713 error = sooptcopyout(sopt, &optval, sizeof optval); 1714 break; 1715 case TCP_INFO: 1716 tcp_fill_info(tp, &ti); 1717 INP_WUNLOCK(inp); 1718 error = sooptcopyout(sopt, &ti, sizeof ti); 1719 break; 1720 case TCP_CONGESTION: 1721 bzero(buf, sizeof(buf)); 1722 strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX); 1723 INP_WUNLOCK(inp); 1724 error = sooptcopyout(sopt, buf, TCP_CA_NAME_MAX); 1725 break; 1726 case TCP_KEEPIDLE: 1727 case TCP_KEEPINTVL: 1728 case TCP_KEEPINIT: 1729 case TCP_KEEPCNT: 1730 switch (sopt->sopt_name) { 1731 case TCP_KEEPIDLE: 1732 ui = tp->t_keepidle / hz; 1733 break; 1734 case TCP_KEEPINTVL: 1735 ui = tp->t_keepintvl / hz; 1736 break; 1737 case TCP_KEEPINIT: 1738 ui = tp->t_keepinit / hz; 1739 break; 1740 case TCP_KEEPCNT: 1741 ui = tp->t_keepcnt; 1742 break; 1743 } 1744 INP_WUNLOCK(inp); 1745 error = sooptcopyout(sopt, &ui, sizeof(ui)); 1746 break; 1747 #ifdef TCPPCAP 1748 case TCP_PCAP_OUT: 1749 case TCP_PCAP_IN: 1750 optval = tcp_pcap_get_sock_max(TCP_PCAP_OUT ? 1751 &(tp->t_outpkts) : &(tp->t_inpkts)); 1752 INP_WUNLOCK(inp); 1753 error = sooptcopyout(sopt, &optval, sizeof optval); 1754 break; 1755 #endif 1756 default: 1757 INP_WUNLOCK(inp); 1758 error = ENOPROTOOPT; 1759 break; 1760 } 1761 break; 1762 } 1763 return (error); 1764 } 1765 #undef INP_WLOCK_RECHECK 1766 1767 /* 1768 * Attach TCP protocol to socket, allocating 1769 * internet protocol control block, tcp control block, 1770 * bufer space, and entering LISTEN state if to accept connections. 1771 */ 1772 static int 1773 tcp_attach(struct socket *so) 1774 { 1775 struct tcpcb *tp; 1776 struct inpcb *inp; 1777 int error; 1778 1779 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 1780 error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace); 1781 if (error) 1782 return (error); 1783 } 1784 so->so_rcv.sb_flags |= SB_AUTOSIZE; 1785 so->so_snd.sb_flags |= SB_AUTOSIZE; 1786 INP_INFO_RLOCK(&V_tcbinfo); 1787 error = in_pcballoc(so, &V_tcbinfo); 1788 if (error) { 1789 INP_INFO_RUNLOCK(&V_tcbinfo); 1790 return (error); 1791 } 1792 inp = sotoinpcb(so); 1793 #ifdef INET6 1794 if (inp->inp_vflag & INP_IPV6PROTO) { 1795 inp->inp_vflag |= INP_IPV6; 1796 inp->in6p_hops = -1; /* use kernel default */ 1797 } 1798 else 1799 #endif 1800 inp->inp_vflag |= INP_IPV4; 1801 tp = tcp_newtcpcb(inp); 1802 if (tp == NULL) { 1803 in_pcbdetach(inp); 1804 in_pcbfree(inp); 1805 INP_INFO_RUNLOCK(&V_tcbinfo); 1806 return (ENOBUFS); 1807 } 1808 tp->t_state = TCPS_CLOSED; 1809 INP_WUNLOCK(inp); 1810 INP_INFO_RUNLOCK(&V_tcbinfo); 1811 return (0); 1812 } 1813 1814 /* 1815 * Initiate (or continue) disconnect. 1816 * If embryonic state, just send reset (once). 1817 * If in ``let data drain'' option and linger null, just drop. 1818 * Otherwise (hard), mark socket disconnecting and drop 1819 * current input data; switch states based on user close, and 1820 * send segment to peer (with FIN). 1821 */ 1822 static void 1823 tcp_disconnect(struct tcpcb *tp) 1824 { 1825 struct inpcb *inp = tp->t_inpcb; 1826 struct socket *so = inp->inp_socket; 1827 1828 INP_INFO_RLOCK_ASSERT(&V_tcbinfo); 1829 INP_WLOCK_ASSERT(inp); 1830 1831 /* 1832 * Neither tcp_close() nor tcp_drop() should return NULL, as the 1833 * socket is still open. 1834 */ 1835 if (tp->t_state < TCPS_ESTABLISHED) { 1836 tp = tcp_close(tp); 1837 KASSERT(tp != NULL, 1838 ("tcp_disconnect: tcp_close() returned NULL")); 1839 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) { 1840 tp = tcp_drop(tp, 0); 1841 KASSERT(tp != NULL, 1842 ("tcp_disconnect: tcp_drop() returned NULL")); 1843 } else { 1844 soisdisconnecting(so); 1845 sbflush(&so->so_rcv); 1846 tcp_usrclosed(tp); 1847 if (!(inp->inp_flags & INP_DROPPED)) 1848 tp->t_fb->tfb_tcp_output(tp); 1849 } 1850 } 1851 1852 /* 1853 * User issued close, and wish to trail through shutdown states: 1854 * if never received SYN, just forget it. If got a SYN from peer, 1855 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1856 * If already got a FIN from peer, then almost done; go to LAST_ACK 1857 * state. In all other cases, have already sent FIN to peer (e.g. 1858 * after PRU_SHUTDOWN), and just have to play tedious game waiting 1859 * for peer to send FIN or not respond to keep-alives, etc. 1860 * We can let the user exit from the close as soon as the FIN is acked. 1861 */ 1862 static void 1863 tcp_usrclosed(struct tcpcb *tp) 1864 { 1865 1866 INP_INFO_RLOCK_ASSERT(&V_tcbinfo); 1867 INP_WLOCK_ASSERT(tp->t_inpcb); 1868 1869 switch (tp->t_state) { 1870 case TCPS_LISTEN: 1871 #ifdef TCP_OFFLOAD 1872 tcp_offload_listen_stop(tp); 1873 #endif 1874 tcp_state_change(tp, TCPS_CLOSED); 1875 /* FALLTHROUGH */ 1876 case TCPS_CLOSED: 1877 tp = tcp_close(tp); 1878 /* 1879 * tcp_close() should never return NULL here as the socket is 1880 * still open. 1881 */ 1882 KASSERT(tp != NULL, 1883 ("tcp_usrclosed: tcp_close() returned NULL")); 1884 break; 1885 1886 case TCPS_SYN_SENT: 1887 case TCPS_SYN_RECEIVED: 1888 tp->t_flags |= TF_NEEDFIN; 1889 break; 1890 1891 case TCPS_ESTABLISHED: 1892 tcp_state_change(tp, TCPS_FIN_WAIT_1); 1893 break; 1894 1895 case TCPS_CLOSE_WAIT: 1896 tcp_state_change(tp, TCPS_LAST_ACK); 1897 break; 1898 } 1899 if (tp->t_state >= TCPS_FIN_WAIT_2) { 1900 soisdisconnected(tp->t_inpcb->inp_socket); 1901 /* Prevent the connection hanging in FIN_WAIT_2 forever. */ 1902 if (tp->t_state == TCPS_FIN_WAIT_2) { 1903 int timeout; 1904 1905 timeout = (tcp_fast_finwait2_recycle) ? 1906 tcp_finwait2_timeout : TP_MAXIDLE(tp); 1907 tcp_timer_activate(tp, TT_2MSL, timeout); 1908 } 1909 } 1910 } 1911 1912 #ifdef DDB 1913 static void 1914 db_print_indent(int indent) 1915 { 1916 int i; 1917 1918 for (i = 0; i < indent; i++) 1919 db_printf(" "); 1920 } 1921 1922 static void 1923 db_print_tstate(int t_state) 1924 { 1925 1926 switch (t_state) { 1927 case TCPS_CLOSED: 1928 db_printf("TCPS_CLOSED"); 1929 return; 1930 1931 case TCPS_LISTEN: 1932 db_printf("TCPS_LISTEN"); 1933 return; 1934 1935 case TCPS_SYN_SENT: 1936 db_printf("TCPS_SYN_SENT"); 1937 return; 1938 1939 case TCPS_SYN_RECEIVED: 1940 db_printf("TCPS_SYN_RECEIVED"); 1941 return; 1942 1943 case TCPS_ESTABLISHED: 1944 db_printf("TCPS_ESTABLISHED"); 1945 return; 1946 1947 case TCPS_CLOSE_WAIT: 1948 db_printf("TCPS_CLOSE_WAIT"); 1949 return; 1950 1951 case TCPS_FIN_WAIT_1: 1952 db_printf("TCPS_FIN_WAIT_1"); 1953 return; 1954 1955 case TCPS_CLOSING: 1956 db_printf("TCPS_CLOSING"); 1957 return; 1958 1959 case TCPS_LAST_ACK: 1960 db_printf("TCPS_LAST_ACK"); 1961 return; 1962 1963 case TCPS_FIN_WAIT_2: 1964 db_printf("TCPS_FIN_WAIT_2"); 1965 return; 1966 1967 case TCPS_TIME_WAIT: 1968 db_printf("TCPS_TIME_WAIT"); 1969 return; 1970 1971 default: 1972 db_printf("unknown"); 1973 return; 1974 } 1975 } 1976 1977 static void 1978 db_print_tflags(u_int t_flags) 1979 { 1980 int comma; 1981 1982 comma = 0; 1983 if (t_flags & TF_ACKNOW) { 1984 db_printf("%sTF_ACKNOW", comma ? ", " : ""); 1985 comma = 1; 1986 } 1987 if (t_flags & TF_DELACK) { 1988 db_printf("%sTF_DELACK", comma ? ", " : ""); 1989 comma = 1; 1990 } 1991 if (t_flags & TF_NODELAY) { 1992 db_printf("%sTF_NODELAY", comma ? ", " : ""); 1993 comma = 1; 1994 } 1995 if (t_flags & TF_NOOPT) { 1996 db_printf("%sTF_NOOPT", comma ? ", " : ""); 1997 comma = 1; 1998 } 1999 if (t_flags & TF_SENTFIN) { 2000 db_printf("%sTF_SENTFIN", comma ? ", " : ""); 2001 comma = 1; 2002 } 2003 if (t_flags & TF_REQ_SCALE) { 2004 db_printf("%sTF_REQ_SCALE", comma ? ", " : ""); 2005 comma = 1; 2006 } 2007 if (t_flags & TF_RCVD_SCALE) { 2008 db_printf("%sTF_RECVD_SCALE", comma ? ", " : ""); 2009 comma = 1; 2010 } 2011 if (t_flags & TF_REQ_TSTMP) { 2012 db_printf("%sTF_REQ_TSTMP", comma ? ", " : ""); 2013 comma = 1; 2014 } 2015 if (t_flags & TF_RCVD_TSTMP) { 2016 db_printf("%sTF_RCVD_TSTMP", comma ? ", " : ""); 2017 comma = 1; 2018 } 2019 if (t_flags & TF_SACK_PERMIT) { 2020 db_printf("%sTF_SACK_PERMIT", comma ? ", " : ""); 2021 comma = 1; 2022 } 2023 if (t_flags & TF_NEEDSYN) { 2024 db_printf("%sTF_NEEDSYN", comma ? ", " : ""); 2025 comma = 1; 2026 } 2027 if (t_flags & TF_NEEDFIN) { 2028 db_printf("%sTF_NEEDFIN", comma ? ", " : ""); 2029 comma = 1; 2030 } 2031 if (t_flags & TF_NOPUSH) { 2032 db_printf("%sTF_NOPUSH", comma ? ", " : ""); 2033 comma = 1; 2034 } 2035 if (t_flags & TF_MORETOCOME) { 2036 db_printf("%sTF_MORETOCOME", comma ? ", " : ""); 2037 comma = 1; 2038 } 2039 if (t_flags & TF_LQ_OVERFLOW) { 2040 db_printf("%sTF_LQ_OVERFLOW", comma ? ", " : ""); 2041 comma = 1; 2042 } 2043 if (t_flags & TF_LASTIDLE) { 2044 db_printf("%sTF_LASTIDLE", comma ? ", " : ""); 2045 comma = 1; 2046 } 2047 if (t_flags & TF_RXWIN0SENT) { 2048 db_printf("%sTF_RXWIN0SENT", comma ? ", " : ""); 2049 comma = 1; 2050 } 2051 if (t_flags & TF_FASTRECOVERY) { 2052 db_printf("%sTF_FASTRECOVERY", comma ? ", " : ""); 2053 comma = 1; 2054 } 2055 if (t_flags & TF_CONGRECOVERY) { 2056 db_printf("%sTF_CONGRECOVERY", comma ? ", " : ""); 2057 comma = 1; 2058 } 2059 if (t_flags & TF_WASFRECOVERY) { 2060 db_printf("%sTF_WASFRECOVERY", comma ? ", " : ""); 2061 comma = 1; 2062 } 2063 if (t_flags & TF_SIGNATURE) { 2064 db_printf("%sTF_SIGNATURE", comma ? ", " : ""); 2065 comma = 1; 2066 } 2067 if (t_flags & TF_FORCEDATA) { 2068 db_printf("%sTF_FORCEDATA", comma ? ", " : ""); 2069 comma = 1; 2070 } 2071 if (t_flags & TF_TSO) { 2072 db_printf("%sTF_TSO", comma ? ", " : ""); 2073 comma = 1; 2074 } 2075 if (t_flags & TF_ECN_PERMIT) { 2076 db_printf("%sTF_ECN_PERMIT", comma ? ", " : ""); 2077 comma = 1; 2078 } 2079 } 2080 2081 static void 2082 db_print_toobflags(char t_oobflags) 2083 { 2084 int comma; 2085 2086 comma = 0; 2087 if (t_oobflags & TCPOOB_HAVEDATA) { 2088 db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : ""); 2089 comma = 1; 2090 } 2091 if (t_oobflags & TCPOOB_HADDATA) { 2092 db_printf("%sTCPOOB_HADDATA", comma ? ", " : ""); 2093 comma = 1; 2094 } 2095 } 2096 2097 static void 2098 db_print_tcpcb(struct tcpcb *tp, const char *name, int indent) 2099 { 2100 2101 db_print_indent(indent); 2102 db_printf("%s at %p\n", name, tp); 2103 2104 indent += 2; 2105 2106 db_print_indent(indent); 2107 db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n", 2108 LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks); 2109 2110 db_print_indent(indent); 2111 db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n", 2112 &tp->t_timers->tt_rexmt, &tp->t_timers->tt_persist, &tp->t_timers->tt_keep); 2113 2114 db_print_indent(indent); 2115 db_printf("tt_2msl: %p tt_delack: %p t_inpcb: %p\n", &tp->t_timers->tt_2msl, 2116 &tp->t_timers->tt_delack, tp->t_inpcb); 2117 2118 db_print_indent(indent); 2119 db_printf("t_state: %d (", tp->t_state); 2120 db_print_tstate(tp->t_state); 2121 db_printf(")\n"); 2122 2123 db_print_indent(indent); 2124 db_printf("t_flags: 0x%x (", tp->t_flags); 2125 db_print_tflags(tp->t_flags); 2126 db_printf(")\n"); 2127 2128 db_print_indent(indent); 2129 db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: x0%08x\n", 2130 tp->snd_una, tp->snd_max, tp->snd_nxt); 2131 2132 db_print_indent(indent); 2133 db_printf("snd_up: 0x%08x snd_wl1: 0x%08x snd_wl2: 0x%08x\n", 2134 tp->snd_up, tp->snd_wl1, tp->snd_wl2); 2135 2136 db_print_indent(indent); 2137 db_printf("iss: 0x%08x irs: 0x%08x rcv_nxt: 0x%08x\n", 2138 tp->iss, tp->irs, tp->rcv_nxt); 2139 2140 db_print_indent(indent); 2141 db_printf("rcv_adv: 0x%08x rcv_wnd: %lu rcv_up: 0x%08x\n", 2142 tp->rcv_adv, tp->rcv_wnd, tp->rcv_up); 2143 2144 db_print_indent(indent); 2145 db_printf("snd_wnd: %lu snd_cwnd: %lu\n", 2146 tp->snd_wnd, tp->snd_cwnd); 2147 2148 db_print_indent(indent); 2149 db_printf("snd_ssthresh: %lu snd_recover: " 2150 "0x%08x\n", tp->snd_ssthresh, tp->snd_recover); 2151 2152 db_print_indent(indent); 2153 db_printf("t_maxopd: %u t_rcvtime: %u t_startime: %u\n", 2154 tp->t_maxopd, tp->t_rcvtime, tp->t_starttime); 2155 2156 db_print_indent(indent); 2157 db_printf("t_rttime: %u t_rtsq: 0x%08x\n", 2158 tp->t_rtttime, tp->t_rtseq); 2159 2160 db_print_indent(indent); 2161 db_printf("t_rxtcur: %d t_maxseg: %u t_srtt: %d\n", 2162 tp->t_rxtcur, tp->t_maxseg, tp->t_srtt); 2163 2164 db_print_indent(indent); 2165 db_printf("t_rttvar: %d t_rxtshift: %d t_rttmin: %u " 2166 "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin, 2167 tp->t_rttbest); 2168 2169 db_print_indent(indent); 2170 db_printf("t_rttupdated: %lu max_sndwnd: %lu t_softerror: %d\n", 2171 tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror); 2172 2173 db_print_indent(indent); 2174 db_printf("t_oobflags: 0x%x (", tp->t_oobflags); 2175 db_print_toobflags(tp->t_oobflags); 2176 db_printf(") t_iobc: 0x%02x\n", tp->t_iobc); 2177 2178 db_print_indent(indent); 2179 db_printf("snd_scale: %u rcv_scale: %u request_r_scale: %u\n", 2180 tp->snd_scale, tp->rcv_scale, tp->request_r_scale); 2181 2182 db_print_indent(indent); 2183 db_printf("ts_recent: %u ts_recent_age: %u\n", 2184 tp->ts_recent, tp->ts_recent_age); 2185 2186 db_print_indent(indent); 2187 db_printf("ts_offset: %u last_ack_sent: 0x%08x snd_cwnd_prev: " 2188 "%lu\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev); 2189 2190 db_print_indent(indent); 2191 db_printf("snd_ssthresh_prev: %lu snd_recover_prev: 0x%08x " 2192 "t_badrxtwin: %u\n", tp->snd_ssthresh_prev, 2193 tp->snd_recover_prev, tp->t_badrxtwin); 2194 2195 db_print_indent(indent); 2196 db_printf("snd_numholes: %d snd_holes first: %p\n", 2197 tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes)); 2198 2199 db_print_indent(indent); 2200 db_printf("snd_fack: 0x%08x rcv_numsacks: %d sack_newdata: " 2201 "0x%08x\n", tp->snd_fack, tp->rcv_numsacks, tp->sack_newdata); 2202 2203 /* Skip sackblks, sackhint. */ 2204 2205 db_print_indent(indent); 2206 db_printf("t_rttlow: %d rfbuf_ts: %u rfbuf_cnt: %d\n", 2207 tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt); 2208 } 2209 2210 DB_SHOW_COMMAND(tcpcb, db_show_tcpcb) 2211 { 2212 struct tcpcb *tp; 2213 2214 if (!have_addr) { 2215 db_printf("usage: show tcpcb <addr>\n"); 2216 return; 2217 } 2218 tp = (struct tcpcb *)addr; 2219 2220 db_print_tcpcb(tp, "tcpcb", 0); 2221 } 2222 #endif 2223