1 /*- 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2006-2007 Robert N. M. Watson 5 * Copyright (c) 2010-2011 Juniper Networks, Inc. 6 * All rights reserved. 7 * 8 * Portions of this software were developed by Robert N. M. Watson under 9 * contract to Juniper Networks, Inc. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 36 */ 37 38 #include <sys/cdefs.h> 39 __FBSDID("$FreeBSD$"); 40 41 #include "opt_ddb.h" 42 #include "opt_inet.h" 43 #include "opt_inet6.h" 44 #include "opt_tcpdebug.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/limits.h> 49 #include <sys/malloc.h> 50 #include <sys/kernel.h> 51 #include <sys/sysctl.h> 52 #include <sys/mbuf.h> 53 #ifdef INET6 54 #include <sys/domain.h> 55 #endif /* INET6 */ 56 #include <sys/socket.h> 57 #include <sys/socketvar.h> 58 #include <sys/protosw.h> 59 #include <sys/proc.h> 60 #include <sys/jail.h> 61 62 #ifdef DDB 63 #include <ddb/ddb.h> 64 #endif 65 66 #include <net/if.h> 67 #include <net/if_var.h> 68 #include <net/route.h> 69 #include <net/vnet.h> 70 71 #include <netinet/cc.h> 72 #include <netinet/in.h> 73 #include <netinet/in_pcb.h> 74 #include <netinet/in_systm.h> 75 #include <netinet/in_var.h> 76 #include <netinet/ip_var.h> 77 #ifdef INET6 78 #include <netinet/ip6.h> 79 #include <netinet6/in6_pcb.h> 80 #include <netinet6/ip6_var.h> 81 #include <netinet6/scope6_var.h> 82 #endif 83 #include <netinet/tcp_fsm.h> 84 #include <netinet/tcp_seq.h> 85 #include <netinet/tcp_timer.h> 86 #include <netinet/tcp_var.h> 87 #include <netinet/tcpip.h> 88 #ifdef TCPDEBUG 89 #include <netinet/tcp_debug.h> 90 #endif 91 #ifdef TCP_OFFLOAD 92 #include <netinet/tcp_offload.h> 93 #endif 94 95 /* 96 * TCP protocol interface to socket abstraction. 97 */ 98 static int tcp_attach(struct socket *); 99 #ifdef INET 100 static int tcp_connect(struct tcpcb *, struct sockaddr *, 101 struct thread *td); 102 #endif /* INET */ 103 #ifdef INET6 104 static int tcp6_connect(struct tcpcb *, struct sockaddr *, 105 struct thread *td); 106 #endif /* INET6 */ 107 static void tcp_disconnect(struct tcpcb *); 108 static void tcp_usrclosed(struct tcpcb *); 109 static void tcp_fill_info(struct tcpcb *, struct tcp_info *); 110 111 #ifdef TCPDEBUG 112 #define TCPDEBUG0 int ostate = 0 113 #define TCPDEBUG1() ostate = tp ? tp->t_state : 0 114 #define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ 115 tcp_trace(TA_USER, ostate, tp, 0, 0, req) 116 #else 117 #define TCPDEBUG0 118 #define TCPDEBUG1() 119 #define TCPDEBUG2(req) 120 #endif 121 122 /* 123 * TCP attaches to socket via pru_attach(), reserving space, 124 * and an internet control block. 125 */ 126 static int 127 tcp_usr_attach(struct socket *so, int proto, struct thread *td) 128 { 129 struct inpcb *inp; 130 struct tcpcb *tp = NULL; 131 int error; 132 TCPDEBUG0; 133 134 inp = sotoinpcb(so); 135 KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL")); 136 TCPDEBUG1(); 137 138 error = tcp_attach(so); 139 if (error) 140 goto out; 141 142 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 143 so->so_linger = TCP_LINGERTIME; 144 145 inp = sotoinpcb(so); 146 tp = intotcpcb(inp); 147 out: 148 TCPDEBUG2(PRU_ATTACH); 149 return error; 150 } 151 152 /* 153 * tcp_detach is called when the socket layer loses its final reference 154 * to the socket, be it a file descriptor reference, a reference from TCP, 155 * etc. At this point, there is only one case in which we will keep around 156 * inpcb state: time wait. 157 * 158 * This function can probably be re-absorbed back into tcp_usr_detach() now 159 * that there is a single detach path. 160 */ 161 static void 162 tcp_detach(struct socket *so, struct inpcb *inp) 163 { 164 struct tcpcb *tp; 165 166 INP_INFO_LOCK_ASSERT(&V_tcbinfo); 167 INP_WLOCK_ASSERT(inp); 168 169 KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp")); 170 KASSERT(inp->inp_socket == so, ("tcp_detach: inp_socket != so")); 171 172 tp = intotcpcb(inp); 173 174 if (inp->inp_flags & INP_TIMEWAIT) { 175 /* 176 * There are two cases to handle: one in which the time wait 177 * state is being discarded (INP_DROPPED), and one in which 178 * this connection will remain in timewait. In the former, 179 * it is time to discard all state (except tcptw, which has 180 * already been discarded by the timewait close code, which 181 * should be further up the call stack somewhere). In the 182 * latter case, we detach from the socket, but leave the pcb 183 * present until timewait ends. 184 * 185 * XXXRW: Would it be cleaner to free the tcptw here? 186 * 187 * Astute question indeed, from twtcp perspective there are 188 * three cases to consider: 189 * 190 * #1 tcp_detach is called at tcptw creation time by 191 * tcp_twstart, then do not discard the newly created tcptw 192 * and leave inpcb present until timewait ends 193 * #2 tcp_detach is called at timewait end (or reuse) by 194 * tcp_twclose, then the tcptw has already been discarded 195 * (or reused) and inpcb is freed here 196 * #3 tcp_detach is called() after timewait ends (or reuse) 197 * (e.g. by soclose), then tcptw has already been discarded 198 * (or reused) and inpcb is freed here 199 * 200 * In all three cases the tcptw should not be freed here. 201 */ 202 if (inp->inp_flags & INP_DROPPED) { 203 KASSERT(tp == NULL, ("tcp_detach: INP_TIMEWAIT && " 204 "INP_DROPPED && tp != NULL")); 205 in_pcbdetach(inp); 206 in_pcbfree(inp); 207 } else { 208 in_pcbdetach(inp); 209 INP_WUNLOCK(inp); 210 } 211 } else { 212 /* 213 * If the connection is not in timewait, we consider two 214 * two conditions: one in which no further processing is 215 * necessary (dropped || embryonic), and one in which TCP is 216 * not yet done, but no longer requires the socket, so the 217 * pcb will persist for the time being. 218 * 219 * XXXRW: Does the second case still occur? 220 */ 221 if (inp->inp_flags & INP_DROPPED || 222 tp->t_state < TCPS_SYN_SENT) { 223 tcp_discardcb(tp); 224 in_pcbdetach(inp); 225 in_pcbfree(inp); 226 } else { 227 in_pcbdetach(inp); 228 INP_WUNLOCK(inp); 229 } 230 } 231 } 232 233 /* 234 * pru_detach() detaches the TCP protocol from the socket. 235 * If the protocol state is non-embryonic, then can't 236 * do this directly: have to initiate a pru_disconnect(), 237 * which may finish later; embryonic TCB's can just 238 * be discarded here. 239 */ 240 static void 241 tcp_usr_detach(struct socket *so) 242 { 243 struct inpcb *inp; 244 int rlock = 0; 245 246 inp = sotoinpcb(so); 247 KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL")); 248 if (!INP_INFO_WLOCKED(&V_tcbinfo)) { 249 INP_INFO_RLOCK(&V_tcbinfo); 250 rlock = 1; 251 } 252 INP_WLOCK(inp); 253 KASSERT(inp->inp_socket != NULL, 254 ("tcp_usr_detach: inp_socket == NULL")); 255 tcp_detach(so, inp); 256 if (rlock) 257 INP_INFO_RUNLOCK(&V_tcbinfo); 258 } 259 260 #ifdef INET 261 /* 262 * Give the socket an address. 263 */ 264 static int 265 tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 266 { 267 int error = 0; 268 struct inpcb *inp; 269 struct tcpcb *tp = NULL; 270 struct sockaddr_in *sinp; 271 272 sinp = (struct sockaddr_in *)nam; 273 if (nam->sa_len != sizeof (*sinp)) 274 return (EINVAL); 275 /* 276 * Must check for multicast addresses and disallow binding 277 * to them. 278 */ 279 if (sinp->sin_family == AF_INET && 280 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) 281 return (EAFNOSUPPORT); 282 283 TCPDEBUG0; 284 inp = sotoinpcb(so); 285 KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL")); 286 INP_WLOCK(inp); 287 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 288 error = EINVAL; 289 goto out; 290 } 291 tp = intotcpcb(inp); 292 TCPDEBUG1(); 293 INP_HASH_WLOCK(&V_tcbinfo); 294 error = in_pcbbind(inp, nam, td->td_ucred); 295 INP_HASH_WUNLOCK(&V_tcbinfo); 296 out: 297 TCPDEBUG2(PRU_BIND); 298 INP_WUNLOCK(inp); 299 300 return (error); 301 } 302 #endif /* INET */ 303 304 #ifdef INET6 305 static int 306 tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 307 { 308 int error = 0; 309 struct inpcb *inp; 310 struct tcpcb *tp = NULL; 311 struct sockaddr_in6 *sin6p; 312 313 sin6p = (struct sockaddr_in6 *)nam; 314 if (nam->sa_len != sizeof (*sin6p)) 315 return (EINVAL); 316 /* 317 * Must check for multicast addresses and disallow binding 318 * to them. 319 */ 320 if (sin6p->sin6_family == AF_INET6 && 321 IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) 322 return (EAFNOSUPPORT); 323 324 TCPDEBUG0; 325 inp = sotoinpcb(so); 326 KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL")); 327 INP_WLOCK(inp); 328 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 329 error = EINVAL; 330 goto out; 331 } 332 tp = intotcpcb(inp); 333 TCPDEBUG1(); 334 INP_HASH_WLOCK(&V_tcbinfo); 335 inp->inp_vflag &= ~INP_IPV4; 336 inp->inp_vflag |= INP_IPV6; 337 #ifdef INET 338 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { 339 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) 340 inp->inp_vflag |= INP_IPV4; 341 else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 342 struct sockaddr_in sin; 343 344 in6_sin6_2_sin(&sin, sin6p); 345 inp->inp_vflag |= INP_IPV4; 346 inp->inp_vflag &= ~INP_IPV6; 347 error = in_pcbbind(inp, (struct sockaddr *)&sin, 348 td->td_ucred); 349 INP_HASH_WUNLOCK(&V_tcbinfo); 350 goto out; 351 } 352 } 353 #endif 354 error = in6_pcbbind(inp, nam, td->td_ucred); 355 INP_HASH_WUNLOCK(&V_tcbinfo); 356 out: 357 TCPDEBUG2(PRU_BIND); 358 INP_WUNLOCK(inp); 359 return (error); 360 } 361 #endif /* INET6 */ 362 363 #ifdef INET 364 /* 365 * Prepare to accept connections. 366 */ 367 static int 368 tcp_usr_listen(struct socket *so, int backlog, struct thread *td) 369 { 370 int error = 0; 371 struct inpcb *inp; 372 struct tcpcb *tp = NULL; 373 374 TCPDEBUG0; 375 inp = sotoinpcb(so); 376 KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL")); 377 INP_WLOCK(inp); 378 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 379 error = EINVAL; 380 goto out; 381 } 382 tp = intotcpcb(inp); 383 TCPDEBUG1(); 384 SOCK_LOCK(so); 385 error = solisten_proto_check(so); 386 INP_HASH_WLOCK(&V_tcbinfo); 387 if (error == 0 && inp->inp_lport == 0) 388 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 389 INP_HASH_WUNLOCK(&V_tcbinfo); 390 if (error == 0) { 391 tcp_state_change(tp, TCPS_LISTEN); 392 solisten_proto(so, backlog); 393 #ifdef TCP_OFFLOAD 394 if ((so->so_options & SO_NO_OFFLOAD) == 0) 395 tcp_offload_listen_start(tp); 396 #endif 397 } 398 SOCK_UNLOCK(so); 399 400 out: 401 TCPDEBUG2(PRU_LISTEN); 402 INP_WUNLOCK(inp); 403 return (error); 404 } 405 #endif /* INET */ 406 407 #ifdef INET6 408 static int 409 tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) 410 { 411 int error = 0; 412 struct inpcb *inp; 413 struct tcpcb *tp = NULL; 414 415 TCPDEBUG0; 416 inp = sotoinpcb(so); 417 KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL")); 418 INP_WLOCK(inp); 419 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 420 error = EINVAL; 421 goto out; 422 } 423 tp = intotcpcb(inp); 424 TCPDEBUG1(); 425 SOCK_LOCK(so); 426 error = solisten_proto_check(so); 427 INP_HASH_WLOCK(&V_tcbinfo); 428 if (error == 0 && inp->inp_lport == 0) { 429 inp->inp_vflag &= ~INP_IPV4; 430 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) 431 inp->inp_vflag |= INP_IPV4; 432 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 433 } 434 INP_HASH_WUNLOCK(&V_tcbinfo); 435 if (error == 0) { 436 tcp_state_change(tp, TCPS_LISTEN); 437 solisten_proto(so, backlog); 438 #ifdef TCP_OFFLOAD 439 if ((so->so_options & SO_NO_OFFLOAD) == 0) 440 tcp_offload_listen_start(tp); 441 #endif 442 } 443 SOCK_UNLOCK(so); 444 445 out: 446 TCPDEBUG2(PRU_LISTEN); 447 INP_WUNLOCK(inp); 448 return (error); 449 } 450 #endif /* INET6 */ 451 452 #ifdef INET 453 /* 454 * Initiate connection to peer. 455 * Create a template for use in transmissions on this connection. 456 * Enter SYN_SENT state, and mark socket as connecting. 457 * Start keep-alive timer, and seed output sequence space. 458 * Send initial segment on connection. 459 */ 460 static int 461 tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 462 { 463 int error = 0; 464 struct inpcb *inp; 465 struct tcpcb *tp = NULL; 466 struct sockaddr_in *sinp; 467 468 sinp = (struct sockaddr_in *)nam; 469 if (nam->sa_len != sizeof (*sinp)) 470 return (EINVAL); 471 /* 472 * Must disallow TCP ``connections'' to multicast addresses. 473 */ 474 if (sinp->sin_family == AF_INET 475 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) 476 return (EAFNOSUPPORT); 477 if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr)) != 0) 478 return (error); 479 480 TCPDEBUG0; 481 inp = sotoinpcb(so); 482 KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL")); 483 INP_WLOCK(inp); 484 if (inp->inp_flags & INP_TIMEWAIT) { 485 error = EADDRINUSE; 486 goto out; 487 } 488 if (inp->inp_flags & INP_DROPPED) { 489 error = ECONNREFUSED; 490 goto out; 491 } 492 tp = intotcpcb(inp); 493 TCPDEBUG1(); 494 if ((error = tcp_connect(tp, nam, td)) != 0) 495 goto out; 496 #ifdef TCP_OFFLOAD 497 if (registered_toedevs > 0 && 498 (so->so_options & SO_NO_OFFLOAD) == 0 && 499 (error = tcp_offload_connect(so, nam)) == 0) 500 goto out; 501 #endif 502 tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); 503 error = tcp_output(tp); 504 out: 505 TCPDEBUG2(PRU_CONNECT); 506 INP_WUNLOCK(inp); 507 return (error); 508 } 509 #endif /* INET */ 510 511 #ifdef INET6 512 static int 513 tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 514 { 515 int error = 0; 516 struct inpcb *inp; 517 struct tcpcb *tp = NULL; 518 struct sockaddr_in6 *sin6p; 519 520 TCPDEBUG0; 521 522 sin6p = (struct sockaddr_in6 *)nam; 523 if (nam->sa_len != sizeof (*sin6p)) 524 return (EINVAL); 525 /* 526 * Must disallow TCP ``connections'' to multicast addresses. 527 */ 528 if (sin6p->sin6_family == AF_INET6 529 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) 530 return (EAFNOSUPPORT); 531 532 inp = sotoinpcb(so); 533 KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL")); 534 INP_WLOCK(inp); 535 if (inp->inp_flags & INP_TIMEWAIT) { 536 error = EADDRINUSE; 537 goto out; 538 } 539 if (inp->inp_flags & INP_DROPPED) { 540 error = ECONNREFUSED; 541 goto out; 542 } 543 tp = intotcpcb(inp); 544 TCPDEBUG1(); 545 #ifdef INET 546 /* 547 * XXXRW: Some confusion: V4/V6 flags relate to binding, and 548 * therefore probably require the hash lock, which isn't held here. 549 * Is this a significant problem? 550 */ 551 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 552 struct sockaddr_in sin; 553 554 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { 555 error = EINVAL; 556 goto out; 557 } 558 559 in6_sin6_2_sin(&sin, sin6p); 560 inp->inp_vflag |= INP_IPV4; 561 inp->inp_vflag &= ~INP_IPV6; 562 if ((error = prison_remote_ip4(td->td_ucred, 563 &sin.sin_addr)) != 0) 564 goto out; 565 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0) 566 goto out; 567 #ifdef TCP_OFFLOAD 568 if (registered_toedevs > 0 && 569 (so->so_options & SO_NO_OFFLOAD) == 0 && 570 (error = tcp_offload_connect(so, nam)) == 0) 571 goto out; 572 #endif 573 error = tcp_output(tp); 574 goto out; 575 } 576 #endif 577 inp->inp_vflag &= ~INP_IPV4; 578 inp->inp_vflag |= INP_IPV6; 579 inp->inp_inc.inc_flags |= INC_ISIPV6; 580 if ((error = prison_remote_ip6(td->td_ucred, &sin6p->sin6_addr)) != 0) 581 goto out; 582 if ((error = tcp6_connect(tp, nam, td)) != 0) 583 goto out; 584 #ifdef TCP_OFFLOAD 585 if (registered_toedevs > 0 && 586 (so->so_options & SO_NO_OFFLOAD) == 0 && 587 (error = tcp_offload_connect(so, nam)) == 0) 588 goto out; 589 #endif 590 tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); 591 error = tcp_output(tp); 592 593 out: 594 TCPDEBUG2(PRU_CONNECT); 595 INP_WUNLOCK(inp); 596 return (error); 597 } 598 #endif /* INET6 */ 599 600 /* 601 * Initiate disconnect from peer. 602 * If connection never passed embryonic stage, just drop; 603 * else if don't need to let data drain, then can just drop anyways, 604 * else have to begin TCP shutdown process: mark socket disconnecting, 605 * drain unread data, state switch to reflect user close, and 606 * send segment (e.g. FIN) to peer. Socket will be really disconnected 607 * when peer sends FIN and acks ours. 608 * 609 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 610 */ 611 static int 612 tcp_usr_disconnect(struct socket *so) 613 { 614 struct inpcb *inp; 615 struct tcpcb *tp = NULL; 616 int error = 0; 617 618 TCPDEBUG0; 619 INP_INFO_RLOCK(&V_tcbinfo); 620 inp = sotoinpcb(so); 621 KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL")); 622 INP_WLOCK(inp); 623 if (inp->inp_flags & INP_TIMEWAIT) 624 goto out; 625 if (inp->inp_flags & INP_DROPPED) { 626 error = ECONNRESET; 627 goto out; 628 } 629 tp = intotcpcb(inp); 630 TCPDEBUG1(); 631 tcp_disconnect(tp); 632 out: 633 TCPDEBUG2(PRU_DISCONNECT); 634 INP_WUNLOCK(inp); 635 INP_INFO_RUNLOCK(&V_tcbinfo); 636 return (error); 637 } 638 639 #ifdef INET 640 /* 641 * Accept a connection. Essentially all the work is done at higher levels; 642 * just return the address of the peer, storing through addr. 643 */ 644 static int 645 tcp_usr_accept(struct socket *so, struct sockaddr **nam) 646 { 647 int error = 0; 648 struct inpcb *inp = NULL; 649 struct tcpcb *tp = NULL; 650 struct in_addr addr; 651 in_port_t port = 0; 652 TCPDEBUG0; 653 654 if (so->so_state & SS_ISDISCONNECTED) 655 return (ECONNABORTED); 656 657 inp = sotoinpcb(so); 658 KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL")); 659 INP_WLOCK(inp); 660 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 661 error = ECONNABORTED; 662 goto out; 663 } 664 tp = intotcpcb(inp); 665 TCPDEBUG1(); 666 667 /* 668 * We inline in_getpeeraddr and COMMON_END here, so that we can 669 * copy the data of interest and defer the malloc until after we 670 * release the lock. 671 */ 672 port = inp->inp_fport; 673 addr = inp->inp_faddr; 674 675 out: 676 TCPDEBUG2(PRU_ACCEPT); 677 INP_WUNLOCK(inp); 678 if (error == 0) 679 *nam = in_sockaddr(port, &addr); 680 return error; 681 } 682 #endif /* INET */ 683 684 #ifdef INET6 685 static int 686 tcp6_usr_accept(struct socket *so, struct sockaddr **nam) 687 { 688 struct inpcb *inp = NULL; 689 int error = 0; 690 struct tcpcb *tp = NULL; 691 struct in_addr addr; 692 struct in6_addr addr6; 693 in_port_t port = 0; 694 int v4 = 0; 695 TCPDEBUG0; 696 697 if (so->so_state & SS_ISDISCONNECTED) 698 return (ECONNABORTED); 699 700 inp = sotoinpcb(so); 701 KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL")); 702 INP_INFO_RLOCK(&V_tcbinfo); 703 INP_WLOCK(inp); 704 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 705 error = ECONNABORTED; 706 goto out; 707 } 708 tp = intotcpcb(inp); 709 TCPDEBUG1(); 710 711 /* 712 * We inline in6_mapped_peeraddr and COMMON_END here, so that we can 713 * copy the data of interest and defer the malloc until after we 714 * release the lock. 715 */ 716 if (inp->inp_vflag & INP_IPV4) { 717 v4 = 1; 718 port = inp->inp_fport; 719 addr = inp->inp_faddr; 720 } else { 721 port = inp->inp_fport; 722 addr6 = inp->in6p_faddr; 723 } 724 725 out: 726 TCPDEBUG2(PRU_ACCEPT); 727 INP_WUNLOCK(inp); 728 INP_INFO_RUNLOCK(&V_tcbinfo); 729 if (error == 0) { 730 if (v4) 731 *nam = in6_v4mapsin6_sockaddr(port, &addr); 732 else 733 *nam = in6_sockaddr(port, &addr6); 734 } 735 return error; 736 } 737 #endif /* INET6 */ 738 739 /* 740 * Mark the connection as being incapable of further output. 741 */ 742 static int 743 tcp_usr_shutdown(struct socket *so) 744 { 745 int error = 0; 746 struct inpcb *inp; 747 struct tcpcb *tp = NULL; 748 749 TCPDEBUG0; 750 INP_INFO_RLOCK(&V_tcbinfo); 751 inp = sotoinpcb(so); 752 KASSERT(inp != NULL, ("inp == NULL")); 753 INP_WLOCK(inp); 754 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 755 error = ECONNRESET; 756 goto out; 757 } 758 tp = intotcpcb(inp); 759 TCPDEBUG1(); 760 socantsendmore(so); 761 tcp_usrclosed(tp); 762 if (!(inp->inp_flags & INP_DROPPED)) 763 error = tcp_output(tp); 764 765 out: 766 TCPDEBUG2(PRU_SHUTDOWN); 767 INP_WUNLOCK(inp); 768 INP_INFO_RUNLOCK(&V_tcbinfo); 769 770 return (error); 771 } 772 773 /* 774 * After a receive, possibly send window update to peer. 775 */ 776 static int 777 tcp_usr_rcvd(struct socket *so, int flags) 778 { 779 struct inpcb *inp; 780 struct tcpcb *tp = NULL; 781 int error = 0; 782 783 TCPDEBUG0; 784 inp = sotoinpcb(so); 785 KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL")); 786 INP_WLOCK(inp); 787 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 788 error = ECONNRESET; 789 goto out; 790 } 791 tp = intotcpcb(inp); 792 TCPDEBUG1(); 793 #ifdef TCP_OFFLOAD 794 if (tp->t_flags & TF_TOE) 795 tcp_offload_rcvd(tp); 796 else 797 #endif 798 tcp_output(tp); 799 800 out: 801 TCPDEBUG2(PRU_RCVD); 802 INP_WUNLOCK(inp); 803 return (error); 804 } 805 806 /* 807 * Do a send by putting data in output queue and updating urgent 808 * marker if URG set. Possibly send more data. Unlike the other 809 * pru_*() routines, the mbuf chains are our responsibility. We 810 * must either enqueue them or free them. The other pru_* routines 811 * generally are caller-frees. 812 */ 813 static int 814 tcp_usr_send(struct socket *so, int flags, struct mbuf *m, 815 struct sockaddr *nam, struct mbuf *control, struct thread *td) 816 { 817 int error = 0; 818 struct inpcb *inp; 819 struct tcpcb *tp = NULL; 820 #ifdef INET6 821 int isipv6; 822 #endif 823 TCPDEBUG0; 824 825 /* 826 * We require the pcbinfo lock if we will close the socket as part of 827 * this call. 828 */ 829 if (flags & PRUS_EOF) 830 INP_INFO_RLOCK(&V_tcbinfo); 831 inp = sotoinpcb(so); 832 KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL")); 833 INP_WLOCK(inp); 834 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 835 if (control) 836 m_freem(control); 837 /* 838 * In case of PRUS_NOTREADY, tcp_usr_ready() is responsible 839 * for freeing memory. 840 */ 841 if (m && (flags & PRUS_NOTREADY) == 0) 842 m_freem(m); 843 error = ECONNRESET; 844 goto out; 845 } 846 #ifdef INET6 847 isipv6 = nam && nam->sa_family == AF_INET6; 848 #endif /* INET6 */ 849 tp = intotcpcb(inp); 850 TCPDEBUG1(); 851 if (control) { 852 /* TCP doesn't do control messages (rights, creds, etc) */ 853 if (control->m_len) { 854 m_freem(control); 855 if (m) 856 m_freem(m); 857 error = EINVAL; 858 goto out; 859 } 860 m_freem(control); /* empty control, just free it */ 861 } 862 if (!(flags & PRUS_OOB)) { 863 sbappendstream(&so->so_snd, m, flags); 864 if (nam && tp->t_state < TCPS_SYN_SENT) { 865 /* 866 * Do implied connect if not yet connected, 867 * initialize window to default value, and 868 * initialize maxseg/maxopd using peer's cached 869 * MSS. 870 */ 871 #ifdef INET6 872 if (isipv6) 873 error = tcp6_connect(tp, nam, td); 874 #endif /* INET6 */ 875 #if defined(INET6) && defined(INET) 876 else 877 #endif 878 #ifdef INET 879 error = tcp_connect(tp, nam, td); 880 #endif 881 if (error) 882 goto out; 883 tp->snd_wnd = TTCP_CLIENT_SND_WND; 884 tcp_mss(tp, -1); 885 } 886 if (flags & PRUS_EOF) { 887 /* 888 * Close the send side of the connection after 889 * the data is sent. 890 */ 891 INP_INFO_RLOCK_ASSERT(&V_tcbinfo); 892 socantsendmore(so); 893 tcp_usrclosed(tp); 894 } 895 if (!(inp->inp_flags & INP_DROPPED) && 896 !(flags & PRUS_NOTREADY)) { 897 if (flags & PRUS_MORETOCOME) 898 tp->t_flags |= TF_MORETOCOME; 899 error = tcp_output(tp); 900 if (flags & PRUS_MORETOCOME) 901 tp->t_flags &= ~TF_MORETOCOME; 902 } 903 } else { 904 /* 905 * XXXRW: PRUS_EOF not implemented with PRUS_OOB? 906 */ 907 SOCKBUF_LOCK(&so->so_snd); 908 if (sbspace(&so->so_snd) < -512) { 909 SOCKBUF_UNLOCK(&so->so_snd); 910 m_freem(m); 911 error = ENOBUFS; 912 goto out; 913 } 914 /* 915 * According to RFC961 (Assigned Protocols), 916 * the urgent pointer points to the last octet 917 * of urgent data. We continue, however, 918 * to consider it to indicate the first octet 919 * of data past the urgent section. 920 * Otherwise, snd_up should be one lower. 921 */ 922 sbappendstream_locked(&so->so_snd, m, flags); 923 SOCKBUF_UNLOCK(&so->so_snd); 924 if (nam && tp->t_state < TCPS_SYN_SENT) { 925 /* 926 * Do implied connect if not yet connected, 927 * initialize window to default value, and 928 * initialize maxseg/maxopd using peer's cached 929 * MSS. 930 */ 931 #ifdef INET6 932 if (isipv6) 933 error = tcp6_connect(tp, nam, td); 934 #endif /* INET6 */ 935 #if defined(INET6) && defined(INET) 936 else 937 #endif 938 #ifdef INET 939 error = tcp_connect(tp, nam, td); 940 #endif 941 if (error) 942 goto out; 943 tp->snd_wnd = TTCP_CLIENT_SND_WND; 944 tcp_mss(tp, -1); 945 } 946 tp->snd_up = tp->snd_una + sbavail(&so->so_snd); 947 if (!(flags & PRUS_NOTREADY)) { 948 tp->t_flags |= TF_FORCEDATA; 949 error = tcp_output(tp); 950 tp->t_flags &= ~TF_FORCEDATA; 951 } 952 } 953 out: 954 TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB : 955 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); 956 INP_WUNLOCK(inp); 957 if (flags & PRUS_EOF) 958 INP_INFO_RUNLOCK(&V_tcbinfo); 959 return (error); 960 } 961 962 static int 963 tcp_usr_ready(struct socket *so, struct mbuf *m, int count) 964 { 965 struct inpcb *inp; 966 struct tcpcb *tp; 967 int error; 968 969 inp = sotoinpcb(so); 970 INP_WLOCK(inp); 971 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 972 INP_WUNLOCK(inp); 973 for (int i = 0; i < count; i++) 974 m = m_free(m); 975 return (ECONNRESET); 976 } 977 tp = intotcpcb(inp); 978 979 SOCKBUF_LOCK(&so->so_snd); 980 error = sbready(&so->so_snd, m, count); 981 SOCKBUF_UNLOCK(&so->so_snd); 982 if (error == 0) 983 error = tcp_output(tp); 984 INP_WUNLOCK(inp); 985 986 return (error); 987 } 988 989 /* 990 * Abort the TCP. Drop the connection abruptly. 991 */ 992 static void 993 tcp_usr_abort(struct socket *so) 994 { 995 struct inpcb *inp; 996 struct tcpcb *tp = NULL; 997 TCPDEBUG0; 998 999 inp = sotoinpcb(so); 1000 KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL")); 1001 1002 INP_INFO_RLOCK(&V_tcbinfo); 1003 INP_WLOCK(inp); 1004 KASSERT(inp->inp_socket != NULL, 1005 ("tcp_usr_abort: inp_socket == NULL")); 1006 1007 /* 1008 * If we still have full TCP state, and we're not dropped, drop. 1009 */ 1010 if (!(inp->inp_flags & INP_TIMEWAIT) && 1011 !(inp->inp_flags & INP_DROPPED)) { 1012 tp = intotcpcb(inp); 1013 TCPDEBUG1(); 1014 tcp_drop(tp, ECONNABORTED); 1015 TCPDEBUG2(PRU_ABORT); 1016 } 1017 if (!(inp->inp_flags & INP_DROPPED)) { 1018 SOCK_LOCK(so); 1019 so->so_state |= SS_PROTOREF; 1020 SOCK_UNLOCK(so); 1021 inp->inp_flags |= INP_SOCKREF; 1022 } 1023 INP_WUNLOCK(inp); 1024 INP_INFO_RUNLOCK(&V_tcbinfo); 1025 } 1026 1027 /* 1028 * TCP socket is closed. Start friendly disconnect. 1029 */ 1030 static void 1031 tcp_usr_close(struct socket *so) 1032 { 1033 struct inpcb *inp; 1034 struct tcpcb *tp = NULL; 1035 TCPDEBUG0; 1036 1037 inp = sotoinpcb(so); 1038 KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL")); 1039 1040 INP_INFO_RLOCK(&V_tcbinfo); 1041 INP_WLOCK(inp); 1042 KASSERT(inp->inp_socket != NULL, 1043 ("tcp_usr_close: inp_socket == NULL")); 1044 1045 /* 1046 * If we still have full TCP state, and we're not dropped, initiate 1047 * a disconnect. 1048 */ 1049 if (!(inp->inp_flags & INP_TIMEWAIT) && 1050 !(inp->inp_flags & INP_DROPPED)) { 1051 tp = intotcpcb(inp); 1052 TCPDEBUG1(); 1053 tcp_disconnect(tp); 1054 TCPDEBUG2(PRU_CLOSE); 1055 } 1056 if (!(inp->inp_flags & INP_DROPPED)) { 1057 SOCK_LOCK(so); 1058 so->so_state |= SS_PROTOREF; 1059 SOCK_UNLOCK(so); 1060 inp->inp_flags |= INP_SOCKREF; 1061 } 1062 INP_WUNLOCK(inp); 1063 INP_INFO_RUNLOCK(&V_tcbinfo); 1064 } 1065 1066 /* 1067 * Receive out-of-band data. 1068 */ 1069 static int 1070 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) 1071 { 1072 int error = 0; 1073 struct inpcb *inp; 1074 struct tcpcb *tp = NULL; 1075 1076 TCPDEBUG0; 1077 inp = sotoinpcb(so); 1078 KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL")); 1079 INP_WLOCK(inp); 1080 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 1081 error = ECONNRESET; 1082 goto out; 1083 } 1084 tp = intotcpcb(inp); 1085 TCPDEBUG1(); 1086 if ((so->so_oobmark == 0 && 1087 (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) || 1088 so->so_options & SO_OOBINLINE || 1089 tp->t_oobflags & TCPOOB_HADDATA) { 1090 error = EINVAL; 1091 goto out; 1092 } 1093 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 1094 error = EWOULDBLOCK; 1095 goto out; 1096 } 1097 m->m_len = 1; 1098 *mtod(m, caddr_t) = tp->t_iobc; 1099 if ((flags & MSG_PEEK) == 0) 1100 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 1101 1102 out: 1103 TCPDEBUG2(PRU_RCVOOB); 1104 INP_WUNLOCK(inp); 1105 return (error); 1106 } 1107 1108 #ifdef INET 1109 struct pr_usrreqs tcp_usrreqs = { 1110 .pru_abort = tcp_usr_abort, 1111 .pru_accept = tcp_usr_accept, 1112 .pru_attach = tcp_usr_attach, 1113 .pru_bind = tcp_usr_bind, 1114 .pru_connect = tcp_usr_connect, 1115 .pru_control = in_control, 1116 .pru_detach = tcp_usr_detach, 1117 .pru_disconnect = tcp_usr_disconnect, 1118 .pru_listen = tcp_usr_listen, 1119 .pru_peeraddr = in_getpeeraddr, 1120 .pru_rcvd = tcp_usr_rcvd, 1121 .pru_rcvoob = tcp_usr_rcvoob, 1122 .pru_send = tcp_usr_send, 1123 .pru_ready = tcp_usr_ready, 1124 .pru_shutdown = tcp_usr_shutdown, 1125 .pru_sockaddr = in_getsockaddr, 1126 .pru_sosetlabel = in_pcbsosetlabel, 1127 .pru_close = tcp_usr_close, 1128 }; 1129 #endif /* INET */ 1130 1131 #ifdef INET6 1132 struct pr_usrreqs tcp6_usrreqs = { 1133 .pru_abort = tcp_usr_abort, 1134 .pru_accept = tcp6_usr_accept, 1135 .pru_attach = tcp_usr_attach, 1136 .pru_bind = tcp6_usr_bind, 1137 .pru_connect = tcp6_usr_connect, 1138 .pru_control = in6_control, 1139 .pru_detach = tcp_usr_detach, 1140 .pru_disconnect = tcp_usr_disconnect, 1141 .pru_listen = tcp6_usr_listen, 1142 .pru_peeraddr = in6_mapped_peeraddr, 1143 .pru_rcvd = tcp_usr_rcvd, 1144 .pru_rcvoob = tcp_usr_rcvoob, 1145 .pru_send = tcp_usr_send, 1146 .pru_ready = tcp_usr_ready, 1147 .pru_shutdown = tcp_usr_shutdown, 1148 .pru_sockaddr = in6_mapped_sockaddr, 1149 .pru_sosetlabel = in_pcbsosetlabel, 1150 .pru_close = tcp_usr_close, 1151 }; 1152 #endif /* INET6 */ 1153 1154 #ifdef INET 1155 /* 1156 * Common subroutine to open a TCP connection to remote host specified 1157 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local 1158 * port number if needed. Call in_pcbconnect_setup to do the routing and 1159 * to choose a local host address (interface). If there is an existing 1160 * incarnation of the same connection in TIME-WAIT state and if the remote 1161 * host was sending CC options and if the connection duration was < MSL, then 1162 * truncate the previous TIME-WAIT state and proceed. 1163 * Initialize connection parameters and enter SYN-SENT state. 1164 */ 1165 static int 1166 tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) 1167 { 1168 struct inpcb *inp = tp->t_inpcb, *oinp; 1169 struct socket *so = inp->inp_socket; 1170 struct in_addr laddr; 1171 u_short lport; 1172 int error; 1173 1174 INP_WLOCK_ASSERT(inp); 1175 INP_HASH_WLOCK(&V_tcbinfo); 1176 1177 if (inp->inp_lport == 0) { 1178 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 1179 if (error) 1180 goto out; 1181 } 1182 1183 /* 1184 * Cannot simply call in_pcbconnect, because there might be an 1185 * earlier incarnation of this same connection still in 1186 * TIME_WAIT state, creating an ADDRINUSE error. 1187 */ 1188 laddr = inp->inp_laddr; 1189 lport = inp->inp_lport; 1190 error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport, 1191 &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred); 1192 if (error && oinp == NULL) 1193 goto out; 1194 if (oinp) { 1195 error = EADDRINUSE; 1196 goto out; 1197 } 1198 inp->inp_laddr = laddr; 1199 in_pcbrehash(inp); 1200 INP_HASH_WUNLOCK(&V_tcbinfo); 1201 1202 /* 1203 * Compute window scaling to request: 1204 * Scale to fit into sweet spot. See tcp_syncache.c. 1205 * XXX: This should move to tcp_output(). 1206 */ 1207 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 1208 (TCP_MAXWIN << tp->request_r_scale) < sb_max) 1209 tp->request_r_scale++; 1210 1211 soisconnecting(so); 1212 TCPSTAT_INC(tcps_connattempt); 1213 tcp_state_change(tp, TCPS_SYN_SENT); 1214 tp->iss = tcp_new_isn(tp); 1215 tcp_sendseqinit(tp); 1216 1217 return 0; 1218 1219 out: 1220 INP_HASH_WUNLOCK(&V_tcbinfo); 1221 return (error); 1222 } 1223 #endif /* INET */ 1224 1225 #ifdef INET6 1226 static int 1227 tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) 1228 { 1229 struct inpcb *inp = tp->t_inpcb; 1230 int error; 1231 1232 INP_WLOCK_ASSERT(inp); 1233 INP_HASH_WLOCK(&V_tcbinfo); 1234 1235 if (inp->inp_lport == 0) { 1236 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); 1237 if (error) 1238 goto out; 1239 } 1240 error = in6_pcbconnect(inp, nam, td->td_ucred); 1241 if (error != 0) 1242 goto out; 1243 INP_HASH_WUNLOCK(&V_tcbinfo); 1244 1245 /* Compute window scaling to request. */ 1246 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 1247 (TCP_MAXWIN << tp->request_r_scale) < sb_max) 1248 tp->request_r_scale++; 1249 1250 soisconnecting(inp->inp_socket); 1251 TCPSTAT_INC(tcps_connattempt); 1252 tcp_state_change(tp, TCPS_SYN_SENT); 1253 tp->iss = tcp_new_isn(tp); 1254 tcp_sendseqinit(tp); 1255 1256 return 0; 1257 1258 out: 1259 INP_HASH_WUNLOCK(&V_tcbinfo); 1260 return error; 1261 } 1262 #endif /* INET6 */ 1263 1264 /* 1265 * Export TCP internal state information via a struct tcp_info, based on the 1266 * Linux 2.6 API. Not ABI compatible as our constants are mapped differently 1267 * (TCP state machine, etc). We export all information using FreeBSD-native 1268 * constants -- for example, the numeric values for tcpi_state will differ 1269 * from Linux. 1270 */ 1271 static void 1272 tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti) 1273 { 1274 1275 INP_WLOCK_ASSERT(tp->t_inpcb); 1276 bzero(ti, sizeof(*ti)); 1277 1278 ti->tcpi_state = tp->t_state; 1279 if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) 1280 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS; 1281 if (tp->t_flags & TF_SACK_PERMIT) 1282 ti->tcpi_options |= TCPI_OPT_SACK; 1283 if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) { 1284 ti->tcpi_options |= TCPI_OPT_WSCALE; 1285 ti->tcpi_snd_wscale = tp->snd_scale; 1286 ti->tcpi_rcv_wscale = tp->rcv_scale; 1287 } 1288 1289 ti->tcpi_rto = tp->t_rxtcur * tick; 1290 ti->tcpi_last_data_recv = (long)(ticks - (int)tp->t_rcvtime) * tick; 1291 ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT; 1292 ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT; 1293 1294 ti->tcpi_snd_ssthresh = tp->snd_ssthresh; 1295 ti->tcpi_snd_cwnd = tp->snd_cwnd; 1296 1297 /* 1298 * FreeBSD-specific extension fields for tcp_info. 1299 */ 1300 ti->tcpi_rcv_space = tp->rcv_wnd; 1301 ti->tcpi_rcv_nxt = tp->rcv_nxt; 1302 ti->tcpi_snd_wnd = tp->snd_wnd; 1303 ti->tcpi_snd_bwnd = 0; /* Unused, kept for compat. */ 1304 ti->tcpi_snd_nxt = tp->snd_nxt; 1305 ti->tcpi_snd_mss = tp->t_maxseg; 1306 ti->tcpi_rcv_mss = tp->t_maxseg; 1307 if (tp->t_flags & TF_TOE) 1308 ti->tcpi_options |= TCPI_OPT_TOE; 1309 ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack; 1310 ti->tcpi_rcv_ooopack = tp->t_rcvoopack; 1311 ti->tcpi_snd_zerowin = tp->t_sndzerowin; 1312 } 1313 1314 /* 1315 * tcp_ctloutput() must drop the inpcb lock before performing copyin on 1316 * socket option arguments. When it re-acquires the lock after the copy, it 1317 * has to revalidate that the connection is still valid for the socket 1318 * option. 1319 */ 1320 #define INP_WLOCK_RECHECK(inp) do { \ 1321 INP_WLOCK(inp); \ 1322 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { \ 1323 INP_WUNLOCK(inp); \ 1324 return (ECONNRESET); \ 1325 } \ 1326 tp = intotcpcb(inp); \ 1327 } while(0) 1328 1329 int 1330 tcp_ctloutput(struct socket *so, struct sockopt *sopt) 1331 { 1332 int error, opt, optval; 1333 u_int ui; 1334 struct inpcb *inp; 1335 struct tcpcb *tp; 1336 struct tcp_info ti; 1337 char buf[TCP_CA_NAME_MAX]; 1338 struct cc_algo *algo; 1339 1340 error = 0; 1341 inp = sotoinpcb(so); 1342 KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL")); 1343 INP_WLOCK(inp); 1344 if (sopt->sopt_level != IPPROTO_TCP) { 1345 #ifdef INET6 1346 if (inp->inp_vflag & INP_IPV6PROTO) { 1347 INP_WUNLOCK(inp); 1348 error = ip6_ctloutput(so, sopt); 1349 } 1350 #endif /* INET6 */ 1351 #if defined(INET6) && defined(INET) 1352 else 1353 #endif 1354 #ifdef INET 1355 { 1356 INP_WUNLOCK(inp); 1357 error = ip_ctloutput(so, sopt); 1358 } 1359 #endif 1360 return (error); 1361 } 1362 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 1363 INP_WUNLOCK(inp); 1364 return (ECONNRESET); 1365 } 1366 1367 switch (sopt->sopt_dir) { 1368 case SOPT_SET: 1369 switch (sopt->sopt_name) { 1370 #ifdef TCP_SIGNATURE 1371 case TCP_MD5SIG: 1372 INP_WUNLOCK(inp); 1373 error = sooptcopyin(sopt, &optval, sizeof optval, 1374 sizeof optval); 1375 if (error) 1376 return (error); 1377 1378 INP_WLOCK_RECHECK(inp); 1379 if (optval > 0) 1380 tp->t_flags |= TF_SIGNATURE; 1381 else 1382 tp->t_flags &= ~TF_SIGNATURE; 1383 goto unlock_and_done; 1384 #endif /* TCP_SIGNATURE */ 1385 1386 case TCP_NODELAY: 1387 case TCP_NOOPT: 1388 INP_WUNLOCK(inp); 1389 error = sooptcopyin(sopt, &optval, sizeof optval, 1390 sizeof optval); 1391 if (error) 1392 return (error); 1393 1394 INP_WLOCK_RECHECK(inp); 1395 switch (sopt->sopt_name) { 1396 case TCP_NODELAY: 1397 opt = TF_NODELAY; 1398 break; 1399 case TCP_NOOPT: 1400 opt = TF_NOOPT; 1401 break; 1402 default: 1403 opt = 0; /* dead code to fool gcc */ 1404 break; 1405 } 1406 1407 if (optval) 1408 tp->t_flags |= opt; 1409 else 1410 tp->t_flags &= ~opt; 1411 unlock_and_done: 1412 #ifdef TCP_OFFLOAD 1413 if (tp->t_flags & TF_TOE) { 1414 tcp_offload_ctloutput(tp, sopt->sopt_dir, 1415 sopt->sopt_name); 1416 } 1417 #endif 1418 INP_WUNLOCK(inp); 1419 break; 1420 1421 case TCP_NOPUSH: 1422 INP_WUNLOCK(inp); 1423 error = sooptcopyin(sopt, &optval, sizeof optval, 1424 sizeof optval); 1425 if (error) 1426 return (error); 1427 1428 INP_WLOCK_RECHECK(inp); 1429 if (optval) 1430 tp->t_flags |= TF_NOPUSH; 1431 else if (tp->t_flags & TF_NOPUSH) { 1432 tp->t_flags &= ~TF_NOPUSH; 1433 if (TCPS_HAVEESTABLISHED(tp->t_state)) 1434 error = tcp_output(tp); 1435 } 1436 goto unlock_and_done; 1437 1438 case TCP_MAXSEG: 1439 INP_WUNLOCK(inp); 1440 error = sooptcopyin(sopt, &optval, sizeof optval, 1441 sizeof optval); 1442 if (error) 1443 return (error); 1444 1445 INP_WLOCK_RECHECK(inp); 1446 if (optval > 0 && optval <= tp->t_maxseg && 1447 optval + 40 >= V_tcp_minmss) 1448 tp->t_maxseg = optval; 1449 else 1450 error = EINVAL; 1451 goto unlock_and_done; 1452 1453 case TCP_INFO: 1454 INP_WUNLOCK(inp); 1455 error = EINVAL; 1456 break; 1457 1458 case TCP_CONGESTION: 1459 INP_WUNLOCK(inp); 1460 bzero(buf, sizeof(buf)); 1461 error = sooptcopyin(sopt, &buf, sizeof(buf), 1); 1462 if (error) 1463 break; 1464 INP_WLOCK_RECHECK(inp); 1465 /* 1466 * Return EINVAL if we can't find the requested cc algo. 1467 */ 1468 error = EINVAL; 1469 CC_LIST_RLOCK(); 1470 STAILQ_FOREACH(algo, &cc_list, entries) { 1471 if (strncmp(buf, algo->name, TCP_CA_NAME_MAX) 1472 == 0) { 1473 /* We've found the requested algo. */ 1474 error = 0; 1475 /* 1476 * We hold a write lock over the tcb 1477 * so it's safe to do these things 1478 * without ordering concerns. 1479 */ 1480 if (CC_ALGO(tp)->cb_destroy != NULL) 1481 CC_ALGO(tp)->cb_destroy(tp->ccv); 1482 CC_ALGO(tp) = algo; 1483 /* 1484 * If something goes pear shaped 1485 * initialising the new algo, 1486 * fall back to newreno (which 1487 * does not require initialisation). 1488 */ 1489 if (algo->cb_init != NULL) 1490 if (algo->cb_init(tp->ccv) > 0) { 1491 CC_ALGO(tp) = &newreno_cc_algo; 1492 /* 1493 * The only reason init 1494 * should fail is 1495 * because of malloc. 1496 */ 1497 error = ENOMEM; 1498 } 1499 break; /* Break the STAILQ_FOREACH. */ 1500 } 1501 } 1502 CC_LIST_RUNLOCK(); 1503 goto unlock_and_done; 1504 1505 case TCP_KEEPIDLE: 1506 case TCP_KEEPINTVL: 1507 case TCP_KEEPINIT: 1508 INP_WUNLOCK(inp); 1509 error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui)); 1510 if (error) 1511 return (error); 1512 1513 if (ui > (UINT_MAX / hz)) { 1514 error = EINVAL; 1515 break; 1516 } 1517 ui *= hz; 1518 1519 INP_WLOCK_RECHECK(inp); 1520 switch (sopt->sopt_name) { 1521 case TCP_KEEPIDLE: 1522 tp->t_keepidle = ui; 1523 /* 1524 * XXX: better check current remaining 1525 * timeout and "merge" it with new value. 1526 */ 1527 if ((tp->t_state > TCPS_LISTEN) && 1528 (tp->t_state <= TCPS_CLOSING)) 1529 tcp_timer_activate(tp, TT_KEEP, 1530 TP_KEEPIDLE(tp)); 1531 break; 1532 case TCP_KEEPINTVL: 1533 tp->t_keepintvl = ui; 1534 if ((tp->t_state == TCPS_FIN_WAIT_2) && 1535 (TP_MAXIDLE(tp) > 0)) 1536 tcp_timer_activate(tp, TT_2MSL, 1537 TP_MAXIDLE(tp)); 1538 break; 1539 case TCP_KEEPINIT: 1540 tp->t_keepinit = ui; 1541 if (tp->t_state == TCPS_SYN_RECEIVED || 1542 tp->t_state == TCPS_SYN_SENT) 1543 tcp_timer_activate(tp, TT_KEEP, 1544 TP_KEEPINIT(tp)); 1545 break; 1546 } 1547 goto unlock_and_done; 1548 1549 case TCP_KEEPCNT: 1550 INP_WUNLOCK(inp); 1551 error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui)); 1552 if (error) 1553 return (error); 1554 1555 INP_WLOCK_RECHECK(inp); 1556 tp->t_keepcnt = ui; 1557 if ((tp->t_state == TCPS_FIN_WAIT_2) && 1558 (TP_MAXIDLE(tp) > 0)) 1559 tcp_timer_activate(tp, TT_2MSL, 1560 TP_MAXIDLE(tp)); 1561 goto unlock_and_done; 1562 1563 default: 1564 INP_WUNLOCK(inp); 1565 error = ENOPROTOOPT; 1566 break; 1567 } 1568 break; 1569 1570 case SOPT_GET: 1571 tp = intotcpcb(inp); 1572 switch (sopt->sopt_name) { 1573 #ifdef TCP_SIGNATURE 1574 case TCP_MD5SIG: 1575 optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 1576 INP_WUNLOCK(inp); 1577 error = sooptcopyout(sopt, &optval, sizeof optval); 1578 break; 1579 #endif 1580 1581 case TCP_NODELAY: 1582 optval = tp->t_flags & TF_NODELAY; 1583 INP_WUNLOCK(inp); 1584 error = sooptcopyout(sopt, &optval, sizeof optval); 1585 break; 1586 case TCP_MAXSEG: 1587 optval = tp->t_maxseg; 1588 INP_WUNLOCK(inp); 1589 error = sooptcopyout(sopt, &optval, sizeof optval); 1590 break; 1591 case TCP_NOOPT: 1592 optval = tp->t_flags & TF_NOOPT; 1593 INP_WUNLOCK(inp); 1594 error = sooptcopyout(sopt, &optval, sizeof optval); 1595 break; 1596 case TCP_NOPUSH: 1597 optval = tp->t_flags & TF_NOPUSH; 1598 INP_WUNLOCK(inp); 1599 error = sooptcopyout(sopt, &optval, sizeof optval); 1600 break; 1601 case TCP_INFO: 1602 tcp_fill_info(tp, &ti); 1603 INP_WUNLOCK(inp); 1604 error = sooptcopyout(sopt, &ti, sizeof ti); 1605 break; 1606 case TCP_CONGESTION: 1607 bzero(buf, sizeof(buf)); 1608 strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX); 1609 INP_WUNLOCK(inp); 1610 error = sooptcopyout(sopt, buf, TCP_CA_NAME_MAX); 1611 break; 1612 case TCP_KEEPIDLE: 1613 case TCP_KEEPINTVL: 1614 case TCP_KEEPINIT: 1615 case TCP_KEEPCNT: 1616 switch (sopt->sopt_name) { 1617 case TCP_KEEPIDLE: 1618 ui = tp->t_keepidle / hz; 1619 break; 1620 case TCP_KEEPINTVL: 1621 ui = tp->t_keepintvl / hz; 1622 break; 1623 case TCP_KEEPINIT: 1624 ui = tp->t_keepinit / hz; 1625 break; 1626 case TCP_KEEPCNT: 1627 ui = tp->t_keepcnt; 1628 break; 1629 } 1630 INP_WUNLOCK(inp); 1631 error = sooptcopyout(sopt, &ui, sizeof(ui)); 1632 break; 1633 default: 1634 INP_WUNLOCK(inp); 1635 error = ENOPROTOOPT; 1636 break; 1637 } 1638 break; 1639 } 1640 return (error); 1641 } 1642 #undef INP_WLOCK_RECHECK 1643 1644 /* 1645 * Attach TCP protocol to socket, allocating 1646 * internet protocol control block, tcp control block, 1647 * bufer space, and entering LISTEN state if to accept connections. 1648 */ 1649 static int 1650 tcp_attach(struct socket *so) 1651 { 1652 struct tcpcb *tp; 1653 struct inpcb *inp; 1654 int error; 1655 1656 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 1657 error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace); 1658 if (error) 1659 return (error); 1660 } 1661 so->so_rcv.sb_flags |= SB_AUTOSIZE; 1662 so->so_snd.sb_flags |= SB_AUTOSIZE; 1663 INP_INFO_RLOCK(&V_tcbinfo); 1664 error = in_pcballoc(so, &V_tcbinfo); 1665 if (error) { 1666 INP_INFO_RUNLOCK(&V_tcbinfo); 1667 return (error); 1668 } 1669 inp = sotoinpcb(so); 1670 #ifdef INET6 1671 if (inp->inp_vflag & INP_IPV6PROTO) { 1672 inp->inp_vflag |= INP_IPV6; 1673 inp->in6p_hops = -1; /* use kernel default */ 1674 } 1675 else 1676 #endif 1677 inp->inp_vflag |= INP_IPV4; 1678 tp = tcp_newtcpcb(inp); 1679 if (tp == NULL) { 1680 in_pcbdetach(inp); 1681 in_pcbfree(inp); 1682 INP_INFO_RUNLOCK(&V_tcbinfo); 1683 return (ENOBUFS); 1684 } 1685 tp->t_state = TCPS_CLOSED; 1686 INP_WUNLOCK(inp); 1687 INP_INFO_RUNLOCK(&V_tcbinfo); 1688 return (0); 1689 } 1690 1691 /* 1692 * Initiate (or continue) disconnect. 1693 * If embryonic state, just send reset (once). 1694 * If in ``let data drain'' option and linger null, just drop. 1695 * Otherwise (hard), mark socket disconnecting and drop 1696 * current input data; switch states based on user close, and 1697 * send segment to peer (with FIN). 1698 */ 1699 static void 1700 tcp_disconnect(struct tcpcb *tp) 1701 { 1702 struct inpcb *inp = tp->t_inpcb; 1703 struct socket *so = inp->inp_socket; 1704 1705 INP_INFO_RLOCK_ASSERT(&V_tcbinfo); 1706 INP_WLOCK_ASSERT(inp); 1707 1708 /* 1709 * Neither tcp_close() nor tcp_drop() should return NULL, as the 1710 * socket is still open. 1711 */ 1712 if (tp->t_state < TCPS_ESTABLISHED) { 1713 tp = tcp_close(tp); 1714 KASSERT(tp != NULL, 1715 ("tcp_disconnect: tcp_close() returned NULL")); 1716 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) { 1717 tp = tcp_drop(tp, 0); 1718 KASSERT(tp != NULL, 1719 ("tcp_disconnect: tcp_drop() returned NULL")); 1720 } else { 1721 soisdisconnecting(so); 1722 sbflush(&so->so_rcv); 1723 tcp_usrclosed(tp); 1724 if (!(inp->inp_flags & INP_DROPPED)) 1725 tcp_output(tp); 1726 } 1727 } 1728 1729 /* 1730 * User issued close, and wish to trail through shutdown states: 1731 * if never received SYN, just forget it. If got a SYN from peer, 1732 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1733 * If already got a FIN from peer, then almost done; go to LAST_ACK 1734 * state. In all other cases, have already sent FIN to peer (e.g. 1735 * after PRU_SHUTDOWN), and just have to play tedious game waiting 1736 * for peer to send FIN or not respond to keep-alives, etc. 1737 * We can let the user exit from the close as soon as the FIN is acked. 1738 */ 1739 static void 1740 tcp_usrclosed(struct tcpcb *tp) 1741 { 1742 1743 INP_INFO_RLOCK_ASSERT(&V_tcbinfo); 1744 INP_WLOCK_ASSERT(tp->t_inpcb); 1745 1746 switch (tp->t_state) { 1747 case TCPS_LISTEN: 1748 #ifdef TCP_OFFLOAD 1749 tcp_offload_listen_stop(tp); 1750 #endif 1751 /* FALLTHROUGH */ 1752 case TCPS_CLOSED: 1753 tcp_state_change(tp, TCPS_CLOSED); 1754 tp = tcp_close(tp); 1755 /* 1756 * tcp_close() should never return NULL here as the socket is 1757 * still open. 1758 */ 1759 KASSERT(tp != NULL, 1760 ("tcp_usrclosed: tcp_close() returned NULL")); 1761 break; 1762 1763 case TCPS_SYN_SENT: 1764 case TCPS_SYN_RECEIVED: 1765 tp->t_flags |= TF_NEEDFIN; 1766 break; 1767 1768 case TCPS_ESTABLISHED: 1769 tcp_state_change(tp, TCPS_FIN_WAIT_1); 1770 break; 1771 1772 case TCPS_CLOSE_WAIT: 1773 tcp_state_change(tp, TCPS_LAST_ACK); 1774 break; 1775 } 1776 if (tp->t_state >= TCPS_FIN_WAIT_2) { 1777 soisdisconnected(tp->t_inpcb->inp_socket); 1778 /* Prevent the connection hanging in FIN_WAIT_2 forever. */ 1779 if (tp->t_state == TCPS_FIN_WAIT_2) { 1780 int timeout; 1781 1782 timeout = (tcp_fast_finwait2_recycle) ? 1783 tcp_finwait2_timeout : TP_MAXIDLE(tp); 1784 tcp_timer_activate(tp, TT_2MSL, timeout); 1785 } 1786 } 1787 } 1788 1789 #ifdef DDB 1790 static void 1791 db_print_indent(int indent) 1792 { 1793 int i; 1794 1795 for (i = 0; i < indent; i++) 1796 db_printf(" "); 1797 } 1798 1799 static void 1800 db_print_tstate(int t_state) 1801 { 1802 1803 switch (t_state) { 1804 case TCPS_CLOSED: 1805 db_printf("TCPS_CLOSED"); 1806 return; 1807 1808 case TCPS_LISTEN: 1809 db_printf("TCPS_LISTEN"); 1810 return; 1811 1812 case TCPS_SYN_SENT: 1813 db_printf("TCPS_SYN_SENT"); 1814 return; 1815 1816 case TCPS_SYN_RECEIVED: 1817 db_printf("TCPS_SYN_RECEIVED"); 1818 return; 1819 1820 case TCPS_ESTABLISHED: 1821 db_printf("TCPS_ESTABLISHED"); 1822 return; 1823 1824 case TCPS_CLOSE_WAIT: 1825 db_printf("TCPS_CLOSE_WAIT"); 1826 return; 1827 1828 case TCPS_FIN_WAIT_1: 1829 db_printf("TCPS_FIN_WAIT_1"); 1830 return; 1831 1832 case TCPS_CLOSING: 1833 db_printf("TCPS_CLOSING"); 1834 return; 1835 1836 case TCPS_LAST_ACK: 1837 db_printf("TCPS_LAST_ACK"); 1838 return; 1839 1840 case TCPS_FIN_WAIT_2: 1841 db_printf("TCPS_FIN_WAIT_2"); 1842 return; 1843 1844 case TCPS_TIME_WAIT: 1845 db_printf("TCPS_TIME_WAIT"); 1846 return; 1847 1848 default: 1849 db_printf("unknown"); 1850 return; 1851 } 1852 } 1853 1854 static void 1855 db_print_tflags(u_int t_flags) 1856 { 1857 int comma; 1858 1859 comma = 0; 1860 if (t_flags & TF_ACKNOW) { 1861 db_printf("%sTF_ACKNOW", comma ? ", " : ""); 1862 comma = 1; 1863 } 1864 if (t_flags & TF_DELACK) { 1865 db_printf("%sTF_DELACK", comma ? ", " : ""); 1866 comma = 1; 1867 } 1868 if (t_flags & TF_NODELAY) { 1869 db_printf("%sTF_NODELAY", comma ? ", " : ""); 1870 comma = 1; 1871 } 1872 if (t_flags & TF_NOOPT) { 1873 db_printf("%sTF_NOOPT", comma ? ", " : ""); 1874 comma = 1; 1875 } 1876 if (t_flags & TF_SENTFIN) { 1877 db_printf("%sTF_SENTFIN", comma ? ", " : ""); 1878 comma = 1; 1879 } 1880 if (t_flags & TF_REQ_SCALE) { 1881 db_printf("%sTF_REQ_SCALE", comma ? ", " : ""); 1882 comma = 1; 1883 } 1884 if (t_flags & TF_RCVD_SCALE) { 1885 db_printf("%sTF_RECVD_SCALE", comma ? ", " : ""); 1886 comma = 1; 1887 } 1888 if (t_flags & TF_REQ_TSTMP) { 1889 db_printf("%sTF_REQ_TSTMP", comma ? ", " : ""); 1890 comma = 1; 1891 } 1892 if (t_flags & TF_RCVD_TSTMP) { 1893 db_printf("%sTF_RCVD_TSTMP", comma ? ", " : ""); 1894 comma = 1; 1895 } 1896 if (t_flags & TF_SACK_PERMIT) { 1897 db_printf("%sTF_SACK_PERMIT", comma ? ", " : ""); 1898 comma = 1; 1899 } 1900 if (t_flags & TF_NEEDSYN) { 1901 db_printf("%sTF_NEEDSYN", comma ? ", " : ""); 1902 comma = 1; 1903 } 1904 if (t_flags & TF_NEEDFIN) { 1905 db_printf("%sTF_NEEDFIN", comma ? ", " : ""); 1906 comma = 1; 1907 } 1908 if (t_flags & TF_NOPUSH) { 1909 db_printf("%sTF_NOPUSH", comma ? ", " : ""); 1910 comma = 1; 1911 } 1912 if (t_flags & TF_MORETOCOME) { 1913 db_printf("%sTF_MORETOCOME", comma ? ", " : ""); 1914 comma = 1; 1915 } 1916 if (t_flags & TF_LQ_OVERFLOW) { 1917 db_printf("%sTF_LQ_OVERFLOW", comma ? ", " : ""); 1918 comma = 1; 1919 } 1920 if (t_flags & TF_LASTIDLE) { 1921 db_printf("%sTF_LASTIDLE", comma ? ", " : ""); 1922 comma = 1; 1923 } 1924 if (t_flags & TF_RXWIN0SENT) { 1925 db_printf("%sTF_RXWIN0SENT", comma ? ", " : ""); 1926 comma = 1; 1927 } 1928 if (t_flags & TF_FASTRECOVERY) { 1929 db_printf("%sTF_FASTRECOVERY", comma ? ", " : ""); 1930 comma = 1; 1931 } 1932 if (t_flags & TF_CONGRECOVERY) { 1933 db_printf("%sTF_CONGRECOVERY", comma ? ", " : ""); 1934 comma = 1; 1935 } 1936 if (t_flags & TF_WASFRECOVERY) { 1937 db_printf("%sTF_WASFRECOVERY", comma ? ", " : ""); 1938 comma = 1; 1939 } 1940 if (t_flags & TF_SIGNATURE) { 1941 db_printf("%sTF_SIGNATURE", comma ? ", " : ""); 1942 comma = 1; 1943 } 1944 if (t_flags & TF_FORCEDATA) { 1945 db_printf("%sTF_FORCEDATA", comma ? ", " : ""); 1946 comma = 1; 1947 } 1948 if (t_flags & TF_TSO) { 1949 db_printf("%sTF_TSO", comma ? ", " : ""); 1950 comma = 1; 1951 } 1952 if (t_flags & TF_ECN_PERMIT) { 1953 db_printf("%sTF_ECN_PERMIT", comma ? ", " : ""); 1954 comma = 1; 1955 } 1956 } 1957 1958 static void 1959 db_print_toobflags(char t_oobflags) 1960 { 1961 int comma; 1962 1963 comma = 0; 1964 if (t_oobflags & TCPOOB_HAVEDATA) { 1965 db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : ""); 1966 comma = 1; 1967 } 1968 if (t_oobflags & TCPOOB_HADDATA) { 1969 db_printf("%sTCPOOB_HADDATA", comma ? ", " : ""); 1970 comma = 1; 1971 } 1972 } 1973 1974 static void 1975 db_print_tcpcb(struct tcpcb *tp, const char *name, int indent) 1976 { 1977 1978 db_print_indent(indent); 1979 db_printf("%s at %p\n", name, tp); 1980 1981 indent += 2; 1982 1983 db_print_indent(indent); 1984 db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n", 1985 LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks); 1986 1987 db_print_indent(indent); 1988 db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n", 1989 &tp->t_timers->tt_rexmt, &tp->t_timers->tt_persist, &tp->t_timers->tt_keep); 1990 1991 db_print_indent(indent); 1992 db_printf("tt_2msl: %p tt_delack: %p t_inpcb: %p\n", &tp->t_timers->tt_2msl, 1993 &tp->t_timers->tt_delack, tp->t_inpcb); 1994 1995 db_print_indent(indent); 1996 db_printf("t_state: %d (", tp->t_state); 1997 db_print_tstate(tp->t_state); 1998 db_printf(")\n"); 1999 2000 db_print_indent(indent); 2001 db_printf("t_flags: 0x%x (", tp->t_flags); 2002 db_print_tflags(tp->t_flags); 2003 db_printf(")\n"); 2004 2005 db_print_indent(indent); 2006 db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: x0%08x\n", 2007 tp->snd_una, tp->snd_max, tp->snd_nxt); 2008 2009 db_print_indent(indent); 2010 db_printf("snd_up: 0x%08x snd_wl1: 0x%08x snd_wl2: 0x%08x\n", 2011 tp->snd_up, tp->snd_wl1, tp->snd_wl2); 2012 2013 db_print_indent(indent); 2014 db_printf("iss: 0x%08x irs: 0x%08x rcv_nxt: 0x%08x\n", 2015 tp->iss, tp->irs, tp->rcv_nxt); 2016 2017 db_print_indent(indent); 2018 db_printf("rcv_adv: 0x%08x rcv_wnd: %lu rcv_up: 0x%08x\n", 2019 tp->rcv_adv, tp->rcv_wnd, tp->rcv_up); 2020 2021 db_print_indent(indent); 2022 db_printf("snd_wnd: %lu snd_cwnd: %lu\n", 2023 tp->snd_wnd, tp->snd_cwnd); 2024 2025 db_print_indent(indent); 2026 db_printf("snd_ssthresh: %lu snd_recover: " 2027 "0x%08x\n", tp->snd_ssthresh, tp->snd_recover); 2028 2029 db_print_indent(indent); 2030 db_printf("t_maxopd: %u t_rcvtime: %u t_startime: %u\n", 2031 tp->t_maxopd, tp->t_rcvtime, tp->t_starttime); 2032 2033 db_print_indent(indent); 2034 db_printf("t_rttime: %u t_rtsq: 0x%08x\n", 2035 tp->t_rtttime, tp->t_rtseq); 2036 2037 db_print_indent(indent); 2038 db_printf("t_rxtcur: %d t_maxseg: %u t_srtt: %d\n", 2039 tp->t_rxtcur, tp->t_maxseg, tp->t_srtt); 2040 2041 db_print_indent(indent); 2042 db_printf("t_rttvar: %d t_rxtshift: %d t_rttmin: %u " 2043 "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin, 2044 tp->t_rttbest); 2045 2046 db_print_indent(indent); 2047 db_printf("t_rttupdated: %lu max_sndwnd: %lu t_softerror: %d\n", 2048 tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror); 2049 2050 db_print_indent(indent); 2051 db_printf("t_oobflags: 0x%x (", tp->t_oobflags); 2052 db_print_toobflags(tp->t_oobflags); 2053 db_printf(") t_iobc: 0x%02x\n", tp->t_iobc); 2054 2055 db_print_indent(indent); 2056 db_printf("snd_scale: %u rcv_scale: %u request_r_scale: %u\n", 2057 tp->snd_scale, tp->rcv_scale, tp->request_r_scale); 2058 2059 db_print_indent(indent); 2060 db_printf("ts_recent: %u ts_recent_age: %u\n", 2061 tp->ts_recent, tp->ts_recent_age); 2062 2063 db_print_indent(indent); 2064 db_printf("ts_offset: %u last_ack_sent: 0x%08x snd_cwnd_prev: " 2065 "%lu\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev); 2066 2067 db_print_indent(indent); 2068 db_printf("snd_ssthresh_prev: %lu snd_recover_prev: 0x%08x " 2069 "t_badrxtwin: %u\n", tp->snd_ssthresh_prev, 2070 tp->snd_recover_prev, tp->t_badrxtwin); 2071 2072 db_print_indent(indent); 2073 db_printf("snd_numholes: %d snd_holes first: %p\n", 2074 tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes)); 2075 2076 db_print_indent(indent); 2077 db_printf("snd_fack: 0x%08x rcv_numsacks: %d sack_newdata: " 2078 "0x%08x\n", tp->snd_fack, tp->rcv_numsacks, tp->sack_newdata); 2079 2080 /* Skip sackblks, sackhint. */ 2081 2082 db_print_indent(indent); 2083 db_printf("t_rttlow: %d rfbuf_ts: %u rfbuf_cnt: %d\n", 2084 tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt); 2085 } 2086 2087 DB_SHOW_COMMAND(tcpcb, db_show_tcpcb) 2088 { 2089 struct tcpcb *tp; 2090 2091 if (!have_addr) { 2092 db_printf("usage: show tcpcb <addr>\n"); 2093 return; 2094 } 2095 tp = (struct tcpcb *)addr; 2096 2097 db_print_tcpcb(tp, "tcpcb", 0); 2098 } 2099 #endif 2100