1 /* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 34 * $FreeBSD$ 35 */ 36 37 #include "opt_compat.h" 38 #include "opt_inet6.h" 39 #include "opt_ipsec.h" 40 #include "opt_tcpdebug.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/callout.h> 45 #include <sys/kernel.h> 46 #include <sys/sysctl.h> 47 #include <sys/malloc.h> 48 #include <sys/mbuf.h> 49 #ifdef INET6 50 #include <sys/domain.h> 51 #endif 52 #include <sys/proc.h> 53 #include <sys/socket.h> 54 #include <sys/socketvar.h> 55 #include <sys/protosw.h> 56 57 #include <vm/vm_zone.h> 58 59 #include <net/route.h> 60 #include <net/if.h> 61 62 #define _IP_VHL 63 #include <netinet/in.h> 64 #include <netinet/in_systm.h> 65 #include <netinet/ip.h> 66 #ifdef INET6 67 #include <netinet/ip6.h> 68 #endif 69 #include <netinet/in_pcb.h> 70 #ifdef INET6 71 #include <netinet6/in6_pcb.h> 72 #endif 73 #include <netinet/in_var.h> 74 #include <netinet/ip_var.h> 75 #ifdef INET6 76 #include <netinet6/ip6_var.h> 77 #endif 78 #include <netinet/tcp.h> 79 #include <netinet/tcp_fsm.h> 80 #include <netinet/tcp_seq.h> 81 #include <netinet/tcp_timer.h> 82 #include <netinet/tcp_var.h> 83 #ifdef INET6 84 #include <netinet6/tcp6_var.h> 85 #endif 86 #include <netinet/tcpip.h> 87 #ifdef TCPDEBUG 88 #include <netinet/tcp_debug.h> 89 #endif 90 #include <netinet6/ip6protosw.h> 91 92 #ifdef IPSEC 93 #include <netinet6/ipsec.h> 94 #ifdef INET6 95 #include <netinet6/ipsec6.h> 96 #endif 97 #endif /*IPSEC*/ 98 99 #include <machine/in_cksum.h> 100 101 int tcp_mssdflt = TCP_MSS; 102 SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW, 103 &tcp_mssdflt , 0, "Default TCP Maximum Segment Size"); 104 105 #ifdef INET6 106 int tcp_v6mssdflt = TCP6_MSS; 107 SYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt, 108 CTLFLAG_RW, &tcp_v6mssdflt , 0, 109 "Default TCP Maximum Segment Size for IPv6"); 110 #endif 111 112 #if 0 113 static int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ; 114 SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt, CTLFLAG_RW, 115 &tcp_rttdflt , 0, "Default maximum TCP Round Trip Time"); 116 #endif 117 118 static int tcp_do_rfc1323 = 1; 119 SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW, 120 &tcp_do_rfc1323 , 0, "Enable rfc1323 (high performance TCP) extensions"); 121 122 static int tcp_do_rfc1644 = 0; 123 SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644, CTLFLAG_RW, 124 &tcp_do_rfc1644 , 0, "Enable rfc1644 (TTCP) extensions"); 125 126 static int tcp_tcbhashsize = 0; 127 SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RD, 128 &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable"); 129 130 static int do_tcpdrain = 1; 131 SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0, 132 "Enable tcp_drain routine for extra help when low on mbufs"); 133 134 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, 135 &tcbinfo.ipi_count, 0, "Number of active PCBs"); 136 137 /* 138 * Treat ICMP administratively prohibited like a TCP RST 139 * as required by rfc1122 section 3.2.2.1 140 */ 141 142 static int icmp_admin_prohib_like_rst = 1; 143 SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_admin_prohib_like_rst, CTLFLAG_RW, 144 &icmp_admin_prohib_like_rst, 0, 145 "Treat ICMP administratively prohibited messages like TCP RST, rfc1122 section 3.2.2.1"); 146 147 /* 148 * When icmp_admin_prohib_like_rst is enabled, only act on 149 * sessions in SYN-SENT state 150 */ 151 152 static int icmp_like_rst_syn_sent_only = 1; 153 SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_like_rst_syn_sent_only, CTLFLAG_RW, 154 &icmp_like_rst_syn_sent_only, 0, 155 "When icmp_admin_prohib_like_rst is enabled, only act on sessions in SYN-SENT state"); 156 157 static void tcp_cleartaocache __P((void)); 158 static void tcp_notify __P((struct inpcb *, int)); 159 160 /* 161 * Target size of TCP PCB hash tables. Must be a power of two. 162 * 163 * Note that this can be overridden by the kernel environment 164 * variable net.inet.tcp.tcbhashsize 165 */ 166 #ifndef TCBHASHSIZE 167 #define TCBHASHSIZE 512 168 #endif 169 170 /* 171 * This is the actual shape of what we allocate using the zone 172 * allocator. Doing it this way allows us to protect both structures 173 * using the same generation count, and also eliminates the overhead 174 * of allocating tcpcbs separately. By hiding the structure here, 175 * we avoid changing most of the rest of the code (although it needs 176 * to be changed, eventually, for greater efficiency). 177 */ 178 #define ALIGNMENT 32 179 #define ALIGNM1 (ALIGNMENT - 1) 180 struct inp_tp { 181 union { 182 struct inpcb inp; 183 char align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1]; 184 } inp_tp_u; 185 struct tcpcb tcb; 186 struct callout inp_tp_rexmt, inp_tp_persist, inp_tp_keep, inp_tp_2msl; 187 struct callout inp_tp_delack; 188 }; 189 #undef ALIGNMENT 190 #undef ALIGNM1 191 192 /* 193 * Tcp initialization 194 */ 195 void 196 tcp_init() 197 { 198 int hashsize; 199 200 tcp_iss = arc4random(); /* wrong, but better than a constant */ 201 tcp_ccgen = 1; 202 tcp_cleartaocache(); 203 204 tcp_delacktime = TCPTV_DELACK; 205 tcp_keepinit = TCPTV_KEEP_INIT; 206 tcp_keepidle = TCPTV_KEEP_IDLE; 207 tcp_keepintvl = TCPTV_KEEPINTVL; 208 tcp_maxpersistidle = TCPTV_KEEP_IDLE; 209 tcp_msl = TCPTV_MSL; 210 211 LIST_INIT(&tcb); 212 tcbinfo.listhead = &tcb; 213 TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", TCBHASHSIZE, hashsize); 214 if (!powerof2(hashsize)) { 215 printf("WARNING: TCB hash size not a power of 2\n"); 216 hashsize = 512; /* safe default */ 217 } 218 tcp_tcbhashsize = hashsize; 219 tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask); 220 tcbinfo.porthashbase = hashinit(hashsize, M_PCB, 221 &tcbinfo.porthashmask); 222 tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), maxsockets, 223 ZONE_INTERRUPT, 0); 224 #ifdef INET6 225 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) 226 #else /* INET6 */ 227 #define TCP_MINPROTOHDR (sizeof(struct tcpiphdr)) 228 #endif /* INET6 */ 229 if (max_protohdr < TCP_MINPROTOHDR) 230 max_protohdr = TCP_MINPROTOHDR; 231 if (max_linkhdr + TCP_MINPROTOHDR > MHLEN) 232 panic("tcp_init"); 233 #undef TCP_MINPROTOHDR 234 } 235 236 /* 237 * Create template to be used to send tcp packets on a connection. 238 * Call after host entry created, allocates an mbuf and fills 239 * in a skeletal tcp/ip header, minimizing the amount of work 240 * necessary when the connection is used. 241 */ 242 struct tcptemp * 243 tcp_template(tp) 244 struct tcpcb *tp; 245 { 246 register struct inpcb *inp = tp->t_inpcb; 247 register struct mbuf *m; 248 register struct tcptemp *n; 249 250 if ((n = tp->t_template) == 0) { 251 m = m_get(M_DONTWAIT, MT_HEADER); 252 if (m == NULL) 253 return (0); 254 m->m_len = sizeof (struct tcptemp); 255 n = mtod(m, struct tcptemp *); 256 } 257 #ifdef INET6 258 if ((inp->inp_vflag & INP_IPV6) != 0) { 259 register struct ip6_hdr *ip6; 260 261 ip6 = (struct ip6_hdr *)n->tt_ipgen; 262 ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | 263 (inp->in6p_flowinfo & IPV6_FLOWINFO_MASK); 264 ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | 265 (IPV6_VERSION & IPV6_VERSION_MASK); 266 ip6->ip6_nxt = IPPROTO_TCP; 267 ip6->ip6_plen = sizeof(struct tcphdr); 268 ip6->ip6_src = inp->in6p_laddr; 269 ip6->ip6_dst = inp->in6p_faddr; 270 n->tt_t.th_sum = 0; 271 } else 272 #endif 273 { 274 struct ip *ip = (struct ip *)n->tt_ipgen; 275 276 bzero(ip, sizeof(struct ip)); /* XXX overkill? */ 277 ip->ip_vhl = IP_VHL_BORING; 278 ip->ip_p = IPPROTO_TCP; 279 ip->ip_src = inp->inp_laddr; 280 ip->ip_dst = inp->inp_faddr; 281 n->tt_t.th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 282 htons(sizeof(struct tcphdr) + IPPROTO_TCP)); 283 } 284 n->tt_t.th_sport = inp->inp_lport; 285 n->tt_t.th_dport = inp->inp_fport; 286 n->tt_t.th_seq = 0; 287 n->tt_t.th_ack = 0; 288 n->tt_t.th_x2 = 0; 289 n->tt_t.th_off = 5; 290 n->tt_t.th_flags = 0; 291 n->tt_t.th_win = 0; 292 n->tt_t.th_urp = 0; 293 return (n); 294 } 295 296 /* 297 * Send a single message to the TCP at address specified by 298 * the given TCP/IP header. If m == 0, then we make a copy 299 * of the tcpiphdr at ti and send directly to the addressed host. 300 * This is used to force keep alive messages out using the TCP 301 * template for a connection tp->t_template. If flags are given 302 * then we send a message back to the TCP which originated the 303 * segment ti, and discard the mbuf containing it and any other 304 * attached mbufs. 305 * 306 * In any case the ack and sequence number of the transmitted 307 * segment are as specified by the parameters. 308 * 309 * NOTE: If m != NULL, then ti must point to *inside* the mbuf. 310 */ 311 void 312 tcp_respond(tp, ipgen, th, m, ack, seq, flags) 313 struct tcpcb *tp; 314 void *ipgen; 315 register struct tcphdr *th; 316 register struct mbuf *m; 317 tcp_seq ack, seq; 318 int flags; 319 { 320 register int tlen; 321 int win = 0; 322 struct route *ro = 0; 323 struct route sro; 324 struct ip *ip; 325 struct tcphdr *nth; 326 #ifdef INET6 327 struct route_in6 *ro6 = 0; 328 struct route_in6 sro6; 329 struct ip6_hdr *ip6; 330 int isipv6; 331 #endif /* INET6 */ 332 int ipflags = 0; 333 334 #ifdef INET6 335 isipv6 = IP_VHL_V(((struct ip *)ipgen)->ip_vhl) == 6; 336 ip6 = ipgen; 337 #endif /* INET6 */ 338 ip = ipgen; 339 340 if (tp) { 341 if (!(flags & TH_RST)) { 342 win = sbspace(&tp->t_inpcb->inp_socket->so_rcv); 343 if (win > (long)TCP_MAXWIN << tp->rcv_scale) 344 win = (long)TCP_MAXWIN << tp->rcv_scale; 345 } 346 #ifdef INET6 347 if (isipv6) 348 ro6 = &tp->t_inpcb->in6p_route; 349 else 350 #endif /* INET6 */ 351 ro = &tp->t_inpcb->inp_route; 352 } else { 353 #ifdef INET6 354 if (isipv6) { 355 ro6 = &sro6; 356 bzero(ro6, sizeof *ro6); 357 } else 358 #endif /* INET6 */ 359 { 360 ro = &sro; 361 bzero(ro, sizeof *ro); 362 } 363 } 364 if (m == 0) { 365 m = m_gethdr(M_DONTWAIT, MT_HEADER); 366 if (m == NULL) 367 return; 368 #ifdef TCP_COMPAT_42 369 tlen = 1; 370 #else 371 tlen = 0; 372 #endif 373 m->m_data += max_linkhdr; 374 #ifdef INET6 375 if (isipv6) { 376 bcopy((caddr_t)ip6, mtod(m, caddr_t), 377 sizeof(struct ip6_hdr)); 378 ip6 = mtod(m, struct ip6_hdr *); 379 nth = (struct tcphdr *)(ip6 + 1); 380 } else 381 #endif /* INET6 */ 382 { 383 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 384 ip = mtod(m, struct ip *); 385 nth = (struct tcphdr *)(ip + 1); 386 } 387 bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr)); 388 flags = TH_ACK; 389 } else { 390 m_freem(m->m_next); 391 m->m_next = 0; 392 m->m_data = (caddr_t)ipgen; 393 /* m_len is set later */ 394 tlen = 0; 395 #define xchg(a,b,type) { type t; t=a; a=b; b=t; } 396 #ifdef INET6 397 if (isipv6) { 398 xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr); 399 nth = (struct tcphdr *)(ip6 + 1); 400 } else 401 #endif /* INET6 */ 402 { 403 xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, n_long); 404 nth = (struct tcphdr *)(ip + 1); 405 } 406 if (th != nth) { 407 /* 408 * this is usually a case when an extension header 409 * exists between the IPv6 header and the 410 * TCP header. 411 */ 412 nth->th_sport = th->th_sport; 413 nth->th_dport = th->th_dport; 414 } 415 xchg(nth->th_dport, nth->th_sport, n_short); 416 #undef xchg 417 } 418 #ifdef INET6 419 if (isipv6) { 420 ip6->ip6_plen = htons((u_short)(sizeof (struct tcphdr) + 421 tlen)); 422 tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr); 423 } else 424 #endif 425 { 426 tlen += sizeof (struct tcpiphdr); 427 ip->ip_len = tlen; 428 ip->ip_ttl = ip_defttl; 429 } 430 m->m_len = tlen; 431 m->m_pkthdr.len = tlen; 432 m->m_pkthdr.rcvif = (struct ifnet *) 0; 433 nth->th_seq = htonl(seq); 434 nth->th_ack = htonl(ack); 435 nth->th_x2 = 0; 436 nth->th_off = sizeof (struct tcphdr) >> 2; 437 nth->th_flags = flags; 438 if (tp) 439 nth->th_win = htons((u_short) (win >> tp->rcv_scale)); 440 else 441 nth->th_win = htons((u_short)win); 442 nth->th_urp = 0; 443 #ifdef INET6 444 if (isipv6) { 445 nth->th_sum = 0; 446 nth->th_sum = in6_cksum(m, IPPROTO_TCP, 447 sizeof(struct ip6_hdr), 448 tlen - sizeof(struct ip6_hdr)); 449 ip6->ip6_hlim = in6_selecthlim(tp ? tp->t_inpcb : NULL, 450 ro6 && ro6->ro_rt ? 451 ro6->ro_rt->rt_ifp : 452 NULL); 453 } else 454 #endif /* INET6 */ 455 { 456 nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 457 htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); 458 m->m_pkthdr.csum_flags = CSUM_TCP; 459 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 460 } 461 #ifdef TCPDEBUG 462 if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 463 tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0); 464 #endif 465 #ifdef IPSEC 466 ipsec_setsocket(m, tp ? tp->t_inpcb->inp_socket : NULL); 467 #endif 468 #ifdef INET6 469 if (isipv6) { 470 (void)ip6_output(m, NULL, ro6, ipflags, NULL, NULL); 471 if (ro6 == &sro6 && ro6->ro_rt) { 472 RTFREE(ro6->ro_rt); 473 ro6->ro_rt = NULL; 474 } 475 } else 476 #endif /* INET6 */ 477 { 478 (void) ip_output(m, NULL, ro, ipflags, NULL); 479 if (ro == &sro && ro->ro_rt) { 480 RTFREE(ro->ro_rt); 481 ro->ro_rt = NULL; 482 } 483 } 484 } 485 486 /* 487 * Create a new TCP control block, making an 488 * empty reassembly queue and hooking it to the argument 489 * protocol control block. The `inp' parameter must have 490 * come from the zone allocator set up in tcp_init(). 491 */ 492 struct tcpcb * 493 tcp_newtcpcb(inp) 494 struct inpcb *inp; 495 { 496 struct inp_tp *it; 497 register struct tcpcb *tp; 498 #ifdef INET6 499 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; 500 #endif /* INET6 */ 501 502 it = (struct inp_tp *)inp; 503 tp = &it->tcb; 504 bzero((char *) tp, sizeof(struct tcpcb)); 505 LIST_INIT(&tp->t_segq); 506 tp->t_maxseg = tp->t_maxopd = 507 #ifdef INET6 508 isipv6 ? tcp_v6mssdflt : 509 #endif /* INET6 */ 510 tcp_mssdflt; 511 512 /* Set up our timeouts. */ 513 callout_init(tp->tt_rexmt = &it->inp_tp_rexmt, 0); 514 callout_init(tp->tt_persist = &it->inp_tp_persist, 0); 515 callout_init(tp->tt_keep = &it->inp_tp_keep, 0); 516 callout_init(tp->tt_2msl = &it->inp_tp_2msl, 0); 517 callout_init(tp->tt_delack = &it->inp_tp_delack, 0); 518 519 if (tcp_do_rfc1323) 520 tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); 521 if (tcp_do_rfc1644) 522 tp->t_flags |= TF_REQ_CC; 523 tp->t_inpcb = inp; /* XXX */ 524 /* 525 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no 526 * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives 527 * reasonable initial retransmit time. 528 */ 529 tp->t_srtt = TCPTV_SRTTBASE; 530 tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4; 531 tp->t_rttmin = TCPTV_MIN; 532 tp->t_rxtcur = TCPTV_RTOBASE; 533 tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; 534 tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; 535 tp->t_rcvtime = ticks; 536 /* 537 * IPv4 TTL initialization is necessary for an IPv6 socket as well, 538 * because the socket may be bound to an IPv6 wildcard address, 539 * which may match an IPv4-mapped IPv6 address. 540 */ 541 inp->inp_ip_ttl = ip_defttl; 542 inp->inp_ppcb = (caddr_t)tp; 543 return (tp); /* XXX */ 544 } 545 546 /* 547 * Drop a TCP connection, reporting 548 * the specified error. If connection is synchronized, 549 * then send a RST to peer. 550 */ 551 struct tcpcb * 552 tcp_drop(tp, errno) 553 register struct tcpcb *tp; 554 int errno; 555 { 556 struct socket *so = tp->t_inpcb->inp_socket; 557 558 if (TCPS_HAVERCVDSYN(tp->t_state)) { 559 tp->t_state = TCPS_CLOSED; 560 (void) tcp_output(tp); 561 tcpstat.tcps_drops++; 562 } else 563 tcpstat.tcps_conndrops++; 564 if (errno == ETIMEDOUT && tp->t_softerror) 565 errno = tp->t_softerror; 566 so->so_error = errno; 567 return (tcp_close(tp)); 568 } 569 570 /* 571 * Close a TCP control block: 572 * discard all space held by the tcp 573 * discard internet protocol block 574 * wake up any sleepers 575 */ 576 struct tcpcb * 577 tcp_close(tp) 578 register struct tcpcb *tp; 579 { 580 register struct tseg_qent *q; 581 struct inpcb *inp = tp->t_inpcb; 582 struct socket *so = inp->inp_socket; 583 #ifdef INET6 584 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; 585 #endif /* INET6 */ 586 register struct rtentry *rt; 587 int dosavessthresh; 588 589 /* 590 * Make sure that all of our timers are stopped before we 591 * delete the PCB. 592 */ 593 callout_stop(tp->tt_rexmt); 594 callout_stop(tp->tt_persist); 595 callout_stop(tp->tt_keep); 596 callout_stop(tp->tt_2msl); 597 callout_stop(tp->tt_delack); 598 599 /* 600 * If we got enough samples through the srtt filter, 601 * save the rtt and rttvar in the routing entry. 602 * 'Enough' is arbitrarily defined as the 16 samples. 603 * 16 samples is enough for the srtt filter to converge 604 * to within 5% of the correct value; fewer samples and 605 * we could save a very bogus rtt. 606 * 607 * Don't update the default route's characteristics and don't 608 * update anything that the user "locked". 609 */ 610 if (tp->t_rttupdated >= 16) { 611 register u_long i = 0; 612 #ifdef INET6 613 if (isipv6) { 614 struct sockaddr_in6 *sin6; 615 616 if ((rt = inp->in6p_route.ro_rt) == NULL) 617 goto no_valid_rt; 618 sin6 = (struct sockaddr_in6 *)rt_key(rt); 619 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 620 goto no_valid_rt; 621 } 622 else 623 #endif /* INET6 */ 624 if ((rt = inp->inp_route.ro_rt) == NULL || 625 ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr 626 == INADDR_ANY) 627 goto no_valid_rt; 628 629 if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) { 630 i = tp->t_srtt * 631 (RTM_RTTUNIT / (hz * TCP_RTT_SCALE)); 632 if (rt->rt_rmx.rmx_rtt && i) 633 /* 634 * filter this update to half the old & half 635 * the new values, converting scale. 636 * See route.h and tcp_var.h for a 637 * description of the scaling constants. 638 */ 639 rt->rt_rmx.rmx_rtt = 640 (rt->rt_rmx.rmx_rtt + i) / 2; 641 else 642 rt->rt_rmx.rmx_rtt = i; 643 tcpstat.tcps_cachedrtt++; 644 } 645 if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) { 646 i = tp->t_rttvar * 647 (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE)); 648 if (rt->rt_rmx.rmx_rttvar && i) 649 rt->rt_rmx.rmx_rttvar = 650 (rt->rt_rmx.rmx_rttvar + i) / 2; 651 else 652 rt->rt_rmx.rmx_rttvar = i; 653 tcpstat.tcps_cachedrttvar++; 654 } 655 /* 656 * The old comment here said: 657 * update the pipelimit (ssthresh) if it has been updated 658 * already or if a pipesize was specified & the threshhold 659 * got below half the pipesize. I.e., wait for bad news 660 * before we start updating, then update on both good 661 * and bad news. 662 * 663 * But we want to save the ssthresh even if no pipesize is 664 * specified explicitly in the route, because such 665 * connections still have an implicit pipesize specified 666 * by the global tcp_sendspace. In the absence of a reliable 667 * way to calculate the pipesize, it will have to do. 668 */ 669 i = tp->snd_ssthresh; 670 if (rt->rt_rmx.rmx_sendpipe != 0) 671 dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2); 672 else 673 dosavessthresh = (i < so->so_snd.sb_hiwat / 2); 674 if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 && 675 i != 0 && rt->rt_rmx.rmx_ssthresh != 0) 676 || dosavessthresh) { 677 /* 678 * convert the limit from user data bytes to 679 * packets then to packet data bytes. 680 */ 681 i = (i + tp->t_maxseg / 2) / tp->t_maxseg; 682 if (i < 2) 683 i = 2; 684 i *= (u_long)(tp->t_maxseg + 685 #ifdef INET6 686 (isipv6 ? sizeof (struct ip6_hdr) + 687 sizeof (struct tcphdr) : 688 #endif 689 sizeof (struct tcpiphdr) 690 #ifdef INET6 691 ) 692 #endif 693 ); 694 if (rt->rt_rmx.rmx_ssthresh) 695 rt->rt_rmx.rmx_ssthresh = 696 (rt->rt_rmx.rmx_ssthresh + i) / 2; 697 else 698 rt->rt_rmx.rmx_ssthresh = i; 699 tcpstat.tcps_cachedssthresh++; 700 } 701 } 702 rt = inp->inp_route.ro_rt; 703 if (rt) { 704 /* 705 * mark route for deletion if no information is 706 * cached. 707 */ 708 if ((tp->t_flags & TF_LQ_OVERFLOW) && 709 ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0)){ 710 if (rt->rt_rmx.rmx_rtt == 0) 711 rt->rt_flags |= RTF_DELCLONE; 712 } 713 } 714 no_valid_rt: 715 /* free the reassembly queue, if any */ 716 while((q = LIST_FIRST(&tp->t_segq)) != NULL) { 717 LIST_REMOVE(q, tqe_q); 718 m_freem(q->tqe_m); 719 FREE(q, M_TSEGQ); 720 } 721 if (tp->t_template) 722 (void) m_free(dtom(tp->t_template)); 723 inp->inp_ppcb = NULL; 724 soisdisconnected(so); 725 #ifdef INET6 726 if (INP_CHECK_SOCKAF(so, AF_INET6)) 727 in6_pcbdetach(inp); 728 else 729 #endif /* INET6 */ 730 in_pcbdetach(inp); 731 tcpstat.tcps_closed++; 732 return ((struct tcpcb *)0); 733 } 734 735 void 736 tcp_drain() 737 { 738 if (do_tcpdrain) 739 { 740 struct inpcb *inpb; 741 struct tcpcb *tcpb; 742 struct tseg_qent *te; 743 744 /* 745 * Walk the tcpbs, if existing, and flush the reassembly queue, 746 * if there is one... 747 * XXX: The "Net/3" implementation doesn't imply that the TCP 748 * reassembly queue should be flushed, but in a situation 749 * where we're really low on mbufs, this is potentially 750 * usefull. 751 */ 752 for (inpb = tcbinfo.listhead->lh_first; inpb; 753 inpb = inpb->inp_list.le_next) { 754 if ((tcpb = intotcpcb(inpb))) { 755 while ((te = LIST_FIRST(&tcpb->t_segq)) 756 != NULL) { 757 LIST_REMOVE(te, tqe_q); 758 m_freem(te->tqe_m); 759 FREE(te, M_TSEGQ); 760 } 761 } 762 } 763 764 } 765 } 766 767 /* 768 * Notify a tcp user of an asynchronous error; 769 * store error as soft error, but wake up user 770 * (for now, won't do anything until can select for soft error). 771 */ 772 static void 773 tcp_notify(inp, error) 774 struct inpcb *inp; 775 int error; 776 { 777 register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb; 778 register struct socket *so = inp->inp_socket; 779 780 /* 781 * Ignore some errors if we are hooked up. 782 * If connection hasn't completed, has retransmitted several times, 783 * and receives a second error, give up now. This is better 784 * than waiting a long time to establish a connection that 785 * can never complete. 786 */ 787 if (tp->t_state == TCPS_ESTABLISHED && 788 (error == EHOSTUNREACH || error == ENETUNREACH || 789 error == EHOSTDOWN)) { 790 return; 791 } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 && 792 tp->t_softerror) 793 so->so_error = error; 794 else 795 tp->t_softerror = error; 796 wakeup((caddr_t) &so->so_timeo); 797 sorwakeup(so); 798 sowwakeup(so); 799 } 800 801 static int 802 tcp_pcblist(SYSCTL_HANDLER_ARGS) 803 { 804 int error, i, n, s; 805 struct inpcb *inp, **inp_list; 806 inp_gen_t gencnt; 807 struct xinpgen xig; 808 809 /* 810 * The process of preparing the TCB list is too time-consuming and 811 * resource-intensive to repeat twice on every request. 812 */ 813 if (req->oldptr == 0) { 814 n = tcbinfo.ipi_count; 815 req->oldidx = 2 * (sizeof xig) 816 + (n + n/8) * sizeof(struct xtcpcb); 817 return 0; 818 } 819 820 if (req->newptr != 0) 821 return EPERM; 822 823 /* 824 * OK, now we're committed to doing something. 825 */ 826 s = splnet(); 827 gencnt = tcbinfo.ipi_gencnt; 828 n = tcbinfo.ipi_count; 829 splx(s); 830 831 xig.xig_len = sizeof xig; 832 xig.xig_count = n; 833 xig.xig_gen = gencnt; 834 xig.xig_sogen = so_gencnt; 835 error = SYSCTL_OUT(req, &xig, sizeof xig); 836 if (error) 837 return error; 838 839 inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); 840 if (inp_list == 0) 841 return ENOMEM; 842 843 s = splnet(); 844 for (inp = tcbinfo.listhead->lh_first, i = 0; inp && i < n; 845 inp = inp->inp_list.le_next) { 846 if (inp->inp_gencnt <= gencnt && !prison_xinpcb(req->p, inp)) 847 inp_list[i++] = inp; 848 } 849 splx(s); 850 n = i; 851 852 error = 0; 853 for (i = 0; i < n; i++) { 854 inp = inp_list[i]; 855 if (inp->inp_gencnt <= gencnt) { 856 struct xtcpcb xt; 857 caddr_t inp_ppcb; 858 xt.xt_len = sizeof xt; 859 /* XXX should avoid extra copy */ 860 bcopy(inp, &xt.xt_inp, sizeof *inp); 861 inp_ppcb = inp->inp_ppcb; 862 if (inp_ppcb != NULL) 863 bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); 864 else 865 bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); 866 if (inp->inp_socket) 867 sotoxsocket(inp->inp_socket, &xt.xt_socket); 868 error = SYSCTL_OUT(req, &xt, sizeof xt); 869 } 870 } 871 if (!error) { 872 /* 873 * Give the user an updated idea of our state. 874 * If the generation differs from what we told 875 * her before, she knows that something happened 876 * while we were processing this request, and it 877 * might be necessary to retry. 878 */ 879 s = splnet(); 880 xig.xig_gen = tcbinfo.ipi_gencnt; 881 xig.xig_sogen = so_gencnt; 882 xig.xig_count = tcbinfo.ipi_count; 883 splx(s); 884 error = SYSCTL_OUT(req, &xig, sizeof xig); 885 } 886 free(inp_list, M_TEMP); 887 return error; 888 } 889 890 SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, 891 tcp_pcblist, "S,xtcpcb", "List of active TCP connections"); 892 893 static int 894 tcp_getcred(SYSCTL_HANDLER_ARGS) 895 { 896 struct sockaddr_in addrs[2]; 897 struct inpcb *inp; 898 int error, s; 899 900 error = suser(req->p); 901 if (error) 902 return (error); 903 error = SYSCTL_IN(req, addrs, sizeof(addrs)); 904 if (error) 905 return (error); 906 s = splnet(); 907 inp = in_pcblookup_hash(&tcbinfo, addrs[1].sin_addr, addrs[1].sin_port, 908 addrs[0].sin_addr, addrs[0].sin_port, 0, NULL); 909 if (inp == NULL || inp->inp_socket == NULL) { 910 error = ENOENT; 911 goto out; 912 } 913 error = SYSCTL_OUT(req, inp->inp_socket->so_cred, sizeof(struct ucred)); 914 out: 915 splx(s); 916 return (error); 917 } 918 919 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 920 0, 0, tcp_getcred, "S,ucred", "Get the ucred of a TCP connection"); 921 922 #ifdef INET6 923 static int 924 tcp6_getcred(SYSCTL_HANDLER_ARGS) 925 { 926 struct sockaddr_in6 addrs[2]; 927 struct inpcb *inp; 928 int error, s, mapped = 0; 929 930 error = suser(req->p); 931 if (error) 932 return (error); 933 error = SYSCTL_IN(req, addrs, sizeof(addrs)); 934 if (error) 935 return (error); 936 if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) { 937 if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr)) 938 mapped = 1; 939 else 940 return (EINVAL); 941 } 942 s = splnet(); 943 if (mapped == 1) 944 inp = in_pcblookup_hash(&tcbinfo, 945 *(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12], 946 addrs[1].sin6_port, 947 *(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12], 948 addrs[0].sin6_port, 949 0, NULL); 950 else 951 inp = in6_pcblookup_hash(&tcbinfo, &addrs[1].sin6_addr, 952 addrs[1].sin6_port, 953 &addrs[0].sin6_addr, addrs[0].sin6_port, 954 0, NULL); 955 if (inp == NULL || inp->inp_socket == NULL) { 956 error = ENOENT; 957 goto out; 958 } 959 error = SYSCTL_OUT(req, inp->inp_socket->so_cred, 960 sizeof(struct ucred)); 961 out: 962 splx(s); 963 return (error); 964 } 965 966 SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 967 0, 0, 968 tcp6_getcred, "S,ucred", "Get the ucred of a TCP6 connection"); 969 #endif 970 971 972 void 973 tcp_ctlinput(cmd, sa, vip) 974 int cmd; 975 struct sockaddr *sa; 976 void *vip; 977 { 978 register struct ip *ip = vip; 979 register struct tcphdr *th; 980 void (*notify) __P((struct inpcb *, int)) = tcp_notify; 981 tcp_seq tcp_sequence = 0; 982 int tcp_seq_check = 0; 983 984 if (cmd == PRC_QUENCH) 985 notify = tcp_quench; 986 else if ((icmp_admin_prohib_like_rst == 1) && (cmd == PRC_UNREACH_PORT) && 987 (ip) && ((IP_VHL_HL(ip->ip_vhl) << 2) == sizeof(struct ip))) { 988 /* 989 * Only go here if the length of the IP header in the ICMP packet 990 * is 20 bytes, that is it doesn't have options, if it does have 991 * options, we will not have the first 8 bytes of the TCP header, 992 * and thus we cannot match against TCP source/destination port 993 * numbers and TCP sequence number. 994 */ 995 tcp_seq_check = 1; 996 notify = tcp_drop_syn_sent; 997 } else if (cmd == PRC_MSGSIZE) 998 notify = tcp_mtudisc; 999 else if (!PRC_IS_REDIRECT(cmd) && 1000 ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0)) 1001 return; 1002 if (ip) { 1003 th = (struct tcphdr *)((caddr_t)ip 1004 + (IP_VHL_HL(ip->ip_vhl) << 2)); 1005 if (tcp_seq_check == 1) 1006 tcp_sequence = ntohl(th->th_seq); 1007 in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport, 1008 cmd, notify, tcp_sequence, tcp_seq_check); 1009 } else 1010 in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify, 0, 0); 1011 } 1012 1013 #ifdef INET6 1014 void 1015 tcp6_ctlinput(cmd, sa, d) 1016 int cmd; 1017 struct sockaddr *sa; 1018 void *d; 1019 { 1020 register struct tcphdr *thp; 1021 struct tcphdr th; 1022 void (*notify) __P((struct inpcb *, int)) = tcp_notify; 1023 struct sockaddr_in6 sa6; 1024 struct ip6_hdr *ip6; 1025 struct mbuf *m; 1026 int off; 1027 1028 if (sa->sa_family != AF_INET6 || 1029 sa->sa_len != sizeof(struct sockaddr_in6)) 1030 return; 1031 1032 if (cmd == PRC_QUENCH) 1033 notify = tcp_quench; 1034 else if (cmd == PRC_MSGSIZE) 1035 notify = tcp_mtudisc; 1036 else if (!PRC_IS_REDIRECT(cmd) && 1037 ((unsigned)cmd > PRC_NCMDS || inet6ctlerrmap[cmd] == 0)) 1038 return; 1039 1040 /* if the parameter is from icmp6, decode it. */ 1041 if (d != NULL) { 1042 struct ip6ctlparam *ip6cp = (struct ip6ctlparam *)d; 1043 m = ip6cp->ip6c_m; 1044 ip6 = ip6cp->ip6c_ip6; 1045 off = ip6cp->ip6c_off; 1046 } else { 1047 m = NULL; 1048 ip6 = NULL; 1049 off = 0; /* fool gcc */ 1050 } 1051 1052 /* 1053 * Translate addresses into internal form. 1054 * Sa check if it is AF_INET6 is done at the top of this funciton. 1055 */ 1056 sa6 = *(struct sockaddr_in6 *)sa; 1057 if (IN6_IS_ADDR_LINKLOCAL(&sa6.sin6_addr) != 0 && m != NULL && 1058 m->m_pkthdr.rcvif != NULL) 1059 sa6.sin6_addr.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index); 1060 1061 if (ip6) { 1062 /* 1063 * XXX: We assume that when IPV6 is non NULL, 1064 * M and OFF are valid. 1065 */ 1066 struct in6_addr s; 1067 1068 /* translate addresses into internal form */ 1069 memcpy(&s, &ip6->ip6_src, sizeof(s)); 1070 if (IN6_IS_ADDR_LINKLOCAL(&s) != 0 && m != NULL && 1071 m->m_pkthdr.rcvif != NULL) 1072 s.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index); 1073 1074 /* check if we can safely examine src and dst ports */ 1075 if (m->m_pkthdr.len < off + sizeof(th)) 1076 return; 1077 1078 if (m->m_len < off + sizeof(th)) { 1079 /* 1080 * this should be rare case 1081 * because now MINCLSIZE is "(MHLEN + 1)", 1082 * so we compromise on this copy... 1083 */ 1084 m_copydata(m, off, sizeof(th), (caddr_t)&th); 1085 thp = &th; 1086 } else 1087 thp = (struct tcphdr *)(mtod(m, caddr_t) + off); 1088 in6_pcbnotify(&tcb, (struct sockaddr *)&sa6, thp->th_dport, 1089 &s, thp->th_sport, cmd, notify); 1090 } else 1091 in6_pcbnotify(&tcb, (struct sockaddr *)&sa6, 0, &zeroin6_addr, 1092 0, cmd, notify); 1093 } 1094 #endif /* INET6 */ 1095 1096 /* 1097 * Check if the supplied TCP sequence number is a sequence number 1098 * for a sent but unacknowledged packet on the given TCP session. 1099 */ 1100 int 1101 tcp_seq_vs_sess(inp, tcp_sequence) 1102 struct inpcb *inp; 1103 tcp_seq tcp_sequence; 1104 { 1105 struct tcpcb *tp = intotcpcb(inp); 1106 /* 1107 * If the sequence number is less than that of the last 1108 * unacknowledged packet, or greater than that of the 1109 * last sent, the given sequence number is not that 1110 * of a sent but unacknowledged packet for this session. 1111 */ 1112 if (SEQ_LT(tcp_sequence, tp->snd_una) || 1113 SEQ_GT(tcp_sequence, tp->snd_max)) { 1114 return(0); 1115 } else { 1116 return(1); 1117 } 1118 } 1119 1120 /* 1121 * When a source quench is received, close congestion window 1122 * to one segment. We will gradually open it again as we proceed. 1123 */ 1124 void 1125 tcp_quench(inp, errno) 1126 struct inpcb *inp; 1127 int errno; 1128 { 1129 struct tcpcb *tp = intotcpcb(inp); 1130 1131 if (tp) 1132 tp->snd_cwnd = tp->t_maxseg; 1133 } 1134 1135 /* 1136 * When a ICMP unreachable is recieved, drop the 1137 * TCP connection, depending on the sysctl 1138 * icmp_like_rst_syn_sent_only, it only drops 1139 * the session if it's in SYN-SENT state 1140 */ 1141 void 1142 tcp_drop_syn_sent(inp, errno) 1143 struct inpcb *inp; 1144 int errno; 1145 { 1146 struct tcpcb *tp = intotcpcb(inp); 1147 if((tp) && ((icmp_like_rst_syn_sent_only == 0) || 1148 (tp->t_state == TCPS_SYN_SENT))) 1149 tcp_drop(tp, errno); 1150 } 1151 1152 /* 1153 * When `need fragmentation' ICMP is received, update our idea of the MSS 1154 * based on the new value in the route. Also nudge TCP to send something, 1155 * since we know the packet we just sent was dropped. 1156 * This duplicates some code in the tcp_mss() function in tcp_input.c. 1157 */ 1158 void 1159 tcp_mtudisc(inp, errno) 1160 struct inpcb *inp; 1161 int errno; 1162 { 1163 struct tcpcb *tp = intotcpcb(inp); 1164 struct rtentry *rt; 1165 struct rmxp_tao *taop; 1166 struct socket *so = inp->inp_socket; 1167 int offered; 1168 int mss; 1169 #ifdef INET6 1170 int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0; 1171 #endif /* INET6 */ 1172 1173 if (tp) { 1174 #ifdef INET6 1175 if (isipv6) 1176 rt = tcp_rtlookup6(inp); 1177 else 1178 #endif /* INET6 */ 1179 rt = tcp_rtlookup(inp); 1180 if (!rt || !rt->rt_rmx.rmx_mtu) { 1181 tp->t_maxopd = tp->t_maxseg = 1182 #ifdef INET6 1183 isipv6 ? tcp_v6mssdflt : 1184 #endif /* INET6 */ 1185 tcp_mssdflt; 1186 return; 1187 } 1188 taop = rmx_taop(rt->rt_rmx); 1189 offered = taop->tao_mssopt; 1190 mss = rt->rt_rmx.rmx_mtu - 1191 #ifdef INET6 1192 (isipv6 ? 1193 sizeof(struct ip6_hdr) + sizeof(struct tcphdr) : 1194 #endif /* INET6 */ 1195 sizeof(struct tcpiphdr) 1196 #ifdef INET6 1197 ) 1198 #endif /* INET6 */ 1199 ; 1200 1201 if (offered) 1202 mss = min(mss, offered); 1203 /* 1204 * XXX - The above conditional probably violates the TCP 1205 * spec. The problem is that, since we don't know the 1206 * other end's MSS, we are supposed to use a conservative 1207 * default. But, if we do that, then MTU discovery will 1208 * never actually take place, because the conservative 1209 * default is much less than the MTUs typically seen 1210 * on the Internet today. For the moment, we'll sweep 1211 * this under the carpet. 1212 * 1213 * The conservative default might not actually be a problem 1214 * if the only case this occurs is when sending an initial 1215 * SYN with options and data to a host we've never talked 1216 * to before. Then, they will reply with an MSS value which 1217 * will get recorded and the new parameters should get 1218 * recomputed. For Further Study. 1219 */ 1220 if (tp->t_maxopd <= mss) 1221 return; 1222 tp->t_maxopd = mss; 1223 1224 if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && 1225 (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP) 1226 mss -= TCPOLEN_TSTAMP_APPA; 1227 if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC && 1228 (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC) 1229 mss -= TCPOLEN_CC_APPA; 1230 #if (MCLBYTES & (MCLBYTES - 1)) == 0 1231 if (mss > MCLBYTES) 1232 mss &= ~(MCLBYTES-1); 1233 #else 1234 if (mss > MCLBYTES) 1235 mss = mss / MCLBYTES * MCLBYTES; 1236 #endif 1237 if (so->so_snd.sb_hiwat < mss) 1238 mss = so->so_snd.sb_hiwat; 1239 1240 tp->t_maxseg = mss; 1241 1242 tcpstat.tcps_mturesent++; 1243 tp->t_rtttime = 0; 1244 tp->snd_nxt = tp->snd_una; 1245 tcp_output(tp); 1246 } 1247 } 1248 1249 /* 1250 * Look-up the routing entry to the peer of this inpcb. If no route 1251 * is found and it cannot be allocated the return NULL. This routine 1252 * is called by TCP routines that access the rmx structure and by tcp_mss 1253 * to get the interface MTU. 1254 */ 1255 struct rtentry * 1256 tcp_rtlookup(inp) 1257 struct inpcb *inp; 1258 { 1259 struct route *ro; 1260 struct rtentry *rt; 1261 1262 ro = &inp->inp_route; 1263 rt = ro->ro_rt; 1264 if (rt == NULL || !(rt->rt_flags & RTF_UP)) { 1265 /* No route yet, so try to acquire one */ 1266 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1267 ro->ro_dst.sa_family = AF_INET; 1268 ro->ro_dst.sa_len = sizeof(ro->ro_dst); 1269 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = 1270 inp->inp_faddr; 1271 rtalloc(ro); 1272 rt = ro->ro_rt; 1273 } 1274 } 1275 return rt; 1276 } 1277 1278 #ifdef INET6 1279 struct rtentry * 1280 tcp_rtlookup6(inp) 1281 struct inpcb *inp; 1282 { 1283 struct route_in6 *ro6; 1284 struct rtentry *rt; 1285 1286 ro6 = &inp->in6p_route; 1287 rt = ro6->ro_rt; 1288 if (rt == NULL || !(rt->rt_flags & RTF_UP)) { 1289 /* No route yet, so try to acquire one */ 1290 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { 1291 ro6->ro_dst.sin6_family = AF_INET6; 1292 ro6->ro_dst.sin6_len = sizeof(ro6->ro_dst); 1293 ro6->ro_dst.sin6_addr = inp->in6p_faddr; 1294 rtalloc((struct route *)ro6); 1295 rt = ro6->ro_rt; 1296 } 1297 } 1298 return rt; 1299 } 1300 #endif /* INET6 */ 1301 1302 #ifdef IPSEC 1303 /* compute ESP/AH header size for TCP, including outer IP header. */ 1304 size_t 1305 ipsec_hdrsiz_tcp(tp) 1306 struct tcpcb *tp; 1307 { 1308 struct inpcb *inp; 1309 struct mbuf *m; 1310 size_t hdrsiz; 1311 struct ip *ip; 1312 #ifdef INET6 1313 struct ip6_hdr *ip6; 1314 #endif /* INET6 */ 1315 struct tcphdr *th; 1316 1317 if (!tp || !tp->t_template || !(inp = tp->t_inpcb)) 1318 return 0; 1319 MGETHDR(m, M_DONTWAIT, MT_DATA); 1320 if (!m) 1321 return 0; 1322 1323 #ifdef INET6 1324 if ((inp->inp_vflag & INP_IPV6) != 0) { 1325 ip6 = mtod(m, struct ip6_hdr *); 1326 th = (struct tcphdr *)(ip6 + 1); 1327 m->m_pkthdr.len = m->m_len = 1328 sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 1329 bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip6, 1330 sizeof(struct ip6_hdr)); 1331 bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th, 1332 sizeof(struct tcphdr)); 1333 hdrsiz = ipsec6_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); 1334 } else 1335 #endif /* INET6 */ 1336 { 1337 ip = mtod(m, struct ip *); 1338 th = (struct tcphdr *)(ip + 1); 1339 m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr); 1340 bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip, 1341 sizeof(struct ip)); 1342 bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th, 1343 sizeof(struct tcphdr)); 1344 hdrsiz = ipsec4_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); 1345 } 1346 1347 m_free(m); 1348 return hdrsiz; 1349 } 1350 #endif /*IPSEC*/ 1351 1352 /* 1353 * Return a pointer to the cached information about the remote host. 1354 * The cached information is stored in the protocol specific part of 1355 * the route metrics. 1356 */ 1357 struct rmxp_tao * 1358 tcp_gettaocache(inp) 1359 struct inpcb *inp; 1360 { 1361 struct rtentry *rt; 1362 1363 #ifdef INET6 1364 if ((inp->inp_vflag & INP_IPV6) != 0) 1365 rt = tcp_rtlookup6(inp); 1366 else 1367 #endif /* INET6 */ 1368 rt = tcp_rtlookup(inp); 1369 1370 /* Make sure this is a host route and is up. */ 1371 if (rt == NULL || 1372 (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST)) 1373 return NULL; 1374 1375 return rmx_taop(rt->rt_rmx); 1376 } 1377 1378 /* 1379 * Clear all the TAO cache entries, called from tcp_init. 1380 * 1381 * XXX 1382 * This routine is just an empty one, because we assume that the routing 1383 * routing tables are initialized at the same time when TCP, so there is 1384 * nothing in the cache left over. 1385 */ 1386 static void 1387 tcp_cleartaocache() 1388 { 1389 } 1390