Lines matching: always - wait - for - ack
1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
6 * Copyright (c) 2007-2008,2010
8 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
10 * Copyright (c) 2010-2011 Juniper Networks, Inc.
13 * Portions of this software were developed at the Centre for Advanced Internet
18 * Portions of this software were developed at the Centre for Advanced
39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 #include <sys/proc.h> /* for proc0 declaration */
76 #include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */
86 #define TCPSTATES /* for logging */
94 #include <netinet/ip_icmp.h> /* required for icmp_var.h */
95 #include <netinet/icmp_var.h> /* for ICMP_BANDLIM */
153 "Enforce net.inet.tcp.blackhole for locally originated packets");
158 "Delay ACK to try and piggyback it onto a data packet");
188 "Slow-start flight size (initial congestion window) in number of segments");
198 "Cap the max cwnd increment during slow-start to this number of segments");
203 "Follow RFC793 instead of RFC5961 criteria for accepting SYN packets");
208 "Follow RFC793 instead of RFC5961 criteria for accepting RST packets");
213 "Follow RFC793 criteria for validating SEG.ACK");
245 * Kernel module interface for updating tcpstat. The first argument is an index
257 * receiving a duplicate ACK with a SACK block, and also
264 return ((tp->t_flags & TF_SACK_PERMIT) && in tcp_is_sack_recovery()
265 ((to->to_flags & TOF_SACK) || in tcp_is_sack_recovery()
266 (!TAILQ_EMPTY(&tp->snd_holes)))); in tcp_is_sack_recovery()
271 * Wrapper for the TCP established input helper hook.
278 if (V_tcp_hhh[HHOOK_TCP_EST_IN]->hhh_nhooks > 0) { in hhook_run_tcp_est_in()
284 &tp->t_osd); in hhook_run_tcp_est_in()
302 tp->t_ccv.nsegs = nsegs; in cc_ack_received()
303 tp->t_ccv.bytes_this_ack = BYTES_THIS_ACK(tp, th); in cc_ack_received()
304 if ((!V_tcp_do_newcwv && (tp->snd_cwnd <= tp->snd_wnd)) || in cc_ack_received()
305 (V_tcp_do_newcwv && (tp->snd_cwnd <= tp->snd_wnd) && in cc_ack_received()
306 (tp->snd_cwnd < (tcp_compute_pipe(tp) * 2)))) in cc_ack_received()
307 tp->t_ccv.flags |= CCF_CWND_LIMITED; in cc_ack_received()
309 tp->t_ccv.flags &= ~CCF_CWND_LIMITED; in cc_ack_received()
313 stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF, in cc_ack_received()
314 ((int32_t)tp->snd_cwnd) - tp->snd_wnd); in cc_ack_received()
315 if (!IN_RECOVERY(tp->t_flags)) in cc_ack_received()
316 stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_ACKLEN, in cc_ack_received()
317 tp->t_ccv.bytes_this_ack / (tcp_maxseg(tp) * nsegs)); in cc_ack_received()
318 if ((tp->t_flags & TF_GPUTINPROG) && in cc_ack_received()
319 SEQ_GEQ(th->th_ack, tp->gput_ack)) { in cc_ack_received()
323 gput = (((int64_t)SEQ_SUB(th->th_ack, tp->gput_seq)) << 3) / in cc_ack_received()
324 max(1, tcp_ts_getticks() - tp->gput_ts); in cc_ack_received()
325 stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_GPUT, in cc_ack_received()
332 if (tp->t_stats_gput_prev > 0) in cc_ack_received()
333 stats_voi_update_abs_s32(tp->t_stats, in cc_ack_received()
335 ((gput - tp->t_stats_gput_prev) * 100) / in cc_ack_received()
336 tp->t_stats_gput_prev); in cc_ack_received()
337 tp->t_flags &= ~TF_GPUTINPROG; in cc_ack_received()
338 tp->t_stats_gput_prev = gput; in cc_ack_received()
341 if (tp->snd_cwnd > tp->snd_ssthresh) { in cc_ack_received()
342 tp->t_bytes_acked += tp->t_ccv.bytes_this_ack; in cc_ack_received()
343 if (tp->t_bytes_acked >= tp->snd_cwnd) { in cc_ack_received()
344 tp->t_bytes_acked -= tp->snd_cwnd; in cc_ack_received()
345 tp->t_ccv.flags |= CCF_ABC_SENTAWND; in cc_ack_received()
348 tp->t_ccv.flags &= ~CCF_ABC_SENTAWND; in cc_ack_received()
349 tp->t_bytes_acked = 0; in cc_ack_received()
353 if (CC_ALGO(tp)->ack_received != NULL) { in cc_ack_received()
355 tp->t_ccv.curack = th->th_ack; in cc_ack_received()
356 CC_ALGO(tp)->ack_received(&tp->t_ccv, type); in cc_ack_received()
359 stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd); in cc_ack_received()
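/*
 * Summary of the CCF_CWND_LIMITED test above: the connection is marked
 * cwnd-limited when the congestion window, not the peer's advertised
 * window, is what restricts sending; with V_tcp_do_newcwv enabled it
 * must additionally have more than half of snd_cwnd in flight, so an
 * application-limited sender does not collect cwnd growth it cannot use.
 */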
373 tcp_hc_get(&inp->inp_inc, &metrics); in cc_conn_init()
376 if (tp->t_srtt == 0 && (rtt = metrics.hc_rtt)) { in cc_conn_init()
377 tp->t_srtt = rtt; in cc_conn_init()
380 tp->t_rttvar = metrics.hc_rttvar; in cc_conn_init()
383 /* default variation is +- 1 rtt */ in cc_conn_init()
384 tp->t_rttvar = in cc_conn_init()
385 tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; in cc_conn_init()
387 TCPT_RANGESET(tp->t_rxtcur, in cc_conn_init()
388 ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, in cc_conn_init()
389 tp->t_rttmin, TCPTV_REXMTMAX); in cc_conn_init()
398 tp->snd_ssthresh = max(2 * maxseg, metrics.hc_ssthresh); in cc_conn_init()
403 * Set the initial slow-start flight size. in cc_conn_init()
405 * If a SYN or SYN/ACK was lost and retransmitted, we have to in cc_conn_init()
409 if (tp->snd_cwnd == 1) in cc_conn_init()
410 tp->snd_cwnd = maxseg; /* SYN(-ACK) lost */ in cc_conn_init()
412 tp->snd_cwnd = tcp_compute_initwnd(maxseg); in cc_conn_init()
414 if (CC_ALGO(tp)->conn_init != NULL) in cc_conn_init()
415 CC_ALGO(tp)->conn_init(&tp->t_ccv); in cc_conn_init()
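/*
 * For reference, RFC 6928 defines the initial window that
 * tcp_compute_initwnd() is expected to produce, roughly
 * IW = min(10 * MSS, max(2 * MSS, 14600 bytes)).  An illustrative
 * sketch of that formula (a hypothetical helper, not the in-tree code):
 */
static uint32_t
example_initwnd(uint32_t maxseg)
{
	uint32_t cap = (2 * maxseg > 14600) ? 2 * maxseg : 14600;

	return ((10 * maxseg < cap) ? 10 * maxseg : cap);
}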
424 stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_CSIG, type); in cc_cong_signal()
429 if (!IN_FASTRECOVERY(tp->t_flags)) { in cc_cong_signal()
430 tp->snd_recover = tp->snd_max; in cc_cong_signal()
431 if (tp->t_flags2 & TF2_ECN_PERMIT) in cc_cong_signal()
432 tp->t_flags2 |= TF2_ECN_SND_CWR; in cc_cong_signal()
436 if (!IN_CONGRECOVERY(tp->t_flags) || in cc_cong_signal()
438 * Allow ECN reaction on ACK to CWR, if in cc_cong_signal()
441 SEQ_GEQ(th->th_ack, tp->snd_recover)) { in cc_cong_signal()
442 EXIT_CONGRECOVERY(tp->t_flags); in cc_cong_signal()
444 tp->snd_recover = tp->snd_max + 1; in cc_cong_signal()
445 if (tp->t_flags2 & TF2_ECN_PERMIT) in cc_cong_signal()
446 tp->t_flags2 |= TF2_ECN_SND_CWR; in cc_cong_signal()
450 tp->t_dupacks = 0; in cc_cong_signal()
451 tp->t_bytes_acked = 0; in cc_cong_signal()
452 EXIT_RECOVERY(tp->t_flags); in cc_cong_signal()
453 if (tp->t_flags2 & TF2_ECN_PERMIT) in cc_cong_signal()
454 tp->t_flags2 |= TF2_ECN_SND_CWR; in cc_cong_signal()
459 tp->snd_cwnd = tp->snd_cwnd_prev; in cc_cong_signal()
460 tp->snd_ssthresh = tp->snd_ssthresh_prev; in cc_cong_signal()
461 tp->snd_recover = tp->snd_recover_prev; in cc_cong_signal()
462 if (tp->t_flags & TF_WASFRECOVERY) in cc_cong_signal()
463 ENTER_FASTRECOVERY(tp->t_flags); in cc_cong_signal()
464 if (tp->t_flags & TF_WASCRECOVERY) in cc_cong_signal()
465 ENTER_CONGRECOVERY(tp->t_flags); in cc_cong_signal()
466 tp->snd_nxt = tp->snd_max; in cc_cong_signal()
467 tp->t_flags &= ~TF_PREVVALID; in cc_cong_signal()
468 tp->t_badrxtwin = 0; in cc_cong_signal()
471 if (SEQ_LT(tp->snd_fack, tp->snd_una) || in cc_cong_signal()
472 SEQ_GT(tp->snd_fack, tp->snd_max)) { in cc_cong_signal()
473 tp->snd_fack = tp->snd_una; in cc_cong_signal()
476 if (CC_ALGO(tp)->cong_signal != NULL) { in cc_cong_signal()
478 tp->t_ccv.curack = th->th_ack; in cc_cong_signal()
479 CC_ALGO(tp)->cong_signal(&tp->t_ccv, type); in cc_cong_signal()
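/*
 * The restore path above (snd_cwnd_prev et al.) undoes a retransmission
 * that turned out to be spurious: the cwnd, ssthresh and recovery point
 * saved before the retransmit are put back, and the TF_WASFRECOVERY /
 * TF_WASCRECOVERY flags re-enter the recovery state the connection was
 * in at that time.
 */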
488 if (CC_ALGO(tp)->post_recovery != NULL) { in cc_post_recovery()
489 if (SEQ_LT(tp->snd_fack, th->th_ack) || in cc_post_recovery()
490 SEQ_GT(tp->snd_fack, tp->snd_max)) { in cc_post_recovery()
491 tp->snd_fack = th->th_ack; in cc_post_recovery()
493 tp->t_ccv.curack = th->th_ack; in cc_post_recovery()
494 CC_ALGO(tp)->post_recovery(&tp->t_ccv); in cc_post_recovery()
496 EXIT_RECOVERY(tp->t_flags); in cc_post_recovery()
498 tp->t_bytes_acked = 0; in cc_post_recovery()
499 tp->sackhint.delivered_data = 0; in cc_post_recovery()
500 tp->sackhint.prr_delivered = 0; in cc_post_recovery()
501 tp->sackhint.prr_out = 0; in cc_post_recovery()
505 * Indicate whether this ack should be delayed. We can delay the ack if
507 * - There is no delayed ack timer in progress.
508 * - Our last ack wasn't a 0-sized window. We never want to delay
509 * the ack that opens up a 0-sized window.
510 * - LRO wasn't used for this segment. We make sure by checking that the
515 (tp->t_flags & TF_RXWIN0SENT) == 0) && \
516 (tlen <= tp->t_maxseg) && \
517 (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
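/*
 * Typical use of the DELAY_ACK() predicate above, as it appears later
 * in this file for in-order data:
 *
 *	if (DELAY_ACK(tp, tlen))
 *		tp->t_flags |= TF_DELACK;
 *	else
 *		tp->t_flags |= TF_ACKNOW;
 */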
524 if (CC_ALGO(tp)->ecnpkt_handler != NULL) { in cc_ecnpkt_handler_flags()
527 tp->t_ccv.flags |= CCF_IPHDR_CE; in cc_ecnpkt_handler_flags()
534 tp->t_ccv.flags &= ~CCF_IPHDR_CE; in cc_ecnpkt_handler_flags()
539 tp->t_ccv.flags |= CCF_TCPHDR_CWR; in cc_ecnpkt_handler_flags()
541 tp->t_ccv.flags &= ~CCF_TCPHDR_CWR; in cc_ecnpkt_handler_flags()
543 CC_ALGO(tp)->ecnpkt_handler(&tp->t_ccv); in cc_ecnpkt_handler_flags()
545 if (tp->t_ccv.flags & CCF_ACKNOW) { in cc_ecnpkt_handler_flags()
547 tp->t_flags |= TF_ACKNOW; in cc_ecnpkt_handler_flags()
560 * tcp6_input is a thin wrapper around tcp_input for the extended
564 * tcp_do_segment processes the ACK and text of the segment for
576 if (m->m_len < *offp + sizeof(struct tcphdr)) { in tcp6_input_with_port()
586 * draft-itojun-ipv6-tcp-to-anycast in tcp6_input_with_port()
590 ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */, false); in tcp6_input_with_port()
591 if (ia6 && (ia6->ia6_flags & IN6_IFF_ANYCAST)) { in tcp6_input_with_port()
593 (caddr_t)&ip6->ip6_dst - (caddr_t)ip6); in tcp6_input_with_port()
629 int rstreason = 0; /* For badport_bandlim accounting purposes */ in tcp_input_with_port()
645 isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0; in tcp_input_with_port()
654 m->m_pkthdr.tcp_tun_port = port; in tcp_input_with_port()
659 tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0; in tcp_input_with_port()
662 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) { in tcp_input_with_port()
663 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) in tcp_input_with_port()
664 th->th_sum = m->m_pkthdr.csum_data; in tcp_input_with_port()
666 th->th_sum = in6_cksum_pseudo(ip6, tlen, in tcp_input_with_port()
667 IPPROTO_TCP, m->m_pkthdr.csum_data); in tcp_input_with_port()
668 th->th_sum ^= 0xffff; in tcp_input_with_port()
670 th->th_sum = in6_cksum(m, IPPROTO_TCP, off0, tlen); in tcp_input_with_port()
671 if (th->th_sum) { in tcp_input_with_port()
678 * As we use all-zero to indicate unbounded/unconnected pcb, in tcp_input_with_port()
684 KASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst), in tcp_input_with_port()
686 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { in tcp_input_with_port()
706 if (m->m_len < sizeof (struct tcpiphdr)) { in tcp_input_with_port()
715 tlen = ntohs(ip->ip_len) - off0; in tcp_input_with_port()
717 iptos = ip->ip_tos; in tcp_input_with_port()
720 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { in tcp_input_with_port()
721 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) in tcp_input_with_port()
722 th->th_sum = m->m_pkthdr.csum_data; in tcp_input_with_port()
724 th->th_sum = in_pseudo(ip->ip_src.s_addr, in tcp_input_with_port()
725 ip->ip_dst.s_addr, in tcp_input_with_port()
726 htonl(m->m_pkthdr.csum_data + tlen + in tcp_input_with_port()
728 th->th_sum ^= 0xffff; in tcp_input_with_port()
736 ipttl = ip->ip_ttl; in tcp_input_with_port()
737 bzero(ipov->ih_x1, sizeof(ipov->ih_x1)); in tcp_input_with_port()
738 ipov->ih_len = htons(tlen); in tcp_input_with_port()
739 th->th_sum = in_cksum(m, len); in tcp_input_with_port()
740 /* Reset length for SDT probes. */ in tcp_input_with_port()
741 ip->ip_len = htons(len); in tcp_input_with_port()
743 ip->ip_tos = iptos; in tcp_input_with_port()
744 /* Re-initialization for later version check */ in tcp_input_with_port()
745 ip->ip_ttl = ipttl; in tcp_input_with_port()
746 ip->ip_v = IPVERSION; in tcp_input_with_port()
747 ip->ip_hl = off0 >> 2; in tcp_input_with_port()
750 if (th->th_sum && (port == 0)) { in tcp_input_with_port()
754 KASSERT(ip->ip_dst.s_addr != INADDR_ANY, in tcp_input_with_port()
756 if (__predict_false(ip->ip_src.s_addr == INADDR_ANY)) { in tcp_input_with_port()
767 off = th->th_off << 2; in tcp_input_with_port()
772 tlen -= off; /* tlen is used instead of ti->ti_len */ in tcp_input_with_port()
776 if (m->m_len < off0 + off) { in tcp_input_with_port()
792 if (m->m_len < sizeof(struct ip) + off) { in tcp_input_with_port()
803 optlen = off - sizeof (struct tcphdr); in tcp_input_with_port()
823 (isipv6 && (m->m_flags & M_IP6_NEXTHOP)) in tcp_input_with_port()
825 || (!isipv6 && (m->m_flags & M_IP_NEXTHOP)) in tcp_input_with_port()
829 (m->m_flags & M_IP_NEXTHOP) in tcp_input_with_port()
835 * For initial SYN packets we don't need write lock on matching in tcp_input_with_port()
855 &ip6->ip6_src, th->th_sport, &ip6->ip6_dst, th->th_dport, in tcp_input_with_port()
856 lookupflag & ~INPLOOKUP_WILDCARD, m->m_pkthdr.rcvif, m); in tcp_input_with_port()
861 * any hardware-generated hash is ignored. in tcp_input_with_port()
863 inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_src, in tcp_input_with_port()
864 th->th_sport, &next_hop6->sin6_addr, in tcp_input_with_port()
865 next_hop6->sin6_port ? ntohs(next_hop6->sin6_port) : in tcp_input_with_port()
866 th->th_dport, lookupflag, m->m_pkthdr.rcvif); in tcp_input_with_port()
869 inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src, in tcp_input_with_port()
870 th->th_sport, &ip6->ip6_dst, th->th_dport, lookupflag, in tcp_input_with_port()
871 m->m_pkthdr.rcvif, m); in tcp_input_with_port()
886 inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, th->th_sport, in tcp_input_with_port()
887 ip->ip_dst, th->th_dport, lookupflag & ~INPLOOKUP_WILDCARD, in tcp_input_with_port()
888 m->m_pkthdr.rcvif, m); in tcp_input_with_port()
893 * any hardware-generated hash is ignored. in tcp_input_with_port()
895 inp = in_pcblookup(&V_tcbinfo, ip->ip_src, in tcp_input_with_port()
896 th->th_sport, next_hop->sin_addr, in tcp_input_with_port()
897 next_hop->sin_port ? ntohs(next_hop->sin_port) : in tcp_input_with_port()
898 th->th_dport, lookupflag, m->m_pkthdr.rcvif); in tcp_input_with_port()
901 inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, in tcp_input_with_port()
902 th->th_sport, ip->ip_dst, th->th_dport, lookupflag, in tcp_input_with_port()
903 m->m_pkthdr.rcvif, m); in tcp_input_with_port()
932 if ((inp->inp_flowtype == M_HASHTYPE_NONE) && in tcp_input_with_port()
933 !SOLISTENING(inp->inp_socket)) { in tcp_input_with_port()
935 inp->inp_flowid = m->m_pkthdr.flowid; in tcp_input_with_port()
936 inp->inp_flowtype = M_HASHTYPE_GET(m); in tcp_input_with_port()
942 rss_proto_software_hash_v6(&inp->in6p_faddr, in tcp_input_with_port()
943 &inp->in6p_laddr, in tcp_input_with_port()
944 inp->inp_fport, in tcp_input_with_port()
945 inp->inp_lport, in tcp_input_with_port()
947 &inp->inp_flowid, in tcp_input_with_port()
948 &inp->inp_flowtype); in tcp_input_with_port()
952 rss_proto_software_hash_v4(inp->inp_faddr, in tcp_input_with_port()
953 inp->inp_laddr, in tcp_input_with_port()
954 inp->inp_fport, in tcp_input_with_port()
955 inp->inp_lport, in tcp_input_with_port()
957 &inp->inp_flowid, in tcp_input_with_port()
958 &inp->inp_flowtype); in tcp_input_with_port()
982 * Check the minimum TTL for socket. in tcp_input_with_port()
984 if (inp->inp_ip_minttl != 0) { in tcp_input_with_port()
987 if (inp->inp_ip_minttl > ip6->ip6_hlim) in tcp_input_with_port()
991 if (inp->inp_ip_minttl > ip->ip_ttl) in tcp_input_with_port()
996 switch (tp->t_state) { in tcp_input_with_port()
 1007  	 * tcp_twcheck always unlocks the inp, and frees the m if it fails.  in tcp_input_with_port()
1022 if ((tp->t_port != port) && (tp->t_state > TCPS_LISTEN)) { in tcp_input_with_port()
1028 if (tp->t_flags & TF_TOE) { in tcp_input_with_port()
1039 so = inp->inp_socket; in tcp_input_with_port()
1046 KASSERT(tp->t_state == TCPS_LISTEN || !SOLISTENING(so), in tcp_input_with_port()
1048 if (tp->t_state == TCPS_LISTEN && SOLISTENING(so)) { in tcp_input_with_port()
1055 if (inp->inp_inc.inc_flags & INC_IPV6MINMTU) in tcp_input_with_port()
1057 inc.inc6_faddr = ip6->ip6_src; in tcp_input_with_port()
1058 inc.inc6_laddr = ip6->ip6_dst; in tcp_input_with_port()
1062 inc.inc_faddr = ip->ip_src; in tcp_input_with_port()
1063 inc.inc_laddr = ip->ip_dst; in tcp_input_with_port()
1065 inc.inc_fport = th->th_sport; in tcp_input_with_port()
1066 inc.inc_lport = th->th_dport; in tcp_input_with_port()
1067 inc.inc_fibnum = so->so_fibnum; in tcp_input_with_port()
1070 * Check for an existing connection attempt in syncache if in tcp_input_with_port()
1071 * the flag is only ACK. A successful lookup creates a new in tcp_input_with_port()
1095 * No syncache entry, or ACK was not for our in tcp_input_with_port()
1096 * SYN/ACK. Do our protection against double in tcp_input_with_port()
1097 * ACK. If peer sent us 2 ACKs, then for the in tcp_input_with_port()
 1101  			 * don't want to send RST for the second ACK,  in tcp_input_with_port()
1104 * the ACK is stray indeed, rstreason would in tcp_input_with_port()
1119 * We completed the 3-way handshake in tcp_input_with_port()
1124 * or wait and have the remote end in tcp_input_with_port()
1125 * retransmit the ACK for another in tcp_input_with_port()
1146 * then listening socket is read-locked. in tcp_input_with_port()
1156 KASSERT(tp->t_state == TCPS_SYN_RECEIVED, in tcp_input_with_port()
1164 tp->t_fb->tfb_tcp_do_segment(tp, m, th, drop_hdrlen, in tcp_input_with_port()
1169 * Segment flag validation for new connection attempts: in tcp_input_with_port()
1171 * Our (SYN|ACK) response was rejected. in tcp_input_with_port()
1194 * (SYN|ACK) is bogus on a listen socket. in tcp_input_with_port()
1199 "SYN|ACK invalid, segment rejected\n", in tcp_input_with_port()
1245 * it). We compromise it as it is much better for peer in tcp_input_with_port()
1247 * for the exchange. in tcp_input_with_port()
1254 * communication is okay - "SHOULD continue to be in tcp_input_with_port()
1261 * our source address selection - we must obey the peer. in tcp_input_with_port()
1264 * multiple description text for deprecated address in tcp_input_with_port()
1265 * handling - worse, they are not exactly the same. in tcp_input_with_port()
1271 ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */, false); in tcp_input_with_port()
1273 (ia6->ia6_flags & IN6_IFF_DEPRECATED)) { in tcp_input_with_port()
1290 * global or subnet broad- or multicast address. in tcp_input_with_port()
1292 * link-layer packets with a broadcast IP address. Use in tcp_input_with_port()
1295 if (m->m_flags & (M_BCAST|M_MCAST)) { in tcp_input_with_port()
1298 "Connection attempt from broad- or multicast " in tcp_input_with_port()
1304 if (th->th_dport == th->th_sport && in tcp_input_with_port()
1305 IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) { in tcp_input_with_port()
1312 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || in tcp_input_with_port()
1313 IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { in tcp_input_with_port()
1327 if (th->th_dport == th->th_sport && in tcp_input_with_port()
1328 ip->ip_dst.s_addr == ip->ip_src.s_addr) { in tcp_input_with_port()
1335 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || in tcp_input_with_port()
1336 IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || in tcp_input_with_port()
1337 ip->ip_src.s_addr == htonl(INADDR_BROADCAST) || in tcp_input_with_port()
1338 in_ifnet_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { in tcp_input_with_port()
1341 "Connection attempt from/to broad- " in tcp_input_with_port()
1350 * for syncache. in tcp_input_with_port()
1365 if (tp->t_flags & TF_SIGNATURE) { in tcp_input_with_port()
1380 * state. tcp_do_segment() always consumes the mbuf chain, unlocks in tcp_input_with_port()
1385 * the segment silently, or send a challenge ACK. However, we try in tcp_input_with_port()
1386 * to upgrade the lock, because calling convention for stacks is in tcp_input_with_port()
1387 * write-lock on PCB. If upgrade fails, drop the SYN. in tcp_input_with_port()
1392 tp->t_fb->tfb_tcp_do_segment(tp, m, th, drop_hdrlen, tlen, iptos); in tcp_input_with_port()
1405 isipv6 ? !in6_localaddr(&ip6->ip6_src) : in tcp_input_with_port()
1408 !in_localip(ip->ip_src) in tcp_input_with_port()
 1438  * bandwidth and high delay (e.g. trans-continental/oceanic links).  in tcp_autorcvbuf()
1442 * more aggressive in scaling the receive socket buffer. For
1460 * of slow-start but also makes it so our peer never gets limited
1477 if (V_tcp_do_autorcvbuf && (so->so_rcv.sb_flags & SB_AUTOSIZE) && in tcp_autorcvbuf()
1478 tp->t_srtt != 0 && tp->rfbuf_ts != 0 && in tcp_autorcvbuf()
1479 TCP_TS_TO_TICKS(tcp_ts_getticks() - tp->rfbuf_ts) > in tcp_autorcvbuf()
1480 ((tp->t_srtt >> TCP_RTT_SHIFT)/2)) { in tcp_autorcvbuf()
1481 if (tp->rfbuf_cnt > ((so->so_rcv.sb_hiwat / 2)/ 4 * 3) && in tcp_autorcvbuf()
1482 so->so_rcv.sb_hiwat < V_tcp_autorcvbuf_max) { in tcp_autorcvbuf()
1483 newsize = min((so->so_rcv.sb_hiwat + (so->so_rcv.sb_hiwat/2)), V_tcp_autorcvbuf_max); in tcp_autorcvbuf()
1488 tp->rfbuf_ts = 0; in tcp_autorcvbuf()
1489 tp->rfbuf_cnt = 0; in tcp_autorcvbuf()
1491 tp->rfbuf_cnt += tlen; /* add up */ in tcp_autorcvbuf()
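/*
 * In short: once at least half a smoothed RTT has elapsed since the
 * measurement started, if more than 3/8 of the current sb_hiwat arrived
 * during that interval, grow the receive buffer by 50%, capped at
 * V_tcp_autorcvbuf_max; then restart the measurement.
 */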
1508 if (tp->t_flags & TF_WAKESOR) { in tcp_handle_wakeup()
1511 tp->t_flags &= ~TF_WAKESOR; in tcp_handle_wakeup()
1530 struct in_conninfo *inc = &inp->inp_inc; in tcp_do_segment()
1537 tp->sackhint.last_sack_ack = 0; in tcp_do_segment()
1539 nsegs = max(1, m->m_pkthdr.lro_nsegs); in tcp_do_segment()
1543 KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", in tcp_do_segment()
1545 KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT", in tcp_do_segment()
1550 tcp_pcap_add(th, m, &(tp->t_inpkts)); in tcp_do_segment()
1552 TCP_LOG_EVENT(tp, th, &so->so_rcv, &so->so_snd, TCP_LOG_IN, 0, in tcp_do_segment()
1566 * If a segment with the ACK-bit set arrives in the SYN-SENT state in tcp_do_segment()
1567 * check SEQ.ACK first. in tcp_do_segment()
1569 if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && in tcp_do_segment()
1570 (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { in tcp_do_segment()
1578 * Reset idle time and keep-alive timer. in tcp_do_segment()
1582 if (tp->t_idle_reduce && in tcp_do_segment()
1583 (tp->snd_max == tp->snd_una) && in tcp_do_segment()
1584 ((ticks - tp->t_rcvtime) >= tp->t_rxtcur)) in tcp_do_segment()
1586 tp->t_rcvtime = ticks; in tcp_do_segment()
1591 * Scale up the window into a 32-bit value. in tcp_do_segment()
1592 * For the SYN_SENT state the scale is zero. in tcp_do_segment()
1594 tiwin = th->th_win << tp->snd_scale; in tcp_do_segment()
1596 stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin); in tcp_do_segment()
1603 tcp_packets_this_ack(tp, th->th_ack), in tcp_do_segment()
1611 (th->th_off << 2) - sizeof(struct tcphdr), in tcp_do_segment()
1613 if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) { in tcp_do_segment()
1622 if ((tp->t_flags & TF_SIGNATURE) != 0 && in tcp_do_segment()
1635 to.to_tsecr -= tp->ts_offset; in tcp_do_segment()
1638 } else if (tp->t_rxtshift == 1 && in tcp_do_segment()
1639 tp->t_flags & TF_PREVVALID && in tcp_do_segment()
1640 tp->t_badrxtwin != 0 && in tcp_do_segment()
1641 TSTMP_LT(to.to_tsecr, tp->t_badrxtwin)) { in tcp_do_segment()
1646 * Process options only when we get SYN/ACK back. The SYN case in tcp_do_segment()
1647 * for incoming connections is handled in tcp_syncache. in tcp_do_segment()
1649 * or <SYN,ACK>) segment itself is never scaled. in tcp_do_segment()
1652 if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) { in tcp_do_segment()
1653 /* Handle parallel SYN for ECN */ in tcp_do_segment()
1656 (tp->t_flags & TF_REQ_SCALE) && in tcp_do_segment()
1657 !(tp->t_flags & TF_NOOPT)) { in tcp_do_segment()
1658 tp->t_flags |= TF_RCVD_SCALE; in tcp_do_segment()
1659 tp->snd_scale = to.to_wscale; in tcp_do_segment()
1661 tp->t_flags &= ~TF_REQ_SCALE; in tcp_do_segment()
1667 tp->snd_wnd = th->th_win; in tcp_do_segment()
1669 (tp->t_flags & TF_REQ_TSTMP) && in tcp_do_segment()
1670 !(tp->t_flags & TF_NOOPT)) { in tcp_do_segment()
1671 tp->t_flags |= TF_RCVD_TSTMP; in tcp_do_segment()
1672 tp->ts_recent = to.to_tsval; in tcp_do_segment()
1673 tp->ts_recent_age = tcp_ts_getticks(); in tcp_do_segment()
1675 tp->t_flags &= ~TF_REQ_TSTMP; in tcp_do_segment()
1680 if ((tp->t_flags & TF_SACK_PERMIT) && in tcp_do_segment()
1682 (tp->t_flags & TF_NOOPT))) { in tcp_do_segment()
1683 tp->t_flags &= ~TF_SACK_PERMIT; in tcp_do_segment()
1685 if (tp->t_flags & TF_FASTOPEN) { in tcp_do_segment()
1687 !(tp->t_flags & TF_NOOPT)) { in tcp_do_segment()
1693 if ((inp->inp_vflag & INP_IPV6) != 0) { in tcp_do_segment()
1708 * If timestamps were negotiated during SYN/ACK and a in tcp_do_segment()
1714 if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) { in tcp_do_segment()
1732 * If timestamps were not negotiated during SYN/ACK and a in tcp_do_segment()
1737 if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) { in tcp_do_segment()
1746 * Header prediction: check for the two common cases in tcp_do_segment()
1747 * of a uni-directional data xfer. If the packet has in tcp_do_segment()
1748 * no control flags, is in-sequence, the window didn't in tcp_do_segment()
1750 * candidate. If the length is zero and the ack moved in tcp_do_segment()
1753 * that was blocked waiting for space. If the length in tcp_do_segment()
1754 * is non-zero and the ack didn't move, we're the in tcp_do_segment()
1755 * receiver side. If we're getting packets in-order in tcp_do_segment()
1757 * the socket buffer and note that we need a delayed ack. in tcp_do_segment()
1758 * Make sure that the hidden state-flags are also off. in tcp_do_segment()
1759 * Since we check for TCPS_ESTABLISHED first, it can only in tcp_do_segment()
1762 if (tp->t_state == TCPS_ESTABLISHED && in tcp_do_segment()
1763 th->th_seq == tp->rcv_nxt && in tcp_do_segment()
1765 tp->snd_nxt == tp->snd_max && in tcp_do_segment()
1766 tiwin && tiwin == tp->snd_wnd && in tcp_do_segment()
1767 ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) && in tcp_do_segment()
1770 TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) { in tcp_do_segment()
1772 * If last ACK falls within this segment's sequence numbers, in tcp_do_segment()
1778 SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { in tcp_do_segment()
1779 tp->ts_recent_age = tcp_ts_getticks(); in tcp_do_segment()
1780 tp->ts_recent = to.to_tsval; in tcp_do_segment()
1784 if (SEQ_GT(th->th_ack, tp->snd_una) && in tcp_do_segment()
1785 SEQ_LEQ(th->th_ack, tp->snd_max) && in tcp_do_segment()
1786 !IN_RECOVERY(tp->t_flags) && in tcp_do_segment()
1788 TAILQ_EMPTY(&tp->snd_holes)) { in tcp_do_segment()
1790 * This is a pure ack for outstanding data. in tcp_do_segment()
1798 tp->t_rxtshift == 1 && in tcp_do_segment()
1799 tp->t_flags & TF_PREVVALID && in tcp_do_segment()
1800 tp->t_badrxtwin != 0 && in tcp_do_segment()
1801 TSTMP_LT(ticks, tp->t_badrxtwin)) { in tcp_do_segment()
1809 * during the SYN+ACK phase, ignore in tcp_do_segment()
1817 t = tcp_ts_getticks() - to.to_tsecr; in tcp_do_segment()
1818 if (!tp->t_rttlow || tp->t_rttlow > t) in tcp_do_segment()
1819 tp->t_rttlow = t; in tcp_do_segment()
1822 } else if (tp->t_rtttime && in tcp_do_segment()
1823 SEQ_GT(th->th_ack, tp->t_rtseq)) { in tcp_do_segment()
1824 if (!tp->t_rttlow || in tcp_do_segment()
1825 tp->t_rttlow > ticks - tp->t_rtttime) in tcp_do_segment()
1826 tp->t_rttlow = ticks - tp->t_rtttime; in tcp_do_segment()
1828 ticks - tp->t_rtttime); in tcp_do_segment()
1839 sbdrop(&so->so_snd, acked); in tcp_do_segment()
1840 if (SEQ_GT(tp->snd_una, tp->snd_recover) && in tcp_do_segment()
1841 SEQ_LEQ(th->th_ack, tp->snd_recover)) in tcp_do_segment()
1842 tp->snd_recover = th->th_ack - 1; in tcp_do_segment()
1852 tp->snd_una = th->th_ack; in tcp_do_segment()
1857 tp->snd_wl2 = th->th_ack; in tcp_do_segment()
1858 tp->t_dupacks = 0; in tcp_do_segment()
1864 * using current (possibly backed-off) value. in tcp_do_segment()
1865 * If process is waiting for space, in tcp_do_segment()
1875 * a sufficiently large ACK. in tcp_do_segment()
1877 if (sbavail(&so->so_snd) == 0) in tcp_do_segment()
1878 tp->t_acktime = 0; in tcp_do_segment()
1880 tp->t_acktime = ticks; in tcp_do_segment()
1881 if (tp->snd_una == tp->snd_max) in tcp_do_segment()
1890 * or we need to send an ACK. in tcp_do_segment()
1892 if ((tp->t_flags & TF_ACKNOW) || in tcp_do_segment()
1893 (sbavail(&so->so_snd) >= in tcp_do_segment()
1894 SEQ_SUB(tp->snd_max, tp->snd_una))) { in tcp_do_segment()
1899 } else if (th->th_ack == tp->snd_una && in tcp_do_segment()
1900 tlen <= sbspace(&so->so_rcv)) { in tcp_do_segment()
1904 * This is a pure, in-sequence data packet with in tcp_do_segment()
1909 if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks) in tcp_do_segment()
1912 tp->rcv_nxt += tlen; in tcp_do_segment()
1914 ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) && in tcp_do_segment()
1915 (tp->t_fbyte_in == 0)) { in tcp_do_segment()
1916 tp->t_fbyte_in = ticks; in tcp_do_segment()
1917 if (tp->t_fbyte_in == 0) in tcp_do_segment()
1918 tp->t_fbyte_in = 1; in tcp_do_segment()
1919 if (tp->t_fbyte_out && tp->t_fbyte_in) in tcp_do_segment()
1920 tp->t_flags2 |= TF2_FBYTES_COMPLETE; in tcp_do_segment()
1926 tp->snd_wl1 = th->th_seq; in tcp_do_segment()
1931 tp->rcv_up = tp->rcv_nxt; in tcp_do_segment()
1940 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { in tcp_do_segment()
1950 so->so_rcv.sb_flags &= ~SB_AUTOSIZE; in tcp_do_segment()
1952 sbappendstream_locked(&so->so_rcv, m, 0); in tcp_do_segment()
1957 tp->t_flags |= TF_DELACK; in tcp_do_segment()
1959 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
1972 win = sbspace(&so->so_rcv); in tcp_do_segment()
1975 tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); in tcp_do_segment()
1977 switch (tp->t_state) { in tcp_do_segment()
1980 * if seg contains an ACK, but not for our SYN/ACK, send a RST. in tcp_do_segment()
1988 (SEQ_LEQ(th->th_ack, tp->snd_una) || in tcp_do_segment()
1989 SEQ_GT(th->th_ack, tp->snd_max))) { in tcp_do_segment()
1994 if (tp->t_flags & TF_FASTOPEN) { in tcp_do_segment()
1999 * a subset of the original data), a valid ACK, a in tcp_do_segment()
2007 /* non-initial SYN is ignored */ in tcp_do_segment()
2019 * if seg contains a RST with valid ACK (SEQ.ACK has already in tcp_do_segment()
2021 * if seg contains a RST without an ACK, drop the seg. in tcp_do_segment()
2024 * initialize tp->rcv_nxt and tp->irs in tcp_do_segment()
2025 * if seg contains ack then advance tp->snd_una in tcp_do_segment()
2029 * arrange for segment to be acked (eventually) in tcp_do_segment()
2044 tp->irs = th->th_seq; in tcp_do_segment()
2055 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == in tcp_do_segment()
2057 tp->rcv_scale = tp->request_r_scale; in tcp_do_segment()
2059 tp->rcv_adv += min(tp->rcv_wnd, in tcp_do_segment()
2060 TCP_MAXWIN << tp->rcv_scale); in tcp_do_segment()
2061 tp->snd_una++; /* SYN is acked */ in tcp_do_segment()
2062 if (SEQ_LT(tp->snd_nxt, tp->snd_una)) in tcp_do_segment()
2063 tp->snd_nxt = tp->snd_una; in tcp_do_segment()
2068 if ((tp->t_flags & TF_FASTOPEN) && in tcp_do_segment()
2069 (tp->snd_una != tp->snd_max)) { in tcp_do_segment()
2070 tp->snd_nxt = th->th_ack; in tcp_do_segment()
2074 * If there's data, delay ACK; if there's also a FIN in tcp_do_segment()
2081 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
2086 * Received <SYN,ACK> in SYN_SENT[*] state. in tcp_do_segment()
2088 * SYN_SENT --> ESTABLISHED in tcp_do_segment()
2089 * SYN_SENT* --> FIN_WAIT_1 in tcp_do_segment()
2091 tp->t_starttime = ticks; in tcp_do_segment()
2092 if (tp->t_flags & TF_NEEDFIN) { in tcp_do_segment()
2093 tp->t_acktime = ticks; in tcp_do_segment()
2095 tp->t_flags &= ~TF_NEEDFIN; in tcp_do_segment()
2107 * Received initial SYN in SYN-SENT[*] state => in tcp_do_segment()
 2109  		 * If it succeeds, connection is half-synchronized.  in tcp_do_segment()
2110 * Otherwise, do 3-way handshake: in tcp_do_segment()
2111 * SYN-SENT -> SYN-RECEIVED in tcp_do_segment()
2112 * SYN-SENT* -> SYN-RECEIVED* in tcp_do_segment()
2114 tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN | TF_SONOTCONN); in tcp_do_segment()
2120 * Advance th->th_seq to correspond to first data byte. in tcp_do_segment()
2124 th->th_seq++; in tcp_do_segment()
2125 if (tlen > tp->rcv_wnd) { in tcp_do_segment()
2126 todrop = tlen - tp->rcv_wnd; in tcp_do_segment()
2127 m_adj(m, -todrop); in tcp_do_segment()
2128 tlen = tp->rcv_wnd; in tcp_do_segment()
2133 tp->snd_wl1 = th->th_seq - 1; in tcp_do_segment()
2134 tp->rcv_up = th->th_seq; in tcp_do_segment()
2138 * our data will be ACK'd; if so, enter normal data segment in tcp_do_segment()
2139 * processing in the middle of step 5, ack processing. in tcp_do_segment()
2161 * drop leading data (and SYN); if nothing left, just ack. in tcp_do_segment()
2167 * - RST drops connection only if SEG.SEQ == RCV.NXT. in tcp_do_segment()
2168 * - If RST is in window, we send challenge ACK. in tcp_do_segment()
2175 if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) && in tcp_do_segment()
2176 SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) || in tcp_do_segment()
2177 (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) { in tcp_do_segment()
2178 KASSERT(tp->t_state != TCPS_SYN_SENT, in tcp_do_segment()
2179 ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p", in tcp_do_segment()
2183 tp->last_ack_sent == th->th_seq) { in tcp_do_segment()
2186 switch (tp->t_state) { in tcp_do_segment()
2188 so->so_error = ECONNREFUSED; in tcp_do_segment()
2196 so->so_error = ECONNRESET; in tcp_do_segment()
2214 * Send challenge ACK for any SYN in synchronized state. in tcp_do_segment()
2216 if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT && in tcp_do_segment()
2217 tp->t_state != TCPS_SYN_RECEIVED) { in tcp_do_segment()
2220 SEQ_GEQ(th->th_seq, tp->last_ack_sent) && in tcp_do_segment()
2221 SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) { in tcp_do_segment()
2237 if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent && in tcp_do_segment()
2238 TSTMP_LT(to.to_tsval, tp->ts_recent)) { in tcp_do_segment()
2240 if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) { in tcp_do_segment()
2249 * because we don't want out-of-order segments to be in tcp_do_segment()
2252 tp->ts_recent = 0; in tcp_do_segment()
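/*
 * This is the PAWS check (RFC 7323): a segment whose timestamp is older
 * than ts_recent is normally dropped, but if the connection has been
 * idle for longer than TCP_PAWS_IDLE (nominally 24 days) the cached
 * timestamp itself may be stale, e.g. after a peer reboot, so it is
 * invalidated instead of rejecting the segment.
 */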
2264 * In the SYN-RECEIVED state, validate that the packet belongs to in tcp_do_segment()
2268 * for the "LAND" DoS attack. in tcp_do_segment()
2270 if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) { in tcp_do_segment()
2276 todrop = tp->rcv_nxt - th->th_seq; in tcp_do_segment()
2280 th->th_seq++; in tcp_do_segment()
2281 if (th->th_urp > 1) in tcp_do_segment()
2282 th->th_urp--; in tcp_do_segment()
2285 todrop--; in tcp_do_segment()
2300 * Send an ACK to resynchronize and drop any data. in tcp_do_segment()
2301 * But keep on processing for RST or ACK. in tcp_do_segment()
2303 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
2312 * DSACK - add SACK block for dropped range in tcp_do_segment()
2314 if ((todrop > 0) && (tp->t_flags & TF_SACK_PERMIT)) { in tcp_do_segment()
2315 tcp_update_sack_list(tp, th->th_seq, in tcp_do_segment()
2316 th->th_seq + todrop); in tcp_do_segment()
2318 * ACK now, as the next in-sequence segment in tcp_do_segment()
2321 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
2324 th->th_seq += todrop; in tcp_do_segment()
2325 tlen -= todrop; in tcp_do_segment()
2326 if (th->th_urp > todrop) in tcp_do_segment()
2327 th->th_urp -= todrop; in tcp_do_segment()
2330 th->th_urp = 0; in tcp_do_segment()
2339 if ((tp->t_flags & TF_CLOSED) && tlen > 0 && in tcp_do_segment()
2340 TCPS_HAVERCVDFIN(tp->t_state) == 0) { in tcp_do_segment()
2345 s, __func__, tcpstates[tp->t_state], tlen); in tcp_do_segment()
 2349  		/* tcp_close() will kill the inp, so pre-log the reset */  in tcp_do_segment()
2359 * (and PUSH and FIN); if nothing left, just ACK. in tcp_do_segment()
2361 todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd); in tcp_do_segment()
2370 * remember to ack. Otherwise, drop segment in tcp_do_segment()
2371 * and ack. in tcp_do_segment()
2373 if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) { in tcp_do_segment()
2374 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
2380 m_adj(m, -todrop); in tcp_do_segment()
2381 tlen -= todrop; in tcp_do_segment()
2386 * If last ACK falls within this segment's sequence numbers, in tcp_do_segment()
2395 * Last.ACK.Sent <= SEG.SEQ + SEG.Len in tcp_do_segment()
2397 * Last.ACK.Sent < SEG.SEQ + SEG.Len, in tcp_do_segment()
2401 * RTT correctly when RCV.NXT == Last.ACK.Sent. in tcp_do_segment()
2404 SEQ_LEQ(th->th_seq, tp->last_ack_sent) && in tcp_do_segment()
2405 SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + in tcp_do_segment()
2407 tp->ts_recent_age = tcp_ts_getticks(); in tcp_do_segment()
2408 tp->ts_recent = to.to_tsval; in tcp_do_segment()
2412 * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN in tcp_do_segment()
2413 * flag is on (half-synchronized state), then queue data for in tcp_do_segment()
2417 if (tp->t_state == TCPS_SYN_RECEIVED || in tcp_do_segment()
2418 (tp->t_flags & TF_NEEDSYN)) { in tcp_do_segment()
2419 if (tp->t_state == TCPS_SYN_RECEIVED && in tcp_do_segment()
2420 (tp->t_flags & TF_FASTOPEN)) { in tcp_do_segment()
2421 tp->snd_wnd = tiwin; in tcp_do_segment()
2425 } else if (tp->t_flags & TF_ACKNOW) in tcp_do_segment()
2432 * Ack processing. in tcp_do_segment()
2434 if (SEQ_GEQ(tp->snd_una, tp->iss + (TCP_MAXWIN << tp->snd_scale))) { in tcp_do_segment()
2435 /* Checking SEG.ACK against ISS is definitely redundant. */ in tcp_do_segment()
2436 tp->t_flags2 |= TF2_NO_ISS_CHECK; in tcp_do_segment()
2442 if (tp->t_flags2 & TF2_NO_ISS_CHECK) { in tcp_do_segment()
2443 /* Check for too old ACKs (RFC 5961, Section 5.2). */ in tcp_do_segment()
2444 seq_min = tp->snd_una - tp->max_sndwnd; in tcp_do_segment()
2447 if (SEQ_GT(tp->iss + 1, tp->snd_una - tp->max_sndwnd)) { in tcp_do_segment()
2448 /* Checking for ghost ACKs is stricter. */ in tcp_do_segment()
2449 seq_min = tp->iss + 1; in tcp_do_segment()
2453 * Checking for too old ACKs (RFC 5961, in tcp_do_segment()
2456 seq_min = tp->snd_una - tp->max_sndwnd; in tcp_do_segment()
2460 if (SEQ_LT(th->th_ack, seq_min)) { in tcp_do_segment()
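/*
 * Worked example for the check above: with snd_una = 1,000,000 and
 * max_sndwnd = 65,535, seq_min becomes 934,465, so any ACK below that
 * value fails the RFC 5961 (Section 5.2) "too old" test.
 */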
2470 switch (tp->t_state) { in tcp_do_segment()
2472 * In SYN_RECEIVED state, the ack ACKs our SYN, so enter in tcp_do_segment()
2474 * The ACK was checked above. in tcp_do_segment()
2479 if (tp->t_flags & TF_SONOTCONN) { in tcp_do_segment()
2485 * with TF_SONOTCONN. The other reason for this mark in tcp_do_segment()
2489 tp->t_flags &= ~TF_SONOTCONN; in tcp_do_segment()
2493 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == in tcp_do_segment()
2495 tp->rcv_scale = tp->request_r_scale; in tcp_do_segment()
2497 tp->snd_wnd = tiwin; in tcp_do_segment()
2500 * SYN-RECEIVED -> ESTABLISHED in tcp_do_segment()
2501 * SYN-RECEIVED* -> FIN-WAIT-1 in tcp_do_segment()
2503 tp->t_starttime = ticks; in tcp_do_segment()
2504 if ((tp->t_flags & TF_FASTOPEN) && tp->t_tfo_pending) { in tcp_do_segment()
2505 tcp_fastopen_decrement_counter(tp->t_tfo_pending); in tcp_do_segment()
2506 tp->t_tfo_pending = NULL; in tcp_do_segment()
2508 if (tp->t_flags & TF_NEEDFIN) { in tcp_do_segment()
2509 tp->t_acktime = ticks; in tcp_do_segment()
2511 tp->t_flags &= ~TF_NEEDFIN; in tcp_do_segment()
2518 * processing. Calling it again here for such in tcp_do_segment()
2520 * snd_cwnd reduction that occurs when a TFO SYN|ACK in tcp_do_segment()
2523 if (!(tp->t_flags & TF_FASTOPEN)) in tcp_do_segment()
2528 * Account for the ACK of our SYN prior to in tcp_do_segment()
2529 * regular ACK processing below, except for in tcp_do_segment()
2532 if (SEQ_GT(th->th_ack, tp->snd_una) && !(tp->t_flags & TF_NEEDSYN)) in tcp_do_segment()
2535 * If segment contains data or ACK, will call tcp_reass() in tcp_do_segment()
2543 tp->snd_wl1 = th->th_seq - 1; in tcp_do_segment()
2547 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range in tcp_do_segment()
2548 * ACKs. If the ack is in the range in tcp_do_segment()
2549 * tp->snd_una < th->th_ack <= tp->snd_max in tcp_do_segment()
2550 * then advance tp->snd_una to th->th_ack and drop in tcp_do_segment()
2551 * data from the retransmission queue. If this ACK reflects in tcp_do_segment()
2560 if (SEQ_GT(th->th_ack, tp->snd_max)) { in tcp_do_segment()
2565 sack_changed = tcp_sack_doack(tp, &to, th->th_ack); in tcp_do_segment()
2567 (tp->t_flags & TF_LRD)) { in tcp_do_segment()
2573 * from the last ack with SACK doesn't get used. in tcp_do_segment()
2575 tp->sackhint.sacked_bytes = 0; in tcp_do_segment()
2582 if (SEQ_LEQ(th->th_ack, tp->snd_una)) { in tcp_do_segment()
2585 (tiwin == tp->snd_wnd || in tcp_do_segment()
2586 (tp->t_flags & TF_SACK_PERMIT))) { in tcp_do_segment()
2595 (TCPS_HAVERCVDFIN(tp->t_state) == 0)) { in tcp_do_segment()
2596 tp->t_dupacks = 0; in tcp_do_segment()
2603 * duplicate ack (ie, window info didn't in tcp_do_segment()
2605 * the ack is the biggest we've in tcp_do_segment()
2637 if (th->th_ack != tp->snd_una || in tcp_do_segment()
2642 tp->t_dupacks = 0; in tcp_do_segment()
2643 } else if (++tp->t_dupacks > tcprexmtthresh || in tcp_do_segment()
2644 IN_FASTRECOVERY(tp->t_flags)) { in tcp_do_segment()
2648 IN_FASTRECOVERY(tp->t_flags) && in tcp_do_segment()
2649 (tp->t_flags & TF_SACK_PERMIT)) { in tcp_do_segment()
2653 IN_FASTRECOVERY(tp->t_flags) && in tcp_do_segment()
2654 (tp->snd_nxt == tp->snd_max)) { in tcp_do_segment()
2666 awnd = (tp->snd_nxt - tp->snd_fack) + in tcp_do_segment()
2667 tp->sackhint.sack_bytes_rexmit; in tcp_do_segment()
2669 if (awnd < tp->snd_ssthresh) { in tcp_do_segment()
2670 tp->snd_cwnd += imax(maxseg, in tcp_do_segment()
2672 tp->sackhint.delivered_data)); in tcp_do_segment()
2673 if (tp->snd_cwnd > tp->snd_ssthresh) in tcp_do_segment()
2674 tp->snd_cwnd = tp->snd_ssthresh; in tcp_do_segment()
2677 IN_FASTRECOVERY(tp->t_flags) && in tcp_do_segment()
2678 SEQ_LT(tp->snd_nxt, tp->snd_max)) { in tcp_do_segment()
2679 tp->snd_cwnd += imax(maxseg, in tcp_do_segment()
2681 tp->sackhint.delivered_data)); in tcp_do_segment()
2683 tp->snd_cwnd += maxseg; in tcp_do_segment()
2687 } else if (tp->t_dupacks == tcprexmtthresh || in tcp_do_segment()
2688 (tp->t_flags & TF_SACK_PERMIT && in tcp_do_segment()
2690 tp->sackhint.sacked_bytes > in tcp_do_segment()
2691 (tcprexmtthresh - 1) * maxseg)) { in tcp_do_segment()
2695 * more than (dupthresh-1)*maxseg sacked data. in tcp_do_segment()
2701 tp->t_dupacks = tcprexmtthresh; in tcp_do_segment()
2702 tcp_seq onxt = tp->snd_nxt; in tcp_do_segment()
2712 if (IN_FASTRECOVERY(tp->t_flags)) { in tcp_do_segment()
2713 tp->t_dupacks = 0; in tcp_do_segment()
2717 if (SEQ_LEQ(th->th_ack, in tcp_do_segment()
2718 tp->snd_recover)) { in tcp_do_segment()
2719 tp->t_dupacks = 0; in tcp_do_segment()
2723 /* Congestion signal before ack. */ in tcp_do_segment()
2728 tp->t_rtttime = 0; in tcp_do_segment()
2739 tp->sackhint.prr_delivered = in tcp_do_segment()
2740 imin(tp->snd_max - th->th_ack, in tcp_do_segment()
2741 (tp->snd_limited + 1) * maxseg); in tcp_do_segment()
2743 tp->sackhint.prr_delivered = in tcp_do_segment()
2746 tp->sackhint.recover_fs = max(1, in tcp_do_segment()
2747 tp->snd_nxt - tp->snd_una); in tcp_do_segment()
2749 tp->snd_limited = 0; in tcp_do_segment()
2759 tp->snd_nxt = tp->snd_max; in tcp_do_segment()
2760 tp->snd_cwnd = tcp_compute_pipe(tp) + in tcp_do_segment()
2764 tp->snd_cwnd = tp->snd_ssthresh; in tcp_do_segment()
2765 if (SEQ_GT(th->th_ack, tp->snd_una)) { in tcp_do_segment()
2770 tp->snd_nxt = th->th_ack; in tcp_do_segment()
2771 tp->snd_cwnd = maxseg; in tcp_do_segment()
2773 KASSERT(tp->snd_limited <= 2, in tcp_do_segment()
2774 ("%s: tp->snd_limited too big", in tcp_do_segment()
2776 tp->snd_cwnd = tp->snd_ssthresh + in tcp_do_segment()
2778 (tp->t_dupacks - tp->snd_limited); in tcp_do_segment()
2779 if (SEQ_GT(onxt, tp->snd_nxt)) in tcp_do_segment()
2780 tp->snd_nxt = onxt; in tcp_do_segment()
2787 * for more. Make sure we can send a in tcp_do_segment()
2789 * ACK by increasing snd_cwnd by one in tcp_do_segment()
2794 uint32_t oldcwnd = tp->snd_cwnd; in tcp_do_segment()
2795 tcp_seq oldsndmax = tp->snd_max; in tcp_do_segment()
2799 KASSERT(tp->t_dupacks == 1 || in tcp_do_segment()
2800 tp->t_dupacks == 2, in tcp_do_segment()
2803 if (tp->t_dupacks == 1) in tcp_do_segment()
2804 tp->snd_limited = 0; in tcp_do_segment()
2805 if ((tp->snd_nxt == tp->snd_max) && in tcp_do_segment()
2806 (tp->t_rxtshift == 0)) in tcp_do_segment()
2807 tp->snd_cwnd = in tcp_do_segment()
2808 SEQ_SUB(tp->snd_nxt, in tcp_do_segment()
2809 tp->snd_una) - in tcp_do_segment()
2811 tp->snd_cwnd += in tcp_do_segment()
2812 (tp->t_dupacks - tp->snd_limited) * in tcp_do_segment()
2813 maxseg - tcp_sack_adjust(tp); in tcp_do_segment()
2817 * or we need to send an ACK. in tcp_do_segment()
2820 avail = sbavail(&so->so_snd); in tcp_do_segment()
2822 if (tp->t_flags & TF_ACKNOW || in tcp_do_segment()
2824 SEQ_SUB(tp->snd_nxt, tp->snd_una))) { in tcp_do_segment()
2827 sent = SEQ_SUB(tp->snd_max, oldsndmax); in tcp_do_segment()
2829 KASSERT((tp->t_dupacks == 2 && in tcp_do_segment()
2830 tp->snd_limited == 0) || in tcp_do_segment()
2832 tp->t_flags & TF_SENTFIN), in tcp_do_segment()
2835 tp->snd_limited = 2; in tcp_do_segment()
2837 ++tp->snd_limited; in tcp_do_segment()
2839 tp->snd_cwnd = oldcwnd; in tcp_do_segment()
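/*
 * The block above is limited transmit (RFC 3042): on the first two
 * duplicate ACKs, snd_cwnd is temporarily inflated so that one new
 * segment can be sent per duplicate ACK, tcp_output() is called, and
 * snd_cwnd is then restored to oldcwnd.
 */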
2846 * This ack is advancing the left edge, reset the in tcp_do_segment()
2849 tp->t_dupacks = 0; in tcp_do_segment()
2851 * If this ack also has new SACK info, increment the in tcp_do_segment()
2860 (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) || in tcp_do_segment()
2861 ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) && in tcp_do_segment()
2862 (tp->snd_nxt == tp->snd_max)) { in tcp_do_segment()
2863 tp->t_dupacks++; in tcp_do_segment()
2865 if (!IN_FASTRECOVERY(tp->t_flags) && in tcp_do_segment()
2866 (tp->sackhint.sacked_bytes > in tcp_do_segment()
2867 ((tcprexmtthresh - 1) * in tcp_do_segment()
2875 KASSERT(SEQ_GT(th->th_ack, tp->snd_una), in tcp_do_segment()
2880 * for the other side's cached packets, retract it. in tcp_do_segment()
2882 if (SEQ_LT(th->th_ack, tp->snd_recover)) { in tcp_do_segment()
2883 if (IN_FASTRECOVERY(tp->t_flags)) { in tcp_do_segment()
2884 if (tp->t_flags & TF_SACK_PERMIT) { in tcp_do_segment()
2889 tp->t_rtttime = 0; in tcp_do_segment()
2892 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
2901 } else if (IN_CONGRECOVERY(tp->t_flags) && in tcp_do_segment()
2903 tp->sackhint.delivered_data = in tcp_do_segment()
2905 tp->snd_fack = th->th_ack; in tcp_do_segment()
2908 * always use PRR-SSRB in tcp_do_segment()
2916 * If we reach this point, ACK is not a duplicate, in tcp_do_segment()
2919 if (tp->t_flags & TF_NEEDSYN) { in tcp_do_segment()
2921 * T/TCP: Connection was half-synchronized, and our in tcp_do_segment()
2922 * SYN has been ACK'd (so connection is now fully in tcp_do_segment()
2923 * synchronized). Go to non-starred state, in tcp_do_segment()
2924 * increment snd_una for ACK of SYN, and check if in tcp_do_segment()
2927 tp->t_flags &= ~TF_NEEDSYN; in tcp_do_segment()
2928 tp->snd_una++; in tcp_do_segment()
2930 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == in tcp_do_segment()
2932 tp->rcv_scale = tp->request_r_scale; in tcp_do_segment()
2941 * Adjust for the SYN bit in sequence space, in tcp_do_segment()
2942 * but don't account for it in cwnd calculations. in tcp_do_segment()
2943 * This is for the SYN_RECEIVED, non-simultaneous in tcp_do_segment()
2948 tp->snd_una++; in tcp_do_segment()
2951 "(tp->snd_una=%u, th->th_ack=%u, tp=%p, m=%p)", __func__, in tcp_do_segment()
2952 tp->snd_una, th->th_ack, tp, m)); in tcp_do_segment()
2957 * If we just performed our first retransmit, and the ACK in tcp_do_segment()
2963 if (tp->t_rxtshift == 1 && in tcp_do_segment()
2964 tp->t_flags & TF_PREVVALID && in tcp_do_segment()
2965 tp->t_badrxtwin != 0 && in tcp_do_segment()
2968 TSTMP_LT(to.to_tsecr, tp->t_badrxtwin)) in tcp_do_segment()
2981 * during the SYN+ACK phase, ignore in tcp_do_segment()
2988 t = tcp_ts_getticks() - to.to_tsecr; in tcp_do_segment()
2989 if (!tp->t_rttlow || tp->t_rttlow > t) in tcp_do_segment()
2990 tp->t_rttlow = t; in tcp_do_segment()
2992 } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) { in tcp_do_segment()
2993 if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime) in tcp_do_segment()
2994 tp->t_rttlow = ticks - tp->t_rtttime; in tcp_do_segment()
2995 tcp_xmit_timer(tp, ticks - tp->t_rtttime); in tcp_do_segment()
3003 * large ACK. in tcp_do_segment()
3005 if ((tp->t_state <= TCPS_CLOSE_WAIT && in tcp_do_segment()
3006 acked == sbavail(&so->so_snd)) || in tcp_do_segment()
3007 acked > sbavail(&so->so_snd)) in tcp_do_segment()
3008 tp->t_acktime = 0; in tcp_do_segment()
3010 tp->t_acktime = ticks; in tcp_do_segment()
3016 * timer, using current (possibly backed-off) value. in tcp_do_segment()
3018 if (th->th_ack == tp->snd_max) { in tcp_do_segment()
3025 * If no data (only SYN) was ACK'd, in tcp_do_segment()
3026 * skip rest of ACK processing. in tcp_do_segment()
3040 if (acked > sbavail(&so->so_snd)) { in tcp_do_segment()
3041 if (tp->snd_wnd >= sbavail(&so->so_snd)) in tcp_do_segment()
3042 tp->snd_wnd -= sbavail(&so->so_snd); in tcp_do_segment()
3044 tp->snd_wnd = 0; in tcp_do_segment()
3045 mfree = sbcut_locked(&so->so_snd, in tcp_do_segment()
3046 (int)sbavail(&so->so_snd)); in tcp_do_segment()
3049 mfree = sbcut_locked(&so->so_snd, acked); in tcp_do_segment()
3050 if (tp->snd_wnd >= (uint32_t) acked) in tcp_do_segment()
3051 tp->snd_wnd -= acked; in tcp_do_segment()
3053 tp->snd_wnd = 0; in tcp_do_segment()
3060 if (!IN_RECOVERY(tp->t_flags) && in tcp_do_segment()
3061 SEQ_GT(tp->snd_una, tp->snd_recover) && in tcp_do_segment()
3062 SEQ_LEQ(th->th_ack, tp->snd_recover)) in tcp_do_segment()
3063 tp->snd_recover = th->th_ack - 1; in tcp_do_segment()
3064 tp->snd_una = th->th_ack; in tcp_do_segment()
3065 if (IN_RECOVERY(tp->t_flags) && in tcp_do_segment()
3066 SEQ_GEQ(th->th_ack, tp->snd_recover)) { in tcp_do_segment()
3069 if (SEQ_GT(tp->snd_una, tp->snd_recover)) { in tcp_do_segment()
3070 tp->snd_recover = tp->snd_una; in tcp_do_segment()
3072 if (SEQ_LT(tp->snd_nxt, tp->snd_una)) in tcp_do_segment()
3073 tp->snd_nxt = tp->snd_una; in tcp_do_segment()
3075 switch (tp->t_state) { in tcp_do_segment()
3078 * for the ESTABLISHED state if our FIN is now acknowledged in tcp_do_segment()
3090 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { in tcp_do_segment()
3103 * In CLOSING STATE in addition to the processing for in tcp_do_segment()
3104 * the ESTABLISHED state if the ACK acknowledges our FIN in tcp_do_segment()
3105 * then enter the TIME-WAIT state, otherwise ignore in tcp_do_segment()
3117 * In LAST_ACK, we may still be waiting for data to drain in tcp_do_segment()
3118 * and/or to be acked, as well as for the ack of our FIN. in tcp_do_segment()
3136 * Don't look at window if no ACK: TAC's send garbage on first SYN. in tcp_do_segment()
3139 (SEQ_LT(tp->snd_wl1, th->th_seq) || in tcp_do_segment()
3140 (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) || in tcp_do_segment()
3141 (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) { in tcp_do_segment()
3144 tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) in tcp_do_segment()
3146 tp->snd_wnd = tiwin; in tcp_do_segment()
3147 tp->snd_wl1 = th->th_seq; in tcp_do_segment()
3148 tp->snd_wl2 = th->th_ack; in tcp_do_segment()
3149 if (tp->snd_wnd > tp->max_sndwnd) in tcp_do_segment()
3150 tp->max_sndwnd = tp->snd_wnd; in tcp_do_segment()
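/*
 * The test above is the standard send-window-update rule (RFC 9293):
 * take the advertised window only from a segment that is at least as
 * new, by SEG.SEQ and then SEG.ACK, as the one that last updated
 * snd_wnd, which keeps old or reordered segments from rewinding the
 * send window.
 */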
3157 if ((thflags & TH_URG) && th->th_urp && in tcp_do_segment()
3158 TCPS_HAVERCVDFIN(tp->t_state) == 0) { in tcp_do_segment()
3166 if (th->th_urp + sbavail(&so->so_rcv) > sb_max) { in tcp_do_segment()
3167 th->th_urp = 0; /* XXX */ in tcp_do_segment()
3186 if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) { in tcp_do_segment()
3187 tp->rcv_up = th->th_seq + th->th_urp; in tcp_do_segment()
3188 so->so_oobmark = sbavail(&so->so_rcv) + in tcp_do_segment()
3189 (tp->rcv_up - tp->rcv_nxt) - 1; in tcp_do_segment()
3190 if (so->so_oobmark == 0) in tcp_do_segment()
3191 so->so_rcv.sb_state |= SBS_RCVATMARK; in tcp_do_segment()
3193 tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); in tcp_do_segment()
3199 * but if two URG's are pending at once, some out-of-band in tcp_do_segment()
3202 if (th->th_urp <= (uint32_t)tlen && in tcp_do_segment()
3203 !(so->so_options & SO_OOBINLINE)) { in tcp_do_segment()
3213 if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) in tcp_do_segment()
3214 tp->rcv_up = tp->rcv_nxt; in tcp_do_segment()
3221 * and arranging for acknowledgment of receipt if necessary. in tcp_do_segment()
3222 * This process logically involves adjusting tp->rcv_wnd as data in tcp_do_segment()
3227 tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) && in tcp_do_segment()
3228 (tp->t_flags & TF_FASTOPEN)); in tcp_do_segment()
3230 TCPS_HAVERCVDFIN(tp->t_state) == 0) { in tcp_do_segment()
3231 tcp_seq save_start = th->th_seq; in tcp_do_segment()
3232 tcp_seq save_rnxt = tp->rcv_nxt; in tcp_do_segment()
3243 * Set DELACK for segments received in order, but ack in tcp_do_segment()
3247 if (th->th_seq == tp->rcv_nxt && in tcp_do_segment()
3249 (TCPS_HAVEESTABLISHED(tp->t_state) || in tcp_do_segment()
3252 tp->t_flags |= TF_DELACK; in tcp_do_segment()
3254 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
3255 tp->rcv_nxt += tlen; in tcp_do_segment()
3257 ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) && in tcp_do_segment()
3258 (tp->t_fbyte_in == 0)) { in tcp_do_segment()
3259 tp->t_fbyte_in = ticks; in tcp_do_segment()
3260 if (tp->t_fbyte_in == 0) in tcp_do_segment()
3261 tp->t_fbyte_in = 1; in tcp_do_segment()
3262 if (tp->t_fbyte_out && tp->t_fbyte_in) in tcp_do_segment()
3263 tp->t_flags2 |= TF2_FBYTES_COMPLETE; in tcp_do_segment()
3269 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) in tcp_do_segment()
3272 sbappendstream_locked(&so->so_rcv, m, 0); in tcp_do_segment()
3273 tp->t_flags |= TF_WAKESOR; in tcp_do_segment()
3284 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
3286 if ((tp->t_flags & TF_SACK_PERMIT) && in tcp_do_segment()
3288 TCPS_HAVEESTABLISHED(tp->t_state)) { in tcp_do_segment()
3296 } else if ((tlen > 0) && SEQ_GT(tp->rcv_nxt, save_rnxt)) { in tcp_do_segment()
3297 if ((tp->rcv_numsacks >= 1) && in tcp_do_segment()
3298 (tp->sackblks[0].end == save_start)) { in tcp_do_segment()
3304 tp->sackblks[0].start, in tcp_do_segment()
3305 tp->sackblks[0].end); in tcp_do_segment()
3327 if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) in tcp_do_segment()
3328 len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt); in tcp_do_segment()
3330 len = so->so_rcv.sb_hiwat; in tcp_do_segment()
3341 tcpstates[tp->t_state], tlen); in tcp_do_segment()
3348 tcpstates[tp->t_state], tlen); in tcp_do_segment()
3357 tcpstates[tp->t_state]); in tcp_do_segment()
3367 * If FIN is received ACK the FIN and let the user know in tcp_do_segment()
3371 if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { in tcp_do_segment()
3375 * If connection is half-synchronized in tcp_do_segment()
3376 * (ie NEEDSYN flag on) then delay ACK, in tcp_do_segment()
3379 * more input can be expected, send ACK now. in tcp_do_segment()
3381 if (tp->t_flags & TF_NEEDSYN) in tcp_do_segment()
3382 tp->t_flags |= TF_DELACK; in tcp_do_segment()
3384 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
3385 tp->rcv_nxt++; in tcp_do_segment()
3387 switch (tp->t_state) { in tcp_do_segment()
3393 tp->t_starttime = ticks; in tcp_do_segment()
3409 * starting the time-wait timer, turning off the other in tcp_do_segment()
3422 if (needoutput || (tp->t_flags & TF_ACKNOW)) { in tcp_do_segment()
3428 if (tp->t_flags & TF_DELACK) { in tcp_do_segment()
3429 tp->t_flags &= ~TF_DELACK; in tcp_do_segment()
3437 * Generate an ACK dropping incoming segment if it occupies in tcp_do_segment()
3438 * sequence space, where the ACK reflects our state. in tcp_do_segment()
3440 * We can now skip the test for the RST flag since all in tcp_do_segment()
3444 * In the SYN-RECEIVED state, don't send an ACK unless the in tcp_do_segment()
3445 * segment we received passes the SYN-RECEIVED ACK test. in tcp_do_segment()
3447 * "LAND" DoS attack, and also prevents an ACK storm in tcp_do_segment()
3451 if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) && in tcp_do_segment()
3452 (SEQ_GT(tp->snd_una, th->th_ack) || in tcp_do_segment()
3453 SEQ_GT(th->th_ack, tp->snd_max)) ) { in tcp_do_segment()
3459 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
3485 * Issue RST and make ACK acceptable to originator of segment.
3505 if ((tcp_get_flags(th) & TH_RST) || m->m_flags & (M_BCAST|M_MCAST)) in tcp_dropwithreset()
3508 if (mtod(m, struct ip *)->ip_v == 6) { in tcp_dropwithreset()
3510 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || in tcp_dropwithreset()
3511 IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) in tcp_dropwithreset()
3522 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || in tcp_dropwithreset()
3523 IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || in tcp_dropwithreset()
3524 ip->ip_src.s_addr == htonl(INADDR_BROADCAST) || in tcp_dropwithreset()
3525 in_ifnet_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) in tcp_dropwithreset()
3537 th->th_ack, TH_RST); in tcp_dropwithreset()
3543 tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen, in tcp_dropwithreset()
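/*
 * The two tcp_respond() calls above follow the classic reset-generation
 * rule (RFC 9293): if the offending segment carried an ACK, the RST is
 * sent with SEQ = SEG.ACK and no ACK bit; otherwise the RST carries
 * SEQ = 0 and acknowledges SEG.SEQ + SEG.LEN with RST|ACK.
 */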
3559 to->to_flags = 0; in tcp_dooptions()
3560 for (; cnt > 0; cnt -= optlen, cp += optlen) { in tcp_dooptions()
3579 to->to_flags |= TOF_MSS; in tcp_dooptions()
3581 (char *)&to->to_mss, sizeof(to->to_mss)); in tcp_dooptions()
3582 to->to_mss = ntohs(to->to_mss); in tcp_dooptions()
3589 to->to_flags |= TOF_SCALE; in tcp_dooptions()
3590 to->to_wscale = min(cp[2], TCP_MAX_WINSHIFT); in tcp_dooptions()
3595 to->to_flags |= TOF_TS; in tcp_dooptions()
3597 (char *)&to->to_tsval, sizeof(to->to_tsval)); in tcp_dooptions()
3598 to->to_tsval = ntohl(to->to_tsval); in tcp_dooptions()
3600 (char *)&to->to_tsecr, sizeof(to->to_tsecr)); in tcp_dooptions()
3601 to->to_tsecr = ntohl(to->to_tsecr); in tcp_dooptions()
3608 * here for the syncache code to perform the correct in tcp_dooptions()
3613 to->to_flags |= TOF_SIGNATURE; in tcp_dooptions()
3614 to->to_signature = cp + 2; in tcp_dooptions()
3623 to->to_flags |= TOF_SACKPERM; in tcp_dooptions()
3626 if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0) in tcp_dooptions()
3630 to->to_flags |= TOF_SACK; in tcp_dooptions()
3631 to->to_nsacks = (optlen - 2) / TCPOLEN_SACK; in tcp_dooptions()
3632 to->to_sacks = cp + 2; in tcp_dooptions()
3646 to->to_flags |= TOF_FASTOPEN; in tcp_dooptions()
3647 to->to_tfo_len = optlen - 2; in tcp_dooptions()
3648 to->to_tfo_cookie = to->to_tfo_len ? cp + 2 : NULL; in tcp_dooptions()
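A minimal sketch with made-up option contents (not from the source): tcp_dooptions() walks a (kind, length, value) list such as the following SYN options advertising MSS 1460, window scale 7, and SACK-permitted.

static const u_char example_syn_options[] = {
	TCPOPT_MAXSEG, TCPOLEN_MAXSEG, 0x05, 0xb4,	/* MSS 1460 (0x05b4) */
	TCPOPT_NOP,					/* padding */
	TCPOPT_WINDOW, TCPOLEN_WINDOW, 7,		/* window scale 7 */
	TCPOPT_SACK_PERMITTED, TCPOLEN_SACK_PERMITTED,	/* SACK permitted */
	TCPOPT_NOP, TCPOPT_NOP,				/* pad to 4-byte multiple */
};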
3659 * It is still reflected in the segment length for
3666 int cnt = off + th->th_urp - 1; in tcp_pulloutofband()
3669 if (m->m_len > cnt) { in tcp_pulloutofband()
3675 tp->t_iobc = *cp; in tcp_pulloutofband()
3676 tp->t_oobflags |= TCPOOB_HAVEDATA; in tcp_pulloutofband()
3677 bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1)); in tcp_pulloutofband()
3678 m->m_len--; in tcp_pulloutofband()
3679 if (m->m_flags & M_PKTHDR) in tcp_pulloutofband()
3680 m->m_pkthdr.len--; in tcp_pulloutofband()
3683 cnt -= m->m_len; in tcp_pulloutofband()
3684 m = m->m_next; in tcp_pulloutofband()
3692 * Collect new round-trip time estimate
3703 if (tp->t_rttupdated < UCHAR_MAX) in tcp_xmit_timer()
3704 tp->t_rttupdated++; in tcp_xmit_timer()
3706 stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, in tcp_xmit_timer()
3709 if ((tp->t_srtt != 0) && (tp->t_rxtshift <= TCP_RTT_INVALIDATE)) { in tcp_xmit_timer()
3717 delta = ((rtt - 1) << TCP_DELTA_SHIFT) in tcp_xmit_timer()
3718 - (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)); in tcp_xmit_timer()
3720 if ((tp->t_srtt += delta) <= 0) in tcp_xmit_timer()
3721 tp->t_srtt = 1; in tcp_xmit_timer()
3731		 * RFC 793's wired-in beta. in tcp_xmit_timer()
3734 delta = -delta; in tcp_xmit_timer()
3735 delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT); in tcp_xmit_timer()
3736 if ((tp->t_rttvar += delta) <= 0) in tcp_xmit_timer()
3737 tp->t_rttvar = 1; in tcp_xmit_timer()
3740 * No rtt measurement yet - use the unsmoothed rtt. in tcp_xmit_timer()
3744 tp->t_srtt = rtt << TCP_RTT_SHIFT; in tcp_xmit_timer()
3745 tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); in tcp_xmit_timer()
3747 tp->t_rtttime = 0; in tcp_xmit_timer()
3748 tp->t_rxtshift = 0; in tcp_xmit_timer()
3755 * 1 extra tick because of +-1/2 tick uncertainty in the in tcp_xmit_timer()
3761 TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), in tcp_xmit_timer()
3762 max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX); in tcp_xmit_timer()
3765 * We received an ack for a packet that wasn't retransmitted; in tcp_xmit_timer()
3768 * for now (a route might have failed after we sent a segment, in tcp_xmit_timer()
3771 tp->t_softerror = 0; in tcp_xmit_timer()
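A plain-arithmetic restatement of the estimator above, with the fixed-point scaling and off-by-one adjustments stripped away (a sketch for illustration, not kernel code): gains of 1/8 for the smoothed RTT and 1/4 for the variance, with the retransmit timeout taken as srtt + 4*rttvar before TCPT_RANGESET() clamps it.

static void
rtt_update_sketch(double rtt, double *srtt, double *rttvar, double *rto)
{
	if (*srtt != 0.0) {
		double delta = rtt - *srtt;

		*srtt += delta / 8.0;			/* alpha = 1/8 */
		if (delta < 0.0)
			delta = -delta;
		*rttvar += (delta - *rttvar) / 4.0;	/* beta = 1/4 */
	} else {
		/* First measurement: seed the estimator directly. */
		*srtt = rtt;
		*rttvar = rtt / 2.0;
	}
	*rto = *srtt + 4.0 * *rttvar;	/* then clamp to [t_rttmin, TCPTV_REXMTMAX] */
}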
3775 * Determine a reasonable value for maxseg size.
3776 * If the route is known, check route for mtu.
3784 * While looking at the routing entry, we also initialize other path-dependent
3785 * parameters from pre-set or cached values in the routing entry.
3787 * NOTE that resulting t_maxseg doesn't include space for TCP options or
3789 * thus it is calculated for every segment separately in tcp_output().
3792 * segment, or an ICMP need fragmentation datagram. Outgoing SYN/ACK MSS
3804 int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0; in tcp_mss_update()
3814 if (tp->t_port) in tcp_mss_update()
3816 if (mtuoffer != -1) { in tcp_mss_update()
3817 KASSERT(offer == -1, ("%s: conflict", __func__)); in tcp_mss_update()
3818 offer = mtuoffer - min_protoh; in tcp_mss_update()
3824 maxmtu = tcp_maxmtu6(&inp->inp_inc, cap); in tcp_mss_update()
3825 tp->t_maxseg = V_tcp_v6mssdflt; in tcp_mss_update()
3833 maxmtu = tcp_maxmtu(&inp->inp_inc, cap); in tcp_mss_update()
3834 tp->t_maxseg = V_tcp_mssdflt; in tcp_mss_update()
3844 * to a defined state as tcp_hc_get() would do for us in tcp_mss_update()
3860 offer = tp->t_maxseg; in tcp_mss_update()
3863 case -1: in tcp_mss_update()
3865 * Offer == -1 means that we didn't receive SYN yet. in tcp_mss_update()
3879 tcp_hc_get(&inp->inp_inc, metricptr); in tcp_mss_update()
3885 if (metricptr->hc_mtu) in tcp_mss_update()
3886 mss = min(metricptr->hc_mtu, maxmtu) - min_protoh; in tcp_mss_update()
3890 mss = maxmtu - min_protoh; in tcp_mss_update()
3892 !in6_localaddr(&inp->in6p_faddr)) in tcp_mss_update()
3901 mss = maxmtu - min_protoh; in tcp_mss_update()
3903 !in_localaddr(inp->inp_faddr)) in tcp_mss_update()
3908 * XXX - The above conditional (mss = maxmtu - min_protoh) in tcp_mss_update()
3915 * on the Internet today. For the moment, we'll sweep in tcp_mss_update()
3923 * recomputed. For Further Study. in tcp_mss_update()
3934 * XXXGL: shouldn't we reserve space for IP/IPv6 options? in tcp_mss_update()
3938 tp->t_maxseg = mss; in tcp_mss_update()
3939 if (tp->t_maxseg < V_tcp_mssdflt) { in tcp_mss_update()
3945 tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; in tcp_mss_update()
3947 tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; in tcp_mss_update()
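A worked example of the arithmetic above, using assumed values rather than anything from the source: on a plain Ethernet path with a 1500-byte MTU and no usable hostcache entry,

/*
 *	IPv4: mss = 1500 - (20 IP   + 20 TCP) = 1460
 *	IPv6: mss = 1500 - (40 IPv6 + 20 TCP) = 1440
 *
 * after which the result is clipped against the peer's offer and never
 * allowed to fall below V_tcp_mssdflt / V_tcp_v6mssdflt.
 */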
3965 tcp_mss_update(tp, offer, -1, &metrics, &cap); in tcp_mss()
3967 mss = tp->t_maxseg; in tcp_mss()
3976 so = inp->inp_socket; in tcp_mss()
3978 if ((so->so_snd.sb_hiwat == V_tcp_sendspace) && metrics.hc_sendpipe) in tcp_mss()
3981 bufsize = so->so_snd.sb_hiwat; in tcp_mss()
3988 if (bufsize > so->so_snd.sb_hiwat) in tcp_mss()
3998 * XXXGL: shouldn't we reserve space for IP/IPv6 options? in tcp_mss()
4000 tp->t_maxseg = max(mss, 64); in tcp_mss()
4001 if (tp->t_maxseg < V_tcp_mssdflt) { in tcp_mss()
4007 tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; in tcp_mss()
4009 tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; in tcp_mss()
4013 if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.hc_recvpipe) in tcp_mss()
4016 bufsize = so->so_rcv.sb_hiwat; in tcp_mss()
4021 if (bufsize > so->so_rcv.sb_hiwat) in tcp_mss()
4026 /* Check the interface for TSO capabilities. */ in tcp_mss()
4028 tp->t_flags |= TF_TSO; in tcp_mss()
4029 tp->t_tsomax = cap.tsomax; in tcp_mss()
4030 tp->t_tsomaxsegcount = cap.tsomaxsegcount; in tcp_mss()
4031 tp->t_tsomaxsegsize = cap.tsomaxsegsize; in tcp_mss()
4033 tp->t_flags2 |= TF2_IPSEC_TSO; in tcp_mss()
4051 if (inc->inc_flags & INC_ISIPV6) { in tcp_mssopt()
4072 mss = min(maxmtu, thcmtu) - min_protoh; in tcp_mssopt()
4074 mss = max(maxmtu, thcmtu) - min_protoh; in tcp_mssopt()
4093 * Compute the amount of data that this ACK is indicating in tcp_do_prr_ack()
4098 (IN_CONGRECOVERY(tp->t_flags) && in tcp_do_prr_ack()
4099 !IN_FASTRECOVERY(tp->t_flags))) { in tcp_do_prr_ack()
4100 del_data = tp->sackhint.delivered_data; in tcp_do_prr_ack()
4104 pipe = (tp->snd_nxt - tp->snd_fack) + in tcp_do_prr_ack()
4105 tp->sackhint.sack_bytes_rexmit; in tcp_do_prr_ack()
4107 if (tp->sackhint.prr_delivered < (tcprexmtthresh * maxseg + in tcp_do_prr_ack()
4108 tp->snd_recover - tp->snd_una)) { in tcp_do_prr_ack()
4111 pipe = imax(0, tp->snd_max - tp->snd_una - in tcp_do_prr_ack()
4112 imin(INT_MAX / 65536, tp->t_dupacks) * maxseg); in tcp_do_prr_ack()
4114 tp->sackhint.prr_delivered += del_data; in tcp_do_prr_ack()
4118 if (pipe >= tp->snd_ssthresh) { in tcp_do_prr_ack()
4119 if (tp->sackhint.recover_fs == 0) in tcp_do_prr_ack()
4120 tp->sackhint.recover_fs = in tcp_do_prr_ack()
4121 imax(1, tp->snd_nxt - tp->snd_una); in tcp_do_prr_ack()
4122 snd_cnt = howmany((long)tp->sackhint.prr_delivered * in tcp_do_prr_ack()
4123 tp->snd_ssthresh, tp->sackhint.recover_fs) - in tcp_do_prr_ack()
4124 tp->sackhint.prr_out + maxseg - 1; in tcp_do_prr_ack()
4128 * - A partial ack without SACK block beneath snd_recover in tcp_do_prr_ack()
4130		 * - A SACK scoreboard update adding a new hole indicates in tcp_do_prr_ack()
4133 * - Prevent ACK splitting attacks, by being conservative in tcp_do_prr_ack()
4137 limit = tp->sackhint.prr_delivered - in tcp_do_prr_ack()
4138 tp->sackhint.prr_out; in tcp_do_prr_ack()
4140 limit = imax(tp->sackhint.prr_delivered - in tcp_do_prr_ack()
4141 tp->sackhint.prr_out, del_data) + in tcp_do_prr_ack()
4144 snd_cnt = imin((tp->snd_ssthresh - pipe), limit); in tcp_do_prr_ack()
4148 * Send snd_cnt new data into the network in response to this ack. in tcp_do_prr_ack()
4152 if (IN_FASTRECOVERY(tp->t_flags)) { in tcp_do_prr_ack()
4154 tp->snd_cwnd = pipe - del_data + (snd_cnt * maxseg); in tcp_do_prr_ack()
4156 tp->snd_cwnd = (tp->snd_max - tp->snd_una) + in tcp_do_prr_ack()
4159 } else if (IN_CONGRECOVERY(tp->t_flags)) { in tcp_do_prr_ack()
4160 tp->snd_cwnd = pipe - del_data + (snd_cnt * maxseg); in tcp_do_prr_ack()
4162 tp->snd_cwnd = imax(maxseg, tp->snd_cwnd); in tcp_do_prr_ack()
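A worked example of the PRR arithmetic above (RFC 6937), stated in whole segments for readability even though the code works in bytes; all numbers are assumed:

/*
 * ssthresh = 10 segments, recover_fs = 20 segments outstanding when
 * recovery began, prr_delivered = 8 segments so far, prr_out = 3:
 *
 *	snd_cnt = howmany(8 * 10, 20) - 3 = 4 - 3 = 1 segment
 *
 * so roughly ssthresh/recover_fs (one half here) of each newly
 * delivered amount is clocked back out, letting cwnd converge on
 * ssthresh by the end of recovery; once pipe drops below ssthresh the
 * limited-transmit branch above takes over.
 */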
4166	 * When a partial ack arrives, force the retransmission of the
4167 * next unacknowledged segment. Do not clear tp->t_dupacks.
4174 tcp_seq onxt = tp->snd_nxt; in tcp_newreno_partial_ack()
4175 uint32_t ocwnd = tp->snd_cwnd; in tcp_newreno_partial_ack()
4181 tp->t_rtttime = 0; in tcp_newreno_partial_ack()
4182 if (IN_FASTRECOVERY(tp->t_flags)) { in tcp_newreno_partial_ack()
4183 tp->snd_nxt = th->th_ack; in tcp_newreno_partial_ack()
4186 * (tp->snd_una has not yet been updated when this function is called.) in tcp_newreno_partial_ack()
4188 tp->snd_cwnd = maxseg + BYTES_THIS_ACK(tp, th); in tcp_newreno_partial_ack()
4189 tp->t_flags |= TF_ACKNOW; in tcp_newreno_partial_ack()
4191 tp->snd_cwnd = ocwnd; in tcp_newreno_partial_ack()
4192 if (SEQ_GT(onxt, tp->snd_nxt)) in tcp_newreno_partial_ack()
4193 tp->snd_nxt = onxt; in tcp_newreno_partial_ack()
4196 * Partial window deflation. Relies on fact that tp->snd_una in tcp_newreno_partial_ack()
4199 if (tp->snd_cwnd > BYTES_THIS_ACK(tp, th)) in tcp_newreno_partial_ack()
4200 tp->snd_cwnd -= BYTES_THIS_ACK(tp, th); in tcp_newreno_partial_ack()
4202 tp->snd_cwnd = 0; in tcp_newreno_partial_ack()
4203 tp->snd_cwnd += maxseg; in tcp_newreno_partial_ack()
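A worked example of the deflation above, with assumed numbers:

/*
 * snd_cwnd = 10 * maxseg when the partial ACK arrives and 3 * maxseg
 * is newly acknowledged:
 *
 *	snd_cwnd = 10*maxseg - 3*maxseg + maxseg = 8*maxseg
 *
 * i.e. the window deflates by the amount acked but is re-inflated by
 * one segment, so the forced retransmission of the next unacknowledged
 * segment can be sent immediately (RFC 6582-style partial ACK handling).
 */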
4209 if (tp->t_fb->tfb_compute_pipe == NULL) { in tcp_compute_pipe()
4210 return (tp->snd_max - tp->snd_una + in tcp_compute_pipe()
4211 tp->sackhint.sack_bytes_rexmit - in tcp_compute_pipe()
4212 tp->sackhint.sacked_bytes - in tcp_compute_pipe()
4213 tp->sackhint.lost_bytes); in tcp_compute_pipe()
4215 return((*tp->t_fb->tfb_compute_pipe)(tp)); in tcp_compute_pipe()
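A worked example of the default estimate above, with assumed numbers:

/*
 * snd_max - snd_una = 100 kB outstanding, 20 kB already SACKed,
 * 10 kB marked lost, 5 kB of holes retransmitted:
 *
 *	pipe = 100 - 20 - 10 + 5 = 75 kB
 *
 * i.e. the data still believed to be in flight in the network.
 */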
4228 * Support for user specified value for initial flight size. in tcp_compute_initwnd()