Lines Matching +full:always +full:- +full:wait +full:- +full:for +full:- +full:ack
1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
6 * Copyright (c) 2007-2008,2010
8 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
10 * Copyright (c) 2010-2011 Juniper Networks, Inc.
13 * Portions of this software were developed at the Centre for Advanced Internet
18 * Portions of this software were developed at the Centre for Advanced
39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 #include <sys/proc.h> /* for proc0 declaration */
76 #include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */
86 #define TCPSTATES /* for logging */
94 #include <netinet/ip_icmp.h> /* required for icmp_var.h */
95 #include <netinet/icmp_var.h> /* for ICMP_BANDLIM */
150 "Enforce net.inet.tcp.blackhole for locally originated packets");
155 "Delay ACK to try and piggyback it onto a data packet");
185 "Slow-start flight size (initial congestion window) in number of segments");
195 "Cap the max cwnd increment during slow-start to this number of segments");
200 "Follow RFC793 instead of RFC5961 criteria for accepting SYN packets");
205 "Follow RFC793 instead of RFC5961 criteria for accepting RST packets");
210 "Follow RFC793 criteria for validating SEG.ACK");
242 * Kernel module interface for updating tcpstat. The first argument is an index
254 * receiving a duplicate ACK with a SACK block, and also
261 return ((tp->t_flags & TF_SACK_PERMIT) && in tcp_is_sack_recovery()
262 ((to->to_flags & TOF_SACK) || in tcp_is_sack_recovery()
263 (!TAILQ_EMPTY(&tp->snd_holes)))); in tcp_is_sack_recovery()
268 * Wrapper for the TCP established input helper hook.
275 if (V_tcp_hhh[HHOOK_TCP_EST_IN]->hhh_nhooks > 0) { in hhook_run_tcp_est_in()
281 &tp->t_osd); in hhook_run_tcp_est_in()
299 tp->t_ccv.nsegs = nsegs; in cc_ack_received()
300 tp->t_ccv.bytes_this_ack = BYTES_THIS_ACK(tp, th); in cc_ack_received()
301 if ((!V_tcp_do_newcwv && (tp->snd_cwnd <= tp->snd_wnd)) || in cc_ack_received()
302 (V_tcp_do_newcwv && (tp->snd_cwnd <= tp->snd_wnd) && in cc_ack_received()
303 (tp->snd_cwnd < (tcp_compute_pipe(tp) * 2)))) in cc_ack_received()
304 tp->t_ccv.flags |= CCF_CWND_LIMITED; in cc_ack_received()
306 tp->t_ccv.flags &= ~CCF_CWND_LIMITED; in cc_ack_received()
310 stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF, in cc_ack_received()
311 ((int32_t)tp->snd_cwnd) - tp->snd_wnd); in cc_ack_received()
312 if (!IN_RECOVERY(tp->t_flags)) in cc_ack_received()
313 stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_ACKLEN, in cc_ack_received()
314 tp->t_ccv.bytes_this_ack / (tcp_maxseg(tp) * nsegs)); in cc_ack_received()
315 if ((tp->t_flags & TF_GPUTINPROG) && in cc_ack_received()
316 SEQ_GEQ(th->th_ack, tp->gput_ack)) { in cc_ack_received()
320 gput = (((int64_t)SEQ_SUB(th->th_ack, tp->gput_seq)) << 3) / in cc_ack_received()
321 max(1, tcp_ts_getticks() - tp->gput_ts); in cc_ack_received()
322 stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_GPUT, in cc_ack_received()
329 if (tp->t_stats_gput_prev > 0) in cc_ack_received()
330 stats_voi_update_abs_s32(tp->t_stats, in cc_ack_received()
332 ((gput - tp->t_stats_gput_prev) * 100) / in cc_ack_received()
333 tp->t_stats_gput_prev); in cc_ack_received()
334 tp->t_flags &= ~TF_GPUTINPROG; in cc_ack_received()
335 tp->t_stats_gput_prev = gput; in cc_ack_received()
338 if (tp->snd_cwnd > tp->snd_ssthresh) { in cc_ack_received()
339 tp->t_bytes_acked += tp->t_ccv.bytes_this_ack; in cc_ack_received()
340 if (tp->t_bytes_acked >= tp->snd_cwnd) { in cc_ack_received()
341 tp->t_bytes_acked -= tp->snd_cwnd; in cc_ack_received()
342 tp->t_ccv.flags |= CCF_ABC_SENTAWND; in cc_ack_received()
345 tp->t_ccv.flags &= ~CCF_ABC_SENTAWND; in cc_ack_received()
346 tp->t_bytes_acked = 0; in cc_ack_received()
350 if (CC_ALGO(tp)->ack_received != NULL) { in cc_ack_received()
352 tp->t_ccv.curack = th->th_ack; in cc_ack_received()
353 CC_ALGO(tp)->ack_received(&tp->t_ccv, type); in cc_ack_received()
356 stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd); in cc_ack_received()
370 tcp_hc_get(&inp->inp_inc, &metrics); in cc_conn_init()
373 if (tp->t_srtt == 0 && (rtt = metrics.hc_rtt)) { in cc_conn_init()
374 tp->t_srtt = rtt; in cc_conn_init()
377 tp->t_rttvar = metrics.hc_rttvar; in cc_conn_init()
380 /* default variation is +- 1 rtt */ in cc_conn_init()
381 tp->t_rttvar = in cc_conn_init()
382 tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; in cc_conn_init()
384 TCPT_RANGESET(tp->t_rxtcur, in cc_conn_init()
385 ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, in cc_conn_init()
386 tp->t_rttmin, tcp_rexmit_max); in cc_conn_init()
395 tp->snd_ssthresh = max(2 * maxseg, metrics.hc_ssthresh); in cc_conn_init()
400 * Set the initial slow-start flight size. in cc_conn_init()
402 * If a SYN or SYN/ACK was lost and retransmitted, we have to in cc_conn_init()
406 if (tp->snd_cwnd == 1) in cc_conn_init()
407 tp->snd_cwnd = maxseg; /* SYN(-ACK) lost */ in cc_conn_init()
409 tp->snd_cwnd = tcp_compute_initwnd(maxseg); in cc_conn_init()
411 if (CC_ALGO(tp)->conn_init != NULL) in cc_conn_init()
412 CC_ALGO(tp)->conn_init(&tp->t_ccv); in cc_conn_init()
421 stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_CSIG, type); in cc_cong_signal()
426 if (!IN_FASTRECOVERY(tp->t_flags)) { in cc_cong_signal()
427 tp->snd_recover = tp->snd_max; in cc_cong_signal()
428 if (tp->t_flags2 & TF2_ECN_PERMIT) in cc_cong_signal()
429 tp->t_flags2 |= TF2_ECN_SND_CWR; in cc_cong_signal()
433 if (!IN_CONGRECOVERY(tp->t_flags) || in cc_cong_signal()
435 * Allow ECN reaction on ACK to CWR, if in cc_cong_signal()
438 SEQ_GEQ(th->th_ack, tp->snd_recover)) { in cc_cong_signal()
439 EXIT_CONGRECOVERY(tp->t_flags); in cc_cong_signal()
441 tp->snd_recover = tp->snd_max + 1; in cc_cong_signal()
442 if (tp->t_flags2 & TF2_ECN_PERMIT) in cc_cong_signal()
443 tp->t_flags2 |= TF2_ECN_SND_CWR; in cc_cong_signal()
447 tp->t_dupacks = 0; in cc_cong_signal()
448 tp->t_bytes_acked = 0; in cc_cong_signal()
449 EXIT_RECOVERY(tp->t_flags); in cc_cong_signal()
450 if (tp->t_flags2 & TF2_ECN_PERMIT) in cc_cong_signal()
451 tp->t_flags2 |= TF2_ECN_SND_CWR; in cc_cong_signal()
456 tp->snd_cwnd = tp->snd_cwnd_prev; in cc_cong_signal()
457 tp->snd_ssthresh = tp->snd_ssthresh_prev; in cc_cong_signal()
458 tp->snd_recover = tp->snd_recover_prev; in cc_cong_signal()
459 if (tp->t_flags & TF_WASFRECOVERY) in cc_cong_signal()
460 ENTER_FASTRECOVERY(tp->t_flags); in cc_cong_signal()
461 if (tp->t_flags & TF_WASCRECOVERY) in cc_cong_signal()
462 ENTER_CONGRECOVERY(tp->t_flags); in cc_cong_signal()
463 tp->snd_nxt = tp->snd_max; in cc_cong_signal()
464 tp->t_flags &= ~TF_PREVVALID; in cc_cong_signal()
465 tp->t_rxtshift = 0; in cc_cong_signal()
466 tp->t_badrxtwin = 0; in cc_cong_signal()
469 if (SEQ_LT(tp->snd_fack, tp->snd_una) || in cc_cong_signal()
470 SEQ_GT(tp->snd_fack, tp->snd_max)) { in cc_cong_signal()
471 tp->snd_fack = tp->snd_una; in cc_cong_signal()
474 if (CC_ALGO(tp)->cong_signal != NULL) { in cc_cong_signal()
476 tp->t_ccv.curack = th->th_ack; in cc_cong_signal()
477 CC_ALGO(tp)->cong_signal(&tp->t_ccv, type); in cc_cong_signal()
486 if (CC_ALGO(tp)->post_recovery != NULL) { in cc_post_recovery()
487 if (SEQ_LT(tp->snd_fack, th->th_ack) || in cc_post_recovery()
488 SEQ_GT(tp->snd_fack, tp->snd_max)) { in cc_post_recovery()
489 tp->snd_fack = th->th_ack; in cc_post_recovery()
491 tp->t_ccv.curack = th->th_ack; in cc_post_recovery()
492 CC_ALGO(tp)->post_recovery(&tp->t_ccv); in cc_post_recovery()
494 EXIT_RECOVERY(tp->t_flags); in cc_post_recovery()
496 tp->t_bytes_acked = 0; in cc_post_recovery()
497 tp->sackhint.delivered_data = 0; in cc_post_recovery()
498 tp->sackhint.prr_delivered = 0; in cc_post_recovery()
499 tp->sackhint.prr_out = 0; in cc_post_recovery()
503 * Indicate whether this ack should be delayed. We can delay the ack if
505 * - There is no delayed ack timer in progress.
506 * - Our last ack wasn't a 0-sized window. We never want to delay
507 * the ack that opens up a 0-sized window.
508 * - LRO wasn't used for this segment. We make sure by checking that the
513 (tp->t_flags & TF_RXWIN0SENT) == 0) && \
514 (tlen <= tp->t_maxseg) && \
515 (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
522 if (CC_ALGO(tp)->ecnpkt_handler != NULL) { in cc_ecnpkt_handler_flags()
525 tp->t_ccv.flags |= CCF_IPHDR_CE; in cc_ecnpkt_handler_flags()
532 tp->t_ccv.flags &= ~CCF_IPHDR_CE; in cc_ecnpkt_handler_flags()
537 tp->t_ccv.flags |= CCF_TCPHDR_CWR; in cc_ecnpkt_handler_flags()
539 tp->t_ccv.flags &= ~CCF_TCPHDR_CWR; in cc_ecnpkt_handler_flags()
541 CC_ALGO(tp)->ecnpkt_handler(&tp->t_ccv); in cc_ecnpkt_handler_flags()
543 if (tp->t_ccv.flags & CCF_ACKNOW) { in cc_ecnpkt_handler_flags()
545 tp->t_flags |= TF_ACKNOW; in cc_ecnpkt_handler_flags()
558 * tcp6_input is a thin wrapper around tcp_input for the extended
562 * tcp_do_segment processes the ACK and text of the segment for
572 if (m->m_len < *offp + sizeof(struct tcphdr)) { in tcp6_input_with_port()
612 int rstreason = 0; /* For badport_bandlim accounting purposes */ in tcp_input_with_port()
628 isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0; in tcp_input_with_port()
637 m->m_pkthdr.tcp_tun_port = port; in tcp_input_with_port()
642 tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0; in tcp_input_with_port()
645 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) { in tcp_input_with_port()
646 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) in tcp_input_with_port()
647 th->th_sum = m->m_pkthdr.csum_data; in tcp_input_with_port()
649 th->th_sum = in6_cksum_pseudo(ip6, tlen, in tcp_input_with_port()
650 IPPROTO_TCP, m->m_pkthdr.csum_data); in tcp_input_with_port()
651 th->th_sum ^= 0xffff; in tcp_input_with_port()
653 th->th_sum = in6_cksum(m, IPPROTO_TCP, off0, tlen); in tcp_input_with_port()
654 if (th->th_sum) { in tcp_input_with_port()
661 * As we use all-zero to indicate unbounded/unconnected pcb, in tcp_input_with_port()
667 KASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst), in tcp_input_with_port()
669 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { in tcp_input_with_port()
689 if (m->m_len < sizeof (struct tcpiphdr)) { in tcp_input_with_port()
698 tlen = ntohs(ip->ip_len) - off0; in tcp_input_with_port()
700 iptos = ip->ip_tos; in tcp_input_with_port()
703 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { in tcp_input_with_port()
704 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) in tcp_input_with_port()
705 th->th_sum = m->m_pkthdr.csum_data; in tcp_input_with_port()
707 th->th_sum = in_pseudo(ip->ip_src.s_addr, in tcp_input_with_port()
708 ip->ip_dst.s_addr, in tcp_input_with_port()
709 htonl(m->m_pkthdr.csum_data + tlen + in tcp_input_with_port()
711 th->th_sum ^= 0xffff; in tcp_input_with_port()
719 ipttl = ip->ip_ttl; in tcp_input_with_port()
720 bzero(ipov->ih_x1, sizeof(ipov->ih_x1)); in tcp_input_with_port()
721 ipov->ih_len = htons(tlen); in tcp_input_with_port()
722 th->th_sum = in_cksum(m, len); in tcp_input_with_port()
723 /* Reset length for SDT probes. */ in tcp_input_with_port()
724 ip->ip_len = htons(len); in tcp_input_with_port()
726 ip->ip_tos = iptos; in tcp_input_with_port()
727 /* Re-initialization for later version check */ in tcp_input_with_port()
728 ip->ip_ttl = ipttl; in tcp_input_with_port()
729 ip->ip_v = IPVERSION; in tcp_input_with_port()
730 ip->ip_hl = off0 >> 2; in tcp_input_with_port()
733 if (th->th_sum && (port == 0)) { in tcp_input_with_port()
737 KASSERT(ip->ip_dst.s_addr != INADDR_ANY, in tcp_input_with_port()
739 if (__predict_false(ip->ip_src.s_addr == INADDR_ANY)) { in tcp_input_with_port()
750 off = th->th_off << 2; in tcp_input_with_port()
755 tlen -= off; /* tlen is used instead of ti->ti_len */ in tcp_input_with_port()
759 if (m->m_len < off0 + off) { in tcp_input_with_port()
775 if (m->m_len < sizeof(struct ip) + off) { in tcp_input_with_port()
786 optlen = off - sizeof (struct tcphdr); in tcp_input_with_port()
806 (isipv6 && (m->m_flags & M_IP6_NEXTHOP)) in tcp_input_with_port()
808 || (!isipv6 && (m->m_flags & M_IP_NEXTHOP)) in tcp_input_with_port()
812 (m->m_flags & M_IP_NEXTHOP) in tcp_input_with_port()
818 * For initial SYN packets we don't need write lock on matching in tcp_input_with_port()
838 &ip6->ip6_src, th->th_sport, &ip6->ip6_dst, th->th_dport, in tcp_input_with_port()
839 lookupflag & ~INPLOOKUP_WILDCARD, m->m_pkthdr.rcvif, m); in tcp_input_with_port()
844 * any hardware-generated hash is ignored. in tcp_input_with_port()
846 inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_src, in tcp_input_with_port()
847 th->th_sport, &next_hop6->sin6_addr, in tcp_input_with_port()
848 next_hop6->sin6_port ? ntohs(next_hop6->sin6_port) : in tcp_input_with_port()
849 th->th_dport, lookupflag, m->m_pkthdr.rcvif); in tcp_input_with_port()
852 inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src, in tcp_input_with_port()
853 th->th_sport, &ip6->ip6_dst, th->th_dport, lookupflag, in tcp_input_with_port()
854 m->m_pkthdr.rcvif, m); in tcp_input_with_port()
869 inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, th->th_sport, in tcp_input_with_port()
870 ip->ip_dst, th->th_dport, lookupflag & ~INPLOOKUP_WILDCARD, in tcp_input_with_port()
871 m->m_pkthdr.rcvif, m); in tcp_input_with_port()
876 * any hardware-generated hash is ignored. in tcp_input_with_port()
878 inp = in_pcblookup(&V_tcbinfo, ip->ip_src, in tcp_input_with_port()
879 th->th_sport, next_hop->sin_addr, in tcp_input_with_port()
880 next_hop->sin_port ? ntohs(next_hop->sin_port) : in tcp_input_with_port()
881 th->th_dport, lookupflag, m->m_pkthdr.rcvif); in tcp_input_with_port()
884 inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, in tcp_input_with_port()
885 th->th_sport, ip->ip_dst, th->th_dport, lookupflag, in tcp_input_with_port()
886 m->m_pkthdr.rcvif, m); in tcp_input_with_port()
915 if ((inp->inp_flowtype == M_HASHTYPE_NONE) && in tcp_input_with_port()
916 !SOLISTENING(inp->inp_socket)) { in tcp_input_with_port()
918 inp->inp_flowid = m->m_pkthdr.flowid; in tcp_input_with_port()
919 inp->inp_flowtype = M_HASHTYPE_GET(m); in tcp_input_with_port()
925 rss_proto_software_hash_v6(&inp->in6p_faddr, in tcp_input_with_port()
926 &inp->in6p_laddr, in tcp_input_with_port()
927 inp->inp_fport, in tcp_input_with_port()
928 inp->inp_lport, in tcp_input_with_port()
930 &inp->inp_flowid, in tcp_input_with_port()
931 &inp->inp_flowtype); in tcp_input_with_port()
935 rss_proto_software_hash_v4(inp->inp_faddr, in tcp_input_with_port()
936 inp->inp_laddr, in tcp_input_with_port()
937 inp->inp_fport, in tcp_input_with_port()
938 inp->inp_lport, in tcp_input_with_port()
940 &inp->inp_flowid, in tcp_input_with_port()
941 &inp->inp_flowtype); in tcp_input_with_port()
965 * Check the minimum TTL for socket. in tcp_input_with_port()
967 if (inp->inp_ip_minttl != 0) { in tcp_input_with_port()
970 if (inp->inp_ip_minttl > ip6->ip6_hlim) in tcp_input_with_port()
974 if (inp->inp_ip_minttl > ip->ip_ttl) in tcp_input_with_port()
979 switch (tp->t_state) { in tcp_input_with_port()
990 * tcp_twcheck unlocks the inp always, and frees the m if fails. in tcp_input_with_port()
1005 if ((tp->t_port != port) && (tp->t_state > TCPS_LISTEN)) { in tcp_input_with_port()
1011 if (tp->t_flags & TF_TOE) { in tcp_input_with_port()
1022 so = inp->inp_socket; in tcp_input_with_port()
1029 KASSERT(tp->t_state == TCPS_LISTEN || !SOLISTENING(so), in tcp_input_with_port()
1031 if (tp->t_state == TCPS_LISTEN && SOLISTENING(so)) { in tcp_input_with_port()
1038 if (inp->inp_inc.inc_flags & INC_IPV6MINMTU) in tcp_input_with_port()
1040 inc.inc6_faddr = ip6->ip6_src; in tcp_input_with_port()
1041 inc.inc6_laddr = ip6->ip6_dst; in tcp_input_with_port()
1045 inc.inc_faddr = ip->ip_src; in tcp_input_with_port()
1046 inc.inc_laddr = ip->ip_dst; in tcp_input_with_port()
1048 inc.inc_fport = th->th_sport; in tcp_input_with_port()
1049 inc.inc_lport = th->th_dport; in tcp_input_with_port()
1050 inc.inc_fibnum = so->so_fibnum; in tcp_input_with_port()
1053 * Check for an existing connection attempt in syncache if in tcp_input_with_port()
1054 * the flag is only ACK. A successful lookup creates a new in tcp_input_with_port()
1078 * No syncache entry, or ACK was not for our in tcp_input_with_port()
1079 * SYN/ACK. Do our protection against double in tcp_input_with_port()
1080 * ACK. If peer sent us 2 ACKs, then for the in tcp_input_with_port()
1084 * don't want to sent RST for the second ACK, in tcp_input_with_port()
1087 * the ACK is stray indeed, rstreason would in tcp_input_with_port()
1102 * We completed the 3-way handshake in tcp_input_with_port()
1107 * or wait and have the remote end in tcp_input_with_port()
1108 * retransmit the ACK for another in tcp_input_with_port()
1129 * then listening socket is read-locked. in tcp_input_with_port()
1139 KASSERT(tp->t_state == TCPS_SYN_RECEIVED, in tcp_input_with_port()
1147 tp->t_fb->tfb_tcp_do_segment(tp, m, th, drop_hdrlen, in tcp_input_with_port()
1152 * Segment flag validation for new connection attempts: in tcp_input_with_port()
1154 * Our (SYN|ACK) response was rejected. in tcp_input_with_port()
1177 * (SYN|ACK) is bogus on a listen socket. in tcp_input_with_port()
1182 "SYN|ACK invalid, segment rejected\n", in tcp_input_with_port()
1228 * it). We compromise it as it is much better for peer in tcp_input_with_port()
1230 * for the exchange. in tcp_input_with_port()
1237 * communication is okay - "SHOULD continue to be in tcp_input_with_port()
1244 * our source address selection - we must obey the peer. in tcp_input_with_port()
1247 * multiple description text for deprecated address in tcp_input_with_port()
1248 * handling - worse, they are not exactly the same. in tcp_input_with_port()
1254 ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */, false); in tcp_input_with_port()
1256 (ia6->ia6_flags & IN6_IFF_DEPRECATED)) { in tcp_input_with_port()
1273 * global or subnet broad- or multicast address. in tcp_input_with_port()
1275 * link-layer packets with a broadcast IP address. Use in tcp_input_with_port()
1278 if (m->m_flags & (M_BCAST|M_MCAST)) { in tcp_input_with_port()
1281 "Connection attempt from broad- or multicast " in tcp_input_with_port()
1287 if (th->th_dport == th->th_sport && in tcp_input_with_port()
1288 IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) { in tcp_input_with_port()
1295 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || in tcp_input_with_port()
1296 IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { in tcp_input_with_port()
1310 if (th->th_dport == th->th_sport && in tcp_input_with_port()
1311 ip->ip_dst.s_addr == ip->ip_src.s_addr) { in tcp_input_with_port()
1318 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || in tcp_input_with_port()
1319 IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || in tcp_input_with_port()
1320 ip->ip_src.s_addr == htonl(INADDR_BROADCAST) || in tcp_input_with_port()
1321 in_ifnet_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { in tcp_input_with_port()
1324 "Connection attempt from/to broad- " in tcp_input_with_port()
1333 * for syncache. in tcp_input_with_port()
1348 if (tp->t_flags & TF_SIGNATURE) { in tcp_input_with_port()
1363 * state. tcp_do_segment() always consumes the mbuf chain, unlocks in tcp_input_with_port()
1368 * the segment silently, or send a challenge ACK. However, we try in tcp_input_with_port()
1369 * to upgrade the lock, because calling convention for stacks is in tcp_input_with_port()
1370 * write-lock on PCB. If upgrade fails, drop the SYN. in tcp_input_with_port()
1375 tp->t_fb->tfb_tcp_do_segment(tp, m, th, drop_hdrlen, tlen, iptos); in tcp_input_with_port()
1388 isipv6 ? !in6_localip(&ip6->ip6_src) : in tcp_input_with_port()
1391 !in_localip(ip->ip_src) in tcp_input_with_port()
1421 * bandwidth and high delay (eg. trans-continental/oceanic links).
1425 * more aggressive in scaling the receive socket buffer. For
1443 * of slow-start but also makes it so our peer never gets limited
1460 if (V_tcp_do_autorcvbuf && (so->so_rcv.sb_flags & SB_AUTOSIZE) && in tcp_autorcvbuf()
1461 tp->t_srtt != 0 && tp->rfbuf_ts != 0 && in tcp_autorcvbuf()
1462 TCP_TS_TO_TICKS(tcp_ts_getticks() - tp->rfbuf_ts) > in tcp_autorcvbuf()
1463 ((tp->t_srtt >> TCP_RTT_SHIFT)/2)) { in tcp_autorcvbuf()
1464 if (tp->rfbuf_cnt > ((so->so_rcv.sb_hiwat / 2)/ 4 * 3) && in tcp_autorcvbuf()
1465 so->so_rcv.sb_hiwat < V_tcp_autorcvbuf_max) { in tcp_autorcvbuf()
1466 newsize = min((so->so_rcv.sb_hiwat + (so->so_rcv.sb_hiwat/2)), V_tcp_autorcvbuf_max); in tcp_autorcvbuf()
1471 tp->rfbuf_ts = 0; in tcp_autorcvbuf()
1472 tp->rfbuf_cnt = 0; in tcp_autorcvbuf()
1474 tp->rfbuf_cnt += tlen; /* add up */ in tcp_autorcvbuf()
1491 if (tp->t_flags & TF_WAKESOR) { in tcp_handle_wakeup()
1494 tp->t_flags &= ~TF_WAKESOR; in tcp_handle_wakeup()
1513 struct in_conninfo *inc = &inp->inp_inc; in tcp_do_segment()
1520 tp->sackhint.last_sack_ack = 0; in tcp_do_segment()
1522 nsegs = max(1, m->m_pkthdr.lro_nsegs); in tcp_do_segment()
1526 KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", in tcp_do_segment()
1528 KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT", in tcp_do_segment()
1531 TCP_LOG_EVENT(tp, th, &so->so_rcv, &so->so_snd, TCP_LOG_IN, 0, in tcp_do_segment()
1545 * If a segment with the ACK-bit set arrives in the SYN-SENT state in tcp_do_segment()
1546 * check SEQ.ACK first. in tcp_do_segment()
1548 if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && in tcp_do_segment()
1549 (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { in tcp_do_segment()
1557 * Reset idle time and keep-alive timer. in tcp_do_segment()
1561 if (tp->t_idle_reduce && in tcp_do_segment()
1562 (tp->snd_max == tp->snd_una) && in tcp_do_segment()
1563 ((ticks - tp->t_rcvtime) >= tp->t_rxtcur)) in tcp_do_segment()
1565 tp->t_rcvtime = ticks; in tcp_do_segment()
1570 * Scale up the window into a 32-bit value. in tcp_do_segment()
1571 * For the SYN_SENT state the scale is zero. in tcp_do_segment()
1573 tiwin = th->th_win << tp->snd_scale; in tcp_do_segment()
1575 stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin); in tcp_do_segment()
1582 tcp_packets_this_ack(tp, th->th_ack), in tcp_do_segment()
1590 (th->th_off << 2) - sizeof(struct tcphdr), in tcp_do_segment()
1592 if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) { in tcp_do_segment()
1601 if ((tp->t_flags & TF_SIGNATURE) != 0 && in tcp_do_segment()
1614 to.to_tsecr -= tp->ts_offset; in tcp_do_segment()
1620 * Process options only when we get SYN/ACK back. The SYN case in tcp_do_segment()
1621 * for incoming connections is handled in tcp_syncache. in tcp_do_segment()
1623 * or <SYN,ACK>) segment itself is never scaled. in tcp_do_segment()
1626 if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) { in tcp_do_segment()
1627 /* Handle parallel SYN for ECN */ in tcp_do_segment()
1630 (tp->t_flags & TF_REQ_SCALE) && in tcp_do_segment()
1631 !(tp->t_flags & TF_NOOPT)) { in tcp_do_segment()
1632 tp->t_flags |= TF_RCVD_SCALE; in tcp_do_segment()
1633 tp->snd_scale = to.to_wscale; in tcp_do_segment()
1635 tp->t_flags &= ~TF_REQ_SCALE; in tcp_do_segment()
1641 tp->snd_wnd = th->th_win; in tcp_do_segment()
1643 (tp->t_flags & TF_REQ_TSTMP) && in tcp_do_segment()
1644 !(tp->t_flags & TF_NOOPT)) { in tcp_do_segment()
1645 tp->t_flags |= TF_RCVD_TSTMP; in tcp_do_segment()
1646 tp->ts_recent = to.to_tsval; in tcp_do_segment()
1647 tp->ts_recent_age = tcp_ts_getticks(); in tcp_do_segment()
1649 tp->t_flags &= ~TF_REQ_TSTMP; in tcp_do_segment()
1654 if ((tp->t_flags & TF_SACK_PERMIT) && in tcp_do_segment()
1656 (tp->t_flags & TF_NOOPT))) { in tcp_do_segment()
1657 tp->t_flags &= ~TF_SACK_PERMIT; in tcp_do_segment()
1659 if (tp->t_flags & TF_FASTOPEN) { in tcp_do_segment()
1661 !(tp->t_flags & TF_NOOPT)) { in tcp_do_segment()
1667 if ((inp->inp_vflag & INP_IPV6) != 0) { in tcp_do_segment()
1682 * If timestamps were negotiated during SYN/ACK and a in tcp_do_segment()
1688 if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) { in tcp_do_segment()
1706 * If timestamps were not negotiated during SYN/ACK and a in tcp_do_segment()
1711 if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) { in tcp_do_segment()
1720 * Header prediction: check for the two common cases in tcp_do_segment()
1721 * of a uni-directional data xfer. If the packet has in tcp_do_segment()
1722 * no control flags, is in-sequence, the window didn't in tcp_do_segment()
1724 * candidate. If the length is zero and the ack moved in tcp_do_segment()
1727 * that was blocked waiting for space. If the length in tcp_do_segment()
1728 * is non-zero and the ack didn't move, we're the in tcp_do_segment()
1729 * receiver side. If we're getting packets in-order in tcp_do_segment()
1731 * the socket buffer and note that we need a delayed ack. in tcp_do_segment()
1732 * Make sure that the hidden state-flags are also off. in tcp_do_segment()
1733 * Since we check for TCPS_ESTABLISHED first, it can only in tcp_do_segment()
1736 if (tp->t_state == TCPS_ESTABLISHED && in tcp_do_segment()
1737 th->th_seq == tp->rcv_nxt && in tcp_do_segment()
1739 tp->snd_nxt == tp->snd_max && in tcp_do_segment()
1740 tiwin && tiwin == tp->snd_wnd && in tcp_do_segment()
1741 ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) && in tcp_do_segment()
1744 TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) { in tcp_do_segment()
1746 * If last ACK falls within this segment's sequence numbers, in tcp_do_segment()
1752 SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { in tcp_do_segment()
1753 tp->ts_recent_age = tcp_ts_getticks(); in tcp_do_segment()
1754 tp->ts_recent = to.to_tsval; in tcp_do_segment()
1758 if (SEQ_GT(th->th_ack, tp->snd_una) && in tcp_do_segment()
1759 SEQ_LEQ(th->th_ack, tp->snd_max) && in tcp_do_segment()
1760 !IN_RECOVERY(tp->t_flags) && in tcp_do_segment()
1762 TAILQ_EMPTY(&tp->snd_holes)) { in tcp_do_segment()
1764 * This is a pure ack for outstanding data. in tcp_do_segment()
1771 if (tp->t_rxtshift == 1 && in tcp_do_segment()
1772 tp->t_flags & TF_PREVVALID && in tcp_do_segment()
1773 tp->t_badrxtwin != 0 && in tcp_do_segment()
1776 TSTMP_LT(to.to_tsecr, tp->t_badrxtwin)) || in tcp_do_segment()
1778 TSTMP_LT(ticks, tp->t_badrxtwin)))) in tcp_do_segment()
1785 * during the SYN+ACK phase, ignore in tcp_do_segment()
1793 t = tcp_ts_getticks() - to.to_tsecr; in tcp_do_segment()
1794 if (!tp->t_rttlow || tp->t_rttlow > t) in tcp_do_segment()
1795 tp->t_rttlow = t; in tcp_do_segment()
1798 } else if (tp->t_rtttime && in tcp_do_segment()
1799 SEQ_GT(th->th_ack, tp->t_rtseq)) { in tcp_do_segment()
1800 if (!tp->t_rttlow || in tcp_do_segment()
1801 tp->t_rttlow > ticks - tp->t_rtttime) in tcp_do_segment()
1802 tp->t_rttlow = ticks - tp->t_rtttime; in tcp_do_segment()
1804 ticks - tp->t_rtttime); in tcp_do_segment()
1815 sbdrop(&so->so_snd, acked); in tcp_do_segment()
1816 if (SEQ_GT(tp->snd_una, tp->snd_recover) && in tcp_do_segment()
1817 SEQ_LEQ(th->th_ack, tp->snd_recover)) in tcp_do_segment()
1818 tp->snd_recover = th->th_ack - 1; in tcp_do_segment()
1828 tp->snd_una = th->th_ack; in tcp_do_segment()
1833 tp->snd_wl2 = th->th_ack; in tcp_do_segment()
1834 tp->t_dupacks = 0; in tcp_do_segment()
1840 * using current (possibly backed-off) value. in tcp_do_segment()
1841 * If process is waiting for space, in tcp_do_segment()
1851 * a sufficiently large ACK. in tcp_do_segment()
1853 if (sbavail(&so->so_snd) == 0) in tcp_do_segment()
1854 tp->t_acktime = 0; in tcp_do_segment()
1856 tp->t_acktime = ticks; in tcp_do_segment()
1857 if (tp->snd_una == tp->snd_max) in tcp_do_segment()
1866 * or we need to send an ACK. in tcp_do_segment()
1868 if ((tp->t_flags & TF_ACKNOW) || in tcp_do_segment()
1869 (sbavail(&so->so_snd) >= in tcp_do_segment()
1870 SEQ_SUB(tp->snd_max, tp->snd_una))) { in tcp_do_segment()
1875 } else if (th->th_ack == tp->snd_una && in tcp_do_segment()
1876 tlen <= sbspace(&so->so_rcv)) { in tcp_do_segment()
1880 * This is a pure, in-sequence data packet with in tcp_do_segment()
1885 if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks) in tcp_do_segment()
1888 tp->rcv_nxt += tlen; in tcp_do_segment()
1890 ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) && in tcp_do_segment()
1891 (tp->t_fbyte_in == 0)) { in tcp_do_segment()
1892 tp->t_fbyte_in = ticks; in tcp_do_segment()
1893 if (tp->t_fbyte_in == 0) in tcp_do_segment()
1894 tp->t_fbyte_in = 1; in tcp_do_segment()
1895 if (tp->t_fbyte_out && tp->t_fbyte_in) in tcp_do_segment()
1896 tp->t_flags2 |= TF2_FBYTES_COMPLETE; in tcp_do_segment()
1902 tp->snd_wl1 = th->th_seq; in tcp_do_segment()
1907 tp->rcv_up = tp->rcv_nxt; in tcp_do_segment()
1916 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { in tcp_do_segment()
1926 so->so_rcv.sb_flags &= ~SB_AUTOSIZE; in tcp_do_segment()
1928 sbappendstream_locked(&so->so_rcv, m, 0); in tcp_do_segment()
1933 tp->t_flags |= TF_DELACK; in tcp_do_segment()
1935 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
1948 win = sbspace(&so->so_rcv); in tcp_do_segment()
1951 tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); in tcp_do_segment()
1953 switch (tp->t_state) { in tcp_do_segment()
1956 * if seg contains an ACK, but not for our SYN/ACK, send a RST. in tcp_do_segment()
1964 (SEQ_LEQ(th->th_ack, tp->snd_una) || in tcp_do_segment()
1965 SEQ_GT(th->th_ack, tp->snd_max))) { in tcp_do_segment()
1970 if (tp->t_flags & TF_FASTOPEN) { in tcp_do_segment()
1975 * a subset of the original data), a valid ACK, a in tcp_do_segment()
1983 /* non-initial SYN is ignored */ in tcp_do_segment()
1995 * if seg contains a RST with valid ACK (SEQ.ACK has already in tcp_do_segment()
1997 * if seg contains a RST without an ACK, drop the seg. in tcp_do_segment()
2000 * initialize tp->rcv_nxt and tp->irs in tcp_do_segment()
2001 * if seg contains ack then advance tp->snd_una in tcp_do_segment()
2005 * arrange for segment to be acked (eventually) in tcp_do_segment()
2020 tp->irs = th->th_seq; in tcp_do_segment()
2031 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == in tcp_do_segment()
2033 tp->rcv_scale = tp->request_r_scale; in tcp_do_segment()
2035 tp->rcv_adv += min(tp->rcv_wnd, in tcp_do_segment()
2036 TCP_MAXWIN << tp->rcv_scale); in tcp_do_segment()
2037 tp->snd_una++; /* SYN is acked */ in tcp_do_segment()
2038 if (SEQ_LT(tp->snd_nxt, tp->snd_una)) in tcp_do_segment()
2039 tp->snd_nxt = tp->snd_una; in tcp_do_segment()
2044 if ((tp->t_flags & TF_FASTOPEN) && in tcp_do_segment()
2045 (tp->snd_una != tp->snd_max)) { in tcp_do_segment()
2046 tp->snd_nxt = th->th_ack; in tcp_do_segment()
2050 * If there's data, delay ACK; if there's also a FIN in tcp_do_segment()
2057 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
2062 * Received <SYN,ACK> in SYN_SENT[*] state. in tcp_do_segment()
2064 * SYN_SENT --> ESTABLISHED in tcp_do_segment()
2065 * SYN_SENT* --> FIN_WAIT_1 in tcp_do_segment()
2067 tp->t_starttime = ticks; in tcp_do_segment()
2068 if (tp->t_flags & TF_NEEDFIN) { in tcp_do_segment()
2069 tp->t_acktime = ticks; in tcp_do_segment()
2071 tp->t_flags &= ~TF_NEEDFIN; in tcp_do_segment()
2083 * Received initial SYN in SYN-SENT[*] state => in tcp_do_segment()
2085 * If it succeeds, connection is * half-synchronized. in tcp_do_segment()
2086 * Otherwise, do 3-way handshake: in tcp_do_segment()
2087 * SYN-SENT -> SYN-RECEIVED in tcp_do_segment()
2088 * SYN-SENT* -> SYN-RECEIVED* in tcp_do_segment()
2090 tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN | TF_SONOTCONN); in tcp_do_segment()
2096 * Advance th->th_seq to correspond to first data byte. in tcp_do_segment()
2100 th->th_seq++; in tcp_do_segment()
2101 if (tlen > tp->rcv_wnd) { in tcp_do_segment()
2102 todrop = tlen - tp->rcv_wnd; in tcp_do_segment()
2103 m_adj(m, -todrop); in tcp_do_segment()
2104 tlen = tp->rcv_wnd; in tcp_do_segment()
2109 tp->snd_wl1 = th->th_seq - 1; in tcp_do_segment()
2110 tp->rcv_up = th->th_seq; in tcp_do_segment()
2114 * our data will be ACK'd; if so, enter normal data segment in tcp_do_segment()
2115 * processing in the middle of step 5, ack processing. in tcp_do_segment()
2137 * drop leading data (and SYN); if nothing left, just ack. in tcp_do_segment()
2143 * - RST drops connection only if SEG.SEQ == RCV.NXT. in tcp_do_segment()
2144 * - If RST is in window, we send challenge ACK. in tcp_do_segment()
2151 if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) && in tcp_do_segment()
2152 SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) || in tcp_do_segment()
2153 (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) { in tcp_do_segment()
2154 KASSERT(tp->t_state != TCPS_SYN_SENT, in tcp_do_segment()
2155 ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p", in tcp_do_segment()
2159 tp->last_ack_sent == th->th_seq) { in tcp_do_segment()
2162 switch (tp->t_state) { in tcp_do_segment()
2164 so->so_error = ECONNREFUSED; in tcp_do_segment()
2172 so->so_error = ECONNRESET; in tcp_do_segment()
2190 * Send challenge ACK for any SYN in synchronized state. in tcp_do_segment()
2192 if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT && in tcp_do_segment()
2193 tp->t_state != TCPS_SYN_RECEIVED) { in tcp_do_segment()
2196 SEQ_GEQ(th->th_seq, tp->last_ack_sent) && in tcp_do_segment()
2197 SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) { in tcp_do_segment()
2213 if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent && in tcp_do_segment()
2214 TSTMP_LT(to.to_tsval, tp->ts_recent)) { in tcp_do_segment()
2216 if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) { in tcp_do_segment()
2225 * because we don't want out-of-order segments to be in tcp_do_segment()
2228 tp->ts_recent = 0; in tcp_do_segment()
2240 * In the SYN-RECEIVED state, validate that the packet belongs to in tcp_do_segment()
2244 * for the "LAND" DoS attack. in tcp_do_segment()
2246 if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) { in tcp_do_segment()
2252 todrop = tp->rcv_nxt - th->th_seq; in tcp_do_segment()
2256 th->th_seq++; in tcp_do_segment()
2257 if (th->th_urp > 1) in tcp_do_segment()
2258 th->th_urp--; in tcp_do_segment()
2261 todrop--; in tcp_do_segment()
2276 * Send an ACK to resynchronize and drop any data. in tcp_do_segment()
2277 * But keep on processing for RST or ACK. in tcp_do_segment()
2279 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
2288 * DSACK - add SACK block for dropped range in tcp_do_segment()
2290 if ((todrop > 0) && (tp->t_flags & TF_SACK_PERMIT)) { in tcp_do_segment()
2291 tcp_update_sack_list(tp, th->th_seq, in tcp_do_segment()
2292 th->th_seq + todrop); in tcp_do_segment()
2294 * ACK now, as the next in-sequence segment in tcp_do_segment()
2297 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
2300 th->th_seq += todrop; in tcp_do_segment()
2301 tlen -= todrop; in tcp_do_segment()
2302 if (th->th_urp > todrop) in tcp_do_segment()
2303 th->th_urp -= todrop; in tcp_do_segment()
2306 th->th_urp = 0; in tcp_do_segment()
2315 if ((tp->t_flags & TF_CLOSED) && tlen > 0 && in tcp_do_segment()
2316 TCPS_HAVERCVDFIN(tp->t_state) == 0) { in tcp_do_segment()
2321 s, __func__, tcpstates[tp->t_state], tlen); in tcp_do_segment()
2325 /* tcp_close will kill the inp pre-log the Reset */ in tcp_do_segment()
2335 * (and PUSH and FIN); if nothing left, just ACK. in tcp_do_segment()
2337 todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd); in tcp_do_segment()
2346 * remember to ack. Otherwise, drop segment in tcp_do_segment()
2347 * and ack. in tcp_do_segment()
2349 if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) { in tcp_do_segment()
2350 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
2356 m_adj(m, -todrop); in tcp_do_segment()
2357 tlen -= todrop; in tcp_do_segment()
2362 * If last ACK falls within this segment's sequence numbers, in tcp_do_segment()
2371 * Last.ACK.Sent <= SEG.SEQ + SEG.Len in tcp_do_segment()
2373 * Last.ACK.Sent < SEG.SEQ + SEG.Len, in tcp_do_segment()
2377 * RTT correctly when RCV.NXT == Last.ACK.Sent. in tcp_do_segment()
2380 SEQ_LEQ(th->th_seq, tp->last_ack_sent) && in tcp_do_segment()
2381 SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + in tcp_do_segment()
2383 tp->ts_recent_age = tcp_ts_getticks(); in tcp_do_segment()
2384 tp->ts_recent = to.to_tsval; in tcp_do_segment()
2388 * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN in tcp_do_segment()
2389 * flag is on (half-synchronized state), then queue data for in tcp_do_segment()
2393 if (tp->t_state == TCPS_SYN_RECEIVED || in tcp_do_segment()
2394 (tp->t_flags & TF_NEEDSYN)) { in tcp_do_segment()
2395 if (tp->t_state == TCPS_SYN_RECEIVED && in tcp_do_segment()
2396 (tp->t_flags & TF_FASTOPEN)) { in tcp_do_segment()
2397 tp->snd_wnd = tiwin; in tcp_do_segment()
2401 } else if (tp->t_flags & TF_ACKNOW) in tcp_do_segment()
2408 * Ack processing. in tcp_do_segment()
2410 if (SEQ_GEQ(tp->snd_una, tp->iss + (TCP_MAXWIN << tp->snd_scale))) { in tcp_do_segment()
2411 /* Checking SEG.ACK against ISS is definitely redundant. */ in tcp_do_segment()
2412 tp->t_flags2 |= TF2_NO_ISS_CHECK; in tcp_do_segment()
2418 if (tp->t_flags2 & TF2_NO_ISS_CHECK) { in tcp_do_segment()
2419 /* Check for too old ACKs (RFC 5961, Section 5.2). */ in tcp_do_segment()
2420 seq_min = tp->snd_una - tp->max_sndwnd; in tcp_do_segment()
2423 if (SEQ_GT(tp->iss + 1, tp->snd_una - tp->max_sndwnd)) { in tcp_do_segment()
2424 /* Checking for ghost ACKs is stricter. */ in tcp_do_segment()
2425 seq_min = tp->iss + 1; in tcp_do_segment()
2429 * Checking for too old ACKs (RFC 5961, in tcp_do_segment()
2432 seq_min = tp->snd_una - tp->max_sndwnd; in tcp_do_segment()
2436 if (SEQ_LT(th->th_ack, seq_min)) { in tcp_do_segment()
2446 switch (tp->t_state) { in tcp_do_segment()
2448 * In SYN_RECEIVED state, the ack ACKs our SYN, so enter in tcp_do_segment()
2450 * The ACK was checked above. in tcp_do_segment()
2455 if (tp->t_flags & TF_SONOTCONN) { in tcp_do_segment()
2461 * with TF_SONOTCONN. The other reason for this mark in tcp_do_segment()
2465 tp->t_flags &= ~TF_SONOTCONN; in tcp_do_segment()
2469 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == in tcp_do_segment()
2471 tp->rcv_scale = tp->request_r_scale; in tcp_do_segment()
2473 tp->snd_wnd = tiwin; in tcp_do_segment()
2476 * SYN-RECEIVED -> ESTABLISHED in tcp_do_segment()
2477 * SYN-RECEIVED* -> FIN-WAIT-1 in tcp_do_segment()
2479 tp->t_starttime = ticks; in tcp_do_segment()
2480 if ((tp->t_flags & TF_FASTOPEN) && tp->t_tfo_pending) { in tcp_do_segment()
2481 tcp_fastopen_decrement_counter(tp->t_tfo_pending); in tcp_do_segment()
2482 tp->t_tfo_pending = NULL; in tcp_do_segment()
2484 if (tp->t_flags & TF_NEEDFIN) { in tcp_do_segment()
2485 tp->t_acktime = ticks; in tcp_do_segment()
2487 tp->t_flags &= ~TF_NEEDFIN; in tcp_do_segment()
2494 * processing. Calling it again here for such in tcp_do_segment()
2496 * snd_cwnd reduction that occurs when a TFO SYN|ACK in tcp_do_segment()
2499 if (!(tp->t_flags & TF_FASTOPEN)) in tcp_do_segment()
2504 * Account for the ACK of our SYN prior to in tcp_do_segment()
2505 * regular ACK processing below, except for in tcp_do_segment()
2508 if (SEQ_GT(th->th_ack, tp->snd_una) && !(tp->t_flags & TF_NEEDSYN)) in tcp_do_segment()
2511 * If segment contains data or ACK, will call tcp_reass() in tcp_do_segment()
2519 tp->snd_wl1 = th->th_seq - 1; in tcp_do_segment()
2523 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range in tcp_do_segment()
2524 * ACKs. If the ack is in the range in tcp_do_segment()
2525 * tp->snd_una < th->th_ack <= tp->snd_max in tcp_do_segment()
2526 * then advance tp->snd_una to th->th_ack and drop in tcp_do_segment()
2527 * data from the retransmission queue. If this ACK reflects in tcp_do_segment()
2536 if (SEQ_GT(th->th_ack, tp->snd_max)) { in tcp_do_segment()
2541 sack_changed = tcp_sack_doack(tp, &to, th->th_ack); in tcp_do_segment()
2543 (tp->t_flags & TF_LRD)) { in tcp_do_segment()
2549 * from the last ack with SACK doesn't get used. in tcp_do_segment()
2551 tp->sackhint.sacked_bytes = 0; in tcp_do_segment()
2558 if (SEQ_LEQ(th->th_ack, tp->snd_una)) { in tcp_do_segment()
2561 (tiwin == tp->snd_wnd || in tcp_do_segment()
2562 (tp->t_flags & TF_SACK_PERMIT))) { in tcp_do_segment()
2571 (TCPS_HAVERCVDFIN(tp->t_state) == 0)) { in tcp_do_segment()
2572 tp->t_dupacks = 0; in tcp_do_segment()
2579 * duplicate ack (ie, window info didn't in tcp_do_segment()
2581 * the ack is the biggest we've in tcp_do_segment()
2613 if (th->th_ack != tp->snd_una || in tcp_do_segment()
2618 tp->t_dupacks = 0; in tcp_do_segment()
2619 } else if (++tp->t_dupacks > tcprexmtthresh || in tcp_do_segment()
2620 IN_FASTRECOVERY(tp->t_flags)) { in tcp_do_segment()
2624 IN_FASTRECOVERY(tp->t_flags) && in tcp_do_segment()
2625 (tp->t_flags & TF_SACK_PERMIT)) { in tcp_do_segment()
2629 IN_FASTRECOVERY(tp->t_flags) && in tcp_do_segment()
2630 (tp->snd_nxt == tp->snd_max)) { in tcp_do_segment()
2640 if (awnd < tp->snd_ssthresh) { in tcp_do_segment()
2641 tp->snd_cwnd += imax(maxseg, in tcp_do_segment()
2643 tp->sackhint.delivered_data)); in tcp_do_segment()
2644 if (tp->snd_cwnd > tp->snd_ssthresh) in tcp_do_segment()
2645 tp->snd_cwnd = tp->snd_ssthresh; in tcp_do_segment()
2648 IN_FASTRECOVERY(tp->t_flags) && in tcp_do_segment()
2649 SEQ_LT(tp->snd_nxt, tp->snd_max)) { in tcp_do_segment()
2650 tp->snd_cwnd += imax(maxseg, in tcp_do_segment()
2652 tp->sackhint.delivered_data)); in tcp_do_segment()
2654 tp->snd_cwnd += maxseg; in tcp_do_segment()
2658 } else if (tp->t_dupacks == tcprexmtthresh || in tcp_do_segment()
2659 (tp->t_flags & TF_SACK_PERMIT && in tcp_do_segment()
2661 tp->sackhint.sacked_bytes > in tcp_do_segment()
2662 (tcprexmtthresh - 1) * maxseg)) { in tcp_do_segment()
2666 * more than (dupthresh-1)*maxseg sacked data. in tcp_do_segment()
2672 tp->t_dupacks = tcprexmtthresh; in tcp_do_segment()
2673 tcp_seq onxt = tp->snd_nxt; in tcp_do_segment()
2683 if (IN_FASTRECOVERY(tp->t_flags)) { in tcp_do_segment()
2684 tp->t_dupacks = 0; in tcp_do_segment()
2688 if (SEQ_LEQ(th->th_ack, in tcp_do_segment()
2689 tp->snd_recover)) { in tcp_do_segment()
2690 tp->t_dupacks = 0; in tcp_do_segment()
2694 /* Congestion signal before ack. */ in tcp_do_segment()
2699 tp->t_rtttime = 0; in tcp_do_segment()
2710 tp->sackhint.prr_delivered = in tcp_do_segment()
2711 imin(tp->snd_max - th->th_ack, in tcp_do_segment()
2712 (tp->snd_limited + 1) * maxseg); in tcp_do_segment()
2714 tp->sackhint.prr_delivered = in tcp_do_segment()
2717 tp->sackhint.recover_fs = max(1, in tcp_do_segment()
2718 tp->snd_nxt - tp->snd_una); in tcp_do_segment()
2720 tp->snd_limited = 0; in tcp_do_segment()
2730 tp->snd_nxt = tp->snd_max; in tcp_do_segment()
2731 tp->snd_cwnd = tcp_compute_pipe(tp) + in tcp_do_segment()
2735 tp->snd_cwnd = tp->snd_ssthresh; in tcp_do_segment()
2736 if (SEQ_GT(th->th_ack, tp->snd_una)) { in tcp_do_segment()
2741 tp->snd_nxt = th->th_ack; in tcp_do_segment()
2742 tp->snd_cwnd = maxseg; in tcp_do_segment()
2744 KASSERT(tp->snd_limited <= 2, in tcp_do_segment()
2745 ("%s: tp->snd_limited too big", in tcp_do_segment()
2747 tp->snd_cwnd = tp->snd_ssthresh + in tcp_do_segment()
2749 (tp->t_dupacks - tp->snd_limited); in tcp_do_segment()
2750 if (SEQ_GT(onxt, tp->snd_nxt)) in tcp_do_segment()
2751 tp->snd_nxt = onxt; in tcp_do_segment()
2758 * for more. Make sure we can send a in tcp_do_segment()
2760 * ACK by increasing snd_cwnd by one in tcp_do_segment()
2765 uint32_t oldcwnd = tp->snd_cwnd; in tcp_do_segment()
2766 tcp_seq oldsndmax = tp->snd_max; in tcp_do_segment()
2770 KASSERT(tp->t_dupacks == 1 || in tcp_do_segment()
2771 tp->t_dupacks == 2, in tcp_do_segment()
2774 if (tp->t_dupacks == 1) in tcp_do_segment()
2775 tp->snd_limited = 0; in tcp_do_segment()
2776 if ((tp->snd_nxt == tp->snd_max) && in tcp_do_segment()
2777 (tp->t_rxtshift == 0)) in tcp_do_segment()
2778 tp->snd_cwnd = in tcp_do_segment()
2779 SEQ_SUB(tp->snd_nxt, in tcp_do_segment()
2780 tp->snd_una) - in tcp_do_segment()
2782 tp->snd_cwnd += in tcp_do_segment()
2783 (tp->t_dupacks - tp->snd_limited) * in tcp_do_segment()
2784 maxseg - tcp_sack_adjust(tp); in tcp_do_segment()
2788 * or we need to send an ACK. in tcp_do_segment()
2791 avail = sbavail(&so->so_snd); in tcp_do_segment()
2793 if (tp->t_flags & TF_ACKNOW || in tcp_do_segment()
2795 SEQ_SUB(tp->snd_nxt, tp->snd_una))) { in tcp_do_segment()
2798 sent = SEQ_SUB(tp->snd_max, oldsndmax); in tcp_do_segment()
2800 KASSERT((tp->t_dupacks == 2 && in tcp_do_segment()
2801 tp->snd_limited == 0) || in tcp_do_segment()
2803 tp->t_flags & TF_SENTFIN) || in tcp_do_segment()
2805 tp->t_flags & TF_NODELAY), in tcp_do_segment()
2808 tp->snd_limited = 2; in tcp_do_segment()
2810 ++tp->snd_limited; in tcp_do_segment()
2812 tp->snd_cwnd = oldcwnd; in tcp_do_segment()
2819 * This ack is advancing the left edge, reset the in tcp_do_segment()
2822 tp->t_dupacks = 0; in tcp_do_segment()
2824 * If this ack also has new SACK info, increment the in tcp_do_segment()
2833 (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) || in tcp_do_segment()
2834 ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) && in tcp_do_segment()
2835 (tp->snd_nxt == tp->snd_max)) { in tcp_do_segment()
2836 tp->t_dupacks++; in tcp_do_segment()
2838 if (!IN_FASTRECOVERY(tp->t_flags) && in tcp_do_segment()
2839 (tp->sackhint.sacked_bytes > in tcp_do_segment()
2840 ((tcprexmtthresh - 1) * in tcp_do_segment()
2848 KASSERT(SEQ_GT(th->th_ack, tp->snd_una), in tcp_do_segment()
2853 * for the other side's cached packets, retract it. in tcp_do_segment()
2855 if (SEQ_LT(th->th_ack, tp->snd_recover)) { in tcp_do_segment()
2856 if (IN_FASTRECOVERY(tp->t_flags)) { in tcp_do_segment()
2857 if (tp->t_flags & TF_SACK_PERMIT) { in tcp_do_segment()
2862 tp->t_rtttime = 0; in tcp_do_segment()
2865 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
2874 } else if (IN_CONGRECOVERY(tp->t_flags) && in tcp_do_segment()
2876 tp->sackhint.delivered_data = in tcp_do_segment()
2878 tp->snd_fack = th->th_ack; in tcp_do_segment()
2881 * always use PRR-SSRB in tcp_do_segment()
2889 * If we reach this point, ACK is not a duplicate, in tcp_do_segment()
2892 if (tp->t_flags & TF_NEEDSYN) { in tcp_do_segment()
2894 * T/TCP: Connection was half-synchronized, and our in tcp_do_segment()
2895 * SYN has been ACK'd (so connection is now fully in tcp_do_segment()
2896 * synchronized). Go to non-starred state, in tcp_do_segment()
2897 * increment snd_una for ACK of SYN, and check if in tcp_do_segment()
2900 tp->t_flags &= ~TF_NEEDSYN; in tcp_do_segment()
2901 tp->snd_una++; in tcp_do_segment()
2903 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == in tcp_do_segment()
2905 tp->rcv_scale = tp->request_r_scale; in tcp_do_segment()
2914 * Adjust for the SYN bit in sequence space, in tcp_do_segment()
2915 * but don't account for it in cwnd calculations. in tcp_do_segment()
2916 * This is for the SYN_RECEIVED, non-simultaneous in tcp_do_segment()
2921 tp->snd_una++; in tcp_do_segment()
2924 "(tp->snd_una=%u, th->th_ack=%u, tp=%p, m=%p)", __func__, in tcp_do_segment()
2925 tp->snd_una, th->th_ack, tp, m)); in tcp_do_segment()
2930 * If we just performed our first retransmit, and the ACK in tcp_do_segment()
2936 if (tp->t_rxtshift == 1 && in tcp_do_segment()
2937 tp->t_flags & TF_PREVVALID && in tcp_do_segment()
2938 tp->t_badrxtwin != 0 && in tcp_do_segment()
2941 TSTMP_LT(to.to_tsecr, tp->t_badrxtwin)) in tcp_do_segment()
2954 * during the SYN+ACK phase, ignore in tcp_do_segment()
2961 t = tcp_ts_getticks() - to.to_tsecr; in tcp_do_segment()
2962 if (!tp->t_rttlow || tp->t_rttlow > t) in tcp_do_segment()
2963 tp->t_rttlow = t; in tcp_do_segment()
2965 } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) { in tcp_do_segment()
2966 if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime) in tcp_do_segment()
2967 tp->t_rttlow = ticks - tp->t_rtttime; in tcp_do_segment()
2968 tcp_xmit_timer(tp, ticks - tp->t_rtttime); in tcp_do_segment()
2976 * large ACK. in tcp_do_segment()
2978 if ((tp->t_state <= TCPS_CLOSE_WAIT && in tcp_do_segment()
2979 acked == sbavail(&so->so_snd)) || in tcp_do_segment()
2980 acked > sbavail(&so->so_snd)) in tcp_do_segment()
2981 tp->t_acktime = 0; in tcp_do_segment()
2983 tp->t_acktime = ticks; in tcp_do_segment()
2989 * timer, using current (possibly backed-off) value. in tcp_do_segment()
2991 if (th->th_ack == tp->snd_max) { in tcp_do_segment()
2998 * If no data (only SYN) was ACK'd, in tcp_do_segment()
2999 * skip rest of ACK processing. in tcp_do_segment()
3013 if (acked > sbavail(&so->so_snd)) { in tcp_do_segment()
3014 if (tp->snd_wnd >= sbavail(&so->so_snd)) in tcp_do_segment()
3015 tp->snd_wnd -= sbavail(&so->so_snd); in tcp_do_segment()
3017 tp->snd_wnd = 0; in tcp_do_segment()
3018 mfree = sbcut_locked(&so->so_snd, in tcp_do_segment()
3019 (int)sbavail(&so->so_snd)); in tcp_do_segment()
3022 mfree = sbcut_locked(&so->so_snd, acked); in tcp_do_segment()
3023 if (tp->snd_wnd >= (uint32_t) acked) in tcp_do_segment()
3024 tp->snd_wnd -= acked; in tcp_do_segment()
3026 tp->snd_wnd = 0; in tcp_do_segment()
3033 if (!IN_RECOVERY(tp->t_flags) && in tcp_do_segment()
3034 SEQ_GT(tp->snd_una, tp->snd_recover) && in tcp_do_segment()
3035 SEQ_LEQ(th->th_ack, tp->snd_recover)) in tcp_do_segment()
3036 tp->snd_recover = th->th_ack - 1; in tcp_do_segment()
3037 tp->snd_una = th->th_ack; in tcp_do_segment()
3038 if (IN_RECOVERY(tp->t_flags) && in tcp_do_segment()
3039 SEQ_GEQ(th->th_ack, tp->snd_recover)) { in tcp_do_segment()
3042 if (SEQ_GT(tp->snd_una, tp->snd_recover)) { in tcp_do_segment()
3043 tp->snd_recover = tp->snd_una; in tcp_do_segment()
3045 if (SEQ_LT(tp->snd_nxt, tp->snd_una)) in tcp_do_segment()
3046 tp->snd_nxt = tp->snd_una; in tcp_do_segment()
3048 switch (tp->t_state) { in tcp_do_segment()
3051 * for the ESTABLISHED state if our FIN is now acknowledged in tcp_do_segment()
3063 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { in tcp_do_segment()
3076 * In CLOSING STATE in addition to the processing for in tcp_do_segment()
3077 * the ESTABLISHED state if the ACK acknowledges our FIN in tcp_do_segment()
3078 * then enter the TIME-WAIT state, otherwise ignore in tcp_do_segment()
3090 * In LAST_ACK, we may still be waiting for data to drain in tcp_do_segment()
3091 * and/or to be acked, as well as for the ack of our FIN. in tcp_do_segment()
3109 * Don't look at window if no ACK: TAC's send garbage on first SYN. in tcp_do_segment()
3112 (SEQ_LT(tp->snd_wl1, th->th_seq) || in tcp_do_segment()
3113 (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) || in tcp_do_segment()
3114 (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) { in tcp_do_segment()
3117 tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) in tcp_do_segment()
3119 tp->snd_wnd = tiwin; in tcp_do_segment()
3120 tp->snd_wl1 = th->th_seq; in tcp_do_segment()
3121 tp->snd_wl2 = th->th_ack; in tcp_do_segment()
3122 if (tp->snd_wnd > tp->max_sndwnd) in tcp_do_segment()
3123 tp->max_sndwnd = tp->snd_wnd; in tcp_do_segment()
3130 if ((thflags & TH_URG) && th->th_urp && in tcp_do_segment()
3131 TCPS_HAVERCVDFIN(tp->t_state) == 0) { in tcp_do_segment()
3139 if (th->th_urp + sbavail(&so->so_rcv) > sb_max) { in tcp_do_segment()
3140 th->th_urp = 0; /* XXX */ in tcp_do_segment()
3159 if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) { in tcp_do_segment()
3160 tp->rcv_up = th->th_seq + th->th_urp; in tcp_do_segment()
3161 so->so_oobmark = sbavail(&so->so_rcv) + in tcp_do_segment()
3162 (tp->rcv_up - tp->rcv_nxt) - 1; in tcp_do_segment()
3163 if (so->so_oobmark == 0) in tcp_do_segment()
3164 so->so_rcv.sb_state |= SBS_RCVATMARK; in tcp_do_segment()
3166 tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); in tcp_do_segment()
3172 * but if two URG's are pending at once, some out-of-band in tcp_do_segment()
3175 if (th->th_urp <= (uint32_t)tlen && in tcp_do_segment()
3176 !(so->so_options & SO_OOBINLINE)) { in tcp_do_segment()
3186 if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) in tcp_do_segment()
3187 tp->rcv_up = tp->rcv_nxt; in tcp_do_segment()
3194 * and arranging for acknowledgment of receipt if necessary. in tcp_do_segment()
3195 * This process logically involves adjusting tp->rcv_wnd as data in tcp_do_segment()
3200 tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) && in tcp_do_segment()
3201 (tp->t_flags & TF_FASTOPEN)); in tcp_do_segment()
3203 TCPS_HAVERCVDFIN(tp->t_state) == 0) { in tcp_do_segment()
3204 tcp_seq save_start = th->th_seq; in tcp_do_segment()
3205 tcp_seq save_rnxt = tp->rcv_nxt; in tcp_do_segment()
3216 * Set DELACK for segments received in order, but ack in tcp_do_segment()
3220 if (th->th_seq == tp->rcv_nxt && in tcp_do_segment()
3222 (TCPS_HAVEESTABLISHED(tp->t_state) || in tcp_do_segment()
3225 tp->t_flags |= TF_DELACK; in tcp_do_segment()
3227 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
3228 tp->rcv_nxt += tlen; in tcp_do_segment()
3230 ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) && in tcp_do_segment()
3231 (tp->t_fbyte_in == 0)) { in tcp_do_segment()
3232 tp->t_fbyte_in = ticks; in tcp_do_segment()
3233 if (tp->t_fbyte_in == 0) in tcp_do_segment()
3234 tp->t_fbyte_in = 1; in tcp_do_segment()
3235 if (tp->t_fbyte_out && tp->t_fbyte_in) in tcp_do_segment()
3236 tp->t_flags2 |= TF2_FBYTES_COMPLETE; in tcp_do_segment()
3242 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) in tcp_do_segment()
3245 sbappendstream_locked(&so->so_rcv, m, 0); in tcp_do_segment()
3246 tp->t_flags |= TF_WAKESOR; in tcp_do_segment()
3257 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
3259 if ((tp->t_flags & TF_SACK_PERMIT) && in tcp_do_segment()
3261 TCPS_HAVEESTABLISHED(tp->t_state)) { in tcp_do_segment()
3269 } else if ((tlen > 0) && SEQ_GT(tp->rcv_nxt, save_rnxt)) { in tcp_do_segment()
3270 if ((tp->rcv_numsacks >= 1) && in tcp_do_segment()
3271 (tp->sackblks[0].end == save_start)) { in tcp_do_segment()
3277 tp->sackblks[0].start, in tcp_do_segment()
3278 tp->sackblks[0].end); in tcp_do_segment()
3300 if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) in tcp_do_segment()
3301 len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt); in tcp_do_segment()
3303 len = so->so_rcv.sb_hiwat; in tcp_do_segment()
3314 tcpstates[tp->t_state], tlen); in tcp_do_segment()
3321 tcpstates[tp->t_state], tlen); in tcp_do_segment()
3330 tcpstates[tp->t_state]); in tcp_do_segment()
3340 * If FIN is received ACK the FIN and let the user know in tcp_do_segment()
3344 if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { in tcp_do_segment()
3348 * If connection is half-synchronized in tcp_do_segment()
3349 * (ie NEEDSYN flag on) then delay ACK, in tcp_do_segment()
3352 * more input can be expected, send ACK now. in tcp_do_segment()
3354 if (tp->t_flags & TF_NEEDSYN) in tcp_do_segment()
3355 tp->t_flags |= TF_DELACK; in tcp_do_segment()
3357 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
3358 tp->rcv_nxt++; in tcp_do_segment()
3360 switch (tp->t_state) { in tcp_do_segment()
3366 tp->t_starttime = ticks; in tcp_do_segment()
3382 * starting the time-wait timer, turning off the other in tcp_do_segment()
3395 if (needoutput || (tp->t_flags & TF_ACKNOW)) { in tcp_do_segment()
3401 if (tp->t_flags & TF_DELACK) { in tcp_do_segment()
3402 tp->t_flags &= ~TF_DELACK; in tcp_do_segment()
3410 * Generate an ACK dropping incoming segment if it occupies in tcp_do_segment()
3411 * sequence space, where the ACK reflects our state. in tcp_do_segment()
3413 * We can now skip the test for the RST flag since all in tcp_do_segment()
3417 * In the SYN-RECEIVED state, don't send an ACK unless the in tcp_do_segment()
3418 * segment we received passes the SYN-RECEIVED ACK test. in tcp_do_segment()
3420 * "LAND" DoS attack, and also prevents an ACK storm in tcp_do_segment()
3424 if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) && in tcp_do_segment()
3425 (SEQ_GT(tp->snd_una, th->th_ack) || in tcp_do_segment()
3426 SEQ_GT(th->th_ack, tp->snd_max)) ) { in tcp_do_segment()
3432 tp->t_flags |= TF_ACKNOW; in tcp_do_segment()
3458 * Issue RST and make ACK acceptable to originator of segment.
3478 if ((tcp_get_flags(th) & TH_RST) || m->m_flags & (M_BCAST|M_MCAST)) in tcp_dropwithreset()
3481 if (mtod(m, struct ip *)->ip_v == 6) { in tcp_dropwithreset()
3483 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || in tcp_dropwithreset()
3484 IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) in tcp_dropwithreset()
3495 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || in tcp_dropwithreset()
3496 IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || in tcp_dropwithreset()
3497 ip->ip_src.s_addr == htonl(INADDR_BROADCAST) || in tcp_dropwithreset()
3498 in_ifnet_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) in tcp_dropwithreset()
3510 th->th_ack, TH_RST); in tcp_dropwithreset()
3516 tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen, in tcp_dropwithreset()
3532 to->to_flags = 0; in tcp_dooptions()
3533 for (; cnt > 0; cnt -= optlen, cp += optlen) { in tcp_dooptions()
3552 to->to_flags |= TOF_MSS; in tcp_dooptions()
3554 (char *)&to->to_mss, sizeof(to->to_mss)); in tcp_dooptions()
3555 to->to_mss = ntohs(to->to_mss); in tcp_dooptions()
3562 to->to_flags |= TOF_SCALE; in tcp_dooptions()
3563 to->to_wscale = min(cp[2], TCP_MAX_WINSHIFT); in tcp_dooptions()
3568 to->to_flags |= TOF_TS; in tcp_dooptions()
3570 (char *)&to->to_tsval, sizeof(to->to_tsval)); in tcp_dooptions()
3571 to->to_tsval = ntohl(to->to_tsval); in tcp_dooptions()
3573 (char *)&to->to_tsecr, sizeof(to->to_tsecr)); in tcp_dooptions()
3574 to->to_tsecr = ntohl(to->to_tsecr); in tcp_dooptions()
3581 * here for the syncache code to perform the correct in tcp_dooptions()
3586 to->to_flags |= TOF_SIGNATURE; in tcp_dooptions()
3587 to->to_signature = cp + 2; in tcp_dooptions()
3596 to->to_flags |= TOF_SACKPERM; in tcp_dooptions()
3599 if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0) in tcp_dooptions()
3603 to->to_flags |= TOF_SACK; in tcp_dooptions()
3604 to->to_nsacks = (optlen - 2) / TCPOLEN_SACK; in tcp_dooptions()
3605 to->to_sacks = cp + 2; in tcp_dooptions()
3619 to->to_flags |= TOF_FASTOPEN; in tcp_dooptions()
3620 to->to_tfo_len = optlen - 2; in tcp_dooptions()
3621 to->to_tfo_cookie = to->to_tfo_len ? cp + 2 : NULL; in tcp_dooptions()
3632 * It is still reflected in the segment length for
3639 int cnt = off + th->th_urp - 1; in tcp_pulloutofband()
3642 if (m->m_len > cnt) { in tcp_pulloutofband()
3648 tp->t_iobc = *cp; in tcp_pulloutofband()
3649 tp->t_oobflags |= TCPOOB_HAVEDATA; in tcp_pulloutofband()
3650 bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1)); in tcp_pulloutofband()
3651 m->m_len--; in tcp_pulloutofband()
3652 if (m->m_flags & M_PKTHDR) in tcp_pulloutofband()
3653 m->m_pkthdr.len--; in tcp_pulloutofband()
3656 cnt -= m->m_len; in tcp_pulloutofband()
3657 m = m->m_next; in tcp_pulloutofband()
3665 * Collect new round-trip time estimate
3676 if (tp->t_rttupdated < UCHAR_MAX) in tcp_xmit_timer()
3677 tp->t_rttupdated++; in tcp_xmit_timer()
3679 stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, in tcp_xmit_timer()
3682 if ((tp->t_srtt != 0) && (tp->t_rxtshift <= TCP_RTT_INVALIDATE)) { in tcp_xmit_timer()
3690 delta = ((rtt - 1) << TCP_DELTA_SHIFT) in tcp_xmit_timer()
3691 - (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)); in tcp_xmit_timer()
3693 if ((tp->t_srtt += delta) <= 0) in tcp_xmit_timer()
3694 tp->t_srtt = 1; in tcp_xmit_timer()
3704 * rfc793's wired-in beta. in tcp_xmit_timer()
3707 delta = -delta; in tcp_xmit_timer()
3708 delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT); in tcp_xmit_timer()
3709 if ((tp->t_rttvar += delta) <= 0) in tcp_xmit_timer()
3710 tp->t_rttvar = 1; in tcp_xmit_timer()
3713 * No rtt measurement yet - use the unsmoothed rtt. in tcp_xmit_timer()
3717 tp->t_srtt = rtt << TCP_RTT_SHIFT; in tcp_xmit_timer()
3718 tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); in tcp_xmit_timer()
3720 tp->t_rtttime = 0; in tcp_xmit_timer()
3721 tp->t_rxtshift = 0; in tcp_xmit_timer()
3728 * 1 extra tick because of +-1/2 tick uncertainty in the in tcp_xmit_timer()
3734 TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), in tcp_xmit_timer()
3735 max(tp->t_rttmin, rtt + 2), tcp_rexmit_max); in tcp_xmit_timer()
3738 * We received an ack for a packet that wasn't retransmitted; in tcp_xmit_timer()
3741 * for now (a route might have failed after we sent a segment, in tcp_xmit_timer()
3744 tp->t_softerror = 0; in tcp_xmit_timer()
3748 * Determine a reasonable value for maxseg size.
3749 * If the route is known, check route for mtu.
3757 * While looking at the routing entry, we also initialize other path-dependent
3758 * parameters from pre-set or cached values in the routing entry.
3760 * NOTE that resulting t_maxseg doesn't include space for TCP options or
3762 * thus it is calculated for every segment separately in tcp_output().
3765 * segment, or an ICMP need fragmentation datagram. Outgoing SYN/ACK MSS
3777 int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0; in tcp_mss_update()
3787 if (tp->t_port) in tcp_mss_update()
3789 if (mtuoffer != -1) { in tcp_mss_update()
3790 KASSERT(offer == -1, ("%s: conflict", __func__)); in tcp_mss_update()
3791 offer = mtuoffer - min_protoh; in tcp_mss_update()
3797 maxmtu = tcp_maxmtu6(&inp->inp_inc, cap); in tcp_mss_update()
3798 tp->t_maxseg = V_tcp_v6mssdflt; in tcp_mss_update()
3806 maxmtu = tcp_maxmtu(&inp->inp_inc, cap); in tcp_mss_update()
3807 tp->t_maxseg = V_tcp_mssdflt; in tcp_mss_update()
3817 * to a defined state as tcp_hc_get() would do for us in tcp_mss_update()
3833 offer = tp->t_maxseg; in tcp_mss_update()
3836 case -1: in tcp_mss_update()
3838 * Offer == -1 means that we didn't receive SYN yet. in tcp_mss_update()
3852 tcp_hc_get(&inp->inp_inc, metricptr); in tcp_mss_update()
3858 if (metricptr->hc_mtu) in tcp_mss_update()
3859 mss = min(metricptr->hc_mtu, maxmtu) - min_protoh; in tcp_mss_update()
3863 mss = maxmtu - min_protoh; in tcp_mss_update()
3865 !in6_localaddr(&inp->in6p_faddr)) in tcp_mss_update()
3874 mss = maxmtu - min_protoh; in tcp_mss_update()
3876 !in_localaddr(inp->inp_faddr)) in tcp_mss_update()
3881 * XXX - The above conditional (mss = maxmtu - min_protoh) in tcp_mss_update()
3888 * on the Internet today. For the moment, we'll sweep in tcp_mss_update()
3896 * recomputed. For Further Study. in tcp_mss_update()
3907 * XXXGL: shouldn't we reserve space for IP/IPv6 options? in tcp_mss_update()
3911 tp->t_maxseg = mss; in tcp_mss_update()
3912 if (tp->t_maxseg < V_tcp_mssdflt) { in tcp_mss_update()
3918 tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; in tcp_mss_update()
3920 tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; in tcp_mss_update()
3938 tcp_mss_update(tp, offer, -1, &metrics, &cap); in tcp_mss()
3940 mss = tp->t_maxseg; in tcp_mss()
3949 so = inp->inp_socket; in tcp_mss()
3951 if ((so->so_snd.sb_hiwat == V_tcp_sendspace) && metrics.hc_sendpipe) in tcp_mss()
3954 bufsize = so->so_snd.sb_hiwat; in tcp_mss()
3961 if (bufsize > so->so_snd.sb_hiwat) in tcp_mss()
3971 * XXXGL: shouldn't we reserve space for IP/IPv6 options? in tcp_mss()
3973 tp->t_maxseg = max(mss, 64); in tcp_mss()
3974 if (tp->t_maxseg < V_tcp_mssdflt) { in tcp_mss()
3980 tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; in tcp_mss()
3982 tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; in tcp_mss()
3986 if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.hc_recvpipe) in tcp_mss()
3989 bufsize = so->so_rcv.sb_hiwat; in tcp_mss()
3994 if (bufsize > so->so_rcv.sb_hiwat) in tcp_mss()
3999 /* Check the interface for TSO capabilities. */ in tcp_mss()
4001 tp->t_flags |= TF_TSO; in tcp_mss()
4002 tp->t_tsomax = cap.tsomax; in tcp_mss()
4003 tp->t_tsomaxsegcount = cap.tsomaxsegcount; in tcp_mss()
4004 tp->t_tsomaxsegsize = cap.tsomaxsegsize; in tcp_mss()
4006 tp->t_flags2 |= TF2_IPSEC_TSO; in tcp_mss()
4024 if (inc->inc_flags & INC_ISIPV6) { in tcp_mssopt()
4045 mss = min(maxmtu, thcmtu) - min_protoh; in tcp_mssopt()
4047 mss = max(maxmtu, thcmtu) - min_protoh; in tcp_mssopt()
4066 * Compute the amount of data that this ACK is indicating in tcp_do_prr_ack()
4071 (IN_CONGRECOVERY(tp->t_flags) && in tcp_do_prr_ack()
4072 !IN_FASTRECOVERY(tp->t_flags))) { in tcp_do_prr_ack()
4073 del_data = tp->sackhint.delivered_data; in tcp_do_prr_ack()
4076 if (tp->sackhint.prr_delivered < (tcprexmtthresh * maxseg + in tcp_do_prr_ack()
4077 tp->snd_recover - tp->snd_una)) { in tcp_do_prr_ack()
4080 pipe = imax(0, tp->snd_max - tp->snd_una - in tcp_do_prr_ack()
4081 imin(INT_MAX / 65536, tp->t_dupacks) * maxseg); in tcp_do_prr_ack()
4083 tp->sackhint.prr_delivered += del_data; in tcp_do_prr_ack()
4087 if (pipe >= tp->snd_ssthresh) { in tcp_do_prr_ack()
4088 if (tp->sackhint.recover_fs == 0) in tcp_do_prr_ack()
4089 tp->sackhint.recover_fs = in tcp_do_prr_ack()
4090 imax(1, tp->snd_nxt - tp->snd_una); in tcp_do_prr_ack()
4091 snd_cnt = howmany((long)tp->sackhint.prr_delivered * in tcp_do_prr_ack()
4092 tp->snd_ssthresh, tp->sackhint.recover_fs) - in tcp_do_prr_ack()
4093 tp->sackhint.prr_out + maxseg - 1; in tcp_do_prr_ack()
4097 * - A partial ack without SACK block beneath snd_recover in tcp_do_prr_ack()
4099 * - A SACK scoreboard update adding a new hole indicates in tcp_do_prr_ack()
4102 * - Prevent ACK splitting attacks, by being conservative in tcp_do_prr_ack()
4106 limit = tp->sackhint.prr_delivered - in tcp_do_prr_ack()
4107 tp->sackhint.prr_out; in tcp_do_prr_ack()
4109 limit = imax(tp->sackhint.prr_delivered - in tcp_do_prr_ack()
4110 tp->sackhint.prr_out, del_data) + in tcp_do_prr_ack()
4113 snd_cnt = imin((tp->snd_ssthresh - pipe), limit); in tcp_do_prr_ack()
4117 * Send snd_cnt new data into the network in response to this ack. in tcp_do_prr_ack()
4121 if (IN_FASTRECOVERY(tp->t_flags)) { in tcp_do_prr_ack()
4123 tp->snd_cwnd = pipe - del_data + (snd_cnt * maxseg); in tcp_do_prr_ack()
4125 tp->snd_cwnd = (tp->snd_max - tp->snd_una) + in tcp_do_prr_ack()
4128 } else if (IN_CONGRECOVERY(tp->t_flags)) { in tcp_do_prr_ack()
4129 tp->snd_cwnd = pipe - del_data + (snd_cnt * maxseg); in tcp_do_prr_ack()
4131 tp->snd_cwnd = imax(maxseg, tp->snd_cwnd); in tcp_do_prr_ack()
4135 * When a partial ack arrives, force the retransmission of the
4136 * next unacknowledged segment. Do not clear tp->t_dupacks.
4143 tcp_seq onxt = tp->snd_nxt; in tcp_newreno_partial_ack()
4144 uint32_t ocwnd = tp->snd_cwnd; in tcp_newreno_partial_ack()
4150 tp->t_rtttime = 0; in tcp_newreno_partial_ack()
4151 if (IN_FASTRECOVERY(tp->t_flags)) { in tcp_newreno_partial_ack()
4152 tp->snd_nxt = th->th_ack; in tcp_newreno_partial_ack()
4155 * (tp->snd_una has not yet been updated when this function is called.) in tcp_newreno_partial_ack()
4157 tp->snd_cwnd = maxseg + BYTES_THIS_ACK(tp, th); in tcp_newreno_partial_ack()
4158 tp->t_flags |= TF_ACKNOW; in tcp_newreno_partial_ack()
4160 tp->snd_cwnd = ocwnd; in tcp_newreno_partial_ack()
4161 if (SEQ_GT(onxt, tp->snd_nxt)) in tcp_newreno_partial_ack()
4162 tp->snd_nxt = onxt; in tcp_newreno_partial_ack()
4165 * Partial window deflation. Relies on fact that tp->snd_una in tcp_newreno_partial_ack()
4168 if (tp->snd_cwnd > BYTES_THIS_ACK(tp, th)) in tcp_newreno_partial_ack()
4169 tp->snd_cwnd -= BYTES_THIS_ACK(tp, th); in tcp_newreno_partial_ack()
4171 tp->snd_cwnd = 0; in tcp_newreno_partial_ack()
4172 tp->snd_cwnd += maxseg; in tcp_newreno_partial_ack()
4180 if (tp->t_fb->tfb_compute_pipe != NULL) { in tcp_compute_pipe()
4181 pipe = (*tp->t_fb->tfb_compute_pipe)(tp); in tcp_compute_pipe()
4183 pipe = tp->snd_max - tp->snd_una + in tcp_compute_pipe()
4184 tp->sackhint.sack_bytes_rexmit - in tcp_compute_pipe()
4185 tp->sackhint.sacked_bytes - in tcp_compute_pipe()
4186 tp->sackhint.lost_bytes; in tcp_compute_pipe()
4188 pipe = tp->snd_nxt - tp->snd_fack + tp->sackhint.sack_bytes_rexmit; in tcp_compute_pipe()
4202 * Support for user specified value for initial flight size. in tcp_compute_initwnd()