Lines Matching +full:gain +full:- +full:scaling +full:- +full:p

1 /*-
2 * Copyright (c) 2016-2020 Netflix, Inc.
162 * - Matt Mathis's Rate Halving which slowly drops
165 * - Yuchung Cheng's RACK TCP (for which it's named) that
168 * - Reorder Detection of RFC4737 and the Tail-Loss probe draft
186 * TCP output is also over-written with a new version since it
191 static int32_t rack_tlp_limit = 2; /* No more than 2 TLPs w-out new data */
194 static int32_t rack_reorder_fade = 60000000; /* 0 - never fade, def 60,000,000
195 * - 60 seconds */
199 static uint8_t rack_ssthresh_rest_rto_rec = 0; /* Do we restore ssthresh when we have rec -> rto -> rec */
201 static uint32_t rack_gp_gain_req = 1200; /* Amount percent wise required to gain to record a round as "gaining" */
217 static int32_t rack_hw_rate_cap_per = 0; /* 0 -- off */
253 static int32_t rack_non_rxt_use_cr = 0; /* does a non-rxt in recovery use the configured rate (ss/ca)? */
258 static int32_t rack_sack_not_required = 1; /* set to one to allow non-sack to use rack */
265 static int32_t rack_hw_check_queue = 0; /* Do we always pre-check queue depth of a hw queue */
295 static uint16_t rack_per_of_gp_ss = 250; /* 250 % slow-start */
296 static uint16_t rack_per_of_gp_ca = 200; /* 200 % congestion-avoidance */
311 static uint32_t rack_probe_rtt_safety_val = 2000000; /* No more than 2 sec in probe-rtt */
313 static uint32_t rack_probertt_gpsrtt_cnt_mul = 0; /* How many srtt periods does probe-rtt last top fraction */
314 static uint32_t rack_probertt_gpsrtt_cnt_div = 0; /* How many srtt periods does probe-rtt last bottom fraction */
333 * the way fill-cw interacts with timely and caps how much
334 * timely can boost the fill-cw b/w.
338 * gain (returned by rack_get_output_gain()). Remember too that
339 * the gain returned can be overridden by other factors such as
340 * probeRTT as well as fixed-rate-pacing.
434 #define RACK_REXMTVAL(tp) max(rack_rto_min, ((tp)->t_srtt + ((tp)->t_rttvar << 2)))
605 tim = rack->r_ctl.lt_bw_time;
606 bytes = rack->r_ctl.lt_bw_bytes;
607 if (rack->lt_bw_up) {
610 bytes += (rack->rc_tp->snd_una - rack->r_ctl.lt_seq);
611 tim += (tcp_tv_to_lusectick(&tv) - rack->r_ctl.lt_timemark);
628 tp = rack->rc_tp;
629 if (tp->t_cc == NULL) {
633 rack->rc_pacing_cc_set = 1;
634 if (strcmp(tp->t_cc->name, CCALGONAME_NEWRENO) != 0) {
635 /* Not new-reno we can't play games with beta! */
640 if (CC_ALGO(tp)->ctl_output == NULL) {
641 /* Huh, not using new-reno so no swaps.? */
650 error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, &sopt, &opt);
657 error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, &sopt, &opt);
667 opt.val = rack->r_ctl.rc_saved_beta.beta;
668 error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, &sopt, &opt);
674 opt.val = rack->r_ctl.rc_saved_beta.beta_ecn;
675 error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, &sopt, &opt);
681 memcpy(&rack->r_ctl.rc_saved_beta, &old, sizeof(struct newreno));
683 if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
688 ptr = ((struct newreno *)tp->t_ccv.cc_data);
691 log.u_bbr.flex1 = ptr->beta;
692 log.u_bbr.flex2 = ptr->beta_ecn;
693 log.u_bbr.flex3 = ptr->newreno_flags;
694 log.u_bbr.flex4 = rack->r_ctl.rc_saved_beta.beta;
695 log.u_bbr.flex5 = rack->r_ctl.rc_saved_beta.beta_ecn;
697 log.u_bbr.flex7 = rack->gp_ready;
699 log.u_bbr.flex7 |= rack->use_fixed_rate;
701 log.u_bbr.flex7 |= rack->rc_pacing_cc_set;
702 log.u_bbr.pkts_out = rack->r_ctl.rc_prr_sndcnt;
712 if (rack->rc_pacing_cc_set)
718 rack->rc_pacing_cc_set = 1;
725 if (rack->rc_pacing_cc_set == 0)
731 rack->rc_pacing_cc_set = 0;
738 if (rack->rc_pacing_cc_set)
740 if (rack->r_ctl.pacing_method & RACK_REG_PACING)
742 if (rack->r_ctl.pacing_method & RACK_DGP_PACING)
744 rack->rc_always_pace = 0;
745 rack->r_ctl.pacing_method = RACK_PACING_NONE;
746 rack->dgp_on = 0;
747 rack->rc_hybrid_mode = 0;
748 rack->use_fixed_rate = 0;
755 if (tcp_bblogging_on(rack->rc_tp) && (rack_verbose_logging != 0)) {
761 log.u_bbr.flex2 = rack->rc_tp->gput_seq;
763 log.u_bbr.flex4 = rack->rc_tp->gput_ts;
765 log.u_bbr.flex6 = rack->rc_tp->gput_ack;
768 log.u_bbr.rttProp = rack->r_ctl.rc_gp_cumack_ts;
769 log.u_bbr.delRate = rack->r_ctl.rc_gp_output_ts;
771 log.u_bbr.cwnd_gain = rack->app_limited_needs_set;
772 log.u_bbr.pkt_epoch = rack->r_ctl.rc_app_limited_cnt;
773 log.u_bbr.epoch = rack->r_ctl.current_round;
774 log.u_bbr.lt_epoch = rack->r_ctl.rc_considered_lost;
776 log.u_bbr.applimited = rsm->r_start;
777 log.u_bbr.delivered = rsm->r_end;
778 log.u_bbr.epoch = rsm->r_flags;
781 TCP_LOG_EVENTP(rack->rc_tp, NULL,
782 &rack->rc_inp->inp_socket->so_rcv,
783 &rack->rc_inp->inp_socket->so_snd,
796 if (error || req->newptr == NULL)
919 "What percentage above goodput do we clamp CA/SS to at exit on high-BDP path 110%");
924 "What percentage above goodput do we clamp CA/SS to at exit on a non high-BDP path 100%");
969 "Should we use the min-rtt to calculate the goal rtt (else gp_srtt) at entry");
974 "How to set cwnd at exit, 0 - dynamic, 1 - use min-rtt, 2 - use curgprtt, 3 - entry gp-rtt");
999 "If the rtt goes lower within this percentage of the time, go into probe-rtt");
1009 "Do we clear I/S counts on exiting probe-rtt");
1019 "We are highly buffered if min_rtt_seen / max_rtt_seen > this-threshold");
1198 "If we fall below this rate, dis-engage hw pacing?");
1260 "Profile 5 upper bound to timely gain");
1354 "What method do we do for TLP time calc 0=no-de-ack-comp, 1=ID, 2=2.1, 3=2.2");
1374 "Should we always send the oldest TLP and RACK-TLP");
1412 "When doing recovery -> rto -> recovery do we reset SSthresh?");
1447 "Minimum RTO in microseconds -- set with caution below 1000 due to TLP");
1452 "Maximum RTO in microseconds -- should be at least as large as min_rto");
1474 "Does a cwnd just-return end the measurement window (app limited)");
1479 "Does an rwnd just-return end the measurement window (app limited -- not persists)");
1536 "Should RACK use mbuf queuing for non-paced connections");
1580 "When a persist or keep-alive probe is not answered do we calculate rtt on subsequent answers?");
1664 "Highest move to non-move ratio seen");
1805 "Total number of times a sends returned enobuf for non-hdwr paced connections");
2002 return (tcp_compute_initwnd(tcp_maxseg(rack->rc_tp)));
2009 if (IN_FASTRECOVERY(rack->rc_tp->t_flags))
2010 return (rack->r_ctl.rc_fixed_pacing_rate_rec);
2011 else if (rack->r_ctl.cwnd_to_use < rack->rc_tp->snd_ssthresh)
2012 return (rack->r_ctl.rc_fixed_pacing_rate_ss);
2014 return (rack->r_ctl.rc_fixed_pacing_rate_ca);
2036 do_log = tcp_bblogging_on(rack->rc_tp);
2044 do_log = tcp_bblogging_on(rack->rc_tp);
2046 do_log = tcp_bblogging_point_on(rack->rc_tp, TCP_BBPOINT_REQ_LEVEL_LOGGING);
2071 cur = rack->r_ctl.rc_last_sft;
2073 if (rack->r_ctl.rack_rs.rs_flags != RACK_RTT_EMPTY)
2074 log.u_bbr.inflight = rack->r_ctl.rack_rs.rs_us_rtt;
2076 /* Use the last known rtt i.e. the rack-rtt */
2077 log.u_bbr.inflight = rack->rc_rack_rtt;
2082 log.u_bbr.cur_del_rate = cur->deadline;
2085 log.u_bbr.pkt_epoch = (uint32_t)(cur->start & 0x00000000ffffffff);
2086 log.u_bbr.lost = (uint32_t)((cur->start >> 32) & 0x00000000ffffffff);
2087 log.u_bbr.flex6 = cur->start_seq;
2088 log.u_bbr.pkts_out = cur->end_seq;
2091 log.u_bbr.pkt_epoch = (uint32_t)(cur->start & 0x00000000ffffffff);
2092 log.u_bbr.lost = (uint32_t)((cur->start >> 32) & 0x00000000ffffffff);
2094 log.u_bbr.flex6 = (uint32_t)(cur->end & 0x00000000ffffffff);
2095 log.u_bbr.pkts_out = (uint32_t)((cur->end >> 32) & 0x00000000ffffffff);
2098 log.u_bbr.epoch = (uint32_t)(cur->first_send & 0x00000000ffffffff);
2099 log.u_bbr.lt_epoch = (uint32_t)((cur->first_send >> 32) & 0x00000000ffffffff);
2101 log.u_bbr.applimited = (uint32_t)(cur->localtime & 0x00000000ffffffff);
2102 log.u_bbr.delivered = (uint32_t)((cur->localtime >> 32) & 0x00000000ffffffff);
2104 off = (uint64_t)(cur) - (uint64_t)(&rack->rc_tp->t_tcpreq_info[0]);
2108 log.u_bbr.flex4 = (uint32_t)(rack->rc_tp->t_sndbytes - cur->sent_at_fs);
2109 log.u_bbr.flex5 = (uint32_t)(rack->rc_tp->t_snd_rxt_bytes - cur->rxt_at_fs);
2110 log.u_bbr.flex7 = (uint16_t)cur->hybrid_flags;
2122 log.u_bbr.bbr_state = rack->rc_always_pace;
2124 log.u_bbr.bbr_state |= rack->dgp_on;
2126 log.u_bbr.bbr_state |= rack->rc_hybrid_mode;
2128 log.u_bbr.bbr_state |= rack->use_fixed_rate;
2130 tcp_log_event(rack->rc_tp, NULL,
2131 &rack->rc_inp->inp_socket->so_rcv,
2132 &rack->rc_inp->inp_socket->so_snd,
2144 if (tcp_bblogging_point_on(rack->rc_tp, TCP_BBPOINT_REQ_LEVEL_LOGGING)) {
2153 log.u_bbr.delRate = cur->sent_at_fs;
2155 if ((cur->flags & TCP_TRK_TRACK_FLG_LSND) == 0) {
2161 log.u_bbr.cur_del_rate = rack->rc_tp->t_sndbytes;
2162 log.u_bbr.rttProp = rack->rc_tp->t_snd_rxt_bytes;
2168 log.u_bbr.cur_del_rate = cur->sent_at_ls;
2169 log.u_bbr.rttProp = cur->rxt_at_ls;
2171 log.u_bbr.bw_inuse = cur->rxt_at_fs;
2173 off = (uint64_t)(cur) - (uint64_t)(&rack->rc_tp->t_tcpreq_info[0]);
2176 log.u_bbr.flex2 = (uint32_t)(cur->start & 0x00000000ffffffff);
2177 log.u_bbr.flex1 = (uint32_t)((cur->start >> 32) & 0x00000000ffffffff);
2179 log.u_bbr.flex4 = (uint32_t)(cur->end & 0x00000000ffffffff);
2180 log.u_bbr.flex3 = (uint32_t)((cur->end >> 32) & 0x00000000ffffffff);
2183 log.u_bbr.applimited = (uint32_t)(cur->localtime & 0x00000000ffffffff);
2184 log.u_bbr.delivered = (uint32_t)((cur->localtime >> 32) & 0x00000000ffffffff);
2186 log.u_bbr.epoch = (uint32_t)(cur->timestamp & 0x00000000ffffffff);
2187 log.u_bbr.lt_epoch = (uint32_t)((cur->timestamp >> 32) & 0x00000000ffffffff);
2189 log.u_bbr.pkts_out = cur->hybrid_flags;
2190 log.u_bbr.lost = cur->playout_ms;
2191 log.u_bbr.flex6 = cur->flags;
2194 * where a false retransmit occurred so first_send <-> lastsend may
2197 log.u_bbr.pkt_epoch = (uint32_t)(rack->r_ctl.last_tmit_time_acked & 0x00000000ffffffff);
2198 log.u_bbr.flex5 = (uint32_t)((rack->r_ctl.last_tmit_time_acked >> 32) & 0x00000000ffffffff);
2206 log.u_bbr.bbr_state = rack->rc_always_pace;
2208 log.u_bbr.bbr_state |= rack->dgp_on;
2210 log.u_bbr.bbr_state |= rack->rc_hybrid_mode;
2212 log.u_bbr.bbr_state |= rack->use_fixed_rate;
2215 tcp_log_event(rack->rc_tp, NULL,
2216 &rack->rc_inp->inp_socket->so_rcv,
2217 &rack->rc_inp->inp_socket->so_snd,
2230 ether = rack->rc_tp->t_maxseg + sizeof(struct tcphdr);
2231 if (rack->r_is_v6){
2242 u_segsiz = (uint64_t)min(ctf_fixed_maxseg(rack->rc_tp), rack->r_ctl.rc_pace_min_segs);
2257 if (rack->r_ctl.bw_rate_cap == 0)
2260 if (rack->rc_catch_up && rack->rc_hybrid_mode &&
2261 (rack->r_ctl.rc_last_sft != NULL)) {
2269 ent = rack->r_ctl.rc_last_sft;
2272 if (timenow >= ent->deadline) {
2274 rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
2276 rack->r_ctl.bw_rate_cap = 0;
2280 timeleft = rack->r_ctl.rc_last_sft->deadline - timenow;
2283 rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
2285 rack->r_ctl.bw_rate_cap = 0;
2294 if (ent->flags & TCP_TRK_TRACK_FLG_COMP) {
2295 if (SEQ_GT(ent->end_seq, rack->rc_tp->snd_una))
2296 lenleft = ent->end_seq - rack->rc_tp->snd_una;
2299 rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
2301 rack->r_ctl.bw_rate_cap = 0;
2310 if (SEQ_GT(rack->rc_tp->snd_una, ent->start_seq))
2311 lengone = rack->rc_tp->snd_una - ent->start_seq;
2314 if (lengone < (ent->end - ent->start))
2315 lenleft = (ent->end - ent->start) - lengone;
2318 rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
2320 rack->r_ctl.bw_rate_cap = 0;
2326 rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
2328 if (rack->r_ctl.bw_rate_cap)
2338 rack->r_ctl.bw_rate_cap = calcbw;
2339 if ((rack->r_ctl.rc_last_sft->hybrid_flags & TCP_HYBRID_PACING_S_MSS) &&
2341 ((rack->r_ctl.rc_last_sft->hybrid_flags & TCP_HYBRID_PACING_SETMSS) == 0)) {
2342 /* Lets set in a smaller mss possibly here to match our rate-cap */
2345 orig_max = rack->r_ctl.rc_pace_max_segs;
2346 rack->r_ctl.rc_last_sft->hybrid_flags |= TCP_HYBRID_PACING_SETMSS;
2347 rack->r_ctl.rc_pace_max_segs = rack_get_pacing_len(rack, calcbw, ctf_fixed_maxseg(rack->rc_tp));
2348 rack_log_type_pacing_sizes(rack->rc_tp, rack, rack->r_ctl.client_suggested_maxseg, orig_max, __LINE__, 5);
2350 rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
2353 rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
2354 *bw, ent->deadline, lenleft, HYBRID_LOG_RATE_CAP, 0, ent, __LINE__);
2362 if ((rack->r_ctl.bw_rate_cap > 0) && (*bw > rack->r_ctl.bw_rate_cap)) {
2364 if (rack->rc_hybrid_mode &&
2365 rack->rc_catch_up &&
2366 (rack->r_ctl.rc_last_sft != NULL) &&
2367 (rack->r_ctl.rc_last_sft->hybrid_flags & TCP_HYBRID_PACING_S_MSS) &&
2369 ((rack->r_ctl.rc_last_sft->hybrid_flags & TCP_HYBRID_PACING_SETMSS) == 0)) {
2370 /* Lets set in a smaller mss possibly here to match our rate-cap */
2373 orig_max = rack->r_ctl.rc_pace_max_segs;
2374 rack->r_ctl.rc_last_sft->hybrid_flags |= TCP_HYBRID_PACING_SETMSS;
2375 rack->r_ctl.rc_pace_max_segs = rack_get_pacing_len(rack, rack->r_ctl.bw_rate_cap, ctf_fixed_maxseg(rack->rc_tp));
2376 rack_log_type_pacing_sizes(rack->rc_tp, rack, rack->r_ctl.client_suggested_maxseg, orig_max, __LINE__, 5);
2380 *bw = rack->r_ctl.bw_rate_cap;
2381 rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
2392 if (rack->rc_gp_filled == 0) {
2406 if (rack->dis_lt_bw == 1)
2412 * No goodput bw but a long-term b/w does exist
2418 if (rack->r_ctl.init_rate)
2419 return (rack->r_ctl.init_rate);
2422 if (rack->rc_tp->t_srtt == 0) {
2430 bw = tcp_compute_initwnd(tcp_maxseg(rack->rc_tp));
2431 srtt = (uint64_t)rack->rc_tp->t_srtt;
2438 if (rack->r_ctl.num_measurements >= RACK_REQ_AVG) {
2440 bw = rack->r_ctl.gp_bw;
2443 bw = rack->r_ctl.gp_bw / max(rack->r_ctl.num_measurements, 1);
2445 if (rack->dis_lt_bw) {
2446 /* We are not using lt-bw */
2453 lt_bw = rack->r_ctl.gp_bw;
2455 if (rack->use_lesser_lt_bw) {
2487 if (rack->use_fixed_rate) {
2498 if (rack->use_fixed_rate) {
2500 } else if (rack->in_probe_rtt && (rsm == NULL))
2501 return (rack->r_ctl.rack_per_of_gp_probertt);
2502 else if ((IN_FASTRECOVERY(rack->rc_tp->t_flags) &&
2503 rack->r_ctl.rack_per_of_gp_rec)) {
2506 return (rack->r_ctl.rack_per_of_gp_rec);
2507 } else if (rack->rack_rec_nonrxt_use_cr) {
2510 } else if (rack->rack_no_prr &&
2511 (rack->r_ctl.rack_per_of_gp_rec > 100)) {
2516 * Here we may have a non-retransmit but we
2520 return (rack->r_ctl.rack_per_of_gp_rec);
2525 if (rack->r_ctl.cwnd_to_use < rack->rc_tp->snd_ssthresh)
2526 return (rack->r_ctl.rack_per_of_gp_ss);
2528 return (rack->r_ctl.rack_per_of_gp_ca);
2536 * 1 = dsack_persists reduced by 1 via T-O or fast recovery exit.
2543 if (tcp_bblogging_on(rack->rc_tp)) {
2548 log.u_bbr.flex1 = rack->rc_rack_tmr_std_based;
2550 log.u_bbr.flex1 |= rack->rc_rack_use_dsack;
2552 log.u_bbr.flex1 |= rack->rc_dsack_round_seen;
2553 log.u_bbr.flex2 = rack->r_ctl.dsack_round_end;
2554 log.u_bbr.flex3 = rack->r_ctl.num_dsack;
2558 log.u_bbr.flex7 = rack->r_ctl.dsack_persist;
2561 log.u_bbr.epoch = rack->r_ctl.current_round;
2562 log.u_bbr.lt_epoch = rack->r_ctl.rc_considered_lost;
2563 TCP_LOG_EVENTP(rack->rc_tp, NULL,
2564 &rack->rc_inp->inp_socket->so_rcv,
2565 &rack->rc_inp->inp_socket->so_snd,
2576 if (tcp_bblogging_on(rack->rc_tp)) {
2585 if (rack->r_ctl.crte) {
2586 ifp = rack->r_ctl.crte->ptbl->rs_ifp;
2587 } else if (rack->rc_inp->inp_route.ro_nh &&
2588 rack->rc_inp->inp_route.ro_nh->nh_ifp) {
2589 ifp = rack->rc_inp->inp_route.ro_nh->nh_ifp;
2602 log.u_bbr.applimited = rack->r_ctl.rc_pace_max_segs;
2603 log.u_bbr.flex8 = rack->use_fixed_rate;
2605 log.u_bbr.flex8 |= rack->rack_hdrw_pacing;
2606 log.u_bbr.pkts_out = rack->rc_tp->t_maxseg;
2607 log.u_bbr.delRate = rack->r_ctl.crte_prev_rate;
2608 if (rack->r_ctl.crte)
2609 log.u_bbr.cur_del_rate = rack->r_ctl.crte->rate;
2612 log.u_bbr.rttProp = rack->r_ctl.last_hw_bw_req;
2613 log.u_bbr.epoch = rack->r_ctl.current_round;
2614 log.u_bbr.lt_epoch = rack->r_ctl.rc_considered_lost;
2615 TCP_LOG_EVENTP(rack->rc_tp, NULL,
2616 &rack->rc_inp->inp_socket->so_rcv,
2617 &rack->rc_inp->inp_socket->so_snd,
2630 uint64_t gain;
2632 gain = (uint64_t)rack_get_output_gain(rack, rsm);
2633 bw_est = bw * gain;
2638 if (rack->r_rack_hw_rate_caps) {
2640 if (rack->r_ctl.crte != NULL) {
2642 high_rate = tcp_hw_highest_rate(rack->r_ctl.crte);
2647 rack->r_rack_hw_rate_caps = 0;
2657 } else if ((rack->rack_hdrw_pacing == 0) &&
2658 (rack->rack_hdw_pace_ena) &&
2659 (rack->rack_attempt_hdwr_pace == 0) &&
2660 (rack->rc_inp->inp_route.ro_nh != NULL) &&
2661 (rack->rc_inp->inp_route.ro_nh->nh_ifp != NULL)) {
2669 high_rate = tcp_hw_highest_rate_ifp(rack->rc_inp->inp_route.ro_nh->nh_ifp, rack->rc_inp);
2687 if (tcp_bblogging_on(rack->rc_tp)) {
2694 * 1 - We are retransmitting and this tells the reason.
2695 * 2 - We are clearing a dup-ack count.
2696 * 3 - We are incrementing a dup-ack count.
2706 log.u_bbr.flex3 = rsm->r_flags;
2707 log.u_bbr.flex4 = rsm->r_dupack;
2708 log.u_bbr.flex5 = rsm->r_start;
2709 log.u_bbr.flex6 = rsm->r_end;
2711 log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
2713 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
2714 log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
2715 log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
2716 log.u_bbr.pacing_gain = rack->r_must_retran;
2717 log.u_bbr.epoch = rack->r_ctl.current_round;
2718 log.u_bbr.lt_epoch = rack->r_ctl.rc_considered_lost;
2719 TCP_LOG_EVENTP(rack->rc_tp, NULL,
2720 &rack->rc_inp->inp_socket->so_rcv,
2721 &rack->rc_inp->inp_socket->so_snd,
2730 if (tcp_bblogging_on(rack->rc_tp)) {
2735 log.u_bbr.flex1 = rack->rc_tp->t_srtt;
2737 log.u_bbr.flex3 = rack->r_ctl.rc_hpts_flags;
2739 log.u_bbr.flex5 = rack->rc_tp->t_hpts_slot;
2740 log.u_bbr.flex6 = rack->rc_tp->t_rxtcur;
2741 log.u_bbr.flex7 = rack->rc_in_persist;
2743 if (rack->rack_no_prr)
2746 log.u_bbr.pkts_out = rack->r_ctl.rc_prr_sndcnt;
2747 log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
2749 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
2750 log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
2751 log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
2752 log.u_bbr.pacing_gain = rack->r_must_retran;
2753 log.u_bbr.cwnd_gain = rack->rack_deferred_inited;
2754 log.u_bbr.pkt_epoch = rack->rc_has_collapsed;
2755 log.u_bbr.lt_epoch = rack->rc_tp->t_rxtshift;
2757 log.u_bbr.epoch = rack->r_ctl.roundends;
2758 log.u_bbr.bw_inuse = rack->r_ctl.current_round;
2760 log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost;
2761 log.u_bbr.applimited = rack->rc_tp->t_flags2;
2762 TCP_LOG_EVENTP(rack->rc_tp, NULL,
2763 &rack->rc_inp->inp_socket->so_rcv,
2764 &rack->rc_inp->inp_socket->so_snd,
2773 if (tcp_bblogging_on(rack->rc_tp)) {
2778 log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
2780 log.u_bbr.flex1 = rack->r_ctl.rc_rack_min_rtt;
2781 log.u_bbr.flex2 = rack->rc_rack_rtt;
2785 log.u_bbr.flex3 = rsm->r_end - rsm->r_start;
2786 if (rack->rack_no_prr)
2789 log.u_bbr.flex5 = rack->r_ctl.rc_prr_sndcnt;
2791 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
2792 log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
2793 log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
2794 log.u_bbr.pacing_gain = rack->r_must_retran;
2795 log.u_bbr.bw_inuse = rack->r_ctl.current_round;
2797 log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost;
2798 TCP_LOG_EVENTP(rack->rc_tp, NULL,
2799 &rack->rc_inp->inp_socket->so_rcv,
2800 &rack->rc_inp->inp_socket->so_snd,
2813 if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
2819 log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
2825 log.u_bbr.flex1 = prev->r_start;
2826 log.u_bbr.flex2 = prev->r_end;
2830 log.u_bbr.flex3 = rsm->r_start;
2831 log.u_bbr.flex4 = rsm->r_end;
2835 log.u_bbr.flex5 = next->r_start;
2836 log.u_bbr.flex6 = next->r_end;
2842 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
2843 if (rack->rack_no_prr)
2846 log.u_bbr.lost = rack->r_ctl.rc_prr_sndcnt;
2847 log.u_bbr.bw_inuse = rack->r_ctl.current_round;
2849 log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost;
2850 TCP_LOG_EVENTP(rack->rc_tp, NULL,
2851 &rack->rc_inp->inp_socket->so_rcv,
2852 &rack->rc_inp->inp_socket->so_snd,
2866 log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
2869 log.u_bbr.flex3 = rack->r_ctl.rc_rack_min_rtt;
2870 log.u_bbr.flex4 = rack->r_ctl.rack_rs.rs_rtt_lowest;
2871 log.u_bbr.flex5 = rack->r_ctl.rack_rs.rs_rtt_highest;
2872 log.u_bbr.flex6 = rack->r_ctl.rack_rs.rs_us_rtrcnt;
2874 log.u_bbr.rttProp = (uint64_t)rack->r_ctl.rack_rs.rs_rtt_tot;
2875 log.u_bbr.flex8 = rack->r_ctl.rc_rate_sample_method;
2877 log.u_bbr.delivered = rack->r_ctl.rack_rs.rs_us_rtrcnt;
2878 log.u_bbr.pkts_out = rack->r_ctl.rack_rs.rs_flags;
2879 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
2881 log.u_bbr.pkt_epoch = rsm->r_start;
2882 log.u_bbr.lost = rsm->r_end;
2883 log.u_bbr.cwnd_gain = rsm->r_rtr_cnt;
2885 log.u_bbr.pacing_gain = (uint16_t)rsm->r_flags;
2888 log.u_bbr.pkt_epoch = rack->rc_tp->iss;
2894 log.u_bbr.use_lt_bw = rack->rc_highly_buffered;
2896 log.u_bbr.use_lt_bw |= rack->forced_ack;
2898 log.u_bbr.use_lt_bw |= rack->rc_gp_dyn_mul;
2900 log.u_bbr.use_lt_bw |= rack->in_probe_rtt;
2902 log.u_bbr.use_lt_bw |= rack->measure_saw_probe_rtt;
2904 log.u_bbr.use_lt_bw |= rack->app_limited_needs_set;
2906 log.u_bbr.use_lt_bw |= rack->rc_gp_filled;
2908 log.u_bbr.use_lt_bw |= rack->rc_dragged_bottom;
2909 log.u_bbr.applimited = rack->r_ctl.rc_target_probertt_flight;
2910 log.u_bbr.epoch = rack->r_ctl.rc_time_probertt_starts;
2911 log.u_bbr.lt_epoch = rack->r_ctl.rc_time_probertt_entered;
2912 log.u_bbr.cur_del_rate = rack->r_ctl.rc_lower_rtt_us_cts;
2913 log.u_bbr.delRate = rack->r_ctl.rc_gp_srtt;
2914 log.u_bbr.bw_inuse = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time);
2917 log.u_bbr.bw_inuse |= ((uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)]);
2919 &rack->rc_inp->inp_socket->so_rcv,
2920 &rack->rc_inp->inp_socket->so_snd,
2936 if (tcp_bblogging_on(rack->rc_tp)) {
2943 log.u_bbr.flex6 = rack->rc_tp->t_rxtcur;
2946 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
2947 log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
2948 log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
2949 log.u_bbr.pacing_gain = rack->r_must_retran;
2956 log.u_bbr.delRate = rack->r_ctl.rack_rs.confidence;
2958 log.u_bbr.delRate |= rack->r_ctl.rack_rs.rs_us_rtt;
2962 log.u_bbr.lt_epoch = rack->r_ctl.timer_slop;
2965 log.u_bbr.rttProp = RACK_REXMTVAL(rack->rc_tp);
2966 log.u_bbr.bw_inuse = rack->r_ctl.act_rcv_time.tv_sec;
2968 log.u_bbr.bw_inuse += rack->r_ctl.act_rcv_time.tv_usec;
2969 TCP_LOG_EVENTP(rack->rc_tp, NULL,
2970 &rack->rc_inp->inp_socket->so_rcv,
2971 &rack->rc_inp->inp_socket->so_snd,
2980 if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
2992 log.u_bbr.bw_inuse = rack->r_ctl.current_round;
2994 log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost;
2995 TCP_LOG_EVENTP(rack->rc_tp, NULL,
2996 &rack->rc_inp->inp_socket->so_rcv,
2997 &rack->rc_inp->inp_socket->so_snd,
3007 if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
3019 log.u_bbr.bw_inuse = rack->r_ctl.current_round;
3021 log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost;
3022 TCP_LOG_EVENTP(rack->rc_tp, NULL,
3023 &rack->rc_inp->inp_socket->so_rcv,
3024 &rack->rc_inp->inp_socket->so_snd,
3034 if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
3039 log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
3042 log.u_bbr.flex3 = tp->t_maxunacktime;
3043 log.u_bbr.flex4 = tp->t_acktime;
3046 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
3047 log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
3048 log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
3049 log.u_bbr.pacing_gain = rack->r_must_retran;
3050 log.u_bbr.bw_inuse = rack->r_ctl.current_round;
3052 log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost;
3054 &rack->rc_inp->inp_socket->so_rcv,
3055 &rack->rc_inp->inp_socket->so_snd,
3064 if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
3068 log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
3070 if (rack->rack_no_prr)
3073 log.u_bbr.flex2 = rack->r_ctl.rc_prr_sndcnt;
3074 log.u_bbr.flex4 = rack->r_ctl.rc_hpts_flags;
3076 log.u_bbr.flex7 = (0x0000ffff & rack->r_ctl.rc_hpts_flags);
3077 log.u_bbr.flex8 = rack->rc_in_persist;
3079 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
3080 log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
3081 log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
3082 log.u_bbr.pacing_gain = rack->r_must_retran;
3083 TCP_LOG_EVENTP(rack->rc_tp, NULL,
3084 &rack->rc_inp->inp_socket->so_rcv,
3085 &rack->rc_inp->inp_socket->so_snd,
3094 if (tcp_bblogging_on(rack->rc_tp)) {
3102 log.u_bbr.flex4 = rack->r_ctl.rc_hpts_flags;
3103 if (rack->rack_no_prr)
3106 log.u_bbr.flex5 = rack->r_ctl.rc_prr_sndcnt;
3108 log.u_bbr.applimited = rack->r_ctl.rc_pace_min_segs;
3109 log.u_bbr.flex7 = rack->rc_ack_can_sendout_data; /* Do we have ack-can-send set */
3111 log.u_bbr.flex7 |= rack->r_fast_output; /* is fast output primed */
3113 log.u_bbr.flex7 |= rack->r_wanted_output; /* Do we want output */
3114 log.u_bbr.flex8 = rack->rc_in_persist;
3115 log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
3117 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
3118 log.u_bbr.use_lt_bw = rack->r_ent_rec_ns;
3120 log.u_bbr.use_lt_bw |= rack->r_might_revert;
3121 log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
3122 log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
3123 log.u_bbr.pacing_gain = rack->r_must_retran;
3124 log.u_bbr.bw_inuse = rack->r_ctl.current_round;
3126 log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost;
3127 log.u_bbr.epoch = rack->rc_inp->inp_socket->so_snd.sb_hiwat;
3128 log.u_bbr.lt_epoch = rack->rc_inp->inp_socket->so_rcv.sb_hiwat;
3129 log.u_bbr.lost = rack->rc_tp->t_srtt;
3130 log.u_bbr.pkt_epoch = rack->rc_tp->rfbuf_cnt;
3131 TCP_LOG_EVENTP(rack->rc_tp, NULL,
3132 &rack->rc_inp->inp_socket->so_rcv,
3133 &rack->rc_inp->inp_socket->so_snd,
3142 if (tcp_bblogging_on(rack->rc_tp)) {
3147 log.u_bbr.flex1 = rack->r_ctl.rc_pace_min_segs;
3148 log.u_bbr.flex3 = rack->r_ctl.rc_pace_max_segs;
3151 log.u_bbr.flex7 = rack->r_ctl.rc_user_set_min_segs;
3155 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
3156 log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
3157 log.u_bbr.applimited = rack->r_ctl.rc_sacked;
3158 log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
3159 log.u_bbr.pacing_gain = rack->r_must_retran;
3160 TCP_LOG_EVENTP(tp, NULL, &tptosocket(tp)->so_rcv,
3161 &tptosocket(tp)->so_snd,
3170 if (tcp_bblogging_on(rack->rc_tp)) {
3175 log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
3177 log.u_bbr.flex2 = rack->r_ctl.rc_hpts_flags;
3179 if (rack->rack_no_prr)
3182 log.u_bbr.flex5 = rack->r_ctl.rc_prr_sndcnt;
3184 log.u_bbr.flex8 = rack->rc_in_persist;
3187 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
3188 log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
3189 log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
3190 log.u_bbr.pacing_gain = rack->r_must_retran;
3191 log.u_bbr.cwnd_gain = rack->rc_has_collapsed;
3192 log.u_bbr.bw_inuse = rack->r_ctl.current_round;
3194 log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost;
3195 TCP_LOG_EVENTP(rack->rc_tp, NULL,
3196 &rack->rc_inp->inp_socket->so_rcv,
3197 &rack->rc_inp->inp_socket->so_snd,
3207 if (tcp_bblogging_on(rack->rc_tp)) {
3211 log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
3213 log.u_bbr.flex2 = rack->r_ctl.rc_last_output_to;
3216 if (rack->rack_no_prr)
3219 log.u_bbr.flex5 = rack->r_ctl.rc_prr_sndcnt;
3220 log.u_bbr.flex6 = rack->rc_tp->t_rxtcur;
3223 log.u_bbr.applimited = rack->r_ctl.rc_hpts_flags;
3225 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
3226 log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
3227 log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
3228 log.u_bbr.pacing_gain = rack->r_must_retran;
3229 log.u_bbr.bw_inuse = rack->r_ctl.current_round;
3231 log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost;
3232 TCP_LOG_EVENTP(rack->rc_tp, NULL,
3233 &rack->rc_inp->inp_socket->so_rcv,
3234 &rack->rc_inp->inp_socket->so_snd,
3247 if (tcp_bblogging_on(rack->rc_tp)) {
3265 TCP_LOG_EVENTP(rack->rc_tp, NULL,
3266 &rack->rc_inp->inp_socket->so_rcv,
3267 &rack->rc_inp->inp_socket->so_snd,
3276 if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
3283 log.u_bbr.flex3 = rack->r_ctl.rc_timer_exp;
3284 log.u_bbr.flex4 = rack->r_ctl.rc_hpts_flags;
3286 if (rack->rack_no_prr)
3289 log.u_bbr.flex6 = rack->r_ctl.rc_prr_sndcnt;
3290 log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
3291 log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
3292 log.u_bbr.pacing_gain = rack->r_must_retran;
3294 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
3295 TCP_LOG_EVENTP(rack->rc_tp, NULL,
3296 &rack->rc_inp->inp_socket->so_rcv,
3297 &rack->rc_inp->inp_socket->so_snd,
3306 if (tcp_bblogging_on(rack->rc_tp)) {
3311 log.u_bbr.flex1 = rack->r_ctl.rc_prr_out;
3312 log.u_bbr.flex2 = rack->r_ctl.rc_prr_recovery_fs;
3313 if (rack->rack_no_prr)
3316 log.u_bbr.flex3 = rack->r_ctl.rc_prr_sndcnt;
3317 log.u_bbr.flex4 = rack->r_ctl.rc_prr_delivered;
3318 log.u_bbr.flex5 = rack->r_ctl.rc_sacked;
3319 log.u_bbr.flex6 = rack->r_ctl.rc_holes_rxt;
3324 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
3325 log.u_bbr.use_lt_bw = rack->r_ent_rec_ns;
3327 log.u_bbr.use_lt_bw |= rack->r_might_revert;
3328 TCP_LOG_EVENTP(rack->rc_tp, NULL,
3329 &rack->rc_inp->inp_socket->so_rcv,
3330 &rack->rc_inp->inp_socket->so_snd,
3408 if (rack->rc_free_cnt > rack_free_cache) {
3409 rsm = TAILQ_FIRST(&rack->r_ctl.rc_free);
3410 TAILQ_REMOVE(&rack->r_ctl.rc_free, rsm, r_tnext);
3412 rack->rc_free_cnt--;
3422 rack->r_ctl.rc_num_maps_alloced++;
3430 if (rack->rc_free_cnt) {
3432 rsm = TAILQ_FIRST(&rack->r_ctl.rc_free);
3433 TAILQ_REMOVE(&rack->r_ctl.rc_free, rsm, r_tnext);
3434 rack->rc_free_cnt--;
3444 (rack->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
3446 if (!rack->alloc_limit_reported) {
3447 rack->alloc_limit_reported = 1;
3463 if (rack->r_ctl.rc_split_limit > 0 &&
3464 rack->r_ctl.rc_num_split_allocs >= rack->r_ctl.rc_split_limit) {
3466 if (!rack->alloc_limit_reported) {
3467 rack->alloc_limit_reported = 1;
3477 rsm->r_limit_type = limit_type;
3478 rack->r_ctl.rc_num_split_allocs++;
3492 while (rack->rc_free_cnt > rack_free_cache) {
3493 rsm = TAILQ_LAST(&rack->r_ctl.rc_free, rack_head);
3494 TAILQ_REMOVE(&rack->r_ctl.rc_free, rsm, r_tnext);
3495 rack->rc_free_cnt--;
3496 rack->r_ctl.rc_num_maps_alloced--;
3504 if (rsm->r_flags & RACK_APP_LIMITED) {
3505 if (rack->r_ctl.rc_app_limited_cnt > 0) {
3506 rack->r_ctl.rc_app_limited_cnt--;
3509 if (rsm->r_limit_type) {
3511 rack->r_ctl.rc_num_split_allocs--;
3513 if (rsm == rack->r_ctl.rc_first_appl) {
3514 rack->r_ctl.cleared_app_ack_seq = rsm->r_start + (rsm->r_end - rsm->r_start);
3515 rack->r_ctl.cleared_app_ack = 1;
3516 if (rack->r_ctl.rc_app_limited_cnt == 0)
3517 rack->r_ctl.rc_first_appl = NULL;
3519 rack->r_ctl.rc_first_appl = tqhash_find(rack->r_ctl.tqh, rsm->r_nseq_appl);
3521 if (rsm == rack->r_ctl.rc_resend)
3522 rack->r_ctl.rc_resend = NULL;
3523 if (rsm == rack->r_ctl.rc_end_appl)
3524 rack->r_ctl.rc_end_appl = NULL;
3525 if (rack->r_ctl.rc_tlpsend == rsm)
3526 rack->r_ctl.rc_tlpsend = NULL;
3527 if (rack->r_ctl.rc_sacklast == rsm)
3528 rack->r_ctl.rc_sacklast = NULL;
3531 if ((rack->rc_free_cnt + 1) > RACK_FREE_CNT_MAX) {
3534 TAILQ_INSERT_HEAD(&rack->r_ctl.rc_free, rsm, r_tnext);
3535 rack->rc_free_cnt++;
3544 segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs);
3546 if (rack->rc_gp_filled == 0) {
3583 srtt = (uint64_t)tp->t_srtt;
3625 if (SEQ_LT(th_ack, tp->gput_seq)) {
3629 if ((tp->snd_max == tp->snd_una) ||
3630 (th_ack == tp->snd_max)){
3644 if (SEQ_GEQ(th_ack, tp->gput_ack)) {
3654 segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs);
3655 if (SEQ_LT(th_ack, tp->gput_ack) &&
3656 ((th_ack - tp->gput_seq) < max(rc_init_window(rack), (MIN_GP_WIN * segsiz)))) {
3660 if (rack->r_ctl.rc_first_appl &&
3661 (SEQ_GEQ(th_ack, rack->r_ctl.rc_first_appl->r_end))) {
3670 srtts = (rack->r_ctl.rc_gp_srtt * rack_min_srtts);
3671 tim = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time) - tp->gput_ts;
3672 if ((tim >= srtts) && (IN_RECOVERY(rack->rc_tp->t_flags) == 0)) {
3691 if (tcp_bblogging_on(rack->rc_tp)) {
3697 log.u_bbr.flex2 = rack->rc_gp_timely_inc_cnt;
3699 log.u_bbr.flex2 |= rack->rc_gp_timely_dec_cnt;
3701 log.u_bbr.flex2 |= rack->rc_gp_incr;
3703 log.u_bbr.flex2 |= rack->rc_gp_bwred;
3704 log.u_bbr.flex3 = rack->rc_gp_incr;
3705 log.u_bbr.flex4 = rack->r_ctl.rack_per_of_gp_ss;
3706 log.u_bbr.flex5 = rack->r_ctl.rack_per_of_gp_ca;
3707 log.u_bbr.flex6 = rack->r_ctl.rack_per_of_gp_rec;
3708 log.u_bbr.flex7 = rack->rc_gp_bwred;
3715 log.u_bbr.pkts_out = rack->r_ctl.rc_rtt_diff;
3717 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
3718 log.u_bbr.epoch = rack->r_ctl.rc_gp_srtt;
3719 log.u_bbr.lt_epoch = rack->r_ctl.rc_prev_gp_srtt;
3720 log.u_bbr.cwnd_gain = rack->rc_dragged_bottom;
3722 log.u_bbr.cwnd_gain |= rack->rc_gp_saw_rec;
3724 log.u_bbr.cwnd_gain |= rack->rc_gp_saw_ss;
3726 log.u_bbr.cwnd_gain |= rack->rc_gp_saw_ca;
3727 log.u_bbr.lost = rack->r_ctl.rc_loss_count;
3728 TCP_LOG_EVENTP(rack->rc_tp, NULL,
3729 &rack->rc_inp->inp_socket->so_rcv,
3730 &rack->rc_inp->inp_socket->so_snd,
3817 if (rack->r_ctl.rack_per_of_gp_rec < 100) {
3819 rack->r_ctl.rack_per_of_gp_rec = 100;
3821 if (rack->r_ctl.rack_per_of_gp_ca < 100) {
3822 rack->r_ctl.rack_per_of_gp_ca = 100;
3824 if (rack->r_ctl.rack_per_of_gp_ss < 100) {
3825 rack->r_ctl.rack_per_of_gp_ss = 100;
3832 if (rack->r_ctl.rack_per_of_gp_ca > 100) {
3833 rack->r_ctl.rack_per_of_gp_ca = 100;
3835 if (rack->r_ctl.rack_per_of_gp_ss > 100) {
3836 rack->r_ctl.rack_per_of_gp_ss = 100;
3847 if (rack->rc_skip_timely)
3854 * to a new-reno flow.
3859 if (rack->rc_gp_incr &&
3860 ((rack->rc_gp_timely_inc_cnt + 1) >= RACK_TIMELY_CNT_BOOST)) {
3867 rack->rc_gp_timely_inc_cnt = 0;
3872 ((rack->r_ctl.rc_rtt_diff <= 0) || (timely_says <= 0)))
3874 if (rack->rc_gp_saw_rec &&
3875 (rack->rc_gp_no_rec_chg == 0) &&
3877 rack->r_ctl.rack_per_of_gp_rec)) {
3879 calc = rack->r_ctl.rack_per_of_gp_rec + plus;
3883 rack->r_ctl.rack_per_of_gp_rec = (uint16_t)calc;
3884 if (rack->r_ctl.rack_per_upper_bound_ca &&
3885 (rack->rc_dragged_bottom == 0) &&
3886 (rack->r_ctl.rack_per_of_gp_rec > rack->r_ctl.rack_per_upper_bound_ca))
3887 rack->r_ctl.rack_per_of_gp_rec = rack->r_ctl.rack_per_upper_bound_ca;
3889 if (rack->rc_gp_saw_ca &&
3890 (rack->rc_gp_saw_ss == 0) &&
3892 rack->r_ctl.rack_per_of_gp_ca)) {
3894 calc = rack->r_ctl.rack_per_of_gp_ca + plus;
3898 rack->r_ctl.rack_per_of_gp_ca = (uint16_t)calc;
3899 if (rack->r_ctl.rack_per_upper_bound_ca &&
3900 (rack->rc_dragged_bottom == 0) &&
3901 (rack->r_ctl.rack_per_of_gp_ca > rack->r_ctl.rack_per_upper_bound_ca))
3902 rack->r_ctl.rack_per_of_gp_ca = rack->r_ctl.rack_per_upper_bound_ca;
3904 if (rack->rc_gp_saw_ss &&
3906 rack->r_ctl.rack_per_of_gp_ss)) {
3908 calc = rack->r_ctl.rack_per_of_gp_ss + plus;
3911 rack->r_ctl.rack_per_of_gp_ss = (uint16_t)calc;
3912 if (rack->r_ctl.rack_per_upper_bound_ss &&
3913 (rack->rc_dragged_bottom == 0) &&
3914 (rack->r_ctl.rack_per_of_gp_ss > rack->r_ctl.rack_per_upper_bound_ss))
3915 rack->r_ctl.rack_per_of_gp_ss = rack->r_ctl.rack_per_upper_bound_ss;
3919 (rack->rc_gp_incr == 0)){
3921 rack->rc_gp_incr = 1;
3922 rack->rc_gp_timely_inc_cnt = 0;
3924 if (rack->rc_gp_incr &&
3926 (rack->rc_gp_timely_inc_cnt < RACK_TIMELY_CNT_BOOST)) {
3927 rack->rc_gp_timely_inc_cnt++;
3936 /*-
3938 * new_per = curper * (1 - B * norm_grad)
3941 * rtt_dif = input var current rtt-diff
3954 * (uint64_t)get_filter_small(&rack->r_ctl.rc_gp_min_rtt));
3957 * reduce_by = (1000000 - inverse);
3963 perf = (((uint64_t)curper * ((uint64_t)1000000 -
3966 (uint64_t)get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt)))/
3971 perf = curper - 1;
3981 * result = curper * (1 - (B * ( 1 - ------ ))
3990 highrttthresh = get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt) * rack_gp_rtt_maxmul;
3992 perf = (((uint64_t)curper * ((uint64_t)1000000 -
3993 ((uint64_t)rack_gp_decrease_per * ((uint64_t)1000000 -
3996 if (tcp_bblogging_on(rack->rc_tp)) {
4019 if (rack->rc_skip_timely)
4021 if (rack->rc_gp_incr) {
4023 rack->rc_gp_incr = 0;
4024 rack->rc_gp_timely_inc_cnt = 0;
4030 rtt_diff *= -1;
4033 if (rack->rc_gp_saw_rec && (rack->rc_gp_no_rec_chg == 0)) {
4036 new_per = rack_decrease_highrtt(rack, rack->r_ctl.rack_per_of_gp_rec, rtt);
4037 alt = rack_get_decrease(rack, rack->r_ctl.rack_per_of_gp_rec, rtt_diff);
4043 val = new_per = alt = rack_get_decrease(rack, rack->r_ctl.rack_per_of_gp_rec, rtt_diff);
4044 if (rack->r_ctl.rack_per_of_gp_rec > val) {
4045 rec_red = (rack->r_ctl.rack_per_of_gp_rec - val);
4046 rack->r_ctl.rack_per_of_gp_rec = (uint16_t)val;
4048 rack->r_ctl.rack_per_of_gp_rec = rack_per_lower_bound;
4051 if (rack_per_lower_bound > rack->r_ctl.rack_per_of_gp_rec)
4052 rack->r_ctl.rack_per_of_gp_rec = rack_per_lower_bound;
4055 if (rack->rc_gp_saw_ss) {
4058 new_per = rack_decrease_highrtt(rack, rack->r_ctl.rack_per_of_gp_ss, rtt);
4059 alt = rack_get_decrease(rack, rack->r_ctl.rack_per_of_gp_ss, rtt_diff);
4065 val = new_per = alt = rack_get_decrease(rack, rack->r_ctl.rack_per_of_gp_ss, rtt_diff);
4066 if (rack->r_ctl.rack_per_of_gp_ss > new_per) {
4067 ss_red = rack->r_ctl.rack_per_of_gp_ss - val;
4068 rack->r_ctl.rack_per_of_gp_ss = (uint16_t)val;
4071 rack->r_ctl.rack_per_of_gp_ss = rack_per_lower_bound;
4080 logvar3 |= get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt);
4085 if (rack_per_lower_bound > rack->r_ctl.rack_per_of_gp_ss)
4086 rack->r_ctl.rack_per_of_gp_ss = rack_per_lower_bound;
4088 } else if (rack->rc_gp_saw_ca) {
4091 new_per = rack_decrease_highrtt(rack, rack->r_ctl.rack_per_of_gp_ca, rtt);
4092 alt = rack_get_decrease(rack, rack->r_ctl.rack_per_of_gp_ca, rtt_diff);
4098 val = new_per = alt = rack_get_decrease(rack, rack->r_ctl.rack_per_of_gp_ca, rtt_diff);
4099 if (rack->r_ctl.rack_per_of_gp_ca > val) {
4100 ca_red = rack->r_ctl.rack_per_of_gp_ca - val;
4101 rack->r_ctl.rack_per_of_gp_ca = (uint16_t)val;
4103 rack->r_ctl.rack_per_of_gp_ca = rack_per_lower_bound;
4113 logvar3 |= get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt);
4118 if (rack_per_lower_bound > rack->r_ctl.rack_per_of_gp_ca)
4119 rack->r_ctl.rack_per_of_gp_ca = rack_per_lower_bound;
4122 if (rack->rc_gp_timely_dec_cnt < 0x7) {
4123 rack->rc_gp_timely_dec_cnt++;
4125 (rack->rc_gp_timely_dec_cnt == rack_timely_dec_clear))
4126 rack->rc_gp_timely_dec_cnt = 0;
4139 if (tcp_bblogging_on(rack->rc_tp)) {
4145 log.u_bbr.flex2 = rack->r_ctl.rc_time_probertt_starts;
4146 log.u_bbr.flex3 = rack->r_ctl.rc_lower_rtt_us_cts;
4147 log.u_bbr.flex4 = rack->r_ctl.rack_per_of_gp_ss;
4149 log.u_bbr.flex6 = rack->rc_highly_buffered;
4151 log.u_bbr.flex6 |= rack->forced_ack;
4153 log.u_bbr.flex6 |= rack->rc_gp_dyn_mul;
4155 log.u_bbr.flex6 |= rack->in_probe_rtt;
4157 log.u_bbr.flex6 |= rack->measure_saw_probe_rtt;
4158 log.u_bbr.flex7 = rack->r_ctl.rack_per_of_gp_probertt;
4159 log.u_bbr.pacing_gain = rack->r_ctl.rack_per_of_gp_ca;
4160 log.u_bbr.cwnd_gain = rack->r_ctl.rack_per_of_gp_rec;
4164 log.u_bbr.cur_del_rate = rack->r_ctl.rc_highest_us_rtt;
4166 log.u_bbr.cur_del_rate |= rack->r_ctl.rc_lowest_us_rtt;
4167 log.u_bbr.applimited = rack->r_ctl.rc_time_probertt_entered;
4168 log.u_bbr.pkts_out = rack->r_ctl.rc_rtt_diff;
4169 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
4170 log.u_bbr.epoch = rack->r_ctl.rc_gp_srtt;
4171 log.u_bbr.lt_epoch = rack->r_ctl.rc_prev_gp_srtt;
4172 log.u_bbr.pkt_epoch = rack->r_ctl.rc_lower_rtt_us_cts;
4173 log.u_bbr.delivered = rack->r_ctl.rc_target_probertt_flight;
4174 log.u_bbr.lost = get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt);
4177 log.u_bbr.rttProp |= rack->r_ctl.rc_entry_gp_rtt;
4178 TCP_LOG_EVENTP(rack->rc_tp, NULL,
4179 &rack->rc_inp->inp_socket->so_rcv,
4180 &rack->rc_inp->inp_socket->so_snd,
4182 0, &log, false, &rack->r_ctl.act_rcv_time);
4194 rack->r_ctl.rc_target_probertt_flight = roundup((uint32_t)bwdp, segsiz);
4195 if (rack->r_ctl.rc_target_probertt_flight < (segsiz * rack_timely_min_segs)) {
4201 rack->r_ctl.rc_target_probertt_flight = (segsiz * rack_timely_min_segs);
4222 rack->r_ctl.rc_lower_rtt_us_cts = us_cts;
4223 if (rack->rc_gp_dyn_mul == 0)
4226 if (rack->rc_tp->snd_max == rack->rc_tp->snd_una) {
4230 if ((rack->rc_tp->t_flags & TF_GPUTINPROG) &&
4231 SEQ_GT(rack->rc_tp->snd_una, rack->rc_tp->gput_seq)) {
4239 rack_do_goodput_measurement(rack->rc_tp, rack,
4240 rack->rc_tp->snd_una, __LINE__,
4243 rack->r_ctl.rack_per_of_gp_probertt = rack_per_of_gp_probertt;
4244 rack->r_ctl.rc_time_probertt_entered = us_cts;
4245 segsiz = min(ctf_fixed_maxseg(rack->rc_tp),
4246 rack->r_ctl.rc_pace_min_segs);
4247 rack->in_probe_rtt = 1;
4248 rack->measure_saw_probe_rtt = 1;
4249 rack->r_ctl.rc_time_probertt_starts = 0;
4250 rack->r_ctl.rc_entry_gp_rtt = rack->r_ctl.rc_gp_srtt;
4252 rack_set_prtt_target(rack, segsiz, get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt));
4254 rack_set_prtt_target(rack, segsiz, rack->r_ctl.rc_gp_srtt);
4255 rack_log_rtt_shrinks(rack, us_cts, get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt),
4265 segsiz = min(ctf_fixed_maxseg(rack->rc_tp),
4266 rack->r_ctl.rc_pace_min_segs);
4267 rack->in_probe_rtt = 0;
4268 if ((rack->rc_tp->t_flags & TF_GPUTINPROG) &&
4269 SEQ_GT(rack->rc_tp->snd_una, rack->rc_tp->gput_seq)) {
4277 rack_do_goodput_measurement(rack->rc_tp, rack,
4278 rack->rc_tp->snd_una, __LINE__,
4280 } else if (rack->rc_tp->t_flags & TF_GPUTINPROG) {
4284 * probe-rtt. We probably are not interested in
4287 rack->rc_tp->t_flags &= ~TF_GPUTINPROG;
4293 * We need to mark these as app-limited so we
4296 rsm = tqhash_max(rack->r_ctl.tqh);
4297 if (rsm && ((rsm->r_flags & RACK_APP_LIMITED) == 0)) {
4298 if (rack->r_ctl.rc_app_limited_cnt == 0)
4299 rack->r_ctl.rc_end_appl = rack->r_ctl.rc_first_appl = rsm;
4306 if (rack->r_ctl.rc_end_appl)
4307 rack->r_ctl.rc_end_appl->r_nseq_appl = rsm->r_start;
4308 rack->r_ctl.rc_end_appl = rsm;
4310 rsm->r_flags |= RACK_APP_LIMITED;
4311 rack->r_ctl.rc_app_limited_cnt++;
4323 rack->rc_gp_incr = 0;
4324 rack->rc_gp_bwred = 0;
4325 rack->rc_gp_timely_inc_cnt = 0;
4326 rack->rc_gp_timely_dec_cnt = 0;
4329 if (rack->rc_highly_buffered && rack_atexit_prtt_hbp) {
4330 rack->r_ctl.rack_per_of_gp_ca = rack_atexit_prtt_hbp;
4331 rack->r_ctl.rack_per_of_gp_ss = rack_atexit_prtt_hbp;
4333 if ((rack->rc_highly_buffered == 0) && rack_atexit_prtt) {
4334 rack->r_ctl.rack_per_of_gp_ca = rack_atexit_prtt;
4335 rack->r_ctl.rack_per_of_gp_ss = rack_atexit_prtt;
4341 rack->r_ctl.rc_rtt_diff = 0;
4344 rack->rc_tp->t_bytes_acked = 0;
4345 rack->rc_tp->t_ccv.flags &= ~CCF_ABC_SENTAWND;
4358 get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt));
4362 rack->r_ctl.rc_gp_srtt);
4366 rack->r_ctl.rc_entry_gp_rtt);
4371 sum = rack->r_ctl.rc_entry_gp_rtt;
4373 sum /= (uint64_t)(max(1, rack->r_ctl.rc_gp_srtt));
4381 setval = rack->r_ctl.rc_entry_gp_rtt;
4388 setval = rack->r_ctl.rc_gp_srtt;
4389 if (setval > rack->r_ctl.rc_entry_gp_rtt)
4390 setval = rack->r_ctl.rc_entry_gp_rtt;
4397 setval = get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt);
4404 ebdp = rack->r_ctl.rc_target_probertt_flight;
4407 setto = rack->r_ctl.rc_target_probertt_flight + ebdp;
4409 setto = rack->r_ctl.rc_target_probertt_flight;
4410 rack->rc_tp->snd_cwnd = roundup(setto, segsiz);
4411 if (rack->rc_tp->snd_cwnd < (segsiz * rack_timely_min_segs)) {
4413 rack->rc_tp->snd_cwnd = segsiz * rack_timely_min_segs;
4416 rack->rc_tp->snd_ssthresh = (rack->rc_tp->snd_cwnd - 1);
4419 get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt),
4422 rack->r_ctl.rc_probertt_sndmax_atexit = rack->rc_tp->snd_max;
4423 rack->r_ctl.rc_time_probertt_entered = us_cts;
4424 rack->r_ctl.rc_time_probertt_starts = rack->r_ctl.rc_lower_rtt_us_cts = us_cts;
4425 rack->r_ctl.rc_time_of_last_probertt = us_cts;
4431 /* Check in on probe-rtt */
4433 if (rack->rc_gp_filled == 0) {
4434 /* We do not do p-rtt unless we have gp measurements */
4437 if (rack->in_probe_rtt) {
4441 if (rack->r_ctl.rc_went_idle_time &&
4442 ((us_cts - rack->r_ctl.rc_went_idle_time) > rack_min_probertt_hold)) {
4448 TSTMP_GT(us_cts, rack->r_ctl.rc_time_probertt_entered) &&
4449 ((us_cts - rack->r_ctl.rc_time_probertt_entered) > rack_probe_rtt_safety_val)) {
4454 get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt),
4459 endtime = rack->r_ctl.rc_time_probertt_entered + (rack->r_ctl.rc_gp_srtt * rack_max_drain_wait);
4460 if (rack->rc_highly_buffered)
4461 endtime += (rack->r_ctl.rc_gp_srtt * rack_max_drain_hbp);
4463 must_stay = rack->r_ctl.rc_time_probertt_entered + (rack->r_ctl.rc_gp_srtt * rack_must_drain);
4464 if ((ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked) > rack->r_ctl.rc_target_probertt_flight) &&
4469 if (TSTMP_GT(us_cts, rack->r_ctl.rc_time_probertt_entered))
4470 calc = us_cts - rack->r_ctl.rc_time_probertt_entered;
4473 calc /= max(rack->r_ctl.rc_gp_srtt, 1);
4478 rack->r_ctl.rack_per_of_gp_probertt = rack_per_of_gp_lowthresh;
4480 rack->r_ctl.rack_per_of_gp_probertt = rack_per_of_gp_probertt - calc;
4482 if (rack->r_ctl.rack_per_of_gp_probertt < rack_per_of_gp_lowthresh)
4483 rack->r_ctl.rack_per_of_gp_probertt = rack_per_of_gp_lowthresh;
4488 if (rack->r_ctl.rc_time_probertt_starts == 0) {
4490 rack->rc_highly_buffered) ||
4491 (ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked) >
4492 rack->r_ctl.rc_target_probertt_flight)) {
4497 get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt),
4499 rack->r_ctl.rc_time_probertt_starts = us_cts;
4500 if (rack->r_ctl.rc_time_probertt_starts == 0)
4501 rack->r_ctl.rc_time_probertt_starts = 1;
4503 rack->r_ctl.rack_per_of_gp_probertt = rack_per_of_gp_probertt;
4508 no_overflow = ((uint64_t)rack->r_ctl.rc_gp_srtt *
4515 endtime += rack->r_ctl.rc_time_probertt_starts;
4521 } else if ((rack->rc_skip_timely == 0) &&
4522 (TSTMP_GT(us_cts, rack->r_ctl.rc_lower_rtt_us_cts)) &&
4523 ((us_cts - rack->r_ctl.rc_lower_rtt_us_cts) >= rack_time_between_probertt)) {
4536 if ((rack->rc_gp_dyn_mul == 0) ||
4537 (rack->use_fixed_rate) ||
4538 (rack->in_probe_rtt) ||
4539 (rack->rc_always_pace == 0)) {
4543 losses = rack->r_ctl.rc_loss_count - rack->r_ctl.rc_loss_at_start;
4546 up_bnd = rack->r_ctl.last_gp_comp_bw * (uint64_t)rack_gp_per_bw_mul_up;
4548 up_bnd += rack->r_ctl.last_gp_comp_bw;
4550 subfr = (uint64_t)rack->r_ctl.last_gp_comp_bw * (uint64_t)rack_gp_per_bw_mul_down;
4552 low_bnd = rack->r_ctl.last_gp_comp_bw - subfr;
4553 if ((timely_says == 2) && (rack->r_ctl.rc_no_push_at_mrtt)) {
4566 if (rack->r_ctl.rc_no_push_at_mrtt > 1)
4585 rack->r_ctl.last_gp_comp_bw = cur_bw;
4586 if (rack->rc_gp_bwred == 0) {
4588 rack->rc_gp_bwred = 1;
4589 rack->rc_gp_timely_dec_cnt = 0;
4591 if (rack->rc_gp_timely_dec_cnt < rack_timely_max_push_drop) {
4594 * to try to gain back (we include override to
4597 if ((rack->rc_gp_saw_ca && rack->r_ctl.rack_per_of_gp_ca <= rack_down_raise_thresh) ||
4598 (rack->rc_gp_saw_ss && rack->r_ctl.rack_per_of_gp_ss <= rack_down_raise_thresh) ||
4612 rack->rc_gp_timely_dec_cnt++;
4613 /* We are not incrementing really no-count */
4614 rack->rc_gp_incr = 0;
4615 rack->rc_gp_timely_inc_cnt = 0;
4635 rack->r_ctl.last_gp_comp_bw = cur_bw;
4636 if (rack->rc_gp_saw_ss &&
4637 rack->r_ctl.rack_per_upper_bound_ss &&
4638 (rack->r_ctl.rack_per_of_gp_ss == rack->r_ctl.rack_per_upper_bound_ss)) {
4645 if (rack->rc_gp_saw_ca &&
4646 rack->r_ctl.rack_per_upper_bound_ca &&
4647 (rack->r_ctl.rack_per_of_gp_ca == rack->r_ctl.rack_per_upper_bound_ca)) {
4654 rack->rc_gp_bwred = 0;
4655 rack->rc_gp_timely_dec_cnt = 0;
4657 if ((rack->rc_gp_incr < rack_timely_max_push_rise) || (timely_says == 0)) {
4674 rack->rc_gp_incr = 0;
4675 rack->rc_gp_timely_inc_cnt = 0;
4676 if ((rack->rc_gp_timely_dec_cnt < rack_timely_max_push_drop) &&
4681 rack->rc_gp_timely_dec_cnt++;
4682 /* We are not incrementing really no-count */
4683 rack->rc_gp_incr = 0;
4684 rack->rc_gp_timely_inc_cnt = 0;
4688 rack->rc_gp_bwred = 0;
4689 rack->rc_gp_timely_dec_cnt = 0;
4704 if (rtt >= (get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt) *
4708 log_mult = get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt) * rack_gp_rtt_maxmul;
4712 get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt),
4714 } else if (rtt <= (get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt) +
4715 ((get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt) * rack_gp_rtt_minmul) /
4718 log_mult = get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt) +
4719 ((get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt) * rack_gp_rtt_minmul) /
4725 get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt),
4748 get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt), log_rtt_a_diff, __LINE__, 6);
4753 get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt), log_rtt_a_diff, __LINE__, 7);
4762 if (SEQ_GEQ(rsm->r_start, tp->gput_seq) &&
4763 SEQ_LEQ(rsm->r_end, tp->gput_ack)) {
4768 * |----------------|
4769 * |-----| <or>
4770 * |----|
4771 * <or> |---|
4774 } else if (SEQ_LT(rsm->r_start, tp->gput_seq) &&
4775 SEQ_GT(rsm->r_end, tp->gput_seq)){
4778 * |--------------|
4779 * |-------->|
4782 } else if (SEQ_GEQ(rsm->r_start, tp->gput_seq) &&
4783 SEQ_LT(rsm->r_start, tp->gput_ack) &&
4784 SEQ_GEQ(rsm->r_end, tp->gput_ack)) {
4788 * |--------------|
4789 * |-------->|
4800 if ((tp->t_flags & TF_GPUTINPROG) == 0)
4808 rsm->r_flags |= RACK_IN_GP_WIN;
4810 rsm->r_flags &= ~RACK_IN_GP_WIN;
4819 rsm = tqhash_find(rack->r_ctl.tqh, tp->gput_seq);
4821 rsm = tqhash_min(rack->r_ctl.tqh);
4824 while ((rsm != NULL) && (SEQ_GEQ(tp->gput_ack, rsm->r_start))){
4825 rsm->r_flags &= ~RACK_IN_GP_WIN;
4826 rsm = tqhash_next(rack->r_ctl.tqh, rsm);
4836 if (tp->snd_una == tp->snd_max) {
4840 if (SEQ_GT(tp->gput_seq, tp->snd_una)) {
4847 rsm = tqhash_min(rack->r_ctl.tqh);
4850 if (SEQ_GEQ(rsm->r_end, tp->gput_seq))
4852 rsm = tqhash_next(rack->r_ctl.tqh, rsm);
4860 rsm = tqhash_find(rack->r_ctl.tqh, tp->gput_seq);
4868 * *before* we started our measurement. The rsm, if non-null
4873 rsm = tqhash_next(rack->r_ctl.tqh, rsm);
4876 if (SEQ_GT(rsm->r_end, tp->gput_ack))
4878 rsm = tqhash_next(rack->r_ctl.tqh, rsm);
4885 if (tcp_bblogging_on(rack->rc_tp)) {
4897 log.u_bbr.delRate = rack->r_ctl.gp_bw;
4900 TCP_LOG_EVENTP(rack->rc_tp, NULL,
4901 &rack->rc_inp->inp_socket->so_rcv,
4902 &rack->rc_inp->inp_socket->so_snd,
4904 0, &log, false, &rack->r_ctl.act_rcv_time);
4918 us_cts = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time);
4919 segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs);
4920 if (TSTMP_GEQ(us_cts, tp->gput_ts))
4921 tim = us_cts - tp->gput_ts;
4924 if (rack->r_ctl.rc_gp_cumack_ts > rack->r_ctl.rc_gp_output_ts)
4925 stim = rack->r_ctl.rc_gp_cumack_ts - rack->r_ctl.rc_gp_output_ts;
4940 rack_log_gpset(rack, th_ack, us_cts, rack->r_ctl.rc_gp_cumack_ts, __LINE__, 3, NULL);
4952 if (rack->r_ctl.rc_gp_lowrtt == 0xffffffff) {
4986 rack->r_ctl.last_max_bw = rack->r_ctl.rc_gp_high_rwnd;
4987 rack->r_ctl.last_max_bw *= HPTS_USEC_IN_SEC;
4988 rack->r_ctl.last_max_bw /= rack->r_ctl.rc_gp_lowrtt;
4989 if (SEQ_LT(th_ack, tp->gput_seq)) {
4997 bytes = (th_ack - tp->gput_seq);
5008 if (rack->rc_gp_filled == 0) {
5017 * IW - 2MSS.
5019 reqbytes -= (2 * segsiz);
5021 rack->r_ctl.rc_prev_gp_srtt = rack->r_ctl.rc_gp_srtt;
5023 if ((bytes_ps < reqbytes) || rack->app_limited_needs_set) {
5025 rack->r_ctl.rc_app_limited_cnt,
5033 new_rtt_diff = (int32_t)rack->r_ctl.rc_gp_srtt - (int32_t)rack->r_ctl.rc_prev_gp_srtt;
5034 if (rack->rc_gp_filled == 0) {
5036 rack->r_ctl.rc_rtt_diff = new_rtt_diff;
5038 if (rack->measure_saw_probe_rtt == 0) {
5045 rack->r_ctl.rc_rtt_diff -= (rack->r_ctl.rc_rtt_diff / 8);
5046 rack->r_ctl.rc_rtt_diff += (new_rtt_diff / 8);
5050 rack->r_ctl.rc_gp_srtt,
5051 rack->r_ctl.rc_rtt_diff,
5052 rack->r_ctl.rc_prev_gp_srtt
5056 if (bytes_ps > rack->r_ctl.last_max_bw) {
5067 bytes_ps, rack->r_ctl.last_max_bw, 0,
5069 bytes_ps = rack->r_ctl.last_max_bw;
5072 if (rack->rc_gp_filled == 0) {
5075 rack->r_ctl.gp_bw = bytes_ps;
5076 rack->rc_gp_filled = 1;
5077 rack->r_ctl.num_measurements = 1;
5078 rack_set_pace_segments(rack->rc_tp, rack, __LINE__, NULL);
5081 rack->r_ctl.rc_app_limited_cnt,
5084 if (tcp_in_hpts(rack->rc_tp) &&
5085 (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT)) {
5088 * where we transition from un-paced to paced.
5094 tcp_hpts_remove(rack->rc_tp);
5095 rack->r_ctl.rc_hpts_flags = 0;
5096 rack->r_ctl.rc_last_output_to = 0;
5099 } else if (rack->r_ctl.num_measurements < RACK_REQ_AVG) {
5101 rack->r_ctl.gp_bw += bytes_ps;
5102 addpart = rack->r_ctl.num_measurements;
5103 rack->r_ctl.num_measurements++;
5104 if (rack->r_ctl.num_measurements >= RACK_REQ_AVG) {
5106 rack->r_ctl.gp_bw /= (uint64_t)rack->r_ctl.num_measurements;
5125 if (rack->r_ctl.num_measurements < 0xff) {
5126 rack->r_ctl.num_measurements++;
5128 srtt = (uint64_t)tp->t_srtt;
5133 if (rack->r_ctl.rc_rack_min_rtt)
5134 srtt = rack->r_ctl.rc_rack_min_rtt;
5149 * and non-dynamic... but considering lots of folks
5154 if (rack->rc_gp_dyn_mul == 0) {
5155 subpart = rack->r_ctl.gp_bw * utim;
5157 if (subpart < (rack->r_ctl.gp_bw / 2)) {
5176 subpart = rack->r_ctl.gp_bw / 2;
5181 resid_bw = rack->r_ctl.gp_bw - subpart;
5182 rack->r_ctl.gp_bw = resid_bw + addpart;
5194 subpart = rack->r_ctl.gp_bw * utim;
5205 subpart = rack->r_ctl.gp_bw / rack_wma_divisor;
5209 if ((rack->measure_saw_probe_rtt == 0) ||
5210 (bytes_ps > rack->r_ctl.gp_bw)) {
5212 * For probe-rtt we only add it in
5218 resid_bw = rack->r_ctl.gp_bw - subpart;
5219 rack->r_ctl.gp_bw = resid_bw + addpart;
5226 * or first-slowstart that ensues. If we ever needed to watch
5230 if ((rack->rc_initial_ss_comp == 0) &&
5231 (rack->r_ctl.num_measurements >= RACK_REQ_AVG)) {
5235 if (tcp_bblogging_on(rack->rc_tp)) {
5241 log.u_bbr.flex1 = rack->r_ctl.current_round;
5242 log.u_bbr.flex2 = rack->r_ctl.last_rnd_of_gp_rise;
5244 log.u_bbr.cur_del_rate = rack->r_ctl.last_gpest;
5249 if ((rack->r_ctl.num_measurements == RACK_REQ_AVG) ||
5250 (rack->r_ctl.last_gpest == 0)) {
5257 rack->r_ctl.last_rnd_of_gp_rise = rack->r_ctl.current_round;
5258 rack->r_ctl.last_gpest = rack->r_ctl.gp_bw;
5259 } else if (gp_est >= rack->r_ctl.last_gpest) {
5266 gp_est /= rack->r_ctl.last_gpest;
5267 if ((uint32_t)gp_est > rack->r_ctl.gp_gain_req) {
5271 if (tcp_bblogging_on(rack->rc_tp)) {
5277 log.u_bbr.flex1 = rack->r_ctl.current_round;
5279 log.u_bbr.flex3 = rack->r_ctl.gp_gain_req;
5281 log.u_bbr.cur_del_rate = rack->r_ctl.last_gpest;
5286 rack->r_ctl.last_rnd_of_gp_rise = rack->r_ctl.current_round;
5287 if (rack->r_ctl.use_gp_not_last == 1)
5288 rack->r_ctl.last_gpest = rack->r_ctl.gp_bw;
5290 rack->r_ctl.last_gpest = bytes_ps;
5294 if ((rack->gp_ready == 0) &&
5295 (rack->r_ctl.num_measurements >= rack->r_ctl.req_measurements)) {
5297 rack->gp_ready = 1;
5298 if (rack->dgp_on ||
5299 rack->rack_hibeta)
5301 if (rack->defer_options)
5306 /* We do not update any multipliers if we are in or have seen a probe-rtt */
5308 if ((rack->measure_saw_probe_rtt == 0) &&
5309 rack->rc_gp_rtt_set) {
5310 if (rack->rc_skip_timely == 0) {
5312 rack->r_ctl.rc_gp_srtt,
5313 rack->r_ctl.rc_rtt_diff);
5322 rack->r_ctl.gp_bw, /* delRate */
5326 rack->r_ctl.rc_prev_gp_srtt = rack->r_ctl.rc_gp_srtt;
5328 rack->r_ctl.rc_loss_at_start = rack->r_ctl.rc_loss_count;
5334 rack->rc_gp_rtt_set = 0;
5335 rack->rc_gp_saw_rec = 0;
5336 rack->rc_gp_saw_ca = 0;
5337 rack->rc_gp_saw_ss = 0;
5338 rack->rc_dragged_bottom = 0;
5346 stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_GPUT,
5353 if (tp->t_stats_gput_prev > 0)
5354 stats_voi_update_abs_s32(tp->t_stats,
5356 ((gput - tp->t_stats_gput_prev) * 100) /
5357 tp->t_stats_gput_prev);
5359 tp->t_stats_gput_prev = gput;
5361 tp->t_flags &= ~TF_GPUTINPROG;
5366 * We don't do the other case i.e. non-applimited here since
5369 if (rack->r_ctl.rc_first_appl &&
5370 TCPS_HAVEESTABLISHED(tp->t_state) &&
5371 rack->r_ctl.rc_app_limited_cnt &&
5372 (SEQ_GT(rack->r_ctl.rc_first_appl->r_start, th_ack)) &&
5373 ((rack->r_ctl.rc_first_appl->r_end - th_ack) >
5380 rack->r_ctl.rc_gp_lowrtt = 0xffffffff;
5381 rack->r_ctl.rc_gp_high_rwnd = rack->rc_tp->snd_wnd;
5382 tp->gput_ts = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time);
5383 rack->app_limited_needs_set = 0;
5384 tp->gput_seq = th_ack;
5385 if (rack->in_probe_rtt)
5386 rack->measure_saw_probe_rtt = 1;
5387 else if ((rack->measure_saw_probe_rtt) &&
5388 (SEQ_GEQ(tp->gput_seq, rack->r_ctl.rc_probertt_sndmax_atexit)))
5389 rack->measure_saw_probe_rtt = 0;
5390 if ((rack->r_ctl.rc_first_appl->r_end - th_ack) >= rack_get_measure_window(tp, rack)) {
5391 /* There is a full window to gain info from */
5392 tp->gput_ack = tp->gput_seq + rack_get_measure_window(tp, rack);
5395 tp->gput_ack = tp->gput_seq + (rack->r_ctl.rc_first_appl->r_end - th_ack);
5396 if ((tp->gput_ack - tp->gput_seq) < (MIN_GP_WIN * segsiz)) {
5400 tp->t_flags &= ~TF_GPUTINPROG;
5401 rack_log_pacing_delay_calc(rack, tp->gput_ack, tp->gput_seq,
5406 if (tp->t_state >= TCPS_FIN_WAIT_1) {
5412 if (sbavail(&tptosocket(tp)->so_snd) < (tp->gput_ack - tp->gput_seq)) {
5417 tp->t_flags |= TF_GPUTINPROG;
5419 * Now we need to find the timestamp of the send at tp->gput_seq
5422 rack->r_ctl.rc_gp_cumack_ts = 0;
5423 rsm = tqhash_find(rack->r_ctl.tqh, tp->gput_seq);
5425 /* Ok send-based limit is set */
5426 if (SEQ_LT(rsm->r_start, tp->gput_seq)) {
5433 tp->gput_seq = rsm->r_start;
5435 if (rsm->r_flags & RACK_ACKED) {
5438 tp->gput_ts = (uint32_t)rsm->r_ack_arrival;
5439 tp->gput_seq = rsm->r_end;
5440 nrsm = tqhash_next(rack->r_ctl.tqh, rsm);
5444 rack->app_limited_needs_set = 1;
5447 rack->app_limited_needs_set = 1;
5449 rack->r_ctl.rc_gp_output_ts = rsm->r_tim_lastsent[0];
5453 * send-limit set the current time, which
5454 * basically disables the send-limit.
5459 rack->r_ctl.rc_gp_output_ts = rack_to_usec_ts(&tv);
5463 tp->gput_seq,
5464 tp->gput_ack,
5466 tp->gput_ts,
5467 (((uint64_t)rack->r_ctl.rc_app_limited_cnt << 32) | (uint64_t)rack->r_ctl.rc_gp_output_ts),
5470 rack_log_gpset(rack, tp->gput_ack, 0, 0, __LINE__, 1, NULL);
5492 tp->t_ccv.nsegs = nsegs;
5493 acked = tp->t_ccv.bytes_this_ack = (th_ack - tp->snd_una);
5494 if ((post_recovery) && (rack->r_ctl.rc_early_recovery_segs)) {
5497 max = rack->r_ctl.rc_early_recovery_segs * ctf_fixed_maxseg(tp);
5498 if (tp->t_ccv.bytes_this_ack > max) {
5499 tp->t_ccv.bytes_this_ack = max;
5503 stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF,
5504 ((int32_t)rack->r_ctl.cwnd_to_use) - tp->snd_wnd);
5506 if ((th_ack == tp->snd_max) && rack->lt_bw_up) {
5515 rack->r_ctl.lt_bw_bytes += (tp->snd_max - rack->r_ctl.lt_seq);
5516 rack->r_ctl.lt_seq = tp->snd_max;
5517 tmark = tcp_tv_to_lusectick(&rack->r_ctl.act_rcv_time);
5518 if (tmark >= rack->r_ctl.lt_timemark) {
5519 rack->r_ctl.lt_bw_time += (tmark - rack->r_ctl.lt_timemark);
5521 rack->r_ctl.lt_timemark = tmark;
5522 rack->lt_bw_up = 0;
5525 if ((tp->t_flags & TF_GPUTINPROG) &&
5531 if (tp->snd_cwnd <= tp->snd_wnd)
5532 tp->t_ccv.flags |= CCF_CWND_LIMITED;
5534 tp->t_ccv.flags &= ~CCF_CWND_LIMITED;
5535 if (tp->snd_cwnd > tp->snd_ssthresh) {
5536 tp->t_bytes_acked += min(tp->t_ccv.bytes_this_ack,
5539 if (tp->t_bytes_acked >= rack->r_ctl.cwnd_to_use) {
5540 tp->t_bytes_acked -= rack->r_ctl.cwnd_to_use;
5541 tp->t_ccv.flags |= CCF_ABC_SENTAWND;
5544 tp->t_ccv.flags &= ~CCF_ABC_SENTAWND;
5545 tp->t_bytes_acked = 0;
5547 prior_cwnd = tp->snd_cwnd;
5548 if ((post_recovery == 0) || (rack_max_abc_post_recovery == 0) || rack->r_use_labc_for_rec ||
5549 (rack_client_low_buf && rack->client_bufferlvl &&
5550 (rack->client_bufferlvl < rack_client_low_buf)))
5551 labc_to_use = rack->rc_labc;
5554 if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
5561 log.u_bbr.flex2 = tp->t_ccv.flags;
5562 log.u_bbr.flex3 = tp->t_ccv.bytes_this_ack;
5563 log.u_bbr.flex4 = tp->t_ccv.nsegs;
5571 if (CC_ALGO(tp)->ack_received != NULL) {
5573 tp->t_ccv.curack = th_ack;
5574 tp->t_ccv.labc = labc_to_use;
5575 tp->t_ccv.flags |= CCF_USE_LOCAL_ABC;
5576 CC_ALGO(tp)->ack_received(&tp->t_ccv, type);
5579 lgb->tlb_stackinfo.u_bbr.flex6 = tp->snd_cwnd;
5581 if (rack->r_must_retran) {
5582 if (SEQ_GEQ(th_ack, rack->r_ctl.rc_snd_max_at_rto)) {
5587 rack->r_ctl.rc_out_at_rto = 0;
5588 rack->r_must_retran = 0;
5589 } else if ((prior_cwnd + ctf_fixed_maxseg(tp)) <= tp->snd_cwnd) {
5596 if (acked <= rack->r_ctl.rc_out_at_rto){
5597 rack->r_ctl.rc_out_at_rto -= acked;
5599 rack->r_ctl.rc_out_at_rto = 0;
5604 stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, rack->r_ctl.cwnd_to_use);
5606 if (rack->r_ctl.rc_rack_largest_cwnd < rack->r_ctl.cwnd_to_use) {
5607 rack->r_ctl.rc_rack_largest_cwnd = rack->r_ctl.cwnd_to_use;
5609 if ((rack->rc_initial_ss_comp == 0) &&
5610 (tp->snd_cwnd >= tp->snd_ssthresh)) {
5615 rack->rc_initial_ss_comp = 1;
5624 rack = (struct tcp_rack *)tp->t_fb_ptr;
5633 if ((rack->r_ctl.rc_prr_sndcnt > 0) ||
5634 rack->rack_no_prr)
5635 rack->r_wanted_output = 1;
5644 EXIT_RECOVERY(tp->t_flags);
5653 orig_cwnd = tp->snd_cwnd;
5655 rack = (struct tcp_rack *)tp->t_fb_ptr;
5657 if (CC_ALGO(tp)->post_recovery != NULL) {
5658 tp->t_ccv.curack = th_ack;
5659 CC_ALGO(tp)->post_recovery(&tp->t_ccv);
5660 if (tp->snd_cwnd < tp->snd_ssthresh) {
5664 * snd_ssthresh per RFC-6582 (option 2).
5666 tp->snd_cwnd = tp->snd_ssthresh;
5669 if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
5676 log.u_bbr.flex2 = tp->t_ccv.flags;
5677 log.u_bbr.flex3 = tp->t_ccv.bytes_this_ack;
5678 log.u_bbr.flex4 = tp->t_ccv.nsegs;
5682 log.u_bbr.pkts_out = rack->r_ctl.rc_prr_sndcnt;
5687 if ((rack->rack_no_prr == 0) &&
5688 (rack->no_prr_addback == 0) &&
5689 (rack->r_ctl.rc_prr_sndcnt > 0)) {
5694 if (ctf_outstanding(tp) <= sbavail(&tptosocket(tp)->so_snd)) {
5704 tp->snd_cwnd += min((ctf_fixed_maxseg(tp) * rack_prr_addbackmax),
5705 rack->r_ctl.rc_prr_sndcnt);
5707 rack->r_ctl.rc_prr_sndcnt = 0;
5711 tp->snd_recover = tp->snd_una;
5712 if (rack->r_ctl.dsack_persist) {
5713 rack->r_ctl.dsack_persist--;
5714 if (rack->r_ctl.num_dsack && (rack->r_ctl.dsack_persist == 0)) {
5715 rack->r_ctl.num_dsack = 0;
5719 if (rack->rto_from_rec == 1) {
5720 rack->rto_from_rec = 0;
5721 if (rack->r_ctl.rto_ssthresh > tp->snd_ssthresh)
5722 tp->snd_ssthresh = rack->r_ctl.rto_ssthresh;
5735 stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_CSIG, type);
5737 if (IN_RECOVERY(tp->t_flags) == 0) {
5739 ssthresh_enter = tp->snd_ssthresh;
5740 cwnd_enter = tp->snd_cwnd;
5743 rack = (struct tcp_rack *)tp->t_fb_ptr;
5746 tp->t_flags &= ~TF_WASFRECOVERY;
5747 tp->t_flags &= ~TF_WASCRECOVERY;
5748 if (!IN_FASTRECOVERY(tp->t_flags)) {
5749 /* Check if this is the end of the initial Start-up i.e. initial slow-start */
5750 if (rack->rc_initial_ss_comp == 0) {
5752 rack->rc_initial_ss_comp = 1;
5754 rack->r_ctl.rc_prr_delivered = 0;
5755 rack->r_ctl.rc_prr_out = 0;
5756 rack->r_fast_output = 0;
5757 if (rack->rack_no_prr == 0) {
5758 rack->r_ctl.rc_prr_sndcnt = ctf_fixed_maxseg(tp);
5761 rack->r_ctl.rc_prr_recovery_fs = tp->snd_max - tp->snd_una;
5762 tp->snd_recover = tp->snd_max;
5763 if (tp->t_flags2 & TF2_ECN_PERMIT)
5764 tp->t_flags2 |= TF2_ECN_SND_CWR;
5768 if (!IN_CONGRECOVERY(tp->t_flags) ||
5773 SEQ_GEQ(ack, tp->snd_recover)) {
5774 EXIT_CONGRECOVERY(tp->t_flags);
5776 rack->r_fast_output = 0;
5777 tp->snd_recover = tp->snd_max + 1;
5778 if (tp->t_flags2 & TF2_ECN_PERMIT)
5779 tp->t_flags2 |= TF2_ECN_SND_CWR;
5783 tp->t_dupacks = 0;
5784 tp->t_bytes_acked = 0;
5785 rack->r_fast_output = 0;
5786 if (IN_RECOVERY(tp->t_flags))
5788 orig_cwnd = tp->snd_cwnd;
5790 if (CC_ALGO(tp)->cong_signal == NULL) {
5792 tp->snd_ssthresh = max(2,
5793 min(tp->snd_wnd, rack->r_ctl.cwnd_to_use) / 2 /
5795 tp->snd_cwnd = ctf_fixed_maxseg(tp);
5797 if (tp->t_flags2 & TF2_ECN_PERMIT)
5798 tp->t_flags2 |= TF2_ECN_SND_CWR;
5803 tp->snd_cwnd = tp->snd_cwnd_prev;
5804 tp->snd_ssthresh = tp->snd_ssthresh_prev;
5805 tp->snd_recover = tp->snd_recover_prev;
5806 if (tp->t_flags & TF_WASFRECOVERY) {
5807 ENTER_FASTRECOVERY(tp->t_flags);
5808 tp->t_flags &= ~TF_WASFRECOVERY;
5810 if (tp->t_flags & TF_WASCRECOVERY) {
5811 ENTER_CONGRECOVERY(tp->t_flags);
5812 tp->t_flags &= ~TF_WASCRECOVERY;
5814 tp->snd_nxt = tp->snd_max;
5815 tp->t_badrxtwin = 0;
5818 if ((CC_ALGO(tp)->cong_signal != NULL) &&
5820 tp->t_ccv.curack = ack;
5821 CC_ALGO(tp)->cong_signal(&tp->t_ccv, type);
5823 if ((in_rec_at_entry == 0) && IN_RECOVERY(tp->t_flags)) {
5825 rack->r_ctl.dsack_byte_cnt = 0;
5826 rack->r_ctl.retran_during_recovery = 0;
5827 rack->r_ctl.rc_cwnd_at_erec = cwnd_enter;
5828 rack->r_ctl.rc_ssthresh_at_erec = ssthresh_enter;
5829 rack->r_ent_rec_ns = 1;
5840 if (CC_ALGO(tp)->after_idle != NULL)
5841 CC_ALGO(tp)->after_idle(&tp->t_ccv);
5843 if (tp->snd_cwnd == 1)
5844 i_cwnd = tp->t_maxseg; /* SYN(-ACK) lost */
5853 if (tp->snd_cwnd < i_cwnd) {
5854 tp->snd_cwnd = i_cwnd;
5861 * - There is no delayed ack timer in progress.
5862 * - Our last ack wasn't a 0-sized window. We never want to delay
5863 * the ack that opens up a 0-sized window.
5864 * - LRO wasn't used for this segment. We make sure by checking that the
5866 * - Delayed acks are enabled or this is a half-synchronized T/TCP
5870 (((tp->t_flags & TF_RXWIN0SENT) == 0) && \
5871 ((tp->t_flags & TF_DELACK) == 0) && \
5872 (tlen <= tp->t_maxseg) && \
5873 (tp->t_delayed_ack || (tp->t_flags & TF_NEEDSYN)))
5881 * Walk the time-order transmitted list looking for an rsm that is
5885 TAILQ_FOREACH(rsm, &rack->r_ctl.rc_tmap, r_tnext) {
5886 if (rsm->r_flags & RACK_ACKED) {
5907 TQHASH_FOREACH_REVERSE_FROM(prsm, rack->r_ctl.tqh) {
5908 if (prsm->r_flags & (RACK_ACKED | RACK_HAS_FIN)) {
5927 * If reorder-fade is configured, then we track the last time we saw
5928 * re-ordering occur. If we reach the point where enough time has
5931 * Or if reorder-fade is 0, then once we see reordering we consider
5935 * In the end if lro is non-zero we add the extra time for
5940 if (rack->r_ctl.rc_reorder_ts) {
5941 if (rack->r_ctl.rc_reorder_fade) {
5942 if (SEQ_GEQ(cts, rack->r_ctl.rc_reorder_ts)) {
5943 lro = cts - rack->r_ctl.rc_reorder_ts;
5955 if (lro > rack->r_ctl.rc_reorder_fade) {
5957 rack->r_ctl.rc_reorder_ts = 0;
5967 if (rack->rc_rack_tmr_std_based == 0) {
5968 thresh = srtt + rack->r_ctl.rc_pkt_delay;
5970 /* Standards based pkt-delay is 1/4 srtt */
5973 if (lro && (rack->rc_rack_tmr_std_based == 0)) {
5975 if (rack->r_ctl.rc_reorder_shift)
5976 thresh += (srtt >> rack->r_ctl.rc_reorder_shift);
5980 if (rack->rc_rack_use_dsack &&
5982 (rack->r_ctl.num_dsack > 0)) {
5987 thresh += rack->r_ctl.num_dsack * (srtt >> 2);
6014 if (rack->r_ctl.rc_tlp_threshold)
6015 thresh = srtt + (srtt / rack->r_ctl.rc_tlp_threshold);
6020 segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs);
6021 len = rsm->r_end - rsm->r_start;
6022 if (rack->rack_tlp_threshold_use == TLP_USE_ID) {
6024 if (((tp->snd_max - tp->snd_una) - rack->r_ctl.rc_sacked + rack->r_ctl.rc_holes_rxt) <= segsiz) {
6027 * Compensate for delayed-ack with the d-ack time.
6033 } else if (rack->rack_tlp_threshold_use == TLP_USE_TWO_ONE) {
6039 * possible inter-packet delay (if any).
6044 idx = rsm->r_rtr_cnt - 1;
6045 nidx = prsm->r_rtr_cnt - 1;
6046 if (rsm->r_tim_lastsent[nidx] >= prsm->r_tim_lastsent[idx]) {
6048 inter_gap = rsm->r_tim_lastsent[idx] - prsm->r_tim_lastsent[nidx];
6053 * Possibly compensate for delayed-ack.
6061 } else if (rack->rack_tlp_threshold_use == TLP_USE_TWO_TWO) {
6066 * Compensate for delayed-ack with the d-ack time.
6074 if (thresh > tp->t_rxtcur) {
6075 thresh = tp->t_rxtcur;
6099 if (rack->rc_rack_rtt)
6100 return (rack->rc_rack_rtt);
6101 else if (tp->t_srtt == 0)
6103 return (tp->t_srtt);
6119 rack = (struct tcp_rack *)tp->t_fb_ptr;
6120 if (tqhash_empty(rack->r_ctl.tqh)) {
6123 rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap);
6128 if (rsm->r_flags & RACK_ACKED) {
6133 idx = rsm->r_rtr_cnt - 1;
6136 if (TSTMP_LT(tsused, ((uint32_t)rsm->r_tim_lastsent[idx]))) {
6139 if ((tsused - ((uint32_t)rsm->r_tim_lastsent[idx])) < thresh) {
6142 /* Ok if we reach here we are over-due and this guy can be sent */
6143 rack_cong_signal(tp, CC_NDUPACK, tp->snd_una, __LINE__);
6154 t = (tp->t_srtt + (tp->t_rttvar << 2));
6155 RACK_TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
6156 rack_persist_min, rack_persist_max, rack->r_ctl.timer_slop);
6157 rack->r_ctl.rc_hpts_flags |= PACE_TMR_PERSIT;
6177 if (rack->t_timers_stopped) {
6181 if (rack->rc_in_persist) {
6185 rack->rc_on_min_to = 0;
6186 if ((tp->t_state < TCPS_ESTABLISHED) ||
6187 ((tp->t_flags & TF_SACK_PERMIT) == 0)) {
6190 rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap);
6195 rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap);
6202 * recently that's the discount we want to use (now - timer time).
6204 * we want to use that (now - oldest-packet-last_transmit_time).
6207 idx = rsm->r_rtr_cnt - 1;
6208 if (TSTMP_GEQ(rack->r_ctl.rc_tlp_rxt_last_time, ((uint32_t)rsm->r_tim_lastsent[idx])))
6209 tstmp_touse = (uint32_t)rack->r_ctl.rc_tlp_rxt_last_time;
6211 tstmp_touse = (uint32_t)rsm->r_tim_lastsent[idx];
6213 time_since_sent = cts - tstmp_touse;
6215 if (SEQ_LT(tp->snd_una, tp->snd_max) ||
6216 sbavail(&tptosocket(tp)->so_snd)) {
6217 rack->r_ctl.rc_hpts_flags |= PACE_TMR_RXT;
6218 to = tp->t_rxtcur;
6220 to -= time_since_sent;
6222 to = rack->r_ctl.rc_min_to;
6226 if ((TCPS_HAVEESTABLISHED(tp->t_state) == 0) &&
6231 * of the keep-init timeout.
6236 if (TSTMP_GT(cts, (uint32_t)rsm->r_tim_lastsent[0])) {
6237 red = (cts - (uint32_t)rsm->r_tim_lastsent[0]);
6239 max_time -= red;
6251 if (rsm->r_flags & RACK_ACKED) {
6259 if ((rsm->r_flags & RACK_SACK_PASSED) ||
6260 (rsm->r_flags & RACK_RWND_COLLAPSED) ||
6261 (rsm->r_dupack >= DUP_ACK_THRESHOLD)) {
6262 if ((tp->t_flags & TF_SENTFIN) &&
6263 ((tp->snd_max - tp->snd_una) == 1) &&
6264 (rsm->r_flags & RACK_HAS_FIN)) {
6271 if ((rack->use_rack_rr == 0) &&
6272 (IN_FASTRECOVERY(tp->t_flags)) &&
6273 (rack->rack_no_prr == 0) &&
6274 (rack->r_ctl.rc_prr_sndcnt < ctf_fixed_maxseg(tp))) {
6281 * get to use the rack-cheat.
6287 idx = rsm->r_rtr_cnt - 1;
6288 exp = ((uint32_t)rsm->r_tim_lastsent[idx]) + thresh;
6290 to = exp - cts;
6291 if (to < rack->r_ctl.rc_min_to) {
6292 to = rack->r_ctl.rc_min_to;
6293 if (rack->r_rr_config == 3)
6294 rack->rc_on_min_to = 1;
6297 to = rack->r_ctl.rc_min_to;
6298 if (rack->r_rr_config == 3)
6299 rack->rc_on_min_to = 1;
6304 if ((rack->rc_tlp_in_progress != 0) &&
6305 (rack->r_ctl.rc_tlp_cnt_out >= rack_tlp_limit)) {
6312 rsm = TAILQ_LAST_FAST(&rack->r_ctl.rc_tmap, rack_sendmap, r_tnext);
6317 if (rsm->r_flags & RACK_HAS_FIN) {
6322 idx = rsm->r_rtr_cnt - 1;
6324 if (TSTMP_GEQ(((uint32_t)rsm->r_tim_lastsent[idx]), rack->r_ctl.rc_tlp_rxt_last_time))
6325 tstmp_touse = (uint32_t)rsm->r_tim_lastsent[idx];
6327 tstmp_touse = (uint32_t)rack->r_ctl.rc_tlp_rxt_last_time;
6329 time_since_sent = cts - tstmp_touse;
6331 if (tp->t_srtt) {
6332 if ((rack->rc_srtt_measure_made == 0) &&
6333 (tp->t_srtt == 1)) {
6340 srtt_cur = tp->t_srtt;
6351 tp->t_srtt &&
6357 to = thresh - time_since_sent;
6359 to = rack->r_ctl.rc_min_to;
6364 rack->r_ctl.rc_tlp_rxt_last_time, /* flex4 */
6365 (uint32_t)rsm->r_tim_lastsent[idx],
6381 rack->r_ctl.rc_hpts_flags |= PACE_TMR_RACK;
6383 rack->r_ctl.rc_hpts_flags |= PACE_TMR_TLP;
6393 if (rack->rc_in_persist == 0) {
6394 if (tp->t_flags & TF_GPUTINPROG) {
6399 rack_do_goodput_measurement(tp, rack, tp->snd_una, __LINE__,
6403 if (rack->r_ctl.rc_scw) {
6404 tcp_shared_cwnd_idle(rack->r_ctl.rc_scw, rack->r_ctl.rc_scw_index);
6405 rack->rack_scwnd_is_idle = 1;
6408 rack->r_ctl.rc_went_idle_time = cts;
6409 if (rack->r_ctl.rc_went_idle_time == 0)
6410 rack->r_ctl.rc_went_idle_time = 1;
6411 if (rack->lt_bw_up) {
6415 rack->r_ctl.lt_bw_bytes += (snd_una - rack->r_ctl.lt_seq);
6416 rack->r_ctl.lt_seq = snd_una;
6417 tmark = tcp_tv_to_lusectick(&rack->r_ctl.act_rcv_time);
6418 if (tmark >= rack->r_ctl.lt_timemark) {
6419 rack->r_ctl.lt_bw_time += (tmark - rack->r_ctl.lt_timemark);
6421 rack->r_ctl.lt_timemark = tmark;
6422 rack->lt_bw_up = 0;
6423 rack->r_persist_lt_bw_off = 1;
6426 rack->r_ctl.persist_lost_ends = 0;
6427 rack->probe_not_answered = 0;
6428 rack->forced_ack = 0;
6429 tp->t_rxtshift = 0;
6430 RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
6431 rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
6432 rack->rc_in_persist = 1;
6439 if (tcp_in_hpts(rack->rc_tp)) {
6440 tcp_hpts_remove(rack->rc_tp);
6441 rack->r_ctl.rc_hpts_flags = 0;
6444 if (rack->r_ctl.rc_scw) {
6445 tcp_shared_cwnd_active(rack->r_ctl.rc_scw, rack->r_ctl.rc_scw_index);
6446 rack->rack_scwnd_is_idle = 0;
6449 if (rack->rc_gp_dyn_mul &&
6450 (rack->use_fixed_rate == 0) &&
6451 (rack->rc_always_pace)) {
6453 * Do we count this as if a probe-rtt just
6458 time_idle = cts - rack->r_ctl.rc_went_idle_time;
6462 extra = (uint64_t)rack->r_ctl.rc_gp_srtt *
6468 /* Yes, we count it as a probe-rtt. */
6472 if (rack->in_probe_rtt == 0) {
6473 rack->r_ctl.rc_lower_rtt_us_cts = us_cts;
6474 rack->r_ctl.rc_time_probertt_entered = rack->r_ctl.rc_lower_rtt_us_cts;
6475 rack->r_ctl.rc_time_probertt_starts = rack->r_ctl.rc_lower_rtt_us_cts;
6476 rack->r_ctl.rc_time_of_last_probertt = rack->r_ctl.rc_lower_rtt_us_cts;
6482 if (rack->r_persist_lt_bw_off) {
6484 rack->r_ctl.lt_timemark = tcp_get_u64_usecs(NULL);
6485 rack->lt_bw_up = 1;
6486 rack->r_persist_lt_bw_off = 0;
6488 rack->rc_in_persist = 0;
6489 rack->r_ctl.rc_went_idle_time = 0;
6490 tp->t_rxtshift = 0;
6491 RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
6492 rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
6493 rack->r_ctl.rc_agg_delayed = 0;
6494 rack->r_early = 0;
6495 rack->r_late = 0;
6496 rack->r_ctl.rc_agg_early = 0;
6503 if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
6507 log.u_bbr.flex1 = diag->p_nxt_slot;
6508 log.u_bbr.flex2 = diag->p_cur_slot;
6509 log.u_bbr.flex3 = diag->slot_req;
6510 log.u_bbr.flex4 = diag->inp_hptsslot;
6511 log.u_bbr.flex5 = diag->slot_remaining;
6512 log.u_bbr.flex6 = diag->need_new_to;
6513 log.u_bbr.flex7 = diag->p_hpts_active;
6514 log.u_bbr.flex8 = diag->p_on_min_sleep;
6516 log.u_bbr.epoch = diag->have_slept;
6517 log.u_bbr.lt_epoch = diag->yet_to_sleep;
6518 log.u_bbr.pkts_out = diag->co_ret;
6519 log.u_bbr.applimited = diag->hpts_sleep_time;
6520 log.u_bbr.delivered = diag->p_prev_slot;
6521 log.u_bbr.inflight = diag->p_runningslot;
6522 log.u_bbr.bw_inuse = diag->wheel_slot;
6523 log.u_bbr.rttProp = diag->wheel_cts;
6525 log.u_bbr.delRate = diag->maxslots;
6526 log.u_bbr.cur_del_rate = diag->p_curtick;
6528 log.u_bbr.cur_del_rate |= diag->p_lasttick;
6529 TCP_LOG_EVENTP(rack->rc_tp, NULL,
6530 &rack->rc_inp->inp_socket->so_rcv,
6531 &rack->rc_inp->inp_socket->so_snd,
6541 if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
6546 log.u_bbr.flex1 = sb->sb_flags;
6548 log.u_bbr.flex3 = sb->sb_state;
6551 TCP_LOG_EVENTP(rack->rc_tp, NULL,
6552 &rack->rc_inp->inp_socket->so_rcv,
6553 &rack->rc_inp->inp_socket->so_snd,
6573 if ((tp->t_state == TCPS_CLOSED) ||
6574 (tp->t_state == TCPS_LISTEN)) {
6581 stopped = rack->rc_tmr_stopped;
6582 if (stopped && TSTMP_GT(rack->r_ctl.rc_timer_exp, cts)) {
6583 left = rack->r_ctl.rc_timer_exp - cts;
6585 rack->r_ctl.rc_timer_exp = 0;
6586 rack->r_ctl.rc_hpts_flags = 0;
6590 if (rack->r_early && (rack->rc_ack_can_sendout_data == 0)) {
6598 * by an ack aka the rc_agg_early (non-paced mode).
6600 slot += rack->r_ctl.rc_agg_early;
6601 rack->r_early = 0;
6602 rack->r_ctl.rc_agg_early = 0;
6604 if ((rack->r_late) &&
6605 ((rack->r_use_hpts_min == 0) || (rack->dgp_on == 0))) {
6612 if (rack->r_ctl.rc_agg_delayed >= slot) {
6620 /* We gain delay */
6621 rack->r_ctl.rc_agg_delayed += (HPTS_TICKS_PER_SLOT - slot);
6625 rack->r_ctl.rc_agg_delayed -= (slot - HPTS_TICKS_PER_SLOT);
6629 slot -= rack->r_ctl.rc_agg_delayed;
6630 rack->r_ctl.rc_agg_delayed = 0;
6633 rack->r_ctl.rc_agg_delayed = HPTS_TICKS_PER_SLOT - slot;
6636 if (rack->r_ctl.rc_agg_delayed == 0)
6637 rack->r_late = 0;
6639 } else if (rack->r_late) {
6643 max_red = (slot * rack->r_ctl.max_reduction) / 100;
6644 if (max_red >= rack->r_ctl.rc_agg_delayed) {
6645 slot -= rack->r_ctl.rc_agg_delayed;
6646 rack->r_ctl.rc_agg_delayed = 0;
6648 slot -= max_red;
6649 rack->r_ctl.rc_agg_delayed -= max_red;
6652 if ((rack->r_use_hpts_min == 1) &&
6654 (rack->dgp_on == 1)) {
6667 if (tp->t_flags & TF_DELACK) {
6669 rack->r_ctl.rc_hpts_flags |= PACE_TMR_DELACK;
6675 rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_DELACK;
6678 * wheel, we resort to a keep-alive timer if it's configured.
6682 if ((V_tcp_always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
6683 (tp->t_state <= TCPS_CLOSING)) {
6686 * del-ack), we don't have segments being paced. So
6689 if (TCPS_HAVEESTABLISHED(tp->t_state)) {
6690 /* Get the established keep-alive time */
6694 * Get the initial setup keep-alive time,
6702 rack->r_ctl.rc_hpts_flags |= PACE_TMR_KEEP;
6703 if (rack->in_probe_rtt) {
6707 * exit probe-rtt and initiate a keep-alive ack.
6708 * This will get us out of probe-rtt and update
6709 * our min-rtt.
6716 (rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK)) {
6722 * keep-alive, delayed_ack we keep track of what was left
6730 * Hack alert for now we can't time-out over 2,147,483
6736 rack->r_ctl.rc_timer_exp = cts + hpts_timeout;
6739 if ((rack->gp_ready == 0) &&
6740 (rack->use_fixed_rate == 0) &&
6742 (rack->r_ctl.rc_hpts_flags & (PACE_TMR_TLP|PACE_TMR_RXT))) {
6762 * TF2_MBUF_QUEUE_READY - This flags says that I am busy
6767 * TF2_DONT_SACK_QUEUE - This flag is used in conjunction
6782 tp->t_flags2 &= ~(TF2_DONT_SACK_QUEUE|TF2_MBUF_QUEUE_READY);
6784 rack->r_ctl.rc_hpts_flags |= PACE_PKT_OUTPUT;
6785 rack->r_ctl.rc_last_output_to = us_cts + slot;
6794 tp->t_flags2 |= TF2_MBUF_QUEUE_READY;
6800 if ((rack->r_ctl.rc_hpts_flags & PACE_TMR_RACK) ||
6801 (IN_RECOVERY(tp->t_flags))) {
6802 if (rack->r_rr_config != 3)
6803 tp->t_flags2 |= TF2_DONT_SACK_QUEUE;
6804 else if (rack->rc_pace_dnd) {
6813 tp->t_flags2 |= TF2_DONT_SACK_QUEUE;
6816 if (rack->rc_ack_can_sendout_data) {
6820 * backout the changes (used for non-paced
6823 tp->t_flags2 &= ~(TF2_DONT_SACK_QUEUE |
6826 if ((rack->use_rack_rr) &&
6827 (rack->r_rr_config < 2) &&
6831 * t-o if the t-o does not cause a send.
6852 rack->r_ctl.rc_hpts_flags &= ~PACE_PKT_OUTPUT;
6860 if (SEQ_GT(tp->snd_max, tp->snd_una)) {
6861 panic("tp:%p rack:%p tlts:%d cts:%u slot:%u pto:%u -- no timer started?",
6866 rack->rc_tmr_stopped = 0;
6880 TAILQ_FOREACH_FROM(nrsm, &rack->r_ctl.rc_tmap, r_tnext) {
6881 if ((nrsm->r_flags & RACK_SACK_PASSED) == 0) {
6882 /* Got up to all that were marked sack-passed */
6885 if ((nrsm->r_flags & RACK_WAS_LOST) == 0) {
6886 exp = ((uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)]) + thresh;
6889 nrsm->r_flags |= RACK_WAS_LOST;
6890 rack->r_ctl.rc_considered_lost += nrsm->r_end - nrsm->r_start;
6912 * retransmissions, if so we will enter fast-recovery. The output
6919 if (rack->r_state && (rack->r_state != tp->t_state))
6921 rack->rc_on_min_to = 0;
6927 rack->r_ctl.rc_resend = rsm;
6928 rack->r_timer_override = 1;
6929 if (rack->use_rack_rr) {
6933 * over-ride pacing i.e. rrr takes precedence
6938 rack->r_ctl.rc_hpts_flags &= ~PACE_PKT_OUTPUT;
6941 rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_RACK;
6957 if ((M_TRAILINGROOM(rsm->m) != rsm->orig_t_space)) {
6964 KASSERT((rsm->orig_t_space > M_TRAILINGROOM(rsm->m)),
6965 ("mbuf:%p rsm:%p trailing_space:%jd ots:%u oml:%u mlen:%u\n",
6966 rsm->m,
6968 (intmax_t)M_TRAILINGROOM(rsm->m),
6969 rsm->orig_t_space,
6970 rsm->orig_m_len,
6971 rsm->m->m_len));
6972 rsm->orig_m_len += (rsm->orig_t_space - M_TRAILINGROOM(rsm->m));
6973 rsm->orig_t_space = M_TRAILINGROOM(rsm->m);
6975 if (rsm->m->m_len < rsm->orig_m_len) {
6980 KASSERT((rsm->soff >= (rsm->orig_m_len - rsm->m->m_len)),
6981 ("mbuf:%p len:%u rsm:%p oml:%u soff:%u\n",
6982 rsm->m, rsm->m->m_len,
6983 rsm, rsm->orig_m_len,
6984 rsm->soff));
6985 if (rsm->soff >= (rsm->orig_m_len - rsm->m->m_len))
6986 rsm->soff -= (rsm->orig_m_len - rsm->m->m_len);
6988 rsm->soff = 0;
6989 rsm->orig_m_len = rsm->m->m_len;
6991 } else if (rsm->m->m_len > rsm->orig_m_len) {
6992 panic("rsm:%p m:%p m_len grew outside of t_space compensation",
6993 rsm, rsm->m);
7004 if (src_rsm->m &&
7005 ((src_rsm->orig_m_len != src_rsm->m->m_len) ||
7006 (M_TRAILINGROOM(src_rsm->m) != src_rsm->orig_t_space))) {
7010 m = src_rsm->m;
7011 soff = src_rsm->soff + (src_rsm->r_end - src_rsm->r_start);
7012 while (soff >= m->m_len) {
7014 soff -= m->m_len;
7015 m = m->m_next;
7017 ("rsm:%p nrsm:%p hit at soff:%u null m",
7021 src_rsm->m = sbsndmbuf(&rack->rc_inp->inp_socket->so_snd,
7022 (src_rsm->r_start - rack->rc_tp->snd_una),
7023 &src_rsm->soff);
7024 src_rsm->orig_m_len = src_rsm->m->m_len;
7025 src_rsm->orig_t_space = M_TRAILINGROOM(src_rsm->m);
7026 rsm->m = sbsndmbuf(&rack->rc_inp->inp_socket->so_snd,
7027 (rsm->r_start - rack->rc_tp->snd_una),
7028 &rsm->soff);
7029 rsm->orig_m_len = rsm->m->m_len;
7030 rsm->orig_t_space = M_TRAILINGROOM(rsm->m);
7034 rsm->m = m;
7035 rsm->soff = soff;
7036 rsm->orig_m_len = m->m_len;
7037 rsm->orig_t_space = M_TRAILINGROOM(rsm->m);
7046 nrsm->r_start = start;
7047 nrsm->r_end = rsm->r_end;
7048 nrsm->r_rtr_cnt = rsm->r_rtr_cnt;
7049 nrsm->r_act_rxt_cnt = rsm->r_act_rxt_cnt;
7050 nrsm->r_flags = rsm->r_flags;
7051 nrsm->r_dupack = rsm->r_dupack;
7052 nrsm->r_no_rtt_allowed = rsm->r_no_rtt_allowed;
7053 nrsm->r_rtr_bytes = 0;
7054 nrsm->r_fas = rsm->r_fas;
7055 nrsm->r_bas = rsm->r_bas;
7056 tqhash_update_end(rack->r_ctl.tqh, rsm, nrsm->r_start);
7057 nrsm->r_just_ret = rsm->r_just_ret;
7058 for (idx = 0; idx < nrsm->r_rtr_cnt; idx++) {
7059 nrsm->r_tim_lastsent[idx] = rsm->r_tim_lastsent[idx];
7062 if (nrsm->r_flags & RACK_HAS_SYN)
7063 nrsm->r_flags &= ~RACK_HAS_SYN;
7065 if (rsm->r_flags & RACK_HAS_FIN)
7066 rsm->r_flags &= ~RACK_HAS_FIN;
7068 if (rsm->r_flags & RACK_HAD_PUSH)
7069 rsm->r_flags &= ~RACK_HAD_PUSH;
7071 nrsm->r_hw_tls = rsm->r_hw_tls;
7079 KASSERT(((rsm->m != NULL) ||
7080 (rsm->r_flags & (RACK_HAS_SYN|RACK_HAS_FIN))),
7081 ("rsm:%p nrsm:%p rack:%p -- rsm->m is NULL?", rsm, nrsm, rack));
7082 if (rsm->m)
7101 rack_log_map_chg(rack->rc_tp, rack, NULL,
7102 l_rsm, r_rsm, MAP_MERGE, r_rsm->r_end, __LINE__);
7103 tqhash_update_end(rack->r_ctl.tqh, l_rsm, r_rsm->r_end);
7104 if (l_rsm->r_dupack < r_rsm->r_dupack)
7105 l_rsm->r_dupack = r_rsm->r_dupack;
7106 if (r_rsm->r_rtr_bytes)
7107 l_rsm->r_rtr_bytes += r_rsm->r_rtr_bytes;
7108 if (r_rsm->r_in_tmap) {
7110 TAILQ_REMOVE(&rack->r_ctl.rc_tmap, r_rsm, r_tnext);
7111 r_rsm->r_in_tmap = 0;
7115 if (r_rsm->r_flags & RACK_HAS_FIN)
7116 l_rsm->r_flags |= RACK_HAS_FIN;
7117 if (r_rsm->r_flags & RACK_TLP)
7118 l_rsm->r_flags |= RACK_TLP;
7119 if (r_rsm->r_flags & RACK_RWND_COLLAPSED)
7120 l_rsm->r_flags |= RACK_RWND_COLLAPSED;
7121 if ((r_rsm->r_flags & RACK_APP_LIMITED) &&
7122 ((l_rsm->r_flags & RACK_APP_LIMITED) == 0)) {
7124 * If both are app-limited then let the
7128 l_rsm->r_flags |= RACK_APP_LIMITED;
7129 r_rsm->r_flags &= ~RACK_APP_LIMITED;
7130 if (r_rsm == rack->r_ctl.rc_first_appl)
7131 rack->r_ctl.rc_first_appl = l_rsm;
7133 tqhash_remove(rack->r_ctl.tqh, r_rsm, REMOVE_TYPE_MERGE);
7148 if(l_rsm->r_tim_lastsent[(l_rsm->r_rtr_cnt-1)] <
7149 r_rsm->r_tim_lastsent[(r_rsm->r_rtr_cnt-1)]) {
7150 l_rsm->r_tim_lastsent[(l_rsm->r_rtr_cnt-1)] = r_rsm->r_tim_lastsent[(r_rsm->r_rtr_cnt-1)];
7157 if(l_rsm->r_ack_arrival < r_rsm->r_ack_arrival)
7158 l_rsm->r_ack_arrival = r_rsm->r_ack_arrival;
7160 if ((r_rsm->r_limit_type == 0) && (l_rsm->r_limit_type != 0)) {
7162 r_rsm->r_limit_type = l_rsm->r_limit_type;
7163 l_rsm->r_limit_type = 0;
7166 l_rsm->r_flags |= RACK_MERGED;
7191 if (TSTMP_LT(cts, rack->r_ctl.rc_timer_exp)) {
7197 return (-ETIMEDOUT); /* tcp_drop() */
7204 rack->r_ctl.retran_during_recovery = 0;
7205 rack->r_might_revert = 0;
7206 rack->r_ctl.dsack_byte_cnt = 0;
7208 if (rack->r_state && (rack->r_state != tp->t_state))
7210 avail = sbavail(&so->so_snd);
7211 out = tp->snd_max - tp->snd_una;
7212 if ((out > tp->snd_wnd) || rack->rc_has_collapsed) {
7217 if (rack->r_ctl.dsack_persist && (rack->r_ctl.rc_tlp_cnt_out >= 1)) {
7218 rack->r_ctl.dsack_persist--;
7219 if (rack->r_ctl.num_dsack && (rack->r_ctl.dsack_persist == 0)) {
7220 rack->r_ctl.num_dsack = 0;
7224 if ((tp->t_flags & TF_GPUTINPROG) &&
7225 (rack->r_ctl.rc_tlp_cnt_out == 1)) {
7234 tp->t_flags &= ~TF_GPUTINPROG;
7235 rack_log_pacing_delay_calc(rack, (tp->gput_ack - tp->gput_seq) /*flex2*/,
7236 rack->r_ctl.rc_gp_srtt /*flex1*/,
7237 tp->gput_seq,
7244 if (rack_always_send_oldest && (TAILQ_EMPTY(&rack->r_ctl.rc_tmap) == 0))
7249 amm = avail - out;
7252 if ((amm + out) > tp->snd_wnd) {
7260 if (IN_FASTRECOVERY(tp->t_flags)) {
7262 if (rack->rack_no_prr == 0) {
7263 if (out + amm <= tp->snd_wnd) {
7264 rack->r_ctl.rc_prr_sndcnt = amm;
7265 rack->r_ctl.rc_tlp_new_data = amm;
7271 /* Set the send-new override */
7272 if (out + amm <= tp->snd_wnd)
7273 rack->r_ctl.rc_tlp_new_data = amm;
7277 rack->r_ctl.rc_tlpsend = NULL;
7283 * Ok we need to arrange the last un-acked segment to be re-sent, or
7284 * optionally the first un-acked segment.
7288 rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap);
7290 rsm = tqhash_max(rack->r_ctl.tqh);
7291 if (rsm && (rsm->r_flags & (RACK_ACKED | RACK_HAS_FIN))) {
7306 if (SEQ_GT((rack->r_ctl.last_collapse_point - 1), rack->rc_tp->snd_una))
7307 rsm = tqhash_find(rack->r_ctl.tqh, (rack->r_ctl.last_collapse_point - 1));
7309 rsm = tqhash_min(rack->r_ctl.tqh);
7316 if ((rsm->r_end - rsm->r_start) > ctf_fixed_maxseg(tp)) {
7331 (rsm->r_end - ctf_fixed_maxseg(tp)));
7334 (void)tqhash_insert(rack->r_ctl.tqh, nrsm);
7336 if ((insret = tqhash_insert(rack->r_ctl.tqh, nrsm)) != 0) {
7337 panic("Insert in tailq_hash of %p fails ret:%d rack:%p rsm:%p",
7341 if (rsm->r_in_tmap) {
7342 TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, rsm, nrsm, r_tnext);
7343 nrsm->r_in_tmap = 1;
7347 rack->r_ctl.rc_tlpsend = rsm;
7351 rack->r_timer_override = 1;
7352 rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_TLP;
7355 rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_TLP;
7372 tp->t_flags &= ~TF_DELACK;
7373 tp->t_flags |= TF_ACKNOW;
7375 rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_DELACK;
7384 t_template = tcpip_maketemplate(rack->rc_inp);
7386 if (rack->forced_ack == 0) {
7387 rack->forced_ack = 1;
7388 rack->r_ctl.forced_ack_ts = tcp_get_usecs(NULL);
7390 rack->probe_not_answered = 1;
7392 tcp_respond(rack->rc_tp, t_template->tt_ipgen,
7393 &t_template->tt_t, (struct mbuf *)NULL,
7394 rack->rc_tp->rcv_nxt, rack->rc_tp->snd_una - 1, 0);
7396 /* This does send an ack so kill any D-ack timer */
7397 if (rack->rc_tp->t_flags & TF_DELACK)
7398 rack->rc_tp->t_flags &= ~TF_DELACK;
7418 if (rack->rc_in_persist == 0)
7423 counter_u64_add(rack_persists_lost_ends, rack->r_ctl.persist_lost_ends);
7424 return (-ETIMEDOUT); /* tcp_drop() */
7437 if (tp->t_rxtshift >= V_tcp_retries &&
7438 (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
7439 TICKS_2_USEC(ticks - tp->t_rcvtime) >= RACK_REXMTVAL(tp) * tcp_totbackoff)) {
7442 counter_u64_add(rack_persists_lost_ends, rack->r_ctl.persist_lost_ends);
7443 retval = -ETIMEDOUT; /* tcp_drop() */
7446 if ((sbavail(&rack->rc_inp->inp_socket->so_snd) == 0) &&
7447 tp->snd_una == tp->snd_max)
7449 rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_PERSIT;
7454 if (tp->t_state > TCPS_CLOSE_WAIT &&
7455 (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
7458 counter_u64_add(rack_persists_lost_ends, rack->r_ctl.persist_lost_ends);
7459 retval = -ETIMEDOUT; /* tcp_drop() */
7464 if (rack->probe_not_answered) {
7466 rack->r_ctl.persist_lost_ends++;
7471 if (tp->t_rxtshift < V_tcp_retries)
7472 tp->t_rxtshift++;
7491 rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_KEEP;
7494 * Keep-alive timer went off; send something or drop connection if
7498 if (tp->t_state < TCPS_ESTABLISHED)
7500 if ((V_tcp_always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
7501 tp->t_state <= TCPS_CLOSING) {
7502 if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
7509 * number tp->snd_una-1 causes the transmitted zero-length
7522 return (-ETIMEDOUT); /* tcp_drop() */
7534 * un-acked.
7539 rack = (struct tcp_rack *)tp->t_fb_ptr;
7542 rack->r_timer_override = 1;
7543 rack->r_ctl.rc_snd_max_at_rto = tp->snd_max;
7544 rack->r_ctl.rc_last_timeout_snduna = tp->snd_una;
7545 rack->r_late = 0;
7546 rack->r_early = 0;
7547 rack->r_ctl.rc_agg_delayed = 0;
7548 rack->r_ctl.rc_agg_early = 0;
7549 if (rack->r_state && (rack->r_state != tp->t_state))
7551 if (tp->t_rxtshift <= rack_rxt_scoreboard_clear_thresh) {
7554 * more than rack_rxt_scoreboard_clear_thresh time-outs.
7556 rack->r_ctl.rc_resend = TAILQ_FIRST(&rack->r_ctl.rc_tmap);
7557 if (rack->r_ctl.rc_resend != NULL)
7558 rack->r_ctl.rc_resend->r_flags |= RACK_TO_REXT;
7564 * mark SACK-PASS on anything not acked here.
7573 * sacks that come floating in will "re-ack" the data.
7578 TAILQ_INIT(&rack->r_ctl.rc_tmap);
7580 TQHASH_FOREACH(rsm, rack->r_ctl.tqh) {
7581 rsm->r_dupack = 0;
7584 /* We must re-add it back to the tlist */
7586 TAILQ_INSERT_HEAD(&rack->r_ctl.rc_tmap, rsm, r_tnext);
7588 TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, trsm, rsm, r_tnext);
7590 rsm->r_in_tmap = 1;
7592 if (rsm->r_flags & RACK_ACKED)
7593 rsm->r_flags |= RACK_WAS_ACKED;
7594 rsm->r_flags &= ~(RACK_ACKED | RACK_SACK_PASSED | RACK_WAS_SACKPASS | RACK_RWND_COLLAPSED | RACK_WAS_LOST);
7595 rsm->r_flags |= RACK_MUST_RXT;
7598 rack->r_ctl.rc_considered_lost = 0;
7599 /* Clear the count (we just un-acked them) */
7600 rack->r_ctl.rc_sacked = 0;
7601 rack->r_ctl.rc_sacklast = NULL;
7603 rack->r_ctl.rc_resend = tqhash_min(rack->r_ctl.tqh);
7604 if (rack->r_ctl.rc_resend != NULL)
7605 rack->r_ctl.rc_resend->r_flags |= RACK_TO_REXT;
7606 rack->r_ctl.rc_prr_sndcnt = 0;
7608 rack->r_ctl.rc_resend = tqhash_min(rack->r_ctl.tqh);
7609 if (rack->r_ctl.rc_resend != NULL)
7610 rack->r_ctl.rc_resend->r_flags |= RACK_TO_REXT;
7611 if (((tp->t_flags & TF_SACK_PERMIT) == 0) &&
7612 ((tp->t_flags & TF_SENTFIN) == 0)) {
7614 * For non-sack customers new data
7618 rack->r_must_retran = 1;
7619 rack->r_ctl.rc_out_at_rto = ctf_flight_size(rack->rc_tp,
7620 rack->r_ctl.rc_sacked);
7628 tp->t_rxtcur = RACK_REXMTVAL(tp);
7629 if (TCPS_HAVEESTABLISHED(tp->t_state)) {
7630 tp->t_rxtcur += TICKS_2_USEC(tcp_rexmit_slop);
7632 if (tp->t_rxtcur > rack_rto_max) {
7633 tp->t_rxtcur = rack_rto_max;
7643 rack = (struct tcp_rack *)tp->t_fb_ptr;
7644 srtt = tp->t_srtt;
7650 if ((srtt == 0) && (tp->t_srtt != 0))
7658 if (tp->snd_ssthresh < tp->snd_wnd) {
7659 tp->snd_ssthresh = tp->snd_wnd;
7665 if (rc_init_window(rack) < tp->snd_cwnd)
7666 tp->snd_cwnd = rc_init_window(rack);
7670 * Re-transmit timeout! If we drop the PCB we will return 1, otherwise
7681 if ((tp->t_flags & TF_GPUTINPROG) &&
7682 (tp->t_rxtshift)) {
7689 tp->t_flags &= ~TF_GPUTINPROG;
7690 rack_log_pacing_delay_calc(rack, (tp->gput_ack - tp->gput_seq) /*flex2*/,
7691 rack->r_ctl.rc_gp_srtt /*flex1*/,
7692 tp->gput_seq,
7698 return (-ETIMEDOUT); /* tcp_drop() */
7700 rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_RXT;
7701 rack->r_ctl.retran_during_recovery = 0;
7702 rack->rc_ack_required = 1;
7703 rack->r_ctl.dsack_byte_cnt = 0;
7704 if (IN_RECOVERY(tp->t_flags) &&
7705 (rack->rto_from_rec == 0)) {
7712 rack->rto_from_rec = 1;
7713 rack->r_ctl.rto_ssthresh = tp->snd_ssthresh;
7715 if (IN_FASTRECOVERY(tp->t_flags))
7716 tp->t_flags |= TF_WASFRECOVERY;
7718 tp->t_flags &= ~TF_WASFRECOVERY;
7719 if (IN_CONGRECOVERY(tp->t_flags))
7720 tp->t_flags |= TF_WASCRECOVERY;
7722 tp->t_flags &= ~TF_WASCRECOVERY;
7723 if (TCPS_HAVEESTABLISHED(tp->t_state) &&
7724 (tp->snd_una == tp->snd_max)) {
7728 if (rack->r_ctl.dsack_persist) {
7729 rack->r_ctl.dsack_persist--;
7730 if (rack->r_ctl.num_dsack && (rack->r_ctl.dsack_persist == 0)) {
7731 rack->r_ctl.num_dsack = 0;
7743 if ((TCPS_HAVEESTABLISHED(tp->t_state) == 0) &&
7747 rsm = tqhash_min(rack->r_ctl.tqh);
7750 if ((TSTMP_GT(cts, (uint32_t)rsm->r_tim_lastsent[0])) &&
7751 ((cts - (uint32_t)rsm->r_tim_lastsent[0]) >= TICKS_2_USEC(TP_KEEPINIT(tp)))) {
7763 if ((rack->r_ctl.rc_resend == NULL) ||
7764 ((rack->r_ctl.rc_resend->r_flags & RACK_RWND_COLLAPSED) == 0)) {
7771 tp->t_rxtshift++;
7774 if (tp->t_rxtshift > V_tcp_retries) {
7777 tp->t_rxtshift = V_tcp_retries;
7780 MPASS(tp->t_softerror >= 0);
7781 retval = tp->t_softerror ? -tp->t_softerror : -ETIMEDOUT;
7784 if (tp->t_state == TCPS_SYN_SENT) {
7789 tp->snd_cwnd = 1;
7790 } else if (tp->t_rxtshift == 1) {
7797 * End-to-End Network Path Properties" by Allman and Paxson
7800 tp->snd_cwnd_prev = tp->snd_cwnd;
7801 tp->snd_ssthresh_prev = tp->snd_ssthresh;
7802 tp->snd_recover_prev = tp->snd_recover;
7803 tp->t_badrxtwin = ticks + (USEC_2_TICKS(tp->t_srtt)/2);
7804 tp->t_flags |= TF_PREVVALID;
7805 } else if ((tp->t_flags & TF_RCVD_TSTMP) == 0)
7806 tp->t_flags &= ~TF_PREVVALID;
7808 if ((tp->t_state == TCPS_SYN_SENT) ||
7809 (tp->t_state == TCPS_SYN_RECEIVED))
7810 rexmt = RACK_INITIAL_RTO * tcp_backoff[tp->t_rxtshift];
7812 rexmt = max(rack_rto_min, (tp->t_srtt + (tp->t_rttvar << 2))) * tcp_backoff[tp->t_rxtshift];
7814 RACK_TCPT_RANGESET(tp->t_rxtcur, rexmt,
7815 max(rack_rto_min, rexmt), rack_rto_max, rack->r_ctl.timer_slop);
7824 isipv6 = (inp->inp_vflag & INP_IPV6) ? true : false;
7831 ((tp->t_state == TCPS_ESTABLISHED) ||
7832 (tp->t_state == TCPS_FIN_WAIT_1))) {
7835 * 1448 -> 1188 -> 524) should be given 2 chances to recover
7836 * before further clamping down. 'tp->t_rxtshift % 2 == 0'
7839 if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD | TF2_PLPMTU_MAXSEGSNT)) ==
7841 (tp->t_rxtshift >= 2 && tp->t_rxtshift < 6 &&
7842 tp->t_rxtshift % 2 == 0)) {
7844 * Enter Path MTU Black-hole Detection mechanism: -
7845 * Disable Path MTU Discovery (IP "DF" bit). -
7849 if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) == 0) {
7851 tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
7853 tp->t_pmtud_saved_maxseg = tp->t_maxseg;
7862 tp->t_maxseg > V_tcp_v6pmtud_blackhole_mss) {
7864 tp->t_maxseg = V_tcp_v6pmtud_blackhole_mss;
7868 tp->t_maxseg = V_tcp_v6mssdflt;
7873 tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
7881 if (tp->t_maxseg > V_tcp_pmtud_blackhole_mss) {
7883 tp->t_maxseg = V_tcp_pmtud_blackhole_mss;
7887 tp->t_maxseg = V_tcp_mssdflt;
7892 tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
7905 if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
7906 (tp->t_rxtshift >= 6)) {
7907 tp->t_flags2 |= TF2_PLPMTU_PMTUD;
7908 tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
7909 tp->t_maxseg = tp->t_pmtud_saved_maxseg;
7910 if (tp->t_maxseg < V_tcp_mssdflt) {
7916 tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
7918 tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
7926 * our third SYN to work-around some broken terminal servers
7929 * unknown-to-them TCP options.
7931 if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
7932 (tp->t_rxtshift == 3))
7933 tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
7940 if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
7942 if ((inp->inp_vflag & INP_IPV6) != 0)
7947 tp->t_rttvar += tp->t_srtt;
7948 tp->t_srtt = 0;
7950 sack_filter_clear(&rack->r_ctl.rack_sf, tp->snd_una);
7951 tp->snd_recover = tp->snd_max;
7952 tp->t_flags |= TF_ACKNOW;
7953 tp->t_rtttime = 0;
7954 rack_cong_signal(tp, CC_RTO, tp->snd_una, __LINE__);
7963 int32_t timers = (rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK);
7965 if ((tp->t_state >= TCPS_FIN_WAIT_1) &&
7966 (tp->t_flags & TF_GPUTINPROG)) {
7975 bytes = tp->gput_ack - tp->gput_seq;
7976 if (SEQ_GT(tp->gput_seq, tp->snd_una))
7977 bytes += tp->gput_seq - tp->snd_una;
7978 if (bytes > sbavail(&tptosocket(tp)->so_snd)) {
7984 rack_log_pacing_delay_calc(rack, (tp->gput_ack - tp->gput_seq) /*flex2*/,
7985 rack->r_ctl.rc_gp_srtt /*flex1*/,
7986 tp->gput_seq,
7988 tp->t_flags &= ~TF_GPUTINPROG;
7994 if (tp->t_state == TCPS_LISTEN) {
7996 if (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT)
8001 rack->rc_on_min_to) {
8004 * are on a min-timeout (which means rrr_conf = 3)
8009 * If it's on a normal rack timer (non-min) then
8014 if (TSTMP_LT(cts, rack->r_ctl.rc_timer_exp)) {
8017 if (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) {
8018 ret = -1;
8029 ret = -2;
8036 * no-sack wakeup on since we no longer have a PKT_OUTPUT
8039 rack->rc_tp->t_flags2 &= ~TF2_DONT_SACK_QUEUE;
8040 ret = -3;
8041 left = rack->r_ctl.rc_timer_exp - cts;
8047 rack->rc_tmr_stopped = 0;
8048 rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_MASK;
8052 rack->r_ctl.rc_tlp_rxt_last_time = cts;
8053 rack->r_fast_output = 0;
8056 rack->r_ctl.rc_tlp_rxt_last_time = cts;
8059 rack->r_ctl.rc_tlp_rxt_last_time = cts;
8060 rack->r_fast_output = 0;
8078 flags_on_entry = rack->r_ctl.rc_hpts_flags;
8080 if ((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) &&
8081 ((TSTMP_GEQ(us_cts, rack->r_ctl.rc_last_output_to)) ||
8082 ((tp->snd_max - tp->snd_una) == 0))) {
8083 tcp_hpts_remove(rack->rc_tp);
8086 if ((tp->snd_max - tp->snd_una) == 0)
8087 rack->r_ctl.rc_hpts_flags &= ~PACE_PKT_OUTPUT;
8090 if (rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK) {
8091 rack->rc_tmr_stopped = rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK;
8092 if (tcp_in_hpts(rack->rc_tp) &&
8093 ((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) == 0)) {
8099 tcp_hpts_remove(rack->rc_tp);
8102 rack->r_ctl.rc_hpts_flags &= ~(PACE_TMR_MASK);
8113 rack = (struct tcp_rack *)tp->t_fb_ptr;
8114 rack->t_timers_stopped = 1;
8129 rack->rc_in_persist = 1;
8131 if (tcp_in_hpts(rack->rc_tp)) {
8132 tcp_hpts_remove(rack->rc_tp);
8142 rsm->r_rtr_cnt++;
8143 if (rsm->r_rtr_cnt > RACK_NUM_OF_RETRANS) {
8144 rsm->r_rtr_cnt = RACK_NUM_OF_RETRANS;
8145 rsm->r_flags |= RACK_OVERMAX;
8147 rsm->r_act_rxt_cnt++;
8150 rsm->r_dupack = 0;
8151 if ((rsm->r_rtr_cnt > 1) && ((rsm->r_flags & RACK_TLP) == 0)) {
8152 rack->r_ctl.rc_holes_rxt += (rsm->r_end - rsm->r_start);
8153 rsm->r_rtr_bytes += (rsm->r_end - rsm->r_start);
8155 if (rsm->r_flags & RACK_WAS_LOST) {
8161 rsm->r_flags &= ~RACK_WAS_LOST;
8162 KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)),
8163 ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
8164 if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start))
8165 rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start;
8167 rack->r_ctl.rc_considered_lost = 0;
8169 idx = rsm->r_rtr_cnt - 1;
8170 rsm->r_tim_lastsent[idx] = ts;
8173 * in snduna <->snd_max.
8175 rsm->r_fas = ctf_flight_size(rack->rc_tp,
8176 rack->r_ctl.rc_sacked);
8177 if (rsm->r_flags & RACK_ACKED) {
8179 rsm->r_flags &= ~RACK_ACKED;
8180 rack->r_ctl.rc_sacked -= (rsm->r_end - rsm->r_start);
8182 if (rsm->r_in_tmap) {
8183 TAILQ_REMOVE(&rack->r_ctl.rc_tmap, rsm, r_tnext);
8184 rsm->r_in_tmap = 0;
8188 TAILQ_INSERT_TAIL(&rack->r_ctl.rc_tmap, rsm, r_tnext);
8189 rsm->r_in_tmap = 1;
8190 rsm->r_bas = (uint8_t)(((rsm->r_end - rsm->r_start) + segsiz - 1) / segsiz);
8192 if (rsm->r_flags & RACK_MUST_RXT) {
8193 if (rack->r_must_retran)
8194 rack->r_ctl.rc_out_at_rto -= (rsm->r_end - rsm->r_start);
8195 if (SEQ_GEQ(rsm->r_end, rack->r_ctl.rc_snd_max_at_rto)) {
8200 rack->r_must_retran = 0;
8201 rack->r_ctl.rc_out_at_rto = 0;
8203 rsm->r_flags &= ~RACK_MUST_RXT;
8206 rsm->r_flags &= ~RACK_RWND_COLLAPSED;
8207 if (rsm->r_flags & RACK_SACK_PASSED) {
8209 rsm->r_flags &= ~RACK_SACK_PASSED;
8210 rsm->r_flags |= RACK_WAS_SACKPASS;
8219 * We (re-)transmitted starting at rsm->r_start for some length
8228 c_end = rsm->r_start + len;
8229 if (SEQ_GEQ(c_end, rsm->r_end)) {
8235 if (c_end == rsm->r_end) {
8242 act_len = rsm->r_end - rsm->r_start;
8243 *lenp = (len - act_len);
8244 return (rsm->r_end);
8268 nrsm->r_dupack = 0;
8271 (void)tqhash_insert(rack->r_ctl.tqh, nrsm);
8273 if ((insret = tqhash_insert(rack->r_ctl.tqh, nrsm)) != 0) {
8274 panic("Insert in tailq_hash of %p fails ret:%d rack:%p rsm:%p",
8278 if (rsm->r_in_tmap) {
8279 TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, rsm, nrsm, r_tnext);
8280 nrsm->r_in_tmap = 1;
8282 rsm->r_flags &= (~RACK_HAS_FIN);
8320 * -- i.e. return if err != 0 or should we pretend we sent it? --
8326 * We don't log errors -- we could but snd_max does not
8338 rack = (struct tcp_rack *)tp->t_fb_ptr;
8339 snd_una = tp->snd_una;
8340 snd_max = tp->snd_max;
8348 if ((th_flags & TH_SYN) && (seq_out == tp->iss))
8354 /* Are we sending an old segment to induce an ack (keep-alive)? */
8364 len = end - seq_out;
8372 if (IN_FASTRECOVERY(tp->t_flags)) {
8373 rack->r_ctl.rc_prr_out += len;
8389 rsm->r_flags = RACK_HAS_FIN|add_flag;
8391 rsm->r_flags = add_flag;
8394 rsm->r_hw_tls = 1;
8395 rsm->r_tim_lastsent[0] = cts;
8396 rsm->r_rtr_cnt = 1;
8397 rsm->r_act_rxt_cnt = 0;
8398 rsm->r_rtr_bytes = 0;
8401 rsm->r_flags |= RACK_HAS_SYN;
8403 rsm->r_start = seq_out;
8404 rsm->r_end = rsm->r_start + len;
8406 rsm->r_dupack = 0;
8412 rsm->m = s_mb;
8413 rsm->soff = s_moff;
8416 * reflected in snduna <->snd_max
8418 rsm->r_fas = (ctf_flight_size(rack->rc_tp,
8419 rack->r_ctl.rc_sacked) +
8420 (rsm->r_end - rsm->r_start));
8421 if ((rack->rc_initial_ss_comp == 0) &&
8422 (rack->r_ctl.ss_hi_fs < rsm->r_fas)) {
8423 rack->r_ctl.ss_hi_fs = rsm->r_fas;
8425 /* rsm->m will be NULL if RACK_HAS_SYN or RACK_HAS_FIN is set */
8426 if (rsm->m) {
8427 if (rsm->m->m_len <= rsm->soff) {
8433 * within rsm->m. But if the sbsndptr was
8439 lm = rsm->m;
8440 while (lm->m_len <= rsm->soff) {
8441 rsm->soff -= lm->m_len;
8442 lm = lm->m_next;
8443 KASSERT(lm != NULL, ("%s rack:%p lm goes null orig_off:%u origmb:%p rsm->soff:%u",
8444 __func__, rack, s_moff, s_mb, rsm->soff));
8446 rsm->m = lm;
8448 rsm->orig_m_len = rsm->m->m_len;
8449 rsm->orig_t_space = M_TRAILINGROOM(rsm->m);
8451 rsm->orig_m_len = 0;
8452 rsm->orig_t_space = 0;
8454 rsm->r_bas = (uint8_t)((len + segsiz - 1) / segsiz);
8459 (void)tqhash_insert(rack->r_ctl.tqh, rsm);
8461 if ((insret = tqhash_insert(rack->r_ctl.tqh, rsm)) != 0) {
8462 panic("Insert in tailq_hash of %p fails ret:%d rack:%p rsm:%p",
8466 TAILQ_INSERT_TAIL(&rack->r_ctl.rc_tmap, rsm, r_tnext);
8467 rsm->r_in_tmap = 1;
8468 if (rsm->r_flags & RACK_IS_PCM) {
8469 rack->r_ctl.pcm_i.send_time = cts;
8470 rack->r_ctl.pcm_i.eseq = rsm->r_end;
8472 if (rack->pcm_in_progress == 0)
8473 rack->r_ctl.pcm_i.sseq = rsm->r_start;
8481 if ((IN_FASTRECOVERY(tp->t_flags) == 0) &&
8482 (ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked) == ctf_fixed_maxseg(tp))) {
8485 prsm = tqhash_prev(rack->r_ctl.tqh, rsm);
8487 prsm->r_one_out_nr = 1;
8495 if (hintrsm && (hintrsm->r_start == seq_out)) {
8502 if ((rsm) && (rsm->r_start == seq_out)) {
8512 rsm = tqhash_find(rack->r_ctl.tqh, seq_out);
8514 if (rsm->r_start == seq_out) {
8522 if (SEQ_GEQ(seq_out, rsm->r_start) && SEQ_LT(seq_out, rsm->r_end)) {
8540 (void)tqhash_insert(rack->r_ctl.tqh, nrsm);
8542 if ((insret = tqhash_insert(rack->r_ctl.tqh, nrsm)) != 0) {
8543 panic("Insert in tailq_hash of %p fails ret:%d rack:%p rsm:%p",
8547 if (rsm->r_in_tmap) {
8548 TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, rsm, nrsm, r_tnext);
8549 nrsm->r_in_tmap = 1;
8551 rsm->r_flags &= (~RACK_HAS_FIN);
8563 if (seq_out == tp->snd_max) {
8565 } else if (SEQ_LT(seq_out, tp->snd_max)) {
8567 printf("seq_out:%u len:%d snd_una:%u snd_max:%u -- but rsm not found?\n",
8568 seq_out, len, tp->snd_una, tp->snd_max);
8570 TQHASH_FOREACH(rsm, rack->r_ctl.tqh) {
8571 printf("rsm:%p start:%u end:%u\n",
8572 rsm, rsm->r_start, rsm->r_end);
8575 panic("seq_out not found rack:%p tp:%p",
8581 * Hmm beyond sndmax? (only if we are using the new rtt-pack
8584 panic("seq_out:%u(%d) is beyond snd_max:%u tp:%p",
8585 seq_out, len, tp->snd_max, tp);
8599 if ((rack->r_ctl.rack_rs.rs_flags & RACK_RTT_EMPTY) ||
8600 (rack->r_ctl.rack_rs.rs_rtt_lowest > rtt)) {
8601 rack->r_ctl.rack_rs.rs_rtt_lowest = rtt;
8603 if ((rack->r_ctl.rack_rs.rs_flags & RACK_RTT_EMPTY) ||
8604 (rack->r_ctl.rack_rs.rs_rtt_highest < rtt)) {
8605 rack->r_ctl.rack_rs.rs_rtt_highest = rtt;
8607 if (rack->rc_tp->t_flags & TF_GPUTINPROG) {
8608 if (us_rtt < rack->r_ctl.rc_gp_lowrtt)
8609 rack->r_ctl.rc_gp_lowrtt = us_rtt;
8610 if (rack->rc_tp->snd_wnd > rack->r_ctl.rc_gp_high_rwnd)
8611 rack->r_ctl.rc_gp_high_rwnd = rack->rc_tp->snd_wnd;
8615 (rsm->r_just_ret) ||
8616 (rsm->r_one_out_nr &&
8617 len < (ctf_fixed_maxseg(rack->rc_tp) * 2)))) {
8624 * the r_one_out_nr. If it was a CUM-ACK and
8631 if ((rack->r_ctl.rack_rs.rs_flags & RACK_RTT_EMPTY) ||
8632 (rack->r_ctl.rack_rs.rs_us_rtt > us_rtt)) {
8633 if (rack->r_ctl.rack_rs.confidence == 0) {
8638 rack->r_ctl.rack_rs.rs_us_rtt = us_rtt;
8639 rack->r_ctl.rack_rs.confidence = confidence;
8640 rack->r_ctl.rack_rs.rs_us_rtrcnt = rtrcnt;
8649 rack->r_ctl.rack_rs.rs_us_rtt = us_rtt;
8650 rack->r_ctl.rack_rs.confidence = confidence;
8651 rack->r_ctl.rack_rs.rs_us_rtrcnt = rtrcnt;
8654 rack_log_rtt_upd(rack->rc_tp, rack, us_rtt, len, rsm, confidence);
8655 rack->r_ctl.rack_rs.rs_flags = RACK_RTT_VALID;
8656 rack->r_ctl.rack_rs.rs_rtt_tot += rtt;
8657 rack->r_ctl.rack_rs.rs_rtt_cnt++;
8661 * Collect new round-trip time estimate
8670 if (rack->r_ctl.rack_rs.rs_flags & RACK_RTT_EMPTY)
8673 if (rack->r_ctl.rc_rate_sample_method == USE_RTT_LOW) {
8675 rtt = rack->r_ctl.rack_rs.rs_rtt_lowest;
8676 } else if (rack->r_ctl.rc_rate_sample_method == USE_RTT_HIGH) {
8678 rtt = rack->r_ctl.rack_rs.rs_rtt_highest;
8679 } else if (rack->r_ctl.rc_rate_sample_method == USE_RTT_AVG) {
8681 rtt = (int32_t)(rack->r_ctl.rack_rs.rs_rtt_tot /
8682 (uint64_t)rack->r_ctl.rack_rs.rs_rtt_cnt);
8685 panic("Unknown rtt variant %d", rack->r_ctl.rc_rate_sample_method);
8691 if (rack->rc_gp_rtt_set == 0) {
8696 rack->r_ctl.rc_gp_srtt = rack->r_ctl.rack_rs.rs_us_rtt;
8697 rack->rc_gp_rtt_set = 1;
8698 } else if (rack->r_ctl.rack_rs.confidence) {
8700 rack->r_ctl.rc_gp_srtt -= (rack->r_ctl.rc_gp_srtt/8);
8701 rack->r_ctl.rc_gp_srtt += rack->r_ctl.rack_rs.rs_us_rtt / 8;
8703 if (rack->r_ctl.rack_rs.confidence) {
8708 if (rack->r_ctl.rc_highest_us_rtt < rack->r_ctl.rack_rs.rs_us_rtt) {
8709 rack->r_ctl.rc_highest_us_rtt = rack->r_ctl.rack_rs.rs_us_rtt;
8711 if (rack->rc_highly_buffered == 0) {
8717 if ((rack->r_ctl.rc_highest_us_rtt / rack->r_ctl.rc_lowest_us_rtt) > rack_hbp_thresh) {
8718 rack_log_rtt_shrinks(rack, rack->r_ctl.rack_rs.rs_us_rtt,
8719 rack->r_ctl.rc_highest_us_rtt,
8720 rack->r_ctl.rc_lowest_us_rtt,
8722 rack->rc_highly_buffered = 1;
8726 if ((rack->r_ctl.rack_rs.confidence) ||
8727 (rack->r_ctl.rack_rs.rs_us_rtrcnt == 1)) {
8732 rack->r_ctl.rc_last_us_rtt = rack->r_ctl.rack_rs.rs_us_rtt;
8734 if (rack->r_ctl.rc_lowest_us_rtt > rack->r_ctl.rack_rs.rs_us_rtt) {
8735 rack->r_ctl.rc_lowest_us_rtt = rack->r_ctl.rack_rs.rs_us_rtt;
8736 if (rack->r_ctl.rc_lowest_us_rtt == 0)
8737 rack->r_ctl.rc_lowest_us_rtt = 1;
8740 rack = (struct tcp_rack *)tp->t_fb_ptr;
8741 if (tp->t_srtt != 0) {
8750 delta = tp->t_srtt - rtt;
8752 tp->t_srtt -= (tp->t_srtt >> 3);
8754 tp->t_srtt += (rtt >> 3);
8755 if (tp->t_srtt <= 0)
8756 tp->t_srtt = 1;
8759 delta = -delta;
8761 tp->t_rttvar -= (tp->t_rttvar >> 3);
8763 tp->t_rttvar += (delta >> 3);
8764 if (tp->t_rttvar <= 0)
8765 tp->t_rttvar = 1;
8768 * No rtt measurement yet - use the unsmoothed rtt. Set the
8772 tp->t_srtt = rtt;
8773 tp->t_rttvar = rtt >> 1;
8775 rack->rc_srtt_measure_made = 1;
8777 if (tp->t_rttupdated < UCHAR_MAX)
8778 tp->t_rttupdated++;
8782 stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt));
8788 ms_rtt = (rtt + HPTS_USEC_IN_MSEC - 1) / HPTS_USEC_IN_MSEC;
8789 stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, ms_rtt));
8795 ms_rtt = (rack->r_ctl.rack_rs.rs_us_rtt + HPTS_USEC_IN_MSEC - 1) / HPTS_USEC_IN_MSEC;
8796 stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, ms_rtt));
8799 stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rack->r_ctl.rack_rs.rs_us_rtt));
8801 stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_PATHRTT, imax(0, rack->r_ctl.rack_rs.rs_us_rtt));
8803 rack->r_ctl.last_rcv_tstmp_for_rtt = tcp_tv_to_mssectick(&rack->r_ctl.act_rcv_time);
8808 * tick of rounding and 1 extra tick because of +-1/2 tick
8814 tp->t_rxtshift = 0;
8815 RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
8816 max(rack_rto_min, rtt + 2), rack_rto_max, rack->r_ctl.timer_slop);
8818 tp->t_softerror = 0;
8826 * Apply to filter the inbound us-rtt at us_cts.
8830 old_rtt = get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt);
8831 apply_filter_min_small(&rack->r_ctl.rc_gp_min_rtt,
8841 if ((old_rtt - us_rtt) > rack_min_rtt_movement) {
8843 rack->rc_gp_dyn_mul &&
8844 (rack->use_fixed_rate == 0) &&
8845 (rack->rc_always_pace)) {
8848 * to the time that we would have entered probe-rtt.
8850 * has entered probe-rtt. Let's go in now too.
8856 if ((rack->in_probe_rtt == 0) &&
8857 (rack->rc_skip_timely == 0) &&
8858 ((us_cts - rack->r_ctl.rc_lower_rtt_us_cts) >= (rack_time_between_probertt - val))) {
8862 rack->r_ctl.rc_lower_rtt_us_cts = us_cts;
8875 if ((rsm->r_flags & RACK_ACKED) ||
8876 (rsm->r_flags & RACK_WAS_ACKED))
8879 if (rsm->r_no_rtt_allowed) {
8884 if (SEQ_GT(th_ack, rsm->r_end)) {
8885 len_acked = rsm->r_end - rsm->r_start;
8888 len_acked = th_ack - rsm->r_start;
8892 len_acked = rsm->r_end - rsm->r_start;
8895 if (rsm->r_rtr_cnt == 1) {
8897 t = cts - (uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)];
8900 if (!tp->t_rttlow || tp->t_rttlow > t)
8901 tp->t_rttlow = t;
8902 if (!rack->r_ctl.rc_rack_min_rtt ||
8903 SEQ_LT(t, rack->r_ctl.rc_rack_min_rtt)) {
8904 rack->r_ctl.rc_rack_min_rtt = t;
8905 if (rack->r_ctl.rc_rack_min_rtt == 0) {
8906 rack->r_ctl.rc_rack_min_rtt = 1;
8909 if (TSTMP_GT(tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time), rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)]))
8910 us_rtt = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time) - (uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)];
8912 us_rtt = tcp_get_usecs(NULL) - (uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)];
8915 if (CC_ALGO(tp)->rttsample != NULL) {
8917 CC_ALGO(tp)->rttsample(&tp->t_ccv, us_rtt, 1, rsm->r_fas);
8919 rack_apply_updated_usrtt(rack, us_rtt, tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time));
8921 rack_log_rtt_sample_calc(rack, t, (uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)], cts, 1);
8922 tcp_rack_xmit_timer(rack, t + 1, len_acked, us_rtt, 2 , rsm, rsm->r_rtr_cnt);
8935 * When we are not app-limited then we see if
8952 if (rsm->r_flags & RACK_APP_LIMITED) {
8957 } else if (rack->app_limited_needs_set == 0) {
8962 rack_log_rtt_sample_calc(rack, t, (uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)], cts, 2);
8964 calc_conf, rsm, rsm->r_rtr_cnt);
8966 if ((rsm->r_flags & RACK_TLP) &&
8967 (!IN_FASTRECOVERY(tp->t_flags))) {
8969 if (rack->r_ctl.rc_tlp_cwnd_reduce) {
8973 if ((rack->r_ctl.rc_rack_tmit_time == 0) ||
8974 (SEQ_LT(rack->r_ctl.rc_rack_tmit_time,
8975 (uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)]))) {
8977 rack->r_ctl.rc_rack_tmit_time = (uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)];
8978 if (rack->r_ctl.rc_rack_tmit_time == 0)
8979 rack->r_ctl.rc_rack_tmit_time = 1;
8980 rack->rc_rack_rtt = t;
8989 tp->t_rxtshift = 0;
8990 RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
8991 rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
8992 tp->t_softerror = 0;
8993 if (to && (to->to_flags & TOF_TS) &&
8995 (to->to_tsecr) &&
8996 ((rsm->r_flags & RACK_OVERMAX) == 0)) {
9001 for (i = 0; i < rsm->r_rtr_cnt; i++) {
9002 if (rack_ts_to_msec(rsm->r_tim_lastsent[i]) == to->to_tsecr) {
9003 t = cts - (uint32_t)rsm->r_tim_lastsent[i];
9006 if (CC_ALGO(tp)->rttsample != NULL) {
9014 if (TSTMP_GT(tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time), rsm->r_tim_lastsent[i]))
9015 us_rtt = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time) - (uint32_t)rsm->r_tim_lastsent[i];
9017 us_rtt = tcp_get_usecs(NULL) - (uint32_t)rsm->r_tim_lastsent[i];
9018 CC_ALGO(tp)->rttsample(&tp->t_ccv, us_rtt, 1, rsm->r_fas);
9020 if ((i + 1) < rsm->r_rtr_cnt) {
9032 if (!tp->t_rttlow || tp->t_rttlow > t)
9033 tp->t_rttlow = t;
9034 if (!rack->r_ctl.rc_rack_min_rtt || SEQ_LT(t, rack->r_ctl.rc_rack_min_rtt)) {
9035 rack->r_ctl.rc_rack_min_rtt = t;
9036 if (rack->r_ctl.rc_rack_min_rtt == 0) {
9037 rack->r_ctl.rc_rack_min_rtt = 1;
9040 if ((rack->r_ctl.rc_rack_tmit_time == 0) ||
9041 (SEQ_LT(rack->r_ctl.rc_rack_tmit_time,
9042 (uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)]))) {
9044 rack->r_ctl.rc_rack_tmit_time = (uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)];
9045 if (rack->r_ctl.rc_rack_tmit_time == 0)
9046 rack->r_ctl.rc_rack_tmit_time = 1;
9047 rack->rc_rack_rtt = t;
9049 rack_log_rtt_sample_calc(rack, t, (uint32_t)rsm->r_tim_lastsent[i], cts, 3);
9051 rsm->r_rtr_cnt);
9056 if (tcp_bblogging_on(rack->rc_tp)) {
9057 for (i = 0; i < rsm->r_rtr_cnt; i++) {
9058 rack_log_rtt_sendmap(rack, i, rsm->r_tim_lastsent[i], to->to_tsecr);
9066 * time-stamp since it's not there or the time the peer last
9067 * received a segment that moved forward its cum-ack point.
9070 i = rsm->r_rtr_cnt - 1;
9071 t = cts - (uint32_t)rsm->r_tim_lastsent[i];
9074 if (rack->r_ctl.rc_rack_min_rtt && SEQ_LT(t, rack->r_ctl.rc_rack_min_rtt)) {
9079 * 6.2 Step 2 point 2 in the rack-draft so we
9085 } else if (rack->r_ctl.rc_rack_min_rtt) {
9090 if (!rack->r_ctl.rc_rack_min_rtt ||
9091 SEQ_LT(t, rack->r_ctl.rc_rack_min_rtt)) {
9092 rack->r_ctl.rc_rack_min_rtt = t;
9093 if (rack->r_ctl.rc_rack_min_rtt == 0) {
9094 rack->r_ctl.rc_rack_min_rtt = 1;
9097 if ((rack->r_ctl.rc_rack_tmit_time == 0) ||
9098 (SEQ_LT(rack->r_ctl.rc_rack_tmit_time,
9099 (uint32_t)rsm->r_tim_lastsent[i]))) {
9101 rack->r_ctl.rc_rack_tmit_time = (uint32_t)rsm->r_tim_lastsent[i];
9102 if (rack->r_ctl.rc_rack_tmit_time == 0)
9103 rack->r_ctl.rc_rack_tmit_time = 1;
9104 rack->rc_rack_rtt = t;
9126 TAILQ_FOREACH_REVERSE_FROM(nrsm, &rack->r_ctl.rc_tmap,
9132 if (nrsm->r_flags & RACK_ACKED) {
9140 if (nrsm->r_flags & RACK_RWND_COLLAPSED) {
9148 if ((nrsm->r_flags & RACK_WAS_LOST) == 0) {
9151 exp = ((uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)]) + thresh;
9154 nrsm->r_flags |= RACK_WAS_LOST;
9155 rack->r_ctl.rc_considered_lost += nrsm->r_end - nrsm->r_start;
9158 if (nrsm->r_flags & RACK_SACK_PASSED) {
9166 nrsm->r_flags |= RACK_SACK_PASSED;
9167 nrsm->r_flags &= ~RACK_WAS_SACKPASS;
9181 if ((tp->t_flags & TF_GPUTINPROG) &&
9182 SEQ_GEQ(rsm->r_end, tp->gput_seq)) {
9192 if (rsm->r_rtr_cnt > 1) {
9205 seq = tp->gput_seq;
9206 ts = tp->gput_ts;
9207 rack->app_limited_needs_set = 0;
9208 tp->gput_ts = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time);
9211 SEQ_GEQ(rsm->r_start, tp->gput_seq)) {
9219 tp->gput_seq = rsm->r_start;
9222 SEQ_GEQ(rsm->r_end, tp->gput_seq)) {
9234 tp->gput_seq = rsm->r_end;
9240 * way up to where this ack cum-ack moves
9243 if (SEQ_GT(th_ack, rsm->r_end))
9244 tp->gput_seq = th_ack;
9246 tp->gput_seq = rsm->r_end;
9248 if (SEQ_LT(tp->gput_seq, tp->snd_max))
9249 s_rsm = tqhash_find(rack->r_ctl.tqh, tp->gput_seq);
9263 rack->r_ctl.rc_gp_output_ts = s_rsm->r_tim_lastsent[0];
9265 /* If we hit here we have to have *not* sent tp->gput_seq */
9266 rack->r_ctl.rc_gp_output_ts = rsm->r_tim_lastsent[0];
9268 rack->app_limited_needs_set = 1;
9270 if (SEQ_GT(tp->gput_seq, tp->gput_ack)) {
9272 * We moved beyond this guy's range, re-calculate
9275 if (rack->rc_gp_filled == 0) {
9276 tp->gput_ack = tp->gput_seq + max(rc_init_window(rack), (MIN_GP_WIN * ctf_fixed_maxseg(tp)));
9278 tp->gput_ack = tp->gput_seq + rack_get_measure_window(tp, rack);
9285 if ((rack->in_probe_rtt == 0) &&
9286 (rack->measure_saw_probe_rtt) &&
9287 (SEQ_GEQ(tp->gput_seq, rack->r_ctl.rc_probertt_sndmax_atexit)))
9288 rack->measure_saw_probe_rtt = 0;
9289 rack_log_pacing_delay_calc(rack, ts, tp->gput_ts,
9290 seq, tp->gput_seq,
9291 (((uint64_t)rack->r_ctl.rc_app_limited_cnt << 32) |
9292 (uint64_t)rack->r_ctl.rc_gp_output_ts),
9294 if (rack->rc_gp_filled &&
9295 ((tp->gput_ack - tp->gput_seq) <
9301 if (ideal_amount > sbavail(&tptosocket(tp)->so_snd)) {
9304 * because it's too small to gain us anything we
9308 tp->t_flags &= ~TF_GPUTINPROG;
9309 rack_log_pacing_delay_calc(rack, tp->gput_ack, tp->gput_seq,
9311 (((uint64_t)rack->r_ctl.rc_app_limited_cnt << 32) |
9312 (uint64_t)rack->r_ctl.rc_gp_output_ts),
9318 tp->gput_ack = tp->gput_seq + ideal_amount;
9322 rack_log_gpset(rack, tp->gput_ack, 0, 0, line, 2, rsm);
9329 if (SEQ_LT(rsm->r_end, rack->r_ctl.last_tlp_acked_start)) {
9333 if (SEQ_GT(rsm->r_start, rack->r_ctl.last_tlp_acked_end)) {
9337 /* It has to be a sub-part of the original TLP recorded */
9353 start = sack->start;
9354 end = sack->end;
9359 (SEQ_LT(end, rsm->r_start)) ||
9360 (SEQ_GEQ(start, rsm->r_end)) ||
9361 (SEQ_LT(start, rsm->r_start))) {
9367 rsm = tqhash_find(rack->r_ctl.tqh, start);
9374 if (rsm->r_start != start) {
9375 if ((rsm->r_flags & RACK_ACKED) == 0) {
9380 if ((rsm->r_flags & RACK_TLP) &&
9381 (rsm->r_rtr_cnt > 1)) {
9386 if (rack->rc_last_tlp_acked_set &&
9394 rack_log_dsack_event(rack, 10, __LINE__, rsm->r_start, rsm->r_end);
9398 if (SEQ_LT(rsm->r_start, rack->r_ctl.last_tlp_acked_start)) {
9399 rack->r_ctl.last_tlp_acked_start = rsm->r_start;
9400 rack_log_dsack_event(rack, 11, __LINE__, rack->r_ctl.last_tlp_acked_start,
9401 rack->r_ctl.last_tlp_acked_end);
9403 if (SEQ_GT(rsm->r_end, rack->r_ctl.last_tlp_acked_end)) {
9404 rack->r_ctl.last_tlp_acked_end = rsm->r_end;
9405 rack_log_dsack_event(rack, 11, __LINE__, rack->r_ctl.last_tlp_acked_start,
9406 rack->r_ctl.last_tlp_acked_end);
9409 rack->r_ctl.last_tlp_acked_start = rsm->r_start;
9410 rack->r_ctl.last_tlp_acked_end = rsm->r_end;
9411 rack->rc_last_tlp_past_cumack = 0;
9412 rack->rc_last_tlp_acked_set = 1;
9413 rack_log_dsack_event(rack, 8, __LINE__, rsm->r_start, rsm->r_end);
9420 * rsm |--------------|
9421 * sackblk |------->
9423 * rsm |---|
9425 * nrsm |----------|
9437 next = tqhash_next(rack->r_ctl.tqh, rsm);
9439 (rsm->bindex == next->bindex) &&
9440 ((rsm->r_flags & RACK_STRADDLE) == 0) &&
9441 ((next->r_flags & RACK_STRADDLE) == 0) &&
9442 ((rsm->r_flags & RACK_IS_PCM) == 0) &&
9443 ((next->r_flags & RACK_IS_PCM) == 0) &&
9444 (rsm->r_flags & RACK_IN_GP_WIN) &&
9445 (next->r_flags & RACK_IN_GP_WIN))
9450 (next->r_flags & RACK_ACKED) &&
9451 SEQ_GEQ(end, next->r_start)) {
9458 * rsm |------------| (not-acked)
9459 * next |-----------| (acked)
9460 * sackblk |-------->
9462 * rsm |------| (not-acked)
9463 * next |-----------------| (acked)
9464 * nrsm |-----|
9472 tqhash_update_end(rack->r_ctl.tqh, rsm, start);
9473 next->r_start = start;
9474 rsm->r_flags |= RACK_SHUFFLED;
9475 next->r_flags |= RACK_SHUFFLED;
9476 /* Now we must adjust back where next->m is */
9496 if (next->r_tim_lastsent[(next->r_rtr_cnt-1)] <
9497 nrsm->r_tim_lastsent[(nrsm->r_rtr_cnt-1)])
9498 next->r_tim_lastsent[(next->r_rtr_cnt-1)] = nrsm->r_tim_lastsent[(nrsm->r_rtr_cnt-1)];
9502 if (next->r_ack_arrival <
9503 rack_to_usec_ts(&rack->r_ctl.act_rcv_time))
9504 next->r_ack_arrival = rack_to_usec_ts(&rack->r_ctl.act_rcv_time);
9509 rsm->r_dupack = 0;
9510 rsm->r_just_ret = 0;
9513 nrsm->r_start = start;
9516 if (rack->app_limited_needs_set)
9517 rack_need_set_test(tp, rack, nrsm, tp->snd_una, __LINE__, RACK_USE_END);
9518 changed += (nrsm->r_end - nrsm->r_start);
9519 rack->r_ctl.rc_sacked += (nrsm->r_end - nrsm->r_start);
9520 if (rsm->r_flags & RACK_WAS_LOST) {
9523 my_chg = (nrsm->r_end - nrsm->r_start);
9524 KASSERT((rack->r_ctl.rc_considered_lost >= my_chg),
9525 ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
9526 if (my_chg <= rack->r_ctl.rc_considered_lost)
9527 rack->r_ctl.rc_considered_lost -= my_chg;
9529 rack->r_ctl.rc_considered_lost = 0;
9531 if (nrsm->r_flags & RACK_SACK_PASSED) {
9532 rack->r_ctl.rc_reorder_ts = cts;
9533 if (rack->r_ctl.rc_reorder_ts == 0)
9534 rack->r_ctl.rc_reorder_ts = 1;
9538 * one left un-acked) to the next one
9541 * sack-passed on rsm (The one passed in
9546 if (rsm->r_in_tmap) {
9552 if (nrsm && nrsm->r_in_tmap)
9556 if (SEQ_LT(end, next->r_end) ||
9557 (end == next->r_end)) {
9564 start = next->r_end;
9565 rsm = tqhash_next(rack->r_ctl.tqh, next);
9573 * rsm |--------|
9574 * sackblk |----->
9579 * rsm |----|
9580 * sackblk |----->
9581 * nrsm |---|
9596 rsm->r_just_ret = 0;
9598 (void)tqhash_insert(rack->r_ctl.tqh, nrsm);
9600 if ((insret = tqhash_insert(rack->r_ctl.tqh, nrsm)) != 0) {
9601 panic("Insert in tailq_hash of %p fails ret:%d rack:%p rsm:%p",
9605 if (rsm->r_in_tmap) {
9606 TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, rsm, nrsm, r_tnext);
9607 nrsm->r_in_tmap = 1;
9610 rsm->r_flags &= (~RACK_HAS_FIN);
9617 if (end == rsm->r_end) {
9619 rsm = tqhash_next(rack->r_ctl.tqh, rsm);
9621 } else if (SEQ_LT(end, rsm->r_end)) {
9623 rsm = tqhash_next(rack->r_ctl.tqh, rsm);
9631 start = rsm->r_end;
9632 rsm = tqhash_next(rack->r_ctl.tqh, rsm);
9638 if (SEQ_GEQ(end, rsm->r_end)) {
9642 * rsm --- |-----|
9643 * end |-----|
9645 * end |---------|
9647 if ((rsm->r_flags & RACK_ACKED) == 0) {
9651 if ((rsm->r_flags & RACK_TLP) &&
9652 (rsm->r_rtr_cnt > 1)) {
9657 if (rack->rc_last_tlp_acked_set &&
9664 rack_log_dsack_event(rack, 10, __LINE__, rsm->r_start, rsm->r_end);
9668 if (SEQ_LT(rsm->r_start, rack->r_ctl.last_tlp_acked_start)) {
9669 rack->r_ctl.last_tlp_acked_start = rsm->r_start;
9670 rack_log_dsack_event(rack, 11, __LINE__, rack->r_ctl.last_tlp_acked_start,
9671 rack->r_ctl.last_tlp_acked_end);
9673 if (SEQ_GT(rsm->r_end, rack->r_ctl.last_tlp_acked_end)) {
9674 rack->r_ctl.last_tlp_acked_end = rsm->r_end;
9675 rack_log_dsack_event(rack, 11, __LINE__, rack->r_ctl.last_tlp_acked_start,
9676 rack->r_ctl.last_tlp_acked_end);
9679 rack->r_ctl.last_tlp_acked_start = rsm->r_start;
9680 rack->r_ctl.last_tlp_acked_end = rsm->r_end;
9681 rack->rc_last_tlp_past_cumack = 0;
9682 rack->rc_last_tlp_acked_set = 1;
9683 rack_log_dsack_event(rack, 8, __LINE__, rsm->r_start, rsm->r_end);
9687 changed += (rsm->r_end - rsm->r_start);
9689 if (rsm->r_flags & RACK_WAS_LOST) {
9692 my_chg = (rsm->r_end - rsm->r_start);
9693 rsm->r_flags &= ~RACK_WAS_LOST;
9694 KASSERT((rack->r_ctl.rc_considered_lost >= my_chg),
9695 ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
9696 if (my_chg <= rack->r_ctl.rc_considered_lost)
9697 rack->r_ctl.rc_considered_lost -= my_chg;
9699 rack->r_ctl.rc_considered_lost = 0;
9701 rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start);
9702 if (rsm->r_in_tmap) /* should be true */
9705 if (rsm->r_flags & RACK_SACK_PASSED) {
9706 rsm->r_flags &= ~RACK_SACK_PASSED;
9707 rack->r_ctl.rc_reorder_ts = cts;
9708 if (rack->r_ctl.rc_reorder_ts == 0)
9709 rack->r_ctl.rc_reorder_ts = 1;
9711 if (rack->app_limited_needs_set)
9712 rack_need_set_test(tp, rack, rsm, tp->snd_una, __LINE__, RACK_USE_END);
9713 rsm->r_ack_arrival = rack_to_usec_ts(&rack->r_ctl.act_rcv_time);
9714 rsm->r_flags |= RACK_ACKED;
9715 rack_update_pcm_ack(rack, 0, rsm->r_start, rsm->r_end);
9716 if (rsm->r_in_tmap) {
9717 TAILQ_REMOVE(&rack->r_ctl.rc_tmap, rsm, r_tnext);
9718 rsm->r_in_tmap = 0;
9724 if (end == rsm->r_end) {
9725 /* This block only - done, setup for next */
9732 nrsm = tqhash_next(rack->r_ctl.tqh, rsm);
9733 start = rsm->r_end;
9742 * rsm --- |-----|
9743 * end |--|
9745 if ((rsm->r_flags & RACK_ACKED) == 0) {
9749 if ((rsm->r_flags & RACK_TLP) &&
9750 (rsm->r_rtr_cnt > 1)) {
9755 if (rack->rc_last_tlp_acked_set &&
9762 rack_log_dsack_event(rack, 10, __LINE__, rsm->r_start, rsm->r_end);
9766 if (SEQ_LT(rsm->r_start, rack->r_ctl.last_tlp_acked_start)) {
9767 rack->r_ctl.last_tlp_acked_start = rsm->r_start;
9768 rack_log_dsack_event(rack, 11, __LINE__, rack->r_ctl.last_tlp_acked_start,
9769 rack->r_ctl.last_tlp_acked_end);
9771 if (SEQ_GT(rsm->r_end, rack->r_ctl.last_tlp_acked_end)) {
9772 rack->r_ctl.last_tlp_acked_end = rsm->r_end;
9773 rack_log_dsack_event(rack, 11, __LINE__, rack->r_ctl.last_tlp_acked_start,
9774 rack->r_ctl.last_tlp_acked_end);
9777 rack->r_ctl.last_tlp_acked_start = rsm->r_start;
9778 rack->r_ctl.last_tlp_acked_end = rsm->r_end;
9779 rack->rc_last_tlp_past_cumack = 0;
9780 rack->rc_last_tlp_acked_set = 1;
9781 rack_log_dsack_event(rack, 8, __LINE__, rsm->r_start, rsm->r_end);
9789 prev = tqhash_prev(rack->r_ctl.tqh, rsm);
9791 (rsm->bindex == prev->bindex) &&
9792 ((rsm->r_flags & RACK_STRADDLE) == 0) &&
9793 ((prev->r_flags & RACK_STRADDLE) == 0) &&
9794 ((rsm->r_flags & RACK_IS_PCM) == 0) &&
9795 ((prev->r_flags & RACK_IS_PCM) == 0) &&
9796 (rsm->r_flags & RACK_IN_GP_WIN) &&
9797 (prev->r_flags & RACK_IN_GP_WIN))
9802 (prev->r_flags & RACK_ACKED)) {
9805 * in place and span from (rsm->r_start = end) to rsm->r_end.
9807 * to prev->r_end <- end.
9809 * prev |--------| (acked)
9810 * rsm |-------| (non-acked)
9811 * sackblk |-|
9813 * prev |----------| (acked)
9814 * rsm |-----| (non-acked)
9815 * nrsm |-| (temporary)
9822 tqhash_update_end(rack->r_ctl.tqh, prev, end);
9823 rsm->r_start = end;
9824 rsm->r_flags |= RACK_SHUFFLED;
9825 prev->r_flags |= RACK_SHUFFLED;
9830 nrsm->r_end = end;
9831 rsm->r_dupack = 0;
9850 if(prev->r_tim_lastsent[(prev->r_rtr_cnt-1)] <
9851 nrsm->r_tim_lastsent[(nrsm->r_rtr_cnt-1)]) {
9852 prev->r_tim_lastsent[(prev->r_rtr_cnt-1)] = nrsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)];
9858 if(prev->r_ack_arrival <
9859 rack_to_usec_ts(&rack->r_ctl.act_rcv_time))
9860 prev->r_ack_arrival = rack_to_usec_ts(&rack->r_ctl.act_rcv_time);
9875 if (rack->app_limited_needs_set)
9876 rack_need_set_test(tp, rack, nrsm, tp->snd_una, __LINE__, RACK_USE_END);
9877 changed += (nrsm->r_end - nrsm->r_start);
9878 rack->r_ctl.rc_sacked += (nrsm->r_end - nrsm->r_start);
9879 if (rsm->r_flags & RACK_WAS_LOST) {
9882 my_chg = (nrsm->r_end - nrsm->r_start);
9883 KASSERT((rack->r_ctl.rc_considered_lost >= my_chg),
9884 ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
9885 if (my_chg <= rack->r_ctl.rc_considered_lost)
9886 rack->r_ctl.rc_considered_lost -= my_chg;
9888 rack->r_ctl.rc_considered_lost = 0;
9890 if (nrsm->r_flags & RACK_SACK_PASSED) {
9891 rack->r_ctl.rc_reorder_ts = cts;
9892 if (rack->r_ctl.rc_reorder_ts == 0)
9893 rack->r_ctl.rc_reorder_ts = 1;
9909 if ((rsm->r_flags & RACK_TLP) &&
9910 (rsm->r_rtr_cnt > 1)) {
9915 if (rack->rc_last_tlp_acked_set &&
9922 rack_log_dsack_event(rack, 10, __LINE__, rsm->r_start, rsm->r_end);
9926 if (SEQ_LT(rsm->r_start, rack->r_ctl.last_tlp_acked_start)) {
9927 rack->r_ctl.last_tlp_acked_start = rsm->r_start;
9928 rack_log_dsack_event(rack, 11, __LINE__, rack->r_ctl.last_tlp_acked_start,
9929 rack->r_ctl.last_tlp_acked_end);
9931 if (SEQ_GT(rsm->r_end, rack->r_ctl.last_tlp_acked_end)) {
9932 rack->r_ctl.last_tlp_acked_end = rsm->r_end;
9933 rack_log_dsack_event(rack, 11, __LINE__, rack->r_ctl.last_tlp_acked_start,
9934 rack->r_ctl.last_tlp_acked_end);
9937 rack->r_ctl.last_tlp_acked_start = rsm->r_start;
9938 rack->r_ctl.last_tlp_acked_end = rsm->r_end;
9939 rack->rc_last_tlp_acked_set = 1;
9940 rack->rc_last_tlp_past_cumack = 0;
9941 rack_log_dsack_event(rack, 8, __LINE__, rsm->r_start, rsm->r_end);
9946 * nrsm->r_start = end;
9947 * nrsm->r_end = rsm->r_end;
9948 * which is un-acked.
9950 * rsm->r_end = nrsm->r_start;
9951 * i.e. the remaining un-acked
9956 * rsm |----------| (not acked)
9957 * sackblk |---|
9959 * rsm |---| (acked)
9960 * nrsm |------| (not acked)
9964 rsm->r_flags &= (~RACK_HAS_FIN);
9965 rsm->r_just_ret = 0;
9967 (void)tqhash_insert(rack->r_ctl.tqh, nrsm);
9969 if ((insret = tqhash_insert(rack->r_ctl.tqh, nrsm)) != 0) {
9970 panic("Insert in tailq_hash of %p fails ret:% rack:%p rsm:%p",
9974 if (rsm->r_in_tmap) {
9975 TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, rsm, nrsm, r_tnext);
9976 nrsm->r_in_tmap = 1;
9978 nrsm->r_dupack = 0;
9981 changed += (rsm->r_end - rsm->r_start);
9982 if (rsm->r_flags & RACK_WAS_LOST) {
9985 my_chg = (rsm->r_end - rsm->r_start);
9986 rsm->r_flags &= ~RACK_WAS_LOST;
9987 KASSERT((rack->r_ctl.rc_considered_lost >= my_chg),
9988 ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
9989 if (my_chg <= rack->r_ctl.rc_considered_lost)
9990 rack->r_ctl.rc_considered_lost -= my_chg;
9992 rack->r_ctl.rc_considered_lost = 0;
9994 rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start);
9996 if (rsm->r_in_tmap) /* should be true */
9999 if (rsm->r_flags & RACK_SACK_PASSED) {
10000 rsm->r_flags &= ~RACK_SACK_PASSED;
10001 rack->r_ctl.rc_reorder_ts = cts;
10002 if (rack->r_ctl.rc_reorder_ts == 0)
10003 rack->r_ctl.rc_reorder_ts = 1;
10005 if (rack->app_limited_needs_set)
10006 rack_need_set_test(tp, rack, rsm, tp->snd_una, __LINE__, RACK_USE_END);
10007 rsm->r_ack_arrival = rack_to_usec_ts(&rack->r_ctl.act_rcv_time);
10008 rsm->r_flags |= RACK_ACKED;
10009 rack_update_pcm_ack(rack, 0, rsm->r_start, rsm->r_end);
10011 if (rsm->r_in_tmap) {
10012 TAILQ_REMOVE(&rack->r_ctl.rc_tmap, rsm, r_tnext);
10013 rsm->r_in_tmap = 0;
10024 ((rsm->r_flags & RACK_TLP) == 0) &&
10025 (rsm->r_flags & RACK_ACKED)) {
10031 next = tqhash_next(rack->r_ctl.tqh, rsm);
10033 if (next->r_flags & RACK_TLP)
10036 if ((next->r_flags & RACK_IN_GP_WIN) &&
10037 ((rsm->r_flags & RACK_IN_GP_WIN) == 0)) {
10040 if ((rsm->r_flags & RACK_IN_GP_WIN) &&
10041 ((next->r_flags & RACK_IN_GP_WIN) == 0)) {
10044 if (rsm->bindex != next->bindex)
10046 if (rsm->r_flags & RACK_STRADDLE)
10048 if (rsm->r_flags & RACK_IS_PCM)
10050 if (next->r_flags & RACK_STRADDLE)
10052 if (next->r_flags & RACK_IS_PCM)
10054 if (next->r_flags & RACK_ACKED) {
10057 next = tqhash_next(rack->r_ctl.tqh, rsm);
10062 prev = tqhash_prev(rack->r_ctl.tqh, rsm);
10064 if (prev->r_flags & RACK_TLP)
10067 if ((prev->r_flags & RACK_IN_GP_WIN) &&
10068 ((rsm->r_flags & RACK_IN_GP_WIN) == 0)) {
10071 if ((rsm->r_flags & RACK_IN_GP_WIN) &&
10072 ((prev->r_flags & RACK_IN_GP_WIN) == 0)) {
10075 if (rsm->bindex != prev->bindex)
10077 if (rsm->r_flags & RACK_STRADDLE)
10079 if (rsm->r_flags & RACK_IS_PCM)
10081 if (prev->r_flags & RACK_STRADDLE)
10083 if (prev->r_flags & RACK_IS_PCM)
10085 if (prev->r_flags & RACK_ACKED) {
10088 prev = tqhash_prev(rack->r_ctl.tqh, rsm);
10099 nrsm = tqhash_find(rack->r_ctl.tqh, end);
10100 *prsm = rack->r_ctl.rc_sacklast = nrsm;
10110 while (rsm && (rsm->r_flags & RACK_ACKED)) {
10112 rack->r_ctl.rc_sacked -= (rsm->r_end - rsm->r_start);
10114 if (rsm->r_in_tmap) {
10115 panic("rack:%p rsm:%p flags:0x%x in tmap?",
10116 rack, rsm, rsm->r_flags);
10119 rsm->r_flags &= ~(RACK_ACKED|RACK_SACK_PASSED|RACK_WAS_SACKPASS);
10122 TAILQ_INSERT_HEAD(&rack->r_ctl.rc_tmap, rsm, r_tnext);
10125 TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, tmap, rsm, r_tnext);
10128 tmap->r_in_tmap = 1;
10129 rsm = tqhash_next(rack->r_ctl.tqh, rsm);
10135 sack_filter_clear(&rack->r_ctl.rack_sf, th_ack);
10180 * The cum-ack is being advanced upon the sendmap.
10186 if ((tp->t_flags & TF_GPUTINPROG) == 0)
10193 if (SEQ_GT(rsm->r_end, tp->gput_ack)) {
10194 tp->gput_ack = rsm->r_end;
10203 if (rack->app_limited_needs_set)
10221 if ((ts = rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)]) <=
10222 rack->r_ctl.rc_gp_cumack_ts)
10225 rack->r_ctl.rc_gp_cumack_ts = ts;
10226 rack_log_gpset(rack, tp->gput_ack, (uint32_t)ts, rsm->r_end,
10241 if (sack_filter_blks_used(&rack->r_ctl.rack_sf)) {
10246 sack_filter_blks(tp, &rack->r_ctl.rack_sf, NULL, 0, th_ack);
10248 if (SEQ_GT(th_ack, tp->snd_una)) {
10250 rack->r_ctl.cleared_app_ack = 0;
10252 rack->r_wanted_output = 1;
10253 if (SEQ_GT(th_ack, tp->snd_una))
10254 rack->r_ctl.last_cumack_advance = acktime;
10257 if ((rack->rc_last_tlp_acked_set == 1)&&
10258 (rack->rc_last_tlp_past_cumack == 1) &&
10259 (SEQ_GT(rack->r_ctl.last_tlp_acked_start, th_ack))) {
10262 * tlp retransmit sequence is ahead of the cum-ack.
10263 * This can only happen when the cum-ack moves all
10270 * the cum-ack is by the TLP before checking which is
10274 rack->r_ctl.last_tlp_acked_start,
10275 rack->r_ctl.last_tlp_acked_end);
10276 rack->rc_last_tlp_acked_set = 0;
10277 rack->rc_last_tlp_past_cumack = 0;
10278 } else if ((rack->rc_last_tlp_acked_set == 1) &&
10279 (rack->rc_last_tlp_past_cumack == 0) &&
10280 (SEQ_GEQ(th_ack, rack->r_ctl.last_tlp_acked_end))) {
10284 rack->rc_last_tlp_past_cumack = 1;
10287 if ((rack->rc_last_sent_tlp_seq_valid == 1) &&
10288 (rack->rc_last_sent_tlp_past_cumack == 1) &&
10289 (SEQ_GT(rack->r_ctl.last_sent_tlp_seq, th_ack))) {
10291 rack->r_ctl.last_sent_tlp_seq,
10292 (rack->r_ctl.last_sent_tlp_seq +
10293 rack->r_ctl.last_sent_tlp_len));
10294 rack->rc_last_sent_tlp_seq_valid = 0;
10295 rack->rc_last_sent_tlp_past_cumack = 0;
10296 } else if ((rack->rc_last_sent_tlp_seq_valid == 1) &&
10297 (rack->rc_last_sent_tlp_past_cumack == 0) &&
10298 (SEQ_GEQ(th_ack, rack->r_ctl.last_sent_tlp_seq))) {
10302 rack->rc_last_sent_tlp_past_cumack = 1;
10305 rsm = tqhash_min(rack->r_ctl.tqh);
10307 if ((th_ack - 1) == tp->iss) {
10316 if (tp->t_flags & TF_SENTFIN) {
10321 panic("No rack map tp:%p for state:%d ack:%u rack:%p snd_una:%u snd_max:%u\n",
10323 tp->t_state, th_ack, rack,
10324 tp->snd_una, tp->snd_max);
10328 if (SEQ_LT(th_ack, rsm->r_start)) {
10332 rsm->r_start,
10333 th_ack, tp->t_state, rack->r_state);
10340 if ((rsm->r_flags & RACK_TLP) &&
10341 (rsm->r_rtr_cnt > 1)) {
10351 if (rack->rc_last_tlp_acked_set &&
10358 rack_log_dsack_event(rack, 10, __LINE__, rsm->r_start, rsm->r_end);
10362 if (SEQ_LT(rsm->r_start, rack->r_ctl.last_tlp_acked_start)) {
10363 rack->r_ctl.last_tlp_acked_start = rsm->r_start;
10364 rack_log_dsack_event(rack, 11, __LINE__, rack->r_ctl.last_tlp_acked_start,
10365 rack->r_ctl.last_tlp_acked_end);
10367 if (SEQ_GT(rsm->r_end, rack->r_ctl.last_tlp_acked_end)) {
10368 rack->r_ctl.last_tlp_acked_end = rsm->r_end;
10369 rack_log_dsack_event(rack, 11, __LINE__, rack->r_ctl.last_tlp_acked_start,
10370 rack->r_ctl.last_tlp_acked_end);
10373 rack->rc_last_tlp_past_cumack = 1;
10374 rack->r_ctl.last_tlp_acked_start = rsm->r_start;
10375 rack->r_ctl.last_tlp_acked_end = rsm->r_end;
10376 rack->rc_last_tlp_acked_set = 1;
10377 rack_log_dsack_event(rack, 8, __LINE__, rsm->r_start, rsm->r_end);
10381 rack->r_ctl.last_tmit_time_acked = rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)];
10382 if (SEQ_GEQ(th_ack, rsm->r_end)) {
10387 if (rsm->r_flags & RACK_WAS_LOST) {
10393 rsm->r_flags &= ~RACK_WAS_LOST;
10394 KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)),
10395 ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
10396 if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start))
10397 rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start;
10399 rack->r_ctl.rc_considered_lost = 0;
10401 rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_FREE, rsm->r_end, __LINE__);
10402 rack->r_ctl.rc_holes_rxt -= rsm->r_rtr_bytes;
10403 rsm->r_rtr_bytes = 0;
10409 tqhash_remove(rack->r_ctl.tqh, rsm, REMOVE_TYPE_CUMACK);
10410 if (rsm->r_in_tmap) {
10411 TAILQ_REMOVE(&rack->r_ctl.rc_tmap, rsm, r_tnext);
10412 rsm->r_in_tmap = 0;
10415 if (rsm->r_flags & RACK_ACKED) {
10417 * It was acked on the scoreboard -- remove
10420 rack->r_ctl.rc_sacked -= (rsm->r_end - rsm->r_start);
10422 } else if (rsm->r_flags & RACK_SACK_PASSED) {
10428 rsm->r_flags &= ~RACK_SACK_PASSED;
10429 rsm->r_ack_arrival = rack_to_usec_ts(&rack->r_ctl.act_rcv_time);
10430 rsm->r_flags |= RACK_ACKED;
10431 rack->r_ctl.rc_reorder_ts = cts;
10432 if (rack->r_ctl.rc_reorder_ts == 0)
10433 rack->r_ctl.rc_reorder_ts = 1;
10434 if (rack->r_ent_rec_ns) {
10439 rack->r_might_revert = 1;
10441 rack_update_pcm_ack(rack, 1, rsm->r_start, rsm->r_end);
10443 rack_update_pcm_ack(rack, 1, rsm->r_start, rsm->r_end);
10445 if ((rsm->r_flags & RACK_TO_REXT) &&
10446 (tp->t_flags & TF_RCVD_TSTMP) &&
10447 (to->to_flags & TOF_TS) &&
10448 (to->to_tsecr != 0) &&
10449 (tp->t_flags & TF_PREVVALID)) {
10455 tp->t_flags &= ~TF_PREVVALID;
10456 if (to->to_tsecr == rack_ts_to_msec(rsm->r_tim_lastsent[0])) {
10461 left = th_ack - rsm->r_end;
10462 if (rack->app_limited_needs_set && newly_acked)
10470 rsm = tqhash_min(rack->r_ctl.tqh);
10471 if (rsm && (rsm->r_flags & RACK_ACKED) && (th_ack == rsm->r_start)) {
10479 * given us snd_una up to (rsm->r_end).
10483 * our rsm->r_start in case we get an old ack
10490 if (rsm->r_flags & RACK_ACKED) {
10492 * It was acked on the scoreboard -- remove it from
10493 * total for the part being cum-acked.
10495 rack->r_ctl.rc_sacked -= (th_ack - rsm->r_start);
10497 rack_update_pcm_ack(rack, 1, rsm->r_start, th_ack);
10500 if (rsm->r_flags & RACK_WAS_LOST) {
10507 KASSERT((rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start)),
10508 ("rsm:%p rack:%p rc_considered_lost goes negative th_ack:%u", rsm, rack, th_ack));
10509 if (rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start))
10510 rack->r_ctl.rc_considered_lost -= th_ack - rsm->r_start;
10512 rack->r_ctl.rc_considered_lost = 0;
10518 rsm->r_dupack = 0;
10520 if (rsm->r_rtr_bytes) {
10527 ack_am = (th_ack - rsm->r_start);
10528 if (ack_am >= rsm->r_rtr_bytes) {
10529 rack->r_ctl.rc_holes_rxt -= ack_am;
10530 rsm->r_rtr_bytes -= ack_am;
10540 if (rsm->m &&
10541 ((rsm->orig_m_len != rsm->m->m_len) ||
10542 (M_TRAILINGROOM(rsm->m) != rsm->orig_t_space))) {
10546 rsm->soff += (th_ack - rsm->r_start);
10549 tqhash_trim(rack->r_ctl.tqh, th_ack);
10555 m = rsm->m;
10556 soff = rsm->soff;
10558 while (soff >= m->m_len) {
10559 soff -= m->m_len;
10560 KASSERT((m->m_next != NULL),
10561 (" rsm:%p off:%u soff:%u m:%p",
10562 rsm, rsm->soff, soff, m));
10563 m = m->m_next;
10566 * This is a fall-back that prevents a panic. In reality
10569 * but tqhash_trim did update rsm->r_start so the offset calculation
10574 m = sbsndmbuf(&rack->rc_inp->inp_socket->so_snd,
10575 (rsm->r_start - tp->snd_una),
10583 rsm->m = m;
10584 rsm->soff = soff;
10585 rsm->orig_m_len = rsm->m->m_len;
10586 rsm->orig_t_space = M_TRAILINGROOM(rsm->m);
10589 if (rack->app_limited_needs_set &&
10590 SEQ_GEQ(th_ack, tp->gput_seq))
10591 rack_need_set_test(tp, rack, rsm, tp->snd_una, __LINE__, RACK_USE_BEG);
10600 if (rack->r_might_revert) {
10611 TAILQ_FOREACH(rsm, &rack->r_ctl.rc_tmap, r_tnext) {
10612 if (rsm->r_flags & RACK_SACK_PASSED) {
10624 rack->r_ent_rec_ns = 0;
10625 orig_cwnd = tp->snd_cwnd;
10626 tp->snd_ssthresh = rack->r_ctl.rc_ssthresh_at_erec;
10627 tp->snd_recover = tp->snd_una;
10629 if (IN_RECOVERY(tp->t_flags)) {
10631 if ((rack->rto_from_rec == 1) && (rack_ssthresh_rest_rto_rec != 0) ){
10634 * and then re-entered recovery (more sack's arrived)
10636 * the first recovery. We want to be able to slow-start
10640 * so we get no slow-start after our RTO.
10642 rack->rto_from_rec = 0;
10643 if (rack->r_ctl.rto_ssthresh > tp->snd_ssthresh)
10644 tp->snd_ssthresh = rack->r_ctl.rto_ssthresh;
10648 rack->r_might_revert = 0;
10661 am = end - start;
10664 if ((rack->rc_last_tlp_acked_set ) &&
10665 (SEQ_GEQ(start, rack->r_ctl.last_tlp_acked_start)) &&
10666 (SEQ_LEQ(end, rack->r_ctl.last_tlp_acked_end))) {
10677 if (rack->rc_last_sent_tlp_seq_valid) {
10678 l_end = rack->r_ctl.last_sent_tlp_seq + rack->r_ctl.last_sent_tlp_len;
10679 if (SEQ_GEQ(start, rack->r_ctl.last_sent_tlp_seq) &&
10690 if (rack->rc_dsack_round_seen == 0) {
10691 rack->rc_dsack_round_seen = 1;
10692 rack->r_ctl.dsack_round_end = rack->rc_tp->snd_max;
10693 rack->r_ctl.num_dsack++;
10694 rack->r_ctl.dsack_persist = 16; /* 16 is from the standard */
10702 rack->r_ctl.dsack_byte_cnt += am;
10703 if (!IN_FASTRECOVERY(rack->rc_tp->t_flags) &&
10704 rack->r_ctl.retran_during_recovery &&
10705 (rack->r_ctl.dsack_byte_cnt >= rack->r_ctl.retran_during_recovery)) {
10710 rack->r_might_revert = 1;
10711 rack_handle_might_revert(rack->rc_tp, rack);
10712 rack->r_might_revert = 0;
10713 rack->r_ctl.retran_during_recovery = 0;
10714 rack->r_ctl.dsack_byte_cnt = 0;
10722 return (((tp->snd_max - snd_una) -
10723 (rack->r_ctl.rc_sacked + rack->r_ctl.rc_considered_lost)) + rack->r_ctl.rc_holes_rxt);
10730 (struct tcp_rack *)tp->t_fb_ptr,
10731 tp->snd_una));
10740 rack->r_ctl.rc_prr_delivered += changed;
10742 if (sbavail(&rack->rc_inp->inp_socket->so_snd) <= (tp->snd_max - tp->snd_una)) {
10746 * Note we use tp->snd_una here and not th_ack because
10749 rack->r_ctl.rc_prr_sndcnt = 0;
10753 if (SEQ_GT(tp->snd_una, th_ack)) {
10754 snd_una = tp->snd_una;
10759 if (pipe > tp->snd_ssthresh) {
10762 sndcnt = rack->r_ctl.rc_prr_delivered * tp->snd_ssthresh;
10763 if (rack->r_ctl.rc_prr_recovery_fs > 0)
10764 sndcnt /= (long)rack->r_ctl.rc_prr_recovery_fs;
10766 rack->r_ctl.rc_prr_sndcnt = 0;
10771 if (sndcnt > (long)rack->r_ctl.rc_prr_out)
10772 sndcnt -= rack->r_ctl.rc_prr_out;
10775 rack->r_ctl.rc_prr_sndcnt = sndcnt;
10780 if (rack->r_ctl.rc_prr_delivered > rack->r_ctl.rc_prr_out)
10781 limit = (rack->r_ctl.rc_prr_delivered - rack->r_ctl.rc_prr_out);
10787 if (tp->snd_ssthresh > pipe) {
10788 rack->r_ctl.rc_prr_sndcnt = min((tp->snd_ssthresh - pipe), limit);
10791 rack->r_ctl.rc_prr_sndcnt = min(0, limit);
10818 rack = (struct tcp_rack *)tp->t_fb_ptr;
10820 rsm = tqhash_min(rack->r_ctl.tqh);
10822 th_ack = th->th_ack;
10823 segsiz = ctf_fixed_maxseg(rack->rc_tp);
10828 * credit for larger cum-ack moves).
10832 ac = BYTES_THIS_ACK(tp, th) / ctf_fixed_maxseg(rack->rc_tp);
10835 if (SEQ_GT(th_ack, tp->snd_una)) {
10837 tp->t_acktime = ticks;
10839 if (rsm && SEQ_GT(th_ack, rsm->r_start))
10840 changed = th_ack - rsm->r_start;
10843 tcp_tv_to_lusectick(&rack->r_ctl.act_rcv_time));
10845 if ((to->to_flags & TOF_SACK) == 0) {
10849 * For cases where we struck a dup-ack
10854 changed += ctf_fixed_maxseg(rack->rc_tp);
10859 if (SEQ_GT(th_ack, tp->snd_una))
10862 ack_point = tp->snd_una;
10863 for (i = 0; i < to->to_nsacks; i++) {
10864 bcopy((to->to_sacks + i * TCPOLEN_SACK),
10870 SEQ_LT(sack.start, tp->snd_max) &&
10872 SEQ_LEQ(sack.end, tp->snd_max)) {
10883 * It's a D-SACK block.
10888 if (rack->rc_dsack_round_seen) {
10890 if (SEQ_GEQ(th_ack, rack->r_ctl.dsack_round_end)) {
10892 rack->rc_dsack_round_seen = 0;
10900 num_sack_blks = sack_filter_blks(tp, &rack->r_ctl.rack_sf, sack_blocks,
10901 num_sack_blks, th->th_ack);
10902 ctf_log_sack_filter(rack->rc_tp, num_sack_blks, sack_blocks);
10949 * Now collapse out the dup-sack and
10957 num_sack_blks--;
10969 rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap);
10971 SEQ_GT(sack_blocks[0].end, rsm->r_start) &&
10972 SEQ_LT(sack_blocks[0].start, rsm->r_end)) {
10979 rack->r_wanted_output = 1;
10987 * i.e. the sack-filter pushes down
10993 counter_u64_add(rack_ack_total, (acked / ctf_fixed_maxseg(rack->rc_tp)));
11005 rsm = rack->r_ctl.rc_sacklast;
11009 rack->r_wanted_output = 1;
11017 * you have more than one sack-blk, this
11019 * and the sack-filter is still working, or
11028 rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
11032 if ((!IN_FASTRECOVERY(tp->t_flags)) &&
11034 ((rsm->r_flags & RACK_MUST_RXT) == 0)) {
11042 if (rack->rack_no_prr == 0) {
11043 rack->r_ctl.rc_prr_sndcnt = ctf_fixed_maxseg(tp);
11046 rack->r_timer_override = 1;
11047 rack->r_early = 0;
11048 rack->r_ctl.rc_agg_early = 0;
11049 } else if (IN_FASTRECOVERY(tp->t_flags) &&
11051 (rack->r_rr_config == 3)) {
11056 rack->r_timer_override = 1;
11057 rack->r_ctl.rc_hpts_flags &= ~PACE_PKT_OUTPUT;
11058 rack->r_ctl.rc_resend = rsm;
11060 if (IN_FASTRECOVERY(tp->t_flags) &&
11061 (rack->rack_no_prr == 0) &&
11064 if ((rsm && (rack->r_ctl.rc_prr_sndcnt >= ctf_fixed_maxseg(tp)) &&
11065 ((tcp_in_hpts(rack->rc_tp) == 0) &&
11066 ((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) == 0)))) {
11071 rack->r_early = 0;
11072 rack->r_ctl.rc_agg_early = 0;
11073 rack->r_timer_override = 1;
11083 rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap);
11089 if ((rsm->r_dupack >= DUP_ACK_THRESHOLD) ||
11090 (rsm->r_flags & RACK_MUST_RXT)) {
11096 if (rsm && (rsm->r_dupack < 0xff)) {
11097 rsm->r_dupack++;
11098 if (rsm->r_dupack >= DUP_ACK_THRESHOLD) {
11104 * we will get a return of the rsm. For a non-sack
11109 rack->r_ctl.rc_resend = tcp_rack_output(rack->rc_tp, rack, cts);
11110 if (rack->r_ctl.rc_resend != NULL) {
11111 if (!IN_FASTRECOVERY(rack->rc_tp->t_flags)) {
11112 rack_cong_signal(rack->rc_tp, CC_NDUPACK,
11115 rack->r_wanted_output = 1;
11116 rack->r_timer_override = 1;
11140 * gauge the inter-ack times). If that occurs we have a real problem
11153 if (tp->snd_max == tp->snd_una) {
11165 tcp_trace_point(rack->rc_tp, TCP_TP_PACED_BOTTOM);
11167 rack->rc_dragged_bottom = 1;
11169 if ((rack->r_ctl.rack_rs.rs_flags & RACK_RTT_VALID) &&
11170 (rack->dis_lt_bw == 0) &&
11171 (rack->use_lesser_lt_bw == 0) &&
11174 * Let's use the long-term b/w we have
11177 if (rack->rc_gp_filled == 0) {
11189 rack->r_ctl.rc_rtt_diff = 0;
11190 rack->r_ctl.gp_bw = lt_bw;
11191 rack->rc_gp_filled = 1;
11192 if (rack->r_ctl.num_measurements < RACK_REQ_AVG)
11193 rack->r_ctl.num_measurements = RACK_REQ_AVG;
11194 rack_set_pace_segments(rack->rc_tp, rack, __LINE__, NULL);
11195 } else if (lt_bw > rack->r_ctl.gp_bw) {
11196 rack->r_ctl.rc_rtt_diff = 0;
11197 if (rack->r_ctl.num_measurements < RACK_REQ_AVG)
11198 rack->r_ctl.num_measurements = RACK_REQ_AVG;
11199 rack->r_ctl.gp_bw = lt_bw;
11200 rack_set_pace_segments(rack->rc_tp, rack, __LINE__, NULL);
11202 rack_increase_bw_mul(rack, -1, 0, 0, 1);
11203 if ((rack->gp_ready == 0) &&
11204 (rack->r_ctl.num_measurements >= rack->r_ctl.req_measurements)) {
11206 rack->gp_ready = 1;
11207 if (rack->dgp_on ||
11208 rack->rack_hibeta)
11210 if (rack->defer_options)
11217 rack_increase_bw_mul(rack, -1, 0, 0, 1);
11219 } else if ((IN_FASTRECOVERY(tp->t_flags) == 0) &&
11220 (sbavail(&so->so_snd) > max((segsiz * (4 + rack_req_segs)),
11222 (rack->r_ctl.cwnd_to_use > max((segsiz * (rack_req_segs + 2)), minseg)) &&
11223 (tp->snd_wnd > max((segsiz * (rack_req_segs + 2)), minseg)) &&
11224 (ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked) <=
11235 rack->rc_dragged_bottom = 1;
11236 rack_increase_bw_mul(rack, -1, 0, 0, 1);
11247 do_log = tcp_bblogging_on(rack->rc_tp);
11249 if ((do_log = tcp_bblogging_point_on(rack->rc_tp, TCP_BBPOINT_REQ_LEVEL_LOGGING) )== 0)
11270 log.u_bbr.flex2 = cur->start_seq;
11271 log.u_bbr.flex3 = cur->end_seq;
11272 log.u_bbr.flex4 = (uint32_t)((cur->localtime >> 32) & 0x00000000ffffffff);
11273 log.u_bbr.flex5 = (uint32_t)(cur->localtime & 0x00000000ffffffff);
11274 log.u_bbr.flex6 = cur->flags;
11275 log.u_bbr.pkts_out = cur->hybrid_flags;
11276 log.u_bbr.rttProp = cur->timestamp;
11277 log.u_bbr.cur_del_rate = cur->cspr;
11278 log.u_bbr.bw_inuse = cur->start;
11279 log.u_bbr.applimited = (uint32_t)(cur->end & 0x00000000ffffffff);
11280 log.u_bbr.delivered = (uint32_t)((cur->end >> 32) & 0x00000000ffffffff) ;
11281 log.u_bbr.epoch = (uint32_t)(cur->deadline & 0x00000000ffffffff);
11282 log.u_bbr.lt_epoch = (uint32_t)((cur->deadline >> 32) & 0x00000000ffffffff) ;
11285 off = (uint64_t)(cur) - (uint64_t)(&rack->rc_tp->t_tcpreq_info[0]);
11294 log.u_bbr.flex7 = rack->rc_catch_up;
11296 log.u_bbr.flex7 |= rack->rc_hybrid_mode;
11298 log.u_bbr.flex7 |= rack->dgp_on;
11306 log.u_bbr.bbr_state = rack->rc_always_pace;
11308 log.u_bbr.bbr_state |= rack->dgp_on;
11310 log.u_bbr.bbr_state |= rack->rc_hybrid_mode;
11312 log.u_bbr.bbr_state |= rack->use_fixed_rate;
11314 log.u_bbr.delRate = rack->r_ctl.bw_rate_cap;
11315 log.u_bbr.bbr_substate = rack->r_ctl.client_suggested_maxseg;
11316 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
11317 log.u_bbr.pkt_epoch = rack->rc_tp->tcp_hybrid_start;
11318 log.u_bbr.lost = rack->rc_tp->tcp_hybrid_error;
11319 log.u_bbr.pacing_gain = (uint16_t)rack->rc_tp->tcp_hybrid_stop;
11320 tcp_log_event(rack->rc_tp, NULL,
11321 &rack->rc_inp->inp_socket->so_rcv,
11322 &rack->rc_inp->inp_socket->so_snd,
11337 orig_ent = rack->r_ctl.rc_last_sft;
11338 rc_cur = tcp_req_find_req_for_seq(rack->rc_tp, seq);
11341 if (rack->rc_hybrid_mode)
11343 rc_cur = tcp_req_find_req_for_seq(rack->rc_tp, (seq + len - 1));
11350 if (rack->rc_hybrid_mode) {
11351 rack->r_ctl.client_suggested_maxseg = 0;
11352 rack->rc_catch_up = 0;
11353 if (rack->cspr_is_fcc == 0)
11354 rack->r_ctl.bw_rate_cap = 0;
11356 rack->r_ctl.fillcw_cap = rack_fillcw_bw_cap;
11358 if (rack->rc_hybrid_mode) {
11359 rack_log_hybrid(rack, (seq + len - 1), NULL, HYBRID_LOG_NO_RANGE, __LINE__, err);
11361 if (rack->r_ctl.rc_last_sft) {
11362 rack->r_ctl.rc_last_sft = NULL;
11366 if ((rc_cur->hybrid_flags & TCP_HYBRID_PACING_WASSET) == 0) {
11368 if (rack->rc_hybrid_mode) {
11369 rack->r_ctl.client_suggested_maxseg = 0;
11370 rack->rc_catch_up = 0;
11371 rack->r_ctl.bw_rate_cap = 0;
11373 if (rack->r_ctl.rc_last_sft) {
11374 rack->r_ctl.rc_last_sft = NULL;
11376 if ((rc_cur->flags & TCP_TRK_TRACK_FLG_FSND) == 0) {
11377 rc_cur->flags |= TCP_TRK_TRACK_FLG_FSND;
11378 rc_cur->first_send = cts;
11379 rc_cur->sent_at_fs = rack->rc_tp->t_sndbytes;
11380 rc_cur->rxt_at_fs = rack->rc_tp->t_snd_rxt_bytes;
11391 tp = rack->rc_tp;
11392 if ((rack->r_ctl.rc_last_sft != NULL) &&
11393 (rack->r_ctl.rc_last_sft == rc_cur)) {
11395 if (rack->rc_hybrid_mode)
11399 if (rack->rc_hybrid_mode == 0) {
11400 rack->r_ctl.rc_last_sft = rc_cur;
11402 orig_ent->sent_at_ls = rack->rc_tp->t_sndbytes;
11403 orig_ent->rxt_at_ls = rack->rc_tp->t_snd_rxt_bytes;
11404 orig_ent->flags |= TCP_TRK_TRACK_FLG_LSND;
11409 if ((rc_cur->hybrid_flags & TCP_HYBRID_PACING_CSPR) && rc_cur->cspr){
11411 if (rack->cspr_is_fcc == 0)
11412 rack->r_ctl.bw_rate_cap = rack_compensate_for_linerate(rack, rc_cur->cspr);
11414 rack->r_ctl.fillcw_cap = rack_compensate_for_linerate(rack, rc_cur->cspr);
11416 if (rack->rc_hybrid_mode) {
11417 if (rack->cspr_is_fcc == 0)
11418 rack->r_ctl.bw_rate_cap = 0;
11420 rack->r_ctl.fillcw_cap = rack_fillcw_bw_cap;
11423 if (rc_cur->hybrid_flags & TCP_HYBRID_PACING_H_MS)
11424 rack->r_ctl.client_suggested_maxseg = rc_cur->hint_maxseg;
11426 rack->r_ctl.client_suggested_maxseg = 0;
11427 if (rc_cur->timestamp == rack->r_ctl.last_tm_mark) {
11431 * sendtime not arrival time for catch-up mode.
11433 rc_cur->hybrid_flags |= TCP_HYBRID_PACING_SENDTIME;
11435 if ((rc_cur->hybrid_flags & TCP_HYBRID_PACING_CU) &&
11436 (rc_cur->cspr > 0)) {
11439 rack->rc_catch_up = 1;
11444 if (rc_cur->hybrid_flags & TCP_HYBRID_PACING_SENDTIME) {
11450 rc_cur->deadline = cts;
11456 rc_cur->deadline = rc_cur->localtime;
11462 len = rc_cur->end - rc_cur->start;
11463 if (tp->t_inpcb.inp_socket->so_snd.sb_tls_info) {
11468 len += tcp_estimate_tls_overhead(tp->t_inpcb.inp_socket, len);
11478 len /= rc_cur->cspr;
11479 rc_cur->deadline += len;
11481 rack->rc_catch_up = 0;
11482 rc_cur->deadline = 0;
11484 if (rack->r_ctl.client_suggested_maxseg != 0) {
11492 orig_ent->sent_at_ls = rack->rc_tp->t_sndbytes;
11493 orig_ent->rxt_at_ls = rack->rc_tp->t_snd_rxt_bytes;
11494 orig_ent->flags |= TCP_TRK_TRACK_FLG_LSND;
11498 rack->r_ctl.rc_last_sft = rc_cur;
11499 rack->r_ctl.last_tm_mark = rc_cur->timestamp;
11509 ent = rack->r_ctl.rc_last_sft;
11511 (ent->flags == TCP_TRK_TRACK_FLG_EMPTY) ||
11512 (SEQ_GEQ(seq, ent->end_seq))) {
11515 ent = rack->r_ctl.rc_last_sft;
11521 if (SEQ_LT(ent->end_seq, (seq + len))) {
11532 ent->end_seq = (seq + len);
11533 if (rack->rc_hybrid_mode)
11537 if ((ent->flags & TCP_TRK_TRACK_FLG_FSND) == 0) {
11538 ent->flags |= TCP_TRK_TRACK_FLG_FSND;
11539 ent->first_send = cts;
11540 ent->sent_at_fs = rack->rc_tp->t_sndbytes;
11541 ent->rxt_at_fs = rack->rc_tp->t_snd_rxt_bytes;
11570 new_total = acked_amount + rack->r_ctl.fsb.left_to_send;
11571 gating_val = min((sbavail(&so->so_snd) - (tp->snd_max - tp->snd_una)),
11572 (tp->snd_wnd - (tp->snd_max - tp->snd_una)));
11576 rack->r_ctl.fsb.left_to_send = new_total;
11577 KASSERT((rack->r_ctl.fsb.left_to_send <= (sbavail(&rack->rc_inp->inp_socket->so_snd) - (tp->snd_max - tp->snd_una))),
11578 ("rack:%p left_to_send:%u sbavail:%u out:%u",
11579 rack, rack->r_ctl.fsb.left_to_send,
11580 sbavail(&rack->rc_inp->inp_socket->so_snd),
11581 (tp->snd_max - tp->snd_una)));
11620 snd_una = rack->rc_tp->snd_una;
11622 m = sb->sb_mb;
11623 rsm = tqhash_min(rack->r_ctl.tqh);
11629 KASSERT((rsm->m == m),
11630 ("Rack:%p sb:%p rsm:%p -- first rsm mbuf not aligned to sb",
11632 while (rsm->m && (rsm->m == m)) {
11638 tm = sbsndmbuf(sb, (rsm->r_start - snd_una), &soff);
11639 if ((rsm->orig_m_len != m->m_len) ||
11640 (rsm->orig_t_space != M_TRAILINGROOM(m))){
11644 KASSERT((rsm->soff == 0),
11645 ("Rack:%p rsm:%p -- rsm at head but soff not zero",
11649 if ((rsm->soff != soff) || (rsm->m != tm)) {
11658 rsm->m = tm;
11659 rsm->soff = soff;
11661 rsm->orig_m_len = rsm->m->m_len;
11662 rsm->orig_t_space = M_TRAILINGROOM(rsm->m);
11664 rsm->orig_m_len = 0;
11665 rsm->orig_t_space = 0;
11668 rsm->m = sbsndmbuf(sb, (rsm->r_start - snd_una), &rsm->soff);
11669 if (rsm->m) {
11670 rsm->orig_m_len = rsm->m->m_len;
11671 rsm->orig_t_space = M_TRAILINGROOM(rsm->m);
11673 rsm->orig_m_len = 0;
11674 rsm->orig_t_space = 0;
11677 rsm = tqhash_next(rack->r_ctl.tqh, rsm);
11690 if ((rack->rc_hybrid_mode == 0) &&
11691 (tcp_bblogging_point_on(rack->rc_tp, TCP_BBPOINT_REQ_LEVEL_LOGGING) == 0)) {
11696 tcp_req_check_for_comp(rack->rc_tp, th_ack);
11706 ent = tcp_req_find_a_req_that_is_completed_by(rack->rc_tp, th_ack, &i);
11720 data = ent->end - ent->start;
11721 laa = tcp_tv_to_lusectick(&rack->r_ctl.act_rcv_time);
11722 if (ent->flags & TCP_TRK_TRACK_FLG_FSND) {
11723 if (ent->first_send > ent->localtime)
11724 ftim = ent->first_send;
11726 ftim = ent->localtime;
11729 ftim = ent->localtime;
11731 if (laa > ent->localtime)
11732 tim = laa - ftim;
11746 if (ent == rack->r_ctl.rc_last_sft) {
11747 rack->r_ctl.rc_last_sft = NULL;
11748 if (rack->rc_hybrid_mode) {
11749 rack->rc_catch_up = 0;
11750 if (rack->cspr_is_fcc == 0)
11751 rack->r_ctl.bw_rate_cap = 0;
11753 rack->r_ctl.fillcw_cap = rack_fillcw_bw_cap;
11754 rack->r_ctl.client_suggested_maxseg = 0;
11758 tcp_req_log_req_info(rack->rc_tp, ent,
11761 tcp_req_free_a_slot(rack->rc_tp, ent);
11762 ent = tcp_req_find_a_req_that_is_completed_by(rack->rc_tp, th_ack, &i);
11771 * For ret_val if it's 0 the TCP is locked, if it's non-zero
11791 rack = (struct tcp_rack *)tp->t_fb_ptr;
11792 if (SEQ_GEQ(tp->snd_una, tp->iss + (65535 << tp->snd_scale))) {
11794 tp->t_flags2 |= TF2_NO_ISS_CHECK;
11800 if (tp->t_flags2 & TF2_NO_ISS_CHECK) {
11802 seq_min = tp->snd_una - tp->max_sndwnd;
11805 if (SEQ_GT(tp->iss + 1, tp->snd_una - tp->max_sndwnd)) {
11807 seq_min = tp->iss + 1;
11814 seq_min = tp->snd_una - tp->max_sndwnd;
11818 if (SEQ_LT(th->th_ack, seq_min)) {
11825 rack->r_wanted_output = 1;
11829 if (SEQ_GT(th->th_ack, tp->snd_max)) {
11831 rack->r_wanted_output = 1;
11834 if (rack->gp_ready &&
11835 (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT)) {
11838 if (SEQ_GEQ(th->th_ack, tp->snd_una) || to->to_nsacks) {
11842 in_rec = IN_FASTRECOVERY(tp->t_flags);
11843 if (rack->rc_in_persist) {
11844 tp->t_rxtshift = 0;
11845 RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
11846 rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
11849 if ((th->th_ack == tp->snd_una) &&
11850 (tiwin == tp->snd_wnd) &&
11852 ((to->to_flags & TOF_SACK) == 0)) {
11853 rack_strike_dupack(rack, th->th_ack);
11856 rack_log_ack(tp, to, th, ((in_rec == 0) && IN_FASTRECOVERY(tp->t_flags)),
11860 if (__predict_false(SEQ_LEQ(th->th_ack, tp->snd_una))) {
11866 if (SEQ_LT(th->th_ack, tp->snd_una) && (sbspace(&so->so_rcv) > ctf_fixed_maxseg(tp))) {
11867 rack->r_ctl.rc_reorder_ts = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time);
11868 if (rack->r_ctl.rc_reorder_ts == 0)
11869 rack->r_ctl.rc_reorder_ts = 1;
11877 if (tp->t_flags & TF_NEEDSYN) {
11879 * T/TCP: Connection was half-synchronized, and our SYN has
11881 * to non-starred state, increment snd_una for ACK of SYN,
11882 * and check if we can do window scaling.
11884 tp->t_flags &= ~TF_NEEDSYN;
11885 tp->snd_una++;
11886 /* Do window scaling? */
11887 if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
11889 tp->rcv_scale = tp->request_r_scale;
11893 nsegs = max(1, m->m_pkthdr.lro_nsegs);
11898 * Any time we move the cum-ack forward clear
11899 * keep-alive tied probe-not-answered. The
11902 rack->probe_not_answered = 0;
11912 if ((tp->t_flags & TF_PREVVALID) &&
11913 ((tp->t_flags & TF_RCVD_TSTMP) == 0)) {
11914 tp->t_flags &= ~TF_PREVVALID;
11915 if (tp->t_rxtshift == 1 &&
11916 (int)(ticks - tp->t_badrxtwin) < 0)
11917 rack_cong_signal(tp, CC_RTO_ERR, th->th_ack, __LINE__);
11921 tp->t_rxtshift = 0;
11922 RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
11923 rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
11924 rack->rc_tlp_in_progress = 0;
11925 rack->r_ctl.rc_tlp_cnt_out = 0;
11930 if (rack->r_ctl.rc_hpts_flags & PACE_TMR_RXT)
11931 rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
11933 rack_req_check_for_comp(rack, th->th_ack);
11952 * (possibly backed-off) value.
11959 if (IN_RECOVERY(tp->t_flags)) {
11960 if (SEQ_LT(th->th_ack, tp->snd_recover) &&
11961 (SEQ_LT(th->th_ack, tp->snd_max))) {
11964 rack_post_recovery(tp, th->th_ack);
11971 p_cwnd = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs);
11973 p_cwnd += tp->snd_cwnd;
11975 } else if ((rack->rto_from_rec == 1) &&
11976 SEQ_GEQ(th->th_ack, tp->snd_recover)) {
11979 * and never re-entered recovery. The timeout(s)
11983 rack->rto_from_rec = 0;
11990 rack_ack_received(tp, rack, th->th_ack, nsegs, CC_ACK, post_recovery);
11992 (tp->snd_cwnd > p_cwnd)) {
11993 /* Must be non-newreno (cubic) getting too ahead of itself */
11994 tp->snd_cwnd = p_cwnd;
11997 acked_amount = min(acked, (int)sbavail(&so->so_snd));
11998 tp->snd_wnd -= acked_amount;
11999 mfree = sbcut_locked(&so->so_snd, acked_amount);
12000 if ((sbused(&so->so_snd) == 0) &&
12002 (tp->t_state >= TCPS_FIN_WAIT_1) &&
12003 (tp->t_flags & TF_SENTFIN)) {
12012 tp->snd_una = th->th_ack;
12014 if (acked_amount && sbavail(&so->so_snd))
12015 rack_adjust_sendmap_head(rack, &so->so_snd);
12016 rack_log_wakeup(tp,rack, &so->so_snd, acked, 2);
12020 if (SEQ_GT(tp->snd_una, tp->snd_recover))
12021 tp->snd_recover = tp->snd_una;
12023 if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
12024 tp->snd_nxt = tp->snd_max;
12027 (rack->use_fixed_rate == 0) &&
12028 (rack->in_probe_rtt == 0) &&
12029 rack->rc_gp_dyn_mul &&
12030 rack->rc_always_pace) {
12034 if (tp->snd_una == tp->snd_max) {
12036 tp->t_flags &= ~TF_PREVVALID;
12037 if (rack->r_ctl.rc_went_idle_time == 0)
12038 rack->r_ctl.rc_went_idle_time = 1;
12039 rack->r_ctl.retran_during_recovery = 0;
12040 rack->r_ctl.dsack_byte_cnt = 0;
12042 if (sbavail(&tptosocket(tp)->so_snd) == 0)
12043 tp->t_acktime = 0;
12044 rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
12045 rack->rc_suspicious = 0;
12047 rack->r_wanted_output = 1;
12048 sack_filter_clear(&rack->r_ctl.rack_sf, tp->snd_una);
12049 if ((tp->t_state >= TCPS_FIN_WAIT_1) &&
12050 (sbavail(&so->so_snd) == 0) &&
12051 (tp->t_flags2 & TF2_DROP_AF_DATA)) {
12058 /* tcp_close will kill the inp pre-log the Reset */
12075 if (tcp_bblogging_on(rack->rc_tp)) {
12084 log.u_bbr.flex5 = rack->r_must_retran;
12086 log.u_bbr.flex7 = rack->rc_has_collapsed;
12096 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
12097 TCP_LOG_EVENTP(rack->rc_tp, NULL,
12098 &rack->rc_inp->inp_socket->so_rcv,
12099 &rack->rc_inp->inp_socket->so_snd,
12114 tcp_trace_point(rack->rc_tp, TCP_TP_COLLAPSED_WND);
12115 if ((rack->rc_has_collapsed == 0) ||
12116 (rack->r_ctl.last_collapse_point != (th_ack + rack->rc_tp->snd_wnd)))
12118 rack->r_ctl.last_collapse_point = th_ack + rack->rc_tp->snd_wnd;
12119 rack->r_ctl.high_collapse_point = rack->rc_tp->snd_max;
12120 rack->rc_has_collapsed = 1;
12121 rack->r_collapse_point_valid = 1;
12122 rack_log_collapse(rack, 0, th_ack, rack->r_ctl.last_collapse_point, line, 1, 0, NULL);
12133 tcp_trace_point(rack->rc_tp, TCP_TP_COLLAPSED_WND);
12134 rack->rc_has_collapsed = 0;
12135 rsm = tqhash_find(rack->r_ctl.tqh, rack->r_ctl.last_collapse_point);
12138 rack_log_collapse(rack, 0, 0, ctf_outstanding(rack->rc_tp), line, 0, 0, NULL);
12142 if (SEQ_GT(rack->r_ctl.last_collapse_point, rsm->r_start)) {
12143 rack_log_collapse(rack, rsm->r_start, rsm->r_end,
12144 rack->r_ctl.last_collapse_point, line, 3, rsm->r_flags, rsm);
12153 rack_clone_rsm(rack, nrsm, rsm, rack->r_ctl.last_collapse_point);
12155 (void)tqhash_insert(rack->r_ctl.tqh, nrsm);
12157 if ((insret = tqhash_insert(rack->r_ctl.tqh, nrsm)) != 0) {
12158 panic("Insert in tailq_hash of %p fails ret:%d rack:%p rsm:%p",
12162 rack_log_map_chg(rack->rc_tp, rack, NULL, rsm, nrsm, MAP_SPLIT,
12163 rack->r_ctl.last_collapse_point, __LINE__);
12164 if (rsm->r_in_tmap) {
12165 TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, rsm, nrsm, r_tnext);
12166 nrsm->r_in_tmap = 1;
12176 TQHASH_FOREACH_FROM(nrsm, rack->r_ctl.tqh, rsm) {
12178 nrsm->r_flags |= RACK_RWND_COLLAPSED;
12179 rack_log_collapse(rack, nrsm->r_start, nrsm->r_end, 0, line, 4, nrsm->r_flags, nrsm);
12185 rack_log_collapse(rack, cnt, split, ctf_outstanding(rack->rc_tp), line, 0, 0, NULL);
12194 rack->r_ctl.rc_rcvtime, __LINE__);
12195 tp->t_flags |= TF_DELACK;
12197 rack->r_wanted_output = 1;
12198 tp->t_flags |= TF_ACKNOW;
12210 if (rack->r_fast_output) {
12218 if ((out + rack->r_ctl.fsb.left_to_send) > tp->snd_wnd) {
12220 if (out >= tp->snd_wnd) {
12222 rack->r_fast_output = 0;
12225 rack->r_ctl.fsb.left_to_send = tp->snd_wnd - out;
12226 if (rack->r_ctl.fsb.left_to_send < ctf_fixed_maxseg(tp)) {
12228 rack->r_fast_output = 0;
12255 rack = (struct tcp_rack *)tp->t_fb_ptr;
12256 nsegs = max(1, m->m_pkthdr.lro_nsegs);
12258 (SEQ_LT(tp->snd_wl1, th->th_seq) ||
12259 (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) ||
12260 (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
12263 tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
12265 tp->snd_wnd = tiwin;
12267 tp->snd_wl1 = th->th_seq;
12268 tp->snd_wl2 = th->th_ack;
12269 if (tp->snd_wnd > tp->max_sndwnd)
12270 tp->max_sndwnd = tp->snd_wnd;
12271 rack->r_wanted_output = 1;
12273 if ((tp->snd_wl2 == th->th_ack) && (tiwin < tp->snd_wnd)) {
12274 tp->snd_wnd = tiwin;
12276 tp->snd_wl1 = th->th_seq;
12277 tp->snd_wl2 = th->th_ack;
12280 if (tp->snd_wnd < ctf_outstanding(tp))
12282 rack_collapsed_window(rack, ctf_outstanding(tp), th->th_ack, __LINE__);
12283 else if (rack->rc_has_collapsed)
12285 if ((rack->r_collapse_point_valid) &&
12286 (SEQ_GT(th->th_ack, rack->r_ctl.high_collapse_point)))
12287 rack->r_collapse_point_valid = 0;
12289 if ((rack->rc_in_persist != 0) &&
12290 (tp->snd_wnd >= min((rack->r_ctl.rc_high_rwnd/2),
12291 rack->r_ctl.rc_pace_min_segs))) {
12292 rack_exit_persist(tp, rack, rack->r_ctl.rc_rcvtime);
12293 tp->snd_nxt = tp->snd_max;
12295 rack->r_wanted_output = 1;
12298 if ((rack->rc_in_persist == 0) &&
12299 (tp->snd_wnd < min((rack->r_ctl.rc_high_rwnd/2), rack->r_ctl.rc_pace_min_segs)) &&
12300 TCPS_HAVEESTABLISHED(tp->t_state) &&
12301 ((tp->snd_max == tp->snd_una) || rack->rc_has_collapsed) &&
12302 sbavail(&tptosocket(tp)->so_snd) &&
12303 (sbavail(&tptosocket(tp)->so_snd) > tp->snd_wnd)) {
12310 rack_enter_persist(tp, rack, rack->r_ctl.rc_rcvtime, tp->snd_una);
12312 if (tp->t_flags2 & TF2_DROP_AF_DATA) {
12320 tp->rcv_up = tp->rcv_nxt;
12325 * This process logically involves adjusting tp->rcv_wnd as data is
12330 tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) &&
12331 (tp->t_flags & TF_FASTOPEN));
12333 TCPS_HAVERCVDFIN(tp->t_state) == 0) {
12334 tcp_seq save_start = th->th_seq;
12335 tcp_seq save_rnxt = tp->rcv_nxt;
12350 if (th->th_seq == tp->rcv_nxt &&
12352 (TCPS_HAVEESTABLISHED(tp->t_state) ||
12357 if (so->so_rcv.sb_shlim) {
12360 if (counter_fo_get(so->so_rcv.sb_shlim, mcnt,
12369 tp->rcv_nxt += tlen;
12371 ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) &&
12372 (tp->t_fbyte_in == 0)) {
12373 tp->t_fbyte_in = ticks;
12374 if (tp->t_fbyte_in == 0)
12375 tp->t_fbyte_in = 1;
12376 if (tp->t_fbyte_out && tp->t_fbyte_in)
12377 tp->t_flags2 |= TF2_FBYTES_COMPLETE;
12383 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
12392 so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
12397 sbappendstream_locked(&so->so_rcv, m, 0);
12399 rack_log_wakeup(tp,rack, &so->so_rcv, tlen, 1);
12403 if (so->so_rcv.sb_shlim && appended != mcnt)
12404 counter_fo_release(so->so_rcv.sb_shlim,
12405 mcnt - appended);
12417 tp->t_flags |= TF_ACKNOW;
12418 if (tp->t_flags & TF_WAKESOR) {
12419 tp->t_flags &= ~TF_WAKESOR;
12424 if ((tp->t_flags & TF_SACK_PERMIT) &&
12426 TCPS_HAVEESTABLISHED(tp->t_state)) {
12434 } else if ((tlen > 0) && SEQ_GT(tp->rcv_nxt, save_rnxt)) {
12435 if ((tp->rcv_numsacks >= 1) &&
12436 (tp->sackblks[0].end == save_start)) {
12442 tp->sackblks[0].start,
12443 tp->sackblks[0].end);
12467 if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
12471 * If connection is half-synchronized (ie NEEDSYN
12477 if (tp->t_flags & TF_NEEDSYN) {
12479 rack->r_ctl.rc_rcvtime, __LINE__);
12480 tp->t_flags |= TF_DELACK;
12482 tp->t_flags |= TF_ACKNOW;
12484 tp->rcv_nxt++;
12486 switch (tp->t_state) {
12492 tp->t_starttime = ticks;
12496 rack->r_ctl.rc_rcvtime, __LINE__);
12506 rack->r_ctl.rc_rcvtime, __LINE__);
12512 * starting the time-wait timer, turning off the
12517 rack->r_ctl.rc_rcvtime, __LINE__);
12525 if ((tp->t_flags & TF_ACKNOW) ||
12526 (sbavail(&so->so_snd) > (tp->snd_max - tp->snd_una))) {
12527 rack->r_wanted_output = 1;
12534 * have broken out the fast-data path also just like
12535 * the fast-ack.
12543 int32_t newsize = 0; /* automatic sockbuf scaling */
12554 if (__predict_false(th->th_seq != tp->rcv_nxt)) {
12557 if (tiwin && tiwin != tp->snd_wnd) {
12560 if (__predict_false((tp->t_flags & (TF_NEEDSYN | TF_NEEDFIN)))) {
12563 if (__predict_false((to->to_flags & TOF_TS) &&
12564 (TSTMP_LT(to->to_tsval, tp->ts_recent)))) {
12567 if (__predict_false((th->th_ack != tp->snd_una))) {
12570 if (__predict_false(tlen > sbspace(&so->so_rcv))) {
12573 if ((to->to_flags & TOF_TS) != 0 &&
12574 SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
12575 tp->ts_recent_age = tcp_ts_getticks();
12576 tp->ts_recent = to->to_tsval;
12578 rack = (struct tcp_rack *)tp->t_fb_ptr;
12580 * This is a pure, in-sequence data packet with nothing on the
12583 nsegs = max(1, m->m_pkthdr.lro_nsegs);
12586 if (so->so_rcv.sb_shlim) {
12589 if (counter_fo_get(so->so_rcv.sb_shlim, mcnt,
12598 if (tp->rcv_numsacks)
12601 tp->rcv_nxt += tlen;
12603 ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) &&
12604 (tp->t_fbyte_in == 0)) {
12605 tp->t_fbyte_in = ticks;
12606 if (tp->t_fbyte_in == 0)
12607 tp->t_fbyte_in = 1;
12608 if (tp->t_fbyte_out && tp->t_fbyte_in)
12609 tp->t_flags2 |= TF2_FBYTES_COMPLETE;
12614 tp->snd_wl1 = th->th_seq;
12618 tp->rcv_up = tp->rcv_nxt;
12625 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
12634 so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
12639 sbappendstream_locked(&so->so_rcv, m, 0);
12642 rack_log_wakeup(tp,rack, &so->so_rcv, tlen, 1);
12646 if (so->so_rcv.sb_shlim && mcnt != appended)
12647 counter_fo_release(so->so_rcv.sb_shlim, mcnt - appended);
12650 if (tp->snd_una == tp->snd_max)
12651 sack_filter_clear(&rack->r_ctl.rack_sf, tp->snd_una);
12658 * in sequence to remain in the fast-path. We also add
12662 * slow-path.
12674 if (__predict_false(SEQ_LEQ(th->th_ack, tp->snd_una))) {
12678 if (__predict_false(SEQ_GT(th->th_ack, tp->snd_max))) {
12686 if (__predict_false(tp->t_flags & (TF_NEEDSYN | TF_NEEDFIN))) {
12690 if ((to->to_flags & TOF_TS) && __predict_false(TSTMP_LT(to->to_tsval, tp->ts_recent))) {
12694 if (__predict_false(IN_RECOVERY(tp->t_flags))) {
12698 rack = (struct tcp_rack *)tp->t_fb_ptr;
12699 if (rack->r_ctl.rc_sacked) {
12703 /* Ok if we reach here, we can process a fast-ack */
12704 if (rack->gp_ready &&
12705 (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT)) {
12708 nsegs = max(1, m->m_pkthdr.lro_nsegs);
12711 if (tiwin != tp->snd_wnd) {
12712 tp->snd_wnd = tiwin;
12714 tp->snd_wl1 = th->th_seq;
12715 if (tp->snd_wnd > tp->max_sndwnd)
12716 tp->max_sndwnd = tp->snd_wnd;
12719 if ((rack->rc_in_persist != 0) &&
12720 (tp->snd_wnd >= min((rack->r_ctl.rc_high_rwnd/2),
12721 rack->r_ctl.rc_pace_min_segs))) {
12725 if ((rack->rc_in_persist == 0) &&
12726 (tp->snd_wnd < min((rack->r_ctl.rc_high_rwnd/2), rack->r_ctl.rc_pace_min_segs)) &&
12727 TCPS_HAVEESTABLISHED(tp->t_state) &&
12728 ((tp->snd_max == tp->snd_una) || rack->rc_has_collapsed) &&
12729 sbavail(&tptosocket(tp)->so_snd) &&
12730 (sbavail(&tptosocket(tp)->so_snd) > tp->snd_wnd)) {
12737 rack_enter_persist(tp, rack, rack->r_ctl.rc_rcvtime, th->th_ack);
12744 if ((to->to_flags & TOF_TS) != 0 &&
12745 SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
12746 tp->ts_recent_age = tcp_ts_getticks();
12747 tp->ts_recent = to->to_tsval;
12757 if ((tp->t_flags & TF_PREVVALID) &&
12758 ((tp->t_flags & TF_RCVD_TSTMP) == 0)) {
12759 tp->t_flags &= ~TF_PREVVALID;
12760 if (tp->t_rxtshift == 1 &&
12761 (int)(ticks - tp->t_badrxtwin) < 0)
12762 rack_cong_signal(tp, CC_RTO_ERR, th->th_ack, __LINE__);
12782 rack_ack_received(tp, rack, th->th_ack, nsegs, CC_ACK, 0);
12784 mfree = sbcut_locked(&so->so_snd, acked);
12785 tp->snd_una = th->th_ack;
12787 rack_adjust_sendmap_head(rack, &so->so_snd);
12789 rack_log_wakeup(tp,rack, &so->so_snd, acked, 2);
12792 tp->t_rxtshift = 0;
12793 RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
12794 rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
12795 rack->rc_tlp_in_progress = 0;
12796 rack->r_ctl.rc_tlp_cnt_out = 0;
12801 if (rack->r_ctl.rc_hpts_flags & PACE_TMR_RXT)
12802 rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
12805 rack_req_check_for_comp(rack, th->th_ack);
12813 if (tp->snd_wnd < ctf_outstanding(tp)) {
12815 rack_collapsed_window(rack, ctf_outstanding(tp), th->th_ack, __LINE__);
12816 } else if (rack->rc_has_collapsed)
12818 if ((rack->r_collapse_point_valid) &&
12819 (SEQ_GT(tp->snd_una, rack->r_ctl.high_collapse_point)))
12820 rack->r_collapse_point_valid = 0;
12824 tp->snd_wl2 = th->th_ack;
12825 tp->t_dupacks = 0;
12831 * otherwise restart timer using current (possibly backed-off)
12837 (rack->use_fixed_rate == 0) &&
12838 (rack->in_probe_rtt == 0) &&
12839 rack->rc_gp_dyn_mul &&
12840 rack->rc_always_pace) {
12844 if (tp->snd_una == tp->snd_max) {
12845 tp->t_flags &= ~TF_PREVVALID;
12846 rack->r_ctl.retran_during_recovery = 0;
12847 rack->rc_suspicious = 0;
12848 rack->r_ctl.dsack_byte_cnt = 0;
12849 rack->r_ctl.rc_went_idle_time = tcp_get_usecs(NULL);
12850 if (rack->r_ctl.rc_went_idle_time == 0)
12851 rack->r_ctl.rc_went_idle_time = 1;
12853 if (sbavail(&tptosocket(tp)->so_snd) == 0)
12854 tp->t_acktime = 0;
12855 rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
12857 if (acked && rack->r_fast_output)
12859 if (sbavail(&so->so_snd)) {
12860 rack->r_wanted_output = 1;
12888 * this is an acceptable SYN segment initialize tp->rcv_nxt and
12889 * tp->irs if seg contains ack then advance tp->snd_una if seg
12896 (SEQ_LEQ(th->th_ack, tp->iss) ||
12897 SEQ_GT(th->th_ack, tp->snd_max))) {
12917 tp->irs = th->th_seq;
12919 rack = (struct tcp_rack *)tp->t_fb_ptr;
12928 /* Do window scaling on this connection? */
12929 if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
12931 tp->rcv_scale = tp->request_r_scale;
12933 tp->rcv_adv += min(tp->rcv_wnd,
12934 TCP_MAXWIN << tp->rcv_scale);
12939 if ((tp->t_flags & TF_FASTOPEN) &&
12940 (tp->snd_una != tp->snd_max)) {
12942 if (SEQ_LT(th->th_ack, tp->snd_max))
12951 rack->r_ctl.rc_rcvtime, __LINE__);
12952 tp->t_flags |= TF_DELACK;
12954 rack->r_wanted_output = 1;
12955 tp->t_flags |= TF_ACKNOW;
12960 if (SEQ_GT(th->th_ack, tp->snd_una)) {
12966 * ack-processing since the
12967 * data stream in our send-map
12973 tp->snd_una++;
12974 if (tfo_partial && (SEQ_GT(tp->snd_max, tp->snd_una))) {
12983 rsm = tqhash_min(rack->r_ctl.tqh);
12985 if (rsm->r_flags & RACK_HAS_SYN) {
12986 rsm->r_flags &= ~RACK_HAS_SYN;
12987 rsm->r_start++;
12989 rack->r_ctl.rc_resend = rsm;
12995 * SYN_SENT --> ESTABLISHED SYN_SENT* --> FIN_WAIT_1
12997 tp->t_starttime = ticks;
12998 if (tp->t_flags & TF_NEEDFIN) {
13000 tp->t_flags &= ~TF_NEEDFIN;
13010 * Received initial SYN in SYN-SENT[*] state => simultaneous
13013 * half-synchronized. Otherwise, do 3-way handshake:
13014 * SYN-SENT -> SYN-RECEIVED SYN-SENT* -> SYN-RECEIVED* If
13017 tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN | TF_SONOTCONN);
13021 * Advance th->th_seq to correspond to first data byte. If data,
13024 th->th_seq++;
13025 if (tlen > tp->rcv_wnd) {
13026 todrop = tlen - tp->rcv_wnd;
13027 m_adj(m, -todrop);
13028 tlen = tp->rcv_wnd;
13033 tp->snd_wl1 = th->th_seq - 1;
13034 tp->rcv_up = th->th_seq;
13042 /* For syn-sent we need to possibly update the rtt */
13043 if ((to->to_flags & TOF_TS) != 0 && to->to_tsecr) {
13047 t = (mcts - to->to_tsecr) * HPTS_USEC_IN_MSEC;
13048 if (!tp->t_rttlow || tp->t_rttlow > t)
13049 tp->t_rttlow = t;
13050 rack_log_rtt_sample_calc(rack, t, (to->to_tsecr * 1000), (mcts * 1000), 4);
13057 if (tp->t_state == TCPS_FIN_WAIT_1) {
13074 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
13104 rack = (struct tcp_rack *)tp->t_fb_ptr;
13107 (tp->t_fin_is_rst && (thflags & TH_FIN)))
13110 (SEQ_LEQ(th->th_ack, tp->snd_una) ||
13111 SEQ_GT(th->th_ack, tp->snd_max))) {
13116 if (tp->t_flags & TF_FASTOPEN) {
13129 /* non-initial SYN is ignored */
13130 if ((rack->r_ctl.rc_hpts_flags & PACE_TMR_RXT) ||
13131 (rack->r_ctl.rc_hpts_flags & PACE_TMR_TLP) ||
13132 (rack->r_ctl.rc_hpts_flags & PACE_TMR_RACK)) {
13146 if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent &&
13147 TSTMP_LT(to->to_tsval, tp->ts_recent)) {
13152 * In the SYN-RECEIVED state, validate that the packet belongs to
13158 if (SEQ_LT(th->th_seq, tp->irs)) {
13177 * p.869. In such cases, we can still calculate the RTT correctly
13180 if ((to->to_flags & TOF_TS) != 0 &&
13181 SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
13182 SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
13184 tp->ts_recent_age = tcp_ts_getticks();
13185 tp->ts_recent = to->to_tsval;
13187 tp->snd_wnd = tiwin;
13190 * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag
13191 * is on (half-synchronized state), then queue data for later
13195 if (tp->t_flags & TF_FASTOPEN) {
13202 if (tp->t_flags & TF_SONOTCONN) {
13203 tp->t_flags &= ~TF_SONOTCONN;
13206 /* Do window scaling? */
13207 if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
13209 tp->rcv_scale = tp->request_r_scale;
13212 * Make transitions: SYN-RECEIVED -> ESTABLISHED SYN-RECEIVED* ->
13213 * FIN-WAIT-1
13215 tp->t_starttime = ticks;
13216 if ((tp->t_flags & TF_FASTOPEN) && tp->t_tfo_pending) {
13217 tcp_fastopen_decrement_counter(tp->t_tfo_pending);
13218 tp->t_tfo_pending = NULL;
13220 if (tp->t_flags & TF_NEEDFIN) {
13222 tp->t_flags &= ~TF_NEEDFIN;
13233 if (!(tp->t_flags & TF_FASTOPEN))
13241 if (SEQ_GT(th->th_ack, tp->snd_una) && !(tp->t_flags & TF_NEEDSYN))
13242 tp->snd_una++;
13250 if (tp->t_flags & TF_WAKESOR) {
13251 tp->t_flags &= ~TF_WAKESOR;
13256 tp->snd_wl1 = th->th_seq - 1;
13257 /* For syn-recv we need to possibly update the rtt */
13258 if ((to->to_flags & TOF_TS) != 0 && to->to_tsecr) {
13262 t = (mcts - to->to_tsecr) * HPTS_USEC_IN_MSEC;
13263 if (!tp->t_rttlow || tp->t_rttlow > t)
13264 tp->t_rttlow = t;
13265 rack_log_rtt_sample_calc(rack, t, (to->to_tsecr * 1000), (mcts * 1000), 5);
13272 if (tp->t_state == TCPS_FIN_WAIT_1) {
13289 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
13319 * uni-directional data xfer. If the packet has no control flags,
13320 * is in-sequence, the window didn't change and we're not
13324 * waiting for space. If the length is non-zero and the ack didn't
13325 * move, we're the receiver side. If we're getting packets in-order
13328 * hidden state-flags are also off. Since we check for
13331 rack = (struct tcp_rack *)tp->t_fb_ptr;
13332 if (__predict_true(((to->to_flags & TOF_SACK) == 0)) &&
13335 __predict_true(th->th_seq == tp->rcv_nxt)) {
13338 tiwin, nxt_pkt, rack->r_ctl.rc_rcvtime)) {
13351 (tp->t_fin_is_rst && (thflags & TH_FIN)))
13366 if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent &&
13367 TSTMP_LT(to->to_tsval, tp->ts_recent)) {
13385 * p.869. In such cases, we can still calculate the RTT correctly
13388 if ((to->to_flags & TOF_TS) != 0 &&
13389 SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
13390 SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
13392 tp->ts_recent_age = tcp_ts_getticks();
13393 tp->ts_recent = to->to_tsval;
13396 * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag
13397 * is on (half-synchronized state), then queue data for later
13401 if (tp->t_flags & TF_NEEDSYN) {
13405 } else if (tp->t_flags & TF_ACKNOW) {
13407 ((struct tcp_rack *)tp->t_fb_ptr)->r_wanted_output = 1;
13420 if (sbavail(&so->so_snd)) {
13447 (tp->t_fin_is_rst && (thflags & TH_FIN)))
13461 if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent &&
13462 TSTMP_LT(to->to_tsval, tp->ts_recent)) {
13480 * p.869. In such cases, we can still calculate the RTT correctly
13483 if ((to->to_flags & TOF_TS) != 0 &&
13484 SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
13485 SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
13487 tp->ts_recent_age = tcp_ts_getticks();
13488 tp->ts_recent = to->to_tsval;
13491 * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag
13492 * is on (half-synchronized state), then queue data for later
13496 if (tp->t_flags & TF_NEEDSYN) {
13500 } else if (tp->t_flags & TF_ACKNOW) {
13502 ((struct tcp_rack *)tp->t_fb_ptr)->r_wanted_output = 1;
13515 if (sbavail(&so->so_snd)) {
13517 rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr,
13533 rack = (struct tcp_rack *)tp->t_fb_ptr;
13534 if (rack->rc_allow_data_af_clo == 0) {
13537 /* tcp_close will kill the inp pre-log the Reset */
13544 if (sbavail(&so->so_snd) == 0)
13548 tp->rcv_nxt = th->th_seq + *tlen;
13549 tp->t_flags2 |= TF2_DROP_AF_DATA;
13550 rack->r_wanted_output = 1;
13572 (tp->t_fin_is_rst && (thflags & TH_FIN)))
13586 if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent &&
13587 TSTMP_LT(to->to_tsval, tp->ts_recent)) {
13598 if ((tp->t_flags & TF_CLOSED) && tlen &&
13612 * p.869. In such cases, we can still calculate the RTT correctly
13615 if ((to->to_flags & TOF_TS) != 0 &&
13616 SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
13617 SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
13619 tp->ts_recent_age = tcp_ts_getticks();
13620 tp->ts_recent = to->to_tsval;
13623 * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag
13624 * is on (half-synchronized state), then queue data for later
13628 if (tp->t_flags & TF_NEEDSYN) {
13631 } else if (tp->t_flags & TF_ACKNOW) {
13633 ((struct tcp_rack *)tp->t_fb_ptr)->r_wanted_output = 1;
13656 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
13665 if (sbavail(&so->so_snd)) {
13667 rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr,
13694 (tp->t_fin_is_rst && (thflags & TH_FIN)))
13708 if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent &&
13709 TSTMP_LT(to->to_tsval, tp->ts_recent)) {
13727 * p.869. In such cases, we can still calculate the RTT correctly
13730 if ((to->to_flags & TOF_TS) != 0 &&
13731 SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
13732 SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
13734 tp->ts_recent_age = tcp_ts_getticks();
13735 tp->ts_recent = to->to_tsval;
13738 * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag
13739 * is on (half-synchronized state), then queue data for later
13743 if (tp->t_flags & TF_NEEDSYN) {
13746 } else if (tp->t_flags & TF_ACKNOW) {
13748 ((struct tcp_rack *)tp->t_fb_ptr)->r_wanted_output = 1;
13766 if (sbavail(&so->so_snd)) {
13768 rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr,
13795 (tp->t_fin_is_rst && (thflags & TH_FIN)))
13809 if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent &&
13810 TSTMP_LT(to->to_tsval, tp->ts_recent)) {
13829 * p.869. In such cases, we can still calculate the RTT correctly
13832 if ((to->to_flags & TOF_TS) != 0 &&
13833 SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
13834 SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
13836 tp->ts_recent_age = tcp_ts_getticks();
13837 tp->ts_recent = to->to_tsval;
13840 * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag
13841 * is on (half-synchronized state), then queue data for later
13845 if (tp->t_flags & TF_NEEDSYN) {
13848 } else if (tp->t_flags & TF_ACKNOW) {
13850 ((struct tcp_rack *)tp->t_fb_ptr)->r_wanted_output = 1;
13868 if (sbavail(&so->so_snd)) {
13870 rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr,
13896 /* Reset receive buffer auto scaling when not in bulk receive mode. */
13898 (tp->t_fin_is_rst && (thflags & TH_FIN)))
13912 if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent &&
13913 TSTMP_LT(to->to_tsval, tp->ts_recent)) {
13924 if ((tp->t_flags & TF_CLOSED) && tlen &&
13938 * p.869. In such cases, we can still calculate the RTT correctly
13941 if ((to->to_flags & TOF_TS) != 0 &&
13942 SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
13943 SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
13945 tp->ts_recent_age = tcp_ts_getticks();
13946 tp->ts_recent = to->to_tsval;
13949 * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag
13950 * is on (half-synchronized state), then queue data for later
13954 if (tp->t_flags & TF_NEEDSYN) {
13957 } else if (tp->t_flags & TF_ACKNOW) {
13959 ((struct tcp_rack *)tp->t_fb_ptr)->r_wanted_output = 1;
13972 if (sbavail(&so->so_snd)) {
13974 rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr,
13987 rack->r_ctl.rack_rs.rs_flags = RACK_RTT_EMPTY;
13988 rack->r_ctl.rack_rs.rs_rtt_cnt = 0;
13989 rack->r_ctl.rack_rs.rs_rtt_tot = 0;
14000 if (rack->rc_hybrid_mode &&
14001 (rack->r_ctl.rc_pace_max_segs != 0) &&
14003 (rack->r_ctl.rc_last_sft != NULL)) {
14004 rack->r_ctl.rc_last_sft->hybrid_flags &= ~TCP_HYBRID_PACING_SETMSS;
14008 orig_min = rack->r_ctl.rc_pace_min_segs;
14009 orig_max = rack->r_ctl.rc_pace_max_segs;
14010 user_max = ctf_fixed_maxseg(tp) * rack->rc_user_set_max_segs;
14011 if (ctf_fixed_maxseg(tp) != rack->r_ctl.rc_pace_min_segs)
14013 rack->r_ctl.rc_pace_min_segs = ctf_fixed_maxseg(tp);
14014 if (rack->use_fixed_rate || rack->rc_force_max_seg) {
14015 if (user_max != rack->r_ctl.rc_pace_max_segs)
14018 if (rack->rc_force_max_seg) {
14019 rack->r_ctl.rc_pace_max_segs = user_max;
14020 } else if (rack->use_fixed_rate) {
14022 if ((rack->r_ctl.crte == NULL) ||
14023 (bw_est != rack->r_ctl.crte->rate)) {
14024 rack->r_ctl.rc_pace_max_segs = user_max;
14030 (rack->r_ctl.rc_user_set_min_segs == 1))
14035 rack->r_ctl.rc_pace_min_segs);
14036 rack->r_ctl.rc_pace_max_segs = tcp_get_pacing_burst_size_w_divisor(
14038 rack->r_ctl.crte, NULL, rack->r_ctl.pace_len_divisor);
14040 } else if (rack->rc_always_pace) {
14041 if (rack->r_ctl.gp_bw ||
14042 rack->r_ctl.init_rate) {
14047 orig = rack->r_ctl.rc_pace_max_segs;
14054 rack->r_ctl.rc_pace_max_segs = rack_get_pacing_len(rack,
14056 ctf_fixed_maxseg(rack->rc_tp));
14058 rack->r_ctl.rc_pace_max_segs = rack->r_ctl.rc_pace_min_segs;
14059 if (orig != rack->r_ctl.rc_pace_max_segs)
14061 } else if ((rack->r_ctl.gp_bw == 0) &&
14062 (rack->r_ctl.rc_pace_max_segs == 0)) {
14068 rack->r_ctl.rc_pace_max_segs = rc_init_window(rack);
14071 if (rack->r_ctl.rc_pace_max_segs > PACE_MAX_IP_BYTES) {
14073 rack->r_ctl.rc_pace_max_segs = PACE_MAX_IP_BYTES;
14093 if (rack->r_is_v6) {
14094 rack->r_ctl.fsb.tcp_ip_hdr_len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
14095 ip6 = (struct ip6_hdr *)rack->r_ctl.fsb.tcp_ip_hdr;
14096 if (tp->t_port) {
14097 rack->r_ctl.fsb.tcp_ip_hdr_len += sizeof(struct udphdr);
14099 udp->uh_sport = htons(V_tcp_udp_tunneling_port);
14100 udp->uh_dport = tp->t_port;
14101 rack->r_ctl.fsb.udp = udp;
14102 rack->r_ctl.fsb.th = (struct tcphdr *)(udp + 1);
14105 rack->r_ctl.fsb.th = (struct tcphdr *)(ip6 + 1);
14106 rack->r_ctl.fsb.udp = NULL;
14108 tcpip_fillheaders(rack->rc_inp,
14109 tp->t_port,
14110 ip6, rack->r_ctl.fsb.th);
14111 rack->r_ctl.fsb.hoplimit = in6_selecthlim(rack->rc_inp, NULL);
14116 rack->r_ctl.fsb.tcp_ip_hdr_len = sizeof(struct tcpiphdr);
14117 ip = (struct ip *)rack->r_ctl.fsb.tcp_ip_hdr;
14118 if (tp->t_port) {
14119 rack->r_ctl.fsb.tcp_ip_hdr_len += sizeof(struct udphdr);
14121 udp->uh_sport = htons(V_tcp_udp_tunneling_port);
14122 udp->uh_dport = tp->t_port;
14123 rack->r_ctl.fsb.udp = udp;
14124 rack->r_ctl.fsb.th = (struct tcphdr *)(udp + 1);
14127 rack->r_ctl.fsb.udp = NULL;
14128 rack->r_ctl.fsb.th = (struct tcphdr *)(ip + 1);
14130 tcpip_fillheaders(rack->rc_inp,
14131 tp->t_port,
14132 ip, rack->r_ctl.fsb.th);
14133 rack->r_ctl.fsb.hoplimit = tptoinpcb(tp)->inp_ip_ttl;
14136 rack->r_ctl.fsb.recwin = lmin(lmax(sbspace(&tptosocket(tp)->so_rcv), 0),
14137 (long)TCP_MAXWIN << tp->rcv_scale);
14138 rack->r_fsb_inited = 1;
14149 rack->r_ctl.fsb.tcp_ip_hdr_len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr) + sizeof(struct udphdr);
14151 rack->r_ctl.fsb.tcp_ip_hdr_len = sizeof(struct tcpiphdr) + sizeof(struct udphdr);
14153 rack->r_ctl.fsb.tcp_ip_hdr = malloc(rack->r_ctl.fsb.tcp_ip_hdr_len,
14155 if (rack->r_ctl.fsb.tcp_ip_hdr == NULL) {
14158 rack->r_fsb_inited = 0;
14167 * 20 - Initial round setup
14168 * 21 - Rack declares a new round.
14172 tp = rack->rc_tp;
14178 log.u_bbr.flex1 = rack->r_ctl.current_round;
14179 log.u_bbr.flex2 = rack->r_ctl.roundends;
14181 log.u_bbr.flex4 = tp->snd_max;
14184 log.u_bbr.cur_del_rate = rack->rc_tp->t_sndbytes;
14185 log.u_bbr.delRate = rack->rc_tp->t_snd_rxt_bytes;
14187 &tptosocket(tp)->so_rcv,
14188 &tptosocket(tp)->so_snd,
14197 rack->rack_deferred_inited = 1;
14198 rack->r_ctl.roundends = tp->snd_max;
14199 rack->r_ctl.rc_high_rwnd = tp->snd_wnd;
14200 rack->r_ctl.cwnd_to_use = tp->snd_cwnd;
14214 * 1 - Use full sized retransmits i.e. limit
14218 * 2 - Use pacer min granularity as a guide to
14226 * 0 - The rack default 1 MSS (anything not 0/1/2
14231 rack->full_size_rxt = 1;
14232 rack->shape_rxt_to_pacing_min = 0;
14234 rack->full_size_rxt = 0;
14235 rack->shape_rxt_to_pacing_min = 1;
14237 rack->full_size_rxt = 0;
14238 rack->shape_rxt_to_pacing_min = 0;
14248 if (tcp_bblogging_on(rack->rc_tp)) {
14271 rack = (struct tcp_rack *)tp->t_fb_ptr;
14272 switch (reqr->req) {
14274 if ((reqr->req_param == tp->snd_max) ||
14275 (tp->snd_max == tp->snd_una)){
14279 rsm = tqhash_find(rack->r_ctl.tqh, reqr->req_param);
14281 /* Can't find that seq -- unlikely */
14284 reqr->sendmap_start = rsm->r_start;
14285 reqr->sendmap_end = rsm->r_end;
14286 reqr->sendmap_send_cnt = rsm->r_rtr_cnt;
14287 reqr->sendmap_fas = rsm->r_fas;
14288 if (reqr->sendmap_send_cnt > SNDMAP_NRTX)
14289 reqr->sendmap_send_cnt = SNDMAP_NRTX;
14290 for(i=0; i<reqr->sendmap_send_cnt; i++)
14291 reqr->sendmap_time[i] = rsm->r_tim_lastsent[i];
14292 reqr->sendmap_ack_arrival = rsm->r_ack_arrival;
14293 reqr->sendmap_flags = rsm->r_flags & SNDMAP_MASK;
14294 reqr->sendmap_r_rtr_bytes = rsm->r_rtr_bytes;
14295 reqr->sendmap_dupacks = rsm->r_dupack;
14297 rsm->r_start,
14298 rsm->r_end,
14299 rsm->r_flags);
14303 if (rack->r_ctl.rc_hpts_flags == 0) {
14307 reqr->timer_hpts_flags = rack->r_ctl.rc_hpts_flags;
14308 if (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) {
14309 reqr->timer_pacing_to = rack->r_ctl.rc_last_output_to;
14311 if (rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK) {
14312 reqr->timer_timer_exp = rack->r_ctl.rc_timer_exp;
14315 rack->r_ctl.rc_hpts_flags,
14316 rack->r_ctl.rc_last_output_to,
14317 rack->r_ctl.rc_timer_exp);
14322 reqr->rack_num_dsacks = rack->r_ctl.num_dsack;
14323 reqr->rack_reorder_ts = rack->r_ctl.rc_reorder_ts;
14325 reqr->rack_rxt_last_time = rack->r_ctl.rc_tlp_rxt_last_time;
14326 reqr->rack_min_rtt = rack->r_ctl.rc_rack_min_rtt;
14327 reqr->rack_rtt = rack->rc_rack_rtt;
14328 reqr->rack_tmit_time = rack->r_ctl.rc_rack_tmit_time;
14329 reqr->rack_srtt_measured = rack->rc_srtt_measure_made;
14331 reqr->rack_sacked = rack->r_ctl.rc_sacked;
14332 reqr->rack_holes_rxt = rack->r_ctl.rc_holes_rxt;
14333 reqr->rack_prr_delivered = rack->r_ctl.rc_prr_delivered;
14334 reqr->rack_prr_recovery_fs = rack->r_ctl.rc_prr_recovery_fs;
14335 reqr->rack_prr_sndcnt = rack->r_ctl.rc_prr_sndcnt;
14336 reqr->rack_prr_out = rack->r_ctl.rc_prr_out;
14338 reqr->rack_tlp_out = rack->rc_tlp_in_progress;
14339 reqr->rack_tlp_cnt_out = rack->r_ctl.rc_tlp_cnt_out;
14340 if (rack->rc_in_persist) {
14341 reqr->rack_time_went_idle = rack->r_ctl.rc_went_idle_time;
14342 reqr->rack_in_persist = 1;
14344 reqr->rack_time_went_idle = 0;
14345 reqr->rack_in_persist = 0;
14347 if (rack->r_wanted_output)
14348 reqr->rack_wanted_output = 1;
14350 reqr->rack_wanted_output = 0;
14354 return (-EINVAL);
14373 rack = (struct tcp_rack *)tp->t_fb_ptr;
14375 if (rack->r_mbuf_queue || rack->rc_always_pace || rack->r_use_cmp_ack)
14376 tp->t_flags2 |= TF2_SUPPORTS_MBUFQ;
14378 tp->t_flags2 &= ~TF2_SUPPORTS_MBUFQ;
14379 if (rack->r_use_cmp_ack && TCPS_HAVEESTABLISHED(tp->t_state))
14380 tp->t_flags2 |= TF2_MBUF_ACKCMP;
14381 if (tp->t_in_hpts > IHPTS_NONE) {
14386 if (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) {
14387 if (TSTMP_GT(rack->r_ctl.rc_last_output_to, cts)) {
14388 toval = rack->r_ctl.rc_last_output_to - cts;
14393 } else if (rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK) {
14394 if (TSTMP_GT(rack->r_ctl.rc_timer_exp, cts)) {
14395 toval = rack->r_ctl.rc_timer_exp - cts;
14414 * to not refer to tp->t_fb_ptr. This has the old rack
14420 if (tp->t_fb->tfb_chg_query == NULL) {
14428 rsm->r_no_rtt_allowed = 1;
14429 rsm->r_tim_lastsent[0] = rack_to_usec_ts(&rack->r_ctl.act_rcv_time);
14430 rsm->r_rtr_cnt = 1;
14431 rsm->r_rtr_bytes = 0;
14432 if (tp->t_flags & TF_SENTFIN)
14433 rsm->r_flags |= RACK_HAS_FIN;
14434 rsm->r_end = tp->snd_max;
14435 if (tp->snd_una == tp->iss) {
14437 rsm->r_flags |= RACK_HAS_SYN;
14438 rsm->r_start = tp->iss;
14439 rsm->r_end = rsm->r_start + (tp->snd_max - tp->snd_una);
14441 rsm->r_start = tp->snd_una;
14442 rsm->r_dupack = 0;
14443 if (rack->rc_inp->inp_socket->so_snd.sb_mb != NULL) {
14444 rsm->m = sbsndmbuf(&rack->rc_inp->inp_socket->so_snd, 0, &rsm->soff);
14445 if (rsm->m) {
14446 rsm->orig_m_len = rsm->m->m_len;
14447 rsm->orig_t_space = M_TRAILINGROOM(rsm->m);
14449 rsm->orig_m_len = 0;
14450 rsm->orig_t_space = 0;
14454 * This can happen if we have a stand-alone FIN or
14457 rsm->m = NULL;
14458 rsm->orig_m_len = 0;
14459 rsm->orig_t_space = 0;
14460 rsm->soff = 0;
14463 if ((insret = tqhash_insert(rack->r_ctl.tqh, rsm)) != 0) {
14464 panic("Insert in tailq_hash fails ret:%d rack:%p rsm:%p",
14468 (void)tqhash_insert(rack->r_ctl.tqh, rsm);
14470 TAILQ_INSERT_TAIL(&rack->r_ctl.rc_tmap, rsm, r_tnext);
14471 rsm->r_in_tmap = 1;
14478 at = tp->snd_una;
14479 while (at != tp->snd_max) {
14483 if ((*tp->t_fb->tfb_chg_query)(tp, &qr) == 0)
14495 rsm->r_dupack = qr.sendmap_dupacks;
14496 rsm->r_start = qr.sendmap_start;
14497 rsm->r_end = qr.sendmap_end;
14499 rsm->r_fas = qr.sendmap_end;
14501 rsm->r_fas = rsm->r_start - tp->snd_una;
14507 rsm->r_flags = qr.sendmap_flags & SNDMAP_MASK;
14508 rsm->r_rtr_bytes = qr.sendmap_r_rtr_bytes;
14509 rsm->r_rtr_cnt = qr.sendmap_send_cnt;
14510 rsm->r_ack_arrival = qr.sendmap_ack_arrival;
14511 for (i=0 ; i<rsm->r_rtr_cnt; i++)
14512 rsm->r_tim_lastsent[i] = qr.sendmap_time[i];
14513 rsm->m = sbsndmbuf(&rack->rc_inp->inp_socket->so_snd,
14514 (rsm->r_start - tp->snd_una), &rsm->soff);
14515 if (rsm->m) {
14516 rsm->orig_m_len = rsm->m->m_len;
14517 rsm->orig_t_space = M_TRAILINGROOM(rsm->m);
14519 rsm->orig_m_len = 0;
14520 rsm->orig_t_space = 0;
14523 if ((insret = tqhash_insert(rack->r_ctl.tqh, rsm)) != 0) {
14524 panic("Insert in tailq_hash fails ret:%d rack:%p rsm:%p",
14528 (void)tqhash_insert(rack->r_ctl.tqh, rsm);
14530 if ((rsm->r_flags & RACK_ACKED) == 0) {
14531 TAILQ_FOREACH(ersm, &rack->r_ctl.rc_tmap, r_tnext) {
14532 if (ersm->r_tim_lastsent[(ersm->r_rtr_cnt-1)] >
14533 rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)]) {
14540 rsm->r_in_tmap = 1;
14545 if (rsm->r_in_tmap == 0) {
14549 TAILQ_INSERT_TAIL(&rack->r_ctl.rc_tmap, rsm, r_tnext);
14550 rsm->r_in_tmap = 1;
14553 if ((rack->r_ctl.rc_sacklast == NULL) ||
14554 (SEQ_GT(rsm->r_end, rack->r_ctl.rc_sacklast->r_end))) {
14555 rack->r_ctl.rc_sacklast = rsm;
14559 rsm->r_start,
14560 rsm->r_end,
14561 rsm->r_flags);
14582 * will be tp->t_fb_ptr. If its a stack switch that
14586 if (ptr == &tp->t_fb_ptr)
14602 rack->r_ctl.tqh = malloc(sizeof(struct tailq_hash), M_TCPFSB, M_NOWAIT);
14603 if (rack->r_ctl.tqh == NULL) {
14607 tqhash_init(rack->r_ctl.tqh);
14608 TAILQ_INIT(&rack->r_ctl.rc_free);
14609 TAILQ_INIT(&rack->r_ctl.rc_tmap);
14610 rack->rc_tp = tp;
14611 rack->rc_inp = inp;
14613 rack->r_is_v6 = (inp->inp_vflag & INP_IPV6) != 0;
14630 rack->rc_new_rnd_needed = 1;
14631 rack->r_ctl.rc_split_limit = V_tcp_map_split_limit;
14634 rack->r_ctl.rc_saved_beta.newreno_flags |= CC_NEWRENO_BETA_ECN_ENABLED;
14635 rack->r_ctl.rc_reorder_fade = rack_reorder_fade;
14636 rack->rc_allow_data_af_clo = rack_ignore_data_after_close;
14637 rack->r_ctl.rc_tlp_threshold = rack_tlp_thresh;
14639 rack->rc_pace_to_cwnd = 1;
14641 rack->r_ctl.rc_user_set_min_segs = rack_pacing_min_seg;
14643 rack->use_rack_rr = 1;
14645 rack->rc_pace_dnd = 1;
14648 tp->t_delayed_ack = 1;
14650 tp->t_delayed_ack = 0;
14653 tp->t_flags2 |= TF2_TCP_ACCOUNTING;
14656 rack->r_ctl.pcm_i.cnt_alloc = RACK_DEFAULT_PCM_ARRAY;
14657 sz = (sizeof(struct rack_pcm_stats) * rack->r_ctl.pcm_i.cnt_alloc);
14658 rack->r_ctl.pcm_s = malloc(sz,M_TCPPCM, M_NOWAIT);
14659 if (rack->r_ctl.pcm_s == NULL) {
14660 rack->r_ctl.pcm_i.cnt_alloc = 0;
14663 rack->r_ctl.side_chan_dis_mask = tcp_sidechannel_disable_mask;
14665 rack->r_ctl.rack_per_upper_bound_ss = (uint8_t)rack_per_upper_bound_ss;
14666 rack->r_ctl.rack_per_upper_bound_ca = (uint8_t)rack_per_upper_bound_ca;
14668 rack->rack_enable_scwnd = 1;
14669 rack->r_ctl.pace_len_divisor = rack_default_pacing_divisor;
14670 rack->rc_user_set_max_segs = rack_hptsi_segments;
14671 rack->r_ctl.max_reduction = rack_max_reduce;
14672 rack->rc_force_max_seg = 0;
14673 TAILQ_INIT(&rack->r_ctl.opt_list);
14674 rack->r_ctl.rc_saved_beta.beta = V_newreno_beta_ecn;
14675 rack->r_ctl.rc_saved_beta.beta_ecn = V_newreno_beta_ecn;
14677 rack->rack_hibeta = 1;
14680 rack->r_ctl.rc_saved_beta.beta = rack_hibeta_setting;
14681 rack->r_ctl.saved_hibeta = rack_hibeta_setting;
14684 rack->r_ctl.saved_hibeta = 50;
14689 * will never have all 1's in ms :-)
14691 rack->r_ctl.last_tm_mark = 0xffffffffffffffff;
14692 rack->r_ctl.rc_reorder_shift = rack_reorder_thresh;
14693 rack->r_ctl.rc_pkt_delay = rack_pkt_delay;
14694 rack->r_ctl.rc_tlp_cwnd_reduce = rack_lower_cwnd_at_tlp;
14695 rack->r_ctl.rc_lowest_us_rtt = 0xffffffff;
14696 rack->r_ctl.rc_highest_us_rtt = 0;
14697 rack->r_ctl.bw_rate_cap = rack_bw_rate_cap;
14698 rack->pcm_enabled = rack_pcm_is_enabled;
14700 rack->r_ctl.fillcw_cap = rack_fillcw_bw_cap;
14701 rack->r_ctl.timer_slop = TICKS_2_USEC(tcp_rexmit_slop);
14703 rack->r_use_cmp_ack = 1;
14705 rack->rack_no_prr = 1;
14707 rack->rc_gp_no_rec_chg = 1;
14709 rack->r_ctl.pacing_method |= RACK_REG_PACING;
14710 rack->rc_always_pace = 1;
14711 if (rack->rack_hibeta)
14714 rack->rc_always_pace = 0;
14715 if (rack_enable_mqueue_for_nonpaced || rack->r_use_cmp_ack)
14716 rack->r_mbuf_queue = 1;
14718 rack->r_mbuf_queue = 0;
14721 rack->r_limit_scw = 1;
14723 rack->r_limit_scw = 0;
14725 rack->rc_labc = V_tcp_abc_l_var;
14727 rack->r_use_hpts_min = 1;
14728 if (tp->snd_una != 0) {
14729 rack->rc_sendvars_notset = 0;
14737 * syn-cache. This means none of the
14741 rack->rc_sendvars_notset = 1;
14744 rack->r_ctl.rc_rate_sample_method = rack_rate_sample_method;
14745 rack->rack_tlp_threshold_use = rack_tlp_threshold_use;
14746 rack->r_ctl.rc_prr_sendalot = rack_send_a_lot_in_prr;
14747 rack->r_ctl.rc_min_to = rack_min_to;
14748 microuptime(&rack->r_ctl.act_rcv_time);
14749 rack->r_ctl.rc_last_time_decay = rack->r_ctl.act_rcv_time;
14750 rack->r_ctl.rack_per_of_gp_ss = rack_per_of_gp_ss;
14752 rack->r_up_only = 1;
14755 rack->rc_gp_dyn_mul = 1;
14757 rack->r_ctl.rack_per_of_gp_ca = rack_do_dyn_mul;
14759 rack->r_ctl.rack_per_of_gp_ca = rack_per_of_gp_ca;
14760 rack->r_ctl.rack_per_of_gp_rec = rack_per_of_gp_rec;
14762 rack->rc_skip_timely = 1;
14764 if (rack->rc_skip_timely) {
14765 rack->r_ctl.rack_per_of_gp_rec = 90;
14766 rack->r_ctl.rack_per_of_gp_ca = 100;
14767 rack->r_ctl.rack_per_of_gp_ss = 250;
14769 rack->r_ctl.rack_per_of_gp_probertt = rack_per_of_gp_probertt;
14770 rack->r_ctl.rc_tlp_rxt_last_time = tcp_tv_to_mssectick(&rack->r_ctl.act_rcv_time);
14771 rack->r_ctl.last_rcv_tstmp_for_rtt = tcp_tv_to_mssectick(&rack->r_ctl.act_rcv_time);
14773 setup_time_filter_small(&rack->r_ctl.rc_gp_min_rtt, FILTER_TYPE_MIN,
14775 us_cts = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time);
14776 rack->r_ctl.rc_lower_rtt_us_cts = us_cts;
14777 rack->r_ctl.rc_time_of_last_probertt = us_cts;
14778 rack->r_ctl.rc_went_idle_time = us_cts;
14779 rack->r_ctl.rc_time_probertt_starts = 0;
14781 rack->r_ctl.gp_rnd_thresh = rack_rnd_cnt_req & 0xff;
14783 rack->r_ctl.gate_to_fs = 1;
14784 rack->r_ctl.gp_gain_req = rack_gp_gain_req;
14790 rack->rc_rack_tmr_std_based = 1;
14794 rack->rc_rack_use_dsack = 1;
14798 rack->r_ctl.req_measurements = rack_req_measurements;
14800 rack->r_ctl.req_measurements = 1;
14802 rack->rack_hdw_pace_ena = 1;
14804 rack->r_rack_hw_rate_caps = 1;
14806 rack->rack_rec_nonrxt_use_cr = 1;
14815 tp->t_ccv.flags |= CCF_HYSTART_ALLOWED;
14817 tp->t_ccv.flags |= CCF_HYSTART_CAN_SH_CWND;
14819 tp->t_ccv.flags |= CCF_HYSTART_CONS_SSTH;
14827 tp->t_flags &= ~TF_GPUTINPROG;
14828 if ((tp->t_state != TCPS_CLOSED) &&
14829 (tp->t_state != TCPS_TIME_WAIT)) {
14834 if (SEQ_GT(tp->snd_max, tp->iss))
14835 snt = tp->snd_max - tp->iss;
14846 if (tp->snd_cwnd < iwin)
14847 tp->snd_cwnd = iwin;
14868 tp->snd_ssthresh = 0xffffffff;
14879 if ((tp->t_state != TCPS_CLOSED) &&
14880 (tp->t_state != TCPS_TIME_WAIT) &&
14882 (tp->snd_una != tp->snd_max)) {
14891 if (rack->r_mbuf_queue || rack->rc_always_pace || rack->r_use_cmp_ack)
14892 tp->t_flags2 |= TF2_SUPPORTS_MBUFQ;
14894 tp->t_flags2 &= ~TF2_SUPPORTS_MBUFQ;
14895 if (rack->r_use_cmp_ack && TCPS_HAVEESTABLISHED(tp->t_state))
14896 tp->t_flags2 |= TF2_MBUF_ACKCMP;
14902 * they are non-zero. They are kept with a 5
14907 rack_log_hystart_event(rack, rack->r_ctl.roundends, 20);
14908 if ((tptoinpcb(tp)->inp_flags & INP_DROPPED) == 0) {
14910 if (tp->t_fb->tfb_chg_query == NULL) {
14920 ret = (*tp->t_fb->tfb_chg_query)(tp, &qr);
14922 rack->r_ctl.rc_reorder_ts = qr.rack_reorder_ts;
14923 rack->r_ctl.num_dsack = qr.rack_num_dsacks;
14924 rack->r_ctl.rc_tlp_rxt_last_time = qr.rack_rxt_last_time;
14925 rack->r_ctl.rc_rack_min_rtt = qr.rack_min_rtt;
14926 rack->rc_rack_rtt = qr.rack_rtt;
14927 rack->r_ctl.rc_rack_tmit_time = qr.rack_tmit_time;
14928 rack->r_ctl.rc_sacked = qr.rack_sacked;
14929 rack->r_ctl.rc_holes_rxt = qr.rack_holes_rxt;
14930 rack->r_ctl.rc_prr_delivered = qr.rack_prr_delivered;
14931 rack->r_ctl.rc_prr_recovery_fs = qr.rack_prr_recovery_fs;
14932 rack->r_ctl.rc_prr_sndcnt = qr.rack_prr_sndcnt;
14933 rack->r_ctl.rc_prr_out = qr.rack_prr_out;
14935 rack->rc_tlp_in_progress = 1;
14936 rack->r_ctl.rc_tlp_cnt_out = qr.rack_tlp_cnt_out;
14938 rack->rc_tlp_in_progress = 0;
14939 rack->r_ctl.rc_tlp_cnt_out = 0;
14942 rack->rc_srtt_measure_made = 1;
14944 rack->r_ctl.rc_went_idle_time = qr.rack_time_went_idle;
14946 if (rack->r_ctl.rc_scw) {
14947 tcp_shared_cwnd_idle(rack->r_ctl.rc_scw, rack->r_ctl.rc_scw_index);
14948 rack->rack_scwnd_is_idle = 1;
14951 rack->r_ctl.persist_lost_ends = 0;
14952 rack->probe_not_answered = 0;
14953 rack->forced_ack = 0;
14954 tp->t_rxtshift = 0;
14955 rack->rc_in_persist = 1;
14956 RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
14957 rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
14960 rack->r_wanted_output = 1;
14969 ret = (*tp->t_fb->tfb_chg_query)(tp, &qr);
14972 * non-zero return means we have a timer('s)
14978 rack->r_ctl.rc_hpts_flags = qr.timer_hpts_flags;
14980 rack->r_ctl.rc_last_output_to = qr.timer_pacing_to;
14982 tov = qr.timer_pacing_to - us_cts;
14987 rack->r_ctl.rc_timer_exp = qr.timer_timer_exp;
14990 tov = qr.timer_timer_exp - us_cts;
14996 rack->r_ctl.rc_hpts_flags,
14997 rack->r_ctl.rc_last_output_to,
14998 rack->r_ctl.rc_timer_exp);
15004 rack_log_hpts_diag(rack, us_cts, &diag, &rack->r_ctl.act_rcv_time);
15008 rack_log_rtt_shrinks(rack, us_cts, tp->t_rxtcur,
15017 if ((tp->t_state == TCPS_CLOSED) ||
15018 (tp->t_state == TCPS_LISTEN)) {
15022 if ((tp->t_state == TCPS_SYN_SENT) ||
15023 (tp->t_state == TCPS_SYN_RECEIVED)) {
15030 if ((tp->t_flags & TF_SENTFIN) && ((tp->snd_max - tp->snd_una) > 1)) {
15043 if ((tp->t_flags & TF_SACK_PERMIT) || rack_sack_not_required){
15057 if (tp->t_fb_ptr) {
15063 tp->t_flags &= ~TF_FORCEDATA;
15064 rack = (struct tcp_rack *)tp->t_fb_ptr;
15073 if (rack->r_ctl.rc_scw) {
15076 if (rack->r_limit_scw)
15077 limit = max(1, rack->r_ctl.rc_lowest_us_rtt);
15080 tcp_shared_cwnd_free_full(tp, rack->r_ctl.rc_scw,
15081 rack->r_ctl.rc_scw_index,
15083 rack->r_ctl.rc_scw = NULL;
15086 if (rack->r_ctl.fsb.tcp_ip_hdr) {
15087 free(rack->r_ctl.fsb.tcp_ip_hdr, M_TCPFSB);
15088 rack->r_ctl.fsb.tcp_ip_hdr = NULL;
15089 rack->r_ctl.fsb.th = NULL;
15091 if (rack->rc_always_pace == 1) {
15095 while (!TAILQ_EMPTY(&rack->r_ctl.opt_list)) {
15098 dol = TAILQ_FIRST(&rack->r_ctl.opt_list);
15099 TAILQ_REMOVE(&rack->r_ctl.opt_list, dol, next);
15103 if (rack->r_ctl.crte != NULL) {
15104 tcp_rel_pacing_rate(rack->r_ctl.crte, tp);
15105 rack->rack_hdrw_pacing = 0;
15106 rack->r_ctl.crte = NULL;
15113 * get each one and free it like a cum-ack would and
15116 rsm = tqhash_min(rack->r_ctl.tqh);
15118 tqhash_remove(rack->r_ctl.tqh, rsm, REMOVE_TYPE_CUMACK);
15119 rack->r_ctl.rc_num_maps_alloced--;
15121 rsm = tqhash_min(rack->r_ctl.tqh);
15123 rsm = TAILQ_FIRST(&rack->r_ctl.rc_free);
15125 TAILQ_REMOVE(&rack->r_ctl.rc_free, rsm, r_tnext);
15126 rack->r_ctl.rc_num_maps_alloced--;
15127 rack->rc_free_cnt--;
15130 rsm = TAILQ_FIRST(&rack->r_ctl.rc_free);
15132 if (rack->r_ctl.pcm_s != NULL) {
15133 free(rack->r_ctl.pcm_s, M_TCPPCM);
15134 rack->r_ctl.pcm_s = NULL;
15135 rack->r_ctl.pcm_i.cnt_alloc = 0;
15136 rack->r_ctl.pcm_i.cnt = 0;
15138 if ((rack->r_ctl.rc_num_maps_alloced > 0) &&
15145 log.u_bbr.flex1 = rack->r_ctl.rc_num_maps_alloced;
15146 log.u_bbr.flex2 = rack->rc_free_cnt;
15148 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
15149 rsm = tqhash_min(rack->r_ctl.tqh);
15151 rsm = TAILQ_FIRST(&rack->r_ctl.rc_free);
15158 KASSERT((rack->r_ctl.rc_num_maps_alloced == 0),
15159 ("rack:%p num_aloc:%u after freeing all?",
15161 rack->r_ctl.rc_num_maps_alloced));
15162 rack->rc_free_cnt = 0;
15163 free(rack->r_ctl.tqh, M_TCPFSB);
15164 rack->r_ctl.tqh = NULL;
15165 uma_zfree(rack_pcb_zone, tp->t_fb_ptr);
15166 tp->t_fb_ptr = NULL;
15169 tp->snd_nxt = tp->snd_max;
15175 if ((rack->r_state == TCPS_CLOSED) && (tp->t_state != TCPS_CLOSED)) {
15176 rack->r_is_v6 = (tptoinpcb(tp)->inp_vflag & INP_IPV6) != 0;
15178 switch (tp->t_state) {
15180 rack->r_state = TCPS_SYN_SENT;
15181 rack->r_substate = rack_do_syn_sent;
15184 rack->r_state = TCPS_SYN_RECEIVED;
15185 rack->r_substate = rack_do_syn_recv;
15189 rack->r_state = TCPS_ESTABLISHED;
15190 rack->r_substate = rack_do_established;
15193 rack->r_state = TCPS_CLOSE_WAIT;
15194 rack->r_substate = rack_do_close_wait;
15198 rack->r_state = TCPS_FIN_WAIT_1;
15199 rack->r_substate = rack_do_fin_wait_1;
15203 rack->r_state = TCPS_CLOSING;
15204 rack->r_substate = rack_do_closing;
15208 rack->r_state = TCPS_LAST_ACK;
15209 rack->r_substate = rack_do_lastack;
15212 rack->r_state = TCPS_FIN_WAIT_2;
15213 rack->r_substate = rack_do_fin_wait_2;
15221 if (rack->r_use_cmp_ack && TCPS_HAVEESTABLISHED(tp->t_state))
15222 rack->rc_tp->t_flags2 |= TF2_MBUF_ACKCMP;
15238 tmr_up = rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK;
15239 if (tcp_in_hpts(rack->rc_tp) == 0) {
15245 rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
15249 if (rack->rc_in_persist && (tmr_up == PACE_TMR_PERSIT))
15251 rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap);
15252 if (((rsm == NULL) || (tp->t_state < TCPS_ESTABLISHED)) &&
15259 if (tp->t_flags & TF_DELACK) {
15264 rack->rc_inp->inp_socket->so_options & SO_KEEPALIVE) &&
15265 (tp->t_state <= TCPS_CLOSING)) &&
15267 (tp->snd_max == tp->snd_una)) {
15272 if (SEQ_GT(tp->snd_max, tp->snd_una) &&
15296 if (tcp_in_hpts(rack->rc_tp)) {
15297 if (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) {
15301 if (TSTMP_GT(rack->r_ctl.rc_last_output_to, us_cts)) {
15302 rack->r_early = 1;
15303 rack->r_ctl.rc_agg_early += (rack->r_ctl.rc_last_output_to - us_cts);
15305 rack->r_ctl.rc_hpts_flags &= ~PACE_PKT_OUTPUT;
15307 tcp_hpts_remove(rack->rc_tp);
15309 rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
15317 if ((SEQ_LT(tp->snd_wl1, seq) ||
15318 (tp->snd_wl1 == seq && (SEQ_LT(tp->snd_wl2, ack) ||
15319 (tp->snd_wl2 == ack && tiwin > tp->snd_wnd))))) {
15321 if ((tp->snd_wl2 == ack) && (tiwin > tp->snd_wnd))
15323 tp->snd_wnd = tiwin;
15325 tp->snd_wl1 = seq;
15326 tp->snd_wl2 = ack;
15327 if (tp->snd_wnd > tp->max_sndwnd)
15328 tp->max_sndwnd = tp->snd_wnd;
15329 rack->r_wanted_output = 1;
15330 } else if ((tp->snd_wl2 == ack) && (tiwin < tp->snd_wnd)) {
15331 tp->snd_wnd = tiwin;
15333 tp->snd_wl1 = seq;
15334 tp->snd_wl2 = ack;
15339 if (tp->snd_wnd > tp->max_sndwnd)
15340 tp->max_sndwnd = tp->snd_wnd;
15342 if ((rack->rc_in_persist != 0) &&
15343 (tp->snd_wnd >= min((rack->r_ctl.rc_high_rwnd/2),
15344 rack->r_ctl.rc_pace_min_segs))) {
15348 if ((rack->rc_in_persist == 0) &&
15349 (tp->snd_wnd < min((rack->r_ctl.rc_high_rwnd/2), rack->r_ctl.rc_pace_min_segs)) &&
15350 TCPS_HAVEESTABLISHED(tp->t_state) &&
15351 ((tp->snd_max == tp->snd_una) || rack->rc_has_collapsed) &&
15352 sbavail(&tptosocket(tp)->so_snd) &&
15353 (sbavail(&tptosocket(tp)->so_snd) > tp->snd_wnd)) {
15360 rack_enter_persist(tp, rack, rack->r_ctl.rc_rcvtime, ack);
15368 if (tcp_bblogging_on(rack->rc_tp)) {
15381 if (SEQ_GT(ae->ack, tp->snd_una)) {
15382 tcp_req = tcp_req_find_req_for_seq(tp, (ae->ack-1));
15384 tcp_req = tcp_req_find_req_for_seq(tp, ae->ack);
15388 log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
15389 if (rack->rack_no_prr == 0)
15390 log.u_bbr.flex1 = rack->r_ctl.rc_prr_sndcnt;
15393 log.u_bbr.use_lt_bw = rack->r_ent_rec_ns;
15395 log.u_bbr.use_lt_bw |= rack->r_might_revert;
15396 log.u_bbr.flex2 = rack->r_ctl.rc_num_maps_alloced;
15397 log.u_bbr.bbr_state = rack->rc_free_cnt;
15398 log.u_bbr.inflight = ctf_flight_size(tp, rack->r_ctl.rc_sacked);
15399 log.u_bbr.pkts_out = tp->t_maxseg;
15400 log.u_bbr.flex4 = rack->r_ctl.rc_hpts_flags;
15402 log.u_bbr.lost = ae->flags;
15405 if (ae->flags & TSTMP_HDWR) {
15408 ts.tv_sec = ae->timestamp / 1000000000;
15409 ts.tv_nsec = ae->timestamp % 1000000000;
15413 } else if (ae->flags & TSTMP_LRO) {
15416 ts.tv_sec = ae->timestamp / 1000000000;
15417 ts.tv_nsec = ae->timestamp % 1000000000;
15424 log.u_bbr.delRate = ae->timestamp;
15426 log.u_bbr.applimited = tp->t_tcpreq_closed;
15428 log.u_bbr.applimited |= tp->t_tcpreq_open;
15430 log.u_bbr.applimited |= tp->t_tcpreq_req;
15434 log.u_bbr.pkt_epoch = (tcp_req->localtime / HPTS_USEC_IN_SEC);
15436 log.u_bbr.delivered = (tcp_req->localtime % HPTS_USEC_IN_SEC);
15437 log.u_bbr.rttProp = tcp_req->timestamp;
15438 log.u_bbr.cur_del_rate = tcp_req->start;
15439 if (tcp_req->flags & TCP_TRK_TRACK_FLG_OPEN) {
15443 log.u_bbr.bw_inuse = tcp_req->end;
15445 log.u_bbr.flex6 = tcp_req->start_seq;
15446 if (tcp_req->flags & TCP_TRK_TRACK_FLG_COMP) {
15448 log.u_bbr.epoch = tcp_req->end_seq;
15454 th->th_seq = ae->seq;
15455 th->th_ack = ae->ack;
15456 th->th_win = ae->win;
15458 th->th_sport = inp->inp_fport;
15459 th->th_dport = inp->inp_lport;
15460 tcp_set_flags(th, ae->flags);
15462 if (ae->flags & HAS_TSTMP) {
15466 th->th_off = ((sizeof(struct tcphdr) + TCPOLEN_TSTAMP_APPA) >> 2);
15476 val = htonl(ae->ts_value);
15479 val = htonl(ae->ts_echo);
15483 th->th_off = (sizeof(struct tcphdr) >> 2);
15492 * snd_una was advanced and then un-advancing it so that the
15495 if (tp->snd_una != high_seq) {
15496 orig_snd_una = tp->snd_una;
15497 tp->snd_una = high_seq;
15502 &tptosocket(tp)->so_rcv,
15503 &tptosocket(tp)->so_snd, TCP_LOG_IN, 0,
15506 tp->snd_una = orig_snd_una;
15517 * A persist or keep-alive was forced out, update our
15519 * When a subsequent keep-alive or persist times out
15525 * will clear the probe_not_answered flag i.e. cum-ack
15529 rack->forced_ack = 0;
15530 rack->rc_tp->t_rxtshift = 0;
15531 if ((rack->rc_in_persist &&
15532 (tiwin == rack->rc_tp->snd_wnd)) ||
15533 (rack->rc_in_persist == 0)) {
15548 if (rack->rc_in_persist)
15550 us_rtt = us_cts - rack->r_ctl.forced_ack_ts;
15553 if (rack->probe_not_answered == 0) {
15575 rack->r_ctl.roundends = tp->snd_max;
15576 rack->rc_new_rnd_needed = 0;
15577 rack_log_hystart_event(rack, tp->snd_max, 4);
15585 if (tcp_bblogging_on(rack->rc_tp)) {
15592 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
15598 log.u_bbr.flex5 = rack->r_ctl.pcm_idle_rounds;
15599 log.u_bbr.bbr_substate = rack->pcm_needed;
15601 log.u_bbr.bbr_substate |= rack->pcm_in_progress;
15603 log.u_bbr.bbr_substate |= rack->pcm_enabled; /* bits are NIE for Needed, Inprogress, Enabled */
15604 (void)tcp_log_event(rack->rc_tp, NULL, NULL, NULL, TCP_PCM_MEASURE, ERRNO_UNK,
15619 rack->r_ctl.current_round++;
15621 rack->rc_new_rnd_needed = 1;
15622 if ((rack->pcm_enabled == 1) &&
15623 (rack->pcm_needed == 0) &&
15624 (rack->pcm_in_progress == 0)) {
15632 rnds = rack->r_ctl.current_round - rack->r_ctl.last_pcm_round;
15633 if ((rnds + rack->r_ctl.pcm_idle_rounds) >= rack_pcm_every_n_rounds) {
15634 rack->pcm_needed = 1;
15635 rack_log_pcm(rack, 3, rack->r_ctl.last_pcm_round, rack_pcm_every_n_rounds, rack->r_ctl.current_round );
15637 rack_log_pcm(rack, 3, rack->r_ctl.last_pcm_round, rack_pcm_every_n_rounds, rack->r_ctl.current_round );
15640 if (tp->t_ccv.flags & CCF_HYSTART_ALLOWED) {
15642 if (CC_ALGO(tp)->newround != NULL) {
15643 CC_ALGO(tp)->newround(&tp->t_ccv, rack->r_ctl.current_round);
15648 * that we are not just pushing on slow-start and just
15650 * boost in b/w during the initial slow-start.
15652 if (rack->dgp_on &&
15653 (rack->rc_initial_ss_comp == 0) &&
15654 (tp->snd_cwnd < tp->snd_ssthresh) &&
15655 (rack->r_ctl.num_measurements >= RACK_REQ_AVG) &&
15656 (rack->r_ctl.gp_rnd_thresh > 0) &&
15657 ((rack->r_ctl.current_round - rack->r_ctl.last_rnd_of_gp_rise) >= rack->r_ctl.gp_rnd_thresh)) {
15667 rack->rc_initial_ss_comp = 1;
15669 if (tcp_bblogging_on(rack->rc_tp)) {
15675 log.u_bbr.flex1 = rack->r_ctl.current_round;
15676 log.u_bbr.flex2 = rack->r_ctl.last_rnd_of_gp_rise;
15677 log.u_bbr.flex3 = rack->r_ctl.gp_rnd_thresh;
15678 log.u_bbr.flex4 = rack->r_ctl.gate_to_fs;
15679 log.u_bbr.flex5 = rack->r_ctl.ss_hi_fs;
15684 if ((rack->r_ctl.gate_to_fs == 1) &&
15685 (tp->snd_cwnd > rack->r_ctl.ss_hi_fs)) {
15686 tp->snd_cwnd = rack->r_ctl.ss_hi_fs;
15688 tp->snd_ssthresh = tp->snd_cwnd - 1;
15690 rack->r_fast_output = 0;
15701 * A) It moves the cum-ack forward
15702 * B) It is behind the cum-ack.
15703 * C) It is a window-update ack.
15704 * D) It is a dup-ack.
15706 * Note that we can have between 1 -> TCP_COMP_ACK_ENTRIES
15731 rack = (struct tcp_rack *)tp->t_fb_ptr;
15732 if (rack->gp_ready &&
15733 (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT))
15736 if (rack->r_state != tp->t_state)
15738 if ((tp->t_state >= TCPS_FIN_WAIT_1) &&
15739 (tp->t_flags & TF_GPUTINPROG)) {
15748 bytes = tp->gput_ack - tp->gput_seq;
15749 if (SEQ_GT(tp->gput_seq, tp->snd_una))
15750 bytes += tp->gput_seq - tp->snd_una;
15751 if (bytes > sbavail(&tptosocket(tp)->so_snd)) {
15757 rack_log_pacing_delay_calc(rack, (tp->gput_ack - tp->gput_seq) /*flex2*/,
15758 rack->r_ctl.rc_gp_srtt /*flex1*/,
15759 tp->gput_seq,
15761 tp->t_flags &= ~TF_GPUTINPROG;
15765 to->to_flags = 0;
15766 KASSERT((m->m_len >= sizeof(struct tcp_ackent)),
15767 ("tp:%p m_cmpack:%p with invalid len:%u", tp, m, m->m_len));
15768 cnt = m->m_len / sizeof(struct tcp_ackent);
15770 high_seq = tp->snd_una;
15771 the_win = tp->snd_wnd;
15772 win_seq = tp->snd_wl1;
15773 win_upd_ack = tp->snd_wl2;
15776 rack->r_ctl.rc_rcvtime = cts;
15778 if ((rack->rc_gp_dyn_mul) &&
15779 (rack->use_fixed_rate == 0) &&
15780 (rack->rc_always_pace)) {
15790 if (ae->flags & TH_FIN)
15799 tiwin = ae->win << tp->snd_scale;
15800 if (tiwin > rack->r_ctl.rc_high_rwnd)
15801 rack->r_ctl.rc_high_rwnd = tiwin;
15803 if (SEQ_LT(ae->ack, high_seq)) {
15805 ae->ack_val_set = ACK_BEHIND;
15806 } else if (SEQ_GT(ae->ack, high_seq)) {
15808 ae->ack_val_set = ACK_CUMACK;
15809 } else if ((tiwin == the_win) && (rack->rc_in_persist == 0)){
15811 ae->ack_val_set = ACK_DUPACK;
15814 ae->ack_val_set = ACK_RWND;
15817 rack_log_input_packet(tp, rack, ae, ae->ack_val_set, high_seq);
15819 if (ae->flags & HAS_TSTMP) {
15821 to->to_flags = TOF_TS;
15822 ae->ts_echo -= tp->ts_offset;
15823 to->to_tsecr = ae->ts_echo;
15824 to->to_tsval = ae->ts_value;
15830 if (TSTMP_GT(ae->ts_echo, ms_cts))
15831 to->to_tsecr = 0;
15832 if (tp->ts_recent &&
15833 TSTMP_LT(ae->ts_value, tp->ts_recent)) {
15834 if (ctf_ts_check_ac(tp, (ae->flags & 0xff))) {
15838 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
15839 tp->tcp_proc_time[ae->ack_val_set] += (rdstc - ts_val);
15846 if (SEQ_LEQ(ae->seq, tp->last_ack_sent) &&
15847 SEQ_LEQ(tp->last_ack_sent, ae->seq)) {
15848 tp->ts_recent_age = tcp_ts_getticks();
15849 tp->ts_recent = ae->ts_value;
15853 to->to_flags = 0;
15856 if (tp->t_idle_reduce &&
15857 (tp->snd_max == tp->snd_una) &&
15858 (TICKS_2_USEC(ticks - tp->t_rcvtime) >= tp->t_rxtcur)) {
15862 tp->t_rcvtime = ticks;
15864 if (tcp_ecn_input_segment(tp, ae->flags, 0,
15865 tcp_packets_this_ack(tp, ae->ack),
15866 ae->codepoint))
15867 rack_cong_signal(tp, CC_ECN, ae->ack, __LINE__);
15870 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
15871 tp->tcp_cnt_counters[ae->ack_val_set]++;
15878 * The non-compressed path through the code has this
15885 if (ae->ack_val_set == ACK_BEHIND) {
15888 * or it could be a keep-alive or persists
15890 if (SEQ_LT(ae->ack, tp->snd_una) && (sbspace(&so->so_rcv) > segsiz)) {
15891 rack->r_ctl.rc_reorder_ts = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time);
15892 if (rack->r_ctl.rc_reorder_ts == 0)
15893 rack->r_ctl.rc_reorder_ts = 1;
15895 } else if (ae->ack_val_set == ACK_DUPACK) {
15897 rack_strike_dupack(rack, ae->ack);
15898 } else if (ae->ack_val_set == ACK_RWND) {
15900 if ((ae->flags & TSTMP_LRO) || (ae->flags & TSTMP_HDWR)) {
15901 ts.tv_sec = ae->timestamp / 1000000000;
15902 ts.tv_nsec = ae->timestamp % 1000000000;
15903 rack->r_ctl.act_rcv_time.tv_sec = ts.tv_sec;
15904 rack->r_ctl.act_rcv_time.tv_usec = ts.tv_nsec/1000;
15906 rack->r_ctl.act_rcv_time = *tv;
15908 if (rack->forced_ack) {
15910 tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time));
15915 win_upd_ack = ae->ack;
15916 win_seq = ae->seq;
15921 if (SEQ_GT(ae->ack, tp->snd_max)) {
15926 if ((tp->t_flags & TF_ACKNOW) == 0) {
15928 if (tp->t_flags & TF_ACKNOW) /* bitwise test: request output only when the TF_ACKNOW bit itself is set, not when any flag is set */
15929 rack->r_wanted_output = 1;
15934 if (tiwin != tp->snd_wnd) {
15935 win_upd_ack = ae->ack;
15936 win_seq = ae->seq;
15942 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
15943 tp->tcp_cnt_counters[CNT_OF_ACKS_IN] += (((ae->ack - high_seq) + segsiz - 1) / segsiz);
15946 high_seq = ae->ack;
15948 if ((ae->flags & TSTMP_LRO) || (ae->flags & TSTMP_HDWR)) {
15949 ts.tv_sec = ae->timestamp / 1000000000;
15950 ts.tv_nsec = ae->timestamp % 1000000000;
15951 rack->r_ctl.act_rcv_time.tv_sec = ts.tv_sec;
15952 rack->r_ctl.act_rcv_time.tv_usec = ts.tv_nsec/1000;
15954 rack->r_ctl.act_rcv_time = *tv;
15956 rack_process_to_cumack(tp, rack, ae->ack, cts, to,
15957 tcp_tv_to_lusectick(&rack->r_ctl.act_rcv_time));
15961 if (rack->rc_dsack_round_seen) {
15963 if (SEQ_GEQ(ae->ack, rack->r_ctl.dsack_round_end)) {
15965 rack->rc_dsack_round_seen = 0;
15976 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
15977 tp->tcp_proc_time[ae->ack_val_set] += (rdstc - ts_val);
15978 if (ae->ack_val_set == ACK_CUMACK)
15979 tp->tcp_proc_time[CYC_HANDLE_MAP] += (rdstc - ts_val);
15988 if (SEQ_GT(tp->snd_max, high_seq) && (tp->snd_wnd < (tp->snd_max - high_seq))) {
15990 rack_collapsed_window(rack, (tp->snd_max - high_seq), high_seq, __LINE__);
15991 } else if (rack->rc_has_collapsed)
15993 if ((rack->r_collapse_point_valid) &&
15994 (SEQ_GT(high_seq, rack->r_ctl.high_collapse_point)))
15995 rack->r_collapse_point_valid = 0;
15996 acked_amount = acked = (high_seq - tp->snd_una);
16009 if (SEQ_GEQ(high_seq, rack->r_ctl.roundends) &&
16010 (rack->rc_new_rnd_needed == 0) &&
16020 * since cum-ack moved forward.
16022 rack->probe_not_answered = 0;
16023 if (tp->t_flags & TF_NEEDSYN) {
16025 * T/TCP: Connection was half-synchronized, and our SYN has
16027 * to non-starred state, increment snd_una for ACK of SYN,
16028 * and check if we can do window scaling.
16030 tp->t_flags &= ~TF_NEEDSYN;
16031 tp->snd_una++;
16032 acked_amount = acked = (high_seq - tp->snd_una);
16034 if (acked > sbavail(&so->so_snd))
16035 acked_amount = sbavail(&so->so_snd);
16036 if (IN_FASTRECOVERY(tp->t_flags) &&
16037 (rack->rack_no_prr == 0))
16039 if (IN_RECOVERY(tp->t_flags)) {
16040 if (SEQ_LT(high_seq, tp->snd_recover) &&
16041 (SEQ_LT(high_seq, tp->snd_max))) {
16047 } else if ((rack->rto_from_rec == 1) &&
16048 SEQ_GEQ(high_seq, tp->snd_recover)) {
16051 * and never re-entered recovery. The timeout(s)
16055 rack->rto_from_rec = 0;
16057 /* Handle the rack-log-ack part (sendmap) */
16058 if ((sbused(&so->so_snd) == 0) &&
16060 (tp->t_state >= TCPS_FIN_WAIT_1) &&
16061 (tp->t_flags & TF_SENTFIN)) {
16074 tp->snd_una = high_seq;
16077 if ((tp->t_flags & TF_PREVVALID) &&
16078 ((tp->t_flags & TF_RCVD_TSTMP) == 0)) {
16079 tp->t_flags &= ~TF_PREVVALID;
16080 if (tp->t_rxtshift == 1 &&
16081 (int)(ticks - tp->t_badrxtwin) < 0)
16097 p_cwnd = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs);
16099 p_cwnd += tp->snd_cwnd;
16102 if (post_recovery && (tp->snd_cwnd > p_cwnd)) {
16103 /* Must be non-newreno (cubic) getting too ahead of itself */
16104 tp->snd_cwnd = p_cwnd;
16107 mfree = sbcut_locked(&so->so_snd, acked_amount);
16108 tp->snd_una = high_seq;
16110 rack_adjust_sendmap_head(rack, &so->so_snd);
16112 rack_log_wakeup(tp,rack, &so->so_snd, acked, 2);
16117 tp->t_acktime = ticks;
16118 rack_log_progress_event(rack, tp, tp->t_acktime,
16121 tp->t_rxtshift = 0;
16122 RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
16123 rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
16124 rack->rc_tlp_in_progress = 0;
16125 rack->r_ctl.rc_tlp_cnt_out = 0;
16127 if (SEQ_GT(tp->snd_una, tp->snd_recover))
16128 tp->snd_recover = tp->snd_una;
16129 if (SEQ_LT(tp->snd_nxt, tp->snd_max))
16130 tp->snd_nxt = tp->snd_max;
16135 if (rack->r_ctl.rc_hpts_flags & PACE_TMR_RXT)
16136 rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
16137 tp->snd_wl2 = high_seq;
16138 tp->t_dupacks = 0;
16140 (rack->use_fixed_rate == 0) &&
16141 (rack->in_probe_rtt == 0) &&
16142 rack->rc_gp_dyn_mul &&
16143 rack->rc_always_pace) {
16147 if (tp->snd_una == tp->snd_max) {
16148 tp->t_flags &= ~TF_PREVVALID;
16149 rack->r_ctl.retran_during_recovery = 0;
16150 rack->rc_suspicious = 0;
16151 rack->r_ctl.dsack_byte_cnt = 0;
16152 rack->r_ctl.rc_went_idle_time = tcp_get_usecs(NULL);
16153 if (rack->r_ctl.rc_went_idle_time == 0)
16154 rack->r_ctl.rc_went_idle_time = 1;
16156 if (sbavail(&tptosocket(tp)->so_snd) == 0)
16157 tp->t_acktime = 0;
16159 rack->r_wanted_output = 1;
16160 rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
16161 sack_filter_clear(&rack->r_ctl.rack_sf, tp->snd_una);
16162 if ((tp->t_state >= TCPS_FIN_WAIT_1) &&
16163 (sbavail(&so->so_snd) == 0) &&
16164 (tp->t_flags2 & TF2_DROP_AF_DATA)) {
16170 rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
16171 /* tcp_close will kill the inp pre-log the Reset */
16176 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
16177 tp->tcp_proc_time[ACK_CUMACK] += (rdstc - ts_val);
16178 tp->tcp_proc_time[CYC_HANDLE_ACK] += (rdstc - ts_val);
16191 * We would normally do drop-with-reset which would
16202 if ((sbused(&so->so_snd) == 0) &&
16203 (tp->t_state >= TCPS_FIN_WAIT_1) &&
16204 (tp->t_flags & TF_SENTFIN)) {
16212 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
16221 * We don't change to fin-wait-2 if we have our fin acked
16229 if (sbavail(&so->so_snd)) {
16230 rack->r_wanted_output = 1;
16232 rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr,
16241 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
16242 tp->tcp_proc_time[ACK_CUMACK] += (rdstc - ts_val);
16243 tp->tcp_proc_time[CYC_HANDLE_ACK] += (rdstc - ts_val);
16254 switch(tp->t_state) {
16259 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
16260 tp->tcp_proc_time[ACK_CUMACK] += (rdstc - ts_val);
16261 tp->tcp_proc_time[CYC_HANDLE_ACK] += (rdstc - ts_val);
16274 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
16275 tp->tcp_proc_time[ACK_CUMACK] += (rdstc - ts_val);
16276 tp->tcp_proc_time[CYC_HANDLE_ACK] += (rdstc - ts_val);
16289 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
16290 tp->tcp_proc_time[ACK_CUMACK] += (rdstc - ts_val);
16291 tp->tcp_proc_time[CYC_HANDLE_ACK] += (rdstc - ts_val);
16295 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
16308 if (rack->r_fast_output) {
16317 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
16318 tp->tcp_proc_time[ACK_CUMACK] += (rdstc - ts_val);
16319 tp->tcp_proc_time[CYC_HANDLE_ACK] += (rdstc - ts_val);
16326 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
16327 tp->tcp_proc_time[ACK_RWND] += (rdstc - ts_val);
16344 if ((rack->r_wanted_output != 0) ||
16345 (rack->r_fast_output != 0) ||
16346 (tp->t_flags & TF_ACKNOW )) {
16356 if (tp->t_flags2 & TF2_HPTS_CALLS)
16357 tp->t_flags2 &= ~TF2_HPTS_CALLS;
16362 rack_timer_audit(tp, rack, &so->so_snd);
16384 * cts - is the current time from tv (caller gets ts) in microseconds.
16385 * ms_cts - is the current time from tv in milliseconds.
16386 * us_cts - is the time that LRO or hardware actually got the packet in microseconds.
16409 rack = (struct tcp_rack *)tp->t_fb_ptr;
16410 if (rack->rack_deferred_inited == 0) {
16421 * can happen in the non-LRO path where we are pacing and
16426 if (m->m_flags & M_ACKCMP) {
16431 rack->rc_ack_required = 0;
16435 if ((rack->rc_always_pace == 1) &&
16436 (rack->rc_ack_can_sendout_data == 0) &&
16437 (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) &&
16438 (TSTMP_LT(us_cts, rack->r_ctl.rc_last_output_to))) {
16445 slot_remaining = rack->r_ctl.rc_last_output_to - us_cts;
16446 if (rack->rc_tp->t_flags2 & TF2_DONT_SACK_QUEUE) {
16458 optlen = (th->th_off << 2) - sizeof(struct tcphdr);
16484 rack->r_ctl.gp_bw,
16490 if (m->m_flags & M_ACKCMP) {
16491 panic("Impossible reach m has ackcmp? m:%p tp:%p", m, tp);
16495 nsegs = m->m_pkthdr.lro_nsegs;
16502 if ((m->m_flags & M_TSTMP) ||
16503 (m->m_flags & M_TSTMP_LRO)) {
16505 rack->r_ctl.act_rcv_time.tv_sec = ts.tv_sec;
16506 rack->r_ctl.act_rcv_time.tv_usec = ts.tv_nsec/1000;
16508 rack->r_ctl.act_rcv_time = *tv;
16512 * Unscale the window into a 32-bit value. For the SYN_SENT state
16515 tiwin = th->th_win << tp->snd_scale;
16524 * that often will gain you a context switch, and instead lock
16544 (th->th_off << 2) - sizeof(struct tcphdr),
16546 KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
16548 KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
16550 if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) {
16558 if ((tp->t_state >= TCPS_FIN_WAIT_1) &&
16559 (tp->t_flags & TF_GPUTINPROG)) {
16568 bytes = tp->gput_ack - tp->gput_seq;
16569 if (SEQ_GT(tp->gput_seq, tp->snd_una))
16570 bytes += tp->gput_seq - tp->snd_una;
16571 if (bytes > sbavail(&tptosocket(tp)->so_snd)) {
16577 rack_log_pacing_delay_calc(rack, (tp->gput_ack - tp->gput_seq) /*flex2*/,
16578 rack->r_ctl.rc_gp_srtt /*flex1*/,
16579 tp->gput_seq,
16581 tp->t_flags &= ~TF_GPUTINPROG;
16584 if (tcp_bblogging_on(rack->rc_tp)) {
16590 if (SEQ_GT(th->th_ack, tp->snd_una)) {
16591 tcp_req = tcp_req_find_req_for_seq(tp, (th->th_ack-1));
16593 tcp_req = tcp_req_find_req_for_seq(tp, th->th_ack);
16597 log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
16598 if (rack->rack_no_prr == 0)
16599 log.u_bbr.flex1 = rack->r_ctl.rc_prr_sndcnt;
16602 log.u_bbr.use_lt_bw = rack->r_ent_rec_ns;
16604 log.u_bbr.use_lt_bw |= rack->r_might_revert;
16605 log.u_bbr.flex2 = rack->r_ctl.rc_num_maps_alloced;
16606 log.u_bbr.bbr_state = rack->rc_free_cnt;
16607 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
16608 log.u_bbr.pkts_out = rack->rc_tp->t_maxseg;
16609 log.u_bbr.flex3 = m->m_flags;
16610 log.u_bbr.flex4 = rack->r_ctl.rc_hpts_flags;
16617 if (m->m_flags & M_TSTMP) {
16623 } else if (m->m_flags & M_TSTMP_LRO) {
16632 log.u_bbr.delRate = m->m_pkthdr.rcv_tstmp;
16634 log.u_bbr.applimited = tp->t_tcpreq_closed;
16636 log.u_bbr.applimited |= tp->t_tcpreq_open;
16638 log.u_bbr.applimited |= tp->t_tcpreq_req;
16642 log.u_bbr.pkt_epoch = (tcp_req->localtime / HPTS_USEC_IN_SEC);
16644 log.u_bbr.delivered = (tcp_req->localtime % HPTS_USEC_IN_SEC);
16645 log.u_bbr.rttProp = tcp_req->timestamp;
16646 log.u_bbr.cur_del_rate = tcp_req->start;
16647 if (tcp_req->flags & TCP_TRK_TRACK_FLG_OPEN) {
16651 log.u_bbr.bw_inuse = tcp_req->end;
16653 log.u_bbr.flex6 = tcp_req->start_seq;
16654 if (tcp_req->flags & TCP_TRK_TRACK_FLG_COMP) {
16656 log.u_bbr.epoch = tcp_req->end_seq;
16660 TCP_LOG_EVENTP(tp, th, &so->so_rcv, &so->so_snd, TCP_LOG_IN, 0,
16665 rack->rc_ack_required = 0;
16674 * If a segment with the ACK-bit set arrives in the SYN-SENT state
16677 if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) &&
16678 (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) {
16693 if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS) &&
16701 * Segment received on connection. Reset idle time and keep-alive
16705 if (tp->t_idle_reduce &&
16706 (tp->snd_max == tp->snd_una) &&
16707 (TICKS_2_USEC(ticks - tp->t_rcvtime) >= tp->t_rxtcur)) {
16711 tp->t_rcvtime = ticks;
16713 stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
16715 if (tiwin > rack->r_ctl.rc_high_rwnd)
16716 rack->r_ctl.rc_high_rwnd = tiwin;
16722 tcp_packets_this_ack(tp, th->th_ack),
16724 rack_cong_signal(tp, CC_ECN, th->th_ack, __LINE__);
16732 to.to_tsecr -= tp->ts_offset;
16736 if ((rack->r_rcvpath_rtt_up == 1) &&
16738 (TSTMP_GEQ(to.to_tsecr, rack->r_ctl.last_rcv_tstmp_for_rtt))) {
16749 if (TSTMP_GT(cts, rack->r_ctl.last_time_of_arm_rcv))
16750 rtt = (cts - rack->r_ctl.last_time_of_arm_rcv);
16751 rack->r_rcvpath_rtt_up = 0;
16762 if (rack->r_state == 0) {
16764 KASSERT(rack->rc_inp != NULL,
16765 ("%s: rack->rc_inp unexpectedly NULL", __func__));
16766 if (rack->rc_inp == NULL) {
16767 rack->rc_inp = inp;
16777 if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
16781 (tp->t_flags & TF_REQ_SCALE)) {
16782 tp->t_flags |= TF_RCVD_SCALE;
16783 tp->snd_scale = to.to_wscale;
16785 tp->t_flags &= ~TF_REQ_SCALE;
16790 tp->snd_wnd = th->th_win;
16793 (tp->t_flags & TF_REQ_TSTMP)) {
16794 tp->t_flags |= TF_RCVD_TSTMP;
16795 tp->ts_recent = to.to_tsval;
16796 tp->ts_recent_age = cts;
16798 tp->t_flags &= ~TF_REQ_TSTMP;
16802 if ((tp->t_flags & TF_SACK_PERMIT) &&
16804 tp->t_flags &= ~TF_SACK_PERMIT;
16805 if (tp->t_flags & TF_FASTOPEN) {
16812 if ((inp->inp_vflag & INP_IPV6) != 0)
16825 * TF_SACK_PERMIT is set and the sack-not-required is clear.
16826 * The code now does do dup-ack counting so if you don't
16832 ((tp->t_flags & TF_SACK_PERMIT) == 0)) {
16834 (*tp->t_fb->tfb_tcp_do_segment)(tp, m, th, drop_hdrlen,
16842 sack_filter_clear(&rack->r_ctl.rack_sf, th->th_ack);
16846 us_cts = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time);
16847 if ((rack->rc_gp_dyn_mul) &&
16848 (rack->use_fixed_rate == 0) &&
16849 (rack->rc_always_pace)) {
16854 if ((rack->forced_ack) &&
16860 * always. All other times (timers etc) we must have a rack-state
16863 rack->r_ctl.rc_rcvtime = cts;
16864 if (rack->r_state != tp->t_state)
16866 if (SEQ_GT(th->th_ack, tp->snd_una) &&
16867 (rsm = tqhash_min(rack->r_ctl.tqh)) != NULL)
16869 prev_state = rack->r_state;
16871 ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
16872 SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
16873 (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq))) {
16875 tcp_trace_point(rack->rc_tp, TCP_TP_RESET_RCV);
16877 retval = (*rack->r_substate) (m, th, so,
16886 if ((rack->rc_gp_dyn_mul) &&
16887 (rack->rc_always_pace) &&
16888 (rack->use_fixed_rate == 0) &&
16889 rack->in_probe_rtt &&
16890 (rack->r_ctl.rc_time_probertt_starts == 0)) {
16897 if (rack->set_pacing_done_a_iw == 0) {
16899 if ((tp->snd_una - tp->iss) > (ctf_fixed_maxseg(tp) * 10)) {
16901 rack->set_pacing_done_a_iw = 1;
16912 * use of 0xf here since we only have 11 counters (0 - 0xa) and
16920 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
16921 tp->tcp_proc_time[ack_val_set] += (crtsc - ts_val);
16926 if ((rack->r_wanted_output != 0) ||
16927 (tp->t_flags & TF_ACKNOW) ||
16928 (rack->r_fast_output != 0)) {
16941 } else if ((nxt_pkt == 0) && (tp->t_flags & TF_ACKNOW)) {
16945 (tcp_in_hpts(rack->rc_tp) == 0)) {
16950 KASSERT ((slot_remaining != 0), ("slot remaining is zero for rack:%p tp:%p", rack, tp));
16955 if ((nxt_pkt == 0) && (tp->t_flags2 & TF2_HPTS_CALLS))
16956 tp->t_flags2 &= ~TF2_HPTS_CALLS;
16967 if (SEQ_GEQ(tp->snd_una, rack->r_ctl.roundends) &&
16968 (rack->rc_new_rnd_needed == 0) &&
16974 rack_new_round_setup(tp, rack, tp->snd_una);
16977 ((rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK) == 0) &&
16978 (SEQ_GT(tp->snd_max, tp->snd_una) ||
16979 (tp->t_flags & TF_DELACK) ||
16980 ((V_tcp_always_keepalive || rack->rc_inp->inp_socket->so_options & SO_KEEPALIVE) &&
16981 (tp->t_state <= TCPS_CLOSING)))) {
16983 if ((tp->snd_max == tp->snd_una) &&
16984 ((tp->t_flags & TF_DELACK) == 0) &&
16985 (tcp_in_hpts(rack->rc_tp)) &&
16986 (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT)) {
16992 if (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) {
16994 if (TSTMP_GT(rack->r_ctl.rc_last_output_to, us_cts)) {
16995 rack->r_early = 1;
16996 rack->r_ctl.rc_agg_early += (rack->r_ctl.rc_last_output_to - us_cts);
16999 rack->r_ctl.rc_hpts_flags &= ~PACE_PKT_OUTPUT;
17016 rack_timer_audit(tp, rack, &so->so_snd);
17022 rack->r_wanted_output = 0;
17038 if (!STAILQ_EMPTY(&tp->t_inqueue)) {
17044 if (m->m_flags & M_TSTMP_LRO) {
17063 /* Return the next guy to be re-transmitted */
17064 if (tqhash_empty(rack->r_ctl.tqh)) {
17067 if (tp->t_flags & TF_SENTFIN) {
17072 rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap);
17073 if (rack->r_must_retran && rsm && (rsm->r_flags & RACK_MUST_RXT)) {
17076 if (rsm && ((rsm->r_flags & RACK_ACKED) == 0)) {
17084 if (((rack->rc_tp->t_flags & TF_SACK_PERMIT) == 0) &&
17085 (rsm->r_dupack >= DUP_ACK_THRESHOLD)) {
17092 if (rsm->r_flags & RACK_ACKED) {
17095 if (((rsm->r_flags & RACK_SACK_PASSED) == 0) &&
17096 (rsm->r_dupack < DUP_ACK_THRESHOLD)) {
17101 idx = rsm->r_rtr_cnt - 1;
17102 ts_low = (uint32_t)rsm->r_tim_lastsent[idx];
17109 if ((tsused - ts_low) < thresh) {
17113 if ((rsm->r_dupack >= DUP_ACK_THRESHOLD) ||
17114 ((rsm->r_flags & RACK_SACK_PASSED))) {
17116 * We have passed the dup-ack threshold <or>
17119 * it is only the dup-ack threshold that
17123 rack_log_retran_reason(rack, rsm, (tsused - ts_low), thresh, 1);
17124 rack->r_fast_output = 0;
17135 if (tcp_bblogging_on(rack->rc_tp)) {
17156 log.u_bbr.flex3 = rack->r_ctl.rc_pace_min_segs;
17157 log.u_bbr.flex4 = rack->r_ctl.rc_pace_max_segs;
17158 log.u_bbr.flex5 = rack->r_ctl.rack_per_of_gp_ss;
17159 log.u_bbr.flex6 = rack->r_ctl.rack_per_of_gp_ca;
17160 log.u_bbr.use_lt_bw = rack->rc_ack_can_sendout_data;
17162 log.u_bbr.use_lt_bw |= rack->r_late;
17164 log.u_bbr.use_lt_bw |= rack->r_early;
17166 log.u_bbr.use_lt_bw |= rack->app_limited_needs_set;
17168 log.u_bbr.use_lt_bw |= rack->rc_gp_filled;
17170 log.u_bbr.use_lt_bw |= rack->measure_saw_probe_rtt;
17172 log.u_bbr.use_lt_bw |= rack->in_probe_rtt;
17174 log.u_bbr.use_lt_bw |= rack->gp_ready;
17176 log.u_bbr.epoch = rack->r_ctl.rc_agg_delayed;
17177 log.u_bbr.lt_epoch = rack->r_ctl.rc_agg_early;
17178 log.u_bbr.applimited = rack->r_ctl.rack_per_of_gp_rec;
17181 if (rack->r_ctl.gp_bw == 0)
17186 log.u_bbr.pkts_out = rack->r_ctl.rc_rack_min_rtt;
17187 log.u_bbr.lost = rack->r_ctl.rc_probertt_sndmax_atexit;
17189 if (rack->r_ctl.cwnd_to_use < rack->rc_tp->snd_ssthresh) {
17198 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
17199 log.u_bbr.cwnd_gain = rack->rc_gp_saw_rec;
17201 log.u_bbr.cwnd_gain |= rack->rc_gp_saw_ss;
17203 log.u_bbr.cwnd_gain |= rack->rc_gp_saw_ca;
17205 log.u_bbr.bbr_state = rack->dgp_on;
17207 log.u_bbr.bbr_state |= rack->rc_pace_to_cwnd;
17209 TCP_LOG_EVENTP(rack->rc_tp, NULL,
17210 &rack->rc_inp->inp_socket->so_rcv,
17211 &rack->rc_inp->inp_socket->so_snd,
17222 user_max = rack->rc_user_set_max_segs * mss;
17223 if (rack->rc_force_max_seg) {
17226 if (rack->use_fixed_rate &&
17227 ((rack->r_ctl.crte == NULL) ||
17228 (bw != rack->r_ctl.crte->rate))) {
17233 (rack->r_ctl.rc_user_set_min_segs == 1))
17238 new_tso = tcp_get_pacing_burst_size_w_divisor(rack->rc_tp, bw, mss,
17239 pace_one, rack->r_ctl.crte, NULL, rack->r_ctl.pace_len_divisor);
17242 if (rack->rc_hybrid_mode && rack->r_ctl.client_suggested_maxseg) {
17243 if (((uint32_t)rack->r_ctl.client_suggested_maxseg * mss) > new_tso)
17244 new_tso = (uint32_t)rack->r_ctl.client_suggested_maxseg * mss;
17246 if (rack->r_ctl.rc_user_set_min_segs &&
17247 ((rack->r_ctl.rc_user_set_min_segs * mss) > new_tso))
17248 new_tso = rack->r_ctl.rc_user_set_min_segs * mss;
17256 uint32_t gain;
17261 * nearly zero, maybe because of a time-out?
17262 * Let's drop back to the lt-bw.
17267 gain = 100;
17268 } else if (IN_RECOVERY(rack->rc_tp->t_flags)) {
17273 if (rack->rack_hibeta == 0) {
17275 gain = 50;
17277 reduced_win = window_input * rack->r_ctl.saved_hibeta;
17279 gain = rack->r_ctl.saved_hibeta;
17286 gain = rack_get_output_gain(rack, NULL);
17287 if (gain > rack_gain_p5_ub) {
17288 gain = rack_gain_p5_ub;
17290 reduced_win = window_input * gain;
17294 *gain_b = gain;
17311 rack->r_via_fill_cw = 0;
17312 if (ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked) > rack->r_ctl.cwnd_to_use)
17314 if ((ctf_outstanding(rack->rc_tp) + (segsiz-1)) > rack->rc_tp->snd_wnd)
17316 if (rack->r_ctl.rc_last_us_rtt == 0)
17318 if (rack->rc_pace_fill_if_rttin_range &&
17319 (rack->r_ctl.rc_last_us_rtt >=
17320 (get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt) * rack->rtt_limit_mul))) {
17324 if (rack->r_ctl.fillcw_cap && *rate_wanted >= rack->r_ctl.fillcw_cap)
17327 * first lets calculate the b/w based on the last us-rtt
17330 fill_bw = min(rack->rc_tp->snd_cwnd, rack->r_ctl.cwnd_to_use);
17331 if (rack->rc_fillcw_apply_discount) {
17340 if (fill_bw > rack->rc_tp->snd_wnd)
17341 fill_bw = rack->rc_tp->snd_wnd;
17344 fill_bw /= (uint64_t)rack->r_ctl.rc_last_us_rtt;
17346 if (rack->r_ctl.fillcw_cap && fill_bw >= rack->r_ctl.fillcw_cap)
17347 fill_bw = rack->r_ctl.fillcw_cap;
17352 * We want to limit fill-cw to some multiplier
17366 if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
17379 tcp_log_event(rack->rc_tp, NULL, NULL, NULL,
17392 rack->r_via_fill_cw = 1;
17393 if (rack->r_rack_hw_rate_caps &&
17394 (rack->r_ctl.crte != NULL)) {
17397 high_rate = tcp_hw_highest_rate(rack->r_ctl.crte);
17402 rack->r_via_fill_cw = 0;
17411 } else if ((rack->r_ctl.crte == NULL) &&
17412 (rack->rack_hdrw_pacing == 0) &&
17413 (rack->rack_hdw_pace_ena) &&
17414 rack->r_rack_hw_rate_caps &&
17415 (rack->rack_attempt_hdwr_pace == 0) &&
17416 (rack->rc_inp->inp_route.ro_nh != NULL) &&
17417 (rack->rc_inp->inp_route.ro_nh->nh_ifp != NULL)) {
17424 high_rate = tcp_hw_highest_rate_ifp(rack->rc_inp->inp_route.ro_nh->nh_ifp, rack->rc_inp);
17433 if (rack->r_ctl.bw_rate_cap && (fill_bw > rack->r_ctl.bw_rate_cap)) {
17434 rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
17436 fill_bw = rack->r_ctl.bw_rate_cap;
17464 (rack->r_ctl.rc_user_set_min_segs == 1))
17468 if (rack->rc_always_pace == 0) {
17484 if (rack->r_ctl.rc_rack_min_rtt)
17485 srtt = rack->r_ctl.rc_rack_min_rtt;
17487 srtt = max(tp->t_srtt, 1);
17488 if (rack->r_ctl.rc_rack_largest_cwnd)
17489 cwnd = rack->r_ctl.rc_rack_largest_cwnd;
17491 cwnd = rack->r_ctl.cwnd_to_use;
17511 slot -= reduce;
17517 if (rack->rc_pace_to_cwnd) {
17521 rack->rc_ack_can_sendout_data = 1;
17526 /* RRS: We insert non-paced call to stats here for len */
17534 if ((rack->r_rr_config == 1) && rsm) {
17535 return (rack->r_ctl.rc_min_to);
17537 if (rack->use_fixed_rate) {
17539 } else if ((rack->r_ctl.init_rate == 0) &&
17540 (rack->r_ctl.gp_bw == 0)) {
17543 } else if (rack->dgp_on) {
17547 uint32_t gain, rate_set = 0;
17549 rate_wanted = min(rack->rc_tp->snd_cwnd, rack->r_ctl.cwnd_to_use);
17550 rate_wanted = rack_arrive_at_discounted_rate(rack, rate_wanted, &rate_set, &gain);
17552 if (rate_wanted > rack->rc_tp->snd_wnd)
17553 rate_wanted = rack->rc_tp->snd_wnd;
17556 rate_wanted /= (uint64_t)rack->r_ctl.rc_last_us_rtt;
17559 rack_log_pacing_delay_calc(rack, rack->rc_tp->snd_cwnd,
17560 rack->r_ctl.cwnd_to_use,
17562 rack->r_ctl.rc_last_us_rtt,
17563 88, __LINE__, NULL, gain);
17566 ((rack->gp_ready == 0) && (rack->use_fixed_rate == 0))) {
17575 segs = (len + segsiz - 1) / segsiz;
17577 * We need the diff between 1514 bytes (e-mtu with e-hdr)
17583 oh = (tp->t_maxseg - segsiz) + sizeof(struct tcphdr);
17584 if (rack->r_is_v6) {
17602 if (rack->r_ctl.crte) {
17607 tcp_rel_pacing_rate(rack->r_ctl.crte, rack->rc_tp);
17608 rack->r_ctl.crte = NULL;
17609 rack->rack_attempt_hdwr_pace = 0;
17610 rack->rack_hdrw_pacing = 0;
17613 if (rack->r_ctl.crte &&
17614 (tcp_hw_highest_rate(rack->r_ctl.crte) < rate_wanted)) {
17620 if (rack->r_rack_hw_rate_caps == 0) {
17627 tcp_rel_pacing_rate(rack->r_ctl.crte, rack->rc_tp);
17628 rack->r_ctl.crte = NULL;
17629 rack->rack_attempt_hdwr_pace = 0;
17630 rack->rack_hdrw_pacing = 0;
17633 if ((rack->r_ctl.crte != NULL) && (rack->rc_inp->inp_snd_tag == NULL)) {
17638 tcp_rel_pacing_rate(rack->r_ctl.crte, rack->rc_tp);
17639 rack->r_ctl.crte = NULL;
17640 /* Let's re-allow attempting to set up pacing */
17641 rack->rack_hdrw_pacing = 0;
17642 rack->rack_attempt_hdwr_pace = 0;
17647 prev_fill = rack->r_via_fill_cw;
17648 if ((rack->rc_pace_to_cwnd) &&
17650 (rack->dgp_on == 1) &&
17651 (rack->use_fixed_rate == 0) &&
17652 (rack->in_probe_rtt == 0) &&
17653 (IN_FASTRECOVERY(rack->rc_tp->t_flags) == 0)) {
17659 /* Re-check to make sure we are not exceeding our max b/w */
17660 if ((rack->r_ctl.crte != NULL) &&
17661 (tcp_hw_highest_rate(rack->r_ctl.crte) < rate_wanted)) {
17667 if (rack->r_rack_hw_rate_caps == 0) {
17674 tcp_rel_pacing_rate(rack->r_ctl.crte, rack->rc_tp);
17675 rack->r_ctl.crte = NULL;
17676 rack->rack_attempt_hdwr_pace = 0;
17677 rack->rack_hdrw_pacing = 0;
17678 rack_set_pace_segments(rack->rc_tp, rack, __LINE__, NULL);
17682 if ((rack->rc_inp->inp_route.ro_nh != NULL) &&
17683 (rack->rc_inp->inp_route.ro_nh->nh_ifp != NULL)) {
17684 if ((rack->rack_hdw_pace_ena) &&
17686 (rack->rack_hdrw_pacing == 0) &&
17687 (rack->rack_attempt_hdwr_pace == 0)) {
17692 rack->rack_attempt_hdwr_pace = 1;
17693 rack->r_ctl.crte = tcp_set_pacing_rate(rack->rc_tp,
17694 rack->rc_inp->inp_route.ro_nh->nh_ifp,
17697 &err, &rack->r_ctl.crte_prev_rate);
17698 if (rack->r_ctl.crte) {
17699 rack->rack_hdrw_pacing = 1;
17700 rack->r_ctl.rc_pace_max_segs = tcp_get_pacing_burst_size_w_divisor(tp, rate_wanted, segsiz,
17701 pace_one, rack->r_ctl.crte,
17702 NULL, rack->r_ctl.pace_len_divisor);
17704 rate_wanted, rack->r_ctl.crte->rate, __LINE__,
17706 rack->r_ctl.last_hw_bw_req = rate_wanted;
17710 } else if (rack->rack_hdrw_pacing &&
17711 (rack->r_ctl.last_hw_bw_req != rate_wanted)) {
17715 if (rack->r_up_only &&
17716 (rate_wanted < rack->r_ctl.crte->rate)) {
17721 * previous | this-time
17722 * A) 0 | 0 -- fill_cw not in the picture
17723 * B) 1 | 0 -- we were doing a fill-cw but now are not
17724 * C) 1 | 1 -- all rates from fill_cw
17725 * D) 0 | 1 -- we were doing non-fill and now we are filling
17732 if (!((prev_fill == 1) && (rack->r_via_fill_cw == 0)))
17735 if ((rate_wanted > rack->r_ctl.crte->rate) ||
17736 (rate_wanted <= rack->r_ctl.crte_prev_rate)) {
17744 bw_est, rack->r_ctl.crte->rate, __LINE__,
17746 tcp_rel_pacing_rate(rack->r_ctl.crte, rack->rc_tp);
17747 rack->r_ctl.crte = NULL;
17748 rack->rack_attempt_hdwr_pace = 0;
17749 rack->rack_hdrw_pacing = 0;
17750 rack_set_pace_segments(rack->rc_tp, rack, __LINE__, &rate_wanted);
17753 nrte = tcp_chg_pacing_rate(rack->r_ctl.crte,
17754 rack->rc_tp,
17755 rack->rc_inp->inp_route.ro_nh->nh_ifp,
17758 &err, &rack->r_ctl.crte_prev_rate);
17764 rack->rack_hdrw_pacing = 0;
17765 rack->r_ctl.crte = NULL;
17769 rack_set_pace_segments(rack->rc_tp, rack, __LINE__, &rate_wanted);
17771 } else if (nrte != rack->r_ctl.crte) {
17772 rack->r_ctl.crte = nrte;
17773 rack->r_ctl.rc_pace_max_segs = tcp_get_pacing_burst_size_w_divisor(tp, rate_wanted,
17774 segsiz, pace_one, rack->r_ctl.crte,
17775 NULL, rack->r_ctl.pace_len_divisor);
17777 rate_wanted, rack->r_ctl.crte->rate, __LINE__,
17779 rack->r_ctl.last_hw_bw_req = rate_wanted;
17783 rack_set_pace_segments(rack->rc_tp, rack, __LINE__, &rate_wanted);
17785 rate_wanted, rack->r_ctl.crte->rate, __LINE__,
17787 rack->r_ctl.last_hw_bw_req = rate_wanted;
17793 (rack->use_fixed_rate == 0) &&
17794 (rack->rack_hdrw_pacing == 0)) {
17805 if (rack->rc_tp->t_srtt)
17806 srtt = rack->rc_tp->t_srtt;
17819 if (rack->r_ctl.crte && (rack->r_ctl.crte->rs_num_enobufs > 0)) {
17823 * of gas or we are mis-estimating the time
17829 hw_boost_delay = rack->r_ctl.crte->time_between * rack_enobuf_hw_boost_mult;
17845 if (tp->t_state < TCPS_ESTABLISHED) {
17852 if (tp->t_state >= TCPS_FIN_WAIT_1) {
17859 if (sbavail(&tptosocket(tp)->so_snd) <
17866 tp->t_flags |= TF_GPUTINPROG;
17867 rack->r_ctl.rc_gp_cumack_ts = 0;
17868 rack->r_ctl.rc_gp_lowrtt = 0xffffffff;
17869 rack->r_ctl.rc_gp_high_rwnd = rack->rc_tp->snd_wnd;
17870 tp->gput_seq = startseq;
17871 rack->app_limited_needs_set = 0;
17872 if (rack->in_probe_rtt)
17873 rack->measure_saw_probe_rtt = 1;
17874 else if ((rack->measure_saw_probe_rtt) &&
17875 (SEQ_GEQ(tp->gput_seq, rack->r_ctl.rc_probertt_sndmax_atexit)))
17876 rack->measure_saw_probe_rtt = 0;
17877 if (rack->rc_gp_filled)
17878 tp->gput_ts = rack->r_ctl.last_cumack_advance;
17883 tp->gput_ts = tcp_get_usecs(&tv);
17884 rack->r_ctl.rc_gp_output_ts = rack_to_usec_ts(&tv);
17890 * initial-windows worth of data to
17894 if ((rack->rc_gp_filled == 0) && (rack->r_ctl.init_rate == 0)) {
17895 rack->app_limited_needs_set = 1;
17896 tp->gput_ack = startseq + max(rc_init_window(rack),
17899 tp->gput_seq,
17900 tp->gput_ack,
17902 tp->gput_ts,
17903 (((uint64_t)rack->r_ctl.rc_app_limited_cnt << 32) | (uint64_t)rack->r_ctl.rc_gp_output_ts),
17907 rack_log_gpset(rack, tp->gput_ack, 0, 0, __LINE__, 1, NULL);
17916 if (rack->r_ctl.rc_app_limited_cnt == 0) {
17919 * the tp->gput_ts is correctly set based on
17923 my_rsm = tqhash_min(rack->r_ctl.tqh);
17925 (my_rsm->r_rtr_cnt != 1)) {
17930 if (rack->r_ctl.rc_first_appl == NULL) {
17945 rack->app_limited_needs_set = 1;
17949 * after that (after the app-limited).
17951 my_rsm = tqhash_next(rack->r_ctl.tqh, rack->r_ctl.rc_first_appl);
17953 if ((my_rsm->r_end - my_rsm->r_start) <= ctf_fixed_maxseg(tp))
17955 my_rsm = tqhash_next(rack->r_ctl.tqh, my_rsm);
17958 tp->gput_seq = my_rsm->r_start + ctf_fixed_maxseg(tp);
17963 (my_rsm->r_rtr_cnt != 1)) {
17966 * the last is the app-limited one.
17971 tp->gput_seq = my_rsm->r_start;
17973 if (my_rsm->r_flags & RACK_ACKED) {
17979 tp->gput_ts = (uint32_t)my_rsm->r_ack_arrival;
17980 rack->app_limited_needs_set = 0;
17985 tp->gput_seq = my_rsm->r_end;
17990 nrsm = tqhash_next(rack->r_ctl.tqh, my_rsm);
18001 rack->r_ctl.rc_gp_output_ts = my_rsm->r_tim_lastsent[0];
18002 tp->gput_ack = tp->gput_seq + rack_get_measure_window(tp, rack);
18003 rack->r_ctl.rc_gp_cumack_ts = 0;
18004 if ((rack->r_ctl.cleared_app_ack == 1) &&
18005 (SEQ_GEQ(rack->r_ctl.cleared_app_ack, tp->gput_seq))) {
18011 rack->app_limited_needs_set = 1;
18012 rack->r_ctl.cleared_app_ack = 0;
18015 tp->gput_seq,
18016 tp->gput_ack,
18018 tp->gput_ts,
18019 (((uint64_t)rack->r_ctl.rc_app_limited_cnt << 32) | (uint64_t)rack->r_ctl.rc_gp_output_ts),
18024 rack_log_gpset(rack, tp->gput_ack, 0, 0, __LINE__, 1, NULL);
18031 * idle or if this is the first-send. Lets
18036 rack->app_limited_needs_set = 1;
18037 tp->gput_ack = startseq + rack_get_measure_window(tp, rack);
18038 rack->r_ctl.rc_gp_cumack_ts = 0;
18040 my_rsm = tqhash_find(rack->r_ctl.tqh, startseq);
18042 rack->r_ctl.rc_gp_output_ts = my_rsm->r_tim_lastsent[0];
18043 if (my_rsm->r_flags & RACK_ACKED) {
18048 tp->gput_ts = (uint32_t)my_rsm->r_ack_arrival;
18049 rack->app_limited_needs_set = 0;
18051 if (SEQ_LT(my_rsm->r_start, tp->gput_seq)) {
18053 tp->gput_seq = my_rsm->r_start;
18057 * TSNH unless we have some send-map limit,
18064 rack->r_ctl.rc_gp_output_ts = rack_to_usec_ts(&tv);
18068 tp->gput_seq,
18069 tp->gput_ack,
18071 tp->gput_ts,
18072 (((uint64_t)rack->r_ctl.rc_app_limited_cnt << 32) | (uint64_t)rack->r_ctl.rc_gp_output_ts),
18074 rack_log_gpset(rack, tp->gput_ack, 0, 0, __LINE__, 1, NULL);
18084 if (tp->snd_wnd > cwnd_to_use)
18087 sendwin = tp->snd_wnd;
18088 if (ctf_outstanding(tp) >= tp->snd_wnd) {
18089 /* We never want to go over our peer's rcv-window */
18094 flight = ctf_flight_size(tp, rack->r_ctl.rc_sacked);
18099 * >= tp->snd_wnd).
18103 len = sendwin - flight;
18104 if ((len + ctf_outstanding(tp)) > tp->snd_wnd) {
18106 len = tp->snd_wnd - ctf_outstanding(tp);
18113 len = avail - sb_offset;
18124 if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
18129 log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
18134 log.u_bbr.flex5 = tp->rcv_numsacks;
18135 log.u_bbr.flex6 = rack->r_ctl.rc_agg_early;
18137 log.u_bbr.flex8 = rack->r_fsb_inited;
18138 log.u_bbr.applimited = rack->r_fast_output;
18146 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
18147 tcp_log_event(tp, NULL, &so->so_rcv, &so->so_snd, TCP_LOG_FSB, 0,
18177 if (hw_tls && (m->m_flags & M_EXTPG))
18178 tls = m->m_epg_tls;
18192 if (m->m_flags & M_EXTPG)
18193 ntls = m->m_epg_tls;
18209 mlen = min(len, m->m_len - off);
18219 if (m->m_flags & M_EXTPG) {
18240 mlen = (seglimit - frags - 1) * fragsize;
18247 seglimit -= frags;
18251 n = m_get(M_NOWAIT, m->m_type);
18255 n->m_len = mlen;
18257 len_cp += n->m_len;
18258 if (m->m_flags & (M_EXT | M_EXTPG)) {
18259 n->m_data = m->m_data + off;
18263 (u_int)n->m_len);
18265 len -= n->m_len;
18267 m = m->m_next;
18268 np = &n->m_next;
18269 if (len || (soff == smb->m_len)) {
18281 fsb->m = smb;
18282 fsb->off = soff;
18290 fsb->o_m_len = smb->m_len;
18291 fsb->o_t_len = M_TRAILINGROOM(smb);
18301 fsb->o_m_len = 0;
18302 fsb->o_t_len = 0;
18324 m = rack->r_ctl.fsb.m;
18325 if (M_TRAILINGROOM(m) != rack->r_ctl.fsb.o_t_len) {
18332 KASSERT((rack->r_ctl.fsb.o_t_len > M_TRAILINGROOM(m)),
18333 ("mbuf:%p rack:%p trailing_space:%jd ots:%u oml:%u mlen:%u\n",
18337 rack->r_ctl.fsb.o_t_len,
18338 rack->r_ctl.fsb.o_m_len,
18339 m->m_len));
18340 rack->r_ctl.fsb.o_m_len += (rack->r_ctl.fsb.o_t_len - M_TRAILINGROOM(m));
18341 rack->r_ctl.fsb.o_t_len = M_TRAILINGROOM(m);
18343 if (m->m_len < rack->r_ctl.fsb.o_m_len) {
18348 KASSERT((rack->r_ctl.fsb.off >= (rack->r_ctl.fsb.o_m_len - m->m_len)),
18349 ("mbuf:%p len:%u rack:%p oml:%u soff:%u\n",
18350 m, m->m_len,
18351 rack, rack->r_ctl.fsb.o_m_len,
18352 rack->r_ctl.fsb.off));
18354 if (rack->r_ctl.fsb.off >= (rack->r_ctl.fsb.o_m_len- m->m_len))
18355 rack->r_ctl.fsb.off -= (rack->r_ctl.fsb.o_m_len - m->m_len);
18357 rack->r_ctl.fsb.off = 0;
18358 rack->r_ctl.fsb.o_m_len = m->m_len;
18360 } else if (m->m_len > rack->r_ctl.fsb.o_m_len) {
18361 panic("rack:%p m:%p m_len grew outside of t_space compensation",
18365 soff = rack->r_ctl.fsb.off;
18368 KASSERT(soff < m->m_len, ("%s rack:%p len:%u m:%p m->m_len:%u < off?",
18370 rack, *plen, m, m->m_len));
18373 *s_mb = rack->r_ctl.fsb.m;
18375 &rack->r_ctl.fsb,
18376 seglimit, segsize, rack->r_ctl.fsb.hw_tls);
18390 err = in_pcbquery_txrlevel(rack->rc_inp, &p_queue);
18391 err = in_pcbquery_txrtlmt(rack->rc_inp, &p_rate);
18394 log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
18397 log.u_bbr.flex4 = (uint32_t)rack->r_ctl.crte->using;
18398 log.u_bbr.flex5 = (uint32_t)rack->r_ctl.crte->rs_num_enobufs;
18399 log.u_bbr.flex6 = rack->r_ctl.crte->time_between;
18403 log.u_bbr.delRate = rack->r_ctl.crte->rate;
18405 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
18421 err = in_pcbquery_txrlevel(rack->rc_inp, &p_queue);
18427 err = in_pcbquery_txrtlmt(rack->rc_inp, &p_rate);
18448 lentime = (rack->r_ctl.rc_pace_max_segs / segsiz);
18453 /* TSNH -- KASSERT? */
18459 log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
18462 log.u_bbr.flex4 = (uint32_t)rack->r_ctl.crte->using;
18463 log.u_bbr.flex5 = (uint32_t)rack->r_ctl.crte->rs_num_enobufs;
18464 log.u_bbr.flex6 = rack->r_ctl.crte->time_between;
18468 log.u_bbr.delRate = rack->r_ctl.crte->rate;
18471 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
18514 if (rack->r_is_v6) {
18515 ip6 = (struct ip6_hdr *)rack->r_ctl.fsb.tcp_ip_hdr;
18520 ip = (struct ip *)rack->r_ctl.fsb.tcp_ip_hdr;
18523 if (tp->t_port && (V_tcp_udp_tunneling_port == 0)) {
18528 rsm->r_flags |= RACK_TLP;
18531 rsm->r_flags &= ~RACK_TLP;
18533 startseq = rsm->r_start;
18534 segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs);
18535 inp = rack->rc_inp;
18537 flags = tcp_outflags[tp->t_state];
18541 if (rsm->r_flags & RACK_HAS_FIN) {
18549 if (tp->t_flags & TF_RCVD_TSTMP) {
18550 to.to_tsval = ms_cts + tp->ts_offset;
18551 to.to_tsecr = tp->ts_recent;
18555 /* TCP-MD5 (RFC2385). */
18556 if (tp->t_flags & TF_SIGNATURE)
18561 udp = rack->r_ctl.fsb.udp;
18564 if (rack->r_ctl.rc_pace_max_segs)
18565 max_val = rack->r_ctl.rc_pace_max_segs;
18566 else if (rack->rc_user_set_max_segs)
18567 max_val = rack->rc_user_set_max_segs * segsiz;
18570 if ((tp->t_flags & TF_TSO) &&
18573 (tp->t_port == 0))
18583 m->m_data += max_linkhdr;
18584 m->m_len = hdrlen;
18585 th = rack->r_ctl.fsb.th;
18594 if_hw_tsomax = tp->t_tsomax;
18595 if_hw_tsomaxsegcount = tp->t_tsomaxsegcount;
18596 if_hw_tsomaxsegsize = tp->t_tsomaxsegsize;
18603 max_len = (if_hw_tsomax - hdrlen -
18625 (len <= MHLEN - hdrlen - max_linkhdr)) {
18628 th->th_seq = htonl(rsm->r_start);
18629 th->th_ack = htonl(tp->rcv_nxt);
18637 if ((rsm->r_flags & RACK_HAD_PUSH) &&
18638 (len == (rsm->r_end - rsm->r_start)))
18640 th->th_win = htons((u_short)(rack->r_ctl.fsb.recwin >> tp->rcv_scale));
18641 if (th->th_win == 0) {
18642 tp->t_sndzerowin++;
18643 tp->t_flags |= TF_RXWIN0SENT;
18645 tp->t_flags &= ~TF_RXWIN0SENT;
18646 if (rsm->r_flags & RACK_TLP) {
18654 tp->t_sndrexmitpack++;
18659 stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
18662 if (rsm->m == NULL)
18664 if (rsm->m &&
18665 ((rsm->orig_m_len != rsm->m->m_len) ||
18666 (M_TRAILINGROOM(rsm->m) != rsm->orig_t_space))) {
18670 m->m_next = rack_fo_base_copym(rsm->m, rsm->soff, &len, NULL, if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, rsm->r_hw_tls);
18680 if ((m->m_next == NULL) || (len <= 0)){
18684 if (rack->r_is_v6)
18685 ulen = hdrlen + len - sizeof(struct ip6_hdr);
18687 ulen = hdrlen + len - sizeof(struct ip);
18688 udp->uh_ulen = htons(ulen);
18690 m->m_pkthdr.rcvif = (struct ifnet *)0;
18691 if (TCPS_HAVERCVDSYN(tp->t_state) &&
18692 (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
18694 if ((tp->t_state == TCPS_SYN_RECEIVED) &&
18695 (tp->t_flags2 & TF2_ECN_SND_ECE))
18696 tp->t_flags2 &= ~TF2_ECN_SND_ECE;
18698 if (rack->r_is_v6) {
18699 ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
18700 ip6->ip6_flow |= htonl(ect << 20);
18705 ip->ip_tos &= ~IPTOS_ECN_MASK;
18706 ip->ip_tos |= ect;
18709 if (rack->r_ctl.crte != NULL) {
18717 m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
18727 (u_char *)(th + 1) + (to.to_signature - opt)) != 0) {
18737 if (rack->r_is_v6) {
18738 if (tp->t_port) {
18739 m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
18740 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
18741 udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0);
18742 th->th_sum = htons(0);
18745 m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
18746 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
18747 th->th_sum = in6_cksum_pseudo(ip6,
18758 if (tp->t_port) {
18759 m->m_pkthdr.csum_flags = CSUM_UDP;
18760 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
18761 udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
18762 ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP));
18763 th->th_sum = htons(0);
18766 m->m_pkthdr.csum_flags = CSUM_TCP;
18767 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
18768 th->th_sum = in_pseudo(ip->ip_src.s_addr,
18769 ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) +
18773 KASSERT(ip->ip_v == IPVERSION,
18774 ("%s: IP version incorrect: %d", __func__, ip->ip_v));
18781 * via either fast-path).
18784 ("%s: len <= tso_segsz tp:%p", __func__, tp));
18785 m->m_pkthdr.csum_flags |= CSUM_TSO;
18786 m->m_pkthdr.tso_segsz = segsiz;
18789 if (rack->r_is_v6) {
18790 ip6->ip6_hlim = rack->r_ctl.fsb.hoplimit;
18791 ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));
18792 if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss)
18793 tp->t_flags2 |= TF2_PLPMTU_PMTUD;
18795 tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
18803 ip->ip_len = htons(m->m_pkthdr.len);
18804 ip->ip_ttl = rack->r_ctl.fsb.hoplimit;
18805 if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) {
18806 tp->t_flags2 |= TF2_PLPMTU_PMTUD;
18807 if (tp->t_port == 0 || len < V_tcp_minmss) {
18808 ip->ip_off |= htons(IP_DF);
18811 tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
18817 rack->rc_gp_saw_rec = 1;
18820 if (tp->snd_cwnd > tp->snd_ssthresh) {
18822 rack->rc_gp_saw_ca = 1;
18825 rack->rc_gp_saw_ss = 1;
18830 memcpy(cpto, rack->r_ctl.fsb.tcp_ip_hdr, rack->r_ctl.fsb.tcp_ip_hdr_len);
18831 th = (struct tcphdr *)(cpto + ((uint8_t *)rack->r_ctl.fsb.th - rack->r_ctl.fsb.tcp_ip_hdr));
18834 th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
18836 th->th_off = sizeof(struct tcphdr) >> 2;
18838 if (tcp_bblogging_on(rack->rc_tp)) {
18841 if (rsm->r_flags & RACK_RWND_COLLAPSED) {
18842 rack_log_collapse(rack, rsm->r_start, rsm->r_end, 0, __LINE__, 5, rsm->r_flags, rsm);
18844 counter_u64_add(rack_collapsed_win_rxt_bytes, (rsm->r_end - rsm->r_start));
18847 log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
18848 if (rack->rack_no_prr)
18851 log.u_bbr.flex1 = rack->r_ctl.rc_prr_sndcnt;
18852 log.u_bbr.flex2 = rack->r_ctl.rc_pace_min_segs;
18853 log.u_bbr.flex3 = rack->r_ctl.rc_pace_max_segs;
18856 log.u_bbr.flex6 = rack->r_ctl.rc_agg_early;
18857 log.u_bbr.applimited = rack->r_ctl.rc_agg_delayed;
18859 log.u_bbr.cur_del_rate = rack->r_ctl.gp_bw;
18866 log.u_bbr.pkts_out = tp->t_maxseg;
18868 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
18869 if (rsm->r_rtr_cnt > 0) {
18874 log.u_bbr.flex5 = rsm->r_fas;
18875 log.u_bbr.bbr_substate = rsm->r_bas;
18882 log.u_bbr.bbr_substate = (uint8_t)((len + segsiz - 1)/segsiz);
18884 log.u_bbr.lt_epoch = rack->r_ctl.cwnd_to_use;
18887 log.u_bbr.delRate = rsm->r_flags;
18889 log.u_bbr.delRate |= rack->r_must_retran;
18897 if ((rack->r_ctl.crte != NULL) &&
18902 if (rack->r_is_v6) {
18903 error = ip6_output(m, inp->in6p_outputopts,
18904 &inp->inp_route6,
18912 &inp->inp_route,
18918 lgb->tlb_errno = error;
18922 tp->snd_nxt = tp->snd_max;
18925 } else if (rack->rc_hw_nobuf && (ip_sendflag != IP_NO_SND_TAG_RL)) {
18926 rack->rc_hw_nobuf = 0;
18927 rack->r_ctl.rc_agg_delayed = 0;
18928 rack->r_early = 0;
18929 rack->r_late = 0;
18930 rack->r_ctl.rc_agg_early = 0;
18932 rack_log_output(tp, &to, len, rsm->r_start, flags, error, rack_to_usec_ts(tv),
18933 rsm, RACK_SENT_FP, rsm->m, rsm->soff, rsm->r_hw_tls, segsiz);
18935 rack->rc_tlp_in_progress = 1;
18936 rack->r_ctl.rc_tlp_cnt_out++;
18940 tcp_account_for_send(tp, len, 1, doing_tlp, rsm->r_hw_tls);
18942 rack->rc_last_sent_tlp_past_cumack = 0;
18943 rack->rc_last_sent_tlp_seq_valid = 1;
18944 rack->r_ctl.last_sent_tlp_seq = rsm->r_start;
18945 rack->r_ctl.last_sent_tlp_len = rsm->r_end - rsm->r_start;
18947 if (rack->r_ctl.rc_prr_sndcnt >= len)
18948 rack->r_ctl.rc_prr_sndcnt -= len;
18950 rack->r_ctl.rc_prr_sndcnt = 0;
18952 tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
18953 rack->forced_ack = 0; /* If we send something zap the FA flag */
18954 if (IN_FASTRECOVERY(tp->t_flags) && rsm)
18955 rack->r_ctl.retran_during_recovery += len;
18961 counter_u64_add(rack_out_size[(TCP_MSS_ACCT_ATIMER-1)], 1);
18965 if (tp->t_rtttime == 0) {
18966 tp->t_rtttime = ticks;
18967 tp->t_rtseq = startseq;
18972 if (rack->r_ctl.crte != NULL) {
18973 tcp_trace_point(rack->rc_tp, TCP_TP_HWENOBUF);
18974 if (tcp_bblogging_on(rack->rc_tp))
18977 tcp_trace_point(rack->rc_tp, TCP_TP_ENOBUF);
18978 slot = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC);
18979 if (rack->rc_enobuf < 0x7f)
18980 rack->rc_enobuf++;
18983 if (rack->r_ctl.crte != NULL) {
18985 tcp_rl_log_enobuf(rack->r_ctl.crte);
18994 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
18995 tp->tcp_cnt_counters[SND_OUT_DATA] += cnt_thru;
18997 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
18998 tp->tcp_proc_time[SND_OUT_DATA] += (crtsc - ts_val);
19000 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
19001 tp->tcp_cnt_counters[CNT_OF_MSS_OUT] += ((len + segsiz - 1) / segsiz);
19009 return (-1);
19020 * delay (eg. trans-continental/oceanic links). Setting the
19042 tp = rack->rc_tp;
19043 so = rack->rc_inp->inp_socket;
19044 sendwin = min(rack->r_ctl.cwnd_to_use, tp->snd_wnd);
19045 if (V_tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) {
19046 if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat &&
19047 sbused(&so->so_snd) >=
19048 (so->so_snd.sb_hiwat / 8 * 7) &&
19049 sbused(&so->so_snd) < V_tcp_autosndbuf_max &&
19050 sendwin >= (sbused(&so->so_snd) -
19051 (tp->snd_max - tp->snd_una))) {
19053 scaleup = (rack_autosndbuf_inc * so->so_snd.sb_hiwat) / 100;
19058 scaleup += so->so_snd.sb_hiwat;
19062 so->so_snd.sb_flags &= ~SB_AUTOSIZE;
19077 * the max-burst). We have how much to send and all the info we
19107 if (rack->r_is_v6) {
19108 ip6 = (struct ip6_hdr *)rack->r_ctl.fsb.tcp_ip_hdr;
19114 ip = (struct ip *)rack->r_ctl.fsb.tcp_ip_hdr;
19118 if (tp->t_port && (V_tcp_udp_tunneling_port == 0)) {
19122 rack->r_ctl.cwnd_to_use = tp->snd_cwnd;
19123 startseq = tp->snd_max;
19124 segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs);
19125 inp = rack->rc_inp;
19126 len = rack->r_ctl.fsb.left_to_send;
19128 flags = rack->r_ctl.fsb.tcp_flags;
19129 if (tp->t_flags & TF_RCVD_TSTMP) {
19130 to.to_tsval = ms_cts + tp->ts_offset;
19131 to.to_tsecr = tp->ts_recent;
19135 /* TCP-MD5 (RFC2385). */
19136 if (tp->t_flags & TF_SIGNATURE)
19141 udp = rack->r_ctl.fsb.udp;
19144 if (rack->r_ctl.rc_pace_max_segs)
19145 max_val = rack->r_ctl.rc_pace_max_segs;
19146 else if (rack->rc_user_set_max_segs)
19147 max_val = rack->rc_user_set_max_segs * segsiz;
19150 if ((tp->t_flags & TF_TSO) &&
19153 (tp->t_port == 0))
19164 m->m_data += max_linkhdr;
19165 m->m_len = hdrlen;
19166 th = rack->r_ctl.fsb.th;
19175 if_hw_tsomax = tp->t_tsomax;
19176 if_hw_tsomaxsegcount = tp->t_tsomaxsegcount;
19177 if_hw_tsomaxsegsize = tp->t_tsomaxsegsize;
19184 max_len = (if_hw_tsomax - hdrlen -
19206 (len <= MHLEN - hdrlen - max_linkhdr)) {
19209 sb_offset = tp->snd_max - tp->snd_una;
19210 th->th_seq = htonl(tp->snd_max);
19211 th->th_ack = htonl(tp->rcv_nxt);
19212 th->th_win = htons((u_short)(rack->r_ctl.fsb.recwin >> tp->rcv_scale));
19213 if (th->th_win == 0) {
19214 tp->t_sndzerowin++;
19215 tp->t_flags |= TF_RXWIN0SENT;
19217 tp->t_flags &= ~TF_RXWIN0SENT;
19218 tp->snd_up = tp->snd_una; /* drag it along, its deprecated */
19222 stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
19225 if (rack->r_ctl.fsb.m == NULL)
19229 m->m_next = rack_fo_m_copym(rack, &len, if_hw_tsomaxsegcount, if_hw_tsomaxsegsize,
19240 if (rack->r_ctl.fsb.rfo_apply_push &&
19241 (len == rack->r_ctl.fsb.left_to_send)) {
19245 if ((m->m_next == NULL) || (len <= 0)){
19249 if (rack->r_is_v6)
19250 ulen = hdrlen + len - sizeof(struct ip6_hdr);
19252 ulen = hdrlen + len - sizeof(struct ip);
19253 udp->uh_ulen = htons(ulen);
19255 m->m_pkthdr.rcvif = (struct ifnet *)0;
19256 if (TCPS_HAVERCVDSYN(tp->t_state) &&
19257 (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
19259 if ((tp->t_state == TCPS_SYN_RECEIVED) &&
19260 (tp->t_flags2 & TF2_ECN_SND_ECE))
19261 tp->t_flags2 &= ~TF2_ECN_SND_ECE;
19263 if (rack->r_is_v6) {
19264 ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
19265 ip6->ip6_flow |= htonl(ect << 20);
19271 ip->ip_tos &= ~IPTOS_ECN_MASK;
19272 ip->ip_tos |= ect;
19277 m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
19287 (u_char *)(th + 1) + (to.to_signature - opt)) != 0) {
19297 if (rack->r_is_v6) {
19298 if (tp->t_port) {
19299 m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
19300 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
19301 udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0);
19302 th->th_sum = htons(0);
19305 m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
19306 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
19307 th->th_sum = in6_cksum_pseudo(ip6,
19318 if (tp->t_port) {
19319 m->m_pkthdr.csum_flags = CSUM_UDP;
19320 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
19321 udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
19322 ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP));
19323 th->th_sum = htons(0);
19326 m->m_pkthdr.csum_flags = CSUM_TCP;
19327 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
19328 th->th_sum = in_pseudo(ip->ip_src.s_addr,
19329 ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) +
19333 KASSERT(ip->ip_v == IPVERSION,
19334 ("%s: IP version incorrect: %d", __func__, ip->ip_v));
19341 * via either fast-path).
19344 ("%s: len <= tso_segsz tp:%p", __func__, tp));
19345 m->m_pkthdr.csum_flags |= CSUM_TSO;
19346 m->m_pkthdr.tso_segsz = segsiz;
19349 if (rack->r_is_v6) {
19350 ip6->ip6_hlim = rack->r_ctl.fsb.hoplimit;
19351 ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));
19352 if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss)
19353 tp->t_flags2 |= TF2_PLPMTU_PMTUD;
19355 tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
19363 ip->ip_len = htons(m->m_pkthdr.len);
19364 ip->ip_ttl = rack->r_ctl.fsb.hoplimit;
19365 if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) {
19366 tp->t_flags2 |= TF2_PLPMTU_PMTUD;
19367 if (tp->t_port == 0 || len < V_tcp_minmss) {
19368 ip->ip_off |= htons(IP_DF);
19371 tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
19375 if (tp->snd_cwnd > tp->snd_ssthresh) {
19377 rack->rc_gp_saw_ca = 1;
19380 rack->rc_gp_saw_ss = 1;
19384 memcpy(cpto, rack->r_ctl.fsb.tcp_ip_hdr, rack->r_ctl.fsb.tcp_ip_hdr_len);
19385 th = (struct tcphdr *)(cpto + ((uint8_t *)rack->r_ctl.fsb.th - rack->r_ctl.fsb.tcp_ip_hdr));
19388 th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
19390 th->th_off = sizeof(struct tcphdr) >> 2;
19392 if ((rack->r_ctl.crte != NULL) &&
19396 if (tcp_bblogging_on(rack->rc_tp)) {
19400 log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
19401 if (rack->rack_no_prr)
19404 log.u_bbr.flex1 = rack->r_ctl.rc_prr_sndcnt;
19405 log.u_bbr.flex2 = rack->r_ctl.rc_pace_min_segs;
19406 log.u_bbr.flex3 = rack->r_ctl.rc_pace_max_segs;
19409 log.u_bbr.flex6 = rack->r_ctl.rc_agg_early;
19410 log.u_bbr.applimited = rack->r_ctl.rc_agg_delayed;
19412 log.u_bbr.cur_del_rate = rack->r_ctl.gp_bw;
19416 log.u_bbr.pkts_out = tp->t_maxseg;
19418 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
19420 log.u_bbr.lt_epoch = rack->r_ctl.cwnd_to_use;
19423 log.u_bbr.delRate = rack->r_must_retran;
19428 log.u_bbr.bbr_substate = (uint8_t)((len + segsiz - 1)/segsiz);
19434 if (rack->r_is_v6) {
19435 error = ip6_output(m, inp->in6p_outputopts,
19436 &inp->inp_route6,
19446 &inp->inp_route,
19451 lgb->tlb_errno = error;
19458 } else if (rack->rc_hw_nobuf) {
19459 rack->rc_hw_nobuf = 0;
19460 rack->r_ctl.rc_agg_delayed = 0;
19461 rack->r_early = 0;
19462 rack->r_late = 0;
19463 rack->r_ctl.rc_agg_early = 0;
19465 if ((error == 0) && (rack->lt_bw_up == 0)) {
19467 rack->r_ctl.lt_timemark = tcp_tv_to_lusectick(tv);
19468 rack->r_ctl.lt_seq = tp->snd_una;
19469 rack->lt_bw_up = 1;
19471 (((tp->snd_max + len) - rack->r_ctl.lt_seq) > 0x7fffffff)) {
19479 rack->r_ctl.lt_bw_bytes += (tp->snd_una - rack->r_ctl.lt_seq);
19480 rack->r_ctl.lt_seq = tp->snd_una;
19482 if (tmark > rack->r_ctl.lt_timemark) {
19483 rack->r_ctl.lt_bw_time += (tmark - rack->r_ctl.lt_timemark);
19484 rack->r_ctl.lt_timemark = tmark;
19487 rack_log_output(tp, &to, len, tp->snd_max, flags, error, rack_to_usec_ts(tv),
19488 NULL, add_flag, s_mb, s_soff, rack->r_ctl.fsb.hw_tls, segsiz);
19489 if (tp->snd_una == tp->snd_max) {
19490 rack->r_ctl.rc_tlp_rxt_last_time = cts;
19492 tp->t_acktime = ticks;
19495 tcp_account_for_send(tp, len, 0, 0, rack->r_ctl.fsb.hw_tls);
19497 rack->forced_ack = 0; /* If we send something zap the FA flag */
19499 if ((tp->t_flags & TF_GPUTINPROG) == 0)
19500 rack_start_gp_measurement(tp, rack, tp->snd_max, sb_offset);
19501 tp->snd_max += len;
19502 tp->snd_nxt = tp->snd_max;
19503 if (rack->rc_new_rnd_needed) {
19504 rack_new_round_starts(tp, rack, tp->snd_max);
19511 counter_u64_add(rack_out_size[(TCP_MSS_ACCT_ATIMER-1)], 1);
19515 if (len <= rack->r_ctl.fsb.left_to_send)
19516 rack->r_ctl.fsb.left_to_send -= len;
19518 rack->r_ctl.fsb.left_to_send = 0;
19519 if (rack->r_ctl.fsb.left_to_send < segsiz) {
19520 rack->r_fast_output = 0;
19521 rack->r_ctl.fsb.left_to_send = 0;
19523 SOCK_SENDBUF_LOCK(rack->rc_inp->inp_socket);
19525 SOCK_SENDBUF_UNLOCK(rack->rc_inp->inp_socket);
19527 if (tp->t_rtttime == 0) {
19528 tp->t_rtttime = ticks;
19529 tp->t_rtseq = startseq;
19532 if ((rack->r_ctl.fsb.left_to_send >= segsiz) &&
19535 max_val -= len;
19537 th = rack->r_ctl.fsb.th;
19543 tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
19549 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
19550 tp->tcp_cnt_counters[SND_OUT_DATA] += cnt_thru;
19552 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
19553 tp->tcp_proc_time[SND_OUT_DATA] += (crtsc - ts_val);
19555 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
19556 tp->tcp_cnt_counters[CNT_OF_MSS_OUT] += ((tot_len + segsiz - 1) / segsiz);
19564 rack->r_fast_output = 0;
19565 return (-1);
19575 rack->r_fast_output = 1;
19576 rack->r_ctl.fsb.m = sbsndmbuf(sb, (tp->snd_max - tp->snd_una), &rack->r_ctl.fsb.off);
19577 rack->r_ctl.fsb.o_m_len = rack->r_ctl.fsb.m->m_len;
19578 rack->r_ctl.fsb.o_t_len = M_TRAILINGROOM(rack->r_ctl.fsb.m);
19579 rack->r_ctl.fsb.tcp_flags = flags;
19580 rack->r_ctl.fsb.left_to_send = orig_len - len;
19581 if (rack->r_ctl.fsb.left_to_send < pace_max_seg) {
19583 rack->r_fast_output = 0;
19587 rack->r_ctl.fsb.left_to_send = rounddown(rack->r_ctl.fsb.left_to_send, pace_max_seg);
19590 rack->r_ctl.fsb.hw_tls = 1;
19592 rack->r_ctl.fsb.hw_tls = 0;
19593 KASSERT((rack->r_ctl.fsb.left_to_send <= (sbavail(sb) - (tp->snd_max - tp->snd_una))),
19594 ("rack:%p left_to_send:%u sbavail:%u out:%u",
19595 rack, rack->r_ctl.fsb.left_to_send, sbavail(sb),
19596 (tp->snd_max - tp->snd_una)));
19597 if (rack->r_ctl.fsb.left_to_send < segsiz)
19598 rack->r_fast_output = 0;
19600 if (rack->r_ctl.fsb.left_to_send == (sbavail(sb) - (tp->snd_max - tp->snd_una)))
19601 rack->r_ctl.fsb.rfo_apply_push = 1;
19603 rack->r_ctl.fsb.rfo_apply_push = 0;
19614 maxlen = (uint32_t)((rack->r_ctl.gp_bw * min_time) / (uint64_t)HPTS_USEC_IN_SEC);
19626 rsm = tqhash_find(rack->r_ctl.tqh, rack->r_ctl.last_collapse_point);
19627 if ((rsm == NULL) || ((rsm->r_flags & RACK_RWND_COLLAPSED) == 0)) {
19629 rack->r_collapse_point_valid = 0;
19633 if (rsm->r_end > (rack->rc_tp->snd_una + rack->rc_tp->snd_wnd)) {
19640 if (rsm->r_flags & RACK_ACKED) {
19645 rack->r_ctl.last_collapse_point = rsm->r_end;
19647 if (SEQ_GEQ(rack->r_ctl.last_collapse_point,
19648 rack->r_ctl.high_collapse_point)) {
19649 rack->r_collapse_point_valid = 0;
19655 thresh = rack_calc_thresh_rack(rack, rack_grab_rtt(rack->rc_tp, rack), cts, __LINE__, 1);
19656 if ((cts - ((uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)])) > thresh) {
19657 rack_log_collapse(rack, rsm->r_start,
19658 (cts - ((uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)])),
19659 thresh, __LINE__, 6, rsm->r_flags, rsm);
19663 rack_log_collapse(rack, rsm->r_start,
19664 (cts - ((uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)])),
19665 thresh, __LINE__, 7, rsm->r_flags, rsm);
19672 if ((rack->full_size_rxt == 0) &&
19673 (rack->shape_rxt_to_pacing_min == 0) &&
19676 } else if (rack->shape_rxt_to_pacing_min &&
19677 rack->gp_ready) {
19766 rack = (struct tcp_rack *)tp->t_fb_ptr;
19771 hpts_calling = !!(tp->t_flags2 & TF2_HPTS_CALLS);
19772 tp->t_flags2 &= ~TF2_HPTS_CALLS;
19774 if (tp->t_flags & TF_TOE) {
19781 if (rack->rack_deferred_inited == 0) {
19794 if ((tp->t_flags & TF_FASTOPEN) &&
19795 (tp->t_state == TCPS_SYN_RECEIVED) &&
19796 SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN|ACK sent */
19797 (rack->r_ctl.rc_resend == NULL)) { /* not a retransmit */
19804 if (rack->r_state) {
19806 isipv6 = rack->r_is_v6;
19808 isipv6 = (rack->rc_inp->inp_vflag & INP_IPV6) != 0;
19814 if (((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) == 0) &&
19815 tcp_in_hpts(rack->rc_tp)) {
19823 if ((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) &&
19824 TSTMP_GEQ(cts, rack->r_ctl.rc_last_output_to)) {
19826 delayed = cts - rack->r_ctl.rc_last_output_to;
19831 if (rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK) {
19848 if (rack->rc_in_persist) {
19849 if (tcp_in_hpts(rack->rc_tp) == 0) {
19858 if ((rack->rc_ack_required == 1) &&
19859 (rack->r_timer_override == 0)){
19861 if (tcp_in_hpts(rack->rc_tp) == 0) {
19870 if ((rack->r_timer_override) ||
19871 (rack->rc_ack_can_sendout_data) ||
19873 (tp->t_state < TCPS_ESTABLISHED)) {
19874 rack->rc_ack_can_sendout_data = 0;
19875 if (tcp_in_hpts(rack->rc_tp))
19876 tcp_hpts_remove(rack->rc_tp);
19877 } else if (tcp_in_hpts(rack->rc_tp)) {
19884 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
19885 tp->tcp_proc_time[SND_BLOCKED] += (crtsc - ts_val);
19887 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
19888 tp->tcp_cnt_counters[SND_BLOCKED]++;
19896 if ((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) &&
19897 TSTMP_GT(rack->r_ctl.rc_last_output_to, cts)) {
19898 early = rack->r_ctl.rc_last_output_to - cts;
19901 if (delayed && (rack->rc_always_pace == 1)) {
19902 rack->r_ctl.rc_agg_delayed += delayed;
19903 rack->r_late = 1;
19904 } else if (early && (rack->rc_always_pace == 1)) {
19905 rack->r_ctl.rc_agg_early += early;
19906 rack->r_early = 1;
19907 } else if (rack->rc_always_pace == 0) {
19908 /* Non-paced we are not late */
19909 rack->r_ctl.rc_agg_delayed = rack->r_ctl.rc_agg_early = 0;
19910 rack->r_early = rack->r_late = 0;
19913 rack->r_ctl.rc_hpts_flags &= ~PACE_PKT_OUTPUT;
19914 rack->r_wanted_output = 0;
19915 rack->r_timer_override = 0;
19916 if ((tp->t_state != rack->r_state) &&
19917 TCPS_HAVEESTABLISHED(tp->t_state)) {
19920 if ((rack->r_fast_output) &&
19922 (tp->rcv_numsacks == 0)) {
19930 inp = rack->rc_inp;
19931 so = inp->inp_socket;
19932 sb = &so->so_snd;
19936 inp = rack->rc_inp;
19942 if ((tp->t_flags & TF_FASTOPEN) &&
19943 ((tp->t_state == TCPS_SYN_RECEIVED) ||
19944 (tp->t_state == TCPS_SYN_SENT)) &&
19945 SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN or SYN|ACK sent */
19946 (tp->t_rxtshift == 0)) { /* not a retransmit */
19959 idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una);
19960 if (tp->t_idle_reduce) {
19961 if (idle && (TICKS_2_USEC(ticks - tp->t_rcvtime) >= tp->t_rxtcur))
19964 tp->t_flags &= ~TF_LASTIDLE;
19966 if (tp->t_flags & TF_MORETOCOME) {
19967 tp->t_flags |= TF_LASTIDLE;
19971 if ((tp->snd_una == tp->snd_max) &&
19972 rack->r_ctl.rc_went_idle_time &&
19973 (cts > rack->r_ctl.rc_went_idle_time)) {
19974 tot_idle = (cts - rack->r_ctl.rc_went_idle_time);
19977 if (rack->in_probe_rtt == 0) {
19978 rack->r_ctl.rc_lower_rtt_us_cts = cts;
19979 rack->r_ctl.rc_time_probertt_entered = rack->r_ctl.rc_lower_rtt_us_cts;
19980 rack->r_ctl.rc_time_probertt_starts = rack->r_ctl.rc_lower_rtt_us_cts;
19981 rack->r_ctl.rc_time_of_last_probertt = rack->r_ctl.rc_lower_rtt_us_cts;
19989 (rack->r_ctl.fsb.tcp_ip_hdr) &&
19990 (rack->r_fsb_inited == 0) &&
19991 (rack->r_state != TCPS_CLOSED))
19992 rack_init_fsb_block(tp, rack, tcp_outflags[tp->t_state]);
19993 if (rack->rc_sendvars_notset == 1) {
19994 rack->rc_sendvars_notset = 0;
19996 * Make sure any TCP timers (keep-alive) are not running.
20000 if ((rack->rack_no_prr == 1) &&
20001 (rack->rc_always_pace == 0)) {
20004 * no-pacing enabled and prr is turned off that
20012 rack->rack_no_prr = 0;
20014 if ((rack->pcm_enabled == 1) &&
20015 (rack->pcm_needed == 0) &&
20023 if (tp->t_srtt)
20024 rtts_idle = tot_idle / tp->t_srtt;
20027 rnds = rack->r_ctl.current_round - rack->r_ctl.last_pcm_round;
20028 rack->r_ctl.pcm_idle_rounds += rtts_idle;
20029 if ((rnds + rack->r_ctl.pcm_idle_rounds) >= rack_pcm_every_n_rounds) {
20030 rack->pcm_needed = 1;
20031 rack_log_pcm(rack, 8, rack->r_ctl.last_pcm_round, rtts_idle, rack->r_ctl.current_round );
20040 segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs);
20042 if (rack->r_ctl.rc_pace_max_segs == 0)
20043 pace_max_seg = rack->rc_user_set_max_segs * segsiz;
20045 pace_max_seg = rack->r_ctl.rc_pace_max_segs;
20046 if (TCPS_HAVEESTABLISHED(tp->t_state) &&
20047 (rack->r_ctl.pcm_max_seg == 0)) {
20053 rack->r_ctl.pcm_max_seg = rc_init_window(rack);
20054 if (rack->r_ctl.pcm_max_seg < (ctf_fixed_maxseg(tp) * 10)) {
20058 rack->r_ctl.pcm_max_seg = ctf_fixed_maxseg(tp) * 10;
20061 if ((rack->r_ctl.pcm_max_seg != 0) && (rack->pcm_needed == 1)) {
20064 if (tp->snd_wnd > ctf_outstanding(tp))
20065 rw_avail = tp->snd_wnd - ctf_outstanding(tp);
20068 if (tp->snd_cwnd > ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked))
20069 cwa = tp->snd_cwnd -ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
20072 if ((cwa >= rack->r_ctl.pcm_max_seg) &&
20073 (rw_avail > rack->r_ctl.pcm_max_seg)) {
20075 pace_max_seg = rack->r_ctl.pcm_max_seg;
20077 rack->r_fast_output = 0;
20081 cwa, rack->r_ctl.pcm_max_seg, rw_avail);
20084 sb_offset = tp->snd_max - tp->snd_una;
20085 cwnd_to_use = rack->r_ctl.cwnd_to_use = tp->snd_cwnd;
20086 flags = tcp_outflags[tp->t_state];
20087 while (rack->rc_free_cnt < rack_free_cache) {
20093 so = inp->inp_socket;
20094 sb = &so->so_snd;
20097 TAILQ_INSERT_TAIL(&rack->r_ctl.rc_free, rsm, r_tnext);
20098 rack->rc_free_cnt++;
20105 SOCK_SENDBUF_LOCK(inp->inp_socket);
20106 so = inp->inp_socket;
20107 sb = &so->so_snd;
20110 if (rack->r_ctl.rc_resend) {
20112 rsm = rack->r_ctl.rc_resend;
20113 rack->r_ctl.rc_resend = NULL;
20114 len = rsm->r_end - rsm->r_start;
20117 KASSERT(SEQ_LEQ(tp->snd_una, rsm->r_start),
20118 ("%s:%d: r.start:%u < SND.UNA:%u; tp:%p, rack:%p, rsm:%p",
20120 rsm->r_start, tp->snd_una, tp, rack, rsm));
20121 sb_offset = rsm->r_start - tp->snd_una;
20123 } else if (rack->r_collapse_point_valid &&
20130 tcp_trace_point(rack->rc_tp, TCP_TP_COLLAPSED_RXT);
20131 rack->r_ctl.last_collapse_point = rsm->r_end;
20133 if (SEQ_GEQ(rack->r_ctl.last_collapse_point,
20134 rack->r_ctl.high_collapse_point))
20135 rack->r_collapse_point_valid = 0;
20139 len = rsm->r_end - rsm->r_start;
20140 sb_offset = rsm->r_start - tp->snd_una;
20145 if ((!IN_FASTRECOVERY(tp->t_flags)) &&
20146 ((rsm->r_flags & RACK_MUST_RXT) == 0) &&
20147 ((tp->t_flags & TF_WASFRECOVERY) == 0)) {
20148 /* Enter recovery if not induced by a time-out */
20149 rack_cong_signal(tp, CC_NDUPACK, tp->snd_una, __LINE__);
20152 if (SEQ_LT(rsm->r_start, tp->snd_una)) {
20153 panic("Huh, tp:%p rack:%p rsm:%p start:%u < snd_una:%u\n",
20154 tp, rack, rsm, rsm->r_start, tp->snd_una);
20157 len = rsm->r_end - rsm->r_start;
20158 KASSERT(SEQ_LEQ(tp->snd_una, rsm->r_start),
20159 ("%s:%d: r.start:%u < SND.UNA:%u; tp:%p, rack:%p, rsm:%p",
20161 rsm->r_start, tp->snd_una, tp, rack, rsm));
20162 sb_offset = rsm->r_start - tp->snd_una;
20171 } else if (rack->r_ctl.rc_tlpsend) {
20182 rsm = rack->r_ctl.rc_tlpsend;
20184 rsm->r_flags |= RACK_TLP;
20185 rack->r_ctl.rc_tlpsend = NULL;
20187 tlen = rsm->r_end - rsm->r_start;
20190 KASSERT(SEQ_LEQ(tp->snd_una, rsm->r_start),
20191 ("%s:%d: r.start:%u < SND.UNA:%u; tp:%p, rack:%p, rsm:%p",
20193 rsm->r_start, tp->snd_una, tp, rack, rsm));
20194 sb_offset = rsm->r_start - tp->snd_una;
20195 cwin = min(tp->snd_wnd, tlen);
20198 if (rack->r_must_retran &&
20200 (SEQ_GT(tp->snd_max, tp->snd_una)) &&
20205 * a) This is a non-sack connection, we had a time-out
20219 sendwin = min(tp->snd_wnd, tp->snd_cwnd);
20220 flight = ctf_flight_size(tp, rack->r_ctl.rc_out_at_rto);
20225 so = inp->inp_socket;
20226 sb = &so->so_snd;
20231 * outstanding/not-acked should be marked.
20234 rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap);
20237 rack->r_must_retran = 0;
20238 rack->r_ctl.rc_out_at_rto = 0;
20239 so = inp->inp_socket;
20240 sb = &so->so_snd;
20243 if ((rsm->r_flags & RACK_MUST_RXT) == 0) {
20248 rack->r_must_retran = 0;
20249 rack->r_ctl.rc_out_at_rto = 0;
20254 len = rsm->r_end - rsm->r_start;
20255 sb_offset = rsm->r_start - tp->snd_una;
20257 if ((rack->full_size_rxt == 0) &&
20258 (rack->shape_rxt_to_pacing_min == 0) &&
20261 else if (rack->shape_rxt_to_pacing_min &&
20262 rack->gp_ready) {
20283 (rack->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
20285 if (!rack->alloc_limit_reported) {
20286 rack->alloc_limit_reported = 1;
20289 so = inp->inp_socket;
20290 sb = &so->so_snd;
20293 if (rsm && (rsm->r_flags & RACK_HAS_FIN)) {
20295 len--;
20304 if (rsm && rack->r_fsb_inited &&
20306 ((rsm->r_flags & RACK_HAS_FIN) == 0)) {
20313 so = inp->inp_socket;
20314 sb = &so->so_snd;
20320 if ((tp->t_flags2 & TF2_TCP_SCWND_ALLOWED) &&
20321 rack->rack_enable_scwnd) {
20323 if (rack->gp_ready &&
20324 (rack->rack_attempted_scwnd == 0) &&
20325 (rack->r_ctl.rc_scw == NULL) &&
20326 tp->t_lib) {
20329 rack->rack_attempted_scwnd = 1;
20330 rack->r_ctl.rc_scw = tcp_shared_cwnd_alloc(tp,
20331 &rack->r_ctl.rc_scw_index,
20334 if (rack->r_ctl.rc_scw &&
20335 (rack->rack_scwnd_is_idle == 1) &&
20336 sbavail(&so->so_snd)) {
20338 tcp_shared_cwnd_active(rack->r_ctl.rc_scw, rack->r_ctl.rc_scw_index);
20339 rack->rack_scwnd_is_idle = 0;
20341 if (rack->r_ctl.rc_scw) {
20343 rack->r_ctl.cwnd_to_use = cwnd_to_use = tcp_shared_cwnd_update(rack->r_ctl.rc_scw,
20344 rack->r_ctl.rc_scw_index,
20345 tp->snd_cwnd, tp->snd_wnd, segsiz);
20353 if (tp->t_flags & TF_NEEDFIN)
20355 if (tp->t_flags & TF_NEEDSYN)
20359 end_rsm = TAILQ_LAST_FAST(&rack->r_ctl.rc_tmap, rack_sendmap, r_tnext);
20366 (TCPS_HAVEESTABLISHED(tp->t_state) ||
20367 (tp->t_flags & TF_FASTOPEN))) {
20377 if (SEQ_GT(tp->snd_max, tp->snd_una) && avail)
20378 sb_offset = tp->snd_max - tp->snd_una;
20381 if ((IN_FASTRECOVERY(tp->t_flags) == 0) || rack->rack_no_prr) {
20382 if (rack->r_ctl.rc_tlp_new_data) {
20384 if (rack->r_ctl.rc_tlp_new_data > (uint32_t) (avail - sb_offset)) {
20385 rack->r_ctl.rc_tlp_new_data = (uint32_t) (avail - sb_offset);
20387 if ((rack->r_ctl.rc_tlp_new_data + sb_offset) > tp->snd_wnd) {
20388 if (tp->snd_wnd > sb_offset)
20389 len = tp->snd_wnd - sb_offset;
20393 len = rack->r_ctl.rc_tlp_new_data;
20395 rack->r_ctl.rc_tlp_new_data = 0;
20399 if ((rack->r_ctl.crte == NULL) &&
20400 IN_FASTRECOVERY(tp->t_flags) &&
20401 (rack->full_size_rxt == 0) &&
20402 (rack->shape_rxt_to_pacing_min == 0) &&
20412 } else if (rack->shape_rxt_to_pacing_min &&
20413 rack->gp_ready) {
20431 outstanding = tp->snd_max - tp->snd_una;
20432 if ((rack->r_ctl.rc_prr_sndcnt + outstanding) > tp->snd_wnd) {
20433 if (tp->snd_wnd > outstanding) {
20434 len = tp->snd_wnd - outstanding;
20439 len = avail - sb_offset;
20447 len = avail - sb_offset;
20452 if (len > rack->r_ctl.rc_prr_sndcnt) {
20453 len = rack->r_ctl.rc_prr_sndcnt;
20465 * let us send a lot as well :-)
20467 if (rack->r_ctl.rc_prr_sendalot == 0) {
20479 leftinsb = sbavail(sb) - sb_offset;
20486 } else if (!TCPS_HAVEESTABLISHED(tp->t_state)) {
20493 !(tp->t_flags & TF_FASTOPEN)) {
20505 * SYN-SENT state and if segment contains data and if we don't know
20509 SEQ_GT(tp->snd_max, tp->snd_una) &&
20511 (tp->t_rxtshift == 0))) {
20516 if ((tp->t_flags & TF_FASTOPEN) &&
20517 (tp->t_state == TCPS_SYN_RECEIVED))
20525 if ((flags & TH_SYN) && (tp->t_flags & TF_NOOPT)) {
20532 * - When retransmitting SYN|ACK on a passively-created socket
20534 * - When retransmitting SYN on an actively created socket
20536 * - When sending a zero-length cookie (cookie request) on an
20539 * - When the socket is in the CLOSED state (RST is being sent)
20541 if ((tp->t_flags & TF_FASTOPEN) &&
20542 (((flags & TH_SYN) && (tp->t_rxtshift > 0)) ||
20543 ((tp->t_state == TCPS_SYN_SENT) &&
20544 (tp->t_tfo_client_cookie_len == 0)) ||
20549 /* Without fast-open there should never be data sent on a SYN */
20550 if ((flags & TH_SYN) && !(tp->t_flags & TF_FASTOPEN)) {
20564 if ((tp->snd_wnd == 0) &&
20565 (TCPS_HAVEESTABLISHED(tp->t_state)) &&
20566 (tp->snd_una == tp->snd_max) &&
20568 rack_enter_persist(tp, rack, cts, tp->snd_una);
20578 if ((tp->snd_wnd < min((rack->r_ctl.rc_high_rwnd/2), minseg)) &&
20579 (TCPS_HAVEESTABLISHED(tp->t_state)) &&
20581 (len < (int)(sbavail(sb) - sb_offset))) {
20591 if (tp->snd_max == tp->snd_una) {
20596 rack_enter_persist(tp, rack, cts, tp->snd_una);
20599 (ctf_flight_size(tp, rack->r_ctl.rc_sacked) > (2 * segsiz)) &&
20600 (len < (int)(sbavail(sb) - sb_offset)) &&
20613 } else if (((tp->snd_wnd - ctf_outstanding(tp)) <
20614 min((rack->r_ctl.rc_high_rwnd/2), minseg)) &&
20615 (ctf_flight_size(tp, rack->r_ctl.rc_sacked) > (2 * segsiz)) &&
20616 (len < (int)(sbavail(sb) - sb_offset)) &&
20617 (TCPS_HAVEESTABLISHED(tp->t_state))) {
20627 } else if ((rack->r_ctl.crte != NULL) &&
20628 (tp->snd_wnd >= (pace_max_seg * max(1, rack_hw_rwnd_factor))) &&
20630 (ctf_flight_size(tp, rack->r_ctl.rc_sacked) >= (2 * segsiz)) &&
20631 (len < (int)(sbavail(sb) - sb_offset))) {
20651 * defeats the point of hw-pacing (i.e. to help us get
20666 * presence of TCP-MD5, SACK retransmits, SACK advertisements and IP
20680 * Pre-calculate here as we save another lookup into the darknesses
20699 if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > segsiz &&
20700 (tp->t_port == 0) &&
20701 ((tp->t_flags & TF_SIGNATURE) == 0) &&
20708 outstanding = tp->snd_max - tp->snd_una;
20709 if (tp->t_flags & TF_SENTFIN) {
20714 outstanding--;
20717 if ((rsm->r_flags & RACK_HAS_FIN) == 0)
20721 recwin = lmin(lmax(sbspace(&so->so_rcv), 0),
20722 (long)TCP_MAXWIN << tp->rcv_scale);
20726 * conditions when len is non-zero:
20728 * - We have a full segment (or more with TSO) - This is the last
20730 * NODELAY - we've timed out (e.g. persist timer) - we have more
20732 * limited the window size) - we need to retransmit
20744 if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */
20745 (idle || (tp->t_flags & TF_NODELAY)) &&
20747 (tp->t_flags & TF_NOPUSH) == 0) {
20751 if ((tp->snd_una == tp->snd_max) && len) { /* Nothing outstanding */
20755 if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) {
20763 if (((tp->snd_wnd - ctf_outstanding(tp)) < segsiz) &&
20800 * pending (it will get piggy-backed on it) or the remote side
20801 * already has done a half-close and won't send more data. Skip
20802 * this if the connection is in T/TCP half-open state.
20804 if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN) &&
20805 !(tp->t_flags & TF_DELACK) &&
20806 !TCPS_HAVERCVDFIN(tp->t_state)) {
20810 * tp->rcv_scale.
20816 if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) {
20817 oldwin = (tp->rcv_adv - tp->rcv_nxt);
20819 adv -= oldwin;
20832 if (oldwin >> tp->rcv_scale >= (adv + oldwin) >> tp->rcv_scale)
20836 (adv >= (int32_t)(so->so_rcv.sb_hiwat / 4) ||
20837 recwin <= (int32_t)(so->so_rcv.sb_hiwat / 8) ||
20838 so->so_rcv.sb_hiwat <= 8 * segsiz)) {
20842 if (2 * adv >= (int32_t) so->so_rcv.sb_hiwat) {
20851 * is also a catch-all for the retransmit timer timeout case.
20853 if (tp->t_flags & TF_ACKNOW) {
20857 if (((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0)) {
20866 (tp->snd_max == tp->snd_una)) {
20879 if ((tp->t_flags & TF_FASTOPEN) == 0 &&
20882 (sbused(sb) == (tp->snd_max - tp->snd_una)) &&
20883 ((tp->snd_max - tp->snd_una) <= segsiz)) {
20892 * the peer wait for the delayed-ack timer to run off
20898 rack->r_ctl.fsb.recwin = recwin;
20904 rack->r_fsb_inited &&
20905 TCPS_HAVEESTABLISHED(tp->t_state) &&
20906 ((IN_RECOVERY(tp->t_flags)) == 0) &&
20907 (rack->r_must_retran == 0) &&
20908 ((tp->t_flags & TF_NEEDFIN) == 0) &&
20911 ((orig_len - len) >= segsiz) &&
20918 rack->r_fast_output = 0;
20923 if (SEQ_GT(tp->snd_max, tp->snd_nxt))
20924 tp->snd_nxt = tp->snd_max;
20927 uint32_t seq = tp->gput_ack;
20929 rsm = tqhash_max(rack->r_ctl.tqh);
20932 * Mark the last sent that we just-returned (hinting
20935 rsm->r_just_ret = 1;
20938 rack->r_ctl.rc_agg_delayed = 0;
20939 rack->r_early = 0;
20940 rack->r_late = 0;
20941 rack->r_ctl.rc_agg_early = 0;
20943 min(max(segsiz, (rack->r_ctl.rc_high_rwnd/2)),
20944 minseg)) >= tp->snd_wnd) {
20947 if (IN_FASTRECOVERY(tp->t_flags))
20948 rack->r_ctl.rc_prr_sndcnt = 0;
20950 /* We are limited by what's available -- app limited */
20952 if (IN_FASTRECOVERY(tp->t_flags))
20953 rack->r_ctl.rc_prr_sndcnt = 0;
20955 ((tp->t_flags & TF_NODELAY) == 0) &&
20962 * don't send. Another app-limited case.
20965 } else if (tp->t_flags & TF_NOPUSH) {
20976 } else if (IN_FASTRECOVERY(tp->t_flags) &&
20977 (rack->rack_no_prr == 0) &&
20978 (rack->r_ctl.rc_prr_sndcnt < segsiz)) {
20984 panic("rack:%p hit JR_ASSESSING case cwnd_to_use:%u?", rack, cwnd_to_use);
21033 if ((tp->t_flags & TF_GPUTINPROG) &&
21034 SEQ_GT(tp->gput_ack, tp->snd_max)) {
21035 tp->gput_ack = tp->snd_max;
21036 if ((tp->gput_ack - tp->gput_seq) < (MIN_GP_WIN * segsiz)) {
21040 tp->t_flags &= ~TF_GPUTINPROG;
21041 rack_log_pacing_delay_calc(rack, (tp->gput_ack - tp->gput_seq) /*flex2*/,
21042 rack->r_ctl.rc_gp_srtt /*flex1*/,
21043 tp->gput_seq,
21049 rsm = tqhash_max(rack->r_ctl.tqh);
21050 if (rsm && ((rsm->r_flags & RACK_APP_LIMITED) == 0)) {
21051 if (rack->r_ctl.rc_app_limited_cnt == 0)
21052 rack->r_ctl.rc_end_appl = rack->r_ctl.rc_first_appl = rsm;
21059 if (rack->r_ctl.rc_end_appl)
21060 rack->r_ctl.rc_end_appl->r_nseq_appl = rsm->r_start;
21061 rack->r_ctl.rc_end_appl = rsm;
21063 rsm->r_flags |= RACK_APP_LIMITED;
21064 rack->r_ctl.rc_app_limited_cnt++;
21068 rack->r_ctl.rc_app_limited_cnt, seq,
21069 tp->gput_ack, 0, 0, 4, __LINE__, NULL, 0);
21073 if ((tp->snd_max == tp->snd_una) &&
21074 TCPS_HAVEESTABLISHED(tp->t_state) &&
21076 (sbavail(sb) > tp->snd_wnd) &&
21077 (tp->snd_wnd < min((rack->r_ctl.rc_high_rwnd/2), minseg))) {
21078 /* Yes, let's make sure to move to persist before timer-start */
21079 rack_enter_persist(tp, rack, rack->r_ctl.rc_rcvtime, tp->snd_una);
21086 rack->r_ctl.rc_scw) {
21087 tcp_shared_cwnd_idle(rack->r_ctl.rc_scw, rack->r_ctl.rc_scw_index);
21088 rack->rack_scwnd_is_idle = 1;
21094 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
21095 tp->tcp_cnt_counters[SND_OUT_DATA]++;
21097 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
21098 tp->tcp_proc_time[SND_OUT_DATA] += (crtsc - ts_val);
21100 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
21101 tp->tcp_cnt_counters[CNT_OF_MSS_OUT] += ((tot_len_this_send + segsiz - 1) / segsiz);
21105 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
21106 tp->tcp_cnt_counters[SND_LIMITED]++;
21108 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
21109 tp->tcp_proc_time[SND_LIMITED] += (crtsc - ts_val);
21117 if ((rack->r_ctl.crte != NULL) &&
21119 ((rack->rc_hw_nobuf == 1) ||
21129 rack->r_ctl.rc_agg_delayed = 0;
21130 rack->r_ctl.rc_agg_early = 0;
21131 rack->r_early = 0;
21132 rack->r_late = 0;
21150 if (TCPS_HAVEESTABLISHED(tp->t_state) &&
21151 (sbused(sb) == (tp->snd_max - tp->snd_una)) &&
21152 ((tp->snd_max - tp->snd_una) <= segsiz)) {
21161 * the peer wait for the delayed-ack timer to run off
21174 (rack->pcm_in_progress == 0) &&
21175 (rack->r_ctl.pcm_max_seg > 0) &&
21176 (len >= rack->r_ctl.pcm_max_seg)) {
21179 rack_log_pcm(rack, 5, len, rack->r_ctl.pcm_max_seg, add_flag);
21181 rack_log_pcm(rack, 6, len, rack->r_ctl.pcm_max_seg, add_flag);
21187 tp->t_flags2 |= TF2_PLPMTU_MAXSEGSNT;
21189 tp->t_flags2 &= ~TF2_PLPMTU_MAXSEGSNT;
21211 * be snd_max-1 else it's snd_max.
21215 rack_seq = tp->iss;
21217 (tp->t_flags & TF_SENTFIN))
21218 rack_seq = tp->snd_max - 1;
21220 rack_seq = tp->snd_max;
21222 rack_seq = rsm->r_start;
21226 * established connection segments. Options for SYN-ACK segments
21230 if ((tp->t_flags & TF_NOOPT) == 0) {
21233 to.to_mss = tcp_mssopt(&inp->inp_inc);
21234 if (tp->t_port)
21235 to.to_mss -= V_tcp_udp_tunneling_overhead;
21245 if ((tp->t_flags & TF_FASTOPEN) &&
21246 (tp->t_rxtshift == 0)) {
21247 if (tp->t_state == TCPS_SYN_RECEIVED) {
21250 (u_int8_t *)&tp->t_tfo_cookie.server;
21253 } else if (tp->t_state == TCPS_SYN_SENT) {
21255 tp->t_tfo_client_cookie_len;
21257 tp->t_tfo_cookie.client;
21271 /* Window scaling. */
21272 if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
21273 to.to_wscale = tp->request_r_scale;
21277 if ((tp->t_flags & TF_RCVD_TSTMP) ||
21278 ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) {
21281 if ((rack->r_rcvpath_rtt_up == 1) &&
21282 (ms_cts == rack->r_ctl.last_rcv_tstmp_for_rtt)) {
21290 * our ack-probe.
21296 to.to_tsval = ts_to_use + tp->ts_offset;
21297 to.to_tsecr = tp->ts_recent;
21300 (TCPS_HAVEESTABLISHED(tp->t_state)) &&
21301 ((ms_cts - rack->r_ctl.last_rcv_tstmp_for_rtt) > RCV_PATH_RTT_MS) &&
21302 (tp->snd_una == tp->snd_max) &&
21305 (rack->r_ctl.current_round != 0) &&
21307 (rack->r_rcvpath_rtt_up == 0)) {
21308 rack->r_ctl.last_rcv_tstmp_for_rtt = ms_cts;
21309 rack->r_ctl.last_time_of_arm_rcv = cts;
21310 rack->r_rcvpath_rtt_up = 1;
21312 rack_seq--;
21316 if (tp->rfbuf_ts == 0 &&
21317 (so->so_rcv.sb_flags & SB_AUTOSIZE)) {
21318 tp->rfbuf_ts = ms_cts;
21321 if (tp->t_flags & TF_SACK_PERMIT) {
21324 else if (TCPS_HAVEESTABLISHED(tp->t_state) &&
21325 tp->rcv_numsacks > 0) {
21327 to.to_nsacks = tp->rcv_numsacks;
21328 to.to_sacks = (u_char *)tp->sackblks;
21332 /* TCP-MD5 (RFC2385). */
21333 if (tp->t_flags & TF_SIGNATURE)
21343 if ((tp->t_flags & TF_FASTOPEN) && wanted_cookie &&
21347 if (tp->t_port) {
21353 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
21354 tp->tcp_cnt_counters[SND_OUT_FAIL]++;
21356 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
21357 tp->tcp_proc_time[SND_OUT_FAIL] += (crtsc - ts_val);
21370 if (inp->inp_options)
21371 ipoptlen = inp->inp_options->m_len -
21384 if (len + optlen + ipoptlen > tp->t_maxseg) {
21391 if_hw_tsomax = tp->t_tsomax;
21392 if_hw_tsomaxsegcount = tp->t_tsomaxsegcount;
21393 if_hw_tsomaxsegsize = tp->t_tsomaxsegsize;
21403 max_len = (if_hw_tsomax - hdrlen -
21417 max_len = (tp->t_maxseg - optlen);
21422 len -= moff;
21439 if (tp->t_flags & TF_NEEDFIN) {
21444 if (optlen + ipoptlen >= tp->t_maxseg) {
21458 len = tp->t_maxseg - optlen - ipoptlen;
21490 if ((sbused(sb) == (tp->snd_max - tp->snd_una)) &&
21491 ((tp->snd_max - tp->snd_una) <= segsiz)) {
21500 * the peer wait for the delayed-ack timer to run off
21512 hw_tls = tp->t_nic_ktls_xmit != 0;
21541 m->m_data += max_linkhdr;
21542 m->m_len = hdrlen;
21551 if (len <= MHLEN - hdrlen - max_linkhdr && !hw_tls) {
21561 m->m_len += len;
21576 m->m_next = tcp_m_copym(
21584 if (len <= (tp->t_maxseg - optlen)) {
21593 if (m->m_next == NULL) {
21602 if (rsm && (rsm->r_flags & RACK_TLP)) {
21610 tp->t_sndrexmitpack++;
21615 stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
21622 stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
21641 if (tp->t_flags & TF_ACKNOW)
21660 m->m_data += max_linkhdr;
21661 m->m_len = hdrlen;
21664 m->m_pkthdr.rcvif = (struct ifnet *)0;
21668 if ((ipoptlen == 0) && (rack->r_ctl.fsb.tcp_ip_hdr) && rack->r_fsb_inited) {
21671 ip6 = (struct ip6_hdr *)rack->r_ctl.fsb.tcp_ip_hdr;
21675 ip = (struct ip *)rack->r_ctl.fsb.tcp_ip_hdr;
21677 th = rack->r_ctl.fsb.th;
21678 udp = rack->r_ctl.fsb.udp;
21682 ulen = hdrlen + len - sizeof(struct ip6_hdr);
21685 ulen = hdrlen + len - sizeof(struct ip);
21686 udp->uh_ulen = htons(ulen);
21692 if (tp->t_port) {
21694 udp->uh_sport = htons(V_tcp_udp_tunneling_port);
21695 udp->uh_dport = tp->t_port;
21696 ulen = hdrlen + len - sizeof(struct ip6_hdr);
21697 udp->uh_ulen = htons(ulen);
21701 tcpip_fillheaders(inp, tp->t_port, ip6, th);
21707 if (tp->t_port) {
21709 udp->uh_sport = htons(V_tcp_udp_tunneling_port);
21710 udp->uh_dport = tp->t_port;
21711 ulen = hdrlen + len - sizeof(struct ip);
21712 udp->uh_ulen = htons(ulen);
21716 tcpip_fillheaders(inp, tp->t_port, ip, th);
21725 if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
21729 if (TCPS_HAVERCVDSYN(tp->t_state) &&
21730 (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
21732 if ((tp->t_state == TCPS_SYN_RECEIVED) &&
21733 (tp->t_flags2 & TF2_ECN_SND_ECE))
21734 tp->t_flags2 &= ~TF2_ECN_SND_ECE;
21737 ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
21738 ip6->ip6_flow |= htonl(ect << 20);
21744 ip->ip_tos &= ~IPTOS_ECN_MASK;
21745 ip->ip_tos |= ect;
21749 th->th_seq = htonl(rack_seq);
21750 th->th_ack = htonl(tp->rcv_nxt);
21760 if (recwin < (long)(so->so_rcv.sb_hiwat / 4) &&
21764 if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) &&
21765 recwin < (long)(tp->rcv_adv - tp->rcv_nxt))
21766 recwin = (long)(tp->rcv_adv - tp->rcv_nxt);
21775 th->th_win = htons((u_short)
21776 (min(sbspace(&so->so_rcv), TCP_MAXWIN)));
21778 /* Avoid shrinking window with window scaling. */
21779 recwin = roundup2(recwin, 1 << tp->rcv_scale);
21780 th->th_win = htons((u_short)(recwin >> tp->rcv_scale));
21783 * Adjust the RXWIN0SENT flag - indicate that we have advertised a 0
21790 if (th->th_win == 0) {
21791 tp->t_sndzerowin++;
21792 tp->t_flags |= TF_RXWIN0SENT;
21794 tp->t_flags &= ~TF_RXWIN0SENT;
21795 tp->snd_up = tp->snd_una; /* drag it along, its deprecated */
21797 if ((ipoptlen == 0) && (rack->r_ctl.fsb.tcp_ip_hdr) && rack->r_fsb_inited) {
21801 memcpy(cpto, rack->r_ctl.fsb.tcp_ip_hdr, rack->r_ctl.fsb.tcp_ip_hdr_len);
21821 th = (struct tcphdr *)(cpto + ((uint8_t *)rack->r_ctl.fsb.th - rack->r_ctl.fsb.tcp_ip_hdr));
21824 udp = (struct udphdr *)(cpto + ((uint8_t *)rack->r_ctl.fsb.udp - rack->r_ctl.fsb.tcp_ip_hdr));
21828 th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
21834 m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
21844 (u_char *)(th + 1) + (to.to_signature - opt)) != 0) {
21859 if (tp->t_port) {
21860 m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
21861 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
21862 udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0);
21863 th->th_sum = htons(0);
21866 m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
21867 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
21868 th->th_sum = in6_cksum_pseudo(ip6,
21879 if (tp->t_port) {
21880 m->m_pkthdr.csum_flags = CSUM_UDP;
21881 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
21882 udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
21883 ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP));
21884 th->th_sum = htons(0);
21887 m->m_pkthdr.csum_flags = CSUM_TCP;
21888 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
21889 th->th_sum = in_pseudo(ip->ip_src.s_addr,
21890 ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) +
21894 KASSERT(ip->ip_v == IPVERSION,
21895 ("%s: IP version incorrect: %d", __func__, ip->ip_v));
21908 KASSERT(len > tp->t_maxseg - optlen,
21910 m->m_pkthdr.csum_flags |= CSUM_TSO;
21911 m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen;
21921 if ((rack->r_ctl.crte != NULL) &&
21922 (rack->rc_hw_nobuf == 0) &&
21927 if (tcp_bblogging_on(rack->rc_tp)) {
21931 log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
21932 if (rack->rack_no_prr)
21935 log.u_bbr.flex1 = rack->r_ctl.rc_prr_sndcnt;
21936 log.u_bbr.flex2 = rack->r_ctl.rc_pace_min_segs;
21937 log.u_bbr.flex3 = rack->r_ctl.rc_pace_max_segs;
21940 log.u_bbr.flex6 = rack->r_ctl.rc_agg_early;
21941 log.u_bbr.applimited = rack->r_ctl.rc_agg_delayed;
21943 log.u_bbr.cur_del_rate = rack->r_ctl.gp_bw;
21946 if (rsm->r_flags & RACK_RWND_COLLAPSED) {
21947 rack_log_collapse(rack, rsm->r_start, rsm->r_end, 0, __LINE__, 5, rsm->r_flags, rsm);
21949 counter_u64_add(rack_collapsed_win_rxt_bytes, (rsm->r_end - rsm->r_start));
21963 log.u_bbr.pkts_out = tp->t_maxseg;
21965 log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
21966 if (rsm && (rsm->r_rtr_cnt > 0)) {
21971 log.u_bbr.flex5 = rsm->r_fas;
21972 log.u_bbr.bbr_substate = rsm->r_bas;
21980 log.u_bbr.bbr_substate = (uint8_t)((len + segsiz - 1)/segsiz);
21987 log.u_bbr.delRate = rsm->r_flags;
21989 log.u_bbr.delRate |= rack->r_must_retran;
21993 log.u_bbr.delRate = rack->r_must_retran;
21997 lgb = tcp_log_event(tp, th, &so->so_rcv, &so->so_snd, TCP_LOG_OUT, ERRNO_UNK,
22008 * m->m_pkthdr.len should have been set before cksum calculation,
22019 rack->r_ctl.fsb.hoplimit = ip6->ip6_hlim = in6_selecthlim(inp, NULL);
22026 ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));
22028 if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss)
22029 tp->t_flags2 |= TF2_PLPMTU_PMTUD;
22031 tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
22033 if (tp->t_state == TCPS_SYN_SENT)
22039 inp->in6p_outputopts,
22040 &inp->inp_route6,
22044 if (error == EMSGSIZE && inp->inp_route6.ro_nh != NULL)
22045 mtu = inp->inp_route6.ro_nh->nh_mtu;
22053 ip->ip_len = htons(m->m_pkthdr.len);
22055 if (inp->inp_vflag & INP_IPV6PROTO)
22056 ip->ip_ttl = in6_selecthlim(inp, NULL);
22058 rack->r_ctl.fsb.hoplimit = ip->ip_ttl;
22069 if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) {
22070 tp->t_flags2 |= TF2_PLPMTU_PMTUD;
22071 if (tp->t_port == 0 || len < V_tcp_minmss) {
22072 ip->ip_off |= htons(IP_DF);
22075 tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
22078 if (tp->t_state == TCPS_SYN_SENT)
22085 inp->inp_options,
22089 &inp->inp_route,
22092 if (error == EMSGSIZE && inp->inp_route.ro_nh != NULL)
22093 mtu = inp->inp_route.ro_nh->nh_mtu;
22097 lgb->tlb_errno = error;
22113 rack->pcm_in_progress = 1;
22114 rack->pcm_needed = 0;
22115 rack_log_pcm(rack, 7, len, rack->r_ctl.pcm_max_seg, add_flag);
22118 if (rack->lt_bw_up == 0) {
22119 rack->r_ctl.lt_timemark = tcp_tv_to_lusectick(&tv);
22120 rack->r_ctl.lt_seq = tp->snd_una;
22121 rack->lt_bw_up = 1;
22122 } else if (((rack_seq + len) - rack->r_ctl.lt_seq) > 0x7fffffff) {
22129 rack->r_ctl.lt_bw_bytes += (tp->snd_una - rack->r_ctl.lt_seq);
22130 rack->r_ctl.lt_seq = tp->snd_una;
22132 if (tmark > rack->r_ctl.lt_timemark) {
22133 rack->r_ctl.lt_bw_time += (tmark - rack->r_ctl.lt_timemark);
22134 rack->r_ctl.lt_timemark = tmark;
22138 rack->forced_ack = 0; /* If we send something zap the FA flag */
22142 rack->rc_last_sent_tlp_past_cumack = 0;
22143 rack->rc_last_sent_tlp_seq_valid = 1;
22144 rack->r_ctl.last_sent_tlp_seq = rsm->r_start;
22145 rack->r_ctl.last_sent_tlp_len = rsm->r_end - rsm->r_start;
22147 if (rack->rc_hw_nobuf) {
22148 rack->rc_hw_nobuf = 0;
22149 rack->r_ctl.rc_agg_delayed = 0;
22150 rack->r_early = 0;
22151 rack->r_late = 0;
22152 rack->r_ctl.rc_agg_early = 0;
22156 rack->rc_gp_saw_rec = 1;
22158 if (cwnd_to_use > tp->snd_ssthresh) {
22160 rack->rc_gp_saw_ca = 1;
22163 rack->rc_gp_saw_ss = 1;
22166 if (TCPS_HAVEESTABLISHED(tp->t_state) &&
22167 (tp->t_flags & TF_SACK_PERMIT) &&
22168 tp->rcv_numsacks > 0)
22178 counter_u64_add(rack_out_size[(TCP_MSS_ACCT_ATIMER-1)], 1);
22183 if ((rack->rack_no_prr == 0) &&
22186 if (rack->r_ctl.rc_prr_sndcnt >= len)
22187 rack->r_ctl.rc_prr_sndcnt -= len;
22189 rack->r_ctl.rc_prr_sndcnt = 0;
22197 rsm->r_flags &= ~RACK_TLP;
22203 (tp->snd_una == tp->snd_max))
22204 rack->r_ctl.rc_tlp_rxt_last_time = cts;
22211 tcp_seq startseq = tp->snd_max;
22215 rack->r_ctl.rc_loss_count += rsm->r_end - rsm->r_start;
22226 rack->rc_tlp_in_progress = 0;
22227 rack->r_ctl.rc_tlp_cnt_out = 0;
22235 rack->rc_tlp_in_progress = 1;
22236 rack->r_ctl.rc_tlp_cnt_out++;
22244 if ((tp->snd_una == tp->snd_max) && (len > 0)) {
22250 tp->t_acktime = ticks;
22257 ((tp->t_flags & TF_SENTSYN) == 0)) {
22258 tp->snd_max++;
22259 tp->t_flags |= TF_SENTSYN;
22262 ((tp->t_flags & TF_SENTFIN) == 0)) {
22263 tp->snd_max++;
22264 tp->t_flags |= TF_SENTFIN;
22267 tp->snd_max += len;
22268 if (rack->rc_new_rnd_needed) {
22269 rack_new_round_starts(tp, rack, tp->snd_max);
22277 if (tp->t_rtttime == 0) {
22278 tp->t_rtttime = ticks;
22279 tp->t_rtseq = startseq;
22283 ((tp->t_flags & TF_GPUTINPROG) == 0))
22294 if (rack->r_fast_output && len) {
22295 if (rack->r_ctl.fsb.left_to_send > len)
22296 rack->r_ctl.fsb.left_to_send -= len;
22298 rack->r_ctl.fsb.left_to_send = 0;
22299 if (rack->r_ctl.fsb.left_to_send < segsiz)
22300 rack->r_fast_output = 0;
22301 if (rack->r_fast_output) {
22302 rack->r_ctl.fsb.m = sbsndmbuf(sb, (tp->snd_max - tp->snd_una), &rack->r_ctl.fsb.off);
22303 rack->r_ctl.fsb.o_m_len = rack->r_ctl.fsb.m->m_len;
22304 rack->r_ctl.fsb.o_t_len = M_TRAILINGROOM(rack->r_ctl.fsb.m);
22311 ((pace_max_seg - len) > segsiz)) {
22319 n_len = (orig_len - len);
22320 orig_len -= len;
22321 pace_max_seg -= len;
22323 sb_offset = tp->snd_max - tp->snd_una;
22324 /* Re-lock for the next spin */
22331 ((orig_len - len) > segsiz)) {
22339 n_len = (orig_len - len);
22340 orig_len -= len;
22342 sb_offset = tp->snd_max - tp->snd_una;
22343 /* Re-lock for the next spin */
22351 rack->r_ctl.rc_agg_delayed = 0;
22352 rack->r_early = 0;
22353 rack->r_late = 0;
22354 rack->r_ctl.rc_agg_early = 0;
22369 tp->t_softerror = error;
22372 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
22373 tp->tcp_cnt_counters[SND_OUT_FAIL]++;
22375 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
22376 tp->tcp_proc_time[SND_OUT_FAIL] += (crtsc - ts_val);
22386 if (rack->r_ctl.crte != NULL) {
22387 tcp_trace_point(rack->rc_tp, TCP_TP_HWENOBUF);
22388 if (tcp_bblogging_on(rack->rc_tp))
22391 tcp_trace_point(rack->rc_tp, TCP_TP_ENOBUF);
22392 slot = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC);
22393 if (rack->rc_enobuf < 0x7f)
22394 rack->rc_enobuf++;
22397 if (rack->r_ctl.crte != NULL) {
22399 tcp_rl_log_enobuf(rack->r_ctl.crte);
22413 tp->t_flags &= ~TF_TSO;
22417 saved_mtu = tp->t_maxseg;
22418 tcp_mss_update(tp, -1, mtu, NULL, NULL);
22419 if (saved_mtu > tp->t_maxseg) {
22427 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
22428 tp->tcp_cnt_counters[SND_OUT_FAIL]++;
22430 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
22431 tp->tcp_proc_time[SND_OUT_FAIL] += (crtsc - ts_val);
22442 if (TCPS_HAVERCVDSYN(tp->t_state)) {
22443 tp->t_softerror = error;
22452 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
22453 tp->tcp_cnt_counters[SND_OUT_FAIL]++;
22455 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
22456 tp->tcp_proc_time[SND_OUT_FAIL] += (crtsc - ts_val);
22463 rack->rc_enobuf = 0;
22464 if (IN_FASTRECOVERY(tp->t_flags) && rsm)
22465 rack->r_ctl.retran_during_recovery += len;
22474 if (recwin > 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv))
22475 tp->rcv_adv = tp->rcv_nxt + recwin;
22477 tp->last_ack_sent = tp->rcv_nxt;
22478 tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
22508 rack->r_ent_rec_ns = 0;
22509 if (rack->r_must_retran) {
22511 rack->r_ctl.rc_out_at_rto -= (rsm->r_end - rsm->r_start);
22512 if (SEQ_GEQ(rsm->r_end, rack->r_ctl.rc_snd_max_at_rto)) {
22516 rack->r_must_retran = 0;
22517 rack->r_ctl.rc_out_at_rto = 0;
22519 } else if (SEQ_GEQ(tp->snd_max, rack->r_ctl.rc_snd_max_at_rto)) {
22524 rack->r_must_retran = 0;
22525 rack->r_ctl.rc_out_at_rto = 0;
22528 rack->r_ctl.fsb.recwin = recwin;
22529 if ((tp->t_flags & (TF_WASCRECOVERY|TF_WASFRECOVERY)) &&
22530 SEQ_GT(tp->snd_max, rack->r_ctl.rc_snd_max_at_rto)) {
22535 tp->t_flags &= ~(TF_WASCRECOVERY|TF_WASFRECOVERY);
22544 rack->r_fsb_inited &&
22545 TCPS_HAVEESTABLISHED(tp->t_state) &&
22546 ((IN_RECOVERY(tp->t_flags)) == 0) &&
22547 (rack->r_must_retran == 0) &&
22548 ((tp->t_flags & TF_NEEDFIN) == 0) &&
22551 ((orig_len - len) >= segsiz) &&
22558 rack->r_fast_output = 0;
22571 (rack->r_must_retran == 0) &&
22572 rack->r_fsb_inited &&
22573 TCPS_HAVEESTABLISHED(tp->t_state) &&
22574 ((IN_RECOVERY(tp->t_flags)) == 0) &&
22575 ((tp->t_flags & TF_NEEDFIN) == 0) &&
22578 ((orig_len - len) >= segsiz) &&
22584 if (rack->r_fast_output) {
22598 if (SEQ_GT(tp->snd_max, tp->snd_nxt))
22599 tp->snd_nxt = tp->snd_max;
22602 crtsc = get_cyclecount() - ts_val;
22604 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
22605 tp->tcp_cnt_counters[SND_OUT_DATA]++;
22607 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
22608 tp->tcp_proc_time[SND_OUT_DATA] += crtsc;
22610 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
22611 tp->tcp_cnt_counters[CNT_OF_MSS_OUT] += ((tot_len_this_send + segsiz - 1) /segsiz);
22614 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
22615 tp->tcp_cnt_counters[SND_OUT_ACK]++;
22617 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
22618 tp->tcp_proc_time[SND_OUT_ACK] += crtsc;
22633 orig_val = rack->r_ctl.rc_pace_max_segs;
22634 rack_set_pace_segments(rack->rc_tp, rack, __LINE__, NULL);
22635 if (orig_val != rack->r_ctl.rc_pace_max_segs)
22648 rack = (struct tcp_rack *)tp->t_fb_ptr;
22649 if (rack->r_ctl.rc_pace_min_segs != ctf_fixed_maxseg(tp)) {
22658 rack->r_fast_output = 0;
22659 rack->r_ctl.rc_out_at_rto = ctf_flight_size(tp,
22660 rack->r_ctl.rc_sacked);
22661 rack->r_ctl.rc_snd_max_at_rto = tp->snd_max;
22662 rack->r_must_retran = 1;
22664 TAILQ_FOREACH(rsm, &rack->r_ctl.rc_tmap, r_tnext) {
22665 rsm->r_flags |= (RACK_MUST_RXT|RACK_PMTU_CHG);
22668 sack_filter_clear(&rack->r_ctl.rack_sf, tp->snd_una);
22670 tp->snd_nxt = tp->snd_max;
22676 if (rack->dgp_on == 1)
22678 if ((rack->use_fixed_rate == 1) &&
22679 (rack->rc_always_pace == 1)) {
22686 if (rack->rc_always_pace == 1) {
22691 rack->r_ctl.pacing_method |= RACK_DGP_PACING;
22692 rack->rc_fillcw_apply_discount = 0;
22693 rack->dgp_on = 1;
22694 rack->rc_always_pace = 1;
22695 rack->rc_pace_dnd = 1;
22696 rack->use_fixed_rate = 0;
22697 if (rack->gp_ready)
22699 rack->rc_tp->t_flags2 |= TF2_SUPPORTS_MBUFQ;
22700 rack->rack_attempt_hdwr_pace = 0;
22702 rack->full_size_rxt = 1;
22703 rack->shape_rxt_to_pacing_min = 0;
22705 rack->r_use_cmp_ack = 1;
22706 if (TCPS_HAVEESTABLISHED(rack->rc_tp->t_state) &&
22707 rack->r_use_cmp_ack)
22708 rack->rc_tp->t_flags2 |= TF2_MBUF_ACKCMP;
22710 rack->rack_enable_scwnd = 1;
22712 rack->rc_gp_dyn_mul = 1;
22714 rack->r_ctl.rack_per_of_gp_ca = 100;
22716 rack->r_rr_config = 3;
22718 rack->r_ctl.rc_no_push_at_mrtt = 2;
22720 rack->rc_pace_to_cwnd = 1;
22721 rack->rc_pace_fill_if_rttin_range = 0;
22722 rack->rtt_limit_mul = 0;
22724 rack->rack_no_prr = 1;
22726 rack->r_limit_scw = 1;
22728 rack->r_ctl.rack_per_of_gp_rec = 90;
22750 * fill-cw the same settings that profile5 does
22751 * to replace DGP. It then gets the max(dgp-rate, fillcw(discounted)).
22753 rack->rc_fillcw_apply_discount = 1;
22756 if (rack->rc_always_pace == 1) {
22760 rack->dgp_on = 0;
22761 rack->rc_hybrid_mode = 0;
22762 rack->use_fixed_rate = 0;
22766 rack->rc_pace_to_cwnd = 1;
22768 rack->rc_pace_to_cwnd = 0;
22771 rack->r_ctl.pacing_method |= RACK_REG_PACING;
22772 rack->rc_always_pace = 1;
22773 if (rack->rack_hibeta)
22776 rack->rc_always_pace = 0;
22779 rack->rc_rack_tmr_std_based = 1;
22783 rack->rc_rack_use_dsack = 1;
22786 rack->r_use_cmp_ack = 1;
22788 rack->r_use_cmp_ack = 0;
22790 rack->rack_no_prr = 1;
22792 rack->rack_no_prr = 0;
22794 rack->rc_gp_no_rec_chg = 1;
22796 rack->rc_gp_no_rec_chg = 0;
22797 if (rack_enable_mqueue_for_nonpaced || rack->r_use_cmp_ack) {
22798 rack->r_mbuf_queue = 1;
22799 if (TCPS_HAVEESTABLISHED(rack->rc_tp->t_state))
22800 rack->rc_tp->t_flags2 |= TF2_MBUF_ACKCMP;
22801 rack->rc_tp->t_flags2 |= TF2_SUPPORTS_MBUFQ;
22803 rack->r_mbuf_queue = 0;
22804 rack->rc_tp->t_flags2 &= ~TF2_SUPPORTS_MBUFQ;
22807 rack->rack_enable_scwnd = 1;
22809 rack->rack_enable_scwnd = 0;
22812 rack->rc_gp_dyn_mul = 1;
22814 rack->r_ctl.rack_per_of_gp_ca = rack_do_dyn_mul;
22816 rack->r_ctl.rack_per_of_gp_ca = rack_per_of_gp_ca;
22817 rack->rc_gp_dyn_mul = 0;
22819 rack->r_rr_config = 0;
22820 rack->r_ctl.rc_no_push_at_mrtt = 0;
22821 rack->rc_pace_fill_if_rttin_range = 0;
22822 rack->rtt_limit_mul = 0;
22825 rack->rack_hdw_pace_ena = 1;
22827 rack->rack_hdw_pace_ena = 0;
22829 rack->rack_no_prr = 1;
22831 rack->rack_no_prr = 0;
22833 rack->r_limit_scw = 1;
22835 rack->r_limit_scw = 0;
22851 * No space yikes -- fail out..
22855 dol->optname = sopt_name;
22856 dol->optval = loptval;
22857 TAILQ_INSERT_TAIL(&rack->r_ctl.opt_list, dol, next);
22873 rack->use_fixed_rate = 0;
22874 rack->r_ctl.rc_fixed_pacing_rate_rec = 0;
22875 rack->r_ctl.rc_fixed_pacing_rate_ca = 0;
22876 rack->r_ctl.rc_fixed_pacing_rate_ss = 0;
22878 sft = tcp_req_alloc_req_full(rack->rc_tp, &hybrid->req, tcp_tv_to_lusectick(&tv), 0);
22880 rack->rc_tp->tcp_hybrid_error++;
22882 seq = rack->rc_tp->snd_una + rack->rc_tp->t_inpcb.inp_socket->so_snd.sb_ccc;
22887 hybrid->hybrid_flags &= TCP_HYBRID_PACING_USER_MASK;
22889 seq = sft->start_seq;
22890 if ((hybrid->hybrid_flags & TCP_HYBRID_PACING_ENABLE) == 0) {
22892 if (rack->rc_hybrid_mode) {
22894 rack->rc_tp->tcp_hybrid_stop++;
22899 if (rack->dgp_on == 0) {
22907 rack->rc_tp->tcp_hybrid_error++;
22916 if (rack->rc_hybrid_mode == 0) {
22919 rack->r_ctl.pacing_method |= RACK_REG_PACING;
22920 rack->rc_hybrid_mode = 1;
22924 if (rack->r_ctl.pacing_method & RACK_DGP_PACING) {
22929 rack->r_ctl.pacing_method &= ~RACK_DGP_PACING;
22933 sft->hybrid_flags = hybrid->hybrid_flags | TCP_HYBRID_PACING_WASSET;
22934 if (hybrid->hybrid_flags & TCP_HYBRID_PACING_CSPR)
22935 sft->cspr = hybrid->cspr;
22937 sft->cspr = 0;
22938 if (hybrid->hybrid_flags & TCP_HYBRID_PACING_H_MS)
22939 sft->hint_maxseg = hybrid->hint_maxseg;
22941 sft->hint_maxseg = 0;
22942 rack->rc_tp->tcp_hybrid_start++;
22954 si->bytes_transmitted = tp->t_sndbytes;
22955 si->bytes_retransmitted = tp->t_snd_rxt_bytes;
22986 rack->rc_rack_tmr_std_based = 1;
22988 rack->rc_rack_tmr_std_based = 0;
22991 rack->rc_rack_use_dsack = 1;
22993 rack->rc_rack_use_dsack = 0;
23000 rack->r_ctl.pace_len_divisor = rack_default_pacing_divisor;
23003 rack->r_ctl.pace_len_divisor = RL_MIN_DIVISOR;
23005 rack->r_ctl.pace_len_divisor = optval;
23011 rack->rack_hibeta = 1;
23017 rack->r_ctl.saved_hibeta = optval;
23018 if (rack->rc_pacing_cc_set)
23020 rack->r_ctl.rc_saved_beta.beta = optval;
23022 if (rack->rc_pacing_cc_set == 0)
23025 rack->rack_hibeta = 0;
23026 if (rack->rc_pacing_cc_set)
23035 rack->r_ctl.timer_slop = optval;
23036 if (rack->rc_tp->t_srtt) {
23041 RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
23043 rack->r_ctl.timer_slop);
23048 if (strcmp(tp->t_cc->name, CCALGONAME_NEWRENO) != 0) {
23053 if (rack->rc_pacing_cc_set) {
23062 if (CC_ALGO(tp)->ctl_output != NULL)
23063 error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, &sopt, &opt);
23071 rack->r_ctl.rc_saved_beta.beta_ecn = optval;
23072 rack->r_ctl.rc_saved_beta.newreno_flags = CC_NEWRENO_BETA_ECN_ENABLED;
23078 if (rack->gp_ready) {
23083 rack->defer_options = 1;
23085 rack->defer_options = 0;
23090 rack->r_ctl.req_measurements = optval;
23097 rack->r_use_labc_for_rec = 1;
23099 rack->r_use_labc_for_rec = 0;
23104 rack->rc_labc = optval;
23111 rack->r_up_only = 1;
23113 rack->r_up_only = 0;
23117 rack->r_ctl.fillcw_cap = loptval;
23121 if ((rack->dgp_on == 1) &&
23122 (rack->r_ctl.pacing_method & RACK_DGP_PACING)) {
23134 rack->r_ctl.pacing_method |= RACK_REG_PACING;
23136 rack->r_ctl.pacing_method &= ~RACK_DGP_PACING;
23138 rack->r_ctl.bw_rate_cap = loptval;
23145 if (rack->r_ctl.side_chan_dis_mask & HYBRID_DIS_MASK) {
23153 rack->r_ctl.side_chan_dis_mask = optval;
23155 rack->r_ctl.side_chan_dis_mask = 0;
23163 if ((optval == 0) && (tp->t_flags2 & TF2_MBUF_ACKCMP)) {
23166 } else if ((optval == 1) && (rack->r_use_cmp_ack == 0)) {
23167 rack->r_use_cmp_ack = 1;
23168 rack->r_mbuf_queue = 1;
23169 tp->t_flags2 |= TF2_SUPPORTS_MBUFQ;
23171 if (rack->r_use_cmp_ack && TCPS_HAVEESTABLISHED(tp->t_state))
23172 tp->t_flags2 |= TF2_MBUF_ACKCMP;
23177 rack->r_limit_scw = 1;
23179 rack->r_limit_scw = 0;
23187 rack->rc_pace_to_cwnd = 0;
23189 rack->rc_pace_to_cwnd = 1;
23194 rack->rc_pace_fill_if_rttin_range = 1;
23195 rack->rtt_limit_mul = optval;
23197 rack->rc_pace_fill_if_rttin_range = 0;
23198 rack->rtt_limit_mul = 0;
23204 rack->r_ctl.rc_no_push_at_mrtt = 0;
23206 rack->r_ctl.rc_no_push_at_mrtt = optval;
23213 rack->rack_enable_scwnd = 0;
23215 rack->rack_enable_scwnd = 1;
23218 /* Now do we use the LRO mbuf-queue feature */
23220 if (optval || rack->r_use_cmp_ack)
23221 rack->r_mbuf_queue = 1;
23223 rack->r_mbuf_queue = 0;
23224 if (rack->r_mbuf_queue || rack->rc_always_pace || rack->r_use_cmp_ack)
23225 tp->t_flags2 |= TF2_SUPPORTS_MBUFQ;
23227 tp->t_flags2 &= ~TF2_SUPPORTS_MBUFQ;
23232 rack->rack_rec_nonrxt_use_cr = 0;
23234 rack->rack_rec_nonrxt_use_cr = 1;
23239 rack->rack_no_prr = 0;
23241 rack->rack_no_prr = 1;
23243 rack->no_prr_addback = 1;
23249 rack->cspr_is_fcc = 1;
23251 rack->cspr_is_fcc = 0;
23256 rack->rc_gp_dyn_mul = 0;
23258 rack->rc_gp_dyn_mul = 1;
23264 rack->r_ctl.rack_per_of_gp_ca = optval;
23277 rack->rack_tlp_threshold_use = optval;
23282 rack->r_ctl.rc_tlp_cwnd_reduce = optval;
23291 if (rack->r_ctl.side_chan_dis_mask & CCSP_DIS_MASK) {
23296 if (rack->rc_always_pace) {
23300 rack->r_ctl.pacing_method |= RACK_REG_PACING;
23301 rack->rc_always_pace = 1;
23302 if (rack->rack_hibeta)
23310 if (rack->rc_always_pace == 1) {
23314 if (rack->r_mbuf_queue || rack->rc_always_pace || rack->r_use_cmp_ack)
23315 tp->t_flags2 |= TF2_SUPPORTS_MBUFQ;
23317 tp->t_flags2 &= ~TF2_SUPPORTS_MBUFQ;
23327 rack->r_ctl.init_rate = val;
23328 if (rack->rc_always_pace)
23337 rack->rc_force_max_seg = 1;
23339 rack->rc_force_max_seg = 0;
23343 rack->r_ctl.rc_user_set_min_segs = (0x0000ffff & optval);
23349 if ((rack->dgp_on == 1) &&
23350 (rack->r_ctl.pacing_method & RACK_DGP_PACING)) {
23352 * If we set a max-seg and are doing DGP then
23363 rack->r_ctl.pacing_method |= RACK_REG_PACING;
23365 rack->r_ctl.pacing_method &= ~RACK_DGP_PACING;
23368 rack->rc_user_set_max_segs = optval;
23370 rack->rc_user_set_max_segs = MAX_USER_SET_SEG;
23376 if (rack->r_ctl.side_chan_dis_mask & CCSP_DIS_MASK) {
23380 if (rack->dgp_on) {
23388 rack->r_ctl.rc_fixed_pacing_rate_rec = optval;
23389 if (rack->r_ctl.rc_fixed_pacing_rate_ca == 0)
23390 rack->r_ctl.rc_fixed_pacing_rate_ca = optval;
23391 if (rack->r_ctl.rc_fixed_pacing_rate_ss == 0)
23392 rack->r_ctl.rc_fixed_pacing_rate_ss = optval;
23393 rack->use_fixed_rate = 1;
23394 if (rack->rack_hibeta)
23397 rack->r_ctl.rc_fixed_pacing_rate_ss,
23398 rack->r_ctl.rc_fixed_pacing_rate_ca,
23399 rack->r_ctl.rc_fixed_pacing_rate_rec, 0, 0, 8,
23406 if (rack->r_ctl.side_chan_dis_mask & CCSP_DIS_MASK) {
23410 if (rack->dgp_on) {
23418 rack->r_ctl.rc_fixed_pacing_rate_ss = optval;
23419 if (rack->r_ctl.rc_fixed_pacing_rate_ca == 0)
23420 rack->r_ctl.rc_fixed_pacing_rate_ca = optval;
23421 if (rack->r_ctl.rc_fixed_pacing_rate_rec == 0)
23422 rack->r_ctl.rc_fixed_pacing_rate_rec = optval;
23423 rack->use_fixed_rate = 1;
23424 if (rack->rack_hibeta)
23427 rack->r_ctl.rc_fixed_pacing_rate_ss,
23428 rack->r_ctl.rc_fixed_pacing_rate_ca,
23429 rack->r_ctl.rc_fixed_pacing_rate_rec, 0, 0, 8,
23436 if (rack->r_ctl.side_chan_dis_mask & CCSP_DIS_MASK) {
23440 if (rack->dgp_on) {
23448 rack->r_ctl.rc_fixed_pacing_rate_ca = optval;
23449 if (rack->r_ctl.rc_fixed_pacing_rate_ss == 0)
23450 rack->r_ctl.rc_fixed_pacing_rate_ss = optval;
23451 if (rack->r_ctl.rc_fixed_pacing_rate_rec == 0)
23452 rack->r_ctl.rc_fixed_pacing_rate_rec = optval;
23453 rack->use_fixed_rate = 1;
23454 if (rack->rack_hibeta)
23457 rack->r_ctl.rc_fixed_pacing_rate_ss,
23458 rack->r_ctl.rc_fixed_pacing_rate_ca,
23459 rack->r_ctl.rc_fixed_pacing_rate_rec, 0, 0, 8,
23464 rack->r_ctl.rack_per_of_gp_rec = optval;
23466 rack->r_ctl.rack_per_of_gp_ss,
23467 rack->r_ctl.rack_per_of_gp_ca,
23468 rack->r_ctl.rack_per_of_gp_rec, 0, 0, 1,
23482 rack->r_ctl.rack_per_of_gp_ca = ca;
23484 rack->r_ctl.rack_per_of_gp_ss,
23485 rack->r_ctl.rack_per_of_gp_ca,
23486 rack->r_ctl.rack_per_of_gp_rec, 0, 0, 1,
23500 rack->r_ctl.rack_per_of_gp_ss = ss;
23502 rack->r_ctl.rack_per_of_gp_ss,
23503 rack->r_ctl.rack_per_of_gp_ca,
23504 rack->r_ctl.rack_per_of_gp_rec, 0, 0, 1,
23510 rack->r_rr_config = optval;
23512 rack->r_rr_config = 0;
23516 rack->rc_pace_dnd = 1;
23518 rack->rc_pace_dnd = 0;
23523 if (rack->r_rack_hw_rate_caps == 0)
23524 rack->r_rack_hw_rate_caps = 1;
23528 rack->r_rack_hw_rate_caps = 0;
23535 rack->r_ctl.rack_per_upper_bound_ca = val;
23537 rack->r_ctl.rack_per_upper_bound_ss = val;
23542 rack->r_ctl.gp_rnd_thresh = optval & 0x0ff;
23544 rack->r_ctl.gate_to_fs = 1;
23546 rack->r_ctl.gate_to_fs = 0;
23549 rack->r_ctl.use_gp_not_last = 1;
23551 rack->r_ctl.use_gp_not_last = 0;
23558 rack->r_ctl.gp_gain_req = v;
23562 rack->rc_initial_ss_comp = 1;
23563 rack->r_ctl.gp_rnd_thresh = 0;
23568 rack->r_ctl.rc_split_limit = optval;
23573 if (rack->rack_hdrw_pacing == 0) {
23574 rack->rack_hdw_pace_ena = 1;
23575 rack->rack_attempt_hdwr_pace = 0;
23579 rack->rack_hdw_pace_ena = 0;
23581 if (rack->r_ctl.crte != NULL) {
23582 rack->rack_hdrw_pacing = 0;
23583 rack->rack_attempt_hdwr_pace = 0;
23584 tcp_rel_pacing_rate(rack->r_ctl.crte, tp);
23585 rack->r_ctl.crte = NULL;
23594 rack->r_ctl.rc_prr_sendalot = optval;
23597 /* Minimum time between rack t-o's in ms */
23599 rack->r_ctl.rc_min_to = optval;
23604 rack->r_ctl.rc_early_recovery_segs = optval;
23609 tp->t_ccv.flags |= CCF_HYSTART_ALLOWED;
23611 tp->t_ccv.flags |= CCF_HYSTART_CAN_SH_CWND;
23613 tp->t_ccv.flags |= CCF_HYSTART_CONS_SSTH;
23615 tp->t_ccv.flags &= ~(CCF_HYSTART_ALLOWED|CCF_HYSTART_CAN_SH_CWND|CCF_HYSTART_CONS_SSTH);
23623 rack->r_ctl.rc_reorder_shift = optval;
23630 rack->r_ctl.rc_reorder_fade = optval;
23636 rack->r_ctl.rc_tlp_threshold = optval;
23643 rack->use_rack_rr = 1;
23645 rack->use_rack_rr = 0;
23648 /* RACK added ms i.e. rack-rtt + reord + N */
23650 rack->r_ctl.rc_pkt_delay = optval;
23655 tp->t_delayed_ack = 0;
23657 tp->t_delayed_ack = 1;
23658 if (tp->t_flags & TF_DELACK) {
23659 tp->t_flags &= ~TF_DELACK;
23660 tp->t_flags |= TF_ACKNOW;
23674 rack->r_ctl.rc_rate_sample_method = optval;
23679 rack->r_use_hpts_min = 1;
23681 * Must be between 2 - 80% to be a reduction else
23685 rack->r_ctl.max_reduction = optval;
23688 rack->r_use_hpts_min = 0;
23693 rack->rc_gp_no_rec_chg = 1;
23695 rack->rc_gp_no_rec_chg = 0;
23700 rack->rc_skip_timely = 1;
23701 rack->r_ctl.rack_per_of_gp_rec = 90;
23702 rack->r_ctl.rack_per_of_gp_ca = 100;
23703 rack->r_ctl.rack_per_of_gp_ss = 250;
23705 rack->rc_skip_timely = 0;
23710 rack->use_lesser_lt_bw = 0;
23711 rack->dis_lt_bw = 1;
23713 rack->use_lesser_lt_bw = 1;
23714 rack->dis_lt_bw = 0;
23716 rack->use_lesser_lt_bw = 0;
23717 rack->dis_lt_bw = 0;
23723 rack->rc_allow_data_af_clo = 1;
23725 rack->rc_allow_data_af_clo = 0;
23740 * apply a read-lock to the parent (we are already
23751 if (par->t_fb != tp->t_fb) {
23757 dest = (struct tcp_rack *)tp->t_fb_ptr;
23758 src = (struct tcp_rack *)par->t_fb_ptr;
23764 /* Now copy out anything we wish to inherit i.e. things in socket-options */
23766 if ((src->dgp_on) && (dest->dgp_on == 0)) {
23772 if (dest->full_size_rxt != src->full_size_rxt) {
23773 dest->full_size_rxt = src->full_size_rxt;
23776 if (dest->shape_rxt_to_pacing_min != src->shape_rxt_to_pacing_min) {
23777 dest->shape_rxt_to_pacing_min = src->shape_rxt_to_pacing_min;
23781 if (dest->rc_rack_tmr_std_based != src->rc_rack_tmr_std_based) {
23782 dest->rc_rack_tmr_std_based = src->rc_rack_tmr_std_based;
23785 if (dest->rc_rack_use_dsack != src->rc_rack_use_dsack) {
23786 dest->rc_rack_use_dsack = src->rc_rack_use_dsack;
23790 if (dest->r_ctl.pace_len_divisor != src->r_ctl.pace_len_divisor) {
23791 dest->r_ctl.pace_len_divisor = src->r_ctl.pace_len_divisor;
23795 if (src->rack_hibeta != dest->rack_hibeta) {
23797 if (src->rack_hibeta) {
23798 dest->r_ctl.rc_saved_beta.beta = src->r_ctl.rc_saved_beta.beta;
23799 dest->rack_hibeta = 1;
23801 dest->rack_hibeta = 0;
23805 if (dest->r_ctl.timer_slop != src->r_ctl.timer_slop) {
23806 dest->r_ctl.timer_slop = src->r_ctl.timer_slop;
23810 if (dest->r_ctl.rc_saved_beta.beta_ecn != src->r_ctl.rc_saved_beta.beta_ecn) {
23811 dest->r_ctl.rc_saved_beta.beta_ecn = src->r_ctl.rc_saved_beta.beta_ecn;
23814 if (dest->r_ctl.rc_saved_beta.newreno_flags != src->r_ctl.rc_saved_beta.newreno_flags) {
23815 dest->r_ctl.rc_saved_beta.newreno_flags = src->r_ctl.rc_saved_beta.newreno_flags;
23820 if (dest->r_ctl.req_measurements != src->r_ctl.req_measurements) {
23821 dest->r_ctl.req_measurements = src->r_ctl.req_measurements;
23825 if (dest->r_up_only != src->r_up_only) {
23826 dest->r_up_only = src->r_up_only;
23830 if (dest->r_ctl.fillcw_cap != src->r_ctl.fillcw_cap) {
23831 dest->r_ctl.fillcw_cap = src->r_ctl.fillcw_cap;
23835 if (dest->r_ctl.bw_rate_cap != src->r_ctl.bw_rate_cap) {
23836 dest->r_ctl.bw_rate_cap = src->r_ctl.bw_rate_cap;
23841 if (dest->r_ctl.side_chan_dis_mask != src->r_ctl.side_chan_dis_mask) {
23842 dest->r_ctl.side_chan_dis_mask = src->r_ctl.side_chan_dis_mask;
23846 if (dest->r_limit_scw != src->r_limit_scw) {
23847 dest->r_limit_scw = src->r_limit_scw;
23851 if (dest->rc_pace_to_cwnd != src->rc_pace_to_cwnd) {
23852 dest->rc_pace_to_cwnd = src->rc_pace_to_cwnd;
23855 if (dest->rc_pace_fill_if_rttin_range != src->rc_pace_fill_if_rttin_range) {
23856 dest->rc_pace_fill_if_rttin_range = src->rc_pace_fill_if_rttin_range;
23859 if (dest->rtt_limit_mul != src->rtt_limit_mul) {
23860 dest->rtt_limit_mul = src->rtt_limit_mul;
23864 if (dest->r_ctl.rc_no_push_at_mrtt != src->r_ctl.rc_no_push_at_mrtt) {
23865 dest->r_ctl.rc_no_push_at_mrtt = src->r_ctl.rc_no_push_at_mrtt;
23869 if (dest->rack_enable_scwnd != src->rack_enable_scwnd) {
23870 dest->rack_enable_scwnd = src->rack_enable_scwnd;
23874 if (dest->r_use_cmp_ack != src->r_use_cmp_ack) {
23875 dest->r_use_cmp_ack = src->r_use_cmp_ack;
23879 if (dest->r_mbuf_queue != src->r_mbuf_queue) {
23880 dest->r_mbuf_queue = src->r_mbuf_queue;
23884 if (dest->r_mbuf_queue != src->r_mbuf_queue) {
23885 dest->r_mbuf_queue = src->r_mbuf_queue;
23888 if (dest->r_mbuf_queue || dest->rc_always_pace || dest->r_use_cmp_ack) {
23889 tp->t_flags2 |= TF2_SUPPORTS_MBUFQ;
23891 tp->t_flags2 &= ~TF2_SUPPORTS_MBUFQ;
23893 if (dest->r_use_cmp_ack && TCPS_HAVEESTABLISHED(tp->t_state)) {
23894 tp->t_flags2 |= TF2_MBUF_ACKCMP;
23897 if (dest->rack_rec_nonrxt_use_cr != src->rack_rec_nonrxt_use_cr) {
23898 dest->rack_rec_nonrxt_use_cr = src->rack_rec_nonrxt_use_cr;
23902 if (dest->rack_no_prr != src->rack_no_prr) {
23903 dest->rack_no_prr = src->rack_no_prr;
23906 if (dest->no_prr_addback != src->no_prr_addback) {
23907 dest->no_prr_addback = src->no_prr_addback;
23911 if (dest->cspr_is_fcc != src->cspr_is_fcc) {
23912 dest->cspr_is_fcc = src->cspr_is_fcc;
23916 if (dest->rc_gp_dyn_mul != src->rc_gp_dyn_mul) {
23917 dest->rc_gp_dyn_mul = src->rc_gp_dyn_mul;
23920 if (dest->r_ctl.rack_per_of_gp_ca != src->r_ctl.rack_per_of_gp_ca) {
23921 dest->r_ctl.rack_per_of_gp_ca = src->r_ctl.rack_per_of_gp_ca;
23925 if (dest->rack_tlp_threshold_use != src->rack_tlp_threshold_use) {
23926 dest->rack_tlp_threshold_use = src->rack_tlp_threshold_use;
23931 if (dest->r_ctl.init_rate != src->r_ctl.init_rate) {
23932 dest->r_ctl.init_rate = src->r_ctl.init_rate;
23936 if (dest->rc_force_max_seg != src->rc_force_max_seg) {
23937 dest->rc_force_max_seg = src->rc_force_max_seg;
23941 if (dest->r_ctl.rc_user_set_min_segs != src->r_ctl.rc_user_set_min_segs) {
23942 dest->r_ctl.rc_user_set_min_segs = src->r_ctl.rc_user_set_min_segs;
23947 if (dest->r_ctl.rc_fixed_pacing_rate_ca != src->r_ctl.rc_fixed_pacing_rate_ca) {
23948 dest->r_ctl.rc_fixed_pacing_rate_ca = src->r_ctl.rc_fixed_pacing_rate_ca;
23951 if (dest->r_ctl.rc_fixed_pacing_rate_ss != src->r_ctl.rc_fixed_pacing_rate_ss) {
23952 dest->r_ctl.rc_fixed_pacing_rate_ss = src->r_ctl.rc_fixed_pacing_rate_ss;
23955 if (dest->r_ctl.rc_fixed_pacing_rate_rec != src->r_ctl.rc_fixed_pacing_rate_rec) {
23956 dest->r_ctl.rc_fixed_pacing_rate_rec = src->r_ctl.rc_fixed_pacing_rate_rec;
23960 if (dest->r_ctl.rack_per_of_gp_rec != src->r_ctl.rack_per_of_gp_rec) {
23961 dest->r_ctl.rack_per_of_gp_rec = src->r_ctl.rack_per_of_gp_rec;
23964 if (dest->r_ctl.rack_per_of_gp_ca != src->r_ctl.rack_per_of_gp_ca) {
23965 dest->r_ctl.rack_per_of_gp_ca = src->r_ctl.rack_per_of_gp_ca;
23969 if (dest->r_ctl.rack_per_of_gp_ss != src->r_ctl.rack_per_of_gp_ss) {
23970 dest->r_ctl.rack_per_of_gp_ss = src->r_ctl.rack_per_of_gp_ss;
23974 if (dest->r_rr_config != src->r_rr_config) {
23975 dest->r_rr_config = src->r_rr_config;
23979 if (dest->rc_pace_dnd != src->rc_pace_dnd) {
23980 dest->rc_pace_dnd = src->rc_pace_dnd;
23984 if (dest->r_rack_hw_rate_caps != src->r_rack_hw_rate_caps) {
23985 dest->r_rack_hw_rate_caps = src->r_rack_hw_rate_caps;
23989 if (dest->r_ctl.rack_per_upper_bound_ca != src->r_ctl.rack_per_upper_bound_ca) {
23990 dest->r_ctl.rack_per_upper_bound_ca = src->r_ctl.rack_per_upper_bound_ca;
23993 if (dest->r_ctl.rack_per_upper_bound_ss != src->r_ctl.rack_per_upper_bound_ss) {
23994 dest->r_ctl.rack_per_upper_bound_ss = src->r_ctl.rack_per_upper_bound_ss;
23998 if (dest->r_ctl.gp_rnd_thresh != src->r_ctl.gp_rnd_thresh) {
23999 dest->r_ctl.gp_rnd_thresh = src->r_ctl.gp_rnd_thresh;
24002 if (dest->r_ctl.gate_to_fs != src->r_ctl.gate_to_fs) {
24003 dest->r_ctl.gate_to_fs = src->r_ctl.gate_to_fs;
24006 if (dest->r_ctl.use_gp_not_last != src->r_ctl.use_gp_not_last) {
24007 dest->r_ctl.use_gp_not_last = src->r_ctl.use_gp_not_last;
24010 if (dest->r_ctl.gp_gain_req != src->r_ctl.gp_gain_req) {
24011 dest->r_ctl.gp_gain_req = src->r_ctl.gp_gain_req;
24015 if (dest->rack_hdw_pace_ena != src->rack_hdw_pace_ena) {
24016 dest->rack_hdw_pace_ena = src->rack_hdw_pace_ena;
24019 if (dest->rack_attempt_hdwr_pace != src->rack_attempt_hdwr_pace) {
24020 dest->rack_attempt_hdwr_pace = src->rack_attempt_hdwr_pace;
24024 if (dest->r_ctl.rc_prr_sendalot != src->r_ctl.rc_prr_sendalot) {
24025 dest->r_ctl.rc_prr_sendalot = src->r_ctl.rc_prr_sendalot;
24029 if (dest->r_ctl.rc_min_to != src->r_ctl.rc_min_to) {
24030 dest->r_ctl.rc_min_to = src->r_ctl.rc_min_to;
24034 if (dest->r_ctl.rc_early_recovery_segs != src->r_ctl.rc_early_recovery_segs) {
24035 dest->r_ctl.rc_early_recovery_segs = src->r_ctl.rc_early_recovery_segs;
24039 if (par->t_ccv.flags != tp->t_ccv.flags) {
24041 if (par->t_ccv.flags & CCF_HYSTART_ALLOWED) {
24042 tp->t_ccv.flags |= CCF_HYSTART_ALLOWED;
24044 tp->t_ccv.flags |= CCF_HYSTART_CAN_SH_CWND;
24046 tp->t_ccv.flags |= CCF_HYSTART_CONS_SSTH;
24048 tp->t_ccv.flags &= ~(CCF_HYSTART_ALLOWED|CCF_HYSTART_CAN_SH_CWND|CCF_HYSTART_CONS_SSTH);
24052 if (dest->r_ctl.rc_reorder_shift != src->r_ctl.rc_reorder_shift) {
24053 dest->r_ctl.rc_reorder_shift = src->r_ctl.rc_reorder_shift;
24057 if (dest->r_ctl.rc_reorder_fade != src->r_ctl.rc_reorder_fade) {
24058 dest->r_ctl.rc_reorder_fade = src->r_ctl.rc_reorder_fade;
24062 if (dest->r_ctl.rc_tlp_threshold != src->r_ctl.rc_tlp_threshold) {
24063 dest->r_ctl.rc_tlp_threshold = src->r_ctl.rc_tlp_threshold;
24067 if (dest->use_rack_rr != src->use_rack_rr) {
24068 dest->use_rack_rr = src->use_rack_rr;
24072 if (dest->r_ctl.rc_pkt_delay != src->r_ctl.rc_pkt_delay) {
24073 dest->r_ctl.rc_pkt_delay = src->r_ctl.rc_pkt_delay;
24078 if (dest->r_ctl.rc_rate_sample_method != src->r_ctl.rc_rate_sample_method) {
24079 dest->r_ctl.rc_rate_sample_method = src->r_ctl.rc_rate_sample_method;
24083 if (dest->r_use_hpts_min != src->r_use_hpts_min) {
24084 dest->r_use_hpts_min = src->r_use_hpts_min;
24087 if (dest->r_ctl.max_reduction != src->r_ctl.max_reduction) {
24088 dest->r_ctl.max_reduction = src->r_ctl.max_reduction;
24092 if (dest->rc_gp_no_rec_chg != src->rc_gp_no_rec_chg) {
24093 dest->rc_gp_no_rec_chg = src->rc_gp_no_rec_chg;
24096 if (dest->rc_skip_timely != src->rc_skip_timely) {
24097 dest->rc_skip_timely = src->rc_skip_timely;
24101 if (dest->rc_allow_data_af_clo != src->rc_allow_data_af_clo) {
24102 dest->rc_allow_data_af_clo = src->rc_allow_data_af_clo;
24106 if (src->use_lesser_lt_bw != dest->use_lesser_lt_bw) {
24107 dest->use_lesser_lt_bw = src->use_lesser_lt_bw;
24110 if (dest->dis_lt_bw != src->dis_lt_bw) {
24111 dest->dis_lt_bw = src->dis_lt_bw;
24124 TAILQ_FOREACH_SAFE(dol, &rack->r_ctl.opt_list, next, sdol) {
24125 TAILQ_REMOVE(&rack->r_ctl.opt_list, dol, next);
24127 s_optval = (uint32_t)dol->optval;
24128 (void)rack_process_option(rack->rc_tp, rack, dol->optname, s_optval, dol->optval, NULL);
24139 rack = (struct tcp_rack *)tp->t_fb_ptr;
24141 rack->r_ctl.fsb.hw_tls = 1;
24143 rack->r_ctl.fsb.hw_tls = 0;
24161 rack = (struct tcp_rack *)tp->t_fb_ptr;
24162 if (rack->r_ctl.rc_hpts_flags) {
24164 if ((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) == PACE_PKT_OUTPUT){
24168 if (TSTMP_GEQ(cts, rack->r_ctl.rc_last_output_to))
24170 } else if ((rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK) != 0) {
24174 if (TSTMP_GEQ(cts, rack->r_ctl.rc_timer_exp))
24208 * socket option arguments. When it re-acquires the lock after the copy, it
24224 rack = (struct tcp_rack *)tp->t_fb_ptr;
24230 ip = (struct ip *)rack->r_ctl.fsb.tcp_ip_hdr;
24233 switch (sopt->sopt_level) {
24236 MPASS(inp->inp_vflag & INP_IPV6PROTO);
24237 switch (sopt->sopt_name) {
24247 switch (sopt->sopt_name) {
24252 ip->ip_tos = rack->rc_inp->inp_ip_tos;
24258 ip->ip_ttl = rack->rc_inp->inp_ip_ttl;
24266 switch (sopt->sopt_name) {
24267 case SO_PEERPRIO: /* SC-URL:bs */
24269 if (inp->inp_socket) {
24270 rack->client_bufferlvl = inp->inp_socket->so_peerprio;
24278 switch (sopt->sopt_name) {
24295 case TCP_PACING_RATE_CAP: /* URL:cap -- used by side-channel */
24296 case TCP_HDWR_UP_ONLY: /* URL:uponly -- hardware pacing boolean */
24356 if ((sopt->sopt_name == TCP_PACING_RATE_CAP) ||
24357 (sopt->sopt_name == TCP_FILLCW_RATE_CAP)) {
24360 * We truncate it down to 32 bits for the socket-option trace this
24364 } else if (sopt->sopt_name == TCP_HYBRID_PACING) {
24374 if (tp->t_fb != &__tcp_rack) {
24378 if (rack->defer_options && (rack->gp_ready == 0) &&
24379 (sopt->sopt_name != TCP_DEFER_OPTIONS) &&
24380 (sopt->sopt_name != TCP_HYBRID_PACING) &&
24381 (sopt->sopt_name != TCP_RACK_SET_RXT_OPTIONS) &&
24382 (sopt->sopt_name != TCP_RACK_PACING_BETA_ECN) &&
24383 (sopt->sopt_name != TCP_RACK_MEASURE_CNT)) {
24385 if (rack_add_deferred_option(rack, sopt->sopt_name, loptval)) {
24394 error = rack_process_option(tp, rack, sopt->sopt_name, optval, loptval, &hybrid);
24406 ti->tcpi_state = tp->t_state;
24407 if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
24408 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
24409 if (tp->t_flags & TF_SACK_PERMIT)
24410 ti->tcpi_options |= TCPI_OPT_SACK;
24411 if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
24412 ti->tcpi_options |= TCPI_OPT_WSCALE;
24413 ti->tcpi_snd_wscale = tp->snd_scale;
24414 ti->tcpi_rcv_wscale = tp->rcv_scale;
24416 if (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))
24417 ti->tcpi_options |= TCPI_OPT_ECN;
24418 if (tp->t_flags & TF_FASTOPEN)
24419 ti->tcpi_options |= TCPI_OPT_TFO;
24421 ti->tcpi_last_data_recv = ((uint32_t)ticks - tp->t_rcvtime) * tick;
24423 ti->tcpi_rtt = tp->t_srtt;
24424 ti->tcpi_rttvar = tp->t_rttvar;
24425 ti->tcpi_rto = tp->t_rxtcur;
24426 ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
24427 ti->tcpi_snd_cwnd = tp->snd_cwnd;
24429 * FreeBSD-specific extension fields for tcp_info.
24431 ti->tcpi_rcv_space = tp->rcv_wnd;
24432 ti->tcpi_rcv_nxt = tp->rcv_nxt;
24433 ti->tcpi_snd_wnd = tp->snd_wnd;
24434 ti->tcpi_snd_bwnd = 0; /* Unused, kept for compat. */
24435 ti->tcpi_snd_nxt = tp->snd_nxt;
24436 ti->tcpi_snd_mss = tp->t_maxseg;
24437 ti->tcpi_rcv_mss = tp->t_maxseg;
24438 ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
24439 ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
24440 ti->tcpi_snd_zerowin = tp->t_sndzerowin;
24441 ti->tcpi_total_tlp = tp->t_sndtlppack;
24442 ti->tcpi_total_tlp_bytes = tp->t_sndtlpbyte;
24443 ti->tcpi_rttmin = tp->t_rttlow;
24445 memcpy(&ti->tcpi_rxsyninfo, &tp->t_rxsyninfo, sizeof(struct tcpsyninfo));
24448 if (tp->t_flags & TF_TOE) {
24449 ti->tcpi_options |= TCPI_OPT_TOE;
24470 rack = (struct tcp_rack *)tp->t_fb_ptr;
24475 switch (sopt->sopt_name) {
24490 if (strcmp(tp->t_cc->name, CCALGONAME_NEWRENO) != 0)
24492 else if (rack->rc_pacing_cc_set == 0)
24493 optval = rack->r_ctl.rc_saved_beta.beta;
24500 if (tp->t_ccv.cc_data)
24501 optval = ((struct newreno *)tp->t_ccv.cc_data)->beta;
24514 if (strcmp(tp->t_cc->name, CCALGONAME_NEWRENO) != 0)
24516 else if (rack->rc_pacing_cc_set == 0)
24517 optval = rack->r_ctl.rc_saved_beta.beta_ecn;
24524 if (tp->t_ccv.cc_data)
24525 optval = ((struct newreno *)tp->t_ccv.cc_data)->beta_ecn;
24532 if (rack->rc_rack_tmr_std_based) {
24535 if (rack->rc_rack_use_dsack) {
24541 if (tp->t_ccv.flags & CCF_HYSTART_ALLOWED) {
24543 if (tp->t_ccv.flags & CCF_HYSTART_CAN_SH_CWND)
24545 if (tp->t_ccv.flags & CCF_HYSTART_CONS_SSTH)
24556 optval = rack->rack_hibeta;
24559 optval = rack->defer_options;
24562 optval = rack->r_ctl.req_measurements;
24565 optval = rack->r_use_labc_for_rec;
24568 optval = rack->rc_labc;
24571 optval= rack->r_up_only;
24574 loptval = rack->r_ctl.fillcw_cap;
24577 loptval = rack->r_ctl.bw_rate_cap;
24584 optval = rack->r_ctl.side_chan_dis_mask;
24591 optval = rack->r_use_cmp_ack;
24594 optval = rack->rc_pace_to_cwnd;
24597 optval = rack->r_ctl.rc_no_push_at_mrtt;
24600 optval = rack->rack_enable_scwnd;
24603 optval = rack->rack_rec_nonrxt_use_cr;
24606 if (rack->rack_no_prr == 1)
24608 else if (rack->no_prr_addback == 1)
24614 if (rack->dis_lt_bw) {
24617 } else if (rack->use_lesser_lt_bw) {
24629 /* Now do we use the LRO mbuf-queue feature */
24630 optval = rack->r_mbuf_queue;
24633 optval = rack->cspr_is_fcc;
24636 optval = rack->rc_gp_dyn_mul;
24643 optval = rack->r_ctl.rc_tlp_cwnd_reduce;
24646 val = rack->r_ctl.init_rate;
24653 optval = rack->rc_force_max_seg;
24656 optval = rack->r_ctl.rc_user_set_min_segs;
24660 optval = rack->rc_user_set_max_segs;
24664 optval = rack->rc_always_pace;
24668 optval = rack->r_ctl.rc_prr_sendalot;
24671 /* Minimum time between rack t-o's in ms */
24672 optval = rack->r_ctl.rc_min_to;
24675 optval = rack->r_ctl.rc_split_limit;
24679 optval = rack->r_ctl.rc_early_recovery_segs;
24683 optval = rack->r_ctl.rc_reorder_shift;
24686 if (rack->r_ctl.gp_rnd_thresh) {
24689 v = rack->r_ctl.gp_gain_req;
24691 optval = v | (rack->r_ctl.gp_rnd_thresh & 0xff);
24692 if (rack->r_ctl.gate_to_fs == 1)
24699 optval = rack->r_ctl.rc_reorder_fade;
24703 optval = rack->use_rack_rr;
24706 optval = rack->r_rr_config;
24709 optval = rack->r_rack_hw_rate_caps;
24712 optval = rack->rack_hdw_pace_ena;
24716 optval = rack->r_ctl.rc_tlp_threshold;
24719 /* RACK added ms i.e. rack-rtt + reord + N */
24720 optval = rack->r_ctl.rc_pkt_delay;
24723 optval = rack->rack_tlp_threshold_use;
24726 optval = rack->rc_pace_dnd;
24729 optval = rack->r_ctl.rc_fixed_pacing_rate_ca;
24732 optval = rack->r_ctl.rc_fixed_pacing_rate_ss;
24735 optval = rack->r_ctl.rc_fixed_pacing_rate_rec;
24738 optval = rack->r_ctl.rack_per_upper_bound_ss;
24740 optval |= rack->r_ctl.rack_per_upper_bound_ca;
24743 optval = rack->r_ctl.rack_per_of_gp_ca;
24746 optval = rack->r_ctl.rack_per_of_gp_ss;
24749 optval = rack->r_ctl.pace_len_divisor;
24752 optval = rack->r_ctl.rc_rate_sample_method;
24755 optval = tp->t_delayed_ack;
24758 optval = rack->rc_allow_data_af_clo;
24761 optval = rack->r_limit_scw;
24764 if (rack->r_use_hpts_min)
24765 optval = rack->r_ctl.max_reduction;
24770 optval = rack->rc_gp_no_rec_chg;
24773 optval = rack->rc_skip_timely;
24776 optval = rack->r_ctl.timer_slop;
24784 if ((sopt->sopt_name == TCP_PACING_RATE_CAP) ||
24785 (sopt->sopt_name == TCP_FILLCW_RATE_CAP))
24796 if (sopt->sopt_dir == SOPT_SET) {
24798 } else if (sopt->sopt_dir == SOPT_GET) {
24801 panic("%s: sopt_dir $%d", __func__, sopt->sopt_dir);
24872 printf("Failed to register rack module -- err:%d\n", err);