189e560f4SRandall Stewart /*- 2*963fb2adSRandall Stewart * Copyright (c) 2016-2020 Netflix, Inc. 389e560f4SRandall Stewart * 489e560f4SRandall Stewart * Redistribution and use in source and binary forms, with or without 589e560f4SRandall Stewart * modification, are permitted provided that the following conditions 689e560f4SRandall Stewart * are met: 789e560f4SRandall Stewart * 1. Redistributions of source code must retain the above copyright 889e560f4SRandall Stewart * notice, this list of conditions and the following disclaimer. 989e560f4SRandall Stewart * 2. Redistributions in binary form must reproduce the above copyright 1089e560f4SRandall Stewart * notice, this list of conditions and the following disclaimer in the 1189e560f4SRandall Stewart * documentation and/or other materials provided with the distribution. 1289e560f4SRandall Stewart * 1389e560f4SRandall Stewart * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 1489e560f4SRandall Stewart * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1589e560f4SRandall Stewart * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1689e560f4SRandall Stewart * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 1789e560f4SRandall Stewart * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1889e560f4SRandall Stewart * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 1989e560f4SRandall Stewart * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2089e560f4SRandall Stewart * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2189e560f4SRandall Stewart * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2289e560f4SRandall Stewart * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2389e560f4SRandall Stewart * SUCH DAMAGE. 2489e560f4SRandall Stewart * 2589e560f4SRandall Stewart * $FreeBSD$ 2689e560f4SRandall Stewart */ 2789e560f4SRandall Stewart 2889e560f4SRandall Stewart #ifndef _NETINET_TCP_RACK_H_ 2989e560f4SRandall Stewart #define _NETINET_TCP_RACK_H_ 3089e560f4SRandall Stewart 3189e560f4SRandall Stewart #define RACK_ACKED 0x0001/* The remote endpoint acked this */ 3235c7bb34SRandall Stewart #define RACK_TO_MIXED 0x0002/* A timeout occured that mixed the send order - not used */ 3335c7bb34SRandall Stewart #define RACK_DEFERRED 0x0004/* We can't use this for RTT calc - not used */ 3489e560f4SRandall Stewart #define RACK_OVERMAX 0x0008/* We have more retran's then we can fit */ 3589e560f4SRandall Stewart #define RACK_SACK_PASSED 0x0010/* A sack was done above this block */ 3689e560f4SRandall Stewart #define RACK_WAS_SACKPASS 0x0020/* We retransmitted due to SACK pass */ 3789e560f4SRandall Stewart #define RACK_HAS_FIN 0x0040/* segment is sent with fin */ 3889e560f4SRandall Stewart #define RACK_TLP 0x0080/* segment sent as tail-loss-probe */ 3935c7bb34SRandall Stewart #define RACK_RWND_COLLAPSED 0x0100/* The peer collapsed the rwnd on the segment */ 40*963fb2adSRandall Stewart #define RACK_APP_LIMITED 0x0200/* We went app limited after this send */ 41*963fb2adSRandall Stewart #define RACK_WAS_ACKED 0x0400/* a RTO undid the ack, but it already had a rtt calc done */ 4289e560f4SRandall Stewart #define RACK_NUM_OF_RETRANS 3 4389e560f4SRandall Stewart 4489e560f4SRandall Stewart #define RACK_INITIAL_RTO 1000 /* 1 second in milli seconds */ 4589e560f4SRandall Stewart 46*963fb2adSRandall Stewart #define RACK_REQ_AVG 4 /* Must be less than 256 */ 47*963fb2adSRandall Stewart 4889e560f4SRandall Stewart struct rack_sendmap { 4989e560f4SRandall Stewart uint32_t r_start; /* Sequence number of the segment */ 5089e560f4SRandall Stewart uint32_t r_end; /* End seq, this is 1 beyond actually */ 5135c7bb34SRandall Stewart TAILQ_ENTRY(rack_sendmap) r_tnext; /* Time of transmit based next */ 5235c7bb34SRandall Stewart RB_ENTRY(rack_sendmap) r_next; /* RB Tree next */ 5389e560f4SRandall Stewart uint32_t r_rtr_bytes; /* How many bytes have been retransmitted */ 5489e560f4SRandall Stewart uint16_t r_rtr_cnt; /* Retran count, index this -1 to get time 5589e560f4SRandall Stewart * sent */ 5635c7bb34SRandall Stewart uint16_t r_flags; /* Flags as defined above */ 5735c7bb34SRandall Stewart uint32_t r_tim_lastsent[RACK_NUM_OF_RETRANS]; 58*963fb2adSRandall Stewart uint32_t usec_orig_send; /* time of orginal send in useconds */ 59*963fb2adSRandall Stewart uint32_t r_nseq_appl; /* If this one is app limited, this is the nxt seq limited */ 60*963fb2adSRandall Stewart uint32_t r_ack_arrival; /* This is the time of ack-arrival (if SACK'd) */ 6135c7bb34SRandall Stewart uint8_t r_dupack; /* Dup ack count */ 6289e560f4SRandall Stewart uint8_t r_in_tmap; /* Flag to see if its in the r_tnext array */ 635e02b277SJonathan T. Looney uint8_t r_limit_type; /* is this entry counted against a limit? */ 64*963fb2adSRandall Stewart uint8_t r_just_ret : 1, /* After sending, the next pkt was just returned, i.e. limited */ 65*963fb2adSRandall Stewart r_one_out_nr : 1, /* Special case 1 outstanding and not in recovery */ 66*963fb2adSRandall Stewart r_avail : 6; 67*963fb2adSRandall Stewart uint8_t r_resv[36]; 6889e560f4SRandall Stewart }; 6989e560f4SRandall Stewart 7035c7bb34SRandall Stewart RB_HEAD(rack_rb_tree_head, rack_sendmap); 7189e560f4SRandall Stewart TAILQ_HEAD(rack_head, rack_sendmap); 7289e560f4SRandall Stewart 7335c7bb34SRandall Stewart #define RACK_LIMIT_TYPE_SPLIT 1 7489e560f4SRandall Stewart 7589e560f4SRandall Stewart /* 7689e560f4SRandall Stewart * We use the rate sample structure to 7789e560f4SRandall Stewart * assist in single sack/ack rate and rtt 7889e560f4SRandall Stewart * calculation. In the future we will expand 7989e560f4SRandall Stewart * this in BBR to do forward rate sample 8089e560f4SRandall Stewart * b/w estimation. 8189e560f4SRandall Stewart */ 8289e560f4SRandall Stewart #define RACK_RTT_EMPTY 0x00000001 /* Nothing yet stored in RTT's */ 8389e560f4SRandall Stewart #define RACK_RTT_VALID 0x00000002 /* We have at least one valid RTT */ 8489e560f4SRandall Stewart struct rack_rtt_sample { 8589e560f4SRandall Stewart uint32_t rs_flags; 8689e560f4SRandall Stewart uint32_t rs_rtt_lowest; 8789e560f4SRandall Stewart uint32_t rs_rtt_highest; 8889e560f4SRandall Stewart uint32_t rs_rtt_cnt; 89*963fb2adSRandall Stewart uint32_t rs_us_rtt; 90*963fb2adSRandall Stewart int32_t confidence; 9189e560f4SRandall Stewart uint64_t rs_rtt_tot; 92*963fb2adSRandall Stewart uint16_t rs_us_rtrcnt; 9389e560f4SRandall Stewart }; 9489e560f4SRandall Stewart 9589e560f4SRandall Stewart #define RACK_LOG_TYPE_ACK 0x01 9689e560f4SRandall Stewart #define RACK_LOG_TYPE_OUT 0x02 9789e560f4SRandall Stewart #define RACK_LOG_TYPE_TO 0x03 9889e560f4SRandall Stewart #define RACK_LOG_TYPE_ALLOC 0x04 9989e560f4SRandall Stewart #define RACK_LOG_TYPE_FREE 0x05 10089e560f4SRandall Stewart 10189e560f4SRandall Stewart 10289e560f4SRandall Stewart struct rack_log { 10389e560f4SRandall Stewart union { 10489e560f4SRandall Stewart struct rack_sendmap *rsm; /* For alloc/free */ 10589e560f4SRandall Stewart uint64_t sb_acc;/* For out/ack or t-o */ 10689e560f4SRandall Stewart }; 10789e560f4SRandall Stewart uint32_t th_seq; 10889e560f4SRandall Stewart uint32_t th_ack; 10989e560f4SRandall Stewart uint32_t snd_una; 11089e560f4SRandall Stewart uint32_t snd_nxt; /* th_win for TYPE_ACK */ 11189e560f4SRandall Stewart uint32_t snd_max; 11289e560f4SRandall Stewart uint32_t blk_start[4]; 11389e560f4SRandall Stewart uint32_t blk_end[4]; 11489e560f4SRandall Stewart uint8_t type; 11589e560f4SRandall Stewart uint8_t n_sackblks; 11689e560f4SRandall Stewart uint16_t len; /* Timeout T3=1, TLP=2, RACK=3 */ 11789e560f4SRandall Stewart }; 11889e560f4SRandall Stewart 11989e560f4SRandall Stewart /* 12089e560f4SRandall Stewart * Magic numbers for logging timeout events if the 12189e560f4SRandall Stewart * logging is enabled. 12289e560f4SRandall Stewart */ 12389e560f4SRandall Stewart #define RACK_TO_FRM_TMR 1 12489e560f4SRandall Stewart #define RACK_TO_FRM_TLP 2 12589e560f4SRandall Stewart #define RACK_TO_FRM_RACK 3 12689e560f4SRandall Stewart #define RACK_TO_FRM_KEEP 4 12789e560f4SRandall Stewart #define RACK_TO_FRM_PERSIST 5 12889e560f4SRandall Stewart #define RACK_TO_FRM_DELACK 6 12989e560f4SRandall Stewart 13089e560f4SRandall Stewart struct rack_opts_stats { 13189e560f4SRandall Stewart uint64_t tcp_rack_prop_rate; 13289e560f4SRandall Stewart uint64_t tcp_rack_prop; 13389e560f4SRandall Stewart uint64_t tcp_rack_tlp_reduce; 13489e560f4SRandall Stewart uint64_t tcp_rack_early_recov; 13589e560f4SRandall Stewart uint64_t tcp_rack_pace_always; 13689e560f4SRandall Stewart uint64_t tcp_rack_pace_reduce; 13789e560f4SRandall Stewart uint64_t tcp_rack_max_seg; 13889e560f4SRandall Stewart uint64_t tcp_rack_prr_sendalot; 13989e560f4SRandall Stewart uint64_t tcp_rack_min_to; 14089e560f4SRandall Stewart uint64_t tcp_rack_early_seg; 14189e560f4SRandall Stewart uint64_t tcp_rack_reord_thresh; 14289e560f4SRandall Stewart uint64_t tcp_rack_reord_fade; 14389e560f4SRandall Stewart uint64_t tcp_rack_tlp_thresh; 14489e560f4SRandall Stewart uint64_t tcp_rack_pkt_delay; 14589e560f4SRandall Stewart uint64_t tcp_rack_tlp_inc_var; 14689e560f4SRandall Stewart uint64_t tcp_tlp_use; 14789e560f4SRandall Stewart uint64_t tcp_rack_idle_reduce; 14889e560f4SRandall Stewart uint64_t tcp_rack_idle_reduce_high; 14989e560f4SRandall Stewart uint64_t rack_no_timer_in_hpts; 15089e560f4SRandall Stewart uint64_t tcp_rack_min_pace_seg; 151*963fb2adSRandall Stewart uint64_t tcp_rack_pace_rate_ca; 152*963fb2adSRandall Stewart uint64_t tcp_rack_rr; 1531cf55767SRandall Stewart uint64_t tcp_rack_do_detection; 154*963fb2adSRandall Stewart uint64_t tcp_rack_rrr_no_conf_rate; 155*963fb2adSRandall Stewart uint64_t tcp_initial_rate; 156*963fb2adSRandall Stewart uint64_t tcp_initial_win; 157*963fb2adSRandall Stewart uint64_t tcp_hdwr_pacing; 158*963fb2adSRandall Stewart uint64_t tcp_gp_inc_ss; 159*963fb2adSRandall Stewart uint64_t tcp_gp_inc_ca; 160*963fb2adSRandall Stewart uint64_t tcp_gp_inc_rec; 161*963fb2adSRandall Stewart uint64_t tcp_rack_force_max_seg; 162*963fb2adSRandall Stewart uint64_t tcp_rack_pace_rate_ss; 163*963fb2adSRandall Stewart uint64_t tcp_rack_pace_rate_rec; 164*963fb2adSRandall Stewart /* Temp counters for dsack */ 165*963fb2adSRandall Stewart uint64_t tcp_sack_path_1; 166*963fb2adSRandall Stewart uint64_t tcp_sack_path_2a; 167*963fb2adSRandall Stewart uint64_t tcp_sack_path_2b; 168*963fb2adSRandall Stewart uint64_t tcp_sack_path_3; 169*963fb2adSRandall Stewart uint64_t tcp_sack_path_4; 170*963fb2adSRandall Stewart /* non temp counters */ 171*963fb2adSRandall Stewart uint64_t tcp_rack_scwnd; 172*963fb2adSRandall Stewart uint64_t tcp_rack_noprr; 173*963fb2adSRandall Stewart uint64_t tcp_rack_cfg_rate; 174*963fb2adSRandall Stewart uint64_t tcp_timely_dyn; 175*963fb2adSRandall Stewart uint64_t tcp_rack_mbufq; 176*963fb2adSRandall Stewart uint64_t tcp_fillcw; 177*963fb2adSRandall Stewart uint64_t tcp_npush; 178*963fb2adSRandall Stewart uint64_t tcp_lscwnd; 179*963fb2adSRandall Stewart uint64_t tcp_profile; 18089e560f4SRandall Stewart }; 18189e560f4SRandall Stewart 182*963fb2adSRandall Stewart /* RTT shrink reasons */ 183*963fb2adSRandall Stewart #define RACK_RTTS_INIT 0 184*963fb2adSRandall Stewart #define RACK_RTTS_NEWRTT 1 185*963fb2adSRandall Stewart #define RACK_RTTS_EXITPROBE 2 186*963fb2adSRandall Stewart #define RACK_RTTS_ENTERPROBE 3 187*963fb2adSRandall Stewart #define RACK_RTTS_REACHTARGET 4 188*963fb2adSRandall Stewart #define RACK_RTTS_SEEHBP 5 189*963fb2adSRandall Stewart #define RACK_RTTS_NOBACKOFF 6 190*963fb2adSRandall Stewart #define RACK_RTTS_SAFETY 7 191*963fb2adSRandall Stewart 192*963fb2adSRandall Stewart #define RACK_USE_BEG 1 193*963fb2adSRandall Stewart #define RACK_USE_END 2 194*963fb2adSRandall Stewart #define RACK_USE_END_OR_THACK 3 195*963fb2adSRandall Stewart 19689e560f4SRandall Stewart #define TLP_USE_ID 1 /* Internet draft behavior */ 19789e560f4SRandall Stewart #define TLP_USE_TWO_ONE 2 /* Use 2.1 behavior */ 19889e560f4SRandall Stewart #define TLP_USE_TWO_TWO 3 /* Use 2.2 behavior */ 199*963fb2adSRandall Stewart #define RACK_MIN_BW 8000 /* 64kbps in Bps */ 20089e560f4SRandall Stewart 201*963fb2adSRandall Stewart #define MIN_GP_WIN 6 /* We need at least 6 MSS in a GP measurement */ 20289e560f4SRandall Stewart #ifdef _KERNEL 20389e560f4SRandall Stewart #define RACK_OPTS_SIZE (sizeof(struct rack_opts_stats)/sizeof(uint64_t)) 20489e560f4SRandall Stewart extern counter_u64_t rack_opts_arry[RACK_OPTS_SIZE]; 20589e560f4SRandall Stewart #define RACK_OPTS_ADD(name, amm) counter_u64_add(rack_opts_arry[(offsetof(struct rack_opts_stats, name)/sizeof(uint64_t))], (amm)) 20689e560f4SRandall Stewart #define RACK_OPTS_INC(name) RACK_OPTS_ADD(name, 1) 20789e560f4SRandall Stewart #endif 20889e560f4SRandall Stewart /* 20989e560f4SRandall Stewart * As we get each SACK we wade through the 21089e560f4SRandall Stewart * rc_map and mark off what is acked. 21189e560f4SRandall Stewart * We also increment rc_sacked as well. 21289e560f4SRandall Stewart * 21389e560f4SRandall Stewart * We also pay attention to missing entries 21489e560f4SRandall Stewart * based on the time and possibly mark them 21589e560f4SRandall Stewart * for retransmit. If we do and we are not already 21689e560f4SRandall Stewart * in recovery we enter recovery. In doing 21789e560f4SRandall Stewart * so we claer prr_delivered/holes_rxt and prr_sent_dur_rec. 21889e560f4SRandall Stewart * We also setup rc_next/rc_snd_nxt/rc_send_end so 21989e560f4SRandall Stewart * we will know where to send from. When not in 22089e560f4SRandall Stewart * recovery rc_next will be NULL and rc_snd_nxt should 22189e560f4SRandall Stewart * equal snd_max. 22289e560f4SRandall Stewart * 22389e560f4SRandall Stewart * Whenever we retransmit from recovery we increment 22489e560f4SRandall Stewart * rc_holes_rxt as we retran a block and mark it as retransmitted 22589e560f4SRandall Stewart * with the time it was sent. During non-recovery sending we 22689e560f4SRandall Stewart * add to our map and note the time down of any send expanding 22789e560f4SRandall Stewart * the rc_map at the tail and moving rc_snd_nxt up with snd_max. 22889e560f4SRandall Stewart * 22989e560f4SRandall Stewart * In recovery during SACK/ACK processing if a chunk has 23089e560f4SRandall Stewart * been retransmitted and it is now acked, we decrement rc_holes_rxt. 23189e560f4SRandall Stewart * When we retransmit from the scoreboard we use 23289e560f4SRandall Stewart * rc_next and rc_snd_nxt/rc_send_end to help us 23389e560f4SRandall Stewart * find what needs to be retran. 23489e560f4SRandall Stewart * 23589e560f4SRandall Stewart * To calculate pipe we simply take (snd_max - snd_una) + rc_holes_rxt 23689e560f4SRandall Stewart * This gets us the effect of RFC6675 pipe, counting twice for 23789e560f4SRandall Stewart * bytes retransmitted. 23889e560f4SRandall Stewart */ 23989e560f4SRandall Stewart 24089e560f4SRandall Stewart #define TT_RACK_FR_TMR 0x2000 24189e560f4SRandall Stewart 24289e560f4SRandall Stewart /* 24389e560f4SRandall Stewart * Locking for the rack control block. 24489e560f4SRandall Stewart * a) Locked by INP_WLOCK 24589e560f4SRandall Stewart * b) Locked by the hpts-mutex 24689e560f4SRandall Stewart * 24789e560f4SRandall Stewart */ 24835c7bb34SRandall Stewart #define RACK_GP_HIST 4 /* How much goodput history do we maintain? */ 24989e560f4SRandall Stewart 25089e560f4SRandall Stewart struct rack_control { 25189e560f4SRandall Stewart /* Second cache line 0x40 from tcp_rack */ 25235c7bb34SRandall Stewart struct rack_rb_tree_head rc_mtree; /* Tree of all segments Lock(a) */ 25389e560f4SRandall Stewart struct rack_head rc_tmap; /* List in transmit order Lock(a) */ 25489e560f4SRandall Stewart struct rack_sendmap *rc_tlpsend; /* Remembered place for 25589e560f4SRandall Stewart * tlp_sending Lock(a) */ 25689e560f4SRandall Stewart struct rack_sendmap *rc_resend; /* something we have been asked to 25789e560f4SRandall Stewart * resend */ 25835c7bb34SRandall Stewart uint32_t input_pkt; 25935c7bb34SRandall Stewart uint32_t saved_input_pkt; 26089e560f4SRandall Stewart uint32_t rc_hpts_flags; 261*963fb2adSRandall Stewart uint32_t rc_fixed_pacing_rate_ca; 262*963fb2adSRandall Stewart uint32_t rc_fixed_pacing_rate_rec; 263*963fb2adSRandall Stewart uint32_t rc_fixed_pacing_rate_ss; 264*963fb2adSRandall Stewart uint32_t cwnd_to_use; /* The cwnd in use */ 26589e560f4SRandall Stewart uint32_t rc_timer_exp; /* If a timer ticks of expiry */ 26689e560f4SRandall Stewart uint32_t rc_rack_min_rtt; /* lowest RTT seen Lock(a) */ 26789e560f4SRandall Stewart uint32_t rc_rack_largest_cwnd; /* Largest CWND we have seen Lock(a) */ 26889e560f4SRandall Stewart 26989e560f4SRandall Stewart /* Third Cache line 0x80 */ 27089e560f4SRandall Stewart struct rack_head rc_free; /* Allocation array */ 27189e560f4SRandall Stewart uint32_t rc_time_last_sent; /* Time we last sent some data and 27289e560f4SRandall Stewart * logged it Lock(a). */ 27389e560f4SRandall Stewart uint32_t rc_reorder_ts; /* Last time we saw reordering Lock(a) */ 27489e560f4SRandall Stewart 27589e560f4SRandall Stewart uint32_t rc_tlp_new_data; /* we need to send new-data on a TLP 27689e560f4SRandall Stewart * Lock(a) */ 27789e560f4SRandall Stewart uint32_t rc_prr_out; /* bytes sent during recovery Lock(a) */ 27889e560f4SRandall Stewart 27989e560f4SRandall Stewart uint32_t rc_prr_recovery_fs; /* recovery fs point Lock(a) */ 28089e560f4SRandall Stewart 28189e560f4SRandall Stewart uint32_t rc_prr_sndcnt; /* Prr sndcnt Lock(a) */ 28289e560f4SRandall Stewart 28389e560f4SRandall Stewart uint32_t rc_sacked; /* Tot sacked on scoreboard Lock(a) */ 284*963fb2adSRandall Stewart uint32_t xxx_rc_last_tlp_seq; /* Last tlp sequence Lock(a) */ 28589e560f4SRandall Stewart 28689e560f4SRandall Stewart uint32_t rc_prr_delivered; /* during recovery prr var Lock(a) */ 287*963fb2adSRandall Stewart uint16_t rc_tlp_cnt_out; /* count of times we have sent a TLP without new data */ 288*963fb2adSRandall Stewart uint16_t xxx_rc_tlp_seg_send_cnt; /* Number of times we have TLP sent 28989e560f4SRandall Stewart * rc_last_tlp_seq Lock(a) */ 29089e560f4SRandall Stewart 291*963fb2adSRandall Stewart uint32_t rc_loss_count; /* How many bytes have been retransmitted 29289e560f4SRandall Stewart * Lock(a) */ 29389e560f4SRandall Stewart uint32_t rc_reorder_fade; /* Socket option value Lock(a) */ 29489e560f4SRandall Stewart 29589e560f4SRandall Stewart /* Forth cache line 0xc0 */ 29689e560f4SRandall Stewart /* Times */ 29789e560f4SRandall Stewart 29889e560f4SRandall Stewart uint32_t rc_rack_tmit_time; /* Rack transmit time Lock(a) */ 29989e560f4SRandall Stewart uint32_t rc_holes_rxt; /* Tot retraned from scoreboard Lock(a) */ 30089e560f4SRandall Stewart 30189e560f4SRandall Stewart /* Variables to track bad retransmits and recover */ 30289e560f4SRandall Stewart uint32_t rc_rsm_start; /* RSM seq number we retransmitted Lock(a) */ 30389e560f4SRandall Stewart uint32_t rc_cwnd_at; /* cwnd at the retransmit Lock(a) */ 30489e560f4SRandall Stewart 30589e560f4SRandall Stewart uint32_t rc_ssthresh_at;/* ssthresh at the retransmit Lock(a) */ 30689e560f4SRandall Stewart uint32_t rc_num_maps_alloced; /* Number of map blocks (sacks) we 30789e560f4SRandall Stewart * have allocated */ 30889e560f4SRandall Stewart uint32_t rc_rcvtime; /* When we last received data */ 3095e02b277SJonathan T. Looney uint32_t rc_num_split_allocs; /* num split map entries allocated */ 31035c7bb34SRandall Stewart 31189e560f4SRandall Stewart uint32_t rc_last_output_to; 31289e560f4SRandall Stewart uint32_t rc_went_idle_time; 31389e560f4SRandall Stewart 31489e560f4SRandall Stewart struct rack_sendmap *rc_sacklast; /* sack remembered place 31589e560f4SRandall Stewart * Lock(a) */ 31689e560f4SRandall Stewart 31789e560f4SRandall Stewart struct rack_sendmap *rc_rsm_at_retran; /* Debug variable kept for 31889e560f4SRandall Stewart * cache line alignment 31989e560f4SRandall Stewart * Lock(a) */ 320*963fb2adSRandall Stewart struct rack_sendmap *rc_first_appl; /* Pointer to first app limited */ 321*963fb2adSRandall Stewart struct rack_sendmap *rc_end_appl; /* Pointer to last app limited */ 32289e560f4SRandall Stewart /* Cache line split 0x100 */ 32389e560f4SRandall Stewart struct sack_filter rack_sf; 32489e560f4SRandall Stewart /* Cache line split 0x140 */ 32589e560f4SRandall Stewart /* Flags for various things */ 326*963fb2adSRandall Stewart uint32_t last_pacing_time; 32735c7bb34SRandall Stewart uint32_t rc_pace_max_segs; 32835c7bb34SRandall Stewart uint32_t rc_pace_min_segs; 329*963fb2adSRandall Stewart uint32_t rc_app_limited_cnt; 330*963fb2adSRandall Stewart uint16_t rack_per_of_gp_ss; /* 100 = 100%, so from 65536 = 655 x bw */ 331*963fb2adSRandall Stewart uint16_t rack_per_of_gp_ca; /* 100 = 100%, so from 65536 = 655 x bw */ 332*963fb2adSRandall Stewart uint16_t rack_per_of_gp_rec; /* 100 = 100%, so from 65536 = 655 x bw, 0=off */ 333*963fb2adSRandall Stewart uint16_t rack_per_of_gp_probertt; /* 100 = 100%, so from 65536 = 655 x bw, 0=off */ 33435c7bb34SRandall Stewart uint32_t rc_high_rwnd; 33535c7bb34SRandall Stewart uint32_t ack_count; 33635c7bb34SRandall Stewart uint32_t sack_count; 33735c7bb34SRandall Stewart uint32_t sack_noextra_move; 33835c7bb34SRandall Stewart uint32_t sack_moved_extra; 33989e560f4SRandall Stewart struct rack_rtt_sample rack_rs; 340*963fb2adSRandall Stewart const struct tcp_hwrate_limit_table *crte; 341*963fb2adSRandall Stewart uint32_t rc_agg_early; 342*963fb2adSRandall Stewart uint32_t rc_agg_delayed; 34335c7bb34SRandall Stewart uint32_t rc_tlp_rxt_last_time; 34435c7bb34SRandall Stewart uint32_t rc_saved_cwnd; 345*963fb2adSRandall Stewart uint32_t rc_gp_output_ts; 346*963fb2adSRandall Stewart uint32_t rc_gp_cumack_ts; 347*963fb2adSRandall Stewart struct timeval act_rcv_time; 348*963fb2adSRandall Stewart struct timeval rc_last_time_decay; /* SAD time decay happened here */ 349*963fb2adSRandall Stewart uint64_t gp_bw; 350*963fb2adSRandall Stewart uint64_t init_rate; 351*963fb2adSRandall Stewart #ifdef NETFLIX_SHARED_CWND 352*963fb2adSRandall Stewart struct shared_cwnd *rc_scw; 353*963fb2adSRandall Stewart #endif 354*963fb2adSRandall Stewart uint64_t last_gp_comp_bw; 355*963fb2adSRandall Stewart uint64_t last_max_bw; /* Our calculated max b/w last */ 356*963fb2adSRandall Stewart struct time_filter_small rc_gp_min_rtt; 357*963fb2adSRandall Stewart int32_t rc_rtt_diff; /* Timely style rtt diff of our gp_srtt */ 358*963fb2adSRandall Stewart uint32_t rc_gp_srtt; /* Current GP srtt */ 359*963fb2adSRandall Stewart uint32_t rc_prev_gp_srtt; /* Previous RTT */ 360*963fb2adSRandall Stewart uint32_t rc_entry_gp_rtt; /* Entry to PRTT gp-rtt */ 361*963fb2adSRandall Stewart uint32_t rc_loss_at_start; /* At measurement window where was our lost value */ 362*963fb2adSRandall Stewart 363*963fb2adSRandall Stewart uint32_t forced_ack_ts; 364*963fb2adSRandall Stewart uint32_t rc_lower_rtt_us_cts; /* Time our GP rtt was last lowered */ 365*963fb2adSRandall Stewart uint32_t rc_time_probertt_entered; 366*963fb2adSRandall Stewart uint32_t rc_time_probertt_starts; 367*963fb2adSRandall Stewart uint32_t rc_lowest_us_rtt; 368*963fb2adSRandall Stewart uint32_t rc_highest_us_rtt; 369*963fb2adSRandall Stewart uint32_t rc_last_us_rtt; 370*963fb2adSRandall Stewart uint32_t rc_time_of_last_probertt; 371*963fb2adSRandall Stewart uint32_t rc_target_probertt_flight; 372*963fb2adSRandall Stewart uint32_t rc_probertt_sndmax_atexit; /* Highest sent to in probe-rtt */ 373*963fb2adSRandall Stewart uint32_t rc_gp_lowrtt; /* Lowest rtt seen during GPUT measurement */ 374*963fb2adSRandall Stewart uint32_t rc_gp_high_rwnd; /* Highest rwnd seen during GPUT measurement */ 375*963fb2adSRandall Stewart int32_t rc_scw_index; 37689e560f4SRandall Stewart uint32_t rc_tlp_threshold; /* Socket option value Lock(a) */ 37789e560f4SRandall Stewart uint16_t rc_early_recovery_segs; /* Socket option value Lock(a) */ 37889e560f4SRandall Stewart uint16_t rc_reorder_shift; /* Socket option value Lock(a) */ 37989e560f4SRandall Stewart uint16_t rc_pkt_delay; /* Socket option value Lock(a) */ 380*963fb2adSRandall Stewart uint8_t rc_no_push_at_mrtt; /* No push when we exceed max rtt */ 381*963fb2adSRandall Stewart uint8_t num_avg; /* average count before we go to normal decay */ 38289e560f4SRandall Stewart uint8_t rc_prop_rate; /* Socket option value Lock(a) */ 38389e560f4SRandall Stewart uint8_t rc_prop_reduce; /* Socket option value Lock(a) */ 38489e560f4SRandall Stewart uint8_t rc_tlp_cwnd_reduce; /* Socket option value Lock(a) */ 38589e560f4SRandall Stewart uint8_t rc_early_recovery; /* Socket option value Lock(a) */ 38689e560f4SRandall Stewart uint8_t rc_prr_sendalot;/* Socket option value Lock(a) */ 38789e560f4SRandall Stewart uint8_t rc_min_to; /* Socket option value Lock(a) */ 38889e560f4SRandall Stewart uint8_t rc_rate_sample_method; 389*963fb2adSRandall Stewart uint8_t rc_gp_hist_idx; 39089e560f4SRandall Stewart }; 39189e560f4SRandall Stewart 392*963fb2adSRandall Stewart #define RACK_TIMELY_CNT_BOOST 5 /* At 5th increase boost */ 393*963fb2adSRandall Stewart #define RACK_MINRTT_FILTER_TIM 10 /* Seconds */ 394*963fb2adSRandall Stewart 39589e560f4SRandall Stewart #ifdef _KERNEL 39689e560f4SRandall Stewart 39789e560f4SRandall Stewart struct tcp_rack { 39889e560f4SRandall Stewart /* First cache line 0x00 */ 39989e560f4SRandall Stewart TAILQ_ENTRY(tcp_rack) r_hpts; /* hptsi queue next Lock(b) */ 40089e560f4SRandall Stewart int32_t(*r_substate) (struct mbuf *, struct tcphdr *, 40189e560f4SRandall Stewart struct socket *, struct tcpcb *, struct tcpopt *, 4028df12ffcSMichael Tuexen int32_t, int32_t, uint32_t, int, int, uint8_t); /* Lock(a) */ 40389e560f4SRandall Stewart struct tcpcb *rc_tp; /* The tcpcb Lock(a) */ 40489e560f4SRandall Stewart struct inpcb *rc_inp; /* The inpcb Lock(a) */ 40589e560f4SRandall Stewart uint32_t rc_free_cnt; /* Number of free entries on the rc_free list 40689e560f4SRandall Stewart * Lock(a) */ 40789e560f4SRandall Stewart uint32_t rc_rack_rtt; /* RACK-RTT Lock(a) */ 408*963fb2adSRandall Stewart uint16_t r_mbuf_queue : 1, /* Do we do mbuf queue for non-paced */ 409*963fb2adSRandall Stewart rtt_limit_mul : 4, /* muliply this by low rtt */ 410*963fb2adSRandall Stewart r_limit_scw : 1, 411*963fb2adSRandall Stewart r_avail_bits : 10; /* Available */ 41289e560f4SRandall Stewart 413*963fb2adSRandall Stewart uint16_t rc_user_set_max_segs; /* Socket option value Lock(a) */ 414*963fb2adSRandall Stewart uint16_t forced_ack : 1, 415*963fb2adSRandall Stewart rc_gp_incr : 1, 416*963fb2adSRandall Stewart rc_gp_bwred : 1, 417*963fb2adSRandall Stewart rc_gp_timely_inc_cnt : 3, 418*963fb2adSRandall Stewart rc_gp_timely_dec_cnt : 3, 419*963fb2adSRandall Stewart rc_not_backing_off: 1, 420*963fb2adSRandall Stewart rc_highly_buffered: 1, /* The path is highly buffered */ 421*963fb2adSRandall Stewart rc_dragged_bottom: 1, 422*963fb2adSRandall Stewart rc_dack_mode : 1, /* Mac O/S emulation of d-ack */ 423*963fb2adSRandall Stewart rc_dack_toggle : 1, /* For Mac O/S emulation of d-ack */ 424*963fb2adSRandall Stewart pacing_longer_than_rtt : 1, 425*963fb2adSRandall Stewart rc_gp_filled : 1; 42689e560f4SRandall Stewart uint8_t r_state; /* Current rack state Lock(a) */ 42789e560f4SRandall Stewart uint8_t rc_tmr_stopped : 7, 42889e560f4SRandall Stewart t_timers_stopped : 1; 429*963fb2adSRandall Stewart uint8_t rc_enobuf : 7, /* count of enobufs on connection provides */ 430*963fb2adSRandall Stewart rc_on_min_to : 1; 43189e560f4SRandall Stewart uint8_t r_timer_override : 1, /* hpts override Lock(a) */ 43289e560f4SRandall Stewart r_is_v6 : 1, /* V6 pcb Lock(a) */ 43389e560f4SRandall Stewart rc_in_persist : 1, 43489e560f4SRandall Stewart rc_tlp_in_progress : 1, 43589e560f4SRandall Stewart rc_always_pace : 1, /* Socket option value Lock(a) */ 436*963fb2adSRandall Stewart rc_pace_to_cwnd : 1, 437*963fb2adSRandall Stewart rc_pace_fill_if_rttin_range : 1, 438*963fb2adSRandall Stewart xxx_avail_bits : 1; 439*963fb2adSRandall Stewart uint8_t app_limited_needs_set : 1, 440*963fb2adSRandall Stewart use_fixed_rate : 1, 44135c7bb34SRandall Stewart rc_has_collapsed : 1, 44235c7bb34SRandall Stewart r_rep_attack : 1, 44335c7bb34SRandall Stewart r_rep_reverse : 1, 444*963fb2adSRandall Stewart rack_hdrw_pacing : 1, /* We are doing Hardware pacing */ 445*963fb2adSRandall Stewart rack_hdw_pace_ena : 1, /* Is hardware pacing enabled? */ 446*963fb2adSRandall Stewart rack_attempt_hdwr_pace : 1; /* Did we attempt hdwr pacing (if allowed) */ 447*963fb2adSRandall Stewart uint8_t rack_tlp_threshold_use : 3, /* only 1, 2 and 3 used so far */ 448*963fb2adSRandall Stewart rack_rec_nonrxt_use_cr : 1, 449*963fb2adSRandall Stewart rack_enable_scwnd : 1, 450*963fb2adSRandall Stewart rack_attempted_scwnd : 1, 451*963fb2adSRandall Stewart rack_no_prr : 1, 452*963fb2adSRandall Stewart rack_scwnd_is_idle : 1; 45389e560f4SRandall Stewart uint8_t rc_allow_data_af_clo: 1, 45489e560f4SRandall Stewart delayed_ack : 1, 45535c7bb34SRandall Stewart set_pacing_done_a_iw : 1, 456*963fb2adSRandall Stewart use_rack_rr : 1, 4575e02b277SJonathan T. Looney alloc_limit_reported : 1, 45835c7bb34SRandall Stewart sack_attack_disable : 1, 45935c7bb34SRandall Stewart do_detection : 1, 460*963fb2adSRandall Stewart rc_force_max_seg : 1; 461*963fb2adSRandall Stewart uint8_t rack_cwnd_limited : 1, 462*963fb2adSRandall Stewart r_early : 1, 463*963fb2adSRandall Stewart r_late : 1, 464*963fb2adSRandall Stewart r_running_early : 1, 465*963fb2adSRandall Stewart r_running_late : 1, 466*963fb2adSRandall Stewart r_wanted_output: 1, 467*963fb2adSRandall Stewart r_rr_config : 2; 468*963fb2adSRandall Stewart uint16_t rc_init_win : 8, 469*963fb2adSRandall Stewart rc_gp_rtt_set : 1, 470*963fb2adSRandall Stewart rc_gp_dyn_mul : 1, 471*963fb2adSRandall Stewart rc_gp_saw_rec : 1, 472*963fb2adSRandall Stewart rc_gp_saw_ca : 1, 473*963fb2adSRandall Stewart rc_gp_saw_ss : 1, 474*963fb2adSRandall Stewart rc_gp_no_rec_chg : 1, 475*963fb2adSRandall Stewart in_probe_rtt : 1, 476*963fb2adSRandall Stewart measure_saw_probe_rtt : 1; 47789e560f4SRandall Stewart /* Cache line 2 0x40 */ 47889e560f4SRandall Stewart struct rack_control r_ctl; 47989e560f4SRandall Stewart } __aligned(CACHE_LINE_SIZE); 48089e560f4SRandall Stewart 48189e560f4SRandall Stewart #endif 48289e560f4SRandall Stewart #endif 483