rack.c: diff between f7220c486c1bf858e97c2d8e5c4c9dac4947d50a and a9696510f5caef4a7c525fcf4359597798829350
1/*-
2 * Copyright (c) 2016-2020 Netflix, Inc.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.

--- 12844 unchanged lines hidden ---

12853 * also change the values of t_srtt and t_rttvar, if
12854 * they are non-zero. They are kept with a 5
12855 * bit decimal so we have to carefully convert
12856 * these to get the full precision.
12857 */
12858 rack_convert_rtts(tp);
12859 tp->t_rttlow = TICKS_2_USEC(tp->t_rttlow);
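As a worked example of the fixed-point conversion the comment above describes (a sketch only: it assumes 5 fractional bits and a 1 ms clock tick, i.e. HZ=1000; it is not the kernel's rack_convert_rtts()):

#include <stdio.h>
#include <stdint.h>

#define RTT_SHIFT	5	/* assumed: srtt keeps 5 fractional bits */
#define TICK_USEC	1000	/* assumed: 1 ms per tick (HZ=1000) */

int
main(void)
{
	uint32_t t_srtt = 320;	/* 320 / 2^5 = 10 ticks of smoothed RTT */
	uint64_t srtt_us;

	/* Multiply into microseconds first, then drop the fraction bits. */
	srtt_us = ((uint64_t)t_srtt * TICK_USEC) >> RTT_SHIFT;
	printf("t_srtt=%u -> %llu us\n", t_srtt,
	    (unsigned long long)srtt_us);	/* prints 10000 */
	return (0);
}
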
12860 if (rack_do_hystart) {
12861 struct sockopt sopt;
12862 struct cc_newreno_opts opt;
12863
12864 sopt.sopt_valsize = sizeof(struct cc_newreno_opts);
12865 sopt.sopt_dir = SOPT_SET;
12866 opt.name = CC_NEWRENO_ENABLE_HYSTART;
12867 opt.val = rack_do_hystart;
12868 if (CC_ALGO(tp)->ctl_output != NULL)
12869 (void)CC_ALGO(tp)->ctl_output(tp->ccv, &sopt, &opt);
12861 tp->ccv->flags |= CCF_HYSTART_ALLOWED;
12862 if (rack_do_hystart > 1)
12863 tp->ccv->flags |= CCF_HYSTART_CAN_SH_CWND;
12864 if (rack_do_hystart > 2)
12865 tp->ccv->flags |= CCF_HYSTART_CONS_SSTH;
12870 }
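The replacement above drops the cc_newreno_opts/ctl_output detour and turns the rack_do_hystart level directly into HyStart++ capability flags on the connection's congestion-control data. A minimal standalone sketch of that mapping (the flag values below are illustrative stand-ins, not the definitions in <netinet/cc/cc.h>; the helper name is hypothetical and the level semantics are paraphrased from the flag names):

#include <stdint.h>

#define CCF_HYSTART_ALLOWED	0x01	/* illustrative values only */
#define CCF_HYSTART_CAN_SH_CWND	0x02
#define CCF_HYSTART_CONS_SSTH	0x04

/* Level 0: off; 1: allowed; 2: + may shrink cwnd; 3: + conservative ssthresh. */
static uint32_t
hystart_flags_for_level(int level)
{
	uint32_t flags = 0;

	if (level > 0)
		flags |= CCF_HYSTART_ALLOWED;
	if (level > 1)
		flags |= CCF_HYSTART_CAN_SH_CWND;
	if (level > 2)
		flags |= CCF_HYSTART_CONS_SSTH;
	return (flags);
}
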
12871 if (rack_def_profile)
12872 rack_set_profile(rack, rack_def_profile);
12873 /* Cancel the GP measurement in progress */
12874 tp->t_flags &= ~TF_GPUTINPROG;
12875 if (SEQ_GT(tp->snd_max, tp->iss))
12876 snt = tp->snd_max - tp->iss;
12877 else

--- 632 unchanged lines hidden ---

13510 if (rack_apply_rtt_with_reduced_conf) {
13511 rack_apply_updated_usrtt(rack, us_rtt, us_cts);
13512 tcp_rack_xmit_timer(rack, us_rtt, 0, us_rtt, 0, NULL, 1);
13513 }
13514 }
13515 }
13516}
13517
13518
13519static int
13520rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mbuf *m, int nxt_pkt, struct timeval *tv)
13521{
13522 /*
13523 * Handle a "special" compressed ack mbuf. Each incoming
13524 * ack has only four possible dispositions:
13525 *
13526 * A) It moves the cum-ack forward

--- 234 unchanged lines hidden ---

13761 /* Account for the acks */
13762 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
13763 tp->tcp_cnt_counters[CNT_OF_ACKS_IN] += (((ae->ack - high_seq) + segsiz - 1) / segsiz);
13764 }
13765 counter_u64_add(tcp_cnt_counters[CNT_OF_ACKS_IN],
13766 (((ae->ack - high_seq) + segsiz - 1) / segsiz));
13767#endif
13768 high_seq = ae->ack;
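The accounting above charges one ack per segment-sized chunk the compressed entry covers, via a ceiling division. A small worked example of that arithmetic (names are local to the sketch):

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint32_t high_seq = 1000;	/* previous cumulative ack point */
	uint32_t ack = 4001;		/* ack carried by this entry */
	uint32_t segsiz = 1448;		/* segment size in bytes */
	uint32_t bytes = ack - high_seq;		/* 3001 */
	uint32_t acks = (bytes + segsiz - 1) / segsiz;	/* ceil(3001/1448) = 3 */

	printf("%u bytes acked ~ %u segment-sized acks\n", bytes, acks);
	return (0);
}
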
13769 if (SEQ_GEQ(high_seq, rack->r_ctl.roundends)) {
13764 if (rack_verbose_logging && (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF)) {
13765 union tcp_log_stackspecific log;
13766 struct timeval tv;
13767
13768 memset(&log.u_bbr, 0, sizeof(log.u_bbr));
13769 log.u_bbr.timeStamp = tcp_get_usecs(&tv);
13770 log.u_bbr.flex1 = high_seq;
13771 log.u_bbr.flex2 = rack->r_ctl.roundends;
13772 log.u_bbr.flex3 = rack->r_ctl.current_round;
13773 log.u_bbr.rttProp = (uint64_t)CC_ALGO(tp)->newround;
13774 log.u_bbr.flex8 = 8;
13775 tcp_log_event_(tp, NULL, NULL, NULL, BBR_LOG_CWND, 0,
13776 0, &log, false, NULL, NULL, 0, &tv);
13777 }
13778 /*
13779 * The draft (v3) calls for us to use SEQ_GEQ, but that
13780 * causes issues when we are just going app limited. Lets
13781 * instead use SEQ_GT <or> where its equal but more data
13782 * is outstanding.
13783 */
13784 if ((SEQ_GT(high_seq, rack->r_ctl.roundends)) ||
13785 ((high_seq == rack->r_ctl.roundends) &&
13786 SEQ_GT(tp->snd_max, tp->snd_una))) {
13770 rack->r_ctl.current_round++;
13771 rack->r_ctl.roundends = tp->snd_max;
13772 if (CC_ALGO(tp)->newround != NULL) {
13773 CC_ALGO(tp)->newround(tp->ccv, rack->r_ctl.current_round);
13774 }
13775 }
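The comment in the new hunk explains the deviation from the HyStart++ draft: a round only ends when the ack is strictly past roundends, or exactly at it while data is still outstanding, so a purely application-limited ack sitting on the boundary does not start a new round. A compact restatement of that predicate as a standalone helper (illustrative only; SEQ_GT is re-declared locally in the style of <netinet/tcp_seq.h>):

#include <stdint.h>

#define SEQ_GT(a, b)	((int32_t)((a) - (b)) > 0)	/* modular compare */

static int
round_has_ended(uint32_t acked, uint32_t roundends,
    uint32_t snd_una, uint32_t snd_max)
{
	if (SEQ_GT(acked, roundends))
		return (1);	/* clearly past the round's end */
	if (acked == roundends && SEQ_GT(snd_max, snd_una))
		return (1);	/* on the boundary, data still in flight */
	return (0);		/* app limited at the boundary: wait */
}
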
13776 /* Setup our act_rcv_time */
13777 if ((ae->flags & TSTMP_LRO) || (ae->flags & TSTMP_HDWR)) {

--- 411 unchanged lines hidden ---

14189 int32_t thflags, retval, did_out = 0;
14190 int32_t way_out = 0;
14191 /*
14192 * cts - is the current time from tv (caller gets ts) in microseconds.
14193 * ms_cts - is the current time from tv in milliseconds.
14194 * us_cts - is the time that LRO or hardware actually got the packet in microseconds.
14195 */
14196 uint32_t cts, us_cts, ms_cts;
14197 uint32_t tiwin;
14214 uint32_t tiwin, high_seq;
14198 struct timespec ts;
14199 struct tcpopt to;
14200 struct tcp_rack *rack;
14201 struct rack_sendmap *rsm;
14202 int32_t prev_state = 0;
14203#ifdef TCP_ACCOUNTING
14204 int ack_val_set = 0xf;
14205#endif

--- 90 unchanged lines hidden ---

14296 */
14297 rack_log_pacing_delay_calc(rack, (tp->gput_ack - tp->gput_seq) /*flex2*/,
14298 rack->r_ctl.rc_gp_srtt /*flex1*/,
14299 tp->gput_seq,
14300 0, 0, 18, __LINE__, NULL, 0);
14301 tp->t_flags &= ~TF_GPUTINPROG;
14302 }
14303 }
14321 high_seq = th->th_ack;
14304 if (tp->t_logstate != TCP_LOG_STATE_OFF) {
14305 union tcp_log_stackspecific log;
14306 struct timeval ltv;
14307#ifdef NETFLIX_HTTP_LOGGING
14308 struct http_sendfile_track *http_req;
14309
14310 if (SEQ_GT(th->th_ack, tp->snd_una)) {
14311 http_req = tcp_http_find_req_for_seq(tp, (th->th_ack-1));

--- 310 unchanged lines hidden ---

14622 if (tcp_output(tp) < 0)
14623 return (1);
14624 did_out = 1;
14625 }
14626 rack_start_hpts_timer(rack, tp, cts, 0, 0, 0);
14627 rack_free_trim(rack);
14628 }
14629 /* Update any rounds needed */
14630 if (SEQ_GEQ(tp->snd_una, rack->r_ctl.roundends)) {
14648 if (rack_verbose_logging && (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF)) {
14649 union tcp_log_stackspecific log;
14650 struct timeval tv;
14651
14652 memset(&log.u_bbr, 0, sizeof(log.u_bbr));
14653 log.u_bbr.timeStamp = tcp_get_usecs(&tv);
14654 log.u_bbr.flex1 = high_seq;
14655 log.u_bbr.flex2 = rack->r_ctl.roundends;
14656 log.u_bbr.flex3 = rack->r_ctl.current_round;
14657 log.u_bbr.rttProp = (uint64_t)CC_ALGO(tp)->newround;
14658 log.u_bbr.flex8 = 9;
14659 tcp_log_event_(tp, NULL, NULL, NULL, BBR_LOG_CWND, 0,
14660 0, &log, false, NULL, NULL, 0, &tv);
14661 }
14662 /*
14663 * The draft (v3) calls for us to use SEQ_GEQ, but that
14664 * causes issues when we are just going app limited. Lets
14665 * instead use SEQ_GT <or> where its equal but more data
14666 * is outstanding.
14667 */
14668 if ((SEQ_GT(tp->snd_una, rack->r_ctl.roundends)) ||
14669 ((tp->snd_una == rack->r_ctl.roundends) && SEQ_GT(tp->snd_max, tp->snd_una))) {
14631 rack->r_ctl.current_round++;
14632 rack->r_ctl.roundends = tp->snd_max;
14633 if (CC_ALGO(tp)->newround != NULL) {
14634 CC_ALGO(tp)->newround(tp->ccv, rack->r_ctl.current_round);
14635 }
14636 }
14637 if ((nxt_pkt == 0) &&
14638 ((rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK) == 0) &&

--- 5560 unchanged lines hidden ---

20199 break;
20200 case TCP_RACK_EARLY_SEG:
20201 /* If early recovery max segments */
20202 RACK_OPTS_INC(tcp_rack_early_seg);
20203 rack->r_ctl.rc_early_recovery_segs = optval;
20204 break;
20205 case TCP_RACK_ENABLE_HYSTART:
20206 {
20207 struct sockopt sopt;
20208 struct cc_newreno_opts opt;
20209
20210 sopt.sopt_valsize = sizeof(struct cc_newreno_opts);
20211 sopt.sopt_dir = SOPT_SET;
20212 opt.name = CC_NEWRENO_ENABLE_HYSTART;
20213 opt.val = optval;
20214 if (CC_ALGO(tp)->ctl_output != NULL)
20215 error = CC_ALGO(tp)->ctl_output(tp->ccv, &sopt, &opt);
20216 else
20217 error = EINVAL;
20246 if (optval) {
20247 tp->ccv->flags |= CCF_HYSTART_ALLOWED;
20248 if (rack_do_hystart > RACK_HYSTART_ON)
20249 tp->ccv->flags |= CCF_HYSTART_CAN_SH_CWND;
20250 if (rack_do_hystart > RACK_HYSTART_ON_W_SC)
20251 tp->ccv->flags |= CCF_HYSTART_CONS_SSTH;
20252 } else {
20253 tp->ccv->flags &= ~(CCF_HYSTART_ALLOWED|CCF_HYSTART_CAN_SH_CWND|CCF_HYSTART_CONS_SSTH);
20254 }
20218 }
20219 break;
20220 case TCP_RACK_REORD_THRESH:
20221 /* RACK reorder threshold (shift amount) */
20222 RACK_OPTS_INC(tcp_rack_reord_thresh);
20223 if ((optval > 0) && (optval < 31))
20224 rack->r_ctl.rc_reorder_shift = optval;
20225 else

--- 453 unchanged lines hidden ---

20679 optval |= 1;
20680 }
20681 if (rack->rc_rack_use_dsack) {
20682 optval |= 2;
20683 }
20684 break;
20685 case TCP_RACK_ENABLE_HYSTART:
20686 {
20687 struct sockopt sopt;
20688 struct cc_newreno_opts opt;
20689
20690 sopt.sopt_valsize = sizeof(struct cc_newreno_opts);
20691 sopt.sopt_dir = SOPT_GET;
20692 opt.name = CC_NEWRENO_ENABLE_HYSTART;
20693 if (CC_ALGO(tp)->ctl_output != NULL)
20694 error = CC_ALGO(tp)->ctl_output(tp->ccv, &sopt, &opt);
20695 else
20696 error = EINVAL;
20697 optval = opt.val;
20724 if (tp->ccv->flags & CCF_HYSTART_ALLOWED) {
20725 optval = RACK_HYSTART_ON;
20726 if (tp->ccv->flags & CCF_HYSTART_CAN_SH_CWND)
20727 optval = RACK_HYSTART_ON_W_SC;
20728 if (tp->ccv->flags & CCF_HYSTART_CONS_SSTH)
20729 optval = RACK_HYSTART_ON_W_SC_C;
20730 } else {
20731 optval = RACK_HYSTART_OFF;
20732 }
20698 }
20699 break;
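For context, a hedged user-space sketch of exercising this option: enable HyStart++ on a socket that is already using the RACK stack and read the resulting mode back. Error handling is trimmed, and the numeric RACK_HYSTART_* encoding is deliberately not assumed here; the getsockopt value is whatever the stack reports.

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>

/* 'fd' is assumed to be a connected TCP socket whose stack is RACK. */
static void
toggle_rack_hystart(int fd)
{
	int val = 1;
	socklen_t len = sizeof(val);

	if (setsockopt(fd, IPPROTO_TCP, TCP_RACK_ENABLE_HYSTART,
	    &val, sizeof(val)) == -1)
		perror("setsockopt(TCP_RACK_ENABLE_HYSTART)");
	if (getsockopt(fd, IPPROTO_TCP, TCP_RACK_ENABLE_HYSTART,
	    &val, &len) == -1)
		perror("getsockopt(TCP_RACK_ENABLE_HYSTART)");
	else
		printf("hystart mode is now %d\n", val);
}
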
20700 case TCP_FAST_RSM_HACK:
20701 optval = rack->fast_rsm_hack;
20702 break;
20703 case TCP_DEFER_OPTIONS:
20704 optval = rack->defer_options;
20705 break;

--- 284 unchanged lines hidden ---