xref: /freebsd/sys/netinet/cc/cc_newreno.c (revision 790b5264886e581e995fc3b8fa45ca4ab4ffd31c)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
5  *	The Regents of the University of California.
6  * Copyright (c) 2007-2008,2010,2014
7  *	Swinburne University of Technology, Melbourne, Australia.
8  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
9  * Copyright (c) 2010 The FreeBSD Foundation
10  * All rights reserved.
11  *
12  * This software was developed at the Centre for Advanced Internet
13  * Architectures, Swinburne University of Technology, by Lawrence Stewart, James
14  * Healy and David Hayes, made possible in part by a grant from the Cisco
15  * University Research Program Fund at Community Foundation Silicon Valley.
16  *
17  * Portions of this software were developed at the Centre for Advanced
18  * Internet Architectures, Swinburne University of Technology, Melbourne,
19  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
20  *
21  * Redistribution and use in source and binary forms, with or without
22  * modification, are permitted provided that the following conditions
23  * are met:
24  * 1. Redistributions of source code must retain the above copyright
25  *    notice, this list of conditions and the following disclaimer.
26  * 2. Redistributions in binary form must reproduce the above copyright
27  *    notice, this list of conditions and the following disclaimer in the
28  *    documentation and/or other materials provided with the distribution.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  */
42 
43 /*
44  * This software was first released in 2007 by James Healy and Lawrence Stewart
45  * whilst working on the NewTCP research project at Swinburne University of
46  * Technology's Centre for Advanced Internet Architectures, Melbourne,
47  * Australia, which was made possible in part by a grant from the Cisco
48  * University Research Program Fund at Community Foundation Silicon Valley.
49  * More details are available at:
50  *   http://caia.swin.edu.au/urp/newtcp/
51  *
52  * Dec 2014 garmitage@swin.edu.au
53  * Borrowed code fragments from cc_cdg.c to add modifiable beta
54  * via sysctls.
55  *
56  */
57 
58 #include <sys/cdefs.h>
59 __FBSDID("$FreeBSD$");
60 
61 #include <sys/param.h>
62 #include <sys/kernel.h>
63 #include <sys/malloc.h>
64 #include <sys/module.h>
65 #include <sys/socket.h>
66 #include <sys/lock.h>
67 #include <sys/mutex.h>
68 #include <sys/socketvar.h>
69 #include <sys/sysctl.h>
70 #include <sys/systm.h>
71 
72 #include <net/vnet.h>
73 
74 #include <netinet/in.h>
75 #include <netinet/in_pcb.h>
76 #include <netinet/tcp.h>
77 #include <netinet/tcp_seq.h>
78 #include <netinet/tcp_var.h>
79 #include <netinet/tcp_log_buf.h>
80 #include <netinet/tcp_hpts.h>
81 #include <netinet/cc/cc.h>
82 #include <netinet/cc/cc_module.h>
83 #include <netinet/cc/cc_newreno.h>
84 
static MALLOC_DEFINE(M_NEWRENO, "newreno data",
	"newreno beta values");

/* Forward declarations of the cc_algo method implementations below. */
static void	newreno_cb_destroy(struct cc_var *ccv);
static void	newreno_ack_received(struct cc_var *ccv, uint16_t type);
static void	newreno_after_idle(struct cc_var *ccv);
static void	newreno_cong_signal(struct cc_var *ccv, uint32_t type);
static void	newreno_post_recovery(struct cc_var *ccv);
static int newreno_ctl_output(struct cc_var *ccv, struct sockopt *sopt, void *buf);
static void	newreno_newround(struct cc_var *ccv, uint32_t round_cnt);
static void	newreno_rttsample(struct cc_var *ccv, uint32_t usec_rtt, uint32_t rxtcnt, uint32_t fas);
static 	int	newreno_cb_init(struct cc_var *ccv);

/*
 * Per-vnet default multiplicative decrease factors, in percent of cwnd.
 * beta is used for loss signals; beta_ecn is the milder backoff selected
 * for ECN when ABE is in effect (see newreno_cong_signal()).
 */
VNET_DEFINE(uint32_t, newreno_beta) = 50;
VNET_DEFINE(uint32_t, newreno_beta_ecn) = 80;
#define V_newreno_beta VNET(newreno_beta)
#define V_newreno_beta_ecn VNET(newreno_beta_ecn)

/* Method table registered with the CC framework (see DECLARE_CC_MODULE). */
struct cc_algo newreno_cc_algo = {
	.name = "newreno",
	.cb_destroy = newreno_cb_destroy,
	.ack_received = newreno_ack_received,
	.after_idle = newreno_after_idle,
	.cong_signal = newreno_cong_signal,
	.post_recovery = newreno_post_recovery,
	.ctl_output = newreno_ctl_output,
	.newround = newreno_newround,
	.rttsample = newreno_rttsample,
	.cb_init = newreno_cb_init,
};

/* HyStart++ tunables, exposed via the sysctls at the bottom of this file. */
static uint32_t hystart_lowcwnd = 16;
static uint32_t hystart_minrtt_thresh = 4000;
static uint32_t hystart_maxrtt_thresh = 16000;
static uint32_t hystart_n_rttsamples = 8;
static uint32_t hystart_css_growth_div = 4;
static uint32_t hystart_css_rounds = 5;
static uint32_t hystart_bblogs = 0;
123 
/*
 * Emit a HyStart++ black-box log record (TCP_HYSTART) for this connection,
 * carrying the current CSS state in the stack-specific log union.  A no-op
 * unless the hystart_bblogs sysctl is set and BB logging is on for the tcpcb.
 */
static void
newreno_log_hystart_event(struct cc_var *ccv, struct newreno *nreno, uint8_t mod, uint32_t flex1)
{
	/*
	 * Types of logs (mod value)
	 * 1 - rtt_thresh in flex1, checking to see if RTT is too great.
	 * 2 - rtt is too great, rtt_thresh in flex1.
	 * 3 - CSS is active, incr in flex1
	 * 4 - A new round is beginning, flex1 is round count
	 * 5 - A new RTT measurement, flex1 is the new measurement.
	 * 6 - We enter CA, ssthresh is also in flex1.
	 * 7 - Socket option to change hystart executed, opt.val in flex1.
	 * 8 - Back out of CSS into SS, flex1 is the css_baseline_minrtt
	 */
	struct tcpcb *tp;

	if (hystart_bblogs == 0)
		return;
	tp = ccv->ccvc.tcp;
	if (tp->t_logstate != TCP_LOG_STATE_OFF) {
		union tcp_log_stackspecific log;
		struct timeval tv;

		memset(&log, 0, sizeof(log));
		log.u_bbr.flex1 = flex1;
		log.u_bbr.flex2 = nreno->css_current_round_minrtt;
		log.u_bbr.flex3 = nreno->css_lastround_minrtt;
		log.u_bbr.flex4 = nreno->css_rttsample_count;
		log.u_bbr.flex5 = nreno->css_entered_at_round;
		log.u_bbr.flex6 = nreno->css_baseline_minrtt;
		/* We only need bottom 16 bits of flags */
		log.u_bbr.flex7 = nreno->newreno_flags & 0x0000ffff;
		log.u_bbr.flex8 = mod;
		log.u_bbr.epoch = nreno->css_current_round;
		log.u_bbr.timeStamp = tcp_get_usecs(&tv);
		log.u_bbr.lt_epoch = nreno->css_fas_at_css_entry;
		log.u_bbr.pkts_out = nreno->css_last_fas;
		log.u_bbr.delivered = nreno->css_lowrtt_fas;
		TCP_LOG_EVENTP(tp, NULL,
		    &tp->t_inpcb->inp_socket->so_rcv,
		    &tp->t_inpcb->inp_socket->so_snd,
		    TCP_HYSTART, 0,
		    0, &log, false, &tv);
	}
}
169 
170 static 	int
171 newreno_cb_init(struct cc_var *ccv)
172 {
173 	struct newreno *nreno;
174 
175 	ccv->cc_data = NULL;
176 	ccv->cc_data = malloc(sizeof(struct newreno), M_NEWRENO, M_NOWAIT);
177 	if (ccv->cc_data == NULL)
178 		return (ENOMEM);
179 	nreno = (struct newreno *)ccv->cc_data;
180 	/* NB: nreno is not zeroed, so initialise all fields. */
181 	nreno->beta = V_newreno_beta;
182 	nreno->beta_ecn = V_newreno_beta_ecn;
183 	/*
184 	 * We set the enabled flag so that if
185 	 * the socket option gets strobed and
186 	 * we have not hit a loss
187 	 */
188 	nreno->newreno_flags = CC_NEWRENO_HYSTART_ENABLED;
189 	/* At init set both to infinity */
190 	nreno->css_lastround_minrtt = 0xffffffff;
191 	nreno->css_current_round_minrtt = 0xffffffff;
192 	nreno->css_current_round = 0;
193 	nreno->css_baseline_minrtt = 0xffffffff;
194 	nreno->css_rttsample_count = 0;
195 	nreno->css_entered_at_round = 0;
196 	nreno->css_fas_at_css_entry = 0;
197 	nreno->css_lowrtt_fas = 0;
198 	nreno->css_last_fas = 0;
199 	return (0);
200 }
201 
202 static void
203 newreno_cb_destroy(struct cc_var *ccv)
204 {
205 	free(ccv->cc_data, M_NEWRENO);
206 }
207 
/*
 * Open the congestion window on a regular in-order ACK, implementing
 * slow start / congestion avoidance per RFC 5681 and ABC (RFC 3465),
 * plus the HyStart++ CSS entry check during slow start.  Growth only
 * happens when we are cwnd-limited and not in recovery.
 */
static void
newreno_ack_received(struct cc_var *ccv, uint16_t type)
{
	struct newreno *nreno;

	nreno = (struct newreno *)ccv->cc_data;
	if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) &&
	    (ccv->flags & CCF_CWND_LIMITED)) {
		u_int cw = CCV(ccv, snd_cwnd);
		u_int incr = CCV(ccv, t_maxseg);

		/*
		 * Regular in-order ACK, open the congestion window.
		 * Method depends on which congestion control state we're
		 * in (slow start or cong avoid) and if ABC (RFC 3465) is
		 * enabled.
		 *
		 * slow start: cwnd <= ssthresh
		 * cong avoid: cwnd > ssthresh
		 *
		 * slow start and ABC (RFC 3465):
		 *   Grow cwnd exponentially by the amount of data
		 *   ACKed capping the max increment per ACK to
		 *   (abc_l_var * maxseg) bytes.
		 *
		 * slow start without ABC (RFC 5681):
		 *   Grow cwnd exponentially by maxseg per ACK.
		 *
		 * cong avoid and ABC (RFC 3465):
		 *   Grow cwnd linearly by maxseg per RTT for each
		 *   cwnd worth of ACKed data.
		 *
		 * cong avoid without ABC (RFC 5681):
		 *   Grow cwnd linearly by approximately maxseg per RTT using
		 *   maxseg^2 / cwnd per ACK as the increment.
		 *   If cwnd > maxseg^2, fix the cwnd increment at 1 byte to
		 *   avoid capping cwnd.
		 */
		if (cw > CCV(ccv, snd_ssthresh)) {
			if (nreno->newreno_flags & CC_NEWRENO_HYSTART_IN_CSS) {
				/*
				 * We have slipped into CA with
				 * CSS active. Deactivate all.
				 */
				/* Turn off the CSS flag */
				nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_IN_CSS;
				/* Disable use of CSS in the future except long idle  */
				nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_ENABLED;
			}
			if (V_tcp_do_rfc3465) {
				if (ccv->flags & CCF_ABC_SENTAWND)
					ccv->flags &= ~CCF_ABC_SENTAWND;
				else
					incr = 0;
			} else
				incr = max((incr * incr / cw), 1);
		} else if (V_tcp_do_rfc3465) {
			/*
			 * In slow-start with ABC enabled and no RTO in sight?
			 * (Must not use abc_l_var > 1 if slow starting after
			 * an RTO. On RTO, snd_nxt = snd_una, so the
			 * snd_nxt == snd_max check is sufficient to
			 * handle this).
			 *
			 * XXXLAS: Find a way to signal SS after RTO that
			 * doesn't rely on tcpcb vars.
			 */
			uint16_t abc_val;

			if (ccv->flags & CCF_USE_LOCAL_ABC)
				abc_val = ccv->labc;
			else
				abc_val = V_tcp_abc_l_var;
			if ((nreno->newreno_flags & CC_NEWRENO_HYSTART_ALLOWED) &&
			    (nreno->newreno_flags & CC_NEWRENO_HYSTART_ENABLED) &&
			    ((nreno->newreno_flags & CC_NEWRENO_HYSTART_IN_CSS) == 0)) {
				/*
				 * Hystart is allowed and still enabled and we are not yet
				 * in CSS. Lets check to see if we can make a decision on
				 * if we need to go into CSS.
				 */
				if ((nreno->css_rttsample_count >= hystart_n_rttsamples) &&
				    (CCV(ccv, snd_cwnd) >
				     (hystart_lowcwnd * tcp_fixed_maxseg(ccv->ccvc.tcp)))) {
					uint32_t rtt_thresh;

					/* Clamp (minrtt_thresh, lastround/8, maxrtt_thresh) */
					rtt_thresh = (nreno->css_lastround_minrtt >> 3);
					if (rtt_thresh < hystart_minrtt_thresh)
						rtt_thresh = hystart_minrtt_thresh;
					if (rtt_thresh > hystart_maxrtt_thresh)
						rtt_thresh = hystart_maxrtt_thresh;
					newreno_log_hystart_event(ccv, nreno, 1, rtt_thresh);
					/* Round min RTT grew by >= thresh over last round: enter CSS. */
					if (nreno->css_current_round_minrtt >= (nreno->css_lastround_minrtt + rtt_thresh)) {
						/* Enter CSS */
						nreno->newreno_flags |= CC_NEWRENO_HYSTART_IN_CSS;
						nreno->css_fas_at_css_entry = nreno->css_lowrtt_fas;
						nreno->css_baseline_minrtt = nreno->css_current_round_minrtt;
						nreno->css_entered_at_round = nreno->css_current_round;
						newreno_log_hystart_event(ccv, nreno, 2, rtt_thresh);
					}
				}
			}
			if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max))
				incr = min(ccv->bytes_this_ack,
				    ccv->nsegs * abc_val *
				    CCV(ccv, t_maxseg));
			else
				incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg));

			/* Only if Hystart is enabled will the flag get set */
			if (nreno->newreno_flags & CC_NEWRENO_HYSTART_IN_CSS) {
				/* In CSS slow start growth is divided down (conservative). */
				incr /= hystart_css_growth_div;
				newreno_log_hystart_event(ccv, nreno, 3, incr);
			}
		}
		/* ABC is on by default, so incr equals 0 frequently. */
		if (incr > 0)
			CCV(ccv, snd_cwnd) = min(cw + incr,
			    TCP_MAXWIN << CCV(ccv, snd_scale));
	}
}
330 
331 static void
332 newreno_after_idle(struct cc_var *ccv)
333 {
334 	struct newreno *nreno;
335 	uint32_t rw;
336 
337 	nreno = (struct newreno *)ccv->cc_data;
338 	/*
339 	 * If we've been idle for more than one retransmit timeout the old
340 	 * congestion window is no longer current and we have to reduce it to
341 	 * the restart window before we can transmit again.
342 	 *
343 	 * The restart window is the initial window or the last CWND, whichever
344 	 * is smaller.
345 	 *
346 	 * This is done to prevent us from flooding the path with a full CWND at
347 	 * wirespeed, overloading router and switch buffers along the way.
348 	 *
349 	 * See RFC5681 Section 4.1. "Restarting Idle Connections".
350 	 *
351 	 * In addition, per RFC2861 Section 2, the ssthresh is set to the
352 	 * maximum of the former ssthresh or 3/4 of the old cwnd, to
353 	 * not exit slow-start prematurely.
354 	 */
355 	rw = tcp_compute_initwnd(tcp_maxseg(ccv->ccvc.tcp));
356 
357 	CCV(ccv, snd_ssthresh) = max(CCV(ccv, snd_ssthresh),
358 	    CCV(ccv, snd_cwnd)-(CCV(ccv, snd_cwnd)>>2));
359 
360 	CCV(ccv, snd_cwnd) = min(rw, CCV(ccv, snd_cwnd));
361 	if ((nreno->newreno_flags & CC_NEWRENO_HYSTART_ENABLED) == 0) {
362 		if (CCV(ccv, snd_cwnd) <= (hystart_lowcwnd * tcp_fixed_maxseg(ccv->ccvc.tcp))) {
363 			/*
364 			 * Re-enable hystart if our cwnd has fallen below
365 			 * the hystart lowcwnd point.
366 			 */
367 			nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_IN_CSS;
368 			nreno->newreno_flags |= CC_NEWRENO_HYSTART_ENABLED;
369 		}
370 	}
371 }
372 
373 /*
374  * Perform any necessary tasks before we enter congestion recovery.
375  */
376 static void
377 newreno_cong_signal(struct cc_var *ccv, uint32_t type)
378 {
379 	struct newreno *nreno;
380 	uint32_t beta, beta_ecn, cwin, factor;
381 	u_int mss;
382 
383 	cwin = CCV(ccv, snd_cwnd);
384 	mss = tcp_fixed_maxseg(ccv->ccvc.tcp);
385 	nreno = (struct newreno *) ccv->cc_data;
386 	beta = nreno->beta;
387 	beta_ecn = nreno->beta_ecn;
388 	/*
389 	 * Note that we only change the backoff for ECN if the
390 	 * global sysctl V_cc_do_abe is set <or> the stack itself
391 	 * has set a flag in our newreno_flags (due to pacing) telling
392 	 * us to use the lower valued back-off.
393 	 */
394 	if ((type == CC_ECN) &&
395 	    (V_cc_do_abe ||
396 	    ((nreno != NULL) && (nreno->newreno_flags & CC_NEWRENO_BETA_ECN_ENABLED))))
397 		factor = beta_ecn;
398 	else
399 		factor = beta;
400 
401 	/* Catch algos which mistakenly leak private signal types. */
402 	KASSERT((type & CC_SIGPRIVMASK) == 0,
403 	    ("%s: congestion signal type 0x%08x is private\n", __func__, type));
404 
405 	cwin = max(((uint64_t)cwin * (uint64_t)factor) / (100ULL * (uint64_t)mss),
406 	    2) * mss;
407 
408 	switch (type) {
409 	case CC_NDUPACK:
410 		if (nreno->newreno_flags & CC_NEWRENO_HYSTART_ENABLED) {
411 			/* Make sure the flags are all off we had a loss */
412 			nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_ENABLED;
413 			nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_IN_CSS;
414 		}
415 		if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) {
416 			if (IN_CONGRECOVERY(CCV(ccv, t_flags) &&
417 			    V_cc_do_abe && V_cc_abe_frlossreduce)) {
418 				CCV(ccv, snd_ssthresh) =
419 				    ((uint64_t)CCV(ccv, snd_ssthresh) *
420 				     (uint64_t)beta) / (uint64_t)beta_ecn;
421 			}
422 			if (!IN_CONGRECOVERY(CCV(ccv, t_flags)))
423 				CCV(ccv, snd_ssthresh) = cwin;
424 			ENTER_RECOVERY(CCV(ccv, t_flags));
425 		}
426 		break;
427 	case CC_ECN:
428 		if (nreno->newreno_flags & CC_NEWRENO_HYSTART_ENABLED) {
429 			/* Make sure the flags are all off we had a loss */
430 			nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_ENABLED;
431 			nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_IN_CSS;
432 		}
433 		if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
434 			CCV(ccv, snd_ssthresh) = cwin;
435 			CCV(ccv, snd_cwnd) = cwin;
436 			ENTER_CONGRECOVERY(CCV(ccv, t_flags));
437 		}
438 		break;
439 	case CC_RTO:
440 		CCV(ccv, snd_ssthresh) = max(min(CCV(ccv, snd_wnd),
441 						 CCV(ccv, snd_cwnd)) / 2 / mss,
442 					     2) * mss;
443 		CCV(ccv, snd_cwnd) = mss;
444 		break;
445 	}
446 }
447 
/*
 * Perform any necessary tasks before we exit congestion recovery.
 *
 * On leaving fast recovery, deflate cwnd toward ssthresh based on the
 * amount of data still in flight ("pipe") so we restart via slow start
 * instead of bursting.
 */
static void
newreno_post_recovery(struct cc_var *ccv)
{
	int pipe;

	if (IN_FASTRECOVERY(CCV(ccv, t_flags))) {
		/*
		 * Fast recovery will conclude after returning from this
		 * function. Window inflation should have left us with
		 * approximately snd_ssthresh outstanding data. But in case we
		 * would be inclined to send a burst, better to do it via the
		 * slow start mechanism.
		 *
		 * XXXLAS: Find a way to do this without needing curack
		 */
		if (V_tcp_do_newsack)
			pipe = tcp_compute_pipe(ccv->ccvc.tcp);
		else
			pipe = CCV(ccv, snd_max) - ccv->curack;

		if (pipe < CCV(ccv, snd_ssthresh))
			/*
			 * Ensure that cwnd does not collapse to 1 MSS under
			 * adverse conditions. Implements RFC6582
			 */
			CCV(ccv, snd_cwnd) = max(pipe, CCV(ccv, t_maxseg)) +
			    CCV(ccv, t_maxseg);
		else
			CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
	}
}
482 
483 static int
484 newreno_ctl_output(struct cc_var *ccv, struct sockopt *sopt, void *buf)
485 {
486 	struct newreno *nreno;
487 	struct cc_newreno_opts *opt;
488 
489 	if (sopt->sopt_valsize != sizeof(struct cc_newreno_opts))
490 		return (EMSGSIZE);
491 
492 	if (CC_ALGO(ccv->ccvc.tcp) != &newreno_cc_algo)
493 		return (ENOPROTOOPT);
494 
495 	nreno = (struct newreno *)ccv->cc_data;
496 	opt = buf;
497 	switch (sopt->sopt_dir) {
498 	case SOPT_SET:
499 		switch (opt->name) {
500 		case CC_NEWRENO_BETA:
501 			nreno->beta = opt->val;
502 			break;
503 		case CC_NEWRENO_BETA_ECN:
504 			if ((!V_cc_do_abe) && ((nreno->newreno_flags & CC_NEWRENO_BETA_ECN) == 0))
505 				return (EACCES);
506 			nreno->beta_ecn = opt->val;
507 			nreno->newreno_flags |= CC_NEWRENO_BETA_ECN_ENABLED;
508 			break;
509 		case CC_NEWRENO_ENABLE_HYSTART:
510 			/* Allow hystart on this connection */
511 			if (opt->val != 0) {
512 				nreno->newreno_flags |= CC_NEWRENO_HYSTART_ALLOWED;
513 				if (opt->val > 1)
514 					nreno->newreno_flags |= CC_NEWRENO_HYSTART_CAN_SH_CWND;
515 				if (opt->val > 2)
516 					nreno->newreno_flags |= CC_NEWRENO_HYSTART_CONS_SSTH;
517 			} else
518 				nreno->newreno_flags &= ~(CC_NEWRENO_HYSTART_ALLOWED|CC_NEWRENO_HYSTART_CAN_SH_CWND|CC_NEWRENO_HYSTART_CONS_SSTH);
519 			newreno_log_hystart_event(ccv, nreno, 7, opt->val);
520 			break;
521 		default:
522 			return (ENOPROTOOPT);
523 		}
524 		break;
525 	case SOPT_GET:
526 		switch (opt->name) {
527 		case CC_NEWRENO_BETA:
528 			opt->val = (nreno == NULL) ?
529 			    V_newreno_beta : nreno->beta;
530 			break;
531 		case CC_NEWRENO_BETA_ECN:
532 			opt->val = (nreno == NULL) ?
533 			    V_newreno_beta_ecn : nreno->beta_ecn;
534 			break;
535 		case CC_NEWRENO_ENABLE_HYSTART:
536 			if (nreno->newreno_flags & CC_NEWRENO_HYSTART_ALLOWED) {
537 				if (nreno->newreno_flags & CC_NEWRENO_HYSTART_CONS_SSTH)
538 					opt->val = 3;
539 				else if (nreno->newreno_flags & CC_NEWRENO_HYSTART_CAN_SH_CWND)
540 					opt->val = 2;
541 				else
542 					opt->val = 1;
543 			} else
544 				opt->val = 0;
545 			break;
546 		default:
547 			return (ENOPROTOOPT);
548 		}
549 		break;
550 	default:
551 		return (EINVAL);
552 	}
553 
554 	return (0);
555 }
556 
557 static int
558 newreno_beta_handler(SYSCTL_HANDLER_ARGS)
559 {
560 	int error;
561 	uint32_t new;
562 
563 	new = *(uint32_t *)arg1;
564 	error = sysctl_handle_int(oidp, &new, 0, req);
565 	if (error == 0 && req->newptr != NULL ) {
566 		if (arg1 == &VNET_NAME(newreno_beta_ecn) && !V_cc_do_abe)
567 			error = EACCES;
568 		else if (new == 0 || new > 100)
569 			error = EINVAL;
570 		else
571 			*(uint32_t *)arg1 = new;
572 	}
573 
574 	return (error);
575 }
576 
/*
 * Called by the stack at the start of each new RTT round.  Rolls the
 * per-round minimum-RTT trackers forward and, if we have spent
 * hystart_css_rounds rounds in CSS, exits slow start into congestion
 * avoidance (or, with CAN_SH_CWND, pulls cwnd/ssthresh back to the
 * flight sizes recorded at CSS entry and stays in CSS).
 */
static void
newreno_newround(struct cc_var *ccv, uint32_t round_cnt)
{
	struct newreno *nreno;

	nreno = (struct newreno *)ccv->cc_data;
	/* We have entered a new round */
	nreno->css_lastround_minrtt = nreno->css_current_round_minrtt;
	nreno->css_current_round_minrtt = 0xffffffff;
	nreno->css_rttsample_count = 0;
	nreno->css_current_round = round_cnt;
	if ((nreno->newreno_flags & CC_NEWRENO_HYSTART_IN_CSS) &&
	    ((round_cnt - nreno->css_entered_at_round) >= hystart_css_rounds)) {
		/* Enter CA */
		if (nreno->newreno_flags & CC_NEWRENO_HYSTART_CAN_SH_CWND) {
			/*
			 * We engage more than snd_ssthresh, engage
			 * the brakes!! Though we will stay in SS to
			 * creep back up again, so lets leave CSS active
			 * and give us hystart_css_rounds more rounds.
			 */
			if (nreno->newreno_flags & CC_NEWRENO_HYSTART_CONS_SSTH) {
				/* Conservative: midpoint of low-RTT FAS and CSS-entry FAS. */
				CCV(ccv, snd_ssthresh) = ((nreno->css_lowrtt_fas + nreno->css_fas_at_css_entry) / 2);
			} else {
				CCV(ccv, snd_ssthresh) = nreno->css_lowrtt_fas;
			}
			CCV(ccv, snd_cwnd) = nreno->css_fas_at_css_entry;
			nreno->css_entered_at_round = round_cnt;
		} else {
			CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd);
			/* Turn off the CSS flag */
			nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_IN_CSS;
			/* Disable use of CSS in the future except long idle  */
			nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_ENABLED;
		}
		newreno_log_hystart_event(ccv, nreno, 6, CCV(ccv, snd_ssthresh));
	}
	newreno_log_hystart_event(ccv, nreno, 4, round_cnt);
}
616 
617 static void
618 newreno_rttsample(struct cc_var *ccv, uint32_t usec_rtt, uint32_t rxtcnt, uint32_t fas)
619 {
620 	struct newreno *nreno;
621 
622 	nreno = (struct newreno *)ccv->cc_data;
623 	if (rxtcnt > 1) {
624 		/*
625 		 * Only look at RTT's that are non-ambiguous.
626 		 */
627 		return;
628 	}
629 	nreno->css_rttsample_count++;
630 	nreno->css_last_fas = fas;
631 	if (nreno->css_current_round_minrtt > usec_rtt) {
632 		nreno->css_current_round_minrtt = usec_rtt;
633 		nreno->css_lowrtt_fas = nreno->css_last_fas;
634 	}
635 	if ((nreno->newreno_flags & CC_NEWRENO_HYSTART_IN_CSS) &&
636 	    (nreno->css_rttsample_count >= hystart_n_rttsamples) &&
637 	    (nreno->css_baseline_minrtt > nreno->css_current_round_minrtt)) {
638 		/*
639 		 * We were in CSS and the RTT is now less, we
640 		 * entered CSS erroneously.
641 		 */
642 		nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_IN_CSS;
643 		newreno_log_hystart_event(ccv, nreno, 8, nreno->css_baseline_minrtt);
644 		nreno->css_baseline_minrtt = 0xffffffff;
645 	}
646 	newreno_log_hystart_event(ccv, nreno, 5, usec_rtt);
647 }
648 
/* net.inet.tcp.cc.newreno sysctl tree. */
SYSCTL_DECL(_net_inet_tcp_cc_newreno);
SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, newreno,
    CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "New Reno related settings");

SYSCTL_PROC(_net_inet_tcp_cc_newreno, OID_AUTO, beta,
    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
    &VNET_NAME(newreno_beta), 3, &newreno_beta_handler, "IU",
    "New Reno beta, specified as number between 1 and 100");

SYSCTL_PROC(_net_inet_tcp_cc_newreno, OID_AUTO, beta_ecn,
    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
    &VNET_NAME(newreno_beta_ecn), 3, &newreno_beta_handler, "IU",
    "New Reno beta ecn, specified as number between 1 and 100");

/* HyStart++ tunables under net.inet.tcp.cc.newreno.hystartplusplus. */
SYSCTL_NODE(_net_inet_tcp_cc_newreno, OID_AUTO, hystartplusplus,
    CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "New Reno related HyStart++ settings");

SYSCTL_UINT(_net_inet_tcp_cc_newreno_hystartplusplus, OID_AUTO, lowcwnd,
    CTLFLAG_RW,
    &hystart_lowcwnd, 16,
   "The number of MSS in the CWND before HyStart++ is active");

SYSCTL_UINT(_net_inet_tcp_cc_newreno_hystartplusplus, OID_AUTO, minrtt_thresh,
    CTLFLAG_RW,
    &hystart_minrtt_thresh, 4000,
   "HyStarts++ minimum RTT thresh used in clamp (in microseconds)");

SYSCTL_UINT(_net_inet_tcp_cc_newreno_hystartplusplus, OID_AUTO, maxrtt_thresh,
    CTLFLAG_RW,
    &hystart_maxrtt_thresh, 16000,
   "HyStarts++ maximum RTT thresh used in clamp (in microseconds)");

SYSCTL_UINT(_net_inet_tcp_cc_newreno_hystartplusplus, OID_AUTO, n_rttsamples,
    CTLFLAG_RW,
    &hystart_n_rttsamples, 8,
   "The number of RTT samples that must be seen to consider HyStart++");

SYSCTL_UINT(_net_inet_tcp_cc_newreno_hystartplusplus, OID_AUTO, css_growth_div,
    CTLFLAG_RW,
    &hystart_css_growth_div, 4,
   "The divisor to the growth when in Hystart++ CSS");

SYSCTL_UINT(_net_inet_tcp_cc_newreno_hystartplusplus, OID_AUTO, css_rounds,
    CTLFLAG_RW,
    &hystart_css_rounds, 5,
   "The number of rounds HyStart++ lasts in CSS before falling to CA");

SYSCTL_UINT(_net_inet_tcp_cc_newreno_hystartplusplus, OID_AUTO, bblogs,
    CTLFLAG_RW,
    &hystart_bblogs, 0,
   "Do we enable HyStart++ Black Box logs to be generated if BB logging is on");

/* Register this algorithm with the congestion control framework. */
DECLARE_CC_MODULE(newreno, &newreno_cc_algo);
MODULE_VERSION(newreno, 1);
706