xref: /freebsd/sys/netinet/cc/cc_newreno.c (revision 81b22a9892b1047e551fc3f1d6d58031bc59a4c3)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
5  *	The Regents of the University of California.
6  * Copyright (c) 2007-2008,2010,2014
7  *	Swinburne University of Technology, Melbourne, Australia.
8  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
9  * Copyright (c) 2010 The FreeBSD Foundation
10  * All rights reserved.
11  *
12  * This software was developed at the Centre for Advanced Internet
13  * Architectures, Swinburne University of Technology, by Lawrence Stewart, James
14  * Healy and David Hayes, made possible in part by a grant from the Cisco
15  * University Research Program Fund at Community Foundation Silicon Valley.
16  *
17  * Portions of this software were developed at the Centre for Advanced
18  * Internet Architectures, Swinburne University of Technology, Melbourne,
19  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
20  *
21  * Redistribution and use in source and binary forms, with or without
22  * modification, are permitted provided that the following conditions
23  * are met:
24  * 1. Redistributions of source code must retain the above copyright
25  *    notice, this list of conditions and the following disclaimer.
26  * 2. Redistributions in binary form must reproduce the above copyright
27  *    notice, this list of conditions and the following disclaimer in the
28  *    documentation and/or other materials provided with the distribution.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  */
42 
43 /*
44  * This software was first released in 2007 by James Healy and Lawrence Stewart
45  * whilst working on the NewTCP research project at Swinburne University of
46  * Technology's Centre for Advanced Internet Architectures, Melbourne,
47  * Australia, which was made possible in part by a grant from the Cisco
48  * University Research Program Fund at Community Foundation Silicon Valley.
49  * More details are available at:
50  *   http://caia.swin.edu.au/urp/newtcp/
51  *
52  * Dec 2014 garmitage@swin.edu.au
53  * Borrowed code fragments from cc_cdg.c to add modifiable beta
54  * via sysctls.
55  *
56  */
57 
58 #include <sys/cdefs.h>
59 __FBSDID("$FreeBSD$");
60 
61 #include <sys/param.h>
62 #include <sys/kernel.h>
63 #include <sys/malloc.h>
64 #include <sys/module.h>
65 #include <sys/socket.h>
66 #include <sys/lock.h>
67 #include <sys/mutex.h>
68 #include <sys/socketvar.h>
69 #include <sys/sysctl.h>
70 #include <sys/systm.h>
71 
72 #include <net/vnet.h>
73 
74 #include <netinet/in.h>
75 #include <netinet/in_pcb.h>
76 #include <netinet/tcp.h>
77 #include <netinet/tcp_seq.h>
78 #include <netinet/tcp_var.h>
79 #include <netinet/tcp_log_buf.h>
80 #include <netinet/tcp_hpts.h>
81 #include <netinet/cc/cc.h>
82 #include <netinet/cc/cc_module.h>
83 #include <netinet/cc/cc_newreno.h>
84 
/* Allocation tag for the per-connection struct newreno (see newreno_cb_init). */
static MALLOC_DEFINE(M_NEWRENO, "newreno data",
	"newreno beta values");

static void	newreno_cb_destroy(struct cc_var *ccv);
static void	newreno_ack_received(struct cc_var *ccv, uint16_t type);
static void	newreno_after_idle(struct cc_var *ccv);
static void	newreno_cong_signal(struct cc_var *ccv, uint32_t type);
static void	newreno_post_recovery(struct cc_var *ccv);
static int newreno_ctl_output(struct cc_var *ccv, struct sockopt *sopt, void *buf);
static void	newreno_newround(struct cc_var *ccv, uint32_t round_cnt);
static void	newreno_rttsample(struct cc_var *ccv, uint32_t usec_rtt, uint32_t rxtcnt, uint32_t fas);
static 	int	newreno_cb_init(struct cc_var *ccv);

/*
 * Per-VNET default multiplicative-decrease factors, expressed as a
 * percentage (1-100, enforced by newreno_beta_handler).  Used when a
 * connection has no private beta values in its cc_data.
 */
VNET_DEFINE(uint32_t, newreno_beta) = 50;
VNET_DEFINE(uint32_t, newreno_beta_ecn) = 80;
#define V_newreno_beta VNET(newreno_beta)
#define V_newreno_beta_ecn VNET(newreno_beta_ecn)
102 
/* Method table registered with the congestion-control framework. */
struct cc_algo newreno_cc_algo = {
	.name = "newreno",
	.cb_destroy = newreno_cb_destroy,
	.ack_received = newreno_ack_received,
	.after_idle = newreno_after_idle,
	.cong_signal = newreno_cong_signal,
	.post_recovery = newreno_post_recovery,
	.ctl_output = newreno_ctl_output,
	.newround = newreno_newround,
	.rttsample = newreno_rttsample,
	.cb_init = newreno_cb_init,
};
115 
/*
 * HyStart++ tunables, exported read/write via the
 * net.inet.tcp.cc.newreno.hystartplusplus sysctl subtree below.
 */
static uint32_t hystart_lowcwnd = 16;		/* MSS in cwnd before HyStart++ engages */
static uint32_t hystart_minrtt_thresh = 4000;	/* clamp floor for rtt_thresh (usec) */
static uint32_t hystart_maxrtt_thresh = 16000;	/* clamp ceiling for rtt_thresh (usec) */
static uint32_t hystart_n_rttsamples = 8;	/* samples required before deciding */
static uint32_t hystart_css_growth_div = 4;	/* cwnd growth divisor while in CSS */
static uint32_t hystart_css_rounds = 5;		/* rounds spent in CSS before CA */
static uint32_t hystart_bblogs = 0;		/* emit black-box logs when non-zero */
123 
/*
 * Emit a HyStart++ event into the connection's black-box (BB) log, if BB
 * logging is active on the tcpcb and the hystart_bblogs sysctl is enabled.
 * The current CSS state from *nreno is packed into the flex fields.
 */
static void
newreno_log_hystart_event(struct cc_var *ccv, struct newreno *nreno, uint8_t mod, uint32_t flex1)
{
	/*
	 * Types of logs (mod value)
	 * 1 - rtt_thresh in flex1, checking to see if RTT is too great.
	 * 2 - rtt is too great, rtt_thresh in flex1.
	 * 3 - CSS is active incr in flex1
	 * 4 - A new round is beginning flex1 is round count
	 * 5 - A new RTT measurement flex1 is the new measurement.
	 * 6 - We enter CA ssthresh is also in flex1.
	 * 7 - Socket option to change hystart executed opt.val in flex1.
	 * 8 - Back out of CSS into SS, flex1 is the css_baseline_minrtt
	 */
	struct tcpcb *tp;

	if (hystart_bblogs == 0)
		return;
	tp = ccv->ccvc.tcp;
	if (tp->t_logstate != TCP_LOG_STATE_OFF) {
		union tcp_log_stackspecific log;
		struct timeval tv;

		memset(&log, 0, sizeof(log));
		log.u_bbr.flex1 = flex1;
		log.u_bbr.flex2 = nreno->css_current_round_minrtt;
		log.u_bbr.flex3 = nreno->css_lastround_minrtt;
		log.u_bbr.flex4 = nreno->css_rttsample_count;
		log.u_bbr.flex5 = nreno->css_entered_at_round;
		log.u_bbr.flex6 = nreno->css_baseline_minrtt;
		/* We only need bottom 16 bits of flags */
		log.u_bbr.flex7 = nreno->newreno_flags & 0x0000ffff;
		log.u_bbr.flex8 = mod;
		log.u_bbr.epoch = nreno->css_current_round;
		log.u_bbr.timeStamp = tcp_get_usecs(&tv);
		log.u_bbr.lt_epoch = nreno->css_fas_at_css_entry;
		log.u_bbr.pkts_out = nreno->css_last_fas;
		log.u_bbr.delivered = nreno->css_lowrtt_fas;
		TCP_LOG_EVENTP(tp, NULL,
		    &tp->t_inpcb->inp_socket->so_rcv,
		    &tp->t_inpcb->inp_socket->so_snd,
		    TCP_HYSTART, 0,
		    0, &log, false, &tv);
	}
}
169 
170 static int
171 newreno_cb_init(struct cc_var *ccv)
172 {
173 	struct newreno *nreno;
174 
175 	ccv->cc_data = malloc(sizeof(struct newreno), M_NEWRENO, M_NOWAIT);
176 	if (ccv->cc_data == NULL)
177 		return (ENOMEM);
178 	nreno = (struct newreno *)ccv->cc_data;
179 	/* NB: nreno is not zeroed, so initialise all fields. */
180 	nreno->beta = V_newreno_beta;
181 	nreno->beta_ecn = V_newreno_beta_ecn;
182 	/*
183 	 * We set the enabled flag so that if
184 	 * the socket option gets strobed and
185 	 * we have not hit a loss
186 	 */
187 	nreno->newreno_flags = CC_NEWRENO_HYSTART_ENABLED;
188 	/* At init set both to infinity */
189 	nreno->css_lastround_minrtt = 0xffffffff;
190 	nreno->css_current_round_minrtt = 0xffffffff;
191 	nreno->css_current_round = 0;
192 	nreno->css_baseline_minrtt = 0xffffffff;
193 	nreno->css_rttsample_count = 0;
194 	nreno->css_entered_at_round = 0;
195 	nreno->css_fas_at_css_entry = 0;
196 	nreno->css_lowrtt_fas = 0;
197 	nreno->css_last_fas = 0;
198 	return (0);
199 }
200 
/* Release the per-connection state allocated by newreno_cb_init(). */
static void
newreno_cb_destroy(struct cc_var *ccv)
{
	free(ccv->cc_data, M_NEWRENO);
}
206 
/*
 * Grow the congestion window on a (non-recovery) ACK, per slow start or
 * congestion avoidance, with optional ABC (RFC 3465) and HyStart++ CSS
 * handling.  Only acts when the connection is cwnd-limited.
 */
static void
newreno_ack_received(struct cc_var *ccv, uint16_t type)
{
	struct newreno *nreno;

	/*
	 * Other TCP congestion controls use newreno_ack_received(), but
	 * with their own private cc_data. Make sure the cc_data is used
	 * correctly.
	 */
	nreno = (CC_ALGO(ccv->ccvc.tcp) == &newreno_cc_algo) ? ccv->cc_data : NULL;

	if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) &&
	    (ccv->flags & CCF_CWND_LIMITED)) {
		u_int cw = CCV(ccv, snd_cwnd);
		u_int incr = CCV(ccv, t_maxseg);

		/*
		 * Regular in-order ACK, open the congestion window.
		 * Method depends on which congestion control state we're
		 * in (slow start or cong avoid) and if ABC (RFC 3465) is
		 * enabled.
		 *
		 * slow start: cwnd <= ssthresh
		 * cong avoid: cwnd > ssthresh
		 *
		 * slow start and ABC (RFC 3465):
		 *   Grow cwnd exponentially by the amount of data
		 *   ACKed capping the max increment per ACK to
		 *   (abc_l_var * maxseg) bytes.
		 *
		 * slow start without ABC (RFC 5681):
		 *   Grow cwnd exponentially by maxseg per ACK.
		 *
		 * cong avoid and ABC (RFC 3465):
		 *   Grow cwnd linearly by maxseg per RTT for each
		 *   cwnd worth of ACKed data.
		 *
		 * cong avoid without ABC (RFC 5681):
		 *   Grow cwnd linearly by approximately maxseg per RTT using
		 *   maxseg^2 / cwnd per ACK as the increment.
		 *   If cwnd > maxseg^2, fix the cwnd increment at 1 byte to
		 *   avoid capping cwnd.
		 */
		if (cw > CCV(ccv, snd_ssthresh)) {
			/* Congestion avoidance. */
			if ((nreno != NULL) &&
			    (nreno->newreno_flags & CC_NEWRENO_HYSTART_IN_CSS)) {
				/*
				 * We have slipped into CA with
				 * CSS active. Deactivate all.
				 */
				/* Turn off the CSS flag */
				nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_IN_CSS;
				/* Disable use of CSS in the future except long idle  */
				nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_ENABLED;
			}
			if (V_tcp_do_rfc3465) {
				/* ABC: one maxseg increment per cwnd of ACKed data. */
				if (ccv->flags & CCF_ABC_SENTAWND)
					ccv->flags &= ~CCF_ABC_SENTAWND;
				else
					incr = 0;
			} else
				incr = max((incr * incr / cw), 1);
		} else if (V_tcp_do_rfc3465) {
			/*
			 * In slow-start with ABC enabled and no RTO in sight?
			 * (Must not use abc_l_var > 1 if slow starting after
			 * an RTO. On RTO, snd_nxt = snd_una, so the
			 * snd_nxt == snd_max check is sufficient to
			 * handle this).
			 *
			 * XXXLAS: Find a way to signal SS after RTO that
			 * doesn't rely on tcpcb vars.
			 */
			uint16_t abc_val;

			if (ccv->flags & CCF_USE_LOCAL_ABC)
				abc_val = ccv->labc;
			else
				abc_val = V_tcp_abc_l_var;
			if ((nreno != NULL) &&
			    (nreno->newreno_flags & CC_NEWRENO_HYSTART_ALLOWED) &&
			    (nreno->newreno_flags & CC_NEWRENO_HYSTART_ENABLED) &&
			    ((nreno->newreno_flags & CC_NEWRENO_HYSTART_IN_CSS) == 0)) {
				/*
				 * Hystart is allowed and still enabled and we are not yet
				 * in CSS. Lets check to see if we can make a decision on
				 * if we need to go into CSS.
				 */
				if ((nreno->css_rttsample_count >= hystart_n_rttsamples) &&
				    (CCV(ccv, snd_cwnd) >
				     (hystart_lowcwnd * tcp_fixed_maxseg(ccv->ccvc.tcp)))) {
					uint32_t rtt_thresh;

					/* Clamp (minrtt_thresh, lastround/8, maxrtt_thresh) */
					rtt_thresh = (nreno->css_lastround_minrtt >> 3);
					if (rtt_thresh < hystart_minrtt_thresh)
						rtt_thresh = hystart_minrtt_thresh;
					if (rtt_thresh > hystart_maxrtt_thresh)
						rtt_thresh = hystart_maxrtt_thresh;
					newreno_log_hystart_event(ccv, nreno, 1, rtt_thresh);
					/* RTT grew by more than the threshold: enter CSS. */
					if (nreno->css_current_round_minrtt >= (nreno->css_lastround_minrtt + rtt_thresh)) {
						/* Enter CSS */
						nreno->newreno_flags |= CC_NEWRENO_HYSTART_IN_CSS;
						nreno->css_fas_at_css_entry = nreno->css_lowrtt_fas;
						nreno->css_baseline_minrtt = nreno->css_current_round_minrtt;
						nreno->css_entered_at_round = nreno->css_current_round;
						newreno_log_hystart_event(ccv, nreno, 2, rtt_thresh);
					}
				}
			}
			if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max))
				incr = min(ccv->bytes_this_ack,
				    ccv->nsegs * abc_val *
				    CCV(ccv, t_maxseg));
			else
				incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg));

			/* Only if Hystart is enabled will the flag get set */
			if ((nreno != NULL) &&
			    (nreno->newreno_flags & CC_NEWRENO_HYSTART_IN_CSS)) {
				/* In CSS, slow the exponential growth down. */
				incr /= hystart_css_growth_div;
				newreno_log_hystart_event(ccv, nreno, 3, incr);
			}
		}
		/* ABC is on by default, so incr equals 0 frequently. */
		if (incr > 0)
			CCV(ccv, snd_cwnd) = min(cw + incr,
			    TCP_MAXWIN << CCV(ccv, snd_scale));
	}
}
338 
/*
 * Reset the congestion window to the restart window after an idle period,
 * per RFC 5681 section 4.1, preserving ssthresh per RFC 2861 section 2.
 * May also re-arm HyStart++ if cwnd has fallen low enough.
 */
static void
newreno_after_idle(struct cc_var *ccv)
{
	struct newreno *nreno;
	uint32_t rw;

	/*
	 * Other TCP congestion controls use newreno_after_idle(), but
	 * with their own private cc_data. Make sure the cc_data is used
	 * correctly.
	 */
	nreno = (CC_ALGO(ccv->ccvc.tcp) == &newreno_cc_algo) ? ccv->cc_data : NULL;
	/*
	 * If we've been idle for more than one retransmit timeout the old
	 * congestion window is no longer current and we have to reduce it to
	 * the restart window before we can transmit again.
	 *
	 * The restart window is the initial window or the last CWND, whichever
	 * is smaller.
	 *
	 * This is done to prevent us from flooding the path with a full CWND at
	 * wirespeed, overloading router and switch buffers along the way.
	 *
	 * See RFC5681 Section 4.1. "Restarting Idle Connections".
	 *
	 * In addition, per RFC2861 Section 2, the ssthresh is set to the
	 * maximum of the former ssthresh or 3/4 of the old cwnd, to
	 * not exit slow-start prematurely.
	 */
	rw = tcp_compute_initwnd(tcp_maxseg(ccv->ccvc.tcp));

	/* ssthresh = max(old ssthresh, 3/4 of the old cwnd). */
	CCV(ccv, snd_ssthresh) = max(CCV(ccv, snd_ssthresh),
	    CCV(ccv, snd_cwnd)-(CCV(ccv, snd_cwnd)>>2));

	CCV(ccv, snd_cwnd) = min(rw, CCV(ccv, snd_cwnd));
	if ((nreno != NULL) &&
	    (nreno->newreno_flags & CC_NEWRENO_HYSTART_ENABLED) == 0) {
		if (CCV(ccv, snd_cwnd) <= (hystart_lowcwnd * tcp_fixed_maxseg(ccv->ccvc.tcp))) {
			/*
			 * Re-enable hystart if our cwnd has fallen below
			 * the hystart lowcwnd point.
			 */
			nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_IN_CSS;
			nreno->newreno_flags |= CC_NEWRENO_HYSTART_ENABLED;
		}
	}
}
386 
387 /*
388  * Perform any necessary tasks before we enter congestion recovery.
389  */
390 static void
391 newreno_cong_signal(struct cc_var *ccv, uint32_t type)
392 {
393 	struct newreno *nreno;
394 	uint32_t beta, beta_ecn, cwin, factor;
395 	u_int mss;
396 
397 	cwin = CCV(ccv, snd_cwnd);
398 	mss = tcp_fixed_maxseg(ccv->ccvc.tcp);
399 	/*
400 	 * Other TCP congestion controls use newreno_cong_signal(), but
401 	 * with their own private cc_data. Make sure the cc_data is used
402 	 * correctly.
403 	 */
404 	nreno = (CC_ALGO(ccv->ccvc.tcp) == &newreno_cc_algo) ? ccv->cc_data : NULL;
405 	beta = (nreno == NULL) ? V_newreno_beta : nreno->beta;;
406 	beta_ecn = (nreno == NULL) ? V_newreno_beta_ecn : nreno->beta_ecn;
407 	/*
408 	 * Note that we only change the backoff for ECN if the
409 	 * global sysctl V_cc_do_abe is set <or> the stack itself
410 	 * has set a flag in our newreno_flags (due to pacing) telling
411 	 * us to use the lower valued back-off.
412 	 */
413 	if ((type == CC_ECN) &&
414 	    (V_cc_do_abe ||
415 	    ((nreno != NULL) && (nreno->newreno_flags & CC_NEWRENO_BETA_ECN_ENABLED))))
416 		factor = beta_ecn;
417 	else
418 		factor = beta;
419 
420 	/* Catch algos which mistakenly leak private signal types. */
421 	KASSERT((type & CC_SIGPRIVMASK) == 0,
422 	    ("%s: congestion signal type 0x%08x is private\n", __func__, type));
423 
424 	cwin = max(((uint64_t)cwin * (uint64_t)factor) / (100ULL * (uint64_t)mss),
425 	    2) * mss;
426 
427 	switch (type) {
428 	case CC_NDUPACK:
429 		if ((nreno != NULL) &&
430 		    (nreno->newreno_flags & CC_NEWRENO_HYSTART_ENABLED)) {
431 			/* Make sure the flags are all off we had a loss */
432 			nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_ENABLED;
433 			nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_IN_CSS;
434 		}
435 		if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) {
436 			if (IN_CONGRECOVERY(CCV(ccv, t_flags) &&
437 			    V_cc_do_abe && V_cc_abe_frlossreduce)) {
438 				CCV(ccv, snd_ssthresh) =
439 				    ((uint64_t)CCV(ccv, snd_ssthresh) *
440 				     (uint64_t)beta) / (uint64_t)beta_ecn;
441 			}
442 			if (!IN_CONGRECOVERY(CCV(ccv, t_flags)))
443 				CCV(ccv, snd_ssthresh) = cwin;
444 			ENTER_RECOVERY(CCV(ccv, t_flags));
445 		}
446 		break;
447 	case CC_ECN:
448 		if ((nreno != NULL) &&
449 		    (nreno->newreno_flags & CC_NEWRENO_HYSTART_ENABLED)) {
450 			/* Make sure the flags are all off we had a loss */
451 			nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_ENABLED;
452 			nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_IN_CSS;
453 		}
454 		if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
455 			CCV(ccv, snd_ssthresh) = cwin;
456 			CCV(ccv, snd_cwnd) = cwin;
457 			ENTER_CONGRECOVERY(CCV(ccv, t_flags));
458 		}
459 		break;
460 	case CC_RTO:
461 		CCV(ccv, snd_ssthresh) = max(min(CCV(ccv, snd_wnd),
462 						 CCV(ccv, snd_cwnd)) / 2 / mss,
463 					     2) * mss;
464 		CCV(ccv, snd_cwnd) = mss;
465 		break;
466 	}
467 }
468 
469 /*
470  * Perform any necessary tasks before we exit congestion recovery.
471  */
472 static void
473 newreno_post_recovery(struct cc_var *ccv)
474 {
475 	int pipe;
476 
477 	if (IN_FASTRECOVERY(CCV(ccv, t_flags))) {
478 		/*
479 		 * Fast recovery will conclude after returning from this
480 		 * function. Window inflation should have left us with
481 		 * approximately snd_ssthresh outstanding data. But in case we
482 		 * would be inclined to send a burst, better to do it via the
483 		 * slow start mechanism.
484 		 *
485 		 * XXXLAS: Find a way to do this without needing curack
486 		 */
487 		if (V_tcp_do_newsack)
488 			pipe = tcp_compute_pipe(ccv->ccvc.tcp);
489 		else
490 			pipe = CCV(ccv, snd_max) - ccv->curack;
491 
492 		if (pipe < CCV(ccv, snd_ssthresh))
493 			/*
494 			 * Ensure that cwnd does not collapse to 1 MSS under
495 			 * adverse conditons. Implements RFC6582
496 			 */
497 			CCV(ccv, snd_cwnd) = max(pipe, CCV(ccv, t_maxseg)) +
498 			    CCV(ccv, t_maxseg);
499 		else
500 			CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
501 	}
502 }
503 
504 static int
505 newreno_ctl_output(struct cc_var *ccv, struct sockopt *sopt, void *buf)
506 {
507 	struct newreno *nreno;
508 	struct cc_newreno_opts *opt;
509 
510 	if (sopt->sopt_valsize != sizeof(struct cc_newreno_opts))
511 		return (EMSGSIZE);
512 
513 	if (CC_ALGO(ccv->ccvc.tcp) != &newreno_cc_algo)
514 		return (ENOPROTOOPT);
515 
516 	nreno = (struct newreno *)ccv->cc_data;
517 	opt = buf;
518 	switch (sopt->sopt_dir) {
519 	case SOPT_SET:
520 		switch (opt->name) {
521 		case CC_NEWRENO_BETA:
522 			nreno->beta = opt->val;
523 			break;
524 		case CC_NEWRENO_BETA_ECN:
525 			if ((!V_cc_do_abe) && ((nreno->newreno_flags & CC_NEWRENO_BETA_ECN) == 0))
526 				return (EACCES);
527 			nreno->beta_ecn = opt->val;
528 			nreno->newreno_flags |= CC_NEWRENO_BETA_ECN_ENABLED;
529 			break;
530 		case CC_NEWRENO_ENABLE_HYSTART:
531 			/* Allow hystart on this connection */
532 			if (opt->val != 0) {
533 				nreno->newreno_flags |= CC_NEWRENO_HYSTART_ALLOWED;
534 				if (opt->val > 1)
535 					nreno->newreno_flags |= CC_NEWRENO_HYSTART_CAN_SH_CWND;
536 				if (opt->val > 2)
537 					nreno->newreno_flags |= CC_NEWRENO_HYSTART_CONS_SSTH;
538 			} else
539 				nreno->newreno_flags &= ~(CC_NEWRENO_HYSTART_ALLOWED|CC_NEWRENO_HYSTART_CAN_SH_CWND|CC_NEWRENO_HYSTART_CONS_SSTH);
540 			newreno_log_hystart_event(ccv, nreno, 7, opt->val);
541 			break;
542 		default:
543 			return (ENOPROTOOPT);
544 		}
545 		break;
546 	case SOPT_GET:
547 		switch (opt->name) {
548 		case CC_NEWRENO_BETA:
549 			opt->val = (nreno == NULL) ?
550 			    V_newreno_beta : nreno->beta;
551 			break;
552 		case CC_NEWRENO_BETA_ECN:
553 			opt->val = (nreno == NULL) ?
554 			    V_newreno_beta_ecn : nreno->beta_ecn;
555 			break;
556 		case CC_NEWRENO_ENABLE_HYSTART:
557 			if (nreno->newreno_flags & CC_NEWRENO_HYSTART_ALLOWED) {
558 				if (nreno->newreno_flags & CC_NEWRENO_HYSTART_CONS_SSTH)
559 					opt->val = 3;
560 				else if (nreno->newreno_flags & CC_NEWRENO_HYSTART_CAN_SH_CWND)
561 					opt->val = 2;
562 				else
563 					opt->val = 1;
564 			} else
565 				opt->val = 0;
566 			break;
567 		default:
568 			return (ENOPROTOOPT);
569 		}
570 		break;
571 	default:
572 		return (EINVAL);
573 	}
574 
575 	return (0);
576 }
577 
578 static int
579 newreno_beta_handler(SYSCTL_HANDLER_ARGS)
580 {
581 	int error;
582 	uint32_t new;
583 
584 	new = *(uint32_t *)arg1;
585 	error = sysctl_handle_int(oidp, &new, 0, req);
586 	if (error == 0 && req->newptr != NULL ) {
587 		if (arg1 == &VNET_NAME(newreno_beta_ecn) && !V_cc_do_abe)
588 			error = EACCES;
589 		else if (new == 0 || new > 100)
590 			error = EINVAL;
591 		else
592 			*(uint32_t *)arg1 = new;
593 	}
594 
595 	return (error);
596 }
597 
/*
 * Called by the stack at the start of each round trip.  Rolls the
 * per-round RTT minimums forward and, if we have spent
 * hystart_css_rounds rounds in CSS, exits CSS into congestion avoidance
 * (or resets the CSS clock if CAN_SH_CWND lets us shrink cwnd instead).
 */
static void
newreno_newround(struct cc_var *ccv, uint32_t round_cnt)
{
	struct newreno *nreno;

	nreno = (struct newreno *)ccv->cc_data;
	/* We have entered a new round */
	nreno->css_lastround_minrtt = nreno->css_current_round_minrtt;
	nreno->css_current_round_minrtt = 0xffffffff;
	nreno->css_rttsample_count = 0;
	nreno->css_current_round = round_cnt;
	if ((nreno->newreno_flags & CC_NEWRENO_HYSTART_IN_CSS) &&
	    ((round_cnt - nreno->css_entered_at_round) >= hystart_css_rounds)) {
		/* Enter CA */
		if (nreno->newreno_flags & CC_NEWRENO_HYSTART_CAN_SH_CWND) {
			/*
			 * We engage more than snd_ssthresh, engage
			 * the brakes!! Though we will stay in SS to
			 * creep back up again, so lets leave CSS active
			 * and give us hystart_css_rounds more rounds.
			 */
			if (nreno->newreno_flags & CC_NEWRENO_HYSTART_CONS_SSTH) {
				/* Conservative: split the difference of the two FAS values. */
				CCV(ccv, snd_ssthresh) = ((nreno->css_lowrtt_fas + nreno->css_fas_at_css_entry) / 2);
			} else {
				CCV(ccv, snd_ssthresh) = nreno->css_lowrtt_fas;
			}
			CCV(ccv, snd_cwnd) = nreno->css_fas_at_css_entry;
			nreno->css_entered_at_round = round_cnt;
		} else {
			CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd);
			/* Turn off the CSS flag */
			nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_IN_CSS;
			/* Disable use of CSS in the future except long idle  */
			nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_ENABLED;
		}
		newreno_log_hystart_event(ccv, nreno, 6, CCV(ccv, snd_ssthresh));
	}
	newreno_log_hystart_event(ccv, nreno, 4, round_cnt);
}
637 
638 static void
639 newreno_rttsample(struct cc_var *ccv, uint32_t usec_rtt, uint32_t rxtcnt, uint32_t fas)
640 {
641 	struct newreno *nreno;
642 
643 	nreno = (struct newreno *)ccv->cc_data;
644 	if (rxtcnt > 1) {
645 		/*
646 		 * Only look at RTT's that are non-ambiguous.
647 		 */
648 		return;
649 	}
650 	nreno->css_rttsample_count++;
651 	nreno->css_last_fas = fas;
652 	if (nreno->css_current_round_minrtt > usec_rtt) {
653 		nreno->css_current_round_minrtt = usec_rtt;
654 		nreno->css_lowrtt_fas = nreno->css_last_fas;
655 	}
656 	if ((nreno->newreno_flags & CC_NEWRENO_HYSTART_IN_CSS) &&
657 	    (nreno->css_rttsample_count >= hystart_n_rttsamples) &&
658 	    (nreno->css_baseline_minrtt > nreno->css_current_round_minrtt)) {
659 		/*
660 		 * We were in CSS and the RTT is now less, we
661 		 * entered CSS erroneously.
662 		 */
663 		nreno->newreno_flags &= ~CC_NEWRENO_HYSTART_IN_CSS;
664 		newreno_log_hystart_event(ccv, nreno, 8, nreno->css_baseline_minrtt);
665 		nreno->css_baseline_minrtt = 0xffffffff;
666 	}
667 	newreno_log_hystart_event(ccv, nreno, 5, usec_rtt);
668 }
669 
/*
 * Sysctl tree: net.inet.tcp.cc.newreno and its hystartplusplus subtree,
 * exposing the beta knobs (via newreno_beta_handler) and the HyStart++
 * tunables defined above.
 */
SYSCTL_DECL(_net_inet_tcp_cc_newreno);
SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, newreno,
    CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "New Reno related settings");

SYSCTL_PROC(_net_inet_tcp_cc_newreno, OID_AUTO, beta,
    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
    &VNET_NAME(newreno_beta), 3, &newreno_beta_handler, "IU",
    "New Reno beta, specified as number between 1 and 100");

SYSCTL_PROC(_net_inet_tcp_cc_newreno, OID_AUTO, beta_ecn,
    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
    &VNET_NAME(newreno_beta_ecn), 3, &newreno_beta_handler, "IU",
    "New Reno beta ecn, specified as number between 1 and 100");

SYSCTL_NODE(_net_inet_tcp_cc_newreno, OID_AUTO, hystartplusplus,
    CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "New Reno related HyStart++ settings");

SYSCTL_UINT(_net_inet_tcp_cc_newreno_hystartplusplus, OID_AUTO, lowcwnd,
    CTLFLAG_RW,
    &hystart_lowcwnd, 16,
   "The number of MSS in the CWND before HyStart++ is active");

SYSCTL_UINT(_net_inet_tcp_cc_newreno_hystartplusplus, OID_AUTO, minrtt_thresh,
    CTLFLAG_RW,
    &hystart_minrtt_thresh, 4000,
   "HyStarts++ minimum RTT thresh used in clamp (in microseconds)");

SYSCTL_UINT(_net_inet_tcp_cc_newreno_hystartplusplus, OID_AUTO, maxrtt_thresh,
    CTLFLAG_RW,
    &hystart_maxrtt_thresh, 16000,
   "HyStarts++ maximum RTT thresh used in clamp (in microseconds)");

SYSCTL_UINT(_net_inet_tcp_cc_newreno_hystartplusplus, OID_AUTO, n_rttsamples,
    CTLFLAG_RW,
    &hystart_n_rttsamples, 8,
   "The number of RTT samples that must be seen to consider HyStart++");

SYSCTL_UINT(_net_inet_tcp_cc_newreno_hystartplusplus, OID_AUTO, css_growth_div,
    CTLFLAG_RW,
    &hystart_css_growth_div, 4,
   "The divisor to the growth when in Hystart++ CSS");

SYSCTL_UINT(_net_inet_tcp_cc_newreno_hystartplusplus, OID_AUTO, css_rounds,
    CTLFLAG_RW,
    &hystart_css_rounds, 5,
   "The number of rounds HyStart++ lasts in CSS before falling to CA");

SYSCTL_UINT(_net_inet_tcp_cc_newreno_hystartplusplus, OID_AUTO, bblogs,
    CTLFLAG_RW,
    &hystart_bblogs, 0,
   "Do we enable HyStart++ Black Box logs to be generated if BB logging is on");

/* Register the algorithm with the CC framework. */
DECLARE_CC_MODULE(newreno, &newreno_cc_algo);
MODULE_VERSION(newreno, 1);
727