xref: /freebsd/sys/netinet/sctp_cc_functions.c (revision d4d735b26537662565001d86685080256cf95758)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
5  * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
6  * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * a) Redistributions of source code must retain the above copyright notice,
12  *    this list of conditions and the following disclaimer.
13  *
14  * b) Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the distribution.
17  *
18  * c) Neither the name of Cisco Systems, Inc. nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32  * THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include <netinet/sctp_os.h>
36 #include <netinet/sctp_var.h>
37 #include <netinet/sctp_sysctl.h>
38 #include <netinet/sctp_pcb.h>
39 #include <netinet/sctp_header.h>
40 #include <netinet/sctputil.h>
41 #include <netinet/sctp_output.h>
42 #include <netinet/sctp_input.h>
43 #include <netinet/sctp_indata.h>
44 #include <netinet/sctp_uio.h>
45 #include <netinet/sctp_timer.h>
46 #include <netinet/sctp_auth.h>
47 #include <netinet/sctp_asconf.h>
48 #include <netinet/sctp_kdtrace.h>
49 
50 #define SHIFT_MPTCP_MULTI_N 40
51 #define SHIFT_MPTCP_MULTI_Z 16
52 #define SHIFT_MPTCP_MULTI 8
53 
54 static void
sctp_enforce_cwnd_limit(struct sctp_association * assoc,struct sctp_nets * net)55 sctp_enforce_cwnd_limit(struct sctp_association *assoc, struct sctp_nets *net)
56 {
57 	if ((assoc->max_cwnd > 0) &&
58 	    (net->cwnd > assoc->max_cwnd) &&
59 	    (net->cwnd > (net->mtu - sizeof(struct sctphdr)))) {
60 		net->cwnd = assoc->max_cwnd;
61 		if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) {
62 			net->cwnd = net->mtu - sizeof(struct sctphdr);
63 		}
64 	}
65 }
66 
/*
 * Initialize the congestion control state (cwnd and ssthresh) for a
 * destination address.
 *
 * The initial cwnd comes from the sctp_initial_cwnd sysctl; 0 selects the
 * RFC 4960 rule min(4*MTU, max(2*MTU, SCTP_INITIAL_CWND)).  A non-zero
 * value is capped by the association's max_burst and scaled by the
 * per-packet payload (MTU minus common header).  For the resource-pooling
 * CMT variants (RPV1/RPV2) the window is divided evenly among all paths.
 * ssthresh starts at the peer's advertised receive window.
 */
static void
sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	struct sctp_association *assoc;
	uint32_t cwnd_in_mtu;

	assoc = &stcb->asoc;
	cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd);
	if (cwnd_in_mtu == 0) {
		/* Using 0 means that the value of RFC 4960 is used. */
		net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
	} else {
		/*
		 * We take the minimum of the burst limit and the initial
		 * congestion window.
		 */
		if ((assoc->max_burst > 0) && (cwnd_in_mtu > assoc->max_burst))
			cwnd_in_mtu = assoc->max_burst;
		net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu;
	}
	if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) {
		/* In case of resource pooling initialize appropriately */
		net->cwnd /= assoc->numnets;
		if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) {
			/* Never start below one MTU worth of payload. */
			net->cwnd = net->mtu - sizeof(struct sctphdr);
		}
	}
	sctp_enforce_cwnd_limit(assoc, net);
	net->ssthresh = assoc->peers_rwnd;
	SDT_PROBE5(sctp, cwnd, net, init,
	    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
	    0, net->cwnd);
	if (SCTP_BASE_SYSCTL(sctp_logging_level) &
	    (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
		sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
	}
}
105 
/*
 * Adjust cwnd/ssthresh after a fast retransmit (RFC 4960, section 7.2.3).
 *
 * For each destination with newly acked data (net_ack > 0) that is not
 * already inside an RFC 2582 fast recovery window, ssthresh is reduced
 * (proportionally to the association-wide totals for the RPV1/RPV2
 * resource-pooling CMT variants, to cwnd/2 with a 2*MTU floor otherwise),
 * cwnd is set to the new ssthresh, and a fast recovery window is armed
 * both per association and per destination.  The retransmission timer is
 * restarted for each such destination.
 */
static void
sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
	struct sctp_nets *net;
	uint32_t t_ssthresh, t_cwnd;
	uint64_t t_ucwnd_sbw;

	/* MT FIXME: Don't compute this over and over again */
	t_ssthresh = 0;
	t_cwnd = 0;
	t_ucwnd_sbw = 0;
	if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) ||
	    (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) {
		/*
		 * Resource pooling: accumulate association-wide totals of
		 * ssthresh, cwnd and cwnd/SRTT over all destinations.
		 */
		TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
			t_ssthresh += net->ssthresh;
			t_cwnd += net->cwnd;
			if (net->lastsa > 0) {
				t_ucwnd_sbw += (uint64_t)net->cwnd / (uint64_t)net->lastsa;
			}
		}
		if (t_ucwnd_sbw == 0) {
			/* Avoid division by zero below. */
			t_ucwnd_sbw = 1;
		}
	}

	/*-
	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
	 * (net->fast_retran_loss_recovery == 0)))
	 */
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
		if ((asoc->fast_retran_loss_recovery == 0) ||
		    (asoc->sctp_cmt_on_off > 0)) {
			/* out of a RFC2582 Fast recovery window? */
			if (net->net_ack > 0) {
				/*
				 * per section 7.2.3, are there any
				 * destinations that had a fast retransmit
				 * to them. If so what we need to do is
				 * adjust ssthresh and cwnd.
				 */
				struct sctp_tmit_chunk *lchk;
				int old_cwnd = net->cwnd;

				if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) ||
				    (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) {
					if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) {
						/* RPV1: scale by this path's share of total ssthresh. */
						net->ssthresh = (uint32_t)(((uint64_t)4 *
						    (uint64_t)net->mtu *
						    (uint64_t)net->ssthresh) /
						    (uint64_t)t_ssthresh);
					}
					if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2) {
						uint32_t srtt;

						srtt = net->lastsa;
						/*
						 * lastsa>>3;  we don't need
						 * to devide ...
						 */
						if (srtt == 0) {
							srtt = 1;
						}
						/*
						 * Short Version => Equal to
						 * Contel Version MBe
						 */
						net->ssthresh = (uint32_t)(((uint64_t)4 *
						    (uint64_t)net->mtu *
						    (uint64_t)net->cwnd) /
						    ((uint64_t)srtt *
						    t_ucwnd_sbw));
						 /* INCREASE FACTOR */ ;
					}
					if ((net->cwnd > t_cwnd / 2) &&
					    (net->ssthresh < net->cwnd - t_cwnd / 2)) {
						/* Keep ssthresh at least cwnd minus half the total cwnd. */
						net->ssthresh = net->cwnd - t_cwnd / 2;
					}
					if (net->ssthresh < net->mtu) {
						net->ssthresh = net->mtu;
					}
				} else {
					/* Classic RFC 4960 halving with a 2*MTU floor. */
					net->ssthresh = net->cwnd / 2;
					if (net->ssthresh < (net->mtu * 2)) {
						net->ssthresh = 2 * net->mtu;
					}
				}
				net->cwnd = net->ssthresh;
				sctp_enforce_cwnd_limit(asoc, net);
				SDT_PROBE5(sctp, cwnd, net, fr,
				    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
				    old_cwnd, net->cwnd);
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
					    SCTP_CWND_LOG_FROM_FR);
				}
				lchk = TAILQ_FIRST(&asoc->send_queue);

				net->partial_bytes_acked = 0;
				/* Turn on fast recovery window */
				asoc->fast_retran_loss_recovery = 1;
				if (lchk == NULL) {
					/* Mark end of the window */
					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					asoc->fast_recovery_tsn = lchk->rec.data.tsn - 1;
				}

				/*
				 * CMT fast recovery -- per destination
				 * recovery variable.
				 */
				net->fast_retran_loss_recovery = 1;

				if (lchk == NULL) {
					/* Mark end of the window */
					net->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					net->fast_recovery_tsn = lchk->rec.data.tsn - 1;
				}

				/* Restart the retransmission timer for this path. */
				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net,
				    SCTP_FROM_SCTP_CC_FUNCTIONS + SCTP_LOC_1);
				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net);
			}
		} else if (net->net_ack > 0) {
			/*
			 * Mark a peg that we WOULD have done a cwnd
			 * reduction but RFC2582 prevented this action.
			 */
			SCTP_STAT_INCR(sctps_fastretransinrtt);
		}
	}
}
242 
243 /* Defines for instantaneous bw decisions */
244 #define SCTP_INST_LOOSING 1	/* Losing to other flows */
245 #define SCTP_INST_NEUTRAL 2	/* Neutral, no indication */
246 #define SCTP_INST_GAINING 3	/* Gaining, step down possible */
247 
/*
 * RTCC decision for the "bandwidth stayed the same" rows of the table in
 * cc_bw_limit().  Returns non-zero to hold cwnd where it is, 0 to let the
 * normal congestion-control update proceed.
 *
 * When the flow looks steady (steady_step configured and we are not
 * instantaneously losing to other flows), we periodically probe downwards
 * by voluntarily giving back one MTU of cwnd every steady_step
 * measurements, to see whether the same bandwidth holds with a smaller
 * window.  A subsequent RTT decrease confirms the step-down worked.
 */
static int
cc_bw_same(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw,
    uint64_t rtt_offset, uint64_t vtag, uint8_t inst_ind)
{
	uint64_t oth __sdt_used, probepoint __sdt_used;

	probepoint = (((uint64_t)net->cwnd) << 32);
	if (net->rtt > net->cc_mod.rtcc.lbw_rtt + rtt_offset) {
		/*
		 * rtt increased we don't update bw.. so we don't update the
		 * rtt either.
		 */
		/* Probe point 5 */
		probepoint |= ((5 << 16) | 1);
		SDT_PROBE5(sctp, cwnd, net, rttvar,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    net->flight_size,
		    probepoint);
		if ((net->cc_mod.rtcc.steady_step) && (inst_ind != SCTP_INST_LOOSING)) {
			/* Count consecutive state-5 observations. */
			if (net->cc_mod.rtcc.last_step_state == 5)
				net->cc_mod.rtcc.step_cnt++;
			else
				net->cc_mod.rtcc.step_cnt = 1;
			net->cc_mod.rtcc.last_step_state = 5;
			if ((net->cc_mod.rtcc.step_cnt == net->cc_mod.rtcc.steady_step) ||
			    ((net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step) &&
			    ((net->cc_mod.rtcc.step_cnt % net->cc_mod.rtcc.steady_step) == 0))) {
				/* Try a step down */
				oth = net->cc_mod.rtcc.vol_reduce;
				oth <<= 16;
				oth |= net->cc_mod.rtcc.step_cnt;
				oth <<= 16;
				oth |= net->cc_mod.rtcc.last_step_state;
				SDT_PROBE5(sctp, cwnd, net, rttstep,
				    vtag,
				    ((net->cc_mod.rtcc.lbw << 32) | nbw),
				    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
				    oth,
				    probepoint);
				if (net->cwnd > (4 * net->mtu)) {
					/* Voluntarily give back one MTU. */
					net->cwnd -= net->mtu;
					net->cc_mod.rtcc.vol_reduce++;
				} else {
					net->cc_mod.rtcc.step_cnt = 0;
				}
			}
		}
		return (1);
	}
	if (net->rtt < net->cc_mod.rtcc.lbw_rtt - rtt_offset) {
		/*
		 * rtt decreased, there could be more room. we update both
		 * the bw and the rtt here to lock this in as a good step
		 * down.
		 */
		/* Probe point 6 */
		probepoint |= ((6 << 16) | 0);
		SDT_PROBE5(sctp, cwnd, net, rttvar,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    net->flight_size,
		    probepoint);
		if (net->cc_mod.rtcc.steady_step) {
			oth = net->cc_mod.rtcc.vol_reduce;
			oth <<= 16;
			oth |= net->cc_mod.rtcc.step_cnt;
			oth <<= 16;
			oth |= net->cc_mod.rtcc.last_step_state;
			SDT_PROBE5(sctp, cwnd, net, rttstep,
			    vtag,
			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
			    oth,
			    probepoint);
			if ((net->cc_mod.rtcc.last_step_state == 5) &&
			    (net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step)) {
				/* Step down worked */
				net->cc_mod.rtcc.step_cnt = 0;
				return (1);
			} else {
				net->cc_mod.rtcc.last_step_state = 6;
				net->cc_mod.rtcc.step_cnt = 0;
			}
		}
		/* Lock in the new baseline. */
		net->cc_mod.rtcc.lbw = nbw;
		net->cc_mod.rtcc.lbw_rtt = net->rtt;
		net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd;
		if (inst_ind == SCTP_INST_GAINING)
			return (1);
		else if (inst_ind == SCTP_INST_NEUTRAL)
			return (1);
		else
			return (0);
	}
	/*
	 * Ok bw and rtt remained the same .. no update to any
	 */
	/* Probe point 7 */
	probepoint |= ((7 << 16) | net->cc_mod.rtcc.ret_from_eq);
	SDT_PROBE5(sctp, cwnd, net, rttvar,
	    vtag,
	    ((net->cc_mod.rtcc.lbw << 32) | nbw),
	    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
	    net->flight_size,
	    probepoint);
	if ((net->cc_mod.rtcc.steady_step) && (inst_ind != SCTP_INST_LOOSING)) {
		if (net->cc_mod.rtcc.last_step_state == 5)
			net->cc_mod.rtcc.step_cnt++;
		else
			net->cc_mod.rtcc.step_cnt = 1;
		net->cc_mod.rtcc.last_step_state = 5;
		if ((net->cc_mod.rtcc.step_cnt == net->cc_mod.rtcc.steady_step) ||
		    ((net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step) &&
		    ((net->cc_mod.rtcc.step_cnt % net->cc_mod.rtcc.steady_step) == 0))) {
			/* Try a step down */
			if (net->cwnd > (4 * net->mtu)) {
				net->cwnd -= net->mtu;
				net->cc_mod.rtcc.vol_reduce++;
				return (1);
			} else {
				net->cc_mod.rtcc.step_cnt = 0;
			}
		}
	}
	if (inst_ind == SCTP_INST_GAINING)
		return (1);
	else if (inst_ind == SCTP_INST_NEUTRAL)
		return (1);
	else
		return ((int)net->cc_mod.rtcc.ret_from_eq);
}
382 
/*
 * RTCC decision for the "bandwidth decreased" rows of the table in
 * cc_bw_limit().  Returns non-zero to hold cwnd, 0 to allow the normal
 * congestion-control update.
 *
 * If bandwidth dropped while RTT rose and we had grown cwnd since the
 * last baseline, we assume we caused the drop and back off.  Otherwise
 * another flow is suspected, and if we previously volunteered cwnd away
 * we take one MTU back.  The bw/RTT baseline is re-recorded on exit.
 */
static int
cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint64_t rtt_offset,
    uint64_t vtag, uint8_t inst_ind)
{
	uint64_t oth __sdt_used, probepoint __sdt_used;

	/* Bandwidth decreased. */
	probepoint = (((uint64_t)net->cwnd) << 32);
	if (net->rtt > net->cc_mod.rtcc.lbw_rtt + rtt_offset) {
		/* rtt increased */
		/* Did we add more */
		if ((net->cwnd > net->cc_mod.rtcc.cwnd_at_bw_set) &&
		    (inst_ind != SCTP_INST_LOOSING)) {
			/* We caused it maybe.. back off? */
			/* PROBE POINT 1 */
			probepoint |= ((1 << 16) | 1);
			SDT_PROBE5(sctp, cwnd, net, rttvar,
			    vtag,
			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
			    net->flight_size,
			    probepoint);
			if (net->cc_mod.rtcc.ret_from_eq) {
				/*
				 * Switch over to CA if we are less
				 * aggressive
				 */
				net->ssthresh = net->cwnd - 1;
				net->partial_bytes_acked = 0;
			}
			return (1);
		}
		/* Probe point 2 */
		probepoint |= ((2 << 16) | 0);
		SDT_PROBE5(sctp, cwnd, net, rttvar,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    net->flight_size,
		    probepoint);
		/* Someone else - fight for more? */
		if (net->cc_mod.rtcc.steady_step) {
			oth = net->cc_mod.rtcc.vol_reduce;
			oth <<= 16;
			oth |= net->cc_mod.rtcc.step_cnt;
			oth <<= 16;
			oth |= net->cc_mod.rtcc.last_step_state;
			SDT_PROBE5(sctp, cwnd, net, rttstep,
			    vtag,
			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
			    oth,
			    probepoint);
			/*
			 * Did we voluntarily give up some? if so take one
			 * back please
			 */
			if ((net->cc_mod.rtcc.vol_reduce) &&
			    (inst_ind != SCTP_INST_GAINING)) {
				net->cwnd += net->mtu;
				sctp_enforce_cwnd_limit(&stcb->asoc, net);
				net->cc_mod.rtcc.vol_reduce--;
			}
			net->cc_mod.rtcc.last_step_state = 2;
			net->cc_mod.rtcc.step_cnt = 0;
		}
		goto out_decision;
	} else if (net->rtt < net->cc_mod.rtcc.lbw_rtt - rtt_offset) {
		/* bw & rtt decreased */
		/* Probe point 3 */
		probepoint |= ((3 << 16) | 0);
		SDT_PROBE5(sctp, cwnd, net, rttvar,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    net->flight_size,
		    probepoint);
		if (net->cc_mod.rtcc.steady_step) {
			oth = net->cc_mod.rtcc.vol_reduce;
			oth <<= 16;
			oth |= net->cc_mod.rtcc.step_cnt;
			oth <<= 16;
			oth |= net->cc_mod.rtcc.last_step_state;
			SDT_PROBE5(sctp, cwnd, net, rttstep,
			    vtag,
			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
			    oth,
			    probepoint);
			/* Reclaim a volunteered MTU if we are not gaining. */
			if ((net->cc_mod.rtcc.vol_reduce) &&
			    (inst_ind != SCTP_INST_GAINING)) {
				net->cwnd += net->mtu;
				sctp_enforce_cwnd_limit(&stcb->asoc, net);
				net->cc_mod.rtcc.vol_reduce--;
			}
			net->cc_mod.rtcc.last_step_state = 3;
			net->cc_mod.rtcc.step_cnt = 0;
		}
		goto out_decision;
	}
	/* The bw decreased but rtt stayed the same */
	/* Probe point 4 */
	probepoint |= ((4 << 16) | 0);
	SDT_PROBE5(sctp, cwnd, net, rttvar,
	    vtag,
	    ((net->cc_mod.rtcc.lbw << 32) | nbw),
	    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
	    net->flight_size,
	    probepoint);
	if (net->cc_mod.rtcc.steady_step) {
		oth = net->cc_mod.rtcc.vol_reduce;
		oth <<= 16;
		oth |= net->cc_mod.rtcc.step_cnt;
		oth <<= 16;
		oth |= net->cc_mod.rtcc.last_step_state;
		SDT_PROBE5(sctp, cwnd, net, rttstep,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    oth,
		    probepoint);
		/* Reclaim a volunteered MTU if we are not gaining. */
		if ((net->cc_mod.rtcc.vol_reduce) &&
		    (inst_ind != SCTP_INST_GAINING)) {
			net->cwnd += net->mtu;
			sctp_enforce_cwnd_limit(&stcb->asoc, net);
			net->cc_mod.rtcc.vol_reduce--;
		}
		net->cc_mod.rtcc.last_step_state = 4;
		net->cc_mod.rtcc.step_cnt = 0;
	}
out_decision:
	/* Record the new baseline for the next comparison. */
	net->cc_mod.rtcc.lbw = nbw;
	net->cc_mod.rtcc.lbw_rtt = net->rtt;
	net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd;
	if (inst_ind == SCTP_INST_GAINING) {
		return (1);
	} else {
		return (0);
	}
}
523 
524 static int
cc_bw_increase(struct sctp_tcb * stcb,struct sctp_nets * net,uint64_t nbw,uint64_t vtag)525 cc_bw_increase(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint64_t vtag)
526 {
527 	uint64_t oth __sdt_used, probepoint __sdt_used;
528 
529 	/*
530 	 * BW increased, so update and return 0, since all actions in our
531 	 * table say to do the normal CC update. Note that we pay no
532 	 * attention to the inst_ind since our overall sum is increasing.
533 	 */
534 	/* PROBE POINT 0 */
535 	probepoint = (((uint64_t)net->cwnd) << 32);
536 	SDT_PROBE5(sctp, cwnd, net, rttvar,
537 	    vtag,
538 	    ((net->cc_mod.rtcc.lbw << 32) | nbw),
539 	    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
540 	    net->flight_size,
541 	    probepoint);
542 	if (net->cc_mod.rtcc.steady_step) {
543 		oth = net->cc_mod.rtcc.vol_reduce;
544 		oth <<= 16;
545 		oth |= net->cc_mod.rtcc.step_cnt;
546 		oth <<= 16;
547 		oth |= net->cc_mod.rtcc.last_step_state;
548 		SDT_PROBE5(sctp, cwnd, net, rttstep,
549 		    vtag,
550 		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
551 		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
552 		    oth,
553 		    probepoint);
554 		net->cc_mod.rtcc.last_step_state = 0;
555 		net->cc_mod.rtcc.step_cnt = 0;
556 		net->cc_mod.rtcc.vol_reduce = 0;
557 	}
558 	net->cc_mod.rtcc.lbw = nbw;
559 	net->cc_mod.rtcc.lbw_rtt = net->rtt;
560 	net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd;
561 	return (0);
562 }
563 
564 /* RTCC Algorithm to limit growth of cwnd, return
565  * true if you want to NOT allow cwnd growth
566  */
/*
 * RTCC top-level decision: given the newly measured bandwidth nbw, decide
 * whether the normal cwnd update should be suppressed for this SACK.
 * Returns non-zero to hold cwnd where it is, 0 to let the update proceed.
 * Dispatches to cc_bw_increase()/cc_bw_decrease()/cc_bw_same() based on
 * how nbw compares to the recorded baseline (see the table below).
 */
static int
cc_bw_limit(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw)
{
	uint64_t bw_offset, rtt_offset;
	uint64_t probepoint __sdt_used, rtt, vtag;
	uint64_t bytes_for_this_rtt, inst_bw;
	uint64_t div, inst_off;
	int bw_shift;
	uint8_t inst_ind;
	int ret;

	/*-
	 * Here we need to see if we want
	 * to limit cwnd growth due to increase
	 * in overall rtt but no increase in bw.
	 * We use the following table to figure
	 * out what we should do. When we return
	 * 0, cc update goes on as planned. If we
	 * return 1, then no cc update happens and cwnd
	 * stays where it is at.
	 * ----------------------------------
	 *   BW    |    RTT   | Action
	 * *********************************
	 *   INC   |    INC   | return 0
	 * ----------------------------------
	 *   INC   |    SAME  | return 0
	 * ----------------------------------
	 *   INC   |    DECR  | return 0
	 * ----------------------------------
	 *   SAME  |    INC   | return 1
	 * ----------------------------------
	 *   SAME  |    SAME  | return 1
	 * ----------------------------------
	 *   SAME  |    DECR  | return 0
	 * ----------------------------------
	 *   DECR  |    INC   | return 0 or 1 based on if we caused.
	 * ----------------------------------
	 *   DECR  |    SAME  | return 0
	 * ----------------------------------
	 *   DECR  |    DECR  | return 0
	 * ----------------------------------
	 *
	 * We are a bit fuzz on what an increase or
	 * decrease is. For BW it is the same if
	 * it did not change within 1/64th. For
	 * RTT it stayed the same if it did not
	 * change within 1/32nd
	 */
	bw_shift = SCTP_BASE_SYSCTL(sctp_rttvar_bw);
	/* rtt temporarily holds my_vtag while the 64-bit probe tag is built. */
	rtt = stcb->asoc.my_vtag;
	vtag = (rtt << 32) | (((uint32_t)(stcb->sctp_ep->sctp_lport)) << 16) | (stcb->rport);
	probepoint = (((uint64_t)net->cwnd) << 32);
	rtt = net->rtt;
	if (net->cc_mod.rtcc.rtt_set_this_sack) {
		/* Fresh RTT sample: derive the instantaneous bw indication. */
		net->cc_mod.rtcc.rtt_set_this_sack = 0;
		bytes_for_this_rtt = net->cc_mod.rtcc.bw_bytes - net->cc_mod.rtcc.bw_bytes_at_last_rttc;
		net->cc_mod.rtcc.bw_bytes_at_last_rttc = net->cc_mod.rtcc.bw_bytes;
		if (net->rtt) {
			div = net->rtt / 1000;
			if (div) {
				inst_bw = bytes_for_this_rtt / div;
				inst_off = inst_bw >> bw_shift;
				if (inst_bw > nbw)
					inst_ind = SCTP_INST_GAINING;
				else if ((inst_bw + inst_off) < nbw)
					inst_ind = SCTP_INST_LOOSING;
				else
					inst_ind = SCTP_INST_NEUTRAL;
				probepoint |= ((0xb << 16) | inst_ind);
			} else {
				inst_ind = net->cc_mod.rtcc.last_inst_ind;
				inst_bw = bytes_for_this_rtt / (uint64_t)(net->rtt);
				/* Can't determine do not change */
				probepoint |= ((0xc << 16) | inst_ind);
			}
		} else {
			inst_ind = net->cc_mod.rtcc.last_inst_ind;
			inst_bw = bytes_for_this_rtt;
			/* Can't determine do not change */
			probepoint |= ((0xd << 16) | inst_ind);
		}
		SDT_PROBE5(sctp, cwnd, net, rttvar,
		    vtag,
		    ((nbw << 32) | inst_bw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | rtt),
		    net->flight_size,
		    probepoint);
	} else {
		/* No rtt measurement, use last one */
		inst_ind = net->cc_mod.rtcc.last_inst_ind;
	}
	/* bw "same" band is +/- lbw >> sctp_rttvar_bw (default 1/64th). */
	bw_offset = net->cc_mod.rtcc.lbw >> bw_shift;
	if (nbw > net->cc_mod.rtcc.lbw + bw_offset) {
		ret = cc_bw_increase(stcb, net, nbw, vtag);
		goto out;
	}
	/* rtt "same" band is +/- lbw_rtt >> sctp_rttvar_rtt (default 1/32nd). */
	rtt_offset = net->cc_mod.rtcc.lbw_rtt >> SCTP_BASE_SYSCTL(sctp_rttvar_rtt);
	if (nbw < net->cc_mod.rtcc.lbw - bw_offset) {
		ret = cc_bw_decrease(stcb, net, nbw, rtt_offset, vtag, inst_ind);
		goto out;
	}
	/*
	 * If we reach here then we are in a situation where the bw stayed
	 * the same.
	 */
	ret = cc_bw_same(stcb, net, nbw, rtt_offset, vtag, inst_ind);
out:
	net->cc_mod.rtcc.last_inst_ind = inst_ind;
	return (ret);
}
677 
678 static void
sctp_cwnd_update_after_sack_common(struct sctp_tcb * stcb,struct sctp_association * asoc,int accum_moved,int reneged_all SCTP_UNUSED,int will_exit,int use_rtcc)679 sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb,
680     struct sctp_association *asoc,
681     int accum_moved, int reneged_all SCTP_UNUSED, int will_exit, int use_rtcc)
682 {
683 	struct sctp_nets *net;
684 	int old_cwnd __sdt_used;
685 	uint32_t t_ssthresh, incr;
686 	uint64_t t_ucwnd_sbw;
687 	uint64_t t_path_mptcp;
688 	uint64_t mptcp_like_alpha;
689 	uint32_t srtt;
690 	uint64_t max_path;
691 
692 	/* MT FIXME: Don't compute this over and over again */
693 	t_ssthresh = 0;
694 	t_ucwnd_sbw = 0;
695 	t_path_mptcp = 0;
696 	mptcp_like_alpha = 1;
697 	if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
698 	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2) ||
699 	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_MPTCP)) {
700 		max_path = 0;
701 		TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
702 			t_ssthresh += net->ssthresh;
703 			/* lastsa>>3;  we don't need to devide ... */
704 			srtt = net->lastsa;
705 			if (srtt > 0) {
706 				uint64_t tmp;
707 
708 				t_ucwnd_sbw += (uint64_t)net->cwnd / (uint64_t)srtt;
709 				t_path_mptcp += (((uint64_t)net->cwnd) << SHIFT_MPTCP_MULTI_Z) /
710 				    (((uint64_t)net->mtu) * (uint64_t)srtt);
711 				tmp = (((uint64_t)net->cwnd) << SHIFT_MPTCP_MULTI_N) /
712 				    ((uint64_t)net->mtu * (uint64_t)(srtt * srtt));
713 				if (tmp > max_path) {
714 					max_path = tmp;
715 				}
716 			}
717 		}
718 		if (t_path_mptcp > 0) {
719 			mptcp_like_alpha = max_path / (t_path_mptcp * t_path_mptcp);
720 		} else {
721 			mptcp_like_alpha = 1;
722 		}
723 	}
724 	if (t_ssthresh == 0) {
725 		t_ssthresh = 1;
726 	}
727 	if (t_ucwnd_sbw == 0) {
728 		t_ucwnd_sbw = 1;
729 	}
730 	/******************************/
731 	/* update cwnd and Early FR   */
732 	/******************************/
733 	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
734 #ifdef JANA_CMT_FAST_RECOVERY
735 		/*
736 		 * CMT fast recovery code. Need to debug.
737 		 */
738 		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
739 			if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) ||
740 			    SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) {
741 				net->will_exit_fast_recovery = 1;
742 			}
743 		}
744 #endif
745 		/* if nothing was acked on this destination skip it */
746 		if (net->net_ack == 0) {
747 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
748 				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
749 			}
750 			continue;
751 		}
752 #ifdef JANA_CMT_FAST_RECOVERY
753 		/*
754 		 * CMT fast recovery code
755 		 */
756 		/*
757 		 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
758 		 * && net->will_exit_fast_recovery == 0) { @@@ Do something
759 		 * } else if (sctp_cmt_on_off == 0 &&
760 		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
761 		 */
762 #endif
763 
764 		if (asoc->fast_retran_loss_recovery &&
765 		    (will_exit == 0) &&
766 		    (asoc->sctp_cmt_on_off == 0)) {
767 			/*
768 			 * If we are in loss recovery we skip any cwnd
769 			 * update
770 			 */
771 			return;
772 		}
773 		/*
774 		 * Did any measurements go on for this network?
775 		 */
776 		if (use_rtcc && (net->cc_mod.rtcc.tls_needs_set > 0)) {
777 			uint64_t nbw;
778 
779 			/*
780 			 * At this point our bw_bytes has been updated by
781 			 * incoming sack information.
782 			 *
783 			 * But our bw may not yet be set.
784 			 *
785 			 */
786 			if ((net->cc_mod.rtcc.new_tot_time / 1000) > 0) {
787 				nbw = net->cc_mod.rtcc.bw_bytes / (net->cc_mod.rtcc.new_tot_time / 1000);
788 			} else {
789 				nbw = net->cc_mod.rtcc.bw_bytes;
790 			}
791 			if (net->cc_mod.rtcc.lbw) {
792 				if (cc_bw_limit(stcb, net, nbw)) {
793 					/* Hold here, no update */
794 					continue;
795 				}
796 			} else {
797 				uint64_t vtag __sdt_used, probepoint __sdt_used;
798 
799 				probepoint = (((uint64_t)net->cwnd) << 32);
800 				probepoint |= ((0xa << 16) | 0);
801 				vtag = (net->rtt << 32) |
802 				    (((uint32_t)(stcb->sctp_ep->sctp_lport)) << 16) |
803 				    (stcb->rport);
804 
805 				SDT_PROBE5(sctp, cwnd, net, rttvar,
806 				    vtag,
807 				    nbw,
808 				    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
809 				    net->flight_size,
810 				    probepoint);
811 				net->cc_mod.rtcc.lbw = nbw;
812 				net->cc_mod.rtcc.lbw_rtt = net->rtt;
813 				if (net->cc_mod.rtcc.rtt_set_this_sack) {
814 					net->cc_mod.rtcc.rtt_set_this_sack = 0;
815 					net->cc_mod.rtcc.bw_bytes_at_last_rttc = net->cc_mod.rtcc.bw_bytes;
816 				}
817 			}
818 		}
819 		/*
820 		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
821 		 * moved.
822 		 */
823 		if (accum_moved ||
824 		    ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
825 			/* If the cumulative ack moved we can proceed */
826 			if (net->cwnd <= net->ssthresh) {
827 				/* We are in slow start */
828 				if (net->flight_size + net->net_ack >= net->cwnd) {
829 					uint32_t limit;
830 
831 					old_cwnd = net->cwnd;
832 					switch (asoc->sctp_cmt_on_off) {
833 					case SCTP_CMT_RPV1:
834 						limit = (uint32_t)(((uint64_t)net->mtu *
835 						    (uint64_t)SCTP_BASE_SYSCTL(sctp_L2_abc_variable) *
836 						    (uint64_t)net->ssthresh) /
837 						    (uint64_t)t_ssthresh);
838 						incr = (uint32_t)(((uint64_t)net->net_ack *
839 						    (uint64_t)net->ssthresh) /
840 						    (uint64_t)t_ssthresh);
841 						if (incr > limit) {
842 							incr = limit;
843 						}
844 						if (incr == 0) {
845 							incr = 1;
846 						}
847 						break;
848 					case SCTP_CMT_RPV2:
849 						/*
850 						 * lastsa>>3;  we don't need
851 						 * to divide ...
852 						 */
853 						srtt = net->lastsa;
854 						if (srtt == 0) {
855 							srtt = 1;
856 						}
857 						limit = (uint32_t)(((uint64_t)net->mtu *
858 						    (uint64_t)SCTP_BASE_SYSCTL(sctp_L2_abc_variable) *
859 						    (uint64_t)net->cwnd) /
860 						    ((uint64_t)srtt * t_ucwnd_sbw));
861 						/* INCREASE FACTOR */
862 						incr = (uint32_t)(((uint64_t)net->net_ack *
863 						    (uint64_t)net->cwnd) /
864 						    ((uint64_t)srtt * t_ucwnd_sbw));
865 						/* INCREASE FACTOR */
866 						if (incr > limit) {
867 							incr = limit;
868 						}
869 						if (incr == 0) {
870 							incr = 1;
871 						}
872 						break;
873 					case SCTP_CMT_MPTCP:
874 						limit = (uint32_t)(((uint64_t)net->mtu *
875 						    mptcp_like_alpha *
876 						    (uint64_t)SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) >>
877 						    SHIFT_MPTCP_MULTI);
878 						incr = (uint32_t)(((uint64_t)net->net_ack *
879 						    mptcp_like_alpha) >>
880 						    SHIFT_MPTCP_MULTI);
881 						if (incr > limit) {
882 							incr = limit;
883 						}
884 						if (incr > net->net_ack) {
885 							incr = net->net_ack;
886 						}
887 						if (incr > net->mtu) {
888 							incr = net->mtu;
889 						}
890 						break;
891 					default:
892 						incr = net->net_ack;
893 						if (incr > net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) {
894 							incr = net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable);
895 						}
896 						break;
897 					}
898 					net->cwnd += incr;
899 					sctp_enforce_cwnd_limit(asoc, net);
900 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
901 						sctp_log_cwnd(stcb, net, incr,
902 						    SCTP_CWND_LOG_FROM_SS);
903 					}
904 					SDT_PROBE5(sctp, cwnd, net, ack,
905 					    stcb->asoc.my_vtag,
906 					    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
907 					    net,
908 					    old_cwnd, net->cwnd);
909 				} else {
910 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
911 						sctp_log_cwnd(stcb, net, net->net_ack,
912 						    SCTP_CWND_LOG_NOADV_SS);
913 					}
914 				}
915 			} else {
916 				/* We are in congestion avoidance */
917 				/*
918 				 * Add to pba
919 				 */
920 				net->partial_bytes_acked += net->net_ack;
921 
922 				if ((net->flight_size + net->net_ack >= net->cwnd) &&
923 				    (net->partial_bytes_acked >= net->cwnd)) {
924 					net->partial_bytes_acked -= net->cwnd;
925 					old_cwnd = net->cwnd;
926 					switch (asoc->sctp_cmt_on_off) {
927 					case SCTP_CMT_RPV1:
928 						incr = (uint32_t)(((uint64_t)net->mtu *
929 						    (uint64_t)net->ssthresh) /
930 						    (uint64_t)t_ssthresh);
931 						if (incr == 0) {
932 							incr = 1;
933 						}
934 						break;
935 					case SCTP_CMT_RPV2:
936 						/*
937 						 * lastsa>>3;  we don't need
938 						 * to divide ...
939 						 */
940 						srtt = net->lastsa;
941 						if (srtt == 0) {
942 							srtt = 1;
943 						}
944 						incr = (uint32_t)((uint64_t)net->mtu *
945 						    (uint64_t)net->cwnd /
946 						    ((uint64_t)srtt *
947 						    t_ucwnd_sbw));
948 						/* INCREASE FACTOR */
949 						if (incr == 0) {
950 							incr = 1;
951 						}
952 						break;
953 					case SCTP_CMT_MPTCP:
954 						incr = (uint32_t)((mptcp_like_alpha *
955 						    (uint64_t)net->cwnd) >>
956 						    SHIFT_MPTCP_MULTI);
957 						if (incr > net->mtu) {
958 							incr = net->mtu;
959 						}
960 						break;
961 					default:
962 						incr = net->mtu;
963 						break;
964 					}
965 					net->cwnd += incr;
966 					sctp_enforce_cwnd_limit(asoc, net);
967 					SDT_PROBE5(sctp, cwnd, net, ack,
968 					    stcb->asoc.my_vtag,
969 					    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
970 					    net,
971 					    old_cwnd, net->cwnd);
972 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
973 						sctp_log_cwnd(stcb, net, net->mtu,
974 						    SCTP_CWND_LOG_FROM_CA);
975 					}
976 				} else {
977 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
978 						sctp_log_cwnd(stcb, net, net->net_ack,
979 						    SCTP_CWND_LOG_NOADV_CA);
980 					}
981 				}
982 			}
983 		} else {
984 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
985 				sctp_log_cwnd(stcb, net, net->mtu,
986 				    SCTP_CWND_LOG_NO_CUMACK);
987 			}
988 		}
989 	}
990 }
991 
static void
sctp_cwnd_update_exit_pf_common(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int old_cwnd __sdt_used;	/* only consumed by the SDT probe */

	/*
	 * The destination just left the Potentially Failed (PF) state:
	 * restart it conservatively with a congestion window of one MTU.
	 */
	old_cwnd = net->cwnd;
	net->cwnd = net->mtu;
	SDT_PROBE5(sctp, cwnd, net, ack,
	    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
	    old_cwnd, net->cwnd);
	SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
	    (void *)net, net->cwnd);
}
1005 
static void
sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int old_cwnd = net->cwnd;
	uint32_t t_ssthresh, t_cwnd;
	uint64_t t_ucwnd_sbw;

	/*
	 * Retransmission timeout on this destination: compute the new
	 * ssthresh (plain RFC-style halving, or the CMT resource-pooling
	 * formulas for the RPV1/RPV2 variants) and collapse cwnd to 1 MTU.
	 */
	/* MT FIXME: Don't compute this over and over again */
	t_ssthresh = 0;
	t_cwnd = 0;
	if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) {
		struct sctp_nets *lnet;
		uint32_t srtt;

		/* Pool ssthresh, cwnd and cwnd/srtt across all destinations. */
		t_ucwnd_sbw = 0;
		TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
			t_ssthresh += lnet->ssthresh;
			t_cwnd += lnet->cwnd;
			srtt = lnet->lastsa;
			/* lastsa>>3;  we don't need to divide ... */
			if (srtt > 0) {
				t_ucwnd_sbw += (uint64_t)lnet->cwnd / (uint64_t)srtt;
			}
		}
		/* Guard the divisions below against zero denominators. */
		if (t_ssthresh < 1) {
			t_ssthresh = 1;
		}
		if (t_ucwnd_sbw < 1) {
			t_ucwnd_sbw = 1;
		}
		if (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) {
			/* RPV1: 4 MTUs scaled by this path's share of the pooled ssthresh. */
			net->ssthresh = (uint32_t)(((uint64_t)4 *
			    (uint64_t)net->mtu *
			    (uint64_t)net->ssthresh) /
			    (uint64_t)t_ssthresh);
		} else {
			/* RPV2: base the reduction on the pooled cwnd/srtt bandwidth sum. */
			uint64_t cc_delta;

			srtt = net->lastsa;
			/* lastsa>>3;  we don't need to divide ... */
			if (srtt == 0) {
				srtt = 1;
			}
			cc_delta = t_ucwnd_sbw * (uint64_t)srtt / 2;
			if (cc_delta < t_cwnd) {
				net->ssthresh = (uint32_t)((uint64_t)t_cwnd - cc_delta);
			} else {
				net->ssthresh = net->mtu;
			}
		}
		/* Don't let ssthresh fall below this path's excess over half the pooled cwnd. */
		if ((net->cwnd > t_cwnd / 2) &&
		    (net->ssthresh < net->cwnd - t_cwnd / 2)) {
			net->ssthresh = net->cwnd - t_cwnd / 2;
		}
		if (net->ssthresh < net->mtu) {
			net->ssthresh = net->mtu;
		}
	} else {
		/* Standard behavior: half the cwnd, but at least 4 MTUs. */
		net->ssthresh = max(net->cwnd / 2, 4 * net->mtu);
	}
	net->cwnd = net->mtu;
	net->partial_bytes_acked = 0;
	SDT_PROBE5(sctp, cwnd, net, to,
	    stcb->asoc.my_vtag,
	    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
	    net,
	    old_cwnd, net->cwnd);
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
	}
}
1078 
static void
sctp_cwnd_update_after_ecn_echo_common(struct sctp_tcb *stcb, struct sctp_nets *net,
    int in_window, int num_pkt_lost, int use_rtcc)
{
	int old_cwnd = net->cwnd;

	/*
	 * React to an ECN-Echo.  With the RTCC variant on a local LAN and
	 * DCCC-style ECN enabled, apply the data-center reaction; otherwise
	 * do the classic halving, but only once per window (in_window == 0).
	 */
	if ((use_rtcc) && (net->lan_type == SCTP_LAN_LOCAL) && (net->cc_mod.rtcc.use_dccc_ecn)) {
		/* Data center Congestion Control */
		if (in_window == 0) {
			/*
			 * Go to CA with the cwnd at the point we sent the
			 * TSN that was marked with a CE.
			 */
			if (net->ecn_prev_cwnd < net->cwnd) {
				/* Restore to prev cwnd */
				net->cwnd = net->ecn_prev_cwnd - (net->mtu * num_pkt_lost);
			} else {
				/* Just cut in 1/2 */
				net->cwnd /= 2;
			}
			/* Drop to CA */
			net->ssthresh = net->cwnd - (num_pkt_lost * net->mtu);
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
			}
		} else {
			/*
			 * Further tuning down required over the drastic
			 * original cut
			 */
			net->ssthresh -= (net->mtu * num_pkt_lost);
			net->cwnd -= (net->mtu * num_pkt_lost);
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
			}
		}
		SCTP_STAT_INCR(sctps_ecnereducedcwnd);
	} else {
		if (in_window == 0) {
			/* First ECN-Echo in this window: halve ssthresh. */
			SCTP_STAT_INCR(sctps_ecnereducedcwnd);
			net->ssthresh = net->cwnd / 2;
			if (net->ssthresh < net->mtu) {
				net->ssthresh = net->mtu;
				/*
				 * here back off the timer as well, to slow
				 * us down
				 */
				net->RTO <<= 1;
			}
			net->cwnd = net->ssthresh;
			SDT_PROBE5(sctp, cwnd, net, ecn,
			    stcb->asoc.my_vtag,
			    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
			    net,
			    old_cwnd, net->cwnd);
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
			}
		}
	}

}
1141 
static void
sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
    struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
    uint32_t *bottle_bw, uint32_t *on_queue)
{
	uint32_t bw_avail;
	unsigned int incr;
	int old_cwnd = net->cwnd;

	/*
	 * React to a PKTDROP chunk from a router: use the reported
	 * bottleneck bandwidth and queue occupancy to size cwnd to the
	 * available pipe.  *bottle_bw and *on_queue are also returned to
	 * the caller.
	 */
	/* get bottle neck bw */
	*bottle_bw = ntohl(cp->bottle_bw);
	/* and whats on queue */
	*on_queue = ntohl(cp->current_onq);
	/*
	 * adjust the on-queue if our flight is more it could be that the
	 * router has not yet gotten data "in-flight" to it
	 */
	if (*on_queue < net->flight_size) {
		*on_queue = net->flight_size;
	}
	/* rtt is measured in micro seconds, bottle_bw in bytes per second */
	bw_avail = (uint32_t)(((uint64_t)(*bottle_bw) * net->rtt) / (uint64_t)1000000);
	if (bw_avail > *bottle_bw) {
		/*
		 * Cap the growth to no more than the bottle neck. This can
		 * happen as RTT slides up due to queues. It also means if
		 * you have more than a 1 second RTT with a empty queue you
		 * will be limited to the bottle_bw per second no matter if
		 * other points have 1/2 the RTT and you could get more
		 * out...
		 */
		bw_avail = *bottle_bw;
	}
	if (*on_queue > bw_avail) {
		/*
		 * No room for anything else don't allow anything else to be
		 * "added to the fire".
		 */
		int seg_inflight, seg_onqueue, my_portion;

		net->partial_bytes_acked = 0;
		/* how much are we over queue size? */
		incr = *on_queue - bw_avail;
		if (stcb->asoc.seen_a_sack_this_pkt) {
			/*
			 * undo any cwnd adjustment that the sack might have
			 * made
			 */
			net->cwnd = net->prev_cwnd;
		}
		/* Now how much of that is mine? */
		seg_inflight = net->flight_size / net->mtu;
		seg_onqueue = *on_queue / net->mtu;
		/*
		 * NOTE(review): if *on_queue < net->mtu, seg_onqueue is 0
		 * and the division below would trap — presumably *on_queue
		 * always exceeds one MTU when this branch is taken; confirm
		 * against callers.
		 */
		my_portion = (incr * seg_inflight) / seg_onqueue;

		/* Have I made an adjustment already */
		if (net->cwnd > net->flight_size) {
			/*
			 * for this flight I made an adjustment we need to
			 * decrease the portion by a share our previous
			 * adjustment.
			 */
			int diff_adj;

			diff_adj = net->cwnd - net->flight_size;
			if (diff_adj > my_portion)
				my_portion = 0;
			else
				my_portion -= diff_adj;
		}
		/*
		 * back down to the previous cwnd (assume we have had a sack
		 * before this packet). minus what ever portion of the
		 * overage is my fault.
		 */
		net->cwnd -= my_portion;

		/* we will NOT back down more than 1 MTU */
		if (net->cwnd <= net->mtu) {
			net->cwnd = net->mtu;
		}
		/* force into CA */
		net->ssthresh = net->cwnd - 1;
	} else {
		/*
		 * Take 1/4 of the space left or max burst up .. whichever
		 * is less.
		 */
		incr = (bw_avail - *on_queue) >> 2;
		if ((stcb->asoc.max_burst > 0) &&
		    (stcb->asoc.max_burst * net->mtu < incr)) {
			incr = stcb->asoc.max_burst * net->mtu;
		}
		net->cwnd += incr;
	}
	if (net->cwnd > bw_avail) {
		/* We can't exceed the pipe size */
		net->cwnd = bw_avail;
	}
	if (net->cwnd < net->mtu) {
		/* We always have 1 MTU */
		net->cwnd = net->mtu;
	}
	sctp_enforce_cwnd_limit(&stcb->asoc, net);
	if (net->cwnd - old_cwnd != 0) {
		/* log only changes */
		SDT_PROBE5(sctp, cwnd, net, pd,
		    stcb->asoc.my_vtag,
		    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
		    net,
		    old_cwnd, net->cwnd);
		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
			sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
			    SCTP_CWND_LOG_FROM_SAT);
		}
	}
}
1259 
static void
sctp_cwnd_update_after_output(struct sctp_tcb *stcb,
    struct sctp_nets *net, int burst_limit)
{
	int old_cwnd = net->cwnd;

	/*
	 * After an output burst: remember the current cwnd in ssthresh and,
	 * when a burst limit applies, clamp cwnd to the data in flight plus
	 * burst_limit MTUs.
	 */
	if (net->ssthresh < net->cwnd)
		net->ssthresh = net->cwnd;
	if (burst_limit) {
		net->cwnd = (net->flight_size + (burst_limit * net->mtu));
		sctp_enforce_cwnd_limit(&stcb->asoc, net);
		SDT_PROBE5(sctp, cwnd, net, bl,
		    stcb->asoc.my_vtag,
		    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
		    net,
		    old_cwnd, net->cwnd);
		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
			sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST);
		}
	}
}
1281 
/*
 * RFC 2581 style SACK handling: delegate to the common routine with the
 * RTCC algorithm disabled.
 */
static void
sctp_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	/* Passing a zero argument in last disables the rtcc algorithm */
	sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all, will_exit, 0);
}
1290 
/*
 * RFC 2581 style ECN-Echo handling: delegate to the common routine with the
 * RTCC algorithm disabled.
 */
static void
sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
    int in_window, int num_pkt_lost)
{
	/* Passing a zero argument in last disables the rtcc algorithm */
	sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost, 0);
}
1298 
1299 /* Here starts the RTCCVAR type CC invented by RRS which
1300  * is a slight mod to RFC2581. We reuse a common routine or
1301  * two since these algorithms are so close and need to
1302  * remain the same.
1303  */
/* RTCC variant of the ECN-Echo handler: last argument enables RTCC. */
static void
sctp_cwnd_update_rtcc_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
    int in_window, int num_pkt_lost)
{
	sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost, 1);
}
1310 
/* Fold a newly acknowledged chunk's size into the RTCC bandwidth counter. */
static void
sctp_cwnd_update_rtcc_tsn_acknowledged(struct sctp_nets *net,
    struct sctp_tmit_chunk *tp1)
{
	net->cc_mod.rtcc.bw_bytes += tp1->send_size;
}
1317 
static void
sctp_cwnd_prepare_rtcc_net_for_sack(struct sctp_tcb *stcb SCTP_UNUSED,
    struct sctp_nets *net)
{
	/*
	 * Before processing a SACK, close out the running bandwidth
	 * measurement by recording the elapsed time (in microseconds)
	 * since the measurement started.
	 */
	if (net->cc_mod.rtcc.tls_needs_set > 0) {
		/* We had a bw measurement going on */
		struct timeval ltls;

		SCTP_GETPTIME_TIMEVAL(&ltls);
		timevalsub(&ltls, &net->cc_mod.rtcc.tls);
		net->cc_mod.rtcc.new_tot_time = (ltls.tv_sec * 1000000) + ltls.tv_usec;
	}
}
1331 
static void
sctp_cwnd_new_rtcc_transmission_begins(struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	uint64_t vtag __sdt_used, probepoint __sdt_used;

	/*
	 * A new transmission starts after the flight drained to zero:
	 * discard the stale bandwidth estimate and, for the less
	 * aggressive mode (ret_from_eq), also reset cwnd towards its
	 * initial value.
	 */
	if (net->cc_mod.rtcc.lbw) {
		/* Clear the old bw.. we went to 0 in-flight */
		vtag = (net->rtt << 32) | (((uint32_t)(stcb->sctp_ep->sctp_lport)) << 16) |
		    (stcb->rport);
		probepoint = (((uint64_t)net->cwnd) << 32);
		/* Probe point 8 */
		probepoint |= ((8 << 16) | 0);
		SDT_PROBE5(sctp, cwnd, net, rttvar,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | 0),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    net->flight_size,
		    probepoint);
		/* Reset all bandwidth-tracking state. */
		net->cc_mod.rtcc.lbw_rtt = 0;
		net->cc_mod.rtcc.cwnd_at_bw_set = 0;
		net->cc_mod.rtcc.lbw = 0;
		net->cc_mod.rtcc.bw_bytes_at_last_rttc = 0;
		net->cc_mod.rtcc.vol_reduce = 0;
		net->cc_mod.rtcc.bw_tot_time = 0;
		net->cc_mod.rtcc.bw_bytes = 0;
		net->cc_mod.rtcc.tls_needs_set = 0;
		if (net->cc_mod.rtcc.steady_step) {
			net->cc_mod.rtcc.vol_reduce = 0;
			net->cc_mod.rtcc.step_cnt = 0;
			net->cc_mod.rtcc.last_step_state = 0;
		}
		if (net->cc_mod.rtcc.ret_from_eq) {
			/* less aggressive one - reset cwnd too */
			uint32_t cwnd_in_mtu, cwnd;

			cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd);
			if (cwnd_in_mtu == 0) {
				/*
				 * Using 0 means that the value of RFC 4960
				 * is used.
				 */
				cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
			} else {
				/*
				 * We take the minimum of the burst limit
				 * and the initial congestion window.
				 */
				if ((stcb->asoc.max_burst > 0) && (cwnd_in_mtu > stcb->asoc.max_burst))
					cwnd_in_mtu = stcb->asoc.max_burst;
				cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu;
			}
			if (net->cwnd > cwnd) {
				/*
				 * Only set if we are not a timeout (i.e.
				 * down to 1 mtu)
				 */
				net->cwnd = cwnd;
			}
		}
	}
}
1394 
1395 static void
sctp_set_rtcc_initial_cc_param(struct sctp_tcb * stcb,struct sctp_nets * net)1396 sctp_set_rtcc_initial_cc_param(struct sctp_tcb *stcb,
1397     struct sctp_nets *net)
1398 {
1399 	uint64_t vtag __sdt_used, probepoint __sdt_used;
1400 
1401 	sctp_set_initial_cc_param(stcb, net);
1402 	stcb->asoc.use_precise_time = 1;
1403 	probepoint = (((uint64_t)net->cwnd) << 32);
1404 	probepoint |= ((9 << 16) | 0);
1405 	vtag = (net->rtt << 32) |
1406 	    (((uint32_t)(stcb->sctp_ep->sctp_lport)) << 16) |
1407 	    (stcb->rport);
1408 	SDT_PROBE5(sctp, cwnd, net, rttvar,
1409 	    vtag,
1410 	    0,
1411 	    0,
1412 	    0,
1413 	    probepoint);
1414 	net->cc_mod.rtcc.lbw_rtt = 0;
1415 	net->cc_mod.rtcc.cwnd_at_bw_set = 0;
1416 	net->cc_mod.rtcc.vol_reduce = 0;
1417 	net->cc_mod.rtcc.lbw = 0;
1418 	net->cc_mod.rtcc.vol_reduce = 0;
1419 	net->cc_mod.rtcc.bw_bytes_at_last_rttc = 0;
1420 	net->cc_mod.rtcc.bw_tot_time = 0;
1421 	net->cc_mod.rtcc.bw_bytes = 0;
1422 	net->cc_mod.rtcc.tls_needs_set = 0;
1423 	net->cc_mod.rtcc.ret_from_eq = SCTP_BASE_SYSCTL(sctp_rttvar_eqret);
1424 	net->cc_mod.rtcc.steady_step = SCTP_BASE_SYSCTL(sctp_steady_step);
1425 	net->cc_mod.rtcc.use_dccc_ecn = SCTP_BASE_SYSCTL(sctp_use_dccc_ecn);
1426 	net->cc_mod.rtcc.step_cnt = 0;
1427 	net->cc_mod.rtcc.last_step_state = 0;
1428 }
1429 
1430 static int
sctp_cwnd_rtcc_socket_option(struct sctp_tcb * stcb,int setorget,struct sctp_cc_option * cc_opt)1431 sctp_cwnd_rtcc_socket_option(struct sctp_tcb *stcb, int setorget,
1432     struct sctp_cc_option *cc_opt)
1433 {
1434 	struct sctp_nets *net;
1435 
1436 	if (setorget == 1) {
1437 		/* a set */
1438 		if (cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) {
1439 			if ((cc_opt->aid_value.assoc_value != 0) &&
1440 			    (cc_opt->aid_value.assoc_value != 1)) {
1441 				return (EINVAL);
1442 			}
1443 			TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
1444 				net->cc_mod.rtcc.ret_from_eq = cc_opt->aid_value.assoc_value;
1445 			}
1446 		} else if (cc_opt->option == SCTP_CC_OPT_USE_DCCC_ECN) {
1447 			if ((cc_opt->aid_value.assoc_value != 0) &&
1448 			    (cc_opt->aid_value.assoc_value != 1)) {
1449 				return (EINVAL);
1450 			}
1451 			TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
1452 				net->cc_mod.rtcc.use_dccc_ecn = cc_opt->aid_value.assoc_value;
1453 			}
1454 		} else if (cc_opt->option == SCTP_CC_OPT_STEADY_STEP) {
1455 			TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
1456 				net->cc_mod.rtcc.steady_step = cc_opt->aid_value.assoc_value;
1457 			}
1458 		} else {
1459 			return (EINVAL);
1460 		}
1461 	} else {
1462 		/* a get */
1463 		if (cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) {
1464 			net = TAILQ_FIRST(&stcb->asoc.nets);
1465 			if (net == NULL) {
1466 				return (EFAULT);
1467 			}
1468 			cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.ret_from_eq;
1469 		} else if (cc_opt->option == SCTP_CC_OPT_USE_DCCC_ECN) {
1470 			net = TAILQ_FIRST(&stcb->asoc.nets);
1471 			if (net == NULL) {
1472 				return (EFAULT);
1473 			}
1474 			cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.use_dccc_ecn;
1475 		} else if (cc_opt->option == SCTP_CC_OPT_STEADY_STEP) {
1476 			net = TAILQ_FIRST(&stcb->asoc.nets);
1477 			if (net == NULL) {
1478 				return (EFAULT);
1479 			}
1480 			cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.steady_step;
1481 		} else {
1482 			return (EINVAL);
1483 		}
1484 	}
1485 	return (0);
1486 }
1487 
static void
sctp_cwnd_update_rtcc_packet_transmitted(struct sctp_tcb *stcb SCTP_UNUSED,
    struct sctp_nets *net)
{
	/*
	 * Start a bandwidth measurement window on the first transmission:
	 * stamp the start time and mark the measurement as running (2).
	 */
	if (net->cc_mod.rtcc.tls_needs_set == 0) {
		SCTP_GETPTIME_TIMEVAL(&net->cc_mod.rtcc.tls);
		net->cc_mod.rtcc.tls_needs_set = 2;
	}
}
1497 
/* RTCC variant of the SACK handler: last argument enables RTCC. */
static void
sctp_cwnd_update_rtcc_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	/* Passing a one argument at the last enables the rtcc algorithm */
	sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all, will_exit, 1);
}
1506 
/* Note that an RTT sample was taken for this SACK processing round. */
static void
sctp_rtt_rtcc_calculated(struct sctp_tcb *stcb SCTP_UNUSED,
    struct sctp_nets *net,
    struct timeval *now SCTP_UNUSED)
{
	net->cc_mod.rtcc.rtt_set_this_sack = 1;
}
1514 
1515 /* Here starts Sally Floyds HS-TCP */
1516 
/* One row of the HS-TCP adjustment table (see sctp_cwnd_adjust below). */
struct sctp_hs_raise_drop {
	int32_t cwnd;		/* cwnd band upper bound, in units of 1024 bytes (cwnd >> 10) */
	int8_t increase;	/* additive increase for the band, in units of 1024 bytes */
	int8_t drop_percent;	/* multiplicative decrease for the band, in percent */
};
1522 
1523 #define SCTP_HS_TABLE_SIZE 73
1524 
/*
 * HS-TCP parameter table: per cwnd band (cwnd measured in KB, i.e.
 * net->cwnd >> 10), the additive increase and the drop percentage to
 * apply.  The current band is cached in net->last_hs_used.
 */
static const struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = {
	{38, 1, 50},		/* 0   */
	{118, 2, 44},		/* 1   */
	{221, 3, 41},		/* 2   */
	{347, 4, 38},		/* 3   */
	{495, 5, 37},		/* 4   */
	{663, 6, 35},		/* 5   */
	{851, 7, 34},		/* 6   */
	{1058, 8, 33},		/* 7   */
	{1284, 9, 32},		/* 8   */
	{1529, 10, 31},		/* 9   */
	{1793, 11, 30},		/* 10  */
	{2076, 12, 29},		/* 11  */
	{2378, 13, 28},		/* 12  */
	{2699, 14, 28},		/* 13  */
	{3039, 15, 27},		/* 14  */
	{3399, 16, 27},		/* 15  */
	{3778, 17, 26},		/* 16  */
	{4177, 18, 26},		/* 17  */
	{4596, 19, 25},		/* 18  */
	{5036, 20, 25},		/* 19  */
	{5497, 21, 24},		/* 20  */
	{5979, 22, 24},		/* 21  */
	{6483, 23, 23},		/* 22  */
	{7009, 24, 23},		/* 23  */
	{7558, 25, 22},		/* 24  */
	{8130, 26, 22},		/* 25  */
	{8726, 27, 22},		/* 26  */
	{9346, 28, 21},		/* 27  */
	{9991, 29, 21},		/* 28  */
	{10661, 30, 21},	/* 29  */
	{11358, 31, 20},	/* 30  */
	{12082, 32, 20},	/* 31  */
	{12834, 33, 20},	/* 32  */
	{13614, 34, 19},	/* 33  */
	{14424, 35, 19},	/* 34  */
	{15265, 36, 19},	/* 35  */
	{16137, 37, 19},	/* 36  */
	{17042, 38, 18},	/* 37  */
	{17981, 39, 18},	/* 38  */
	{18955, 40, 18},	/* 39  */
	{19965, 41, 17},	/* 40  */
	{21013, 42, 17},	/* 41  */
	{22101, 43, 17},	/* 42  */
	{23230, 44, 17},	/* 43  */
	{24402, 45, 16},	/* 44  */
	{25618, 46, 16},	/* 45  */
	{26881, 47, 16},	/* 46  */
	{28193, 48, 16},	/* 47  */
	{29557, 49, 15},	/* 48  */
	{30975, 50, 15},	/* 49  */
	{32450, 51, 15},	/* 50  */
	{33986, 52, 15},	/* 51  */
	{35586, 53, 14},	/* 52  */
	{37253, 54, 14},	/* 53  */
	{38992, 55, 14},	/* 54  */
	{40808, 56, 14},	/* 55  */
	{42707, 57, 13},	/* 56  */
	{44694, 58, 13},	/* 57  */
	{46776, 59, 13},	/* 58  */
	{48961, 60, 13},	/* 59  */
	{51258, 61, 13},	/* 60  */
	{53677, 62, 12},	/* 61  */
	{56230, 63, 12},	/* 62  */
	{58932, 64, 12},	/* 63  */
	{61799, 65, 12},	/* 64  */
	{64851, 66, 11},	/* 65  */
	{68113, 67, 11},	/* 66  */
	{71617, 68, 11},	/* 67  */
	{75401, 69, 10},	/* 68  */
	{79517, 70, 10},	/* 69  */
	{84035, 71, 10},	/* 70  */
	{89053, 72, 10},	/* 71  */
	{94717, 73, 9}		/* 72  */
};
1600 
1601 static void
sctp_hs_cwnd_increase(struct sctp_tcb * stcb,struct sctp_nets * net)1602 sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net)
1603 {
1604 	int cur_val, i, indx, incr;
1605 	int old_cwnd = net->cwnd;
1606 
1607 	cur_val = net->cwnd >> 10;
1608 	indx = SCTP_HS_TABLE_SIZE - 1;
1609 
1610 	if (cur_val < sctp_cwnd_adjust[0].cwnd) {
1611 		/* normal mode */
1612 		if (net->net_ack > net->mtu) {
1613 			net->cwnd += net->mtu;
1614 		} else {
1615 			net->cwnd += net->net_ack;
1616 		}
1617 	} else {
1618 		for (i = net->last_hs_used; i < SCTP_HS_TABLE_SIZE; i++) {
1619 			if (cur_val < sctp_cwnd_adjust[i].cwnd) {
1620 				indx = i;
1621 				break;
1622 			}
1623 		}
1624 		net->last_hs_used = indx;
1625 		incr = (((int32_t)sctp_cwnd_adjust[indx].increase) << 10);
1626 		net->cwnd += incr;
1627 	}
1628 	sctp_enforce_cwnd_limit(&stcb->asoc, net);
1629 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1630 		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SS);
1631 	}
1632 }
1633 
1634 static void
sctp_hs_cwnd_decrease(struct sctp_tcb * stcb,struct sctp_nets * net)1635 sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net)
1636 {
1637 	int cur_val, i, indx;
1638 	int old_cwnd = net->cwnd;
1639 
1640 	cur_val = net->cwnd >> 10;
1641 	if (cur_val < sctp_cwnd_adjust[0].cwnd) {
1642 		/* normal mode */
1643 		net->ssthresh = net->cwnd / 2;
1644 		if (net->ssthresh < (net->mtu * 2)) {
1645 			net->ssthresh = 2 * net->mtu;
1646 		}
1647 		net->cwnd = net->ssthresh;
1648 	} else {
1649 		/* drop by the proper amount */
1650 		net->ssthresh = net->cwnd - (int)((net->cwnd / 100) *
1651 		    (int32_t)sctp_cwnd_adjust[net->last_hs_used].drop_percent);
1652 		net->cwnd = net->ssthresh;
1653 		/* now where are we */
1654 		indx = net->last_hs_used;
1655 		cur_val = net->cwnd >> 10;
1656 		/* reset where we are in the table */
1657 		if (cur_val < sctp_cwnd_adjust[0].cwnd) {
1658 			/* feel out of hs */
1659 			net->last_hs_used = 0;
1660 		} else {
1661 			for (i = indx; i >= 1; i--) {
1662 				if (cur_val > sctp_cwnd_adjust[i - 1].cwnd) {
1663 					break;
1664 				}
1665 			}
1666 			net->last_hs_used = indx;
1667 		}
1668 	}
1669 	sctp_enforce_cwnd_limit(&stcb->asoc, net);
1670 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1671 		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR);
1672 	}
1673 }
1674 
static void
sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
	struct sctp_nets *net;

	/*
	 * Fast-retransmit reaction (HS-TCP flavor): for every destination
	 * with newly acked data outside an RFC 2582 fast-recovery window,
	 * apply the HS decrease, open a new recovery window and restart
	 * the T3 timer.
	 */
	/*
	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
	 * (net->fast_retran_loss_recovery == 0)))
	 */
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
		if ((asoc->fast_retran_loss_recovery == 0) ||
		    (asoc->sctp_cmt_on_off > 0)) {
			/* out of a RFC2582 Fast recovery window? */
			if (net->net_ack > 0) {
				/*
				 * per section 7.2.3, are there any
				 * destinations that had a fast retransmit
				 * to them. If so what we need to do is
				 * adjust ssthresh and cwnd.
				 */
				struct sctp_tmit_chunk *lchk;

				sctp_hs_cwnd_decrease(stcb, net);

				lchk = TAILQ_FIRST(&asoc->send_queue);

				net->partial_bytes_acked = 0;
				/* Turn on fast recovery window */
				asoc->fast_retran_loss_recovery = 1;
				if (lchk == NULL) {
					/* Mark end of the window */
					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					asoc->fast_recovery_tsn = lchk->rec.data.tsn - 1;
				}

				/*
				 * CMT fast recovery -- per destination
				 * recovery variable.
				 */
				net->fast_retran_loss_recovery = 1;

				if (lchk == NULL) {
					/* Mark end of the window */
					net->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					net->fast_recovery_tsn = lchk->rec.data.tsn - 1;
				}

				/* Restart the retransmission timer for this path. */
				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net,
				    SCTP_FROM_SCTP_CC_FUNCTIONS + SCTP_LOC_2);
				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net);
			}
		} else if (net->net_ack > 0) {
			/*
			 * Mark a peg that we WOULD have done a cwnd
			 * reduction but RFC2582 prevented this action.
			 */
			SCTP_STAT_INCR(sctps_fastretransinrtt);
		}
	}
}
1740 
static void
sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all SCTP_UNUSED, int will_exit)
{
	struct sctp_nets *net;

	/*
	 * Per-SACK cwnd update for the HS-TCP module: slow start uses the
	 * HS increase table, congestion avoidance grows by one MTU per
	 * cwnd's worth of partial_bytes_acked.
	 */
	/******************************/
	/* update cwnd and Early FR   */
	/******************************/
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code. Need to debug.
		 */
		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
			if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) ||
			    SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) {
				net->will_exit_fast_recovery = 1;
			}
		}
#endif
		/* if nothing was acked on this destination skip it */
		if (net->net_ack == 0) {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
			}
			continue;
		}
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code
		 */
		/*
		 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
		 * && net->will_exit_fast_recovery == 0) { @@@ Do something
		 * } else if (sctp_cmt_on_off == 0 &&
		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
		 */
#endif

		if (asoc->fast_retran_loss_recovery &&
		    (will_exit == 0) &&
		    (asoc->sctp_cmt_on_off == 0)) {
			/*
			 * If we are in loss recovery we skip any cwnd
			 * update
			 */
			/* Note: this returns, skipping all remaining destinations too. */
			return;
		}
		/*
		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
		 * moved.
		 */
		if (accum_moved ||
		    ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
			/* If the cumulative ack moved we can proceed */
			if (net->cwnd <= net->ssthresh) {
				/* We are in slow start */
				if (net->flight_size + net->net_ack >= net->cwnd) {
					sctp_hs_cwnd_increase(stcb, net);
				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_SS);
					}
				}
			} else {
				/* We are in congestion avoidance */
				net->partial_bytes_acked += net->net_ack;
				if ((net->flight_size + net->net_ack >= net->cwnd) &&
				    (net->partial_bytes_acked >= net->cwnd)) {
					net->partial_bytes_acked -= net->cwnd;
					net->cwnd += net->mtu;
					sctp_enforce_cwnd_limit(asoc, net);
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
						sctp_log_cwnd(stcb, net, net->mtu,
						    SCTP_CWND_LOG_FROM_CA);
					}
				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_CA);
					}
				}
			}
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_NO_CUMACK);
			}
		}
	}
}
1835 
1836 /*
1837  * H-TCP congestion control. The algorithm is detailed in:
1838  * R.N.Shorten, D.J.Leith:
1839  *   "H-TCP: TCP for high-speed and long-distance networks"
1840  *   Proc. PFLDnet, Argonne, 2004.
1841  * http://www.hamilton.ie/net/htcp3.pdf
1842  */
1843 
/* Scale alpha by minRTT in htcp_alpha_update() (H-TCP RTT scaling). */
static int use_rtt_scaling = 1;
/* Enable the bandwidth-based beta switch in htcp_beta_update() /
 * measure_achieved_throughput(). */
static int use_bandwidth_switch = 1;
1846 
/*
 * Serial-number style range test: true iff seq1 lies in the circular
 * interval [seq2, seq3] (unsigned wrap-around arithmetic).
 */
static inline int
between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
{
	uint32_t whole_span = seq3 - seq2;
	uint32_t offset = seq1 - seq2;

	return (whole_span >= offset);
}
1852 
1853 static inline uint32_t
htcp_cong_time(struct htcp * ca)1854 htcp_cong_time(struct htcp *ca)
1855 {
1856 	return (sctp_get_tick_count() - ca->last_cong);
1857 }
1858 
1859 static inline uint32_t
htcp_ccount(struct htcp * ca)1860 htcp_ccount(struct htcp *ca)
1861 {
1862 	return (ca->minRTT == 0 ? htcp_cong_time(ca) : htcp_cong_time(ca) / ca->minRTT);
1863 }
1864 
1865 static inline void
htcp_reset(struct htcp * ca)1866 htcp_reset(struct htcp *ca)
1867 {
1868 	ca->undo_last_cong = ca->last_cong;
1869 	ca->undo_maxRTT = ca->maxRTT;
1870 	ca->undo_old_maxB = ca->old_maxB;
1871 	ca->last_cong = sctp_get_tick_count();
1872 }
1873 
#ifdef SCTP_NOT_USED

/*
 * Roll back the H-TCP state saved by htcp_reset() (for a congestion
 * signal that turned out to be spurious) and return the cwnd to resume
 * with.  Currently compiled out (SCTP_NOT_USED).
 */
static uint32_t
htcp_cwnd_undo(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	net->cc_mod.htcp_ca.last_cong = net->cc_mod.htcp_ca.undo_last_cong;
	net->cc_mod.htcp_ca.maxRTT = net->cc_mod.htcp_ca.undo_maxRTT;
	net->cc_mod.htcp_ca.old_maxB = net->cc_mod.htcp_ca.undo_old_maxB;
	/* beta is a 2^7 fixed-point fraction; scale ssthresh back up by
	 * 1/beta, in whole MTUs, and never shrink below the current cwnd. */
	return (max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->cc_mod.htcp_ca.beta) * net->mtu));
}

#endif
1886 
/*
 * Track minimum and maximum smoothed-RTT samples for this destination;
 * minRTT/maxRTT feed the beta computation in htcp_beta_update().
 */
static inline void
measure_rtt(struct sctp_nets *net)
{
	uint32_t srtt = net->lastsa >> SCTP_RTT_SHIFT;

	/* keep track of minimum RTT seen so far, minRTT is zero at first */
	if (net->cc_mod.htcp_ca.minRTT > srtt || !net->cc_mod.htcp_ca.minRTT)
		net->cc_mod.htcp_ca.minRTT = srtt;

	/* max RTT */
	/* only sample maxRTT outside fast retransmit and once at least 3
	 * congestion-free RTTs have elapsed; ssthresh < 0xFFFF limits this
	 * to plausibly-converged windows */
	if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->cc_mod.htcp_ca) > 3) {
		/* never let maxRTT fall below minRTT */
		if (net->cc_mod.htcp_ca.maxRTT < net->cc_mod.htcp_ca.minRTT)
			net->cc_mod.htcp_ca.maxRTT = net->cc_mod.htcp_ca.minRTT;
		/* let maxRTT grow by at most ~20ms per sample to damp outliers */
		if (net->cc_mod.htcp_ca.maxRTT < srtt && srtt <= net->cc_mod.htcp_ca.maxRTT + sctp_msecs_to_ticks(20))
			net->cc_mod.htcp_ca.maxRTT = srtt;
	}
}
1904 
/*
 * Sample the achieved throughput on this path (Bi, in MTUs/second) and
 * maintain its min/max; feeds the bandwidth-switch heuristic used by
 * htcp_beta_update().  Only active when use_bandwidth_switch is set.
 */
static void
measure_achieved_throughput(struct sctp_nets *net)
{
	uint32_t now = sctp_get_tick_count();

	if (net->fast_retran_ip == 0)
		net->cc_mod.htcp_ca.bytes_acked = net->net_ack;

	if (!use_bandwidth_switch)
		return;

	/* achieved throughput calculations */
	/* JRS - not 100% sure of this statement */
	if (net->fast_retran_ip == 1) {
		/* restart the measurement interval during fast retransmit */
		net->cc_mod.htcp_ca.bytecount = 0;
		net->cc_mod.htcp_ca.lasttime = now;
		return;
	}

	net->cc_mod.htcp_ca.bytecount += net->net_ack;
	/* take a sample once roughly a cwnd of data has been acked and at
	 * least one minRTT has elapsed since the last sample */
	if ((net->cc_mod.htcp_ca.bytecount >= net->cwnd - (((net->cc_mod.htcp_ca.alpha >> 7) ? (net->cc_mod.htcp_ca.alpha >> 7) : 1) * net->mtu)) &&
	    (now - net->cc_mod.htcp_ca.lasttime >= net->cc_mod.htcp_ca.minRTT) &&
	    (net->cc_mod.htcp_ca.minRTT > 0)) {
		/* MTUs acked per second over the interval (hz ticks = 1s) */
		uint32_t cur_Bi = net->cc_mod.htcp_ca.bytecount / net->mtu * hz / (now - net->cc_mod.htcp_ca.lasttime);

		if (htcp_ccount(&net->cc_mod.htcp_ca) <= 3) {
			/* just after backoff */
			net->cc_mod.htcp_ca.minB = net->cc_mod.htcp_ca.maxB = net->cc_mod.htcp_ca.Bi = cur_Bi;
		} else {
			/* smooth: 3/4 old estimate + 1/4 new sample */
			net->cc_mod.htcp_ca.Bi = (3 * net->cc_mod.htcp_ca.Bi + cur_Bi) / 4;
			if (net->cc_mod.htcp_ca.Bi > net->cc_mod.htcp_ca.maxB)
				net->cc_mod.htcp_ca.maxB = net->cc_mod.htcp_ca.Bi;
			if (net->cc_mod.htcp_ca.minB > net->cc_mod.htcp_ca.maxB)
				net->cc_mod.htcp_ca.minB = net->cc_mod.htcp_ca.maxB;
		}
		net->cc_mod.htcp_ca.bytecount = 0;
		net->cc_mod.htcp_ca.lasttime = now;
	}
}
1944 
/*
 * Recompute the multiplicative-decrease factor beta (2^7 fixed point)
 * from the minRTT/maxRTT ratio, clamped to [BETA_MIN, BETA_MAX].  With
 * the bandwidth switch enabled, revert to BETA_MIN whenever the
 * measured bandwidth moved by more than ~20% since the last sample.
 */
static inline void
htcp_beta_update(struct htcp *ca, uint32_t minRTT, uint32_t maxRTT)
{
	if (use_bandwidth_switch) {
		uint32_t maxB = ca->maxB;
		uint32_t old_maxB = ca->old_maxB;

		ca->old_maxB = ca->maxB;

		/* 5*maxB outside [4*old_maxB, 6*old_maxB] means the
		 * bandwidth changed by more than 20% either way */
		if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
			ca->beta = BETA_MIN;
			ca->modeswitch = 0;
			return;
		}
	}

	/* only use the adaptive beta once RTTs are non-trivial (>10ms) */
	if (ca->modeswitch && minRTT > sctp_msecs_to_ticks(10) && maxRTT) {
		ca->beta = (minRTT << 7) / maxRTT;
		if (ca->beta < BETA_MIN)
			ca->beta = BETA_MIN;
		else if (ca->beta > BETA_MAX)
			ca->beta = BETA_MAX;
	} else {
		ca->beta = BETA_MIN;
		ca->modeswitch = 1;
	}
}
1972 
1973 static inline void
htcp_alpha_update(struct htcp * ca)1974 htcp_alpha_update(struct htcp *ca)
1975 {
1976 	uint32_t minRTT = ca->minRTT;
1977 	uint32_t factor = 1;
1978 	uint32_t diff = htcp_cong_time(ca);
1979 
1980 	if (diff > (uint32_t)hz) {
1981 		diff -= hz;
1982 		factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / hz)) / hz;
1983 	}
1984 
1985 	if (use_rtt_scaling && minRTT) {
1986 		uint32_t scale = (hz << 3) / (10 * minRTT);
1987 
1988 		scale = min(max(scale, 1U << 2), 10U << 3);	/* clamping ratio to
1989 								 * interval [0.5,10]<<3 */
1990 		factor = (factor << 3) / scale;
1991 		if (factor != 0)
1992 			factor = 1;
1993 	}
1994 
1995 	ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
1996 	if (ca->alpha != 0)
1997 		ca->alpha = ALPHA_BASE;
1998 }
1999 
/* After we have the rtt data to calculate beta, we'd still prefer to wait one
 * rtt before we adjust our beta to ensure we are working from a consistent
 * data.
 *
 * This function should be called when we hit a congestion event since only at
 * that point do we really have a real sense of maxRTT (the queues en route
 * were getting just too full now).
 */
static void
htcp_param_update(struct sctp_nets *net)
{
	uint32_t minRTT = net->cc_mod.htcp_ca.minRTT;
	uint32_t maxRTT = net->cc_mod.htcp_ca.maxRTT;

	htcp_beta_update(&net->cc_mod.htcp_ca, minRTT, maxRTT);
	htcp_alpha_update(&net->cc_mod.htcp_ca);

	/*
	 * add slowly fading memory for maxRTT to accommodate routing
	 * changes etc
	 */
	/* decay maxRTT 5% of the way back toward minRTT */
	if (minRTT > 0 && maxRTT > minRTT)
		net->cc_mod.htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
}
2024 
/*
 * Recompute ssthresh for a congestion event: refresh alpha/beta, then
 * scale cwnd down by beta (2^7 fixed point) in whole MTUs, with a
 * floor of 2 MTUs.
 */
static uint32_t
htcp_recalc_ssthresh(struct sctp_nets *net)
{
	htcp_param_update(net);
	return (max(((net->cwnd / net->mtu * net->cc_mod.htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu));
}
2031 
/*
 * H-TCP cwnd growth on new acked data: standard slow start below
 * ssthresh (with ABC limiting via sctp_L2_abc_variable); above it,
 * additive increase paced by the alpha factor against
 * partial_bytes_acked.
 */
static void
htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	/*-
	 * How to handle these functions?
	 *	if (!tcp_is_cwnd_limited(sk, in_flight)) RRS - good question.
	 *		return;
	 */
	if (net->cwnd <= net->ssthresh) {
		/* We are in slow start */
		if (net->flight_size + net->net_ack >= net->cwnd) {
			/* ABC: cap per-SACK growth at L * MTU */
			if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) {
				net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable));
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, net->mtu,
					    SCTP_CWND_LOG_FROM_SS);
				}

			} else {
				net->cwnd += net->net_ack;
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, net->net_ack,
					    SCTP_CWND_LOG_FROM_SS);
				}
			}
			sctp_enforce_cwnd_limit(&stcb->asoc, net);
		} else {
			/* cwnd was not the limiting factor; log, no growth */
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->net_ack,
				    SCTP_CWND_LOG_NOADV_SS);
			}
		}
	} else {
		measure_rtt(net);

		/*
		 * In dangerous area, increase slowly. In theory this is
		 * net->cwnd += alpha / net->cwnd
		 */
		/* What is snd_cwnd_cnt?? */
		/* grow by one MTU once alpha/2^7 * partial_bytes_acked
		 * reaches a full cwnd */
		if (((net->partial_bytes_acked / net->mtu * net->cc_mod.htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) {
			/*-
			 * Does SCTP have a cwnd clamp?
			 * if (net->snd_cwnd < net->snd_cwnd_clamp) - Nope (RRS).
			 */
			net->cwnd += net->mtu;
			net->partial_bytes_acked = 0;
			sctp_enforce_cwnd_limit(&stcb->asoc, net);
			htcp_alpha_update(&net->cc_mod.htcp_ca);
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_FROM_CA);
			}
		} else {
			net->partial_bytes_acked += net->net_ack;
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->net_ack,
				    SCTP_CWND_LOG_NOADV_CA);
			}
		}

		net->cc_mod.htcp_ca.bytes_acked = net->mtu;
	}
}
2096 
#ifdef SCTP_NOT_USED
/* Lower bound on congestion window. */
/* Currently compiled out (SCTP_NOT_USED): would pin cwnd at ssthresh. */
static uint32_t
htcp_min_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	return (net->ssthresh);
}
#endif
2105 
2106 static void
htcp_init(struct sctp_nets * net)2107 htcp_init(struct sctp_nets *net)
2108 {
2109 	memset(&net->cc_mod.htcp_ca, 0, sizeof(struct htcp));
2110 	net->cc_mod.htcp_ca.alpha = ALPHA_BASE;
2111 	net->cc_mod.htcp_ca.beta = BETA_MIN;
2112 	net->cc_mod.htcp_ca.bytes_acked = net->mtu;
2113 	net->cc_mod.htcp_ca.last_cong = sctp_get_tick_count();
2114 }
2115 
2116 static void
sctp_htcp_set_initial_cc_param(struct sctp_tcb * stcb,struct sctp_nets * net)2117 sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
2118 {
2119 	/*
2120 	 * We take the max of the burst limit times a MTU or the
2121 	 * INITIAL_CWND. We then limit this to 4 MTU's of sending.
2122 	 */
2123 	net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
2124 	net->ssthresh = stcb->asoc.peers_rwnd;
2125 	sctp_enforce_cwnd_limit(&stcb->asoc, net);
2126 	htcp_init(net);
2127 
2128 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
2129 		sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
2130 	}
2131 }
2132 
/*
 * Per-SACK cwnd update for the H-TCP module: walk every destination,
 * skip those with nothing newly acked, and where the cumulative ack
 * (or a CMT pseudo-cumack) moved, run H-TCP congestion avoidance and
 * the throughput measurement.
 */
static void
sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all SCTP_UNUSED, int will_exit)
{
	struct sctp_nets *net;

	/******************************/
	/* update cwnd and Early FR   */
	/******************************/
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code. Need to debug.
		 */
		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
			if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) ||
			    SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) {
				net->will_exit_fast_recovery = 1;
			}
		}
#endif
		/* if nothing was acked on this destination skip it */
		if (net->net_ack == 0) {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
			}
			continue;
		}
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code
		 */
		/*
		 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
		 * && net->will_exit_fast_recovery == 0) { @@@ Do something
		 * } else if (sctp_cmt_on_off == 0 &&
		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
		 */
#endif

		/* note: returns (not continues) -- no cwnd updates at all
		 * while in non-CMT loss recovery */
		if (asoc->fast_retran_loss_recovery &&
		    will_exit == 0 &&
		    (asoc->sctp_cmt_on_off == 0)) {
			/*
			 * If we are in loss recovery we skip any cwnd
			 * update
			 */
			return;
		}
		/*
		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
		 * moved.
		 */
		if (accum_moved ||
		    ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
			htcp_cong_avoid(stcb, net);
			measure_achieved_throughput(net);
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_NO_CUMACK);
			}
		}
	}
}
2199 
/*
 * Enter fast recovery (H-TCP variant) after a fast retransmit: for each
 * destination with newly acked data, reset the H-TCP state, recompute
 * ssthresh, set cwnd = ssthresh, mark the RFC 2582 recovery window and
 * restart the send timer.
 */
static void
sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
	struct sctp_nets *net;

	/*
	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
	 * (net->fast_retran_loss_recovery == 0)))
	 */
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
		if ((asoc->fast_retran_loss_recovery == 0) ||
		    (asoc->sctp_cmt_on_off > 0)) {
			/* out of a RFC2582 Fast recovery window? */
			if (net->net_ack > 0) {
				/*
				 * per section 7.2.3, are there any
				 * destinations that had a fast retransmit
				 * to them. If so what we need to do is
				 * adjust ssthresh and cwnd.
				 */
				struct sctp_tmit_chunk *lchk;
				int old_cwnd = net->cwnd;

				/* JRS - reset as if state were changed */
				htcp_reset(&net->cc_mod.htcp_ca);
				net->ssthresh = htcp_recalc_ssthresh(net);
				net->cwnd = net->ssthresh;
				sctp_enforce_cwnd_limit(asoc, net);
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
					    SCTP_CWND_LOG_FROM_FR);
				}
				lchk = TAILQ_FIRST(&asoc->send_queue);

				net->partial_bytes_acked = 0;
				/* Turn on fast recovery window */
				asoc->fast_retran_loss_recovery = 1;
				if (lchk == NULL) {
					/* Mark end of the window */
					/* nothing queued: window ends at the last assigned TSN */
					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					asoc->fast_recovery_tsn = lchk->rec.data.tsn - 1;
				}

				/*
				 * CMT fast recovery -- per destination
				 * recovery variable.
				 */
				net->fast_retran_loss_recovery = 1;

				if (lchk == NULL) {
					/* Mark end of the window */
					net->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					net->fast_recovery_tsn = lchk->rec.data.tsn - 1;
				}

				/* restart the T3-rtx timer for this destination */
				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net,
				    SCTP_FROM_SCTP_CC_FUNCTIONS + SCTP_LOC_3);
				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net);
			}
		} else if (net->net_ack > 0) {
			/*
			 * Mark a peg that we WOULD have done a cwnd
			 * reduction but RFC2582 prevented this action.
			 */
			SCTP_STAT_INCR(sctps_fastretransinrtt);
		}
	}
}
2273 
2274 static void
sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb * stcb,struct sctp_nets * net)2275 sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
2276     struct sctp_nets *net)
2277 {
2278 	int old_cwnd = net->cwnd;
2279 
2280 	/* JRS - reset as if the state were being changed to timeout */
2281 	htcp_reset(&net->cc_mod.htcp_ca);
2282 	net->ssthresh = htcp_recalc_ssthresh(net);
2283 	net->cwnd = net->mtu;
2284 	net->partial_bytes_acked = 0;
2285 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
2286 		sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
2287 	}
2288 }
2289 
/*
 * React to an ECN-Echo chunk: on the first echo of a window
 * (in_window == 0) treat it as a congestion event -- reset the H-TCP
 * state, reduce cwnd to the recomputed ssthresh, and back off the RTO
 * when ssthresh hit the one-MTU floor.  Later echoes in the same
 * window are ignored.
 */
static void
sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
    struct sctp_nets *net, int in_window, int num_pkt_lost SCTP_UNUSED)
{
	int old_cwnd;

	old_cwnd = net->cwnd;

	/* JRS - reset hctp as if state changed */
	if (in_window == 0) {
		htcp_reset(&net->cc_mod.htcp_ca);
		SCTP_STAT_INCR(sctps_ecnereducedcwnd);
		net->ssthresh = htcp_recalc_ssthresh(net);
		if (net->ssthresh < net->mtu) {
			net->ssthresh = net->mtu;
			/* here back off the timer as well, to slow us down */
			net->RTO <<= 1;
		}
		net->cwnd = net->ssthresh;
		sctp_enforce_cwnd_limit(&stcb->asoc, net);
		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
			sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
		}
	}
}
2315 
/*
 * Table of the available congestion control modules.  In order: the
 * standard module, the high-speed (HS) variant, H-TCP, and the RTCC
 * variant of the standard module (the only one providing the extra
 * rtcc hooks).  NOTE(review): the array index presumably corresponds
 * to the value used to select a CC module via sysctl/socket option --
 * confirm against the SCTP headers before relying on the ordering.
 */
const struct sctp_cc_functions sctp_cc_functions[] = {
	{
		/* standard congestion control */
		.sctp_set_initial_cc_param = sctp_set_initial_cc_param,
		.sctp_cwnd_update_after_sack = sctp_cwnd_update_after_sack,
		.sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common,
		.sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr,
		.sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout,
		.sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_after_ecn_echo,
		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
	},
	{
		/* high-speed variant (hs_* sack/fr handlers) */
		.sctp_set_initial_cc_param = sctp_set_initial_cc_param,
		.sctp_cwnd_update_after_sack = sctp_hs_cwnd_update_after_sack,
		.sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common,
		.sctp_cwnd_update_after_fr = sctp_hs_cwnd_update_after_fr,
		.sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout,
		.sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_after_ecn_echo,
		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
	},
	{
		/* H-TCP (htcp_* handlers defined above) */
		.sctp_set_initial_cc_param = sctp_htcp_set_initial_cc_param,
		.sctp_cwnd_update_after_sack = sctp_htcp_cwnd_update_after_sack,
		.sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common,
		.sctp_cwnd_update_after_fr = sctp_htcp_cwnd_update_after_fr,
		.sctp_cwnd_update_after_timeout = sctp_htcp_cwnd_update_after_timeout,
		.sctp_cwnd_update_after_ecn_echo = sctp_htcp_cwnd_update_after_ecn_echo,
		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
	},
	{
		/* RTCC variant: standard CC plus rtcc measurement hooks */
		.sctp_set_initial_cc_param = sctp_set_rtcc_initial_cc_param,
		.sctp_cwnd_update_after_sack = sctp_cwnd_update_rtcc_after_sack,
		.sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common,
		.sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr,
		.sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout,
		.sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_rtcc_after_ecn_echo,
		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
		.sctp_cwnd_update_packet_transmitted = sctp_cwnd_update_rtcc_packet_transmitted,
		.sctp_cwnd_update_tsn_acknowledged = sctp_cwnd_update_rtcc_tsn_acknowledged,
		.sctp_cwnd_new_transmission_begins = sctp_cwnd_new_rtcc_transmission_begins,
		.sctp_cwnd_prepare_net_for_sack = sctp_cwnd_prepare_rtcc_net_for_sack,
		.sctp_cwnd_socket_option = sctp_cwnd_rtcc_socket_option,
		.sctp_rtt_calculated = sctp_rtt_rtcc_calculated
	}
};
2364