/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
 * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * a) Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * b) Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the distribution.
 *
 * c) Neither the name of Cisco Systems, Inc. nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <netinet/sctp_os.h>
#include <netinet/sctp_var.h>
#include <netinet/sctp_sysctl.h>
#include <netinet/sctp_pcb.h>
#include <netinet/sctp_header.h>
#include <netinet/sctputil.h>
#include <netinet/sctp_output.h>
#include <netinet/sctp_input.h>
#include <netinet/sctp_indata.h>
#include <netinet/sctp_uio.h>
#include <netinet/sctp_timer.h>
#include <netinet/sctp_auth.h>
#include <netinet/sctp_asconf.h>
#include <netinet/sctp_kdtrace.h>

#define SHIFT_MPTCP_MULTI_N 40
#define SHIFT_MPTCP_MULTI_Z 16
#define SHIFT_MPTCP_MULTI 8

#ifdef KDTRACE_HOOKS
#define __dtrace
#else
#define __dtrace __unused
#endif

static void
sctp_enforce_cwnd_limit(struct sctp_association *assoc, struct sctp_nets *net)
{
	if ((assoc->max_cwnd > 0) &&
	    (net->cwnd > assoc->max_cwnd) &&
	    (net->cwnd > (net->mtu - sizeof(struct sctphdr)))) {
		net->cwnd = assoc->max_cwnd;
		if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) {
			net->cwnd = net->mtu - sizeof(struct sctphdr);
		}
	}
}

static void
sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	struct sctp_association *assoc;
	uint32_t cwnd_in_mtu;

	assoc = &stcb->asoc;
	cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd);
	if (cwnd_in_mtu == 0) {
		/* Using 0 means that the value of RFC 4960 is used. */
		net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
	} else {
		/*
		 * We take the minimum of the burst limit and the initial
		 * congestion window.
		 */
		if ((assoc->max_burst > 0) && (cwnd_in_mtu > assoc->max_burst))
			cwnd_in_mtu = assoc->max_burst;
		net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu;
	}
	if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) {
		/* In case of resource pooling initialize appropriately */
		net->cwnd /= assoc->numnets;
		if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) {
			net->cwnd = net->mtu - sizeof(struct sctphdr);
		}
	}
	sctp_enforce_cwnd_limit(assoc, net);
	net->ssthresh = assoc->peers_rwnd;
	SDT_PROBE5(sctp, cwnd, net, init,
	    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
	    0, net->cwnd);
	if (SCTP_BASE_SYSCTL(sctp_logging_level) &
	    (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
		sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
	}
}
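
/*
 * Worked example of the RFC 4960 rule above (illustrative numbers,
 * assuming SCTP_INITIAL_CWND is 4380 bytes as in sctp_constants.h):
 * for net->mtu = 1500, cwnd = min(4 * 1500, max(2 * 1500, 4380)) =
 * min(6000, 4380) = 4380 bytes, i.e. roughly three full-sized packets.
 * For a 9000-byte jumbo MTU the max() term dominates and cwnd =
 * min(36000, 18000) = 18000 bytes, i.e. two MTUs.
 */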

static void
sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
	struct sctp_nets *net;
	uint32_t t_ssthresh, t_cwnd;
	uint64_t t_ucwnd_sbw;

	/* MT FIXME: Don't compute this over and over again */
	t_ssthresh = 0;
	t_cwnd = 0;
	t_ucwnd_sbw = 0;
	if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) ||
	    (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) {
		TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
			t_ssthresh += net->ssthresh;
			t_cwnd += net->cwnd;
			if (net->lastsa > 0) {
				t_ucwnd_sbw += (uint64_t)net->cwnd / (uint64_t)net->lastsa;
			}
		}
		if (t_ucwnd_sbw == 0) {
			t_ucwnd_sbw = 1;
		}
	}

	/*-
	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
	 * (net->fast_retran_loss_recovery == 0)))
	 */
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
		if ((asoc->fast_retran_loss_recovery == 0) ||
		    (asoc->sctp_cmt_on_off > 0)) {
			/* out of a RFC2582 Fast recovery window? */
			if (net->net_ack > 0) {
				/*
				 * per section 7.2.3, are there any
				 * destinations that had a fast retransmit
				 * to them. If so what we need to do is
				 * adjust ssthresh and cwnd.
				 */
				struct sctp_tmit_chunk *lchk;
				int old_cwnd = net->cwnd;

				if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) ||
				    (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) {
					if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) {
						net->ssthresh = (uint32_t)(((uint64_t)4 *
						    (uint64_t)net->mtu *
						    (uint64_t)net->ssthresh) /
						    (uint64_t)t_ssthresh);
					}
					if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2) {
						uint32_t srtt;

						srtt = net->lastsa;
						/*
						 * lastsa>>3; we don't need
						 * to divide ...
						 */
						if (srtt == 0) {
							srtt = 1;
						}
						/*
						 * Short Version => Equal to
						 * Contel Version MBe
						 */
						net->ssthresh = (uint32_t)(((uint64_t)4 *
						    (uint64_t)net->mtu *
						    (uint64_t)net->cwnd) /
						    ((uint64_t)srtt *
						    t_ucwnd_sbw));
						/* INCREASE FACTOR */
					}
					if ((net->cwnd > t_cwnd / 2) &&
					    (net->ssthresh < net->cwnd - t_cwnd / 2)) {
						net->ssthresh = net->cwnd - t_cwnd / 2;
					}
					if (net->ssthresh < net->mtu) {
						net->ssthresh = net->mtu;
					}
				} else {
					net->ssthresh = net->cwnd / 2;
					if (net->ssthresh < (net->mtu * 2)) {
						net->ssthresh = 2 * net->mtu;
					}
				}
				net->cwnd = net->ssthresh;
				sctp_enforce_cwnd_limit(asoc, net);
				SDT_PROBE5(sctp, cwnd, net, fr,
				    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
				    old_cwnd, net->cwnd);
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
					    SCTP_CWND_LOG_FROM_FR);
				}
				lchk = TAILQ_FIRST(&asoc->send_queue);

				net->partial_bytes_acked = 0;
				/* Turn on fast recovery window */
				asoc->fast_retran_loss_recovery = 1;
				if (lchk == NULL) {
					/* Mark end of the window */
					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					asoc->fast_recovery_tsn = lchk->rec.data.tsn - 1;
				}

				/*
				 * CMT fast recovery -- per destination
				 * recovery variable.
				 */
				net->fast_retran_loss_recovery = 1;

				if (lchk == NULL) {
					/* Mark end of the window */
					net->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					net->fast_recovery_tsn = lchk->rec.data.tsn - 1;
				}

				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net,
				    SCTP_FROM_SCTP_CC_FUNCTIONS + SCTP_LOC_1);
				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net);
			}
		} else if (net->net_ack > 0) {
			/*
			 * Mark a peg that we WOULD have done a cwnd
			 * reduction but RFC2582 prevented this action.
			 */
			SCTP_STAT_INCR(sctps_fastretransinrtt);
		}
	}
}
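
/*
 * Worked example of the RPV1 branch above (illustrative numbers, not
 * taken from a trace): with two destinations whose ssthresh values are
 * 30000 and 10000 bytes (t_ssthresh = 40000) and net->mtu = 1500, the
 * first destination gets ssthresh = 4 * 1500 * 30000 / 40000 = 4500
 * bytes after a fast retransmit, i.e. the resource-pooling variants
 * cut back in proportion to each path's share of the total.
 */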

/* Defines for instantaneous bw decisions */
#define SCTP_INST_LOOSING 1	/* Losing to other flows */
#define SCTP_INST_NEUTRAL 2	/* Neutral, no indication */
#define SCTP_INST_GAINING 3	/* Gaining, step down possible */

static int
cc_bw_same(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw,
    uint64_t rtt_offset, uint64_t vtag, uint8_t inst_ind)
{
	uint64_t oth __dtrace, probepoint __dtrace;

	probepoint = (((uint64_t)net->cwnd) << 32);
	if (net->rtt > net->cc_mod.rtcc.lbw_rtt + rtt_offset) {
		/*
		 * rtt increased: we don't update the bw, so we don't
		 * update the rtt either.
		 */
		/* Probe point 5 */
		probepoint |= ((5 << 16) | 1);
		SDT_PROBE5(sctp, cwnd, net, rttvar,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    net->flight_size,
		    probepoint);
		if ((net->cc_mod.rtcc.steady_step) && (inst_ind != SCTP_INST_LOOSING)) {
			if (net->cc_mod.rtcc.last_step_state == 5)
				net->cc_mod.rtcc.step_cnt++;
			else
				net->cc_mod.rtcc.step_cnt = 1;
			net->cc_mod.rtcc.last_step_state = 5;
			if ((net->cc_mod.rtcc.step_cnt == net->cc_mod.rtcc.steady_step) ||
			    ((net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step) &&
			    ((net->cc_mod.rtcc.step_cnt % net->cc_mod.rtcc.steady_step) == 0))) {
				/* Try a step down */
				oth = net->cc_mod.rtcc.vol_reduce;
				oth <<= 16;
				oth |= net->cc_mod.rtcc.step_cnt;
				oth <<= 16;
				oth |= net->cc_mod.rtcc.last_step_state;
				SDT_PROBE5(sctp, cwnd, net, rttstep,
				    vtag,
				    ((net->cc_mod.rtcc.lbw << 32) | nbw),
				    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
				    oth,
				    probepoint);
				if (net->cwnd > (4 * net->mtu)) {
					net->cwnd -= net->mtu;
					net->cc_mod.rtcc.vol_reduce++;
				} else {
					net->cc_mod.rtcc.step_cnt = 0;
				}
			}
		}
		return (1);
	}
	if (net->rtt < net->cc_mod.rtcc.lbw_rtt - rtt_offset) {
		/*
		 * rtt decreased, there could be more room. we update both
		 * the bw and the rtt here to lock this in as a good step
		 * down.
		 */
		/* Probe point 6 */
		probepoint |= ((6 << 16) | 0);
		SDT_PROBE5(sctp, cwnd, net, rttvar,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    net->flight_size,
		    probepoint);
		if (net->cc_mod.rtcc.steady_step) {
			oth = net->cc_mod.rtcc.vol_reduce;
			oth <<= 16;
			oth |= net->cc_mod.rtcc.step_cnt;
			oth <<= 16;
			oth |= net->cc_mod.rtcc.last_step_state;
			SDT_PROBE5(sctp, cwnd, net, rttstep,
			    vtag,
			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
			    oth,
			    probepoint);
			if ((net->cc_mod.rtcc.last_step_state == 5) &&
			    (net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step)) {
				/* Step down worked */
				net->cc_mod.rtcc.step_cnt = 0;
				return (1);
			} else {
				net->cc_mod.rtcc.last_step_state = 6;
				net->cc_mod.rtcc.step_cnt = 0;
			}
		}
		net->cc_mod.rtcc.lbw = nbw;
		net->cc_mod.rtcc.lbw_rtt = net->rtt;
		net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd;
		if (inst_ind == SCTP_INST_GAINING)
			return (1);
		else if (inst_ind == SCTP_INST_NEUTRAL)
			return (1);
		else
			return (0);
	}
	/*
	 * Ok, bw and rtt remained the same .. no update to anything.
	 */
	/* Probe point 7 */
	probepoint |= ((7 << 16) | net->cc_mod.rtcc.ret_from_eq);
	SDT_PROBE5(sctp, cwnd, net, rttvar,
	    vtag,
	    ((net->cc_mod.rtcc.lbw << 32) | nbw),
	    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
	    net->flight_size,
	    probepoint);
	if ((net->cc_mod.rtcc.steady_step) && (inst_ind != SCTP_INST_LOOSING)) {
		if (net->cc_mod.rtcc.last_step_state == 5)
			net->cc_mod.rtcc.step_cnt++;
		else
			net->cc_mod.rtcc.step_cnt = 1;
		net->cc_mod.rtcc.last_step_state = 5;
		if ((net->cc_mod.rtcc.step_cnt == net->cc_mod.rtcc.steady_step) ||
		    ((net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step) &&
		    ((net->cc_mod.rtcc.step_cnt % net->cc_mod.rtcc.steady_step) == 0))) {
			/* Try a step down */
			if (net->cwnd > (4 * net->mtu)) {
				net->cwnd -= net->mtu;
				net->cc_mod.rtcc.vol_reduce++;
				return (1);
			} else {
				net->cc_mod.rtcc.step_cnt = 0;
			}
		}
	}
	if (inst_ind == SCTP_INST_GAINING)
		return (1);
	else if (inst_ind == SCTP_INST_NEUTRAL)
		return (1);
	else
		return ((int)net->cc_mod.rtcc.ret_from_eq);
}
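
/*
 * A note on the probe-point encoding used throughout the rtcc helpers
 * (a reading aid derived from the code above, not new mechanism): the
 * 64-bit value handed to the rttvar/rttstep DTrace probes packs the
 * current cwnd into the upper 32 bits, the probe-point number into
 * bits 16-31, and a flag into the low bits. For example, probe point 5
 * with flag 1 and a cwnd of 0x4000 yields
 * (0x4000ULL << 32) | (5 << 16) | 1.
 */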

static int
cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint64_t rtt_offset,
    uint64_t vtag, uint8_t inst_ind)
{
	uint64_t oth __dtrace, probepoint __dtrace;

	/* Bandwidth decreased. */
	probepoint = (((uint64_t)net->cwnd) << 32);
	if (net->rtt > net->cc_mod.rtcc.lbw_rtt + rtt_offset) {
		/* rtt increased */
		/* Did we add more */
		if ((net->cwnd > net->cc_mod.rtcc.cwnd_at_bw_set) &&
		    (inst_ind != SCTP_INST_LOOSING)) {
			/* We caused it maybe.. back off? */
			/* PROBE POINT 1 */
			probepoint |= ((1 << 16) | 1);
			SDT_PROBE5(sctp, cwnd, net, rttvar,
			    vtag,
			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
			    net->flight_size,
			    probepoint);
			if (net->cc_mod.rtcc.ret_from_eq) {
				/*
				 * Switch over to CA if we are less
				 * aggressive
				 */
				net->ssthresh = net->cwnd - 1;
				net->partial_bytes_acked = 0;
			}
			return (1);
		}
		/* Probe point 2 */
		probepoint |= ((2 << 16) | 0);
		SDT_PROBE5(sctp, cwnd, net, rttvar,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    net->flight_size,
		    probepoint);
		/* Someone else - fight for more? */
		if (net->cc_mod.rtcc.steady_step) {
			oth = net->cc_mod.rtcc.vol_reduce;
			oth <<= 16;
			oth |= net->cc_mod.rtcc.step_cnt;
			oth <<= 16;
			oth |= net->cc_mod.rtcc.last_step_state;
			SDT_PROBE5(sctp, cwnd, net, rttstep,
			    vtag,
			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
			    oth,
			    probepoint);
			/*
			 * Did we voluntarily give up some? If so take one
			 * back please.
			 */
			if ((net->cc_mod.rtcc.vol_reduce) &&
			    (inst_ind != SCTP_INST_GAINING)) {
				net->cwnd += net->mtu;
				sctp_enforce_cwnd_limit(&stcb->asoc, net);
				net->cc_mod.rtcc.vol_reduce--;
			}
			net->cc_mod.rtcc.last_step_state = 2;
			net->cc_mod.rtcc.step_cnt = 0;
		}
		goto out_decision;
	} else if (net->rtt < net->cc_mod.rtcc.lbw_rtt - rtt_offset) {
		/* bw & rtt decreased */
		/* Probe point 3 */
		probepoint |= ((3 << 16) | 0);
		SDT_PROBE5(sctp, cwnd, net, rttvar,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    net->flight_size,
		    probepoint);
		if (net->cc_mod.rtcc.steady_step) {
			oth = net->cc_mod.rtcc.vol_reduce;
			oth <<= 16;
			oth |= net->cc_mod.rtcc.step_cnt;
			oth <<= 16;
			oth |= net->cc_mod.rtcc.last_step_state;
			SDT_PROBE5(sctp, cwnd, net, rttstep,
			    vtag,
			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
			    oth,
			    probepoint);
			if ((net->cc_mod.rtcc.vol_reduce) &&
			    (inst_ind != SCTP_INST_GAINING)) {
				net->cwnd += net->mtu;
				sctp_enforce_cwnd_limit(&stcb->asoc, net);
				net->cc_mod.rtcc.vol_reduce--;
			}
			net->cc_mod.rtcc.last_step_state = 3;
			net->cc_mod.rtcc.step_cnt = 0;
		}
		goto out_decision;
	}
	/* The bw decreased but rtt stayed the same */
	/* Probe point 4 */
	probepoint |= ((4 << 16) | 0);
	SDT_PROBE5(sctp, cwnd, net, rttvar,
	    vtag,
	    ((net->cc_mod.rtcc.lbw << 32) | nbw),
	    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
	    net->flight_size,
	    probepoint);
	if (net->cc_mod.rtcc.steady_step) {
		oth = net->cc_mod.rtcc.vol_reduce;
		oth <<= 16;
		oth |= net->cc_mod.rtcc.step_cnt;
		oth <<= 16;
		oth |= net->cc_mod.rtcc.last_step_state;
		SDT_PROBE5(sctp, cwnd, net, rttstep,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    oth,
		    probepoint);
		if ((net->cc_mod.rtcc.vol_reduce) &&
		    (inst_ind != SCTP_INST_GAINING)) {
			net->cwnd += net->mtu;
			sctp_enforce_cwnd_limit(&stcb->asoc, net);
			net->cc_mod.rtcc.vol_reduce--;
		}
		net->cc_mod.rtcc.last_step_state = 4;
		net->cc_mod.rtcc.step_cnt = 0;
	}
out_decision:
	net->cc_mod.rtcc.lbw = nbw;
	net->cc_mod.rtcc.lbw_rtt = net->rtt;
	net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd;
	if (inst_ind == SCTP_INST_GAINING) {
		return (1);
	} else {
		return (0);
	}
}

static int
cc_bw_increase(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint64_t vtag)
{
	uint64_t oth __dtrace, probepoint __dtrace;

	/*
	 * BW increased, so update and return 0, since all actions in our
	 * table say to do the normal CC update. Note that we pay no
	 * attention to the inst_ind since our overall sum is increasing.
	 */
	/* PROBE POINT 0 */
	probepoint = (((uint64_t)net->cwnd) << 32);
	SDT_PROBE5(sctp, cwnd, net, rttvar,
	    vtag,
	    ((net->cc_mod.rtcc.lbw << 32) | nbw),
	    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
	    net->flight_size,
	    probepoint);
	if (net->cc_mod.rtcc.steady_step) {
		oth = net->cc_mod.rtcc.vol_reduce;
		oth <<= 16;
		oth |= net->cc_mod.rtcc.step_cnt;
		oth <<= 16;
		oth |= net->cc_mod.rtcc.last_step_state;
		SDT_PROBE5(sctp, cwnd, net, rttstep,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    oth,
		    probepoint);
		net->cc_mod.rtcc.last_step_state = 0;
		net->cc_mod.rtcc.step_cnt = 0;
		net->cc_mod.rtcc.vol_reduce = 0;
	}
	net->cc_mod.rtcc.lbw = nbw;
	net->cc_mod.rtcc.lbw_rtt = net->rtt;
	net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd;
	return (0);
}

/* RTCC Algorithm to limit growth of cwnd, return
 * true if you want to NOT allow cwnd growth
 */
static int
cc_bw_limit(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw)
{
	uint64_t bw_offset, rtt_offset;
	uint64_t probepoint __dtrace, rtt, vtag;
	uint64_t bytes_for_this_rtt, inst_bw;
	uint64_t div, inst_off;
	int bw_shift;
	uint8_t inst_ind;
	int ret;

	/*-
	 * Here we need to see if we want
	 * to limit cwnd growth due to increase
	 * in overall rtt but no increase in bw.
	 * We use the following table to figure
	 * out what we should do. When we return
	 * 0, cc update goes on as planned. If we
	 * return 1, then no cc update happens and cwnd
	 * stays where it is at.
	 * ----------------------------------
	 *   BW  |  RTT  | Action
	 * *********************************
	 *   INC |  INC  | return 0
	 * ----------------------------------
	 *   INC |  SAME | return 0
	 * ----------------------------------
	 *   INC |  DECR | return 0
	 * ----------------------------------
	 *  SAME |  INC  | return 1
	 * ----------------------------------
	 *  SAME |  SAME | return 1
	 * ----------------------------------
	 *  SAME |  DECR | return 0
	 * ----------------------------------
	 *  DECR |  INC  | return 0 or 1 based on if we caused.
	 * ----------------------------------
	 *  DECR |  SAME | return 0
	 * ----------------------------------
	 *  DECR |  DECR | return 0
	 * ----------------------------------
	 *
	 * We are a bit fuzzy on what an increase or
	 * decrease is. For BW it is the same if
	 * it did not change within 1/64th. For
	 * RTT it stayed the same if it did not
	 * change within 1/32nd.
	 */
	bw_shift = SCTP_BASE_SYSCTL(sctp_rttvar_bw);
	rtt = stcb->asoc.my_vtag;
	vtag = (rtt << 32) | (((uint32_t)(stcb->sctp_ep->sctp_lport)) << 16) | (stcb->rport);
	probepoint = (((uint64_t)net->cwnd) << 32);
	rtt = net->rtt;
	if (net->cc_mod.rtcc.rtt_set_this_sack) {
		net->cc_mod.rtcc.rtt_set_this_sack = 0;
		bytes_for_this_rtt = net->cc_mod.rtcc.bw_bytes - net->cc_mod.rtcc.bw_bytes_at_last_rttc;
		net->cc_mod.rtcc.bw_bytes_at_last_rttc = net->cc_mod.rtcc.bw_bytes;
		if (net->rtt) {
			div = net->rtt / 1000;
			if (div) {
				inst_bw = bytes_for_this_rtt / div;
				inst_off = inst_bw >> bw_shift;
				if (inst_bw > nbw)
					inst_ind = SCTP_INST_GAINING;
				else if ((inst_bw + inst_off) < nbw)
					inst_ind = SCTP_INST_LOOSING;
				else
					inst_ind = SCTP_INST_NEUTRAL;
				probepoint |= ((0xb << 16) | inst_ind);
			} else {
				inst_ind = net->cc_mod.rtcc.last_inst_ind;
				inst_bw = bytes_for_this_rtt / (uint64_t)(net->rtt);
				/* Can't determine, do not change */
				probepoint |= ((0xc << 16) | inst_ind);
			}
		} else {
			inst_ind = net->cc_mod.rtcc.last_inst_ind;
			inst_bw = bytes_for_this_rtt;
			/* Can't determine, do not change */
			probepoint |= ((0xd << 16) | inst_ind);
		}
		SDT_PROBE5(sctp, cwnd, net, rttvar,
		    vtag,
		    ((nbw << 32) | inst_bw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | rtt),
		    net->flight_size,
		    probepoint);
	} else {
		/* No rtt measurement, use last one */
		inst_ind = net->cc_mod.rtcc.last_inst_ind;
	}
	bw_offset = net->cc_mod.rtcc.lbw >> bw_shift;
	if (nbw > net->cc_mod.rtcc.lbw + bw_offset) {
		ret = cc_bw_increase(stcb, net, nbw, vtag);
		goto out;
	}
	rtt_offset = net->cc_mod.rtcc.lbw_rtt >> SCTP_BASE_SYSCTL(sctp_rttvar_rtt);
	if (nbw < net->cc_mod.rtcc.lbw - bw_offset) {
		ret = cc_bw_decrease(stcb, net, nbw, rtt_offset, vtag, inst_ind);
		goto out;
	}
	/*
	 * If we reach here then we are in a situation where the bw stayed
	 * the same.
	 */
	ret = cc_bw_same(stcb, net, nbw, rtt_offset, vtag, inst_ind);
out:
	net->cc_mod.rtcc.last_inst_ind = inst_ind;
	return (ret);
}
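
/*
 * Worked example of the thresholds used above (illustrative numbers):
 * with a last measured bandwidth of lbw = 64000 bytes and a shift of 6
 * (the 1/64th the table comment assumes), bw_offset = 64000 >> 6 =
 * 1000, so any nbw between 63000 and 65000 counts as "the same".
 * Likewise an lbw_rtt of 32000 microseconds with a shift of 5 (1/32nd)
 * gives rtt_offset = 1000, so the rtt must move by more than 1 ms
 * before cc_bw_same() treats it as a change.
 */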

static void
sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all SCTP_UNUSED, int will_exit, int use_rtcc)
{
	struct sctp_nets *net;
	int old_cwnd __dtrace;
	uint32_t t_ssthresh, incr;
	uint64_t t_ucwnd_sbw;
	uint64_t t_path_mptcp;
	uint64_t mptcp_like_alpha;
	uint32_t srtt;
	uint64_t max_path;

	/* MT FIXME: Don't compute this over and over again */
	t_ssthresh = 0;
	t_ucwnd_sbw = 0;
	t_path_mptcp = 0;
	mptcp_like_alpha = 1;
	if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2) ||
	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_MPTCP)) {
		max_path = 0;
		TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
			t_ssthresh += net->ssthresh;
			/* lastsa>>3; we don't need to divide ... */
			srtt = net->lastsa;
			if (srtt > 0) {
				uint64_t tmp;

				t_ucwnd_sbw += (uint64_t)net->cwnd / (uint64_t)srtt;
				t_path_mptcp += (((uint64_t)net->cwnd) << SHIFT_MPTCP_MULTI_Z) /
				    (((uint64_t)net->mtu) * (uint64_t)srtt);
				tmp = (((uint64_t)net->cwnd) << SHIFT_MPTCP_MULTI_N) /
				    ((uint64_t)net->mtu * (uint64_t)(srtt * srtt));
				if (tmp > max_path) {
					max_path = tmp;
				}
			}
		}
		if (t_path_mptcp > 0) {
			mptcp_like_alpha = max_path / (t_path_mptcp * t_path_mptcp);
		} else {
			mptcp_like_alpha = 1;
		}
	}
	if (t_ssthresh == 0) {
		t_ssthresh = 1;
	}
	if (t_ucwnd_sbw == 0) {
		t_ucwnd_sbw = 1;
	}
	/******************************/
	/* update cwnd and Early FR   */
	/******************************/
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code. Need to debug.
		 */
		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
			if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) ||
			    SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) {
				net->will_exit_fast_recovery = 1;
			}
		}
#endif
		/* if nothing was acked on this destination skip it */
		if (net->net_ack == 0) {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
			}
			continue;
		}
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code
		 */
		/*
		 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
		 * && net->will_exit_fast_recovery == 0) { @@@ Do something
		 * } else if (sctp_cmt_on_off == 0 &&
		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
		 */
#endif

		if (asoc->fast_retran_loss_recovery &&
		    (will_exit == 0) &&
		    (asoc->sctp_cmt_on_off == 0)) {
			/*
			 * If we are in loss recovery we skip any cwnd
			 * update
			 */
			return;
		}
		/*
		 * Did any measurements go on for this network?
		 */
		if (use_rtcc && (net->cc_mod.rtcc.tls_needs_set > 0)) {
			uint64_t nbw;

			/*
			 * At this point our bw_bytes has been updated by
			 * incoming sack information.
			 *
			 * But our bw may not yet be set.
			 */
			if ((net->cc_mod.rtcc.new_tot_time / 1000) > 0) {
				nbw = net->cc_mod.rtcc.bw_bytes / (net->cc_mod.rtcc.new_tot_time / 1000);
			} else {
				nbw = net->cc_mod.rtcc.bw_bytes;
			}
			if (net->cc_mod.rtcc.lbw) {
				if (cc_bw_limit(stcb, net, nbw)) {
					/* Hold here, no update */
					continue;
				}
			} else {
				uint64_t vtag __dtrace, probepoint __dtrace;

				probepoint = (((uint64_t)net->cwnd) << 32);
				probepoint |= ((0xa << 16) | 0);
				vtag = (net->rtt << 32) |
				    (((uint32_t)(stcb->sctp_ep->sctp_lport)) << 16) |
				    (stcb->rport);

				SDT_PROBE5(sctp, cwnd, net, rttvar,
				    vtag,
				    nbw,
				    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
				    net->flight_size,
				    probepoint);
				net->cc_mod.rtcc.lbw = nbw;
				net->cc_mod.rtcc.lbw_rtt = net->rtt;
				if (net->cc_mod.rtcc.rtt_set_this_sack) {
					net->cc_mod.rtcc.rtt_set_this_sack = 0;
					net->cc_mod.rtcc.bw_bytes_at_last_rttc = net->cc_mod.rtcc.bw_bytes;
				}
			}
		}
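		/*
		 * Units note with a worked example (illustrative numbers):
		 * new_tot_time is kept in microseconds, so nbw above is in
		 * bytes per millisecond. E.g., bw_bytes = 48000 measured
		 * over new_tot_time = 60000 us gives nbw = 48000 / 60 =
		 * 800 bytes/ms, which matches the units of the inst_bw
		 * computed from net->rtt (also microseconds) in
		 * cc_bw_limit().
		 */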
		/*
		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
		 * moved.
		 */
		if (accum_moved ||
		    ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
			/* If the cumulative ack moved we can proceed */
			if (net->cwnd <= net->ssthresh) {
				/* We are in slow start */
				if (net->flight_size + net->net_ack >= net->cwnd) {
					uint32_t limit;

					old_cwnd = net->cwnd;
					switch (asoc->sctp_cmt_on_off) {
					case SCTP_CMT_RPV1:
						limit = (uint32_t)(((uint64_t)net->mtu *
						    (uint64_t)SCTP_BASE_SYSCTL(sctp_L2_abc_variable) *
						    (uint64_t)net->ssthresh) /
						    (uint64_t)t_ssthresh);
						incr = (uint32_t)(((uint64_t)net->net_ack *
						    (uint64_t)net->ssthresh) /
						    (uint64_t)t_ssthresh);
						if (incr > limit) {
							incr = limit;
						}
						if (incr == 0) {
							incr = 1;
						}
						break;
					case SCTP_CMT_RPV2:
						/*
						 * lastsa>>3; we don't need
						 * to divide ...
						 */
						srtt = net->lastsa;
						if (srtt == 0) {
							srtt = 1;
						}
						limit = (uint32_t)(((uint64_t)net->mtu *
						    (uint64_t)SCTP_BASE_SYSCTL(sctp_L2_abc_variable) *
						    (uint64_t)net->cwnd) /
						    ((uint64_t)srtt * t_ucwnd_sbw));
						/* INCREASE FACTOR */
						incr = (uint32_t)(((uint64_t)net->net_ack *
						    (uint64_t)net->cwnd) /
						    ((uint64_t)srtt * t_ucwnd_sbw));
						/* INCREASE FACTOR */
						if (incr > limit) {
							incr = limit;
						}
						if (incr == 0) {
							incr = 1;
						}
						break;
					case SCTP_CMT_MPTCP:
						limit = (uint32_t)(((uint64_t)net->mtu *
						    mptcp_like_alpha *
						    (uint64_t)SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) >>
						    SHIFT_MPTCP_MULTI);
						incr = (uint32_t)(((uint64_t)net->net_ack *
						    mptcp_like_alpha) >>
						    SHIFT_MPTCP_MULTI);
						if (incr > limit) {
							incr = limit;
						}
						if (incr > net->net_ack) {
							incr = net->net_ack;
						}
						if (incr > net->mtu) {
							incr = net->mtu;
						}
						break;
					default:
						incr = net->net_ack;
						if (incr > net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) {
							incr = net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable);
						}
						break;
					}
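					/*
					 * Worked example for the default
					 * case (illustrative numbers):
					 * with net->mtu = 1500 and the ABC
					 * L-value sysctl set to 2, an ack
					 * of net_ack = 4500 bytes is
					 * capped at incr = 2 * 1500 = 3000
					 * bytes, so slow start grows by at
					 * most L MTUs per SACK.
					 */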
					net->cwnd += incr;
					sctp_enforce_cwnd_limit(asoc, net);
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
						sctp_log_cwnd(stcb, net, incr,
						    SCTP_CWND_LOG_FROM_SS);
					}
					SDT_PROBE5(sctp, cwnd, net, ack,
					    stcb->asoc.my_vtag,
					    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
					    net,
					    old_cwnd, net->cwnd);
				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_SS);
					}
				}
			} else {
				/* We are in congestion avoidance */
				/*
				 * Add to pba
				 */
				net->partial_bytes_acked += net->net_ack;

				if ((net->flight_size + net->net_ack >= net->cwnd) &&
				    (net->partial_bytes_acked >= net->cwnd)) {
					net->partial_bytes_acked -= net->cwnd;
					old_cwnd = net->cwnd;
					switch (asoc->sctp_cmt_on_off) {
					case SCTP_CMT_RPV1:
						incr = (uint32_t)(((uint64_t)net->mtu *
						    (uint64_t)net->ssthresh) /
						    (uint64_t)t_ssthresh);
						if (incr == 0) {
							incr = 1;
						}
						break;
					case SCTP_CMT_RPV2:
						/*
						 * lastsa>>3; we don't need
						 * to divide ...
						 */
						srtt = net->lastsa;
						if (srtt == 0) {
							srtt = 1;
						}
						incr = (uint32_t)((uint64_t)net->mtu *
						    (uint64_t)net->cwnd /
						    ((uint64_t)srtt *
						    t_ucwnd_sbw));
						/* INCREASE FACTOR */
						if (incr == 0) {
							incr = 1;
						}
						break;
					case SCTP_CMT_MPTCP:
						incr = (uint32_t)((mptcp_like_alpha *
						    (uint64_t)net->cwnd) >>
						    SHIFT_MPTCP_MULTI);
						if (incr > net->mtu) {
							incr = net->mtu;
						}
						break;
					default:
						incr = net->mtu;
						break;
					}
					net->cwnd += incr;
					sctp_enforce_cwnd_limit(asoc, net);
					SDT_PROBE5(sctp, cwnd, net, ack,
					    stcb->asoc.my_vtag,
					    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
					    net,
					    old_cwnd, net->cwnd);
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
						sctp_log_cwnd(stcb, net, net->mtu,
						    SCTP_CWND_LOG_FROM_CA);
					}
				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_CA);
					}
				}
			}
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_NO_CUMACK);
			}
		}
	}
}

static void
sctp_cwnd_update_exit_pf_common(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int old_cwnd __dtrace;

	old_cwnd = net->cwnd;
	net->cwnd = net->mtu;
	SDT_PROBE5(sctp, cwnd, net, ack,
	    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
	    old_cwnd, net->cwnd);
	SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
	    (void *)net, net->cwnd);
}

static void
sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int old_cwnd = net->cwnd;
	uint32_t t_ssthresh, t_cwnd;
	uint64_t t_ucwnd_sbw;

	/* MT FIXME: Don't compute this over and over again */
	t_ssthresh = 0;
	t_cwnd = 0;
	if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) {
		struct sctp_nets *lnet;
		uint32_t srtt;

		t_ucwnd_sbw = 0;
		TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
			t_ssthresh += lnet->ssthresh;
			t_cwnd += lnet->cwnd;
			srtt = lnet->lastsa;
			/* lastsa>>3; we don't need to divide ... */
			if (srtt > 0) {
				t_ucwnd_sbw += (uint64_t)lnet->cwnd / (uint64_t)srtt;
			}
		}
		if (t_ssthresh < 1) {
			t_ssthresh = 1;
		}
		if (t_ucwnd_sbw < 1) {
			t_ucwnd_sbw = 1;
		}
		if (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) {
			net->ssthresh = (uint32_t)(((uint64_t)4 *
			    (uint64_t)net->mtu *
			    (uint64_t)net->ssthresh) /
			    (uint64_t)t_ssthresh);
		} else {
			uint64_t cc_delta;

			srtt = net->lastsa;
			/* lastsa>>3; we don't need to divide ... */
			if (srtt == 0) {
				srtt = 1;
			}
			cc_delta = t_ucwnd_sbw * (uint64_t)srtt / 2;
			if (cc_delta < t_cwnd) {
				net->ssthresh = (uint32_t)((uint64_t)t_cwnd - cc_delta);
			} else {
				net->ssthresh = net->mtu;
			}
		}
		if ((net->cwnd > t_cwnd / 2) &&
		    (net->ssthresh < net->cwnd - t_cwnd / 2)) {
			net->ssthresh = net->cwnd - t_cwnd / 2;
		}
		if (net->ssthresh < net->mtu) {
			net->ssthresh = net->mtu;
		}
	} else {
		net->ssthresh = max(net->cwnd / 2, 4 * net->mtu);
	}
	net->cwnd = net->mtu;
	net->partial_bytes_acked = 0;
	SDT_PROBE5(sctp, cwnd, net, to,
	    stcb->asoc.my_vtag,
	    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
	    net,
	    old_cwnd, net->cwnd);
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
	}
}
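
/*
 * Worked example of the RPV2 timeout branch above (illustrative
 * numbers): with t_ucwnd_sbw = 20 bytes per srtt unit, net->lastsa =
 * 800 and t_cwnd = 30000 bytes, cc_delta = 20 * 800 / 2 = 8000, so
 * ssthresh becomes 30000 - 8000 = 22000 bytes. cwnd itself always
 * collapses to one MTU after a timeout, as RFC 4960 requires.
 */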

static void
sctp_cwnd_update_after_ecn_echo_common(struct sctp_tcb *stcb, struct sctp_nets *net,
    int in_window, int num_pkt_lost, int use_rtcc)
{
	int old_cwnd = net->cwnd;

	if ((use_rtcc) && (net->lan_type == SCTP_LAN_LOCAL) && (net->cc_mod.rtcc.use_dccc_ecn)) {
		/* Data center Congestion Control */
		if (in_window == 0) {
			/*
			 * Go to CA with the cwnd at the point we sent the
			 * TSN that was marked with a CE.
			 */
			if (net->ecn_prev_cwnd < net->cwnd) {
				/* Restore to prev cwnd */
				net->cwnd = net->ecn_prev_cwnd - (net->mtu * num_pkt_lost);
			} else {
				/* Just cut in 1/2 */
				net->cwnd /= 2;
			}
			/* Drop to CA */
			net->ssthresh = net->cwnd - (num_pkt_lost * net->mtu);
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
			}
		} else {
			/*
			 * Further tuning down required over the drastic
			 * original cut
			 */
			net->ssthresh -= (net->mtu * num_pkt_lost);
			net->cwnd -= (net->mtu * num_pkt_lost);
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
			}
		}
		SCTP_STAT_INCR(sctps_ecnereducedcwnd);
	} else {
		if (in_window == 0) {
			SCTP_STAT_INCR(sctps_ecnereducedcwnd);
			net->ssthresh = net->cwnd / 2;
			if (net->ssthresh < net->mtu) {
				net->ssthresh = net->mtu;
				/*
				 * here back off the timer as well, to slow
				 * us down
				 */
				net->RTO <<= 1;
			}
			net->cwnd = net->ssthresh;
			SDT_PROBE5(sctp, cwnd, net, ecn,
			    stcb->asoc.my_vtag,
			    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
			    net,
			    old_cwnd, net->cwnd);
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
			}
		}
	}
}

static void
sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
    struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
    uint32_t *bottle_bw, uint32_t *on_queue)
{
	uint32_t bw_avail;
	unsigned int incr;
	int old_cwnd = net->cwnd;

	/* get bottle neck bw */
	*bottle_bw = ntohl(cp->bottle_bw);
	/* and what's on queue */
	*on_queue = ntohl(cp->current_onq);
	/*
	 * adjust the on-queue if our flight is more; it could be that the
	 * router has not yet gotten data "in-flight" to it
	 */
	if (*on_queue < net->flight_size) {
		*on_queue = net->flight_size;
	}
	/* rtt is measured in micro seconds, bottle_bw in bytes per second */
	bw_avail = (uint32_t)(((uint64_t)(*bottle_bw) * net->rtt) / (uint64_t)1000000);
	if (bw_avail > *bottle_bw) {
		/*
		 * Cap the growth to no more than the bottle neck. This can
		 * happen as RTT slides up due to queues. It also means if
		 * you have more than a 1 second RTT with an empty queue you
		 * will be limited to the bottle_bw per second no matter if
		 * other points have 1/2 the RTT and you could get more
		 * out...
		 */
		bw_avail = *bottle_bw;
	}
	if (*on_queue > bw_avail) {
		/*
		 * No room for anything else, don't allow anything else to
		 * be "added to the fire".
		 */
		int seg_inflight, seg_onqueue, my_portion;

		net->partial_bytes_acked = 0;
		/* how much are we over queue size? */
		incr = *on_queue - bw_avail;
		if (stcb->asoc.seen_a_sack_this_pkt) {
			/*
			 * undo any cwnd adjustment that the sack might have
			 * made
			 */
			net->cwnd = net->prev_cwnd;
		}
		/* Now how much of that is mine? */
		seg_inflight = net->flight_size / net->mtu;
		seg_onqueue = *on_queue / net->mtu;
		my_portion = (incr * seg_inflight) / seg_onqueue;

		/* Have I made an adjustment already */
		if (net->cwnd > net->flight_size) {
			/*
			 * for this flight I made an adjustment; we need to
			 * decrease the portion by a share of our previous
			 * adjustment.
			 */
			int diff_adj;

			diff_adj = net->cwnd - net->flight_size;
			if (diff_adj > my_portion)
				my_portion = 0;
			else
				my_portion -= diff_adj;
		}
		/*
		 * back down to the previous cwnd (assume we have had a sack
		 * before this packet), minus whatever portion of the
		 * overage is my fault.
		 */
		net->cwnd -= my_portion;

		/* we will NOT back down more than 1 MTU */
		if (net->cwnd <= net->mtu) {
			net->cwnd = net->mtu;
		}
		/* force into CA */
		net->ssthresh = net->cwnd - 1;
	} else {
		/*
		 * Take 1/4 of the space left or max burst, whichever is
		 * less.
		 */
		incr = (bw_avail - *on_queue) >> 2;
		if ((stcb->asoc.max_burst > 0) &&
		    (stcb->asoc.max_burst * net->mtu < incr)) {
			incr = stcb->asoc.max_burst * net->mtu;
		}
		net->cwnd += incr;
	}
	if (net->cwnd > bw_avail) {
		/* We can't exceed the pipe size */
		net->cwnd = bw_avail;
	}
	if (net->cwnd < net->mtu) {
		/* We always have 1 MTU */
		net->cwnd = net->mtu;
	}
	sctp_enforce_cwnd_limit(&stcb->asoc, net);
	if (net->cwnd - old_cwnd != 0) {
		/* log only changes */
		SDT_PROBE5(sctp, cwnd, net, pd,
		    stcb->asoc.my_vtag,
		    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
		    net,
		    old_cwnd, net->cwnd);
		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
			sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
			    SCTP_CWND_LOG_FROM_SAT);
		}
	}
}
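
/*
 * Worked example for the PKTDROP math above (illustrative numbers): a
 * reported bottle_bw of 1000000 bytes/sec and net->rtt = 100000 us
 * gives bw_avail = 1000000 * 100000 / 1000000 = 100000 bytes, i.e. one
 * RTT worth of pipe. With on_queue = 120000 bytes the link is
 * oversubscribed by incr = 20000 bytes, and a sender holding half of
 * the queued segments would shave my_portion = 10000 bytes off its
 * cwnd.
 */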

static void
sctp_cwnd_update_after_output(struct sctp_tcb *stcb,
    struct sctp_nets *net, int burst_limit)
{
	int old_cwnd = net->cwnd;

	if (net->ssthresh < net->cwnd)
		net->ssthresh = net->cwnd;
	if (burst_limit) {
		net->cwnd = (net->flight_size + (burst_limit * net->mtu));
		sctp_enforce_cwnd_limit(&stcb->asoc, net);
		SDT_PROBE5(sctp, cwnd, net, bl,
		    stcb->asoc.my_vtag,
		    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
		    net,
		    old_cwnd, net->cwnd);
		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
			sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST);
		}
	}
}

static void
sctp_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	/* Passing zero as the last argument disables the rtcc algorithm */
	sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all, will_exit, 0);
}

static void
sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
    int in_window, int num_pkt_lost)
{
	/* Passing zero as the last argument disables the rtcc algorithm */
	sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost, 0);
}

/* Here starts the RTCCVAR type CC invented by RRS which
 * is a slight mod to RFC2581. We reuse a common routine or
 * two since these algorithms are so close and need to
 * remain the same.
 */
static void
sctp_cwnd_update_rtcc_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
    int in_window, int num_pkt_lost)
{
	sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost, 1);
}

static void
sctp_cwnd_update_rtcc_tsn_acknowledged(struct sctp_nets *net,
    struct sctp_tmit_chunk *tp1)
{
	net->cc_mod.rtcc.bw_bytes += tp1->send_size;
}

static void
sctp_cwnd_prepare_rtcc_net_for_sack(struct sctp_tcb *stcb SCTP_UNUSED,
    struct sctp_nets *net)
{
	if (net->cc_mod.rtcc.tls_needs_set > 0) {
		/* We had a bw measurement going on */
		struct timeval ltls;

		SCTP_GETPTIME_TIMEVAL(&ltls);
		timevalsub(&ltls, &net->cc_mod.rtcc.tls);
		net->cc_mod.rtcc.new_tot_time = (ltls.tv_sec * 1000000) + ltls.tv_usec;
	}
}

static void
sctp_cwnd_new_rtcc_transmission_begins(struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	uint64_t vtag __dtrace, probepoint __dtrace;

	if (net->cc_mod.rtcc.lbw) {
		/* Clear the old bw.. we went to 0 in-flight */
		vtag = (net->rtt << 32) | (((uint32_t)(stcb->sctp_ep->sctp_lport)) << 16) |
		    (stcb->rport);
		probepoint = (((uint64_t)net->cwnd) << 32);
		/* Probe point 8 */
		probepoint |= ((8 << 16) | 0);
		SDT_PROBE5(sctp, cwnd, net, rttvar,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | 0),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    net->flight_size,
		    probepoint);
		net->cc_mod.rtcc.lbw_rtt = 0;
		net->cc_mod.rtcc.cwnd_at_bw_set = 0;
		net->cc_mod.rtcc.lbw = 0;
		net->cc_mod.rtcc.bw_bytes_at_last_rttc = 0;
		net->cc_mod.rtcc.vol_reduce = 0;
		net->cc_mod.rtcc.bw_tot_time = 0;
		net->cc_mod.rtcc.bw_bytes = 0;
		net->cc_mod.rtcc.tls_needs_set = 0;
		if (net->cc_mod.rtcc.steady_step) {
			net->cc_mod.rtcc.vol_reduce = 0;
			net->cc_mod.rtcc.step_cnt = 0;
			net->cc_mod.rtcc.last_step_state = 0;
		}
		if (net->cc_mod.rtcc.ret_from_eq) {
			/* less aggressive one - reset cwnd too */
			uint32_t cwnd_in_mtu, cwnd;

			cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd);
			if (cwnd_in_mtu == 0) {
				/*
				 * Using 0 means that the value of RFC 4960
				 * is used.
				 */
				cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
			} else {
				/*
				 * We take the minimum of the burst limit
				 * and the initial congestion window.
				 */
				if ((stcb->asoc.max_burst > 0) && (cwnd_in_mtu > stcb->asoc.max_burst))
					cwnd_in_mtu = stcb->asoc.max_burst;
				cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu;
			}
			if (net->cwnd > cwnd) {
				/*
				 * Only set if we are not a timeout (i.e.
				 * down to 1 mtu)
				 */
				net->cwnd = cwnd;
			}
		}
	}
}

static void
sctp_set_rtcc_initial_cc_param(struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
#ifdef KDTRACE_HOOKS
	uint64_t vtag, probepoint;
#else
	uint64_t vtag __unused, probepoint __unused;
#endif

	sctp_set_initial_cc_param(stcb, net);
	stcb->asoc.use_precise_time = 1;
	probepoint = (((uint64_t)net->cwnd) << 32);
	probepoint |= ((9 << 16) | 0);
	vtag = (net->rtt << 32) |
	    (((uint32_t)(stcb->sctp_ep->sctp_lport)) << 16) |
	    (stcb->rport);
	SDT_PROBE5(sctp, cwnd, net, rttvar,
	    vtag,
	    0,
	    0,
	    0,
	    probepoint);
	net->cc_mod.rtcc.lbw_rtt = 0;
	net->cc_mod.rtcc.cwnd_at_bw_set = 0;
	net->cc_mod.rtcc.vol_reduce = 0;
	net->cc_mod.rtcc.lbw = 0;
	net->cc_mod.rtcc.vol_reduce = 0;
	net->cc_mod.rtcc.bw_bytes_at_last_rttc = 0;
	net->cc_mod.rtcc.bw_tot_time = 0;
	net->cc_mod.rtcc.bw_bytes = 0;
	net->cc_mod.rtcc.tls_needs_set = 0;
	net->cc_mod.rtcc.ret_from_eq = SCTP_BASE_SYSCTL(sctp_rttvar_eqret);
	net->cc_mod.rtcc.steady_step = SCTP_BASE_SYSCTL(sctp_steady_step);
	net->cc_mod.rtcc.use_dccc_ecn = SCTP_BASE_SYSCTL(sctp_use_dccc_ecn);
	net->cc_mod.rtcc.step_cnt = 0;
	net->cc_mod.rtcc.last_step_state = 0;
}

static int
sctp_cwnd_rtcc_socket_option(struct sctp_tcb *stcb, int setorget,
    struct sctp_cc_option *cc_opt)
{
	struct sctp_nets *net;

	if (setorget == 1) {
		/* a set */
		if (cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) {
			if ((cc_opt->aid_value.assoc_value != 0) &&
			    (cc_opt->aid_value.assoc_value != 1)) {
				return (EINVAL);
			}
			TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
				net->cc_mod.rtcc.ret_from_eq = cc_opt->aid_value.assoc_value;
			}
		} else if (cc_opt->option == SCTP_CC_OPT_USE_DCCC_ECN) {
			if ((cc_opt->aid_value.assoc_value != 0) &&
			    (cc_opt->aid_value.assoc_value != 1)) {
				return (EINVAL);
			}
			TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
				net->cc_mod.rtcc.use_dccc_ecn = cc_opt->aid_value.assoc_value;
			}
		} else if (cc_opt->option == SCTP_CC_OPT_STEADY_STEP) {
			TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
				net->cc_mod.rtcc.steady_step = cc_opt->aid_value.assoc_value;
			}
		} else {
			return (EINVAL);
		}
	} else {
		/* a get */
		if (cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) {
			net = TAILQ_FIRST(&stcb->asoc.nets);
			if (net == NULL) {
				return (EFAULT);
			}
			cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.ret_from_eq;
		} else if (cc_opt->option == SCTP_CC_OPT_USE_DCCC_ECN) {
			net = TAILQ_FIRST(&stcb->asoc.nets);
			if (net == NULL) {
				return (EFAULT);
			}
			cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.use_dccc_ecn;
		} else if (cc_opt->option == SCTP_CC_OPT_STEADY_STEP) {
			net = TAILQ_FIRST(&stcb->asoc.nets);
			if (net == NULL) {
				return (EFAULT);
			}
			cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.steady_step;
		} else {
			return (EINVAL);
		}
	}
	return (0);
}

static void
sctp_cwnd_update_rtcc_packet_transmitted(struct sctp_tcb *stcb SCTP_UNUSED,
    struct sctp_nets *net)
{
	if (net->cc_mod.rtcc.tls_needs_set == 0) {
		SCTP_GETPTIME_TIMEVAL(&net->cc_mod.rtcc.tls);
		net->cc_mod.rtcc.tls_needs_set = 2;
	}
}
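
/*
 * A minimal user-space sketch of driving the options above through the
 * SCTP_CC_OPTION socket option (assuming an SCTP socket fd and that
 * the application has already selected this CC module; not part of
 * this file):
 *
 *	struct sctp_cc_option opt;
 *
 *	memset(&opt, 0, sizeof(opt));
 *	opt.option = SCTP_CC_OPT_STEADY_STEP;
 *	opt.aid_value.assoc_id = assoc_id;
 *	opt.aid_value.assoc_value = 4;
 *	(void)setsockopt(fd, IPPROTO_SCTP, SCTP_CC_OPTION,
 *	    &opt, sizeof(opt));
 */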

static void
sctp_cwnd_update_rtcc_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	/* Passing one as the last argument enables the rtcc algorithm */
	sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all, will_exit, 1);
}

static void
sctp_rtt_rtcc_calculated(struct sctp_tcb *stcb SCTP_UNUSED,
    struct sctp_nets *net,
    struct timeval *now SCTP_UNUSED)
{
	net->cc_mod.rtcc.rtt_set_this_sack = 1;
}

/* Here starts Sally Floyd's HS-TCP */

struct sctp_hs_raise_drop {
	int32_t cwnd;
	int8_t increase;
	int8_t drop_percent;
};

#define SCTP_HS_TABLE_SIZE 73

static const struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = {
	{38, 1, 50},		/* 0 */
	{118, 2, 44},		/* 1 */
	{221, 3, 41},		/* 2 */
	{347, 4, 38},		/* 3 */
	{495, 5, 37},		/* 4 */
	{663, 6, 35},		/* 5 */
	{851, 7, 34},		/* 6 */
	{1058, 8, 33},		/* 7 */
	{1284, 9, 32},		/* 8 */
	{1529, 10, 31},		/* 9 */
	{1793, 11, 30},		/* 10 */
	{2076, 12, 29},		/* 11 */
	{2378, 13, 28},		/* 12 */
	{2699, 14, 28},		/* 13 */
	{3039, 15, 27},		/* 14 */
	{3399, 16, 27},		/* 15 */
	{3778, 17, 26},		/* 16 */
	{4177, 18, 26},		/* 17 */
	{4596, 19, 25},		/* 18 */
	{5036, 20, 25},		/* 19 */
	{5497, 21, 24},		/* 20 */
	{5979, 22, 24},		/* 21 */
	{6483, 23, 23},		/* 22 */
	{7009, 24, 23},		/* 23 */
	{7558, 25, 22},		/* 24 */
	{8130, 26, 22},		/* 25 */
	{8726, 27, 22},		/* 26 */
	{9346, 28, 21},		/* 27 */
	{9991, 29, 21},		/* 28 */
	{10661, 30, 21},	/* 29 */
	{11358, 31, 20},	/* 30 */
	{12082, 32, 20},	/* 31 */
	{12834, 33, 20},	/* 32 */
	{13614, 34, 19},	/* 33 */
	{14424, 35, 19},	/* 34 */
	{15265, 36, 19},	/* 35 */
	{16137, 37, 19},	/* 36 */
	{17042, 38, 18},	/* 37 */
	{17981, 39, 18},	/* 38 */
	{18955, 40, 18},	/* 39 */
	{19965, 41, 17},	/* 40 */
	{21013, 42, 17},	/* 41 */
	{22101, 43, 17},	/* 42 */
	{23230, 44, 17},	/* 43 */
	{24402, 45, 16},	/* 44 */
	{25618, 46, 16},	/* 45 */
	{26881, 47, 16},	/* 46 */
	{28193, 48, 16},	/* 47 */
	{29557, 49, 15},	/* 48 */
	{30975, 50, 15},	/* 49 */
	{32450, 51, 15},	/* 50 */
	{33986, 52, 15},	/* 51 */
	{35586, 53, 14},	/* 52 */
	{37253, 54, 14},	/* 53 */
	{38992, 55, 14},	/* 54 */
	{40808, 56, 14},	/* 55 */
	{42707, 57, 13},	/* 56 */
	{44694, 58, 13},	/* 57 */
	{46776, 59, 13},	/* 58 */
	{48961, 60, 13},	/* 59 */
	{51258, 61, 13},	/* 60 */
	{53677, 62, 12},	/* 61 */
	{56230, 63, 12},	/* 62 */
	{58932, 64, 12},	/* 63 */
	{61799, 65, 12},	/* 64 */
	{64851, 66, 11},	/* 65 */
	{68113, 67, 11},	/* 66 */
	{71617, 68, 11},	/* 67 */
	{75401, 69, 10},	/* 68 */
	{79517, 70, 10},	/* 69 */
	{84035, 71, 10},	/* 70 */
	{89053, 72, 10},	/* 71 */
	{94717, 73, 9}		/* 72 */
};
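
/*
 * Worked example of the table lookup below in sctp_hs_cwnd_increase()
 * (illustrative numbers, assuming last_hs_used is still 0): cur_val is
 * the cwnd in kilobytes (cwnd >> 10), so a cwnd of 200000 bytes gives
 * cur_val = 195. Scanning upward finds row 2 ({221, 3, 41}) as the
 * first entry with a larger cwnd column, so the increase is
 * 3 << 10 = 3072 bytes per step, and a loss at this size would cut
 * cwnd by 41 percent.
 */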

static void
sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int cur_val, i, indx, incr;
	int old_cwnd = net->cwnd;

	cur_val = net->cwnd >> 10;
	indx = SCTP_HS_TABLE_SIZE - 1;

	if (cur_val < sctp_cwnd_adjust[0].cwnd) {
		/* normal mode */
		if (net->net_ack > net->mtu) {
			net->cwnd += net->mtu;
		} else {
			net->cwnd += net->net_ack;
		}
	} else {
		for (i = net->last_hs_used; i < SCTP_HS_TABLE_SIZE; i++) {
			if (cur_val < sctp_cwnd_adjust[i].cwnd) {
				indx = i;
				break;
			}
		}
		net->last_hs_used = indx;
		incr = (((int32_t)sctp_cwnd_adjust[indx].increase) << 10);
		net->cwnd += incr;
	}
	sctp_enforce_cwnd_limit(&stcb->asoc, net);
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SS);
	}
}

static void
sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int cur_val, i, indx;
	int old_cwnd = net->cwnd;

	cur_val = net->cwnd >> 10;
	if (cur_val < sctp_cwnd_adjust[0].cwnd) {
		/* normal mode */
		net->ssthresh = net->cwnd / 2;
		if (net->ssthresh < (net->mtu * 2)) {
			net->ssthresh = 2 * net->mtu;
		}
		net->cwnd = net->ssthresh;
	} else {
		/* drop by the proper amount */
		net->ssthresh = net->cwnd - (int)((net->cwnd / 100) *
		    (int32_t)sctp_cwnd_adjust[net->last_hs_used].drop_percent);
		net->cwnd = net->ssthresh;
		/* now where are we */
		indx = net->last_hs_used;
		cur_val = net->cwnd >> 10;
		/* reset where we are in the table */
		if (cur_val < sctp_cwnd_adjust[0].cwnd) {
			/* fell out of hs */
			net->last_hs_used = 0;
		} else {
			for (i = indx; i >= 1; i--) {
				if (cur_val > sctp_cwnd_adjust[i - 1].cwnd) {
					break;
				}
			}
			net->last_hs_used = indx;
		}
	}
	sctp_enforce_cwnd_limit(&stcb->asoc, net);
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR);
	}
}
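
/*
 * Worked example of the decrease path above (illustrative numbers):
 * with cwnd = 200000 bytes and last_hs_used = 2 the drop column is 41,
 * so ssthresh = 200000 - (200000 / 100) * 41 = 118000 bytes and cwnd
 * follows it; cur_val is then 115, below row 1's 118, so the table
 * position is rescanned downward.
 */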

static void
sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
	struct sctp_nets *net;

	/*
	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
	 * (net->fast_retran_loss_recovery == 0)))
	 */
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
		if ((asoc->fast_retran_loss_recovery == 0) ||
		    (asoc->sctp_cmt_on_off > 0)) {
			/* out of a RFC2582 Fast recovery window? */
			if (net->net_ack > 0) {
				/*
				 * per section 7.2.3, are there any
				 * destinations that had a fast retransmit
				 * to them. If so what we need to do is
				 * adjust ssthresh and cwnd.
				 */
				struct sctp_tmit_chunk *lchk;

				sctp_hs_cwnd_decrease(stcb, net);

				lchk = TAILQ_FIRST(&asoc->send_queue);

				net->partial_bytes_acked = 0;
				/* Turn on fast recovery window */
				asoc->fast_retran_loss_recovery = 1;
				if (lchk == NULL) {
					/* Mark end of the window */
					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					asoc->fast_recovery_tsn = lchk->rec.data.tsn - 1;
				}

				/*
				 * CMT fast recovery -- per destination
				 * recovery variable.
				 */
				net->fast_retran_loss_recovery = 1;

				if (lchk == NULL) {
					/* Mark end of the window */
					net->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					net->fast_recovery_tsn = lchk->rec.data.tsn - 1;
				}

				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net,
				    SCTP_FROM_SCTP_CC_FUNCTIONS + SCTP_LOC_2);
				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net);
			}
		} else if (net->net_ack > 0) {
			/*
			 * Mark a peg that we WOULD have done a cwnd
			 * reduction but RFC2582 prevented this action.
			 */
			SCTP_STAT_INCR(sctps_fastretransinrtt);
		}
	}
}

static void
sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all SCTP_UNUSED, int will_exit)
{
	struct sctp_nets *net;

	/******************************/
	/* update cwnd and Early FR   */
	/******************************/
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code. Need to debug.
		 */
		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
			if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) ||
			    SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) {
				net->will_exit_fast_recovery = 1;
			}
		}
#endif
		/* if nothing was acked on this destination skip it */
		if (net->net_ack == 0) {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
			}
			continue;
		}
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code
		 */
		/*
		 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
		 * && net->will_exit_fast_recovery == 0) { @@@ Do something
		 * } else if (sctp_cmt_on_off == 0 &&
		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
		 */
#endif

		if (asoc->fast_retran_loss_recovery &&
		    (will_exit == 0) &&
		    (asoc->sctp_cmt_on_off == 0)) {
			/*
			 * If we are in loss recovery we skip any cwnd
			 * update
			 */
			return;
		}
		/*
		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
		 * moved.
		 */
		if (accum_moved ||
		    ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
			/* If the cumulative ack moved we can proceed */
			if (net->cwnd <= net->ssthresh) {
				/* We are in slow start */
				if (net->flight_size + net->net_ack >= net->cwnd) {
					sctp_hs_cwnd_increase(stcb, net);
				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_SS);
					}
				}
			} else {
				/* We are in congestion avoidance */
				net->partial_bytes_acked += net->net_ack;
				if ((net->flight_size + net->net_ack >= net->cwnd) &&
				    (net->partial_bytes_acked >= net->cwnd)) {
					net->partial_bytes_acked -= net->cwnd;
					net->cwnd += net->mtu;
					sctp_enforce_cwnd_limit(asoc, net);
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
						sctp_log_cwnd(stcb, net, net->mtu,
						    SCTP_CWND_LOG_FROM_CA);
					}
				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_CA);
					}
				}
			}
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_NO_CUMACK);
			}
		}
	}
}

/*
 * H-TCP congestion control. The algorithm is detailed in:
 * R.N.Shorten, D.J.Leith:
 *   "H-TCP: TCP for high-speed and long-distance networks"
 *   Proc. PFLDnet, Argonne, 2004.
 * http://www.hamilton.ie/net/htcp3.pdf
 */

static int use_rtt_scaling = 1;
static int use_bandwidth_switch = 1;

static inline int
between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
{
	return (seq3 - seq2 >= seq1 - seq2);
}

static inline uint32_t
htcp_cong_time(struct htcp *ca)
{
	return (sctp_get_tick_count() - ca->last_cong);
}

static inline uint32_t
htcp_ccount(struct htcp *ca)
{
	return (ca->minRTT == 0 ? htcp_cong_time(ca) : htcp_cong_time(ca) / ca->minRTT);
}

static inline void
htcp_reset(struct htcp *ca)
{
	ca->undo_last_cong = ca->last_cong;
	ca->undo_maxRTT = ca->maxRTT;
	ca->undo_old_maxB = ca->old_maxB;
	ca->last_cong = sctp_get_tick_count();
}

#ifdef SCTP_NOT_USED

static uint32_t
htcp_cwnd_undo(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	net->cc_mod.htcp_ca.last_cong = net->cc_mod.htcp_ca.undo_last_cong;
	net->cc_mod.htcp_ca.maxRTT = net->cc_mod.htcp_ca.undo_maxRTT;
	net->cc_mod.htcp_ca.old_maxB = net->cc_mod.htcp_ca.undo_old_maxB;
	return (max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->cc_mod.htcp_ca.beta) * net->mtu));
}

#endif

static inline void
measure_rtt(struct sctp_nets *net)
{
	uint32_t srtt = net->lastsa >> SCTP_RTT_SHIFT;

	/* keep track of minimum RTT seen so far, minRTT is zero at first */
	if (net->cc_mod.htcp_ca.minRTT > srtt || !net->cc_mod.htcp_ca.minRTT)
		net->cc_mod.htcp_ca.minRTT = srtt;

	/* max RTT */
	if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->cc_mod.htcp_ca) > 3) {
		if (net->cc_mod.htcp_ca.maxRTT < net->cc_mod.htcp_ca.minRTT)
			net->cc_mod.htcp_ca.maxRTT = net->cc_mod.htcp_ca.minRTT;
		if (net->cc_mod.htcp_ca.maxRTT < srtt && srtt <= net->cc_mod.htcp_ca.maxRTT + sctp_msecs_to_ticks(20))
			net->cc_mod.htcp_ca.maxRTT = srtt;
	}
}
static void
measure_achieved_throughput(struct sctp_nets *net)
{
	uint32_t now = sctp_get_tick_count();

	if (net->fast_retran_ip == 0)
		net->cc_mod.htcp_ca.bytes_acked = net->net_ack;

	if (!use_bandwidth_switch)
		return;

	/* achieved throughput calculations */
	/* JRS - not 100% sure of this statement */
	if (net->fast_retran_ip == 1) {
		net->cc_mod.htcp_ca.bytecount = 0;
		net->cc_mod.htcp_ca.lasttime = now;
		return;
	}

	net->cc_mod.htcp_ca.bytecount += net->net_ack;
	if ((net->cc_mod.htcp_ca.bytecount >= net->cwnd - (((net->cc_mod.htcp_ca.alpha >> 7) ? (net->cc_mod.htcp_ca.alpha >> 7) : 1) * net->mtu)) &&
	    (now - net->cc_mod.htcp_ca.lasttime >= net->cc_mod.htcp_ca.minRTT) &&
	    (net->cc_mod.htcp_ca.minRTT > 0)) {
		uint32_t cur_Bi = net->cc_mod.htcp_ca.bytecount / net->mtu * hz / (now - net->cc_mod.htcp_ca.lasttime);

		if (htcp_ccount(&net->cc_mod.htcp_ca) <= 3) {
			/* just after backoff */
			net->cc_mod.htcp_ca.minB = net->cc_mod.htcp_ca.maxB = net->cc_mod.htcp_ca.Bi = cur_Bi;
		} else {
			net->cc_mod.htcp_ca.Bi = (3 * net->cc_mod.htcp_ca.Bi + cur_Bi) / 4;
			if (net->cc_mod.htcp_ca.Bi > net->cc_mod.htcp_ca.maxB)
				net->cc_mod.htcp_ca.maxB = net->cc_mod.htcp_ca.Bi;
			if (net->cc_mod.htcp_ca.minB > net->cc_mod.htcp_ca.maxB)
				net->cc_mod.htcp_ca.minB = net->cc_mod.htcp_ca.maxB;
		}
		net->cc_mod.htcp_ca.bytecount = 0;
		net->cc_mod.htcp_ca.lasttime = now;
	}
}

static inline void
htcp_beta_update(struct htcp *ca, uint32_t minRTT, uint32_t maxRTT)
{
	if (use_bandwidth_switch) {
		uint32_t maxB = ca->maxB;
		uint32_t old_maxB = ca->old_maxB;

		ca->old_maxB = ca->maxB;

		if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
			ca->beta = BETA_MIN;
			ca->modeswitch = 0;
			return;
		}
	}

	if (ca->modeswitch && minRTT > sctp_msecs_to_ticks(10) && maxRTT) {
		ca->beta = (minRTT << 7) / maxRTT;
		if (ca->beta < BETA_MIN)
			ca->beta = BETA_MIN;
		else if (ca->beta > BETA_MAX)
			ca->beta = BETA_MAX;
	} else {
		ca->beta = BETA_MIN;
		ca->modeswitch = 1;
	}
}

static inline void
htcp_alpha_update(struct htcp *ca)
{
	uint32_t minRTT = ca->minRTT;
	uint32_t factor = 1;
	uint32_t diff = htcp_cong_time(ca);

	if (diff > (uint32_t)hz) {
		diff -= hz;
		factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / hz)) / hz;
	}

	if (use_rtt_scaling && minRTT) {
		uint32_t scale = (hz << 3) / (10 * minRTT);

		/* clamping ratio to interval [0.5,10]<<3 */
		scale = min(max(scale, 1U << 2), 10U << 3);
		factor = (factor << 3) / scale;
		/* guard against a zero factor after the scaling division */
		if (factor == 0)
			factor = 1;
	}

	ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
	/* likewise, never let alpha collapse to zero */
	if (ca->alpha == 0)
		ca->alpha = ALPHA_BASE;
}

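/*
 * A worked example of the alpha update (assuming hz = 1000 and
 * BETA_MIN = 1 << 6): two seconds after the last congestion event,
 * diff = 2000 - 1000 = 1000, so factor = 1 + (10000 + 250) / 1000 = 11.
 * With minRTT = 100 ticks, scale = (1000 << 3) / 1000 = 8 (within the
 * clamp), leaving factor = (11 << 3) / 8 = 11; with beta = 64 this
 * yields alpha = 2 * 11 * (128 - 64) = 1408, i.e. an increase of
 * 1408 / 128 = 11 MTUs per RTT in Q7 terms.
 */
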
/*
 * After we have the rtt data to calculate beta, we'd still prefer to wait
 * one rtt before we adjust our beta to ensure we are working from
 * consistent data.
 *
 * This function should be called when we hit a congestion event since only
 * at that point do we really have a real sense of maxRTT (the queues en
 * route were getting just too full now).
 */
static void
htcp_param_update(struct sctp_nets *net)
{
	uint32_t minRTT = net->cc_mod.htcp_ca.minRTT;
	uint32_t maxRTT = net->cc_mod.htcp_ca.maxRTT;

	htcp_beta_update(&net->cc_mod.htcp_ca, minRTT, maxRTT);
	htcp_alpha_update(&net->cc_mod.htcp_ca);

	/*
	 * add slowly fading memory for maxRTT to accommodate routing
	 * changes etc
	 */
	if (minRTT > 0 && maxRTT > minRTT)
		net->cc_mod.htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
}

static uint32_t
htcp_recalc_ssthresh(struct sctp_nets *net)
{
	htcp_param_update(net);
	return (max(((net->cwnd / net->mtu * net->cc_mod.htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu));
}

static void
htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	/*-
	 * How to handle these functions?
	 *	if (!tcp_is_cwnd_limited(sk, in_flight)) RRS - good question.
	 *		return;
	 */
	if (net->cwnd <= net->ssthresh) {
		/* We are in slow start */
		if (net->flight_size + net->net_ack >= net->cwnd) {
			if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) {
				net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable));
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, net->mtu,
					    SCTP_CWND_LOG_FROM_SS);
				}
			} else {
				net->cwnd += net->net_ack;
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, net->net_ack,
					    SCTP_CWND_LOG_FROM_SS);
				}
			}
			sctp_enforce_cwnd_limit(&stcb->asoc, net);
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->net_ack,
				    SCTP_CWND_LOG_NOADV_SS);
			}
		}
	} else {
		measure_rtt(net);

		/*
		 * In dangerous area, increase slowly. In theory this is
		 * net->cwnd += alpha / net->cwnd
		 */
		/* What is snd_cwnd_cnt?? */
		if (((net->partial_bytes_acked / net->mtu * net->cc_mod.htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) {
			/*-
			 * Does SCTP have a cwnd clamp?
			 * if (net->snd_cwnd < net->snd_cwnd_clamp) - Nope (RRS).
			 */
			net->cwnd += net->mtu;
			net->partial_bytes_acked = 0;
			sctp_enforce_cwnd_limit(&stcb->asoc, net);
			htcp_alpha_update(&net->cc_mod.htcp_ca);
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_FROM_CA);
			}
		} else {
			net->partial_bytes_acked += net->net_ack;
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->net_ack,
				    SCTP_CWND_LOG_NOADV_CA);
			}
		}

		net->cc_mod.htcp_ca.bytes_acked = net->mtu;
	}
}

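/*
 * To illustrate the congestion-avoidance test above (the values are only
 * an example): with mtu = 1460 and cwnd = 14600 (10 MTUs), alpha =
 * ALPHA_BASE (Q7 for 1.0) reduces the condition to
 * partial_bytes_acked >= cwnd, the classic one-MTU-per-RTT increase,
 * while alpha = 1408 (Q7 for 11) fires once roughly cwnd / 11 newly
 * acked bytes have accumulated, so cwnd grows about 11 times faster.
 */
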
#ifdef SCTP_NOT_USED
/* Lower bound on congestion window. */
static uint32_t
htcp_min_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	return (net->ssthresh);
}
#endif

static void
htcp_init(struct sctp_nets *net)
{
	memset(&net->cc_mod.htcp_ca, 0, sizeof(struct htcp));
	net->cc_mod.htcp_ca.alpha = ALPHA_BASE;
	net->cc_mod.htcp_ca.beta = BETA_MIN;
	net->cc_mod.htcp_ca.bytes_acked = net->mtu;
	net->cc_mod.htcp_ca.last_cong = sctp_get_tick_count();
}

static void
sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	/*
	 * Set the initial cwnd as in RFC 4960: the minimum of 4 MTUs and
	 * the maximum of 2 MTUs and SCTP_INITIAL_CWND.
	 */
	net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
	net->ssthresh = stcb->asoc.peers_rwnd;
	sctp_enforce_cwnd_limit(&stcb->asoc, net);
	htcp_init(net);

	if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
		sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
	}
}

static void
sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all SCTP_UNUSED, int will_exit)
{
	struct sctp_nets *net;

	/******************************/
	/* update cwnd and Early FR   */
	/******************************/
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code. Need to debug.
		 */
		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
			if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) ||
			    SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) {
				net->will_exit_fast_recovery = 1;
			}
		}
#endif
		/* if nothing was acked on this destination skip it */
		if (net->net_ack == 0) {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
			}
			continue;
		}
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code
		 */
		/*
		 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
		 * && net->will_exit_fast_recovery == 0) { @@@ Do something
		 * } else if (sctp_cmt_on_off == 0 &&
		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
		 */
#endif

		if (asoc->fast_retran_loss_recovery &&
		    (will_exit == 0) &&
		    (asoc->sctp_cmt_on_off == 0)) {
			/*
			 * If we are in loss recovery we skip any cwnd
			 * update
			 */
			return;
		}
		/*
		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
		 * moved.
		 */
		if (accum_moved ||
		    ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
			htcp_cong_avoid(stcb, net);
			measure_achieved_throughput(net);
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_NO_CUMACK);
			}
		}
	}
}

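/*
 * On the loss events handled below, ssthresh is recomputed from beta
 * instead of being halved: htcp_recalc_ssthresh() returns
 * max(((cwnd / mtu * beta) >> 7) * mtu, 2 * mtu). For example, with an
 * illustrative beta = 77 (roughly 0.6 in Q7), a cwnd of 20 MTUs gives
 * (20 * 77) >> 7 = 12, so ssthresh becomes 12 MTUs, and it can never
 * drop below the 2-MTU floor.
 */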
static void
sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
	struct sctp_nets *net;

	/*
	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
	 * (net->fast_retran_loss_recovery == 0)))
	 */
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
		if ((asoc->fast_retran_loss_recovery == 0) ||
		    (asoc->sctp_cmt_on_off > 0)) {
			/* out of a RFC2582 Fast recovery window? */
			if (net->net_ack > 0) {
				/*
				 * per section 7.2.3, are there any
				 * destinations that had a fast retransmit
				 * to them. If so what we need to do is
				 * adjust ssthresh and cwnd.
				 */
				struct sctp_tmit_chunk *lchk;
				int old_cwnd = net->cwnd;

				/* JRS - reset as if state were changed */
				htcp_reset(&net->cc_mod.htcp_ca);
				net->ssthresh = htcp_recalc_ssthresh(net);
				net->cwnd = net->ssthresh;
				sctp_enforce_cwnd_limit(asoc, net);
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
					    SCTP_CWND_LOG_FROM_FR);
				}
				lchk = TAILQ_FIRST(&asoc->send_queue);

				net->partial_bytes_acked = 0;
				/* Turn on fast recovery window */
				asoc->fast_retran_loss_recovery = 1;
				if (lchk == NULL) {
					/* Mark end of the window */
					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					asoc->fast_recovery_tsn = lchk->rec.data.tsn - 1;
				}

				/*
				 * CMT fast recovery -- per destination
				 * recovery variable.
				 */
				net->fast_retran_loss_recovery = 1;

				if (lchk == NULL) {
					/* Mark end of the window */
					net->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					net->fast_recovery_tsn = lchk->rec.data.tsn - 1;
				}

				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net,
				    SCTP_FROM_SCTP_CC_FUNCTIONS + SCTP_LOC_3);
				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net);
			}
		} else if (net->net_ack > 0) {
			/*
			 * Mark a peg that we WOULD have done a cwnd
			 * reduction but RFC2582 prevented this action.
			 */
			SCTP_STAT_INCR(sctps_fastretransinrtt);
		}
	}
}

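/*
 * Note on the timeout handler below: as in the RFC 4960 rule for a T3
 * timeout (ssthresh = max(cwnd / 2, 4 * MTU), cwnd = 1 MTU), cwnd
 * collapses to a single MTU, but here ssthresh comes from the H-TCP
 * beta computation in htcp_recalc_ssthresh() rather than from halving
 * the congestion window.
 */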
2281 */ 2282 SCTP_STAT_INCR(sctps_fastretransinrtt); 2283 } 2284 } 2285 } 2286 2287 static void 2288 sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb, 2289 struct sctp_nets *net) 2290 { 2291 int old_cwnd = net->cwnd; 2292 2293 /* JRS - reset as if the state were being changed to timeout */ 2294 htcp_reset(&net->cc_mod.htcp_ca); 2295 net->ssthresh = htcp_recalc_ssthresh(net); 2296 net->cwnd = net->mtu; 2297 net->partial_bytes_acked = 0; 2298 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 2299 sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX); 2300 } 2301 } 2302 2303 static void 2304 sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, 2305 struct sctp_nets *net, int in_window, int num_pkt_lost SCTP_UNUSED) 2306 { 2307 int old_cwnd; 2308 2309 old_cwnd = net->cwnd; 2310 2311 /* JRS - reset hctp as if state changed */ 2312 if (in_window == 0) { 2313 htcp_reset(&net->cc_mod.htcp_ca); 2314 SCTP_STAT_INCR(sctps_ecnereducedcwnd); 2315 net->ssthresh = htcp_recalc_ssthresh(net); 2316 if (net->ssthresh < net->mtu) { 2317 net->ssthresh = net->mtu; 2318 /* here back off the timer as well, to slow us down */ 2319 net->RTO <<= 1; 2320 } 2321 net->cwnd = net->ssthresh; 2322 sctp_enforce_cwnd_limit(&stcb->asoc, net); 2323 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 2324 sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); 2325 } 2326 } 2327 } 2328 2329 const struct sctp_cc_functions sctp_cc_functions[] = { 2330 { 2331 .sctp_set_initial_cc_param = sctp_set_initial_cc_param, 2332 .sctp_cwnd_update_after_sack = sctp_cwnd_update_after_sack, 2333 .sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common, 2334 .sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr, 2335 .sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout, 2336 .sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_after_ecn_echo, 2337 .sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped, 2338 .sctp_cwnd_update_after_output = sctp_cwnd_update_after_output, 2339 }, 2340 { 2341 .sctp_set_initial_cc_param = sctp_set_initial_cc_param, 2342 .sctp_cwnd_update_after_sack = sctp_hs_cwnd_update_after_sack, 2343 .sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common, 2344 .sctp_cwnd_update_after_fr = sctp_hs_cwnd_update_after_fr, 2345 .sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout, 2346 .sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_after_ecn_echo, 2347 .sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped, 2348 .sctp_cwnd_update_after_output = sctp_cwnd_update_after_output, 2349 }, 2350 { 2351 .sctp_set_initial_cc_param = sctp_htcp_set_initial_cc_param, 2352 .sctp_cwnd_update_after_sack = sctp_htcp_cwnd_update_after_sack, 2353 .sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common, 2354 .sctp_cwnd_update_after_fr = sctp_htcp_cwnd_update_after_fr, 2355 .sctp_cwnd_update_after_timeout = sctp_htcp_cwnd_update_after_timeout, 2356 .sctp_cwnd_update_after_ecn_echo = sctp_htcp_cwnd_update_after_ecn_echo, 2357 .sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped, 2358 .sctp_cwnd_update_after_output = sctp_cwnd_update_after_output, 2359 }, 2360 { 2361 .sctp_set_initial_cc_param = sctp_set_rtcc_initial_cc_param, 2362 .sctp_cwnd_update_after_sack = sctp_cwnd_update_rtcc_after_sack, 2363 .sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common, 2364 .sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr, 2365 
const struct sctp_cc_functions sctp_cc_functions[] = {
	{
		.sctp_set_initial_cc_param = sctp_set_initial_cc_param,
		.sctp_cwnd_update_after_sack = sctp_cwnd_update_after_sack,
		.sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common,
		.sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr,
		.sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout,
		.sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_after_ecn_echo,
		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
	},
	{
		.sctp_set_initial_cc_param = sctp_set_initial_cc_param,
		.sctp_cwnd_update_after_sack = sctp_hs_cwnd_update_after_sack,
		.sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common,
		.sctp_cwnd_update_after_fr = sctp_hs_cwnd_update_after_fr,
		.sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout,
		.sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_after_ecn_echo,
		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
	},
	{
		.sctp_set_initial_cc_param = sctp_htcp_set_initial_cc_param,
		.sctp_cwnd_update_after_sack = sctp_htcp_cwnd_update_after_sack,
		.sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common,
		.sctp_cwnd_update_after_fr = sctp_htcp_cwnd_update_after_fr,
		.sctp_cwnd_update_after_timeout = sctp_htcp_cwnd_update_after_timeout,
		.sctp_cwnd_update_after_ecn_echo = sctp_htcp_cwnd_update_after_ecn_echo,
		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
	},
	{
		.sctp_set_initial_cc_param = sctp_set_rtcc_initial_cc_param,
		.sctp_cwnd_update_after_sack = sctp_cwnd_update_rtcc_after_sack,
		.sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common,
		.sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr,
		.sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout,
		.sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_rtcc_after_ecn_echo,
		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
		.sctp_cwnd_update_packet_transmitted = sctp_cwnd_update_rtcc_packet_transmitted,
		.sctp_cwnd_update_tsn_acknowledged = sctp_cwnd_update_rtcc_tsn_acknowledged,
		.sctp_cwnd_new_transmission_begins = sctp_cwnd_new_rtcc_transmission_begins,
		.sctp_cwnd_prepare_net_for_sack = sctp_cwnd_prepare_rtcc_net_for_sack,
		.sctp_cwnd_socket_option = sctp_cwnd_rtcc_socket_option,
		.sctp_rtt_calculated = sctp_rtt_rtcc_calculated
	}
};