/*-
 * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
 * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * a) Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * b) Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the distribution.
 *
 * c) Neither the name of Cisco Systems, Inc. nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <netinet/sctp_os.h>
#include <netinet/sctp_var.h>
#include <netinet/sctp_sysctl.h>
#include <netinet/sctp_pcb.h>
#include <netinet/sctp_header.h>
#include <netinet/sctputil.h>
#include <netinet/sctp_output.h>
#include <netinet/sctp_input.h>
#include <netinet/sctp_indata.h>
#include <netinet/sctp_uio.h>
#include <netinet/sctp_timer.h>
#include <netinet/sctp_auth.h>
#include <netinet/sctp_asconf.h>
#include <netinet/sctp_dtrace_declare.h>
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#define SHIFT_MPTCP_MULTI_N 40
#define SHIFT_MPTCP_MULTI_Z 16
#define SHIFT_MPTCP_MULTI 8

static void
sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
{
    struct sctp_association *assoc;
    uint32_t cwnd_in_mtu;

    assoc = &stcb->asoc;
    cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd);
    if (cwnd_in_mtu == 0) {
        /* Using 0 means that the value of RFC 4960 is used. */
        net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
    } else {
        /*
         * We take the minimum of the burst limit and the initial
         * congestion window.
         */
        if ((assoc->max_burst > 0) && (cwnd_in_mtu > assoc->max_burst))
            cwnd_in_mtu = assoc->max_burst;
        net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu;
    }
    if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
        (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) {
        /* In case of resource pooling initialize appropriately */
        net->cwnd /= assoc->numnets;
        if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) {
            net->cwnd = net->mtu - sizeof(struct sctphdr);
        }
    }
    net->ssthresh = assoc->peers_rwnd;

    SDT_PROBE(sctp, cwnd, net, init,
        stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
        0, net->cwnd);
    if (SCTP_BASE_SYSCTL(sctp_logging_level) &
        (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
        sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
    }
}
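/*
 * Illustrative example (not from the original source): on the RFC 4960
 * path with mtu = 1500 and SCTP_INITIAL_CWND = 4380, the initial cwnd is
 * min(4 * 1500, max(2 * 1500, 4380)) = min(6000, 4380) = 4380 bytes. With
 * sctp_initial_cwnd set to, say, 10 MTUs and no burst limit, a 1500-byte
 * MTU gives cwnd = (1500 - sizeof(struct sctphdr)) * 10 = 14880 bytes.
 */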
static void
sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
    struct sctp_nets *net;
    uint32_t t_ssthresh, t_cwnd;
    uint64_t t_ucwnd_sbw;

    /* MT FIXME: Don't compute this over and over again */
    t_ssthresh = 0;
    t_cwnd = 0;
    t_ucwnd_sbw = 0;
    if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) ||
        (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) {
        TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
            t_ssthresh += net->ssthresh;
            t_cwnd += net->cwnd;
            if (net->lastsa > 0) {
                t_ucwnd_sbw += (uint64_t) net->cwnd / (uint64_t) net->lastsa;
            }
        }
        if (t_ucwnd_sbw == 0) {
            t_ucwnd_sbw = 1;
        }
    }
    /*-
     * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
     * (net->fast_retran_loss_recovery == 0)))
     */
    TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
        if ((asoc->fast_retran_loss_recovery == 0) ||
            (asoc->sctp_cmt_on_off > 0)) {
            /* out of a RFC2582 Fast recovery window? */
            if (net->net_ack > 0) {
                /*
                 * per section 7.2.3, are there any
                 * destinations that had a fast retransmit
                 * to them. If so what we need to do is
                 * adjust ssthresh and cwnd.
                 */
                struct sctp_tmit_chunk *lchk;
                int old_cwnd = net->cwnd;

                if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) ||
                    (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) {
                    if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) {
                        net->ssthresh = (uint32_t) (((uint64_t) 4 *
                            (uint64_t) net->mtu *
                            (uint64_t) net->ssthresh) /
                            (uint64_t) t_ssthresh);
                    }
                    if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2) {
                        uint32_t srtt;

                        srtt = net->lastsa;
                        /*
                         * lastsa >> 3; we don't need
                         * to divide ...
                         */
                        if (srtt == 0) {
                            srtt = 1;
                        }
                        /*
                         * Short Version => Equal to
                         * Contel Version MBe
                         */
                        net->ssthresh = (uint32_t) (((uint64_t) 4 *
                            (uint64_t) net->mtu *
                            (uint64_t) net->cwnd) /
                            ((uint64_t) srtt *
                            t_ucwnd_sbw));
                        /* INCREASE FACTOR */
                    }
                    if ((net->cwnd > t_cwnd / 2) &&
                        (net->ssthresh < net->cwnd - t_cwnd / 2)) {
                        net->ssthresh = net->cwnd - t_cwnd / 2;
                    }
                    if (net->ssthresh < net->mtu) {
                        net->ssthresh = net->mtu;
                    }
                } else {
                    net->ssthresh = net->cwnd / 2;
                    if (net->ssthresh < (net->mtu * 2)) {
                        net->ssthresh = 2 * net->mtu;
                    }
                }
                net->cwnd = net->ssthresh;
                SDT_PROBE(sctp, cwnd, net, fr,
                    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
                    old_cwnd, net->cwnd);
                if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
                    sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
                        SCTP_CWND_LOG_FROM_FR);
                }
                lchk = TAILQ_FIRST(&asoc->send_queue);

                net->partial_bytes_acked = 0;
                /* Turn on fast recovery window */
                asoc->fast_retran_loss_recovery = 1;
                if (lchk == NULL) {
                    /* Mark end of the window */
                    asoc->fast_recovery_tsn = asoc->sending_seq - 1;
                } else {
                    asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
                }

                /*
                 * CMT fast recovery -- per destination
                 * recovery variable.
                 */
                net->fast_retran_loss_recovery = 1;

                if (lchk == NULL) {
                    /* Mark end of the window */
                    net->fast_recovery_tsn = asoc->sending_seq - 1;
                } else {
                    net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
                }

                sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
                    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
                sctp_timer_start(SCTP_TIMER_TYPE_SEND,
                    stcb->sctp_ep, stcb, net);
            }
        } else if (net->net_ack > 0) {
            /*
             * Mark a peg that we WOULD have done a cwnd
             * reduction but RFC2582 prevented this action.
             */
            SCTP_STAT_INCR(sctps_fastretransinrtt);
        }
    }
}

/* Defines for instantaneous bw decisions */
#define SCTP_INST_LOOSING 1 /* Losing to other flows */
#define SCTP_INST_NEUTRAL 2 /* Neutral, no indication */
#define SCTP_INST_GAINING 3 /* Gaining, step down possible */
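/*
 * Illustrative example (not from the original source): under SCTP_CMT_RPV1
 * the fast-retransmit cut scales with this destination's share of the
 * pooled ssthresh. With two paths, mtu = 1500, net->ssthresh = 30000 and
 * t_ssthresh = 60000, the new ssthresh is (4 * 1500 * 30000) / 60000 =
 * 3000 bytes, i.e. 4 MTUs weighted by the path's 1/2 share of the pool.
 */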
static int
cc_bw_same(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw,
    uint64_t rtt_offset, uint64_t vtag, uint8_t inst_ind)
{
    uint64_t oth, probepoint;

    probepoint = (((uint64_t) net->cwnd) << 32);
    if (net->rtt > net->cc_mod.rtcc.lbw_rtt + rtt_offset) {
        /*
         * rtt increased, we don't update bw.. so we don't update the
         * rtt either.
         */
        /* Probe point 5 */
        probepoint |= ((5 << 16) | 1);
        SDT_PROBE(sctp, cwnd, net, rttvar,
            vtag,
            ((net->cc_mod.rtcc.lbw << 32) | nbw),
            ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
            net->flight_size,
            probepoint);
        if ((net->cc_mod.rtcc.steady_step) && (inst_ind != SCTP_INST_LOOSING)) {
            if (net->cc_mod.rtcc.last_step_state == 5)
                net->cc_mod.rtcc.step_cnt++;
            else
                net->cc_mod.rtcc.step_cnt = 1;
            net->cc_mod.rtcc.last_step_state = 5;
            if ((net->cc_mod.rtcc.step_cnt == net->cc_mod.rtcc.steady_step) ||
                ((net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step) &&
                ((net->cc_mod.rtcc.step_cnt % net->cc_mod.rtcc.steady_step) == 0))) {
                /* Try a step down */
                oth = net->cc_mod.rtcc.vol_reduce;
                oth <<= 16;
                oth |= net->cc_mod.rtcc.step_cnt;
                oth <<= 16;
                oth |= net->cc_mod.rtcc.last_step_state;
                SDT_PROBE(sctp, cwnd, net, rttstep,
                    vtag,
                    ((net->cc_mod.rtcc.lbw << 32) | nbw),
                    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
                    oth,
                    probepoint);
                if (net->cwnd > (4 * net->mtu)) {
                    net->cwnd -= net->mtu;
                    net->cc_mod.rtcc.vol_reduce++;
                } else {
                    net->cc_mod.rtcc.step_cnt = 0;
                }
            }
        }
        return (1);
    }
    if (net->rtt < net->cc_mod.rtcc.lbw_rtt - rtt_offset) {
        /*
         * rtt decreased, there could be more room. we update both
         * the bw and the rtt here to lock this in as a good step
         * down.
         */
        /* Probe point 6 */
        probepoint |= ((6 << 16) | 0);
        SDT_PROBE(sctp, cwnd, net, rttvar,
            vtag,
            ((net->cc_mod.rtcc.lbw << 32) | nbw),
            ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
            net->flight_size,
            probepoint);
        if (net->cc_mod.rtcc.steady_step) {
            oth = net->cc_mod.rtcc.vol_reduce;
            oth <<= 16;
            oth |= net->cc_mod.rtcc.step_cnt;
            oth <<= 16;
            oth |= net->cc_mod.rtcc.last_step_state;
            SDT_PROBE(sctp, cwnd, net, rttstep,
                vtag,
                ((net->cc_mod.rtcc.lbw << 32) | nbw),
                ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
                oth,
                probepoint);
            if ((net->cc_mod.rtcc.last_step_state == 5) &&
                (net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step)) {
                /* Step down worked */
                net->cc_mod.rtcc.step_cnt = 0;
                return (1);
            } else {
                net->cc_mod.rtcc.last_step_state = 6;
                net->cc_mod.rtcc.step_cnt = 0;
            }
        }
        net->cc_mod.rtcc.lbw = nbw;
        net->cc_mod.rtcc.lbw_rtt = net->rtt;
        net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd;
        if (inst_ind == SCTP_INST_GAINING)
            return (1);
        else if (inst_ind == SCTP_INST_NEUTRAL)
            return (1);
        else
            return (0);
    }
    /*
     * Ok, bw and rtt remained the same.. no update to either.
     */
    /* Probe point 7 */
    probepoint |= ((7 << 16) | net->cc_mod.rtcc.ret_from_eq);
    SDT_PROBE(sctp, cwnd, net, rttvar,
        vtag,
        ((net->cc_mod.rtcc.lbw << 32) | nbw),
        ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
        net->flight_size,
        probepoint);

    if ((net->cc_mod.rtcc.steady_step) && (inst_ind != SCTP_INST_LOOSING)) {
        if (net->cc_mod.rtcc.last_step_state == 5)
            net->cc_mod.rtcc.step_cnt++;
        else
            net->cc_mod.rtcc.step_cnt = 1;
        net->cc_mod.rtcc.last_step_state = 5;
        if ((net->cc_mod.rtcc.step_cnt == net->cc_mod.rtcc.steady_step) ||
            ((net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step) &&
            ((net->cc_mod.rtcc.step_cnt % net->cc_mod.rtcc.steady_step) == 0))) {
            /* Try a step down */
            if (net->cwnd > (4 * net->mtu)) {
                net->cwnd -= net->mtu;
                net->cc_mod.rtcc.vol_reduce++;
                return (1);
            } else {
                net->cc_mod.rtcc.step_cnt = 0;
            }
        }
    }
    if (inst_ind == SCTP_INST_GAINING)
        return (1);
    else if (inst_ind == SCTP_INST_NEUTRAL)
        return (1);
    else
        return ((int)net->cc_mod.rtcc.ret_from_eq);
}
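/*
 * Illustrative example (not from the original source): with steady_step =
 * 4, the "same bw, same rtt" state (state 5) tries a voluntary step down
 * on the 4th, 8th, 12th, ... consecutive visit. If cwnd is above 4 MTUs it
 * is reduced by one MTU and vol_reduce is bumped, so the reduction can be
 * reclaimed later should the measured bandwidth suffer for it.
 */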
static int
cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint64_t rtt_offset,
    uint64_t vtag, uint8_t inst_ind)
{
    uint64_t oth, probepoint;

    /* Bandwidth decreased. */
    probepoint = (((uint64_t) net->cwnd) << 32);
    if (net->rtt > net->cc_mod.rtcc.lbw_rtt + rtt_offset) {
        /* rtt increased */
        /* Did we add more */
        if ((net->cwnd > net->cc_mod.rtcc.cwnd_at_bw_set) &&
            (inst_ind != SCTP_INST_LOOSING)) {
            /* We caused it maybe.. back off? */
            /* PROBE POINT 1 */
            probepoint |= ((1 << 16) | 1);
            SDT_PROBE(sctp, cwnd, net, rttvar,
                vtag,
                ((net->cc_mod.rtcc.lbw << 32) | nbw),
                ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
                net->flight_size,
                probepoint);

            if (net->cc_mod.rtcc.ret_from_eq) {
                /*
                 * Switch over to CA if we are less
                 * aggressive
                 */
                net->ssthresh = net->cwnd - 1;
                net->partial_bytes_acked = 0;
            }
            return (1);
        }
        /* Probe point 2 */
        probepoint |= ((2 << 16) | 0);
        SDT_PROBE(sctp, cwnd, net, rttvar,
            vtag,
            ((net->cc_mod.rtcc.lbw << 32) | nbw),
            ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
            net->flight_size,
            probepoint);

        /* Someone else - fight for more? */
        if (net->cc_mod.rtcc.steady_step) {
            oth = net->cc_mod.rtcc.vol_reduce;
            oth <<= 16;
            oth |= net->cc_mod.rtcc.step_cnt;
            oth <<= 16;
            oth |= net->cc_mod.rtcc.last_step_state;
            SDT_PROBE(sctp, cwnd, net, rttstep,
                vtag,
                ((net->cc_mod.rtcc.lbw << 32) | nbw),
                ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
                oth,
                probepoint);
            /*
             * Did we voluntarily give up some? If so, take one
             * back please.
             */
            if ((net->cc_mod.rtcc.vol_reduce) &&
                (inst_ind != SCTP_INST_GAINING)) {
                net->cwnd += net->mtu;
                net->cc_mod.rtcc.vol_reduce--;
            }
            net->cc_mod.rtcc.last_step_state = 2;
            net->cc_mod.rtcc.step_cnt = 0;
        }
        goto out_decision;
    } else if (net->rtt < net->cc_mod.rtcc.lbw_rtt - rtt_offset) {
        /* bw & rtt decreased */
        /* Probe point 3 */
        probepoint |= ((3 << 16) | 0);
        SDT_PROBE(sctp, cwnd, net, rttvar,
            vtag,
            ((net->cc_mod.rtcc.lbw << 32) | nbw),
            ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
            net->flight_size,
            probepoint);
        if (net->cc_mod.rtcc.steady_step) {
            oth = net->cc_mod.rtcc.vol_reduce;
            oth <<= 16;
            oth |= net->cc_mod.rtcc.step_cnt;
            oth <<= 16;
            oth |= net->cc_mod.rtcc.last_step_state;
            SDT_PROBE(sctp, cwnd, net, rttstep,
                vtag,
                ((net->cc_mod.rtcc.lbw << 32) | nbw),
                ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
                oth,
                probepoint);
            if ((net->cc_mod.rtcc.vol_reduce) &&
                (inst_ind != SCTP_INST_GAINING)) {
                net->cwnd += net->mtu;
                net->cc_mod.rtcc.vol_reduce--;
            }
            net->cc_mod.rtcc.last_step_state = 3;
            net->cc_mod.rtcc.step_cnt = 0;
        }
        goto out_decision;
    }
    /* The bw decreased but rtt stayed the same */
    /* Probe point 4 */
    probepoint |= ((4 << 16) | 0);
    SDT_PROBE(sctp, cwnd, net, rttvar,
        vtag,
        ((net->cc_mod.rtcc.lbw << 32) | nbw),
        ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
        net->flight_size,
        probepoint);
    if (net->cc_mod.rtcc.steady_step) {
        oth = net->cc_mod.rtcc.vol_reduce;
        oth <<= 16;
        oth |= net->cc_mod.rtcc.step_cnt;
        oth <<= 16;
        oth |= net->cc_mod.rtcc.last_step_state;
        SDT_PROBE(sctp, cwnd, net, rttstep,
            vtag,
            ((net->cc_mod.rtcc.lbw << 32) | nbw),
            ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
            oth,
            probepoint);
        if ((net->cc_mod.rtcc.vol_reduce) &&
            (inst_ind != SCTP_INST_GAINING)) {
            net->cwnd += net->mtu;
            net->cc_mod.rtcc.vol_reduce--;
        }
        net->cc_mod.rtcc.last_step_state = 4;
        net->cc_mod.rtcc.step_cnt = 0;
    }
out_decision:
    net->cc_mod.rtcc.lbw = nbw;
    net->cc_mod.rtcc.lbw_rtt = net->rtt;
    net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd;
    if (inst_ind == SCTP_INST_GAINING) {
        return (1);
    } else {
        return (0);
    }
}
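/*
 * Illustrative note (not from the original source): the probepoint
 * argument packs (cwnd << 32) | (probe_point << 16) | flag. For example,
 * probe point 2 with cwnd = 12000 is (12000ULL << 32) | (2 << 16) | 0,
 * which lets a single DTrace probe distinguish every decision point in
 * the functions above without extra arguments.
 */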
static int
cc_bw_increase(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw,
    uint64_t vtag, uint8_t inst_ind)
{
    uint64_t oth, probepoint;

    /*
     * BW increased, so update and return 0, since all actions in our
     * table say to do the normal CC update. Note that we pay no
     * attention to the inst_ind since our overall sum is increasing.
     */
    /* PROBE POINT 0 */
    probepoint = (((uint64_t) net->cwnd) << 32);
    SDT_PROBE(sctp, cwnd, net, rttvar,
        vtag,
        ((net->cc_mod.rtcc.lbw << 32) | nbw),
        ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
        net->flight_size,
        probepoint);
    if (net->cc_mod.rtcc.steady_step) {
        oth = net->cc_mod.rtcc.vol_reduce;
        oth <<= 16;
        oth |= net->cc_mod.rtcc.step_cnt;
        oth <<= 16;
        oth |= net->cc_mod.rtcc.last_step_state;
        SDT_PROBE(sctp, cwnd, net, rttstep,
            vtag,
            ((net->cc_mod.rtcc.lbw << 32) | nbw),
            ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
            oth,
            probepoint);
        net->cc_mod.rtcc.last_step_state = 0;
        net->cc_mod.rtcc.step_cnt = 0;
        net->cc_mod.rtcc.vol_reduce = 0;
    }
    net->cc_mod.rtcc.lbw = nbw;
    net->cc_mod.rtcc.lbw_rtt = net->rtt;
    net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd;
    return (0);
}

/* RTCC algorithm to limit growth of cwnd; return
 * true if you want to NOT allow cwnd growth.
 */
static int
cc_bw_limit(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw)
{
    uint64_t bw_offset, rtt_offset, rtt, vtag, probepoint;
    uint64_t bytes_for_this_rtt, inst_bw;
    uint64_t div, inst_off;
    int bw_shift;
    uint8_t inst_ind;
    int ret;

    /*-
     * Here we need to see if we want
     * to limit cwnd growth due to increase
     * in overall rtt but no increase in bw.
     * We use the following table to figure
     * out what we should do. When we return
     * 0, cc update goes on as planned. If we
     * return 1, then no cc update happens and cwnd
     * stays where it is at.
     * ----------------------------------
     *   BW  |  RTT  | Action
     * *********************************
     *   INC |  INC  | return 0
     * ----------------------------------
     *   INC |  SAME | return 0
     * ----------------------------------
     *   INC |  DECR | return 0
     * ----------------------------------
     *  SAME |  INC  | return 1
     * ----------------------------------
     *  SAME |  SAME | return 1
     * ----------------------------------
     *  SAME |  DECR | return 0
     * ----------------------------------
     *  DECR |  INC  | return 0 or 1 based on if we caused.
     * ----------------------------------
     *  DECR |  SAME | return 0
     * ----------------------------------
     *  DECR |  DECR | return 0
     * ----------------------------------
     *
     * We are a bit fuzzy about what an increase or
     * decrease is. For BW it is the same if
     * it did not change within 1/64th. For
     * RTT it stayed the same if it did not
     * change within 1/32nd.
     */
    bw_shift = SCTP_BASE_SYSCTL(sctp_rttvar_bw);
    rtt = stcb->asoc.my_vtag;
    vtag = (rtt << 32) | (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) | (stcb->rport);
    probepoint = (((uint64_t) net->cwnd) << 32);
    rtt = net->rtt;
    if (net->cc_mod.rtcc.rtt_set_this_sack) {
        net->cc_mod.rtcc.rtt_set_this_sack = 0;
        bytes_for_this_rtt = net->cc_mod.rtcc.bw_bytes - net->cc_mod.rtcc.bw_bytes_at_last_rttc;
        net->cc_mod.rtcc.bw_bytes_at_last_rttc = net->cc_mod.rtcc.bw_bytes;
        if (net->rtt) {
            div = net->rtt / 1000;
            if (div) {
                inst_bw = bytes_for_this_rtt / div;
                inst_off = inst_bw >> bw_shift;
                if (inst_bw > nbw)
                    inst_ind = SCTP_INST_GAINING;
                else if ((inst_bw + inst_off) < nbw)
                    inst_ind = SCTP_INST_LOOSING;
                else
                    inst_ind = SCTP_INST_NEUTRAL;
                probepoint |= ((0xb << 16) | inst_ind);
            } else {
                inst_bw = bytes_for_this_rtt / (uint64_t) (net->rtt);
                /* Can't determine, do not change */
                inst_ind = net->cc_mod.rtcc.last_inst_ind;
                probepoint |= ((0xc << 16) | inst_ind);
            }
        } else {
            inst_bw = bytes_for_this_rtt;
            /* Can't determine, do not change */
            inst_ind = net->cc_mod.rtcc.last_inst_ind;
            probepoint |= ((0xd << 16) | inst_ind);
        }
        SDT_PROBE(sctp, cwnd, net, rttvar,
            vtag,
            ((nbw << 32) | inst_bw),
            ((net->cc_mod.rtcc.lbw_rtt << 32) | rtt),
            net->flight_size,
            probepoint);
    } else {
        /* No rtt measurement, use last one */
        inst_ind = net->cc_mod.rtcc.last_inst_ind;
    }
    bw_offset = net->cc_mod.rtcc.lbw >> bw_shift;
    if (nbw > net->cc_mod.rtcc.lbw + bw_offset) {
        ret = cc_bw_increase(stcb, net, nbw, vtag, inst_ind);
        goto out;
    }
    rtt_offset = net->cc_mod.rtcc.lbw_rtt >> SCTP_BASE_SYSCTL(sctp_rttvar_rtt);
    if (nbw < net->cc_mod.rtcc.lbw - bw_offset) {
        ret = cc_bw_decrease(stcb, net, nbw, rtt_offset, vtag, inst_ind);
        goto out;
    }
    /*
     * If we reach here then we are in a situation where the bw stayed
     * the same.
     */
    ret = cc_bw_same(stcb, net, nbw, rtt_offset, vtag, inst_ind);
out:
    net->cc_mod.rtcc.last_inst_ind = inst_ind;
    return (ret);
}
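/*
 * Illustrative example (not from the original source), assuming shifts of
 * 6 and 5 (the 1/64 and 1/32 mentioned above): bw_offset = lbw >> 6 and
 * rtt_offset = lbw_rtt >> 5. For lbw = 1,280,000 bytes/sec and lbw_rtt =
 * 64 ms, bandwidth changes under 20,000 bytes/sec and rtt changes under
 * 2 ms are treated as "the same" by the decision table.
 */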
static void
sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit, int use_rtcc)
{
    struct sctp_nets *net;
    int old_cwnd;
    uint32_t t_ssthresh, t_cwnd, incr;
    uint64_t t_ucwnd_sbw;
    uint64_t t_path_mptcp;
    uint64_t mptcp_like_alpha;
    uint32_t srtt;
    uint64_t max_path;

    /* MT FIXME: Don't compute this over and over again */
    t_ssthresh = 0;
    t_cwnd = 0;
    t_ucwnd_sbw = 0;
    t_path_mptcp = 0;
    mptcp_like_alpha = 1;
    if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
        (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2) ||
        (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_MPTCP)) {
        max_path = 0;
        TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
            t_ssthresh += net->ssthresh;
            t_cwnd += net->cwnd;
            /* lastsa >> 3; we don't need to divide ... */
            srtt = net->lastsa;
            if (srtt > 0) {
                uint64_t tmp;

                t_ucwnd_sbw += (uint64_t) net->cwnd / (uint64_t) srtt;
                t_path_mptcp += (((uint64_t) net->cwnd) << SHIFT_MPTCP_MULTI_Z) /
                    (((uint64_t) net->mtu) * (uint64_t) srtt);
                tmp = (((uint64_t) net->cwnd) << SHIFT_MPTCP_MULTI_N) /
                    ((uint64_t) net->mtu * (uint64_t) (srtt * srtt));
                if (tmp > max_path) {
                    max_path = tmp;
                }
            }
        }
        if (t_ucwnd_sbw == 0) {
            t_ucwnd_sbw = 1;
        }
        if (t_path_mptcp > 0) {
            mptcp_like_alpha = max_path / (t_path_mptcp * t_path_mptcp);
        } else {
            mptcp_like_alpha = 1;
        }
    }
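    /*
     * Illustrative note (not from the original source): this is a
     * fixed-point rendering of an MPTCP-style coupled alpha, roughly
     * max_i(cwnd_i / rtt_i^2) / (sum_i(cwnd_i / rtt_i))^2 with cwnd
     * measured in MTUs. The shifts are chosen so the scales cancel:
     * SHIFT_MPTCP_MULTI_N (40) = 2 * SHIFT_MPTCP_MULTI_Z (16) +
     * SHIFT_MPTCP_MULTI (8), leaving mptcp_like_alpha scaled by 2^8,
     * which the SCTP_CMT_MPTCP increase code below shifts back out.
     */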
    /******************************/
    /* update cwnd and Early FR   */
    /******************************/
    TAILQ_FOREACH(net, &asoc->nets, sctp_next) {

#ifdef JANA_CMT_FAST_RECOVERY
        /*
         * CMT fast recovery code. Need to debug.
         */
        if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
            if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) ||
                SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) {
                net->will_exit_fast_recovery = 1;
            }
        }
#endif
        if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
            /*
             * So, first of all do we need to have an Early FR
             * timer running?
             */
            if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
                (net->ref_count > 1) &&
                (net->flight_size < net->cwnd)) ||
                (reneged_all)) {
                /*
                 * Yes, so in this case stop it if it's
                 * running, and then restart it. Reneging
                 * all is a special case where we want to
                 * run the Early FR timer and then force the
                 * last few unacked to be sent, causing us
                 * to elicit a SACK with gaps to force out
                 * the others.
                 */
                if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
                    SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
                    sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
                        SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
                }
                SCTP_STAT_INCR(sctps_earlyfrstrid);
                sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
            } else {
                /* No, stop it if it's running */
                if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
                    SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
                    sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
                        SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
                }
            }
        }
        /* if nothing was acked on this destination skip it */
        if (net->net_ack == 0) {
            if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
                sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
            }
            continue;
        }
        if (net->net_ack2 > 0) {
            /*
             * Karn's rule applies to clearing error count, this
             * is optional.
             */
            net->error_count = 0;
            if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
                SCTP_ADDR_NOT_REACHABLE) {
                /* addr came good */
                net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
                net->dest_state |= SCTP_ADDR_REACHABLE;
                sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
                    SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
                /* now was it the primary? if so restore */
                if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
                    (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
                }
            }
            /*
             * JRS 5/14/07 - If CMT PF is on and the destination
             * is in PF state, set the destination to active
             * state and set the cwnd to one or two MTUs based
             * on whether PF1 or PF2 is being used.
             *
             * Should we stop any running T3 timer here?
             */
            if ((asoc->sctp_cmt_on_off > 0) &&
                (asoc->sctp_cmt_pf > 0) &&
                ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
                net->dest_state &= ~SCTP_ADDR_PF;
                old_cwnd = net->cwnd;
                net->cwnd = net->mtu * asoc->sctp_cmt_pf;
                SDT_PROBE(sctp, cwnd, net, ack,
                    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
                    old_cwnd, net->cwnd);
                SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
                    net, net->cwnd);
                /*
                 * Since the cwnd value is explicitly set,
                 * skip the code that updates the cwnd
                 * value.
                 */
                goto skip_cwnd_update;
            }
        }
#ifdef JANA_CMT_FAST_RECOVERY
        /*
         * CMT fast recovery code
         */
        /*
         * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
         * && net->will_exit_fast_recovery == 0) { @@@ Do something
         * } else if (sctp_cmt_on_off == 0 &&
         * asoc->fast_retran_loss_recovery && will_exit == 0) {
         */
#endif

        if (asoc->fast_retran_loss_recovery &&
            (will_exit == 0) &&
            (asoc->sctp_cmt_on_off == 0)) {
            /*
             * If we are in loss recovery we skip any cwnd
             * update
             */
            goto skip_cwnd_update;
        }
        /*
         * Did any measurements go on for this network?
         */
        if (use_rtcc && (net->cc_mod.rtcc.tls_needs_set > 0)) {
            uint64_t nbw;

            /*
             * At this point our bw_bytes has been updated by
             * incoming sack information.
             *
             * But our bw may not yet be set.
             */
            if ((net->cc_mod.rtcc.new_tot_time / 1000) > 0) {
                nbw = net->cc_mod.rtcc.bw_bytes / (net->cc_mod.rtcc.new_tot_time / 1000);
            } else {
                nbw = net->cc_mod.rtcc.bw_bytes;
            }
            if (net->cc_mod.rtcc.lbw) {
                if (cc_bw_limit(stcb, net, nbw)) {
                    /* Hold here, no update */
                    goto skip_cwnd_update;
                }
            } else {
                uint64_t vtag, probepoint;

                probepoint = (((uint64_t) net->cwnd) << 32);
                probepoint |= ((0xa << 16) | 0);
                vtag = (net->rtt << 32) |
                    (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) |
                    (stcb->rport);

                SDT_PROBE(sctp, cwnd, net, rttvar,
                    vtag,
                    nbw,
                    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
                    net->flight_size,
                    probepoint);
                net->cc_mod.rtcc.lbw = nbw;
                net->cc_mod.rtcc.lbw_rtt = net->rtt;
                if (net->cc_mod.rtcc.rtt_set_this_sack) {
                    net->cc_mod.rtcc.rtt_set_this_sack = 0;
                    net->cc_mod.rtcc.bw_bytes_at_last_rttc = net->cc_mod.rtcc.bw_bytes;
                }
            }
        }
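        /*
         * Illustrative example (not from the original source):
         * new_tot_time is kept in microseconds, so nbw = bw_bytes /
         * (new_tot_time / 1000) is bytes per millisecond. E.g.
         * 1,000,000 bytes acked over 250,000 usec (250 ms) gives
         * nbw = 4000 bytes/ms, which is then compared against the
         * stored lbw by cc_bw_limit().
         */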
        /*
         * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
         * moved.
         */
        if (accum_moved ||
            ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
            /* If the cumulative ack moved we can proceed */
            if (net->cwnd <= net->ssthresh) {
                /* We are in slow start */
                if (net->flight_size + net->net_ack >= net->cwnd) {
                    uint32_t limit;

                    old_cwnd = net->cwnd;
                    switch (asoc->sctp_cmt_on_off) {
                    case SCTP_CMT_RPV1:
                        limit = (uint32_t) (((uint64_t) net->mtu *
                            (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable) *
                            (uint64_t) net->ssthresh) /
                            (uint64_t) t_ssthresh);
                        incr = (uint32_t) (((uint64_t) net->net_ack *
                            (uint64_t) net->ssthresh) /
                            (uint64_t) t_ssthresh);
                        if (incr > limit) {
                            incr = limit;
                        }
                        if (incr == 0) {
                            incr = 1;
                        }
                        break;
                    case SCTP_CMT_RPV2:
                        /*
                         * lastsa >> 3; we don't need
                         * to divide ...
                         */
                        srtt = net->lastsa;
                        if (srtt == 0) {
                            srtt = 1;
                        }
                        limit = (uint32_t) (((uint64_t) net->mtu *
                            (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable) *
                            (uint64_t) net->cwnd) /
                            ((uint64_t) srtt * t_ucwnd_sbw));
                        /* INCREASE FACTOR */
                        incr = (uint32_t) (((uint64_t) net->net_ack *
                            (uint64_t) net->cwnd) /
                            ((uint64_t) srtt * t_ucwnd_sbw));
                        /* INCREASE FACTOR */
                        if (incr > limit) {
                            incr = limit;
                        }
                        if (incr == 0) {
                            incr = 1;
                        }
                        break;
                    case SCTP_CMT_MPTCP:
                        limit = (uint32_t) (((uint64_t) net->mtu *
                            mptcp_like_alpha *
                            (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) >>
                            SHIFT_MPTCP_MULTI);
                        incr = (uint32_t) (((uint64_t) net->net_ack *
                            mptcp_like_alpha) >>
                            SHIFT_MPTCP_MULTI);
                        if (incr > limit) {
                            incr = limit;
                        }
                        if (incr > net->net_ack) {
                            incr = net->net_ack;
                        }
                        if (incr > net->mtu) {
                            incr = net->mtu;
                        }
                        break;
                    default:
                        incr = net->net_ack;
                        if (incr > net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) {
                            incr = net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable);
                        }
                        break;
                    }
                    net->cwnd += incr;
                    if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
                        sctp_log_cwnd(stcb, net, incr,
                            SCTP_CWND_LOG_FROM_SS);
                    }
                    SDT_PROBE(sctp, cwnd, net, ack,
                        stcb->asoc.my_vtag,
                        ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
                        net,
                        old_cwnd, net->cwnd);
                } else {
                    if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
                        sctp_log_cwnd(stcb, net, net->net_ack,
                            SCTP_CWND_LOG_NOADV_SS);
                    }
                }
            } else {
                /* We are in congestion avoidance */
                /*
                 * Add to pba
                 */
                net->partial_bytes_acked += net->net_ack;

                if ((net->flight_size + net->net_ack >= net->cwnd) &&
                    (net->partial_bytes_acked >= net->cwnd)) {
                    net->partial_bytes_acked -= net->cwnd;
                    old_cwnd = net->cwnd;
                    switch (asoc->sctp_cmt_on_off) {
                    case SCTP_CMT_RPV1:
                        incr = (uint32_t) (((uint64_t) net->mtu *
                            (uint64_t) net->ssthresh) /
                            (uint64_t) t_ssthresh);
                        if (incr == 0) {
                            incr = 1;
                        }
                        break;
                    case SCTP_CMT_RPV2:
                        /*
                         * lastsa >> 3; we don't need
                         * to divide ...
                         */
                        srtt = net->lastsa;
                        if (srtt == 0) {
                            srtt = 1;
                        }
                        incr = (uint32_t) ((uint64_t) net->mtu *
                            (uint64_t) net->cwnd /
                            ((uint64_t) srtt *
                            t_ucwnd_sbw));
                        /* INCREASE FACTOR */
                        if (incr == 0) {
                            incr = 1;
                        }
                        break;
                    case SCTP_CMT_MPTCP:
                        incr = (uint32_t) ((mptcp_like_alpha *
                            (uint64_t) net->cwnd) >>
                            SHIFT_MPTCP_MULTI);
                        if (incr > net->mtu) {
                            incr = net->mtu;
                        }
                        break;
                    default:
                        incr = net->mtu;
                        break;
                    }
                    net->cwnd += incr;
                    SDT_PROBE(sctp, cwnd, net, ack,
                        stcb->asoc.my_vtag,
                        ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
                        net,
                        old_cwnd, net->cwnd);
                    if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
                        sctp_log_cwnd(stcb, net, net->mtu,
                            SCTP_CWND_LOG_FROM_CA);
                    }
                } else {
                    if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
                        sctp_log_cwnd(stcb, net, net->net_ack,
                            SCTP_CWND_LOG_NOADV_CA);
                    }
                }
            }
        } else {
            if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
                sctp_log_cwnd(stcb, net, net->mtu,
                    SCTP_CWND_LOG_NO_CUMACK);
            }
        }
skip_cwnd_update:
        /*
         * NOW, according to Karn's rule do we need to restore the
         * RTO timer back? Check our net_ack2. If not set then we
         * have an ambiguity.. i.e. all data ack'd was sent to more
         * than one place.
         */
        if (net->net_ack2) {
            /* restore any doubled timers */
            net->RTO = (net->lastsa >> SCTP_RTT_SHIFT) + net->lastsv;
            if (net->RTO < stcb->asoc.minrto) {
                net->RTO = stcb->asoc.minrto;
            }
            if (net->RTO > stcb->asoc.maxrto) {
                net->RTO = stcb->asoc.maxrto;
            }
        }
    }
}

static void
sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net)
{
    int old_cwnd = net->cwnd;
    uint32_t t_ssthresh, t_cwnd;
    uint64_t t_ucwnd_sbw;

    /* MT FIXME: Don't compute this over and over again */
    t_ssthresh = 0;
    t_cwnd = 0;
    if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
        (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) {
        struct sctp_nets *lnet;
        uint32_t srtt;

        t_ucwnd_sbw = 0;
        TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
            t_ssthresh += lnet->ssthresh;
            t_cwnd += lnet->cwnd;
            srtt = lnet->lastsa;
            /* lastsa >> 3; we don't need to divide ... */
            if (srtt > 0) {
                t_ucwnd_sbw += (uint64_t) lnet->cwnd / (uint64_t) srtt;
            }
        }
        if (t_ucwnd_sbw < 1) {
            t_ucwnd_sbw = 1;
        }
        if (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) {
            net->ssthresh = (uint32_t) (((uint64_t) 4 *
                (uint64_t) net->mtu *
                (uint64_t) net->ssthresh) /
                (uint64_t) t_ssthresh);
        } else {
            uint64_t cc_delta;

            srtt = net->lastsa;
            /* lastsa >> 3; we don't need to divide ... */
            if (srtt == 0) {
                srtt = 1;
            }
            cc_delta = t_ucwnd_sbw * (uint64_t) srtt / 2;
            if (cc_delta < t_cwnd) {
                net->ssthresh = (uint32_t) ((uint64_t) t_cwnd - cc_delta);
            } else {
                net->ssthresh = net->mtu;
            }
        }
        if ((net->cwnd > t_cwnd / 2) &&
            (net->ssthresh < net->cwnd - t_cwnd / 2)) {
            net->ssthresh = net->cwnd - t_cwnd / 2;
        }
        if (net->ssthresh < net->mtu) {
            net->ssthresh = net->mtu;
        }
    } else {
        net->ssthresh = max(net->cwnd / 2, 4 * net->mtu);
    }
    net->cwnd = net->mtu;
    net->partial_bytes_acked = 0;
    SDT_PROBE(sctp, cwnd, net, to,
        stcb->asoc.my_vtag,
        ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
        net,
        old_cwnd, net->cwnd);
    if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
        sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
    }
}
static void
sctp_cwnd_update_after_ecn_echo_common(struct sctp_tcb *stcb, struct sctp_nets *net,
    int in_window, int num_pkt_lost, int use_rtcc)
{
    int old_cwnd = net->cwnd;

    if ((use_rtcc) && (net->lan_type == SCTP_LAN_LOCAL) && (net->cc_mod.rtcc.use_dccc_ecn)) {
        /* Data center Congestion Control */
        if (in_window == 0) {
            /*
             * Go to CA with the cwnd at the point we sent the
             * TSN that was marked with a CE.
             */
            if (net->ecn_prev_cwnd < net->cwnd) {
                /* Restore to prev cwnd */
                net->cwnd = net->ecn_prev_cwnd - (net->mtu * num_pkt_lost);
            } else {
                /* Just cut in 1/2 */
                net->cwnd /= 2;
            }
            /* Drop to CA */
            net->ssthresh = net->cwnd - (num_pkt_lost * net->mtu);
            if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
                sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
            }
        } else {
            /*
             * Further tuning down required over the drastic
             * original cut
             */
            net->ssthresh -= (net->mtu * num_pkt_lost);
            net->cwnd -= (net->mtu * num_pkt_lost);
            if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
                sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
            }
        }
        SCTP_STAT_INCR(sctps_ecnereducedcwnd);
    } else {
        if (in_window == 0) {
            SCTP_STAT_INCR(sctps_ecnereducedcwnd);
            net->ssthresh = net->cwnd / 2;
            if (net->ssthresh < net->mtu) {
                net->ssthresh = net->mtu;
                /*
                 * here back off the timer as well, to slow
                 * us down
                 */
                net->RTO <<= 1;
            }
            net->cwnd = net->ssthresh;
            SDT_PROBE(sctp, cwnd, net, ecn,
                stcb->asoc.my_vtag,
                ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
                net,
                old_cwnd, net->cwnd);
            if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
                sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
            }
        }
    }
}
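/*
 * Illustrative example (not from the original source): with use_dccc_ecn
 * on a local LAN, a CE mark outside the current window (in_window == 0)
 * with ecn_prev_cwnd = 20000, cwnd = 24000, mtu = 1500 and num_pkt_lost =
 * 1 restores cwnd to 20000 - 1500 = 18500 and drops ssthresh to 18500 -
 * 1500 = 17000, a far gentler cut than the halving in the else branch.
 */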
static void
sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
    struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
    uint32_t * bottle_bw, uint32_t * on_queue)
{
    uint32_t bw_avail;
    int rtt;
    unsigned int incr;
    int old_cwnd = net->cwnd;

    /* need real RTT in ms for this calc */
    rtt = net->rtt / 1000;
    /* get bottle neck bw */
    *bottle_bw = ntohl(cp->bottle_bw);
    /* and what's on queue */
    *on_queue = ntohl(cp->current_onq);
    /*
     * adjust the on-queue if our flight is more; it could be that the
     * router has not yet gotten data "in flight" to it
     */
    if (*on_queue < net->flight_size)
        *on_queue = net->flight_size;
    /* calculate the available space */
    bw_avail = (*bottle_bw * rtt) / 1000;
    if (bw_avail > *bottle_bw) {
        /*
         * Cap the growth to no more than the bottle neck. This can
         * happen as RTT slides up due to queues. It also means if
         * you have more than a 1 second RTT with an empty queue you
         * will be limited to the bottle_bw per second no matter if
         * other points have 1/2 the RTT and you could get more
         * out...
         */
        bw_avail = *bottle_bw;
    }
    if (*on_queue > bw_avail) {
        /*
         * No room for anything else, don't allow anything else to be
         * "added to the fire".
         */
        int seg_inflight, seg_onqueue, my_portion;

        net->partial_bytes_acked = 0;

        /* how much are we over queue size? */
        incr = *on_queue - bw_avail;
        if (stcb->asoc.seen_a_sack_this_pkt) {
            /*
             * undo any cwnd adjustment that the sack might have
             * made
             */
            net->cwnd = net->prev_cwnd;
        }
        /* Now how much of that is mine? */
        seg_inflight = net->flight_size / net->mtu;
        seg_onqueue = *on_queue / net->mtu;
        my_portion = (incr * seg_inflight) / seg_onqueue;

        /* Have I made an adjustment already */
        if (net->cwnd > net->flight_size) {
            /*
             * for this flight I made an adjustment; we need to
             * decrease the portion by a share of our previous
             * adjustment.
             */
            int diff_adj;

            diff_adj = net->cwnd - net->flight_size;
            if (diff_adj > my_portion)
                my_portion = 0;
            else
                my_portion -= diff_adj;
        }
        /*
         * back down to the previous cwnd (assume we have had a sack
         * before this packet), minus whatever portion of the
         * overage is my fault.
         */
        net->cwnd -= my_portion;

        /* we will NOT back down more than 1 MTU */
        if (net->cwnd <= net->mtu) {
            net->cwnd = net->mtu;
        }
        /* force into CA */
        net->ssthresh = net->cwnd - 1;
    } else {
        /*
         * Take 1/4 of the space left or max burst up .. whichever
         * is less.
         */
        incr = (bw_avail - *on_queue) >> 2;
        if ((stcb->asoc.max_burst > 0) &&
            (stcb->asoc.max_burst * net->mtu < incr)) {
            incr = stcb->asoc.max_burst * net->mtu;
        }
        net->cwnd += incr;
    }
    if (net->cwnd > bw_avail) {
        /* We can't exceed the pipe size */
        net->cwnd = bw_avail;
    }
    if (net->cwnd < net->mtu) {
        /* We always have 1 MTU */
        net->cwnd = net->mtu;
    }
    if (net->cwnd - old_cwnd != 0) {
        /* log only changes */
        SDT_PROBE(sctp, cwnd, net, pd,
            stcb->asoc.my_vtag,
            ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
            net,
            old_cwnd, net->cwnd);
        if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
            sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
                SCTP_CWND_LOG_FROM_SAT);
        }
    }
}
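/*
 * Illustrative example (not from the original source): suppose the
 * PKTDROP report carries bottle_bw = 125,000 bytes/sec and on_queue =
 * 20,000 bytes, and rtt = 100 ms. Then bw_avail = 125000 * 100 / 1000 =
 * 12,500 bytes may be in the pipe, the queue is over by 7,500 bytes, and
 * this endpoint backs cwnd down by its flight-size share of that overage
 * (clamped so cwnd never drops below one MTU).
 */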
static void
sctp_cwnd_update_after_output(struct sctp_tcb *stcb,
    struct sctp_nets *net, int burst_limit)
{
    int old_cwnd = net->cwnd;

    if (net->ssthresh < net->cwnd)
        net->ssthresh = net->cwnd;
    if (burst_limit) {
        net->cwnd = (net->flight_size + (burst_limit * net->mtu));
        SDT_PROBE(sctp, cwnd, net, bl,
            stcb->asoc.my_vtag,
            ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
            net,
            old_cwnd, net->cwnd);
        if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
            sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST);
        }
    }
}

static void
sctp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb, struct sctp_nets *net)
{
    int old_cwnd = net->cwnd;

    sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
    /*
     * make a small adjustment to cwnd and force to CA.
     */
    if (net->cwnd > net->mtu)
        /* drop down one MTU after sending */
        net->cwnd -= net->mtu;
    if (net->cwnd < net->ssthresh)
        /* still in SS move to CA */
        net->ssthresh = net->cwnd - 1;
    SDT_PROBE(sctp, cwnd, net, fr,
        stcb->asoc.my_vtag,
        ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
        net,
        old_cwnd, net->cwnd);
    if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
        sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR);
    }
}

static void
sctp_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
    /* Passing zero as the last argument disables the RTCC algorithm */
    sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all, will_exit, 0);
}

static void
sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
    int in_window, int num_pkt_lost)
{
    /* Passing zero as the last argument disables the RTCC algorithm */
    sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost, 0);
}

/* Here starts the RTCCVAR type CC invented by RRS which
 * is a slight mod to RFC2581. We reuse a common routine or
 * two since these algorithms are so close and need to
 * remain the same.
 */
static void
sctp_cwnd_update_rtcc_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
    int in_window, int num_pkt_lost)
{
    sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost, 1);
}
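/*
 * Illustrative example for sctp_cwnd_update_after_output() above (not
 * from the original source): after a max-burst clamp with burst_limit =
 * 4, mtu = 1500 and flight_size = 3000, cwnd is pinned to 3000 + 4 * 1500
 * = 9000 bytes, so at most four new MTU-sized packets can be burst into
 * the network on the next send.
 */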
static void
sctp_cwnd_update_rtcc_tsn_acknowledged(struct sctp_nets *net,
    struct sctp_tmit_chunk *tp1)
{
    net->cc_mod.rtcc.bw_bytes += tp1->send_size;
}

static void
sctp_cwnd_prepare_rtcc_net_for_sack(struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
    if (net->cc_mod.rtcc.tls_needs_set > 0) {
        /* We had a bw measurement going on */
        struct timeval ltls;

        SCTP_GETPTIME_TIMEVAL(&ltls);
        timevalsub(&ltls, &net->cc_mod.rtcc.tls);
        net->cc_mod.rtcc.new_tot_time = (ltls.tv_sec * 1000000) + ltls.tv_usec;
    }
}

static void
sctp_cwnd_new_rtcc_transmission_begins(struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
    uint64_t vtag, probepoint;

    if (net->cc_mod.rtcc.lbw) {
        /* Clear the old bw.. we went to 0 in-flight */
        vtag = (net->rtt << 32) | (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) |
            (stcb->rport);
        probepoint = (((uint64_t) net->cwnd) << 32);
        /* Probe point 8 */
        probepoint |= ((8 << 16) | 0);
        SDT_PROBE(sctp, cwnd, net, rttvar,
            vtag,
            ((net->cc_mod.rtcc.lbw << 32) | 0),
            ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
            net->flight_size,
            probepoint);
        net->cc_mod.rtcc.lbw_rtt = 0;
        net->cc_mod.rtcc.cwnd_at_bw_set = 0;
        net->cc_mod.rtcc.lbw = 0;
        net->cc_mod.rtcc.bw_bytes_at_last_rttc = 0;
        net->cc_mod.rtcc.vol_reduce = 0;
        net->cc_mod.rtcc.bw_tot_time = 0;
        net->cc_mod.rtcc.bw_bytes = 0;
        net->cc_mod.rtcc.tls_needs_set = 0;
        if (net->cc_mod.rtcc.steady_step) {
            net->cc_mod.rtcc.vol_reduce = 0;
            net->cc_mod.rtcc.step_cnt = 0;
            net->cc_mod.rtcc.last_step_state = 0;
        }
        if (net->cc_mod.rtcc.ret_from_eq) {
            /* less aggressive one - reset cwnd too */
            uint32_t cwnd_in_mtu, cwnd;

            cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd);
            if (cwnd_in_mtu == 0) {
                /*
                 * Using 0 means that the value of RFC 4960
                 * is used.
                 */
                cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
            } else {
                /*
                 * We take the minimum of the burst limit
                 * and the initial congestion window.
                 */
                if ((stcb->asoc.max_burst > 0) && (cwnd_in_mtu > stcb->asoc.max_burst))
                    cwnd_in_mtu = stcb->asoc.max_burst;
                cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu;
            }
            if (net->cwnd > cwnd) {
                /*
                 * Only set if we are not a timeout (i.e.
                 * down to 1 mtu)
                 */
                net->cwnd = cwnd;
            }
        }
    }
}
static void
sctp_set_rtcc_initial_cc_param(struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
    uint64_t vtag, probepoint;

    sctp_set_initial_cc_param(stcb, net);
    stcb->asoc.use_precise_time = 1;
    probepoint = (((uint64_t) net->cwnd) << 32);
    probepoint |= ((9 << 16) | 0);
    vtag = (net->rtt << 32) |
        (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) |
        (stcb->rport);
    SDT_PROBE(sctp, cwnd, net, rttvar,
        vtag,
        0,
        0,
        0,
        probepoint);
    net->cc_mod.rtcc.lbw_rtt = 0;
    net->cc_mod.rtcc.cwnd_at_bw_set = 0;
    net->cc_mod.rtcc.vol_reduce = 0;
    net->cc_mod.rtcc.lbw = 0;
    net->cc_mod.rtcc.bw_bytes_at_last_rttc = 0;
    net->cc_mod.rtcc.bw_tot_time = 0;
    net->cc_mod.rtcc.bw_bytes = 0;
    net->cc_mod.rtcc.tls_needs_set = 0;
    net->cc_mod.rtcc.ret_from_eq = SCTP_BASE_SYSCTL(sctp_rttvar_eqret);
    net->cc_mod.rtcc.steady_step = SCTP_BASE_SYSCTL(sctp_steady_step);
    net->cc_mod.rtcc.use_dccc_ecn = SCTP_BASE_SYSCTL(sctp_use_dccc_ecn);
    net->cc_mod.rtcc.step_cnt = 0;
    net->cc_mod.rtcc.last_step_state = 0;
}

static int
sctp_cwnd_rtcc_socket_option(struct sctp_tcb *stcb, int setorget,
    struct sctp_cc_option *cc_opt)
{
    struct sctp_nets *net;

    if (setorget == 1) {
        /* a set */
        if (cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) {
            if ((cc_opt->aid_value.assoc_value != 0) &&
                (cc_opt->aid_value.assoc_value != 1)) {
                return (EINVAL);
            }
            TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
                net->cc_mod.rtcc.ret_from_eq = cc_opt->aid_value.assoc_value;
            }
        } else if (cc_opt->option == SCTP_CC_OPT_USE_DCCC_ECN) {
            if ((cc_opt->aid_value.assoc_value != 0) &&
                (cc_opt->aid_value.assoc_value != 1)) {
                return (EINVAL);
            }
            TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
                net->cc_mod.rtcc.use_dccc_ecn = cc_opt->aid_value.assoc_value;
            }
        } else if (cc_opt->option == SCTP_CC_OPT_STEADY_STEP) {
            TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
                net->cc_mod.rtcc.steady_step = cc_opt->aid_value.assoc_value;
            }
        } else {
            return (EINVAL);
        }
    } else {
        /* a get */
        if (cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) {
            net = TAILQ_FIRST(&stcb->asoc.nets);
            if (net == NULL) {
                return (EFAULT);
            }
            cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.ret_from_eq;
        } else if (cc_opt->option == SCTP_CC_OPT_USE_DCCC_ECN) {
            net = TAILQ_FIRST(&stcb->asoc.nets);
            if (net == NULL) {
                return (EFAULT);
            }
            cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.use_dccc_ecn;
        } else if (cc_opt->option == SCTP_CC_OPT_STEADY_STEP) {
            net = TAILQ_FIRST(&stcb->asoc.nets);
            if (net == NULL) {
                return (EFAULT);
            }
            cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.steady_step;
        } else {
            return (EINVAL);
        }
    }
    return (0);
}

static void
sctp_cwnd_update_rtcc_packet_transmitted(struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
    if (net->cc_mod.rtcc.tls_needs_set == 0) {
        SCTP_GETPTIME_TIMEVAL(&net->cc_mod.rtcc.tls);
        net->cc_mod.rtcc.tls_needs_set = 2;
    }
}
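/*
 * Illustrative userland sketch (an assumption, not part of this file):
 * the three RTCC knobs above are reachable through the SCTP_CC_OPTION
 * socket option, roughly as follows:
 *
 *	struct sctp_cc_option opt;
 *
 *	memset(&opt, 0, sizeof(opt));
 *	opt.option = SCTP_CC_OPT_STEADY_STEP;
 *	opt.aid_value.assoc_id = assoc_id;
 *	opt.aid_value.assoc_value = 4;
 *	(void)setsockopt(fd, IPPROTO_SCTP, SCTP_CC_OPTION, &opt, sizeof(opt));
 *
 * SETMODE and USE_DCCC_ECN accept only 0 or 1, matching the EINVAL
 * checks in sctp_cwnd_rtcc_socket_option() above.
 */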
static void
sctp_cwnd_update_rtcc_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
    /* Passing one as the last argument enables the RTCC algorithm */
    sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all, will_exit, 1);
}

static void
sctp_rtt_rtcc_calculated(struct sctp_tcb *stcb,
    struct sctp_nets *net, struct timeval *now)
{
    net->cc_mod.rtcc.rtt_set_this_sack = 1;
}

/* Here starts Sally Floyd's HS-TCP */

struct sctp_hs_raise_drop {
    int32_t cwnd;
    int32_t increase;
    int32_t drop_percent;
};

#define SCTP_HS_TABLE_SIZE 73

struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = {
    {38, 1, 50},     /* 0 */
    {118, 2, 44},    /* 1 */
    {221, 3, 41},    /* 2 */
    {347, 4, 38},    /* 3 */
    {495, 5, 37},    /* 4 */
    {663, 6, 35},    /* 5 */
    {851, 7, 34},    /* 6 */
    {1058, 8, 33},   /* 7 */
    {1284, 9, 32},   /* 8 */
    {1529, 10, 31},  /* 9 */
    {1793, 11, 30},  /* 10 */
    {2076, 12, 29},  /* 11 */
    {2378, 13, 28},  /* 12 */
    {2699, 14, 28},  /* 13 */
    {3039, 15, 27},  /* 14 */
    {3399, 16, 27},  /* 15 */
    {3778, 17, 26},  /* 16 */
    {4177, 18, 26},  /* 17 */
    {4596, 19, 25},  /* 18 */
    {5036, 20, 25},  /* 19 */
    {5497, 21, 24},  /* 20 */
    {5979, 22, 24},  /* 21 */
    {6483, 23, 23},  /* 22 */
    {7009, 24, 23},  /* 23 */
    {7558, 25, 22},  /* 24 */
    {8130, 26, 22},  /* 25 */
    {8726, 27, 22},  /* 26 */
    {9346, 28, 21},  /* 27 */
    {9991, 29, 21},  /* 28 */
    {10661, 30, 21}, /* 29 */
    {11358, 31, 20}, /* 30 */
    {12082, 32, 20}, /* 31 */
    {12834, 33, 20}, /* 32 */
    {13614, 34, 19}, /* 33 */
    {14424, 35, 19}, /* 34 */
    {15265, 36, 19}, /* 35 */
    {16137, 37, 19}, /* 36 */
    {17042, 38, 18}, /* 37 */
    {17981, 39, 18}, /* 38 */
    {18955, 40, 18}, /* 39 */
    {19965, 41, 17}, /* 40 */
    {21013, 42, 17}, /* 41 */
    {22101, 43, 17}, /* 42 */
    {23230, 44, 17}, /* 43 */
    {24402, 45, 16}, /* 44 */
    {25618, 46, 16}, /* 45 */
    {26881, 47, 16}, /* 46 */
    {28193, 48, 16}, /* 47 */
    {29557, 49, 15}, /* 48 */
    {30975, 50, 15}, /* 49 */
    {32450, 51, 15}, /* 50 */
    {33986, 52, 15}, /* 51 */
    {35586, 53, 14}, /* 52 */
    {37253, 54, 14}, /* 53 */
    {38992, 55, 14}, /* 54 */
    {40808, 56, 14}, /* 55 */
    {42707, 57, 13}, /* 56 */
    {44694, 58, 13}, /* 57 */
    {46776, 59, 13}, /* 58 */
    {48961, 60, 13}, /* 59 */
    {51258, 61, 13}, /* 60 */
    {53677, 62, 12}, /* 61 */
    {56230, 63, 12}, /* 62 */
    {58932, 64, 12}, /* 63 */
    {61799, 65, 12}, /* 64 */
    {64851, 66, 11}, /* 65 */
    {68113, 67, 11}, /* 66 */
    {71617, 68, 11}, /* 67 */
    {75401, 69, 10}, /* 68 */
    {79517, 70, 10}, /* 69 */
    {84035, 71, 10}, /* 70 */
    {89053, 72, 10}, /* 71 */
    {94717, 73, 9}   /* 72 */
};
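/*
 * Illustrative example (not from the original source): the table is
 * indexed by cwnd in KB (cwnd >> 10). E.g. cwnd = 2,400,000 bytes is
 * about 2343 KB, which selects row 12 ({2378, 13, 28}), so slow start
 * adds 13 KB (13 << 10 bytes) per adjustment and a loss drops cwnd by
 * 28% instead of the standard 50%.
 */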
static void
sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net)
{
    int cur_val, i, indx, incr;

    cur_val = net->cwnd >> 10;
    indx = SCTP_HS_TABLE_SIZE - 1;
#ifdef SCTP_DEBUG
    printf("HS CC called.\n");
#endif
    if (cur_val < sctp_cwnd_adjust[0].cwnd) {
        /* normal mode */
        if (net->net_ack > net->mtu) {
            net->cwnd += net->mtu;
            if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
                sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS);
            }
        } else {
            net->cwnd += net->net_ack;
            if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
                sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS);
            }
        }
    } else {
        for (i = net->last_hs_used; i < SCTP_HS_TABLE_SIZE; i++) {
            if (cur_val < sctp_cwnd_adjust[i].cwnd) {
                indx = i;
                break;
            }
        }
        net->last_hs_used = indx;
        incr = ((sctp_cwnd_adjust[indx].increase) << 10);
        net->cwnd += incr;
        if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
            sctp_log_cwnd(stcb, net, incr, SCTP_CWND_LOG_FROM_SS);
        }
    }
}

static void
sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net)
{
    int cur_val, i, indx;
    int old_cwnd = net->cwnd;

    cur_val = net->cwnd >> 10;
    if (cur_val < sctp_cwnd_adjust[0].cwnd) {
        /* normal mode */
        net->ssthresh = net->cwnd / 2;
        if (net->ssthresh < (net->mtu * 2)) {
            net->ssthresh = 2 * net->mtu;
        }
        net->cwnd = net->ssthresh;
    } else {
        /* drop by the proper amount */
        net->ssthresh = net->cwnd - (int)((net->cwnd / 100) *
            sctp_cwnd_adjust[net->last_hs_used].drop_percent);
        net->cwnd = net->ssthresh;
        /* now where are we */
        indx = net->last_hs_used;
        cur_val = net->cwnd >> 10;
        /* reset where we are in the table */
        if (cur_val < sctp_cwnd_adjust[0].cwnd) {
            /* fell out of HS */
            net->last_hs_used = 0;
        } else {
            for (i = indx; i >= 1; i--) {
                if (cur_val > sctp_cwnd_adjust[i - 1].cwnd) {
                    break;
                }
            }
            net->last_hs_used = indx;
        }
    }
    if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
        sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR);
    }
}
static void
sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
    struct sctp_nets *net;

    /*
     * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
     * (net->fast_retran_loss_recovery == 0)))
     */
    TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
        if ((asoc->fast_retran_loss_recovery == 0) ||
            (asoc->sctp_cmt_on_off > 0)) {
            /* out of a RFC2582 Fast recovery window? */
            if (net->net_ack > 0) {
                /*
                 * per section 7.2.3, are there any
                 * destinations that had a fast retransmit
                 * to them. If so what we need to do is
                 * adjust ssthresh and cwnd.
                 */
                struct sctp_tmit_chunk *lchk;

                sctp_hs_cwnd_decrease(stcb, net);

                lchk = TAILQ_FIRST(&asoc->send_queue);

                net->partial_bytes_acked = 0;
                /* Turn on fast recovery window */
                asoc->fast_retran_loss_recovery = 1;
                if (lchk == NULL) {
                    /* Mark end of the window */
                    asoc->fast_recovery_tsn = asoc->sending_seq - 1;
                } else {
                    asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
                }

                /*
                 * CMT fast recovery -- per destination
                 * recovery variable.
                 */
                net->fast_retran_loss_recovery = 1;

                if (lchk == NULL) {
                    /* Mark end of the window */
                    net->fast_recovery_tsn = asoc->sending_seq - 1;
                } else {
                    net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
                }

                sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
                    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
                sctp_timer_start(SCTP_TIMER_TYPE_SEND,
                    stcb->sctp_ep, stcb, net);
            }
        } else if (net->net_ack > 0) {
            /*
             * Mark a peg that we WOULD have done a cwnd
             * reduction but RFC2582 prevented this action.
             */
            SCTP_STAT_INCR(sctps_fastretransinrtt);
        }
    }
}
static void
sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	struct sctp_nets *net;

	/******************************/
	/* update cwnd and Early FR */
	/******************************/
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {

#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code. Need to debug.
		 */
		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
			if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) ||
			    SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) {
				net->will_exit_fast_recovery = 1;
			}
		}
#endif
		if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
			/*
			 * So, first of all, do we need to have an Early FR
			 * timer running?
			 */
			if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
			    (net->ref_count > 1) &&
			    (net->flight_size < net->cwnd)) ||
			    (reneged_all)) {
				/*
				 * Yes, so in this case stop it if it's
				 * running, and then restart it. Reneging
				 * all is a special case where we want to
				 * run the Early FR timer and then force the
				 * last few unacked chunks to be sent,
				 * causing us to elicit a SACK with gaps to
				 * force out the others.
				 */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
				}
				SCTP_STAT_INCR(sctps_earlyfrstrid);
				sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
			} else {
				/* No, stop it if it's running */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
				}
			}
		}
		/* if nothing was acked on this destination skip it */
		if (net->net_ack == 0) {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
			}
			continue;
		}
		if (net->net_ack2 > 0) {
			/*
			 * Karn's rule applies to clearing the error count;
			 * this is optional.
			 */
			net->error_count = 0;
			if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
			    SCTP_ADDR_NOT_REACHABLE) {
				/* addr came good */
				net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
				net->dest_state |= SCTP_ADDR_REACHABLE;
				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
				    SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
				/* Now, was it the primary? If so, restore it. */
				if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
					(void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
				}
			}
			/*
			 * JRS 5/14/07 - If CMT PF is on and the destination
			 * is in PF state, set the destination to active
			 * state and set the cwnd to one or two MTUs based
			 * on whether PF1 or PF2 is being used.
			 *
			 * Should we stop any running T3 timer here?
			 */
			if ((asoc->sctp_cmt_on_off > 0) &&
			    (asoc->sctp_cmt_pf > 0) &&
			    ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
				net->dest_state &= ~SCTP_ADDR_PF;
				net->cwnd = net->mtu * asoc->sctp_cmt_pf;
				SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
				    net, net->cwnd);
				/*
				 * Since the cwnd value is explicitly set,
				 * skip the code that updates the cwnd
				 * value.
				 */
				goto skip_cwnd_update;
			}
		}
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code
		 */
		/*
		 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
		 * && net->will_exit_fast_recovery == 0) { @@@ Do something
		 * } else if (sctp_cmt_on_off == 0 &&
		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
		 */
#endif

		if (asoc->fast_retran_loss_recovery &&
		    (will_exit == 0) &&
		    (asoc->sctp_cmt_on_off == 0)) {
			/*
			 * If we are in loss recovery we skip any cwnd
			 * update.
			 */
			goto skip_cwnd_update;
		}
		/*
		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
		 * moved.
		 */
		if (accum_moved ||
		    ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
			/* If the cumulative ack moved we can proceed */
			if (net->cwnd <= net->ssthresh) {
				/* We are in slow start */
				if (net->flight_size + net->net_ack >= net->cwnd) {
					sctp_hs_cwnd_increase(stcb, net);
				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_SS);
					}
				}
			} else {
				/* We are in congestion avoidance */
				net->partial_bytes_acked += net->net_ack;
				if ((net->flight_size + net->net_ack >= net->cwnd) &&
				    (net->partial_bytes_acked >= net->cwnd)) {
					net->partial_bytes_acked -= net->cwnd;
					net->cwnd += net->mtu;
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
						sctp_log_cwnd(stcb, net, net->mtu,
						    SCTP_CWND_LOG_FROM_CA);
					}
				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_CA);
					}
				}
			}
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_NO_CUMACK);
			}
		}
skip_cwnd_update:
		/*
		 * NOW, according to Karn's rule, do we need to restore the
		 * RTO timer? Check our net_ack2. If it is not set, we have
		 * an ambiguity, i.e. all data ack'd was sent to more than
		 * one place.
		 */
		if (net->net_ack2) {
			/* restore any doubled timers */
			net->RTO = (net->lastsa >> SCTP_RTT_SHIFT) + net->lastsv;
			if (net->RTO < stcb->asoc.minrto) {
				net->RTO = stcb->asoc.minrto;
			}
			if (net->RTO > stcb->asoc.maxrto) {
				net->RTO = stcb->asoc.maxrto;
			}
		}
	}
}
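/*
 * Illustrative sketch (guarded out): the congestion-avoidance step above
 * grows cwnd by roughly one MTU per cwnd bytes newly acked, i.e. about one
 * MTU per RTT when the window is full. The helper below is hypothetical
 * and only demonstrates the partial_bytes_acked bookkeeping.
 */
#ifdef SCTP_NOT_USED
static void
example_ca_step(uint32_t *cwnd, uint32_t *partial_bytes_acked,
    uint32_t bytes_acked, uint32_t mtu)
{
	*partial_bytes_acked += bytes_acked;
	if (*partial_bytes_acked >= *cwnd) {
		/* a full window's worth of data was acked: open by one MTU */
		*partial_bytes_acked -= *cwnd;
		*cwnd += mtu;
	}
}
#endif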
/*
 * H-TCP congestion control. The algorithm is detailed in:
 * R. N. Shorten, D. J. Leith:
 *   "H-TCP: TCP for high-speed and long-distance networks"
 *   Proc. PFLDnet, Argonne, 2004.
 * http://www.hamilton.ie/net/htcp3.pdf
 */

static int use_rtt_scaling = 1;
static int use_bandwidth_switch = 1;

static inline int
between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
{
	/* Is seq1 within [seq2, seq3]? Wrap-safe, using unsigned arithmetic. */
	return seq3 - seq2 >= seq1 - seq2;
}

static inline uint32_t
htcp_cong_time(struct htcp *ca)
{
	return sctp_get_tick_count() - ca->last_cong;
}

static inline uint32_t
htcp_ccount(struct htcp *ca)
{
	return htcp_cong_time(ca) / ca->minRTT;
}

static inline void
htcp_reset(struct htcp *ca)
{
	ca->undo_last_cong = ca->last_cong;
	ca->undo_maxRTT = ca->maxRTT;
	ca->undo_old_maxB = ca->old_maxB;
	ca->last_cong = sctp_get_tick_count();
}

#ifdef SCTP_NOT_USED

static uint32_t
htcp_cwnd_undo(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	net->cc_mod.htcp_ca.last_cong = net->cc_mod.htcp_ca.undo_last_cong;
	net->cc_mod.htcp_ca.maxRTT = net->cc_mod.htcp_ca.undo_maxRTT;
	net->cc_mod.htcp_ca.old_maxB = net->cc_mod.htcp_ca.undo_old_maxB;
	return max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->cc_mod.htcp_ca.beta) * net->mtu);
}

#endif

static inline void
measure_rtt(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	uint32_t srtt = net->lastsa >> SCTP_RTT_SHIFT;

	/* keep track of the minimum RTT seen so far; minRTT is zero at first */
	if (net->cc_mod.htcp_ca.minRTT > srtt || !net->cc_mod.htcp_ca.minRTT)
		net->cc_mod.htcp_ca.minRTT = srtt;

	/* max RTT */
	if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->cc_mod.htcp_ca) > 3) {
		if (net->cc_mod.htcp_ca.maxRTT < net->cc_mod.htcp_ca.minRTT)
			net->cc_mod.htcp_ca.maxRTT = net->cc_mod.htcp_ca.minRTT;
		if (net->cc_mod.htcp_ca.maxRTT < srtt && srtt <= net->cc_mod.htcp_ca.maxRTT + MSEC_TO_TICKS(20))
			net->cc_mod.htcp_ca.maxRTT = srtt;
	}
}
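/*
 * Illustrative sketch (guarded out): what between() computes. With unsigned
 * wraparound, "seq3 - seq2 >= seq1 - seq2" is true exactly when seq1 lies in
 * the circular interval [seq2, seq3]. The checks below are assumptions about
 * the intended semantics, written as a hypothetical self-test.
 */
#ifdef SCTP_NOT_USED
static void
example_between_check(void)
{
	/* plain interval: 5 is within [3, 9] */
	KASSERT(between(5, 3, 9), ("5 should be within [3, 9]"));
	/* wrapped interval: 2 is within [0xfffffff0, 0x10] */
	KASSERT(between(2, 0xfffffff0U, 0x10), ("2 should be within wrapped range"));
	/* outside: 11 is not within [3, 9] */
	KASSERT(!between(11, 3, 9), ("11 should be outside [3, 9]"));
}
#endif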
static void
measure_achieved_throughput(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	uint32_t now = sctp_get_tick_count();

	if (net->fast_retran_ip == 0)
		net->cc_mod.htcp_ca.bytes_acked = net->net_ack;

	if (!use_bandwidth_switch)
		return;

	/* achieved throughput calculations */
	/* JRS - not 100% sure of this statement */
	if (net->fast_retran_ip == 1) {
		net->cc_mod.htcp_ca.bytecount = 0;
		net->cc_mod.htcp_ca.lasttime = now;
		return;
	}
	net->cc_mod.htcp_ca.bytecount += net->net_ack;

	/* Note: "x ? : y" is the GCC conditional extension, short for "x ? x : y". */
	if (net->cc_mod.htcp_ca.bytecount >= net->cwnd - ((net->cc_mod.htcp_ca.alpha >> 7 ? : 1) * net->mtu)
	    && now - net->cc_mod.htcp_ca.lasttime >= net->cc_mod.htcp_ca.minRTT
	    && net->cc_mod.htcp_ca.minRTT > 0) {
		uint32_t cur_Bi = net->cc_mod.htcp_ca.bytecount / net->mtu * hz / (now - net->cc_mod.htcp_ca.lasttime);

		if (htcp_ccount(&net->cc_mod.htcp_ca) <= 3) {
			/* just after backoff */
			net->cc_mod.htcp_ca.minB = net->cc_mod.htcp_ca.maxB = net->cc_mod.htcp_ca.Bi = cur_Bi;
		} else {
			net->cc_mod.htcp_ca.Bi = (3 * net->cc_mod.htcp_ca.Bi + cur_Bi) / 4;
			if (net->cc_mod.htcp_ca.Bi > net->cc_mod.htcp_ca.maxB)
				net->cc_mod.htcp_ca.maxB = net->cc_mod.htcp_ca.Bi;
			if (net->cc_mod.htcp_ca.minB > net->cc_mod.htcp_ca.maxB)
				net->cc_mod.htcp_ca.minB = net->cc_mod.htcp_ca.maxB;
		}
		net->cc_mod.htcp_ca.bytecount = 0;
		net->cc_mod.htcp_ca.lasttime = now;
	}
}

static inline void
htcp_beta_update(struct htcp *ca, uint32_t minRTT, uint32_t maxRTT)
{
	if (use_bandwidth_switch) {
		uint32_t maxB = ca->maxB;
		uint32_t old_maxB = ca->old_maxB;

		ca->old_maxB = ca->maxB;

		/* If the bandwidth estimate moved more than 20%, fall back to BETA_MIN. */
		if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
			ca->beta = BETA_MIN;
			ca->modeswitch = 0;
			return;
		}
	}
	if (ca->modeswitch && minRTT > (uint32_t) MSEC_TO_TICKS(10) && maxRTT) {
		ca->beta = (minRTT << 7) / maxRTT;
		if (ca->beta < BETA_MIN)
			ca->beta = BETA_MIN;
		else if (ca->beta > BETA_MAX)
			ca->beta = BETA_MAX;
	} else {
		ca->beta = BETA_MIN;
		ca->modeswitch = 1;
	}
}

static inline void
htcp_alpha_update(struct htcp *ca)
{
	uint32_t minRTT = ca->minRTT;
	uint32_t factor = 1;
	uint32_t diff = htcp_cong_time(ca);

	if (diff > (uint32_t) hz) {
		diff -= hz;
		factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / hz)) / hz;
	}
	if (use_rtt_scaling && minRTT) {
		uint32_t scale = (hz << 3) / (10 * minRTT);

		/* clamp the scaling ratio to the interval [0.5, 10] << 3 */
		scale = min(max(scale, 1U << 2), 10U << 3);
		factor = (factor << 3) / scale;
		if (!factor)
			factor = 1;
	}
	ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
	if (!ca->alpha)
		ca->alpha = ALPHA_BASE;
}
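/*
 * Illustrative sketch (guarded out): alpha and beta above are kept in 7-bit
 * fixed point, so 1.0 is represented as (1 << 7) == 128. For example, with
 * minRTT = 40 ticks and maxRTT = 100 ticks, beta = (40 << 7) / 100 = 51,
 * i.e. roughly 0.4. The helper is hypothetical and just makes the scaling
 * explicit.
 */
#ifdef SCTP_NOT_USED
static uint32_t
example_beta_fixed_point(uint32_t minRTT, uint32_t maxRTT)
{
	uint32_t beta;

	beta = (minRTT << 7) / maxRTT;	/* beta ~= (minRTT / maxRTT) * 128 */
	if (beta < BETA_MIN)
		beta = BETA_MIN;
	else if (beta > BETA_MAX)
		beta = BETA_MAX;
	return (beta);
}
#endif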
/*
 * After we have the rtt data to calculate beta, we'd still prefer to wait
 * one rtt before we adjust our beta, to ensure we are working from
 * consistent data.
 *
 * This function should be called when we hit a congestion event, since only
 * at that point do we really have a real sense of maxRTT (the queues en
 * route were getting just too full now).
 */
static void
htcp_param_update(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	uint32_t minRTT = net->cc_mod.htcp_ca.minRTT;
	uint32_t maxRTT = net->cc_mod.htcp_ca.maxRTT;

	htcp_beta_update(&net->cc_mod.htcp_ca, minRTT, maxRTT);
	htcp_alpha_update(&net->cc_mod.htcp_ca);

	/*
	 * add slowly fading memory for maxRTT to accommodate routing
	 * changes etc.
	 */
	if (minRTT > 0 && maxRTT > minRTT)
		net->cc_mod.htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
}

static uint32_t
htcp_recalc_ssthresh(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	htcp_param_update(stcb, net);
	return max(((net->cwnd / net->mtu * net->cc_mod.htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu);
}

static void
htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	/*-
	 * How to handle these functions?
	 * if (!tcp_is_cwnd_limited(sk, in_flight)) RRS - good question.
	 *	return;
	 */
	if (net->cwnd <= net->ssthresh) {
		/* We are in slow start */
		if (net->flight_size + net->net_ack >= net->cwnd) {
			if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) {
				net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable));
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, net->mtu,
					    SCTP_CWND_LOG_FROM_SS);
				}
			} else {
				net->cwnd += net->net_ack;
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, net->net_ack,
					    SCTP_CWND_LOG_FROM_SS);
				}
			}
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->net_ack,
				    SCTP_CWND_LOG_NOADV_SS);
			}
		}
	} else {
		measure_rtt(stcb, net);

		/*
		 * In the dangerous area, increase slowly. In theory this is
		 * net->cwnd += alpha / net->cwnd
		 */
		/* What is snd_cwnd_cnt?? */
		if (((net->partial_bytes_acked / net->mtu * net->cc_mod.htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) {
			/*-
			 * Does SCTP have a cwnd clamp?
			 * if (net->snd_cwnd < net->snd_cwnd_clamp) - Nope (RRS).
			 */
			net->cwnd += net->mtu;
			net->partial_bytes_acked = 0;
			htcp_alpha_update(&net->cc_mod.htcp_ca);
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_FROM_CA);
			}
		} else {
			net->partial_bytes_acked += net->net_ack;
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->net_ack,
				    SCTP_CWND_LOG_NOADV_CA);
			}
		}

		net->cc_mod.htcp_ca.bytes_acked = net->mtu;
	}
}
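/*
 * Illustrative sketch (guarded out): the ssthresh recalculation above backs
 * the window off by the factor beta/128, rounded to whole MTUs, with a floor
 * of two MTUs. E.g. cwnd = 20 MTUs and beta = 64 (~0.5) gives
 * (20 * 64) >> 7 = 10 MTUs. The helper below is a hypothetical restatement
 * of that arithmetic.
 */
#ifdef SCTP_NOT_USED
static uint32_t
example_htcp_backoff(uint32_t cwnd, uint32_t mtu, uint32_t beta)
{
	uint32_t new_ssthresh;

	new_ssthresh = ((cwnd / mtu * beta) >> 7) * mtu;
	if (new_ssthresh < 2 * mtu)
		new_ssthresh = 2 * mtu;	/* never go below two MTUs */
	return (new_ssthresh);
}
#endif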
#ifdef SCTP_NOT_USED
/* Lower bound on congestion window. */
static uint32_t
htcp_min_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	return net->ssthresh;
}

#endif

static void
htcp_init(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	memset(&net->cc_mod.htcp_ca, 0, sizeof(struct htcp));
	net->cc_mod.htcp_ca.alpha = ALPHA_BASE;
	net->cc_mod.htcp_ca.beta = BETA_MIN;
	net->cc_mod.htcp_ca.bytes_acked = net->mtu;
	net->cc_mod.htcp_ca.last_cong = sctp_get_tick_count();
}

static void
sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	/*
	 * We take the max of two MTUs and the INITIAL_CWND, and then limit
	 * that to four MTUs of sending.
	 */
	net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
	net->ssthresh = stcb->asoc.peers_rwnd;
	htcp_init(stcb, net);

	if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
		sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
	}
}
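/*
 * Illustrative sketch (guarded out): the initial-cwnd clamp above, restated.
 * With a hypothetical MTU of 1500 bytes and an initial-window constant of
 * 4380 bytes (an assumed value, not necessarily SCTP_INITIAL_CWND), this
 * yields min(6000, max(3000, 4380)) = 4380 bytes.
 */
#ifdef SCTP_NOT_USED
static uint32_t
example_initial_cwnd(uint32_t mtu, uint32_t initial_cwnd_const)
{
	/* at least two MTUs, at most four MTUs */
	return (min(4 * mtu, max(2 * mtu, initial_cwnd_const)));
}
#endif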
static void
sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	struct sctp_nets *net;

	/******************************/
	/* update cwnd and Early FR */
	/******************************/
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {

#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code. Need to debug.
		 */
		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
			if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) ||
			    SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) {
				net->will_exit_fast_recovery = 1;
			}
		}
#endif
		if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
			/*
			 * So, first of all, do we need to have an Early FR
			 * timer running?
			 */
			if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
			    (net->ref_count > 1) &&
			    (net->flight_size < net->cwnd)) ||
			    (reneged_all)) {
				/*
				 * Yes, so in this case stop it if it's
				 * running, and then restart it. Reneging
				 * all is a special case where we want to
				 * run the Early FR timer and then force the
				 * last few unacked chunks to be sent,
				 * causing us to elicit a SACK with gaps to
				 * force out the others.
				 */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
				}
				SCTP_STAT_INCR(sctps_earlyfrstrid);
				sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
			} else {
				/* No, stop it if it's running */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
				}
			}
		}
		/* if nothing was acked on this destination skip it */
		if (net->net_ack == 0) {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
			}
			continue;
		}
		if (net->net_ack2 > 0) {
			/*
			 * Karn's rule applies to clearing the error count;
			 * this is optional.
			 */
			net->error_count = 0;
			if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
			    SCTP_ADDR_NOT_REACHABLE) {
				/* addr came good */
				net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
				net->dest_state |= SCTP_ADDR_REACHABLE;
				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
				    SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
				/* Now, was it the primary? If so, restore it. */
				if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
					(void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
				}
			}
			/*
			 * JRS 5/14/07 - If CMT PF is on and the destination
			 * is in PF state, set the destination to active
			 * state and set the cwnd to one or two MTUs based
			 * on whether PF1 or PF2 is being used.
			 *
			 * Should we stop any running T3 timer here?
			 */
			if ((asoc->sctp_cmt_on_off > 0) &&
			    (asoc->sctp_cmt_pf > 0) &&
			    ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
				net->dest_state &= ~SCTP_ADDR_PF;
				net->cwnd = net->mtu * asoc->sctp_cmt_pf;
				SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
				    net, net->cwnd);
				/*
				 * Since the cwnd value is explicitly set,
				 * skip the code that updates the cwnd
				 * value.
				 */
				goto skip_cwnd_update;
			}
		}
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code
		 */
		/*
		 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
		 * && net->will_exit_fast_recovery == 0) { @@@ Do something
		 * } else if (sctp_cmt_on_off == 0 &&
		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
		 */
#endif

		if (asoc->fast_retran_loss_recovery &&
		    (will_exit == 0) &&
		    (asoc->sctp_cmt_on_off == 0)) {
			/*
			 * If we are in loss recovery we skip any cwnd
			 * update.
			 */
			goto skip_cwnd_update;
		}
		/*
		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
		 * moved.
		 */
		if (accum_moved ||
		    ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
			htcp_cong_avoid(stcb, net);
			measure_achieved_throughput(stcb, net);
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_NO_CUMACK);
			}
		}
skip_cwnd_update:
		/*
		 * NOW, according to Karn's rule, do we need to restore the
		 * RTO timer? Check our net_ack2. If it is not set, we have
		 * an ambiguity, i.e. all data ack'd was sent to more than
		 * one place.
		 */
		if (net->net_ack2) {
			/* restore any doubled timers */
			net->RTO = (net->lastsa >> SCTP_RTT_SHIFT) + net->lastsv;
			if (net->RTO < stcb->asoc.minrto) {
				net->RTO = stcb->asoc.minrto;
			}
			if (net->RTO > stcb->asoc.maxrto) {
				net->RTO = stcb->asoc.maxrto;
			}
		}
	}
}
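/*
 * Illustrative sketch (guarded out): the Karn's-rule restore above rebuilds
 * RTO from the smoothed RTT estimate plus the RTT variance, then clamps it
 * to [minrto, maxrto]. The helper is a hypothetical restatement of that
 * computation.
 */
#ifdef SCTP_NOT_USED
static uint32_t
example_restore_rto(int lastsa, int lastsv, uint32_t minrto, uint32_t maxrto)
{
	uint32_t rto;

	rto = (lastsa >> SCTP_RTT_SHIFT) + lastsv;	/* srtt + rttvar */
	if (rto < minrto)
		rto = minrto;
	if (rto > maxrto)
		rto = maxrto;
	return (rto);
}
#endif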
static void
sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
	struct sctp_nets *net;

	/*
	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
	 * (net->fast_retran_loss_recovery == 0)))
	 */
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
		if ((asoc->fast_retran_loss_recovery == 0) ||
		    (asoc->sctp_cmt_on_off > 0)) {
			/* out of a RFC2582 Fast recovery window? */
			if (net->net_ack > 0) {
				/*
				 * Per Section 7.2.3: are there any
				 * destinations that had a fast retransmit
				 * sent to them? If so, we need to adjust
				 * ssthresh and cwnd.
				 */
				struct sctp_tmit_chunk *lchk;
				int old_cwnd = net->cwnd;

				/* JRS - reset as if state were changed */
				htcp_reset(&net->cc_mod.htcp_ca);
				net->ssthresh = htcp_recalc_ssthresh(stcb, net);
				net->cwnd = net->ssthresh;
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
					    SCTP_CWND_LOG_FROM_FR);
				}
				lchk = TAILQ_FIRST(&asoc->send_queue);

				net->partial_bytes_acked = 0;
				/* Turn on fast recovery window */
				asoc->fast_retran_loss_recovery = 1;
				if (lchk == NULL) {
					/* Mark end of the window */
					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/*
				 * CMT fast recovery -- per destination
				 * recovery variable.
				 */
				net->fast_retran_loss_recovery = 1;

				if (lchk == NULL) {
					/* Mark end of the window */
					net->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net);
			}
		} else if (net->net_ack > 0) {
			/*
			 * Mark a peg that we WOULD have done a cwnd
			 * reduction but RFC2582 prevented this action.
			 */
			SCTP_STAT_INCR(sctps_fastretransinrtt);
		}
	}
}

static void
sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	int old_cwnd = net->cwnd;

	/* JRS - reset as if the state were being changed to timeout */
	htcp_reset(&net->cc_mod.htcp_ca);
	net->ssthresh = htcp_recalc_ssthresh(stcb, net);
	net->cwnd = net->mtu;
	net->partial_bytes_acked = 0;
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
	}
}
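/*
 * Illustrative sketch (guarded out): on a retransmission timeout the window
 * collapses to a single MTU, while ssthresh keeps the H-TCP backoff value,
 * so the sender slow-starts back up to the reduced threshold. The helper is
 * a hypothetical restatement of that state change.
 */
#ifdef SCTP_NOT_USED
static void
example_timeout_reaction(uint32_t *cwnd, uint32_t *ssthresh,
    uint32_t backed_off_ssthresh, uint32_t mtu)
{
	*ssthresh = backed_off_ssthresh;	/* e.g. from htcp_recalc_ssthresh() */
	*cwnd = mtu;				/* restart from one MTU */
}
#endif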
static void
sctp_htcp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int old_cwnd;

	old_cwnd = net->cwnd;

	sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
	net->cc_mod.htcp_ca.last_cong = sctp_get_tick_count();
	/*
	 * Make a small adjustment to cwnd and force us into CA.
	 */
	if (net->cwnd > net->mtu)
		/* drop down one MTU after sending */
		net->cwnd -= net->mtu;
	if (net->cwnd < net->ssthresh)
		/* still in SS, move to CA */
		net->ssthresh = net->cwnd - 1;
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR);
	}
}

static void
sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
    struct sctp_nets *net, int in_window, int num_pkt_lost)
{
	int old_cwnd;

	old_cwnd = net->cwnd;

	/* JRS - reset htcp as if the state changed */
	if (in_window == 0) {
		htcp_reset(&net->cc_mod.htcp_ca);
		SCTP_STAT_INCR(sctps_ecnereducedcwnd);
		net->ssthresh = htcp_recalc_ssthresh(stcb, net);
		if (net->ssthresh < net->mtu) {
			net->ssthresh = net->mtu;
			/* here back off the timer as well, to slow us down */
			net->RTO <<= 1;
		}
		net->cwnd = net->ssthresh;
		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
			sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
		}
	}
}
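/*
 * The table below binds each congestion-control module (RFC 4960 default,
 * HighSpeed, H-TCP, and RTCC) to a common set of callbacks. Illustrative
 * sketch of the dispatch (guarded out): callers invoke the active module
 * through the per-association copy of this structure, along the lines of
 * the hypothetical helper below.
 */
#ifdef SCTP_NOT_USED
static void
example_cc_dispatch_on_timeout(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	/* indirect call through the module selected for this association */
	stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout(stcb, net);
}
#endif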
struct sctp_cc_functions sctp_cc_functions[] = {
	{
		.sctp_set_initial_cc_param = sctp_set_initial_cc_param,
		.sctp_cwnd_update_after_sack = sctp_cwnd_update_after_sack,
		.sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr,
		.sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout,
		.sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_after_ecn_echo,
		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
		.sctp_cwnd_update_after_fr_timer = sctp_cwnd_update_after_fr_timer
	},
	{
		.sctp_set_initial_cc_param = sctp_set_initial_cc_param,
		.sctp_cwnd_update_after_sack = sctp_hs_cwnd_update_after_sack,
		.sctp_cwnd_update_after_fr = sctp_hs_cwnd_update_after_fr,
		.sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout,
		.sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_after_ecn_echo,
		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
		.sctp_cwnd_update_after_fr_timer = sctp_cwnd_update_after_fr_timer
	},
	{
		.sctp_set_initial_cc_param = sctp_htcp_set_initial_cc_param,
		.sctp_cwnd_update_after_sack = sctp_htcp_cwnd_update_after_sack,
		.sctp_cwnd_update_after_fr = sctp_htcp_cwnd_update_after_fr,
		.sctp_cwnd_update_after_timeout = sctp_htcp_cwnd_update_after_timeout,
		.sctp_cwnd_update_after_ecn_echo = sctp_htcp_cwnd_update_after_ecn_echo,
		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
		.sctp_cwnd_update_after_fr_timer = sctp_htcp_cwnd_update_after_fr_timer
	},
	{
		.sctp_set_initial_cc_param = sctp_set_rtcc_initial_cc_param,
		.sctp_cwnd_update_after_sack = sctp_cwnd_update_rtcc_after_sack,
		.sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr,
		.sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout,
		.sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_rtcc_after_ecn_echo,
		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
		.sctp_cwnd_update_after_fr_timer = sctp_cwnd_update_after_fr_timer,
		.sctp_cwnd_update_packet_transmitted = sctp_cwnd_update_rtcc_packet_transmitted,
		.sctp_cwnd_update_tsn_acknowledged = sctp_cwnd_update_rtcc_tsn_acknowledged,
		.sctp_cwnd_new_transmission_begins = sctp_cwnd_new_rtcc_transmission_begins,
		.sctp_cwnd_prepare_net_for_sack = sctp_cwnd_prepare_rtcc_net_for_sack,
		.sctp_cwnd_socket_option = sctp_cwnd_rtcc_socket_option,
		.sctp_rtt_calculated = sctp_rtt_rtcc_calculated
	}
};