1 /*- 2 * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * 7 * a) Redistributions of source code must retain the above copyright notice, 8 * this list of conditions and the following disclaimer. 9 * 10 * b) Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in 12 * the documentation and/or other materials provided with the distribution. 13 * 14 * c) Neither the name of Cisco Systems, Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived 16 * from this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 20 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 28 * THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <netinet/sctp_os.h> 32 #include <netinet/sctp_var.h> 33 #include <netinet/sctp_sysctl.h> 34 #include <netinet/sctp_pcb.h> 35 #include <netinet/sctp_header.h> 36 #include <netinet/sctputil.h> 37 #include <netinet/sctp_output.h> 38 #include <netinet/sctp_input.h> 39 #include <netinet/sctp_indata.h> 40 #include <netinet/sctp_uio.h> 41 #include <netinet/sctp_timer.h> 42 #include <netinet/sctp_auth.h> 43 #include <netinet/sctp_asconf.h> 44 #include <netinet/sctp_cc_functions.h> 45 #include <netinet/sctp_dtrace_declare.h> 46 #include <sys/cdefs.h> 47 __FBSDID("$FreeBSD$"); 48 49 void 50 sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net) 51 { 52 struct sctp_association *assoc; 53 uint32_t cwnd_in_mtu; 54 55 assoc = &stcb->asoc; 56 /* 57 * We take the minimum of the burst limit and the initial congestion 58 * window. The initial congestion window is at least two times the 59 * MTU. 60 */ 61 cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd); 62 if ((assoc->max_burst > 0) && (cwnd_in_mtu > assoc->max_burst)) 63 cwnd_in_mtu = assoc->max_burst; 64 net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu; 65 net->ssthresh = assoc->peers_rwnd; 66 67 SDT_PROBE(sctp, cwnd, net, init, 68 stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, 69 0, net->cwnd); 70 if (SCTP_BASE_SYSCTL(sctp_logging_level) & 71 (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) { 72 sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION); 73 } 74 } 75 76 void 77 sctp_cwnd_update_after_fr(struct sctp_tcb *stcb, 78 struct sctp_association *asoc) 79 { 80 struct sctp_nets *net; 81 82 /*- 83 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) && 84 * (net->fast_retran_loss_recovery == 0))) 85 */ 86 TAILQ_FOREACH(net, &asoc->nets, sctp_next) { 87 if ((asoc->fast_retran_loss_recovery == 0) || 88 (asoc->sctp_cmt_on_off > 0)) { 89 /* out of a RFC2582 Fast recovery window? */ 90 if (net->net_ack > 0) { 91 /* 92 * per section 7.2.3, are there any 93 * destinations that had a fast retransmit 94 * to them. If so what we need to do is 95 * adjust ssthresh and cwnd. 96 */ 97 struct sctp_tmit_chunk *lchk; 98 int old_cwnd = net->cwnd; 99 100 net->ssthresh = net->cwnd / 2; 101 if (net->ssthresh < (net->mtu * 2)) { 102 net->ssthresh = 2 * net->mtu; 103 } 104 net->cwnd = net->ssthresh; 105 SDT_PROBE(sctp, cwnd, net, fr, 106 stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, 107 old_cwnd, net->cwnd); 108 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 109 sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), 110 SCTP_CWND_LOG_FROM_FR); 111 } 112 lchk = TAILQ_FIRST(&asoc->send_queue); 113 114 net->partial_bytes_acked = 0; 115 /* Turn on fast recovery window */ 116 asoc->fast_retran_loss_recovery = 1; 117 if (lchk == NULL) { 118 /* Mark end of the window */ 119 asoc->fast_recovery_tsn = asoc->sending_seq - 1; 120 } else { 121 asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; 122 } 123 124 /* 125 * CMT fast recovery -- per destination 126 * recovery variable. 127 */ 128 net->fast_retran_loss_recovery = 1; 129 130 if (lchk == NULL) { 131 /* Mark end of the window */ 132 net->fast_recovery_tsn = asoc->sending_seq - 1; 133 } else { 134 net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; 135 } 136 137 /* 138 * Disable Nonce Sum Checking and store the 139 * resync tsn 140 */ 141 asoc->nonce_sum_check = 0; 142 asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1; 143 144 sctp_timer_stop(SCTP_TIMER_TYPE_SEND, 145 stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); 146 sctp_timer_start(SCTP_TIMER_TYPE_SEND, 147 stcb->sctp_ep, stcb, net); 148 } 149 } else if (net->net_ack > 0) { 150 /* 151 * Mark a peg that we WOULD have done a cwnd 152 * reduction but RFC2582 prevented this action. 153 */ 154 SCTP_STAT_INCR(sctps_fastretransinrtt); 155 } 156 } 157 } 158 159 void 160 sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, 161 struct sctp_association *asoc, 162 int accum_moved, int reneged_all, int will_exit) 163 { 164 struct sctp_nets *net; 165 int old_cwnd; 166 167 /******************************/ 168 /* update cwnd and Early FR */ 169 /******************************/ 170 TAILQ_FOREACH(net, &asoc->nets, sctp_next) { 171 172 #ifdef JANA_CMT_FAST_RECOVERY 173 /* 174 * CMT fast recovery code. Need to debug. 175 */ 176 if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) { 177 if (compare_with_wrap(asoc->last_acked_seq, 178 net->fast_recovery_tsn, MAX_TSN) || 179 (asoc->last_acked_seq == net->fast_recovery_tsn) || 180 compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) || 181 (net->pseudo_cumack == net->fast_recovery_tsn)) { 182 net->will_exit_fast_recovery = 1; 183 } 184 } 185 #endif 186 if (SCTP_BASE_SYSCTL(sctp_early_fr)) { 187 /* 188 * So, first of all do we need to have a Early FR 189 * timer running? 190 */ 191 if ((!TAILQ_EMPTY(&asoc->sent_queue) && 192 (net->ref_count > 1) && 193 (net->flight_size < net->cwnd)) || 194 (reneged_all)) { 195 /* 196 * yes, so in this case stop it if its 197 * running, and then restart it. Reneging 198 * all is a special case where we want to 199 * run the Early FR timer and then force the 200 * last few unacked to be sent, causing us 201 * to illicit a sack with gaps to force out 202 * the others. 203 */ 204 if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { 205 SCTP_STAT_INCR(sctps_earlyfrstpidsck2); 206 sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, 207 SCTP_FROM_SCTP_INDATA + SCTP_LOC_20); 208 } 209 SCTP_STAT_INCR(sctps_earlyfrstrid); 210 sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); 211 } else { 212 /* No, stop it if its running */ 213 if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { 214 SCTP_STAT_INCR(sctps_earlyfrstpidsck3); 215 sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, 216 SCTP_FROM_SCTP_INDATA + SCTP_LOC_21); 217 } 218 } 219 } 220 /* if nothing was acked on this destination skip it */ 221 if (net->net_ack == 0) { 222 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 223 sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK); 224 } 225 continue; 226 } 227 if (net->net_ack2 > 0) { 228 /* 229 * Karn's rule applies to clearing error count, this 230 * is optional. 231 */ 232 net->error_count = 0; 233 if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == 234 SCTP_ADDR_NOT_REACHABLE) { 235 /* addr came good */ 236 net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; 237 net->dest_state |= SCTP_ADDR_REACHABLE; 238 sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 239 SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED); 240 /* now was it the primary? if so restore */ 241 if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) { 242 (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net); 243 } 244 } 245 /* 246 * JRS 5/14/07 - If CMT PF is on and the destination 247 * is in PF state, set the destination to active 248 * state and set the cwnd to one or two MTU's based 249 * on whether PF1 or PF2 is being used. 250 * 251 * Should we stop any running T3 timer here? 252 */ 253 if ((asoc->sctp_cmt_on_off > 0) && 254 (asoc->sctp_cmt_pf > 0) && 255 ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) { 256 net->dest_state &= ~SCTP_ADDR_PF; 257 old_cwnd = net->cwnd; 258 net->cwnd = net->mtu * asoc->sctp_cmt_pf; 259 SDT_PROBE(sctp, cwnd, net, ack, 260 stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, 261 old_cwnd, net->cwnd); 262 SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", 263 net, net->cwnd); 264 /* 265 * Since the cwnd value is explicitly set, 266 * skip the code that updates the cwnd 267 * value. 268 */ 269 goto skip_cwnd_update; 270 } 271 } 272 #ifdef JANA_CMT_FAST_RECOVERY 273 /* 274 * CMT fast recovery code 275 */ 276 /* 277 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery 278 * && net->will_exit_fast_recovery == 0) { @@@ Do something 279 * } else if (sctp_cmt_on_off == 0 && 280 * asoc->fast_retran_loss_recovery && will_exit == 0) { 281 */ 282 #endif 283 284 if (asoc->fast_retran_loss_recovery && 285 (will_exit == 0) && 286 (asoc->sctp_cmt_on_off == 0)) { 287 /* 288 * If we are in loss recovery we skip any cwnd 289 * update 290 */ 291 goto skip_cwnd_update; 292 } 293 /* 294 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has 295 * moved. 296 */ 297 if (accum_moved || 298 ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) { 299 /* If the cumulative ack moved we can proceed */ 300 if (net->cwnd <= net->ssthresh) { 301 /* We are in slow start */ 302 if (net->flight_size + net->net_ack >= net->cwnd) { 303 if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) { 304 old_cwnd = net->cwnd; 305 net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)); 306 SDT_PROBE(sctp, cwnd, net, ack, 307 stcb->asoc.my_vtag, 308 ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), 309 net, 310 old_cwnd, net->cwnd); 311 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 312 sctp_log_cwnd(stcb, net, net->mtu, 313 SCTP_CWND_LOG_FROM_SS); 314 } 315 } else { 316 old_cwnd = net->cwnd; 317 net->cwnd += net->net_ack; 318 SDT_PROBE(sctp, cwnd, net, ack, 319 stcb->asoc.my_vtag, 320 ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), 321 net, 322 old_cwnd, net->cwnd); 323 324 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 325 sctp_log_cwnd(stcb, net, net->net_ack, 326 SCTP_CWND_LOG_FROM_SS); 327 } 328 } 329 } else { 330 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 331 sctp_log_cwnd(stcb, net, net->net_ack, 332 SCTP_CWND_LOG_NOADV_SS); 333 } 334 } 335 } else { 336 /* We are in congestion avoidance */ 337 /* 338 * Add to pba 339 */ 340 net->partial_bytes_acked += net->net_ack; 341 342 if ((net->flight_size + net->net_ack >= net->cwnd) && 343 (net->partial_bytes_acked >= net->cwnd)) { 344 net->partial_bytes_acked -= net->cwnd; 345 old_cwnd = net->cwnd; 346 net->cwnd += net->mtu; 347 SDT_PROBE(sctp, cwnd, net, ack, 348 stcb->asoc.my_vtag, 349 ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), 350 net, 351 old_cwnd, net->cwnd); 352 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 353 sctp_log_cwnd(stcb, net, net->mtu, 354 SCTP_CWND_LOG_FROM_CA); 355 } 356 } else { 357 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 358 sctp_log_cwnd(stcb, net, net->net_ack, 359 SCTP_CWND_LOG_NOADV_CA); 360 } 361 } 362 } 363 } else { 364 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 365 sctp_log_cwnd(stcb, net, net->mtu, 366 SCTP_CWND_LOG_NO_CUMACK); 367 } 368 } 369 skip_cwnd_update: 370 /* 371 * NOW, according to Karn's rule do we need to restore the 372 * RTO timer back? Check our net_ack2. If not set then we 373 * have a ambiguity.. i.e. all data ack'd was sent to more 374 * than one place. 375 */ 376 if (net->net_ack2) { 377 /* restore any doubled timers */ 378 net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1; 379 if (net->RTO < stcb->asoc.minrto) { 380 net->RTO = stcb->asoc.minrto; 381 } 382 if (net->RTO > stcb->asoc.maxrto) { 383 net->RTO = stcb->asoc.maxrto; 384 } 385 } 386 } 387 } 388 389 void 390 sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net) 391 { 392 int old_cwnd = net->cwnd; 393 394 net->ssthresh = max(net->cwnd / 2, 4 * net->mtu); 395 net->cwnd = net->mtu; 396 net->partial_bytes_acked = 0; 397 SDT_PROBE(sctp, cwnd, net, to, 398 stcb->asoc.my_vtag, 399 ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), 400 net, 401 old_cwnd, net->cwnd); 402 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 403 sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX); 404 } 405 } 406 407 void 408 sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net) 409 { 410 int old_cwnd = net->cwnd; 411 412 SCTP_STAT_INCR(sctps_ecnereducedcwnd); 413 net->ssthresh = net->cwnd / 2; 414 if (net->ssthresh < net->mtu) { 415 net->ssthresh = net->mtu; 416 /* here back off the timer as well, to slow us down */ 417 net->RTO <<= 1; 418 } 419 net->cwnd = net->ssthresh; 420 SDT_PROBE(sctp, cwnd, net, ecn, 421 stcb->asoc.my_vtag, 422 ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), 423 net, 424 old_cwnd, net->cwnd); 425 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 426 sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); 427 } 428 } 429 430 void 431 sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb, 432 struct sctp_nets *net, struct sctp_pktdrop_chunk *cp, 433 uint32_t * bottle_bw, uint32_t * on_queue) 434 { 435 uint32_t bw_avail; 436 int rtt, incr; 437 int old_cwnd = net->cwnd; 438 439 /* need real RTT for this calc */ 440 rtt = ((net->lastsa >> 2) + net->lastsv) >> 1; 441 /* get bottle neck bw */ 442 *bottle_bw = ntohl(cp->bottle_bw); 443 /* and whats on queue */ 444 *on_queue = ntohl(cp->current_onq); 445 /* 446 * adjust the on-queue if our flight is more it could be that the 447 * router has not yet gotten data "in-flight" to it 448 */ 449 if (*on_queue < net->flight_size) 450 *on_queue = net->flight_size; 451 /* calculate the available space */ 452 bw_avail = (*bottle_bw * rtt) / 1000; 453 if (bw_avail > *bottle_bw) { 454 /* 455 * Cap the growth to no more than the bottle neck. This can 456 * happen as RTT slides up due to queues. It also means if 457 * you have more than a 1 second RTT with a empty queue you 458 * will be limited to the bottle_bw per second no matter if 459 * other points have 1/2 the RTT and you could get more 460 * out... 461 */ 462 bw_avail = *bottle_bw; 463 } 464 if (*on_queue > bw_avail) { 465 /* 466 * No room for anything else don't allow anything else to be 467 * "added to the fire". 468 */ 469 int seg_inflight, seg_onqueue, my_portion; 470 471 net->partial_bytes_acked = 0; 472 473 /* how much are we over queue size? */ 474 incr = *on_queue - bw_avail; 475 if (stcb->asoc.seen_a_sack_this_pkt) { 476 /* 477 * undo any cwnd adjustment that the sack might have 478 * made 479 */ 480 net->cwnd = net->prev_cwnd; 481 } 482 /* Now how much of that is mine? */ 483 seg_inflight = net->flight_size / net->mtu; 484 seg_onqueue = *on_queue / net->mtu; 485 my_portion = (incr * seg_inflight) / seg_onqueue; 486 487 /* Have I made an adjustment already */ 488 if (net->cwnd > net->flight_size) { 489 /* 490 * for this flight I made an adjustment we need to 491 * decrease the portion by a share our previous 492 * adjustment. 493 */ 494 int diff_adj; 495 496 diff_adj = net->cwnd - net->flight_size; 497 if (diff_adj > my_portion) 498 my_portion = 0; 499 else 500 my_portion -= diff_adj; 501 } 502 /* 503 * back down to the previous cwnd (assume we have had a sack 504 * before this packet). minus what ever portion of the 505 * overage is my fault. 506 */ 507 net->cwnd -= my_portion; 508 509 /* we will NOT back down more than 1 MTU */ 510 if (net->cwnd <= net->mtu) { 511 net->cwnd = net->mtu; 512 } 513 /* force into CA */ 514 net->ssthresh = net->cwnd - 1; 515 } else { 516 /* 517 * Take 1/4 of the space left or max burst up .. whichever 518 * is less. 519 */ 520 incr = min((bw_avail - *on_queue) >> 2, 521 stcb->asoc.max_burst * net->mtu); 522 net->cwnd += incr; 523 } 524 if (net->cwnd > bw_avail) { 525 /* We can't exceed the pipe size */ 526 net->cwnd = bw_avail; 527 } 528 if (net->cwnd < net->mtu) { 529 /* We always have 1 MTU */ 530 net->cwnd = net->mtu; 531 } 532 if (net->cwnd - old_cwnd != 0) { 533 /* log only changes */ 534 SDT_PROBE(sctp, cwnd, net, pd, 535 stcb->asoc.my_vtag, 536 ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), 537 net, 538 old_cwnd, net->cwnd); 539 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 540 sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), 541 SCTP_CWND_LOG_FROM_SAT); 542 } 543 } 544 } 545 546 void 547 sctp_cwnd_update_after_output(struct sctp_tcb *stcb, 548 struct sctp_nets *net, int burst_limit) 549 { 550 int old_cwnd = net->cwnd; 551 552 if (net->ssthresh < net->cwnd) 553 net->ssthresh = net->cwnd; 554 net->cwnd = (net->flight_size + (burst_limit * net->mtu)); 555 SDT_PROBE(sctp, cwnd, net, bl, 556 stcb->asoc.my_vtag, 557 ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), 558 net, 559 old_cwnd, net->cwnd); 560 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 561 sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST); 562 } 563 } 564 565 void 566 sctp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp, 567 struct sctp_tcb *stcb, struct sctp_nets *net) 568 { 569 int old_cwnd = net->cwnd; 570 571 sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED); 572 /* 573 * make a small adjustment to cwnd and force to CA. 574 */ 575 if (net->cwnd > net->mtu) 576 /* drop down one MTU after sending */ 577 net->cwnd -= net->mtu; 578 if (net->cwnd < net->ssthresh) 579 /* still in SS move to CA */ 580 net->ssthresh = net->cwnd - 1; 581 SDT_PROBE(sctp, cwnd, net, fr, 582 stcb->asoc.my_vtag, 583 ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), 584 net, 585 old_cwnd, net->cwnd); 586 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 587 sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR); 588 } 589 } 590 591 struct sctp_hs_raise_drop { 592 int32_t cwnd; 593 int32_t increase; 594 int32_t drop_percent; 595 }; 596 597 #define SCTP_HS_TABLE_SIZE 73 598 599 struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = { 600 {38, 1, 50}, /* 0 */ 601 {118, 2, 44}, /* 1 */ 602 {221, 3, 41}, /* 2 */ 603 {347, 4, 38}, /* 3 */ 604 {495, 5, 37}, /* 4 */ 605 {663, 6, 35}, /* 5 */ 606 {851, 7, 34}, /* 6 */ 607 {1058, 8, 33}, /* 7 */ 608 {1284, 9, 32}, /* 8 */ 609 {1529, 10, 31}, /* 9 */ 610 {1793, 11, 30}, /* 10 */ 611 {2076, 12, 29}, /* 11 */ 612 {2378, 13, 28}, /* 12 */ 613 {2699, 14, 28}, /* 13 */ 614 {3039, 15, 27}, /* 14 */ 615 {3399, 16, 27}, /* 15 */ 616 {3778, 17, 26}, /* 16 */ 617 {4177, 18, 26}, /* 17 */ 618 {4596, 19, 25}, /* 18 */ 619 {5036, 20, 25}, /* 19 */ 620 {5497, 21, 24}, /* 20 */ 621 {5979, 22, 24}, /* 21 */ 622 {6483, 23, 23}, /* 22 */ 623 {7009, 24, 23}, /* 23 */ 624 {7558, 25, 22}, /* 24 */ 625 {8130, 26, 22}, /* 25 */ 626 {8726, 27, 22}, /* 26 */ 627 {9346, 28, 21}, /* 27 */ 628 {9991, 29, 21}, /* 28 */ 629 {10661, 30, 21}, /* 29 */ 630 {11358, 31, 20}, /* 30 */ 631 {12082, 32, 20}, /* 31 */ 632 {12834, 33, 20}, /* 32 */ 633 {13614, 34, 19}, /* 33 */ 634 {14424, 35, 19}, /* 34 */ 635 {15265, 36, 19}, /* 35 */ 636 {16137, 37, 19}, /* 36 */ 637 {17042, 38, 18}, /* 37 */ 638 {17981, 39, 18}, /* 38 */ 639 {18955, 40, 18}, /* 39 */ 640 {19965, 41, 17}, /* 40 */ 641 {21013, 42, 17}, /* 41 */ 642 {22101, 43, 17}, /* 42 */ 643 {23230, 44, 17}, /* 43 */ 644 {24402, 45, 16}, /* 44 */ 645 {25618, 46, 16}, /* 45 */ 646 {26881, 47, 16}, /* 46 */ 647 {28193, 48, 16}, /* 47 */ 648 {29557, 49, 15}, /* 48 */ 649 {30975, 50, 15}, /* 49 */ 650 {32450, 51, 15}, /* 50 */ 651 {33986, 52, 15}, /* 51 */ 652 {35586, 53, 14}, /* 52 */ 653 {37253, 54, 14}, /* 53 */ 654 {38992, 55, 14}, /* 54 */ 655 {40808, 56, 14}, /* 55 */ 656 {42707, 57, 13}, /* 56 */ 657 {44694, 58, 13}, /* 57 */ 658 {46776, 59, 13}, /* 58 */ 659 {48961, 60, 13}, /* 59 */ 660 {51258, 61, 13}, /* 60 */ 661 {53677, 62, 12}, /* 61 */ 662 {56230, 63, 12}, /* 62 */ 663 {58932, 64, 12}, /* 63 */ 664 {61799, 65, 12}, /* 64 */ 665 {64851, 66, 11}, /* 65 */ 666 {68113, 67, 11}, /* 66 */ 667 {71617, 68, 11}, /* 67 */ 668 {75401, 69, 10}, /* 68 */ 669 {79517, 70, 10}, /* 69 */ 670 {84035, 71, 10}, /* 70 */ 671 {89053, 72, 10}, /* 71 */ 672 {94717, 73, 9} /* 72 */ 673 }; 674 675 static void 676 sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net) 677 { 678 int cur_val, i, indx, incr; 679 680 cur_val = net->cwnd >> 10; 681 indx = SCTP_HS_TABLE_SIZE - 1; 682 #ifdef SCTP_DEBUG 683 printf("HS CC CAlled.\n"); 684 #endif 685 if (cur_val < sctp_cwnd_adjust[0].cwnd) { 686 /* normal mode */ 687 if (net->net_ack > net->mtu) { 688 net->cwnd += net->mtu; 689 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 690 sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS); 691 } 692 } else { 693 net->cwnd += net->net_ack; 694 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 695 sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS); 696 } 697 } 698 } else { 699 for (i = net->last_hs_used; i < SCTP_HS_TABLE_SIZE; i++) { 700 if (cur_val < sctp_cwnd_adjust[i].cwnd) { 701 indx = i; 702 break; 703 } 704 } 705 net->last_hs_used = indx; 706 incr = ((sctp_cwnd_adjust[indx].increase) << 10); 707 net->cwnd += incr; 708 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 709 sctp_log_cwnd(stcb, net, incr, SCTP_CWND_LOG_FROM_SS); 710 } 711 } 712 } 713 714 static void 715 sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net) 716 { 717 int cur_val, i, indx; 718 int old_cwnd = net->cwnd; 719 720 cur_val = net->cwnd >> 10; 721 if (cur_val < sctp_cwnd_adjust[0].cwnd) { 722 /* normal mode */ 723 net->ssthresh = net->cwnd / 2; 724 if (net->ssthresh < (net->mtu * 2)) { 725 net->ssthresh = 2 * net->mtu; 726 } 727 net->cwnd = net->ssthresh; 728 } else { 729 /* drop by the proper amount */ 730 net->ssthresh = net->cwnd - (int)((net->cwnd / 100) * 731 sctp_cwnd_adjust[net->last_hs_used].drop_percent); 732 net->cwnd = net->ssthresh; 733 /* now where are we */ 734 indx = net->last_hs_used; 735 cur_val = net->cwnd >> 10; 736 /* reset where we are in the table */ 737 if (cur_val < sctp_cwnd_adjust[0].cwnd) { 738 /* feel out of hs */ 739 net->last_hs_used = 0; 740 } else { 741 for (i = indx; i >= 1; i--) { 742 if (cur_val > sctp_cwnd_adjust[i - 1].cwnd) { 743 break; 744 } 745 } 746 net->last_hs_used = indx; 747 } 748 } 749 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 750 sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR); 751 } 752 } 753 754 void 755 sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb, 756 struct sctp_association *asoc) 757 { 758 struct sctp_nets *net; 759 760 /* 761 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) && 762 * (net->fast_retran_loss_recovery == 0))) 763 */ 764 TAILQ_FOREACH(net, &asoc->nets, sctp_next) { 765 if ((asoc->fast_retran_loss_recovery == 0) || 766 (asoc->sctp_cmt_on_off > 0)) { 767 /* out of a RFC2582 Fast recovery window? */ 768 if (net->net_ack > 0) { 769 /* 770 * per section 7.2.3, are there any 771 * destinations that had a fast retransmit 772 * to them. If so what we need to do is 773 * adjust ssthresh and cwnd. 774 */ 775 struct sctp_tmit_chunk *lchk; 776 777 sctp_hs_cwnd_decrease(stcb, net); 778 779 lchk = TAILQ_FIRST(&asoc->send_queue); 780 781 net->partial_bytes_acked = 0; 782 /* Turn on fast recovery window */ 783 asoc->fast_retran_loss_recovery = 1; 784 if (lchk == NULL) { 785 /* Mark end of the window */ 786 asoc->fast_recovery_tsn = asoc->sending_seq - 1; 787 } else { 788 asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; 789 } 790 791 /* 792 * CMT fast recovery -- per destination 793 * recovery variable. 794 */ 795 net->fast_retran_loss_recovery = 1; 796 797 if (lchk == NULL) { 798 /* Mark end of the window */ 799 net->fast_recovery_tsn = asoc->sending_seq - 1; 800 } else { 801 net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; 802 } 803 804 /* 805 * Disable Nonce Sum Checking and store the 806 * resync tsn 807 */ 808 asoc->nonce_sum_check = 0; 809 asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1; 810 811 sctp_timer_stop(SCTP_TIMER_TYPE_SEND, 812 stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); 813 sctp_timer_start(SCTP_TIMER_TYPE_SEND, 814 stcb->sctp_ep, stcb, net); 815 } 816 } else if (net->net_ack > 0) { 817 /* 818 * Mark a peg that we WOULD have done a cwnd 819 * reduction but RFC2582 prevented this action. 820 */ 821 SCTP_STAT_INCR(sctps_fastretransinrtt); 822 } 823 } 824 } 825 826 void 827 sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb, 828 struct sctp_association *asoc, 829 int accum_moved, int reneged_all, int will_exit) 830 { 831 struct sctp_nets *net; 832 833 /******************************/ 834 /* update cwnd and Early FR */ 835 /******************************/ 836 TAILQ_FOREACH(net, &asoc->nets, sctp_next) { 837 838 #ifdef JANA_CMT_FAST_RECOVERY 839 /* 840 * CMT fast recovery code. Need to debug. 841 */ 842 if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) { 843 if (compare_with_wrap(asoc->last_acked_seq, 844 net->fast_recovery_tsn, MAX_TSN) || 845 (asoc->last_acked_seq == net->fast_recovery_tsn) || 846 compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) || 847 (net->pseudo_cumack == net->fast_recovery_tsn)) { 848 net->will_exit_fast_recovery = 1; 849 } 850 } 851 #endif 852 if (SCTP_BASE_SYSCTL(sctp_early_fr)) { 853 /* 854 * So, first of all do we need to have a Early FR 855 * timer running? 856 */ 857 if ((!TAILQ_EMPTY(&asoc->sent_queue) && 858 (net->ref_count > 1) && 859 (net->flight_size < net->cwnd)) || 860 (reneged_all)) { 861 /* 862 * yes, so in this case stop it if its 863 * running, and then restart it. Reneging 864 * all is a special case where we want to 865 * run the Early FR timer and then force the 866 * last few unacked to be sent, causing us 867 * to illicit a sack with gaps to force out 868 * the others. 869 */ 870 if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { 871 SCTP_STAT_INCR(sctps_earlyfrstpidsck2); 872 sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, 873 SCTP_FROM_SCTP_INDATA + SCTP_LOC_20); 874 } 875 SCTP_STAT_INCR(sctps_earlyfrstrid); 876 sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); 877 } else { 878 /* No, stop it if its running */ 879 if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { 880 SCTP_STAT_INCR(sctps_earlyfrstpidsck3); 881 sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, 882 SCTP_FROM_SCTP_INDATA + SCTP_LOC_21); 883 } 884 } 885 } 886 /* if nothing was acked on this destination skip it */ 887 if (net->net_ack == 0) { 888 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 889 sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK); 890 } 891 continue; 892 } 893 if (net->net_ack2 > 0) { 894 /* 895 * Karn's rule applies to clearing error count, this 896 * is optional. 897 */ 898 net->error_count = 0; 899 if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == 900 SCTP_ADDR_NOT_REACHABLE) { 901 /* addr came good */ 902 net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; 903 net->dest_state |= SCTP_ADDR_REACHABLE; 904 sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 905 SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED); 906 /* now was it the primary? if so restore */ 907 if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) { 908 (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net); 909 } 910 } 911 /* 912 * JRS 5/14/07 - If CMT PF is on and the destination 913 * is in PF state, set the destination to active 914 * state and set the cwnd to one or two MTU's based 915 * on whether PF1 or PF2 is being used. 916 * 917 * Should we stop any running T3 timer here? 918 */ 919 if ((asoc->sctp_cmt_on_off > 0) && 920 (asoc->sctp_cmt_pf > 0) && 921 ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) { 922 net->dest_state &= ~SCTP_ADDR_PF; 923 net->cwnd = net->mtu * asoc->sctp_cmt_pf; 924 SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", 925 net, net->cwnd); 926 /* 927 * Since the cwnd value is explicitly set, 928 * skip the code that updates the cwnd 929 * value. 930 */ 931 goto skip_cwnd_update; 932 } 933 } 934 #ifdef JANA_CMT_FAST_RECOVERY 935 /* 936 * CMT fast recovery code 937 */ 938 /* 939 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery 940 * && net->will_exit_fast_recovery == 0) { @@@ Do something 941 * } else if (sctp_cmt_on_off == 0 && 942 * asoc->fast_retran_loss_recovery && will_exit == 0) { 943 */ 944 #endif 945 946 if (asoc->fast_retran_loss_recovery && 947 (will_exit == 0) && 948 (asoc->sctp_cmt_on_off == 0)) { 949 /* 950 * If we are in loss recovery we skip any cwnd 951 * update 952 */ 953 goto skip_cwnd_update; 954 } 955 /* 956 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has 957 * moved. 958 */ 959 if (accum_moved || 960 ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) { 961 /* If the cumulative ack moved we can proceed */ 962 if (net->cwnd <= net->ssthresh) { 963 /* We are in slow start */ 964 if (net->flight_size + net->net_ack >= net->cwnd) { 965 966 sctp_hs_cwnd_increase(stcb, net); 967 968 } else { 969 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 970 sctp_log_cwnd(stcb, net, net->net_ack, 971 SCTP_CWND_LOG_NOADV_SS); 972 } 973 } 974 } else { 975 /* We are in congestion avoidance */ 976 net->partial_bytes_acked += net->net_ack; 977 if ((net->flight_size + net->net_ack >= net->cwnd) && 978 (net->partial_bytes_acked >= net->cwnd)) { 979 net->partial_bytes_acked -= net->cwnd; 980 net->cwnd += net->mtu; 981 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 982 sctp_log_cwnd(stcb, net, net->mtu, 983 SCTP_CWND_LOG_FROM_CA); 984 } 985 } else { 986 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 987 sctp_log_cwnd(stcb, net, net->net_ack, 988 SCTP_CWND_LOG_NOADV_CA); 989 } 990 } 991 } 992 } else { 993 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 994 sctp_log_cwnd(stcb, net, net->mtu, 995 SCTP_CWND_LOG_NO_CUMACK); 996 } 997 } 998 skip_cwnd_update: 999 /* 1000 * NOW, according to Karn's rule do we need to restore the 1001 * RTO timer back? Check our net_ack2. If not set then we 1002 * have a ambiguity.. i.e. all data ack'd was sent to more 1003 * than one place. 1004 */ 1005 if (net->net_ack2) { 1006 /* restore any doubled timers */ 1007 net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1; 1008 if (net->RTO < stcb->asoc.minrto) { 1009 net->RTO = stcb->asoc.minrto; 1010 } 1011 if (net->RTO > stcb->asoc.maxrto) { 1012 net->RTO = stcb->asoc.maxrto; 1013 } 1014 } 1015 } 1016 } 1017 1018 1019 /* 1020 * H-TCP congestion control. The algorithm is detailed in: 1021 * R.N.Shorten, D.J.Leith: 1022 * "H-TCP: TCP for high-speed and long-distance networks" 1023 * Proc. PFLDnet, Argonne, 2004. 1024 * http://www.hamilton.ie/net/htcp3.pdf 1025 */ 1026 1027 1028 static int use_rtt_scaling = 1; 1029 static int use_bandwidth_switch = 1; 1030 1031 static inline int 1032 between(uint32_t seq1, uint32_t seq2, uint32_t seq3) 1033 { 1034 return seq3 - seq2 >= seq1 - seq2; 1035 } 1036 1037 static inline uint32_t 1038 htcp_cong_time(struct htcp *ca) 1039 { 1040 return sctp_get_tick_count() - ca->last_cong; 1041 } 1042 1043 static inline uint32_t 1044 htcp_ccount(struct htcp *ca) 1045 { 1046 return htcp_cong_time(ca) / ca->minRTT; 1047 } 1048 1049 static inline void 1050 htcp_reset(struct htcp *ca) 1051 { 1052 ca->undo_last_cong = ca->last_cong; 1053 ca->undo_maxRTT = ca->maxRTT; 1054 ca->undo_old_maxB = ca->old_maxB; 1055 ca->last_cong = sctp_get_tick_count(); 1056 } 1057 1058 #ifdef SCTP_NOT_USED 1059 1060 static uint32_t 1061 htcp_cwnd_undo(struct sctp_tcb *stcb, struct sctp_nets *net) 1062 { 1063 net->htcp_ca.last_cong = net->htcp_ca.undo_last_cong; 1064 net->htcp_ca.maxRTT = net->htcp_ca.undo_maxRTT; 1065 net->htcp_ca.old_maxB = net->htcp_ca.undo_old_maxB; 1066 return max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->htcp_ca.beta) * net->mtu); 1067 } 1068 1069 #endif 1070 1071 static inline void 1072 measure_rtt(struct sctp_tcb *stcb, struct sctp_nets *net) 1073 { 1074 uint32_t srtt = net->lastsa >> 3; 1075 1076 /* keep track of minimum RTT seen so far, minRTT is zero at first */ 1077 if (net->htcp_ca.minRTT > srtt || !net->htcp_ca.minRTT) 1078 net->htcp_ca.minRTT = srtt; 1079 1080 /* max RTT */ 1081 if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->htcp_ca) > 3) { 1082 if (net->htcp_ca.maxRTT < net->htcp_ca.minRTT) 1083 net->htcp_ca.maxRTT = net->htcp_ca.minRTT; 1084 if (net->htcp_ca.maxRTT < srtt && srtt <= net->htcp_ca.maxRTT + MSEC_TO_TICKS(20)) 1085 net->htcp_ca.maxRTT = srtt; 1086 } 1087 } 1088 1089 static void 1090 measure_achieved_throughput(struct sctp_tcb *stcb, struct sctp_nets *net) 1091 { 1092 uint32_t now = sctp_get_tick_count(); 1093 1094 if (net->fast_retran_ip == 0) 1095 net->htcp_ca.bytes_acked = net->net_ack; 1096 1097 if (!use_bandwidth_switch) 1098 return; 1099 1100 /* achieved throughput calculations */ 1101 /* JRS - not 100% sure of this statement */ 1102 if (net->fast_retran_ip == 1) { 1103 net->htcp_ca.bytecount = 0; 1104 net->htcp_ca.lasttime = now; 1105 return; 1106 } 1107 net->htcp_ca.bytecount += net->net_ack; 1108 1109 if (net->htcp_ca.bytecount >= net->cwnd - ((net->htcp_ca.alpha >> 7 ? : 1) * net->mtu) 1110 && now - net->htcp_ca.lasttime >= net->htcp_ca.minRTT 1111 && net->htcp_ca.minRTT > 0) { 1112 uint32_t cur_Bi = net->htcp_ca.bytecount / net->mtu * hz / (now - net->htcp_ca.lasttime); 1113 1114 if (htcp_ccount(&net->htcp_ca) <= 3) { 1115 /* just after backoff */ 1116 net->htcp_ca.minB = net->htcp_ca.maxB = net->htcp_ca.Bi = cur_Bi; 1117 } else { 1118 net->htcp_ca.Bi = (3 * net->htcp_ca.Bi + cur_Bi) / 4; 1119 if (net->htcp_ca.Bi > net->htcp_ca.maxB) 1120 net->htcp_ca.maxB = net->htcp_ca.Bi; 1121 if (net->htcp_ca.minB > net->htcp_ca.maxB) 1122 net->htcp_ca.minB = net->htcp_ca.maxB; 1123 } 1124 net->htcp_ca.bytecount = 0; 1125 net->htcp_ca.lasttime = now; 1126 } 1127 } 1128 1129 static inline void 1130 htcp_beta_update(struct htcp *ca, uint32_t minRTT, uint32_t maxRTT) 1131 { 1132 if (use_bandwidth_switch) { 1133 uint32_t maxB = ca->maxB; 1134 uint32_t old_maxB = ca->old_maxB; 1135 1136 ca->old_maxB = ca->maxB; 1137 1138 if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) { 1139 ca->beta = BETA_MIN; 1140 ca->modeswitch = 0; 1141 return; 1142 } 1143 } 1144 if (ca->modeswitch && minRTT > (uint32_t) MSEC_TO_TICKS(10) && maxRTT) { 1145 ca->beta = (minRTT << 7) / maxRTT; 1146 if (ca->beta < BETA_MIN) 1147 ca->beta = BETA_MIN; 1148 else if (ca->beta > BETA_MAX) 1149 ca->beta = BETA_MAX; 1150 } else { 1151 ca->beta = BETA_MIN; 1152 ca->modeswitch = 1; 1153 } 1154 } 1155 1156 static inline void 1157 htcp_alpha_update(struct htcp *ca) 1158 { 1159 uint32_t minRTT = ca->minRTT; 1160 uint32_t factor = 1; 1161 uint32_t diff = htcp_cong_time(ca); 1162 1163 if (diff > (uint32_t) hz) { 1164 diff -= hz; 1165 factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / hz)) / hz; 1166 } 1167 if (use_rtt_scaling && minRTT) { 1168 uint32_t scale = (hz << 3) / (10 * minRTT); 1169 1170 scale = min(max(scale, 1U << 2), 10U << 3); /* clamping ratio to 1171 * interval [0.5,10]<<3 */ 1172 factor = (factor << 3) / scale; 1173 if (!factor) 1174 factor = 1; 1175 } 1176 ca->alpha = 2 * factor * ((1 << 7) - ca->beta); 1177 if (!ca->alpha) 1178 ca->alpha = ALPHA_BASE; 1179 } 1180 1181 /* After we have the rtt data to calculate beta, we'd still prefer to wait one 1182 * rtt before we adjust our beta to ensure we are working from a consistent 1183 * data. 1184 * 1185 * This function should be called when we hit a congestion event since only at 1186 * that point do we really have a real sense of maxRTT (the queues en route 1187 * were getting just too full now). 1188 */ 1189 static void 1190 htcp_param_update(struct sctp_tcb *stcb, struct sctp_nets *net) 1191 { 1192 uint32_t minRTT = net->htcp_ca.minRTT; 1193 uint32_t maxRTT = net->htcp_ca.maxRTT; 1194 1195 htcp_beta_update(&net->htcp_ca, minRTT, maxRTT); 1196 htcp_alpha_update(&net->htcp_ca); 1197 1198 /* 1199 * add slowly fading memory for maxRTT to accommodate routing 1200 * changes etc 1201 */ 1202 if (minRTT > 0 && maxRTT > minRTT) 1203 net->htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100; 1204 } 1205 1206 static uint32_t 1207 htcp_recalc_ssthresh(struct sctp_tcb *stcb, struct sctp_nets *net) 1208 { 1209 htcp_param_update(stcb, net); 1210 return max(((net->cwnd / net->mtu * net->htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu); 1211 } 1212 1213 static void 1214 htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net) 1215 { 1216 /*- 1217 * How to handle these functions? 1218 * if (!tcp_is_cwnd_limited(sk, in_flight)) RRS - good question. 1219 * return; 1220 */ 1221 if (net->cwnd <= net->ssthresh) { 1222 /* We are in slow start */ 1223 if (net->flight_size + net->net_ack >= net->cwnd) { 1224 if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) { 1225 net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)); 1226 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 1227 sctp_log_cwnd(stcb, net, net->mtu, 1228 SCTP_CWND_LOG_FROM_SS); 1229 } 1230 } else { 1231 net->cwnd += net->net_ack; 1232 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 1233 sctp_log_cwnd(stcb, net, net->net_ack, 1234 SCTP_CWND_LOG_FROM_SS); 1235 } 1236 } 1237 } else { 1238 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 1239 sctp_log_cwnd(stcb, net, net->net_ack, 1240 SCTP_CWND_LOG_NOADV_SS); 1241 } 1242 } 1243 } else { 1244 measure_rtt(stcb, net); 1245 1246 /* 1247 * In dangerous area, increase slowly. In theory this is 1248 * net->cwnd += alpha / net->cwnd 1249 */ 1250 /* What is snd_cwnd_cnt?? */ 1251 if (((net->partial_bytes_acked / net->mtu * net->htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) { 1252 /*- 1253 * Does SCTP have a cwnd clamp? 1254 * if (net->snd_cwnd < net->snd_cwnd_clamp) - Nope (RRS). 1255 */ 1256 net->cwnd += net->mtu; 1257 net->partial_bytes_acked = 0; 1258 htcp_alpha_update(&net->htcp_ca); 1259 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 1260 sctp_log_cwnd(stcb, net, net->mtu, 1261 SCTP_CWND_LOG_FROM_CA); 1262 } 1263 } else { 1264 net->partial_bytes_acked += net->net_ack; 1265 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 1266 sctp_log_cwnd(stcb, net, net->net_ack, 1267 SCTP_CWND_LOG_NOADV_CA); 1268 } 1269 } 1270 1271 net->htcp_ca.bytes_acked = net->mtu; 1272 } 1273 } 1274 1275 #ifdef SCTP_NOT_USED 1276 /* Lower bound on congestion window. */ 1277 static uint32_t 1278 htcp_min_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net) 1279 { 1280 return net->ssthresh; 1281 } 1282 1283 #endif 1284 1285 static void 1286 htcp_init(struct sctp_tcb *stcb, struct sctp_nets *net) 1287 { 1288 memset(&net->htcp_ca, 0, sizeof(struct htcp)); 1289 net->htcp_ca.alpha = ALPHA_BASE; 1290 net->htcp_ca.beta = BETA_MIN; 1291 net->htcp_ca.bytes_acked = net->mtu; 1292 net->htcp_ca.last_cong = sctp_get_tick_count(); 1293 } 1294 1295 void 1296 sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net) 1297 { 1298 /* 1299 * We take the max of the burst limit times a MTU or the 1300 * INITIAL_CWND. We then limit this to 4 MTU's of sending. 1301 */ 1302 net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND)); 1303 net->ssthresh = stcb->asoc.peers_rwnd; 1304 htcp_init(stcb, net); 1305 1306 if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) { 1307 sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION); 1308 } 1309 } 1310 1311 void 1312 sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb, 1313 struct sctp_association *asoc, 1314 int accum_moved, int reneged_all, int will_exit) 1315 { 1316 struct sctp_nets *net; 1317 1318 /******************************/ 1319 /* update cwnd and Early FR */ 1320 /******************************/ 1321 TAILQ_FOREACH(net, &asoc->nets, sctp_next) { 1322 1323 #ifdef JANA_CMT_FAST_RECOVERY 1324 /* 1325 * CMT fast recovery code. Need to debug. 1326 */ 1327 if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) { 1328 if (compare_with_wrap(asoc->last_acked_seq, 1329 net->fast_recovery_tsn, MAX_TSN) || 1330 (asoc->last_acked_seq == net->fast_recovery_tsn) || 1331 compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) || 1332 (net->pseudo_cumack == net->fast_recovery_tsn)) { 1333 net->will_exit_fast_recovery = 1; 1334 } 1335 } 1336 #endif 1337 if (SCTP_BASE_SYSCTL(sctp_early_fr)) { 1338 /* 1339 * So, first of all do we need to have a Early FR 1340 * timer running? 1341 */ 1342 if ((!TAILQ_EMPTY(&asoc->sent_queue) && 1343 (net->ref_count > 1) && 1344 (net->flight_size < net->cwnd)) || 1345 (reneged_all)) { 1346 /* 1347 * yes, so in this case stop it if its 1348 * running, and then restart it. Reneging 1349 * all is a special case where we want to 1350 * run the Early FR timer and then force the 1351 * last few unacked to be sent, causing us 1352 * to illicit a sack with gaps to force out 1353 * the others. 1354 */ 1355 if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { 1356 SCTP_STAT_INCR(sctps_earlyfrstpidsck2); 1357 sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, 1358 SCTP_FROM_SCTP_INDATA + SCTP_LOC_20); 1359 } 1360 SCTP_STAT_INCR(sctps_earlyfrstrid); 1361 sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); 1362 } else { 1363 /* No, stop it if its running */ 1364 if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { 1365 SCTP_STAT_INCR(sctps_earlyfrstpidsck3); 1366 sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, 1367 SCTP_FROM_SCTP_INDATA + SCTP_LOC_21); 1368 } 1369 } 1370 } 1371 /* if nothing was acked on this destination skip it */ 1372 if (net->net_ack == 0) { 1373 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 1374 sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK); 1375 } 1376 continue; 1377 } 1378 if (net->net_ack2 > 0) { 1379 /* 1380 * Karn's rule applies to clearing error count, this 1381 * is optional. 1382 */ 1383 net->error_count = 0; 1384 if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == 1385 SCTP_ADDR_NOT_REACHABLE) { 1386 /* addr came good */ 1387 net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; 1388 net->dest_state |= SCTP_ADDR_REACHABLE; 1389 sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 1390 SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED); 1391 /* now was it the primary? if so restore */ 1392 if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) { 1393 (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net); 1394 } 1395 } 1396 /* 1397 * JRS 5/14/07 - If CMT PF is on and the destination 1398 * is in PF state, set the destination to active 1399 * state and set the cwnd to one or two MTU's based 1400 * on whether PF1 or PF2 is being used. 1401 * 1402 * Should we stop any running T3 timer here? 1403 */ 1404 if ((asoc->sctp_cmt_on_off > 0) && 1405 (asoc->sctp_cmt_pf > 0) && 1406 ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) { 1407 net->dest_state &= ~SCTP_ADDR_PF; 1408 net->cwnd = net->mtu * asoc->sctp_cmt_pf; 1409 SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", 1410 net, net->cwnd); 1411 /* 1412 * Since the cwnd value is explicitly set, 1413 * skip the code that updates the cwnd 1414 * value. 1415 */ 1416 goto skip_cwnd_update; 1417 } 1418 } 1419 #ifdef JANA_CMT_FAST_RECOVERY 1420 /* 1421 * CMT fast recovery code 1422 */ 1423 /* 1424 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery 1425 * && net->will_exit_fast_recovery == 0) { @@@ Do something 1426 * } else if (sctp_cmt_on_off == 0 && 1427 * asoc->fast_retran_loss_recovery && will_exit == 0) { 1428 */ 1429 #endif 1430 1431 if (asoc->fast_retran_loss_recovery && 1432 will_exit == 0 && 1433 (asoc->sctp_cmt_on_off == 0)) { 1434 /* 1435 * If we are in loss recovery we skip any cwnd 1436 * update 1437 */ 1438 goto skip_cwnd_update; 1439 } 1440 /* 1441 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has 1442 * moved. 1443 */ 1444 if (accum_moved || 1445 ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) { 1446 htcp_cong_avoid(stcb, net); 1447 measure_achieved_throughput(stcb, net); 1448 } else { 1449 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 1450 sctp_log_cwnd(stcb, net, net->mtu, 1451 SCTP_CWND_LOG_NO_CUMACK); 1452 } 1453 } 1454 skip_cwnd_update: 1455 /* 1456 * NOW, according to Karn's rule do we need to restore the 1457 * RTO timer back? Check our net_ack2. If not set then we 1458 * have a ambiguity.. i.e. all data ack'd was sent to more 1459 * than one place. 1460 */ 1461 if (net->net_ack2) { 1462 /* restore any doubled timers */ 1463 net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1; 1464 if (net->RTO < stcb->asoc.minrto) { 1465 net->RTO = stcb->asoc.minrto; 1466 } 1467 if (net->RTO > stcb->asoc.maxrto) { 1468 net->RTO = stcb->asoc.maxrto; 1469 } 1470 } 1471 } 1472 } 1473 1474 void 1475 sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb, 1476 struct sctp_association *asoc) 1477 { 1478 struct sctp_nets *net; 1479 1480 /* 1481 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) && 1482 * (net->fast_retran_loss_recovery == 0))) 1483 */ 1484 TAILQ_FOREACH(net, &asoc->nets, sctp_next) { 1485 if ((asoc->fast_retran_loss_recovery == 0) || 1486 (asoc->sctp_cmt_on_off > 0)) { 1487 /* out of a RFC2582 Fast recovery window? */ 1488 if (net->net_ack > 0) { 1489 /* 1490 * per section 7.2.3, are there any 1491 * destinations that had a fast retransmit 1492 * to them. If so what we need to do is 1493 * adjust ssthresh and cwnd. 1494 */ 1495 struct sctp_tmit_chunk *lchk; 1496 int old_cwnd = net->cwnd; 1497 1498 /* JRS - reset as if state were changed */ 1499 htcp_reset(&net->htcp_ca); 1500 net->ssthresh = htcp_recalc_ssthresh(stcb, net); 1501 net->cwnd = net->ssthresh; 1502 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 1503 sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), 1504 SCTP_CWND_LOG_FROM_FR); 1505 } 1506 lchk = TAILQ_FIRST(&asoc->send_queue); 1507 1508 net->partial_bytes_acked = 0; 1509 /* Turn on fast recovery window */ 1510 asoc->fast_retran_loss_recovery = 1; 1511 if (lchk == NULL) { 1512 /* Mark end of the window */ 1513 asoc->fast_recovery_tsn = asoc->sending_seq - 1; 1514 } else { 1515 asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; 1516 } 1517 1518 /* 1519 * CMT fast recovery -- per destination 1520 * recovery variable. 1521 */ 1522 net->fast_retran_loss_recovery = 1; 1523 1524 if (lchk == NULL) { 1525 /* Mark end of the window */ 1526 net->fast_recovery_tsn = asoc->sending_seq - 1; 1527 } else { 1528 net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; 1529 } 1530 1531 /* 1532 * Disable Nonce Sum Checking and store the 1533 * resync tsn 1534 */ 1535 asoc->nonce_sum_check = 0; 1536 asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1; 1537 1538 sctp_timer_stop(SCTP_TIMER_TYPE_SEND, 1539 stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); 1540 sctp_timer_start(SCTP_TIMER_TYPE_SEND, 1541 stcb->sctp_ep, stcb, net); 1542 } 1543 } else if (net->net_ack > 0) { 1544 /* 1545 * Mark a peg that we WOULD have done a cwnd 1546 * reduction but RFC2582 prevented this action. 1547 */ 1548 SCTP_STAT_INCR(sctps_fastretransinrtt); 1549 } 1550 } 1551 } 1552 1553 void 1554 sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb, 1555 struct sctp_nets *net) 1556 { 1557 int old_cwnd = net->cwnd; 1558 1559 /* JRS - reset as if the state were being changed to timeout */ 1560 htcp_reset(&net->htcp_ca); 1561 net->ssthresh = htcp_recalc_ssthresh(stcb, net); 1562 net->cwnd = net->mtu; 1563 net->partial_bytes_acked = 0; 1564 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 1565 sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX); 1566 } 1567 } 1568 1569 void 1570 sctp_htcp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp, 1571 struct sctp_tcb *stcb, struct sctp_nets *net) 1572 { 1573 int old_cwnd; 1574 1575 old_cwnd = net->cwnd; 1576 1577 sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED); 1578 net->htcp_ca.last_cong = sctp_get_tick_count(); 1579 /* 1580 * make a small adjustment to cwnd and force to CA. 1581 */ 1582 if (net->cwnd > net->mtu) 1583 /* drop down one MTU after sending */ 1584 net->cwnd -= net->mtu; 1585 if (net->cwnd < net->ssthresh) 1586 /* still in SS move to CA */ 1587 net->ssthresh = net->cwnd - 1; 1588 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 1589 sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR); 1590 } 1591 } 1592 1593 void 1594 sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, 1595 struct sctp_nets *net) 1596 { 1597 int old_cwnd; 1598 1599 old_cwnd = net->cwnd; 1600 1601 /* JRS - reset hctp as if state changed */ 1602 htcp_reset(&net->htcp_ca); 1603 SCTP_STAT_INCR(sctps_ecnereducedcwnd); 1604 net->ssthresh = htcp_recalc_ssthresh(stcb, net); 1605 if (net->ssthresh < net->mtu) { 1606 net->ssthresh = net->mtu; 1607 /* here back off the timer as well, to slow us down */ 1608 net->RTO <<= 1; 1609 } 1610 net->cwnd = net->ssthresh; 1611 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 1612 sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); 1613 } 1614 } 1615