1 /*- 2 * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * 7 * a) Redistributions of source code must retain the above copyright notice, 8 * this list of conditions and the following disclaimer. 9 * 10 * b) Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in 12 * the documentation and/or other materials provided with the distribution. 13 * 14 * c) Neither the name of Cisco Systems, Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived 16 * from this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 20 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 28 * THE POSSIBILITY OF SUCH DAMAGE. 
29 */ 30 31 #include <netinet/sctp_os.h> 32 #include <netinet/sctp_var.h> 33 #include <netinet/sctp_sysctl.h> 34 #include <netinet/sctp_pcb.h> 35 #include <netinet/sctp_header.h> 36 #include <netinet/sctputil.h> 37 #include <netinet/sctp_output.h> 38 #include <netinet/sctp_input.h> 39 #include <netinet/sctp_indata.h> 40 #include <netinet/sctp_uio.h> 41 #include <netinet/sctp_timer.h> 42 #include <netinet/sctp_auth.h> 43 #include <netinet/sctp_asconf.h> 44 #include <netinet/sctp_cc_functions.h> 45 #include <netinet/sctp_dtrace_declare.h> 46 #include <sys/cdefs.h> 47 __FBSDID("$FreeBSD$"); 48 49 void 50 sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net) 51 { 52 struct sctp_association *assoc; 53 uint32_t cwnd_in_mtu; 54 55 assoc = &stcb->asoc; 56 cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd); 57 if (cwnd_in_mtu == 0) { 58 /* Using 0 means that the value of RFC 4960 is used. */ 59 net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND)); 60 } else { 61 /* 62 * We take the minimum of the burst limit and the initial 63 * congestion window. 64 */ 65 if ((assoc->max_burst > 0) && (cwnd_in_mtu > assoc->max_burst)) 66 cwnd_in_mtu = assoc->max_burst; 67 net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu; 68 } 69 net->ssthresh = assoc->peers_rwnd; 70 71 SDT_PROBE(sctp, cwnd, net, init, 72 stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, 73 0, net->cwnd); 74 if (SCTP_BASE_SYSCTL(sctp_logging_level) & 75 (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) { 76 sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION); 77 } 78 } 79 80 void 81 sctp_cwnd_update_after_fr(struct sctp_tcb *stcb, 82 struct sctp_association *asoc) 83 { 84 struct sctp_nets *net; 85 86 /*- 87 * CMT fast recovery code. Need to debug. 
((sctp_cmt_on_off > 0) && 88 * (net->fast_retran_loss_recovery == 0))) 89 */ 90 TAILQ_FOREACH(net, &asoc->nets, sctp_next) { 91 if ((asoc->fast_retran_loss_recovery == 0) || 92 (asoc->sctp_cmt_on_off > 0)) { 93 /* out of a RFC2582 Fast recovery window? */ 94 if (net->net_ack > 0) { 95 /* 96 * per section 7.2.3, are there any 97 * destinations that had a fast retransmit 98 * to them. If so what we need to do is 99 * adjust ssthresh and cwnd. 100 */ 101 struct sctp_tmit_chunk *lchk; 102 int old_cwnd = net->cwnd; 103 104 net->ssthresh = net->cwnd / 2; 105 if (net->ssthresh < (net->mtu * 2)) { 106 net->ssthresh = 2 * net->mtu; 107 } 108 net->cwnd = net->ssthresh; 109 SDT_PROBE(sctp, cwnd, net, fr, 110 stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, 111 old_cwnd, net->cwnd); 112 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 113 sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), 114 SCTP_CWND_LOG_FROM_FR); 115 } 116 lchk = TAILQ_FIRST(&asoc->send_queue); 117 118 net->partial_bytes_acked = 0; 119 /* Turn on fast recovery window */ 120 asoc->fast_retran_loss_recovery = 1; 121 if (lchk == NULL) { 122 /* Mark end of the window */ 123 asoc->fast_recovery_tsn = asoc->sending_seq - 1; 124 } else { 125 asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; 126 } 127 128 /* 129 * CMT fast recovery -- per destination 130 * recovery variable. 
131 */ 132 net->fast_retran_loss_recovery = 1; 133 134 if (lchk == NULL) { 135 /* Mark end of the window */ 136 net->fast_recovery_tsn = asoc->sending_seq - 1; 137 } else { 138 net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; 139 } 140 141 /* 142 * Disable Nonce Sum Checking and store the 143 * resync tsn 144 */ 145 asoc->nonce_sum_check = 0; 146 asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1; 147 148 sctp_timer_stop(SCTP_TIMER_TYPE_SEND, 149 stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); 150 sctp_timer_start(SCTP_TIMER_TYPE_SEND, 151 stcb->sctp_ep, stcb, net); 152 } 153 } else if (net->net_ack > 0) { 154 /* 155 * Mark a peg that we WOULD have done a cwnd 156 * reduction but RFC2582 prevented this action. 157 */ 158 SCTP_STAT_INCR(sctps_fastretransinrtt); 159 } 160 } 161 } 162 163 void 164 sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, 165 struct sctp_association *asoc, 166 int accum_moved, int reneged_all, int will_exit) 167 { 168 struct sctp_nets *net; 169 int old_cwnd; 170 171 /******************************/ 172 /* update cwnd and Early FR */ 173 /******************************/ 174 TAILQ_FOREACH(net, &asoc->nets, sctp_next) { 175 176 #ifdef JANA_CMT_FAST_RECOVERY 177 /* 178 * CMT fast recovery code. Need to debug. 179 */ 180 if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) { 181 if (compare_with_wrap(asoc->last_acked_seq, 182 net->fast_recovery_tsn, MAX_TSN) || 183 (asoc->last_acked_seq == net->fast_recovery_tsn) || 184 compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) || 185 (net->pseudo_cumack == net->fast_recovery_tsn)) { 186 net->will_exit_fast_recovery = 1; 187 } 188 } 189 #endif 190 if (SCTP_BASE_SYSCTL(sctp_early_fr)) { 191 /* 192 * So, first of all do we need to have a Early FR 193 * timer running? 
194 */ 195 if ((!TAILQ_EMPTY(&asoc->sent_queue) && 196 (net->ref_count > 1) && 197 (net->flight_size < net->cwnd)) || 198 (reneged_all)) { 199 /* 200 * yes, so in this case stop it if its 201 * running, and then restart it. Reneging 202 * all is a special case where we want to 203 * run the Early FR timer and then force the 204 * last few unacked to be sent, causing us 205 * to illicit a sack with gaps to force out 206 * the others. 207 */ 208 if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { 209 SCTP_STAT_INCR(sctps_earlyfrstpidsck2); 210 sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, 211 SCTP_FROM_SCTP_INDATA + SCTP_LOC_20); 212 } 213 SCTP_STAT_INCR(sctps_earlyfrstrid); 214 sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); 215 } else { 216 /* No, stop it if its running */ 217 if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { 218 SCTP_STAT_INCR(sctps_earlyfrstpidsck3); 219 sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, 220 SCTP_FROM_SCTP_INDATA + SCTP_LOC_21); 221 } 222 } 223 } 224 /* if nothing was acked on this destination skip it */ 225 if (net->net_ack == 0) { 226 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 227 sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK); 228 } 229 continue; 230 } 231 if (net->net_ack2 > 0) { 232 /* 233 * Karn's rule applies to clearing error count, this 234 * is optional. 235 */ 236 net->error_count = 0; 237 if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == 238 SCTP_ADDR_NOT_REACHABLE) { 239 /* addr came good */ 240 net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; 241 net->dest_state |= SCTP_ADDR_REACHABLE; 242 sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 243 SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED); 244 /* now was it the primary? 
if so restore */ 245 if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) { 246 (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net); 247 } 248 } 249 /* 250 * JRS 5/14/07 - If CMT PF is on and the destination 251 * is in PF state, set the destination to active 252 * state and set the cwnd to one or two MTU's based 253 * on whether PF1 or PF2 is being used. 254 * 255 * Should we stop any running T3 timer here? 256 */ 257 if ((asoc->sctp_cmt_on_off > 0) && 258 (asoc->sctp_cmt_pf > 0) && 259 ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) { 260 net->dest_state &= ~SCTP_ADDR_PF; 261 old_cwnd = net->cwnd; 262 net->cwnd = net->mtu * asoc->sctp_cmt_pf; 263 SDT_PROBE(sctp, cwnd, net, ack, 264 stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, 265 old_cwnd, net->cwnd); 266 SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", 267 net, net->cwnd); 268 /* 269 * Since the cwnd value is explicitly set, 270 * skip the code that updates the cwnd 271 * value. 272 */ 273 goto skip_cwnd_update; 274 } 275 } 276 #ifdef JANA_CMT_FAST_RECOVERY 277 /* 278 * CMT fast recovery code 279 */ 280 /* 281 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery 282 * && net->will_exit_fast_recovery == 0) { @@@ Do something 283 * } else if (sctp_cmt_on_off == 0 && 284 * asoc->fast_retran_loss_recovery && will_exit == 0) { 285 */ 286 #endif 287 288 if (asoc->fast_retran_loss_recovery && 289 (will_exit == 0) && 290 (asoc->sctp_cmt_on_off == 0)) { 291 /* 292 * If we are in loss recovery we skip any cwnd 293 * update 294 */ 295 goto skip_cwnd_update; 296 } 297 /* 298 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has 299 * moved. 
300 */ 301 if (accum_moved || 302 ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) { 303 /* If the cumulative ack moved we can proceed */ 304 if (net->cwnd <= net->ssthresh) { 305 /* We are in slow start */ 306 if (net->flight_size + net->net_ack >= net->cwnd) { 307 if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) { 308 old_cwnd = net->cwnd; 309 net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)); 310 SDT_PROBE(sctp, cwnd, net, ack, 311 stcb->asoc.my_vtag, 312 ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), 313 net, 314 old_cwnd, net->cwnd); 315 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 316 sctp_log_cwnd(stcb, net, net->mtu, 317 SCTP_CWND_LOG_FROM_SS); 318 } 319 } else { 320 old_cwnd = net->cwnd; 321 net->cwnd += net->net_ack; 322 SDT_PROBE(sctp, cwnd, net, ack, 323 stcb->asoc.my_vtag, 324 ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), 325 net, 326 old_cwnd, net->cwnd); 327 328 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 329 sctp_log_cwnd(stcb, net, net->net_ack, 330 SCTP_CWND_LOG_FROM_SS); 331 } 332 } 333 } else { 334 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 335 sctp_log_cwnd(stcb, net, net->net_ack, 336 SCTP_CWND_LOG_NOADV_SS); 337 } 338 } 339 } else { 340 /* We are in congestion avoidance */ 341 /* 342 * Add to pba 343 */ 344 net->partial_bytes_acked += net->net_ack; 345 346 if ((net->flight_size + net->net_ack >= net->cwnd) && 347 (net->partial_bytes_acked >= net->cwnd)) { 348 net->partial_bytes_acked -= net->cwnd; 349 old_cwnd = net->cwnd; 350 net->cwnd += net->mtu; 351 SDT_PROBE(sctp, cwnd, net, ack, 352 stcb->asoc.my_vtag, 353 ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), 354 net, 355 old_cwnd, net->cwnd); 356 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 357 sctp_log_cwnd(stcb, net, net->mtu, 358 SCTP_CWND_LOG_FROM_CA); 359 } 360 } else { 361 if (SCTP_BASE_SYSCTL(sctp_logging_level) & 
SCTP_CWND_LOGGING_ENABLE) { 362 sctp_log_cwnd(stcb, net, net->net_ack, 363 SCTP_CWND_LOG_NOADV_CA); 364 } 365 } 366 } 367 } else { 368 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 369 sctp_log_cwnd(stcb, net, net->mtu, 370 SCTP_CWND_LOG_NO_CUMACK); 371 } 372 } 373 skip_cwnd_update: 374 /* 375 * NOW, according to Karn's rule do we need to restore the 376 * RTO timer back? Check our net_ack2. If not set then we 377 * have a ambiguity.. i.e. all data ack'd was sent to more 378 * than one place. 379 */ 380 if (net->net_ack2) { 381 /* restore any doubled timers */ 382 net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1; 383 if (net->RTO < stcb->asoc.minrto) { 384 net->RTO = stcb->asoc.minrto; 385 } 386 if (net->RTO > stcb->asoc.maxrto) { 387 net->RTO = stcb->asoc.maxrto; 388 } 389 } 390 } 391 } 392 393 void 394 sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net) 395 { 396 int old_cwnd = net->cwnd; 397 398 net->ssthresh = max(net->cwnd / 2, 4 * net->mtu); 399 net->cwnd = net->mtu; 400 net->partial_bytes_acked = 0; 401 SDT_PROBE(sctp, cwnd, net, to, 402 stcb->asoc.my_vtag, 403 ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), 404 net, 405 old_cwnd, net->cwnd); 406 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 407 sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX); 408 } 409 } 410 411 void 412 sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net) 413 { 414 int old_cwnd = net->cwnd; 415 416 SCTP_STAT_INCR(sctps_ecnereducedcwnd); 417 net->ssthresh = net->cwnd / 2; 418 if (net->ssthresh < net->mtu) { 419 net->ssthresh = net->mtu; 420 /* here back off the timer as well, to slow us down */ 421 net->RTO <<= 1; 422 } 423 net->cwnd = net->ssthresh; 424 SDT_PROBE(sctp, cwnd, net, ecn, 425 stcb->asoc.my_vtag, 426 ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), 427 net, 428 old_cwnd, net->cwnd); 429 if (SCTP_BASE_SYSCTL(sctp_logging_level) & 
SCTP_CWND_MONITOR_ENABLE) { 430 sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); 431 } 432 } 433 434 void 435 sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb, 436 struct sctp_nets *net, struct sctp_pktdrop_chunk *cp, 437 uint32_t * bottle_bw, uint32_t * on_queue) 438 { 439 uint32_t bw_avail; 440 int rtt, incr; 441 int old_cwnd = net->cwnd; 442 443 /* need real RTT for this calc */ 444 rtt = ((net->lastsa >> 2) + net->lastsv) >> 1; 445 /* get bottle neck bw */ 446 *bottle_bw = ntohl(cp->bottle_bw); 447 /* and whats on queue */ 448 *on_queue = ntohl(cp->current_onq); 449 /* 450 * adjust the on-queue if our flight is more it could be that the 451 * router has not yet gotten data "in-flight" to it 452 */ 453 if (*on_queue < net->flight_size) 454 *on_queue = net->flight_size; 455 /* calculate the available space */ 456 bw_avail = (*bottle_bw * rtt) / 1000; 457 if (bw_avail > *bottle_bw) { 458 /* 459 * Cap the growth to no more than the bottle neck. This can 460 * happen as RTT slides up due to queues. It also means if 461 * you have more than a 1 second RTT with a empty queue you 462 * will be limited to the bottle_bw per second no matter if 463 * other points have 1/2 the RTT and you could get more 464 * out... 465 */ 466 bw_avail = *bottle_bw; 467 } 468 if (*on_queue > bw_avail) { 469 /* 470 * No room for anything else don't allow anything else to be 471 * "added to the fire". 472 */ 473 int seg_inflight, seg_onqueue, my_portion; 474 475 net->partial_bytes_acked = 0; 476 477 /* how much are we over queue size? */ 478 incr = *on_queue - bw_avail; 479 if (stcb->asoc.seen_a_sack_this_pkt) { 480 /* 481 * undo any cwnd adjustment that the sack might have 482 * made 483 */ 484 net->cwnd = net->prev_cwnd; 485 } 486 /* Now how much of that is mine? 
*/ 487 seg_inflight = net->flight_size / net->mtu; 488 seg_onqueue = *on_queue / net->mtu; 489 my_portion = (incr * seg_inflight) / seg_onqueue; 490 491 /* Have I made an adjustment already */ 492 if (net->cwnd > net->flight_size) { 493 /* 494 * for this flight I made an adjustment we need to 495 * decrease the portion by a share our previous 496 * adjustment. 497 */ 498 int diff_adj; 499 500 diff_adj = net->cwnd - net->flight_size; 501 if (diff_adj > my_portion) 502 my_portion = 0; 503 else 504 my_portion -= diff_adj; 505 } 506 /* 507 * back down to the previous cwnd (assume we have had a sack 508 * before this packet). minus what ever portion of the 509 * overage is my fault. 510 */ 511 net->cwnd -= my_portion; 512 513 /* we will NOT back down more than 1 MTU */ 514 if (net->cwnd <= net->mtu) { 515 net->cwnd = net->mtu; 516 } 517 /* force into CA */ 518 net->ssthresh = net->cwnd - 1; 519 } else { 520 /* 521 * Take 1/4 of the space left or max burst up .. whichever 522 * is less. 523 */ 524 incr = min((bw_avail - *on_queue) >> 2, 525 stcb->asoc.max_burst * net->mtu); 526 net->cwnd += incr; 527 } 528 if (net->cwnd > bw_avail) { 529 /* We can't exceed the pipe size */ 530 net->cwnd = bw_avail; 531 } 532 if (net->cwnd < net->mtu) { 533 /* We always have 1 MTU */ 534 net->cwnd = net->mtu; 535 } 536 if (net->cwnd - old_cwnd != 0) { 537 /* log only changes */ 538 SDT_PROBE(sctp, cwnd, net, pd, 539 stcb->asoc.my_vtag, 540 ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), 541 net, 542 old_cwnd, net->cwnd); 543 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 544 sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), 545 SCTP_CWND_LOG_FROM_SAT); 546 } 547 } 548 } 549 550 void 551 sctp_cwnd_update_after_output(struct sctp_tcb *stcb, 552 struct sctp_nets *net, int burst_limit) 553 { 554 int old_cwnd = net->cwnd; 555 556 if (net->ssthresh < net->cwnd) 557 net->ssthresh = net->cwnd; 558 net->cwnd = (net->flight_size + (burst_limit * net->mtu)); 559 
SDT_PROBE(sctp, cwnd, net, bl, 560 stcb->asoc.my_vtag, 561 ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), 562 net, 563 old_cwnd, net->cwnd); 564 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 565 sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST); 566 } 567 } 568 569 void 570 sctp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp, 571 struct sctp_tcb *stcb, struct sctp_nets *net) 572 { 573 int old_cwnd = net->cwnd; 574 575 sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED); 576 /* 577 * make a small adjustment to cwnd and force to CA. 578 */ 579 if (net->cwnd > net->mtu) 580 /* drop down one MTU after sending */ 581 net->cwnd -= net->mtu; 582 if (net->cwnd < net->ssthresh) 583 /* still in SS move to CA */ 584 net->ssthresh = net->cwnd - 1; 585 SDT_PROBE(sctp, cwnd, net, fr, 586 stcb->asoc.my_vtag, 587 ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), 588 net, 589 old_cwnd, net->cwnd); 590 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 591 sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR); 592 } 593 } 594 595 struct sctp_hs_raise_drop { 596 int32_t cwnd; 597 int32_t increase; 598 int32_t drop_percent; 599 }; 600 601 #define SCTP_HS_TABLE_SIZE 73 602 603 struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = { 604 {38, 1, 50}, /* 0 */ 605 {118, 2, 44}, /* 1 */ 606 {221, 3, 41}, /* 2 */ 607 {347, 4, 38}, /* 3 */ 608 {495, 5, 37}, /* 4 */ 609 {663, 6, 35}, /* 5 */ 610 {851, 7, 34}, /* 6 */ 611 {1058, 8, 33}, /* 7 */ 612 {1284, 9, 32}, /* 8 */ 613 {1529, 10, 31}, /* 9 */ 614 {1793, 11, 30}, /* 10 */ 615 {2076, 12, 29}, /* 11 */ 616 {2378, 13, 28}, /* 12 */ 617 {2699, 14, 28}, /* 13 */ 618 {3039, 15, 27}, /* 14 */ 619 {3399, 16, 27}, /* 15 */ 620 {3778, 17, 26}, /* 16 */ 621 {4177, 18, 26}, /* 17 */ 622 {4596, 19, 25}, /* 18 */ 623 {5036, 20, 25}, /* 19 */ 624 {5497, 21, 24}, /* 20 */ 625 {5979, 22, 24}, /* 21 */ 626 {6483, 23, 23}, 
/* 22 */ 627 {7009, 24, 23}, /* 23 */ 628 {7558, 25, 22}, /* 24 */ 629 {8130, 26, 22}, /* 25 */ 630 {8726, 27, 22}, /* 26 */ 631 {9346, 28, 21}, /* 27 */ 632 {9991, 29, 21}, /* 28 */ 633 {10661, 30, 21}, /* 29 */ 634 {11358, 31, 20}, /* 30 */ 635 {12082, 32, 20}, /* 31 */ 636 {12834, 33, 20}, /* 32 */ 637 {13614, 34, 19}, /* 33 */ 638 {14424, 35, 19}, /* 34 */ 639 {15265, 36, 19}, /* 35 */ 640 {16137, 37, 19}, /* 36 */ 641 {17042, 38, 18}, /* 37 */ 642 {17981, 39, 18}, /* 38 */ 643 {18955, 40, 18}, /* 39 */ 644 {19965, 41, 17}, /* 40 */ 645 {21013, 42, 17}, /* 41 */ 646 {22101, 43, 17}, /* 42 */ 647 {23230, 44, 17}, /* 43 */ 648 {24402, 45, 16}, /* 44 */ 649 {25618, 46, 16}, /* 45 */ 650 {26881, 47, 16}, /* 46 */ 651 {28193, 48, 16}, /* 47 */ 652 {29557, 49, 15}, /* 48 */ 653 {30975, 50, 15}, /* 49 */ 654 {32450, 51, 15}, /* 50 */ 655 {33986, 52, 15}, /* 51 */ 656 {35586, 53, 14}, /* 52 */ 657 {37253, 54, 14}, /* 53 */ 658 {38992, 55, 14}, /* 54 */ 659 {40808, 56, 14}, /* 55 */ 660 {42707, 57, 13}, /* 56 */ 661 {44694, 58, 13}, /* 57 */ 662 {46776, 59, 13}, /* 58 */ 663 {48961, 60, 13}, /* 59 */ 664 {51258, 61, 13}, /* 60 */ 665 {53677, 62, 12}, /* 61 */ 666 {56230, 63, 12}, /* 62 */ 667 {58932, 64, 12}, /* 63 */ 668 {61799, 65, 12}, /* 64 */ 669 {64851, 66, 11}, /* 65 */ 670 {68113, 67, 11}, /* 66 */ 671 {71617, 68, 11}, /* 67 */ 672 {75401, 69, 10}, /* 68 */ 673 {79517, 70, 10}, /* 69 */ 674 {84035, 71, 10}, /* 70 */ 675 {89053, 72, 10}, /* 71 */ 676 {94717, 73, 9} /* 72 */ 677 }; 678 679 static void 680 sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net) 681 { 682 int cur_val, i, indx, incr; 683 684 cur_val = net->cwnd >> 10; 685 indx = SCTP_HS_TABLE_SIZE - 1; 686 #ifdef SCTP_DEBUG 687 printf("HS CC CAlled.\n"); 688 #endif 689 if (cur_val < sctp_cwnd_adjust[0].cwnd) { 690 /* normal mode */ 691 if (net->net_ack > net->mtu) { 692 net->cwnd += net->mtu; 693 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 694 
sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS); 695 } 696 } else { 697 net->cwnd += net->net_ack; 698 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 699 sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS); 700 } 701 } 702 } else { 703 for (i = net->last_hs_used; i < SCTP_HS_TABLE_SIZE; i++) { 704 if (cur_val < sctp_cwnd_adjust[i].cwnd) { 705 indx = i; 706 break; 707 } 708 } 709 net->last_hs_used = indx; 710 incr = ((sctp_cwnd_adjust[indx].increase) << 10); 711 net->cwnd += incr; 712 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 713 sctp_log_cwnd(stcb, net, incr, SCTP_CWND_LOG_FROM_SS); 714 } 715 } 716 } 717 718 static void 719 sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net) 720 { 721 int cur_val, i, indx; 722 int old_cwnd = net->cwnd; 723 724 cur_val = net->cwnd >> 10; 725 if (cur_val < sctp_cwnd_adjust[0].cwnd) { 726 /* normal mode */ 727 net->ssthresh = net->cwnd / 2; 728 if (net->ssthresh < (net->mtu * 2)) { 729 net->ssthresh = 2 * net->mtu; 730 } 731 net->cwnd = net->ssthresh; 732 } else { 733 /* drop by the proper amount */ 734 net->ssthresh = net->cwnd - (int)((net->cwnd / 100) * 735 sctp_cwnd_adjust[net->last_hs_used].drop_percent); 736 net->cwnd = net->ssthresh; 737 /* now where are we */ 738 indx = net->last_hs_used; 739 cur_val = net->cwnd >> 10; 740 /* reset where we are in the table */ 741 if (cur_val < sctp_cwnd_adjust[0].cwnd) { 742 /* feel out of hs */ 743 net->last_hs_used = 0; 744 } else { 745 for (i = indx; i >= 1; i--) { 746 if (cur_val > sctp_cwnd_adjust[i - 1].cwnd) { 747 break; 748 } 749 } 750 net->last_hs_used = indx; 751 } 752 } 753 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 754 sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR); 755 } 756 } 757 758 void 759 sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb, 760 struct sctp_association *asoc) 761 { 762 struct sctp_nets *net; 763 764 /* 765 * CMT fast 
recovery code. Need to debug. ((sctp_cmt_on_off > 0) && 766 * (net->fast_retran_loss_recovery == 0))) 767 */ 768 TAILQ_FOREACH(net, &asoc->nets, sctp_next) { 769 if ((asoc->fast_retran_loss_recovery == 0) || 770 (asoc->sctp_cmt_on_off > 0)) { 771 /* out of a RFC2582 Fast recovery window? */ 772 if (net->net_ack > 0) { 773 /* 774 * per section 7.2.3, are there any 775 * destinations that had a fast retransmit 776 * to them. If so what we need to do is 777 * adjust ssthresh and cwnd. 778 */ 779 struct sctp_tmit_chunk *lchk; 780 781 sctp_hs_cwnd_decrease(stcb, net); 782 783 lchk = TAILQ_FIRST(&asoc->send_queue); 784 785 net->partial_bytes_acked = 0; 786 /* Turn on fast recovery window */ 787 asoc->fast_retran_loss_recovery = 1; 788 if (lchk == NULL) { 789 /* Mark end of the window */ 790 asoc->fast_recovery_tsn = asoc->sending_seq - 1; 791 } else { 792 asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; 793 } 794 795 /* 796 * CMT fast recovery -- per destination 797 * recovery variable. 798 */ 799 net->fast_retran_loss_recovery = 1; 800 801 if (lchk == NULL) { 802 /* Mark end of the window */ 803 net->fast_recovery_tsn = asoc->sending_seq - 1; 804 } else { 805 net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; 806 } 807 808 /* 809 * Disable Nonce Sum Checking and store the 810 * resync tsn 811 */ 812 asoc->nonce_sum_check = 0; 813 asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1; 814 815 sctp_timer_stop(SCTP_TIMER_TYPE_SEND, 816 stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); 817 sctp_timer_start(SCTP_TIMER_TYPE_SEND, 818 stcb->sctp_ep, stcb, net); 819 } 820 } else if (net->net_ack > 0) { 821 /* 822 * Mark a peg that we WOULD have done a cwnd 823 * reduction but RFC2582 prevented this action. 
824 */ 825 SCTP_STAT_INCR(sctps_fastretransinrtt); 826 } 827 } 828 } 829 830 void 831 sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb, 832 struct sctp_association *asoc, 833 int accum_moved, int reneged_all, int will_exit) 834 { 835 struct sctp_nets *net; 836 837 /******************************/ 838 /* update cwnd and Early FR */ 839 /******************************/ 840 TAILQ_FOREACH(net, &asoc->nets, sctp_next) { 841 842 #ifdef JANA_CMT_FAST_RECOVERY 843 /* 844 * CMT fast recovery code. Need to debug. 845 */ 846 if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) { 847 if (compare_with_wrap(asoc->last_acked_seq, 848 net->fast_recovery_tsn, MAX_TSN) || 849 (asoc->last_acked_seq == net->fast_recovery_tsn) || 850 compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) || 851 (net->pseudo_cumack == net->fast_recovery_tsn)) { 852 net->will_exit_fast_recovery = 1; 853 } 854 } 855 #endif 856 if (SCTP_BASE_SYSCTL(sctp_early_fr)) { 857 /* 858 * So, first of all do we need to have a Early FR 859 * timer running? 860 */ 861 if ((!TAILQ_EMPTY(&asoc->sent_queue) && 862 (net->ref_count > 1) && 863 (net->flight_size < net->cwnd)) || 864 (reneged_all)) { 865 /* 866 * yes, so in this case stop it if its 867 * running, and then restart it. Reneging 868 * all is a special case where we want to 869 * run the Early FR timer and then force the 870 * last few unacked to be sent, causing us 871 * to illicit a sack with gaps to force out 872 * the others. 
873 */ 874 if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { 875 SCTP_STAT_INCR(sctps_earlyfrstpidsck2); 876 sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, 877 SCTP_FROM_SCTP_INDATA + SCTP_LOC_20); 878 } 879 SCTP_STAT_INCR(sctps_earlyfrstrid); 880 sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); 881 } else { 882 /* No, stop it if its running */ 883 if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { 884 SCTP_STAT_INCR(sctps_earlyfrstpidsck3); 885 sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, 886 SCTP_FROM_SCTP_INDATA + SCTP_LOC_21); 887 } 888 } 889 } 890 /* if nothing was acked on this destination skip it */ 891 if (net->net_ack == 0) { 892 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 893 sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK); 894 } 895 continue; 896 } 897 if (net->net_ack2 > 0) { 898 /* 899 * Karn's rule applies to clearing error count, this 900 * is optional. 901 */ 902 net->error_count = 0; 903 if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == 904 SCTP_ADDR_NOT_REACHABLE) { 905 /* addr came good */ 906 net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; 907 net->dest_state |= SCTP_ADDR_REACHABLE; 908 sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 909 SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED); 910 /* now was it the primary? if so restore */ 911 if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) { 912 (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net); 913 } 914 } 915 /* 916 * JRS 5/14/07 - If CMT PF is on and the destination 917 * is in PF state, set the destination to active 918 * state and set the cwnd to one or two MTU's based 919 * on whether PF1 or PF2 is being used. 920 * 921 * Should we stop any running T3 timer here? 
922 */ 923 if ((asoc->sctp_cmt_on_off > 0) && 924 (asoc->sctp_cmt_pf > 0) && 925 ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) { 926 net->dest_state &= ~SCTP_ADDR_PF; 927 net->cwnd = net->mtu * asoc->sctp_cmt_pf; 928 SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", 929 net, net->cwnd); 930 /* 931 * Since the cwnd value is explicitly set, 932 * skip the code that updates the cwnd 933 * value. 934 */ 935 goto skip_cwnd_update; 936 } 937 } 938 #ifdef JANA_CMT_FAST_RECOVERY 939 /* 940 * CMT fast recovery code 941 */ 942 /* 943 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery 944 * && net->will_exit_fast_recovery == 0) { @@@ Do something 945 * } else if (sctp_cmt_on_off == 0 && 946 * asoc->fast_retran_loss_recovery && will_exit == 0) { 947 */ 948 #endif 949 950 if (asoc->fast_retran_loss_recovery && 951 (will_exit == 0) && 952 (asoc->sctp_cmt_on_off == 0)) { 953 /* 954 * If we are in loss recovery we skip any cwnd 955 * update 956 */ 957 goto skip_cwnd_update; 958 } 959 /* 960 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has 961 * moved. 
962 */ 963 if (accum_moved || 964 ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) { 965 /* If the cumulative ack moved we can proceed */ 966 if (net->cwnd <= net->ssthresh) { 967 /* We are in slow start */ 968 if (net->flight_size + net->net_ack >= net->cwnd) { 969 970 sctp_hs_cwnd_increase(stcb, net); 971 972 } else { 973 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 974 sctp_log_cwnd(stcb, net, net->net_ack, 975 SCTP_CWND_LOG_NOADV_SS); 976 } 977 } 978 } else { 979 /* We are in congestion avoidance */ 980 net->partial_bytes_acked += net->net_ack; 981 if ((net->flight_size + net->net_ack >= net->cwnd) && 982 (net->partial_bytes_acked >= net->cwnd)) { 983 net->partial_bytes_acked -= net->cwnd; 984 net->cwnd += net->mtu; 985 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { 986 sctp_log_cwnd(stcb, net, net->mtu, 987 SCTP_CWND_LOG_FROM_CA); 988 } 989 } else { 990 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 991 sctp_log_cwnd(stcb, net, net->net_ack, 992 SCTP_CWND_LOG_NOADV_CA); 993 } 994 } 995 } 996 } else { 997 if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { 998 sctp_log_cwnd(stcb, net, net->mtu, 999 SCTP_CWND_LOG_NO_CUMACK); 1000 } 1001 } 1002 skip_cwnd_update: 1003 /* 1004 * NOW, according to Karn's rule do we need to restore the 1005 * RTO timer back? Check our net_ack2. If not set then we 1006 * have a ambiguity.. i.e. all data ack'd was sent to more 1007 * than one place. 1008 */ 1009 if (net->net_ack2) { 1010 /* restore any doubled timers */ 1011 net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1; 1012 if (net->RTO < stcb->asoc.minrto) { 1013 net->RTO = stcb->asoc.minrto; 1014 } 1015 if (net->RTO > stcb->asoc.maxrto) { 1016 net->RTO = stcb->asoc.maxrto; 1017 } 1018 } 1019 } 1020 } 1021 1022 1023 /* 1024 * H-TCP congestion control. 
 * The algorithm is detailed in:
 * R.N.Shorten, D.J.Leith:
 *   "H-TCP: TCP for high-speed and long-distance networks"
 *   Proc. PFLDnet, Argonne, 2004.
 *   http://www.hamilton.ie/net/htcp3.pdf
 */


/* Tunables: enable RTT scaling of the additive-increase factor, and the
 * bandwidth-based mode switch between conventional and H-TCP backoff.
 */
static int use_rtt_scaling = 1;
static int use_bandwidth_switch = 1;

/*
 * Serial-number-style comparison: true iff seq3 lies at or beyond seq1 when
 * both are measured as unsigned offsets from seq2 (wrap-safe).
 */
static inline int
between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
{
	return seq3 - seq2 >= seq1 - seq2;
}

/* Ticks elapsed since the last congestion event on this path. */
static inline uint32_t
htcp_cong_time(struct htcp *ca)
{
	return sctp_get_tick_count() - ca->last_cong;
}

/*
 * Number of minRTT intervals since the last congestion event.
 * NOTE(review): divides by ca->minRTT, which is zero until the first RTT
 * sample is taken — callers appear to rely on minRTT being set first;
 * confirm no divide-by-zero path exists.
 */
static inline uint32_t
htcp_ccount(struct htcp *ca)
{
	return htcp_cong_time(ca) / ca->minRTT;
}

/*
 * Record a congestion event: save undo state and restart the congestion
 * epoch clock.
 */
static inline void
htcp_reset(struct htcp *ca)
{
	ca->undo_last_cong = ca->last_cong;
	ca->undo_maxRTT = ca->maxRTT;
	ca->undo_old_maxB = ca->old_maxB;
	ca->last_cong = sctp_get_tick_count();
}

#ifdef SCTP_NOT_USED

/*
 * Roll back the state saved by htcp_reset() and return the cwnd implied by
 * the pre-backoff ssthresh (ssthresh scaled back up by 1/beta, in whole
 * MTUs).  Currently compiled out.
 */
static uint32_t
htcp_cwnd_undo(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	net->htcp_ca.last_cong = net->htcp_ca.undo_last_cong;
	net->htcp_ca.maxRTT = net->htcp_ca.undo_maxRTT;
	net->htcp_ca.old_maxB = net->htcp_ca.undo_old_maxB;
	return max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->htcp_ca.beta) * net->mtu);
}

#endif

/*
 * Sample the smoothed RTT (lastsa is an SRTT kept shifted left by 3) and
 * maintain the per-path min/max RTT estimates used by the beta update.
 */
static inline void
measure_rtt(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	uint32_t srtt = net->lastsa >> 3;

	/* keep track of minimum RTT seen so far, minRTT is zero at first */
	if (net->htcp_ca.minRTT > srtt || !net->htcp_ca.minRTT)
		net->htcp_ca.minRTT = srtt;

	/*
	 * max RTT: only tracked outside fast retransmit, with ssthresh below
	 * 0xFFFF, and after at least 3 RTTs of the current congestion epoch.
	 * Growth is capped at 20ms per sample to filter outliers.
	 */
	if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->htcp_ca) > 3) {
		if (net->htcp_ca.maxRTT < net->htcp_ca.minRTT)
			net->htcp_ca.maxRTT = net->htcp_ca.minRTT;
		if (net->htcp_ca.maxRTT < srtt && srtt <= net->htcp_ca.maxRTT + MSEC_TO_TICKS(20))
			net->htcp_ca.maxRTT = srtt;
	}
}

/*
 * Maintain the achieved-throughput estimate Bi (in MTUs per second) that
 * drives the bandwidth-based mode switch in htcp_beta_update().
 */
static void
measure_achieved_throughput(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	uint32_t now = sctp_get_tick_count();

	if (net->fast_retran_ip == 0)
		net->htcp_ca.bytes_acked = net->net_ack;

	if (!use_bandwidth_switch)
		return;

	/* achieved throughput calculations */
	/* JRS - not 100% sure of this statement */
	if (net->fast_retran_ip == 1) {
		/* in fast retransmit: restart the measurement interval */
		net->htcp_ca.bytecount = 0;
		net->htcp_ca.lasttime = now;
		return;
	}
	net->htcp_ca.bytecount += net->net_ack;

	/*
	 * Take a sample once roughly a cwnd's worth of data (less one
	 * alpha-scaled MTU) has been acked and at least one minRTT has
	 * passed.  Note "?:" is the GCC elvis extension: alpha>>7 if
	 * nonzero, else 1.
	 */
	if (net->htcp_ca.bytecount >= net->cwnd - ((net->htcp_ca.alpha >> 7 ? : 1) * net->mtu)
	    && now - net->htcp_ca.lasttime >= net->htcp_ca.minRTT
	    && net->htcp_ca.minRTT > 0) {
		uint32_t cur_Bi = net->htcp_ca.bytecount / net->mtu * hz / (now - net->htcp_ca.lasttime);

		if (htcp_ccount(&net->htcp_ca) <= 3) {
			/* just after backoff */
			net->htcp_ca.minB = net->htcp_ca.maxB = net->htcp_ca.Bi = cur_Bi;
		} else {
			/* EWMA (3/4 old, 1/4 new) plus min/max tracking */
			net->htcp_ca.Bi = (3 * net->htcp_ca.Bi + cur_Bi) / 4;
			if (net->htcp_ca.Bi > net->htcp_ca.maxB)
				net->htcp_ca.maxB = net->htcp_ca.Bi;
			if (net->htcp_ca.minB > net->htcp_ca.maxB)
				net->htcp_ca.minB = net->htcp_ca.maxB;
		}
		net->htcp_ca.bytecount = 0;
		net->htcp_ca.lasttime = now;
	}
}

/*
 * Compute the multiplicative-decrease factor beta (fixed point, <<7).
 * If the achieved bandwidth moved by more than ~20% since the previous
 * sample, fall back to conventional backoff (BETA_MIN) and leave adaptive
 * mode; otherwise, in adaptive mode, set beta = minRTT/maxRTT clamped to
 * [BETA_MIN, BETA_MAX].
 */
static inline void
htcp_beta_update(struct htcp *ca, uint32_t minRTT, uint32_t maxRTT)
{
	if (use_bandwidth_switch) {
		uint32_t maxB = ca->maxB;
		uint32_t old_maxB = ca->old_maxB;

		ca->old_maxB = ca->maxB;

		/* bandwidth outside [old*4/5, old*6/5]: not steady state */
		if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
			ca->beta = BETA_MIN;
			ca->modeswitch = 0;
			return;
		}
	}
	if (ca->modeswitch && minRTT > (uint32_t) MSEC_TO_TICKS(10) && maxRTT) {
		ca->beta = (minRTT << 7) / maxRTT;
		if (ca->beta < BETA_MIN)
			ca->beta = BETA_MIN;
		else if (ca->beta > BETA_MAX)
			ca->beta = BETA_MAX;
	} else {
		/* first pass: use conventional beta, arm adaptive mode */
		ca->beta = BETA_MIN;
		ca->modeswitch = 1;
	}
}

/*
 * Compute the additive-increase factor alpha (fixed point, <<7) from the
 * time elapsed since the last congestion event, per the H-TCP increase
 * function f(diff) = 1 + 10*diff + (diff/2)^2 (in seconds), optionally
 * scaled by RTT so flows with different RTTs grow comparably.
 */
static inline void
htcp_alpha_update(struct htcp *ca)
{
	uint32_t minRTT = ca->minRTT;
	uint32_t factor = 1;
	uint32_t diff = htcp_cong_time(ca);

	if (diff > (uint32_t) hz) {
		/* beyond 1s of low-speed behaviour: quadratic growth */
		diff -= hz;
		factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / hz)) / hz;
	}
	if (use_rtt_scaling && minRTT) {
		uint32_t scale = (hz << 3) / (10 * minRTT);

		scale = min(max(scale, 1U << 2), 10U << 3);	/* clamping ratio to
								 * interval [0.5,10]<<3 */
		factor = (factor << 3) / scale;
		if (!factor)
			factor = 1;
	}
	/* alpha = 2 * factor * (1 - beta), in <<7 fixed point */
	ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
	if (!ca->alpha)
		ca->alpha = ALPHA_BASE;
}

/* After we have the rtt data to calculate beta, we'd still prefer to wait one
 * rtt before we adjust our beta to ensure we are working from a consistent
 * data.
 *
 * This function should be called when we hit a congestion event since only at
 * that point do we really have a real sense of maxRTT (the queues en route
 * were getting just too full now).
 */
static void
htcp_param_update(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	uint32_t minRTT = net->htcp_ca.minRTT;
	uint32_t maxRTT = net->htcp_ca.maxRTT;

	htcp_beta_update(&net->htcp_ca, minRTT, maxRTT);
	htcp_alpha_update(&net->htcp_ca);

	/*
	 * add slowly fading memory for maxRTT to accommodate routing
	 * changes etc
	 */
	if (minRTT > 0 && maxRTT > minRTT)
		net->htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
}

/*
 * Recompute ssthresh on a congestion event: cwnd scaled by beta (>>7),
 * rounded down to whole MTUs, floored at 2 MTUs.  Also refreshes the
 * alpha/beta parameters via htcp_param_update().
 */
static uint32_t
htcp_recalc_ssthresh(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	htcp_param_update(stcb, net);
	return max(((net->cwnd / net->mtu * net->htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu);
}

/*
 * Per-SACK cwnd growth: standard (ABC-limited) slow start below ssthresh,
 * H-TCP alpha-paced congestion avoidance above it.
 */
static void
htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	/*-
	 * How to handle these functions?
	 * if (!tcp_is_cwnd_limited(sk, in_flight)) RRS - good question.
	 *	return;
	 */
	if (net->cwnd <= net->ssthresh) {
		/* We are in slow start */
		if (net->flight_size + net->net_ack >= net->cwnd) {
			/* cwnd was fully used: grow by min(acked, L*MTU) */
			if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) {
				net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable));
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, net->mtu,
					    SCTP_CWND_LOG_FROM_SS);
				}
			} else {
				net->cwnd += net->net_ack;
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, net->net_ack,
					    SCTP_CWND_LOG_FROM_SS);
				}
			}
		} else {
			/* not cwnd-limited: no advance, just log */
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->net_ack,
				    SCTP_CWND_LOG_NOADV_SS);
			}
		}
	} else {
		measure_rtt(stcb, net);

		/*
		 * In dangerous area, increase slowly. In theory this is
		 * net->cwnd += alpha / net->cwnd
		 */
		/* What is snd_cwnd_cnt?? */
		if (((net->partial_bytes_acked / net->mtu * net->htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) {
			/*-
			 * Does SCTP have a cwnd clamp?
			 * if (net->snd_cwnd < net->snd_cwnd_clamp) - Nope (RRS).
			 */
			net->cwnd += net->mtu;
			net->partial_bytes_acked = 0;
			htcp_alpha_update(&net->htcp_ca);
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_FROM_CA);
			}
		} else {
			net->partial_bytes_acked += net->net_ack;
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->net_ack,
				    SCTP_CWND_LOG_NOADV_CA);
			}
		}

		net->htcp_ca.bytes_acked = net->mtu;
	}
}

#ifdef SCTP_NOT_USED
/* Lower bound on congestion window.  Currently compiled out. */
static uint32_t
htcp_min_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	return net->ssthresh;
}

#endif

/* Initialize the per-destination H-TCP state to its base values. */
static void
htcp_init(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	memset(&net->htcp_ca, 0, sizeof(struct htcp));
	net->htcp_ca.alpha = ALPHA_BASE;
	net->htcp_ca.beta = BETA_MIN;
	net->htcp_ca.bytes_acked = net->mtu;
	net->htcp_ca.last_cong = sctp_get_tick_count();
}

/*
 * CC-module entry point: set the initial cwnd/ssthresh for a destination
 * (RFC 4960 initial window, capped at 4 MTUs) and reset H-TCP state.
 */
void
sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	/*
	 * We take the max of the burst limit times a MTU or the
	 * INITIAL_CWND. We then limit this to 4 MTU's of sending.
	 */
	net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
	net->ssthresh = stcb->asoc.peers_rwnd;
	htcp_init(stcb, net);

	if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
		sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
	}
}

/*
 * CC-module entry point called on SACK processing: per destination, manage
 * the Early FR timer, apply Karn's rule for reachability/RTO restore, handle
 * the CMT-PF reactivation case, and grow cwnd via htcp_cong_avoid() when
 * the (pseudo-)cumack advanced.
 */
void
sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	struct sctp_nets *net;

	/******************************/
	/* update cwnd and Early FR   */
	/******************************/
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {

#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code. Need to debug.
		 */
		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
			if (compare_with_wrap(asoc->last_acked_seq,
			    net->fast_recovery_tsn, MAX_TSN) ||
			    (asoc->last_acked_seq == net->fast_recovery_tsn) ||
			    compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
			    (net->pseudo_cumack == net->fast_recovery_tsn)) {
				net->will_exit_fast_recovery = 1;
			}
		}
#endif
		if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
			/*
			 * So, first of all do we need to have a Early FR
			 * timer running?
			 */
			if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
			    (net->ref_count > 1) &&
			    (net->flight_size < net->cwnd)) ||
			    (reneged_all)) {
				/*
				 * yes, so in this case stop it if its
				 * running, and then restart it. Reneging
				 * all is a special case where we want to
				 * run the Early FR timer and then force the
				 * last few unacked to be sent, causing us
				 * to illicit a sack with gaps to force out
				 * the others.
				 */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
				}
				SCTP_STAT_INCR(sctps_earlyfrstrid);
				sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
			} else {
				/* No, stop it if its running */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
				}
			}
		}
		/* if nothing was acked on this destination skip it */
		if (net->net_ack == 0) {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
			}
			continue;
		}
		if (net->net_ack2 > 0) {
			/*
			 * Karn's rule applies to clearing error count, this
			 * is optional.
			 */
			net->error_count = 0;
			if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
			    SCTP_ADDR_NOT_REACHABLE) {
				/* addr came good */
				net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
				net->dest_state |= SCTP_ADDR_REACHABLE;
				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
				    SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
				/* now was it the primary? if so restore */
				if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
					(void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
				}
			}
			/*
			 * JRS 5/14/07 - If CMT PF is on and the destination
			 * is in PF state, set the destination to active
			 * state and set the cwnd to one or two MTU's based
			 * on whether PF1 or PF2 is being used.
			 *
			 * Should we stop any running T3 timer here?
			 */
			if ((asoc->sctp_cmt_on_off > 0) &&
			    (asoc->sctp_cmt_pf > 0) &&
			    ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
				net->dest_state &= ~SCTP_ADDR_PF;
				net->cwnd = net->mtu * asoc->sctp_cmt_pf;
				SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
				    net, net->cwnd);
				/*
				 * Since the cwnd value is explicitly set,
				 * skip the code that updates the cwnd
				 * value.
				 */
				goto skip_cwnd_update;
			}
		}
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code
		 */
		/*
		 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
		 * && net->will_exit_fast_recovery == 0) { @@@ Do something
		 * } else if (sctp_cmt_on_off == 0 &&
		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
		 */
#endif

		if (asoc->fast_retran_loss_recovery &&
		    will_exit == 0 &&
		    (asoc->sctp_cmt_on_off == 0)) {
			/*
			 * If we are in loss recovery we skip any cwnd
			 * update
			 */
			goto skip_cwnd_update;
		}
		/*
		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
		 * moved.
		 */
		if (accum_moved ||
		    ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
			htcp_cong_avoid(stcb, net);
			measure_achieved_throughput(stcb, net);
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_NO_CUMACK);
			}
		}
skip_cwnd_update:
		/*
		 * NOW, according to Karn's rule do we need to restore the
		 * RTO timer back? Check our net_ack2. If not set then we
		 * have a ambiguity.. i.e. all data ack'd was sent to more
		 * than one place.
		 */
		if (net->net_ack2) {
			/* restore any doubled timers */
			net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
			if (net->RTO < stcb->asoc.minrto) {
				net->RTO = stcb->asoc.minrto;
			}
			if (net->RTO > stcb->asoc.maxrto) {
				net->RTO = stcb->asoc.maxrto;
			}
		}
	}
}

/*
 * CC-module entry point called on fast retransmit: per destination with
 * newly-acked data outside an existing RFC 2582 recovery window, back off
 * ssthresh/cwnd via H-TCP, open a fast-recovery window (association-wide and
 * per-destination), and restart the T3 send timer.
 */
void
sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
	struct sctp_nets *net;

	/*
	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
	 * (net->fast_retran_loss_recovery == 0)))
	 */
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
		if ((asoc->fast_retran_loss_recovery == 0) ||
		    (asoc->sctp_cmt_on_off > 0)) {
			/* out of a RFC2582 Fast recovery window? */
			if (net->net_ack > 0) {
				/*
				 * per section 7.2.3, are there any
				 * destinations that had a fast retransmit
				 * to them. If so what we need to do is
				 * adjust ssthresh and cwnd.
				 */
				struct sctp_tmit_chunk *lchk;
				int old_cwnd = net->cwnd;

				/* JRS - reset as if state were changed */
				htcp_reset(&net->htcp_ca);
				net->ssthresh = htcp_recalc_ssthresh(stcb, net);
				net->cwnd = net->ssthresh;
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
					    SCTP_CWND_LOG_FROM_FR);
				}
				lchk = TAILQ_FIRST(&asoc->send_queue);

				net->partial_bytes_acked = 0;
				/* Turn on fast recovery window */
				asoc->fast_retran_loss_recovery = 1;
				if (lchk == NULL) {
					/* Mark end of the window */
					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/*
				 * CMT fast recovery -- per destination
				 * recovery variable.
				 */
				net->fast_retran_loss_recovery = 1;

				if (lchk == NULL) {
					/* Mark end of the window */
					net->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/*
				 * Disable Nonce Sum Checking and store the
				 * resync tsn
				 */
				asoc->nonce_sum_check = 0;
				asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;

				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net);
			}
		} else if (net->net_ack > 0) {
			/*
			 * Mark a peg that we WOULD have done a cwnd
			 * reduction but RFC2582 prevented this action.
			 */
			SCTP_STAT_INCR(sctps_fastretransinrtt);
		}
	}
}

/*
 * CC-module entry point called on T3 timeout: H-TCP backoff of ssthresh,
 * then collapse cwnd to a single MTU (RFC 4960 timeout behaviour).
 */
void
sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	int old_cwnd = net->cwnd;

	/* JRS - reset as if the state were being changed to timeout */
	htcp_reset(&net->htcp_ca);
	net->ssthresh = htcp_recalc_ssthresh(stcb, net);
	net->cwnd = net->mtu;
	net->partial_bytes_acked = 0;
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
	}
}

/*
 * CC-module entry point called on Early FR timer expiry: push out pending
 * data, restart the H-TCP congestion epoch clock, then drop one MTU off
 * cwnd and force the path into congestion avoidance.
 */
void
sctp_htcp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int old_cwnd;

	old_cwnd = net->cwnd;

	sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
	net->htcp_ca.last_cong = sctp_get_tick_count();
	/*
	 * make a small adjustment to cwnd and force to CA.
	 */
	if (net->cwnd > net->mtu)
		/* drop down one MTU after sending */
		net->cwnd -= net->mtu;
	if (net->cwnd < net->ssthresh)
		/* still in SS move to CA */
		net->ssthresh = net->cwnd - 1;
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR);
	}
}

/*
 * CC-module entry point called on ECN-Echo: treat as a congestion event —
 * H-TCP backoff of ssthresh (floored at one MTU, with RTO doubled in that
 * case to slow down further) and set cwnd to the new ssthresh.
 */
void
sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	int old_cwnd;

	old_cwnd = net->cwnd;

	/* JRS - reset hctp as if state changed */
	htcp_reset(&net->htcp_ca);
	SCTP_STAT_INCR(sctps_ecnereducedcwnd);
	net->ssthresh = htcp_recalc_ssthresh(stcb, net);
	if (net->ssthresh < net->mtu) {
		net->ssthresh = net->mtu;
		/* here back off the timer as well, to slow us down */
		net->RTO <<= 1;
	}
	net->cwnd = net->ssthresh;
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
	}
}