1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2007-2008 5 * Swinburne University of Technology, Melbourne, Australia. 6 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org> 7 * Copyright (c) 2010 The FreeBSD Foundation 8 * All rights reserved. 9 * 10 * This software was developed at the Centre for Advanced Internet 11 * Architectures, Swinburne University of Technology, by Lawrence Stewart and 12 * James Healy, made possible in part by a grant from the Cisco University 13 * Research Program Fund at Community Foundation Silicon Valley. 14 * 15 * Portions of this software were developed at the Centre for Advanced 16 * Internet Architectures, Swinburne University of Technology, Melbourne, 17 * Australia by David Hayes under sponsorship from the FreeBSD Foundation. 18 * 19 * Redistribution and use in source and binary forms, with or without 20 * modification, are permitted provided that the following conditions 21 * are met: 22 * 1. Redistributions of source code must retain the above copyright 23 * notice, this list of conditions and the following disclaimer. 24 * 2. Redistributions in binary form must reproduce the above copyright 25 * notice, this list of conditions and the following disclaimer in the 26 * documentation and/or other materials provided with the distribution. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 */ 40 41 /* 42 * This software was first released in 2007 by James Healy and Lawrence Stewart 43 * whilst working on the NewTCP research project at Swinburne University of 44 * Technology's Centre for Advanced Internet Architectures, Melbourne, 45 * Australia, which was made possible in part by a grant from the Cisco 46 * University Research Program Fund at Community Foundation Silicon Valley. 47 * More details are available at: 48 * http://caia.swin.edu.au/urp/newtcp/ 49 */ 50 51 #include <sys/cdefs.h> 52 #include <opt_cc.h> 53 #include <sys/param.h> 54 #include <sys/kernel.h> 55 #include <sys/libkern.h> 56 #include <sys/lock.h> 57 #include <sys/malloc.h> 58 #include <sys/module.h> 59 #include <sys/mutex.h> 60 #include <sys/queue.h> 61 #include <sys/rwlock.h> 62 #include <sys/sbuf.h> 63 #include <sys/socket.h> 64 #include <sys/socketvar.h> 65 #include <sys/sysctl.h> 66 67 #include <net/vnet.h> 68 69 #include <netinet/in.h> 70 #include <netinet/in_pcb.h> 71 #include <netinet/tcp.h> 72 #include <netinet/tcp_seq.h> 73 #include <netinet/tcp_var.h> 74 #include <netinet/tcp_log_buf.h> 75 #include <netinet/tcp_hpts.h> 76 #include <netinet/cc/cc.h> 77 #include <netinet/cc/cc_module.h> 78 79 /* 80 * Have a sane default if no CC_DEFAULT is specified in the kernel config file. 81 */ 82 #ifndef CC_DEFAULT 83 #define CC_DEFAULT "cubic" 84 #endif 85 86 uint32_t hystart_minrtt_thresh = 4000; 87 uint32_t hystart_maxrtt_thresh = 16000; 88 uint32_t hystart_n_rttsamples = 8; 89 uint32_t hystart_css_growth_div = 4; 90 uint32_t hystart_css_rounds = 5; 91 uint32_t hystart_bblogs = 0; 92 93 MALLOC_DEFINE(M_CC_MEM, "CC Mem", "Congestion Control State memory"); 94 95 /* 96 * List of available cc algorithms on the current system. First element 97 * is used as the system default CC algorithm. 98 */ 99 struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list); 100 101 /* Protects the cc_list TAILQ. */ 102 struct rwlock cc_list_lock; 103 104 VNET_DEFINE(struct cc_algo *, default_cc_ptr) = NULL; 105 106 VNET_DEFINE(uint32_t, newreno_beta) = 50; 107 #define V_newreno_beta VNET(newreno_beta) 108 VNET_DEFINE(uint32_t, newreno_beta_ecn) = 80; 109 110 void 111 cc_refer(struct cc_algo *algo) 112 { 113 CC_LIST_LOCK_ASSERT(); 114 refcount_acquire(&algo->cc_refcount); 115 } 116 117 void 118 cc_release(struct cc_algo *algo) 119 { 120 CC_LIST_LOCK_ASSERT(); 121 refcount_release(&algo->cc_refcount); 122 } 123 124 125 void 126 cc_attach(struct tcpcb *tp, struct cc_algo *algo) 127 { 128 /* 129 * Attach the tcpcb to the algorithm. 130 */ 131 CC_LIST_RLOCK(); 132 CC_ALGO(tp) = algo; 133 cc_refer(algo); 134 CC_LIST_RUNLOCK(); 135 } 136 137 void 138 cc_detach(struct tcpcb *tp) 139 { 140 struct cc_algo *algo; 141 142 CC_LIST_RLOCK(); 143 algo = CC_ALGO(tp); 144 CC_ALGO(tp) = NULL; 145 cc_release(algo); 146 CC_LIST_RUNLOCK(); 147 } 148 149 /* 150 * Sysctl handler to show and change the default CC algorithm. 151 */ 152 static int 153 cc_default_algo(SYSCTL_HANDLER_ARGS) 154 { 155 char default_cc[TCP_CA_NAME_MAX]; 156 struct cc_algo *funcs; 157 int error; 158 159 /* Get the current default: */ 160 CC_LIST_RLOCK(); 161 if (CC_DEFAULT_ALGO() != NULL) 162 strlcpy(default_cc, CC_DEFAULT_ALGO()->name, sizeof(default_cc)); 163 else 164 memset(default_cc, 0, TCP_CA_NAME_MAX); 165 CC_LIST_RUNLOCK(); 166 167 error = sysctl_handle_string(oidp, default_cc, sizeof(default_cc), req); 168 169 /* Check for error or no change */ 170 if (error != 0 || req->newptr == NULL) 171 goto done; 172 173 error = ESRCH; 174 /* Find algo with specified name and set it to default. */ 175 CC_LIST_RLOCK(); 176 STAILQ_FOREACH(funcs, &cc_list, entries) { 177 if (strncmp(default_cc, funcs->name, sizeof(default_cc))) 178 continue; 179 if (funcs->flags & CC_MODULE_BEING_REMOVED) { 180 /* Its being removed, its not eligible */ 181 continue; 182 } 183 V_default_cc_ptr = funcs; 184 error = 0; 185 break; 186 } 187 CC_LIST_RUNLOCK(); 188 done: 189 return (error); 190 } 191 192 /* 193 * Sysctl handler to display the list of available CC algorithms. 194 */ 195 static int 196 cc_list_available(SYSCTL_HANDLER_ARGS) 197 { 198 struct cc_algo *algo; 199 int error, nalgos; 200 int linesz; 201 char *buffer, *cp; 202 size_t bufsz, outsz; 203 204 error = nalgos = 0; 205 CC_LIST_RLOCK(); 206 STAILQ_FOREACH(algo, &cc_list, entries) { 207 nalgos++; 208 } 209 CC_LIST_RUNLOCK(); 210 if (nalgos == 0) { 211 return (ENOENT); 212 } 213 bufsz = (nalgos+2) * ((TCP_CA_NAME_MAX + 13) + 1); 214 buffer = malloc(bufsz, M_TEMP, M_WAITOK); 215 cp = buffer; 216 217 linesz = snprintf(cp, bufsz, "\n%-16s%c %s\n", "CCmod", 'D', 218 "PCB count"); 219 cp += linesz; 220 bufsz -= linesz; 221 outsz = linesz; 222 CC_LIST_RLOCK(); 223 STAILQ_FOREACH(algo, &cc_list, entries) { 224 linesz = snprintf(cp, bufsz, "%-16s%c %u\n", 225 algo->name, 226 (algo == CC_DEFAULT_ALGO()) ? '*' : ' ', 227 algo->cc_refcount); 228 if (linesz >= bufsz) { 229 error = EOVERFLOW; 230 break; 231 } 232 cp += linesz; 233 bufsz -= linesz; 234 outsz += linesz; 235 } 236 CC_LIST_RUNLOCK(); 237 if (error == 0) 238 error = sysctl_handle_string(oidp, buffer, outsz + 1, req); 239 free(buffer, M_TEMP); 240 return (error); 241 } 242 243 /* 244 * Return the number of times a proposed removal_cc is 245 * being used as the default. 246 */ 247 static int 248 cc_check_default(struct cc_algo *remove_cc) 249 { 250 int cnt = 0; 251 VNET_ITERATOR_DECL(vnet_iter); 252 253 CC_LIST_LOCK_ASSERT(); 254 255 VNET_LIST_RLOCK_NOSLEEP(); 256 VNET_FOREACH(vnet_iter) { 257 CURVNET_SET(vnet_iter); 258 if ((CC_DEFAULT_ALGO() != NULL) && 259 strncmp(CC_DEFAULT_ALGO()->name, 260 remove_cc->name, 261 TCP_CA_NAME_MAX) == 0) { 262 cnt++; 263 } 264 CURVNET_RESTORE(); 265 } 266 VNET_LIST_RUNLOCK_NOSLEEP(); 267 return (cnt); 268 } 269 270 /* 271 * Initialise CC subsystem on system boot. 272 */ 273 static void 274 cc_init(void) 275 { 276 CC_LIST_LOCK_INIT(); 277 STAILQ_INIT(&cc_list); 278 } 279 280 /* 281 * Returns non-zero on success, 0 on failure. 282 */ 283 static int 284 cc_deregister_algo_locked(struct cc_algo *remove_cc) 285 { 286 struct cc_algo *funcs; 287 int found = 0; 288 289 /* This is unlikely to fail */ 290 STAILQ_FOREACH(funcs, &cc_list, entries) { 291 if (funcs == remove_cc) 292 found = 1; 293 } 294 if (found == 0) { 295 /* Nothing to remove? */ 296 return (ENOENT); 297 } 298 /* We assert it should have been MOD_QUIESCE'd */ 299 KASSERT((remove_cc->flags & CC_MODULE_BEING_REMOVED), 300 ("remove_cc:%p does not have CC_MODULE_BEING_REMOVED flag", remove_cc)); 301 if (cc_check_default(remove_cc)) { 302 return(EBUSY); 303 } 304 if (remove_cc->cc_refcount != 0) { 305 return (EBUSY); 306 } 307 /* Remove algo from cc_list so that new connections can't use it. */ 308 STAILQ_REMOVE(&cc_list, remove_cc, cc_algo, entries); 309 return (0); 310 } 311 312 /* 313 * Returns non-zero on success, 0 on failure. 314 */ 315 int 316 cc_deregister_algo(struct cc_algo *remove_cc) 317 { 318 int ret; 319 320 CC_LIST_WLOCK(); 321 ret = cc_deregister_algo_locked(remove_cc); 322 CC_LIST_WUNLOCK(); 323 return (ret); 324 } 325 326 /* 327 * Returns 0 on success, non-zero on failure. 328 */ 329 int 330 cc_register_algo(struct cc_algo *add_cc) 331 { 332 struct cc_algo *funcs; 333 int err; 334 335 err = 0; 336 337 /* 338 * Iterate over list of registered CC algorithms and make sure 339 * we're not trying to add a duplicate. 340 */ 341 CC_LIST_WLOCK(); 342 STAILQ_FOREACH(funcs, &cc_list, entries) { 343 if (funcs == add_cc || 344 strncmp(funcs->name, add_cc->name, 345 TCP_CA_NAME_MAX) == 0) { 346 err = EEXIST; 347 break; 348 } 349 } 350 /* Init its reference count */ 351 if (err == 0) 352 refcount_init(&add_cc->cc_refcount, 0); 353 /* 354 * The first loaded congestion control module will become 355 * the default until we find the "CC_DEFAULT" defined in 356 * the config (if we do). 357 */ 358 if (!err) { 359 STAILQ_INSERT_TAIL(&cc_list, add_cc, entries); 360 if (strcmp(add_cc->name, CC_DEFAULT) == 0) { 361 V_default_cc_ptr = add_cc; 362 } else if (V_default_cc_ptr == NULL) { 363 V_default_cc_ptr = add_cc; 364 } 365 } 366 CC_LIST_WUNLOCK(); 367 368 return (err); 369 } 370 371 static void 372 vnet_cc_sysinit(void *arg) 373 { 374 struct cc_algo *cc; 375 376 if (IS_DEFAULT_VNET(curvnet)) 377 return; 378 379 CURVNET_SET(vnet0); 380 cc = V_default_cc_ptr; 381 CURVNET_RESTORE(); 382 383 V_default_cc_ptr = cc; 384 } 385 VNET_SYSINIT(vnet_cc_sysinit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 386 vnet_cc_sysinit, NULL); 387 388 /* 389 * Perform any necessary tasks before we exit congestion recovery. 390 */ 391 void 392 newreno_cc_post_recovery(struct cc_var *ccv) 393 { 394 int pipe; 395 uint32_t mss = tcp_fixed_maxseg(ccv->tp); 396 397 if (IN_FASTRECOVERY(CCV(ccv, t_flags))) { 398 /* 399 * Fast recovery will conclude after returning from this 400 * function. Window inflation should have left us with 401 * approximately snd_ssthresh outstanding data. But in case we 402 * would be inclined to send a burst, better to do it via the 403 * slow start mechanism. 404 * 405 * XXXLAS: Find a way to do this without needing curack 406 */ 407 if (V_tcp_do_newsack) 408 pipe = tcp_compute_pipe(ccv->tp); 409 else 410 pipe = CCV(ccv, snd_max) - ccv->curack; 411 if (pipe < CCV(ccv, snd_ssthresh)) 412 /* 413 * Ensure that cwnd does not collapse to 1 MSS under 414 * adverse conditions. Implements RFC6582 415 */ 416 CCV(ccv, snd_cwnd) = max(pipe, mss) + mss; 417 else 418 CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); 419 } 420 } 421 422 void 423 newreno_cc_after_idle(struct cc_var *ccv) 424 { 425 uint32_t rw; 426 /* 427 * If we've been idle for more than one retransmit timeout the old 428 * congestion window is no longer current and we have to reduce it to 429 * the restart window before we can transmit again. 430 * 431 * The restart window is the initial window or the last CWND, whichever 432 * is smaller. 433 * 434 * This is done to prevent us from flooding the path with a full CWND at 435 * wirespeed, overloading router and switch buffers along the way. 436 * 437 * See RFC5681 Section 4.1. "Restarting Idle Connections". 438 * 439 * In addition, per RFC2861 Section 2, the ssthresh is set to the 440 * maximum of the former ssthresh or 3/4 of the old cwnd, to 441 * not exit slow-start prematurely. 442 */ 443 rw = tcp_compute_initwnd(tcp_fixed_maxseg(ccv->tp)); 444 445 CCV(ccv, snd_ssthresh) = max(CCV(ccv, snd_ssthresh), 446 CCV(ccv, snd_cwnd)-(CCV(ccv, snd_cwnd)>>2)); 447 448 CCV(ccv, snd_cwnd) = min(rw, CCV(ccv, snd_cwnd)); 449 } 450 451 /* 452 * Get a new congestion window size on a multiplicative decrease event. 453 * */ 454 u_int 455 newreno_cc_cwnd_on_multiplicative_decrease(struct cc_var *ccv, uint32_t mss) 456 { 457 uint32_t cwin, factor; 458 459 cwin = CCV(ccv, snd_cwnd); 460 /* 461 * Other TCP congestion controls use newreno_cong_signal(), but 462 * with their own private cc_data. Make sure the cc_data is used 463 * correctly. 464 */ 465 factor = V_newreno_beta; 466 467 return max(((uint64_t)cwin * (uint64_t)factor) / (100ULL * (uint64_t)mss), 2) * mss; 468 } 469 470 /* 471 * Perform any necessary tasks before we enter congestion recovery. 472 */ 473 void 474 newreno_cc_cong_signal(struct cc_var *ccv, ccsignal_t type) 475 { 476 uint32_t cwin, mss, pipe; 477 478 mss = tcp_fixed_maxseg(ccv->tp); 479 480 /* Catch algos which mistakenly leak private signal types. */ 481 KASSERT((type & CC_SIGPRIVMASK) == 0, 482 ("%s: congestion signal type 0x%08x is private\n", __func__, type)); 483 484 cwin = newreno_cc_cwnd_on_multiplicative_decrease(ccv, mss); 485 486 switch (type) { 487 case CC_NDUPACK: 488 if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) { 489 if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) 490 CCV(ccv, snd_ssthresh) = cwin; 491 ENTER_RECOVERY(CCV(ccv, t_flags)); 492 } 493 break; 494 case CC_ECN: 495 if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { 496 CCV(ccv, snd_ssthresh) = cwin; 497 CCV(ccv, snd_cwnd) = cwin; 498 ENTER_CONGRECOVERY(CCV(ccv, t_flags)); 499 } 500 break; 501 case CC_RTO: 502 if (CCV(ccv, t_rxtshift) == 1) { 503 if (V_tcp_do_newsack) { 504 pipe = tcp_compute_pipe(ccv->tp); 505 } else { 506 pipe = CCV(ccv, snd_max) - 507 CCV(ccv, snd_fack) + 508 CCV(ccv, sackhint.sack_bytes_rexmit); 509 } 510 CCV(ccv, snd_ssthresh) = max(2, 511 min(CCV(ccv, snd_wnd), pipe) / 2 / mss) * mss; 512 } 513 CCV(ccv, snd_cwnd) = mss; 514 break; 515 default: 516 break; 517 } 518 } 519 520 u_int 521 newreno_cc_cwnd_in_cong_avoid(struct cc_var *ccv) 522 { 523 u_int cw = CCV(ccv, snd_cwnd); 524 u_int incr = tcp_fixed_maxseg(ccv->tp); 525 526 KASSERT(cw > CCV(ccv, snd_ssthresh), 527 ("congestion control state not in congestion avoidance\n")); 528 529 /* 530 * Regular in-order ACK, open the congestion window. 531 * The congestion control state we're in is congestion avoidance. 532 * 533 * Check if ABC (RFC 3465) is enabled. 534 * cong avoid: cwnd > ssthresh 535 * 536 * cong avoid and ABC (RFC 3465): 537 * Grow cwnd linearly by maxseg per RTT for each 538 * cwnd worth of ACKed data. 539 * 540 * cong avoid without ABC (RFC 5681): 541 * Grow cwnd linearly by approximately maxseg per RTT using 542 * maxseg^2 / cwnd per ACK as the increment. 543 * If cwnd > maxseg^2, fix the cwnd increment at 1 byte to 544 * avoid capping cwnd. 545 */ 546 if (V_tcp_do_rfc3465) { 547 if (ccv->flags & CCF_ABC_SENTAWND) 548 ccv->flags &= ~CCF_ABC_SENTAWND; 549 else 550 incr = 0; 551 } else 552 incr = max((incr * incr / cw), 1); 553 /* ABC is on by default, so incr equals 0 frequently. */ 554 if (incr > 0) 555 return min(cw + incr, TCP_MAXWIN << CCV(ccv, snd_scale)); 556 else 557 return cw; 558 } 559 560 u_int 561 newreno_cc_cwnd_in_slow_start(struct cc_var *ccv) 562 { 563 u_int cw = CCV(ccv, snd_cwnd); 564 u_int mss = tcp_fixed_maxseg(ccv->tp); 565 u_int incr = mss; 566 567 KASSERT(cw <= CCV(ccv, snd_ssthresh), 568 ("congestion control state not in slow start\n")); 569 570 /* 571 * Regular in-order ACK, open the congestion window. 572 * The congestion control state we're in is slow start. 573 * 574 * slow start: cwnd <= ssthresh 575 * 576 * slow start and ABC (RFC 3465): 577 * Grow cwnd exponentially by the amount of data 578 * ACKed capping the max increment per ACK to 579 * (abc_l_var * maxseg) bytes. 580 * 581 * slow start without ABC (RFC 5681): 582 * Grow cwnd exponentially by maxseg per ACK. 583 */ 584 if (V_tcp_do_rfc3465) { 585 /* 586 * In slow-start with ABC enabled and no RTO in sight? 587 * (Must not use abc_l_var > 1 if slow starting after 588 * an RTO. On RTO, snd_nxt = snd_una, so the 589 * snd_nxt == snd_max check is sufficient to 590 * handle this). 591 * 592 * XXXLAS: Find a way to signal SS after RTO that 593 * doesn't rely on tcpcb vars. 594 */ 595 uint16_t abc_val; 596 597 if (ccv->flags & CCF_USE_LOCAL_ABC) 598 abc_val = ccv->labc; 599 else 600 abc_val = V_tcp_abc_l_var; 601 if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max)) 602 incr = min(ccv->bytes_this_ack, 603 ccv->nsegs * abc_val * mss); 604 else 605 incr = min(ccv->bytes_this_ack, mss); 606 } 607 /* ABC is on by default, so incr equals 0 frequently. */ 608 if (incr > 0) 609 return min(cw + incr, TCP_MAXWIN << CCV(ccv, snd_scale)); 610 else 611 return cw; 612 } 613 614 void 615 newreno_cc_ack_received(struct cc_var *ccv, ccsignal_t type) 616 { 617 if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) && 618 (ccv->flags & CCF_CWND_LIMITED)) { 619 if (CCV(ccv, snd_cwnd) > CCV(ccv, snd_ssthresh)) { 620 CCV(ccv, snd_cwnd) = newreno_cc_cwnd_in_cong_avoid(ccv); 621 } else { 622 CCV(ccv, snd_cwnd) = newreno_cc_cwnd_in_slow_start(ccv); 623 } 624 } 625 } 626 627 static int 628 cc_stop_new_assignments(struct cc_algo *algo) 629 { 630 CC_LIST_WLOCK(); 631 if (cc_check_default(algo)) { 632 /* A default cannot be removed */ 633 CC_LIST_WUNLOCK(); 634 return (EBUSY); 635 } 636 algo->flags |= CC_MODULE_BEING_REMOVED; 637 CC_LIST_WUNLOCK(); 638 return (0); 639 } 640 641 /* 642 * Handles kld related events. Returns 0 on success, non-zero on failure. 643 */ 644 int 645 cc_modevent(module_t mod, int event_type, void *data) 646 { 647 struct cc_algo *algo; 648 int err; 649 650 err = 0; 651 algo = (struct cc_algo *)data; 652 653 switch(event_type) { 654 case MOD_LOAD: 655 if ((algo->cc_data_sz == NULL) && (algo->cb_init != NULL)) { 656 /* 657 * A module must have a cc_data_sz function 658 * even if it has no data it should return 0. 659 */ 660 printf("Module Load Fails, it lacks a cc_data_sz() function but has a cb_init()!\n"); 661 err = EINVAL; 662 break; 663 } 664 if (algo->mod_init != NULL) 665 err = algo->mod_init(); 666 if (!err) 667 err = cc_register_algo(algo); 668 break; 669 670 case MOD_SHUTDOWN: 671 break; 672 case MOD_QUIESCE: 673 /* Stop any new assigments */ 674 err = cc_stop_new_assignments(algo); 675 break; 676 case MOD_UNLOAD: 677 /* 678 * Deregister and remove the module from the list 679 */ 680 CC_LIST_WLOCK(); 681 /* Even with -f we can't unload if its the default */ 682 if (cc_check_default(algo)) { 683 /* A default cannot be removed */ 684 CC_LIST_WUNLOCK(); 685 return (EBUSY); 686 } 687 /* 688 * If -f was used and users are still attached to 689 * the algorithm things are going to go boom. 690 */ 691 err = cc_deregister_algo_locked(algo); 692 CC_LIST_WUNLOCK(); 693 if ((err == 0) && (algo->mod_destroy != NULL)) { 694 algo->mod_destroy(); 695 } 696 break; 697 default: 698 err = EINVAL; 699 break; 700 } 701 702 return (err); 703 } 704 705 SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL); 706 707 /* Declare sysctl tree and populate it. */ 708 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 709 "Congestion control related settings"); 710 711 SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm, 712 CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, 713 NULL, 0, cc_default_algo, "A", 714 "Default congestion control algorithm"); 715 716 SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available, 717 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 718 NULL, 0, cc_list_available, "A", 719 "List available congestion control algorithms"); 720 721 SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, hystartplusplus, 722 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 723 "New Reno related HyStart++ settings"); 724 725 SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, minrtt_thresh, 726 CTLFLAG_RW, 727 &hystart_minrtt_thresh, 4000, 728 "HyStarts++ minimum RTT thresh used in clamp (in microseconds)"); 729 730 SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, maxrtt_thresh, 731 CTLFLAG_RW, 732 &hystart_maxrtt_thresh, 16000, 733 "HyStarts++ maximum RTT thresh used in clamp (in microseconds)"); 734 735 SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, n_rttsamples, 736 CTLFLAG_RW, 737 &hystart_n_rttsamples, 8, 738 "The number of RTT samples that must be seen to consider HyStart++"); 739 740 SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, css_growth_div, 741 CTLFLAG_RW, 742 &hystart_css_growth_div, 4, 743 "The divisor to the growth when in Hystart++ CSS"); 744 745 SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, css_rounds, 746 CTLFLAG_RW, 747 &hystart_css_rounds, 5, 748 "The number of rounds HyStart++ lasts in CSS before falling to CA"); 749 750 SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, bblogs, 751 CTLFLAG_RW, 752 &hystart_bblogs, 0, 753 "Do we enable HyStart++ Black Box logs to be generated if BB logging is on"); 754 755 VNET_DEFINE(int, cc_do_abe) = 0; 756 SYSCTL_INT(_net_inet_tcp_cc, OID_AUTO, abe, CTLFLAG_VNET | CTLFLAG_RW, 757 &VNET_NAME(cc_do_abe), 0, 758 "Enable draft-ietf-tcpm-alternativebackoff-ecn (TCP Alternative Backoff with ECN)"); 759 760 VNET_DEFINE(int, cc_abe_frlossreduce) = 0; 761 SYSCTL_INT(_net_inet_tcp_cc, OID_AUTO, abe_frlossreduce, CTLFLAG_VNET | CTLFLAG_RW, 762 &VNET_NAME(cc_abe_frlossreduce), 0, 763 "Apply standard beta instead of ABE-beta during ECN-signalled congestion " 764 "recovery episodes if loss also needs to be repaired"); 765