1 /*- 2 * 3 * SPDX-License-Identifier: BSD-3-Clause 4 * 5 * Copyright (c) 2018-2019 6 * Netflix Inc. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 */ 31 /** 32 * Author: Randall Stewart <rrs@netflix.com> 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 #include "opt_inet.h" 38 #include "opt_inet6.h" 39 #include "opt_ipsec.h" 40 #include "opt_tcpdebug.h" 41 #include "opt_ratelimit.h" 42 #include <sys/param.h> 43 #include <sys/kernel.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/sysctl.h> 49 #include <sys/eventhandler.h> 50 #include <sys/mutex.h> 51 #include <sys/ck.h> 52 #include <net/if.h> 53 #include <net/if_var.h> 54 #include <netinet/in.h> 55 #include <netinet/in_pcb.h> 56 #define TCPSTATES /* for logging */ 57 #include <netinet/tcp_var.h> 58 #ifdef INET6 59 #include <netinet6/tcp6_var.h> 60 #endif 61 #include <netinet/tcp_ratelimit.h> 62 #ifndef USECS_IN_SECOND 63 #define USECS_IN_SECOND 1000000 64 #endif 65 /* 66 * For the purposes of each send, what is the size 67 * of an ethernet frame. 68 */ 69 #ifndef ETHERNET_SEGMENT_SIZE 70 #define ETHERNET_SEGMENT_SIZE 1500 71 #endif 72 MALLOC_DEFINE(M_TCPPACE, "tcp_hwpace", "TCP Hardware pacing memory"); 73 #ifdef RATELIMIT 74 75 #define COMMON_RATE 180500 76 uint64_t desired_rates[] = { 77 62500, /* 500Kbps */ 78 180500, /* 1.44Mpbs */ 79 375000, /* 3Mbps */ 80 500000, /* 4Mbps */ 81 625000, /* 5Mbps */ 82 750000, /* 6Mbps */ 83 1000000, /* 8Mbps */ 84 1250000, /* 10Mbps */ 85 2500000, /* 20Mbps */ 86 3750000, /* 30Mbps */ 87 5000000, /* 40Meg */ 88 6250000, /* 50Mbps */ 89 12500000, /* 100Mbps */ 90 25000000, /* 200Mbps */ 91 50000000, /* 400Mbps */ 92 100000000, /* 800Mbps */ 93 12500, /* 100kbps */ 94 25000, /* 200kbps */ 95 875000, /* 7Mbps */ 96 1125000, /* 9Mbps */ 97 1875000, /* 15Mbps */ 98 3125000, /* 25Mbps */ 99 8125000, /* 65Mbps */ 100 10000000, /* 80Mbps */ 101 18750000, /* 150Mbps */ 102 20000000, /* 250Mbps */ 103 37500000, /* 350Mbps */ 104 62500000, /* 500Mbps */ 105 78125000, /* 625Mbps */ 106 125000000, /* 1Gbps */ 107 }; 108 #define MAX_HDWR_RATES (sizeof(desired_rates)/sizeof(uint64_t)) 109 #define RS_ORDERED_COUNT 16 /* 110 * Number that are in order 111 * at the beginning of the table, 112 * over this a sort is required. 113 */ 114 #define RS_NEXT_ORDER_GROUP 16 /* 115 * The point in our table where 116 * we come fill in a second ordered 117 * group (index wise means -1). 118 */ 119 #define ALL_HARDWARE_RATES 1004 /* 120 * 1Meg - 1Gig in 1 Meg steps 121 * plus 100, 200k and 500k and 122 * 10Gig 123 */ 124 125 #define RS_ONE_MEGABIT_PERSEC 1000000 126 #define RS_ONE_GIGABIT_PERSEC 1000000000 127 #define RS_TEN_GIGABIT_PERSEC 10000000000 128 129 static struct head_tcp_rate_set int_rs; 130 static struct mtx rs_mtx; 131 uint32_t rs_number_alive; 132 uint32_t rs_number_dead; 133 134 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, rl, CTLFLAG_RW, 0, 135 "TCP Ratelimit stats"); 136 SYSCTL_UINT(_net_inet_tcp_rl, OID_AUTO, alive, CTLFLAG_RW, 137 &rs_number_alive, 0, 138 "Number of interfaces initialized for ratelimiting"); 139 SYSCTL_UINT(_net_inet_tcp_rl, OID_AUTO, dead, CTLFLAG_RW, 140 &rs_number_dead, 0, 141 "Number of interfaces departing from ratelimiting"); 142 143 static void 144 rl_add_syctl_entries(struct sysctl_oid *rl_sysctl_root, struct tcp_rate_set *rs) 145 { 146 /* 147 * Add sysctl entries for thus interface. 148 */ 149 if (rs->rs_flags & RS_INTF_NO_SUP) { 150 SYSCTL_ADD_S32(&rs->sysctl_ctx, 151 SYSCTL_CHILDREN(rl_sysctl_root), 152 OID_AUTO, "disable", CTLFLAG_RD, 153 &rs->rs_disable, 0, 154 "Disable this interface from new hdwr limiting?"); 155 } else { 156 SYSCTL_ADD_S32(&rs->sysctl_ctx, 157 SYSCTL_CHILDREN(rl_sysctl_root), 158 OID_AUTO, "disable", CTLFLAG_RW, 159 &rs->rs_disable, 0, 160 "Disable this interface from new hdwr limiting?"); 161 } 162 SYSCTL_ADD_S32(&rs->sysctl_ctx, 163 SYSCTL_CHILDREN(rl_sysctl_root), 164 OID_AUTO, "minseg", CTLFLAG_RW, 165 &rs->rs_min_seg, 0, 166 "What is the minimum we need to send on this interface?"); 167 SYSCTL_ADD_U64(&rs->sysctl_ctx, 168 SYSCTL_CHILDREN(rl_sysctl_root), 169 OID_AUTO, "flow_limit", CTLFLAG_RW, 170 &rs->rs_flow_limit, 0, 171 "What is the limit for number of flows (0=unlimited)?"); 172 SYSCTL_ADD_S32(&rs->sysctl_ctx, 173 SYSCTL_CHILDREN(rl_sysctl_root), 174 OID_AUTO, "highest", CTLFLAG_RD, 175 &rs->rs_highest_valid, 0, 176 "Highest valid rate"); 177 SYSCTL_ADD_S32(&rs->sysctl_ctx, 178 SYSCTL_CHILDREN(rl_sysctl_root), 179 OID_AUTO, "lowest", CTLFLAG_RD, 180 &rs->rs_lowest_valid, 0, 181 "Lowest valid rate"); 182 SYSCTL_ADD_S32(&rs->sysctl_ctx, 183 SYSCTL_CHILDREN(rl_sysctl_root), 184 OID_AUTO, "flags", CTLFLAG_RD, 185 &rs->rs_flags, 0, 186 "What lags are on the entry?"); 187 SYSCTL_ADD_S32(&rs->sysctl_ctx, 188 SYSCTL_CHILDREN(rl_sysctl_root), 189 OID_AUTO, "numrates", CTLFLAG_RD, 190 &rs->rs_rate_cnt, 0, 191 "How many rates re there?"); 192 SYSCTL_ADD_U64(&rs->sysctl_ctx, 193 SYSCTL_CHILDREN(rl_sysctl_root), 194 OID_AUTO, "flows_using", CTLFLAG_RD, 195 &rs->rs_flows_using, 0, 196 "How many flows are using this interface now?"); 197 #ifdef DETAILED_RATELIMIT_SYSCTL 198 if (rs->rs_rlt && rs->rs_rate_cnt > 0) { 199 /* Lets display the rates */ 200 int i; 201 struct sysctl_oid *rl_rates; 202 struct sysctl_oid *rl_rate_num; 203 char rate_num[16]; 204 rl_rates = SYSCTL_ADD_NODE(&rs->sysctl_ctx, 205 SYSCTL_CHILDREN(rl_sysctl_root), 206 OID_AUTO, 207 "rate", 208 CTLFLAG_RW, 0, 209 "Ratelist"); 210 for( i = 0; i < rs->rs_rate_cnt; i++) { 211 sprintf(rate_num, "%d", i); 212 rl_rate_num = SYSCTL_ADD_NODE(&rs->sysctl_ctx, 213 SYSCTL_CHILDREN(rl_rates), 214 OID_AUTO, 215 rate_num, 216 CTLFLAG_RW, 0, 217 "Individual Rate"); 218 SYSCTL_ADD_U32(&rs->sysctl_ctx, 219 SYSCTL_CHILDREN(rl_rate_num), 220 OID_AUTO, "flags", CTLFLAG_RD, 221 &rs->rs_rlt[i].flags, 0, 222 "Flags on this rate"); 223 SYSCTL_ADD_U32(&rs->sysctl_ctx, 224 SYSCTL_CHILDREN(rl_rate_num), 225 OID_AUTO, "pacetime", CTLFLAG_RD, 226 &rs->rs_rlt[i].time_between, 0, 227 "Time hardware inserts between 1500 byte sends"); 228 SYSCTL_ADD_U64(&rs->sysctl_ctx, 229 SYSCTL_CHILDREN(rl_rate_num), 230 OID_AUTO, "rate", CTLFLAG_RD, 231 &rs->rs_rlt[i].rate, 0, 232 "Rate in bytes per second"); 233 } 234 } 235 #endif 236 } 237 238 static void 239 rs_destroy(epoch_context_t ctx) 240 { 241 struct tcp_rate_set *rs; 242 bool do_free_rs; 243 244 rs = __containerof(ctx, struct tcp_rate_set, rs_epoch_ctx); 245 246 mtx_lock(&rs_mtx); 247 rs->rs_flags &= ~RS_FUNERAL_SCHD; 248 /* 249 * In theory its possible (but unlikely) 250 * that while the delete was occuring 251 * and we were applying the DEAD flag 252 * someone slipped in and found the 253 * interface in a lookup. While we 254 * decided rs_flows_using were 0 and 255 * scheduling the epoch_call, the other 256 * thread incremented rs_flow_using. This 257 * is because users have a pointer and 258 * we only use the rs_flows_using in an 259 * atomic fashion, i.e. the other entities 260 * are not protected. To assure this did 261 * not occur, we check rs_flows_using here 262 * before deleting. 263 */ 264 do_free_rs = (rs->rs_flows_using == 0); 265 rs_number_dead--; 266 mtx_unlock(&rs_mtx); 267 268 if (do_free_rs) { 269 sysctl_ctx_free(&rs->sysctl_ctx); 270 free(rs->rs_rlt, M_TCPPACE); 271 free(rs, M_TCPPACE); 272 } 273 } 274 275 static void 276 rs_defer_destroy(struct tcp_rate_set *rs) 277 { 278 279 mtx_assert(&rs_mtx, MA_OWNED); 280 281 /* Check if already pending. */ 282 if (rs->rs_flags & RS_FUNERAL_SCHD) 283 return; 284 285 rs_number_dead++; 286 287 /* Set flag to only defer once. */ 288 rs->rs_flags |= RS_FUNERAL_SCHD; 289 NET_EPOCH_CALL(rs_destroy, &rs->rs_epoch_ctx); 290 } 291 292 #ifdef INET 293 extern counter_u64_t rate_limit_set_ok; 294 extern counter_u64_t rate_limit_active; 295 extern counter_u64_t rate_limit_alloc_fail; 296 #endif 297 298 static int 299 rl_attach_txrtlmt(struct ifnet *ifp, 300 uint32_t flowtype, 301 int flowid, 302 uint64_t cfg_rate, 303 struct m_snd_tag **tag) 304 { 305 int error; 306 union if_snd_tag_alloc_params params = { 307 .rate_limit.hdr.type = IF_SND_TAG_TYPE_RATE_LIMIT, 308 .rate_limit.hdr.flowid = flowid, 309 .rate_limit.hdr.flowtype = flowtype, 310 .rate_limit.max_rate = cfg_rate, 311 .rate_limit.flags = M_NOWAIT, 312 }; 313 314 if (ifp->if_snd_tag_alloc == NULL) { 315 error = EOPNOTSUPP; 316 } else { 317 error = ifp->if_snd_tag_alloc(ifp, ¶ms, tag); 318 #ifdef INET 319 if (error == 0) { 320 if_ref((*tag)->ifp); 321 counter_u64_add(rate_limit_set_ok, 1); 322 counter_u64_add(rate_limit_active, 1); 323 } else 324 counter_u64_add(rate_limit_alloc_fail, 1); 325 #endif 326 } 327 return (error); 328 } 329 330 static void 331 populate_canned_table(struct tcp_rate_set *rs, const uint64_t *rate_table_act) 332 { 333 /* 334 * The internal table is "special", it 335 * is two seperate ordered tables that 336 * must be merged. We get here when the 337 * adapter specifies a number of rates that 338 * covers both ranges in the table in some 339 * form. 340 */ 341 int i, at_low, at_high; 342 uint8_t low_disabled = 0, high_disabled = 0; 343 344 for(i = 0, at_low = 0, at_high = RS_NEXT_ORDER_GROUP; i < rs->rs_rate_cnt; i++) { 345 rs->rs_rlt[i].flags = 0; 346 rs->rs_rlt[i].time_between = 0; 347 if ((low_disabled == 0) && 348 (high_disabled || 349 (rate_table_act[at_low] < rate_table_act[at_high]))) { 350 rs->rs_rlt[i].rate = rate_table_act[at_low]; 351 at_low++; 352 if (at_low == RS_NEXT_ORDER_GROUP) 353 low_disabled = 1; 354 } else if (high_disabled == 0) { 355 rs->rs_rlt[i].rate = rate_table_act[at_high]; 356 at_high++; 357 if (at_high == MAX_HDWR_RATES) 358 high_disabled = 1; 359 } 360 } 361 } 362 363 static struct tcp_rate_set * 364 rt_setup_new_rs(struct ifnet *ifp, int *error) 365 { 366 struct tcp_rate_set *rs; 367 const uint64_t *rate_table_act; 368 uint64_t lentim, res; 369 size_t sz; 370 uint32_t hash_type; 371 int i; 372 struct if_ratelimit_query_results rl; 373 struct sysctl_oid *rl_sysctl_root; 374 /* 375 * We expect to enter with the 376 * mutex locked. 377 */ 378 379 if (ifp->if_ratelimit_query == NULL) { 380 /* 381 * We can do nothing if we cannot 382 * get a query back from the driver. 383 */ 384 return (NULL); 385 } 386 rs = malloc(sizeof(struct tcp_rate_set), M_TCPPACE, M_NOWAIT | M_ZERO); 387 if (rs == NULL) { 388 if (error) 389 *error = ENOMEM; 390 return (NULL); 391 } 392 rl.flags = RT_NOSUPPORT; 393 ifp->if_ratelimit_query(ifp, &rl); 394 if (rl.flags & RT_IS_UNUSABLE) { 395 /* 396 * The interface does not really support 397 * the rate-limiting. 398 */ 399 memset(rs, 0, sizeof(struct tcp_rate_set)); 400 rs->rs_ifp = ifp; 401 rs->rs_if_dunit = ifp->if_dunit; 402 rs->rs_flags = RS_INTF_NO_SUP; 403 rs->rs_disable = 1; 404 rs_number_alive++; 405 sysctl_ctx_init(&rs->sysctl_ctx); 406 rl_sysctl_root = SYSCTL_ADD_NODE(&rs->sysctl_ctx, 407 SYSCTL_STATIC_CHILDREN(_net_inet_tcp_rl), 408 OID_AUTO, 409 rs->rs_ifp->if_xname, 410 CTLFLAG_RW, 0, 411 ""); 412 rl_add_syctl_entries(rl_sysctl_root, rs); 413 mtx_lock(&rs_mtx); 414 CK_LIST_INSERT_HEAD(&int_rs, rs, next); 415 mtx_unlock(&rs_mtx); 416 return (rs); 417 } else if ((rl.flags & RT_IS_INDIRECT) == RT_IS_INDIRECT) { 418 memset(rs, 0, sizeof(struct tcp_rate_set)); 419 rs->rs_ifp = ifp; 420 rs->rs_if_dunit = ifp->if_dunit; 421 rs->rs_flags = RS_IS_DEFF; 422 rs_number_alive++; 423 sysctl_ctx_init(&rs->sysctl_ctx); 424 rl_sysctl_root = SYSCTL_ADD_NODE(&rs->sysctl_ctx, 425 SYSCTL_STATIC_CHILDREN(_net_inet_tcp_rl), 426 OID_AUTO, 427 rs->rs_ifp->if_xname, 428 CTLFLAG_RW, 0, 429 ""); 430 rl_add_syctl_entries(rl_sysctl_root, rs); 431 mtx_lock(&rs_mtx); 432 CK_LIST_INSERT_HEAD(&int_rs, rs, next); 433 mtx_unlock(&rs_mtx); 434 return (rs); 435 } else if ((rl.flags & RT_IS_FIXED_TABLE) == RT_IS_FIXED_TABLE) { 436 /* Mellanox most likely */ 437 rs->rs_ifp = ifp; 438 rs->rs_if_dunit = ifp->if_dunit; 439 rs->rs_rate_cnt = rl.number_of_rates; 440 rs->rs_min_seg = rl.min_segment_burst; 441 rs->rs_highest_valid = 0; 442 rs->rs_flow_limit = rl.max_flows; 443 rs->rs_flags = RS_IS_INTF | RS_NO_PRE; 444 rs->rs_disable = 0; 445 rate_table_act = rl.rate_table; 446 } else if ((rl.flags & RT_IS_SELECTABLE) == RT_IS_SELECTABLE) { 447 /* Chelsio */ 448 rs->rs_ifp = ifp; 449 rs->rs_if_dunit = ifp->if_dunit; 450 rs->rs_rate_cnt = rl.number_of_rates; 451 rs->rs_min_seg = rl.min_segment_burst; 452 rs->rs_disable = 0; 453 rs->rs_flow_limit = rl.max_flows; 454 rate_table_act = desired_rates; 455 if ((rs->rs_rate_cnt > MAX_HDWR_RATES) && 456 (rs->rs_rate_cnt < ALL_HARDWARE_RATES)) { 457 /* 458 * Our desired table is not big 459 * enough, do what we can. 460 */ 461 rs->rs_rate_cnt = MAX_HDWR_RATES; 462 } 463 if (rs->rs_rate_cnt <= RS_ORDERED_COUNT) 464 rs->rs_flags = RS_IS_INTF; 465 else 466 rs->rs_flags = RS_IS_INTF | RS_INT_TBL; 467 if (rs->rs_rate_cnt >= ALL_HARDWARE_RATES) 468 rs->rs_rate_cnt = ALL_HARDWARE_RATES; 469 } else { 470 printf("Interface:%s unit:%d not one known to have rate-limits\n", 471 ifp->if_dname, 472 ifp->if_dunit); 473 free(rs, M_TCPPACE); 474 return (NULL); 475 } 476 sz = sizeof(struct tcp_hwrate_limit_table) * rs->rs_rate_cnt; 477 rs->rs_rlt = malloc(sz, M_TCPPACE, M_NOWAIT); 478 if (rs->rs_rlt == NULL) { 479 if (error) 480 *error = ENOMEM; 481 bail: 482 free(rs, M_TCPPACE); 483 return (NULL); 484 } 485 if (rs->rs_rate_cnt >= ALL_HARDWARE_RATES) { 486 /* 487 * The interface supports all 488 * the rates we could possibly want. 489 */ 490 uint64_t rat; 491 492 rs->rs_rlt[0].rate = 12500; /* 100k */ 493 rs->rs_rlt[1].rate = 25000; /* 200k */ 494 rs->rs_rlt[2].rate = 62500; /* 500k */ 495 /* Note 125000 == 1Megabit 496 * populate 1Meg - 1000meg. 497 */ 498 for(i = 3, rat = 125000; i< (ALL_HARDWARE_RATES-1); i++) { 499 rs->rs_rlt[i].rate = rat; 500 rat += 125000; 501 } 502 rs->rs_rlt[(ALL_HARDWARE_RATES-1)].rate = 1250000000; 503 } else if (rs->rs_flags & RS_INT_TBL) { 504 /* We populate this in a special way */ 505 populate_canned_table(rs, rate_table_act); 506 } else { 507 /* 508 * Just copy in the rates from 509 * the table, it is in order. 510 */ 511 for (i=0; i<rs->rs_rate_cnt; i++) { 512 rs->rs_rlt[i].rate = rate_table_act[i]; 513 rs->rs_rlt[i].time_between = 0; 514 rs->rs_rlt[i].flags = 0; 515 } 516 } 517 for (i = (rs->rs_rate_cnt - 1); i >= 0; i--) { 518 /* 519 * We go backwards through the list so that if we can't get 520 * a rate and fail to init one, we have at least a chance of 521 * getting the highest one. 522 */ 523 rs->rs_rlt[i].ptbl = rs; 524 rs->rs_rlt[i].tag = NULL; 525 /* 526 * Calculate the time between. 527 */ 528 lentim = ETHERNET_SEGMENT_SIZE * USECS_IN_SECOND; 529 res = lentim / rs->rs_rlt[i].rate; 530 if (res > 0) 531 rs->rs_rlt[i].time_between = res; 532 else 533 rs->rs_rlt[i].time_between = 1; 534 if (rs->rs_flags & RS_NO_PRE) { 535 rs->rs_rlt[i].flags = HDWRPACE_INITED; 536 rs->rs_lowest_valid = i; 537 } else { 538 int err; 539 #ifdef RSS 540 hash_type = M_HASHTYPE_RSS_TCP_IPV4; 541 #else 542 hash_type = M_HASHTYPE_OPAQUE_HASH; 543 #endif 544 err = rl_attach_txrtlmt(ifp, 545 hash_type, 546 (i + 1), 547 rs->rs_rlt[i].rate, 548 &rs->rs_rlt[i].tag); 549 if (err) { 550 if (i == (rs->rs_rate_cnt - 1)) { 551 /* 552 * Huh - first rate and we can't get 553 * it? 554 */ 555 free(rs->rs_rlt, M_TCPPACE); 556 if (error) 557 *error = err; 558 goto bail; 559 } else { 560 if (error) 561 *error = err; 562 } 563 break; 564 } else { 565 rs->rs_rlt[i].flags = HDWRPACE_INITED | HDWRPACE_TAGPRESENT; 566 rs->rs_lowest_valid = i; 567 } 568 } 569 } 570 /* Did we get at least 1 rate? */ 571 if (rs->rs_rlt[(rs->rs_rate_cnt - 1)].flags & HDWRPACE_INITED) 572 rs->rs_highest_valid = rs->rs_rate_cnt - 1; 573 else { 574 free(rs->rs_rlt, M_TCPPACE); 575 goto bail; 576 } 577 rs_number_alive++; 578 sysctl_ctx_init(&rs->sysctl_ctx); 579 rl_sysctl_root = SYSCTL_ADD_NODE(&rs->sysctl_ctx, 580 SYSCTL_STATIC_CHILDREN(_net_inet_tcp_rl), 581 OID_AUTO, 582 rs->rs_ifp->if_xname, 583 CTLFLAG_RW, 0, 584 ""); 585 rl_add_syctl_entries(rl_sysctl_root, rs); 586 mtx_lock(&rs_mtx); 587 CK_LIST_INSERT_HEAD(&int_rs, rs, next); 588 mtx_unlock(&rs_mtx); 589 return (rs); 590 } 591 592 static const struct tcp_hwrate_limit_table * 593 tcp_int_find_suitable_rate(const struct tcp_rate_set *rs, 594 uint64_t bytes_per_sec, uint32_t flags) 595 { 596 struct tcp_hwrate_limit_table *arte = NULL, *rte = NULL; 597 uint64_t mbits_per_sec, ind_calc; 598 int i; 599 600 mbits_per_sec = (bytes_per_sec * 8); 601 if (flags & RS_PACING_LT) { 602 if ((mbits_per_sec < RS_ONE_MEGABIT_PERSEC) && 603 (rs->rs_lowest_valid <= 2)){ 604 /* 605 * Smaller than 1Meg, only 606 * 3 entries can match it. 607 */ 608 for(i = rs->rs_lowest_valid; i < 3; i++) { 609 if (bytes_per_sec <= rs->rs_rlt[i].rate) { 610 rte = &rs->rs_rlt[i]; 611 break; 612 } else if (rs->rs_rlt[i].flags & HDWRPACE_INITED) { 613 arte = &rs->rs_rlt[i]; 614 } 615 } 616 goto done; 617 } else if ((mbits_per_sec > RS_ONE_GIGABIT_PERSEC) && 618 (rs->rs_rlt[(ALL_HARDWARE_RATES-1)].flags & HDWRPACE_INITED)){ 619 /* 620 * Larger than 1G (the majority of 621 * our table. 622 */ 623 if (mbits_per_sec < RS_TEN_GIGABIT_PERSEC) 624 rte = &rs->rs_rlt[(ALL_HARDWARE_RATES-1)]; 625 else 626 arte = &rs->rs_rlt[(ALL_HARDWARE_RATES-1)]; 627 goto done; 628 } 629 /* 630 * If we reach here its in our table (between 1Meg - 1000Meg), 631 * just take the rounded down mbits per second, and add 632 * 1Megabit to it, from this we can calculate 633 * the index in the table. 634 */ 635 ind_calc = mbits_per_sec/RS_ONE_MEGABIT_PERSEC; 636 if ((ind_calc * RS_ONE_MEGABIT_PERSEC) != mbits_per_sec) 637 ind_calc++; 638 /* our table is offset by 3, we add 2 */ 639 ind_calc += 2; 640 if (ind_calc > (ALL_HARDWARE_RATES-1)) { 641 /* This should not happen */ 642 ind_calc = ALL_HARDWARE_RATES-1; 643 } 644 if ((ind_calc >= rs->rs_lowest_valid) && 645 (ind_calc <= rs->rs_highest_valid)) 646 rte = &rs->rs_rlt[ind_calc]; 647 } else if (flags & RS_PACING_EXACT_MATCH) { 648 if ((mbits_per_sec < RS_ONE_MEGABIT_PERSEC) && 649 (rs->rs_lowest_valid <= 2)){ 650 for(i = rs->rs_lowest_valid; i < 3; i++) { 651 if (bytes_per_sec == rs->rs_rlt[i].rate) { 652 rte = &rs->rs_rlt[i]; 653 break; 654 } 655 } 656 } else if ((mbits_per_sec > RS_ONE_GIGABIT_PERSEC) && 657 (rs->rs_rlt[(ALL_HARDWARE_RATES-1)].flags & HDWRPACE_INITED)) { 658 /* > 1Gbps only one rate */ 659 if (bytes_per_sec == rs->rs_rlt[(ALL_HARDWARE_RATES-1)].rate) { 660 /* Its 10G wow */ 661 rte = &rs->rs_rlt[(ALL_HARDWARE_RATES-1)]; 662 } 663 } else { 664 /* Ok it must be a exact meg (its between 1G and 1Meg) */ 665 ind_calc = mbits_per_sec/RS_ONE_MEGABIT_PERSEC; 666 if ((ind_calc * RS_ONE_MEGABIT_PERSEC) == mbits_per_sec) { 667 /* its an exact Mbps */ 668 ind_calc += 2; 669 if (ind_calc > (ALL_HARDWARE_RATES-1)) { 670 /* This should not happen */ 671 ind_calc = ALL_HARDWARE_RATES-1; 672 } 673 if (rs->rs_rlt[ind_calc].flags & HDWRPACE_INITED) 674 rte = &rs->rs_rlt[ind_calc]; 675 } 676 } 677 } else { 678 /* we want greater than the requested rate */ 679 if ((mbits_per_sec < RS_ONE_MEGABIT_PERSEC) && 680 (rs->rs_lowest_valid <= 2)){ 681 arte = &rs->rs_rlt[3]; /* set alternate to 1Meg */ 682 for (i=2; i>=rs->rs_lowest_valid; i--) { 683 if (bytes_per_sec < rs->rs_rlt[i].rate) { 684 rte = &rs->rs_rlt[i]; 685 break; 686 } else if ((flags & RS_PACING_GEQ) && 687 (bytes_per_sec == rs->rs_rlt[i].rate)) { 688 rte = &rs->rs_rlt[i]; 689 break; 690 } else { 691 arte = &rs->rs_rlt[i]; /* new alternate */ 692 } 693 } 694 } else if (mbits_per_sec > RS_ONE_GIGABIT_PERSEC) { 695 if ((bytes_per_sec < rs->rs_rlt[(ALL_HARDWARE_RATES-1)].rate) && 696 (rs->rs_rlt[(ALL_HARDWARE_RATES-1)].flags & HDWRPACE_INITED)){ 697 /* Our top rate is larger than the request */ 698 rte = &rs->rs_rlt[(ALL_HARDWARE_RATES-1)]; 699 } else if ((flags & RS_PACING_GEQ) && 700 (bytes_per_sec == rs->rs_rlt[(ALL_HARDWARE_RATES-1)].rate) && 701 (rs->rs_rlt[(ALL_HARDWARE_RATES-1)].flags & HDWRPACE_INITED)) { 702 /* It matches our top rate */ 703 rte = &rs->rs_rlt[(ALL_HARDWARE_RATES-1)]; 704 } else if (rs->rs_rlt[(ALL_HARDWARE_RATES-1)].flags & HDWRPACE_INITED) { 705 /* The top rate is an alternative */ 706 arte = &rs->rs_rlt[(ALL_HARDWARE_RATES-1)]; 707 } 708 } else { 709 /* Its in our range 1Meg - 1Gig */ 710 if (flags & RS_PACING_GEQ) { 711 ind_calc = mbits_per_sec/RS_ONE_MEGABIT_PERSEC; 712 if ((ind_calc * RS_ONE_MEGABIT_PERSEC) == mbits_per_sec) { 713 if (ind_calc > (ALL_HARDWARE_RATES-1)) { 714 /* This should not happen */ 715 ind_calc = (ALL_HARDWARE_RATES-1); 716 } 717 rte = &rs->rs_rlt[ind_calc]; 718 } 719 goto done; 720 } 721 ind_calc = (mbits_per_sec + (RS_ONE_MEGABIT_PERSEC-1))/RS_ONE_MEGABIT_PERSEC; 722 ind_calc += 2; 723 if (ind_calc > (ALL_HARDWARE_RATES-1)) { 724 /* This should not happen */ 725 ind_calc = ALL_HARDWARE_RATES-1; 726 } 727 if (rs->rs_rlt[ind_calc].flags & HDWRPACE_INITED) 728 rte = &rs->rs_rlt[ind_calc]; 729 } 730 } 731 done: 732 if ((rte == NULL) && 733 (arte != NULL) && 734 (flags & RS_PACING_SUB_OK)) { 735 /* We can use the substitute */ 736 rte = arte; 737 } 738 return (rte); 739 } 740 741 static const struct tcp_hwrate_limit_table * 742 tcp_find_suitable_rate(const struct tcp_rate_set *rs, uint64_t bytes_per_sec, uint32_t flags) 743 { 744 /** 745 * Hunt the rate table with the restrictions in flags and find a 746 * suitable rate if possible. 747 * RS_PACING_EXACT_MATCH - look for an exact match to rate. 748 * RS_PACING_GT - must be greater than. 749 * RS_PACING_GEQ - must be greater than or equal. 750 * RS_PACING_LT - must be less than. 751 * RS_PACING_SUB_OK - If we don't meet criteria a 752 * substitute is ok. 753 */ 754 int i, matched; 755 struct tcp_hwrate_limit_table *rte = NULL; 756 757 758 if ((rs->rs_flags & RS_INT_TBL) && 759 (rs->rs_rate_cnt >= ALL_HARDWARE_RATES)) { 760 /* 761 * Here we don't want to paw thru 762 * a big table, we have everything 763 * from 1Meg - 1000Meg in 1Meg increments. 764 * Use an alternate method to "lookup". 765 */ 766 return (tcp_int_find_suitable_rate(rs, bytes_per_sec, flags)); 767 } 768 if ((flags & RS_PACING_LT) || 769 (flags & RS_PACING_EXACT_MATCH)) { 770 /* 771 * For exact and less than we go forward through the table. 772 * This way when we find one larger we stop (exact was a 773 * toss up). 774 */ 775 for (i = rs->rs_lowest_valid, matched = 0; i <= rs->rs_highest_valid; i++) { 776 if ((flags & RS_PACING_EXACT_MATCH) && 777 (bytes_per_sec == rs->rs_rlt[i].rate)) { 778 rte = &rs->rs_rlt[i]; 779 matched = 1; 780 break; 781 } else if ((flags & RS_PACING_LT) && 782 (bytes_per_sec <= rs->rs_rlt[i].rate)) { 783 rte = &rs->rs_rlt[i]; 784 matched = 1; 785 break; 786 } 787 if (bytes_per_sec > rs->rs_rlt[i].rate) 788 break; 789 } 790 if ((matched == 0) && 791 (flags & RS_PACING_LT) && 792 (flags & RS_PACING_SUB_OK)) { 793 /* Kick in a substitute (the lowest) */ 794 rte = &rs->rs_rlt[rs->rs_lowest_valid]; 795 } 796 } else { 797 /* 798 * Here we go backward through the table so that we can find 799 * the one greater in theory faster (but its probably a 800 * wash). 801 */ 802 for (i = rs->rs_highest_valid, matched = 0; i >= rs->rs_lowest_valid; i--) { 803 if (rs->rs_rlt[i].rate > bytes_per_sec) { 804 /* A possible candidate */ 805 rte = &rs->rs_rlt[i]; 806 } 807 if ((flags & RS_PACING_GEQ) && 808 (bytes_per_sec == rs->rs_rlt[i].rate)) { 809 /* An exact match and we want equal */ 810 matched = 1; 811 rte = &rs->rs_rlt[i]; 812 break; 813 } else if (rte) { 814 /* 815 * Found one that is larger than but don't 816 * stop, there may be a more closer match. 817 */ 818 matched = 1; 819 } 820 if (rs->rs_rlt[i].rate < bytes_per_sec) { 821 /* 822 * We found a table entry that is smaller, 823 * stop there will be none greater or equal. 824 */ 825 break; 826 } 827 } 828 if ((matched == 0) && 829 (flags & RS_PACING_SUB_OK)) { 830 /* Kick in a substitute (the highest) */ 831 rte = &rs->rs_rlt[rs->rs_highest_valid]; 832 } 833 } 834 return (rte); 835 } 836 837 static struct ifnet * 838 rt_find_real_interface(struct ifnet *ifp, struct inpcb *inp, int *error) 839 { 840 struct ifnet *tifp; 841 struct m_snd_tag *tag; 842 union if_snd_tag_alloc_params params = { 843 .rate_limit.hdr.type = IF_SND_TAG_TYPE_RATE_LIMIT, 844 .rate_limit.hdr.flowid = 1, 845 .rate_limit.max_rate = COMMON_RATE, 846 .rate_limit.flags = M_NOWAIT, 847 }; 848 int err; 849 #ifdef RSS 850 params.rate_limit.hdr.flowtype = ((inp->inp_vflag & INP_IPV6) ? 851 M_HASHTYPE_RSS_TCP_IPV6 : M_HASHTYPE_RSS_TCP_IPV4); 852 #else 853 params.rate_limit.hdr.flowtype = M_HASHTYPE_OPAQUE_HASH; 854 #endif 855 tag = NULL; 856 if (ifp->if_snd_tag_alloc) { 857 if (error) 858 *error = ENODEV; 859 return (NULL); 860 } 861 err = ifp->if_snd_tag_alloc(ifp, ¶ms, &tag); 862 if (err) { 863 /* Failed to setup a tag? */ 864 if (error) 865 *error = err; 866 return (NULL); 867 } 868 tifp = tag->ifp; 869 tifp->if_snd_tag_free(tag); 870 return (tifp); 871 } 872 873 static const struct tcp_hwrate_limit_table * 874 rt_setup_rate(struct inpcb *inp, struct ifnet *ifp, uint64_t bytes_per_sec, 875 uint32_t flags, int *error) 876 { 877 /* First lets find the interface if it exists */ 878 const struct tcp_hwrate_limit_table *rte; 879 struct tcp_rate_set *rs; 880 struct epoch_tracker et; 881 int err; 882 883 NET_EPOCH_ENTER(et); 884 use_real_interface: 885 CK_LIST_FOREACH(rs, &int_rs, next) { 886 /* 887 * Note we don't look with the lock since we either see a 888 * new entry or will get one when we try to add it. 889 */ 890 if (rs->rs_flags & RS_IS_DEAD) { 891 /* The dead are not looked at */ 892 continue; 893 } 894 if ((rs->rs_ifp == ifp) && 895 (rs->rs_if_dunit == ifp->if_dunit)) { 896 /* Ok we found it */ 897 break; 898 } 899 } 900 if ((rs == NULL) || 901 (rs->rs_flags & RS_INTF_NO_SUP) || 902 (rs->rs_flags & RS_IS_DEAD)) { 903 /* 904 * This means we got a packet *before* 905 * the IF-UP was processed below, <or> 906 * while or after we already received an interface 907 * departed event. In either case we really don't 908 * want to do anything with pacing, in 909 * the departing case the packet is not 910 * going to go very far. The new case 911 * might be arguable, but its impossible 912 * to tell from the departing case. 913 */ 914 if (rs->rs_disable && error) 915 *error = ENODEV; 916 NET_EPOCH_EXIT(et); 917 return (NULL); 918 } 919 920 if ((rs == NULL) || (rs->rs_disable != 0)) { 921 if (rs->rs_disable && error) 922 *error = ENOSPC; 923 NET_EPOCH_EXIT(et); 924 return (NULL); 925 } 926 if (rs->rs_flags & RS_IS_DEFF) { 927 /* We need to find the real interface */ 928 struct ifnet *tifp; 929 930 tifp = rt_find_real_interface(ifp, inp, error); 931 if (tifp == NULL) { 932 if (rs->rs_disable && error) 933 *error = ENOTSUP; 934 NET_EPOCH_EXIT(et); 935 return (NULL); 936 } 937 goto use_real_interface; 938 } 939 if (rs->rs_flow_limit && 940 ((rs->rs_flows_using + 1) > rs->rs_flow_limit)) { 941 if (error) 942 *error = ENOSPC; 943 NET_EPOCH_EXIT(et); 944 return (NULL); 945 } 946 rte = tcp_find_suitable_rate(rs, bytes_per_sec, flags); 947 if (rte) { 948 err = in_pcbattach_txrtlmt(inp, rs->rs_ifp, 949 inp->inp_flowtype, 950 inp->inp_flowid, 951 rte->rate, 952 &inp->inp_snd_tag); 953 if (err) { 954 /* Failed to attach */ 955 if (error) 956 *error = err; 957 rte = NULL; 958 } 959 } 960 if (rte) { 961 /* 962 * We use an atomic here for accounting so we don't have to 963 * use locks when freeing. 964 */ 965 atomic_add_64(&rs->rs_flows_using, 1); 966 } 967 NET_EPOCH_EXIT(et); 968 return (rte); 969 } 970 971 static void 972 tcp_rl_ifnet_link(void *arg __unused, struct ifnet *ifp, int link_state) 973 { 974 int error; 975 struct tcp_rate_set *rs; 976 977 if (((ifp->if_capabilities & IFCAP_TXRTLMT) == 0) || 978 (link_state != LINK_STATE_UP)) { 979 /* 980 * We only care on an interface going up that is rate-limit 981 * capable. 982 */ 983 return; 984 } 985 mtx_lock(&rs_mtx); 986 CK_LIST_FOREACH(rs, &int_rs, next) { 987 if ((rs->rs_ifp == ifp) && 988 (rs->rs_if_dunit == ifp->if_dunit)) { 989 /* We already have initialized this guy */ 990 mtx_unlock(&rs_mtx); 991 return; 992 } 993 } 994 mtx_unlock(&rs_mtx); 995 rt_setup_new_rs(ifp, &error); 996 } 997 998 static void 999 tcp_rl_ifnet_departure(void *arg __unused, struct ifnet *ifp) 1000 { 1001 struct tcp_rate_set *rs, *nrs; 1002 struct ifnet *tifp; 1003 int i; 1004 1005 mtx_lock(&rs_mtx); 1006 CK_LIST_FOREACH_SAFE(rs, &int_rs, next, nrs) { 1007 if ((rs->rs_ifp == ifp) && 1008 (rs->rs_if_dunit == ifp->if_dunit)) { 1009 CK_LIST_REMOVE(rs, next); 1010 rs_number_alive--; 1011 rs->rs_flags |= RS_IS_DEAD; 1012 for (i = 0; i < rs->rs_rate_cnt; i++) { 1013 if (rs->rs_rlt[i].flags & HDWRPACE_TAGPRESENT) { 1014 tifp = rs->rs_rlt[i].tag->ifp; 1015 in_pcbdetach_tag(tifp, rs->rs_rlt[i].tag); 1016 rs->rs_rlt[i].tag = NULL; 1017 } 1018 rs->rs_rlt[i].flags = HDWRPACE_IFPDEPARTED; 1019 } 1020 if (rs->rs_flows_using == 0) 1021 rs_defer_destroy(rs); 1022 break; 1023 } 1024 } 1025 mtx_unlock(&rs_mtx); 1026 } 1027 1028 static void 1029 tcp_rl_shutdown(void *arg __unused, int howto __unused) 1030 { 1031 struct tcp_rate_set *rs, *nrs; 1032 struct ifnet *tifp; 1033 int i; 1034 1035 mtx_lock(&rs_mtx); 1036 CK_LIST_FOREACH_SAFE(rs, &int_rs, next, nrs) { 1037 CK_LIST_REMOVE(rs, next); 1038 rs_number_alive--; 1039 rs->rs_flags |= RS_IS_DEAD; 1040 for (i = 0; i < rs->rs_rate_cnt; i++) { 1041 if (rs->rs_rlt[i].flags & HDWRPACE_TAGPRESENT) { 1042 tifp = rs->rs_rlt[i].tag->ifp; 1043 in_pcbdetach_tag(tifp, rs->rs_rlt[i].tag); 1044 rs->rs_rlt[i].tag = NULL; 1045 } 1046 rs->rs_rlt[i].flags = HDWRPACE_IFPDEPARTED; 1047 } 1048 if (rs->rs_flows_using == 0) 1049 rs_defer_destroy(rs); 1050 } 1051 mtx_unlock(&rs_mtx); 1052 } 1053 1054 const struct tcp_hwrate_limit_table * 1055 tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp, 1056 uint64_t bytes_per_sec, int flags, int *error) 1057 { 1058 const struct tcp_hwrate_limit_table *rte; 1059 1060 if (tp->t_inpcb->inp_snd_tag == NULL) { 1061 /* 1062 * We are setting up a rate for the first time. 1063 */ 1064 if ((ifp->if_capabilities & IFCAP_TXRTLMT) == 0) { 1065 /* Not supported by the egress */ 1066 if (error) 1067 *error = ENODEV; 1068 return (NULL); 1069 } 1070 #ifdef KERN_TLS 1071 if (tp->t_inpcb->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) { 1072 /* 1073 * We currently can't do both TLS and hardware 1074 * pacing 1075 */ 1076 if (error) 1077 *error = EINVAL; 1078 return (NULL); 1079 } 1080 #endif 1081 rte = rt_setup_rate(tp->t_inpcb, ifp, bytes_per_sec, flags, error); 1082 } else { 1083 /* 1084 * We are modifying a rate, wrong interface? 1085 */ 1086 if (error) 1087 *error = EINVAL; 1088 rte = NULL; 1089 } 1090 return (rte); 1091 } 1092 1093 const struct tcp_hwrate_limit_table * 1094 tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table *crte, 1095 struct tcpcb *tp, struct ifnet *ifp, 1096 uint64_t bytes_per_sec, int flags, int *error) 1097 { 1098 const struct tcp_hwrate_limit_table *nrte; 1099 const struct tcp_rate_set *rs; 1100 int is_indirect = 0; 1101 int err; 1102 1103 1104 if ((tp->t_inpcb->inp_snd_tag == NULL) || 1105 (crte == NULL)) { 1106 /* Wrong interface */ 1107 if (error) 1108 *error = EINVAL; 1109 return (NULL); 1110 } 1111 rs = crte->ptbl; 1112 if ((rs->rs_flags & RS_IS_DEAD) || 1113 (crte->flags & HDWRPACE_IFPDEPARTED)) { 1114 /* Release the rate, and try anew */ 1115 re_rate: 1116 tcp_rel_pacing_rate(crte, tp); 1117 nrte = tcp_set_pacing_rate(tp, ifp, 1118 bytes_per_sec, flags, error); 1119 return (nrte); 1120 } 1121 if ((rs->rs_flags & RT_IS_INDIRECT ) == RT_IS_INDIRECT) 1122 is_indirect = 1; 1123 else 1124 is_indirect = 0; 1125 if ((is_indirect == 0) && 1126 ((ifp != rs->rs_ifp) || 1127 (ifp->if_dunit != rs->rs_if_dunit))) { 1128 /* 1129 * Something changed, the user is not pointing to the same 1130 * ifp? Maybe a route updated on this guy? 1131 */ 1132 goto re_rate; 1133 } else if (is_indirect) { 1134 /* 1135 * For indirect we have to dig in and find the real interface. 1136 */ 1137 struct ifnet *rifp; 1138 1139 rifp = rt_find_real_interface(ifp, tp->t_inpcb, error); 1140 if (rifp == NULL) { 1141 /* Can't find it? */ 1142 goto re_rate; 1143 } 1144 if ((rifp != rs->rs_ifp) || 1145 (ifp->if_dunit != rs->rs_if_dunit)) { 1146 goto re_rate; 1147 } 1148 } 1149 nrte = tcp_find_suitable_rate(rs, bytes_per_sec, flags); 1150 if (nrte == crte) { 1151 /* No change */ 1152 if (error) 1153 *error = 0; 1154 return (crte); 1155 } 1156 if (nrte == NULL) { 1157 /* Release the old rate */ 1158 tcp_rel_pacing_rate(crte, tp); 1159 return (NULL); 1160 } 1161 /* Change rates to our new entry */ 1162 err = in_pcbmodify_txrtlmt(tp->t_inpcb, nrte->rate); 1163 if (err) { 1164 if (error) 1165 *error = err; 1166 return (NULL); 1167 } 1168 if (error) 1169 *error = 0; 1170 return (nrte); 1171 } 1172 1173 void 1174 tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table *crte, struct tcpcb *tp) 1175 { 1176 const struct tcp_rate_set *crs; 1177 struct tcp_rate_set *rs; 1178 uint64_t pre; 1179 1180 crs = crte->ptbl; 1181 /* 1182 * Now we must break the const 1183 * in order to release our refcount. 1184 */ 1185 rs = __DECONST(struct tcp_rate_set *, crs); 1186 pre = atomic_fetchadd_64(&rs->rs_flows_using, -1); 1187 if (pre == 1) { 1188 mtx_lock(&rs_mtx); 1189 /* 1190 * Is it dead? 1191 */ 1192 if (rs->rs_flags & RS_IS_DEAD) 1193 rs_defer_destroy(rs); 1194 mtx_unlock(&rs_mtx); 1195 } 1196 in_pcbdetach_txrtlmt(tp->t_inpcb); 1197 } 1198 1199 static eventhandler_tag rl_ifnet_departs; 1200 static eventhandler_tag rl_ifnet_arrives; 1201 static eventhandler_tag rl_shutdown_start; 1202 1203 static void 1204 tcp_rs_init(void *st __unused) 1205 { 1206 CK_LIST_INIT(&int_rs); 1207 rs_number_alive = 0; 1208 rs_number_dead = 0;; 1209 mtx_init(&rs_mtx, "tcp_rs_mtx", "rsmtx", MTX_DEF); 1210 rl_ifnet_departs = EVENTHANDLER_REGISTER(ifnet_departure_event, 1211 tcp_rl_ifnet_departure, 1212 NULL, EVENTHANDLER_PRI_ANY); 1213 rl_ifnet_arrives = EVENTHANDLER_REGISTER(ifnet_link_event, 1214 tcp_rl_ifnet_link, 1215 NULL, EVENTHANDLER_PRI_ANY); 1216 rl_shutdown_start = EVENTHANDLER_REGISTER(shutdown_pre_sync, 1217 tcp_rl_shutdown, NULL, 1218 SHUTDOWN_PRI_FIRST); 1219 printf("TCP_ratelimit: Is now initialized\n"); 1220 } 1221 1222 SYSINIT(tcp_rl_init, SI_SUB_SMP + 1, SI_ORDER_ANY, tcp_rs_init, NULL); 1223 #endif 1224