1 /*- 2 * 3 * SPDX-License-Identifier: BSD-3-Clause 4 * 5 * Copyright (c) 2018-2019 6 * Netflix Inc. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 */ 31 /** 32 * Author: Randall Stewart <rrs@netflix.com> 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 #include "opt_inet.h" 38 #include "opt_inet6.h" 39 #include "opt_ipsec.h" 40 #include "opt_tcpdebug.h" 41 #include "opt_ratelimit.h" 42 #include <sys/param.h> 43 #include <sys/kernel.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/sysctl.h> 49 #include <sys/eventhandler.h> 50 #include <sys/mutex.h> 51 #include <sys/ck.h> 52 #define TCPSTATES /* for logging */ 53 #include <netinet/in.h> 54 #include <netinet/in_pcb.h> 55 #include <netinet/tcp_var.h> 56 #ifdef INET6 57 #include <netinet6/tcp6_var.h> 58 #endif 59 #include <netinet/tcp_ratelimit.h> 60 #ifndef USECS_IN_SECOND 61 #define USECS_IN_SECOND 1000000 62 #endif 63 /* 64 * For the purposes of each send, what is the size 65 * of an ethernet frame. 66 */ 67 #ifndef ETHERNET_SEGMENT_SIZE 68 #define ETHERNET_SEGMENT_SIZE 1500 69 #endif 70 MALLOC_DEFINE(M_TCPPACE, "tcp_hwpace", "TCP Hardware pacing memory"); 71 #ifdef RATELIMIT 72 73 #define COMMON_RATE 180500 74 uint64_t desired_rates[] = { 75 62500, /* 500Kbps */ 76 180500, /* 1.44Mpbs */ 77 375000, /* 3Mbps */ 78 500000, /* 4Mbps */ 79 625000, /* 5Mbps */ 80 750000, /* 6Mbps */ 81 1000000, /* 8Mbps */ 82 1250000, /* 10Mbps */ 83 2500000, /* 20Mbps */ 84 3750000, /* 30Mbps */ 85 5000000, /* 40Meg */ 86 6250000, /* 50Mbps */ 87 12500000, /* 100Mbps */ 88 25000000, /* 200Mbps */ 89 50000000, /* 400Mbps */ 90 100000000, /* 800Mbps */ 91 12500, /* 100kbps */ 92 25000, /* 200kbps */ 93 875000, /* 7Mbps */ 94 1125000, /* 9Mbps */ 95 1875000, /* 15Mbps */ 96 3125000, /* 25Mbps */ 97 8125000, /* 65Mbps */ 98 10000000, /* 80Mbps */ 99 18750000, /* 150Mbps */ 100 20000000, /* 250Mbps */ 101 37500000, /* 350Mbps */ 102 62500000, /* 500Mbps */ 103 78125000, /* 625Mbps */ 104 125000000, /* 1Gbps */ 105 }; 106 #define MAX_HDWR_RATES (sizeof(desired_rates)/sizeof(uint64_t)) 107 #define RS_ORDERED_COUNT 16 /* 108 * Number that are in order 109 * at the beginning of the table, 110 * over this a sort is required. 111 */ 112 #define RS_NEXT_ORDER_GROUP 16 /* 113 * The point in our table where 114 * we come fill in a second ordered 115 * group (index wise means -1). 116 */ 117 #define ALL_HARDWARE_RATES 1004 /* 118 * 1Meg - 1Gig in 1 Meg steps 119 * plus 100, 200k and 500k and 120 * 10Gig 121 */ 122 123 #define RS_ONE_MEGABIT_PERSEC 1000000 124 #define RS_ONE_GIGABIT_PERSEC 1000000000 125 #define RS_TEN_GIGABIT_PERSEC 10000000000 126 127 static struct head_tcp_rate_set int_rs; 128 static struct mtx rs_mtx; 129 uint32_t rs_number_alive; 130 uint32_t rs_number_dead; 131 132 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, rl, CTLFLAG_RW, 0, 133 "TCP Ratelimit stats"); 134 SYSCTL_UINT(_net_inet_tcp_rl, OID_AUTO, alive, CTLFLAG_RW, 135 &rs_number_alive, 0, 136 "Number of interfaces initialized for ratelimiting"); 137 SYSCTL_UINT(_net_inet_tcp_rl, OID_AUTO, dead, CTLFLAG_RW, 138 &rs_number_dead, 0, 139 "Number of interfaces departing from ratelimiting"); 140 141 static void 142 rl_add_syctl_entries(struct sysctl_oid *rl_sysctl_root, struct tcp_rate_set *rs) 143 { 144 /* 145 * Add sysctl entries for thus interface. 146 */ 147 if (rs->rs_flags & RS_INTF_NO_SUP) { 148 SYSCTL_ADD_S32(&rs->sysctl_ctx, 149 SYSCTL_CHILDREN(rl_sysctl_root), 150 OID_AUTO, "disable", CTLFLAG_RD, 151 &rs->rs_disable, 0, 152 "Disable this interface from new hdwr limiting?"); 153 } else { 154 SYSCTL_ADD_S32(&rs->sysctl_ctx, 155 SYSCTL_CHILDREN(rl_sysctl_root), 156 OID_AUTO, "disable", CTLFLAG_RW, 157 &rs->rs_disable, 0, 158 "Disable this interface from new hdwr limiting?"); 159 } 160 SYSCTL_ADD_S32(&rs->sysctl_ctx, 161 SYSCTL_CHILDREN(rl_sysctl_root), 162 OID_AUTO, "minseg", CTLFLAG_RW, 163 &rs->rs_min_seg, 0, 164 "What is the minimum we need to send on this interface?"); 165 SYSCTL_ADD_U64(&rs->sysctl_ctx, 166 SYSCTL_CHILDREN(rl_sysctl_root), 167 OID_AUTO, "flow_limit", CTLFLAG_RW, 168 &rs->rs_flow_limit, 0, 169 "What is the limit for number of flows (0=unlimited)?"); 170 SYSCTL_ADD_S32(&rs->sysctl_ctx, 171 SYSCTL_CHILDREN(rl_sysctl_root), 172 OID_AUTO, "highest", CTLFLAG_RD, 173 &rs->rs_highest_valid, 0, 174 "Highest valid rate"); 175 SYSCTL_ADD_S32(&rs->sysctl_ctx, 176 SYSCTL_CHILDREN(rl_sysctl_root), 177 OID_AUTO, "lowest", CTLFLAG_RD, 178 &rs->rs_lowest_valid, 0, 179 "Lowest valid rate"); 180 SYSCTL_ADD_S32(&rs->sysctl_ctx, 181 SYSCTL_CHILDREN(rl_sysctl_root), 182 OID_AUTO, "flags", CTLFLAG_RD, 183 &rs->rs_flags, 0, 184 "What lags are on the entry?"); 185 SYSCTL_ADD_S32(&rs->sysctl_ctx, 186 SYSCTL_CHILDREN(rl_sysctl_root), 187 OID_AUTO, "numrates", CTLFLAG_RD, 188 &rs->rs_rate_cnt, 0, 189 "How many rates re there?"); 190 SYSCTL_ADD_U64(&rs->sysctl_ctx, 191 SYSCTL_CHILDREN(rl_sysctl_root), 192 OID_AUTO, "flows_using", CTLFLAG_RD, 193 &rs->rs_flows_using, 0, 194 "How many flows are using this interface now?"); 195 #ifdef DETAILED_RATELIMIT_SYSCTL 196 if (rs->rs_rlt && rs->rs_rate_cnt > 0) { 197 /* Lets display the rates */ 198 int i; 199 struct sysctl_oid *rl_rates; 200 struct sysctl_oid *rl_rate_num; 201 char rate_num[16]; 202 rl_rates = SYSCTL_ADD_NODE(&rs->sysctl_ctx, 203 SYSCTL_CHILDREN(rl_sysctl_root), 204 OID_AUTO, 205 "rate", 206 CTLFLAG_RW, 0, 207 "Ratelist"); 208 for( i = 0; i < rs->rs_rate_cnt; i++) { 209 sprintf(rate_num, "%d", i); 210 rl_rate_num = SYSCTL_ADD_NODE(&rs->sysctl_ctx, 211 SYSCTL_CHILDREN(rl_rates), 212 OID_AUTO, 213 rate_num, 214 CTLFLAG_RW, 0, 215 "Individual Rate"); 216 SYSCTL_ADD_U32(&rs->sysctl_ctx, 217 SYSCTL_CHILDREN(rl_rate_num), 218 OID_AUTO, "flags", CTLFLAG_RD, 219 &rs->rs_rlt[i].flags, 0, 220 "Flags on this rate"); 221 SYSCTL_ADD_U32(&rs->sysctl_ctx, 222 SYSCTL_CHILDREN(rl_rate_num), 223 OID_AUTO, "pacetime", CTLFLAG_RD, 224 &rs->rs_rlt[i].time_between, 0, 225 "Time hardware inserts between 1500 byte sends"); 226 SYSCTL_ADD_U64(&rs->sysctl_ctx, 227 SYSCTL_CHILDREN(rl_rate_num), 228 OID_AUTO, "rate", CTLFLAG_RD, 229 &rs->rs_rlt[i].rate, 0, 230 "Rate in bytes per second"); 231 } 232 } 233 #endif 234 } 235 236 static void 237 rs_destroy(epoch_context_t ctx) 238 { 239 struct tcp_rate_set *rs; 240 bool do_free_rs; 241 242 rs = __containerof(ctx, struct tcp_rate_set, rs_epoch_ctx); 243 244 mtx_lock(&rs_mtx); 245 rs->rs_flags &= ~RS_FUNERAL_SCHD; 246 /* 247 * In theory its possible (but unlikely) 248 * that while the delete was occuring 249 * and we were applying the DEAD flag 250 * someone slipped in and found the 251 * interface in a lookup. While we 252 * decided rs_flows_using were 0 and 253 * scheduling the epoch_call, the other 254 * thread incremented rs_flow_using. This 255 * is because users have a pointer and 256 * we only use the rs_flows_using in an 257 * atomic fashion, i.e. the other entities 258 * are not protected. To assure this did 259 * not occur, we check rs_flows_using here 260 * before deleting. 261 */ 262 do_free_rs = (rs->rs_flows_using == 0); 263 rs_number_dead--; 264 mtx_unlock(&rs_mtx); 265 266 if (do_free_rs) { 267 sysctl_ctx_free(&rs->sysctl_ctx); 268 free(rs->rs_rlt, M_TCPPACE); 269 free(rs, M_TCPPACE); 270 } 271 } 272 273 static void 274 rs_defer_destroy(struct tcp_rate_set *rs) 275 { 276 277 mtx_assert(&rs_mtx, MA_OWNED); 278 279 /* Check if already pending. */ 280 if (rs->rs_flags & RS_FUNERAL_SCHD) 281 return; 282 283 rs_number_dead++; 284 285 /* Set flag to only defer once. */ 286 rs->rs_flags |= RS_FUNERAL_SCHD; 287 epoch_call(net_epoch, &rs->rs_epoch_ctx, rs_destroy); 288 } 289 290 #ifdef INET 291 extern counter_u64_t rate_limit_set_ok; 292 extern counter_u64_t rate_limit_active; 293 extern counter_u64_t rate_limit_alloc_fail; 294 #endif 295 296 static int 297 rl_attach_txrtlmt(struct ifnet *ifp, 298 uint32_t flowtype, 299 int flowid, 300 uint64_t cfg_rate, 301 struct m_snd_tag **tag) 302 { 303 int error; 304 union if_snd_tag_alloc_params params = { 305 .rate_limit.hdr.type = IF_SND_TAG_TYPE_RATE_LIMIT, 306 .rate_limit.hdr.flowid = flowid, 307 .rate_limit.hdr.flowtype = flowtype, 308 .rate_limit.max_rate = cfg_rate, 309 .rate_limit.flags = M_NOWAIT, 310 }; 311 312 if (ifp->if_snd_tag_alloc == NULL) { 313 error = EOPNOTSUPP; 314 } else { 315 error = ifp->if_snd_tag_alloc(ifp, ¶ms, tag); 316 #ifdef INET 317 if (error == 0) { 318 if_ref((*tag)->ifp); 319 counter_u64_add(rate_limit_set_ok, 1); 320 counter_u64_add(rate_limit_active, 1); 321 } else 322 counter_u64_add(rate_limit_alloc_fail, 1); 323 #endif 324 } 325 return (error); 326 } 327 328 static void 329 populate_canned_table(struct tcp_rate_set *rs, const uint64_t *rate_table_act) 330 { 331 /* 332 * The internal table is "special", it 333 * is two seperate ordered tables that 334 * must be merged. We get here when the 335 * adapter specifies a number of rates that 336 * covers both ranges in the table in some 337 * form. 338 */ 339 int i, at_low, at_high; 340 uint8_t low_disabled = 0, high_disabled = 0; 341 342 for(i = 0, at_low = 0, at_high = RS_NEXT_ORDER_GROUP; i < rs->rs_rate_cnt; i++) { 343 rs->rs_rlt[i].flags = 0; 344 rs->rs_rlt[i].time_between = 0; 345 if ((low_disabled == 0) && 346 (high_disabled || 347 (rate_table_act[at_low] < rate_table_act[at_high]))) { 348 rs->rs_rlt[i].rate = rate_table_act[at_low]; 349 at_low++; 350 if (at_low == RS_NEXT_ORDER_GROUP) 351 low_disabled = 1; 352 } else if (high_disabled == 0) { 353 rs->rs_rlt[i].rate = rate_table_act[at_high]; 354 at_high++; 355 if (at_high == MAX_HDWR_RATES) 356 high_disabled = 1; 357 } 358 } 359 } 360 361 static struct tcp_rate_set * 362 rt_setup_new_rs(struct ifnet *ifp, int *error) 363 { 364 struct tcp_rate_set *rs; 365 const uint64_t *rate_table_act; 366 uint64_t lentim, res; 367 size_t sz; 368 uint32_t hash_type; 369 int i; 370 struct if_ratelimit_query_results rl; 371 struct sysctl_oid *rl_sysctl_root; 372 /* 373 * We expect to enter with the 374 * mutex locked. 375 */ 376 377 if (ifp->if_ratelimit_query == NULL) { 378 /* 379 * We can do nothing if we cannot 380 * get a query back from the driver. 381 */ 382 return (NULL); 383 } 384 rs = malloc(sizeof(struct tcp_rate_set), M_TCPPACE, M_NOWAIT | M_ZERO); 385 if (rs == NULL) { 386 if (error) 387 *error = ENOMEM; 388 return (NULL); 389 } 390 rl.flags = RT_NOSUPPORT; 391 ifp->if_ratelimit_query(ifp, &rl); 392 if (rl.flags & RT_IS_UNUSABLE) { 393 /* 394 * The interface does not really support 395 * the rate-limiting. 396 */ 397 memset(rs, 0, sizeof(struct tcp_rate_set)); 398 rs->rs_ifp = ifp; 399 rs->rs_if_dunit = ifp->if_dunit; 400 rs->rs_flags = RS_INTF_NO_SUP; 401 rs->rs_disable = 1; 402 rs_number_alive++; 403 sysctl_ctx_init(&rs->sysctl_ctx); 404 rl_sysctl_root = SYSCTL_ADD_NODE(&rs->sysctl_ctx, 405 SYSCTL_STATIC_CHILDREN(_net_inet_tcp_rl), 406 OID_AUTO, 407 rs->rs_ifp->if_xname, 408 CTLFLAG_RW, 0, 409 ""); 410 rl_add_syctl_entries(rl_sysctl_root, rs); 411 mtx_lock(&rs_mtx); 412 CK_LIST_INSERT_HEAD(&int_rs, rs, next); 413 mtx_unlock(&rs_mtx); 414 return (rs); 415 } else if ((rl.flags & RT_IS_INDIRECT) == RT_IS_INDIRECT) { 416 memset(rs, 0, sizeof(struct tcp_rate_set)); 417 rs->rs_ifp = ifp; 418 rs->rs_if_dunit = ifp->if_dunit; 419 rs->rs_flags = RS_IS_DEFF; 420 rs_number_alive++; 421 sysctl_ctx_init(&rs->sysctl_ctx); 422 rl_sysctl_root = SYSCTL_ADD_NODE(&rs->sysctl_ctx, 423 SYSCTL_STATIC_CHILDREN(_net_inet_tcp_rl), 424 OID_AUTO, 425 rs->rs_ifp->if_xname, 426 CTLFLAG_RW, 0, 427 ""); 428 rl_add_syctl_entries(rl_sysctl_root, rs); 429 mtx_lock(&rs_mtx); 430 CK_LIST_INSERT_HEAD(&int_rs, rs, next); 431 mtx_unlock(&rs_mtx); 432 return (rs); 433 } else if ((rl.flags & RT_IS_FIXED_TABLE) == RT_IS_FIXED_TABLE) { 434 /* Mellanox most likely */ 435 rs->rs_ifp = ifp; 436 rs->rs_if_dunit = ifp->if_dunit; 437 rs->rs_rate_cnt = rl.number_of_rates; 438 rs->rs_min_seg = rl.min_segment_burst; 439 rs->rs_highest_valid = 0; 440 rs->rs_flow_limit = rl.max_flows; 441 rs->rs_flags = RS_IS_INTF | RS_NO_PRE; 442 rs->rs_disable = 0; 443 rate_table_act = rl.rate_table; 444 } else if ((rl.flags & RT_IS_SELECTABLE) == RT_IS_SELECTABLE) { 445 /* Chelsio */ 446 rs->rs_ifp = ifp; 447 rs->rs_if_dunit = ifp->if_dunit; 448 rs->rs_rate_cnt = rl.number_of_rates; 449 rs->rs_min_seg = rl.min_segment_burst; 450 rs->rs_disable = 0; 451 rs->rs_flow_limit = rl.max_flows; 452 rate_table_act = desired_rates; 453 if ((rs->rs_rate_cnt > MAX_HDWR_RATES) && 454 (rs->rs_rate_cnt < ALL_HARDWARE_RATES)) { 455 /* 456 * Our desired table is not big 457 * enough, do what we can. 458 */ 459 rs->rs_rate_cnt = MAX_HDWR_RATES; 460 } 461 if (rs->rs_rate_cnt <= RS_ORDERED_COUNT) 462 rs->rs_flags = RS_IS_INTF; 463 else 464 rs->rs_flags = RS_IS_INTF | RS_INT_TBL; 465 if (rs->rs_rate_cnt >= ALL_HARDWARE_RATES) 466 rs->rs_rate_cnt = ALL_HARDWARE_RATES; 467 } else { 468 printf("Interface:%s unit:%d not one known to have rate-limits\n", 469 ifp->if_dname, 470 ifp->if_dunit); 471 free(rs, M_TCPPACE); 472 return (NULL); 473 } 474 sz = sizeof(struct tcp_hwrate_limit_table) * rs->rs_rate_cnt; 475 rs->rs_rlt = malloc(sz, M_TCPPACE, M_NOWAIT); 476 if (rs->rs_rlt == NULL) { 477 if (error) 478 *error = ENOMEM; 479 bail: 480 free(rs, M_TCPPACE); 481 return (NULL); 482 } 483 if (rs->rs_rate_cnt >= ALL_HARDWARE_RATES) { 484 /* 485 * The interface supports all 486 * the rates we could possibly want. 487 */ 488 uint64_t rat; 489 490 rs->rs_rlt[0].rate = 12500; /* 100k */ 491 rs->rs_rlt[1].rate = 25000; /* 200k */ 492 rs->rs_rlt[2].rate = 62500; /* 500k */ 493 /* Note 125000 == 1Megabit 494 * populate 1Meg - 1000meg. 495 */ 496 for(i = 3, rat = 125000; i< (ALL_HARDWARE_RATES-1); i++) { 497 rs->rs_rlt[i].rate = rat; 498 rat += 125000; 499 } 500 rs->rs_rlt[(ALL_HARDWARE_RATES-1)].rate = 1250000000; 501 } else if (rs->rs_flags & RS_INT_TBL) { 502 /* We populate this in a special way */ 503 populate_canned_table(rs, rate_table_act); 504 } else { 505 /* 506 * Just copy in the rates from 507 * the table, it is in order. 508 */ 509 for (i=0; i<rs->rs_rate_cnt; i++) { 510 rs->rs_rlt[i].rate = rate_table_act[i]; 511 rs->rs_rlt[i].time_between = 0; 512 rs->rs_rlt[i].flags = 0; 513 } 514 } 515 for (i = (rs->rs_rate_cnt - 1); i >= 0; i--) { 516 /* 517 * We go backwards through the list so that if we can't get 518 * a rate and fail to init one, we have at least a chance of 519 * getting the highest one. 520 */ 521 rs->rs_rlt[i].ptbl = rs; 522 rs->rs_rlt[i].tag = NULL; 523 /* 524 * Calculate the time between. 525 */ 526 lentim = ETHERNET_SEGMENT_SIZE * USECS_IN_SECOND; 527 res = lentim / rs->rs_rlt[i].rate; 528 if (res > 0) 529 rs->rs_rlt[i].time_between = res; 530 else 531 rs->rs_rlt[i].time_between = 1; 532 if (rs->rs_flags & RS_NO_PRE) { 533 rs->rs_rlt[i].flags = HDWRPACE_INITED; 534 rs->rs_lowest_valid = i; 535 } else { 536 int err; 537 #ifdef RSS 538 hash_type = M_HASHTYPE_RSS_TCP_IPV4; 539 #else 540 hash_type = M_HASHTYPE_OPAQUE_HASH; 541 #endif 542 err = rl_attach_txrtlmt(ifp, 543 hash_type, 544 (i + 1), 545 rs->rs_rlt[i].rate, 546 &rs->rs_rlt[i].tag); 547 if (err) { 548 if (i == (rs->rs_rate_cnt - 1)) { 549 /* 550 * Huh - first rate and we can't get 551 * it? 552 */ 553 free(rs->rs_rlt, M_TCPPACE); 554 if (error) 555 *error = err; 556 goto bail; 557 } else { 558 if (error) 559 *error = err; 560 } 561 break; 562 } else { 563 rs->rs_rlt[i].flags = HDWRPACE_INITED | HDWRPACE_TAGPRESENT; 564 rs->rs_lowest_valid = i; 565 } 566 } 567 } 568 /* Did we get at least 1 rate? */ 569 if (rs->rs_rlt[(rs->rs_rate_cnt - 1)].flags & HDWRPACE_INITED) 570 rs->rs_highest_valid = rs->rs_rate_cnt - 1; 571 else { 572 free(rs->rs_rlt, M_TCPPACE); 573 goto bail; 574 } 575 rs_number_alive++; 576 sysctl_ctx_init(&rs->sysctl_ctx); 577 rl_sysctl_root = SYSCTL_ADD_NODE(&rs->sysctl_ctx, 578 SYSCTL_STATIC_CHILDREN(_net_inet_tcp_rl), 579 OID_AUTO, 580 rs->rs_ifp->if_xname, 581 CTLFLAG_RW, 0, 582 ""); 583 rl_add_syctl_entries(rl_sysctl_root, rs); 584 mtx_lock(&rs_mtx); 585 CK_LIST_INSERT_HEAD(&int_rs, rs, next); 586 mtx_unlock(&rs_mtx); 587 return (rs); 588 } 589 590 static const struct tcp_hwrate_limit_table * 591 tcp_int_find_suitable_rate(const struct tcp_rate_set *rs, 592 uint64_t bytes_per_sec, uint32_t flags) 593 { 594 struct tcp_hwrate_limit_table *arte = NULL, *rte = NULL; 595 uint64_t mbits_per_sec, ind_calc; 596 int i; 597 598 mbits_per_sec = (bytes_per_sec * 8); 599 if (flags & RS_PACING_LT) { 600 if ((mbits_per_sec < RS_ONE_MEGABIT_PERSEC) && 601 (rs->rs_lowest_valid <= 2)){ 602 /* 603 * Smaller than 1Meg, only 604 * 3 entries can match it. 605 */ 606 for(i = rs->rs_lowest_valid; i < 3; i++) { 607 if (bytes_per_sec <= rs->rs_rlt[i].rate) { 608 rte = &rs->rs_rlt[i]; 609 break; 610 } else if (rs->rs_rlt[i].flags & HDWRPACE_INITED) { 611 arte = &rs->rs_rlt[i]; 612 } 613 } 614 goto done; 615 } else if ((mbits_per_sec > RS_ONE_GIGABIT_PERSEC) && 616 (rs->rs_rlt[(ALL_HARDWARE_RATES-1)].flags & HDWRPACE_INITED)){ 617 /* 618 * Larger than 1G (the majority of 619 * our table. 620 */ 621 if (mbits_per_sec < RS_TEN_GIGABIT_PERSEC) 622 rte = &rs->rs_rlt[(ALL_HARDWARE_RATES-1)]; 623 else 624 arte = &rs->rs_rlt[(ALL_HARDWARE_RATES-1)]; 625 goto done; 626 } 627 /* 628 * If we reach here its in our table (between 1Meg - 1000Meg), 629 * just take the rounded down mbits per second, and add 630 * 1Megabit to it, from this we can calculate 631 * the index in the table. 632 */ 633 ind_calc = mbits_per_sec/RS_ONE_MEGABIT_PERSEC; 634 if ((ind_calc * RS_ONE_MEGABIT_PERSEC) != mbits_per_sec) 635 ind_calc++; 636 /* our table is offset by 3, we add 2 */ 637 ind_calc += 2; 638 if (ind_calc > (ALL_HARDWARE_RATES-1)) { 639 /* This should not happen */ 640 ind_calc = ALL_HARDWARE_RATES-1; 641 } 642 if ((ind_calc >= rs->rs_lowest_valid) && 643 (ind_calc <= rs->rs_highest_valid)) 644 rte = &rs->rs_rlt[ind_calc]; 645 } else if (flags & RS_PACING_EXACT_MATCH) { 646 if ((mbits_per_sec < RS_ONE_MEGABIT_PERSEC) && 647 (rs->rs_lowest_valid <= 2)){ 648 for(i = rs->rs_lowest_valid; i < 3; i++) { 649 if (bytes_per_sec == rs->rs_rlt[i].rate) { 650 rte = &rs->rs_rlt[i]; 651 break; 652 } 653 } 654 } else if ((mbits_per_sec > RS_ONE_GIGABIT_PERSEC) && 655 (rs->rs_rlt[(ALL_HARDWARE_RATES-1)].flags & HDWRPACE_INITED)) { 656 /* > 1Gbps only one rate */ 657 if (bytes_per_sec == rs->rs_rlt[(ALL_HARDWARE_RATES-1)].rate) { 658 /* Its 10G wow */ 659 rte = &rs->rs_rlt[(ALL_HARDWARE_RATES-1)]; 660 } 661 } else { 662 /* Ok it must be a exact meg (its between 1G and 1Meg) */ 663 ind_calc = mbits_per_sec/RS_ONE_MEGABIT_PERSEC; 664 if ((ind_calc * RS_ONE_MEGABIT_PERSEC) == mbits_per_sec) { 665 /* its an exact Mbps */ 666 ind_calc += 2; 667 if (ind_calc > (ALL_HARDWARE_RATES-1)) { 668 /* This should not happen */ 669 ind_calc = ALL_HARDWARE_RATES-1; 670 } 671 if (rs->rs_rlt[ind_calc].flags & HDWRPACE_INITED) 672 rte = &rs->rs_rlt[ind_calc]; 673 } 674 } 675 } else { 676 /* we want greater than the requested rate */ 677 if ((mbits_per_sec < RS_ONE_MEGABIT_PERSEC) && 678 (rs->rs_lowest_valid <= 2)){ 679 arte = &rs->rs_rlt[3]; /* set alternate to 1Meg */ 680 for (i=2; i>=rs->rs_lowest_valid; i--) { 681 if (bytes_per_sec < rs->rs_rlt[i].rate) { 682 rte = &rs->rs_rlt[i]; 683 break; 684 } else if ((flags & RS_PACING_GEQ) && 685 (bytes_per_sec == rs->rs_rlt[i].rate)) { 686 rte = &rs->rs_rlt[i]; 687 break; 688 } else { 689 arte = &rs->rs_rlt[i]; /* new alternate */ 690 } 691 } 692 } else if (mbits_per_sec > RS_ONE_GIGABIT_PERSEC) { 693 if ((bytes_per_sec < rs->rs_rlt[(ALL_HARDWARE_RATES-1)].rate) && 694 (rs->rs_rlt[(ALL_HARDWARE_RATES-1)].flags & HDWRPACE_INITED)){ 695 /* Our top rate is larger than the request */ 696 rte = &rs->rs_rlt[(ALL_HARDWARE_RATES-1)]; 697 } else if ((flags & RS_PACING_GEQ) && 698 (bytes_per_sec == rs->rs_rlt[(ALL_HARDWARE_RATES-1)].rate) && 699 (rs->rs_rlt[(ALL_HARDWARE_RATES-1)].flags & HDWRPACE_INITED)) { 700 /* It matches our top rate */ 701 rte = &rs->rs_rlt[(ALL_HARDWARE_RATES-1)]; 702 } else if (rs->rs_rlt[(ALL_HARDWARE_RATES-1)].flags & HDWRPACE_INITED) { 703 /* The top rate is an alternative */ 704 arte = &rs->rs_rlt[(ALL_HARDWARE_RATES-1)]; 705 } 706 } else { 707 /* Its in our range 1Meg - 1Gig */ 708 if (flags & RS_PACING_GEQ) { 709 ind_calc = mbits_per_sec/RS_ONE_MEGABIT_PERSEC; 710 if ((ind_calc * RS_ONE_MEGABIT_PERSEC) == mbits_per_sec) { 711 if (ind_calc > (ALL_HARDWARE_RATES-1)) { 712 /* This should not happen */ 713 ind_calc = (ALL_HARDWARE_RATES-1); 714 } 715 rte = &rs->rs_rlt[ind_calc]; 716 } 717 goto done; 718 } 719 ind_calc = (mbits_per_sec + (RS_ONE_MEGABIT_PERSEC-1))/RS_ONE_MEGABIT_PERSEC; 720 ind_calc += 2; 721 if (ind_calc > (ALL_HARDWARE_RATES-1)) { 722 /* This should not happen */ 723 ind_calc = ALL_HARDWARE_RATES-1; 724 } 725 if (rs->rs_rlt[ind_calc].flags & HDWRPACE_INITED) 726 rte = &rs->rs_rlt[ind_calc]; 727 } 728 } 729 done: 730 if ((rte == NULL) && 731 (arte != NULL) && 732 (flags & RS_PACING_SUB_OK)) { 733 /* We can use the substitute */ 734 rte = arte; 735 } 736 return (rte); 737 } 738 739 static const struct tcp_hwrate_limit_table * 740 tcp_find_suitable_rate(const struct tcp_rate_set *rs, uint64_t bytes_per_sec, uint32_t flags) 741 { 742 /** 743 * Hunt the rate table with the restrictions in flags and find a 744 * suitable rate if possible. 745 * RS_PACING_EXACT_MATCH - look for an exact match to rate. 746 * RS_PACING_GT - must be greater than. 747 * RS_PACING_GEQ - must be greater than or equal. 748 * RS_PACING_LT - must be less than. 749 * RS_PACING_SUB_OK - If we don't meet criteria a 750 * substitute is ok. 751 */ 752 int i, matched; 753 struct tcp_hwrate_limit_table *rte = NULL; 754 755 756 if ((rs->rs_flags & RS_INT_TBL) && 757 (rs->rs_rate_cnt >= ALL_HARDWARE_RATES)) { 758 /* 759 * Here we don't want to paw thru 760 * a big table, we have everything 761 * from 1Meg - 1000Meg in 1Meg increments. 762 * Use an alternate method to "lookup". 763 */ 764 return (tcp_int_find_suitable_rate(rs, bytes_per_sec, flags)); 765 } 766 if ((flags & RS_PACING_LT) || 767 (flags & RS_PACING_EXACT_MATCH)) { 768 /* 769 * For exact and less than we go forward through the table. 770 * This way when we find one larger we stop (exact was a 771 * toss up). 772 */ 773 for (i = rs->rs_lowest_valid, matched = 0; i <= rs->rs_highest_valid; i++) { 774 if ((flags & RS_PACING_EXACT_MATCH) && 775 (bytes_per_sec == rs->rs_rlt[i].rate)) { 776 rte = &rs->rs_rlt[i]; 777 matched = 1; 778 break; 779 } else if ((flags & RS_PACING_LT) && 780 (bytes_per_sec <= rs->rs_rlt[i].rate)) { 781 rte = &rs->rs_rlt[i]; 782 matched = 1; 783 break; 784 } 785 if (bytes_per_sec > rs->rs_rlt[i].rate) 786 break; 787 } 788 if ((matched == 0) && 789 (flags & RS_PACING_LT) && 790 (flags & RS_PACING_SUB_OK)) { 791 /* Kick in a substitute (the lowest) */ 792 rte = &rs->rs_rlt[rs->rs_lowest_valid]; 793 } 794 } else { 795 /* 796 * Here we go backward through the table so that we can find 797 * the one greater in theory faster (but its probably a 798 * wash). 799 */ 800 for (i = rs->rs_highest_valid, matched = 0; i >= rs->rs_lowest_valid; i--) { 801 if (rs->rs_rlt[i].rate > bytes_per_sec) { 802 /* A possible candidate */ 803 rte = &rs->rs_rlt[i]; 804 } 805 if ((flags & RS_PACING_GEQ) && 806 (bytes_per_sec == rs->rs_rlt[i].rate)) { 807 /* An exact match and we want equal */ 808 matched = 1; 809 rte = &rs->rs_rlt[i]; 810 break; 811 } else if (rte) { 812 /* 813 * Found one that is larger than but don't 814 * stop, there may be a more closer match. 815 */ 816 matched = 1; 817 } 818 if (rs->rs_rlt[i].rate < bytes_per_sec) { 819 /* 820 * We found a table entry that is smaller, 821 * stop there will be none greater or equal. 822 */ 823 break; 824 } 825 } 826 if ((matched == 0) && 827 (flags & RS_PACING_SUB_OK)) { 828 /* Kick in a substitute (the highest) */ 829 rte = &rs->rs_rlt[rs->rs_highest_valid]; 830 } 831 } 832 return (rte); 833 } 834 835 static struct ifnet * 836 rt_find_real_interface(struct ifnet *ifp, struct inpcb *inp, int *error) 837 { 838 struct ifnet *tifp; 839 struct m_snd_tag *tag; 840 union if_snd_tag_alloc_params params = { 841 .rate_limit.hdr.type = IF_SND_TAG_TYPE_RATE_LIMIT, 842 .rate_limit.hdr.flowid = 1, 843 .rate_limit.max_rate = COMMON_RATE, 844 .rate_limit.flags = M_NOWAIT, 845 }; 846 int err; 847 #ifdef RSS 848 params.rate_limit.hdr.flowtype = ((inp->inp_vflag & INP_IPV6) ? 849 M_HASHTYPE_RSS_TCP_IPV6 : M_HASHTYPE_RSS_TCP_IPV4); 850 #else 851 params.rate_limit.hdr.flowtype = M_HASHTYPE_OPAQUE_HASH; 852 #endif 853 tag = NULL; 854 if (ifp->if_snd_tag_alloc) { 855 if (error) 856 *error = ENODEV; 857 return (NULL); 858 } 859 err = ifp->if_snd_tag_alloc(ifp, ¶ms, &tag); 860 if (err) { 861 /* Failed to setup a tag? */ 862 if (error) 863 *error = err; 864 return (NULL); 865 } 866 tifp = tag->ifp; 867 tifp->if_snd_tag_free(tag); 868 return (tifp); 869 } 870 871 static const struct tcp_hwrate_limit_table * 872 rt_setup_rate(struct inpcb *inp, struct ifnet *ifp, uint64_t bytes_per_sec, 873 uint32_t flags, int *error) 874 { 875 /* First lets find the interface if it exists */ 876 const struct tcp_hwrate_limit_table *rte; 877 struct tcp_rate_set *rs; 878 struct epoch_tracker et; 879 int err; 880 881 epoch_enter_preempt(net_epoch_preempt, &et); 882 use_real_interface: 883 CK_LIST_FOREACH(rs, &int_rs, next) { 884 /* 885 * Note we don't look with the lock since we either see a 886 * new entry or will get one when we try to add it. 887 */ 888 if (rs->rs_flags & RS_IS_DEAD) { 889 /* The dead are not looked at */ 890 continue; 891 } 892 if ((rs->rs_ifp == ifp) && 893 (rs->rs_if_dunit == ifp->if_dunit)) { 894 /* Ok we found it */ 895 break; 896 } 897 } 898 if ((rs == NULL) || 899 (rs->rs_flags & RS_INTF_NO_SUP) || 900 (rs->rs_flags & RS_IS_DEAD)) { 901 /* 902 * This means we got a packet *before* 903 * the IF-UP was processed below, <or> 904 * while or after we already received an interface 905 * departed event. In either case we really don't 906 * want to do anything with pacing, in 907 * the departing case the packet is not 908 * going to go very far. The new case 909 * might be arguable, but its impossible 910 * to tell from the departing case. 911 */ 912 if (rs->rs_disable && error) 913 *error = ENODEV; 914 epoch_exit_preempt(net_epoch_preempt, &et); 915 return (NULL); 916 } 917 918 if ((rs == NULL) || (rs->rs_disable != 0)) { 919 if (rs->rs_disable && error) 920 *error = ENOSPC; 921 epoch_exit_preempt(net_epoch_preempt, &et); 922 return (NULL); 923 } 924 if (rs->rs_flags & RS_IS_DEFF) { 925 /* We need to find the real interface */ 926 struct ifnet *tifp; 927 928 tifp = rt_find_real_interface(ifp, inp, error); 929 if (tifp == NULL) { 930 if (rs->rs_disable && error) 931 *error = ENOTSUP; 932 epoch_exit_preempt(net_epoch_preempt, &et); 933 return (NULL); 934 } 935 goto use_real_interface; 936 } 937 if (rs->rs_flow_limit && 938 ((rs->rs_flows_using + 1) > rs->rs_flow_limit)) { 939 if (error) 940 *error = ENOSPC; 941 epoch_exit_preempt(net_epoch_preempt, &et); 942 return (NULL); 943 } 944 rte = tcp_find_suitable_rate(rs, bytes_per_sec, flags); 945 if (rte) { 946 err = in_pcbattach_txrtlmt(inp, rs->rs_ifp, 947 inp->inp_flowtype, 948 inp->inp_flowid, 949 rte->rate, 950 &inp->inp_snd_tag); 951 if (err) { 952 /* Failed to attach */ 953 if (error) 954 *error = err; 955 rte = NULL; 956 } 957 } 958 if (rte) { 959 /* 960 * We use an atomic here for accounting so we don't have to 961 * use locks when freeing. 962 */ 963 atomic_add_64(&rs->rs_flows_using, 1); 964 } 965 epoch_exit_preempt(net_epoch_preempt, &et); 966 return (rte); 967 } 968 969 static void 970 tcp_rl_ifnet_link(void *arg __unused, struct ifnet *ifp, int link_state) 971 { 972 int error; 973 struct tcp_rate_set *rs; 974 975 if (((ifp->if_capabilities & IFCAP_TXRTLMT) == 0) || 976 (link_state != LINK_STATE_UP)) { 977 /* 978 * We only care on an interface going up that is rate-limit 979 * capable. 980 */ 981 return; 982 } 983 mtx_lock(&rs_mtx); 984 CK_LIST_FOREACH(rs, &int_rs, next) { 985 if ((rs->rs_ifp == ifp) && 986 (rs->rs_if_dunit == ifp->if_dunit)) { 987 /* We already have initialized this guy */ 988 mtx_unlock(&rs_mtx); 989 return; 990 } 991 } 992 mtx_unlock(&rs_mtx); 993 rt_setup_new_rs(ifp, &error); 994 } 995 996 static void 997 tcp_rl_ifnet_departure(void *arg __unused, struct ifnet *ifp) 998 { 999 struct tcp_rate_set *rs, *nrs; 1000 struct ifnet *tifp; 1001 int i; 1002 1003 mtx_lock(&rs_mtx); 1004 CK_LIST_FOREACH_SAFE(rs, &int_rs, next, nrs) { 1005 if ((rs->rs_ifp == ifp) && 1006 (rs->rs_if_dunit == ifp->if_dunit)) { 1007 CK_LIST_REMOVE(rs, next); 1008 rs_number_alive--; 1009 rs->rs_flags |= RS_IS_DEAD; 1010 for (i = 0; i < rs->rs_rate_cnt; i++) { 1011 if (rs->rs_rlt[i].flags & HDWRPACE_TAGPRESENT) { 1012 tifp = rs->rs_rlt[i].tag->ifp; 1013 in_pcbdetach_tag(tifp, rs->rs_rlt[i].tag); 1014 rs->rs_rlt[i].tag = NULL; 1015 } 1016 rs->rs_rlt[i].flags = HDWRPACE_IFPDEPARTED; 1017 } 1018 if (rs->rs_flows_using == 0) 1019 rs_defer_destroy(rs); 1020 break; 1021 } 1022 } 1023 mtx_unlock(&rs_mtx); 1024 } 1025 1026 static void 1027 tcp_rl_shutdown(void *arg __unused, int howto __unused) 1028 { 1029 struct tcp_rate_set *rs, *nrs; 1030 struct ifnet *tifp; 1031 int i; 1032 1033 mtx_lock(&rs_mtx); 1034 CK_LIST_FOREACH_SAFE(rs, &int_rs, next, nrs) { 1035 CK_LIST_REMOVE(rs, next); 1036 rs_number_alive--; 1037 rs->rs_flags |= RS_IS_DEAD; 1038 for (i = 0; i < rs->rs_rate_cnt; i++) { 1039 if (rs->rs_rlt[i].flags & HDWRPACE_TAGPRESENT) { 1040 tifp = rs->rs_rlt[i].tag->ifp; 1041 in_pcbdetach_tag(tifp, rs->rs_rlt[i].tag); 1042 rs->rs_rlt[i].tag = NULL; 1043 } 1044 rs->rs_rlt[i].flags = HDWRPACE_IFPDEPARTED; 1045 } 1046 if (rs->rs_flows_using == 0) 1047 rs_defer_destroy(rs); 1048 } 1049 mtx_unlock(&rs_mtx); 1050 } 1051 1052 const struct tcp_hwrate_limit_table * 1053 tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp, 1054 uint64_t bytes_per_sec, int flags, int *error) 1055 { 1056 const struct tcp_hwrate_limit_table *rte; 1057 1058 if (tp->t_inpcb->inp_snd_tag == NULL) { 1059 /* 1060 * We are setting up a rate for the first time. 1061 */ 1062 if ((ifp->if_capabilities & IFCAP_TXRTLMT) == 0) { 1063 /* Not supported by the egress */ 1064 if (error) 1065 *error = ENODEV; 1066 return (NULL); 1067 } 1068 #ifdef KERN_TLS 1069 if (tp->t_inpcb->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) { 1070 /* 1071 * We currently can't do both TLS and hardware 1072 * pacing 1073 */ 1074 if (error) 1075 *error = EINVAL; 1076 return (NULL); 1077 } 1078 #endif 1079 rte = rt_setup_rate(tp->t_inpcb, ifp, bytes_per_sec, flags, error); 1080 } else { 1081 /* 1082 * We are modifying a rate, wrong interface? 1083 */ 1084 if (error) 1085 *error = EINVAL; 1086 rte = NULL; 1087 } 1088 return (rte); 1089 } 1090 1091 const struct tcp_hwrate_limit_table * 1092 tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table *crte, 1093 struct tcpcb *tp, struct ifnet *ifp, 1094 uint64_t bytes_per_sec, int flags, int *error) 1095 { 1096 const struct tcp_hwrate_limit_table *nrte; 1097 const struct tcp_rate_set *rs; 1098 int is_indirect = 0; 1099 int err; 1100 1101 1102 if ((tp->t_inpcb->inp_snd_tag == NULL) || 1103 (crte == NULL)) { 1104 /* Wrong interface */ 1105 if (error) 1106 *error = EINVAL; 1107 return (NULL); 1108 } 1109 rs = crte->ptbl; 1110 if ((rs->rs_flags & RS_IS_DEAD) || 1111 (crte->flags & HDWRPACE_IFPDEPARTED)) { 1112 /* Release the rate, and try anew */ 1113 re_rate: 1114 tcp_rel_pacing_rate(crte, tp); 1115 nrte = tcp_set_pacing_rate(tp, ifp, 1116 bytes_per_sec, flags, error); 1117 return (nrte); 1118 } 1119 if ((rs->rs_flags & RT_IS_INDIRECT ) == RT_IS_INDIRECT) 1120 is_indirect = 1; 1121 else 1122 is_indirect = 0; 1123 if ((is_indirect == 0) && 1124 ((ifp != rs->rs_ifp) || 1125 (ifp->if_dunit != rs->rs_if_dunit))) { 1126 /* 1127 * Something changed, the user is not pointing to the same 1128 * ifp? Maybe a route updated on this guy? 1129 */ 1130 goto re_rate; 1131 } else if (is_indirect) { 1132 /* 1133 * For indirect we have to dig in and find the real interface. 1134 */ 1135 struct ifnet *rifp; 1136 1137 rifp = rt_find_real_interface(ifp, tp->t_inpcb, error); 1138 if (rifp == NULL) { 1139 /* Can't find it? */ 1140 goto re_rate; 1141 } 1142 if ((rifp != rs->rs_ifp) || 1143 (ifp->if_dunit != rs->rs_if_dunit)) { 1144 goto re_rate; 1145 } 1146 } 1147 nrte = tcp_find_suitable_rate(rs, bytes_per_sec, flags); 1148 if (nrte == crte) { 1149 /* No change */ 1150 if (error) 1151 *error = 0; 1152 return (crte); 1153 } 1154 if (nrte == NULL) { 1155 /* Release the old rate */ 1156 tcp_rel_pacing_rate(crte, tp); 1157 return (NULL); 1158 } 1159 /* Change rates to our new entry */ 1160 err = in_pcbmodify_txrtlmt(tp->t_inpcb, nrte->rate); 1161 if (err) { 1162 if (error) 1163 *error = err; 1164 return (NULL); 1165 } 1166 if (error) 1167 *error = 0; 1168 return (nrte); 1169 } 1170 1171 void 1172 tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table *crte, struct tcpcb *tp) 1173 { 1174 const struct tcp_rate_set *crs; 1175 struct tcp_rate_set *rs; 1176 uint64_t pre; 1177 1178 crs = crte->ptbl; 1179 /* 1180 * Now we must break the const 1181 * in order to release our refcount. 1182 */ 1183 rs = __DECONST(struct tcp_rate_set *, crs); 1184 pre = atomic_fetchadd_64(&rs->rs_flows_using, -1); 1185 if (pre == 1) { 1186 mtx_lock(&rs_mtx); 1187 /* 1188 * Is it dead? 1189 */ 1190 if (rs->rs_flags & RS_IS_DEAD) 1191 rs_defer_destroy(rs); 1192 mtx_unlock(&rs_mtx); 1193 } 1194 in_pcbdetach_txrtlmt(tp->t_inpcb); 1195 } 1196 1197 static eventhandler_tag rl_ifnet_departs; 1198 static eventhandler_tag rl_ifnet_arrives; 1199 static eventhandler_tag rl_shutdown_start; 1200 1201 static void 1202 tcp_rs_init(void *st __unused) 1203 { 1204 CK_LIST_INIT(&int_rs); 1205 rs_number_alive = 0; 1206 rs_number_dead = 0;; 1207 mtx_init(&rs_mtx, "tcp_rs_mtx", "rsmtx", MTX_DEF); 1208 rl_ifnet_departs = EVENTHANDLER_REGISTER(ifnet_departure_event, 1209 tcp_rl_ifnet_departure, 1210 NULL, EVENTHANDLER_PRI_ANY); 1211 rl_ifnet_arrives = EVENTHANDLER_REGISTER(ifnet_link_event, 1212 tcp_rl_ifnet_link, 1213 NULL, EVENTHANDLER_PRI_ANY); 1214 rl_shutdown_start = EVENTHANDLER_REGISTER(shutdown_pre_sync, 1215 tcp_rl_shutdown, NULL, 1216 SHUTDOWN_PRI_FIRST); 1217 printf("TCP_ratelimit: Is now initialized\n"); 1218 } 1219 1220 SYSINIT(tcp_rl_init, SI_SUB_SMP + 1, SI_ORDER_ANY, tcp_rs_init, NULL); 1221 #endif 1222