1 /*- 2 * 3 * SPDX-License-Identifier: BSD-3-Clause 4 * 5 * Copyright (c) 2018-2019 6 * Netflix Inc. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 */ 31 /** 32 * Author: Randall Stewart <rrs@netflix.com> 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 #include "opt_inet.h" 38 #include "opt_inet6.h" 39 #include "opt_ipsec.h" 40 #include "opt_tcpdebug.h" 41 #include "opt_ratelimit.h" 42 #include <sys/param.h> 43 #include <sys/kernel.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/sysctl.h> 49 #include <sys/eventhandler.h> 50 #include <sys/mutex.h> 51 #include <sys/ck.h> 52 #define TCPSTATES /* for logging */ 53 #include <netinet/in.h> 54 #include <netinet/in_pcb.h> 55 #include <netinet/tcp_var.h> 56 #ifdef INET6 57 #include <netinet6/tcp6_var.h> 58 #endif 59 #include <netinet/tcp_ratelimit.h> 60 #ifndef USECS_IN_SECOND 61 #define USECS_IN_SECOND 1000000 62 #endif 63 /* 64 * For the purposes of each send, what is the size 65 * of an ethernet frame. 66 */ 67 #ifndef ETHERNET_SEGMENT_SIZE 68 #define ETHERNET_SEGMENT_SIZE 1500 69 #endif 70 MALLOC_DEFINE(M_TCPPACE, "tcp_hwpace", "TCP Hardware pacing memory"); 71 #ifdef RATELIMIT 72 73 #define COMMON_RATE 180500 74 uint64_t desired_rates[] = { 75 62500, /* 500Kbps */ 76 180500, /* 1.44Mpbs */ 77 375000, /* 3Mbps */ 78 500000, /* 4Mbps */ 79 625000, /* 5Mbps */ 80 750000, /* 6Mbps */ 81 1000000, /* 8Mbps */ 82 1250000, /* 10Mbps */ 83 2500000, /* 20Mbps */ 84 3750000, /* 30Mbps */ 85 5000000, /* 40Meg */ 86 6250000, /* 50Mbps */ 87 12500000, /* 100Mbps */ 88 25000000, /* 200Mbps */ 89 50000000, /* 400Mbps */ 90 100000000, /* 800Mbps */ 91 12500, /* 100kbps */ 92 25000, /* 200kbps */ 93 875000, /* 7Mbps */ 94 1125000, /* 9Mbps */ 95 1875000, /* 15Mbps */ 96 3125000, /* 25Mbps */ 97 8125000, /* 65Mbps */ 98 10000000, /* 80Mbps */ 99 18750000, /* 150Mbps */ 100 20000000, /* 250Mbps */ 101 37500000, /* 350Mbps */ 102 62500000, /* 500Mbps */ 103 78125000, /* 625Mbps */ 104 125000000, /* 1Gbps */ 105 }; 106 #define MAX_HDWR_RATES (sizeof(desired_rates)/sizeof(uint64_t)) 107 #define RS_ORDERED_COUNT 16 /* 108 * Number that are in order 109 * at the beginning of the table, 110 * over this a sort is required. 111 */ 112 #define RS_NEXT_ORDER_GROUP 16 /* 113 * The point in our table where 114 * we come fill in a second ordered 115 * group (index wise means -1). 116 */ 117 #define ALL_HARDWARE_RATES 1004 /* 118 * 1Meg - 1Gig in 1 Meg steps 119 * plus 100, 200k and 500k and 120 * 10Gig 121 */ 122 123 #define RS_ONE_MEGABIT_PERSEC 1000000 124 #define RS_ONE_GIGABIT_PERSEC 1000000000 125 #define RS_TEN_GIGABIT_PERSEC 10000000000 126 127 static struct head_tcp_rate_set int_rs; 128 static struct mtx rs_mtx; 129 uint32_t rs_number_alive; 130 uint32_t rs_number_dead; 131 132 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, rl, CTLFLAG_RW, 0, 133 "TCP Ratelimit stats"); 134 SYSCTL_UINT(_net_inet_tcp_rl, OID_AUTO, alive, CTLFLAG_RW, 135 &rs_number_alive, 0, 136 "Number of interfaces initialized for ratelimiting"); 137 SYSCTL_UINT(_net_inet_tcp_rl, OID_AUTO, dead, CTLFLAG_RW, 138 &rs_number_dead, 0, 139 "Number of interfaces departing from ratelimiting"); 140 141 static void 142 rl_add_syctl_entries(struct sysctl_oid *rl_sysctl_root, struct tcp_rate_set *rs) 143 { 144 /* 145 * Add sysctl entries for thus interface. 146 */ 147 if (rs->rs_flags & RS_INTF_NO_SUP) { 148 SYSCTL_ADD_S32(&rs->sysctl_ctx, 149 SYSCTL_CHILDREN(rl_sysctl_root), 150 OID_AUTO, "disable", CTLFLAG_RD, 151 &rs->rs_disable, 0, 152 "Disable this interface from new hdwr limiting?"); 153 } else { 154 SYSCTL_ADD_S32(&rs->sysctl_ctx, 155 SYSCTL_CHILDREN(rl_sysctl_root), 156 OID_AUTO, "disable", CTLFLAG_RW, 157 &rs->rs_disable, 0, 158 "Disable this interface from new hdwr limiting?"); 159 } 160 SYSCTL_ADD_S32(&rs->sysctl_ctx, 161 SYSCTL_CHILDREN(rl_sysctl_root), 162 OID_AUTO, "minseg", CTLFLAG_RW, 163 &rs->rs_min_seg, 0, 164 "What is the minimum we need to send on this interface?"); 165 SYSCTL_ADD_U64(&rs->sysctl_ctx, 166 SYSCTL_CHILDREN(rl_sysctl_root), 167 OID_AUTO, "flow_limit", CTLFLAG_RW, 168 &rs->rs_flow_limit, 0, 169 "What is the limit for number of flows (0=unlimited)?"); 170 SYSCTL_ADD_S32(&rs->sysctl_ctx, 171 SYSCTL_CHILDREN(rl_sysctl_root), 172 OID_AUTO, "highest", CTLFLAG_RD, 173 &rs->rs_highest_valid, 0, 174 "Highest valid rate"); 175 SYSCTL_ADD_S32(&rs->sysctl_ctx, 176 SYSCTL_CHILDREN(rl_sysctl_root), 177 OID_AUTO, "lowest", CTLFLAG_RD, 178 &rs->rs_lowest_valid, 0, 179 "Lowest valid rate"); 180 SYSCTL_ADD_S32(&rs->sysctl_ctx, 181 SYSCTL_CHILDREN(rl_sysctl_root), 182 OID_AUTO, "flags", CTLFLAG_RD, 183 &rs->rs_flags, 0, 184 "What lags are on the entry?"); 185 SYSCTL_ADD_S32(&rs->sysctl_ctx, 186 SYSCTL_CHILDREN(rl_sysctl_root), 187 OID_AUTO, "numrates", CTLFLAG_RD, 188 &rs->rs_rate_cnt, 0, 189 "How many rates re there?"); 190 SYSCTL_ADD_U64(&rs->sysctl_ctx, 191 SYSCTL_CHILDREN(rl_sysctl_root), 192 OID_AUTO, "flows_using", CTLFLAG_RD, 193 &rs->rs_flows_using, 0, 194 "How many flows are using this interface now?"); 195 #ifdef DETAILED_RATELIMIT_SYSCTL 196 if (rs->rs_rlt && rs->rs_rate_cnt > 0) { 197 /* Lets display the rates */ 198 int i; 199 struct sysctl_oid *rl_rates; 200 struct sysctl_oid *rl_rate_num; 201 char rate_num[16]; 202 rl_rates = SYSCTL_ADD_NODE(&rs->sysctl_ctx, 203 SYSCTL_CHILDREN(rl_sysctl_root), 204 OID_AUTO, 205 "rate", 206 CTLFLAG_RW, 0, 207 "Ratelist"); 208 for( i = 0; i < rs->rs_rate_cnt; i++) { 209 sprintf(rate_num, "%d", i); 210 rl_rate_num = SYSCTL_ADD_NODE(&rs->sysctl_ctx, 211 SYSCTL_CHILDREN(rl_rates), 212 OID_AUTO, 213 rate_num, 214 CTLFLAG_RW, 0, 215 "Individual Rate"); 216 SYSCTL_ADD_U32(&rs->sysctl_ctx, 217 SYSCTL_CHILDREN(rl_rate_num), 218 OID_AUTO, "flags", CTLFLAG_RD, 219 &rs->rs_rlt[i].flags, 0, 220 "Flags on this rate"); 221 SYSCTL_ADD_U32(&rs->sysctl_ctx, 222 SYSCTL_CHILDREN(rl_rate_num), 223 OID_AUTO, "pacetime", CTLFLAG_RD, 224 &rs->rs_rlt[i].time_between, 0, 225 "Time hardware inserts between 1500 byte sends"); 226 SYSCTL_ADD_U64(&rs->sysctl_ctx, 227 SYSCTL_CHILDREN(rl_rate_num), 228 OID_AUTO, "rate", CTLFLAG_RD, 229 &rs->rs_rlt[i].rate, 0, 230 "Rate in bytes per second"); 231 } 232 } 233 #endif 234 } 235 236 static void 237 rs_destroy(epoch_context_t ctx) 238 { 239 struct tcp_rate_set *rs; 240 241 rs = __containerof(ctx, struct tcp_rate_set, rs_epoch_ctx); 242 mtx_lock(&rs_mtx); 243 rs->rs_flags &= ~RS_FUNERAL_SCHD; 244 if (rs->rs_flows_using == 0) { 245 /* 246 * In theory its possible (but unlikely) 247 * that while the delete was occuring 248 * and we were applying the DEAD flag 249 * someone slipped in and found the 250 * interface in a lookup. While we 251 * decided rs_flows_using were 0 and 252 * scheduling the epoch_call, the other 253 * thread incremented rs_flow_using. This 254 * is because users have a pointer and 255 * we only use the rs_flows_using in an 256 * atomic fashion, i.e. the other entities 257 * are not protected. To assure this did 258 * not occur, we check rs_flows_using here 259 * before deleteing. 260 */ 261 sysctl_ctx_free(&rs->sysctl_ctx); 262 free(rs->rs_rlt, M_TCPPACE); 263 free(rs, M_TCPPACE); 264 rs_number_dead--; 265 } 266 mtx_unlock(&rs_mtx); 267 268 } 269 270 #ifdef INET 271 extern counter_u64_t rate_limit_set_ok; 272 extern counter_u64_t rate_limit_active; 273 extern counter_u64_t rate_limit_alloc_fail; 274 #endif 275 276 static int 277 rl_attach_txrtlmt(struct ifnet *ifp, 278 uint32_t flowtype, 279 int flowid, 280 uint64_t cfg_rate, 281 struct m_snd_tag **tag) 282 { 283 int error; 284 union if_snd_tag_alloc_params params = { 285 .rate_limit.hdr.type = IF_SND_TAG_TYPE_RATE_LIMIT, 286 .rate_limit.hdr.flowid = flowid, 287 .rate_limit.hdr.flowtype = flowtype, 288 .rate_limit.max_rate = cfg_rate, 289 .rate_limit.flags = M_NOWAIT, 290 }; 291 292 if (ifp->if_snd_tag_alloc == NULL) { 293 error = EOPNOTSUPP; 294 } else { 295 error = ifp->if_snd_tag_alloc(ifp, ¶ms, tag); 296 #ifdef INET 297 if (error == 0) { 298 if_ref((*tag)->ifp); 299 counter_u64_add(rate_limit_set_ok, 1); 300 counter_u64_add(rate_limit_active, 1); 301 } else 302 counter_u64_add(rate_limit_alloc_fail, 1); 303 #endif 304 } 305 return (error); 306 } 307 308 static void 309 populate_canned_table(struct tcp_rate_set *rs, const uint64_t *rate_table_act) 310 { 311 /* 312 * The internal table is "special", it 313 * is two seperate ordered tables that 314 * must be merged. We get here when the 315 * adapter specifies a number of rates that 316 * covers both ranges in the table in some 317 * form. 318 */ 319 int i, at_low, at_high; 320 uint8_t low_disabled = 0, high_disabled = 0; 321 322 for(i = 0, at_low = 0, at_high = RS_NEXT_ORDER_GROUP; i < rs->rs_rate_cnt; i++) { 323 rs->rs_rlt[i].flags = 0; 324 rs->rs_rlt[i].time_between = 0; 325 if ((low_disabled == 0) && 326 (high_disabled || 327 (rate_table_act[at_low] < rate_table_act[at_high]))) { 328 rs->rs_rlt[i].rate = rate_table_act[at_low]; 329 at_low++; 330 if (at_low == RS_NEXT_ORDER_GROUP) 331 low_disabled = 1; 332 } else if (high_disabled == 0) { 333 rs->rs_rlt[i].rate = rate_table_act[at_high]; 334 at_high++; 335 if (at_high == MAX_HDWR_RATES) 336 high_disabled = 1; 337 } 338 } 339 } 340 341 static struct tcp_rate_set * 342 rt_setup_new_rs(struct ifnet *ifp, int *error) 343 { 344 struct tcp_rate_set *rs; 345 const uint64_t *rate_table_act; 346 uint64_t lentim, res; 347 size_t sz; 348 uint32_t hash_type; 349 int i; 350 struct if_ratelimit_query_results rl; 351 struct sysctl_oid *rl_sysctl_root; 352 /* 353 * We expect to enter with the 354 * mutex locked. 355 */ 356 357 if (ifp->if_ratelimit_query == NULL) { 358 /* 359 * We can do nothing if we cannot 360 * get a query back from the driver. 361 */ 362 return (NULL); 363 } 364 rs = malloc(sizeof(struct tcp_rate_set), M_TCPPACE, M_NOWAIT | M_ZERO); 365 if (rs == NULL) { 366 if (error) 367 *error = ENOMEM; 368 return (NULL); 369 } 370 rl.flags = RT_NOSUPPORT; 371 ifp->if_ratelimit_query(ifp, &rl); 372 if (rl.flags & RT_IS_UNUSABLE) { 373 /* 374 * The interface does not really support 375 * the rate-limiting. 376 */ 377 memset(rs, 0, sizeof(struct tcp_rate_set)); 378 rs->rs_ifp = ifp; 379 rs->rs_if_dunit = ifp->if_dunit; 380 rs->rs_flags = RS_INTF_NO_SUP; 381 rs->rs_disable = 1; 382 rs_number_alive++; 383 sysctl_ctx_init(&rs->sysctl_ctx); 384 rl_sysctl_root = SYSCTL_ADD_NODE(&rs->sysctl_ctx, 385 SYSCTL_STATIC_CHILDREN(_net_inet_tcp_rl), 386 OID_AUTO, 387 rs->rs_ifp->if_xname, 388 CTLFLAG_RW, 0, 389 ""); 390 rl_add_syctl_entries(rl_sysctl_root, rs); 391 mtx_lock(&rs_mtx); 392 CK_LIST_INSERT_HEAD(&int_rs, rs, next); 393 mtx_unlock(&rs_mtx); 394 return (rs); 395 } else if ((rl.flags & RT_IS_INDIRECT) == RT_IS_INDIRECT) { 396 memset(rs, 0, sizeof(struct tcp_rate_set)); 397 rs->rs_ifp = ifp; 398 rs->rs_if_dunit = ifp->if_dunit; 399 rs->rs_flags = RS_IS_DEFF; 400 rs_number_alive++; 401 sysctl_ctx_init(&rs->sysctl_ctx); 402 rl_sysctl_root = SYSCTL_ADD_NODE(&rs->sysctl_ctx, 403 SYSCTL_STATIC_CHILDREN(_net_inet_tcp_rl), 404 OID_AUTO, 405 rs->rs_ifp->if_xname, 406 CTLFLAG_RW, 0, 407 ""); 408 rl_add_syctl_entries(rl_sysctl_root, rs); 409 mtx_lock(&rs_mtx); 410 CK_LIST_INSERT_HEAD(&int_rs, rs, next); 411 mtx_unlock(&rs_mtx); 412 return (rs); 413 } else if ((rl.flags & RT_IS_FIXED_TABLE) == RT_IS_FIXED_TABLE) { 414 /* Mellanox most likely */ 415 rs->rs_ifp = ifp; 416 rs->rs_if_dunit = ifp->if_dunit; 417 rs->rs_rate_cnt = rl.number_of_rates; 418 rs->rs_min_seg = rl.min_segment_burst; 419 rs->rs_highest_valid = 0; 420 rs->rs_flow_limit = rl.max_flows; 421 rs->rs_flags = RS_IS_INTF | RS_NO_PRE; 422 rs->rs_disable = 0; 423 rate_table_act = rl.rate_table; 424 } else if ((rl.flags & RT_IS_SELECTABLE) == RT_IS_SELECTABLE) { 425 /* Chelsio */ 426 rs->rs_ifp = ifp; 427 rs->rs_if_dunit = ifp->if_dunit; 428 rs->rs_rate_cnt = rl.number_of_rates; 429 rs->rs_min_seg = rl.min_segment_burst; 430 rs->rs_disable = 0; 431 rs->rs_flow_limit = rl.max_flows; 432 rate_table_act = desired_rates; 433 if ((rs->rs_rate_cnt > MAX_HDWR_RATES) && 434 (rs->rs_rate_cnt < ALL_HARDWARE_RATES)) { 435 /* 436 * Our desired table is not big 437 * enough, do what we can. 438 */ 439 rs->rs_rate_cnt = MAX_HDWR_RATES; 440 } 441 if (rs->rs_rate_cnt <= RS_ORDERED_COUNT) 442 rs->rs_flags = RS_IS_INTF; 443 else 444 rs->rs_flags = RS_IS_INTF | RS_INT_TBL; 445 if (rs->rs_rate_cnt >= ALL_HARDWARE_RATES) 446 rs->rs_rate_cnt = ALL_HARDWARE_RATES; 447 } else { 448 printf("Interface:%s unit:%d not one known to have rate-limits\n", 449 ifp->if_dname, 450 ifp->if_dunit); 451 free(rs, M_TCPPACE); 452 return (NULL); 453 } 454 sz = sizeof(struct tcp_hwrate_limit_table) * rs->rs_rate_cnt; 455 rs->rs_rlt = malloc(sz, M_TCPPACE, M_NOWAIT); 456 if (rs->rs_rlt == NULL) { 457 if (error) 458 *error = ENOMEM; 459 bail: 460 free(rs, M_TCPPACE); 461 return (NULL); 462 } 463 if (rs->rs_rate_cnt >= ALL_HARDWARE_RATES) { 464 /* 465 * The interface supports all 466 * the rates we could possibly want. 467 */ 468 uint64_t rat; 469 470 rs->rs_rlt[0].rate = 12500; /* 100k */ 471 rs->rs_rlt[1].rate = 25000; /* 200k */ 472 rs->rs_rlt[2].rate = 62500; /* 500k */ 473 /* Note 125000 == 1Megabit 474 * populate 1Meg - 1000meg. 475 */ 476 for(i = 3, rat = 125000; i< (ALL_HARDWARE_RATES-1); i++) { 477 rs->rs_rlt[i].rate = rat; 478 rat += 125000; 479 } 480 rs->rs_rlt[(ALL_HARDWARE_RATES-1)].rate = 1250000000; 481 } else if (rs->rs_flags & RS_INT_TBL) { 482 /* We populate this in a special way */ 483 populate_canned_table(rs, rate_table_act); 484 } else { 485 /* 486 * Just copy in the rates from 487 * the table, it is in order. 488 */ 489 for (i=0; i<rs->rs_rate_cnt; i++) { 490 rs->rs_rlt[i].rate = rate_table_act[i]; 491 rs->rs_rlt[i].time_between = 0; 492 rs->rs_rlt[i].flags = 0; 493 } 494 } 495 for (i = (rs->rs_rate_cnt - 1); i >= 0; i--) { 496 /* 497 * We go backwards through the list so that if we can't get 498 * a rate and fail to init one, we have at least a chance of 499 * getting the highest one. 500 */ 501 rs->rs_rlt[i].ptbl = rs; 502 rs->rs_rlt[i].tag = NULL; 503 /* 504 * Calculate the time between. 505 */ 506 lentim = ETHERNET_SEGMENT_SIZE * USECS_IN_SECOND; 507 res = lentim / rs->rs_rlt[i].rate; 508 if (res > 0) 509 rs->rs_rlt[i].time_between = res; 510 else 511 rs->rs_rlt[i].time_between = 1; 512 if (rs->rs_flags & RS_NO_PRE) { 513 rs->rs_rlt[i].flags = HDWRPACE_INITED; 514 rs->rs_lowest_valid = i; 515 } else { 516 int err; 517 #ifdef RSS 518 hash_type = M_HASHTYPE_RSS_TCP_IPV4; 519 #else 520 hash_type = M_HASHTYPE_OPAQUE_HASH; 521 #endif 522 err = rl_attach_txrtlmt(ifp, 523 hash_type, 524 (i + 1), 525 rs->rs_rlt[i].rate, 526 &rs->rs_rlt[i].tag); 527 if (err) { 528 if (i == (rs->rs_rate_cnt - 1)) { 529 /* 530 * Huh - first rate and we can't get 531 * it? 532 */ 533 free(rs->rs_rlt, M_TCPPACE); 534 if (error) 535 *error = err; 536 goto bail; 537 } else { 538 if (error) 539 *error = err; 540 } 541 break; 542 } else { 543 rs->rs_rlt[i].flags = HDWRPACE_INITED | HDWRPACE_TAGPRESENT; 544 rs->rs_lowest_valid = i; 545 } 546 } 547 } 548 /* Did we get at least 1 rate? */ 549 if (rs->rs_rlt[(rs->rs_rate_cnt - 1)].flags & HDWRPACE_INITED) 550 rs->rs_highest_valid = rs->rs_rate_cnt - 1; 551 else { 552 free(rs->rs_rlt, M_TCPPACE); 553 goto bail; 554 } 555 rs_number_alive++; 556 sysctl_ctx_init(&rs->sysctl_ctx); 557 rl_sysctl_root = SYSCTL_ADD_NODE(&rs->sysctl_ctx, 558 SYSCTL_STATIC_CHILDREN(_net_inet_tcp_rl), 559 OID_AUTO, 560 rs->rs_ifp->if_xname, 561 CTLFLAG_RW, 0, 562 ""); 563 rl_add_syctl_entries(rl_sysctl_root, rs); 564 mtx_lock(&rs_mtx); 565 CK_LIST_INSERT_HEAD(&int_rs, rs, next); 566 mtx_unlock(&rs_mtx); 567 return (rs); 568 } 569 570 static const struct tcp_hwrate_limit_table * 571 tcp_int_find_suitable_rate(const struct tcp_rate_set *rs, 572 uint64_t bytes_per_sec, uint32_t flags) 573 { 574 struct tcp_hwrate_limit_table *arte = NULL, *rte = NULL; 575 uint64_t mbits_per_sec, ind_calc; 576 int i; 577 578 mbits_per_sec = (bytes_per_sec * 8); 579 if (flags & RS_PACING_LT) { 580 if ((mbits_per_sec < RS_ONE_MEGABIT_PERSEC) && 581 (rs->rs_lowest_valid <= 2)){ 582 /* 583 * Smaller than 1Meg, only 584 * 3 entries can match it. 585 */ 586 for(i = rs->rs_lowest_valid; i < 3; i++) { 587 if (bytes_per_sec <= rs->rs_rlt[i].rate) { 588 rte = &rs->rs_rlt[i]; 589 break; 590 } else if (rs->rs_rlt[i].flags & HDWRPACE_INITED) { 591 arte = &rs->rs_rlt[i]; 592 } 593 } 594 goto done; 595 } else if ((mbits_per_sec > RS_ONE_GIGABIT_PERSEC) && 596 (rs->rs_rlt[(ALL_HARDWARE_RATES-1)].flags & HDWRPACE_INITED)){ 597 /* 598 * Larger than 1G (the majority of 599 * our table. 600 */ 601 if (mbits_per_sec < RS_TEN_GIGABIT_PERSEC) 602 rte = &rs->rs_rlt[(ALL_HARDWARE_RATES-1)]; 603 else 604 arte = &rs->rs_rlt[(ALL_HARDWARE_RATES-1)]; 605 goto done; 606 } 607 /* 608 * If we reach here its in our table (between 1Meg - 1000Meg), 609 * just take the rounded down mbits per second, and add 610 * 1Megabit to it, from this we can calculate 611 * the index in the table. 612 */ 613 ind_calc = mbits_per_sec/RS_ONE_MEGABIT_PERSEC; 614 if ((ind_calc * RS_ONE_MEGABIT_PERSEC) != mbits_per_sec) 615 ind_calc++; 616 /* our table is offset by 3, we add 2 */ 617 ind_calc += 2; 618 if (ind_calc > (ALL_HARDWARE_RATES-1)) { 619 /* This should not happen */ 620 ind_calc = ALL_HARDWARE_RATES-1; 621 } 622 if ((ind_calc >= rs->rs_lowest_valid) && 623 (ind_calc <= rs->rs_highest_valid)) 624 rte = &rs->rs_rlt[ind_calc]; 625 } else if (flags & RS_PACING_EXACT_MATCH) { 626 if ((mbits_per_sec < RS_ONE_MEGABIT_PERSEC) && 627 (rs->rs_lowest_valid <= 2)){ 628 for(i = rs->rs_lowest_valid; i < 3; i++) { 629 if (bytes_per_sec == rs->rs_rlt[i].rate) { 630 rte = &rs->rs_rlt[i]; 631 break; 632 } 633 } 634 } else if ((mbits_per_sec > RS_ONE_GIGABIT_PERSEC) && 635 (rs->rs_rlt[(ALL_HARDWARE_RATES-1)].flags & HDWRPACE_INITED)) { 636 /* > 1Gbps only one rate */ 637 if (bytes_per_sec == rs->rs_rlt[(ALL_HARDWARE_RATES-1)].rate) { 638 /* Its 10G wow */ 639 rte = &rs->rs_rlt[(ALL_HARDWARE_RATES-1)]; 640 } 641 } else { 642 /* Ok it must be a exact meg (its between 1G and 1Meg) */ 643 ind_calc = mbits_per_sec/RS_ONE_MEGABIT_PERSEC; 644 if ((ind_calc * RS_ONE_MEGABIT_PERSEC) == mbits_per_sec) { 645 /* its an exact Mbps */ 646 ind_calc += 2; 647 if (ind_calc > (ALL_HARDWARE_RATES-1)) { 648 /* This should not happen */ 649 ind_calc = ALL_HARDWARE_RATES-1; 650 } 651 if (rs->rs_rlt[ind_calc].flags & HDWRPACE_INITED) 652 rte = &rs->rs_rlt[ind_calc]; 653 } 654 } 655 } else { 656 /* we want greater than the requested rate */ 657 if ((mbits_per_sec < RS_ONE_MEGABIT_PERSEC) && 658 (rs->rs_lowest_valid <= 2)){ 659 arte = &rs->rs_rlt[3]; /* set alternate to 1Meg */ 660 for (i=2; i>=rs->rs_lowest_valid; i--) { 661 if (bytes_per_sec < rs->rs_rlt[i].rate) { 662 rte = &rs->rs_rlt[i]; 663 break; 664 } else if ((flags & RS_PACING_GEQ) && 665 (bytes_per_sec == rs->rs_rlt[i].rate)) { 666 rte = &rs->rs_rlt[i]; 667 break; 668 } else { 669 arte = &rs->rs_rlt[i]; /* new alternate */ 670 } 671 } 672 } else if (mbits_per_sec > RS_ONE_GIGABIT_PERSEC) { 673 if ((bytes_per_sec < rs->rs_rlt[(ALL_HARDWARE_RATES-1)].rate) && 674 (rs->rs_rlt[(ALL_HARDWARE_RATES-1)].flags & HDWRPACE_INITED)){ 675 /* Our top rate is larger than the request */ 676 rte = &rs->rs_rlt[(ALL_HARDWARE_RATES-1)]; 677 } else if ((flags & RS_PACING_GEQ) && 678 (bytes_per_sec == rs->rs_rlt[(ALL_HARDWARE_RATES-1)].rate) && 679 (rs->rs_rlt[(ALL_HARDWARE_RATES-1)].flags & HDWRPACE_INITED)) { 680 /* It matches our top rate */ 681 rte = &rs->rs_rlt[(ALL_HARDWARE_RATES-1)]; 682 } else if (rs->rs_rlt[(ALL_HARDWARE_RATES-1)].flags & HDWRPACE_INITED) { 683 /* The top rate is an alternative */ 684 arte = &rs->rs_rlt[(ALL_HARDWARE_RATES-1)]; 685 } 686 } else { 687 /* Its in our range 1Meg - 1Gig */ 688 if (flags & RS_PACING_GEQ) { 689 ind_calc = mbits_per_sec/RS_ONE_MEGABIT_PERSEC; 690 if ((ind_calc * RS_ONE_MEGABIT_PERSEC) == mbits_per_sec) { 691 if (ind_calc > (ALL_HARDWARE_RATES-1)) { 692 /* This should not happen */ 693 ind_calc = (ALL_HARDWARE_RATES-1); 694 } 695 rte = &rs->rs_rlt[ind_calc]; 696 } 697 goto done; 698 } 699 ind_calc = (mbits_per_sec + (RS_ONE_MEGABIT_PERSEC-1))/RS_ONE_MEGABIT_PERSEC; 700 ind_calc += 2; 701 if (ind_calc > (ALL_HARDWARE_RATES-1)) { 702 /* This should not happen */ 703 ind_calc = ALL_HARDWARE_RATES-1; 704 } 705 if (rs->rs_rlt[ind_calc].flags & HDWRPACE_INITED) 706 rte = &rs->rs_rlt[ind_calc]; 707 } 708 } 709 done: 710 if ((rte == NULL) && 711 (arte != NULL) && 712 (flags & RS_PACING_SUB_OK)) { 713 /* We can use the substitute */ 714 rte = arte; 715 } 716 return (rte); 717 } 718 719 static const struct tcp_hwrate_limit_table * 720 tcp_find_suitable_rate(const struct tcp_rate_set *rs, uint64_t bytes_per_sec, uint32_t flags) 721 { 722 /** 723 * Hunt the rate table with the restrictions in flags and find a 724 * suitable rate if possible. 725 * RS_PACING_EXACT_MATCH - look for an exact match to rate. 726 * RS_PACING_GT - must be greater than. 727 * RS_PACING_GEQ - must be greater than or equal. 728 * RS_PACING_LT - must be less than. 729 * RS_PACING_SUB_OK - If we don't meet criteria a 730 * substitute is ok. 731 */ 732 int i, matched; 733 struct tcp_hwrate_limit_table *rte = NULL; 734 735 736 if ((rs->rs_flags & RS_INT_TBL) && 737 (rs->rs_rate_cnt >= ALL_HARDWARE_RATES)) { 738 /* 739 * Here we don't want to paw thru 740 * a big table, we have everything 741 * from 1Meg - 1000Meg in 1Meg increments. 742 * Use an alternate method to "lookup". 743 */ 744 return (tcp_int_find_suitable_rate(rs, bytes_per_sec, flags)); 745 } 746 if ((flags & RS_PACING_LT) || 747 (flags & RS_PACING_EXACT_MATCH)) { 748 /* 749 * For exact and less than we go forward through the table. 750 * This way when we find one larger we stop (exact was a 751 * toss up). 752 */ 753 for (i = rs->rs_lowest_valid, matched = 0; i <= rs->rs_highest_valid; i++) { 754 if ((flags & RS_PACING_EXACT_MATCH) && 755 (bytes_per_sec == rs->rs_rlt[i].rate)) { 756 rte = &rs->rs_rlt[i]; 757 matched = 1; 758 break; 759 } else if ((flags & RS_PACING_LT) && 760 (bytes_per_sec <= rs->rs_rlt[i].rate)) { 761 rte = &rs->rs_rlt[i]; 762 matched = 1; 763 break; 764 } 765 if (bytes_per_sec > rs->rs_rlt[i].rate) 766 break; 767 } 768 if ((matched == 0) && 769 (flags & RS_PACING_LT) && 770 (flags & RS_PACING_SUB_OK)) { 771 /* Kick in a substitute (the lowest) */ 772 rte = &rs->rs_rlt[rs->rs_lowest_valid]; 773 } 774 } else { 775 /* 776 * Here we go backward through the table so that we can find 777 * the one greater in theory faster (but its probably a 778 * wash). 779 */ 780 for (i = rs->rs_highest_valid, matched = 0; i >= rs->rs_lowest_valid; i--) { 781 if (rs->rs_rlt[i].rate > bytes_per_sec) { 782 /* A possible candidate */ 783 rte = &rs->rs_rlt[i]; 784 } 785 if ((flags & RS_PACING_GEQ) && 786 (bytes_per_sec == rs->rs_rlt[i].rate)) { 787 /* An exact match and we want equal */ 788 matched = 1; 789 rte = &rs->rs_rlt[i]; 790 break; 791 } else if (rte) { 792 /* 793 * Found one that is larger than but don't 794 * stop, there may be a more closer match. 795 */ 796 matched = 1; 797 } 798 if (rs->rs_rlt[i].rate < bytes_per_sec) { 799 /* 800 * We found a table entry that is smaller, 801 * stop there will be none greater or equal. 802 */ 803 break; 804 } 805 } 806 if ((matched == 0) && 807 (flags & RS_PACING_SUB_OK)) { 808 /* Kick in a substitute (the highest) */ 809 rte = &rs->rs_rlt[rs->rs_highest_valid]; 810 } 811 } 812 return (rte); 813 } 814 815 static struct ifnet * 816 rt_find_real_interface(struct ifnet *ifp, struct inpcb *inp, int *error) 817 { 818 struct ifnet *tifp; 819 struct m_snd_tag *tag; 820 union if_snd_tag_alloc_params params = { 821 .rate_limit.hdr.type = IF_SND_TAG_TYPE_RATE_LIMIT, 822 .rate_limit.hdr.flowid = 1, 823 .rate_limit.max_rate = COMMON_RATE, 824 .rate_limit.flags = M_NOWAIT, 825 }; 826 int err; 827 #ifdef RSS 828 params.rate_limit.hdr.flowtype = ((inp->inp_vflag & INP_IPV6) ? 829 M_HASHTYPE_RSS_TCP_IPV6 : M_HASHTYPE_RSS_TCP_IPV4); 830 #else 831 params.rate_limit.hdr.flowtype = M_HASHTYPE_OPAQUE_HASH; 832 #endif 833 tag = NULL; 834 if (ifp->if_snd_tag_alloc) { 835 if (error) 836 *error = ENODEV; 837 return (NULL); 838 } 839 err = ifp->if_snd_tag_alloc(ifp, ¶ms, &tag); 840 if (err) { 841 /* Failed to setup a tag? */ 842 if (error) 843 *error = err; 844 return (NULL); 845 } 846 tifp = tag->ifp; 847 tifp->if_snd_tag_free(tag); 848 return (tifp); 849 } 850 851 static const struct tcp_hwrate_limit_table * 852 rt_setup_rate(struct inpcb *inp, struct ifnet *ifp, uint64_t bytes_per_sec, 853 uint32_t flags, int *error) 854 { 855 /* First lets find the interface if it exists */ 856 const struct tcp_hwrate_limit_table *rte; 857 struct tcp_rate_set *rs; 858 struct epoch_tracker et; 859 int err; 860 861 epoch_enter_preempt(net_epoch_preempt, &et); 862 use_real_interface: 863 CK_LIST_FOREACH(rs, &int_rs, next) { 864 /* 865 * Note we don't look with the lock since we either see a 866 * new entry or will get one when we try to add it. 867 */ 868 if (rs->rs_flags & RS_IS_DEAD) { 869 /* The dead are not looked at */ 870 continue; 871 } 872 if ((rs->rs_ifp == ifp) && 873 (rs->rs_if_dunit == ifp->if_dunit)) { 874 /* Ok we found it */ 875 break; 876 } 877 } 878 if ((rs == NULL) || 879 (rs->rs_flags & RS_INTF_NO_SUP) || 880 (rs->rs_flags & RS_IS_DEAD)) { 881 /* 882 * This means we got a packet *before* 883 * the IF-UP was processed below, <or> 884 * while or after we already received an interface 885 * departed event. In either case we really don't 886 * want to do anything with pacing, in 887 * the departing case the packet is not 888 * going to go very far. The new case 889 * might be arguable, but its impossible 890 * to tell from the departing case. 891 */ 892 if (rs->rs_disable && error) 893 *error = ENODEV; 894 epoch_exit_preempt(net_epoch_preempt, &et); 895 return (NULL); 896 } 897 898 if ((rs == NULL) || (rs->rs_disable != 0)) { 899 if (rs->rs_disable && error) 900 *error = ENOSPC; 901 epoch_exit_preempt(net_epoch_preempt, &et); 902 return (NULL); 903 } 904 if (rs->rs_flags & RS_IS_DEFF) { 905 /* We need to find the real interface */ 906 struct ifnet *tifp; 907 908 tifp = rt_find_real_interface(ifp, inp, error); 909 if (tifp == NULL) { 910 if (rs->rs_disable && error) 911 *error = ENOTSUP; 912 epoch_exit_preempt(net_epoch_preempt, &et); 913 return (NULL); 914 } 915 goto use_real_interface; 916 } 917 if (rs->rs_flow_limit && 918 ((rs->rs_flows_using + 1) > rs->rs_flow_limit)) { 919 if (error) 920 *error = ENOSPC; 921 epoch_exit_preempt(net_epoch_preempt, &et); 922 return (NULL); 923 } 924 rte = tcp_find_suitable_rate(rs, bytes_per_sec, flags); 925 if (rte) { 926 err = in_pcbattach_txrtlmt(inp, rs->rs_ifp, 927 inp->inp_flowtype, 928 inp->inp_flowid, 929 rte->rate, 930 &inp->inp_snd_tag); 931 if (err) { 932 /* Failed to attach */ 933 if (error) 934 *error = err; 935 rte = NULL; 936 } 937 } 938 if (rte) { 939 /* 940 * We use an atomic here for accounting so we don't have to 941 * use locks when freeing. 942 */ 943 atomic_add_64(&rs->rs_flows_using, 1); 944 } 945 epoch_exit_preempt(net_epoch_preempt, &et); 946 return (rte); 947 } 948 949 static void 950 tcp_rl_ifnet_link(void *arg __unused, struct ifnet *ifp, int link_state) 951 { 952 int error; 953 struct tcp_rate_set *rs; 954 955 if (((ifp->if_capabilities & IFCAP_TXRTLMT) == 0) || 956 (link_state != LINK_STATE_UP)) { 957 /* 958 * We only care on an interface going up that is rate-limit 959 * capable. 960 */ 961 return; 962 } 963 mtx_lock(&rs_mtx); 964 CK_LIST_FOREACH(rs, &int_rs, next) { 965 if ((rs->rs_ifp == ifp) && 966 (rs->rs_if_dunit == ifp->if_dunit)) { 967 /* We already have initialized this guy */ 968 mtx_unlock(&rs_mtx); 969 return; 970 } 971 } 972 mtx_unlock(&rs_mtx); 973 rt_setup_new_rs(ifp, &error); 974 } 975 976 static void 977 tcp_rl_ifnet_departure(void *arg __unused, struct ifnet *ifp) 978 { 979 struct tcp_rate_set *rs, *nrs; 980 struct ifnet *tifp; 981 int i; 982 983 mtx_lock(&rs_mtx); 984 CK_LIST_FOREACH_SAFE(rs, &int_rs, next, nrs) { 985 if ((rs->rs_ifp == ifp) && 986 (rs->rs_if_dunit == ifp->if_dunit)) { 987 CK_LIST_REMOVE(rs, next); 988 rs_number_alive--; 989 rs_number_dead++; 990 rs->rs_flags |= RS_IS_DEAD; 991 for (i = 0; i < rs->rs_rate_cnt; i++) { 992 if (rs->rs_rlt[i].flags & HDWRPACE_TAGPRESENT) { 993 tifp = rs->rs_rlt[i].tag->ifp; 994 in_pcbdetach_tag(tifp, rs->rs_rlt[i].tag); 995 rs->rs_rlt[i].tag = NULL; 996 } 997 rs->rs_rlt[i].flags = HDWRPACE_IFPDEPARTED; 998 } 999 if (rs->rs_flows_using == 0) { 1000 /* 1001 * No references left, so we can schedule the 1002 * destruction after the epoch (with a caveat). 1003 */ 1004 rs->rs_flags |= RS_FUNERAL_SCHD; 1005 epoch_call(net_epoch, &rs->rs_epoch_ctx, rs_destroy); 1006 } 1007 break; 1008 } 1009 } 1010 mtx_unlock(&rs_mtx); 1011 } 1012 1013 static void 1014 tcp_rl_shutdown(void *arg __unused, int howto __unused) 1015 { 1016 struct tcp_rate_set *rs, *nrs; 1017 struct ifnet *tifp; 1018 int i; 1019 1020 mtx_lock(&rs_mtx); 1021 CK_LIST_FOREACH_SAFE(rs, &int_rs, next, nrs) { 1022 CK_LIST_REMOVE(rs, next); 1023 rs_number_alive--; 1024 rs_number_dead++; 1025 rs->rs_flags |= RS_IS_DEAD; 1026 for (i = 0; i < rs->rs_rate_cnt; i++) { 1027 if (rs->rs_rlt[i].flags & HDWRPACE_TAGPRESENT) { 1028 tifp = rs->rs_rlt[i].tag->ifp; 1029 in_pcbdetach_tag(tifp, rs->rs_rlt[i].tag); 1030 rs->rs_rlt[i].tag = NULL; 1031 } 1032 rs->rs_rlt[i].flags = HDWRPACE_IFPDEPARTED; 1033 } 1034 if (rs->rs_flows_using != 0) { 1035 /* 1036 * We dont hold a reference 1037 * so we have nothing left to 1038 * do. 1039 */ 1040 } else { 1041 /* 1042 * No references left, so we can destroy it 1043 * after the epoch. 1044 */ 1045 rs->rs_flags |= RS_FUNERAL_SCHD; 1046 epoch_call(net_epoch, &rs->rs_epoch_ctx, rs_destroy); 1047 } 1048 } 1049 mtx_unlock(&rs_mtx); 1050 } 1051 1052 const struct tcp_hwrate_limit_table * 1053 tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp, 1054 uint64_t bytes_per_sec, int flags, int *error) 1055 { 1056 const struct tcp_hwrate_limit_table *rte; 1057 1058 if (tp->t_inpcb->inp_snd_tag == NULL) { 1059 /* 1060 * We are setting up a rate for the first time. 1061 */ 1062 if ((ifp->if_capabilities & IFCAP_TXRTLMT) == 0) { 1063 /* Not supported by the egress */ 1064 if (error) 1065 *error = ENODEV; 1066 return (NULL); 1067 } 1068 #ifdef KERN_TLS 1069 if (tp->t_inpcb->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) { 1070 /* 1071 * We currently can't do both TLS and hardware 1072 * pacing 1073 */ 1074 if (error) 1075 *error = EINVAL; 1076 return (NULL); 1077 } 1078 #endif 1079 rte = rt_setup_rate(tp->t_inpcb, ifp, bytes_per_sec, flags, error); 1080 } else { 1081 /* 1082 * We are modifying a rate, wrong interface? 1083 */ 1084 if (error) 1085 *error = EINVAL; 1086 rte = NULL; 1087 } 1088 return (rte); 1089 } 1090 1091 const struct tcp_hwrate_limit_table * 1092 tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table *crte, 1093 struct tcpcb *tp, struct ifnet *ifp, 1094 uint64_t bytes_per_sec, int flags, int *error) 1095 { 1096 const struct tcp_hwrate_limit_table *nrte; 1097 const struct tcp_rate_set *rs; 1098 int is_indirect = 0; 1099 int err; 1100 1101 1102 if ((tp->t_inpcb->inp_snd_tag == NULL) || 1103 (crte == NULL)) { 1104 /* Wrong interface */ 1105 if (error) 1106 *error = EINVAL; 1107 return (NULL); 1108 } 1109 rs = crte->ptbl; 1110 if ((rs->rs_flags & RS_IS_DEAD) || 1111 (crte->flags & HDWRPACE_IFPDEPARTED)) { 1112 /* Release the rate, and try anew */ 1113 re_rate: 1114 tcp_rel_pacing_rate(crte, tp); 1115 nrte = tcp_set_pacing_rate(tp, ifp, 1116 bytes_per_sec, flags, error); 1117 return (nrte); 1118 } 1119 if ((rs->rs_flags & RT_IS_INDIRECT ) == RT_IS_INDIRECT) 1120 is_indirect = 1; 1121 else 1122 is_indirect = 0; 1123 if ((is_indirect == 0) && 1124 ((ifp != rs->rs_ifp) || 1125 (ifp->if_dunit != rs->rs_if_dunit))) { 1126 /* 1127 * Something changed, the user is not pointing to the same 1128 * ifp? Maybe a route updated on this guy? 1129 */ 1130 goto re_rate; 1131 } else if (is_indirect) { 1132 /* 1133 * For indirect we have to dig in and find the real interface. 1134 */ 1135 struct ifnet *rifp; 1136 1137 rifp = rt_find_real_interface(ifp, tp->t_inpcb, error); 1138 if (rifp == NULL) { 1139 /* Can't find it? */ 1140 goto re_rate; 1141 } 1142 if ((rifp != rs->rs_ifp) || 1143 (ifp->if_dunit != rs->rs_if_dunit)) { 1144 goto re_rate; 1145 } 1146 } 1147 nrte = tcp_find_suitable_rate(rs, bytes_per_sec, flags); 1148 if (nrte == crte) { 1149 /* No change */ 1150 if (error) 1151 *error = 0; 1152 return (crte); 1153 } 1154 if (nrte == NULL) { 1155 /* Release the old rate */ 1156 tcp_rel_pacing_rate(crte, tp); 1157 return (NULL); 1158 } 1159 /* Change rates to our new entry */ 1160 err = in_pcbmodify_txrtlmt(tp->t_inpcb, nrte->rate); 1161 if (err) { 1162 if (error) 1163 *error = err; 1164 return (NULL); 1165 } 1166 if (error) 1167 *error = 0; 1168 return (nrte); 1169 } 1170 1171 void 1172 tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table *crte, struct tcpcb *tp) 1173 { 1174 const struct tcp_rate_set *crs; 1175 struct tcp_rate_set *rs; 1176 uint64_t pre; 1177 1178 crs = crte->ptbl; 1179 /* 1180 * Now we must break the const 1181 * in order to release our refcount. 1182 */ 1183 rs = __DECONST(struct tcp_rate_set *, crs); 1184 pre = atomic_fetchadd_64(&rs->rs_flows_using, -1); 1185 if (pre == 1) { 1186 mtx_lock(&rs_mtx); 1187 /* 1188 * Is it dead? 1189 */ 1190 if ((rs->rs_flags & RS_IS_DEAD) && 1191 ((rs->rs_flags & RS_FUNERAL_SCHD) == 0)){ 1192 /* 1193 * We were the last, 1194 * and a funeral is not pending, so 1195 * we must schedule it. 1196 */ 1197 rs->rs_flags |= RS_FUNERAL_SCHD; 1198 epoch_call(net_epoch, &rs->rs_epoch_ctx, rs_destroy); 1199 } 1200 mtx_unlock(&rs_mtx); 1201 } 1202 in_pcbdetach_txrtlmt(tp->t_inpcb); 1203 } 1204 1205 static eventhandler_tag rl_ifnet_departs; 1206 static eventhandler_tag rl_ifnet_arrives; 1207 static eventhandler_tag rl_shutdown_start; 1208 1209 static void 1210 tcp_rs_init(void *st __unused) 1211 { 1212 CK_LIST_INIT(&int_rs); 1213 rs_number_alive = 0; 1214 rs_number_dead = 0;; 1215 mtx_init(&rs_mtx, "tcp_rs_mtx", "rsmtx", MTX_DEF); 1216 rl_ifnet_departs = EVENTHANDLER_REGISTER(ifnet_departure_event, 1217 tcp_rl_ifnet_departure, 1218 NULL, EVENTHANDLER_PRI_ANY); 1219 rl_ifnet_arrives = EVENTHANDLER_REGISTER(ifnet_link_event, 1220 tcp_rl_ifnet_link, 1221 NULL, EVENTHANDLER_PRI_ANY); 1222 rl_shutdown_start = EVENTHANDLER_REGISTER(shutdown_pre_sync, 1223 tcp_rl_shutdown, NULL, 1224 SHUTDOWN_PRI_FIRST); 1225 printf("TCP_ratelimit: Is now initialized\n"); 1226 } 1227 1228 SYSINIT(tcp_rl_init, SI_SUB_SMP + 1, SI_ORDER_ANY, tcp_rs_init, NULL); 1229 #endif 1230