1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet.h" 31 #include "opt_inet6.h" 32 #include "opt_route.h" 33 34 #include <sys/param.h> 35 #include <sys/eventhandler.h> 36 #include <sys/kernel.h> 37 #include <sys/sbuf.h> 38 #include <sys/lock.h> 39 #include <sys/rmlock.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/module.h> 43 #include <sys/kernel.h> 44 #include <sys/priv.h> 45 #include <sys/proc.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/sysctl.h> 49 #include <sys/syslog.h> 50 #include <sys/queue.h> 51 #include <net/vnet.h> 52 53 #include <net/if.h> 54 #include <net/if_var.h> 55 56 #include <netinet/in.h> 57 #include <netinet/in_var.h> 58 #include <netinet/ip.h> 59 #include <netinet/ip_var.h> 60 #ifdef INET6 61 #include <netinet/ip6.h> 62 #include <netinet6/ip6_var.h> 63 #endif 64 65 #include <net/route.h> 66 #include <net/route/nhop.h> 67 #include <net/route/route_ctl.h> 68 #include <net/route/route_var.h> 69 #include <net/route/fib_algo.h> 70 71 #include <machine/stdarg.h> 72 73 /* 74 * Fib lookup framework. 75 * 76 * This framework enables accelerated longest-prefix-match lookups for the 77 * routing tables by adding the ability to dynamically attach/detach lookup 78 * algorithms implementation to/from the datapath. 79 * 80 * flm - fib lookup modules - implementation of particular lookup algorithm 81 * fd - fib data - instance of an flm bound to specific routing table 82 * 83 * This file provides main framework functionality. 84 * 85 * The following are the features provided by the framework 86 * 87 * 1) nexhops abstraction -> provides transparent referencing, indexing 88 * and efficient idx->ptr mappings for nexthop and nexthop groups. 89 * 2) Routing table synchronisation 90 * 3) dataplane attachment points 91 * 4) automatic algorithm selection based on the provided preference. 92 * 93 * 94 * DATAPATH 95 * For each supported address family, there is a an allocated array of fib_dp 96 * structures, indexed by fib number. Each array entry contains callback function 97 * and its argument. 
 * This function will be called with a family-specific lookup key,
 * scope and the provided argument. This array gets re-created every time
 * a new algo instance is created. See the replace_rtables_family()
 * function for more details.
 *
 */

SYSCTL_DECL(_net_route);
SYSCTL_NODE(_net_route, OID_AUTO, algo, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Fib algorithm lookups");

/* Algorithm sync policy */

/* Time interval to bucket updates */
VNET_DEFINE(unsigned int, bucket_time_ms) = 50;
#define	V_bucket_time_ms	VNET(bucket_time_ms)
SYSCTL_UINT(_net_route_algo, OID_AUTO, bucket_time_ms, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(bucket_time_ms), 0, "Time interval to calculate update rate");

/* Minimum update rate to delay sync */
VNET_DEFINE(unsigned int, bucket_change_threshold_rate) = 500;
#define	V_bucket_change_threshold_rate	VNET(bucket_change_threshold_rate)
SYSCTL_UINT(_net_route_algo, OID_AUTO, bucket_change_threshold_rate, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(bucket_change_threshold_rate), 0, "Minimum update rate to delay sync");

/* Max allowed delay to sync */
VNET_DEFINE(unsigned int, fib_max_sync_delay_ms) = 1000;
#define	V_fib_max_sync_delay_ms	VNET(fib_max_sync_delay_ms)
SYSCTL_UINT(_net_route_algo, OID_AUTO, fib_max_sync_delay_ms, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(fib_max_sync_delay_ms), 0, "Maximum time to delay sync (ms)");


#ifdef INET6
VNET_DEFINE_STATIC(bool, algo_fixed_inet6) = false;
#define	V_algo_fixed_inet6	VNET(algo_fixed_inet6)
SYSCTL_NODE(_net_route_algo, OID_AUTO, inet6, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "IPv6 longest prefix match lookups");
#endif
#ifdef INET
VNET_DEFINE_STATIC(bool, algo_fixed_inet) = false;
#define	V_algo_fixed_inet	VNET(algo_fixed_inet)
SYSCTL_NODE(_net_route_algo, OID_AUTO, inet, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "IPv4 longest prefix match lookups");
#endif

/* Fib instance counter */
static uint32_t fib_gen = 0;

struct nhop_ref_table {
	uint32_t		count;
	int32_t			refcnt[0];
};

enum fib_callout_action {
	FDA_NONE,	/* No callout scheduled */
	FDA_REBUILD,	/* Asks to rebuild algo instance */
	FDA_EVAL,	/* Asks to evaluate if the current algo is still the best */
	FDA_BATCH,	/* Asks to submit batch of updates to the algo */
};

struct fib_sync_status {
	struct timeval		diverge_time;	/* ts when diverged */
	uint32_t		num_changes;	/* number of changes since sync */
	uint32_t		bucket_changes;	/* num changes within the current bucket */
	uint64_t		bucket_id;	/* 50ms bucket # */
	struct fib_change_queue	fd_change_queue;/* list of scheduled entries */
};

/*
 * Data structure for the fib lookup instance tied to the particular rib.
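 * A pointer to this structure is passed to the algorithm's flm_init_cb()
 * and is the handle accepted by the accessors defined later in this file
 * (fib_get_rh(), fib_get_nhop_array(), fib_get_nhop_idx()), which let
 * modules reach the rib and the nexthop index table.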
167 */ 168 struct fib_data { 169 uint32_t number_nhops; /* current # of nhops */ 170 uint8_t hit_nhops; /* true if out of nhop limit */ 171 uint8_t init_done; /* true if init is competed */ 172 uint32_t fd_dead:1; /* Scheduled for deletion */ 173 uint32_t fd_linked:1; /* true if linked */ 174 uint32_t fd_need_rebuild:1; /* true if rebuild scheduled */ 175 uint32_t fd_batch:1; /* true if batched notification scheduled */ 176 uint8_t fd_family; /* family */ 177 uint32_t fd_fibnum; /* fibnum */ 178 uint32_t fd_failed_rebuilds; /* stat: failed rebuilds */ 179 uint32_t fd_gen; /* instance gen# */ 180 struct callout fd_callout; /* rebuild callout */ 181 enum fib_callout_action fd_callout_action; /* Callout action to take */ 182 void *fd_algo_data; /* algorithm data */ 183 struct nhop_object **nh_idx; /* nhop idx->ptr array */ 184 struct nhop_ref_table *nh_ref_table; /* array with # of nhop references */ 185 struct rib_head *fd_rh; /* RIB table we're attached to */ 186 struct rib_subscription *fd_rs; /* storing table subscription */ 187 struct fib_dp fd_dp; /* fib datapath data */ 188 struct vnet *fd_vnet; /* vnet fib belongs to */ 189 struct epoch_context fd_epoch_ctx; /* epoch context for deletion */ 190 struct fib_lookup_module *fd_flm;/* pointer to the lookup module */ 191 struct fib_sync_status fd_ss; /* State relevant to the rib sync */ 192 uint32_t fd_num_changes; /* number of changes since last callout */ 193 TAILQ_ENTRY(fib_data) entries; /* list of all fds in vnet */ 194 }; 195 196 static bool rebuild_fd(struct fib_data *fd, const char *reason); 197 static bool rebuild_fd_flm(struct fib_data *fd, struct fib_lookup_module *flm_new); 198 static void handle_fd_callout(void *_data); 199 static void destroy_fd_instance_epoch(epoch_context_t ctx); 200 static enum flm_op_result attach_datapath(struct fib_data *fd); 201 static bool is_idx_free(struct fib_data *fd, uint32_t index); 202 static void set_algo_fixed(struct rib_head *rh); 203 static bool is_algo_fixed(struct rib_head *rh); 204 205 static uint32_t fib_ref_nhop(struct fib_data *fd, struct nhop_object *nh); 206 static void fib_unref_nhop(struct fib_data *fd, struct nhop_object *nh); 207 208 static struct fib_lookup_module *fib_check_best_algo(struct rib_head *rh, 209 struct fib_lookup_module *orig_flm); 210 static void fib_unref_algo(struct fib_lookup_module *flm); 211 static bool flm_error_check(const struct fib_lookup_module *flm, uint32_t fibnum); 212 213 struct mtx fib_mtx; 214 #define FIB_MOD_LOCK() mtx_lock(&fib_mtx) 215 #define FIB_MOD_UNLOCK() mtx_unlock(&fib_mtx) 216 #define FIB_MOD_LOCK_ASSERT() mtx_assert(&fib_mtx, MA_OWNED) 217 218 MTX_SYSINIT(fib_mtx, &fib_mtx, "algo list mutex", MTX_DEF); 219 220 /* Algorithm has to be this percent better than the current to switch */ 221 #define BEST_DIFF_PERCENT (5 * 256 / 100) 222 /* Schedule algo re-evaluation X seconds after a change */ 223 #define ALGO_EVAL_DELAY_MS 30000 224 /* Force algo re-evaluation after X changes */ 225 #define ALGO_EVAL_NUM_ROUTES 100 226 /* Try to setup algorithm X times */ 227 #define FIB_MAX_TRIES 32 228 /* Max amount of supported nexthops */ 229 #define FIB_MAX_NHOPS 262144 230 #define FIB_CALLOUT_DELAY_MS 50 231 232 233 /* Debug */ 234 static int flm_debug_level = LOG_NOTICE; 235 SYSCTL_INT(_net_route_algo, OID_AUTO, debug_level, CTLFLAG_RW | CTLFLAG_RWTUN, 236 &flm_debug_level, 0, "debuglevel"); 237 #define FLM_MAX_DEBUG_LEVEL LOG_DEBUG 238 #ifndef LOG_DEBUG2 239 #define LOG_DEBUG2 8 240 #endif 241 242 #define _PASS_MSG(_l) (flm_debug_level >= (_l)) 243 
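/*
 * The debug macros below use a two-level gate: FD_PRINTF(LOG_DEBUG, fd, ...)
 * token-pastes the level into FD_PRINTF_LOG_DEBUG(), so any level above
 * FLM_MAX_DEBUG_LEVEL compiles to nothing, while the remaining levels are
 * still filtered at runtime against net.route.algo.debug_level via
 * _PASS_MSG(). For example, FD_PRINTF(LOG_DEBUG2, fd, "x=%d", x) becomes
 * FD_PRINTF_LOG_DEBUG2(), which is empty unless FLM_MAX_DEBUG_LEVEL is
 * raised to LOG_DEBUG2.
 */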
#define ALGO_PRINTF(_fmt, ...) printf("[fib_algo] %s: " _fmt "\n", __func__, ##__VA_ARGS__) 244 #define _ALGO_PRINTF(_fib, _fam, _aname, _gen, _func, _fmt, ...) \ 245 printf("[fib_algo] %s.%u (%s#%u) %s: " _fmt "\n",\ 246 print_family(_fam), _fib, _aname, _gen, _func, ## __VA_ARGS__) 247 #define _RH_PRINTF(_fib, _fam, _func, _fmt, ...) \ 248 printf("[fib_algo] %s.%u %s: " _fmt "\n", print_family(_fam), _fib, _func, ## __VA_ARGS__) 249 #define RH_PRINTF(_l, _rh, _fmt, ...) if (_PASS_MSG(_l)) { \ 250 _RH_PRINTF(_rh->rib_fibnum, _rh->rib_family, __func__, _fmt, ## __VA_ARGS__);\ 251 } 252 #define FD_PRINTF(_l, _fd, _fmt, ...) FD_PRINTF_##_l(_l, _fd, _fmt, ## __VA_ARGS__) 253 #define _FD_PRINTF(_l, _fd, _fmt, ...) if (_PASS_MSG(_l)) { \ 254 _ALGO_PRINTF(_fd->fd_fibnum, _fd->fd_family, _fd->fd_flm->flm_name, \ 255 _fd->fd_gen, __func__, _fmt, ## __VA_ARGS__); \ 256 } 257 #if FLM_MAX_DEBUG_LEVEL>=LOG_DEBUG2 258 #define FD_PRINTF_LOG_DEBUG2 _FD_PRINTF 259 #else 260 #define FD_PRINTF_LOG_DEBUG2(_l, _fd, _fmt, ...) 261 #endif 262 #if FLM_MAX_DEBUG_LEVEL>=LOG_DEBUG 263 #define FD_PRINTF_LOG_DEBUG _FD_PRINTF 264 #else 265 #define FD_PRINTF_LOG_DEBUG() 266 #endif 267 #if FLM_MAX_DEBUG_LEVEL>=LOG_INFO 268 #define FD_PRINTF_LOG_INFO _FD_PRINTF 269 #else 270 #define FD_PRINTF_LOG_INFO() 271 #endif 272 #define FD_PRINTF_LOG_NOTICE _FD_PRINTF 273 #define FD_PRINTF_LOG_ERR _FD_PRINTF 274 #define FD_PRINTF_LOG_WARNING _FD_PRINTF 275 276 277 /* List of all registered lookup algorithms */ 278 static TAILQ_HEAD(, fib_lookup_module) all_algo_list = TAILQ_HEAD_INITIALIZER(all_algo_list); 279 280 /* List of all fib lookup instances in the vnet */ 281 VNET_DEFINE_STATIC(TAILQ_HEAD(fib_data_head, fib_data), fib_data_list); 282 #define V_fib_data_list VNET(fib_data_list) 283 284 /* Datastructure for storing non-transient fib lookup module failures */ 285 struct fib_error { 286 int fe_family; 287 uint32_t fe_fibnum; /* failed rtable */ 288 struct fib_lookup_module *fe_flm; /* failed module */ 289 TAILQ_ENTRY(fib_error) entries;/* list of all errored entries */ 290 }; 291 VNET_DEFINE_STATIC(TAILQ_HEAD(fib_error_head, fib_error), fib_error_list); 292 #define V_fib_error_list VNET(fib_error_list) 293 294 /* Per-family array of fibnum -> {func, arg} mappings used in datapath */ 295 struct fib_dp_header { 296 struct epoch_context fdh_epoch_ctx; 297 uint32_t fdh_num_tables; 298 struct fib_dp fdh_idx[0]; 299 }; 300 301 /* 302 * Tries to add new non-transient algorithm error to the list of 303 * errors. 304 * Returns true on success. 305 */ 306 static bool 307 flm_error_add(struct fib_lookup_module *flm, uint32_t fibnum) 308 { 309 struct fib_error *fe; 310 311 fe = malloc(sizeof(struct fib_error), M_TEMP, M_NOWAIT | M_ZERO); 312 if (fe == NULL) 313 return (false); 314 fe->fe_flm = flm; 315 fe->fe_family = flm->flm_family; 316 fe->fe_fibnum = fibnum; 317 318 FIB_MOD_LOCK(); 319 /* Avoid duplicates by checking if error already exists first */ 320 if (flm_error_check(flm, fibnum)) { 321 FIB_MOD_UNLOCK(); 322 free(fe, M_TEMP); 323 return (true); 324 } 325 TAILQ_INSERT_HEAD(&V_fib_error_list, fe, entries); 326 FIB_MOD_UNLOCK(); 327 328 return (true); 329 } 330 331 /* 332 * True if non-transient error has been registered for @flm in @fibnum. 
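 * The error list is protected by fib_mtx; in this file both callers
 * (flm_error_add() and fib_check_best_algo()) invoke this check with
 * FIB_MOD_LOCK() held.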
333 */ 334 static bool 335 flm_error_check(const struct fib_lookup_module *flm, uint32_t fibnum) 336 { 337 const struct fib_error *fe; 338 339 TAILQ_FOREACH(fe, &V_fib_error_list, entries) { 340 if ((fe->fe_flm == flm) && (fe->fe_fibnum == fibnum)) 341 return (true); 342 } 343 344 return (false); 345 } 346 347 /* 348 * Clear all errors of algo specified by @flm. 349 */ 350 static void 351 fib_error_clear_flm(struct fib_lookup_module *flm) 352 { 353 struct fib_error *fe, *fe_tmp; 354 355 FIB_MOD_LOCK_ASSERT(); 356 357 TAILQ_FOREACH_SAFE(fe, &V_fib_error_list, entries, fe_tmp) { 358 if (fe->fe_flm == flm) { 359 TAILQ_REMOVE(&V_fib_error_list, fe, entries); 360 free(fe, M_TEMP); 361 } 362 } 363 } 364 365 /* 366 * Clears all errors in current VNET. 367 */ 368 static void 369 fib_error_clear() 370 { 371 struct fib_error *fe, *fe_tmp; 372 373 FIB_MOD_LOCK_ASSERT(); 374 375 TAILQ_FOREACH_SAFE(fe, &V_fib_error_list, entries, fe_tmp) { 376 TAILQ_REMOVE(&V_fib_error_list, fe, entries); 377 free(fe, M_TEMP); 378 } 379 } 380 381 static const char * 382 print_op_result(enum flm_op_result result) 383 { 384 switch (result) { 385 case FLM_SUCCESS: 386 return "success"; 387 case FLM_REBUILD: 388 return "rebuild"; 389 case FLM_BATCH: 390 return "batch"; 391 case FLM_ERROR: 392 return "error"; 393 } 394 395 return "unknown"; 396 } 397 398 static const char * 399 print_family(int family) 400 { 401 402 if (family == AF_INET) 403 return ("inet"); 404 else if (family == AF_INET6) 405 return ("inet6"); 406 else 407 return ("unknown"); 408 } 409 410 /* 411 * Debug function used by lookup algorithms. 412 * Outputs message denoted by @fmt, prepended by "[fib_algo] inetX.Y (algo) " 413 */ 414 void 415 fib_printf(int level, struct fib_data *fd, const char *func, char *fmt, ...) 416 { 417 char buf[128]; 418 va_list ap; 419 420 if (level > flm_debug_level) 421 return; 422 423 va_start(ap, fmt); 424 vsnprintf(buf, sizeof(buf), fmt, ap); 425 va_end(ap); 426 427 _ALGO_PRINTF(fd->fd_fibnum, fd->fd_family, fd->fd_flm->flm_name, 428 fd->fd_gen, func, "%s", buf); 429 } 430 431 /* 432 * Outputs list of algorithms supported by the provided address family. 433 */ 434 static int 435 print_algos_sysctl(struct sysctl_req *req, int family) 436 { 437 struct fib_lookup_module *flm; 438 struct sbuf sbuf; 439 int error, count = 0; 440 441 error = sysctl_wire_old_buffer(req, 0); 442 if (error == 0) { 443 sbuf_new_for_sysctl(&sbuf, NULL, 512, req); 444 TAILQ_FOREACH(flm, &all_algo_list, entries) { 445 if (flm->flm_family == family) { 446 if (count++ > 0) 447 sbuf_cat(&sbuf, ", "); 448 sbuf_cat(&sbuf, flm->flm_name); 449 } 450 } 451 error = sbuf_finish(&sbuf); 452 sbuf_delete(&sbuf); 453 } 454 return (error); 455 } 456 457 #ifdef INET6 458 static int 459 print_algos_sysctl_inet6(SYSCTL_HANDLER_ARGS) 460 { 461 462 return (print_algos_sysctl(req, AF_INET6)); 463 } 464 SYSCTL_PROC(_net_route_algo_inet6, OID_AUTO, algo_list, 465 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 466 print_algos_sysctl_inet6, "A", "List of IPv6 lookup algorithms"); 467 #endif 468 469 #ifdef INET 470 static int 471 print_algos_sysctl_inet(SYSCTL_HANDLER_ARGS) 472 { 473 474 return (print_algos_sysctl(req, AF_INET)); 475 } 476 SYSCTL_PROC(_net_route_algo_inet, OID_AUTO, algo_list, 477 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 478 print_algos_sysctl_inet, "A", "List of IPv4 lookup algorithms"); 479 #endif 480 481 /* 482 * Calculate delay between repeated failures. 483 * Returns current delay in milliseconds. 
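 * For example, after 3 consecutive failed rebuilds the next attempt is
 * scheduled (1 << 3) * FIB_CALLOUT_DELAY_MS = 400 ms out; the shift is
 * capped at 10, so the delay never exceeds 1024 * 50 ms (~51 seconds).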
484 */ 485 static uint32_t 486 callout_calc_delay_ms(struct fib_data *fd) 487 { 488 uint32_t shift; 489 490 if (fd->fd_failed_rebuilds > 10) 491 shift = 10; 492 else 493 shift = fd->fd_failed_rebuilds; 494 495 return ((1 << shift) * FIB_CALLOUT_DELAY_MS); 496 } 497 498 static void 499 schedule_callout(struct fib_data *fd, enum fib_callout_action action, int delay_ms) 500 { 501 502 FD_PRINTF(LOG_DEBUG, fd, "delay=%d action=%d", delay_ms, action); 503 fd->fd_callout_action = action; 504 callout_reset_sbt(&fd->fd_callout, SBT_1MS * delay_ms, 0, 505 handle_fd_callout, fd, 0); 506 } 507 508 static void 509 schedule_fd_rebuild(struct fib_data *fd, const char *reason) 510 { 511 512 RIB_WLOCK_ASSERT(fd->fd_rh); 513 514 if (!fd->fd_need_rebuild) { 515 fd->fd_need_rebuild = true; 516 /* Stop batch updates */ 517 fd->fd_batch = false; 518 519 /* 520 * Potentially re-schedules pending callout 521 * initiated by schedule_algo_eval. 522 */ 523 FD_PRINTF(LOG_INFO, fd, "Scheduling rebuild: %s (failures=%d)", 524 reason, fd->fd_failed_rebuilds); 525 schedule_callout(fd, FDA_REBUILD, callout_calc_delay_ms(fd)); 526 } 527 } 528 529 static int64_t 530 get_tv_diff_ms(const struct timeval *old_tv, const struct timeval *new_tv) 531 { 532 int64_t diff = 0; 533 534 diff = ((int64_t)(new_tv->tv_sec - old_tv->tv_sec)) * 1000; 535 diff += (new_tv->tv_usec - old_tv->tv_usec) / 1000; 536 537 return (diff); 538 } 539 540 static void 541 add_tv_diff_ms(struct timeval *tv, int ms) 542 { 543 tv->tv_sec += ms / 1000; 544 ms = ms % 1000; 545 if (ms * 1000 + tv->tv_usec < 1000000) 546 tv->tv_usec += ms * 1000; 547 else { 548 tv->tv_sec += 1; 549 tv->tv_usec = ms * 1000 + tv->tv_usec - 1000000; 550 } 551 } 552 553 /* 554 * Marks the time when algo state diverges from the rib state. 555 */ 556 static void 557 mark_diverge_time(struct fib_data *fd) 558 { 559 struct fib_sync_status *fd_ss = &fd->fd_ss; 560 561 getmicrouptime(&fd_ss->diverge_time); 562 fd_ss->bucket_id = 0; 563 fd_ss->bucket_changes = 0; 564 } 565 566 /* 567 * Calculates and updates the next algorithm sync time, based on the current activity. 568 * 569 * The intent is to provide reasonable balance between the update 570 * latency and efficient batching when changing large amount of routes. 571 * 572 * High-level algorithm looks the following: 573 * 1) all changes are bucketed in 50ms intervals 574 * 2) If amount of changes within the bucket is greater than the threshold, 575 * the update gets delayed, up to maximum delay threshold. 
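 * For example, with the defaults (bucket_time_ms = 50,
 * bucket_change_threshold_rate = 500/sec, fib_max_sync_delay_ms = 1000)
 * the per-bucket threshold is 500 * 50 / 1000 = 25 changes. When a bucket
 * accumulates its 25th change, the sync is pushed out to
 * (bucket_id + 2) * 50 ms after the divergence point; once that would
 * exceed 1000 ms the callout is left alone, capping the total delay.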
576 */ 577 static void 578 update_rebuild_delay(struct fib_data *fd, enum fib_callout_action action) 579 { 580 uint32_t bucket_id, new_delay = 0; 581 struct timeval tv; 582 583 /* Fetch all variables at once to ensure consistent reads */ 584 uint32_t bucket_time_ms = V_bucket_time_ms; 585 uint32_t threshold_rate = V_bucket_change_threshold_rate; 586 uint32_t max_delay_ms = V_fib_max_sync_delay_ms; 587 588 if (bucket_time_ms == 0) 589 bucket_time_ms = 50; 590 /* calculate per-bucket threshold rate */ 591 threshold_rate = threshold_rate * bucket_time_ms / 1000; 592 593 getmicrouptime(&tv); 594 595 struct fib_sync_status *fd_ss = &fd->fd_ss; 596 597 bucket_id = get_tv_diff_ms(&fd_ss->diverge_time, &tv) / bucket_time_ms; 598 599 if (fd_ss->bucket_id == bucket_id) { 600 fd_ss->bucket_changes++; 601 if (fd_ss->bucket_changes == threshold_rate) { 602 new_delay = (bucket_id + 2) * bucket_time_ms; 603 if (new_delay <= max_delay_ms) { 604 FD_PRINTF(LOG_DEBUG, fd, 605 "hit threshold of %u routes, delay update," 606 "bucket: %u, total delay: %u", 607 threshold_rate, bucket_id + 1, new_delay); 608 } else { 609 new_delay = 0; 610 FD_PRINTF(LOG_DEBUG, fd, 611 "maximum sync delay (%u ms) reached", max_delay_ms); 612 } 613 } else if ((bucket_id == 0) && (fd_ss->bucket_changes == 1)) 614 new_delay = bucket_time_ms; 615 } else { 616 fd_ss->bucket_id = bucket_id; 617 fd_ss->bucket_changes = 1; 618 } 619 620 if (new_delay > 0) { 621 /* Calculated time has been updated */ 622 struct timeval new_tv = fd_ss->diverge_time; 623 add_tv_diff_ms(&new_tv, new_delay); 624 625 int32_t delay_ms = get_tv_diff_ms(&tv, &new_tv); 626 schedule_callout(fd, action, delay_ms); 627 } 628 } 629 630 static void 631 update_algo_state(struct fib_data *fd) 632 { 633 634 RIB_WLOCK_ASSERT(fd->fd_rh); 635 636 if (fd->fd_batch || fd->fd_need_rebuild) { 637 enum fib_callout_action action = fd->fd_need_rebuild ? 
FDA_REBUILD : FDA_BATCH; 638 update_rebuild_delay(fd, action); 639 return; 640 } 641 642 if (fd->fd_num_changes++ == 0) { 643 /* Start callout to consider switch */ 644 if (!callout_pending(&fd->fd_callout)) 645 schedule_callout(fd, FDA_EVAL, ALGO_EVAL_DELAY_MS); 646 } else if (fd->fd_num_changes == ALGO_EVAL_NUM_ROUTES) { 647 /* Reset callout to exec immediately */ 648 if (fd->fd_callout_action == FDA_EVAL) 649 schedule_callout(fd, FDA_EVAL, 1); 650 } 651 } 652 653 static bool 654 need_immediate_sync(struct fib_data *fd, struct rib_cmd_info *rc) 655 { 656 struct nhop_object *nh; 657 658 /* Sync addition/removal of interface routes */ 659 switch (rc->rc_cmd) { 660 case RTM_ADD: 661 nh = rc->rc_nh_new; 662 if (!NH_IS_NHGRP(nh) && (!(nh->nh_flags & NHF_GATEWAY))) 663 return (true); 664 break; 665 case RTM_DELETE: 666 nh = rc->rc_nh_old; 667 if (!NH_IS_NHGRP(nh) && (!(nh->nh_flags & NHF_GATEWAY))) 668 return (true); 669 break; 670 } 671 672 return (false); 673 } 674 675 static bool 676 apply_rtable_changes(struct fib_data *fd) 677 { 678 enum flm_op_result result; 679 struct fib_change_queue *q = &fd->fd_ss.fd_change_queue; 680 681 result = fd->fd_flm->flm_change_rib_items_cb(fd->fd_rh, q, fd->fd_algo_data); 682 683 if (result == FLM_SUCCESS) { 684 for (int i = 0; i < q->count; i++) 685 if (q->entries[i].nh_old) 686 fib_unref_nhop(fd, q->entries[i].nh_old); 687 q->count = 0; 688 } 689 fd->fd_batch = false; 690 691 return (result == FLM_SUCCESS); 692 } 693 694 static bool 695 fill_change_entry(struct fib_data *fd, struct fib_change_entry *ce, struct rib_cmd_info *rc) 696 { 697 int plen = 0; 698 699 switch (fd->fd_family) { 700 case AF_INET: 701 rt_get_inet_prefix_plen(rc->rc_rt, &ce->addr4, &plen, &ce->scopeid); 702 break; 703 case AF_INET6: 704 rt_get_inet6_prefix_plen(rc->rc_rt, &ce->addr6, &plen, &ce->scopeid); 705 break; 706 } 707 708 ce->plen = plen; 709 ce->nh_old = rc->rc_nh_old; 710 ce->nh_new = rc->rc_nh_new; 711 if (ce->nh_new != NULL) { 712 if (fib_ref_nhop(fd, ce->nh_new) == 0) 713 return (false); 714 } 715 716 return (true); 717 } 718 719 static bool 720 queue_rtable_change(struct fib_data *fd, struct rib_cmd_info *rc) 721 { 722 struct fib_change_queue *q = &fd->fd_ss.fd_change_queue; 723 724 if (q->count >= q->size) { 725 uint32_t q_size; 726 727 if (q->size == 0) 728 q_size = 256; /* ~18k memory */ 729 else 730 q_size = q->size * 2; 731 732 size_t size = q_size * sizeof(struct fib_change_entry); 733 void *a = realloc(q->entries, size, M_TEMP, M_NOWAIT | M_ZERO); 734 if (a == NULL) { 735 FD_PRINTF(LOG_INFO, fd, "Unable to realloc queue for %u elements", 736 q_size); 737 return (false); 738 } 739 q->entries = a; 740 q->size = q_size; 741 } 742 743 return (fill_change_entry(fd, &q->entries[q->count++], rc)); 744 } 745 746 /* 747 * Rib subscription handler. Checks if the algorithm is ready to 748 * receive updates, handles nexthop refcounting and passes change 749 * data to the algorithm callback. 750 */ 751 static void 752 handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc, 753 void *_data) 754 { 755 struct fib_data *fd = (struct fib_data *)_data; 756 enum flm_op_result result; 757 758 RIB_WLOCK_ASSERT(rnh); 759 760 /* 761 * There is a small gap between subscribing for route changes 762 * and initiating rtable dump. Avoid receiving route changes 763 * prior to finishing rtable dump by checking `init_done`. 
764 */ 765 if (!fd->init_done) 766 return; 767 768 bool immediate_sync = need_immediate_sync(fd, rc); 769 770 /* Consider scheduling algorithm re-evaluation */ 771 update_algo_state(fd); 772 773 /* 774 * If algo requested rebuild, stop sending updates by default. 775 * This simplifies nexthop refcount handling logic. 776 */ 777 if (fd->fd_need_rebuild) { 778 if (immediate_sync) 779 rebuild_fd(fd, "rtable change type enforced sync"); 780 return; 781 } 782 783 /* 784 * Algo requested updates to be delivered in batches. 785 * Add the current change to the queue and return. 786 */ 787 if (fd->fd_batch) { 788 if (immediate_sync) { 789 if (!queue_rtable_change(fd, rc) || !apply_rtable_changes(fd)) 790 rebuild_fd(fd, "batch sync failed"); 791 } else { 792 if (!queue_rtable_change(fd, rc)) 793 schedule_fd_rebuild(fd, "batch queue failed"); 794 } 795 return; 796 } 797 798 /* 799 * Maintain guarantee that every nexthop returned by the dataplane 800 * lookup has > 0 refcount, so can be safely referenced within current 801 * epoch. 802 */ 803 if (rc->rc_nh_new != NULL) { 804 if (fib_ref_nhop(fd, rc->rc_nh_new) == 0) { 805 /* ran out of indexes */ 806 schedule_fd_rebuild(fd, "ran out of nhop indexes"); 807 return; 808 } 809 } 810 811 result = fd->fd_flm->flm_change_rib_item_cb(rnh, rc, fd->fd_algo_data); 812 813 switch (result) { 814 case FLM_SUCCESS: 815 /* Unref old nexthop on success */ 816 if (rc->rc_nh_old != NULL) 817 fib_unref_nhop(fd, rc->rc_nh_old); 818 break; 819 case FLM_BATCH: 820 821 /* 822 * Algo asks to batch the changes. 823 */ 824 if (queue_rtable_change(fd, rc)) { 825 if (!immediate_sync) { 826 fd->fd_batch = true; 827 mark_diverge_time(fd); 828 update_rebuild_delay(fd, FDA_BATCH); 829 break; 830 } 831 if (apply_rtable_changes(fd)) 832 break; 833 } 834 FD_PRINTF(LOG_ERR, fd, "batched sync failed, force the rebuild"); 835 836 case FLM_REBUILD: 837 838 /* 839 * Algo is not able to apply the update. 840 * Schedule algo rebuild. 841 */ 842 if (!immediate_sync) { 843 mark_diverge_time(fd); 844 schedule_fd_rebuild(fd, "algo requested rebuild"); 845 break; 846 } 847 848 FD_PRINTF(LOG_INFO, fd, "running sync rebuild"); 849 rebuild_fd(fd, "rtable change type enforced sync"); 850 break; 851 case FLM_ERROR: 852 853 /* 854 * Algo reported a non-recoverable error. 855 * Record the error and schedule rebuild, which will 856 * trigger best algo selection. 857 */ 858 FD_PRINTF(LOG_ERR, fd, "algo reported non-recoverable error"); 859 if (!flm_error_add(fd->fd_flm, fd->fd_fibnum)) 860 FD_PRINTF(LOG_ERR, fd, "failed to ban algo"); 861 schedule_fd_rebuild(fd, "algo reported non-recoverable error"); 862 } 863 } 864 865 static void 866 estimate_nhop_scale(const struct fib_data *old_fd, struct fib_data *fd) 867 { 868 869 if (old_fd == NULL) { 870 // TODO: read from rtable 871 fd->number_nhops = 16; 872 return; 873 } 874 875 if (old_fd->hit_nhops && old_fd->number_nhops < FIB_MAX_NHOPS) 876 fd->number_nhops = 2 * old_fd->number_nhops; 877 else 878 fd->number_nhops = old_fd->number_nhops; 879 } 880 881 struct walk_cbdata { 882 struct fib_data *fd; 883 flm_dump_t *func; 884 enum flm_op_result result; 885 }; 886 887 /* 888 * Handler called after all rtenties have been dumped. 889 * Performs post-dump framework checks and calls 890 * algo:flm_dump_end_cb(). 891 * 892 * Updates walk_cbdata result. 
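 * Invoked by rib_walk_ext_locked() for both the pre- and post-dump stages;
 * only RIB_WALK_HOOK_POST with a fully successful dump reaches
 * flm_dump_end_cb(), after which init_done is set and rib change
 * notifications start being processed for this instance.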
893 */ 894 static void 895 sync_algo_end_cb(struct rib_head *rnh, enum rib_walk_hook stage, void *_data) 896 { 897 struct walk_cbdata *w = (struct walk_cbdata *)_data; 898 struct fib_data *fd = w->fd; 899 900 RIB_WLOCK_ASSERT(w->fd->fd_rh); 901 902 if (rnh->rib_dying) { 903 w->result = FLM_ERROR; 904 return; 905 } 906 907 if (fd->hit_nhops) { 908 FD_PRINTF(LOG_INFO, fd, "ran out of nexthops at %u nhops", 909 fd->nh_ref_table->count); 910 if (w->result == FLM_SUCCESS) 911 w->result = FLM_REBUILD; 912 return; 913 } 914 915 if (stage != RIB_WALK_HOOK_POST || w->result != FLM_SUCCESS) 916 return; 917 918 /* Post-dump hook, dump successful */ 919 w->result = fd->fd_flm->flm_dump_end_cb(fd->fd_algo_data, &fd->fd_dp); 920 921 if (w->result == FLM_SUCCESS) { 922 /* Mark init as done to allow routing updates */ 923 fd->init_done = 1; 924 } 925 } 926 927 /* 928 * Callback for each entry in rib. 929 * Calls algo:flm_dump_rib_item_cb func as a part of initial 930 * route table synchronisation. 931 */ 932 static int 933 sync_algo_cb(struct rtentry *rt, void *_data) 934 { 935 struct walk_cbdata *w = (struct walk_cbdata *)_data; 936 937 RIB_WLOCK_ASSERT(w->fd->fd_rh); 938 939 if (w->result == FLM_SUCCESS && w->func) { 940 941 /* 942 * Reference nexthops to maintain guarantee that 943 * each nexthop returned by datapath has > 0 references 944 * and can be safely referenced within current epoch. 945 */ 946 struct nhop_object *nh = rt_get_raw_nhop(rt); 947 if (fib_ref_nhop(w->fd, nh) != 0) 948 w->result = w->func(rt, w->fd->fd_algo_data); 949 else 950 w->result = FLM_REBUILD; 951 } 952 953 return (0); 954 } 955 956 /* 957 * Dump all routing table state to the algo instance. 958 */ 959 static enum flm_op_result 960 sync_algo(struct fib_data *fd) 961 { 962 struct walk_cbdata w = { 963 .fd = fd, 964 .func = fd->fd_flm->flm_dump_rib_item_cb, 965 .result = FLM_SUCCESS, 966 }; 967 968 rib_walk_ext_locked(fd->fd_rh, sync_algo_cb, sync_algo_end_cb, &w); 969 970 FD_PRINTF(LOG_INFO, fd, 971 "initial dump completed (rtable version: %d), result: %s", 972 fd->fd_rh->rnh_gen, print_op_result(w.result)); 973 974 return (w.result); 975 } 976 977 /* 978 * Schedules epoch-backed @fd instance deletion. 979 * * Unlinks @fd from the list of active algo instances. 980 * * Removes rib subscription. 981 * * Stops callout. 982 * * Schedules actual deletion. 983 * 984 * Assume @fd is already unlinked from the datapath. 985 */ 986 static int 987 schedule_destroy_fd_instance(struct fib_data *fd, bool in_callout) 988 { 989 bool is_dead; 990 991 NET_EPOCH_ASSERT(); 992 RIB_WLOCK_ASSERT(fd->fd_rh); 993 994 FIB_MOD_LOCK(); 995 is_dead = fd->fd_dead; 996 if (!is_dead) 997 fd->fd_dead = true; 998 if (fd->fd_linked) { 999 TAILQ_REMOVE(&V_fib_data_list, fd, entries); 1000 fd->fd_linked = false; 1001 } 1002 FIB_MOD_UNLOCK(); 1003 if (is_dead) 1004 return (0); 1005 1006 FD_PRINTF(LOG_INFO, fd, "DETACH"); 1007 1008 if (fd->fd_rs != NULL) 1009 rib_unsibscribe_locked(fd->fd_rs); 1010 1011 /* 1012 * After rib_unsubscribe() no _new_ handle_rtable_change_cb() calls 1013 * will be executed, hence no _new_ callout schedules will happen. 1014 */ 1015 callout_stop(&fd->fd_callout); 1016 1017 epoch_call(net_epoch_preempt, destroy_fd_instance_epoch, 1018 &fd->fd_epoch_ctx); 1019 1020 return (0); 1021 } 1022 1023 /* 1024 * Wipe all fd instances from the list matching rib specified by @rh. 1025 * If @keep_first is set, remove all but the first record. 
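 * Runs in two passes: matching instances are first unlinked from
 * V_fib_data_list under FIB_MOD_LOCK(), then each collected instance is
 * destroyed within the network epoch, taking the rib write lock unless
 * the caller runs from callout context where the lock is already held.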
1026 */ 1027 static void 1028 fib_cleanup_algo(struct rib_head *rh, bool keep_first, bool in_callout) 1029 { 1030 struct fib_data_head tmp_head = TAILQ_HEAD_INITIALIZER(tmp_head); 1031 struct fib_data *fd, *fd_tmp; 1032 struct epoch_tracker et; 1033 1034 FIB_MOD_LOCK(); 1035 TAILQ_FOREACH_SAFE(fd, &V_fib_data_list, entries, fd_tmp) { 1036 if (fd->fd_rh == rh) { 1037 if (keep_first) { 1038 keep_first = false; 1039 continue; 1040 } 1041 TAILQ_REMOVE(&V_fib_data_list, fd, entries); 1042 fd->fd_linked = false; 1043 TAILQ_INSERT_TAIL(&tmp_head, fd, entries); 1044 } 1045 } 1046 FIB_MOD_UNLOCK(); 1047 1048 /* Pass 2: remove each entry */ 1049 NET_EPOCH_ENTER(et); 1050 TAILQ_FOREACH_SAFE(fd, &tmp_head, entries, fd_tmp) { 1051 if (!in_callout) 1052 RIB_WLOCK(fd->fd_rh); 1053 schedule_destroy_fd_instance(fd, in_callout); 1054 if (!in_callout) 1055 RIB_WUNLOCK(fd->fd_rh); 1056 } 1057 NET_EPOCH_EXIT(et); 1058 } 1059 1060 void 1061 fib_destroy_rib(struct rib_head *rh) 1062 { 1063 1064 /* 1065 * rnh has `is_dying` flag set, so setup of new fd's will fail at 1066 * sync_algo() stage, preventing new entries to be added to the list 1067 * of active algos. Remove all existing entries for the particular rib. 1068 */ 1069 fib_cleanup_algo(rh, false, false); 1070 } 1071 1072 /* 1073 * Finalises fd destruction by freeing all fd resources. 1074 */ 1075 static void 1076 destroy_fd_instance(struct fib_data *fd) 1077 { 1078 1079 FD_PRINTF(LOG_INFO, fd, "destroy fd %p", fd); 1080 1081 /* Call destroy callback first */ 1082 if (fd->fd_algo_data != NULL) 1083 fd->fd_flm->flm_destroy_cb(fd->fd_algo_data); 1084 1085 /* Nhop table */ 1086 if ((fd->nh_idx != NULL) && (fd->nh_ref_table != NULL)) { 1087 for (int i = 0; i < fd->number_nhops; i++) { 1088 if (!is_idx_free(fd, i)) { 1089 FD_PRINTF(LOG_DEBUG2, fd, " FREE nhop %d %p", 1090 i, fd->nh_idx[i]); 1091 nhop_free_any(fd->nh_idx[i]); 1092 } 1093 } 1094 free(fd->nh_idx, M_RTABLE); 1095 } 1096 if (fd->nh_ref_table != NULL) 1097 free(fd->nh_ref_table, M_RTABLE); 1098 1099 if (fd->fd_ss.fd_change_queue.entries != NULL) 1100 free(fd->fd_ss.fd_change_queue.entries, M_TEMP); 1101 1102 fib_unref_algo(fd->fd_flm); 1103 1104 free(fd, M_RTABLE); 1105 } 1106 1107 /* 1108 * Epoch callback indicating fd is safe to destroy 1109 */ 1110 static void 1111 destroy_fd_instance_epoch(epoch_context_t ctx) 1112 { 1113 struct fib_data *fd; 1114 1115 fd = __containerof(ctx, struct fib_data, fd_epoch_ctx); 1116 1117 destroy_fd_instance(fd); 1118 } 1119 1120 /* 1121 * Tries to setup fd instance. 1122 * - Allocates fd/nhop table 1123 * - Runs algo:flm_init_cb algo init 1124 * - Subscribes fd to the rib 1125 * - Runs rtable dump 1126 * - Adds instance to the list of active instances. 1127 * 1128 * Returns: operation result. Fills in @pfd with resulting fd on success. 
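 * Transient failures (memory allocation, rib subscription) return
 * FLM_REBUILD so the caller can retry or reschedule; an FLM_ERROR coming
 * from the algorithm init is treated as non-recoverable and leads to the
 * algo being banned for this fib by setup_fd_instance().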
1129 * 1130 */ 1131 static enum flm_op_result 1132 try_setup_fd_instance(struct fib_lookup_module *flm, struct rib_head *rh, 1133 struct fib_data *old_fd, struct fib_data **pfd) 1134 { 1135 struct fib_data *fd; 1136 size_t size; 1137 enum flm_op_result result; 1138 1139 /* Allocate */ 1140 fd = malloc(sizeof(struct fib_data), M_RTABLE, M_NOWAIT | M_ZERO); 1141 if (fd == NULL) { 1142 *pfd = NULL; 1143 RH_PRINTF(LOG_INFO, rh, "Unable to allocate fib_data structure"); 1144 return (FLM_REBUILD); 1145 } 1146 *pfd = fd; 1147 1148 estimate_nhop_scale(old_fd, fd); 1149 1150 fd->fd_rh = rh; 1151 fd->fd_gen = ++fib_gen; 1152 fd->fd_family = rh->rib_family; 1153 fd->fd_fibnum = rh->rib_fibnum; 1154 callout_init_rm(&fd->fd_callout, &rh->rib_lock, 0); 1155 fd->fd_vnet = curvnet; 1156 fd->fd_flm = flm; 1157 1158 FD_PRINTF(LOG_DEBUG, fd, "allocated fd %p", fd); 1159 1160 FIB_MOD_LOCK(); 1161 flm->flm_refcount++; 1162 FIB_MOD_UNLOCK(); 1163 1164 /* Allocate nhidx -> nhop_ptr table */ 1165 size = fd->number_nhops * sizeof(void *); 1166 fd->nh_idx = malloc(size, M_RTABLE, M_NOWAIT | M_ZERO); 1167 if (fd->nh_idx == NULL) { 1168 FD_PRINTF(LOG_INFO, fd, "Unable to allocate nhop table idx (sz:%zu)", size); 1169 return (FLM_REBUILD); 1170 } 1171 1172 /* Allocate nhop index refcount table */ 1173 size = sizeof(struct nhop_ref_table); 1174 size += fd->number_nhops * sizeof(uint32_t); 1175 fd->nh_ref_table = malloc(size, M_RTABLE, M_NOWAIT | M_ZERO); 1176 if (fd->nh_ref_table == NULL) { 1177 FD_PRINTF(LOG_INFO, fd, "Unable to allocate nhop refcount table (sz:%zu)", size); 1178 return (FLM_REBUILD); 1179 } 1180 FD_PRINTF(LOG_DEBUG, fd, "Allocated %u nhop indexes", fd->number_nhops); 1181 1182 /* Okay, we're ready for algo init */ 1183 void *old_algo_data = (old_fd != NULL) ? old_fd->fd_algo_data : NULL; 1184 result = flm->flm_init_cb(fd->fd_fibnum, fd, old_algo_data, &fd->fd_algo_data); 1185 if (result != FLM_SUCCESS) { 1186 FD_PRINTF(LOG_INFO, fd, "%s algo init failed", flm->flm_name); 1187 return (result); 1188 } 1189 1190 /* Try to subscribe */ 1191 if (flm->flm_change_rib_item_cb != NULL) { 1192 fd->fd_rs = rib_subscribe_locked(fd->fd_rh, 1193 handle_rtable_change_cb, fd, RIB_NOTIFY_IMMEDIATE); 1194 if (fd->fd_rs == NULL) { 1195 FD_PRINTF(LOG_INFO, fd, "failed to subscribe to the rib changes"); 1196 return (FLM_REBUILD); 1197 } 1198 } 1199 1200 /* Dump */ 1201 result = sync_algo(fd); 1202 if (result != FLM_SUCCESS) { 1203 FD_PRINTF(LOG_INFO, fd, "rib sync failed"); 1204 return (result); 1205 } 1206 FD_PRINTF(LOG_INFO, fd, "DUMP completed successfully."); 1207 1208 FIB_MOD_LOCK(); 1209 /* 1210 * Insert fd in the beginning of a list, to maintain invariant 1211 * that first matching entry for the AF/fib is always the active 1212 * one. 1213 */ 1214 TAILQ_INSERT_HEAD(&V_fib_data_list, fd, entries); 1215 fd->fd_linked = true; 1216 FIB_MOD_UNLOCK(); 1217 1218 return (FLM_SUCCESS); 1219 } 1220 1221 /* 1222 * Sets up algo @flm for table @rh and links it to the datapath. 
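 * Retries try_setup_fd_instance() up to FIB_MAX_TRIES times. On
 * FLM_REBUILD the failed instance is kept and passed as the previous fd
 * to the next attempt, which lets estimate_nhop_scale() double the
 * nexthop table when the prior try ran out of nhop indexes.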
1223 * 1224 */ 1225 static enum flm_op_result 1226 setup_fd_instance(struct fib_lookup_module *flm, struct rib_head *rh, 1227 struct fib_data *orig_fd, struct fib_data **pfd, bool attach) 1228 { 1229 struct fib_data *prev_fd, *new_fd; 1230 enum flm_op_result result; 1231 1232 NET_EPOCH_ASSERT(); 1233 RIB_WLOCK_ASSERT(rh); 1234 1235 prev_fd = orig_fd; 1236 new_fd = NULL; 1237 for (int i = 0; i < FIB_MAX_TRIES; i++) { 1238 result = try_setup_fd_instance(flm, rh, prev_fd, &new_fd); 1239 1240 if ((result == FLM_SUCCESS) && attach) 1241 result = attach_datapath(new_fd); 1242 1243 if ((prev_fd != NULL) && (prev_fd != orig_fd)) { 1244 schedule_destroy_fd_instance(prev_fd, false); 1245 prev_fd = NULL; 1246 } 1247 1248 RH_PRINTF(LOG_INFO, rh, "try %d: fib algo result: %s", i, 1249 print_op_result(result)); 1250 1251 if (result == FLM_REBUILD) { 1252 prev_fd = new_fd; 1253 new_fd = NULL; 1254 continue; 1255 } 1256 1257 break; 1258 } 1259 1260 if (result != FLM_SUCCESS) { 1261 RH_PRINTF(LOG_WARNING, rh, 1262 "%s algo instance setup failed, failures=%d", flm->flm_name, 1263 orig_fd ? orig_fd->fd_failed_rebuilds + 1 : 0); 1264 /* update failure count */ 1265 FIB_MOD_LOCK(); 1266 if (orig_fd != NULL) 1267 orig_fd->fd_failed_rebuilds++; 1268 FIB_MOD_UNLOCK(); 1269 1270 /* Ban algo on non-recoverable error */ 1271 if (result == FLM_ERROR) 1272 flm_error_add(flm, rh->rib_fibnum); 1273 1274 if ((prev_fd != NULL) && (prev_fd != orig_fd)) 1275 schedule_destroy_fd_instance(prev_fd, false); 1276 if (new_fd != NULL) { 1277 schedule_destroy_fd_instance(new_fd, false); 1278 new_fd = NULL; 1279 } 1280 } 1281 1282 *pfd = new_fd; 1283 return (result); 1284 } 1285 1286 /* 1287 * Tries to sync algo with the current rtable state, either 1288 * by executing batch update or rebuilding. 1289 * Returns true on success. 1290 */ 1291 static bool 1292 execute_callout_action(struct fib_data *fd) 1293 { 1294 enum fib_callout_action action = fd->fd_callout_action; 1295 struct fib_lookup_module *flm_new = NULL; 1296 bool result = true; 1297 1298 NET_EPOCH_ASSERT(); 1299 RIB_WLOCK_ASSERT(fd->fd_rh); 1300 1301 fd->fd_need_rebuild = false; 1302 fd->fd_batch = false; 1303 fd->fd_num_changes = 0; 1304 1305 /* First, check if we're still OK to use this algo */ 1306 if (!is_algo_fixed(fd->fd_rh)) 1307 flm_new = fib_check_best_algo(fd->fd_rh, fd->fd_flm); 1308 if (flm_new != NULL) 1309 action = FDA_REBUILD; 1310 1311 if (action == FDA_BATCH) { 1312 /* Try to sync */ 1313 if (!apply_rtable_changes(fd)) 1314 action = FDA_REBUILD; 1315 } 1316 1317 if (action == FDA_REBUILD) 1318 result = rebuild_fd_flm(fd, flm_new != NULL ? flm_new : fd->fd_flm); 1319 if (flm_new != NULL) 1320 fib_unref_algo(flm_new); 1321 1322 return (result); 1323 } 1324 1325 /* 1326 * Callout for all scheduled fd-related work. 1327 * - Checks if the current algo is still the best algo 1328 * - Synchronises algo instance to the rtable (batch usecase) 1329 * - Creates a new instance of an algo for af/fib if desired. 1330 */ 1331 static void 1332 handle_fd_callout(void *_data) 1333 { 1334 struct fib_data *fd = (struct fib_data *)_data; 1335 struct epoch_tracker et; 1336 1337 FD_PRINTF(LOG_INFO, fd, "running callout type=%d", fd->fd_callout_action); 1338 1339 NET_EPOCH_ENTER(et); 1340 CURVNET_SET(fd->fd_vnet); 1341 execute_callout_action(fd); 1342 CURVNET_RESTORE(); 1343 NET_EPOCH_EXIT(et); 1344 } 1345 1346 /* 1347 * Tries to create new algo instance based on @fd data. 1348 * Returns true on success. 
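 * If @flm_new is the currently used module, the existing instance is
 * passed as the previous fd, so flm_init_cb() gets the old algo data and
 * the nexthop table sizing is carried over; otherwise the table is
 * switched to the new algorithm.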
1349 */ 1350 static bool 1351 rebuild_fd_flm(struct fib_data *fd, struct fib_lookup_module *flm_new) 1352 { 1353 struct fib_data *fd_new, *fd_tmp = NULL; 1354 bool result; 1355 1356 if (flm_new == fd->fd_flm) 1357 fd_tmp = fd; 1358 else 1359 FD_PRINTF(LOG_NOTICE, fd, "switching algo to %s", flm_new->flm_name); 1360 1361 result = setup_fd_instance(flm_new, fd->fd_rh, fd_tmp, &fd_new, true); 1362 if (result != FLM_SUCCESS) { 1363 FD_PRINTF(LOG_NOTICE, fd, "table rebuild failed"); 1364 return (false); 1365 } 1366 FD_PRINTF(LOG_INFO, fd_new, "switched to new instance"); 1367 1368 /* Remove old instance */ 1369 schedule_destroy_fd_instance(fd, true); 1370 1371 return (true); 1372 } 1373 1374 static bool 1375 rebuild_fd(struct fib_data *fd, const char *reason) 1376 { 1377 struct fib_lookup_module *flm_new = NULL; 1378 bool result; 1379 1380 if (!is_algo_fixed(fd->fd_rh)) 1381 flm_new = fib_check_best_algo(fd->fd_rh, fd->fd_flm); 1382 1383 FD_PRINTF(LOG_INFO, fd, "running sync rebuild: %s", reason); 1384 result = rebuild_fd_flm(fd, flm_new != NULL ? flm_new : fd->fd_flm); 1385 if (flm_new != NULL) 1386 fib_unref_algo(flm_new); 1387 1388 if (!result) { 1389 FD_PRINTF(LOG_ERR, fd, "sync rebuild failed"); 1390 schedule_fd_rebuild(fd, "sync rebuild failed"); 1391 } 1392 1393 return (result); 1394 } 1395 1396 /* 1397 * Finds algo by name/family. 1398 * Returns referenced algo or NULL. 1399 */ 1400 static struct fib_lookup_module * 1401 fib_find_algo(const char *algo_name, int family) 1402 { 1403 struct fib_lookup_module *flm; 1404 1405 FIB_MOD_LOCK(); 1406 TAILQ_FOREACH(flm, &all_algo_list, entries) { 1407 if ((strcmp(flm->flm_name, algo_name) == 0) && 1408 (family == flm->flm_family)) { 1409 flm->flm_refcount++; 1410 FIB_MOD_UNLOCK(); 1411 return (flm); 1412 } 1413 } 1414 FIB_MOD_UNLOCK(); 1415 1416 return (NULL); 1417 } 1418 1419 static void 1420 fib_unref_algo(struct fib_lookup_module *flm) 1421 { 1422 1423 FIB_MOD_LOCK(); 1424 flm->flm_refcount--; 1425 FIB_MOD_UNLOCK(); 1426 } 1427 1428 static int 1429 set_fib_algo(uint32_t fibnum, int family, struct sysctl_oid *oidp, struct sysctl_req *req) 1430 { 1431 struct fib_lookup_module *flm = NULL; 1432 struct fib_data *fd = NULL; 1433 char old_algo_name[32], algo_name[32]; 1434 struct rib_head *rh = NULL; 1435 enum flm_op_result result; 1436 struct epoch_tracker et; 1437 int error; 1438 1439 /* Fetch current algo/rib for af/family */ 1440 FIB_MOD_LOCK(); 1441 TAILQ_FOREACH(fd, &V_fib_data_list, entries) { 1442 if ((fd->fd_family == family) && (fd->fd_fibnum == fibnum)) 1443 break; 1444 } 1445 if (fd == NULL) { 1446 FIB_MOD_UNLOCK(); 1447 return (ENOENT); 1448 } 1449 rh = fd->fd_rh; 1450 strlcpy(old_algo_name, fd->fd_flm->flm_name, 1451 sizeof(old_algo_name)); 1452 FIB_MOD_UNLOCK(); 1453 1454 strlcpy(algo_name, old_algo_name, sizeof(algo_name)); 1455 error = sysctl_handle_string(oidp, algo_name, sizeof(algo_name), req); 1456 if (error != 0 || req->newptr == NULL) 1457 return (error); 1458 1459 if (strcmp(algo_name, old_algo_name) == 0) 1460 return (0); 1461 1462 /* New algorithm name is different */ 1463 flm = fib_find_algo(algo_name, family); 1464 if (flm == NULL) { 1465 RH_PRINTF(LOG_INFO, rh, "unable to find algo %s", algo_name); 1466 return (ESRCH); 1467 } 1468 1469 fd = NULL; 1470 NET_EPOCH_ENTER(et); 1471 RIB_WLOCK(rh); 1472 result = setup_fd_instance(flm, rh, NULL, &fd, true); 1473 RIB_WUNLOCK(rh); 1474 NET_EPOCH_EXIT(et); 1475 fib_unref_algo(flm); 1476 if (result != FLM_SUCCESS) 1477 return (EINVAL); 1478 1479 /* Disable automated jumping between 
algos */ 1480 FIB_MOD_LOCK(); 1481 set_algo_fixed(rh); 1482 FIB_MOD_UNLOCK(); 1483 /* Remove old instance(s) */ 1484 fib_cleanup_algo(rh, true, false); 1485 1486 /* Drain cb so user can unload the module after userret if so desired */ 1487 epoch_drain_callbacks(net_epoch_preempt); 1488 1489 return (0); 1490 } 1491 1492 #ifdef INET 1493 static int 1494 set_algo_inet_sysctl_handler(SYSCTL_HANDLER_ARGS) 1495 { 1496 1497 return (set_fib_algo(curthread->td_proc->p_fibnum, AF_INET, oidp, req)); 1498 } 1499 SYSCTL_PROC(_net_route_algo_inet, OID_AUTO, algo, 1500 CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, 1501 set_algo_inet_sysctl_handler, "A", "Set IPv4 lookup algo"); 1502 #endif 1503 1504 #ifdef INET6 1505 static int 1506 set_algo_inet6_sysctl_handler(SYSCTL_HANDLER_ARGS) 1507 { 1508 1509 return (set_fib_algo(curthread->td_proc->p_fibnum, AF_INET6, oidp, req)); 1510 } 1511 SYSCTL_PROC(_net_route_algo_inet6, OID_AUTO, algo, 1512 CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, 1513 set_algo_inet6_sysctl_handler, "A", "Set IPv6 lookup algo"); 1514 #endif 1515 1516 static void 1517 destroy_fdh_epoch(epoch_context_t ctx) 1518 { 1519 struct fib_dp_header *fdh; 1520 1521 fdh = __containerof(ctx, struct fib_dp_header, fdh_epoch_ctx); 1522 free(fdh, M_RTABLE); 1523 } 1524 1525 static struct fib_dp_header * 1526 alloc_fib_dp_array(uint32_t num_tables, bool waitok) 1527 { 1528 size_t sz; 1529 struct fib_dp_header *fdh; 1530 1531 sz = sizeof(struct fib_dp_header); 1532 sz += sizeof(struct fib_dp) * num_tables; 1533 fdh = malloc(sz, M_RTABLE, (waitok ? M_WAITOK : M_NOWAIT) | M_ZERO); 1534 if (fdh != NULL) 1535 fdh->fdh_num_tables = num_tables; 1536 return (fdh); 1537 } 1538 1539 static struct fib_dp_header * 1540 get_fib_dp_header(struct fib_dp *dp) 1541 { 1542 1543 return (__containerof((void *)dp, struct fib_dp_header, fdh_idx)); 1544 } 1545 1546 /* 1547 * Replace per-family index pool @pdp with a new one which 1548 * contains updated callback/algo data from @fd. 1549 * Returns 0 on success. 
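 * The switch follows a copy/publish pattern: a new array is allocated, the
 * old entries are copied, the single slot for @fd is updated, a release
 * fence publishes the writes and the pointer is swapped; the old array is
 * reclaimed via epoch_call() once in-flight readers leave the net epoch.
 *
 * Conceptually, a datapath reader does (sketch only; the exact lookup
 * prototype is defined in net/route/fib_algo.h):
 *
 *	struct fib_dp *dp = &V_inet_dp[fibnum];
 *	nh = dp->f(...);	/* lookup key, scopeid and dp->arg */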
1550 */ 1551 static enum flm_op_result 1552 replace_rtables_family(struct fib_dp **pdp, struct fib_data *fd) 1553 { 1554 struct fib_dp_header *new_fdh, *old_fdh; 1555 1556 NET_EPOCH_ASSERT(); 1557 1558 FD_PRINTF(LOG_DEBUG, fd, "[vnet %p] replace with f:%p arg:%p", 1559 curvnet, fd->fd_dp.f, fd->fd_dp.arg); 1560 1561 FIB_MOD_LOCK(); 1562 old_fdh = get_fib_dp_header(*pdp); 1563 new_fdh = alloc_fib_dp_array(old_fdh->fdh_num_tables, false); 1564 FD_PRINTF(LOG_DEBUG, fd, "OLD FDH: %p NEW FDH: %p", old_fdh, new_fdh); 1565 if (new_fdh == NULL) { 1566 FIB_MOD_UNLOCK(); 1567 FD_PRINTF(LOG_WARNING, fd, "error attaching datapath"); 1568 return (FLM_REBUILD); 1569 } 1570 1571 memcpy(&new_fdh->fdh_idx[0], &old_fdh->fdh_idx[0], 1572 old_fdh->fdh_num_tables * sizeof(struct fib_dp)); 1573 /* Update relevant data structure for @fd */ 1574 new_fdh->fdh_idx[fd->fd_fibnum] = fd->fd_dp; 1575 1576 /* Ensure memcpy() writes have completed */ 1577 atomic_thread_fence_rel(); 1578 /* Set new datapath pointer */ 1579 *pdp = &new_fdh->fdh_idx[0]; 1580 FIB_MOD_UNLOCK(); 1581 FD_PRINTF(LOG_DEBUG, fd, "update %p -> %p", old_fdh, new_fdh); 1582 1583 epoch_call(net_epoch_preempt, destroy_fdh_epoch, 1584 &old_fdh->fdh_epoch_ctx); 1585 1586 return (FLM_SUCCESS); 1587 } 1588 1589 static struct fib_dp ** 1590 get_family_dp_ptr(int family) 1591 { 1592 switch (family) { 1593 case AF_INET: 1594 return (&V_inet_dp); 1595 case AF_INET6: 1596 return (&V_inet6_dp); 1597 } 1598 return (NULL); 1599 } 1600 1601 /* 1602 * Make datapath use fib instance @fd 1603 */ 1604 static enum flm_op_result 1605 attach_datapath(struct fib_data *fd) 1606 { 1607 struct fib_dp **pdp; 1608 1609 pdp = get_family_dp_ptr(fd->fd_family); 1610 return (replace_rtables_family(pdp, fd)); 1611 } 1612 1613 /* 1614 * Grow datapath pointers array. 1615 * Called from sysctl handler on growing number of routing tables. 1616 */ 1617 static void 1618 grow_rtables_family(struct fib_dp **pdp, uint32_t new_num_tables) 1619 { 1620 struct fib_dp_header *new_fdh, *old_fdh = NULL; 1621 1622 new_fdh = alloc_fib_dp_array(new_num_tables, true); 1623 1624 FIB_MOD_LOCK(); 1625 if (*pdp != NULL) { 1626 old_fdh = get_fib_dp_header(*pdp); 1627 memcpy(&new_fdh->fdh_idx[0], &old_fdh->fdh_idx[0], 1628 old_fdh->fdh_num_tables * sizeof(struct fib_dp)); 1629 } 1630 1631 /* Wait till all writes completed */ 1632 atomic_thread_fence_rel(); 1633 1634 *pdp = &new_fdh->fdh_idx[0]; 1635 FIB_MOD_UNLOCK(); 1636 1637 if (old_fdh != NULL) 1638 epoch_call(net_epoch_preempt, destroy_fdh_epoch, 1639 &old_fdh->fdh_epoch_ctx); 1640 } 1641 1642 /* 1643 * Grows per-AF arrays of datapath pointers for each supported family. 1644 * Called from fibs resize sysctl handler. 1645 */ 1646 void 1647 fib_grow_rtables(uint32_t new_num_tables) 1648 { 1649 1650 #ifdef INET 1651 grow_rtables_family(get_family_dp_ptr(AF_INET), new_num_tables); 1652 #endif 1653 #ifdef INET6 1654 grow_rtables_family(get_family_dp_ptr(AF_INET6), new_num_tables); 1655 #endif 1656 } 1657 1658 void 1659 fib_get_rtable_info(struct rib_head *rh, struct rib_rtable_info *rinfo) 1660 { 1661 1662 bzero(rinfo, sizeof(struct rib_rtable_info)); 1663 rinfo->num_prefixes = rh->rnh_prefixes; 1664 rinfo->num_nhops = nhops_get_count(rh); 1665 #ifdef ROUTE_MPATH 1666 rinfo->num_nhgrp = nhgrp_get_count(rh); 1667 #endif 1668 } 1669 1670 /* 1671 * Accessor to get rib instance @fd is attached to. 
1672 */ 1673 struct rib_head * 1674 fib_get_rh(struct fib_data *fd) 1675 { 1676 1677 return (fd->fd_rh); 1678 } 1679 1680 /* 1681 * Accessor to export idx->nhop array 1682 */ 1683 struct nhop_object ** 1684 fib_get_nhop_array(struct fib_data *fd) 1685 { 1686 1687 return (fd->nh_idx); 1688 } 1689 1690 static uint32_t 1691 get_nhop_idx(struct nhop_object *nh) 1692 { 1693 #ifdef ROUTE_MPATH 1694 if (NH_IS_NHGRP(nh)) 1695 return (nhgrp_get_idx((struct nhgrp_object *)nh) * 2 - 1); 1696 else 1697 return (nhop_get_idx(nh) * 2); 1698 #else 1699 return (nhop_get_idx(nh)); 1700 #endif 1701 } 1702 1703 uint32_t 1704 fib_get_nhop_idx(struct fib_data *fd, struct nhop_object *nh) 1705 { 1706 1707 return (get_nhop_idx(nh)); 1708 } 1709 1710 static bool 1711 is_idx_free(struct fib_data *fd, uint32_t index) 1712 { 1713 1714 return (fd->nh_ref_table->refcnt[index] == 0); 1715 } 1716 1717 static uint32_t 1718 fib_ref_nhop(struct fib_data *fd, struct nhop_object *nh) 1719 { 1720 uint32_t idx = get_nhop_idx(nh); 1721 1722 if (idx >= fd->number_nhops) { 1723 fd->hit_nhops = 1; 1724 return (0); 1725 } 1726 1727 if (is_idx_free(fd, idx)) { 1728 nhop_ref_any(nh); 1729 fd->nh_idx[idx] = nh; 1730 fd->nh_ref_table->count++; 1731 FD_PRINTF(LOG_DEBUG2, fd, " REF nhop %u %p", idx, fd->nh_idx[idx]); 1732 } 1733 fd->nh_ref_table->refcnt[idx]++; 1734 1735 return (idx); 1736 } 1737 1738 struct nhop_release_data { 1739 struct nhop_object *nh; 1740 struct epoch_context ctx; 1741 }; 1742 1743 static void 1744 release_nhop_epoch(epoch_context_t ctx) 1745 { 1746 struct nhop_release_data *nrd; 1747 1748 nrd = __containerof(ctx, struct nhop_release_data, ctx); 1749 nhop_free_any(nrd->nh); 1750 free(nrd, M_TEMP); 1751 } 1752 1753 /* 1754 * Delays nexthop refcount release. 1755 * Datapath may have the datastructures not updated yet, so the old 1756 * nexthop may still be returned till the end of current epoch. Delay 1757 * refcount removal, as we may be removing the last instance, which will 1758 * trigger nexthop deletion, rendering returned nexthop invalid. 1759 */ 1760 static void 1761 fib_schedule_release_nhop(struct fib_data *fd, struct nhop_object *nh) 1762 { 1763 struct nhop_release_data *nrd; 1764 1765 nrd = malloc(sizeof(struct nhop_release_data), M_TEMP, M_NOWAIT | M_ZERO); 1766 if (nrd != NULL) { 1767 nrd->nh = nh; 1768 epoch_call(net_epoch_preempt, release_nhop_epoch, &nrd->ctx); 1769 } else { 1770 /* 1771 * Unable to allocate memory. Leak nexthop to maintain guarantee 1772 * that each nhop can be referenced. 
1773 */ 1774 FD_PRINTF(LOG_ERR, fd, "unable to schedule nhop %p deletion", nh); 1775 } 1776 } 1777 1778 static void 1779 fib_unref_nhop(struct fib_data *fd, struct nhop_object *nh) 1780 { 1781 uint32_t idx = get_nhop_idx(nh); 1782 1783 KASSERT((idx < fd->number_nhops), ("invalid nhop index")); 1784 KASSERT((nh == fd->nh_idx[idx]), ("index table contains whong nh")); 1785 1786 fd->nh_ref_table->refcnt[idx]--; 1787 if (fd->nh_ref_table->refcnt[idx] == 0) { 1788 FD_PRINTF(LOG_DEBUG, fd, " FREE nhop %d %p", idx, fd->nh_idx[idx]); 1789 fib_schedule_release_nhop(fd, fd->nh_idx[idx]); 1790 } 1791 } 1792 1793 static void 1794 set_algo_fixed(struct rib_head *rh) 1795 { 1796 switch (rh->rib_family) { 1797 #ifdef INET 1798 case AF_INET: 1799 V_algo_fixed_inet = true; 1800 break; 1801 #endif 1802 #ifdef INET6 1803 case AF_INET6: 1804 V_algo_fixed_inet6 = true; 1805 break; 1806 #endif 1807 } 1808 } 1809 1810 static bool 1811 is_algo_fixed(struct rib_head *rh) 1812 { 1813 1814 switch (rh->rib_family) { 1815 #ifdef INET 1816 case AF_INET: 1817 return (V_algo_fixed_inet); 1818 #endif 1819 #ifdef INET6 1820 case AF_INET6: 1821 return (V_algo_fixed_inet6); 1822 #endif 1823 } 1824 return (false); 1825 } 1826 1827 /* 1828 * Runs the check on what would be the best algo for rib @rh, assuming 1829 * that the current algo is the one specified by @orig_flm. Note that 1830 * it can be NULL for initial selection. 1831 * 1832 * Returns referenced new algo or NULL if the current one is the best. 1833 */ 1834 static struct fib_lookup_module * 1835 fib_check_best_algo(struct rib_head *rh, struct fib_lookup_module *orig_flm) 1836 { 1837 uint8_t preference, curr_preference = 0, best_preference = 0; 1838 struct fib_lookup_module *flm, *best_flm = NULL; 1839 struct rib_rtable_info rinfo; 1840 int candidate_algos = 0; 1841 1842 fib_get_rtable_info(rh, &rinfo); 1843 1844 FIB_MOD_LOCK(); 1845 TAILQ_FOREACH(flm, &all_algo_list, entries) { 1846 if (flm->flm_family != rh->rib_family) 1847 continue; 1848 candidate_algos++; 1849 preference = flm->flm_get_pref(&rinfo); 1850 if (preference > best_preference) { 1851 if (!flm_error_check(flm, rh->rib_fibnum)) { 1852 best_preference = preference; 1853 best_flm = flm; 1854 } 1855 } 1856 if (flm == orig_flm) 1857 curr_preference = preference; 1858 } 1859 if ((best_flm != NULL) && (curr_preference + BEST_DIFF_PERCENT < best_preference)) 1860 best_flm->flm_refcount++; 1861 else 1862 best_flm = NULL; 1863 FIB_MOD_UNLOCK(); 1864 1865 RH_PRINTF(LOG_DEBUG, rh, "candidate_algos: %d, curr: %s(%d) result: %s(%d)", 1866 candidate_algos, orig_flm ? orig_flm->flm_name : "NULL", curr_preference, 1867 best_flm ? best_flm->flm_name : (orig_flm ? orig_flm->flm_name : "NULL"), 1868 best_preference); 1869 1870 return (best_flm); 1871 } 1872 1873 /* 1874 * Called when new route table is created. 1875 * Selects, allocates and attaches fib algo for the table. 
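 * Initial selection calls fib_check_best_algo(rh, NULL): with no current
 * algorithm the current preference is 0, so a registered module is picked
 * only if its flm_get_pref() exceeds BEST_DIFF_PERCENT, e.g. on the 0..255
 * preference scale a candidate needs at least 13 (5 * 256 / 100 = 12 must
 * be beaten).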
1876 */ 1877 int 1878 fib_select_algo_initial(struct rib_head *rh) 1879 { 1880 struct fib_lookup_module *flm; 1881 struct fib_data *fd = NULL; 1882 enum flm_op_result result; 1883 struct epoch_tracker et; 1884 int error = 0; 1885 1886 flm = fib_check_best_algo(rh, NULL); 1887 if (flm == NULL) { 1888 RH_PRINTF(LOG_CRIT, rh, "no algo selected"); 1889 return (ENOENT); 1890 } 1891 RH_PRINTF(LOG_INFO, rh, "selected algo %s", flm->flm_name); 1892 1893 NET_EPOCH_ENTER(et); 1894 RIB_WLOCK(rh); 1895 result = setup_fd_instance(flm, rh, NULL, &fd, false); 1896 RIB_WUNLOCK(rh); 1897 NET_EPOCH_EXIT(et); 1898 1899 RH_PRINTF(LOG_DEBUG, rh, "result=%d fd=%p", result, fd); 1900 if (result == FLM_SUCCESS) { 1901 1902 /* 1903 * Attach datapath directly to avoid multiple reallocations 1904 * during fib growth 1905 */ 1906 struct fib_dp_header *fdp; 1907 struct fib_dp **pdp; 1908 1909 pdp = get_family_dp_ptr(rh->rib_family); 1910 if (pdp != NULL) { 1911 fdp = get_fib_dp_header(*pdp); 1912 fdp->fdh_idx[fd->fd_fibnum] = fd->fd_dp; 1913 FD_PRINTF(LOG_INFO, fd, "datapath attached"); 1914 } 1915 } else { 1916 error = EINVAL; 1917 RH_PRINTF(LOG_CRIT, rh, "unable to setup algo %s", flm->flm_name); 1918 } 1919 1920 fib_unref_algo(flm); 1921 1922 return (error); 1923 } 1924 1925 /* 1926 * Registers fib lookup module within the subsystem. 1927 */ 1928 int 1929 fib_module_register(struct fib_lookup_module *flm) 1930 { 1931 1932 FIB_MOD_LOCK(); 1933 ALGO_PRINTF("attaching %s to %s", flm->flm_name, 1934 print_family(flm->flm_family)); 1935 TAILQ_INSERT_TAIL(&all_algo_list, flm, entries); 1936 FIB_MOD_UNLOCK(); 1937 1938 return (0); 1939 } 1940 1941 /* 1942 * Tries to unregister fib lookup module. 1943 * 1944 * Returns 0 on success, EBUSY if module is still used 1945 * by some of the tables. 1946 */ 1947 int 1948 fib_module_unregister(struct fib_lookup_module *flm) 1949 { 1950 1951 FIB_MOD_LOCK(); 1952 if (flm->flm_refcount > 0) { 1953 FIB_MOD_UNLOCK(); 1954 return (EBUSY); 1955 } 1956 fib_error_clear_flm(flm); 1957 ALGO_PRINTF("detaching %s from %s", flm->flm_name, 1958 print_family(flm->flm_family)); 1959 TAILQ_REMOVE(&all_algo_list, flm, entries); 1960 FIB_MOD_UNLOCK(); 1961 1962 return (0); 1963 } 1964 1965 void 1966 vnet_fib_init(void) 1967 { 1968 1969 TAILQ_INIT(&V_fib_data_list); 1970 } 1971 1972 void 1973 vnet_fib_destroy(void) 1974 { 1975 1976 FIB_MOD_LOCK(); 1977 fib_error_clear(); 1978 FIB_MOD_UNLOCK(); 1979 } 1980
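/*
 * Usage sketch (illustrative only, not compiled as part of this file): a
 * lookup module fills in a struct fib_lookup_module and registers it. Only
 * the fields dereferenced by this file are shown, the example_* callbacks
 * are hypothetical, and the authoritative structure definition lives in
 * net/route/fib_algo.h.
 *
 *	static struct fib_lookup_module flm_example = {
 *		.flm_name = "example",
 *		.flm_family = AF_INET,
 *		.flm_get_pref = example_get_pref,
 *		.flm_init_cb = example_init,
 *		.flm_destroy_cb = example_destroy,
 *		.flm_dump_rib_item_cb = example_dump_rib_item,
 *		.flm_dump_end_cb = example_dump_end,
 *		.flm_change_rib_item_cb = example_change_rib_item,
 *	};
 *
 * fib_module_register(&flm_example) makes the module a candidate for
 * fib_check_best_algo(); fib_module_unregister() returns EBUSY while any
 * fib still holds a reference (flm_refcount > 0).
 */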