xref: /freebsd/sys/net/route/fib_algo.c (revision e49d3eb40324eaffaa13b93f2c4173dfa04dfa34)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2020 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include "opt_inet.h"
31 #include "opt_inet6.h"
32 #include "opt_route.h"
33 
34 #include <sys/param.h>
35 #include <sys/eventhandler.h>
36 #include <sys/kernel.h>
37 #include <sys/sbuf.h>
38 #include <sys/lock.h>
39 #include <sys/rmlock.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/module.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/sysctl.h>
49 #include <sys/syslog.h>
50 #include <sys/queue.h>
51 #include <net/vnet.h>
52 
53 #include <net/if.h>
54 #include <net/if_var.h>
55 
56 #include <netinet/in.h>
57 #include <netinet/in_var.h>
58 #include <netinet/ip.h>
59 #include <netinet/ip_var.h>
60 #ifdef INET6
61 #include <netinet/ip6.h>
62 #include <netinet6/ip6_var.h>
63 #endif
64 
65 #include <net/route.h>
66 #include <net/route/nhop.h>
67 #include <net/route/route_ctl.h>
68 #include <net/route/route_var.h>
69 #include <net/route/fib_algo.h>
70 
71 #include <machine/stdarg.h>
72 
73 /*
74  * Fib lookup framework.
75  *
76  * This framework enables accelerated longest-prefix-match lookups for the
77  *  routing tables by adding the ability to dynamically attach/detach lookup
78  *  algorithm implementations to/from the datapath.
79  *
80  * flm - fib lookup modules - implementation of particular lookup algorithm
81  * fd - fib data - instance of an flm bound to specific routing table
82  *
83  * This file provides main framework functionality.
84  *
85  * The following are the features provided by the framework
86  *
87  * 1) nexthop abstraction -> provides transparent referencing, indexing
88  *   and efficient idx->ptr mappings for nexthops and nexthop groups.
89  * 2) Routing table synchronisation
90  * 3) dataplane attachment points
91  * 4) automatic algorithm selection based on the provided preference.
92  *
93  *
94  * DATAPATH
95  * For each supported address family, there is an allocated array of fib_dp
96  *  structures, indexed by fib number. Each array entry contains a callback
97  *  function and its argument. This function will be called with a
98  *  family-specific lookup key, scope and the provided argument. The array
99  *  gets re-created every time a new algo instance is created. Please take a
100  *  look at the replace_rtables_family() function for more details.
101  *
102  */
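
/*
 * Illustrative sketch (an assumption, not code from this file): how a
 * datapath consumer would dispatch a lookup through the per-fib fib_dp
 * array described above. The f/arg fields match the fd_dp usage below;
 * the exact lookup-function signature is defined in fib_algo.h and is
 * assumed here.
 *
 *	static inline struct nhop_object *
 *	example_fib_lookup(struct fib_dp *dp_array, uint32_t fibnum,
 *	    const struct flm_lookup_key key, uint32_t scope)
 *	{
 *		struct fib_dp *dp = &dp_array[fibnum];
 *
 *		// Invoke the algo-specific lookup with its private argument
 *		return (dp->f(dp->arg, key, scope));
 *	}
 */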
103 
104 SYSCTL_DECL(_net_route);
105 SYSCTL_NODE(_net_route, OID_AUTO, algo, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
106     "Fib algorithm lookups");
107 
108 /* Algorithm sync policy */
109 
110 /* Time interval to bucket updates */
111 VNET_DEFINE(unsigned int, bucket_time_ms) = 50;
112 #define	V_bucket_time_ms	VNET(bucket_time_ms)
113 SYSCTL_UINT(_net_route_algo, OID_AUTO, bucket_time_ms, CTLFLAG_RW | CTLFLAG_VNET,
114     &VNET_NAME(bucket_time_ms), 0, "Time interval to calculate update rate");
115 
116 /* Minimum update rate to delay sync */
117 VNET_DEFINE(unsigned int, bucket_change_threshold_rate) = 500;
118 #define	V_bucket_change_threshold_rate	VNET(bucket_change_threshold_rate)
119 SYSCTL_UINT(_net_route_algo, OID_AUTO, bucket_change_threshold_rate, CTLFLAG_RW | CTLFLAG_VNET,
120     &VNET_NAME(bucket_change_threshold_rate), 0, "Minimum update rate to delay sync");
121 
122 /* Max allowed delay to sync */
123 VNET_DEFINE(unsigned int, fib_max_sync_delay_ms) = 1000;
124 #define	V_fib_max_sync_delay_ms	VNET(fib_max_sync_delay_ms)
125 SYSCTL_UINT(_net_route_algo, OID_AUTO, fib_max_sync_delay_ms, CTLFLAG_RW | CTLFLAG_VNET,
126     &VNET_NAME(fib_max_sync_delay_ms), 0, "Maximum time to delay sync (ms)");
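
/*
 * Usage sketch: the sync policy above can be tuned at runtime via
 * sysctl(8), e.g. to favour batching over update latency:
 *
 *	# sysctl net.route.algo.bucket_time_ms=100
 *	# sysctl net.route.algo.bucket_change_threshold_rate=1000
 *	# sysctl net.route.algo.fib_max_sync_delay_ms=2000
 */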
127 
128 
129 #ifdef INET6
130 VNET_DEFINE_STATIC(bool, algo_fixed_inet6) = false;
131 #define	V_algo_fixed_inet6	VNET(algo_fixed_inet6)
132 SYSCTL_NODE(_net_route_algo, OID_AUTO, inet6, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
133     "IPv6 longest prefix match lookups");
134 #endif
135 #ifdef INET
136 VNET_DEFINE_STATIC(bool, algo_fixed_inet) = false;
137 #define	V_algo_fixed_inet	VNET(algo_fixed_inet)
138 SYSCTL_NODE(_net_route_algo, OID_AUTO, inet, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
139     "IPv4 longest prefix match lookups");
140 #endif
141 
142 /* Fib instance counter */
143 static uint32_t fib_gen = 0;
144 
145 struct nhop_ref_table {
146 	uint32_t		count;
147 	int32_t			refcnt[0];
148 };
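
/*
 * Note: refcnt[] is a variable-length tail, so the table is allocated as
 *  sizeof(struct nhop_ref_table) + number_nhops * sizeof(uint32_t);
 *  see try_setup_fd_instance() below.
 */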
149 
150 enum fib_callout_action {
151 	FDA_NONE,	/* No callout scheduled */
152 	FDA_REBUILD,	/* Asks to rebuild algo instance */
153 	FDA_EVAL,	/* Asks to evaluate if the current algo is still the best */
154 };
155 
156 struct fib_sync_status {
157 	struct timeval		diverge_time;	/* ts when diverged */
158 	uint32_t		num_changes;	/* number of changes since sync */
159 	uint32_t		bucket_changes;	/* num changes within the current bucket */
160 	uint64_t		bucket_id;	/* 50ms bucket # */
161 };
162 
163 /*
164  * Data structure for the fib lookup instance tied to the particular rib.
165  */
166 struct fib_data {
167 	uint32_t		number_nhops;	/* current # of nhops */
168 	uint8_t			hit_nhops;	/* true if out of nhop limit */
169 	uint8_t			init_done;	/* true if init is completed */
170 	uint32_t		fd_dead:1;	/* Scheduled for deletion */
171 	uint32_t		fd_linked:1;	/* true if linked */
172 	uint32_t		fd_need_rebuild:1;	/* true if rebuild scheduled */
173 	uint8_t			fd_family;	/* family */
174 	uint32_t		fd_fibnum;	/* fibnum */
175 	uint32_t		fd_failed_rebuilds;	/* stat: failed rebuilds */
176 	uint32_t		fd_gen;		/* instance gen# */
177 	struct callout		fd_callout;	/* rebuild callout */
178 	enum fib_callout_action	fd_callout_action;	/* Callout action to take */
179 	void			*fd_algo_data;	/* algorithm data */
180 	struct nhop_object	**nh_idx;	/* nhop idx->ptr array */
181 	struct nhop_ref_table	*nh_ref_table;	/* array with # of nhop references */
182 	struct rib_head		*fd_rh;		/* RIB table we're attached to */
183 	struct rib_subscription	*fd_rs;		/* storing table subscription */
184 	struct fib_dp		fd_dp;		/* fib datapath data */
185 	struct vnet		*fd_vnet;	/* vnet fib belongs to */
186 	struct epoch_context	fd_epoch_ctx;	/* epoch context for deletion */
187 	struct fib_lookup_module	*fd_flm;/* pointer to the lookup module */
188 	struct fib_sync_status	fd_ss;		/* State relevant to the rib sync  */
189 	uint32_t		fd_num_changes;	/* number of changes since last callout */
190 	TAILQ_ENTRY(fib_data)	entries;	/* list of all fds in vnet */
191 };
192 
193 static bool rebuild_fd(struct fib_data *fd, const char *reason);
194 static bool rebuild_fd_flm(struct fib_data *fd, struct fib_lookup_module *flm_new);
195 static void handle_fd_callout(void *_data);
196 static void destroy_fd_instance_epoch(epoch_context_t ctx);
197 static enum flm_op_result attach_datapath(struct fib_data *fd);
198 static bool is_idx_free(struct fib_data *fd, uint32_t index);
199 static void set_algo_fixed(struct rib_head *rh);
200 static bool is_algo_fixed(struct rib_head *rh);
201 
202 static uint32_t fib_ref_nhop(struct fib_data *fd, struct nhop_object *nh);
203 static void fib_unref_nhop(struct fib_data *fd, struct nhop_object *nh);
204 
205 static struct fib_lookup_module *fib_check_best_algo(struct rib_head *rh,
206     struct fib_lookup_module *orig_flm);
207 static void fib_unref_algo(struct fib_lookup_module *flm);
208 static bool flm_error_check(const struct fib_lookup_module *flm, uint32_t fibnum);
209 
210 struct mtx fib_mtx;
211 #define	FIB_MOD_LOCK()		mtx_lock(&fib_mtx)
212 #define	FIB_MOD_UNLOCK()	mtx_unlock(&fib_mtx)
213 #define	FIB_MOD_LOCK_ASSERT()	mtx_assert(&fib_mtx, MA_OWNED)
214 
215 MTX_SYSINIT(fib_mtx, &fib_mtx, "algo list mutex", MTX_DEF);
216 
217 /* Algorithm has to be this percent better than the current to switch */
218 #define	BEST_DIFF_PERCENT	(5 * 256 / 100)
219 /* Schedule algo re-evaluation X seconds after a change */
220 #define	ALGO_EVAL_DELAY_MS	30000
221 /* Force algo re-evaluation after X changes */
222 #define	ALGO_EVAL_NUM_ROUTES	100
223 /* Try to setup algorithm X times */
224 #define	FIB_MAX_TRIES		32
225 /* Max amount of supported nexthops */
226 #define	FIB_MAX_NHOPS		262144
227 #define	FIB_CALLOUT_DELAY_MS	50
228 
229 
230 /* Debug */
231 static int flm_debug_level = LOG_NOTICE;
232 SYSCTL_INT(_net_route_algo, OID_AUTO, debug_level, CTLFLAG_RW | CTLFLAG_RWTUN,
233     &flm_debug_level, 0, "debuglevel");
234 #define	FLM_MAX_DEBUG_LEVEL	LOG_DEBUG
235 #ifndef	LOG_DEBUG2
236 #define	LOG_DEBUG2	8
237 #endif
238 
239 #define	_PASS_MSG(_l)	(flm_debug_level >= (_l))
240 #define	ALGO_PRINTF(_fmt, ...)	printf("[fib_algo] %s: " _fmt "\n", __func__, ##__VA_ARGS__)
241 #define	_ALGO_PRINTF(_fib, _fam, _aname, _gen, _func, _fmt, ...) \
242     printf("[fib_algo] %s.%u (%s#%u) %s: " _fmt "\n",\
243     print_family(_fam), _fib, _aname, _gen, _func, ## __VA_ARGS__)
244 #define	_RH_PRINTF(_fib, _fam, _func, _fmt, ...) \
245     printf("[fib_algo] %s.%u %s: " _fmt "\n", print_family(_fam), _fib, _func, ## __VA_ARGS__)
246 #define	RH_PRINTF(_l, _rh, _fmt, ...)	if (_PASS_MSG(_l)) {	\
247     _RH_PRINTF(_rh->rib_fibnum, _rh->rib_family, __func__, _fmt, ## __VA_ARGS__);\
248 }
249 #define	FD_PRINTF(_l, _fd, _fmt, ...)	FD_PRINTF_##_l(_l, _fd, _fmt, ## __VA_ARGS__)
250 #define	_FD_PRINTF(_l, _fd, _fmt, ...)	if (_PASS_MSG(_l)) {		\
251     _ALGO_PRINTF(_fd->fd_fibnum, _fd->fd_family, _fd->fd_flm->flm_name,	\
252     _fd->fd_gen, __func__, _fmt, ## __VA_ARGS__);			\
253 }
254 #if FLM_MAX_DEBUG_LEVEL>=LOG_DEBUG2
255 #define	FD_PRINTF_LOG_DEBUG2	_FD_PRINTF
256 #else
257 #define	FD_PRINTF_LOG_DEBUG2(_l, _fd, _fmt, ...)
258 #endif
259 #if FLM_MAX_DEBUG_LEVEL>=LOG_DEBUG
260 #define	FD_PRINTF_LOG_DEBUG	_FD_PRINTF
261 #else
262 #define	FD_PRINTF_LOG_DEBUG(_l, _fd, _fmt, ...)
263 #endif
264 #if FLM_MAX_DEBUG_LEVEL>=LOG_INFO
265 #define	FD_PRINTF_LOG_INFO	_FD_PRINTF
266 #else
267 #define	FD_PRINTF_LOG_INFO(_l, _fd, _fmt, ...)
268 #endif
269 #define	FD_PRINTF_LOG_NOTICE	_FD_PRINTF
270 #define	FD_PRINTF_LOG_ERR	_FD_PRINTF
271 #define	FD_PRINTF_LOG_WARNING	_FD_PRINTF
272 
273 
274 /* List of all registered lookup algorithms */
275 static TAILQ_HEAD(, fib_lookup_module) all_algo_list = TAILQ_HEAD_INITIALIZER(all_algo_list);
276 
277 /* List of all fib lookup instances in the vnet */
278 VNET_DEFINE_STATIC(TAILQ_HEAD(fib_data_head, fib_data), fib_data_list);
279 #define	V_fib_data_list	VNET(fib_data_list)
280 
281 /* Datastructure for storing non-transient fib lookup module failures */
282 struct fib_error {
283 	int				fe_family;
284 	uint32_t			fe_fibnum;	/* failed rtable */
285 	struct fib_lookup_module	*fe_flm;	/* failed module */
286 	TAILQ_ENTRY(fib_error)		entries;	/* list of all errored entries */
287 };
288 VNET_DEFINE_STATIC(TAILQ_HEAD(fib_error_head, fib_error), fib_error_list);
289 #define	V_fib_error_list VNET(fib_error_list)
290 
291 /* Per-family array of fibnum -> {func, arg} mappings used in datapath */
292 struct fib_dp_header {
293 	struct epoch_context	fdh_epoch_ctx;
294 	uint32_t		fdh_num_tables;
295 	struct fib_dp		fdh_idx[0];
296 };
297 
298 /*
299  * Tries to add a new non-transient algorithm error to the list of
300  *  errors.
301  * Returns true on success.
302  */
303 static bool
304 flm_error_add(struct fib_lookup_module *flm, uint32_t fibnum)
305 {
306 	struct fib_error *fe;
307 
308 	fe = malloc(sizeof(struct fib_error), M_TEMP, M_NOWAIT | M_ZERO);
309 	if (fe == NULL)
310 		return (false);
311 	fe->fe_flm = flm;
312 	fe->fe_family = flm->flm_family;
313 	fe->fe_fibnum = fibnum;
314 
315 	FIB_MOD_LOCK();
316 	/* Avoid duplicates by checking if error already exists first */
317 	if (flm_error_check(flm, fibnum)) {
318 		FIB_MOD_UNLOCK();
319 		free(fe, M_TEMP);
320 		return (true);
321 	}
322 	TAILQ_INSERT_HEAD(&V_fib_error_list, fe, entries);
323 	FIB_MOD_UNLOCK();
324 
325 	return (true);
326 }
327 
328 /*
329  * True if non-transient error has been registered for @flm in @fibnum.
330  */
331 static bool
332 flm_error_check(const struct fib_lookup_module *flm, uint32_t fibnum)
333 {
334 	const struct fib_error *fe;
335 
336 	TAILQ_FOREACH(fe, &V_fib_error_list, entries) {
337 		if ((fe->fe_flm == flm) && (fe->fe_fibnum == fibnum))
338 			return (true);
339 	}
340 
341 	return (false);
342 }
343 
344 /*
345  * Clear all errors of algo specified by @flm.
346  */
347 static void
348 fib_error_clear_flm(struct fib_lookup_module *flm)
349 {
350 	struct fib_error *fe, *fe_tmp;
351 
352 	FIB_MOD_LOCK_ASSERT();
353 
354 	TAILQ_FOREACH_SAFE(fe, &V_fib_error_list, entries, fe_tmp) {
355 		if (fe->fe_flm == flm) {
356 			TAILQ_REMOVE(&V_fib_error_list, fe, entries);
357 			free(fe, M_TEMP);
358 		}
359 	}
360 }
361 
362 /*
363  * Clears all errors in current VNET.
364  */
365 static void
366 fib_error_clear(void)
367 {
368 	struct fib_error *fe, *fe_tmp;
369 
370 	FIB_MOD_LOCK_ASSERT();
371 
372 	TAILQ_FOREACH_SAFE(fe, &V_fib_error_list, entries, fe_tmp) {
373 		TAILQ_REMOVE(&V_fib_error_list, fe, entries);
374 		free(fe, M_TEMP);
375 	}
376 }
377 
378 static const char *
379 print_op_result(enum flm_op_result result)
380 {
381 	switch (result) {
382 	case FLM_SUCCESS:
383 		return "success";
384 	case FLM_REBUILD:
385 		return "rebuild";
386 	case FLM_ERROR:
387 		return "error";
388 	}
389 
390 	return "unknown";
391 }
392 
393 static const char *
394 print_family(int family)
395 {
396 
397 	if (family == AF_INET)
398 		return ("inet");
399 	else if (family == AF_INET6)
400 		return ("inet6");
401 	else
402 		return ("unknown");
403 }
404 
405 /*
406  * Debug function used by lookup algorithms.
407  * Outputs message denoted by @fmt, prepended by "[fib_algo] inetX.Y (algo) "
408  */
409 void
410 fib_printf(int level, struct fib_data *fd, const char *func, char *fmt, ...)
411 {
412 	char buf[128];
413 	va_list ap;
414 
415 	if (level > flm_debug_level)
416 		return;
417 
418 	va_start(ap, fmt);
419 	vsnprintf(buf, sizeof(buf), fmt, ap);
420 	va_end(ap);
421 
422 	_ALGO_PRINTF(fd->fd_fibnum, fd->fd_family, fd->fd_flm->flm_name,
423 	    fd->fd_gen, func, "%s", buf);
424 }
425 
426 /*
427  * Outputs list of algorithms supported by the provided address family.
428  */
429 static int
430 print_algos_sysctl(struct sysctl_req *req, int family)
431 {
432 	struct fib_lookup_module *flm;
433 	struct sbuf sbuf;
434 	int error, count = 0;
435 
436 	error = sysctl_wire_old_buffer(req, 0);
437 	if (error == 0) {
438 		sbuf_new_for_sysctl(&sbuf, NULL, 512, req);
439 		TAILQ_FOREACH(flm, &all_algo_list, entries) {
440 			if (flm->flm_family == family) {
441 				if (count++ > 0)
442 					sbuf_cat(&sbuf, ", ");
443 				sbuf_cat(&sbuf, flm->flm_name);
444 			}
445 		}
446 		error = sbuf_finish(&sbuf);
447 		sbuf_delete(&sbuf);
448 	}
449 	return (error);
450 }
451 
452 #ifdef INET6
453 static int
454 print_algos_sysctl_inet6(SYSCTL_HANDLER_ARGS)
455 {
456 
457 	return (print_algos_sysctl(req, AF_INET6));
458 }
459 SYSCTL_PROC(_net_route_algo_inet6, OID_AUTO, algo_list,
460     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
461     print_algos_sysctl_inet6, "A", "List of IPv6 lookup algorithms");
462 #endif
463 
464 #ifdef INET
465 static int
466 print_algos_sysctl_inet(SYSCTL_HANDLER_ARGS)
467 {
468 
469 	return (print_algos_sysctl(req, AF_INET));
470 }
471 SYSCTL_PROC(_net_route_algo_inet, OID_AUTO, algo_list,
472     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
473     print_algos_sysctl_inet, "A", "List of IPv4 lookup algorithms");
474 #endif
475 
476 /*
477  * Calculate delay between repeated failures.
478  * Returns current delay in milliseconds.
479  */
480 static uint32_t
481 callout_calc_delay_ms(struct fib_data *fd)
482 {
483 	uint32_t shift;
484 
485 	if (fd->fd_failed_rebuilds > 10)
486 		shift = 10;
487 	else
488 		shift = fd->fd_failed_rebuilds;
489 
490 	return ((1 << shift) * FIB_CALLOUT_DELAY_MS);
491 }
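
/*
 * Worked example: with FIB_CALLOUT_DELAY_MS == 50 the retry delay grows
 * exponentially with the number of failed rebuilds, capped at shift 10:
 *  failures=0 -> 50ms, failures=3 -> (1 << 3) * 50 = 400ms,
 *  failures>=10 -> (1 << 10) * 50 = 51200ms (~51s).
 */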
492 
493 static void
494 schedule_callout(struct fib_data *fd, enum fib_callout_action action, int delay_ms)
495 {
496 
497 	FD_PRINTF(LOG_DEBUG, fd, "delay=%d action=%d", delay_ms, action);
498 	fd->fd_callout_action = action;
499 	callout_reset_sbt(&fd->fd_callout, SBT_1MS * delay_ms, 0,
500 	    handle_fd_callout, fd, 0);
501 }
502 
503 static void
504 schedule_fd_rebuild(struct fib_data *fd, const char *reason)
505 {
506 
507 	RIB_WLOCK_ASSERT(fd->fd_rh);
508 
509 	if (!fd->fd_need_rebuild) {
510 		fd->fd_need_rebuild = true;
511 
512 		/*
513 		 * Potentially re-schedules pending callout
514 		 *  initiated by schedule_algo_eval.
515 		 */
516 		FD_PRINTF(LOG_INFO, fd, "Scheduling rebuild: %s (failures=%d)",
517 		    reason, fd->fd_failed_rebuilds);
518 		schedule_callout(fd, FDA_REBUILD, callout_calc_delay_ms(fd));
519 	}
520 }
521 
522 static int64_t
523 get_tv_diff_ms(const struct timeval *old_tv, const struct timeval *new_tv)
524 {
525 	int64_t diff = 0;
526 
527 	diff = ((int64_t)(new_tv->tv_sec - old_tv->tv_sec)) * 1000;
528 	diff += (new_tv->tv_usec - old_tv->tv_usec) / 1000;
529 
530 	return (diff);
531 }
532 
533 static void
534 add_tv_diff_ms(struct timeval *tv, int ms)
535 {
536 	tv->tv_sec += ms / 1000;
537 	ms = ms % 1000;
538 	if (ms * 1000 + tv->tv_usec < 1000000)
539 		tv->tv_usec += ms * 1000;
540 	else {
541 		tv->tv_sec += 1;
542 		tv->tv_usec = ms * 1000 + tv->tv_usec - 1000000;
543 	}
544 }
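
/*
 * Worked example for the helpers above: advancing
 * tv = { .tv_sec = 1, .tv_usec = 900000 } by 150ms crosses the second
 * boundary, as 150 * 1000 + 900000 = 1050000 >= 1000000, yielding
 * tv = { .tv_sec = 2, .tv_usec = 50000 }.
 */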
545 
546 /*
547  * Marks the time when algo state diverges from the rib state.
548  */
549 static void
550 mark_diverge_time(struct fib_data *fd)
551 {
552 	struct fib_sync_status *fd_ss = &fd->fd_ss;
553 
554 	getmicrouptime(&fd_ss->diverge_time);
555 	fd_ss->bucket_id = 0;
556 	fd_ss->bucket_changes = 0;
557 }
558 
559 /*
560  * Calculates and updates the next algorithm sync time, based on the current activity.
561  *
562  * The intent is to provide a reasonable balance between update
563  *  latency and efficient batching when changing a large number of routes.
564  *
565  * The high-level algorithm works as follows:
566  * 1) all changes are bucketed in 50ms intervals
567  * 2) if the number of changes within the bucket exceeds the threshold,
568  *   the update gets delayed, up to the maximum delay threshold.
569  */
570 static void
571 update_rebuild_delay(struct fib_data *fd)
572 {
573 	uint32_t bucket_id, new_delay = 0;
574 	struct timeval tv;
575 
576 	/* Fetch all variables at once to ensure consistent reads */
577 	uint32_t bucket_time_ms = V_bucket_time_ms;
578 	uint32_t threshold_rate = V_bucket_change_threshold_rate;
579 	uint32_t max_delay_ms = V_fib_max_sync_delay_ms;
580 
581 	if (bucket_time_ms == 0)
582 		bucket_time_ms = 50;
583 	/* calculate per-bucket threshold rate */
584 	threshold_rate = threshold_rate * bucket_time_ms / 1000;
585 
586 	getmicrouptime(&tv);
587 
588 	struct fib_sync_status *fd_ss = &fd->fd_ss;
589 
590 	bucket_id = get_tv_diff_ms(&fd_ss->diverge_time, &tv) / bucket_time_ms;
591 
592 	if (fd_ss->bucket_id == bucket_id) {
593 		fd_ss->bucket_changes++;
594 		if (fd_ss->bucket_changes == threshold_rate) {
595 			new_delay = (bucket_id + 2) * bucket_time_ms;
596 			if (new_delay <= max_delay_ms) {
597 				FD_PRINTF(LOG_DEBUG, fd,
598 				    "hit threshold of %u routes, delay update, "
599 				    "bucket: %u, total delay: %u",
600 				    threshold_rate, bucket_id + 1, new_delay);
601 			} else {
602 				new_delay = 0;
603 				FD_PRINTF(LOG_DEBUG, fd,
604 				    "maximum sync delay (%u ms) reached", max_delay_ms);
605 			}
606 		} else if ((bucket_id == 0) && (fd_ss->bucket_changes == 1))
607 			new_delay = bucket_time_ms;
608 	} else {
609 		fd_ss->bucket_id = bucket_id;
610 		fd_ss->bucket_changes = 1;
611 	}
612 
613 	if (new_delay > 0) {
614 		/* Calculated time has been updated */
615 		struct timeval new_tv = fd_ss->diverge_time;
616 		add_tv_diff_ms(&new_tv, new_delay);
617 
618 		int32_t delay_ms = get_tv_diff_ms(&tv, &new_tv);
619 		schedule_callout(fd, FDA_REBUILD, delay_ms);
620 	}
621 }
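
/*
 * Worked example with the defaults above: bucket_time_ms=50 and
 * bucket_change_threshold_rate=500 give a per-bucket threshold of
 * 500 * 50 / 1000 = 25 changes. If bucket #3 after divergence sees its
 * 25th change, the sync is rescheduled to (3 + 2) * 50 = 250ms after the
 * divergence time, provided this stays under fib_max_sync_delay_ms.
 */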
622 
623 static void
624 update_algo_state(struct fib_data *fd)
625 {
626 
627 	RIB_WLOCK_ASSERT(fd->fd_rh);
628 
629 	if (fd->fd_need_rebuild) {
630 		update_rebuild_delay(fd);
631 		return;
632 	}
633 
634 	if (fd->fd_num_changes++ == 0) {
635 		/* Start callout to consider switch */
636 		if (!callout_pending(&fd->fd_callout))
637 			schedule_callout(fd, FDA_EVAL, ALGO_EVAL_DELAY_MS);
638 	} else if (fd->fd_num_changes == ALGO_EVAL_NUM_ROUTES) {
639 		/* Reset callout to exec immediately */
640 		if (fd->fd_callout_action == FDA_EVAL)
641 			schedule_callout(fd, FDA_EVAL, 1);
642 	}
643 }
644 
645 static bool
646 need_immediate_sync(struct fib_data *fd, struct rib_cmd_info *rc)
647 {
648 	struct nhop_object *nh;
649 
650 	/* Sync addition/removal of interface routes */
651 	switch (rc->rc_cmd) {
652 	case RTM_ADD:
653 		nh = rc->rc_nh_new;
654 		if (!NH_IS_NHGRP(nh) && (!(nh->nh_flags & NHF_GATEWAY)))
655 			return (true);
656 		break;
657 	case RTM_DELETE:
658 		nh = rc->rc_nh_old;
659 		if (!NH_IS_NHGRP(nh) && (!(nh->nh_flags & NHF_GATEWAY)))
660 			return (true);
661 		break;
662 	}
663 
664 	return (false);
665 }
666 
667 /*
668  * Rib subscription handler. Checks if the algorithm is ready to
669  *  receive updates, handles nexthop refcounting and passes change
670  *  data to the algorithm callback.
671  */
672 static void
673 handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
674     void *_data)
675 {
676 	struct fib_data *fd = (struct fib_data *)_data;
677 	enum flm_op_result result;
678 
679 	RIB_WLOCK_ASSERT(rnh);
680 
681 	/*
682 	 * There is a small gap between subscribing for route changes
683 	 *  and initiating rtable dump. Avoid receiving route changes
684 	 *  prior to finishing rtable dump by checking `init_done`.
685 	 */
686 	if (!fd->init_done)
687 		return;
688 
689 	bool immediate_sync = need_immediate_sync(fd, rc);
690 
691 	/* Consider scheduling algorithm re-evaluation */
692 	update_algo_state(fd);
693 
694 	/*
695 	 * If algo requested rebuild, stop sending updates by default.
696 	 * This simplifies nexthop refcount handling logic.
697 	 */
698 	if (fd->fd_need_rebuild)
699 		return;
700 
701 	/*
702 	 * Maintain the guarantee that every nexthop returned by the dataplane
703 	 *  lookup has a refcount > 0, so it can be safely referenced within the
704 	 *  current epoch.
705 	 */
706 	if (rc->rc_nh_new != NULL) {
707 		if (fib_ref_nhop(fd, rc->rc_nh_new) == 0) {
708 			/* ran out of indexes */
709 			schedule_fd_rebuild(fd, "ran out of nhop indexes");
710 			return;
711 		}
712 	}
713 
714 	result = fd->fd_flm->flm_change_rib_item_cb(rnh, rc, fd->fd_algo_data);
715 
716 	switch (result) {
717 	case FLM_SUCCESS:
718 		/* Unref old nexthop on success */
719 		if (rc->rc_nh_old != NULL)
720 			fib_unref_nhop(fd, rc->rc_nh_old);
721 		break;
722 	case FLM_REBUILD:
723 
724 		/*
725 		 * Algo is not able to apply the update.
726 		 * Schedule algo rebuild.
727 		 */
728 		if (!immediate_sync) {
729 			mark_diverge_time(fd);
730 			schedule_fd_rebuild(fd, "algo requested rebuild");
731 			break;
732 		}
733 
734 		FD_PRINTF(LOG_INFO, fd, "running sync rebuild");
735 		rebuild_fd(fd, "rtable change type enforced sync");
736 		break;
737 	case FLM_ERROR:
738 
739 		/*
740 		 * Algo reported a non-recoverable error.
741 		 * Record the error and schedule rebuild, which will
742 		 *  trigger best algo selection.
743 		 */
744 		FD_PRINTF(LOG_ERR, fd, "algo reported non-recoverable error");
745 		if (!flm_error_add(fd->fd_flm, fd->fd_fibnum))
746 			FD_PRINTF(LOG_ERR, fd, "failed to ban algo");
747 		schedule_fd_rebuild(fd, "algo reported non-recoverable error");
748 	}
749 }
750 
751 static void
752 estimate_nhop_scale(const struct fib_data *old_fd, struct fib_data *fd)
753 {
754 
755 	if (old_fd == NULL) {
756 		// TODO: read from rtable
757 		fd->number_nhops = 16;
758 		return;
759 	}
760 
761 	if (old_fd->hit_nhops && old_fd->number_nhops < FIB_MAX_NHOPS)
762 		fd->number_nhops = 2 * old_fd->number_nhops;
763 	else
764 		fd->number_nhops = old_fd->number_nhops;
765 }
766 
767 struct walk_cbdata {
768 	struct fib_data		*fd;
769 	flm_dump_t		*func;
770 	enum flm_op_result	result;
771 };
772 
773 /*
774  * Handler called after all rtentries have been dumped.
775  * Performs post-dump framework checks and calls
776  * algo:flm_dump_end_cb().
777  *
778  * Updates walk_cbdata result.
779  */
780 static void
781 sync_algo_end_cb(struct rib_head *rnh, enum rib_walk_hook stage, void *_data)
782 {
783 	struct walk_cbdata *w = (struct walk_cbdata *)_data;
784 	struct fib_data *fd = w->fd;
785 
786 	RIB_WLOCK_ASSERT(w->fd->fd_rh);
787 
788 	if (rnh->rib_dying) {
789 		w->result = FLM_ERROR;
790 		return;
791 	}
792 
793 	if (fd->hit_nhops) {
794 		FD_PRINTF(LOG_INFO, fd, "ran out of nexthops at %u nhops",
795 		    fd->nh_ref_table->count);
796 		if (w->result == FLM_SUCCESS)
797 			w->result = FLM_REBUILD;
798 		return;
799 	}
800 
801 	if (stage != RIB_WALK_HOOK_POST || w->result != FLM_SUCCESS)
802 		return;
803 
804 	/* Post-dump hook, dump successful */
805 	w->result = fd->fd_flm->flm_dump_end_cb(fd->fd_algo_data, &fd->fd_dp);
806 
807 	if (w->result == FLM_SUCCESS) {
808 		/* Mark init as done to allow routing updates */
809 		fd->init_done = 1;
810 	}
811 }
812 
813 /*
814  * Callback for each entry in rib.
815  * Calls algo:flm_dump_rib_item_cb func as a part of initial
816  *  route table synchronisation.
817  */
818 static int
819 sync_algo_cb(struct rtentry *rt, void *_data)
820 {
821 	struct walk_cbdata *w = (struct walk_cbdata *)_data;
822 
823 	RIB_WLOCK_ASSERT(w->fd->fd_rh);
824 
825 	if (w->result == FLM_SUCCESS && w->func) {
826 
827 		/*
828 		 * Reference nexthops to maintain the guarantee that
829 		 *  each nexthop returned by the datapath has > 0 references
830 		 *  and can be safely referenced within the current epoch.
831 		 */
832 		struct nhop_object *nh = rt_get_raw_nhop(rt);
833 		if (fib_ref_nhop(w->fd, nh) != 0)
834 			w->result = w->func(rt, w->fd->fd_algo_data);
835 		else
836 			w->result = FLM_REBUILD;
837 	}
838 
839 	return (0);
840 }
841 
842 /*
843  * Dump all routing table state to the algo instance.
844  */
845 static enum flm_op_result
846 sync_algo(struct fib_data *fd)
847 {
848 	struct walk_cbdata w = {
849 		.fd = fd,
850 		.func = fd->fd_flm->flm_dump_rib_item_cb,
851 		.result = FLM_SUCCESS,
852 	};
853 
854 	rib_walk_ext_locked(fd->fd_rh, sync_algo_cb, sync_algo_end_cb, &w);
855 
856 	FD_PRINTF(LOG_INFO, fd,
857 	    "initial dump completed (rtable version: %d), result: %s",
858 	    fd->fd_rh->rnh_gen, print_op_result(w.result));
859 
860 	return (w.result);
861 }
862 
863 /*
864  * Schedules epoch-backed @fd instance deletion.
865  * * Unlinks @fd from the list of active algo instances.
866  * * Removes rib subscription.
867  * * Stops callout.
868  * * Schedules actual deletion.
869  *
870  * Assumes @fd is already unlinked from the datapath.
871  */
872 static int
873 schedule_destroy_fd_instance(struct fib_data *fd, bool in_callout)
874 {
875 	bool is_dead;
876 
877 	NET_EPOCH_ASSERT();
878 	RIB_WLOCK_ASSERT(fd->fd_rh);
879 
880 	FIB_MOD_LOCK();
881 	is_dead = fd->fd_dead;
882 	if (!is_dead)
883 		fd->fd_dead = true;
884 	if (fd->fd_linked) {
885 		TAILQ_REMOVE(&V_fib_data_list, fd, entries);
886 		fd->fd_linked = false;
887 	}
888 	FIB_MOD_UNLOCK();
889 	if (is_dead)
890 		return (0);
891 
892 	FD_PRINTF(LOG_INFO, fd, "DETACH");
893 
894 	if (fd->fd_rs != NULL)
895 		rib_unsibscribe_locked(fd->fd_rs);
896 
897 	/*
898 	 * After rib_unsubscribe() no _new_ handle_rtable_change_cb() calls
899 	 * will be executed, hence no _new_ callout schedules will happen.
900 	 */
901 	callout_stop(&fd->fd_callout);
902 
903 	epoch_call(net_epoch_preempt, destroy_fd_instance_epoch,
904 	    &fd->fd_epoch_ctx);
905 
906 	return (0);
907 }
908 
909 /*
910  * Wipe all fd instances from the list matching rib specified by @rh.
911  * If @keep_first is set, remove all but the first record.
912  */
913 static void
914 fib_cleanup_algo(struct rib_head *rh, bool keep_first, bool in_callout)
915 {
916 	struct fib_data_head tmp_head = TAILQ_HEAD_INITIALIZER(tmp_head);
917 	struct fib_data *fd, *fd_tmp;
918 	struct epoch_tracker et;
919 
920 	FIB_MOD_LOCK();
921 	TAILQ_FOREACH_SAFE(fd, &V_fib_data_list, entries, fd_tmp) {
922 		if (fd->fd_rh == rh) {
923 			if (keep_first) {
924 				keep_first = false;
925 				continue;
926 			}
927 			TAILQ_REMOVE(&V_fib_data_list, fd, entries);
928 			fd->fd_linked = false;
929 			TAILQ_INSERT_TAIL(&tmp_head, fd, entries);
930 		}
931 	}
932 	FIB_MOD_UNLOCK();
933 
934 	/* Pass 2: remove each entry */
935 	NET_EPOCH_ENTER(et);
936 	TAILQ_FOREACH_SAFE(fd, &tmp_head, entries, fd_tmp) {
937 		if (!in_callout)
938 			RIB_WLOCK(fd->fd_rh);
939 		schedule_destroy_fd_instance(fd, in_callout);
940 		if (!in_callout)
941 			RIB_WUNLOCK(fd->fd_rh);
942 	}
943 	NET_EPOCH_EXIT(et);
944 }
945 
946 void
947 fib_destroy_rib(struct rib_head *rh)
948 {
949 
950 	/*
951 	 * rnh has the `rib_dying` flag set, so setup of new fd's will fail at
952 	 *  the sync_algo() stage, preventing new entries from being added to the
953 	 *  list of active algos. Remove all existing entries for the particular rib.
954 	 */
955 	fib_cleanup_algo(rh, false, false);
956 }
957 
958 /*
959  * Finalises fd destruction by freeing all fd resources.
960  */
961 static void
962 destroy_fd_instance(struct fib_data *fd)
963 {
964 
965 	FD_PRINTF(LOG_INFO, fd, "destroy fd %p", fd);
966 
967 	/* Call destroy callback first */
968 	if (fd->fd_algo_data != NULL)
969 		fd->fd_flm->flm_destroy_cb(fd->fd_algo_data);
970 
971 	/* Nhop table */
972 	if ((fd->nh_idx != NULL) && (fd->nh_ref_table != NULL)) {
973 		for (int i = 0; i < fd->number_nhops; i++) {
974 			if (!is_idx_free(fd, i)) {
975 				FD_PRINTF(LOG_DEBUG2, fd, " FREE nhop %d %p",
976 				    i, fd->nh_idx[i]);
977 				nhop_free_any(fd->nh_idx[i]);
978 			}
979 		}
980 		free(fd->nh_idx, M_RTABLE);
981 	}
982 	if (fd->nh_ref_table != NULL)
983 		free(fd->nh_ref_table, M_RTABLE);
984 
985 	fib_unref_algo(fd->fd_flm);
986 
987 	free(fd, M_RTABLE);
988 }
989 
990 /*
991  * Epoch callback indicating fd is safe to destroy
992  */
993 static void
994 destroy_fd_instance_epoch(epoch_context_t ctx)
995 {
996 	struct fib_data *fd;
997 
998 	fd = __containerof(ctx, struct fib_data, fd_epoch_ctx);
999 
1000 	destroy_fd_instance(fd);
1001 }
1002 
1003 /*
1004  * Tries to set up an fd instance.
1005  * - Allocates fd/nhop table
1006  * - Runs algo:flm_init_cb algo init
1007  * - Subscribes fd to the rib
1008  * - Runs rtable dump
1009  * - Adds instance to the list of active instances.
1010  *
1011  * Returns: operation result. Fills in @pfd with resulting fd on success.
1012  *
1013  */
1014 static enum flm_op_result
1015 try_setup_fd_instance(struct fib_lookup_module *flm, struct rib_head *rh,
1016     struct fib_data *old_fd, struct fib_data **pfd)
1017 {
1018 	struct fib_data *fd;
1019 	size_t size;
1020 	enum flm_op_result result;
1021 
1022 	/* Allocate */
1023 	fd = malloc(sizeof(struct fib_data), M_RTABLE, M_NOWAIT | M_ZERO);
1024 	if (fd == NULL)  {
1025 		*pfd = NULL;
1026 		RH_PRINTF(LOG_INFO, rh, "Unable to allocate fib_data structure");
1027 		return (FLM_REBUILD);
1028 	}
1029 	*pfd = fd;
1030 
1031 	estimate_nhop_scale(old_fd, fd);
1032 
1033 	fd->fd_rh = rh;
1034 	fd->fd_gen = ++fib_gen;
1035 	fd->fd_family = rh->rib_family;
1036 	fd->fd_fibnum = rh->rib_fibnum;
1037 	callout_init_rm(&fd->fd_callout, &rh->rib_lock, 0);
1038 	fd->fd_vnet = curvnet;
1039 	fd->fd_flm = flm;
1040 
1041 	FD_PRINTF(LOG_DEBUG, fd, "allocated fd %p", fd);
1042 
1043 	FIB_MOD_LOCK();
1044 	flm->flm_refcount++;
1045 	FIB_MOD_UNLOCK();
1046 
1047 	/* Allocate nhidx -> nhop_ptr table */
1048 	size = fd->number_nhops * sizeof(void *);
1049 	fd->nh_idx = malloc(size, M_RTABLE, M_NOWAIT | M_ZERO);
1050 	if (fd->nh_idx == NULL) {
1051 		FD_PRINTF(LOG_INFO, fd, "Unable to allocate nhop table idx (sz:%zu)", size);
1052 		return (FLM_REBUILD);
1053 	}
1054 
1055 	/* Allocate nhop index refcount table */
1056 	size = sizeof(struct nhop_ref_table);
1057 	size += fd->number_nhops * sizeof(uint32_t);
1058 	fd->nh_ref_table = malloc(size, M_RTABLE, M_NOWAIT | M_ZERO);
1059 	if (fd->nh_ref_table == NULL) {
1060 		FD_PRINTF(LOG_INFO, fd, "Unable to allocate nhop refcount table (sz:%zu)", size);
1061 		return (FLM_REBUILD);
1062 	}
1063 	FD_PRINTF(LOG_DEBUG, fd, "Allocated %u nhop indexes", fd->number_nhops);
1064 
1065 	/* Okay, we're ready for algo init */
1066 	void *old_algo_data = (old_fd != NULL) ? old_fd->fd_algo_data : NULL;
1067 	result = flm->flm_init_cb(fd->fd_fibnum, fd, old_algo_data, &fd->fd_algo_data);
1068 	if (result != FLM_SUCCESS) {
1069 		FD_PRINTF(LOG_INFO, fd, "%s algo init failed", flm->flm_name);
1070 		return (result);
1071 	}
1072 
1073 	/* Try to subscribe */
1074 	if (flm->flm_change_rib_item_cb != NULL) {
1075 		fd->fd_rs = rib_subscribe_locked(fd->fd_rh,
1076 		    handle_rtable_change_cb, fd, RIB_NOTIFY_IMMEDIATE);
1077 		if (fd->fd_rs == NULL) {
1078 			FD_PRINTF(LOG_INFO, fd, "failed to subscribe to the rib changes");
1079 			return (FLM_REBUILD);
1080 		}
1081 	}
1082 
1083 	/* Dump */
1084 	result = sync_algo(fd);
1085 	if (result != FLM_SUCCESS) {
1086 		FD_PRINTF(LOG_INFO, fd, "rib sync failed");
1087 		return (result);
1088 	}
1089 	FD_PRINTF(LOG_INFO, fd, "DUMP completed successfully.");
1090 
1091 	FIB_MOD_LOCK();
1092 	/*
1093 	 * Insert fd at the beginning of the list, to maintain the invariant
1094 	 *  that the first matching entry for the AF/fib is always the active
1095 	 *  one.
1096 	 */
1097 	TAILQ_INSERT_HEAD(&V_fib_data_list, fd, entries);
1098 	fd->fd_linked = true;
1099 	FIB_MOD_UNLOCK();
1100 
1101 	return (FLM_SUCCESS);
1102 }
1103 
1104 /*
1105  * Sets up algo @flm for table @rh and links it to the datapath.
1106  *
1107  */
1108 static enum flm_op_result
1109 setup_fd_instance(struct fib_lookup_module *flm, struct rib_head *rh,
1110     struct fib_data *orig_fd, struct fib_data **pfd, bool attach)
1111 {
1112 	struct fib_data *prev_fd, *new_fd;
1113 	enum flm_op_result result;
1114 
1115 	NET_EPOCH_ASSERT();
1116 	RIB_WLOCK_ASSERT(rh);
1117 
1118 	prev_fd = orig_fd;
1119 	new_fd = NULL;
1120 	for (int i = 0; i < FIB_MAX_TRIES; i++) {
1121 		result = try_setup_fd_instance(flm, rh, prev_fd, &new_fd);
1122 
1123 		if ((result == FLM_SUCCESS) && attach)
1124 			result = attach_datapath(new_fd);
1125 
1126 		if ((prev_fd != NULL) && (prev_fd != orig_fd)) {
1127 			schedule_destroy_fd_instance(prev_fd, false);
1128 			prev_fd = NULL;
1129 		}
1130 
1131 		RH_PRINTF(LOG_INFO, rh, "try %d: fib algo result: %s", i,
1132 		    print_op_result(result));
1133 
1134 		if (result == FLM_REBUILD) {
1135 			prev_fd = new_fd;
1136 			new_fd = NULL;
1137 			continue;
1138 		}
1139 
1140 		break;
1141 	}
1142 
1143 	if (result != FLM_SUCCESS) {
1144 		RH_PRINTF(LOG_WARNING, rh,
1145 		    "%s algo instance setup failed, failures=%d", flm->flm_name,
1146 		    orig_fd ? orig_fd->fd_failed_rebuilds + 1 : 0);
1147 		/* update failure count */
1148 		FIB_MOD_LOCK();
1149 		if (orig_fd != NULL)
1150 			orig_fd->fd_failed_rebuilds++;
1151 		FIB_MOD_UNLOCK();
1152 
1153 		/* Ban algo on non-recoverable error */
1154 		if (result == FLM_ERROR)
1155 			flm_error_add(flm, rh->rib_fibnum);
1156 
1157 		if ((prev_fd != NULL) && (prev_fd != orig_fd))
1158 			schedule_destroy_fd_instance(prev_fd, false);
1159 		if (new_fd != NULL) {
1160 			schedule_destroy_fd_instance(new_fd, false);
1161 			new_fd = NULL;
1162 		}
1163 	}
1164 
1165 	*pfd = new_fd;
1166 	return (result);
1167 }
1168 
1169 /*
1170  * Tries to sync algo with the current rtable state, either
1171  * by executing batch update or rebuilding.
1172  * Returns true on success.
1173  */
1174 static bool
1175 execute_callout_action(struct fib_data *fd)
1176 {
1177 	enum fib_callout_action action = fd->fd_callout_action;
1178 	struct fib_lookup_module *flm_new = NULL;
1179 	bool result = true;
1180 
1181 	NET_EPOCH_ASSERT();
1182 	RIB_WLOCK_ASSERT(fd->fd_rh);
1183 
1184 	fd->fd_need_rebuild = false;
1185 	fd->fd_num_changes = 0;
1186 
1187 	/* First, check if we're still OK to use this algo */
1188 	if (!is_algo_fixed(fd->fd_rh))
1189 		flm_new = fib_check_best_algo(fd->fd_rh, fd->fd_flm);
1190 	if (flm_new != NULL)
1191 		action = FDA_REBUILD;
1192 
1193 	if (action == FDA_REBUILD)
1194 		result = rebuild_fd_flm(fd, flm_new != NULL ? flm_new : fd->fd_flm);
1195 	if (flm_new != NULL)
1196 		fib_unref_algo(flm_new);
1197 
1198 	return (result);
1199 }
1200 
1201 /*
1202  * Callout for all scheduled fd-related work.
1203  * - Checks if the current algo is still the best algo
1204  * - Creates a new instance of an algo for af/fib if desired.
1205  */
1206 static void
1207 handle_fd_callout(void *_data)
1208 {
1209 	struct fib_data *fd = (struct fib_data *)_data;
1210 	struct epoch_tracker et;
1211 
1212 	FD_PRINTF(LOG_INFO, fd, "running callout type=%d", fd->fd_callout_action);
1213 
1214 	NET_EPOCH_ENTER(et);
1215 	CURVNET_SET(fd->fd_vnet);
1216 	execute_callout_action(fd);
1217 	CURVNET_RESTORE();
1218 	NET_EPOCH_EXIT(et);
1219 }
1220 
1221 /*
1222  * Tries to create new algo instance based on @fd data.
1223  * Returns true on success.
1224  */
1225 static bool
1226 rebuild_fd_flm(struct fib_data *fd, struct fib_lookup_module *flm_new)
1227 {
1228 	struct fib_data *fd_new, *fd_tmp = NULL;
1229 	enum flm_op_result result;
1230 
1231 	if (flm_new == fd->fd_flm)
1232 		fd_tmp = fd;
1233 	else
1234 		FD_PRINTF(LOG_NOTICE, fd, "switching algo to %s", flm_new->flm_name);
1235 
1236 	result = setup_fd_instance(flm_new, fd->fd_rh, fd_tmp, &fd_new, true);
1237 	if (result != FLM_SUCCESS) {
1238 		FD_PRINTF(LOG_NOTICE, fd, "table rebuild failed");
1239 		return (false);
1240 	}
1241 	FD_PRINTF(LOG_INFO, fd_new, "switched to new instance");
1242 
1243 	/* Remove old instance */
1244 	schedule_destroy_fd_instance(fd, true);
1245 
1246 	return (true);
1247 }
1248 
1249 static bool
1250 rebuild_fd(struct fib_data *fd, const char *reason)
1251 {
1252 	struct fib_lookup_module *flm_new = NULL;
1253 	bool result;
1254 
1255 	if (!is_algo_fixed(fd->fd_rh))
1256 		flm_new = fib_check_best_algo(fd->fd_rh, fd->fd_flm);
1257 
1258 	FD_PRINTF(LOG_INFO, fd, "running sync rebuild: %s", reason);
1259 	result = rebuild_fd_flm(fd, flm_new != NULL ? flm_new : fd->fd_flm);
1260 	if (flm_new != NULL)
1261 		fib_unref_algo(flm_new);
1262 
1263 	if (!result) {
1264 		FD_PRINTF(LOG_ERR, fd, "sync rebuild failed");
1265 		schedule_fd_rebuild(fd, "sync rebuild failed");
1266 	}
1267 
1268 	return (result);
1269 }
1270 
1271 /*
1272  * Finds algo by name/family.
1273  * Returns referenced algo or NULL.
1274  */
1275 static struct fib_lookup_module *
1276 fib_find_algo(const char *algo_name, int family)
1277 {
1278 	struct fib_lookup_module *flm;
1279 
1280 	FIB_MOD_LOCK();
1281 	TAILQ_FOREACH(flm, &all_algo_list, entries) {
1282 		if ((strcmp(flm->flm_name, algo_name) == 0) &&
1283 		    (family == flm->flm_family)) {
1284 			flm->flm_refcount++;
1285 			FIB_MOD_UNLOCK();
1286 			return (flm);
1287 		}
1288 	}
1289 	FIB_MOD_UNLOCK();
1290 
1291 	return (NULL);
1292 }
1293 
1294 static void
1295 fib_unref_algo(struct fib_lookup_module *flm)
1296 {
1297 
1298 	FIB_MOD_LOCK();
1299 	flm->flm_refcount--;
1300 	FIB_MOD_UNLOCK();
1301 }
1302 
1303 static int
1304 set_fib_algo(uint32_t fibnum, int family, struct sysctl_oid *oidp, struct sysctl_req *req)
1305 {
1306 	struct fib_lookup_module *flm = NULL;
1307 	struct fib_data *fd = NULL;
1308 	char old_algo_name[32], algo_name[32];
1309 	struct rib_head *rh = NULL;
1310 	enum flm_op_result result;
1311 	struct epoch_tracker et;
1312 	int error;
1313 
1314 	/* Fetch current algo/rib for af/family */
1315 	FIB_MOD_LOCK();
1316 	TAILQ_FOREACH(fd, &V_fib_data_list, entries) {
1317 		if ((fd->fd_family == family) && (fd->fd_fibnum == fibnum))
1318 			break;
1319 	}
1320 	if (fd == NULL) {
1321 		FIB_MOD_UNLOCK();
1322 		return (ENOENT);
1323 	}
1324 	rh = fd->fd_rh;
1325 	strlcpy(old_algo_name, fd->fd_flm->flm_name,
1326 	    sizeof(old_algo_name));
1327 	FIB_MOD_UNLOCK();
1328 
1329 	strlcpy(algo_name, old_algo_name, sizeof(algo_name));
1330 	error = sysctl_handle_string(oidp, algo_name, sizeof(algo_name), req);
1331 	if (error != 0 || req->newptr == NULL)
1332 		return (error);
1333 
1334 	if (strcmp(algo_name, old_algo_name) == 0)
1335 		return (0);
1336 
1337 	/* New algorithm name is different */
1338 	flm = fib_find_algo(algo_name, family);
1339 	if (flm == NULL) {
1340 		RH_PRINTF(LOG_INFO, rh, "unable to find algo %s", algo_name);
1341 		return (ESRCH);
1342 	}
1343 
1344 	fd = NULL;
1345 	NET_EPOCH_ENTER(et);
1346 	RIB_WLOCK(rh);
1347 	result = setup_fd_instance(flm, rh, NULL, &fd, true);
1348 	RIB_WUNLOCK(rh);
1349 	NET_EPOCH_EXIT(et);
1350 	fib_unref_algo(flm);
1351 	if (result != FLM_SUCCESS)
1352 		return (EINVAL);
1353 
1354 	/* Disable automated jumping between algos */
1355 	FIB_MOD_LOCK();
1356 	set_algo_fixed(rh);
1357 	FIB_MOD_UNLOCK();
1358 	/* Remove old instance(s) */
1359 	fib_cleanup_algo(rh, true, false);
1360 
1361 	/* Drain callbacks so the user can unload the module after this call if so desired */
1362 	epoch_drain_callbacks(net_epoch_preempt);
1363 
1364 	return (0);
1365 }
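
/*
 * Usage sketch for the handlers below: switching and pinning the lookup
 * algorithm for the current fib (the algorithm name is illustrative;
 * query the matching algo_list sysctl for the names actually available):
 *
 *	# sysctl net.route.algo.inet.algo_list
 *	# sysctl net.route.algo.inet.algo=radix4_lockless
 */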
1366 
1367 #ifdef INET
1368 static int
1369 set_algo_inet_sysctl_handler(SYSCTL_HANDLER_ARGS)
1370 {
1371 
1372 	return (set_fib_algo(curthread->td_proc->p_fibnum, AF_INET, oidp, req));
1373 }
1374 SYSCTL_PROC(_net_route_algo_inet, OID_AUTO, algo,
1375     CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
1376     set_algo_inet_sysctl_handler, "A", "Set IPv4 lookup algo");
1377 #endif
1378 
1379 #ifdef INET6
1380 static int
1381 set_algo_inet6_sysctl_handler(SYSCTL_HANDLER_ARGS)
1382 {
1383 
1384 	return (set_fib_algo(curthread->td_proc->p_fibnum, AF_INET6, oidp, req));
1385 }
1386 SYSCTL_PROC(_net_route_algo_inet6, OID_AUTO, algo,
1387     CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
1388     set_algo_inet6_sysctl_handler, "A", "Set IPv6 lookup algo");
1389 #endif
1390 
1391 static void
1392 destroy_fdh_epoch(epoch_context_t ctx)
1393 {
1394 	struct fib_dp_header *fdh;
1395 
1396 	fdh = __containerof(ctx, struct fib_dp_header, fdh_epoch_ctx);
1397 	free(fdh, M_RTABLE);
1398 }
1399 
1400 static struct fib_dp_header *
1401 alloc_fib_dp_array(uint32_t num_tables, bool waitok)
1402 {
1403 	size_t sz;
1404 	struct fib_dp_header *fdh;
1405 
1406 	sz = sizeof(struct fib_dp_header);
1407 	sz += sizeof(struct fib_dp) * num_tables;
1408 	fdh = malloc(sz, M_RTABLE, (waitok ? M_WAITOK : M_NOWAIT) | M_ZERO);
1409 	if (fdh != NULL)
1410 		fdh->fdh_num_tables = num_tables;
1411 	return (fdh);
1412 }
1413 
1414 static struct fib_dp_header *
1415 get_fib_dp_header(struct fib_dp *dp)
1416 {
1417 
1418 	return (__containerof((void *)dp, struct fib_dp_header, fdh_idx));
1419 }
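
/*
 * Since fdh_idx[] is the last member of struct fib_dp_header, the
 * datapath publishes only the array pointer while __containerof() above
 * recovers the enclosing header. A hypothetical helper:
 *
 *	static uint32_t
 *	example_dp_num_tables(struct fib_dp *dp)
 *	{
 *		return (get_fib_dp_header(dp)->fdh_num_tables);
 *	}
 */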
1420 
1421 /*
1422  * Replace per-family index pool @pdp with a new one which
1423  * contains updated callback/algo data from @fd.
1424  * Returns FLM_SUCCESS on success.
1425  */
1426 static enum flm_op_result
1427 replace_rtables_family(struct fib_dp **pdp, struct fib_data *fd)
1428 {
1429 	struct fib_dp_header *new_fdh, *old_fdh;
1430 
1431 	NET_EPOCH_ASSERT();
1432 
1433 	FD_PRINTF(LOG_DEBUG, fd, "[vnet %p] replace with f:%p arg:%p",
1434 	    curvnet, fd->fd_dp.f, fd->fd_dp.arg);
1435 
1436 	FIB_MOD_LOCK();
1437 	old_fdh = get_fib_dp_header(*pdp);
1438 	new_fdh = alloc_fib_dp_array(old_fdh->fdh_num_tables, false);
1439 	FD_PRINTF(LOG_DEBUG, fd, "OLD FDH: %p NEW FDH: %p", old_fdh, new_fdh);
1440 	if (new_fdh == NULL) {
1441 		FIB_MOD_UNLOCK();
1442 		FD_PRINTF(LOG_WARNING, fd, "error attaching datapath");
1443 		return (FLM_REBUILD);
1444 	}
1445 
1446 	memcpy(&new_fdh->fdh_idx[0], &old_fdh->fdh_idx[0],
1447 	    old_fdh->fdh_num_tables * sizeof(struct fib_dp));
1448 	/* Update relevant data structure for @fd */
1449 	new_fdh->fdh_idx[fd->fd_fibnum] = fd->fd_dp;
1450 
1451 	/* Ensure memcpy() writes have completed */
1452 	atomic_thread_fence_rel();
1453 	/* Set new datapath pointer */
1454 	*pdp = &new_fdh->fdh_idx[0];
1455 	FIB_MOD_UNLOCK();
1456 	FD_PRINTF(LOG_DEBUG, fd, "update %p -> %p", old_fdh, new_fdh);
1457 
1458 	epoch_call(net_epoch_preempt, destroy_fdh_epoch,
1459 	    &old_fdh->fdh_epoch_ctx);
1460 
1461 	return (FLM_SUCCESS);
1462 }
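
/*
 * Reader-side sketch of the epoch scheme above (example_fib_lookup() is
 * the hypothetical helper sketched near the top of this file): consumers
 * dereference the array pointer inside the net epoch, so a concurrent
 * replace shows them either the old or the new array, and the old one is
 * only freed by destroy_fdh_epoch() once all readers have left.
 *
 *	struct epoch_tracker et;
 *
 *	NET_EPOCH_ENTER(et);
 *	nh = example_fib_lookup(*pdp, fibnum, key, scope);
 *	// nh is guaranteed valid until NET_EPOCH_EXIT()
 *	NET_EPOCH_EXIT(et);
 */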
1463 
1464 static struct fib_dp **
1465 get_family_dp_ptr(int family)
1466 {
1467 	switch (family) {
1468 	case AF_INET:
1469 		return (&V_inet_dp);
1470 	case AF_INET6:
1471 		return (&V_inet6_dp);
1472 	}
1473 	return (NULL);
1474 }
1475 
1476 /*
1477  * Make datapath use fib instance @fd
1478  */
1479 static enum flm_op_result
1480 attach_datapath(struct fib_data *fd)
1481 {
1482 	struct fib_dp **pdp;
1483 
1484 	pdp = get_family_dp_ptr(fd->fd_family);
1485 	return (replace_rtables_family(pdp, fd));
1486 }
1487 
1488 /*
1489  * Grow datapath pointers array.
1490  * Called from sysctl handler on growing number of routing tables.
1491  */
1492 static void
1493 grow_rtables_family(struct fib_dp **pdp, uint32_t new_num_tables)
1494 {
1495 	struct fib_dp_header *new_fdh, *old_fdh = NULL;
1496 
1497 	new_fdh = alloc_fib_dp_array(new_num_tables, true);
1498 
1499 	FIB_MOD_LOCK();
1500 	if (*pdp != NULL) {
1501 		old_fdh = get_fib_dp_header(*pdp);
1502 		memcpy(&new_fdh->fdh_idx[0], &old_fdh->fdh_idx[0],
1503 		    old_fdh->fdh_num_tables * sizeof(struct fib_dp));
1504 	}
1505 
1506 	/* Ensure the memcpy() writes have completed */
1507 	atomic_thread_fence_rel();
1508 
1509 	*pdp = &new_fdh->fdh_idx[0];
1510 	FIB_MOD_UNLOCK();
1511 
1512 	if (old_fdh != NULL)
1513 		epoch_call(net_epoch_preempt, destroy_fdh_epoch,
1514 		    &old_fdh->fdh_epoch_ctx);
1515 }
1516 
1517 /*
1518  * Grows per-AF arrays of datapath pointers for each supported family.
1519  * Called from fibs resize sysctl handler.
1520  */
1521 void
1522 fib_grow_rtables(uint32_t new_num_tables)
1523 {
1524 
1525 #ifdef INET
1526 	grow_rtables_family(get_family_dp_ptr(AF_INET), new_num_tables);
1527 #endif
1528 #ifdef INET6
1529 	grow_rtables_family(get_family_dp_ptr(AF_INET6), new_num_tables);
1530 #endif
1531 }
1532 
1533 void
1534 fib_get_rtable_info(struct rib_head *rh, struct rib_rtable_info *rinfo)
1535 {
1536 
1537 	bzero(rinfo, sizeof(struct rib_rtable_info));
1538 	rinfo->num_prefixes = rh->rnh_prefixes;
1539 	rinfo->num_nhops = nhops_get_count(rh);
1540 #ifdef ROUTE_MPATH
1541 	rinfo->num_nhgrp = nhgrp_get_count(rh);
1542 #endif
1543 }
1544 
1545 /*
1546  * Accessor to get rib instance @fd is attached to.
1547  */
1548 struct rib_head *
1549 fib_get_rh(struct fib_data *fd)
1550 {
1551 
1552 	return (fd->fd_rh);
1553 }
1554 
1555 /*
1556  * Accessor to export idx->nhop array
1557  */
1558 struct nhop_object **
1559 fib_get_nhop_array(struct fib_data *fd)
1560 {
1561 
1562 	return (fd->nh_idx);
1563 }
1564 
1565 static uint32_t
1566 get_nhop_idx(struct nhop_object *nh)
1567 {
1568 #ifdef ROUTE_MPATH
1569 	if (NH_IS_NHGRP(nh))
1570 		return (nhgrp_get_idx((struct nhgrp_object *)nh) * 2 - 1);
1571 	else
1572 		return (nhop_get_idx(nh) * 2);
1573 #else
1574 	return (nhop_get_idx(nh));
1575 #endif
1576 }
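
/*
 * Worked example of the mapping above (with ROUTE_MPATH): plain nexthops
 * take even slots and nexthop groups odd slots, so both share one index
 * space without collisions:
 *  nhop idx 5  -> 10, nhop idx 6  -> 12
 *  nhgrp idx 5 -> 9,  nhgrp idx 6 -> 11
 */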
1577 
1578 uint32_t
1579 fib_get_nhop_idx(struct fib_data *fd, struct nhop_object *nh)
1580 {
1581 
1582 	return (get_nhop_idx(nh));
1583 }
1584 
1585 static bool
1586 is_idx_free(struct fib_data *fd, uint32_t index)
1587 {
1588 
1589 	return (fd->nh_ref_table->refcnt[index] == 0);
1590 }
1591 
1592 static uint32_t
1593 fib_ref_nhop(struct fib_data *fd, struct nhop_object *nh)
1594 {
1595 	uint32_t idx = get_nhop_idx(nh);
1596 
1597 	if (idx >= fd->number_nhops) {
1598 		fd->hit_nhops = 1;
1599 		return (0);
1600 	}
1601 
1602 	if (is_idx_free(fd, idx)) {
1603 		nhop_ref_any(nh);
1604 		fd->nh_idx[idx] = nh;
1605 		fd->nh_ref_table->count++;
1606 		FD_PRINTF(LOG_DEBUG2, fd, " REF nhop %u %p", idx, fd->nh_idx[idx]);
1607 	}
1608 	fd->nh_ref_table->refcnt[idx]++;
1609 
1610 	return (idx);
1611 }
1612 
1613 struct nhop_release_data {
1614 	struct nhop_object	*nh;
1615 	struct epoch_context	ctx;
1616 };
1617 
1618 static void
1619 release_nhop_epoch(epoch_context_t ctx)
1620 {
1621 	struct nhop_release_data *nrd;
1622 
1623 	nrd = __containerof(ctx, struct nhop_release_data, ctx);
1624 	nhop_free_any(nrd->nh);
1625 	free(nrd, M_TEMP);
1626 }
1627 
1628 /*
1629  * Delays nexthop refcount release.
1630  * The datapath may not have updated its data structures yet, so the old
1631  *  nexthop may still be returned until the end of the current epoch. Delay
1632  *  the refcount release, as we may be dropping the last reference, which
1633  *  would trigger nexthop deletion, rendering the returned nexthop invalid.
1634  */
1635 static void
1636 fib_schedule_release_nhop(struct fib_data *fd, struct nhop_object *nh)
1637 {
1638 	struct nhop_release_data *nrd;
1639 
1640 	nrd = malloc(sizeof(struct nhop_release_data), M_TEMP, M_NOWAIT | M_ZERO);
1641 	if (nrd != NULL) {
1642 		nrd->nh = nh;
1643 		epoch_call(net_epoch_preempt, release_nhop_epoch, &nrd->ctx);
1644 	} else {
1645 		/*
1646 		 * Unable to allocate memory. Leak the nexthop to maintain the
1647 		 *  guarantee that each nhop stays referenced.
1648 		 */
1649 		FD_PRINTF(LOG_ERR, fd, "unable to schedule nhop %p deletion", nh);
1650 	}
1651 }
1652 
1653 static void
1654 fib_unref_nhop(struct fib_data *fd, struct nhop_object *nh)
1655 {
1656 	uint32_t idx = get_nhop_idx(nh);
1657 
1658 	KASSERT((idx < fd->number_nhops), ("invalid nhop index"));
1659 	KASSERT((nh == fd->nh_idx[idx]), ("index table contains wrong nh"));
1660 
1661 	fd->nh_ref_table->refcnt[idx]--;
1662 	if (fd->nh_ref_table->refcnt[idx] == 0) {
1663 		FD_PRINTF(LOG_DEBUG, fd, " FREE nhop %d %p", idx, fd->nh_idx[idx]);
1664 		fib_schedule_release_nhop(fd, fd->nh_idx[idx]);
1665 	}
1666 }
1667 
1668 static void
1669 set_algo_fixed(struct rib_head *rh)
1670 {
1671 	switch (rh->rib_family) {
1672 #ifdef INET
1673 	case AF_INET:
1674 		V_algo_fixed_inet = true;
1675 		break;
1676 #endif
1677 #ifdef INET6
1678 	case AF_INET6:
1679 		V_algo_fixed_inet6 = true;
1680 		break;
1681 #endif
1682 	}
1683 }
1684 
1685 static bool
1686 is_algo_fixed(struct rib_head *rh)
1687 {
1688 
1689 	switch (rh->rib_family) {
1690 #ifdef INET
1691 	case AF_INET:
1692 		return (V_algo_fixed_inet);
1693 #endif
1694 #ifdef INET6
1695 	case AF_INET6:
1696 		return (V_algo_fixed_inet6);
1697 #endif
1698 	}
1699 	return (false);
1700 }
1701 
1702 /*
1703  * Runs the check on what would be the best algo for rib @rh, assuming
1704  *  that the current algo is the one specified by @orig_flm. Note that
1705  *  @orig_flm can be NULL for the initial selection.
1706  *
1707  * Returns referenced new algo or NULL if the current one is the best.
1708  */
1709 static struct fib_lookup_module *
1710 fib_check_best_algo(struct rib_head *rh, struct fib_lookup_module *orig_flm)
1711 {
1712 	uint8_t preference, curr_preference = 0, best_preference = 0;
1713 	struct fib_lookup_module *flm, *best_flm = NULL;
1714 	struct rib_rtable_info rinfo;
1715 	int candidate_algos = 0;
1716 
1717 	fib_get_rtable_info(rh, &rinfo);
1718 
1719 	FIB_MOD_LOCK();
1720 	TAILQ_FOREACH(flm, &all_algo_list, entries) {
1721 		if (flm->flm_family != rh->rib_family)
1722 			continue;
1723 		candidate_algos++;
1724 		preference = flm->flm_get_pref(&rinfo);
1725 		if (preference > best_preference) {
1726 			if (!flm_error_check(flm, rh->rib_fibnum)) {
1727 				best_preference = preference;
1728 				best_flm = flm;
1729 			}
1730 		}
1731 		if (flm == orig_flm)
1732 			curr_preference = preference;
1733 	}
1734 	if ((best_flm != NULL) && (curr_preference + BEST_DIFF_PERCENT < best_preference))
1735 		best_flm->flm_refcount++;
1736 	else
1737 		best_flm = NULL;
1738 	FIB_MOD_UNLOCK();
1739 
1740 	RH_PRINTF(LOG_DEBUG, rh, "candidate_algos: %d, curr: %s(%d) result: %s(%d)",
1741 	    candidate_algos, orig_flm ? orig_flm->flm_name : "NULL", curr_preference,
1742 	    best_flm ? best_flm->flm_name : (orig_flm ? orig_flm->flm_name : "NULL"),
1743 	    best_preference);
1744 
1745 	return (best_flm);
1746 }
1747 
1748 /*
1749  * Called when a new route table is created.
1750  * Selects, allocates and attaches a fib algo for the table.
1751  */
1752 int
1753 fib_select_algo_initial(struct rib_head *rh)
1754 {
1755 	struct fib_lookup_module *flm;
1756 	struct fib_data *fd = NULL;
1757 	enum flm_op_result result;
1758 	struct epoch_tracker et;
1759 	int error = 0;
1760 
1761 	flm = fib_check_best_algo(rh, NULL);
1762 	if (flm == NULL) {
1763 		RH_PRINTF(LOG_CRIT, rh, "no algo selected");
1764 		return (ENOENT);
1765 	}
1766 	RH_PRINTF(LOG_INFO, rh, "selected algo %s", flm->flm_name);
1767 
1768 	NET_EPOCH_ENTER(et);
1769 	RIB_WLOCK(rh);
1770 	result = setup_fd_instance(flm, rh, NULL, &fd, false);
1771 	RIB_WUNLOCK(rh);
1772 	NET_EPOCH_EXIT(et);
1773 
1774 	RH_PRINTF(LOG_DEBUG, rh, "result=%d fd=%p", result, fd);
1775 	if (result == FLM_SUCCESS) {
1776 
1777 		/*
1778 		 * Attach datapath directly to avoid multiple reallocations
1779 		 * during fib growth
1780 		 */
1781 		struct fib_dp_header *fdp;
1782 		struct fib_dp **pdp;
1783 
1784 		pdp = get_family_dp_ptr(rh->rib_family);
1785 		if (pdp != NULL) {
1786 			fdp = get_fib_dp_header(*pdp);
1787 			fdp->fdh_idx[fd->fd_fibnum] = fd->fd_dp;
1788 			FD_PRINTF(LOG_INFO, fd, "datapath attached");
1789 		}
1790 	} else {
1791 		error = EINVAL;
1792 		RH_PRINTF(LOG_CRIT, rh, "unable to setup algo %s", flm->flm_name);
1793 	}
1794 
1795 	fib_unref_algo(flm);
1796 
1797 	return (error);
1798 }
1799 
1800 /*
1801  * Registers a fib lookup module within the subsystem.
1802  */
1803 int
1804 fib_module_register(struct fib_lookup_module *flm)
1805 {
1806 
1807 	FIB_MOD_LOCK();
1808 	ALGO_PRINTF("attaching %s to %s", flm->flm_name,
1809 	    print_family(flm->flm_family));
1810 	TAILQ_INSERT_TAIL(&all_algo_list, flm, entries);
1811 	FIB_MOD_UNLOCK();
1812 
1813 	return (0);
1814 }
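
/*
 * Registration sketch (hypothetical module; the callback field names
 * match the ones used in this file, a real module would also fill in
 * its lookup and preference callbacks as defined in fib_algo.h):
 *
 *	static struct fib_lookup_module example_flm = {
 *		.flm_name = "example4",
 *		.flm_family = AF_INET,
 *		.flm_init_cb = example_init,
 *		.flm_destroy_cb = example_destroy,
 *		.flm_dump_rib_item_cb = example_dump_rib_item,
 *		.flm_dump_end_cb = example_dump_end,
 *		.flm_change_rib_item_cb = example_change_rib_item,
 *		.flm_get_pref = example_get_pref,
 *	};
 *
 *	// typically called from a MOD_LOAD module event handler
 *	fib_module_register(&example_flm);
 */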
1815 
1816 /*
1817  * Tries to unregister a fib lookup module.
1818  *
1819  * Returns 0 on success, EBUSY if the module is still used
1820  *  by some of the tables.
1821  */
1822 int
1823 fib_module_unregister(struct fib_lookup_module *flm)
1824 {
1825 
1826 	FIB_MOD_LOCK();
1827 	if (flm->flm_refcount > 0) {
1828 		FIB_MOD_UNLOCK();
1829 		return (EBUSY);
1830 	}
1831 	fib_error_clear_flm(flm);
1832 	ALGO_PRINTF("detaching %s from %s", flm->flm_name,
1833 	    print_family(flm->flm_family));
1834 	TAILQ_REMOVE(&all_algo_list, flm, entries);
1835 	FIB_MOD_UNLOCK();
1836 
1837 	return (0);
1838 }
1839 
1840 void
1841 vnet_fib_init(void)
1842 {
1843 
1844 	TAILQ_INIT(&V_fib_data_list);
1845 }
1846 
1847 void
1848 vnet_fib_destroy(void)
1849 {
1850 
1851 	FIB_MOD_LOCK();
1852 	fib_error_clear();
1853 	FIB_MOD_UNLOCK();
1854 }
1855