xref: /freebsd/sys/netinet/in_fib_algo.c (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2020 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include "opt_inet.h"
31 
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/lock.h>
35 #include <sys/rmlock.h>
36 #include <sys/malloc.h>
37 #include <sys/kernel.h>
38 #include <sys/priv.h>
39 #include <sys/socket.h>
40 #include <sys/sysctl.h>
41 #include <net/vnet.h>
42 
43 #include <net/if.h>
44 #include <netinet/in.h>
45 
46 #include <net/route.h>
47 #include <net/route/nhop.h>
48 #include <net/route/route_ctl.h>
49 #include <net/route/route_var.h>
50 #include <net/route/fib_algo.h>
51 
52 /*
53  * Binary search lookup algo.
54  *
55  * Compiles route table into a sorted array.
56  * Used with small amount of routes (< 16).
57  * As array is immutable, it is rebuild on each rtable change.
58  *
59  * Example:
60  *
61  * 0.0.0.0/0 -> nh1
62  * 10.0.0.0/24 -> nh2
63  * 10.0.0.1/32 -> nh3
64  *
65  * gets compiled to:
66  *
67  * 0.0.0.0 -> nh1
68  * 10.0.0.0 -> nh2
69  * 10.0.0.1 -> nh3
70  * 10.0.0.2 -> nh2
71  * 10.0.1.0 -> nh1
72  *
73  */
74 
75 struct bsearch4_record {
76 	uint32_t		addr4;
77 	uint32_t		mask4;
78 	struct nhop_object	*nh;
79 };
80 
81 struct bsearch4_data {
82 	struct fib_data		*fd;
83 	uint32_t		alloc_items;
84 	uint32_t		num_items;
85 	void			*mem;
86 	struct bsearch4_record	*rr;
87 	struct bsearch4_record	br[0];
88 };
89 
90 /*
91  * Main IPv4 address lookup function.
92  *
93  * Finds array record with maximum index that is <= provided key.
94  * Assumes 0.0.0.0/0 always exists (may be with NULL nhop)
95  */
96 static struct nhop_object *
97 bsearch4_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
98 {
99 	const struct bsearch4_data *bd = (const struct bsearch4_data *)algo_data;
100 	const struct bsearch4_record *br;
101 	uint32_t addr4 = ntohl(key.addr4.s_addr);
102 
103 	int start = 0;
104 	int end = bd->num_items;
105 
106 	int i = (start + end) / 2;
107 	while (start + 1 < end) {
108 		i = (start + end) / 2;
109 		br = &bd->br[i];
110 		if (addr4 < br->addr4) {
111 			/* key < average, reduce right boundary */
112 			end = i;
113 			continue;
114 		} else if (addr4 > br->addr4) {
115 			/* key > average, increase left aboundary */
116 			start = i;
117 			continue;
118 		} else {
119 			/* direct match */
120 			return (br->nh);
121 		}
122 	}
123 	/* start + 1 == end */
124 	return (bd->br[start].nh);
125 }
126 
127 /*
128  * Preference function.
129  * Assume ideal for < 10 (typical single-interface setup has 5)
130  * Then gradually degrade.
131  * Assume 30 prefixes is at least 60 records, so it will require 8 lookup,
132  *  which is even worse than radix.
133  */
134 static uint8_t
135 bsearch4_get_pref(const struct rib_rtable_info *rinfo)
136 {
137 
138 	if (rinfo->num_prefixes < 10)
139 		return (253);
140 	else if (rinfo->num_prefixes < 30)
141 		return (255 - rinfo->num_prefixes * 8);
142 	else
143 		return (1);
144 }
145 
146 static enum flm_op_result
147 bsearch4_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data)
148 {
149 	struct bsearch4_data *bd;
150 	struct rib_rtable_info rinfo;
151 	uint32_t count;
152 	size_t sz;
153 	void *mem;
154 
155 	fib_get_rtable_info(fib_get_rh(fd), &rinfo);
156 	count = rinfo.num_prefixes * 11 / 10 + 64;
157 
158 	sz = sizeof(struct bsearch4_data) + sizeof(struct bsearch4_record) * count;
159 	/* add cache line sz to ease alignment */
160 	sz += CACHE_LINE_SIZE;
161 	mem = malloc(sz, M_RTABLE, M_NOWAIT | M_ZERO);
162 	if (mem == NULL)
163 		return (FLM_REBUILD);
164 	/* Align datapath-usable structure to cache line boundary */
165 	bd = (struct bsearch4_data *)roundup2((uintptr_t)mem, CACHE_LINE_SIZE);
166 	bd->mem = mem;
167 	bd->alloc_items = count;
168 	bd->fd = fd;
169 
170 	*_data = bd;
171 
172 	/*
173 	 * Allocate temporary array to store all rtable data.
174 	 * This step is required to provide the required prefix iteration order.
175 	 */
176 	bd->rr = mallocarray(count, sizeof(struct bsearch4_record), M_TEMP, M_NOWAIT | M_ZERO);
177 	if (bd->rr == NULL)
178 		return (FLM_REBUILD);
179 
180 	return (FLM_SUCCESS);
181 }
182 
183 static void
184 bsearch4_destroy(void *_data)
185 {
186 	struct bsearch4_data *bd = (struct bsearch4_data *)_data;
187 
188 	if (bd->rr != NULL)
189 		free(bd->rr, M_TEMP);
190 	free(bd->mem, M_RTABLE);
191 }
192 
193 /*
194  * Callback storing converted rtable prefixes in the temporary array.
195  * Addresses are converted to a host order.
196  */
197 static enum flm_op_result
198 bsearch4_add_route_cb(struct rtentry *rt, void *_data)
199 {
200 	struct bsearch4_data *bd = (struct bsearch4_data *)_data;
201 	struct bsearch4_record *rr;
202 	struct in_addr addr4, mask4;
203 	uint32_t scopeid;
204 
205 	if (bd->num_items >= bd->alloc_items)
206 		return (FLM_REBUILD);
207 
208 	rr = &bd->rr[bd->num_items++];
209 	rt_get_inet_prefix_pmask(rt, &addr4, &mask4, &scopeid);
210 	rr->addr4 = ntohl(addr4.s_addr);
211 	rr->mask4 = ntohl(mask4.s_addr);
212 	rr->nh = rt_get_raw_nhop(rt);
213 
214 	return (FLM_SUCCESS);
215 }
216 
217 /*
218  * Prefix comparison function.
219  * 10.0.0.0/24 < 10.0.0.0/25 <- less specific wins
220  * 10.0.0.0/25 < 10.0.0.1/32 <- bigger base wins
221  */
222 static int
223 rr_cmp(const void *_rec1, const void *_rec2)
224 {
225 	const struct bsearch4_record *rec1, *rec2;
226 	rec1 = _rec1;
227 	rec2 = _rec2;
228 
229 	if (rec1->addr4 < rec2->addr4)
230 		return (-1);
231 	else if (rec1->addr4 > rec2->addr4)
232 		return (1);
233 
234 	/*
235 	 * wider mask value is lesser mask
236 	 * we want less specific come first, e.g. <
237 	 */
238 	if (rec1->mask4 < rec2->mask4)
239 		return (-1);
240 	else if (rec1->mask4 > rec2->mask4)
241 		return (1);
242 	return (0);
243 }
244 
245 struct bsearch4_array {
246 	uint32_t		alloc_items;
247 	uint32_t		num_items;
248 	struct bsearch4_record	*arr;
249 };
250 
251 static bool
252 add_array_entry(struct bsearch4_array *ba, struct bsearch4_record *br_new)
253 {
254 
255 	if (ba->num_items < ba->alloc_items) {
256 		ba->arr[ba->num_items++] = *br_new;
257 		return (true);
258 	}
259 	return (false);
260 }
261 
262 static struct bsearch4_record *
263 get_last_entry(struct bsearch4_array *ba)
264 {
265 
266 	return (&ba->arr[ba->num_items - 1]);
267 }
268 
269 /*
270  *
271  * Example:
272  *  stack: 10.0.1.0/24,nh=3 array: 10.0.1.0/25,nh=4 -> ++10.0.1.128/24,nh=3
273  *
274  *
275  */
276 static bool
277 pop_stack_entry(struct bsearch4_array *dst_array, struct bsearch4_array *stack)
278 {
279 	uint32_t last_stack_addr, last_array_addr;
280 
281 	struct bsearch4_record *br_prev = get_last_entry(dst_array);
282 	struct bsearch4_record *pstack = get_last_entry(stack);
283 
284 	/* Regardless of the result, pop stack entry */
285 	stack->num_items--;
286 
287 	/* Prefix last address for the last entry in lookup array */
288 	last_array_addr = (br_prev->addr4 | ~br_prev->mask4);
289 	/* Prefix last address for the stack record entry */
290 	last_stack_addr = (pstack->addr4 | ~pstack->mask4);
291 
292 	if (last_stack_addr > last_array_addr) {
293 		/*
294 		 * Stack record covers > address space than
295 		 * the last entry in the lookup array.
296 		 * Add the remaining parts of a stack record to
297 		 * the lookup array.
298 		 */
299 		struct bsearch4_record br_new = {
300 			.addr4 = last_array_addr + 1,
301 			.mask4 = pstack->mask4,
302 			.nh = pstack->nh,
303 		};
304 		return (add_array_entry(dst_array, &br_new));
305 	}
306 
307 	return (true);
308 }
309 
310 /*
311  * Updates resulting array @dst_array with a rib entry @rib_entry.
312  */
313 static bool
314 bsearch4_process_record(struct bsearch4_array *dst_array,
315     struct bsearch4_array *stack, struct bsearch4_record *rib_entry)
316 {
317 
318 	/*
319 	 * Maintain invariant: current rib_entry is always contained
320 	 *  in the top stack entry.
321 	 * Note we always have 0.0.0.0/0.
322 	 */
323 	while (stack->num_items > 0) {
324 		struct bsearch4_record *pst = get_last_entry(stack);
325 
326 		/*
327 		 * Check if we need to pop stack.
328 		 * Rely on the ordering - larger prefixes comes up first
329 		 * Pop any entry that doesn't contain current prefix.
330 		 */
331 		if (pst->addr4 == (rib_entry->addr4 & pst->mask4))
332 			break;
333 
334 		if (!pop_stack_entry(dst_array, stack))
335 			return (false);
336 	}
337 
338 	 if (dst_array->num_items > 0) {
339 
340 		 /*
341 		  * Check if there is a gap between previous entry and a
342 		  *  current entry. Code above guarantees that both previous
343 		  *  and current entry are contained in the top stack entry.
344 		  *
345 		  * Example: last: 10.0.0.1(/32,nh=3) cur: 10.0.0.3(/32,nh=4),
346 		  *  stack: 10.0.0.0/24,nh=2.
347 		  * Cover a gap between previous and current by adding stack
348 		  *  nexthop.
349 		  */
350 		 struct bsearch4_record *br_tmp = get_last_entry(dst_array);
351 		 uint32_t last_declared_addr = br_tmp->addr4 | ~br_tmp->mask4;
352 		 if (last_declared_addr < rib_entry->addr4 - 1) {
353 			 /* Cover a hole */
354 			struct bsearch4_record *pst = get_last_entry(stack);
355 			struct bsearch4_record new_entry = {
356 				.addr4 = last_declared_addr + 1,
357 				.mask4 = pst->mask4,
358 				.nh = pst->nh,
359 			};
360 			if (!add_array_entry(dst_array, &new_entry))
361 				return (false);
362 		 }
363 
364 		 /*
365 		  * Special case: adding more specific prefix at the start of
366 		  * the previous interval:
367 		  * 10.0.0.0(/24,nh=3), 10.0.0.0(/25,nh=4)
368 		  * Alter the last record, seeting new nexthop and mask.
369 		  */
370 		 if (br_tmp->addr4 == rib_entry->addr4) {
371 			*br_tmp = *rib_entry;
372 			add_array_entry(stack, rib_entry);
373 			return (true);
374 		 }
375 	 }
376 
377 	if (!add_array_entry(dst_array, rib_entry))
378 		return (false);
379 	add_array_entry(stack, rib_entry);
380 
381 	return (true);
382 }
383 
384 static enum flm_op_result
385 bsearch4_build_array(struct bsearch4_array *dst_array, struct bsearch4_array *src_array)
386 {
387 
388 	/*
389 	 * During iteration, we keep track of all prefixes in rtable
390 	 * we currently match, by maintaining stack. As there can be only
391 	 * 32 prefixes for a single address, pre-allocate stack of size 32.
392 	 */
393 	struct bsearch4_array stack = {
394 		.alloc_items = 32,
395 		.arr = mallocarray(32, sizeof(struct bsearch4_record), M_TEMP, M_NOWAIT | M_ZERO),
396 	};
397 	if (stack.arr == NULL)
398 		return (FLM_REBUILD);
399 
400 	for (int i = 0; i < src_array->num_items; i++) {
401 		struct bsearch4_record *rib_entry = &src_array->arr[i];
402 
403 		if (!bsearch4_process_record(dst_array, &stack, rib_entry)) {
404 			free(stack.arr, M_TEMP);
405 			return (FLM_REBUILD);
406 		}
407 	}
408 
409 	/*
410 	 * We know that last record is contained in the top stack entry.
411 	 */
412 	while (stack.num_items > 0) {
413 		if (!pop_stack_entry(dst_array, &stack))
414 			return (FLM_REBUILD);
415 	}
416 	free(stack.arr, M_TEMP);
417 
418 	return (FLM_SUCCESS);
419 }
420 
421 static enum flm_op_result
422 bsearch4_build(struct bsearch4_data *bd)
423 {
424 	enum flm_op_result ret;
425 
426 	struct bsearch4_array prefixes_array = {
427 		.alloc_items = bd->alloc_items,
428 		.num_items = bd->num_items,
429 		.arr = bd->rr,
430 	};
431 
432 	/* Add default route if not exists */
433 	bool default_found = false;
434 	for (int i = 0; i < prefixes_array.num_items; i++) {
435 		if (prefixes_array.arr[i].mask4 == 0) {
436 			default_found = true;
437 			break;
438 		}
439 	}
440 	if (!default_found) {
441 		 /* Add default route with NULL nhop */
442 		struct bsearch4_record default_entry = {};
443 		if (!add_array_entry(&prefixes_array, &default_entry))
444 			 return (FLM_REBUILD);
445 	}
446 
447 	/* Sort prefixes */
448 	qsort(prefixes_array.arr, prefixes_array.num_items, sizeof(struct bsearch4_record), rr_cmp);
449 
450 	struct bsearch4_array dst_array = {
451 		.alloc_items = bd->alloc_items,
452 		.arr = bd->br,
453 	};
454 
455 	ret = bsearch4_build_array(&dst_array, &prefixes_array);
456 	bd->num_items = dst_array.num_items;
457 
458 	free(bd->rr, M_TEMP);
459 	bd->rr = NULL;
460 	return (ret);
461 }
462 
463 
464 static enum flm_op_result
465 bsearch4_end_dump(void *_data, struct fib_dp *dp)
466 {
467 	struct bsearch4_data *bd = (struct bsearch4_data *)_data;
468 	enum flm_op_result ret;
469 
470 	ret = bsearch4_build(bd);
471 	if (ret == FLM_SUCCESS) {
472 		dp->f = bsearch4_lookup;
473 		dp->arg = bd;
474 	}
475 
476 	return (ret);
477 }
478 
479 static enum flm_op_result
480 bsearch4_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
481     void *_data)
482 {
483 
484 	return (FLM_REBUILD);
485 }
486 
487 struct fib_lookup_module flm_bsearch4= {
488 	.flm_name = "bsearch4",
489 	.flm_family = AF_INET,
490 	.flm_init_cb = bsearch4_init,
491 	.flm_destroy_cb = bsearch4_destroy,
492 	.flm_dump_rib_item_cb = bsearch4_add_route_cb,
493 	.flm_dump_end_cb = bsearch4_end_dump,
494 	.flm_change_rib_item_cb = bsearch4_change_cb,
495 	.flm_get_pref = bsearch4_get_pref,
496 };
497 
498 /*
499  * Lockless radix lookup algo.
500  *
501  * Compiles immutable radix from the current routing table.
502  * Used with small amount of routes (<1000).
503  * As datastructure is immutable, it gets rebuild on each rtable change.
504  *
505  * Lookups are slightly faster as shorter lookup keys are used
506  *  (4 bytes instead of 8 in stock radix).
507  */
508 
509 #define KEY_LEN_INET	(offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t))
510 #define OFF_LEN_INET	(8 * offsetof(struct sockaddr_in, sin_addr))
511 struct radix4_addr_entry {
512 	struct radix_node	rn[2];
513 	struct sockaddr_in	addr;
514 	struct nhop_object	*nhop;
515 };
516 #define	LRADIX4_ITEM_SZ	roundup2(sizeof(struct radix4_addr_entry), 64)
517 
518 struct lradix4_data {
519 	struct radix_node_head	*rnh;
520 	struct fib_data		*fd;
521 	void			*mem;
522 	char			*rt_base;
523 	uint32_t		alloc_items;
524 	uint32_t		num_items;
525 };
526 
527 static struct nhop_object *
528 lradix4_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
529 {
530 	struct radix_node_head *rnh = (struct radix_node_head *)algo_data;
531 	struct radix4_addr_entry *ent;
532 	struct sockaddr_in addr4 = {
533 		.sin_len = KEY_LEN_INET,
534 		.sin_addr = key.addr4,
535 	};
536 	ent = (struct radix4_addr_entry *)(rn_match(&addr4, &rnh->rh));
537 	if (ent != NULL)
538 		return (ent->nhop);
539 	return (NULL);
540 }
541 
542 /*
543  * Preference function.
544  * Assume close-to-ideal of < 10 routes (though worse than bsearch), then
545  * gradually degrade until 1000 routes are reached.
546  */
547 static uint8_t
548 lradix4_get_pref(const struct rib_rtable_info *rinfo)
549 {
550 
551 	if (rinfo->num_prefixes < 10)
552 		return (250);
553 	else if (rinfo->num_prefixes < 1000)
554 		return (254 - rinfo->num_prefixes / 4);
555 	else
556 		return (1);
557 }
558 
559 static enum flm_op_result
560 lradix4_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data)
561 {
562 	struct lradix4_data *lr;
563 	struct rib_rtable_info rinfo;
564 	uint32_t count;
565 	size_t sz;
566 
567 	lr = malloc(sizeof(struct lradix4_data), M_RTABLE, M_NOWAIT | M_ZERO);
568 	if (lr == NULL || !rn_inithead((void **)&lr->rnh, OFF_LEN_INET))
569 		return (FLM_REBUILD);
570 	fib_get_rtable_info(fib_get_rh(fd), &rinfo);
571 
572 	count = rinfo.num_prefixes * 11 / 10;
573 	sz = count * LRADIX4_ITEM_SZ + CACHE_LINE_SIZE;
574 	lr->mem = malloc(sz, M_RTABLE, M_NOWAIT | M_ZERO);
575 	if (lr->mem == NULL)
576 		return (FLM_REBUILD);
577 	/* Align all rtentries to a cacheline boundary */
578 	lr->rt_base = (char *)roundup2((uintptr_t)lr->mem, CACHE_LINE_SIZE);
579 	lr->alloc_items = count;
580 	lr->fd = fd;
581 
582 	*_data = lr;
583 
584 	return (FLM_SUCCESS);
585 }
586 
587 static void
588 lradix4_destroy(void *_data)
589 {
590 	struct lradix4_data *lr = (struct lradix4_data *)_data;
591 
592 	if (lr->rnh != NULL)
593 		rn_detachhead((void **)&lr->rnh);
594 	if (lr->mem != NULL)
595 		free(lr->mem, M_RTABLE);
596 	free(lr, M_RTABLE);
597 }
598 
599 static enum flm_op_result
600 lradix4_add_route_cb(struct rtentry *rt, void *_data)
601 {
602 	struct lradix4_data *lr = (struct lradix4_data *)_data;
603 	struct radix4_addr_entry *ae;
604 	struct sockaddr_in mask;
605 	struct sockaddr *rt_mask;
606 	struct radix_node *rn;
607 	struct in_addr addr4, mask4;
608 	uint32_t scopeid;
609 
610 	if (lr->num_items >= lr->alloc_items)
611 		return (FLM_REBUILD);
612 
613 	ae = (struct radix4_addr_entry *)(lr->rt_base + lr->num_items * LRADIX4_ITEM_SZ);
614 	lr->num_items++;
615 
616 	ae->nhop = rt_get_raw_nhop(rt);
617 
618 	rt_get_inet_prefix_pmask(rt, &addr4, &mask4, &scopeid);
619 	ae->addr.sin_len = KEY_LEN_INET;
620 	ae->addr.sin_addr = addr4;
621 
622 	if (mask4.s_addr != INADDR_BROADCAST) {
623 		bzero(&mask, sizeof(mask));
624 		mask.sin_len = KEY_LEN_INET;
625 		mask.sin_addr = mask4;
626 		rt_mask = (struct sockaddr *)&mask;
627 	} else
628 		rt_mask = NULL;
629 
630 	rn = lr->rnh->rnh_addaddr((struct sockaddr *)&ae->addr, rt_mask,
631 	    &lr->rnh->rh, ae->rn);
632 	if (rn == NULL)
633 		return (FLM_REBUILD);
634 
635 	return (FLM_SUCCESS);
636 }
637 
638 static enum flm_op_result
639 lradix4_end_dump(void *_data, struct fib_dp *dp)
640 {
641 	struct lradix4_data *lr = (struct lradix4_data *)_data;
642 
643 	dp->f = lradix4_lookup;
644 	dp->arg = lr->rnh;
645 
646 	return (FLM_SUCCESS);
647 }
648 
649 static enum flm_op_result
650 lradix4_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
651     void *_data)
652 {
653 
654 	return (FLM_REBUILD);
655 }
656 
657 struct fib_lookup_module flm_radix4_lockless = {
658 	.flm_name = "radix4_lockless",
659 	.flm_family = AF_INET,
660 	.flm_init_cb = lradix4_init,
661 	.flm_destroy_cb = lradix4_destroy,
662 	.flm_dump_rib_item_cb = lradix4_add_route_cb,
663 	.flm_dump_end_cb = lradix4_end_dump,
664 	.flm_change_rib_item_cb = lradix4_change_cb,
665 	.flm_get_pref = lradix4_get_pref,
666 };
667 
668 /*
669  * Fallback lookup algorithm.
670  * This is a simple wrapper around system radix.
671  */
672 
673 struct radix4_data {
674 	struct fib_data *fd;
675 	struct rib_head *rh;
676 };
677 
678 static struct nhop_object *
679 radix4_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
680 {
681 	RIB_RLOCK_TRACKER;
682 	struct rib_head *rh = (struct rib_head *)algo_data;
683 	struct radix_node *rn;
684 	struct nhop_object *nh;
685 
686 	/* Prepare lookup key */
687 	struct sockaddr_in sin4 = {
688 		.sin_family = AF_INET,
689 		.sin_len = sizeof(struct sockaddr_in),
690 		.sin_addr = key.addr4,
691 	};
692 
693 	nh = NULL;
694 	RIB_RLOCK(rh);
695 	rn = rn_match((void *)&sin4, &rh->head);
696 	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0))
697 		nh = (RNTORT(rn))->rt_nhop;
698 	RIB_RUNLOCK(rh);
699 
700 	return (nh);
701 }
702 
703 static uint8_t
704 radix4_get_pref(const struct rib_rtable_info *rinfo)
705 {
706 
707 	return (50);
708 }
709 
710 static enum flm_op_result
711 radix4_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data)
712 {
713 	struct radix4_data *r4;
714 
715 	r4 = malloc(sizeof(struct radix4_data), M_RTABLE, M_NOWAIT | M_ZERO);
716 	if (r4 == NULL)
717 		return (FLM_REBUILD);
718 	r4->fd = fd;
719 	r4->rh = fib_get_rh(fd);
720 
721 	*_data = r4;
722 
723 	return (FLM_SUCCESS);
724 }
725 
726 static void
727 radix4_destroy(void *_data)
728 {
729 
730 	free(_data, M_RTABLE);
731 }
732 
733 static enum flm_op_result
734 radix4_add_route_cb(struct rtentry *rt, void *_data)
735 {
736 
737 	return (FLM_SUCCESS);
738 }
739 
740 static enum flm_op_result
741 radix4_end_dump(void *_data, struct fib_dp *dp)
742 {
743 	struct radix4_data *r4 = (struct radix4_data *)_data;
744 
745 	dp->f = radix4_lookup;
746 	dp->arg = r4->rh;
747 
748 	return (FLM_SUCCESS);
749 }
750 
751 static enum flm_op_result
752 radix4_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
753     void *_data)
754 {
755 
756 	return (FLM_SUCCESS);
757 }
758 
759 struct fib_lookup_module flm_radix4 = {
760 	.flm_name = "radix4",
761 	.flm_family = AF_INET,
762 	.flm_init_cb = radix4_init,
763 	.flm_destroy_cb = radix4_destroy,
764 	.flm_dump_rib_item_cb = radix4_add_route_cb,
765 	.flm_dump_end_cb = radix4_end_dump,
766 	.flm_change_rib_item_cb = radix4_change_cb,
767 	.flm_get_pref = radix4_get_pref,
768 };
769 
770 static void
771 fib4_algo_init(void)
772 {
773 
774 	fib_module_register(&flm_bsearch4);
775 	fib_module_register(&flm_radix4_lockless);
776 	fib_module_register(&flm_radix4);
777 }
778 SYSINIT(fib4_algo_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, fib4_algo_init, NULL);
779