xref: /freebsd/sys/netinet/in_fib_algo.c (revision 7b71f57f4e514a2ab7308ce4147e14d90e099ad0)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2020 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include "opt_inet.h"
29 
30 #include <sys/param.h>
31 #include <sys/kernel.h>
32 #include <sys/lock.h>
33 #include <sys/rmlock.h>
34 #include <sys/malloc.h>
35 #include <sys/kernel.h>
36 #include <sys/priv.h>
37 #include <sys/socket.h>
38 #include <sys/sysctl.h>
39 #include <net/vnet.h>
40 
41 #include <net/if.h>
42 #include <netinet/in.h>
43 
44 #include <net/route.h>
45 #include <net/route/nhop.h>
46 #include <net/route/route_ctl.h>
47 #include <net/route/route_var.h>
48 #include <net/route/fib_algo.h>
49 
50 /*
51  * Binary search lookup algo.
52  *
53  * Compiles route table into a sorted array.
 * Used with a small number of routes (< 16).
 * As the array is immutable, it is rebuilt on each rtable change.
56  *
57  * Example:
58  *
59  * 0.0.0.0/0 -> nh1
60  * 10.0.0.0/24 -> nh2
61  * 10.0.0.1/32 -> nh3
62  *
63  * gets compiled to:
64  *
65  * 0.0.0.0 -> nh1
66  * 10.0.0.0 -> nh2
67  * 10.0.0.1 -> nh3
68  * 10.0.0.2 -> nh2
69  * 10.0.1.0 -> nh1
70  *
71  */
72 
/*
 * Single element of the compiled lookup array.
 * Represents the interval starting at @addr4; the nexthop applies until
 * the next array element begins.
 */
struct bsearch4_record {
	uint32_t		addr4;	/* prefix base address, host byte order */
	uint32_t		mask4;	/* prefix netmask, host byte order */
	struct nhop_object	*nh;	/* resulting nexthop; may be NULL for the synthetic default */
};
78 
79 struct bsearch4_data {
80 	struct fib_data		*fd;
81 	uint32_t		alloc_items;
82 	uint32_t		num_items;
83 	void			*mem;
84 	struct bsearch4_record	*rr;
85 	struct bsearch4_record	br[0];
86 };
87 
88 /*
89  * Main IPv4 address lookup function.
90  *
91  * Finds array record with maximum index that is <= provided key.
92  * Assumes 0.0.0.0/0 always exists (may be with NULL nhop)
93  */
94 static struct nhop_object *
bsearch4_lookup(void * algo_data,const struct flm_lookup_key key,uint32_t scopeid)95 bsearch4_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
96 {
97 	const struct bsearch4_data *bd = (const struct bsearch4_data *)algo_data;
98 	const struct bsearch4_record *br;
99 	uint32_t addr4 = ntohl(key.addr4.s_addr);
100 
101 	int start = 0;
102 	int end = bd->num_items;
103 
104 	int i = (start + end) / 2;
105 	while (start + 1 < end) {
106 		i = (start + end) / 2;
107 		br = &bd->br[i];
108 		if (addr4 < br->addr4) {
109 			/* key < average, reduce right boundary */
110 			end = i;
111 			continue;
112 		} else if (addr4 > br->addr4) {
113 			/* key > average, increase left aboundary */
114 			start = i;
115 			continue;
116 		} else {
117 			/* direct match */
118 			return (br->nh);
119 		}
120 	}
121 	/* start + 1 == end */
122 	return (bd->br[start].nh);
123 }
124 
125 /*
126  * Preference function.
127  * Assume ideal for < 10 (typical single-interface setup has 5)
128  * Then gradually degrade.
 * Assume 30 prefixes is at least 60 records, so it will require 8 lookups,
 *  which is even worse than radix.
131  */
132 static uint8_t
bsearch4_get_pref(const struct rib_rtable_info * rinfo)133 bsearch4_get_pref(const struct rib_rtable_info *rinfo)
134 {
135 
136 	if (rinfo->num_prefixes < 10)
137 		return (253);
138 	else if (rinfo->num_prefixes < 30)
139 		return (255 - rinfo->num_prefixes * 8);
140 	else
141 		return (1);
142 }
143 
144 static enum flm_op_result
bsearch4_init(uint32_t fibnum,struct fib_data * fd,void * _old_data,void ** _data)145 bsearch4_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data)
146 {
147 	struct bsearch4_data *bd;
148 	struct rib_rtable_info rinfo;
149 	uint32_t count;
150 	size_t sz;
151 	void *mem;
152 
153 	fib_get_rtable_info(fib_get_rh(fd), &rinfo);
154 	count = rinfo.num_prefixes * 11 / 10 + 64;
155 
156 	sz = sizeof(struct bsearch4_data) + sizeof(struct bsearch4_record) * count;
157 	/* add cache line sz to ease alignment */
158 	sz += CACHE_LINE_SIZE;
159 	mem = malloc(sz, M_RTABLE, M_NOWAIT | M_ZERO);
160 	if (mem == NULL)
161 		return (FLM_REBUILD);
162 	/* Align datapath-usable structure to cache line boundary */
163 	bd = (struct bsearch4_data *)roundup2((uintptr_t)mem, CACHE_LINE_SIZE);
164 	bd->mem = mem;
165 	bd->alloc_items = count;
166 	bd->fd = fd;
167 
168 	*_data = bd;
169 
170 	/*
171 	 * Allocate temporary array to store all rtable data.
172 	 * This step is required to provide the required prefix iteration order.
173 	 */
174 	bd->rr = mallocarray(count, sizeof(struct bsearch4_record), M_TEMP, M_NOWAIT | M_ZERO);
175 	if (bd->rr == NULL)
176 		return (FLM_REBUILD);
177 
178 	return (FLM_SUCCESS);
179 }
180 
181 static void
bsearch4_destroy(void * _data)182 bsearch4_destroy(void *_data)
183 {
184 	struct bsearch4_data *bd = (struct bsearch4_data *)_data;
185 
186 	if (bd->rr != NULL)
187 		free(bd->rr, M_TEMP);
188 	free(bd->mem, M_RTABLE);
189 }
190 
191 /*
192  * Callback storing converted rtable prefixes in the temporary array.
193  * Addresses are converted to a host order.
194  */
195 static enum flm_op_result
bsearch4_add_route_cb(struct rtentry * rt,void * _data)196 bsearch4_add_route_cb(struct rtentry *rt, void *_data)
197 {
198 	struct bsearch4_data *bd = (struct bsearch4_data *)_data;
199 	struct bsearch4_record *rr;
200 	struct in_addr addr4, mask4;
201 	uint32_t scopeid;
202 
203 	if (bd->num_items >= bd->alloc_items)
204 		return (FLM_REBUILD);
205 
206 	rr = &bd->rr[bd->num_items++];
207 	rt_get_inet_prefix_pmask(rt, &addr4, &mask4, &scopeid);
208 	rr->addr4 = ntohl(addr4.s_addr);
209 	rr->mask4 = ntohl(mask4.s_addr);
210 	rr->nh = rt_get_raw_nhop(rt);
211 
212 	return (FLM_SUCCESS);
213 }
214 
215 /*
216  * Prefix comparison function.
217  * 10.0.0.0/24 < 10.0.0.0/25 <- less specific wins
218  * 10.0.0.0/25 < 10.0.0.1/32 <- bigger base wins
219  */
220 static int
rr_cmp(const void * _rec1,const void * _rec2)221 rr_cmp(const void *_rec1, const void *_rec2)
222 {
223 	const struct bsearch4_record *rec1, *rec2;
224 	rec1 = _rec1;
225 	rec2 = _rec2;
226 
227 	if (rec1->addr4 < rec2->addr4)
228 		return (-1);
229 	else if (rec1->addr4 > rec2->addr4)
230 		return (1);
231 
232 	/*
233 	 * wider mask value is lesser mask
234 	 * we want less specific come first, e.g. <
235 	 */
236 	if (rec1->mask4 < rec2->mask4)
237 		return (-1);
238 	else if (rec1->mask4 > rec2->mask4)
239 		return (1);
240 	return (0);
241 }
242 
/* Bounded view over an array of records, used by the build step. */
struct bsearch4_array {
	uint32_t		alloc_items;	/* capacity of @arr */
	uint32_t		num_items;	/* used entries in @arr */
	struct bsearch4_record	*arr;
};
248 
249 static bool
add_array_entry(struct bsearch4_array * ba,struct bsearch4_record * br_new)250 add_array_entry(struct bsearch4_array *ba, struct bsearch4_record *br_new)
251 {
252 
253 	if (ba->num_items < ba->alloc_items) {
254 		ba->arr[ba->num_items++] = *br_new;
255 		return (true);
256 	}
257 	return (false);
258 }
259 
/*
 * Returns a pointer to the last (most recently added) entry of @ba.
 * Caller must guarantee ba->num_items > 0.
 */
static struct bsearch4_record *
get_last_entry(struct bsearch4_array *ba)
{

	return (&ba->arr[ba->num_items - 1]);
}
266 
/*
 * Pops the top stack entry and, if it covers address space beyond the
 * last entry already emitted to the lookup array, appends the remainder.
 *
 * Example:
 *  stack top: 10.0.1.0/24,nh=3; array last: 10.0.1.0/25,nh=4
 *  -> appends 10.0.1.128,nh=3 to cover the rest of the /24.
 */
/*
 * Removes the top entry from @stack; flushes the part of its range not
 * yet covered by @dst_array (if any) into @dst_array.
 * Returns false if the lookup array is out of space.
 */
static bool
pop_stack_entry(struct bsearch4_array *dst_array, struct bsearch4_array *stack)
{
	uint32_t last_stack_addr, last_array_addr;

	struct bsearch4_record *br_prev = get_last_entry(dst_array);
	struct bsearch4_record *pstack = get_last_entry(stack);

	/* Regardless of the result, pop stack entry */
	stack->num_items--;

	/* Prefix last address for the last entry in lookup array */
	last_array_addr = (br_prev->addr4 | ~br_prev->mask4);
	/* Prefix last address for the stack record entry */
	last_stack_addr = (pstack->addr4 | ~pstack->mask4);

	if (last_stack_addr > last_array_addr) {
		/*
		 * Stack record covers more address space than the last
		 * entry in the lookup array. Add the remaining part of
		 * the stack record to the lookup array so its nexthop
		 * keeps matching past the last emitted entry.
		 */
		struct bsearch4_record br_new = {
			.addr4 = last_array_addr + 1,
			.mask4 = pstack->mask4,
			.nh = pstack->nh,
		};
		return (add_array_entry(dst_array, &br_new));
	}

	return (true);
}
307 
308 /*
309  * Updates resulting array @dst_array with a rib entry @rib_entry.
310  */
/*
 * Updates resulting array @dst_array with a rib entry @rib_entry,
 * maintaining on @stack all less-specific prefixes that cover the
 * current one. Relies on rr_cmp() iteration order.
 * Returns false on array/stack overflow.
 */
static bool
bsearch4_process_record(struct bsearch4_array *dst_array,
    struct bsearch4_array *stack, struct bsearch4_record *rib_entry)
{

	/*
	 * Maintain invariant: current rib_entry is always contained
	 *  in the top stack entry.
	 * Note we always have 0.0.0.0/0.
	 */
	while (stack->num_items > 0) {
		struct bsearch4_record *pst = get_last_entry(stack);

		/*
		 * Check if we need to pop stack.
		 * Rely on the ordering - larger prefixes come up first.
		 * Pop any entry that doesn't contain current prefix.
		 */
		if (pst->addr4 == (rib_entry->addr4 & pst->mask4))
			break;

		if (!pop_stack_entry(dst_array, stack))
			return (false);
	}

	if (dst_array->num_items > 0) {

		/*
		 * Check if there is a gap between previous entry and a
		 *  current entry. Code above guarantees that both previous
		 *  and current entry are contained in the top stack entry.
		 *
		 * Example: last: 10.0.0.1(/32,nh=3) cur: 10.0.0.3(/32,nh=4),
		 *  stack: 10.0.0.0/24,nh=2.
		 * Cover a gap between previous and current by adding stack
		 *  nexthop.
		 */
		struct bsearch4_record *br_tmp = get_last_entry(dst_array);
		uint32_t last_declared_addr = br_tmp->addr4 | ~br_tmp->mask4;
		if (last_declared_addr < rib_entry->addr4 - 1) {
			/* Cover a hole with the enclosing (stack) nexthop */
			struct bsearch4_record *pst = get_last_entry(stack);
			struct bsearch4_record new_entry = {
				.addr4 = last_declared_addr + 1,
				.mask4 = pst->mask4,
				.nh = pst->nh,
			};
			if (!add_array_entry(dst_array, &new_entry))
				return (false);
		}

		/*
		 * Special case: adding more specific prefix at the start of
		 * the previous interval:
		 * 10.0.0.0(/24,nh=3), 10.0.0.0(/25,nh=4)
		 * Alter the last record, setting new nexthop and mask.
		 */
		if (br_tmp->addr4 == rib_entry->addr4) {
			*br_tmp = *rib_entry;
			add_array_entry(stack, rib_entry);
			return (true);
		}
	}

	if (!add_array_entry(dst_array, rib_entry))
		return (false);
	add_array_entry(stack, rib_entry);

	return (true);
}
381 
382 static enum flm_op_result
bsearch4_build_array(struct bsearch4_array * dst_array,struct bsearch4_array * src_array)383 bsearch4_build_array(struct bsearch4_array *dst_array, struct bsearch4_array *src_array)
384 {
385 
386 	/*
387 	 * During iteration, we keep track of all prefixes in rtable
388 	 * we currently match, by maintaining stack. As there can be only
389 	 * 32 prefixes for a single address, pre-allocate stack of size 32.
390 	 */
391 	struct bsearch4_array stack = {
392 		.alloc_items = 32,
393 		.arr = mallocarray(32, sizeof(struct bsearch4_record), M_TEMP, M_NOWAIT | M_ZERO),
394 	};
395 	if (stack.arr == NULL)
396 		return (FLM_REBUILD);
397 
398 	for (int i = 0; i < src_array->num_items; i++) {
399 		struct bsearch4_record *rib_entry = &src_array->arr[i];
400 
401 		if (!bsearch4_process_record(dst_array, &stack, rib_entry)) {
402 			free(stack.arr, M_TEMP);
403 			return (FLM_REBUILD);
404 		}
405 	}
406 
407 	/*
408 	 * We know that last record is contained in the top stack entry.
409 	 */
410 	while (stack.num_items > 0) {
411 		if (!pop_stack_entry(dst_array, &stack))
412 			return (FLM_REBUILD);
413 	}
414 	free(stack.arr, M_TEMP);
415 
416 	return (FLM_SUCCESS);
417 }
418 
419 static enum flm_op_result
bsearch4_build(struct bsearch4_data * bd)420 bsearch4_build(struct bsearch4_data *bd)
421 {
422 	enum flm_op_result ret;
423 
424 	struct bsearch4_array prefixes_array = {
425 		.alloc_items = bd->alloc_items,
426 		.num_items = bd->num_items,
427 		.arr = bd->rr,
428 	};
429 
430 	/* Add default route if not exists */
431 	bool default_found = false;
432 	for (int i = 0; i < prefixes_array.num_items; i++) {
433 		if (prefixes_array.arr[i].mask4 == 0) {
434 			default_found = true;
435 			break;
436 		}
437 	}
438 	if (!default_found) {
439 		 /* Add default route with NULL nhop */
440 		struct bsearch4_record default_entry = {};
441 		if (!add_array_entry(&prefixes_array, &default_entry))
442 			 return (FLM_REBUILD);
443 	}
444 
445 	/* Sort prefixes */
446 	qsort(prefixes_array.arr, prefixes_array.num_items, sizeof(struct bsearch4_record), rr_cmp);
447 
448 	struct bsearch4_array dst_array = {
449 		.alloc_items = bd->alloc_items,
450 		.arr = bd->br,
451 	};
452 
453 	ret = bsearch4_build_array(&dst_array, &prefixes_array);
454 	bd->num_items = dst_array.num_items;
455 
456 	free(bd->rr, M_TEMP);
457 	bd->rr = NULL;
458 	return (ret);
459 }
460 
461 
462 static enum flm_op_result
bsearch4_end_dump(void * _data,struct fib_dp * dp)463 bsearch4_end_dump(void *_data, struct fib_dp *dp)
464 {
465 	struct bsearch4_data *bd = (struct bsearch4_data *)_data;
466 	enum flm_op_result ret;
467 
468 	ret = bsearch4_build(bd);
469 	if (ret == FLM_SUCCESS) {
470 		dp->f = bsearch4_lookup;
471 		dp->arg = bd;
472 	}
473 
474 	return (ret);
475 }
476 
/*
 * Rtable change callback.
 * The compiled lookup array is immutable, so every change requests a
 * full rebuild of the instance.
 */
static enum flm_op_result
bsearch4_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
    void *_data)
{

	return (FLM_REBUILD);
}
484 
/* Module descriptor for the binary-search algorithm (small tables). */
struct fib_lookup_module flm_bsearch4= {
	.flm_name = "bsearch4",
	.flm_family = AF_INET,
	.flm_init_cb = bsearch4_init,
	.flm_destroy_cb = bsearch4_destroy,
	.flm_dump_rib_item_cb = bsearch4_add_route_cb,
	.flm_dump_end_cb = bsearch4_end_dump,
	.flm_change_rib_item_cb = bsearch4_change_cb,
	.flm_get_pref = bsearch4_get_pref,
};
495 
496 /*
497  * Lockless radix lookup algo.
498  *
499  * Compiles immutable radix from the current routing table.
500  * Used with small amount of routes (<1000).
 * As the datastructure is immutable, it gets rebuilt on each rtable change.
502  *
503  * Lookups are slightly faster as shorter lookup keys are used
504  *  (4 bytes instead of 8 in stock radix).
505  */
506 
/* sockaddr_in key length covering only sin_addr (4-byte lookup keys) */
#define KEY_LEN_INET	(offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t))
/* Bit offset of sin_addr within sockaddr_in, for rn_inithead() */
#define OFF_LEN_INET	(8 * offsetof(struct sockaddr_in, sin_addr))
/* One radix tree entry: prefix key plus resulting nexthop. */
struct radix4_addr_entry {
	struct radix_node	rn[2];	/* radix glue; kept first so node and entry pointers coincide */
	struct sockaddr_in	addr;	/* prefix base address (tree key) */
	struct nhop_object	*nhop;	/* lookup result */
};
#define	LRADIX4_ITEM_SZ	roundup2(sizeof(struct radix4_addr_entry), 64)
515 
/* Per-instance state of the lockless radix algorithm. */
struct lradix4_data {
	struct radix_node_head	*rnh;		/* immutable compiled radix head */
	struct fib_data		*fd;		/* backpointer to the framework instance */
	void			*mem;		/* unaligned malloc(9) base to free */
	char			*rt_base;	/* cacheline-aligned entry array */
	uint32_t		alloc_items;	/* entry array capacity */
	uint32_t		num_items;	/* entries used so far */
};
524 
525 static struct nhop_object *
lradix4_lookup(void * algo_data,const struct flm_lookup_key key,uint32_t scopeid)526 lradix4_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
527 {
528 	struct radix_node_head *rnh = (struct radix_node_head *)algo_data;
529 	struct radix4_addr_entry *ent;
530 	struct sockaddr_in addr4 = {
531 		.sin_len = KEY_LEN_INET,
532 		.sin_addr = key.addr4,
533 	};
534 	ent = (struct radix4_addr_entry *)(rn_match(&addr4, &rnh->rh));
535 	if (ent != NULL)
536 		return (ent->nhop);
537 	return (NULL);
538 }
539 
540 /*
541  * Preference function.
542  * Assume close-to-ideal of < 10 routes (though worse than bsearch), then
543  * gradually degrade until 1000 routes are reached.
544  */
545 static uint8_t
lradix4_get_pref(const struct rib_rtable_info * rinfo)546 lradix4_get_pref(const struct rib_rtable_info *rinfo)
547 {
548 
549 	if (rinfo->num_prefixes < 10)
550 		return (250);
551 	else if (rinfo->num_prefixes < 1000)
552 		return (254 - rinfo->num_prefixes / 4);
553 	else
554 		return (1);
555 }
556 
557 static enum flm_op_result
lradix4_init(uint32_t fibnum,struct fib_data * fd,void * _old_data,void ** _data)558 lradix4_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data)
559 {
560 	struct lradix4_data *lr;
561 	struct rib_rtable_info rinfo;
562 	uint32_t count;
563 	size_t sz;
564 
565 	lr = malloc(sizeof(struct lradix4_data), M_RTABLE, M_NOWAIT | M_ZERO);
566 	if (lr == NULL || !rn_inithead((void **)&lr->rnh, OFF_LEN_INET))
567 		return (FLM_REBUILD);
568 	fib_get_rtable_info(fib_get_rh(fd), &rinfo);
569 
570 	count = rinfo.num_prefixes * 11 / 10;
571 	sz = count * LRADIX4_ITEM_SZ + CACHE_LINE_SIZE;
572 	lr->mem = malloc(sz, M_RTABLE, M_NOWAIT | M_ZERO);
573 	if (lr->mem == NULL)
574 		return (FLM_REBUILD);
575 	/* Align all rtentries to a cacheline boundary */
576 	lr->rt_base = (char *)roundup2((uintptr_t)lr->mem, CACHE_LINE_SIZE);
577 	lr->alloc_items = count;
578 	lr->fd = fd;
579 
580 	*_data = lr;
581 
582 	return (FLM_SUCCESS);
583 }
584 
585 static void
lradix4_destroy(void * _data)586 lradix4_destroy(void *_data)
587 {
588 	struct lradix4_data *lr = (struct lradix4_data *)_data;
589 
590 	if (lr->rnh != NULL)
591 		rn_detachhead((void **)&lr->rnh);
592 	if (lr->mem != NULL)
593 		free(lr->mem, M_RTABLE);
594 	free(lr, M_RTABLE);
595 }
596 
/*
 * Dump callback: inserts prefix @rt into the new radix instance.
 * Entries are carved out of the pre-allocated rt_base array rather
 * than allocated individually.
 * Returns FLM_REBUILD on array overflow or radix insertion failure.
 */
static enum flm_op_result
lradix4_add_route_cb(struct rtentry *rt, void *_data)
{
	struct lradix4_data *lr = (struct lradix4_data *)_data;
	struct radix4_addr_entry *ae;
	struct sockaddr_in mask;
	struct sockaddr *rt_mask;
	struct radix_node *rn;
	struct in_addr addr4, mask4;
	uint32_t scopeid;

	/* Out of pre-sized entries: rebuild with a larger allocation */
	if (lr->num_items >= lr->alloc_items)
		return (FLM_REBUILD);

	ae = (struct radix4_addr_entry *)(lr->rt_base + lr->num_items * LRADIX4_ITEM_SZ);
	lr->num_items++;

	ae->nhop = rt_get_raw_nhop(rt);

	rt_get_inet_prefix_pmask(rt, &addr4, &mask4, &scopeid);
	ae->addr.sin_len = KEY_LEN_INET;
	ae->addr.sin_addr = addr4;

	/* Host (/32) routes are inserted with a NULL mask */
	if (mask4.s_addr != INADDR_BROADCAST) {
		bzero(&mask, sizeof(mask));
		mask.sin_len = KEY_LEN_INET;
		mask.sin_addr = mask4;
		rt_mask = (struct sockaddr *)&mask;
	} else
		rt_mask = NULL;

	rn = lr->rnh->rnh_addaddr((struct sockaddr *)&ae->addr, rt_mask,
	    &lr->rnh->rh, ae->rn);
	if (rn == NULL)
		return (FLM_REBUILD);

	return (FLM_SUCCESS);
}
635 
636 static enum flm_op_result
lradix4_end_dump(void * _data,struct fib_dp * dp)637 lradix4_end_dump(void *_data, struct fib_dp *dp)
638 {
639 	struct lradix4_data *lr = (struct lradix4_data *)_data;
640 
641 	dp->f = lradix4_lookup;
642 	dp->arg = lr->rnh;
643 
644 	return (FLM_SUCCESS);
645 }
646 
/*
 * Rtable change callback.
 * The compiled radix is immutable, so every change requests a full
 * rebuild of the instance.
 */
static enum flm_op_result
lradix4_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
    void *_data)
{

	return (FLM_REBUILD);
}
654 
/* Module descriptor for the lockless-radix algorithm (medium tables). */
struct fib_lookup_module flm_radix4_lockless = {
	.flm_name = "radix4_lockless",
	.flm_family = AF_INET,
	.flm_init_cb = lradix4_init,
	.flm_destroy_cb = lradix4_destroy,
	.flm_dump_rib_item_cb = lradix4_add_route_cb,
	.flm_dump_end_cb = lradix4_end_dump,
	.flm_change_rib_item_cb = lradix4_change_cb,
	.flm_get_pref = lradix4_get_pref,
};
665 
666 /*
667  * Fallback lookup algorithm.
668  * This is a simple wrapper around system radix.
669  */
670 
/* State of the fallback algorithm: just a pointer to the system rib. */
struct radix4_data {
	struct fib_data *fd;	/* backpointer to the framework instance */
	struct rib_head *rh;	/* live system radix, locked on lookup */
};
675 
676 static struct nhop_object *
radix4_lookup(void * algo_data,const struct flm_lookup_key key,uint32_t scopeid)677 radix4_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
678 {
679 	RIB_RLOCK_TRACKER;
680 	struct rib_head *rh = (struct rib_head *)algo_data;
681 	struct radix_node *rn;
682 	struct nhop_object *nh;
683 
684 	/* Prepare lookup key */
685 	struct sockaddr_in sin4 = {
686 		.sin_family = AF_INET,
687 		.sin_len = sizeof(struct sockaddr_in),
688 		.sin_addr = key.addr4,
689 	};
690 
691 	nh = NULL;
692 	RIB_RLOCK(rh);
693 	rn = rn_match((void *)&sin4, &rh->head);
694 	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0))
695 		nh = (RNTORT(rn))->rt_nhop;
696 	RIB_RUNLOCK(rh);
697 
698 	return (nh);
699 }
700 
/*
 * Preference function: a flat middle-of-the-road value, so the fallback
 * is always available but loses to the specialized algorithms on the
 * table sizes they target.
 */
static uint8_t
radix4_get_pref(const struct rib_rtable_info *rinfo)
{

	return (50);
}
707 
708 static enum flm_op_result
radix4_init(uint32_t fibnum,struct fib_data * fd,void * _old_data,void ** _data)709 radix4_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data)
710 {
711 	struct radix4_data *r4;
712 
713 	r4 = malloc(sizeof(struct radix4_data), M_RTABLE, M_NOWAIT | M_ZERO);
714 	if (r4 == NULL)
715 		return (FLM_REBUILD);
716 	r4->fd = fd;
717 	r4->rh = fib_get_rh(fd);
718 
719 	*_data = r4;
720 
721 	return (FLM_SUCCESS);
722 }
723 
/* Instance destructor: only the state structure itself was allocated. */
static void
radix4_destroy(void *_data)
{

	free(_data, M_RTABLE);
}
730 
/*
 * Dump callback: nothing to store, as lookups go straight to the
 * live system radix.
 */
static enum flm_op_result
radix4_add_route_cb(struct rtentry *rt, void *_data)
{

	return (FLM_SUCCESS);
}
737 
738 static enum flm_op_result
radix4_end_dump(void * _data,struct fib_dp * dp)739 radix4_end_dump(void *_data, struct fib_dp *dp)
740 {
741 	struct radix4_data *r4 = (struct radix4_data *)_data;
742 
743 	dp->f = radix4_lookup;
744 	dp->arg = r4->rh;
745 
746 	return (FLM_SUCCESS);
747 }
748 
/*
 * Rtable change callback.
 * Lookups go through the live rib under its lock, so changes are
 * picked up automatically; no rebuild is needed.
 */
static enum flm_op_result
radix4_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
    void *_data)
{

	return (FLM_SUCCESS);
}
756 
/* Module descriptor for the system-radix fallback algorithm. */
struct fib_lookup_module flm_radix4 = {
	.flm_name = "radix4",
	.flm_family = AF_INET,
	.flm_init_cb = radix4_init,
	.flm_destroy_cb = radix4_destroy,
	.flm_dump_rib_item_cb = radix4_add_route_cb,
	.flm_dump_end_cb = radix4_end_dump,
	.flm_change_rib_item_cb = radix4_change_cb,
	.flm_get_pref = radix4_get_pref,
};
767 
/* Registers all IPv4 fib lookup modules during protocol-domain setup. */
static void
fib4_algo_init(void *dummy __unused)
{

	fib_module_register(&flm_bsearch4);
	fib_module_register(&flm_radix4_lockless);
	fib_module_register(&flm_radix4);
}
SYSINIT(fib4_algo_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, fib4_algo_init, NULL);
777