/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2020 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_inet.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/malloc.h>
#include <sys/priv.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <net/vnet.h>

#include <net/if.h>
#include <netinet/in.h>

#include <net/route.h>
#include <net/route/nhop.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/route/fib_algo.h>

/*
 * Binary search lookup algo.
 *
 * Compiles the route table into a sorted array of non-overlapping
 * address ranges. Used with a small number of routes (< 16).
 * As the array is immutable, it is rebuilt on each rtable change.
 *
 * Example:
 *
 * 0.0.0.0/0 -> nh1
 * 10.0.0.0/24 -> nh2
 * 10.0.0.1/32 -> nh3
 *
 * gets compiled to:
 *
 * 0.0.0.0 -> nh1
 * 10.0.0.0 -> nh2
 * 10.0.0.1 -> nh3
 * 10.0.0.2 -> nh2
 * 10.0.1.0 -> nh1
 *
 */

struct bsearch4_record {
	uint32_t		addr4;
	uint32_t		mask4;
	struct nhop_object	*nh;
};

struct bsearch4_data {
	struct fib_data		*fd;
	uint32_t		alloc_items;
	uint32_t		num_items;
	void			*mem;
	struct bsearch4_record	*rr;
	struct bsearch4_record	br[0];
};

/*
 * Main IPv4 address lookup function.
 *
 * Finds the array record with the maximum index that is <= provided key.
 * Assumes a 0.0.0.0/0 record always exists (possibly with a NULL nhop).
 */
static struct nhop_object *
bsearch4_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
{
	const struct bsearch4_data *bd = (const struct bsearch4_data *)algo_data;
	const struct bsearch4_record *br;
	uint32_t addr4 = ntohl(key.addr4.s_addr);

	int start = 0;
	int end = bd->num_items;

	int i = (start + end) / 2;
	while (start + 1 < end) {
		i = (start + end) / 2;
		br = &bd->br[i];
		if (addr4 < br->addr4) {
			/* key < middle element, reduce right boundary */
			end = i;
			continue;
		} else if (addr4 > br->addr4) {
			/* key > middle element, increase left boundary */
			start = i;
			continue;
		} else {
			/* direct match */
			return (br->nh);
		}
	}
	/* start + 1 == end */
	return (bd->br[start].nh);
}
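
/*
 * Worked example (illustrative) for the loop above: looking up 10.0.0.5
 * in the compiled array from the header comment,
 *   [0] 0.0.0.0  [1] 10.0.0.0  [2] 10.0.0.1  [3] 10.0.0.2  [4] 10.0.1.0
 *
 * start=0 end=5: i=2, 10.0.0.5 > 10.0.0.1 -> start=2
 * start=2 end=5: i=3, 10.0.0.5 > 10.0.0.2 -> start=3
 * start=3 end=5: i=4, 10.0.0.5 < 10.0.1.0 -> end=4
 * start+1 == end -> return br[3].nh, i.e. nh2 from 10.0.0.0/24.
 */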

/*
 * Preference function.
 * Assume ideal for < 10 prefixes (a typical single-interface setup has 5),
 * then gradually degrade.
 * Assume 30 prefixes is at least 60 array records, so it will require
 *  ~8 lookup steps, which is even worse than radix.
 */
static uint8_t
bsearch4_get_pref(const struct rib_rtable_info *rinfo)
{

	if (rinfo->num_prefixes < 10)
		return (253);
	else if (rinfo->num_prefixes < 30)
		return (255 - rinfo->num_prefixes * 8);
	else
		return (1);
}
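
/*
 * Example: at num_prefixes = 20 the middle branch yields
 * 255 - 20 * 8 = 95, roughly halfway down from the ideal 253
 * towards the floor of 1 used at 30+ prefixes.
 */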

static enum flm_op_result
bsearch4_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data)
{
	struct bsearch4_data *bd;
	struct rib_rtable_info rinfo;
	uint32_t count;
	size_t sz;
	void *mem;

	fib_get_rtable_info(fib_get_rh(fd), &rinfo);
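	/*
	 * Reserve ~10% extra slots plus 64 spares: bsearch4_add_route_cb()
	 * bails out with FLM_REBUILD once num_items reaches alloc_items,
	 * so the headroom here presumably absorbs routes added while the
	 * rebuild is in flight.
	 */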
	count = rinfo.num_prefixes * 11 / 10 + 64;

	sz = sizeof(struct bsearch4_data) + sizeof(struct bsearch4_record) * count;
	/* add cache line size to ease alignment */
	sz += CACHE_LINE_SIZE;
	mem = malloc(sz, M_RTABLE, M_NOWAIT | M_ZERO);
	if (mem == NULL)
		return (FLM_REBUILD);
	/* Align datapath-usable structure to cache line boundary */
	bd = (struct bsearch4_data *)roundup2((uintptr_t)mem, CACHE_LINE_SIZE);
	bd->mem = mem;
	bd->alloc_items = count;
	bd->fd = fd;

	*_data = bd;

	/*
	 * Allocate temporary array to store all rtable data.
	 * This step is required to provide the required prefix iteration order.
	 */
	bd->rr = mallocarray(count, sizeof(struct bsearch4_record), M_TEMP, M_NOWAIT | M_ZERO);
	if (bd->rr == NULL)
		return (FLM_REBUILD);

	return (FLM_SUCCESS);
}

static void
bsearch4_destroy(void *_data)
{
	struct bsearch4_data *bd = (struct bsearch4_data *)_data;

	if (bd->rr != NULL)
		free(bd->rr, M_TEMP);
	free(bd->mem, M_RTABLE);
}

/*
 * Callback storing converted rtable prefixes in the temporary array.
 * Addresses are converted to host byte order.
 */
static enum flm_op_result
bsearch4_add_route_cb(struct rtentry *rt, void *_data)
{
	struct bsearch4_data *bd = (struct bsearch4_data *)_data;
	struct bsearch4_record *rr;
	struct in_addr addr4, mask4;
	uint32_t scopeid;

	if (bd->num_items >= bd->alloc_items)
		return (FLM_REBUILD);

	rr = &bd->rr[bd->num_items++];
	rt_get_inet_prefix_pmask(rt, &addr4, &mask4, &scopeid);
	rr->addr4 = ntohl(addr4.s_addr);
	rr->mask4 = ntohl(mask4.s_addr);
	rr->nh = rt_get_raw_nhop(rt);

	return (FLM_SUCCESS);
}

/*
 * Prefix comparison function.
 * 10.0.0.0/24 < 10.0.0.0/25 <- less specific wins
 * 10.0.0.0/25 < 10.0.0.1/32 <- bigger base wins
 */
static int
rr_cmp(const void *_rec1, const void *_rec2)
{
	const struct bsearch4_record *rec1, *rec2;
	rec1 = _rec1;
	rec2 = _rec2;

	if (rec1->addr4 < rec2->addr4)
		return (-1);
	else if (rec1->addr4 > rec2->addr4)
		return (1);

	/*
	 * A wider netmask has the smaller numeric value, and we want
	 * less specific prefixes to come first, i.e. compare as "<".
	 */
	if (rec1->mask4 < rec2->mask4)
		return (-1);
	else if (rec1->mask4 > rec2->mask4)
		return (1);
	return (0);
}
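
/*
 * Example: after qsort() with rr_cmp(), the prefixes from the header
 * comment end up in the order required by the array builder:
 *
 *   0.0.0.0/0    (addr 0x00000000)
 *   10.0.0.0/24  (addr 0x0a000000, mask 0xffffff00)
 *   10.0.0.1/32  (addr 0x0a000001, mask 0xffffffff)
 *
 * A covering (less specific) prefix always precedes the prefixes
 * nested inside it.
 */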

struct bsearch4_array {
	uint32_t		alloc_items;
	uint32_t		num_items;
	struct bsearch4_record	*arr;
};

static bool
add_array_entry(struct bsearch4_array *ba, struct bsearch4_record *br_new)
{

	if (ba->num_items < ba->alloc_items) {
		ba->arr[ba->num_items++] = *br_new;
		return (true);
	}
	return (false);
}

static struct bsearch4_record *
get_last_entry(struct bsearch4_array *ba)
{

	return (&ba->arr[ba->num_items - 1]);
}

/*
 * Pops the top stack entry and, if it covers more address space than
 * the last lookup-array entry, appends the remaining (still uncovered)
 * part of that prefix to the lookup array.
 *
 * Example:
 *  stack top: 10.0.1.0/24,nh=3; last array entry: 10.0.1.0/25,nh=4
 *  -> append 10.0.1.128,nh=3 (mask /24 inherited from the stack entry)
 */
static bool
pop_stack_entry(struct bsearch4_array *dst_array, struct bsearch4_array *stack)
{
	uint32_t last_stack_addr, last_array_addr;

	struct bsearch4_record *br_prev = get_last_entry(dst_array);
	struct bsearch4_record *pstack = get_last_entry(stack);

	/* Regardless of the result, pop stack entry */
	stack->num_items--;

	/* Last address of the prefix in the last lookup array entry */
	last_array_addr = (br_prev->addr4 | ~br_prev->mask4);
	/* Last address of the prefix in the stack entry */
	last_stack_addr = (pstack->addr4 | ~pstack->mask4);

	if (last_stack_addr > last_array_addr) {
		/*
		 * Stack record covers more address space than
		 * the last entry in the lookup array.
		 * Add the remaining part of the stack record to
		 * the lookup array.
		 */
		struct bsearch4_record br_new = {
			.addr4 = last_array_addr + 1,
			.mask4 = pstack->mask4,
			.nh = pstack->nh,
		};
		return (add_array_entry(dst_array, &br_new));
	}

	return (true);
}

/*
 * Updates the resulting array @dst_array with a rib entry @rib_entry.
 */
static bool
bsearch4_process_record(struct bsearch4_array *dst_array,
    struct bsearch4_array *stack, struct bsearch4_record *rib_entry)
{

	/*
	 * Maintain invariant: the current rib_entry is always contained
	 *  in the top stack entry.
	 * Note we always have 0.0.0.0/0.
	 */
	while (stack->num_items > 0) {
		struct bsearch4_record *pst = get_last_entry(stack);

		/*
		 * Check if we need to pop the stack.
		 * Rely on the ordering - less specific prefixes come first.
		 * Pop any entry that doesn't contain the current prefix.
		 */
		if (pst->addr4 == (rib_entry->addr4 & pst->mask4))
			break;

		if (!pop_stack_entry(dst_array, stack))
			return (false);
	}

	if (dst_array->num_items > 0) {
		/*
		 * Check if there is a gap between the previous entry and the
		 *  current entry. The code above guarantees that both the
		 *  previous and the current entry are contained in the top
		 *  stack entry.
		 *
		 * Example: last: 10.0.0.1(/32,nh=3) cur: 10.0.0.3(/32,nh=4),
		 *  stack: 10.0.0.0/24,nh=2.
		 * Cover the gap between previous and current by adding the
		 *  stack nexthop.
		 */
		struct bsearch4_record *br_tmp = get_last_entry(dst_array);
		uint32_t last_declared_addr = br_tmp->addr4 | ~br_tmp->mask4;
		if (last_declared_addr < rib_entry->addr4 - 1) {
			/* Cover the hole */
			struct bsearch4_record *pst = get_last_entry(stack);
			struct bsearch4_record new_entry = {
				.addr4 = last_declared_addr + 1,
				.mask4 = pst->mask4,
				.nh = pst->nh,
			};
			if (!add_array_entry(dst_array, &new_entry))
				return (false);
		}

		/*
		 * Special case: adding a more specific prefix at the start of
		 * the previous interval:
		 * 10.0.0.0(/24,nh=3), 10.0.0.0(/25,nh=4)
		 * Alter the last record, setting the new nexthop and mask.
		 */
		if (br_tmp->addr4 == rib_entry->addr4) {
			*br_tmp = *rib_entry;
			add_array_entry(stack, rib_entry);
			return (true);
		}
	}

	if (!add_array_entry(dst_array, rib_entry))
		return (false);
	add_array_entry(stack, rib_entry);

	return (true);
}

static enum flm_op_result
bsearch4_build_array(struct bsearch4_array *dst_array, struct bsearch4_array *src_array)
{

	/*
	 * During iteration, we keep track of all prefixes in the rtable
	 * that contain the address currently being processed, by
	 * maintaining a stack. A single address can be covered by one
	 * prefix per mask length (/0 through /32), so pre-allocate a
	 * stack of size 33.
	 */
	struct bsearch4_array stack = {
		.alloc_items = 33,
		.arr = mallocarray(33, sizeof(struct bsearch4_record), M_TEMP, M_NOWAIT | M_ZERO),
	};
	if (stack.arr == NULL)
		return (FLM_REBUILD);

	for (int i = 0; i < src_array->num_items; i++) {
		struct bsearch4_record *rib_entry = &src_array->arr[i];

		if (!bsearch4_process_record(dst_array, &stack, rib_entry)) {
			free(stack.arr, M_TEMP);
			return (FLM_REBUILD);
		}
	}

	/*
	 * We know that the last record is contained in the top stack entry.
	 */
	while (stack.num_items > 0) {
		if (!pop_stack_entry(dst_array, &stack)) {
			free(stack.arr, M_TEMP);
			return (FLM_REBUILD);
		}
	}
	free(stack.arr, M_TEMP);

	return (FLM_SUCCESS);
}
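
/*
 * Illustrative trace of the build on the header-comment example,
 * after sorting with rr_cmp():
 *
 * 0.0.0.0/0,nh1:   array: [0.0.0.0,nh1]           stack: [/0]
 * 10.0.0.0/24,nh2: no hole to fill (the 0.0.0.0 entry already extends
 *                  to the end of the address space), so just append:
 *                  array: [.., 10.0.0.0,nh2]      stack: [/0 /24]
 * 10.0.0.1/32,nh3: array: [.., 10.0.0.1,nh3]      stack: [/0 /24 /32]
 * final drain:     pop /32 (nothing uncovered), pop /24 -> append
 *                  10.0.0.2,nh2, pop /0 -> append 10.0.1.0,nh1,
 *                  matching the compiled array in the header comment.
 */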

static enum flm_op_result
bsearch4_build(struct bsearch4_data *bd)
{
	enum flm_op_result ret;

	struct bsearch4_array prefixes_array = {
		.alloc_items = bd->alloc_items,
		.num_items = bd->num_items,
		.arr = bd->rr,
	};

	/* Add the default route if it does not exist */
	bool default_found = false;
	for (int i = 0; i < prefixes_array.num_items; i++) {
		if (prefixes_array.arr[i].mask4 == 0) {
			default_found = true;
			break;
		}
	}
	if (!default_found) {
		/* Add default route with NULL nhop */
		struct bsearch4_record default_entry = {};
		if (!add_array_entry(&prefixes_array, &default_entry))
			return (FLM_REBUILD);
	}

	/* Sort prefixes */
	qsort(prefixes_array.arr, prefixes_array.num_items, sizeof(struct bsearch4_record), rr_cmp);

	struct bsearch4_array dst_array = {
		.alloc_items = bd->alloc_items,
		.arr = bd->br,
	};

	ret = bsearch4_build_array(&dst_array, &prefixes_array);
	bd->num_items = dst_array.num_items;

	free(bd->rr, M_TEMP);
	bd->rr = NULL;
	return (ret);
}

static enum flm_op_result
bsearch4_end_dump(void *_data, struct fib_dp *dp)
{
	struct bsearch4_data *bd = (struct bsearch4_data *)_data;
	enum flm_op_result ret;

	ret = bsearch4_build(bd);
	if (ret == FLM_SUCCESS) {
		dp->f = bsearch4_lookup;
		dp->arg = bd;
	}

	return (ret);
}

static enum flm_op_result
bsearch4_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
    void *_data)
{

	return (FLM_REBUILD);
}

struct fib_lookup_module flm_bsearch4 = {
	.flm_name = "bsearch4",
	.flm_family = AF_INET,
	.flm_init_cb = bsearch4_init,
	.flm_destroy_cb = bsearch4_destroy,
	.flm_dump_rib_item_cb = bsearch4_add_route_cb,
	.flm_dump_end_cb = bsearch4_end_dump,
	.flm_change_rib_item_cb = bsearch4_change_cb,
	.flm_get_pref = bsearch4_get_pref,
};

/*
 * Lockless radix lookup algo.
 *
 * Compiles an immutable radix tree from the current routing table.
 * Used with a small number of routes (< 1000).
 * As the data structure is immutable, it gets rebuilt on each rtable change.
 *
 * Lookups are slightly faster as shorter lookup keys are used
 *  (4 bytes instead of 8 in stock radix).
 */

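/*
 * The lookup key sockaddr is truncated right after sin_addr:
 * offsetof(struct sockaddr_in, sin_addr) is 4 (sin_len, sin_family and
 * sin_port), so KEY_LEN_INET is 8 bytes and OFF_LEN_INET (32) is the
 * bit offset at which key comparison starts.
 */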
#define KEY_LEN_INET	(offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t))
#define OFF_LEN_INET	(8 * offsetof(struct sockaddr_in, sin_addr))
struct radix4_addr_entry {
	struct radix_node	rn[2];
	struct sockaddr_in	addr;
	struct nhop_object	*nhop;
};
#define	LRADIX4_ITEM_SZ	roundup2(sizeof(struct radix4_addr_entry), 64)
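
/*
 * LRADIX4_ITEM_SZ pads each entry to a multiple of 64 bytes, so with the
 * common 64-byte cache line size consecutive entries carved from rt_base
 * (itself CACHE_LINE_SIZE-aligned in lradix4_init()) never share a line.
 */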

struct lradix4_data {
	struct radix_node_head	*rnh;
	struct fib_data		*fd;
	void			*mem;
	char			*rt_base;
	uint32_t		alloc_items;
	uint32_t		num_items;
};

static struct nhop_object *
lradix4_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
{
	struct radix_node_head *rnh = (struct radix_node_head *)algo_data;
	struct radix4_addr_entry *ent;
	struct sockaddr_in addr4 = {
		.sin_len = KEY_LEN_INET,
		.sin_addr = key.addr4,
	};
	ent = (struct radix4_addr_entry *)(rn_match(&addr4, &rnh->rh));
	if (ent != NULL)
		return (ent->nhop);
	return (NULL);
}

/*
 * Preference function.
 * Assume close to ideal for < 10 routes (though worse than bsearch4),
 * then gradually degrade until 1000 routes are reached.
 */
static uint8_t
lradix4_get_pref(const struct rib_rtable_info *rinfo)
{

	if (rinfo->num_prefixes < 10)
		return (250);
	else if (rinfo->num_prefixes < 1000)
		return (254 - rinfo->num_prefixes / 4);
	else
		return (1);
}

static enum flm_op_result
lradix4_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data)
{
	struct lradix4_data *lr;
	struct rib_rtable_info rinfo;
	uint32_t count;
	size_t sz;

	lr = malloc(sizeof(struct lradix4_data), M_RTABLE, M_NOWAIT | M_ZERO);
	if (lr == NULL)
		return (FLM_REBUILD);
	/* Publish the instance early so lradix4_destroy() can clean up */
	*_data = lr;
	if (!rn_inithead((void **)&lr->rnh, OFF_LEN_INET))
		return (FLM_REBUILD);
	fib_get_rtable_info(fib_get_rh(fd), &rinfo);

	count = rinfo.num_prefixes * 11 / 10;
	sz = count * LRADIX4_ITEM_SZ + CACHE_LINE_SIZE;
	lr->mem = malloc(sz, M_RTABLE, M_NOWAIT | M_ZERO);
	if (lr->mem == NULL)
		return (FLM_REBUILD);
	/* Align all rtentries to a cacheline boundary */
	lr->rt_base = (char *)roundup2((uintptr_t)lr->mem, CACHE_LINE_SIZE);
	lr->alloc_items = count;
	lr->fd = fd;

	return (FLM_SUCCESS);
}

static void
lradix4_destroy(void *_data)
{
	struct lradix4_data *lr = (struct lradix4_data *)_data;

	if (lr->rnh != NULL)
		rn_detachhead((void **)&lr->rnh);
	if (lr->mem != NULL)
		free(lr->mem, M_RTABLE);
	free(lr, M_RTABLE);
}

static enum flm_op_result
lradix4_add_route_cb(struct rtentry *rt, void *_data)
{
	struct lradix4_data *lr = (struct lradix4_data *)_data;
	struct radix4_addr_entry *ae;
	struct sockaddr_in mask;
	struct sockaddr *rt_mask;
	struct radix_node *rn;
	struct in_addr addr4, mask4;
	uint32_t scopeid;

	if (lr->num_items >= lr->alloc_items)
		return (FLM_REBUILD);

	ae = (struct radix4_addr_entry *)(lr->rt_base + lr->num_items * LRADIX4_ITEM_SZ);
	lr->num_items++;

	ae->nhop = rt_get_raw_nhop(rt);

	rt_get_inet_prefix_pmask(rt, &addr4, &mask4, &scopeid);
	ae->addr.sin_len = KEY_LEN_INET;
	ae->addr.sin_addr = addr4;

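	/*
	 * INADDR_BROADCAST (255.255.255.255) is the /32 netmask here:
	 * host routes are added with a NULL mask so the radix code
	 * treats them as host entries.
	 */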
	if (mask4.s_addr != INADDR_BROADCAST) {
		bzero(&mask, sizeof(mask));
		mask.sin_len = KEY_LEN_INET;
		mask.sin_addr = mask4;
		rt_mask = (struct sockaddr *)&mask;
	} else
		rt_mask = NULL;

	rn = lr->rnh->rnh_addaddr((struct sockaddr *)&ae->addr, rt_mask,
	    &lr->rnh->rh, ae->rn);
	if (rn == NULL)
		return (FLM_REBUILD);

	return (FLM_SUCCESS);
}

static enum flm_op_result
lradix4_end_dump(void *_data, struct fib_dp *dp)
{
	struct lradix4_data *lr = (struct lradix4_data *)_data;

	dp->f = lradix4_lookup;
	dp->arg = lr->rnh;

	return (FLM_SUCCESS);
}

static enum flm_op_result
lradix4_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
    void *_data)
{

	return (FLM_REBUILD);
}

struct fib_lookup_module flm_radix4_lockless = {
	.flm_name = "radix4_lockless",
	.flm_family = AF_INET,
	.flm_init_cb = lradix4_init,
	.flm_destroy_cb = lradix4_destroy,
	.flm_dump_rib_item_cb = lradix4_add_route_cb,
	.flm_dump_end_cb = lradix4_end_dump,
	.flm_change_rib_item_cb = lradix4_change_cb,
	.flm_get_pref = lradix4_get_pref,
};

/*
 * Fallback lookup algorithm.
 * This is a simple wrapper around the stock radix, taking the RIB read
 * lock for each lookup.
 */

struct radix4_data {
	struct fib_data *fd;
	struct rib_head *rh;
};

static struct nhop_object *
radix4_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
{
	RIB_RLOCK_TRACKER;
	struct rib_head *rh = (struct rib_head *)algo_data;
	struct radix_node *rn;
	struct nhop_object *nh;

	/* Prepare lookup key */
	struct sockaddr_in sin4 = {
		.sin_family = AF_INET,
		.sin_len = sizeof(struct sockaddr_in),
		.sin_addr = key.addr4,
	};

	nh = NULL;
	RIB_RLOCK(rh);
	rn = rn_match((void *)&sin4, &rh->head);
	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0))
		nh = (RNTORT(rn))->rt_nhop;
	RIB_RUNLOCK(rh);

	return (nh);
}

static uint8_t
radix4_get_pref(const struct rib_rtable_info *rinfo)
{

	return (50);
}

static enum flm_op_result
radix4_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data)
{
	struct radix4_data *r4;

	r4 = malloc(sizeof(struct radix4_data), M_RTABLE, M_NOWAIT | M_ZERO);
	if (r4 == NULL)
		return (FLM_REBUILD);
	r4->fd = fd;
	r4->rh = fib_get_rh(fd);

	*_data = r4;

	return (FLM_SUCCESS);
}

static void
radix4_destroy(void *_data)
{

	free(_data, M_RTABLE);
}

static enum flm_op_result
radix4_add_route_cb(struct rtentry *rt, void *_data)
{

	return (FLM_SUCCESS);
}

static enum flm_op_result
radix4_end_dump(void *_data, struct fib_dp *dp)
{
	struct radix4_data *r4 = (struct radix4_data *)_data;

	dp->f = radix4_lookup;
	dp->arg = r4->rh;

	return (FLM_SUCCESS);
}

static enum flm_op_result
radix4_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
    void *_data)
{

	return (FLM_SUCCESS);
}

struct fib_lookup_module flm_radix4 = {
	.flm_name = "radix4",
	.flm_family = AF_INET,
	.flm_init_cb = radix4_init,
	.flm_destroy_cb = radix4_destroy,
	.flm_dump_rib_item_cb = radix4_add_route_cb,
	.flm_dump_end_cb = radix4_end_dump,
	.flm_change_rib_item_cb = radix4_change_cb,
	.flm_get_pref = radix4_get_pref,
};

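/*
 * Preference interplay (derived from the functions above; the fib algo
 * framework picks the highest value): tables with < 10 prefixes select
 * bsearch4 (253 vs 250 vs 50), radix4_lockless wins from there up to
 * roughly 800 prefixes (while 254 - n/4 > 50), and the locked radix4
 * serves as the constant-preference fallback beyond that.
 */
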
static void
fib4_algo_init(void)
{

	fib_module_register(&flm_bsearch4);
	fib_module_register(&flm_radix4_lockless);
	fib_module_register(&flm_radix4);
}
SYSINIT(fib4_algo_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, fib4_algo_init, NULL);