xref: /freebsd/sys/netinet/in_fib_algo.c (revision 685dc743dc3b5645e34836464128e1c0558b404b)
1f5baf8bbSAlexander V. Chernikov /*-
2*4d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
3f5baf8bbSAlexander V. Chernikov  *
4f5baf8bbSAlexander V. Chernikov  * Copyright (c) 2020 Alexander V. Chernikov
5f5baf8bbSAlexander V. Chernikov  *
6f5baf8bbSAlexander V. Chernikov  * Redistribution and use in source and binary forms, with or without
7f5baf8bbSAlexander V. Chernikov  * modification, are permitted provided that the following conditions
8f5baf8bbSAlexander V. Chernikov  * are met:
9f5baf8bbSAlexander V. Chernikov  * 1. Redistributions of source code must retain the above copyright
10f5baf8bbSAlexander V. Chernikov  *    notice, this list of conditions and the following disclaimer.
11f5baf8bbSAlexander V. Chernikov  * 2. Redistributions in binary form must reproduce the above copyright
12f5baf8bbSAlexander V. Chernikov  *    notice, this list of conditions and the following disclaimer in the
13f5baf8bbSAlexander V. Chernikov  *    documentation and/or other materials provided with the distribution.
14f5baf8bbSAlexander V. Chernikov  *
15f5baf8bbSAlexander V. Chernikov  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16f5baf8bbSAlexander V. Chernikov  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17f5baf8bbSAlexander V. Chernikov  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18f5baf8bbSAlexander V. Chernikov  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19f5baf8bbSAlexander V. Chernikov  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20f5baf8bbSAlexander V. Chernikov  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21f5baf8bbSAlexander V. Chernikov  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22f5baf8bbSAlexander V. Chernikov  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23f5baf8bbSAlexander V. Chernikov  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24f5baf8bbSAlexander V. Chernikov  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25f5baf8bbSAlexander V. Chernikov  * SUCH DAMAGE.
26f5baf8bbSAlexander V. Chernikov  */
27f5baf8bbSAlexander V. Chernikov 
28f5baf8bbSAlexander V. Chernikov #include <sys/cdefs.h>
29f5baf8bbSAlexander V. Chernikov #include "opt_inet.h"
30f5baf8bbSAlexander V. Chernikov 
31f5baf8bbSAlexander V. Chernikov #include <sys/param.h>
32f5baf8bbSAlexander V. Chernikov #include <sys/kernel.h>
33f5baf8bbSAlexander V. Chernikov #include <sys/lock.h>
34f5baf8bbSAlexander V. Chernikov #include <sys/rmlock.h>
35f5baf8bbSAlexander V. Chernikov #include <sys/malloc.h>
36f5baf8bbSAlexander V. Chernikov #include <sys/kernel.h>
37f5baf8bbSAlexander V. Chernikov #include <sys/priv.h>
38f5baf8bbSAlexander V. Chernikov #include <sys/socket.h>
39f5baf8bbSAlexander V. Chernikov #include <sys/sysctl.h>
40f5baf8bbSAlexander V. Chernikov #include <net/vnet.h>
41f5baf8bbSAlexander V. Chernikov 
42f5baf8bbSAlexander V. Chernikov #include <net/if.h>
43f5baf8bbSAlexander V. Chernikov #include <netinet/in.h>
44f5baf8bbSAlexander V. Chernikov 
45f5baf8bbSAlexander V. Chernikov #include <net/route.h>
46f5baf8bbSAlexander V. Chernikov #include <net/route/nhop.h>
47f5baf8bbSAlexander V. Chernikov #include <net/route/route_ctl.h>
48f5baf8bbSAlexander V. Chernikov #include <net/route/route_var.h>
49f5baf8bbSAlexander V. Chernikov #include <net/route/fib_algo.h>
50f5baf8bbSAlexander V. Chernikov 
51f5baf8bbSAlexander V. Chernikov /*
52f5baf8bbSAlexander V. Chernikov  * Binary search lookup algo.
53f5baf8bbSAlexander V. Chernikov  *
54f5baf8bbSAlexander V. Chernikov  * Compiles route table into a sorted array.
 * Used with a small number of routes (< 16).
 * As the array is immutable, it is rebuilt on each rtable change.
57f5baf8bbSAlexander V. Chernikov  *
58f5baf8bbSAlexander V. Chernikov  * Example:
59f5baf8bbSAlexander V. Chernikov  *
60f5baf8bbSAlexander V. Chernikov  * 0.0.0.0/0 -> nh1
61f5baf8bbSAlexander V. Chernikov  * 10.0.0.0/24 -> nh2
62f5baf8bbSAlexander V. Chernikov  * 10.0.0.1/32 -> nh3
63f5baf8bbSAlexander V. Chernikov  *
64f5baf8bbSAlexander V. Chernikov  * gets compiled to:
65f5baf8bbSAlexander V. Chernikov  *
66f5baf8bbSAlexander V. Chernikov  * 0.0.0.0 -> nh1
67f5baf8bbSAlexander V. Chernikov  * 10.0.0.0 -> nh2
68f5baf8bbSAlexander V. Chernikov  * 10.0.0.1 -> nh3
69f5baf8bbSAlexander V. Chernikov  * 10.0.0.2 -> nh2
70f5baf8bbSAlexander V. Chernikov  * 10.0.1.0 -> nh1
71f5baf8bbSAlexander V. Chernikov  *
72f5baf8bbSAlexander V. Chernikov  */
73f5baf8bbSAlexander V. Chernikov 
/*
 * Single element of the binary-search lookup array: the start of an
 * address interval and the nexthop that covers it.
 * Addresses/masks are kept in host byte order (see bsearch4_add_route_cb).
 */
struct bsearch4_record {
	uint32_t		addr4;	/* interval base address, host order */
	uint32_t		mask4;	/* originating prefix mask, host order */
	struct nhop_object	*nh;	/* nexthop; NULL for synthesized default */
};
79f5baf8bbSAlexander V. Chernikov 
80f5baf8bbSAlexander V. Chernikov struct bsearch4_data {
81f5baf8bbSAlexander V. Chernikov 	struct fib_data		*fd;
82f5baf8bbSAlexander V. Chernikov 	uint32_t		alloc_items;
83f5baf8bbSAlexander V. Chernikov 	uint32_t		num_items;
84f5baf8bbSAlexander V. Chernikov 	void			*mem;
85f5baf8bbSAlexander V. Chernikov 	struct bsearch4_record	*rr;
86f5baf8bbSAlexander V. Chernikov 	struct bsearch4_record	br[0];
87f5baf8bbSAlexander V. Chernikov };
88f5baf8bbSAlexander V. Chernikov 
89f5baf8bbSAlexander V. Chernikov /*
90f5baf8bbSAlexander V. Chernikov  * Main IPv4 address lookup function.
91f5baf8bbSAlexander V. Chernikov  *
92f5baf8bbSAlexander V. Chernikov  * Finds array record with maximum index that is <= provided key.
93f5baf8bbSAlexander V. Chernikov  * Assumes 0.0.0.0/0 always exists (may be with NULL nhop)
94f5baf8bbSAlexander V. Chernikov  */
95f5baf8bbSAlexander V. Chernikov static struct nhop_object *
bsearch4_lookup(void * algo_data,const struct flm_lookup_key key,uint32_t scopeid)96f5baf8bbSAlexander V. Chernikov bsearch4_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
97f5baf8bbSAlexander V. Chernikov {
98f5baf8bbSAlexander V. Chernikov 	const struct bsearch4_data *bd = (const struct bsearch4_data *)algo_data;
99f5baf8bbSAlexander V. Chernikov 	const struct bsearch4_record *br;
100f5baf8bbSAlexander V. Chernikov 	uint32_t addr4 = ntohl(key.addr4.s_addr);
101f5baf8bbSAlexander V. Chernikov 
102f5baf8bbSAlexander V. Chernikov 	int start = 0;
103f5baf8bbSAlexander V. Chernikov 	int end = bd->num_items;
104f5baf8bbSAlexander V. Chernikov 
105f5baf8bbSAlexander V. Chernikov 	int i = (start + end) / 2;
106f5baf8bbSAlexander V. Chernikov 	while (start + 1 < end) {
107f5baf8bbSAlexander V. Chernikov 		i = (start + end) / 2;
108f5baf8bbSAlexander V. Chernikov 		br = &bd->br[i];
109f5baf8bbSAlexander V. Chernikov 		if (addr4 < br->addr4) {
110f5baf8bbSAlexander V. Chernikov 			/* key < average, reduce right boundary */
111f5baf8bbSAlexander V. Chernikov 			end = i;
112f5baf8bbSAlexander V. Chernikov 			continue;
113f5baf8bbSAlexander V. Chernikov 		} else if (addr4 > br->addr4) {
114f5baf8bbSAlexander V. Chernikov 			/* key > average, increase left aboundary */
115f5baf8bbSAlexander V. Chernikov 			start = i;
116f5baf8bbSAlexander V. Chernikov 			continue;
117f5baf8bbSAlexander V. Chernikov 		} else {
118f5baf8bbSAlexander V. Chernikov 			/* direct match */
119f5baf8bbSAlexander V. Chernikov 			return (br->nh);
120f5baf8bbSAlexander V. Chernikov 		}
121f5baf8bbSAlexander V. Chernikov 	}
122f5baf8bbSAlexander V. Chernikov 	/* start + 1 == end */
123f5baf8bbSAlexander V. Chernikov 	return (bd->br[start].nh);
124f5baf8bbSAlexander V. Chernikov }
125f5baf8bbSAlexander V. Chernikov 
126f5baf8bbSAlexander V. Chernikov /*
127f5baf8bbSAlexander V. Chernikov  * Preference function.
128f5baf8bbSAlexander V. Chernikov  * Assume ideal for < 10 (typical single-interface setup has 5)
129f5baf8bbSAlexander V. Chernikov  * Then gradually degrade.
 * Assume 30 prefixes is at least 60 records, so it will require 8 lookups,
 *  which is even worse than radix.
132f5baf8bbSAlexander V. Chernikov  */
133f5baf8bbSAlexander V. Chernikov static uint8_t
bsearch4_get_pref(const struct rib_rtable_info * rinfo)134f5baf8bbSAlexander V. Chernikov bsearch4_get_pref(const struct rib_rtable_info *rinfo)
135f5baf8bbSAlexander V. Chernikov {
136f5baf8bbSAlexander V. Chernikov 
137f5baf8bbSAlexander V. Chernikov 	if (rinfo->num_prefixes < 10)
138f5baf8bbSAlexander V. Chernikov 		return (253);
139f5baf8bbSAlexander V. Chernikov 	else if (rinfo->num_prefixes < 30)
140f5baf8bbSAlexander V. Chernikov 		return (255 - rinfo->num_prefixes * 8);
141f5baf8bbSAlexander V. Chernikov 	else
142f5baf8bbSAlexander V. Chernikov 		return (1);
143f5baf8bbSAlexander V. Chernikov }
144f5baf8bbSAlexander V. Chernikov 
145f5baf8bbSAlexander V. Chernikov static enum flm_op_result
bsearch4_init(uint32_t fibnum,struct fib_data * fd,void * _old_data,void ** _data)146f5baf8bbSAlexander V. Chernikov bsearch4_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data)
147f5baf8bbSAlexander V. Chernikov {
148f5baf8bbSAlexander V. Chernikov 	struct bsearch4_data *bd;
149f5baf8bbSAlexander V. Chernikov 	struct rib_rtable_info rinfo;
150f5baf8bbSAlexander V. Chernikov 	uint32_t count;
151f5baf8bbSAlexander V. Chernikov 	size_t sz;
152f5baf8bbSAlexander V. Chernikov 	void *mem;
153f5baf8bbSAlexander V. Chernikov 
154f5baf8bbSAlexander V. Chernikov 	fib_get_rtable_info(fib_get_rh(fd), &rinfo);
155f5baf8bbSAlexander V. Chernikov 	count = rinfo.num_prefixes * 11 / 10 + 64;
156f5baf8bbSAlexander V. Chernikov 
157f5baf8bbSAlexander V. Chernikov 	sz = sizeof(struct bsearch4_data) + sizeof(struct bsearch4_record) * count;
158f5baf8bbSAlexander V. Chernikov 	/* add cache line sz to ease alignment */
159f5baf8bbSAlexander V. Chernikov 	sz += CACHE_LINE_SIZE;
160f5baf8bbSAlexander V. Chernikov 	mem = malloc(sz, M_RTABLE, M_NOWAIT | M_ZERO);
161f5baf8bbSAlexander V. Chernikov 	if (mem == NULL)
162f5baf8bbSAlexander V. Chernikov 		return (FLM_REBUILD);
163f5baf8bbSAlexander V. Chernikov 	/* Align datapath-usable structure to cache line boundary */
164f5baf8bbSAlexander V. Chernikov 	bd = (struct bsearch4_data *)roundup2((uintptr_t)mem, CACHE_LINE_SIZE);
165f5baf8bbSAlexander V. Chernikov 	bd->mem = mem;
166f5baf8bbSAlexander V. Chernikov 	bd->alloc_items = count;
167f5baf8bbSAlexander V. Chernikov 	bd->fd = fd;
168f5baf8bbSAlexander V. Chernikov 
169f5baf8bbSAlexander V. Chernikov 	*_data = bd;
170f5baf8bbSAlexander V. Chernikov 
171f5baf8bbSAlexander V. Chernikov 	/*
172f5baf8bbSAlexander V. Chernikov 	 * Allocate temporary array to store all rtable data.
173f5baf8bbSAlexander V. Chernikov 	 * This step is required to provide the required prefix iteration order.
174f5baf8bbSAlexander V. Chernikov 	 */
175f5baf8bbSAlexander V. Chernikov 	bd->rr = mallocarray(count, sizeof(struct bsearch4_record), M_TEMP, M_NOWAIT | M_ZERO);
176f5baf8bbSAlexander V. Chernikov 	if (bd->rr == NULL)
177f5baf8bbSAlexander V. Chernikov 		return (FLM_REBUILD);
178f5baf8bbSAlexander V. Chernikov 
179f5baf8bbSAlexander V. Chernikov 	return (FLM_SUCCESS);
180f5baf8bbSAlexander V. Chernikov }
181f5baf8bbSAlexander V. Chernikov 
182f5baf8bbSAlexander V. Chernikov static void
bsearch4_destroy(void * _data)183f5baf8bbSAlexander V. Chernikov bsearch4_destroy(void *_data)
184f5baf8bbSAlexander V. Chernikov {
185f5baf8bbSAlexander V. Chernikov 	struct bsearch4_data *bd = (struct bsearch4_data *)_data;
186f5baf8bbSAlexander V. Chernikov 
187f5baf8bbSAlexander V. Chernikov 	if (bd->rr != NULL)
188f5baf8bbSAlexander V. Chernikov 		free(bd->rr, M_TEMP);
189f5baf8bbSAlexander V. Chernikov 	free(bd->mem, M_RTABLE);
190f5baf8bbSAlexander V. Chernikov }
191f5baf8bbSAlexander V. Chernikov 
192f5baf8bbSAlexander V. Chernikov /*
193f5baf8bbSAlexander V. Chernikov  * Callback storing converted rtable prefixes in the temporary array.
194f5baf8bbSAlexander V. Chernikov  * Addresses are converted to a host order.
195f5baf8bbSAlexander V. Chernikov  */
196f5baf8bbSAlexander V. Chernikov static enum flm_op_result
bsearch4_add_route_cb(struct rtentry * rt,void * _data)197f5baf8bbSAlexander V. Chernikov bsearch4_add_route_cb(struct rtentry *rt, void *_data)
198f5baf8bbSAlexander V. Chernikov {
199f5baf8bbSAlexander V. Chernikov 	struct bsearch4_data *bd = (struct bsearch4_data *)_data;
200f5baf8bbSAlexander V. Chernikov 	struct bsearch4_record *rr;
201f5baf8bbSAlexander V. Chernikov 	struct in_addr addr4, mask4;
202f5baf8bbSAlexander V. Chernikov 	uint32_t scopeid;
203f5baf8bbSAlexander V. Chernikov 
204f5baf8bbSAlexander V. Chernikov 	if (bd->num_items >= bd->alloc_items)
205f5baf8bbSAlexander V. Chernikov 		return (FLM_REBUILD);
206f5baf8bbSAlexander V. Chernikov 
207f5baf8bbSAlexander V. Chernikov 	rr = &bd->rr[bd->num_items++];
208f5baf8bbSAlexander V. Chernikov 	rt_get_inet_prefix_pmask(rt, &addr4, &mask4, &scopeid);
209f5baf8bbSAlexander V. Chernikov 	rr->addr4 = ntohl(addr4.s_addr);
210f5baf8bbSAlexander V. Chernikov 	rr->mask4 = ntohl(mask4.s_addr);
211f5baf8bbSAlexander V. Chernikov 	rr->nh = rt_get_raw_nhop(rt);
212f5baf8bbSAlexander V. Chernikov 
213f5baf8bbSAlexander V. Chernikov 	return (FLM_SUCCESS);
214f5baf8bbSAlexander V. Chernikov }
215f5baf8bbSAlexander V. Chernikov 
216f5baf8bbSAlexander V. Chernikov /*
217f5baf8bbSAlexander V. Chernikov  * Prefix comparison function.
218f5baf8bbSAlexander V. Chernikov  * 10.0.0.0/24 < 10.0.0.0/25 <- less specific wins
219f5baf8bbSAlexander V. Chernikov  * 10.0.0.0/25 < 10.0.0.1/32 <- bigger base wins
220f5baf8bbSAlexander V. Chernikov  */
221f5baf8bbSAlexander V. Chernikov static int
rr_cmp(const void * _rec1,const void * _rec2)222f5baf8bbSAlexander V. Chernikov rr_cmp(const void *_rec1, const void *_rec2)
223f5baf8bbSAlexander V. Chernikov {
224f5baf8bbSAlexander V. Chernikov 	const struct bsearch4_record *rec1, *rec2;
225f5baf8bbSAlexander V. Chernikov 	rec1 = _rec1;
226f5baf8bbSAlexander V. Chernikov 	rec2 = _rec2;
227f5baf8bbSAlexander V. Chernikov 
228f5baf8bbSAlexander V. Chernikov 	if (rec1->addr4 < rec2->addr4)
229f5baf8bbSAlexander V. Chernikov 		return (-1);
230f5baf8bbSAlexander V. Chernikov 	else if (rec1->addr4 > rec2->addr4)
231f5baf8bbSAlexander V. Chernikov 		return (1);
232f5baf8bbSAlexander V. Chernikov 
233f5baf8bbSAlexander V. Chernikov 	/*
234f5baf8bbSAlexander V. Chernikov 	 * wider mask value is lesser mask
235f5baf8bbSAlexander V. Chernikov 	 * we want less specific come first, e.g. <
236f5baf8bbSAlexander V. Chernikov 	 */
237f5baf8bbSAlexander V. Chernikov 	if (rec1->mask4 < rec2->mask4)
238f5baf8bbSAlexander V. Chernikov 		return (-1);
239f5baf8bbSAlexander V. Chernikov 	else if (rec1->mask4 > rec2->mask4)
240f5baf8bbSAlexander V. Chernikov 		return (1);
241f5baf8bbSAlexander V. Chernikov 	return (0);
242f5baf8bbSAlexander V. Chernikov }
243f5baf8bbSAlexander V. Chernikov 
/*
 * Bounded array view over bsearch4_record storage; used for the sorted
 * prefix dump, the destination lookup array and the prefix stack during
 * array compilation.
 */
struct bsearch4_array {
	uint32_t		alloc_items;	/* capacity of arr[] */
	uint32_t		num_items;	/* entries currently in use */
	struct bsearch4_record	*arr;		/* backing storage (not owned) */
};
249f5baf8bbSAlexander V. Chernikov 
250f5baf8bbSAlexander V. Chernikov static bool
add_array_entry(struct bsearch4_array * ba,struct bsearch4_record * br_new)251f5baf8bbSAlexander V. Chernikov add_array_entry(struct bsearch4_array *ba, struct bsearch4_record *br_new)
252f5baf8bbSAlexander V. Chernikov {
253f5baf8bbSAlexander V. Chernikov 
254f5baf8bbSAlexander V. Chernikov 	if (ba->num_items < ba->alloc_items) {
255f5baf8bbSAlexander V. Chernikov 		ba->arr[ba->num_items++] = *br_new;
256f5baf8bbSAlexander V. Chernikov 		return (true);
257f5baf8bbSAlexander V. Chernikov 	}
258f5baf8bbSAlexander V. Chernikov 	return (false);
259f5baf8bbSAlexander V. Chernikov }
260f5baf8bbSAlexander V. Chernikov 
261f5baf8bbSAlexander V. Chernikov static struct bsearch4_record *
get_last_entry(struct bsearch4_array * ba)262f5baf8bbSAlexander V. Chernikov get_last_entry(struct bsearch4_array *ba)
263f5baf8bbSAlexander V. Chernikov {
264f5baf8bbSAlexander V. Chernikov 
265f5baf8bbSAlexander V. Chernikov 	return (&ba->arr[ba->num_items - 1]);
266f5baf8bbSAlexander V. Chernikov }
267f5baf8bbSAlexander V. Chernikov 
/*
 * Pops the top prefix off @stack and, if that (less specific) prefix
 * extends past the last interval already written to @dst_array, appends
 * a continuation record routing the remainder via the popped nexthop.
 *
 * Example:
 *  stack: 10.0.1.0/24,nh=3 array: 10.0.1.0/25,nh=4 -> ++10.0.1.128/24,nh=3
 *
 * Returns false if @dst_array is full.
 */
static bool
pop_stack_entry(struct bsearch4_array *dst_array, struct bsearch4_array *stack)
{
	uint32_t last_stack_addr, last_array_addr;

	struct bsearch4_record *br_prev = get_last_entry(dst_array);
	struct bsearch4_record *pstack = get_last_entry(stack);

	/* Regardless of the result, pop stack entry */
	stack->num_items--;

	/* Prefix last address for the last entry in lookup array */
	last_array_addr = (br_prev->addr4 | ~br_prev->mask4);
	/* Prefix last address for the stack record entry */
	last_stack_addr = (pstack->addr4 | ~pstack->mask4);

	if (last_stack_addr > last_array_addr) {
		/*
		 * Stack record covers > address space than
		 * the last entry in the lookup array.
		 * Add the remaining parts of a stack record to
		 * the lookup array.
		 */
		struct bsearch4_record br_new = {
			.addr4 = last_array_addr + 1,
			.mask4 = pstack->mask4,
			.nh = pstack->nh,
		};
		return (add_array_entry(dst_array, &br_new));
	}

	return (true);
}
308f5baf8bbSAlexander V. Chernikov 
/*
 * Updates resulting array @dst_array with a rib entry @rib_entry.
 *
 * Maintains @stack of enclosing, less specific prefixes and fills the
 * address gaps between consecutive records with the nexthop of the
 * innermost enclosing prefix, so the compiled array partitions the
 * entire address space. Relies on records arriving in rr_cmp() order.
 *
 * Returns false if either array runs out of space.
 */
static bool
bsearch4_process_record(struct bsearch4_array *dst_array,
    struct bsearch4_array *stack, struct bsearch4_record *rib_entry)
{

	/*
	 * Maintain invariant: current rib_entry is always contained
	 *  in the top stack entry.
	 * Note we always have 0.0.0.0/0.
	 */
	while (stack->num_items > 0) {
		struct bsearch4_record *pst = get_last_entry(stack);

		/*
		 * Check if we need to pop stack.
		 * Rely on the ordering - larger prefixes comes up first
		 * Pop any entry that doesn't contain current prefix.
		 */
		if (pst->addr4 == (rib_entry->addr4 & pst->mask4))
			break;

		if (!pop_stack_entry(dst_array, stack))
			return (false);
	}

	 if (dst_array->num_items > 0) {

		 /*
		  * Check if there is a gap between previous entry and a
		  *  current entry. Code above guarantees that both previous
		  *  and current entry are contained in the top stack entry.
		  *
		  * Example: last: 10.0.0.1(/32,nh=3) cur: 10.0.0.3(/32,nh=4),
		  *  stack: 10.0.0.0/24,nh=2.
		  * Cover a gap between previous and current by adding stack
		  *  nexthop.
		  */
		 struct bsearch4_record *br_tmp = get_last_entry(dst_array);
		 uint32_t last_declared_addr = br_tmp->addr4 | ~br_tmp->mask4;
		 if (last_declared_addr < rib_entry->addr4 - 1) {
			 /* Cover a hole */
			struct bsearch4_record *pst = get_last_entry(stack);
			struct bsearch4_record new_entry = {
				.addr4 = last_declared_addr + 1,
				.mask4 = pst->mask4,
				.nh = pst->nh,
			};
			if (!add_array_entry(dst_array, &new_entry))
				return (false);
		 }

		 /*
		  * Special case: adding more specific prefix at the start of
		  * the previous interval:
		  * 10.0.0.0(/24,nh=3), 10.0.0.0(/25,nh=4)
		  * Alter the last record, setting new nexthop and mask.
		  */
		 if (br_tmp->addr4 == rib_entry->addr4) {
			*br_tmp = *rib_entry;
			add_array_entry(stack, rib_entry);
			return (true);
		 }
	 }

	if (!add_array_entry(dst_array, rib_entry))
		return (false);
	add_array_entry(stack, rib_entry);

	return (true);
}
382f5baf8bbSAlexander V. Chernikov 
383f5baf8bbSAlexander V. Chernikov static enum flm_op_result
bsearch4_build_array(struct bsearch4_array * dst_array,struct bsearch4_array * src_array)384f5baf8bbSAlexander V. Chernikov bsearch4_build_array(struct bsearch4_array *dst_array, struct bsearch4_array *src_array)
385f5baf8bbSAlexander V. Chernikov {
386f5baf8bbSAlexander V. Chernikov 
387f5baf8bbSAlexander V. Chernikov 	/*
388f5baf8bbSAlexander V. Chernikov 	 * During iteration, we keep track of all prefixes in rtable
389f5baf8bbSAlexander V. Chernikov 	 * we currently match, by maintaining stack. As there can be only
390f5baf8bbSAlexander V. Chernikov 	 * 32 prefixes for a single address, pre-allocate stack of size 32.
391f5baf8bbSAlexander V. Chernikov 	 */
392f5baf8bbSAlexander V. Chernikov 	struct bsearch4_array stack = {
393f5baf8bbSAlexander V. Chernikov 		.alloc_items = 32,
394f5baf8bbSAlexander V. Chernikov 		.arr = mallocarray(32, sizeof(struct bsearch4_record), M_TEMP, M_NOWAIT | M_ZERO),
395f5baf8bbSAlexander V. Chernikov 	};
396f5baf8bbSAlexander V. Chernikov 	if (stack.arr == NULL)
397f5baf8bbSAlexander V. Chernikov 		return (FLM_REBUILD);
398f5baf8bbSAlexander V. Chernikov 
399f5baf8bbSAlexander V. Chernikov 	for (int i = 0; i < src_array->num_items; i++) {
400f5baf8bbSAlexander V. Chernikov 		struct bsearch4_record *rib_entry = &src_array->arr[i];
401f5baf8bbSAlexander V. Chernikov 
402f5baf8bbSAlexander V. Chernikov 		if (!bsearch4_process_record(dst_array, &stack, rib_entry)) {
403f5baf8bbSAlexander V. Chernikov 			free(stack.arr, M_TEMP);
404f5baf8bbSAlexander V. Chernikov 			return (FLM_REBUILD);
405f5baf8bbSAlexander V. Chernikov 		}
406f5baf8bbSAlexander V. Chernikov 	}
407f5baf8bbSAlexander V. Chernikov 
408f5baf8bbSAlexander V. Chernikov 	/*
409f5baf8bbSAlexander V. Chernikov 	 * We know that last record is contained in the top stack entry.
410f5baf8bbSAlexander V. Chernikov 	 */
411f5baf8bbSAlexander V. Chernikov 	while (stack.num_items > 0) {
412f5baf8bbSAlexander V. Chernikov 		if (!pop_stack_entry(dst_array, &stack))
413f5baf8bbSAlexander V. Chernikov 			return (FLM_REBUILD);
414f5baf8bbSAlexander V. Chernikov 	}
415f5baf8bbSAlexander V. Chernikov 	free(stack.arr, M_TEMP);
416f5baf8bbSAlexander V. Chernikov 
417f5baf8bbSAlexander V. Chernikov 	return (FLM_SUCCESS);
418f5baf8bbSAlexander V. Chernikov }
419f5baf8bbSAlexander V. Chernikov 
420f5baf8bbSAlexander V. Chernikov static enum flm_op_result
bsearch4_build(struct bsearch4_data * bd)421f5baf8bbSAlexander V. Chernikov bsearch4_build(struct bsearch4_data *bd)
422f5baf8bbSAlexander V. Chernikov {
423f5baf8bbSAlexander V. Chernikov 	enum flm_op_result ret;
424f5baf8bbSAlexander V. Chernikov 
425f5baf8bbSAlexander V. Chernikov 	struct bsearch4_array prefixes_array = {
426f5baf8bbSAlexander V. Chernikov 		.alloc_items = bd->alloc_items,
427f5baf8bbSAlexander V. Chernikov 		.num_items = bd->num_items,
428f5baf8bbSAlexander V. Chernikov 		.arr = bd->rr,
429f5baf8bbSAlexander V. Chernikov 	};
430f5baf8bbSAlexander V. Chernikov 
431f5baf8bbSAlexander V. Chernikov 	/* Add default route if not exists */
432f5baf8bbSAlexander V. Chernikov 	bool default_found = false;
433f5baf8bbSAlexander V. Chernikov 	for (int i = 0; i < prefixes_array.num_items; i++) {
434f5baf8bbSAlexander V. Chernikov 		if (prefixes_array.arr[i].mask4 == 0) {
435f5baf8bbSAlexander V. Chernikov 			default_found = true;
436f5baf8bbSAlexander V. Chernikov 			break;
437f5baf8bbSAlexander V. Chernikov 		}
438f5baf8bbSAlexander V. Chernikov 	}
439f5baf8bbSAlexander V. Chernikov 	if (!default_found) {
440f5baf8bbSAlexander V. Chernikov 		 /* Add default route with NULL nhop */
441f5baf8bbSAlexander V. Chernikov 		struct bsearch4_record default_entry = {};
442f5baf8bbSAlexander V. Chernikov 		if (!add_array_entry(&prefixes_array, &default_entry))
443f5baf8bbSAlexander V. Chernikov 			 return (FLM_REBUILD);
444f5baf8bbSAlexander V. Chernikov 	}
445f5baf8bbSAlexander V. Chernikov 
446f5baf8bbSAlexander V. Chernikov 	/* Sort prefixes */
447f5baf8bbSAlexander V. Chernikov 	qsort(prefixes_array.arr, prefixes_array.num_items, sizeof(struct bsearch4_record), rr_cmp);
448f5baf8bbSAlexander V. Chernikov 
449f5baf8bbSAlexander V. Chernikov 	struct bsearch4_array dst_array = {
450f5baf8bbSAlexander V. Chernikov 		.alloc_items = bd->alloc_items,
451f5baf8bbSAlexander V. Chernikov 		.arr = bd->br,
452f5baf8bbSAlexander V. Chernikov 	};
453f5baf8bbSAlexander V. Chernikov 
454f5baf8bbSAlexander V. Chernikov 	ret = bsearch4_build_array(&dst_array, &prefixes_array);
455f5baf8bbSAlexander V. Chernikov 	bd->num_items = dst_array.num_items;
456f5baf8bbSAlexander V. Chernikov 
457f5baf8bbSAlexander V. Chernikov 	free(bd->rr, M_TEMP);
458f5baf8bbSAlexander V. Chernikov 	bd->rr = NULL;
459f5baf8bbSAlexander V. Chernikov 	return (ret);
460f5baf8bbSAlexander V. Chernikov }
461f5baf8bbSAlexander V. Chernikov 
462f5baf8bbSAlexander V. Chernikov 
463f5baf8bbSAlexander V. Chernikov static enum flm_op_result
bsearch4_end_dump(void * _data,struct fib_dp * dp)464f5baf8bbSAlexander V. Chernikov bsearch4_end_dump(void *_data, struct fib_dp *dp)
465f5baf8bbSAlexander V. Chernikov {
466f5baf8bbSAlexander V. Chernikov 	struct bsearch4_data *bd = (struct bsearch4_data *)_data;
467f5baf8bbSAlexander V. Chernikov 	enum flm_op_result ret;
468f5baf8bbSAlexander V. Chernikov 
469f5baf8bbSAlexander V. Chernikov 	ret = bsearch4_build(bd);
470f5baf8bbSAlexander V. Chernikov 	if (ret == FLM_SUCCESS) {
471f5baf8bbSAlexander V. Chernikov 		dp->f = bsearch4_lookup;
472f5baf8bbSAlexander V. Chernikov 		dp->arg = bd;
473f5baf8bbSAlexander V. Chernikov 	}
474f5baf8bbSAlexander V. Chernikov 
475f5baf8bbSAlexander V. Chernikov 	return (ret);
476f5baf8bbSAlexander V. Chernikov }
477f5baf8bbSAlexander V. Chernikov 
478f5baf8bbSAlexander V. Chernikov static enum flm_op_result
bsearch4_change_cb(struct rib_head * rnh,struct rib_cmd_info * rc,void * _data)479f5baf8bbSAlexander V. Chernikov bsearch4_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
480f5baf8bbSAlexander V. Chernikov     void *_data)
481f5baf8bbSAlexander V. Chernikov {
482f5baf8bbSAlexander V. Chernikov 
483f5baf8bbSAlexander V. Chernikov 	return (FLM_REBUILD);
484f5baf8bbSAlexander V. Chernikov }
485f5baf8bbSAlexander V. Chernikov 
486f5baf8bbSAlexander V. Chernikov struct fib_lookup_module flm_bsearch4= {
487f5baf8bbSAlexander V. Chernikov 	.flm_name = "bsearch4",
488f5baf8bbSAlexander V. Chernikov 	.flm_family = AF_INET,
489f5baf8bbSAlexander V. Chernikov 	.flm_init_cb = bsearch4_init,
490f5baf8bbSAlexander V. Chernikov 	.flm_destroy_cb = bsearch4_destroy,
491f5baf8bbSAlexander V. Chernikov 	.flm_dump_rib_item_cb = bsearch4_add_route_cb,
492f5baf8bbSAlexander V. Chernikov 	.flm_dump_end_cb = bsearch4_end_dump,
493f5baf8bbSAlexander V. Chernikov 	.flm_change_rib_item_cb = bsearch4_change_cb,
494f5baf8bbSAlexander V. Chernikov 	.flm_get_pref = bsearch4_get_pref,
495f5baf8bbSAlexander V. Chernikov };
496f5baf8bbSAlexander V. Chernikov 
497f5baf8bbSAlexander V. Chernikov /*
498f5baf8bbSAlexander V. Chernikov  * Lockless radix lookup algo.
499f5baf8bbSAlexander V. Chernikov  *
500f5baf8bbSAlexander V. Chernikov  * Compiles immutable radix from the current routing table.
501f5baf8bbSAlexander V. Chernikov  * Used with small amount of routes (<1000).
 * As the data structure is immutable, it gets rebuilt on each rtable change.
503f5baf8bbSAlexander V. Chernikov  *
504f5baf8bbSAlexander V. Chernikov  * Lookups are slightly faster as shorter lookup keys are used
505f5baf8bbSAlexander V. Chernikov  *  (4 bytes instead of 8 in stock radix).
506f5baf8bbSAlexander V. Chernikov  */
507f5baf8bbSAlexander V. Chernikov 
508f5baf8bbSAlexander V. Chernikov #define KEY_LEN_INET	(offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t))
509f5baf8bbSAlexander V. Chernikov #define OFF_LEN_INET	(8 * offsetof(struct sockaddr_in, sin_addr))
/*
 * Entry of the lockless IPv4 radix: radix node storage, the
 * sockaddr_in key and the nexthop returned by lookups.
 */
struct radix4_addr_entry {
	struct radix_node	rn[2];	/* radix node storage for this entry */
	struct sockaddr_in	addr;	/* prefix key; sin_len = KEY_LEN_INET */
	struct nhop_object	*nhop;	/* lookup result */
};
515f5baf8bbSAlexander V. Chernikov #define	LRADIX4_ITEM_SZ	roundup2(sizeof(struct radix4_addr_entry), 64)
516f5baf8bbSAlexander V. Chernikov 
517f5baf8bbSAlexander V. Chernikov struct lradix4_data {
518f5baf8bbSAlexander V. Chernikov 	struct radix_node_head	*rnh;
519f5baf8bbSAlexander V. Chernikov 	struct fib_data		*fd;
520f5baf8bbSAlexander V. Chernikov 	void			*mem;
521f5baf8bbSAlexander V. Chernikov 	char			*rt_base;
522f5baf8bbSAlexander V. Chernikov 	uint32_t		alloc_items;
523f5baf8bbSAlexander V. Chernikov 	uint32_t		num_items;
524f5baf8bbSAlexander V. Chernikov };
525f5baf8bbSAlexander V. Chernikov 
526f5baf8bbSAlexander V. Chernikov static struct nhop_object *
lradix4_lookup(void * algo_data,const struct flm_lookup_key key,uint32_t scopeid)527f5baf8bbSAlexander V. Chernikov lradix4_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
528f5baf8bbSAlexander V. Chernikov {
529f5baf8bbSAlexander V. Chernikov 	struct radix_node_head *rnh = (struct radix_node_head *)algo_data;
530f5baf8bbSAlexander V. Chernikov 	struct radix4_addr_entry *ent;
531f5baf8bbSAlexander V. Chernikov 	struct sockaddr_in addr4 = {
532f5baf8bbSAlexander V. Chernikov 		.sin_len = KEY_LEN_INET,
533f5baf8bbSAlexander V. Chernikov 		.sin_addr = key.addr4,
534f5baf8bbSAlexander V. Chernikov 	};
5352defbe9fSAlexander V. Chernikov 	ent = (struct radix4_addr_entry *)(rn_match(&addr4, &rnh->rh));
536f5baf8bbSAlexander V. Chernikov 	if (ent != NULL)
537f5baf8bbSAlexander V. Chernikov 		return (ent->nhop);
538f5baf8bbSAlexander V. Chernikov 	return (NULL);
539f5baf8bbSAlexander V. Chernikov }
540f5baf8bbSAlexander V. Chernikov 
541f5baf8bbSAlexander V. Chernikov /*
542f5baf8bbSAlexander V. Chernikov  * Preference function.
543f5baf8bbSAlexander V. Chernikov  * Assume close-to-ideal of < 10 routes (though worse than bsearch), then
544f5baf8bbSAlexander V. Chernikov  * gradually degrade until 1000 routes are reached.
545f5baf8bbSAlexander V. Chernikov  */
546f5baf8bbSAlexander V. Chernikov static uint8_t
lradix4_get_pref(const struct rib_rtable_info * rinfo)547f5baf8bbSAlexander V. Chernikov lradix4_get_pref(const struct rib_rtable_info *rinfo)
548f5baf8bbSAlexander V. Chernikov {
549f5baf8bbSAlexander V. Chernikov 
550f5baf8bbSAlexander V. Chernikov 	if (rinfo->num_prefixes < 10)
551f5baf8bbSAlexander V. Chernikov 		return (250);
552f5baf8bbSAlexander V. Chernikov 	else if (rinfo->num_prefixes < 1000)
553f5baf8bbSAlexander V. Chernikov 		return (254 - rinfo->num_prefixes / 4);
554f5baf8bbSAlexander V. Chernikov 	else
555f5baf8bbSAlexander V. Chernikov 		return (1);
556f5baf8bbSAlexander V. Chernikov }
557f5baf8bbSAlexander V. Chernikov 
558f5baf8bbSAlexander V. Chernikov static enum flm_op_result
lradix4_init(uint32_t fibnum,struct fib_data * fd,void * _old_data,void ** _data)559f5baf8bbSAlexander V. Chernikov lradix4_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data)
560f5baf8bbSAlexander V. Chernikov {
561f5baf8bbSAlexander V. Chernikov 	struct lradix4_data *lr;
562f5baf8bbSAlexander V. Chernikov 	struct rib_rtable_info rinfo;
563f5baf8bbSAlexander V. Chernikov 	uint32_t count;
564f5baf8bbSAlexander V. Chernikov 	size_t sz;
565f5baf8bbSAlexander V. Chernikov 
566f5baf8bbSAlexander V. Chernikov 	lr = malloc(sizeof(struct lradix4_data), M_RTABLE, M_NOWAIT | M_ZERO);
567f5baf8bbSAlexander V. Chernikov 	if (lr == NULL || !rn_inithead((void **)&lr->rnh, OFF_LEN_INET))
568f5baf8bbSAlexander V. Chernikov 		return (FLM_REBUILD);
569f5baf8bbSAlexander V. Chernikov 	fib_get_rtable_info(fib_get_rh(fd), &rinfo);
570f5baf8bbSAlexander V. Chernikov 
571f5baf8bbSAlexander V. Chernikov 	count = rinfo.num_prefixes * 11 / 10;
572f5baf8bbSAlexander V. Chernikov 	sz = count * LRADIX4_ITEM_SZ + CACHE_LINE_SIZE;
573f5baf8bbSAlexander V. Chernikov 	lr->mem = malloc(sz, M_RTABLE, M_NOWAIT | M_ZERO);
574f5baf8bbSAlexander V. Chernikov 	if (lr->mem == NULL)
575f5baf8bbSAlexander V. Chernikov 		return (FLM_REBUILD);
576f5baf8bbSAlexander V. Chernikov 	/* Align all rtentries to a cacheline boundary */
577f5baf8bbSAlexander V. Chernikov 	lr->rt_base = (char *)roundup2((uintptr_t)lr->mem, CACHE_LINE_SIZE);
578f5baf8bbSAlexander V. Chernikov 	lr->alloc_items = count;
579f5baf8bbSAlexander V. Chernikov 	lr->fd = fd;
580f5baf8bbSAlexander V. Chernikov 
581f5baf8bbSAlexander V. Chernikov 	*_data = lr;
582f5baf8bbSAlexander V. Chernikov 
583f5baf8bbSAlexander V. Chernikov 	return (FLM_SUCCESS);
584f5baf8bbSAlexander V. Chernikov }
585f5baf8bbSAlexander V. Chernikov 
586f5baf8bbSAlexander V. Chernikov static void
lradix4_destroy(void * _data)587f5baf8bbSAlexander V. Chernikov lradix4_destroy(void *_data)
588f5baf8bbSAlexander V. Chernikov {
589f5baf8bbSAlexander V. Chernikov 	struct lradix4_data *lr = (struct lradix4_data *)_data;
590f5baf8bbSAlexander V. Chernikov 
591f5baf8bbSAlexander V. Chernikov 	if (lr->rnh != NULL)
592f5baf8bbSAlexander V. Chernikov 		rn_detachhead((void **)&lr->rnh);
593f5baf8bbSAlexander V. Chernikov 	if (lr->mem != NULL)
594f5baf8bbSAlexander V. Chernikov 		free(lr->mem, M_RTABLE);
595f5baf8bbSAlexander V. Chernikov 	free(lr, M_RTABLE);
596f5baf8bbSAlexander V. Chernikov }
597f5baf8bbSAlexander V. Chernikov 
598f5baf8bbSAlexander V. Chernikov static enum flm_op_result
lradix4_add_route_cb(struct rtentry * rt,void * _data)599f5baf8bbSAlexander V. Chernikov lradix4_add_route_cb(struct rtentry *rt, void *_data)
600f5baf8bbSAlexander V. Chernikov {
601f5baf8bbSAlexander V. Chernikov 	struct lradix4_data *lr = (struct lradix4_data *)_data;
602f5baf8bbSAlexander V. Chernikov 	struct radix4_addr_entry *ae;
603f5baf8bbSAlexander V. Chernikov 	struct sockaddr_in mask;
604f733d970SAlexander V. Chernikov 	struct sockaddr *rt_mask;
605f5baf8bbSAlexander V. Chernikov 	struct radix_node *rn;
606f5baf8bbSAlexander V. Chernikov 	struct in_addr addr4, mask4;
607f5baf8bbSAlexander V. Chernikov 	uint32_t scopeid;
608f5baf8bbSAlexander V. Chernikov 
609f5baf8bbSAlexander V. Chernikov 	if (lr->num_items >= lr->alloc_items)
610f5baf8bbSAlexander V. Chernikov 		return (FLM_REBUILD);
611f5baf8bbSAlexander V. Chernikov 
612f5baf8bbSAlexander V. Chernikov 	ae = (struct radix4_addr_entry *)(lr->rt_base + lr->num_items * LRADIX4_ITEM_SZ);
613f5baf8bbSAlexander V. Chernikov 	lr->num_items++;
614f5baf8bbSAlexander V. Chernikov 
615f5baf8bbSAlexander V. Chernikov 	ae->nhop = rt_get_raw_nhop(rt);
616f5baf8bbSAlexander V. Chernikov 
617f5baf8bbSAlexander V. Chernikov 	rt_get_inet_prefix_pmask(rt, &addr4, &mask4, &scopeid);
618f5baf8bbSAlexander V. Chernikov 	ae->addr.sin_len = KEY_LEN_INET;
619f5baf8bbSAlexander V. Chernikov 	ae->addr.sin_addr = addr4;
620f5baf8bbSAlexander V. Chernikov 
621f733d970SAlexander V. Chernikov 	if (mask4.s_addr != INADDR_BROADCAST) {
622f5baf8bbSAlexander V. Chernikov 		bzero(&mask, sizeof(mask));
623f5baf8bbSAlexander V. Chernikov 		mask.sin_len = KEY_LEN_INET;
624f5baf8bbSAlexander V. Chernikov 		mask.sin_addr = mask4;
625f5baf8bbSAlexander V. Chernikov 		rt_mask = (struct sockaddr *)&mask;
626f733d970SAlexander V. Chernikov 	} else
627f733d970SAlexander V. Chernikov 		rt_mask = NULL;
628f5baf8bbSAlexander V. Chernikov 
629f5baf8bbSAlexander V. Chernikov 	rn = lr->rnh->rnh_addaddr((struct sockaddr *)&ae->addr, rt_mask,
630f5baf8bbSAlexander V. Chernikov 	    &lr->rnh->rh, ae->rn);
631f5baf8bbSAlexander V. Chernikov 	if (rn == NULL)
632f5baf8bbSAlexander V. Chernikov 		return (FLM_REBUILD);
633f5baf8bbSAlexander V. Chernikov 
634f5baf8bbSAlexander V. Chernikov 	return (FLM_SUCCESS);
635f5baf8bbSAlexander V. Chernikov }
636f5baf8bbSAlexander V. Chernikov 
637f5baf8bbSAlexander V. Chernikov static enum flm_op_result
lradix4_end_dump(void * _data,struct fib_dp * dp)638f5baf8bbSAlexander V. Chernikov lradix4_end_dump(void *_data, struct fib_dp *dp)
639f5baf8bbSAlexander V. Chernikov {
640f5baf8bbSAlexander V. Chernikov 	struct lradix4_data *lr = (struct lradix4_data *)_data;
641f5baf8bbSAlexander V. Chernikov 
642f5baf8bbSAlexander V. Chernikov 	dp->f = lradix4_lookup;
643f5baf8bbSAlexander V. Chernikov 	dp->arg = lr->rnh;
644f5baf8bbSAlexander V. Chernikov 
645f5baf8bbSAlexander V. Chernikov 	return (FLM_SUCCESS);
646f5baf8bbSAlexander V. Chernikov }
647f5baf8bbSAlexander V. Chernikov 
648f5baf8bbSAlexander V. Chernikov static enum flm_op_result
lradix4_change_cb(struct rib_head * rnh,struct rib_cmd_info * rc,void * _data)649f5baf8bbSAlexander V. Chernikov lradix4_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
650f5baf8bbSAlexander V. Chernikov     void *_data)
651f5baf8bbSAlexander V. Chernikov {
652f5baf8bbSAlexander V. Chernikov 
653f5baf8bbSAlexander V. Chernikov 	return (FLM_REBUILD);
654f5baf8bbSAlexander V. Chernikov }
655f5baf8bbSAlexander V. Chernikov 
656f5baf8bbSAlexander V. Chernikov struct fib_lookup_module flm_radix4_lockless = {
657f5baf8bbSAlexander V. Chernikov 	.flm_name = "radix4_lockless",
658f5baf8bbSAlexander V. Chernikov 	.flm_family = AF_INET,
659f5baf8bbSAlexander V. Chernikov 	.flm_init_cb = lradix4_init,
660f5baf8bbSAlexander V. Chernikov 	.flm_destroy_cb = lradix4_destroy,
661f5baf8bbSAlexander V. Chernikov 	.flm_dump_rib_item_cb = lradix4_add_route_cb,
662f5baf8bbSAlexander V. Chernikov 	.flm_dump_end_cb = lradix4_end_dump,
663f5baf8bbSAlexander V. Chernikov 	.flm_change_rib_item_cb = lradix4_change_cb,
664f5baf8bbSAlexander V. Chernikov 	.flm_get_pref = lradix4_get_pref,
665f5baf8bbSAlexander V. Chernikov };
666f5baf8bbSAlexander V. Chernikov 
667f5baf8bbSAlexander V. Chernikov /*
668f5baf8bbSAlexander V. Chernikov  * Fallback lookup algorithm.
669f5baf8bbSAlexander V. Chernikov  * This is a simple wrapper around system radix.
670f5baf8bbSAlexander V. Chernikov  */
671f5baf8bbSAlexander V. Chernikov 
672f5baf8bbSAlexander V. Chernikov struct radix4_data {
673f5baf8bbSAlexander V. Chernikov 	struct fib_data *fd;
674f5baf8bbSAlexander V. Chernikov 	struct rib_head *rh;
675f5baf8bbSAlexander V. Chernikov };
676f5baf8bbSAlexander V. Chernikov 
677f5baf8bbSAlexander V. Chernikov static struct nhop_object *
radix4_lookup(void * algo_data,const struct flm_lookup_key key,uint32_t scopeid)678f5baf8bbSAlexander V. Chernikov radix4_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
679f5baf8bbSAlexander V. Chernikov {
680f5baf8bbSAlexander V. Chernikov 	RIB_RLOCK_TRACKER;
681f5baf8bbSAlexander V. Chernikov 	struct rib_head *rh = (struct rib_head *)algo_data;
682f5baf8bbSAlexander V. Chernikov 	struct radix_node *rn;
683f5baf8bbSAlexander V. Chernikov 	struct nhop_object *nh;
684f5baf8bbSAlexander V. Chernikov 
685f5baf8bbSAlexander V. Chernikov 	/* Prepare lookup key */
686f5baf8bbSAlexander V. Chernikov 	struct sockaddr_in sin4 = {
687f5baf8bbSAlexander V. Chernikov 		.sin_family = AF_INET,
688f5baf8bbSAlexander V. Chernikov 		.sin_len = sizeof(struct sockaddr_in),
689f5baf8bbSAlexander V. Chernikov 		.sin_addr = key.addr4,
690f5baf8bbSAlexander V. Chernikov 	};
691f5baf8bbSAlexander V. Chernikov 
692f5baf8bbSAlexander V. Chernikov 	nh = NULL;
693f5baf8bbSAlexander V. Chernikov 	RIB_RLOCK(rh);
6942defbe9fSAlexander V. Chernikov 	rn = rn_match((void *)&sin4, &rh->head);
695f5baf8bbSAlexander V. Chernikov 	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0))
696f5baf8bbSAlexander V. Chernikov 		nh = (RNTORT(rn))->rt_nhop;
697f5baf8bbSAlexander V. Chernikov 	RIB_RUNLOCK(rh);
698f5baf8bbSAlexander V. Chernikov 
699f5baf8bbSAlexander V. Chernikov 	return (nh);
700f5baf8bbSAlexander V. Chernikov }
701f5baf8bbSAlexander V. Chernikov 
702f5baf8bbSAlexander V. Chernikov static uint8_t
radix4_get_pref(const struct rib_rtable_info * rinfo)703f5baf8bbSAlexander V. Chernikov radix4_get_pref(const struct rib_rtable_info *rinfo)
704f5baf8bbSAlexander V. Chernikov {
705f5baf8bbSAlexander V. Chernikov 
706f5baf8bbSAlexander V. Chernikov 	return (50);
707f5baf8bbSAlexander V. Chernikov }
708f5baf8bbSAlexander V. Chernikov 
709f5baf8bbSAlexander V. Chernikov static enum flm_op_result
radix4_init(uint32_t fibnum,struct fib_data * fd,void * _old_data,void ** _data)710f5baf8bbSAlexander V. Chernikov radix4_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data)
711f5baf8bbSAlexander V. Chernikov {
712f5baf8bbSAlexander V. Chernikov 	struct radix4_data *r4;
713f5baf8bbSAlexander V. Chernikov 
714f5baf8bbSAlexander V. Chernikov 	r4 = malloc(sizeof(struct radix4_data), M_RTABLE, M_NOWAIT | M_ZERO);
715f5baf8bbSAlexander V. Chernikov 	if (r4 == NULL)
716f5baf8bbSAlexander V. Chernikov 		return (FLM_REBUILD);
717f5baf8bbSAlexander V. Chernikov 	r4->fd = fd;
718f5baf8bbSAlexander V. Chernikov 	r4->rh = fib_get_rh(fd);
719f5baf8bbSAlexander V. Chernikov 
720f5baf8bbSAlexander V. Chernikov 	*_data = r4;
721f5baf8bbSAlexander V. Chernikov 
722f5baf8bbSAlexander V. Chernikov 	return (FLM_SUCCESS);
723f5baf8bbSAlexander V. Chernikov }
724f5baf8bbSAlexander V. Chernikov 
725f5baf8bbSAlexander V. Chernikov static void
radix4_destroy(void * _data)726f5baf8bbSAlexander V. Chernikov radix4_destroy(void *_data)
727f5baf8bbSAlexander V. Chernikov {
728f5baf8bbSAlexander V. Chernikov 
729f5baf8bbSAlexander V. Chernikov 	free(_data, M_RTABLE);
730f5baf8bbSAlexander V. Chernikov }
731f5baf8bbSAlexander V. Chernikov 
732f5baf8bbSAlexander V. Chernikov static enum flm_op_result
radix4_add_route_cb(struct rtentry * rt,void * _data)733f5baf8bbSAlexander V. Chernikov radix4_add_route_cb(struct rtentry *rt, void *_data)
734f5baf8bbSAlexander V. Chernikov {
735f5baf8bbSAlexander V. Chernikov 
736f5baf8bbSAlexander V. Chernikov 	return (FLM_SUCCESS);
737f5baf8bbSAlexander V. Chernikov }
738f5baf8bbSAlexander V. Chernikov 
739f5baf8bbSAlexander V. Chernikov static enum flm_op_result
radix4_end_dump(void * _data,struct fib_dp * dp)740f5baf8bbSAlexander V. Chernikov radix4_end_dump(void *_data, struct fib_dp *dp)
741f5baf8bbSAlexander V. Chernikov {
742f5baf8bbSAlexander V. Chernikov 	struct radix4_data *r4 = (struct radix4_data *)_data;
743f5baf8bbSAlexander V. Chernikov 
744f5baf8bbSAlexander V. Chernikov 	dp->f = radix4_lookup;
745f5baf8bbSAlexander V. Chernikov 	dp->arg = r4->rh;
746f5baf8bbSAlexander V. Chernikov 
747f5baf8bbSAlexander V. Chernikov 	return (FLM_SUCCESS);
748f5baf8bbSAlexander V. Chernikov }
749f5baf8bbSAlexander V. Chernikov 
750f5baf8bbSAlexander V. Chernikov static enum flm_op_result
radix4_change_cb(struct rib_head * rnh,struct rib_cmd_info * rc,void * _data)751f5baf8bbSAlexander V. Chernikov radix4_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
752f5baf8bbSAlexander V. Chernikov     void *_data)
753f5baf8bbSAlexander V. Chernikov {
754f5baf8bbSAlexander V. Chernikov 
755f5baf8bbSAlexander V. Chernikov 	return (FLM_SUCCESS);
756f5baf8bbSAlexander V. Chernikov }
757f5baf8bbSAlexander V. Chernikov 
758f5baf8bbSAlexander V. Chernikov struct fib_lookup_module flm_radix4 = {
759f5baf8bbSAlexander V. Chernikov 	.flm_name = "radix4",
760f5baf8bbSAlexander V. Chernikov 	.flm_family = AF_INET,
761f5baf8bbSAlexander V. Chernikov 	.flm_init_cb = radix4_init,
762f5baf8bbSAlexander V. Chernikov 	.flm_destroy_cb = radix4_destroy,
763f5baf8bbSAlexander V. Chernikov 	.flm_dump_rib_item_cb = radix4_add_route_cb,
764f5baf8bbSAlexander V. Chernikov 	.flm_dump_end_cb = radix4_end_dump,
765f5baf8bbSAlexander V. Chernikov 	.flm_change_rib_item_cb = radix4_change_cb,
766f5baf8bbSAlexander V. Chernikov 	.flm_get_pref = radix4_get_pref,
767f5baf8bbSAlexander V. Chernikov };
768f5baf8bbSAlexander V. Chernikov 
769f5baf8bbSAlexander V. Chernikov static void
fib4_algo_init(void)770f5baf8bbSAlexander V. Chernikov fib4_algo_init(void)
771f5baf8bbSAlexander V. Chernikov {
772f5baf8bbSAlexander V. Chernikov 
773f5baf8bbSAlexander V. Chernikov 	fib_module_register(&flm_bsearch4);
774f5baf8bbSAlexander V. Chernikov 	fib_module_register(&flm_radix4_lockless);
775f5baf8bbSAlexander V. Chernikov 	fib_module_register(&flm_radix4);
776f5baf8bbSAlexander V. Chernikov }
777f5baf8bbSAlexander V. Chernikov SYSINIT(fib4_algo_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, fib4_algo_init, NULL);
778