xref: /freebsd/sys/netinet/in_fib_dxr.c (revision 42b3c16e3051fdc04acc6276dd5d97dbe80ae7eb)
12aca58e1SMarko Zec /*-
24d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
32aca58e1SMarko Zec  *
4*42b3c16eSMarko Zec  * Copyright (c) 2012-2024 Marko Zec
52aca58e1SMarko Zec  * Copyright (c) 2005, 2018 University of Zagreb
62aca58e1SMarko Zec  * Copyright (c) 2005 International Computer Science Institute
72aca58e1SMarko Zec  *
82aca58e1SMarko Zec  * Redistribution and use in source and binary forms, with or without
92aca58e1SMarko Zec  * modification, are permitted provided that the following conditions
102aca58e1SMarko Zec  * are met:
112aca58e1SMarko Zec  * 1. Redistributions of source code must retain the above copyright
122aca58e1SMarko Zec  *    notice, this list of conditions and the following disclaimer.
132aca58e1SMarko Zec  * 2. Redistributions in binary form must reproduce the above copyright
142aca58e1SMarko Zec  *    notice, this list of conditions and the following disclaimer in the
152aca58e1SMarko Zec  *    documentation and/or other materials provided with the distribution.
162aca58e1SMarko Zec  *
172aca58e1SMarko Zec  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
182aca58e1SMarko Zec  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
192aca58e1SMarko Zec  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
202aca58e1SMarko Zec  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
212aca58e1SMarko Zec  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
222aca58e1SMarko Zec  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
232aca58e1SMarko Zec  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
242aca58e1SMarko Zec  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
252aca58e1SMarko Zec  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
262aca58e1SMarko Zec  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
272aca58e1SMarko Zec  * SUCH DAMAGE.
282aca58e1SMarko Zec  */
292aca58e1SMarko Zec 
302aca58e1SMarko Zec /*
312aca58e1SMarko Zec  * An implementation of DXR, a simple IPv4 LPM scheme with compact lookup
322aca58e1SMarko Zec  * structures and a trivial search procedure.  More significant bits of
332aca58e1SMarko Zec  * the search key are used to directly index a two-stage trie, while the
342aca58e1SMarko Zec  * remaining bits are used for finding the next hop in a sorted array.
352aca58e1SMarko Zec  * More details in:
362aca58e1SMarko Zec  *
372aca58e1SMarko Zec  * M. Zec, L. Rizzo, M. Mikuc, DXR: towards a billion routing lookups per
382aca58e1SMarko Zec  * second in software, ACM SIGCOMM Computer Communication Review, September
392aca58e1SMarko Zec  * 2012
402aca58e1SMarko Zec  *
412aca58e1SMarko Zec  * M. Zec, M. Mikuc, Pushing the envelope: beyond two billion IP routing
422aca58e1SMarko Zec  * lookups per second on commodity CPUs, IEEE SoftCOM, September 2017, Split
432aca58e1SMarko Zec  */
442aca58e1SMarko Zec 
452aca58e1SMarko Zec #include <sys/cdefs.h>
462aca58e1SMarko Zec #include "opt_inet.h"
472aca58e1SMarko Zec 
482aca58e1SMarko Zec #include <sys/param.h>
492aca58e1SMarko Zec #include <sys/kernel.h>
501549575fSMarko Zec #include <sys/ctype.h>
512aca58e1SMarko Zec #include <sys/epoch.h>
522aca58e1SMarko Zec #include <sys/malloc.h>
532aca58e1SMarko Zec #include <sys/module.h>
542aca58e1SMarko Zec #include <sys/socket.h>
552aca58e1SMarko Zec #include <sys/sysctl.h>
562aca58e1SMarko Zec #include <sys/syslog.h>
572aca58e1SMarko Zec 
582aca58e1SMarko Zec #include <vm/uma.h>
592aca58e1SMarko Zec 
602aca58e1SMarko Zec #include <netinet/in.h>
612aca58e1SMarko Zec #include <netinet/in_fib.h>
622aca58e1SMarko Zec 
632aca58e1SMarko Zec #include <net/route.h>
642aca58e1SMarko Zec #include <net/route/route_ctl.h>
652aca58e1SMarko Zec #include <net/route/fib_algo.h>
662aca58e1SMarko Zec 
672aca58e1SMarko Zec #define	DXR_TRIE_BITS		20
682aca58e1SMarko Zec 
692aca58e1SMarko Zec CTASSERT(DXR_TRIE_BITS >= 16 && DXR_TRIE_BITS <= 24);
702aca58e1SMarko Zec 
712aca58e1SMarko Zec #if DXR_TRIE_BITS > 16
722aca58e1SMarko Zec #define	DXR_D			16
732aca58e1SMarko Zec #else
742aca58e1SMarko Zec #define	DXR_D			(DXR_TRIE_BITS - 1)
752aca58e1SMarko Zec #endif
762aca58e1SMarko Zec 
772aca58e1SMarko Zec #define	D_TBL_SIZE		(1 << DXR_D)
782aca58e1SMarko Zec #define	DIRECT_TBL_SIZE		(1 << DXR_TRIE_BITS)
792aca58e1SMarko Zec #define	DXR_RANGE_MASK		(0xffffffffU >> DXR_TRIE_BITS)
802aca58e1SMarko Zec #define	DXR_RANGE_SHIFT		(32 - DXR_TRIE_BITS)
812aca58e1SMarko Zec 
822aca58e1SMarko Zec #define	DESC_BASE_BITS		22
832aca58e1SMarko Zec #define	DESC_FRAGMENTS_BITS	(32 - DESC_BASE_BITS)
842aca58e1SMarko Zec #define	BASE_MAX		((1 << DESC_BASE_BITS) - 1)
852aca58e1SMarko Zec #define	RTBL_SIZE_INCR		(BASE_MAX / 64)
862aca58e1SMarko Zec 
872aca58e1SMarko Zec #if DXR_TRIE_BITS < 24
882aca58e1SMarko Zec #define	FRAGS_MASK_SHORT	((1 << (23 - DXR_TRIE_BITS)) - 1)
892aca58e1SMarko Zec #else
902aca58e1SMarko Zec #define	FRAGS_MASK_SHORT	0
912aca58e1SMarko Zec #endif
922aca58e1SMarko Zec #define	FRAGS_PREF_SHORT	(((1 << DESC_FRAGMENTS_BITS) - 1) & \
932aca58e1SMarko Zec 				 ~FRAGS_MASK_SHORT)
942aca58e1SMarko Zec #define	FRAGS_MARK_XL		(FRAGS_PREF_SHORT - 1)
952aca58e1SMarko Zec #define	FRAGS_MARK_HIT		(FRAGS_PREF_SHORT - 2)
962aca58e1SMarko Zec 
972aca58e1SMarko Zec #define	IS_SHORT_FORMAT(x)	((x & FRAGS_PREF_SHORT) == FRAGS_PREF_SHORT)
982aca58e1SMarko Zec #define	IS_LONG_FORMAT(x)	((x & FRAGS_PREF_SHORT) != FRAGS_PREF_SHORT)
992aca58e1SMarko Zec #define	IS_XL_FORMAT(x)		(x == FRAGS_MARK_XL)
1002aca58e1SMarko Zec 
1012aca58e1SMarko Zec #define	RE_SHORT_MAX_NH		((1 << (DXR_TRIE_BITS - 8)) - 1)
1022aca58e1SMarko Zec 
1032aca58e1SMarko Zec #define	CHUNK_HASH_BITS		16
1042aca58e1SMarko Zec #define	CHUNK_HASH_SIZE		(1 << CHUNK_HASH_BITS)
1052aca58e1SMarko Zec #define	CHUNK_HASH_MASK		(CHUNK_HASH_SIZE - 1)
1062aca58e1SMarko Zec 
1072aca58e1SMarko Zec #define	TRIE_HASH_BITS		16
1082aca58e1SMarko Zec #define	TRIE_HASH_SIZE		(1 << TRIE_HASH_BITS)
1092aca58e1SMarko Zec #define	TRIE_HASH_MASK		(TRIE_HASH_SIZE - 1)
1102aca58e1SMarko Zec 
1112aca58e1SMarko Zec #define	XTBL_SIZE_INCR		(DIRECT_TBL_SIZE / 16)
1122aca58e1SMarko Zec 
11343880c51SMarko Zec #define	UNUSED_BUCKETS		8
11443880c51SMarko Zec 
1152aca58e1SMarko Zec /* Lookup structure elements */
1162aca58e1SMarko Zec 
1172aca58e1SMarko Zec struct direct_entry {
1182aca58e1SMarko Zec 	uint32_t		fragments: DESC_FRAGMENTS_BITS,
1192aca58e1SMarko Zec 				base: DESC_BASE_BITS;
1202aca58e1SMarko Zec };
1212aca58e1SMarko Zec 
1222aca58e1SMarko Zec struct range_entry_long {
1232aca58e1SMarko Zec 	uint32_t		start: DXR_RANGE_SHIFT,
1242aca58e1SMarko Zec 				nexthop: DXR_TRIE_BITS;
1252aca58e1SMarko Zec };
1262aca58e1SMarko Zec 
1272aca58e1SMarko Zec #if DXR_TRIE_BITS < 24
1282aca58e1SMarko Zec struct range_entry_short {
1292aca58e1SMarko Zec 	uint16_t		start: DXR_RANGE_SHIFT - 8,
1302aca58e1SMarko Zec 				nexthop: DXR_TRIE_BITS - 8;
1312aca58e1SMarko Zec };
1322aca58e1SMarko Zec #endif
1332aca58e1SMarko Zec 
1342aca58e1SMarko Zec /* Auxiliary structures */
1352aca58e1SMarko Zec 
1362aca58e1SMarko Zec struct heap_entry {
1372aca58e1SMarko Zec 	uint32_t		start;
1382aca58e1SMarko Zec 	uint32_t		end;
1392aca58e1SMarko Zec 	uint32_t		preflen;
1402aca58e1SMarko Zec 	uint32_t		nexthop;
1412aca58e1SMarko Zec };
1422aca58e1SMarko Zec 
1432aca58e1SMarko Zec struct chunk_desc {
1442aca58e1SMarko Zec 	LIST_ENTRY(chunk_desc)	cd_all_le;
1452aca58e1SMarko Zec 	LIST_ENTRY(chunk_desc)	cd_hash_le;
1462aca58e1SMarko Zec 	uint32_t		cd_hash;
1472aca58e1SMarko Zec 	uint32_t		cd_refcnt;
1482aca58e1SMarko Zec 	uint32_t		cd_base;
1492aca58e1SMarko Zec 	uint32_t		cd_cur_size;
1502aca58e1SMarko Zec 	uint32_t		cd_max_size;
1512aca58e1SMarko Zec };
1522aca58e1SMarko Zec 
1532aca58e1SMarko Zec struct trie_desc {
1542aca58e1SMarko Zec 	LIST_ENTRY(trie_desc)	td_all_le;
1552aca58e1SMarko Zec 	LIST_ENTRY(trie_desc)	td_hash_le;
1562aca58e1SMarko Zec 	uint32_t		td_hash;
1572aca58e1SMarko Zec 	uint32_t		td_index;
1582aca58e1SMarko Zec 	uint32_t		td_refcnt;
1592aca58e1SMarko Zec };
1602aca58e1SMarko Zec 
1612aca58e1SMarko Zec struct dxr_aux {
1622aca58e1SMarko Zec 	/* Glue to external state */
1632aca58e1SMarko Zec 	struct fib_data		*fd;
1642aca58e1SMarko Zec 	uint32_t		fibnum;
1652aca58e1SMarko Zec 	int			refcnt;
1662aca58e1SMarko Zec 
1672aca58e1SMarko Zec 	/* Auxiliary build-time tables */
1682aca58e1SMarko Zec 	struct direct_entry	direct_tbl[DIRECT_TBL_SIZE];
1692aca58e1SMarko Zec 	uint16_t		d_tbl[D_TBL_SIZE];
1702aca58e1SMarko Zec 	struct direct_entry	*x_tbl;
1712aca58e1SMarko Zec 	union {
1722aca58e1SMarko Zec 		struct range_entry_long	re;
1732aca58e1SMarko Zec 		uint32_t	fragments;
1742aca58e1SMarko Zec 	}			*range_tbl;
1752aca58e1SMarko Zec 
1762aca58e1SMarko Zec 	/* Auxiliary internal state */
1772aca58e1SMarko Zec 	uint32_t		updates_mask[DIRECT_TBL_SIZE / 32];
1782aca58e1SMarko Zec 	struct trie_desc	*trietbl[D_TBL_SIZE];
1792aca58e1SMarko Zec 	LIST_HEAD(, chunk_desc)	chunk_hashtbl[CHUNK_HASH_SIZE];
1802aca58e1SMarko Zec 	LIST_HEAD(, chunk_desc)	all_chunks;
18143880c51SMarko Zec 	LIST_HEAD(, chunk_desc) unused_chunks[UNUSED_BUCKETS];
1822aca58e1SMarko Zec 	LIST_HEAD(, trie_desc)	trie_hashtbl[TRIE_HASH_SIZE];
1832aca58e1SMarko Zec 	LIST_HEAD(, trie_desc)	all_trie;
1842aca58e1SMarko Zec 	LIST_HEAD(, trie_desc)	unused_trie; /* abuses hash link entry */
1852aca58e1SMarko Zec 	struct sockaddr_in	dst;
1862aca58e1SMarko Zec 	struct sockaddr_in	mask;
1872aca58e1SMarko Zec 	struct heap_entry	heap[33];
1882aca58e1SMarko Zec 	uint32_t		prefixes;
1892aca58e1SMarko Zec 	uint32_t		updates_low;
1902aca58e1SMarko Zec 	uint32_t		updates_high;
1911549575fSMarko Zec 	uint32_t		unused_chunks_size;
1922aca58e1SMarko Zec 	uint32_t		xtbl_size;
1932aca58e1SMarko Zec 	uint32_t		all_trie_cnt;
1942aca58e1SMarko Zec 	uint32_t		unused_trie_cnt;
1952aca58e1SMarko Zec 	uint32_t		trie_rebuilt_prefixes;
1962aca58e1SMarko Zec 	uint32_t		heap_index;
1972aca58e1SMarko Zec 	uint32_t		d_bits;
1982aca58e1SMarko Zec 	uint32_t		rtbl_size;
1992aca58e1SMarko Zec 	uint32_t		rtbl_top;
2002aca58e1SMarko Zec 	uint32_t		rtbl_work_frags;
2012aca58e1SMarko Zec 	uint32_t		work_chunk;
2022aca58e1SMarko Zec };
2032aca58e1SMarko Zec 
2042aca58e1SMarko Zec /* Main lookup structure container */
2052aca58e1SMarko Zec 
2062aca58e1SMarko Zec struct dxr {
2072aca58e1SMarko Zec 	/* Lookup tables */
2082aca58e1SMarko Zec 	void			*d;
2092aca58e1SMarko Zec 	void			*x;
2102aca58e1SMarko Zec 	void			*r;
2112aca58e1SMarko Zec 	struct nhop_object	**nh_tbl;
2122aca58e1SMarko Zec 
2132aca58e1SMarko Zec 	/* Glue to external state */
2142aca58e1SMarko Zec 	struct dxr_aux		*aux;
2152aca58e1SMarko Zec 	struct fib_data		*fd;
2162aca58e1SMarko Zec 	struct epoch_context	epoch_ctx;
2172aca58e1SMarko Zec 	uint32_t		fibnum;
218e7abe200SMarko Zec 	uint16_t		d_shift;
219e7abe200SMarko Zec 	uint16_t		x_shift;
220e7abe200SMarko Zec 	uint32_t		x_mask;
2212aca58e1SMarko Zec };
2222aca58e1SMarko Zec 
2232aca58e1SMarko Zec static MALLOC_DEFINE(M_DXRLPM, "dxr", "DXR LPM");
2242aca58e1SMarko Zec static MALLOC_DEFINE(M_DXRAUX, "dxr aux", "DXR auxiliary");
2252aca58e1SMarko Zec 
2262aca58e1SMarko Zec uma_zone_t chunk_zone;
2272aca58e1SMarko Zec uma_zone_t trie_zone;
2282aca58e1SMarko Zec 
2291549575fSMarko Zec VNET_DEFINE_STATIC(int, frag_limit) = 100;
2301549575fSMarko Zec #define	V_frag_limit	VNET(frag_limit)
2311549575fSMarko Zec 
2322aca58e1SMarko Zec /* Binary search for a matching address range */
2332aca58e1SMarko Zec #define	DXR_LOOKUP_STAGE					\
2342aca58e1SMarko Zec 	if (masked_dst < range[middle].start) {			\
2352aca58e1SMarko Zec 		upperbound = middle;				\
2362aca58e1SMarko Zec 		middle = (middle + lowerbound) / 2;		\
2372aca58e1SMarko Zec 	} else if (masked_dst < range[middle + 1].start)	\
2382aca58e1SMarko Zec 		return (range[middle].nexthop);			\
2392aca58e1SMarko Zec 	else {							\
2402aca58e1SMarko Zec 		lowerbound = middle + 1;			\
2412aca58e1SMarko Zec 		middle = (upperbound + middle + 1) / 2;		\
2422aca58e1SMarko Zec 	}							\
2432aca58e1SMarko Zec 	if (upperbound == lowerbound)				\
2442aca58e1SMarko Zec 		return (range[lowerbound].nexthop);
2452aca58e1SMarko Zec 
2462aca58e1SMarko Zec static int
247e7abe200SMarko Zec range_lookup(struct range_entry_long *rt, struct direct_entry de, uint32_t dst)
2482aca58e1SMarko Zec {
2492aca58e1SMarko Zec 	uint32_t base;
2502aca58e1SMarko Zec 	uint32_t upperbound;
2512aca58e1SMarko Zec 	uint32_t middle;
2522aca58e1SMarko Zec 	uint32_t lowerbound;
2532aca58e1SMarko Zec 	uint32_t masked_dst;
2542aca58e1SMarko Zec 
2552aca58e1SMarko Zec 	base = de.base;
2562aca58e1SMarko Zec 	lowerbound = 0;
2572aca58e1SMarko Zec 	masked_dst = dst & DXR_RANGE_MASK;
2582aca58e1SMarko Zec 
2592aca58e1SMarko Zec #if DXR_TRIE_BITS < 24
2602aca58e1SMarko Zec 	if (__predict_true(IS_SHORT_FORMAT(de.fragments))) {
2612aca58e1SMarko Zec 		upperbound = de.fragments & FRAGS_MASK_SHORT;
2622aca58e1SMarko Zec 		struct range_entry_short *range =
2632aca58e1SMarko Zec 		    (struct range_entry_short *) &rt[base];
2642aca58e1SMarko Zec 
2652aca58e1SMarko Zec 		masked_dst >>= 8;
2662aca58e1SMarko Zec 		middle = upperbound;
2672aca58e1SMarko Zec 		upperbound = upperbound * 2 + 1;
2682aca58e1SMarko Zec 
2692aca58e1SMarko Zec 		for (;;) {
2702aca58e1SMarko Zec 			DXR_LOOKUP_STAGE
2712aca58e1SMarko Zec 			DXR_LOOKUP_STAGE
2722aca58e1SMarko Zec 		}
2732aca58e1SMarko Zec 	}
2742aca58e1SMarko Zec #endif
2752aca58e1SMarko Zec 
2762aca58e1SMarko Zec 	upperbound = de.fragments;
2772aca58e1SMarko Zec 	middle = upperbound / 2;
2782aca58e1SMarko Zec 	struct range_entry_long *range = &rt[base];
2792aca58e1SMarko Zec 	if (__predict_false(IS_XL_FORMAT(de.fragments))) {
2802aca58e1SMarko Zec 		upperbound = *((uint32_t *) range);
2812aca58e1SMarko Zec 		range++;
2822aca58e1SMarko Zec 		middle = upperbound / 2;
2832aca58e1SMarko Zec 	}
2842aca58e1SMarko Zec 
2852aca58e1SMarko Zec 	for (;;) {
2862aca58e1SMarko Zec 		DXR_LOOKUP_STAGE
2872aca58e1SMarko Zec 		DXR_LOOKUP_STAGE
2882aca58e1SMarko Zec 	}
2892aca58e1SMarko Zec }
2902aca58e1SMarko Zec 
291e7abe200SMarko Zec #define DXR_LOOKUP_DEFINE(D)						\
292e7abe200SMarko Zec 	static int inline						\
293e7abe200SMarko Zec 	dxr_lookup_##D(struct dxr *dxr, uint32_t dst)			\
294e7abe200SMarko Zec 	{								\
295e7abe200SMarko Zec 		struct direct_entry de;					\
296e7abe200SMarko Zec 		uint16_t *dt = dxr->d;					\
297e7abe200SMarko Zec 		struct direct_entry *xt = dxr->x;			\
298e7abe200SMarko Zec 									\
299e7abe200SMarko Zec 		de = xt[(dt[dst >> (32 - (D))] << (DXR_TRIE_BITS - (D))) \
300e7abe200SMarko Zec 		    + ((dst >> DXR_RANGE_SHIFT) &			\
301e7abe200SMarko Zec 		    (0xffffffffU >> (32 - DXR_TRIE_BITS + (D))))];	\
302e7abe200SMarko Zec 		if (__predict_true(de.fragments == FRAGS_MARK_HIT))	\
303e7abe200SMarko Zec 			return (de.base);				\
304e7abe200SMarko Zec 		return (range_lookup(dxr->r, de, dst));			\
305e7abe200SMarko Zec 	}								\
306e7abe200SMarko Zec 									\
307e7abe200SMarko Zec 	static struct nhop_object *					\
308e7abe200SMarko Zec 	dxr_fib_lookup_##D(void *algo_data,				\
309e7abe200SMarko Zec 	    const struct flm_lookup_key key, uint32_t scopeid __unused)	\
310e7abe200SMarko Zec 	{								\
311e7abe200SMarko Zec 		struct dxr *dxr = algo_data;				\
312e7abe200SMarko Zec 									\
313e7abe200SMarko Zec 		return (dxr->nh_tbl[dxr_lookup_##D(dxr,			\
314e7abe200SMarko Zec 		    ntohl(key.addr4.s_addr))]);				\
315e7abe200SMarko Zec 	}
316e7abe200SMarko Zec 
317e7abe200SMarko Zec #if DXR_TRIE_BITS > 16
318e7abe200SMarko Zec DXR_LOOKUP_DEFINE(16)
319e7abe200SMarko Zec #endif
320e7abe200SMarko Zec DXR_LOOKUP_DEFINE(15)
321e7abe200SMarko Zec DXR_LOOKUP_DEFINE(14)
322e7abe200SMarko Zec DXR_LOOKUP_DEFINE(13)
323e7abe200SMarko Zec DXR_LOOKUP_DEFINE(12)
324e7abe200SMarko Zec DXR_LOOKUP_DEFINE(11)
325e7abe200SMarko Zec DXR_LOOKUP_DEFINE(10)
326e7abe200SMarko Zec DXR_LOOKUP_DEFINE(9)
327e7abe200SMarko Zec 
328e7abe200SMarko Zec static int inline
329e7abe200SMarko Zec dxr_lookup(struct dxr *dxr, uint32_t dst)
330e7abe200SMarko Zec {
331e7abe200SMarko Zec 	struct direct_entry de;
332e7abe200SMarko Zec 	uint16_t *dt = dxr->d;
333e7abe200SMarko Zec 	struct direct_entry *xt = dxr->x;
334e7abe200SMarko Zec 
335e7abe200SMarko Zec 	de = xt[(dt[dst >> dxr->d_shift] << dxr->x_shift) +
336e7abe200SMarko Zec 	    ((dst >> DXR_RANGE_SHIFT) & dxr->x_mask)];
337e7abe200SMarko Zec 	if (__predict_true(de.fragments == FRAGS_MARK_HIT))
338e7abe200SMarko Zec 		return (de.base);
339e7abe200SMarko Zec 	return (range_lookup(dxr->r, de, dst));
340e7abe200SMarko Zec }
341e7abe200SMarko Zec 
3422aca58e1SMarko Zec static void
3432aca58e1SMarko Zec initheap(struct dxr_aux *da, uint32_t dst_u32, uint32_t chunk)
3442aca58e1SMarko Zec {
3452aca58e1SMarko Zec 	struct heap_entry *fhp = &da->heap[0];
3462aca58e1SMarko Zec 	struct rtentry *rt;
3472aca58e1SMarko Zec 	struct route_nhop_data rnd;
3482aca58e1SMarko Zec 
3492aca58e1SMarko Zec 	da->heap_index = 0;
3502aca58e1SMarko Zec 	da->dst.sin_addr.s_addr = htonl(dst_u32);
3512aca58e1SMarko Zec 	rt = fib4_lookup_rt(da->fibnum, da->dst.sin_addr, 0, NHR_UNLOCKED,
3522aca58e1SMarko Zec 	    &rnd);
3532aca58e1SMarko Zec 	if (rt != NULL) {
3542aca58e1SMarko Zec 		struct in_addr addr;
3552aca58e1SMarko Zec 		uint32_t scopeid;
3562aca58e1SMarko Zec 
3572aca58e1SMarko Zec 		rt_get_inet_prefix_plen(rt, &addr, &fhp->preflen, &scopeid);
3582aca58e1SMarko Zec 		fhp->start = ntohl(addr.s_addr);
3592aca58e1SMarko Zec 		fhp->end = fhp->start;
3602aca58e1SMarko Zec 		if (fhp->preflen < 32)
3612aca58e1SMarko Zec 			fhp->end |= (0xffffffffU >> fhp->preflen);
3622aca58e1SMarko Zec 		fhp->nexthop = fib_get_nhop_idx(da->fd, rnd.rnd_nhop);
3632aca58e1SMarko Zec 	} else {
3642aca58e1SMarko Zec 		fhp->preflen = fhp->nexthop = fhp->start = 0;
3652aca58e1SMarko Zec 		fhp->end = 0xffffffffU;
3662aca58e1SMarko Zec 	}
3672aca58e1SMarko Zec }
3682aca58e1SMarko Zec 
3692aca58e1SMarko Zec static uint32_t
3702aca58e1SMarko Zec chunk_size(struct dxr_aux *da, struct direct_entry *fdesc)
3712aca58e1SMarko Zec {
3722aca58e1SMarko Zec 
3732aca58e1SMarko Zec 	if (IS_SHORT_FORMAT(fdesc->fragments))
3742aca58e1SMarko Zec 		return ((fdesc->fragments & FRAGS_MASK_SHORT) + 1);
3752aca58e1SMarko Zec 	else if (IS_XL_FORMAT(fdesc->fragments))
3762aca58e1SMarko Zec 		return (da->range_tbl[fdesc->base].fragments + 2);
3772aca58e1SMarko Zec 	else /* if (IS_LONG_FORMAT(fdesc->fragments)) */
3782aca58e1SMarko Zec 		return (fdesc->fragments + 1);
3792aca58e1SMarko Zec }
3802aca58e1SMarko Zec 
3812aca58e1SMarko Zec static uint32_t
3822aca58e1SMarko Zec chunk_hash(struct dxr_aux *da, struct direct_entry *fdesc)
3832aca58e1SMarko Zec {
3842aca58e1SMarko Zec 	uint32_t size = chunk_size(da, fdesc);
3852aca58e1SMarko Zec 	uint32_t *p = (uint32_t *) &da->range_tbl[fdesc->base];
3862aca58e1SMarko Zec 	uint32_t *l = (uint32_t *) &da->range_tbl[fdesc->base + size];
3872aca58e1SMarko Zec 	uint32_t hash = fdesc->fragments;
3882aca58e1SMarko Zec 
3892aca58e1SMarko Zec 	for (; p < l; p++)
3902aca58e1SMarko Zec 		hash = (hash << 7) + (hash >> 13) + *p;
3912aca58e1SMarko Zec 
3922aca58e1SMarko Zec 	return (hash + (hash >> 16));
3932aca58e1SMarko Zec }
3942aca58e1SMarko Zec 
3952aca58e1SMarko Zec static int
3962aca58e1SMarko Zec chunk_ref(struct dxr_aux *da, uint32_t chunk)
3972aca58e1SMarko Zec {
3982aca58e1SMarko Zec 	struct direct_entry *fdesc = &da->direct_tbl[chunk];
3992aca58e1SMarko Zec 	struct chunk_desc *cdp, *empty_cdp;
4002aca58e1SMarko Zec 	uint32_t base = fdesc->base;
4012aca58e1SMarko Zec 	uint32_t size = chunk_size(da, fdesc);
4022aca58e1SMarko Zec 	uint32_t hash = chunk_hash(da, fdesc);
40343880c51SMarko Zec 	int i;
4042aca58e1SMarko Zec 
4052aca58e1SMarko Zec 	/* Find an existing descriptor */
4062aca58e1SMarko Zec 	LIST_FOREACH(cdp, &da->chunk_hashtbl[hash & CHUNK_HASH_MASK],
4072aca58e1SMarko Zec 	    cd_hash_le) {
4082aca58e1SMarko Zec 		if (cdp->cd_hash != hash || cdp->cd_cur_size != size ||
4092aca58e1SMarko Zec 		    memcmp(&da->range_tbl[base], &da->range_tbl[cdp->cd_base],
4102aca58e1SMarko Zec 		    sizeof(struct range_entry_long) * size))
4112aca58e1SMarko Zec 			continue;
4122aca58e1SMarko Zec 		da->rtbl_top = fdesc->base;
4132aca58e1SMarko Zec 		fdesc->base = cdp->cd_base;
4142aca58e1SMarko Zec 		cdp->cd_refcnt++;
4152aca58e1SMarko Zec 		return (0);
4162aca58e1SMarko Zec 	}
4172aca58e1SMarko Zec 
41843880c51SMarko Zec 	/* No matching chunks found. Find an empty one to recycle. */
41943880c51SMarko Zec 	for (cdp = NULL, i = size; cdp == NULL && i < UNUSED_BUCKETS; i++)
42043880c51SMarko Zec 		cdp = LIST_FIRST(&da->unused_chunks[i]);
42143880c51SMarko Zec 
42243880c51SMarko Zec 	if (cdp == NULL)
42343880c51SMarko Zec 		LIST_FOREACH(empty_cdp, &da->unused_chunks[0], cd_hash_le)
4242aca58e1SMarko Zec 			if (empty_cdp->cd_max_size >= size && (cdp == NULL ||
4252aca58e1SMarko Zec 			    empty_cdp->cd_max_size < cdp->cd_max_size)) {
4262aca58e1SMarko Zec 				cdp = empty_cdp;
4272aca58e1SMarko Zec 				if (empty_cdp->cd_max_size == size)
4282aca58e1SMarko Zec 					break;
4292aca58e1SMarko Zec 			}
4302aca58e1SMarko Zec 
4312aca58e1SMarko Zec 	if (cdp != NULL) {
4322aca58e1SMarko Zec 		/* Copy from heap into the recycled chunk */
4332aca58e1SMarko Zec 		bcopy(&da->range_tbl[fdesc->base], &da->range_tbl[cdp->cd_base],
4342aca58e1SMarko Zec 		    size * sizeof(struct range_entry_long));
4352aca58e1SMarko Zec 		fdesc->base = cdp->cd_base;
4362aca58e1SMarko Zec 		da->rtbl_top -= size;
4371549575fSMarko Zec 		da->unused_chunks_size -= cdp->cd_max_size;
4382ac039f7SMarko Zec 		if (cdp->cd_max_size > size) {
4392aca58e1SMarko Zec 			/* Split the range in two, need a new descriptor */
4402aca58e1SMarko Zec 			empty_cdp = uma_zalloc(chunk_zone, M_NOWAIT);
4412aca58e1SMarko Zec 			if (empty_cdp == NULL)
4422aca58e1SMarko Zec 				return (1);
44343880c51SMarko Zec 			LIST_INSERT_BEFORE(cdp, empty_cdp, cd_all_le);
44443880c51SMarko Zec 			empty_cdp->cd_base = cdp->cd_base + size;
4452ac039f7SMarko Zec 			empty_cdp->cd_cur_size = 0;
4462aca58e1SMarko Zec 			empty_cdp->cd_max_size = cdp->cd_max_size - size;
44743880c51SMarko Zec 
44843880c51SMarko Zec 			i = empty_cdp->cd_max_size;
44943880c51SMarko Zec 			if (i >= UNUSED_BUCKETS)
45043880c51SMarko Zec 				i = 0;
45143880c51SMarko Zec 			LIST_INSERT_HEAD(&da->unused_chunks[i], empty_cdp,
45243880c51SMarko Zec 			    cd_hash_le);
45343880c51SMarko Zec 
4541549575fSMarko Zec 			da->unused_chunks_size += empty_cdp->cd_max_size;
4552aca58e1SMarko Zec 			cdp->cd_max_size = size;
4562aca58e1SMarko Zec 		}
4572aca58e1SMarko Zec 		LIST_REMOVE(cdp, cd_hash_le);
4582aca58e1SMarko Zec 	} else {
4592ac039f7SMarko Zec 		/* Alloc a new descriptor at the top of the heap*/
4602aca58e1SMarko Zec 		cdp = uma_zalloc(chunk_zone, M_NOWAIT);
4612aca58e1SMarko Zec 		if (cdp == NULL)
4622aca58e1SMarko Zec 			return (1);
4632aca58e1SMarko Zec 		cdp->cd_max_size = size;
4642aca58e1SMarko Zec 		cdp->cd_base = fdesc->base;
4652aca58e1SMarko Zec 		LIST_INSERT_HEAD(&da->all_chunks, cdp, cd_all_le);
4664aa275f1SMarko Zec 		MPASS(cdp->cd_base + cdp->cd_max_size == da->rtbl_top);
4672aca58e1SMarko Zec 	}
4682aca58e1SMarko Zec 
4692aca58e1SMarko Zec 	cdp->cd_hash = hash;
4702aca58e1SMarko Zec 	cdp->cd_refcnt = 1;
4712aca58e1SMarko Zec 	cdp->cd_cur_size = size;
4722aca58e1SMarko Zec 	LIST_INSERT_HEAD(&da->chunk_hashtbl[hash & CHUNK_HASH_MASK], cdp,
4732aca58e1SMarko Zec 	    cd_hash_le);
4742aca58e1SMarko Zec 	if (da->rtbl_top >= da->rtbl_size) {
4752aca58e1SMarko Zec 		if (da->rtbl_top >= BASE_MAX) {
4762aca58e1SMarko Zec 			FIB_PRINTF(LOG_ERR, da->fd,
4772aca58e1SMarko Zec 			    "structural limit exceeded at %d "
4782aca58e1SMarko Zec 			    "range table elements", da->rtbl_top);
4792aca58e1SMarko Zec 			return (1);
4802aca58e1SMarko Zec 		}
4812aca58e1SMarko Zec 		da->rtbl_size += RTBL_SIZE_INCR;
4821549575fSMarko Zec 		i = (BASE_MAX - da->rtbl_top) * LOG_DEBUG / BASE_MAX;
4831549575fSMarko Zec 		FIB_PRINTF(i, da->fd, "range table at %d%% structural limit",
4842aca58e1SMarko Zec 		    da->rtbl_top * 100 / BASE_MAX);
4852aca58e1SMarko Zec 		da->range_tbl = realloc(da->range_tbl,
4862aca58e1SMarko Zec 		    sizeof(*da->range_tbl) * da->rtbl_size + FRAGS_PREF_SHORT,
4872aca58e1SMarko Zec 		    M_DXRAUX, M_NOWAIT);
488ed541e20SMarko Zec 		if (da->range_tbl == NULL) {
489ed541e20SMarko Zec 			FIB_PRINTF(LOG_NOTICE, da->fd,
490ed541e20SMarko Zec 			    "Unable to allocate DXR range table");
4912aca58e1SMarko Zec 			return (1);
4922aca58e1SMarko Zec 		}
493ed541e20SMarko Zec 	}
4942aca58e1SMarko Zec 
4952aca58e1SMarko Zec 	return (0);
4962aca58e1SMarko Zec }
4972aca58e1SMarko Zec 
4982aca58e1SMarko Zec static void
4992aca58e1SMarko Zec chunk_unref(struct dxr_aux *da, uint32_t chunk)
5002aca58e1SMarko Zec {
5012aca58e1SMarko Zec 	struct direct_entry *fdesc = &da->direct_tbl[chunk];
5022ac039f7SMarko Zec 	struct chunk_desc *cdp, *cdp2;
5032aca58e1SMarko Zec 	uint32_t base = fdesc->base;
5042aca58e1SMarko Zec 	uint32_t size = chunk_size(da, fdesc);
5052aca58e1SMarko Zec 	uint32_t hash = chunk_hash(da, fdesc);
50643880c51SMarko Zec 	int i;
5072aca58e1SMarko Zec 
5082ac039f7SMarko Zec 	/* Find the corresponding descriptor */
5092aca58e1SMarko Zec 	LIST_FOREACH(cdp, &da->chunk_hashtbl[hash & CHUNK_HASH_MASK],
5102aca58e1SMarko Zec 	    cd_hash_le)
5112aca58e1SMarko Zec 		if (cdp->cd_hash == hash && cdp->cd_cur_size == size &&
5122aca58e1SMarko Zec 		    memcmp(&da->range_tbl[base], &da->range_tbl[cdp->cd_base],
5132aca58e1SMarko Zec 		    sizeof(struct range_entry_long) * size) == 0)
5142aca58e1SMarko Zec 			break;
5152aca58e1SMarko Zec 
5164aa275f1SMarko Zec 	MPASS(cdp != NULL);
5172aca58e1SMarko Zec 	if (--cdp->cd_refcnt > 0)
5182aca58e1SMarko Zec 		return;
5192aca58e1SMarko Zec 
5202aca58e1SMarko Zec 	LIST_REMOVE(cdp, cd_hash_le);
5211549575fSMarko Zec 	da->unused_chunks_size += cdp->cd_max_size;
5222ac039f7SMarko Zec 	cdp->cd_cur_size = 0;
5232ac039f7SMarko Zec 
5242ac039f7SMarko Zec 	/* Attempt to merge with the preceding chunk, if empty */
5252ac039f7SMarko Zec 	cdp2 = LIST_NEXT(cdp, cd_all_le);
5262ac039f7SMarko Zec 	if (cdp2 != NULL && cdp2->cd_cur_size == 0) {
5274aa275f1SMarko Zec 		MPASS(cdp2->cd_base + cdp2->cd_max_size == cdp->cd_base);
5282ac039f7SMarko Zec 		LIST_REMOVE(cdp, cd_all_le);
5292ac039f7SMarko Zec 		LIST_REMOVE(cdp2, cd_hash_le);
5302ac039f7SMarko Zec 		cdp2->cd_max_size += cdp->cd_max_size;
5312ac039f7SMarko Zec 		uma_zfree(chunk_zone, cdp);
5322ac039f7SMarko Zec 		cdp = cdp2;
5332aca58e1SMarko Zec 	}
5342aca58e1SMarko Zec 
5352ac039f7SMarko Zec 	/* Attempt to merge with the subsequent chunk, if empty */
5362ac039f7SMarko Zec 	cdp2 = LIST_PREV(cdp, &da->all_chunks, chunk_desc, cd_all_le);
5372ac039f7SMarko Zec 	if (cdp2 != NULL && cdp2->cd_cur_size == 0) {
5384aa275f1SMarko Zec 		MPASS(cdp->cd_base + cdp->cd_max_size == cdp2->cd_base);
5392ac039f7SMarko Zec 		LIST_REMOVE(cdp, cd_all_le);
5402ac039f7SMarko Zec 		LIST_REMOVE(cdp2, cd_hash_le);
5412ac039f7SMarko Zec 		cdp2->cd_max_size += cdp->cd_max_size;
5422ac039f7SMarko Zec 		cdp2->cd_base = cdp->cd_base;
5432ac039f7SMarko Zec 		uma_zfree(chunk_zone, cdp);
5442ac039f7SMarko Zec 		cdp = cdp2;
5452ac039f7SMarko Zec 	}
5462ac039f7SMarko Zec 
5472ac039f7SMarko Zec 	if (cdp->cd_base + cdp->cd_max_size == da->rtbl_top) {
5482ac039f7SMarko Zec 		/* Free the chunk on the top of the range heap, trim the heap */
5494aa275f1SMarko Zec 		MPASS(cdp == LIST_FIRST(&da->all_chunks));
5502aca58e1SMarko Zec 		da->rtbl_top -= cdp->cd_max_size;
5511549575fSMarko Zec 		da->unused_chunks_size -= cdp->cd_max_size;
5522aca58e1SMarko Zec 		LIST_REMOVE(cdp, cd_all_le);
5532aca58e1SMarko Zec 		uma_zfree(chunk_zone, cdp);
5542ac039f7SMarko Zec 		return;
5552aca58e1SMarko Zec 	}
5562ac039f7SMarko Zec 
55743880c51SMarko Zec 	i = cdp->cd_max_size;
55843880c51SMarko Zec 	if (i >= UNUSED_BUCKETS)
55943880c51SMarko Zec 		i = 0;
56043880c51SMarko Zec 	LIST_INSERT_HEAD(&da->unused_chunks[i], cdp, cd_hash_le);
5612aca58e1SMarko Zec }
5622aca58e1SMarko Zec 
5632aca58e1SMarko Zec static uint32_t
5642aca58e1SMarko Zec trie_hash(struct dxr_aux *da, uint32_t dxr_x, uint32_t index)
5652aca58e1SMarko Zec {
5662aca58e1SMarko Zec 	uint32_t i, *val;
5672aca58e1SMarko Zec 	uint32_t hash = 0;
5682aca58e1SMarko Zec 
5692aca58e1SMarko Zec 	for (i = 0; i < (1 << dxr_x); i++) {
5702aca58e1SMarko Zec 		hash = (hash << 3) ^ (hash >> 3);
5712aca58e1SMarko Zec 		val = (uint32_t *)
5722aca58e1SMarko Zec 		    (void *) &da->direct_tbl[(index << dxr_x) + i];
5732aca58e1SMarko Zec 		hash += (*val << 5);
5742aca58e1SMarko Zec 		hash += (*val >> 5);
5752aca58e1SMarko Zec 	}
5762aca58e1SMarko Zec 
5772aca58e1SMarko Zec 	return (hash + (hash >> 16));
5782aca58e1SMarko Zec }
5792aca58e1SMarko Zec 
5802aca58e1SMarko Zec static int
5812aca58e1SMarko Zec trie_ref(struct dxr_aux *da, uint32_t index)
5822aca58e1SMarko Zec {
5832aca58e1SMarko Zec 	struct trie_desc *tp;
5842aca58e1SMarko Zec 	uint32_t dxr_d = da->d_bits;
5852aca58e1SMarko Zec 	uint32_t dxr_x = DXR_TRIE_BITS - dxr_d;
5862aca58e1SMarko Zec 	uint32_t hash = trie_hash(da, dxr_x, index);
5872aca58e1SMarko Zec 
5882aca58e1SMarko Zec 	/* Find an existing descriptor */
5892aca58e1SMarko Zec 	LIST_FOREACH(tp, &da->trie_hashtbl[hash & TRIE_HASH_MASK], td_hash_le)
5902aca58e1SMarko Zec 		if (tp->td_hash == hash &&
5912aca58e1SMarko Zec 		    memcmp(&da->direct_tbl[index << dxr_x],
5922aca58e1SMarko Zec 		    &da->x_tbl[tp->td_index << dxr_x],
5932aca58e1SMarko Zec 		    sizeof(*da->x_tbl) << dxr_x) == 0) {
5942aca58e1SMarko Zec 			tp->td_refcnt++;
5952aca58e1SMarko Zec 			da->trietbl[index] = tp;
5962aca58e1SMarko Zec 			return(tp->td_index);
5972aca58e1SMarko Zec 		}
5982aca58e1SMarko Zec 
5992aca58e1SMarko Zec 	tp = LIST_FIRST(&da->unused_trie);
6002aca58e1SMarko Zec 	if (tp != NULL) {
6012aca58e1SMarko Zec 		LIST_REMOVE(tp, td_hash_le);
6022aca58e1SMarko Zec 		da->unused_trie_cnt--;
6032aca58e1SMarko Zec 	} else {
6042aca58e1SMarko Zec 		tp = uma_zalloc(trie_zone, M_NOWAIT);
6052aca58e1SMarko Zec 		if (tp == NULL)
6062aca58e1SMarko Zec 			return (-1);
6072aca58e1SMarko Zec 		LIST_INSERT_HEAD(&da->all_trie, tp, td_all_le);
6082aca58e1SMarko Zec 		tp->td_index = da->all_trie_cnt++;
6092aca58e1SMarko Zec 	}
6102aca58e1SMarko Zec 
6112aca58e1SMarko Zec 	tp->td_hash = hash;
6122aca58e1SMarko Zec 	tp->td_refcnt = 1;
6132aca58e1SMarko Zec 	LIST_INSERT_HEAD(&da->trie_hashtbl[hash & TRIE_HASH_MASK], tp,
6142aca58e1SMarko Zec 	   td_hash_le);
6152aca58e1SMarko Zec 	memcpy(&da->x_tbl[tp->td_index << dxr_x],
6162aca58e1SMarko Zec 	    &da->direct_tbl[index << dxr_x], sizeof(*da->x_tbl) << dxr_x);
6172aca58e1SMarko Zec 	da->trietbl[index] = tp;
6182aca58e1SMarko Zec 	if (da->all_trie_cnt >= da->xtbl_size >> dxr_x) {
6192aca58e1SMarko Zec 		da->xtbl_size += XTBL_SIZE_INCR;
6202aca58e1SMarko Zec 		da->x_tbl = realloc(da->x_tbl,
6212aca58e1SMarko Zec 		    sizeof(*da->x_tbl) * da->xtbl_size, M_DXRAUX, M_NOWAIT);
622ed541e20SMarko Zec 		if (da->x_tbl == NULL) {
623ed541e20SMarko Zec 			FIB_PRINTF(LOG_NOTICE, da->fd,
624ed541e20SMarko Zec 			    "Unable to allocate DXR extension table");
6252aca58e1SMarko Zec 			return (-1);
6262aca58e1SMarko Zec 		}
627ed541e20SMarko Zec 	}
6282aca58e1SMarko Zec 	return(tp->td_index);
6292aca58e1SMarko Zec }
6302aca58e1SMarko Zec 
6312aca58e1SMarko Zec static void
6322aca58e1SMarko Zec trie_unref(struct dxr_aux *da, uint32_t index)
6332aca58e1SMarko Zec {
6342aca58e1SMarko Zec 	struct trie_desc *tp = da->trietbl[index];
6352aca58e1SMarko Zec 
6362aca58e1SMarko Zec 	if (tp == NULL)
6372aca58e1SMarko Zec 		return;
6382aca58e1SMarko Zec 	da->trietbl[index] = NULL;
6392aca58e1SMarko Zec 	if (--tp->td_refcnt > 0)
6402aca58e1SMarko Zec 		return;
6412aca58e1SMarko Zec 
6422aca58e1SMarko Zec 	LIST_REMOVE(tp, td_hash_le);
6432aca58e1SMarko Zec 	da->unused_trie_cnt++;
6442aca58e1SMarko Zec 	if (tp->td_index != da->all_trie_cnt - 1) {
6452aca58e1SMarko Zec 		LIST_INSERT_HEAD(&da->unused_trie, tp, td_hash_le);
6462aca58e1SMarko Zec 		return;
6472aca58e1SMarko Zec 	}
6482aca58e1SMarko Zec 
6492aca58e1SMarko Zec 	do {
6502aca58e1SMarko Zec 		da->all_trie_cnt--;
6512aca58e1SMarko Zec 		da->unused_trie_cnt--;
6522aca58e1SMarko Zec 		LIST_REMOVE(tp, td_all_le);
6532aca58e1SMarko Zec 		uma_zfree(trie_zone, tp);
6542aca58e1SMarko Zec 		LIST_FOREACH(tp, &da->unused_trie, td_hash_le)
6552aca58e1SMarko Zec 			if (tp->td_index == da->all_trie_cnt - 1) {
6562aca58e1SMarko Zec 				LIST_REMOVE(tp, td_hash_le);
6572aca58e1SMarko Zec 				break;
6582aca58e1SMarko Zec 			}
6592aca58e1SMarko Zec 	} while (tp != NULL);
6602aca58e1SMarko Zec }
6612aca58e1SMarko Zec 
6622aca58e1SMarko Zec static void
6632aca58e1SMarko Zec heap_inject(struct dxr_aux *da, uint32_t start, uint32_t end, uint32_t preflen,
6642aca58e1SMarko Zec     uint32_t nh)
6652aca58e1SMarko Zec {
6662aca58e1SMarko Zec 	struct heap_entry *fhp;
6672aca58e1SMarko Zec 	int i;
6682aca58e1SMarko Zec 
6692aca58e1SMarko Zec 	for (i = da->heap_index; i >= 0; i--) {
6702aca58e1SMarko Zec 		if (preflen > da->heap[i].preflen)
6712aca58e1SMarko Zec 			break;
6722aca58e1SMarko Zec 		else if (preflen < da->heap[i].preflen)
6732aca58e1SMarko Zec 			da->heap[i + 1] = da->heap[i];
6742aca58e1SMarko Zec 		else
6752aca58e1SMarko Zec 			return;
6762aca58e1SMarko Zec 	}
6772aca58e1SMarko Zec 
6782aca58e1SMarko Zec 	fhp = &da->heap[i + 1];
6792aca58e1SMarko Zec 	fhp->preflen = preflen;
6802aca58e1SMarko Zec 	fhp->start = start;
6812aca58e1SMarko Zec 	fhp->end = end;
6822aca58e1SMarko Zec 	fhp->nexthop = nh;
6832aca58e1SMarko Zec 	da->heap_index++;
6842aca58e1SMarko Zec }
6852aca58e1SMarko Zec 
6862aca58e1SMarko Zec static int
6872aca58e1SMarko Zec dxr_walk(struct rtentry *rt, void *arg)
6882aca58e1SMarko Zec {
6892aca58e1SMarko Zec 	struct dxr_aux *da = arg;
6902aca58e1SMarko Zec 	uint32_t chunk = da->work_chunk;
6912aca58e1SMarko Zec 	uint32_t first = chunk << DXR_RANGE_SHIFT;
6922aca58e1SMarko Zec 	uint32_t last = first | DXR_RANGE_MASK;
6932aca58e1SMarko Zec 	struct range_entry_long *fp =
6942aca58e1SMarko Zec 	    &da->range_tbl[da->rtbl_top + da->rtbl_work_frags].re;
6952aca58e1SMarko Zec 	struct heap_entry *fhp = &da->heap[da->heap_index];
6962aca58e1SMarko Zec 	uint32_t preflen, nh, start, end, scopeid;
6972aca58e1SMarko Zec 	struct in_addr addr;
6982aca58e1SMarko Zec 
6992aca58e1SMarko Zec 	rt_get_inet_prefix_plen(rt, &addr, &preflen, &scopeid);
7002aca58e1SMarko Zec 	start = ntohl(addr.s_addr);
7012aca58e1SMarko Zec 	if (start > last)
7022aca58e1SMarko Zec 		return (-1);	/* Beyond chunk boundaries, we are done */
7032aca58e1SMarko Zec 	if (start < first)
7042aca58e1SMarko Zec 		return (0);	/* Skip this route */
7052aca58e1SMarko Zec 
7062aca58e1SMarko Zec 	end = start;
7072aca58e1SMarko Zec 	if (preflen < 32)
7082aca58e1SMarko Zec 		end |= (0xffffffffU >> preflen);
7092aca58e1SMarko Zec 	nh = fib_get_nhop_idx(da->fd, rt_get_raw_nhop(rt));
7102aca58e1SMarko Zec 
7112aca58e1SMarko Zec 	if (start == fhp->start)
7122aca58e1SMarko Zec 		heap_inject(da, start, end, preflen, nh);
7132aca58e1SMarko Zec 	else {
7142aca58e1SMarko Zec 		/* start > fhp->start */
7152aca58e1SMarko Zec 		while (start > fhp->end) {
7162aca58e1SMarko Zec 			uint32_t oend = fhp->end;
7172aca58e1SMarko Zec 
7182aca58e1SMarko Zec 			if (da->heap_index > 0) {
7192aca58e1SMarko Zec 				fhp--;
7202aca58e1SMarko Zec 				da->heap_index--;
7212aca58e1SMarko Zec 			} else
7222aca58e1SMarko Zec 				initheap(da, fhp->end + 1, chunk);
7232aca58e1SMarko Zec 			if (fhp->end > oend && fhp->nexthop != fp->nexthop) {
7242aca58e1SMarko Zec 				fp++;
7252aca58e1SMarko Zec 				da->rtbl_work_frags++;
7262aca58e1SMarko Zec 				fp->start = (oend + 1) & DXR_RANGE_MASK;
7272aca58e1SMarko Zec 				fp->nexthop = fhp->nexthop;
7282aca58e1SMarko Zec 			}
7292aca58e1SMarko Zec 		}
7302aca58e1SMarko Zec 		if (start > ((chunk << DXR_RANGE_SHIFT) | fp->start) &&
7312aca58e1SMarko Zec 		    nh != fp->nexthop) {
7322aca58e1SMarko Zec 			fp++;
7332aca58e1SMarko Zec 			da->rtbl_work_frags++;
7342aca58e1SMarko Zec 			fp->start = start & DXR_RANGE_MASK;
7352aca58e1SMarko Zec 		} else if (da->rtbl_work_frags) {
7362aca58e1SMarko Zec 			if ((--fp)->nexthop == nh)
7372aca58e1SMarko Zec 				da->rtbl_work_frags--;
7382aca58e1SMarko Zec 			else
7392aca58e1SMarko Zec 				fp++;
7402aca58e1SMarko Zec 		}
7412aca58e1SMarko Zec 		fp->nexthop = nh;
7422aca58e1SMarko Zec 		heap_inject(da, start, end, preflen, nh);
7432aca58e1SMarko Zec 	}
7442aca58e1SMarko Zec 
7452aca58e1SMarko Zec 	return (0);
7462aca58e1SMarko Zec }
7472aca58e1SMarko Zec 
7482aca58e1SMarko Zec static int
7492aca58e1SMarko Zec update_chunk(struct dxr_aux *da, uint32_t chunk)
7502aca58e1SMarko Zec {
7512aca58e1SMarko Zec 	struct range_entry_long *fp;
7522aca58e1SMarko Zec #if DXR_TRIE_BITS < 24
7532aca58e1SMarko Zec 	struct range_entry_short *fps;
7542aca58e1SMarko Zec 	uint32_t start, nh, i;
7552aca58e1SMarko Zec #endif
7562aca58e1SMarko Zec 	struct heap_entry *fhp;
7572aca58e1SMarko Zec 	uint32_t first = chunk << DXR_RANGE_SHIFT;
7582aca58e1SMarko Zec 	uint32_t last = first | DXR_RANGE_MASK;
7592aca58e1SMarko Zec 
7602aca58e1SMarko Zec 	if (da->direct_tbl[chunk].fragments != FRAGS_MARK_HIT)
7612aca58e1SMarko Zec 		chunk_unref(da, chunk);
7622aca58e1SMarko Zec 
7632aca58e1SMarko Zec 	initheap(da, first, chunk);
7642aca58e1SMarko Zec 
7652aca58e1SMarko Zec 	fp = &da->range_tbl[da->rtbl_top].re;
7662aca58e1SMarko Zec 	da->rtbl_work_frags = 0;
7672aca58e1SMarko Zec 	fp->start = first & DXR_RANGE_MASK;
7682aca58e1SMarko Zec 	fp->nexthop = da->heap[0].nexthop;
7692aca58e1SMarko Zec 
7702aca58e1SMarko Zec 	da->dst.sin_addr.s_addr = htonl(first);
7712aca58e1SMarko Zec 	da->mask.sin_addr.s_addr = htonl(~DXR_RANGE_MASK);
7722aca58e1SMarko Zec 
7732aca58e1SMarko Zec 	da->work_chunk = chunk;
7742aca58e1SMarko Zec 	rib_walk_from(da->fibnum, AF_INET, RIB_FLAG_LOCKED,
7752aca58e1SMarko Zec 	    (struct sockaddr *) &da->dst, (struct sockaddr *) &da->mask,
7762aca58e1SMarko Zec 	    dxr_walk, da);
7772aca58e1SMarko Zec 
7782aca58e1SMarko Zec 	/* Flush any remaining objects on the heap */
7792aca58e1SMarko Zec 	fp = &da->range_tbl[da->rtbl_top + da->rtbl_work_frags].re;
7802aca58e1SMarko Zec 	fhp = &da->heap[da->heap_index];
7812aca58e1SMarko Zec 	while (fhp->preflen > DXR_TRIE_BITS) {
7822aca58e1SMarko Zec 		uint32_t oend = fhp->end;
7832aca58e1SMarko Zec 
7842aca58e1SMarko Zec 		if (da->heap_index > 0) {
7852aca58e1SMarko Zec 			fhp--;
7862aca58e1SMarko Zec 			da->heap_index--;
7872aca58e1SMarko Zec 		} else
7882aca58e1SMarko Zec 			initheap(da, fhp->end + 1, chunk);
7892aca58e1SMarko Zec 		if (fhp->end > oend && fhp->nexthop != fp->nexthop) {
7902aca58e1SMarko Zec 			/* Have we crossed the upper chunk boundary? */
7912aca58e1SMarko Zec 			if (oend >= last)
7922aca58e1SMarko Zec 				break;
7932aca58e1SMarko Zec 			fp++;
7942aca58e1SMarko Zec 			da->rtbl_work_frags++;
7952aca58e1SMarko Zec 			fp->start = (oend + 1) & DXR_RANGE_MASK;
7962aca58e1SMarko Zec 			fp->nexthop = fhp->nexthop;
7972aca58e1SMarko Zec 		}
7982aca58e1SMarko Zec 	}
7992aca58e1SMarko Zec 
8002aca58e1SMarko Zec 	/* Direct hit if the chunk contains only a single fragment */
8012aca58e1SMarko Zec 	if (da->rtbl_work_frags == 0) {
8022aca58e1SMarko Zec 		da->direct_tbl[chunk].base = fp->nexthop;
8032aca58e1SMarko Zec 		da->direct_tbl[chunk].fragments = FRAGS_MARK_HIT;
8042aca58e1SMarko Zec 		return (0);
8052aca58e1SMarko Zec 	}
8062aca58e1SMarko Zec 
8072aca58e1SMarko Zec 	da->direct_tbl[chunk].base = da->rtbl_top;
8082aca58e1SMarko Zec 	da->direct_tbl[chunk].fragments = da->rtbl_work_frags;
8092aca58e1SMarko Zec 
8102aca58e1SMarko Zec #if DXR_TRIE_BITS < 24
8112aca58e1SMarko Zec 	/* Check whether the chunk can be more compactly encoded */
8122aca58e1SMarko Zec 	fp = &da->range_tbl[da->rtbl_top].re;
8132aca58e1SMarko Zec 	for (i = 0; i <= da->rtbl_work_frags; i++, fp++)
8142aca58e1SMarko Zec 		if ((fp->start & 0xff) != 0 || fp->nexthop > RE_SHORT_MAX_NH)
8152aca58e1SMarko Zec 			break;
8162aca58e1SMarko Zec 	if (i == da->rtbl_work_frags + 1) {
8172aca58e1SMarko Zec 		fp = &da->range_tbl[da->rtbl_top].re;
8182aca58e1SMarko Zec 		fps = (void *) fp;
8192aca58e1SMarko Zec 		for (i = 0; i <= da->rtbl_work_frags; i++, fp++, fps++) {
8202aca58e1SMarko Zec 			start = fp->start;
8212aca58e1SMarko Zec 			nh = fp->nexthop;
8222aca58e1SMarko Zec 			fps->start = start >> 8;
8232aca58e1SMarko Zec 			fps->nexthop = nh;
8242aca58e1SMarko Zec 		}
8252aca58e1SMarko Zec 		fps->start = start >> 8;
8262aca58e1SMarko Zec 		fps->nexthop = nh;
8272aca58e1SMarko Zec 		da->rtbl_work_frags >>= 1;
8282aca58e1SMarko Zec 		da->direct_tbl[chunk].fragments =
8292aca58e1SMarko Zec 		    da->rtbl_work_frags | FRAGS_PREF_SHORT;
8302aca58e1SMarko Zec 	} else
8312aca58e1SMarko Zec #endif
8322aca58e1SMarko Zec 	if (da->rtbl_work_frags >= FRAGS_MARK_HIT) {
8332aca58e1SMarko Zec 		da->direct_tbl[chunk].fragments = FRAGS_MARK_XL;
8342aca58e1SMarko Zec 		memmove(&da->range_tbl[da->rtbl_top + 1],
8352aca58e1SMarko Zec 		   &da->range_tbl[da->rtbl_top],
8362aca58e1SMarko Zec 		   (da->rtbl_work_frags + 1) * sizeof(*da->range_tbl));
8372aca58e1SMarko Zec 		da->range_tbl[da->rtbl_top].fragments = da->rtbl_work_frags;
8382aca58e1SMarko Zec 		da->rtbl_work_frags++;
8392aca58e1SMarko Zec 	}
8402aca58e1SMarko Zec 	da->rtbl_top += (da->rtbl_work_frags + 1);
8412aca58e1SMarko Zec 	return (chunk_ref(da, chunk));
8422aca58e1SMarko Zec }
8432aca58e1SMarko Zec 
8442aca58e1SMarko Zec static void
8452aca58e1SMarko Zec dxr_build(struct dxr *dxr)
8462aca58e1SMarko Zec {
8472aca58e1SMarko Zec 	struct dxr_aux *da = dxr->aux;
8482aca58e1SMarko Zec 	struct chunk_desc *cdp;
8492aca58e1SMarko Zec 	struct rib_rtable_info rinfo;
8502aca58e1SMarko Zec 	struct timeval t0, t1, t2, t3;
8512aca58e1SMarko Zec 	uint32_t r_size, dxr_tot_size;
8522aca58e1SMarko Zec 	uint32_t i, m, range_rebuild = 0;
8531549575fSMarko Zec 	uint32_t range_frag;
8542aca58e1SMarko Zec 	struct trie_desc *tp;
8552aca58e1SMarko Zec 	uint32_t d_tbl_size, dxr_x, d_size, x_size;
8562aca58e1SMarko Zec 	uint32_t ti, trie_rebuild = 0, prev_size = 0;
8571549575fSMarko Zec 	uint32_t trie_frag;
8582aca58e1SMarko Zec 
8594aa275f1SMarko Zec 	MPASS(dxr->d == NULL);
8602aca58e1SMarko Zec 
8612aca58e1SMarko Zec 	if (da == NULL) {
8622aca58e1SMarko Zec 		da = malloc(sizeof(*dxr->aux), M_DXRAUX, M_NOWAIT);
863ed541e20SMarko Zec 		if (da == NULL) {
864ed541e20SMarko Zec 			FIB_PRINTF(LOG_NOTICE, dxr->fd,
865ed541e20SMarko Zec 			    "Unable to allocate DXR aux struct");
8662aca58e1SMarko Zec 			return;
867ed541e20SMarko Zec 		}
8682aca58e1SMarko Zec 		dxr->aux = da;
8692aca58e1SMarko Zec 		da->fibnum = dxr->fibnum;
870b24e353fSMarko Zec 		da->fd = dxr->fd;
8712aca58e1SMarko Zec 		da->refcnt = 1;
8722aca58e1SMarko Zec 		LIST_INIT(&da->all_chunks);
8732aca58e1SMarko Zec 		LIST_INIT(&da->all_trie);
8742aca58e1SMarko Zec 		da->rtbl_size = RTBL_SIZE_INCR;
8752aca58e1SMarko Zec 		da->range_tbl = NULL;
8762aca58e1SMarko Zec 		da->xtbl_size = XTBL_SIZE_INCR;
8772aca58e1SMarko Zec 		da->x_tbl = NULL;
8782aca58e1SMarko Zec 		bzero(&da->dst, sizeof(da->dst));
8792aca58e1SMarko Zec 		bzero(&da->mask, sizeof(da->mask));
8802aca58e1SMarko Zec 		da->dst.sin_len = sizeof(da->dst);
8812aca58e1SMarko Zec 		da->mask.sin_len = sizeof(da->mask);
8822aca58e1SMarko Zec 		da->dst.sin_family = AF_INET;
8832aca58e1SMarko Zec 		da->mask.sin_family = AF_INET;
8842aca58e1SMarko Zec 	}
8852aca58e1SMarko Zec 	if (da->range_tbl == NULL) {
8862aca58e1SMarko Zec 		da->range_tbl = malloc(sizeof(*da->range_tbl) * da->rtbl_size
8872aca58e1SMarko Zec 		    + FRAGS_PREF_SHORT, M_DXRAUX, M_NOWAIT);
888ed541e20SMarko Zec 		if (da->range_tbl == NULL) {
889ed541e20SMarko Zec 			FIB_PRINTF(LOG_NOTICE, da->fd,
890ed541e20SMarko Zec 			    "Unable to allocate DXR range table");
8912aca58e1SMarko Zec 			return;
892ed541e20SMarko Zec 		}
8932aca58e1SMarko Zec 		range_rebuild = 1;
8942aca58e1SMarko Zec 	}
8952aca58e1SMarko Zec 	if (da->x_tbl == NULL) {
8962aca58e1SMarko Zec 		da->x_tbl = malloc(sizeof(*da->x_tbl) * da->xtbl_size,
8972aca58e1SMarko Zec 		    M_DXRAUX, M_NOWAIT);
898ed541e20SMarko Zec 		if (da->x_tbl == NULL) {
899ed541e20SMarko Zec 			FIB_PRINTF(LOG_NOTICE, da->fd,
900ed541e20SMarko Zec 			    "Unable to allocate DXR extension table");
9012aca58e1SMarko Zec 			return;
902ed541e20SMarko Zec 		}
9032aca58e1SMarko Zec 		trie_rebuild = 1;
9042aca58e1SMarko Zec 	}
9052aca58e1SMarko Zec 
9062aca58e1SMarko Zec 	microuptime(&t0);
9072aca58e1SMarko Zec 
9082aca58e1SMarko Zec 	dxr->nh_tbl = fib_get_nhop_array(da->fd);
9092aca58e1SMarko Zec 	fib_get_rtable_info(fib_get_rh(da->fd), &rinfo);
9102aca58e1SMarko Zec 
9111549575fSMarko Zec 	if (da->updates_low > da->updates_high)
9122aca58e1SMarko Zec 		range_rebuild = 1;
9131549575fSMarko Zec 
9141549575fSMarko Zec range_build:
9152aca58e1SMarko Zec 	if (range_rebuild) {
9162aca58e1SMarko Zec 		/* Bulk cleanup */
9172aca58e1SMarko Zec 		bzero(da->chunk_hashtbl, sizeof(da->chunk_hashtbl));
9182aca58e1SMarko Zec 		while ((cdp = LIST_FIRST(&da->all_chunks)) != NULL) {
9192aca58e1SMarko Zec 			LIST_REMOVE(cdp, cd_all_le);
9202aca58e1SMarko Zec 			uma_zfree(chunk_zone, cdp);
9212aca58e1SMarko Zec 		}
92243880c51SMarko Zec 		for (i = 0; i < UNUSED_BUCKETS; i++)
92343880c51SMarko Zec 			LIST_INIT(&da->unused_chunks[i]);
9241549575fSMarko Zec 		da->unused_chunks_size = 0;
9252aca58e1SMarko Zec 		da->rtbl_top = 0;
9262aca58e1SMarko Zec 		da->updates_low = 0;
9272aca58e1SMarko Zec 		da->updates_high = DIRECT_TBL_SIZE - 1;
9282aca58e1SMarko Zec 		memset(da->updates_mask, 0xff, sizeof(da->updates_mask));
9292aca58e1SMarko Zec 		for (i = 0; i < DIRECT_TBL_SIZE; i++) {
9302aca58e1SMarko Zec 			da->direct_tbl[i].fragments = FRAGS_MARK_HIT;
9312aca58e1SMarko Zec 			da->direct_tbl[i].base = 0;
9322aca58e1SMarko Zec 		}
9332aca58e1SMarko Zec 	}
9342aca58e1SMarko Zec 	da->prefixes = rinfo.num_prefixes;
9352aca58e1SMarko Zec 
9362aca58e1SMarko Zec 	/* DXR: construct direct & range table */
9372aca58e1SMarko Zec 	for (i = da->updates_low; i <= da->updates_high; i++) {
9382aca58e1SMarko Zec 		m = da->updates_mask[i >> 5] >> (i & 0x1f);
9392aca58e1SMarko Zec 		if (m == 0)
9402aca58e1SMarko Zec 			i |= 0x1f;
9412aca58e1SMarko Zec 		else if (m & 1 && update_chunk(da, i) != 0)
9422aca58e1SMarko Zec 			return;
9432aca58e1SMarko Zec 	}
9441549575fSMarko Zec 
9451549575fSMarko Zec 	range_frag = 0;
9461549575fSMarko Zec 	if (da->rtbl_top)
9471549575fSMarko Zec 		range_frag = da->unused_chunks_size * 10000ULL / da->rtbl_top;
9481549575fSMarko Zec 	if (range_frag > V_frag_limit) {
9491549575fSMarko Zec 		range_rebuild = 1;
9501549575fSMarko Zec 		goto range_build;
9511549575fSMarko Zec 	}
9521549575fSMarko Zec 
9532aca58e1SMarko Zec 	r_size = sizeof(*da->range_tbl) * da->rtbl_top;
9542aca58e1SMarko Zec 	microuptime(&t1);
9552aca58e1SMarko Zec 
9561549575fSMarko Zec 	if (range_rebuild ||
9572aca58e1SMarko Zec 	    abs(fls(da->prefixes) - fls(da->trie_rebuilt_prefixes)) > 1)
9582aca58e1SMarko Zec 		trie_rebuild = 1;
9591549575fSMarko Zec 
9601549575fSMarko Zec trie_build:
9612aca58e1SMarko Zec 	if (trie_rebuild) {
9622aca58e1SMarko Zec 		da->trie_rebuilt_prefixes = da->prefixes;
9632aca58e1SMarko Zec 		da->d_bits = DXR_D;
9642aca58e1SMarko Zec 		da->updates_low = 0;
9652aca58e1SMarko Zec 		da->updates_high = DIRECT_TBL_SIZE - 1;
9661549575fSMarko Zec 		if (!range_rebuild)
9671549575fSMarko Zec 			memset(da->updates_mask, 0xff,
9681549575fSMarko Zec 			    sizeof(da->updates_mask));
9692aca58e1SMarko Zec 	}
9702aca58e1SMarko Zec 
9712aca58e1SMarko Zec dxr2_try_squeeze:
9722aca58e1SMarko Zec 	if (trie_rebuild) {
9732aca58e1SMarko Zec 		/* Bulk cleanup */
9742aca58e1SMarko Zec 		bzero(da->trietbl, sizeof(da->trietbl));
9752aca58e1SMarko Zec 		bzero(da->trie_hashtbl, sizeof(da->trie_hashtbl));
9762aca58e1SMarko Zec 		while ((tp = LIST_FIRST(&da->all_trie)) != NULL) {
9772aca58e1SMarko Zec 			LIST_REMOVE(tp, td_all_le);
9782aca58e1SMarko Zec 			uma_zfree(trie_zone, tp);
9792aca58e1SMarko Zec 		}
9802aca58e1SMarko Zec 		LIST_INIT(&da->unused_trie);
9812aca58e1SMarko Zec 		da->all_trie_cnt = da->unused_trie_cnt = 0;
9822aca58e1SMarko Zec 	}
9832aca58e1SMarko Zec 
9842aca58e1SMarko Zec 	/* Populate d_tbl, x_tbl */
9852aca58e1SMarko Zec 	dxr_x = DXR_TRIE_BITS - da->d_bits;
9862aca58e1SMarko Zec 	d_tbl_size = (1 << da->d_bits);
9872aca58e1SMarko Zec 
9882aca58e1SMarko Zec 	for (i = da->updates_low >> dxr_x; i <= da->updates_high >> dxr_x;
9892aca58e1SMarko Zec 	    i++) {
990b51f8baeSMarko Zec 		if (!trie_rebuild) {
991b51f8baeSMarko Zec 			m = 0;
992b51f8baeSMarko Zec 			for (int j = 0; j < (1 << dxr_x); j += 32)
993b51f8baeSMarko Zec 				m |= da->updates_mask[((i << dxr_x) + j) >> 5];
994b51f8baeSMarko Zec 			if (m == 0)
995b51f8baeSMarko Zec 				continue;
9962aca58e1SMarko Zec 			trie_unref(da, i);
997b51f8baeSMarko Zec 		}
9982aca58e1SMarko Zec 		ti = trie_ref(da, i);
9992aca58e1SMarko Zec 		if (ti < 0)
10002aca58e1SMarko Zec 			return;
10012aca58e1SMarko Zec 		da->d_tbl[i] = ti;
10022aca58e1SMarko Zec 	}
10032aca58e1SMarko Zec 
10041549575fSMarko Zec 	trie_frag = 0;
10051549575fSMarko Zec 	if (da->all_trie_cnt)
10061549575fSMarko Zec 		trie_frag = da->unused_trie_cnt * 10000ULL / da->all_trie_cnt;
10071549575fSMarko Zec 	if (trie_frag > V_frag_limit) {
10081549575fSMarko Zec 		trie_rebuild = 1;
10091549575fSMarko Zec 		goto trie_build;
10101549575fSMarko Zec 	}
10111549575fSMarko Zec 
10122aca58e1SMarko Zec 	d_size = sizeof(*da->d_tbl) * d_tbl_size;
10132aca58e1SMarko Zec 	x_size = sizeof(*da->x_tbl) * DIRECT_TBL_SIZE / d_tbl_size
10142aca58e1SMarko Zec 	    * da->all_trie_cnt;
10152aca58e1SMarko Zec 	dxr_tot_size = d_size + x_size + r_size;
10162aca58e1SMarko Zec 
10172aca58e1SMarko Zec 	if (trie_rebuild == 1) {
10182aca58e1SMarko Zec 		/* Try to find a more compact D/X split */
10192aca58e1SMarko Zec 		if (prev_size == 0 || dxr_tot_size <= prev_size)
10202aca58e1SMarko Zec 			da->d_bits--;
10212aca58e1SMarko Zec 		else {
10222aca58e1SMarko Zec 			da->d_bits++;
10232aca58e1SMarko Zec 			trie_rebuild = 2;
10242aca58e1SMarko Zec 		}
10252aca58e1SMarko Zec 		prev_size = dxr_tot_size;
10262aca58e1SMarko Zec 		goto dxr2_try_squeeze;
10272aca58e1SMarko Zec 	}
10282aca58e1SMarko Zec 	microuptime(&t2);
10292aca58e1SMarko Zec 
10302aca58e1SMarko Zec 	dxr->d = malloc(dxr_tot_size, M_DXRLPM, M_NOWAIT);
1031ed541e20SMarko Zec 	if (dxr->d == NULL) {
1032ed541e20SMarko Zec 		FIB_PRINTF(LOG_NOTICE, da->fd,
1033ed541e20SMarko Zec 		    "Unable to allocate DXR lookup table");
10342aca58e1SMarko Zec 		return;
1035ed541e20SMarko Zec 	}
10362aca58e1SMarko Zec 	memcpy(dxr->d, da->d_tbl, d_size);
10372aca58e1SMarko Zec 	dxr->x = ((char *) dxr->d) + d_size;
10382aca58e1SMarko Zec 	memcpy(dxr->x, da->x_tbl, x_size);
10392aca58e1SMarko Zec 	dxr->r = ((char *) dxr->x) + x_size;
10402aca58e1SMarko Zec 	dxr->d_shift = 32 - da->d_bits;
10412aca58e1SMarko Zec 	dxr->x_shift = dxr_x;
10422aca58e1SMarko Zec 	dxr->x_mask = 0xffffffffU >> (32 - dxr_x);
10432aca58e1SMarko Zec 	memcpy(dxr->r, da->range_tbl, r_size);
10442aca58e1SMarko Zec 
10452aca58e1SMarko Zec 	if (da->updates_low <= da->updates_high)
10462aca58e1SMarko Zec 		bzero(&da->updates_mask[da->updates_low / 32],
10472aca58e1SMarko Zec 		    (da->updates_high - da->updates_low) / 8 + 1);
10482aca58e1SMarko Zec 	da->updates_low = DIRECT_TBL_SIZE - 1;
10492aca58e1SMarko Zec 	da->updates_high = 0;
10502aca58e1SMarko Zec 	microuptime(&t3);
10512aca58e1SMarko Zec 
10522aca58e1SMarko Zec 	FIB_PRINTF(LOG_INFO, da->fd, "D%dX%dR, %d prefixes, %d nhops (max)",
10532aca58e1SMarko Zec 	    da->d_bits, dxr_x, rinfo.num_prefixes, rinfo.num_nhops);
1054eb3148ccSMarko Zec 	i = dxr_tot_size * 100;
1055eb3148ccSMarko Zec 	if (rinfo.num_prefixes)
1056eb3148ccSMarko Zec 		i /= rinfo.num_prefixes;
10572aca58e1SMarko Zec 	FIB_PRINTF(LOG_INFO, da->fd, "%d.%02d KBytes, %d.%02d Bytes/prefix",
10582aca58e1SMarko Zec 	    dxr_tot_size / 1024, dxr_tot_size * 100 / 1024 % 100,
10592aca58e1SMarko Zec 	    i / 100, i % 100);
10601549575fSMarko Zec 	FIB_PRINTF(LOG_INFO, da->fd,
10611549575fSMarko Zec 	    "%d.%02d%% trie, %d.%02d%% range fragmentation",
10621549575fSMarko Zec 	    trie_frag / 100, trie_frag % 100,
10631549575fSMarko Zec 	    range_frag / 100, range_frag % 100);
10642aca58e1SMarko Zec 	i = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
10652aca58e1SMarko Zec 	FIB_PRINTF(LOG_INFO, da->fd, "range table %s in %u.%03u ms",
10662aca58e1SMarko Zec 	    range_rebuild ? "rebuilt" : "updated", i / 1000, i % 1000);
10672aca58e1SMarko Zec 	i = (t2.tv_sec - t1.tv_sec) * 1000000 + t2.tv_usec - t1.tv_usec;
10682aca58e1SMarko Zec 	FIB_PRINTF(LOG_INFO, da->fd, "trie %s in %u.%03u ms",
10692aca58e1SMarko Zec 	    trie_rebuild ? "rebuilt" : "updated", i / 1000, i % 1000);
10702aca58e1SMarko Zec 	i = (t3.tv_sec - t2.tv_sec) * 1000000 + t3.tv_usec - t2.tv_usec;
10712aca58e1SMarko Zec 	FIB_PRINTF(LOG_INFO, da->fd, "snapshot forked in %u.%03u ms",
10722aca58e1SMarko Zec 	    i / 1000, i % 1000);
10732aca58e1SMarko Zec }
10742aca58e1SMarko Zec 
10752aca58e1SMarko Zec /*
10765295e891SMarko Zec  * Glue functions for attaching to the FIB_ALGO infrastructure.
10772aca58e1SMarko Zec  */
10782aca58e1SMarko Zec 
10792aca58e1SMarko Zec static struct nhop_object *
10802aca58e1SMarko Zec dxr_fib_lookup(void *algo_data, const struct flm_lookup_key key,
10812aca58e1SMarko Zec     uint32_t scopeid)
10822aca58e1SMarko Zec {
10832aca58e1SMarko Zec 	struct dxr *dxr = algo_data;
10842aca58e1SMarko Zec 
1085e7abe200SMarko Zec 	return (dxr->nh_tbl[dxr_lookup(dxr, ntohl(key.addr4.s_addr))]);
10862aca58e1SMarko Zec }
10872aca58e1SMarko Zec 
10882aca58e1SMarko Zec static enum flm_op_result
10892aca58e1SMarko Zec dxr_init(uint32_t fibnum, struct fib_data *fd, void *old_data, void **data)
10902aca58e1SMarko Zec {
10912aca58e1SMarko Zec 	struct dxr *old_dxr = old_data;
10922aca58e1SMarko Zec 	struct dxr_aux *da = NULL;
10932aca58e1SMarko Zec 	struct dxr *dxr;
10942aca58e1SMarko Zec 
10952aca58e1SMarko Zec 	dxr = malloc(sizeof(*dxr), M_DXRAUX, M_NOWAIT);
1096308caa38SMarko Zec 	if (dxr == NULL) {
1097ed541e20SMarko Zec 		FIB_PRINTF(LOG_NOTICE, fd,
1098ed541e20SMarko Zec 		    "Unable to allocate DXR container struct");
10992aca58e1SMarko Zec 		return (FLM_REBUILD);
1100308caa38SMarko Zec 	}
11012aca58e1SMarko Zec 
11022aca58e1SMarko Zec 	/* Check whether we may reuse the old auxiliary structures */
11034ab122e8SMarko Zec 	if (old_dxr != NULL && old_dxr->aux != NULL &&
11044ab122e8SMarko Zec 	    old_dxr->aux->fd == fd) {
11052aca58e1SMarko Zec 		da = old_dxr->aux;
11062aca58e1SMarko Zec 		atomic_add_int(&da->refcnt, 1);
11072aca58e1SMarko Zec 	}
11082aca58e1SMarko Zec 
11092aca58e1SMarko Zec 	dxr->aux = da;
11102aca58e1SMarko Zec 	dxr->d = NULL;
11112aca58e1SMarko Zec 	dxr->fd = fd;
11122aca58e1SMarko Zec 	dxr->fibnum = fibnum;
11132aca58e1SMarko Zec 	*data = dxr;
11142aca58e1SMarko Zec 
11152aca58e1SMarko Zec 	return (FLM_SUCCESS);
11162aca58e1SMarko Zec }
11172aca58e1SMarko Zec 
11182aca58e1SMarko Zec static void
11192aca58e1SMarko Zec dxr_destroy(void *data)
11202aca58e1SMarko Zec {
11212aca58e1SMarko Zec 	struct dxr *dxr = data;
112285801064SMarko Zec 	struct dxr_aux *da = dxr->aux;
11232aca58e1SMarko Zec 	struct chunk_desc *cdp;
11242aca58e1SMarko Zec 	struct trie_desc *tp;
11252aca58e1SMarko Zec 
11262aca58e1SMarko Zec 	free(dxr->d, M_DXRLPM);
11272aca58e1SMarko Zec 	free(dxr, M_DXRAUX);
11282aca58e1SMarko Zec 
11292aca58e1SMarko Zec 	if (da == NULL || atomic_fetchadd_int(&da->refcnt, -1) > 1)
11302aca58e1SMarko Zec 		return;
11312aca58e1SMarko Zec 
11322aca58e1SMarko Zec 	/* Release auxiliary structures */
11332aca58e1SMarko Zec 	while ((cdp = LIST_FIRST(&da->all_chunks)) != NULL) {
11342aca58e1SMarko Zec 		LIST_REMOVE(cdp, cd_all_le);
11352aca58e1SMarko Zec 		uma_zfree(chunk_zone, cdp);
11362aca58e1SMarko Zec 	}
11372aca58e1SMarko Zec 	while ((tp = LIST_FIRST(&da->all_trie)) != NULL) {
11382aca58e1SMarko Zec 		LIST_REMOVE(tp, td_all_le);
11392aca58e1SMarko Zec 		uma_zfree(trie_zone, tp);
11402aca58e1SMarko Zec 	}
11412aca58e1SMarko Zec 	free(da->range_tbl, M_DXRAUX);
11422aca58e1SMarko Zec 	free(da->x_tbl, M_DXRAUX);
11432aca58e1SMarko Zec 	free(da, M_DXRAUX);
11442aca58e1SMarko Zec }
11452aca58e1SMarko Zec 
11462aca58e1SMarko Zec static void
11472aca58e1SMarko Zec epoch_dxr_destroy(epoch_context_t ctx)
11482aca58e1SMarko Zec {
11492aca58e1SMarko Zec 	struct dxr *dxr = __containerof(ctx, struct dxr, epoch_ctx);
11502aca58e1SMarko Zec 
11512aca58e1SMarko Zec 	dxr_destroy(dxr);
11522aca58e1SMarko Zec }
11532aca58e1SMarko Zec 
1154e7abe200SMarko Zec static void *
1155e7abe200SMarko Zec choose_lookup_fn(struct dxr_aux *da)
1156e7abe200SMarko Zec {
1157e7abe200SMarko Zec 
1158e7abe200SMarko Zec 	switch (da->d_bits) {
1159e7abe200SMarko Zec #if DXR_TRIE_BITS > 16
1160e7abe200SMarko Zec 	case 16:
1161e7abe200SMarko Zec 		return (dxr_fib_lookup_16);
1162e7abe200SMarko Zec #endif
1163e7abe200SMarko Zec 	case 15:
1164e7abe200SMarko Zec 		return (dxr_fib_lookup_15);
1165e7abe200SMarko Zec 	case 14:
1166e7abe200SMarko Zec 		return (dxr_fib_lookup_14);
1167e7abe200SMarko Zec 	case 13:
1168e7abe200SMarko Zec 		return (dxr_fib_lookup_13);
1169e7abe200SMarko Zec 	case 12:
1170e7abe200SMarko Zec 		return (dxr_fib_lookup_12);
1171e7abe200SMarko Zec 	case 11:
1172e7abe200SMarko Zec 		return (dxr_fib_lookup_11);
1173e7abe200SMarko Zec 	case 10:
1174e7abe200SMarko Zec 		return (dxr_fib_lookup_10);
1175e7abe200SMarko Zec 	case 9:
1176e7abe200SMarko Zec 		return (dxr_fib_lookup_9);
1177e7abe200SMarko Zec 	}
1178e7abe200SMarko Zec 	return (dxr_fib_lookup);
1179e7abe200SMarko Zec }
1180e7abe200SMarko Zec 
11812aca58e1SMarko Zec static enum flm_op_result
11822aca58e1SMarko Zec dxr_dump_end(void *data, struct fib_dp *dp)
11832aca58e1SMarko Zec {
11842aca58e1SMarko Zec 	struct dxr *dxr = data;
11852aca58e1SMarko Zec 	struct dxr_aux *da;
11862aca58e1SMarko Zec 
11872aca58e1SMarko Zec 	dxr_build(dxr);
11882aca58e1SMarko Zec 
11892aca58e1SMarko Zec 	da = dxr->aux;
1190ed541e20SMarko Zec 	if (da == NULL || dxr->d == NULL)
11912aca58e1SMarko Zec 		return (FLM_REBUILD);
11922aca58e1SMarko Zec 
1193ed541e20SMarko Zec 	if (da->rtbl_top >= BASE_MAX)
11942aca58e1SMarko Zec 		return (FLM_ERROR);
11952aca58e1SMarko Zec 
1196e7abe200SMarko Zec 	dp->f = choose_lookup_fn(da);
11972aca58e1SMarko Zec 	dp->arg = dxr;
11982aca58e1SMarko Zec 
11992aca58e1SMarko Zec 	return (FLM_SUCCESS);
12002aca58e1SMarko Zec }
12012aca58e1SMarko Zec 
12022aca58e1SMarko Zec static enum flm_op_result
12032aca58e1SMarko Zec dxr_dump_rib_item(struct rtentry *rt, void *data)
12042aca58e1SMarko Zec {
12052aca58e1SMarko Zec 
12062aca58e1SMarko Zec 	return (FLM_SUCCESS);
12072aca58e1SMarko Zec }
12082aca58e1SMarko Zec 
12092aca58e1SMarko Zec static enum flm_op_result
12102aca58e1SMarko Zec dxr_change_rib_item(struct rib_head *rnh, struct rib_cmd_info *rc,
12112aca58e1SMarko Zec     void *data)
12122aca58e1SMarko Zec {
12132aca58e1SMarko Zec 
12142aca58e1SMarko Zec 	return (FLM_BATCH);
12152aca58e1SMarko Zec }
12162aca58e1SMarko Zec 
12172aca58e1SMarko Zec static enum flm_op_result
12182aca58e1SMarko Zec dxr_change_rib_batch(struct rib_head *rnh, struct fib_change_queue *q,
12192aca58e1SMarko Zec     void *data)
12202aca58e1SMarko Zec {
12212aca58e1SMarko Zec 	struct dxr *dxr = data;
12222aca58e1SMarko Zec 	struct dxr *new_dxr;
12232aca58e1SMarko Zec 	struct dxr_aux *da;
12242aca58e1SMarko Zec 	struct fib_dp new_dp;
12252aca58e1SMarko Zec 	enum flm_op_result res;
12262aca58e1SMarko Zec 	uint32_t ip, plen, hmask, start, end, i, ui;
12272aca58e1SMarko Zec #ifdef INVARIANTS
12282aca58e1SMarko Zec 	struct rib_rtable_info rinfo;
12292aca58e1SMarko Zec 	int update_delta = 0;
12302aca58e1SMarko Zec #endif
12312aca58e1SMarko Zec 
12324aa275f1SMarko Zec 	MPASS(data != NULL);
12334aa275f1SMarko Zec 	MPASS(q != NULL);
12344aa275f1SMarko Zec 	MPASS(q->count < q->size);
12352aca58e1SMarko Zec 
12362aca58e1SMarko Zec 	da = dxr->aux;
12374aa275f1SMarko Zec 	MPASS(da != NULL);
12384ab122e8SMarko Zec 	MPASS(da->fd == dxr->fd);
12394aa275f1SMarko Zec 	MPASS(da->refcnt > 0);
12402aca58e1SMarko Zec 
12412aca58e1SMarko Zec 	FIB_PRINTF(LOG_INFO, da->fd, "processing %d update(s)", q->count);
12422aca58e1SMarko Zec 	for (ui = 0; ui < q->count; ui++) {
12432aca58e1SMarko Zec #ifdef INVARIANTS
12442aca58e1SMarko Zec 		if (q->entries[ui].nh_new != NULL)
12452aca58e1SMarko Zec 			update_delta++;
12462aca58e1SMarko Zec 		if (q->entries[ui].nh_old != NULL)
12472aca58e1SMarko Zec 			update_delta--;
12482aca58e1SMarko Zec #endif
12492aca58e1SMarko Zec 		plen = q->entries[ui].plen;
12502aca58e1SMarko Zec 		ip = ntohl(q->entries[ui].addr4.s_addr);
1251442c8a24SMarko Zec 		if (plen < 32)
12522aca58e1SMarko Zec 			hmask = 0xffffffffU >> plen;
1253442c8a24SMarko Zec 		else
1254442c8a24SMarko Zec 			hmask = 0;
12552aca58e1SMarko Zec 		start = (ip & ~hmask) >> DXR_RANGE_SHIFT;
12562aca58e1SMarko Zec 		end = (ip | hmask) >> DXR_RANGE_SHIFT;
12572aca58e1SMarko Zec 
12582aca58e1SMarko Zec 		if ((start & 0x1f) == 0 && (end & 0x1f) == 0x1f)
12592aca58e1SMarko Zec 			for (i = start >> 5; i <= end >> 5; i++)
12602aca58e1SMarko Zec 				da->updates_mask[i] = 0xffffffffU;
12612aca58e1SMarko Zec 		else
12622aca58e1SMarko Zec 			for (i = start; i <= end; i++)
12632aca58e1SMarko Zec 				da->updates_mask[i >> 5] |= (1 << (i & 0x1f));
12642aca58e1SMarko Zec 		if (start < da->updates_low)
12652aca58e1SMarko Zec 			da->updates_low = start;
12662aca58e1SMarko Zec 		if (end > da->updates_high)
12672aca58e1SMarko Zec 			da->updates_high = end;
12682aca58e1SMarko Zec 	}
12692aca58e1SMarko Zec 
12702aca58e1SMarko Zec #ifdef INVARIANTS
12712aca58e1SMarko Zec 	fib_get_rtable_info(fib_get_rh(da->fd), &rinfo);
12724aa275f1SMarko Zec 	MPASS(da->prefixes + update_delta == rinfo.num_prefixes);
12732aca58e1SMarko Zec #endif
12742aca58e1SMarko Zec 
12752aca58e1SMarko Zec 	res = dxr_init(0, dxr->fd, data, (void **) &new_dxr);
12762aca58e1SMarko Zec 	if (res != FLM_SUCCESS)
12772aca58e1SMarko Zec 		return (res);
12782aca58e1SMarko Zec 
12792aca58e1SMarko Zec 	dxr_build(new_dxr);
12802aca58e1SMarko Zec 
12812aca58e1SMarko Zec 	/* Structural limit exceeded, hard error */
128219bd24caSMarko Zec 	if (da->rtbl_top >= BASE_MAX) {
128319bd24caSMarko Zec 		dxr_destroy(new_dxr);
12842aca58e1SMarko Zec 		return (FLM_ERROR);
128519bd24caSMarko Zec 	}
12862aca58e1SMarko Zec 
12872aca58e1SMarko Zec 	if (new_dxr->d == NULL) {
12882aca58e1SMarko Zec 		dxr_destroy(new_dxr);
12892aca58e1SMarko Zec 		return (FLM_REBUILD);
12902aca58e1SMarko Zec 	}
12912aca58e1SMarko Zec 
1292e7abe200SMarko Zec 	new_dp.f = choose_lookup_fn(da);
12932aca58e1SMarko Zec 	new_dp.arg = new_dxr;
12942aca58e1SMarko Zec 	if (fib_set_datapath_ptr(dxr->fd, &new_dp)) {
12952aca58e1SMarko Zec 		fib_set_algo_ptr(dxr->fd, new_dxr);
12962aca58e1SMarko Zec 		fib_epoch_call(epoch_dxr_destroy, &dxr->epoch_ctx);
12972aca58e1SMarko Zec 		return (FLM_SUCCESS);
12982aca58e1SMarko Zec 	}
12992aca58e1SMarko Zec 
1300ed541e20SMarko Zec 	FIB_PRINTF(LOG_NOTICE, dxr->fd, "fib_set_datapath_ptr() failed");
13012aca58e1SMarko Zec 	dxr_destroy(new_dxr);
13022aca58e1SMarko Zec 	return (FLM_REBUILD);
13032aca58e1SMarko Zec }
13042aca58e1SMarko Zec 
13052aca58e1SMarko Zec static uint8_t
13062aca58e1SMarko Zec dxr_get_pref(const struct rib_rtable_info *rinfo)
13072aca58e1SMarko Zec {
13082aca58e1SMarko Zec 
13092aca58e1SMarko Zec 	/* Below bsearch4 up to 10 prefixes. Always supersedes dpdk_lpm4. */
13102aca58e1SMarko Zec 	return (251);
13112aca58e1SMarko Zec }
13122aca58e1SMarko Zec 
1313e7abe200SMarko Zec SYSCTL_DECL(_net_route_algo);
1314e7abe200SMarko Zec SYSCTL_NODE(_net_route_algo, OID_AUTO, dxr, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
1315e7abe200SMarko Zec     "DXR tunables");
1316e7abe200SMarko Zec 
1317e7abe200SMarko Zec static int
1318e7abe200SMarko Zec sysctl_dxr_frag_limit(SYSCTL_HANDLER_ARGS)
1319e7abe200SMarko Zec {
1320e7abe200SMarko Zec 	char buf[8];
1321e7abe200SMarko Zec 	int error, new, i;
1322e7abe200SMarko Zec 
1323e7abe200SMarko Zec 	snprintf(buf, sizeof(buf), "%d.%02d%%", V_frag_limit / 100,
1324e7abe200SMarko Zec 	    V_frag_limit % 100);
1325e7abe200SMarko Zec 	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
1326e7abe200SMarko Zec 	if (error != 0 || req->newptr == NULL)
1327e7abe200SMarko Zec 		return (error);
1328e7abe200SMarko Zec 	if (!isdigit(*buf) && *buf != '.')
1329e7abe200SMarko Zec 		return (EINVAL);
1330e7abe200SMarko Zec 	for (i = 0, new = 0; isdigit(buf[i]) && i < sizeof(buf); i++)
1331e7abe200SMarko Zec 		new = new * 10 + buf[i] - '0';
1332e7abe200SMarko Zec 	new *= 100;
1333e7abe200SMarko Zec 	if (buf[i++] == '.') {
1334e7abe200SMarko Zec 		if (!isdigit(buf[i]))
1335e7abe200SMarko Zec 			return (EINVAL);
1336e7abe200SMarko Zec 		new += (buf[i++] - '0') * 10;
1337e7abe200SMarko Zec 		if (isdigit(buf[i]))
1338e7abe200SMarko Zec 			new += buf[i++] - '0';
1339e7abe200SMarko Zec 	}
1340e7abe200SMarko Zec 	if (new > 1000)
1341e7abe200SMarko Zec 		return (EINVAL);
1342e7abe200SMarko Zec 	V_frag_limit = new;
1343e7abe200SMarko Zec 	snprintf(buf, sizeof(buf), "%d.%02d%%", V_frag_limit / 100,
1344e7abe200SMarko Zec 	    V_frag_limit % 100);
1345e7abe200SMarko Zec 	return (0);
1346e7abe200SMarko Zec }
1347e7abe200SMarko Zec 
1348e7abe200SMarko Zec SYSCTL_PROC(_net_route_algo_dxr, OID_AUTO, frag_limit,
1349e7abe200SMarko Zec     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_VNET,
1350e7abe200SMarko Zec     0, 0, sysctl_dxr_frag_limit, "A",
1351e7abe200SMarko Zec     "Fragmentation threshold to full rebuild");
1352e7abe200SMarko Zec 
13532aca58e1SMarko Zec static struct fib_lookup_module fib_dxr_mod = {
13542aca58e1SMarko Zec 	.flm_name = "dxr",
13552aca58e1SMarko Zec 	.flm_family = AF_INET,
13562aca58e1SMarko Zec 	.flm_init_cb = dxr_init,
13572aca58e1SMarko Zec 	.flm_destroy_cb = dxr_destroy,
13582aca58e1SMarko Zec 	.flm_dump_rib_item_cb = dxr_dump_rib_item,
13592aca58e1SMarko Zec 	.flm_dump_end_cb = dxr_dump_end,
13602aca58e1SMarko Zec 	.flm_change_rib_item_cb = dxr_change_rib_item,
13612aca58e1SMarko Zec 	.flm_change_rib_items_cb = dxr_change_rib_batch,
13622aca58e1SMarko Zec 	.flm_get_pref = dxr_get_pref,
13632aca58e1SMarko Zec };
13642aca58e1SMarko Zec 
13652aca58e1SMarko Zec static int
13662aca58e1SMarko Zec dxr_modevent(module_t mod, int type, void *unused)
13672aca58e1SMarko Zec {
13682aca58e1SMarko Zec 	int error;
13692aca58e1SMarko Zec 
13702aca58e1SMarko Zec 	switch (type) {
13712aca58e1SMarko Zec 	case MOD_LOAD:
13722aca58e1SMarko Zec 		chunk_zone = uma_zcreate("dxr chunk", sizeof(struct chunk_desc),
13732aca58e1SMarko Zec 		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
13742aca58e1SMarko Zec 		trie_zone = uma_zcreate("dxr trie", sizeof(struct trie_desc),
13752aca58e1SMarko Zec 		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
13762aca58e1SMarko Zec 		fib_module_register(&fib_dxr_mod);
13772aca58e1SMarko Zec 		return(0);
13782aca58e1SMarko Zec 	case MOD_UNLOAD:
13792aca58e1SMarko Zec 		error = fib_module_unregister(&fib_dxr_mod);
13802aca58e1SMarko Zec 		if (error)
13812aca58e1SMarko Zec 			return (error);
13822aca58e1SMarko Zec 		uma_zdestroy(chunk_zone);
13832aca58e1SMarko Zec 		uma_zdestroy(trie_zone);
13842aca58e1SMarko Zec 		return(0);
13852aca58e1SMarko Zec 	default:
13862aca58e1SMarko Zec 		return(EOPNOTSUPP);
13872aca58e1SMarko Zec 	}
13882aca58e1SMarko Zec }
13892aca58e1SMarko Zec 
13902aca58e1SMarko Zec static moduledata_t dxr_mod = {"fib_dxr", dxr_modevent, 0};
13912aca58e1SMarko Zec 
13922aca58e1SMarko Zec DECLARE_MODULE(fib_dxr, dxr_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
13932aca58e1SMarko Zec MODULE_VERSION(fib_dxr, 1);
1394