12aca58e1SMarko Zec /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 32aca58e1SMarko Zec * 4*42b3c16eSMarko Zec * Copyright (c) 2012-2024 Marko Zec 52aca58e1SMarko Zec * Copyright (c) 2005, 2018 University of Zagreb 62aca58e1SMarko Zec * Copyright (c) 2005 International Computer Science Institute 72aca58e1SMarko Zec * 82aca58e1SMarko Zec * Redistribution and use in source and binary forms, with or without 92aca58e1SMarko Zec * modification, are permitted provided that the following conditions 102aca58e1SMarko Zec * are met: 112aca58e1SMarko Zec * 1. Redistributions of source code must retain the above copyright 122aca58e1SMarko Zec * notice, this list of conditions and the following disclaimer. 132aca58e1SMarko Zec * 2. Redistributions in binary form must reproduce the above copyright 142aca58e1SMarko Zec * notice, this list of conditions and the following disclaimer in the 152aca58e1SMarko Zec * documentation and/or other materials provided with the distribution. 162aca58e1SMarko Zec * 172aca58e1SMarko Zec * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 182aca58e1SMarko Zec * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 192aca58e1SMarko Zec * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 202aca58e1SMarko Zec * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 212aca58e1SMarko Zec * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 222aca58e1SMarko Zec * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 232aca58e1SMarko Zec * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 242aca58e1SMarko Zec * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 252aca58e1SMarko Zec * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 262aca58e1SMarko Zec * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 272aca58e1SMarko Zec * SUCH DAMAGE. 282aca58e1SMarko Zec */ 292aca58e1SMarko Zec 302aca58e1SMarko Zec /* 312aca58e1SMarko Zec * An implementation of DXR, a simple IPv4 LPM scheme with compact lookup 322aca58e1SMarko Zec * structures and a trivial search procedure. More significant bits of 332aca58e1SMarko Zec * the search key are used to directly index a two-stage trie, while the 342aca58e1SMarko Zec * remaining bits are used for finding the next hop in a sorted array. 352aca58e1SMarko Zec * More details in: 362aca58e1SMarko Zec * 372aca58e1SMarko Zec * M. Zec, L. Rizzo, M. Mikuc, DXR: towards a billion routing lookups per 382aca58e1SMarko Zec * second in software, ACM SIGCOMM Computer Communication Review, September 392aca58e1SMarko Zec * 2012 402aca58e1SMarko Zec * 412aca58e1SMarko Zec * M. Zec, M. Mikuc, Pushing the envelope: beyond two billion IP routing 422aca58e1SMarko Zec * lookups per second on commodity CPUs, IEEE SoftCOM, September 2017, Split 432aca58e1SMarko Zec */ 442aca58e1SMarko Zec 452aca58e1SMarko Zec #include <sys/cdefs.h> 462aca58e1SMarko Zec #include "opt_inet.h" 472aca58e1SMarko Zec 482aca58e1SMarko Zec #include <sys/param.h> 492aca58e1SMarko Zec #include <sys/kernel.h> 501549575fSMarko Zec #include <sys/ctype.h> 512aca58e1SMarko Zec #include <sys/epoch.h> 522aca58e1SMarko Zec #include <sys/malloc.h> 532aca58e1SMarko Zec #include <sys/module.h> 542aca58e1SMarko Zec #include <sys/socket.h> 552aca58e1SMarko Zec #include <sys/sysctl.h> 562aca58e1SMarko Zec #include <sys/syslog.h> 572aca58e1SMarko Zec 582aca58e1SMarko Zec #include <vm/uma.h> 592aca58e1SMarko Zec 602aca58e1SMarko Zec #include <netinet/in.h> 612aca58e1SMarko Zec #include <netinet/in_fib.h> 622aca58e1SMarko Zec 632aca58e1SMarko Zec #include <net/route.h> 642aca58e1SMarko Zec #include <net/route/route_ctl.h> 652aca58e1SMarko Zec #include <net/route/fib_algo.h> 662aca58e1SMarko Zec 672aca58e1SMarko Zec #define DXR_TRIE_BITS 20 682aca58e1SMarko Zec 692aca58e1SMarko Zec CTASSERT(DXR_TRIE_BITS >= 16 && DXR_TRIE_BITS <= 24); 702aca58e1SMarko Zec 712aca58e1SMarko Zec #if DXR_TRIE_BITS > 16 722aca58e1SMarko Zec #define DXR_D 16 732aca58e1SMarko Zec #else 742aca58e1SMarko Zec #define DXR_D (DXR_TRIE_BITS - 1) 752aca58e1SMarko Zec #endif 762aca58e1SMarko Zec 772aca58e1SMarko Zec #define D_TBL_SIZE (1 << DXR_D) 782aca58e1SMarko Zec #define DIRECT_TBL_SIZE (1 << DXR_TRIE_BITS) 792aca58e1SMarko Zec #define DXR_RANGE_MASK (0xffffffffU >> DXR_TRIE_BITS) 802aca58e1SMarko Zec #define DXR_RANGE_SHIFT (32 - DXR_TRIE_BITS) 812aca58e1SMarko Zec 822aca58e1SMarko Zec #define DESC_BASE_BITS 22 832aca58e1SMarko Zec #define DESC_FRAGMENTS_BITS (32 - DESC_BASE_BITS) 842aca58e1SMarko Zec #define BASE_MAX ((1 << DESC_BASE_BITS) - 1) 852aca58e1SMarko Zec #define RTBL_SIZE_INCR (BASE_MAX / 64) 862aca58e1SMarko Zec 872aca58e1SMarko Zec #if DXR_TRIE_BITS < 24 882aca58e1SMarko Zec #define FRAGS_MASK_SHORT ((1 << (23 - DXR_TRIE_BITS)) - 1) 892aca58e1SMarko Zec #else 902aca58e1SMarko Zec #define FRAGS_MASK_SHORT 0 912aca58e1SMarko Zec #endif 922aca58e1SMarko Zec #define FRAGS_PREF_SHORT (((1 << DESC_FRAGMENTS_BITS) - 1) & \ 932aca58e1SMarko Zec ~FRAGS_MASK_SHORT) 942aca58e1SMarko Zec #define FRAGS_MARK_XL (FRAGS_PREF_SHORT - 1) 952aca58e1SMarko Zec #define FRAGS_MARK_HIT (FRAGS_PREF_SHORT - 2) 962aca58e1SMarko Zec 972aca58e1SMarko Zec #define IS_SHORT_FORMAT(x) ((x & FRAGS_PREF_SHORT) == FRAGS_PREF_SHORT) 982aca58e1SMarko Zec #define IS_LONG_FORMAT(x) ((x & FRAGS_PREF_SHORT) != FRAGS_PREF_SHORT) 992aca58e1SMarko Zec #define IS_XL_FORMAT(x) (x == FRAGS_MARK_XL) 1002aca58e1SMarko Zec 1012aca58e1SMarko Zec #define RE_SHORT_MAX_NH ((1 << (DXR_TRIE_BITS - 8)) - 1) 1022aca58e1SMarko Zec 1032aca58e1SMarko Zec #define CHUNK_HASH_BITS 16 1042aca58e1SMarko Zec #define CHUNK_HASH_SIZE (1 << CHUNK_HASH_BITS) 1052aca58e1SMarko Zec #define CHUNK_HASH_MASK (CHUNK_HASH_SIZE - 1) 1062aca58e1SMarko Zec 1072aca58e1SMarko Zec #define TRIE_HASH_BITS 16 1082aca58e1SMarko Zec #define TRIE_HASH_SIZE (1 << TRIE_HASH_BITS) 1092aca58e1SMarko Zec #define TRIE_HASH_MASK (TRIE_HASH_SIZE - 1) 1102aca58e1SMarko Zec 1112aca58e1SMarko Zec #define XTBL_SIZE_INCR (DIRECT_TBL_SIZE / 16) 1122aca58e1SMarko Zec 11343880c51SMarko Zec #define UNUSED_BUCKETS 8 11443880c51SMarko Zec 1152aca58e1SMarko Zec /* Lookup structure elements */ 1162aca58e1SMarko Zec 1172aca58e1SMarko Zec struct direct_entry { 1182aca58e1SMarko Zec uint32_t fragments: DESC_FRAGMENTS_BITS, 1192aca58e1SMarko Zec base: DESC_BASE_BITS; 1202aca58e1SMarko Zec }; 1212aca58e1SMarko Zec 1222aca58e1SMarko Zec struct range_entry_long { 1232aca58e1SMarko Zec uint32_t start: DXR_RANGE_SHIFT, 1242aca58e1SMarko Zec nexthop: DXR_TRIE_BITS; 1252aca58e1SMarko Zec }; 1262aca58e1SMarko Zec 1272aca58e1SMarko Zec #if DXR_TRIE_BITS < 24 1282aca58e1SMarko Zec struct range_entry_short { 1292aca58e1SMarko Zec uint16_t start: DXR_RANGE_SHIFT - 8, 1302aca58e1SMarko Zec nexthop: DXR_TRIE_BITS - 8; 1312aca58e1SMarko Zec }; 1322aca58e1SMarko Zec #endif 1332aca58e1SMarko Zec 1342aca58e1SMarko Zec /* Auxiliary structures */ 1352aca58e1SMarko Zec 1362aca58e1SMarko Zec struct heap_entry { 1372aca58e1SMarko Zec uint32_t start; 1382aca58e1SMarko Zec uint32_t end; 1392aca58e1SMarko Zec uint32_t preflen; 1402aca58e1SMarko Zec uint32_t nexthop; 1412aca58e1SMarko Zec }; 1422aca58e1SMarko Zec 1432aca58e1SMarko Zec struct chunk_desc { 1442aca58e1SMarko Zec LIST_ENTRY(chunk_desc) cd_all_le; 1452aca58e1SMarko Zec LIST_ENTRY(chunk_desc) cd_hash_le; 1462aca58e1SMarko Zec uint32_t cd_hash; 1472aca58e1SMarko Zec uint32_t cd_refcnt; 1482aca58e1SMarko Zec uint32_t cd_base; 1492aca58e1SMarko Zec uint32_t cd_cur_size; 1502aca58e1SMarko Zec uint32_t cd_max_size; 1512aca58e1SMarko Zec }; 1522aca58e1SMarko Zec 1532aca58e1SMarko Zec struct trie_desc { 1542aca58e1SMarko Zec LIST_ENTRY(trie_desc) td_all_le; 1552aca58e1SMarko Zec LIST_ENTRY(trie_desc) td_hash_le; 1562aca58e1SMarko Zec uint32_t td_hash; 1572aca58e1SMarko Zec uint32_t td_index; 1582aca58e1SMarko Zec uint32_t td_refcnt; 1592aca58e1SMarko Zec }; 1602aca58e1SMarko Zec 1612aca58e1SMarko Zec struct dxr_aux { 1622aca58e1SMarko Zec /* Glue to external state */ 1632aca58e1SMarko Zec struct fib_data *fd; 1642aca58e1SMarko Zec uint32_t fibnum; 1652aca58e1SMarko Zec int refcnt; 1662aca58e1SMarko Zec 1672aca58e1SMarko Zec /* Auxiliary build-time tables */ 1682aca58e1SMarko Zec struct direct_entry direct_tbl[DIRECT_TBL_SIZE]; 1692aca58e1SMarko Zec uint16_t d_tbl[D_TBL_SIZE]; 1702aca58e1SMarko Zec struct direct_entry *x_tbl; 1712aca58e1SMarko Zec union { 1722aca58e1SMarko Zec struct range_entry_long re; 1732aca58e1SMarko Zec uint32_t fragments; 1742aca58e1SMarko Zec } *range_tbl; 1752aca58e1SMarko Zec 1762aca58e1SMarko Zec /* Auxiliary internal state */ 1772aca58e1SMarko Zec uint32_t updates_mask[DIRECT_TBL_SIZE / 32]; 1782aca58e1SMarko Zec struct trie_desc *trietbl[D_TBL_SIZE]; 1792aca58e1SMarko Zec LIST_HEAD(, chunk_desc) chunk_hashtbl[CHUNK_HASH_SIZE]; 1802aca58e1SMarko Zec LIST_HEAD(, chunk_desc) all_chunks; 18143880c51SMarko Zec LIST_HEAD(, chunk_desc) unused_chunks[UNUSED_BUCKETS]; 1822aca58e1SMarko Zec LIST_HEAD(, trie_desc) trie_hashtbl[TRIE_HASH_SIZE]; 1832aca58e1SMarko Zec LIST_HEAD(, trie_desc) all_trie; 1842aca58e1SMarko Zec LIST_HEAD(, trie_desc) unused_trie; /* abuses hash link entry */ 1852aca58e1SMarko Zec struct sockaddr_in dst; 1862aca58e1SMarko Zec struct sockaddr_in mask; 1872aca58e1SMarko Zec struct heap_entry heap[33]; 1882aca58e1SMarko Zec uint32_t prefixes; 1892aca58e1SMarko Zec uint32_t updates_low; 1902aca58e1SMarko Zec uint32_t updates_high; 1911549575fSMarko Zec uint32_t unused_chunks_size; 1922aca58e1SMarko Zec uint32_t xtbl_size; 1932aca58e1SMarko Zec uint32_t all_trie_cnt; 1942aca58e1SMarko Zec uint32_t unused_trie_cnt; 1952aca58e1SMarko Zec uint32_t trie_rebuilt_prefixes; 1962aca58e1SMarko Zec uint32_t heap_index; 1972aca58e1SMarko Zec uint32_t d_bits; 1982aca58e1SMarko Zec uint32_t rtbl_size; 1992aca58e1SMarko Zec uint32_t rtbl_top; 2002aca58e1SMarko Zec uint32_t rtbl_work_frags; 2012aca58e1SMarko Zec uint32_t work_chunk; 2022aca58e1SMarko Zec }; 2032aca58e1SMarko Zec 2042aca58e1SMarko Zec /* Main lookup structure container */ 2052aca58e1SMarko Zec 2062aca58e1SMarko Zec struct dxr { 2072aca58e1SMarko Zec /* Lookup tables */ 2082aca58e1SMarko Zec void *d; 2092aca58e1SMarko Zec void *x; 2102aca58e1SMarko Zec void *r; 2112aca58e1SMarko Zec struct nhop_object **nh_tbl; 2122aca58e1SMarko Zec 2132aca58e1SMarko Zec /* Glue to external state */ 2142aca58e1SMarko Zec struct dxr_aux *aux; 2152aca58e1SMarko Zec struct fib_data *fd; 2162aca58e1SMarko Zec struct epoch_context epoch_ctx; 2172aca58e1SMarko Zec uint32_t fibnum; 218e7abe200SMarko Zec uint16_t d_shift; 219e7abe200SMarko Zec uint16_t x_shift; 220e7abe200SMarko Zec uint32_t x_mask; 2212aca58e1SMarko Zec }; 2222aca58e1SMarko Zec 2232aca58e1SMarko Zec static MALLOC_DEFINE(M_DXRLPM, "dxr", "DXR LPM"); 2242aca58e1SMarko Zec static MALLOC_DEFINE(M_DXRAUX, "dxr aux", "DXR auxiliary"); 2252aca58e1SMarko Zec 2262aca58e1SMarko Zec uma_zone_t chunk_zone; 2272aca58e1SMarko Zec uma_zone_t trie_zone; 2282aca58e1SMarko Zec 2291549575fSMarko Zec VNET_DEFINE_STATIC(int, frag_limit) = 100; 2301549575fSMarko Zec #define V_frag_limit VNET(frag_limit) 2311549575fSMarko Zec 2322aca58e1SMarko Zec /* Binary search for a matching address range */ 2332aca58e1SMarko Zec #define DXR_LOOKUP_STAGE \ 2342aca58e1SMarko Zec if (masked_dst < range[middle].start) { \ 2352aca58e1SMarko Zec upperbound = middle; \ 2362aca58e1SMarko Zec middle = (middle + lowerbound) / 2; \ 2372aca58e1SMarko Zec } else if (masked_dst < range[middle + 1].start) \ 2382aca58e1SMarko Zec return (range[middle].nexthop); \ 2392aca58e1SMarko Zec else { \ 2402aca58e1SMarko Zec lowerbound = middle + 1; \ 2412aca58e1SMarko Zec middle = (upperbound + middle + 1) / 2; \ 2422aca58e1SMarko Zec } \ 2432aca58e1SMarko Zec if (upperbound == lowerbound) \ 2442aca58e1SMarko Zec return (range[lowerbound].nexthop); 2452aca58e1SMarko Zec 2462aca58e1SMarko Zec static int 247e7abe200SMarko Zec range_lookup(struct range_entry_long *rt, struct direct_entry de, uint32_t dst) 2482aca58e1SMarko Zec { 2492aca58e1SMarko Zec uint32_t base; 2502aca58e1SMarko Zec uint32_t upperbound; 2512aca58e1SMarko Zec uint32_t middle; 2522aca58e1SMarko Zec uint32_t lowerbound; 2532aca58e1SMarko Zec uint32_t masked_dst; 2542aca58e1SMarko Zec 2552aca58e1SMarko Zec base = de.base; 2562aca58e1SMarko Zec lowerbound = 0; 2572aca58e1SMarko Zec masked_dst = dst & DXR_RANGE_MASK; 2582aca58e1SMarko Zec 2592aca58e1SMarko Zec #if DXR_TRIE_BITS < 24 2602aca58e1SMarko Zec if (__predict_true(IS_SHORT_FORMAT(de.fragments))) { 2612aca58e1SMarko Zec upperbound = de.fragments & FRAGS_MASK_SHORT; 2622aca58e1SMarko Zec struct range_entry_short *range = 2632aca58e1SMarko Zec (struct range_entry_short *) &rt[base]; 2642aca58e1SMarko Zec 2652aca58e1SMarko Zec masked_dst >>= 8; 2662aca58e1SMarko Zec middle = upperbound; 2672aca58e1SMarko Zec upperbound = upperbound * 2 + 1; 2682aca58e1SMarko Zec 2692aca58e1SMarko Zec for (;;) { 2702aca58e1SMarko Zec DXR_LOOKUP_STAGE 2712aca58e1SMarko Zec DXR_LOOKUP_STAGE 2722aca58e1SMarko Zec } 2732aca58e1SMarko Zec } 2742aca58e1SMarko Zec #endif 2752aca58e1SMarko Zec 2762aca58e1SMarko Zec upperbound = de.fragments; 2772aca58e1SMarko Zec middle = upperbound / 2; 2782aca58e1SMarko Zec struct range_entry_long *range = &rt[base]; 2792aca58e1SMarko Zec if (__predict_false(IS_XL_FORMAT(de.fragments))) { 2802aca58e1SMarko Zec upperbound = *((uint32_t *) range); 2812aca58e1SMarko Zec range++; 2822aca58e1SMarko Zec middle = upperbound / 2; 2832aca58e1SMarko Zec } 2842aca58e1SMarko Zec 2852aca58e1SMarko Zec for (;;) { 2862aca58e1SMarko Zec DXR_LOOKUP_STAGE 2872aca58e1SMarko Zec DXR_LOOKUP_STAGE 2882aca58e1SMarko Zec } 2892aca58e1SMarko Zec } 2902aca58e1SMarko Zec 291e7abe200SMarko Zec #define DXR_LOOKUP_DEFINE(D) \ 292e7abe200SMarko Zec static int inline \ 293e7abe200SMarko Zec dxr_lookup_##D(struct dxr *dxr, uint32_t dst) \ 294e7abe200SMarko Zec { \ 295e7abe200SMarko Zec struct direct_entry de; \ 296e7abe200SMarko Zec uint16_t *dt = dxr->d; \ 297e7abe200SMarko Zec struct direct_entry *xt = dxr->x; \ 298e7abe200SMarko Zec \ 299e7abe200SMarko Zec de = xt[(dt[dst >> (32 - (D))] << (DXR_TRIE_BITS - (D))) \ 300e7abe200SMarko Zec + ((dst >> DXR_RANGE_SHIFT) & \ 301e7abe200SMarko Zec (0xffffffffU >> (32 - DXR_TRIE_BITS + (D))))]; \ 302e7abe200SMarko Zec if (__predict_true(de.fragments == FRAGS_MARK_HIT)) \ 303e7abe200SMarko Zec return (de.base); \ 304e7abe200SMarko Zec return (range_lookup(dxr->r, de, dst)); \ 305e7abe200SMarko Zec } \ 306e7abe200SMarko Zec \ 307e7abe200SMarko Zec static struct nhop_object * \ 308e7abe200SMarko Zec dxr_fib_lookup_##D(void *algo_data, \ 309e7abe200SMarko Zec const struct flm_lookup_key key, uint32_t scopeid __unused) \ 310e7abe200SMarko Zec { \ 311e7abe200SMarko Zec struct dxr *dxr = algo_data; \ 312e7abe200SMarko Zec \ 313e7abe200SMarko Zec return (dxr->nh_tbl[dxr_lookup_##D(dxr, \ 314e7abe200SMarko Zec ntohl(key.addr4.s_addr))]); \ 315e7abe200SMarko Zec } 316e7abe200SMarko Zec 317e7abe200SMarko Zec #if DXR_TRIE_BITS > 16 318e7abe200SMarko Zec DXR_LOOKUP_DEFINE(16) 319e7abe200SMarko Zec #endif 320e7abe200SMarko Zec DXR_LOOKUP_DEFINE(15) 321e7abe200SMarko Zec DXR_LOOKUP_DEFINE(14) 322e7abe200SMarko Zec DXR_LOOKUP_DEFINE(13) 323e7abe200SMarko Zec DXR_LOOKUP_DEFINE(12) 324e7abe200SMarko Zec DXR_LOOKUP_DEFINE(11) 325e7abe200SMarko Zec DXR_LOOKUP_DEFINE(10) 326e7abe200SMarko Zec DXR_LOOKUP_DEFINE(9) 327e7abe200SMarko Zec 328e7abe200SMarko Zec static int inline 329e7abe200SMarko Zec dxr_lookup(struct dxr *dxr, uint32_t dst) 330e7abe200SMarko Zec { 331e7abe200SMarko Zec struct direct_entry de; 332e7abe200SMarko Zec uint16_t *dt = dxr->d; 333e7abe200SMarko Zec struct direct_entry *xt = dxr->x; 334e7abe200SMarko Zec 335e7abe200SMarko Zec de = xt[(dt[dst >> dxr->d_shift] << dxr->x_shift) + 336e7abe200SMarko Zec ((dst >> DXR_RANGE_SHIFT) & dxr->x_mask)]; 337e7abe200SMarko Zec if (__predict_true(de.fragments == FRAGS_MARK_HIT)) 338e7abe200SMarko Zec return (de.base); 339e7abe200SMarko Zec return (range_lookup(dxr->r, de, dst)); 340e7abe200SMarko Zec } 341e7abe200SMarko Zec 3422aca58e1SMarko Zec static void 3432aca58e1SMarko Zec initheap(struct dxr_aux *da, uint32_t dst_u32, uint32_t chunk) 3442aca58e1SMarko Zec { 3452aca58e1SMarko Zec struct heap_entry *fhp = &da->heap[0]; 3462aca58e1SMarko Zec struct rtentry *rt; 3472aca58e1SMarko Zec struct route_nhop_data rnd; 3482aca58e1SMarko Zec 3492aca58e1SMarko Zec da->heap_index = 0; 3502aca58e1SMarko Zec da->dst.sin_addr.s_addr = htonl(dst_u32); 3512aca58e1SMarko Zec rt = fib4_lookup_rt(da->fibnum, da->dst.sin_addr, 0, NHR_UNLOCKED, 3522aca58e1SMarko Zec &rnd); 3532aca58e1SMarko Zec if (rt != NULL) { 3542aca58e1SMarko Zec struct in_addr addr; 3552aca58e1SMarko Zec uint32_t scopeid; 3562aca58e1SMarko Zec 3572aca58e1SMarko Zec rt_get_inet_prefix_plen(rt, &addr, &fhp->preflen, &scopeid); 3582aca58e1SMarko Zec fhp->start = ntohl(addr.s_addr); 3592aca58e1SMarko Zec fhp->end = fhp->start; 3602aca58e1SMarko Zec if (fhp->preflen < 32) 3612aca58e1SMarko Zec fhp->end |= (0xffffffffU >> fhp->preflen); 3622aca58e1SMarko Zec fhp->nexthop = fib_get_nhop_idx(da->fd, rnd.rnd_nhop); 3632aca58e1SMarko Zec } else { 3642aca58e1SMarko Zec fhp->preflen = fhp->nexthop = fhp->start = 0; 3652aca58e1SMarko Zec fhp->end = 0xffffffffU; 3662aca58e1SMarko Zec } 3672aca58e1SMarko Zec } 3682aca58e1SMarko Zec 3692aca58e1SMarko Zec static uint32_t 3702aca58e1SMarko Zec chunk_size(struct dxr_aux *da, struct direct_entry *fdesc) 3712aca58e1SMarko Zec { 3722aca58e1SMarko Zec 3732aca58e1SMarko Zec if (IS_SHORT_FORMAT(fdesc->fragments)) 3742aca58e1SMarko Zec return ((fdesc->fragments & FRAGS_MASK_SHORT) + 1); 3752aca58e1SMarko Zec else if (IS_XL_FORMAT(fdesc->fragments)) 3762aca58e1SMarko Zec return (da->range_tbl[fdesc->base].fragments + 2); 3772aca58e1SMarko Zec else /* if (IS_LONG_FORMAT(fdesc->fragments)) */ 3782aca58e1SMarko Zec return (fdesc->fragments + 1); 3792aca58e1SMarko Zec } 3802aca58e1SMarko Zec 3812aca58e1SMarko Zec static uint32_t 3822aca58e1SMarko Zec chunk_hash(struct dxr_aux *da, struct direct_entry *fdesc) 3832aca58e1SMarko Zec { 3842aca58e1SMarko Zec uint32_t size = chunk_size(da, fdesc); 3852aca58e1SMarko Zec uint32_t *p = (uint32_t *) &da->range_tbl[fdesc->base]; 3862aca58e1SMarko Zec uint32_t *l = (uint32_t *) &da->range_tbl[fdesc->base + size]; 3872aca58e1SMarko Zec uint32_t hash = fdesc->fragments; 3882aca58e1SMarko Zec 3892aca58e1SMarko Zec for (; p < l; p++) 3902aca58e1SMarko Zec hash = (hash << 7) + (hash >> 13) + *p; 3912aca58e1SMarko Zec 3922aca58e1SMarko Zec return (hash + (hash >> 16)); 3932aca58e1SMarko Zec } 3942aca58e1SMarko Zec 3952aca58e1SMarko Zec static int 3962aca58e1SMarko Zec chunk_ref(struct dxr_aux *da, uint32_t chunk) 3972aca58e1SMarko Zec { 3982aca58e1SMarko Zec struct direct_entry *fdesc = &da->direct_tbl[chunk]; 3992aca58e1SMarko Zec struct chunk_desc *cdp, *empty_cdp; 4002aca58e1SMarko Zec uint32_t base = fdesc->base; 4012aca58e1SMarko Zec uint32_t size = chunk_size(da, fdesc); 4022aca58e1SMarko Zec uint32_t hash = chunk_hash(da, fdesc); 40343880c51SMarko Zec int i; 4042aca58e1SMarko Zec 4052aca58e1SMarko Zec /* Find an existing descriptor */ 4062aca58e1SMarko Zec LIST_FOREACH(cdp, &da->chunk_hashtbl[hash & CHUNK_HASH_MASK], 4072aca58e1SMarko Zec cd_hash_le) { 4082aca58e1SMarko Zec if (cdp->cd_hash != hash || cdp->cd_cur_size != size || 4092aca58e1SMarko Zec memcmp(&da->range_tbl[base], &da->range_tbl[cdp->cd_base], 4102aca58e1SMarko Zec sizeof(struct range_entry_long) * size)) 4112aca58e1SMarko Zec continue; 4122aca58e1SMarko Zec da->rtbl_top = fdesc->base; 4132aca58e1SMarko Zec fdesc->base = cdp->cd_base; 4142aca58e1SMarko Zec cdp->cd_refcnt++; 4152aca58e1SMarko Zec return (0); 4162aca58e1SMarko Zec } 4172aca58e1SMarko Zec 41843880c51SMarko Zec /* No matching chunks found. Find an empty one to recycle. */ 41943880c51SMarko Zec for (cdp = NULL, i = size; cdp == NULL && i < UNUSED_BUCKETS; i++) 42043880c51SMarko Zec cdp = LIST_FIRST(&da->unused_chunks[i]); 42143880c51SMarko Zec 42243880c51SMarko Zec if (cdp == NULL) 42343880c51SMarko Zec LIST_FOREACH(empty_cdp, &da->unused_chunks[0], cd_hash_le) 4242aca58e1SMarko Zec if (empty_cdp->cd_max_size >= size && (cdp == NULL || 4252aca58e1SMarko Zec empty_cdp->cd_max_size < cdp->cd_max_size)) { 4262aca58e1SMarko Zec cdp = empty_cdp; 4272aca58e1SMarko Zec if (empty_cdp->cd_max_size == size) 4282aca58e1SMarko Zec break; 4292aca58e1SMarko Zec } 4302aca58e1SMarko Zec 4312aca58e1SMarko Zec if (cdp != NULL) { 4322aca58e1SMarko Zec /* Copy from heap into the recycled chunk */ 4332aca58e1SMarko Zec bcopy(&da->range_tbl[fdesc->base], &da->range_tbl[cdp->cd_base], 4342aca58e1SMarko Zec size * sizeof(struct range_entry_long)); 4352aca58e1SMarko Zec fdesc->base = cdp->cd_base; 4362aca58e1SMarko Zec da->rtbl_top -= size; 4371549575fSMarko Zec da->unused_chunks_size -= cdp->cd_max_size; 4382ac039f7SMarko Zec if (cdp->cd_max_size > size) { 4392aca58e1SMarko Zec /* Split the range in two, need a new descriptor */ 4402aca58e1SMarko Zec empty_cdp = uma_zalloc(chunk_zone, M_NOWAIT); 4412aca58e1SMarko Zec if (empty_cdp == NULL) 4422aca58e1SMarko Zec return (1); 44343880c51SMarko Zec LIST_INSERT_BEFORE(cdp, empty_cdp, cd_all_le); 44443880c51SMarko Zec empty_cdp->cd_base = cdp->cd_base + size; 4452ac039f7SMarko Zec empty_cdp->cd_cur_size = 0; 4462aca58e1SMarko Zec empty_cdp->cd_max_size = cdp->cd_max_size - size; 44743880c51SMarko Zec 44843880c51SMarko Zec i = empty_cdp->cd_max_size; 44943880c51SMarko Zec if (i >= UNUSED_BUCKETS) 45043880c51SMarko Zec i = 0; 45143880c51SMarko Zec LIST_INSERT_HEAD(&da->unused_chunks[i], empty_cdp, 45243880c51SMarko Zec cd_hash_le); 45343880c51SMarko Zec 4541549575fSMarko Zec da->unused_chunks_size += empty_cdp->cd_max_size; 4552aca58e1SMarko Zec cdp->cd_max_size = size; 4562aca58e1SMarko Zec } 4572aca58e1SMarko Zec LIST_REMOVE(cdp, cd_hash_le); 4582aca58e1SMarko Zec } else { 4592ac039f7SMarko Zec /* Alloc a new descriptor at the top of the heap*/ 4602aca58e1SMarko Zec cdp = uma_zalloc(chunk_zone, M_NOWAIT); 4612aca58e1SMarko Zec if (cdp == NULL) 4622aca58e1SMarko Zec return (1); 4632aca58e1SMarko Zec cdp->cd_max_size = size; 4642aca58e1SMarko Zec cdp->cd_base = fdesc->base; 4652aca58e1SMarko Zec LIST_INSERT_HEAD(&da->all_chunks, cdp, cd_all_le); 4664aa275f1SMarko Zec MPASS(cdp->cd_base + cdp->cd_max_size == da->rtbl_top); 4672aca58e1SMarko Zec } 4682aca58e1SMarko Zec 4692aca58e1SMarko Zec cdp->cd_hash = hash; 4702aca58e1SMarko Zec cdp->cd_refcnt = 1; 4712aca58e1SMarko Zec cdp->cd_cur_size = size; 4722aca58e1SMarko Zec LIST_INSERT_HEAD(&da->chunk_hashtbl[hash & CHUNK_HASH_MASK], cdp, 4732aca58e1SMarko Zec cd_hash_le); 4742aca58e1SMarko Zec if (da->rtbl_top >= da->rtbl_size) { 4752aca58e1SMarko Zec if (da->rtbl_top >= BASE_MAX) { 4762aca58e1SMarko Zec FIB_PRINTF(LOG_ERR, da->fd, 4772aca58e1SMarko Zec "structural limit exceeded at %d " 4782aca58e1SMarko Zec "range table elements", da->rtbl_top); 4792aca58e1SMarko Zec return (1); 4802aca58e1SMarko Zec } 4812aca58e1SMarko Zec da->rtbl_size += RTBL_SIZE_INCR; 4821549575fSMarko Zec i = (BASE_MAX - da->rtbl_top) * LOG_DEBUG / BASE_MAX; 4831549575fSMarko Zec FIB_PRINTF(i, da->fd, "range table at %d%% structural limit", 4842aca58e1SMarko Zec da->rtbl_top * 100 / BASE_MAX); 4852aca58e1SMarko Zec da->range_tbl = realloc(da->range_tbl, 4862aca58e1SMarko Zec sizeof(*da->range_tbl) * da->rtbl_size + FRAGS_PREF_SHORT, 4872aca58e1SMarko Zec M_DXRAUX, M_NOWAIT); 488ed541e20SMarko Zec if (da->range_tbl == NULL) { 489ed541e20SMarko Zec FIB_PRINTF(LOG_NOTICE, da->fd, 490ed541e20SMarko Zec "Unable to allocate DXR range table"); 4912aca58e1SMarko Zec return (1); 4922aca58e1SMarko Zec } 493ed541e20SMarko Zec } 4942aca58e1SMarko Zec 4952aca58e1SMarko Zec return (0); 4962aca58e1SMarko Zec } 4972aca58e1SMarko Zec 4982aca58e1SMarko Zec static void 4992aca58e1SMarko Zec chunk_unref(struct dxr_aux *da, uint32_t chunk) 5002aca58e1SMarko Zec { 5012aca58e1SMarko Zec struct direct_entry *fdesc = &da->direct_tbl[chunk]; 5022ac039f7SMarko Zec struct chunk_desc *cdp, *cdp2; 5032aca58e1SMarko Zec uint32_t base = fdesc->base; 5042aca58e1SMarko Zec uint32_t size = chunk_size(da, fdesc); 5052aca58e1SMarko Zec uint32_t hash = chunk_hash(da, fdesc); 50643880c51SMarko Zec int i; 5072aca58e1SMarko Zec 5082ac039f7SMarko Zec /* Find the corresponding descriptor */ 5092aca58e1SMarko Zec LIST_FOREACH(cdp, &da->chunk_hashtbl[hash & CHUNK_HASH_MASK], 5102aca58e1SMarko Zec cd_hash_le) 5112aca58e1SMarko Zec if (cdp->cd_hash == hash && cdp->cd_cur_size == size && 5122aca58e1SMarko Zec memcmp(&da->range_tbl[base], &da->range_tbl[cdp->cd_base], 5132aca58e1SMarko Zec sizeof(struct range_entry_long) * size) == 0) 5142aca58e1SMarko Zec break; 5152aca58e1SMarko Zec 5164aa275f1SMarko Zec MPASS(cdp != NULL); 5172aca58e1SMarko Zec if (--cdp->cd_refcnt > 0) 5182aca58e1SMarko Zec return; 5192aca58e1SMarko Zec 5202aca58e1SMarko Zec LIST_REMOVE(cdp, cd_hash_le); 5211549575fSMarko Zec da->unused_chunks_size += cdp->cd_max_size; 5222ac039f7SMarko Zec cdp->cd_cur_size = 0; 5232ac039f7SMarko Zec 5242ac039f7SMarko Zec /* Attempt to merge with the preceding chunk, if empty */ 5252ac039f7SMarko Zec cdp2 = LIST_NEXT(cdp, cd_all_le); 5262ac039f7SMarko Zec if (cdp2 != NULL && cdp2->cd_cur_size == 0) { 5274aa275f1SMarko Zec MPASS(cdp2->cd_base + cdp2->cd_max_size == cdp->cd_base); 5282ac039f7SMarko Zec LIST_REMOVE(cdp, cd_all_le); 5292ac039f7SMarko Zec LIST_REMOVE(cdp2, cd_hash_le); 5302ac039f7SMarko Zec cdp2->cd_max_size += cdp->cd_max_size; 5312ac039f7SMarko Zec uma_zfree(chunk_zone, cdp); 5322ac039f7SMarko Zec cdp = cdp2; 5332aca58e1SMarko Zec } 5342aca58e1SMarko Zec 5352ac039f7SMarko Zec /* Attempt to merge with the subsequent chunk, if empty */ 5362ac039f7SMarko Zec cdp2 = LIST_PREV(cdp, &da->all_chunks, chunk_desc, cd_all_le); 5372ac039f7SMarko Zec if (cdp2 != NULL && cdp2->cd_cur_size == 0) { 5384aa275f1SMarko Zec MPASS(cdp->cd_base + cdp->cd_max_size == cdp2->cd_base); 5392ac039f7SMarko Zec LIST_REMOVE(cdp, cd_all_le); 5402ac039f7SMarko Zec LIST_REMOVE(cdp2, cd_hash_le); 5412ac039f7SMarko Zec cdp2->cd_max_size += cdp->cd_max_size; 5422ac039f7SMarko Zec cdp2->cd_base = cdp->cd_base; 5432ac039f7SMarko Zec uma_zfree(chunk_zone, cdp); 5442ac039f7SMarko Zec cdp = cdp2; 5452ac039f7SMarko Zec } 5462ac039f7SMarko Zec 5472ac039f7SMarko Zec if (cdp->cd_base + cdp->cd_max_size == da->rtbl_top) { 5482ac039f7SMarko Zec /* Free the chunk on the top of the range heap, trim the heap */ 5494aa275f1SMarko Zec MPASS(cdp == LIST_FIRST(&da->all_chunks)); 5502aca58e1SMarko Zec da->rtbl_top -= cdp->cd_max_size; 5511549575fSMarko Zec da->unused_chunks_size -= cdp->cd_max_size; 5522aca58e1SMarko Zec LIST_REMOVE(cdp, cd_all_le); 5532aca58e1SMarko Zec uma_zfree(chunk_zone, cdp); 5542ac039f7SMarko Zec return; 5552aca58e1SMarko Zec } 5562ac039f7SMarko Zec 55743880c51SMarko Zec i = cdp->cd_max_size; 55843880c51SMarko Zec if (i >= UNUSED_BUCKETS) 55943880c51SMarko Zec i = 0; 56043880c51SMarko Zec LIST_INSERT_HEAD(&da->unused_chunks[i], cdp, cd_hash_le); 5612aca58e1SMarko Zec } 5622aca58e1SMarko Zec 5632aca58e1SMarko Zec static uint32_t 5642aca58e1SMarko Zec trie_hash(struct dxr_aux *da, uint32_t dxr_x, uint32_t index) 5652aca58e1SMarko Zec { 5662aca58e1SMarko Zec uint32_t i, *val; 5672aca58e1SMarko Zec uint32_t hash = 0; 5682aca58e1SMarko Zec 5692aca58e1SMarko Zec for (i = 0; i < (1 << dxr_x); i++) { 5702aca58e1SMarko Zec hash = (hash << 3) ^ (hash >> 3); 5712aca58e1SMarko Zec val = (uint32_t *) 5722aca58e1SMarko Zec (void *) &da->direct_tbl[(index << dxr_x) + i]; 5732aca58e1SMarko Zec hash += (*val << 5); 5742aca58e1SMarko Zec hash += (*val >> 5); 5752aca58e1SMarko Zec } 5762aca58e1SMarko Zec 5772aca58e1SMarko Zec return (hash + (hash >> 16)); 5782aca58e1SMarko Zec } 5792aca58e1SMarko Zec 5802aca58e1SMarko Zec static int 5812aca58e1SMarko Zec trie_ref(struct dxr_aux *da, uint32_t index) 5822aca58e1SMarko Zec { 5832aca58e1SMarko Zec struct trie_desc *tp; 5842aca58e1SMarko Zec uint32_t dxr_d = da->d_bits; 5852aca58e1SMarko Zec uint32_t dxr_x = DXR_TRIE_BITS - dxr_d; 5862aca58e1SMarko Zec uint32_t hash = trie_hash(da, dxr_x, index); 5872aca58e1SMarko Zec 5882aca58e1SMarko Zec /* Find an existing descriptor */ 5892aca58e1SMarko Zec LIST_FOREACH(tp, &da->trie_hashtbl[hash & TRIE_HASH_MASK], td_hash_le) 5902aca58e1SMarko Zec if (tp->td_hash == hash && 5912aca58e1SMarko Zec memcmp(&da->direct_tbl[index << dxr_x], 5922aca58e1SMarko Zec &da->x_tbl[tp->td_index << dxr_x], 5932aca58e1SMarko Zec sizeof(*da->x_tbl) << dxr_x) == 0) { 5942aca58e1SMarko Zec tp->td_refcnt++; 5952aca58e1SMarko Zec da->trietbl[index] = tp; 5962aca58e1SMarko Zec return(tp->td_index); 5972aca58e1SMarko Zec } 5982aca58e1SMarko Zec 5992aca58e1SMarko Zec tp = LIST_FIRST(&da->unused_trie); 6002aca58e1SMarko Zec if (tp != NULL) { 6012aca58e1SMarko Zec LIST_REMOVE(tp, td_hash_le); 6022aca58e1SMarko Zec da->unused_trie_cnt--; 6032aca58e1SMarko Zec } else { 6042aca58e1SMarko Zec tp = uma_zalloc(trie_zone, M_NOWAIT); 6052aca58e1SMarko Zec if (tp == NULL) 6062aca58e1SMarko Zec return (-1); 6072aca58e1SMarko Zec LIST_INSERT_HEAD(&da->all_trie, tp, td_all_le); 6082aca58e1SMarko Zec tp->td_index = da->all_trie_cnt++; 6092aca58e1SMarko Zec } 6102aca58e1SMarko Zec 6112aca58e1SMarko Zec tp->td_hash = hash; 6122aca58e1SMarko Zec tp->td_refcnt = 1; 6132aca58e1SMarko Zec LIST_INSERT_HEAD(&da->trie_hashtbl[hash & TRIE_HASH_MASK], tp, 6142aca58e1SMarko Zec td_hash_le); 6152aca58e1SMarko Zec memcpy(&da->x_tbl[tp->td_index << dxr_x], 6162aca58e1SMarko Zec &da->direct_tbl[index << dxr_x], sizeof(*da->x_tbl) << dxr_x); 6172aca58e1SMarko Zec da->trietbl[index] = tp; 6182aca58e1SMarko Zec if (da->all_trie_cnt >= da->xtbl_size >> dxr_x) { 6192aca58e1SMarko Zec da->xtbl_size += XTBL_SIZE_INCR; 6202aca58e1SMarko Zec da->x_tbl = realloc(da->x_tbl, 6212aca58e1SMarko Zec sizeof(*da->x_tbl) * da->xtbl_size, M_DXRAUX, M_NOWAIT); 622ed541e20SMarko Zec if (da->x_tbl == NULL) { 623ed541e20SMarko Zec FIB_PRINTF(LOG_NOTICE, da->fd, 624ed541e20SMarko Zec "Unable to allocate DXR extension table"); 6252aca58e1SMarko Zec return (-1); 6262aca58e1SMarko Zec } 627ed541e20SMarko Zec } 6282aca58e1SMarko Zec return(tp->td_index); 6292aca58e1SMarko Zec } 6302aca58e1SMarko Zec 6312aca58e1SMarko Zec static void 6322aca58e1SMarko Zec trie_unref(struct dxr_aux *da, uint32_t index) 6332aca58e1SMarko Zec { 6342aca58e1SMarko Zec struct trie_desc *tp = da->trietbl[index]; 6352aca58e1SMarko Zec 6362aca58e1SMarko Zec if (tp == NULL) 6372aca58e1SMarko Zec return; 6382aca58e1SMarko Zec da->trietbl[index] = NULL; 6392aca58e1SMarko Zec if (--tp->td_refcnt > 0) 6402aca58e1SMarko Zec return; 6412aca58e1SMarko Zec 6422aca58e1SMarko Zec LIST_REMOVE(tp, td_hash_le); 6432aca58e1SMarko Zec da->unused_trie_cnt++; 6442aca58e1SMarko Zec if (tp->td_index != da->all_trie_cnt - 1) { 6452aca58e1SMarko Zec LIST_INSERT_HEAD(&da->unused_trie, tp, td_hash_le); 6462aca58e1SMarko Zec return; 6472aca58e1SMarko Zec } 6482aca58e1SMarko Zec 6492aca58e1SMarko Zec do { 6502aca58e1SMarko Zec da->all_trie_cnt--; 6512aca58e1SMarko Zec da->unused_trie_cnt--; 6522aca58e1SMarko Zec LIST_REMOVE(tp, td_all_le); 6532aca58e1SMarko Zec uma_zfree(trie_zone, tp); 6542aca58e1SMarko Zec LIST_FOREACH(tp, &da->unused_trie, td_hash_le) 6552aca58e1SMarko Zec if (tp->td_index == da->all_trie_cnt - 1) { 6562aca58e1SMarko Zec LIST_REMOVE(tp, td_hash_le); 6572aca58e1SMarko Zec break; 6582aca58e1SMarko Zec } 6592aca58e1SMarko Zec } while (tp != NULL); 6602aca58e1SMarko Zec } 6612aca58e1SMarko Zec 6622aca58e1SMarko Zec static void 6632aca58e1SMarko Zec heap_inject(struct dxr_aux *da, uint32_t start, uint32_t end, uint32_t preflen, 6642aca58e1SMarko Zec uint32_t nh) 6652aca58e1SMarko Zec { 6662aca58e1SMarko Zec struct heap_entry *fhp; 6672aca58e1SMarko Zec int i; 6682aca58e1SMarko Zec 6692aca58e1SMarko Zec for (i = da->heap_index; i >= 0; i--) { 6702aca58e1SMarko Zec if (preflen > da->heap[i].preflen) 6712aca58e1SMarko Zec break; 6722aca58e1SMarko Zec else if (preflen < da->heap[i].preflen) 6732aca58e1SMarko Zec da->heap[i + 1] = da->heap[i]; 6742aca58e1SMarko Zec else 6752aca58e1SMarko Zec return; 6762aca58e1SMarko Zec } 6772aca58e1SMarko Zec 6782aca58e1SMarko Zec fhp = &da->heap[i + 1]; 6792aca58e1SMarko Zec fhp->preflen = preflen; 6802aca58e1SMarko Zec fhp->start = start; 6812aca58e1SMarko Zec fhp->end = end; 6822aca58e1SMarko Zec fhp->nexthop = nh; 6832aca58e1SMarko Zec da->heap_index++; 6842aca58e1SMarko Zec } 6852aca58e1SMarko Zec 6862aca58e1SMarko Zec static int 6872aca58e1SMarko Zec dxr_walk(struct rtentry *rt, void *arg) 6882aca58e1SMarko Zec { 6892aca58e1SMarko Zec struct dxr_aux *da = arg; 6902aca58e1SMarko Zec uint32_t chunk = da->work_chunk; 6912aca58e1SMarko Zec uint32_t first = chunk << DXR_RANGE_SHIFT; 6922aca58e1SMarko Zec uint32_t last = first | DXR_RANGE_MASK; 6932aca58e1SMarko Zec struct range_entry_long *fp = 6942aca58e1SMarko Zec &da->range_tbl[da->rtbl_top + da->rtbl_work_frags].re; 6952aca58e1SMarko Zec struct heap_entry *fhp = &da->heap[da->heap_index]; 6962aca58e1SMarko Zec uint32_t preflen, nh, start, end, scopeid; 6972aca58e1SMarko Zec struct in_addr addr; 6982aca58e1SMarko Zec 6992aca58e1SMarko Zec rt_get_inet_prefix_plen(rt, &addr, &preflen, &scopeid); 7002aca58e1SMarko Zec start = ntohl(addr.s_addr); 7012aca58e1SMarko Zec if (start > last) 7022aca58e1SMarko Zec return (-1); /* Beyond chunk boundaries, we are done */ 7032aca58e1SMarko Zec if (start < first) 7042aca58e1SMarko Zec return (0); /* Skip this route */ 7052aca58e1SMarko Zec 7062aca58e1SMarko Zec end = start; 7072aca58e1SMarko Zec if (preflen < 32) 7082aca58e1SMarko Zec end |= (0xffffffffU >> preflen); 7092aca58e1SMarko Zec nh = fib_get_nhop_idx(da->fd, rt_get_raw_nhop(rt)); 7102aca58e1SMarko Zec 7112aca58e1SMarko Zec if (start == fhp->start) 7122aca58e1SMarko Zec heap_inject(da, start, end, preflen, nh); 7132aca58e1SMarko Zec else { 7142aca58e1SMarko Zec /* start > fhp->start */ 7152aca58e1SMarko Zec while (start > fhp->end) { 7162aca58e1SMarko Zec uint32_t oend = fhp->end; 7172aca58e1SMarko Zec 7182aca58e1SMarko Zec if (da->heap_index > 0) { 7192aca58e1SMarko Zec fhp--; 7202aca58e1SMarko Zec da->heap_index--; 7212aca58e1SMarko Zec } else 7222aca58e1SMarko Zec initheap(da, fhp->end + 1, chunk); 7232aca58e1SMarko Zec if (fhp->end > oend && fhp->nexthop != fp->nexthop) { 7242aca58e1SMarko Zec fp++; 7252aca58e1SMarko Zec da->rtbl_work_frags++; 7262aca58e1SMarko Zec fp->start = (oend + 1) & DXR_RANGE_MASK; 7272aca58e1SMarko Zec fp->nexthop = fhp->nexthop; 7282aca58e1SMarko Zec } 7292aca58e1SMarko Zec } 7302aca58e1SMarko Zec if (start > ((chunk << DXR_RANGE_SHIFT) | fp->start) && 7312aca58e1SMarko Zec nh != fp->nexthop) { 7322aca58e1SMarko Zec fp++; 7332aca58e1SMarko Zec da->rtbl_work_frags++; 7342aca58e1SMarko Zec fp->start = start & DXR_RANGE_MASK; 7352aca58e1SMarko Zec } else if (da->rtbl_work_frags) { 7362aca58e1SMarko Zec if ((--fp)->nexthop == nh) 7372aca58e1SMarko Zec da->rtbl_work_frags--; 7382aca58e1SMarko Zec else 7392aca58e1SMarko Zec fp++; 7402aca58e1SMarko Zec } 7412aca58e1SMarko Zec fp->nexthop = nh; 7422aca58e1SMarko Zec heap_inject(da, start, end, preflen, nh); 7432aca58e1SMarko Zec } 7442aca58e1SMarko Zec 7452aca58e1SMarko Zec return (0); 7462aca58e1SMarko Zec } 7472aca58e1SMarko Zec 7482aca58e1SMarko Zec static int 7492aca58e1SMarko Zec update_chunk(struct dxr_aux *da, uint32_t chunk) 7502aca58e1SMarko Zec { 7512aca58e1SMarko Zec struct range_entry_long *fp; 7522aca58e1SMarko Zec #if DXR_TRIE_BITS < 24 7532aca58e1SMarko Zec struct range_entry_short *fps; 7542aca58e1SMarko Zec uint32_t start, nh, i; 7552aca58e1SMarko Zec #endif 7562aca58e1SMarko Zec struct heap_entry *fhp; 7572aca58e1SMarko Zec uint32_t first = chunk << DXR_RANGE_SHIFT; 7582aca58e1SMarko Zec uint32_t last = first | DXR_RANGE_MASK; 7592aca58e1SMarko Zec 7602aca58e1SMarko Zec if (da->direct_tbl[chunk].fragments != FRAGS_MARK_HIT) 7612aca58e1SMarko Zec chunk_unref(da, chunk); 7622aca58e1SMarko Zec 7632aca58e1SMarko Zec initheap(da, first, chunk); 7642aca58e1SMarko Zec 7652aca58e1SMarko Zec fp = &da->range_tbl[da->rtbl_top].re; 7662aca58e1SMarko Zec da->rtbl_work_frags = 0; 7672aca58e1SMarko Zec fp->start = first & DXR_RANGE_MASK; 7682aca58e1SMarko Zec fp->nexthop = da->heap[0].nexthop; 7692aca58e1SMarko Zec 7702aca58e1SMarko Zec da->dst.sin_addr.s_addr = htonl(first); 7712aca58e1SMarko Zec da->mask.sin_addr.s_addr = htonl(~DXR_RANGE_MASK); 7722aca58e1SMarko Zec 7732aca58e1SMarko Zec da->work_chunk = chunk; 7742aca58e1SMarko Zec rib_walk_from(da->fibnum, AF_INET, RIB_FLAG_LOCKED, 7752aca58e1SMarko Zec (struct sockaddr *) &da->dst, (struct sockaddr *) &da->mask, 7762aca58e1SMarko Zec dxr_walk, da); 7772aca58e1SMarko Zec 7782aca58e1SMarko Zec /* Flush any remaining objects on the heap */ 7792aca58e1SMarko Zec fp = &da->range_tbl[da->rtbl_top + da->rtbl_work_frags].re; 7802aca58e1SMarko Zec fhp = &da->heap[da->heap_index]; 7812aca58e1SMarko Zec while (fhp->preflen > DXR_TRIE_BITS) { 7822aca58e1SMarko Zec uint32_t oend = fhp->end; 7832aca58e1SMarko Zec 7842aca58e1SMarko Zec if (da->heap_index > 0) { 7852aca58e1SMarko Zec fhp--; 7862aca58e1SMarko Zec da->heap_index--; 7872aca58e1SMarko Zec } else 7882aca58e1SMarko Zec initheap(da, fhp->end + 1, chunk); 7892aca58e1SMarko Zec if (fhp->end > oend && fhp->nexthop != fp->nexthop) { 7902aca58e1SMarko Zec /* Have we crossed the upper chunk boundary? */ 7912aca58e1SMarko Zec if (oend >= last) 7922aca58e1SMarko Zec break; 7932aca58e1SMarko Zec fp++; 7942aca58e1SMarko Zec da->rtbl_work_frags++; 7952aca58e1SMarko Zec fp->start = (oend + 1) & DXR_RANGE_MASK; 7962aca58e1SMarko Zec fp->nexthop = fhp->nexthop; 7972aca58e1SMarko Zec } 7982aca58e1SMarko Zec } 7992aca58e1SMarko Zec 8002aca58e1SMarko Zec /* Direct hit if the chunk contains only a single fragment */ 8012aca58e1SMarko Zec if (da->rtbl_work_frags == 0) { 8022aca58e1SMarko Zec da->direct_tbl[chunk].base = fp->nexthop; 8032aca58e1SMarko Zec da->direct_tbl[chunk].fragments = FRAGS_MARK_HIT; 8042aca58e1SMarko Zec return (0); 8052aca58e1SMarko Zec } 8062aca58e1SMarko Zec 8072aca58e1SMarko Zec da->direct_tbl[chunk].base = da->rtbl_top; 8082aca58e1SMarko Zec da->direct_tbl[chunk].fragments = da->rtbl_work_frags; 8092aca58e1SMarko Zec 8102aca58e1SMarko Zec #if DXR_TRIE_BITS < 24 8112aca58e1SMarko Zec /* Check whether the chunk can be more compactly encoded */ 8122aca58e1SMarko Zec fp = &da->range_tbl[da->rtbl_top].re; 8132aca58e1SMarko Zec for (i = 0; i <= da->rtbl_work_frags; i++, fp++) 8142aca58e1SMarko Zec if ((fp->start & 0xff) != 0 || fp->nexthop > RE_SHORT_MAX_NH) 8152aca58e1SMarko Zec break; 8162aca58e1SMarko Zec if (i == da->rtbl_work_frags + 1) { 8172aca58e1SMarko Zec fp = &da->range_tbl[da->rtbl_top].re; 8182aca58e1SMarko Zec fps = (void *) fp; 8192aca58e1SMarko Zec for (i = 0; i <= da->rtbl_work_frags; i++, fp++, fps++) { 8202aca58e1SMarko Zec start = fp->start; 8212aca58e1SMarko Zec nh = fp->nexthop; 8222aca58e1SMarko Zec fps->start = start >> 8; 8232aca58e1SMarko Zec fps->nexthop = nh; 8242aca58e1SMarko Zec } 8252aca58e1SMarko Zec fps->start = start >> 8; 8262aca58e1SMarko Zec fps->nexthop = nh; 8272aca58e1SMarko Zec da->rtbl_work_frags >>= 1; 8282aca58e1SMarko Zec da->direct_tbl[chunk].fragments = 8292aca58e1SMarko Zec da->rtbl_work_frags | FRAGS_PREF_SHORT; 8302aca58e1SMarko Zec } else 8312aca58e1SMarko Zec #endif 8322aca58e1SMarko Zec if (da->rtbl_work_frags >= FRAGS_MARK_HIT) { 8332aca58e1SMarko Zec da->direct_tbl[chunk].fragments = FRAGS_MARK_XL; 8342aca58e1SMarko Zec memmove(&da->range_tbl[da->rtbl_top + 1], 8352aca58e1SMarko Zec &da->range_tbl[da->rtbl_top], 8362aca58e1SMarko Zec (da->rtbl_work_frags + 1) * sizeof(*da->range_tbl)); 8372aca58e1SMarko Zec da->range_tbl[da->rtbl_top].fragments = da->rtbl_work_frags; 8382aca58e1SMarko Zec da->rtbl_work_frags++; 8392aca58e1SMarko Zec } 8402aca58e1SMarko Zec da->rtbl_top += (da->rtbl_work_frags + 1); 8412aca58e1SMarko Zec return (chunk_ref(da, chunk)); 8422aca58e1SMarko Zec } 8432aca58e1SMarko Zec 8442aca58e1SMarko Zec static void 8452aca58e1SMarko Zec dxr_build(struct dxr *dxr) 8462aca58e1SMarko Zec { 8472aca58e1SMarko Zec struct dxr_aux *da = dxr->aux; 8482aca58e1SMarko Zec struct chunk_desc *cdp; 8492aca58e1SMarko Zec struct rib_rtable_info rinfo; 8502aca58e1SMarko Zec struct timeval t0, t1, t2, t3; 8512aca58e1SMarko Zec uint32_t r_size, dxr_tot_size; 8522aca58e1SMarko Zec uint32_t i, m, range_rebuild = 0; 8531549575fSMarko Zec uint32_t range_frag; 8542aca58e1SMarko Zec struct trie_desc *tp; 8552aca58e1SMarko Zec uint32_t d_tbl_size, dxr_x, d_size, x_size; 8562aca58e1SMarko Zec uint32_t ti, trie_rebuild = 0, prev_size = 0; 8571549575fSMarko Zec uint32_t trie_frag; 8582aca58e1SMarko Zec 8594aa275f1SMarko Zec MPASS(dxr->d == NULL); 8602aca58e1SMarko Zec 8612aca58e1SMarko Zec if (da == NULL) { 8622aca58e1SMarko Zec da = malloc(sizeof(*dxr->aux), M_DXRAUX, M_NOWAIT); 863ed541e20SMarko Zec if (da == NULL) { 864ed541e20SMarko Zec FIB_PRINTF(LOG_NOTICE, dxr->fd, 865ed541e20SMarko Zec "Unable to allocate DXR aux struct"); 8662aca58e1SMarko Zec return; 867ed541e20SMarko Zec } 8682aca58e1SMarko Zec dxr->aux = da; 8692aca58e1SMarko Zec da->fibnum = dxr->fibnum; 870b24e353fSMarko Zec da->fd = dxr->fd; 8712aca58e1SMarko Zec da->refcnt = 1; 8722aca58e1SMarko Zec LIST_INIT(&da->all_chunks); 8732aca58e1SMarko Zec LIST_INIT(&da->all_trie); 8742aca58e1SMarko Zec da->rtbl_size = RTBL_SIZE_INCR; 8752aca58e1SMarko Zec da->range_tbl = NULL; 8762aca58e1SMarko Zec da->xtbl_size = XTBL_SIZE_INCR; 8772aca58e1SMarko Zec da->x_tbl = NULL; 8782aca58e1SMarko Zec bzero(&da->dst, sizeof(da->dst)); 8792aca58e1SMarko Zec bzero(&da->mask, sizeof(da->mask)); 8802aca58e1SMarko Zec da->dst.sin_len = sizeof(da->dst); 8812aca58e1SMarko Zec da->mask.sin_len = sizeof(da->mask); 8822aca58e1SMarko Zec da->dst.sin_family = AF_INET; 8832aca58e1SMarko Zec da->mask.sin_family = AF_INET; 8842aca58e1SMarko Zec } 8852aca58e1SMarko Zec if (da->range_tbl == NULL) { 8862aca58e1SMarko Zec da->range_tbl = malloc(sizeof(*da->range_tbl) * da->rtbl_size 8872aca58e1SMarko Zec + FRAGS_PREF_SHORT, M_DXRAUX, M_NOWAIT); 888ed541e20SMarko Zec if (da->range_tbl == NULL) { 889ed541e20SMarko Zec FIB_PRINTF(LOG_NOTICE, da->fd, 890ed541e20SMarko Zec "Unable to allocate DXR range table"); 8912aca58e1SMarko Zec return; 892ed541e20SMarko Zec } 8932aca58e1SMarko Zec range_rebuild = 1; 8942aca58e1SMarko Zec } 8952aca58e1SMarko Zec if (da->x_tbl == NULL) { 8962aca58e1SMarko Zec da->x_tbl = malloc(sizeof(*da->x_tbl) * da->xtbl_size, 8972aca58e1SMarko Zec M_DXRAUX, M_NOWAIT); 898ed541e20SMarko Zec if (da->x_tbl == NULL) { 899ed541e20SMarko Zec FIB_PRINTF(LOG_NOTICE, da->fd, 900ed541e20SMarko Zec "Unable to allocate DXR extension table"); 9012aca58e1SMarko Zec return; 902ed541e20SMarko Zec } 9032aca58e1SMarko Zec trie_rebuild = 1; 9042aca58e1SMarko Zec } 9052aca58e1SMarko Zec 9062aca58e1SMarko Zec microuptime(&t0); 9072aca58e1SMarko Zec 9082aca58e1SMarko Zec dxr->nh_tbl = fib_get_nhop_array(da->fd); 9092aca58e1SMarko Zec fib_get_rtable_info(fib_get_rh(da->fd), &rinfo); 9102aca58e1SMarko Zec 9111549575fSMarko Zec if (da->updates_low > da->updates_high) 9122aca58e1SMarko Zec range_rebuild = 1; 9131549575fSMarko Zec 9141549575fSMarko Zec range_build: 9152aca58e1SMarko Zec if (range_rebuild) { 9162aca58e1SMarko Zec /* Bulk cleanup */ 9172aca58e1SMarko Zec bzero(da->chunk_hashtbl, sizeof(da->chunk_hashtbl)); 9182aca58e1SMarko Zec while ((cdp = LIST_FIRST(&da->all_chunks)) != NULL) { 9192aca58e1SMarko Zec LIST_REMOVE(cdp, cd_all_le); 9202aca58e1SMarko Zec uma_zfree(chunk_zone, cdp); 9212aca58e1SMarko Zec } 92243880c51SMarko Zec for (i = 0; i < UNUSED_BUCKETS; i++) 92343880c51SMarko Zec LIST_INIT(&da->unused_chunks[i]); 9241549575fSMarko Zec da->unused_chunks_size = 0; 9252aca58e1SMarko Zec da->rtbl_top = 0; 9262aca58e1SMarko Zec da->updates_low = 0; 9272aca58e1SMarko Zec da->updates_high = DIRECT_TBL_SIZE - 1; 9282aca58e1SMarko Zec memset(da->updates_mask, 0xff, sizeof(da->updates_mask)); 9292aca58e1SMarko Zec for (i = 0; i < DIRECT_TBL_SIZE; i++) { 9302aca58e1SMarko Zec da->direct_tbl[i].fragments = FRAGS_MARK_HIT; 9312aca58e1SMarko Zec da->direct_tbl[i].base = 0; 9322aca58e1SMarko Zec } 9332aca58e1SMarko Zec } 9342aca58e1SMarko Zec da->prefixes = rinfo.num_prefixes; 9352aca58e1SMarko Zec 9362aca58e1SMarko Zec /* DXR: construct direct & range table */ 9372aca58e1SMarko Zec for (i = da->updates_low; i <= da->updates_high; i++) { 9382aca58e1SMarko Zec m = da->updates_mask[i >> 5] >> (i & 0x1f); 9392aca58e1SMarko Zec if (m == 0) 9402aca58e1SMarko Zec i |= 0x1f; 9412aca58e1SMarko Zec else if (m & 1 && update_chunk(da, i) != 0) 9422aca58e1SMarko Zec return; 9432aca58e1SMarko Zec } 9441549575fSMarko Zec 9451549575fSMarko Zec range_frag = 0; 9461549575fSMarko Zec if (da->rtbl_top) 9471549575fSMarko Zec range_frag = da->unused_chunks_size * 10000ULL / da->rtbl_top; 9481549575fSMarko Zec if (range_frag > V_frag_limit) { 9491549575fSMarko Zec range_rebuild = 1; 9501549575fSMarko Zec goto range_build; 9511549575fSMarko Zec } 9521549575fSMarko Zec 9532aca58e1SMarko Zec r_size = sizeof(*da->range_tbl) * da->rtbl_top; 9542aca58e1SMarko Zec microuptime(&t1); 9552aca58e1SMarko Zec 9561549575fSMarko Zec if (range_rebuild || 9572aca58e1SMarko Zec abs(fls(da->prefixes) - fls(da->trie_rebuilt_prefixes)) > 1) 9582aca58e1SMarko Zec trie_rebuild = 1; 9591549575fSMarko Zec 9601549575fSMarko Zec trie_build: 9612aca58e1SMarko Zec if (trie_rebuild) { 9622aca58e1SMarko Zec da->trie_rebuilt_prefixes = da->prefixes; 9632aca58e1SMarko Zec da->d_bits = DXR_D; 9642aca58e1SMarko Zec da->updates_low = 0; 9652aca58e1SMarko Zec da->updates_high = DIRECT_TBL_SIZE - 1; 9661549575fSMarko Zec if (!range_rebuild) 9671549575fSMarko Zec memset(da->updates_mask, 0xff, 9681549575fSMarko Zec sizeof(da->updates_mask)); 9692aca58e1SMarko Zec } 9702aca58e1SMarko Zec 9712aca58e1SMarko Zec dxr2_try_squeeze: 9722aca58e1SMarko Zec if (trie_rebuild) { 9732aca58e1SMarko Zec /* Bulk cleanup */ 9742aca58e1SMarko Zec bzero(da->trietbl, sizeof(da->trietbl)); 9752aca58e1SMarko Zec bzero(da->trie_hashtbl, sizeof(da->trie_hashtbl)); 9762aca58e1SMarko Zec while ((tp = LIST_FIRST(&da->all_trie)) != NULL) { 9772aca58e1SMarko Zec LIST_REMOVE(tp, td_all_le); 9782aca58e1SMarko Zec uma_zfree(trie_zone, tp); 9792aca58e1SMarko Zec } 9802aca58e1SMarko Zec LIST_INIT(&da->unused_trie); 9812aca58e1SMarko Zec da->all_trie_cnt = da->unused_trie_cnt = 0; 9822aca58e1SMarko Zec } 9832aca58e1SMarko Zec 9842aca58e1SMarko Zec /* Populate d_tbl, x_tbl */ 9852aca58e1SMarko Zec dxr_x = DXR_TRIE_BITS - da->d_bits; 9862aca58e1SMarko Zec d_tbl_size = (1 << da->d_bits); 9872aca58e1SMarko Zec 9882aca58e1SMarko Zec for (i = da->updates_low >> dxr_x; i <= da->updates_high >> dxr_x; 9892aca58e1SMarko Zec i++) { 990b51f8baeSMarko Zec if (!trie_rebuild) { 991b51f8baeSMarko Zec m = 0; 992b51f8baeSMarko Zec for (int j = 0; j < (1 << dxr_x); j += 32) 993b51f8baeSMarko Zec m |= da->updates_mask[((i << dxr_x) + j) >> 5]; 994b51f8baeSMarko Zec if (m == 0) 995b51f8baeSMarko Zec continue; 9962aca58e1SMarko Zec trie_unref(da, i); 997b51f8baeSMarko Zec } 9982aca58e1SMarko Zec ti = trie_ref(da, i); 9992aca58e1SMarko Zec if (ti < 0) 10002aca58e1SMarko Zec return; 10012aca58e1SMarko Zec da->d_tbl[i] = ti; 10022aca58e1SMarko Zec } 10032aca58e1SMarko Zec 10041549575fSMarko Zec trie_frag = 0; 10051549575fSMarko Zec if (da->all_trie_cnt) 10061549575fSMarko Zec trie_frag = da->unused_trie_cnt * 10000ULL / da->all_trie_cnt; 10071549575fSMarko Zec if (trie_frag > V_frag_limit) { 10081549575fSMarko Zec trie_rebuild = 1; 10091549575fSMarko Zec goto trie_build; 10101549575fSMarko Zec } 10111549575fSMarko Zec 10122aca58e1SMarko Zec d_size = sizeof(*da->d_tbl) * d_tbl_size; 10132aca58e1SMarko Zec x_size = sizeof(*da->x_tbl) * DIRECT_TBL_SIZE / d_tbl_size 10142aca58e1SMarko Zec * da->all_trie_cnt; 10152aca58e1SMarko Zec dxr_tot_size = d_size + x_size + r_size; 10162aca58e1SMarko Zec 10172aca58e1SMarko Zec if (trie_rebuild == 1) { 10182aca58e1SMarko Zec /* Try to find a more compact D/X split */ 10192aca58e1SMarko Zec if (prev_size == 0 || dxr_tot_size <= prev_size) 10202aca58e1SMarko Zec da->d_bits--; 10212aca58e1SMarko Zec else { 10222aca58e1SMarko Zec da->d_bits++; 10232aca58e1SMarko Zec trie_rebuild = 2; 10242aca58e1SMarko Zec } 10252aca58e1SMarko Zec prev_size = dxr_tot_size; 10262aca58e1SMarko Zec goto dxr2_try_squeeze; 10272aca58e1SMarko Zec } 10282aca58e1SMarko Zec microuptime(&t2); 10292aca58e1SMarko Zec 10302aca58e1SMarko Zec dxr->d = malloc(dxr_tot_size, M_DXRLPM, M_NOWAIT); 1031ed541e20SMarko Zec if (dxr->d == NULL) { 1032ed541e20SMarko Zec FIB_PRINTF(LOG_NOTICE, da->fd, 1033ed541e20SMarko Zec "Unable to allocate DXR lookup table"); 10342aca58e1SMarko Zec return; 1035ed541e20SMarko Zec } 10362aca58e1SMarko Zec memcpy(dxr->d, da->d_tbl, d_size); 10372aca58e1SMarko Zec dxr->x = ((char *) dxr->d) + d_size; 10382aca58e1SMarko Zec memcpy(dxr->x, da->x_tbl, x_size); 10392aca58e1SMarko Zec dxr->r = ((char *) dxr->x) + x_size; 10402aca58e1SMarko Zec dxr->d_shift = 32 - da->d_bits; 10412aca58e1SMarko Zec dxr->x_shift = dxr_x; 10422aca58e1SMarko Zec dxr->x_mask = 0xffffffffU >> (32 - dxr_x); 10432aca58e1SMarko Zec memcpy(dxr->r, da->range_tbl, r_size); 10442aca58e1SMarko Zec 10452aca58e1SMarko Zec if (da->updates_low <= da->updates_high) 10462aca58e1SMarko Zec bzero(&da->updates_mask[da->updates_low / 32], 10472aca58e1SMarko Zec (da->updates_high - da->updates_low) / 8 + 1); 10482aca58e1SMarko Zec da->updates_low = DIRECT_TBL_SIZE - 1; 10492aca58e1SMarko Zec da->updates_high = 0; 10502aca58e1SMarko Zec microuptime(&t3); 10512aca58e1SMarko Zec 10522aca58e1SMarko Zec FIB_PRINTF(LOG_INFO, da->fd, "D%dX%dR, %d prefixes, %d nhops (max)", 10532aca58e1SMarko Zec da->d_bits, dxr_x, rinfo.num_prefixes, rinfo.num_nhops); 1054eb3148ccSMarko Zec i = dxr_tot_size * 100; 1055eb3148ccSMarko Zec if (rinfo.num_prefixes) 1056eb3148ccSMarko Zec i /= rinfo.num_prefixes; 10572aca58e1SMarko Zec FIB_PRINTF(LOG_INFO, da->fd, "%d.%02d KBytes, %d.%02d Bytes/prefix", 10582aca58e1SMarko Zec dxr_tot_size / 1024, dxr_tot_size * 100 / 1024 % 100, 10592aca58e1SMarko Zec i / 100, i % 100); 10601549575fSMarko Zec FIB_PRINTF(LOG_INFO, da->fd, 10611549575fSMarko Zec "%d.%02d%% trie, %d.%02d%% range fragmentation", 10621549575fSMarko Zec trie_frag / 100, trie_frag % 100, 10631549575fSMarko Zec range_frag / 100, range_frag % 100); 10642aca58e1SMarko Zec i = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec; 10652aca58e1SMarko Zec FIB_PRINTF(LOG_INFO, da->fd, "range table %s in %u.%03u ms", 10662aca58e1SMarko Zec range_rebuild ? "rebuilt" : "updated", i / 1000, i % 1000); 10672aca58e1SMarko Zec i = (t2.tv_sec - t1.tv_sec) * 1000000 + t2.tv_usec - t1.tv_usec; 10682aca58e1SMarko Zec FIB_PRINTF(LOG_INFO, da->fd, "trie %s in %u.%03u ms", 10692aca58e1SMarko Zec trie_rebuild ? "rebuilt" : "updated", i / 1000, i % 1000); 10702aca58e1SMarko Zec i = (t3.tv_sec - t2.tv_sec) * 1000000 + t3.tv_usec - t2.tv_usec; 10712aca58e1SMarko Zec FIB_PRINTF(LOG_INFO, da->fd, "snapshot forked in %u.%03u ms", 10722aca58e1SMarko Zec i / 1000, i % 1000); 10732aca58e1SMarko Zec } 10742aca58e1SMarko Zec 10752aca58e1SMarko Zec /* 10765295e891SMarko Zec * Glue functions for attaching to the FIB_ALGO infrastructure. 10772aca58e1SMarko Zec */ 10782aca58e1SMarko Zec 10792aca58e1SMarko Zec static struct nhop_object * 10802aca58e1SMarko Zec dxr_fib_lookup(void *algo_data, const struct flm_lookup_key key, 10812aca58e1SMarko Zec uint32_t scopeid) 10822aca58e1SMarko Zec { 10832aca58e1SMarko Zec struct dxr *dxr = algo_data; 10842aca58e1SMarko Zec 1085e7abe200SMarko Zec return (dxr->nh_tbl[dxr_lookup(dxr, ntohl(key.addr4.s_addr))]); 10862aca58e1SMarko Zec } 10872aca58e1SMarko Zec 10882aca58e1SMarko Zec static enum flm_op_result 10892aca58e1SMarko Zec dxr_init(uint32_t fibnum, struct fib_data *fd, void *old_data, void **data) 10902aca58e1SMarko Zec { 10912aca58e1SMarko Zec struct dxr *old_dxr = old_data; 10922aca58e1SMarko Zec struct dxr_aux *da = NULL; 10932aca58e1SMarko Zec struct dxr *dxr; 10942aca58e1SMarko Zec 10952aca58e1SMarko Zec dxr = malloc(sizeof(*dxr), M_DXRAUX, M_NOWAIT); 1096308caa38SMarko Zec if (dxr == NULL) { 1097ed541e20SMarko Zec FIB_PRINTF(LOG_NOTICE, fd, 1098ed541e20SMarko Zec "Unable to allocate DXR container struct"); 10992aca58e1SMarko Zec return (FLM_REBUILD); 1100308caa38SMarko Zec } 11012aca58e1SMarko Zec 11022aca58e1SMarko Zec /* Check whether we may reuse the old auxiliary structures */ 11034ab122e8SMarko Zec if (old_dxr != NULL && old_dxr->aux != NULL && 11044ab122e8SMarko Zec old_dxr->aux->fd == fd) { 11052aca58e1SMarko Zec da = old_dxr->aux; 11062aca58e1SMarko Zec atomic_add_int(&da->refcnt, 1); 11072aca58e1SMarko Zec } 11082aca58e1SMarko Zec 11092aca58e1SMarko Zec dxr->aux = da; 11102aca58e1SMarko Zec dxr->d = NULL; 11112aca58e1SMarko Zec dxr->fd = fd; 11122aca58e1SMarko Zec dxr->fibnum = fibnum; 11132aca58e1SMarko Zec *data = dxr; 11142aca58e1SMarko Zec 11152aca58e1SMarko Zec return (FLM_SUCCESS); 11162aca58e1SMarko Zec } 11172aca58e1SMarko Zec 11182aca58e1SMarko Zec static void 11192aca58e1SMarko Zec dxr_destroy(void *data) 11202aca58e1SMarko Zec { 11212aca58e1SMarko Zec struct dxr *dxr = data; 112285801064SMarko Zec struct dxr_aux *da = dxr->aux; 11232aca58e1SMarko Zec struct chunk_desc *cdp; 11242aca58e1SMarko Zec struct trie_desc *tp; 11252aca58e1SMarko Zec 11262aca58e1SMarko Zec free(dxr->d, M_DXRLPM); 11272aca58e1SMarko Zec free(dxr, M_DXRAUX); 11282aca58e1SMarko Zec 11292aca58e1SMarko Zec if (da == NULL || atomic_fetchadd_int(&da->refcnt, -1) > 1) 11302aca58e1SMarko Zec return; 11312aca58e1SMarko Zec 11322aca58e1SMarko Zec /* Release auxiliary structures */ 11332aca58e1SMarko Zec while ((cdp = LIST_FIRST(&da->all_chunks)) != NULL) { 11342aca58e1SMarko Zec LIST_REMOVE(cdp, cd_all_le); 11352aca58e1SMarko Zec uma_zfree(chunk_zone, cdp); 11362aca58e1SMarko Zec } 11372aca58e1SMarko Zec while ((tp = LIST_FIRST(&da->all_trie)) != NULL) { 11382aca58e1SMarko Zec LIST_REMOVE(tp, td_all_le); 11392aca58e1SMarko Zec uma_zfree(trie_zone, tp); 11402aca58e1SMarko Zec } 11412aca58e1SMarko Zec free(da->range_tbl, M_DXRAUX); 11422aca58e1SMarko Zec free(da->x_tbl, M_DXRAUX); 11432aca58e1SMarko Zec free(da, M_DXRAUX); 11442aca58e1SMarko Zec } 11452aca58e1SMarko Zec 11462aca58e1SMarko Zec static void 11472aca58e1SMarko Zec epoch_dxr_destroy(epoch_context_t ctx) 11482aca58e1SMarko Zec { 11492aca58e1SMarko Zec struct dxr *dxr = __containerof(ctx, struct dxr, epoch_ctx); 11502aca58e1SMarko Zec 11512aca58e1SMarko Zec dxr_destroy(dxr); 11522aca58e1SMarko Zec } 11532aca58e1SMarko Zec 1154e7abe200SMarko Zec static void * 1155e7abe200SMarko Zec choose_lookup_fn(struct dxr_aux *da) 1156e7abe200SMarko Zec { 1157e7abe200SMarko Zec 1158e7abe200SMarko Zec switch (da->d_bits) { 1159e7abe200SMarko Zec #if DXR_TRIE_BITS > 16 1160e7abe200SMarko Zec case 16: 1161e7abe200SMarko Zec return (dxr_fib_lookup_16); 1162e7abe200SMarko Zec #endif 1163e7abe200SMarko Zec case 15: 1164e7abe200SMarko Zec return (dxr_fib_lookup_15); 1165e7abe200SMarko Zec case 14: 1166e7abe200SMarko Zec return (dxr_fib_lookup_14); 1167e7abe200SMarko Zec case 13: 1168e7abe200SMarko Zec return (dxr_fib_lookup_13); 1169e7abe200SMarko Zec case 12: 1170e7abe200SMarko Zec return (dxr_fib_lookup_12); 1171e7abe200SMarko Zec case 11: 1172e7abe200SMarko Zec return (dxr_fib_lookup_11); 1173e7abe200SMarko Zec case 10: 1174e7abe200SMarko Zec return (dxr_fib_lookup_10); 1175e7abe200SMarko Zec case 9: 1176e7abe200SMarko Zec return (dxr_fib_lookup_9); 1177e7abe200SMarko Zec } 1178e7abe200SMarko Zec return (dxr_fib_lookup); 1179e7abe200SMarko Zec } 1180e7abe200SMarko Zec 11812aca58e1SMarko Zec static enum flm_op_result 11822aca58e1SMarko Zec dxr_dump_end(void *data, struct fib_dp *dp) 11832aca58e1SMarko Zec { 11842aca58e1SMarko Zec struct dxr *dxr = data; 11852aca58e1SMarko Zec struct dxr_aux *da; 11862aca58e1SMarko Zec 11872aca58e1SMarko Zec dxr_build(dxr); 11882aca58e1SMarko Zec 11892aca58e1SMarko Zec da = dxr->aux; 1190ed541e20SMarko Zec if (da == NULL || dxr->d == NULL) 11912aca58e1SMarko Zec return (FLM_REBUILD); 11922aca58e1SMarko Zec 1193ed541e20SMarko Zec if (da->rtbl_top >= BASE_MAX) 11942aca58e1SMarko Zec return (FLM_ERROR); 11952aca58e1SMarko Zec 1196e7abe200SMarko Zec dp->f = choose_lookup_fn(da); 11972aca58e1SMarko Zec dp->arg = dxr; 11982aca58e1SMarko Zec 11992aca58e1SMarko Zec return (FLM_SUCCESS); 12002aca58e1SMarko Zec } 12012aca58e1SMarko Zec 12022aca58e1SMarko Zec static enum flm_op_result 12032aca58e1SMarko Zec dxr_dump_rib_item(struct rtentry *rt, void *data) 12042aca58e1SMarko Zec { 12052aca58e1SMarko Zec 12062aca58e1SMarko Zec return (FLM_SUCCESS); 12072aca58e1SMarko Zec } 12082aca58e1SMarko Zec 12092aca58e1SMarko Zec static enum flm_op_result 12102aca58e1SMarko Zec dxr_change_rib_item(struct rib_head *rnh, struct rib_cmd_info *rc, 12112aca58e1SMarko Zec void *data) 12122aca58e1SMarko Zec { 12132aca58e1SMarko Zec 12142aca58e1SMarko Zec return (FLM_BATCH); 12152aca58e1SMarko Zec } 12162aca58e1SMarko Zec 12172aca58e1SMarko Zec static enum flm_op_result 12182aca58e1SMarko Zec dxr_change_rib_batch(struct rib_head *rnh, struct fib_change_queue *q, 12192aca58e1SMarko Zec void *data) 12202aca58e1SMarko Zec { 12212aca58e1SMarko Zec struct dxr *dxr = data; 12222aca58e1SMarko Zec struct dxr *new_dxr; 12232aca58e1SMarko Zec struct dxr_aux *da; 12242aca58e1SMarko Zec struct fib_dp new_dp; 12252aca58e1SMarko Zec enum flm_op_result res; 12262aca58e1SMarko Zec uint32_t ip, plen, hmask, start, end, i, ui; 12272aca58e1SMarko Zec #ifdef INVARIANTS 12282aca58e1SMarko Zec struct rib_rtable_info rinfo; 12292aca58e1SMarko Zec int update_delta = 0; 12302aca58e1SMarko Zec #endif 12312aca58e1SMarko Zec 12324aa275f1SMarko Zec MPASS(data != NULL); 12334aa275f1SMarko Zec MPASS(q != NULL); 12344aa275f1SMarko Zec MPASS(q->count < q->size); 12352aca58e1SMarko Zec 12362aca58e1SMarko Zec da = dxr->aux; 12374aa275f1SMarko Zec MPASS(da != NULL); 12384ab122e8SMarko Zec MPASS(da->fd == dxr->fd); 12394aa275f1SMarko Zec MPASS(da->refcnt > 0); 12402aca58e1SMarko Zec 12412aca58e1SMarko Zec FIB_PRINTF(LOG_INFO, da->fd, "processing %d update(s)", q->count); 12422aca58e1SMarko Zec for (ui = 0; ui < q->count; ui++) { 12432aca58e1SMarko Zec #ifdef INVARIANTS 12442aca58e1SMarko Zec if (q->entries[ui].nh_new != NULL) 12452aca58e1SMarko Zec update_delta++; 12462aca58e1SMarko Zec if (q->entries[ui].nh_old != NULL) 12472aca58e1SMarko Zec update_delta--; 12482aca58e1SMarko Zec #endif 12492aca58e1SMarko Zec plen = q->entries[ui].plen; 12502aca58e1SMarko Zec ip = ntohl(q->entries[ui].addr4.s_addr); 1251442c8a24SMarko Zec if (plen < 32) 12522aca58e1SMarko Zec hmask = 0xffffffffU >> plen; 1253442c8a24SMarko Zec else 1254442c8a24SMarko Zec hmask = 0; 12552aca58e1SMarko Zec start = (ip & ~hmask) >> DXR_RANGE_SHIFT; 12562aca58e1SMarko Zec end = (ip | hmask) >> DXR_RANGE_SHIFT; 12572aca58e1SMarko Zec 12582aca58e1SMarko Zec if ((start & 0x1f) == 0 && (end & 0x1f) == 0x1f) 12592aca58e1SMarko Zec for (i = start >> 5; i <= end >> 5; i++) 12602aca58e1SMarko Zec da->updates_mask[i] = 0xffffffffU; 12612aca58e1SMarko Zec else 12622aca58e1SMarko Zec for (i = start; i <= end; i++) 12632aca58e1SMarko Zec da->updates_mask[i >> 5] |= (1 << (i & 0x1f)); 12642aca58e1SMarko Zec if (start < da->updates_low) 12652aca58e1SMarko Zec da->updates_low = start; 12662aca58e1SMarko Zec if (end > da->updates_high) 12672aca58e1SMarko Zec da->updates_high = end; 12682aca58e1SMarko Zec } 12692aca58e1SMarko Zec 12702aca58e1SMarko Zec #ifdef INVARIANTS 12712aca58e1SMarko Zec fib_get_rtable_info(fib_get_rh(da->fd), &rinfo); 12724aa275f1SMarko Zec MPASS(da->prefixes + update_delta == rinfo.num_prefixes); 12732aca58e1SMarko Zec #endif 12742aca58e1SMarko Zec 12752aca58e1SMarko Zec res = dxr_init(0, dxr->fd, data, (void **) &new_dxr); 12762aca58e1SMarko Zec if (res != FLM_SUCCESS) 12772aca58e1SMarko Zec return (res); 12782aca58e1SMarko Zec 12792aca58e1SMarko Zec dxr_build(new_dxr); 12802aca58e1SMarko Zec 12812aca58e1SMarko Zec /* Structural limit exceeded, hard error */ 128219bd24caSMarko Zec if (da->rtbl_top >= BASE_MAX) { 128319bd24caSMarko Zec dxr_destroy(new_dxr); 12842aca58e1SMarko Zec return (FLM_ERROR); 128519bd24caSMarko Zec } 12862aca58e1SMarko Zec 12872aca58e1SMarko Zec if (new_dxr->d == NULL) { 12882aca58e1SMarko Zec dxr_destroy(new_dxr); 12892aca58e1SMarko Zec return (FLM_REBUILD); 12902aca58e1SMarko Zec } 12912aca58e1SMarko Zec 1292e7abe200SMarko Zec new_dp.f = choose_lookup_fn(da); 12932aca58e1SMarko Zec new_dp.arg = new_dxr; 12942aca58e1SMarko Zec if (fib_set_datapath_ptr(dxr->fd, &new_dp)) { 12952aca58e1SMarko Zec fib_set_algo_ptr(dxr->fd, new_dxr); 12962aca58e1SMarko Zec fib_epoch_call(epoch_dxr_destroy, &dxr->epoch_ctx); 12972aca58e1SMarko Zec return (FLM_SUCCESS); 12982aca58e1SMarko Zec } 12992aca58e1SMarko Zec 1300ed541e20SMarko Zec FIB_PRINTF(LOG_NOTICE, dxr->fd, "fib_set_datapath_ptr() failed"); 13012aca58e1SMarko Zec dxr_destroy(new_dxr); 13022aca58e1SMarko Zec return (FLM_REBUILD); 13032aca58e1SMarko Zec } 13042aca58e1SMarko Zec 13052aca58e1SMarko Zec static uint8_t 13062aca58e1SMarko Zec dxr_get_pref(const struct rib_rtable_info *rinfo) 13072aca58e1SMarko Zec { 13082aca58e1SMarko Zec 13092aca58e1SMarko Zec /* Below bsearch4 up to 10 prefixes. Always supersedes dpdk_lpm4. */ 13102aca58e1SMarko Zec return (251); 13112aca58e1SMarko Zec } 13122aca58e1SMarko Zec 1313e7abe200SMarko Zec SYSCTL_DECL(_net_route_algo); 1314e7abe200SMarko Zec SYSCTL_NODE(_net_route_algo, OID_AUTO, dxr, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 1315e7abe200SMarko Zec "DXR tunables"); 1316e7abe200SMarko Zec 1317e7abe200SMarko Zec static int 1318e7abe200SMarko Zec sysctl_dxr_frag_limit(SYSCTL_HANDLER_ARGS) 1319e7abe200SMarko Zec { 1320e7abe200SMarko Zec char buf[8]; 1321e7abe200SMarko Zec int error, new, i; 1322e7abe200SMarko Zec 1323e7abe200SMarko Zec snprintf(buf, sizeof(buf), "%d.%02d%%", V_frag_limit / 100, 1324e7abe200SMarko Zec V_frag_limit % 100); 1325e7abe200SMarko Zec error = sysctl_handle_string(oidp, buf, sizeof(buf), req); 1326e7abe200SMarko Zec if (error != 0 || req->newptr == NULL) 1327e7abe200SMarko Zec return (error); 1328e7abe200SMarko Zec if (!isdigit(*buf) && *buf != '.') 1329e7abe200SMarko Zec return (EINVAL); 1330e7abe200SMarko Zec for (i = 0, new = 0; isdigit(buf[i]) && i < sizeof(buf); i++) 1331e7abe200SMarko Zec new = new * 10 + buf[i] - '0'; 1332e7abe200SMarko Zec new *= 100; 1333e7abe200SMarko Zec if (buf[i++] == '.') { 1334e7abe200SMarko Zec if (!isdigit(buf[i])) 1335e7abe200SMarko Zec return (EINVAL); 1336e7abe200SMarko Zec new += (buf[i++] - '0') * 10; 1337e7abe200SMarko Zec if (isdigit(buf[i])) 1338e7abe200SMarko Zec new += buf[i++] - '0'; 1339e7abe200SMarko Zec } 1340e7abe200SMarko Zec if (new > 1000) 1341e7abe200SMarko Zec return (EINVAL); 1342e7abe200SMarko Zec V_frag_limit = new; 1343e7abe200SMarko Zec snprintf(buf, sizeof(buf), "%d.%02d%%", V_frag_limit / 100, 1344e7abe200SMarko Zec V_frag_limit % 100); 1345e7abe200SMarko Zec return (0); 1346e7abe200SMarko Zec } 1347e7abe200SMarko Zec 1348e7abe200SMarko Zec SYSCTL_PROC(_net_route_algo_dxr, OID_AUTO, frag_limit, 1349e7abe200SMarko Zec CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_VNET, 1350e7abe200SMarko Zec 0, 0, sysctl_dxr_frag_limit, "A", 1351e7abe200SMarko Zec "Fragmentation threshold to full rebuild"); 1352e7abe200SMarko Zec 13532aca58e1SMarko Zec static struct fib_lookup_module fib_dxr_mod = { 13542aca58e1SMarko Zec .flm_name = "dxr", 13552aca58e1SMarko Zec .flm_family = AF_INET, 13562aca58e1SMarko Zec .flm_init_cb = dxr_init, 13572aca58e1SMarko Zec .flm_destroy_cb = dxr_destroy, 13582aca58e1SMarko Zec .flm_dump_rib_item_cb = dxr_dump_rib_item, 13592aca58e1SMarko Zec .flm_dump_end_cb = dxr_dump_end, 13602aca58e1SMarko Zec .flm_change_rib_item_cb = dxr_change_rib_item, 13612aca58e1SMarko Zec .flm_change_rib_items_cb = dxr_change_rib_batch, 13622aca58e1SMarko Zec .flm_get_pref = dxr_get_pref, 13632aca58e1SMarko Zec }; 13642aca58e1SMarko Zec 13652aca58e1SMarko Zec static int 13662aca58e1SMarko Zec dxr_modevent(module_t mod, int type, void *unused) 13672aca58e1SMarko Zec { 13682aca58e1SMarko Zec int error; 13692aca58e1SMarko Zec 13702aca58e1SMarko Zec switch (type) { 13712aca58e1SMarko Zec case MOD_LOAD: 13722aca58e1SMarko Zec chunk_zone = uma_zcreate("dxr chunk", sizeof(struct chunk_desc), 13732aca58e1SMarko Zec NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 13742aca58e1SMarko Zec trie_zone = uma_zcreate("dxr trie", sizeof(struct trie_desc), 13752aca58e1SMarko Zec NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 13762aca58e1SMarko Zec fib_module_register(&fib_dxr_mod); 13772aca58e1SMarko Zec return(0); 13782aca58e1SMarko Zec case MOD_UNLOAD: 13792aca58e1SMarko Zec error = fib_module_unregister(&fib_dxr_mod); 13802aca58e1SMarko Zec if (error) 13812aca58e1SMarko Zec return (error); 13822aca58e1SMarko Zec uma_zdestroy(chunk_zone); 13832aca58e1SMarko Zec uma_zdestroy(trie_zone); 13842aca58e1SMarko Zec return(0); 13852aca58e1SMarko Zec default: 13862aca58e1SMarko Zec return(EOPNOTSUPP); 13872aca58e1SMarko Zec } 13882aca58e1SMarko Zec } 13892aca58e1SMarko Zec 13902aca58e1SMarko Zec static moduledata_t dxr_mod = {"fib_dxr", dxr_modevent, 0}; 13912aca58e1SMarko Zec 13922aca58e1SMarko Zec DECLARE_MODULE(fib_dxr, dxr_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 13932aca58e1SMarko Zec MODULE_VERSION(fib_dxr, 1); 1394