1a6663252SAlexander V. Chernikov /*- 2a6663252SAlexander V. Chernikov * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3a6663252SAlexander V. Chernikov * 4a6663252SAlexander V. Chernikov * Copyright (c) 2020 Alexander V. Chernikov 5a6663252SAlexander V. Chernikov * 6a6663252SAlexander V. Chernikov * Redistribution and use in source and binary forms, with or without 7a6663252SAlexander V. Chernikov * modification, are permitted provided that the following conditions 8a6663252SAlexander V. Chernikov * are met: 9a6663252SAlexander V. Chernikov * 1. Redistributions of source code must retain the above copyright 10a6663252SAlexander V. Chernikov * notice, this list of conditions and the following disclaimer. 11a6663252SAlexander V. Chernikov * 2. Redistributions in binary form must reproduce the above copyright 12a6663252SAlexander V. Chernikov * notice, this list of conditions and the following disclaimer in the 13a6663252SAlexander V. Chernikov * documentation and/or other materials provided with the distribution. 14a6663252SAlexander V. Chernikov * 15a6663252SAlexander V. Chernikov * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16a6663252SAlexander V. Chernikov * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17a6663252SAlexander V. Chernikov * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18a6663252SAlexander V. Chernikov * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19a6663252SAlexander V. Chernikov * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20a6663252SAlexander V. Chernikov * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21a6663252SAlexander V. Chernikov * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22a6663252SAlexander V. Chernikov * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23a6663252SAlexander V. Chernikov * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24a6663252SAlexander V. Chernikov * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25a6663252SAlexander V. Chernikov * SUCH DAMAGE. 26a6663252SAlexander V. Chernikov */ 27a6663252SAlexander V. Chernikov 28a6663252SAlexander V. Chernikov #include <sys/cdefs.h> 29a6663252SAlexander V. Chernikov __FBSDID("$FreeBSD$"); 30a6663252SAlexander V. Chernikov #include "opt_inet.h" 31a6663252SAlexander V. Chernikov #include "opt_inet6.h" 32a6663252SAlexander V. Chernikov #include "opt_route.h" 33a6663252SAlexander V. Chernikov 34a6663252SAlexander V. Chernikov #include <sys/param.h> 35a6663252SAlexander V. Chernikov #include <sys/jail.h> 36a6663252SAlexander V. Chernikov #include <sys/systm.h> 37a6663252SAlexander V. Chernikov #include <sys/malloc.h> 38a6663252SAlexander V. Chernikov #include <sys/mbuf.h> 39a6663252SAlexander V. Chernikov #include <sys/socket.h> 40a6663252SAlexander V. Chernikov #include <sys/sysctl.h> 41a6663252SAlexander V. Chernikov #include <sys/syslog.h> 42a6663252SAlexander V. Chernikov #include <sys/sysproto.h> 43a6663252SAlexander V. Chernikov #include <sys/proc.h> 44a6663252SAlexander V. Chernikov #include <sys/domain.h> 45a6663252SAlexander V. Chernikov #include <sys/kernel.h> 46a6663252SAlexander V. Chernikov #include <sys/lock.h> 47a6663252SAlexander V. Chernikov #include <sys/rmlock.h> 48a6663252SAlexander V. Chernikov 49a6663252SAlexander V. Chernikov #include <net/if.h> 50a6663252SAlexander V. Chernikov #include <net/if_var.h> 51a6663252SAlexander V. Chernikov #include <net/if_dl.h> 52a6663252SAlexander V. Chernikov #include <net/route.h> 53da187ddbSAlexander V. Chernikov #include <net/route/route_ctl.h> 54e7d8af4fSAlexander V. Chernikov #include <net/route/route_var.h> 55a6663252SAlexander V. Chernikov #include <net/route/nhop_utils.h> 56a6663252SAlexander V. Chernikov #include <net/route/nhop.h> 57a6663252SAlexander V. Chernikov #include <net/route/nhop_var.h> 58682b902dSAlexander V. Chernikov #ifdef INET 59682b902dSAlexander V. Chernikov #include <netinet/in_fib.h> 60682b902dSAlexander V. Chernikov #endif 61682b902dSAlexander V. Chernikov #ifdef INET6 62682b902dSAlexander V. Chernikov #include <netinet6/in6_fib.h> 6336e15b71SAlexander V. Chernikov #include <netinet6/in6_var.h> 64682b902dSAlexander V. Chernikov #endif 65a6663252SAlexander V. Chernikov #include <net/vnet.h> 66a6663252SAlexander V. Chernikov 67a6663252SAlexander V. Chernikov /* 68a6663252SAlexander V. Chernikov * RIB helper functions. 69a6663252SAlexander V. Chernikov */ 70a6663252SAlexander V. Chernikov 71151ec796SAlexander V. Chernikov void 72151ec796SAlexander V. Chernikov rib_walk_ext_locked(struct rib_head *rnh, rib_walktree_f_t *wa_f, 73151ec796SAlexander V. Chernikov rib_walk_hook_f_t *hook_f, void *arg) 74151ec796SAlexander V. Chernikov { 75151ec796SAlexander V. Chernikov if (hook_f != NULL) 76151ec796SAlexander V. Chernikov hook_f(rnh, RIB_WALK_HOOK_PRE, arg); 77151ec796SAlexander V. Chernikov rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f, arg); 78151ec796SAlexander V. Chernikov if (hook_f != NULL) 79151ec796SAlexander V. Chernikov hook_f(rnh, RIB_WALK_HOOK_POST, arg); 80151ec796SAlexander V. Chernikov } 81151ec796SAlexander V. Chernikov 82a6663252SAlexander V. Chernikov /* 83a6663252SAlexander V. Chernikov * Calls @wa_f with @arg for each entry in the table specified by 84a6663252SAlexander V. Chernikov * @af and @fibnum. 85a6663252SAlexander V. Chernikov * 867511a638SAlexander V. Chernikov * @ss_t callback is called before and after the tree traversal 877511a638SAlexander V. Chernikov * while holding table lock. 887511a638SAlexander V. Chernikov * 897511a638SAlexander V. Chernikov * Table is traversed under read lock unless @wlock is set. 90a6663252SAlexander V. Chernikov */ 91a6663252SAlexander V. Chernikov void 923b1654cbSAlexander V. Chernikov rib_walk_ext_internal(struct rib_head *rnh, bool wlock, rib_walktree_f_t *wa_f, 937511a638SAlexander V. Chernikov rib_walk_hook_f_t *hook_f, void *arg) 94a6663252SAlexander V. Chernikov { 95a6663252SAlexander V. Chernikov RIB_RLOCK_TRACKER; 96a6663252SAlexander V. Chernikov 977511a638SAlexander V. Chernikov if (wlock) 987511a638SAlexander V. Chernikov RIB_WLOCK(rnh); 997511a638SAlexander V. Chernikov else 100a6663252SAlexander V. Chernikov RIB_RLOCK(rnh); 101151ec796SAlexander V. Chernikov rib_walk_ext_locked(rnh, wa_f, hook_f, arg); 1027511a638SAlexander V. Chernikov if (wlock) 1037511a638SAlexander V. Chernikov RIB_WUNLOCK(rnh); 1047511a638SAlexander V. Chernikov else 105a6663252SAlexander V. Chernikov RIB_RUNLOCK(rnh); 106a6663252SAlexander V. Chernikov } 107a6663252SAlexander V. Chernikov 1083b1654cbSAlexander V. Chernikov void 1093b1654cbSAlexander V. Chernikov rib_walk_ext(uint32_t fibnum, int family, bool wlock, rib_walktree_f_t *wa_f, 1103b1654cbSAlexander V. Chernikov rib_walk_hook_f_t *hook_f, void *arg) 1113b1654cbSAlexander V. Chernikov { 1123b1654cbSAlexander V. Chernikov struct rib_head *rnh; 1133b1654cbSAlexander V. Chernikov 1143b1654cbSAlexander V. Chernikov if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL) 1153b1654cbSAlexander V. Chernikov rib_walk_ext_internal(rnh, wlock, wa_f, hook_f, arg); 1163b1654cbSAlexander V. Chernikov } 1173b1654cbSAlexander V. Chernikov 118682b902dSAlexander V. Chernikov /* 1197511a638SAlexander V. Chernikov * Calls @wa_f with @arg for each entry in the table specified by 1207511a638SAlexander V. Chernikov * @af and @fibnum. 1217511a638SAlexander V. Chernikov * 1227511a638SAlexander V. Chernikov * Table is traversed under read lock unless @wlock is set. 1237511a638SAlexander V. Chernikov */ 1247511a638SAlexander V. Chernikov void 1257511a638SAlexander V. Chernikov rib_walk(uint32_t fibnum, int family, bool wlock, rib_walktree_f_t *wa_f, 1267511a638SAlexander V. Chernikov void *arg) 1277511a638SAlexander V. Chernikov { 1287511a638SAlexander V. Chernikov 1297511a638SAlexander V. Chernikov rib_walk_ext(fibnum, family, wlock, wa_f, NULL, arg); 1307511a638SAlexander V. Chernikov } 1317511a638SAlexander V. Chernikov 1327511a638SAlexander V. Chernikov /* 133f9668e42SAlexander V. Chernikov * Calls @wa_f with @arg for each entry in the table matching @prefix/@mask. 134f9668e42SAlexander V. Chernikov * 135f9668e42SAlexander V. Chernikov * The following flags are supported: 136f9668e42SAlexander V. Chernikov * RIB_FLAG_WLOCK: acquire exclusive lock 137f9668e42SAlexander V. Chernikov * RIB_FLAG_LOCKED: Assumes the table is already locked & skip locking 138f9668e42SAlexander V. Chernikov * 139f9668e42SAlexander V. Chernikov * By default, table is traversed under read lock. 140f9668e42SAlexander V. Chernikov */ 141f9668e42SAlexander V. Chernikov void 142f9668e42SAlexander V. Chernikov rib_walk_from(uint32_t fibnum, int family, uint32_t flags, struct sockaddr *prefix, 143f9668e42SAlexander V. Chernikov struct sockaddr *mask, rib_walktree_f_t *wa_f, void *arg) 144f9668e42SAlexander V. Chernikov { 145f9668e42SAlexander V. Chernikov RIB_RLOCK_TRACKER; 146f9668e42SAlexander V. Chernikov struct rib_head *rnh = rt_tables_get_rnh(fibnum, family); 147f9668e42SAlexander V. Chernikov 148f9668e42SAlexander V. Chernikov if (rnh == NULL) 149f9668e42SAlexander V. Chernikov return; 150f9668e42SAlexander V. Chernikov 151f9668e42SAlexander V. Chernikov if (flags & RIB_FLAG_WLOCK) 152f9668e42SAlexander V. Chernikov RIB_WLOCK(rnh); 153f9668e42SAlexander V. Chernikov else if (!(flags & RIB_FLAG_LOCKED)) 154f9668e42SAlexander V. Chernikov RIB_RLOCK(rnh); 155f9668e42SAlexander V. Chernikov 156f9668e42SAlexander V. Chernikov rnh->rnh_walktree_from(&rnh->head, prefix, mask, (walktree_f_t *)wa_f, arg); 157f9668e42SAlexander V. Chernikov 158f9668e42SAlexander V. Chernikov if (flags & RIB_FLAG_WLOCK) 159f9668e42SAlexander V. Chernikov RIB_WUNLOCK(rnh); 160f9668e42SAlexander V. Chernikov else if (!(flags & RIB_FLAG_LOCKED)) 161f9668e42SAlexander V. Chernikov RIB_RUNLOCK(rnh); 162f9668e42SAlexander V. Chernikov } 163f9668e42SAlexander V. Chernikov 164f9668e42SAlexander V. Chernikov /* 1657511a638SAlexander V. Chernikov * Iterates over all existing fibs in system calling 1667511a638SAlexander V. Chernikov * @hook_f function before/after traversing each fib. 1677511a638SAlexander V. Chernikov * Calls @wa_f function for each element in current fib. 1687511a638SAlexander V. Chernikov * If af is not AF_UNSPEC, iterates over fibs in particular 1697511a638SAlexander V. Chernikov * address family. 1707511a638SAlexander V. Chernikov */ 1717511a638SAlexander V. Chernikov void 1727511a638SAlexander V. Chernikov rib_foreach_table_walk(int family, bool wlock, rib_walktree_f_t *wa_f, 1737511a638SAlexander V. Chernikov rib_walk_hook_f_t *hook_f, void *arg) 1747511a638SAlexander V. Chernikov { 1757511a638SAlexander V. Chernikov 1767511a638SAlexander V. Chernikov for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) { 1777511a638SAlexander V. Chernikov /* Do we want some specific family? */ 1787511a638SAlexander V. Chernikov if (family != AF_UNSPEC) { 1797511a638SAlexander V. Chernikov rib_walk_ext(fibnum, family, wlock, wa_f, hook_f, arg); 1807511a638SAlexander V. Chernikov continue; 1817511a638SAlexander V. Chernikov } 1827511a638SAlexander V. Chernikov 1837511a638SAlexander V. Chernikov for (int i = 1; i <= AF_MAX; i++) 1847511a638SAlexander V. Chernikov rib_walk_ext(fibnum, i, wlock, wa_f, hook_f, arg); 1857511a638SAlexander V. Chernikov } 1867511a638SAlexander V. Chernikov } 1877511a638SAlexander V. Chernikov 1887511a638SAlexander V. Chernikov /* 1897511a638SAlexander V. Chernikov * Iterates over all existing fibs in system and deletes each element 1907511a638SAlexander V. Chernikov * for which @filter_f function returns non-zero value. 1917511a638SAlexander V. Chernikov * If @family is not AF_UNSPEC, iterates over fibs in particular 1927511a638SAlexander V. Chernikov * address family. 1937511a638SAlexander V. Chernikov */ 1947511a638SAlexander V. Chernikov void 1957511a638SAlexander V. Chernikov rib_foreach_table_walk_del(int family, rib_filter_f_t *filter_f, void *arg) 1967511a638SAlexander V. Chernikov { 1977511a638SAlexander V. Chernikov 1987511a638SAlexander V. Chernikov for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) { 1997511a638SAlexander V. Chernikov /* Do we want some specific family? */ 2007511a638SAlexander V. Chernikov if (family != AF_UNSPEC) { 2017511a638SAlexander V. Chernikov rib_walk_del(fibnum, family, filter_f, arg, 0); 2027511a638SAlexander V. Chernikov continue; 2037511a638SAlexander V. Chernikov } 2047511a638SAlexander V. Chernikov 2057511a638SAlexander V. Chernikov for (int i = 1; i <= AF_MAX; i++) 2067511a638SAlexander V. Chernikov rib_walk_del(fibnum, i, filter_f, arg, 0); 2077511a638SAlexander V. Chernikov } 2087511a638SAlexander V. Chernikov } 2097511a638SAlexander V. Chernikov 2107511a638SAlexander V. Chernikov 2117511a638SAlexander V. Chernikov /* 212682b902dSAlexander V. Chernikov * Wrapper for the control plane functions for performing af-agnostic 213682b902dSAlexander V. Chernikov * lookups. 214682b902dSAlexander V. Chernikov * @fibnum: fib to perform the lookup. 215682b902dSAlexander V. Chernikov * @dst: sockaddr with family and addr filled in. IPv6 addresses needs to be in 216682b902dSAlexander V. Chernikov * deembedded from. 217682b902dSAlexander V. Chernikov * @flags: fib(9) flags. 218682b902dSAlexander V. Chernikov * @flowid: flow id for path selection in multipath use case. 219682b902dSAlexander V. Chernikov * 220682b902dSAlexander V. Chernikov * Returns nhop_object or NULL. 221682b902dSAlexander V. Chernikov * 222682b902dSAlexander V. Chernikov * Requires NET_EPOCH. 223682b902dSAlexander V. Chernikov * 224682b902dSAlexander V. Chernikov */ 225682b902dSAlexander V. Chernikov struct nhop_object * 226682b902dSAlexander V. Chernikov rib_lookup(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags, 227682b902dSAlexander V. Chernikov uint32_t flowid) 228682b902dSAlexander V. Chernikov { 229682b902dSAlexander V. Chernikov struct nhop_object *nh; 230682b902dSAlexander V. Chernikov 231682b902dSAlexander V. Chernikov nh = NULL; 232682b902dSAlexander V. Chernikov 233682b902dSAlexander V. Chernikov switch (dst->sa_family) { 234682b902dSAlexander V. Chernikov #ifdef INET 235682b902dSAlexander V. Chernikov case AF_INET: 236682b902dSAlexander V. Chernikov { 237682b902dSAlexander V. Chernikov const struct sockaddr_in *a = (const struct sockaddr_in *)dst; 238682b902dSAlexander V. Chernikov nh = fib4_lookup(fibnum, a->sin_addr, 0, flags, flowid); 239682b902dSAlexander V. Chernikov break; 240682b902dSAlexander V. Chernikov } 241682b902dSAlexander V. Chernikov #endif 242682b902dSAlexander V. Chernikov #ifdef INET6 243682b902dSAlexander V. Chernikov case AF_INET6: 244682b902dSAlexander V. Chernikov { 245682b902dSAlexander V. Chernikov const struct sockaddr_in6 *a = (const struct sockaddr_in6*)dst; 246682b902dSAlexander V. Chernikov nh = fib6_lookup(fibnum, &a->sin6_addr, a->sin6_scope_id, 247682b902dSAlexander V. Chernikov flags, flowid); 248682b902dSAlexander V. Chernikov break; 249682b902dSAlexander V. Chernikov } 250682b902dSAlexander V. Chernikov #endif 251682b902dSAlexander V. Chernikov } 252682b902dSAlexander V. Chernikov 253682b902dSAlexander V. Chernikov return (nh); 254682b902dSAlexander V. Chernikov } 255fedeb08bSAlexander V. Chernikov 256fedeb08bSAlexander V. Chernikov #ifdef ROUTE_MPATH 257fedeb08bSAlexander V. Chernikov static void 258fedeb08bSAlexander V. Chernikov decompose_change_notification(struct rib_cmd_info *rc, route_notification_t *cb, 259fedeb08bSAlexander V. Chernikov void *cbdata) 260fedeb08bSAlexander V. Chernikov { 261fedeb08bSAlexander V. Chernikov uint32_t num_old, num_new; 262fedeb08bSAlexander V. Chernikov uint32_t nh_idx_old, nh_idx_new; 263fedeb08bSAlexander V. Chernikov struct weightened_nhop *wn_old, *wn_new; 264fedeb08bSAlexander V. Chernikov struct weightened_nhop tmp = { NULL, 0 }; 265fedeb08bSAlexander V. Chernikov uint32_t idx_old = 0, idx_new = 0; 266fedeb08bSAlexander V. Chernikov 267fedeb08bSAlexander V. Chernikov struct rib_cmd_info rc_del = { .rc_cmd = RTM_DELETE, .rc_rt = rc->rc_rt }; 268fedeb08bSAlexander V. Chernikov struct rib_cmd_info rc_add = { .rc_cmd = RTM_ADD, .rc_rt = rc->rc_rt }; 269fedeb08bSAlexander V. Chernikov 270fedeb08bSAlexander V. Chernikov if (NH_IS_NHGRP(rc->rc_nh_old)) { 271fedeb08bSAlexander V. Chernikov wn_old = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_old, &num_old); 272fedeb08bSAlexander V. Chernikov } else { 273fedeb08bSAlexander V. Chernikov tmp.nh = rc->rc_nh_old; 274fedeb08bSAlexander V. Chernikov tmp.weight = rc->rc_nh_weight; 275fedeb08bSAlexander V. Chernikov wn_old = &tmp; 276fedeb08bSAlexander V. Chernikov num_old = 1; 277fedeb08bSAlexander V. Chernikov } 278fedeb08bSAlexander V. Chernikov if (NH_IS_NHGRP(rc->rc_nh_new)) { 279fedeb08bSAlexander V. Chernikov wn_new = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_new, &num_new); 280fedeb08bSAlexander V. Chernikov } else { 281fedeb08bSAlexander V. Chernikov tmp.nh = rc->rc_nh_new; 282fedeb08bSAlexander V. Chernikov tmp.weight = rc->rc_nh_weight; 283fedeb08bSAlexander V. Chernikov wn_new = &tmp; 284fedeb08bSAlexander V. Chernikov num_new = 1; 285fedeb08bSAlexander V. Chernikov } 286fedeb08bSAlexander V. Chernikov 287fedeb08bSAlexander V. Chernikov /* Use the fact that each @wn array is sorted */ 288fedeb08bSAlexander V. Chernikov /* 289fedeb08bSAlexander V. Chernikov * Want to convert into set of add and delete operations 290fedeb08bSAlexander V. Chernikov * [1] -> [1, 2] = A{2} 291fedeb08bSAlexander V. Chernikov * [2] -> [1, 2] = A{1} 292fedeb08bSAlexander V. Chernikov * [1, 2, 4]->[1, 3, 4] = A{2}, D{3} 293fedeb08bSAlexander V. Chernikov * [1, 2, 4]->[1, 4] = D{2} 294fedeb08bSAlexander V. Chernikov * [1, 2, 4] -> [3, 4] = D{1}, C{2,3} OR C{1,3}, D{2} OR D{1},D{2},A{3} 295fedeb08bSAlexander V. Chernikov * [1, 2] -> [3, 4] = 296fedeb08bSAlexander V. Chernikov * 297fedeb08bSAlexander V. Chernikov */ 298fedeb08bSAlexander V. Chernikov idx_old = 0; 299fedeb08bSAlexander V. Chernikov while ((idx_old < num_old) && (idx_new < num_new)) { 300fedeb08bSAlexander V. Chernikov nh_idx_old = wn_old[idx_old].nh->nh_priv->nh_idx; 301fedeb08bSAlexander V. Chernikov nh_idx_new = wn_new[idx_new].nh->nh_priv->nh_idx; 302fedeb08bSAlexander V. Chernikov 303fedeb08bSAlexander V. Chernikov if (nh_idx_old == nh_idx_new) { 304fedeb08bSAlexander V. Chernikov if (wn_old[idx_old].weight != wn_new[idx_new].weight) { 305fedeb08bSAlexander V. Chernikov /* Update weight by providing del/add notifications */ 306fedeb08bSAlexander V. Chernikov rc_del.rc_nh_old = wn_old[idx_old].nh; 307fedeb08bSAlexander V. Chernikov rc_del.rc_nh_weight = wn_old[idx_old].weight; 308fedeb08bSAlexander V. Chernikov cb(&rc_del, cbdata); 309fedeb08bSAlexander V. Chernikov 310fedeb08bSAlexander V. Chernikov rc_add.rc_nh_new = wn_new[idx_new].nh; 311fedeb08bSAlexander V. Chernikov rc_add.rc_nh_weight = wn_new[idx_new].weight; 312fedeb08bSAlexander V. Chernikov cb(&rc_add, cbdata); 313fedeb08bSAlexander V. Chernikov } 314fedeb08bSAlexander V. Chernikov idx_old++; 315fedeb08bSAlexander V. Chernikov idx_new++; 316fedeb08bSAlexander V. Chernikov } else if (nh_idx_old < nh_idx_new) { 317fedeb08bSAlexander V. Chernikov /* 318fedeb08bSAlexander V. Chernikov * [1, ~2~, 4], [1, ~3~, 4] 319fedeb08bSAlexander V. Chernikov * [1, ~2~, 5], [1, ~3~, 4] 320fedeb08bSAlexander V. Chernikov * [1, ~2~], [1, ~3~, 4] 321fedeb08bSAlexander V. Chernikov */ 322fedeb08bSAlexander V. Chernikov if ((idx_old + 1 >= num_old) || 323fedeb08bSAlexander V. Chernikov (wn_old[idx_old + 1].nh->nh_priv->nh_idx > nh_idx_new)) { 324fedeb08bSAlexander V. Chernikov /* Add new unless the next old item is still <= new */ 325fedeb08bSAlexander V. Chernikov rc_add.rc_nh_new = wn_new[idx_new].nh; 326fedeb08bSAlexander V. Chernikov rc_add.rc_nh_weight = wn_new[idx_new].weight; 327fedeb08bSAlexander V. Chernikov cb(&rc_add, cbdata); 328fedeb08bSAlexander V. Chernikov idx_new++; 329fedeb08bSAlexander V. Chernikov } 330fedeb08bSAlexander V. Chernikov /* In any case, delete current old */ 331fedeb08bSAlexander V. Chernikov rc_del.rc_nh_old = wn_old[idx_old].nh; 332fedeb08bSAlexander V. Chernikov rc_del.rc_nh_weight = wn_old[idx_old].weight; 333fedeb08bSAlexander V. Chernikov cb(&rc_del, cbdata); 334fedeb08bSAlexander V. Chernikov idx_old++; 335fedeb08bSAlexander V. Chernikov } else { 336fedeb08bSAlexander V. Chernikov /* 337fedeb08bSAlexander V. Chernikov * nh_idx_old > nh_idx_new 338fedeb08bSAlexander V. Chernikov * 339fedeb08bSAlexander V. Chernikov * [1, ~3~, 4], [1, ~2~, 4] 340fedeb08bSAlexander V. Chernikov * [1, ~3~, 5], [1, ~2~, 4] 341fedeb08bSAlexander V. Chernikov * [1, ~3~, 4], [1, ~2~] 342fedeb08bSAlexander V. Chernikov */ 343fedeb08bSAlexander V. Chernikov if ((idx_new + 1 >= num_new) || 344fedeb08bSAlexander V. Chernikov (wn_new[idx_new + 1].nh->nh_priv->nh_idx > nh_idx_old)) { 345fedeb08bSAlexander V. Chernikov /* No next item or next item is > current one */ 346fedeb08bSAlexander V. Chernikov rc_add.rc_nh_new = wn_new[idx_new].nh; 347fedeb08bSAlexander V. Chernikov rc_add.rc_nh_weight = wn_new[idx_new].weight; 348fedeb08bSAlexander V. Chernikov cb(&rc_add, cbdata); 349fedeb08bSAlexander V. Chernikov idx_new++; 350fedeb08bSAlexander V. Chernikov } 351fedeb08bSAlexander V. Chernikov /* In any case, delete current old */ 352fedeb08bSAlexander V. Chernikov rc_del.rc_nh_old = wn_old[idx_old].nh; 353fedeb08bSAlexander V. Chernikov rc_del.rc_nh_weight = wn_old[idx_old].weight; 354fedeb08bSAlexander V. Chernikov cb(&rc_del, cbdata); 355fedeb08bSAlexander V. Chernikov idx_old++; 356fedeb08bSAlexander V. Chernikov } 357fedeb08bSAlexander V. Chernikov } 358fedeb08bSAlexander V. Chernikov 359fedeb08bSAlexander V. Chernikov while (idx_old < num_old) { 360fedeb08bSAlexander V. Chernikov rc_del.rc_nh_old = wn_old[idx_old].nh; 361fedeb08bSAlexander V. Chernikov rc_del.rc_nh_weight = wn_old[idx_old].weight; 362fedeb08bSAlexander V. Chernikov cb(&rc_del, cbdata); 363fedeb08bSAlexander V. Chernikov idx_old++; 364fedeb08bSAlexander V. Chernikov } 365fedeb08bSAlexander V. Chernikov 366fedeb08bSAlexander V. Chernikov while (idx_new < num_new) { 367fedeb08bSAlexander V. Chernikov rc_add.rc_nh_new = wn_new[idx_new].nh; 368fedeb08bSAlexander V. Chernikov rc_add.rc_nh_weight = wn_new[idx_new].weight; 369fedeb08bSAlexander V. Chernikov cb(&rc_add, cbdata); 370fedeb08bSAlexander V. Chernikov idx_new++; 371fedeb08bSAlexander V. Chernikov } 372fedeb08bSAlexander V. Chernikov } 373fedeb08bSAlexander V. Chernikov 374fedeb08bSAlexander V. Chernikov /* 375fedeb08bSAlexander V. Chernikov * Decompose multipath cmd info @rc into a list of add/del/change 376fedeb08bSAlexander V. Chernikov * single-path operations, calling @cb callback for each operation. 377fedeb08bSAlexander V. Chernikov * Assumes at least one of the nexthops in @rc is multipath. 378fedeb08bSAlexander V. Chernikov */ 379fedeb08bSAlexander V. Chernikov void 380fedeb08bSAlexander V. Chernikov rib_decompose_notification(struct rib_cmd_info *rc, route_notification_t *cb, 381fedeb08bSAlexander V. Chernikov void *cbdata) 382fedeb08bSAlexander V. Chernikov { 383fedeb08bSAlexander V. Chernikov struct weightened_nhop *wn; 384fedeb08bSAlexander V. Chernikov uint32_t num_nhops; 385fedeb08bSAlexander V. Chernikov struct rib_cmd_info rc_new; 386fedeb08bSAlexander V. Chernikov 387fedeb08bSAlexander V. Chernikov rc_new = *rc; 388fedeb08bSAlexander V. Chernikov DPRINTF("cb=%p cmd=%d nh_old=%p nh_new=%p", 389fedeb08bSAlexander V. Chernikov cb, rc->cmd, rc->nh_old, rc->nh_new); 390fedeb08bSAlexander V. Chernikov switch (rc->rc_cmd) { 391fedeb08bSAlexander V. Chernikov case RTM_ADD: 392fedeb08bSAlexander V. Chernikov if (!NH_IS_NHGRP(rc->rc_nh_new)) 393fedeb08bSAlexander V. Chernikov return; 394fedeb08bSAlexander V. Chernikov wn = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_new, &num_nhops); 395fedeb08bSAlexander V. Chernikov for (uint32_t i = 0; i < num_nhops; i++) { 396fedeb08bSAlexander V. Chernikov rc_new.rc_nh_new = wn[i].nh; 397fedeb08bSAlexander V. Chernikov rc_new.rc_nh_weight = wn[i].weight; 398fedeb08bSAlexander V. Chernikov cb(&rc_new, cbdata); 399fedeb08bSAlexander V. Chernikov } 400fedeb08bSAlexander V. Chernikov break; 401fedeb08bSAlexander V. Chernikov case RTM_DELETE: 402fedeb08bSAlexander V. Chernikov if (!NH_IS_NHGRP(rc->rc_nh_old)) 403fedeb08bSAlexander V. Chernikov return; 404fedeb08bSAlexander V. Chernikov wn = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_old, &num_nhops); 405fedeb08bSAlexander V. Chernikov for (uint32_t i = 0; i < num_nhops; i++) { 406fedeb08bSAlexander V. Chernikov rc_new.rc_nh_old = wn[i].nh; 407fedeb08bSAlexander V. Chernikov rc_new.rc_nh_weight = wn[i].weight; 408fedeb08bSAlexander V. Chernikov cb(&rc_new, cbdata); 409fedeb08bSAlexander V. Chernikov } 410fedeb08bSAlexander V. Chernikov break; 411fedeb08bSAlexander V. Chernikov case RTM_CHANGE: 412fedeb08bSAlexander V. Chernikov if (!NH_IS_NHGRP(rc->rc_nh_old) && !NH_IS_NHGRP(rc->rc_nh_new)) 413fedeb08bSAlexander V. Chernikov return; 414fedeb08bSAlexander V. Chernikov decompose_change_notification(rc, cb, cbdata); 415fedeb08bSAlexander V. Chernikov break; 416fedeb08bSAlexander V. Chernikov } 417fedeb08bSAlexander V. Chernikov } 418fedeb08bSAlexander V. Chernikov #endif 41936e15b71SAlexander V. Chernikov 42036e15b71SAlexander V. Chernikov #ifdef INET 42136e15b71SAlexander V. Chernikov /* 42236e15b71SAlexander V. Chernikov * Checks if the found key in the trie contains (<=) a prefix covering 42336e15b71SAlexander V. Chernikov * @paddr/@plen. 42436e15b71SAlexander V. Chernikov * Returns the most specific rtentry matching the condition or NULL. 42536e15b71SAlexander V. Chernikov */ 42636e15b71SAlexander V. Chernikov static struct rtentry * 42736e15b71SAlexander V. Chernikov get_inet_parent_prefix(uint32_t fibnum, struct in_addr addr, int plen) 42836e15b71SAlexander V. Chernikov { 42936e15b71SAlexander V. Chernikov struct route_nhop_data rnd; 43036e15b71SAlexander V. Chernikov struct rtentry *rt; 43136e15b71SAlexander V. Chernikov struct in_addr addr4; 43236e15b71SAlexander V. Chernikov uint32_t scopeid; 43336e15b71SAlexander V. Chernikov int parent_plen; 43436e15b71SAlexander V. Chernikov struct radix_node *rn; 43536e15b71SAlexander V. Chernikov 43636e15b71SAlexander V. Chernikov rt = fib4_lookup_rt(fibnum, addr, 0, NHR_UNLOCKED, &rnd); 437*f84c3010SAlexander V. Chernikov if (rt == NULL) 438*f84c3010SAlexander V. Chernikov return (NULL); 439*f84c3010SAlexander V. Chernikov 44036e15b71SAlexander V. Chernikov rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid); 44136e15b71SAlexander V. Chernikov if (parent_plen <= plen) 44236e15b71SAlexander V. Chernikov return (rt); 44336e15b71SAlexander V. Chernikov 44436e15b71SAlexander V. Chernikov /* 44536e15b71SAlexander V. Chernikov * There can be multiple prefixes associated with the found key: 44636e15b71SAlexander V. Chernikov * 10.0.0.0 -> 10.0.0.0/24, 10.0.0.0/23, 10.0.0.0/22, etc. 44736e15b71SAlexander V. Chernikov * All such prefixes are linked via rn_dupedkey, from most specific 44836e15b71SAlexander V. Chernikov * to least specific. Iterate over them to check if any of these 44936e15b71SAlexander V. Chernikov * prefixes are wider than desired plen. 45036e15b71SAlexander V. Chernikov */ 45136e15b71SAlexander V. Chernikov rn = (struct radix_node *)rt; 45236e15b71SAlexander V. Chernikov while ((rn = rn_nextprefix(rn)) != NULL) { 45336e15b71SAlexander V. Chernikov rt = RNTORT(rn); 45436e15b71SAlexander V. Chernikov rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid); 45536e15b71SAlexander V. Chernikov if (parent_plen <= plen) 45636e15b71SAlexander V. Chernikov return (rt); 45736e15b71SAlexander V. Chernikov } 45836e15b71SAlexander V. Chernikov 45936e15b71SAlexander V. Chernikov return (NULL); 46036e15b71SAlexander V. Chernikov } 46136e15b71SAlexander V. Chernikov 46236e15b71SAlexander V. Chernikov /* 46336e15b71SAlexander V. Chernikov * Returns the most specific prefix containing (>) @paddr/plen. 46436e15b71SAlexander V. Chernikov */ 46536e15b71SAlexander V. Chernikov struct rtentry * 46636e15b71SAlexander V. Chernikov rt_get_inet_parent(uint32_t fibnum, struct in_addr addr, int plen) 46736e15b71SAlexander V. Chernikov { 46836e15b71SAlexander V. Chernikov struct in_addr lookup_addr = { .s_addr = INADDR_BROADCAST }; 46936e15b71SAlexander V. Chernikov struct in_addr addr4 = addr; 47036e15b71SAlexander V. Chernikov struct in_addr mask4; 47136e15b71SAlexander V. Chernikov struct rtentry *rt; 47236e15b71SAlexander V. Chernikov 47336e15b71SAlexander V. Chernikov while (plen-- > 0) { 47436e15b71SAlexander V. Chernikov /* Calculate wider mask & new key to lookup */ 47536e15b71SAlexander V. Chernikov mask4.s_addr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0); 47636e15b71SAlexander V. Chernikov addr4.s_addr = htonl(ntohl(addr4.s_addr) & ntohl(mask4.s_addr)); 47736e15b71SAlexander V. Chernikov if (addr4.s_addr == lookup_addr.s_addr) { 47836e15b71SAlexander V. Chernikov /* Skip lookup if the key is the same */ 47936e15b71SAlexander V. Chernikov continue; 48036e15b71SAlexander V. Chernikov } 48136e15b71SAlexander V. Chernikov lookup_addr = addr4; 48236e15b71SAlexander V. Chernikov 48336e15b71SAlexander V. Chernikov rt = get_inet_parent_prefix(fibnum, lookup_addr, plen); 48436e15b71SAlexander V. Chernikov if (rt != NULL) 48536e15b71SAlexander V. Chernikov return (rt); 48636e15b71SAlexander V. Chernikov } 48736e15b71SAlexander V. Chernikov 48836e15b71SAlexander V. Chernikov return (NULL); 48936e15b71SAlexander V. Chernikov } 49036e15b71SAlexander V. Chernikov #endif 49136e15b71SAlexander V. Chernikov 49236e15b71SAlexander V. Chernikov #ifdef INET6 49336e15b71SAlexander V. Chernikov /* 49436e15b71SAlexander V. Chernikov * Checks if the found key in the trie contains (<=) a prefix covering 49536e15b71SAlexander V. Chernikov * @paddr/@plen. 49636e15b71SAlexander V. Chernikov * Returns the most specific rtentry matching the condition or NULL. 49736e15b71SAlexander V. Chernikov */ 49836e15b71SAlexander V. Chernikov static struct rtentry * 49936e15b71SAlexander V. Chernikov get_inet6_parent_prefix(uint32_t fibnum, const struct in6_addr *paddr, int plen) 50036e15b71SAlexander V. Chernikov { 50136e15b71SAlexander V. Chernikov struct route_nhop_data rnd; 50236e15b71SAlexander V. Chernikov struct rtentry *rt; 50336e15b71SAlexander V. Chernikov struct in6_addr addr6; 50436e15b71SAlexander V. Chernikov uint32_t scopeid; 50536e15b71SAlexander V. Chernikov int parent_plen; 50636e15b71SAlexander V. Chernikov struct radix_node *rn; 50736e15b71SAlexander V. Chernikov 50836e15b71SAlexander V. Chernikov rt = fib6_lookup_rt(fibnum, paddr, 0, NHR_UNLOCKED, &rnd); 509*f84c3010SAlexander V. Chernikov if (rt == NULL) 510*f84c3010SAlexander V. Chernikov return (NULL); 511*f84c3010SAlexander V. Chernikov 51236e15b71SAlexander V. Chernikov rt_get_inet6_prefix_plen(rt, &addr6, &parent_plen, &scopeid); 51336e15b71SAlexander V. Chernikov if (parent_plen <= plen) 51436e15b71SAlexander V. Chernikov return (rt); 51536e15b71SAlexander V. Chernikov 51636e15b71SAlexander V. Chernikov /* 51736e15b71SAlexander V. Chernikov * There can be multiple prefixes associated with the found key: 51836e15b71SAlexander V. Chernikov * 2001:db8:1::/64 -> 2001:db8:1::/56, 2001:db8:1::/48, etc. 51936e15b71SAlexander V. Chernikov * All such prefixes are linked via rn_dupedkey, from most specific 52036e15b71SAlexander V. Chernikov * to least specific. Iterate over them to check if any of these 52136e15b71SAlexander V. Chernikov * prefixes are wider than desired plen. 52236e15b71SAlexander V. Chernikov */ 52336e15b71SAlexander V. Chernikov rn = (struct radix_node *)rt; 52436e15b71SAlexander V. Chernikov while ((rn = rn_nextprefix(rn)) != NULL) { 52536e15b71SAlexander V. Chernikov rt = RNTORT(rn); 52636e15b71SAlexander V. Chernikov rt_get_inet6_prefix_plen(rt, &addr6, &parent_plen, &scopeid); 52736e15b71SAlexander V. Chernikov if (parent_plen <= plen) 52836e15b71SAlexander V. Chernikov return (rt); 52936e15b71SAlexander V. Chernikov } 53036e15b71SAlexander V. Chernikov 53136e15b71SAlexander V. Chernikov return (NULL); 53236e15b71SAlexander V. Chernikov } 53336e15b71SAlexander V. Chernikov 53436e15b71SAlexander V. Chernikov static void 53536e15b71SAlexander V. Chernikov ipv6_writemask(struct in6_addr *addr6, uint8_t mask) 53636e15b71SAlexander V. Chernikov { 53736e15b71SAlexander V. Chernikov uint32_t *cp; 53836e15b71SAlexander V. Chernikov 53936e15b71SAlexander V. Chernikov for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32) 54036e15b71SAlexander V. Chernikov *cp++ = 0xFFFFFFFF; 54136e15b71SAlexander V. Chernikov if (mask > 0) 54236e15b71SAlexander V. Chernikov *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0); 54336e15b71SAlexander V. Chernikov } 54436e15b71SAlexander V. Chernikov 54536e15b71SAlexander V. Chernikov /* 54636e15b71SAlexander V. Chernikov * Returns the most specific prefix containing (>) @paddr/plen. 54736e15b71SAlexander V. Chernikov */ 54836e15b71SAlexander V. Chernikov struct rtentry * 54936e15b71SAlexander V. Chernikov rt_get_inet6_parent(uint32_t fibnum, const struct in6_addr *paddr, int plen) 55036e15b71SAlexander V. Chernikov { 55136e15b71SAlexander V. Chernikov struct in6_addr lookup_addr = in6mask128; 55236e15b71SAlexander V. Chernikov struct in6_addr addr6 = *paddr; 55336e15b71SAlexander V. Chernikov struct in6_addr mask6; 55436e15b71SAlexander V. Chernikov struct rtentry *rt; 55536e15b71SAlexander V. Chernikov 55636e15b71SAlexander V. Chernikov while (plen-- > 0) { 55736e15b71SAlexander V. Chernikov /* Calculate wider mask & new key to lookup */ 55836e15b71SAlexander V. Chernikov ipv6_writemask(&mask6, plen); 55936e15b71SAlexander V. Chernikov IN6_MASK_ADDR(&addr6, &mask6); 56036e15b71SAlexander V. Chernikov if (IN6_ARE_ADDR_EQUAL(&addr6, &lookup_addr)) { 56136e15b71SAlexander V. Chernikov /* Skip lookup if the key is the same */ 56236e15b71SAlexander V. Chernikov continue; 56336e15b71SAlexander V. Chernikov } 56436e15b71SAlexander V. Chernikov lookup_addr = addr6; 56536e15b71SAlexander V. Chernikov 56636e15b71SAlexander V. Chernikov rt = get_inet6_parent_prefix(fibnum, &lookup_addr, plen); 56736e15b71SAlexander V. Chernikov if (rt != NULL) 56836e15b71SAlexander V. Chernikov return (rt); 56936e15b71SAlexander V. Chernikov } 57036e15b71SAlexander V. Chernikov 57136e15b71SAlexander V. Chernikov return (NULL); 57236e15b71SAlexander V. Chernikov } 57336e15b71SAlexander V. Chernikov #endif 574