1a6663252SAlexander V. Chernikov /*- 2a6663252SAlexander V. Chernikov * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3a6663252SAlexander V. Chernikov * 4a6663252SAlexander V. Chernikov * Copyright (c) 2020 Alexander V. Chernikov 5a6663252SAlexander V. Chernikov * 6a6663252SAlexander V. Chernikov * Redistribution and use in source and binary forms, with or without 7a6663252SAlexander V. Chernikov * modification, are permitted provided that the following conditions 8a6663252SAlexander V. Chernikov * are met: 9a6663252SAlexander V. Chernikov * 1. Redistributions of source code must retain the above copyright 10a6663252SAlexander V. Chernikov * notice, this list of conditions and the following disclaimer. 11a6663252SAlexander V. Chernikov * 2. Redistributions in binary form must reproduce the above copyright 12a6663252SAlexander V. Chernikov * notice, this list of conditions and the following disclaimer in the 13a6663252SAlexander V. Chernikov * documentation and/or other materials provided with the distribution. 14a6663252SAlexander V. Chernikov * 15a6663252SAlexander V. Chernikov * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16a6663252SAlexander V. Chernikov * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17a6663252SAlexander V. Chernikov * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18a6663252SAlexander V. Chernikov * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19a6663252SAlexander V. Chernikov * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20a6663252SAlexander V. Chernikov * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21a6663252SAlexander V. Chernikov * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22a6663252SAlexander V. Chernikov * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23a6663252SAlexander V. Chernikov * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24a6663252SAlexander V. Chernikov * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25a6663252SAlexander V. Chernikov * SUCH DAMAGE. 26a6663252SAlexander V. Chernikov */ 27a6663252SAlexander V. Chernikov 28a6663252SAlexander V. Chernikov #include <sys/cdefs.h> 29a6663252SAlexander V. Chernikov __FBSDID("$FreeBSD$"); 30a6663252SAlexander V. Chernikov #include "opt_inet.h" 31a6663252SAlexander V. Chernikov #include "opt_inet6.h" 32a6663252SAlexander V. Chernikov #include "opt_route.h" 33a6663252SAlexander V. Chernikov 34a6663252SAlexander V. Chernikov #include <sys/param.h> 35a6663252SAlexander V. Chernikov #include <sys/jail.h> 36a6663252SAlexander V. Chernikov #include <sys/systm.h> 37a6663252SAlexander V. Chernikov #include <sys/malloc.h> 38a6663252SAlexander V. Chernikov #include <sys/mbuf.h> 39a6663252SAlexander V. Chernikov #include <sys/socket.h> 40a6663252SAlexander V. Chernikov #include <sys/sysctl.h> 41a6663252SAlexander V. Chernikov #include <sys/syslog.h> 42a6663252SAlexander V. Chernikov #include <sys/sysproto.h> 43a6663252SAlexander V. Chernikov #include <sys/proc.h> 44a6663252SAlexander V. Chernikov #include <sys/domain.h> 45a6663252SAlexander V. Chernikov #include <sys/kernel.h> 46a6663252SAlexander V. Chernikov #include <sys/lock.h> 47a6663252SAlexander V. Chernikov #include <sys/rmlock.h> 48a6663252SAlexander V. Chernikov 49a6663252SAlexander V. Chernikov #include <net/if.h> 50a6663252SAlexander V. Chernikov #include <net/if_var.h> 51a6663252SAlexander V. Chernikov #include <net/if_dl.h> 52a6663252SAlexander V. Chernikov #include <net/route.h> 53da187ddbSAlexander V. Chernikov #include <net/route/route_ctl.h> 54e7d8af4fSAlexander V. Chernikov #include <net/route/route_var.h> 55a6663252SAlexander V. Chernikov #include <net/route/nhop_utils.h> 56a6663252SAlexander V. Chernikov #include <net/route/nhop.h> 57a6663252SAlexander V. Chernikov #include <net/route/nhop_var.h> 58682b902dSAlexander V. Chernikov #ifdef INET 59682b902dSAlexander V. Chernikov #include <netinet/in_fib.h> 60682b902dSAlexander V. Chernikov #endif 61682b902dSAlexander V. Chernikov #ifdef INET6 62682b902dSAlexander V. Chernikov #include <netinet6/in6_fib.h> 6336e15b71SAlexander V. Chernikov #include <netinet6/in6_var.h> 64682b902dSAlexander V. Chernikov #endif 65a6663252SAlexander V. Chernikov #include <net/vnet.h> 66a6663252SAlexander V. Chernikov 678010b7a7SAlexander V. Chernikov #define DEBUG_MOD_NAME rt_helpers 688010b7a7SAlexander V. Chernikov #define DEBUG_MAX_LEVEL LOG_DEBUG2 698010b7a7SAlexander V. Chernikov #include <net/route/route_debug.h> 708010b7a7SAlexander V. Chernikov _DECLARE_DEBUG(LOG_INFO); 718010b7a7SAlexander V. Chernikov 72a6663252SAlexander V. Chernikov /* 73a6663252SAlexander V. Chernikov * RIB helper functions. 74a6663252SAlexander V. Chernikov */ 75a6663252SAlexander V. Chernikov 76151ec796SAlexander V. Chernikov void 77151ec796SAlexander V. Chernikov rib_walk_ext_locked(struct rib_head *rnh, rib_walktree_f_t *wa_f, 78151ec796SAlexander V. Chernikov rib_walk_hook_f_t *hook_f, void *arg) 79151ec796SAlexander V. Chernikov { 80151ec796SAlexander V. Chernikov if (hook_f != NULL) 81151ec796SAlexander V. Chernikov hook_f(rnh, RIB_WALK_HOOK_PRE, arg); 82151ec796SAlexander V. Chernikov rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f, arg); 83151ec796SAlexander V. Chernikov if (hook_f != NULL) 84151ec796SAlexander V. Chernikov hook_f(rnh, RIB_WALK_HOOK_POST, arg); 85151ec796SAlexander V. Chernikov } 86151ec796SAlexander V. Chernikov 87a6663252SAlexander V. Chernikov /* 88a6663252SAlexander V. Chernikov * Calls @wa_f with @arg for each entry in the table specified by 89a6663252SAlexander V. Chernikov * @af and @fibnum. 90a6663252SAlexander V. Chernikov * 917511a638SAlexander V. Chernikov * @ss_t callback is called before and after the tree traversal 927511a638SAlexander V. Chernikov * while holding table lock. 937511a638SAlexander V. Chernikov * 947511a638SAlexander V. Chernikov * Table is traversed under read lock unless @wlock is set. 95a6663252SAlexander V. Chernikov */ 96a6663252SAlexander V. Chernikov void 973b1654cbSAlexander V. Chernikov rib_walk_ext_internal(struct rib_head *rnh, bool wlock, rib_walktree_f_t *wa_f, 987511a638SAlexander V. Chernikov rib_walk_hook_f_t *hook_f, void *arg) 99a6663252SAlexander V. Chernikov { 100a6663252SAlexander V. Chernikov RIB_RLOCK_TRACKER; 101a6663252SAlexander V. Chernikov 1027511a638SAlexander V. Chernikov if (wlock) 1037511a638SAlexander V. Chernikov RIB_WLOCK(rnh); 1047511a638SAlexander V. Chernikov else 105a6663252SAlexander V. Chernikov RIB_RLOCK(rnh); 106151ec796SAlexander V. Chernikov rib_walk_ext_locked(rnh, wa_f, hook_f, arg); 1077511a638SAlexander V. Chernikov if (wlock) 1087511a638SAlexander V. Chernikov RIB_WUNLOCK(rnh); 1097511a638SAlexander V. Chernikov else 110a6663252SAlexander V. Chernikov RIB_RUNLOCK(rnh); 111a6663252SAlexander V. Chernikov } 112a6663252SAlexander V. Chernikov 1133b1654cbSAlexander V. Chernikov void 1143b1654cbSAlexander V. Chernikov rib_walk_ext(uint32_t fibnum, int family, bool wlock, rib_walktree_f_t *wa_f, 1153b1654cbSAlexander V. Chernikov rib_walk_hook_f_t *hook_f, void *arg) 1163b1654cbSAlexander V. Chernikov { 1173b1654cbSAlexander V. Chernikov struct rib_head *rnh; 1183b1654cbSAlexander V. Chernikov 1193b1654cbSAlexander V. Chernikov if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL) 1203b1654cbSAlexander V. Chernikov rib_walk_ext_internal(rnh, wlock, wa_f, hook_f, arg); 1213b1654cbSAlexander V. Chernikov } 1223b1654cbSAlexander V. Chernikov 123682b902dSAlexander V. Chernikov /* 1247511a638SAlexander V. Chernikov * Calls @wa_f with @arg for each entry in the table specified by 1257511a638SAlexander V. Chernikov * @af and @fibnum. 1267511a638SAlexander V. Chernikov * 1277511a638SAlexander V. Chernikov * Table is traversed under read lock unless @wlock is set. 1287511a638SAlexander V. Chernikov */ 1297511a638SAlexander V. Chernikov void 1307511a638SAlexander V. Chernikov rib_walk(uint32_t fibnum, int family, bool wlock, rib_walktree_f_t *wa_f, 1317511a638SAlexander V. Chernikov void *arg) 1327511a638SAlexander V. Chernikov { 1337511a638SAlexander V. Chernikov 1347511a638SAlexander V. Chernikov rib_walk_ext(fibnum, family, wlock, wa_f, NULL, arg); 1357511a638SAlexander V. Chernikov } 1367511a638SAlexander V. Chernikov 1377511a638SAlexander V. Chernikov /* 138f9668e42SAlexander V. Chernikov * Calls @wa_f with @arg for each entry in the table matching @prefix/@mask. 139f9668e42SAlexander V. Chernikov * 140f9668e42SAlexander V. Chernikov * The following flags are supported: 141f9668e42SAlexander V. Chernikov * RIB_FLAG_WLOCK: acquire exclusive lock 142f9668e42SAlexander V. Chernikov * RIB_FLAG_LOCKED: Assumes the table is already locked & skip locking 143f9668e42SAlexander V. Chernikov * 144f9668e42SAlexander V. Chernikov * By default, table is traversed under read lock. 145f9668e42SAlexander V. Chernikov */ 146f9668e42SAlexander V. Chernikov void 147f9668e42SAlexander V. Chernikov rib_walk_from(uint32_t fibnum, int family, uint32_t flags, struct sockaddr *prefix, 148f9668e42SAlexander V. Chernikov struct sockaddr *mask, rib_walktree_f_t *wa_f, void *arg) 149f9668e42SAlexander V. Chernikov { 150f9668e42SAlexander V. Chernikov RIB_RLOCK_TRACKER; 151f9668e42SAlexander V. Chernikov struct rib_head *rnh = rt_tables_get_rnh(fibnum, family); 152f9668e42SAlexander V. Chernikov 153f9668e42SAlexander V. Chernikov if (rnh == NULL) 154f9668e42SAlexander V. Chernikov return; 155f9668e42SAlexander V. Chernikov 156f9668e42SAlexander V. Chernikov if (flags & RIB_FLAG_WLOCK) 157f9668e42SAlexander V. Chernikov RIB_WLOCK(rnh); 158f9668e42SAlexander V. Chernikov else if (!(flags & RIB_FLAG_LOCKED)) 159f9668e42SAlexander V. Chernikov RIB_RLOCK(rnh); 160f9668e42SAlexander V. Chernikov 161f9668e42SAlexander V. Chernikov rnh->rnh_walktree_from(&rnh->head, prefix, mask, (walktree_f_t *)wa_f, arg); 162f9668e42SAlexander V. Chernikov 163f9668e42SAlexander V. Chernikov if (flags & RIB_FLAG_WLOCK) 164f9668e42SAlexander V. Chernikov RIB_WUNLOCK(rnh); 165f9668e42SAlexander V. Chernikov else if (!(flags & RIB_FLAG_LOCKED)) 166f9668e42SAlexander V. Chernikov RIB_RUNLOCK(rnh); 167f9668e42SAlexander V. Chernikov } 168f9668e42SAlexander V. Chernikov 169f9668e42SAlexander V. Chernikov /* 1707511a638SAlexander V. Chernikov * Iterates over all existing fibs in system calling 1717511a638SAlexander V. Chernikov * @hook_f function before/after traversing each fib. 1727511a638SAlexander V. Chernikov * Calls @wa_f function for each element in current fib. 1737511a638SAlexander V. Chernikov * If af is not AF_UNSPEC, iterates over fibs in particular 1747511a638SAlexander V. Chernikov * address family. 1757511a638SAlexander V. Chernikov */ 1767511a638SAlexander V. Chernikov void 1777511a638SAlexander V. Chernikov rib_foreach_table_walk(int family, bool wlock, rib_walktree_f_t *wa_f, 1787511a638SAlexander V. Chernikov rib_walk_hook_f_t *hook_f, void *arg) 1797511a638SAlexander V. Chernikov { 1807511a638SAlexander V. Chernikov 1817511a638SAlexander V. Chernikov for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) { 1827511a638SAlexander V. Chernikov /* Do we want some specific family? */ 1837511a638SAlexander V. Chernikov if (family != AF_UNSPEC) { 1847511a638SAlexander V. Chernikov rib_walk_ext(fibnum, family, wlock, wa_f, hook_f, arg); 1857511a638SAlexander V. Chernikov continue; 1867511a638SAlexander V. Chernikov } 1877511a638SAlexander V. Chernikov 1887511a638SAlexander V. Chernikov for (int i = 1; i <= AF_MAX; i++) 1897511a638SAlexander V. Chernikov rib_walk_ext(fibnum, i, wlock, wa_f, hook_f, arg); 1907511a638SAlexander V. Chernikov } 1917511a638SAlexander V. Chernikov } 1927511a638SAlexander V. Chernikov 1937511a638SAlexander V. Chernikov /* 1947511a638SAlexander V. Chernikov * Iterates over all existing fibs in system and deletes each element 1957511a638SAlexander V. Chernikov * for which @filter_f function returns non-zero value. 1967511a638SAlexander V. Chernikov * If @family is not AF_UNSPEC, iterates over fibs in particular 1977511a638SAlexander V. Chernikov * address family. 1987511a638SAlexander V. Chernikov */ 1997511a638SAlexander V. Chernikov void 2007511a638SAlexander V. Chernikov rib_foreach_table_walk_del(int family, rib_filter_f_t *filter_f, void *arg) 2017511a638SAlexander V. Chernikov { 2027511a638SAlexander V. Chernikov 2037511a638SAlexander V. Chernikov for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) { 2047511a638SAlexander V. Chernikov /* Do we want some specific family? */ 2057511a638SAlexander V. Chernikov if (family != AF_UNSPEC) { 2067511a638SAlexander V. Chernikov rib_walk_del(fibnum, family, filter_f, arg, 0); 2077511a638SAlexander V. Chernikov continue; 2087511a638SAlexander V. Chernikov } 2097511a638SAlexander V. Chernikov 2107511a638SAlexander V. Chernikov for (int i = 1; i <= AF_MAX; i++) 2117511a638SAlexander V. Chernikov rib_walk_del(fibnum, i, filter_f, arg, 0); 2127511a638SAlexander V. Chernikov } 2137511a638SAlexander V. Chernikov } 2147511a638SAlexander V. Chernikov 2157511a638SAlexander V. Chernikov 2167511a638SAlexander V. Chernikov /* 217682b902dSAlexander V. Chernikov * Wrapper for the control plane functions for performing af-agnostic 218682b902dSAlexander V. Chernikov * lookups. 219682b902dSAlexander V. Chernikov * @fibnum: fib to perform the lookup. 220682b902dSAlexander V. Chernikov * @dst: sockaddr with family and addr filled in. IPv6 addresses needs to be in 221682b902dSAlexander V. Chernikov * deembedded from. 222682b902dSAlexander V. Chernikov * @flags: fib(9) flags. 223682b902dSAlexander V. Chernikov * @flowid: flow id for path selection in multipath use case. 224682b902dSAlexander V. Chernikov * 225682b902dSAlexander V. Chernikov * Returns nhop_object or NULL. 226682b902dSAlexander V. Chernikov * 227682b902dSAlexander V. Chernikov * Requires NET_EPOCH. 228682b902dSAlexander V. Chernikov * 229682b902dSAlexander V. Chernikov */ 230682b902dSAlexander V. Chernikov struct nhop_object * 231682b902dSAlexander V. Chernikov rib_lookup(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags, 232682b902dSAlexander V. Chernikov uint32_t flowid) 233682b902dSAlexander V. Chernikov { 234682b902dSAlexander V. Chernikov struct nhop_object *nh; 235682b902dSAlexander V. Chernikov 236682b902dSAlexander V. Chernikov nh = NULL; 237682b902dSAlexander V. Chernikov 238682b902dSAlexander V. Chernikov switch (dst->sa_family) { 239682b902dSAlexander V. Chernikov #ifdef INET 240682b902dSAlexander V. Chernikov case AF_INET: 241682b902dSAlexander V. Chernikov { 242682b902dSAlexander V. Chernikov const struct sockaddr_in *a = (const struct sockaddr_in *)dst; 243682b902dSAlexander V. Chernikov nh = fib4_lookup(fibnum, a->sin_addr, 0, flags, flowid); 244682b902dSAlexander V. Chernikov break; 245682b902dSAlexander V. Chernikov } 246682b902dSAlexander V. Chernikov #endif 247682b902dSAlexander V. Chernikov #ifdef INET6 248682b902dSAlexander V. Chernikov case AF_INET6: 249682b902dSAlexander V. Chernikov { 250682b902dSAlexander V. Chernikov const struct sockaddr_in6 *a = (const struct sockaddr_in6*)dst; 251682b902dSAlexander V. Chernikov nh = fib6_lookup(fibnum, &a->sin6_addr, a->sin6_scope_id, 252682b902dSAlexander V. Chernikov flags, flowid); 253682b902dSAlexander V. Chernikov break; 254682b902dSAlexander V. Chernikov } 255682b902dSAlexander V. Chernikov #endif 256682b902dSAlexander V. Chernikov } 257682b902dSAlexander V. Chernikov 258682b902dSAlexander V. Chernikov return (nh); 259682b902dSAlexander V. Chernikov } 260fedeb08bSAlexander V. Chernikov 261db4b4021SMateusz Guzik #ifdef ROUTE_MPATH 2628010b7a7SAlexander V. Chernikov static void 2638010b7a7SAlexander V. Chernikov notify_add(struct rib_cmd_info *rc, const struct weightened_nhop *wn_src, 2648010b7a7SAlexander V. Chernikov route_notification_t *cb, void *cbdata) { 2658010b7a7SAlexander V. Chernikov rc->rc_nh_new = wn_src->nh; 2668010b7a7SAlexander V. Chernikov rc->rc_nh_weight = wn_src->weight; 2678010b7a7SAlexander V. Chernikov #if DEBUG_MAX_LEVEL >= LOG_DEBUG2 2688010b7a7SAlexander V. Chernikov char nhbuf[NHOP_PRINT_BUFSIZE]; 2698010b7a7SAlexander V. Chernikov FIB_NH_LOG(LOG_DEBUG2, wn_src->nh, "RTM_ADD for %s @ w=%u", 2708010b7a7SAlexander V. Chernikov nhop_print_buf(wn_src->nh, nhbuf, sizeof(nhbuf)), wn_src->weight); 2718010b7a7SAlexander V. Chernikov #endif 2728010b7a7SAlexander V. Chernikov cb(rc, cbdata); 2738010b7a7SAlexander V. Chernikov } 2748010b7a7SAlexander V. Chernikov 2758010b7a7SAlexander V. Chernikov static void 2768010b7a7SAlexander V. Chernikov notify_del(struct rib_cmd_info *rc, const struct weightened_nhop *wn_src, 2778010b7a7SAlexander V. Chernikov route_notification_t *cb, void *cbdata) { 2788010b7a7SAlexander V. Chernikov rc->rc_nh_old = wn_src->nh; 2798010b7a7SAlexander V. Chernikov rc->rc_nh_weight = wn_src->weight; 2808010b7a7SAlexander V. Chernikov #if DEBUG_MAX_LEVEL >= LOG_DEBUG2 2818010b7a7SAlexander V. Chernikov char nhbuf[NHOP_PRINT_BUFSIZE]; 2828010b7a7SAlexander V. Chernikov FIB_NH_LOG(LOG_DEBUG2, wn_src->nh, "RTM_DEL for %s @ w=%u", 2838010b7a7SAlexander V. Chernikov nhop_print_buf(wn_src->nh, nhbuf, sizeof(nhbuf)), wn_src->weight); 2848010b7a7SAlexander V. Chernikov #endif 2858010b7a7SAlexander V. Chernikov cb(rc, cbdata); 2868010b7a7SAlexander V. Chernikov } 2878010b7a7SAlexander V. Chernikov 288fedeb08bSAlexander V. Chernikov static void 289fedeb08bSAlexander V. Chernikov decompose_change_notification(struct rib_cmd_info *rc, route_notification_t *cb, 290fedeb08bSAlexander V. Chernikov void *cbdata) 291fedeb08bSAlexander V. Chernikov { 292fedeb08bSAlexander V. Chernikov uint32_t num_old, num_new; 293*ae6bfd12SAlexander V. Chernikov const struct weightened_nhop *wn_old, *wn_new; 294fedeb08bSAlexander V. Chernikov struct weightened_nhop tmp = { NULL, 0 }; 295fedeb08bSAlexander V. Chernikov uint32_t idx_old = 0, idx_new = 0; 296fedeb08bSAlexander V. Chernikov 297fedeb08bSAlexander V. Chernikov struct rib_cmd_info rc_del = { .rc_cmd = RTM_DELETE, .rc_rt = rc->rc_rt }; 298fedeb08bSAlexander V. Chernikov struct rib_cmd_info rc_add = { .rc_cmd = RTM_ADD, .rc_rt = rc->rc_rt }; 299fedeb08bSAlexander V. Chernikov 300fedeb08bSAlexander V. Chernikov if (NH_IS_NHGRP(rc->rc_nh_old)) { 301fedeb08bSAlexander V. Chernikov wn_old = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_old, &num_old); 302fedeb08bSAlexander V. Chernikov } else { 303fedeb08bSAlexander V. Chernikov tmp.nh = rc->rc_nh_old; 304fedeb08bSAlexander V. Chernikov tmp.weight = rc->rc_nh_weight; 305fedeb08bSAlexander V. Chernikov wn_old = &tmp; 306fedeb08bSAlexander V. Chernikov num_old = 1; 307fedeb08bSAlexander V. Chernikov } 308fedeb08bSAlexander V. Chernikov if (NH_IS_NHGRP(rc->rc_nh_new)) { 309fedeb08bSAlexander V. Chernikov wn_new = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_new, &num_new); 310fedeb08bSAlexander V. Chernikov } else { 311fedeb08bSAlexander V. Chernikov tmp.nh = rc->rc_nh_new; 312fedeb08bSAlexander V. Chernikov tmp.weight = rc->rc_nh_weight; 313fedeb08bSAlexander V. Chernikov wn_new = &tmp; 314fedeb08bSAlexander V. Chernikov num_new = 1; 315fedeb08bSAlexander V. Chernikov } 3168010b7a7SAlexander V. Chernikov #if DEBUG_MAX_LEVEL >= LOG_DEBUG 3178010b7a7SAlexander V. Chernikov { 3188010b7a7SAlexander V. Chernikov char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE]; 3198010b7a7SAlexander V. Chernikov nhop_print_buf_any(rc->rc_nh_old, buf_old, NHOP_PRINT_BUFSIZE); 3208010b7a7SAlexander V. Chernikov nhop_print_buf_any(rc->rc_nh_new, buf_new, NHOP_PRINT_BUFSIZE); 3218010b7a7SAlexander V. Chernikov FIB_NH_LOG(LOG_DEBUG, wn_old[0].nh, "change %s -> %s", buf_old, buf_new); 3228010b7a7SAlexander V. Chernikov } 3238010b7a7SAlexander V. Chernikov #endif 324fedeb08bSAlexander V. Chernikov 325fedeb08bSAlexander V. Chernikov /* Use the fact that each @wn array is sorted */ 326fedeb08bSAlexander V. Chernikov /* 3278010b7a7SAlexander V. Chernikov * Here we have one (or two) multipath groups and transition 3288010b7a7SAlexander V. Chernikov * between them needs to be reported to the caller, using series 3298010b7a7SAlexander V. Chernikov * of primitive (RTM_DEL, RTM_ADD) operations. 330fedeb08bSAlexander V. Chernikov * 3318010b7a7SAlexander V. Chernikov * Leverage the fact that each nexthop group has its nexthops sorted 3328010b7a7SAlexander V. Chernikov * by their indices. 3338010b7a7SAlexander V. Chernikov * [1] -> [1, 2] = A{2} 3348010b7a7SAlexander V. Chernikov * [1, 2] -> [1] = D{2} 3358010b7a7SAlexander V. Chernikov * [1, 2, 4] -> [1, 3, 4] = D{2}, A{3} 3368010b7a7SAlexander V. Chernikov * [1, 2] -> [3, 4] = D{1}, D{2}, A{3}, A{4] 337fedeb08bSAlexander V. Chernikov */ 338fedeb08bSAlexander V. Chernikov while ((idx_old < num_old) && (idx_new < num_new)) { 3398010b7a7SAlexander V. Chernikov uint32_t nh_idx_old = wn_old[idx_old].nh->nh_priv->nh_idx; 3408010b7a7SAlexander V. Chernikov uint32_t nh_idx_new = wn_new[idx_new].nh->nh_priv->nh_idx; 341fedeb08bSAlexander V. Chernikov 342fedeb08bSAlexander V. Chernikov if (nh_idx_old == nh_idx_new) { 343fedeb08bSAlexander V. Chernikov if (wn_old[idx_old].weight != wn_new[idx_new].weight) { 344fedeb08bSAlexander V. Chernikov /* Update weight by providing del/add notifications */ 3458010b7a7SAlexander V. Chernikov notify_del(&rc_del, &wn_old[idx_old], cb, cbdata); 3468010b7a7SAlexander V. Chernikov notify_add(&rc_add, &wn_new[idx_new], cb, cbdata); 347fedeb08bSAlexander V. Chernikov } 348fedeb08bSAlexander V. Chernikov idx_old++; 349fedeb08bSAlexander V. Chernikov idx_new++; 350fedeb08bSAlexander V. Chernikov } else if (nh_idx_old < nh_idx_new) { 3518010b7a7SAlexander V. Chernikov /* [1, ~2~, 4], [1, ~3~, 4] */ 3528010b7a7SAlexander V. Chernikov notify_del(&rc_del, &wn_old[idx_old], cb, cbdata); 353fedeb08bSAlexander V. Chernikov idx_old++; 354fedeb08bSAlexander V. Chernikov } else { 3558010b7a7SAlexander V. Chernikov /* nh_idx_old > nh_idx_new. */ 3568010b7a7SAlexander V. Chernikov notify_add(&rc_add, &wn_new[idx_new], cb, cbdata); 357fedeb08bSAlexander V. Chernikov idx_new++; 358fedeb08bSAlexander V. Chernikov } 359fedeb08bSAlexander V. Chernikov } 360fedeb08bSAlexander V. Chernikov 361fedeb08bSAlexander V. Chernikov while (idx_old < num_old) { 3628010b7a7SAlexander V. Chernikov notify_del(&rc_del, &wn_old[idx_old], cb, cbdata); 363fedeb08bSAlexander V. Chernikov idx_old++; 364fedeb08bSAlexander V. Chernikov } 365fedeb08bSAlexander V. Chernikov 366fedeb08bSAlexander V. Chernikov while (idx_new < num_new) { 3678010b7a7SAlexander V. Chernikov notify_add(&rc_add, &wn_new[idx_new], cb, cbdata); 368fedeb08bSAlexander V. Chernikov idx_new++; 369fedeb08bSAlexander V. Chernikov } 370fedeb08bSAlexander V. Chernikov } 371fedeb08bSAlexander V. Chernikov 372fedeb08bSAlexander V. Chernikov /* 373fedeb08bSAlexander V. Chernikov * Decompose multipath cmd info @rc into a list of add/del/change 374fedeb08bSAlexander V. Chernikov * single-path operations, calling @cb callback for each operation. 375fedeb08bSAlexander V. Chernikov * Assumes at least one of the nexthops in @rc is multipath. 376fedeb08bSAlexander V. Chernikov */ 377fedeb08bSAlexander V. Chernikov void 378fedeb08bSAlexander V. Chernikov rib_decompose_notification(struct rib_cmd_info *rc, route_notification_t *cb, 379fedeb08bSAlexander V. Chernikov void *cbdata) 380fedeb08bSAlexander V. Chernikov { 381*ae6bfd12SAlexander V. Chernikov const struct weightened_nhop *wn; 382fedeb08bSAlexander V. Chernikov uint32_t num_nhops; 383fedeb08bSAlexander V. Chernikov struct rib_cmd_info rc_new; 384fedeb08bSAlexander V. Chernikov 385fedeb08bSAlexander V. Chernikov rc_new = *rc; 386fedeb08bSAlexander V. Chernikov switch (rc->rc_cmd) { 387fedeb08bSAlexander V. Chernikov case RTM_ADD: 388fedeb08bSAlexander V. Chernikov if (!NH_IS_NHGRP(rc->rc_nh_new)) 389fedeb08bSAlexander V. Chernikov return; 390fedeb08bSAlexander V. Chernikov wn = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_new, &num_nhops); 391fedeb08bSAlexander V. Chernikov for (uint32_t i = 0; i < num_nhops; i++) { 3928010b7a7SAlexander V. Chernikov notify_add(&rc_new, &wn[i], cb, cbdata); 393fedeb08bSAlexander V. Chernikov } 394fedeb08bSAlexander V. Chernikov break; 395fedeb08bSAlexander V. Chernikov case RTM_DELETE: 396fedeb08bSAlexander V. Chernikov if (!NH_IS_NHGRP(rc->rc_nh_old)) 397fedeb08bSAlexander V. Chernikov return; 398fedeb08bSAlexander V. Chernikov wn = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_old, &num_nhops); 399fedeb08bSAlexander V. Chernikov for (uint32_t i = 0; i < num_nhops; i++) { 4008010b7a7SAlexander V. Chernikov notify_del(&rc_new, &wn[i], cb, cbdata); 401fedeb08bSAlexander V. Chernikov } 402fedeb08bSAlexander V. Chernikov break; 403fedeb08bSAlexander V. Chernikov case RTM_CHANGE: 404fedeb08bSAlexander V. Chernikov if (!NH_IS_NHGRP(rc->rc_nh_old) && !NH_IS_NHGRP(rc->rc_nh_new)) 405fedeb08bSAlexander V. Chernikov return; 406fedeb08bSAlexander V. Chernikov decompose_change_notification(rc, cb, cbdata); 407fedeb08bSAlexander V. Chernikov break; 408fedeb08bSAlexander V. Chernikov } 409fedeb08bSAlexander V. Chernikov } 410fedeb08bSAlexander V. Chernikov #endif 41136e15b71SAlexander V. Chernikov 41236e15b71SAlexander V. Chernikov #ifdef INET 41336e15b71SAlexander V. Chernikov /* 41436e15b71SAlexander V. Chernikov * Checks if the found key in the trie contains (<=) a prefix covering 41536e15b71SAlexander V. Chernikov * @paddr/@plen. 41636e15b71SAlexander V. Chernikov * Returns the most specific rtentry matching the condition or NULL. 41736e15b71SAlexander V. Chernikov */ 41836e15b71SAlexander V. Chernikov static struct rtentry * 41936e15b71SAlexander V. Chernikov get_inet_parent_prefix(uint32_t fibnum, struct in_addr addr, int plen) 42036e15b71SAlexander V. Chernikov { 42136e15b71SAlexander V. Chernikov struct route_nhop_data rnd; 42236e15b71SAlexander V. Chernikov struct rtentry *rt; 42336e15b71SAlexander V. Chernikov struct in_addr addr4; 42436e15b71SAlexander V. Chernikov uint32_t scopeid; 42536e15b71SAlexander V. Chernikov int parent_plen; 42636e15b71SAlexander V. Chernikov struct radix_node *rn; 42736e15b71SAlexander V. Chernikov 42836e15b71SAlexander V. Chernikov rt = fib4_lookup_rt(fibnum, addr, 0, NHR_UNLOCKED, &rnd); 429f84c3010SAlexander V. Chernikov if (rt == NULL) 430f84c3010SAlexander V. Chernikov return (NULL); 431f84c3010SAlexander V. Chernikov 43236e15b71SAlexander V. Chernikov rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid); 43336e15b71SAlexander V. Chernikov if (parent_plen <= plen) 43436e15b71SAlexander V. Chernikov return (rt); 43536e15b71SAlexander V. Chernikov 43636e15b71SAlexander V. Chernikov /* 43736e15b71SAlexander V. Chernikov * There can be multiple prefixes associated with the found key: 43836e15b71SAlexander V. Chernikov * 10.0.0.0 -> 10.0.0.0/24, 10.0.0.0/23, 10.0.0.0/22, etc. 43936e15b71SAlexander V. Chernikov * All such prefixes are linked via rn_dupedkey, from most specific 44036e15b71SAlexander V. Chernikov * to least specific. Iterate over them to check if any of these 44136e15b71SAlexander V. Chernikov * prefixes are wider than desired plen. 44236e15b71SAlexander V. Chernikov */ 44336e15b71SAlexander V. Chernikov rn = (struct radix_node *)rt; 44436e15b71SAlexander V. Chernikov while ((rn = rn_nextprefix(rn)) != NULL) { 44536e15b71SAlexander V. Chernikov rt = RNTORT(rn); 44636e15b71SAlexander V. Chernikov rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid); 44736e15b71SAlexander V. Chernikov if (parent_plen <= plen) 44836e15b71SAlexander V. Chernikov return (rt); 44936e15b71SAlexander V. Chernikov } 45036e15b71SAlexander V. Chernikov 45136e15b71SAlexander V. Chernikov return (NULL); 45236e15b71SAlexander V. Chernikov } 45336e15b71SAlexander V. Chernikov 45436e15b71SAlexander V. Chernikov /* 45536e15b71SAlexander V. Chernikov * Returns the most specific prefix containing (>) @paddr/plen. 45636e15b71SAlexander V. Chernikov */ 45736e15b71SAlexander V. Chernikov struct rtentry * 45836e15b71SAlexander V. Chernikov rt_get_inet_parent(uint32_t fibnum, struct in_addr addr, int plen) 45936e15b71SAlexander V. Chernikov { 46036e15b71SAlexander V. Chernikov struct in_addr lookup_addr = { .s_addr = INADDR_BROADCAST }; 46136e15b71SAlexander V. Chernikov struct in_addr addr4 = addr; 46236e15b71SAlexander V. Chernikov struct in_addr mask4; 46336e15b71SAlexander V. Chernikov struct rtentry *rt; 46436e15b71SAlexander V. Chernikov 46536e15b71SAlexander V. Chernikov while (plen-- > 0) { 46636e15b71SAlexander V. Chernikov /* Calculate wider mask & new key to lookup */ 46736e15b71SAlexander V. Chernikov mask4.s_addr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0); 46836e15b71SAlexander V. Chernikov addr4.s_addr = htonl(ntohl(addr4.s_addr) & ntohl(mask4.s_addr)); 46936e15b71SAlexander V. Chernikov if (addr4.s_addr == lookup_addr.s_addr) { 47036e15b71SAlexander V. Chernikov /* Skip lookup if the key is the same */ 47136e15b71SAlexander V. Chernikov continue; 47236e15b71SAlexander V. Chernikov } 47336e15b71SAlexander V. Chernikov lookup_addr = addr4; 47436e15b71SAlexander V. Chernikov 47536e15b71SAlexander V. Chernikov rt = get_inet_parent_prefix(fibnum, lookup_addr, plen); 47636e15b71SAlexander V. Chernikov if (rt != NULL) 47736e15b71SAlexander V. Chernikov return (rt); 47836e15b71SAlexander V. Chernikov } 47936e15b71SAlexander V. Chernikov 48036e15b71SAlexander V. Chernikov return (NULL); 48136e15b71SAlexander V. Chernikov } 48236e15b71SAlexander V. Chernikov #endif 48336e15b71SAlexander V. Chernikov 48436e15b71SAlexander V. Chernikov #ifdef INET6 48536e15b71SAlexander V. Chernikov /* 48636e15b71SAlexander V. Chernikov * Checks if the found key in the trie contains (<=) a prefix covering 48736e15b71SAlexander V. Chernikov * @paddr/@plen. 48836e15b71SAlexander V. Chernikov * Returns the most specific rtentry matching the condition or NULL. 48936e15b71SAlexander V. Chernikov */ 49036e15b71SAlexander V. Chernikov static struct rtentry * 49136e15b71SAlexander V. Chernikov get_inet6_parent_prefix(uint32_t fibnum, const struct in6_addr *paddr, int plen) 49236e15b71SAlexander V. Chernikov { 49336e15b71SAlexander V. Chernikov struct route_nhop_data rnd; 49436e15b71SAlexander V. Chernikov struct rtentry *rt; 49536e15b71SAlexander V. Chernikov struct in6_addr addr6; 49636e15b71SAlexander V. Chernikov uint32_t scopeid; 49736e15b71SAlexander V. Chernikov int parent_plen; 49836e15b71SAlexander V. Chernikov struct radix_node *rn; 49936e15b71SAlexander V. Chernikov 50036e15b71SAlexander V. Chernikov rt = fib6_lookup_rt(fibnum, paddr, 0, NHR_UNLOCKED, &rnd); 501f84c3010SAlexander V. Chernikov if (rt == NULL) 502f84c3010SAlexander V. Chernikov return (NULL); 503f84c3010SAlexander V. Chernikov 50436e15b71SAlexander V. Chernikov rt_get_inet6_prefix_plen(rt, &addr6, &parent_plen, &scopeid); 50536e15b71SAlexander V. Chernikov if (parent_plen <= plen) 50636e15b71SAlexander V. Chernikov return (rt); 50736e15b71SAlexander V. Chernikov 50836e15b71SAlexander V. Chernikov /* 50936e15b71SAlexander V. Chernikov * There can be multiple prefixes associated with the found key: 51036e15b71SAlexander V. Chernikov * 2001:db8:1::/64 -> 2001:db8:1::/56, 2001:db8:1::/48, etc. 51136e15b71SAlexander V. Chernikov * All such prefixes are linked via rn_dupedkey, from most specific 51236e15b71SAlexander V. Chernikov * to least specific. Iterate over them to check if any of these 51336e15b71SAlexander V. Chernikov * prefixes are wider than desired plen. 51436e15b71SAlexander V. Chernikov */ 51536e15b71SAlexander V. Chernikov rn = (struct radix_node *)rt; 51636e15b71SAlexander V. Chernikov while ((rn = rn_nextprefix(rn)) != NULL) { 51736e15b71SAlexander V. Chernikov rt = RNTORT(rn); 51836e15b71SAlexander V. Chernikov rt_get_inet6_prefix_plen(rt, &addr6, &parent_plen, &scopeid); 51936e15b71SAlexander V. Chernikov if (parent_plen <= plen) 52036e15b71SAlexander V. Chernikov return (rt); 52136e15b71SAlexander V. Chernikov } 52236e15b71SAlexander V. Chernikov 52336e15b71SAlexander V. Chernikov return (NULL); 52436e15b71SAlexander V. Chernikov } 52536e15b71SAlexander V. Chernikov 52636e15b71SAlexander V. Chernikov static void 52736e15b71SAlexander V. Chernikov ipv6_writemask(struct in6_addr *addr6, uint8_t mask) 52836e15b71SAlexander V. Chernikov { 52936e15b71SAlexander V. Chernikov uint32_t *cp; 53036e15b71SAlexander V. Chernikov 53136e15b71SAlexander V. Chernikov for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32) 53236e15b71SAlexander V. Chernikov *cp++ = 0xFFFFFFFF; 53336e15b71SAlexander V. Chernikov if (mask > 0) 53436e15b71SAlexander V. Chernikov *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0); 53536e15b71SAlexander V. Chernikov } 53636e15b71SAlexander V. Chernikov 53736e15b71SAlexander V. Chernikov /* 53836e15b71SAlexander V. Chernikov * Returns the most specific prefix containing (>) @paddr/plen. 53936e15b71SAlexander V. Chernikov */ 54036e15b71SAlexander V. Chernikov struct rtentry * 54136e15b71SAlexander V. Chernikov rt_get_inet6_parent(uint32_t fibnum, const struct in6_addr *paddr, int plen) 54236e15b71SAlexander V. Chernikov { 54336e15b71SAlexander V. Chernikov struct in6_addr lookup_addr = in6mask128; 54436e15b71SAlexander V. Chernikov struct in6_addr addr6 = *paddr; 54536e15b71SAlexander V. Chernikov struct in6_addr mask6; 54636e15b71SAlexander V. Chernikov struct rtentry *rt; 54736e15b71SAlexander V. Chernikov 54836e15b71SAlexander V. Chernikov while (plen-- > 0) { 54936e15b71SAlexander V. Chernikov /* Calculate wider mask & new key to lookup */ 55036e15b71SAlexander V. Chernikov ipv6_writemask(&mask6, plen); 55136e15b71SAlexander V. Chernikov IN6_MASK_ADDR(&addr6, &mask6); 55236e15b71SAlexander V. Chernikov if (IN6_ARE_ADDR_EQUAL(&addr6, &lookup_addr)) { 55336e15b71SAlexander V. Chernikov /* Skip lookup if the key is the same */ 55436e15b71SAlexander V. Chernikov continue; 55536e15b71SAlexander V. Chernikov } 55636e15b71SAlexander V. Chernikov lookup_addr = addr6; 55736e15b71SAlexander V. Chernikov 55836e15b71SAlexander V. Chernikov rt = get_inet6_parent_prefix(fibnum, &lookup_addr, plen); 55936e15b71SAlexander V. Chernikov if (rt != NULL) 56036e15b71SAlexander V. Chernikov return (rt); 56136e15b71SAlexander V. Chernikov } 56236e15b71SAlexander V. Chernikov 56336e15b71SAlexander V. Chernikov return (NULL); 56436e15b71SAlexander V. Chernikov } 56536e15b71SAlexander V. Chernikov #endif 56627f107e1SAlexander V. Chernikov 56727f107e1SAlexander V. Chernikov /* 56827f107e1SAlexander V. Chernikov * Prints rtentry @rt data in the provided @buf. 56927f107e1SAlexander V. Chernikov * Example: rt/192.168.0.0/24 57027f107e1SAlexander V. Chernikov */ 57127f107e1SAlexander V. Chernikov char * 57227f107e1SAlexander V. Chernikov rt_print_buf(const struct rtentry *rt, char *buf, size_t bufsize) 57327f107e1SAlexander V. Chernikov { 57427f107e1SAlexander V. Chernikov char abuf[INET6_ADDRSTRLEN]; 57527f107e1SAlexander V. Chernikov uint32_t scopeid; 57627f107e1SAlexander V. Chernikov int plen; 57727f107e1SAlexander V. Chernikov 57827f107e1SAlexander V. Chernikov switch (rt_get_family(rt)) { 57927f107e1SAlexander V. Chernikov #ifdef INET 58027f107e1SAlexander V. Chernikov case AF_INET: 58127f107e1SAlexander V. Chernikov { 58227f107e1SAlexander V. Chernikov struct in_addr addr4; 58327f107e1SAlexander V. Chernikov rt_get_inet_prefix_plen(rt, &addr4, &plen, &scopeid); 58427f107e1SAlexander V. Chernikov inet_ntop(AF_INET, &addr4, abuf, sizeof(abuf)); 58527f107e1SAlexander V. Chernikov snprintf(buf, bufsize, "rt/%s/%d", abuf, plen); 58627f107e1SAlexander V. Chernikov } 58727f107e1SAlexander V. Chernikov break; 58827f107e1SAlexander V. Chernikov #endif 58927f107e1SAlexander V. Chernikov #ifdef INET6 59027f107e1SAlexander V. Chernikov case AF_INET6: 59127f107e1SAlexander V. Chernikov { 59227f107e1SAlexander V. Chernikov struct in6_addr addr6; 59327f107e1SAlexander V. Chernikov rt_get_inet6_prefix_plen(rt, &addr6, &plen, &scopeid); 59427f107e1SAlexander V. Chernikov inet_ntop(AF_INET6, &addr6, abuf, sizeof(abuf)); 59527f107e1SAlexander V. Chernikov snprintf(buf, bufsize, "rt/%s/%d", abuf, plen); 59627f107e1SAlexander V. Chernikov } 59727f107e1SAlexander V. Chernikov break; 59827f107e1SAlexander V. Chernikov #endif 59927f107e1SAlexander V. Chernikov default: 60027f107e1SAlexander V. Chernikov snprintf(buf, bufsize, "rt/unknown_af#%d", rt_get_family(rt)); 60127f107e1SAlexander V. Chernikov break; 60227f107e1SAlexander V. Chernikov } 60327f107e1SAlexander V. Chernikov 60427f107e1SAlexander V. Chernikov return (buf); 60527f107e1SAlexander V. Chernikov } 60627f107e1SAlexander V. Chernikov 60727f107e1SAlexander V. Chernikov const char * 60827f107e1SAlexander V. Chernikov rib_print_cmd(int rib_cmd) 60927f107e1SAlexander V. Chernikov { 61027f107e1SAlexander V. Chernikov switch (rib_cmd) { 61127f107e1SAlexander V. Chernikov case RTM_ADD: 61227f107e1SAlexander V. Chernikov return ("RTM_ADD"); 61327f107e1SAlexander V. Chernikov case RTM_CHANGE: 61427f107e1SAlexander V. Chernikov return ("RTM_CHANGE"); 61527f107e1SAlexander V. Chernikov case RTM_DELETE: 61627f107e1SAlexander V. Chernikov return ("RTM_DELETE"); 61727f107e1SAlexander V. Chernikov case RTM_GET: 61827f107e1SAlexander V. Chernikov return ("RTM_GET"); 61927f107e1SAlexander V. Chernikov } 62027f107e1SAlexander V. Chernikov 62127f107e1SAlexander V. Chernikov return ("UNKNOWN"); 62227f107e1SAlexander V. Chernikov } 623