xref: /freebsd/sys/net/route/route_helpers.c (revision f84c30106e8b725774b4e9a32c8dd11c90da8c25)
1a6663252SAlexander V. Chernikov /*-
2a6663252SAlexander V. Chernikov  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3a6663252SAlexander V. Chernikov  *
4a6663252SAlexander V. Chernikov  * Copyright (c) 2020 Alexander V. Chernikov
5a6663252SAlexander V. Chernikov  *
6a6663252SAlexander V. Chernikov  * Redistribution and use in source and binary forms, with or without
7a6663252SAlexander V. Chernikov  * modification, are permitted provided that the following conditions
8a6663252SAlexander V. Chernikov  * are met:
9a6663252SAlexander V. Chernikov  * 1. Redistributions of source code must retain the above copyright
10a6663252SAlexander V. Chernikov  *    notice, this list of conditions and the following disclaimer.
11a6663252SAlexander V. Chernikov  * 2. Redistributions in binary form must reproduce the above copyright
12a6663252SAlexander V. Chernikov  *    notice, this list of conditions and the following disclaimer in the
13a6663252SAlexander V. Chernikov  *    documentation and/or other materials provided with the distribution.
14a6663252SAlexander V. Chernikov  *
15a6663252SAlexander V. Chernikov  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16a6663252SAlexander V. Chernikov  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17a6663252SAlexander V. Chernikov  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18a6663252SAlexander V. Chernikov  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19a6663252SAlexander V. Chernikov  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20a6663252SAlexander V. Chernikov  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21a6663252SAlexander V. Chernikov  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22a6663252SAlexander V. Chernikov  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23a6663252SAlexander V. Chernikov  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24a6663252SAlexander V. Chernikov  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25a6663252SAlexander V. Chernikov  * SUCH DAMAGE.
26a6663252SAlexander V. Chernikov  */
27a6663252SAlexander V. Chernikov 
28a6663252SAlexander V. Chernikov #include <sys/cdefs.h>
29a6663252SAlexander V. Chernikov __FBSDID("$FreeBSD$");
30a6663252SAlexander V. Chernikov #include "opt_inet.h"
31a6663252SAlexander V. Chernikov #include "opt_inet6.h"
32a6663252SAlexander V. Chernikov #include "opt_route.h"
33a6663252SAlexander V. Chernikov 
34a6663252SAlexander V. Chernikov #include <sys/param.h>
35a6663252SAlexander V. Chernikov #include <sys/jail.h>
36a6663252SAlexander V. Chernikov #include <sys/systm.h>
37a6663252SAlexander V. Chernikov #include <sys/malloc.h>
38a6663252SAlexander V. Chernikov #include <sys/mbuf.h>
39a6663252SAlexander V. Chernikov #include <sys/socket.h>
40a6663252SAlexander V. Chernikov #include <sys/sysctl.h>
41a6663252SAlexander V. Chernikov #include <sys/syslog.h>
42a6663252SAlexander V. Chernikov #include <sys/sysproto.h>
43a6663252SAlexander V. Chernikov #include <sys/proc.h>
44a6663252SAlexander V. Chernikov #include <sys/domain.h>
45a6663252SAlexander V. Chernikov #include <sys/kernel.h>
46a6663252SAlexander V. Chernikov #include <sys/lock.h>
47a6663252SAlexander V. Chernikov #include <sys/rmlock.h>
48a6663252SAlexander V. Chernikov 
49a6663252SAlexander V. Chernikov #include <net/if.h>
50a6663252SAlexander V. Chernikov #include <net/if_var.h>
51a6663252SAlexander V. Chernikov #include <net/if_dl.h>
52a6663252SAlexander V. Chernikov #include <net/route.h>
53da187ddbSAlexander V. Chernikov #include <net/route/route_ctl.h>
54e7d8af4fSAlexander V. Chernikov #include <net/route/route_var.h>
55a6663252SAlexander V. Chernikov #include <net/route/nhop_utils.h>
56a6663252SAlexander V. Chernikov #include <net/route/nhop.h>
57a6663252SAlexander V. Chernikov #include <net/route/nhop_var.h>
58682b902dSAlexander V. Chernikov #ifdef INET
59682b902dSAlexander V. Chernikov #include <netinet/in_fib.h>
60682b902dSAlexander V. Chernikov #endif
61682b902dSAlexander V. Chernikov #ifdef INET6
62682b902dSAlexander V. Chernikov #include <netinet6/in6_fib.h>
6336e15b71SAlexander V. Chernikov #include <netinet6/in6_var.h>
64682b902dSAlexander V. Chernikov #endif
65a6663252SAlexander V. Chernikov #include <net/vnet.h>
66a6663252SAlexander V. Chernikov 
67a6663252SAlexander V. Chernikov /*
68a6663252SAlexander V. Chernikov  * RIB helper functions.
69a6663252SAlexander V. Chernikov  */
70a6663252SAlexander V. Chernikov 
71151ec796SAlexander V. Chernikov void
72151ec796SAlexander V. Chernikov rib_walk_ext_locked(struct rib_head *rnh, rib_walktree_f_t *wa_f,
73151ec796SAlexander V. Chernikov     rib_walk_hook_f_t *hook_f, void *arg)
74151ec796SAlexander V. Chernikov {
75151ec796SAlexander V. Chernikov 	if (hook_f != NULL)
76151ec796SAlexander V. Chernikov 		hook_f(rnh, RIB_WALK_HOOK_PRE, arg);
77151ec796SAlexander V. Chernikov 	rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f, arg);
78151ec796SAlexander V. Chernikov 	if (hook_f != NULL)
79151ec796SAlexander V. Chernikov 		hook_f(rnh, RIB_WALK_HOOK_POST, arg);
80151ec796SAlexander V. Chernikov }
81151ec796SAlexander V. Chernikov 
82a6663252SAlexander V. Chernikov /*
83a6663252SAlexander V. Chernikov  * Calls @wa_f with @arg for each entry in the table specified by
84a6663252SAlexander V. Chernikov  * @af and @fibnum.
85a6663252SAlexander V. Chernikov  *
867511a638SAlexander V. Chernikov  * @ss_t callback is called before and after the tree traversal
877511a638SAlexander V. Chernikov  *  while holding table lock.
887511a638SAlexander V. Chernikov  *
897511a638SAlexander V. Chernikov  * Table is traversed under read lock unless @wlock is set.
90a6663252SAlexander V. Chernikov  */
91a6663252SAlexander V. Chernikov void
923b1654cbSAlexander V. Chernikov rib_walk_ext_internal(struct rib_head *rnh, bool wlock, rib_walktree_f_t *wa_f,
937511a638SAlexander V. Chernikov     rib_walk_hook_f_t *hook_f, void *arg)
94a6663252SAlexander V. Chernikov {
95a6663252SAlexander V. Chernikov 	RIB_RLOCK_TRACKER;
96a6663252SAlexander V. Chernikov 
977511a638SAlexander V. Chernikov 	if (wlock)
987511a638SAlexander V. Chernikov 		RIB_WLOCK(rnh);
997511a638SAlexander V. Chernikov 	else
100a6663252SAlexander V. Chernikov 		RIB_RLOCK(rnh);
101151ec796SAlexander V. Chernikov 	rib_walk_ext_locked(rnh, wa_f, hook_f, arg);
1027511a638SAlexander V. Chernikov 	if (wlock)
1037511a638SAlexander V. Chernikov 		RIB_WUNLOCK(rnh);
1047511a638SAlexander V. Chernikov 	else
105a6663252SAlexander V. Chernikov 		RIB_RUNLOCK(rnh);
106a6663252SAlexander V. Chernikov }
107a6663252SAlexander V. Chernikov 
1083b1654cbSAlexander V. Chernikov void
1093b1654cbSAlexander V. Chernikov rib_walk_ext(uint32_t fibnum, int family, bool wlock, rib_walktree_f_t *wa_f,
1103b1654cbSAlexander V. Chernikov     rib_walk_hook_f_t *hook_f, void *arg)
1113b1654cbSAlexander V. Chernikov {
1123b1654cbSAlexander V. Chernikov 	struct rib_head *rnh;
1133b1654cbSAlexander V. Chernikov 
1143b1654cbSAlexander V. Chernikov 	if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
1153b1654cbSAlexander V. Chernikov 		rib_walk_ext_internal(rnh, wlock, wa_f, hook_f, arg);
1163b1654cbSAlexander V. Chernikov }
1173b1654cbSAlexander V. Chernikov 
118682b902dSAlexander V. Chernikov /*
1197511a638SAlexander V. Chernikov  * Calls @wa_f with @arg for each entry in the table specified by
1207511a638SAlexander V. Chernikov  * @af and @fibnum.
1217511a638SAlexander V. Chernikov  *
1227511a638SAlexander V. Chernikov  * Table is traversed under read lock unless @wlock is set.
1237511a638SAlexander V. Chernikov  */
1247511a638SAlexander V. Chernikov void
1257511a638SAlexander V. Chernikov rib_walk(uint32_t fibnum, int family, bool wlock, rib_walktree_f_t *wa_f,
1267511a638SAlexander V. Chernikov     void *arg)
1277511a638SAlexander V. Chernikov {
1287511a638SAlexander V. Chernikov 
1297511a638SAlexander V. Chernikov 	rib_walk_ext(fibnum, family, wlock, wa_f, NULL, arg);
1307511a638SAlexander V. Chernikov }
1317511a638SAlexander V. Chernikov 
1327511a638SAlexander V. Chernikov /*
133f9668e42SAlexander V. Chernikov  * Calls @wa_f with @arg for each entry in the table matching @prefix/@mask.
134f9668e42SAlexander V. Chernikov  *
135f9668e42SAlexander V. Chernikov  * The following flags are supported:
136f9668e42SAlexander V. Chernikov  *  RIB_FLAG_WLOCK: acquire exclusive lock
137f9668e42SAlexander V. Chernikov  *  RIB_FLAG_LOCKED: Assumes the table is already locked & skip locking
138f9668e42SAlexander V. Chernikov  *
139f9668e42SAlexander V. Chernikov  * By default, table is traversed under read lock.
140f9668e42SAlexander V. Chernikov  */
141f9668e42SAlexander V. Chernikov void
142f9668e42SAlexander V. Chernikov rib_walk_from(uint32_t fibnum, int family, uint32_t flags, struct sockaddr *prefix,
143f9668e42SAlexander V. Chernikov     struct sockaddr *mask, rib_walktree_f_t *wa_f, void *arg)
144f9668e42SAlexander V. Chernikov {
145f9668e42SAlexander V. Chernikov 	RIB_RLOCK_TRACKER;
146f9668e42SAlexander V. Chernikov 	struct rib_head *rnh = rt_tables_get_rnh(fibnum, family);
147f9668e42SAlexander V. Chernikov 
148f9668e42SAlexander V. Chernikov 	if (rnh == NULL)
149f9668e42SAlexander V. Chernikov 		return;
150f9668e42SAlexander V. Chernikov 
151f9668e42SAlexander V. Chernikov 	if (flags & RIB_FLAG_WLOCK)
152f9668e42SAlexander V. Chernikov 		RIB_WLOCK(rnh);
153f9668e42SAlexander V. Chernikov 	else if (!(flags & RIB_FLAG_LOCKED))
154f9668e42SAlexander V. Chernikov 		RIB_RLOCK(rnh);
155f9668e42SAlexander V. Chernikov 
156f9668e42SAlexander V. Chernikov 	rnh->rnh_walktree_from(&rnh->head, prefix, mask, (walktree_f_t *)wa_f, arg);
157f9668e42SAlexander V. Chernikov 
158f9668e42SAlexander V. Chernikov 	if (flags & RIB_FLAG_WLOCK)
159f9668e42SAlexander V. Chernikov 		RIB_WUNLOCK(rnh);
160f9668e42SAlexander V. Chernikov 	else if (!(flags & RIB_FLAG_LOCKED))
161f9668e42SAlexander V. Chernikov 		RIB_RUNLOCK(rnh);
162f9668e42SAlexander V. Chernikov }
163f9668e42SAlexander V. Chernikov 
164f9668e42SAlexander V. Chernikov /*
1657511a638SAlexander V. Chernikov  * Iterates over all existing fibs in system calling
1667511a638SAlexander V. Chernikov  *  @hook_f function before/after traversing each fib.
1677511a638SAlexander V. Chernikov  *  Calls @wa_f function for each element in current fib.
1687511a638SAlexander V. Chernikov  * If af is not AF_UNSPEC, iterates over fibs in particular
1697511a638SAlexander V. Chernikov  * address family.
1707511a638SAlexander V. Chernikov  */
1717511a638SAlexander V. Chernikov void
1727511a638SAlexander V. Chernikov rib_foreach_table_walk(int family, bool wlock, rib_walktree_f_t *wa_f,
1737511a638SAlexander V. Chernikov     rib_walk_hook_f_t *hook_f, void *arg)
1747511a638SAlexander V. Chernikov {
1757511a638SAlexander V. Chernikov 
1767511a638SAlexander V. Chernikov 	for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
1777511a638SAlexander V. Chernikov 		/* Do we want some specific family? */
1787511a638SAlexander V. Chernikov 		if (family != AF_UNSPEC) {
1797511a638SAlexander V. Chernikov 			rib_walk_ext(fibnum, family, wlock, wa_f, hook_f, arg);
1807511a638SAlexander V. Chernikov 			continue;
1817511a638SAlexander V. Chernikov 		}
1827511a638SAlexander V. Chernikov 
1837511a638SAlexander V. Chernikov 		for (int i = 1; i <= AF_MAX; i++)
1847511a638SAlexander V. Chernikov 			rib_walk_ext(fibnum, i, wlock, wa_f, hook_f, arg);
1857511a638SAlexander V. Chernikov 	}
1867511a638SAlexander V. Chernikov }
1877511a638SAlexander V. Chernikov 
1887511a638SAlexander V. Chernikov /*
1897511a638SAlexander V. Chernikov  * Iterates over all existing fibs in system and deletes each element
1907511a638SAlexander V. Chernikov  *  for which @filter_f function returns non-zero value.
1917511a638SAlexander V. Chernikov  * If @family is not AF_UNSPEC, iterates over fibs in particular
1927511a638SAlexander V. Chernikov  * address family.
1937511a638SAlexander V. Chernikov  */
1947511a638SAlexander V. Chernikov void
1957511a638SAlexander V. Chernikov rib_foreach_table_walk_del(int family, rib_filter_f_t *filter_f, void *arg)
1967511a638SAlexander V. Chernikov {
1977511a638SAlexander V. Chernikov 
1987511a638SAlexander V. Chernikov 	for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
1997511a638SAlexander V. Chernikov 		/* Do we want some specific family? */
2007511a638SAlexander V. Chernikov 		if (family != AF_UNSPEC) {
2017511a638SAlexander V. Chernikov 			rib_walk_del(fibnum, family, filter_f, arg, 0);
2027511a638SAlexander V. Chernikov 			continue;
2037511a638SAlexander V. Chernikov 		}
2047511a638SAlexander V. Chernikov 
2057511a638SAlexander V. Chernikov 		for (int i = 1; i <= AF_MAX; i++)
2067511a638SAlexander V. Chernikov 			rib_walk_del(fibnum, i, filter_f, arg, 0);
2077511a638SAlexander V. Chernikov 	}
2087511a638SAlexander V. Chernikov }
2097511a638SAlexander V. Chernikov 
2107511a638SAlexander V. Chernikov 
/*
 * Wrapper for the control plane functions for performing af-agnostic
 *  lookups.
 * @fibnum: fib to perform the lookup.
 * @dst: sockaddr with family and addr filled in. IPv6 addresses need to be
 *  in deembedded form.
 * @flags: fib(9) flags.
 * @flowid: flow id for path selection in multipath use case.
 *
 * Dispatches to fib4_lookup() for AF_INET and to fib6_lookup() for
 *  AF_INET6 (each only when compiled in).
 *
 * Returns nhop_object or NULL. NULL is also returned for any other
 *  address family.
 *
 * Requires NET_EPOCH.
 *
 */
struct nhop_object *
rib_lookup(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags,
    uint32_t flowid)
{
#ifdef INET
	if (dst->sa_family == AF_INET) {
		const struct sockaddr_in *sin = (const struct sockaddr_in *)dst;

		return (fib4_lookup(fibnum, sin->sin_addr, 0, flags, flowid));
	}
#endif
#ifdef INET6
	if (dst->sa_family == AF_INET6) {
		const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6*)dst;

		return (fib6_lookup(fibnum, &sin6->sin6_addr,
		    sin6->sin6_scope_id, flags, flowid));
	}
#endif

	/* Unsupported (or not compiled in) address family. */
	return (NULL);
}
255fedeb08bSAlexander V. Chernikov 
256fedeb08bSAlexander V. Chernikov #ifdef ROUTE_MPATH
257fedeb08bSAlexander V. Chernikov static void
258fedeb08bSAlexander V. Chernikov decompose_change_notification(struct rib_cmd_info *rc, route_notification_t *cb,
259fedeb08bSAlexander V. Chernikov     void *cbdata)
260fedeb08bSAlexander V. Chernikov {
261fedeb08bSAlexander V. Chernikov 	uint32_t num_old, num_new;
262fedeb08bSAlexander V. Chernikov 	uint32_t nh_idx_old, nh_idx_new;
263fedeb08bSAlexander V. Chernikov 	struct weightened_nhop *wn_old, *wn_new;
264fedeb08bSAlexander V. Chernikov 	struct weightened_nhop tmp = { NULL, 0 };
265fedeb08bSAlexander V. Chernikov 	uint32_t idx_old = 0, idx_new = 0;
266fedeb08bSAlexander V. Chernikov 
267fedeb08bSAlexander V. Chernikov 	struct rib_cmd_info rc_del = { .rc_cmd = RTM_DELETE, .rc_rt = rc->rc_rt };
268fedeb08bSAlexander V. Chernikov 	struct rib_cmd_info rc_add = { .rc_cmd = RTM_ADD, .rc_rt = rc->rc_rt };
269fedeb08bSAlexander V. Chernikov 
270fedeb08bSAlexander V. Chernikov 	if (NH_IS_NHGRP(rc->rc_nh_old)) {
271fedeb08bSAlexander V. Chernikov 		wn_old = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_old, &num_old);
272fedeb08bSAlexander V. Chernikov 	} else {
273fedeb08bSAlexander V. Chernikov 		tmp.nh = rc->rc_nh_old;
274fedeb08bSAlexander V. Chernikov 		tmp.weight = rc->rc_nh_weight;
275fedeb08bSAlexander V. Chernikov 		wn_old = &tmp;
276fedeb08bSAlexander V. Chernikov 		num_old = 1;
277fedeb08bSAlexander V. Chernikov 	}
278fedeb08bSAlexander V. Chernikov 	if (NH_IS_NHGRP(rc->rc_nh_new)) {
279fedeb08bSAlexander V. Chernikov 		wn_new = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_new, &num_new);
280fedeb08bSAlexander V. Chernikov 	} else {
281fedeb08bSAlexander V. Chernikov 		tmp.nh = rc->rc_nh_new;
282fedeb08bSAlexander V. Chernikov 		tmp.weight = rc->rc_nh_weight;
283fedeb08bSAlexander V. Chernikov 		wn_new = &tmp;
284fedeb08bSAlexander V. Chernikov 		num_new = 1;
285fedeb08bSAlexander V. Chernikov 	}
286fedeb08bSAlexander V. Chernikov 
287fedeb08bSAlexander V. Chernikov 	/* Use the fact that each @wn array is sorted */
288fedeb08bSAlexander V. Chernikov 	/*
289fedeb08bSAlexander V. Chernikov 	 * Want to convert into set of add and delete operations
290fedeb08bSAlexander V. Chernikov 	 * [1] -> [1, 2] = A{2}
291fedeb08bSAlexander V. Chernikov 	 * [2] -> [1, 2] = A{1}
292fedeb08bSAlexander V. Chernikov 	 * [1, 2, 4]->[1, 3, 4] = A{2}, D{3}
293fedeb08bSAlexander V. Chernikov 	 * [1, 2, 4]->[1, 4] = D{2}
294fedeb08bSAlexander V. Chernikov 	 * [1, 2, 4] -> [3, 4] = D{1}, C{2,3} OR C{1,3}, D{2} OR D{1},D{2},A{3}
295fedeb08bSAlexander V. Chernikov 	 * [1, 2] -> [3, 4] =
296fedeb08bSAlexander V. Chernikov 	 *
297fedeb08bSAlexander V. Chernikov 	 */
298fedeb08bSAlexander V. Chernikov 	idx_old = 0;
299fedeb08bSAlexander V. Chernikov 	while ((idx_old < num_old) && (idx_new < num_new)) {
300fedeb08bSAlexander V. Chernikov 		nh_idx_old = wn_old[idx_old].nh->nh_priv->nh_idx;
301fedeb08bSAlexander V. Chernikov 		nh_idx_new = wn_new[idx_new].nh->nh_priv->nh_idx;
302fedeb08bSAlexander V. Chernikov 
303fedeb08bSAlexander V. Chernikov 		if (nh_idx_old == nh_idx_new) {
304fedeb08bSAlexander V. Chernikov 			if (wn_old[idx_old].weight != wn_new[idx_new].weight) {
305fedeb08bSAlexander V. Chernikov 				/* Update weight by providing del/add notifications */
306fedeb08bSAlexander V. Chernikov 				rc_del.rc_nh_old = wn_old[idx_old].nh;
307fedeb08bSAlexander V. Chernikov 				rc_del.rc_nh_weight = wn_old[idx_old].weight;
308fedeb08bSAlexander V. Chernikov 				cb(&rc_del, cbdata);
309fedeb08bSAlexander V. Chernikov 
310fedeb08bSAlexander V. Chernikov 				rc_add.rc_nh_new = wn_new[idx_new].nh;
311fedeb08bSAlexander V. Chernikov 				rc_add.rc_nh_weight = wn_new[idx_new].weight;
312fedeb08bSAlexander V. Chernikov 				cb(&rc_add, cbdata);
313fedeb08bSAlexander V. Chernikov 			}
314fedeb08bSAlexander V. Chernikov 			idx_old++;
315fedeb08bSAlexander V. Chernikov 			idx_new++;
316fedeb08bSAlexander V. Chernikov 		} else if (nh_idx_old < nh_idx_new) {
317fedeb08bSAlexander V. Chernikov 			/*
318fedeb08bSAlexander V. Chernikov 			 * [1, ~2~, 4], [1, ~3~, 4]
319fedeb08bSAlexander V. Chernikov 			 * [1, ~2~, 5], [1, ~3~, 4]
320fedeb08bSAlexander V. Chernikov 			 * [1, ~2~], [1, ~3~, 4]
321fedeb08bSAlexander V. Chernikov 			 */
322fedeb08bSAlexander V. Chernikov 			if ((idx_old + 1 >= num_old) ||
323fedeb08bSAlexander V. Chernikov 			    (wn_old[idx_old + 1].nh->nh_priv->nh_idx > nh_idx_new)) {
324fedeb08bSAlexander V. Chernikov 				/* Add new unless the next old item is still <= new */
325fedeb08bSAlexander V. Chernikov 				rc_add.rc_nh_new = wn_new[idx_new].nh;
326fedeb08bSAlexander V. Chernikov 				rc_add.rc_nh_weight = wn_new[idx_new].weight;
327fedeb08bSAlexander V. Chernikov 				cb(&rc_add, cbdata);
328fedeb08bSAlexander V. Chernikov 				idx_new++;
329fedeb08bSAlexander V. Chernikov 			}
330fedeb08bSAlexander V. Chernikov 			/* In any case, delete current old */
331fedeb08bSAlexander V. Chernikov 			rc_del.rc_nh_old = wn_old[idx_old].nh;
332fedeb08bSAlexander V. Chernikov 			rc_del.rc_nh_weight = wn_old[idx_old].weight;
333fedeb08bSAlexander V. Chernikov 			cb(&rc_del, cbdata);
334fedeb08bSAlexander V. Chernikov 			idx_old++;
335fedeb08bSAlexander V. Chernikov 		} else {
336fedeb08bSAlexander V. Chernikov 			/*
337fedeb08bSAlexander V. Chernikov 			 * nh_idx_old > nh_idx_new
338fedeb08bSAlexander V. Chernikov 			 *
339fedeb08bSAlexander V. Chernikov 			 * [1, ~3~, 4], [1, ~2~, 4]
340fedeb08bSAlexander V. Chernikov 			 * [1, ~3~, 5], [1, ~2~, 4]
341fedeb08bSAlexander V. Chernikov 			 * [1, ~3~, 4], [1, ~2~]
342fedeb08bSAlexander V. Chernikov 			 */
343fedeb08bSAlexander V. Chernikov 			if ((idx_new + 1 >= num_new) ||
344fedeb08bSAlexander V. Chernikov 			    (wn_new[idx_new + 1].nh->nh_priv->nh_idx > nh_idx_old)) {
345fedeb08bSAlexander V. Chernikov 				/* No next item or next item is > current one */
346fedeb08bSAlexander V. Chernikov 				rc_add.rc_nh_new = wn_new[idx_new].nh;
347fedeb08bSAlexander V. Chernikov 				rc_add.rc_nh_weight = wn_new[idx_new].weight;
348fedeb08bSAlexander V. Chernikov 				cb(&rc_add, cbdata);
349fedeb08bSAlexander V. Chernikov 				idx_new++;
350fedeb08bSAlexander V. Chernikov 			}
351fedeb08bSAlexander V. Chernikov 			/* In any case, delete current old */
352fedeb08bSAlexander V. Chernikov 			rc_del.rc_nh_old = wn_old[idx_old].nh;
353fedeb08bSAlexander V. Chernikov 			rc_del.rc_nh_weight = wn_old[idx_old].weight;
354fedeb08bSAlexander V. Chernikov 			cb(&rc_del, cbdata);
355fedeb08bSAlexander V. Chernikov 			idx_old++;
356fedeb08bSAlexander V. Chernikov 		}
357fedeb08bSAlexander V. Chernikov 	}
358fedeb08bSAlexander V. Chernikov 
359fedeb08bSAlexander V. Chernikov 	while (idx_old < num_old) {
360fedeb08bSAlexander V. Chernikov 		rc_del.rc_nh_old = wn_old[idx_old].nh;
361fedeb08bSAlexander V. Chernikov 		rc_del.rc_nh_weight = wn_old[idx_old].weight;
362fedeb08bSAlexander V. Chernikov 		cb(&rc_del, cbdata);
363fedeb08bSAlexander V. Chernikov 		idx_old++;
364fedeb08bSAlexander V. Chernikov 	}
365fedeb08bSAlexander V. Chernikov 
366fedeb08bSAlexander V. Chernikov 	while (idx_new < num_new) {
367fedeb08bSAlexander V. Chernikov 		rc_add.rc_nh_new = wn_new[idx_new].nh;
368fedeb08bSAlexander V. Chernikov 		rc_add.rc_nh_weight = wn_new[idx_new].weight;
369fedeb08bSAlexander V. Chernikov 		cb(&rc_add, cbdata);
370fedeb08bSAlexander V. Chernikov 		idx_new++;
371fedeb08bSAlexander V. Chernikov 	}
372fedeb08bSAlexander V. Chernikov }
373fedeb08bSAlexander V. Chernikov 
374fedeb08bSAlexander V. Chernikov /*
375fedeb08bSAlexander V. Chernikov  * Decompose multipath cmd info @rc into a list of add/del/change
376fedeb08bSAlexander V. Chernikov  *  single-path operations, calling @cb callback for each operation.
377fedeb08bSAlexander V. Chernikov  * Assumes at least one of the nexthops in @rc is multipath.
378fedeb08bSAlexander V. Chernikov  */
379fedeb08bSAlexander V. Chernikov void
380fedeb08bSAlexander V. Chernikov rib_decompose_notification(struct rib_cmd_info *rc, route_notification_t *cb,
381fedeb08bSAlexander V. Chernikov     void *cbdata)
382fedeb08bSAlexander V. Chernikov {
383fedeb08bSAlexander V. Chernikov 	struct weightened_nhop *wn;
384fedeb08bSAlexander V. Chernikov 	uint32_t num_nhops;
385fedeb08bSAlexander V. Chernikov 	struct rib_cmd_info rc_new;
386fedeb08bSAlexander V. Chernikov 
387fedeb08bSAlexander V. Chernikov 	rc_new = *rc;
388fedeb08bSAlexander V. Chernikov 	DPRINTF("cb=%p cmd=%d nh_old=%p nh_new=%p",
389fedeb08bSAlexander V. Chernikov 	    cb, rc->cmd, rc->nh_old, rc->nh_new);
390fedeb08bSAlexander V. Chernikov 	switch (rc->rc_cmd) {
391fedeb08bSAlexander V. Chernikov 	case RTM_ADD:
392fedeb08bSAlexander V. Chernikov 		if (!NH_IS_NHGRP(rc->rc_nh_new))
393fedeb08bSAlexander V. Chernikov 			return;
394fedeb08bSAlexander V. Chernikov 		wn = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_new, &num_nhops);
395fedeb08bSAlexander V. Chernikov 		for (uint32_t i = 0; i < num_nhops; i++) {
396fedeb08bSAlexander V. Chernikov 			rc_new.rc_nh_new = wn[i].nh;
397fedeb08bSAlexander V. Chernikov 			rc_new.rc_nh_weight = wn[i].weight;
398fedeb08bSAlexander V. Chernikov 			cb(&rc_new, cbdata);
399fedeb08bSAlexander V. Chernikov 		}
400fedeb08bSAlexander V. Chernikov 		break;
401fedeb08bSAlexander V. Chernikov 	case RTM_DELETE:
402fedeb08bSAlexander V. Chernikov 		if (!NH_IS_NHGRP(rc->rc_nh_old))
403fedeb08bSAlexander V. Chernikov 			return;
404fedeb08bSAlexander V. Chernikov 		wn = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_old, &num_nhops);
405fedeb08bSAlexander V. Chernikov 		for (uint32_t i = 0; i < num_nhops; i++) {
406fedeb08bSAlexander V. Chernikov 			rc_new.rc_nh_old = wn[i].nh;
407fedeb08bSAlexander V. Chernikov 			rc_new.rc_nh_weight = wn[i].weight;
408fedeb08bSAlexander V. Chernikov 			cb(&rc_new, cbdata);
409fedeb08bSAlexander V. Chernikov 		}
410fedeb08bSAlexander V. Chernikov 		break;
411fedeb08bSAlexander V. Chernikov 	case RTM_CHANGE:
412fedeb08bSAlexander V. Chernikov 		if (!NH_IS_NHGRP(rc->rc_nh_old) && !NH_IS_NHGRP(rc->rc_nh_new))
413fedeb08bSAlexander V. Chernikov 			return;
414fedeb08bSAlexander V. Chernikov 		decompose_change_notification(rc, cb, cbdata);
415fedeb08bSAlexander V. Chernikov 		break;
416fedeb08bSAlexander V. Chernikov 	}
417fedeb08bSAlexander V. Chernikov }
418fedeb08bSAlexander V. Chernikov #endif
41936e15b71SAlexander V. Chernikov 
42036e15b71SAlexander V. Chernikov #ifdef INET
42136e15b71SAlexander V. Chernikov /*
42236e15b71SAlexander V. Chernikov  * Checks if the found key in the trie contains (<=) a prefix covering
42336e15b71SAlexander V. Chernikov  *  @paddr/@plen.
42436e15b71SAlexander V. Chernikov  * Returns the most specific rtentry matching the condition or NULL.
42536e15b71SAlexander V. Chernikov  */
42636e15b71SAlexander V. Chernikov static struct rtentry *
42736e15b71SAlexander V. Chernikov get_inet_parent_prefix(uint32_t fibnum, struct in_addr addr, int plen)
42836e15b71SAlexander V. Chernikov {
42936e15b71SAlexander V. Chernikov 	struct route_nhop_data rnd;
43036e15b71SAlexander V. Chernikov 	struct rtentry *rt;
43136e15b71SAlexander V. Chernikov 	struct in_addr addr4;
43236e15b71SAlexander V. Chernikov 	uint32_t scopeid;
43336e15b71SAlexander V. Chernikov 	int parent_plen;
43436e15b71SAlexander V. Chernikov 	struct radix_node *rn;
43536e15b71SAlexander V. Chernikov 
43636e15b71SAlexander V. Chernikov 	rt = fib4_lookup_rt(fibnum, addr, 0, NHR_UNLOCKED, &rnd);
437*f84c3010SAlexander V. Chernikov 	if (rt == NULL)
438*f84c3010SAlexander V. Chernikov 		return (NULL);
439*f84c3010SAlexander V. Chernikov 
44036e15b71SAlexander V. Chernikov 	rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid);
44136e15b71SAlexander V. Chernikov 	if (parent_plen <= plen)
44236e15b71SAlexander V. Chernikov 		return (rt);
44336e15b71SAlexander V. Chernikov 
44436e15b71SAlexander V. Chernikov 	/*
44536e15b71SAlexander V. Chernikov 	 * There can be multiple prefixes associated with the found key:
44636e15b71SAlexander V. Chernikov 	 * 10.0.0.0 -> 10.0.0.0/24, 10.0.0.0/23, 10.0.0.0/22, etc.
44736e15b71SAlexander V. Chernikov 	 * All such prefixes are linked via rn_dupedkey, from most specific
44836e15b71SAlexander V. Chernikov 	 *  to least specific. Iterate over them to check if any of these
44936e15b71SAlexander V. Chernikov 	 *  prefixes are wider than desired plen.
45036e15b71SAlexander V. Chernikov 	 */
45136e15b71SAlexander V. Chernikov 	rn = (struct radix_node *)rt;
45236e15b71SAlexander V. Chernikov 	while ((rn = rn_nextprefix(rn)) != NULL) {
45336e15b71SAlexander V. Chernikov 		rt = RNTORT(rn);
45436e15b71SAlexander V. Chernikov 		rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid);
45536e15b71SAlexander V. Chernikov 		if (parent_plen <= plen)
45636e15b71SAlexander V. Chernikov 			return (rt);
45736e15b71SAlexander V. Chernikov 	}
45836e15b71SAlexander V. Chernikov 
45936e15b71SAlexander V. Chernikov 	return (NULL);
46036e15b71SAlexander V. Chernikov }
46136e15b71SAlexander V. Chernikov 
/*
 * Returns the most specific prefix containing (>) @addr/@plen in fib
 *  @fibnum, or NULL if there is none.
 *
 * The search goes from @plen - 1 down to 0, masking the address with each
 *  wider mask and asking get_inet_parent_prefix() for a route that is at
 *  least that wide. The lookup is skipped when the masked key did not
 *  change since the previous iteration.
 *
 * NOTE(review): the mask computation shifts by (32 - plen), so @plen is
 *  presumably expected to be a valid IPv4 prefix length (0..32) - confirm
 *  against callers.
 */
struct rtentry *
rt_get_inet_parent(uint32_t fibnum, struct in_addr addr, int plen)
{
	/*
	 * Sentinel: a key masked with less than 32 bits can never be
	 * all-ones, so the first iteration always performs the lookup.
	 */
	struct in_addr lookup_addr = { .s_addr = INADDR_BROADCAST };
	struct in_addr addr4 = addr;
	struct in_addr mask4;
	struct rtentry *rt;

	while (plen-- > 0) {
		/*
		 * Calculate wider mask & new key to lookup.
		 * The shift is done on an unsigned value: with a signed 1,
		 * plen == 1 would evaluate (1 << 31) - 1, which overflows int.
		 */
		mask4.s_addr = htonl(plen ? ~((1U << (32 - plen)) - 1) : 0);
		addr4.s_addr = htonl(ntohl(addr4.s_addr) & ntohl(mask4.s_addr));
		if (addr4.s_addr == lookup_addr.s_addr) {
			/* Skip lookup if the key is the same */
			continue;
		}
		lookup_addr = addr4;

		rt = get_inet_parent_prefix(fibnum, lookup_addr, plen);
		if (rt != NULL)
			return (rt);
	}

	return (NULL);
}
49036e15b71SAlexander V. Chernikov #endif
49136e15b71SAlexander V. Chernikov 
49236e15b71SAlexander V. Chernikov #ifdef INET6
49336e15b71SAlexander V. Chernikov /*
49436e15b71SAlexander V. Chernikov  * Checks if the found key in the trie contains (<=) a prefix covering
49536e15b71SAlexander V. Chernikov  *  @paddr/@plen.
49636e15b71SAlexander V. Chernikov  * Returns the most specific rtentry matching the condition or NULL.
49736e15b71SAlexander V. Chernikov  */
49836e15b71SAlexander V. Chernikov static struct rtentry *
49936e15b71SAlexander V. Chernikov get_inet6_parent_prefix(uint32_t fibnum, const struct in6_addr *paddr, int plen)
50036e15b71SAlexander V. Chernikov {
50136e15b71SAlexander V. Chernikov 	struct route_nhop_data rnd;
50236e15b71SAlexander V. Chernikov 	struct rtentry *rt;
50336e15b71SAlexander V. Chernikov 	struct in6_addr addr6;
50436e15b71SAlexander V. Chernikov 	uint32_t scopeid;
50536e15b71SAlexander V. Chernikov 	int parent_plen;
50636e15b71SAlexander V. Chernikov 	struct radix_node *rn;
50736e15b71SAlexander V. Chernikov 
50836e15b71SAlexander V. Chernikov 	rt = fib6_lookup_rt(fibnum, paddr, 0, NHR_UNLOCKED, &rnd);
509*f84c3010SAlexander V. Chernikov 	if (rt == NULL)
510*f84c3010SAlexander V. Chernikov 		return (NULL);
511*f84c3010SAlexander V. Chernikov 
51236e15b71SAlexander V. Chernikov 	rt_get_inet6_prefix_plen(rt, &addr6, &parent_plen, &scopeid);
51336e15b71SAlexander V. Chernikov 	if (parent_plen <= plen)
51436e15b71SAlexander V. Chernikov 		return (rt);
51536e15b71SAlexander V. Chernikov 
51636e15b71SAlexander V. Chernikov 	/*
51736e15b71SAlexander V. Chernikov 	 * There can be multiple prefixes associated with the found key:
51836e15b71SAlexander V. Chernikov 	 * 2001:db8:1::/64 -> 2001:db8:1::/56, 2001:db8:1::/48, etc.
51936e15b71SAlexander V. Chernikov 	 * All such prefixes are linked via rn_dupedkey, from most specific
52036e15b71SAlexander V. Chernikov 	 *  to least specific. Iterate over them to check if any of these
52136e15b71SAlexander V. Chernikov 	 *  prefixes are wider than desired plen.
52236e15b71SAlexander V. Chernikov 	 */
52336e15b71SAlexander V. Chernikov 	rn = (struct radix_node *)rt;
52436e15b71SAlexander V. Chernikov 	while ((rn = rn_nextprefix(rn)) != NULL) {
52536e15b71SAlexander V. Chernikov 		rt = RNTORT(rn);
52636e15b71SAlexander V. Chernikov 		rt_get_inet6_prefix_plen(rt, &addr6, &parent_plen, &scopeid);
52736e15b71SAlexander V. Chernikov 		if (parent_plen <= plen)
52836e15b71SAlexander V. Chernikov 			return (rt);
52936e15b71SAlexander V. Chernikov 	}
53036e15b71SAlexander V. Chernikov 
53136e15b71SAlexander V. Chernikov 	return (NULL);
53236e15b71SAlexander V. Chernikov }
53336e15b71SAlexander V. Chernikov 
/*
 * Writes the IPv6 netmask with prefix length @mask into @addr6.
 *
 * All four 32-bit words of @addr6 are written: the leading @mask bits are
 *  set, the rest are cleared, so the previous contents of @addr6 do not
 *  matter. @mask values larger than 128 are treated as 128 so that
 *  nothing is written past the end of the address.
 */
static void
ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
{
	uint32_t *cp = (uint32_t *)addr6;
	unsigned int bits = (mask > 128) ? 128 : mask;

	for (int i = 0; i < 4; i++) {
		if (bits >= 32) {
			/* Word fully covered by the prefix */
			cp[i] = 0xFFFFFFFF;
			bits -= 32;
		} else if (bits > 0) {
			/*
			 * Partially covered word: keep the top @bits bits.
			 * Unsigned shift by 1..31, no signed overflow.
			 */
			cp[i] = htonl(0xFFFFFFFFU << (32 - bits));
			bits = 0;
		} else {
			/* Word entirely outside of the prefix */
			cp[i] = 0;
		}
	}
}
54436e15b71SAlexander V. Chernikov 
54536e15b71SAlexander V. Chernikov /*
54636e15b71SAlexander V. Chernikov  * Returns the most specific prefix containing (>) @paddr/plen.
54736e15b71SAlexander V. Chernikov  */
54836e15b71SAlexander V. Chernikov struct rtentry *
54936e15b71SAlexander V. Chernikov rt_get_inet6_parent(uint32_t fibnum, const struct in6_addr *paddr, int plen)
55036e15b71SAlexander V. Chernikov {
55136e15b71SAlexander V. Chernikov 	struct in6_addr lookup_addr = in6mask128;
55236e15b71SAlexander V. Chernikov 	struct in6_addr addr6 = *paddr;
55336e15b71SAlexander V. Chernikov 	struct in6_addr mask6;
55436e15b71SAlexander V. Chernikov 	struct rtentry *rt;
55536e15b71SAlexander V. Chernikov 
55636e15b71SAlexander V. Chernikov 	while (plen-- > 0) {
55736e15b71SAlexander V. Chernikov 		/* Calculate wider mask & new key to lookup */
55836e15b71SAlexander V. Chernikov 		ipv6_writemask(&mask6, plen);
55936e15b71SAlexander V. Chernikov 		IN6_MASK_ADDR(&addr6, &mask6);
56036e15b71SAlexander V. Chernikov 		if (IN6_ARE_ADDR_EQUAL(&addr6, &lookup_addr)) {
56136e15b71SAlexander V. Chernikov 			/* Skip lookup if the key is the same */
56236e15b71SAlexander V. Chernikov 			continue;
56336e15b71SAlexander V. Chernikov 		}
56436e15b71SAlexander V. Chernikov 		lookup_addr = addr6;
56536e15b71SAlexander V. Chernikov 
56636e15b71SAlexander V. Chernikov 		rt = get_inet6_parent_prefix(fibnum, &lookup_addr, plen);
56736e15b71SAlexander V. Chernikov 		if (rt != NULL)
56836e15b71SAlexander V. Chernikov 			return (rt);
56936e15b71SAlexander V. Chernikov 	}
57036e15b71SAlexander V. Chernikov 
57136e15b71SAlexander V. Chernikov 	return (NULL);
57236e15b71SAlexander V. Chernikov }
57336e15b71SAlexander V. Chernikov #endif
574