xref: /freebsd/sys/net/route/route_helpers.c (revision c0256b31efcccb6964822b5aadb183e8a6d45507)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2020 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 
32 #include <sys/param.h>
33 #include <sys/jail.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/mbuf.h>
37 #include <sys/socket.h>
38 #include <sys/sysctl.h>
39 #include <sys/syslog.h>
40 #include <sys/sysproto.h>
41 #include <sys/proc.h>
42 #include <sys/domain.h>
43 #include <sys/kernel.h>
44 #include <sys/lock.h>
45 #include <sys/rmlock.h>
46 
47 #include <net/if.h>
48 #include <net/if_var.h>
49 #include <net/if_dl.h>
50 #include <net/route.h>
51 #include <net/route/route_ctl.h>
52 #include <net/route/route_var.h>
53 #include <net/route/nhop_utils.h>
54 #include <net/route/nhop.h>
55 #include <net/route/nhop_var.h>
56 #ifdef INET
57 #include <netinet/in_fib.h>
58 #endif
59 #ifdef INET6
60 #include <netinet6/in6_fib.h>
61 #include <netinet6/in6_var.h>
62 #endif
63 #include <net/vnet.h>
64 
65 #define	DEBUG_MOD_NAME	rt_helpers
66 #define	DEBUG_MAX_LEVEL	LOG_DEBUG2
67 #include <net/route/route_debug.h>
68 _DECLARE_DEBUG(LOG_INFO);
69 
70 /*
71  * RIB helper functions.
72  */
73 
74 void
rib_walk_ext_locked(struct rib_head * rnh,rib_walktree_f_t * wa_f,rib_walk_hook_f_t * hook_f,void * arg)75 rib_walk_ext_locked(struct rib_head *rnh, rib_walktree_f_t *wa_f,
76     rib_walk_hook_f_t *hook_f, void *arg)
77 {
78 	if (hook_f != NULL)
79 		hook_f(rnh, RIB_WALK_HOOK_PRE, arg);
80 	rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f, arg);
81 	if (hook_f != NULL)
82 		hook_f(rnh, RIB_WALK_HOOK_POST, arg);
83 }
84 
85 /*
86  * Calls @wa_f with @arg for each entry in the table specified by
87  * @af and @fibnum.
88  *
89  * @ss_t callback is called before and after the tree traversal
90  *  while holding table lock.
91  *
92  * Table is traversed under read lock unless @wlock is set.
93  */
94 void
rib_walk_ext_internal(struct rib_head * rnh,bool wlock,rib_walktree_f_t * wa_f,rib_walk_hook_f_t * hook_f,void * arg)95 rib_walk_ext_internal(struct rib_head *rnh, bool wlock, rib_walktree_f_t *wa_f,
96     rib_walk_hook_f_t *hook_f, void *arg)
97 {
98 	RIB_RLOCK_TRACKER;
99 
100 	if (wlock)
101 		RIB_WLOCK(rnh);
102 	else
103 		RIB_RLOCK(rnh);
104 	rib_walk_ext_locked(rnh, wa_f, hook_f, arg);
105 	if (wlock)
106 		RIB_WUNLOCK(rnh);
107 	else
108 		RIB_RUNLOCK(rnh);
109 }
110 
111 void
rib_walk_ext(uint32_t fibnum,int family,bool wlock,rib_walktree_f_t * wa_f,rib_walk_hook_f_t * hook_f,void * arg)112 rib_walk_ext(uint32_t fibnum, int family, bool wlock, rib_walktree_f_t *wa_f,
113     rib_walk_hook_f_t *hook_f, void *arg)
114 {
115 	struct rib_head *rnh;
116 
117 	if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
118 		rib_walk_ext_internal(rnh, wlock, wa_f, hook_f, arg);
119 }
120 
121 /*
122  * Calls @wa_f with @arg for each entry in the table specified by
123  * @af and @fibnum.
124  *
125  * Table is traversed under read lock unless @wlock is set.
126  */
127 void
rib_walk(uint32_t fibnum,int family,bool wlock,rib_walktree_f_t * wa_f,void * arg)128 rib_walk(uint32_t fibnum, int family, bool wlock, rib_walktree_f_t *wa_f,
129     void *arg)
130 {
131 
132 	rib_walk_ext(fibnum, family, wlock, wa_f, NULL, arg);
133 }
134 
135 /*
136  * Calls @wa_f with @arg for each entry in the table matching @prefix/@mask.
137  *
138  * The following flags are supported:
139  *  RIB_FLAG_WLOCK: acquire exclusive lock
140  *  RIB_FLAG_LOCKED: Assumes the table is already locked & skip locking
141  *
142  * By default, table is traversed under read lock.
143  */
144 void
rib_walk_from(uint32_t fibnum,int family,uint32_t flags,struct sockaddr * prefix,struct sockaddr * mask,rib_walktree_f_t * wa_f,void * arg)145 rib_walk_from(uint32_t fibnum, int family, uint32_t flags, struct sockaddr *prefix,
146     struct sockaddr *mask, rib_walktree_f_t *wa_f, void *arg)
147 {
148 	RIB_RLOCK_TRACKER;
149 	struct rib_head *rnh = rt_tables_get_rnh(fibnum, family);
150 
151 	if (rnh == NULL)
152 		return;
153 
154 	if (flags & RIB_FLAG_WLOCK)
155 		RIB_WLOCK(rnh);
156 	else if (!(flags & RIB_FLAG_LOCKED))
157 		RIB_RLOCK(rnh);
158 
159 	rnh->rnh_walktree_from(&rnh->head, prefix, mask, (walktree_f_t *)wa_f, arg);
160 
161 	if (flags & RIB_FLAG_WLOCK)
162 		RIB_WUNLOCK(rnh);
163 	else if (!(flags & RIB_FLAG_LOCKED))
164 		RIB_RUNLOCK(rnh);
165 }
166 
167 /*
168  * Iterates over all existing fibs in system calling
169  *  @hook_f function before/after traversing each fib.
170  *  Calls @wa_f function for each element in current fib.
171  * If af is not AF_UNSPEC, iterates over fibs in particular
172  * address family.
173  */
174 void
rib_foreach_table_walk(int family,bool wlock,rib_walktree_f_t * wa_f,rib_walk_hook_f_t * hook_f,void * arg)175 rib_foreach_table_walk(int family, bool wlock, rib_walktree_f_t *wa_f,
176     rib_walk_hook_f_t *hook_f, void *arg)
177 {
178 
179 	for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
180 		/* Do we want some specific family? */
181 		if (family != AF_UNSPEC) {
182 			rib_walk_ext(fibnum, family, wlock, wa_f, hook_f, arg);
183 			continue;
184 		}
185 
186 		for (int i = 1; i <= AF_MAX; i++)
187 			rib_walk_ext(fibnum, i, wlock, wa_f, hook_f, arg);
188 	}
189 }
190 
191 /*
192  * Iterates over all existing fibs in system and deletes each element
193  *  for which @filter_f function returns non-zero value.
194  * If @family is not AF_UNSPEC, iterates over fibs in particular
195  * address family.
196  */
197 void
rib_foreach_table_walk_del(int family,rib_filter_f_t * filter_f,void * arg)198 rib_foreach_table_walk_del(int family, rib_filter_f_t *filter_f, void *arg)
199 {
200 
201 	for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
202 		/* Do we want some specific family? */
203 		if (family != AF_UNSPEC) {
204 			rib_walk_del(fibnum, family, filter_f, arg, 0);
205 			continue;
206 		}
207 
208 		for (int i = 1; i <= AF_MAX; i++)
209 			rib_walk_del(fibnum, i, filter_f, arg, 0);
210 	}
211 }
212 
213 
214 /*
215  * Wrapper for the control plane functions for performing af-agnostic
216  *  lookups.
217  * @fibnum: fib to perform the lookup.
218  * @dst: sockaddr with family and addr filled in. IPv6 addresses needs to be in
219  *  deembedded from.
220  * @flags: fib(9) flags.
221  * @flowid: flow id for path selection in multipath use case.
222  *
223  * Returns nhop_object or NULL.
224  *
225  * Requires NET_EPOCH.
226  *
227  */
228 struct nhop_object *
rib_lookup(uint32_t fibnum,const struct sockaddr * dst,uint32_t flags,uint32_t flowid)229 rib_lookup(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags,
230     uint32_t flowid)
231 {
232 	struct nhop_object *nh;
233 
234 	nh = NULL;
235 
236 	switch (dst->sa_family) {
237 #ifdef INET
238 	case AF_INET:
239 	{
240 		const struct sockaddr_in *a = (const struct sockaddr_in *)dst;
241 		nh = fib4_lookup(fibnum, a->sin_addr, 0, flags, flowid);
242 		break;
243 	}
244 #endif
245 #ifdef INET6
246 	case AF_INET6:
247 	{
248 		const struct sockaddr_in6 *a = (const struct sockaddr_in6*)dst;
249 		nh = fib6_lookup(fibnum, &a->sin6_addr, a->sin6_scope_id,
250 		    flags, flowid);
251 		break;
252 	}
253 #endif
254 	}
255 
256 	return (nh);
257 }
258 
259 static void
notify_add(struct rib_cmd_info * rc,const struct weightened_nhop * wn_src,route_notification_t * cb,void * cbdata)260 notify_add(struct rib_cmd_info *rc, const struct weightened_nhop *wn_src,
261     route_notification_t *cb, void *cbdata)
262 {
263 	rc->rc_nh_new = wn_src->nh;
264 	rc->rc_nh_weight = wn_src->weight;
265 
266 	IF_DEBUG_LEVEL(LOG_DEBUG2) {
267 		char nhbuf[NHOP_PRINT_BUFSIZE] __unused;
268 		FIB_NH_LOG(LOG_DEBUG2, wn_src->nh, "RTM_ADD for %s @ w=%u",
269 		    nhop_print_buf(wn_src->nh, nhbuf, sizeof(nhbuf)),
270 		    wn_src->weight);
271 	}
272 	cb(rc, cbdata);
273 }
274 
275 static void
notify_del(struct rib_cmd_info * rc,const struct weightened_nhop * wn_src,route_notification_t * cb,void * cbdata)276 notify_del(struct rib_cmd_info *rc, const struct weightened_nhop *wn_src,
277     route_notification_t *cb, void *cbdata)
278 {
279 	rc->rc_nh_old = wn_src->nh;
280 	rc->rc_nh_weight = wn_src->weight;
281 
282 	IF_DEBUG_LEVEL(LOG_DEBUG2) {
283 		char nhbuf[NHOP_PRINT_BUFSIZE] __unused;
284 		FIB_NH_LOG(LOG_DEBUG2, wn_src->nh, "RTM_DEL for %s @ w=%u",
285 		    nhop_print_buf(wn_src->nh, nhbuf, sizeof(nhbuf)),
286 		    wn_src->weight);
287 	}
288 	cb(rc, cbdata);
289 }
290 
291 static void
decompose_change_notification(const struct rib_cmd_info * rc,route_notification_t * cb,void * cbdata)292 decompose_change_notification(const struct rib_cmd_info *rc, route_notification_t *cb,
293     void *cbdata)
294 {
295 	uint32_t num_old, num_new;
296 	const struct weightened_nhop *wn_old, *wn_new;
297 	struct weightened_nhop tmp = { NULL, 0 };
298 	uint32_t idx_old = 0, idx_new = 0;
299 
300 	struct rib_cmd_info rc_del = { .rc_cmd = RTM_DELETE, .rc_rt = rc->rc_rt };
301 	struct rib_cmd_info rc_add = { .rc_cmd = RTM_ADD, .rc_rt = rc->rc_rt };
302 
303 	if (NH_IS_NHGRP(rc->rc_nh_old)) {
304 		wn_old = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_old, &num_old);
305 	} else {
306 		tmp.nh = rc->rc_nh_old;
307 		tmp.weight = rc->rc_nh_weight;
308 		wn_old = &tmp;
309 		num_old = 1;
310 	}
311 	if (NH_IS_NHGRP(rc->rc_nh_new)) {
312 		wn_new = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_new, &num_new);
313 	} else {
314 		tmp.nh = rc->rc_nh_new;
315 		tmp.weight = rc->rc_nh_weight;
316 		wn_new = &tmp;
317 		num_new = 1;
318 	}
319 	IF_DEBUG_LEVEL(LOG_DEBUG) {
320 		char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
321 		nhop_print_buf_any(rc->rc_nh_old, buf_old, NHOP_PRINT_BUFSIZE);
322 		nhop_print_buf_any(rc->rc_nh_new, buf_new, NHOP_PRINT_BUFSIZE);
323 		FIB_NH_LOG(LOG_DEBUG, wn_old[0].nh, "change %s -> %s", buf_old, buf_new);
324 	}
325 
326 	/* Use the fact that each @wn array is sorted */
327 	/*
328 	 * Here we have one (or two) multipath groups and transition
329 	 *  between them needs to be reported to the caller, using series
330 	 *  of primitive (RTM_DEL, RTM_ADD) operations.
331 	 *
332 	 * Leverage the fact that each nexthop group has its nexthops sorted
333 	 *  by their indices.
334 	 * [1] -> [1, 2] = A{2}
335 	 * [1, 2] -> [1] = D{2}
336 	 * [1, 2, 4] -> [1, 3, 4] = D{2}, A{3}
337 	 * [1, 2] -> [3, 4] = D{1}, D{2}, A{3}, A{4]
338 	 */
339 	while ((idx_old < num_old) && (idx_new < num_new)) {
340 		uint32_t nh_idx_old = wn_old[idx_old].nh->nh_priv->nh_idx;
341 		uint32_t nh_idx_new = wn_new[idx_new].nh->nh_priv->nh_idx;
342 
343 		if (nh_idx_old == nh_idx_new) {
344 			if (wn_old[idx_old].weight != wn_new[idx_new].weight) {
345 				/* Update weight by providing del/add notifications */
346 				notify_del(&rc_del, &wn_old[idx_old], cb, cbdata);
347 				notify_add(&rc_add, &wn_new[idx_new], cb, cbdata);
348 			}
349 			idx_old++;
350 			idx_new++;
351 		} else if (nh_idx_old < nh_idx_new) {
352 			/* [1, ~2~, 4], [1, ~3~, 4] */
353 			notify_del(&rc_del, &wn_old[idx_old], cb, cbdata);
354 			idx_old++;
355 		} else {
356 			/* nh_idx_old > nh_idx_new. */
357 			notify_add(&rc_add, &wn_new[idx_new], cb, cbdata);
358 			idx_new++;
359 		}
360 	}
361 
362 	while (idx_old < num_old) {
363 		notify_del(&rc_del, &wn_old[idx_old], cb, cbdata);
364 		idx_old++;
365 	}
366 
367 	while (idx_new < num_new) {
368 		notify_add(&rc_add, &wn_new[idx_new], cb, cbdata);
369 		idx_new++;
370 	}
371 }
372 
373 /*
374  * Decompose multipath cmd info @rc into a list of add/del/change
375  *  single-path operations, calling @cb callback for each operation.
376  * Assumes at least one of the nexthops in @rc is multipath.
377  */
378 void
rib_decompose_notification(const struct rib_cmd_info * rc,route_notification_t * cb,void * cbdata)379 rib_decompose_notification(const struct rib_cmd_info *rc, route_notification_t *cb,
380     void *cbdata)
381 {
382 	const struct weightened_nhop *wn;
383 	uint32_t num_nhops;
384 	struct rib_cmd_info rc_new;
385 
386 	rc_new = *rc;
387 	switch (rc->rc_cmd) {
388 	case RTM_ADD:
389 		if (!NH_IS_NHGRP(rc->rc_nh_new))
390 			return;
391 		wn = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_new, &num_nhops);
392 		for (uint32_t i = 0; i < num_nhops; i++) {
393 			notify_add(&rc_new, &wn[i], cb, cbdata);
394 		}
395 		break;
396 	case RTM_DELETE:
397 		if (!NH_IS_NHGRP(rc->rc_nh_old))
398 			return;
399 		wn = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_old, &num_nhops);
400 		for (uint32_t i = 0; i < num_nhops; i++) {
401 			notify_del(&rc_new, &wn[i], cb, cbdata);
402 		}
403 		break;
404 	case RTM_CHANGE:
405 		if (!NH_IS_NHGRP(rc->rc_nh_old) && !NH_IS_NHGRP(rc->rc_nh_new))
406 			return;
407 		decompose_change_notification(rc, cb, cbdata);
408 		break;
409 	}
410 }
411 
412 union sockaddr_union {
413 	struct sockaddr		sa;
414 	struct sockaddr_in	sin;
415 	struct sockaddr_in6	sin6;
416 	char			_buf[32];
417 };
418 
419 /*
420  * Creates nexhops suitable for using as a default route nhop.
421  * Helper for the various kernel subsystems adding/changing default route.
422  */
423 int
rib_add_default_route(uint32_t fibnum,int family,struct ifnet * ifp,struct sockaddr * gw,struct rib_cmd_info * rc)424 rib_add_default_route(uint32_t fibnum, int family, struct ifnet *ifp,
425     struct sockaddr *gw, struct rib_cmd_info *rc)
426 {
427 	struct route_nhop_data rnd = { .rnd_weight = RT_DEFAULT_WEIGHT };
428 	union sockaddr_union saun = {};
429 	struct sockaddr *dst = &saun.sa;
430 	int error;
431 
432 	switch (family) {
433 #ifdef INET
434 	case AF_INET:
435 		saun.sin.sin_family = AF_INET;
436 		saun.sin.sin_len = sizeof(struct sockaddr_in);
437 		break;
438 #endif
439 #ifdef INET6
440 	case AF_INET6:
441 		saun.sin6.sin6_family = AF_INET6;
442 		saun.sin6.sin6_len = sizeof(struct sockaddr_in6);
443 		break;
444 #endif
445 	default:
446 		return (EAFNOSUPPORT);
447 	}
448 
449 	struct ifaddr *ifa = ifaof_ifpforaddr(gw, ifp);
450 	if (ifa == NULL)
451 		return (ENOENT);
452 
453 	struct nhop_object *nh = nhop_alloc(fibnum, family);
454 	if (nh == NULL)
455 		return (ENOMEM);
456 
457 	nhop_set_gw(nh, gw, true);
458 	nhop_set_transmit_ifp(nh, ifp);
459 	nhop_set_src(nh, ifa);
460 	nhop_set_pxtype_flag(nh, NHF_DEFAULT);
461 	nhop_set_metric(nh, RT_DEFAULT_METRIC);
462 	rnd.rnd_nhop = nhop_get_nhop(nh, &error);
463 
464 	if (error == 0)
465 		error = rib_add_route_px(fibnum, dst, 0, &rnd, RTM_F_CREATE, rc);
466 	return (error);
467 }
468 
469 #ifdef INET
470 /*
471  * Checks if the found key in the trie contains (<=) a prefix covering
472  *  @paddr/@plen.
473  * Returns the most specific rtentry matching the condition or NULL.
474  */
475 static struct rtentry *
get_inet_parent_prefix(uint32_t fibnum,struct in_addr addr,int plen)476 get_inet_parent_prefix(uint32_t fibnum, struct in_addr addr, int plen)
477 {
478 	struct route_nhop_data rnd;
479 	struct rtentry *rt;
480 	struct in_addr addr4;
481 	uint32_t scopeid;
482 	int parent_plen;
483 	struct radix_node *rn;
484 
485 	rt = fib4_lookup_rt(fibnum, addr, 0, NHR_UNLOCKED, &rnd);
486 	if (rt == NULL)
487 		return (NULL);
488 
489 	rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid);
490 	if (parent_plen <= plen)
491 		return (rt);
492 
493 	/*
494 	 * There can be multiple prefixes associated with the found key:
495 	 * 10.0.0.0 -> 10.0.0.0/24, 10.0.0.0/23, 10.0.0.0/22, etc.
496 	 * All such prefixes are linked via rn_dupedkey, from most specific
497 	 *  to least specific. Iterate over them to check if any of these
498 	 *  prefixes are wider than desired plen.
499 	 */
500 	rn = (struct radix_node *)rt;
501 	while ((rn = rn_nextprefix(rn)) != NULL) {
502 		rt = RNTORT(rn);
503 		rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid);
504 		if (parent_plen <= plen)
505 			return (rt);
506 	}
507 
508 	return (NULL);
509 }
510 
511 /*
512  * Returns the most specific prefix containing (>) @paddr/plen.
513  */
514 struct rtentry *
rt_get_inet_parent(uint32_t fibnum,struct in_addr addr,int plen)515 rt_get_inet_parent(uint32_t fibnum, struct in_addr addr, int plen)
516 {
517 	struct in_addr lookup_addr = { .s_addr = INADDR_BROADCAST };
518 	struct in_addr addr4 = addr;
519 	struct in_addr mask4;
520 	struct rtentry *rt;
521 
522 	while (plen-- > 0) {
523 		/* Calculate wider mask & new key to lookup */
524 		mask4.s_addr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
525 		addr4.s_addr = htonl(ntohl(addr4.s_addr) & ntohl(mask4.s_addr));
526 		if (addr4.s_addr == lookup_addr.s_addr) {
527 			/* Skip lookup if the key is the same */
528 			continue;
529 		}
530 		lookup_addr = addr4;
531 
532 		rt = get_inet_parent_prefix(fibnum, lookup_addr, plen);
533 		if (rt != NULL)
534 			return (rt);
535 	}
536 
537 	return (NULL);
538 }
539 #endif
540 
541 #ifdef INET6
542 /*
543  * Checks if the found key in the trie contains (<=) a prefix covering
544  *  @paddr/@plen.
545  * Returns the most specific rtentry matching the condition or NULL.
546  */
547 static struct rtentry *
get_inet6_parent_prefix(uint32_t fibnum,const struct in6_addr * paddr,int plen)548 get_inet6_parent_prefix(uint32_t fibnum, const struct in6_addr *paddr, int plen)
549 {
550 	struct route_nhop_data rnd;
551 	struct rtentry *rt;
552 	struct in6_addr addr6;
553 	uint32_t scopeid;
554 	int parent_plen;
555 	struct radix_node *rn;
556 
557 	rt = fib6_lookup_rt(fibnum, paddr, 0, NHR_UNLOCKED, &rnd);
558 	if (rt == NULL)
559 		return (NULL);
560 
561 	rt_get_inet6_prefix_plen(rt, &addr6, &parent_plen, &scopeid);
562 	if (parent_plen <= plen)
563 		return (rt);
564 
565 	/*
566 	 * There can be multiple prefixes associated with the found key:
567 	 * 2001:db8:1::/64 -> 2001:db8:1::/56, 2001:db8:1::/48, etc.
568 	 * All such prefixes are linked via rn_dupedkey, from most specific
569 	 *  to least specific. Iterate over them to check if any of these
570 	 *  prefixes are wider than desired plen.
571 	 */
572 	rn = (struct radix_node *)rt;
573 	while ((rn = rn_nextprefix(rn)) != NULL) {
574 		rt = RNTORT(rn);
575 		rt_get_inet6_prefix_plen(rt, &addr6, &parent_plen, &scopeid);
576 		if (parent_plen <= plen)
577 			return (rt);
578 	}
579 
580 	return (NULL);
581 }
582 
/*
 * Writes the IPv6 netmask corresponding to the prefix length @mask
 *  into @addr6, in network byte order.
 *
 * The whole address is written: the leading @mask bits are set and all
 *  remaining bits are cleared, so the result does not depend on the
 *  previous contents of @addr6.
 * Prefix lengths above 128 are treated as 128, which keeps the writes
 *  inside @addr6.
 */
void
ip6_writemask(struct in6_addr *addr6, uint8_t mask)
{
	uint8_t *bytes = addr6->s6_addr;
	unsigned int full_bytes, rem_bits;

	if (mask > 128)
		mask = 128;
	full_bytes = mask / 8;
	rem_bits = mask % 8;

	memset(addr6, 0, sizeof(*addr6));
	memset(bytes, 0xff, full_bytes);
	/* rem_bits != 0 implies full_bytes <= 15, so the index is in range */
	if (rem_bits != 0)
		bytes[full_bytes] = (uint8_t)(0xffU << (8 - rem_bits));
}
593 
594 /*
595  * Returns the most specific prefix containing (>) @paddr/plen.
596  */
597 struct rtentry *
rt_get_inet6_parent(uint32_t fibnum,const struct in6_addr * paddr,int plen)598 rt_get_inet6_parent(uint32_t fibnum, const struct in6_addr *paddr, int plen)
599 {
600 	struct in6_addr lookup_addr = in6mask128;
601 	struct in6_addr addr6 = *paddr;
602 	struct in6_addr mask6;
603 	struct rtentry *rt;
604 
605 	while (plen-- > 0) {
606 		/* Calculate wider mask & new key to lookup */
607 		ip6_writemask(&mask6, plen);
608 		IN6_MASK_ADDR(&addr6, &mask6);
609 		if (IN6_ARE_ADDR_EQUAL(&addr6, &lookup_addr)) {
610 			/* Skip lookup if the key is the same */
611 			continue;
612 		}
613 		lookup_addr = addr6;
614 
615 		rt = get_inet6_parent_prefix(fibnum, &lookup_addr, plen);
616 		if (rt != NULL)
617 			return (rt);
618 	}
619 
620 	return (NULL);
621 }
622 #endif
623 
624 /*
625  * Prints rtentry @rt data in the provided @buf.
626  * Example: rt/192.168.0.0/24
627  */
628 char *
rt_print_buf(const struct rtentry * rt,char * buf,size_t bufsize)629 rt_print_buf(const struct rtentry *rt, char *buf, size_t bufsize)
630 {
631 #if defined(INET) || defined(INET6)
632 	char abuf[INET6_ADDRSTRLEN];
633 	uint32_t scopeid;
634 	int plen;
635 #endif
636 
637 	switch (rt_get_family(rt)) {
638 #ifdef INET
639 	case AF_INET:
640 		{
641 			struct in_addr addr4;
642 			rt_get_inet_prefix_plen(rt, &addr4, &plen, &scopeid);
643 			inet_ntop(AF_INET, &addr4, abuf, sizeof(abuf));
644 			snprintf(buf, bufsize, "rt/%s/%d", abuf, plen);
645 		}
646 		break;
647 #endif
648 #ifdef INET6
649 	case AF_INET6:
650 		{
651 			struct in6_addr addr6;
652 			rt_get_inet6_prefix_plen(rt, &addr6, &plen, &scopeid);
653 			inet_ntop(AF_INET6, &addr6, abuf, sizeof(abuf));
654 			snprintf(buf, bufsize, "rt/%s/%d", abuf, plen);
655 		}
656 		break;
657 #endif
658 	default:
659 		snprintf(buf, bufsize, "rt/unknown_af#%d", rt_get_family(rt));
660 		break;
661 	}
662 
663 	return (buf);
664 }
665 
666 const char *
rib_print_cmd(int rib_cmd)667 rib_print_cmd(int rib_cmd)
668 {
669 	switch (rib_cmd) {
670 	case RTM_ADD:
671 		return ("RTM_ADD");
672 	case RTM_CHANGE:
673 		return ("RTM_CHANGE");
674 	case RTM_DELETE:
675 		return ("RTM_DELETE");
676 	case RTM_GET:
677 		return ("RTM_GET");
678 	}
679 
680 	return ("UNKNOWN");
681 }
682