xref: /freebsd/sys/net/route/route_helpers.c (revision cd8537910406e68d4719136a5b0cf6d23bb1b23b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2020 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include "opt_inet.h"
31 #include "opt_inet6.h"
32 #include "opt_route.h"
33 
34 #include <sys/param.h>
35 #include <sys/jail.h>
36 #include <sys/systm.h>
37 #include <sys/malloc.h>
38 #include <sys/mbuf.h>
39 #include <sys/socket.h>
40 #include <sys/sysctl.h>
41 #include <sys/syslog.h>
42 #include <sys/sysproto.h>
43 #include <sys/proc.h>
44 #include <sys/domain.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/rmlock.h>
48 
49 #include <net/if.h>
50 #include <net/if_var.h>
51 #include <net/if_dl.h>
52 #include <net/route.h>
53 #include <net/route/route_ctl.h>
54 #include <net/route/route_var.h>
55 #include <net/route/nhop_utils.h>
56 #include <net/route/nhop.h>
57 #include <net/route/nhop_var.h>
58 #ifdef INET
59 #include <netinet/in_fib.h>
60 #endif
61 #ifdef INET6
62 #include <netinet6/in6_fib.h>
63 #endif
64 #include <net/vnet.h>
65 
66 /*
67  * RIB helper functions.
68  */
69 
70 /*
71  * Calls @wa_f with @arg for each entry in the table specified by
72  * @af and @fibnum.
73  *
74  * @ss_t callback is called before and after the tree traversal
75  *  while holding table lock.
76  *
77  * Table is traversed under read lock unless @wlock is set.
78  */
79 void
80 rib_walk_ext(uint32_t fibnum, int family, bool wlock, rib_walktree_f_t *wa_f,
81     rib_walk_hook_f_t *hook_f, void *arg)
82 {
83 	RIB_RLOCK_TRACKER;
84 	struct rib_head *rnh;
85 
86 	if ((rnh = rt_tables_get_rnh(fibnum, family)) == NULL)
87 		return;
88 
89 	if (wlock)
90 		RIB_WLOCK(rnh);
91 	else
92 		RIB_RLOCK(rnh);
93 	if (hook_f != NULL)
94 		hook_f(rnh, RIB_WALK_HOOK_PRE, arg);
95 	rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f, arg);
96 	if (hook_f != NULL)
97 		hook_f(rnh, RIB_WALK_HOOK_POST, arg);
98 	if (wlock)
99 		RIB_WUNLOCK(rnh);
100 	else
101 		RIB_RUNLOCK(rnh);
102 }
103 
104 /*
105  * Calls @wa_f with @arg for each entry in the table specified by
106  * @af and @fibnum.
107  *
108  * Table is traversed under read lock unless @wlock is set.
109  */
110 void
111 rib_walk(uint32_t fibnum, int family, bool wlock, rib_walktree_f_t *wa_f,
112     void *arg)
113 {
114 
115 	rib_walk_ext(fibnum, family, wlock, wa_f, NULL, arg);
116 }
117 
118 /*
119  * Iterates over all existing fibs in system calling
120  *  @hook_f function before/after traversing each fib.
121  *  Calls @wa_f function for each element in current fib.
122  * If af is not AF_UNSPEC, iterates over fibs in particular
123  * address family.
124  */
125 void
126 rib_foreach_table_walk(int family, bool wlock, rib_walktree_f_t *wa_f,
127     rib_walk_hook_f_t *hook_f, void *arg)
128 {
129 
130 	for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
131 		/* Do we want some specific family? */
132 		if (family != AF_UNSPEC) {
133 			rib_walk_ext(fibnum, family, wlock, wa_f, hook_f, arg);
134 			continue;
135 		}
136 
137 		for (int i = 1; i <= AF_MAX; i++)
138 			rib_walk_ext(fibnum, i, wlock, wa_f, hook_f, arg);
139 	}
140 }
141 
142 /*
143  * Iterates over all existing fibs in system and deletes each element
144  *  for which @filter_f function returns non-zero value.
145  * If @family is not AF_UNSPEC, iterates over fibs in particular
146  * address family.
147  */
148 void
149 rib_foreach_table_walk_del(int family, rib_filter_f_t *filter_f, void *arg)
150 {
151 
152 	for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
153 		/* Do we want some specific family? */
154 		if (family != AF_UNSPEC) {
155 			rib_walk_del(fibnum, family, filter_f, arg, 0);
156 			continue;
157 		}
158 
159 		for (int i = 1; i <= AF_MAX; i++)
160 			rib_walk_del(fibnum, i, filter_f, arg, 0);
161 	}
162 }
163 
164 
165 /*
166  * Wrapper for the control plane functions for performing af-agnostic
167  *  lookups.
168  * @fibnum: fib to perform the lookup.
169  * @dst: sockaddr with family and addr filled in. IPv6 addresses needs to be in
170  *  deembedded from.
171  * @flags: fib(9) flags.
172  * @flowid: flow id for path selection in multipath use case.
173  *
174  * Returns nhop_object or NULL.
175  *
176  * Requires NET_EPOCH.
177  *
178  */
179 struct nhop_object *
180 rib_lookup(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags,
181     uint32_t flowid)
182 {
183 	struct nhop_object *nh;
184 
185 	nh = NULL;
186 
187 	switch (dst->sa_family) {
188 #ifdef INET
189 	case AF_INET:
190 	{
191 		const struct sockaddr_in *a = (const struct sockaddr_in *)dst;
192 		nh = fib4_lookup(fibnum, a->sin_addr, 0, flags, flowid);
193 		break;
194 	}
195 #endif
196 #ifdef INET6
197 	case AF_INET6:
198 	{
199 		const struct sockaddr_in6 *a = (const struct sockaddr_in6*)dst;
200 		nh = fib6_lookup(fibnum, &a->sin6_addr, a->sin6_scope_id,
201 		    flags, flowid);
202 		break;
203 	}
204 #endif
205 	}
206 
207 	return (nh);
208 }
209 
210 #ifdef ROUTE_MPATH
211 static void
212 decompose_change_notification(struct rib_cmd_info *rc, route_notification_t *cb,
213     void *cbdata)
214 {
215 	uint32_t num_old, num_new;
216 	uint32_t nh_idx_old, nh_idx_new;
217 	struct weightened_nhop *wn_old, *wn_new;
218 	struct weightened_nhop tmp = { NULL, 0 };
219 	uint32_t idx_old = 0, idx_new = 0;
220 
221 	struct rib_cmd_info rc_del = { .rc_cmd = RTM_DELETE, .rc_rt = rc->rc_rt };
222 	struct rib_cmd_info rc_add = { .rc_cmd = RTM_ADD, .rc_rt = rc->rc_rt };
223 
224 	if (NH_IS_NHGRP(rc->rc_nh_old)) {
225 		wn_old = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_old, &num_old);
226 	} else {
227 		tmp.nh = rc->rc_nh_old;
228 		tmp.weight = rc->rc_nh_weight;
229 		wn_old = &tmp;
230 		num_old = 1;
231 	}
232 	if (NH_IS_NHGRP(rc->rc_nh_new)) {
233 		wn_new = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_new, &num_new);
234 	} else {
235 		tmp.nh = rc->rc_nh_new;
236 		tmp.weight = rc->rc_nh_weight;
237 		wn_new = &tmp;
238 		num_new = 1;
239 	}
240 
241 	/* Use the fact that each @wn array is sorted */
242 	/*
243 	 * Want to convert into set of add and delete operations
244 	 * [1] -> [1, 2] = A{2}
245 	 * [2] -> [1, 2] = A{1}
246 	 * [1, 2, 4]->[1, 3, 4] = A{2}, D{3}
247 	 * [1, 2, 4]->[1, 4] = D{2}
248 	 * [1, 2, 4] -> [3, 4] = D{1}, C{2,3} OR C{1,3}, D{2} OR D{1},D{2},A{3}
249 	 * [1, 2] -> [3, 4] =
250 	 *
251 	 */
252 	idx_old = 0;
253 	while ((idx_old < num_old) && (idx_new < num_new)) {
254 		nh_idx_old = wn_old[idx_old].nh->nh_priv->nh_idx;
255 		nh_idx_new = wn_new[idx_new].nh->nh_priv->nh_idx;
256 
257 		if (nh_idx_old == nh_idx_new) {
258 			if (wn_old[idx_old].weight != wn_new[idx_new].weight) {
259 				/* Update weight by providing del/add notifications */
260 				rc_del.rc_nh_old = wn_old[idx_old].nh;
261 				rc_del.rc_nh_weight = wn_old[idx_old].weight;
262 				cb(&rc_del, cbdata);
263 
264 				rc_add.rc_nh_new = wn_new[idx_new].nh;
265 				rc_add.rc_nh_weight = wn_new[idx_new].weight;
266 				cb(&rc_add, cbdata);
267 			}
268 			idx_old++;
269 			idx_new++;
270 		} else if (nh_idx_old < nh_idx_new) {
271 			/*
272 			 * [1, ~2~, 4], [1, ~3~, 4]
273 			 * [1, ~2~, 5], [1, ~3~, 4]
274 			 * [1, ~2~], [1, ~3~, 4]
275 			 */
276 			if ((idx_old + 1 >= num_old) ||
277 			    (wn_old[idx_old + 1].nh->nh_priv->nh_idx > nh_idx_new)) {
278 				/* Add new unless the next old item is still <= new */
279 				rc_add.rc_nh_new = wn_new[idx_new].nh;
280 				rc_add.rc_nh_weight = wn_new[idx_new].weight;
281 				cb(&rc_add, cbdata);
282 				idx_new++;
283 			}
284 			/* In any case, delete current old */
285 			rc_del.rc_nh_old = wn_old[idx_old].nh;
286 			rc_del.rc_nh_weight = wn_old[idx_old].weight;
287 			cb(&rc_del, cbdata);
288 			idx_old++;
289 		} else {
290 			/*
291 			 * nh_idx_old > nh_idx_new
292 			 *
293 			 * [1, ~3~, 4], [1, ~2~, 4]
294 			 * [1, ~3~, 5], [1, ~2~, 4]
295 			 * [1, ~3~, 4], [1, ~2~]
296 			 */
297 			if ((idx_new + 1 >= num_new) ||
298 			    (wn_new[idx_new + 1].nh->nh_priv->nh_idx > nh_idx_old)) {
299 				/* No next item or next item is > current one */
300 				rc_add.rc_nh_new = wn_new[idx_new].nh;
301 				rc_add.rc_nh_weight = wn_new[idx_new].weight;
302 				cb(&rc_add, cbdata);
303 				idx_new++;
304 			}
305 			/* In any case, delete current old */
306 			rc_del.rc_nh_old = wn_old[idx_old].nh;
307 			rc_del.rc_nh_weight = wn_old[idx_old].weight;
308 			cb(&rc_del, cbdata);
309 			idx_old++;
310 		}
311 	}
312 
313 	while (idx_old < num_old) {
314 		rc_del.rc_nh_old = wn_old[idx_old].nh;
315 		rc_del.rc_nh_weight = wn_old[idx_old].weight;
316 		cb(&rc_del, cbdata);
317 		idx_old++;
318 	}
319 
320 	while (idx_new < num_new) {
321 		rc_add.rc_nh_new = wn_new[idx_new].nh;
322 		rc_add.rc_nh_weight = wn_new[idx_new].weight;
323 		cb(&rc_add, cbdata);
324 		idx_new++;
325 	}
326 }
327 
328 /*
329  * Decompose multipath cmd info @rc into a list of add/del/change
330  *  single-path operations, calling @cb callback for each operation.
331  * Assumes at least one of the nexthops in @rc is multipath.
332  */
333 void
334 rib_decompose_notification(struct rib_cmd_info *rc, route_notification_t *cb,
335     void *cbdata)
336 {
337 	struct weightened_nhop *wn;
338 	uint32_t num_nhops;
339 	struct rib_cmd_info rc_new;
340 
341 	rc_new = *rc;
342 	DPRINTF("cb=%p cmd=%d nh_old=%p nh_new=%p",
343 	    cb, rc->cmd, rc->nh_old, rc->nh_new);
344 	switch (rc->rc_cmd) {
345 	case RTM_ADD:
346 		if (!NH_IS_NHGRP(rc->rc_nh_new))
347 			return;
348 		wn = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_new, &num_nhops);
349 		for (uint32_t i = 0; i < num_nhops; i++) {
350 			rc_new.rc_nh_new = wn[i].nh;
351 			rc_new.rc_nh_weight = wn[i].weight;
352 			cb(&rc_new, cbdata);
353 		}
354 		break;
355 	case RTM_DELETE:
356 		if (!NH_IS_NHGRP(rc->rc_nh_old))
357 			return;
358 		wn = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_old, &num_nhops);
359 		for (uint32_t i = 0; i < num_nhops; i++) {
360 			rc_new.rc_nh_old = wn[i].nh;
361 			rc_new.rc_nh_weight = wn[i].weight;
362 			cb(&rc_new, cbdata);
363 		}
364 		break;
365 	case RTM_CHANGE:
366 		if (!NH_IS_NHGRP(rc->rc_nh_old) && !NH_IS_NHGRP(rc->rc_nh_new))
367 			return;
368 		decompose_change_notification(rc, cb, cbdata);
369 		break;
370 	}
371 }
372 #endif
373