/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2020 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_mpath.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/vnet.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <netinet/in.h>

#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
#endif

#include <vm/uma.h>

/*
 * This file contains control plane routing tables functions.
 *
 * All functions assume they are called within the network epoch.
 */

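/*
 * Example (illustrative sketch, not compiled as part of this file):
 * a typical consumer enters the network epoch, fills struct rt_addrinfo
 * and dispatches the operation through rib_action().  The function name
 * example_add_default_route() and the local sockaddrs are assumptions
 * made for the example only.
 *
 *	static int
 *	example_add_default_route(uint32_t fibnum, struct sockaddr_in *gw)
 *	{
 *		struct epoch_tracker et;
 *		struct rt_addrinfo info;
 *		struct rib_cmd_info rc;
 *		struct sockaddr_in dst, mask;
 *		int error;
 *
 *		bzero(&info, sizeof(info));
 *		bzero(&dst, sizeof(dst));
 *		bzero(&mask, sizeof(mask));
 *		dst.sin_family = AF_INET;
 *		dst.sin_len = sizeof(dst);
 *		mask.sin_family = AF_INET;
 *		mask.sin_len = sizeof(mask);
 *
 *		info.rti_flags = RTF_GATEWAY | RTF_STATIC;
 *		info.rti_info[RTAX_DST] = (struct sockaddr *)&dst;
 *		info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&mask;
 *		info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)gw;
 *
 *		NET_EPOCH_ENTER(et);
 *		error = rib_action(fibnum, RTM_ADD, &info, &rc);
 *		NET_EPOCH_EXIT(et);
 *		return (error);
 *	}
 */
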
struct rib_subscription {
	CK_STAILQ_ENTRY(rib_subscription)	next;
	rib_subscription_cb_t			*func;
	void					*arg;
	enum rib_subscription_type		type;
	struct epoch_context			epoch_ctx;
};

static int add_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd,
    struct rib_cmd_info *rc);
static int del_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int change_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct route_nhop_data *nhd_orig, struct rib_cmd_info *rc);
static int change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd,
    struct rib_cmd_info *rc);
static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
    struct rib_cmd_info *rc);

static void destroy_subscription_epoch(epoch_context_t ctx);

/* Routing table UMA zone */
VNET_DEFINE_STATIC(uma_zone_t, rtzone);
#define	V_rtzone	VNET(rtzone)

void
vnet_rtzone_init(void)
{

	V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}

#ifdef VIMAGE
void
vnet_rtzone_destroy(void)
{

	uma_zdestroy(V_rtzone);
}
#endif

static void
destroy_rtentry(struct rtentry *rt)
{

	/*
	 * At this point rnh and nh_control may already be freed.
	 * The nhop interface may have been migrated to a different vnet.
	 * Use the vnet stored in the nexthop to delete the entry.
	 */
	CURVNET_SET(nhop_get_vnet(rt->rt_nhop));

	/* Unreference nexthop */
	nhop_free(rt->rt_nhop);

	uma_zfree(V_rtzone, rt);

	CURVNET_RESTORE();
}

/*
 * Epoch callback indicating rtentry is safe to destroy
 */
static void
destroy_rtentry_epoch(epoch_context_t ctx)
{
	struct rtentry *rt;

	rt = __containerof(ctx, struct rtentry, rt_epoch_ctx);

	destroy_rtentry(rt);
}

/*
 * Schedule rtentry deletion
 */
static void
rtfree(struct rtentry *rt)
{

	KASSERT(rt != NULL, ("%s: NULL rt", __func__));

	epoch_call(net_epoch_preempt, destroy_rtentry_epoch,
	    &rt->rt_epoch_ctx);
}

static struct rib_head *
get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
{
	struct rib_head *rnh;
	struct sockaddr *dst;

	KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));

	dst = info->rti_info[RTAX_DST];
	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);

	return (rnh);
}

/*
 * Adds route defined by @info into the kernel table specified by @fibnum and
 * sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct rib_head *rnh;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	/*
	 * Check consistency between RTF_HOST flag and netmask
	 * existence.
	 */
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	else if (info->rti_info[RTAX_NETMASK] == NULL)
		return (EINVAL);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_ADD;

	return (add_route(rnh, info, rc));
}

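/*
 * Example (illustrative sketch, not compiled as part of this file):
 * the RTF_HOST/netmask consistency rule above means a host route is
 * requested by setting RTF_HOST and leaving RTAX_NETMASK NULL, while a
 * network route must carry an explicit netmask.  The caller must be
 * inside the network epoch.  The helper name example_add_host_route()
 * is an assumption made for the example only.
 *
 *	static int
 *	example_add_host_route(uint32_t fibnum, struct sockaddr_in *dst,
 *	    struct sockaddr_in *gw)
 *	{
 *		struct rt_addrinfo info;
 *		struct rib_cmd_info rc;
 *
 *		bzero(&info, sizeof(info));
 *		info.rti_flags = RTF_HOST | RTF_GATEWAY;
 *		info.rti_info[RTAX_DST] = (struct sockaddr *)dst;
 *		info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)gw;
 *
 *		return (rib_add_route(fibnum, &info, &rc));
 *	}
 */
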
/*
 * Creates rtentry and nexthop based on @info data.
 * Returns 0 and fills in the rtentry into @prt on success,
 * returns errno otherwise.
 */
static int
create_rtentry(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rtentry **prt)
{
	struct sockaddr *dst, *ndst, *gateway, *netmask;
	struct rtentry *rt;
	struct nhop_object *nh;
	struct ifaddr *ifa;
	int error, flags;

	dst = info->rti_info[RTAX_DST];
	gateway = info->rti_info[RTAX_GATEWAY];
	netmask = info->rti_info[RTAX_NETMASK];
	flags = info->rti_flags;

	if ((flags & RTF_GATEWAY) && !gateway)
		return (EINVAL);
	if (dst && gateway && (dst->sa_family != gateway->sa_family) &&
	    (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK))
		return (EINVAL);

	if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb))
		return (EINVAL);

	if (info->rti_ifa == NULL) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);
		if (error)
			return (error);
	} else {
		ifa_ref(info->rti_ifa);
	}

	error = nhop_create_from_info(rnh, info, &nh);
	if (error != 0) {
		ifa_free(info->rti_ifa);
		return (error);
	}

	rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO);
	if (rt == NULL) {
		ifa_free(info->rti_ifa);
		nhop_free(nh);
		return (ENOBUFS);
	}
	rt->rte_flags = RTF_UP | flags;
	rt->rt_nhop = nh;

	/* Fill in dst */
	memcpy(&rt->rt_dst, dst, dst->sa_len);
	rt_key(rt) = &rt->rt_dst;

	/*
	 * Point ndst at the destination storage embedded in the rtentry.
	 */
	ndst = (struct sockaddr *)rt_key(rt);

	/*
	 * Make sure it contains the value we want (masked if needed).
	 */
	if (netmask)
		rt_maskedcopy(dst, ndst, netmask);
	else
		bcopy(dst, ndst, dst->sa_len);

	/*
	 * We use the ifa reference returned by rt_getifa_fib().
	 * This moved from below so that rnh->rnh_addaddr() can
	 * examine the ifa and ifa->ifa_ifp if it so desires.
	 */
	ifa = info->rti_ifa;
	rt->rt_weight = 1;

	rt_setmetrics(info, rt);

	*prt = rt;
	return (0);
}

static int
add_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct sockaddr *ndst, *netmask;
	struct route_nhop_data rnd;
	struct nhop_object *nh;
	struct rtentry *rt;
	int error;

	error = create_rtentry(rnh, info, &rt);
	if (error != 0)
		return (error);

	rnd.rnd_nhop = rt->rt_nhop;
	rnd.rnd_weight = rt->rt_weight;
	nh = rt->rt_nhop;

	RIB_WLOCK(rnh);
#ifdef RADIX_MPATH
	netmask = info->rti_info[RTAX_NETMASK];
	/* do not permit exactly the same dst/mask/gw pair */
	if (rt_mpath_capable(rnh) &&
	    rt_mpath_conflict(rnh, rt, netmask)) {
		RIB_WUNLOCK(rnh);

		nhop_free(nh);
		uma_zfree(V_rtzone, rt);
		return (EEXIST);
	}
#endif
	error = add_route_nhop(rnh, rt, info, &rnd, rc);
	if (error == 0) {
		rt = NULL;
		nh = NULL;
	} else if ((error == EEXIST) && ((info->rti_flags & RTF_PINNED) != 0)) {
		struct rtentry *rt_orig;
		struct nhop_object *nh_orig;
		struct radix_node *rn;

		ndst = (struct sockaddr *)rt_key(rt);
		netmask = info->rti_info[RTAX_NETMASK];
		rn = rnh->rnh_lookup(ndst, netmask, &rnh->head);
		rt_orig = (struct rtentry *)rn;
		if (rt_orig != NULL) {
			nh_orig = rt_orig->rt_nhop;
			if ((nhop_get_rtflags(nh_orig) & RTF_PINNED) == 0) {
				/* Current nexthop is not PINNED, can update */
				error = change_route_nhop(rnh, rt_orig,
				    info, &rnd, rc);
				if (error == 0)
					nh = NULL;
			}
		} else
			error = ENOBUFS;
	}
	RIB_WUNLOCK(rnh);

	if (error == 0)
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (nh != NULL)
		nhop_free(nh);
	if (rt != NULL)
		uma_zfree(V_rtzone, rt);

	return (error);
}

/*
 * Removes route defined by @info from the kernel table specified by @fibnum and
 * sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rib_head *rnh;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	return (del_route(rnh, info, rc));
}

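/*
 * Example (illustrative sketch, not compiled as part of this file):
 * deleting a route requires only the destination and netmask; passing
 * RTF_PINNED in rti_flags is needed to remove a pinned (interface)
 * route, and the caller must be inside the network epoch.  The helper
 * name example_del_net_route() is an assumption made for the example
 * only.
 *
 *	static int
 *	example_del_net_route(uint32_t fibnum, struct sockaddr_in *dst,
 *	    struct sockaddr_in *mask)
 *	{
 *		struct rt_addrinfo info;
 *		struct rib_cmd_info rc;
 *
 *		bzero(&info, sizeof(info));
 *		info.rti_info[RTAX_DST] = (struct sockaddr *)dst;
 *		info.rti_info[RTAX_NETMASK] = (struct sockaddr *)mask;
 *
 *		return (rib_del_route(fibnum, &info, &rc));
 *	}
 */
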
/*
 * Conditionally unlinks rtentry matching data inside @info from @rnh.
 * Returns the unlinked @rtentry on success, or NULL with @perror set to:
 * ESRCH - prefix was not found,
 * EADDRINUSE - trying to delete a PINNED route without the appropriate flag,
 * ENOENT - supplied filter function returned 0 (not matched).
 */
struct rtentry *
rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror)
{
	struct sockaddr *dst, *netmask;
	struct rtentry *rt;
	struct nhop_object *nh;
	struct radix_node *rn;

	dst = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
	if (rt == NULL) {
		*perror = ESRCH;
		return (NULL);
	}

	nh = rt->rt_nhop;

	if ((info->rti_flags & RTF_PINNED) == 0) {
		/* Check if target route can be deleted */
		if (NH_IS_PINNED(nh)) {
			*perror = EADDRINUSE;
			return (NULL);
		}
	}

	if (info->rti_filter != NULL) {
		if (info->rti_filter(rt, nh, info->rti_filterdata) == 0) {
			/* Not matched */
			*perror = ENOENT;
			return (NULL);
		}

		/*
		 * Filter function requested rte deletion.
		 * Ease the caller's work by filling in remaining info
		 * from that particular entry.
		 */
		info->rti_info[RTAX_GATEWAY] = &nh->gw_sa;
	}

	/*
	 * Remove the item from the tree and return it.
	 * Complain if it is not there and do no more processing.
	 */
	*perror = ESRCH;
#ifdef RADIX_MPATH
	if (rt_mpath_capable(rnh))
		rn = rt_mpath_unlink(rnh, info, rt, perror);
	else
#endif
	rn = rnh->rnh_deladdr(dst, netmask, &rnh->head);
	if (rn == NULL)
		return (NULL);

	if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
		panic("rtrequest delete");

	rt = RNTORT(rn);
	rt->rte_flags &= ~RTF_UP;

	*perror = 0;

	return (rt);
}

static int
del_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct sockaddr *dst, *netmask;
	struct sockaddr_storage mdst;
	struct rtentry *rt;
	int error;

	dst = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	if (netmask) {
		if (dst->sa_len > sizeof(mdst))
			return (EINVAL);
		rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
		dst = (struct sockaddr *)&mdst;
	}

	RIB_WLOCK(rnh);
	rt = rt_unlinkrte(rnh, info, &error);
	if (rt != NULL) {
		/* Finalize notification */
		rnh->rnh_gen++;
		rc->rc_rt = rt;
		rc->rc_nh_old = rt->rt_nhop;
		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
	}
	RIB_WUNLOCK(rnh);
	if (error != 0)
		return (error);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	/*
	 * The entry may still be referenced by readers in the current
	 * epoch, so schedule the actual destruction for after the epoch ends.
	 */
	rtfree(rt);

	return (0);
}

int
rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_orig;
	struct rib_head *rnh;
	struct rtentry *rt;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_CHANGE;

	/* Check if updated gateway exists */
	if ((info->rti_flags & RTF_GATEWAY) &&
	    (info->rti_info[RTAX_GATEWAY] == NULL))
		return (EINVAL);

	/*
	 * A route change is done in multiple steps, dropping and
	 * reacquiring the lock along the way.  If several processes
	 * change the same route concurrently, the route may be modified
	 * between the steps.  Address this by retrying the operation
	 * multiple times before failing.
	 */

	RIB_RLOCK(rnh);
	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt == NULL) {
		RIB_RUNLOCK(rnh);
		return (ESRCH);
	}

#ifdef RADIX_MPATH
	/*
	 * If we got multipath routes,
	 * we require users to specify a matching RTAX_GATEWAY.
	 */
	if (rt_mpath_capable(rnh)) {
		rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]);
		if (rt == NULL) {
			RIB_RUNLOCK(rnh);
			return (ESRCH);
		}
	}
#endif
	rnd_orig.rnd_nhop = rt->rt_nhop;
	rnd_orig.rnd_weight = rt->rt_weight;

	RIB_RUNLOCK(rnh);

	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
		error = change_route(rnh, info, &rnd_orig, rc);
		if (error != EAGAIN)
			break;
	}

	return (error);
}

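/*
 * Example (illustrative sketch, not compiled as part of this file):
 * pointing an existing prefix at a new gateway via rib_change_route(),
 * called from within the network epoch.  The helper name
 * example_change_gateway() is an assumption made for the example only.
 *
 *	static int
 *	example_change_gateway(uint32_t fibnum, struct sockaddr_in *dst,
 *	    struct sockaddr_in *mask, struct sockaddr_in *new_gw)
 *	{
 *		struct rt_addrinfo info;
 *		struct rib_cmd_info rc;
 *
 *		bzero(&info, sizeof(info));
 *		info.rti_flags = RTF_GATEWAY;
 *		info.rti_info[RTAX_DST] = (struct sockaddr *)dst;
 *		info.rti_info[RTAX_NETMASK] = (struct sockaddr *)mask;
 *		info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)new_gw;
 *
 *		return (rib_change_route(fibnum, &info, &rc));
 *	}
 */
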
static int
change_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc)
{
	int error = 0;
	int free_ifa = 0;
	struct nhop_object *nh, *nh_orig;
	struct route_nhop_data rnd_new;

	nh = NULL;
	nh_orig = rnd_orig->rnd_nhop;
	if (nh_orig == NULL)
		return (ESRCH);

	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
	    info->rti_info[RTAX_GATEWAY] != NULL) ||
	    info->rti_info[RTAX_IFP] != NULL ||
	    (info->rti_info[RTAX_IFA] != NULL &&
	     !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);
		if (info->rti_ifa != NULL)
			free_ifa = 1;

		if (error != 0) {
			if (free_ifa) {
				ifa_free(info->rti_ifa);
				info->rti_ifa = NULL;
			}

			return (error);
		}
	}

	error = nhop_create_from_nhop(rnh, nh_orig, info, &nh);
	if (free_ifa) {
		ifa_free(info->rti_ifa);
		info->rti_ifa = NULL;
	}
	if (error != 0)
		return (error);

	rnd_new.rnd_nhop = nh;
	if (info->rti_mflags & RTV_WEIGHT)
		rnd_new.rnd_weight = info->rti_rmx->rmx_weight;
	else
		rnd_new.rnd_weight = rnd_orig->rnd_weight;

	error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc);

	return (error);
}

/*
 * Inserts @rt with nhop data from @rnd into @rnh.
 * Returns 0 on success.
 */
static int
add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd,
    struct rib_cmd_info *rc)
{
	struct sockaddr *ndst, *netmask;
	struct radix_node *rn;
	int error = 0;

	RIB_WLOCK_ASSERT(rnh);

	ndst = (struct sockaddr *)rt_key(rt);
	netmask = info->rti_info[RTAX_NETMASK];

	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes);

	if (rn != NULL) {
		if (rt->rt_expire > 0)
			tmproutes_update(rnh, rt);

		/* Finalize notification */
		rnh->rnh_gen++;

		rc->rc_cmd = RTM_ADD;
		rc->rc_rt = rt;
		rc->rc_nh_old = NULL;
		rc->rc_nh_new = rnd->rnd_nhop;
		rc->rc_nh_weight = rnd->rnd_weight;

		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
	} else {
		/* Existing route or memory allocation failure */
		error = EEXIST;
	}

	return (error);
}

/*
 * Switches @rt nhop/weight to the ones specified in @rnd.
 * Conditionally sets rt_expire if set in @info.
 * Returns 0 on success.
 */
static int
change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd,
    struct rib_cmd_info *rc)
{
	struct nhop_object *nh_orig;

	RIB_WLOCK_ASSERT(rnh);

	nh_orig = rt->rt_nhop;

	if (rnd->rnd_nhop != NULL) {
		/* Changing expiration & nexthop & weight to a new one */
		rt_setmetrics(info, rt);
		rt->rt_nhop = rnd->rnd_nhop;
		rt->rt_weight = rnd->rnd_weight;
		if (rt->rt_expire > 0)
			tmproutes_update(rnh, rt);
	} else {
		/* Route deletion requested. */
		struct sockaddr *ndst, *netmask;
		struct radix_node *rn;

		ndst = (struct sockaddr *)rt_key(rt);
		netmask = info->rti_info[RTAX_NETMASK];
		rn = rnh->rnh_deladdr(ndst, netmask, &rnh->head);
		if (rn == NULL)
			return (ESRCH);
	}

	/* Finalize notification */
	rnh->rnh_gen++;

	rc->rc_cmd = (rnd->rnd_nhop != NULL) ? RTM_CHANGE : RTM_DELETE;
	rc->rc_rt = rt;
	rc->rc_nh_old = nh_orig;
	rc->rc_nh_new = rnd->rnd_nhop;
	rc->rc_nh_weight = rnd->rnd_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Conditionally updates the route nhop/weight IFF the data in @rnd_orig
 *  is consistent with the current route data.
 * The nexthop in @rnd_new is consumed.
 */
int
change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct route_nhop_data *rnd_new, struct rib_cmd_info *rc)
{
	struct rtentry *rt_new;
	int error = 0;

	RIB_WLOCK(rnh);

	rt_new = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt_new == NULL) {
		if (rnd_orig->rnd_nhop == NULL)
			error = add_route_nhop(rnh, rt, info, rnd_new, rc);
		else {
			/*
			 * Prefix does not exist, which was not our assumption.
			 * Update @rnd_orig with the new data and return.
			 */
			rnd_orig->rnd_nhop = NULL;
			rnd_orig->rnd_weight = 0;
			error = EAGAIN;
		}
	} else {
		/* Prefix exists, try to update */
		if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
			/*
			 * Nhop/mpath group hasn't changed. Flip
			 * to the new precalculated one and return.
			 */
			error = change_route_nhop(rnh, rt_new, info, rnd_new, rc);
		} else {
			/* Update and retry */
			rnd_orig->rnd_nhop = rt_new->rt_nhop;
			rnd_orig->rnd_weight = rt_new->rt_weight;
			error = EAGAIN;
		}
	}

	RIB_WUNLOCK(rnh);

	if (error == 0) {
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

		if (rnd_orig->rnd_nhop != NULL)
			nhop_free_any(rnd_orig->rnd_nhop);

	} else {
		if (rnd_new->rnd_nhop != NULL)
			nhop_free_any(rnd_new->rnd_nhop);
	}

	return (error);
}

/*
 * Performs the routing table modification specified by @action.
 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
 * Needs to be run in the network epoch.
 *
 * Returns 0 on success and fills in @rc with the action result.
 */
int
rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	int error;

	switch (action) {
	case RTM_ADD:
		error = rib_add_route(fibnum, info, rc);
		break;
	case RTM_DELETE:
		error = rib_del_route(fibnum, info, rc);
		break;
	case RTM_CHANGE:
		error = rib_change_route(fibnum, info, rc);
		break;
	default:
		error = ENOTSUP;
	}

	return (error);
}

struct rt_delinfo
{
	struct rt_addrinfo info;
	struct rib_head *rnh;
	struct rtentry *head;
	struct rib_cmd_info rc;
};

/*
 * Conditionally unlinks @rn from radix tree based
 * on info data passed in @arg.
 */
static int
rt_checkdelroute(struct radix_node *rn, void *arg)
{
	struct rt_delinfo *di;
	struct rt_addrinfo *info;
	struct rtentry *rt;
	int error;

	di = (struct rt_delinfo *)arg;
	rt = (struct rtentry *)rn;
	info = &di->info;
	error = 0;

	info->rti_info[RTAX_DST] = rt_key(rt);
	info->rti_info[RTAX_NETMASK] = rt_mask(rt);
	info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa;

	rt = rt_unlinkrte(di->rnh, info, &error);
	if (rt == NULL) {
		/* Either not allowed or not matched. Skip entry */
		return (0);
	}

	/* Entry was unlinked. Notify subscribers */
	di->rnh->rnh_gen++;
	di->rc.rc_rt = rt;
	di->rc.rc_nh_old = rt->rt_nhop;
	rib_notify(di->rnh, RIB_NOTIFY_IMMEDIATE, &di->rc);

	/* Add to the list and return */
	rt->rt_chain = di->head;
	di->head = rt;

	return (0);
}

/*
 * Iterates over a routing table specified by @fibnum and @family and
 *  deletes elements marked by @filter_f.
 * @fibnum: rtable id
 * @family: AF_ address family
 * @filter_f: function returning non-zero value for items to delete
 * @arg: data to pass to the @filter_f function
 * @report: true if rtsock notification is needed.
 */
void
rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f, void *arg, bool report)
{
	struct rib_head *rnh;
	struct rt_delinfo di;
	struct rtentry *rt;
	struct epoch_tracker et;

	rnh = rt_tables_get_rnh(fibnum, family);
	if (rnh == NULL)
		return;

	bzero(&di, sizeof(di));
	di.info.rti_filter = filter_f;
	di.info.rti_filterdata = arg;
	di.rnh = rnh;
	di.rc.rc_cmd = RTM_DELETE;

	NET_EPOCH_ENTER(et);

	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
	RIB_WUNLOCK(rnh);

	/* We might have something to reclaim. */
	while (di.head != NULL) {
		rt = di.head;
		di.head = rt->rt_chain;
		rt->rt_chain = NULL;

		di.rc.rc_rt = rt;
		di.rc.rc_nh_old = rt->rt_nhop;
		rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);

		/* TODO std rt -> rt_addrinfo export */
		di.info.rti_info[RTAX_DST] = rt_key(rt);
		di.info.rti_info[RTAX_NETMASK] = rt_mask(rt);

		if (report)
			rt_routemsg(RTM_DELETE, rt, rt->rt_nhop->nh_ifp, 0,
			    fibnum);
		rtfree(rt);
	}

	NET_EPOCH_EXIT(et);
}

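/*
 * Example (illustrative sketch, not compiled as part of this file):
 * a rt_filter_f_t callback selecting routes that point at a given
 * interface, suitable for passing to rib_walk_del() above.  The names
 * example_ifp_match() and example_flush_iface_routes() are assumptions
 * made for the example only.
 *
 *	static int
 *	example_ifp_match(const struct rtentry *rt,
 *	    const struct nhop_object *nh, void *arg)
 *	{
 *
 *		return (nh->nh_ifp == (struct ifnet *)arg);
 *	}
 *
 *	static void
 *	example_flush_iface_routes(uint32_t fibnum, struct ifnet *ifp)
 *	{
 *
 *		rib_walk_del(fibnum, AF_INET, example_ifp_match, ifp, true);
 *	}
 */
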
static void
rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
    struct rib_cmd_info *rc)
{
	struct rib_subscription *rs;

	CK_STAILQ_FOREACH(rs, &rnh->rnh_subscribers, next) {
		if (rs->type == type)
			rs->func(rnh, rc, rs->arg);
	}
}

static struct rib_subscription *
allocate_subscription(rib_subscription_cb_t *f, void *arg,
    enum rib_subscription_type type, bool waitok)
{
	struct rib_subscription *rs;
	int flags = M_ZERO | (waitok ? M_WAITOK : M_NOWAIT);

	rs = malloc(sizeof(struct rib_subscription), M_RTABLE, flags);
	if (rs == NULL)
		return (NULL);

	rs->func = f;
	rs->arg = arg;
	rs->type = type;

	return (rs);
}

/*
 * Subscribe for the changes in the routing table specified by @fibnum and
 *  @family.
 *
 * Returns pointer to the subscription structure on success.
 */
struct rib_subscription *
rib_subscribe(uint32_t fibnum, int family, rib_subscription_cb_t *f, void *arg,
    enum rib_subscription_type type, bool waitok)
{
	struct rib_head *rnh;
	struct rib_subscription *rs;
	struct epoch_tracker et;

	if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL)
		return (NULL);

	NET_EPOCH_ENTER(et);
	KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__));
	rnh = rt_tables_get_rnh(fibnum, family);

	RIB_WLOCK(rnh);
	CK_STAILQ_INSERT_TAIL(&rnh->rnh_subscribers, rs, next);
	RIB_WUNLOCK(rnh);
	NET_EPOCH_EXIT(et);

	return (rs);
}

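/*
 * Example (illustrative sketch, not compiled as part of this file):
 * a rib_subscription_cb_t that logs route additions, registered for
 * delayed (post-commit) notifications and later removed with
 * rib_unsibscribe().  The names example_route_cb() and
 * example_watch_fib() are assumptions made for the example only.
 *
 *	static void
 *	example_route_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
 *	    void *arg)
 *	{
 *
 *		if (rc->rc_cmd == RTM_ADD)
 *			log(LOG_DEBUG, "route added, weight %u\n",
 *			    rc->rc_nh_weight);
 *	}
 *
 *	static struct rib_subscription *
 *	example_watch_fib(uint32_t fibnum)
 *	{
 *
 *		return (rib_subscribe(fibnum, AF_INET, example_route_cb,
 *		    NULL, RIB_NOTIFY_DELAYED, true));
 *	}
 */
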
struct rib_subscription *
rib_subscribe_internal(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg,
    enum rib_subscription_type type, bool waitok)
{
	struct rib_subscription *rs;
	struct epoch_tracker et;

	if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL)
		return (NULL);

	NET_EPOCH_ENTER(et);
	RIB_WLOCK(rnh);
	CK_STAILQ_INSERT_TAIL(&rnh->rnh_subscribers, rs, next);
	RIB_WUNLOCK(rnh);
	NET_EPOCH_EXIT(et);

	return (rs);
}

/*
 * Remove rtable subscription @rs from the table specified by @fibnum
 *  and @family.
 * Needs to be run in network epoch.
 *
 * Returns 0 on success.
 */
int
rib_unsibscribe(uint32_t fibnum, int family, struct rib_subscription *rs)
{
	struct rib_head *rnh;

	NET_EPOCH_ASSERT();
	KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__));
	rnh = rt_tables_get_rnh(fibnum, family);

	if (rnh == NULL)
		return (ENOENT);

	RIB_WLOCK(rnh);
	CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next);
	RIB_WUNLOCK(rnh);

	epoch_call(net_epoch_preempt, destroy_subscription_epoch,
	    &rs->epoch_ctx);

	return (0);
}

/*
 * Epoch callback indicating subscription is safe to destroy
 */
static void
destroy_subscription_epoch(epoch_context_t ctx)
{
	struct rib_subscription *rs;

	rs = __containerof(ctx, struct rib_subscription, epoch_ctx);

	free(rs, M_RTABLE);
}

void
rib_init_subscriptions(struct rib_head *rnh)
{

	CK_STAILQ_INIT(&rnh->rnh_subscribers);
}

void
rib_destroy_subscriptions(struct rib_head *rnh)
{
	struct rib_subscription *rs;
	struct epoch_tracker et;

	NET_EPOCH_ENTER(et);
	RIB_WLOCK(rnh);
	while ((rs = CK_STAILQ_FIRST(&rnh->rnh_subscribers)) != NULL) {
		CK_STAILQ_REMOVE_HEAD(&rnh->rnh_subscribers, next);
		epoch_call(net_epoch_preempt, destroy_subscription_epoch,
		    &rs->epoch_ctx);
	}
	RIB_WUNLOCK(rnh);
	NET_EPOCH_EXIT(et);
}
1057