xref: /freebsd/sys/net/route/route_ctl.c (revision c0256b31efcccb6964822b5aadb183e8a6d45507)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2020 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 #include "opt_route.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/mbuf.h>
37 #include <sys/socket.h>
38 #include <sys/sysctl.h>
39 #include <sys/syslog.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/rmlock.h>
43 
44 #include <net/if.h>
45 #include <net/if_var.h>
46 #include <net/if_private.h>
47 #include <net/if_dl.h>
48 #include <net/vnet.h>
49 #include <net/route.h>
50 #include <net/route/route_ctl.h>
51 #include <net/route/route_var.h>
52 #include <net/route/nhop_utils.h>
53 #include <net/route/nhop.h>
54 #include <net/route/nhop_var.h>
55 #include <netinet/in.h>
56 #include <netinet6/scope6_var.h>
57 #include <netinet6/in6_var.h>
58 
59 #define	DEBUG_MOD_NAME	route_ctl
60 #define	DEBUG_MAX_LEVEL	LOG_DEBUG
61 #include <net/route/route_debug.h>
62 _DECLARE_DEBUG(LOG_INFO);
63 
/*
 * This file contains control plane routing table functions.
 *
 * All functions assume they are called in net epoch.
 */
69 
/*
 * Scratch storage for a sockaddr of any family handled in this file.
 * The functions below use it as on-stack space for netmasks built by
 * fill_pxmask_family().
 */
union sockaddr_union {
	struct sockaddr		sa;	/* generic view */
	struct sockaddr_in	sin;	/* IPv4 view */
	struct sockaddr_in6	sin6;	/* IPv6 view */
	char			_buf[32]; /* keeps the union at least 32 bytes */
};
76 
77 static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
78     struct rib_cmd_info *rc);
79 static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
80     struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
81     struct rib_cmd_info *rc);
82 
83 static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
84     struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
85 static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
86     struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
87     int op_flags, struct rib_cmd_info *rc);
88 
89 static int add_route(struct rib_head *rnh, struct rtentry *rt,
90     struct route_nhop_data *rnd, struct rib_cmd_info *rc);
91 static int delete_route(struct rib_head *rnh, struct rtentry *rt,
92     struct rib_cmd_info *rc);
93 static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
94     int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);
95 
96 static bool fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
97     struct sockaddr **pmask);
98 static int get_prio_from_info(const struct rt_addrinfo *info);
99 static int nhop_get_prio(const struct nhop_object *nh);
100 
101 static bool rib_can_multipath(struct rib_head *rh);
102 
103 /* Per-vnet multipath routing configuration */
104 SYSCTL_DECL(_net_route);
105 #define	V_rib_route_multipath	VNET(rib_route_multipath)
106 VNET_DEFINE(u_int, rib_route_multipath) = 1;
107 SYSCTL_UINT(_net_route, OID_AUTO, multipath, CTLFLAG_RW | CTLFLAG_VNET,
108     &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
109 
110 VNET_DEFINE(u_int, fib_hash_outbound) = 0;
111 SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
112     &VNET_NAME(fib_hash_outbound), 0,
113     "Compute flowid for locally-originated packets");
114 
/* Default entropy added to the hash calculation for outbound connections. */
116 uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
117 	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
118 	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
119 	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
120 	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
121 	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
122 };
123 
124 #if defined(INET) && defined(INET6)
125 FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
126 #define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop)
127 VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
128 SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
129     &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
130 #endif
131 
132 /* Debug bits */
133 SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
134 
135 static struct rib_head *
get_rnh(uint32_t fibnum,const struct rt_addrinfo * info)136 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
137 {
138 	struct rib_head *rnh;
139 	struct sockaddr *dst;
140 
141 	KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));
142 
143 	dst = info->rti_info[RTAX_DST];
144 	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
145 
146 	return (rnh);
147 }
148 
149 #if defined(INET) && defined(INET6)
150 bool
rib_can_4o6_nhop(void)151 rib_can_4o6_nhop(void)
152 {
153 	return (!!V_rib_route_ipv6_nexthop);
154 }
155 #endif
156 
157 static bool
rib_can_multipath(struct rib_head * rh)158 rib_can_multipath(struct rib_head *rh)
159 {
160 	int result;
161 
162 	CURVNET_SET(rh->rib_vnet);
163 	result = !!V_rib_route_multipath;
164 	CURVNET_RESTORE();
165 
166 	return (result);
167 }
168 
169 /*
170  * Check is nhop is multipath-eligible.
171  * Avoid nhops without gateways and redirects.
172  *
173  * Returns 1 for multipath-eligible nexthop,
174  * 0 otherwise.
175  */
176 bool
nhop_can_multipath(const struct nhop_object * nh)177 nhop_can_multipath(const struct nhop_object *nh)
178 {
179 
180 	if ((nh->nh_flags & NHF_MULTIPATH) != 0)
181 		return (1);
182 	if ((nh->nh_flags & NHF_GATEWAY) == 0)
183 		return (0);
184 	if ((nh->nh_flags & NHF_REDIRECT) != 0)
185 		return (0);
186 
187 	return (1);
188 }
189 
190 static int
get_info_weight(const struct rt_addrinfo * info,uint32_t default_weight)191 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
192 {
193 	uint32_t weight;
194 
195 	if (info->rti_mflags & RTV_WEIGHT)
196 		weight = info->rti_rmx->rmx_weight;
197 	else
198 		weight = default_weight;
199 	if (weight == 0)
200 		weight = default_weight;
201 
202 	return (weight);
203 }
204 
205 /*
206  * File-local concept for distingushing between the normal and
207  * RTF_PINNED routes tha can override the "normal" one.
208  */
209 #define	NH_PRIORITY_HIGH	2
210 #define	NH_PRIORITY_NORMAL	1
211 static int
get_prio_from_info(const struct rt_addrinfo * info)212 get_prio_from_info(const struct rt_addrinfo *info)
213 {
214 	if (info->rti_flags & RTF_PINNED)
215 		return (NH_PRIORITY_HIGH);
216 	return (NH_PRIORITY_NORMAL);
217 }
218 
219 static int
nhop_get_prio(const struct nhop_object * nh)220 nhop_get_prio(const struct nhop_object *nh)
221 {
222 	if (NH_IS_PINNED(nh))
223 		return (NH_PRIORITY_HIGH);
224 	return (NH_PRIORITY_NORMAL);
225 }
226 
227 /*
228  * Check if specified @gw matches gw data in the nexthop @nh.
229  *
230  * Returns true if matches, false otherwise.
231  */
232 bool
match_nhop_gw(const struct nhop_object * nh,const struct sockaddr * gw)233 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
234 {
235 
236 	if (nh->gw_sa.sa_family != gw->sa_family)
237 		return (false);
238 
239 	switch (gw->sa_family) {
240 	case AF_INET:
241 		return (nh->gw4_sa.sin_addr.s_addr ==
242 		    ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
243 	case AF_INET6:
244 		{
245 			const struct sockaddr_in6 *gw6;
246 			gw6 = (const struct sockaddr_in6 *)gw;
247 
248 			/*
249 			 * Currently (2020-09) IPv6 gws in kernel have their
250 			 * scope embedded. Once this becomes false, this code
251 			 * has to be revisited.
252 			 */
253 			if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
254 			    &gw6->sin6_addr))
255 				return (true);
256 			return (false);
257 		}
258 	case AF_LINK:
259 		{
260 			const struct sockaddr_dl *sdl;
261 			sdl = (const struct sockaddr_dl *)gw;
262 			return (nh->gwl_sa.sdl_index == sdl->sdl_index);
263 		}
264 	default:
265 		return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
266 	}
267 
268 	/* NOTREACHED */
269 	return (false);
270 }
271 
/*
 * rib_filter_f style callback matching every nexthop whose gateway
 * equals the sockaddr passed in @gw_sa.  @rt is not consulted.
 *
 * Returns non-zero on match, 0 otherwise.
 */
int
rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa)
{

	return (match_nhop_gw(nh, (const struct sockaddr *)gw_sa));
}
283 
/* State for match_gw_one(): gateway to look for and matches seen so far. */
struct gw_filter_data {
	const struct sockaddr *gw;
	int count;
};

/*
 * Filter callback matching only the first occurrence of the gateway
 * provided in @_data (a struct gw_filter_data).
 *
 * Returns 1 for the first nexthop whose gateway matches, 0 for
 * everything else.  The match counter advances on every gateway match.
 */
static int
match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
{
	struct gw_filter_data *filter = _data;

	if (!match_nhop_gw(nh, filter->gw))
		return (0);

	/* Return only first match to make rtsock happy */
	return (filter->count++ == 0 ? 1 : 0);
}
302 
303 /*
304  * Checks if data in @info matches nexhop @nh.
305  *
306  * Returns 0 on success,
307  * ESRCH if not matched,
308  * ENOENT if filter function returned false
309  */
310 int
check_info_match_nhop(const struct rt_addrinfo * info,const struct rtentry * rt,const struct nhop_object * nh)311 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
312     const struct nhop_object *nh)
313 {
314 	const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];
315 
316 	if (info->rti_filter != NULL) {
317 	    if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
318 		    return (ENOENT);
319 	    else
320 		    return (0);
321 	}
322 	if ((gw != NULL) && !match_nhop_gw(nh, gw))
323 		return (ESRCH);
324 
325 	return (0);
326 }
327 
328 /*
329  * Runs exact prefix match based on @dst and @netmask.
330  * Returns matched @rtentry if found or NULL.
331  * If rtentry was found, saves nexthop / weight value into @rnd.
332  */
333 static struct rtentry *
lookup_prefix_bysa(struct rib_head * rnh,const struct sockaddr * dst,const struct sockaddr * netmask,struct route_nhop_data * rnd)334 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
335     const struct sockaddr *netmask, struct route_nhop_data *rnd)
336 {
337 	struct rtentry *rt;
338 
339 	RIB_LOCK_ASSERT(rnh);
340 
341 	rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
342 	if (rt != NULL) {
343 		rnd->rnd_nhop = rt->rt_nhop;
344 		rnd->rnd_weight = rt->rt_weight;
345 	} else {
346 		rnd->rnd_nhop = NULL;
347 		rnd->rnd_weight = 0;
348 	}
349 
350 	return (rt);
351 }
352 
/*
 * Runs exact prefix match using the key and mask of an existing
 * rtentry @rt.  Same contract as lookup_prefix_bysa().
 */
struct rtentry *
lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
    struct route_nhop_data *rnd)
{
	const struct sockaddr *key = rt_key_const(rt);
	const struct sockaddr *mask = rt_mask_const(rt);

	return (lookup_prefix_bysa(rnh, key, mask, rnd));
}
359 
360 /*
361  * Runs exact prefix match based on dst/netmask from @info.
362  * Assumes RIB lock is held.
363  * Returns matched @rtentry if found or NULL.
364  * If rtentry was found, saves nexthop / weight value into @rnd.
365  */
366 struct rtentry *
lookup_prefix(struct rib_head * rnh,const struct rt_addrinfo * info,struct route_nhop_data * rnd)367 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
368     struct route_nhop_data *rnd)
369 {
370 	struct rtentry *rt;
371 
372 	rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
373 	    info->rti_info[RTAX_NETMASK], rnd);
374 
375 	return (rt);
376 }
377 
378 const struct rtentry *
rib_lookup_prefix_plen(struct rib_head * rnh,struct sockaddr * dst,int plen,struct route_nhop_data * rnd)379 rib_lookup_prefix_plen(struct rib_head *rnh, struct sockaddr *dst, int plen,
380     struct route_nhop_data *rnd)
381 {
382 	union sockaddr_union mask_storage;
383 	struct sockaddr *netmask = &mask_storage.sa;
384 
385 	if (fill_pxmask_family(dst->sa_family, plen, dst, &netmask))
386 		return (lookup_prefix_bysa(rnh, dst, netmask, rnd));
387 	return (NULL);
388 }
389 
/*
 * Builds the netmask for prefix length @plen and masks @_dst with it.
 *
 * @family: address family; AF_INET / AF_INET6 are handled when the
 *   corresponding kernel option is compiled in
 * @plen: prefix length, or -1 when no mask applies
 * @_dst: sockaddr of @family; its address is masked in place
 * @pmask: on entry points to caller-provided mask storage; on success
 *   points to the filled mask, or is set to NULL when @plen is -1 or
 *   equals the full address width (host route)
 *
 * Returns true on success, false if @family is not handled or @plen is
 * out of range for @family.
 */
static bool
fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
    struct sockaddr **pmask)
{
	if (plen == -1) {
		*pmask = NULL;
		return (true);
	}

	switch (family) {
#ifdef INET
	case AF_INET:
		{
			struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask);
			struct sockaddr_in *dst = (struct sockaddr_in *)_dst;

			memset(mask, 0, sizeof(*mask));
			mask->sin_family = family;
			mask->sin_len = sizeof(*mask);
			if (plen == 32)
				*pmask = NULL;
			else if (plen > 32 || plen < 0)
				return (false);
			else {
				uint32_t daddr, maddr;

				/*
				 * Shift an unsigned constant: with a plain
				 * int, plen == 1 would shift into the sign
				 * bit, which is undefined behaviour.
				 */
				maddr = htonl(plen ? ~((1U << (32 - plen)) - 1) : 0);
				mask->sin_addr.s_addr = maddr;
				daddr = dst->sin_addr.s_addr;
				daddr = htonl(ntohl(daddr) & ntohl(maddr));
				dst->sin_addr.s_addr = daddr;
			}
			return (true);
		}
#endif
#ifdef INET6
	case AF_INET6:
		{
			struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask);
			struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst;

			memset(mask, 0, sizeof(*mask));
			mask->sin6_family = family;
			mask->sin6_len = sizeof(*mask);
			if (plen == 128)
				*pmask = NULL;
			else if (plen > 128 || plen < 0)
				return (false);
			else {
				ip6_writemask(&mask->sin6_addr, plen);
				IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr);
			}
			return (true);
		}
#endif
	default:
		break;
	}
	return (false);
}
449 
450 /*
451  * Attempts to add @dst/plen prefix with nexthop/nexhopgroup data @rnd
452  * to the routing table.
453  *
454  * @fibnum: verified kernel rtable id to insert route to
455  * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
456  * @plen: prefix length (or -1 if host route or not applicable for AF)
457  * @op_flags: combination of RTM_F_ flags
458  * @rc: storage to report operation result
459  *
460  * Returns 0 on success.
461  */
462 int
rib_add_route_px(uint32_t fibnum,struct sockaddr * dst,int plen,struct route_nhop_data * rnd,int op_flags,struct rib_cmd_info * rc)463 rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
464     struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc)
465 {
466 	union sockaddr_union mask_storage;
467 	struct sockaddr *netmask = &mask_storage.sa;
468 	struct rtentry *rt = NULL;
469 
470 	NET_EPOCH_ASSERT();
471 
472 	bzero(rc, sizeof(struct rib_cmd_info));
473 	rc->rc_cmd = RTM_ADD;
474 
475 	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
476 	if (rnh == NULL)
477 		return (EAFNOSUPPORT);
478 
479 	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
480 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
481 		return (EINVAL);
482 	}
483 
484 	if (op_flags & RTM_F_CREATE) {
485 		if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) {
486 			FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed");
487 			return (ENOMEM);
488 		}
489 	} else {
490 		struct route_nhop_data rnd_tmp;
491 		RIB_RLOCK_TRACKER;
492 
493 		RIB_RLOCK(rnh);
494 		rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd_tmp);
495 		RIB_RUNLOCK(rnh);
496 
497 		if (rt == NULL)
498 			return (ESRCH);
499 	}
500 
501 	return (add_route_flags(rnh, rt, rnd, op_flags, rc));
502 }
503 
504 /*
505  * Attempts to delete @dst/plen prefix matching gateway @gw from the
506  *  routing rable.
507  *
508  * @fibnum: rtable id to remove route from
509  * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
510  * @plen: prefix length (or -1 if host route or not applicable for AF)
511  * @gw: gateway to match
512  * @op_flags: combination of RTM_F_ flags
513  * @rc: storage to report operation result
514  *
515  * Returns 0 on success.
516  */
517 int
rib_del_route_px_gw(uint32_t fibnum,struct sockaddr * dst,int plen,const struct sockaddr * gw,int op_flags,struct rib_cmd_info * rc)518 rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen,
519     const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
520 {
521 	struct gw_filter_data gwd = { .gw = gw };
522 
523 	return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc));
524 }
525 
526 /*
527  * Attempts to delete @dst/plen prefix matching @filter_func from the
528  *  routing rable.
529  *
530  * @fibnum: rtable id to remove route from
531  * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
532  * @plen: prefix length (or -1 if host route or not applicable for AF)
533  * @filter_func: func to be called for each nexthop of the prefix for matching
534  * @filter_arg: argument to pass to @filter_func
535  * @op_flags: combination of RTM_F_ flags
536  * @rc: storage to report operation result
537  *
538  * Returns 0 on success.
539  */
540 int
rib_del_route_px(uint32_t fibnum,struct sockaddr * dst,int plen,rib_filter_f_t * filter_func,void * filter_arg,int op_flags,struct rib_cmd_info * rc)541 rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
542     rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
543     struct rib_cmd_info *rc)
544 {
545 	union sockaddr_union mask_storage;
546 	struct sockaddr *netmask = &mask_storage.sa;
547 	int error;
548 
549 	NET_EPOCH_ASSERT();
550 
551 	bzero(rc, sizeof(struct rib_cmd_info));
552 	rc->rc_cmd = RTM_DELETE;
553 
554 	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
555 	if (rnh == NULL)
556 		return (EAFNOSUPPORT);
557 
558 	if (dst->sa_len > sizeof(mask_storage)) {
559 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
560 		return (EINVAL);
561 	}
562 
563 	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
564 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
565 		return (EINVAL);
566 	}
567 
568 	int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;
569 
570 	RIB_WLOCK(rnh);
571 	struct route_nhop_data rnd;
572 	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
573 	if (rt != NULL) {
574 		error = rt_delete_conditional(rnh, rt, prio, filter_func,
575 		    filter_arg, rc);
576 	} else
577 		error = ESRCH;
578 	RIB_WUNLOCK(rnh);
579 
580 	if (error != 0)
581 		return (error);
582 
583 	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
584 
585 	if (rc->rc_cmd == RTM_DELETE)
586 		rt_free(rc->rc_rt);
587 	else {
588 		/*
589 		 * Deleting 1 path may result in RTM_CHANGE to
590 		 * a different mpath group/nhop.
591 		 * Free old mpath group.
592 		 */
593 		nhop_free_any(rc->rc_nh_old);
594 	}
595 
596 	return (0);
597 }
598 
599 /*
600  * Tries to copy route @rt from one rtable to the rtable specified by @dst_rh.
601  * @rt: route to copy.
602  * @rnd_src: nhop and weight. Multipath routes are not supported
603  * @rh_dst: target rtable.
604  * @rc: operation result storage
605  *
606  * Return 0 on success.
607  */
608 int
rib_copy_route(struct rtentry * rt,const struct route_nhop_data * rnd_src,struct rib_head * rh_dst,struct rib_cmd_info * rc)609 rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src,
610     struct rib_head *rh_dst, struct rib_cmd_info *rc)
611 {
612 	struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop;
613 	int error;
614 
615 	MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0);
616 
617 	IF_DEBUG_LEVEL(LOG_DEBUG2) {
618 		char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE];
619 		nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf));
620 		rt_print_buf(rt, rtbuf, sizeof(rtbuf));
621 		FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u",
622 		    rtbuf, nhbuf, nhop_get_fibnum(nh_src));
623 	}
624 	struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family);
625 	if (nh == NULL) {
626 		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop");
627 		return (ENOMEM);
628 	}
629 	nhop_copy(nh, rnd_src->rnd_nhop);
630 	nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop));
631 	nhop_set_fibnum(nh, rh_dst->rib_fibnum);
632 	nh = nhop_get_nhop_internal(rh_dst, nh, &error);
633 	if (error != 0) {
634 		FIB_RH_LOG(LOG_INFO, rh_dst,
635 		    "unable to finalize new nexthop: error %d", error);
636 		return (ENOMEM);
637 	}
638 
639 	struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt));
640 	if (rt_new == NULL) {
641 		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry");
642 		nhop_free(nh);
643 		return (ENOMEM);
644 	}
645 
646 	struct route_nhop_data rnd = {
647 		.rnd_nhop = nh,
648 		.rnd_weight = rnd_src->rnd_weight
649 	};
650 	int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? RTM_F_FORCE : 0);
651 	error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc);
652 
653 	if (error != 0) {
654 		IF_DEBUG_LEVEL(LOG_DEBUG2) {
655 			char buf[NHOP_PRINT_BUFSIZE];
656 			rt_print_buf(rt, buf, sizeof(buf));
657 			FIB_RH_LOG(LOG_DEBUG, rh_dst,
658 			    "Unable to add route %s: error %d", buf, error);
659 		}
660 		nhop_free(nh);
661 	}
662 	return (error);
663 }
664 
665 /*
666  * Adds route defined by @info into the kernel table specified by @fibnum and
667  * sa_family in @info->rti_info[RTAX_DST].
668  *
669  * Returns 0 on success and fills in operation metadata into @rc.
670  */
671 int
rib_add_route(uint32_t fibnum,struct rt_addrinfo * info,struct rib_cmd_info * rc)672 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
673     struct rib_cmd_info *rc)
674 {
675 	struct rib_head *rnh;
676 	int error;
677 
678 	NET_EPOCH_ASSERT();
679 
680 	rnh = get_rnh(fibnum, info);
681 	if (rnh == NULL)
682 		return (EAFNOSUPPORT);
683 
684 	/*
685 	 * Check consistency between RTF_HOST flag and netmask
686 	 * existence.
687 	 */
688 	if (info->rti_flags & RTF_HOST)
689 		info->rti_info[RTAX_NETMASK] = NULL;
690 	else if (info->rti_info[RTAX_NETMASK] == NULL) {
691 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask");
692 		return (EINVAL);
693 	}
694 
695 	bzero(rc, sizeof(struct rib_cmd_info));
696 	rc->rc_cmd = RTM_ADD;
697 
698 	error = add_route_byinfo(rnh, info, rc);
699 	if (error == 0)
700 		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
701 
702 	return (error);
703 }
704 
705 static int
add_route_byinfo(struct rib_head * rnh,struct rt_addrinfo * info,struct rib_cmd_info * rc)706 add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
707     struct rib_cmd_info *rc)
708 {
709 	struct route_nhop_data rnd_add;
710 	struct nhop_object *nh;
711 	struct rtentry *rt;
712 	struct sockaddr *dst, *gateway, *netmask;
713 	int error;
714 
715 	dst = info->rti_info[RTAX_DST];
716 	gateway = info->rti_info[RTAX_GATEWAY];
717 	netmask = info->rti_info[RTAX_NETMASK];
718 
719 	if ((info->rti_flags & RTF_GATEWAY) && !gateway) {
720 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
721 		return (EINVAL);
722 	}
723 	if (dst && gateway && !nhop_check_gateway(dst->sa_family, gateway->sa_family)) {
724 		FIB_RH_LOG(LOG_DEBUG, rnh,
725 		    "error: invalid dst/gateway family combination (%d, %d)",
726 		    dst->sa_family, gateway->sa_family);
727 		return (EINVAL);
728 	}
729 
730 	if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) {
731 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d",
732 		    dst->sa_len);
733 		return (EINVAL);
734 	}
735 
736 	if (info->rti_ifa == NULL) {
737 		error = rt_getifa_fib(info, rnh->rib_fibnum);
738 		if (error)
739 			return (error);
740 	}
741 
742 	if ((rt = rt_alloc(rnh, dst, netmask)) == NULL)
743 		return (ENOBUFS);
744 
745 	error = nhop_create_from_info(rnh, info, &nh);
746 	if (error != 0) {
747 		rt_free_immediate(rt);
748 		return (error);
749 	}
750 
751 	rnd_add.rnd_nhop = nh;
752 	rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);
753 
754 	int op_flags = RTM_F_CREATE;
755 
756 	/*
757 	 * Set the desired action when the route already exists:
758 	 * If RTF_PINNED is present, assume the direct kernel routes that cannot be multipath.
759 	 * Otherwise, append the path.
760 	 */
761 	op_flags |= (info->rti_flags & RTF_PINNED) ? RTM_F_REPLACE : RTM_F_APPEND;
762 
763 	return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc));
764 }
765 
766 static int
add_route_flags(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd_add,int op_flags,struct rib_cmd_info * rc)767 add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add,
768     int op_flags, struct rib_cmd_info *rc)
769 {
770 	struct route_nhop_data rnd_orig;
771 	struct nhop_object *nh;
772 	struct rtentry *rt_orig;
773 	int error = 0;
774 
775 	MPASS(rt != NULL);
776 
777 	nh = rnd_add->rnd_nhop;
778 
779 	RIB_WLOCK(rnh);
780 
781 	rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig);
782 
783 	if (rt_orig == NULL) {
784 		if (op_flags & RTM_F_CREATE)
785 			error = add_route(rnh, rt, rnd_add, rc);
786 		else
787 			error = ESRCH; /* no entry but creation was not required */
788 		RIB_WUNLOCK(rnh);
789 		if (error != 0)
790 			goto out;
791 		return (0);
792 	}
793 
794 	if (op_flags & RTM_F_EXCL) {
795 		/* We have existing route in the RIB but not allowed to replace. */
796 		RIB_WUNLOCK(rnh);
797 		error = EEXIST;
798 		goto out;
799 	}
800 
801 	/* Now either append or replace */
802 	if (op_flags & RTM_F_REPLACE) {
803 		if (nhop_get_prio(rnd_orig.rnd_nhop) == NH_PRIORITY_HIGH) {
804 			/* Old path is "better" (e.g. has PINNED flag set) */
805 			RIB_WUNLOCK(rnh);
806 			error = EEXIST;
807 			goto out;
808 		}
809 		change_route(rnh, rt_orig, rnd_add, rc);
810 		RIB_WUNLOCK(rnh);
811 		nh = rc->rc_nh_old;
812 		goto out;
813 	}
814 
815 	RIB_WUNLOCK(rnh);
816 
817 	if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) &&
818 	    nhop_can_multipath(rnd_add->rnd_nhop) &&
819 	    nhop_can_multipath(rnd_orig.rnd_nhop)) {
820 
821 		for (int i = 0; i < RIB_MAX_RETRIES; i++) {
822 			error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig,
823 			    op_flags, rc);
824 			if (error != EAGAIN)
825 				break;
826 			RTSTAT_INC(rts_add_retry);
827 		}
828 
829 		/*
830 		 *  Original nhop reference is unused in any case.
831 		 */
832 		nhop_free_any(rnd_add->rnd_nhop);
833 		if (op_flags & RTM_F_CREATE) {
834 			if (error != 0 || rc->rc_cmd != RTM_ADD)
835 				rt_free_immediate(rt);
836 		}
837 		return (error);
838 	}
839 	/* Out of options - free state and return error */
840 	error = EEXIST;
841 out:
842 	if (op_flags & RTM_F_CREATE)
843 		rt_free_immediate(rt);
844 	nhop_free_any(nh);
845 
846 	return (error);
847 }
848 
849 static int
add_route_flags_mpath(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd_add,struct route_nhop_data * rnd_orig,int op_flags,struct rib_cmd_info * rc)850 add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
851     struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
852     int op_flags, struct rib_cmd_info *rc)
853 {
854 	RIB_RLOCK_TRACKER;
855 	struct route_nhop_data rnd_new;
856 	int error = 0;
857 
858 	if (!NH_IS_NHGRP(rnd_add->rnd_nhop))
859 		error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new);
860 	else
861 		error = nhgrp_get_merge_group(rnh, rnd_orig, rnd_add, &rnd_new);
862 	if (error != 0) {
863 		if (error == EAGAIN) {
864 			/*
865 			 * Group creation failed, most probably because
866 			 * @rnd_orig data got scheduled for deletion.
867 			 * Refresh @rnd_orig data and retry.
868 			 */
869 			RIB_RLOCK(rnh);
870 			lookup_prefix_rt(rnh, rt, rnd_orig);
871 			RIB_RUNLOCK(rnh);
872 			if (rnd_orig == NULL && !(op_flags & RTM_F_CREATE)) {
873 				/* In this iteration route doesn't exist */
874 				error = ENOENT;
875 			}
876 		}
877 		return (error);
878 	}
879 	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
880 	if (error != 0)
881 		return (error);
882 
883 	if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) {
884 		/*
885 		 * First multipath route got installed. Enable local
886 		 * outbound connections hashing.
887 		 */
888 		if (bootverbose)
889 			printf("FIB: enabled flowid calculation for locally-originated packets\n");
890 		V_fib_hash_outbound = 1;
891 	}
892 
893 	return (0);
894 }
895 
896 /*
897  * Removes route defined by @info from the kernel table specified by @fibnum and
898  * sa_family in @info->rti_info[RTAX_DST].
899  *
900  * Returns 0 on success and fills in operation metadata into @rc.
901  */
902 int
rib_del_route(uint32_t fibnum,struct rt_addrinfo * info,struct rib_cmd_info * rc)903 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
904 {
905 	struct rib_head *rnh;
906 	struct sockaddr *dst, *netmask;
907 	struct sockaddr_storage mdst;
908 	int error;
909 
910 	NET_EPOCH_ASSERT();
911 
912 	rnh = get_rnh(fibnum, info);
913 	if (rnh == NULL)
914 		return (EAFNOSUPPORT);
915 
916 	bzero(rc, sizeof(struct rib_cmd_info));
917 	rc->rc_cmd = RTM_DELETE;
918 
919 	dst = info->rti_info[RTAX_DST];
920 	netmask = info->rti_info[RTAX_NETMASK];
921 
922 	if (netmask != NULL) {
923 		/* Ensure @dst is always properly masked */
924 		if (dst->sa_len > sizeof(mdst)) {
925 			FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
926 			return (EINVAL);
927 		}
928 		rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
929 		dst = (struct sockaddr *)&mdst;
930 	}
931 
932 	rib_filter_f_t *filter_func = NULL;
933 	void *filter_arg = NULL;
934 	struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] };
935 
936 	if (info->rti_filter != NULL) {
937 		filter_func = info->rti_filter;
938 		filter_arg = info->rti_filterdata;
939 	} else if (gwd.gw != NULL) {
940 		filter_func = match_gw_one;
941 		filter_arg = &gwd;
942 	}
943 
944 	int prio = get_prio_from_info(info);
945 
946 	RIB_WLOCK(rnh);
947 	struct route_nhop_data rnd;
948 	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
949 	if (rt != NULL) {
950 		error = rt_delete_conditional(rnh, rt, prio, filter_func,
951 		    filter_arg, rc);
952 	} else
953 		error = ESRCH;
954 	RIB_WUNLOCK(rnh);
955 
956 	if (error != 0)
957 		return (error);
958 
959 	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
960 
961 	if (rc->rc_cmd == RTM_DELETE)
962 		rt_free(rc->rc_rt);
963 	else {
964 		/*
965 		 * Deleting 1 path may result in RTM_CHANGE to
966 		 * a different mpath group/nhop.
967 		 * Free old mpath group.
968 		 */
969 		nhop_free_any(rc->rc_nh_old);
970 	}
971 
972 	return (0);
973 }
974 
975 /*
976  * Conditionally unlinks rtentry paths from @rnh matching @cb.
977  * Returns 0 on success with operation result stored in @rc.
978  * On error, returns:
979  * ESRCH - if prefix was not found or filter function failed to match
980  * EADDRINUSE - if trying to delete higher priority route.
981  */
982 static int
rt_delete_conditional(struct rib_head * rnh,struct rtentry * rt,int prio,rib_filter_f_t * cb,void * cbdata,struct rib_cmd_info * rc)983 rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
984     int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc)
985 {
986 	struct nhop_object *nh = rt->rt_nhop;
987 
988 	if (NH_IS_NHGRP(nh)) {
989 		struct nhgrp_object *nhg = (struct nhgrp_object *)nh;
990 		struct route_nhop_data rnd;
991 		int error;
992 
993 		if (cb == NULL)
994 			return (ESRCH);
995 		error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd);
996 		if (error == 0) {
997 			if (rnd.rnd_nhgrp == nhg) {
998 				/* No match, unreference new group and return. */
999 				nhop_free_any(rnd.rnd_nhop);
1000 				return (ESRCH);
1001 			}
1002 			error = change_route(rnh, rt, &rnd, rc);
1003 		}
1004 		return (error);
1005 	}
1006 	if (cb != NULL && !cb(rt, nh, cbdata))
1007 		return (ESRCH);
1008 
1009 	if (prio < nhop_get_prio(nh))
1010 		return (EADDRINUSE);
1011 
1012 	return (delete_route(rnh, rt, rc));
1013 }
1014 
1015 int
rib_change_route(uint32_t fibnum,struct rt_addrinfo * info,struct rib_cmd_info * rc)1016 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
1017     struct rib_cmd_info *rc)
1018 {
1019 	RIB_RLOCK_TRACKER;
1020 	struct route_nhop_data rnd_orig;
1021 	struct rib_head *rnh;
1022 	struct rtentry *rt;
1023 	int error;
1024 
1025 	NET_EPOCH_ASSERT();
1026 
1027 	rnh = get_rnh(fibnum, info);
1028 	if (rnh == NULL)
1029 		return (EAFNOSUPPORT);
1030 
1031 	bzero(rc, sizeof(struct rib_cmd_info));
1032 	rc->rc_cmd = RTM_CHANGE;
1033 
1034 	/* Check if updated gateway exists */
1035 	if ((info->rti_flags & RTF_GATEWAY) &&
1036 	    (info->rti_info[RTAX_GATEWAY] == NULL)) {
1037 
1038 		/*
1039 		 * route(8) adds RTF_GATEWAY flag if -interface is not set.
1040 		 * Remove RTF_GATEWAY to enforce consistency and maintain
1041 		 * compatibility..
1042 		 */
1043 		info->rti_flags &= ~RTF_GATEWAY;
1044 	}
1045 
1046 	/*
1047 	 * route change is done in multiple steps, with dropping and
1048 	 * reacquiring lock. In the situations with multiple processes
1049 	 * changes the same route in can lead to the case when route
1050 	 * is changed between the steps. Address it by retrying the operation
1051 	 * multiple times before failing.
1052 	 */
1053 
1054 	RIB_RLOCK(rnh);
1055 	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
1056 	    info->rti_info[RTAX_NETMASK], &rnh->head);
1057 
1058 	if (rt == NULL) {
1059 		RIB_RUNLOCK(rnh);
1060 		return (ESRCH);
1061 	}
1062 
1063 	rnd_orig.rnd_nhop = rt->rt_nhop;
1064 	rnd_orig.rnd_weight = rt->rt_weight;
1065 
1066 	RIB_RUNLOCK(rnh);
1067 
1068 	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
1069 		error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
1070 		if (error != EAGAIN)
1071 			break;
1072 	}
1073 
1074 	return (error);
1075 }
1076 
1077 static int
change_nhop(struct rib_head * rnh,struct rt_addrinfo * info,struct nhop_object * nh_orig,struct nhop_object ** nh_new)1078 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
1079     struct nhop_object *nh_orig, struct nhop_object **nh_new)
1080 {
1081 	int error;
1082 
1083 	/*
1084 	 * New gateway could require new ifaddr, ifp;
1085 	 * flags may also be different; ifp may be specified
1086 	 * by ll sockaddr when protocol address is ambiguous
1087 	 */
1088 	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
1089 	    info->rti_info[RTAX_GATEWAY] != NULL) ||
1090 	    info->rti_info[RTAX_IFP] != NULL ||
1091 	    (info->rti_info[RTAX_IFA] != NULL &&
1092 	     !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
1093 		error = rt_getifa_fib(info, rnh->rib_fibnum);
1094 
1095 		if (error != 0) {
1096 			info->rti_ifa = NULL;
1097 			return (error);
1098 		}
1099 	}
1100 
1101 	error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
1102 	info->rti_ifa = NULL;
1103 
1104 	return (error);
1105 }
1106 
1107 static int
change_mpath_route(struct rib_head * rnh,struct rtentry * rt,struct rt_addrinfo * info,struct route_nhop_data * rnd_orig,struct rib_cmd_info * rc)1108 change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
1109     struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
1110     struct rib_cmd_info *rc)
1111 {
1112 	int error = 0, found_idx = 0;
1113 	struct nhop_object *nh_orig = NULL, *nh_new;
1114 	struct route_nhop_data rnd_new = {};
1115 	const struct weightened_nhop *wn = NULL;
1116 	struct weightened_nhop *wn_new;
1117 	uint32_t num_nhops;
1118 
1119 	wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
1120 	for (int i = 0; i < num_nhops; i++) {
1121 		if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
1122 			nh_orig = wn[i].nh;
1123 			found_idx = i;
1124 			break;
1125 		}
1126 	}
1127 
1128 	if (nh_orig == NULL)
1129 		return (ESRCH);
1130 
1131 	error = change_nhop(rnh, info, nh_orig, &nh_new);
1132 	if (error != 0)
1133 		return (error);
1134 
1135 	wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
1136 	    M_TEMP, M_NOWAIT | M_ZERO);
1137 	if (wn_new == NULL) {
1138 		nhop_free(nh_new);
1139 		return (EAGAIN);
1140 	}
1141 
1142 	memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
1143 	wn_new[found_idx].nh = nh_new;
1144 	wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);
1145 
1146 	error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp);
1147 	nhop_free(nh_new);
1148 	free(wn_new, M_TEMP);
1149 
1150 	if (error != 0)
1151 		return (error);
1152 
1153 	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
1154 
1155 	return (error);
1156 }
1157 
1158 static int
change_route_byinfo(struct rib_head * rnh,struct rtentry * rt,struct rt_addrinfo * info,struct route_nhop_data * rnd_orig,struct rib_cmd_info * rc)1159 change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
1160     struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
1161     struct rib_cmd_info *rc)
1162 {
1163 	int error = 0;
1164 	struct nhop_object *nh_orig;
1165 	struct route_nhop_data rnd_new;
1166 
1167 	nh_orig = rnd_orig->rnd_nhop;
1168 	if (nh_orig == NULL)
1169 		return (ESRCH);
1170 
1171 	if (NH_IS_NHGRP(nh_orig))
1172 		return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
1173 
1174 	rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
1175 	error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
1176 	if (error != 0)
1177 		return (error);
1178 	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
1179 
1180 	return (error);
1181 }
1182 
1183 static void
update_tmproutes_mpath(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd)1184 update_tmproutes_mpath(struct rib_head *rnh, struct rtentry *rt,
1185     struct route_nhop_data *rnd)
1186 {
1187 	const struct weightened_nhop *wn;
1188 	uint32_t i, nhops;
1189 
1190 	if (NH_IS_NHGRP(rnd->rnd_nhop)) {
1191 		wn = nhgrp_get_nhops(rnd->rnd_nhgrp, &nhops);
1192 
1193 		for (i = 0; i < nhops; i++) {
1194 			if (nhop_get_expire(wn[i].nh) == 0)
1195 				continue;
1196 
1197 			tmproutes_update(rnh, rt, wn[i].nh);
1198 		}
1199 	} else if (nhop_get_expire(rnd->rnd_nhop) != 0)
1200 		tmproutes_update(rnh, rt, rnd->rnd_nhop);
1201 }
1202 
1203 /*
1204  * Insert @rt with nhop data from @rnd_new to @rnh.
1205  * Returns 0 on success and stores operation results in @rc.
1206  */
1207 static int
add_route(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd,struct rib_cmd_info * rc)1208 add_route(struct rib_head *rnh, struct rtentry *rt,
1209     struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1210 {
1211 	struct radix_node *rn;
1212 
1213 	RIB_WLOCK_ASSERT(rnh);
1214 
1215 	rt->rt_nhop = rnd->rnd_nhop;
1216 	rt->rt_weight = rnd->rnd_weight;
1217 	rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes);
1218 
1219 	if (rn != NULL) {
1220 		update_tmproutes_mpath(rnh, rt, rnd);
1221 		/* Finalize notification */
1222 		rib_bump_gen(rnh);
1223 		rnh->rnh_prefixes++;
1224 
1225 		rc->rc_cmd = RTM_ADD;
1226 		rc->rc_rt = rt;
1227 		rc->rc_nh_old = NULL;
1228 		rc->rc_nh_new = rnd->rnd_nhop;
1229 		rc->rc_nh_weight = rnd->rnd_weight;
1230 
1231 		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1232 		return (0);
1233 	}
1234 
1235 	/* Existing route or memory allocation failure. */
1236 	return (EEXIST);
1237 }
1238 
1239 /*
1240  * Unconditionally deletes @rt from @rnh.
1241  */
1242 static int
delete_route(struct rib_head * rnh,struct rtentry * rt,struct rib_cmd_info * rc)1243 delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
1244 {
1245 	RIB_WLOCK_ASSERT(rnh);
1246 
1247 	/* Route deletion requested. */
1248 	struct radix_node *rn;
1249 
1250 	rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
1251 	if (rn == NULL)
1252 		return (ESRCH);
1253 	rt = RNTORT(rn);
1254 	rt->rte_flags &= ~RTF_UP;
1255 
1256 	rib_bump_gen(rnh);
1257 	rnh->rnh_prefixes--;
1258 
1259 	rc->rc_cmd = RTM_DELETE;
1260 	rc->rc_rt = rt;
1261 	rc->rc_nh_old = rt->rt_nhop;
1262 	rc->rc_nh_new = NULL;
1263 	rc->rc_nh_weight = rt->rt_weight;
1264 
1265 	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1266 
1267 	return (0);
1268 }
1269 
1270 /*
1271  * Switch @rt nhop/weigh to the ones specified in @rnd.
1272  * Returns 0 on success.
1273  */
1274 int
change_route(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd,struct rib_cmd_info * rc)1275 change_route(struct rib_head *rnh, struct rtentry *rt,
1276     struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1277 {
1278 	struct nhop_object *nh_orig;
1279 
1280 	RIB_WLOCK_ASSERT(rnh);
1281 
1282 	nh_orig = rt->rt_nhop;
1283 
1284 	if (rnd->rnd_nhop == NULL)
1285 		return (delete_route(rnh, rt, rc));
1286 
1287 	/* Changing nexthop & weight to a new one */
1288 	rt->rt_nhop = rnd->rnd_nhop;
1289 	rt->rt_weight = rnd->rnd_weight;
1290 	update_tmproutes_mpath(rnh, rt, rnd);
1291 
1292 	/* Finalize notification */
1293 	rib_bump_gen(rnh);
1294 	rc->rc_cmd = RTM_CHANGE;
1295 	rc->rc_rt = rt;
1296 	rc->rc_nh_old = nh_orig;
1297 	rc->rc_nh_new = rnd->rnd_nhop;
1298 	rc->rc_nh_weight = rnd->rnd_weight;
1299 
1300 	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1301 
1302 	return (0);
1303 }
1304 
1305 /*
1306  * Conditionally update route nhop/weight IFF data in @nhd_orig is
1307  *  consistent with the current route data.
1308  * Nexthop in @nhd_new is consumed.
1309  */
1310 int
change_route_conditional(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd_orig,struct route_nhop_data * rnd_new,struct rib_cmd_info * rc)1311 change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
1312     struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
1313     struct rib_cmd_info *rc)
1314 {
1315 	struct rtentry *rt_new;
1316 	int error = 0;
1317 
1318 	IF_DEBUG_LEVEL(LOG_DEBUG2) {
1319 		char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
1320 		nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
1321 		nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
1322 		FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
1323 		    "trying change %s -> %s", buf_old, buf_new);
1324 	}
1325 	RIB_WLOCK(rnh);
1326 
1327 	struct route_nhop_data rnd;
1328 	rt_new = lookup_prefix_rt(rnh, rt, &rnd);
1329 
1330 	if (rt_new == NULL) {
1331 		if (rnd_orig->rnd_nhop == NULL)
1332 			error = add_route(rnh, rt, rnd_new, rc);
1333 		else {
1334 			/*
1335 			 * Prefix does not exist, which was not our assumption.
1336 			 * Update @rnd_orig with the new data and return
1337 			 */
1338 			rnd_orig->rnd_nhop = NULL;
1339 			rnd_orig->rnd_weight = 0;
1340 			error = EAGAIN;
1341 		}
1342 	} else {
1343 		/* Prefix exists, try to update */
1344 		if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
1345 			/*
1346 			 * Nhop/mpath group hasn't changed. Flip
1347 			 * to the new precalculated one and return
1348 			 */
1349 			error = change_route(rnh, rt_new, rnd_new, rc);
1350 		} else {
1351 			/* Update and retry */
1352 			rnd_orig->rnd_nhop = rt_new->rt_nhop;
1353 			rnd_orig->rnd_weight = rt_new->rt_weight;
1354 			error = EAGAIN;
1355 		}
1356 	}
1357 
1358 	RIB_WUNLOCK(rnh);
1359 
1360 	if (error == 0) {
1361 		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
1362 
1363 		if (rnd_orig->rnd_nhop != NULL)
1364 			nhop_free_any(rnd_orig->rnd_nhop);
1365 
1366 	} else {
1367 		if (rnd_new->rnd_nhop != NULL)
1368 			nhop_free_any(rnd_new->rnd_nhop);
1369 	}
1370 
1371 	return (error);
1372 }
1373 
1374 /*
1375  * Performs modification of routing table specificed by @action.
1376  * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
1377  * Needs to be run in network epoch.
1378  *
1379  * Returns 0 on success and fills in @rc with action result.
1380  */
1381 int
rib_action(uint32_t fibnum,int action,struct rt_addrinfo * info,struct rib_cmd_info * rc)1382 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
1383     struct rib_cmd_info *rc)
1384 {
1385 	int error;
1386 
1387 	switch (action) {
1388 	case RTM_ADD:
1389 		error = rib_add_route(fibnum, info, rc);
1390 		break;
1391 	case RTM_DELETE:
1392 		error = rib_del_route(fibnum, info, rc);
1393 		break;
1394 	case RTM_CHANGE:
1395 		error = rib_change_route(fibnum, info, rc);
1396 		break;
1397 	default:
1398 		error = ENOTSUP;
1399 	}
1400 
1401 	return (error);
1402 }
1403 
1404 struct rt_delinfo
1405 {
1406 	struct rib_head *rnh;
1407 	struct rtentry *head;
1408 	rib_filter_f_t *filter_f;
1409 	void *filter_arg;
1410 	int prio;
1411 	struct rib_cmd_info rc;
1412 };
1413 
1414 /*
1415  * Conditionally unlinks rtenties or paths from radix tree based
1416  * on the callback data passed in @arg.
1417  */
1418 static int
rt_checkdelroute(struct radix_node * rn,void * arg)1419 rt_checkdelroute(struct radix_node *rn, void *arg)
1420 {
1421 	struct rt_delinfo *di = (struct rt_delinfo *)arg;
1422 	struct rtentry *rt = (struct rtentry *)rn;
1423 
1424 	if (rt_delete_conditional(di->rnh, rt, di->prio,
1425 	    di->filter_f, di->filter_arg, &di->rc) != 0)
1426 		return (0);
1427 
1428 	/*
1429 	 * Add deleted rtentries to the list to GC them
1430 	 *  after dropping the lock.
1431 	 *
1432 	 * XXX: Delayed notifications not implemented
1433 	 *  for nexthop updates.
1434 	 */
1435 	if (di->rc.rc_cmd == RTM_DELETE) {
1436 		/* Add to the list and return */
1437 		rt->rt_chain = di->head;
1438 		di->head = rt;
1439 	} else {
1440 		/*
1441 		 * RTM_CHANGE to a different nexthop or nexthop group.
1442 		 * Free old multipath group.
1443 		 */
1444 		nhop_free_any(di->rc.rc_nh_old);
1445 	}
1446 
1447 	return (0);
1448 }
1449 
1450 /*
1451  * Iterates over a routing table specified by @fibnum and @family and
1452  *  deletes elements marked by @filter_f.
1453  * @fibnum: rtable id
1454  * @family: AF_ address family
1455  * @filter_f: function returning non-zero value for items to delete
1456  * @arg: data to pass to the @filter_f function
1457  * @report: true if rtsock notification is needed.
1458  */
1459 void
rib_walk_del(u_int fibnum,int family,rib_filter_f_t * filter_f,void * filter_arg,bool report)1460 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg,
1461     bool report)
1462 {
1463 	struct rib_head *rnh;
1464 	struct rtentry *rt;
1465 	struct nhop_object *nh;
1466 	struct epoch_tracker et;
1467 
1468 	rnh = rt_tables_get_rnh(fibnum, family);
1469 	if (rnh == NULL)
1470 		return;
1471 
1472 	struct rt_delinfo di = {
1473 		.rnh = rnh,
1474 		.filter_f = filter_f,
1475 		.filter_arg = filter_arg,
1476 		.prio = NH_PRIORITY_NORMAL,
1477 	};
1478 
1479 	NET_EPOCH_ENTER(et);
1480 
1481 	RIB_WLOCK(rnh);
1482 	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
1483 	RIB_WUNLOCK(rnh);
1484 
1485 	/* We might have something to reclaim. */
1486 	bzero(&di.rc, sizeof(di.rc));
1487 	di.rc.rc_cmd = RTM_DELETE;
1488 	while (di.head != NULL) {
1489 		rt = di.head;
1490 		di.head = rt->rt_chain;
1491 		rt->rt_chain = NULL;
1492 		nh = rt->rt_nhop;
1493 
1494 		di.rc.rc_rt = rt;
1495 		di.rc.rc_nh_old = nh;
1496 		rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);
1497 
1498 		if (report) {
1499 			struct nhgrp_object *nhg;
1500 			const struct weightened_nhop *wn;
1501 			uint32_t num_nhops;
1502 			if (NH_IS_NHGRP(nh)) {
1503 				nhg = (struct nhgrp_object *)nh;
1504 				wn = nhgrp_get_nhops(nhg, &num_nhops);
1505 				for (int i = 0; i < num_nhops; i++)
1506 					rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
1507 			} else
1508 				rt_routemsg(RTM_DELETE, rt, nh, fibnum);
1509 		}
1510 		rt_free(rt);
1511 	}
1512 
1513 	NET_EPOCH_EXIT(et);
1514 }
1515 
1516 static int
rt_delete_unconditional(struct radix_node * rn,void * arg)1517 rt_delete_unconditional(struct radix_node *rn, void *arg)
1518 {
1519 	struct rtentry *rt = RNTORT(rn);
1520 	struct rib_head *rnh = (struct rib_head *)arg;
1521 
1522 	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
1523 	if (RNTORT(rn) == rt)
1524 		rt_free(rt);
1525 
1526 	return (0);
1527 }
1528 
1529 /*
1530  * Removes all routes from the routing table without executing notifications.
1531  * rtentres will be removed after the end of a current epoch.
1532  */
1533 static void
rib_flush_routes(struct rib_head * rnh)1534 rib_flush_routes(struct rib_head *rnh)
1535 {
1536 	RIB_WLOCK(rnh);
1537 	rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
1538 	RIB_WUNLOCK(rnh);
1539 }
1540 
1541 void
rib_flush_routes_family(int family)1542 rib_flush_routes_family(int family)
1543 {
1544 	struct rib_head *rnh;
1545 
1546 	for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
1547 		if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
1548 			rib_flush_routes(rnh);
1549 	}
1550 }
1551 
/*
 * Returns a static, human-readable name for the address family @family:
 * "inet", "inet6" or "link"; "unknown" for anything else.
 */
const char *
rib_print_family(int family)
{

	if (family == AF_INET)
		return ("inet");
	if (family == AF_INET6)
		return ("inet6");
	if (family == AF_LINK)
		return ("link");

	return ("unknown");
}
1565 
1566