xref: /freebsd/sys/net/route/route_ctl.c (revision 361a8395f0b0e6f254fd138798232529679d99f6)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2020 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 #include "opt_route.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/mbuf.h>
37 #include <sys/socket.h>
38 #include <sys/sysctl.h>
39 #include <sys/syslog.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/rmlock.h>
43 
44 #include <net/if.h>
45 #include <net/if_var.h>
46 #include <net/if_private.h>
47 #include <net/if_dl.h>
48 #include <net/vnet.h>
49 #include <net/route.h>
50 #include <net/route/route_ctl.h>
51 #include <net/route/route_var.h>
52 #include <net/route/nhop_utils.h>
53 #include <net/route/nhop.h>
54 #include <net/route/nhop_var.h>
55 #include <netinet/in.h>
56 #include <netinet6/scope6_var.h>
57 #include <netinet6/in6_var.h>
58 
59 #define	DEBUG_MOD_NAME	route_ctl
60 #define	DEBUG_MAX_LEVEL	LOG_DEBUG
61 #include <net/route/route_debug.h>
62 _DECLARE_DEBUG(LOG_INFO);
63 
64 /*
65  * This file contains control plane routing table functions.
66  *
67  * All functions assume they are called within the net epoch.
68  */
69 
70 union sockaddr_union {
71 	struct sockaddr		sa;
72 	struct sockaddr_in	sin;
73 	struct sockaddr_in6	sin6;
74 	char			_buf[32];
75 };
76 
77 static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
78     struct rib_cmd_info *rc);
79 static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
80     struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
81     struct rib_cmd_info *rc);
82 
83 static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
84     struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
85 #ifdef ROUTE_MPATH
86 static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
87     struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
88     int op_flags, struct rib_cmd_info *rc);
89 #endif
90 
91 static int add_route(struct rib_head *rnh, struct rtentry *rt,
92     struct route_nhop_data *rnd, struct rib_cmd_info *rc);
93 static int delete_route(struct rib_head *rnh, struct rtentry *rt,
94     struct rib_cmd_info *rc);
95 static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
96     int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);
97 
98 static bool fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
99     struct sockaddr **pmask);
100 static int get_prio_from_info(const struct rt_addrinfo *info);
101 static int nhop_get_prio(const struct nhop_object *nh);
102 
103 #ifdef ROUTE_MPATH
104 static bool rib_can_multipath(struct rib_head *rh);
105 #endif
106 
107 /* Per-vnet multipath routing configuration */
108 SYSCTL_DECL(_net_route);
109 #define	V_rib_route_multipath	VNET(rib_route_multipath)
110 #ifdef ROUTE_MPATH
111 #define _MP_FLAGS	CTLFLAG_RW
112 VNET_DEFINE(u_int, rib_route_multipath) = 1;
113 #else
114 #define _MP_FLAGS	CTLFLAG_RD
115 VNET_DEFINE(u_int, rib_route_multipath) = 0;
116 #endif
117 SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
118     &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
119 #undef _MP_FLAGS
120 
121 #ifdef ROUTE_MPATH
122 VNET_DEFINE(u_int, fib_hash_outbound) = 0;
123 SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
124     &VNET_NAME(fib_hash_outbound), 0,
125     "Compute flowid for locally-originated packets");
126 
127 /* Default entropy to add to the hash calculation for outbound connections */
128 uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
129 	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
130 	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
131 	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
132 	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
133 	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
134 };
135 #endif
136 
137 #if defined(INET) && defined(INET6)
138 FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
139 #define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop)
140 VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
141 SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
142     &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
143 #endif
144 
145 /* Debug bits */
146 SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
147 
148 static struct rib_head *
149 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
150 {
151 	struct rib_head *rnh;
152 	struct sockaddr *dst;
153 
154 	KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));
155 
156 	dst = info->rti_info[RTAX_DST];
157 	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
158 
159 	return (rnh);
160 }
161 
162 #if defined(INET) && defined(INET6)
163 bool
164 rib_can_4o6_nhop(void)
165 {
166 	return (!!V_rib_route_ipv6_nexthop);
167 }
168 #endif
169 
170 #ifdef ROUTE_MPATH
171 static bool
172 rib_can_multipath(struct rib_head *rh)
173 {
174 	int result;
175 
176 	CURVNET_SET(rh->rib_vnet);
177 	result = !!V_rib_route_multipath;
178 	CURVNET_RESTORE();
179 
180 	return (result);
181 }
182 
183 /*
184  * Check if nhop is multipath-eligible.
185  * Avoid nhops without gateways and redirects.
186  *
187  * Returns 1 for multipath-eligible nexthop,
188  * 0 otherwise.
189  */
190 bool
191 nhop_can_multipath(const struct nhop_object *nh)
192 {
193 
194 	if ((nh->nh_flags & NHF_MULTIPATH) != 0)
195 		return (1);
196 	if ((nh->nh_flags & NHF_GATEWAY) == 0)
197 		return (0);
198 	if ((nh->nh_flags & NHF_REDIRECT) != 0)
199 		return (0);
200 
201 	return (1);
202 }
203 #endif
204 
205 static int
206 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
207 {
208 	uint32_t weight;
209 
210 	if (info->rti_mflags & RTV_WEIGHT)
211 		weight = info->rti_rmx->rmx_weight;
212 	else
213 		weight = default_weight;
214 	/* Keep the upper byte for administrative distance purposes */
215 	if (weight > RT_MAX_WEIGHT)
216 		weight = RT_MAX_WEIGHT;
217 	else if (weight == 0)
218 		weight = default_weight;
219 
220 	return (weight);
221 }
222 
223 /*
224  * File-local concept for distinguishing between normal routes and
225  * RTF_PINNED routes that can override the "normal" ones.
226  */
227 #define	NH_PRIORITY_HIGH	2
228 #define	NH_PRIORITY_NORMAL	1
229 static int
230 get_prio_from_info(const struct rt_addrinfo *info)
231 {
232 	if (info->rti_flags & RTF_PINNED)
233 		return (NH_PRIORITY_HIGH);
234 	return (NH_PRIORITY_NORMAL);
235 }
236 
237 static int
238 nhop_get_prio(const struct nhop_object *nh)
239 {
240 	if (NH_IS_PINNED(nh))
241 		return (NH_PRIORITY_HIGH);
242 	return (NH_PRIORITY_NORMAL);
243 }
244 
245 /*
246  * Check if specified @gw matches gw data in the nexthop @nh.
247  *
248  * Returns true if matches, false otherwise.
249  */
250 bool
251 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
252 {
253 
254 	if (nh->gw_sa.sa_family != gw->sa_family)
255 		return (false);
256 
257 	switch (gw->sa_family) {
258 	case AF_INET:
259 		return (nh->gw4_sa.sin_addr.s_addr ==
260 		    ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
261 	case AF_INET6:
262 		{
263 			const struct sockaddr_in6 *gw6;
264 			gw6 = (const struct sockaddr_in6 *)gw;
265 
266 			/*
267 			 * Currently (2020-09) IPv6 gws in kernel have their
268 			 * scope embedded. Once this becomes false, this code
269 			 * has to be revisited.
270 			 */
271 			if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
272 			    &gw6->sin6_addr))
273 				return (true);
274 			return (false);
275 		}
276 	case AF_LINK:
277 		{
278 			const struct sockaddr_dl *sdl;
279 			sdl = (const struct sockaddr_dl *)gw;
280 			return (nh->gwl_sa.sdl_index == sdl->sdl_index);
281 		}
282 	default:
283 		return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
284 	}
285 
286 	/* NOTREACHED */
287 	return (false);
288 }
289 
290 /*
291  * Matches all nexthops with the given @gw.
292  * Can be used as rib_filter_f callback.
293  */
294 int
295 rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa)
296 {
297 	const struct sockaddr *gw = (const struct sockaddr *)gw_sa;
298 
299 	return (match_nhop_gw(nh, gw));
300 }
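/*
 * Editor's example (a sketch, not part of the upstream file): rib_match_gw()
 * is typically plugged into rt_addrinfo as a deletion filter, so that only
 * the paths via a particular gateway are removed.  Assume @info already
 * describes the prefix and @gw is a caller-prepared gateway sockaddr.
 *
 *	info.rti_filter = rib_match_gw;
 *	info.rti_filterdata = &gw;
 *	error = rib_del_route(fibnum, &info, &rc);
 */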
301 
302 struct gw_filter_data {
303 	const struct sockaddr *gw;
304 	int count;
305 };
306 
307 /*
308  * Matches the first occurrence of the gateway provided in @gwd.
309  */
310 static int
311 match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
312 {
313 	struct gw_filter_data *gwd = (struct gw_filter_data *)_data;
314 
315 	/* Return only first match to make rtsock happy */
316 	if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0)
317 		return (1);
318 	return (0);
319 }
320 
321 /*
322  * Checks if data in @info matches nexthop @nh.
323  *
324  * Returns 0 on success,
325  * ESRCH if not matched,
326  * ENOENT if filter function returned false
327  */
328 int
329 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
330     const struct nhop_object *nh)
331 {
332 	const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];
333 
334 	if (info->rti_filter != NULL) {
335 	    if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
336 		    return (ENOENT);
337 	    else
338 		    return (0);
339 	}
340 	if ((gw != NULL) && !match_nhop_gw(nh, gw))
341 		return (ESRCH);
342 
343 	return (0);
344 }
345 
346 /*
347  * Runs exact prefix match based on @dst and @netmask.
348  * Returns matched @rtentry if found or NULL.
349  * If rtentry was found, saves nexthop / weight value into @rnd.
350  */
351 static struct rtentry *
352 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
353     const struct sockaddr *netmask, struct route_nhop_data *rnd)
354 {
355 	struct rtentry *rt;
356 
357 	RIB_LOCK_ASSERT(rnh);
358 
359 	rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
360 	if (rt != NULL) {
361 		rnd->rnd_nhop = rt->rt_nhop;
362 		rnd->rnd_weight = rt->rt_weight;
363 	} else {
364 		rnd->rnd_nhop = NULL;
365 		rnd->rnd_weight = 0;
366 	}
367 
368 	return (rt);
369 }
370 
371 struct rtentry *
372 lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
373     struct route_nhop_data *rnd)
374 {
375 	return (lookup_prefix_bysa(rnh, rt_key_const(rt), rt_mask_const(rt), rnd));
376 }
377 
378 /*
379  * Runs exact prefix match based on dst/netmask from @info.
380  * Assumes RIB lock is held.
381  * Returns matched @rtentry if found or NULL.
382  * If rtentry was found, saves nexthop / weight value into @rnd.
383  */
384 struct rtentry *
385 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
386     struct route_nhop_data *rnd)
387 {
388 	struct rtentry *rt;
389 
390 	rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
391 	    info->rti_info[RTAX_NETMASK], rnd);
392 
393 	return (rt);
394 }
395 
396 const struct rtentry *
397 rib_lookup_prefix_plen(struct rib_head *rnh, struct sockaddr *dst, int plen,
398     struct route_nhop_data *rnd)
399 {
400 	union sockaddr_union mask_storage;
401 	struct sockaddr *netmask = &mask_storage.sa;
402 
403 	if (fill_pxmask_family(dst->sa_family, plen, dst, &netmask))
404 		return (lookup_prefix_bysa(rnh, dst, netmask, rnd));
405 	return (NULL);
406 }
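/*
 * Editor's example (a sketch, not part of the upstream file): exact-prefix
 * lookup of 192.0.2.0/24 in a given @rnh.  lookup_prefix_bysa() asserts the
 * RIB lock, so the caller is assumed to hold it; the sockaddr is illustrative.
 *
 *	struct sockaddr_in dst = {
 *		.sin_family = AF_INET,
 *		.sin_len = sizeof(dst),
 *		.sin_addr.s_addr = htonl(0xc0000200),
 *	};
 *	struct route_nhop_data rnd;
 *	const struct rtentry *rt;
 *
 *	rt = rib_lookup_prefix_plen(rnh, (struct sockaddr *)&dst, 24, &rnd);
 *
 * On a match, rnd.rnd_nhop and rnd.rnd_weight describe the installed path(s).
 */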
407 
408 static bool
409 fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
410     struct sockaddr **pmask)
411 {
412 	if (plen == -1) {
413 		*pmask = NULL;
414 		return (true);
415 	}
416 
417 	switch (family) {
418 #ifdef INET
419 	case AF_INET:
420 		{
421 			struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask);
422 			struct sockaddr_in *dst= (struct sockaddr_in *)_dst;
423 
424 			memset(mask, 0, sizeof(*mask));
425 			mask->sin_family = family;
426 			mask->sin_len = sizeof(*mask);
427 			if (plen == 32)
428 				*pmask = NULL;
429 			else if (plen > 32 || plen < 0)
430 				return (false);
431 			else {
432 				uint32_t daddr, maddr;
433 				maddr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
434 				mask->sin_addr.s_addr = maddr;
435 				daddr = dst->sin_addr.s_addr;
436 				daddr = htonl(ntohl(daddr) & ntohl(maddr));
437 				dst->sin_addr.s_addr = daddr;
438 			}
439 			return (true);
440 		}
441 		break;
442 #endif
443 #ifdef INET6
444 	case AF_INET6:
445 		{
446 			struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask);
447 			struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst;
448 
449 			memset(mask, 0, sizeof(*mask));
450 			mask->sin6_family = family;
451 			mask->sin6_len = sizeof(*mask);
452 			if (plen == 128)
453 				*pmask = NULL;
454 			else if (plen > 128 || plen < 0)
455 				return (false);
456 			else {
457 				ip6_writemask(&mask->sin6_addr, plen);
458 				IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr);
459 			}
460 			return (true);
461 		}
462 		break;
463 #endif
464 	}
465 	return (false);
466 }
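/*
 * Editor's note (worked example, derived from the code above): for AF_INET,
 * plen 24 and dst 192.0.2.77, fill_pxmask_family() writes a 255.255.255.0 mask
 * into *pmask and rewrites dst to 192.0.2.0.  plen 32 (a host route) and
 * plen -1 both set *pmask to NULL, while a plen outside the valid range makes
 * the function return false.
 */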
467 
468 /*
469  * Attempts to add @dst/plen prefix with nexthop/nexthop group data @rnd
470  * to the routing table.
471  *
472  * @fibnum: verified kernel rtable id to insert route to
473  * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
474  * @plen: prefix length (or -1 if host route or not applicable for AF)
475  * @op_flags: combination of RTM_F_ flags
476  * @rc: storage to report operation result
477  *
478  * Returns 0 on success.
479  */
480 int
481 rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
482     struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc)
483 {
484 	union sockaddr_union mask_storage;
485 	struct sockaddr *netmask = &mask_storage.sa;
486 	struct rtentry *rt = NULL;
487 
488 	NET_EPOCH_ASSERT();
489 
490 	bzero(rc, sizeof(struct rib_cmd_info));
491 	rc->rc_cmd = RTM_ADD;
492 
493 	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
494 	if (rnh == NULL)
495 		return (EAFNOSUPPORT);
496 
497 	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
498 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
499 		return (EINVAL);
500 	}
501 
502 	if (op_flags & RTM_F_CREATE) {
503 		if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) {
504 			FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed");
505 			return (ENOMEM);
506 		}
507 	} else {
508 		struct route_nhop_data rnd_tmp;
509 		RIB_RLOCK_TRACKER;
510 
511 		RIB_RLOCK(rnh);
512 		rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd_tmp);
513 		RIB_RUNLOCK(rnh);
514 
515 		if (rt == NULL)
516 			return (ESRCH);
517 	}
518 
519 	return (add_route_flags(rnh, rt, rnd, op_flags, rc));
520 }
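/*
 * Editor's example (a sketch, not part of the upstream file): adding
 * 192.0.2.0/24 with an already-finalized nexthop @nh (construction not shown)
 * from within the net epoch.  Names are illustrative.
 *
 *	struct route_nhop_data rnd = {
 *		.rnd_nhop = nh,
 *		.rnd_weight = RT_DEFAULT_WEIGHT,
 *	};
 *	struct rib_cmd_info rc;
 *
 *	error = rib_add_route_px(fibnum, (struct sockaddr *)&dst, 24, &rnd,
 *	    RTM_F_CREATE, &rc);
 *
 * RTM_F_CREATE permits inserting a new prefix; combining it with RTM_F_APPEND
 * or RTM_F_REPLACE controls what happens when the prefix already exists.
 */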
521 
522 /*
523  * Attempts to delete @dst/plen prefix matching gateway @gw from the
524  *  routing table.
525  *
526  * @fibnum: rtable id to remove route from
527  * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
528  * @plen: prefix length (or -1 if host route or not applicable for AF)
529  * @gw: gateway to match
530  * @op_flags: combination of RTM_F_ flags
531  * @rc: storage to report operation result
532  *
533  * Returns 0 on success.
534  */
535 int
536 rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen,
537     const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
538 {
539 	struct gw_filter_data gwd = { .gw = gw };
540 
541 	return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc));
542 }
543 
544 /*
545  * Attempts to delete @dst/plen prefix matching @filter_func from the
546  *  routing table.
547  *
548  * @fibnum: rtable id to remove route from
549  * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
550  * @plen: prefix length (or -1 if host route or not applicable for AF)
551  * @filter_func: func to be called for each nexthop of the prefix for matching
552  * @filter_arg: argument to pass to @filter_func
553  * @op_flags: combination of RTM_F_ flags
554  * @rc: storage to report operation result
555  *
556  * Returns 0 on success.
557  */
558 int
559 rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
560     rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
561     struct rib_cmd_info *rc)
562 {
563 	union sockaddr_union mask_storage;
564 	struct sockaddr *netmask = &mask_storage.sa;
565 	int error;
566 
567 	NET_EPOCH_ASSERT();
568 
569 	bzero(rc, sizeof(struct rib_cmd_info));
570 	rc->rc_cmd = RTM_DELETE;
571 
572 	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
573 	if (rnh == NULL)
574 		return (EAFNOSUPPORT);
575 
576 	if (dst->sa_len > sizeof(mask_storage)) {
577 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
578 		return (EINVAL);
579 	}
580 
581 	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
582 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
583 		return (EINVAL);
584 	}
585 
586 	int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;
587 
588 	RIB_WLOCK(rnh);
589 	struct route_nhop_data rnd;
590 	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
591 	if (rt != NULL) {
592 		error = rt_delete_conditional(rnh, rt, prio, filter_func,
593 		    filter_arg, rc);
594 	} else
595 		error = ESRCH;
596 	RIB_WUNLOCK(rnh);
597 
598 	if (error != 0)
599 		return (error);
600 
601 	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
602 
603 	if (rc->rc_cmd == RTM_DELETE)
604 		rt_free(rc->rc_rt);
605 #ifdef ROUTE_MPATH
606 	else {
607 		/*
608 		 * Deleting 1 path may result in RTM_CHANGE to
609 		 * a different mpath group/nhop.
610 		 * Free old mpath group.
611 		 */
612 		nhop_free_any(rc->rc_nh_old);
613 	}
614 #endif
615 
616 	return (0);
617 }
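/*
 * Editor's example (a sketch, not part of the upstream file): removing
 * whichever path of 192.0.2.0/24 points at gateway 198.51.100.1.  The
 * sockaddrs are assumed to be prepared and validated by the caller.
 *
 *	error = rib_del_route_px_gw(fibnum, (struct sockaddr *)&dst, 24,
 *	    (const struct sockaddr *)&gw, 0, &rc);
 *
 * Passing RTM_F_FORCE in op_flags raises the deletion priority to
 * NH_PRIORITY_HIGH, allowing RTF_PINNED (kernel-originated) routes to be
 * removed as well.
 */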
618 
619 /*
620  * Tries to copy route @rt from one rtable to the rtable specified by @rh_dst.
621  * @rt: route to copy.
622  * @rnd_src: nhop and weight. Multipath routes are not supported.
623  * @rh_dst: target rtable.
624  * @rc: operation result storage
625  *
626  * Return 0 on success.
627  */
628 int
629 rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src,
630     struct rib_head *rh_dst, struct rib_cmd_info *rc)
631 {
632 	struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop;
633 	int error;
634 
635 	MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0);
636 
637 	IF_DEBUG_LEVEL(LOG_DEBUG2) {
638 		char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE];
639 		nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf));
640 		rt_print_buf(rt, rtbuf, sizeof(rtbuf));
641 		FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u",
642 		    rtbuf, nhbuf, nhop_get_fibnum(nh_src));
643 	}
644 	struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family);
645 	if (nh == NULL) {
646 		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop");
647 		return (ENOMEM);
648 	}
649 	nhop_copy(nh, rnd_src->rnd_nhop);
650 	nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop));
651 	nhop_set_fibnum(nh, rh_dst->rib_fibnum);
652 	nh = nhop_get_nhop_internal(rh_dst, nh, &error);
653 	if (error != 0) {
654 		FIB_RH_LOG(LOG_INFO, rh_dst,
655 		    "unable to finalize new nexthop: error %d", error);
656 		return (ENOMEM);
657 	}
658 
659 	struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt));
660 	if (rt_new == NULL) {
661 		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry");
662 		nhop_free(nh);
663 		return (ENOMEM);
664 	}
665 
666 	struct route_nhop_data rnd = {
667 		.rnd_nhop = nh,
668 		.rnd_weight = rnd_src->rnd_weight
669 	};
670 	int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? RTM_F_FORCE : 0);
671 	error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc);
672 
673 	if (error != 0) {
674 		IF_DEBUG_LEVEL(LOG_DEBUG2) {
675 			char buf[NHOP_PRINT_BUFSIZE];
676 			rt_print_buf(rt_new, buf, sizeof(buf));
677 			FIB_RH_LOG(LOG_DEBUG, rh_dst,
678 			    "Unable to add route %s: error %d", buf, error);
679 		}
680 		nhop_free(nh);
681 		rt_free_immediate(rt_new);
682 	}
683 	return (error);
684 }
685 
686 /*
687  * Adds route defined by @info into the kernel table specified by @fibnum and
688  * sa_family in @info->rti_info[RTAX_DST].
689  *
690  * Returns 0 on success and fills in operation metadata into @rc.
691  */
692 int
693 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
694     struct rib_cmd_info *rc)
695 {
696 	struct rib_head *rnh;
697 	int error;
698 
699 	NET_EPOCH_ASSERT();
700 
701 	rnh = get_rnh(fibnum, info);
702 	if (rnh == NULL)
703 		return (EAFNOSUPPORT);
704 
705 	/*
706 	 * Check consistency between RTF_HOST flag and netmask
707 	 * existence.
708 	 */
709 	if (info->rti_flags & RTF_HOST)
710 		info->rti_info[RTAX_NETMASK] = NULL;
711 	else if (info->rti_info[RTAX_NETMASK] == NULL) {
712 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask");
713 		return (EINVAL);
714 	}
715 
716 	bzero(rc, sizeof(struct rib_cmd_info));
717 	rc->rc_cmd = RTM_ADD;
718 
719 	error = add_route_byinfo(rnh, info, rc);
720 	if (error == 0)
721 		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
722 
723 	return (error);
724 }
725 
726 static int
727 add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
728     struct rib_cmd_info *rc)
729 {
730 	struct route_nhop_data rnd_add;
731 	struct nhop_object *nh;
732 	struct rtentry *rt;
733 	struct sockaddr *dst, *gateway, *netmask;
734 	int error;
735 
736 	dst = info->rti_info[RTAX_DST];
737 	gateway = info->rti_info[RTAX_GATEWAY];
738 	netmask = info->rti_info[RTAX_NETMASK];
739 
740 	if ((info->rti_flags & RTF_GATEWAY) && !gateway) {
741 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
742 		return (EINVAL);
743 	}
744 	if (dst && gateway && !nhop_check_gateway(dst->sa_family, gateway->sa_family)) {
745 		FIB_RH_LOG(LOG_DEBUG, rnh,
746 		    "error: invalid dst/gateway family combination (%d, %d)",
747 		    dst->sa_family, gateway->sa_family);
748 		return (EINVAL);
749 	}
750 
751 	if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) {
752 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d",
753 		    dst->sa_len);
754 		return (EINVAL);
755 	}
756 
757 	if (info->rti_ifa == NULL) {
758 		error = rt_getifa_fib(info, rnh->rib_fibnum);
759 		if (error)
760 			return (error);
761 	}
762 
763 	if ((rt = rt_alloc(rnh, dst, netmask)) == NULL)
764 		return (ENOBUFS);
765 
766 	error = nhop_create_from_info(rnh, info, &nh);
767 	if (error != 0) {
768 		rt_free_immediate(rt);
769 		return (error);
770 	}
771 
772 	rnd_add.rnd_nhop = nh;
773 	rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);
774 
775 	int op_flags = RTM_F_CREATE;
776 
777 	/*
778 	 * Set the desired action when the route already exists:
779 	 * If RTF_PINNED is present, assume a direct kernel route that cannot be multipath.
780 	 * Otherwise, append the path.
781 	 */
782 	op_flags |= (info->rti_flags & RTF_PINNED) ? RTM_F_REPLACE : RTM_F_APPEND;
783 
784 	return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc));
785 }
786 
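/*
 * Editor's summary of the op_flags handling below (derived from the code):
 * when the prefix is absent, RTM_F_CREATE inserts it and anything else fails
 * with ESRCH; when it exists, RTM_F_EXCL fails with EEXIST, RTM_F_REPLACE
 * swaps the nexthop unless the existing one has higher priority, and
 * RTM_F_APPEND tries to form a multipath group if both nexthops are eligible.
 * On failure the caller-provided rtentry (if RTM_F_CREATE) and nexthop
 * references are released.
 */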
787 static int
788 add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add,
789     int op_flags, struct rib_cmd_info *rc)
790 {
791 	struct route_nhop_data rnd_orig;
792 	struct nhop_object *nh;
793 	struct rtentry *rt_orig;
794 	int error = 0;
795 
796 	MPASS(rt != NULL);
797 
798 	nh = rnd_add->rnd_nhop;
799 
800 	RIB_WLOCK(rnh);
801 
802 	rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig);
803 
804 	if (rt_orig == NULL) {
805 		if (op_flags & RTM_F_CREATE)
806 			error = add_route(rnh, rt, rnd_add, rc);
807 		else
808 			error = ESRCH; /* no entry but creation was not required */
809 		RIB_WUNLOCK(rnh);
810 		if (error != 0)
811 			goto out;
812 		return (0);
813 	}
814 
815 	if (op_flags & RTM_F_EXCL) {
816 		/* We have existing route in the RIB but not allowed to replace. */
817 		RIB_WUNLOCK(rnh);
818 		error = EEXIST;
819 		goto out;
820 	}
821 
822 	/* Now either append or replace */
823 	if (op_flags & RTM_F_REPLACE) {
824 		if (nhop_get_prio(rnd_orig.rnd_nhop) == NH_PRIORITY_HIGH) {
825 			/* Old path is "better" (e.g. has PINNED flag set) */
826 			RIB_WUNLOCK(rnh);
827 			error = EEXIST;
828 			goto out;
829 		}
830 		change_route(rnh, rt_orig, rnd_add, rc);
831 		RIB_WUNLOCK(rnh);
832 		nh = rc->rc_nh_old;
833 		goto out;
834 	}
835 
836 	RIB_WUNLOCK(rnh);
837 
838 #ifdef ROUTE_MPATH
839 	if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) &&
840 	    nhop_can_multipath(rnd_add->rnd_nhop) &&
841 	    nhop_can_multipath(rnd_orig.rnd_nhop)) {
842 
843 		for (int i = 0; i < RIB_MAX_RETRIES; i++) {
844 			error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig,
845 			    op_flags, rc);
846 			if (error != EAGAIN)
847 				break;
848 			RTSTAT_INC(rts_add_retry);
849 		}
850 
851 		/*
852 		 *  Original nhop reference is unused in any case.
853 		 */
854 		nhop_free_any(rnd_add->rnd_nhop);
855 		if (op_flags & RTM_F_CREATE) {
856 			if (error != 0 || rc->rc_cmd != RTM_ADD)
857 				rt_free_immediate(rt);
858 		}
859 		return (error);
860 	}
861 #endif
862 	/* Out of options - free state and return error */
863 	error = EEXIST;
864 out:
865 	if (op_flags & RTM_F_CREATE)
866 		rt_free_immediate(rt);
867 	nhop_free_any(nh);
868 
869 	return (error);
870 }
871 
872 #ifdef ROUTE_MPATH
873 static int
874 add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
875     struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
876     int op_flags, struct rib_cmd_info *rc)
877 {
878 	RIB_RLOCK_TRACKER;
879 	struct route_nhop_data rnd_new;
880 	int error = 0;
881 
882 	error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new);
883 	if (error != 0) {
884 		if (error == EAGAIN) {
885 			/*
886 			 * Group creation failed, most probably because
887 			 * @rnd_orig data got scheduled for deletion.
888 			 * Refresh @rnd_orig data and retry.
889 			 */
890 			RIB_RLOCK(rnh);
891 			lookup_prefix_rt(rnh, rt, rnd_orig);
892 			RIB_RUNLOCK(rnh);
893 			if (rnd_orig->rnd_nhop == NULL && !(op_flags & RTM_F_CREATE)) {
894 				/* In this iteration route doesn't exist */
895 				error = ENOENT;
896 			}
897 		}
898 		return (error);
899 	}
900 	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
901 	if (error != 0)
902 		return (error);
903 
904 	if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) {
905 		/*
906 		 * First multipath route got installed. Enable local
907 		 * outbound connections hashing.
908 		 */
909 		if (bootverbose)
910 			printf("FIB: enabled flowid calculation for locally-originated packets\n");
911 		V_fib_hash_outbound = 1;
912 	}
913 
914 	return (0);
915 }
916 #endif
917 
918 /*
919  * Removes route defined by @info from the kernel table specified by @fibnum and
920  * sa_family in @info->rti_info[RTAX_DST].
921  *
922  * Returns 0 on success and fills in operation metadata into @rc.
923  */
924 int
925 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
926 {
927 	struct rib_head *rnh;
928 	struct sockaddr *dst, *netmask;
929 	struct sockaddr_storage mdst;
930 	int error;
931 
932 	NET_EPOCH_ASSERT();
933 
934 	rnh = get_rnh(fibnum, info);
935 	if (rnh == NULL)
936 		return (EAFNOSUPPORT);
937 
938 	bzero(rc, sizeof(struct rib_cmd_info));
939 	rc->rc_cmd = RTM_DELETE;
940 
941 	dst = info->rti_info[RTAX_DST];
942 	netmask = info->rti_info[RTAX_NETMASK];
943 
944 	if (netmask != NULL) {
945 		/* Ensure @dst is always properly masked */
946 		if (dst->sa_len > sizeof(mdst)) {
947 			FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
948 			return (EINVAL);
949 		}
950 		rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
951 		dst = (struct sockaddr *)&mdst;
952 	}
953 
954 	rib_filter_f_t *filter_func = NULL;
955 	void *filter_arg = NULL;
956 	struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] };
957 
958 	if (info->rti_filter != NULL) {
959 		filter_func = info->rti_filter;
960 		filter_arg = info->rti_filterdata;
961 	} else if (gwd.gw != NULL) {
962 		filter_func = match_gw_one;
963 		filter_arg = &gwd;
964 	}
965 
966 	int prio = get_prio_from_info(info);
967 
968 	RIB_WLOCK(rnh);
969 	struct route_nhop_data rnd;
970 	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
971 	if (rt != NULL) {
972 		error = rt_delete_conditional(rnh, rt, prio, filter_func,
973 		    filter_arg, rc);
974 	} else
975 		error = ESRCH;
976 	RIB_WUNLOCK(rnh);
977 
978 	if (error != 0)
979 		return (error);
980 
981 	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
982 
983 	if (rc->rc_cmd == RTM_DELETE)
984 		rt_free(rc->rc_rt);
985 #ifdef ROUTE_MPATH
986 	else {
987 		/*
988 		 * Deleting 1 path may result in RTM_CHANGE to
989 		 * a different mpath group/nhop.
990 		 * Free old mpath group.
991 		 */
992 		nhop_free_any(rc->rc_nh_old);
993 	}
994 #endif
995 
996 	return (0);
997 }
998 
999 /*
1000  * Conditionally unlinks rtentry paths from @rnh matching @cb.
1001  * Returns 0 on success with operation result stored in @rc.
1002  * On error, returns:
1003  * ESRCH - if prefix was not found or filter function failed to match
1004  * EADDRINUSE - if trying to delete higher priority route.
1005  */
1006 static int
1007 rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
1008     int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc)
1009 {
1010 	struct nhop_object *nh = rt->rt_nhop;
1011 
1012 #ifdef ROUTE_MPATH
1013 	if (NH_IS_NHGRP(nh)) {
1014 		struct nhgrp_object *nhg = (struct nhgrp_object *)nh;
1015 		struct route_nhop_data rnd;
1016 		int error;
1017 
1018 		if (cb == NULL)
1019 			return (ESRCH);
1020 		error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd);
1021 		if (error == 0) {
1022 			if (rnd.rnd_nhgrp == nhg) {
1023 				/* No match, unreference new group and return. */
1024 				nhop_free_any(rnd.rnd_nhop);
1025 				return (ESRCH);
1026 			}
1027 			error = change_route(rnh, rt, &rnd, rc);
1028 		}
1029 		return (error);
1030 	}
1031 #endif
1032 	if (cb != NULL && !cb(rt, nh, cbdata))
1033 		return (ESRCH);
1034 
1035 	if (prio < nhop_get_prio(nh))
1036 		return (EADDRINUSE);
1037 
1038 	return (delete_route(rnh, rt, rc));
1039 }
1040 
1041 int
1042 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
1043     struct rib_cmd_info *rc)
1044 {
1045 	RIB_RLOCK_TRACKER;
1046 	struct route_nhop_data rnd_orig;
1047 	struct rib_head *rnh;
1048 	struct rtentry *rt;
1049 	int error;
1050 
1051 	NET_EPOCH_ASSERT();
1052 
1053 	rnh = get_rnh(fibnum, info);
1054 	if (rnh == NULL)
1055 		return (EAFNOSUPPORT);
1056 
1057 	bzero(rc, sizeof(struct rib_cmd_info));
1058 	rc->rc_cmd = RTM_CHANGE;
1059 
1060 	/* Check if updated gateway exists */
1061 	if ((info->rti_flags & RTF_GATEWAY) &&
1062 	    (info->rti_info[RTAX_GATEWAY] == NULL)) {
1063 
1064 		/*
1065 		 * route(8) adds RTF_GATEWAY flag if -interface is not set.
1066 		 * Remove RTF_GATEWAY to enforce consistency and maintain
1067 		 * compatibility.
1068 		 */
1069 		info->rti_flags &= ~RTF_GATEWAY;
1070 	}
1071 
1072 	/*
1073 	 * A route change is done in multiple steps, with the lock dropped
1074 	 * and reacquired in between. When multiple processes change the
1075 	 * same route concurrently, the route can be modified between the
1076 	 * steps. Address this by retrying the operation multiple times
1077 	 * before failing.
1078 	 */
1079 
1080 	RIB_RLOCK(rnh);
1081 	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
1082 	    info->rti_info[RTAX_NETMASK], &rnh->head);
1083 
1084 	if (rt == NULL) {
1085 		RIB_RUNLOCK(rnh);
1086 		return (ESRCH);
1087 	}
1088 
1089 	rnd_orig.rnd_nhop = rt->rt_nhop;
1090 	rnd_orig.rnd_weight = rt->rt_weight;
1091 
1092 	RIB_RUNLOCK(rnh);
1093 
1094 	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
1095 		error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
1096 		if (error != EAGAIN)
1097 			break;
1098 	}
1099 
1100 	return (error);
1101 }
1102 
1103 static int
1104 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
1105     struct nhop_object *nh_orig, struct nhop_object **nh_new)
1106 {
1107 	int error;
1108 
1109 	/*
1110 	 * New gateway could require new ifaddr, ifp;
1111 	 * flags may also be different; ifp may be specified
1112 	 * by ll sockaddr when protocol address is ambiguous
1113 	 */
1114 	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
1115 	    info->rti_info[RTAX_GATEWAY] != NULL) ||
1116 	    info->rti_info[RTAX_IFP] != NULL ||
1117 	    (info->rti_info[RTAX_IFA] != NULL &&
1118 	     !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
1119 		error = rt_getifa_fib(info, rnh->rib_fibnum);
1120 
1121 		if (error != 0) {
1122 			info->rti_ifa = NULL;
1123 			return (error);
1124 		}
1125 	}
1126 
1127 	error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
1128 	info->rti_ifa = NULL;
1129 
1130 	return (error);
1131 }
1132 
1133 #ifdef ROUTE_MPATH
1134 static int
1135 change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
1136     struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
1137     struct rib_cmd_info *rc)
1138 {
1139 	int error = 0, found_idx = 0;
1140 	struct nhop_object *nh_orig = NULL, *nh_new;
1141 	struct route_nhop_data rnd_new = {};
1142 	const struct weightened_nhop *wn = NULL;
1143 	struct weightened_nhop *wn_new;
1144 	uint32_t num_nhops;
1145 
1146 	wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
1147 	for (int i = 0; i < num_nhops; i++) {
1148 		if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
1149 			nh_orig = wn[i].nh;
1150 			found_idx = i;
1151 			break;
1152 		}
1153 	}
1154 
1155 	if (nh_orig == NULL)
1156 		return (ESRCH);
1157 
1158 	error = change_nhop(rnh, info, nh_orig, &nh_new);
1159 	if (error != 0)
1160 		return (error);
1161 
1162 	wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
1163 	    M_TEMP, M_NOWAIT | M_ZERO);
1164 	if (wn_new == NULL) {
1165 		nhop_free(nh_new);
1166 		return (EAGAIN);
1167 	}
1168 
1169 	memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
1170 	wn_new[found_idx].nh = nh_new;
1171 	wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);
1172 
1173 	error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp);
1174 	nhop_free(nh_new);
1175 	free(wn_new, M_TEMP);
1176 
1177 	if (error != 0)
1178 		return (error);
1179 
1180 	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
1181 
1182 	return (error);
1183 }
1184 #endif
1185 
1186 static int
1187 change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
1188     struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
1189     struct rib_cmd_info *rc)
1190 {
1191 	int error = 0;
1192 	struct nhop_object *nh_orig;
1193 	struct route_nhop_data rnd_new;
1194 
1195 	nh_orig = rnd_orig->rnd_nhop;
1196 	if (nh_orig == NULL)
1197 		return (ESRCH);
1198 
1199 #ifdef ROUTE_MPATH
1200 	if (NH_IS_NHGRP(nh_orig))
1201 		return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
1202 #endif
1203 
1204 	rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
1205 	error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
1206 	if (error != 0)
1207 		return (error);
1208 	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
1209 
1210 	return (error);
1211 }
1212 
1213 /*
1214  * Insert @rt with nhop data from @rnd_new to @rnh.
1215  * Returns 0 on success and stores operation results in @rc.
1216  */
1217 static int
1218 add_route(struct rib_head *rnh, struct rtentry *rt,
1219     struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1220 {
1221 	struct radix_node *rn;
1222 
1223 	RIB_WLOCK_ASSERT(rnh);
1224 
1225 	rt->rt_nhop = rnd->rnd_nhop;
1226 	rt->rt_weight = rnd->rnd_weight;
1227 	rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes);
1228 
1229 	if (rn != NULL) {
1230 		if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
1231 			tmproutes_update(rnh, rt, rnd->rnd_nhop);
1232 
1233 		/* Finalize notification */
1234 		rib_bump_gen(rnh);
1235 		rnh->rnh_prefixes++;
1236 
1237 		rc->rc_cmd = RTM_ADD;
1238 		rc->rc_rt = rt;
1239 		rc->rc_nh_old = NULL;
1240 		rc->rc_nh_new = rnd->rnd_nhop;
1241 		rc->rc_nh_weight = rnd->rnd_weight;
1242 
1243 		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1244 		return (0);
1245 	}
1246 
1247 	/* Existing route or memory allocation failure. */
1248 	return (EEXIST);
1249 }
1250 
1251 /*
1252  * Unconditionally deletes @rt from @rnh.
1253  */
1254 static int
1255 delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
1256 {
1257 	RIB_WLOCK_ASSERT(rnh);
1258 
1259 	/* Route deletion requested. */
1260 	struct radix_node *rn;
1261 
1262 	rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
1263 	if (rn == NULL)
1264 		return (ESRCH);
1265 	rt = RNTORT(rn);
1266 	rt->rte_flags &= ~RTF_UP;
1267 
1268 	rib_bump_gen(rnh);
1269 	rnh->rnh_prefixes--;
1270 
1271 	rc->rc_cmd = RTM_DELETE;
1272 	rc->rc_rt = rt;
1273 	rc->rc_nh_old = rt->rt_nhop;
1274 	rc->rc_nh_new = NULL;
1275 	rc->rc_nh_weight = rt->rt_weight;
1276 
1277 	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1278 
1279 	return (0);
1280 }
1281 
1282 /*
1283  * Switch @rt nhop/weight to the ones specified in @rnd.
1284  * Returns 0 on success.
1285  */
1286 int
1287 change_route(struct rib_head *rnh, struct rtentry *rt,
1288     struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1289 {
1290 	struct nhop_object *nh_orig;
1291 
1292 	RIB_WLOCK_ASSERT(rnh);
1293 
1294 	nh_orig = rt->rt_nhop;
1295 
1296 	if (rnd->rnd_nhop == NULL)
1297 		return (delete_route(rnh, rt, rc));
1298 
1299 	/* Changing nexthop & weight to a new one */
1300 	rt->rt_nhop = rnd->rnd_nhop;
1301 	rt->rt_weight = rnd->rnd_weight;
1302 	if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
1303 		tmproutes_update(rnh, rt, rnd->rnd_nhop);
1304 
1305 	/* Finalize notification */
1306 	rib_bump_gen(rnh);
1307 	rc->rc_cmd = RTM_CHANGE;
1308 	rc->rc_rt = rt;
1309 	rc->rc_nh_old = nh_orig;
1310 	rc->rc_nh_new = rnd->rnd_nhop;
1311 	rc->rc_nh_weight = rnd->rnd_weight;
1312 
1313 	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1314 
1315 	return (0);
1316 }
1317 
1318 /*
1319  * Conditionally update route nhop/weight IFF data in @rnd_orig is
1320  *  consistent with the current route data.
1321  * Nexthop in @rnd_new is consumed.
1322  */
1323 int
1324 change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
1325     struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
1326     struct rib_cmd_info *rc)
1327 {
1328 	struct rtentry *rt_new;
1329 	int error = 0;
1330 
1331 	IF_DEBUG_LEVEL(LOG_DEBUG2) {
1332 		char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
1333 		nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
1334 		nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
1335 		FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
1336 		    "trying change %s -> %s", buf_old, buf_new);
1337 	}
1338 	RIB_WLOCK(rnh);
1339 
1340 	struct route_nhop_data rnd;
1341 	rt_new = lookup_prefix_rt(rnh, rt, &rnd);
1342 
1343 	if (rt_new == NULL) {
1344 		if (rnd_orig->rnd_nhop == NULL)
1345 			error = add_route(rnh, rt, rnd_new, rc);
1346 		else {
1347 			/*
1348 			 * Prefix does not exist, which was not our assumption.
1349 			 * Update @rnd_orig with the new data and return
1350 			 */
1351 			rnd_orig->rnd_nhop = NULL;
1352 			rnd_orig->rnd_weight = 0;
1353 			error = EAGAIN;
1354 		}
1355 	} else {
1356 		/* Prefix exists, try to update */
1357 		if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
1358 			/*
1359 			 * Nhop/mpath group hasn't changed. Flip
1360 			 * to the new precalculated one and return
1361 			 */
1362 			error = change_route(rnh, rt_new, rnd_new, rc);
1363 		} else {
1364 			/* Update and retry */
1365 			rnd_orig->rnd_nhop = rt_new->rt_nhop;
1366 			rnd_orig->rnd_weight = rt_new->rt_weight;
1367 			error = EAGAIN;
1368 		}
1369 	}
1370 
1371 	RIB_WUNLOCK(rnh);
1372 
1373 	if (error == 0) {
1374 		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
1375 
1376 		if (rnd_orig->rnd_nhop != NULL)
1377 			nhop_free_any(rnd_orig->rnd_nhop);
1378 
1379 	} else {
1380 		if (rnd_new->rnd_nhop != NULL)
1381 			nhop_free_any(rnd_new->rnd_nhop);
1382 	}
1383 
1384 	return (error);
1385 }
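/*
 * Editor's note on the pattern above (derived from the code): callers
 * snapshot the (nhop, weight) pair into @rnd_orig without holding the write
 * lock, build the replacement state, and let change_route_conditional()
 * re-validate it under RIB_WLOCK().  On a mismatch the function refreshes
 * @rnd_orig and returns EAGAIN, and the new nexthop reference is consumed
 * either way, so callers such as rib_change_route() and add_route_flags()
 * rebuild the new state and retry up to RIB_MAX_RETRIES times.
 */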
1386 
1387 /*
1388  * Performs modification of the routing table specified by @action.
1389  * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
1390  * Needs to be run in network epoch.
1391  *
1392  * Returns 0 on success and fills in @rc with action result.
1393  */
1394 int
1395 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
1396     struct rib_cmd_info *rc)
1397 {
1398 	int error;
1399 
1400 	switch (action) {
1401 	case RTM_ADD:
1402 		error = rib_add_route(fibnum, info, rc);
1403 		break;
1404 	case RTM_DELETE:
1405 		error = rib_del_route(fibnum, info, rc);
1406 		break;
1407 	case RTM_CHANGE:
1408 		error = rib_change_route(fibnum, info, rc);
1409 		break;
1410 	default:
1411 		error = ENOTSUP;
1412 	}
1413 
1414 	return (error);
1415 }
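/*
 * Editor's example (a sketch, not part of the upstream file): rib_action()
 * is the rt_addrinfo-based entry point used by rtsock-style callers.  The
 * sockaddrs below are illustrative and must be kernel-validated; the caller
 * must be inside the net epoch.
 *
 *	struct rt_addrinfo info = {
 *		.rti_flags = RTF_GATEWAY | RTF_STATIC,
 *	};
 *	struct rib_cmd_info rc;
 *
 *	info.rti_info[RTAX_DST] = (struct sockaddr *)&dst;
 *	info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&mask;
 *	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw;
 *	error = rib_action(fibnum, RTM_ADD, &info, &rc);
 */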
1416 
1417 struct rt_delinfo
1418 {
1419 	struct rib_head *rnh;
1420 	struct rtentry *head;
1421 	rib_filter_f_t *filter_f;
1422 	void *filter_arg;
1423 	int prio;
1424 	struct rib_cmd_info rc;
1425 };
1426 
1427 /*
1428  * Conditionally unlinks rtentries or paths from the radix tree based
1429  * on the callback data passed in @arg.
1430  */
1431 static int
1432 rt_checkdelroute(struct radix_node *rn, void *arg)
1433 {
1434 	struct rt_delinfo *di = (struct rt_delinfo *)arg;
1435 	struct rtentry *rt = (struct rtentry *)rn;
1436 
1437 	if (rt_delete_conditional(di->rnh, rt, di->prio,
1438 	    di->filter_f, di->filter_arg, &di->rc) != 0)
1439 		return (0);
1440 
1441 	/*
1442 	 * Add deleted rtentries to the list to GC them
1443 	 *  after dropping the lock.
1444 	 *
1445 	 * XXX: Delayed notifications not implemented
1446 	 *  for nexthop updates.
1447 	 */
1448 	if (di->rc.rc_cmd == RTM_DELETE) {
1449 		/* Add to the list and return */
1450 		rt->rt_chain = di->head;
1451 		di->head = rt;
1452 #ifdef ROUTE_MPATH
1453 	} else {
1454 		/*
1455 		 * RTM_CHANGE to a different nexthop or nexthop group.
1456 		 * Free old multipath group.
1457 		 */
1458 		nhop_free_any(di->rc.rc_nh_old);
1459 #endif
1460 	}
1461 
1462 	return (0);
1463 }
1464 
1465 /*
1466  * Iterates over a routing table specified by @fibnum and @family and
1467  *  deletes elements marked by @filter_f.
1468  * @fibnum: rtable id
1469  * @family: AF_ address family
1470  * @filter_f: function returning non-zero value for items to delete
1471  * @arg: data to pass to the @filter_f function
1472  * @report: true if rtsock notification is needed.
1473  */
1474 void
1475 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg,
1476     bool report)
1477 {
1478 	struct rib_head *rnh;
1479 	struct rtentry *rt;
1480 	struct nhop_object *nh;
1481 	struct epoch_tracker et;
1482 
1483 	rnh = rt_tables_get_rnh(fibnum, family);
1484 	if (rnh == NULL)
1485 		return;
1486 
1487 	struct rt_delinfo di = {
1488 		.rnh = rnh,
1489 		.filter_f = filter_f,
1490 		.filter_arg = filter_arg,
1491 		.prio = NH_PRIORITY_NORMAL,
1492 	};
1493 
1494 	NET_EPOCH_ENTER(et);
1495 
1496 	RIB_WLOCK(rnh);
1497 	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
1498 	RIB_WUNLOCK(rnh);
1499 
1500 	/* We might have something to reclaim. */
1501 	bzero(&di.rc, sizeof(di.rc));
1502 	di.rc.rc_cmd = RTM_DELETE;
1503 	while (di.head != NULL) {
1504 		rt = di.head;
1505 		di.head = rt->rt_chain;
1506 		rt->rt_chain = NULL;
1507 		nh = rt->rt_nhop;
1508 
1509 		di.rc.rc_rt = rt;
1510 		di.rc.rc_nh_old = nh;
1511 		rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);
1512 
1513 		if (report) {
1514 #ifdef ROUTE_MPATH
1515 			struct nhgrp_object *nhg;
1516 			const struct weightened_nhop *wn;
1517 			uint32_t num_nhops;
1518 			if (NH_IS_NHGRP(nh)) {
1519 				nhg = (struct nhgrp_object *)nh;
1520 				wn = nhgrp_get_nhops(nhg, &num_nhops);
1521 				for (int i = 0; i < num_nhops; i++)
1522 					rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
1523 			} else
1524 #endif
1525 			rt_routemsg(RTM_DELETE, rt, nh, fibnum);
1526 		}
1527 		rt_free(rt);
1528 	}
1529 
1530 	NET_EPOCH_EXIT(et);
1531 }
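/*
 * Editor's example (a sketch, not part of the upstream file): dropping every
 * IPv4 route whose nexthop uses a departing interface, with rtsock reports.
 * The filter function is illustrative.
 *
 *	static int
 *	match_ifp(const struct rtentry *rt, const struct nhop_object *nh,
 *	    void *arg)
 *	{
 *		return (nh->nh_ifp == (struct ifnet *)arg);
 *	}
 *
 *	rib_walk_del(fibnum, AF_INET, match_ifp, ifp, true);
 */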
1532 
1533 static int
1534 rt_delete_unconditional(struct radix_node *rn, void *arg)
1535 {
1536 	struct rtentry *rt = RNTORT(rn);
1537 	struct rib_head *rnh = (struct rib_head *)arg;
1538 
1539 	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
1540 	if (RNTORT(rn) == rt)
1541 		rt_free(rt);
1542 
1543 	return (0);
1544 }
1545 
1546 /*
1547  * Removes all routes from the routing table without executing notifications.
1548  * rtentries will be removed after the end of the current epoch.
1549  */
1550 static void
1551 rib_flush_routes(struct rib_head *rnh)
1552 {
1553 	RIB_WLOCK(rnh);
1554 	rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
1555 	RIB_WUNLOCK(rnh);
1556 }
1557 
1558 void
1559 rib_flush_routes_family(int family)
1560 {
1561 	struct rib_head *rnh;
1562 
1563 	for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
1564 		if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
1565 			rib_flush_routes(rnh);
1566 	}
1567 }
1568 
1569 const char *
1570 rib_print_family(int family)
1571 {
1572 	switch (family) {
1573 	case AF_INET:
1574 		return ("inet");
1575 	case AF_INET6:
1576 		return ("inet6");
1577 	case AF_LINK:
1578 		return ("link");
1579 	}
1580 	return ("unknown");
1581 }
1582 
1583