1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2020 Alexander V. Chernikov
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 #include "opt_route.h"
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/mbuf.h>
37 #include <sys/socket.h>
38 #include <sys/sysctl.h>
39 #include <sys/syslog.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/rmlock.h>
43
44 #include <net/if.h>
45 #include <net/if_var.h>
46 #include <net/if_private.h>
47 #include <net/if_dl.h>
48 #include <net/vnet.h>
49 #include <net/route.h>
50 #include <net/route/route_ctl.h>
51 #include <net/route/route_var.h>
52 #include <net/route/nhop_utils.h>
53 #include <net/route/nhop.h>
54 #include <net/route/nhop_var.h>
55 #include <netinet/in.h>
56 #include <netinet6/scope6_var.h>
57 #include <netinet6/in6_var.h>
58
59 #define DEBUG_MOD_NAME route_ctl
60 #define DEBUG_MAX_LEVEL LOG_DEBUG
61 #include <net/route/route_debug.h>
62 _DECLARE_DEBUG(LOG_INFO);
63
64 /*
65 * This file contains control plane routing tables functions.
66 *
 * All functions assume they are called within the net epoch.
68 */
69
/*
 * On-stack scratch storage large enough for any supported sockaddr
 * (IPv4/IPv6); used below for building netmasks from prefix lengths.
 */
union sockaddr_union {
	struct sockaddr sa;
	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	char _buf[32];	/* pads the union to a fixed 32-byte size */
};
76
77 static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
78 struct rib_cmd_info *rc);
79 static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
80 struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
81 struct rib_cmd_info *rc);
82
83 static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
84 struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
85 #ifdef ROUTE_MPATH
86 static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
87 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
88 int op_flags, struct rib_cmd_info *rc);
89 #endif
90
91 static int add_route(struct rib_head *rnh, struct rtentry *rt,
92 struct route_nhop_data *rnd, struct rib_cmd_info *rc);
93 static int delete_route(struct rib_head *rnh, struct rtentry *rt,
94 struct rib_cmd_info *rc);
95 static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
96 int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);
97
98 static bool fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
99 struct sockaddr **pmask);
100 static int get_prio_from_info(const struct rt_addrinfo *info);
101 static int nhop_get_prio(const struct nhop_object *nh);
102
103 #ifdef ROUTE_MPATH
104 static bool rib_can_multipath(struct rib_head *rh);
105 #endif
106
107 /* Per-vnet multipath routing configuration */
108 SYSCTL_DECL(_net_route);
109 #define V_rib_route_multipath VNET(rib_route_multipath)
110 #ifdef ROUTE_MPATH
111 #define _MP_FLAGS CTLFLAG_RW
112 #else
113 #define _MP_FLAGS CTLFLAG_RD
114 #endif
115 VNET_DEFINE(u_int, rib_route_multipath) = 1;
116 SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
117 &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
118 #undef _MP_FLAGS
119
120 #ifdef ROUTE_MPATH
121 VNET_DEFINE(u_int, fib_hash_outbound) = 0;
122 SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
123 &VNET_NAME(fib_hash_outbound), 0,
124 "Compute flowid for locally-originated packets");
125
126 /* Default entropy to add to the hash calculation for the outbound connections*/
127 uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
128 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
129 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
130 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
131 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
132 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
133 };
134 #endif
135
136 #if defined(INET) && defined(INET6)
137 FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
138 #define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop)
139 VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
140 SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
141 &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
142 #endif
143
144 /* Debug bits */
145 SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
146
147 static struct rib_head *
get_rnh(uint32_t fibnum,const struct rt_addrinfo * info)148 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
149 {
150 struct rib_head *rnh;
151 struct sockaddr *dst;
152
153 KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));
154
155 dst = info->rti_info[RTAX_DST];
156 rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
157
158 return (rnh);
159 }
160
#if defined(INET) && defined(INET6)
/* Reports whether IPv4 routes with IPv6 nexthops (RFC 5549) are enabled. */
bool
rib_can_4o6_nhop(void)
{
	return (V_rib_route_ipv6_nexthop != 0);
}
#endif
168
169 #ifdef ROUTE_MPATH
170 static bool
rib_can_multipath(struct rib_head * rh)171 rib_can_multipath(struct rib_head *rh)
172 {
173 int result;
174
175 CURVNET_SET(rh->rib_vnet);
176 result = !!V_rib_route_multipath;
177 CURVNET_RESTORE();
178
179 return (result);
180 }
181
182 /*
 * Check if nhop is multipath-eligible.
184 * Avoid nhops without gateways and redirects.
185 *
186 * Returns 1 for multipath-eligible nexthop,
187 * 0 otherwise.
188 */
189 bool
nhop_can_multipath(const struct nhop_object * nh)190 nhop_can_multipath(const struct nhop_object *nh)
191 {
192
193 if ((nh->nh_flags & NHF_MULTIPATH) != 0)
194 return (1);
195 if ((nh->nh_flags & NHF_GATEWAY) == 0)
196 return (0);
197 if ((nh->nh_flags & NHF_REDIRECT) != 0)
198 return (0);
199
200 return (1);
201 }
202 #endif
203
204 static int
get_info_weight(const struct rt_addrinfo * info,uint32_t default_weight)205 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
206 {
207 uint32_t weight;
208
209 if (info->rti_mflags & RTV_WEIGHT)
210 weight = info->rti_rmx->rmx_weight;
211 else
212 weight = default_weight;
213 /* Keep upper 1 byte for adm distance purposes */
214 if (weight > RT_MAX_WEIGHT)
215 weight = RT_MAX_WEIGHT;
216 else if (weight == 0)
217 weight = default_weight;
218
219 return (weight);
220 }
221
222 /*
 * File-local concept for distinguishing between the normal and
 * RTF_PINNED routes that can override the "normal" one.
225 */
226 #define NH_PRIORITY_HIGH 2
227 #define NH_PRIORITY_NORMAL 1
228 static int
get_prio_from_info(const struct rt_addrinfo * info)229 get_prio_from_info(const struct rt_addrinfo *info)
230 {
231 if (info->rti_flags & RTF_PINNED)
232 return (NH_PRIORITY_HIGH);
233 return (NH_PRIORITY_NORMAL);
234 }
235
236 static int
nhop_get_prio(const struct nhop_object * nh)237 nhop_get_prio(const struct nhop_object *nh)
238 {
239 if (NH_IS_PINNED(nh))
240 return (NH_PRIORITY_HIGH);
241 return (NH_PRIORITY_NORMAL);
242 }
243
244 /*
245 * Check if specified @gw matches gw data in the nexthop @nh.
246 *
247 * Returns true if matches, false otherwise.
248 */
249 bool
match_nhop_gw(const struct nhop_object * nh,const struct sockaddr * gw)250 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
251 {
252
253 if (nh->gw_sa.sa_family != gw->sa_family)
254 return (false);
255
256 switch (gw->sa_family) {
257 case AF_INET:
258 return (nh->gw4_sa.sin_addr.s_addr ==
259 ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
260 case AF_INET6:
261 {
262 const struct sockaddr_in6 *gw6;
263 gw6 = (const struct sockaddr_in6 *)gw;
264
265 /*
266 * Currently (2020-09) IPv6 gws in kernel have their
267 * scope embedded. Once this becomes false, this code
268 * has to be revisited.
269 */
270 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
271 &gw6->sin6_addr))
272 return (true);
273 return (false);
274 }
275 case AF_LINK:
276 {
277 const struct sockaddr_dl *sdl;
278 sdl = (const struct sockaddr_dl *)gw;
279 return (nh->gwl_sa.sdl_index == sdl->sdl_index);
280 }
281 default:
282 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
283 }
284
285 /* NOTREACHED */
286 return (false);
287 }
288
289 /*
290 * Matches all nexthop with given @gw.
291 * Can be used as rib_filter_f callback.
292 */
int
rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa)
{
	return (match_nhop_gw(nh, (const struct sockaddr *)gw_sa));
}
300
/* Callback state for match_gw_one(): gateway to match + match counter. */
struct gw_filter_data {
	const struct sockaddr *gw;
	int count;	/* number of matches seen so far */
};
305
306 /*
 * Matches first occurrence of the gateway provided in @gwd
308 */
309 static int
match_gw_one(const struct rtentry * rt,const struct nhop_object * nh,void * _data)310 match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
311 {
312 struct gw_filter_data *gwd = (struct gw_filter_data *)_data;
313
314 /* Return only first match to make rtsock happy */
315 if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0)
316 return (1);
317 return (0);
318 }
319
320 /*
 * Checks if data in @info matches nexthop @nh.
322 *
323 * Returns 0 on success,
324 * ESRCH if not matched,
325 * ENOENT if filter function returned false
326 */
327 int
check_info_match_nhop(const struct rt_addrinfo * info,const struct rtentry * rt,const struct nhop_object * nh)328 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
329 const struct nhop_object *nh)
330 {
331 const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];
332
333 if (info->rti_filter != NULL) {
334 if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
335 return (ENOENT);
336 else
337 return (0);
338 }
339 if ((gw != NULL) && !match_nhop_gw(nh, gw))
340 return (ESRCH);
341
342 return (0);
343 }
344
345 /*
346 * Runs exact prefix match based on @dst and @netmask.
347 * Returns matched @rtentry if found or NULL.
348 * If rtentry was found, saves nexthop / weight value into @rnd.
349 */
350 static struct rtentry *
lookup_prefix_bysa(struct rib_head * rnh,const struct sockaddr * dst,const struct sockaddr * netmask,struct route_nhop_data * rnd)351 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
352 const struct sockaddr *netmask, struct route_nhop_data *rnd)
353 {
354 struct rtentry *rt;
355
356 RIB_LOCK_ASSERT(rnh);
357
358 rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
359 if (rt != NULL) {
360 rnd->rnd_nhop = rt->rt_nhop;
361 rnd->rnd_weight = rt->rt_weight;
362 } else {
363 rnd->rnd_nhop = NULL;
364 rnd->rnd_weight = 0;
365 }
366
367 return (rt);
368 }
369
/* Exact-prefix lookup keyed by @rt's own dst/netmask. */
struct rtentry *
lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
    struct route_nhop_data *rnd)
{
	const struct sockaddr *dst = rt_key_const(rt);
	const struct sockaddr *netmask = rt_mask_const(rt);

	return (lookup_prefix_bysa(rnh, dst, netmask, rnd));
}
376
377 /*
378 * Runs exact prefix match based on dst/netmask from @info.
379 * Assumes RIB lock is held.
380 * Returns matched @rtentry if found or NULL.
381 * If rtentry was found, saves nexthop / weight value into @rnd.
382 */
383 struct rtentry *
lookup_prefix(struct rib_head * rnh,const struct rt_addrinfo * info,struct route_nhop_data * rnd)384 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
385 struct route_nhop_data *rnd)
386 {
387 struct rtentry *rt;
388
389 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
390 info->rti_info[RTAX_NETMASK], rnd);
391
392 return (rt);
393 }
394
395 const struct rtentry *
rib_lookup_prefix_plen(struct rib_head * rnh,struct sockaddr * dst,int plen,struct route_nhop_data * rnd)396 rib_lookup_prefix_plen(struct rib_head *rnh, struct sockaddr *dst, int plen,
397 struct route_nhop_data *rnd)
398 {
399 union sockaddr_union mask_storage;
400 struct sockaddr *netmask = &mask_storage.sa;
401
402 if (fill_pxmask_family(dst->sa_family, plen, dst, &netmask))
403 return (lookup_prefix_bysa(rnh, dst, netmask, rnd));
404 return (NULL);
405 }
406
407 static bool
fill_pxmask_family(int family,int plen,struct sockaddr * _dst,struct sockaddr ** pmask)408 fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
409 struct sockaddr **pmask)
410 {
411 if (plen == -1) {
412 *pmask = NULL;
413 return (true);
414 }
415
416 switch (family) {
417 #ifdef INET
418 case AF_INET:
419 {
420 struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask);
421 struct sockaddr_in *dst= (struct sockaddr_in *)_dst;
422
423 memset(mask, 0, sizeof(*mask));
424 mask->sin_family = family;
425 mask->sin_len = sizeof(*mask);
426 if (plen == 32)
427 *pmask = NULL;
428 else if (plen > 32 || plen < 0)
429 return (false);
430 else {
431 uint32_t daddr, maddr;
432 maddr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
433 mask->sin_addr.s_addr = maddr;
434 daddr = dst->sin_addr.s_addr;
435 daddr = htonl(ntohl(daddr) & ntohl(maddr));
436 dst->sin_addr.s_addr = daddr;
437 }
438 return (true);
439 }
440 break;
441 #endif
442 #ifdef INET6
443 case AF_INET6:
444 {
445 struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask);
446 struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst;
447
448 memset(mask, 0, sizeof(*mask));
449 mask->sin6_family = family;
450 mask->sin6_len = sizeof(*mask);
451 if (plen == 128)
452 *pmask = NULL;
453 else if (plen > 128 || plen < 0)
454 return (false);
455 else {
456 ip6_writemask(&mask->sin6_addr, plen);
457 IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr);
458 }
459 return (true);
460 }
461 break;
462 #endif
463 }
464 return (false);
465 }
466
467 /*
 * Attempts to add @dst/plen prefix with nexthop/nexthop-group data @rnd
469 * to the routing table.
470 *
471 * @fibnum: verified kernel rtable id to insert route to
472 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
473 * @plen: prefix length (or -1 if host route or not applicable for AF)
474 * @op_flags: combination of RTM_F_ flags
475 * @rc: storage to report operation result
476 *
477 * Returns 0 on success.
478 */
int
rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
    struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;
	struct rtentry *rt = NULL;

	NET_EPOCH_ASSERT();

	/* Initialize result storage before any early return. */
	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_ADD;

	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	/*
	 * NOTE(review): unlike rib_del_route_px(), no dst->sa_len vs
	 * sizeof(mask_storage) check here — presumably relies on @dst
	 * being a verified kernel-originated sockaddr; confirm.
	 */
	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
		return (EINVAL);
	}

	if (op_flags & RTM_F_CREATE) {
		/* Creation permitted: pre-allocate the rtentry. */
		if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) {
			FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed");
			return (ENOMEM);
		}
	} else {
		/* Modification only: the prefix must already exist. */
		struct route_nhop_data rnd_tmp;
		RIB_RLOCK_TRACKER;

		RIB_RLOCK(rnh);
		rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd_tmp);
		RIB_RUNLOCK(rnh);

		if (rt == NULL)
			return (ESRCH);
	}

	/* On failure add_route_flags() frees @rt (if created) and the nhop ref. */
	return (add_route_flags(rnh, rt, rnd, op_flags, rc));
}
520
521 /*
522 * Attempts to delete @dst/plen prefix matching gateway @gw from the
 * routing table.
524 *
525 * @fibnum: rtable id to remove route from
526 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
527 * @plen: prefix length (or -1 if host route or not applicable for AF)
528 * @gw: gateway to match
529 * @op_flags: combination of RTM_F_ flags
530 * @rc: storage to report operation result
531 *
532 * Returns 0 on success.
533 */
534 int
rib_del_route_px_gw(uint32_t fibnum,struct sockaddr * dst,int plen,const struct sockaddr * gw,int op_flags,struct rib_cmd_info * rc)535 rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen,
536 const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
537 {
538 struct gw_filter_data gwd = { .gw = gw };
539
540 return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc));
541 }
542
543 /*
544 * Attempts to delete @dst/plen prefix matching @filter_func from the
 * routing table.
546 *
547 * @fibnum: rtable id to remove route from
548 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
549 * @plen: prefix length (or -1 if host route or not applicable for AF)
550 * @filter_func: func to be called for each nexthop of the prefix for matching
551 * @filter_arg: argument to pass to @filter_func
552 * @op_flags: combination of RTM_F_ flags
553 * @rc: storage to report operation result
554 *
555 * Returns 0 on success.
556 */
int
rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
    rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
    struct rib_cmd_info *rc)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;
	int error;

	NET_EPOCH_ASSERT();

	/* Initialize result storage before any early return. */
	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	if (dst->sa_len > sizeof(mask_storage)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
		return (EINVAL);
	}

	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
		return (EINVAL);
	}

	/* RTM_F_FORCE allows removing high-priority (pinned) routes. */
	int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		/* May demote full delete to RTM_CHANGE (multipath). */
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	/* Notify subscribers after dropping the RIB lock. */
	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}
617
618 /*
619 * Tries to copy route @rt from one rtable to the rtable specified by @dst_rh.
620 * @rt: route to copy.
621 * @rnd_src: nhop and weight. Multipath routes are not supported
622 * @rh_dst: target rtable.
623 * @rc: operation result storage
624 *
625 * Return 0 on success.
626 */
int
rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src,
    struct rib_head *rh_dst, struct rib_cmd_info *rc)
{
	struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop;
	int error;

	/* Multipath (nexthop group) sources are not supported. */
	MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0);

	IF_DEBUG_LEVEL(LOG_DEBUG2) {
		char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE];
		nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf));
		rt_print_buf(rt, rtbuf, sizeof(rtbuf));
		FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u",
		    rtbuf, nhbuf, nhop_get_fibnum(nh_src));
	}
	/* Clone the source nexthop into the destination fib. */
	struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family);
	if (nh == NULL) {
		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop");
		return (ENOMEM);
	}
	nhop_copy(nh, rnd_src->rnd_nhop);
	nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop));
	nhop_set_fibnum(nh, rh_dst->rib_fibnum);
	nh = nhop_get_nhop_internal(rh_dst, nh, &error);
	if (error != 0) {
		/* NOTE(review): the actual @error is reported as ENOMEM. */
		FIB_RH_LOG(LOG_INFO, rh_dst,
		    "unable to finalize new nexthop: error %d", error);
		return (ENOMEM);
	}

	/* Clone the rtentry with the same prefix/netmask. */
	struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt));
	if (rt_new == NULL) {
		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry");
		nhop_free(nh);
		return (ENOMEM);
	}

	struct route_nhop_data rnd = {
		.rnd_nhop = nh,
		.rnd_weight = rnd_src->rnd_weight
	};
	/* Pinned (kernel-installed) routes may override existing ones. */
	int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? RTM_F_FORCE : 0);
	error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc);

	if (error != 0) {
		IF_DEBUG_LEVEL(LOG_DEBUG2) {
			char buf[NHOP_PRINT_BUFSIZE];
			rt_print_buf(rt_new, buf, sizeof(buf));
			FIB_RH_LOG(LOG_DEBUG, rh_dst,
			    "Unable to add route %s: error %d", buf, error);
		}
		/*
		 * NOTE(review): add_route_flags() already frees @rt_new and
		 * drops the nhop reference on its failure paths when
		 * RTM_F_CREATE is set — verify this cleanup does not
		 * double-free.
		 */
		nhop_free(nh);
		rt_free_immediate(rt_new);
	}
	return (error);
}
684
685 /*
686 * Adds route defined by @info into the kernel table specified by @fibnum and
687 * sa_family in @info->rti_info[RTAX_DST].
688 *
689 * Returns 0 on success and fills in operation metadata into @rc.
690 */
int
rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	/*
	 * Check consistency between RTF_HOST flag and netmask
	 * existence.
	 */
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;	/* host routes carry no mask */
	else if (info->rti_info[RTAX_NETMASK] == NULL) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask");
		return (EINVAL);
	}

	/* Initialize result storage before attempting the add. */
	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_ADD;

	error = add_route_byinfo(rnh, info, rc);
	if (error == 0)
		/* Notify subscribers only after a successful add. */
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	return (error);
}
724
/*
 * rib_add_route() helper: validates dst/gateway sanity, resolves the
 * outgoing ifa when not supplied, builds an rtentry + nexthop pair
 * from @info and hands both to add_route_flags().
 *
 * Returns 0 on success, storing the operation result in @rc.
 */
static int
add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_add;
	struct nhop_object *nh;
	struct rtentry *rt;
	struct sockaddr *dst, *gateway, *netmask;
	int error;

	dst = info->rti_info[RTAX_DST];
	gateway = info->rti_info[RTAX_GATEWAY];
	netmask = info->rti_info[RTAX_NETMASK];

	if ((info->rti_flags & RTF_GATEWAY) && !gateway) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
		return (EINVAL);
	}
	if (dst && gateway && !nhop_check_gateway(dst->sa_family, gateway->sa_family)) {
		FIB_RH_LOG(LOG_DEBUG, rnh,
		    "error: invalid dst/gateway family combination (%d, %d)",
		    dst->sa_family, gateway->sa_family);
		return (EINVAL);
	}

	/* The rtentry embeds the destination; make sure it fits. */
	if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d",
		    dst->sa_len);
		return (EINVAL);
	}

	if (info->rti_ifa == NULL) {
		/* Resolve egress ifa based on dst/gateway in this fib. */
		error = rt_getifa_fib(info, rnh->rib_fibnum);
		if (error)
			return (error);
	}

	if ((rt = rt_alloc(rnh, dst, netmask)) == NULL)
		return (ENOBUFS);

	error = nhop_create_from_info(rnh, info, &nh);
	if (error != 0) {
		rt_free_immediate(rt);
		return (error);
	}

	rnd_add.rnd_nhop = nh;
	rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);

	int op_flags = RTM_F_CREATE;

	/*
	 * Set the desired action when the route already exists:
	 * If RTF_PINNED is present, assume the direct kernel routes that cannot be multipath.
	 * Otherwise, append the path.
	 */
	op_flags |= (info->rti_flags & RTF_PINNED) ? RTM_F_REPLACE : RTM_F_APPEND;

	return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc));
}
785
/*
 * Inserts or updates prefix @rt with nexthop data @rnd_add, honouring
 * RTM_F_CREATE/RTM_F_EXCL/RTM_F_REPLACE/RTM_F_APPEND in @op_flags.
 *
 * Consumes caller references: on failure (and on paths where the
 * pre-allocated @rt is not linked) @rt is freed if RTM_F_CREATE was
 * set, and the nexthop reference in @rnd_add is dropped (see "out").
 *
 * Returns 0 on success, storing the operation result in @rc.
 */
static int
add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add,
    int op_flags, struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_orig;
	struct nhop_object *nh;
	struct rtentry *rt_orig;
	int error = 0;

	MPASS(rt != NULL);

	nh = rnd_add->rnd_nhop;

	RIB_WLOCK(rnh);

	rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig);

	if (rt_orig == NULL) {
		if (op_flags & RTM_F_CREATE)
			error = add_route(rnh, rt, rnd_add, rc);
		else
			error = ESRCH; /* no entry but creation was not required */
		RIB_WUNLOCK(rnh);
		if (error != 0)
			goto out;
		return (0);
	}

	if (op_flags & RTM_F_EXCL) {
		/* We have existing route in the RIB but not allowed to replace. */
		RIB_WUNLOCK(rnh);
		error = EEXIST;
		goto out;
	}

	/* Now either append or replace */
	if (op_flags & RTM_F_REPLACE) {
		if (nhop_get_prio(rnd_orig.rnd_nhop) > nhop_get_prio(rnd_add->rnd_nhop)) {
			/* Old path is "better" (e.g. has PINNED flag set) */
			RIB_WUNLOCK(rnh);
			error = EEXIST;
			goto out;
		}
		change_route(rnh, rt_orig, rnd_add, rc);
		RIB_WUNLOCK(rnh);
		/* Replaced: the reference to drop at "out" is the OLD nhop. */
		nh = rc->rc_nh_old;
		goto out;
	}

	RIB_WUNLOCK(rnh);

#ifdef ROUTE_MPATH
	/*
	 * Lock was dropped above, so the RIB may change underneath us;
	 * add_route_flags_mpath() handles that via EAGAIN retries below.
	 */
	if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) &&
	    nhop_can_multipath(rnd_add->rnd_nhop) &&
	    nhop_can_multipath(rnd_orig.rnd_nhop)) {

		for (int i = 0; i < RIB_MAX_RETRIES; i++) {
			error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig,
			    op_flags, rc);
			if (error != EAGAIN)
				break;
			RTSTAT_INC(rts_add_retry);
		}

		/*
		 * Original nhop reference is unused in any case.
		 */
		nhop_free_any(rnd_add->rnd_nhop);
		if (op_flags & RTM_F_CREATE) {
			/* Pre-allocated rtentry was not linked by an add. */
			if (error != 0 || rc->rc_cmd != RTM_ADD)
				rt_free_immediate(rt);
		}
		return (error);
	}
#endif
	/* Out of options - free state and return error */
	error = EEXIST;
out:
	/* Release unconsumed caller state (error and replace paths). */
	if (op_flags & RTM_F_CREATE)
		rt_free_immediate(rt);
	nhop_free_any(nh);

	return (error);
}
870
871 #ifdef ROUTE_MPATH
872 static int
add_route_flags_mpath(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd_add,struct route_nhop_data * rnd_orig,int op_flags,struct rib_cmd_info * rc)873 add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
874 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
875 int op_flags, struct rib_cmd_info *rc)
876 {
877 RIB_RLOCK_TRACKER;
878 struct route_nhop_data rnd_new;
879 int error = 0;
880
881 error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new);
882 if (error != 0) {
883 if (error == EAGAIN) {
884 /*
885 * Group creation failed, most probably because
886 * @rnd_orig data got scheduled for deletion.
887 * Refresh @rnd_orig data and retry.
888 */
889 RIB_RLOCK(rnh);
890 lookup_prefix_rt(rnh, rt, rnd_orig);
891 RIB_RUNLOCK(rnh);
892 if (rnd_orig == NULL && !(op_flags & RTM_F_CREATE)) {
893 /* In this iteration route doesn't exist */
894 error = ENOENT;
895 }
896 }
897 return (error);
898 }
899 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
900 if (error != 0)
901 return (error);
902
903 if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) {
904 /*
905 * First multipath route got installed. Enable local
906 * outbound connections hashing.
907 */
908 if (bootverbose)
909 printf("FIB: enabled flowid calculation for locally-originated packets\n");
910 V_fib_hash_outbound = 1;
911 }
912
913 return (0);
914 }
915 #endif
916
917 /*
918 * Removes route defined by @info from the kernel table specified by @fibnum and
919 * sa_family in @info->rti_info[RTAX_DST].
920 *
921 * Returns 0 on success and fills in operation metadata into @rc.
922 */
int
rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	struct sockaddr *dst, *netmask;
	struct sockaddr_storage mdst;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	/* Initialize result storage before any early return. */
	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	dst = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	if (netmask != NULL) {
		/* Ensure @dst is always properly masked */
		if (dst->sa_len > sizeof(mdst)) {
			FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
			return (EINVAL);
		}
		rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
		dst = (struct sockaddr *)&mdst;
	}

	/* Pick a path filter: an explicit filter wins over gateway match. */
	rib_filter_f_t *filter_func = NULL;
	void *filter_arg = NULL;
	struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] };

	if (info->rti_filter != NULL) {
		filter_func = info->rti_filter;
		filter_arg = info->rti_filterdata;
	} else if (gwd.gw != NULL) {
		filter_func = match_gw_one;
		filter_arg = &gwd;
	}

	/* RTF_PINNED in @info permits deleting kernel-installed routes. */
	int prio = get_prio_from_info(info);

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		/* May demote full delete to RTM_CHANGE (multipath). */
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	/* Notify subscribers after dropping the RIB lock. */
	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}
997
998 /*
999 * Conditionally unlinks rtentry paths from @rnh matching @cb.
1000 * Returns 0 on success with operation result stored in @rc.
1001 * On error, returns:
1002 * ESRCH - if prefix was not found or filter function failed to match
1003 * EADDRINUSE - if trying to delete higher priority route.
1004 */
static int
rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
    int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc)
{
	struct nhop_object *nh = rt->rt_nhop;

#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh)) {
		/* Multipath route: filter individual paths from the group. */
		struct nhgrp_object *nhg = (struct nhgrp_object *)nh;
		struct route_nhop_data rnd;
		int error;

		/* Without a filter there is nothing to match paths against. */
		if (cb == NULL)
			return (ESRCH);
		error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd);
		if (error == 0) {
			if (rnd.rnd_nhgrp == nhg) {
				/* No match, unreference new group and return. */
				nhop_free_any(rnd.rnd_nhop);
				return (ESRCH);
			}
			/* Some paths were filtered out: switch to new group. */
			error = change_route(rnh, rt, &rnd, rc);
		}
		return (error);
	}
#endif
	if (cb != NULL && !cb(rt, nh, cbdata))
		return (ESRCH);

	/* Refuse to delete a higher-priority (e.g. pinned) route. */
	if (prio < nhop_get_prio(nh))
		return (EADDRINUSE);

	return (delete_route(rnh, rt, rc));
}
1039
/*
 * Changes the route matching @info in the routing table specified by
 * @fibnum and sa_family in @info->rti_info[RTAX_DST].
 * Needs to be run in network epoch.
 *
 * Returns 0 on success and fills in @rc with the operation result.
 */
int
rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_orig;
	struct rib_head *rnh;
	struct rtentry *rt;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_CHANGE;

	/* Check if updated gateway exists */
	if ((info->rti_flags & RTF_GATEWAY) &&
	    (info->rti_info[RTAX_GATEWAY] == NULL)) {

		/*
		 * route(8) adds RTF_GATEWAY flag if -interface is not set.
		 * Remove RTF_GATEWAY to enforce consistency and maintain
		 * compatibility.
		 */
		info->rti_flags &= ~RTF_GATEWAY;
	}

	/*
	 * Route change is done in multiple steps, with dropping and
	 * reacquiring the lock. When multiple processes change the same
	 * route concurrently, the route may change between the steps.
	 * Address it by retrying the operation multiple times before
	 * failing.
	 */

	RIB_RLOCK(rnh);
	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt == NULL) {
		RIB_RUNLOCK(rnh);
		return (ESRCH);
	}

	/* Snapshot current nhop/weight to detect concurrent changes later. */
	rnd_orig.rnd_nhop = rt->rt_nhop;
	rnd_orig.rnd_weight = rt->rt_weight;

	RIB_RUNLOCK(rnh);

	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
		error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
		if (error != EAGAIN)
			break;
	}

	return (error);
}
1101
1102 static int
change_nhop(struct rib_head * rnh,struct rt_addrinfo * info,struct nhop_object * nh_orig,struct nhop_object ** nh_new)1103 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
1104 struct nhop_object *nh_orig, struct nhop_object **nh_new)
1105 {
1106 int error;
1107
1108 /*
1109 * New gateway could require new ifaddr, ifp;
1110 * flags may also be different; ifp may be specified
1111 * by ll sockaddr when protocol address is ambiguous
1112 */
1113 if (((nh_orig->nh_flags & NHF_GATEWAY) &&
1114 info->rti_info[RTAX_GATEWAY] != NULL) ||
1115 info->rti_info[RTAX_IFP] != NULL ||
1116 (info->rti_info[RTAX_IFA] != NULL &&
1117 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
1118 error = rt_getifa_fib(info, rnh->rib_fibnum);
1119
1120 if (error != 0) {
1121 info->rti_ifa = NULL;
1122 return (error);
1123 }
1124 }
1125
1126 error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
1127 info->rti_ifa = NULL;
1128
1129 return (error);
1130 }
1131
#ifdef ROUTE_MPATH
/*
 * Changes the single path inside the multipath group of @rt that matches
 * @info: builds a replacement nexthop, clones the group with that path
 * substituted and conditionally (against @rnd_orig) installs the result.
 *
 * Returns 0 on success and stores the operation result in @rc.
 * Returns ESRCH if no path in the group matches @info, EAGAIN on
 * transient failure (allocation or concurrent route change).
 */
static int
change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0, found_idx = 0;
	struct nhop_object *nh_orig = NULL, *nh_new;
	struct route_nhop_data rnd_new = {};
	const struct weightened_nhop *wn = NULL;
	struct weightened_nhop *wn_new;
	uint32_t num_nhops;

	/* Find the first path in the group matching @info. */
	wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
	/* uint32_t index: avoid signed/unsigned comparison with num_nhops */
	for (uint32_t i = 0; i < num_nhops; i++) {
		if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
			nh_orig = wn[i].nh;
			found_idx = i;
			break;
		}
	}

	if (nh_orig == NULL)
		return (ESRCH);

	/* Build the replacement nexthop based on the matched one. */
	error = change_nhop(rnh, info, nh_orig, &nh_new);
	if (error != 0)
		return (error);

	wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
	    M_TEMP, M_NOWAIT | M_ZERO);
	if (wn_new == NULL) {
		nhop_free(nh_new);
		return (EAGAIN);
	}

	/* Clone the path array, substituting the changed path/weight. */
	memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
	wn_new[found_idx].nh = nh_new;
	wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);

	error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp);
	/* Drop the local reference and the temporary path array. */
	nhop_free(nh_new);
	free(wn_new, M_TEMP);

	if (error != 0)
		return (error);

	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}
#endif
1184
1185 static int
change_route_byinfo(struct rib_head * rnh,struct rtentry * rt,struct rt_addrinfo * info,struct route_nhop_data * rnd_orig,struct rib_cmd_info * rc)1186 change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
1187 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
1188 struct rib_cmd_info *rc)
1189 {
1190 int error = 0;
1191 struct nhop_object *nh_orig;
1192 struct route_nhop_data rnd_new;
1193
1194 nh_orig = rnd_orig->rnd_nhop;
1195 if (nh_orig == NULL)
1196 return (ESRCH);
1197
1198 #ifdef ROUTE_MPATH
1199 if (NH_IS_NHGRP(nh_orig))
1200 return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
1201 #endif
1202
1203 rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
1204 error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
1205 if (error != 0)
1206 return (error);
1207 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
1208
1209 return (error);
1210 }
1211
1212 /*
1213 * Insert @rt with nhop data from @rnd_new to @rnh.
1214 * Returns 0 on success and stores operation results in @rc.
1215 */
1216 static int
add_route(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd,struct rib_cmd_info * rc)1217 add_route(struct rib_head *rnh, struct rtentry *rt,
1218 struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1219 {
1220 struct radix_node *rn;
1221
1222 RIB_WLOCK_ASSERT(rnh);
1223
1224 rt->rt_nhop = rnd->rnd_nhop;
1225 rt->rt_weight = rnd->rnd_weight;
1226 rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes);
1227
1228 if (rn != NULL) {
1229 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
1230 tmproutes_update(rnh, rt, rnd->rnd_nhop);
1231
1232 /* Finalize notification */
1233 rib_bump_gen(rnh);
1234 rnh->rnh_prefixes++;
1235
1236 rc->rc_cmd = RTM_ADD;
1237 rc->rc_rt = rt;
1238 rc->rc_nh_old = NULL;
1239 rc->rc_nh_new = rnd->rnd_nhop;
1240 rc->rc_nh_weight = rnd->rnd_weight;
1241
1242 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1243 return (0);
1244 }
1245
1246 /* Existing route or memory allocation failure. */
1247 return (EEXIST);
1248 }
1249
1250 /*
1251 * Unconditionally deletes @rt from @rnh.
1252 */
1253 static int
delete_route(struct rib_head * rnh,struct rtentry * rt,struct rib_cmd_info * rc)1254 delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
1255 {
1256 RIB_WLOCK_ASSERT(rnh);
1257
1258 /* Route deletion requested. */
1259 struct radix_node *rn;
1260
1261 rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
1262 if (rn == NULL)
1263 return (ESRCH);
1264 rt = RNTORT(rn);
1265 rt->rte_flags &= ~RTF_UP;
1266
1267 rib_bump_gen(rnh);
1268 rnh->rnh_prefixes--;
1269
1270 rc->rc_cmd = RTM_DELETE;
1271 rc->rc_rt = rt;
1272 rc->rc_nh_old = rt->rt_nhop;
1273 rc->rc_nh_new = NULL;
1274 rc->rc_nh_weight = rt->rt_weight;
1275
1276 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1277
1278 return (0);
1279 }
1280
1281 /*
1282 * Switch @rt nhop/weigh to the ones specified in @rnd.
1283 * Returns 0 on success.
1284 */
1285 int
change_route(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd,struct rib_cmd_info * rc)1286 change_route(struct rib_head *rnh, struct rtentry *rt,
1287 struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1288 {
1289 struct nhop_object *nh_orig;
1290
1291 RIB_WLOCK_ASSERT(rnh);
1292
1293 nh_orig = rt->rt_nhop;
1294
1295 if (rnd->rnd_nhop == NULL)
1296 return (delete_route(rnh, rt, rc));
1297
1298 /* Changing nexthop & weight to a new one */
1299 rt->rt_nhop = rnd->rnd_nhop;
1300 rt->rt_weight = rnd->rnd_weight;
1301 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
1302 tmproutes_update(rnh, rt, rnd->rnd_nhop);
1303
1304 /* Finalize notification */
1305 rib_bump_gen(rnh);
1306 rc->rc_cmd = RTM_CHANGE;
1307 rc->rc_rt = rt;
1308 rc->rc_nh_old = nh_orig;
1309 rc->rc_nh_new = rnd->rnd_nhop;
1310 rc->rc_nh_weight = rnd->rnd_weight;
1311
1312 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1313
1314 return (0);
1315 }
1316
1317 /*
1318 * Conditionally update route nhop/weight IFF data in @nhd_orig is
1319 * consistent with the current route data.
1320 * Nexthop in @nhd_new is consumed.
1321 */
1322 int
change_route_conditional(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd_orig,struct route_nhop_data * rnd_new,struct rib_cmd_info * rc)1323 change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
1324 struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
1325 struct rib_cmd_info *rc)
1326 {
1327 struct rtentry *rt_new;
1328 int error = 0;
1329
1330 IF_DEBUG_LEVEL(LOG_DEBUG2) {
1331 char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
1332 nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
1333 nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
1334 FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
1335 "trying change %s -> %s", buf_old, buf_new);
1336 }
1337 RIB_WLOCK(rnh);
1338
1339 struct route_nhop_data rnd;
1340 rt_new = lookup_prefix_rt(rnh, rt, &rnd);
1341
1342 if (rt_new == NULL) {
1343 if (rnd_orig->rnd_nhop == NULL)
1344 error = add_route(rnh, rt, rnd_new, rc);
1345 else {
1346 /*
1347 * Prefix does not exist, which was not our assumption.
1348 * Update @rnd_orig with the new data and return
1349 */
1350 rnd_orig->rnd_nhop = NULL;
1351 rnd_orig->rnd_weight = 0;
1352 error = EAGAIN;
1353 }
1354 } else {
1355 /* Prefix exists, try to update */
1356 if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
1357 /*
1358 * Nhop/mpath group hasn't changed. Flip
1359 * to the new precalculated one and return
1360 */
1361 error = change_route(rnh, rt_new, rnd_new, rc);
1362 } else {
1363 /* Update and retry */
1364 rnd_orig->rnd_nhop = rt_new->rt_nhop;
1365 rnd_orig->rnd_weight = rt_new->rt_weight;
1366 error = EAGAIN;
1367 }
1368 }
1369
1370 RIB_WUNLOCK(rnh);
1371
1372 if (error == 0) {
1373 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
1374
1375 if (rnd_orig->rnd_nhop != NULL)
1376 nhop_free_any(rnd_orig->rnd_nhop);
1377
1378 } else {
1379 if (rnd_new->rnd_nhop != NULL)
1380 nhop_free_any(rnd_new->rnd_nhop);
1381 }
1382
1383 return (error);
1384 }
1385
1386 /*
1387 * Performs modification of routing table specificed by @action.
1388 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
1389 * Needs to be run in network epoch.
1390 *
1391 * Returns 0 on success and fills in @rc with action result.
1392 */
1393 int
rib_action(uint32_t fibnum,int action,struct rt_addrinfo * info,struct rib_cmd_info * rc)1394 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
1395 struct rib_cmd_info *rc)
1396 {
1397 int error;
1398
1399 switch (action) {
1400 case RTM_ADD:
1401 error = rib_add_route(fibnum, info, rc);
1402 break;
1403 case RTM_DELETE:
1404 error = rib_del_route(fibnum, info, rc);
1405 break;
1406 case RTM_CHANGE:
1407 error = rib_change_route(fibnum, info, rc);
1408 break;
1409 default:
1410 error = ENOTSUP;
1411 }
1412
1413 return (error);
1414 }
1415
/* Walker state for conditional route deletion (rt_checkdelroute()). */
struct rt_delinfo
{
	struct rib_head *rnh;		/* RIB being modified */
	struct rtentry *head;		/* chain of unlinked rtentries to GC */
	rib_filter_f_t *filter_f;	/* selects paths to delete */
	void *filter_arg;		/* opaque data passed to @filter_f */
	int prio;			/* delete only paths with nhop prio <= this */
	struct rib_cmd_info rc;		/* result of the last delete operation */
};
1425
1426 /*
1427 * Conditionally unlinks rtenties or paths from radix tree based
1428 * on the callback data passed in @arg.
1429 */
1430 static int
rt_checkdelroute(struct radix_node * rn,void * arg)1431 rt_checkdelroute(struct radix_node *rn, void *arg)
1432 {
1433 struct rt_delinfo *di = (struct rt_delinfo *)arg;
1434 struct rtentry *rt = (struct rtentry *)rn;
1435
1436 if (rt_delete_conditional(di->rnh, rt, di->prio,
1437 di->filter_f, di->filter_arg, &di->rc) != 0)
1438 return (0);
1439
1440 /*
1441 * Add deleted rtentries to the list to GC them
1442 * after dropping the lock.
1443 *
1444 * XXX: Delayed notifications not implemented
1445 * for nexthop updates.
1446 */
1447 if (di->rc.rc_cmd == RTM_DELETE) {
1448 /* Add to the list and return */
1449 rt->rt_chain = di->head;
1450 di->head = rt;
1451 #ifdef ROUTE_MPATH
1452 } else {
1453 /*
1454 * RTM_CHANGE to a different nexthop or nexthop group.
1455 * Free old multipath group.
1456 */
1457 nhop_free_any(di->rc.rc_nh_old);
1458 #endif
1459 }
1460
1461 return (0);
1462 }
1463
1464 /*
1465 * Iterates over a routing table specified by @fibnum and @family and
1466 * deletes elements marked by @filter_f.
1467 * @fibnum: rtable id
1468 * @family: AF_ address family
1469 * @filter_f: function returning non-zero value for items to delete
1470 * @arg: data to pass to the @filter_f function
1471 * @report: true if rtsock notification is needed.
1472 */
1473 void
rib_walk_del(u_int fibnum,int family,rib_filter_f_t * filter_f,void * filter_arg,bool report)1474 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg,
1475 bool report)
1476 {
1477 struct rib_head *rnh;
1478 struct rtentry *rt;
1479 struct nhop_object *nh;
1480 struct epoch_tracker et;
1481
1482 rnh = rt_tables_get_rnh(fibnum, family);
1483 if (rnh == NULL)
1484 return;
1485
1486 struct rt_delinfo di = {
1487 .rnh = rnh,
1488 .filter_f = filter_f,
1489 .filter_arg = filter_arg,
1490 .prio = NH_PRIORITY_NORMAL,
1491 };
1492
1493 NET_EPOCH_ENTER(et);
1494
1495 RIB_WLOCK(rnh);
1496 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
1497 RIB_WUNLOCK(rnh);
1498
1499 /* We might have something to reclaim. */
1500 bzero(&di.rc, sizeof(di.rc));
1501 di.rc.rc_cmd = RTM_DELETE;
1502 while (di.head != NULL) {
1503 rt = di.head;
1504 di.head = rt->rt_chain;
1505 rt->rt_chain = NULL;
1506 nh = rt->rt_nhop;
1507
1508 di.rc.rc_rt = rt;
1509 di.rc.rc_nh_old = nh;
1510 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);
1511
1512 if (report) {
1513 #ifdef ROUTE_MPATH
1514 struct nhgrp_object *nhg;
1515 const struct weightened_nhop *wn;
1516 uint32_t num_nhops;
1517 if (NH_IS_NHGRP(nh)) {
1518 nhg = (struct nhgrp_object *)nh;
1519 wn = nhgrp_get_nhops(nhg, &num_nhops);
1520 for (int i = 0; i < num_nhops; i++)
1521 rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
1522 } else
1523 #endif
1524 rt_routemsg(RTM_DELETE, rt, nh, fibnum);
1525 }
1526 rt_free(rt);
1527 }
1528
1529 NET_EPOCH_EXIT(et);
1530 }
1531
1532 static int
rt_delete_unconditional(struct radix_node * rn,void * arg)1533 rt_delete_unconditional(struct radix_node *rn, void *arg)
1534 {
1535 struct rtentry *rt = RNTORT(rn);
1536 struct rib_head *rnh = (struct rib_head *)arg;
1537
1538 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
1539 if (RNTORT(rn) == rt)
1540 rt_free(rt);
1541
1542 return (0);
1543 }
1544
1545 /*
1546 * Removes all routes from the routing table without executing notifications.
1547 * rtentres will be removed after the end of a current epoch.
1548 */
1549 static void
rib_flush_routes(struct rib_head * rnh)1550 rib_flush_routes(struct rib_head *rnh)
1551 {
1552 RIB_WLOCK(rnh);
1553 rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
1554 RIB_WUNLOCK(rnh);
1555 }
1556
1557 void
rib_flush_routes_family(int family)1558 rib_flush_routes_family(int family)
1559 {
1560 struct rib_head *rnh;
1561
1562 for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
1563 if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
1564 rib_flush_routes(rnh);
1565 }
1566 }
1567
1568 const char *
rib_print_family(int family)1569 rib_print_family(int family)
1570 {
1571 switch (family) {
1572 case AF_INET:
1573 return ("inet");
1574 case AF_INET6:
1575 return ("inet6");
1576 case AF_LINK:
1577 return ("link");
1578 }
1579 return ("unknown");
1580 }
1581
1582