1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2020 Alexander V. Chernikov
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 #include "opt_route.h"
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/mbuf.h>
37 #include <sys/socket.h>
38 #include <sys/sysctl.h>
39 #include <sys/syslog.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/rmlock.h>
43
44 #include <net/if.h>
45 #include <net/if_var.h>
46 #include <net/if_private.h>
47 #include <net/if_dl.h>
48 #include <net/vnet.h>
49 #include <net/route.h>
50 #include <net/route/route_ctl.h>
51 #include <net/route/route_var.h>
52 #include <net/route/nhop_utils.h>
53 #include <net/route/nhop.h>
54 #include <net/route/nhop_var.h>
55 #include <netinet/in.h>
56 #include <netinet6/scope6_var.h>
57 #include <netinet6/in6_var.h>
58
59 #define DEBUG_MOD_NAME route_ctl
60 #define DEBUG_MAX_LEVEL LOG_DEBUG
61 #include <net/route/route_debug.h>
62 _DECLARE_DEBUG(LOG_INFO);
63
64 /*
65 * This file contains control plane routing tables functions.
66 *
67 * All functions assumes they are called in net epoch.
68 */
69
/*
 * Scratch sockaddr storage big enough to hold either an IPv4 or an IPv6
 * sockaddr; used for building netmasks on the stack.
 */
union sockaddr_union {
	struct sockaddr sa;
	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	char _buf[32];	/* forces a minimum storage size */
};
76
77 static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
78 struct rib_cmd_info *rc);
79 static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
80 struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
81 struct rib_cmd_info *rc);
82
83 static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
84 struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
85 #ifdef ROUTE_MPATH
86 static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
87 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
88 int op_flags, struct rib_cmd_info *rc);
89 #endif
90
91 static int add_route(struct rib_head *rnh, struct rtentry *rt,
92 struct route_nhop_data *rnd, struct rib_cmd_info *rc);
93 static int delete_route(struct rib_head *rnh, struct rtentry *rt,
94 struct rib_cmd_info *rc);
95 static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
96 int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);
97
98 static bool fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
99 struct sockaddr **pmask);
100 static int get_prio_from_info(const struct rt_addrinfo *info);
101 static int nhop_get_prio(const struct nhop_object *nh);
102
103 #ifdef ROUTE_MPATH
104 static bool rib_can_multipath(struct rib_head *rh);
105 #endif
106
/* Per-vnet multipath routing configuration */
SYSCTL_DECL(_net_route);
#define V_rib_route_multipath VNET(rib_route_multipath)
#ifdef ROUTE_MPATH
#define _MP_FLAGS CTLFLAG_RW
VNET_DEFINE(u_int, rib_route_multipath) = 1;
#else
/* Without ROUTE_MPATH compiled in the knob is read-only and always off. */
#define _MP_FLAGS CTLFLAG_RD
VNET_DEFINE(u_int, rib_route_multipath) = 0;
#endif
SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
    &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
#undef _MP_FLAGS

#ifdef ROUTE_MPATH
/* Set to 1 once the first multipath route is installed (see add_route_flags_mpath()). */
VNET_DEFINE(u_int, fib_hash_outbound) = 0;
SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
    &VNET_NAME(fib_hash_outbound), 0,
    "Compute flowid for locally-originated packets");

/* Default entropy to add to the hash calculation for the outbound connections*/
uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
};
#endif

#if defined(INET) && defined(INET6)
/* IPv4 routes with IPv6 nexthops (RFC 5549), enabled by default. */
FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
#define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop)
VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
#endif

/* Debug bits */
SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
147
148 static struct rib_head *
get_rnh(uint32_t fibnum,const struct rt_addrinfo * info)149 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
150 {
151 struct rib_head *rnh;
152 struct sockaddr *dst;
153
154 KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));
155
156 dst = info->rti_info[RTAX_DST];
157 rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
158
159 return (rnh);
160 }
161
#if defined(INET) && defined(INET6)
/* Reports whether IPv4 routes may use IPv6 nexthops (RFC 5549). */
bool
rib_can_4o6_nhop(void)
{
	return (V_rib_route_ipv6_nexthop != 0);
}
#endif
169
170 #ifdef ROUTE_MPATH
171 static bool
rib_can_multipath(struct rib_head * rh)172 rib_can_multipath(struct rib_head *rh)
173 {
174 int result;
175
176 CURVNET_SET(rh->rib_vnet);
177 result = !!V_rib_route_multipath;
178 CURVNET_RESTORE();
179
180 return (result);
181 }
182
183 /*
184 * Check is nhop is multipath-eligible.
185 * Avoid nhops without gateways and redirects.
186 *
187 * Returns 1 for multipath-eligible nexthop,
188 * 0 otherwise.
189 */
190 bool
nhop_can_multipath(const struct nhop_object * nh)191 nhop_can_multipath(const struct nhop_object *nh)
192 {
193
194 if ((nh->nh_flags & NHF_MULTIPATH) != 0)
195 return (1);
196 if ((nh->nh_flags & NHF_GATEWAY) == 0)
197 return (0);
198 if ((nh->nh_flags & NHF_REDIRECT) != 0)
199 return (0);
200
201 return (1);
202 }
203 #endif
204
205 static int
get_info_weight(const struct rt_addrinfo * info,uint32_t default_weight)206 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
207 {
208 uint32_t weight;
209
210 if (info->rti_mflags & RTV_WEIGHT)
211 weight = info->rti_rmx->rmx_weight;
212 else
213 weight = default_weight;
214 /* Keep upper 1 byte for adm distance purposes */
215 if (weight > RT_MAX_WEIGHT)
216 weight = RT_MAX_WEIGHT;
217 else if (weight == 0)
218 weight = default_weight;
219
220 return (weight);
221 }
222
223 /*
224 * File-local concept for distingushing between the normal and
225 * RTF_PINNED routes tha can override the "normal" one.
226 */
227 #define NH_PRIORITY_HIGH 2
228 #define NH_PRIORITY_NORMAL 1
229 static int
get_prio_from_info(const struct rt_addrinfo * info)230 get_prio_from_info(const struct rt_addrinfo *info)
231 {
232 if (info->rti_flags & RTF_PINNED)
233 return (NH_PRIORITY_HIGH);
234 return (NH_PRIORITY_NORMAL);
235 }
236
237 static int
nhop_get_prio(const struct nhop_object * nh)238 nhop_get_prio(const struct nhop_object *nh)
239 {
240 if (NH_IS_PINNED(nh))
241 return (NH_PRIORITY_HIGH);
242 return (NH_PRIORITY_NORMAL);
243 }
244
245 /*
246 * Check if specified @gw matches gw data in the nexthop @nh.
247 *
248 * Returns true if matches, false otherwise.
249 */
250 bool
match_nhop_gw(const struct nhop_object * nh,const struct sockaddr * gw)251 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
252 {
253
254 if (nh->gw_sa.sa_family != gw->sa_family)
255 return (false);
256
257 switch (gw->sa_family) {
258 case AF_INET:
259 return (nh->gw4_sa.sin_addr.s_addr ==
260 ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
261 case AF_INET6:
262 {
263 const struct sockaddr_in6 *gw6;
264 gw6 = (const struct sockaddr_in6 *)gw;
265
266 /*
267 * Currently (2020-09) IPv6 gws in kernel have their
268 * scope embedded. Once this becomes false, this code
269 * has to be revisited.
270 */
271 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
272 &gw6->sin6_addr))
273 return (true);
274 return (false);
275 }
276 case AF_LINK:
277 {
278 const struct sockaddr_dl *sdl;
279 sdl = (const struct sockaddr_dl *)gw;
280 return (nh->gwl_sa.sdl_index == sdl->sdl_index);
281 }
282 default:
283 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
284 }
285
286 /* NOTREACHED */
287 return (false);
288 }
289
/*
 * Matches all nexthop with given @gw.
 * Can be used as rib_filter_f callback.
 */
int
rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa)
{
	return (match_nhop_gw(nh, (const struct sockaddr *)gw_sa));
}
301
/* State for match_gw_one(): the gateway to look for and matches seen so far. */
struct gw_filter_data {
	const struct sockaddr *gw;	/* gateway to match against */
	int count;			/* number of matches found so far */
};
306
307 /*
308 * Matches first occurence of the gateway provided in @gwd
309 */
310 static int
match_gw_one(const struct rtentry * rt,const struct nhop_object * nh,void * _data)311 match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
312 {
313 struct gw_filter_data *gwd = (struct gw_filter_data *)_data;
314
315 /* Return only first match to make rtsock happy */
316 if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0)
317 return (1);
318 return (0);
319 }
320
321 /*
322 * Checks if data in @info matches nexhop @nh.
323 *
324 * Returns 0 on success,
325 * ESRCH if not matched,
326 * ENOENT if filter function returned false
327 */
328 int
check_info_match_nhop(const struct rt_addrinfo * info,const struct rtentry * rt,const struct nhop_object * nh)329 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
330 const struct nhop_object *nh)
331 {
332 const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];
333
334 if (info->rti_filter != NULL) {
335 if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
336 return (ENOENT);
337 else
338 return (0);
339 }
340 if ((gw != NULL) && !match_nhop_gw(nh, gw))
341 return (ESRCH);
342
343 return (0);
344 }
345
346 /*
347 * Runs exact prefix match based on @dst and @netmask.
348 * Returns matched @rtentry if found or NULL.
349 * If rtentry was found, saves nexthop / weight value into @rnd.
350 */
351 static struct rtentry *
lookup_prefix_bysa(struct rib_head * rnh,const struct sockaddr * dst,const struct sockaddr * netmask,struct route_nhop_data * rnd)352 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
353 const struct sockaddr *netmask, struct route_nhop_data *rnd)
354 {
355 struct rtentry *rt;
356
357 RIB_LOCK_ASSERT(rnh);
358
359 rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
360 if (rt != NULL) {
361 rnd->rnd_nhop = rt->rt_nhop;
362 rnd->rnd_weight = rt->rt_weight;
363 } else {
364 rnd->rnd_nhop = NULL;
365 rnd->rnd_weight = 0;
366 }
367
368 return (rt);
369 }
370
/* Exact-prefix lookup keyed by the dst/mask of an existing rtentry @rt. */
struct rtentry *
lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
    struct route_nhop_data *rnd)
{
	const struct sockaddr *dst = rt_key_const(rt);
	const struct sockaddr *netmask = rt_mask_const(rt);

	return (lookup_prefix_bysa(rnh, dst, netmask, rnd));
}
377
378 /*
379 * Runs exact prefix match based on dst/netmask from @info.
380 * Assumes RIB lock is held.
381 * Returns matched @rtentry if found or NULL.
382 * If rtentry was found, saves nexthop / weight value into @rnd.
383 */
384 struct rtentry *
lookup_prefix(struct rib_head * rnh,const struct rt_addrinfo * info,struct route_nhop_data * rnd)385 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
386 struct route_nhop_data *rnd)
387 {
388 struct rtentry *rt;
389
390 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
391 info->rti_info[RTAX_NETMASK], rnd);
392
393 return (rt);
394 }
395
396 const struct rtentry *
rib_lookup_prefix_plen(struct rib_head * rnh,struct sockaddr * dst,int plen,struct route_nhop_data * rnd)397 rib_lookup_prefix_plen(struct rib_head *rnh, struct sockaddr *dst, int plen,
398 struct route_nhop_data *rnd)
399 {
400 union sockaddr_union mask_storage;
401 struct sockaddr *netmask = &mask_storage.sa;
402
403 if (fill_pxmask_family(dst->sa_family, plen, dst, &netmask))
404 return (lookup_prefix_bysa(rnh, dst, netmask, rnd));
405 return (NULL);
406 }
407
/*
 * Builds a network mask for @family/@plen into the storage that *@pmask
 * points at, and masks @_dst in place so its host bits are zero.
 *
 * For host routes (@plen == -1 or a full-length prefix), *@pmask is set
 * to NULL so callers pass a NULL netmask to the radix code.
 *
 * Returns true on success; false for an invalid @plen or an
 * unsupported address family.
 */
static bool
fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
    struct sockaddr **pmask)
{
	if (plen == -1) {
		/* Host route: no netmask needed. */
		*pmask = NULL;
		return (true);
	}

	switch (family) {
#ifdef INET
	case AF_INET:
	{
		struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask);
		struct sockaddr_in *dst= (struct sockaddr_in *)_dst;

		memset(mask, 0, sizeof(*mask));
		mask->sin_family = family;
		mask->sin_len = sizeof(*mask);
		if (plen == 32)
			*pmask = NULL;
		else if (plen > 32 || plen < 0)
			return (false);
		else {
			uint32_t daddr, maddr;
			/* plen == 0 is special-cased: 1 << 32 would be UB. */
			maddr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
			mask->sin_addr.s_addr = maddr;
			daddr = dst->sin_addr.s_addr;
			/* Zero out the host bits of @dst in place. */
			daddr = htonl(ntohl(daddr) & ntohl(maddr));
			dst->sin_addr.s_addr = daddr;
		}
		return (true);
	}
	break;
#endif
#ifdef INET6
	case AF_INET6:
	{
		struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask);
		struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst;

		memset(mask, 0, sizeof(*mask));
		mask->sin6_family = family;
		mask->sin6_len = sizeof(*mask);
		if (plen == 128)
			*pmask = NULL;
		else if (plen > 128 || plen < 0)
			return (false);
		else {
			ip6_writemask(&mask->sin6_addr, plen);
			/* Zero out the host bits of @dst in place. */
			IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr);
		}
		return (true);
	}
	break;
#endif
	}
	return (false);
}
467
468 /*
469 * Attempts to add @dst/plen prefix with nexthop/nexhopgroup data @rnd
470 * to the routing table.
471 *
472 * @fibnum: verified kernel rtable id to insert route to
473 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
474 * @plen: prefix length (or -1 if host route or not applicable for AF)
475 * @op_flags: combination of RTM_F_ flags
476 * @rc: storage to report operation result
477 *
478 * Returns 0 on success.
479 */
480 int
rib_add_route_px(uint32_t fibnum,struct sockaddr * dst,int plen,struct route_nhop_data * rnd,int op_flags,struct rib_cmd_info * rc)481 rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
482 struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc)
483 {
484 union sockaddr_union mask_storage;
485 struct sockaddr *netmask = &mask_storage.sa;
486 struct rtentry *rt = NULL;
487
488 NET_EPOCH_ASSERT();
489
490 bzero(rc, sizeof(struct rib_cmd_info));
491 rc->rc_cmd = RTM_ADD;
492
493 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
494 if (rnh == NULL)
495 return (EAFNOSUPPORT);
496
497 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
498 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
499 return (EINVAL);
500 }
501
502 if (op_flags & RTM_F_CREATE) {
503 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) {
504 FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed");
505 return (ENOMEM);
506 }
507 } else {
508 struct route_nhop_data rnd_tmp;
509 RIB_RLOCK_TRACKER;
510
511 RIB_RLOCK(rnh);
512 rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd_tmp);
513 RIB_RUNLOCK(rnh);
514
515 if (rt == NULL)
516 return (ESRCH);
517 }
518
519 return (add_route_flags(rnh, rt, rnd, op_flags, rc));
520 }
521
522 /*
523 * Attempts to delete @dst/plen prefix matching gateway @gw from the
524 * routing rable.
525 *
526 * @fibnum: rtable id to remove route from
527 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
528 * @plen: prefix length (or -1 if host route or not applicable for AF)
529 * @gw: gateway to match
530 * @op_flags: combination of RTM_F_ flags
531 * @rc: storage to report operation result
532 *
533 * Returns 0 on success.
534 */
535 int
rib_del_route_px_gw(uint32_t fibnum,struct sockaddr * dst,int plen,const struct sockaddr * gw,int op_flags,struct rib_cmd_info * rc)536 rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen,
537 const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
538 {
539 struct gw_filter_data gwd = { .gw = gw };
540
541 return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc));
542 }
543
544 /*
545 * Attempts to delete @dst/plen prefix matching @filter_func from the
546 * routing rable.
547 *
548 * @fibnum: rtable id to remove route from
549 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
550 * @plen: prefix length (or -1 if host route or not applicable for AF)
551 * @filter_func: func to be called for each nexthop of the prefix for matching
552 * @filter_arg: argument to pass to @filter_func
553 * @op_flags: combination of RTM_F_ flags
554 * @rc: storage to report operation result
555 *
556 * Returns 0 on success.
557 */
int
rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
    rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
    struct rib_cmd_info *rc)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;
	int error;

	NET_EPOCH_ASSERT();

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	/* @dst must fit the on-stack mask storage. */
	if (dst->sa_len > sizeof(mask_storage)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
		return (EINVAL);
	}

	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
		return (EINVAL);
	}

	/* RTM_F_FORCE permits removal of higher-priority (pinned) routes. */
	int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	/* Notify subscribers outside of the RIB lock. */
	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}
618
619 /*
620 * Tries to copy route @rt from one rtable to the rtable specified by @dst_rh.
621 * @rt: route to copy.
622 * @rnd_src: nhop and weight. Multipath routes are not supported
623 * @rh_dst: target rtable.
624 * @rc: operation result storage
625 *
626 * Return 0 on success.
627 */
int
rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src,
    struct rib_head *rh_dst, struct rib_cmd_info *rc)
{
	struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop;
	int error;

	/* Multipath groups cannot be copied by this function. */
	MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0);

	IF_DEBUG_LEVEL(LOG_DEBUG2) {
		char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE];
		nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf));
		rt_print_buf(rt, rtbuf, sizeof(rtbuf));
		FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u",
		    rtbuf, nhbuf, nhop_get_fibnum(nh_src));
	}
	/* Clone the source nexthop, re-homing it into the destination fib. */
	struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family);
	if (nh == NULL) {
		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop");
		return (ENOMEM);
	}
	nhop_copy(nh, rnd_src->rnd_nhop);
	nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop));
	nhop_set_fibnum(nh, rh_dst->rib_fibnum);
	/* NOTE(review): assumes nhop_get_nhop_internal() releases @nh on failure — verify. */
	nh = nhop_get_nhop_internal(rh_dst, nh, &error);
	if (error != 0) {
		FIB_RH_LOG(LOG_INFO, rh_dst,
		    "unable to finalize new nexthop: error %d", error);
		return (ENOMEM);
	}

	struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt));
	if (rt_new == NULL) {
		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry");
		nhop_free(nh);
		return (ENOMEM);
	}

	struct route_nhop_data rnd = {
		.rnd_nhop = nh,
		.rnd_weight = rnd_src->rnd_weight
	};
	/* Pinned source routes are allowed to override existing ones. */
	int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? RTM_F_FORCE : 0);
	error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc);

	if (error != 0) {
		IF_DEBUG_LEVEL(LOG_DEBUG2) {
			char buf[NHOP_PRINT_BUFSIZE];
			rt_print_buf(rt, buf, sizeof(buf));
			FIB_RH_LOG(LOG_DEBUG, rh_dst,
			    "Unable to add route %s: error %d", buf, error);
		}
		nhop_free(nh);
	}
	return (error);
}
684
685 /*
686 * Adds route defined by @info into the kernel table specified by @fibnum and
687 * sa_family in @info->rti_info[RTAX_DST].
688 *
689 * Returns 0 on success and fills in operation metadata into @rc.
690 */
691 int
rib_add_route(uint32_t fibnum,struct rt_addrinfo * info,struct rib_cmd_info * rc)692 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
693 struct rib_cmd_info *rc)
694 {
695 struct rib_head *rnh;
696 int error;
697
698 NET_EPOCH_ASSERT();
699
700 rnh = get_rnh(fibnum, info);
701 if (rnh == NULL)
702 return (EAFNOSUPPORT);
703
704 /*
705 * Check consistency between RTF_HOST flag and netmask
706 * existence.
707 */
708 if (info->rti_flags & RTF_HOST)
709 info->rti_info[RTAX_NETMASK] = NULL;
710 else if (info->rti_info[RTAX_NETMASK] == NULL) {
711 FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask");
712 return (EINVAL);
713 }
714
715 bzero(rc, sizeof(struct rib_cmd_info));
716 rc->rc_cmd = RTM_ADD;
717
718 error = add_route_byinfo(rnh, info, rc);
719 if (error == 0)
720 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
721
722 return (error);
723 }
724
/*
 * Validates @info, builds an rtentry and a nexthop from it, and inserts
 * them into @rnh. Fills operation result into @rc on success.
 */
static int
add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_add;
	struct nhop_object *nh;
	struct rtentry *rt;
	struct sockaddr *dst, *gateway, *netmask;
	int error;

	dst = info->rti_info[RTAX_DST];
	gateway = info->rti_info[RTAX_GATEWAY];
	netmask = info->rti_info[RTAX_NETMASK];

	/* RTF_GATEWAY requires a gateway sockaddr. */
	if ((info->rti_flags & RTF_GATEWAY) && !gateway) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
		return (EINVAL);
	}
	if (dst && gateway && !nhop_check_gateway(dst->sa_family, gateway->sa_family)) {
		FIB_RH_LOG(LOG_DEBUG, rnh,
		    "error: invalid dst/gateway family combination (%d, %d)",
		    dst->sa_family, gateway->sa_family);
		return (EINVAL);
	}

	/* @dst is copied into the fixed-size rt_dstb buffer; bound its length. */
	if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d",
		    dst->sa_len);
		return (EINVAL);
	}

	/* Resolve the outgoing interface if the caller did not supply one. */
	if (info->rti_ifa == NULL) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);
		if (error)
			return (error);
	}

	if ((rt = rt_alloc(rnh, dst, netmask)) == NULL)
		return (ENOBUFS);

	error = nhop_create_from_info(rnh, info, &nh);
	if (error != 0) {
		/* @rt was never linked: free it immediately. */
		rt_free_immediate(rt);
		return (error);
	}

	rnd_add.rnd_nhop = nh;
	rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);

	int op_flags = RTM_F_CREATE;

	/*
	 * Set the desired action when the route already exists:
	 * If RTF_PINNED is present, assume the direct kernel routes that cannot be multipath.
	 * Otherwise, append the path.
	 */
	op_flags |= (info->rti_flags & RTF_PINNED) ? RTM_F_REPLACE : RTM_F_APPEND;

	/* add_route_flags() consumes the @rt and @nh references on failure. */
	return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc));
}
785
/*
 * Inserts or updates prefix @rt with nexthop data @rnd_add, honoring the
 * RTM_F_CREATE/RTM_F_EXCL/RTM_F_REPLACE/RTM_F_APPEND bits of @op_flags.
 *
 * Consumes the reference on @rt (when RTM_F_CREATE is set) and on
 * @rnd_add->rnd_nhop unless they end up linked into the RIB.
 *
 * Returns 0 on success, storing the operation result in @rc.
 */
static int
add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add,
    int op_flags, struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_orig;
	struct nhop_object *nh;
	struct rtentry *rt_orig;
	int error = 0;

	MPASS(rt != NULL);

	nh = rnd_add->rnd_nhop;

	RIB_WLOCK(rnh);

	rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig);

	if (rt_orig == NULL) {
		if (op_flags & RTM_F_CREATE)
			error = add_route(rnh, rt, rnd_add, rc);
		else
			error = ESRCH; /* no entry but creation was not required */
		RIB_WUNLOCK(rnh);
		if (error != 0)
			goto out;
		return (0);
	}

	if (op_flags & RTM_F_EXCL) {
		/* We have existing route in the RIB but not allowed to replace. */
		RIB_WUNLOCK(rnh);
		error = EEXIST;
		goto out;
	}

	/* Now either append or replace */
	if (op_flags & RTM_F_REPLACE) {
		if (nhop_get_prio(rnd_orig.rnd_nhop) == NH_PRIORITY_HIGH) {
			/* Old path is "better" (e.g. has PINNED flag set) */
			RIB_WUNLOCK(rnh);
			error = EEXIST;
			goto out;
		}
		change_route(rnh, rt_orig, rnd_add, rc);
		RIB_WUNLOCK(rnh);
		/* Replaced: the displaced nexthop is the one to release. */
		nh = rc->rc_nh_old;
		goto out;
	}

	RIB_WUNLOCK(rnh);

#ifdef ROUTE_MPATH
	if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) &&
	    nhop_can_multipath(rnd_add->rnd_nhop) &&
	    nhop_can_multipath(rnd_orig.rnd_nhop)) {

		/* Retry: the prefix can change while the lock is dropped. */
		for (int i = 0; i < RIB_MAX_RETRIES; i++) {
			error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig,
			    op_flags, rc);
			if (error != EAGAIN)
				break;
			RTSTAT_INC(rts_add_retry);
		}

		/*
		 * Original nhop reference is unused in any case.
		 */
		nhop_free_any(rnd_add->rnd_nhop);
		if (op_flags & RTM_F_CREATE) {
			if (error != 0 || rc->rc_cmd != RTM_ADD)
				rt_free_immediate(rt);
		}
		return (error);
	}
#endif
	/* Out of options - free state and return error */
	error = EEXIST;
out:
	if (op_flags & RTM_F_CREATE)
		rt_free_immediate(rt);
	nhop_free_any(nh);

	return (error);
}
870
871 #ifdef ROUTE_MPATH
872 static int
add_route_flags_mpath(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd_add,struct route_nhop_data * rnd_orig,int op_flags,struct rib_cmd_info * rc)873 add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
874 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
875 int op_flags, struct rib_cmd_info *rc)
876 {
877 RIB_RLOCK_TRACKER;
878 struct route_nhop_data rnd_new;
879 int error = 0;
880
881 error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new);
882 if (error != 0) {
883 if (error == EAGAIN) {
884 /*
885 * Group creation failed, most probably because
886 * @rnd_orig data got scheduled for deletion.
887 * Refresh @rnd_orig data and retry.
888 */
889 RIB_RLOCK(rnh);
890 lookup_prefix_rt(rnh, rt, rnd_orig);
891 RIB_RUNLOCK(rnh);
892 if (rnd_orig == NULL && !(op_flags & RTM_F_CREATE)) {
893 /* In this iteration route doesn't exist */
894 error = ENOENT;
895 }
896 }
897 return (error);
898 }
899 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
900 if (error != 0)
901 return (error);
902
903 if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) {
904 /*
905 * First multipath route got installed. Enable local
906 * outbound connections hashing.
907 */
908 if (bootverbose)
909 printf("FIB: enabled flowid calculation for locally-originated packets\n");
910 V_fib_hash_outbound = 1;
911 }
912
913 return (0);
914 }
915 #endif
916
917 /*
918 * Removes route defined by @info from the kernel table specified by @fibnum and
919 * sa_family in @info->rti_info[RTAX_DST].
920 *
921 * Returns 0 on success and fills in operation metadata into @rc.
922 */
int
rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	struct sockaddr *dst, *netmask;
	struct sockaddr_storage mdst;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	dst = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	if (netmask != NULL) {
		/* Ensure @dst is always properly masked */
		if (dst->sa_len > sizeof(mdst)) {
			FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
			return (EINVAL);
		}
		rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
		dst = (struct sockaddr *)&mdst;
	}

	/*
	 * Prefer the caller-provided filter; otherwise fall back to
	 * matching the first nexthop with the supplied gateway.
	 */
	rib_filter_f_t *filter_func = NULL;
	void *filter_arg = NULL;
	struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] };

	if (info->rti_filter != NULL) {
		filter_func = info->rti_filter;
		filter_arg = info->rti_filterdata;
	} else if (gwd.gw != NULL) {
		filter_func = match_gw_one;
		filter_arg = &gwd;
	}

	/* RTF_PINNED in @info permits deleting higher-priority routes. */
	int prio = get_prio_from_info(info);

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	/* Notify subscribers outside of the RIB lock. */
	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}
997
/*
 * Conditionally unlinks rtentry paths from @rnh matching @cb.
 * Returns 0 on success with operation result stored in @rc.
 * On error, returns:
 * ESRCH - if prefix was not found or filter function failed to match
 * EADDRINUSE - if trying to delete higher priority route.
 */
static int
rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
    int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc)
{
	struct nhop_object *nh = rt->rt_nhop;

#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh)) {
		/* Multipath route: filter individual paths out of the group. */
		struct nhgrp_object *nhg = (struct nhgrp_object *)nh;
		struct route_nhop_data rnd;
		int error;

		/* No filter provided: nothing to select paths with. */
		if (cb == NULL)
			return (ESRCH);
		error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd);
		if (error == 0) {
			if (rnd.rnd_nhgrp == nhg) {
				/* No match, unreference new group and return. */
				nhop_free_any(rnd.rnd_nhop);
				return (ESRCH);
			}
			/* Switch the route to the filtered group/nhop. */
			error = change_route(rnh, rt, &rnd, rc);
		}
		return (error);
	}
#endif
	if (cb != NULL && !cb(rt, nh, cbdata))
		return (ESRCH);

	/* Refuse to remove a higher-priority (e.g. pinned) path. */
	if (prio < nhop_get_prio(nh))
		return (EADDRINUSE);

	return (delete_route(rnh, rt, rc));
}
1039
1040 int
rib_change_route(uint32_t fibnum,struct rt_addrinfo * info,struct rib_cmd_info * rc)1041 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
1042 struct rib_cmd_info *rc)
1043 {
1044 RIB_RLOCK_TRACKER;
1045 struct route_nhop_data rnd_orig;
1046 struct rib_head *rnh;
1047 struct rtentry *rt;
1048 int error;
1049
1050 NET_EPOCH_ASSERT();
1051
1052 rnh = get_rnh(fibnum, info);
1053 if (rnh == NULL)
1054 return (EAFNOSUPPORT);
1055
1056 bzero(rc, sizeof(struct rib_cmd_info));
1057 rc->rc_cmd = RTM_CHANGE;
1058
1059 /* Check if updated gateway exists */
1060 if ((info->rti_flags & RTF_GATEWAY) &&
1061 (info->rti_info[RTAX_GATEWAY] == NULL)) {
1062
1063 /*
1064 * route(8) adds RTF_GATEWAY flag if -interface is not set.
1065 * Remove RTF_GATEWAY to enforce consistency and maintain
1066 * compatibility..
1067 */
1068 info->rti_flags &= ~RTF_GATEWAY;
1069 }
1070
1071 /*
1072 * route change is done in multiple steps, with dropping and
1073 * reacquiring lock. In the situations with multiple processes
1074 * changes the same route in can lead to the case when route
1075 * is changed between the steps. Address it by retrying the operation
1076 * multiple times before failing.
1077 */
1078
1079 RIB_RLOCK(rnh);
1080 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
1081 info->rti_info[RTAX_NETMASK], &rnh->head);
1082
1083 if (rt == NULL) {
1084 RIB_RUNLOCK(rnh);
1085 return (ESRCH);
1086 }
1087
1088 rnd_orig.rnd_nhop = rt->rt_nhop;
1089 rnd_orig.rnd_weight = rt->rt_weight;
1090
1091 RIB_RUNLOCK(rnh);
1092
1093 for (int i = 0; i < RIB_MAX_RETRIES; i++) {
1094 error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
1095 if (error != EAGAIN)
1096 break;
1097 }
1098
1099 return (error);
1100 }
1101
1102 static int
change_nhop(struct rib_head * rnh,struct rt_addrinfo * info,struct nhop_object * nh_orig,struct nhop_object ** nh_new)1103 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
1104 struct nhop_object *nh_orig, struct nhop_object **nh_new)
1105 {
1106 int error;
1107
1108 /*
1109 * New gateway could require new ifaddr, ifp;
1110 * flags may also be different; ifp may be specified
1111 * by ll sockaddr when protocol address is ambiguous
1112 */
1113 if (((nh_orig->nh_flags & NHF_GATEWAY) &&
1114 info->rti_info[RTAX_GATEWAY] != NULL) ||
1115 info->rti_info[RTAX_IFP] != NULL ||
1116 (info->rti_info[RTAX_IFA] != NULL &&
1117 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
1118 error = rt_getifa_fib(info, rnh->rib_fibnum);
1119
1120 if (error != 0) {
1121 info->rti_ifa = NULL;
1122 return (error);
1123 }
1124 }
1125
1126 error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
1127 info->rti_ifa = NULL;
1128
1129 return (error);
1130 }
1131
1132 #ifdef ROUTE_MPATH
1133 static int
change_mpath_route(struct rib_head * rnh,struct rtentry * rt,struct rt_addrinfo * info,struct route_nhop_data * rnd_orig,struct rib_cmd_info * rc)1134 change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
1135 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
1136 struct rib_cmd_info *rc)
1137 {
1138 int error = 0, found_idx = 0;
1139 struct nhop_object *nh_orig = NULL, *nh_new;
1140 struct route_nhop_data rnd_new = {};
1141 const struct weightened_nhop *wn = NULL;
1142 struct weightened_nhop *wn_new;
1143 uint32_t num_nhops;
1144
1145 wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
1146 for (int i = 0; i < num_nhops; i++) {
1147 if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
1148 nh_orig = wn[i].nh;
1149 found_idx = i;
1150 break;
1151 }
1152 }
1153
1154 if (nh_orig == NULL)
1155 return (ESRCH);
1156
1157 error = change_nhop(rnh, info, nh_orig, &nh_new);
1158 if (error != 0)
1159 return (error);
1160
1161 wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
1162 M_TEMP, M_NOWAIT | M_ZERO);
1163 if (wn_new == NULL) {
1164 nhop_free(nh_new);
1165 return (EAGAIN);
1166 }
1167
1168 memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
1169 wn_new[found_idx].nh = nh_new;
1170 wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);
1171
1172 error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp);
1173 nhop_free(nh_new);
1174 free(wn_new, M_TEMP);
1175
1176 if (error != 0)
1177 return (error);
1178
1179 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
1180
1181 return (error);
1182 }
1183 #endif
1184
1185 static int
change_route_byinfo(struct rib_head * rnh,struct rtentry * rt,struct rt_addrinfo * info,struct route_nhop_data * rnd_orig,struct rib_cmd_info * rc)1186 change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
1187 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
1188 struct rib_cmd_info *rc)
1189 {
1190 int error = 0;
1191 struct nhop_object *nh_orig;
1192 struct route_nhop_data rnd_new;
1193
1194 nh_orig = rnd_orig->rnd_nhop;
1195 if (nh_orig == NULL)
1196 return (ESRCH);
1197
1198 #ifdef ROUTE_MPATH
1199 if (NH_IS_NHGRP(nh_orig))
1200 return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
1201 #endif
1202
1203 rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
1204 error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
1205 if (error != 0)
1206 return (error);
1207 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
1208
1209 return (error);
1210 }
1211
1212 /*
1213 * Insert @rt with nhop data from @rnd_new to @rnh.
1214 * Returns 0 on success and stores operation results in @rc.
1215 */
1216 static int
add_route(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd,struct rib_cmd_info * rc)1217 add_route(struct rib_head *rnh, struct rtentry *rt,
1218 struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1219 {
1220 struct radix_node *rn;
1221
1222 RIB_WLOCK_ASSERT(rnh);
1223
1224 rt->rt_nhop = rnd->rnd_nhop;
1225 rt->rt_weight = rnd->rnd_weight;
1226 rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes);
1227
1228 if (rn != NULL) {
1229 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
1230 tmproutes_update(rnh, rt, rnd->rnd_nhop);
1231
1232 /* Finalize notification */
1233 rib_bump_gen(rnh);
1234 rnh->rnh_prefixes++;
1235
1236 rc->rc_cmd = RTM_ADD;
1237 rc->rc_rt = rt;
1238 rc->rc_nh_old = NULL;
1239 rc->rc_nh_new = rnd->rnd_nhop;
1240 rc->rc_nh_weight = rnd->rnd_weight;
1241
1242 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1243 return (0);
1244 }
1245
1246 /* Existing route or memory allocation failure. */
1247 return (EEXIST);
1248 }
1249
1250 /*
1251 * Unconditionally deletes @rt from @rnh.
1252 */
1253 static int
delete_route(struct rib_head * rnh,struct rtentry * rt,struct rib_cmd_info * rc)1254 delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
1255 {
1256 RIB_WLOCK_ASSERT(rnh);
1257
1258 /* Route deletion requested. */
1259 struct radix_node *rn;
1260
1261 rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
1262 if (rn == NULL)
1263 return (ESRCH);
1264 rt = RNTORT(rn);
1265 rt->rte_flags &= ~RTF_UP;
1266
1267 rib_bump_gen(rnh);
1268 rnh->rnh_prefixes--;
1269
1270 rc->rc_cmd = RTM_DELETE;
1271 rc->rc_rt = rt;
1272 rc->rc_nh_old = rt->rt_nhop;
1273 rc->rc_nh_new = NULL;
1274 rc->rc_nh_weight = rt->rt_weight;
1275
1276 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1277
1278 return (0);
1279 }
1280
1281 /*
 * Switch @rt nhop/weight to the ones specified in @rnd.
1283 * Returns 0 on success.
1284 */
1285 int
change_route(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd,struct rib_cmd_info * rc)1286 change_route(struct rib_head *rnh, struct rtentry *rt,
1287 struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1288 {
1289 struct nhop_object *nh_orig;
1290
1291 RIB_WLOCK_ASSERT(rnh);
1292
1293 nh_orig = rt->rt_nhop;
1294
1295 if (rnd->rnd_nhop == NULL)
1296 return (delete_route(rnh, rt, rc));
1297
1298 /* Changing nexthop & weight to a new one */
1299 rt->rt_nhop = rnd->rnd_nhop;
1300 rt->rt_weight = rnd->rnd_weight;
1301 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
1302 tmproutes_update(rnh, rt, rnd->rnd_nhop);
1303
1304 /* Finalize notification */
1305 rib_bump_gen(rnh);
1306 rc->rc_cmd = RTM_CHANGE;
1307 rc->rc_rt = rt;
1308 rc->rc_nh_old = nh_orig;
1309 rc->rc_nh_new = rnd->rnd_nhop;
1310 rc->rc_nh_weight = rnd->rnd_weight;
1311
1312 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1313
1314 return (0);
1315 }
1316
1317 /*
1318 * Conditionally update route nhop/weight IFF data in @nhd_orig is
1319 * consistent with the current route data.
1320 * Nexthop in @nhd_new is consumed.
1321 */
1322 int
change_route_conditional(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd_orig,struct route_nhop_data * rnd_new,struct rib_cmd_info * rc)1323 change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
1324 struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
1325 struct rib_cmd_info *rc)
1326 {
1327 struct rtentry *rt_new;
1328 int error = 0;
1329
1330 IF_DEBUG_LEVEL(LOG_DEBUG2) {
1331 char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
1332 nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
1333 nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
1334 FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
1335 "trying change %s -> %s", buf_old, buf_new);
1336 }
1337 RIB_WLOCK(rnh);
1338
1339 struct route_nhop_data rnd;
1340 rt_new = lookup_prefix_rt(rnh, rt, &rnd);
1341
1342 if (rt_new == NULL) {
1343 if (rnd_orig->rnd_nhop == NULL)
1344 error = add_route(rnh, rt, rnd_new, rc);
1345 else {
1346 /*
1347 * Prefix does not exist, which was not our assumption.
1348 * Update @rnd_orig with the new data and return
1349 */
1350 rnd_orig->rnd_nhop = NULL;
1351 rnd_orig->rnd_weight = 0;
1352 error = EAGAIN;
1353 }
1354 } else {
1355 /* Prefix exists, try to update */
1356 if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
1357 /*
1358 * Nhop/mpath group hasn't changed. Flip
1359 * to the new precalculated one and return
1360 */
1361 error = change_route(rnh, rt_new, rnd_new, rc);
1362 } else {
1363 /* Update and retry */
1364 rnd_orig->rnd_nhop = rt_new->rt_nhop;
1365 rnd_orig->rnd_weight = rt_new->rt_weight;
1366 error = EAGAIN;
1367 }
1368 }
1369
1370 RIB_WUNLOCK(rnh);
1371
1372 if (error == 0) {
1373 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
1374
1375 if (rnd_orig->rnd_nhop != NULL)
1376 nhop_free_any(rnd_orig->rnd_nhop);
1377
1378 } else {
1379 if (rnd_new->rnd_nhop != NULL)
1380 nhop_free_any(rnd_new->rnd_nhop);
1381 }
1382
1383 return (error);
1384 }
1385
1386 /*
 * Performs modification of the routing table specified by @action.
1388 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
1389 * Needs to be run in network epoch.
1390 *
1391 * Returns 0 on success and fills in @rc with action result.
1392 */
1393 int
rib_action(uint32_t fibnum,int action,struct rt_addrinfo * info,struct rib_cmd_info * rc)1394 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
1395 struct rib_cmd_info *rc)
1396 {
1397 int error;
1398
1399 switch (action) {
1400 case RTM_ADD:
1401 error = rib_add_route(fibnum, info, rc);
1402 break;
1403 case RTM_DELETE:
1404 error = rib_del_route(fibnum, info, rc);
1405 break;
1406 case RTM_CHANGE:
1407 error = rib_change_route(fibnum, info, rc);
1408 break;
1409 default:
1410 error = ENOTSUP;
1411 }
1412
1413 return (error);
1414 }
1415
/*
 * State shared between rib_walk_del() and its per-node callback
 * rt_checkdelroute().
 */
struct rt_delinfo
{
	struct rib_head *rnh;		/* table being walked */
	struct rtentry *head;		/* chain of unlinked rtentries to GC */
	rib_filter_f_t *filter_f;	/* optional match callback */
	void *filter_arg;		/* opaque data passed to @filter_f */
	int prio;			/* nhop priority allowed to be deleted */
	struct rib_cmd_info rc;		/* result of the last operation */
};
1425
1426 /*
 * Conditionally unlinks rtentries or paths from the radix tree based
1428 * on the callback data passed in @arg.
1429 */
1430 static int
rt_checkdelroute(struct radix_node * rn,void * arg)1431 rt_checkdelroute(struct radix_node *rn, void *arg)
1432 {
1433 struct rt_delinfo *di = (struct rt_delinfo *)arg;
1434 struct rtentry *rt = (struct rtentry *)rn;
1435
1436 if (rt_delete_conditional(di->rnh, rt, di->prio,
1437 di->filter_f, di->filter_arg, &di->rc) != 0)
1438 return (0);
1439
1440 /*
1441 * Add deleted rtentries to the list to GC them
1442 * after dropping the lock.
1443 *
1444 * XXX: Delayed notifications not implemented
1445 * for nexthop updates.
1446 */
1447 if (di->rc.rc_cmd == RTM_DELETE) {
1448 /* Add to the list and return */
1449 rt->rt_chain = di->head;
1450 di->head = rt;
1451 #ifdef ROUTE_MPATH
1452 } else {
1453 /*
1454 * RTM_CHANGE to a different nexthop or nexthop group.
1455 * Free old multipath group.
1456 */
1457 nhop_free_any(di->rc.rc_nh_old);
1458 #endif
1459 }
1460
1461 return (0);
1462 }
1463
1464 /*
1465 * Iterates over a routing table specified by @fibnum and @family and
1466 * deletes elements marked by @filter_f.
1467 * @fibnum: rtable id
1468 * @family: AF_ address family
1469 * @filter_f: function returning non-zero value for items to delete
1470 * @arg: data to pass to the @filter_f function
1471 * @report: true if rtsock notification is needed.
1472 */
1473 void
rib_walk_del(u_int fibnum,int family,rib_filter_f_t * filter_f,void * filter_arg,bool report)1474 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg,
1475 bool report)
1476 {
1477 struct rib_head *rnh;
1478 struct rtentry *rt;
1479 struct nhop_object *nh;
1480 struct epoch_tracker et;
1481
1482 rnh = rt_tables_get_rnh(fibnum, family);
1483 if (rnh == NULL)
1484 return;
1485
1486 struct rt_delinfo di = {
1487 .rnh = rnh,
1488 .filter_f = filter_f,
1489 .filter_arg = filter_arg,
1490 .prio = NH_PRIORITY_NORMAL,
1491 };
1492
1493 NET_EPOCH_ENTER(et);
1494
1495 RIB_WLOCK(rnh);
1496 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
1497 RIB_WUNLOCK(rnh);
1498
1499 /* We might have something to reclaim. */
1500 bzero(&di.rc, sizeof(di.rc));
1501 di.rc.rc_cmd = RTM_DELETE;
1502 while (di.head != NULL) {
1503 rt = di.head;
1504 di.head = rt->rt_chain;
1505 rt->rt_chain = NULL;
1506 nh = rt->rt_nhop;
1507
1508 di.rc.rc_rt = rt;
1509 di.rc.rc_nh_old = nh;
1510 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);
1511
1512 if (report) {
1513 #ifdef ROUTE_MPATH
1514 struct nhgrp_object *nhg;
1515 const struct weightened_nhop *wn;
1516 uint32_t num_nhops;
1517 if (NH_IS_NHGRP(nh)) {
1518 nhg = (struct nhgrp_object *)nh;
1519 wn = nhgrp_get_nhops(nhg, &num_nhops);
1520 for (int i = 0; i < num_nhops; i++)
1521 rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
1522 } else
1523 #endif
1524 rt_routemsg(RTM_DELETE, rt, nh, fibnum);
1525 }
1526 rt_free(rt);
1527 }
1528
1529 NET_EPOCH_EXIT(et);
1530 }
1531
1532 static int
rt_delete_unconditional(struct radix_node * rn,void * arg)1533 rt_delete_unconditional(struct radix_node *rn, void *arg)
1534 {
1535 struct rtentry *rt = RNTORT(rn);
1536 struct rib_head *rnh = (struct rib_head *)arg;
1537
1538 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
1539 if (RNTORT(rn) == rt)
1540 rt_free(rt);
1541
1542 return (0);
1543 }
1544
1545 /*
1546 * Removes all routes from the routing table without executing notifications.
 * rtentries will be removed after the end of a current epoch.
1548 */
1549 static void
rib_flush_routes(struct rib_head * rnh)1550 rib_flush_routes(struct rib_head *rnh)
1551 {
1552 RIB_WLOCK(rnh);
1553 rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
1554 RIB_WUNLOCK(rnh);
1555 }
1556
1557 void
rib_flush_routes_family(int family)1558 rib_flush_routes_family(int family)
1559 {
1560 struct rib_head *rnh;
1561
1562 for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
1563 if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
1564 rib_flush_routes(rnh);
1565 }
1566 }
1567
1568 const char *
rib_print_family(int family)1569 rib_print_family(int family)
1570 {
1571 switch (family) {
1572 case AF_INET:
1573 return ("inet");
1574 case AF_INET6:
1575 return ("inet6");
1576 case AF_LINK:
1577 return ("link");
1578 }
1579 return ("unknown");
1580 }
1581
1582