1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2020 Alexander V. Chernikov
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 #include "opt_route.h"
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/mbuf.h>
37 #include <sys/socket.h>
38 #include <sys/sysctl.h>
39 #include <sys/syslog.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/rmlock.h>
43
44 #include <net/if.h>
45 #include <net/if_var.h>
46 #include <net/if_private.h>
47 #include <net/if_dl.h>
48 #include <net/vnet.h>
49 #include <net/route.h>
50 #include <net/route/route_ctl.h>
51 #include <net/route/route_var.h>
52 #include <net/route/nhop_utils.h>
53 #include <net/route/nhop.h>
54 #include <net/route/nhop_var.h>
55 #include <netinet/in.h>
56 #include <netinet6/scope6_var.h>
57 #include <netinet6/in6_var.h>
58
59 #define DEBUG_MOD_NAME route_ctl
60 #define DEBUG_MAX_LEVEL LOG_DEBUG
61 #include <net/route/route_debug.h>
62 _DECLARE_DEBUG(LOG_INFO);
63
64 /*
65 * This file contains control plane routing tables functions.
66 *
67 * All functions assume they are called within the network epoch.
68 */
69
70 union sockaddr_union {
71 struct sockaddr sa;
72 struct sockaddr_in sin;
73 struct sockaddr_in6 sin6;
74 char _buf[32];
75 };
76
77 static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
78 struct rib_cmd_info *rc);
79 static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
80 struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
81 struct rib_cmd_info *rc);
82
83 static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
84 struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
85 #ifdef ROUTE_MPATH
86 static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
87 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
88 int op_flags, struct rib_cmd_info *rc);
89 #endif
90
91 static int add_route(struct rib_head *rnh, struct rtentry *rt,
92 struct route_nhop_data *rnd, struct rib_cmd_info *rc);
93 static int delete_route(struct rib_head *rnh, struct rtentry *rt,
94 struct rib_cmd_info *rc);
95 static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
96 int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);
97
98 static bool fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
99 struct sockaddr **pmask);
100 static int get_prio_from_info(const struct rt_addrinfo *info);
101 static int nhop_get_prio(const struct nhop_object *nh);
102
103 #ifdef ROUTE_MPATH
104 static bool rib_can_multipath(struct rib_head *rh);
105 #endif
106
107 /* Per-vnet multipath routing configuration */
108 SYSCTL_DECL(_net_route);
109 #define V_rib_route_multipath VNET(rib_route_multipath)
110 #ifdef ROUTE_MPATH
111 #define _MP_FLAGS CTLFLAG_RW
112 VNET_DEFINE(u_int, rib_route_multipath) = 1;
113 #else
114 #define _MP_FLAGS CTLFLAG_RD
115 VNET_DEFINE(u_int, rib_route_multipath) = 0;
116 #endif
117 SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
118 &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
119 #undef _MP_FLAGS
120
121 #ifdef ROUTE_MPATH
122 VNET_DEFINE(u_int, fib_hash_outbound) = 0;
123 SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
124 &VNET_NAME(fib_hash_outbound), 0,
125 "Compute flowid for locally-originated packets");
126
127 /* Default entropy to add to the hash calculation for outbound connections */
128 uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
129 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
130 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
131 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
132 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
133 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
134 };
135 #endif
136
137 #if defined(INET) && defined(INET6)
138 FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
139 #define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop)
140 VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
141 SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
142 &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
143 #endif
144
145 /* Debug bits */
146 SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
147
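/*
 * Returns the routing table head for @fibnum and the address family of
 * the destination sockaddr in @info, or NULL if no such table exists.
 */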
148 static struct rib_head *
149 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
150 {
151 struct rib_head *rnh;
152 struct sockaddr *dst;
153
154 KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));
155
156 dst = info->rti_info[RTAX_DST];
157 rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
158
159 return (rnh);
160 }
161
162 #if defined(INET) && defined(INET6)
163 bool
164 rib_can_4o6_nhop(void)
165 {
166 return (!!V_rib_route_ipv6_nexthop);
167 }
168 #endif
169
170 #ifdef ROUTE_MPATH
171 static bool
172 rib_can_multipath(struct rib_head *rh)
173 {
174 int result;
175
176 CURVNET_SET(rh->rib_vnet);
177 result = !!V_rib_route_multipath;
178 CURVNET_RESTORE();
179
180 return (result);
181 }
182
183 /*
184 * Check if nexthop @nh is multipath-eligible.
185 * Nexthops without a gateway, as well as redirects, are not eligible.
186 *
187 * Returns true for a multipath-eligible nexthop,
188 * false otherwise.
189 */
190 bool
191 nhop_can_multipath(const struct nhop_object *nh)
192 {
193
194 if ((nh->nh_flags & NHF_MULTIPATH) != 0)
195 return (1);
196 if ((nh->nh_flags & NHF_GATEWAY) == 0)
197 return (0);
198 if ((nh->nh_flags & NHF_REDIRECT) != 0)
199 return (0);
200
201 return (1);
202 }
203 #endif
204
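/*
 * Extracts the route weight from @info when RTV_WEIGHT is set, falling
 * back to @default_weight. Weights above RT_MAX_WEIGHT are clamped and
 * a zero weight is treated as unset.
 */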
205 static int
206 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
207 {
208 uint32_t weight;
209
210 if (info->rti_mflags & RTV_WEIGHT)
211 weight = info->rti_rmx->rmx_weight;
212 else
213 weight = default_weight;
214 /* Keep the upper byte for admin distance purposes */
215 if (weight > RT_MAX_WEIGHT)
216 weight = RT_MAX_WEIGHT;
217 else if (weight == 0)
218 weight = default_weight;
219
220 return (weight);
221 }
222
223 /*
224 * File-local concept for distinguishing between normal routes and
225 * RTF_PINNED routes that can override the "normal" ones.
226 */
227 #define NH_PRIORITY_HIGH 2
228 #define NH_PRIORITY_NORMAL 1
229 static int
230 get_prio_from_info(const struct rt_addrinfo *info)
231 {
232 if (info->rti_flags & RTF_PINNED)
233 return (NH_PRIORITY_HIGH);
234 return (NH_PRIORITY_NORMAL);
235 }
236
237 static int
238 nhop_get_prio(const struct nhop_object *nh)
239 {
240 if (NH_IS_PINNED(nh))
241 return (NH_PRIORITY_HIGH);
242 return (NH_PRIORITY_NORMAL);
243 }
244
245 /*
246 * Check if specified @gw matches gw data in the nexthop @nh.
247 *
248 * Returns true if matches, false otherwise.
249 */
250 bool
251 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
252 {
253
254 if (nh->gw_sa.sa_family != gw->sa_family)
255 return (false);
256
257 switch (gw->sa_family) {
258 case AF_INET:
259 return (nh->gw4_sa.sin_addr.s_addr ==
260 ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
261 case AF_INET6:
262 {
263 const struct sockaddr_in6 *gw6;
264 gw6 = (const struct sockaddr_in6 *)gw;
265
266 /*
267 * Currently (2020-09) IPv6 gws in kernel have their
268 * scope embedded. Once this becomes false, this code
269 * has to be revisited.
270 */
271 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
272 &gw6->sin6_addr))
273 return (true);
274 return (false);
275 }
276 case AF_LINK:
277 {
278 const struct sockaddr_dl *sdl;
279 sdl = (const struct sockaddr_dl *)gw;
280 return (nh->gwl_sa.sdl_index == sdl->sdl_index);
281 }
282 default:
283 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
284 }
285
286 /* NOTREACHED */
287 return (false);
288 }
289
290 /*
291 * Matches any nexthop with the given @gw.
292 * Can be used as rib_filter_f callback.
293 */
294 int
295 rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa)
296 {
297 const struct sockaddr *gw = (const struct sockaddr *)gw_sa;
298
299 return (match_nhop_gw(nh, gw));
300 }
301
302 struct gw_filter_data {
303 const struct sockaddr *gw;
304 int count;
305 };
306
307 /*
308 * Matches the first occurrence of the gateway provided in @gwd.
309 */
310 static int
311 match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
312 {
313 struct gw_filter_data *gwd = (struct gw_filter_data *)_data;
314
315 /* Return only first match to make rtsock happy */
316 if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0)
317 return (1);
318 return (0);
319 }
320
321 /*
322 * Checks if data in @info matches nexthop @nh.
323 *
324 * Returns 0 on success,
325 * ESRCH if not matched,
326 * ENOENT if filter function returned false
327 */
328 int
329 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
330 const struct nhop_object *nh)
331 {
332 const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];
333
334 if (info->rti_filter != NULL) {
335 if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
336 return (ENOENT);
337 else
338 return (0);
339 }
340 if ((gw != NULL) && !match_nhop_gw(nh, gw))
341 return (ESRCH);
342
343 return (0);
344 }
345
346 /*
347 * Runs exact prefix match based on @dst and @netmask.
348 * Returns matched @rtentry if found or NULL.
349 * If rtentry was found, saves nexthop / weight value into @rnd.
350 */
351 static struct rtentry *
352 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
353 const struct sockaddr *netmask, struct route_nhop_data *rnd)
354 {
355 struct rtentry *rt;
356
357 RIB_LOCK_ASSERT(rnh);
358
359 rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
360 if (rt != NULL) {
361 rnd->rnd_nhop = rt->rt_nhop;
362 rnd->rnd_weight = rt->rt_weight;
363 } else {
364 rnd->rnd_nhop = NULL;
365 rnd->rnd_weight = 0;
366 }
367
368 return (rt);
369 }
370
371 struct rtentry *
372 lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
373 struct route_nhop_data *rnd)
374 {
375 return (lookup_prefix_bysa(rnh, rt_key_const(rt), rt_mask_const(rt), rnd));
376 }
377
378 /*
379 * Runs exact prefix match based on dst/netmask from @info.
380 * Assumes RIB lock is held.
381 * Returns matched @rtentry if found or NULL.
382 * If rtentry was found, saves nexthop / weight value into @rnd.
383 */
384 struct rtentry *
385 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
386 struct route_nhop_data *rnd)
387 {
388 struct rtentry *rt;
389
390 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
391 info->rti_info[RTAX_NETMASK], rnd);
392
393 return (rt);
394 }
395
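/*
 * Runs exact prefix match for @dst/@plen in @rnh.
 * Assumes the RIB lock is held. @dst may be masked in place to match @plen.
 * Returns the matched rtentry (with nexthop/weight saved in @rnd) or NULL.
 */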
396 const struct rtentry *
397 rib_lookup_prefix_plen(struct rib_head *rnh, struct sockaddr *dst, int plen,
398 struct route_nhop_data *rnd)
399 {
400 union sockaddr_union mask_storage;
401 struct sockaddr *netmask = &mask_storage.sa;
402
403 if (fill_pxmask_family(dst->sa_family, plen, dst, &netmask))
404 return (lookup_prefix_bysa(rnh, dst, netmask, rnd));
405 return (NULL);
406 }
407
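/*
 * Builds a netmask sockaddr for @family/@plen in the storage pointed to by
 * *@pmask and may mask @_dst in place. Sets *@pmask to NULL for host routes
 * (plen == -1 or full-length prefixes).
 * Returns true on success, false if @plen or @family is invalid.
 */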
408 static bool
409 fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
410 struct sockaddr **pmask)
411 {
412 if (plen == -1) {
413 *pmask = NULL;
414 return (true);
415 }
416
417 switch (family) {
418 #ifdef INET
419 case AF_INET:
420 {
421 struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask);
422 struct sockaddr_in *dst = (struct sockaddr_in *)_dst;
423
424 memset(mask, 0, sizeof(*mask));
425 mask->sin_family = family;
426 mask->sin_len = sizeof(*mask);
427 if (plen == 32)
428 *pmask = NULL;
429 else if (plen > 32 || plen < 0)
430 return (false);
431 else {
432 uint32_t daddr, maddr;
433 maddr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
434 mask->sin_addr.s_addr = maddr;
435 daddr = dst->sin_addr.s_addr;
436 daddr = htonl(ntohl(daddr) & ntohl(maddr));
437 dst->sin_addr.s_addr = daddr;
438 }
439 return (true);
440 }
441 break;
442 #endif
443 #ifdef INET6
444 case AF_INET6:
445 {
446 struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask);
447 struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst;
448
449 memset(mask, 0, sizeof(*mask));
450 mask->sin6_family = family;
451 mask->sin6_len = sizeof(*mask);
452 if (plen == 128)
453 *pmask = NULL;
454 else if (plen > 128 || plen < 0)
455 return (false);
456 else {
457 ip6_writemask(&mask->sin6_addr, plen);
458 IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr);
459 }
460 return (true);
461 }
462 break;
463 #endif
464 }
465 return (false);
466 }
467
468 /*
469 * Attempts to add @dst/plen prefix with nexthop/nexthop group data @rnd
470 * to the routing table.
471 *
472 * @fibnum: verified kernel rtable id to insert route to
473 * @dst: verified kernel-originated sockaddr, may be masked in place if plen is specified
474 * @plen: prefix length (or -1 if host route or not applicable for AF)
475 * @op_flags: combination of RTM_F_ flags
476 * @rc: storage to report operation result
477 *
478 * Returns 0 on success.
479 */
480 int
481 rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
482 struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc)
483 {
484 union sockaddr_union mask_storage;
485 struct sockaddr *netmask = &mask_storage.sa;
486 struct rtentry *rt = NULL;
487
488 NET_EPOCH_ASSERT();
489
490 bzero(rc, sizeof(struct rib_cmd_info));
491 rc->rc_cmd = RTM_ADD;
492
493 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
494 if (rnh == NULL)
495 return (EAFNOSUPPORT);
496
497 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
498 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
499 return (EINVAL);
500 }
501
502 if (op_flags & RTM_F_CREATE) {
503 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) {
504 FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed");
505 return (ENOMEM);
506 }
507 } else {
508 struct route_nhop_data rnd_tmp;
509 RIB_RLOCK_TRACKER;
510
511 RIB_RLOCK(rnh);
512 rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd_tmp);
513 RIB_RUNLOCK(rnh);
514
515 if (rt == NULL)
516 return (ESRCH);
517 }
518
519 return (add_route_flags(rnh, rt, rnd, op_flags, rc));
520 }
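
/*
 * A minimal usage sketch (not taken from an in-tree caller): adding a
 * prefix route with an already-finalized nexthop @nh from within the
 * network epoch. The variables fibnum/dst/plen/nh are assumptions of
 * this example.
 *
 *	struct route_nhop_data rnd = {
 *		.rnd_nhop = nh,
 *		.rnd_weight = RT_DEFAULT_WEIGHT,
 *	};
 *	struct rib_cmd_info rc;
 *	int error;
 *
 *	error = rib_add_route_px(fibnum, dst, plen, &rnd, RTM_F_CREATE, &rc);
 */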
521
522 /*
523 * Attempts to delete @dst/plen prefix matching gateway @gw from the
524 * routing table.
525 *
526 * @fibnum: rtable id to remove route from
527 * @dst: verified kernel-originated sockaddr, may be masked in place if plen is specified
528 * @plen: prefix length (or -1 if host route or not applicable for AF)
529 * @gw: gateway to match
530 * @op_flags: combination of RTM_F_ flags
531 * @rc: storage to report operation result
532 *
533 * Returns 0 on success.
534 */
535 int
536 rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen,
537 const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
538 {
539 struct gw_filter_data gwd = { .gw = gw };
540
541 return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc));
542 }
543
544 /*
545 * Attempts to delete @dst/plen prefix matching @filter_func from the
546 * routing table.
547 *
548 * @fibnum: rtable id to remove route from
549 * @dst: verified kernel-originated sockaddr, may be masked in place if plen is specified
550 * @plen: prefix length (or -1 if host route or not applicable for AF)
551 * @filter_func: func to be called for each nexthop of the prefix for matching
552 * @filter_arg: argument to pass to @filter_func
553 * @op_flags: combination of RTM_F_ flags
554 * @rc: storage to report operation result
555 *
556 * Returns 0 on success.
557 */
558 int
559 rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
560 rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
561 struct rib_cmd_info *rc)
562 {
563 union sockaddr_union mask_storage;
564 struct sockaddr *netmask = &mask_storage.sa;
565 int error;
566
567 NET_EPOCH_ASSERT();
568
569 bzero(rc, sizeof(struct rib_cmd_info));
570 rc->rc_cmd = RTM_DELETE;
571
572 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
573 if (rnh == NULL)
574 return (EAFNOSUPPORT);
575
576 if (dst->sa_len > sizeof(mask_storage)) {
577 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
578 return (EINVAL);
579 }
580
581 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
582 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
583 return (EINVAL);
584 }
585
586 int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;
587
588 RIB_WLOCK(rnh);
589 struct route_nhop_data rnd;
590 struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
591 if (rt != NULL) {
592 error = rt_delete_conditional(rnh, rt, prio, filter_func,
593 filter_arg, rc);
594 } else
595 error = ESRCH;
596 RIB_WUNLOCK(rnh);
597
598 if (error != 0)
599 return (error);
600
601 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
602
603 if (rc->rc_cmd == RTM_DELETE)
604 rt_free(rc->rc_rt);
605 #ifdef ROUTE_MPATH
606 else {
607 /*
608 * Deleting 1 path may result in RTM_CHANGE to
609 * a different mpath group/nhop.
610 * Free old mpath group.
611 */
612 nhop_free_any(rc->rc_nh_old);
613 }
614 #endif
615
616 return (0);
617 }
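
/*
 * A minimal sketch of a custom @filter_func for rib_del_route_px(). The
 * filter below and the surrounding variables are illustrative, not in-tree
 * code: delete the prefix only if it resolves via a particular interface.
 *
 *	static int
 *	match_nh_ifp(const struct rtentry *rt, const struct nhop_object *nh,
 *	    void *_ifp)
 *	{
 *		return (nhop_get_ifp(nh) == (struct ifnet *)_ifp);
 *	}
 *	...
 *	error = rib_del_route_px(fibnum, dst, plen, match_nh_ifp, ifp, 0, &rc);
 */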
618
619 /*
620 * Tries to copy route @rt from one rtable to the rtable specified by @rh_dst.
621 * @rt: route to copy.
622 * @rnd_src: nhop and weight. Multipath routes are not supported
623 * @rh_dst: target rtable.
624 * @rc: operation result storage
625 *
626 * Return 0 on success.
627 */
628 int
629 rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src,
630 struct rib_head *rh_dst, struct rib_cmd_info *rc)
631 {
632 struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop;
633 int error;
634
635 MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0);
636
637 IF_DEBUG_LEVEL(LOG_DEBUG2) {
638 char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE];
639 nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf));
640 rt_print_buf(rt, rtbuf, sizeof(rtbuf));
641 FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u",
642 rtbuf, nhbuf, nhop_get_fibnum(nh_src));
643 }
644 struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family);
645 if (nh == NULL) {
646 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop");
647 return (ENOMEM);
648 }
649 nhop_copy(nh, rnd_src->rnd_nhop);
650 nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop));
651 nhop_set_fibnum(nh, rh_dst->rib_fibnum);
652 nh = nhop_get_nhop_internal(rh_dst, nh, &error);
653 if (error != 0) {
654 FIB_RH_LOG(LOG_INFO, rh_dst,
655 "unable to finalize new nexthop: error %d", error);
656 return (ENOMEM);
657 }
658
659 struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt));
660 if (rt_new == NULL) {
661 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry");
662 nhop_free(nh);
663 return (ENOMEM);
664 }
665
666 struct route_nhop_data rnd = {
667 .rnd_nhop = nh,
668 .rnd_weight = rnd_src->rnd_weight
669 };
670 int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? RTM_F_FORCE : 0);
671 error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc);
672
673 if (error != 0) {
674 IF_DEBUG_LEVEL(LOG_DEBUG2) {
675 char buf[NHOP_PRINT_BUFSIZE];
676 rt_print_buf(rt_new, buf, sizeof(buf));
677 FIB_RH_LOG(LOG_DEBUG, rh_dst,
678 "Unable to add route %s: error %d", buf, error);
679 }
680 nhop_free(nh);
681 rt_free_immediate(rt_new);
682 }
683 return (error);
684 }
685
686 /*
687 * Adds route defined by @info into the kernel table specified by @fibnum and
688 * sa_family in @info->rti_info[RTAX_DST].
689 *
690 * Returns 0 on success and fills in operation metadata into @rc.
691 */
692 int
693 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
694 struct rib_cmd_info *rc)
695 {
696 struct rib_head *rnh;
697 int error;
698
699 NET_EPOCH_ASSERT();
700
701 rnh = get_rnh(fibnum, info);
702 if (rnh == NULL)
703 return (EAFNOSUPPORT);
704
705 /*
706 * Check consistency between RTF_HOST flag and netmask
707 * existence.
708 */
709 if (info->rti_flags & RTF_HOST)
710 info->rti_info[RTAX_NETMASK] = NULL;
711 else if (info->rti_info[RTAX_NETMASK] == NULL) {
712 FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask");
713 return (EINVAL);
714 }
715
716 bzero(rc, sizeof(struct rib_cmd_info));
717 rc->rc_cmd = RTM_ADD;
718
719 error = add_route_byinfo(rnh, info, rc);
720 if (error == 0)
721 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
722
723 return (error);
724 }
725
726 static int
727 add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
728 struct rib_cmd_info *rc)
729 {
730 struct route_nhop_data rnd_add;
731 struct nhop_object *nh;
732 struct rtentry *rt;
733 struct sockaddr *dst, *gateway, *netmask;
734 int error;
735
736 dst = info->rti_info[RTAX_DST];
737 gateway = info->rti_info[RTAX_GATEWAY];
738 netmask = info->rti_info[RTAX_NETMASK];
739
740 if ((info->rti_flags & RTF_GATEWAY) && !gateway) {
741 FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
742 return (EINVAL);
743 }
744 if (dst && gateway && !nhop_check_gateway(dst->sa_family, gateway->sa_family)) {
745 FIB_RH_LOG(LOG_DEBUG, rnh,
746 "error: invalid dst/gateway family combination (%d, %d)",
747 dst->sa_family, gateway->sa_family);
748 return (EINVAL);
749 }
750
751 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) {
752 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d",
753 dst->sa_len);
754 return (EINVAL);
755 }
756
757 if (info->rti_ifa == NULL) {
758 error = rt_getifa_fib(info, rnh->rib_fibnum);
759 if (error)
760 return (error);
761 }
762
763 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL)
764 return (ENOBUFS);
765
766 error = nhop_create_from_info(rnh, info, &nh);
767 if (error != 0) {
768 rt_free_immediate(rt);
769 return (error);
770 }
771
772 rnd_add.rnd_nhop = nh;
773 rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);
774
775 int op_flags = RTM_F_CREATE;
776
777 /*
778 * Set the desired action when the route already exists:
779 * If RTF_PINNED is present, assume a directly-installed kernel route that
780 * cannot be multipath, and replace the existing one. Otherwise, append the path.
781 */
782 op_flags |= (info->rti_flags & RTF_PINNED) ? RTM_F_REPLACE : RTM_F_APPEND;
783
784 return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc));
785 }
786
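/*
 * Inserts, appends or replaces the route defined by @rt/@rnd_add in @rnh,
 * depending on the RTM_F_ flags in @op_flags and on whether the prefix
 * already exists.
 * Returns 0 on success and stores the operation result in @rc.
 */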
787 static int
788 add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add,
789 int op_flags, struct rib_cmd_info *rc)
790 {
791 struct route_nhop_data rnd_orig;
792 struct nhop_object *nh;
793 struct rtentry *rt_orig;
794 int error = 0;
795
796 MPASS(rt != NULL);
797
798 nh = rnd_add->rnd_nhop;
799
800 RIB_WLOCK(rnh);
801
802 rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig);
803
804 if (rt_orig == NULL) {
805 if (op_flags & RTM_F_CREATE)
806 error = add_route(rnh, rt, rnd_add, rc);
807 else
808 error = ESRCH; /* no existing entry and creation was not requested */
809 RIB_WUNLOCK(rnh);
810 if (error != 0)
811 goto out;
812 return (0);
813 }
814
815 if (op_flags & RTM_F_EXCL) {
816 /* We have an existing route in the RIB but are not allowed to replace it. */
817 RIB_WUNLOCK(rnh);
818 error = EEXIST;
819 goto out;
820 }
821
822 /* Now either append or replace */
823 if (op_flags & RTM_F_REPLACE) {
824 if (nhop_get_prio(rnd_orig.rnd_nhop) == NH_PRIORITY_HIGH) {
825 /* Old path is "better" (e.g. has PINNED flag set) */
826 RIB_WUNLOCK(rnh);
827 error = EEXIST;
828 goto out;
829 }
830 change_route(rnh, rt_orig, rnd_add, rc);
831 RIB_WUNLOCK(rnh);
832 nh = rc->rc_nh_old;
833 goto out;
834 }
835
836 RIB_WUNLOCK(rnh);
837
838 #ifdef ROUTE_MPATH
839 if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) &&
840 nhop_can_multipath(rnd_add->rnd_nhop) &&
841 nhop_can_multipath(rnd_orig.rnd_nhop)) {
842
843 for (int i = 0; i < RIB_MAX_RETRIES; i++) {
844 error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig,
845 op_flags, rc);
846 if (error != EAGAIN)
847 break;
848 RTSTAT_INC(rts_add_retry);
849 }
850
851 /*
852 * Original nhop reference is unused in any case.
853 */
854 nhop_free_any(rnd_add->rnd_nhop);
855 if (op_flags & RTM_F_CREATE) {
856 if (error != 0 || rc->rc_cmd != RTM_ADD)
857 rt_free_immediate(rt);
858 }
859 return (error);
860 }
861 #endif
862 /* Out of options - free state and return error */
863 error = EEXIST;
864 out:
865 if (op_flags & RTM_F_CREATE)
866 rt_free_immediate(rt);
867 nhop_free_any(nh);
868
869 return (error);
870 }
871
872 #ifdef ROUTE_MPATH
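/*
 * Merges nexthop @rnd_add into the existing paths of @rt (@rnd_orig),
 * building a new nexthop group and switching the route to it.
 * Returns 0 on success, EAGAIN if @rnd_orig changed underneath and the
 * caller needs to retry with the refreshed data.
 */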
873 static int
874 add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
875 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
876 int op_flags, struct rib_cmd_info *rc)
877 {
878 RIB_RLOCK_TRACKER;
879 struct route_nhop_data rnd_new;
880 int error = 0;
881
882 error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new);
883 if (error != 0) {
884 if (error == EAGAIN) {
885 /*
886 * Group creation failed, most probably because
887 * @rnd_orig data got scheduled for deletion.
888 * Refresh @rnd_orig data and retry.
889 */
890 RIB_RLOCK(rnh);
891 lookup_prefix_rt(rnh, rt, rnd_orig);
892 RIB_RUNLOCK(rnh);
893 if (rnd_orig->rnd_nhop == NULL && !(op_flags & RTM_F_CREATE)) {
894 /* In this iteration route doesn't exist */
895 error = ENOENT;
896 }
897 }
898 return (error);
899 }
900 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
901 if (error != 0)
902 return (error);
903
904 if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) {
905 /*
906 * First multipath route got installed. Enable local
907 * outbound connections hashing.
908 */
909 if (bootverbose)
910 printf("FIB: enabled flowid calculation for locally-originated packets\n");
911 V_fib_hash_outbound = 1;
912 }
913
914 return (0);
915 }
916 #endif
917
918 /*
919 * Removes route defined by @info from the kernel table specified by @fibnum and
920 * sa_family in @info->rti_info[RTAX_DST].
921 *
922 * Returns 0 on success and fills in operation metadata into @rc.
923 */
924 int
925 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
926 {
927 struct rib_head *rnh;
928 struct sockaddr *dst, *netmask;
929 struct sockaddr_storage mdst;
930 int error;
931
932 NET_EPOCH_ASSERT();
933
934 rnh = get_rnh(fibnum, info);
935 if (rnh == NULL)
936 return (EAFNOSUPPORT);
937
938 bzero(rc, sizeof(struct rib_cmd_info));
939 rc->rc_cmd = RTM_DELETE;
940
941 dst = info->rti_info[RTAX_DST];
942 netmask = info->rti_info[RTAX_NETMASK];
943
944 if (netmask != NULL) {
945 /* Ensure @dst is always properly masked */
946 if (dst->sa_len > sizeof(mdst)) {
947 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
948 return (EINVAL);
949 }
950 rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
951 dst = (struct sockaddr *)&mdst;
952 }
953
954 rib_filter_f_t *filter_func = NULL;
955 void *filter_arg = NULL;
956 struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] };
957
958 if (info->rti_filter != NULL) {
959 filter_func = info->rti_filter;
960 filter_arg = info->rti_filterdata;
961 } else if (gwd.gw != NULL) {
962 filter_func = match_gw_one;
963 filter_arg = &gwd;
964 }
965
966 int prio = get_prio_from_info(info);
967
968 RIB_WLOCK(rnh);
969 struct route_nhop_data rnd;
970 struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
971 if (rt != NULL) {
972 error = rt_delete_conditional(rnh, rt, prio, filter_func,
973 filter_arg, rc);
974 } else
975 error = ESRCH;
976 RIB_WUNLOCK(rnh);
977
978 if (error != 0)
979 return (error);
980
981 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
982
983 if (rc->rc_cmd == RTM_DELETE)
984 rt_free(rc->rc_rt);
985 #ifdef ROUTE_MPATH
986 else {
987 /*
988 * Deleting 1 path may result in RTM_CHANGE to
989 * a different mpath group/nhop.
990 * Free old mpath group.
991 */
992 nhop_free_any(rc->rc_nh_old);
993 }
994 #endif
995
996 return (0);
997 }
998
999 /*
1000 * Conditionally unlinks rtentry paths from @rnh matching @cb.
1001 * Returns 0 on success with operation result stored in @rc.
1002 * On error, returns:
1003 * ESRCH - if prefix was not found or filter function failed to match
1004 * EADDRINUSE - if trying to delete a higher-priority route.
1005 */
1006 static int
1007 rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
1008 int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc)
1009 {
1010 struct nhop_object *nh = rt->rt_nhop;
1011
1012 #ifdef ROUTE_MPATH
1013 if (NH_IS_NHGRP(nh)) {
1014 struct nhgrp_object *nhg = (struct nhgrp_object *)nh;
1015 struct route_nhop_data rnd;
1016 int error;
1017
1018 if (cb == NULL)
1019 return (ESRCH);
1020 error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd);
1021 if (error == 0) {
1022 if (rnd.rnd_nhgrp == nhg) {
1023 /* No match, unreference new group and return. */
1024 nhop_free_any(rnd.rnd_nhop);
1025 return (ESRCH);
1026 }
1027 error = change_route(rnh, rt, &rnd, rc);
1028 }
1029 return (error);
1030 }
1031 #endif
1032 if (cb != NULL && !cb(rt, nh, cbdata))
1033 return (ESRCH);
1034
1035 if (prio < nhop_get_prio(nh))
1036 return (EADDRINUSE);
1037
1038 return (delete_route(rnh, rt, rc));
1039 }
1040
1041 int
1042 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
1043 struct rib_cmd_info *rc)
1044 {
1045 RIB_RLOCK_TRACKER;
1046 struct route_nhop_data rnd_orig;
1047 struct rib_head *rnh;
1048 struct rtentry *rt;
1049 int error;
1050
1051 NET_EPOCH_ASSERT();
1052
1053 rnh = get_rnh(fibnum, info);
1054 if (rnh == NULL)
1055 return (EAFNOSUPPORT);
1056
1057 bzero(rc, sizeof(struct rib_cmd_info));
1058 rc->rc_cmd = RTM_CHANGE;
1059
1060 /* Check if updated gateway exists */
1061 if ((info->rti_flags & RTF_GATEWAY) &&
1062 (info->rti_info[RTAX_GATEWAY] == NULL)) {
1063
1064 /*
1065 * route(8) adds RTF_GATEWAY flag if -interface is not set.
1066 * Remove RTF_GATEWAY to enforce consistency and maintain
1067 * compatibility.
1068 */
1069 info->rti_flags &= ~RTF_GATEWAY;
1070 }
1071
1072 /*
1073 * A route change is done in multiple steps, with the lock dropped
1074 * and reacquired in between. When multiple processes change the same
1075 * route concurrently, the route may be modified between the steps.
1076 * Address this by retrying the operation multiple times before
1077 * failing.
1078 */
1079
1080 RIB_RLOCK(rnh);
1081 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
1082 info->rti_info[RTAX_NETMASK], &rnh->head);
1083
1084 if (rt == NULL) {
1085 RIB_RUNLOCK(rnh);
1086 return (ESRCH);
1087 }
1088
1089 rnd_orig.rnd_nhop = rt->rt_nhop;
1090 rnd_orig.rnd_weight = rt->rt_weight;
1091
1092 RIB_RUNLOCK(rnh);
1093
1094 for (int i = 0; i < RIB_MAX_RETRIES; i++) {
1095 error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
1096 if (error != EAGAIN)
1097 break;
1098 }
1099
1100 return (error);
1101 }
1102
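/*
 * Creates a new nexthop @nh_new based on @nh_orig, modified according to
 * @info. Re-resolves ifa/ifp when the gateway or interface changes.
 * Returns 0 on success.
 */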
1103 static int
1104 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
1105 struct nhop_object *nh_orig, struct nhop_object **nh_new)
1106 {
1107 int error;
1108
1109 /*
1110 * New gateway could require new ifaddr, ifp;
1111 * flags may also be different; ifp may be specified
1112 * by a link-level sockaddr when the protocol address is ambiguous
1113 */
1114 if (((nh_orig->nh_flags & NHF_GATEWAY) &&
1115 info->rti_info[RTAX_GATEWAY] != NULL) ||
1116 info->rti_info[RTAX_IFP] != NULL ||
1117 (info->rti_info[RTAX_IFA] != NULL &&
1118 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
1119 error = rt_getifa_fib(info, rnh->rib_fibnum);
1120
1121 if (error != 0) {
1122 info->rti_ifa = NULL;
1123 return (error);
1124 }
1125 }
1126
1127 error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
1128 info->rti_ifa = NULL;
1129
1130 return (error);
1131 }
1132
1133 #ifdef ROUTE_MPATH
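/*
 * Changes a single path of the multipath route @rt: finds the nexthop in
 * the current group matching @info, builds an updated nexthop, constructs
 * a new group with it and conditionally switches the route to that group.
 * Returns 0 on success, ESRCH if no path matched @info.
 */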
1134 static int
1135 change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
1136 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
1137 struct rib_cmd_info *rc)
1138 {
1139 int error = 0, found_idx = 0;
1140 struct nhop_object *nh_orig = NULL, *nh_new;
1141 struct route_nhop_data rnd_new = {};
1142 const struct weightened_nhop *wn = NULL;
1143 struct weightened_nhop *wn_new;
1144 uint32_t num_nhops;
1145
1146 wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
1147 for (int i = 0; i < num_nhops; i++) {
1148 if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
1149 nh_orig = wn[i].nh;
1150 found_idx = i;
1151 break;
1152 }
1153 }
1154
1155 if (nh_orig == NULL)
1156 return (ESRCH);
1157
1158 error = change_nhop(rnh, info, nh_orig, &nh_new);
1159 if (error != 0)
1160 return (error);
1161
1162 wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
1163 M_TEMP, M_NOWAIT | M_ZERO);
1164 if (wn_new == NULL) {
1165 nhop_free(nh_new);
1166 return (EAGAIN);
1167 }
1168
1169 memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
1170 wn_new[found_idx].nh = nh_new;
1171 wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);
1172
1173 error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp);
1174 nhop_free(nh_new);
1175 free(wn_new, M_TEMP);
1176
1177 if (error != 0)
1178 return (error);
1179
1180 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
1181
1182 return (error);
1183 }
1184 #endif
1185
1186 static int
1187 change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
1188 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
1189 struct rib_cmd_info *rc)
1190 {
1191 int error = 0;
1192 struct nhop_object *nh_orig;
1193 struct route_nhop_data rnd_new;
1194
1195 nh_orig = rnd_orig->rnd_nhop;
1196 if (nh_orig == NULL)
1197 return (ESRCH);
1198
1199 #ifdef ROUTE_MPATH
1200 if (NH_IS_NHGRP(nh_orig))
1201 return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
1202 #endif
1203
1204 rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
1205 error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
1206 if (error != 0)
1207 return (error);
1208 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
1209
1210 return (error);
1211 }
1212
1213 /*
1214 * Insert @rt with nhop data from @rnd into @rnh.
1215 * Returns 0 on success and stores operation results in @rc.
1216 */
1217 static int
1218 add_route(struct rib_head *rnh, struct rtentry *rt,
1219 struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1220 {
1221 struct radix_node *rn;
1222
1223 RIB_WLOCK_ASSERT(rnh);
1224
1225 rt->rt_nhop = rnd->rnd_nhop;
1226 rt->rt_weight = rnd->rnd_weight;
1227 rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes);
1228
1229 if (rn != NULL) {
1230 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
1231 tmproutes_update(rnh, rt, rnd->rnd_nhop);
1232
1233 /* Finalize notification */
1234 rib_bump_gen(rnh);
1235 rnh->rnh_prefixes++;
1236
1237 rc->rc_cmd = RTM_ADD;
1238 rc->rc_rt = rt;
1239 rc->rc_nh_old = NULL;
1240 rc->rc_nh_new = rnd->rnd_nhop;
1241 rc->rc_nh_weight = rnd->rnd_weight;
1242
1243 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1244 return (0);
1245 }
1246
1247 /* Existing route or memory allocation failure. */
1248 return (EEXIST);
1249 }
1250
1251 /*
1252 * Unconditionally deletes @rt from @rnh.
1253 */
1254 static int
1255 delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
1256 {
1257 RIB_WLOCK_ASSERT(rnh);
1258
1259 /* Route deletion requested. */
1260 struct radix_node *rn;
1261
1262 rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
1263 if (rn == NULL)
1264 return (ESRCH);
1265 rt = RNTORT(rn);
1266 rt->rte_flags &= ~RTF_UP;
1267
1268 rib_bump_gen(rnh);
1269 rnh->rnh_prefixes--;
1270
1271 rc->rc_cmd = RTM_DELETE;
1272 rc->rc_rt = rt;
1273 rc->rc_nh_old = rt->rt_nhop;
1274 rc->rc_nh_new = NULL;
1275 rc->rc_nh_weight = rt->rt_weight;
1276
1277 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1278
1279 return (0);
1280 }
1281
1282 /*
1283 * Switch @rt nhop/weight to the ones specified in @rnd.
1284 * Returns 0 on success.
1285 */
1286 int
1287 change_route(struct rib_head *rnh, struct rtentry *rt,
1288 struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1289 {
1290 struct nhop_object *nh_orig;
1291
1292 RIB_WLOCK_ASSERT(rnh);
1293
1294 nh_orig = rt->rt_nhop;
1295
1296 if (rnd->rnd_nhop == NULL)
1297 return (delete_route(rnh, rt, rc));
1298
1299 /* Changing nexthop & weight to a new one */
1300 rt->rt_nhop = rnd->rnd_nhop;
1301 rt->rt_weight = rnd->rnd_weight;
1302 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
1303 tmproutes_update(rnh, rt, rnd->rnd_nhop);
1304
1305 /* Finalize notification */
1306 rib_bump_gen(rnh);
1307 rc->rc_cmd = RTM_CHANGE;
1308 rc->rc_rt = rt;
1309 rc->rc_nh_old = nh_orig;
1310 rc->rc_nh_new = rnd->rnd_nhop;
1311 rc->rc_nh_weight = rnd->rnd_weight;
1312
1313 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1314
1315 return (0);
1316 }
1317
1318 /*
1319 * Conditionally update route nhop/weight IFF data in @rnd_orig is
1320 * consistent with the current route data.
1321 * Nexthop in @rnd_new is consumed.
1322 */
1323 int
1324 change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
1325 struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
1326 struct rib_cmd_info *rc)
1327 {
1328 struct rtentry *rt_new;
1329 int error = 0;
1330
1331 IF_DEBUG_LEVEL(LOG_DEBUG2) {
1332 char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
1333 nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
1334 nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
1335 FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
1336 "trying change %s -> %s", buf_old, buf_new);
1337 }
1338 RIB_WLOCK(rnh);
1339
1340 struct route_nhop_data rnd;
1341 rt_new = lookup_prefix_rt(rnh, rt, &rnd);
1342
1343 if (rt_new == NULL) {
1344 if (rnd_orig->rnd_nhop == NULL)
1345 error = add_route(rnh, rt, rnd_new, rc);
1346 else {
1347 /*
1348 * Prefix does not exist, which was not our assumption.
1349 * Update @rnd_orig with the new data and return
1350 */
1351 rnd_orig->rnd_nhop = NULL;
1352 rnd_orig->rnd_weight = 0;
1353 error = EAGAIN;
1354 }
1355 } else {
1356 /* Prefix exists, try to update */
1357 if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
1358 /*
1359 * Nhop/mpath group hasn't changed. Flip
1360 * to the new precalculated one and return
1361 */
1362 error = change_route(rnh, rt_new, rnd_new, rc);
1363 } else {
1364 /* Update and retry */
1365 rnd_orig->rnd_nhop = rt_new->rt_nhop;
1366 rnd_orig->rnd_weight = rt_new->rt_weight;
1367 error = EAGAIN;
1368 }
1369 }
1370
1371 RIB_WUNLOCK(rnh);
1372
1373 if (error == 0) {
1374 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
1375
1376 if (rnd_orig->rnd_nhop != NULL)
1377 nhop_free_any(rnd_orig->rnd_nhop);
1378
1379 } else {
1380 if (rnd_new->rnd_nhop != NULL)
1381 nhop_free_any(rnd_new->rnd_nhop);
1382 }
1383
1384 return (error);
1385 }
1386
1387 /*
1388 * Performs the routing table modification specified by @action.
1389 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
1390 * Needs to be run in network epoch.
1391 *
1392 * Returns 0 on success and fills in @rc with action result.
1393 */
1394 int
1395 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
1396 struct rib_cmd_info *rc)
1397 {
1398 int error;
1399
1400 switch (action) {
1401 case RTM_ADD:
1402 error = rib_add_route(fibnum, info, rc);
1403 break;
1404 case RTM_DELETE:
1405 error = rib_del_route(fibnum, info, rc);
1406 break;
1407 case RTM_CHANGE:
1408 error = rib_change_route(fibnum, info, rc);
1409 break;
1410 default:
1411 error = ENOTSUP;
1412 }
1413
1414 return (error);
1415 }
1416
1417 struct rt_delinfo
1418 {
1419 struct rib_head *rnh;
1420 struct rtentry *head;
1421 rib_filter_f_t *filter_f;
1422 void *filter_arg;
1423 int prio;
1424 struct rib_cmd_info rc;
1425 };
1426
1427 /*
1428 * Conditionally unlinks rtentries or paths from the radix tree based
1429 * on the callback data passed in @arg.
1430 */
1431 static int
1432 rt_checkdelroute(struct radix_node *rn, void *arg)
1433 {
1434 struct rt_delinfo *di = (struct rt_delinfo *)arg;
1435 struct rtentry *rt = (struct rtentry *)rn;
1436
1437 if (rt_delete_conditional(di->rnh, rt, di->prio,
1438 di->filter_f, di->filter_arg, &di->rc) != 0)
1439 return (0);
1440
1441 /*
1442 * Add deleted rtentries to the list to GC them
1443 * after dropping the lock.
1444 *
1445 * XXX: Delayed notifications not implemented
1446 * for nexthop updates.
1447 */
1448 if (di->rc.rc_cmd == RTM_DELETE) {
1449 /* Add to the list and return */
1450 rt->rt_chain = di->head;
1451 di->head = rt;
1452 #ifdef ROUTE_MPATH
1453 } else {
1454 /*
1455 * RTM_CHANGE to a different nexthop or nexthop group.
1456 * Free old multipath group.
1457 */
1458 nhop_free_any(di->rc.rc_nh_old);
1459 #endif
1460 }
1461
1462 return (0);
1463 }
1464
1465 /*
1466 * Iterates over a routing table specified by @fibnum and @family and
1467 * deletes elements marked by @filter_f.
1468 * @fibnum: rtable id
1469 * @family: AF_ address family
1470 * @filter_f: function returning non-zero value for items to delete
1471 * @arg: data to pass to the @filter_f function
1472 * @report: true if rtsock notification is needed.
1473 */
1474 void
1475 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg,
1476 bool report)
1477 {
1478 struct rib_head *rnh;
1479 struct rtentry *rt;
1480 struct nhop_object *nh;
1481 struct epoch_tracker et;
1482
1483 rnh = rt_tables_get_rnh(fibnum, family);
1484 if (rnh == NULL)
1485 return;
1486
1487 struct rt_delinfo di = {
1488 .rnh = rnh,
1489 .filter_f = filter_f,
1490 .filter_arg = filter_arg,
1491 .prio = NH_PRIORITY_NORMAL,
1492 };
1493
1494 NET_EPOCH_ENTER(et);
1495
1496 RIB_WLOCK(rnh);
1497 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
1498 RIB_WUNLOCK(rnh);
1499
1500 /* We might have something to reclaim. */
1501 bzero(&di.rc, sizeof(di.rc));
1502 di.rc.rc_cmd = RTM_DELETE;
1503 while (di.head != NULL) {
1504 rt = di.head;
1505 di.head = rt->rt_chain;
1506 rt->rt_chain = NULL;
1507 nh = rt->rt_nhop;
1508
1509 di.rc.rc_rt = rt;
1510 di.rc.rc_nh_old = nh;
1511 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);
1512
1513 if (report) {
1514 #ifdef ROUTE_MPATH
1515 struct nhgrp_object *nhg;
1516 const struct weightened_nhop *wn;
1517 uint32_t num_nhops;
1518 if (NH_IS_NHGRP(nh)) {
1519 nhg = (struct nhgrp_object *)nh;
1520 wn = nhgrp_get_nhops(nhg, &num_nhops);
1521 for (int i = 0; i < num_nhops; i++)
1522 rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
1523 } else
1524 #endif
1525 rt_routemsg(RTM_DELETE, rt, nh, fibnum);
1526 }
1527 rt_free(rt);
1528 }
1529
1530 NET_EPOCH_EXIT(et);
1531 }
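
/*
 * A usage sketch (illustrative, not an in-tree caller): remove every inet
 * route in fib 0 that resolves via the gateway @gw (a struct sockaddr
 * pointer assumed by the example), reporting deletions to rtsock.
 * rib_match_gw() above already has the rib_filter_f_t signature.
 *
 *	rib_walk_del(0, AF_INET, rib_match_gw, gw, true);
 */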
1532
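/*
 * rnh_walktree() callback: unconditionally removes @rn from the radix tree
 * and schedules the rtentry for freeing.
 */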
1533 static int
1534 rt_delete_unconditional(struct radix_node *rn, void *arg)
1535 {
1536 struct rtentry *rt = RNTORT(rn);
1537 struct rib_head *rnh = (struct rib_head *)arg;
1538
1539 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
1540 if (RNTORT(rn) == rt)
1541 rt_free(rt);
1542
1543 return (0);
1544 }
1545
1546 /*
1547 * Removes all routes from the routing table without executing notifications.
1548 * rtentries will be freed after the end of the current epoch.
1549 */
1550 static void
1551 rib_flush_routes(struct rib_head *rnh)
1552 {
1553 RIB_WLOCK(rnh);
1554 rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
1555 RIB_WUNLOCK(rnh);
1556 }
1557
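/*
 * Removes all routes of address family @family from every fib, without
 * generating notifications.
 */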
1558 void
1559 rib_flush_routes_family(int family)
1560 {
1561 struct rib_head *rnh;
1562
1563 for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
1564 if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
1565 rib_flush_routes(rnh);
1566 }
1567 }
1568
1569 const char *
1570 rib_print_family(int family)
1571 {
1572 switch (family) {
1573 case AF_INET:
1574 return ("inet");
1575 case AF_INET6:
1576 return ("inet6");
1577 case AF_LINK:
1578 return ("link");
1579 }
1580 return ("unknown");
1581 }
1582
1583