1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2020 Alexander V. Chernikov
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 #include "opt_route.h"
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/mbuf.h>
37 #include <sys/socket.h>
38 #include <sys/sysctl.h>
39 #include <sys/syslog.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/rmlock.h>
43
44 #include <net/if.h>
45 #include <net/if_var.h>
46 #include <net/if_private.h>
47 #include <net/if_dl.h>
48 #include <net/vnet.h>
49 #include <net/route.h>
50 #include <net/route/route_ctl.h>
51 #include <net/route/route_var.h>
52 #include <net/route/nhop_utils.h>
53 #include <net/route/nhop.h>
54 #include <net/route/nhop_var.h>
55 #include <netinet/in.h>
56 #include <netinet6/scope6_var.h>
57 #include <netinet6/in6_var.h>
58
59 #define DEBUG_MOD_NAME route_ctl
60 #define DEBUG_MAX_LEVEL LOG_DEBUG
61 #include <net/route/route_debug.h>
62 _DECLARE_DEBUG(LOG_INFO);
63
/*
 * This file contains control plane routing tables functions.
 *
 * All functions assume they are called in net epoch.
 */
69
/*
 * Storage able to hold a generic, IPv4 or IPv6 sockaddr.
 * Used in this file as on-stack backing for netmasks that
 * fill_pxmask_family() builds from a prefix length.
 */
union sockaddr_union {
	struct sockaddr		sa;
	struct sockaddr_in	sin;
	struct sockaddr_in6	sin6;
	char			_buf[32];
};
76
77 static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
78 struct rib_cmd_info *rc);
79 static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
80 struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
81 struct rib_cmd_info *rc);
82
83 static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
84 struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
85 static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
86 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
87 int op_flags, struct rib_cmd_info *rc);
88
89 static int add_route(struct rib_head *rnh, struct rtentry *rt,
90 struct route_nhop_data *rnd, struct rib_cmd_info *rc);
91 static int delete_route(struct rib_head *rnh, struct rtentry *rt,
92 struct rib_cmd_info *rc);
93 static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
94 int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);
95
96 static bool fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
97 struct sockaddr **pmask);
98 static int get_prio_from_info(const struct rt_addrinfo *info);
99 static int nhop_get_prio(const struct nhop_object *nh);
100
101 static bool rib_can_multipath(struct rib_head *rh);
102
/* Per-vnet multipath routing configuration */
SYSCTL_DECL(_net_route);
#define	V_rib_route_multipath	VNET(rib_route_multipath)
/* Read-write knob, defaults to 1; consulted by rib_can_multipath(). */
VNET_DEFINE(u_int, rib_route_multipath) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, multipath, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");

/*
 * Exported read-only, starts at 0.  add_route_flags_mpath() sets it to 1
 * once a change installs a nexthop group.
 */
VNET_DEFINE(u_int, fib_hash_outbound) = 0;
SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
    &VNET_NAME(fib_hash_outbound), 0,
    "Compute flowid for locally-originated packets");

/* Default entropy to add to the hash calculation for the outbound connections */
uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
};

#if defined(INET) && defined(INET6)
FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
#define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop)
/* Read-write knob, defaults to 1; consulted by rib_can_4o6_nhop(). */
VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
#endif

/* Debug bits */
SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
134
135 static struct rib_head *
get_rnh(uint32_t fibnum,const struct rt_addrinfo * info)136 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
137 {
138 struct rib_head *rnh;
139 struct sockaddr *dst;
140
141 KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));
142
143 dst = info->rti_info[RTAX_DST];
144 rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
145
146 return (rnh);
147 }
148
#if defined(INET) && defined(INET6)
/*
 * Reports whether IPv4 routes via IPv6 nexthops are currently enabled
 * (non-zero rib_route_ipv6_nexthop in the current vnet).
 */
bool
rib_can_4o6_nhop(void)
{
	return (V_rib_route_ipv6_nexthop != 0);
}
#endif
156
157 static bool
rib_can_multipath(struct rib_head * rh)158 rib_can_multipath(struct rib_head *rh)
159 {
160 int result;
161
162 CURVNET_SET(rh->rib_vnet);
163 result = !!V_rib_route_multipath;
164 CURVNET_RESTORE();
165
166 return (result);
167 }
168
169 /*
170 * Check is nhop is multipath-eligible.
171 * Avoid nhops without gateways and redirects.
172 *
173 * Returns 1 for multipath-eligible nexthop,
174 * 0 otherwise.
175 */
176 bool
nhop_can_multipath(const struct nhop_object * nh)177 nhop_can_multipath(const struct nhop_object *nh)
178 {
179
180 if ((nh->nh_flags & NHF_MULTIPATH) != 0)
181 return (1);
182 if ((nh->nh_flags & NHF_GATEWAY) == 0)
183 return (0);
184 if ((nh->nh_flags & NHF_REDIRECT) != 0)
185 return (0);
186
187 return (1);
188 }
189
190 static int
get_info_weight(const struct rt_addrinfo * info,uint32_t default_weight)191 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
192 {
193 uint32_t weight;
194
195 if (info->rti_mflags & RTV_WEIGHT)
196 weight = info->rti_rmx->rmx_weight;
197 else
198 weight = default_weight;
199 if (weight == 0)
200 weight = default_weight;
201
202 return (weight);
203 }
204
205 /*
206 * File-local concept for distingushing between the normal and
207 * RTF_PINNED routes tha can override the "normal" one.
208 */
209 #define NH_PRIORITY_HIGH 2
210 #define NH_PRIORITY_NORMAL 1
211 static int
get_prio_from_info(const struct rt_addrinfo * info)212 get_prio_from_info(const struct rt_addrinfo *info)
213 {
214 if (info->rti_flags & RTF_PINNED)
215 return (NH_PRIORITY_HIGH);
216 return (NH_PRIORITY_NORMAL);
217 }
218
219 static int
nhop_get_prio(const struct nhop_object * nh)220 nhop_get_prio(const struct nhop_object *nh)
221 {
222 if (NH_IS_PINNED(nh))
223 return (NH_PRIORITY_HIGH);
224 return (NH_PRIORITY_NORMAL);
225 }
226
227 /*
228 * Check if specified @gw matches gw data in the nexthop @nh.
229 *
230 * Returns true if matches, false otherwise.
231 */
232 bool
match_nhop_gw(const struct nhop_object * nh,const struct sockaddr * gw)233 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
234 {
235
236 if (nh->gw_sa.sa_family != gw->sa_family)
237 return (false);
238
239 switch (gw->sa_family) {
240 case AF_INET:
241 return (nh->gw4_sa.sin_addr.s_addr ==
242 ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
243 case AF_INET6:
244 {
245 const struct sockaddr_in6 *gw6;
246 gw6 = (const struct sockaddr_in6 *)gw;
247
248 /*
249 * Currently (2020-09) IPv6 gws in kernel have their
250 * scope embedded. Once this becomes false, this code
251 * has to be revisited.
252 */
253 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
254 &gw6->sin6_addr))
255 return (true);
256 return (false);
257 }
258 case AF_LINK:
259 {
260 const struct sockaddr_dl *sdl;
261 sdl = (const struct sockaddr_dl *)gw;
262 return (nh->gwl_sa.sdl_index == sdl->sdl_index);
263 }
264 default:
265 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
266 }
267
268 /* NOTREACHED */
269 return (false);
270 }
271
/*
 * Matches all nexthops with the given gateway @gw_sa
 * (a struct sockaddr passed as an opaque pointer).
 * Can be used as rib_filter_f callback; @rt is not examined.
 */
int
rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa)
{
	return (match_nhop_gw(nh, (const struct sockaddr *)gw_sa));
}
283
/*
 * Callback state for match_gw_one().
 */
struct gw_filter_data {
	const struct sockaddr	*gw;	/* gateway to compare nexthops against */
	int			count;	/* matching nexthops seen so far */
};
288
289 /*
290 * Matches first occurence of the gateway provided in @gwd
291 */
292 static int
match_gw_one(const struct rtentry * rt,const struct nhop_object * nh,void * _data)293 match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
294 {
295 struct gw_filter_data *gwd = (struct gw_filter_data *)_data;
296
297 /* Return only first match to make rtsock happy */
298 if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0)
299 return (1);
300 return (0);
301 }
302
303 /*
304 * Checks if data in @info matches nexhop @nh.
305 *
306 * Returns 0 on success,
307 * ESRCH if not matched,
308 * ENOENT if filter function returned false
309 */
310 int
check_info_match_nhop(const struct rt_addrinfo * info,const struct rtentry * rt,const struct nhop_object * nh)311 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
312 const struct nhop_object *nh)
313 {
314 const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];
315
316 if (info->rti_filter != NULL) {
317 if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
318 return (ENOENT);
319 else
320 return (0);
321 }
322 if ((gw != NULL) && !match_nhop_gw(nh, gw))
323 return (ESRCH);
324
325 return (0);
326 }
327
328 /*
329 * Runs exact prefix match based on @dst and @netmask.
330 * Returns matched @rtentry if found or NULL.
331 * If rtentry was found, saves nexthop / weight value into @rnd.
332 */
333 static struct rtentry *
lookup_prefix_bysa(struct rib_head * rnh,const struct sockaddr * dst,const struct sockaddr * netmask,struct route_nhop_data * rnd)334 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
335 const struct sockaddr *netmask, struct route_nhop_data *rnd)
336 {
337 struct rtentry *rt;
338
339 RIB_LOCK_ASSERT(rnh);
340
341 rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
342 if (rt != NULL) {
343 rnd->rnd_nhop = rt->rt_nhop;
344 rnd->rnd_weight = rt->rt_weight;
345 } else {
346 rnd->rnd_nhop = NULL;
347 rnd->rnd_weight = 0;
348 }
349
350 return (rt);
351 }
352
/*
 * Runs exact prefix match using the key and mask of @rt.
 * Returns the matched rtentry or NULL; @rnd is filled as in
 * lookup_prefix_bysa().
 */
struct rtentry *
lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
    struct route_nhop_data *rnd)
{
	const struct sockaddr *key = rt_key_const(rt);
	const struct sockaddr *mask = rt_mask_const(rt);

	return (lookup_prefix_bysa(rnh, key, mask, rnd));
}
359
360 /*
361 * Runs exact prefix match based on dst/netmask from @info.
362 * Assumes RIB lock is held.
363 * Returns matched @rtentry if found or NULL.
364 * If rtentry was found, saves nexthop / weight value into @rnd.
365 */
366 struct rtentry *
lookup_prefix(struct rib_head * rnh,const struct rt_addrinfo * info,struct route_nhop_data * rnd)367 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
368 struct route_nhop_data *rnd)
369 {
370 struct rtentry *rt;
371
372 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
373 info->rti_info[RTAX_NETMASK], rnd);
374
375 return (rt);
376 }
377
378 const struct rtentry *
rib_lookup_prefix_plen(struct rib_head * rnh,struct sockaddr * dst,int plen,struct route_nhop_data * rnd)379 rib_lookup_prefix_plen(struct rib_head *rnh, struct sockaddr *dst, int plen,
380 struct route_nhop_data *rnd)
381 {
382 union sockaddr_union mask_storage;
383 struct sockaddr *netmask = &mask_storage.sa;
384
385 if (fill_pxmask_family(dst->sa_family, plen, dst, &netmask))
386 return (lookup_prefix_bysa(rnh, dst, netmask, rnd));
387 return (NULL);
388 }
389
/*
 * Builds the netmask for @plen in the storage *@pmask points to and
 * masks the address in @_dst in place.
 *
 * @family: address family of @_dst (AF_INET / AF_INET6 when compiled in)
 * @plen: prefix length; -1 means "no mask"
 * @_dst: destination sockaddr, host bits are cleared for non-host prefixes
 * @pmask: in: pointer to caller-provided mask storage;
 *         out: left pointing to the filled mask, or set to NULL for
 *         host routes (@plen of -1, 32 for IPv4, 128 for IPv6)
 *
 * Returns true on success, false if @plen is out of range for @family
 * or @family is not supported.
 */
static bool
fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
    struct sockaddr **pmask)
{
	if (plen == -1) {
		*pmask = NULL;
		return (true);
	}

	switch (family) {
#ifdef INET
	case AF_INET:
	    {
		struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask);
		struct sockaddr_in *dst = (struct sockaddr_in *)_dst;

		memset(mask, 0, sizeof(*mask));
		mask->sin_family = family;
		mask->sin_len = sizeof(*mask);
		if (plen == 32)
			*pmask = NULL;
		else if (plen > 32 || plen < 0)
			return (false);
		else {
			uint32_t maddr;

			/*
			 * Compute the mask in unsigned arithmetic: with a
			 * signed constant, plen == 1 would evaluate 1 << 31,
			 * which is undefined behaviour.
			 */
			maddr = htonl(plen ?
			    ~(((uint32_t)1 << (32 - plen)) - 1) : 0);
			mask->sin_addr.s_addr = maddr;
			/* Both operands are in network byte order. */
			dst->sin_addr.s_addr &= maddr;
		}
		return (true);
	    }
#endif
#ifdef INET6
	case AF_INET6:
	    {
		struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask);
		struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst;

		memset(mask, 0, sizeof(*mask));
		mask->sin6_family = family;
		mask->sin6_len = sizeof(*mask);
		if (plen == 128)
			*pmask = NULL;
		else if (plen > 128 || plen < 0)
			return (false);
		else {
			ip6_writemask(&mask->sin6_addr, plen);
			IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr);
		}
		return (true);
	    }
#endif
	default:
		break;
	}
	return (false);
}
449
450 /*
451 * Attempts to add @dst/plen prefix with nexthop/nexhopgroup data @rnd
452 * to the routing table.
453 *
454 * @fibnum: verified kernel rtable id to insert route to
455 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
456 * @plen: prefix length (or -1 if host route or not applicable for AF)
457 * @op_flags: combination of RTM_F_ flags
458 * @rc: storage to report operation result
459 *
460 * Returns 0 on success.
461 */
462 int
rib_add_route_px(uint32_t fibnum,struct sockaddr * dst,int plen,struct route_nhop_data * rnd,int op_flags,struct rib_cmd_info * rc)463 rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
464 struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc)
465 {
466 union sockaddr_union mask_storage;
467 struct sockaddr *netmask = &mask_storage.sa;
468 struct rtentry *rt = NULL;
469
470 NET_EPOCH_ASSERT();
471
472 bzero(rc, sizeof(struct rib_cmd_info));
473 rc->rc_cmd = RTM_ADD;
474
475 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
476 if (rnh == NULL)
477 return (EAFNOSUPPORT);
478
479 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
480 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
481 return (EINVAL);
482 }
483
484 if (op_flags & RTM_F_CREATE) {
485 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) {
486 FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed");
487 return (ENOMEM);
488 }
489 } else {
490 struct route_nhop_data rnd_tmp;
491 RIB_RLOCK_TRACKER;
492
493 RIB_RLOCK(rnh);
494 rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd_tmp);
495 RIB_RUNLOCK(rnh);
496
497 if (rt == NULL)
498 return (ESRCH);
499 }
500
501 return (add_route_flags(rnh, rt, rnd, op_flags, rc));
502 }
503
504 /*
505 * Attempts to delete @dst/plen prefix matching gateway @gw from the
506 * routing rable.
507 *
508 * @fibnum: rtable id to remove route from
509 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
510 * @plen: prefix length (or -1 if host route or not applicable for AF)
511 * @gw: gateway to match
512 * @op_flags: combination of RTM_F_ flags
513 * @rc: storage to report operation result
514 *
515 * Returns 0 on success.
516 */
517 int
rib_del_route_px_gw(uint32_t fibnum,struct sockaddr * dst,int plen,const struct sockaddr * gw,int op_flags,struct rib_cmd_info * rc)518 rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen,
519 const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
520 {
521 struct gw_filter_data gwd = { .gw = gw };
522
523 return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc));
524 }
525
526 /*
527 * Attempts to delete @dst/plen prefix matching @filter_func from the
528 * routing rable.
529 *
530 * @fibnum: rtable id to remove route from
531 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
532 * @plen: prefix length (or -1 if host route or not applicable for AF)
533 * @filter_func: func to be called for each nexthop of the prefix for matching
534 * @filter_arg: argument to pass to @filter_func
535 * @op_flags: combination of RTM_F_ flags
536 * @rc: storage to report operation result
537 *
538 * Returns 0 on success.
539 */
540 int
rib_del_route_px(uint32_t fibnum,struct sockaddr * dst,int plen,rib_filter_f_t * filter_func,void * filter_arg,int op_flags,struct rib_cmd_info * rc)541 rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
542 rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
543 struct rib_cmd_info *rc)
544 {
545 union sockaddr_union mask_storage;
546 struct sockaddr *netmask = &mask_storage.sa;
547 int error;
548
549 NET_EPOCH_ASSERT();
550
551 bzero(rc, sizeof(struct rib_cmd_info));
552 rc->rc_cmd = RTM_DELETE;
553
554 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
555 if (rnh == NULL)
556 return (EAFNOSUPPORT);
557
558 if (dst->sa_len > sizeof(mask_storage)) {
559 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
560 return (EINVAL);
561 }
562
563 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
564 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
565 return (EINVAL);
566 }
567
568 int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;
569
570 RIB_WLOCK(rnh);
571 struct route_nhop_data rnd;
572 struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
573 if (rt != NULL) {
574 error = rt_delete_conditional(rnh, rt, prio, filter_func,
575 filter_arg, rc);
576 } else
577 error = ESRCH;
578 RIB_WUNLOCK(rnh);
579
580 if (error != 0)
581 return (error);
582
583 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
584
585 if (rc->rc_cmd == RTM_DELETE)
586 rt_free(rc->rc_rt);
587 else {
588 /*
589 * Deleting 1 path may result in RTM_CHANGE to
590 * a different mpath group/nhop.
591 * Free old mpath group.
592 */
593 nhop_free_any(rc->rc_nh_old);
594 }
595
596 return (0);
597 }
598
599 /*
600 * Tries to copy route @rt from one rtable to the rtable specified by @dst_rh.
601 * @rt: route to copy.
602 * @rnd_src: nhop and weight. Multipath routes are not supported
603 * @rh_dst: target rtable.
604 * @rc: operation result storage
605 *
606 * Return 0 on success.
607 */
608 int
rib_copy_route(struct rtentry * rt,const struct route_nhop_data * rnd_src,struct rib_head * rh_dst,struct rib_cmd_info * rc)609 rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src,
610 struct rib_head *rh_dst, struct rib_cmd_info *rc)
611 {
612 struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop;
613 int error;
614
615 MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0);
616
617 IF_DEBUG_LEVEL(LOG_DEBUG2) {
618 char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE];
619 nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf));
620 rt_print_buf(rt, rtbuf, sizeof(rtbuf));
621 FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u",
622 rtbuf, nhbuf, nhop_get_fibnum(nh_src));
623 }
624 struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family);
625 if (nh == NULL) {
626 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop");
627 return (ENOMEM);
628 }
629 nhop_copy(nh, rnd_src->rnd_nhop);
630 nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop));
631 nhop_set_fibnum(nh, rh_dst->rib_fibnum);
632 nh = nhop_get_nhop_internal(rh_dst, nh, &error);
633 if (error != 0) {
634 FIB_RH_LOG(LOG_INFO, rh_dst,
635 "unable to finalize new nexthop: error %d", error);
636 return (ENOMEM);
637 }
638
639 struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt));
640 if (rt_new == NULL) {
641 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry");
642 nhop_free(nh);
643 return (ENOMEM);
644 }
645
646 struct route_nhop_data rnd = {
647 .rnd_nhop = nh,
648 .rnd_weight = rnd_src->rnd_weight
649 };
650 int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? RTM_F_FORCE : 0);
651 error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc);
652
653 if (error != 0) {
654 IF_DEBUG_LEVEL(LOG_DEBUG2) {
655 char buf[NHOP_PRINT_BUFSIZE];
656 rt_print_buf(rt, buf, sizeof(buf));
657 FIB_RH_LOG(LOG_DEBUG, rh_dst,
658 "Unable to add route %s: error %d", buf, error);
659 }
660 nhop_free(nh);
661 }
662 return (error);
663 }
664
665 /*
666 * Adds route defined by @info into the kernel table specified by @fibnum and
667 * sa_family in @info->rti_info[RTAX_DST].
668 *
669 * Returns 0 on success and fills in operation metadata into @rc.
670 */
671 int
rib_add_route(uint32_t fibnum,struct rt_addrinfo * info,struct rib_cmd_info * rc)672 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
673 struct rib_cmd_info *rc)
674 {
675 struct rib_head *rnh;
676 int error;
677
678 NET_EPOCH_ASSERT();
679
680 rnh = get_rnh(fibnum, info);
681 if (rnh == NULL)
682 return (EAFNOSUPPORT);
683
684 /*
685 * Check consistency between RTF_HOST flag and netmask
686 * existence.
687 */
688 if (info->rti_flags & RTF_HOST)
689 info->rti_info[RTAX_NETMASK] = NULL;
690 else if (info->rti_info[RTAX_NETMASK] == NULL) {
691 FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask");
692 return (EINVAL);
693 }
694
695 bzero(rc, sizeof(struct rib_cmd_info));
696 rc->rc_cmd = RTM_ADD;
697
698 error = add_route_byinfo(rnh, info, rc);
699 if (error == 0)
700 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
701
702 return (error);
703 }
704
705 static int
add_route_byinfo(struct rib_head * rnh,struct rt_addrinfo * info,struct rib_cmd_info * rc)706 add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
707 struct rib_cmd_info *rc)
708 {
709 struct route_nhop_data rnd_add;
710 struct nhop_object *nh;
711 struct rtentry *rt;
712 struct sockaddr *dst, *gateway, *netmask;
713 int error;
714
715 dst = info->rti_info[RTAX_DST];
716 gateway = info->rti_info[RTAX_GATEWAY];
717 netmask = info->rti_info[RTAX_NETMASK];
718
719 if ((info->rti_flags & RTF_GATEWAY) && !gateway) {
720 FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
721 return (EINVAL);
722 }
723 if (dst && gateway && !nhop_check_gateway(dst->sa_family, gateway->sa_family)) {
724 FIB_RH_LOG(LOG_DEBUG, rnh,
725 "error: invalid dst/gateway family combination (%d, %d)",
726 dst->sa_family, gateway->sa_family);
727 return (EINVAL);
728 }
729
730 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) {
731 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d",
732 dst->sa_len);
733 return (EINVAL);
734 }
735
736 if (info->rti_ifa == NULL) {
737 error = rt_getifa_fib(info, rnh->rib_fibnum);
738 if (error)
739 return (error);
740 }
741
742 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL)
743 return (ENOBUFS);
744
745 error = nhop_create_from_info(rnh, info, &nh);
746 if (error != 0) {
747 rt_free_immediate(rt);
748 return (error);
749 }
750
751 rnd_add.rnd_nhop = nh;
752 rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);
753
754 int op_flags = RTM_F_CREATE;
755
756 /*
757 * Set the desired action when the route already exists:
758 * If RTF_PINNED is present, assume the direct kernel routes that cannot be multipath.
759 * Otherwise, append the path.
760 */
761 op_flags |= (info->rti_flags & RTF_PINNED) ? RTM_F_REPLACE : RTM_F_APPEND;
762
763 return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc));
764 }
765
/*
 * Installs the nexthop data @rnd_add for the prefix described by @rt,
 * following the RTM_F_ flags in @op_flags:
 *  - prefix absent: insert @rt if RTM_F_CREATE is set, else ESRCH;
 *  - prefix present and RTM_F_EXCL: EEXIST;
 *  - prefix present and RTM_F_REPLACE: swap the nexthop unless the
 *    existing one has NH_PRIORITY_HIGH (EEXIST);
 *  - prefix present and RTM_F_APPEND: try to form a multipath group,
 *    retrying up to RIB_MAX_RETRIES times on EAGAIN;
 *  - otherwise EEXIST.
 *
 * Ownership: on every non-zero return the reference on
 * @rnd_add->rnd_nhop has been released and, if RTM_F_CREATE was set,
 * @rt has been freed.  On a successful replace the previous nexthop
 * (and a RTM_F_CREATE @rt, which was not inserted) are freed instead.
 *
 * Returns 0 on success with the result stored in @rc.
 */
static int
add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add,
    int op_flags, struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_orig;
	struct nhop_object *nh;
	struct rtentry *rt_orig;
	int error = 0;

	MPASS(rt != NULL);

	/* nh is what the common exit path below releases. */
	nh = rnd_add->rnd_nhop;

	RIB_WLOCK(rnh);

	rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig);

	if (rt_orig == NULL) {
		if (op_flags & RTM_F_CREATE)
			error = add_route(rnh, rt, rnd_add, rc);
		else
			error = ESRCH; /* no entry but creation was not required */
		RIB_WUNLOCK(rnh);
		if (error != 0)
			goto out;
		return (0);
	}

	if (op_flags & RTM_F_EXCL) {
		/* We have existing route in the RIB but not allowed to replace. */
		RIB_WUNLOCK(rnh);
		error = EEXIST;
		goto out;
	}

	/* Now either append or replace */
	if (op_flags & RTM_F_REPLACE) {
		if (nhop_get_prio(rnd_orig.rnd_nhop) == NH_PRIORITY_HIGH) {
			/* Old path is "better" (e.g. has PINNED flag set) */
			RIB_WUNLOCK(rnh);
			error = EEXIST;
			goto out;
		}
		change_route(rnh, rt_orig, rnd_add, rc);
		RIB_WUNLOCK(rnh);
		/* The replaced nexthop is the one to release now. */
		nh = rc->rc_nh_old;
		goto out;
	}

	/* The multipath path below takes the locks it needs itself. */
	RIB_WUNLOCK(rnh);

	if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) &&
	    nhop_can_multipath(rnd_add->rnd_nhop) &&
	    nhop_can_multipath(rnd_orig.rnd_nhop)) {

		for (int i = 0; i < RIB_MAX_RETRIES; i++) {
			error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig,
			    op_flags, rc);
			if (error != EAGAIN)
				break;
			RTSTAT_INC(rts_add_retry);
		}

		/*
		 * Original nhop reference is unused in any case.
		 */
		nhop_free_any(rnd_add->rnd_nhop);
		if (op_flags & RTM_F_CREATE) {
			/* @rt was only a template here unless it got inserted. */
			if (error != 0 || rc->rc_cmd != RTM_ADD)
				rt_free_immediate(rt);
		}
		return (error);
	}
	/* Out of options - free state and return error */
	error = EEXIST;
out:
	if (op_flags & RTM_F_CREATE)
		rt_free_immediate(rt);
	nhop_free_any(nh);

	return (error);
}
848
849 static int
add_route_flags_mpath(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd_add,struct route_nhop_data * rnd_orig,int op_flags,struct rib_cmd_info * rc)850 add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
851 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
852 int op_flags, struct rib_cmd_info *rc)
853 {
854 RIB_RLOCK_TRACKER;
855 struct route_nhop_data rnd_new;
856 int error = 0;
857
858 if (!NH_IS_NHGRP(rnd_add->rnd_nhop))
859 error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new);
860 else
861 error = nhgrp_get_merge_group(rnh, rnd_orig, rnd_add, &rnd_new);
862 if (error != 0) {
863 if (error == EAGAIN) {
864 /*
865 * Group creation failed, most probably because
866 * @rnd_orig data got scheduled for deletion.
867 * Refresh @rnd_orig data and retry.
868 */
869 RIB_RLOCK(rnh);
870 lookup_prefix_rt(rnh, rt, rnd_orig);
871 RIB_RUNLOCK(rnh);
872 if (rnd_orig == NULL && !(op_flags & RTM_F_CREATE)) {
873 /* In this iteration route doesn't exist */
874 error = ENOENT;
875 }
876 }
877 return (error);
878 }
879 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
880 if (error != 0)
881 return (error);
882
883 if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) {
884 /*
885 * First multipath route got installed. Enable local
886 * outbound connections hashing.
887 */
888 if (bootverbose)
889 printf("FIB: enabled flowid calculation for locally-originated packets\n");
890 V_fib_hash_outbound = 1;
891 }
892
893 return (0);
894 }
895
896 /*
897 * Removes route defined by @info from the kernel table specified by @fibnum and
898 * sa_family in @info->rti_info[RTAX_DST].
899 *
900 * Returns 0 on success and fills in operation metadata into @rc.
901 */
902 int
rib_del_route(uint32_t fibnum,struct rt_addrinfo * info,struct rib_cmd_info * rc)903 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
904 {
905 struct rib_head *rnh;
906 struct sockaddr *dst, *netmask;
907 struct sockaddr_storage mdst;
908 int error;
909
910 NET_EPOCH_ASSERT();
911
912 rnh = get_rnh(fibnum, info);
913 if (rnh == NULL)
914 return (EAFNOSUPPORT);
915
916 bzero(rc, sizeof(struct rib_cmd_info));
917 rc->rc_cmd = RTM_DELETE;
918
919 dst = info->rti_info[RTAX_DST];
920 netmask = info->rti_info[RTAX_NETMASK];
921
922 if (netmask != NULL) {
923 /* Ensure @dst is always properly masked */
924 if (dst->sa_len > sizeof(mdst)) {
925 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
926 return (EINVAL);
927 }
928 rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
929 dst = (struct sockaddr *)&mdst;
930 }
931
932 rib_filter_f_t *filter_func = NULL;
933 void *filter_arg = NULL;
934 struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] };
935
936 if (info->rti_filter != NULL) {
937 filter_func = info->rti_filter;
938 filter_arg = info->rti_filterdata;
939 } else if (gwd.gw != NULL) {
940 filter_func = match_gw_one;
941 filter_arg = &gwd;
942 }
943
944 int prio = get_prio_from_info(info);
945
946 RIB_WLOCK(rnh);
947 struct route_nhop_data rnd;
948 struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
949 if (rt != NULL) {
950 error = rt_delete_conditional(rnh, rt, prio, filter_func,
951 filter_arg, rc);
952 } else
953 error = ESRCH;
954 RIB_WUNLOCK(rnh);
955
956 if (error != 0)
957 return (error);
958
959 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
960
961 if (rc->rc_cmd == RTM_DELETE)
962 rt_free(rc->rc_rt);
963 else {
964 /*
965 * Deleting 1 path may result in RTM_CHANGE to
966 * a different mpath group/nhop.
967 * Free old mpath group.
968 */
969 nhop_free_any(rc->rc_nh_old);
970 }
971
972 return (0);
973 }
974
975 /*
976 * Conditionally unlinks rtentry paths from @rnh matching @cb.
977 * Returns 0 on success with operation result stored in @rc.
978 * On error, returns:
979 * ESRCH - if prefix was not found or filter function failed to match
980 * EADDRINUSE - if trying to delete higher priority route.
981 */
982 static int
rt_delete_conditional(struct rib_head * rnh,struct rtentry * rt,int prio,rib_filter_f_t * cb,void * cbdata,struct rib_cmd_info * rc)983 rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
984 int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc)
985 {
986 struct nhop_object *nh = rt->rt_nhop;
987
988 if (NH_IS_NHGRP(nh)) {
989 struct nhgrp_object *nhg = (struct nhgrp_object *)nh;
990 struct route_nhop_data rnd;
991 int error;
992
993 if (cb == NULL)
994 return (ESRCH);
995 error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd);
996 if (error == 0) {
997 if (rnd.rnd_nhgrp == nhg) {
998 /* No match, unreference new group and return. */
999 nhop_free_any(rnd.rnd_nhop);
1000 return (ESRCH);
1001 }
1002 error = change_route(rnh, rt, &rnd, rc);
1003 }
1004 return (error);
1005 }
1006 if (cb != NULL && !cb(rt, nh, cbdata))
1007 return (ESRCH);
1008
1009 if (prio < nhop_get_prio(nh))
1010 return (EADDRINUSE);
1011
1012 return (delete_route(rnh, rt, rc));
1013 }
1014
1015 int
rib_change_route(uint32_t fibnum,struct rt_addrinfo * info,struct rib_cmd_info * rc)1016 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
1017 struct rib_cmd_info *rc)
1018 {
1019 RIB_RLOCK_TRACKER;
1020 struct route_nhop_data rnd_orig;
1021 struct rib_head *rnh;
1022 struct rtentry *rt;
1023 int error;
1024
1025 NET_EPOCH_ASSERT();
1026
1027 rnh = get_rnh(fibnum, info);
1028 if (rnh == NULL)
1029 return (EAFNOSUPPORT);
1030
1031 bzero(rc, sizeof(struct rib_cmd_info));
1032 rc->rc_cmd = RTM_CHANGE;
1033
1034 /* Check if updated gateway exists */
1035 if ((info->rti_flags & RTF_GATEWAY) &&
1036 (info->rti_info[RTAX_GATEWAY] == NULL)) {
1037
1038 /*
1039 * route(8) adds RTF_GATEWAY flag if -interface is not set.
1040 * Remove RTF_GATEWAY to enforce consistency and maintain
1041 * compatibility..
1042 */
1043 info->rti_flags &= ~RTF_GATEWAY;
1044 }
1045
1046 /*
1047 * route change is done in multiple steps, with dropping and
1048 * reacquiring lock. In the situations with multiple processes
1049 * changes the same route in can lead to the case when route
1050 * is changed between the steps. Address it by retrying the operation
1051 * multiple times before failing.
1052 */
1053
1054 RIB_RLOCK(rnh);
1055 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
1056 info->rti_info[RTAX_NETMASK], &rnh->head);
1057
1058 if (rt == NULL) {
1059 RIB_RUNLOCK(rnh);
1060 return (ESRCH);
1061 }
1062
1063 rnd_orig.rnd_nhop = rt->rt_nhop;
1064 rnd_orig.rnd_weight = rt->rt_weight;
1065
1066 RIB_RUNLOCK(rnh);
1067
1068 for (int i = 0; i < RIB_MAX_RETRIES; i++) {
1069 error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
1070 if (error != EAGAIN)
1071 break;
1072 }
1073
1074 return (error);
1075 }
1076
1077 static int
change_nhop(struct rib_head * rnh,struct rt_addrinfo * info,struct nhop_object * nh_orig,struct nhop_object ** nh_new)1078 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
1079 struct nhop_object *nh_orig, struct nhop_object **nh_new)
1080 {
1081 int error;
1082
1083 /*
1084 * New gateway could require new ifaddr, ifp;
1085 * flags may also be different; ifp may be specified
1086 * by ll sockaddr when protocol address is ambiguous
1087 */
1088 if (((nh_orig->nh_flags & NHF_GATEWAY) &&
1089 info->rti_info[RTAX_GATEWAY] != NULL) ||
1090 info->rti_info[RTAX_IFP] != NULL ||
1091 (info->rti_info[RTAX_IFA] != NULL &&
1092 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
1093 error = rt_getifa_fib(info, rnh->rib_fibnum);
1094
1095 if (error != 0) {
1096 info->rti_ifa = NULL;
1097 return (error);
1098 }
1099 }
1100
1101 error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
1102 info->rti_ifa = NULL;
1103
1104 return (error);
1105 }
1106
1107 static int
change_mpath_route(struct rib_head * rnh,struct rtentry * rt,struct rt_addrinfo * info,struct route_nhop_data * rnd_orig,struct rib_cmd_info * rc)1108 change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
1109 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
1110 struct rib_cmd_info *rc)
1111 {
1112 int error = 0, found_idx = 0;
1113 struct nhop_object *nh_orig = NULL, *nh_new;
1114 struct route_nhop_data rnd_new = {};
1115 const struct weightened_nhop *wn = NULL;
1116 struct weightened_nhop *wn_new;
1117 uint32_t num_nhops;
1118
1119 wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
1120 for (int i = 0; i < num_nhops; i++) {
1121 if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
1122 nh_orig = wn[i].nh;
1123 found_idx = i;
1124 break;
1125 }
1126 }
1127
1128 if (nh_orig == NULL)
1129 return (ESRCH);
1130
1131 error = change_nhop(rnh, info, nh_orig, &nh_new);
1132 if (error != 0)
1133 return (error);
1134
1135 wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
1136 M_TEMP, M_NOWAIT | M_ZERO);
1137 if (wn_new == NULL) {
1138 nhop_free(nh_new);
1139 return (EAGAIN);
1140 }
1141
1142 memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
1143 wn_new[found_idx].nh = nh_new;
1144 wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);
1145
1146 error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp);
1147 nhop_free(nh_new);
1148 free(wn_new, M_TEMP);
1149
1150 if (error != 0)
1151 return (error);
1152
1153 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
1154
1155 return (error);
1156 }
1157
1158 static int
change_route_byinfo(struct rib_head * rnh,struct rtentry * rt,struct rt_addrinfo * info,struct route_nhop_data * rnd_orig,struct rib_cmd_info * rc)1159 change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
1160 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
1161 struct rib_cmd_info *rc)
1162 {
1163 int error = 0;
1164 struct nhop_object *nh_orig;
1165 struct route_nhop_data rnd_new;
1166
1167 nh_orig = rnd_orig->rnd_nhop;
1168 if (nh_orig == NULL)
1169 return (ESRCH);
1170
1171 if (NH_IS_NHGRP(nh_orig))
1172 return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
1173
1174 rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
1175 error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
1176 if (error != 0)
1177 return (error);
1178 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
1179
1180 return (error);
1181 }
1182
1183 static void
update_tmproutes_mpath(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd)1184 update_tmproutes_mpath(struct rib_head *rnh, struct rtentry *rt,
1185 struct route_nhop_data *rnd)
1186 {
1187 const struct weightened_nhop *wn;
1188 uint32_t i, nhops;
1189
1190 if (NH_IS_NHGRP(rnd->rnd_nhop)) {
1191 wn = nhgrp_get_nhops(rnd->rnd_nhgrp, &nhops);
1192
1193 for (i = 0; i < nhops; i++) {
1194 if (nhop_get_expire(wn[i].nh) == 0)
1195 continue;
1196
1197 tmproutes_update(rnh, rt, wn[i].nh);
1198 }
1199 } else if (nhop_get_expire(rnd->rnd_nhop) != 0)
1200 tmproutes_update(rnh, rt, rnd->rnd_nhop);
1201 }
1202
1203 /*
1204 * Insert @rt with nhop data from @rnd_new to @rnh.
1205 * Returns 0 on success and stores operation results in @rc.
1206 */
1207 static int
add_route(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd,struct rib_cmd_info * rc)1208 add_route(struct rib_head *rnh, struct rtentry *rt,
1209 struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1210 {
1211 struct radix_node *rn;
1212
1213 RIB_WLOCK_ASSERT(rnh);
1214
1215 rt->rt_nhop = rnd->rnd_nhop;
1216 rt->rt_weight = rnd->rnd_weight;
1217 rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes);
1218
1219 if (rn != NULL) {
1220 update_tmproutes_mpath(rnh, rt, rnd);
1221 /* Finalize notification */
1222 rib_bump_gen(rnh);
1223 rnh->rnh_prefixes++;
1224
1225 rc->rc_cmd = RTM_ADD;
1226 rc->rc_rt = rt;
1227 rc->rc_nh_old = NULL;
1228 rc->rc_nh_new = rnd->rnd_nhop;
1229 rc->rc_nh_weight = rnd->rnd_weight;
1230
1231 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1232 return (0);
1233 }
1234
1235 /* Existing route or memory allocation failure. */
1236 return (EEXIST);
1237 }
1238
1239 /*
1240 * Unconditionally deletes @rt from @rnh.
1241 */
1242 static int
delete_route(struct rib_head * rnh,struct rtentry * rt,struct rib_cmd_info * rc)1243 delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
1244 {
1245 RIB_WLOCK_ASSERT(rnh);
1246
1247 /* Route deletion requested. */
1248 struct radix_node *rn;
1249
1250 rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
1251 if (rn == NULL)
1252 return (ESRCH);
1253 rt = RNTORT(rn);
1254 rt->rte_flags &= ~RTF_UP;
1255
1256 rib_bump_gen(rnh);
1257 rnh->rnh_prefixes--;
1258
1259 rc->rc_cmd = RTM_DELETE;
1260 rc->rc_rt = rt;
1261 rc->rc_nh_old = rt->rt_nhop;
1262 rc->rc_nh_new = NULL;
1263 rc->rc_nh_weight = rt->rt_weight;
1264
1265 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1266
1267 return (0);
1268 }
1269
1270 /*
1271 * Switch @rt nhop/weigh to the ones specified in @rnd.
1272 * Returns 0 on success.
1273 */
1274 int
change_route(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd,struct rib_cmd_info * rc)1275 change_route(struct rib_head *rnh, struct rtentry *rt,
1276 struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1277 {
1278 struct nhop_object *nh_orig;
1279
1280 RIB_WLOCK_ASSERT(rnh);
1281
1282 nh_orig = rt->rt_nhop;
1283
1284 if (rnd->rnd_nhop == NULL)
1285 return (delete_route(rnh, rt, rc));
1286
1287 /* Changing nexthop & weight to a new one */
1288 rt->rt_nhop = rnd->rnd_nhop;
1289 rt->rt_weight = rnd->rnd_weight;
1290 update_tmproutes_mpath(rnh, rt, rnd);
1291
1292 /* Finalize notification */
1293 rib_bump_gen(rnh);
1294 rc->rc_cmd = RTM_CHANGE;
1295 rc->rc_rt = rt;
1296 rc->rc_nh_old = nh_orig;
1297 rc->rc_nh_new = rnd->rnd_nhop;
1298 rc->rc_nh_weight = rnd->rnd_weight;
1299
1300 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1301
1302 return (0);
1303 }
1304
1305 /*
1306 * Conditionally update route nhop/weight IFF data in @nhd_orig is
1307 * consistent with the current route data.
1308 * Nexthop in @nhd_new is consumed.
1309 */
1310 int
change_route_conditional(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd_orig,struct route_nhop_data * rnd_new,struct rib_cmd_info * rc)1311 change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
1312 struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
1313 struct rib_cmd_info *rc)
1314 {
1315 struct rtentry *rt_new;
1316 int error = 0;
1317
1318 IF_DEBUG_LEVEL(LOG_DEBUG2) {
1319 char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
1320 nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
1321 nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
1322 FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
1323 "trying change %s -> %s", buf_old, buf_new);
1324 }
1325 RIB_WLOCK(rnh);
1326
1327 struct route_nhop_data rnd;
1328 rt_new = lookup_prefix_rt(rnh, rt, &rnd);
1329
1330 if (rt_new == NULL) {
1331 if (rnd_orig->rnd_nhop == NULL)
1332 error = add_route(rnh, rt, rnd_new, rc);
1333 else {
1334 /*
1335 * Prefix does not exist, which was not our assumption.
1336 * Update @rnd_orig with the new data and return
1337 */
1338 rnd_orig->rnd_nhop = NULL;
1339 rnd_orig->rnd_weight = 0;
1340 error = EAGAIN;
1341 }
1342 } else {
1343 /* Prefix exists, try to update */
1344 if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
1345 /*
1346 * Nhop/mpath group hasn't changed. Flip
1347 * to the new precalculated one and return
1348 */
1349 error = change_route(rnh, rt_new, rnd_new, rc);
1350 } else {
1351 /* Update and retry */
1352 rnd_orig->rnd_nhop = rt_new->rt_nhop;
1353 rnd_orig->rnd_weight = rt_new->rt_weight;
1354 error = EAGAIN;
1355 }
1356 }
1357
1358 RIB_WUNLOCK(rnh);
1359
1360 if (error == 0) {
1361 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
1362
1363 if (rnd_orig->rnd_nhop != NULL)
1364 nhop_free_any(rnd_orig->rnd_nhop);
1365
1366 } else {
1367 if (rnd_new->rnd_nhop != NULL)
1368 nhop_free_any(rnd_new->rnd_nhop);
1369 }
1370
1371 return (error);
1372 }
1373
1374 /*
1375 * Performs modification of routing table specificed by @action.
1376 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
1377 * Needs to be run in network epoch.
1378 *
1379 * Returns 0 on success and fills in @rc with action result.
1380 */
1381 int
rib_action(uint32_t fibnum,int action,struct rt_addrinfo * info,struct rib_cmd_info * rc)1382 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
1383 struct rib_cmd_info *rc)
1384 {
1385 int error;
1386
1387 switch (action) {
1388 case RTM_ADD:
1389 error = rib_add_route(fibnum, info, rc);
1390 break;
1391 case RTM_DELETE:
1392 error = rib_del_route(fibnum, info, rc);
1393 break;
1394 case RTM_CHANGE:
1395 error = rib_change_route(fibnum, info, rc);
1396 break;
1397 default:
1398 error = ENOTSUP;
1399 }
1400
1401 return (error);
1402 }
1403
1404 struct rt_delinfo
1405 {
1406 struct rib_head *rnh;
1407 struct rtentry *head;
1408 rib_filter_f_t *filter_f;
1409 void *filter_arg;
1410 int prio;
1411 struct rib_cmd_info rc;
1412 };
1413
1414 /*
1415 * Conditionally unlinks rtenties or paths from radix tree based
1416 * on the callback data passed in @arg.
1417 */
1418 static int
rt_checkdelroute(struct radix_node * rn,void * arg)1419 rt_checkdelroute(struct radix_node *rn, void *arg)
1420 {
1421 struct rt_delinfo *di = (struct rt_delinfo *)arg;
1422 struct rtentry *rt = (struct rtentry *)rn;
1423
1424 if (rt_delete_conditional(di->rnh, rt, di->prio,
1425 di->filter_f, di->filter_arg, &di->rc) != 0)
1426 return (0);
1427
1428 /*
1429 * Add deleted rtentries to the list to GC them
1430 * after dropping the lock.
1431 *
1432 * XXX: Delayed notifications not implemented
1433 * for nexthop updates.
1434 */
1435 if (di->rc.rc_cmd == RTM_DELETE) {
1436 /* Add to the list and return */
1437 rt->rt_chain = di->head;
1438 di->head = rt;
1439 } else {
1440 /*
1441 * RTM_CHANGE to a different nexthop or nexthop group.
1442 * Free old multipath group.
1443 */
1444 nhop_free_any(di->rc.rc_nh_old);
1445 }
1446
1447 return (0);
1448 }
1449
1450 /*
1451 * Iterates over a routing table specified by @fibnum and @family and
1452 * deletes elements marked by @filter_f.
1453 * @fibnum: rtable id
1454 * @family: AF_ address family
1455 * @filter_f: function returning non-zero value for items to delete
1456 * @arg: data to pass to the @filter_f function
1457 * @report: true if rtsock notification is needed.
1458 */
1459 void
rib_walk_del(u_int fibnum,int family,rib_filter_f_t * filter_f,void * filter_arg,bool report)1460 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg,
1461 bool report)
1462 {
1463 struct rib_head *rnh;
1464 struct rtentry *rt;
1465 struct nhop_object *nh;
1466 struct epoch_tracker et;
1467
1468 rnh = rt_tables_get_rnh(fibnum, family);
1469 if (rnh == NULL)
1470 return;
1471
1472 struct rt_delinfo di = {
1473 .rnh = rnh,
1474 .filter_f = filter_f,
1475 .filter_arg = filter_arg,
1476 .prio = NH_PRIORITY_NORMAL,
1477 };
1478
1479 NET_EPOCH_ENTER(et);
1480
1481 RIB_WLOCK(rnh);
1482 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
1483 RIB_WUNLOCK(rnh);
1484
1485 /* We might have something to reclaim. */
1486 bzero(&di.rc, sizeof(di.rc));
1487 di.rc.rc_cmd = RTM_DELETE;
1488 while (di.head != NULL) {
1489 rt = di.head;
1490 di.head = rt->rt_chain;
1491 rt->rt_chain = NULL;
1492 nh = rt->rt_nhop;
1493
1494 di.rc.rc_rt = rt;
1495 di.rc.rc_nh_old = nh;
1496 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);
1497
1498 if (report) {
1499 struct nhgrp_object *nhg;
1500 const struct weightened_nhop *wn;
1501 uint32_t num_nhops;
1502 if (NH_IS_NHGRP(nh)) {
1503 nhg = (struct nhgrp_object *)nh;
1504 wn = nhgrp_get_nhops(nhg, &num_nhops);
1505 for (int i = 0; i < num_nhops; i++)
1506 rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
1507 } else
1508 rt_routemsg(RTM_DELETE, rt, nh, fibnum);
1509 }
1510 rt_free(rt);
1511 }
1512
1513 NET_EPOCH_EXIT(et);
1514 }
1515
1516 static int
rt_delete_unconditional(struct radix_node * rn,void * arg)1517 rt_delete_unconditional(struct radix_node *rn, void *arg)
1518 {
1519 struct rtentry *rt = RNTORT(rn);
1520 struct rib_head *rnh = (struct rib_head *)arg;
1521
1522 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
1523 if (RNTORT(rn) == rt)
1524 rt_free(rt);
1525
1526 return (0);
1527 }
1528
1529 /*
1530 * Removes all routes from the routing table without executing notifications.
1531 * rtentres will be removed after the end of a current epoch.
1532 */
1533 static void
rib_flush_routes(struct rib_head * rnh)1534 rib_flush_routes(struct rib_head *rnh)
1535 {
1536 RIB_WLOCK(rnh);
1537 rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
1538 RIB_WUNLOCK(rnh);
1539 }
1540
1541 void
rib_flush_routes_family(int family)1542 rib_flush_routes_family(int family)
1543 {
1544 struct rib_head *rnh;
1545
1546 for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
1547 if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
1548 rib_flush_routes(rnh);
1549 }
1550 }
1551
/*
 * Returns a constant string naming the address family @family:
 * "inet", "inet6" or "link"; "unknown" for anything else.
 */
const char *
rib_print_family(int family)
{
	const char *name;

	switch (family) {
	case AF_INET:
		name = "inet";
		break;
	case AF_INET6:
		name = "inet6";
		break;
	case AF_LINK:
		name = "link";
		break;
	default:
		name = "unknown";
		break;
	}

	return (name);
}
1565
1566