1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1980, 1986, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95 32 * $FreeBSD$ 33 */ 34 /************************************************************************ 35 * Note: In this file a 'fib' is a "forwarding information base" * 36 * Which is the new name for an in kernel routing (next hop) table. * 37 ***********************************************************************/ 38 39 #include "opt_inet.h" 40 #include "opt_inet6.h" 41 #include "opt_mrouting.h" 42 #include "opt_mpath.h" 43 #include "opt_route.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/malloc.h> 48 #include <sys/mbuf.h> 49 #include <sys/socket.h> 50 #include <sys/sysctl.h> 51 #include <sys/syslog.h> 52 #include <sys/sysproto.h> 53 #include <sys/proc.h> 54 #include <sys/domain.h> 55 #include <sys/eventhandler.h> 56 #include <sys/kernel.h> 57 #include <sys/lock.h> 58 #include <sys/rmlock.h> 59 60 #include <net/if.h> 61 #include <net/if_var.h> 62 #include <net/if_dl.h> 63 #include <net/route.h> 64 #include <net/route/route_var.h> 65 #include <net/route/nhop.h> 66 #include <net/route/shared.h> 67 #include <net/vnet.h> 68 69 #ifdef RADIX_MPATH 70 #include <net/radix_mpath.h> 71 #endif 72 73 #include <netinet/in.h> 74 #include <netinet/ip_mroute.h> 75 76 #include <vm/uma.h> 77 78 #define RT_MAXFIBS UINT16_MAX 79 80 /* Kernel config default option. */ 81 #ifdef ROUTETABLES 82 #if ROUTETABLES <= 0 83 #error "ROUTETABLES defined too low" 84 #endif 85 #if ROUTETABLES > RT_MAXFIBS 86 #error "ROUTETABLES defined too big" 87 #endif 88 #define RT_NUMFIBS ROUTETABLES 89 #endif /* ROUTETABLES */ 90 /* Initialize to default if not otherwise set. */ 91 #ifndef RT_NUMFIBS 92 #define RT_NUMFIBS 1 93 #endif 94 95 /* This is read-only.. */ 96 u_int rt_numfibs = RT_NUMFIBS; 97 SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RDTUN, &rt_numfibs, 0, ""); 98 99 /* 100 * By default add routes to all fibs for new interfaces. 101 * Once this is set to 0 then only allocate routes on interface 102 * changes for the FIB of the caller when adding a new set of addresses 103 * to an interface. XXX this is a shotgun aproach to a problem that needs 104 * a more fine grained solution.. that will come. 105 * XXX also has the problems getting the FIB from curthread which will not 106 * always work given the fib can be overridden and prefixes can be added 107 * from the network stack context. 108 */ 109 VNET_DEFINE(u_int, rt_add_addr_allfibs) = 1; 110 SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RWTUN | CTLFLAG_VNET, 111 &VNET_NAME(rt_add_addr_allfibs), 0, ""); 112 113 VNET_PCPUSTAT_DEFINE(struct rtstat, rtstat); 114 115 VNET_PCPUSTAT_SYSINIT(rtstat); 116 #ifdef VIMAGE 117 VNET_PCPUSTAT_SYSUNINIT(rtstat); 118 #endif 119 120 VNET_DEFINE(struct rib_head *, rt_tables); 121 #define V_rt_tables VNET(rt_tables) 122 123 VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ 124 #define V_rttrash VNET(rttrash) 125 126 127 /* 128 * Convert a 'struct radix_node *' to a 'struct rtentry *'. 129 * The operation can be done safely (in this code) because a 130 * 'struct rtentry' starts with two 'struct radix_node''s, the first 131 * one representing leaf nodes in the routing tree, which is 132 * what the code in radix.c passes us as a 'struct radix_node'. 133 * 134 * But because there are a lot of assumptions in this conversion, 135 * do not cast explicitly, but always use the macro below. 136 */ 137 #define RNTORT(p) ((struct rtentry *)(p)) 138 139 VNET_DEFINE_STATIC(uma_zone_t, rtzone); /* Routing table UMA zone. */ 140 #define V_rtzone VNET(rtzone) 141 142 EVENTHANDLER_LIST_DEFINE(rt_addrmsg); 143 144 static int rt_getifa_fib(struct rt_addrinfo *, u_int); 145 static void rt_setmetrics(const struct rt_addrinfo *, struct rtentry *); 146 static int rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *, 147 void *arg); 148 static struct rtentry *rt_unlinkrte(struct rib_head *rnh, 149 struct rt_addrinfo *info, int *perror); 150 static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info); 151 #ifdef RADIX_MPATH 152 static struct radix_node *rt_mpath_unlink(struct rib_head *rnh, 153 struct rt_addrinfo *info, struct rtentry *rto, int *perror); 154 #endif 155 static int rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, 156 int flags); 157 158 static int add_route(struct rib_head *rnh, struct rt_addrinfo *info, 159 struct rtentry **ret_nrt); 160 static int del_route(struct rib_head *rnh, struct rt_addrinfo *info, 161 struct rtentry **ret_nrt); 162 static int change_route(struct rib_head *, struct rt_addrinfo *, 163 struct rtentry **); 164 165 /* 166 * handler for net.my_fibnum 167 */ 168 static int 169 sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) 170 { 171 int fibnum; 172 int error; 173 174 fibnum = curthread->td_proc->p_fibnum; 175 error = sysctl_handle_int(oidp, &fibnum, 0, req); 176 return (error); 177 } 178 179 SYSCTL_PROC(_net, OID_AUTO, my_fibnum, 180 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 181 &sysctl_my_fibnum, "I", 182 "default FIB of caller"); 183 184 static __inline struct rib_head ** 185 rt_tables_get_rnh_ptr(int table, int fam) 186 { 187 struct rib_head **rnh; 188 189 KASSERT(table >= 0 && table < rt_numfibs, 190 ("%s: table out of bounds (0 <= %d < %d)", __func__, table, 191 rt_numfibs)); 192 KASSERT(fam >= 0 && fam < (AF_MAX + 1), 193 ("%s: fam out of bounds (0 <= %d < %d)", __func__, fam, AF_MAX+1)); 194 195 /* rnh is [fib=0][af=0]. */ 196 rnh = (struct rib_head **)V_rt_tables; 197 /* Get the offset to the requested table and fam. */ 198 rnh += table * (AF_MAX+1) + fam; 199 200 return (rnh); 201 } 202 203 struct rib_head * 204 rt_tables_get_rnh(int table, int fam) 205 { 206 207 return (*rt_tables_get_rnh_ptr(table, fam)); 208 } 209 210 u_int 211 rt_tables_get_gen(int table, int fam) 212 { 213 struct rib_head *rnh; 214 215 rnh = *rt_tables_get_rnh_ptr(table, fam); 216 KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d fam %d", 217 __func__, table, fam)); 218 return (rnh->rnh_gen); 219 } 220 221 222 /* 223 * route initialization must occur before ip6_init2(), which happenas at 224 * SI_ORDER_MIDDLE. 225 */ 226 static void 227 route_init(void) 228 { 229 230 /* whack the tunable ints into line. */ 231 if (rt_numfibs > RT_MAXFIBS) 232 rt_numfibs = RT_MAXFIBS; 233 if (rt_numfibs == 0) 234 rt_numfibs = 1; 235 nhops_init(); 236 } 237 SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, NULL); 238 239 static int 240 rtentry_zinit(void *mem, int size, int how) 241 { 242 struct rtentry *rt = mem; 243 244 rt->rt_pksent = counter_u64_alloc(how); 245 if (rt->rt_pksent == NULL) 246 return (ENOMEM); 247 248 RT_LOCK_INIT(rt); 249 250 return (0); 251 } 252 253 static void 254 rtentry_zfini(void *mem, int size) 255 { 256 struct rtentry *rt = mem; 257 258 RT_LOCK_DESTROY(rt); 259 counter_u64_free(rt->rt_pksent); 260 } 261 262 static int 263 rtentry_ctor(void *mem, int size, void *arg, int how) 264 { 265 struct rtentry *rt = mem; 266 267 bzero(rt, offsetof(struct rtentry, rt_endzero)); 268 counter_u64_zero(rt->rt_pksent); 269 rt->rt_chain = NULL; 270 271 return (0); 272 } 273 274 static void 275 rtentry_dtor(void *mem, int size, void *arg) 276 { 277 struct rtentry *rt = mem; 278 279 RT_UNLOCK_COND(rt); 280 } 281 282 static void 283 vnet_route_init(const void *unused __unused) 284 { 285 struct domain *dom; 286 struct rib_head **rnh; 287 int table; 288 int fam; 289 290 V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) * 291 sizeof(struct rib_head *), M_RTABLE, M_WAITOK|M_ZERO); 292 293 V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), 294 rtentry_ctor, rtentry_dtor, 295 rtentry_zinit, rtentry_zfini, UMA_ALIGN_PTR, 0); 296 for (dom = domains; dom; dom = dom->dom_next) { 297 if (dom->dom_rtattach == NULL) 298 continue; 299 300 for (table = 0; table < rt_numfibs; table++) { 301 fam = dom->dom_family; 302 if (table != 0 && fam != AF_INET6 && fam != AF_INET) 303 break; 304 305 rnh = rt_tables_get_rnh_ptr(table, fam); 306 if (rnh == NULL) 307 panic("%s: rnh NULL", __func__); 308 dom->dom_rtattach((void **)rnh, 0, table); 309 } 310 } 311 } 312 VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, 313 vnet_route_init, 0); 314 315 #ifdef VIMAGE 316 static void 317 vnet_route_uninit(const void *unused __unused) 318 { 319 int table; 320 int fam; 321 struct domain *dom; 322 struct rib_head **rnh; 323 324 for (dom = domains; dom; dom = dom->dom_next) { 325 if (dom->dom_rtdetach == NULL) 326 continue; 327 328 for (table = 0; table < rt_numfibs; table++) { 329 fam = dom->dom_family; 330 331 if (table != 0 && fam != AF_INET6 && fam != AF_INET) 332 break; 333 334 rnh = rt_tables_get_rnh_ptr(table, fam); 335 if (rnh == NULL) 336 panic("%s: rnh NULL", __func__); 337 dom->dom_rtdetach((void **)rnh, 0); 338 } 339 } 340 341 free(V_rt_tables, M_RTABLE); 342 uma_zdestroy(V_rtzone); 343 } 344 VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, 345 vnet_route_uninit, 0); 346 #endif 347 348 struct rib_head * 349 rt_table_init(int offset, int family, u_int fibnum) 350 { 351 struct rib_head *rh; 352 353 rh = malloc(sizeof(struct rib_head), M_RTABLE, M_WAITOK | M_ZERO); 354 355 /* TODO: These details should be hidded inside radix.c */ 356 /* Init masks tree */ 357 rn_inithead_internal(&rh->head, rh->rnh_nodes, offset); 358 rn_inithead_internal(&rh->rmhead.head, rh->rmhead.mask_nodes, 0); 359 rh->head.rnh_masks = &rh->rmhead; 360 361 /* Save metadata associated with this routing table. */ 362 rh->rib_family = family; 363 rh->rib_fibnum = fibnum; 364 #ifdef VIMAGE 365 rh->rib_vnet = curvnet; 366 #endif 367 368 tmproutes_init(rh); 369 370 /* Init locks */ 371 RIB_LOCK_INIT(rh); 372 373 nhops_init_rib(rh); 374 375 /* Finally, set base callbacks */ 376 rh->rnh_addaddr = rn_addroute; 377 rh->rnh_deladdr = rn_delete; 378 rh->rnh_matchaddr = rn_match; 379 rh->rnh_lookup = rn_lookup; 380 rh->rnh_walktree = rn_walktree; 381 rh->rnh_walktree_from = rn_walktree_from; 382 383 return (rh); 384 } 385 386 static int 387 rt_freeentry(struct radix_node *rn, void *arg) 388 { 389 struct radix_head * const rnh = arg; 390 struct radix_node *x; 391 392 x = (struct radix_node *)rn_delete(rn + 2, NULL, rnh); 393 if (x != NULL) 394 R_Free(x); 395 return (0); 396 } 397 398 void 399 rt_table_destroy(struct rib_head *rh) 400 { 401 402 tmproutes_destroy(rh); 403 404 rn_walktree(&rh->rmhead.head, rt_freeentry, &rh->rmhead.head); 405 406 nhops_destroy_rib(rh); 407 408 /* Assume table is already empty */ 409 RIB_LOCK_DESTROY(rh); 410 free(rh, M_RTABLE); 411 } 412 413 414 #ifndef _SYS_SYSPROTO_H_ 415 struct setfib_args { 416 int fibnum; 417 }; 418 #endif 419 int 420 sys_setfib(struct thread *td, struct setfib_args *uap) 421 { 422 if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs) 423 return EINVAL; 424 td->td_proc->p_fibnum = uap->fibnum; 425 return (0); 426 } 427 428 /* 429 * Look up the route that matches the address given 430 * Or, at least try.. Create a cloned route if needed. 431 * 432 * The returned route, if any, is locked. 433 */ 434 struct rtentry * 435 rtalloc1(struct sockaddr *dst, int report, u_long ignflags) 436 { 437 438 return (rtalloc1_fib(dst, report, ignflags, RT_DEFAULT_FIB)); 439 } 440 441 struct rtentry * 442 rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, 443 u_int fibnum) 444 { 445 RIB_RLOCK_TRACKER; 446 struct rib_head *rh; 447 struct radix_node *rn; 448 struct rtentry *newrt; 449 struct rt_addrinfo info; 450 int err = 0, msgtype = RTM_MISS; 451 452 KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); 453 rh = rt_tables_get_rnh(fibnum, dst->sa_family); 454 newrt = NULL; 455 if (rh == NULL) 456 goto miss; 457 458 /* 459 * Look up the address in the table for that Address Family 460 */ 461 if ((ignflags & RTF_RNH_LOCKED) == 0) 462 RIB_RLOCK(rh); 463 #ifdef INVARIANTS 464 else 465 RIB_LOCK_ASSERT(rh); 466 #endif 467 rn = rh->rnh_matchaddr(dst, &rh->head); 468 if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { 469 newrt = RNTORT(rn); 470 RT_LOCK(newrt); 471 RT_ADDREF(newrt); 472 if ((ignflags & RTF_RNH_LOCKED) == 0) 473 RIB_RUNLOCK(rh); 474 return (newrt); 475 476 } else if ((ignflags & RTF_RNH_LOCKED) == 0) 477 RIB_RUNLOCK(rh); 478 /* 479 * Either we hit the root or could not find any match, 480 * which basically means: "cannot get there from here". 481 */ 482 miss: 483 RTSTAT_INC(rts_unreach); 484 485 if (report) { 486 /* 487 * If required, report the failure to the supervising 488 * Authorities. 489 * For a delete, this is not an error. (report == 0) 490 */ 491 bzero(&info, sizeof(info)); 492 info.rti_info[RTAX_DST] = dst; 493 rt_missmsg_fib(msgtype, &info, 0, err, fibnum); 494 } 495 return (newrt); 496 } 497 498 /* 499 * Remove a reference count from an rtentry. 500 * If the count gets low enough, take it out of the routing table 501 */ 502 void 503 rtfree(struct rtentry *rt) 504 { 505 struct rib_head *rnh; 506 507 KASSERT(rt != NULL,("%s: NULL rt", __func__)); 508 rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); 509 KASSERT(rnh != NULL,("%s: NULL rnh", __func__)); 510 511 RT_LOCK_ASSERT(rt); 512 513 /* 514 * The callers should use RTFREE_LOCKED() or RTFREE(), so 515 * we should come here exactly with the last reference. 516 */ 517 RT_REMREF(rt); 518 if (rt->rt_refcnt > 0) { 519 log(LOG_DEBUG, "%s: %p has %d refs\n", __func__, rt, rt->rt_refcnt); 520 goto done; 521 } 522 523 /* 524 * On last reference give the "close method" a chance 525 * to cleanup private state. This also permits (for 526 * IPv4 and IPv6) a chance to decide if the routing table 527 * entry should be purged immediately or at a later time. 528 * When an immediate purge is to happen the close routine 529 * typically calls rtexpunge which clears the RTF_UP flag 530 * on the entry so that the code below reclaims the storage. 531 */ 532 if (rt->rt_refcnt == 0 && rnh->rnh_close) 533 rnh->rnh_close((struct radix_node *)rt, &rnh->head); 534 535 /* 536 * If we are no longer "up" (and ref == 0) 537 * then we can free the resources associated 538 * with the route. 539 */ 540 if ((rt->rt_flags & RTF_UP) == 0) { 541 if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 542 panic("rtfree 2"); 543 /* 544 * the rtentry must have been removed from the routing table 545 * so it is represented in rttrash.. remove that now. 546 */ 547 V_rttrash--; 548 #ifdef DIAGNOSTIC 549 if (rt->rt_refcnt < 0) { 550 printf("rtfree: %p not freed (neg refs)\n", rt); 551 goto done; 552 } 553 #endif 554 /* 555 * The key is separatly alloc'd so free it (see rt_setgate()). 556 * This also frees the gateway, as they are always malloc'd 557 * together. 558 */ 559 R_Free(rt_key(rt)); 560 561 /* Unreference nexthop */ 562 nhop_free(rt->rt_nhop); 563 564 /* 565 * and the rtentry itself of course 566 */ 567 uma_zfree(V_rtzone, rt); 568 return; 569 } 570 done: 571 RT_UNLOCK(rt); 572 } 573 574 /* 575 * Temporary RTFREE() function wrapper. 576 * Intended to use in control plane code to 577 * avoid exposing internal layout of 'struct rtentry'. 578 */ 579 void 580 rtfree_func(struct rtentry *rt) 581 { 582 583 RTFREE(rt); 584 } 585 586 /* 587 * Adds a temporal redirect entry to the routing table. 588 * @fibnum: fib number 589 * @dst: destination to install redirect to 590 * @gateway: gateway to go via 591 * @author: sockaddr of originating router, can be NULL 592 * @ifp: interface to use for the redirected route 593 * @flags: set of flags to add. Allowed: RTF_GATEWAY 594 * @lifetime_sec: time in seconds to expire this redirect. 595 * 596 * Retuns 0 on success, errno otherwise. 597 */ 598 int 599 rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway, 600 struct sockaddr *author, struct ifnet *ifp, int flags, int lifetime_sec) 601 { 602 struct rtentry *rt; 603 int error; 604 struct rt_addrinfo info; 605 struct rt_metrics rti_rmx; 606 struct ifaddr *ifa; 607 608 NET_EPOCH_ASSERT(); 609 610 if (rt_tables_get_rnh(fibnum, dst->sa_family) == NULL) 611 return (EAFNOSUPPORT); 612 613 /* Verify the allowed flag mask. */ 614 KASSERT(((flags & ~(RTF_GATEWAY)) == 0), 615 ("invalid redirect flags: %x", flags)); 616 617 /* Get the best ifa for the given interface and gateway. */ 618 if ((ifa = ifaof_ifpforaddr(gateway, ifp)) == NULL) 619 return (ENETUNREACH); 620 ifa_ref(ifa); 621 622 bzero(&info, sizeof(info)); 623 info.rti_info[RTAX_DST] = dst; 624 info.rti_info[RTAX_GATEWAY] = gateway; 625 info.rti_ifa = ifa; 626 info.rti_ifp = ifp; 627 info.rti_flags = flags | RTF_HOST | RTF_DYNAMIC; 628 629 /* Setup route metrics to define expire time. */ 630 bzero(&rti_rmx, sizeof(rti_rmx)); 631 /* Set expire time as absolute. */ 632 rti_rmx.rmx_expire = lifetime_sec + time_second; 633 info.rti_mflags |= RTV_EXPIRE; 634 info.rti_rmx = &rti_rmx; 635 636 error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); 637 ifa_free(ifa); 638 639 if (error != 0) { 640 /* TODO: add per-fib redirect stats. */ 641 return (error); 642 } 643 644 RT_LOCK(rt); 645 flags = rt->rt_flags; 646 RTFREE_LOCKED(rt); 647 648 RTSTAT_INC(rts_dynamic); 649 650 /* Send notification of a route addition to userland. */ 651 bzero(&info, sizeof(info)); 652 info.rti_info[RTAX_DST] = dst; 653 info.rti_info[RTAX_GATEWAY] = gateway; 654 info.rti_info[RTAX_AUTHOR] = author; 655 rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum); 656 657 return (0); 658 } 659 660 /* 661 * Routing table ioctl interface. 662 */ 663 int 664 rtioctl_fib(u_long req, caddr_t data, u_int fibnum) 665 { 666 667 /* 668 * If more ioctl commands are added here, make sure the proper 669 * super-user checks are being performed because it is possible for 670 * prison-root to make it this far if raw sockets have been enabled 671 * in jails. 672 */ 673 #ifdef INET 674 /* Multicast goop, grrr... */ 675 return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP; 676 #else /* INET */ 677 return ENXIO; 678 #endif /* INET */ 679 } 680 681 struct ifaddr * 682 ifa_ifwithroute(int flags, const struct sockaddr *dst, struct sockaddr *gateway, 683 u_int fibnum) 684 { 685 struct ifaddr *ifa; 686 687 NET_EPOCH_ASSERT(); 688 if ((flags & RTF_GATEWAY) == 0) { 689 /* 690 * If we are adding a route to an interface, 691 * and the interface is a pt to pt link 692 * we should search for the destination 693 * as our clue to the interface. Otherwise 694 * we can use the local address. 695 */ 696 ifa = NULL; 697 if (flags & RTF_HOST) 698 ifa = ifa_ifwithdstaddr(dst, fibnum); 699 if (ifa == NULL) 700 ifa = ifa_ifwithaddr(gateway); 701 } else { 702 /* 703 * If we are adding a route to a remote net 704 * or host, the gateway may still be on the 705 * other end of a pt to pt link. 706 */ 707 ifa = ifa_ifwithdstaddr(gateway, fibnum); 708 } 709 if (ifa == NULL) 710 ifa = ifa_ifwithnet(gateway, 0, fibnum); 711 if (ifa == NULL) { 712 struct nhop_object *nh; 713 714 nh = rib_lookup(fibnum, gateway, NHR_NONE, 0); 715 716 /* 717 * dismiss a gateway that is reachable only 718 * through the default router 719 */ 720 if ((nh == NULL) || (nh->nh_flags & NHF_DEFAULT)) 721 return (NULL); 722 ifa = nh->nh_ifa; 723 } 724 if (ifa->ifa_addr->sa_family != dst->sa_family) { 725 struct ifaddr *oifa = ifa; 726 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 727 if (ifa == NULL) 728 ifa = oifa; 729 } 730 731 return (ifa); 732 } 733 734 /* 735 * Do appropriate manipulations of a routing tree given 736 * all the bits of info needed 737 */ 738 int 739 rtrequest_fib(int req, 740 struct sockaddr *dst, 741 struct sockaddr *gateway, 742 struct sockaddr *netmask, 743 int flags, 744 struct rtentry **ret_nrt, 745 u_int fibnum) 746 { 747 struct rt_addrinfo info; 748 749 if (dst->sa_len == 0) 750 return(EINVAL); 751 752 bzero((caddr_t)&info, sizeof(info)); 753 info.rti_flags = flags; 754 info.rti_info[RTAX_DST] = dst; 755 info.rti_info[RTAX_GATEWAY] = gateway; 756 info.rti_info[RTAX_NETMASK] = netmask; 757 return rtrequest1_fib(req, &info, ret_nrt, fibnum); 758 } 759 760 761 /* 762 * Copy most of @rt data into @info. 763 * 764 * If @flags contains NHR_COPY, copies dst,netmask and gw to the 765 * pointers specified by @info structure. Assume such pointers 766 * are zeroed sockaddr-like structures with sa_len field initialized 767 * to reflect size of the provided buffer. if no NHR_COPY is specified, 768 * point dst,netmask and gw @info fields to appropriate @rt values. 769 * 770 * if @flags contains NHR_REF, do refcouting on rt_ifp and rt_ifa. 771 * 772 * Returns 0 on success. 773 */ 774 int 775 rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags) 776 { 777 struct rt_metrics *rmx; 778 struct sockaddr *src, *dst; 779 struct nhop_object *nh; 780 int sa_len; 781 782 if (flags & NHR_COPY) { 783 /* Copy destination if dst is non-zero */ 784 src = rt_key(rt); 785 dst = info->rti_info[RTAX_DST]; 786 sa_len = src->sa_len; 787 if (dst != NULL) { 788 if (src->sa_len > dst->sa_len) 789 return (ENOMEM); 790 memcpy(dst, src, src->sa_len); 791 info->rti_addrs |= RTA_DST; 792 } 793 794 /* Copy mask if set && dst is non-zero */ 795 src = rt_mask(rt); 796 dst = info->rti_info[RTAX_NETMASK]; 797 if (src != NULL && dst != NULL) { 798 799 /* 800 * Radix stores different value in sa_len, 801 * assume rt_mask() to have the same length 802 * as rt_key() 803 */ 804 if (sa_len > dst->sa_len) 805 return (ENOMEM); 806 memcpy(dst, src, src->sa_len); 807 info->rti_addrs |= RTA_NETMASK; 808 } 809 810 /* Copy gateway is set && dst is non-zero */ 811 src = &rt->rt_nhop->gw_sa; 812 dst = info->rti_info[RTAX_GATEWAY]; 813 if ((rt->rt_flags & RTF_GATEWAY) && src != NULL && dst != NULL){ 814 if (src->sa_len > dst->sa_len) 815 return (ENOMEM); 816 memcpy(dst, src, src->sa_len); 817 info->rti_addrs |= RTA_GATEWAY; 818 } 819 } else { 820 info->rti_info[RTAX_DST] = rt_key(rt); 821 info->rti_addrs |= RTA_DST; 822 if (rt_mask(rt) != NULL) { 823 info->rti_info[RTAX_NETMASK] = rt_mask(rt); 824 info->rti_addrs |= RTA_NETMASK; 825 } 826 if (rt->rt_flags & RTF_GATEWAY) { 827 info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; 828 info->rti_addrs |= RTA_GATEWAY; 829 } 830 } 831 832 nh = rt->rt_nhop; 833 rmx = info->rti_rmx; 834 if (rmx != NULL) { 835 info->rti_mflags |= RTV_MTU; 836 rmx->rmx_mtu = nh->nh_mtu; 837 } 838 839 info->rti_flags = rt->rt_flags | nhop_get_rtflags(nh); 840 info->rti_ifp = nh->nh_ifp; 841 info->rti_ifa = nh->nh_ifa; 842 if (flags & NHR_REF) { 843 if_ref(info->rti_ifp); 844 ifa_ref(info->rti_ifa); 845 } 846 847 return (0); 848 } 849 850 /* 851 * Lookups up route entry for @dst in RIB database for fib @fibnum. 852 * Exports entry data to @info using rt_exportinfo(). 853 * 854 * If @flags contains NHR_REF, refcouting is performed on rt_ifp and rt_ifa. 855 * All references can be released later by calling rib_free_info(). 856 * 857 * Returns 0 on success. 858 * Returns ENOENT for lookup failure, ENOMEM for export failure. 859 */ 860 int 861 rib_lookup_info(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags, 862 uint32_t flowid, struct rt_addrinfo *info) 863 { 864 RIB_RLOCK_TRACKER; 865 struct rib_head *rh; 866 struct radix_node *rn; 867 struct rtentry *rt; 868 int error; 869 870 KASSERT((fibnum < rt_numfibs), ("rib_lookup_rte: bad fibnum")); 871 rh = rt_tables_get_rnh(fibnum, dst->sa_family); 872 if (rh == NULL) 873 return (ENOENT); 874 875 RIB_RLOCK(rh); 876 rn = rh->rnh_matchaddr(__DECONST(void *, dst), &rh->head); 877 if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { 878 rt = RNTORT(rn); 879 /* Ensure route & ifp is UP */ 880 if (RT_LINK_IS_UP(rt->rt_nhop->nh_ifp)) { 881 flags = (flags & NHR_REF) | NHR_COPY; 882 error = rt_exportinfo(rt, info, flags); 883 RIB_RUNLOCK(rh); 884 885 return (error); 886 } 887 } 888 RIB_RUNLOCK(rh); 889 890 return (ENOENT); 891 } 892 893 /* 894 * Releases all references acquired by rib_lookup_info() when 895 * called with NHR_REF flags. 896 */ 897 void 898 rib_free_info(struct rt_addrinfo *info) 899 { 900 901 ifa_free(info->rti_ifa); 902 if_rele(info->rti_ifp); 903 } 904 905 /* 906 * Iterates over all existing fibs in system calling 907 * @setwa_f function prior to traversing each fib. 908 * Calls @wa_f function for each element in current fib. 909 * If af is not AF_UNSPEC, iterates over fibs in particular 910 * address family. 911 */ 912 void 913 rt_foreach_fib_walk(int af, rt_setwarg_t *setwa_f, rt_walktree_f_t *wa_f, 914 void *arg) 915 { 916 struct rib_head *rnh; 917 uint32_t fibnum; 918 int i; 919 920 for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { 921 /* Do we want some specific family? */ 922 if (af != AF_UNSPEC) { 923 rnh = rt_tables_get_rnh(fibnum, af); 924 if (rnh == NULL) 925 continue; 926 if (setwa_f != NULL) 927 setwa_f(rnh, fibnum, af, arg); 928 929 RIB_WLOCK(rnh); 930 rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f,arg); 931 RIB_WUNLOCK(rnh); 932 continue; 933 } 934 935 for (i = 1; i <= AF_MAX; i++) { 936 rnh = rt_tables_get_rnh(fibnum, i); 937 if (rnh == NULL) 938 continue; 939 if (setwa_f != NULL) 940 setwa_f(rnh, fibnum, i, arg); 941 942 RIB_WLOCK(rnh); 943 rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f,arg); 944 RIB_WUNLOCK(rnh); 945 } 946 } 947 } 948 949 struct rt_delinfo 950 { 951 struct rt_addrinfo info; 952 struct rib_head *rnh; 953 struct rtentry *head; 954 }; 955 956 /* 957 * Conditionally unlinks @rn from radix tree based 958 * on info data passed in @arg. 959 */ 960 static int 961 rt_checkdelroute(struct radix_node *rn, void *arg) 962 { 963 struct rt_delinfo *di; 964 struct rt_addrinfo *info; 965 struct rtentry *rt; 966 int error; 967 968 di = (struct rt_delinfo *)arg; 969 rt = (struct rtentry *)rn; 970 info = &di->info; 971 error = 0; 972 973 info->rti_info[RTAX_DST] = rt_key(rt); 974 info->rti_info[RTAX_NETMASK] = rt_mask(rt); 975 info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; 976 977 rt = rt_unlinkrte(di->rnh, info, &error); 978 if (rt == NULL) { 979 /* Either not allowed or not matched. Skip entry */ 980 return (0); 981 } 982 983 /* Entry was unlinked. Add to the list and return */ 984 rt->rt_chain = di->head; 985 di->head = rt; 986 987 return (0); 988 } 989 990 /* 991 * Iterates over a routing table specified by @fibnum and @family and 992 * deletes elements marked by @filter_f. 993 * @fibnum: rtable id 994 * @family: AF_ address family 995 * @filter_f: function returning non-zero value for items to delete 996 * @arg: data to pass to the @filter_f function 997 * @report: true if rtsock notification is needed. 998 */ 999 void 1000 rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f, void *arg, bool report) 1001 { 1002 struct rib_head *rnh; 1003 struct rt_delinfo di; 1004 struct rtentry *rt; 1005 1006 rnh = rt_tables_get_rnh(fibnum, family); 1007 if (rnh == NULL) 1008 return; 1009 1010 bzero(&di, sizeof(di)); 1011 di.info.rti_filter = filter_f; 1012 di.info.rti_filterdata = arg; 1013 di.rnh = rnh; 1014 1015 RIB_WLOCK(rnh); 1016 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); 1017 RIB_WUNLOCK(rnh); 1018 1019 if (di.head == NULL) 1020 return; 1021 1022 /* We might have something to reclaim. */ 1023 while (di.head != NULL) { 1024 rt = di.head; 1025 di.head = rt->rt_chain; 1026 rt->rt_chain = NULL; 1027 1028 /* TODO std rt -> rt_addrinfo export */ 1029 di.info.rti_info[RTAX_DST] = rt_key(rt); 1030 di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1031 1032 rt_notifydelete(rt, &di.info); 1033 1034 if (report) 1035 rt_routemsg(RTM_DELETE, rt, rt->rt_nhop->nh_ifp, 0, 1036 fibnum); 1037 RTFREE_LOCKED(rt); 1038 } 1039 } 1040 1041 /* 1042 * Iterates over all existing fibs in system and deletes each element 1043 * for which @filter_f function returns non-zero value. 1044 * If @family is not AF_UNSPEC, iterates over fibs in particular 1045 * address family. 1046 */ 1047 void 1048 rt_foreach_fib_walk_del(int family, rt_filter_f_t *filter_f, void *arg) 1049 { 1050 u_int fibnum; 1051 int i, start, end; 1052 1053 for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { 1054 /* Do we want some specific family? */ 1055 if (family != AF_UNSPEC) { 1056 start = family; 1057 end = family; 1058 } else { 1059 start = 1; 1060 end = AF_MAX; 1061 } 1062 1063 for (i = start; i <= end; i++) { 1064 if (rt_tables_get_rnh(fibnum, i) == NULL) 1065 continue; 1066 1067 rib_walk_del(fibnum, i, filter_f, arg, 0); 1068 } 1069 } 1070 } 1071 1072 /* 1073 * Delete Routes for a Network Interface 1074 * 1075 * Called for each routing entry via the rnh->rnh_walktree() call above 1076 * to delete all route entries referencing a detaching network interface. 1077 * 1078 * Arguments: 1079 * rt pointer to rtentry 1080 * nh pointer to nhop 1081 * arg argument passed to rnh->rnh_walktree() - detaching interface 1082 * 1083 * Returns: 1084 * 0 successful 1085 * errno failed - reason indicated 1086 */ 1087 static int 1088 rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *nh, void *arg) 1089 { 1090 struct ifnet *ifp = arg; 1091 1092 if (nh->nh_ifp != ifp) 1093 return (0); 1094 1095 /* 1096 * Protect (sorta) against walktree recursion problems 1097 * with cloned routes 1098 */ 1099 if ((rt->rt_flags & RTF_UP) == 0) 1100 return (0); 1101 1102 return (1); 1103 } 1104 1105 /* 1106 * Delete all remaining routes using this interface 1107 * Unfortuneatly the only way to do this is to slog through 1108 * the entire routing table looking for routes which point 1109 * to this interface...oh well... 1110 */ 1111 void 1112 rt_flushifroutes_af(struct ifnet *ifp, int af) 1113 { 1114 KASSERT((af >= 1 && af <= AF_MAX), ("%s: af %d not >= 1 and <= %d", 1115 __func__, af, AF_MAX)); 1116 1117 rt_foreach_fib_walk_del(af, rt_ifdelroute, ifp); 1118 } 1119 1120 void 1121 rt_flushifroutes(struct ifnet *ifp) 1122 { 1123 1124 rt_foreach_fib_walk_del(AF_UNSPEC, rt_ifdelroute, ifp); 1125 } 1126 1127 /* 1128 * Conditionally unlinks rtentry matching data inside @info from @rnh. 1129 * Returns unlinked, locked and referenced @rtentry on success, 1130 * Returns NULL and sets @perror to: 1131 * ESRCH - if prefix was not found, 1132 * EADDRINUSE - if trying to delete PINNED route without appropriate flag. 1133 * ENOENT - if supplied filter function returned 0 (not matched). 1134 */ 1135 static struct rtentry * 1136 rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror) 1137 { 1138 struct sockaddr *dst, *netmask; 1139 struct rtentry *rt; 1140 struct radix_node *rn; 1141 1142 dst = info->rti_info[RTAX_DST]; 1143 netmask = info->rti_info[RTAX_NETMASK]; 1144 1145 rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head); 1146 if (rt == NULL) { 1147 *perror = ESRCH; 1148 return (NULL); 1149 } 1150 1151 if ((info->rti_flags & RTF_PINNED) == 0) { 1152 /* Check if target route can be deleted */ 1153 if (rt->rt_flags & RTF_PINNED) { 1154 *perror = EADDRINUSE; 1155 return (NULL); 1156 } 1157 } 1158 1159 if (info->rti_filter != NULL) { 1160 if (info->rti_filter(rt, rt->rt_nhop, info->rti_filterdata)==0){ 1161 /* Not matched */ 1162 *perror = ENOENT; 1163 return (NULL); 1164 } 1165 1166 /* 1167 * Filter function requested rte deletion. 1168 * Ease the caller work by filling in remaining info 1169 * from that particular entry. 1170 */ 1171 info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; 1172 } 1173 1174 /* 1175 * Remove the item from the tree and return it. 1176 * Complain if it is not there and do no more processing. 1177 */ 1178 *perror = ESRCH; 1179 #ifdef RADIX_MPATH 1180 if (rt_mpath_capable(rnh)) 1181 rn = rt_mpath_unlink(rnh, info, rt, perror); 1182 else 1183 #endif 1184 rn = rnh->rnh_deladdr(dst, netmask, &rnh->head); 1185 if (rn == NULL) 1186 return (NULL); 1187 1188 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 1189 panic ("rtrequest delete"); 1190 1191 rt = RNTORT(rn); 1192 RT_LOCK(rt); 1193 RT_ADDREF(rt); 1194 rt->rt_flags &= ~RTF_UP; 1195 1196 *perror = 0; 1197 1198 return (rt); 1199 } 1200 1201 static void 1202 rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info) 1203 { 1204 struct ifaddr *ifa; 1205 1206 /* 1207 * give the protocol a chance to keep things in sync. 1208 */ 1209 ifa = rt->rt_nhop->nh_ifa; 1210 if (ifa != NULL && ifa->ifa_rtrequest != NULL) 1211 ifa->ifa_rtrequest(RTM_DELETE, rt, rt->rt_nhop, info); 1212 1213 /* 1214 * One more rtentry floating around that is not 1215 * linked to the routing table. rttrash will be decremented 1216 * when RTFREE(rt) is eventually called. 1217 */ 1218 V_rttrash++; 1219 } 1220 1221 1222 /* 1223 * These (questionable) definitions of apparent local variables apply 1224 * to the next two functions. XXXXXX!!! 1225 */ 1226 #define dst info->rti_info[RTAX_DST] 1227 #define gateway info->rti_info[RTAX_GATEWAY] 1228 #define netmask info->rti_info[RTAX_NETMASK] 1229 #define ifaaddr info->rti_info[RTAX_IFA] 1230 #define ifpaddr info->rti_info[RTAX_IFP] 1231 #define flags info->rti_flags 1232 1233 /* 1234 * Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined, 1235 * it will be referenced so the caller must free it. 1236 * 1237 * Assume basic consistency checks are executed by callers: 1238 * RTAX_DST exists, if RTF_GATEWAY is set, RTAX_GATEWAY exists as well. 1239 */ 1240 int 1241 rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) 1242 { 1243 struct epoch_tracker et; 1244 int needref, error; 1245 1246 /* 1247 * ifp may be specified by sockaddr_dl 1248 * when protocol address is ambiguous. 1249 */ 1250 error = 0; 1251 needref = (info->rti_ifa == NULL); 1252 NET_EPOCH_ENTER(et); 1253 1254 /* If we have interface specified by the ifindex in the address, use it */ 1255 if (info->rti_ifp == NULL && ifpaddr != NULL && 1256 ifpaddr->sa_family == AF_LINK) { 1257 const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)ifpaddr; 1258 if (sdl->sdl_index != 0) 1259 info->rti_ifp = ifnet_byindex(sdl->sdl_index); 1260 } 1261 /* 1262 * If we have source address specified, try to find it 1263 * TODO: avoid enumerating all ifas on all interfaces. 1264 */ 1265 if (info->rti_ifa == NULL && ifaaddr != NULL) 1266 info->rti_ifa = ifa_ifwithaddr(ifaaddr); 1267 if (info->rti_ifa == NULL) { 1268 struct sockaddr *sa; 1269 1270 /* 1271 * Most common use case for the userland-supplied routes. 1272 * 1273 * Choose sockaddr to select ifa. 1274 * -- if ifp is set -- 1275 * Order of preference: 1276 * 1) IFA address 1277 * 2) gateway address 1278 * Note: for interface routes link-level gateway address 1279 * is specified to indicate the interface index without 1280 * specifying RTF_GATEWAY. In this case, ignore gateway 1281 * Note: gateway AF may be different from dst AF. In this case, 1282 * ignore gateway 1283 * 3) final destination. 1284 * 4) if all of these fails, try to get at least link-level ifa. 1285 * -- else -- 1286 * try to lookup gateway or dst in the routing table to get ifa 1287 */ 1288 if (info->rti_info[RTAX_IFA] != NULL) 1289 sa = info->rti_info[RTAX_IFA]; 1290 else if ((info->rti_flags & RTF_GATEWAY) != 0 && 1291 gateway->sa_family == dst->sa_family) 1292 sa = gateway; 1293 else 1294 sa = dst; 1295 if (info->rti_ifp != NULL) { 1296 info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp); 1297 /* Case 4 */ 1298 if (info->rti_ifa == NULL && gateway != NULL) 1299 info->rti_ifa = ifaof_ifpforaddr(gateway, info->rti_ifp); 1300 } else if (dst != NULL && gateway != NULL) 1301 info->rti_ifa = ifa_ifwithroute(flags, dst, gateway, 1302 fibnum); 1303 else if (sa != NULL) 1304 info->rti_ifa = ifa_ifwithroute(flags, sa, sa, 1305 fibnum); 1306 } 1307 if (needref && info->rti_ifa != NULL) { 1308 if (info->rti_ifp == NULL) 1309 info->rti_ifp = info->rti_ifa->ifa_ifp; 1310 ifa_ref(info->rti_ifa); 1311 } else 1312 error = ENETUNREACH; 1313 NET_EPOCH_EXIT(et); 1314 return (error); 1315 } 1316 1317 void 1318 rt_updatemtu(struct ifnet *ifp) 1319 { 1320 struct rib_head *rnh; 1321 int mtu; 1322 int i, j; 1323 1324 /* 1325 * Try to update rt_mtu for all routes using this interface 1326 * Unfortunately the only way to do this is to traverse all 1327 * routing tables in all fibs/domains. 1328 */ 1329 for (i = 1; i <= AF_MAX; i++) { 1330 mtu = if_getmtu_family(ifp, i); 1331 for (j = 0; j < rt_numfibs; j++) { 1332 rnh = rt_tables_get_rnh(j, i); 1333 if (rnh == NULL) 1334 continue; 1335 nhops_update_ifmtu(rnh, ifp, mtu); 1336 } 1337 } 1338 } 1339 1340 1341 #if 0 1342 int p_sockaddr(char *buf, int buflen, struct sockaddr *s); 1343 int rt_print(char *buf, int buflen, struct rtentry *rt); 1344 1345 int 1346 p_sockaddr(char *buf, int buflen, struct sockaddr *s) 1347 { 1348 void *paddr = NULL; 1349 1350 switch (s->sa_family) { 1351 case AF_INET: 1352 paddr = &((struct sockaddr_in *)s)->sin_addr; 1353 break; 1354 case AF_INET6: 1355 paddr = &((struct sockaddr_in6 *)s)->sin6_addr; 1356 break; 1357 } 1358 1359 if (paddr == NULL) 1360 return (0); 1361 1362 if (inet_ntop(s->sa_family, paddr, buf, buflen) == NULL) 1363 return (0); 1364 1365 return (strlen(buf)); 1366 } 1367 1368 int 1369 rt_print(char *buf, int buflen, struct rtentry *rt) 1370 { 1371 struct sockaddr *addr, *mask; 1372 int i = 0; 1373 1374 addr = rt_key(rt); 1375 mask = rt_mask(rt); 1376 1377 i = p_sockaddr(buf, buflen, addr); 1378 if (!(rt->rt_flags & RTF_HOST)) { 1379 buf[i++] = '/'; 1380 i += p_sockaddr(buf + i, buflen - i, mask); 1381 } 1382 1383 if (rt->rt_flags & RTF_GATEWAY) { 1384 buf[i++] = '>'; 1385 i += p_sockaddr(buf + i, buflen - i, &rt->rt_nhop->gw_sa); 1386 } 1387 1388 return (i); 1389 } 1390 #endif 1391 1392 #ifdef RADIX_MPATH 1393 /* 1394 * Deletes key for single-path routes, unlinks rtentry with 1395 * gateway specified in @info from multi-path routes. 1396 * 1397 * Returnes unlinked entry. In case of failure, returns NULL 1398 * and sets @perror to ESRCH. 1399 */ 1400 static struct radix_node * 1401 rt_mpath_unlink(struct rib_head *rnh, struct rt_addrinfo *info, 1402 struct rtentry *rto, int *perror) 1403 { 1404 /* 1405 * if we got multipath routes, we require users to specify 1406 * a matching RTAX_GATEWAY. 1407 */ 1408 struct rtentry *rt; // *rto = NULL; 1409 struct radix_node *rn; 1410 struct sockaddr *gw; 1411 1412 gw = info->rti_info[RTAX_GATEWAY]; 1413 rt = rt_mpath_matchgate(rto, gw); 1414 if (rt == NULL) { 1415 *perror = ESRCH; 1416 return (NULL); 1417 } 1418 1419 /* 1420 * this is the first entry in the chain 1421 */ 1422 if (rto == rt) { 1423 rn = rn_mpath_next((struct radix_node *)rt); 1424 /* 1425 * there is another entry, now it's active 1426 */ 1427 if (rn) { 1428 rto = RNTORT(rn); 1429 RT_LOCK(rto); 1430 rto->rt_flags |= RTF_UP; 1431 RT_UNLOCK(rto); 1432 } else if (rt->rt_flags & RTF_GATEWAY) { 1433 /* 1434 * For gateway routes, we need to 1435 * make sure that we we are deleting 1436 * the correct gateway. 1437 * rt_mpath_matchgate() does not 1438 * check the case when there is only 1439 * one route in the chain. 1440 */ 1441 if (gw && 1442 (rt->rt_nhop->gw_sa.sa_len != gw->sa_len || 1443 memcmp(&rt->rt_nhop->gw_sa, gw, gw->sa_len))) { 1444 *perror = ESRCH; 1445 return (NULL); 1446 } 1447 } 1448 1449 /* 1450 * use the normal delete code to remove 1451 * the first entry 1452 */ 1453 rn = rnh->rnh_deladdr(dst, netmask, &rnh->head); 1454 *perror = 0; 1455 return (rn); 1456 } 1457 1458 /* 1459 * if the entry is 2nd and on up 1460 */ 1461 if (rt_mpath_deldup(rto, rt) == 0) 1462 panic ("rtrequest1: rt_mpath_deldup"); 1463 *perror = 0; 1464 rn = (struct radix_node *)rt; 1465 return (rn); 1466 } 1467 #endif 1468 1469 #undef dst 1470 #undef gateway 1471 #undef netmask 1472 #undef ifaaddr 1473 #undef ifpaddr 1474 #undef flags 1475 1476 int 1477 rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, 1478 u_int fibnum) 1479 { 1480 const struct sockaddr *dst; 1481 struct rib_head *rnh; 1482 int error; 1483 1484 KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); 1485 KASSERT((info->rti_flags & RTF_RNH_LOCKED) == 0, ("rtrequest1_fib: locked")); 1486 1487 dst = info->rti_info[RTAX_DST]; 1488 1489 switch (dst->sa_family) { 1490 case AF_INET6: 1491 case AF_INET: 1492 /* We support multiple FIBs. */ 1493 break; 1494 default: 1495 fibnum = RT_DEFAULT_FIB; 1496 break; 1497 } 1498 1499 /* 1500 * Find the correct routing tree to use for this Address Family 1501 */ 1502 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 1503 if (rnh == NULL) 1504 return (EAFNOSUPPORT); 1505 1506 /* 1507 * If we are adding a host route then we don't want to put 1508 * a netmask in the tree, nor do we want to clone it. 1509 */ 1510 if (info->rti_flags & RTF_HOST) 1511 info->rti_info[RTAX_NETMASK] = NULL; 1512 1513 error = 0; 1514 switch (req) { 1515 case RTM_DELETE: 1516 error = del_route(rnh, info, ret_nrt); 1517 break; 1518 case RTM_RESOLVE: 1519 /* 1520 * resolve was only used for route cloning 1521 * here for compat 1522 */ 1523 break; 1524 case RTM_ADD: 1525 error = add_route(rnh, info, ret_nrt); 1526 break; 1527 case RTM_CHANGE: 1528 error = change_route(rnh, info, ret_nrt); 1529 break; 1530 default: 1531 error = EOPNOTSUPP; 1532 } 1533 1534 return (error); 1535 } 1536 1537 static int 1538 add_route(struct rib_head *rnh, struct rt_addrinfo *info, 1539 struct rtentry **ret_nrt) 1540 { 1541 struct sockaddr *dst, *ndst, *gateway, *netmask; 1542 struct rtentry *rt, *rt_old; 1543 struct nhop_object *nh; 1544 struct radix_node *rn; 1545 struct ifaddr *ifa; 1546 int error, flags; 1547 struct epoch_tracker et; 1548 1549 dst = info->rti_info[RTAX_DST]; 1550 gateway = info->rti_info[RTAX_GATEWAY]; 1551 netmask = info->rti_info[RTAX_NETMASK]; 1552 flags = info->rti_flags; 1553 1554 if ((flags & RTF_GATEWAY) && !gateway) 1555 return (EINVAL); 1556 if (dst && gateway && (dst->sa_family != gateway->sa_family) && 1557 (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) 1558 return (EINVAL); 1559 1560 if (info->rti_ifa == NULL) { 1561 error = rt_getifa_fib(info, rnh->rib_fibnum); 1562 if (error) 1563 return (error); 1564 } else { 1565 ifa_ref(info->rti_ifa); 1566 } 1567 1568 NET_EPOCH_ENTER(et); 1569 error = nhop_create_from_info(rnh, info, &nh); 1570 NET_EPOCH_EXIT(et); 1571 if (error != 0) { 1572 ifa_free(info->rti_ifa); 1573 return (error); 1574 } 1575 1576 rt = uma_zalloc(V_rtzone, M_NOWAIT); 1577 if (rt == NULL) { 1578 ifa_free(info->rti_ifa); 1579 nhop_free(nh); 1580 return (ENOBUFS); 1581 } 1582 rt->rt_flags = RTF_UP | flags; 1583 rt->rt_fibnum = rnh->rib_fibnum; 1584 rt->rt_nhop = nh; 1585 /* 1586 * Add the gateway. Possibly re-malloc-ing the storage for it. 1587 */ 1588 if ((error = rt_setgate(rt, dst, gateway)) != 0) { 1589 ifa_free(info->rti_ifa); 1590 nhop_free(nh); 1591 uma_zfree(V_rtzone, rt); 1592 return (error); 1593 } 1594 1595 /* 1596 * point to the (possibly newly malloc'd) dest address. 1597 */ 1598 ndst = (struct sockaddr *)rt_key(rt); 1599 1600 /* 1601 * make sure it contains the value we want (masked if needed). 1602 */ 1603 if (netmask) { 1604 rt_maskedcopy(dst, ndst, netmask); 1605 } else 1606 bcopy(dst, ndst, dst->sa_len); 1607 1608 /* 1609 * We use the ifa reference returned by rt_getifa_fib(). 1610 * This moved from below so that rnh->rnh_addaddr() can 1611 * examine the ifa and ifa->ifa_ifp if it so desires. 1612 */ 1613 ifa = info->rti_ifa; 1614 rt->rt_weight = 1; 1615 1616 rt_setmetrics(info, rt); 1617 1618 RIB_WLOCK(rnh); 1619 RT_LOCK(rt); 1620 #ifdef RADIX_MPATH 1621 /* do not permit exactly the same dst/mask/gw pair */ 1622 if (rt_mpath_capable(rnh) && 1623 rt_mpath_conflict(rnh, rt, netmask)) { 1624 RIB_WUNLOCK(rnh); 1625 1626 R_Free(rt_key(rt)); 1627 nhop_free(nh); 1628 uma_zfree(V_rtzone, rt); 1629 return (EEXIST); 1630 } 1631 #endif 1632 1633 rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes); 1634 1635 if (rn != NULL && rt->rt_expire > 0) 1636 tmproutes_update(rnh, rt); 1637 1638 rt_old = NULL; 1639 if (rn == NULL && (info->rti_flags & RTF_PINNED) != 0) { 1640 1641 /* 1642 * Force removal and re-try addition 1643 * TODO: better multipath&pinned support 1644 */ 1645 struct sockaddr *info_dst = info->rti_info[RTAX_DST]; 1646 info->rti_info[RTAX_DST] = ndst; 1647 /* Do not delete existing PINNED(interface) routes */ 1648 info->rti_flags &= ~RTF_PINNED; 1649 rt_old = rt_unlinkrte(rnh, info, &error); 1650 info->rti_flags |= RTF_PINNED; 1651 info->rti_info[RTAX_DST] = info_dst; 1652 if (rt_old != NULL) 1653 rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, 1654 rt->rt_nodes); 1655 } 1656 RIB_WUNLOCK(rnh); 1657 1658 if (rt_old != NULL) 1659 RT_UNLOCK(rt_old); 1660 1661 /* 1662 * If it still failed to go into the tree, 1663 * then un-make it (this should be a function) 1664 */ 1665 if (rn == NULL) { 1666 R_Free(rt_key(rt)); 1667 nhop_free(nh); 1668 uma_zfree(V_rtzone, rt); 1669 return (EEXIST); 1670 } 1671 1672 if (rt_old != NULL) { 1673 rt_notifydelete(rt_old, info); 1674 RTFREE(rt_old); 1675 } 1676 1677 /* 1678 * If this protocol has something to add to this then 1679 * allow it to do that as well. 1680 */ 1681 if (ifa->ifa_rtrequest) 1682 ifa->ifa_rtrequest(RTM_ADD, rt, rt->rt_nhop, info); 1683 1684 /* 1685 * actually return a resultant rtentry and 1686 * give the caller a single reference. 1687 */ 1688 if (ret_nrt) { 1689 *ret_nrt = rt; 1690 RT_ADDREF(rt); 1691 } 1692 rnh->rnh_gen++; /* Routing table updated */ 1693 RT_UNLOCK(rt); 1694 1695 return (0); 1696 } 1697 1698 static int 1699 del_route(struct rib_head *rnh, struct rt_addrinfo *info, 1700 struct rtentry **ret_nrt) 1701 { 1702 struct sockaddr *dst, *netmask; 1703 struct sockaddr_storage mdst; 1704 struct rtentry *rt; 1705 int error; 1706 1707 dst = info->rti_info[RTAX_DST]; 1708 netmask = info->rti_info[RTAX_NETMASK]; 1709 1710 if (netmask) { 1711 if (dst->sa_len > sizeof(mdst)) 1712 return (EINVAL); 1713 rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); 1714 dst = (struct sockaddr *)&mdst; 1715 } 1716 1717 RIB_WLOCK(rnh); 1718 rt = rt_unlinkrte(rnh, info, &error); 1719 RIB_WUNLOCK(rnh); 1720 if (error != 0) 1721 return (error); 1722 1723 rt_notifydelete(rt, info); 1724 1725 /* 1726 * If the caller wants it, then it can have it, 1727 * but it's up to it to free the rtentry as we won't be 1728 * doing it. 1729 */ 1730 if (ret_nrt) { 1731 *ret_nrt = rt; 1732 RT_UNLOCK(rt); 1733 } else 1734 RTFREE_LOCKED(rt); 1735 1736 return (0); 1737 } 1738 1739 static int 1740 change_route_one(struct rib_head *rnh, struct rt_addrinfo *info, 1741 struct rtentry **ret_nrt) 1742 { 1743 RIB_RLOCK_TRACKER; 1744 struct rtentry *rt = NULL; 1745 int error = 0; 1746 int free_ifa = 0; 1747 struct nhop_object *nh, *nh_orig; 1748 1749 RIB_RLOCK(rnh); 1750 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 1751 info->rti_info[RTAX_NETMASK], &rnh->head); 1752 1753 if (rt == NULL) { 1754 RIB_RUNLOCK(rnh); 1755 return (ESRCH); 1756 } 1757 1758 #ifdef RADIX_MPATH 1759 /* 1760 * If we got multipath routes, 1761 * we require users to specify a matching RTAX_GATEWAY. 1762 */ 1763 if (rt_mpath_capable(rnh)) { 1764 rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]); 1765 if (rt == NULL) { 1766 RIB_RUNLOCK(rnh); 1767 return (ESRCH); 1768 } 1769 } 1770 #endif 1771 nh_orig = rt->rt_nhop; 1772 1773 RIB_RUNLOCK(rnh); 1774 1775 rt = NULL; 1776 nh = NULL; 1777 1778 /* 1779 * New gateway could require new ifaddr, ifp; 1780 * flags may also be different; ifp may be specified 1781 * by ll sockaddr when protocol address is ambiguous 1782 */ 1783 if (((nh_orig->nh_flags & NHF_GATEWAY) && 1784 info->rti_info[RTAX_GATEWAY] != NULL) || 1785 info->rti_info[RTAX_IFP] != NULL || 1786 (info->rti_info[RTAX_IFA] != NULL && 1787 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) { 1788 error = rt_getifa_fib(info, rnh->rib_fibnum); 1789 if (info->rti_ifa != NULL) 1790 free_ifa = 1; 1791 1792 if (error != 0) { 1793 if (free_ifa) { 1794 ifa_free(info->rti_ifa); 1795 info->rti_ifa = NULL; 1796 } 1797 1798 return (error); 1799 } 1800 } 1801 1802 error = nhop_create_from_nhop(rnh, nh_orig, info, &nh); 1803 if (free_ifa) { 1804 ifa_free(info->rti_ifa); 1805 info->rti_ifa = NULL; 1806 } 1807 if (error != 0) 1808 return (error); 1809 1810 RIB_WLOCK(rnh); 1811 1812 /* Lookup rtentry once again and check if nexthop is still the same */ 1813 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 1814 info->rti_info[RTAX_NETMASK], &rnh->head); 1815 1816 if (rt == NULL) { 1817 RIB_WUNLOCK(rnh); 1818 nhop_free(nh); 1819 return (ESRCH); 1820 } 1821 1822 if (rt->rt_nhop != nh_orig) { 1823 RIB_WUNLOCK(rnh); 1824 nhop_free(nh); 1825 return (EAGAIN); 1826 } 1827 1828 /* Proceed with the update */ 1829 RT_LOCK(rt); 1830 1831 /* Provide notification to the protocols.*/ 1832 if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest) 1833 nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info); 1834 1835 rt->rt_nhop = nh; 1836 rt_setmetrics(info, rt); 1837 1838 if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest) 1839 nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info); 1840 1841 if (ret_nrt != NULL) { 1842 *ret_nrt = rt; 1843 RT_ADDREF(rt); 1844 } 1845 1846 RT_UNLOCK(rt); 1847 1848 /* Update generation id to reflect rtable change */ 1849 rnh->rnh_gen++; 1850 1851 RIB_WUNLOCK(rnh); 1852 1853 nhop_free(nh_orig); 1854 1855 return (0); 1856 } 1857 1858 static int 1859 change_route(struct rib_head *rnh, struct rt_addrinfo *info, 1860 struct rtentry **ret_nrt) 1861 { 1862 struct epoch_tracker et; 1863 int error; 1864 1865 /* Check if updated gateway exists */ 1866 if ((info->rti_flags & RTF_GATEWAY) && 1867 (info->rti_info[RTAX_GATEWAY] == NULL)) 1868 return (EINVAL); 1869 1870 NET_EPOCH_ENTER(et); 1871 1872 /* 1873 * route change is done in multiple steps, with dropping and 1874 * reacquiring lock. In the situations with multiple processes 1875 * changes the same route in can lead to the case when route 1876 * is changed between the steps. Address it by retrying the operation 1877 * multiple times before failing. 1878 */ 1879 for (int i = 0; i < RIB_MAX_RETRIES; i++) { 1880 error = change_route_one(rnh, info, ret_nrt); 1881 if (error != EAGAIN) 1882 break; 1883 } 1884 NET_EPOCH_EXIT(et); 1885 1886 return (error); 1887 } 1888 1889 1890 static void 1891 rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt) 1892 { 1893 1894 if (info->rti_mflags & RTV_WEIGHT) 1895 rt->rt_weight = info->rti_rmx->rmx_weight; 1896 /* Kernel -> userland timebase conversion. */ 1897 if (info->rti_mflags & RTV_EXPIRE) 1898 rt->rt_expire = info->rti_rmx->rmx_expire ? 1899 info->rti_rmx->rmx_expire - time_second + time_uptime : 0; 1900 } 1901 1902 int 1903 rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) 1904 { 1905 /* XXX dst may be overwritten, can we move this to below */ 1906 int dlen = SA_SIZE(dst), glen = SA_SIZE(gate); 1907 1908 /* 1909 * Prepare to store the gateway in rt->rt_gateway. 1910 * Both dst and gateway are stored one after the other in the same 1911 * malloc'd chunk. If we have room, we can reuse the old buffer, 1912 * rt_gateway already points to the right place. 1913 * Otherwise, malloc a new block and update the 'dst' address. 1914 */ 1915 if (rt_key(rt) == NULL) { 1916 caddr_t new; 1917 1918 R_Malloc(new, caddr_t, dlen + glen); 1919 if (new == NULL) 1920 return ENOBUFS; 1921 /* 1922 * XXX note, we copy from *dst and not *rt_key(rt) because 1923 * rt_setgate() can be called to initialize a newly 1924 * allocated route entry, in which case rt_key(rt) == NULL 1925 * (and also rt->rt_gateway == NULL). 1926 * Free()/free() handle a NULL argument just fine. 1927 */ 1928 bcopy(dst, new, dlen); 1929 R_Free(rt_key(rt)); /* free old block, if any */ 1930 rt_key(rt) = (struct sockaddr *)new; 1931 } 1932 1933 return (0); 1934 } 1935 1936 void 1937 rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask) 1938 { 1939 u_char *cp1 = (u_char *)src; 1940 u_char *cp2 = (u_char *)dst; 1941 u_char *cp3 = (u_char *)netmask; 1942 u_char *cplim = cp2 + *cp3; 1943 u_char *cplim2 = cp2 + *cp1; 1944 1945 *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ 1946 cp3 += 2; 1947 if (cplim > cplim2) 1948 cplim = cplim2; 1949 while (cp2 < cplim) 1950 *cp2++ = *cp1++ & *cp3++; 1951 if (cp2 < cplim2) 1952 bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2)); 1953 } 1954 1955 /* 1956 * Set up a routing table entry, normally 1957 * for an interface. 1958 */ 1959 #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */ 1960 static inline int 1961 rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) 1962 { 1963 RIB_RLOCK_TRACKER; 1964 struct sockaddr *dst; 1965 struct sockaddr *netmask; 1966 struct rtentry *rt = NULL; 1967 struct rt_addrinfo info; 1968 int error = 0; 1969 int startfib, endfib; 1970 char tempbuf[_SOCKADDR_TMPSIZE]; 1971 int didwork = 0; 1972 int a_failure = 0; 1973 struct sockaddr_dl_short *sdl = NULL; 1974 struct rib_head *rnh; 1975 1976 if (flags & RTF_HOST) { 1977 dst = ifa->ifa_dstaddr; 1978 netmask = NULL; 1979 } else { 1980 dst = ifa->ifa_addr; 1981 netmask = ifa->ifa_netmask; 1982 } 1983 if (dst->sa_len == 0) 1984 return(EINVAL); 1985 switch (dst->sa_family) { 1986 case AF_INET6: 1987 case AF_INET: 1988 /* We support multiple FIBs. */ 1989 break; 1990 default: 1991 fibnum = RT_DEFAULT_FIB; 1992 break; 1993 } 1994 if (fibnum == RT_ALL_FIBS) { 1995 if (V_rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) 1996 startfib = endfib = ifa->ifa_ifp->if_fib; 1997 else { 1998 startfib = 0; 1999 endfib = rt_numfibs - 1; 2000 } 2001 } else { 2002 KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum")); 2003 startfib = fibnum; 2004 endfib = fibnum; 2005 } 2006 2007 /* 2008 * If it's a delete, check that if it exists, 2009 * it's on the correct interface or we might scrub 2010 * a route to another ifa which would 2011 * be confusing at best and possibly worse. 2012 */ 2013 if (cmd == RTM_DELETE) { 2014 /* 2015 * It's a delete, so it should already exist.. 2016 * If it's a net, mask off the host bits 2017 * (Assuming we have a mask) 2018 * XXX this is kinda inet specific.. 2019 */ 2020 if (netmask != NULL) { 2021 rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask); 2022 dst = (struct sockaddr *)tempbuf; 2023 } 2024 } else if (cmd == RTM_ADD) { 2025 sdl = (struct sockaddr_dl_short *)tempbuf; 2026 bzero(sdl, sizeof(struct sockaddr_dl_short)); 2027 sdl->sdl_family = AF_LINK; 2028 sdl->sdl_len = sizeof(struct sockaddr_dl_short); 2029 sdl->sdl_type = ifa->ifa_ifp->if_type; 2030 sdl->sdl_index = ifa->ifa_ifp->if_index; 2031 } 2032 /* 2033 * Now go through all the requested tables (fibs) and do the 2034 * requested action. Realistically, this will either be fib 0 2035 * for protocols that don't do multiple tables or all the 2036 * tables for those that do. 2037 */ 2038 for ( fibnum = startfib; fibnum <= endfib; fibnum++) { 2039 if (cmd == RTM_DELETE) { 2040 struct radix_node *rn; 2041 /* 2042 * Look up an rtentry that is in the routing tree and 2043 * contains the correct info. 2044 */ 2045 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 2046 if (rnh == NULL) 2047 /* this table doesn't exist but others might */ 2048 continue; 2049 RIB_RLOCK(rnh); 2050 rn = rnh->rnh_lookup(dst, netmask, &rnh->head); 2051 #ifdef RADIX_MPATH 2052 if (rt_mpath_capable(rnh)) { 2053 2054 if (rn == NULL) 2055 error = ESRCH; 2056 else { 2057 rt = RNTORT(rn); 2058 /* 2059 * for interface route the gateway 2060 * gateway is sockaddr_dl, so 2061 * rt_mpath_matchgate must use the 2062 * interface address 2063 */ 2064 rt = rt_mpath_matchgate(rt, 2065 ifa->ifa_addr); 2066 if (rt == NULL) 2067 error = ESRCH; 2068 } 2069 } 2070 #endif 2071 error = (rn == NULL || 2072 (rn->rn_flags & RNF_ROOT) || 2073 RNTORT(rn)->rt_nhop->nh_ifa != ifa); 2074 RIB_RUNLOCK(rnh); 2075 if (error) { 2076 /* this is only an error if bad on ALL tables */ 2077 continue; 2078 } 2079 } 2080 /* 2081 * Do the actual request 2082 */ 2083 bzero((caddr_t)&info, sizeof(info)); 2084 info.rti_ifa = ifa; 2085 info.rti_flags = flags | 2086 (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED; 2087 info.rti_info[RTAX_DST] = dst; 2088 /* 2089 * doing this for compatibility reasons 2090 */ 2091 if (cmd == RTM_ADD) 2092 info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)sdl; 2093 else 2094 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 2095 info.rti_info[RTAX_NETMASK] = netmask; 2096 error = rtrequest1_fib(cmd, &info, &rt, fibnum); 2097 if (error == 0 && rt != NULL) { 2098 /* 2099 * notify any listening routing agents of the change 2100 */ 2101 RT_LOCK(rt); 2102 2103 /* TODO: interface routes/aliases */ 2104 RT_ADDREF(rt); 2105 RT_UNLOCK(rt); 2106 rt_newaddrmsg_fib(cmd, ifa, rt, fibnum); 2107 RT_LOCK(rt); 2108 RT_REMREF(rt); 2109 if (cmd == RTM_DELETE) { 2110 /* 2111 * If we are deleting, and we found an entry, 2112 * then it's been removed from the tree.. 2113 * now throw it away. 2114 */ 2115 RTFREE_LOCKED(rt); 2116 } else { 2117 if (cmd == RTM_ADD) { 2118 /* 2119 * We just wanted to add it.. 2120 * we don't actually need a reference. 2121 */ 2122 RT_REMREF(rt); 2123 } 2124 RT_UNLOCK(rt); 2125 } 2126 didwork = 1; 2127 } 2128 if (error) 2129 a_failure = error; 2130 } 2131 if (cmd == RTM_DELETE) { 2132 if (didwork) { 2133 error = 0; 2134 } else { 2135 /* we only give an error if it wasn't in any table */ 2136 error = ((flags & RTF_HOST) ? 2137 EHOSTUNREACH : ENETUNREACH); 2138 } 2139 } else { 2140 if (a_failure) { 2141 /* return an error if any of them failed */ 2142 error = a_failure; 2143 } 2144 } 2145 return (error); 2146 } 2147 2148 /* 2149 * Set up a routing table entry, normally 2150 * for an interface. 2151 */ 2152 int 2153 rtinit(struct ifaddr *ifa, int cmd, int flags) 2154 { 2155 struct sockaddr *dst; 2156 int fib = RT_DEFAULT_FIB; 2157 2158 if (flags & RTF_HOST) { 2159 dst = ifa->ifa_dstaddr; 2160 } else { 2161 dst = ifa->ifa_addr; 2162 } 2163 2164 switch (dst->sa_family) { 2165 case AF_INET6: 2166 case AF_INET: 2167 /* We do support multiple FIBs. */ 2168 fib = RT_ALL_FIBS; 2169 break; 2170 } 2171 return (rtinit1(ifa, cmd, flags, fib)); 2172 } 2173 2174 /* 2175 * Announce interface address arrival/withdraw 2176 * Returns 0 on success. 2177 */ 2178 int 2179 rt_addrmsg(int cmd, struct ifaddr *ifa, int fibnum) 2180 { 2181 2182 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, 2183 ("unexpected cmd %d", cmd)); 2184 KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), 2185 ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); 2186 2187 EVENTHANDLER_DIRECT_INVOKE(rt_addrmsg, ifa, cmd); 2188 return (rtsock_addrmsg(cmd, ifa, fibnum)); 2189 } 2190 2191 /* 2192 * Announce kernel-originated route addition/removal to rtsock based on @rt data. 2193 * cmd: RTM_ cmd 2194 * @rt: valid rtentry 2195 * @ifp: target route interface 2196 * @fibnum: fib id or RT_ALL_FIBS 2197 * 2198 * Returns 0 on success. 2199 */ 2200 int 2201 rt_routemsg(int cmd, struct rtentry *rt, struct ifnet *ifp, int rti_addrs, 2202 int fibnum) 2203 { 2204 2205 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, 2206 ("unexpected cmd %d", cmd)); 2207 2208 KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), 2209 ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); 2210 2211 KASSERT(rt_key(rt) != NULL, (":%s: rt_key must be supplied", __func__)); 2212 2213 return (rtsock_routemsg(cmd, rt, ifp, 0, fibnum)); 2214 } 2215 2216 /* 2217 * Announce kernel-originated route addition/removal to rtsock based on @rt data. 2218 * cmd: RTM_ cmd 2219 * @info: addrinfo structure with valid data. 2220 * @fibnum: fib id or RT_ALL_FIBS 2221 * 2222 * Returns 0 on success. 2223 */ 2224 int 2225 rt_routemsg_info(int cmd, struct rt_addrinfo *info, int fibnum) 2226 { 2227 2228 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE || cmd == RTM_CHANGE, 2229 ("unexpected cmd %d", cmd)); 2230 2231 KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), 2232 ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); 2233 2234 KASSERT(info->rti_info[RTAX_DST] != NULL, (":%s: RTAX_DST must be supplied", __func__)); 2235 2236 return (rtsock_routemsg_info(cmd, info, fibnum)); 2237 } 2238 2239 2240 /* 2241 * This is called to generate messages from the routing socket 2242 * indicating a network interface has had addresses associated with it. 2243 */ 2244 void 2245 rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, struct rtentry *rt, int fibnum) 2246 { 2247 2248 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, 2249 ("unexpected cmd %u", cmd)); 2250 KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), 2251 ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); 2252 2253 if (cmd == RTM_ADD) { 2254 rt_addrmsg(cmd, ifa, fibnum); 2255 if (rt != NULL) 2256 rt_routemsg(cmd, rt, ifa->ifa_ifp, 0, fibnum); 2257 } else { 2258 if (rt != NULL) 2259 rt_routemsg(cmd, rt, ifa->ifa_ifp, 0, fibnum); 2260 rt_addrmsg(cmd, ifa, fibnum); 2261 } 2262 } 2263 2264