1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1980, 1986, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95 32 * $FreeBSD$ 33 */ 34 /************************************************************************ 35 * Note: In this file a 'fib' is a "forwarding information base" * 36 * Which is the new name for an in kernel routing (next hop) table. * 37 ***********************************************************************/ 38 39 #include "opt_inet.h" 40 #include "opt_inet6.h" 41 #include "opt_mrouting.h" 42 #include "opt_mpath.h" 43 #include "opt_route.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/malloc.h> 48 #include <sys/mbuf.h> 49 #include <sys/socket.h> 50 #include <sys/sysctl.h> 51 #include <sys/syslog.h> 52 #include <sys/sysproto.h> 53 #include <sys/proc.h> 54 #include <sys/domain.h> 55 #include <sys/eventhandler.h> 56 #include <sys/kernel.h> 57 #include <sys/lock.h> 58 #include <sys/rmlock.h> 59 60 #include <net/if.h> 61 #include <net/if_var.h> 62 #include <net/if_dl.h> 63 #include <net/route.h> 64 #include <net/route/route_var.h> 65 #include <net/route/nhop.h> 66 #include <net/route/shared.h> 67 #include <net/vnet.h> 68 69 #ifdef RADIX_MPATH 70 #include <net/radix_mpath.h> 71 #endif 72 73 #include <netinet/in.h> 74 #include <netinet/ip_mroute.h> 75 76 #include <vm/uma.h> 77 78 #define RT_MAXFIBS UINT16_MAX 79 80 /* Kernel config default option. */ 81 #ifdef ROUTETABLES 82 #if ROUTETABLES <= 0 83 #error "ROUTETABLES defined too low" 84 #endif 85 #if ROUTETABLES > RT_MAXFIBS 86 #error "ROUTETABLES defined too big" 87 #endif 88 #define RT_NUMFIBS ROUTETABLES 89 #endif /* ROUTETABLES */ 90 /* Initialize to default if not otherwise set. */ 91 #ifndef RT_NUMFIBS 92 #define RT_NUMFIBS 1 93 #endif 94 95 /* This is read-only.. */ 96 u_int rt_numfibs = RT_NUMFIBS; 97 SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RDTUN, &rt_numfibs, 0, ""); 98 99 /* 100 * By default add routes to all fibs for new interfaces. 101 * Once this is set to 0 then only allocate routes on interface 102 * changes for the FIB of the caller when adding a new set of addresses 103 * to an interface. XXX this is a shotgun aproach to a problem that needs 104 * a more fine grained solution.. that will come. 105 * XXX also has the problems getting the FIB from curthread which will not 106 * always work given the fib can be overridden and prefixes can be added 107 * from the network stack context. 108 */ 109 VNET_DEFINE(u_int, rt_add_addr_allfibs) = 1; 110 SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RWTUN | CTLFLAG_VNET, 111 &VNET_NAME(rt_add_addr_allfibs), 0, ""); 112 113 VNET_PCPUSTAT_DEFINE(struct rtstat, rtstat); 114 115 VNET_PCPUSTAT_SYSINIT(rtstat); 116 #ifdef VIMAGE 117 VNET_PCPUSTAT_SYSUNINIT(rtstat); 118 #endif 119 120 VNET_DEFINE(struct rib_head *, rt_tables); 121 #define V_rt_tables VNET(rt_tables) 122 123 VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ 124 #define V_rttrash VNET(rttrash) 125 126 127 /* 128 * Convert a 'struct radix_node *' to a 'struct rtentry *'. 129 * The operation can be done safely (in this code) because a 130 * 'struct rtentry' starts with two 'struct radix_node''s, the first 131 * one representing leaf nodes in the routing tree, which is 132 * what the code in radix.c passes us as a 'struct radix_node'. 133 * 134 * But because there are a lot of assumptions in this conversion, 135 * do not cast explicitly, but always use the macro below. 136 */ 137 #define RNTORT(p) ((struct rtentry *)(p)) 138 139 VNET_DEFINE_STATIC(uma_zone_t, rtzone); /* Routing table UMA zone. */ 140 #define V_rtzone VNET(rtzone) 141 142 EVENTHANDLER_LIST_DEFINE(rt_addrmsg); 143 144 static int rt_getifa_fib(struct rt_addrinfo *, u_int); 145 static void rt_setmetrics(const struct rt_addrinfo *, struct rtentry *); 146 static int rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *, 147 void *arg); 148 static struct rtentry *rt_unlinkrte(struct rib_head *rnh, 149 struct rt_addrinfo *info, int *perror); 150 static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info); 151 #ifdef RADIX_MPATH 152 static struct radix_node *rt_mpath_unlink(struct rib_head *rnh, 153 struct rt_addrinfo *info, struct rtentry *rto, int *perror); 154 #endif 155 static int rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, 156 int flags); 157 158 static int add_route(struct rib_head *rnh, struct rt_addrinfo *info, 159 struct rtentry **ret_nrt); 160 static int del_route(struct rib_head *rnh, struct rt_addrinfo *info, 161 struct rtentry **ret_nrt); 162 static int change_route(struct rib_head *, struct rt_addrinfo *, 163 struct rtentry **); 164 165 /* 166 * handler for net.my_fibnum 167 */ 168 static int 169 sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) 170 { 171 int fibnum; 172 int error; 173 174 fibnum = curthread->td_proc->p_fibnum; 175 error = sysctl_handle_int(oidp, &fibnum, 0, req); 176 return (error); 177 } 178 179 SYSCTL_PROC(_net, OID_AUTO, my_fibnum, 180 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 181 &sysctl_my_fibnum, "I", 182 "default FIB of caller"); 183 184 static __inline struct rib_head ** 185 rt_tables_get_rnh_ptr(int table, int fam) 186 { 187 struct rib_head **rnh; 188 189 KASSERT(table >= 0 && table < rt_numfibs, 190 ("%s: table out of bounds (0 <= %d < %d)", __func__, table, 191 rt_numfibs)); 192 KASSERT(fam >= 0 && fam < (AF_MAX + 1), 193 ("%s: fam out of bounds (0 <= %d < %d)", __func__, fam, AF_MAX+1)); 194 195 /* rnh is [fib=0][af=0]. */ 196 rnh = (struct rib_head **)V_rt_tables; 197 /* Get the offset to the requested table and fam. */ 198 rnh += table * (AF_MAX+1) + fam; 199 200 return (rnh); 201 } 202 203 struct rib_head * 204 rt_tables_get_rnh(int table, int fam) 205 { 206 207 return (*rt_tables_get_rnh_ptr(table, fam)); 208 } 209 210 u_int 211 rt_tables_get_gen(int table, int fam) 212 { 213 struct rib_head *rnh; 214 215 rnh = *rt_tables_get_rnh_ptr(table, fam); 216 KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d fam %d", 217 __func__, table, fam)); 218 return (rnh->rnh_gen); 219 } 220 221 222 /* 223 * route initialization must occur before ip6_init2(), which happenas at 224 * SI_ORDER_MIDDLE. 225 */ 226 static void 227 route_init(void) 228 { 229 230 /* whack the tunable ints into line. */ 231 if (rt_numfibs > RT_MAXFIBS) 232 rt_numfibs = RT_MAXFIBS; 233 if (rt_numfibs == 0) 234 rt_numfibs = 1; 235 nhops_init(); 236 } 237 SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, NULL); 238 239 static int 240 rtentry_zinit(void *mem, int size, int how) 241 { 242 struct rtentry *rt = mem; 243 244 rt->rt_pksent = counter_u64_alloc(how); 245 if (rt->rt_pksent == NULL) 246 return (ENOMEM); 247 248 RT_LOCK_INIT(rt); 249 250 return (0); 251 } 252 253 static void 254 rtentry_zfini(void *mem, int size) 255 { 256 struct rtentry *rt = mem; 257 258 RT_LOCK_DESTROY(rt); 259 counter_u64_free(rt->rt_pksent); 260 } 261 262 static int 263 rtentry_ctor(void *mem, int size, void *arg, int how) 264 { 265 struct rtentry *rt = mem; 266 267 bzero(rt, offsetof(struct rtentry, rt_endzero)); 268 counter_u64_zero(rt->rt_pksent); 269 rt->rt_chain = NULL; 270 271 return (0); 272 } 273 274 static void 275 rtentry_dtor(void *mem, int size, void *arg) 276 { 277 struct rtentry *rt = mem; 278 279 RT_UNLOCK_COND(rt); 280 } 281 282 static void 283 vnet_route_init(const void *unused __unused) 284 { 285 struct domain *dom; 286 struct rib_head **rnh; 287 int table; 288 int fam; 289 290 V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) * 291 sizeof(struct rib_head *), M_RTABLE, M_WAITOK|M_ZERO); 292 293 V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), 294 rtentry_ctor, rtentry_dtor, 295 rtentry_zinit, rtentry_zfini, UMA_ALIGN_PTR, 0); 296 for (dom = domains; dom; dom = dom->dom_next) { 297 if (dom->dom_rtattach == NULL) 298 continue; 299 300 for (table = 0; table < rt_numfibs; table++) { 301 fam = dom->dom_family; 302 if (table != 0 && fam != AF_INET6 && fam != AF_INET) 303 break; 304 305 rnh = rt_tables_get_rnh_ptr(table, fam); 306 if (rnh == NULL) 307 panic("%s: rnh NULL", __func__); 308 dom->dom_rtattach((void **)rnh, 0, table); 309 } 310 } 311 } 312 VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, 313 vnet_route_init, 0); 314 315 #ifdef VIMAGE 316 static void 317 vnet_route_uninit(const void *unused __unused) 318 { 319 int table; 320 int fam; 321 struct domain *dom; 322 struct rib_head **rnh; 323 324 for (dom = domains; dom; dom = dom->dom_next) { 325 if (dom->dom_rtdetach == NULL) 326 continue; 327 328 for (table = 0; table < rt_numfibs; table++) { 329 fam = dom->dom_family; 330 331 if (table != 0 && fam != AF_INET6 && fam != AF_INET) 332 break; 333 334 rnh = rt_tables_get_rnh_ptr(table, fam); 335 if (rnh == NULL) 336 panic("%s: rnh NULL", __func__); 337 dom->dom_rtdetach((void **)rnh, 0); 338 } 339 } 340 341 free(V_rt_tables, M_RTABLE); 342 uma_zdestroy(V_rtzone); 343 } 344 VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, 345 vnet_route_uninit, 0); 346 #endif 347 348 struct rib_head * 349 rt_table_init(int offset, int family, u_int fibnum) 350 { 351 struct rib_head *rh; 352 353 rh = malloc(sizeof(struct rib_head), M_RTABLE, M_WAITOK | M_ZERO); 354 355 /* TODO: These details should be hidded inside radix.c */ 356 /* Init masks tree */ 357 rn_inithead_internal(&rh->head, rh->rnh_nodes, offset); 358 rn_inithead_internal(&rh->rmhead.head, rh->rmhead.mask_nodes, 0); 359 rh->head.rnh_masks = &rh->rmhead; 360 361 /* Save metadata associated with this routing table. */ 362 rh->rib_family = family; 363 rh->rib_fibnum = fibnum; 364 #ifdef VIMAGE 365 rh->rib_vnet = curvnet; 366 #endif 367 368 tmproutes_init(rh); 369 370 /* Init locks */ 371 RIB_LOCK_INIT(rh); 372 373 nhops_init_rib(rh); 374 375 /* Finally, set base callbacks */ 376 rh->rnh_addaddr = rn_addroute; 377 rh->rnh_deladdr = rn_delete; 378 rh->rnh_matchaddr = rn_match; 379 rh->rnh_lookup = rn_lookup; 380 rh->rnh_walktree = rn_walktree; 381 rh->rnh_walktree_from = rn_walktree_from; 382 383 return (rh); 384 } 385 386 static int 387 rt_freeentry(struct radix_node *rn, void *arg) 388 { 389 struct radix_head * const rnh = arg; 390 struct radix_node *x; 391 392 x = (struct radix_node *)rn_delete(rn + 2, NULL, rnh); 393 if (x != NULL) 394 R_Free(x); 395 return (0); 396 } 397 398 void 399 rt_table_destroy(struct rib_head *rh) 400 { 401 402 tmproutes_destroy(rh); 403 404 rn_walktree(&rh->rmhead.head, rt_freeentry, &rh->rmhead.head); 405 406 nhops_destroy_rib(rh); 407 408 /* Assume table is already empty */ 409 RIB_LOCK_DESTROY(rh); 410 free(rh, M_RTABLE); 411 } 412 413 414 #ifndef _SYS_SYSPROTO_H_ 415 struct setfib_args { 416 int fibnum; 417 }; 418 #endif 419 int 420 sys_setfib(struct thread *td, struct setfib_args *uap) 421 { 422 if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs) 423 return EINVAL; 424 td->td_proc->p_fibnum = uap->fibnum; 425 return (0); 426 } 427 428 /* 429 * Look up the route that matches the address given 430 * Or, at least try.. Create a cloned route if needed. 431 * 432 * The returned route, if any, is locked. 433 */ 434 struct rtentry * 435 rtalloc1(struct sockaddr *dst, int report, u_long ignflags) 436 { 437 438 return (rtalloc1_fib(dst, report, ignflags, RT_DEFAULT_FIB)); 439 } 440 441 struct rtentry * 442 rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, 443 u_int fibnum) 444 { 445 RIB_RLOCK_TRACKER; 446 struct rib_head *rh; 447 struct radix_node *rn; 448 struct rtentry *newrt; 449 struct rt_addrinfo info; 450 int err = 0, msgtype = RTM_MISS; 451 452 KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); 453 rh = rt_tables_get_rnh(fibnum, dst->sa_family); 454 newrt = NULL; 455 if (rh == NULL) 456 goto miss; 457 458 /* 459 * Look up the address in the table for that Address Family 460 */ 461 if ((ignflags & RTF_RNH_LOCKED) == 0) 462 RIB_RLOCK(rh); 463 #ifdef INVARIANTS 464 else 465 RIB_LOCK_ASSERT(rh); 466 #endif 467 rn = rh->rnh_matchaddr(dst, &rh->head); 468 if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { 469 newrt = RNTORT(rn); 470 RT_LOCK(newrt); 471 RT_ADDREF(newrt); 472 if ((ignflags & RTF_RNH_LOCKED) == 0) 473 RIB_RUNLOCK(rh); 474 return (newrt); 475 476 } else if ((ignflags & RTF_RNH_LOCKED) == 0) 477 RIB_RUNLOCK(rh); 478 /* 479 * Either we hit the root or could not find any match, 480 * which basically means: "cannot get there from here". 481 */ 482 miss: 483 RTSTAT_INC(rts_unreach); 484 485 if (report) { 486 /* 487 * If required, report the failure to the supervising 488 * Authorities. 489 * For a delete, this is not an error. (report == 0) 490 */ 491 bzero(&info, sizeof(info)); 492 info.rti_info[RTAX_DST] = dst; 493 rt_missmsg_fib(msgtype, &info, 0, err, fibnum); 494 } 495 return (newrt); 496 } 497 498 /* 499 * Remove a reference count from an rtentry. 500 * If the count gets low enough, take it out of the routing table 501 */ 502 void 503 rtfree(struct rtentry *rt) 504 { 505 struct rib_head *rnh; 506 507 KASSERT(rt != NULL,("%s: NULL rt", __func__)); 508 rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); 509 KASSERT(rnh != NULL,("%s: NULL rnh", __func__)); 510 511 RT_LOCK_ASSERT(rt); 512 513 /* 514 * The callers should use RTFREE_LOCKED() or RTFREE(), so 515 * we should come here exactly with the last reference. 516 */ 517 RT_REMREF(rt); 518 if (rt->rt_refcnt > 0) { 519 log(LOG_DEBUG, "%s: %p has %d refs\n", __func__, rt, rt->rt_refcnt); 520 goto done; 521 } 522 523 /* 524 * On last reference give the "close method" a chance 525 * to cleanup private state. This also permits (for 526 * IPv4 and IPv6) a chance to decide if the routing table 527 * entry should be purged immediately or at a later time. 528 * When an immediate purge is to happen the close routine 529 * typically calls rtexpunge which clears the RTF_UP flag 530 * on the entry so that the code below reclaims the storage. 531 */ 532 if (rt->rt_refcnt == 0 && rnh->rnh_close) 533 rnh->rnh_close((struct radix_node *)rt, &rnh->head); 534 535 /* 536 * If we are no longer "up" (and ref == 0) 537 * then we can free the resources associated 538 * with the route. 539 */ 540 if ((rt->rt_flags & RTF_UP) == 0) { 541 if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 542 panic("rtfree 2"); 543 /* 544 * the rtentry must have been removed from the routing table 545 * so it is represented in rttrash.. remove that now. 546 */ 547 V_rttrash--; 548 #ifdef DIAGNOSTIC 549 if (rt->rt_refcnt < 0) { 550 printf("rtfree: %p not freed (neg refs)\n", rt); 551 goto done; 552 } 553 #endif 554 /* 555 * The key is separatly alloc'd so free it (see rt_setgate()). 556 * This also frees the gateway, as they are always malloc'd 557 * together. 558 */ 559 R_Free(rt_key(rt)); 560 561 /* Unreference nexthop */ 562 nhop_free(rt->rt_nhop); 563 564 /* 565 * and the rtentry itself of course 566 */ 567 uma_zfree(V_rtzone, rt); 568 return; 569 } 570 done: 571 RT_UNLOCK(rt); 572 } 573 574 /* 575 * Temporary RTFREE() function wrapper. 576 * Intended to use in control plane code to 577 * avoid exposing internal layout of 'struct rtentry'. 578 */ 579 void 580 rtfree_func(struct rtentry *rt) 581 { 582 583 RTFREE(rt); 584 } 585 586 /* 587 * Adds a temporal redirect entry to the routing table. 588 * @fibnum: fib number 589 * @dst: destination to install redirect to 590 * @gateway: gateway to go via 591 * @author: sockaddr of originating router, can be NULL 592 * @ifp: interface to use for the redirected route 593 * @flags: set of flags to add. Allowed: RTF_GATEWAY 594 * @lifetime_sec: time in seconds to expire this redirect. 595 * 596 * Retuns 0 on success, errno otherwise. 597 */ 598 int 599 rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway, 600 struct sockaddr *author, struct ifnet *ifp, int flags, int lifetime_sec) 601 { 602 struct rtentry *rt; 603 int error; 604 struct rt_addrinfo info; 605 struct rt_metrics rti_rmx; 606 struct ifaddr *ifa; 607 608 NET_EPOCH_ASSERT(); 609 610 if (rt_tables_get_rnh(fibnum, dst->sa_family) == NULL) 611 return (EAFNOSUPPORT); 612 613 /* Verify the allowed flag mask. */ 614 KASSERT(((flags & ~(RTF_GATEWAY)) == 0), 615 ("invalid redirect flags: %x", flags)); 616 617 /* Get the best ifa for the given interface and gateway. */ 618 if ((ifa = ifaof_ifpforaddr(gateway, ifp)) == NULL) 619 return (ENETUNREACH); 620 ifa_ref(ifa); 621 622 bzero(&info, sizeof(info)); 623 info.rti_info[RTAX_DST] = dst; 624 info.rti_info[RTAX_GATEWAY] = gateway; 625 info.rti_ifa = ifa; 626 info.rti_ifp = ifp; 627 info.rti_flags = flags | RTF_HOST | RTF_DYNAMIC; 628 629 /* Setup route metrics to define expire time. */ 630 bzero(&rti_rmx, sizeof(rti_rmx)); 631 /* Set expire time as absolute. */ 632 rti_rmx.rmx_expire = lifetime_sec + time_second; 633 info.rti_mflags |= RTV_EXPIRE; 634 info.rti_rmx = &rti_rmx; 635 636 error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); 637 ifa_free(ifa); 638 639 if (error != 0) { 640 /* TODO: add per-fib redirect stats. */ 641 return (error); 642 } 643 644 RT_LOCK(rt); 645 flags = rt->rt_flags; 646 RTFREE_LOCKED(rt); 647 648 RTSTAT_INC(rts_dynamic); 649 650 /* Send notification of a route addition to userland. */ 651 bzero(&info, sizeof(info)); 652 info.rti_info[RTAX_DST] = dst; 653 info.rti_info[RTAX_GATEWAY] = gateway; 654 info.rti_info[RTAX_AUTHOR] = author; 655 rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum); 656 657 return (0); 658 } 659 660 /* 661 * Routing table ioctl interface. 662 */ 663 int 664 rtioctl_fib(u_long req, caddr_t data, u_int fibnum) 665 { 666 667 /* 668 * If more ioctl commands are added here, make sure the proper 669 * super-user checks are being performed because it is possible for 670 * prison-root to make it this far if raw sockets have been enabled 671 * in jails. 672 */ 673 #ifdef INET 674 /* Multicast goop, grrr... */ 675 return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP; 676 #else /* INET */ 677 return ENXIO; 678 #endif /* INET */ 679 } 680 681 struct ifaddr * 682 ifa_ifwithroute(int flags, const struct sockaddr *dst, struct sockaddr *gateway, 683 u_int fibnum) 684 { 685 struct ifaddr *ifa; 686 int not_found = 0; 687 688 NET_EPOCH_ASSERT(); 689 if ((flags & RTF_GATEWAY) == 0) { 690 /* 691 * If we are adding a route to an interface, 692 * and the interface is a pt to pt link 693 * we should search for the destination 694 * as our clue to the interface. Otherwise 695 * we can use the local address. 696 */ 697 ifa = NULL; 698 if (flags & RTF_HOST) 699 ifa = ifa_ifwithdstaddr(dst, fibnum); 700 if (ifa == NULL) 701 ifa = ifa_ifwithaddr(gateway); 702 } else { 703 /* 704 * If we are adding a route to a remote net 705 * or host, the gateway may still be on the 706 * other end of a pt to pt link. 707 */ 708 ifa = ifa_ifwithdstaddr(gateway, fibnum); 709 } 710 if (ifa == NULL) 711 ifa = ifa_ifwithnet(gateway, 0, fibnum); 712 if (ifa == NULL) { 713 struct rtentry *rt; 714 715 rt = rtalloc1_fib(gateway, 0, flags, fibnum); 716 if (rt == NULL) 717 goto out; 718 /* 719 * dismiss a gateway that is reachable only 720 * through the default router 721 */ 722 switch (gateway->sa_family) { 723 case AF_INET: 724 if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY) 725 not_found = 1; 726 break; 727 case AF_INET6: 728 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr)) 729 not_found = 1; 730 break; 731 default: 732 break; 733 } 734 if (!not_found && rt->rt_nhop->nh_ifa != NULL) { 735 ifa = rt->rt_nhop->nh_ifa; 736 } 737 RT_REMREF(rt); 738 RT_UNLOCK(rt); 739 if (not_found || ifa == NULL) 740 goto out; 741 } 742 if (ifa->ifa_addr->sa_family != dst->sa_family) { 743 struct ifaddr *oifa = ifa; 744 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 745 if (ifa == NULL) 746 ifa = oifa; 747 } 748 out: 749 return (ifa); 750 } 751 752 /* 753 * Do appropriate manipulations of a routing tree given 754 * all the bits of info needed 755 */ 756 int 757 rtrequest_fib(int req, 758 struct sockaddr *dst, 759 struct sockaddr *gateway, 760 struct sockaddr *netmask, 761 int flags, 762 struct rtentry **ret_nrt, 763 u_int fibnum) 764 { 765 struct rt_addrinfo info; 766 767 if (dst->sa_len == 0) 768 return(EINVAL); 769 770 bzero((caddr_t)&info, sizeof(info)); 771 info.rti_flags = flags; 772 info.rti_info[RTAX_DST] = dst; 773 info.rti_info[RTAX_GATEWAY] = gateway; 774 info.rti_info[RTAX_NETMASK] = netmask; 775 return rtrequest1_fib(req, &info, ret_nrt, fibnum); 776 } 777 778 779 /* 780 * Copy most of @rt data into @info. 781 * 782 * If @flags contains NHR_COPY, copies dst,netmask and gw to the 783 * pointers specified by @info structure. Assume such pointers 784 * are zeroed sockaddr-like structures with sa_len field initialized 785 * to reflect size of the provided buffer. if no NHR_COPY is specified, 786 * point dst,netmask and gw @info fields to appropriate @rt values. 787 * 788 * if @flags contains NHR_REF, do refcouting on rt_ifp and rt_ifa. 789 * 790 * Returns 0 on success. 791 */ 792 int 793 rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags) 794 { 795 struct rt_metrics *rmx; 796 struct sockaddr *src, *dst; 797 struct nhop_object *nh; 798 int sa_len; 799 800 if (flags & NHR_COPY) { 801 /* Copy destination if dst is non-zero */ 802 src = rt_key(rt); 803 dst = info->rti_info[RTAX_DST]; 804 sa_len = src->sa_len; 805 if (dst != NULL) { 806 if (src->sa_len > dst->sa_len) 807 return (ENOMEM); 808 memcpy(dst, src, src->sa_len); 809 info->rti_addrs |= RTA_DST; 810 } 811 812 /* Copy mask if set && dst is non-zero */ 813 src = rt_mask(rt); 814 dst = info->rti_info[RTAX_NETMASK]; 815 if (src != NULL && dst != NULL) { 816 817 /* 818 * Radix stores different value in sa_len, 819 * assume rt_mask() to have the same length 820 * as rt_key() 821 */ 822 if (sa_len > dst->sa_len) 823 return (ENOMEM); 824 memcpy(dst, src, src->sa_len); 825 info->rti_addrs |= RTA_NETMASK; 826 } 827 828 /* Copy gateway is set && dst is non-zero */ 829 src = &rt->rt_nhop->gw_sa; 830 dst = info->rti_info[RTAX_GATEWAY]; 831 if ((rt->rt_flags & RTF_GATEWAY) && src != NULL && dst != NULL){ 832 if (src->sa_len > dst->sa_len) 833 return (ENOMEM); 834 memcpy(dst, src, src->sa_len); 835 info->rti_addrs |= RTA_GATEWAY; 836 } 837 } else { 838 info->rti_info[RTAX_DST] = rt_key(rt); 839 info->rti_addrs |= RTA_DST; 840 if (rt_mask(rt) != NULL) { 841 info->rti_info[RTAX_NETMASK] = rt_mask(rt); 842 info->rti_addrs |= RTA_NETMASK; 843 } 844 if (rt->rt_flags & RTF_GATEWAY) { 845 info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; 846 info->rti_addrs |= RTA_GATEWAY; 847 } 848 } 849 850 nh = rt->rt_nhop; 851 rmx = info->rti_rmx; 852 if (rmx != NULL) { 853 info->rti_mflags |= RTV_MTU; 854 rmx->rmx_mtu = nh->nh_mtu; 855 } 856 857 info->rti_flags = rt->rt_flags | nhop_get_rtflags(nh); 858 info->rti_ifp = nh->nh_ifp; 859 info->rti_ifa = nh->nh_ifa; 860 if (flags & NHR_REF) { 861 if_ref(info->rti_ifp); 862 ifa_ref(info->rti_ifa); 863 } 864 865 return (0); 866 } 867 868 /* 869 * Lookups up route entry for @dst in RIB database for fib @fibnum. 870 * Exports entry data to @info using rt_exportinfo(). 871 * 872 * If @flags contains NHR_REF, refcouting is performed on rt_ifp and rt_ifa. 873 * All references can be released later by calling rib_free_info(). 874 * 875 * Returns 0 on success. 876 * Returns ENOENT for lookup failure, ENOMEM for export failure. 877 */ 878 int 879 rib_lookup_info(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags, 880 uint32_t flowid, struct rt_addrinfo *info) 881 { 882 RIB_RLOCK_TRACKER; 883 struct rib_head *rh; 884 struct radix_node *rn; 885 struct rtentry *rt; 886 int error; 887 888 KASSERT((fibnum < rt_numfibs), ("rib_lookup_rte: bad fibnum")); 889 rh = rt_tables_get_rnh(fibnum, dst->sa_family); 890 if (rh == NULL) 891 return (ENOENT); 892 893 RIB_RLOCK(rh); 894 rn = rh->rnh_matchaddr(__DECONST(void *, dst), &rh->head); 895 if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { 896 rt = RNTORT(rn); 897 /* Ensure route & ifp is UP */ 898 if (RT_LINK_IS_UP(rt->rt_nhop->nh_ifp)) { 899 flags = (flags & NHR_REF) | NHR_COPY; 900 error = rt_exportinfo(rt, info, flags); 901 RIB_RUNLOCK(rh); 902 903 return (error); 904 } 905 } 906 RIB_RUNLOCK(rh); 907 908 return (ENOENT); 909 } 910 911 /* 912 * Releases all references acquired by rib_lookup_info() when 913 * called with NHR_REF flags. 914 */ 915 void 916 rib_free_info(struct rt_addrinfo *info) 917 { 918 919 ifa_free(info->rti_ifa); 920 if_rele(info->rti_ifp); 921 } 922 923 /* 924 * Iterates over all existing fibs in system calling 925 * @setwa_f function prior to traversing each fib. 926 * Calls @wa_f function for each element in current fib. 927 * If af is not AF_UNSPEC, iterates over fibs in particular 928 * address family. 929 */ 930 void 931 rt_foreach_fib_walk(int af, rt_setwarg_t *setwa_f, rt_walktree_f_t *wa_f, 932 void *arg) 933 { 934 struct rib_head *rnh; 935 uint32_t fibnum; 936 int i; 937 938 for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { 939 /* Do we want some specific family? */ 940 if (af != AF_UNSPEC) { 941 rnh = rt_tables_get_rnh(fibnum, af); 942 if (rnh == NULL) 943 continue; 944 if (setwa_f != NULL) 945 setwa_f(rnh, fibnum, af, arg); 946 947 RIB_WLOCK(rnh); 948 rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f,arg); 949 RIB_WUNLOCK(rnh); 950 continue; 951 } 952 953 for (i = 1; i <= AF_MAX; i++) { 954 rnh = rt_tables_get_rnh(fibnum, i); 955 if (rnh == NULL) 956 continue; 957 if (setwa_f != NULL) 958 setwa_f(rnh, fibnum, i, arg); 959 960 RIB_WLOCK(rnh); 961 rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f,arg); 962 RIB_WUNLOCK(rnh); 963 } 964 } 965 } 966 967 struct rt_delinfo 968 { 969 struct rt_addrinfo info; 970 struct rib_head *rnh; 971 struct rtentry *head; 972 }; 973 974 /* 975 * Conditionally unlinks @rn from radix tree based 976 * on info data passed in @arg. 977 */ 978 static int 979 rt_checkdelroute(struct radix_node *rn, void *arg) 980 { 981 struct rt_delinfo *di; 982 struct rt_addrinfo *info; 983 struct rtentry *rt; 984 int error; 985 986 di = (struct rt_delinfo *)arg; 987 rt = (struct rtentry *)rn; 988 info = &di->info; 989 error = 0; 990 991 info->rti_info[RTAX_DST] = rt_key(rt); 992 info->rti_info[RTAX_NETMASK] = rt_mask(rt); 993 info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; 994 995 rt = rt_unlinkrte(di->rnh, info, &error); 996 if (rt == NULL) { 997 /* Either not allowed or not matched. Skip entry */ 998 return (0); 999 } 1000 1001 /* Entry was unlinked. Add to the list and return */ 1002 rt->rt_chain = di->head; 1003 di->head = rt; 1004 1005 return (0); 1006 } 1007 1008 /* 1009 * Iterates over a routing table specified by @fibnum and @family and 1010 * deletes elements marked by @filter_f. 1011 * @fibnum: rtable id 1012 * @family: AF_ address family 1013 * @filter_f: function returning non-zero value for items to delete 1014 * @arg: data to pass to the @filter_f function 1015 * @report: true if rtsock notification is needed. 1016 */ 1017 void 1018 rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f, void *arg, bool report) 1019 { 1020 struct rib_head *rnh; 1021 struct rt_delinfo di; 1022 struct rtentry *rt; 1023 1024 rnh = rt_tables_get_rnh(fibnum, family); 1025 if (rnh == NULL) 1026 return; 1027 1028 bzero(&di, sizeof(di)); 1029 di.info.rti_filter = filter_f; 1030 di.info.rti_filterdata = arg; 1031 di.rnh = rnh; 1032 1033 RIB_WLOCK(rnh); 1034 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); 1035 RIB_WUNLOCK(rnh); 1036 1037 if (di.head == NULL) 1038 return; 1039 1040 /* We might have something to reclaim. */ 1041 while (di.head != NULL) { 1042 rt = di.head; 1043 di.head = rt->rt_chain; 1044 rt->rt_chain = NULL; 1045 1046 /* TODO std rt -> rt_addrinfo export */ 1047 di.info.rti_info[RTAX_DST] = rt_key(rt); 1048 di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1049 1050 rt_notifydelete(rt, &di.info); 1051 1052 if (report) 1053 rt_routemsg(RTM_DELETE, rt, rt->rt_nhop->nh_ifp, 0, 1054 fibnum); 1055 RTFREE_LOCKED(rt); 1056 } 1057 } 1058 1059 /* 1060 * Iterates over all existing fibs in system and deletes each element 1061 * for which @filter_f function returns non-zero value. 1062 * If @family is not AF_UNSPEC, iterates over fibs in particular 1063 * address family. 1064 */ 1065 void 1066 rt_foreach_fib_walk_del(int family, rt_filter_f_t *filter_f, void *arg) 1067 { 1068 u_int fibnum; 1069 int i, start, end; 1070 1071 for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { 1072 /* Do we want some specific family? */ 1073 if (family != AF_UNSPEC) { 1074 start = family; 1075 end = family; 1076 } else { 1077 start = 1; 1078 end = AF_MAX; 1079 } 1080 1081 for (i = start; i <= end; i++) { 1082 if (rt_tables_get_rnh(fibnum, i) == NULL) 1083 continue; 1084 1085 rib_walk_del(fibnum, i, filter_f, arg, 0); 1086 } 1087 } 1088 } 1089 1090 /* 1091 * Delete Routes for a Network Interface 1092 * 1093 * Called for each routing entry via the rnh->rnh_walktree() call above 1094 * to delete all route entries referencing a detaching network interface. 1095 * 1096 * Arguments: 1097 * rt pointer to rtentry 1098 * nh pointer to nhop 1099 * arg argument passed to rnh->rnh_walktree() - detaching interface 1100 * 1101 * Returns: 1102 * 0 successful 1103 * errno failed - reason indicated 1104 */ 1105 static int 1106 rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *nh, void *arg) 1107 { 1108 struct ifnet *ifp = arg; 1109 1110 if (nh->nh_ifp != ifp) 1111 return (0); 1112 1113 /* 1114 * Protect (sorta) against walktree recursion problems 1115 * with cloned routes 1116 */ 1117 if ((rt->rt_flags & RTF_UP) == 0) 1118 return (0); 1119 1120 return (1); 1121 } 1122 1123 /* 1124 * Delete all remaining routes using this interface 1125 * Unfortuneatly the only way to do this is to slog through 1126 * the entire routing table looking for routes which point 1127 * to this interface...oh well... 1128 */ 1129 void 1130 rt_flushifroutes_af(struct ifnet *ifp, int af) 1131 { 1132 KASSERT((af >= 1 && af <= AF_MAX), ("%s: af %d not >= 1 and <= %d", 1133 __func__, af, AF_MAX)); 1134 1135 rt_foreach_fib_walk_del(af, rt_ifdelroute, ifp); 1136 } 1137 1138 void 1139 rt_flushifroutes(struct ifnet *ifp) 1140 { 1141 1142 rt_foreach_fib_walk_del(AF_UNSPEC, rt_ifdelroute, ifp); 1143 } 1144 1145 /* 1146 * Conditionally unlinks rtentry matching data inside @info from @rnh. 1147 * Returns unlinked, locked and referenced @rtentry on success, 1148 * Returns NULL and sets @perror to: 1149 * ESRCH - if prefix was not found, 1150 * EADDRINUSE - if trying to delete PINNED route without appropriate flag. 1151 * ENOENT - if supplied filter function returned 0 (not matched). 1152 */ 1153 static struct rtentry * 1154 rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror) 1155 { 1156 struct sockaddr *dst, *netmask; 1157 struct rtentry *rt; 1158 struct radix_node *rn; 1159 1160 dst = info->rti_info[RTAX_DST]; 1161 netmask = info->rti_info[RTAX_NETMASK]; 1162 1163 rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head); 1164 if (rt == NULL) { 1165 *perror = ESRCH; 1166 return (NULL); 1167 } 1168 1169 if ((info->rti_flags & RTF_PINNED) == 0) { 1170 /* Check if target route can be deleted */ 1171 if (rt->rt_flags & RTF_PINNED) { 1172 *perror = EADDRINUSE; 1173 return (NULL); 1174 } 1175 } 1176 1177 if (info->rti_filter != NULL) { 1178 if (info->rti_filter(rt, rt->rt_nhop, info->rti_filterdata)==0){ 1179 /* Not matched */ 1180 *perror = ENOENT; 1181 return (NULL); 1182 } 1183 1184 /* 1185 * Filter function requested rte deletion. 1186 * Ease the caller work by filling in remaining info 1187 * from that particular entry. 1188 */ 1189 info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; 1190 } 1191 1192 /* 1193 * Remove the item from the tree and return it. 1194 * Complain if it is not there and do no more processing. 1195 */ 1196 *perror = ESRCH; 1197 #ifdef RADIX_MPATH 1198 if (rt_mpath_capable(rnh)) 1199 rn = rt_mpath_unlink(rnh, info, rt, perror); 1200 else 1201 #endif 1202 rn = rnh->rnh_deladdr(dst, netmask, &rnh->head); 1203 if (rn == NULL) 1204 return (NULL); 1205 1206 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 1207 panic ("rtrequest delete"); 1208 1209 rt = RNTORT(rn); 1210 RT_LOCK(rt); 1211 RT_ADDREF(rt); 1212 rt->rt_flags &= ~RTF_UP; 1213 1214 *perror = 0; 1215 1216 return (rt); 1217 } 1218 1219 static void 1220 rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info) 1221 { 1222 struct ifaddr *ifa; 1223 1224 /* 1225 * give the protocol a chance to keep things in sync. 1226 */ 1227 ifa = rt->rt_nhop->nh_ifa; 1228 if (ifa != NULL && ifa->ifa_rtrequest != NULL) 1229 ifa->ifa_rtrequest(RTM_DELETE, rt, rt->rt_nhop, info); 1230 1231 /* 1232 * One more rtentry floating around that is not 1233 * linked to the routing table. rttrash will be decremented 1234 * when RTFREE(rt) is eventually called. 1235 */ 1236 V_rttrash++; 1237 } 1238 1239 1240 /* 1241 * These (questionable) definitions of apparent local variables apply 1242 * to the next two functions. XXXXXX!!! 1243 */ 1244 #define dst info->rti_info[RTAX_DST] 1245 #define gateway info->rti_info[RTAX_GATEWAY] 1246 #define netmask info->rti_info[RTAX_NETMASK] 1247 #define ifaaddr info->rti_info[RTAX_IFA] 1248 #define ifpaddr info->rti_info[RTAX_IFP] 1249 #define flags info->rti_flags 1250 1251 /* 1252 * Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined, 1253 * it will be referenced so the caller must free it. 1254 * 1255 * Assume basic consistency checks are executed by callers: 1256 * RTAX_DST exists, if RTF_GATEWAY is set, RTAX_GATEWAY exists as well. 1257 */ 1258 int 1259 rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) 1260 { 1261 struct epoch_tracker et; 1262 int needref, error; 1263 1264 /* 1265 * ifp may be specified by sockaddr_dl 1266 * when protocol address is ambiguous. 1267 */ 1268 error = 0; 1269 needref = (info->rti_ifa == NULL); 1270 NET_EPOCH_ENTER(et); 1271 1272 /* If we have interface specified by the ifindex in the address, use it */ 1273 if (info->rti_ifp == NULL && ifpaddr != NULL && 1274 ifpaddr->sa_family == AF_LINK) { 1275 const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)ifpaddr; 1276 if (sdl->sdl_index != 0) 1277 info->rti_ifp = ifnet_byindex(sdl->sdl_index); 1278 } 1279 /* 1280 * If we have source address specified, try to find it 1281 * TODO: avoid enumerating all ifas on all interfaces. 1282 */ 1283 if (info->rti_ifa == NULL && ifaaddr != NULL) 1284 info->rti_ifa = ifa_ifwithaddr(ifaaddr); 1285 if (info->rti_ifa == NULL) { 1286 struct sockaddr *sa; 1287 1288 /* 1289 * Most common use case for the userland-supplied routes. 1290 * 1291 * Choose sockaddr to select ifa. 1292 * -- if ifp is set -- 1293 * Order of preference: 1294 * 1) IFA address 1295 * 2) gateway address 1296 * Note: for interface routes link-level gateway address 1297 * is specified to indicate the interface index without 1298 * specifying RTF_GATEWAY. In this case, ignore gateway 1299 * Note: gateway AF may be different from dst AF. In this case, 1300 * ignore gateway 1301 * 3) final destination. 1302 * 4) if all of these fails, try to get at least link-level ifa. 1303 * -- else -- 1304 * try to lookup gateway or dst in the routing table to get ifa 1305 */ 1306 if (info->rti_info[RTAX_IFA] != NULL) 1307 sa = info->rti_info[RTAX_IFA]; 1308 else if ((info->rti_flags & RTF_GATEWAY) != 0 && 1309 gateway->sa_family == dst->sa_family) 1310 sa = gateway; 1311 else 1312 sa = dst; 1313 if (info->rti_ifp != NULL) { 1314 info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp); 1315 /* Case 4 */ 1316 if (info->rti_ifa == NULL && gateway != NULL) 1317 info->rti_ifa = ifaof_ifpforaddr(gateway, info->rti_ifp); 1318 } else if (dst != NULL && gateway != NULL) 1319 info->rti_ifa = ifa_ifwithroute(flags, dst, gateway, 1320 fibnum); 1321 else if (sa != NULL) 1322 info->rti_ifa = ifa_ifwithroute(flags, sa, sa, 1323 fibnum); 1324 } 1325 if (needref && info->rti_ifa != NULL) { 1326 if (info->rti_ifp == NULL) 1327 info->rti_ifp = info->rti_ifa->ifa_ifp; 1328 ifa_ref(info->rti_ifa); 1329 } else 1330 error = ENETUNREACH; 1331 NET_EPOCH_EXIT(et); 1332 return (error); 1333 } 1334 1335 void 1336 rt_updatemtu(struct ifnet *ifp) 1337 { 1338 struct rib_head *rnh; 1339 int mtu; 1340 int i, j; 1341 1342 /* 1343 * Try to update rt_mtu for all routes using this interface 1344 * Unfortunately the only way to do this is to traverse all 1345 * routing tables in all fibs/domains. 1346 */ 1347 for (i = 1; i <= AF_MAX; i++) { 1348 mtu = if_getmtu_family(ifp, i); 1349 for (j = 0; j < rt_numfibs; j++) { 1350 rnh = rt_tables_get_rnh(j, i); 1351 if (rnh == NULL) 1352 continue; 1353 nhops_update_ifmtu(rnh, ifp, mtu); 1354 } 1355 } 1356 } 1357 1358 1359 #if 0 1360 int p_sockaddr(char *buf, int buflen, struct sockaddr *s); 1361 int rt_print(char *buf, int buflen, struct rtentry *rt); 1362 1363 int 1364 p_sockaddr(char *buf, int buflen, struct sockaddr *s) 1365 { 1366 void *paddr = NULL; 1367 1368 switch (s->sa_family) { 1369 case AF_INET: 1370 paddr = &((struct sockaddr_in *)s)->sin_addr; 1371 break; 1372 case AF_INET6: 1373 paddr = &((struct sockaddr_in6 *)s)->sin6_addr; 1374 break; 1375 } 1376 1377 if (paddr == NULL) 1378 return (0); 1379 1380 if (inet_ntop(s->sa_family, paddr, buf, buflen) == NULL) 1381 return (0); 1382 1383 return (strlen(buf)); 1384 } 1385 1386 int 1387 rt_print(char *buf, int buflen, struct rtentry *rt) 1388 { 1389 struct sockaddr *addr, *mask; 1390 int i = 0; 1391 1392 addr = rt_key(rt); 1393 mask = rt_mask(rt); 1394 1395 i = p_sockaddr(buf, buflen, addr); 1396 if (!(rt->rt_flags & RTF_HOST)) { 1397 buf[i++] = '/'; 1398 i += p_sockaddr(buf + i, buflen - i, mask); 1399 } 1400 1401 if (rt->rt_flags & RTF_GATEWAY) { 1402 buf[i++] = '>'; 1403 i += p_sockaddr(buf + i, buflen - i, &rt->rt_nhop->gw_sa); 1404 } 1405 1406 return (i); 1407 } 1408 #endif 1409 1410 #ifdef RADIX_MPATH 1411 /* 1412 * Deletes key for single-path routes, unlinks rtentry with 1413 * gateway specified in @info from multi-path routes. 1414 * 1415 * Returnes unlinked entry. In case of failure, returns NULL 1416 * and sets @perror to ESRCH. 1417 */ 1418 static struct radix_node * 1419 rt_mpath_unlink(struct rib_head *rnh, struct rt_addrinfo *info, 1420 struct rtentry *rto, int *perror) 1421 { 1422 /* 1423 * if we got multipath routes, we require users to specify 1424 * a matching RTAX_GATEWAY. 1425 */ 1426 struct rtentry *rt; // *rto = NULL; 1427 struct radix_node *rn; 1428 struct sockaddr *gw; 1429 1430 gw = info->rti_info[RTAX_GATEWAY]; 1431 rt = rt_mpath_matchgate(rto, gw); 1432 if (rt == NULL) { 1433 *perror = ESRCH; 1434 return (NULL); 1435 } 1436 1437 /* 1438 * this is the first entry in the chain 1439 */ 1440 if (rto == rt) { 1441 rn = rn_mpath_next((struct radix_node *)rt); 1442 /* 1443 * there is another entry, now it's active 1444 */ 1445 if (rn) { 1446 rto = RNTORT(rn); 1447 RT_LOCK(rto); 1448 rto->rt_flags |= RTF_UP; 1449 RT_UNLOCK(rto); 1450 } else if (rt->rt_flags & RTF_GATEWAY) { 1451 /* 1452 * For gateway routes, we need to 1453 * make sure that we we are deleting 1454 * the correct gateway. 1455 * rt_mpath_matchgate() does not 1456 * check the case when there is only 1457 * one route in the chain. 1458 */ 1459 if (gw && 1460 (rt->rt_nhop->gw_sa.sa_len != gw->sa_len || 1461 memcmp(&rt->rt_nhop->gw_sa, gw, gw->sa_len))) { 1462 *perror = ESRCH; 1463 return (NULL); 1464 } 1465 } 1466 1467 /* 1468 * use the normal delete code to remove 1469 * the first entry 1470 */ 1471 rn = rnh->rnh_deladdr(dst, netmask, &rnh->head); 1472 *perror = 0; 1473 return (rn); 1474 } 1475 1476 /* 1477 * if the entry is 2nd and on up 1478 */ 1479 if (rt_mpath_deldup(rto, rt) == 0) 1480 panic ("rtrequest1: rt_mpath_deldup"); 1481 *perror = 0; 1482 rn = (struct radix_node *)rt; 1483 return (rn); 1484 } 1485 #endif 1486 1487 #undef dst 1488 #undef gateway 1489 #undef netmask 1490 #undef ifaaddr 1491 #undef ifpaddr 1492 #undef flags 1493 1494 int 1495 rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, 1496 u_int fibnum) 1497 { 1498 const struct sockaddr *dst; 1499 struct rib_head *rnh; 1500 int error; 1501 1502 KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); 1503 KASSERT((info->rti_flags & RTF_RNH_LOCKED) == 0, ("rtrequest1_fib: locked")); 1504 1505 dst = info->rti_info[RTAX_DST]; 1506 1507 switch (dst->sa_family) { 1508 case AF_INET6: 1509 case AF_INET: 1510 /* We support multiple FIBs. */ 1511 break; 1512 default: 1513 fibnum = RT_DEFAULT_FIB; 1514 break; 1515 } 1516 1517 /* 1518 * Find the correct routing tree to use for this Address Family 1519 */ 1520 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 1521 if (rnh == NULL) 1522 return (EAFNOSUPPORT); 1523 1524 /* 1525 * If we are adding a host route then we don't want to put 1526 * a netmask in the tree, nor do we want to clone it. 1527 */ 1528 if (info->rti_flags & RTF_HOST) 1529 info->rti_info[RTAX_NETMASK] = NULL; 1530 1531 error = 0; 1532 switch (req) { 1533 case RTM_DELETE: 1534 error = del_route(rnh, info, ret_nrt); 1535 break; 1536 case RTM_RESOLVE: 1537 /* 1538 * resolve was only used for route cloning 1539 * here for compat 1540 */ 1541 break; 1542 case RTM_ADD: 1543 error = add_route(rnh, info, ret_nrt); 1544 break; 1545 case RTM_CHANGE: 1546 error = change_route(rnh, info, ret_nrt); 1547 break; 1548 default: 1549 error = EOPNOTSUPP; 1550 } 1551 1552 return (error); 1553 } 1554 1555 static int 1556 add_route(struct rib_head *rnh, struct rt_addrinfo *info, 1557 struct rtentry **ret_nrt) 1558 { 1559 struct sockaddr *dst, *ndst, *gateway, *netmask; 1560 struct rtentry *rt, *rt_old; 1561 struct nhop_object *nh; 1562 struct radix_node *rn; 1563 struct ifaddr *ifa; 1564 int error, flags; 1565 struct epoch_tracker et; 1566 1567 dst = info->rti_info[RTAX_DST]; 1568 gateway = info->rti_info[RTAX_GATEWAY]; 1569 netmask = info->rti_info[RTAX_NETMASK]; 1570 flags = info->rti_flags; 1571 1572 if ((flags & RTF_GATEWAY) && !gateway) 1573 return (EINVAL); 1574 if (dst && gateway && (dst->sa_family != gateway->sa_family) && 1575 (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) 1576 return (EINVAL); 1577 1578 if (info->rti_ifa == NULL) { 1579 error = rt_getifa_fib(info, rnh->rib_fibnum); 1580 if (error) 1581 return (error); 1582 } else { 1583 ifa_ref(info->rti_ifa); 1584 } 1585 1586 NET_EPOCH_ENTER(et); 1587 error = nhop_create_from_info(rnh, info, &nh); 1588 NET_EPOCH_EXIT(et); 1589 if (error != 0) { 1590 ifa_free(info->rti_ifa); 1591 return (error); 1592 } 1593 1594 rt = uma_zalloc(V_rtzone, M_NOWAIT); 1595 if (rt == NULL) { 1596 ifa_free(info->rti_ifa); 1597 nhop_free(nh); 1598 return (ENOBUFS); 1599 } 1600 rt->rt_flags = RTF_UP | flags; 1601 rt->rt_fibnum = rnh->rib_fibnum; 1602 rt->rt_nhop = nh; 1603 /* 1604 * Add the gateway. Possibly re-malloc-ing the storage for it. 1605 */ 1606 if ((error = rt_setgate(rt, dst, gateway)) != 0) { 1607 ifa_free(info->rti_ifa); 1608 nhop_free(nh); 1609 uma_zfree(V_rtzone, rt); 1610 return (error); 1611 } 1612 1613 /* 1614 * point to the (possibly newly malloc'd) dest address. 1615 */ 1616 ndst = (struct sockaddr *)rt_key(rt); 1617 1618 /* 1619 * make sure it contains the value we want (masked if needed). 1620 */ 1621 if (netmask) { 1622 rt_maskedcopy(dst, ndst, netmask); 1623 } else 1624 bcopy(dst, ndst, dst->sa_len); 1625 1626 /* 1627 * We use the ifa reference returned by rt_getifa_fib(). 1628 * This moved from below so that rnh->rnh_addaddr() can 1629 * examine the ifa and ifa->ifa_ifp if it so desires. 1630 */ 1631 ifa = info->rti_ifa; 1632 rt->rt_weight = 1; 1633 1634 rt_setmetrics(info, rt); 1635 1636 RIB_WLOCK(rnh); 1637 RT_LOCK(rt); 1638 #ifdef RADIX_MPATH 1639 /* do not permit exactly the same dst/mask/gw pair */ 1640 if (rt_mpath_capable(rnh) && 1641 rt_mpath_conflict(rnh, rt, netmask)) { 1642 RIB_WUNLOCK(rnh); 1643 1644 R_Free(rt_key(rt)); 1645 nhop_free(nh); 1646 uma_zfree(V_rtzone, rt); 1647 return (EEXIST); 1648 } 1649 #endif 1650 1651 rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes); 1652 1653 if (rn != NULL && rt->rt_expire > 0) 1654 tmproutes_update(rnh, rt); 1655 1656 rt_old = NULL; 1657 if (rn == NULL && (info->rti_flags & RTF_PINNED) != 0) { 1658 1659 /* 1660 * Force removal and re-try addition 1661 * TODO: better multipath&pinned support 1662 */ 1663 struct sockaddr *info_dst = info->rti_info[RTAX_DST]; 1664 info->rti_info[RTAX_DST] = ndst; 1665 /* Do not delete existing PINNED(interface) routes */ 1666 info->rti_flags &= ~RTF_PINNED; 1667 rt_old = rt_unlinkrte(rnh, info, &error); 1668 info->rti_flags |= RTF_PINNED; 1669 info->rti_info[RTAX_DST] = info_dst; 1670 if (rt_old != NULL) 1671 rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, 1672 rt->rt_nodes); 1673 } 1674 RIB_WUNLOCK(rnh); 1675 1676 if (rt_old != NULL) 1677 RT_UNLOCK(rt_old); 1678 1679 /* 1680 * If it still failed to go into the tree, 1681 * then un-make it (this should be a function) 1682 */ 1683 if (rn == NULL) { 1684 R_Free(rt_key(rt)); 1685 nhop_free(nh); 1686 uma_zfree(V_rtzone, rt); 1687 return (EEXIST); 1688 } 1689 1690 if (rt_old != NULL) { 1691 rt_notifydelete(rt_old, info); 1692 RTFREE(rt_old); 1693 } 1694 1695 /* 1696 * If this protocol has something to add to this then 1697 * allow it to do that as well. 1698 */ 1699 if (ifa->ifa_rtrequest) 1700 ifa->ifa_rtrequest(RTM_ADD, rt, rt->rt_nhop, info); 1701 1702 /* 1703 * actually return a resultant rtentry and 1704 * give the caller a single reference. 1705 */ 1706 if (ret_nrt) { 1707 *ret_nrt = rt; 1708 RT_ADDREF(rt); 1709 } 1710 rnh->rnh_gen++; /* Routing table updated */ 1711 RT_UNLOCK(rt); 1712 1713 return (0); 1714 } 1715 1716 static int 1717 del_route(struct rib_head *rnh, struct rt_addrinfo *info, 1718 struct rtentry **ret_nrt) 1719 { 1720 struct sockaddr *dst, *netmask; 1721 struct sockaddr_storage mdst; 1722 struct rtentry *rt; 1723 int error; 1724 1725 dst = info->rti_info[RTAX_DST]; 1726 netmask = info->rti_info[RTAX_NETMASK]; 1727 1728 if (netmask) { 1729 if (dst->sa_len > sizeof(mdst)) 1730 return (EINVAL); 1731 rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); 1732 dst = (struct sockaddr *)&mdst; 1733 } 1734 1735 RIB_WLOCK(rnh); 1736 rt = rt_unlinkrte(rnh, info, &error); 1737 RIB_WUNLOCK(rnh); 1738 if (error != 0) 1739 return (error); 1740 1741 rt_notifydelete(rt, info); 1742 1743 /* 1744 * If the caller wants it, then it can have it, 1745 * but it's up to it to free the rtentry as we won't be 1746 * doing it. 1747 */ 1748 if (ret_nrt) { 1749 *ret_nrt = rt; 1750 RT_UNLOCK(rt); 1751 } else 1752 RTFREE_LOCKED(rt); 1753 1754 return (0); 1755 } 1756 1757 static int 1758 change_route_one(struct rib_head *rnh, struct rt_addrinfo *info, 1759 struct rtentry **ret_nrt) 1760 { 1761 RIB_RLOCK_TRACKER; 1762 struct rtentry *rt = NULL; 1763 int error = 0; 1764 int free_ifa = 0; 1765 struct nhop_object *nh, *nh_orig; 1766 1767 RIB_RLOCK(rnh); 1768 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 1769 info->rti_info[RTAX_NETMASK], &rnh->head); 1770 1771 if (rt == NULL) { 1772 RIB_RUNLOCK(rnh); 1773 return (ESRCH); 1774 } 1775 1776 #ifdef RADIX_MPATH 1777 /* 1778 * If we got multipath routes, 1779 * we require users to specify a matching RTAX_GATEWAY. 1780 */ 1781 if (rt_mpath_capable(rnh)) { 1782 rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]); 1783 if (rt == NULL) { 1784 RIB_RUNLOCK(rnh); 1785 return (ESRCH); 1786 } 1787 } 1788 #endif 1789 nh_orig = rt->rt_nhop; 1790 1791 RIB_RUNLOCK(rnh); 1792 1793 rt = NULL; 1794 nh = NULL; 1795 1796 /* 1797 * New gateway could require new ifaddr, ifp; 1798 * flags may also be different; ifp may be specified 1799 * by ll sockaddr when protocol address is ambiguous 1800 */ 1801 if (((nh_orig->nh_flags & NHF_GATEWAY) && 1802 info->rti_info[RTAX_GATEWAY] != NULL) || 1803 info->rti_info[RTAX_IFP] != NULL || 1804 (info->rti_info[RTAX_IFA] != NULL && 1805 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) { 1806 error = rt_getifa_fib(info, rnh->rib_fibnum); 1807 if (info->rti_ifa != NULL) 1808 free_ifa = 1; 1809 1810 if (error != 0) { 1811 if (free_ifa) { 1812 ifa_free(info->rti_ifa); 1813 info->rti_ifa = NULL; 1814 } 1815 1816 return (error); 1817 } 1818 } 1819 1820 error = nhop_create_from_nhop(rnh, nh_orig, info, &nh); 1821 if (free_ifa) { 1822 ifa_free(info->rti_ifa); 1823 info->rti_ifa = NULL; 1824 } 1825 if (error != 0) 1826 return (error); 1827 1828 RIB_WLOCK(rnh); 1829 1830 /* Lookup rtentry once again and check if nexthop is still the same */ 1831 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 1832 info->rti_info[RTAX_NETMASK], &rnh->head); 1833 1834 if (rt == NULL) { 1835 RIB_WUNLOCK(rnh); 1836 nhop_free(nh); 1837 return (ESRCH); 1838 } 1839 1840 if (rt->rt_nhop != nh_orig) { 1841 RIB_WUNLOCK(rnh); 1842 nhop_free(nh); 1843 return (EAGAIN); 1844 } 1845 1846 /* Proceed with the update */ 1847 RT_LOCK(rt); 1848 1849 /* Provide notification to the protocols.*/ 1850 if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest) 1851 nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info); 1852 1853 rt->rt_nhop = nh; 1854 rt_setmetrics(info, rt); 1855 1856 if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest) 1857 nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info); 1858 1859 if (ret_nrt != NULL) { 1860 *ret_nrt = rt; 1861 RT_ADDREF(rt); 1862 } 1863 1864 RT_UNLOCK(rt); 1865 1866 /* Update generation id to reflect rtable change */ 1867 rnh->rnh_gen++; 1868 1869 RIB_WUNLOCK(rnh); 1870 1871 nhop_free(nh_orig); 1872 1873 return (0); 1874 } 1875 1876 static int 1877 change_route(struct rib_head *rnh, struct rt_addrinfo *info, 1878 struct rtentry **ret_nrt) 1879 { 1880 struct epoch_tracker et; 1881 int error; 1882 1883 /* Check if updated gateway exists */ 1884 if ((info->rti_flags & RTF_GATEWAY) && 1885 (info->rti_info[RTAX_GATEWAY] == NULL)) 1886 return (EINVAL); 1887 1888 NET_EPOCH_ENTER(et); 1889 1890 /* 1891 * route change is done in multiple steps, with dropping and 1892 * reacquiring lock. In the situations with multiple processes 1893 * changes the same route in can lead to the case when route 1894 * is changed between the steps. Address it by retrying the operation 1895 * multiple times before failing. 1896 */ 1897 for (int i = 0; i < RIB_MAX_RETRIES; i++) { 1898 error = change_route_one(rnh, info, ret_nrt); 1899 if (error != EAGAIN) 1900 break; 1901 } 1902 NET_EPOCH_EXIT(et); 1903 1904 return (error); 1905 } 1906 1907 1908 static void 1909 rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt) 1910 { 1911 1912 if (info->rti_mflags & RTV_WEIGHT) 1913 rt->rt_weight = info->rti_rmx->rmx_weight; 1914 /* Kernel -> userland timebase conversion. */ 1915 if (info->rti_mflags & RTV_EXPIRE) 1916 rt->rt_expire = info->rti_rmx->rmx_expire ? 1917 info->rti_rmx->rmx_expire - time_second + time_uptime : 0; 1918 } 1919 1920 int 1921 rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) 1922 { 1923 /* XXX dst may be overwritten, can we move this to below */ 1924 int dlen = SA_SIZE(dst), glen = SA_SIZE(gate); 1925 1926 /* 1927 * Prepare to store the gateway in rt->rt_gateway. 1928 * Both dst and gateway are stored one after the other in the same 1929 * malloc'd chunk. If we have room, we can reuse the old buffer, 1930 * rt_gateway already points to the right place. 1931 * Otherwise, malloc a new block and update the 'dst' address. 1932 */ 1933 if (rt_key(rt) == NULL) { 1934 caddr_t new; 1935 1936 R_Malloc(new, caddr_t, dlen + glen); 1937 if (new == NULL) 1938 return ENOBUFS; 1939 /* 1940 * XXX note, we copy from *dst and not *rt_key(rt) because 1941 * rt_setgate() can be called to initialize a newly 1942 * allocated route entry, in which case rt_key(rt) == NULL 1943 * (and also rt->rt_gateway == NULL). 1944 * Free()/free() handle a NULL argument just fine. 1945 */ 1946 bcopy(dst, new, dlen); 1947 R_Free(rt_key(rt)); /* free old block, if any */ 1948 rt_key(rt) = (struct sockaddr *)new; 1949 } 1950 1951 return (0); 1952 } 1953 1954 void 1955 rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask) 1956 { 1957 u_char *cp1 = (u_char *)src; 1958 u_char *cp2 = (u_char *)dst; 1959 u_char *cp3 = (u_char *)netmask; 1960 u_char *cplim = cp2 + *cp3; 1961 u_char *cplim2 = cp2 + *cp1; 1962 1963 *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ 1964 cp3 += 2; 1965 if (cplim > cplim2) 1966 cplim = cplim2; 1967 while (cp2 < cplim) 1968 *cp2++ = *cp1++ & *cp3++; 1969 if (cp2 < cplim2) 1970 bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2)); 1971 } 1972 1973 /* 1974 * Set up a routing table entry, normally 1975 * for an interface. 1976 */ 1977 #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */ 1978 static inline int 1979 rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) 1980 { 1981 RIB_RLOCK_TRACKER; 1982 struct sockaddr *dst; 1983 struct sockaddr *netmask; 1984 struct rtentry *rt = NULL; 1985 struct rt_addrinfo info; 1986 int error = 0; 1987 int startfib, endfib; 1988 char tempbuf[_SOCKADDR_TMPSIZE]; 1989 int didwork = 0; 1990 int a_failure = 0; 1991 struct sockaddr_dl_short *sdl = NULL; 1992 struct rib_head *rnh; 1993 1994 if (flags & RTF_HOST) { 1995 dst = ifa->ifa_dstaddr; 1996 netmask = NULL; 1997 } else { 1998 dst = ifa->ifa_addr; 1999 netmask = ifa->ifa_netmask; 2000 } 2001 if (dst->sa_len == 0) 2002 return(EINVAL); 2003 switch (dst->sa_family) { 2004 case AF_INET6: 2005 case AF_INET: 2006 /* We support multiple FIBs. */ 2007 break; 2008 default: 2009 fibnum = RT_DEFAULT_FIB; 2010 break; 2011 } 2012 if (fibnum == RT_ALL_FIBS) { 2013 if (V_rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) 2014 startfib = endfib = ifa->ifa_ifp->if_fib; 2015 else { 2016 startfib = 0; 2017 endfib = rt_numfibs - 1; 2018 } 2019 } else { 2020 KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum")); 2021 startfib = fibnum; 2022 endfib = fibnum; 2023 } 2024 2025 /* 2026 * If it's a delete, check that if it exists, 2027 * it's on the correct interface or we might scrub 2028 * a route to another ifa which would 2029 * be confusing at best and possibly worse. 2030 */ 2031 if (cmd == RTM_DELETE) { 2032 /* 2033 * It's a delete, so it should already exist.. 2034 * If it's a net, mask off the host bits 2035 * (Assuming we have a mask) 2036 * XXX this is kinda inet specific.. 2037 */ 2038 if (netmask != NULL) { 2039 rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask); 2040 dst = (struct sockaddr *)tempbuf; 2041 } 2042 } else if (cmd == RTM_ADD) { 2043 sdl = (struct sockaddr_dl_short *)tempbuf; 2044 bzero(sdl, sizeof(struct sockaddr_dl_short)); 2045 sdl->sdl_family = AF_LINK; 2046 sdl->sdl_len = sizeof(struct sockaddr_dl_short); 2047 sdl->sdl_type = ifa->ifa_ifp->if_type; 2048 sdl->sdl_index = ifa->ifa_ifp->if_index; 2049 } 2050 /* 2051 * Now go through all the requested tables (fibs) and do the 2052 * requested action. Realistically, this will either be fib 0 2053 * for protocols that don't do multiple tables or all the 2054 * tables for those that do. 2055 */ 2056 for ( fibnum = startfib; fibnum <= endfib; fibnum++) { 2057 if (cmd == RTM_DELETE) { 2058 struct radix_node *rn; 2059 /* 2060 * Look up an rtentry that is in the routing tree and 2061 * contains the correct info. 2062 */ 2063 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 2064 if (rnh == NULL) 2065 /* this table doesn't exist but others might */ 2066 continue; 2067 RIB_RLOCK(rnh); 2068 rn = rnh->rnh_lookup(dst, netmask, &rnh->head); 2069 #ifdef RADIX_MPATH 2070 if (rt_mpath_capable(rnh)) { 2071 2072 if (rn == NULL) 2073 error = ESRCH; 2074 else { 2075 rt = RNTORT(rn); 2076 /* 2077 * for interface route the gateway 2078 * gateway is sockaddr_dl, so 2079 * rt_mpath_matchgate must use the 2080 * interface address 2081 */ 2082 rt = rt_mpath_matchgate(rt, 2083 ifa->ifa_addr); 2084 if (rt == NULL) 2085 error = ESRCH; 2086 } 2087 } 2088 #endif 2089 error = (rn == NULL || 2090 (rn->rn_flags & RNF_ROOT) || 2091 RNTORT(rn)->rt_nhop->nh_ifa != ifa); 2092 RIB_RUNLOCK(rnh); 2093 if (error) { 2094 /* this is only an error if bad on ALL tables */ 2095 continue; 2096 } 2097 } 2098 /* 2099 * Do the actual request 2100 */ 2101 bzero((caddr_t)&info, sizeof(info)); 2102 info.rti_ifa = ifa; 2103 info.rti_flags = flags | 2104 (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED; 2105 info.rti_info[RTAX_DST] = dst; 2106 /* 2107 * doing this for compatibility reasons 2108 */ 2109 if (cmd == RTM_ADD) 2110 info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)sdl; 2111 else 2112 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 2113 info.rti_info[RTAX_NETMASK] = netmask; 2114 error = rtrequest1_fib(cmd, &info, &rt, fibnum); 2115 if (error == 0 && rt != NULL) { 2116 /* 2117 * notify any listening routing agents of the change 2118 */ 2119 RT_LOCK(rt); 2120 2121 /* TODO: interface routes/aliases */ 2122 RT_ADDREF(rt); 2123 RT_UNLOCK(rt); 2124 rt_newaddrmsg_fib(cmd, ifa, rt, fibnum); 2125 RT_LOCK(rt); 2126 RT_REMREF(rt); 2127 if (cmd == RTM_DELETE) { 2128 /* 2129 * If we are deleting, and we found an entry, 2130 * then it's been removed from the tree.. 2131 * now throw it away. 2132 */ 2133 RTFREE_LOCKED(rt); 2134 } else { 2135 if (cmd == RTM_ADD) { 2136 /* 2137 * We just wanted to add it.. 2138 * we don't actually need a reference. 2139 */ 2140 RT_REMREF(rt); 2141 } 2142 RT_UNLOCK(rt); 2143 } 2144 didwork = 1; 2145 } 2146 if (error) 2147 a_failure = error; 2148 } 2149 if (cmd == RTM_DELETE) { 2150 if (didwork) { 2151 error = 0; 2152 } else { 2153 /* we only give an error if it wasn't in any table */ 2154 error = ((flags & RTF_HOST) ? 2155 EHOSTUNREACH : ENETUNREACH); 2156 } 2157 } else { 2158 if (a_failure) { 2159 /* return an error if any of them failed */ 2160 error = a_failure; 2161 } 2162 } 2163 return (error); 2164 } 2165 2166 /* 2167 * Set up a routing table entry, normally 2168 * for an interface. 2169 */ 2170 int 2171 rtinit(struct ifaddr *ifa, int cmd, int flags) 2172 { 2173 struct sockaddr *dst; 2174 int fib = RT_DEFAULT_FIB; 2175 2176 if (flags & RTF_HOST) { 2177 dst = ifa->ifa_dstaddr; 2178 } else { 2179 dst = ifa->ifa_addr; 2180 } 2181 2182 switch (dst->sa_family) { 2183 case AF_INET6: 2184 case AF_INET: 2185 /* We do support multiple FIBs. */ 2186 fib = RT_ALL_FIBS; 2187 break; 2188 } 2189 return (rtinit1(ifa, cmd, flags, fib)); 2190 } 2191 2192 /* 2193 * Announce interface address arrival/withdraw 2194 * Returns 0 on success. 2195 */ 2196 int 2197 rt_addrmsg(int cmd, struct ifaddr *ifa, int fibnum) 2198 { 2199 2200 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, 2201 ("unexpected cmd %d", cmd)); 2202 KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), 2203 ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); 2204 2205 EVENTHANDLER_DIRECT_INVOKE(rt_addrmsg, ifa, cmd); 2206 return (rtsock_addrmsg(cmd, ifa, fibnum)); 2207 } 2208 2209 /* 2210 * Announce kernel-originated route addition/removal to rtsock based on @rt data. 2211 * cmd: RTM_ cmd 2212 * @rt: valid rtentry 2213 * @ifp: target route interface 2214 * @fibnum: fib id or RT_ALL_FIBS 2215 * 2216 * Returns 0 on success. 2217 */ 2218 int 2219 rt_routemsg(int cmd, struct rtentry *rt, struct ifnet *ifp, int rti_addrs, 2220 int fibnum) 2221 { 2222 2223 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, 2224 ("unexpected cmd %d", cmd)); 2225 2226 KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), 2227 ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); 2228 2229 KASSERT(rt_key(rt) != NULL, (":%s: rt_key must be supplied", __func__)); 2230 2231 return (rtsock_routemsg(cmd, rt, ifp, 0, fibnum)); 2232 } 2233 2234 /* 2235 * Announce kernel-originated route addition/removal to rtsock based on @rt data. 2236 * cmd: RTM_ cmd 2237 * @info: addrinfo structure with valid data. 2238 * @fibnum: fib id or RT_ALL_FIBS 2239 * 2240 * Returns 0 on success. 2241 */ 2242 int 2243 rt_routemsg_info(int cmd, struct rt_addrinfo *info, int fibnum) 2244 { 2245 2246 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE || cmd == RTM_CHANGE, 2247 ("unexpected cmd %d", cmd)); 2248 2249 KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), 2250 ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); 2251 2252 KASSERT(info->rti_info[RTAX_DST] != NULL, (":%s: RTAX_DST must be supplied", __func__)); 2253 2254 return (rtsock_routemsg_info(cmd, info, fibnum)); 2255 } 2256 2257 2258 /* 2259 * This is called to generate messages from the routing socket 2260 * indicating a network interface has had addresses associated with it. 2261 */ 2262 void 2263 rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, struct rtentry *rt, int fibnum) 2264 { 2265 2266 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, 2267 ("unexpected cmd %u", cmd)); 2268 KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), 2269 ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); 2270 2271 if (cmd == RTM_ADD) { 2272 rt_addrmsg(cmd, ifa, fibnum); 2273 if (rt != NULL) 2274 rt_routemsg(cmd, rt, ifa->ifa_ifp, 0, fibnum); 2275 } else { 2276 if (rt != NULL) 2277 rt_routemsg(cmd, rt, ifa->ifa_ifp, 0, fibnum); 2278 rt_addrmsg(cmd, ifa, fibnum); 2279 } 2280 } 2281 2282