1 /*- 2 * Copyright (c) 1980, 1986, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95 30 * $FreeBSD$ 31 */ 32 /************************************************************************ 33 * Note: In this file a 'fib' is a "forwarding information base" * 34 * Which is the new name for an in kernel routing (next hop) table. * 35 ***********************************************************************/ 36 37 #include "opt_inet.h" 38 #include "opt_inet6.h" 39 #include "opt_route.h" 40 #include "opt_mrouting.h" 41 #include "opt_mpath.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/syslog.h> 46 #include <sys/malloc.h> 47 #include <sys/mbuf.h> 48 #include <sys/socket.h> 49 #include <sys/sysctl.h> 50 #include <sys/syslog.h> 51 #include <sys/sysproto.h> 52 #include <sys/proc.h> 53 #include <sys/domain.h> 54 #include <sys/kernel.h> 55 56 #include <net/if.h> 57 #include <net/if_dl.h> 58 #include <net/route.h> 59 #include <net/vnet.h> 60 #include <net/flowtable.h> 61 62 #ifdef RADIX_MPATH 63 #include <net/radix_mpath.h> 64 #endif 65 66 #include <netinet/in.h> 67 #include <netinet/ip_mroute.h> 68 69 #include <vm/uma.h> 70 71 /* We use 4 bits in the mbuf flags, thus we are limited to 16 FIBS. */ 72 #define RT_MAXFIBS 16 73 74 /* Kernel config default option. */ 75 #ifdef ROUTETABLES 76 #if ROUTETABLES <= 0 77 #error "ROUTETABLES defined too low" 78 #endif 79 #if ROUTETABLES > RT_MAXFIBS 80 #error "ROUTETABLES defined too big" 81 #endif 82 #define RT_NUMFIBS ROUTETABLES 83 #endif /* ROUTETABLES */ 84 /* Initialize to default if not otherwise set. */ 85 #ifndef RT_NUMFIBS 86 #define RT_NUMFIBS 1 87 #endif 88 89 u_int rt_numfibs = RT_NUMFIBS; 90 SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, ""); 91 /* 92 * Allow the boot code to allow LESS than RT_MAXFIBS to be used. 93 * We can't do more because storage is statically allocated for now. 94 * (for compatibility reasons.. this will change. When this changes, code should 95 * be refactored to protocol independent parts and protocol dependent parts, 96 * probably hanging of domain(9) specific storage to not need the full 97 * fib * af RNH allocation etc. but allow tuning the number of tables per 98 * address family). 99 */ 100 TUNABLE_INT("net.fibs", &rt_numfibs); 101 102 /* 103 * By default add routes to all fibs for new interfaces. 104 * Once this is set to 0 then only allocate routes on interface 105 * changes for the FIB of the caller when adding a new set of addresses 106 * to an interface. XXX this is a shotgun aproach to a problem that needs 107 * a more fine grained solution.. that will come. 108 * XXX also has the problems getting the FIB from curthread which will not 109 * always work given the fib can be overridden and prefixes can be added 110 * from the network stack context. 111 */ 112 u_int rt_add_addr_allfibs = 1; 113 SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW, 114 &rt_add_addr_allfibs, 0, ""); 115 TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs); 116 117 VNET_DEFINE(struct rtstat, rtstat); 118 #define V_rtstat VNET(rtstat) 119 120 VNET_DEFINE(struct radix_node_head *, rt_tables); 121 #define V_rt_tables VNET(rt_tables) 122 123 VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ 124 #define V_rttrash VNET(rttrash) 125 126 127 /* compare two sockaddr structures */ 128 #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0) 129 130 /* 131 * Convert a 'struct radix_node *' to a 'struct rtentry *'. 132 * The operation can be done safely (in this code) because a 133 * 'struct rtentry' starts with two 'struct radix_node''s, the first 134 * one representing leaf nodes in the routing tree, which is 135 * what the code in radix.c passes us as a 'struct radix_node'. 136 * 137 * But because there are a lot of assumptions in this conversion, 138 * do not cast explicitly, but always use the macro below. 139 */ 140 #define RNTORT(p) ((struct rtentry *)(p)) 141 142 static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */ 143 #define V_rtzone VNET(rtzone) 144 145 /* 146 * handler for net.my_fibnum 147 */ 148 static int 149 sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) 150 { 151 int fibnum; 152 int error; 153 154 fibnum = curthread->td_proc->p_fibnum; 155 error = sysctl_handle_int(oidp, &fibnum, 0, req); 156 return (error); 157 } 158 159 SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD, 160 NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller"); 161 162 static __inline struct radix_node_head ** 163 rt_tables_get_rnh_ptr(int table, int fam) 164 { 165 struct radix_node_head **rnh; 166 167 KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.", 168 __func__)); 169 KASSERT(fam >= 0 && fam < (AF_MAX+1), ("%s: fam out of bounds.", 170 __func__)); 171 172 /* rnh is [fib=0][af=0]. */ 173 rnh = (struct radix_node_head **)V_rt_tables; 174 /* Get the offset to the requested table and fam. */ 175 rnh += table * (AF_MAX+1) + fam; 176 177 return (rnh); 178 } 179 180 struct radix_node_head * 181 rt_tables_get_rnh(int table, int fam) 182 { 183 184 return (*rt_tables_get_rnh_ptr(table, fam)); 185 } 186 187 /* 188 * route initialization must occur before ip6_init2(), which happenas at 189 * SI_ORDER_MIDDLE. 190 */ 191 static void 192 route_init(void) 193 { 194 struct domain *dom; 195 int max_keylen = 0; 196 197 /* whack the tunable ints into line. */ 198 if (rt_numfibs > RT_MAXFIBS) 199 rt_numfibs = RT_MAXFIBS; 200 if (rt_numfibs == 0) 201 rt_numfibs = 1; 202 203 for (dom = domains; dom; dom = dom->dom_next) 204 if (dom->dom_maxrtkey > max_keylen) 205 max_keylen = dom->dom_maxrtkey; 206 207 rn_init(max_keylen); /* init all zeroes, all ones, mask table */ 208 } 209 SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0); 210 211 static void 212 vnet_route_init(const void *unused __unused) 213 { 214 struct domain *dom; 215 struct radix_node_head **rnh; 216 int table; 217 int fam; 218 219 V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) * 220 sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO); 221 222 V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL, 223 NULL, NULL, UMA_ALIGN_PTR, 0); 224 for (dom = domains; dom; dom = dom->dom_next) { 225 if (dom->dom_rtattach == NULL) 226 continue; 227 228 for (table = 0; table < rt_numfibs; table++) { 229 fam = dom->dom_family; 230 if (table != 0 && fam != AF_INET6 && fam != AF_INET) 231 break; 232 233 /* 234 * XXX MRT rtattach will be also called from 235 * vfs_export.c but the offset will be 0 (only for 236 * AF_INET and AF_INET6 which don't need it anyhow). 237 */ 238 rnh = rt_tables_get_rnh_ptr(table, fam); 239 if (rnh == NULL) 240 panic("%s: rnh NULL", __func__); 241 dom->dom_rtattach((void **)rnh, dom->dom_rtoffset); 242 } 243 } 244 } 245 VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, 246 vnet_route_init, 0); 247 248 #ifdef VIMAGE 249 static void 250 vnet_route_uninit(const void *unused __unused) 251 { 252 int table; 253 int fam; 254 struct domain *dom; 255 struct radix_node_head **rnh; 256 257 for (dom = domains; dom; dom = dom->dom_next) { 258 if (dom->dom_rtdetach == NULL) 259 continue; 260 261 for (table = 0; table < rt_numfibs; table++) { 262 fam = dom->dom_family; 263 264 if (table != 0 && fam != AF_INET6 && fam != AF_INET) 265 break; 266 267 rnh = rt_tables_get_rnh_ptr(table, fam); 268 if (rnh == NULL) 269 panic("%s: rnh NULL", __func__); 270 dom->dom_rtdetach((void **)rnh, dom->dom_rtoffset); 271 } 272 } 273 } 274 VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, 275 vnet_route_uninit, 0); 276 #endif 277 278 #ifndef _SYS_SYSPROTO_H_ 279 struct setfib_args { 280 int fibnum; 281 }; 282 #endif 283 int 284 sys_setfib(struct thread *td, struct setfib_args *uap) 285 { 286 if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs) 287 return EINVAL; 288 td->td_proc->p_fibnum = uap->fibnum; 289 return (0); 290 } 291 292 /* 293 * Packet routing routines. 294 */ 295 void 296 rtalloc(struct route *ro) 297 { 298 299 rtalloc_ign_fib(ro, 0UL, RT_DEFAULT_FIB); 300 } 301 302 void 303 rtalloc_fib(struct route *ro, u_int fibnum) 304 { 305 rtalloc_ign_fib(ro, 0UL, fibnum); 306 } 307 308 void 309 rtalloc_ign(struct route *ro, u_long ignore) 310 { 311 struct rtentry *rt; 312 313 if ((rt = ro->ro_rt) != NULL) { 314 if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) 315 return; 316 RTFREE(rt); 317 ro->ro_rt = NULL; 318 } 319 ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, RT_DEFAULT_FIB); 320 if (ro->ro_rt) 321 RT_UNLOCK(ro->ro_rt); 322 } 323 324 void 325 rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum) 326 { 327 struct rtentry *rt; 328 329 if ((rt = ro->ro_rt) != NULL) { 330 if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) 331 return; 332 RTFREE(rt); 333 ro->ro_rt = NULL; 334 } 335 ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum); 336 if (ro->ro_rt) 337 RT_UNLOCK(ro->ro_rt); 338 } 339 340 /* 341 * Look up the route that matches the address given 342 * Or, at least try.. Create a cloned route if needed. 343 * 344 * The returned route, if any, is locked. 345 */ 346 struct rtentry * 347 rtalloc1(struct sockaddr *dst, int report, u_long ignflags) 348 { 349 350 return (rtalloc1_fib(dst, report, ignflags, RT_DEFAULT_FIB)); 351 } 352 353 struct rtentry * 354 rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, 355 u_int fibnum) 356 { 357 struct radix_node_head *rnh; 358 struct radix_node *rn; 359 struct rtentry *newrt; 360 struct rt_addrinfo info; 361 int err = 0, msgtype = RTM_MISS; 362 int needlock; 363 364 KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); 365 switch (dst->sa_family) { 366 case AF_INET6: 367 case AF_INET: 368 /* We support multiple FIBs. */ 369 break; 370 default: 371 fibnum = RT_DEFAULT_FIB; 372 break; 373 } 374 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 375 newrt = NULL; 376 if (rnh == NULL) 377 goto miss; 378 379 /* 380 * Look up the address in the table for that Address Family 381 */ 382 needlock = !(ignflags & RTF_RNH_LOCKED); 383 if (needlock) 384 RADIX_NODE_HEAD_RLOCK(rnh); 385 #ifdef INVARIANTS 386 else 387 RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 388 #endif 389 rn = rnh->rnh_matchaddr(dst, rnh); 390 if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { 391 newrt = RNTORT(rn); 392 RT_LOCK(newrt); 393 RT_ADDREF(newrt); 394 if (needlock) 395 RADIX_NODE_HEAD_RUNLOCK(rnh); 396 goto done; 397 398 } else if (needlock) 399 RADIX_NODE_HEAD_RUNLOCK(rnh); 400 401 /* 402 * Either we hit the root or couldn't find any match, 403 * Which basically means 404 * "caint get there frm here" 405 */ 406 miss: 407 V_rtstat.rts_unreach++; 408 409 if (report) { 410 /* 411 * If required, report the failure to the supervising 412 * Authorities. 413 * For a delete, this is not an error. (report == 0) 414 */ 415 bzero(&info, sizeof(info)); 416 info.rti_info[RTAX_DST] = dst; 417 rt_missmsg_fib(msgtype, &info, 0, err, fibnum); 418 } 419 done: 420 if (newrt) 421 RT_LOCK_ASSERT(newrt); 422 return (newrt); 423 } 424 425 /* 426 * Remove a reference count from an rtentry. 427 * If the count gets low enough, take it out of the routing table 428 */ 429 void 430 rtfree(struct rtentry *rt) 431 { 432 struct radix_node_head *rnh; 433 434 KASSERT(rt != NULL,("%s: NULL rt", __func__)); 435 rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); 436 KASSERT(rnh != NULL,("%s: NULL rnh", __func__)); 437 438 RT_LOCK_ASSERT(rt); 439 440 /* 441 * The callers should use RTFREE_LOCKED() or RTFREE(), so 442 * we should come here exactly with the last reference. 443 */ 444 RT_REMREF(rt); 445 if (rt->rt_refcnt > 0) { 446 log(LOG_DEBUG, "%s: %p has %d refs\n", __func__, rt, rt->rt_refcnt); 447 goto done; 448 } 449 450 /* 451 * On last reference give the "close method" a chance 452 * to cleanup private state. This also permits (for 453 * IPv4 and IPv6) a chance to decide if the routing table 454 * entry should be purged immediately or at a later time. 455 * When an immediate purge is to happen the close routine 456 * typically calls rtexpunge which clears the RTF_UP flag 457 * on the entry so that the code below reclaims the storage. 458 */ 459 if (rt->rt_refcnt == 0 && rnh->rnh_close) 460 rnh->rnh_close((struct radix_node *)rt, rnh); 461 462 /* 463 * If we are no longer "up" (and ref == 0) 464 * then we can free the resources associated 465 * with the route. 466 */ 467 if ((rt->rt_flags & RTF_UP) == 0) { 468 if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 469 panic("rtfree 2"); 470 /* 471 * the rtentry must have been removed from the routing table 472 * so it is represented in rttrash.. remove that now. 473 */ 474 V_rttrash--; 475 #ifdef DIAGNOSTIC 476 if (rt->rt_refcnt < 0) { 477 printf("rtfree: %p not freed (neg refs)\n", rt); 478 goto done; 479 } 480 #endif 481 /* 482 * release references on items we hold them on.. 483 * e.g other routes and ifaddrs. 484 */ 485 if (rt->rt_ifa) 486 ifa_free(rt->rt_ifa); 487 /* 488 * The key is separatly alloc'd so free it (see rt_setgate()). 489 * This also frees the gateway, as they are always malloc'd 490 * together. 491 */ 492 Free(rt_key(rt)); 493 494 /* 495 * and the rtentry itself of course 496 */ 497 RT_LOCK_DESTROY(rt); 498 uma_zfree(V_rtzone, rt); 499 return; 500 } 501 done: 502 RT_UNLOCK(rt); 503 } 504 505 506 /* 507 * Force a routing table entry to the specified 508 * destination to go through the given gateway. 509 * Normally called as a result of a routing redirect 510 * message from the network layer. 511 */ 512 void 513 rtredirect(struct sockaddr *dst, 514 struct sockaddr *gateway, 515 struct sockaddr *netmask, 516 int flags, 517 struct sockaddr *src) 518 { 519 520 rtredirect_fib(dst, gateway, netmask, flags, src, RT_DEFAULT_FIB); 521 } 522 523 void 524 rtredirect_fib(struct sockaddr *dst, 525 struct sockaddr *gateway, 526 struct sockaddr *netmask, 527 int flags, 528 struct sockaddr *src, 529 u_int fibnum) 530 { 531 struct rtentry *rt, *rt0 = NULL; 532 int error = 0; 533 short *stat = NULL; 534 struct rt_addrinfo info; 535 struct ifaddr *ifa; 536 struct radix_node_head *rnh; 537 538 ifa = NULL; 539 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 540 if (rnh == NULL) { 541 error = EAFNOSUPPORT; 542 goto out; 543 } 544 545 /* verify the gateway is directly reachable */ 546 if ((ifa = ifa_ifwithnet(gateway, 0)) == NULL) { 547 error = ENETUNREACH; 548 goto out; 549 } 550 rt = rtalloc1_fib(dst, 0, 0UL, fibnum); /* NB: rt is locked */ 551 /* 552 * If the redirect isn't from our current router for this dst, 553 * it's either old or wrong. If it redirects us to ourselves, 554 * we have a routing loop, perhaps as a result of an interface 555 * going down recently. 556 */ 557 if (!(flags & RTF_DONE) && rt && 558 (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa)) 559 error = EINVAL; 560 else if (ifa_ifwithaddr_check(gateway)) 561 error = EHOSTUNREACH; 562 if (error) 563 goto done; 564 /* 565 * Create a new entry if we just got back a wildcard entry 566 * or the lookup failed. This is necessary for hosts 567 * which use routing redirects generated by smart gateways 568 * to dynamically build the routing tables. 569 */ 570 if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) 571 goto create; 572 /* 573 * Don't listen to the redirect if it's 574 * for a route to an interface. 575 */ 576 if (rt->rt_flags & RTF_GATEWAY) { 577 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { 578 /* 579 * Changing from route to net => route to host. 580 * Create new route, rather than smashing route to net. 581 */ 582 create: 583 rt0 = rt; 584 rt = NULL; 585 586 flags |= RTF_GATEWAY | RTF_DYNAMIC; 587 bzero((caddr_t)&info, sizeof(info)); 588 info.rti_info[RTAX_DST] = dst; 589 info.rti_info[RTAX_GATEWAY] = gateway; 590 info.rti_info[RTAX_NETMASK] = netmask; 591 info.rti_ifa = ifa; 592 info.rti_flags = flags; 593 if (rt0 != NULL) 594 RT_UNLOCK(rt0); /* drop lock to avoid LOR with RNH */ 595 error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); 596 if (rt != NULL) { 597 RT_LOCK(rt); 598 if (rt0 != NULL) 599 EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst); 600 flags = rt->rt_flags; 601 } 602 if (rt0 != NULL) 603 RTFREE(rt0); 604 605 stat = &V_rtstat.rts_dynamic; 606 } else { 607 struct rtentry *gwrt; 608 609 /* 610 * Smash the current notion of the gateway to 611 * this destination. Should check about netmask!!! 612 */ 613 rt->rt_flags |= RTF_MODIFIED; 614 flags |= RTF_MODIFIED; 615 stat = &V_rtstat.rts_newgateway; 616 /* 617 * add the key and gateway (in one malloc'd chunk). 618 */ 619 RT_UNLOCK(rt); 620 RADIX_NODE_HEAD_LOCK(rnh); 621 RT_LOCK(rt); 622 rt_setgate(rt, rt_key(rt), gateway); 623 gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED); 624 RADIX_NODE_HEAD_UNLOCK(rnh); 625 EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst); 626 RTFREE_LOCKED(gwrt); 627 } 628 } else 629 error = EHOSTUNREACH; 630 done: 631 if (rt) 632 RTFREE_LOCKED(rt); 633 out: 634 if (error) 635 V_rtstat.rts_badredirect++; 636 else if (stat != NULL) 637 (*stat)++; 638 bzero((caddr_t)&info, sizeof(info)); 639 info.rti_info[RTAX_DST] = dst; 640 info.rti_info[RTAX_GATEWAY] = gateway; 641 info.rti_info[RTAX_NETMASK] = netmask; 642 info.rti_info[RTAX_AUTHOR] = src; 643 rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum); 644 if (ifa != NULL) 645 ifa_free(ifa); 646 } 647 648 int 649 rtioctl(u_long req, caddr_t data) 650 { 651 652 return (rtioctl_fib(req, data, RT_DEFAULT_FIB)); 653 } 654 655 /* 656 * Routing table ioctl interface. 657 */ 658 int 659 rtioctl_fib(u_long req, caddr_t data, u_int fibnum) 660 { 661 662 /* 663 * If more ioctl commands are added here, make sure the proper 664 * super-user checks are being performed because it is possible for 665 * prison-root to make it this far if raw sockets have been enabled 666 * in jails. 667 */ 668 #ifdef INET 669 /* Multicast goop, grrr... */ 670 return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP; 671 #else /* INET */ 672 return ENXIO; 673 #endif /* INET */ 674 } 675 676 /* 677 * For both ifa_ifwithroute() routines, 'ifa' is returned referenced. 678 */ 679 struct ifaddr * 680 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway) 681 { 682 683 return (ifa_ifwithroute_fib(flags, dst, gateway, RT_DEFAULT_FIB)); 684 } 685 686 struct ifaddr * 687 ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway, 688 u_int fibnum) 689 { 690 register struct ifaddr *ifa; 691 int not_found = 0; 692 693 if ((flags & RTF_GATEWAY) == 0) { 694 /* 695 * If we are adding a route to an interface, 696 * and the interface is a pt to pt link 697 * we should search for the destination 698 * as our clue to the interface. Otherwise 699 * we can use the local address. 700 */ 701 ifa = NULL; 702 if (flags & RTF_HOST) 703 ifa = ifa_ifwithdstaddr(dst); 704 if (ifa == NULL) 705 ifa = ifa_ifwithaddr(gateway); 706 } else { 707 /* 708 * If we are adding a route to a remote net 709 * or host, the gateway may still be on the 710 * other end of a pt to pt link. 711 */ 712 ifa = ifa_ifwithdstaddr(gateway); 713 } 714 if (ifa == NULL) 715 ifa = ifa_ifwithnet(gateway, 0); 716 if (ifa == NULL) { 717 struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum); 718 if (rt == NULL) 719 return (NULL); 720 /* 721 * dismiss a gateway that is reachable only 722 * through the default router 723 */ 724 switch (gateway->sa_family) { 725 case AF_INET: 726 if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY) 727 not_found = 1; 728 break; 729 case AF_INET6: 730 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr)) 731 not_found = 1; 732 break; 733 default: 734 break; 735 } 736 if (!not_found && rt->rt_ifa != NULL) { 737 ifa = rt->rt_ifa; 738 ifa_ref(ifa); 739 } 740 RT_REMREF(rt); 741 RT_UNLOCK(rt); 742 if (not_found || ifa == NULL) 743 return (NULL); 744 } 745 if (ifa->ifa_addr->sa_family != dst->sa_family) { 746 struct ifaddr *oifa = ifa; 747 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 748 if (ifa == NULL) 749 ifa = oifa; 750 else 751 ifa_free(oifa); 752 } 753 return (ifa); 754 } 755 756 /* 757 * Do appropriate manipulations of a routing tree given 758 * all the bits of info needed 759 */ 760 int 761 rtrequest(int req, 762 struct sockaddr *dst, 763 struct sockaddr *gateway, 764 struct sockaddr *netmask, 765 int flags, 766 struct rtentry **ret_nrt) 767 { 768 769 return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 770 RT_DEFAULT_FIB)); 771 } 772 773 int 774 rtrequest_fib(int req, 775 struct sockaddr *dst, 776 struct sockaddr *gateway, 777 struct sockaddr *netmask, 778 int flags, 779 struct rtentry **ret_nrt, 780 u_int fibnum) 781 { 782 struct rt_addrinfo info; 783 784 if (dst->sa_len == 0) 785 return(EINVAL); 786 787 bzero((caddr_t)&info, sizeof(info)); 788 info.rti_flags = flags; 789 info.rti_info[RTAX_DST] = dst; 790 info.rti_info[RTAX_GATEWAY] = gateway; 791 info.rti_info[RTAX_NETMASK] = netmask; 792 return rtrequest1_fib(req, &info, ret_nrt, fibnum); 793 } 794 795 /* 796 * These (questionable) definitions of apparent local variables apply 797 * to the next two functions. XXXXXX!!! 798 */ 799 #define dst info->rti_info[RTAX_DST] 800 #define gateway info->rti_info[RTAX_GATEWAY] 801 #define netmask info->rti_info[RTAX_NETMASK] 802 #define ifaaddr info->rti_info[RTAX_IFA] 803 #define ifpaddr info->rti_info[RTAX_IFP] 804 #define flags info->rti_flags 805 806 int 807 rt_getifa(struct rt_addrinfo *info) 808 { 809 810 return (rt_getifa_fib(info, RT_DEFAULT_FIB)); 811 } 812 813 /* 814 * Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined, 815 * it will be referenced so the caller must free it. 816 */ 817 int 818 rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) 819 { 820 struct ifaddr *ifa; 821 int error = 0; 822 823 /* 824 * ifp may be specified by sockaddr_dl 825 * when protocol address is ambiguous. 826 */ 827 if (info->rti_ifp == NULL && ifpaddr != NULL && 828 ifpaddr->sa_family == AF_LINK && 829 (ifa = ifa_ifwithnet(ifpaddr, 0)) != NULL) { 830 info->rti_ifp = ifa->ifa_ifp; 831 ifa_free(ifa); 832 } 833 if (info->rti_ifa == NULL && ifaaddr != NULL) 834 info->rti_ifa = ifa_ifwithaddr(ifaaddr); 835 if (info->rti_ifa == NULL) { 836 struct sockaddr *sa; 837 838 sa = ifaaddr != NULL ? ifaaddr : 839 (gateway != NULL ? gateway : dst); 840 if (sa != NULL && info->rti_ifp != NULL) 841 info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp); 842 else if (dst != NULL && gateway != NULL) 843 info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway, 844 fibnum); 845 else if (sa != NULL) 846 info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa, 847 fibnum); 848 } 849 if ((ifa = info->rti_ifa) != NULL) { 850 if (info->rti_ifp == NULL) 851 info->rti_ifp = ifa->ifa_ifp; 852 } else 853 error = ENETUNREACH; 854 return (error); 855 } 856 857 /* 858 * Expunges references to a route that's about to be reclaimed. 859 * The route must be locked. 860 */ 861 int 862 rtexpunge(struct rtentry *rt) 863 { 864 #if !defined(RADIX_MPATH) 865 struct radix_node *rn; 866 #else 867 struct rt_addrinfo info; 868 int fib; 869 struct rtentry *rt0; 870 #endif 871 struct radix_node_head *rnh; 872 struct ifaddr *ifa; 873 int error = 0; 874 875 /* 876 * Find the correct routing tree to use for this Address Family 877 */ 878 rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); 879 RT_LOCK_ASSERT(rt); 880 if (rnh == NULL) 881 return (EAFNOSUPPORT); 882 RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 883 884 #ifdef RADIX_MPATH 885 fib = rt->rt_fibnum; 886 bzero(&info, sizeof(info)); 887 info.rti_ifp = rt->rt_ifp; 888 info.rti_flags = RTF_RNH_LOCKED; 889 info.rti_info[RTAX_DST] = rt_key(rt); 890 info.rti_info[RTAX_GATEWAY] = rt->rt_ifa->ifa_addr; 891 892 RT_UNLOCK(rt); 893 error = rtrequest1_fib(RTM_DELETE, &info, &rt0, fib); 894 895 if (error == 0 && rt0 != NULL) { 896 rt = rt0; 897 RT_LOCK(rt); 898 } else if (error != 0) { 899 RT_LOCK(rt); 900 return (error); 901 } 902 #else 903 /* 904 * Remove the item from the tree; it should be there, 905 * but when callers invoke us blindly it may not (sigh). 906 */ 907 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh); 908 if (rn == NULL) { 909 error = ESRCH; 910 goto bad; 911 } 912 KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0, 913 ("unexpected flags 0x%x", rn->rn_flags)); 914 KASSERT(rt == RNTORT(rn), 915 ("lookup mismatch, rt %p rn %p", rt, rn)); 916 #endif /* RADIX_MPATH */ 917 918 rt->rt_flags &= ~RTF_UP; 919 920 /* 921 * Give the protocol a chance to keep things in sync. 922 */ 923 if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) { 924 struct rt_addrinfo info; 925 926 bzero((caddr_t)&info, sizeof(info)); 927 info.rti_flags = rt->rt_flags; 928 info.rti_info[RTAX_DST] = rt_key(rt); 929 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 930 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 931 ifa->ifa_rtrequest(RTM_DELETE, rt, &info); 932 } 933 934 /* 935 * one more rtentry floating around that is not 936 * linked to the routing table. 937 */ 938 V_rttrash++; 939 #if !defined(RADIX_MPATH) 940 bad: 941 #endif 942 return (error); 943 } 944 945 #ifdef RADIX_MPATH 946 static int 947 rn_mpath_update(int req, struct rt_addrinfo *info, 948 struct radix_node_head *rnh, struct rtentry **ret_nrt) 949 { 950 /* 951 * if we got multipath routes, we require users to specify 952 * a matching RTAX_GATEWAY. 953 */ 954 struct rtentry *rt, *rto = NULL; 955 register struct radix_node *rn; 956 int error = 0; 957 958 rn = rnh->rnh_matchaddr(dst, rnh); 959 if (rn == NULL) 960 return (ESRCH); 961 rto = rt = RNTORT(rn); 962 rt = rt_mpath_matchgate(rt, gateway); 963 if (rt == NULL) 964 return (ESRCH); 965 /* 966 * this is the first entry in the chain 967 */ 968 if (rto == rt) { 969 rn = rn_mpath_next((struct radix_node *)rt); 970 /* 971 * there is another entry, now it's active 972 */ 973 if (rn) { 974 rto = RNTORT(rn); 975 RT_LOCK(rto); 976 rto->rt_flags |= RTF_UP; 977 RT_UNLOCK(rto); 978 } else if (rt->rt_flags & RTF_GATEWAY) { 979 /* 980 * For gateway routes, we need to 981 * make sure that we we are deleting 982 * the correct gateway. 983 * rt_mpath_matchgate() does not 984 * check the case when there is only 985 * one route in the chain. 986 */ 987 if (gateway && 988 (rt->rt_gateway->sa_len != gateway->sa_len || 989 memcmp(rt->rt_gateway, gateway, gateway->sa_len))) 990 error = ESRCH; 991 else { 992 /* 993 * remove from tree before returning it 994 * to the caller 995 */ 996 rn = rnh->rnh_deladdr(dst, netmask, rnh); 997 KASSERT(rt == RNTORT(rn), ("radix node disappeared")); 998 goto gwdelete; 999 } 1000 1001 } 1002 /* 1003 * use the normal delete code to remove 1004 * the first entry 1005 */ 1006 if (req != RTM_DELETE) 1007 goto nondelete; 1008 1009 error = ENOENT; 1010 goto done; 1011 } 1012 1013 /* 1014 * if the entry is 2nd and on up 1015 */ 1016 if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt)) 1017 panic ("rtrequest1: rt_mpath_deldup"); 1018 gwdelete: 1019 RT_LOCK(rt); 1020 RT_ADDREF(rt); 1021 if (req == RTM_DELETE) { 1022 rt->rt_flags &= ~RTF_UP; 1023 /* 1024 * One more rtentry floating around that is not 1025 * linked to the routing table. rttrash will be decremented 1026 * when RTFREE(rt) is eventually called. 1027 */ 1028 V_rttrash++; 1029 } 1030 1031 nondelete: 1032 if (req != RTM_DELETE) 1033 panic("unrecognized request %d", req); 1034 1035 1036 /* 1037 * If the caller wants it, then it can have it, 1038 * but it's up to it to free the rtentry as we won't be 1039 * doing it. 1040 */ 1041 if (ret_nrt) { 1042 *ret_nrt = rt; 1043 RT_UNLOCK(rt); 1044 } else 1045 RTFREE_LOCKED(rt); 1046 done: 1047 return (error); 1048 } 1049 #endif 1050 1051 int 1052 rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, 1053 u_int fibnum) 1054 { 1055 int error = 0, needlock = 0; 1056 register struct rtentry *rt; 1057 #ifdef FLOWTABLE 1058 register struct rtentry *rt0; 1059 #endif 1060 register struct radix_node *rn; 1061 register struct radix_node_head *rnh; 1062 struct ifaddr *ifa; 1063 struct sockaddr *ndst; 1064 struct sockaddr_storage mdst; 1065 #define senderr(x) { error = x ; goto bad; } 1066 1067 KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); 1068 switch (dst->sa_family) { 1069 case AF_INET6: 1070 case AF_INET: 1071 /* We support multiple FIBs. */ 1072 break; 1073 default: 1074 fibnum = RT_DEFAULT_FIB; 1075 break; 1076 } 1077 1078 /* 1079 * Find the correct routing tree to use for this Address Family 1080 */ 1081 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 1082 if (rnh == NULL) 1083 return (EAFNOSUPPORT); 1084 needlock = ((flags & RTF_RNH_LOCKED) == 0); 1085 flags &= ~RTF_RNH_LOCKED; 1086 if (needlock) 1087 RADIX_NODE_HEAD_LOCK(rnh); 1088 else 1089 RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 1090 /* 1091 * If we are adding a host route then we don't want to put 1092 * a netmask in the tree, nor do we want to clone it. 1093 */ 1094 if (flags & RTF_HOST) 1095 netmask = NULL; 1096 1097 switch (req) { 1098 case RTM_DELETE: 1099 if (netmask) { 1100 rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); 1101 dst = (struct sockaddr *)&mdst; 1102 } 1103 #ifdef RADIX_MPATH 1104 if (rn_mpath_capable(rnh)) { 1105 error = rn_mpath_update(req, info, rnh, ret_nrt); 1106 /* 1107 * "bad" holds true for the success case 1108 * as well 1109 */ 1110 if (error != ENOENT) 1111 goto bad; 1112 error = 0; 1113 } 1114 #endif 1115 if ((flags & RTF_PINNED) == 0) { 1116 /* Check if target route can be deleted */ 1117 rt = (struct rtentry *)rnh->rnh_lookup(dst, 1118 netmask, rnh); 1119 if ((rt != NULL) && (rt->rt_flags & RTF_PINNED)) 1120 senderr(EADDRINUSE); 1121 } 1122 1123 /* 1124 * Remove the item from the tree and return it. 1125 * Complain if it is not there and do no more processing. 1126 */ 1127 rn = rnh->rnh_deladdr(dst, netmask, rnh); 1128 if (rn == NULL) 1129 senderr(ESRCH); 1130 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 1131 panic ("rtrequest delete"); 1132 rt = RNTORT(rn); 1133 RT_LOCK(rt); 1134 RT_ADDREF(rt); 1135 rt->rt_flags &= ~RTF_UP; 1136 1137 /* 1138 * give the protocol a chance to keep things in sync. 1139 */ 1140 if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) 1141 ifa->ifa_rtrequest(RTM_DELETE, rt, info); 1142 1143 /* 1144 * One more rtentry floating around that is not 1145 * linked to the routing table. rttrash will be decremented 1146 * when RTFREE(rt) is eventually called. 1147 */ 1148 V_rttrash++; 1149 1150 /* 1151 * If the caller wants it, then it can have it, 1152 * but it's up to it to free the rtentry as we won't be 1153 * doing it. 1154 */ 1155 if (ret_nrt) { 1156 *ret_nrt = rt; 1157 RT_UNLOCK(rt); 1158 } else 1159 RTFREE_LOCKED(rt); 1160 break; 1161 case RTM_RESOLVE: 1162 /* 1163 * resolve was only used for route cloning 1164 * here for compat 1165 */ 1166 break; 1167 case RTM_ADD: 1168 if ((flags & RTF_GATEWAY) && !gateway) 1169 senderr(EINVAL); 1170 if (dst && gateway && (dst->sa_family != gateway->sa_family) && 1171 (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) 1172 senderr(EINVAL); 1173 1174 if (info->rti_ifa == NULL) { 1175 error = rt_getifa_fib(info, fibnum); 1176 if (error) 1177 senderr(error); 1178 } else 1179 ifa_ref(info->rti_ifa); 1180 ifa = info->rti_ifa; 1181 rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); 1182 if (rt == NULL) { 1183 ifa_free(ifa); 1184 senderr(ENOBUFS); 1185 } 1186 RT_LOCK_INIT(rt); 1187 rt->rt_flags = RTF_UP | flags; 1188 rt->rt_fibnum = fibnum; 1189 /* 1190 * Add the gateway. Possibly re-malloc-ing the storage for it. 1191 */ 1192 RT_LOCK(rt); 1193 if ((error = rt_setgate(rt, dst, gateway)) != 0) { 1194 RT_LOCK_DESTROY(rt); 1195 ifa_free(ifa); 1196 uma_zfree(V_rtzone, rt); 1197 senderr(error); 1198 } 1199 1200 /* 1201 * point to the (possibly newly malloc'd) dest address. 1202 */ 1203 ndst = (struct sockaddr *)rt_key(rt); 1204 1205 /* 1206 * make sure it contains the value we want (masked if needed). 1207 */ 1208 if (netmask) { 1209 rt_maskedcopy(dst, ndst, netmask); 1210 } else 1211 bcopy(dst, ndst, dst->sa_len); 1212 1213 /* 1214 * We use the ifa reference returned by rt_getifa_fib(). 1215 * This moved from below so that rnh->rnh_addaddr() can 1216 * examine the ifa and ifa->ifa_ifp if it so desires. 1217 */ 1218 rt->rt_ifa = ifa; 1219 rt->rt_ifp = ifa->ifa_ifp; 1220 rt->rt_rmx.rmx_weight = 1; 1221 1222 #ifdef RADIX_MPATH 1223 /* do not permit exactly the same dst/mask/gw pair */ 1224 if (rn_mpath_capable(rnh) && 1225 rt_mpath_conflict(rnh, rt, netmask)) { 1226 ifa_free(rt->rt_ifa); 1227 Free(rt_key(rt)); 1228 RT_LOCK_DESTROY(rt); 1229 uma_zfree(V_rtzone, rt); 1230 senderr(EEXIST); 1231 } 1232 #endif 1233 1234 #ifdef FLOWTABLE 1235 rt0 = NULL; 1236 /* "flow-table" only supports IPv6 and IPv4 at the moment. */ 1237 switch (dst->sa_family) { 1238 #ifdef INET6 1239 case AF_INET6: 1240 #endif 1241 #ifdef INET 1242 case AF_INET: 1243 #endif 1244 #if defined(INET6) || defined(INET) 1245 rn = rnh->rnh_matchaddr(dst, rnh); 1246 if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { 1247 struct sockaddr *mask; 1248 u_char *m, *n; 1249 int len; 1250 1251 /* 1252 * compare mask to see if the new route is 1253 * more specific than the existing one 1254 */ 1255 rt0 = RNTORT(rn); 1256 RT_LOCK(rt0); 1257 RT_ADDREF(rt0); 1258 RT_UNLOCK(rt0); 1259 /* 1260 * A host route is already present, so 1261 * leave the flow-table entries as is. 1262 */ 1263 if (rt0->rt_flags & RTF_HOST) { 1264 RTFREE(rt0); 1265 rt0 = NULL; 1266 } else if (!(flags & RTF_HOST) && netmask) { 1267 mask = rt_mask(rt0); 1268 len = mask->sa_len; 1269 m = (u_char *)mask; 1270 n = (u_char *)netmask; 1271 while (len-- > 0) { 1272 if (*n != *m) 1273 break; 1274 n++; 1275 m++; 1276 } 1277 if (len == 0 || (*n < *m)) { 1278 RTFREE(rt0); 1279 rt0 = NULL; 1280 } 1281 } 1282 } 1283 #endif/* INET6 || INET */ 1284 } 1285 #endif /* FLOWTABLE */ 1286 1287 /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ 1288 rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); 1289 /* 1290 * If it still failed to go into the tree, 1291 * then un-make it (this should be a function) 1292 */ 1293 if (rn == NULL) { 1294 ifa_free(rt->rt_ifa); 1295 Free(rt_key(rt)); 1296 RT_LOCK_DESTROY(rt); 1297 uma_zfree(V_rtzone, rt); 1298 #ifdef FLOWTABLE 1299 if (rt0 != NULL) 1300 RTFREE(rt0); 1301 #endif 1302 senderr(EEXIST); 1303 } 1304 #ifdef FLOWTABLE 1305 else if (rt0 != NULL) { 1306 switch (dst->sa_family) { 1307 #ifdef INET6 1308 case AF_INET6: 1309 flowtable_route_flush(V_ip6_ft, rt0); 1310 break; 1311 #endif 1312 #ifdef INET 1313 case AF_INET: 1314 flowtable_route_flush(V_ip_ft, rt0); 1315 break; 1316 #endif 1317 } 1318 RTFREE(rt0); 1319 } 1320 #endif 1321 1322 /* 1323 * If this protocol has something to add to this then 1324 * allow it to do that as well. 1325 */ 1326 if (ifa->ifa_rtrequest) 1327 ifa->ifa_rtrequest(req, rt, info); 1328 1329 /* 1330 * actually return a resultant rtentry and 1331 * give the caller a single reference. 1332 */ 1333 if (ret_nrt) { 1334 *ret_nrt = rt; 1335 RT_ADDREF(rt); 1336 } 1337 RT_UNLOCK(rt); 1338 break; 1339 default: 1340 error = EOPNOTSUPP; 1341 } 1342 bad: 1343 if (needlock) 1344 RADIX_NODE_HEAD_UNLOCK(rnh); 1345 return (error); 1346 #undef senderr 1347 } 1348 1349 #undef dst 1350 #undef gateway 1351 #undef netmask 1352 #undef ifaaddr 1353 #undef ifpaddr 1354 #undef flags 1355 1356 int 1357 rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) 1358 { 1359 /* XXX dst may be overwritten, can we move this to below */ 1360 int dlen = SA_SIZE(dst), glen = SA_SIZE(gate); 1361 #ifdef INVARIANTS 1362 struct radix_node_head *rnh; 1363 1364 rnh = rt_tables_get_rnh(rt->rt_fibnum, dst->sa_family); 1365 #endif 1366 1367 RT_LOCK_ASSERT(rt); 1368 RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 1369 1370 /* 1371 * Prepare to store the gateway in rt->rt_gateway. 1372 * Both dst and gateway are stored one after the other in the same 1373 * malloc'd chunk. If we have room, we can reuse the old buffer, 1374 * rt_gateway already points to the right place. 1375 * Otherwise, malloc a new block and update the 'dst' address. 1376 */ 1377 if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) { 1378 caddr_t new; 1379 1380 R_Malloc(new, caddr_t, dlen + glen); 1381 if (new == NULL) 1382 return ENOBUFS; 1383 /* 1384 * XXX note, we copy from *dst and not *rt_key(rt) because 1385 * rt_setgate() can be called to initialize a newly 1386 * allocated route entry, in which case rt_key(rt) == NULL 1387 * (and also rt->rt_gateway == NULL). 1388 * Free()/free() handle a NULL argument just fine. 1389 */ 1390 bcopy(dst, new, dlen); 1391 Free(rt_key(rt)); /* free old block, if any */ 1392 rt_key(rt) = (struct sockaddr *)new; 1393 rt->rt_gateway = (struct sockaddr *)(new + dlen); 1394 } 1395 1396 /* 1397 * Copy the new gateway value into the memory chunk. 1398 */ 1399 bcopy(gate, rt->rt_gateway, glen); 1400 1401 return (0); 1402 } 1403 1404 void 1405 rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask) 1406 { 1407 register u_char *cp1 = (u_char *)src; 1408 register u_char *cp2 = (u_char *)dst; 1409 register u_char *cp3 = (u_char *)netmask; 1410 u_char *cplim = cp2 + *cp3; 1411 u_char *cplim2 = cp2 + *cp1; 1412 1413 *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ 1414 cp3 += 2; 1415 if (cplim > cplim2) 1416 cplim = cplim2; 1417 while (cp2 < cplim) 1418 *cp2++ = *cp1++ & *cp3++; 1419 if (cp2 < cplim2) 1420 bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2)); 1421 } 1422 1423 /* 1424 * Set up a routing table entry, normally 1425 * for an interface. 1426 */ 1427 #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */ 1428 static inline int 1429 rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) 1430 { 1431 struct sockaddr *dst; 1432 struct sockaddr *netmask; 1433 struct rtentry *rt = NULL; 1434 struct rt_addrinfo info; 1435 int error = 0; 1436 int startfib, endfib; 1437 char tempbuf[_SOCKADDR_TMPSIZE]; 1438 int didwork = 0; 1439 int a_failure = 0; 1440 static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; 1441 struct radix_node_head *rnh; 1442 1443 if (flags & RTF_HOST) { 1444 dst = ifa->ifa_dstaddr; 1445 netmask = NULL; 1446 } else { 1447 dst = ifa->ifa_addr; 1448 netmask = ifa->ifa_netmask; 1449 } 1450 if (dst->sa_len == 0) 1451 return(EINVAL); 1452 switch (dst->sa_family) { 1453 case AF_INET6: 1454 case AF_INET: 1455 /* We support multiple FIBs. */ 1456 break; 1457 default: 1458 fibnum = RT_DEFAULT_FIB; 1459 break; 1460 } 1461 if (fibnum == -1) { 1462 if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) { 1463 startfib = endfib = curthread->td_proc->p_fibnum; 1464 } else { 1465 startfib = 0; 1466 endfib = rt_numfibs - 1; 1467 } 1468 } else { 1469 KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum")); 1470 startfib = fibnum; 1471 endfib = fibnum; 1472 } 1473 1474 /* 1475 * If it's a delete, check that if it exists, 1476 * it's on the correct interface or we might scrub 1477 * a route to another ifa which would 1478 * be confusing at best and possibly worse. 1479 */ 1480 if (cmd == RTM_DELETE) { 1481 /* 1482 * It's a delete, so it should already exist.. 1483 * If it's a net, mask off the host bits 1484 * (Assuming we have a mask) 1485 * XXX this is kinda inet specific.. 1486 */ 1487 if (netmask != NULL) { 1488 rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask); 1489 dst = (struct sockaddr *)tempbuf; 1490 } 1491 } 1492 /* 1493 * Now go through all the requested tables (fibs) and do the 1494 * requested action. Realistically, this will either be fib 0 1495 * for protocols that don't do multiple tables or all the 1496 * tables for those that do. 1497 */ 1498 for ( fibnum = startfib; fibnum <= endfib; fibnum++) { 1499 if (cmd == RTM_DELETE) { 1500 struct radix_node *rn; 1501 /* 1502 * Look up an rtentry that is in the routing tree and 1503 * contains the correct info. 1504 */ 1505 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 1506 if (rnh == NULL) 1507 /* this table doesn't exist but others might */ 1508 continue; 1509 RADIX_NODE_HEAD_RLOCK(rnh); 1510 #ifdef RADIX_MPATH 1511 if (rn_mpath_capable(rnh)) { 1512 1513 rn = rnh->rnh_matchaddr(dst, rnh); 1514 if (rn == NULL) 1515 error = ESRCH; 1516 else { 1517 rt = RNTORT(rn); 1518 /* 1519 * for interface route the 1520 * rt->rt_gateway is sockaddr_intf 1521 * for cloning ARP entries, so 1522 * rt_mpath_matchgate must use the 1523 * interface address 1524 */ 1525 rt = rt_mpath_matchgate(rt, 1526 ifa->ifa_addr); 1527 if (!rt) 1528 error = ESRCH; 1529 } 1530 } 1531 else 1532 #endif 1533 rn = rnh->rnh_lookup(dst, netmask, rnh); 1534 error = (rn == NULL || 1535 (rn->rn_flags & RNF_ROOT) || 1536 RNTORT(rn)->rt_ifa != ifa || 1537 !sa_equal((struct sockaddr *)rn->rn_key, dst)); 1538 RADIX_NODE_HEAD_RUNLOCK(rnh); 1539 if (error) { 1540 /* this is only an error if bad on ALL tables */ 1541 continue; 1542 } 1543 } 1544 /* 1545 * Do the actual request 1546 */ 1547 bzero((caddr_t)&info, sizeof(info)); 1548 info.rti_ifa = ifa; 1549 info.rti_flags = flags | 1550 (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED; 1551 info.rti_info[RTAX_DST] = dst; 1552 /* 1553 * doing this for compatibility reasons 1554 */ 1555 if (cmd == RTM_ADD) 1556 info.rti_info[RTAX_GATEWAY] = 1557 (struct sockaddr *)&null_sdl; 1558 else 1559 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 1560 info.rti_info[RTAX_NETMASK] = netmask; 1561 error = rtrequest1_fib(cmd, &info, &rt, fibnum); 1562 1563 if ((error == EEXIST) && (cmd == RTM_ADD)) { 1564 /* 1565 * Interface route addition failed. 1566 * Atomically delete current prefix generating 1567 * RTM_DELETE message, and retry adding 1568 * interface prefix. 1569 */ 1570 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 1571 RADIX_NODE_HEAD_LOCK(rnh); 1572 1573 /* Delete old prefix */ 1574 info.rti_ifa = NULL; 1575 info.rti_flags = RTF_RNH_LOCKED; 1576 1577 error = rtrequest1_fib(RTM_DELETE, &info, &rt, fibnum); 1578 if (error == 0) { 1579 info.rti_ifa = ifa; 1580 info.rti_flags = flags | RTF_RNH_LOCKED | 1581 (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED; 1582 error = rtrequest1_fib(cmd, &info, &rt, fibnum); 1583 } 1584 1585 RADIX_NODE_HEAD_UNLOCK(rnh); 1586 } 1587 1588 1589 if (error == 0 && rt != NULL) { 1590 /* 1591 * notify any listening routing agents of the change 1592 */ 1593 RT_LOCK(rt); 1594 #ifdef RADIX_MPATH 1595 /* 1596 * in case address alias finds the first address 1597 * e.g. ifconfig bge0 192.0.2.246/24 1598 * e.g. ifconfig bge0 192.0.2.247/24 1599 * the address set in the route is 192.0.2.246 1600 * so we need to replace it with 192.0.2.247 1601 */ 1602 if (memcmp(rt->rt_ifa->ifa_addr, 1603 ifa->ifa_addr, ifa->ifa_addr->sa_len)) { 1604 ifa_free(rt->rt_ifa); 1605 ifa_ref(ifa); 1606 rt->rt_ifp = ifa->ifa_ifp; 1607 rt->rt_ifa = ifa; 1608 } 1609 #endif 1610 /* 1611 * doing this for compatibility reasons 1612 */ 1613 if (cmd == RTM_ADD) { 1614 ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = 1615 rt->rt_ifp->if_type; 1616 ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = 1617 rt->rt_ifp->if_index; 1618 } 1619 RT_ADDREF(rt); 1620 RT_UNLOCK(rt); 1621 rt_newaddrmsg_fib(cmd, ifa, error, rt, fibnum); 1622 RT_LOCK(rt); 1623 RT_REMREF(rt); 1624 if (cmd == RTM_DELETE) { 1625 /* 1626 * If we are deleting, and we found an entry, 1627 * then it's been removed from the tree.. 1628 * now throw it away. 1629 */ 1630 RTFREE_LOCKED(rt); 1631 } else { 1632 if (cmd == RTM_ADD) { 1633 /* 1634 * We just wanted to add it.. 1635 * we don't actually need a reference. 1636 */ 1637 RT_REMREF(rt); 1638 } 1639 RT_UNLOCK(rt); 1640 } 1641 didwork = 1; 1642 } 1643 if (error) 1644 a_failure = error; 1645 } 1646 if (cmd == RTM_DELETE) { 1647 if (didwork) { 1648 error = 0; 1649 } else { 1650 /* we only give an error if it wasn't in any table */ 1651 error = ((flags & RTF_HOST) ? 1652 EHOSTUNREACH : ENETUNREACH); 1653 } 1654 } else { 1655 if (a_failure) { 1656 /* return an error if any of them failed */ 1657 error = a_failure; 1658 } 1659 } 1660 return (error); 1661 } 1662 1663 #ifndef BURN_BRIDGES 1664 /* special one for inet internal use. may not use. */ 1665 int 1666 rtinit_fib(struct ifaddr *ifa, int cmd, int flags) 1667 { 1668 return (rtinit1(ifa, cmd, flags, -1)); 1669 } 1670 #endif 1671 1672 /* 1673 * Set up a routing table entry, normally 1674 * for an interface. 1675 */ 1676 int 1677 rtinit(struct ifaddr *ifa, int cmd, int flags) 1678 { 1679 struct sockaddr *dst; 1680 int fib = RT_DEFAULT_FIB; 1681 1682 if (flags & RTF_HOST) { 1683 dst = ifa->ifa_dstaddr; 1684 } else { 1685 dst = ifa->ifa_addr; 1686 } 1687 1688 switch (dst->sa_family) { 1689 case AF_INET6: 1690 case AF_INET: 1691 /* We do support multiple FIBs. */ 1692 fib = -1; 1693 break; 1694 } 1695 return (rtinit1(ifa, cmd, flags, fib)); 1696 } 1697