1 /*- 2 * Copyright (c) 1980, 1986, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95 30 * $FreeBSD$ 31 */ 32 /************************************************************************ 33 * Note: In this file a 'fib' is a "forwarding information base" * 34 * Which is the new name for an in kernel routing (next hop) table. 
 ***********************************************************************/

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_route.h"
#include "opt_mrouting.h"
#include "opt_mpath.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/syslog.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>
#include <sys/proc.h>
#include <sys/domain.h>
#include <sys/kernel.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/vnet.h>
#include <net/flowtable.h>

#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
#endif

#include <netinet/in.h>
#include <netinet/ip_mroute.h>

#include <vm/uma.h>

/* We use 4 bits in the mbuf flags, thus we are limited to 16 FIBS. */
#define	RT_MAXFIBS	16

/* Kernel config default option. */
#ifdef ROUTETABLES
#if ROUTETABLES <= 0
#error "ROUTETABLES defined too low"
#endif
#if ROUTETABLES > RT_MAXFIBS
#error "ROUTETABLES defined too big"
#endif
#define	RT_NUMFIBS	ROUTETABLES
#endif /* ROUTETABLES */
/* Initialize to default if not otherwise set. */
#ifndef	RT_NUMFIBS
#define	RT_NUMFIBS	1
#endif

/* Number of FIBs in use; exported read-only as net.fibs. */
u_int rt_numfibs = RT_NUMFIBS;
SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, "");
/*
 * Allow the boot code to allow LESS than RT_MAXFIBS to be used.
 * We can't do more because storage is statically allocated for now.
 * (for compatibility reasons.. this will change. When this changes, code should
 * be refactored to protocol independent parts and protocol dependent parts,
 * probably hanging off domain(9) specific storage to not need the full
 * fib * af RNH allocation etc. but allow tuning the number of tables per
 * address family).
 */
TUNABLE_INT("net.fibs", &rt_numfibs);

/*
 * By default add routes to all fibs for new interfaces.
 * Once this is set to 0 then only allocate routes on interface
 * changes for the FIB of the caller when adding a new set of addresses
 * to an interface.  XXX this is a shotgun approach to a problem that needs
 * a more fine grained solution.. that will come.
 * XXX also has the problems getting the FIB from curthread which will not
 * always work given the fib can be overridden and prefixes can be added
 * from the network stack context.
 */
u_int rt_add_addr_allfibs = 1;
SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW,
    &rt_add_addr_allfibs, 0, "");
TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs);

/* Per-vnet routing statistics. */
VNET_DEFINE(struct rtstat, rtstat);
#define	V_rtstat	VNET(rtstat)

/* Per-vnet routing tables, indexed as [fib][address family]. */
VNET_DEFINE(struct radix_node_head *, rt_tables);
#define	V_rt_tables	VNET(rt_tables)

VNET_DEFINE(int, rttrash);		/* routes not in table but not freed */
#define	V_rttrash	VNET(rttrash)


/* compare two sockaddr structures */
#define	sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)

/*
 * Convert a 'struct radix_node *' to a 'struct rtentry *'.
 * The operation can be done safely (in this code) because a
 * 'struct rtentry' starts with two 'struct radix_node''s, the first
 * one representing leaf nodes in the routing tree, which is
 * what the code in radix.c passes us as a 'struct radix_node'.
 *
 * But because there are a lot of assumptions in this conversion,
 * do not cast explicitly, but always use the macro below.
 */
#define RNTORT(p)	((struct rtentry *)(p))

static VNET_DEFINE(uma_zone_t, rtzone);		/* Routing table UMA zone.
*/ 143 #define V_rtzone VNET(rtzone) 144 145 /* 146 * handler for net.my_fibnum 147 */ 148 static int 149 sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) 150 { 151 int fibnum; 152 int error; 153 154 fibnum = curthread->td_proc->p_fibnum; 155 error = sysctl_handle_int(oidp, &fibnum, 0, req); 156 return (error); 157 } 158 159 SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD, 160 NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller"); 161 162 static __inline struct radix_node_head ** 163 rt_tables_get_rnh_ptr(int table, int fam) 164 { 165 struct radix_node_head **rnh; 166 167 KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.", 168 __func__)); 169 KASSERT(fam >= 0 && fam < (AF_MAX+1), ("%s: fam out of bounds.", 170 __func__)); 171 172 /* rnh is [fib=0][af=0]. */ 173 rnh = (struct radix_node_head **)V_rt_tables; 174 /* Get the offset to the requested table and fam. */ 175 rnh += table * (AF_MAX+1) + fam; 176 177 return (rnh); 178 } 179 180 struct radix_node_head * 181 rt_tables_get_rnh(int table, int fam) 182 { 183 184 return (*rt_tables_get_rnh_ptr(table, fam)); 185 } 186 187 /* 188 * route initialization must occur before ip6_init2(), which happenas at 189 * SI_ORDER_MIDDLE. 190 */ 191 static void 192 route_init(void) 193 { 194 struct domain *dom; 195 int max_keylen = 0; 196 197 /* whack the tunable ints into line. 
*/ 198 if (rt_numfibs > RT_MAXFIBS) 199 rt_numfibs = RT_MAXFIBS; 200 if (rt_numfibs == 0) 201 rt_numfibs = 1; 202 203 for (dom = domains; dom; dom = dom->dom_next) 204 if (dom->dom_maxrtkey > max_keylen) 205 max_keylen = dom->dom_maxrtkey; 206 207 rn_init(max_keylen); /* init all zeroes, all ones, mask table */ 208 } 209 SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0); 210 211 static void 212 vnet_route_init(const void *unused __unused) 213 { 214 struct domain *dom; 215 struct radix_node_head **rnh; 216 int table; 217 int fam; 218 219 V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) * 220 sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO); 221 222 V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL, 223 NULL, NULL, UMA_ALIGN_PTR, 0); 224 for (dom = domains; dom; dom = dom->dom_next) { 225 if (dom->dom_rtattach == NULL) 226 continue; 227 228 for (table = 0; table < rt_numfibs; table++) { 229 fam = dom->dom_family; 230 if (table != 0 && fam != AF_INET6 && fam != AF_INET) 231 break; 232 233 /* 234 * XXX MRT rtattach will be also called from 235 * vfs_export.c but the offset will be 0 (only for 236 * AF_INET and AF_INET6 which don't need it anyhow). 
237 */ 238 rnh = rt_tables_get_rnh_ptr(table, fam); 239 if (rnh == NULL) 240 panic("%s: rnh NULL", __func__); 241 dom->dom_rtattach((void **)rnh, dom->dom_rtoffset); 242 } 243 } 244 } 245 VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, 246 vnet_route_init, 0); 247 248 #ifdef VIMAGE 249 static void 250 vnet_route_uninit(const void *unused __unused) 251 { 252 int table; 253 int fam; 254 struct domain *dom; 255 struct radix_node_head **rnh; 256 257 for (dom = domains; dom; dom = dom->dom_next) { 258 if (dom->dom_rtdetach == NULL) 259 continue; 260 261 for (table = 0; table < rt_numfibs; table++) { 262 fam = dom->dom_family; 263 264 if (table != 0 && fam != AF_INET6 && fam != AF_INET) 265 break; 266 267 rnh = rt_tables_get_rnh_ptr(table, fam); 268 if (rnh == NULL) 269 panic("%s: rnh NULL", __func__); 270 dom->dom_rtdetach((void **)rnh, dom->dom_rtoffset); 271 } 272 } 273 } 274 VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, 275 vnet_route_uninit, 0); 276 #endif 277 278 #ifndef _SYS_SYSPROTO_H_ 279 struct setfib_args { 280 int fibnum; 281 }; 282 #endif 283 int 284 sys_setfib(struct thread *td, struct setfib_args *uap) 285 { 286 if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs) 287 return EINVAL; 288 td->td_proc->p_fibnum = uap->fibnum; 289 return (0); 290 } 291 292 /* 293 * Packet routing routines. 
294 */ 295 void 296 rtalloc(struct route *ro) 297 { 298 299 rtalloc_ign_fib(ro, 0UL, RT_DEFAULT_FIB); 300 } 301 302 void 303 rtalloc_fib(struct route *ro, u_int fibnum) 304 { 305 rtalloc_ign_fib(ro, 0UL, fibnum); 306 } 307 308 void 309 rtalloc_ign(struct route *ro, u_long ignore) 310 { 311 struct rtentry *rt; 312 313 if ((rt = ro->ro_rt) != NULL) { 314 if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) 315 return; 316 RTFREE(rt); 317 ro->ro_rt = NULL; 318 } 319 ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, RT_DEFAULT_FIB); 320 if (ro->ro_rt) 321 RT_UNLOCK(ro->ro_rt); 322 } 323 324 void 325 rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum) 326 { 327 struct rtentry *rt; 328 329 if ((rt = ro->ro_rt) != NULL) { 330 if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) 331 return; 332 RTFREE(rt); 333 ro->ro_rt = NULL; 334 } 335 ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum); 336 if (ro->ro_rt) 337 RT_UNLOCK(ro->ro_rt); 338 } 339 340 /* 341 * Look up the route that matches the address given 342 * Or, at least try.. Create a cloned route if needed. 343 * 344 * The returned route, if any, is locked. 345 */ 346 struct rtentry * 347 rtalloc1(struct sockaddr *dst, int report, u_long ignflags) 348 { 349 350 return (rtalloc1_fib(dst, report, ignflags, RT_DEFAULT_FIB)); 351 } 352 353 struct rtentry * 354 rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, 355 u_int fibnum) 356 { 357 struct radix_node_head *rnh; 358 struct radix_node *rn; 359 struct rtentry *newrt; 360 struct rt_addrinfo info; 361 int err = 0, msgtype = RTM_MISS; 362 int needlock; 363 364 KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); 365 switch (dst->sa_family) { 366 case AF_INET6: 367 case AF_INET: 368 /* We support multiple FIBs. 
*/ 369 break; 370 default: 371 fibnum = RT_DEFAULT_FIB; 372 break; 373 } 374 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 375 newrt = NULL; 376 if (rnh == NULL) 377 goto miss; 378 379 /* 380 * Look up the address in the table for that Address Family 381 */ 382 needlock = !(ignflags & RTF_RNH_LOCKED); 383 if (needlock) 384 RADIX_NODE_HEAD_RLOCK(rnh); 385 #ifdef INVARIANTS 386 else 387 RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 388 #endif 389 rn = rnh->rnh_matchaddr(dst, rnh); 390 if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { 391 newrt = RNTORT(rn); 392 RT_LOCK(newrt); 393 RT_ADDREF(newrt); 394 if (needlock) 395 RADIX_NODE_HEAD_RUNLOCK(rnh); 396 goto done; 397 398 } else if (needlock) 399 RADIX_NODE_HEAD_RUNLOCK(rnh); 400 401 /* 402 * Either we hit the root or couldn't find any match, 403 * Which basically means 404 * "caint get there frm here" 405 */ 406 miss: 407 V_rtstat.rts_unreach++; 408 409 if (report) { 410 /* 411 * If required, report the failure to the supervising 412 * Authorities. 413 * For a delete, this is not an error. (report == 0) 414 */ 415 bzero(&info, sizeof(info)); 416 info.rti_info[RTAX_DST] = dst; 417 rt_missmsg_fib(msgtype, &info, 0, err, fibnum); 418 } 419 done: 420 if (newrt) 421 RT_LOCK_ASSERT(newrt); 422 return (newrt); 423 } 424 425 /* 426 * Remove a reference count from an rtentry. 427 * If the count gets low enough, take it out of the routing table 428 */ 429 void 430 rtfree(struct rtentry *rt) 431 { 432 struct radix_node_head *rnh; 433 434 KASSERT(rt != NULL,("%s: NULL rt", __func__)); 435 rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); 436 KASSERT(rnh != NULL,("%s: NULL rnh", __func__)); 437 438 RT_LOCK_ASSERT(rt); 439 440 /* 441 * The callers should use RTFREE_LOCKED() or RTFREE(), so 442 * we should come here exactly with the last reference. 
443 */ 444 RT_REMREF(rt); 445 if (rt->rt_refcnt > 0) { 446 log(LOG_DEBUG, "%s: %p has %d refs\n", __func__, rt, rt->rt_refcnt); 447 goto done; 448 } 449 450 /* 451 * On last reference give the "close method" a chance 452 * to cleanup private state. This also permits (for 453 * IPv4 and IPv6) a chance to decide if the routing table 454 * entry should be purged immediately or at a later time. 455 * When an immediate purge is to happen the close routine 456 * typically calls rtexpunge which clears the RTF_UP flag 457 * on the entry so that the code below reclaims the storage. 458 */ 459 if (rt->rt_refcnt == 0 && rnh->rnh_close) 460 rnh->rnh_close((struct radix_node *)rt, rnh); 461 462 /* 463 * If we are no longer "up" (and ref == 0) 464 * then we can free the resources associated 465 * with the route. 466 */ 467 if ((rt->rt_flags & RTF_UP) == 0) { 468 if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 469 panic("rtfree 2"); 470 /* 471 * the rtentry must have been removed from the routing table 472 * so it is represented in rttrash.. remove that now. 473 */ 474 V_rttrash--; 475 #ifdef DIAGNOSTIC 476 if (rt->rt_refcnt < 0) { 477 printf("rtfree: %p not freed (neg refs)\n", rt); 478 goto done; 479 } 480 #endif 481 /* 482 * release references on items we hold them on.. 483 * e.g other routes and ifaddrs. 484 */ 485 if (rt->rt_ifa) 486 ifa_free(rt->rt_ifa); 487 /* 488 * The key is separatly alloc'd so free it (see rt_setgate()). 489 * This also frees the gateway, as they are always malloc'd 490 * together. 491 */ 492 Free(rt_key(rt)); 493 494 /* 495 * and the rtentry itself of course 496 */ 497 RT_LOCK_DESTROY(rt); 498 uma_zfree(V_rtzone, rt); 499 return; 500 } 501 done: 502 RT_UNLOCK(rt); 503 } 504 505 506 /* 507 * Force a routing table entry to the specified 508 * destination to go through the given gateway. 509 * Normally called as a result of a routing redirect 510 * message from the network layer. 
511 */ 512 void 513 rtredirect(struct sockaddr *dst, 514 struct sockaddr *gateway, 515 struct sockaddr *netmask, 516 int flags, 517 struct sockaddr *src) 518 { 519 520 rtredirect_fib(dst, gateway, netmask, flags, src, RT_DEFAULT_FIB); 521 } 522 523 void 524 rtredirect_fib(struct sockaddr *dst, 525 struct sockaddr *gateway, 526 struct sockaddr *netmask, 527 int flags, 528 struct sockaddr *src, 529 u_int fibnum) 530 { 531 struct rtentry *rt, *rt0 = NULL; 532 int error = 0; 533 short *stat = NULL; 534 struct rt_addrinfo info; 535 struct ifaddr *ifa; 536 struct radix_node_head *rnh; 537 538 ifa = NULL; 539 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 540 if (rnh == NULL) { 541 error = EAFNOSUPPORT; 542 goto out; 543 } 544 545 /* verify the gateway is directly reachable */ 546 if ((ifa = ifa_ifwithnet(gateway, 0)) == NULL) { 547 error = ENETUNREACH; 548 goto out; 549 } 550 rt = rtalloc1_fib(dst, 0, 0UL, fibnum); /* NB: rt is locked */ 551 /* 552 * If the redirect isn't from our current router for this dst, 553 * it's either old or wrong. If it redirects us to ourselves, 554 * we have a routing loop, perhaps as a result of an interface 555 * going down recently. 556 */ 557 if (!(flags & RTF_DONE) && rt && 558 (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa)) 559 error = EINVAL; 560 else if (ifa_ifwithaddr_check(gateway)) 561 error = EHOSTUNREACH; 562 if (error) 563 goto done; 564 /* 565 * Create a new entry if we just got back a wildcard entry 566 * or the lookup failed. This is necessary for hosts 567 * which use routing redirects generated by smart gateways 568 * to dynamically build the routing tables. 569 */ 570 if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) 571 goto create; 572 /* 573 * Don't listen to the redirect if it's 574 * for a route to an interface. 575 */ 576 if (rt->rt_flags & RTF_GATEWAY) { 577 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { 578 /* 579 * Changing from route to net => route to host. 
580 * Create new route, rather than smashing route to net. 581 */ 582 create: 583 rt0 = rt; 584 rt = NULL; 585 586 flags |= RTF_GATEWAY | RTF_DYNAMIC; 587 bzero((caddr_t)&info, sizeof(info)); 588 info.rti_info[RTAX_DST] = dst; 589 info.rti_info[RTAX_GATEWAY] = gateway; 590 info.rti_info[RTAX_NETMASK] = netmask; 591 info.rti_ifa = ifa; 592 info.rti_flags = flags; 593 if (rt0 != NULL) 594 RT_UNLOCK(rt0); /* drop lock to avoid LOR with RNH */ 595 error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); 596 if (rt != NULL) { 597 RT_LOCK(rt); 598 if (rt0 != NULL) 599 EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst); 600 flags = rt->rt_flags; 601 } 602 if (rt0 != NULL) 603 RTFREE(rt0); 604 605 stat = &V_rtstat.rts_dynamic; 606 } else { 607 struct rtentry *gwrt; 608 609 /* 610 * Smash the current notion of the gateway to 611 * this destination. Should check about netmask!!! 612 */ 613 rt->rt_flags |= RTF_MODIFIED; 614 flags |= RTF_MODIFIED; 615 stat = &V_rtstat.rts_newgateway; 616 /* 617 * add the key and gateway (in one malloc'd chunk). 618 */ 619 RT_UNLOCK(rt); 620 RADIX_NODE_HEAD_LOCK(rnh); 621 RT_LOCK(rt); 622 rt_setgate(rt, rt_key(rt), gateway); 623 gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED); 624 RADIX_NODE_HEAD_UNLOCK(rnh); 625 EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst); 626 RTFREE_LOCKED(gwrt); 627 } 628 } else 629 error = EHOSTUNREACH; 630 done: 631 if (rt) 632 RTFREE_LOCKED(rt); 633 out: 634 if (error) 635 V_rtstat.rts_badredirect++; 636 else if (stat != NULL) 637 (*stat)++; 638 bzero((caddr_t)&info, sizeof(info)); 639 info.rti_info[RTAX_DST] = dst; 640 info.rti_info[RTAX_GATEWAY] = gateway; 641 info.rti_info[RTAX_NETMASK] = netmask; 642 info.rti_info[RTAX_AUTHOR] = src; 643 rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum); 644 if (ifa != NULL) 645 ifa_free(ifa); 646 } 647 648 int 649 rtioctl(u_long req, caddr_t data) 650 { 651 652 return (rtioctl_fib(req, data, RT_DEFAULT_FIB)); 653 } 654 655 /* 656 * Routing table ioctl interface. 
657 */ 658 int 659 rtioctl_fib(u_long req, caddr_t data, u_int fibnum) 660 { 661 662 /* 663 * If more ioctl commands are added here, make sure the proper 664 * super-user checks are being performed because it is possible for 665 * prison-root to make it this far if raw sockets have been enabled 666 * in jails. 667 */ 668 #ifdef INET 669 /* Multicast goop, grrr... */ 670 return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP; 671 #else /* INET */ 672 return ENXIO; 673 #endif /* INET */ 674 } 675 676 /* 677 * For both ifa_ifwithroute() routines, 'ifa' is returned referenced. 678 */ 679 struct ifaddr * 680 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway) 681 { 682 683 return (ifa_ifwithroute_fib(flags, dst, gateway, RT_DEFAULT_FIB)); 684 } 685 686 struct ifaddr * 687 ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway, 688 u_int fibnum) 689 { 690 register struct ifaddr *ifa; 691 int not_found = 0; 692 693 if ((flags & RTF_GATEWAY) == 0) { 694 /* 695 * If we are adding a route to an interface, 696 * and the interface is a pt to pt link 697 * we should search for the destination 698 * as our clue to the interface. Otherwise 699 * we can use the local address. 700 */ 701 ifa = NULL; 702 if (flags & RTF_HOST) 703 ifa = ifa_ifwithdstaddr(dst); 704 if (ifa == NULL) 705 ifa = ifa_ifwithaddr(gateway); 706 } else { 707 /* 708 * If we are adding a route to a remote net 709 * or host, the gateway may still be on the 710 * other end of a pt to pt link. 
711 */ 712 ifa = ifa_ifwithdstaddr(gateway); 713 } 714 if (ifa == NULL) 715 ifa = ifa_ifwithnet(gateway, 0); 716 if (ifa == NULL) { 717 struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum); 718 if (rt == NULL) 719 return (NULL); 720 /* 721 * dismiss a gateway that is reachable only 722 * through the default router 723 */ 724 switch (gateway->sa_family) { 725 case AF_INET: 726 if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY) 727 not_found = 1; 728 break; 729 case AF_INET6: 730 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr)) 731 not_found = 1; 732 break; 733 default: 734 break; 735 } 736 if (!not_found && rt->rt_ifa != NULL) { 737 ifa = rt->rt_ifa; 738 ifa_ref(ifa); 739 } 740 RT_REMREF(rt); 741 RT_UNLOCK(rt); 742 if (not_found || ifa == NULL) 743 return (NULL); 744 } 745 if (ifa->ifa_addr->sa_family != dst->sa_family) { 746 struct ifaddr *oifa = ifa; 747 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 748 if (ifa == NULL) 749 ifa = oifa; 750 else 751 ifa_free(oifa); 752 } 753 return (ifa); 754 } 755 756 /* 757 * Do appropriate manipulations of a routing tree given 758 * all the bits of info needed 759 */ 760 int 761 rtrequest(int req, 762 struct sockaddr *dst, 763 struct sockaddr *gateway, 764 struct sockaddr *netmask, 765 int flags, 766 struct rtentry **ret_nrt) 767 { 768 769 return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 770 RT_DEFAULT_FIB)); 771 } 772 773 int 774 rtrequest_fib(int req, 775 struct sockaddr *dst, 776 struct sockaddr *gateway, 777 struct sockaddr *netmask, 778 int flags, 779 struct rtentry **ret_nrt, 780 u_int fibnum) 781 { 782 struct rt_addrinfo info; 783 784 if (dst->sa_len == 0) 785 return(EINVAL); 786 787 bzero((caddr_t)&info, sizeof(info)); 788 info.rti_flags = flags; 789 info.rti_info[RTAX_DST] = dst; 790 info.rti_info[RTAX_GATEWAY] = gateway; 791 info.rti_info[RTAX_NETMASK] = netmask; 792 return rtrequest1_fib(req, &info, ret_nrt, fibnum); 793 } 794 795 /* 796 * These (questionable) 
definitions of apparent local variables apply 797 * to the next two functions. XXXXXX!!! 798 */ 799 #define dst info->rti_info[RTAX_DST] 800 #define gateway info->rti_info[RTAX_GATEWAY] 801 #define netmask info->rti_info[RTAX_NETMASK] 802 #define ifaaddr info->rti_info[RTAX_IFA] 803 #define ifpaddr info->rti_info[RTAX_IFP] 804 #define flags info->rti_flags 805 806 int 807 rt_getifa(struct rt_addrinfo *info) 808 { 809 810 return (rt_getifa_fib(info, RT_DEFAULT_FIB)); 811 } 812 813 /* 814 * Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined, 815 * it will be referenced so the caller must free it. 816 */ 817 int 818 rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) 819 { 820 struct ifaddr *ifa; 821 int error = 0; 822 823 /* 824 * ifp may be specified by sockaddr_dl 825 * when protocol address is ambiguous. 826 */ 827 if (info->rti_ifp == NULL && ifpaddr != NULL && 828 ifpaddr->sa_family == AF_LINK && 829 (ifa = ifa_ifwithnet(ifpaddr, 0)) != NULL) { 830 info->rti_ifp = ifa->ifa_ifp; 831 ifa_free(ifa); 832 } 833 if (info->rti_ifa == NULL && ifaaddr != NULL) 834 info->rti_ifa = ifa_ifwithaddr(ifaaddr); 835 if (info->rti_ifa == NULL) { 836 struct sockaddr *sa; 837 838 sa = ifaaddr != NULL ? ifaaddr : 839 (gateway != NULL ? gateway : dst); 840 if (sa != NULL && info->rti_ifp != NULL) 841 info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp); 842 else if (dst != NULL && gateway != NULL) 843 info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway, 844 fibnum); 845 else if (sa != NULL) 846 info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa, 847 fibnum); 848 } 849 if ((ifa = info->rti_ifa) != NULL) { 850 if (info->rti_ifp == NULL) 851 info->rti_ifp = ifa->ifa_ifp; 852 } else 853 error = ENETUNREACH; 854 return (error); 855 } 856 857 /* 858 * Expunges references to a route that's about to be reclaimed. 859 * The route must be locked. 
*/
/*
 * Returns 0 on success, EAFNOSUPPORT when there is no table for the
 * route's family, ESRCH (non-multipath build) when the entry is not in
 * the tree, or the rtrequest1_fib() error (multipath build).
 * The radix head lock must be held by the caller (asserted below).
 */
int
rtexpunge(struct rtentry *rt)
{
#if !defined(RADIX_MPATH)
	struct radix_node *rn;
#else
	struct rt_addrinfo info;
	int fib;
	struct rtentry *rt0;
#endif
	struct radix_node_head *rnh;
	struct ifaddr *ifa;
	int error = 0;

	/*
	 * Find the correct routing tree to use for this Address Family
	 */
	rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family);
	RT_LOCK_ASSERT(rt);
	if (rnh == NULL)
		return (EAFNOSUPPORT);
	RADIX_NODE_HEAD_LOCK_ASSERT(rnh);

#ifdef RADIX_MPATH
	/*
	 * Multipath build: delete through rtrequest1_fib(), telling it the
	 * radix head is already locked.  The rt lock is dropped around the
	 * call and retaken afterwards.
	 */
	fib = rt->rt_fibnum;
	bzero(&info, sizeof(info));
	info.rti_ifp = rt->rt_ifp;
	info.rti_flags = RTF_RNH_LOCKED;
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_ifa->ifa_addr;

	RT_UNLOCK(rt);
	error = rtrequest1_fib(RTM_DELETE, &info, &rt0, fib);

	/*
	 * NOTE(review): rt0 is not initialized before the call, and the
	 * case error == 0 with rt0 == NULL continues with rt unlocked --
	 * confirm rtrequest1_fib() always sets rt0 on success.
	 */
	if (error == 0 && rt0 != NULL) {
		rt = rt0;
		RT_LOCK(rt);
	} else if (error != 0) {
		RT_LOCK(rt);
		return (error);
	}
#else
	/*
	 * Remove the item from the tree; it should be there,
	 * but when callers invoke us blindly it may not (sigh).
	 */
	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh);
	if (rn == NULL) {
		error = ESRCH;
		goto bad;
	}
	KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0,
		("unexpected flags 0x%x", rn->rn_flags));
	KASSERT(rt == RNTORT(rn),
		("lookup mismatch, rt %p rn %p", rt, rn));
#endif /* RADIX_MPATH */

	rt->rt_flags &= ~RTF_UP;

	/*
	 * Give the protocol a chance to keep things in sync.
	 */
	if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) {
		struct rt_addrinfo info;

		bzero((caddr_t)&info, sizeof(info));
		info.rti_flags = rt->rt_flags;
		info.rti_info[RTAX_DST] = rt_key(rt);
		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
		ifa->ifa_rtrequest(RTM_DELETE, rt, &info);
	}

	/*
	 * one more rtentry floating around that is not
	 * linked to the routing table.
	 */
	V_rttrash++;
#if !defined(RADIX_MPATH)
bad:
#endif
	return (error);
}

#ifdef RADIX_MPATH
/*
 * Multipath half of a route delete.  Finds the chain entry matching the
 * destination and gateway in 'info' (dst, gateway and netmask below are
 * macros over 'info') and unlinks it where this function can do so
 * itself.
 *
 * Returns 0 when the entry was handled here, ESRCH when nothing matched,
 * or ENOENT to ask the caller to run its normal delete path.  On
 * success the entry is handed back unlocked through ret_nrt when that is
 * non-NULL; otherwise it is released.  Any req other than RTM_DELETE
 * that reaches the 'nondelete' label panics.
 */
static int
rn_mpath_update(int req, struct rt_addrinfo *info,
    struct radix_node_head *rnh, struct rtentry **ret_nrt)
{
	/*
	 * if we got multipath routes, we require users to specify
	 * a matching RTAX_GATEWAY.
	 */
	struct rtentry *rt, *rto = NULL;
	register struct radix_node *rn;
	int error = 0;

	rn = rnh->rnh_matchaddr(dst, rnh);
	if (rn == NULL)
		return (ESRCH);
	rto = rt = RNTORT(rn);
	rt = rt_mpath_matchgate(rt, gateway);
	if (rt == NULL)
		return (ESRCH);
	/*
	 * this is the first entry in the chain
	 */
	if (rto == rt) {
		rn = rn_mpath_next((struct radix_node *)rt);
		/*
		 * there is another entry, now it's active
		 */
		if (rn) {
			rto = RNTORT(rn);
			RT_LOCK(rto);
			rto->rt_flags |= RTF_UP;
			RT_UNLOCK(rto);
		} else if (rt->rt_flags & RTF_GATEWAY) {
			/*
			 * For gateway routes, we need to
			 * make sure that we are deleting
			 * the correct gateway.
			 * rt_mpath_matchgate() does not
			 * check the case when there is only
			 * one route in the chain.
			 */
			if (gateway &&
			    (rt->rt_gateway->sa_len != gateway->sa_len ||
				memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
				/*
				 * NOTE(review): this ESRCH is overwritten by
				 * ENOENT below, so a mismatch still reaches
				 * the caller's normal delete -- confirm.
				 */
				error = ESRCH;
			else {
				/*
				 * remove from tree before returning it
				 * to the caller
				 */
				rn = rnh->rnh_deladdr(dst, netmask, rnh);
				KASSERT(rt == RNTORT(rn), ("radix node disappeared"));
				goto gwdelete;
			}

		}
		/*
		 * use the normal delete code to remove
		 * the first entry
		 */
		if (req != RTM_DELETE)
			goto nondelete;

		error = ENOENT;
		goto done;
	}

	/*
	 * if the entry is 2nd and on up
	 */
	if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt))
		panic ("rtrequest1: rt_mpath_deldup");
gwdelete:
	RT_LOCK(rt);
	RT_ADDREF(rt);
	if (req == RTM_DELETE) {
		rt->rt_flags &= ~RTF_UP;
		/*
		 * One more rtentry floating around that is not
		 * linked to the routing table. rttrash will be decremented
		 * when RTFREE(rt) is eventually called.
		 */
		V_rttrash++;
	}

nondelete:
	if (req != RTM_DELETE)
		panic("unrecognized request %d", req);


	/*
	 * If the caller wants it, then it can have it,
	 * but it's up to it to free the rtentry as we won't be
	 * doing it.
	 */
	if (ret_nrt) {
		*ret_nrt = rt;
		RT_UNLOCK(rt);
	} else
		RTFREE_LOCKED(rt);
done:
	return (error);
}
#endif

int
rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
				u_int fibnum)
{
	int error = 0, needlock = 0;
	register struct rtentry *rt;
#ifdef FLOWTABLE
	register struct rtentry *rt0;
#endif
	register struct radix_node *rn;
	register struct radix_node_head *rnh;
	struct ifaddr *ifa;
	struct sockaddr *ndst;
	struct sockaddr_storage mdst;
#define senderr(x) { error = x ; goto bad; }

	KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
	switch (dst->sa_family) {
	case AF_INET6:
	case AF_INET:
		/* We support multiple FIBs. */
		break;
	default:
		fibnum = RT_DEFAULT_FIB;
		break;
	}

	/*
	 * Find the correct routing tree to use for this Address Family
	 */
	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);
	needlock = ((flags & RTF_RNH_LOCKED) == 0);
	flags &= ~RTF_RNH_LOCKED;
	if (needlock)
		RADIX_NODE_HEAD_LOCK(rnh);
	else
		RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
	/*
	 * If we are adding a host route then we don't want to put
	 * a netmask in the tree, nor do we want to clone it.
	 */
	if (flags & RTF_HOST)
		netmask = NULL;

	switch (req) {
	case RTM_DELETE:
		if (netmask) {
			rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
			dst = (struct sockaddr *)&mdst;
		}
#ifdef RADIX_MPATH
		if (rn_mpath_capable(rnh)) {
			error = rn_mpath_update(req, info, rnh, ret_nrt);
			/*
			 * "bad" holds true for the success case
			 * as well
			 */
			if (error != ENOENT)
				goto bad;
			error = 0;
		}
#endif
		/*
		 * Remove the item from the tree and return it.
1117 * Complain if it is not there and do no more processing. 1118 */ 1119 rn = rnh->rnh_deladdr(dst, netmask, rnh); 1120 if (rn == NULL) 1121 senderr(ESRCH); 1122 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 1123 panic ("rtrequest delete"); 1124 rt = RNTORT(rn); 1125 RT_LOCK(rt); 1126 RT_ADDREF(rt); 1127 rt->rt_flags &= ~RTF_UP; 1128 1129 /* 1130 * give the protocol a chance to keep things in sync. 1131 */ 1132 if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) 1133 ifa->ifa_rtrequest(RTM_DELETE, rt, info); 1134 1135 /* 1136 * One more rtentry floating around that is not 1137 * linked to the routing table. rttrash will be decremented 1138 * when RTFREE(rt) is eventually called. 1139 */ 1140 V_rttrash++; 1141 1142 /* 1143 * If the caller wants it, then it can have it, 1144 * but it's up to it to free the rtentry as we won't be 1145 * doing it. 1146 */ 1147 if (ret_nrt) { 1148 *ret_nrt = rt; 1149 RT_UNLOCK(rt); 1150 } else 1151 RTFREE_LOCKED(rt); 1152 break; 1153 case RTM_RESOLVE: 1154 /* 1155 * resolve was only used for route cloning 1156 * here for compat 1157 */ 1158 break; 1159 case RTM_ADD: 1160 if ((flags & RTF_GATEWAY) && !gateway) 1161 senderr(EINVAL); 1162 if (dst && gateway && (dst->sa_family != gateway->sa_family) && 1163 (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) 1164 senderr(EINVAL); 1165 1166 if (info->rti_ifa == NULL) { 1167 error = rt_getifa_fib(info, fibnum); 1168 if (error) 1169 senderr(error); 1170 } else 1171 ifa_ref(info->rti_ifa); 1172 ifa = info->rti_ifa; 1173 rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); 1174 if (rt == NULL) { 1175 ifa_free(ifa); 1176 senderr(ENOBUFS); 1177 } 1178 RT_LOCK_INIT(rt); 1179 rt->rt_flags = RTF_UP | flags; 1180 rt->rt_fibnum = fibnum; 1181 /* 1182 * Add the gateway. Possibly re-malloc-ing the storage for it. 
1183 */ 1184 RT_LOCK(rt); 1185 if ((error = rt_setgate(rt, dst, gateway)) != 0) { 1186 RT_LOCK_DESTROY(rt); 1187 ifa_free(ifa); 1188 uma_zfree(V_rtzone, rt); 1189 senderr(error); 1190 } 1191 1192 /* 1193 * point to the (possibly newly malloc'd) dest address. 1194 */ 1195 ndst = (struct sockaddr *)rt_key(rt); 1196 1197 /* 1198 * make sure it contains the value we want (masked if needed). 1199 */ 1200 if (netmask) { 1201 rt_maskedcopy(dst, ndst, netmask); 1202 } else 1203 bcopy(dst, ndst, dst->sa_len); 1204 1205 /* 1206 * We use the ifa reference returned by rt_getifa_fib(). 1207 * This moved from below so that rnh->rnh_addaddr() can 1208 * examine the ifa and ifa->ifa_ifp if it so desires. 1209 */ 1210 rt->rt_ifa = ifa; 1211 rt->rt_ifp = ifa->ifa_ifp; 1212 rt->rt_rmx.rmx_weight = 1; 1213 1214 #ifdef RADIX_MPATH 1215 /* do not permit exactly the same dst/mask/gw pair */ 1216 if (rn_mpath_capable(rnh) && 1217 rt_mpath_conflict(rnh, rt, netmask)) { 1218 ifa_free(rt->rt_ifa); 1219 Free(rt_key(rt)); 1220 RT_LOCK_DESTROY(rt); 1221 uma_zfree(V_rtzone, rt); 1222 senderr(EEXIST); 1223 } 1224 #endif 1225 1226 #ifdef FLOWTABLE 1227 rt0 = NULL; 1228 /* "flow-table" only supports IPv6 and IPv4 at the moment. */ 1229 switch (dst->sa_family) { 1230 #ifdef INET6 1231 case AF_INET6: 1232 #endif 1233 #ifdef INET 1234 case AF_INET: 1235 #endif 1236 #if defined(INET6) || defined(INET) 1237 rn = rnh->rnh_matchaddr(dst, rnh); 1238 if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { 1239 struct sockaddr *mask; 1240 u_char *m, *n; 1241 int len; 1242 1243 /* 1244 * compare mask to see if the new route is 1245 * more specific than the existing one 1246 */ 1247 rt0 = RNTORT(rn); 1248 RT_LOCK(rt0); 1249 RT_ADDREF(rt0); 1250 RT_UNLOCK(rt0); 1251 /* 1252 * A host route is already present, so 1253 * leave the flow-table entries as is. 
1254 */ 1255 if (rt0->rt_flags & RTF_HOST) { 1256 RTFREE(rt0); 1257 rt0 = NULL; 1258 } else if (!(flags & RTF_HOST) && netmask) { 1259 mask = rt_mask(rt0); 1260 len = mask->sa_len; 1261 m = (u_char *)mask; 1262 n = (u_char *)netmask; 1263 while (len-- > 0) { 1264 if (*n != *m) 1265 break; 1266 n++; 1267 m++; 1268 } 1269 if (len == 0 || (*n < *m)) { 1270 RTFREE(rt0); 1271 rt0 = NULL; 1272 } 1273 } 1274 } 1275 #endif/* INET6 || INET */ 1276 } 1277 #endif /* FLOWTABLE */ 1278 1279 /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ 1280 rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); 1281 /* 1282 * If it still failed to go into the tree, 1283 * then un-make it (this should be a function) 1284 */ 1285 if (rn == NULL) { 1286 ifa_free(rt->rt_ifa); 1287 Free(rt_key(rt)); 1288 RT_LOCK_DESTROY(rt); 1289 uma_zfree(V_rtzone, rt); 1290 #ifdef FLOWTABLE 1291 if (rt0 != NULL) 1292 RTFREE(rt0); 1293 #endif 1294 senderr(EEXIST); 1295 } 1296 #ifdef FLOWTABLE 1297 else if (rt0 != NULL) { 1298 switch (dst->sa_family) { 1299 #ifdef INET6 1300 case AF_INET6: 1301 flowtable_route_flush(V_ip6_ft, rt0); 1302 break; 1303 #endif 1304 #ifdef INET 1305 case AF_INET: 1306 flowtable_route_flush(V_ip_ft, rt0); 1307 break; 1308 #endif 1309 } 1310 RTFREE(rt0); 1311 } 1312 #endif 1313 1314 /* 1315 * If this protocol has something to add to this then 1316 * allow it to do that as well. 1317 */ 1318 if (ifa->ifa_rtrequest) 1319 ifa->ifa_rtrequest(req, rt, info); 1320 1321 /* 1322 * actually return a resultant rtentry and 1323 * give the caller a single reference. 
1324 */ 1325 if (ret_nrt) { 1326 *ret_nrt = rt; 1327 RT_ADDREF(rt); 1328 } 1329 RT_UNLOCK(rt); 1330 break; 1331 default: 1332 error = EOPNOTSUPP; 1333 } 1334 bad: 1335 if (needlock) 1336 RADIX_NODE_HEAD_UNLOCK(rnh); 1337 return (error); 1338 #undef senderr 1339 } 1340 1341 #undef dst 1342 #undef gateway 1343 #undef netmask 1344 #undef ifaaddr 1345 #undef ifpaddr 1346 #undef flags 1347 1348 int 1349 rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) 1350 { 1351 /* XXX dst may be overwritten, can we move this to below */ 1352 int dlen = SA_SIZE(dst), glen = SA_SIZE(gate); 1353 #ifdef INVARIANTS 1354 struct radix_node_head *rnh; 1355 1356 rnh = rt_tables_get_rnh(rt->rt_fibnum, dst->sa_family); 1357 #endif 1358 1359 RT_LOCK_ASSERT(rt); 1360 RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 1361 1362 /* 1363 * Prepare to store the gateway in rt->rt_gateway. 1364 * Both dst and gateway are stored one after the other in the same 1365 * malloc'd chunk. If we have room, we can reuse the old buffer, 1366 * rt_gateway already points to the right place. 1367 * Otherwise, malloc a new block and update the 'dst' address. 1368 */ 1369 if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) { 1370 caddr_t new; 1371 1372 R_Malloc(new, caddr_t, dlen + glen); 1373 if (new == NULL) 1374 return ENOBUFS; 1375 /* 1376 * XXX note, we copy from *dst and not *rt_key(rt) because 1377 * rt_setgate() can be called to initialize a newly 1378 * allocated route entry, in which case rt_key(rt) == NULL 1379 * (and also rt->rt_gateway == NULL). 1380 * Free()/free() handle a NULL argument just fine. 1381 */ 1382 bcopy(dst, new, dlen); 1383 Free(rt_key(rt)); /* free old block, if any */ 1384 rt_key(rt) = (struct sockaddr *)new; 1385 rt->rt_gateway = (struct sockaddr *)(new + dlen); 1386 } 1387 1388 /* 1389 * Copy the new gateway value into the memory chunk. 
1390 */ 1391 bcopy(gate, rt->rt_gateway, glen); 1392 1393 return (0); 1394 } 1395 1396 void 1397 rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask) 1398 { 1399 register u_char *cp1 = (u_char *)src; 1400 register u_char *cp2 = (u_char *)dst; 1401 register u_char *cp3 = (u_char *)netmask; 1402 u_char *cplim = cp2 + *cp3; 1403 u_char *cplim2 = cp2 + *cp1; 1404 1405 *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ 1406 cp3 += 2; 1407 if (cplim > cplim2) 1408 cplim = cplim2; 1409 while (cp2 < cplim) 1410 *cp2++ = *cp1++ & *cp3++; 1411 if (cp2 < cplim2) 1412 bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2)); 1413 } 1414 1415 /* 1416 * Set up a routing table entry, normally 1417 * for an interface. 1418 */ 1419 #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */ 1420 static inline int 1421 rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) 1422 { 1423 struct sockaddr *dst; 1424 struct sockaddr *netmask; 1425 struct rtentry *rt = NULL; 1426 struct rt_addrinfo info; 1427 int error = 0; 1428 int startfib, endfib; 1429 char tempbuf[_SOCKADDR_TMPSIZE]; 1430 int didwork = 0; 1431 int a_failure = 0; 1432 static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; 1433 1434 if (flags & RTF_HOST) { 1435 dst = ifa->ifa_dstaddr; 1436 netmask = NULL; 1437 } else { 1438 dst = ifa->ifa_addr; 1439 netmask = ifa->ifa_netmask; 1440 } 1441 if (dst->sa_len == 0) 1442 return(EINVAL); 1443 switch (dst->sa_family) { 1444 case AF_INET6: 1445 case AF_INET: 1446 /* We support multiple FIBs. 
*/ 1447 break; 1448 default: 1449 fibnum = RT_DEFAULT_FIB; 1450 break; 1451 } 1452 if (fibnum == -1) { 1453 if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) { 1454 startfib = endfib = curthread->td_proc->p_fibnum; 1455 } else { 1456 startfib = 0; 1457 endfib = rt_numfibs - 1; 1458 } 1459 } else { 1460 KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum")); 1461 startfib = fibnum; 1462 endfib = fibnum; 1463 } 1464 1465 /* 1466 * If it's a delete, check that if it exists, 1467 * it's on the correct interface or we might scrub 1468 * a route to another ifa which would 1469 * be confusing at best and possibly worse. 1470 */ 1471 if (cmd == RTM_DELETE) { 1472 /* 1473 * It's a delete, so it should already exist.. 1474 * If it's a net, mask off the host bits 1475 * (Assuming we have a mask) 1476 * XXX this is kinda inet specific.. 1477 */ 1478 if (netmask != NULL) { 1479 rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask); 1480 dst = (struct sockaddr *)tempbuf; 1481 } 1482 } 1483 /* 1484 * Now go through all the requested tables (fibs) and do the 1485 * requested action. Realistically, this will either be fib 0 1486 * for protocols that don't do multiple tables or all the 1487 * tables for those that do. 1488 */ 1489 for ( fibnum = startfib; fibnum <= endfib; fibnum++) { 1490 if (cmd == RTM_DELETE) { 1491 struct radix_node_head *rnh; 1492 struct radix_node *rn; 1493 /* 1494 * Look up an rtentry that is in the routing tree and 1495 * contains the correct info. 
1496 */ 1497 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 1498 if (rnh == NULL) 1499 /* this table doesn't exist but others might */ 1500 continue; 1501 RADIX_NODE_HEAD_LOCK(rnh); 1502 #ifdef RADIX_MPATH 1503 if (rn_mpath_capable(rnh)) { 1504 1505 rn = rnh->rnh_matchaddr(dst, rnh); 1506 if (rn == NULL) 1507 error = ESRCH; 1508 else { 1509 rt = RNTORT(rn); 1510 /* 1511 * for interface route the 1512 * rt->rt_gateway is sockaddr_intf 1513 * for cloning ARP entries, so 1514 * rt_mpath_matchgate must use the 1515 * interface address 1516 */ 1517 rt = rt_mpath_matchgate(rt, 1518 ifa->ifa_addr); 1519 if (!rt) 1520 error = ESRCH; 1521 } 1522 } 1523 else 1524 #endif 1525 rn = rnh->rnh_lookup(dst, netmask, rnh); 1526 error = (rn == NULL || 1527 (rn->rn_flags & RNF_ROOT) || 1528 RNTORT(rn)->rt_ifa != ifa || 1529 !sa_equal((struct sockaddr *)rn->rn_key, dst)); 1530 RADIX_NODE_HEAD_UNLOCK(rnh); 1531 if (error) { 1532 /* this is only an error if bad on ALL tables */ 1533 continue; 1534 } 1535 } 1536 /* 1537 * Do the actual request 1538 */ 1539 bzero((caddr_t)&info, sizeof(info)); 1540 info.rti_ifa = ifa; 1541 info.rti_flags = flags | (ifa->ifa_flags & ~IFA_RTSELF); 1542 info.rti_info[RTAX_DST] = dst; 1543 /* 1544 * doing this for compatibility reasons 1545 */ 1546 if (cmd == RTM_ADD) 1547 info.rti_info[RTAX_GATEWAY] = 1548 (struct sockaddr *)&null_sdl; 1549 else 1550 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 1551 info.rti_info[RTAX_NETMASK] = netmask; 1552 error = rtrequest1_fib(cmd, &info, &rt, fibnum); 1553 if (error == 0 && rt != NULL) { 1554 /* 1555 * notify any listening routing agents of the change 1556 */ 1557 RT_LOCK(rt); 1558 #ifdef RADIX_MPATH 1559 /* 1560 * in case address alias finds the first address 1561 * e.g. ifconfig bge0 192.0.2.246/24 1562 * e.g. 
ifconfig bge0 192.0.2.247/24 1563 * the address set in the route is 192.0.2.246 1564 * so we need to replace it with 192.0.2.247 1565 */ 1566 if (memcmp(rt->rt_ifa->ifa_addr, 1567 ifa->ifa_addr, ifa->ifa_addr->sa_len)) { 1568 ifa_free(rt->rt_ifa); 1569 ifa_ref(ifa); 1570 rt->rt_ifp = ifa->ifa_ifp; 1571 rt->rt_ifa = ifa; 1572 } 1573 #endif 1574 /* 1575 * doing this for compatibility reasons 1576 */ 1577 if (cmd == RTM_ADD) { 1578 ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = 1579 rt->rt_ifp->if_type; 1580 ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = 1581 rt->rt_ifp->if_index; 1582 } 1583 RT_ADDREF(rt); 1584 RT_UNLOCK(rt); 1585 rt_newaddrmsg_fib(cmd, ifa, error, rt, fibnum); 1586 RT_LOCK(rt); 1587 RT_REMREF(rt); 1588 if (cmd == RTM_DELETE) { 1589 /* 1590 * If we are deleting, and we found an entry, 1591 * then it's been removed from the tree.. 1592 * now throw it away. 1593 */ 1594 RTFREE_LOCKED(rt); 1595 } else { 1596 if (cmd == RTM_ADD) { 1597 /* 1598 * We just wanted to add it.. 1599 * we don't actually need a reference. 1600 */ 1601 RT_REMREF(rt); 1602 } 1603 RT_UNLOCK(rt); 1604 } 1605 didwork = 1; 1606 } 1607 if (error) 1608 a_failure = error; 1609 } 1610 if (cmd == RTM_DELETE) { 1611 if (didwork) { 1612 error = 0; 1613 } else { 1614 /* we only give an error if it wasn't in any table */ 1615 error = ((flags & RTF_HOST) ? 1616 EHOSTUNREACH : ENETUNREACH); 1617 } 1618 } else { 1619 if (a_failure) { 1620 /* return an error if any of them failed */ 1621 error = a_failure; 1622 } 1623 } 1624 return (error); 1625 } 1626 1627 #ifndef BURN_BRIDGES 1628 /* special one for inet internal use. may not use. */ 1629 int 1630 rtinit_fib(struct ifaddr *ifa, int cmd, int flags) 1631 { 1632 return (rtinit1(ifa, cmd, flags, -1)); 1633 } 1634 #endif 1635 1636 /* 1637 * Set up a routing table entry, normally 1638 * for an interface. 
1639 */ 1640 int 1641 rtinit(struct ifaddr *ifa, int cmd, int flags) 1642 { 1643 struct sockaddr *dst; 1644 int fib = RT_DEFAULT_FIB; 1645 1646 if (flags & RTF_HOST) { 1647 dst = ifa->ifa_dstaddr; 1648 } else { 1649 dst = ifa->ifa_addr; 1650 } 1651 1652 switch (dst->sa_family) { 1653 case AF_INET6: 1654 case AF_INET: 1655 /* We do support multiple FIBs. */ 1656 fib = -1; 1657 break; 1658 } 1659 return (rtinit1(ifa, cmd, flags, fib)); 1660 } 1661