1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 /* 32 * This file contains routines that manipulate Internet Routing Entries (IREs). 
33 */ 34 #include <sys/types.h> 35 #include <sys/stream.h> 36 #include <sys/stropts.h> 37 #include <sys/ddi.h> 38 #include <sys/cmn_err.h> 39 40 #include <sys/systm.h> 41 #include <sys/param.h> 42 #include <sys/socket.h> 43 #include <net/if.h> 44 #include <net/route.h> 45 #include <netinet/in.h> 46 #include <net/if_dl.h> 47 #include <netinet/ip6.h> 48 #include <netinet/icmp6.h> 49 50 #include <inet/common.h> 51 #include <inet/mi.h> 52 #include <inet/ip.h> 53 #include <inet/ip6.h> 54 #include <inet/ip_ndp.h> 55 #include <inet/ip_if.h> 56 #include <inet/ip_ire.h> 57 #include <inet/ipclassifier.h> 58 #include <inet/nd.h> 59 #include <sys/kmem.h> 60 #include <sys/zone.h> 61 62 #include <sys/tsol/label.h> 63 #include <sys/tsol/tnet.h> 64 65 static ire_t ire_null; 66 67 static ire_t *ire_ihandle_lookup_onlink_v6(ire_t *cire); 68 static boolean_t ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, 69 const in6_addr_t *mask, const in6_addr_t *gateway, int type, 70 const ipif_t *ipif, zoneid_t zoneid, uint32_t ihandle, 71 const ts_label_t *tsl, int match_flags); 72 static ire_t *ire_init_v6(ire_t *, const in6_addr_t *, const in6_addr_t *, 73 const in6_addr_t *, const in6_addr_t *, uint_t *, queue_t *, queue_t *, 74 ushort_t, ipif_t *, const in6_addr_t *, uint32_t, uint32_t, uint_t, 75 const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *); 76 77 78 /* 79 * Initialize the ire that is specific to IPv6 part and call 80 * ire_init_common to finish it. 81 */ 82 static ire_t * 83 ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, const in6_addr_t *v6mask, 84 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 85 uint_t *max_fragp, queue_t *rfq, queue_t *stq, ushort_t type, 86 ipif_t *ipif, const in6_addr_t *v6cmask, uint32_t phandle, 87 uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, tsol_gc_t *gc, 88 tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 89 { 90 91 /* 92 * Reject IRE security attribute creation/initialization 93 * if system is not running in Trusted mode. 
94 */ 95 if ((gc != NULL || gcgrp != NULL) && !is_system_labeled()) 96 return (NULL); 97 98 99 BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_alloced); 100 ire->ire_addr_v6 = *v6addr; 101 102 if (v6src_addr != NULL) 103 ire->ire_src_addr_v6 = *v6src_addr; 104 if (v6mask != NULL) { 105 ire->ire_mask_v6 = *v6mask; 106 ire->ire_masklen = ip_mask_to_plen_v6(&ire->ire_mask_v6); 107 } 108 if (v6gateway != NULL) 109 ire->ire_gateway_addr_v6 = *v6gateway; 110 111 if (type == IRE_CACHE && v6cmask != NULL) 112 ire->ire_cmask_v6 = *v6cmask; 113 114 /* 115 * Multirouted packets need to have a fragment header added so that 116 * the receiver is able to discard duplicates according to their 117 * fragment identifier. 118 */ 119 if (type == IRE_CACHE && (flags & RTF_MULTIRT)) { 120 ire->ire_frag_flag = IPH_FRAG_HDR; 121 } 122 123 /* ire_init_common will free the mblks upon encountering any failure */ 124 if (!ire_init_common(ire, max_fragp, NULL, rfq, stq, type, ipif, 125 phandle, ihandle, flags, IPV6_VERSION, ulp_info, gc, gcgrp, ipst)) 126 return (NULL); 127 128 return (ire); 129 } 130 131 /* 132 * Similar to ire_create_v6 except that it is called only when 133 * we want to allocate ire as an mblk e.g. we have a external 134 * resolver. Do we need this in IPv6 ? 135 * 136 * IPv6 initializes the ire_nce in ire_add_v6, which expects to 137 * find the ire_nce to be null when it is called. So, although 138 * we have a src_nce parameter (in the interest of matching up with 139 * the argument list of the v4 version), we ignore the src_nce 140 * argument here. 
141 */ 142 /* ARGSUSED */ 143 ire_t * 144 ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 145 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 146 nce_t *src_nce, queue_t *rfq, queue_t *stq, ushort_t type, 147 ipif_t *ipif, const in6_addr_t *v6cmask, 148 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 149 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 150 { 151 ire_t *ire; 152 ire_t *ret_ire; 153 mblk_t *mp; 154 155 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 156 157 /* Allocate the new IRE. */ 158 mp = allocb(sizeof (ire_t), BPRI_MED); 159 if (mp == NULL) { 160 ip1dbg(("ire_create_mp_v6: alloc failed\n")); 161 return (NULL); 162 } 163 164 ire = (ire_t *)mp->b_rptr; 165 mp->b_wptr = (uchar_t *)&ire[1]; 166 167 /* Start clean. */ 168 *ire = ire_null; 169 ire->ire_mp = mp; 170 mp->b_datap->db_type = IRE_DB_TYPE; 171 172 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 173 NULL, rfq, stq, type, ipif, v6cmask, phandle, 174 ihandle, flags, ulp_info, gc, gcgrp, ipst); 175 176 if (ret_ire == NULL) { 177 freeb(ire->ire_mp); 178 return (NULL); 179 } 180 return (ire); 181 } 182 183 /* 184 * ire_create_v6 is called to allocate and initialize a new IRE. 185 * 186 * NOTE : This is called as writer sometimes though not required 187 * by this function. 188 * 189 * See comments above ire_create_mp_v6() for the rationale behind the 190 * unused src_nce argument. 
191 */ 192 /* ARGSUSED */ 193 ire_t * 194 ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 195 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 196 uint_t *max_fragp, nce_t *src_nce, queue_t *rfq, queue_t *stq, 197 ushort_t type, ipif_t *ipif, const in6_addr_t *v6cmask, 198 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 199 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 200 { 201 ire_t *ire; 202 ire_t *ret_ire; 203 204 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 205 206 ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP); 207 if (ire == NULL) { 208 ip1dbg(("ire_create_v6: alloc failed\n")); 209 return (NULL); 210 } 211 *ire = ire_null; 212 213 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 214 max_fragp, rfq, stq, type, ipif, v6cmask, phandle, 215 ihandle, flags, ulp_info, gc, gcgrp, ipst); 216 217 if (ret_ire == NULL) { 218 kmem_cache_free(ire_cache, ire); 219 return (NULL); 220 } 221 ASSERT(ret_ire == ire); 222 return (ire); 223 } 224 225 /* 226 * Find an IRE_INTERFACE for the multicast group. 227 * Allows different routes for multicast addresses 228 * in the unicast routing table (akin to FF::0/8 but could be more specific) 229 * which point at different interfaces. This is used when IPV6_MULTICAST_IF 230 * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't 231 * specify the interface to join on. 232 * 233 * Supports link-local addresses by following the ipif/ill when recursing. 234 */ 235 ire_t * 236 ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst) 237 { 238 ire_t *ire; 239 ipif_t *ipif = NULL; 240 int match_flags = MATCH_IRE_TYPE; 241 in6_addr_t gw_addr_v6; 242 243 ire = ire_ftable_lookup_v6(group, 0, 0, 0, NULL, NULL, 244 zoneid, 0, NULL, MATCH_IRE_DEFAULT, ipst); 245 246 /* We search a resolvable ire in case of multirouting. 
*/ 247 if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) { 248 ire_t *cire = NULL; 249 /* 250 * If the route is not resolvable, the looked up ire 251 * may be changed here. In that case, ire_multirt_lookup() 252 * IRE_REFRELE the original ire and change it. 253 */ 254 (void) ire_multirt_lookup_v6(&cire, &ire, MULTIRT_CACHEGW, 255 NULL, ipst); 256 if (cire != NULL) 257 ire_refrele(cire); 258 } 259 if (ire == NULL) 260 return (NULL); 261 /* 262 * Make sure we follow ire_ipif. 263 * 264 * We need to determine the interface route through 265 * which the gateway will be reached. We don't really 266 * care which interface is picked if the interface is 267 * part of a group. 268 */ 269 if (ire->ire_ipif != NULL) { 270 ipif = ire->ire_ipif; 271 match_flags |= MATCH_IRE_ILL_GROUP; 272 } 273 274 switch (ire->ire_type) { 275 case IRE_DEFAULT: 276 case IRE_PREFIX: 277 case IRE_HOST: 278 mutex_enter(&ire->ire_lock); 279 gw_addr_v6 = ire->ire_gateway_addr_v6; 280 mutex_exit(&ire->ire_lock); 281 ire_refrele(ire); 282 ire = ire_ftable_lookup_v6(&gw_addr_v6, 0, 0, 283 IRE_INTERFACE, ipif, NULL, zoneid, 0, 284 NULL, match_flags, ipst); 285 return (ire); 286 case IRE_IF_NORESOLVER: 287 case IRE_IF_RESOLVER: 288 return (ire); 289 default: 290 ire_refrele(ire); 291 return (NULL); 292 } 293 } 294 295 /* 296 * Return any local address. We use this to target ourselves 297 * when the src address was specified as 'default'. 298 * Preference for IRE_LOCAL entries. 
299 */ 300 ire_t * 301 ire_lookup_local_v6(zoneid_t zoneid, ip_stack_t *ipst) 302 { 303 ire_t *ire; 304 irb_t *irb; 305 ire_t *maybe = NULL; 306 int i; 307 308 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 309 irb = &ipst->ips_ip_cache_table_v6[i]; 310 if (irb->irb_ire == NULL) 311 continue; 312 rw_enter(&irb->irb_lock, RW_READER); 313 for (ire = irb->irb_ire; ire; ire = ire->ire_next) { 314 if ((ire->ire_marks & IRE_MARK_CONDEMNED) || 315 ire->ire_zoneid != zoneid && 316 ire->ire_zoneid != ALL_ZONES) 317 continue; 318 switch (ire->ire_type) { 319 case IRE_LOOPBACK: 320 if (maybe == NULL) { 321 IRE_REFHOLD(ire); 322 maybe = ire; 323 } 324 break; 325 case IRE_LOCAL: 326 if (maybe != NULL) { 327 ire_refrele(maybe); 328 } 329 IRE_REFHOLD(ire); 330 rw_exit(&irb->irb_lock); 331 return (ire); 332 } 333 } 334 rw_exit(&irb->irb_lock); 335 } 336 return (maybe); 337 } 338 339 /* 340 * This function takes a mask and returns number of bits set in the 341 * mask (the represented prefix length). Assumes a contiguous mask. 342 */ 343 int 344 ip_mask_to_plen_v6(const in6_addr_t *v6mask) 345 { 346 int bits; 347 int plen = IPV6_ABITS; 348 int i; 349 350 for (i = 3; i >= 0; i--) { 351 if (v6mask->s6_addr32[i] == 0) { 352 plen -= 32; 353 continue; 354 } 355 bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1; 356 if (bits == 0) 357 break; 358 plen -= bits; 359 } 360 361 return (plen); 362 } 363 364 /* 365 * Convert a prefix length to the mask for that prefix. 366 * Returns the argument bitmask. 367 */ 368 in6_addr_t * 369 ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask) 370 { 371 uint32_t *ptr; 372 373 if (plen < 0 || plen > IPV6_ABITS) 374 return (NULL); 375 *bitmask = ipv6_all_zeros; 376 377 ptr = (uint32_t *)bitmask; 378 while (plen > 32) { 379 *ptr++ = 0xffffffffU; 380 plen -= 32; 381 } 382 *ptr = htonl(0xffffffffU << (32 - plen)); 383 return (bitmask); 384 } 385 386 /* 387 * Add a fully initialized IRE to an appropriate 388 * table based on ire_type. 
 *
 * The forward table contains IRE_PREFIX/IRE_HOST and
 * IRE_IF_RESOLVER/IRE_IF_NORESOLVER and IRE_DEFAULT.
 *
 * The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK
 * and IRE_CACHE.
 *
 * NOTE : This function is called as writer though not required
 * by this function.
 *
 * Arguments:
 *	ire_p		in: the ire to add; out: the held ire that is now in
 *			the table (either the new one or an already existing
 *			duplicate), or NULL on failure.
 *	q, mp, func	passed through unchanged to ire_atomic_start().
 *
 * Returns 0 on success.  On any failure the new ire is deleted, *ire_p is
 * set to NULL and one of the following is returned:
 *	EINVAL	unrecognized ire_type, the parent interface ire for an
 *		xresolv IRE_CACHE is missing or condemned, or no usable
 *		nce exists for an IRE_CACHE;
 *	ENOMEM	a forwarding table bucket array could not be allocated;
 *	other	whatever ire_atomic_start() returned.
 */
int
ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func)
{
	ire_t	*ire1;
	int	mask_table_index;
	irb_t	*irb_ptr;
	ire_t	**irep;
	int	flags;
	ire_t	*pire = NULL;
	ill_t	*stq_ill;
	boolean_t	ndp_g_lock_held = B_FALSE;
	ire_t	*ire = *ire_p;
	int	error;
	ip_stack_t	*ipst = ire->ire_ipst;

	ASSERT(ire->ire_ipversion == IPV6_VERSION);
	ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */
	ASSERT(ire->ire_nce == NULL);

	/*
	 * Normalize mask and source address according to the ire type;
	 * unknown types are rejected.
	 */
	switch (ire->ire_type) {
	case IRE_HOST:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_CACHE:
	case IRE_LOCAL:
	case IRE_LOOPBACK:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		break;
	case IRE_PREFIX:
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_DEFAULT:
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_IF_RESOLVER:
	case IRE_IF_NORESOLVER:
		break;
	default:
		printf("ire_add_v6: ire %p has unrecognized IRE type (%d)\n",
		    (void *)ire, ire->ire_type);
		ire_delete(ire);
		*ire_p = NULL;
		return (EINVAL);
	}

	/* Make sure the address is properly masked. */
	V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6);

	/* Find the appropriate list head. */
	if ((ire->ire_type & IRE_CACHETABLE) == 0) {
		/* IRE goes into Forward Table */
		mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6);
		if ((ipst->ips_ip_forwarding_table_v6[mask_table_index]) ==
		    NULL) {
			irb_t *ptr;
			int i;

			/*
			 * No bucket array for this prefix length yet:
			 * build one outside the lock, then publish it
			 * under ips_ire_ft_init_lock if still needed.
			 */
			ptr = (irb_t *)mi_zalloc((
			    ipst->ips_ip6_ftable_hash_size * sizeof (irb_t)));
			if (ptr == NULL) {
				ire_delete(ire);
				*ire_p = NULL;
				return (ENOMEM);
			}
			for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) {
				rw_init(&ptr[i].irb_lock, NULL,
				    RW_DEFAULT, NULL);
			}
			mutex_enter(&ipst->ips_ire_ft_init_lock);
			if (ipst->ips_ip_forwarding_table_v6[
			    mask_table_index] == NULL) {
				ipst->ips_ip_forwarding_table_v6[
				    mask_table_index] = ptr;
				mutex_exit(&ipst->ips_ire_ft_init_lock);
			} else {
				/*
				 * Some other thread won the race in
				 * initializing the forwarding table at the
				 * same index.
				 */
				mutex_exit(&ipst->ips_ire_ft_init_lock);
				for (i = 0; i < ipst->ips_ip6_ftable_hash_size;
				    i++) {
					rw_destroy(&ptr[i].irb_lock);
				}
				mi_free(ptr);
			}
		}
		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[mask_table_index][
		    IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6,
		    ipst->ips_ip6_ftable_hash_size)]);
	} else {
		irb_ptr = &(ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(
		    ire->ire_addr_v6, ipst->ips_ip6_cache_table_size)]);
	}
	/*
	 * For xresolv interfaces (v6 interfaces with an external
	 * address resolver), ip_newroute_v6/ip_newroute_ipif_v6
	 * are unable to prevent the deletion of the interface route
	 * while adding an IRE_CACHE for an on-link destination
	 * in the IRE_IF_RESOLVER case, since the ire has to go to
	 * the external resolver and return. We can't do a REFHOLD on the
	 * associated interface ire for fear of the message being freed
	 * if the external resolver can't resolve the address.
	 * Here we look up the interface ire in the forwarding table
	 * and make sure that the interface route has not been deleted.
	 */
	if (ire->ire_type == IRE_CACHE &&
	    IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6) &&
	    (((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) &&
	    (((ill_t *)ire->ire_stq->q_ptr)->ill_flags & ILLF_XRESOLV)) {

		pire = ire_ihandle_lookup_onlink_v6(ire);
		if (pire == NULL) {
			ire_delete(ire);
			*ire_p = NULL;
			return (EINVAL);
		}
		/* Prevent pire from getting deleted */
		IRB_REFHOLD(pire->ire_bucket);
		/* Has it been removed already? */
		if (pire->ire_marks & IRE_MARK_CONDEMNED) {
			IRB_REFRELE(pire->ire_bucket);
			ire_refrele(pire);
			ire_delete(ire);
			*ire_p = NULL;
			return (EINVAL);
		}
	}

	flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW);
	/*
	 * For IRE_CACHES, MATCH_IRE_IPIF is not enough to check
	 * for duplicates because :
	 *
	 * 1) ire_ipif->ipif_ill and ire_stq->q_ptr could be
	 *    pointing at different ills. A real duplicate is
	 *    a match on both ire_ipif and ire_stq.
	 *
	 * 2) We could have multiple packets trying to create
	 *    an IRE_CACHE for the same ill.
	 *
	 * Moreover, IPIF_NOFAILOVER and IPV6_BOUND_PIF endpoints want
	 * to go out on a particular ill. Rather than looking at the
	 * packet, we depend on the above for MATCH_IRE_ILL here.
	 *
	 * Unlike IPv4, MATCH_IRE_IPIF is needed here as we could have
	 * multiple IRE_CACHES for an ill for the same destination
	 * with various scoped addresses i.e represented by ipifs.
	 *
	 * MATCH_IRE_ILL is done implicitly below for IRE_CACHES.
	 */
	if (ire->ire_ipif != NULL)
		flags |= MATCH_IRE_IPIF;
	/*
	 * If we are creating hidden ires, make sure we search on
	 * this ill (MATCH_IRE_ILL) and a hidden ire, while we are
	 * searching for duplicates below. Otherwise we could
	 * potentially find an IRE on some other interface
	 * and it may not be a IRE marked with IRE_MARK_HIDDEN. We
	 * shouldn't do this as this will lead to an infinite loop as
	 * eventually we need a hidden ire for this packet to go
	 * out. MATCH_IRE_ILL is already marked above.
	 */
	if (ire->ire_marks & IRE_MARK_HIDDEN) {
		ASSERT(ire->ire_type == IRE_CACHE);
		flags |= MATCH_IRE_MARK_HIDDEN;
	}

	/*
	 * Start the atomic add of the ire. Grab the ill locks,
	 * ill_g_usesrc_lock and the bucket lock. Check for condemned.
	 * To avoid lock order problems, get the ndp6.ndp_g_lock now itself.
	 */
	if (ire->ire_type == IRE_CACHE) {
		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
		ndp_g_lock_held = B_TRUE;
	}

	/*
	 * If ipif or ill is changing ire_atomic_start() may queue the
	 * request and return EINPROGRESS.
	 */

	error = ire_atomic_start(irb_ptr, ire, q, mp, func);
	if (error != 0) {
		if (ndp_g_lock_held)
			mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
		/*
		 * We don't know whether it is a valid ipif or not.
		 * So, set it to NULL. This assumes that the ire has not added
		 * a reference to the ipif.
		 */
		ire->ire_ipif = NULL;
		ire_delete(ire);
		if (pire != NULL) {
			IRB_REFRELE(pire->ire_bucket);
			ire_refrele(pire);
		}
		*ire_p = NULL;
		return (error);
	}
	/*
	 * To avoid creating ires having stale values for the ire_max_frag
	 * we get the latest value atomically here. For more details
	 * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE
	 * in ip_rput_dlpi_writer
	 *
	 * NOTE(review): pire is only non-NULL when the xresolv lookup above
	 * ran, yet the non-multicast branch below dereferences it without a
	 * check.  Presumably ire_max_fragp is NULL only for IREs that took
	 * that path (or are multicast) -- confirm against the callers.
	 */
	if (ire->ire_max_fragp == NULL) {
		if (IN6_IS_ADDR_MULTICAST(&ire->ire_addr_v6))
			ire->ire_max_frag = ire->ire_ipif->ipif_mtu;
		else
			ire->ire_max_frag = pire->ire_max_frag;
	} else {
		uint_t max_frag;

		max_frag = *ire->ire_max_fragp;
		ire->ire_max_fragp = NULL;
		ire->ire_max_frag = max_frag;
	}

	/*
	 * Atomically check for duplicate and insert in the table.
	 */
	for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
		if (ire1->ire_marks & IRE_MARK_CONDEMNED)
			continue;

		if (ire->ire_type == IRE_CACHE) {
			/*
			 * We do MATCH_IRE_ILL implicitly here for IRE_CACHES.
			 * As ire_ipif and ire_stq could point to two
			 * different ills, we can't pass just ire_ipif to
			 * ire_match_args and get a match on both ills.
			 * This is just needed for duplicate checks here and
			 * so we don't add an extra argument to
			 * ire_match_args for this. Do it locally.
			 *
			 * NOTE : Currently there is no part of the code
			 * that asks for both MATCH_IRE_IPIF and MATCH_IRE_ILL
			 * match for IRE_CACHEs. Thus we don't want to
			 * extend the arguments to ire_match_args_v6.
			 */
			if (ire1->ire_stq != ire->ire_stq)
				continue;
			/*
			 * Multiroute IRE_CACHEs for a given destination can
			 * have the same ire_ipif, typically if their source
			 * address is forced using RTF_SETSRC, and the same
			 * send-to queue. We differentiate them using the parent
			 * handle.
			 */
			if ((ire1->ire_flags & RTF_MULTIRT) &&
			    (ire->ire_flags & RTF_MULTIRT) &&
			    (ire1->ire_phandle != ire->ire_phandle))
				continue;
		}
		if (ire1->ire_zoneid != ire->ire_zoneid)
			continue;
		if (ire_match_args_v6(ire1, &ire->ire_addr_v6,
		    &ire->ire_mask_v6, &ire->ire_gateway_addr_v6,
		    ire->ire_type, ire->ire_ipif, ire->ire_zoneid, 0, NULL,
		    flags)) {
			/*
			 * Return the old ire after doing a REFHOLD.
			 * As most of the callers continue to use the IRE
			 * after adding, we return a held ire. This will
			 * avoid a lookup in the caller again. If the callers
			 * don't want to use it, they need to do a REFRELE.
			 */
			ip1dbg(("found dup ire existing %p new %p",
			    (void *)ire1, (void *)ire));
			IRE_REFHOLD(ire1);
			if (ndp_g_lock_held)
				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
			ire_atomic_end(irb_ptr, ire);
			ire_delete(ire);
			if (pire != NULL) {
				/*
				 * Assert that it is
				 * not yet removed from the list.
				 */
				ASSERT(pire->ire_ptpn != NULL);
				IRB_REFRELE(pire->ire_bucket);
				ire_refrele(pire);
			}
			*ire_p = ire1;
			return (0);
		}
	}
	if (ire->ire_type == IRE_CACHE) {
		in6_addr_t gw_addr_v6;
		ill_t	*ill = ire_to_ill(ire);
		char	buf[INET6_ADDRSTRLEN];
		nce_t	*nce;

		/*
		 * All IRE_CACHE types must have a nce.  If this is
		 * not the case the entry will not be added. We need
		 * to make sure that if somebody deletes the nce
		 * after we looked up, they will find this ire and
		 * delete the ire. To delete this ire one needs the
		 * bucket lock which we are still holding here. So,
		 * even if the nce gets deleted after we looked up,
		 * this ire will get deleted.
		 *
		 * NOTE : Don't need the ire_lock for accessing
		 * ire_gateway_addr_v6 as it is appearing first
		 * time on the list and rts_setgwr_v6 could not
		 * be changing this.
		 */
		gw_addr_v6 = ire->ire_gateway_addr_v6;
		if (IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) {
			nce = ndp_lookup_v6(ill, &ire->ire_addr_v6, B_TRUE);
		} else {
			nce = ndp_lookup_v6(ill, &gw_addr_v6, B_TRUE);
		}
		/*
		 * With no nce at all, jump into the shared failure path
		 * below; it tests nce for NULL before touching it.
		 */
		if (nce == NULL)
			goto failed;

		/* Pair of refhold, refrele just to get the tracing right */
		NCE_REFHOLD_TO_REFHOLD_NOTR(nce);
		/*
		 * Atomically make sure that new IREs don't point
		 * to an NCE that is logically deleted (CONDEMNED).
		 * ndp_delete() first marks the NCE CONDEMNED.
		 * This ensures that the nce_refcnt won't increase
		 * due to new nce_lookups or due to addition of new IREs
		 * pointing to this NCE. Then ndp_delete() cleans up
		 * existing references. If we don't do it atomically here,
		 * ndp_delete() -> nce_ire_delete() will not be able to
		 * clean up the IRE list completely, and the nce_refcnt
		 * won't go down to zero.
		 */
		mutex_enter(&nce->nce_lock);
		if (ill->ill_flags & ILLF_XRESOLV) {
			/*
			 * If we used an external resolver, we may not
			 * have gone through neighbor discovery to get here.
			 * Must update the nce_state before the next check.
			 */
			if (nce->nce_state == ND_INCOMPLETE)
				nce->nce_state = ND_REACHABLE;
		}
		if (nce->nce_state == ND_INCOMPLETE ||
		    (nce->nce_flags & NCE_F_CONDEMNED) ||
		    (nce->nce_state == ND_UNREACHABLE)) {
failed:
			if (ndp_g_lock_held)
				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
			if (nce != NULL)
				mutex_exit(&nce->nce_lock);
			ire_atomic_end(irb_ptr, ire);
			ip1dbg(("ire_add_v6: No nce for dst %s \n",
			    inet_ntop(AF_INET6, &ire->ire_addr_v6,
			    buf, sizeof (buf))));
			ire_delete(ire);
			if (pire != NULL) {
				/*
				 * Assert that it is
				 * not yet removed from the list.
				 */
				ASSERT(pire->ire_ptpn != NULL);
				IRB_REFRELE(pire->ire_bucket);
				ire_refrele(pire);
			}
			if (nce != NULL)
				NCE_REFRELE_NOTR(nce);
			*ire_p = NULL;
			return (EINVAL);
		} else {
			ire->ire_nce = nce;
		}
		mutex_exit(&nce->nce_lock);
	}
	/*
	 * Find the first entry that matches ire_addr - provides
	 * tail insertion. *irep will be null if no match.
	 */
	irep = (ire_t **)irb_ptr;
	while ((ire1 = *irep) != NULL &&
	    !IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6))
		irep = &ire1->ire_next;
	ASSERT(!(ire->ire_type & IRE_BROADCAST));

	if (*irep != NULL) {
		/*
		 * Find the last ire which matches ire_addr_v6.
		 * Needed to do tail insertion among entries with the same
		 * ire_addr_v6.
		 */
		while (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6,
		    &ire1->ire_addr_v6)) {
			irep = &ire1->ire_next;
			ire1 = *irep;
			if (ire1 == NULL)
				break;
		}
	}

	if (ire->ire_type == IRE_DEFAULT) {
		/*
		 * We keep a count of default gateways which is used when
		 * assigning them as routes.
		 */
		ipst->ips_ipv6_ire_default_count++;
		ASSERT(ipst->ips_ipv6_ire_default_count != 0); /* Wraparound */
	}
	/* Insert at *irep */
	ire1 = *irep;
	if (ire1 != NULL)
		ire1->ire_ptpn = &ire->ire_next;
	ire->ire_next = ire1;
	/* Link the new one in. */
	ire->ire_ptpn = irep;
	/*
	 * ire_walk routines de-reference ire_next without holding
	 * a lock. Before we point to the new ire, we want to make
	 * sure the store that sets the ire_next of the new ire
	 * reaches global visibility, so that ire_walk routines
	 * don't see a truncated list of ires i.e if the ire_next
	 * of the new ire gets set after we do "*irep = ire" due
	 * to re-ordering, the ire_walk thread will see a NULL
	 * once it accesses the ire_next of the new ire.
	 * membar_producer() makes sure that the following store
	 * happens *after* all of the above stores.
	 */
	membar_producer();
	*irep = ire;
	ire->ire_bucket = irb_ptr;
	/*
	 * We return a bumped up IRE above. Keep it symmetrical
	 * so that the callers will always have to release. This
	 * helps the callers of this function because they continue
	 * to use the IRE after adding and hence they don't have to
	 * lookup again after we return the IRE.
	 *
	 * NOTE : We don't have to use atomics as this is appearing
	 * in the list for the first time and no one else can bump
	 * up the reference count on this yet.
	 */
	IRE_REFHOLD_LOCKED(ire);
	BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_inserted);
	irb_ptr->irb_ire_cnt++;
	if (ire->ire_marks & IRE_MARK_TEMPORARY)
		irb_ptr->irb_tmp_ire_cnt++;

	/* Account for the new ire on its ipif and on the send-to ill. */
	if (ire->ire_ipif != NULL) {
		ire->ire_ipif->ipif_ire_cnt++;
		if (ire->ire_stq != NULL) {
			stq_ill = (ill_t *)ire->ire_stq->q_ptr;
			stq_ill->ill_ire_cnt++;
		}
	} else {
		ASSERT(ire->ire_stq == NULL);
	}

	if (ndp_g_lock_held)
		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
	ire_atomic_end(irb_ptr, ire);

	if (pire != NULL) {
		/* Assert that it is not removed from the list yet */
		ASSERT(pire->ire_ptpn != NULL);
		IRB_REFRELE(pire->ire_bucket);
		ire_refrele(pire);
	}

	if (ire->ire_type != IRE_CACHE) {
		/*
		 * For ire's with a host mask see if there is an entry
		 * in the cache. If there is one flush the whole cache as
		 * there might be multiple entries due to RTF_MULTIRT (CGTP).
		 * If no entry is found then there is no need to flush the
		 * cache.
		 */

		if (ip_mask_to_plen_v6(&ire->ire_mask_v6) == IPV6_ABITS) {
			ire_t *lire;
			lire = ire_ctable_lookup_v6(&ire->ire_addr_v6, NULL,
			    IRE_CACHE, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
			    ipst);
			if (lire != NULL) {
				ire_refrele(lire);
				ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
			}
		} else {
			ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
		}
	}

	*ire_p = ire;
	return (0);
}

/*
 * Search for all HOST REDIRECT routes that are
 * pointing at the specified gateway and
 * delete them. This routine is called only
 * when a default gateway is going away.
 *
 * Only the forwarding table for the longest prefix length (the last
 * slot, IP6_MASK_TABLE_SIZE - 1) is scanned, and only entries flagged
 * RTF_DYNAMIC are considered.
 */
static void
ire_delete_host_redirects_v6(const in6_addr_t *gateway, ip_stack_t *ipst)
{
	irb_t *irb_ptr;
	irb_t *irb;
	ire_t *ire;
	in6_addr_t gw_addr_v6;
	int i;

	/* get the hash table for HOST routes */
	irb_ptr = ipst->ips_ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)];
	if (irb_ptr == NULL)
		return;
	for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) {
		irb = &irb_ptr[i];
		/* The bucket is held for the duration of the walk. */
		IRB_REFHOLD(irb);
		for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
			if (!(ire->ire_flags & RTF_DYNAMIC))
				continue;
			/* Snapshot the gateway under ire_lock. */
			mutex_enter(&ire->ire_lock);
			gw_addr_v6 = ire->ire_gateway_addr_v6;
			mutex_exit(&ire->ire_lock);
			if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway))
				ire_delete(ire);
		}
		IRB_REFRELE(irb);
	}
}

/*
 * Delete all the cache entries with this 'addr'.  This is the IPv6 counterpart
 * of ip_ire_clookup_and_delete. The difference being this function does not
 * return any value. IPv6 processing of a gratuitous ARP, as it stands, is
 * different than IPv4 in that, regardless of the presence of a cache entry
 * for this address, an ire_walk_v6 is done. Another difference is that unlike
 * in the case of IPv4 this does not take an ipif_t argument, since it is only
 * called by ip_arp_news and the match is always only on the address.
943 */ 944 void 945 ip_ire_clookup_and_delete_v6(const in6_addr_t *addr, ip_stack_t *ipst) 946 { 947 irb_t *irb; 948 ire_t *cire; 949 boolean_t found = B_FALSE; 950 951 irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 952 ipst->ips_ip6_cache_table_size)]; 953 IRB_REFHOLD(irb); 954 for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) { 955 if (cire->ire_marks & IRE_MARK_CONDEMNED) 956 continue; 957 if (IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, addr)) { 958 959 /* This signifies start of a match */ 960 if (!found) 961 found = B_TRUE; 962 if (cire->ire_type == IRE_CACHE) { 963 if (cire->ire_nce != NULL) 964 ndp_delete(cire->ire_nce); 965 ire_delete_v6(cire); 966 } 967 /* End of the match */ 968 } else if (found) 969 break; 970 } 971 IRB_REFRELE(irb); 972 } 973 974 /* 975 * Delete the specified IRE. 976 * All calls should use ire_delete(). 977 * Sometimes called as writer though not required by this function. 978 * 979 * NOTE : This function is called only if the ire was added 980 * in the list. 981 */ 982 void 983 ire_delete_v6(ire_t *ire) 984 { 985 in6_addr_t gw_addr_v6; 986 ip_stack_t *ipst = ire->ire_ipst; 987 988 ASSERT(ire->ire_refcnt >= 1); 989 ASSERT(ire->ire_ipversion == IPV6_VERSION); 990 991 if (ire->ire_type != IRE_CACHE) 992 ire_flush_cache_v6(ire, IRE_FLUSH_DELETE); 993 if (ire->ire_type == IRE_DEFAULT) { 994 /* 995 * when a default gateway is going away 996 * delete all the host redirects pointing at that 997 * gateway. 998 */ 999 mutex_enter(&ire->ire_lock); 1000 gw_addr_v6 = ire->ire_gateway_addr_v6; 1001 mutex_exit(&ire->ire_lock); 1002 ire_delete_host_redirects_v6(&gw_addr_v6, ipst); 1003 } 1004 } 1005 1006 /* 1007 * ire_walk routine to delete all IRE_CACHE and IRE_HOST type redirect 1008 * entries. 
1009 */ 1010 /*ARGSUSED1*/ 1011 void 1012 ire_delete_cache_v6(ire_t *ire, char *arg) 1013 { 1014 char addrstr1[INET6_ADDRSTRLEN]; 1015 char addrstr2[INET6_ADDRSTRLEN]; 1016 1017 if ((ire->ire_type & IRE_CACHE) || 1018 (ire->ire_flags & RTF_DYNAMIC)) { 1019 ip1dbg(("ire_delete_cache_v6: deleted %s type %d through %s\n", 1020 inet_ntop(AF_INET6, &ire->ire_addr_v6, 1021 addrstr1, sizeof (addrstr1)), 1022 ire->ire_type, 1023 inet_ntop(AF_INET6, &ire->ire_gateway_addr_v6, 1024 addrstr2, sizeof (addrstr2)))); 1025 ire_delete(ire); 1026 } 1027 1028 } 1029 1030 /* 1031 * ire_walk routine to delete all IRE_CACHE/IRE_HOST type redirect entries 1032 * that have a given gateway address. 1033 */ 1034 void 1035 ire_delete_cache_gw_v6(ire_t *ire, char *addr) 1036 { 1037 in6_addr_t *gw_addr = (in6_addr_t *)addr; 1038 char buf1[INET6_ADDRSTRLEN]; 1039 char buf2[INET6_ADDRSTRLEN]; 1040 in6_addr_t ire_gw_addr_v6; 1041 1042 if (!(ire->ire_type & IRE_CACHE) && 1043 !(ire->ire_flags & RTF_DYNAMIC)) 1044 return; 1045 1046 mutex_enter(&ire->ire_lock); 1047 ire_gw_addr_v6 = ire->ire_gateway_addr_v6; 1048 mutex_exit(&ire->ire_lock); 1049 1050 if (IN6_ARE_ADDR_EQUAL(&ire_gw_addr_v6, gw_addr)) { 1051 ip1dbg(("ire_delete_cache_gw_v6: deleted %s type %d to %s\n", 1052 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, 1053 buf1, sizeof (buf1)), 1054 ire->ire_type, 1055 inet_ntop(AF_INET6, &ire_gw_addr_v6, 1056 buf2, sizeof (buf2)))); 1057 ire_delete(ire); 1058 } 1059 } 1060 1061 /* 1062 * Remove all IRE_CACHE entries that match 1063 * the ire specified. (Sometimes called 1064 * as writer though not required by this function.) 1065 * 1066 * The flag argument indicates if the 1067 * flush request is due to addition 1068 * of new route (IRE_FLUSH_ADD) or deletion of old 1069 * route (IRE_FLUSH_DELETE). 1070 * 1071 * This routine takes only the IREs from the forwarding 1072 * table and flushes the corresponding entries from 1073 * the cache table. 
 *
 * When flushing due to the deletion of an old route, it
 * just checks the cache handles (ire_phandle and ire_ihandle) and
 * deletes the ones that match.
 *
 * When flushing due to the creation of a new route, it checks
 * if a cache entry's address matches the one in the IRE and
 * that the cache entry's parent has a less specific mask than the
 * one in IRE. The destination of such a cache entry could be the
 * gateway for other cache entries, so we need to flush those as
 * well by looking for gateway addresses matching the IRE's address.
 */
void
ire_flush_cache_v6(ire_t *ire, int flag)
{
	int i;
	ire_t *cire;
	irb_t *irb;
	ip_stack_t	*ipst = ire->ire_ipst;

	/* Nothing to do when 'ire' is itself a cache entry. */
	if (ire->ire_type & IRE_CACHE)
		return;

	/*
	 * If a default is just created, there is no point
	 * in going through the cache, as there will not be any
	 * cached ires.
	 */
	if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD)
		return;
	if (flag == IRE_FLUSH_ADD) {
		/*
		 * This selective flush is
		 * due to the addition of
		 * new IRE.
		 */
		for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) {
			irb = &ipst->ips_ip_cache_table_v6[i];
			/* Skip empty buckets without taking a reference. */
			if ((cire = irb->irb_ire) == NULL)
				continue;
			IRB_REFHOLD(irb);
			for (cire = irb->irb_ire; cire != NULL;
			    cire = cire->ire_next) {
				if (cire->ire_type != IRE_CACHE)
					continue;
				/*
				 * If 'cire' belongs to the same subnet
				 * as the new ire being added, and 'cire'
				 * is derived from a prefix that is less
				 * specific than the new ire being added,
				 * we need to flush 'cire'; for instance,
				 * when a new interface comes up.
				 */
				if ((V6_MASK_EQ_2(cire->ire_addr_v6,
				    ire->ire_mask_v6, ire->ire_addr_v6) &&
				    (ip_mask_to_plen_v6(&cire->ire_cmask_v6) <=
				    ire->ire_masklen))) {
					ire_delete(cire);
					continue;
				}
				/*
				 * This is the case when the ire_gateway_addr
				 * of 'cire' belongs to the same subnet as
				 * the new ire being added.
				 * Flushing such ires is sometimes required to
				 * avoid misrouting: say we have a machine with
				 * two interfaces (I1 and I2), a default router
				 * R on the I1 subnet, and a host route to an
				 * off-link destination D with a gateway G on
				 * the I2 subnet.
				 * Under normal operation, we will have an
				 * on-link cache entry for G and an off-link
				 * cache entry for D with G as ire_gateway_addr,
				 * traffic to D will reach its destination
				 * through gateway G.
				 * If the administrator does 'ifconfig I2 down',
				 * the cache entries for D and G will be
				 * flushed. However, G will now be resolved as
				 * an off-link destination using R (the default
				 * router) as gateway. Then D will also be
				 * resolved as an off-link destination using G
				 * as gateway - this behavior is due to
				 * compatibility reasons, see comment in
				 * ire_ihandle_lookup_offlink(). Traffic to D
				 * will go to the router R and probably won't
				 * reach the destination.
				 * The administrator then does 'ifconfig I2 up'.
				 * Since G is on the I2 subnet, this routine
				 * will flush its cache entry. It must also
				 * flush the cache entry for D, otherwise
				 * traffic will stay misrouted until the IRE
				 * times out.
				 */
				if (V6_MASK_EQ_2(cire->ire_gateway_addr_v6,
				    ire->ire_mask_v6, ire->ire_addr_v6)) {
					ire_delete(cire);
					continue;
				}
			}
			IRB_REFRELE(irb);
		}
	} else {
		/*
		 * delete the cache entries based on
		 * handle in the IRE as this IRE is
		 * being deleted/changed.
		 */
		for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) {
			irb = &ipst->ips_ip_cache_table_v6[i];
			/* Skip empty buckets without taking a reference. */
			if ((cire = irb->irb_ire) == NULL)
				continue;
			IRB_REFHOLD(irb);
			for (cire = irb->irb_ire; cire != NULL;
			    cire = cire->ire_next) {
				if (cire->ire_type != IRE_CACHE)
					continue;
				/*
				 * Keep 'cire' unless one of its handles is
				 * non-zero and equal to the corresponding
				 * handle of 'ire'.
				 */
				if ((cire->ire_phandle == 0 ||
				    cire->ire_phandle != ire->ire_phandle) &&
				    (cire->ire_ihandle == 0 ||
				    cire->ire_ihandle != ire->ire_ihandle))
					continue;
				ire_delete(cire);
			}
			IRB_REFRELE(irb);
		}
	}
}

/*
 * Matches the arguments passed with the values in the ire.
 *
 * Returns B_TRUE when 'ire' satisfies the zone checks below and every
 * criterion selected in match_flags; returns B_FALSE otherwise.
 * 'addr' and 'mask' must be non-NULL; 'gateway' must be non-NULL when
 * MATCH_IRE_GW is set (see the ASSERTs).
 *
 * Note: for match types that match using "ipif" passed in, ipif
 * must be checked for non-NULL before calling this routine.
 */
static boolean_t
ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask,
    const in6_addr_t *gateway, int type, const ipif_t *ipif, zoneid_t zoneid,
    uint32_t ihandle, const ts_label_t *tsl, int match_flags)
{
	in6_addr_t masked_addr;
	in6_addr_t gw_addr_v6;
	ill_t *ire_ill = NULL, *dst_ill;
	ill_t *ipif_ill = NULL;
	ill_group_t *ire_ill_group = NULL;
	ill_group_t *ipif_ill_group = NULL;
	ipif_t	*src_ipif;

	ASSERT(ire->ire_ipversion == IPV6_VERSION);
	ASSERT(addr != NULL);
	ASSERT(mask != NULL);
	ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL);
	ASSERT((!(match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP))) ||
	    (ipif != NULL && ipif->ipif_isv6));

	/*
	 * HIDDEN cache entries have to be looked up specifically with
	 * MATCH_IRE_MARK_HIDDEN. MATCH_IRE_MARK_HIDDEN is usually set
	 * when the interface is FAILED or INACTIVE. In that case,
	 * any IRE_CACHES that exists should be marked with
	 * IRE_MARK_HIDDEN. So, we don't really need to match below
	 * for IRE_MARK_HIDDEN. But we do so for consistency.
	 */
	if (!(match_flags & MATCH_IRE_MARK_HIDDEN) &&
	    (ire->ire_marks & IRE_MARK_HIDDEN))
		return (B_FALSE);

	if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid &&
	    ire->ire_zoneid != ALL_ZONES) {
		/*
		 * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is
		 * valid and does not match that of ire_zoneid, a failure to
		 * match is reported at this point. Otherwise, since some IREs
		 * that are available in the global zone can be used in local
		 * zones, additional checks need to be performed:
		 *
		 *	IRE_CACHE and IRE_LOOPBACK entries should
		 *	never be matched in this situation.
		 *
		 *	IRE entries that have an interface associated with them
		 *	should in general not match unless they are an IRE_LOCAL
		 *	or in the case when MATCH_IRE_DEFAULT has been set in
		 *	the caller.  In the case of the former, checking of the
		 *	other fields supplied should take place.
		 *
		 *	In the case where MATCH_IRE_DEFAULT has been set,
		 *	all of the ipif's associated with the IRE's ill are
		 *	checked to see if there is a matching zoneid.  If any
		 *	one ipif has a matching zoneid, this IRE is a
		 *	potential candidate so checking of the other fields
		 *	takes place.
		 *
		 *	In the case where the IRE_INTERFACE has a usable source
		 *	address (indicated by ill_usesrc_ifindex) in the
		 *	correct zone then it's permitted to return this IRE
		 */
		if (match_flags & MATCH_IRE_ZONEONLY)
			return (B_FALSE);
		if (ire->ire_type & (IRE_CACHE | IRE_LOOPBACK))
			return (B_FALSE);
		/*
		 * Note, IRE_INTERFACE can have the stq as NULL. For
		 * example, if the default multicast route is tied to
		 * the loopback address.
		 */
		if ((ire->ire_type & IRE_INTERFACE) &&
		    (ire->ire_stq != NULL)) {
			dst_ill = (ill_t *)ire->ire_stq->q_ptr;
			/*
			 * If there is a usable source address in the
			 * zone, then it's ok to return an
			 * IRE_INTERFACE
			 */
			if ((dst_ill->ill_usesrc_ifindex != 0) &&
			    (src_ipif = ipif_select_source_v6(dst_ill, addr,
			    RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid))
			    != NULL) {
				ip3dbg(("ire_match_args: src_ipif %p"
				    " dst_ill %p", (void *)src_ipif,
				    (void *)dst_ill));
				/* Only existence mattered; drop the hold. */
				ipif_refrele(src_ipif);
			} else {
				ip3dbg(("ire_match_args: src_ipif NULL"
				    " dst_ill %p\n", (void *)dst_ill));
				return (B_FALSE);
			}
		}
		if (ire->ire_ipif != NULL && ire->ire_type != IRE_LOCAL &&
		    !(ire->ire_type & IRE_INTERFACE)) {
			ipif_t	*tipif;

			if ((match_flags & MATCH_IRE_DEFAULT) == 0)
				return (B_FALSE);
			/*
			 * Walk the ipifs of the IRE's ill under ill_lock,
			 * stopping at the first one that can be looked up,
			 * is up, and is in 'zoneid' or ALL_ZONES.
			 */
			mutex_enter(&ire->ire_ipif->ipif_ill->ill_lock);
			for (tipif = ire->ire_ipif->ipif_ill->ill_ipif;
			    tipif != NULL; tipif = tipif->ipif_next) {
				if (IPIF_CAN_LOOKUP(tipif) &&
				    (tipif->ipif_flags & IPIF_UP) &&
				    (tipif->ipif_zoneid == zoneid ||
				    tipif->ipif_zoneid == ALL_ZONES))
					break;
			}
			mutex_exit(&ire->ire_ipif->ipif_ill->ill_lock);
			if (tipif == NULL)
				return (B_FALSE);
		}
	}

	/* gw_addr_v6 is only set, and only read, when MATCH_IRE_GW is set. */
	if (match_flags & MATCH_IRE_GW) {
		mutex_enter(&ire->ire_lock);
		gw_addr_v6 = ire->ire_gateway_addr_v6;
		mutex_exit(&ire->ire_lock);
	}
	/*
	 * For IRE_CACHES, MATCH_IRE_ILL/ILL_GROUP really means that
	 * somebody wants to send out on a particular interface which
	 * is given by ire_stq and hence use ire_stq to derive the ill
	 * value. ire_ipif for IRE_CACHES is just the
	 * means of getting a source address i.e ire_src_addr_v6 =
	 * ire->ire_ipif->ipif_src_addr_v6.
	 */
	if (match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP)) {
		ire_ill = ire_to_ill(ire);
		if (ire_ill != NULL)
			ire_ill_group = ire_ill->ill_group;
		ipif_ill = ipif->ipif_ill;
		ipif_ill_group = ipif_ill->ill_group;
	}

	/* No ire_addr_v6 bits set past the mask */
	ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6,
	    ire->ire_addr_v6));
	/*
	 * NOTE(review): masked_addr is written here but not referenced
	 * again in this function; the comparison below uses V6_MASK_EQ
	 * on *addr and *mask directly.
	 */
	V6_MASK_COPY(*addr, *mask, masked_addr);

	/*
	 * The masked address must always match; every other test applies
	 * only when its MATCH_IRE_* flag is set.
	 */
	if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) &&
	    ((!(match_flags & MATCH_IRE_GW)) ||
	    IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) &&
	    ((!(match_flags & MATCH_IRE_TYPE)) ||
	    (ire->ire_type & type)) &&
	    ((!(match_flags & MATCH_IRE_SRC)) ||
	    IN6_ARE_ADDR_EQUAL(&ire->ire_src_addr_v6,
	    &ipif->ipif_v6src_addr)) &&
	    ((!(match_flags & MATCH_IRE_IPIF)) ||
	    (ire->ire_ipif == ipif)) &&
	    ((!(match_flags & MATCH_IRE_MARK_HIDDEN)) ||
	    (ire->ire_type != IRE_CACHE ||
	    ire->ire_marks & IRE_MARK_HIDDEN)) &&
	    ((!(match_flags & MATCH_IRE_ILL)) ||
	    (ire_ill == ipif_ill)) &&
	    ((!(match_flags & MATCH_IRE_IHANDLE)) ||
	    (ire->ire_ihandle == ihandle)) &&
	    ((!(match_flags & MATCH_IRE_ILL_GROUP)) ||
	    (ire_ill == ipif_ill) ||
	    (ire_ill_group != NULL &&
	    ire_ill_group == ipif_ill_group)) &&
	    ((!(match_flags & MATCH_IRE_SECATTR)) ||
	    (!is_system_labeled()) ||
	    (tsol_ire_match_gwattr(ire, tsl) == 0))) {
		/* We found the matched IRE */
		return (B_TRUE);
	}
	return (B_FALSE);
}

/*
 * Lookup for a route in all the tables
 *
 * The cache table is consulted first and the forwarding table second.
 * When MATCH_IRE_TYPE is set, a table is consulted only if 'type'
 * includes one of that table's types (IRE_CACHETABLE / IRE_FORWARDTABLE).
 * 'mask' and 'pire' are passed to the forwarding table lookup only.
 * Returns the ire found, or NULL.
 */
ire_t *
ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask,
    const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire,
    zoneid_t zoneid, const ts_label_t *tsl, int flags, ip_stack_t *ipst)
{
	ire_t *ire = NULL;

	/*
	 * ire_match_args_v6() will dereference ipif if MATCH_IRE_SRC,
	 * MATCH_IRE_ILL or MATCH_IRE_ILL_GROUP is set.
	 */
	if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) &&
	    (ipif == NULL))
		return (NULL);

	/*
	 * might be asking for a cache lookup,
	 * This is not best way to lookup cache,
	 * user should call ire_cache_lookup directly.
	 *
	 * If MATCH_IRE_TYPE was set, first lookup in the cache table and then
	 * in the forwarding table, if the applicable type flags were set.
	 */
	if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) {
		ire = ire_ctable_lookup_v6(addr, gateway, type, ipif, zoneid,
		    tsl, flags, ipst);
		if (ire != NULL)
			return (ire);
	}
	if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) {
		ire = ire_ftable_lookup_v6(addr, mask, gateway, type, ipif,
		    pire, zoneid, 0, tsl, flags, ipst);
	}
	return (ire);
}

/*
 * Lookup a route in forwarding table.
 * specific lookup is indicated by passing the
 * required parameters and indicating the
 * match required in flag field.
 *
 * Looking for default route can be done in three ways
 * 1) pass mask as ipv6_all_zeros and set MATCH_IRE_MASK in flags field
 *    along with other matches.
 * 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags
 *    field along with other matches.
 * 3) if the destination and mask are passed as zeros.
 *
 * A request to return a default route if no route
 * is found, can be specified by setting MATCH_IRE_DEFAULT
 * in flags.
 *
 * It does not support recursion more than one level. It
 * will do recursive lookup only when the lookup maps to
 * a prefix or default route and MATCH_IRE_RECURSIVE flag is passed.
 *
 * If the routing table is setup to allow more than one level
 * of recursion, the cleaning up cache table will not work resulting
 * in invalid routing.
 *
 * Supports link-local addresses by following the ipif/ill when recursing.
 *
 * NOTE : When this function returns NULL, pire has already been released.
 *	  pire is valid only when this function successfully returns an
 *	  ire.
 */
ire_t *
ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask,
    const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire,
    zoneid_t zoneid, uint32_t ihandle, const ts_label_t *tsl, int flags,
    ip_stack_t *ipst)
{
	irb_t *irb_ptr;
	ire_t	*rire;
	ire_t *ire = NULL;
	ire_t	*saved_ire;
	nce_t	*nce;
	int i;
	in6_addr_t gw_addr_v6;

	ASSERT(addr != NULL);
	ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL);
	ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL);
	ASSERT(ipif == NULL || ipif->ipif_isv6);

	/*
	 * When we return NULL from this function, we should make
	 * sure that *pire is NULL so that the callers will not
	 * wrongly REFRELE the pire.
	 */
	if (pire != NULL)
		*pire = NULL;
	/*
	 * ire_match_args_v6() will dereference ipif if MATCH_IRE_SRC,
	 * MATCH_IRE_ILL or MATCH_IRE_ILL_GROUP is set.
	 */
	if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) &&
	    (ipif == NULL))
		return (NULL);

	/*
	 * If the mask is known, the lookup
	 * is simple, if the mask is not known
	 * we need to search.
	 */
	if (flags & MATCH_IRE_MASK) {
		uint_t masklen;

		masklen = ip_mask_to_plen_v6(mask);
		if (ipst->ips_ip_forwarding_table_v6[masklen] == NULL)
			return (NULL);
		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[masklen][
		    IRE_ADDR_MASK_HASH_V6(*addr, *mask,
		    ipst->ips_ip6_ftable_hash_size)]);
		rw_enter(&irb_ptr->irb_lock, RW_READER);
		for (ire = irb_ptr->irb_ire; ire != NULL;
		    ire = ire->ire_next) {
			if (ire->ire_marks & IRE_MARK_CONDEMNED)
				continue;
			/* found_ire is entered with irb_lock still held. */
			if (ire_match_args_v6(ire, addr, mask, gateway, type,
			    ipif, zoneid, ihandle, tsl, flags))
				goto found_ire;
		}
		rw_exit(&irb_ptr->irb_lock);
	} else {
		/*
		 * In this case we don't know the mask, we need to
		 * search the table assuming different mask sizes.
		 * we start with 128 bit mask, we don't allow default here.
		 */
		for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) {
			in6_addr_t tmpmask;

			if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL)
				continue;
			(void) ip_plen_to_mask_v6(i, &tmpmask);
			irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][
			    IRE_ADDR_MASK_HASH_V6(*addr, tmpmask,
			    ipst->ips_ip6_ftable_hash_size)];
			rw_enter(&irb_ptr->irb_lock, RW_READER);
			for (ire = irb_ptr->irb_ire; ire != NULL;
			    ire = ire->ire_next) {
				if (ire->ire_marks & IRE_MARK_CONDEMNED)
					continue;
				/* Each candidate is matched with its own mask. */
				if (ire_match_args_v6(ire, addr,
				    &ire->ire_mask_v6, gateway, type, ipif,
				    zoneid, ihandle, tsl, flags))
					goto found_ire;
			}
			rw_exit(&irb_ptr->irb_lock);
		}
	}

	/*
	 * We come here if no route has yet been found.
	 *
	 * Handle the case where default route is
	 * requested by specifying type as one of the possible
	 * types for that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE).
	 *
	 * If MATCH_IRE_MASK is specified, then the appropriate default route
	 * would have been found above if it exists so it isn't looked up here.
	 * If MATCH_IRE_DEFAULT was also specified, then a default route will be
	 * searched for later.
	 */
	if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE &&
	    (type & (IRE_DEFAULT | IRE_INTERFACE))) {
		if (ipst->ips_ip_forwarding_table_v6[0] != NULL) {
			/* addr & mask is zero for defaults */
			irb_ptr = &ipst->ips_ip_forwarding_table_v6[0][
			    IRE_ADDR_HASH_V6(ipv6_all_zeros,
			    ipst->ips_ip6_ftable_hash_size)];
			rw_enter(&irb_ptr->irb_lock, RW_READER);
			for (ire = irb_ptr->irb_ire; ire != NULL;
			    ire = ire->ire_next) {

				if (ire->ire_marks & IRE_MARK_CONDEMNED)
					continue;

				if (ire_match_args_v6(ire, addr,
				    &ipv6_all_zeros, gateway, type, ipif,
				    zoneid, ihandle, tsl, flags))
					goto found_ire;
			}
			rw_exit(&irb_ptr->irb_lock);
		}
	}
	/*
	 * We come here only if no route is found.
	 * see if the default route can be used which is allowed
	 * only if the default matching criteria is specified.
	 * The ipv6_ire_default_count tracks the number of IRE_DEFAULT
	 * entries. However, the ip_forwarding_table_v6[0] also contains
	 * interface routes thus the count can be zero.
	 */
	saved_ire = NULL;
	if ((flags & (MATCH_IRE_DEFAULT | MATCH_IRE_MASK)) ==
	    MATCH_IRE_DEFAULT) {
		ire_t	*ire_origin;
		uint_t  g_index;
		uint_t	index;

		if (ipst->ips_ip_forwarding_table_v6[0] == NULL)
			return (NULL);
		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[0])[0];

		/*
		 * Keep a tab on the bucket while looking the IRE_DEFAULT
		 * entries. We need to keep track of a particular IRE
		 * (ire_origin) so this ensures that it will not be unlinked
		 * from the hash list during the recursive lookup below.
		 */
		IRB_REFHOLD(irb_ptr);
		ire = irb_ptr->irb_ire;
		if (ire == NULL) {
			IRB_REFRELE(irb_ptr);
			return (NULL);
		}

		/*
		 * Get the index first, since it can be changed by other
		 * threads. Then get to the right default route skipping
		 * default interface routes if any. As we hold a reference on
		 * the IRE bucket, ipv6_ire_default_count can only increase so
		 * we can't reach the end of the hash list unexpectedly.
		 */
		if (ipst->ips_ipv6_ire_default_count != 0) {
			g_index = ipst->ips_ipv6_ire_default_index++;
			index = g_index % ipst->ips_ipv6_ire_default_count;
			/* Only non-IRE_INTERFACE entries count down 'index'. */
			while (index != 0) {
				if (!(ire->ire_type & IRE_INTERFACE))
					index--;
				ire = ire->ire_next;
			}
			ASSERT(ire != NULL);
		} else {
			/*
			 * No default route, so we only have default interface
			 * routes: don't enter the first loop.
			 */
			ire = NULL;
		}

		/*
		 * Round-robin the default routers list looking for a neighbor
		 * that matches the passed in parameters and is reachable.  If
		 * none found, just return a route from the default router list
		 * if it exists. If we can't find a default route (IRE_DEFAULT),
		 * look for interface default routes.
		 * We start with the ire we found above and we walk the hash
		 * list until we're back where we started, see
		 * ire_get_next_default_ire(). It doesn't matter if default
		 * routes are added or deleted by other threads - we know this
		 * ire will stay in the list because we hold a reference on the
		 * ire bucket.
		 * NB: if we only have interface default routes, ire is NULL so
		 * we don't even enter this loop (see above).
		 */
		ire_origin = ire;
		for (; ire != NULL;
		    ire = ire_get_next_default_ire(ire, ire_origin)) {

			if (ire_match_args_v6(ire, addr,
			    &ipv6_all_zeros, gateway, type, ipif,
			    zoneid, ihandle, tsl, flags)) {
				int match_flags;

				/*
				 * We have something to work with.
				 * If we can find a resolved/reachable
				 * entry, we will use this. Otherwise
				 * we'll try to find an entry that has
				 * a resolved cache entry. We will fallback
				 * on this if we don't find anything else.
				 */
				if (saved_ire == NULL)
					saved_ire = ire;
				mutex_enter(&ire->ire_lock);
				gw_addr_v6 = ire->ire_gateway_addr_v6;
				mutex_exit(&ire->ire_lock);
				match_flags = MATCH_IRE_ILL_GROUP |
				    MATCH_IRE_SECATTR;
				rire = ire_ctable_lookup_v6(&gw_addr_v6, NULL,
				    0, ire->ire_ipif, zoneid, tsl, match_flags,
				    ipst);
				if (rire != NULL) {
					nce = rire->ire_nce;
					if (nce != NULL &&
					    NCE_ISREACHABLE(nce) &&
					    nce->nce_flags & NCE_F_ISROUTER) {
						/*
						 * Reachable router: return
						 * this default route, swapping
						 * the bucket hold for a hold
						 * on the ire itself.
						 */
						ire_refrele(rire);
						IRE_REFHOLD(ire);
						IRB_REFRELE(irb_ptr);
						goto found_ire_held;
					} else if (nce != NULL &&
					    !(nce->nce_flags &
					    NCE_F_ISROUTER)) {
						/*
						 * Make sure we don't use
						 * this ire
						 */
						if (saved_ire == ire)
							saved_ire = NULL;
					}
					ire_refrele(rire);
				} else if (ipst->
				    ips_ipv6_ire_default_count > 1 &&
				    zoneid != GLOBAL_ZONEID) {
					/*
					 * When we're in a local zone, we're
					 * only interested in default routers
					 * that are reachable through ipifs
					 * within our zone.
					 * The potentially expensive call to
					 * ire_route_lookup_v6() is avoided when
					 * we have only one default route.
					 */
					int ire_match_flags = MATCH_IRE_TYPE |
					    MATCH_IRE_SECATTR;

					if (ire->ire_ipif != NULL) {
						ire_match_flags |=
						    MATCH_IRE_ILL_GROUP;
					}
					rire = ire_route_lookup_v6(&gw_addr_v6,
					    NULL, NULL, IRE_INTERFACE,
					    ire->ire_ipif, NULL,
					    zoneid, tsl, ire_match_flags, ipst);
					if (rire != NULL) {
						ire_refrele(rire);
						saved_ire = ire;
					} else if (saved_ire == ire) {
						/*
						 * Make sure we don't use
						 * this ire
						 */
						saved_ire = NULL;
					}
				}
			}
		}
		if (saved_ire != NULL) {
			ire = saved_ire;
			IRE_REFHOLD(ire);
			IRB_REFRELE(irb_ptr);
			goto found_ire_held;
		} else {
			/*
			 * Look for an interface default route matching the
			 * args passed in. No round robin here. Just pick
			 * the right one.
			 */
			for (ire = irb_ptr->irb_ire; ire != NULL;
			    ire = ire->ire_next) {

				if (!(ire->ire_type & IRE_INTERFACE))
					continue;

				if (ire->ire_marks & IRE_MARK_CONDEMNED)
					continue;

				if (ire_match_args_v6(ire, addr,
				    &ipv6_all_zeros, gateway, type, ipif,
				    zoneid, ihandle, tsl, flags)) {
					IRE_REFHOLD(ire);
					IRB_REFRELE(irb_ptr);
					goto found_ire_held;
				}
			}
			IRB_REFRELE(irb_ptr);
		}
	}
	ASSERT(ire == NULL);
	ip1dbg(("ire_ftable_lookup_v6: returning NULL ire"));
	return (NULL);
found_ire:
	/* Entered with irb_lock held as reader; hold the ire, then unlock. */
	ASSERT((ire->ire_marks & IRE_MARK_CONDEMNED) == 0);
	IRE_REFHOLD(ire);
	rw_exit(&irb_ptr->irb_lock);

found_ire_held:
	/* From here on 'ire' carries a reference taken by this function. */
	if ((flags & MATCH_IRE_RJ_BHOLE) &&
	    (ire->ire_flags & (RTF_BLACKHOLE | RTF_REJECT))) {
		return (ire);
	}
	/*
	 * At this point, IRE that was found must be an IRE_FORWARDTABLE
	 * or IRE_CACHETABLE type.  If this is a recursive lookup and an
	 * IRE_INTERFACE type was found, return that.  If it was some other
	 * IRE_FORWARDTABLE type of IRE (one of the prefix types), then it
	 * is necessary to fill in the  parent IRE pointed to by pire, and
	 * then lookup the gateway address of  the parent.  For backwards
	 * compatibility, if this lookup returns an
	 * IRE other than a IRE_CACHETABLE or IRE_INTERFACE, then one more level
	 * of lookup is done.
	 */
	if (flags & MATCH_IRE_RECURSIVE) {
		const ipif_t *gw_ipif;
		int match_flags = MATCH_IRE_DSTONLY;

		if (ire->ire_type & IRE_INTERFACE)
			return (ire);
		if (pire != NULL)
			*pire = ire;
		/*
		 * If we can't find an IRE_INTERFACE or the caller has not
		 * asked for pire, we need to REFRELE the saved_ire.
		 */
		saved_ire = ire;

		/*
		 * Currently MATCH_IRE_ILL is never used with
		 * (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT) while
		 * sending out packets as MATCH_IRE_ILL is used only
		 * for communicating with on-link hosts. We can't assert
		 * that here as RTM_GET calls this function with
		 * MATCH_IRE_ILL | MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE.
		 * We have already used the MATCH_IRE_ILL in determining
		 * the right prefix route at this point. To match the
		 * behavior of how we locate routes while sending out
		 * packets, we don't want to use MATCH_IRE_ILL below
		 * while locating the interface route.
		 */
		if (ire->ire_ipif != NULL)
			match_flags |= MATCH_IRE_ILL_GROUP;

		mutex_enter(&ire->ire_lock);
		gw_addr_v6 = ire->ire_gateway_addr_v6;
		mutex_exit(&ire->ire_lock);

		/* First level: look up the parent's gateway address. */
		ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 0,
		    ire->ire_ipif, NULL, zoneid, tsl, match_flags, ipst);
		if (ire == NULL) {
			/*
			 * In this case we have to deal with the
			 * MATCH_IRE_PARENT flag, which means the
			 * parent has to be returned if ire is NULL.
			 * The aim of this is to have (at least) a starting
			 * ire when we want to look at all of the ires in a
			 * bucket aimed at a single destination (as is the
			 * case in ip_newroute_v6 for the RTF_MULTIRT
			 * flagged routes).
			 */
			if (flags & MATCH_IRE_PARENT) {
				if (pire != NULL) {
					/*
					 * Need an extra REFHOLD, if the
					 * parent ire is returned via both
					 * ire and pire.
					 */
					IRE_REFHOLD(saved_ire);
				}
				ire = saved_ire;
			} else {
				ire_refrele(saved_ire);
				if (pire != NULL)
					*pire = NULL;
			}
			return (ire);
		}
		if (ire->ire_type & (IRE_CACHETABLE | IRE_INTERFACE)) {
			/*
			 * If the caller did not ask for pire, release
			 * it now.
			 */
			if (pire == NULL) {
				ire_refrele(saved_ire);
			}
			return (ire);
		}
		/*
		 * Second level: the first lookup returned neither a cache
		 * table nor an interface entry, so look up its gateway,
		 * restricted by type. Note that tsl is passed as NULL here.
		 */
		match_flags |= MATCH_IRE_TYPE;
		mutex_enter(&ire->ire_lock);
		gw_addr_v6 = ire->ire_gateway_addr_v6;
		mutex_exit(&ire->ire_lock);
		gw_ipif = ire->ire_ipif;
		ire_refrele(ire);
		ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL,
		    (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid,
		    NULL, match_flags, ipst);
		if (ire == NULL) {
			/*
			 * In this case we have to deal with the
			 * MATCH_IRE_PARENT flag, which means the
			 * parent has to be returned if ire is NULL.
			 * The aim of this is to have (at least) a starting
			 * ire when we want to look at all of the ires in a
			 * bucket aimed at a single destination (as is the
			 * case in ip_newroute_v6 for the RTF_MULTIRT
			 * flagged routes).
			 */
			if (flags & MATCH_IRE_PARENT) {
				if (pire != NULL) {
					/*
					 * Need an extra REFHOLD, if the
					 * parent ire is returned via both
					 * ire and pire.
					 */
					IRE_REFHOLD(saved_ire);
				}
				ire = saved_ire;
			} else {
				ire_refrele(saved_ire);
				if (pire != NULL)
					*pire = NULL;
			}
			return (ire);
		} else if (pire == NULL) {
			/*
			 * If the caller did not ask for pire, release
			 * it now.
			 */
			ire_refrele(saved_ire);
		}
		return (ire);
	}

	ASSERT(pire == NULL || *pire == NULL);
	return (ire);
}

/*
 * Delete the IRE cache for the gateway and all IRE caches whose
 * ire_gateway_addr_v6 points to this gateway, and allow them to
 * be created on demand by ip_newroute_v6.
 */
void
ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid,
	ip_stack_t *ipst)
{
	irb_t		*irb;
	ire_t		*ire;

	/* First pass: IRE_CACHE entries for 'addr' itself, in its bucket. */
	irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
	    ipst->ips_ip6_cache_table_size)];
	IRB_REFHOLD(irb);
	for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
		if (ire->ire_marks & IRE_MARK_CONDEMNED)
			continue;

		ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones));
		if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, 0,
		    IRE_CACHE, NULL, zoneid, 0, NULL, MATCH_IRE_TYPE)) {
			ire_delete(ire);
		}
	}
	IRB_REFRELE(irb);

	/* Second pass: entries that use 'addr' as their gateway. */
	ire_walk_v6(ire_delete_cache_gw_v6, (char *)addr, zoneid, ipst);
}

/*
 * Looks up cache table for a route.
 * specific lookup can be indicated by
 * passing the MATCH_* flags and the
 * necessary parameters.
 *
 * Returns the first matching non-condemned ire in the bucket for 'addr'
 * with a reference held (IRE_REFHOLD), or NULL if there is none.
 */
ire_t *
ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway,
    int type, const ipif_t *ipif, zoneid_t zoneid, const ts_label_t *tsl,
    int flags, ip_stack_t *ipst)
{
	ire_t *ire;
	irb_t *irb_ptr;
	ASSERT(addr != NULL);
	ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL);

	/*
	 * ire_match_args_v6() will dereference ipif if MATCH_IRE_SRC,
	 * MATCH_IRE_ILL or MATCH_IRE_ILL_GROUP is set.
	 */
	if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) &&
	    (ipif == NULL))
		return (NULL);

	irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
	    ipst->ips_ip6_cache_table_size)];
	rw_enter(&irb_ptr->irb_lock, RW_READER);
	for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) {
		if (ire->ire_marks & IRE_MARK_CONDEMNED)
			continue;

		ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones));
		if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, gateway,
		    type, ipif, zoneid, 0, tsl, flags)) {
			/* Take the hold before dropping the bucket lock. */
			IRE_REFHOLD(ire);
			rw_exit(&irb_ptr->irb_lock);
			return (ire);
		}
	}
	rw_exit(&irb_ptr->irb_lock);
	return (NULL);
}

/*
 * Lookup cache. Don't return IRE_MARK_HIDDEN entries. Callers
 * should use ire_ctable_lookup with MATCH_IRE_MARK_HIDDEN to get
 * to the hidden ones.
 *
 * In general the zoneid has to match (where ALL_ZONES match all of them).
 * But for IRE_LOCAL we also need to handle the case where L2 should
 * conceptually loop back the packet. This is necessary since neither
 * Ethernet drivers nor Ethernet hardware loops back packets sent to their
 * own MAC address. This loopback is needed when the normal
 * routes (ignoring IREs with different zoneids) would send out the packet on
 * the same ill (or ill group) as the ill with which this IRE_LOCAL is
 * associated.
 *
 * Earlier versions of this code always matched an IRE_LOCAL independently of
 * the zoneid. We preserve that earlier behavior when
 * ip_restrict_interzone_loopback is turned off.
1991 */ 1992 ire_t * 1993 ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid, 1994 const ts_label_t *tsl, ip_stack_t *ipst) 1995 { 1996 irb_t *irb_ptr; 1997 ire_t *ire; 1998 1999 irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 2000 ipst->ips_ip6_cache_table_size)]; 2001 rw_enter(&irb_ptr->irb_lock, RW_READER); 2002 for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 2003 if (ire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 2004 continue; 2005 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, addr)) { 2006 /* 2007 * Finally, check if the security policy has any 2008 * restriction on using this route for the specified 2009 * message. 2010 */ 2011 if (tsl != NULL && 2012 ire->ire_gw_secattr != NULL && 2013 tsol_ire_match_gwattr(ire, tsl) != 0) { 2014 continue; 2015 } 2016 2017 if (zoneid == ALL_ZONES || ire->ire_zoneid == zoneid || 2018 ire->ire_zoneid == ALL_ZONES) { 2019 IRE_REFHOLD(ire); 2020 rw_exit(&irb_ptr->irb_lock); 2021 return (ire); 2022 } 2023 2024 if (ire->ire_type == IRE_LOCAL) { 2025 if (ipst->ips_ip_restrict_interzone_loopback && 2026 !ire_local_ok_across_zones(ire, zoneid, 2027 (void *)addr, tsl, ipst)) 2028 continue; 2029 2030 IRE_REFHOLD(ire); 2031 rw_exit(&irb_ptr->irb_lock); 2032 return (ire); 2033 } 2034 } 2035 } 2036 rw_exit(&irb_ptr->irb_lock); 2037 return (NULL); 2038 } 2039 2040 /* 2041 * Locate the interface ire that is tied to the cache ire 'cire' via 2042 * cire->ire_ihandle. 2043 * 2044 * We are trying to create the cache ire for an onlink destn. or 2045 * gateway in 'cire'. We are called from ire_add_v6() in the IRE_IF_RESOLVER 2046 * case for xresolv interfaces, after the ire has come back from 2047 * an external resolver. 
2048 */ 2049 static ire_t * 2050 ire_ihandle_lookup_onlink_v6(ire_t *cire) 2051 { 2052 ire_t *ire; 2053 int match_flags; 2054 int i; 2055 int j; 2056 irb_t *irb_ptr; 2057 ip_stack_t *ipst = cire->ire_ipst; 2058 2059 ASSERT(cire != NULL); 2060 2061 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2062 /* 2063 * We know that the mask of the interface ire equals cire->ire_cmask. 2064 * (When ip_newroute_v6() created 'cire' for an on-link destn. 2065 * it set its cmask from the interface ire's mask) 2066 */ 2067 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 2068 NULL, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle, 2069 NULL, match_flags, ipst); 2070 if (ire != NULL) 2071 return (ire); 2072 /* 2073 * If we didn't find an interface ire above, we can't declare failure. 2074 * For backwards compatibility, we need to support prefix routes 2075 * pointing to next hop gateways that are not on-link. 2076 * 2077 * In the resolver/noresolver case, ip_newroute_v6() thinks 2078 * it is creating the cache ire for an onlink destination in 'cire'. 2079 * But 'cire' is not actually onlink, because ire_ftable_lookup_v6() 2080 * cheated it, by doing ire_route_lookup_v6() twice and returning an 2081 * interface ire. 2082 * 2083 * Eg. default - gw1 (line 1) 2084 * gw1 - gw2 (line 2) 2085 * gw2 - hme0 (line 3) 2086 * 2087 * In the above example, ip_newroute_v6() tried to create the cache ire 2088 * 'cire' for gw1, based on the interface route in line 3. The 2089 * ire_ftable_lookup_v6() above fails, because there is 2090 * no interface route to reach gw1. (it is gw2). We fall thru below. 2091 * 2092 * Do a brute force search based on the ihandle in a subset of the 2093 * forwarding tables, corresponding to cire->ire_cmask_v6. Otherwise 2094 * things become very complex, since we don't have 'pire' in this 2095 * case. 
(Also note that this method is not possible in the offlink 2096 * case because we don't know the mask) 2097 */ 2098 i = ip_mask_to_plen_v6(&cire->ire_cmask_v6); 2099 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) 2100 return (NULL); 2101 for (j = 0; j < ipst->ips_ip6_ftable_hash_size; j++) { 2102 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][j]; 2103 rw_enter(&irb_ptr->irb_lock, RW_READER); 2104 for (ire = irb_ptr->irb_ire; ire != NULL; 2105 ire = ire->ire_next) { 2106 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2107 continue; 2108 if ((ire->ire_type & IRE_INTERFACE) && 2109 (ire->ire_ihandle == cire->ire_ihandle)) { 2110 IRE_REFHOLD(ire); 2111 rw_exit(&irb_ptr->irb_lock); 2112 return (ire); 2113 } 2114 } 2115 rw_exit(&irb_ptr->irb_lock); 2116 } 2117 return (NULL); 2118 } 2119 2120 2121 /* 2122 * Locate the interface ire that is tied to the cache ire 'cire' via 2123 * cire->ire_ihandle. 2124 * 2125 * We are trying to create the cache ire for an offlink destn based 2126 * on the cache ire of the gateway in 'cire'. 'pire' is the prefix ire 2127 * as found by ip_newroute_v6(). We are called from ip_newroute_v6() in 2128 * the IRE_CACHE case. 2129 */ 2130 ire_t * 2131 ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire) 2132 { 2133 ire_t *ire; 2134 int match_flags; 2135 in6_addr_t gw_addr; 2136 ipif_t *gw_ipif; 2137 ip_stack_t *ipst = cire->ire_ipst; 2138 2139 ASSERT(cire != NULL && pire != NULL); 2140 2141 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2142 /* 2143 * ip_newroute_v6 calls ire_ftable_lookup with MATCH_IRE_ILL only 2144 * for on-link hosts. We should never be here for onlink. 2145 * Thus, use MATCH_IRE_ILL_GROUP. 2146 */ 2147 if (pire->ire_ipif != NULL) 2148 match_flags |= MATCH_IRE_ILL_GROUP; 2149 /* 2150 * We know that the mask of the interface ire equals cire->ire_cmask. 2151 * (When ip_newroute_v6() created 'cire' for an on-link destn. 
it set 2152 * its cmask from the interface ire's mask) 2153 */ 2154 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 0, 2155 IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2156 NULL, match_flags, ipst); 2157 if (ire != NULL) 2158 return (ire); 2159 /* 2160 * If we didn't find an interface ire above, we can't declare failure. 2161 * For backwards compatibility, we need to support prefix routes 2162 * pointing to next hop gateways that are not on-link. 2163 * 2164 * Assume we are trying to ping some offlink destn, and we have the 2165 * routing table below. 2166 * 2167 * Eg. default - gw1 <--- pire (line 1) 2168 * gw1 - gw2 (line 2) 2169 * gw2 - hme0 (line 3) 2170 * 2171 * If we already have a cache ire for gw1 in 'cire', the 2172 * ire_ftable_lookup_v6 above would have failed, since there is no 2173 * interface ire to reach gw1. We will fallthru below. 2174 * 2175 * Here we duplicate the steps that ire_ftable_lookup_v6() did in 2176 * getting 'cire' from 'pire', in the MATCH_IRE_RECURSIVE case. 2177 * The differences are the following 2178 * i. We want the interface ire only, so we call 2179 * ire_ftable_lookup_v6() instead of ire_route_lookup_v6() 2180 * ii. We look for only prefix routes in the 1st call below. 2181 * ii. We want to match on the ihandle in the 2nd call below. 2182 */ 2183 match_flags = MATCH_IRE_TYPE; 2184 if (pire->ire_ipif != NULL) 2185 match_flags |= MATCH_IRE_ILL_GROUP; 2186 2187 mutex_enter(&pire->ire_lock); 2188 gw_addr = pire->ire_gateway_addr_v6; 2189 mutex_exit(&pire->ire_lock); 2190 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_OFFSUBNET, 2191 pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst); 2192 if (ire == NULL) 2193 return (NULL); 2194 /* 2195 * At this point 'ire' corresponds to the entry shown in line 2. 2196 * gw_addr is 'gw2' in the example above. 
2197 */ 2198 mutex_enter(&ire->ire_lock); 2199 gw_addr = ire->ire_gateway_addr_v6; 2200 mutex_exit(&ire->ire_lock); 2201 gw_ipif = ire->ire_ipif; 2202 ire_refrele(ire); 2203 2204 match_flags |= MATCH_IRE_IHANDLE; 2205 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_INTERFACE, 2206 gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2207 NULL, match_flags, ipst); 2208 return (ire); 2209 } 2210 2211 /* 2212 * Return the IRE_LOOPBACK, IRE_IF_RESOLVER or IRE_IF_NORESOLVER 2213 * ire associated with the specified ipif. 2214 * 2215 * This might occasionally be called when IPIF_UP is not set since 2216 * the IPV6_MULTICAST_IF as well as creating interface routes 2217 * allows specifying a down ipif (ipif_lookup* match ipifs that are down). 2218 * 2219 * Note that if IPIF_NOLOCAL, IPIF_NOXMIT, or IPIF_DEPRECATED is set on 2220 * the ipif this routine might return NULL. 2221 * (Sometimes called as writer though not required by this function.) 2222 */ 2223 ire_t * 2224 ipif_to_ire_v6(const ipif_t *ipif) 2225 { 2226 ire_t *ire; 2227 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2228 2229 ASSERT(ipif->ipif_isv6); 2230 if (ipif->ipif_ire_type == IRE_LOOPBACK) { 2231 ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, NULL, 2232 IRE_LOOPBACK, ipif, ALL_ZONES, NULL, 2233 (MATCH_IRE_TYPE | MATCH_IRE_IPIF), ipst); 2234 } else if (ipif->ipif_flags & IPIF_POINTOPOINT) { 2235 /* In this case we need to lookup destination address. 
*/ 2236 ire = ire_ftable_lookup_v6(&ipif->ipif_v6pp_dst_addr, 2237 &ipv6_all_ones, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 2238 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 2239 MATCH_IRE_MASK), ipst); 2240 } else { 2241 ire = ire_ftable_lookup_v6(&ipif->ipif_v6subnet, 2242 &ipif->ipif_v6net_mask, NULL, IRE_INTERFACE, ipif, NULL, 2243 ALL_ZONES, 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 2244 MATCH_IRE_MASK), ipst); 2245 } 2246 return (ire); 2247 } 2248 2249 /* 2250 * Return B_TRUE if a multirt route is resolvable 2251 * (or if no route is resolved yet), B_FALSE otherwise. 2252 * This only works in the global zone. 2253 */ 2254 boolean_t 2255 ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl, 2256 ip_stack_t *ipst) 2257 { 2258 ire_t *first_fire; 2259 ire_t *first_cire; 2260 ire_t *fire; 2261 ire_t *cire; 2262 irb_t *firb; 2263 irb_t *cirb; 2264 int unres_cnt = 0; 2265 boolean_t resolvable = B_FALSE; 2266 2267 /* Retrieve the first IRE_HOST that matches the destination */ 2268 first_fire = ire_ftable_lookup_v6(v6dstp, &ipv6_all_ones, 0, IRE_HOST, 2269 NULL, NULL, ALL_ZONES, 0, tsl, MATCH_IRE_MASK | MATCH_IRE_TYPE | 2270 MATCH_IRE_SECATTR, ipst); 2271 2272 /* No route at all */ 2273 if (first_fire == NULL) { 2274 return (B_TRUE); 2275 } 2276 2277 firb = first_fire->ire_bucket; 2278 ASSERT(firb); 2279 2280 /* Retrieve the first IRE_CACHE ire for that destination. */ 2281 first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID, tsl, ipst); 2282 2283 /* No resolved route. */ 2284 if (first_cire == NULL) { 2285 ire_refrele(first_fire); 2286 return (B_TRUE); 2287 } 2288 2289 /* At least one route is resolved. */ 2290 2291 cirb = first_cire->ire_bucket; 2292 ASSERT(cirb); 2293 2294 /* Count the number of routes to that dest that are declared. 
*/ 2295 IRB_REFHOLD(firb); 2296 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2297 if (!(fire->ire_flags & RTF_MULTIRT)) 2298 continue; 2299 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, v6dstp)) 2300 continue; 2301 unres_cnt++; 2302 } 2303 IRB_REFRELE(firb); 2304 2305 2306 /* Then subtract the number of routes to that dst that are resolved */ 2307 IRB_REFHOLD(cirb); 2308 for (cire = first_cire; cire != NULL; cire = cire->ire_next) { 2309 if (!(cire->ire_flags & RTF_MULTIRT)) 2310 continue; 2311 if (!IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, v6dstp)) 2312 continue; 2313 if (cire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 2314 continue; 2315 unres_cnt--; 2316 } 2317 IRB_REFRELE(cirb); 2318 2319 /* At least one route is unresolved; search for a resolvable route. */ 2320 if (unres_cnt > 0) 2321 resolvable = ire_multirt_lookup_v6(&first_cire, &first_fire, 2322 MULTIRT_USESTAMP|MULTIRT_CACHEGW, tsl, ipst); 2323 2324 if (first_fire) 2325 ire_refrele(first_fire); 2326 2327 if (first_cire) 2328 ire_refrele(first_cire); 2329 2330 return (resolvable); 2331 } 2332 2333 2334 /* 2335 * Return B_TRUE and update *ire_arg and *fire_arg 2336 * if at least one resolvable route is found. 2337 * Return B_FALSE otherwise (all routes are resolved or 2338 * the remaining unresolved routes are all unresolvable). 2339 * This only works in the global zone. 
2340 */ 2341 boolean_t 2342 ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, 2343 const ts_label_t *tsl, ip_stack_t *ipst) 2344 { 2345 clock_t delta; 2346 ire_t *best_fire = NULL; 2347 ire_t *best_cire = NULL; 2348 ire_t *first_fire; 2349 ire_t *first_cire; 2350 ire_t *fire; 2351 ire_t *cire; 2352 irb_t *firb = NULL; 2353 irb_t *cirb = NULL; 2354 ire_t *gw_ire; 2355 boolean_t already_resolved; 2356 boolean_t res; 2357 in6_addr_t v6dst; 2358 in6_addr_t v6gw; 2359 2360 ip2dbg(("ire_multirt_lookup_v6: *ire_arg %p, *fire_arg %p, " 2361 "flags %04x\n", (void *)*ire_arg, (void *)*fire_arg, flags)); 2362 2363 ASSERT(ire_arg); 2364 ASSERT(fire_arg); 2365 2366 /* Not an IRE_HOST ire; give up. */ 2367 if ((*fire_arg == NULL) || 2368 ((*fire_arg)->ire_type != IRE_HOST)) { 2369 return (B_FALSE); 2370 } 2371 2372 /* This is the first IRE_HOST ire for that destination. */ 2373 first_fire = *fire_arg; 2374 firb = first_fire->ire_bucket; 2375 ASSERT(firb); 2376 2377 mutex_enter(&first_fire->ire_lock); 2378 v6dst = first_fire->ire_addr_v6; 2379 mutex_exit(&first_fire->ire_lock); 2380 2381 ip2dbg(("ire_multirt_lookup_v6: dst %08x\n", 2382 ntohl(V4_PART_OF_V6(v6dst)))); 2383 2384 /* 2385 * Retrieve the first IRE_CACHE ire for that destination; 2386 * if we don't find one, no route for that dest is 2387 * resolved yet. 2388 */ 2389 first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID, tsl, ipst); 2390 if (first_cire) { 2391 cirb = first_cire->ire_bucket; 2392 } 2393 2394 ip2dbg(("ire_multirt_lookup_v6: first_cire %p\n", (void *)first_cire)); 2395 2396 /* 2397 * Search for a resolvable route, giving the top priority 2398 * to routes that can be resolved without any call to the resolver. 2399 */ 2400 IRB_REFHOLD(firb); 2401 2402 if (!IN6_IS_ADDR_MULTICAST(&v6dst)) { 2403 /* 2404 * For all multiroute IRE_HOST ires for that destination, 2405 * check if the route via the IRE_HOST's gateway is 2406 * resolved yet. 
2407 */ 2408 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2409 2410 if (!(fire->ire_flags & RTF_MULTIRT)) 2411 continue; 2412 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2413 continue; 2414 2415 if (fire->ire_gw_secattr != NULL && 2416 tsol_ire_match_gwattr(fire, tsl) != 0) { 2417 continue; 2418 } 2419 2420 mutex_enter(&fire->ire_lock); 2421 v6gw = fire->ire_gateway_addr_v6; 2422 mutex_exit(&fire->ire_lock); 2423 2424 ip2dbg(("ire_multirt_lookup_v6: fire %p, " 2425 "ire_addr %08x, ire_gateway_addr %08x\n", 2426 (void *)fire, 2427 ntohl(V4_PART_OF_V6(fire->ire_addr_v6)), 2428 ntohl(V4_PART_OF_V6(v6gw)))); 2429 2430 already_resolved = B_FALSE; 2431 2432 if (first_cire) { 2433 ASSERT(cirb); 2434 2435 IRB_REFHOLD(cirb); 2436 /* 2437 * For all IRE_CACHE ires for that 2438 * destination. 2439 */ 2440 for (cire = first_cire; 2441 cire != NULL; 2442 cire = cire->ire_next) { 2443 2444 if (!(cire->ire_flags & RTF_MULTIRT)) 2445 continue; 2446 if (!IN6_ARE_ADDR_EQUAL( 2447 &cire->ire_addr_v6, &v6dst)) 2448 continue; 2449 if (cire->ire_marks & 2450 (IRE_MARK_CONDEMNED| 2451 IRE_MARK_HIDDEN)) 2452 continue; 2453 2454 if (cire->ire_gw_secattr != NULL && 2455 tsol_ire_match_gwattr(cire, 2456 tsl) != 0) { 2457 continue; 2458 } 2459 2460 /* 2461 * Check if the IRE_CACHE's gateway 2462 * matches the IRE_HOST's gateway. 2463 */ 2464 if (IN6_ARE_ADDR_EQUAL( 2465 &cire->ire_gateway_addr_v6, 2466 &v6gw)) { 2467 already_resolved = B_TRUE; 2468 break; 2469 } 2470 } 2471 IRB_REFRELE(cirb); 2472 } 2473 2474 /* 2475 * This route is already resolved; 2476 * proceed with next one. 2477 */ 2478 if (already_resolved) { 2479 ip2dbg(("ire_multirt_lookup_v6: found cire %p, " 2480 "already resolved\n", (void *)cire)); 2481 continue; 2482 } 2483 2484 /* 2485 * The route is unresolved; is it actually 2486 * resolvable, i.e. is there a cache or a resolver 2487 * for the gateway? 
2488 */ 2489 gw_ire = ire_route_lookup_v6(&v6gw, 0, 0, 0, NULL, NULL, 2490 ALL_ZONES, tsl, MATCH_IRE_RECURSIVE | 2491 MATCH_IRE_SECATTR, ipst); 2492 2493 ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n", 2494 (void *)gw_ire)); 2495 2496 /* 2497 * This route can be resolved without any call to the 2498 * resolver; if the MULTIRT_CACHEGW flag is set, 2499 * give the top priority to this ire and exit the 2500 * loop. 2501 * This occurs when an resolver reply is processed 2502 * through ip_wput_nondata() 2503 */ 2504 if ((flags & MULTIRT_CACHEGW) && 2505 (gw_ire != NULL) && 2506 (gw_ire->ire_type & IRE_CACHETABLE)) { 2507 /* 2508 * Release the resolver associated to the 2509 * previous candidate best ire, if any. 2510 */ 2511 if (best_cire) { 2512 ire_refrele(best_cire); 2513 ASSERT(best_fire); 2514 } 2515 2516 best_fire = fire; 2517 best_cire = gw_ire; 2518 2519 ip2dbg(("ire_multirt_lookup_v6: found top prio " 2520 "best_fire %p, best_cire %p\n", 2521 (void *)best_fire, (void *)best_cire)); 2522 break; 2523 } 2524 2525 /* 2526 * Compute the time elapsed since our preceding 2527 * attempt to resolve that route. 2528 * If the MULTIRT_USESTAMP flag is set, we take that 2529 * route into account only if this time interval 2530 * exceeds ip_multirt_resolution_interval; 2531 * this prevents us from attempting to resolve a 2532 * broken route upon each sending of a packet. 2533 */ 2534 delta = lbolt - fire->ire_last_used_time; 2535 delta = TICK_TO_MSEC(delta); 2536 2537 res = (boolean_t) 2538 ((delta > ipst-> 2539 ips_ip_multirt_resolution_interval) || 2540 (!(flags & MULTIRT_USESTAMP))); 2541 2542 ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, " 2543 "res %d\n", 2544 (void *)fire, delta, res)); 2545 2546 if (res) { 2547 /* 2548 * A resolver exists for the gateway: save 2549 * the current IRE_HOST ire as a candidate 2550 * best ire. If we later discover that a 2551 * top priority ire exists (i.e. 
no need to 2552 * call the resolver), then this new ire 2553 * will be preferred to the current one. 2554 */ 2555 if (gw_ire != NULL) { 2556 if (best_fire == NULL) { 2557 ASSERT(best_cire == NULL); 2558 2559 best_fire = fire; 2560 best_cire = gw_ire; 2561 2562 ip2dbg(("ire_multirt_lookup_v6:" 2563 "found candidate " 2564 "best_fire %p, " 2565 "best_cire %p\n", 2566 (void *)best_fire, 2567 (void *)best_cire)); 2568 2569 /* 2570 * If MULTIRT_CACHEGW is not 2571 * set, we ignore the top 2572 * priority ires that can 2573 * be resolved without any 2574 * call to the resolver; 2575 * In that case, there is 2576 * actually no need 2577 * to continue the loop. 2578 */ 2579 if (!(flags & 2580 MULTIRT_CACHEGW)) { 2581 break; 2582 } 2583 continue; 2584 } 2585 } else { 2586 /* 2587 * No resolver for the gateway: the 2588 * route is not resolvable. 2589 * If the MULTIRT_SETSTAMP flag is 2590 * set, we stamp the IRE_HOST ire, 2591 * so we will not select it again 2592 * during this resolution interval. 2593 */ 2594 if (flags & MULTIRT_SETSTAMP) 2595 fire->ire_last_used_time = 2596 lbolt; 2597 } 2598 } 2599 2600 if (gw_ire != NULL) 2601 ire_refrele(gw_ire); 2602 } 2603 } else { /* IN6_IS_ADDR_MULTICAST(&v6dst) */ 2604 2605 for (fire = first_fire; 2606 fire != NULL; 2607 fire = fire->ire_next) { 2608 2609 if (!(fire->ire_flags & RTF_MULTIRT)) 2610 continue; 2611 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2612 continue; 2613 2614 if (fire->ire_gw_secattr != NULL && 2615 tsol_ire_match_gwattr(fire, tsl) != 0) { 2616 continue; 2617 } 2618 2619 already_resolved = B_FALSE; 2620 2621 mutex_enter(&fire->ire_lock); 2622 v6gw = fire->ire_gateway_addr_v6; 2623 mutex_exit(&fire->ire_lock); 2624 2625 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2626 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, tsl, 2627 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE | 2628 MATCH_IRE_SECATTR, ipst); 2629 2630 /* No resolver for the gateway; we skip this ire. 
*/ 2631 if (gw_ire == NULL) { 2632 continue; 2633 } 2634 2635 if (first_cire) { 2636 2637 IRB_REFHOLD(cirb); 2638 /* 2639 * For all IRE_CACHE ires for that 2640 * destination. 2641 */ 2642 for (cire = first_cire; 2643 cire != NULL; 2644 cire = cire->ire_next) { 2645 2646 if (!(cire->ire_flags & RTF_MULTIRT)) 2647 continue; 2648 if (!IN6_ARE_ADDR_EQUAL( 2649 &cire->ire_addr_v6, &v6dst)) 2650 continue; 2651 if (cire->ire_marks & 2652 (IRE_MARK_CONDEMNED| 2653 IRE_MARK_HIDDEN)) 2654 continue; 2655 2656 if (cire->ire_gw_secattr != NULL && 2657 tsol_ire_match_gwattr(cire, 2658 tsl) != 0) { 2659 continue; 2660 } 2661 2662 /* 2663 * Cache entries are linked to the 2664 * parent routes using the parent handle 2665 * (ire_phandle). If no cache entry has 2666 * the same handle as fire, fire is 2667 * still unresolved. 2668 */ 2669 ASSERT(cire->ire_phandle != 0); 2670 if (cire->ire_phandle == 2671 fire->ire_phandle) { 2672 already_resolved = B_TRUE; 2673 break; 2674 } 2675 } 2676 IRB_REFRELE(cirb); 2677 } 2678 2679 /* 2680 * This route is already resolved; proceed with 2681 * next one. 2682 */ 2683 if (already_resolved) { 2684 ire_refrele(gw_ire); 2685 continue; 2686 } 2687 2688 /* 2689 * Compute the time elapsed since our preceding 2690 * attempt to resolve that route. 2691 * If the MULTIRT_USESTAMP flag is set, we take 2692 * that route into account only if this time 2693 * interval exceeds ip_multirt_resolution_interval; 2694 * this prevents us from attempting to resolve a 2695 * broken route upon each sending of a packet. 
2696 */ 2697 delta = lbolt - fire->ire_last_used_time; 2698 delta = TICK_TO_MSEC(delta); 2699 2700 res = (boolean_t) 2701 ((delta > ipst-> 2702 ips_ip_multirt_resolution_interval) || 2703 (!(flags & MULTIRT_USESTAMP))); 2704 2705 ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, " 2706 "flags %04x, res %d\n", 2707 (void *)fire, delta, flags, res)); 2708 2709 if (res) { 2710 if (best_cire) { 2711 /* 2712 * Release the resolver associated 2713 * to the preceding candidate best 2714 * ire, if any. 2715 */ 2716 ire_refrele(best_cire); 2717 ASSERT(best_fire); 2718 } 2719 best_fire = fire; 2720 best_cire = gw_ire; 2721 continue; 2722 } 2723 2724 ire_refrele(gw_ire); 2725 } 2726 } 2727 2728 if (best_fire) { 2729 IRE_REFHOLD(best_fire); 2730 } 2731 IRB_REFRELE(firb); 2732 2733 /* Release the first IRE_CACHE we initially looked up, if any. */ 2734 if (first_cire) 2735 ire_refrele(first_cire); 2736 2737 /* Found a resolvable route. */ 2738 if (best_fire) { 2739 ASSERT(best_cire); 2740 2741 if (*fire_arg) 2742 ire_refrele(*fire_arg); 2743 if (*ire_arg) 2744 ire_refrele(*ire_arg); 2745 2746 /* 2747 * Update the passed arguments with the 2748 * resolvable multirt route we found 2749 */ 2750 *fire_arg = best_fire; 2751 *ire_arg = best_cire; 2752 2753 ip2dbg(("ire_multirt_lookup_v6: returning B_TRUE, " 2754 "*fire_arg %p, *ire_arg %p\n", 2755 (void *)best_fire, (void *)best_cire)); 2756 2757 return (B_TRUE); 2758 } 2759 2760 ASSERT(best_cire == NULL); 2761 2762 ip2dbg(("ire_multirt_lookup_v6: returning B_FALSE, *fire_arg %p, " 2763 "*ire_arg %p\n", 2764 (void *)*fire_arg, (void *)*ire_arg)); 2765 2766 /* No resolvable route. */ 2767 return (B_FALSE); 2768 } 2769 2770 2771 /* 2772 * Find an IRE_OFFSUBNET IRE entry for the multicast address 'v6dstp' 2773 * that goes through 'ipif'. As a fallback, a route that goes through 2774 * ipif->ipif_ill can be returned. 
2775 */ 2776 ire_t * 2777 ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp) 2778 { 2779 ire_t *ire; 2780 ire_t *save_ire = NULL; 2781 ire_t *gw_ire; 2782 irb_t *irb; 2783 in6_addr_t v6gw; 2784 int match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL; 2785 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2786 2787 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, NULL, ALL_ZONES, 0, 2788 NULL, MATCH_IRE_DEFAULT, ipst); 2789 2790 if (ire == NULL) 2791 return (NULL); 2792 2793 irb = ire->ire_bucket; 2794 ASSERT(irb); 2795 2796 IRB_REFHOLD(irb); 2797 ire_refrele(ire); 2798 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 2799 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) || 2800 (ipif->ipif_zoneid != ire->ire_zoneid && 2801 ire->ire_zoneid != ALL_ZONES)) { 2802 continue; 2803 } 2804 2805 switch (ire->ire_type) { 2806 case IRE_DEFAULT: 2807 case IRE_PREFIX: 2808 case IRE_HOST: 2809 mutex_enter(&ire->ire_lock); 2810 v6gw = ire->ire_gateway_addr_v6; 2811 mutex_exit(&ire->ire_lock); 2812 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2813 IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, 2814 NULL, match_flags, ipst); 2815 2816 if (gw_ire != NULL) { 2817 if (save_ire != NULL) { 2818 ire_refrele(save_ire); 2819 } 2820 IRE_REFHOLD(ire); 2821 if (gw_ire->ire_ipif == ipif) { 2822 ire_refrele(gw_ire); 2823 2824 IRB_REFRELE(irb); 2825 return (ire); 2826 } 2827 ire_refrele(gw_ire); 2828 save_ire = ire; 2829 } 2830 break; 2831 case IRE_IF_NORESOLVER: 2832 case IRE_IF_RESOLVER: 2833 if (ire->ire_ipif == ipif) { 2834 if (save_ire != NULL) { 2835 ire_refrele(save_ire); 2836 } 2837 IRE_REFHOLD(ire); 2838 2839 IRB_REFRELE(irb); 2840 return (ire); 2841 } 2842 break; 2843 } 2844 } 2845 IRB_REFRELE(irb); 2846 2847 return (save_ire); 2848 } 2849