1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 27 */ 28 29 /* 30 * This file contains routines that manipulate Internet Routing Entries (IREs). 
31 */ 32 #include <sys/types.h> 33 #include <sys/stream.h> 34 #include <sys/stropts.h> 35 #include <sys/ddi.h> 36 #include <sys/cmn_err.h> 37 38 #include <sys/systm.h> 39 #include <sys/param.h> 40 #include <sys/socket.h> 41 #include <net/if.h> 42 #include <net/route.h> 43 #include <netinet/in.h> 44 #include <net/if_dl.h> 45 #include <netinet/ip6.h> 46 #include <netinet/icmp6.h> 47 48 #include <inet/common.h> 49 #include <inet/mi.h> 50 #include <inet/ip.h> 51 #include <inet/ip6.h> 52 #include <inet/ip_ndp.h> 53 #include <inet/ip_if.h> 54 #include <inet/ip_ire.h> 55 #include <inet/ipclassifier.h> 56 #include <inet/nd.h> 57 #include <sys/kmem.h> 58 #include <sys/zone.h> 59 60 #include <sys/tsol/label.h> 61 #include <sys/tsol/tnet.h> 62 63 static ire_t ire_null; 64 65 static ire_t *ire_ihandle_lookup_onlink_v6(ire_t *cire); 66 static boolean_t ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, 67 const in6_addr_t *mask, const in6_addr_t *gateway, int type, 68 const ipif_t *ipif, zoneid_t zoneid, uint32_t ihandle, 69 const ts_label_t *tsl, int match_flags); 70 static ire_t *ire_init_v6(ire_t *, const in6_addr_t *, const in6_addr_t *, 71 const in6_addr_t *, const in6_addr_t *, uint_t *, queue_t *, queue_t *, 72 ushort_t, ipif_t *, const in6_addr_t *, uint32_t, uint32_t, uint_t, 73 const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *); 74 static ire_t *ip6_ctable_lookup_impl(ire_ctable_args_t *); 75 76 77 /* 78 * Initialize the ire that is specific to IPv6 part and call 79 * ire_init_common to finish it. 
80 */ 81 static ire_t * 82 ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, const in6_addr_t *v6mask, 83 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 84 uint_t *max_fragp, queue_t *rfq, queue_t *stq, ushort_t type, 85 ipif_t *ipif, const in6_addr_t *v6cmask, uint32_t phandle, 86 uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, tsol_gc_t *gc, 87 tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 88 { 89 90 /* 91 * Reject IRE security attribute creation/initialization 92 * if system is not running in Trusted mode. 93 */ 94 if ((gc != NULL || gcgrp != NULL) && !is_system_labeled()) 95 return (NULL); 96 97 98 BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_alloced); 99 ire->ire_addr_v6 = *v6addr; 100 101 if (v6src_addr != NULL) 102 ire->ire_src_addr_v6 = *v6src_addr; 103 if (v6mask != NULL) { 104 ire->ire_mask_v6 = *v6mask; 105 ire->ire_masklen = ip_mask_to_plen_v6(&ire->ire_mask_v6); 106 } 107 if (v6gateway != NULL) 108 ire->ire_gateway_addr_v6 = *v6gateway; 109 110 if (type == IRE_CACHE && v6cmask != NULL) 111 ire->ire_cmask_v6 = *v6cmask; 112 113 /* 114 * Multirouted packets need to have a fragment header added so that 115 * the receiver is able to discard duplicates according to their 116 * fragment identifier. 117 */ 118 if (type == IRE_CACHE && (flags & RTF_MULTIRT)) { 119 ire->ire_frag_flag = IPH_FRAG_HDR; 120 } 121 122 /* ire_init_common will free the mblks upon encountering any failure */ 123 if (!ire_init_common(ire, max_fragp, NULL, rfq, stq, type, ipif, 124 phandle, ihandle, flags, IPV6_VERSION, ulp_info, gc, gcgrp, ipst)) 125 return (NULL); 126 127 return (ire); 128 } 129 130 /* 131 * Similar to ire_create_v6 except that it is called only when 132 * we want to allocate ire as an mblk e.g. we have a external 133 * resolver. Do we need this in IPv6 ? 134 * 135 * IPv6 initializes the ire_nce in ire_add_v6, which expects to 136 * find the ire_nce to be null when it is called. 
So, although 137 * we have a src_nce parameter (in the interest of matching up with 138 * the argument list of the v4 version), we ignore the src_nce 139 * argument here. 140 */ 141 /* ARGSUSED */ 142 ire_t * 143 ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 144 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 145 nce_t *src_nce, queue_t *rfq, queue_t *stq, ushort_t type, 146 ipif_t *ipif, const in6_addr_t *v6cmask, 147 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 148 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 149 { 150 ire_t *ire; 151 ire_t *ret_ire; 152 mblk_t *mp; 153 154 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 155 156 /* Allocate the new IRE. */ 157 mp = allocb(sizeof (ire_t), BPRI_MED); 158 if (mp == NULL) { 159 ip1dbg(("ire_create_mp_v6: alloc failed\n")); 160 return (NULL); 161 } 162 163 ire = (ire_t *)mp->b_rptr; 164 mp->b_wptr = (uchar_t *)&ire[1]; 165 166 /* Start clean. */ 167 *ire = ire_null; 168 ire->ire_mp = mp; 169 mp->b_datap->db_type = IRE_DB_TYPE; 170 171 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 172 NULL, rfq, stq, type, ipif, v6cmask, phandle, 173 ihandle, flags, ulp_info, gc, gcgrp, ipst); 174 175 if (ret_ire == NULL) { 176 freeb(ire->ire_mp); 177 return (NULL); 178 } 179 return (ire); 180 } 181 182 /* 183 * ire_create_v6 is called to allocate and initialize a new IRE. 184 * 185 * NOTE : This is called as writer sometimes though not required 186 * by this function. 187 * 188 * See comments above ire_create_mp_v6() for the rationale behind the 189 * unused src_nce argument. 
190 */ 191 /* ARGSUSED */ 192 ire_t * 193 ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 194 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 195 uint_t *max_fragp, nce_t *src_nce, queue_t *rfq, queue_t *stq, 196 ushort_t type, ipif_t *ipif, const in6_addr_t *v6cmask, 197 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 198 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 199 { 200 ire_t *ire; 201 ire_t *ret_ire; 202 203 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 204 205 ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP); 206 if (ire == NULL) { 207 ip1dbg(("ire_create_v6: alloc failed\n")); 208 return (NULL); 209 } 210 *ire = ire_null; 211 212 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 213 max_fragp, rfq, stq, type, ipif, v6cmask, phandle, 214 ihandle, flags, ulp_info, gc, gcgrp, ipst); 215 216 if (ret_ire == NULL) { 217 kmem_cache_free(ire_cache, ire); 218 return (NULL); 219 } 220 ASSERT(ret_ire == ire); 221 return (ire); 222 } 223 224 /* 225 * Find an IRE_INTERFACE for the multicast group. 226 * Allows different routes for multicast addresses 227 * in the unicast routing table (akin to FF::0/8 but could be more specific) 228 * which point at different interfaces. This is used when IPV6_MULTICAST_IF 229 * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't 230 * specify the interface to join on. 231 * 232 * Supports link-local addresses by following the ipif/ill when recursing. 233 */ 234 ire_t * 235 ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst) 236 { 237 ire_t *ire; 238 ipif_t *ipif = NULL; 239 int match_flags = MATCH_IRE_TYPE; 240 in6_addr_t gw_addr_v6; 241 242 ire = ire_ftable_lookup_v6(group, 0, 0, 0, NULL, NULL, 243 zoneid, 0, NULL, MATCH_IRE_DEFAULT, ipst); 244 245 /* We search a resolvable ire in case of multirouting. 
*/ 246 if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) { 247 ire_t *cire = NULL; 248 /* 249 * If the route is not resolvable, the looked up ire 250 * may be changed here. In that case, ire_multirt_lookup() 251 * IRE_REFRELE the original ire and change it. 252 */ 253 (void) ire_multirt_lookup_v6(&cire, &ire, MULTIRT_CACHEGW, 254 NULL, ipst); 255 if (cire != NULL) 256 ire_refrele(cire); 257 } 258 if (ire == NULL) 259 return (NULL); 260 /* 261 * Make sure we follow ire_ipif. 262 * 263 * We need to determine the interface route through 264 * which the gateway will be reached. We don't really 265 * care which interface is picked if the interface is 266 * part of a group. 267 */ 268 if (ire->ire_ipif != NULL) { 269 ipif = ire->ire_ipif; 270 match_flags |= MATCH_IRE_ILL_GROUP; 271 } 272 273 switch (ire->ire_type) { 274 case IRE_DEFAULT: 275 case IRE_PREFIX: 276 case IRE_HOST: 277 mutex_enter(&ire->ire_lock); 278 gw_addr_v6 = ire->ire_gateway_addr_v6; 279 mutex_exit(&ire->ire_lock); 280 ire_refrele(ire); 281 ire = ire_ftable_lookup_v6(&gw_addr_v6, 0, 0, 282 IRE_INTERFACE, ipif, NULL, zoneid, 0, 283 NULL, match_flags, ipst); 284 return (ire); 285 case IRE_IF_NORESOLVER: 286 case IRE_IF_RESOLVER: 287 return (ire); 288 default: 289 ire_refrele(ire); 290 return (NULL); 291 } 292 } 293 294 /* 295 * Return any local address. We use this to target ourselves 296 * when the src address was specified as 'default'. 297 * Preference for IRE_LOCAL entries. 
298 */ 299 ire_t * 300 ire_lookup_local_v6(zoneid_t zoneid, ip_stack_t *ipst) 301 { 302 ire_t *ire; 303 irb_t *irb; 304 ire_t *maybe = NULL; 305 int i; 306 307 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 308 irb = &ipst->ips_ip_cache_table_v6[i]; 309 if (irb->irb_ire == NULL) 310 continue; 311 rw_enter(&irb->irb_lock, RW_READER); 312 for (ire = irb->irb_ire; ire; ire = ire->ire_next) { 313 if ((ire->ire_marks & IRE_MARK_CONDEMNED) || 314 ire->ire_zoneid != zoneid && 315 ire->ire_zoneid != ALL_ZONES) 316 continue; 317 switch (ire->ire_type) { 318 case IRE_LOOPBACK: 319 if (maybe == NULL) { 320 IRE_REFHOLD(ire); 321 maybe = ire; 322 } 323 break; 324 case IRE_LOCAL: 325 if (maybe != NULL) { 326 ire_refrele(maybe); 327 } 328 IRE_REFHOLD(ire); 329 rw_exit(&irb->irb_lock); 330 return (ire); 331 } 332 } 333 rw_exit(&irb->irb_lock); 334 } 335 return (maybe); 336 } 337 338 /* 339 * This function takes a mask and returns number of bits set in the 340 * mask (the represented prefix length). Assumes a contiguous mask. 341 */ 342 int 343 ip_mask_to_plen_v6(const in6_addr_t *v6mask) 344 { 345 int bits; 346 int plen = IPV6_ABITS; 347 int i; 348 349 for (i = 3; i >= 0; i--) { 350 if (v6mask->s6_addr32[i] == 0) { 351 plen -= 32; 352 continue; 353 } 354 bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1; 355 if (bits == 0) 356 break; 357 plen -= bits; 358 } 359 360 return (plen); 361 } 362 363 /* 364 * Convert a prefix length to the mask for that prefix. 365 * Returns the argument bitmask. 366 */ 367 in6_addr_t * 368 ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask) 369 { 370 uint32_t *ptr; 371 372 if (plen < 0 || plen > IPV6_ABITS) 373 return (NULL); 374 *bitmask = ipv6_all_zeros; 375 376 ptr = (uint32_t *)bitmask; 377 while (plen > 32) { 378 *ptr++ = 0xffffffffU; 379 plen -= 32; 380 } 381 *ptr = htonl(0xffffffffU << (32 - plen)); 382 return (bitmask); 383 } 384 385 /* 386 * Add a fully initialized IRE to an appropriate 387 * table based on ire_type. 
*
 * The forward table contains IRE_PREFIX/IRE_HOST and
 * IRE_IF_RESOLVER/IRE_IF_NORESOLVER and IRE_DEFAULT.
 *
 * The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK
 * and IRE_CACHE.
 *
 * On entry *ire_p is the IRE to add.  On return *ire_p is the held IRE
 * that is now in the table (either the one passed in or an already
 * existing duplicate) and the function returns 0; on failure the passed
 * IRE has been handed to ire_delete(), *ire_p is NULL and an errno value
 * (EINVAL, ENOMEM, or whatever ire_atomic_start() returned) is returned.
 *
 * NOTE : This function is called as writer though not required
 * by this function.
 */
int
ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func)
{
	ire_t	*ire1;
	int	mask_table_index;
	irb_t	*irb_ptr;
	ire_t	**irep;
	int	flags;
	ire_t	*pire = NULL;
	ill_t	*stq_ill;
	boolean_t ndp_g_lock_held = B_FALSE;
	ire_t	*ire = *ire_p;
	int	error;
	ip_stack_t	*ipst = ire->ire_ipst;

	ASSERT(ire->ire_ipversion == IPV6_VERSION);
	ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */
	ASSERT(ire->ire_nce == NULL);

	/*
	 * Normalize the mask and source address according to the IRE type;
	 * unknown types are rejected.
	 */
	switch (ire->ire_type) {
	case IRE_HOST:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_CACHE:
	case IRE_LOCAL:
	case IRE_LOOPBACK:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		break;
	case IRE_PREFIX:
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_DEFAULT:
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_IF_RESOLVER:
	case IRE_IF_NORESOLVER:
		break;
	default:
		printf("ire_add_v6: ire %p has unrecognized IRE type (%d)\n",
		    (void *)ire, ire->ire_type);
		ire_delete(ire);
		*ire_p = NULL;
		return (EINVAL);
	}

	/* Make sure the address is properly masked. */
	V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6);

	/* Find the appropriate list head (hash bucket). */
	if ((ire->ire_type & IRE_CACHETABLE) == 0) {
		/* IRE goes into Forward Table */
		mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6);
		if ((ipst->ips_ip_forwarding_table_v6[mask_table_index]) ==
		    NULL) {
			irb_t *ptr;
			int i;

			/*
			 * The per-prefix-length hash table is created lazily.
			 * Build it outside the lock and publish it under
			 * ips_ire_ft_init_lock if nobody beat us to it.
			 */
			ptr = (irb_t *)mi_zalloc((
			    ipst->ips_ip6_ftable_hash_size * sizeof (irb_t)));
			if (ptr == NULL) {
				ire_delete(ire);
				*ire_p = NULL;
				return (ENOMEM);
			}
			for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) {
				rw_init(&ptr[i].irb_lock, NULL,
				    RW_DEFAULT, NULL);
			}
			mutex_enter(&ipst->ips_ire_ft_init_lock);
			if (ipst->ips_ip_forwarding_table_v6[
			    mask_table_index] == NULL) {
				ipst->ips_ip_forwarding_table_v6[
				    mask_table_index] = ptr;
				mutex_exit(&ipst->ips_ire_ft_init_lock);
			} else {
				/*
				 * Some other thread won the race in
				 * initializing the forwarding table at the
				 * same index.
				 */
				mutex_exit(&ipst->ips_ire_ft_init_lock);
				for (i = 0; i < ipst->ips_ip6_ftable_hash_size;
				    i++) {
					rw_destroy(&ptr[i].irb_lock);
				}
				mi_free(ptr);
			}
		}
		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[mask_table_index][
		    IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6,
		    ipst->ips_ip6_ftable_hash_size)]);
	} else {
		irb_ptr = &(ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(
		    ire->ire_addr_v6, ipst->ips_ip6_cache_table_size)]);
	}
	/*
	 * For xresolv interfaces (v6 interfaces with an external
	 * address resolver), ip_newroute_v6/ip_newroute_ipif_v6
	 * are unable to prevent the deletion of the interface route
	 * while adding an IRE_CACHE for an on-link destination
	 * in the IRE_IF_RESOLVER case, since the ire has to go to
	 * the external resolver and return. We can't do a REFHOLD on the
	 * associated interface ire for fear of the message being freed
	 * if the external resolver can't resolve the address.
	 * Here we look up the interface ire in the forwarding table
	 * and make sure that the interface route has not been deleted.
	 */
	if (ire->ire_type == IRE_CACHE &&
	    IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6) &&
	    (((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) &&
	    (((ill_t *)ire->ire_stq->q_ptr)->ill_flags & ILLF_XRESOLV)) {

		pire = ire_ihandle_lookup_onlink_v6(ire);
		if (pire == NULL) {
			ire_delete(ire);
			*ire_p = NULL;
			return (EINVAL);
		}
		/* Prevent pire from getting deleted */
		IRB_REFHOLD(pire->ire_bucket);
		/* Has it been removed already? */
		if (pire->ire_marks & IRE_MARK_CONDEMNED) {
			IRB_REFRELE(pire->ire_bucket);
			ire_refrele(pire);
			ire_delete(ire);
			*ire_p = NULL;
			return (EINVAL);
		}
	}

	flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW);
	/*
	 * For IRE_CACHES, MATCH_IRE_IPIF is not enough to check
	 * for duplicates because :
	 *
	 * 1) ire_ipif->ipif_ill and ire_stq->q_ptr could be
	 *    pointing at different ills. A real duplicate is
	 *    a match on both ire_ipif and ire_stq.
	 *
	 * 2) We could have multiple packets trying to create
	 *    an IRE_CACHE for the same ill.
	 *
	 * Moreover, IPIF_NOFAILOVER and IPV6_BOUND_PIF endpoints wants
	 * to go out on a particular ill. Rather than looking at the
	 * packet, we depend on the above for MATCH_IRE_ILL here.
	 *
	 * Unlike IPv4, MATCH_IRE_IPIF is needed here as we could have
	 * multiple IRE_CACHES for an ill for the same destination
	 * with various scoped addresses i.e represented by ipifs.
	 *
	 * MATCH_IRE_ILL is done implicitly below for IRE_CACHES.
	 */
	if (ire->ire_ipif != NULL)
		flags |= MATCH_IRE_IPIF;
	/*
	 * If we are creating hidden ires, make sure we search on
	 * this ill (MATCH_IRE_ILL) and a hidden ire, while we are
	 * searching for duplicates below. Otherwise we could
	 * potentially find an IRE on some other interface
	 * and it may not be a IRE marked with IRE_MARK_HIDDEN. We
	 * shouldn't do this as this will lead to an infinite loop as
	 * eventually we need an hidden ire for this packet to go
	 * out. MATCH_IRE_ILL is already marked above.
	 */
	if (ire->ire_marks & IRE_MARK_HIDDEN) {
		ASSERT(ire->ire_type == IRE_CACHE);
		flags |= MATCH_IRE_MARK_HIDDEN;
	}

	/*
	 * Start the atomic add of the ire. Grab the ill locks,
	 * ill_g_usesrc_lock and the bucket lock. Check for condemned.
	 * To avoid lock order problems, get the ndp6.ndp_g_lock now itself.
	 */
	if (ire->ire_type == IRE_CACHE) {
		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
		ndp_g_lock_held = B_TRUE;
	}

	/*
	 * If ipif or ill is changing ire_atomic_start() may queue the
	 * request and return EINPROGRESS.
	 */

	error = ire_atomic_start(irb_ptr, ire, q, mp, func);
	if (error != 0) {
		if (ndp_g_lock_held)
			mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
		/*
		 * We don't know whether it is a valid ipif or not.
		 * So, set it to NULL. This assumes that the ire has not added
		 * a reference to the ipif.
		 */
		ire->ire_ipif = NULL;
		ire_delete(ire);
		if (pire != NULL) {
			IRB_REFRELE(pire->ire_bucket);
			ire_refrele(pire);
		}
		*ire_p = NULL;
		return (error);
	}
	/*
	 * To avoid creating ires having stale values for the ire_max_frag
	 * we get the latest value atomically here. For more details
	 * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE
	 * in ip_rput_dlpi_writer
	 *
	 * NOTE(review): pire is only non-NULL when the xresolv lookup above
	 * ran.  The non-multicast branch below therefore presumably relies
	 * on callers leaving ire_max_fragp NULL only in that case - confirm
	 * against the callers.
	 */
	if (ire->ire_max_fragp == NULL) {
		if (IN6_IS_ADDR_MULTICAST(&ire->ire_addr_v6))
			ire->ire_max_frag = ire->ire_ipif->ipif_mtu;
		else
			ire->ire_max_frag = pire->ire_max_frag;
	} else {
		uint_t  max_frag;

		max_frag = *ire->ire_max_fragp;
		ire->ire_max_fragp = NULL;
		ire->ire_max_frag = max_frag;
	}

	/*
	 * Atomically check for duplicate and insert in the table.
	 */
	for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
		if (ire1->ire_marks & IRE_MARK_CONDEMNED)
			continue;

		if (ire->ire_type == IRE_CACHE) {
			/*
			 * We do MATCH_IRE_ILL implicitly here for IRE_CACHES.
			 * As ire_ipif and ire_stq could point to two
			 * different ills, we can't pass just ire_ipif to
			 * ire_match_args and get a match on both ills.
			 * This is just needed for duplicate checks here and
			 * so we don't add an extra argument to
			 * ire_match_args for this. Do it locally.
			 *
			 * NOTE : Currently there is no part of the code
			 * that asks for both MATCH_IRE_IPIF and MATCH_IRE_ILL
			 * match for IRE_CACHEs. Thus we don't want to
			 * extend the arguments to ire_match_args_v6.
			 */
			if (ire1->ire_stq != ire->ire_stq)
				continue;
			/*
			 * Multiroute IRE_CACHEs for a given destination can
			 * have the same ire_ipif, typically if their source
			 * address is forced using RTF_SETSRC, and the same
			 * send-to queue. We differentiate them using the parent
			 * handle.
			 */
			if ((ire1->ire_flags & RTF_MULTIRT) &&
			    (ire->ire_flags & RTF_MULTIRT) &&
			    (ire1->ire_phandle != ire->ire_phandle))
				continue;
		}
		if (ire1->ire_zoneid != ire->ire_zoneid)
			continue;
		if (ire_match_args_v6(ire1, &ire->ire_addr_v6,
		    &ire->ire_mask_v6, &ire->ire_gateway_addr_v6,
		    ire->ire_type, ire->ire_ipif, ire->ire_zoneid, 0, NULL,
		    flags)) {
			/*
			 * Return the old ire after doing a REFHOLD.
			 * As most of the callers continue to use the IRE
			 * after adding, we return a held ire. This will
			 * avoid a lookup in the caller again. If the callers
			 * don't want to use it, they need to do a REFRELE.
			 */
			ip1dbg(("found dup ire existing %p new %p",
			    (void *)ire1, (void *)ire));
			IRE_REFHOLD(ire1);
			if (ndp_g_lock_held)
				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
			ire_atomic_end(irb_ptr, ire);
			ire_delete(ire);
			if (pire != NULL) {
				/*
				 * Assert that it is
				 * not yet removed from the list.
				 */
				ASSERT(pire->ire_ptpn != NULL);
				IRB_REFRELE(pire->ire_bucket);
				ire_refrele(pire);
			}
			*ire_p = ire1;
			return (0);
		}
	}
	if (ire->ire_type == IRE_CACHE) {
		in6_addr_t gw_addr_v6;
		ill_t	*ill = ire_to_ill(ire);
		char	buf[INET6_ADDRSTRLEN];
		nce_t	*nce;

		/*
		 * All IRE_CACHE types must have a nce.  If this is
		 * not the case the entry will not be added. We need
		 * to make sure that if somebody deletes the nce
		 * after we looked up, they will find this ire and
		 * delete the ire. To delete this ire one needs the
		 * bucket lock which we are still holding here. So,
		 * even if the nce gets deleted after we looked up,
		 * this ire  will get deleted.
		 *
		 * NOTE : Don't need the ire_lock for accessing
		 * ire_gateway_addr_v6 as it is appearing first
		 * time on the list and rts_setgwr_v6 could not
		 * be changing this.
		 */
		gw_addr_v6 = ire->ire_gateway_addr_v6;
		if (IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) {
			nce = ndp_lookup_v6(ill, &ire->ire_addr_v6, B_TRUE);
		} else {
			nce = ndp_lookup_v6(ill, &gw_addr_v6, B_TRUE);
		}
		/* No nce: share the cleanup code at the failed label below. */
		if (nce == NULL)
			goto failed;

		/* Pair of refhold, refrele just to get the tracing right */
		NCE_REFHOLD_TO_REFHOLD_NOTR(nce);
		/*
		 * Atomically make sure that new IREs don't point
		 * to an NCE that is logically deleted (CONDEMNED).
		 * ndp_delete() first marks the NCE CONDEMNED.
		 * This ensures that the nce_refcnt won't increase
		 * due to new nce_lookups or due to addition of new IREs
		 * pointing to this NCE. Then ndp_delete() cleans up
		 * existing references. If we don't do it atomically here,
		 * ndp_delete() -> nce_ire_delete() will not be able to
		 * clean up the IRE list completely, and the nce_refcnt
		 * won't go down to zero.
		 */
		mutex_enter(&nce->nce_lock);
		if (ill->ill_flags & ILLF_XRESOLV) {
			/*
			 * If we used an external resolver, we may not
			 * have gone through neighbor discovery to get here.
			 * Must update the nce_state before the next check.
			 */
			if (nce->nce_state == ND_INCOMPLETE)
				nce->nce_state = ND_REACHABLE;
		}
		if (nce->nce_state == ND_INCOMPLETE ||
		    (nce->nce_flags & NCE_F_CONDEMNED) ||
		    (nce->nce_state == ND_UNREACHABLE)) {
failed:
			/*
			 * Reached either by falling into this branch with
			 * nce_lock held, or via the goto above with nce
			 * NULL; hence the NULL checks around the nce.
			 */
			if (ndp_g_lock_held)
				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
			if (nce != NULL)
				mutex_exit(&nce->nce_lock);
			ire_atomic_end(irb_ptr, ire);
			ip1dbg(("ire_add_v6: No nce for dst %s \n",
			    inet_ntop(AF_INET6, &ire->ire_addr_v6,
			    buf, sizeof (buf))));
			ire_delete(ire);
			if (pire != NULL) {
				/*
				 * Assert that it is
				 * not yet removed from the list.
				 */
				ASSERT(pire->ire_ptpn != NULL);
				IRB_REFRELE(pire->ire_bucket);
				ire_refrele(pire);
			}
			if (nce != NULL)
				NCE_REFRELE_NOTR(nce);
			*ire_p = NULL;
			return (EINVAL);
		} else {
			ire->ire_nce = nce;
		}
		mutex_exit(&nce->nce_lock);
	}
	/*
	 * Find the first entry that matches ire_addr - provides
	 * tail insertion. *irep will be null if no match.
	 *
	 * NOTE(review): the cast below presumably relies on irb_ire being
	 * the first member of irb_t - confirm against the irb_t definition.
	 */
	irep = (ire_t **)irb_ptr;
	while ((ire1 = *irep) != NULL &&
	    !IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6))
		irep = &ire1->ire_next;
	ASSERT(!(ire->ire_type & IRE_BROADCAST));

	if (*irep != NULL) {
		/*
		 * Find the last ire which matches ire_addr_v6.
		 * Needed to do tail insertion among entries with the same
		 * ire_addr_v6.
		 */
		while (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6,
		    &ire1->ire_addr_v6)) {
			irep = &ire1->ire_next;
			ire1 = *irep;
			if (ire1 == NULL)
				break;
		}
	}

	if (ire->ire_type == IRE_DEFAULT) {
		/*
		 * We keep a count of default gateways which is used when
		 * assigning them as routes.
		 */
		ipst->ips_ipv6_ire_default_count++;
		ASSERT(ipst->ips_ipv6_ire_default_count != 0); /* Wraparound */
	}
	/* Insert at *irep */
	ire1 = *irep;
	if (ire1 != NULL)
		ire1->ire_ptpn = &ire->ire_next;
	ire->ire_next = ire1;
	/* Link the new one in. */
	ire->ire_ptpn = irep;
	/*
	 * ire_walk routines de-reference ire_next without holding
	 * a lock. Before we point to the new ire, we want to make
	 * sure the store that sets the ire_next of the new ire
	 * reaches global visibility, so that ire_walk routines
	 * don't see a truncated list of ires i.e if the ire_next
	 * of the new ire gets set after we do "*irep = ire" due
	 * to re-ordering, the ire_walk thread will see a NULL
	 * once it accesses the ire_next of the new ire.
	 * membar_producer() makes sure that the following store
	 * happens *after* all of the above stores.
	 */
	membar_producer();
	*irep = ire;
	ire->ire_bucket = irb_ptr;
	/*
	 * We return a bumped up IRE above. Keep it symmetrical
	 * so that the callers will always have to release. This
	 * helps the callers of this function because they continue
	 * to use the IRE after adding and hence they don't have to
	 * lookup again after we return the IRE.
	 *
	 * NOTE : We don't have to use atomics as this is appearing
	 * in the list for the first time and no one else can bump
	 * up the reference count on this yet.
	 */
	IRE_REFHOLD_LOCKED(ire);
	BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_inserted);
	irb_ptr->irb_ire_cnt++;
	if (ire->ire_marks & IRE_MARK_TEMPORARY)
		irb_ptr->irb_tmp_ire_cnt++;

	/* Account for the new IRE on its ipif and on the ill of its stq. */
	if (ire->ire_ipif != NULL) {
		DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ire->ire_ipif,
		    (char *), "ire", (void *), ire);
		ire->ire_ipif->ipif_ire_cnt++;
		if (ire->ire_stq != NULL) {
			stq_ill = (ill_t *)ire->ire_stq->q_ptr;
			DTRACE_PROBE3(ill__incr__cnt, (ill_t *), stq_ill,
			    (char *), "ire", (void *), ire);
			stq_ill->ill_ire_cnt++;
		}
	} else {
		ASSERT(ire->ire_stq == NULL);
	}

	if (ndp_g_lock_held)
		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
	ire_atomic_end(irb_ptr, ire);

	if (pire != NULL) {
		/* Assert that it is not removed from the list yet */
		ASSERT(pire->ire_ptpn != NULL);
		IRB_REFRELE(pire->ire_bucket);
		ire_refrele(pire);
	}

	if (ire->ire_type != IRE_CACHE) {
		/*
		 * For ire's with a host mask see if there is an entry
		 * in the cache. If there is one flush the whole cache as
		 * there might be multiple entries due to RTF_MULTIRT (CGTP).
		 * If no entry is found then there is no need to flush the
		 * cache.
		 */

		if (ip_mask_to_plen_v6(&ire->ire_mask_v6) == IPV6_ABITS) {
			ire_t *lire;
			lire = ire_ctable_lookup_v6(&ire->ire_addr_v6, NULL,
			    IRE_CACHE, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
			    ipst);
			if (lire != NULL) {
				ire_refrele(lire);
				ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
			}
		} else {
			ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
		}
	}

	*ire_p = ire;
	return (0);
}

/*
 * Search for all HOST REDIRECT routes that are
 * pointing at the specified gateway and
 * delete them. This routine is called only
 * when a default gateway is going away.
 */
static void
ire_delete_host_redirects_v6(const in6_addr_t *gateway, ip_stack_t *ipst)
{
	irb_t *irb_ptr;
	irb_t *irb;
	ire_t *ire;
	in6_addr_t gw_addr_v6;
	int i;

	/* get the hash table for HOST routes */
	irb_ptr = ipst->ips_ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)];
	if (irb_ptr == NULL)
		return;
	for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) {
		irb = &irb_ptr[i];
		/* Hold the bucket across the walk while entries are deleted. */
		IRB_REFHOLD(irb);
		for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
			/* Only dynamically created (RTF_DYNAMIC) routes. */
			if (!(ire->ire_flags & RTF_DYNAMIC))
				continue;
			/* Snapshot the gateway under ire_lock. */
			mutex_enter(&ire->ire_lock);
			gw_addr_v6 = ire->ire_gateway_addr_v6;
			mutex_exit(&ire->ire_lock);
			if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway))
				ire_delete(ire);
		}
		IRB_REFRELE(irb);
	}
}

/*
 * Delete all the cache entries with this 'addr'. This is the IPv6 counterpart
 * of ip_ire_clookup_and_delete. The difference being this function does not
 * return any value. IPv6 processing of a gratuitous ARP, as it stands, is
 * different than IPv4 in that, regardless of the presence of a cache entry
 * for this address, an ire_walk_v6 is done. Another difference is that unlike
 * in the case of IPv4 this does not take an ipif_t argument, since it is only
 * called by ip_arp_news and the match is always only on the address.
946 */ 947 void 948 ip_ire_clookup_and_delete_v6(const in6_addr_t *addr, ip_stack_t *ipst) 949 { 950 irb_t *irb; 951 ire_t *cire; 952 boolean_t found = B_FALSE; 953 954 irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 955 ipst->ips_ip6_cache_table_size)]; 956 IRB_REFHOLD(irb); 957 for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) { 958 if (cire->ire_marks & IRE_MARK_CONDEMNED) 959 continue; 960 if (IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, addr)) { 961 962 /* This signifies start of a match */ 963 if (!found) 964 found = B_TRUE; 965 if (cire->ire_type == IRE_CACHE) { 966 if (cire->ire_nce != NULL) 967 ndp_delete(cire->ire_nce); 968 ire_delete_v6(cire); 969 } 970 /* End of the match */ 971 } else if (found) 972 break; 973 } 974 IRB_REFRELE(irb); 975 } 976 977 /* 978 * Delete the specified IRE. 979 * All calls should use ire_delete(). 980 * Sometimes called as writer though not required by this function. 981 * 982 * NOTE : This function is called only if the ire was added 983 * in the list. 984 */ 985 void 986 ire_delete_v6(ire_t *ire) 987 { 988 in6_addr_t gw_addr_v6; 989 ip_stack_t *ipst = ire->ire_ipst; 990 991 ASSERT(ire->ire_refcnt >= 1); 992 ASSERT(ire->ire_ipversion == IPV6_VERSION); 993 994 if (ire->ire_type != IRE_CACHE) 995 ire_flush_cache_v6(ire, IRE_FLUSH_DELETE); 996 if (ire->ire_type == IRE_DEFAULT) { 997 /* 998 * when a default gateway is going away 999 * delete all the host redirects pointing at that 1000 * gateway. 1001 */ 1002 mutex_enter(&ire->ire_lock); 1003 gw_addr_v6 = ire->ire_gateway_addr_v6; 1004 mutex_exit(&ire->ire_lock); 1005 ire_delete_host_redirects_v6(&gw_addr_v6, ipst); 1006 } 1007 } 1008 1009 /* 1010 * ire_walk routine to delete all IRE_CACHE and IRE_HOST type redirect 1011 * entries. 
1012 */ 1013 /*ARGSUSED1*/ 1014 void 1015 ire_delete_cache_v6(ire_t *ire, char *arg) 1016 { 1017 char addrstr1[INET6_ADDRSTRLEN]; 1018 char addrstr2[INET6_ADDRSTRLEN]; 1019 1020 if ((ire->ire_type & IRE_CACHE) || 1021 (ire->ire_flags & RTF_DYNAMIC)) { 1022 ip1dbg(("ire_delete_cache_v6: deleted %s type %d through %s\n", 1023 inet_ntop(AF_INET6, &ire->ire_addr_v6, 1024 addrstr1, sizeof (addrstr1)), 1025 ire->ire_type, 1026 inet_ntop(AF_INET6, &ire->ire_gateway_addr_v6, 1027 addrstr2, sizeof (addrstr2)))); 1028 ire_delete(ire); 1029 } 1030 1031 } 1032 1033 /* 1034 * ire_walk routine to delete all IRE_CACHE/IRE_HOST type redirect entries 1035 * that have a given gateway address. 1036 */ 1037 void 1038 ire_delete_cache_gw_v6(ire_t *ire, char *addr) 1039 { 1040 in6_addr_t *gw_addr = (in6_addr_t *)addr; 1041 char buf1[INET6_ADDRSTRLEN]; 1042 char buf2[INET6_ADDRSTRLEN]; 1043 in6_addr_t ire_gw_addr_v6; 1044 1045 if (!(ire->ire_type & IRE_CACHE) && 1046 !(ire->ire_flags & RTF_DYNAMIC)) 1047 return; 1048 1049 mutex_enter(&ire->ire_lock); 1050 ire_gw_addr_v6 = ire->ire_gateway_addr_v6; 1051 mutex_exit(&ire->ire_lock); 1052 1053 if (IN6_ARE_ADDR_EQUAL(&ire_gw_addr_v6, gw_addr)) { 1054 ip1dbg(("ire_delete_cache_gw_v6: deleted %s type %d to %s\n", 1055 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, 1056 buf1, sizeof (buf1)), 1057 ire->ire_type, 1058 inet_ntop(AF_INET6, &ire_gw_addr_v6, 1059 buf2, sizeof (buf2)))); 1060 ire_delete(ire); 1061 } 1062 } 1063 1064 /* 1065 * Remove all IRE_CACHE entries that match 1066 * the ire specified. (Sometimes called 1067 * as writer though not required by this function.) 1068 * 1069 * The flag argument indicates if the 1070 * flush request is due to addition 1071 * of new route (IRE_FLUSH_ADD) or deletion of old 1072 * route (IRE_FLUSH_DELETE). 1073 * 1074 * This routine takes only the IREs from the forwarding 1075 * table and flushes the corresponding entries from 1076 * the cache table. 
1077 * 1078 * When flushing due to the deletion of an old route, it 1079 * just checks the cache handles (ire_phandle and ire_ihandle) and 1080 * deletes the ones that match. 1081 * 1082 * When flushing due to the creation of a new route, it checks 1083 * if a cache entry's address matches the one in the IRE and 1084 * that the cache entry's parent has a less specific mask than the 1085 * one in IRE. The destination of such a cache entry could be the 1086 * gateway for other cache entries, so we need to flush those as 1087 * well by looking for gateway addresses matching the IRE's address. 1088 */ 1089 void 1090 ire_flush_cache_v6(ire_t *ire, int flag) 1091 { 1092 int i; 1093 ire_t *cire; 1094 irb_t *irb; 1095 ip_stack_t *ipst = ire->ire_ipst; 1096 1097 if (ire->ire_type & IRE_CACHE) 1098 return; 1099 1100 /* 1101 * If a default is just created, there is no point 1102 * in going through the cache, as there will not be any 1103 * cached ires. 1104 */ 1105 if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD) 1106 return; 1107 if (flag == IRE_FLUSH_ADD) { 1108 /* 1109 * This selective flush is 1110 * due to the addition of 1111 * new IRE. 1112 */ 1113 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 1114 irb = &ipst->ips_ip_cache_table_v6[i]; 1115 if ((cire = irb->irb_ire) == NULL) 1116 continue; 1117 IRB_REFHOLD(irb); 1118 for (cire = irb->irb_ire; cire != NULL; 1119 cire = cire->ire_next) { 1120 if (cire->ire_type != IRE_CACHE) 1121 continue; 1122 /* 1123 * If 'cire' belongs to the same subnet 1124 * as the new ire being added, and 'cire' 1125 * is derived from a prefix that is less 1126 * specific than the new ire being added, 1127 * we need to flush 'cire'; for instance, 1128 * when a new interface comes up. 
1129 */ 1130 if ((V6_MASK_EQ_2(cire->ire_addr_v6, 1131 ire->ire_mask_v6, ire->ire_addr_v6) && 1132 (ip_mask_to_plen_v6(&cire->ire_cmask_v6) <= 1133 ire->ire_masklen))) { 1134 ire_delete(cire); 1135 continue; 1136 } 1137 /* 1138 * This is the case when the ire_gateway_addr 1139 * of 'cire' belongs to the same subnet as 1140 * the new ire being added. 1141 * Flushing such ires is sometimes required to 1142 * avoid misrouting: say we have a machine with 1143 * two interfaces (I1 and I2), a default router 1144 * R on the I1 subnet, and a host route to an 1145 * off-link destination D with a gateway G on 1146 * the I2 subnet. 1147 * Under normal operation, we will have an 1148 * on-link cache entry for G and an off-link 1149 * cache entry for D with G as ire_gateway_addr, 1150 * traffic to D will reach its destination 1151 * through gateway G. 1152 * If the administrator does 'ifconfig I2 down', 1153 * the cache entries for D and G will be 1154 * flushed. However, G will now be resolved as 1155 * an off-link destination using R (the default 1156 * router) as gateway. Then D will also be 1157 * resolved as an off-link destination using G 1158 * as gateway - this behavior is due to 1159 * compatibility reasons, see comment in 1160 * ire_ihandle_lookup_offlink(). Traffic to D 1161 * will go to the router R and probably won't 1162 * reach the destination. 1163 * The administrator then does 'ifconfig I2 up'. 1164 * Since G is on the I2 subnet, this routine 1165 * will flush its cache entry. It must also 1166 * flush the cache entry for D, otherwise 1167 * traffic will stay misrouted until the IRE 1168 * times out. 1169 */ 1170 if (V6_MASK_EQ_2(cire->ire_gateway_addr_v6, 1171 ire->ire_mask_v6, ire->ire_addr_v6)) { 1172 ire_delete(cire); 1173 continue; 1174 } 1175 } 1176 IRB_REFRELE(irb); 1177 } 1178 } else { 1179 /* 1180 * delete the cache entries based on 1181 * handle in the IRE as this IRE is 1182 * being deleted/changed. 
1183 */ 1184 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 1185 irb = &ipst->ips_ip_cache_table_v6[i]; 1186 if ((cire = irb->irb_ire) == NULL) 1187 continue; 1188 IRB_REFHOLD(irb); 1189 for (cire = irb->irb_ire; cire != NULL; 1190 cire = cire->ire_next) { 1191 if (cire->ire_type != IRE_CACHE) 1192 continue; 1193 if ((cire->ire_phandle == 0 || 1194 cire->ire_phandle != ire->ire_phandle) && 1195 (cire->ire_ihandle == 0 || 1196 cire->ire_ihandle != ire->ire_ihandle)) 1197 continue; 1198 ire_delete(cire); 1199 } 1200 IRB_REFRELE(irb); 1201 } 1202 } 1203 } 1204 1205 /* 1206 * Matches the arguments passed with the values in the ire. 1207 * 1208 * Note: for match types that match using "ipif" passed in, ipif 1209 * must be checked for non-NULL before calling this routine. 1210 */ 1211 static boolean_t 1212 ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask, 1213 const in6_addr_t *gateway, int type, const ipif_t *ipif, zoneid_t zoneid, 1214 uint32_t ihandle, const ts_label_t *tsl, int match_flags) 1215 { 1216 in6_addr_t masked_addr; 1217 in6_addr_t gw_addr_v6; 1218 ill_t *ire_ill = NULL, *dst_ill; 1219 ill_t *ipif_ill = NULL; 1220 ill_group_t *ire_ill_group = NULL; 1221 ill_group_t *ipif_ill_group = NULL; 1222 ipif_t *src_ipif; 1223 1224 ASSERT(ire->ire_ipversion == IPV6_VERSION); 1225 ASSERT(addr != NULL); 1226 ASSERT(mask != NULL); 1227 ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL); 1228 ASSERT((!(match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP))) || 1229 (ipif != NULL && ipif->ipif_isv6)); 1230 1231 /* 1232 * HIDDEN cache entries have to be looked up specifically with 1233 * MATCH_IRE_MARK_HIDDEN. MATCH_IRE_MARK_HIDDEN is usually set 1234 * when the interface is FAILED or INACTIVE. In that case, 1235 * any IRE_CACHES that exists should be marked with 1236 * IRE_MARK_HIDDEN. So, we don't really need to match below 1237 * for IRE_MARK_HIDDEN. But we do so for consistency. 
1238 */ 1239 if (!(match_flags & MATCH_IRE_MARK_HIDDEN) && 1240 (ire->ire_marks & IRE_MARK_HIDDEN)) 1241 return (B_FALSE); 1242 1243 if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid && 1244 ire->ire_zoneid != ALL_ZONES) { 1245 /* 1246 * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is 1247 * valid and does not match that of ire_zoneid, a failure to 1248 * match is reported at this point. Otherwise, since some IREs 1249 * that are available in the global zone can be used in local 1250 * zones, additional checks need to be performed: 1251 * 1252 * IRE_CACHE and IRE_LOOPBACK entries should 1253 * never be matched in this situation. 1254 * 1255 * IRE entries that have an interface associated with them 1256 * should in general not match unless they are an IRE_LOCAL 1257 * or in the case when MATCH_IRE_DEFAULT has been set in 1258 * the caller. In the case of the former, checking of the 1259 * other fields supplied should take place. 1260 * 1261 * In the case where MATCH_IRE_DEFAULT has been set, 1262 * all of the ipif's associated with the IRE's ill are 1263 * checked to see if there is a matching zoneid. If any 1264 * one ipif has a matching zoneid, this IRE is a 1265 * potential candidate so checking of the other fields 1266 * takes place. 1267 * 1268 * In the case where the IRE_INTERFACE has a usable source 1269 * address (indicated by ill_usesrc_ifindex) in the 1270 * correct zone then it's permitted to return this IRE 1271 */ 1272 if (match_flags & MATCH_IRE_ZONEONLY) 1273 return (B_FALSE); 1274 if (ire->ire_type & (IRE_CACHE | IRE_LOOPBACK)) 1275 return (B_FALSE); 1276 /* 1277 * Note, IRE_INTERFACE can have the stq as NULL. For 1278 * example, if the default multicast route is tied to 1279 * the loopback address. 
1280 */ 1281 if ((ire->ire_type & IRE_INTERFACE) && 1282 (ire->ire_stq != NULL)) { 1283 dst_ill = (ill_t *)ire->ire_stq->q_ptr; 1284 /* 1285 * If there is a usable source address in the 1286 * zone, then it's ok to return an 1287 * IRE_INTERFACE 1288 */ 1289 if ((dst_ill->ill_usesrc_ifindex != 0) && 1290 (src_ipif = ipif_select_source_v6(dst_ill, addr, 1291 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid)) 1292 != NULL) { 1293 ip3dbg(("ire_match_args: src_ipif %p" 1294 " dst_ill %p", (void *)src_ipif, 1295 (void *)dst_ill)); 1296 ipif_refrele(src_ipif); 1297 } else { 1298 ip3dbg(("ire_match_args: src_ipif NULL" 1299 " dst_ill %p\n", (void *)dst_ill)); 1300 return (B_FALSE); 1301 } 1302 } 1303 if (ire->ire_ipif != NULL && ire->ire_type != IRE_LOCAL && 1304 !(ire->ire_type & IRE_INTERFACE)) { 1305 ipif_t *tipif; 1306 1307 if ((match_flags & MATCH_IRE_DEFAULT) == 0) 1308 return (B_FALSE); 1309 mutex_enter(&ire->ire_ipif->ipif_ill->ill_lock); 1310 for (tipif = ire->ire_ipif->ipif_ill->ill_ipif; 1311 tipif != NULL; tipif = tipif->ipif_next) { 1312 if (IPIF_CAN_LOOKUP(tipif) && 1313 (tipif->ipif_flags & IPIF_UP) && 1314 (tipif->ipif_zoneid == zoneid || 1315 tipif->ipif_zoneid == ALL_ZONES)) 1316 break; 1317 } 1318 mutex_exit(&ire->ire_ipif->ipif_ill->ill_lock); 1319 if (tipif == NULL) 1320 return (B_FALSE); 1321 } 1322 } 1323 1324 if (match_flags & MATCH_IRE_GW) { 1325 mutex_enter(&ire->ire_lock); 1326 gw_addr_v6 = ire->ire_gateway_addr_v6; 1327 mutex_exit(&ire->ire_lock); 1328 } 1329 /* 1330 * For IRE_CACHES, MATCH_IRE_ILL/ILL_GROUP really means that 1331 * somebody wants to send out on a particular interface which 1332 * is given by ire_stq and hence use ire_stq to derive the ill 1333 * value. ire_ipif for IRE_CACHES is just the 1334 * means of getting a source address i.e ire_src_addr_v6 = 1335 * ire->ire_ipif->ipif_src_addr_v6. 
1336 */ 1337 if (match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP)) { 1338 ire_ill = ire_to_ill(ire); 1339 if (ire_ill != NULL) 1340 ire_ill_group = ire_ill->ill_group; 1341 ipif_ill = ipif->ipif_ill; 1342 ipif_ill_group = ipif_ill->ill_group; 1343 } 1344 1345 /* No ire_addr_v6 bits set past the mask */ 1346 ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6, 1347 ire->ire_addr_v6)); 1348 V6_MASK_COPY(*addr, *mask, masked_addr); 1349 1350 if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) && 1351 ((!(match_flags & MATCH_IRE_GW)) || 1352 IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) && 1353 ((!(match_flags & MATCH_IRE_TYPE)) || 1354 (ire->ire_type & type)) && 1355 ((!(match_flags & MATCH_IRE_SRC)) || 1356 IN6_ARE_ADDR_EQUAL(&ire->ire_src_addr_v6, 1357 &ipif->ipif_v6src_addr)) && 1358 ((!(match_flags & MATCH_IRE_IPIF)) || 1359 (ire->ire_ipif == ipif)) && 1360 ((!(match_flags & MATCH_IRE_MARK_HIDDEN)) || 1361 (ire->ire_type != IRE_CACHE || 1362 ire->ire_marks & IRE_MARK_HIDDEN)) && 1363 ((!(match_flags & MATCH_IRE_ILL)) || 1364 (ire_ill == ipif_ill)) && 1365 ((!(match_flags & MATCH_IRE_IHANDLE)) || 1366 (ire->ire_ihandle == ihandle)) && 1367 ((!(match_flags & MATCH_IRE_ILL_GROUP)) || 1368 (ire_ill == ipif_ill) || 1369 (ire_ill_group != NULL && 1370 ire_ill_group == ipif_ill_group)) && 1371 ((!(match_flags & MATCH_IRE_SECATTR)) || 1372 (!is_system_labeled()) || 1373 (tsol_ire_match_gwattr(ire, tsl) == 0))) { 1374 /* We found the matched IRE */ 1375 return (B_TRUE); 1376 } 1377 return (B_FALSE); 1378 } 1379 1380 /* 1381 * Lookup for a route in all the tables 1382 */ 1383 ire_t * 1384 ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 1385 const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 1386 zoneid_t zoneid, const ts_label_t *tsl, int flags, ip_stack_t *ipst) 1387 { 1388 ire_t *ire = NULL; 1389 1390 /* 1391 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 1392 * MATCH_IRE_ILL is set. 
1393 */ 1394 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 1395 (ipif == NULL)) 1396 return (NULL); 1397 1398 /* 1399 * might be asking for a cache lookup, 1400 * This is not best way to lookup cache, 1401 * user should call ire_cache_lookup directly. 1402 * 1403 * If MATCH_IRE_TYPE was set, first lookup in the cache table and then 1404 * in the forwarding table, if the applicable type flags were set. 1405 */ 1406 if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) { 1407 ire = ire_ctable_lookup_v6(addr, gateway, type, ipif, zoneid, 1408 tsl, flags, ipst); 1409 if (ire != NULL) 1410 return (ire); 1411 } 1412 if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) { 1413 ire = ire_ftable_lookup_v6(addr, mask, gateway, type, ipif, 1414 pire, zoneid, 0, tsl, flags, ipst); 1415 } 1416 return (ire); 1417 } 1418 1419 /* 1420 * Lookup a route in forwarding table. 1421 * specific lookup is indicated by passing the 1422 * required parameters and indicating the 1423 * match required in flag field. 1424 * 1425 * Looking for default route can be done in three ways 1426 * 1) pass mask as ipv6_all_zeros and set MATCH_IRE_MASK in flags field 1427 * along with other matches. 1428 * 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags 1429 * field along with other matches. 1430 * 3) if the destination and mask are passed as zeros. 1431 * 1432 * A request to return a default route if no route 1433 * is found, can be specified by setting MATCH_IRE_DEFAULT 1434 * in flags. 1435 * 1436 * It does not support recursion more than one level. It 1437 * will do recursive lookup only when the lookup maps to 1438 * a prefix or default route and MATCH_IRE_RECURSIVE flag is passed. 1439 * 1440 * If the routing table is setup to allow more than one level 1441 * of recursion, the cleaning up cache table will not work resulting 1442 * in invalid routing. 1443 * 1444 * Supports link-local addresses by following the ipif/ill when recursing. 
1445 * 1446 * NOTE : When this function returns NULL, pire has already been released. 1447 * pire is valid only when this function successfully returns an 1448 * ire. 1449 */ 1450 ire_t * 1451 ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 1452 const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 1453 zoneid_t zoneid, uint32_t ihandle, const ts_label_t *tsl, int flags, 1454 ip_stack_t *ipst) 1455 { 1456 irb_t *irb_ptr; 1457 ire_t *rire; 1458 ire_t *ire = NULL; 1459 ire_t *saved_ire; 1460 nce_t *nce; 1461 int i; 1462 in6_addr_t gw_addr_v6; 1463 1464 ASSERT(addr != NULL); 1465 ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL); 1466 ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 1467 ASSERT(ipif == NULL || ipif->ipif_isv6); 1468 1469 /* 1470 * When we return NULL from this function, we should make 1471 * sure that *pire is NULL so that the callers will not 1472 * wrongly REFRELE the pire. 1473 */ 1474 if (pire != NULL) 1475 *pire = NULL; 1476 /* 1477 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 1478 * MATCH_IRE_ILL is set. 1479 */ 1480 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 1481 (ipif == NULL)) 1482 return (NULL); 1483 1484 /* 1485 * If the mask is known, the lookup 1486 * is simple, if the mask is not known 1487 * we need to search. 
1488 */ 1489 if (flags & MATCH_IRE_MASK) { 1490 uint_t masklen; 1491 1492 masklen = ip_mask_to_plen_v6(mask); 1493 if (ipst->ips_ip_forwarding_table_v6[masklen] == NULL) 1494 return (NULL); 1495 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[masklen][ 1496 IRE_ADDR_MASK_HASH_V6(*addr, *mask, 1497 ipst->ips_ip6_ftable_hash_size)]); 1498 rw_enter(&irb_ptr->irb_lock, RW_READER); 1499 for (ire = irb_ptr->irb_ire; ire != NULL; 1500 ire = ire->ire_next) { 1501 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1502 continue; 1503 if (ire_match_args_v6(ire, addr, mask, gateway, type, 1504 ipif, zoneid, ihandle, tsl, flags)) 1505 goto found_ire; 1506 } 1507 rw_exit(&irb_ptr->irb_lock); 1508 } else { 1509 /* 1510 * In this case we don't know the mask, we need to 1511 * search the table assuming different mask sizes. 1512 * we start with 128 bit mask, we don't allow default here. 1513 */ 1514 for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) { 1515 in6_addr_t tmpmask; 1516 1517 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) 1518 continue; 1519 (void) ip_plen_to_mask_v6(i, &tmpmask); 1520 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][ 1521 IRE_ADDR_MASK_HASH_V6(*addr, tmpmask, 1522 ipst->ips_ip6_ftable_hash_size)]; 1523 rw_enter(&irb_ptr->irb_lock, RW_READER); 1524 for (ire = irb_ptr->irb_ire; ire != NULL; 1525 ire = ire->ire_next) { 1526 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1527 continue; 1528 if (ire_match_args_v6(ire, addr, 1529 &ire->ire_mask_v6, gateway, type, ipif, 1530 zoneid, ihandle, tsl, flags)) 1531 goto found_ire; 1532 } 1533 rw_exit(&irb_ptr->irb_lock); 1534 } 1535 } 1536 1537 /* 1538 * We come here if no route has yet been found. 1539 * 1540 * Handle the case where default route is 1541 * requested by specifying type as one of the possible 1542 * types for that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE). 
1543 * 1544 * If MATCH_IRE_MASK is specified, then the appropriate default route 1545 * would have been found above if it exists so it isn't looked up here. 1546 * If MATCH_IRE_DEFAULT was also specified, then a default route will be 1547 * searched for later. 1548 */ 1549 if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE && 1550 (type & (IRE_DEFAULT | IRE_INTERFACE))) { 1551 if (ipst->ips_ip_forwarding_table_v6[0] != NULL) { 1552 /* addr & mask is zero for defaults */ 1553 irb_ptr = &ipst->ips_ip_forwarding_table_v6[0][ 1554 IRE_ADDR_HASH_V6(ipv6_all_zeros, 1555 ipst->ips_ip6_ftable_hash_size)]; 1556 rw_enter(&irb_ptr->irb_lock, RW_READER); 1557 for (ire = irb_ptr->irb_ire; ire != NULL; 1558 ire = ire->ire_next) { 1559 1560 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1561 continue; 1562 1563 if (ire_match_args_v6(ire, addr, 1564 &ipv6_all_zeros, gateway, type, ipif, 1565 zoneid, ihandle, tsl, flags)) 1566 goto found_ire; 1567 } 1568 rw_exit(&irb_ptr->irb_lock); 1569 } 1570 } 1571 /* 1572 * We come here only if no route is found. 1573 * see if the default route can be used which is allowed 1574 * only if the default matching criteria is specified. 1575 * The ipv6_ire_default_count tracks the number of IRE_DEFAULT 1576 * entries. However, the ip_forwarding_table_v6[0] also contains 1577 * interface routes thus the count can be zero. 1578 */ 1579 saved_ire = NULL; 1580 if ((flags & (MATCH_IRE_DEFAULT | MATCH_IRE_MASK)) == 1581 MATCH_IRE_DEFAULT) { 1582 ire_t *ire_origin; 1583 uint_t g_index; 1584 uint_t index; 1585 1586 if (ipst->ips_ip_forwarding_table_v6[0] == NULL) 1587 return (NULL); 1588 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[0])[0]; 1589 1590 /* 1591 * Keep a tab on the bucket while looking the IRE_DEFAULT 1592 * entries. We need to keep track of a particular IRE 1593 * (ire_origin) so this ensures that it will not be unlinked 1594 * from the hash list during the recursive lookup below. 
1595 */ 1596 IRB_REFHOLD(irb_ptr); 1597 ire = irb_ptr->irb_ire; 1598 if (ire == NULL) { 1599 IRB_REFRELE(irb_ptr); 1600 return (NULL); 1601 } 1602 1603 /* 1604 * Get the index first, since it can be changed by other 1605 * threads. Then get to the right default route skipping 1606 * default interface routes if any. As we hold a reference on 1607 * the IRE bucket, ipv6_ire_default_count can only increase so 1608 * we can't reach the end of the hash list unexpectedly. 1609 */ 1610 if (ipst->ips_ipv6_ire_default_count != 0) { 1611 g_index = ipst->ips_ipv6_ire_default_index++; 1612 index = g_index % ipst->ips_ipv6_ire_default_count; 1613 while (index != 0) { 1614 if (!(ire->ire_type & IRE_INTERFACE)) 1615 index--; 1616 ire = ire->ire_next; 1617 } 1618 ASSERT(ire != NULL); 1619 } else { 1620 /* 1621 * No default route, so we only have default interface 1622 * routes: don't enter the first loop. 1623 */ 1624 ire = NULL; 1625 } 1626 1627 /* 1628 * Round-robin the default routers list looking for a neighbor 1629 * that matches the passed in parameters and is reachable. If 1630 * none found, just return a route from the default router list 1631 * if it exists. If we can't find a default route (IRE_DEFAULT), 1632 * look for interface default routes. 1633 * We start with the ire we found above and we walk the hash 1634 * list until we're back where we started, see 1635 * ire_get_next_default_ire(). It doesn't matter if default 1636 * routes are added or deleted by other threads - we know this 1637 * ire will stay in the list because we hold a reference on the 1638 * ire bucket. 1639 * NB: if we only have interface default routes, ire is NULL so 1640 * we don't even enter this loop (see above). 
1641 */ 1642 ire_origin = ire; 1643 for (; ire != NULL; 1644 ire = ire_get_next_default_ire(ire, ire_origin)) { 1645 1646 if (ire_match_args_v6(ire, addr, 1647 &ipv6_all_zeros, gateway, type, ipif, 1648 zoneid, ihandle, tsl, flags)) { 1649 int match_flags; 1650 1651 /* 1652 * We have something to work with. 1653 * If we can find a resolved/reachable 1654 * entry, we will use this. Otherwise 1655 * we'll try to find an entry that has 1656 * a resolved cache entry. We will fallback 1657 * on this if we don't find anything else. 1658 */ 1659 if (saved_ire == NULL) 1660 saved_ire = ire; 1661 mutex_enter(&ire->ire_lock); 1662 gw_addr_v6 = ire->ire_gateway_addr_v6; 1663 mutex_exit(&ire->ire_lock); 1664 match_flags = MATCH_IRE_ILL_GROUP | 1665 MATCH_IRE_SECATTR; 1666 rire = ire_ctable_lookup_v6(&gw_addr_v6, NULL, 1667 0, ire->ire_ipif, zoneid, tsl, match_flags, 1668 ipst); 1669 if (rire != NULL) { 1670 nce = rire->ire_nce; 1671 if (nce != NULL && 1672 NCE_ISREACHABLE(nce) && 1673 nce->nce_flags & NCE_F_ISROUTER) { 1674 ire_refrele(rire); 1675 IRE_REFHOLD(ire); 1676 IRB_REFRELE(irb_ptr); 1677 goto found_ire_held; 1678 } else if (nce != NULL && 1679 !(nce->nce_flags & 1680 NCE_F_ISROUTER)) { 1681 /* 1682 * Make sure we don't use 1683 * this ire 1684 */ 1685 if (saved_ire == ire) 1686 saved_ire = NULL; 1687 } 1688 ire_refrele(rire); 1689 } else if (ipst-> 1690 ips_ipv6_ire_default_count > 1 && 1691 zoneid != GLOBAL_ZONEID) { 1692 /* 1693 * When we're in a local zone, we're 1694 * only interested in default routers 1695 * that are reachable through ipifs 1696 * within our zone. 1697 * The potentially expensive call to 1698 * ire_route_lookup_v6() is avoided when 1699 * we have only one default route. 
1700 */ 1701 int ire_match_flags = MATCH_IRE_TYPE | 1702 MATCH_IRE_SECATTR; 1703 1704 if (ire->ire_ipif != NULL) { 1705 ire_match_flags |= 1706 MATCH_IRE_ILL_GROUP; 1707 } 1708 rire = ire_route_lookup_v6(&gw_addr_v6, 1709 NULL, NULL, IRE_INTERFACE, 1710 ire->ire_ipif, NULL, 1711 zoneid, tsl, ire_match_flags, ipst); 1712 if (rire != NULL) { 1713 ire_refrele(rire); 1714 saved_ire = ire; 1715 } else if (saved_ire == ire) { 1716 /* 1717 * Make sure we don't use 1718 * this ire 1719 */ 1720 saved_ire = NULL; 1721 } 1722 } 1723 } 1724 } 1725 if (saved_ire != NULL) { 1726 ire = saved_ire; 1727 IRE_REFHOLD(ire); 1728 IRB_REFRELE(irb_ptr); 1729 goto found_ire_held; 1730 } else { 1731 /* 1732 * Look for a interface default route matching the 1733 * args passed in. No round robin here. Just pick 1734 * the right one. 1735 */ 1736 for (ire = irb_ptr->irb_ire; ire != NULL; 1737 ire = ire->ire_next) { 1738 1739 if (!(ire->ire_type & IRE_INTERFACE)) 1740 continue; 1741 1742 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1743 continue; 1744 1745 if (ire_match_args_v6(ire, addr, 1746 &ipv6_all_zeros, gateway, type, ipif, 1747 zoneid, ihandle, tsl, flags)) { 1748 IRE_REFHOLD(ire); 1749 IRB_REFRELE(irb_ptr); 1750 goto found_ire_held; 1751 } 1752 } 1753 IRB_REFRELE(irb_ptr); 1754 } 1755 } 1756 ASSERT(ire == NULL); 1757 ip1dbg(("ire_ftable_lookup_v6: returning NULL ire")); 1758 return (NULL); 1759 found_ire: 1760 ASSERT((ire->ire_marks & IRE_MARK_CONDEMNED) == 0); 1761 IRE_REFHOLD(ire); 1762 rw_exit(&irb_ptr->irb_lock); 1763 1764 found_ire_held: 1765 if ((flags & MATCH_IRE_RJ_BHOLE) && 1766 (ire->ire_flags & (RTF_BLACKHOLE | RTF_REJECT))) { 1767 return (ire); 1768 } 1769 /* 1770 * At this point, IRE that was found must be an IRE_FORWARDTABLE 1771 * or IRE_CACHETABLE type. If this is a recursive lookup and an 1772 * IRE_INTERFACE type was found, return that. 
If it was some other 1773 * IRE_FORWARDTABLE type of IRE (one of the prefix types), then it 1774 * is necessary to fill in the parent IRE pointed to by pire, and 1775 * then lookup the gateway address of the parent. For backwards 1776 * compatiblity, if this lookup returns an 1777 * IRE other than a IRE_CACHETABLE or IRE_INTERFACE, then one more level 1778 * of lookup is done. 1779 */ 1780 if (flags & MATCH_IRE_RECURSIVE) { 1781 const ipif_t *gw_ipif; 1782 int match_flags = MATCH_IRE_DSTONLY; 1783 1784 if (ire->ire_type & IRE_INTERFACE) 1785 return (ire); 1786 if (pire != NULL) 1787 *pire = ire; 1788 /* 1789 * If we can't find an IRE_INTERFACE or the caller has not 1790 * asked for pire, we need to REFRELE the saved_ire. 1791 */ 1792 saved_ire = ire; 1793 1794 /* 1795 * Currently MATCH_IRE_ILL is never used with 1796 * (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT) while 1797 * sending out packets as MATCH_IRE_ILL is used only 1798 * for communicating with on-link hosts. We can't assert 1799 * that here as RTM_GET calls this function with 1800 * MATCH_IRE_ILL | MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE. 1801 * We have already used the MATCH_IRE_ILL in determining 1802 * the right prefix route at this point. To match the 1803 * behavior of how we locate routes while sending out 1804 * packets, we don't want to use MATCH_IRE_ILL below 1805 * while locating the interface route. 1806 */ 1807 if (ire->ire_ipif != NULL) 1808 match_flags |= MATCH_IRE_ILL_GROUP; 1809 1810 mutex_enter(&ire->ire_lock); 1811 gw_addr_v6 = ire->ire_gateway_addr_v6; 1812 mutex_exit(&ire->ire_lock); 1813 1814 ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 0, 1815 ire->ire_ipif, NULL, zoneid, tsl, match_flags, ipst); 1816 if (ire == NULL) { 1817 /* 1818 * In this case we have to deal with the 1819 * MATCH_IRE_PARENT flag, which means the 1820 * parent has to be returned if ire is NULL. 
1821 * The aim of this is to have (at least) a starting 1822 * ire when we want to look at all of the ires in a 1823 * bucket aimed at a single destination (as is the 1824 * case in ip_newroute_v6 for the RTF_MULTIRT 1825 * flagged routes). 1826 */ 1827 if (flags & MATCH_IRE_PARENT) { 1828 if (pire != NULL) { 1829 /* 1830 * Need an extra REFHOLD, if the 1831 * parent ire is returned via both 1832 * ire and pire. 1833 */ 1834 IRE_REFHOLD(saved_ire); 1835 } 1836 ire = saved_ire; 1837 } else { 1838 ire_refrele(saved_ire); 1839 if (pire != NULL) 1840 *pire = NULL; 1841 } 1842 return (ire); 1843 } 1844 if (ire->ire_type & (IRE_CACHETABLE | IRE_INTERFACE)) { 1845 /* 1846 * If the caller did not ask for pire, release 1847 * it now. 1848 */ 1849 if (pire == NULL) { 1850 ire_refrele(saved_ire); 1851 } 1852 return (ire); 1853 } 1854 match_flags |= MATCH_IRE_TYPE; 1855 mutex_enter(&ire->ire_lock); 1856 gw_addr_v6 = ire->ire_gateway_addr_v6; 1857 mutex_exit(&ire->ire_lock); 1858 gw_ipif = ire->ire_ipif; 1859 ire_refrele(ire); 1860 ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 1861 (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid, 1862 NULL, match_flags, ipst); 1863 if (ire == NULL) { 1864 /* 1865 * In this case we have to deal with the 1866 * MATCH_IRE_PARENT flag, which means the 1867 * parent has to be returned if ire is NULL. 1868 * The aim of this is to have (at least) a starting 1869 * ire when we want to look at all of the ires in a 1870 * bucket aimed at a single destination (as is the 1871 * case in ip_newroute_v6 for the RTF_MULTIRT 1872 * flagged routes). 1873 */ 1874 if (flags & MATCH_IRE_PARENT) { 1875 if (pire != NULL) { 1876 /* 1877 * Need an extra REFHOLD, if the 1878 * parent ire is returned via both 1879 * ire and pire. 
1880 */ 1881 IRE_REFHOLD(saved_ire); 1882 } 1883 ire = saved_ire; 1884 } else { 1885 ire_refrele(saved_ire); 1886 if (pire != NULL) 1887 *pire = NULL; 1888 } 1889 return (ire); 1890 } else if (pire == NULL) { 1891 /* 1892 * If the caller did not ask for pire, release 1893 * it now. 1894 */ 1895 ire_refrele(saved_ire); 1896 } 1897 return (ire); 1898 } 1899 1900 ASSERT(pire == NULL || *pire == NULL); 1901 return (ire); 1902 } 1903 1904 /* 1905 * Delete the IRE cache for the gateway and all IRE caches whose 1906 * ire_gateway_addr_v6 points to this gateway, and allow them to 1907 * be created on demand by ip_newroute_v6. 1908 */ 1909 void 1910 ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid, 1911 ip_stack_t *ipst) 1912 { 1913 irb_t *irb; 1914 ire_t *ire; 1915 1916 irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 1917 ipst->ips_ip6_cache_table_size)]; 1918 IRB_REFHOLD(irb); 1919 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 1920 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1921 continue; 1922 1923 ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 1924 if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, 0, 1925 IRE_CACHE, NULL, zoneid, 0, NULL, MATCH_IRE_TYPE)) { 1926 ire_delete(ire); 1927 } 1928 } 1929 IRB_REFRELE(irb); 1930 1931 ire_walk_v6(ire_delete_cache_gw_v6, (char *)addr, zoneid, ipst); 1932 } 1933 1934 /* 1935 * Looks up cache table for a route. 1936 * specific lookup can be indicated by 1937 * passing the MATCH_* flags and the 1938 * necessary parameters. 
1939 */ 1940 ire_t * 1941 ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway, 1942 int type, const ipif_t *ipif, zoneid_t zoneid, const ts_label_t *tsl, 1943 int flags, ip_stack_t *ipst) 1944 { 1945 ire_ctable_args_t margs; 1946 1947 margs.ict_addr = (void *)addr; 1948 margs.ict_gateway = (void *)gateway; 1949 margs.ict_type = type; 1950 margs.ict_ipif = ipif; 1951 margs.ict_zoneid = zoneid; 1952 margs.ict_tsl = tsl; 1953 margs.ict_flags = flags; 1954 margs.ict_ipst = ipst; 1955 margs.ict_wq = NULL; 1956 1957 return (ip6_ctable_lookup_impl(&margs)); 1958 } 1959 1960 /* 1961 * Lookup cache. Don't return IRE_MARK_HIDDEN entries. Callers 1962 * should use ire_ctable_lookup with MATCH_IRE_MARK_HIDDEN to get 1963 * to the hidden ones. 1964 * 1965 * In general the zoneid has to match (where ALL_ZONES match all of them). 1966 * But for IRE_LOCAL we also need to handle the case where L2 should 1967 * conceptually loop back the packet. This is necessary since neither 1968 * Ethernet drivers nor Ethernet hardware loops back packets sent to their 1969 * own MAC address. This loopback is needed when the normal 1970 * routes (ignoring IREs with different zoneids) would send out the packet on 1971 * the same ill (or ill group) as the ill with which this IRE_LOCAL is 1972 * associated. 1973 * 1974 * Earlier versions of this code always matched an IRE_LOCAL independently of 1975 * the zoneid. We preserve that earlier behavior when 1976 * ip_restrict_interzone_loopback is turned off. 
1977 */ 1978 ire_t * 1979 ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid, 1980 const ts_label_t *tsl, ip_stack_t *ipst) 1981 { 1982 irb_t *irb_ptr; 1983 ire_t *ire; 1984 1985 irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 1986 ipst->ips_ip6_cache_table_size)]; 1987 rw_enter(&irb_ptr->irb_lock, RW_READER); 1988 for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 1989 if (ire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 1990 continue; 1991 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, addr)) { 1992 /* 1993 * Finally, check if the security policy has any 1994 * restriction on using this route for the specified 1995 * message. 1996 */ 1997 if (tsl != NULL && 1998 ire->ire_gw_secattr != NULL && 1999 tsol_ire_match_gwattr(ire, tsl) != 0) { 2000 continue; 2001 } 2002 2003 if (zoneid == ALL_ZONES || ire->ire_zoneid == zoneid || 2004 ire->ire_zoneid == ALL_ZONES) { 2005 IRE_REFHOLD(ire); 2006 rw_exit(&irb_ptr->irb_lock); 2007 return (ire); 2008 } 2009 2010 if (ire->ire_type == IRE_LOCAL) { 2011 if (ipst->ips_ip_restrict_interzone_loopback && 2012 !ire_local_ok_across_zones(ire, zoneid, 2013 (void *)addr, tsl, ipst)) 2014 continue; 2015 2016 IRE_REFHOLD(ire); 2017 rw_exit(&irb_ptr->irb_lock); 2018 return (ire); 2019 } 2020 } 2021 } 2022 rw_exit(&irb_ptr->irb_lock); 2023 return (NULL); 2024 } 2025 2026 /* 2027 * Locate the interface ire that is tied to the cache ire 'cire' via 2028 * cire->ire_ihandle. 2029 * 2030 * We are trying to create the cache ire for an onlink destn. or 2031 * gateway in 'cire'. We are called from ire_add_v6() in the IRE_IF_RESOLVER 2032 * case for xresolv interfaces, after the ire has come back from 2033 * an external resolver. 
2034 */ 2035 static ire_t * 2036 ire_ihandle_lookup_onlink_v6(ire_t *cire) 2037 { 2038 ire_t *ire; 2039 int match_flags; 2040 int i; 2041 int j; 2042 irb_t *irb_ptr; 2043 ip_stack_t *ipst = cire->ire_ipst; 2044 2045 ASSERT(cire != NULL); 2046 2047 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2048 /* 2049 * We know that the mask of the interface ire equals cire->ire_cmask. 2050 * (When ip_newroute_v6() created 'cire' for an on-link destn. 2051 * it set its cmask from the interface ire's mask) 2052 */ 2053 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 2054 NULL, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle, 2055 NULL, match_flags, ipst); 2056 if (ire != NULL) 2057 return (ire); 2058 /* 2059 * If we didn't find an interface ire above, we can't declare failure. 2060 * For backwards compatibility, we need to support prefix routes 2061 * pointing to next hop gateways that are not on-link. 2062 * 2063 * In the resolver/noresolver case, ip_newroute_v6() thinks 2064 * it is creating the cache ire for an onlink destination in 'cire'. 2065 * But 'cire' is not actually onlink, because ire_ftable_lookup_v6() 2066 * cheated it, by doing ire_route_lookup_v6() twice and returning an 2067 * interface ire. 2068 * 2069 * Eg. default - gw1 (line 1) 2070 * gw1 - gw2 (line 2) 2071 * gw2 - hme0 (line 3) 2072 * 2073 * In the above example, ip_newroute_v6() tried to create the cache ire 2074 * 'cire' for gw1, based on the interface route in line 3. The 2075 * ire_ftable_lookup_v6() above fails, because there is 2076 * no interface route to reach gw1. (it is gw2). We fall thru below. 2077 * 2078 * Do a brute force search based on the ihandle in a subset of the 2079 * forwarding tables, corresponding to cire->ire_cmask_v6. Otherwise 2080 * things become very complex, since we don't have 'pire' in this 2081 * case. 
(Also note that this method is not possible in the offlink 2082 * case because we don't know the mask) 2083 */ 2084 i = ip_mask_to_plen_v6(&cire->ire_cmask_v6); 2085 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) 2086 return (NULL); 2087 for (j = 0; j < ipst->ips_ip6_ftable_hash_size; j++) { 2088 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][j]; 2089 rw_enter(&irb_ptr->irb_lock, RW_READER); 2090 for (ire = irb_ptr->irb_ire; ire != NULL; 2091 ire = ire->ire_next) { 2092 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2093 continue; 2094 if ((ire->ire_type & IRE_INTERFACE) && 2095 (ire->ire_ihandle == cire->ire_ihandle)) { 2096 IRE_REFHOLD(ire); 2097 rw_exit(&irb_ptr->irb_lock); 2098 return (ire); 2099 } 2100 } 2101 rw_exit(&irb_ptr->irb_lock); 2102 } 2103 return (NULL); 2104 } 2105 2106 2107 /* 2108 * Locate the interface ire that is tied to the cache ire 'cire' via 2109 * cire->ire_ihandle. 2110 * 2111 * We are trying to create the cache ire for an offlink destn based 2112 * on the cache ire of the gateway in 'cire'. 'pire' is the prefix ire 2113 * as found by ip_newroute_v6(). We are called from ip_newroute_v6() in 2114 * the IRE_CACHE case. 2115 */ 2116 ire_t * 2117 ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire) 2118 { 2119 ire_t *ire; 2120 int match_flags; 2121 in6_addr_t gw_addr; 2122 ipif_t *gw_ipif; 2123 ip_stack_t *ipst = cire->ire_ipst; 2124 2125 ASSERT(cire != NULL && pire != NULL); 2126 2127 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2128 /* 2129 * ip_newroute_v6 calls ire_ftable_lookup with MATCH_IRE_ILL only 2130 * for on-link hosts. We should never be here for onlink. 2131 * Thus, use MATCH_IRE_ILL_GROUP. 2132 */ 2133 if (pire->ire_ipif != NULL) 2134 match_flags |= MATCH_IRE_ILL_GROUP; 2135 /* 2136 * We know that the mask of the interface ire equals cire->ire_cmask. 2137 * (When ip_newroute_v6() created 'cire' for an on-link destn. 
it set 2138 * its cmask from the interface ire's mask) 2139 */ 2140 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 0, 2141 IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2142 NULL, match_flags, ipst); 2143 if (ire != NULL) 2144 return (ire); 2145 /* 2146 * If we didn't find an interface ire above, we can't declare failure. 2147 * For backwards compatibility, we need to support prefix routes 2148 * pointing to next hop gateways that are not on-link. 2149 * 2150 * Assume we are trying to ping some offlink destn, and we have the 2151 * routing table below. 2152 * 2153 * Eg. default - gw1 <--- pire (line 1) 2154 * gw1 - gw2 (line 2) 2155 * gw2 - hme0 (line 3) 2156 * 2157 * If we already have a cache ire for gw1 in 'cire', the 2158 * ire_ftable_lookup_v6 above would have failed, since there is no 2159 * interface ire to reach gw1. We will fallthru below. 2160 * 2161 * Here we duplicate the steps that ire_ftable_lookup_v6() did in 2162 * getting 'cire' from 'pire', in the MATCH_IRE_RECURSIVE case. 2163 * The differences are the following 2164 * i. We want the interface ire only, so we call 2165 * ire_ftable_lookup_v6() instead of ire_route_lookup_v6() 2166 * ii. We look for only prefix routes in the 1st call below. 2167 * ii. We want to match on the ihandle in the 2nd call below. 2168 */ 2169 match_flags = MATCH_IRE_TYPE; 2170 if (pire->ire_ipif != NULL) 2171 match_flags |= MATCH_IRE_ILL_GROUP; 2172 2173 mutex_enter(&pire->ire_lock); 2174 gw_addr = pire->ire_gateway_addr_v6; 2175 mutex_exit(&pire->ire_lock); 2176 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_OFFSUBNET, 2177 pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst); 2178 if (ire == NULL) 2179 return (NULL); 2180 /* 2181 * At this point 'ire' corresponds to the entry shown in line 2. 2182 * gw_addr is 'gw2' in the example above. 
2183 */ 2184 mutex_enter(&ire->ire_lock); 2185 gw_addr = ire->ire_gateway_addr_v6; 2186 mutex_exit(&ire->ire_lock); 2187 gw_ipif = ire->ire_ipif; 2188 ire_refrele(ire); 2189 2190 match_flags |= MATCH_IRE_IHANDLE; 2191 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_INTERFACE, 2192 gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2193 NULL, match_flags, ipst); 2194 return (ire); 2195 } 2196 2197 /* 2198 * Return the IRE_LOOPBACK, IRE_IF_RESOLVER or IRE_IF_NORESOLVER 2199 * ire associated with the specified ipif. 2200 * 2201 * This might occasionally be called when IPIF_UP is not set since 2202 * the IPV6_MULTICAST_IF as well as creating interface routes 2203 * allows specifying a down ipif (ipif_lookup* match ipifs that are down). 2204 * 2205 * Note that if IPIF_NOLOCAL, IPIF_NOXMIT, or IPIF_DEPRECATED is set on 2206 * the ipif this routine might return NULL. 2207 * (Sometimes called as writer though not required by this function.) 2208 */ 2209 ire_t * 2210 ipif_to_ire_v6(const ipif_t *ipif) 2211 { 2212 ire_t *ire; 2213 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2214 2215 ASSERT(ipif->ipif_isv6); 2216 if (ipif->ipif_ire_type == IRE_LOOPBACK) { 2217 ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, NULL, 2218 IRE_LOOPBACK, ipif, ALL_ZONES, NULL, 2219 (MATCH_IRE_TYPE | MATCH_IRE_IPIF), ipst); 2220 } else if (ipif->ipif_flags & IPIF_POINTOPOINT) { 2221 /* In this case we need to lookup destination address. 
*/ 2222 ire = ire_ftable_lookup_v6(&ipif->ipif_v6pp_dst_addr, 2223 &ipv6_all_ones, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 2224 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 2225 MATCH_IRE_MASK), ipst); 2226 } else { 2227 ire = ire_ftable_lookup_v6(&ipif->ipif_v6subnet, 2228 &ipif->ipif_v6net_mask, NULL, IRE_INTERFACE, ipif, NULL, 2229 ALL_ZONES, 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 2230 MATCH_IRE_MASK), ipst); 2231 } 2232 return (ire); 2233 } 2234 2235 /* 2236 * Return B_TRUE if a multirt route is resolvable 2237 * (or if no route is resolved yet), B_FALSE otherwise. 2238 * This only works in the global zone. 2239 */ 2240 boolean_t 2241 ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl, 2242 ip_stack_t *ipst) 2243 { 2244 ire_t *first_fire; 2245 ire_t *first_cire; 2246 ire_t *fire; 2247 ire_t *cire; 2248 irb_t *firb; 2249 irb_t *cirb; 2250 int unres_cnt = 0; 2251 boolean_t resolvable = B_FALSE; 2252 2253 /* Retrieve the first IRE_HOST that matches the destination */ 2254 first_fire = ire_ftable_lookup_v6(v6dstp, &ipv6_all_ones, 0, IRE_HOST, 2255 NULL, NULL, ALL_ZONES, 0, tsl, MATCH_IRE_MASK | MATCH_IRE_TYPE | 2256 MATCH_IRE_SECATTR, ipst); 2257 2258 /* No route at all */ 2259 if (first_fire == NULL) { 2260 return (B_TRUE); 2261 } 2262 2263 firb = first_fire->ire_bucket; 2264 ASSERT(firb); 2265 2266 /* Retrieve the first IRE_CACHE ire for that destination. */ 2267 first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID, tsl, ipst); 2268 2269 /* No resolved route. */ 2270 if (first_cire == NULL) { 2271 ire_refrele(first_fire); 2272 return (B_TRUE); 2273 } 2274 2275 /* At least one route is resolved. */ 2276 2277 cirb = first_cire->ire_bucket; 2278 ASSERT(cirb); 2279 2280 /* Count the number of routes to that dest that are declared. 
*/ 2281 IRB_REFHOLD(firb); 2282 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2283 if (!(fire->ire_flags & RTF_MULTIRT)) 2284 continue; 2285 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, v6dstp)) 2286 continue; 2287 unres_cnt++; 2288 } 2289 IRB_REFRELE(firb); 2290 2291 2292 /* Then subtract the number of routes to that dst that are resolved */ 2293 IRB_REFHOLD(cirb); 2294 for (cire = first_cire; cire != NULL; cire = cire->ire_next) { 2295 if (!(cire->ire_flags & RTF_MULTIRT)) 2296 continue; 2297 if (!IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, v6dstp)) 2298 continue; 2299 if (cire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 2300 continue; 2301 unres_cnt--; 2302 } 2303 IRB_REFRELE(cirb); 2304 2305 /* At least one route is unresolved; search for a resolvable route. */ 2306 if (unres_cnt > 0) 2307 resolvable = ire_multirt_lookup_v6(&first_cire, &first_fire, 2308 MULTIRT_USESTAMP|MULTIRT_CACHEGW, tsl, ipst); 2309 2310 if (first_fire) 2311 ire_refrele(first_fire); 2312 2313 if (first_cire) 2314 ire_refrele(first_cire); 2315 2316 return (resolvable); 2317 } 2318 2319 2320 /* 2321 * Return B_TRUE and update *ire_arg and *fire_arg 2322 * if at least one resolvable route is found. 2323 * Return B_FALSE otherwise (all routes are resolved or 2324 * the remaining unresolved routes are all unresolvable). 2325 * This only works in the global zone. 
2326 */ 2327 boolean_t 2328 ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, 2329 const ts_label_t *tsl, ip_stack_t *ipst) 2330 { 2331 clock_t delta; 2332 ire_t *best_fire = NULL; 2333 ire_t *best_cire = NULL; 2334 ire_t *first_fire; 2335 ire_t *first_cire; 2336 ire_t *fire; 2337 ire_t *cire; 2338 irb_t *firb = NULL; 2339 irb_t *cirb = NULL; 2340 ire_t *gw_ire; 2341 boolean_t already_resolved; 2342 boolean_t res; 2343 in6_addr_t v6dst; 2344 in6_addr_t v6gw; 2345 2346 ip2dbg(("ire_multirt_lookup_v6: *ire_arg %p, *fire_arg %p, " 2347 "flags %04x\n", (void *)*ire_arg, (void *)*fire_arg, flags)); 2348 2349 ASSERT(ire_arg); 2350 ASSERT(fire_arg); 2351 2352 /* Not an IRE_HOST ire; give up. */ 2353 if ((*fire_arg == NULL) || 2354 ((*fire_arg)->ire_type != IRE_HOST)) { 2355 return (B_FALSE); 2356 } 2357 2358 /* This is the first IRE_HOST ire for that destination. */ 2359 first_fire = *fire_arg; 2360 firb = first_fire->ire_bucket; 2361 ASSERT(firb); 2362 2363 mutex_enter(&first_fire->ire_lock); 2364 v6dst = first_fire->ire_addr_v6; 2365 mutex_exit(&first_fire->ire_lock); 2366 2367 ip2dbg(("ire_multirt_lookup_v6: dst %08x\n", 2368 ntohl(V4_PART_OF_V6(v6dst)))); 2369 2370 /* 2371 * Retrieve the first IRE_CACHE ire for that destination; 2372 * if we don't find one, no route for that dest is 2373 * resolved yet. 2374 */ 2375 first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID, tsl, ipst); 2376 if (first_cire) { 2377 cirb = first_cire->ire_bucket; 2378 } 2379 2380 ip2dbg(("ire_multirt_lookup_v6: first_cire %p\n", (void *)first_cire)); 2381 2382 /* 2383 * Search for a resolvable route, giving the top priority 2384 * to routes that can be resolved without any call to the resolver. 2385 */ 2386 IRB_REFHOLD(firb); 2387 2388 if (!IN6_IS_ADDR_MULTICAST(&v6dst)) { 2389 /* 2390 * For all multiroute IRE_HOST ires for that destination, 2391 * check if the route via the IRE_HOST's gateway is 2392 * resolved yet. 
2393 */ 2394 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2395 2396 if (!(fire->ire_flags & RTF_MULTIRT)) 2397 continue; 2398 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2399 continue; 2400 2401 if (fire->ire_gw_secattr != NULL && 2402 tsol_ire_match_gwattr(fire, tsl) != 0) { 2403 continue; 2404 } 2405 2406 mutex_enter(&fire->ire_lock); 2407 v6gw = fire->ire_gateway_addr_v6; 2408 mutex_exit(&fire->ire_lock); 2409 2410 ip2dbg(("ire_multirt_lookup_v6: fire %p, " 2411 "ire_addr %08x, ire_gateway_addr %08x\n", 2412 (void *)fire, 2413 ntohl(V4_PART_OF_V6(fire->ire_addr_v6)), 2414 ntohl(V4_PART_OF_V6(v6gw)))); 2415 2416 already_resolved = B_FALSE; 2417 2418 if (first_cire) { 2419 ASSERT(cirb); 2420 2421 IRB_REFHOLD(cirb); 2422 /* 2423 * For all IRE_CACHE ires for that 2424 * destination. 2425 */ 2426 for (cire = first_cire; 2427 cire != NULL; 2428 cire = cire->ire_next) { 2429 2430 if (!(cire->ire_flags & RTF_MULTIRT)) 2431 continue; 2432 if (!IN6_ARE_ADDR_EQUAL( 2433 &cire->ire_addr_v6, &v6dst)) 2434 continue; 2435 if (cire->ire_marks & 2436 (IRE_MARK_CONDEMNED| 2437 IRE_MARK_HIDDEN)) 2438 continue; 2439 2440 if (cire->ire_gw_secattr != NULL && 2441 tsol_ire_match_gwattr(cire, 2442 tsl) != 0) { 2443 continue; 2444 } 2445 2446 /* 2447 * Check if the IRE_CACHE's gateway 2448 * matches the IRE_HOST's gateway. 2449 */ 2450 if (IN6_ARE_ADDR_EQUAL( 2451 &cire->ire_gateway_addr_v6, 2452 &v6gw)) { 2453 already_resolved = B_TRUE; 2454 break; 2455 } 2456 } 2457 IRB_REFRELE(cirb); 2458 } 2459 2460 /* 2461 * This route is already resolved; 2462 * proceed with next one. 2463 */ 2464 if (already_resolved) { 2465 ip2dbg(("ire_multirt_lookup_v6: found cire %p, " 2466 "already resolved\n", (void *)cire)); 2467 continue; 2468 } 2469 2470 /* 2471 * The route is unresolved; is it actually 2472 * resolvable, i.e. is there a cache or a resolver 2473 * for the gateway? 
2474 */ 2475 gw_ire = ire_route_lookup_v6(&v6gw, 0, 0, 0, NULL, NULL, 2476 ALL_ZONES, tsl, MATCH_IRE_RECURSIVE | 2477 MATCH_IRE_SECATTR, ipst); 2478 2479 ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n", 2480 (void *)gw_ire)); 2481 2482 /* 2483 * This route can be resolved without any call to the 2484 * resolver; if the MULTIRT_CACHEGW flag is set, 2485 * give the top priority to this ire and exit the 2486 * loop. 2487 * This occurs when an resolver reply is processed 2488 * through ip_wput_nondata() 2489 */ 2490 if ((flags & MULTIRT_CACHEGW) && 2491 (gw_ire != NULL) && 2492 (gw_ire->ire_type & IRE_CACHETABLE)) { 2493 /* 2494 * Release the resolver associated to the 2495 * previous candidate best ire, if any. 2496 */ 2497 if (best_cire) { 2498 ire_refrele(best_cire); 2499 ASSERT(best_fire); 2500 } 2501 2502 best_fire = fire; 2503 best_cire = gw_ire; 2504 2505 ip2dbg(("ire_multirt_lookup_v6: found top prio " 2506 "best_fire %p, best_cire %p\n", 2507 (void *)best_fire, (void *)best_cire)); 2508 break; 2509 } 2510 2511 /* 2512 * Compute the time elapsed since our preceding 2513 * attempt to resolve that route. 2514 * If the MULTIRT_USESTAMP flag is set, we take that 2515 * route into account only if this time interval 2516 * exceeds ip_multirt_resolution_interval; 2517 * this prevents us from attempting to resolve a 2518 * broken route upon each sending of a packet. 2519 */ 2520 delta = lbolt - fire->ire_last_used_time; 2521 delta = TICK_TO_MSEC(delta); 2522 2523 res = (boolean_t) 2524 ((delta > ipst-> 2525 ips_ip_multirt_resolution_interval) || 2526 (!(flags & MULTIRT_USESTAMP))); 2527 2528 ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, " 2529 "res %d\n", 2530 (void *)fire, delta, res)); 2531 2532 if (res) { 2533 /* 2534 * A resolver exists for the gateway: save 2535 * the current IRE_HOST ire as a candidate 2536 * best ire. If we later discover that a 2537 * top priority ire exists (i.e. 
no need to 2538 * call the resolver), then this new ire 2539 * will be preferred to the current one. 2540 */ 2541 if (gw_ire != NULL) { 2542 if (best_fire == NULL) { 2543 ASSERT(best_cire == NULL); 2544 2545 best_fire = fire; 2546 best_cire = gw_ire; 2547 2548 ip2dbg(("ire_multirt_lookup_v6:" 2549 "found candidate " 2550 "best_fire %p, " 2551 "best_cire %p\n", 2552 (void *)best_fire, 2553 (void *)best_cire)); 2554 2555 /* 2556 * If MULTIRT_CACHEGW is not 2557 * set, we ignore the top 2558 * priority ires that can 2559 * be resolved without any 2560 * call to the resolver; 2561 * In that case, there is 2562 * actually no need 2563 * to continue the loop. 2564 */ 2565 if (!(flags & 2566 MULTIRT_CACHEGW)) { 2567 break; 2568 } 2569 continue; 2570 } 2571 } else { 2572 /* 2573 * No resolver for the gateway: the 2574 * route is not resolvable. 2575 * If the MULTIRT_SETSTAMP flag is 2576 * set, we stamp the IRE_HOST ire, 2577 * so we will not select it again 2578 * during this resolution interval. 2579 */ 2580 if (flags & MULTIRT_SETSTAMP) 2581 fire->ire_last_used_time = 2582 lbolt; 2583 } 2584 } 2585 2586 if (gw_ire != NULL) 2587 ire_refrele(gw_ire); 2588 } 2589 } else { /* IN6_IS_ADDR_MULTICAST(&v6dst) */ 2590 2591 for (fire = first_fire; 2592 fire != NULL; 2593 fire = fire->ire_next) { 2594 2595 if (!(fire->ire_flags & RTF_MULTIRT)) 2596 continue; 2597 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2598 continue; 2599 2600 if (fire->ire_gw_secattr != NULL && 2601 tsol_ire_match_gwattr(fire, tsl) != 0) { 2602 continue; 2603 } 2604 2605 already_resolved = B_FALSE; 2606 2607 mutex_enter(&fire->ire_lock); 2608 v6gw = fire->ire_gateway_addr_v6; 2609 mutex_exit(&fire->ire_lock); 2610 2611 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2612 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, tsl, 2613 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE | 2614 MATCH_IRE_SECATTR, ipst); 2615 2616 /* No resolver for the gateway; we skip this ire. 
*/ 2617 if (gw_ire == NULL) { 2618 continue; 2619 } 2620 2621 if (first_cire) { 2622 2623 IRB_REFHOLD(cirb); 2624 /* 2625 * For all IRE_CACHE ires for that 2626 * destination. 2627 */ 2628 for (cire = first_cire; 2629 cire != NULL; 2630 cire = cire->ire_next) { 2631 2632 if (!(cire->ire_flags & RTF_MULTIRT)) 2633 continue; 2634 if (!IN6_ARE_ADDR_EQUAL( 2635 &cire->ire_addr_v6, &v6dst)) 2636 continue; 2637 if (cire->ire_marks & 2638 (IRE_MARK_CONDEMNED| 2639 IRE_MARK_HIDDEN)) 2640 continue; 2641 2642 if (cire->ire_gw_secattr != NULL && 2643 tsol_ire_match_gwattr(cire, 2644 tsl) != 0) { 2645 continue; 2646 } 2647 2648 /* 2649 * Cache entries are linked to the 2650 * parent routes using the parent handle 2651 * (ire_phandle). If no cache entry has 2652 * the same handle as fire, fire is 2653 * still unresolved. 2654 */ 2655 ASSERT(cire->ire_phandle != 0); 2656 if (cire->ire_phandle == 2657 fire->ire_phandle) { 2658 already_resolved = B_TRUE; 2659 break; 2660 } 2661 } 2662 IRB_REFRELE(cirb); 2663 } 2664 2665 /* 2666 * This route is already resolved; proceed with 2667 * next one. 2668 */ 2669 if (already_resolved) { 2670 ire_refrele(gw_ire); 2671 continue; 2672 } 2673 2674 /* 2675 * Compute the time elapsed since our preceding 2676 * attempt to resolve that route. 2677 * If the MULTIRT_USESTAMP flag is set, we take 2678 * that route into account only if this time 2679 * interval exceeds ip_multirt_resolution_interval; 2680 * this prevents us from attempting to resolve a 2681 * broken route upon each sending of a packet. 
2682 */ 2683 delta = lbolt - fire->ire_last_used_time; 2684 delta = TICK_TO_MSEC(delta); 2685 2686 res = (boolean_t) 2687 ((delta > ipst-> 2688 ips_ip_multirt_resolution_interval) || 2689 (!(flags & MULTIRT_USESTAMP))); 2690 2691 ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, " 2692 "flags %04x, res %d\n", 2693 (void *)fire, delta, flags, res)); 2694 2695 if (res) { 2696 if (best_cire) { 2697 /* 2698 * Release the resolver associated 2699 * to the preceding candidate best 2700 * ire, if any. 2701 */ 2702 ire_refrele(best_cire); 2703 ASSERT(best_fire); 2704 } 2705 best_fire = fire; 2706 best_cire = gw_ire; 2707 continue; 2708 } 2709 2710 ire_refrele(gw_ire); 2711 } 2712 } 2713 2714 if (best_fire) { 2715 IRE_REFHOLD(best_fire); 2716 } 2717 IRB_REFRELE(firb); 2718 2719 /* Release the first IRE_CACHE we initially looked up, if any. */ 2720 if (first_cire) 2721 ire_refrele(first_cire); 2722 2723 /* Found a resolvable route. */ 2724 if (best_fire) { 2725 ASSERT(best_cire); 2726 2727 if (*fire_arg) 2728 ire_refrele(*fire_arg); 2729 if (*ire_arg) 2730 ire_refrele(*ire_arg); 2731 2732 /* 2733 * Update the passed arguments with the 2734 * resolvable multirt route we found 2735 */ 2736 *fire_arg = best_fire; 2737 *ire_arg = best_cire; 2738 2739 ip2dbg(("ire_multirt_lookup_v6: returning B_TRUE, " 2740 "*fire_arg %p, *ire_arg %p\n", 2741 (void *)best_fire, (void *)best_cire)); 2742 2743 return (B_TRUE); 2744 } 2745 2746 ASSERT(best_cire == NULL); 2747 2748 ip2dbg(("ire_multirt_lookup_v6: returning B_FALSE, *fire_arg %p, " 2749 "*ire_arg %p\n", 2750 (void *)*fire_arg, (void *)*ire_arg)); 2751 2752 /* No resolvable route. */ 2753 return (B_FALSE); 2754 } 2755 2756 2757 /* 2758 * Find an IRE_OFFSUBNET IRE entry for the multicast address 'v6dstp' 2759 * that goes through 'ipif'. As a fallback, a route that goes through 2760 * ipif->ipif_ill can be returned. 
2761 */ 2762 ire_t * 2763 ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp) 2764 { 2765 ire_t *ire; 2766 ire_t *save_ire = NULL; 2767 ire_t *gw_ire; 2768 irb_t *irb; 2769 in6_addr_t v6gw; 2770 int match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL; 2771 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2772 2773 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, NULL, ALL_ZONES, 0, 2774 NULL, MATCH_IRE_DEFAULT, ipst); 2775 2776 if (ire == NULL) 2777 return (NULL); 2778 2779 irb = ire->ire_bucket; 2780 ASSERT(irb); 2781 2782 IRB_REFHOLD(irb); 2783 ire_refrele(ire); 2784 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 2785 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) || 2786 (ipif->ipif_zoneid != ire->ire_zoneid && 2787 ire->ire_zoneid != ALL_ZONES)) { 2788 continue; 2789 } 2790 2791 switch (ire->ire_type) { 2792 case IRE_DEFAULT: 2793 case IRE_PREFIX: 2794 case IRE_HOST: 2795 mutex_enter(&ire->ire_lock); 2796 v6gw = ire->ire_gateway_addr_v6; 2797 mutex_exit(&ire->ire_lock); 2798 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2799 IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, 2800 NULL, match_flags, ipst); 2801 2802 if (gw_ire != NULL) { 2803 if (save_ire != NULL) { 2804 ire_refrele(save_ire); 2805 } 2806 IRE_REFHOLD(ire); 2807 if (gw_ire->ire_ipif == ipif) { 2808 ire_refrele(gw_ire); 2809 2810 IRB_REFRELE(irb); 2811 return (ire); 2812 } 2813 ire_refrele(gw_ire); 2814 save_ire = ire; 2815 } 2816 break; 2817 case IRE_IF_NORESOLVER: 2818 case IRE_IF_RESOLVER: 2819 if (ire->ire_ipif == ipif) { 2820 if (save_ire != NULL) { 2821 ire_refrele(save_ire); 2822 } 2823 IRE_REFHOLD(ire); 2824 2825 IRB_REFRELE(irb); 2826 return (ire); 2827 } 2828 break; 2829 } 2830 } 2831 IRB_REFRELE(irb); 2832 2833 return (save_ire); 2834 } 2835 2836 /* 2837 * This is the implementation of the IPv6 IRE cache lookup procedure. 2838 * Separating the interface from the implementation allows additional 2839 * flexibility when specifying search criteria. 
2840 */ 2841 static ire_t * 2842 ip6_ctable_lookup_impl(ire_ctable_args_t *margs) 2843 { 2844 irb_t *irb_ptr; 2845 ire_t *ire; 2846 ip_stack_t *ipst = margs->ict_ipst; 2847 2848 if ((margs->ict_flags & 2849 (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 2850 (margs->ict_ipif == NULL)) { 2851 return (NULL); 2852 } 2853 2854 irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6( 2855 *((in6_addr_t *)(margs->ict_addr)), 2856 ipst->ips_ip6_cache_table_size)]; 2857 rw_enter(&irb_ptr->irb_lock, RW_READER); 2858 for (ire = irb_ptr->irb_ire; ire != NULL; ire = ire->ire_next) { 2859 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2860 continue; 2861 ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 2862 if (ire_match_args_v6(ire, (in6_addr_t *)margs->ict_addr, 2863 &ire->ire_mask_v6, (in6_addr_t *)margs->ict_gateway, 2864 margs->ict_type, margs->ict_ipif, margs->ict_zoneid, 0, 2865 margs->ict_tsl, margs->ict_flags)) { 2866 IRE_REFHOLD(ire); 2867 rw_exit(&irb_ptr->irb_lock); 2868 return (ire); 2869 } 2870 } 2871 2872 rw_exit(&irb_ptr->irb_lock); 2873 return (NULL); 2874 } 2875