1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 27 */ 28 29 /* 30 * This file contains routines that manipulate Internet Routing Entries (IREs). 
31 */ 32 #include <sys/types.h> 33 #include <sys/stream.h> 34 #include <sys/stropts.h> 35 #include <sys/ddi.h> 36 #include <sys/cmn_err.h> 37 38 #include <sys/systm.h> 39 #include <sys/param.h> 40 #include <sys/socket.h> 41 #include <net/if.h> 42 #include <net/route.h> 43 #include <netinet/in.h> 44 #include <net/if_dl.h> 45 #include <netinet/ip6.h> 46 #include <netinet/icmp6.h> 47 48 #include <inet/common.h> 49 #include <inet/mi.h> 50 #include <inet/ip.h> 51 #include <inet/ip6.h> 52 #include <inet/ip_ndp.h> 53 #include <inet/ip_if.h> 54 #include <inet/ip_ire.h> 55 #include <inet/ipclassifier.h> 56 #include <inet/nd.h> 57 #include <sys/kmem.h> 58 #include <sys/zone.h> 59 60 #include <sys/tsol/label.h> 61 #include <sys/tsol/tnet.h> 62 63 static ire_t ire_null; 64 65 static ire_t *ire_ihandle_lookup_onlink_v6(ire_t *cire); 66 static boolean_t ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, 67 const in6_addr_t *mask, const in6_addr_t *gateway, int type, 68 const ipif_t *ipif, zoneid_t zoneid, uint32_t ihandle, 69 const ts_label_t *tsl, int match_flags); 70 static ire_t *ire_init_v6(ire_t *, const in6_addr_t *, const in6_addr_t *, 71 const in6_addr_t *, const in6_addr_t *, uint_t *, queue_t *, queue_t *, 72 ushort_t, ipif_t *, const in6_addr_t *, uint32_t, uint32_t, uint_t, 73 const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *); 74 static ire_t *ip6_ctable_lookup_impl(ire_ctable_args_t *); 75 76 /* 77 * Initialize the ire that is specific to IPv6 part and call 78 * ire_init_common to finish it. 
79 */ 80 static ire_t * 81 ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, const in6_addr_t *v6mask, 82 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 83 uint_t *max_fragp, queue_t *rfq, queue_t *stq, ushort_t type, 84 ipif_t *ipif, const in6_addr_t *v6cmask, uint32_t phandle, 85 uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, tsol_gc_t *gc, 86 tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 87 { 88 89 /* 90 * Reject IRE security attribute creation/initialization 91 * if system is not running in Trusted mode. 92 */ 93 if ((gc != NULL || gcgrp != NULL) && !is_system_labeled()) 94 return (NULL); 95 96 97 BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_alloced); 98 ire->ire_addr_v6 = *v6addr; 99 100 if (v6src_addr != NULL) 101 ire->ire_src_addr_v6 = *v6src_addr; 102 if (v6mask != NULL) { 103 ire->ire_mask_v6 = *v6mask; 104 ire->ire_masklen = ip_mask_to_plen_v6(&ire->ire_mask_v6); 105 } 106 if (v6gateway != NULL) 107 ire->ire_gateway_addr_v6 = *v6gateway; 108 109 if (type == IRE_CACHE && v6cmask != NULL) 110 ire->ire_cmask_v6 = *v6cmask; 111 112 /* 113 * Multirouted packets need to have a fragment header added so that 114 * the receiver is able to discard duplicates according to their 115 * fragment identifier. 116 */ 117 if (type == IRE_CACHE && (flags & RTF_MULTIRT)) { 118 ire->ire_frag_flag = IPH_FRAG_HDR; 119 } 120 121 /* ire_init_common will free the mblks upon encountering any failure */ 122 if (!ire_init_common(ire, max_fragp, NULL, rfq, stq, type, ipif, 123 phandle, ihandle, flags, IPV6_VERSION, ulp_info, gc, gcgrp, ipst)) 124 return (NULL); 125 126 return (ire); 127 } 128 129 /* 130 * Similar to ire_create_v6 except that it is called only when 131 * we want to allocate ire as an mblk e.g. we have a external 132 * resolver. Do we need this in IPv6 ? 133 * 134 * IPv6 initializes the ire_nce in ire_add_v6, which expects to 135 * find the ire_nce to be null when it is called. 
So, although 136 * we have a src_nce parameter (in the interest of matching up with 137 * the argument list of the v4 version), we ignore the src_nce 138 * argument here. 139 */ 140 /* ARGSUSED */ 141 ire_t * 142 ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 143 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 144 nce_t *src_nce, queue_t *rfq, queue_t *stq, ushort_t type, 145 ipif_t *ipif, const in6_addr_t *v6cmask, 146 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 147 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 148 { 149 ire_t *ire; 150 ire_t *ret_ire; 151 mblk_t *mp; 152 153 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 154 155 /* Allocate the new IRE. */ 156 mp = allocb(sizeof (ire_t), BPRI_MED); 157 if (mp == NULL) { 158 ip1dbg(("ire_create_mp_v6: alloc failed\n")); 159 return (NULL); 160 } 161 162 ire = (ire_t *)mp->b_rptr; 163 mp->b_wptr = (uchar_t *)&ire[1]; 164 165 /* Start clean. */ 166 *ire = ire_null; 167 ire->ire_mp = mp; 168 mp->b_datap->db_type = IRE_DB_TYPE; 169 170 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 171 NULL, rfq, stq, type, ipif, v6cmask, phandle, 172 ihandle, flags, ulp_info, gc, gcgrp, ipst); 173 174 if (ret_ire == NULL) { 175 freeb(ire->ire_mp); 176 return (NULL); 177 } 178 return (ire); 179 } 180 181 /* 182 * ire_create_v6 is called to allocate and initialize a new IRE. 183 * 184 * NOTE : This is called as writer sometimes though not required 185 * by this function. 186 * 187 * See comments above ire_create_mp_v6() for the rationale behind the 188 * unused src_nce argument. 
189 */ 190 /* ARGSUSED */ 191 ire_t * 192 ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 193 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 194 uint_t *max_fragp, nce_t *src_nce, queue_t *rfq, queue_t *stq, 195 ushort_t type, ipif_t *ipif, const in6_addr_t *v6cmask, 196 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 197 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 198 { 199 ire_t *ire; 200 ire_t *ret_ire; 201 202 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 203 204 ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP); 205 if (ire == NULL) { 206 ip1dbg(("ire_create_v6: alloc failed\n")); 207 return (NULL); 208 } 209 *ire = ire_null; 210 211 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 212 max_fragp, rfq, stq, type, ipif, v6cmask, phandle, 213 ihandle, flags, ulp_info, gc, gcgrp, ipst); 214 215 if (ret_ire == NULL) { 216 kmem_cache_free(ire_cache, ire); 217 return (NULL); 218 } 219 ASSERT(ret_ire == ire); 220 return (ire); 221 } 222 223 /* 224 * Find an IRE_INTERFACE for the multicast group. 225 * Allows different routes for multicast addresses 226 * in the unicast routing table (akin to FF::0/8 but could be more specific) 227 * which point at different interfaces. This is used when IPV6_MULTICAST_IF 228 * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't 229 * specify the interface to join on. 230 * 231 * Supports link-local addresses by following the ipif/ill when recursing. 232 */ 233 ire_t * 234 ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst) 235 { 236 ire_t *ire; 237 ipif_t *ipif = NULL; 238 int match_flags = MATCH_IRE_TYPE; 239 in6_addr_t gw_addr_v6; 240 241 ire = ire_ftable_lookup_v6(group, 0, 0, 0, NULL, NULL, 242 zoneid, 0, NULL, MATCH_IRE_DEFAULT, ipst); 243 244 /* We search a resolvable ire in case of multirouting. 
*/ 245 if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) { 246 ire_t *cire = NULL; 247 /* 248 * If the route is not resolvable, the looked up ire 249 * may be changed here. In that case, ire_multirt_lookup() 250 * IRE_REFRELE the original ire and change it. 251 */ 252 (void) ire_multirt_lookup_v6(&cire, &ire, MULTIRT_CACHEGW, 253 NULL, ipst); 254 if (cire != NULL) 255 ire_refrele(cire); 256 } 257 if (ire == NULL) 258 return (NULL); 259 /* 260 * Make sure we follow ire_ipif. 261 * 262 * We need to determine the interface route through 263 * which the gateway will be reached. 264 */ 265 if (ire->ire_ipif != NULL) { 266 ipif = ire->ire_ipif; 267 match_flags |= MATCH_IRE_ILL; 268 } 269 270 switch (ire->ire_type) { 271 case IRE_DEFAULT: 272 case IRE_PREFIX: 273 case IRE_HOST: 274 mutex_enter(&ire->ire_lock); 275 gw_addr_v6 = ire->ire_gateway_addr_v6; 276 mutex_exit(&ire->ire_lock); 277 ire_refrele(ire); 278 ire = ire_ftable_lookup_v6(&gw_addr_v6, 0, 0, 279 IRE_INTERFACE, ipif, NULL, zoneid, 0, 280 NULL, match_flags, ipst); 281 return (ire); 282 case IRE_IF_NORESOLVER: 283 case IRE_IF_RESOLVER: 284 return (ire); 285 default: 286 ire_refrele(ire); 287 return (NULL); 288 } 289 } 290 291 /* 292 * Return any local address. We use this to target ourselves 293 * when the src address was specified as 'default'. 294 * Preference for IRE_LOCAL entries. 
295 */ 296 ire_t * 297 ire_lookup_local_v6(zoneid_t zoneid, ip_stack_t *ipst) 298 { 299 ire_t *ire; 300 irb_t *irb; 301 ire_t *maybe = NULL; 302 int i; 303 304 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 305 irb = &ipst->ips_ip_cache_table_v6[i]; 306 if (irb->irb_ire == NULL) 307 continue; 308 rw_enter(&irb->irb_lock, RW_READER); 309 for (ire = irb->irb_ire; ire; ire = ire->ire_next) { 310 if ((ire->ire_marks & IRE_MARK_CONDEMNED) || 311 ire->ire_zoneid != zoneid && 312 ire->ire_zoneid != ALL_ZONES) 313 continue; 314 switch (ire->ire_type) { 315 case IRE_LOOPBACK: 316 if (maybe == NULL) { 317 IRE_REFHOLD(ire); 318 maybe = ire; 319 } 320 break; 321 case IRE_LOCAL: 322 if (maybe != NULL) { 323 ire_refrele(maybe); 324 } 325 IRE_REFHOLD(ire); 326 rw_exit(&irb->irb_lock); 327 return (ire); 328 } 329 } 330 rw_exit(&irb->irb_lock); 331 } 332 return (maybe); 333 } 334 335 /* 336 * This function takes a mask and returns number of bits set in the 337 * mask (the represented prefix length). Assumes a contiguous mask. 338 */ 339 int 340 ip_mask_to_plen_v6(const in6_addr_t *v6mask) 341 { 342 int bits; 343 int plen = IPV6_ABITS; 344 int i; 345 346 for (i = 3; i >= 0; i--) { 347 if (v6mask->s6_addr32[i] == 0) { 348 plen -= 32; 349 continue; 350 } 351 bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1; 352 if (bits == 0) 353 break; 354 plen -= bits; 355 } 356 357 return (plen); 358 } 359 360 /* 361 * Convert a prefix length to the mask for that prefix. 362 * Returns the argument bitmask. 363 */ 364 in6_addr_t * 365 ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask) 366 { 367 uint32_t *ptr; 368 369 if (plen < 0 || plen > IPV6_ABITS) 370 return (NULL); 371 *bitmask = ipv6_all_zeros; 372 373 ptr = (uint32_t *)bitmask; 374 while (plen > 32) { 375 *ptr++ = 0xffffffffU; 376 plen -= 32; 377 } 378 *ptr = htonl(0xffffffffU << (32 - plen)); 379 return (bitmask); 380 } 381 382 /* 383 * Add a fully initialized IRE to an appropriate 384 * table based on ire_type. 
*
 * The forward table contains IRE_PREFIX/IRE_HOST and
 * IRE_IF_RESOLVER/IRE_IF_NORESOLVER and IRE_DEFAULT.
 *
 * The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK
 * and IRE_CACHE.
 *
 * NOTE : This function is called as writer though not required
 * by this function.
 *
 * Arguments:
 *	ire_p	in: the IRE to insert.  out: on success, the inserted IRE
 *		or an already-present duplicate, held in either case;
 *		on failure, NULL (the passed IRE has been handed to
 *		ire_delete()).
 *	q, mp, func
 *		passed through to ire_atomic_start().
 *
 * Returns 0 on success, EINVAL for an unrecognized ire_type, for a
 * missing or condemned parent interface IRE in the xresolv case, or when
 * no usable nce exists for an IRE_CACHE; ENOMEM when a forwarding-table
 * bucket array cannot be allocated; otherwise the error returned by
 * ire_atomic_start().
 */
int
ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func)
{
	ire_t	*ire1;
	int	mask_table_index;
	irb_t	*irb_ptr;
	ire_t	**irep;
	int	flags;
	ire_t	*pire = NULL;
	ill_t	*stq_ill;
	boolean_t	ndp_g_lock_held = B_FALSE;
	ire_t	*ire = *ire_p;
	int	error;
	ip_stack_t	*ipst = ire->ire_ipst;
	uint_t	marks = 0;

	ASSERT(ire->ire_ipversion == IPV6_VERSION);
	ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */
	ASSERT(ire->ire_nce == NULL);

	/*
	 * IREs with source addresses hosted on interfaces that are under IPMP
	 * should be hidden so that applications don't accidentally end up
	 * sending packets with test addresses as their source addresses, or
	 * sending out interfaces that are e.g. IFF_INACTIVE.  Hide them here.
	 * (We let IREs with unspecified source addresses slip through since
	 * ire_send_v6() will delete them automatically.)
	 */
	if (ire->ire_ipif != NULL && IS_UNDER_IPMP(ire->ire_ipif->ipif_ill) &&
	    !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_src_addr_v6)) {
		DTRACE_PROBE1(ipmp__mark__testhidden, ire_t *, ire);
		marks |= IRE_MARK_TESTHIDDEN;
	}

	/*
	 * Normalize the IRE according to its type: host-like types get a
	 * full-length mask, the test-hidden mark is applied to every type
	 * except IRE_LOCAL/IRE_LOOPBACK, and the source address is cleared
	 * for host/prefix/default routes that do not have RTF_SETSRC.
	 */
	switch (ire->ire_type) {
	case IRE_HOST:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		ire->ire_marks |= marks;
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_CACHE:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		ire->ire_marks |= marks;
		break;
	case IRE_LOCAL:
	case IRE_LOOPBACK:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		break;
	case IRE_PREFIX:
	case IRE_DEFAULT:
		ire->ire_marks |= marks;
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_IF_RESOLVER:
	case IRE_IF_NORESOLVER:
		ire->ire_marks |= marks;
		break;
	default:
		printf("ire_add_v6: ire %p has unrecognized IRE type (%d)\n",
		    (void *)ire, ire->ire_type);
		ire_delete(ire);
		*ire_p = NULL;
		return (EINVAL);
	}

	/* Make sure the address is properly masked. */
	V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6);

	/* Find the appropriate list head (hash bucket). */
	if ((ire->ire_type & IRE_CACHETABLE) == 0) {
		/* IRE goes into Forward Table */
		mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6);
		if ((ipst->ips_ip_forwarding_table_v6[mask_table_index]) ==
		    NULL) {
			irb_t *ptr;
			int i;

			/*
			 * No bucket array for this prefix length yet.
			 * Build one outside the lock and publish it under
			 * ips_ire_ft_init_lock only if nobody beat us to it.
			 */
			ptr = (irb_t *)mi_zalloc((
			    ipst->ips_ip6_ftable_hash_size * sizeof (irb_t)));
			if (ptr == NULL) {
				ire_delete(ire);
				*ire_p = NULL;
				return (ENOMEM);
			}
			for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) {
				rw_init(&ptr[i].irb_lock, NULL,
				    RW_DEFAULT, NULL);
			}
			mutex_enter(&ipst->ips_ire_ft_init_lock);
			if (ipst->ips_ip_forwarding_table_v6[
			    mask_table_index] == NULL) {
				ipst->ips_ip_forwarding_table_v6[
				    mask_table_index] = ptr;
				mutex_exit(&ipst->ips_ire_ft_init_lock);
			} else {
				/*
				 * Some other thread won the race in
				 * initializing the forwarding table at the
				 * same index.
				 */
				mutex_exit(&ipst->ips_ire_ft_init_lock);
				for (i = 0; i < ipst->ips_ip6_ftable_hash_size;
				    i++) {
					rw_destroy(&ptr[i].irb_lock);
				}
				mi_free(ptr);
			}
		}
		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[mask_table_index][
		    IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6,
		    ipst->ips_ip6_ftable_hash_size)]);
	} else {
		irb_ptr = &(ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(
		    ire->ire_addr_v6, ipst->ips_ip6_cache_table_size)]);
	}
	/*
	 * For xresolv interfaces (v6 interfaces with an external
	 * address resolver), ip_newroute_v6/ip_newroute_ipif_v6
	 * are unable to prevent the deletion of the interface route
	 * while adding an IRE_CACHE for an on-link destination
	 * in the IRE_IF_RESOLVER case, since the ire has to go to
	 * the external resolver and return. We can't do a REFHOLD on the
	 * associated interface ire for fear of the message being freed
	 * if the external resolver can't resolve the address.
	 * Here we look up the interface ire in the forwarding table
	 * and make sure that the interface route has not been deleted.
	 */
	if (ire->ire_type == IRE_CACHE &&
	    IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6) &&
	    (((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) &&
	    (((ill_t *)ire->ire_stq->q_ptr)->ill_flags & ILLF_XRESOLV)) {

		pire = ire_ihandle_lookup_onlink_v6(ire);
		if (pire == NULL) {
			ire_delete(ire);
			*ire_p = NULL;
			return (EINVAL);
		}
		/* Prevent pire from getting deleted */
		IRB_REFHOLD(pire->ire_bucket);
		/* Has it been removed already? */
		if (pire->ire_marks & IRE_MARK_CONDEMNED) {
			IRB_REFRELE(pire->ire_bucket);
			ire_refrele(pire);
			ire_delete(ire);
			*ire_p = NULL;
			return (EINVAL);
		}
	}

	flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW);
	/*
	 * For IRE_CACHES, MATCH_IRE_IPIF is not enough to check
	 * for duplicates because :
	 *
	 * 1) ire_ipif->ipif_ill and ire_stq->q_ptr could be
	 *    pointing at different ills. A real duplicate is
	 *    a match on both ire_ipif and ire_stq.
	 *
	 * 2) We could have multiple packets trying to create
	 *    an IRE_CACHE for the same ill.
	 *
	 * Rather than looking at the packet, we depend on the above for
	 * MATCH_IRE_ILL here.
	 *
	 * Unlike IPv4, MATCH_IRE_IPIF is needed here as we could have
	 * multiple IRE_CACHES for an ill for the same destination
	 * with various scoped addresses i.e represented by ipifs.
	 *
	 * MATCH_IRE_ILL is done implicitly below for IRE_CACHES.
	 */
	if (ire->ire_ipif != NULL)
		flags |= MATCH_IRE_IPIF;

	/*
	 * If we are creating a hidden IRE, make sure we search for
	 * hidden IREs when searching for duplicates below.
	 * Otherwise, we might find an IRE on some other interface
	 * that's not marked hidden.
	 */
	if (ire->ire_marks & IRE_MARK_TESTHIDDEN)
		flags |= MATCH_IRE_MARK_TESTHIDDEN;

	/*
	 * Start the atomic add of the ire. Grab the ill locks,
	 * ill_g_usesrc_lock and the bucket lock. Check for condemned.
	 * To avoid lock order problems, get the ndp6.ndp_g_lock now itself.
	 */
	if (ire->ire_type == IRE_CACHE) {
		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
		ndp_g_lock_held = B_TRUE;
	}

	/*
	 * If ipif or ill is changing ire_atomic_start() may queue the
	 * request and return EINPROGRESS.
	 */

	error = ire_atomic_start(irb_ptr, ire, q, mp, func);
	if (error != 0) {
		if (ndp_g_lock_held)
			mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
		/*
		 * We don't know whether it is a valid ipif or not.
		 * So, set it to NULL. This assumes that the ire has not added
		 * a reference to the ipif.
		 */
		ire->ire_ipif = NULL;
		ire_delete(ire);
		if (pire != NULL) {
			IRB_REFRELE(pire->ire_bucket);
			ire_refrele(pire);
		}
		*ire_p = NULL;
		return (error);
	}
	/*
	 * To avoid creating ires having stale values for the ire_max_frag
	 * we get the latest value atomically here. For more details
	 * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE
	 * in ip_rput_dlpi_writer
	 *
	 * NOTE(review): pire is only set by the xresolv on-link lookup
	 * above, yet it is dereferenced here for every non-multicast IRE
	 * whose ire_max_fragp is NULL (and ire_ipif is dereferenced for the
	 * multicast case).  Presumably only IREs built via
	 * ire_create_mp_v6() arrive with a NULL ire_max_fragp -- confirm
	 * against the callers.
	 */
	if (ire->ire_max_fragp == NULL) {
		if (IN6_IS_ADDR_MULTICAST(&ire->ire_addr_v6))
			ire->ire_max_frag = ire->ire_ipif->ipif_mtu;
		else
			ire->ire_max_frag = pire->ire_max_frag;
	} else {
		uint_t max_frag;

		max_frag = *ire->ire_max_fragp;
		ire->ire_max_fragp = NULL;
		ire->ire_max_frag = max_frag;
	}

	/*
	 * Atomically check for duplicate and insert in the table.
	 */
	for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
		if (ire1->ire_marks & IRE_MARK_CONDEMNED)
			continue;

		if (ire->ire_type == IRE_CACHE) {
			/*
			 * We do MATCH_IRE_ILL implicitly here for IRE_CACHES.
			 * As ire_ipif and ire_stq could point to two
			 * different ills, we can't pass just ire_ipif to
			 * ire_match_args and get a match on both ills.
			 * This is just needed for duplicate checks here and
			 * so we don't add an extra argument to
			 * ire_match_args for this. Do it locally.
			 *
			 * NOTE : Currently there is no part of the code
			 * that asks for both MATCH_IRE_IPIF and MATCH_IRE_ILL
			 * match for IRE_CACHEs. Thus we don't want to
			 * extend the arguments to ire_match_args_v6.
			 */
			if (ire1->ire_stq != ire->ire_stq)
				continue;
			/*
			 * Multiroute IRE_CACHEs for a given destination can
			 * have the same ire_ipif, typically if their source
			 * address is forced using RTF_SETSRC, and the same
			 * send-to queue. We differentiate them using the parent
			 * handle.
			 */
			if ((ire1->ire_flags & RTF_MULTIRT) &&
			    (ire->ire_flags & RTF_MULTIRT) &&
			    (ire1->ire_phandle != ire->ire_phandle))
				continue;
		}
		if (ire1->ire_zoneid != ire->ire_zoneid)
			continue;
		if (ire_match_args_v6(ire1, &ire->ire_addr_v6,
		    &ire->ire_mask_v6, &ire->ire_gateway_addr_v6,
		    ire->ire_type, ire->ire_ipif, ire->ire_zoneid, 0, NULL,
		    flags)) {
			/*
			 * Return the old ire after doing a REFHOLD.
			 * As most of the callers continue to use the IRE
			 * after adding, we return a held ire. This will
			 * avoid a lookup in the caller again. If the callers
			 * don't want to use it, they need to do a REFRELE.
			 */
			ip1dbg(("found dup ire existing %p new %p",
			    (void *)ire1, (void *)ire));
			IRE_REFHOLD(ire1);
			if (ndp_g_lock_held)
				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
			ire_atomic_end(irb_ptr, ire);
			ire_delete(ire);
			if (pire != NULL) {
				/*
				 * Assert that it is
				 * not yet removed from the list.
				 */
				ASSERT(pire->ire_ptpn != NULL);
				IRB_REFRELE(pire->ire_bucket);
				ire_refrele(pire);
			}
			*ire_p = ire1;
			return (0);
		}
	}
	if (ire->ire_type == IRE_CACHE) {
		const in6_addr_t *addr_v6;
		ill_t	*ill = ire_to_ill(ire);
		char	buf[INET6_ADDRSTRLEN];
		nce_t	*nce;

		/*
		 * All IRE_CACHE types must have a nce.  If this is
		 * not the case the entry will not be added. We need
		 * to make sure that if somebody deletes the nce
		 * after we looked up, they will find this ire and
		 * delete the ire. To delete this ire one needs the
		 * bucket lock which we are still holding here. So,
		 * even if the nce gets deleted after we looked up,
		 * this ire will get deleted.
		 *
		 * NOTE : Don't need the ire_lock for accessing
		 * ire_gateway_addr_v6 as it is appearing first
		 * time on the list and rts_setgwr_v6 could not
		 * be changing this.
		 */
		addr_v6 = &ire->ire_gateway_addr_v6;
		if (IN6_IS_ADDR_UNSPECIFIED(addr_v6))
			addr_v6 = &ire->ire_addr_v6;

		/* nce fastpath is per-ill; don't match across illgrp */
		nce = ndp_lookup_v6(ill, B_FALSE, addr_v6, B_TRUE);
		if (nce == NULL)
			goto failed;

		/* Pair of refhold, refrele just to get the tracing right */
		NCE_REFHOLD_TO_REFHOLD_NOTR(nce);
		/*
		 * Atomically make sure that new IREs don't point
		 * to an NCE that is logically deleted (CONDEMNED).
		 * ndp_delete() first marks the NCE CONDEMNED.
		 * This ensures that the nce_refcnt won't increase
		 * due to new nce_lookups or due to addition of new IREs
		 * pointing to this NCE. Then ndp_delete() cleans up
		 * existing references. If we don't do it atomically here,
		 * ndp_delete() -> nce_ire_delete() will not be able to
		 * clean up the IRE list completely, and the nce_refcnt
		 * won't go down to zero.
		 */
		mutex_enter(&nce->nce_lock);
		if (ill->ill_flags & ILLF_XRESOLV) {
			/*
			 * If we used an external resolver, we may not
			 * have gone through neighbor discovery to get here.
			 * Must update the nce_state before the next check.
			 */
			if (nce->nce_state == ND_INCOMPLETE)
				nce->nce_state = ND_REACHABLE;
		}
		if (nce->nce_state == ND_INCOMPLETE ||
		    (nce->nce_flags & NCE_F_CONDEMNED) ||
		    (nce->nce_state == ND_UNREACHABLE)) {
			/*
			 * Common failure exit.  It is also entered by the
			 * goto above with nce == NULL, in which case
			 * nce_lock was never taken; hence the NULL checks.
			 */
failed:
			if (ndp_g_lock_held)
				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
			if (nce != NULL)
				mutex_exit(&nce->nce_lock);
			ire_atomic_end(irb_ptr, ire);
			ip1dbg(("ire_add_v6: No nce for dst %s \n",
			    inet_ntop(AF_INET6, &ire->ire_addr_v6,
			    buf, sizeof (buf))));
			ire_delete(ire);
			if (pire != NULL) {
				/*
				 * Assert that it is
				 * not yet removed from the list.
				 */
				ASSERT(pire->ire_ptpn != NULL);
				IRB_REFRELE(pire->ire_bucket);
				ire_refrele(pire);
			}
			if (nce != NULL)
				NCE_REFRELE_NOTR(nce);
			*ire_p = NULL;
			return (EINVAL);
		} else {
			ire->ire_nce = nce;
		}
		mutex_exit(&nce->nce_lock);
	}
	/*
	 * Find the first entry that matches ire_addr - provides
	 * tail insertion. *irep will be null if no match.
	 */
	irep = (ire_t **)irb_ptr;
	while ((ire1 = *irep) != NULL &&
	    !IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6))
		irep = &ire1->ire_next;
	ASSERT(!(ire->ire_type & IRE_BROADCAST));

	if (*irep != NULL) {
		/*
		 * Find the last ire which matches ire_addr_v6.
		 * Needed to do tail insertion among entries with the same
		 * ire_addr_v6.
		 */
		while (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6,
		    &ire1->ire_addr_v6)) {
			irep = &ire1->ire_next;
			ire1 = *irep;
			if (ire1 == NULL)
				break;
		}
	}

	if (ire->ire_type == IRE_DEFAULT) {
		/*
		 * We keep a count of default gateways which is used when
		 * assigning them as routes.
		 */
		ipst->ips_ipv6_ire_default_count++;
		ASSERT(ipst->ips_ipv6_ire_default_count != 0); /* Wraparound */
	}
	/* Insert at *irep */
	ire1 = *irep;
	if (ire1 != NULL)
		ire1->ire_ptpn = &ire->ire_next;
	ire->ire_next = ire1;
	/* Link the new one in. */
	ire->ire_ptpn = irep;
	/*
	 * ire_walk routines de-reference ire_next without holding
	 * a lock. Before we point to the new ire, we want to make
	 * sure the store that sets the ire_next of the new ire
	 * reaches global visibility, so that ire_walk routines
	 * don't see a truncated list of ires i.e if the ire_next
	 * of the new ire gets set after we do "*irep = ire" due
	 * to re-ordering, the ire_walk thread will see a NULL
	 * once it accesses the ire_next of the new ire.
	 * membar_producer() makes sure that the following store
	 * happens *after* all of the above stores.
	 */
	membar_producer();
	*irep = ire;
	ire->ire_bucket = irb_ptr;
	/*
	 * We return a bumped up IRE above. Keep it symmetrical
	 * so that the callers will always have to release. This
	 * helps the callers of this function because they continue
	 * to use the IRE after adding and hence they don't have to
	 * lookup again after we return the IRE.
	 *
	 * NOTE : We don't have to use atomics as this is appearing
	 * in the list for the first time and no one else can bump
	 * up the reference count on this yet.
	 */
	IRE_REFHOLD_LOCKED(ire);
	BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_inserted);
	irb_ptr->irb_ire_cnt++;
	if (ire->ire_marks & IRE_MARK_TEMPORARY)
		irb_ptr->irb_tmp_ire_cnt++;

	/* Account for the new IRE on its ipif and on the send-to ill. */
	if (ire->ire_ipif != NULL) {
		DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ire->ire_ipif,
		    (char *), "ire", (void *), ire);
		ire->ire_ipif->ipif_ire_cnt++;
		if (ire->ire_stq != NULL) {
			stq_ill = (ill_t *)ire->ire_stq->q_ptr;
			DTRACE_PROBE3(ill__incr__cnt, (ill_t *), stq_ill,
			    (char *), "ire", (void *), ire);
			stq_ill->ill_ire_cnt++;
		}
	} else {
		ASSERT(ire->ire_stq == NULL);
	}

	if (ndp_g_lock_held)
		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
	ire_atomic_end(irb_ptr, ire);

	if (pire != NULL) {
		/* Assert that it is not removed from the list yet */
		ASSERT(pire->ire_ptpn != NULL);
		IRB_REFRELE(pire->ire_bucket);
		ire_refrele(pire);
	}

	if (ire->ire_type != IRE_CACHE) {
		/*
		 * For ire's with a host mask see if there is an entry
		 * in the cache. If there is one flush the whole cache as
		 * there might be multiple entries due to RTF_MULTIRT (CGTP).
		 * If no entry is found then there is no need to flush the
		 * cache.
		 */

		if (ip_mask_to_plen_v6(&ire->ire_mask_v6) == IPV6_ABITS) {
			ire_t *lire;
			lire = ire_ctable_lookup_v6(&ire->ire_addr_v6, NULL,
			    IRE_CACHE, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
			    ipst);
			if (lire != NULL) {
				ire_refrele(lire);
				ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
			}
		} else {
			ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
		}
	}

	*ire_p = ire;
	return (0);
}

/*
 * Search for all HOST REDIRECT routes that are
 * pointing at the specified gateway and
 * delete them. This routine is called only
 * when a default gateway is going away.
918 */ 919 static void 920 ire_delete_host_redirects_v6(const in6_addr_t *gateway, ip_stack_t *ipst) 921 { 922 irb_t *irb_ptr; 923 irb_t *irb; 924 ire_t *ire; 925 in6_addr_t gw_addr_v6; 926 int i; 927 928 /* get the hash table for HOST routes */ 929 irb_ptr = ipst->ips_ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)]; 930 if (irb_ptr == NULL) 931 return; 932 for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) { 933 irb = &irb_ptr[i]; 934 IRB_REFHOLD(irb); 935 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 936 if (!(ire->ire_flags & RTF_DYNAMIC)) 937 continue; 938 mutex_enter(&ire->ire_lock); 939 gw_addr_v6 = ire->ire_gateway_addr_v6; 940 mutex_exit(&ire->ire_lock); 941 if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) 942 ire_delete(ire); 943 } 944 IRB_REFRELE(irb); 945 } 946 } 947 948 /* 949 * Delete all the cache entries with this 'addr'. This is the IPv6 counterpart 950 * of ip_ire_clookup_and_delete. The difference being this function does not 951 * return any value. IPv6 processing of a gratuitous ARP, as it stands, is 952 * different than IPv4 in that, regardless of the presence of a cache entry 953 * for this address, an ire_walk_v6 is done. Another difference is that unlike 954 * in the case of IPv4 this does not take an ipif_t argument, since it is only 955 * called by ip_arp_news and the match is always only on the address. 
956 */ 957 void 958 ip_ire_clookup_and_delete_v6(const in6_addr_t *addr, ip_stack_t *ipst) 959 { 960 irb_t *irb; 961 ire_t *cire; 962 boolean_t found = B_FALSE; 963 964 irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 965 ipst->ips_ip6_cache_table_size)]; 966 IRB_REFHOLD(irb); 967 for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) { 968 if (cire->ire_marks & IRE_MARK_CONDEMNED) 969 continue; 970 if (IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, addr)) { 971 972 /* This signifies start of a match */ 973 if (!found) 974 found = B_TRUE; 975 if (cire->ire_type == IRE_CACHE) { 976 if (cire->ire_nce != NULL) 977 ndp_delete(cire->ire_nce); 978 ire_delete_v6(cire); 979 } 980 /* End of the match */ 981 } else if (found) 982 break; 983 } 984 IRB_REFRELE(irb); 985 } 986 987 /* 988 * Delete the specified IRE. 989 * All calls should use ire_delete(). 990 * Sometimes called as writer though not required by this function. 991 * 992 * NOTE : This function is called only if the ire was added 993 * in the list. 994 */ 995 void 996 ire_delete_v6(ire_t *ire) 997 { 998 in6_addr_t gw_addr_v6; 999 ip_stack_t *ipst = ire->ire_ipst; 1000 1001 ASSERT(ire->ire_refcnt >= 1); 1002 ASSERT(ire->ire_ipversion == IPV6_VERSION); 1003 1004 if (ire->ire_type != IRE_CACHE) 1005 ire_flush_cache_v6(ire, IRE_FLUSH_DELETE); 1006 if (ire->ire_type == IRE_DEFAULT) { 1007 /* 1008 * when a default gateway is going away 1009 * delete all the host redirects pointing at that 1010 * gateway. 1011 */ 1012 mutex_enter(&ire->ire_lock); 1013 gw_addr_v6 = ire->ire_gateway_addr_v6; 1014 mutex_exit(&ire->ire_lock); 1015 ire_delete_host_redirects_v6(&gw_addr_v6, ipst); 1016 } 1017 } 1018 1019 /* 1020 * ire_walk routine to delete all IRE_CACHE and IRE_HOST type redirect 1021 * entries. 
1022 */ 1023 /*ARGSUSED1*/ 1024 void 1025 ire_delete_cache_v6(ire_t *ire, char *arg) 1026 { 1027 char addrstr1[INET6_ADDRSTRLEN]; 1028 char addrstr2[INET6_ADDRSTRLEN]; 1029 1030 if ((ire->ire_type & IRE_CACHE) || 1031 (ire->ire_flags & RTF_DYNAMIC)) { 1032 ip1dbg(("ire_delete_cache_v6: deleted %s type %d through %s\n", 1033 inet_ntop(AF_INET6, &ire->ire_addr_v6, 1034 addrstr1, sizeof (addrstr1)), 1035 ire->ire_type, 1036 inet_ntop(AF_INET6, &ire->ire_gateway_addr_v6, 1037 addrstr2, sizeof (addrstr2)))); 1038 ire_delete(ire); 1039 } 1040 1041 } 1042 1043 /* 1044 * ire_walk routine to delete all IRE_CACHE/IRE_HOST type redirect entries 1045 * that have a given gateway address. 1046 */ 1047 void 1048 ire_delete_cache_gw_v6(ire_t *ire, char *addr) 1049 { 1050 in6_addr_t *gw_addr = (in6_addr_t *)addr; 1051 char buf1[INET6_ADDRSTRLEN]; 1052 char buf2[INET6_ADDRSTRLEN]; 1053 in6_addr_t ire_gw_addr_v6; 1054 1055 if (!(ire->ire_type & IRE_CACHE) && 1056 !(ire->ire_flags & RTF_DYNAMIC)) 1057 return; 1058 1059 mutex_enter(&ire->ire_lock); 1060 ire_gw_addr_v6 = ire->ire_gateway_addr_v6; 1061 mutex_exit(&ire->ire_lock); 1062 1063 if (IN6_ARE_ADDR_EQUAL(&ire_gw_addr_v6, gw_addr)) { 1064 ip1dbg(("ire_delete_cache_gw_v6: deleted %s type %d to %s\n", 1065 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, 1066 buf1, sizeof (buf1)), 1067 ire->ire_type, 1068 inet_ntop(AF_INET6, &ire_gw_addr_v6, 1069 buf2, sizeof (buf2)))); 1070 ire_delete(ire); 1071 } 1072 } 1073 1074 /* 1075 * Remove all IRE_CACHE entries that match 1076 * the ire specified. (Sometimes called 1077 * as writer though not required by this function.) 1078 * 1079 * The flag argument indicates if the 1080 * flush request is due to addition 1081 * of new route (IRE_FLUSH_ADD) or deletion of old 1082 * route (IRE_FLUSH_DELETE). 1083 * 1084 * This routine takes only the IREs from the forwarding 1085 * table and flushes the corresponding entries from 1086 * the cache table. 
1087 * 1088 * When flushing due to the deletion of an old route, it 1089 * just checks the cache handles (ire_phandle and ire_ihandle) and 1090 * deletes the ones that match. 1091 * 1092 * When flushing due to the creation of a new route, it checks 1093 * if a cache entry's address matches the one in the IRE and 1094 * that the cache entry's parent has a less specific mask than the 1095 * one in IRE. The destination of such a cache entry could be the 1096 * gateway for other cache entries, so we need to flush those as 1097 * well by looking for gateway addresses matching the IRE's address. 1098 */ 1099 void 1100 ire_flush_cache_v6(ire_t *ire, int flag) 1101 { 1102 int i; 1103 ire_t *cire; 1104 irb_t *irb; 1105 ip_stack_t *ipst = ire->ire_ipst; 1106 1107 if (ire->ire_type & IRE_CACHE) 1108 return; 1109 1110 /* 1111 * If a default is just created, there is no point 1112 * in going through the cache, as there will not be any 1113 * cached ires. 1114 */ 1115 if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD) 1116 return; 1117 if (flag == IRE_FLUSH_ADD) { 1118 /* 1119 * This selective flush is 1120 * due to the addition of 1121 * new IRE. 1122 */ 1123 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 1124 irb = &ipst->ips_ip_cache_table_v6[i]; 1125 if ((cire = irb->irb_ire) == NULL) 1126 continue; 1127 IRB_REFHOLD(irb); 1128 for (cire = irb->irb_ire; cire != NULL; 1129 cire = cire->ire_next) { 1130 if (cire->ire_type != IRE_CACHE) 1131 continue; 1132 /* 1133 * If 'cire' belongs to the same subnet 1134 * as the new ire being added, and 'cire' 1135 * is derived from a prefix that is less 1136 * specific than the new ire being added, 1137 * we need to flush 'cire'; for instance, 1138 * when a new interface comes up. 
1139 */ 1140 if ((V6_MASK_EQ_2(cire->ire_addr_v6, 1141 ire->ire_mask_v6, ire->ire_addr_v6) && 1142 (ip_mask_to_plen_v6(&cire->ire_cmask_v6) <= 1143 ire->ire_masklen))) { 1144 ire_delete(cire); 1145 continue; 1146 } 1147 /* 1148 * This is the case when the ire_gateway_addr 1149 * of 'cire' belongs to the same subnet as 1150 * the new ire being added. 1151 * Flushing such ires is sometimes required to 1152 * avoid misrouting: say we have a machine with 1153 * two interfaces (I1 and I2), a default router 1154 * R on the I1 subnet, and a host route to an 1155 * off-link destination D with a gateway G on 1156 * the I2 subnet. 1157 * Under normal operation, we will have an 1158 * on-link cache entry for G and an off-link 1159 * cache entry for D with G as ire_gateway_addr, 1160 * traffic to D will reach its destination 1161 * through gateway G. 1162 * If the administrator does 'ifconfig I2 down', 1163 * the cache entries for D and G will be 1164 * flushed. However, G will now be resolved as 1165 * an off-link destination using R (the default 1166 * router) as gateway. Then D will also be 1167 * resolved as an off-link destination using G 1168 * as gateway - this behavior is due to 1169 * compatibility reasons, see comment in 1170 * ire_ihandle_lookup_offlink(). Traffic to D 1171 * will go to the router R and probably won't 1172 * reach the destination. 1173 * The administrator then does 'ifconfig I2 up'. 1174 * Since G is on the I2 subnet, this routine 1175 * will flush its cache entry. It must also 1176 * flush the cache entry for D, otherwise 1177 * traffic will stay misrouted until the IRE 1178 * times out. 1179 */ 1180 if (V6_MASK_EQ_2(cire->ire_gateway_addr_v6, 1181 ire->ire_mask_v6, ire->ire_addr_v6)) { 1182 ire_delete(cire); 1183 continue; 1184 } 1185 } 1186 IRB_REFRELE(irb); 1187 } 1188 } else { 1189 /* 1190 * delete the cache entries based on 1191 * handle in the IRE as this IRE is 1192 * being deleted/changed. 
1193 */ 1194 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 1195 irb = &ipst->ips_ip_cache_table_v6[i]; 1196 if ((cire = irb->irb_ire) == NULL) 1197 continue; 1198 IRB_REFHOLD(irb); 1199 for (cire = irb->irb_ire; cire != NULL; 1200 cire = cire->ire_next) { 1201 if (cire->ire_type != IRE_CACHE) 1202 continue; 1203 if ((cire->ire_phandle == 0 || 1204 cire->ire_phandle != ire->ire_phandle) && 1205 (cire->ire_ihandle == 0 || 1206 cire->ire_ihandle != ire->ire_ihandle)) 1207 continue; 1208 ire_delete(cire); 1209 } 1210 IRB_REFRELE(irb); 1211 } 1212 } 1213 } 1214 1215 /* 1216 * Matches the arguments passed with the values in the ire. 1217 * 1218 * Note: for match types that match using "ipif" passed in, ipif 1219 * must be checked for non-NULL before calling this routine. 1220 */ 1221 static boolean_t 1222 ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask, 1223 const in6_addr_t *gateway, int type, const ipif_t *ipif, zoneid_t zoneid, 1224 uint32_t ihandle, const ts_label_t *tsl, int match_flags) 1225 { 1226 in6_addr_t masked_addr; 1227 in6_addr_t gw_addr_v6; 1228 ill_t *ire_ill = NULL, *dst_ill; 1229 ill_t *ipif_ill = NULL; 1230 ipif_t *src_ipif; 1231 1232 ASSERT(ire->ire_ipversion == IPV6_VERSION); 1233 ASSERT(addr != NULL); 1234 ASSERT(mask != NULL); 1235 ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL); 1236 ASSERT((!(match_flags & MATCH_IRE_ILL)) || 1237 (ipif != NULL && ipif->ipif_isv6)); 1238 1239 /* 1240 * If MATCH_IRE_MARK_TESTHIDDEN is set, then only return the IRE if it 1241 * is in fact hidden, to ensure the caller gets the right one. One 1242 * exception: if the caller passed MATCH_IRE_IHANDLE, then they 1243 * already know the identity of the given IRE_INTERFACE entry and 1244 * there's no point trying to hide it from them. 
1245 */ 1246 if (ire->ire_marks & IRE_MARK_TESTHIDDEN) { 1247 if (match_flags & MATCH_IRE_IHANDLE) 1248 match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 1249 1250 if (!(match_flags & MATCH_IRE_MARK_TESTHIDDEN)) 1251 return (B_FALSE); 1252 } 1253 1254 if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid && 1255 ire->ire_zoneid != ALL_ZONES) { 1256 /* 1257 * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is 1258 * valid and does not match that of ire_zoneid, a failure to 1259 * match is reported at this point. Otherwise, since some IREs 1260 * that are available in the global zone can be used in local 1261 * zones, additional checks need to be performed: 1262 * 1263 * IRE_CACHE and IRE_LOOPBACK entries should 1264 * never be matched in this situation. 1265 * 1266 * IRE entries that have an interface associated with them 1267 * should in general not match unless they are an IRE_LOCAL 1268 * or in the case when MATCH_IRE_DEFAULT has been set in 1269 * the caller. In the case of the former, checking of the 1270 * other fields supplied should take place. 1271 * 1272 * In the case where MATCH_IRE_DEFAULT has been set, 1273 * all of the ipif's associated with the IRE's ill are 1274 * checked to see if there is a matching zoneid. If any 1275 * one ipif has a matching zoneid, this IRE is a 1276 * potential candidate so checking of the other fields 1277 * takes place. 1278 * 1279 * In the case where the IRE_INTERFACE has a usable source 1280 * address (indicated by ill_usesrc_ifindex) in the 1281 * correct zone then it's permitted to return this IRE 1282 */ 1283 if (match_flags & MATCH_IRE_ZONEONLY) 1284 return (B_FALSE); 1285 if (ire->ire_type & (IRE_CACHE | IRE_LOOPBACK)) 1286 return (B_FALSE); 1287 /* 1288 * Note, IRE_INTERFACE can have the stq as NULL. For 1289 * example, if the default multicast route is tied to 1290 * the loopback address. 
1291 */ 1292 if ((ire->ire_type & IRE_INTERFACE) && 1293 (ire->ire_stq != NULL)) { 1294 dst_ill = (ill_t *)ire->ire_stq->q_ptr; 1295 /* 1296 * If there is a usable source address in the 1297 * zone, then it's ok to return an 1298 * IRE_INTERFACE 1299 */ 1300 if ((dst_ill->ill_usesrc_ifindex != 0) && 1301 (src_ipif = ipif_select_source_v6(dst_ill, addr, 1302 B_FALSE, IPV6_PREFER_SRC_DEFAULT, zoneid)) 1303 != NULL) { 1304 ip3dbg(("ire_match_args: src_ipif %p" 1305 " dst_ill %p", (void *)src_ipif, 1306 (void *)dst_ill)); 1307 ipif_refrele(src_ipif); 1308 } else { 1309 ip3dbg(("ire_match_args: src_ipif NULL" 1310 " dst_ill %p\n", (void *)dst_ill)); 1311 return (B_FALSE); 1312 } 1313 } 1314 if (ire->ire_ipif != NULL && ire->ire_type != IRE_LOCAL && 1315 !(ire->ire_type & IRE_INTERFACE)) { 1316 ipif_t *tipif; 1317 1318 if ((match_flags & MATCH_IRE_DEFAULT) == 0) 1319 return (B_FALSE); 1320 mutex_enter(&ire->ire_ipif->ipif_ill->ill_lock); 1321 for (tipif = ire->ire_ipif->ipif_ill->ill_ipif; 1322 tipif != NULL; tipif = tipif->ipif_next) { 1323 if (IPIF_CAN_LOOKUP(tipif) && 1324 (tipif->ipif_flags & IPIF_UP) && 1325 (tipif->ipif_zoneid == zoneid || 1326 tipif->ipif_zoneid == ALL_ZONES)) 1327 break; 1328 } 1329 mutex_exit(&ire->ire_ipif->ipif_ill->ill_lock); 1330 if (tipif == NULL) 1331 return (B_FALSE); 1332 } 1333 } 1334 1335 if (match_flags & MATCH_IRE_GW) { 1336 mutex_enter(&ire->ire_lock); 1337 gw_addr_v6 = ire->ire_gateway_addr_v6; 1338 mutex_exit(&ire->ire_lock); 1339 } 1340 1341 /* 1342 * For IRE_CACHE entries, MATCH_IRE_ILL means that somebody wants to 1343 * send out ire_stq (ire_ipif for IRE_CACHE entries is just the means 1344 * of getting a source address -- i.e., ire_src_addr_v6 == 1345 * ire->ire_ipif->ipif_v6src_addr). ire_to_ill() handles this. 1346 * 1347 * NOTE: For IPMP, MATCH_IRE_ILL usually matches any ill in the group. 1348 * However, if MATCH_IRE_MARK_TESTHIDDEN is set (i.e., the IRE is for 1349 * IPMP test traffic), then the ill must match exactly. 
1350 */ 1351 if (match_flags & MATCH_IRE_ILL) { 1352 ire_ill = ire_to_ill(ire); 1353 ipif_ill = ipif->ipif_ill; 1354 } 1355 1356 /* No ire_addr_v6 bits set past the mask */ 1357 ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6, 1358 ire->ire_addr_v6)); 1359 V6_MASK_COPY(*addr, *mask, masked_addr); 1360 1361 if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) && 1362 ((!(match_flags & MATCH_IRE_GW)) || 1363 IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) && 1364 ((!(match_flags & MATCH_IRE_TYPE)) || 1365 (ire->ire_type & type)) && 1366 ((!(match_flags & MATCH_IRE_SRC)) || 1367 IN6_ARE_ADDR_EQUAL(&ire->ire_src_addr_v6, 1368 &ipif->ipif_v6src_addr)) && 1369 ((!(match_flags & MATCH_IRE_IPIF)) || 1370 (ire->ire_ipif == ipif)) && 1371 ((!(match_flags & MATCH_IRE_MARK_TESTHIDDEN)) || 1372 (ire->ire_marks & IRE_MARK_TESTHIDDEN)) && 1373 ((!(match_flags & MATCH_IRE_ILL)) || 1374 (ire_ill == ipif_ill || 1375 (!(match_flags & MATCH_IRE_MARK_TESTHIDDEN) && 1376 ire_ill != NULL && IS_IN_SAME_ILLGRP(ipif_ill, ire_ill)))) && 1377 ((!(match_flags & MATCH_IRE_IHANDLE)) || 1378 (ire->ire_ihandle == ihandle)) && 1379 ((!(match_flags & MATCH_IRE_SECATTR)) || 1380 (!is_system_labeled()) || 1381 (tsol_ire_match_gwattr(ire, tsl) == 0))) { 1382 /* We found the matched IRE */ 1383 return (B_TRUE); 1384 } 1385 return (B_FALSE); 1386 } 1387 1388 /* 1389 * Lookup for a route in all the tables 1390 */ 1391 ire_t * 1392 ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 1393 const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 1394 zoneid_t zoneid, const ts_label_t *tsl, int flags, ip_stack_t *ipst) 1395 { 1396 ire_t *ire = NULL; 1397 1398 /* 1399 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 1400 * MATCH_IRE_ILL is set. 
1401 */ 1402 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL)) && (ipif == NULL)) 1403 return (NULL); 1404 1405 /* 1406 * might be asking for a cache lookup, 1407 * This is not best way to lookup cache, 1408 * user should call ire_cache_lookup directly. 1409 * 1410 * If MATCH_IRE_TYPE was set, first lookup in the cache table and then 1411 * in the forwarding table, if the applicable type flags were set. 1412 */ 1413 if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) { 1414 ire = ire_ctable_lookup_v6(addr, gateway, type, ipif, zoneid, 1415 tsl, flags, ipst); 1416 if (ire != NULL) 1417 return (ire); 1418 } 1419 if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) { 1420 ire = ire_ftable_lookup_v6(addr, mask, gateway, type, ipif, 1421 pire, zoneid, 0, tsl, flags, ipst); 1422 } 1423 return (ire); 1424 } 1425 1426 /* 1427 * Lookup a route in forwarding table. 1428 * specific lookup is indicated by passing the 1429 * required parameters and indicating the 1430 * match required in flag field. 1431 * 1432 * Looking for default route can be done in three ways 1433 * 1) pass mask as ipv6_all_zeros and set MATCH_IRE_MASK in flags field 1434 * along with other matches. 1435 * 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags 1436 * field along with other matches. 1437 * 3) if the destination and mask are passed as zeros. 1438 * 1439 * A request to return a default route if no route 1440 * is found, can be specified by setting MATCH_IRE_DEFAULT 1441 * in flags. 1442 * 1443 * It does not support recursion more than one level. It 1444 * will do recursive lookup only when the lookup maps to 1445 * a prefix or default route and MATCH_IRE_RECURSIVE flag is passed. 1446 * 1447 * If the routing table is setup to allow more than one level 1448 * of recursion, the cleaning up cache table will not work resulting 1449 * in invalid routing. 1450 * 1451 * Supports link-local addresses by following the ipif/ill when recursing. 
1452 * 1453 * NOTE : When this function returns NULL, pire has already been released. 1454 * pire is valid only when this function successfully returns an 1455 * ire. 1456 */ 1457 ire_t * 1458 ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 1459 const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 1460 zoneid_t zoneid, uint32_t ihandle, const ts_label_t *tsl, int flags, 1461 ip_stack_t *ipst) 1462 { 1463 irb_t *irb_ptr; 1464 ire_t *rire; 1465 ire_t *ire = NULL; 1466 ire_t *saved_ire; 1467 nce_t *nce; 1468 int i; 1469 in6_addr_t gw_addr_v6; 1470 1471 ASSERT(addr != NULL); 1472 ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL); 1473 ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 1474 ASSERT(ipif == NULL || ipif->ipif_isv6); 1475 1476 /* 1477 * When we return NULL from this function, we should make 1478 * sure that *pire is NULL so that the callers will not 1479 * wrongly REFRELE the pire. 1480 */ 1481 if (pire != NULL) 1482 *pire = NULL; 1483 /* 1484 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 1485 * MATCH_IRE_ILL is set. 1486 */ 1487 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL)) && (ipif == NULL)) 1488 return (NULL); 1489 1490 /* 1491 * If the mask is known, the lookup 1492 * is simple, if the mask is not known 1493 * we need to search. 
1494 */ 1495 if (flags & MATCH_IRE_MASK) { 1496 uint_t masklen; 1497 1498 masklen = ip_mask_to_plen_v6(mask); 1499 if (ipst->ips_ip_forwarding_table_v6[masklen] == NULL) 1500 return (NULL); 1501 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[masklen][ 1502 IRE_ADDR_MASK_HASH_V6(*addr, *mask, 1503 ipst->ips_ip6_ftable_hash_size)]); 1504 rw_enter(&irb_ptr->irb_lock, RW_READER); 1505 for (ire = irb_ptr->irb_ire; ire != NULL; 1506 ire = ire->ire_next) { 1507 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1508 continue; 1509 if (ire_match_args_v6(ire, addr, mask, gateway, type, 1510 ipif, zoneid, ihandle, tsl, flags)) 1511 goto found_ire; 1512 } 1513 rw_exit(&irb_ptr->irb_lock); 1514 } else { 1515 /* 1516 * In this case we don't know the mask, we need to 1517 * search the table assuming different mask sizes. 1518 * we start with 128 bit mask, we don't allow default here. 1519 */ 1520 for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) { 1521 in6_addr_t tmpmask; 1522 1523 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) 1524 continue; 1525 (void) ip_plen_to_mask_v6(i, &tmpmask); 1526 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][ 1527 IRE_ADDR_MASK_HASH_V6(*addr, tmpmask, 1528 ipst->ips_ip6_ftable_hash_size)]; 1529 rw_enter(&irb_ptr->irb_lock, RW_READER); 1530 for (ire = irb_ptr->irb_ire; ire != NULL; 1531 ire = ire->ire_next) { 1532 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1533 continue; 1534 if (ire_match_args_v6(ire, addr, 1535 &ire->ire_mask_v6, gateway, type, ipif, 1536 zoneid, ihandle, tsl, flags)) 1537 goto found_ire; 1538 } 1539 rw_exit(&irb_ptr->irb_lock); 1540 } 1541 } 1542 1543 /* 1544 * We come here if no route has yet been found. 1545 * 1546 * Handle the case where default route is 1547 * requested by specifying type as one of the possible 1548 * types for that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE). 
1549 * 1550 * If MATCH_IRE_MASK is specified, then the appropriate default route 1551 * would have been found above if it exists so it isn't looked up here. 1552 * If MATCH_IRE_DEFAULT was also specified, then a default route will be 1553 * searched for later. 1554 */ 1555 if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE && 1556 (type & (IRE_DEFAULT | IRE_INTERFACE))) { 1557 if (ipst->ips_ip_forwarding_table_v6[0] != NULL) { 1558 /* addr & mask is zero for defaults */ 1559 irb_ptr = &ipst->ips_ip_forwarding_table_v6[0][ 1560 IRE_ADDR_HASH_V6(ipv6_all_zeros, 1561 ipst->ips_ip6_ftable_hash_size)]; 1562 rw_enter(&irb_ptr->irb_lock, RW_READER); 1563 for (ire = irb_ptr->irb_ire; ire != NULL; 1564 ire = ire->ire_next) { 1565 1566 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1567 continue; 1568 1569 if (ire_match_args_v6(ire, addr, 1570 &ipv6_all_zeros, gateway, type, ipif, 1571 zoneid, ihandle, tsl, flags)) 1572 goto found_ire; 1573 } 1574 rw_exit(&irb_ptr->irb_lock); 1575 } 1576 } 1577 /* 1578 * We come here only if no route is found. 1579 * see if the default route can be used which is allowed 1580 * only if the default matching criteria is specified. 1581 * The ipv6_ire_default_count tracks the number of IRE_DEFAULT 1582 * entries. However, the ip_forwarding_table_v6[0] also contains 1583 * interface routes thus the count can be zero. 1584 */ 1585 saved_ire = NULL; 1586 if ((flags & (MATCH_IRE_DEFAULT | MATCH_IRE_MASK)) == 1587 MATCH_IRE_DEFAULT) { 1588 ire_t *ire_origin; 1589 uint_t g_index; 1590 uint_t index; 1591 1592 if (ipst->ips_ip_forwarding_table_v6[0] == NULL) 1593 return (NULL); 1594 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[0])[0]; 1595 1596 /* 1597 * Keep a tab on the bucket while looking the IRE_DEFAULT 1598 * entries. We need to keep track of a particular IRE 1599 * (ire_origin) so this ensures that it will not be unlinked 1600 * from the hash list during the recursive lookup below. 
1601 */ 1602 IRB_REFHOLD(irb_ptr); 1603 ire = irb_ptr->irb_ire; 1604 if (ire == NULL) { 1605 IRB_REFRELE(irb_ptr); 1606 return (NULL); 1607 } 1608 1609 /* 1610 * Get the index first, since it can be changed by other 1611 * threads. Then get to the right default route skipping 1612 * default interface routes if any. As we hold a reference on 1613 * the IRE bucket, ipv6_ire_default_count can only increase so 1614 * we can't reach the end of the hash list unexpectedly. 1615 */ 1616 if (ipst->ips_ipv6_ire_default_count != 0) { 1617 g_index = ipst->ips_ipv6_ire_default_index++; 1618 index = g_index % ipst->ips_ipv6_ire_default_count; 1619 while (index != 0) { 1620 if (!(ire->ire_type & IRE_INTERFACE)) 1621 index--; 1622 ire = ire->ire_next; 1623 } 1624 ASSERT(ire != NULL); 1625 } else { 1626 /* 1627 * No default route, so we only have default interface 1628 * routes: don't enter the first loop. 1629 */ 1630 ire = NULL; 1631 } 1632 1633 /* 1634 * Round-robin the default routers list looking for a neighbor 1635 * that matches the passed in parameters and is reachable. If 1636 * none found, just return a route from the default router list 1637 * if it exists. If we can't find a default route (IRE_DEFAULT), 1638 * look for interface default routes. 1639 * We start with the ire we found above and we walk the hash 1640 * list until we're back where we started, see 1641 * ire_get_next_default_ire(). It doesn't matter if default 1642 * routes are added or deleted by other threads - we know this 1643 * ire will stay in the list because we hold a reference on the 1644 * ire bucket. 1645 * NB: if we only have interface default routes, ire is NULL so 1646 * we don't even enter this loop (see above). 
1647 */ 1648 ire_origin = ire; 1649 for (; ire != NULL; 1650 ire = ire_get_next_default_ire(ire, ire_origin)) { 1651 1652 if (ire_match_args_v6(ire, addr, 1653 &ipv6_all_zeros, gateway, type, ipif, 1654 zoneid, ihandle, tsl, flags)) { 1655 int match_flags; 1656 1657 /* 1658 * We have something to work with. 1659 * If we can find a resolved/reachable 1660 * entry, we will use this. Otherwise 1661 * we'll try to find an entry that has 1662 * a resolved cache entry. We will fallback 1663 * on this if we don't find anything else. 1664 */ 1665 if (saved_ire == NULL) 1666 saved_ire = ire; 1667 mutex_enter(&ire->ire_lock); 1668 gw_addr_v6 = ire->ire_gateway_addr_v6; 1669 mutex_exit(&ire->ire_lock); 1670 match_flags = MATCH_IRE_ILL | MATCH_IRE_SECATTR; 1671 rire = ire_ctable_lookup_v6(&gw_addr_v6, NULL, 1672 0, ire->ire_ipif, zoneid, tsl, match_flags, 1673 ipst); 1674 if (rire != NULL) { 1675 nce = rire->ire_nce; 1676 if (nce != NULL && 1677 NCE_ISREACHABLE(nce) && 1678 nce->nce_flags & NCE_F_ISROUTER) { 1679 ire_refrele(rire); 1680 IRE_REFHOLD(ire); 1681 IRB_REFRELE(irb_ptr); 1682 goto found_ire_held; 1683 } else if (nce != NULL && 1684 !(nce->nce_flags & 1685 NCE_F_ISROUTER)) { 1686 /* 1687 * Make sure we don't use 1688 * this ire 1689 */ 1690 if (saved_ire == ire) 1691 saved_ire = NULL; 1692 } 1693 ire_refrele(rire); 1694 } else if (ipst-> 1695 ips_ipv6_ire_default_count > 1 && 1696 zoneid != GLOBAL_ZONEID) { 1697 /* 1698 * When we're in a local zone, we're 1699 * only interested in default routers 1700 * that are reachable through ipifs 1701 * within our zone. 1702 * The potentially expensive call to 1703 * ire_route_lookup_v6() is avoided when 1704 * we have only one default route. 
1705 */ 1706 int ire_match_flags = MATCH_IRE_TYPE | 1707 MATCH_IRE_SECATTR; 1708 1709 if (ire->ire_ipif != NULL) { 1710 ire_match_flags |= 1711 MATCH_IRE_ILL; 1712 } 1713 rire = ire_route_lookup_v6(&gw_addr_v6, 1714 NULL, NULL, IRE_INTERFACE, 1715 ire->ire_ipif, NULL, 1716 zoneid, tsl, ire_match_flags, ipst); 1717 if (rire != NULL) { 1718 ire_refrele(rire); 1719 saved_ire = ire; 1720 } else if (saved_ire == ire) { 1721 /* 1722 * Make sure we don't use 1723 * this ire 1724 */ 1725 saved_ire = NULL; 1726 } 1727 } 1728 } 1729 } 1730 if (saved_ire != NULL) { 1731 ire = saved_ire; 1732 IRE_REFHOLD(ire); 1733 IRB_REFRELE(irb_ptr); 1734 goto found_ire_held; 1735 } else { 1736 /* 1737 * Look for a interface default route matching the 1738 * args passed in. No round robin here. Just pick 1739 * the right one. 1740 */ 1741 for (ire = irb_ptr->irb_ire; ire != NULL; 1742 ire = ire->ire_next) { 1743 1744 if (!(ire->ire_type & IRE_INTERFACE)) 1745 continue; 1746 1747 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1748 continue; 1749 1750 if (ire_match_args_v6(ire, addr, 1751 &ipv6_all_zeros, gateway, type, ipif, 1752 zoneid, ihandle, tsl, flags)) { 1753 IRE_REFHOLD(ire); 1754 IRB_REFRELE(irb_ptr); 1755 goto found_ire_held; 1756 } 1757 } 1758 IRB_REFRELE(irb_ptr); 1759 } 1760 } 1761 ASSERT(ire == NULL); 1762 ip1dbg(("ire_ftable_lookup_v6: returning NULL ire")); 1763 return (NULL); 1764 found_ire: 1765 ASSERT((ire->ire_marks & IRE_MARK_CONDEMNED) == 0); 1766 IRE_REFHOLD(ire); 1767 rw_exit(&irb_ptr->irb_lock); 1768 1769 found_ire_held: 1770 if ((flags & MATCH_IRE_RJ_BHOLE) && 1771 (ire->ire_flags & (RTF_BLACKHOLE | RTF_REJECT))) { 1772 return (ire); 1773 } 1774 /* 1775 * At this point, IRE that was found must be an IRE_FORWARDTABLE 1776 * or IRE_CACHETABLE type. If this is a recursive lookup and an 1777 * IRE_INTERFACE type was found, return that. 
If it was some other 1778 * IRE_FORWARDTABLE type of IRE (one of the prefix types), then it 1779 * is necessary to fill in the parent IRE pointed to by pire, and 1780 * then lookup the gateway address of the parent. For backwards 1781 * compatiblity, if this lookup returns an 1782 * IRE other than a IRE_CACHETABLE or IRE_INTERFACE, then one more level 1783 * of lookup is done. 1784 */ 1785 if (flags & MATCH_IRE_RECURSIVE) { 1786 const ipif_t *gw_ipif; 1787 int match_flags = MATCH_IRE_DSTONLY; 1788 1789 if (ire->ire_type & IRE_INTERFACE) 1790 return (ire); 1791 if (pire != NULL) 1792 *pire = ire; 1793 /* 1794 * If we can't find an IRE_INTERFACE or the caller has not 1795 * asked for pire, we need to REFRELE the saved_ire. 1796 */ 1797 saved_ire = ire; 1798 1799 if (ire->ire_ipif != NULL) 1800 match_flags |= MATCH_IRE_ILL; 1801 1802 mutex_enter(&ire->ire_lock); 1803 gw_addr_v6 = ire->ire_gateway_addr_v6; 1804 mutex_exit(&ire->ire_lock); 1805 1806 ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 0, 1807 ire->ire_ipif, NULL, zoneid, tsl, match_flags, ipst); 1808 if (ire == NULL) { 1809 /* 1810 * In this case we have to deal with the 1811 * MATCH_IRE_PARENT flag, which means the 1812 * parent has to be returned if ire is NULL. 1813 * The aim of this is to have (at least) a starting 1814 * ire when we want to look at all of the ires in a 1815 * bucket aimed at a single destination (as is the 1816 * case in ip_newroute_v6 for the RTF_MULTIRT 1817 * flagged routes). 1818 */ 1819 if (flags & MATCH_IRE_PARENT) { 1820 if (pire != NULL) { 1821 /* 1822 * Need an extra REFHOLD, if the 1823 * parent ire is returned via both 1824 * ire and pire. 1825 */ 1826 IRE_REFHOLD(saved_ire); 1827 } 1828 ire = saved_ire; 1829 } else { 1830 ire_refrele(saved_ire); 1831 if (pire != NULL) 1832 *pire = NULL; 1833 } 1834 return (ire); 1835 } 1836 if (ire->ire_type & (IRE_CACHETABLE | IRE_INTERFACE)) { 1837 /* 1838 * If the caller did not ask for pire, release 1839 * it now. 
1840 */ 1841 if (pire == NULL) { 1842 ire_refrele(saved_ire); 1843 } 1844 return (ire); 1845 } 1846 match_flags |= MATCH_IRE_TYPE; 1847 mutex_enter(&ire->ire_lock); 1848 gw_addr_v6 = ire->ire_gateway_addr_v6; 1849 mutex_exit(&ire->ire_lock); 1850 gw_ipif = ire->ire_ipif; 1851 ire_refrele(ire); 1852 ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 1853 (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid, 1854 NULL, match_flags, ipst); 1855 if (ire == NULL) { 1856 /* 1857 * In this case we have to deal with the 1858 * MATCH_IRE_PARENT flag, which means the 1859 * parent has to be returned if ire is NULL. 1860 * The aim of this is to have (at least) a starting 1861 * ire when we want to look at all of the ires in a 1862 * bucket aimed at a single destination (as is the 1863 * case in ip_newroute_v6 for the RTF_MULTIRT 1864 * flagged routes). 1865 */ 1866 if (flags & MATCH_IRE_PARENT) { 1867 if (pire != NULL) { 1868 /* 1869 * Need an extra REFHOLD, if the 1870 * parent ire is returned via both 1871 * ire and pire. 1872 */ 1873 IRE_REFHOLD(saved_ire); 1874 } 1875 ire = saved_ire; 1876 } else { 1877 ire_refrele(saved_ire); 1878 if (pire != NULL) 1879 *pire = NULL; 1880 } 1881 return (ire); 1882 } else if (pire == NULL) { 1883 /* 1884 * If the caller did not ask for pire, release 1885 * it now. 1886 */ 1887 ire_refrele(saved_ire); 1888 } 1889 return (ire); 1890 } 1891 1892 ASSERT(pire == NULL || *pire == NULL); 1893 return (ire); 1894 } 1895 1896 /* 1897 * Delete the IRE cache for the gateway and all IRE caches whose 1898 * ire_gateway_addr_v6 points to this gateway, and allow them to 1899 * be created on demand by ip_newroute_v6. 
1900 */ 1901 void 1902 ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid, 1903 ip_stack_t *ipst) 1904 { 1905 irb_t *irb; 1906 ire_t *ire; 1907 1908 irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 1909 ipst->ips_ip6_cache_table_size)]; 1910 IRB_REFHOLD(irb); 1911 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 1912 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1913 continue; 1914 1915 ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 1916 if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, 0, 1917 IRE_CACHE, NULL, zoneid, 0, NULL, MATCH_IRE_TYPE)) { 1918 ire_delete(ire); 1919 } 1920 } 1921 IRB_REFRELE(irb); 1922 1923 ire_walk_v6(ire_delete_cache_gw_v6, (char *)addr, zoneid, ipst); 1924 } 1925 1926 /* 1927 * Looks up cache table for a route. 1928 * specific lookup can be indicated by 1929 * passing the MATCH_* flags and the 1930 * necessary parameters. 1931 */ 1932 ire_t * 1933 ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway, 1934 int type, const ipif_t *ipif, zoneid_t zoneid, const ts_label_t *tsl, 1935 int flags, ip_stack_t *ipst) 1936 { 1937 ire_ctable_args_t margs; 1938 1939 margs.ict_addr = (void *)addr; 1940 margs.ict_gateway = (void *)gateway; 1941 margs.ict_type = type; 1942 margs.ict_ipif = ipif; 1943 margs.ict_zoneid = zoneid; 1944 margs.ict_tsl = tsl; 1945 margs.ict_flags = flags; 1946 margs.ict_ipst = ipst; 1947 margs.ict_wq = NULL; 1948 1949 return (ip6_ctable_lookup_impl(&margs)); 1950 } 1951 1952 /* 1953 * Lookup cache. 1954 * 1955 * In general the zoneid has to match (where ALL_ZONES match all of them). 1956 * But for IRE_LOCAL we also need to handle the case where L2 should 1957 * conceptually loop back the packet. This is necessary since neither 1958 * Ethernet drivers nor Ethernet hardware loops back packets sent to their 1959 * own MAC address. 
This loopback is needed when the normal
 * routes (ignoring IREs with different zoneids) would send out the packet on
 * the same ill as the ill with which this IRE_LOCAL is associated.
 *
 * Earlier versions of this code always matched an IRE_LOCAL independently of
 * the zoneid. We preserve that earlier behavior when
 * ip_restrict_interzone_loopback is turned off.
 */
ire_t *
ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid,
    const ts_label_t *tsl, ip_stack_t *ipst)
{
	irb_t	*bucket;
	ire_t	*hit;

	bucket = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
	    ipst->ips_ip6_cache_table_size)];

	rw_enter(&bucket->irb_lock, RW_READER);
	for (hit = bucket->irb_ire; hit != NULL; hit = hit->ire_next) {
		/* Condemned and test-hidden entries are never returned. */
		if (hit->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_TESTHIDDEN))
			continue;
		if (!IN6_ARE_ADDR_EQUAL(&hit->ire_addr_v6, addr))
			continue;

		/*
		 * Check whether the security policy has any
		 * restriction on using this route for the specified
		 * message.
		 */
		if (tsl != NULL &&
		    hit->ire_gw_secattr != NULL &&
		    tsol_ire_match_gwattr(hit, tsl) != 0)
			continue;

		/* Ordinary case: the zone matches (or is a wildcard). */
		if (zoneid == ALL_ZONES || hit->ire_zoneid == zoneid ||
		    hit->ire_zoneid == ALL_ZONES)
			break;

		/*
		 * Zone mismatch: only an IRE_LOCAL may still be used, and
		 * when ip_restrict_interzone_loopback is set only if
		 * ire_local_ok_across_zones() agrees.
		 */
		if (hit->ire_type == IRE_LOCAL &&
		    (!ipst->ips_ip_restrict_interzone_loopback ||
		    ire_local_ok_across_zones(hit, zoneid,
		    (void *)addr, tsl, ipst)))
			break;
	}

	/* Take the reference before the bucket lock is dropped. */
	if (hit != NULL)
		IRE_REFHOLD(hit);
	rw_exit(&bucket->irb_lock);
	return (hit);
}

/*
 * Locate the interface ire that is tied to the cache ire 'cire' via
 * cire->ire_ihandle.
 *
 * We are trying to create the cache ire for an onlink destn. or
 * gateway in 'cire'.
We are called from ire_add_v6() in the IRE_IF_RESOLVER 2021 * case for xresolv interfaces, after the ire has come back from 2022 * an external resolver. 2023 */ 2024 static ire_t * 2025 ire_ihandle_lookup_onlink_v6(ire_t *cire) 2026 { 2027 ire_t *ire; 2028 int match_flags; 2029 int i; 2030 int j; 2031 irb_t *irb_ptr; 2032 ip_stack_t *ipst = cire->ire_ipst; 2033 2034 ASSERT(cire != NULL); 2035 2036 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2037 /* 2038 * We know that the mask of the interface ire equals cire->ire_cmask. 2039 * (When ip_newroute_v6() created 'cire' for an on-link destn. 2040 * it set its cmask from the interface ire's mask) 2041 */ 2042 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 2043 NULL, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle, 2044 NULL, match_flags, ipst); 2045 if (ire != NULL) 2046 return (ire); 2047 /* 2048 * If we didn't find an interface ire above, we can't declare failure. 2049 * For backwards compatibility, we need to support prefix routes 2050 * pointing to next hop gateways that are not on-link. 2051 * 2052 * In the resolver/noresolver case, ip_newroute_v6() thinks 2053 * it is creating the cache ire for an onlink destination in 'cire'. 2054 * But 'cire' is not actually onlink, because ire_ftable_lookup_v6() 2055 * cheated it, by doing ire_route_lookup_v6() twice and returning an 2056 * interface ire. 2057 * 2058 * Eg. default - gw1 (line 1) 2059 * gw1 - gw2 (line 2) 2060 * gw2 - hme0 (line 3) 2061 * 2062 * In the above example, ip_newroute_v6() tried to create the cache ire 2063 * 'cire' for gw1, based on the interface route in line 3. The 2064 * ire_ftable_lookup_v6() above fails, because there is 2065 * no interface route to reach gw1. (it is gw2). We fall thru below. 2066 * 2067 * Do a brute force search based on the ihandle in a subset of the 2068 * forwarding tables, corresponding to cire->ire_cmask_v6. 
Otherwise 2069 * things become very complex, since we don't have 'pire' in this 2070 * case. (Also note that this method is not possible in the offlink 2071 * case because we don't know the mask) 2072 */ 2073 i = ip_mask_to_plen_v6(&cire->ire_cmask_v6); 2074 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) 2075 return (NULL); 2076 for (j = 0; j < ipst->ips_ip6_ftable_hash_size; j++) { 2077 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][j]; 2078 rw_enter(&irb_ptr->irb_lock, RW_READER); 2079 for (ire = irb_ptr->irb_ire; ire != NULL; 2080 ire = ire->ire_next) { 2081 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2082 continue; 2083 if ((ire->ire_type & IRE_INTERFACE) && 2084 (ire->ire_ihandle == cire->ire_ihandle)) { 2085 IRE_REFHOLD(ire); 2086 rw_exit(&irb_ptr->irb_lock); 2087 return (ire); 2088 } 2089 } 2090 rw_exit(&irb_ptr->irb_lock); 2091 } 2092 return (NULL); 2093 } 2094 2095 2096 /* 2097 * Locate the interface ire that is tied to the cache ire 'cire' via 2098 * cire->ire_ihandle. 2099 * 2100 * We are trying to create the cache ire for an offlink destn based 2101 * on the cache ire of the gateway in 'cire'. 'pire' is the prefix ire 2102 * as found by ip_newroute_v6(). We are called from ip_newroute_v6() in 2103 * the IRE_CACHE case. 2104 */ 2105 ire_t * 2106 ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire) 2107 { 2108 ire_t *ire; 2109 int match_flags; 2110 in6_addr_t gw_addr; 2111 ipif_t *gw_ipif; 2112 ip_stack_t *ipst = cire->ire_ipst; 2113 2114 ASSERT(cire != NULL && pire != NULL); 2115 2116 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2117 if (pire->ire_ipif != NULL) 2118 match_flags |= MATCH_IRE_ILL; 2119 /* 2120 * We know that the mask of the interface ire equals cire->ire_cmask. 2121 * (When ip_newroute_v6() created 'cire' for an on-link destn. 
it set 2122 * its cmask from the interface ire's mask) 2123 */ 2124 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 0, 2125 IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2126 NULL, match_flags, ipst); 2127 if (ire != NULL) 2128 return (ire); 2129 /* 2130 * If we didn't find an interface ire above, we can't declare failure. 2131 * For backwards compatibility, we need to support prefix routes 2132 * pointing to next hop gateways that are not on-link. 2133 * 2134 * Assume we are trying to ping some offlink destn, and we have the 2135 * routing table below. 2136 * 2137 * Eg. default - gw1 <--- pire (line 1) 2138 * gw1 - gw2 (line 2) 2139 * gw2 - hme0 (line 3) 2140 * 2141 * If we already have a cache ire for gw1 in 'cire', the 2142 * ire_ftable_lookup_v6 above would have failed, since there is no 2143 * interface ire to reach gw1. We will fallthru below. 2144 * 2145 * Here we duplicate the steps that ire_ftable_lookup_v6() did in 2146 * getting 'cire' from 'pire', in the MATCH_IRE_RECURSIVE case. 2147 * The differences are the following 2148 * i. We want the interface ire only, so we call 2149 * ire_ftable_lookup_v6() instead of ire_route_lookup_v6() 2150 * ii. We look for only prefix routes in the 1st call below. 2151 * ii. We want to match on the ihandle in the 2nd call below. 2152 */ 2153 match_flags = MATCH_IRE_TYPE; 2154 if (pire->ire_ipif != NULL) 2155 match_flags |= MATCH_IRE_ILL; 2156 2157 mutex_enter(&pire->ire_lock); 2158 gw_addr = pire->ire_gateway_addr_v6; 2159 mutex_exit(&pire->ire_lock); 2160 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_OFFSUBNET, 2161 pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst); 2162 if (ire == NULL) 2163 return (NULL); 2164 /* 2165 * At this point 'ire' corresponds to the entry shown in line 2. 2166 * gw_addr is 'gw2' in the example above. 
2167 */ 2168 mutex_enter(&ire->ire_lock); 2169 gw_addr = ire->ire_gateway_addr_v6; 2170 mutex_exit(&ire->ire_lock); 2171 gw_ipif = ire->ire_ipif; 2172 ire_refrele(ire); 2173 2174 match_flags |= MATCH_IRE_IHANDLE; 2175 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_INTERFACE, 2176 gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2177 NULL, match_flags, ipst); 2178 return (ire); 2179 } 2180 2181 /* 2182 * Return the IRE_LOOPBACK, IRE_IF_RESOLVER or IRE_IF_NORESOLVER 2183 * ire associated with the specified ipif. 2184 * 2185 * This might occasionally be called when IPIF_UP is not set since 2186 * the IPV6_MULTICAST_IF as well as creating interface routes 2187 * allows specifying a down ipif (ipif_lookup* match ipifs that are down). 2188 * 2189 * Note that if IPIF_NOLOCAL, IPIF_NOXMIT, or IPIF_DEPRECATED is set on 2190 * the ipif this routine might return NULL. 2191 * (Sometimes called as writer though not required by this function.) 2192 */ 2193 ire_t * 2194 ipif_to_ire_v6(const ipif_t *ipif) 2195 { 2196 ire_t *ire; 2197 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2198 uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_IPIF; 2199 2200 /* 2201 * IRE_INTERFACE entries for ills under IPMP are IRE_MARK_TESTHIDDEN 2202 * so that they aren't accidentally returned. However, if the 2203 * caller's ipif is on an ill under IPMP, there's no need to hide 'em. 2204 */ 2205 if (IS_UNDER_IPMP(ipif->ipif_ill)) 2206 match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 2207 2208 ASSERT(ipif->ipif_isv6); 2209 if (ipif->ipif_ire_type == IRE_LOOPBACK) { 2210 ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, NULL, 2211 IRE_LOOPBACK, ipif, ALL_ZONES, NULL, match_flags, ipst); 2212 } else if (ipif->ipif_flags & IPIF_POINTOPOINT) { 2213 /* In this case we need to lookup destination address. 
*/ 2214 ire = ire_ftable_lookup_v6(&ipif->ipif_v6pp_dst_addr, 2215 &ipv6_all_ones, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 2216 0, NULL, (match_flags | MATCH_IRE_MASK), ipst); 2217 } else { 2218 ire = ire_ftable_lookup_v6(&ipif->ipif_v6subnet, 2219 &ipif->ipif_v6net_mask, NULL, IRE_INTERFACE, ipif, NULL, 2220 ALL_ZONES, 0, NULL, (match_flags | MATCH_IRE_MASK), ipst); 2221 } 2222 return (ire); 2223 } 2224 2225 /* 2226 * Return B_TRUE if a multirt route is resolvable 2227 * (or if no route is resolved yet), B_FALSE otherwise. 2228 * This only works in the global zone. 2229 */ 2230 boolean_t 2231 ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl, 2232 ip_stack_t *ipst) 2233 { 2234 ire_t *first_fire; 2235 ire_t *first_cire; 2236 ire_t *fire; 2237 ire_t *cire; 2238 irb_t *firb; 2239 irb_t *cirb; 2240 int unres_cnt = 0; 2241 boolean_t resolvable = B_FALSE; 2242 2243 /* Retrieve the first IRE_HOST that matches the destination */ 2244 first_fire = ire_ftable_lookup_v6(v6dstp, &ipv6_all_ones, 0, IRE_HOST, 2245 NULL, NULL, ALL_ZONES, 0, tsl, MATCH_IRE_MASK | MATCH_IRE_TYPE | 2246 MATCH_IRE_SECATTR, ipst); 2247 2248 /* No route at all */ 2249 if (first_fire == NULL) { 2250 return (B_TRUE); 2251 } 2252 2253 firb = first_fire->ire_bucket; 2254 ASSERT(firb); 2255 2256 /* Retrieve the first IRE_CACHE ire for that destination. */ 2257 first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID, tsl, ipst); 2258 2259 /* No resolved route. */ 2260 if (first_cire == NULL) { 2261 ire_refrele(first_fire); 2262 return (B_TRUE); 2263 } 2264 2265 /* At least one route is resolved. */ 2266 2267 cirb = first_cire->ire_bucket; 2268 ASSERT(cirb); 2269 2270 /* Count the number of routes to that dest that are declared. 
*/ 2271 IRB_REFHOLD(firb); 2272 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2273 if (!(fire->ire_flags & RTF_MULTIRT)) 2274 continue; 2275 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, v6dstp)) 2276 continue; 2277 unres_cnt++; 2278 } 2279 IRB_REFRELE(firb); 2280 2281 2282 /* Then subtract the number of routes to that dst that are resolved */ 2283 IRB_REFHOLD(cirb); 2284 for (cire = first_cire; cire != NULL; cire = cire->ire_next) { 2285 if (!(cire->ire_flags & RTF_MULTIRT)) 2286 continue; 2287 if (!IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, v6dstp)) 2288 continue; 2289 if (cire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_TESTHIDDEN)) 2290 continue; 2291 unres_cnt--; 2292 } 2293 IRB_REFRELE(cirb); 2294 2295 /* At least one route is unresolved; search for a resolvable route. */ 2296 if (unres_cnt > 0) 2297 resolvable = ire_multirt_lookup_v6(&first_cire, &first_fire, 2298 MULTIRT_USESTAMP|MULTIRT_CACHEGW, tsl, ipst); 2299 2300 if (first_fire) 2301 ire_refrele(first_fire); 2302 2303 if (first_cire) 2304 ire_refrele(first_cire); 2305 2306 return (resolvable); 2307 } 2308 2309 2310 /* 2311 * Return B_TRUE and update *ire_arg and *fire_arg 2312 * if at least one resolvable route is found. 2313 * Return B_FALSE otherwise (all routes are resolved or 2314 * the remaining unresolved routes are all unresolvable). 2315 * This only works in the global zone. 
2316 */ 2317 boolean_t 2318 ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, 2319 const ts_label_t *tsl, ip_stack_t *ipst) 2320 { 2321 clock_t delta; 2322 ire_t *best_fire = NULL; 2323 ire_t *best_cire = NULL; 2324 ire_t *first_fire; 2325 ire_t *first_cire; 2326 ire_t *fire; 2327 ire_t *cire; 2328 irb_t *firb = NULL; 2329 irb_t *cirb = NULL; 2330 ire_t *gw_ire; 2331 boolean_t already_resolved; 2332 boolean_t res; 2333 in6_addr_t v6dst; 2334 in6_addr_t v6gw; 2335 2336 ip2dbg(("ire_multirt_lookup_v6: *ire_arg %p, *fire_arg %p, " 2337 "flags %04x\n", (void *)*ire_arg, (void *)*fire_arg, flags)); 2338 2339 ASSERT(ire_arg); 2340 ASSERT(fire_arg); 2341 2342 /* Not an IRE_HOST ire; give up. */ 2343 if ((*fire_arg == NULL) || 2344 ((*fire_arg)->ire_type != IRE_HOST)) { 2345 return (B_FALSE); 2346 } 2347 2348 /* This is the first IRE_HOST ire for that destination. */ 2349 first_fire = *fire_arg; 2350 firb = first_fire->ire_bucket; 2351 ASSERT(firb); 2352 2353 mutex_enter(&first_fire->ire_lock); 2354 v6dst = first_fire->ire_addr_v6; 2355 mutex_exit(&first_fire->ire_lock); 2356 2357 ip2dbg(("ire_multirt_lookup_v6: dst %08x\n", 2358 ntohl(V4_PART_OF_V6(v6dst)))); 2359 2360 /* 2361 * Retrieve the first IRE_CACHE ire for that destination; 2362 * if we don't find one, no route for that dest is 2363 * resolved yet. 2364 */ 2365 first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID, tsl, ipst); 2366 if (first_cire) { 2367 cirb = first_cire->ire_bucket; 2368 } 2369 2370 ip2dbg(("ire_multirt_lookup_v6: first_cire %p\n", (void *)first_cire)); 2371 2372 /* 2373 * Search for a resolvable route, giving the top priority 2374 * to routes that can be resolved without any call to the resolver. 2375 */ 2376 IRB_REFHOLD(firb); 2377 2378 if (!IN6_IS_ADDR_MULTICAST(&v6dst)) { 2379 /* 2380 * For all multiroute IRE_HOST ires for that destination, 2381 * check if the route via the IRE_HOST's gateway is 2382 * resolved yet. 
2383 */ 2384 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2385 2386 if (!(fire->ire_flags & RTF_MULTIRT)) 2387 continue; 2388 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2389 continue; 2390 2391 if (fire->ire_gw_secattr != NULL && 2392 tsol_ire_match_gwattr(fire, tsl) != 0) { 2393 continue; 2394 } 2395 2396 mutex_enter(&fire->ire_lock); 2397 v6gw = fire->ire_gateway_addr_v6; 2398 mutex_exit(&fire->ire_lock); 2399 2400 ip2dbg(("ire_multirt_lookup_v6: fire %p, " 2401 "ire_addr %08x, ire_gateway_addr %08x\n", 2402 (void *)fire, 2403 ntohl(V4_PART_OF_V6(fire->ire_addr_v6)), 2404 ntohl(V4_PART_OF_V6(v6gw)))); 2405 2406 already_resolved = B_FALSE; 2407 2408 if (first_cire) { 2409 ASSERT(cirb); 2410 2411 IRB_REFHOLD(cirb); 2412 /* 2413 * For all IRE_CACHE ires for that 2414 * destination. 2415 */ 2416 for (cire = first_cire; 2417 cire != NULL; 2418 cire = cire->ire_next) { 2419 2420 if (!(cire->ire_flags & RTF_MULTIRT)) 2421 continue; 2422 if (!IN6_ARE_ADDR_EQUAL( 2423 &cire->ire_addr_v6, &v6dst)) 2424 continue; 2425 if (cire->ire_marks & 2426 (IRE_MARK_CONDEMNED| 2427 IRE_MARK_TESTHIDDEN)) 2428 continue; 2429 2430 if (cire->ire_gw_secattr != NULL && 2431 tsol_ire_match_gwattr(cire, 2432 tsl) != 0) { 2433 continue; 2434 } 2435 2436 /* 2437 * Check if the IRE_CACHE's gateway 2438 * matches the IRE_HOST's gateway. 2439 */ 2440 if (IN6_ARE_ADDR_EQUAL( 2441 &cire->ire_gateway_addr_v6, 2442 &v6gw)) { 2443 already_resolved = B_TRUE; 2444 break; 2445 } 2446 } 2447 IRB_REFRELE(cirb); 2448 } 2449 2450 /* 2451 * This route is already resolved; 2452 * proceed with next one. 2453 */ 2454 if (already_resolved) { 2455 ip2dbg(("ire_multirt_lookup_v6: found cire %p, " 2456 "already resolved\n", (void *)cire)); 2457 continue; 2458 } 2459 2460 /* 2461 * The route is unresolved; is it actually 2462 * resolvable, i.e. is there a cache or a resolver 2463 * for the gateway? 
2464 */ 2465 gw_ire = ire_route_lookup_v6(&v6gw, 0, 0, 0, NULL, NULL, 2466 ALL_ZONES, tsl, MATCH_IRE_RECURSIVE | 2467 MATCH_IRE_SECATTR, ipst); 2468 2469 ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n", 2470 (void *)gw_ire)); 2471 2472 /* 2473 * This route can be resolved without any call to the 2474 * resolver; if the MULTIRT_CACHEGW flag is set, 2475 * give the top priority to this ire and exit the 2476 * loop. 2477 * This occurs when an resolver reply is processed 2478 * through ip_wput_nondata() 2479 */ 2480 if ((flags & MULTIRT_CACHEGW) && 2481 (gw_ire != NULL) && 2482 (gw_ire->ire_type & IRE_CACHETABLE)) { 2483 /* 2484 * Release the resolver associated to the 2485 * previous candidate best ire, if any. 2486 */ 2487 if (best_cire) { 2488 ire_refrele(best_cire); 2489 ASSERT(best_fire); 2490 } 2491 2492 best_fire = fire; 2493 best_cire = gw_ire; 2494 2495 ip2dbg(("ire_multirt_lookup_v6: found top prio " 2496 "best_fire %p, best_cire %p\n", 2497 (void *)best_fire, (void *)best_cire)); 2498 break; 2499 } 2500 2501 /* 2502 * Compute the time elapsed since our preceding 2503 * attempt to resolve that route. 2504 * If the MULTIRT_USESTAMP flag is set, we take that 2505 * route into account only if this time interval 2506 * exceeds ip_multirt_resolution_interval; 2507 * this prevents us from attempting to resolve a 2508 * broken route upon each sending of a packet. 2509 */ 2510 delta = lbolt - fire->ire_last_used_time; 2511 delta = TICK_TO_MSEC(delta); 2512 2513 res = (boolean_t) 2514 ((delta > ipst-> 2515 ips_ip_multirt_resolution_interval) || 2516 (!(flags & MULTIRT_USESTAMP))); 2517 2518 ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, " 2519 "res %d\n", 2520 (void *)fire, delta, res)); 2521 2522 if (res) { 2523 /* 2524 * A resolver exists for the gateway: save 2525 * the current IRE_HOST ire as a candidate 2526 * best ire. If we later discover that a 2527 * top priority ire exists (i.e. 
no need to 2528 * call the resolver), then this new ire 2529 * will be preferred to the current one. 2530 */ 2531 if (gw_ire != NULL) { 2532 if (best_fire == NULL) { 2533 ASSERT(best_cire == NULL); 2534 2535 best_fire = fire; 2536 best_cire = gw_ire; 2537 2538 ip2dbg(("ire_multirt_lookup_v6:" 2539 "found candidate " 2540 "best_fire %p, " 2541 "best_cire %p\n", 2542 (void *)best_fire, 2543 (void *)best_cire)); 2544 2545 /* 2546 * If MULTIRT_CACHEGW is not 2547 * set, we ignore the top 2548 * priority ires that can 2549 * be resolved without any 2550 * call to the resolver; 2551 * In that case, there is 2552 * actually no need 2553 * to continue the loop. 2554 */ 2555 if (!(flags & 2556 MULTIRT_CACHEGW)) { 2557 break; 2558 } 2559 continue; 2560 } 2561 } else { 2562 /* 2563 * No resolver for the gateway: the 2564 * route is not resolvable. 2565 * If the MULTIRT_SETSTAMP flag is 2566 * set, we stamp the IRE_HOST ire, 2567 * so we will not select it again 2568 * during this resolution interval. 2569 */ 2570 if (flags & MULTIRT_SETSTAMP) 2571 fire->ire_last_used_time = 2572 lbolt; 2573 } 2574 } 2575 2576 if (gw_ire != NULL) 2577 ire_refrele(gw_ire); 2578 } 2579 } else { /* IN6_IS_ADDR_MULTICAST(&v6dst) */ 2580 2581 for (fire = first_fire; 2582 fire != NULL; 2583 fire = fire->ire_next) { 2584 2585 if (!(fire->ire_flags & RTF_MULTIRT)) 2586 continue; 2587 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2588 continue; 2589 2590 if (fire->ire_gw_secattr != NULL && 2591 tsol_ire_match_gwattr(fire, tsl) != 0) { 2592 continue; 2593 } 2594 2595 already_resolved = B_FALSE; 2596 2597 mutex_enter(&fire->ire_lock); 2598 v6gw = fire->ire_gateway_addr_v6; 2599 mutex_exit(&fire->ire_lock); 2600 2601 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2602 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, tsl, 2603 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE | 2604 MATCH_IRE_SECATTR, ipst); 2605 2606 /* No resolver for the gateway; we skip this ire. 
*/ 2607 if (gw_ire == NULL) { 2608 continue; 2609 } 2610 2611 if (first_cire) { 2612 2613 IRB_REFHOLD(cirb); 2614 /* 2615 * For all IRE_CACHE ires for that 2616 * destination. 2617 */ 2618 for (cire = first_cire; 2619 cire != NULL; 2620 cire = cire->ire_next) { 2621 2622 if (!(cire->ire_flags & RTF_MULTIRT)) 2623 continue; 2624 if (!IN6_ARE_ADDR_EQUAL( 2625 &cire->ire_addr_v6, &v6dst)) 2626 continue; 2627 if (cire->ire_marks & 2628 IRE_MARK_CONDEMNED) 2629 continue; 2630 2631 if (cire->ire_gw_secattr != NULL && 2632 tsol_ire_match_gwattr(cire, 2633 tsl) != 0) { 2634 continue; 2635 } 2636 2637 /* 2638 * Cache entries are linked to the 2639 * parent routes using the parent handle 2640 * (ire_phandle). If no cache entry has 2641 * the same handle as fire, fire is 2642 * still unresolved. 2643 */ 2644 ASSERT(cire->ire_phandle != 0); 2645 if (cire->ire_phandle == 2646 fire->ire_phandle) { 2647 already_resolved = B_TRUE; 2648 break; 2649 } 2650 } 2651 IRB_REFRELE(cirb); 2652 } 2653 2654 /* 2655 * This route is already resolved; proceed with 2656 * next one. 2657 */ 2658 if (already_resolved) { 2659 ire_refrele(gw_ire); 2660 continue; 2661 } 2662 2663 /* 2664 * Compute the time elapsed since our preceding 2665 * attempt to resolve that route. 2666 * If the MULTIRT_USESTAMP flag is set, we take 2667 * that route into account only if this time 2668 * interval exceeds ip_multirt_resolution_interval; 2669 * this prevents us from attempting to resolve a 2670 * broken route upon each sending of a packet. 
2671 */ 2672 delta = lbolt - fire->ire_last_used_time; 2673 delta = TICK_TO_MSEC(delta); 2674 2675 res = (boolean_t) 2676 ((delta > ipst-> 2677 ips_ip_multirt_resolution_interval) || 2678 (!(flags & MULTIRT_USESTAMP))); 2679 2680 ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, " 2681 "flags %04x, res %d\n", 2682 (void *)fire, delta, flags, res)); 2683 2684 if (res) { 2685 if (best_cire) { 2686 /* 2687 * Release the resolver associated 2688 * to the preceding candidate best 2689 * ire, if any. 2690 */ 2691 ire_refrele(best_cire); 2692 ASSERT(best_fire); 2693 } 2694 best_fire = fire; 2695 best_cire = gw_ire; 2696 continue; 2697 } 2698 2699 ire_refrele(gw_ire); 2700 } 2701 } 2702 2703 if (best_fire) { 2704 IRE_REFHOLD(best_fire); 2705 } 2706 IRB_REFRELE(firb); 2707 2708 /* Release the first IRE_CACHE we initially looked up, if any. */ 2709 if (first_cire) 2710 ire_refrele(first_cire); 2711 2712 /* Found a resolvable route. */ 2713 if (best_fire) { 2714 ASSERT(best_cire); 2715 2716 if (*fire_arg) 2717 ire_refrele(*fire_arg); 2718 if (*ire_arg) 2719 ire_refrele(*ire_arg); 2720 2721 /* 2722 * Update the passed arguments with the 2723 * resolvable multirt route we found 2724 */ 2725 *fire_arg = best_fire; 2726 *ire_arg = best_cire; 2727 2728 ip2dbg(("ire_multirt_lookup_v6: returning B_TRUE, " 2729 "*fire_arg %p, *ire_arg %p\n", 2730 (void *)best_fire, (void *)best_cire)); 2731 2732 return (B_TRUE); 2733 } 2734 2735 ASSERT(best_cire == NULL); 2736 2737 ip2dbg(("ire_multirt_lookup_v6: returning B_FALSE, *fire_arg %p, " 2738 "*ire_arg %p\n", 2739 (void *)*fire_arg, (void *)*ire_arg)); 2740 2741 /* No resolvable route. */ 2742 return (B_FALSE); 2743 } 2744 2745 2746 /* 2747 * Find an IRE_OFFSUBNET IRE entry for the multicast address 'v6dstp' 2748 * that goes through 'ipif'. As a fallback, a route that goes through 2749 * ipif->ipif_ill can be returned. 
2750 */ 2751 ire_t * 2752 ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp) 2753 { 2754 ire_t *ire; 2755 ire_t *save_ire = NULL; 2756 ire_t *gw_ire; 2757 irb_t *irb; 2758 in6_addr_t v6gw; 2759 int match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL; 2760 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2761 2762 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, NULL, ALL_ZONES, 0, 2763 NULL, MATCH_IRE_DEFAULT, ipst); 2764 2765 if (ire == NULL) 2766 return (NULL); 2767 2768 irb = ire->ire_bucket; 2769 ASSERT(irb); 2770 2771 IRB_REFHOLD(irb); 2772 ire_refrele(ire); 2773 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 2774 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) || 2775 (ipif->ipif_zoneid != ire->ire_zoneid && 2776 ire->ire_zoneid != ALL_ZONES)) { 2777 continue; 2778 } 2779 2780 switch (ire->ire_type) { 2781 case IRE_DEFAULT: 2782 case IRE_PREFIX: 2783 case IRE_HOST: 2784 mutex_enter(&ire->ire_lock); 2785 v6gw = ire->ire_gateway_addr_v6; 2786 mutex_exit(&ire->ire_lock); 2787 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2788 IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, 2789 NULL, match_flags, ipst); 2790 2791 if (gw_ire != NULL) { 2792 if (save_ire != NULL) { 2793 ire_refrele(save_ire); 2794 } 2795 IRE_REFHOLD(ire); 2796 if (gw_ire->ire_ipif == ipif) { 2797 ire_refrele(gw_ire); 2798 2799 IRB_REFRELE(irb); 2800 return (ire); 2801 } 2802 ire_refrele(gw_ire); 2803 save_ire = ire; 2804 } 2805 break; 2806 case IRE_IF_NORESOLVER: 2807 case IRE_IF_RESOLVER: 2808 if (ire->ire_ipif == ipif) { 2809 if (save_ire != NULL) { 2810 ire_refrele(save_ire); 2811 } 2812 IRE_REFHOLD(ire); 2813 2814 IRB_REFRELE(irb); 2815 return (ire); 2816 } 2817 break; 2818 } 2819 } 2820 IRB_REFRELE(irb); 2821 2822 return (save_ire); 2823 } 2824 2825 /* 2826 * This is the implementation of the IPv6 IRE cache lookup procedure. 2827 * Separating the interface from the implementation allows additional 2828 * flexibility when specifying search criteria. 
2829 */ 2830 static ire_t * 2831 ip6_ctable_lookup_impl(ire_ctable_args_t *margs) 2832 { 2833 irb_t *irb_ptr; 2834 ire_t *ire; 2835 ip_stack_t *ipst = margs->ict_ipst; 2836 2837 if ((margs->ict_flags & (MATCH_IRE_SRC | MATCH_IRE_ILL)) && 2838 (margs->ict_ipif == NULL)) { 2839 return (NULL); 2840 } 2841 2842 irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6( 2843 *((in6_addr_t *)(margs->ict_addr)), 2844 ipst->ips_ip6_cache_table_size)]; 2845 rw_enter(&irb_ptr->irb_lock, RW_READER); 2846 for (ire = irb_ptr->irb_ire; ire != NULL; ire = ire->ire_next) { 2847 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2848 continue; 2849 ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 2850 if (ire_match_args_v6(ire, (in6_addr_t *)margs->ict_addr, 2851 &ire->ire_mask_v6, (in6_addr_t *)margs->ict_gateway, 2852 margs->ict_type, margs->ict_ipif, margs->ict_zoneid, 0, 2853 margs->ict_tsl, margs->ict_flags)) { 2854 IRE_REFHOLD(ire); 2855 rw_exit(&irb_ptr->irb_lock); 2856 return (ire); 2857 } 2858 } 2859 2860 rw_exit(&irb_ptr->irb_lock); 2861 return (NULL); 2862 } 2863