1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 /* 32 * This file contains routines that manipulate Internet Routing Entries (IREs). 
33 */ 34 #include <sys/types.h> 35 #include <sys/stream.h> 36 #include <sys/stropts.h> 37 #include <sys/ddi.h> 38 #include <sys/cmn_err.h> 39 40 #include <sys/systm.h> 41 #include <sys/param.h> 42 #include <sys/socket.h> 43 #include <net/if.h> 44 #include <net/route.h> 45 #include <netinet/in.h> 46 #include <net/if_dl.h> 47 #include <netinet/ip6.h> 48 #include <netinet/icmp6.h> 49 50 #include <inet/common.h> 51 #include <inet/mi.h> 52 #include <inet/ip.h> 53 #include <inet/ip6.h> 54 #include <inet/ip_ndp.h> 55 #include <inet/ip_if.h> 56 #include <inet/ip_ire.h> 57 #include <inet/ipclassifier.h> 58 #include <inet/nd.h> 59 #include <sys/kmem.h> 60 #include <sys/zone.h> 61 62 #include <sys/tsol/label.h> 63 #include <sys/tsol/tnet.h> 64 65 static ire_t ire_null; 66 67 static ire_t *ire_ihandle_lookup_onlink_v6(ire_t *cire); 68 static boolean_t ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, 69 const in6_addr_t *mask, const in6_addr_t *gateway, int type, 70 const ipif_t *ipif, zoneid_t zoneid, uint32_t ihandle, 71 const ts_label_t *tsl, int match_flags); 72 static ire_t *ire_init_v6(ire_t *, const in6_addr_t *, const in6_addr_t *, 73 const in6_addr_t *, const in6_addr_t *, uint_t *, queue_t *, queue_t *, 74 ushort_t, ipif_t *, const in6_addr_t *, uint32_t, uint32_t, uint_t, 75 const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *); 76 77 78 /* 79 * Initialize the ire that is specific to IPv6 part and call 80 * ire_init_common to finish it. 81 */ 82 static ire_t * 83 ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, const in6_addr_t *v6mask, 84 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 85 uint_t *max_fragp, queue_t *rfq, queue_t *stq, ushort_t type, 86 ipif_t *ipif, const in6_addr_t *v6cmask, uint32_t phandle, 87 uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, tsol_gc_t *gc, 88 tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 89 { 90 91 /* 92 * Reject IRE security attribute creation/initialization 93 * if system is not running in Trusted mode. 
94 */ 95 if ((gc != NULL || gcgrp != NULL) && !is_system_labeled()) 96 return (NULL); 97 98 99 BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_alloced); 100 ire->ire_addr_v6 = *v6addr; 101 102 if (v6src_addr != NULL) 103 ire->ire_src_addr_v6 = *v6src_addr; 104 if (v6mask != NULL) { 105 ire->ire_mask_v6 = *v6mask; 106 ire->ire_masklen = ip_mask_to_plen_v6(&ire->ire_mask_v6); 107 } 108 if (v6gateway != NULL) 109 ire->ire_gateway_addr_v6 = *v6gateway; 110 111 if (type == IRE_CACHE && v6cmask != NULL) 112 ire->ire_cmask_v6 = *v6cmask; 113 114 /* 115 * Multirouted packets need to have a fragment header added so that 116 * the receiver is able to discard duplicates according to their 117 * fragment identifier. 118 */ 119 if (type == IRE_CACHE && (flags & RTF_MULTIRT)) { 120 ire->ire_frag_flag = IPH_FRAG_HDR; 121 } 122 123 /* ire_init_common will free the mblks upon encountering any failure */ 124 if (!ire_init_common(ire, max_fragp, NULL, rfq, stq, type, ipif, 125 phandle, ihandle, flags, IPV6_VERSION, ulp_info, gc, gcgrp, ipst)) 126 return (NULL); 127 128 return (ire); 129 } 130 131 /* 132 * Similar to ire_create_v6 except that it is called only when 133 * we want to allocate ire as an mblk e.g. we have a external 134 * resolver. Do we need this in IPv6 ? 135 * 136 * IPv6 initializes the ire_nce in ire_add_v6, which expects to 137 * find the ire_nce to be null when it is called. So, although 138 * we have a src_nce parameter (in the interest of matching up with 139 * the argument list of the v4 version), we ignore the src_nce 140 * argument here. 
141 */ 142 /* ARGSUSED */ 143 ire_t * 144 ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 145 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 146 nce_t *src_nce, queue_t *rfq, queue_t *stq, ushort_t type, 147 ipif_t *ipif, const in6_addr_t *v6cmask, 148 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 149 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 150 { 151 ire_t *ire; 152 ire_t *ret_ire; 153 mblk_t *mp; 154 155 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 156 157 /* Allocate the new IRE. */ 158 mp = allocb(sizeof (ire_t), BPRI_MED); 159 if (mp == NULL) { 160 ip1dbg(("ire_create_mp_v6: alloc failed\n")); 161 return (NULL); 162 } 163 164 ire = (ire_t *)mp->b_rptr; 165 mp->b_wptr = (uchar_t *)&ire[1]; 166 167 /* Start clean. */ 168 *ire = ire_null; 169 ire->ire_mp = mp; 170 mp->b_datap->db_type = IRE_DB_TYPE; 171 172 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 173 NULL, rfq, stq, type, ipif, v6cmask, phandle, 174 ihandle, flags, ulp_info, gc, gcgrp, ipst); 175 176 if (ret_ire == NULL) { 177 freeb(ire->ire_mp); 178 return (NULL); 179 } 180 return (ire); 181 } 182 183 /* 184 * ire_create_v6 is called to allocate and initialize a new IRE. 185 * 186 * NOTE : This is called as writer sometimes though not required 187 * by this function. 188 * 189 * See comments above ire_create_mp_v6() for the rationale behind the 190 * unused src_nce argument. 
191 */ 192 /* ARGSUSED */ 193 ire_t * 194 ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 195 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 196 uint_t *max_fragp, nce_t *src_nce, queue_t *rfq, queue_t *stq, 197 ushort_t type, ipif_t *ipif, const in6_addr_t *v6cmask, 198 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 199 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 200 { 201 ire_t *ire; 202 ire_t *ret_ire; 203 204 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 205 206 ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP); 207 if (ire == NULL) { 208 ip1dbg(("ire_create_v6: alloc failed\n")); 209 return (NULL); 210 } 211 *ire = ire_null; 212 213 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 214 max_fragp, rfq, stq, type, ipif, v6cmask, phandle, 215 ihandle, flags, ulp_info, gc, gcgrp, ipst); 216 217 if (ret_ire == NULL) { 218 kmem_cache_free(ire_cache, ire); 219 return (NULL); 220 } 221 ASSERT(ret_ire == ire); 222 return (ire); 223 } 224 225 /* 226 * Find an IRE_INTERFACE for the multicast group. 227 * Allows different routes for multicast addresses 228 * in the unicast routing table (akin to FF::0/8 but could be more specific) 229 * which point at different interfaces. This is used when IPV6_MULTICAST_IF 230 * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't 231 * specify the interface to join on. 232 * 233 * Supports link-local addresses by following the ipif/ill when recursing. 234 */ 235 ire_t * 236 ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst) 237 { 238 ire_t *ire; 239 ipif_t *ipif = NULL; 240 int match_flags = MATCH_IRE_TYPE; 241 in6_addr_t gw_addr_v6; 242 243 ire = ire_ftable_lookup_v6(group, 0, 0, 0, NULL, NULL, 244 zoneid, 0, NULL, MATCH_IRE_DEFAULT, ipst); 245 246 /* We search a resolvable ire in case of multirouting. 
*/ 247 if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) { 248 ire_t *cire = NULL; 249 /* 250 * If the route is not resolvable, the looked up ire 251 * may be changed here. In that case, ire_multirt_lookup() 252 * IRE_REFRELE the original ire and change it. 253 */ 254 (void) ire_multirt_lookup_v6(&cire, &ire, MULTIRT_CACHEGW, 255 NULL, ipst); 256 if (cire != NULL) 257 ire_refrele(cire); 258 } 259 if (ire == NULL) 260 return (NULL); 261 /* 262 * Make sure we follow ire_ipif. 263 * 264 * We need to determine the interface route through 265 * which the gateway will be reached. We don't really 266 * care which interface is picked if the interface is 267 * part of a group. 268 */ 269 if (ire->ire_ipif != NULL) { 270 ipif = ire->ire_ipif; 271 match_flags |= MATCH_IRE_ILL_GROUP; 272 } 273 274 switch (ire->ire_type) { 275 case IRE_DEFAULT: 276 case IRE_PREFIX: 277 case IRE_HOST: 278 mutex_enter(&ire->ire_lock); 279 gw_addr_v6 = ire->ire_gateway_addr_v6; 280 mutex_exit(&ire->ire_lock); 281 ire_refrele(ire); 282 ire = ire_ftable_lookup_v6(&gw_addr_v6, 0, 0, 283 IRE_INTERFACE, ipif, NULL, zoneid, 0, 284 NULL, match_flags, ipst); 285 return (ire); 286 case IRE_IF_NORESOLVER: 287 case IRE_IF_RESOLVER: 288 return (ire); 289 default: 290 ire_refrele(ire); 291 return (NULL); 292 } 293 } 294 295 /* 296 * Return any local address. We use this to target ourselves 297 * when the src address was specified as 'default'. 298 * Preference for IRE_LOCAL entries. 
299 */ 300 ire_t * 301 ire_lookup_local_v6(zoneid_t zoneid, ip_stack_t *ipst) 302 { 303 ire_t *ire; 304 irb_t *irb; 305 ire_t *maybe = NULL; 306 int i; 307 308 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 309 irb = &ipst->ips_ip_cache_table_v6[i]; 310 if (irb->irb_ire == NULL) 311 continue; 312 rw_enter(&irb->irb_lock, RW_READER); 313 for (ire = irb->irb_ire; ire; ire = ire->ire_next) { 314 if ((ire->ire_marks & IRE_MARK_CONDEMNED) || 315 ire->ire_zoneid != zoneid && 316 ire->ire_zoneid != ALL_ZONES) 317 continue; 318 switch (ire->ire_type) { 319 case IRE_LOOPBACK: 320 if (maybe == NULL) { 321 IRE_REFHOLD(ire); 322 maybe = ire; 323 } 324 break; 325 case IRE_LOCAL: 326 if (maybe != NULL) { 327 ire_refrele(maybe); 328 } 329 IRE_REFHOLD(ire); 330 rw_exit(&irb->irb_lock); 331 return (ire); 332 } 333 } 334 rw_exit(&irb->irb_lock); 335 } 336 return (maybe); 337 } 338 339 /* 340 * This function takes a mask and returns number of bits set in the 341 * mask (the represented prefix length). Assumes a contiguous mask. 342 */ 343 int 344 ip_mask_to_plen_v6(const in6_addr_t *v6mask) 345 { 346 int bits; 347 int plen = IPV6_ABITS; 348 int i; 349 350 for (i = 3; i >= 0; i--) { 351 if (v6mask->s6_addr32[i] == 0) { 352 plen -= 32; 353 continue; 354 } 355 bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1; 356 if (bits == 0) 357 break; 358 plen -= bits; 359 } 360 361 return (plen); 362 } 363 364 /* 365 * Convert a prefix length to the mask for that prefix. 366 * Returns the argument bitmask. 367 */ 368 in6_addr_t * 369 ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask) 370 { 371 uint32_t *ptr; 372 373 if (plen < 0 || plen > IPV6_ABITS) 374 return (NULL); 375 *bitmask = ipv6_all_zeros; 376 377 ptr = (uint32_t *)bitmask; 378 while (plen > 32) { 379 *ptr++ = 0xffffffffU; 380 plen -= 32; 381 } 382 *ptr = htonl(0xffffffffU << (32 - plen)); 383 return (bitmask); 384 } 385 386 /* 387 * Add a fully initialized IRE to an appropriate 388 * table based on ire_type. 
389 * 390 * The forward table contains IRE_PREFIX/IRE_HOST/IRE_HOST and 391 * IRE_IF_RESOLVER/IRE_IF_NORESOLVER and IRE_DEFAULT. 392 * 393 * The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK 394 * and IRE_CACHE. 395 * 396 * NOTE : This function is called as writer though not required 397 * by this function. 398 */
/*
 * Contract, as implemented below:
 *
 * ire_p	in/out. On entry *ire_p is the ire to add. On return with 0,
 *		*ire_p is a held ire: either the ire that was inserted or,
 *		if an equivalent entry was already in the bucket, that
 *		existing entry (the new ire is then deleted). On any error
 *		the new ire is deleted and *ire_p is set to NULL.
 * q, mp, func	passed through to ire_atomic_start(), which may queue the
 *		request and return EINPROGRESS if the ipif or ill is changing.
 *
 * Returns 0 on success; EINVAL for an unrecognized ire type, a missing or
 * condemned parent interface route (xresolv case), or an unusable nce for
 * an IRE_CACHE; ENOMEM if a forwarding-table bucket array cannot be
 * allocated; otherwise the error returned by ire_atomic_start().
 */
399 int 400 ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func) 401 { 402 ire_t *ire1; 403 int mask_table_index; 404 irb_t *irb_ptr; 405 ire_t **irep; 406 int flags; 407 ire_t *pire = NULL; 408 ill_t *stq_ill; 409 boolean_t ndp_g_lock_held = B_FALSE; 410 ire_t *ire = *ire_p; 411 int error; 412 ip_stack_t *ipst = ire->ire_ipst; 413 414 ASSERT(ire->ire_ipversion == IPV6_VERSION); 415 ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */ 416 ASSERT(ire->ire_nce == NULL); 417 418 /* Find the appropriate list head. */ 419 switch (ire->ire_type) { 420 case IRE_HOST: 421 ire->ire_mask_v6 = ipv6_all_ones; 422 ire->ire_masklen = IPV6_ABITS; 423 if ((ire->ire_flags & RTF_SETSRC) == 0) 424 ire->ire_src_addr_v6 = ipv6_all_zeros; 425 break; 426 case IRE_CACHE: 427 case IRE_LOCAL: 428 case IRE_LOOPBACK: 429 ire->ire_mask_v6 = ipv6_all_ones; 430 ire->ire_masklen = IPV6_ABITS; 431 break; 432 case IRE_PREFIX: 433 if ((ire->ire_flags & RTF_SETSRC) == 0) 434 ire->ire_src_addr_v6 = ipv6_all_zeros; 435 break; 436 case IRE_DEFAULT: 437 if ((ire->ire_flags & RTF_SETSRC) == 0) 438 ire->ire_src_addr_v6 = ipv6_all_zeros; 439 break; 440 case IRE_IF_RESOLVER: 441 case IRE_IF_NORESOLVER: 442 break; 443 default: 444 printf("ire_add_v6: ire %p has unrecognized IRE type (%d)\n", 445 (void *)ire, ire->ire_type); 446 ire_delete(ire); 447 *ire_p = NULL; 448 return (EINVAL); 449 } 450 451 /* Make sure the address is properly masked. 
*/ 452 V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6); 453 454 if ((ire->ire_type & IRE_CACHETABLE) == 0) { 455 /* IRE goes into Forward Table */ 456 mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6); 457 if ((ipst->ips_ip_forwarding_table_v6[mask_table_index]) == 458 NULL) { 459 irb_t *ptr; 460 int i; 461 462 ptr = (irb_t *)mi_zalloc(( 463 ipst->ips_ip6_ftable_hash_size * sizeof (irb_t))); 464 if (ptr == NULL) { 465 ire_delete(ire); 466 *ire_p = NULL; 467 return (ENOMEM); 468 } 469 for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) { 470 rw_init(&ptr[i].irb_lock, NULL, 471 RW_DEFAULT, NULL); 472 } 473 mutex_enter(&ipst->ips_ire_ft_init_lock); 474 if (ipst->ips_ip_forwarding_table_v6[ 475 mask_table_index] == NULL) { 476 ipst->ips_ip_forwarding_table_v6[ 477 mask_table_index] = ptr; 478 mutex_exit(&ipst->ips_ire_ft_init_lock); 479 } else { 480 /* 481 * Some other thread won the race in 482 * initializing the forwarding table at the 483 * same index. 484 */ 485 mutex_exit(&ipst->ips_ire_ft_init_lock); 486 for (i = 0; i < ipst->ips_ip6_ftable_hash_size; 487 i++) { 488 rw_destroy(&ptr[i].irb_lock); 489 } 490 mi_free(ptr); 491 } 492 } 493 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[mask_table_index][ 494 IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6, 495 ipst->ips_ip6_ftable_hash_size)]); 496 } else { 497 irb_ptr = &(ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6( 498 ire->ire_addr_v6, ipst->ips_ip6_cache_table_size)]); 499 } 500 /* 501 * For xresolv interfaces (v6 interfaces with an external 502 * address resolver), ip_newroute_v6/ip_newroute_ipif_v6 503 * are unable to prevent the deletion of the interface route 504 * while adding an IRE_CACHE for an on-link destination 505 * in the IRE_IF_RESOLVER case, since the ire has to go to 506 * the external resolver and return. We can't do a REFHOLD on the 507 * associated interface ire for fear of the message being freed 508 * if the external resolver can't resolve the address. 
509 * Here we look up the interface ire in the forwarding table 510 * and make sure that the interface route has not been deleted. 511 */ 512 if (ire->ire_type == IRE_CACHE && 513 IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6) && 514 (((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) && 515 (((ill_t *)ire->ire_stq->q_ptr)->ill_flags & ILLF_XRESOLV)) { 516 517 pire = ire_ihandle_lookup_onlink_v6(ire); 518 if (pire == NULL) { 519 ire_delete(ire); 520 *ire_p = NULL; 521 return (EINVAL); 522 } 523 /* Prevent pire from getting deleted */ 524 IRB_REFHOLD(pire->ire_bucket); 525 /* Has it been removed already? */ 526 if (pire->ire_marks & IRE_MARK_CONDEMNED) { 527 IRB_REFRELE(pire->ire_bucket); 528 ire_refrele(pire); 529 ire_delete(ire); 530 *ire_p = NULL; 531 return (EINVAL); 532 } 533 } 534 535 flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW); 536 /* 537 * For IRE_CACHES, MATCH_IRE_IPIF is not enough to check 538 * for duplicates because : 539 * 540 * 1) ire_ipif->ipif_ill and ire_stq->q_ptr could be 541 * pointing at different ills. A real duplicate is 542 * a match on both ire_ipif and ire_stq. 543 * 544 * 2) We could have multiple packets trying to create 545 * an IRE_CACHE for the same ill. 546 * 547 * Moreover, IPIF_NOFAILOVER and IPV6_BOUND_PIF endpoints wants 548 * to go out on a particular ill. Rather than looking at the 549 * packet, we depend on the above for MATCH_IRE_ILL here. 550 * 551 * Unlike IPv4, MATCH_IRE_IPIF is needed here as we could have 552 * multiple IRE_CACHES for an ill for the same destination 553 * with various scoped addresses i.e represented by ipifs. 554 * 555 * MATCH_IRE_ILL is done implicitly below for IRE_CACHES. 556 */ 557 if (ire->ire_ipif != NULL) 558 flags |= MATCH_IRE_IPIF; 559 /* 560 * If we are creating hidden ires, make sure we search on 561 * this ill (MATCH_IRE_ILL) and a hidden ire, while we are 562 * searching for duplicates below. 
Otherwise we could 563 * potentially find an IRE on some other interface 564 * and it may not be a IRE marked with IRE_MARK_HIDDEN. We 565 * shouldn't do this as this will lead to an infinite loop as 566 * eventually we need an hidden ire for this packet to go 567 * out. MATCH_IRE_ILL is already marked above. 568 */ 569 if (ire->ire_marks & IRE_MARK_HIDDEN) { 570 ASSERT(ire->ire_type == IRE_CACHE); 571 flags |= MATCH_IRE_MARK_HIDDEN; 572 } 573 574 /* 575 * Start the atomic add of the ire. Grab the ill locks, 576 * ill_g_usesrc_lock and the bucket lock. Check for condemned. 577 * To avoid lock order problems, get the ndp6.ndp_g_lock now itself. 578 */ 579 if (ire->ire_type == IRE_CACHE) { 580 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 581 ndp_g_lock_held = B_TRUE; 582 } 583 584 /* 585 * If ipif or ill is changing ire_atomic_start() may queue the 586 * request and return EINPROGRESS. 587 */ 588 589 error = ire_atomic_start(irb_ptr, ire, q, mp, func); 590 if (error != 0) { 591 if (ndp_g_lock_held) 592 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 593 /* 594 * We don't know whether it is a valid ipif or not. 595 * So, set it to NULL. This assumes that the ire has not added 596 * a reference to the ipif. 597 */ 598 ire->ire_ipif = NULL; 599 ire_delete(ire); 600 if (pire != NULL) { 601 IRB_REFRELE(pire->ire_bucket); 602 ire_refrele(pire); 603 } 604 *ire_p = NULL; 605 return (error); 606 } 607 /* 608 * To avoid creating ires having stale values for the ire_max_frag 609 * we get the latest value atomically here. 
For more details 
610 * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE 611 * in ip_rput_dlpi_writer 612 */ 613 if (ire->ire_max_fragp == NULL) { 614 if (IN6_IS_ADDR_MULTICAST(&ire->ire_addr_v6)) 615 ire->ire_max_frag = ire->ire_ipif->ipif_mtu; 616 else
/*
 * NOTE(review): pire is only assigned in the xresolv IRE_CACHE path above
 * and is dereferenced here without a NULL check. Presumably callers that
 * leave ire_max_fragp NULL for a non-multicast address always take that
 * path - confirm against the callers.
 */
617 ire->ire_max_frag = pire->ire_max_frag; 618 } else { 619 uint_t max_frag; 620 621 max_frag = *ire->ire_max_fragp; 622 ire->ire_max_fragp = NULL; 623 ire->ire_max_frag = max_frag; 624 } 625 626 /* 627 * Atomically check for duplicate and insert in the table. 628 */ 629 for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) { 630 if (ire1->ire_marks & IRE_MARK_CONDEMNED) 631 continue; 632 633 if (ire->ire_type == IRE_CACHE) { 634 /* 635 * We do MATCH_IRE_ILL implicitly here for IRE_CACHES. 636 * As ire_ipif and ire_stq could point to two 637 * different ills, we can't pass just ire_ipif to 638 * ire_match_args and get a match on both ills. 639 * This is just needed for duplicate checks here and 640 * so we don't add an extra argument to 641 * ire_match_args for this. Do it locally. 642 * 643 * NOTE : Currently there is no part of the code 644 * that asks for both MATCH_IRE_IPIF and MATCH_IRE_ILL 645 * match for IRE_CACHEs. Thus we don't want to 646 * extend the arguments to ire_match_args_v6. 647 */ 648 if (ire1->ire_stq != ire->ire_stq) 649 continue; 650 /* 651 * Multiroute IRE_CACHEs for a given destination can 652 * have the same ire_ipif, typically if their source 653 * address is forced using RTF_SETSRC, and the same 654 * send-to queue. We differentiate them using the parent 655 * handle. 
656 */ 657 if ((ire1->ire_flags & RTF_MULTIRT) && 658 (ire->ire_flags & RTF_MULTIRT) && 659 (ire1->ire_phandle != ire->ire_phandle)) 660 continue; 661 } 662 if (ire1->ire_zoneid != ire->ire_zoneid) 663 continue; 664 if (ire_match_args_v6(ire1, &ire->ire_addr_v6, 665 &ire->ire_mask_v6, &ire->ire_gateway_addr_v6, 666 ire->ire_type, ire->ire_ipif, ire->ire_zoneid, 0, NULL, 667 flags)) { 668 /* 669 * Return the old ire after doing a REFHOLD. 670 * As most of the callers continue to use the IRE 671 * after adding, we return a held ire. This will 672 * avoid a lookup in the caller again. If the callers 673 * don't want to use it, they need to do a REFRELE. 674 */ 675 ip1dbg(("found dup ire existing %p new %p", 676 (void *)ire1, (void *)ire)); 677 IRE_REFHOLD(ire1); 678 if (ndp_g_lock_held) 679 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 680 ire_atomic_end(irb_ptr, ire); 681 ire_delete(ire); 682 if (pire != NULL) { 683 /* 684 * Assert that it is 685 * not yet removed from the list. 686 */ 687 ASSERT(pire->ire_ptpn != NULL); 688 IRB_REFRELE(pire->ire_bucket); 689 ire_refrele(pire); 690 } 691 *ire_p = ire1; 692 return (0); 693 } 694 } 695 if (ire->ire_type == IRE_CACHE) { 696 in6_addr_t gw_addr_v6; 697 ill_t *ill = ire_to_ill(ire); 698 char buf[INET6_ADDRSTRLEN]; 699 nce_t *nce; 700 701 /* 702 * All IRE_CACHE types must have a nce. If this is 703 * not the case the entry will not be added. We need 704 * to make sure that if somebody deletes the nce 705 * after we looked up, they will find this ire and 706 * delete the ire. To delete this ire one needs the 707 * bucket lock which we are still holding here. So, 708 * even if the nce gets deleted after we looked up, 709 * this ire will get deleted. 710 * 711 * NOTE : Don't need the ire_lock for accessing 712 * ire_gateway_addr_v6 as it is appearing first 713 * time on the list and rts_setgwr_v6 could not 714 * be changing this. 
715 */ 716 gw_addr_v6 = ire->ire_gateway_addr_v6; 717 if (IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 718 nce = ndp_lookup_v6(ill, &ire->ire_addr_v6, B_TRUE); 719 } else { 720 nce = ndp_lookup_v6(ill, &gw_addr_v6, B_TRUE); 721 } 722 if (nce == NULL) 723 goto failed; 724 725 /* Pair of refhold, refrele just to get the tracing right */ 726 NCE_REFHOLD_TO_REFHOLD_NOTR(nce); 727 /* 728 * Atomically make sure that new IREs don't point 729 * to an NCE that is logically deleted (CONDEMNED). 730 * ndp_delete() first marks the NCE CONDEMNED. 731 * This ensures that the nce_refcnt won't increase 732 * due to new nce_lookups or due to addition of new IREs 733 * pointing to this NCE. Then ndp_delete() cleans up 734 * existing references. If we don't do it atomically here, 735 * ndp_delete() -> nce_ire_delete() will not be able to 736 * clean up the IRE list completely, and the nce_refcnt 737 * won't go down to zero. 738 */ 739 mutex_enter(&nce->nce_lock); 740 if (ill->ill_flags & ILLF_XRESOLV) { 741 /* 742 * If we used an external resolver, we may not 743 * have gone through neighbor discovery to get here. 744 * Must update the nce_state before the next check. 745 */ 746 if (nce->nce_state == ND_INCOMPLETE) 747 nce->nce_state = ND_REACHABLE; 748 } 749 if (nce->nce_state == ND_INCOMPLETE || 750 (nce->nce_flags & NCE_F_CONDEMNED) || 751 (nce->nce_state == ND_UNREACHABLE)) { 752 failed: 753 if (ndp_g_lock_held) 754 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 755 if (nce != NULL) 756 mutex_exit(&nce->nce_lock); 757 ire_atomic_end(irb_ptr, ire); 758 ip1dbg(("ire_add_v6: No nce for dst %s \n", 759 inet_ntop(AF_INET6, &ire->ire_addr_v6, 760 buf, sizeof (buf)))); 761 ire_delete(ire); 762 if (pire != NULL) { 763 /* 764 * Assert that it is 765 * not yet removed from the list. 
766 */ 767 ASSERT(pire->ire_ptpn != NULL); 768 IRB_REFRELE(pire->ire_bucket); 769 ire_refrele(pire); 770 } 771 if (nce != NULL) 772 NCE_REFRELE_NOTR(nce); 773 *ire_p = NULL; 774 return (EINVAL); 775 } else { 776 ire->ire_nce = nce; 777 } 778 mutex_exit(&nce->nce_lock); 779 } 780 /* 781 * Find the first entry that matches ire_addr - provides 782 * tail insertion. *irep will be null if no match. 783 */ 784 irep = (ire_t **)irb_ptr; 785 while ((ire1 = *irep) != NULL && 786 !IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6)) 787 irep = &ire1->ire_next; 788 ASSERT(!(ire->ire_type & IRE_BROADCAST)); 789 790 if (*irep != NULL) { 791 /* 792 * Find the last ire which matches ire_addr_v6. 793 * Needed to do tail insertion among entries with the same 794 * ire_addr_v6. 795 */ 796 while (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 797 &ire1->ire_addr_v6)) { 798 irep = &ire1->ire_next; 799 ire1 = *irep; 800 if (ire1 == NULL) 801 break; 802 } 803 } 804 805 if (ire->ire_type == IRE_DEFAULT) { 806 /* 807 * We keep a count of default gateways which is used when 808 * assigning them as routes. 809 */ 810 ipst->ips_ipv6_ire_default_count++; 811 ASSERT(ipst->ips_ipv6_ire_default_count != 0); /* Wraparound */ 812 } 813 /* Insert at *irep */ 814 ire1 = *irep; 815 if (ire1 != NULL) 816 ire1->ire_ptpn = &ire->ire_next; 817 ire->ire_next = ire1; 818 /* Link the new one in. */ 819 ire->ire_ptpn = irep; 820 /* 821 * ire_walk routines de-reference ire_next without holding 822 * a lock. Before we point to the new ire, we want to make 823 * sure the store that sets the ire_next of the new ire 824 * reaches global visibility, so that ire_walk routines 825 * don't see a truncated list of ires i.e if the ire_next 826 * of the new ire gets set after we do "*irep = ire" due 827 * to re-ordering, the ire_walk thread will see a NULL 828 * once it accesses the ire_next of the new ire. 829 * membar_producer() makes sure that the following store 830 * happens *after* all of the above stores. 
831 */ 832 membar_producer(); 833 *irep = ire; 834 ire->ire_bucket = irb_ptr; 835 /* 836 * We return a bumped up IRE above. Keep it symmetrical 837 * so that the callers will always have to release. This 838 * helps the callers of this function because they continue 839 * to use the IRE after adding and hence they don't have to 840 * lookup again after we return the IRE. 841 * 842 * NOTE : We don't have to use atomics as this is appearing 843 * in the list for the first time and no one else can bump 844 * up the reference count on this yet. 845 */ 846 IRE_REFHOLD_LOCKED(ire); 847 BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_inserted); 848 irb_ptr->irb_ire_cnt++; 849 if (ire->ire_marks & IRE_MARK_TEMPORARY) 850 irb_ptr->irb_tmp_ire_cnt++; 851 852 if (ire->ire_ipif != NULL) { 853 DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ire->ire_ipif, 854 (char *), "ire", (void *), ire); 855 ire->ire_ipif->ipif_cnt_ire++; 856 if (ire->ire_stq != NULL) { 857 stq_ill = (ill_t *)ire->ire_stq->q_ptr; 858 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), stq_ill, 859 (char *), "ire", (void *), ire); 860 stq_ill->ill_cnt_ire++; 861 } 862 } else { 863 ASSERT(ire->ire_stq == NULL); 864 } 865 866 if (ndp_g_lock_held) 867 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 868 ire_atomic_end(irb_ptr, ire); 869 870 if (pire != NULL) { 871 /* Assert that it is not removed from the list yet */ 872 ASSERT(pire->ire_ptpn != NULL); 873 IRB_REFRELE(pire->ire_bucket); 874 ire_refrele(pire); 875 } 876 877 if (ire->ire_type != IRE_CACHE) { 878 /* 879 * For ire's with host mask see if there is an entry 880 * in the cache. If there is one flush the whole cache as 881 * there might be multiple entries due to RTF_MULTIRT (CGTP). 882 * If no entry is found than there is no need to flush the 883 * cache. 
884 */ 885 886 if (ip_mask_to_plen_v6(&ire->ire_mask_v6) == IPV6_ABITS) { 887 ire_t *lire; 888 lire = ire_ctable_lookup_v6(&ire->ire_addr_v6, NULL, 889 IRE_CACHE, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, 890 ipst); 891 if (lire != NULL) { 892 ire_refrele(lire); 893 ire_flush_cache_v6(ire, IRE_FLUSH_ADD); 894 } 895 } else { 896 ire_flush_cache_v6(ire, IRE_FLUSH_ADD); 897 } 898 } 899 900 *ire_p = ire; 901 return (0); 902 } 903 904 /* 905 * Search for all HOST REDIRECT routes that are 906 * pointing at the specified gateway and 907 * delete them. This routine is called only 908 * when a default gateway is going away. 909 */
/*
 * Only the forwarding table at index IP6_MASK_TABLE_SIZE - 1 is walked,
 * and only entries flagged RTF_DYNAMIC are considered. Each bucket is
 * held (IRB_REFHOLD) for the duration of its walk, and the gateway
 * address of every candidate is read under ire_lock before comparing it
 * with 'gateway'.
 */
910 static void 911 ire_delete_host_redirects_v6(const in6_addr_t *gateway, ip_stack_t *ipst) 912 { 913 irb_t *irb_ptr; 914 irb_t *irb; 915 ire_t *ire; 916 in6_addr_t gw_addr_v6; 917 int i; 918 919 /* get the hash table for HOST routes */ 920 irb_ptr = ipst->ips_ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)]; 921 if (irb_ptr == NULL) 922 return; 923 for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) { 924 irb = &irb_ptr[i]; 925 IRB_REFHOLD(irb); 926 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 927 if (!(ire->ire_flags & RTF_DYNAMIC)) 928 continue; 929 mutex_enter(&ire->ire_lock); 930 gw_addr_v6 = ire->ire_gateway_addr_v6; 931 mutex_exit(&ire->ire_lock); 932 if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) 933 ire_delete(ire); 934 } 935 IRB_REFRELE(irb); 936 } 937 } 938 939 /* 940 * Delete all the cache entries with this 'addr'. This is the IPv6 counterpart 941 * of ip_ire_clookup_and_delete. The difference being this function does not 942 * return any value. IPv6 processing of a gratuitous ARP, as it stands, is 943 * different than IPv4 in that, regardless of the presence of a cache entry 944 * for this address, an ire_walk_v6 is done. Another difference is that unlike 945 * in the case of IPv4 this does not take an ipif_t argument, since it is only 946 * called by ip_arp_news and the match is always only on the address. 
947 */ 948 void 949 ip_ire_clookup_and_delete_v6(const in6_addr_t *addr, ip_stack_t *ipst) 950 { 951 irb_t *irb; 952 ire_t *cire; 953 boolean_t found = B_FALSE; 954 955 irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 956 ipst->ips_ip6_cache_table_size)]; 957 IRB_REFHOLD(irb); 958 for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) { 959 if (cire->ire_marks & IRE_MARK_CONDEMNED) 960 continue; 961 if (IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, addr)) { 962 963 /* This signifies start of a match */ 964 if (!found) 965 found = B_TRUE; 966 if (cire->ire_type == IRE_CACHE) { 967 if (cire->ire_nce != NULL) 968 ndp_delete(cire->ire_nce); 969 ire_delete_v6(cire); 970 } 971 /* End of the match */ 972 } else if (found) 973 break; 974 } 975 IRB_REFRELE(irb); 976 } 977 978 /* 979 * Delete the specified IRE. 980 * All calls should use ire_delete(). 981 * Sometimes called as writer though not required by this function. 982 * 983 * NOTE : This function is called only if the ire was added 984 * in the list. 985 */ 986 void 987 ire_delete_v6(ire_t *ire) 988 { 989 in6_addr_t gw_addr_v6; 990 ip_stack_t *ipst = ire->ire_ipst; 991 992 ASSERT(ire->ire_refcnt >= 1); 993 ASSERT(ire->ire_ipversion == IPV6_VERSION); 994 995 if (ire->ire_type != IRE_CACHE) 996 ire_flush_cache_v6(ire, IRE_FLUSH_DELETE); 997 if (ire->ire_type == IRE_DEFAULT) { 998 /* 999 * when a default gateway is going away 1000 * delete all the host redirects pointing at that 1001 * gateway. 1002 */ 1003 mutex_enter(&ire->ire_lock); 1004 gw_addr_v6 = ire->ire_gateway_addr_v6; 1005 mutex_exit(&ire->ire_lock); 1006 ire_delete_host_redirects_v6(&gw_addr_v6, ipst); 1007 } 1008 } 1009 1010 /* 1011 * ire_walk routine to delete all IRE_CACHE and IRE_HOST type redirect 1012 * entries. 
1013 */ 1014 /*ARGSUSED1*/ 1015 void 1016 ire_delete_cache_v6(ire_t *ire, char *arg) 1017 { 1018 char addrstr1[INET6_ADDRSTRLEN]; 1019 char addrstr2[INET6_ADDRSTRLEN]; 1020 1021 if ((ire->ire_type & IRE_CACHE) || 1022 (ire->ire_flags & RTF_DYNAMIC)) { 1023 ip1dbg(("ire_delete_cache_v6: deleted %s type %d through %s\n", 1024 inet_ntop(AF_INET6, &ire->ire_addr_v6, 1025 addrstr1, sizeof (addrstr1)), 1026 ire->ire_type, 1027 inet_ntop(AF_INET6, &ire->ire_gateway_addr_v6, 1028 addrstr2, sizeof (addrstr2)))); 1029 ire_delete(ire); 1030 } 1031 1032 } 1033 1034 /* 1035 * ire_walk routine to delete all IRE_CACHE/IRE_HOST type redirect entries 1036 * that have a given gateway address. 1037 */ 1038 void 1039 ire_delete_cache_gw_v6(ire_t *ire, char *addr) 1040 { 1041 in6_addr_t *gw_addr = (in6_addr_t *)addr; 1042 char buf1[INET6_ADDRSTRLEN]; 1043 char buf2[INET6_ADDRSTRLEN]; 1044 in6_addr_t ire_gw_addr_v6; 1045 1046 if (!(ire->ire_type & IRE_CACHE) && 1047 !(ire->ire_flags & RTF_DYNAMIC)) 1048 return; 1049 1050 mutex_enter(&ire->ire_lock); 1051 ire_gw_addr_v6 = ire->ire_gateway_addr_v6; 1052 mutex_exit(&ire->ire_lock); 1053 1054 if (IN6_ARE_ADDR_EQUAL(&ire_gw_addr_v6, gw_addr)) { 1055 ip1dbg(("ire_delete_cache_gw_v6: deleted %s type %d to %s\n", 1056 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, 1057 buf1, sizeof (buf1)), 1058 ire->ire_type, 1059 inet_ntop(AF_INET6, &ire_gw_addr_v6, 1060 buf2, sizeof (buf2)))); 1061 ire_delete(ire); 1062 } 1063 } 1064 1065 /* 1066 * Remove all IRE_CACHE entries that match 1067 * the ire specified. (Sometimes called 1068 * as writer though not required by this function.) 1069 * 1070 * The flag argument indicates if the 1071 * flush request is due to addition 1072 * of new route (IRE_FLUSH_ADD) or deletion of old 1073 * route (IRE_FLUSH_DELETE). 1074 * 1075 * This routine takes only the IREs from the forwarding 1076 * table and flushes the corresponding entries from 1077 * the cache table. 
1078 * 1079 * When flushing due to the deletion of an old route, it 1080 * just checks the cache handles (ire_phandle and ire_ihandle) and 1081 * deletes the ones that match. 1082 * 1083 * When flushing due to the creation of a new route, it checks 1084 * if a cache entry's address matches the one in the IRE and 1085 * that the cache entry's parent has a less specific mask than the 1086 * one in IRE. The destination of such a cache entry could be the 1087 * gateway for other cache entries, so we need to flush those as 1088 * well by looking for gateway addresses matching the IRE's address. 1089 */ 1090 void 1091 ire_flush_cache_v6(ire_t *ire, int flag) 1092 { 1093 int i; 1094 ire_t *cire; 1095 irb_t *irb; 1096 ip_stack_t *ipst = ire->ire_ipst; 1097 1098 if (ire->ire_type & IRE_CACHE) 1099 return; 1100 1101 /* 1102 * If a default is just created, there is no point 1103 * in going through the cache, as there will not be any 1104 * cached ires. 1105 */ 1106 if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD) 1107 return; 1108 if (flag == IRE_FLUSH_ADD) { 1109 /* 1110 * This selective flush is 1111 * due to the addition of 1112 * new IRE. 1113 */ 1114 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 1115 irb = &ipst->ips_ip_cache_table_v6[i]; 1116 if ((cire = irb->irb_ire) == NULL) 1117 continue; 1118 IRB_REFHOLD(irb); 1119 for (cire = irb->irb_ire; cire != NULL; 1120 cire = cire->ire_next) { 1121 if (cire->ire_type != IRE_CACHE) 1122 continue; 1123 /* 1124 * If 'cire' belongs to the same subnet 1125 * as the new ire being added, and 'cire' 1126 * is derived from a prefix that is less 1127 * specific than the new ire being added, 1128 * we need to flush 'cire'; for instance, 1129 * when a new interface comes up. 
1130 */ 1131 if ((V6_MASK_EQ_2(cire->ire_addr_v6, 1132 ire->ire_mask_v6, ire->ire_addr_v6) && 1133 (ip_mask_to_plen_v6(&cire->ire_cmask_v6) <= 1134 ire->ire_masklen))) { 1135 ire_delete(cire); 1136 continue; 1137 } 1138 /* 1139 * This is the case when the ire_gateway_addr 1140 * of 'cire' belongs to the same subnet as 1141 * the new ire being added. 1142 * Flushing such ires is sometimes required to 1143 * avoid misrouting: say we have a machine with 1144 * two interfaces (I1 and I2), a default router 1145 * R on the I1 subnet, and a host route to an 1146 * off-link destination D with a gateway G on 1147 * the I2 subnet. 1148 * Under normal operation, we will have an 1149 * on-link cache entry for G and an off-link 1150 * cache entry for D with G as ire_gateway_addr, 1151 * traffic to D will reach its destination 1152 * through gateway G. 1153 * If the administrator does 'ifconfig I2 down', 1154 * the cache entries for D and G will be 1155 * flushed. However, G will now be resolved as 1156 * an off-link destination using R (the default 1157 * router) as gateway. Then D will also be 1158 * resolved as an off-link destination using G 1159 * as gateway - this behavior is due to 1160 * compatibility reasons, see comment in 1161 * ire_ihandle_lookup_offlink(). Traffic to D 1162 * will go to the router R and probably won't 1163 * reach the destination. 1164 * The administrator then does 'ifconfig I2 up'. 1165 * Since G is on the I2 subnet, this routine 1166 * will flush its cache entry. It must also 1167 * flush the cache entry for D, otherwise 1168 * traffic will stay misrouted until the IRE 1169 * times out. 1170 */ 1171 if (V6_MASK_EQ_2(cire->ire_gateway_addr_v6, 1172 ire->ire_mask_v6, ire->ire_addr_v6)) { 1173 ire_delete(cire); 1174 continue; 1175 } 1176 } 1177 IRB_REFRELE(irb); 1178 } 1179 } else { 1180 /* 1181 * delete the cache entries based on 1182 * handle in the IRE as this IRE is 1183 * being deleted/changed. 
1184 */ 1185 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 1186 irb = &ipst->ips_ip_cache_table_v6[i]; 1187 if ((cire = irb->irb_ire) == NULL) 1188 continue; 1189 IRB_REFHOLD(irb); 1190 for (cire = irb->irb_ire; cire != NULL; 1191 cire = cire->ire_next) { 1192 if (cire->ire_type != IRE_CACHE) 1193 continue; 1194 if ((cire->ire_phandle == 0 || 1195 cire->ire_phandle != ire->ire_phandle) && 1196 (cire->ire_ihandle == 0 || 1197 cire->ire_ihandle != ire->ire_ihandle)) 1198 continue; 1199 ire_delete(cire); 1200 } 1201 IRB_REFRELE(irb); 1202 } 1203 } 1204 } 1205 1206 /* 1207 * Matches the arguments passed with the values in the ire. 1208 * 1209 * Note: for match types that match using "ipif" passed in, ipif 1210 * must be checked for non-NULL before calling this routine. 1211 */ 1212 static boolean_t 1213 ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask, 1214 const in6_addr_t *gateway, int type, const ipif_t *ipif, zoneid_t zoneid, 1215 uint32_t ihandle, const ts_label_t *tsl, int match_flags) 1216 { 1217 in6_addr_t masked_addr; 1218 in6_addr_t gw_addr_v6; 1219 ill_t *ire_ill = NULL, *dst_ill; 1220 ill_t *ipif_ill = NULL; 1221 ill_group_t *ire_ill_group = NULL; 1222 ill_group_t *ipif_ill_group = NULL; 1223 ipif_t *src_ipif; 1224 1225 ASSERT(ire->ire_ipversion == IPV6_VERSION); 1226 ASSERT(addr != NULL); 1227 ASSERT(mask != NULL); 1228 ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL); 1229 ASSERT((!(match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP))) || 1230 (ipif != NULL && ipif->ipif_isv6)); 1231 1232 /* 1233 * HIDDEN cache entries have to be looked up specifically with 1234 * MATCH_IRE_MARK_HIDDEN. MATCH_IRE_MARK_HIDDEN is usually set 1235 * when the interface is FAILED or INACTIVE. In that case, 1236 * any IRE_CACHES that exists should be marked with 1237 * IRE_MARK_HIDDEN. So, we don't really need to match below 1238 * for IRE_MARK_HIDDEN. But we do so for consistency. 
1239 */ 1240 if (!(match_flags & MATCH_IRE_MARK_HIDDEN) && 1241 (ire->ire_marks & IRE_MARK_HIDDEN)) 1242 return (B_FALSE); 1243 1244 if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid && 1245 ire->ire_zoneid != ALL_ZONES) { 1246 /* 1247 * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is 1248 * valid and does not match that of ire_zoneid, a failure to 1249 * match is reported at this point. Otherwise, since some IREs 1250 * that are available in the global zone can be used in local 1251 * zones, additional checks need to be performed: 1252 * 1253 * IRE_CACHE and IRE_LOOPBACK entries should 1254 * never be matched in this situation. 1255 * 1256 * IRE entries that have an interface associated with them 1257 * should in general not match unless they are an IRE_LOCAL 1258 * or in the case when MATCH_IRE_DEFAULT has been set in 1259 * the caller. In the case of the former, checking of the 1260 * other fields supplied should take place. 1261 * 1262 * In the case where MATCH_IRE_DEFAULT has been set, 1263 * all of the ipif's associated with the IRE's ill are 1264 * checked to see if there is a matching zoneid. If any 1265 * one ipif has a matching zoneid, this IRE is a 1266 * potential candidate so checking of the other fields 1267 * takes place. 1268 * 1269 * In the case where the IRE_INTERFACE has a usable source 1270 * address (indicated by ill_usesrc_ifindex) in the 1271 * correct zone then it's permitted to return this IRE 1272 */ 1273 if (match_flags & MATCH_IRE_ZONEONLY) 1274 return (B_FALSE); 1275 if (ire->ire_type & (IRE_CACHE | IRE_LOOPBACK)) 1276 return (B_FALSE); 1277 /* 1278 * Note, IRE_INTERFACE can have the stq as NULL. For 1279 * example, if the default multicast route is tied to 1280 * the loopback address. 
1281 */ 1282 if ((ire->ire_type & IRE_INTERFACE) && 1283 (ire->ire_stq != NULL)) { 1284 dst_ill = (ill_t *)ire->ire_stq->q_ptr; 1285 /* 1286 * If there is a usable source address in the 1287 * zone, then it's ok to return an 1288 * IRE_INTERFACE 1289 */ 1290 if ((dst_ill->ill_usesrc_ifindex != 0) && 1291 (src_ipif = ipif_select_source_v6(dst_ill, addr, 1292 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid)) 1293 != NULL) { 1294 ip3dbg(("ire_match_args: src_ipif %p" 1295 " dst_ill %p", (void *)src_ipif, 1296 (void *)dst_ill)); 1297 ipif_refrele(src_ipif); 1298 } else { 1299 ip3dbg(("ire_match_args: src_ipif NULL" 1300 " dst_ill %p\n", (void *)dst_ill)); 1301 return (B_FALSE); 1302 } 1303 } 1304 if (ire->ire_ipif != NULL && ire->ire_type != IRE_LOCAL && 1305 !(ire->ire_type & IRE_INTERFACE)) { 1306 ipif_t *tipif; 1307 1308 if ((match_flags & MATCH_IRE_DEFAULT) == 0) 1309 return (B_FALSE); 1310 mutex_enter(&ire->ire_ipif->ipif_ill->ill_lock); 1311 for (tipif = ire->ire_ipif->ipif_ill->ill_ipif; 1312 tipif != NULL; tipif = tipif->ipif_next) { 1313 if (IPIF_CAN_LOOKUP(tipif) && 1314 (tipif->ipif_flags & IPIF_UP) && 1315 (tipif->ipif_zoneid == zoneid || 1316 tipif->ipif_zoneid == ALL_ZONES)) 1317 break; 1318 } 1319 mutex_exit(&ire->ire_ipif->ipif_ill->ill_lock); 1320 if (tipif == NULL) 1321 return (B_FALSE); 1322 } 1323 } 1324 1325 if (match_flags & MATCH_IRE_GW) { 1326 mutex_enter(&ire->ire_lock); 1327 gw_addr_v6 = ire->ire_gateway_addr_v6; 1328 mutex_exit(&ire->ire_lock); 1329 } 1330 /* 1331 * For IRE_CACHES, MATCH_IRE_ILL/ILL_GROUP really means that 1332 * somebody wants to send out on a particular interface which 1333 * is given by ire_stq and hence use ire_stq to derive the ill 1334 * value. ire_ipif for IRE_CACHES is just the 1335 * means of getting a source address i.e ire_src_addr_v6 = 1336 * ire->ire_ipif->ipif_src_addr_v6. 
1337 */ 1338 if (match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP)) { 1339 ire_ill = ire_to_ill(ire); 1340 if (ire_ill != NULL) 1341 ire_ill_group = ire_ill->ill_group; 1342 ipif_ill = ipif->ipif_ill; 1343 ipif_ill_group = ipif_ill->ill_group; 1344 } 1345 1346 /* No ire_addr_v6 bits set past the mask */ 1347 ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6, 1348 ire->ire_addr_v6)); 1349 V6_MASK_COPY(*addr, *mask, masked_addr); 1350 1351 if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) && 1352 ((!(match_flags & MATCH_IRE_GW)) || 1353 IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) && 1354 ((!(match_flags & MATCH_IRE_TYPE)) || 1355 (ire->ire_type & type)) && 1356 ((!(match_flags & MATCH_IRE_SRC)) || 1357 IN6_ARE_ADDR_EQUAL(&ire->ire_src_addr_v6, 1358 &ipif->ipif_v6src_addr)) && 1359 ((!(match_flags & MATCH_IRE_IPIF)) || 1360 (ire->ire_ipif == ipif)) && 1361 ((!(match_flags & MATCH_IRE_MARK_HIDDEN)) || 1362 (ire->ire_type != IRE_CACHE || 1363 ire->ire_marks & IRE_MARK_HIDDEN)) && 1364 ((!(match_flags & MATCH_IRE_ILL)) || 1365 (ire_ill == ipif_ill)) && 1366 ((!(match_flags & MATCH_IRE_IHANDLE)) || 1367 (ire->ire_ihandle == ihandle)) && 1368 ((!(match_flags & MATCH_IRE_ILL_GROUP)) || 1369 (ire_ill == ipif_ill) || 1370 (ire_ill_group != NULL && 1371 ire_ill_group == ipif_ill_group)) && 1372 ((!(match_flags & MATCH_IRE_SECATTR)) || 1373 (!is_system_labeled()) || 1374 (tsol_ire_match_gwattr(ire, tsl) == 0))) { 1375 /* We found the matched IRE */ 1376 return (B_TRUE); 1377 } 1378 return (B_FALSE); 1379 } 1380 1381 /* 1382 * Lookup for a route in all the tables 1383 */ 1384 ire_t * 1385 ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 1386 const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 1387 zoneid_t zoneid, const ts_label_t *tsl, int flags, ip_stack_t *ipst) 1388 { 1389 ire_t *ire = NULL; 1390 1391 /* 1392 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 1393 * MATCH_IRE_ILL is set. 
1394 */ 1395 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 1396 (ipif == NULL)) 1397 return (NULL); 1398 1399 /* 1400 * might be asking for a cache lookup, 1401 * This is not best way to lookup cache, 1402 * user should call ire_cache_lookup directly. 1403 * 1404 * If MATCH_IRE_TYPE was set, first lookup in the cache table and then 1405 * in the forwarding table, if the applicable type flags were set. 1406 */ 1407 if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) { 1408 ire = ire_ctable_lookup_v6(addr, gateway, type, ipif, zoneid, 1409 tsl, flags, ipst); 1410 if (ire != NULL) 1411 return (ire); 1412 } 1413 if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) { 1414 ire = ire_ftable_lookup_v6(addr, mask, gateway, type, ipif, 1415 pire, zoneid, 0, tsl, flags, ipst); 1416 } 1417 return (ire); 1418 } 1419 1420 /* 1421 * Lookup a route in forwarding table. 1422 * specific lookup is indicated by passing the 1423 * required parameters and indicating the 1424 * match required in flag field. 1425 * 1426 * Looking for default route can be done in three ways 1427 * 1) pass mask as ipv6_all_zeros and set MATCH_IRE_MASK in flags field 1428 * along with other matches. 1429 * 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags 1430 * field along with other matches. 1431 * 3) if the destination and mask are passed as zeros. 1432 * 1433 * A request to return a default route if no route 1434 * is found, can be specified by setting MATCH_IRE_DEFAULT 1435 * in flags. 1436 * 1437 * It does not support recursion more than one level. It 1438 * will do recursive lookup only when the lookup maps to 1439 * a prefix or default route and MATCH_IRE_RECURSIVE flag is passed. 1440 * 1441 * If the routing table is setup to allow more than one level 1442 * of recursion, the cleaning up cache table will not work resulting 1443 * in invalid routing. 1444 * 1445 * Supports link-local addresses by following the ipif/ill when recursing. 
1446 * 1447 * NOTE : When this function returns NULL, pire has already been released. 1448 * pire is valid only when this function successfully returns an 1449 * ire. 1450 */ 1451 ire_t * 1452 ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 1453 const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 1454 zoneid_t zoneid, uint32_t ihandle, const ts_label_t *tsl, int flags, 1455 ip_stack_t *ipst) 1456 { 1457 irb_t *irb_ptr; 1458 ire_t *rire; 1459 ire_t *ire = NULL; 1460 ire_t *saved_ire; 1461 nce_t *nce; 1462 int i; 1463 in6_addr_t gw_addr_v6; 1464 1465 ASSERT(addr != NULL); 1466 ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL); 1467 ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 1468 ASSERT(ipif == NULL || ipif->ipif_isv6); 1469 1470 /* 1471 * When we return NULL from this function, we should make 1472 * sure that *pire is NULL so that the callers will not 1473 * wrongly REFRELE the pire. 1474 */ 1475 if (pire != NULL) 1476 *pire = NULL; 1477 /* 1478 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 1479 * MATCH_IRE_ILL is set. 1480 */ 1481 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 1482 (ipif == NULL)) 1483 return (NULL); 1484 1485 /* 1486 * If the mask is known, the lookup 1487 * is simple, if the mask is not known 1488 * we need to search. 
1489 */ 1490 if (flags & MATCH_IRE_MASK) { 1491 uint_t masklen; 1492 1493 masklen = ip_mask_to_plen_v6(mask); 1494 if (ipst->ips_ip_forwarding_table_v6[masklen] == NULL) 1495 return (NULL); 1496 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[masklen][ 1497 IRE_ADDR_MASK_HASH_V6(*addr, *mask, 1498 ipst->ips_ip6_ftable_hash_size)]); 1499 rw_enter(&irb_ptr->irb_lock, RW_READER); 1500 for (ire = irb_ptr->irb_ire; ire != NULL; 1501 ire = ire->ire_next) { 1502 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1503 continue; 1504 if (ire_match_args_v6(ire, addr, mask, gateway, type, 1505 ipif, zoneid, ihandle, tsl, flags)) 1506 goto found_ire; 1507 } 1508 rw_exit(&irb_ptr->irb_lock); 1509 } else { 1510 /* 1511 * In this case we don't know the mask, we need to 1512 * search the table assuming different mask sizes. 1513 * we start with 128 bit mask, we don't allow default here. 1514 */ 1515 for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) { 1516 in6_addr_t tmpmask; 1517 1518 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) 1519 continue; 1520 (void) ip_plen_to_mask_v6(i, &tmpmask); 1521 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][ 1522 IRE_ADDR_MASK_HASH_V6(*addr, tmpmask, 1523 ipst->ips_ip6_ftable_hash_size)]; 1524 rw_enter(&irb_ptr->irb_lock, RW_READER); 1525 for (ire = irb_ptr->irb_ire; ire != NULL; 1526 ire = ire->ire_next) { 1527 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1528 continue; 1529 if (ire_match_args_v6(ire, addr, 1530 &ire->ire_mask_v6, gateway, type, ipif, 1531 zoneid, ihandle, tsl, flags)) 1532 goto found_ire; 1533 } 1534 rw_exit(&irb_ptr->irb_lock); 1535 } 1536 } 1537 1538 /* 1539 * We come here if no route has yet been found. 1540 * 1541 * Handle the case where default route is 1542 * requested by specifying type as one of the possible 1543 * types for that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE). 
1544 * 1545 * If MATCH_IRE_MASK is specified, then the appropriate default route 1546 * would have been found above if it exists so it isn't looked up here. 1547 * If MATCH_IRE_DEFAULT was also specified, then a default route will be 1548 * searched for later. 1549 */ 1550 if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE && 1551 (type & (IRE_DEFAULT | IRE_INTERFACE))) { 1552 if (ipst->ips_ip_forwarding_table_v6[0] != NULL) { 1553 /* addr & mask is zero for defaults */ 1554 irb_ptr = &ipst->ips_ip_forwarding_table_v6[0][ 1555 IRE_ADDR_HASH_V6(ipv6_all_zeros, 1556 ipst->ips_ip6_ftable_hash_size)]; 1557 rw_enter(&irb_ptr->irb_lock, RW_READER); 1558 for (ire = irb_ptr->irb_ire; ire != NULL; 1559 ire = ire->ire_next) { 1560 1561 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1562 continue; 1563 1564 if (ire_match_args_v6(ire, addr, 1565 &ipv6_all_zeros, gateway, type, ipif, 1566 zoneid, ihandle, tsl, flags)) 1567 goto found_ire; 1568 } 1569 rw_exit(&irb_ptr->irb_lock); 1570 } 1571 } 1572 /* 1573 * We come here only if no route is found. 1574 * see if the default route can be used which is allowed 1575 * only if the default matching criteria is specified. 1576 * The ipv6_ire_default_count tracks the number of IRE_DEFAULT 1577 * entries. However, the ip_forwarding_table_v6[0] also contains 1578 * interface routes thus the count can be zero. 1579 */ 1580 saved_ire = NULL; 1581 if ((flags & (MATCH_IRE_DEFAULT | MATCH_IRE_MASK)) == 1582 MATCH_IRE_DEFAULT) { 1583 ire_t *ire_origin; 1584 uint_t g_index; 1585 uint_t index; 1586 1587 if (ipst->ips_ip_forwarding_table_v6[0] == NULL) 1588 return (NULL); 1589 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[0])[0]; 1590 1591 /* 1592 * Keep a tab on the bucket while looking the IRE_DEFAULT 1593 * entries. We need to keep track of a particular IRE 1594 * (ire_origin) so this ensures that it will not be unlinked 1595 * from the hash list during the recursive lookup below. 
1596 */ 1597 IRB_REFHOLD(irb_ptr); 1598 ire = irb_ptr->irb_ire; 1599 if (ire == NULL) { 1600 IRB_REFRELE(irb_ptr); 1601 return (NULL); 1602 } 1603 1604 /* 1605 * Get the index first, since it can be changed by other 1606 * threads. Then get to the right default route skipping 1607 * default interface routes if any. As we hold a reference on 1608 * the IRE bucket, ipv6_ire_default_count can only increase so 1609 * we can't reach the end of the hash list unexpectedly. 1610 */ 1611 if (ipst->ips_ipv6_ire_default_count != 0) { 1612 g_index = ipst->ips_ipv6_ire_default_index++; 1613 index = g_index % ipst->ips_ipv6_ire_default_count; 1614 while (index != 0) { 1615 if (!(ire->ire_type & IRE_INTERFACE)) 1616 index--; 1617 ire = ire->ire_next; 1618 } 1619 ASSERT(ire != NULL); 1620 } else { 1621 /* 1622 * No default route, so we only have default interface 1623 * routes: don't enter the first loop. 1624 */ 1625 ire = NULL; 1626 } 1627 1628 /* 1629 * Round-robin the default routers list looking for a neighbor 1630 * that matches the passed in parameters and is reachable. If 1631 * none found, just return a route from the default router list 1632 * if it exists. If we can't find a default route (IRE_DEFAULT), 1633 * look for interface default routes. 1634 * We start with the ire we found above and we walk the hash 1635 * list until we're back where we started, see 1636 * ire_get_next_default_ire(). It doesn't matter if default 1637 * routes are added or deleted by other threads - we know this 1638 * ire will stay in the list because we hold a reference on the 1639 * ire bucket. 1640 * NB: if we only have interface default routes, ire is NULL so 1641 * we don't even enter this loop (see above). 
1642 */ 1643 ire_origin = ire; 1644 for (; ire != NULL; 1645 ire = ire_get_next_default_ire(ire, ire_origin)) { 1646 1647 if (ire_match_args_v6(ire, addr, 1648 &ipv6_all_zeros, gateway, type, ipif, 1649 zoneid, ihandle, tsl, flags)) { 1650 int match_flags; 1651 1652 /* 1653 * We have something to work with. 1654 * If we can find a resolved/reachable 1655 * entry, we will use this. Otherwise 1656 * we'll try to find an entry that has 1657 * a resolved cache entry. We will fallback 1658 * on this if we don't find anything else. 1659 */ 1660 if (saved_ire == NULL) 1661 saved_ire = ire; 1662 mutex_enter(&ire->ire_lock); 1663 gw_addr_v6 = ire->ire_gateway_addr_v6; 1664 mutex_exit(&ire->ire_lock); 1665 match_flags = MATCH_IRE_ILL_GROUP | 1666 MATCH_IRE_SECATTR; 1667 rire = ire_ctable_lookup_v6(&gw_addr_v6, NULL, 1668 0, ire->ire_ipif, zoneid, tsl, match_flags, 1669 ipst); 1670 if (rire != NULL) { 1671 nce = rire->ire_nce; 1672 if (nce != NULL && 1673 NCE_ISREACHABLE(nce) && 1674 nce->nce_flags & NCE_F_ISROUTER) { 1675 ire_refrele(rire); 1676 IRE_REFHOLD(ire); 1677 IRB_REFRELE(irb_ptr); 1678 goto found_ire_held; 1679 } else if (nce != NULL && 1680 !(nce->nce_flags & 1681 NCE_F_ISROUTER)) { 1682 /* 1683 * Make sure we don't use 1684 * this ire 1685 */ 1686 if (saved_ire == ire) 1687 saved_ire = NULL; 1688 } 1689 ire_refrele(rire); 1690 } else if (ipst-> 1691 ips_ipv6_ire_default_count > 1 && 1692 zoneid != GLOBAL_ZONEID) { 1693 /* 1694 * When we're in a local zone, we're 1695 * only interested in default routers 1696 * that are reachable through ipifs 1697 * within our zone. 1698 * The potentially expensive call to 1699 * ire_route_lookup_v6() is avoided when 1700 * we have only one default route. 
1701 */ 1702 int ire_match_flags = MATCH_IRE_TYPE | 1703 MATCH_IRE_SECATTR; 1704 1705 if (ire->ire_ipif != NULL) { 1706 ire_match_flags |= 1707 MATCH_IRE_ILL_GROUP; 1708 } 1709 rire = ire_route_lookup_v6(&gw_addr_v6, 1710 NULL, NULL, IRE_INTERFACE, 1711 ire->ire_ipif, NULL, 1712 zoneid, tsl, ire_match_flags, ipst); 1713 if (rire != NULL) { 1714 ire_refrele(rire); 1715 saved_ire = ire; 1716 } else if (saved_ire == ire) { 1717 /* 1718 * Make sure we don't use 1719 * this ire 1720 */ 1721 saved_ire = NULL; 1722 } 1723 } 1724 } 1725 } 1726 if (saved_ire != NULL) { 1727 ire = saved_ire; 1728 IRE_REFHOLD(ire); 1729 IRB_REFRELE(irb_ptr); 1730 goto found_ire_held; 1731 } else { 1732 /* 1733 * Look for a interface default route matching the 1734 * args passed in. No round robin here. Just pick 1735 * the right one. 1736 */ 1737 for (ire = irb_ptr->irb_ire; ire != NULL; 1738 ire = ire->ire_next) { 1739 1740 if (!(ire->ire_type & IRE_INTERFACE)) 1741 continue; 1742 1743 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1744 continue; 1745 1746 if (ire_match_args_v6(ire, addr, 1747 &ipv6_all_zeros, gateway, type, ipif, 1748 zoneid, ihandle, tsl, flags)) { 1749 IRE_REFHOLD(ire); 1750 IRB_REFRELE(irb_ptr); 1751 goto found_ire_held; 1752 } 1753 } 1754 IRB_REFRELE(irb_ptr); 1755 } 1756 } 1757 ASSERT(ire == NULL); 1758 ip1dbg(("ire_ftable_lookup_v6: returning NULL ire")); 1759 return (NULL); 1760 found_ire: 1761 ASSERT((ire->ire_marks & IRE_MARK_CONDEMNED) == 0); 1762 IRE_REFHOLD(ire); 1763 rw_exit(&irb_ptr->irb_lock); 1764 1765 found_ire_held: 1766 if ((flags & MATCH_IRE_RJ_BHOLE) && 1767 (ire->ire_flags & (RTF_BLACKHOLE | RTF_REJECT))) { 1768 return (ire); 1769 } 1770 /* 1771 * At this point, IRE that was found must be an IRE_FORWARDTABLE 1772 * or IRE_CACHETABLE type. If this is a recursive lookup and an 1773 * IRE_INTERFACE type was found, return that. 
If it was some other 1774 * IRE_FORWARDTABLE type of IRE (one of the prefix types), then it 1775 * is necessary to fill in the parent IRE pointed to by pire, and 1776 * then lookup the gateway address of the parent. For backwards 1777 * compatiblity, if this lookup returns an 1778 * IRE other than a IRE_CACHETABLE or IRE_INTERFACE, then one more level 1779 * of lookup is done. 1780 */ 1781 if (flags & MATCH_IRE_RECURSIVE) { 1782 const ipif_t *gw_ipif; 1783 int match_flags = MATCH_IRE_DSTONLY; 1784 1785 if (ire->ire_type & IRE_INTERFACE) 1786 return (ire); 1787 if (pire != NULL) 1788 *pire = ire; 1789 /* 1790 * If we can't find an IRE_INTERFACE or the caller has not 1791 * asked for pire, we need to REFRELE the saved_ire. 1792 */ 1793 saved_ire = ire; 1794 1795 /* 1796 * Currently MATCH_IRE_ILL is never used with 1797 * (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT) while 1798 * sending out packets as MATCH_IRE_ILL is used only 1799 * for communicating with on-link hosts. We can't assert 1800 * that here as RTM_GET calls this function with 1801 * MATCH_IRE_ILL | MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE. 1802 * We have already used the MATCH_IRE_ILL in determining 1803 * the right prefix route at this point. To match the 1804 * behavior of how we locate routes while sending out 1805 * packets, we don't want to use MATCH_IRE_ILL below 1806 * while locating the interface route. 1807 */ 1808 if (ire->ire_ipif != NULL) 1809 match_flags |= MATCH_IRE_ILL_GROUP; 1810 1811 mutex_enter(&ire->ire_lock); 1812 gw_addr_v6 = ire->ire_gateway_addr_v6; 1813 mutex_exit(&ire->ire_lock); 1814 1815 ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 0, 1816 ire->ire_ipif, NULL, zoneid, tsl, match_flags, ipst); 1817 if (ire == NULL) { 1818 /* 1819 * In this case we have to deal with the 1820 * MATCH_IRE_PARENT flag, which means the 1821 * parent has to be returned if ire is NULL. 
1822 * The aim of this is to have (at least) a starting 1823 * ire when we want to look at all of the ires in a 1824 * bucket aimed at a single destination (as is the 1825 * case in ip_newroute_v6 for the RTF_MULTIRT 1826 * flagged routes). 1827 */ 1828 if (flags & MATCH_IRE_PARENT) { 1829 if (pire != NULL) { 1830 /* 1831 * Need an extra REFHOLD, if the 1832 * parent ire is returned via both 1833 * ire and pire. 1834 */ 1835 IRE_REFHOLD(saved_ire); 1836 } 1837 ire = saved_ire; 1838 } else { 1839 ire_refrele(saved_ire); 1840 if (pire != NULL) 1841 *pire = NULL; 1842 } 1843 return (ire); 1844 } 1845 if (ire->ire_type & (IRE_CACHETABLE | IRE_INTERFACE)) { 1846 /* 1847 * If the caller did not ask for pire, release 1848 * it now. 1849 */ 1850 if (pire == NULL) { 1851 ire_refrele(saved_ire); 1852 } 1853 return (ire); 1854 } 1855 match_flags |= MATCH_IRE_TYPE; 1856 mutex_enter(&ire->ire_lock); 1857 gw_addr_v6 = ire->ire_gateway_addr_v6; 1858 mutex_exit(&ire->ire_lock); 1859 gw_ipif = ire->ire_ipif; 1860 ire_refrele(ire); 1861 ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 1862 (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid, 1863 NULL, match_flags, ipst); 1864 if (ire == NULL) { 1865 /* 1866 * In this case we have to deal with the 1867 * MATCH_IRE_PARENT flag, which means the 1868 * parent has to be returned if ire is NULL. 1869 * The aim of this is to have (at least) a starting 1870 * ire when we want to look at all of the ires in a 1871 * bucket aimed at a single destination (as is the 1872 * case in ip_newroute_v6 for the RTF_MULTIRT 1873 * flagged routes). 1874 */ 1875 if (flags & MATCH_IRE_PARENT) { 1876 if (pire != NULL) { 1877 /* 1878 * Need an extra REFHOLD, if the 1879 * parent ire is returned via both 1880 * ire and pire. 
1881 */ 1882 IRE_REFHOLD(saved_ire); 1883 } 1884 ire = saved_ire; 1885 } else { 1886 ire_refrele(saved_ire); 1887 if (pire != NULL) 1888 *pire = NULL; 1889 } 1890 return (ire); 1891 } else if (pire == NULL) { 1892 /* 1893 * If the caller did not ask for pire, release 1894 * it now. 1895 */ 1896 ire_refrele(saved_ire); 1897 } 1898 return (ire); 1899 } 1900 1901 ASSERT(pire == NULL || *pire == NULL); 1902 return (ire); 1903 } 1904 1905 /* 1906 * Delete the IRE cache for the gateway and all IRE caches whose 1907 * ire_gateway_addr_v6 points to this gateway, and allow them to 1908 * be created on demand by ip_newroute_v6. 1909 */ 1910 void 1911 ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid, 1912 ip_stack_t *ipst) 1913 { 1914 irb_t *irb; 1915 ire_t *ire; 1916 1917 irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 1918 ipst->ips_ip6_cache_table_size)]; 1919 IRB_REFHOLD(irb); 1920 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 1921 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1922 continue; 1923 1924 ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 1925 if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, 0, 1926 IRE_CACHE, NULL, zoneid, 0, NULL, MATCH_IRE_TYPE)) { 1927 ire_delete(ire); 1928 } 1929 } 1930 IRB_REFRELE(irb); 1931 1932 ire_walk_v6(ire_delete_cache_gw_v6, (char *)addr, zoneid, ipst); 1933 } 1934 1935 /* 1936 * Looks up cache table for a route. 1937 * specific lookup can be indicated by 1938 * passing the MATCH_* flags and the 1939 * necessary parameters. 1940 */ 1941 ire_t * 1942 ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway, 1943 int type, const ipif_t *ipif, zoneid_t zoneid, const ts_label_t *tsl, 1944 int flags, ip_stack_t *ipst) 1945 { 1946 ire_t *ire; 1947 irb_t *irb_ptr; 1948 ASSERT(addr != NULL); 1949 ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 1950 1951 /* 1952 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 1953 * MATCH_IRE_ILL is set. 
1954 */ 1955 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 1956 (ipif == NULL)) 1957 return (NULL); 1958 1959 irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 1960 ipst->ips_ip6_cache_table_size)]; 1961 rw_enter(&irb_ptr->irb_lock, RW_READER); 1962 for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 1963 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1964 continue; 1965 1966 ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 1967 if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, gateway, 1968 type, ipif, zoneid, 0, tsl, flags)) { 1969 IRE_REFHOLD(ire); 1970 rw_exit(&irb_ptr->irb_lock); 1971 return (ire); 1972 } 1973 } 1974 rw_exit(&irb_ptr->irb_lock); 1975 return (NULL); 1976 } 1977 1978 /* 1979 * Lookup cache. Don't return IRE_MARK_HIDDEN entries. Callers 1980 * should use ire_ctable_lookup with MATCH_IRE_MARK_HIDDEN to get 1981 * to the hidden ones. 1982 * 1983 * In general the zoneid has to match (where ALL_ZONES match all of them). 1984 * But for IRE_LOCAL we also need to handle the case where L2 should 1985 * conceptually loop back the packet. This is necessary since neither 1986 * Ethernet drivers nor Ethernet hardware loops back packets sent to their 1987 * own MAC address. This loopback is needed when the normal 1988 * routes (ignoring IREs with different zoneids) would send out the packet on 1989 * the same ill (or ill group) as the ill with which this IRE_LOCAL is 1990 * associated. 1991 * 1992 * Earlier versions of this code always matched an IRE_LOCAL independently of 1993 * the zoneid. We preserve that earlier behavior when 1994 * ip_restrict_interzone_loopback is turned off. 
1995 */ 1996 ire_t * 1997 ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid, 1998 const ts_label_t *tsl, ip_stack_t *ipst) 1999 { 2000 irb_t *irb_ptr; 2001 ire_t *ire; 2002 2003 irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 2004 ipst->ips_ip6_cache_table_size)]; 2005 rw_enter(&irb_ptr->irb_lock, RW_READER); 2006 for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 2007 if (ire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 2008 continue; 2009 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, addr)) { 2010 /* 2011 * Finally, check if the security policy has any 2012 * restriction on using this route for the specified 2013 * message. 2014 */ 2015 if (tsl != NULL && 2016 ire->ire_gw_secattr != NULL && 2017 tsol_ire_match_gwattr(ire, tsl) != 0) { 2018 continue; 2019 } 2020 2021 if (zoneid == ALL_ZONES || ire->ire_zoneid == zoneid || 2022 ire->ire_zoneid == ALL_ZONES) { 2023 IRE_REFHOLD(ire); 2024 rw_exit(&irb_ptr->irb_lock); 2025 return (ire); 2026 } 2027 2028 if (ire->ire_type == IRE_LOCAL) { 2029 if (ipst->ips_ip_restrict_interzone_loopback && 2030 !ire_local_ok_across_zones(ire, zoneid, 2031 (void *)addr, tsl, ipst)) 2032 continue; 2033 2034 IRE_REFHOLD(ire); 2035 rw_exit(&irb_ptr->irb_lock); 2036 return (ire); 2037 } 2038 } 2039 } 2040 rw_exit(&irb_ptr->irb_lock); 2041 return (NULL); 2042 } 2043 2044 /* 2045 * Locate the interface ire that is tied to the cache ire 'cire' via 2046 * cire->ire_ihandle. 2047 * 2048 * We are trying to create the cache ire for an onlink destn. or 2049 * gateway in 'cire'. We are called from ire_add_v6() in the IRE_IF_RESOLVER 2050 * case for xresolv interfaces, after the ire has come back from 2051 * an external resolver. 
2052 */ 2053 static ire_t * 2054 ire_ihandle_lookup_onlink_v6(ire_t *cire) 2055 { 2056 ire_t *ire; 2057 int match_flags; 2058 int i; 2059 int j; 2060 irb_t *irb_ptr; 2061 ip_stack_t *ipst = cire->ire_ipst; 2062 2063 ASSERT(cire != NULL); 2064 2065 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2066 /* 2067 * We know that the mask of the interface ire equals cire->ire_cmask. 2068 * (When ip_newroute_v6() created 'cire' for an on-link destn. 2069 * it set its cmask from the interface ire's mask) 2070 */ 2071 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 2072 NULL, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle, 2073 NULL, match_flags, ipst); 2074 if (ire != NULL) 2075 return (ire); 2076 /* 2077 * If we didn't find an interface ire above, we can't declare failure. 2078 * For backwards compatibility, we need to support prefix routes 2079 * pointing to next hop gateways that are not on-link. 2080 * 2081 * In the resolver/noresolver case, ip_newroute_v6() thinks 2082 * it is creating the cache ire for an onlink destination in 'cire'. 2083 * But 'cire' is not actually onlink, because ire_ftable_lookup_v6() 2084 * cheated it, by doing ire_route_lookup_v6() twice and returning an 2085 * interface ire. 2086 * 2087 * Eg. default - gw1 (line 1) 2088 * gw1 - gw2 (line 2) 2089 * gw2 - hme0 (line 3) 2090 * 2091 * In the above example, ip_newroute_v6() tried to create the cache ire 2092 * 'cire' for gw1, based on the interface route in line 3. The 2093 * ire_ftable_lookup_v6() above fails, because there is 2094 * no interface route to reach gw1. (it is gw2). We fall thru below. 2095 * 2096 * Do a brute force search based on the ihandle in a subset of the 2097 * forwarding tables, corresponding to cire->ire_cmask_v6. Otherwise 2098 * things become very complex, since we don't have 'pire' in this 2099 * case. 
(Also note that this method is not possible in the offlink 2100 * case because we don't know the mask) 2101 */ 2102 i = ip_mask_to_plen_v6(&cire->ire_cmask_v6); 2103 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) 2104 return (NULL); 2105 for (j = 0; j < ipst->ips_ip6_ftable_hash_size; j++) { 2106 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][j]; 2107 rw_enter(&irb_ptr->irb_lock, RW_READER); 2108 for (ire = irb_ptr->irb_ire; ire != NULL; 2109 ire = ire->ire_next) { 2110 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2111 continue; 2112 if ((ire->ire_type & IRE_INTERFACE) && 2113 (ire->ire_ihandle == cire->ire_ihandle)) { 2114 IRE_REFHOLD(ire); 2115 rw_exit(&irb_ptr->irb_lock); 2116 return (ire); 2117 } 2118 } 2119 rw_exit(&irb_ptr->irb_lock); 2120 } 2121 return (NULL); 2122 } 2123 2124 2125 /* 2126 * Locate the interface ire that is tied to the cache ire 'cire' via 2127 * cire->ire_ihandle. 2128 * 2129 * We are trying to create the cache ire for an offlink destn based 2130 * on the cache ire of the gateway in 'cire'. 'pire' is the prefix ire 2131 * as found by ip_newroute_v6(). We are called from ip_newroute_v6() in 2132 * the IRE_CACHE case. 2133 */ 2134 ire_t * 2135 ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire) 2136 { 2137 ire_t *ire; 2138 int match_flags; 2139 in6_addr_t gw_addr; 2140 ipif_t *gw_ipif; 2141 ip_stack_t *ipst = cire->ire_ipst; 2142 2143 ASSERT(cire != NULL && pire != NULL); 2144 2145 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2146 /* 2147 * ip_newroute_v6 calls ire_ftable_lookup with MATCH_IRE_ILL only 2148 * for on-link hosts. We should never be here for onlink. 2149 * Thus, use MATCH_IRE_ILL_GROUP. 2150 */ 2151 if (pire->ire_ipif != NULL) 2152 match_flags |= MATCH_IRE_ILL_GROUP; 2153 /* 2154 * We know that the mask of the interface ire equals cire->ire_cmask. 2155 * (When ip_newroute_v6() created 'cire' for an on-link destn. 
it set 2156 * its cmask from the interface ire's mask) 2157 */ 2158 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 0, 2159 IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2160 NULL, match_flags, ipst); 2161 if (ire != NULL) 2162 return (ire); 2163 /* 2164 * If we didn't find an interface ire above, we can't declare failure. 2165 * For backwards compatibility, we need to support prefix routes 2166 * pointing to next hop gateways that are not on-link. 2167 * 2168 * Assume we are trying to ping some offlink destn, and we have the 2169 * routing table below. 2170 * 2171 * Eg. default - gw1 <--- pire (line 1) 2172 * gw1 - gw2 (line 2) 2173 * gw2 - hme0 (line 3) 2174 * 2175 * If we already have a cache ire for gw1 in 'cire', the 2176 * ire_ftable_lookup_v6 above would have failed, since there is no 2177 * interface ire to reach gw1. We will fallthru below. 2178 * 2179 * Here we duplicate the steps that ire_ftable_lookup_v6() did in 2180 * getting 'cire' from 'pire', in the MATCH_IRE_RECURSIVE case. 2181 * The differences are the following 2182 * i. We want the interface ire only, so we call 2183 * ire_ftable_lookup_v6() instead of ire_route_lookup_v6() 2184 * ii. We look for only prefix routes in the 1st call below. 2185 * ii. We want to match on the ihandle in the 2nd call below. 2186 */ 2187 match_flags = MATCH_IRE_TYPE; 2188 if (pire->ire_ipif != NULL) 2189 match_flags |= MATCH_IRE_ILL_GROUP; 2190 2191 mutex_enter(&pire->ire_lock); 2192 gw_addr = pire->ire_gateway_addr_v6; 2193 mutex_exit(&pire->ire_lock); 2194 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_OFFSUBNET, 2195 pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst); 2196 if (ire == NULL) 2197 return (NULL); 2198 /* 2199 * At this point 'ire' corresponds to the entry shown in line 2. 2200 * gw_addr is 'gw2' in the example above. 
2201 */ 2202 mutex_enter(&ire->ire_lock); 2203 gw_addr = ire->ire_gateway_addr_v6; 2204 mutex_exit(&ire->ire_lock); 2205 gw_ipif = ire->ire_ipif; 2206 ire_refrele(ire); 2207 2208 match_flags |= MATCH_IRE_IHANDLE; 2209 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_INTERFACE, 2210 gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2211 NULL, match_flags, ipst); 2212 return (ire); 2213 } 2214 2215 /* 2216 * Return the IRE_LOOPBACK, IRE_IF_RESOLVER or IRE_IF_NORESOLVER 2217 * ire associated with the specified ipif. 2218 * 2219 * This might occasionally be called when IPIF_UP is not set since 2220 * the IPV6_MULTICAST_IF as well as creating interface routes 2221 * allows specifying a down ipif (ipif_lookup* match ipifs that are down). 2222 * 2223 * Note that if IPIF_NOLOCAL, IPIF_NOXMIT, or IPIF_DEPRECATED is set on 2224 * the ipif this routine might return NULL. 2225 * (Sometimes called as writer though not required by this function.) 2226 */ 2227 ire_t * 2228 ipif_to_ire_v6(const ipif_t *ipif) 2229 { 2230 ire_t *ire; 2231 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2232 2233 ASSERT(ipif->ipif_isv6); 2234 if (ipif->ipif_ire_type == IRE_LOOPBACK) { 2235 ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, NULL, 2236 IRE_LOOPBACK, ipif, ALL_ZONES, NULL, 2237 (MATCH_IRE_TYPE | MATCH_IRE_IPIF), ipst); 2238 } else if (ipif->ipif_flags & IPIF_POINTOPOINT) { 2239 /* In this case we need to lookup destination address. 
*/ 2240 ire = ire_ftable_lookup_v6(&ipif->ipif_v6pp_dst_addr, 2241 &ipv6_all_ones, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 2242 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 2243 MATCH_IRE_MASK), ipst); 2244 } else { 2245 ire = ire_ftable_lookup_v6(&ipif->ipif_v6subnet, 2246 &ipif->ipif_v6net_mask, NULL, IRE_INTERFACE, ipif, NULL, 2247 ALL_ZONES, 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 2248 MATCH_IRE_MASK), ipst); 2249 } 2250 return (ire); 2251 } 2252 2253 /* 2254 * Return B_TRUE if a multirt route is resolvable 2255 * (or if no route is resolved yet), B_FALSE otherwise. 2256 * This only works in the global zone. 2257 */ 2258 boolean_t 2259 ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl, 2260 ip_stack_t *ipst) 2261 { 2262 ire_t *first_fire; 2263 ire_t *first_cire; 2264 ire_t *fire; 2265 ire_t *cire; 2266 irb_t *firb; 2267 irb_t *cirb; 2268 int unres_cnt = 0; 2269 boolean_t resolvable = B_FALSE; 2270 2271 /* Retrieve the first IRE_HOST that matches the destination */ 2272 first_fire = ire_ftable_lookup_v6(v6dstp, &ipv6_all_ones, 0, IRE_HOST, 2273 NULL, NULL, ALL_ZONES, 0, tsl, MATCH_IRE_MASK | MATCH_IRE_TYPE | 2274 MATCH_IRE_SECATTR, ipst); 2275 2276 /* No route at all */ 2277 if (first_fire == NULL) { 2278 return (B_TRUE); 2279 } 2280 2281 firb = first_fire->ire_bucket; 2282 ASSERT(firb); 2283 2284 /* Retrieve the first IRE_CACHE ire for that destination. */ 2285 first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID, tsl, ipst); 2286 2287 /* No resolved route. */ 2288 if (first_cire == NULL) { 2289 ire_refrele(first_fire); 2290 return (B_TRUE); 2291 } 2292 2293 /* At least one route is resolved. */ 2294 2295 cirb = first_cire->ire_bucket; 2296 ASSERT(cirb); 2297 2298 /* Count the number of routes to that dest that are declared. 
*/ 2299 IRB_REFHOLD(firb); 2300 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2301 if (!(fire->ire_flags & RTF_MULTIRT)) 2302 continue; 2303 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, v6dstp)) 2304 continue; 2305 unres_cnt++; 2306 } 2307 IRB_REFRELE(firb); 2308 2309 2310 /* Then subtract the number of routes to that dst that are resolved */ 2311 IRB_REFHOLD(cirb); 2312 for (cire = first_cire; cire != NULL; cire = cire->ire_next) { 2313 if (!(cire->ire_flags & RTF_MULTIRT)) 2314 continue; 2315 if (!IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, v6dstp)) 2316 continue; 2317 if (cire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 2318 continue; 2319 unres_cnt--; 2320 } 2321 IRB_REFRELE(cirb); 2322 2323 /* At least one route is unresolved; search for a resolvable route. */ 2324 if (unres_cnt > 0) 2325 resolvable = ire_multirt_lookup_v6(&first_cire, &first_fire, 2326 MULTIRT_USESTAMP|MULTIRT_CACHEGW, tsl, ipst); 2327 2328 if (first_fire) 2329 ire_refrele(first_fire); 2330 2331 if (first_cire) 2332 ire_refrele(first_cire); 2333 2334 return (resolvable); 2335 } 2336 2337 2338 /* 2339 * Return B_TRUE and update *ire_arg and *fire_arg 2340 * if at least one resolvable route is found. 2341 * Return B_FALSE otherwise (all routes are resolved or 2342 * the remaining unresolved routes are all unresolvable). 2343 * This only works in the global zone. 
2344 */ 2345 boolean_t 2346 ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, 2347 const ts_label_t *tsl, ip_stack_t *ipst) 2348 { 2349 clock_t delta; 2350 ire_t *best_fire = NULL; 2351 ire_t *best_cire = NULL; 2352 ire_t *first_fire; 2353 ire_t *first_cire; 2354 ire_t *fire; 2355 ire_t *cire; 2356 irb_t *firb = NULL; 2357 irb_t *cirb = NULL; 2358 ire_t *gw_ire; 2359 boolean_t already_resolved; 2360 boolean_t res; 2361 in6_addr_t v6dst; 2362 in6_addr_t v6gw; 2363 2364 ip2dbg(("ire_multirt_lookup_v6: *ire_arg %p, *fire_arg %p, " 2365 "flags %04x\n", (void *)*ire_arg, (void *)*fire_arg, flags)); 2366 2367 ASSERT(ire_arg); 2368 ASSERT(fire_arg); 2369 2370 /* Not an IRE_HOST ire; give up. */ 2371 if ((*fire_arg == NULL) || 2372 ((*fire_arg)->ire_type != IRE_HOST)) { 2373 return (B_FALSE); 2374 } 2375 2376 /* This is the first IRE_HOST ire for that destination. */ 2377 first_fire = *fire_arg; 2378 firb = first_fire->ire_bucket; 2379 ASSERT(firb); 2380 2381 mutex_enter(&first_fire->ire_lock); 2382 v6dst = first_fire->ire_addr_v6; 2383 mutex_exit(&first_fire->ire_lock); 2384 2385 ip2dbg(("ire_multirt_lookup_v6: dst %08x\n", 2386 ntohl(V4_PART_OF_V6(v6dst)))); 2387 2388 /* 2389 * Retrieve the first IRE_CACHE ire for that destination; 2390 * if we don't find one, no route for that dest is 2391 * resolved yet. 2392 */ 2393 first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID, tsl, ipst); 2394 if (first_cire) { 2395 cirb = first_cire->ire_bucket; 2396 } 2397 2398 ip2dbg(("ire_multirt_lookup_v6: first_cire %p\n", (void *)first_cire)); 2399 2400 /* 2401 * Search for a resolvable route, giving the top priority 2402 * to routes that can be resolved without any call to the resolver. 2403 */ 2404 IRB_REFHOLD(firb); 2405 2406 if (!IN6_IS_ADDR_MULTICAST(&v6dst)) { 2407 /* 2408 * For all multiroute IRE_HOST ires for that destination, 2409 * check if the route via the IRE_HOST's gateway is 2410 * resolved yet. 
2411 */ 2412 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2413 2414 if (!(fire->ire_flags & RTF_MULTIRT)) 2415 continue; 2416 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2417 continue; 2418 2419 if (fire->ire_gw_secattr != NULL && 2420 tsol_ire_match_gwattr(fire, tsl) != 0) { 2421 continue; 2422 } 2423 2424 mutex_enter(&fire->ire_lock); 2425 v6gw = fire->ire_gateway_addr_v6; 2426 mutex_exit(&fire->ire_lock); 2427 2428 ip2dbg(("ire_multirt_lookup_v6: fire %p, " 2429 "ire_addr %08x, ire_gateway_addr %08x\n", 2430 (void *)fire, 2431 ntohl(V4_PART_OF_V6(fire->ire_addr_v6)), 2432 ntohl(V4_PART_OF_V6(v6gw)))); 2433 2434 already_resolved = B_FALSE; 2435 2436 if (first_cire) { 2437 ASSERT(cirb); 2438 2439 IRB_REFHOLD(cirb); 2440 /* 2441 * For all IRE_CACHE ires for that 2442 * destination. 2443 */ 2444 for (cire = first_cire; 2445 cire != NULL; 2446 cire = cire->ire_next) { 2447 2448 if (!(cire->ire_flags & RTF_MULTIRT)) 2449 continue; 2450 if (!IN6_ARE_ADDR_EQUAL( 2451 &cire->ire_addr_v6, &v6dst)) 2452 continue; 2453 if (cire->ire_marks & 2454 (IRE_MARK_CONDEMNED| 2455 IRE_MARK_HIDDEN)) 2456 continue; 2457 2458 if (cire->ire_gw_secattr != NULL && 2459 tsol_ire_match_gwattr(cire, 2460 tsl) != 0) { 2461 continue; 2462 } 2463 2464 /* 2465 * Check if the IRE_CACHE's gateway 2466 * matches the IRE_HOST's gateway. 2467 */ 2468 if (IN6_ARE_ADDR_EQUAL( 2469 &cire->ire_gateway_addr_v6, 2470 &v6gw)) { 2471 already_resolved = B_TRUE; 2472 break; 2473 } 2474 } 2475 IRB_REFRELE(cirb); 2476 } 2477 2478 /* 2479 * This route is already resolved; 2480 * proceed with next one. 2481 */ 2482 if (already_resolved) { 2483 ip2dbg(("ire_multirt_lookup_v6: found cire %p, " 2484 "already resolved\n", (void *)cire)); 2485 continue; 2486 } 2487 2488 /* 2489 * The route is unresolved; is it actually 2490 * resolvable, i.e. is there a cache or a resolver 2491 * for the gateway? 
2492 */ 2493 gw_ire = ire_route_lookup_v6(&v6gw, 0, 0, 0, NULL, NULL, 2494 ALL_ZONES, tsl, MATCH_IRE_RECURSIVE | 2495 MATCH_IRE_SECATTR, ipst); 2496 2497 ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n", 2498 (void *)gw_ire)); 2499 2500 /* 2501 * This route can be resolved without any call to the 2502 * resolver; if the MULTIRT_CACHEGW flag is set, 2503 * give the top priority to this ire and exit the 2504 * loop. 2505 * This occurs when an resolver reply is processed 2506 * through ip_wput_nondata() 2507 */ 2508 if ((flags & MULTIRT_CACHEGW) && 2509 (gw_ire != NULL) && 2510 (gw_ire->ire_type & IRE_CACHETABLE)) { 2511 /* 2512 * Release the resolver associated to the 2513 * previous candidate best ire, if any. 2514 */ 2515 if (best_cire) { 2516 ire_refrele(best_cire); 2517 ASSERT(best_fire); 2518 } 2519 2520 best_fire = fire; 2521 best_cire = gw_ire; 2522 2523 ip2dbg(("ire_multirt_lookup_v6: found top prio " 2524 "best_fire %p, best_cire %p\n", 2525 (void *)best_fire, (void *)best_cire)); 2526 break; 2527 } 2528 2529 /* 2530 * Compute the time elapsed since our preceding 2531 * attempt to resolve that route. 2532 * If the MULTIRT_USESTAMP flag is set, we take that 2533 * route into account only if this time interval 2534 * exceeds ip_multirt_resolution_interval; 2535 * this prevents us from attempting to resolve a 2536 * broken route upon each sending of a packet. 2537 */ 2538 delta = lbolt - fire->ire_last_used_time; 2539 delta = TICK_TO_MSEC(delta); 2540 2541 res = (boolean_t) 2542 ((delta > ipst-> 2543 ips_ip_multirt_resolution_interval) || 2544 (!(flags & MULTIRT_USESTAMP))); 2545 2546 ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, " 2547 "res %d\n", 2548 (void *)fire, delta, res)); 2549 2550 if (res) { 2551 /* 2552 * A resolver exists for the gateway: save 2553 * the current IRE_HOST ire as a candidate 2554 * best ire. If we later discover that a 2555 * top priority ire exists (i.e. 
no need to 2556 * call the resolver), then this new ire 2557 * will be preferred to the current one. 2558 */ 2559 if (gw_ire != NULL) { 2560 if (best_fire == NULL) { 2561 ASSERT(best_cire == NULL); 2562 2563 best_fire = fire; 2564 best_cire = gw_ire; 2565 2566 ip2dbg(("ire_multirt_lookup_v6:" 2567 "found candidate " 2568 "best_fire %p, " 2569 "best_cire %p\n", 2570 (void *)best_fire, 2571 (void *)best_cire)); 2572 2573 /* 2574 * If MULTIRT_CACHEGW is not 2575 * set, we ignore the top 2576 * priority ires that can 2577 * be resolved without any 2578 * call to the resolver; 2579 * In that case, there is 2580 * actually no need 2581 * to continue the loop. 2582 */ 2583 if (!(flags & 2584 MULTIRT_CACHEGW)) { 2585 break; 2586 } 2587 continue; 2588 } 2589 } else { 2590 /* 2591 * No resolver for the gateway: the 2592 * route is not resolvable. 2593 * If the MULTIRT_SETSTAMP flag is 2594 * set, we stamp the IRE_HOST ire, 2595 * so we will not select it again 2596 * during this resolution interval. 2597 */ 2598 if (flags & MULTIRT_SETSTAMP) 2599 fire->ire_last_used_time = 2600 lbolt; 2601 } 2602 } 2603 2604 if (gw_ire != NULL) 2605 ire_refrele(gw_ire); 2606 } 2607 } else { /* IN6_IS_ADDR_MULTICAST(&v6dst) */ 2608 2609 for (fire = first_fire; 2610 fire != NULL; 2611 fire = fire->ire_next) { 2612 2613 if (!(fire->ire_flags & RTF_MULTIRT)) 2614 continue; 2615 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2616 continue; 2617 2618 if (fire->ire_gw_secattr != NULL && 2619 tsol_ire_match_gwattr(fire, tsl) != 0) { 2620 continue; 2621 } 2622 2623 already_resolved = B_FALSE; 2624 2625 mutex_enter(&fire->ire_lock); 2626 v6gw = fire->ire_gateway_addr_v6; 2627 mutex_exit(&fire->ire_lock); 2628 2629 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2630 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, tsl, 2631 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE | 2632 MATCH_IRE_SECATTR, ipst); 2633 2634 /* No resolver for the gateway; we skip this ire. 
*/ 2635 if (gw_ire == NULL) { 2636 continue; 2637 } 2638 2639 if (first_cire) { 2640 2641 IRB_REFHOLD(cirb); 2642 /* 2643 * For all IRE_CACHE ires for that 2644 * destination. 2645 */ 2646 for (cire = first_cire; 2647 cire != NULL; 2648 cire = cire->ire_next) { 2649 2650 if (!(cire->ire_flags & RTF_MULTIRT)) 2651 continue; 2652 if (!IN6_ARE_ADDR_EQUAL( 2653 &cire->ire_addr_v6, &v6dst)) 2654 continue; 2655 if (cire->ire_marks & 2656 (IRE_MARK_CONDEMNED| 2657 IRE_MARK_HIDDEN)) 2658 continue; 2659 2660 if (cire->ire_gw_secattr != NULL && 2661 tsol_ire_match_gwattr(cire, 2662 tsl) != 0) { 2663 continue; 2664 } 2665 2666 /* 2667 * Cache entries are linked to the 2668 * parent routes using the parent handle 2669 * (ire_phandle). If no cache entry has 2670 * the same handle as fire, fire is 2671 * still unresolved. 2672 */ 2673 ASSERT(cire->ire_phandle != 0); 2674 if (cire->ire_phandle == 2675 fire->ire_phandle) { 2676 already_resolved = B_TRUE; 2677 break; 2678 } 2679 } 2680 IRB_REFRELE(cirb); 2681 } 2682 2683 /* 2684 * This route is already resolved; proceed with 2685 * next one. 2686 */ 2687 if (already_resolved) { 2688 ire_refrele(gw_ire); 2689 continue; 2690 } 2691 2692 /* 2693 * Compute the time elapsed since our preceding 2694 * attempt to resolve that route. 2695 * If the MULTIRT_USESTAMP flag is set, we take 2696 * that route into account only if this time 2697 * interval exceeds ip_multirt_resolution_interval; 2698 * this prevents us from attempting to resolve a 2699 * broken route upon each sending of a packet. 
2700 */ 2701 delta = lbolt - fire->ire_last_used_time; 2702 delta = TICK_TO_MSEC(delta); 2703 2704 res = (boolean_t) 2705 ((delta > ipst-> 2706 ips_ip_multirt_resolution_interval) || 2707 (!(flags & MULTIRT_USESTAMP))); 2708 2709 ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, " 2710 "flags %04x, res %d\n", 2711 (void *)fire, delta, flags, res)); 2712 2713 if (res) { 2714 if (best_cire) { 2715 /* 2716 * Release the resolver associated 2717 * to the preceding candidate best 2718 * ire, if any. 2719 */ 2720 ire_refrele(best_cire); 2721 ASSERT(best_fire); 2722 } 2723 best_fire = fire; 2724 best_cire = gw_ire; 2725 continue; 2726 } 2727 2728 ire_refrele(gw_ire); 2729 } 2730 } 2731 2732 if (best_fire) { 2733 IRE_REFHOLD(best_fire); 2734 } 2735 IRB_REFRELE(firb); 2736 2737 /* Release the first IRE_CACHE we initially looked up, if any. */ 2738 if (first_cire) 2739 ire_refrele(first_cire); 2740 2741 /* Found a resolvable route. */ 2742 if (best_fire) { 2743 ASSERT(best_cire); 2744 2745 if (*fire_arg) 2746 ire_refrele(*fire_arg); 2747 if (*ire_arg) 2748 ire_refrele(*ire_arg); 2749 2750 /* 2751 * Update the passed arguments with the 2752 * resolvable multirt route we found 2753 */ 2754 *fire_arg = best_fire; 2755 *ire_arg = best_cire; 2756 2757 ip2dbg(("ire_multirt_lookup_v6: returning B_TRUE, " 2758 "*fire_arg %p, *ire_arg %p\n", 2759 (void *)best_fire, (void *)best_cire)); 2760 2761 return (B_TRUE); 2762 } 2763 2764 ASSERT(best_cire == NULL); 2765 2766 ip2dbg(("ire_multirt_lookup_v6: returning B_FALSE, *fire_arg %p, " 2767 "*ire_arg %p\n", 2768 (void *)*fire_arg, (void *)*ire_arg)); 2769 2770 /* No resolvable route. */ 2771 return (B_FALSE); 2772 } 2773 2774 2775 /* 2776 * Find an IRE_OFFSUBNET IRE entry for the multicast address 'v6dstp' 2777 * that goes through 'ipif'. As a fallback, a route that goes through 2778 * ipif->ipif_ill can be returned. 
2779 */ 2780 ire_t * 2781 ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp) 2782 { 2783 ire_t *ire; 2784 ire_t *save_ire = NULL; 2785 ire_t *gw_ire; 2786 irb_t *irb; 2787 in6_addr_t v6gw; 2788 int match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL; 2789 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2790 2791 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, NULL, ALL_ZONES, 0, 2792 NULL, MATCH_IRE_DEFAULT, ipst); 2793 2794 if (ire == NULL) 2795 return (NULL); 2796 2797 irb = ire->ire_bucket; 2798 ASSERT(irb); 2799 2800 IRB_REFHOLD(irb); 2801 ire_refrele(ire); 2802 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 2803 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) || 2804 (ipif->ipif_zoneid != ire->ire_zoneid && 2805 ire->ire_zoneid != ALL_ZONES)) { 2806 continue; 2807 } 2808 2809 switch (ire->ire_type) { 2810 case IRE_DEFAULT: 2811 case IRE_PREFIX: 2812 case IRE_HOST: 2813 mutex_enter(&ire->ire_lock); 2814 v6gw = ire->ire_gateway_addr_v6; 2815 mutex_exit(&ire->ire_lock); 2816 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2817 IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, 2818 NULL, match_flags, ipst); 2819 2820 if (gw_ire != NULL) { 2821 if (save_ire != NULL) { 2822 ire_refrele(save_ire); 2823 } 2824 IRE_REFHOLD(ire); 2825 if (gw_ire->ire_ipif == ipif) { 2826 ire_refrele(gw_ire); 2827 2828 IRB_REFRELE(irb); 2829 return (ire); 2830 } 2831 ire_refrele(gw_ire); 2832 save_ire = ire; 2833 } 2834 break; 2835 case IRE_IF_NORESOLVER: 2836 case IRE_IF_RESOLVER: 2837 if (ire->ire_ipif == ipif) { 2838 if (save_ire != NULL) { 2839 ire_refrele(save_ire); 2840 } 2841 IRE_REFHOLD(ire); 2842 2843 IRB_REFRELE(irb); 2844 return (ire); 2845 } 2846 break; 2847 } 2848 } 2849 IRB_REFRELE(irb); 2850 2851 return (save_ire); 2852 } 2853