1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 /* 32 * This file contains routines that manipulate Internet Routing Entries (IREs). 33 */ 34 #include <sys/types.h> 35 #include <sys/stream.h> 36 #include <sys/stropts.h> 37 #include <sys/ddi.h> 38 #include <sys/cmn_err.h> 39 40 #include <sys/systm.h> 41 #include <sys/param.h> 42 #include <sys/socket.h> 43 #include <net/if.h> 44 #include <net/route.h> 45 #include <netinet/in.h> 46 #include <net/if_dl.h> 47 #include <netinet/ip6.h> 48 #include <netinet/icmp6.h> 49 50 #include <inet/common.h> 51 #include <inet/mi.h> 52 #include <inet/ip.h> 53 #include <inet/ip6.h> 54 #include <inet/ip_ndp.h> 55 #include <inet/ip_if.h> 56 #include <inet/ip_ire.h> 57 #include <inet/ipclassifier.h> 58 #include <inet/nd.h> 59 #include <sys/kmem.h> 60 #include <sys/zone.h> 61 62 #include <sys/tsol/label.h> 63 #include <sys/tsol/tnet.h> 64 65 irb_t *ip_forwarding_table_v6[IP6_MASK_TABLE_SIZE]; 66 /* This is dynamically allocated in ip_ire_init */ 67 irb_t *ip_cache_table_v6; 68 static ire_t ire_null; 69 70 static ire_t *ire_ihandle_lookup_onlink_v6(ire_t *cire); 71 static void ire_report_ftable_v6(ire_t *ire, char *mp); 72 static void ire_report_ctable_v6(ire_t *ire, char *mp); 73 static boolean_t ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, 74 const in6_addr_t *mask, const in6_addr_t *gateway, int type, 75 const ipif_t *ipif, zoneid_t zoneid, uint32_t ihandle, 76 const ts_label_t *tsl, int match_flags); 77 78 /* 79 * Named Dispatch routine to produce a formatted report on all IREs. 80 * This report is accessed by using the ndd utility to "get" ND variable 81 * "ip_ire_status_v6". 82 */ 83 /* ARGSUSED */ 84 int 85 ip_ire_report_v6(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 86 { 87 zoneid_t zoneid; 88 89 (void) mi_mpprintf(mp, 90 "IRE " MI_COL_HDRPAD_STR 91 "rfq " MI_COL_HDRPAD_STR 92 "stq " MI_COL_HDRPAD_STR 93 " zone mxfrg rtt rtt_sd ssthresh ref " 94 "rtomax tstamp_ok wscale_ok ecn_ok pmtud_ok sack sendpipe recvpipe " 95 "in/out/forward type addr mask " 96 "src gateway"); 97 /* 98 * 01234567 01234567 01234567 12345 12345 12345 12345 12345678 123 99 * 123456 123456789 123456789 123456 12345678 1234 12345678 12345678 100 * in/out/forward xxxxxxxxxx 101 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 102 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 103 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 104 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 105 */ 106 107 /* 108 * Because of the ndd constraint, at most we can have 64K buffer 109 * to put in all IRE info. So to be more efficient, just 110 * allocate a 64K buffer here, assuming we need that large buffer. 111 * This should be OK as only root can do ndd /dev/ip. 112 */ 113 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 114 /* The following may work even if we cannot get a large buf. */ 115 (void) mi_mpprintf(mp, "<< Out of buffer >>\n"); 116 return (0); 117 } 118 zoneid = Q_TO_CONN(q)->conn_zoneid; 119 if (zoneid == GLOBAL_ZONEID) 120 zoneid = ALL_ZONES; 121 122 ire_walk_v6(ire_report_ftable_v6, (char *)mp->b_cont, zoneid); 123 ire_walk_v6(ire_report_ctable_v6, (char *)mp->b_cont, zoneid); 124 return (0); 125 } 126 127 /* 128 * ire_walk routine invoked for ip_ire_report_v6 for each IRE. 129 */ 130 static void 131 ire_report_ftable_v6(ire_t *ire, char *mp) 132 { 133 char buf1[INET6_ADDRSTRLEN]; 134 char buf2[INET6_ADDRSTRLEN]; 135 char buf3[INET6_ADDRSTRLEN]; 136 char buf4[INET6_ADDRSTRLEN]; 137 uint_t fo_pkt_count; 138 uint_t ib_pkt_count; 139 int ref; 140 in6_addr_t gw_addr_v6; 141 uint_t print_len, buf_len; 142 143 ASSERT(ire->ire_ipversion == IPV6_VERSION); 144 if (ire->ire_type & IRE_CACHETABLE) 145 return; 146 buf_len = ((mblk_t *)mp)->b_datap->db_lim - ((mblk_t *)mp)->b_wptr; 147 if (buf_len <= 0) 148 return; 149 150 /* Number of active references of this ire */ 151 ref = ire->ire_refcnt; 152 /* "inbound" to a non local address is a forward */ 153 ib_pkt_count = ire->ire_ib_pkt_count; 154 fo_pkt_count = 0; 155 ASSERT(!(ire->ire_type & IRE_BROADCAST)); 156 if (!(ire->ire_type & (IRE_LOCAL|IRE_BROADCAST))) { 157 fo_pkt_count = ib_pkt_count; 158 ib_pkt_count = 0; 159 } 160 161 mutex_enter(&ire->ire_lock); 162 gw_addr_v6 = ire->ire_gateway_addr_v6; 163 mutex_exit(&ire->ire_lock); 164 165 print_len = snprintf((char *)((mblk_t *)mp)->b_wptr, buf_len, 166 MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR "%5d " 167 "%05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d " 168 "%04d %08d %08d %d/%d/%d %s\n\t%s\n\t%s\n\t%s\n\t%s\n", 169 (void *)ire, (void *)ire->ire_rfq, (void *)ire->ire_stq, 170 (int)ire->ire_zoneid, 171 ire->ire_max_frag, ire->ire_uinfo.iulp_rtt, 172 ire->ire_uinfo.iulp_rtt_sd, 173 ire->ire_uinfo.iulp_ssthresh, ref, 174 ire->ire_uinfo.iulp_rtomax, 175 (ire->ire_uinfo.iulp_tstamp_ok ? 1: 0), 176 (ire->ire_uinfo.iulp_wscale_ok ? 1: 0), 177 (ire->ire_uinfo.iulp_ecn_ok ? 1: 0), 178 (ire->ire_uinfo.iulp_pmtud_ok ? 1: 0), 179 ire->ire_uinfo.iulp_sack, 180 ire->ire_uinfo.iulp_spipe, ire->ire_uinfo.iulp_rpipe, 181 ib_pkt_count, ire->ire_ob_pkt_count, fo_pkt_count, 182 ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type), 183 inet_ntop(AF_INET6, &ire->ire_addr_v6, buf1, sizeof (buf1)), 184 inet_ntop(AF_INET6, &ire->ire_mask_v6, buf2, sizeof (buf2)), 185 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, buf3, sizeof (buf3)), 186 inet_ntop(AF_INET6, &gw_addr_v6, buf4, sizeof (buf4))); 187 if (print_len < buf_len) { 188 ((mblk_t *)mp)->b_wptr += print_len; 189 } else { 190 ((mblk_t *)mp)->b_wptr += buf_len; 191 } 192 } 193 194 /* ire_walk routine invoked for ip_ire_report_v6 for each IRE. */ 195 static void 196 ire_report_ctable_v6(ire_t *ire, char *mp) 197 { 198 char buf1[INET6_ADDRSTRLEN]; 199 char buf2[INET6_ADDRSTRLEN]; 200 char buf3[INET6_ADDRSTRLEN]; 201 char buf4[INET6_ADDRSTRLEN]; 202 uint_t fo_pkt_count; 203 uint_t ib_pkt_count; 204 int ref; 205 in6_addr_t gw_addr_v6; 206 uint_t print_len, buf_len; 207 208 if ((ire->ire_type & IRE_CACHETABLE) == 0) 209 return; 210 buf_len = ((mblk_t *)mp)->b_datap->db_lim - ((mblk_t *)mp)->b_wptr; 211 if (buf_len <= 0) 212 return; 213 214 /* Number of active references of this ire */ 215 ref = ire->ire_refcnt; 216 /* "inbound" to a non local address is a forward */ 217 ib_pkt_count = ire->ire_ib_pkt_count; 218 fo_pkt_count = 0; 219 ASSERT(!(ire->ire_type & IRE_BROADCAST)); 220 if (ire->ire_type & IRE_LOCAL) { 221 fo_pkt_count = ib_pkt_count; 222 ib_pkt_count = 0; 223 } 224 225 mutex_enter(&ire->ire_lock); 226 gw_addr_v6 = ire->ire_gateway_addr_v6; 227 mutex_exit(&ire->ire_lock); 228 229 print_len = snprintf((char *)((mblk_t *)mp)->b_wptr, buf_len, 230 MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR "%5d " 231 "%05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d " 232 "%04d %08d %08d %d/%d/%d %s\n\t%s\n\t%s\n\t%s\n\t%s\n", 233 (void *)ire, (void *)ire->ire_rfq, (void *)ire->ire_stq, 234 (int)ire->ire_zoneid, 235 ire->ire_max_frag, ire->ire_uinfo.iulp_rtt, 236 ire->ire_uinfo.iulp_rtt_sd, ire->ire_uinfo.iulp_ssthresh, ref, 237 ire->ire_uinfo.iulp_rtomax, 238 (ire->ire_uinfo.iulp_tstamp_ok ? 1: 0), 239 (ire->ire_uinfo.iulp_wscale_ok ? 1: 0), 240 (ire->ire_uinfo.iulp_ecn_ok ? 1: 0), 241 (ire->ire_uinfo.iulp_pmtud_ok ? 1: 0), 242 ire->ire_uinfo.iulp_sack, 243 ire->ire_uinfo.iulp_spipe, ire->ire_uinfo.iulp_rpipe, 244 ib_pkt_count, ire->ire_ob_pkt_count, 245 fo_pkt_count, ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type), 246 inet_ntop(AF_INET6, &ire->ire_addr_v6, buf1, sizeof (buf1)), 247 inet_ntop(AF_INET6, &ire->ire_mask_v6, buf2, sizeof (buf2)), 248 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, buf3, sizeof (buf3)), 249 inet_ntop(AF_INET6, &gw_addr_v6, buf4, sizeof (buf4))); 250 if (print_len < buf_len) { 251 ((mblk_t *)mp)->b_wptr += print_len; 252 } else { 253 ((mblk_t *)mp)->b_wptr += buf_len; 254 } 255 } 256 257 258 /* 259 * Initialize the ire that is specific to IPv6 part and call 260 * ire_init_common to finish it. 261 */ 262 ire_t * 263 ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, 264 const in6_addr_t *v6mask, const in6_addr_t *v6src_addr, 265 const in6_addr_t *v6gateway, uint_t *max_fragp, 266 mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, 267 mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask, 268 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 269 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp) 270 { 271 272 /* 273 * Reject IRE security attribute creation/initialization 274 * if system is not running in Trusted mode. 275 */ 276 if ((gc != NULL || gcgrp != NULL) && !is_system_labeled()) 277 return (NULL); 278 279 if (fp_mp != NULL) { 280 /* 281 * We can't dupb() here as multiple threads could be 282 * calling dupb on the same mp which is incorrect. 283 * First dupb() should be called only by one thread. 284 */ 285 fp_mp = copyb(fp_mp); 286 if (fp_mp == NULL) 287 return (NULL); 288 } 289 290 if (dlureq_mp != NULL) { 291 /* 292 * We can't dupb() here as multiple threads could be 293 * calling dupb on the same mp which is incorrect. 294 * First dupb() should be called only by one thread. 295 */ 296 dlureq_mp = copyb(dlureq_mp); 297 if (dlureq_mp == NULL) { 298 if (fp_mp != NULL) 299 freeb(fp_mp); 300 return (NULL); 301 } 302 } 303 304 BUMP_IRE_STATS(ire_stats_v6, ire_stats_alloced); 305 ire->ire_addr_v6 = *v6addr; 306 307 if (v6src_addr != NULL) 308 ire->ire_src_addr_v6 = *v6src_addr; 309 if (v6mask != NULL) { 310 ire->ire_mask_v6 = *v6mask; 311 ire->ire_masklen = ip_mask_to_plen_v6(&ire->ire_mask_v6); 312 } 313 if (v6gateway != NULL) 314 ire->ire_gateway_addr_v6 = *v6gateway; 315 316 if (type == IRE_CACHE && v6cmask != NULL) 317 ire->ire_cmask_v6 = *v6cmask; 318 319 /* 320 * Multirouted packets need to have a fragment header added so that 321 * the receiver is able to discard duplicates according to their 322 * fragment identifier. 323 */ 324 if (type == IRE_CACHE && (flags & RTF_MULTIRT)) { 325 ire->ire_frag_flag = IPH_FRAG_HDR; 326 } 327 328 /* ire_init_common will free the mblks upon encountering any failure */ 329 if (!ire_init_common(ire, max_fragp, fp_mp, rfq, stq, type, dlureq_mp, 330 ipif, NULL, phandle, ihandle, flags, IPV6_VERSION, ulp_info, 331 gc, gcgrp)) 332 return (NULL); 333 334 return (ire); 335 } 336 337 /* 338 * Similar to ire_create_v6 except that it is called only when 339 * we want to allocate ire as an mblk e.g. we have a external 340 * resolver. Do we need this in IPv6 ? 341 */ 342 ire_t * 343 ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 344 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 345 mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, 346 mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask, 347 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 348 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp) 349 { 350 ire_t *ire; 351 ire_t *ret_ire; 352 mblk_t *mp; 353 354 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 355 356 /* Allocate the new IRE. */ 357 mp = allocb(sizeof (ire_t), BPRI_MED); 358 if (mp == NULL) { 359 ip1dbg(("ire_create_mp_v6: alloc failed\n")); 360 return (NULL); 361 } 362 363 ire = (ire_t *)mp->b_rptr; 364 mp->b_wptr = (uchar_t *)&ire[1]; 365 366 /* Start clean. */ 367 *ire = ire_null; 368 ire->ire_mp = mp; 369 mp->b_datap->db_type = IRE_DB_TYPE; 370 371 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 372 NULL, fp_mp, rfq, stq, type, dlureq_mp, ipif, v6cmask, phandle, 373 ihandle, flags, ulp_info, gc, gcgrp); 374 375 if (ret_ire == NULL) { 376 freeb(ire->ire_mp); 377 return (NULL); 378 } 379 return (ire); 380 } 381 382 /* 383 * ire_create_v6 is called to allocate and initialize a new IRE. 384 * 385 * NOTE : This is called as writer sometimes though not required 386 * by this function. 387 */ 388 ire_t * 389 ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 390 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 391 uint_t *max_fragp, mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, 392 mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask, 393 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 394 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp) 395 { 396 ire_t *ire; 397 ire_t *ret_ire; 398 399 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 400 401 ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP); 402 if (ire == NULL) { 403 ip1dbg(("ire_create_v6: alloc failed\n")); 404 return (NULL); 405 } 406 *ire = ire_null; 407 408 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 409 max_fragp, fp_mp, rfq, stq, type, dlureq_mp, ipif, v6cmask, phandle, 410 ihandle, flags, ulp_info, gc, gcgrp); 411 412 if (ret_ire == NULL) { 413 kmem_cache_free(ire_cache, ire); 414 return (NULL); 415 } 416 ASSERT(ret_ire == ire); 417 return (ire); 418 } 419 420 /* 421 * Find an IRE_INTERFACE for the multicast group. 422 * Allows different routes for multicast addresses 423 * in the unicast routing table (akin to FF::0/8 but could be more specific) 424 * which point at different interfaces. This is used when IPV6_MULTICAST_IF 425 * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't 426 * specify the interface to join on. 427 * 428 * Supports link-local addresses by following the ipif/ill when recursing. 429 */ 430 ire_t * 431 ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid) 432 { 433 ire_t *ire; 434 ipif_t *ipif = NULL; 435 int match_flags = MATCH_IRE_TYPE; 436 in6_addr_t gw_addr_v6; 437 438 ire = ire_ftable_lookup_v6(group, 0, 0, 0, NULL, NULL, 439 zoneid, 0, NULL, MATCH_IRE_DEFAULT); 440 441 /* We search a resolvable ire in case of multirouting. */ 442 if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) { 443 ire_t *cire = NULL; 444 /* 445 * If the route is not resolvable, the looked up ire 446 * may be changed here. In that case, ire_multirt_lookup() 447 * IRE_REFRELE the original ire and change it. 448 */ 449 (void) ire_multirt_lookup_v6(&cire, &ire, MULTIRT_CACHEGW, 450 NULL); 451 if (cire != NULL) 452 ire_refrele(cire); 453 } 454 if (ire == NULL) 455 return (NULL); 456 /* 457 * Make sure we follow ire_ipif. 458 * 459 * We need to determine the interface route through 460 * which the gateway will be reached. We don't really 461 * care which interface is picked if the interface is 462 * part of a group. 463 */ 464 if (ire->ire_ipif != NULL) { 465 ipif = ire->ire_ipif; 466 match_flags |= MATCH_IRE_ILL_GROUP; 467 } 468 469 switch (ire->ire_type) { 470 case IRE_DEFAULT: 471 case IRE_PREFIX: 472 case IRE_HOST: 473 mutex_enter(&ire->ire_lock); 474 gw_addr_v6 = ire->ire_gateway_addr_v6; 475 mutex_exit(&ire->ire_lock); 476 ire_refrele(ire); 477 ire = ire_ftable_lookup_v6(&gw_addr_v6, 0, 0, 478 IRE_INTERFACE, ipif, NULL, zoneid, 0, 479 NULL, match_flags); 480 return (ire); 481 case IRE_IF_NORESOLVER: 482 case IRE_IF_RESOLVER: 483 return (ire); 484 default: 485 ire_refrele(ire); 486 return (NULL); 487 } 488 } 489 490 /* 491 * Return any local address. We use this to target ourselves 492 * when the src address was specified as 'default'. 493 * Preference for IRE_LOCAL entries. 494 */ 495 ire_t * 496 ire_lookup_local_v6(zoneid_t zoneid) 497 { 498 ire_t *ire; 499 irb_t *irb; 500 ire_t *maybe = NULL; 501 int i; 502 503 for (i = 0; i < ip6_cache_table_size; i++) { 504 irb = &ip_cache_table_v6[i]; 505 if (irb->irb_ire == NULL) 506 continue; 507 rw_enter(&irb->irb_lock, RW_READER); 508 for (ire = irb->irb_ire; ire; ire = ire->ire_next) { 509 if ((ire->ire_marks & IRE_MARK_CONDEMNED) || 510 ire->ire_zoneid != zoneid && 511 ire->ire_zoneid != ALL_ZONES) 512 continue; 513 switch (ire->ire_type) { 514 case IRE_LOOPBACK: 515 if (maybe == NULL) { 516 IRE_REFHOLD(ire); 517 maybe = ire; 518 } 519 break; 520 case IRE_LOCAL: 521 if (maybe != NULL) { 522 ire_refrele(maybe); 523 } 524 IRE_REFHOLD(ire); 525 rw_exit(&irb->irb_lock); 526 return (ire); 527 } 528 } 529 rw_exit(&irb->irb_lock); 530 } 531 return (maybe); 532 } 533 534 /* 535 * This function takes a mask and returns number of bits set in the 536 * mask (the represented prefix length). Assumes a contiguous mask. 537 */ 538 int 539 ip_mask_to_plen_v6(const in6_addr_t *v6mask) 540 { 541 int bits; 542 int plen = IPV6_ABITS; 543 int i; 544 545 for (i = 3; i >= 0; i--) { 546 if (v6mask->s6_addr32[i] == 0) { 547 plen -= 32; 548 continue; 549 } 550 bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1; 551 if (bits == 0) 552 break; 553 plen -= bits; 554 } 555 556 return (plen); 557 } 558 559 /* 560 * Convert a prefix length to the mask for that prefix. 561 * Returns the argument bitmask. 562 */ 563 in6_addr_t * 564 ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask) 565 { 566 uint32_t *ptr; 567 568 if (plen < 0 || plen > IPV6_ABITS) 569 return (NULL); 570 *bitmask = ipv6_all_zeros; 571 572 ptr = (uint32_t *)bitmask; 573 while (plen > 32) { 574 *ptr++ = 0xffffffffU; 575 plen -= 32; 576 } 577 *ptr = htonl(0xffffffffU << (32 - plen)); 578 return (bitmask); 579 } 580 581 /* 582 * Add a fully initialized IRE to an appropriate 583 * table based on ire_type. 584 * 585 * The forward table contains IRE_PREFIX/IRE_HOST/IRE_HOST and 586 * IRE_IF_RESOLVER/IRE_IF_NORESOLVER and IRE_DEFAULT. 587 * 588 * The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK 589 * and IRE_CACHE. 590 * 591 * NOTE : This function is called as writer though not required 592 * by this function. 593 */ 594 int 595 ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func) 596 { 597 ire_t *ire1; 598 int mask_table_index; 599 irb_t *irb_ptr; 600 ire_t **irep; 601 int flags; 602 ire_t *pire = NULL; 603 ill_t *stq_ill; 604 boolean_t ndp_g_lock_held = B_FALSE; 605 ire_t *ire = *ire_p; 606 int error; 607 608 ASSERT(ire->ire_ipversion == IPV6_VERSION); 609 ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */ 610 ASSERT(ire->ire_nce == NULL); 611 612 /* Find the appropriate list head. */ 613 switch (ire->ire_type) { 614 case IRE_HOST: 615 ire->ire_mask_v6 = ipv6_all_ones; 616 ire->ire_masklen = IPV6_ABITS; 617 if ((ire->ire_flags & RTF_SETSRC) == 0) 618 ire->ire_src_addr_v6 = ipv6_all_zeros; 619 break; 620 case IRE_CACHE: 621 case IRE_LOCAL: 622 case IRE_LOOPBACK: 623 ire->ire_mask_v6 = ipv6_all_ones; 624 ire->ire_masklen = IPV6_ABITS; 625 break; 626 case IRE_PREFIX: 627 if ((ire->ire_flags & RTF_SETSRC) == 0) 628 ire->ire_src_addr_v6 = ipv6_all_zeros; 629 break; 630 case IRE_DEFAULT: 631 if ((ire->ire_flags & RTF_SETSRC) == 0) 632 ire->ire_src_addr_v6 = ipv6_all_zeros; 633 break; 634 case IRE_IF_RESOLVER: 635 case IRE_IF_NORESOLVER: 636 break; 637 default: 638 printf("ire_add_v6: ire %p has unrecognized IRE type (%d)\n", 639 (void *)ire, ire->ire_type); 640 ire_delete(ire); 641 *ire_p = NULL; 642 return (EINVAL); 643 } 644 645 /* Make sure the address is properly masked. */ 646 V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6); 647 648 if ((ire->ire_type & IRE_CACHETABLE) == 0) { 649 /* IRE goes into Forward Table */ 650 mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6); 651 if ((ip_forwarding_table_v6[mask_table_index]) == NULL) { 652 irb_t *ptr; 653 int i; 654 655 ptr = (irb_t *)mi_zalloc((ip6_ftable_hash_size * 656 sizeof (irb_t))); 657 if (ptr == NULL) { 658 ire_delete(ire); 659 *ire_p = NULL; 660 return (ENOMEM); 661 } 662 for (i = 0; i < ip6_ftable_hash_size; i++) { 663 rw_init(&ptr[i].irb_lock, NULL, 664 RW_DEFAULT, NULL); 665 } 666 mutex_enter(&ire_ft_init_lock); 667 if (ip_forwarding_table_v6[mask_table_index] == NULL) { 668 ip_forwarding_table_v6[mask_table_index] = ptr; 669 mutex_exit(&ire_ft_init_lock); 670 } else { 671 /* 672 * Some other thread won the race in 673 * initializing the forwarding table at the 674 * same index. 675 */ 676 mutex_exit(&ire_ft_init_lock); 677 for (i = 0; i < ip6_ftable_hash_size; i++) { 678 rw_destroy(&ptr[i].irb_lock); 679 } 680 mi_free(ptr); 681 } 682 } 683 irb_ptr = &(ip_forwarding_table_v6[mask_table_index][ 684 IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6, 685 ip6_ftable_hash_size)]); 686 } else { 687 irb_ptr = &(ip_cache_table_v6[IRE_ADDR_HASH_V6( 688 ire->ire_addr_v6, ip6_cache_table_size)]); 689 } 690 /* 691 * For xresolv interfaces (v6 interfaces with an external 692 * address resolver), ip_newroute_v6/ip_newroute_ipif_v6 693 * are unable to prevent the deletion of the interface route 694 * while adding an IRE_CACHE for an on-link destination 695 * in the IRE_IF_RESOLVER case, since the ire has to go to 696 * the external resolver and return. We can't do a REFHOLD on the 697 * associated interface ire for fear of the message being freed 698 * if the external resolver can't resolve the address. 699 * Here we look up the interface ire in the forwarding table 700 * and make sure that the interface route has not been deleted. 701 */ 702 if (ire->ire_type == IRE_CACHE && 703 IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6) && 704 (((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) && 705 (((ill_t *)ire->ire_stq->q_ptr)->ill_flags & ILLF_XRESOLV)) { 706 707 pire = ire_ihandle_lookup_onlink_v6(ire); 708 if (pire == NULL) { 709 ire_delete(ire); 710 *ire_p = NULL; 711 return (EINVAL); 712 } 713 /* Prevent pire from getting deleted */ 714 IRB_REFHOLD(pire->ire_bucket); 715 /* Has it been removed already? */ 716 if (pire->ire_marks & IRE_MARK_CONDEMNED) { 717 IRB_REFRELE(pire->ire_bucket); 718 ire_refrele(pire); 719 ire_delete(ire); 720 *ire_p = NULL; 721 return (EINVAL); 722 } 723 } 724 725 flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW); 726 /* 727 * For IRE_CACHES, MATCH_IRE_IPIF is not enough to check 728 * for duplicates because : 729 * 730 * 1) ire_ipif->ipif_ill and ire_stq->q_ptr could be 731 * pointing at different ills. A real duplicate is 732 * a match on both ire_ipif and ire_stq. 733 * 734 * 2) We could have multiple packets trying to create 735 * an IRE_CACHE for the same ill. 736 * 737 * Moreover, IPIF_NOFAILOVER and IPV6_BOUND_PIF endpoints wants 738 * to go out on a particular ill. Rather than looking at the 739 * packet, we depend on the above for MATCH_IRE_ILL here. 740 * 741 * Unlike IPv4, MATCH_IRE_IPIF is needed here as we could have 742 * multiple IRE_CACHES for an ill for the same destination 743 * with various scoped addresses i.e represented by ipifs. 744 * 745 * MATCH_IRE_ILL is done implicitly below for IRE_CACHES. 746 */ 747 if (ire->ire_ipif != NULL) 748 flags |= MATCH_IRE_IPIF; 749 /* 750 * If we are creating hidden ires, make sure we search on 751 * this ill (MATCH_IRE_ILL) and a hidden ire, while we are 752 * searching for duplicates below. Otherwise we could 753 * potentially find an IRE on some other interface 754 * and it may not be a IRE marked with IRE_MARK_HIDDEN. We 755 * shouldn't do this as this will lead to an infinite loop as 756 * eventually we need an hidden ire for this packet to go 757 * out. MATCH_IRE_ILL is already marked above. 758 */ 759 if (ire->ire_marks & IRE_MARK_HIDDEN) { 760 ASSERT(ire->ire_type == IRE_CACHE); 761 flags |= MATCH_IRE_MARK_HIDDEN; 762 } 763 764 /* 765 * Start the atomic add of the ire. Grab the ill locks, 766 * ill_g_usesrc_lock and the bucket lock. Check for condemned. 767 * To avoid lock order problems, get the ndp6.ndp_g_lock now itself. 768 */ 769 if (ire->ire_type == IRE_CACHE) { 770 mutex_enter(&ndp6.ndp_g_lock); 771 ndp_g_lock_held = B_TRUE; 772 } 773 774 /* 775 * If ipif or ill is changing ire_atomic_start() may queue the 776 * request and return EINPROGRESS. 777 */ 778 779 error = ire_atomic_start(irb_ptr, ire, q, mp, func); 780 if (error != 0) { 781 if (ndp_g_lock_held) 782 mutex_exit(&ndp6.ndp_g_lock); 783 /* 784 * We don't know whether it is a valid ipif or not. 785 * So, set it to NULL. This assumes that the ire has not added 786 * a reference to the ipif. 787 */ 788 ire->ire_ipif = NULL; 789 ire_delete(ire); 790 if (pire != NULL) { 791 IRB_REFRELE(pire->ire_bucket); 792 ire_refrele(pire); 793 } 794 *ire_p = NULL; 795 return (error); 796 } 797 /* 798 * To avoid creating ires having stale values for the ire_max_frag 799 * we get the latest value atomically here. For more details 800 * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE 801 * in ip_rput_dlpi_writer 802 */ 803 if (ire->ire_max_fragp == NULL) { 804 if (IN6_IS_ADDR_MULTICAST(&ire->ire_addr_v6)) 805 ire->ire_max_frag = ire->ire_ipif->ipif_mtu; 806 else 807 ire->ire_max_frag = pire->ire_max_frag; 808 } else { 809 uint_t max_frag; 810 811 max_frag = *ire->ire_max_fragp; 812 ire->ire_max_fragp = NULL; 813 ire->ire_max_frag = max_frag; 814 } 815 816 /* 817 * Atomically check for duplicate and insert in the table. 818 */ 819 for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) { 820 if (ire1->ire_marks & IRE_MARK_CONDEMNED) 821 continue; 822 823 if (ire->ire_type == IRE_CACHE) { 824 /* 825 * We do MATCH_IRE_ILL implicitly here for IRE_CACHES. 826 * As ire_ipif and ire_stq could point to two 827 * different ills, we can't pass just ire_ipif to 828 * ire_match_args and get a match on both ills. 829 * This is just needed for duplicate checks here and 830 * so we don't add an extra argument to 831 * ire_match_args for this. Do it locally. 832 * 833 * NOTE : Currently there is no part of the code 834 * that asks for both MATH_IRE_IPIF and MATCH_IRE_ILL 835 * match for IRE_CACHEs. Thus we don't want to 836 * extend the arguments to ire_match_args_v6. 837 */ 838 if (ire1->ire_stq != ire->ire_stq) 839 continue; 840 /* 841 * Multiroute IRE_CACHEs for a given destination can 842 * have the same ire_ipif, typically if their source 843 * address is forced using RTF_SETSRC, and the same 844 * send-to queue. We differentiate them using the parent 845 * handle. 846 */ 847 if ((ire1->ire_flags & RTF_MULTIRT) && 848 (ire->ire_flags & RTF_MULTIRT) && 849 (ire1->ire_phandle != ire->ire_phandle)) 850 continue; 851 } 852 if (ire1->ire_zoneid != ire->ire_zoneid) 853 continue; 854 if (ire_match_args_v6(ire1, &ire->ire_addr_v6, 855 &ire->ire_mask_v6, &ire->ire_gateway_addr_v6, 856 ire->ire_type, ire->ire_ipif, ire->ire_zoneid, 0, NULL, 857 flags)) { 858 /* 859 * Return the old ire after doing a REFHOLD. 860 * As most of the callers continue to use the IRE 861 * after adding, we return a held ire. This will 862 * avoid a lookup in the caller again. If the callers 863 * don't want to use it, they need to do a REFRELE. 864 */ 865 ip1dbg(("found dup ire existing %p new %p", 866 (void *)ire1, (void *)ire)); 867 IRE_REFHOLD(ire1); 868 if (ndp_g_lock_held) 869 mutex_exit(&ndp6.ndp_g_lock); 870 ire_atomic_end(irb_ptr, ire); 871 ire_delete(ire); 872 if (pire != NULL) { 873 /* 874 * Assert that it is 875 * not yet removed from the list. 876 */ 877 ASSERT(pire->ire_ptpn != NULL); 878 IRB_REFRELE(pire->ire_bucket); 879 ire_refrele(pire); 880 } 881 *ire_p = ire1; 882 return (0); 883 } 884 } 885 if (ire->ire_type == IRE_CACHE) { 886 in6_addr_t gw_addr_v6; 887 ill_t *ill = ire_to_ill(ire); 888 char buf[INET6_ADDRSTRLEN]; 889 nce_t *nce; 890 891 /* 892 * All IRE_CACHE types must have a nce. If this is 893 * not the case the entry will not be added. We need 894 * to make sure that if somebody deletes the nce 895 * after we looked up, they will find this ire and 896 * delete the ire. To delete this ire one needs the 897 * bucket lock which we are still holding here. So, 898 * even if the nce gets deleted after we looked up, 899 * this ire will get deleted. 900 * 901 * NOTE : Don't need the ire_lock for accessing 902 * ire_gateway_addr_v6 as it is appearing first 903 * time on the list and rts_setgwr_v6 could not 904 * be changing this. 905 */ 906 gw_addr_v6 = ire->ire_gateway_addr_v6; 907 if (IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 908 nce = ndp_lookup_v6(ill, &ire->ire_addr_v6, B_TRUE); 909 } else { 910 nce = ndp_lookup_v6(ill, &gw_addr_v6, B_TRUE); 911 } 912 if (nce == NULL) 913 goto failed; 914 915 /* Pair of refhold, refrele just to get the tracing right */ 916 NCE_REFHOLD_TO_REFHOLD_NOTR(nce); 917 /* 918 * Atomically make sure that new IREs don't point 919 * to an NCE that is logically deleted (CONDEMNED). 920 * ndp_delete() first marks the NCE CONDEMNED. 921 * This ensures that the nce_refcnt won't increase 922 * due to new nce_lookups or due to addition of new IREs 923 * pointing to this NCE. Then ndp_delete() cleans up 924 * existing references. If we don't do it atomically here, 925 * ndp_delete() -> nce_ire_delete() will not be able to 926 * clean up the IRE list completely, and the nce_refcnt 927 * won't go down to zero. 928 */ 929 mutex_enter(&nce->nce_lock); 930 if (ill->ill_flags & ILLF_XRESOLV) { 931 /* 932 * If we used an external resolver, we may not 933 * have gone through neighbor discovery to get here. 934 * Must update the nce_state before the next check. 935 */ 936 if (nce->nce_state == ND_INCOMPLETE) 937 nce->nce_state = ND_REACHABLE; 938 } 939 if (nce->nce_state == ND_INCOMPLETE || 940 (nce->nce_flags & NCE_F_CONDEMNED) || 941 (nce->nce_state == ND_UNREACHABLE)) { 942 failed: 943 if (ndp_g_lock_held) 944 mutex_exit(&ndp6.ndp_g_lock); 945 if (nce != NULL) 946 mutex_exit(&nce->nce_lock); 947 ire_atomic_end(irb_ptr, ire); 948 ip1dbg(("ire_add_v6: No nce for dst %s \n", 949 inet_ntop(AF_INET6, &ire->ire_addr_v6, 950 buf, sizeof (buf)))); 951 ire_delete(ire); 952 if (pire != NULL) { 953 /* 954 * Assert that it is 955 * not yet removed from the list. 956 */ 957 ASSERT(pire->ire_ptpn != NULL); 958 IRB_REFRELE(pire->ire_bucket); 959 ire_refrele(pire); 960 } 961 if (nce != NULL) 962 NCE_REFRELE_NOTR(nce); 963 *ire_p = NULL; 964 return (EINVAL); 965 } else { 966 ire->ire_nce = nce; 967 } 968 mutex_exit(&nce->nce_lock); 969 } 970 /* 971 * Find the first entry that matches ire_addr - provides 972 * tail insertion. *irep will be null if no match. 973 */ 974 irep = (ire_t **)irb_ptr; 975 while ((ire1 = *irep) != NULL && 976 !IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6)) 977 irep = &ire1->ire_next; 978 ASSERT(!(ire->ire_type & IRE_BROADCAST)); 979 980 if (*irep != NULL) { 981 /* 982 * Find the last ire which matches ire_addr_v6. 983 * Needed to do tail insertion among entries with the same 984 * ire_addr_v6. 985 */ 986 while (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 987 &ire1->ire_addr_v6)) { 988 irep = &ire1->ire_next; 989 ire1 = *irep; 990 if (ire1 == NULL) 991 break; 992 } 993 } 994 995 if (ire->ire_type == IRE_DEFAULT) { 996 /* 997 * We keep a count of default gateways which is used when 998 * assigning them as routes. 999 */ 1000 ipv6_ire_default_count++; 1001 ASSERT(ipv6_ire_default_count != 0); /* Wraparound */ 1002 } 1003 /* Insert at *irep */ 1004 ire1 = *irep; 1005 if (ire1 != NULL) 1006 ire1->ire_ptpn = &ire->ire_next; 1007 ire->ire_next = ire1; 1008 /* Link the new one in. */ 1009 ire->ire_ptpn = irep; 1010 /* 1011 * ire_walk routines de-reference ire_next without holding 1012 * a lock. Before we point to the new ire, we want to make 1013 * sure the store that sets the ire_next of the new ire 1014 * reaches global visibility, so that ire_walk routines 1015 * don't see a truncated list of ires i.e if the ire_next 1016 * of the new ire gets set after we do "*irep = ire" due 1017 * to re-ordering, the ire_walk thread will see a NULL 1018 * once it accesses the ire_next of the new ire. 1019 * membar_producer() makes sure that the following store 1020 * happens *after* all of the above stores. 1021 */ 1022 membar_producer(); 1023 *irep = ire; 1024 ire->ire_bucket = irb_ptr; 1025 /* 1026 * We return a bumped up IRE above. Keep it symmetrical 1027 * so that the callers will always have to release. This 1028 * helps the callers of this function because they continue 1029 * to use the IRE after adding and hence they don't have to 1030 * lookup again after we return the IRE. 1031 * 1032 * NOTE : We don't have to use atomics as this is appearing 1033 * in the list for the first time and no one else can bump 1034 * up the reference count on this yet. 1035 */ 1036 IRE_REFHOLD_LOCKED(ire); 1037 BUMP_IRE_STATS(ire_stats_v6, ire_stats_inserted); 1038 irb_ptr->irb_ire_cnt++; 1039 if (ire->ire_marks & IRE_MARK_TEMPORARY) 1040 irb_ptr->irb_tmp_ire_cnt++; 1041 1042 if (ire->ire_ipif != NULL) { 1043 ire->ire_ipif->ipif_ire_cnt++; 1044 if (ire->ire_stq != NULL) { 1045 stq_ill = (ill_t *)ire->ire_stq->q_ptr; 1046 stq_ill->ill_ire_cnt++; 1047 } 1048 } else { 1049 ASSERT(ire->ire_stq == NULL); 1050 } 1051 1052 if (ndp_g_lock_held) 1053 mutex_exit(&ndp6.ndp_g_lock); 1054 ire_atomic_end(irb_ptr, ire); 1055 1056 if (pire != NULL) { 1057 /* Assert that it is not removed from the list yet */ 1058 ASSERT(pire->ire_ptpn != NULL); 1059 IRB_REFRELE(pire->ire_bucket); 1060 ire_refrele(pire); 1061 } 1062 1063 if (ire->ire_type != IRE_CACHE) { 1064 /* 1065 * For ire's with with host mask see if there is an entry 1066 * in the cache. If there is one flush the whole cache as 1067 * there might be multiple entries due to RTF_MULTIRT (CGTP). 1068 * If no entry is found than there is no need to flush the 1069 * cache. 1070 */ 1071 1072 if (ip_mask_to_plen_v6(&ire->ire_mask_v6) == IPV6_ABITS) { 1073 ire_t *lire; 1074 lire = ire_ctable_lookup_v6(&ire->ire_addr_v6, NULL, 1075 IRE_CACHE, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); 1076 if (lire != NULL) { 1077 ire_refrele(lire); 1078 ire_flush_cache_v6(ire, IRE_FLUSH_ADD); 1079 } 1080 } else { 1081 ire_flush_cache_v6(ire, IRE_FLUSH_ADD); 1082 } 1083 } 1084 1085 *ire_p = ire; 1086 return (0); 1087 } 1088 1089 /* 1090 * Search for all HOST REDIRECT routes that are 1091 * pointing at the specified gateway and 1092 * delete them. This routine is called only 1093 * when a default gateway is going away. 1094 */ 1095 static void 1096 ire_delete_host_redirects_v6(const in6_addr_t *gateway) 1097 { 1098 irb_t *irb_ptr; 1099 irb_t *irb; 1100 ire_t *ire; 1101 in6_addr_t gw_addr_v6; 1102 int i; 1103 1104 /* get the hash table for HOST routes */ 1105 irb_ptr = ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)]; 1106 if (irb_ptr == NULL) 1107 return; 1108 for (i = 0; (i < ip6_ftable_hash_size); i++) { 1109 irb = &irb_ptr[i]; 1110 IRB_REFHOLD(irb); 1111 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 1112 if (!(ire->ire_flags & RTF_DYNAMIC)) 1113 continue; 1114 mutex_enter(&ire->ire_lock); 1115 gw_addr_v6 = ire->ire_gateway_addr_v6; 1116 mutex_exit(&ire->ire_lock); 1117 if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) 1118 ire_delete(ire); 1119 } 1120 IRB_REFRELE(irb); 1121 } 1122 } 1123 1124 /* 1125 * Delete all the cache entries with this 'addr'. This is the IPv6 counterpart 1126 * of ip_ire_clookup_and_delete. The difference being this function does not 1127 * return any value. IPv6 processing of a gratuitous ARP, as it stands, is 1128 * different than IPv4 in that, regardless of the presence of a cache entry 1129 * for this address, an ire_walk_v6 is done. Another difference is that unlike 1130 * in the case of IPv4 this does not take an ipif_t argument, since it is only 1131 * called by ip_arp_news and the match is always only on the address. 1132 */ 1133 void 1134 ip_ire_clookup_and_delete_v6(const in6_addr_t *addr) 1135 { 1136 irb_t *irb; 1137 ire_t *cire; 1138 boolean_t found = B_FALSE; 1139 1140 irb = &ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, ip6_cache_table_size)]; 1141 IRB_REFHOLD(irb); 1142 for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) { 1143 if (cire->ire_marks == IRE_MARK_CONDEMNED) 1144 continue; 1145 if (IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, addr)) { 1146 1147 /* This signifies start of a match */ 1148 if (!found) 1149 found = B_TRUE; 1150 if (cire->ire_type == IRE_CACHE) { 1151 if (cire->ire_nce != NULL) 1152 ndp_delete(cire->ire_nce); 1153 ire_delete_v6(cire); 1154 } 1155 /* End of the match */ 1156 } else if (found) 1157 break; 1158 } 1159 IRB_REFRELE(irb); 1160 } 1161 1162 /* 1163 * Delete the specified IRE. 1164 * All calls should use ire_delete(). 1165 * Sometimes called as writer though not required by this function. 1166 * 1167 * NOTE : This function is called only if the ire was added 1168 * in the list. 1169 */ 1170 void 1171 ire_delete_v6(ire_t *ire) 1172 { 1173 in6_addr_t gw_addr_v6; 1174 1175 ASSERT(ire->ire_refcnt >= 1); 1176 ASSERT(ire->ire_ipversion == IPV6_VERSION); 1177 1178 if (ire->ire_type != IRE_CACHE) 1179 ire_flush_cache_v6(ire, IRE_FLUSH_DELETE); 1180 if (ire->ire_type == IRE_DEFAULT) { 1181 /* 1182 * when a default gateway is going away 1183 * delete all the host redirects pointing at that 1184 * gateway. 1185 */ 1186 mutex_enter(&ire->ire_lock); 1187 gw_addr_v6 = ire->ire_gateway_addr_v6; 1188 mutex_exit(&ire->ire_lock); 1189 ire_delete_host_redirects_v6(&gw_addr_v6); 1190 } 1191 } 1192 1193 /* 1194 * ire_walk routine to delete all IRE_CACHE and IRE_HOST type redirect 1195 * entries. 1196 */ 1197 /*ARGSUSED1*/ 1198 void 1199 ire_delete_cache_v6(ire_t *ire, char *arg) 1200 { 1201 char addrstr1[INET6_ADDRSTRLEN]; 1202 char addrstr2[INET6_ADDRSTRLEN]; 1203 1204 if ((ire->ire_type & IRE_CACHE) || 1205 (ire->ire_flags & RTF_DYNAMIC)) { 1206 ip1dbg(("ire_delete_cache_v6: deleted %s type %d through %s\n", 1207 inet_ntop(AF_INET6, &ire->ire_addr_v6, 1208 addrstr1, sizeof (addrstr1)), 1209 ire->ire_type, 1210 inet_ntop(AF_INET6, &ire->ire_gateway_addr_v6, 1211 addrstr2, sizeof (addrstr2)))); 1212 ire_delete(ire); 1213 } 1214 1215 } 1216 1217 /* 1218 * ire_walk routine to delete all IRE_CACHE/IRE_HOST type redirect entries 1219 * that have a given gateway address. 1220 */ 1221 void 1222 ire_delete_cache_gw_v6(ire_t *ire, char *addr) 1223 { 1224 in6_addr_t *gw_addr = (in6_addr_t *)addr; 1225 char buf1[INET6_ADDRSTRLEN]; 1226 char buf2[INET6_ADDRSTRLEN]; 1227 in6_addr_t ire_gw_addr_v6; 1228 1229 if (!(ire->ire_type & IRE_CACHE) && 1230 !(ire->ire_flags & RTF_DYNAMIC)) 1231 return; 1232 1233 mutex_enter(&ire->ire_lock); 1234 ire_gw_addr_v6 = ire->ire_gateway_addr_v6; 1235 mutex_exit(&ire->ire_lock); 1236 1237 if (IN6_ARE_ADDR_EQUAL(&ire_gw_addr_v6, gw_addr)) { 1238 ip1dbg(("ire_delete_cache_gw_v6: deleted %s type %d to %s\n", 1239 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, 1240 buf1, sizeof (buf1)), 1241 ire->ire_type, 1242 inet_ntop(AF_INET6, &ire_gw_addr_v6, 1243 buf2, sizeof (buf2)))); 1244 ire_delete(ire); 1245 } 1246 } 1247 1248 /* 1249 * Remove all IRE_CACHE entries that match 1250 * the ire specified. (Sometimes called 1251 * as writer though not required by this function.) 1252 * 1253 * The flag argument indicates if the 1254 * flush request is due to addition 1255 * of new route (IRE_FLUSH_ADD) or deletion of old 1256 * route (IRE_FLUSH_DELETE). 1257 * 1258 * This routine takes only the IREs from the forwarding 1259 * table and flushes the corresponding entries from 1260 * the cache table. 1261 * 1262 * When flushing due to the deletion of an old route, it 1263 * just checks the cache handles (ire_phandle and ire_ihandle) and 1264 * deletes the ones that match. 1265 * 1266 * When flushing due to the creation of a new route, it checks 1267 * if a cache entry's address matches the one in the IRE and 1268 * that the cache entry's parent has a less specific mask than the 1269 * one in IRE. The destination of such a cache entry could be the 1270 * gateway for other cache entries, so we need to flush those as 1271 * well by looking for gateway addresses matching the IRE's address. 1272 */ 1273 void 1274 ire_flush_cache_v6(ire_t *ire, int flag) 1275 { 1276 int i; 1277 ire_t *cire; 1278 irb_t *irb; 1279 1280 if (ire->ire_type & IRE_CACHE) 1281 return; 1282 1283 /* 1284 * If a default is just created, there is no point 1285 * in going through the cache, as there will not be any 1286 * cached ires. 1287 */ 1288 if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD) 1289 return; 1290 if (flag == IRE_FLUSH_ADD) { 1291 /* 1292 * This selective flush is 1293 * due to the addition of 1294 * new IRE. 1295 */ 1296 for (i = 0; i < ip6_cache_table_size; i++) { 1297 irb = &ip_cache_table_v6[i]; 1298 if ((cire = irb->irb_ire) == NULL) 1299 continue; 1300 IRB_REFHOLD(irb); 1301 for (cire = irb->irb_ire; cire != NULL; 1302 cire = cire->ire_next) { 1303 if (cire->ire_type != IRE_CACHE) 1304 continue; 1305 /* 1306 * If 'cire' belongs to the same subnet 1307 * as the new ire being added, and 'cire' 1308 * is derived from a prefix that is less 1309 * specific than the new ire being added, 1310 * we need to flush 'cire'; for instance, 1311 * when a new interface comes up. 1312 */ 1313 if ((V6_MASK_EQ_2(cire->ire_addr_v6, 1314 ire->ire_mask_v6, ire->ire_addr_v6) && 1315 (ip_mask_to_plen_v6(&cire->ire_cmask_v6) <= 1316 ire->ire_masklen))) { 1317 ire_delete(cire); 1318 continue; 1319 } 1320 /* 1321 * This is the case when the ire_gateway_addr 1322 * of 'cire' belongs to the same subnet as 1323 * the new ire being added. 1324 * Flushing such ires is sometimes required to 1325 * avoid misrouting: say we have a machine with 1326 * two interfaces (I1 and I2), a default router 1327 * R on the I1 subnet, and a host route to an 1328 * off-link destination D with a gateway G on 1329 * the I2 subnet. 1330 * Under normal operation, we will have an 1331 * on-link cache entry for G and an off-link 1332 * cache entry for D with G as ire_gateway_addr, 1333 * traffic to D will reach its destination 1334 * through gateway G. 1335 * If the administrator does 'ifconfig I2 down', 1336 * the cache entries for D and G will be 1337 * flushed. However, G will now be resolved as 1338 * an off-link destination using R (the default 1339 * router) as gateway. Then D will also be 1340 * resolved as an off-link destination using G 1341 * as gateway - this behavior is due to 1342 * compatibility reasons, see comment in 1343 * ire_ihandle_lookup_offlink(). Traffic to D 1344 * will go to the router R and probably won't 1345 * reach the destination. 1346 * The administrator then does 'ifconfig I2 up'. 1347 * Since G is on the I2 subnet, this routine 1348 * will flush its cache entry. It must also 1349 * flush the cache entry for D, otherwise 1350 * traffic will stay misrouted until the IRE 1351 * times out. 1352 */ 1353 if (V6_MASK_EQ_2(cire->ire_gateway_addr_v6, 1354 ire->ire_mask_v6, ire->ire_addr_v6)) { 1355 ire_delete(cire); 1356 continue; 1357 } 1358 } 1359 IRB_REFRELE(irb); 1360 } 1361 } else { 1362 /* 1363 * delete the cache entries based on 1364 * handle in the IRE as this IRE is 1365 * being deleted/changed. 1366 */ 1367 for (i = 0; i < ip6_cache_table_size; i++) { 1368 irb = &ip_cache_table_v6[i]; 1369 if ((cire = irb->irb_ire) == NULL) 1370 continue; 1371 IRB_REFHOLD(irb); 1372 for (cire = irb->irb_ire; cire != NULL; 1373 cire = cire->ire_next) { 1374 if (cire->ire_type != IRE_CACHE) 1375 continue; 1376 if ((cire->ire_phandle == 0 || 1377 cire->ire_phandle != ire->ire_phandle) && 1378 (cire->ire_ihandle == 0 || 1379 cire->ire_ihandle != ire->ire_ihandle)) 1380 continue; 1381 ire_delete(cire); 1382 } 1383 IRB_REFRELE(irb); 1384 } 1385 } 1386 } 1387 1388 /* 1389 * Matches the arguments passed with the values in the ire. 1390 * 1391 * Note: for match types that match using "ipif" passed in, ipif 1392 * must be checked for non-NULL before calling this routine. 1393 */ 1394 static boolean_t 1395 ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask, 1396 const in6_addr_t *gateway, int type, const ipif_t *ipif, zoneid_t zoneid, 1397 uint32_t ihandle, const ts_label_t *tsl, int match_flags) 1398 { 1399 in6_addr_t masked_addr; 1400 in6_addr_t gw_addr_v6; 1401 ill_t *ire_ill = NULL, *dst_ill; 1402 ill_t *ipif_ill = NULL; 1403 ill_group_t *ire_ill_group = NULL; 1404 ill_group_t *ipif_ill_group = NULL; 1405 ipif_t *src_ipif; 1406 1407 ASSERT(ire->ire_ipversion == IPV6_VERSION); 1408 ASSERT(addr != NULL); 1409 ASSERT(mask != NULL); 1410 ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL); 1411 ASSERT((!(match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP))) || 1412 (ipif != NULL && ipif->ipif_isv6)); 1413 ASSERT(!(match_flags & MATCH_IRE_WQ)); 1414 1415 /* 1416 * HIDDEN cache entries have to be looked up specifically with 1417 * MATCH_IRE_MARK_HIDDEN. MATCH_IRE_MARK_HIDDEN is usually set 1418 * when the interface is FAILED or INACTIVE. In that case, 1419 * any IRE_CACHES that exists should be marked with 1420 * IRE_MARK_HIDDEN. So, we don't really need to match below 1421 * for IRE_MARK_HIDDEN. But we do so for consistency. 1422 */ 1423 if (!(match_flags & MATCH_IRE_MARK_HIDDEN) && 1424 (ire->ire_marks & IRE_MARK_HIDDEN)) 1425 return (B_FALSE); 1426 1427 if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid && 1428 ire->ire_zoneid != ALL_ZONES) { 1429 /* 1430 * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is 1431 * valid and does not match that of ire_zoneid, a failure to 1432 * match is reported at this point. Otherwise, since some IREs 1433 * that are available in the global zone can be used in local 1434 * zones, additional checks need to be performed: 1435 * 1436 * IRE_CACHE and IRE_LOOPBACK entries should 1437 * never be matched in this situation. 1438 * 1439 * IRE entries that have an interface associated with them 1440 * should in general not match unless they are an IRE_LOCAL 1441 * or in the case when MATCH_IRE_DEFAULT has been set in 1442 * the caller. In the case of the former, checking of the 1443 * other fields supplied should take place. 1444 * 1445 * In the case where MATCH_IRE_DEFAULT has been set, 1446 * all of the ipif's associated with the IRE's ill are 1447 * checked to see if there is a matching zoneid. If any 1448 * one ipif has a matching zoneid, this IRE is a 1449 * potential candidate so checking of the other fields 1450 * takes place. 1451 * 1452 * In the case where the IRE_INTERFACE has a usable source 1453 * address (indicated by ill_usesrc_ifindex) in the 1454 * correct zone then it's permitted to return this IRE 1455 */ 1456 if (match_flags & MATCH_IRE_ZONEONLY) 1457 return (B_FALSE); 1458 if (ire->ire_type & (IRE_CACHE | IRE_LOOPBACK)) 1459 return (B_FALSE); 1460 /* 1461 * Note, IRE_INTERFACE can have the stq as NULL. For 1462 * example, if the default multicast route is tied to 1463 * the loopback address. 1464 */ 1465 if ((ire->ire_type & IRE_INTERFACE) && 1466 (ire->ire_stq != NULL)) { 1467 dst_ill = (ill_t *)ire->ire_stq->q_ptr; 1468 /* 1469 * If there is a usable source address in the 1470 * zone, then it's ok to return an 1471 * IRE_INTERFACE 1472 */ 1473 if ((dst_ill->ill_usesrc_ifindex != 0) && 1474 (src_ipif = ipif_select_source_v6(dst_ill, addr, 1475 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid)) 1476 != NULL) { 1477 ip3dbg(("ire_match_args: src_ipif %p" 1478 " dst_ill %p", (void *)src_ipif, 1479 (void *)dst_ill)); 1480 ipif_refrele(src_ipif); 1481 } else { 1482 ip3dbg(("ire_match_args: src_ipif NULL" 1483 " dst_ill %p\n", (void *)dst_ill)); 1484 return (B_FALSE); 1485 } 1486 } 1487 if (ire->ire_ipif != NULL && ire->ire_type != IRE_LOCAL && 1488 !(ire->ire_type & IRE_INTERFACE)) { 1489 ipif_t *tipif; 1490 1491 if ((match_flags & MATCH_IRE_DEFAULT) == 0) 1492 return (B_FALSE); 1493 mutex_enter(&ire->ire_ipif->ipif_ill->ill_lock); 1494 for (tipif = ire->ire_ipif->ipif_ill->ill_ipif; 1495 tipif != NULL; tipif = tipif->ipif_next) { 1496 if (IPIF_CAN_LOOKUP(tipif) && 1497 (tipif->ipif_flags & IPIF_UP) && 1498 (tipif->ipif_zoneid == zoneid || 1499 tipif->ipif_zoneid == ALL_ZONES)) 1500 break; 1501 } 1502 mutex_exit(&ire->ire_ipif->ipif_ill->ill_lock); 1503 if (tipif == NULL) 1504 return (B_FALSE); 1505 } 1506 } 1507 1508 if (match_flags & MATCH_IRE_GW) { 1509 mutex_enter(&ire->ire_lock); 1510 gw_addr_v6 = ire->ire_gateway_addr_v6; 1511 mutex_exit(&ire->ire_lock); 1512 } 1513 /* 1514 * For IRE_CACHES, MATCH_IRE_ILL/ILL_GROUP really means that 1515 * somebody wants to send out on a particular interface which 1516 * is given by ire_stq and hence use ire_stq to derive the ill 1517 * value. ire_ipif for IRE_CACHES is just the 1518 * means of getting a source address i.e ire_src_addr_v6 = 1519 * ire->ire_ipif->ipif_src_addr_v6. 1520 */ 1521 if (match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP)) { 1522 ire_ill = ire_to_ill(ire); 1523 if (ire_ill != NULL) 1524 ire_ill_group = ire_ill->ill_group; 1525 ipif_ill = ipif->ipif_ill; 1526 ipif_ill_group = ipif_ill->ill_group; 1527 } 1528 1529 /* No ire_addr_v6 bits set past the mask */ 1530 ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6, 1531 ire->ire_addr_v6)); 1532 V6_MASK_COPY(*addr, *mask, masked_addr); 1533 1534 if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) && 1535 ((!(match_flags & MATCH_IRE_GW)) || 1536 IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) && 1537 ((!(match_flags & MATCH_IRE_TYPE)) || 1538 (ire->ire_type & type)) && 1539 ((!(match_flags & MATCH_IRE_SRC)) || 1540 IN6_ARE_ADDR_EQUAL(&ire->ire_src_addr_v6, 1541 &ipif->ipif_v6src_addr)) && 1542 ((!(match_flags & MATCH_IRE_IPIF)) || 1543 (ire->ire_ipif == ipif)) && 1544 ((!(match_flags & MATCH_IRE_MARK_HIDDEN)) || 1545 (ire->ire_type != IRE_CACHE || 1546 ire->ire_marks & IRE_MARK_HIDDEN)) && 1547 ((!(match_flags & MATCH_IRE_ILL)) || 1548 (ire_ill == ipif_ill)) && 1549 ((!(match_flags & MATCH_IRE_IHANDLE)) || 1550 (ire->ire_ihandle == ihandle)) && 1551 ((!(match_flags & MATCH_IRE_ILL_GROUP)) || 1552 (ire_ill == ipif_ill) || 1553 (ire_ill_group != NULL && 1554 ire_ill_group == ipif_ill_group)) && 1555 ((!(match_flags & MATCH_IRE_SECATTR)) || 1556 (!is_system_labeled()) || 1557 (tsol_ire_match_gwattr(ire, tsl) == 0))) { 1558 /* We found the matched IRE */ 1559 return (B_TRUE); 1560 } 1561 return (B_FALSE); 1562 } 1563 1564 /* 1565 * Lookup for a route in all the tables 1566 */ 1567 ire_t * 1568 ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 1569 const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 1570 zoneid_t zoneid, const ts_label_t *tsl, int flags) 1571 { 1572 ire_t *ire = NULL; 1573 1574 /* 1575 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 1576 * MATCH_IRE_ILL is set. 1577 */ 1578 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 1579 (ipif == NULL)) 1580 return (NULL); 1581 1582 /* 1583 * might be asking for a cache lookup, 1584 * This is not best way to lookup cache, 1585 * user should call ire_cache_lookup directly. 1586 * 1587 * If MATCH_IRE_TYPE was set, first lookup in the cache table and then 1588 * in the forwarding table, if the applicable type flags were set. 1589 */ 1590 if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) { 1591 ire = ire_ctable_lookup_v6(addr, gateway, type, ipif, zoneid, 1592 tsl, flags); 1593 if (ire != NULL) 1594 return (ire); 1595 } 1596 if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) { 1597 ire = ire_ftable_lookup_v6(addr, mask, gateway, type, ipif, 1598 pire, zoneid, 0, tsl, flags); 1599 } 1600 return (ire); 1601 } 1602 1603 /* 1604 * Lookup a route in forwarding table. 1605 * specific lookup is indicated by passing the 1606 * required parameters and indicating the 1607 * match required in flag field. 1608 * 1609 * Looking for default route can be done in three ways 1610 * 1) pass mask as ipv6_all_zeros and set MATCH_IRE_MASK in flags field 1611 * along with other matches. 1612 * 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags 1613 * field along with other matches. 1614 * 3) if the destination and mask are passed as zeros. 1615 * 1616 * A request to return a default route if no route 1617 * is found, can be specified by setting MATCH_IRE_DEFAULT 1618 * in flags. 1619 * 1620 * It does not support recursion more than one level. It 1621 * will do recursive lookup only when the lookup maps to 1622 * a prefix or default route and MATCH_IRE_RECURSIVE flag is passed. 1623 * 1624 * If the routing table is setup to allow more than one level 1625 * of recursion, the cleaning up cache table will not work resulting 1626 * in invalid routing. 1627 * 1628 * Supports link-local addresses by following the ipif/ill when recursing. 1629 * 1630 * NOTE : When this function returns NULL, pire has already been released. 1631 * pire is valid only when this function successfully returns an 1632 * ire. 1633 */ 1634 ire_t * 1635 ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 1636 const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 1637 zoneid_t zoneid, uint32_t ihandle, const ts_label_t *tsl, int flags) 1638 { 1639 irb_t *irb_ptr; 1640 ire_t *rire; 1641 ire_t *ire = NULL; 1642 ire_t *saved_ire; 1643 nce_t *nce; 1644 int i; 1645 in6_addr_t gw_addr_v6; 1646 1647 ASSERT(addr != NULL); 1648 ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL); 1649 ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 1650 ASSERT(ipif == NULL || ipif->ipif_isv6); 1651 ASSERT(!(flags & MATCH_IRE_WQ)); 1652 1653 /* 1654 * When we return NULL from this function, we should make 1655 * sure that *pire is NULL so that the callers will not 1656 * wrongly REFRELE the pire. 1657 */ 1658 if (pire != NULL) 1659 *pire = NULL; 1660 /* 1661 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 1662 * MATCH_IRE_ILL is set. 1663 */ 1664 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 1665 (ipif == NULL)) 1666 return (NULL); 1667 1668 /* 1669 * If the mask is known, the lookup 1670 * is simple, if the mask is not known 1671 * we need to search. 1672 */ 1673 if (flags & MATCH_IRE_MASK) { 1674 uint_t masklen; 1675 1676 masklen = ip_mask_to_plen_v6(mask); 1677 if (ip_forwarding_table_v6[masklen] == NULL) 1678 return (NULL); 1679 irb_ptr = &(ip_forwarding_table_v6[masklen][ 1680 IRE_ADDR_MASK_HASH_V6(*addr, *mask, ip6_ftable_hash_size)]); 1681 rw_enter(&irb_ptr->irb_lock, RW_READER); 1682 for (ire = irb_ptr->irb_ire; ire != NULL; 1683 ire = ire->ire_next) { 1684 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1685 continue; 1686 if (ire_match_args_v6(ire, addr, mask, gateway, type, 1687 ipif, zoneid, ihandle, tsl, flags)) 1688 goto found_ire; 1689 } 1690 rw_exit(&irb_ptr->irb_lock); 1691 } else { 1692 /* 1693 * In this case we don't know the mask, we need to 1694 * search the table assuming different mask sizes. 1695 * we start with 128 bit mask, we don't allow default here. 1696 */ 1697 for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) { 1698 in6_addr_t tmpmask; 1699 1700 if ((ip_forwarding_table_v6[i]) == NULL) 1701 continue; 1702 (void) ip_plen_to_mask_v6(i, &tmpmask); 1703 irb_ptr = &ip_forwarding_table_v6[i][ 1704 IRE_ADDR_MASK_HASH_V6(*addr, tmpmask, 1705 ip6_ftable_hash_size)]; 1706 rw_enter(&irb_ptr->irb_lock, RW_READER); 1707 for (ire = irb_ptr->irb_ire; ire != NULL; 1708 ire = ire->ire_next) { 1709 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1710 continue; 1711 if (ire_match_args_v6(ire, addr, 1712 &ire->ire_mask_v6, gateway, type, ipif, 1713 zoneid, ihandle, tsl, flags)) 1714 goto found_ire; 1715 } 1716 rw_exit(&irb_ptr->irb_lock); 1717 } 1718 } 1719 1720 /* 1721 * We come here if no route has yet been found. 1722 * 1723 * Handle the case where default route is 1724 * requested by specifying type as one of the possible 1725 * types for that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE). 1726 * 1727 * If MATCH_IRE_MASK is specified, then the appropriate default route 1728 * would have been found above if it exists so it isn't looked up here. 1729 * If MATCH_IRE_DEFAULT was also specified, then a default route will be 1730 * searched for later. 1731 */ 1732 if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE && 1733 (type & (IRE_DEFAULT | IRE_INTERFACE))) { 1734 if (ip_forwarding_table_v6[0] != NULL) { 1735 /* addr & mask is zero for defaults */ 1736 irb_ptr = &ip_forwarding_table_v6[0][ 1737 IRE_ADDR_HASH_V6(ipv6_all_zeros, 1738 ip6_ftable_hash_size)]; 1739 rw_enter(&irb_ptr->irb_lock, RW_READER); 1740 for (ire = irb_ptr->irb_ire; ire != NULL; 1741 ire = ire->ire_next) { 1742 1743 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1744 continue; 1745 1746 if (ire_match_args_v6(ire, addr, 1747 &ipv6_all_zeros, gateway, type, ipif, 1748 zoneid, ihandle, tsl, flags)) 1749 goto found_ire; 1750 } 1751 rw_exit(&irb_ptr->irb_lock); 1752 } 1753 } 1754 /* 1755 * We come here only if no route is found. 1756 * see if the default route can be used which is allowed 1757 * only if the default matching criteria is specified. 1758 * The ipv6_ire_default_count tracks the number of IRE_DEFAULT 1759 * entries. However, the ip_forwarding_table_v6[0] also contains 1760 * interface routes thus the count can be zero. 1761 */ 1762 saved_ire = NULL; 1763 if ((flags & (MATCH_IRE_DEFAULT | MATCH_IRE_MASK)) == 1764 MATCH_IRE_DEFAULT) { 1765 ire_t *ire_origin; 1766 uint_t g_index; 1767 uint_t index; 1768 1769 if (ip_forwarding_table_v6[0] == NULL) 1770 return (NULL); 1771 irb_ptr = &(ip_forwarding_table_v6[0])[0]; 1772 1773 /* 1774 * Keep a tab on the bucket while looking the IRE_DEFAULT 1775 * entries. We need to keep track of a particular IRE 1776 * (ire_origin) so this ensures that it will not be unlinked 1777 * from the hash list during the recursive lookup below. 1778 */ 1779 IRB_REFHOLD(irb_ptr); 1780 ire = irb_ptr->irb_ire; 1781 if (ire == NULL) { 1782 IRB_REFRELE(irb_ptr); 1783 return (NULL); 1784 } 1785 1786 /* 1787 * Get the index first, since it can be changed by other 1788 * threads. Then get to the right default route skipping 1789 * default interface routes if any. As we hold a reference on 1790 * the IRE bucket, ipv6_ire_default_count can only increase so 1791 * we can't reach the end of the hash list unexpectedly. 1792 */ 1793 if (ipv6_ire_default_count != 0) { 1794 g_index = ipv6_ire_default_index++; 1795 index = g_index % ipv6_ire_default_count; 1796 while (index != 0) { 1797 if (!(ire->ire_type & IRE_INTERFACE)) 1798 index--; 1799 ire = ire->ire_next; 1800 } 1801 ASSERT(ire != NULL); 1802 } else { 1803 /* 1804 * No default route, so we only have default interface 1805 * routes: don't enter the first loop. 1806 */ 1807 ire = NULL; 1808 } 1809 1810 /* 1811 * Round-robin the default routers list looking for a neighbor 1812 * that matches the passed in parameters and is reachable. If 1813 * none found, just return a route from the default router list 1814 * if it exists. If we can't find a default route (IRE_DEFAULT), 1815 * look for interface default routes. 1816 * We start with the ire we found above and we walk the hash 1817 * list until we're back where we started, see 1818 * ire_get_next_default_ire(). It doesn't matter if default 1819 * routes are added or deleted by other threads - we know this 1820 * ire will stay in the list because we hold a reference on the 1821 * ire bucket. 1822 * NB: if we only have interface default routes, ire is NULL so 1823 * we don't even enter this loop (see above). 1824 */ 1825 ire_origin = ire; 1826 for (; ire != NULL; 1827 ire = ire_get_next_default_ire(ire, ire_origin)) { 1828 1829 if (ire_match_args_v6(ire, addr, 1830 &ipv6_all_zeros, gateway, type, ipif, 1831 zoneid, ihandle, tsl, flags)) { 1832 int match_flags; 1833 1834 /* 1835 * We have something to work with. 1836 * If we can find a resolved/reachable 1837 * entry, we will use this. Otherwise 1838 * we'll try to find an entry that has 1839 * a resolved cache entry. We will fallback 1840 * on this if we don't find anything else. 1841 */ 1842 if (saved_ire == NULL) 1843 saved_ire = ire; 1844 mutex_enter(&ire->ire_lock); 1845 gw_addr_v6 = ire->ire_gateway_addr_v6; 1846 mutex_exit(&ire->ire_lock); 1847 match_flags = MATCH_IRE_ILL_GROUP | 1848 MATCH_IRE_SECATTR; 1849 rire = ire_ctable_lookup_v6(&gw_addr_v6, NULL, 1850 0, ire->ire_ipif, zoneid, tsl, match_flags); 1851 if (rire != NULL) { 1852 nce = rire->ire_nce; 1853 if (nce != NULL && 1854 NCE_ISREACHABLE(nce) && 1855 nce->nce_flags & NCE_F_ISROUTER) { 1856 ire_refrele(rire); 1857 IRE_REFHOLD(ire); 1858 IRB_REFRELE(irb_ptr); 1859 goto found_ire_held; 1860 } else if (nce != NULL && 1861 !(nce->nce_flags & 1862 NCE_F_ISROUTER)) { 1863 /* 1864 * Make sure we don't use 1865 * this ire 1866 */ 1867 if (saved_ire == ire) 1868 saved_ire = NULL; 1869 } 1870 ire_refrele(rire); 1871 } else if (ipv6_ire_default_count > 1 && 1872 zoneid != ALL_ZONES) { 1873 /* 1874 * When we're in a local zone, we're 1875 * only interested in default routers 1876 * that are reachable through ipifs 1877 * within our zone. 1878 * The potentially expensive call to 1879 * ire_route_lookup_v6() is avoided when 1880 * we have only one default route. 1881 */ 1882 match_flags |= MATCH_IRE_TYPE; 1883 rire = ire_route_lookup_v6(&gw_addr_v6, 1884 NULL, NULL, IRE_INTERFACE, 1885 ire->ire_ipif, NULL, 1886 zoneid, tsl, match_flags); 1887 if (rire != NULL) { 1888 ire_refrele(rire); 1889 saved_ire = ire; 1890 } else if (saved_ire == ire) { 1891 /* 1892 * Make sure we don't use 1893 * this ire 1894 */ 1895 saved_ire = NULL; 1896 } 1897 } 1898 } 1899 } 1900 if (saved_ire != NULL) { 1901 ire = saved_ire; 1902 IRE_REFHOLD(ire); 1903 IRB_REFRELE(irb_ptr); 1904 goto found_ire_held; 1905 } else { 1906 /* 1907 * Look for a interface default route matching the 1908 * args passed in. No round robin here. Just pick 1909 * the right one. 1910 */ 1911 for (ire = irb_ptr->irb_ire; ire != NULL; 1912 ire = ire->ire_next) { 1913 1914 if (!(ire->ire_type & IRE_INTERFACE)) 1915 continue; 1916 1917 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1918 continue; 1919 1920 if (ire_match_args_v6(ire, addr, 1921 &ipv6_all_zeros, gateway, type, ipif, 1922 zoneid, ihandle, tsl, flags)) { 1923 IRE_REFHOLD(ire); 1924 IRB_REFRELE(irb_ptr); 1925 goto found_ire_held; 1926 } 1927 } 1928 IRB_REFRELE(irb_ptr); 1929 } 1930 } 1931 ASSERT(ire == NULL); 1932 ip1dbg(("ire_ftable_lookup_v6: returning NULL ire")); 1933 return (NULL); 1934 found_ire: 1935 ASSERT((ire->ire_marks & IRE_MARK_CONDEMNED) == 0); 1936 IRE_REFHOLD(ire); 1937 rw_exit(&irb_ptr->irb_lock); 1938 1939 found_ire_held: 1940 if ((flags & MATCH_IRE_RJ_BHOLE) && 1941 (ire->ire_flags & (RTF_BLACKHOLE | RTF_REJECT))) { 1942 return (ire); 1943 } 1944 /* 1945 * At this point, IRE that was found must be an IRE_FORWARDTABLE 1946 * or IRE_CACHETABLE type. If this is a recursive lookup and an 1947 * IRE_INTERFACE type was found, return that. If it was some other 1948 * IRE_FORWARDTABLE type of IRE (one of the prefix types), then it 1949 * is necessary to fill in the parent IRE pointed to by pire, and 1950 * then lookup the gateway address of the parent. For backwards 1951 * compatiblity, if this lookup returns an 1952 * IRE other than a IRE_CACHETABLE or IRE_INTERFACE, then one more level 1953 * of lookup is done. 1954 */ 1955 if (flags & MATCH_IRE_RECURSIVE) { 1956 const ipif_t *gw_ipif; 1957 int match_flags = MATCH_IRE_DSTONLY; 1958 1959 if (ire->ire_type & IRE_INTERFACE) 1960 return (ire); 1961 if (pire != NULL) 1962 *pire = ire; 1963 /* 1964 * If we can't find an IRE_INTERFACE or the caller has not 1965 * asked for pire, we need to REFRELE the saved_ire. 1966 */ 1967 saved_ire = ire; 1968 1969 /* 1970 * Currently MATCH_IRE_ILL is never used with 1971 * (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT) while 1972 * sending out packets as MATCH_IRE_ILL is used only 1973 * for communicating with on-link hosts. We can't assert 1974 * that here as RTM_GET calls this function with 1975 * MATCH_IRE_ILL | MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE. 1976 * We have already used the MATCH_IRE_ILL in determining 1977 * the right prefix route at this point. To match the 1978 * behavior of how we locate routes while sending out 1979 * packets, we don't want to use MATCH_IRE_ILL below 1980 * while locating the interface route. 1981 */ 1982 if (ire->ire_ipif != NULL) 1983 match_flags |= MATCH_IRE_ILL_GROUP; 1984 1985 mutex_enter(&ire->ire_lock); 1986 gw_addr_v6 = ire->ire_gateway_addr_v6; 1987 mutex_exit(&ire->ire_lock); 1988 1989 ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 0, 1990 ire->ire_ipif, NULL, zoneid, tsl, match_flags); 1991 if (ire == NULL) { 1992 /* 1993 * In this case we have to deal with the 1994 * MATCH_IRE_PARENT flag, which means the 1995 * parent has to be returned if ire is NULL. 1996 * The aim of this is to have (at least) a starting 1997 * ire when we want to look at all of the ires in a 1998 * bucket aimed at a single destination (as is the 1999 * case in ip_newroute_v6 for the RTF_MULTIRT 2000 * flagged routes). 2001 */ 2002 if (flags & MATCH_IRE_PARENT) { 2003 if (pire != NULL) { 2004 /* 2005 * Need an extra REFHOLD, if the 2006 * parent ire is returned via both 2007 * ire and pire. 2008 */ 2009 IRE_REFHOLD(saved_ire); 2010 } 2011 ire = saved_ire; 2012 } else { 2013 ire_refrele(saved_ire); 2014 if (pire != NULL) 2015 *pire = NULL; 2016 } 2017 return (ire); 2018 } 2019 if (ire->ire_type & (IRE_CACHETABLE | IRE_INTERFACE)) { 2020 /* 2021 * If the caller did not ask for pire, release 2022 * it now. 2023 */ 2024 if (pire == NULL) { 2025 ire_refrele(saved_ire); 2026 } 2027 return (ire); 2028 } 2029 match_flags |= MATCH_IRE_TYPE; 2030 mutex_enter(&ire->ire_lock); 2031 gw_addr_v6 = ire->ire_gateway_addr_v6; 2032 mutex_exit(&ire->ire_lock); 2033 gw_ipif = ire->ire_ipif; 2034 ire_refrele(ire); 2035 ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 2036 (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid, 2037 NULL, match_flags); 2038 if (ire == NULL) { 2039 /* 2040 * In this case we have to deal with the 2041 * MATCH_IRE_PARENT flag, which means the 2042 * parent has to be returned if ire is NULL. 2043 * The aim of this is to have (at least) a starting 2044 * ire when we want to look at all of the ires in a 2045 * bucket aimed at a single destination (as is the 2046 * case in ip_newroute_v6 for the RTF_MULTIRT 2047 * flagged routes). 2048 */ 2049 if (flags & MATCH_IRE_PARENT) { 2050 if (pire != NULL) { 2051 /* 2052 * Need an extra REFHOLD, if the 2053 * parent ire is returned via both 2054 * ire and pire. 2055 */ 2056 IRE_REFHOLD(saved_ire); 2057 } 2058 ire = saved_ire; 2059 } else { 2060 ire_refrele(saved_ire); 2061 if (pire != NULL) 2062 *pire = NULL; 2063 } 2064 return (ire); 2065 } else if (pire == NULL) { 2066 /* 2067 * If the caller did not ask for pire, release 2068 * it now. 2069 */ 2070 ire_refrele(saved_ire); 2071 } 2072 return (ire); 2073 } 2074 2075 ASSERT(pire == NULL || *pire == NULL); 2076 return (ire); 2077 } 2078 2079 /* 2080 * Delete the IRE cache for the gateway and all IRE caches whose 2081 * ire_gateway_addr_v6 points to this gateway, and allow them to 2082 * be created on demand by ip_newroute_v6. 2083 */ 2084 void 2085 ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid) 2086 { 2087 irb_t *irb; 2088 ire_t *ire; 2089 2090 irb = &ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, ip6_cache_table_size)]; 2091 IRB_REFHOLD(irb); 2092 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 2093 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2094 continue; 2095 2096 ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 2097 if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, 0, 2098 IRE_CACHE, NULL, zoneid, 0, NULL, MATCH_IRE_TYPE)) { 2099 ire_delete(ire); 2100 } 2101 } 2102 IRB_REFRELE(irb); 2103 2104 ire_walk_v6(ire_delete_cache_gw_v6, (char *)addr, zoneid); 2105 } 2106 2107 /* 2108 * Looks up cache table for a route. 2109 * specific lookup can be indicated by 2110 * passing the MATCH_* flags and the 2111 * necessary parameters. 2112 */ 2113 ire_t * 2114 ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway, 2115 int type, const ipif_t *ipif, zoneid_t zoneid, const ts_label_t *tsl, 2116 int flags) 2117 { 2118 ire_t *ire; 2119 irb_t *irb_ptr; 2120 ASSERT(addr != NULL); 2121 ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 2122 2123 /* 2124 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 2125 * MATCH_IRE_ILL is set. 2126 */ 2127 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 2128 (ipif == NULL)) 2129 return (NULL); 2130 2131 irb_ptr = &ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 2132 ip6_cache_table_size)]; 2133 rw_enter(&irb_ptr->irb_lock, RW_READER); 2134 for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 2135 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2136 continue; 2137 2138 ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 2139 if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, gateway, 2140 type, ipif, zoneid, 0, tsl, flags)) { 2141 IRE_REFHOLD(ire); 2142 rw_exit(&irb_ptr->irb_lock); 2143 return (ire); 2144 } 2145 } 2146 rw_exit(&irb_ptr->irb_lock); 2147 return (NULL); 2148 } 2149 2150 /* 2151 * Lookup cache. Don't return IRE_MARK_HIDDEN entries. Callers 2152 * should use ire_ctable_lookup with MATCH_IRE_MARK_HIDDEN to get 2153 * to the hidden ones. 2154 * 2155 * In general the zoneid has to match (where ALL_ZONES match all of them). 2156 * But for IRE_LOCAL we also need to handle the case where L2 should 2157 * conceptually loop back the packet. This is necessary since neither 2158 * Ethernet drivers nor Ethernet hardware loops back packets sent to their 2159 * own MAC address. This loopback is needed when the normal 2160 * routes (ignoring IREs with different zoneids) would send out the packet on 2161 * the same ill (or ill group) as the ill with which this IRE_LOCAL is 2162 * associated. 2163 * 2164 * Earlier versions of this code always matched an IRE_LOCAL independently of 2165 * the zoneid. We preserve that earlier behavior when 2166 * ip_restrict_interzone_loopback is turned off. 2167 */ 2168 ire_t * 2169 ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid, 2170 const ts_label_t *tsl) 2171 { 2172 irb_t *irb_ptr; 2173 ire_t *ire; 2174 2175 irb_ptr = &ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 2176 ip6_cache_table_size)]; 2177 rw_enter(&irb_ptr->irb_lock, RW_READER); 2178 for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 2179 if (ire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 2180 continue; 2181 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, addr)) { 2182 /* 2183 * Finally, check if the security policy has any 2184 * restriction on using this route for the specified 2185 * message. 2186 */ 2187 if (tsl != NULL && 2188 ire->ire_gw_secattr != NULL && 2189 tsol_ire_match_gwattr(ire, tsl) != 0) { 2190 continue; 2191 } 2192 2193 if (zoneid == ALL_ZONES || ire->ire_zoneid == zoneid || 2194 ire->ire_zoneid == ALL_ZONES) { 2195 IRE_REFHOLD(ire); 2196 rw_exit(&irb_ptr->irb_lock); 2197 return (ire); 2198 } 2199 2200 if (ire->ire_type == IRE_LOCAL) { 2201 if (ip_restrict_interzone_loopback && 2202 !ire_local_ok_across_zones(ire, zoneid, 2203 (void *)addr, tsl)) 2204 continue; 2205 2206 IRE_REFHOLD(ire); 2207 rw_exit(&irb_ptr->irb_lock); 2208 return (ire); 2209 } 2210 } 2211 } 2212 rw_exit(&irb_ptr->irb_lock); 2213 return (NULL); 2214 } 2215 2216 /* 2217 * Locate the interface ire that is tied to the cache ire 'cire' via 2218 * cire->ire_ihandle. 2219 * 2220 * We are trying to create the cache ire for an onlink destn. or 2221 * gateway in 'cire'. We are called from ire_add_v6() in the IRE_IF_RESOLVER 2222 * case for xresolv interfaces, after the ire has come back from 2223 * an external resolver. 2224 */ 2225 static ire_t * 2226 ire_ihandle_lookup_onlink_v6(ire_t *cire) 2227 { 2228 ire_t *ire; 2229 int match_flags; 2230 int i; 2231 int j; 2232 irb_t *irb_ptr; 2233 2234 ASSERT(cire != NULL); 2235 2236 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2237 /* 2238 * We know that the mask of the interface ire equals cire->ire_cmask. 2239 * (When ip_newroute_v6() created 'cire' for an on-link destn. 2240 * it set its cmask from the interface ire's mask) 2241 */ 2242 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 2243 NULL, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle, 2244 NULL, match_flags); 2245 if (ire != NULL) 2246 return (ire); 2247 /* 2248 * If we didn't find an interface ire above, we can't declare failure. 2249 * For backwards compatibility, we need to support prefix routes 2250 * pointing to next hop gateways that are not on-link. 2251 * 2252 * In the resolver/noresolver case, ip_newroute_v6() thinks 2253 * it is creating the cache ire for an onlink destination in 'cire'. 2254 * But 'cire' is not actually onlink, because ire_ftable_lookup_v6() 2255 * cheated it, by doing ire_route_lookup_v6() twice and returning an 2256 * interface ire. 2257 * 2258 * Eg. default - gw1 (line 1) 2259 * gw1 - gw2 (line 2) 2260 * gw2 - hme0 (line 3) 2261 * 2262 * In the above example, ip_newroute_v6() tried to create the cache ire 2263 * 'cire' for gw1, based on the interface route in line 3. The 2264 * ire_ftable_lookup_v6() above fails, because there is 2265 * no interface route to reach gw1. (it is gw2). We fall thru below. 2266 * 2267 * Do a brute force search based on the ihandle in a subset of the 2268 * forwarding tables, corresponding to cire->ire_cmask_v6. Otherwise 2269 * things become very complex, since we don't have 'pire' in this 2270 * case. (Also note that this method is not possible in the offlink 2271 * case because we don't know the mask) 2272 */ 2273 i = ip_mask_to_plen_v6(&cire->ire_cmask_v6); 2274 if ((ip_forwarding_table_v6[i]) == NULL) 2275 return (NULL); 2276 for (j = 0; j < ip6_ftable_hash_size; j++) { 2277 irb_ptr = &ip_forwarding_table_v6[i][j]; 2278 rw_enter(&irb_ptr->irb_lock, RW_READER); 2279 for (ire = irb_ptr->irb_ire; ire != NULL; 2280 ire = ire->ire_next) { 2281 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2282 continue; 2283 if ((ire->ire_type & IRE_INTERFACE) && 2284 (ire->ire_ihandle == cire->ire_ihandle)) { 2285 IRE_REFHOLD(ire); 2286 rw_exit(&irb_ptr->irb_lock); 2287 return (ire); 2288 } 2289 } 2290 rw_exit(&irb_ptr->irb_lock); 2291 } 2292 return (NULL); 2293 } 2294 2295 2296 /* 2297 * Locate the interface ire that is tied to the cache ire 'cire' via 2298 * cire->ire_ihandle. 2299 * 2300 * We are trying to create the cache ire for an offlink destn based 2301 * on the cache ire of the gateway in 'cire'. 'pire' is the prefix ire 2302 * as found by ip_newroute_v6(). We are called from ip_newroute_v6() in 2303 * the IRE_CACHE case. 2304 */ 2305 ire_t * 2306 ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire) 2307 { 2308 ire_t *ire; 2309 int match_flags; 2310 in6_addr_t gw_addr; 2311 ipif_t *gw_ipif; 2312 2313 ASSERT(cire != NULL && pire != NULL); 2314 2315 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2316 /* 2317 * ip_newroute_v6 calls ire_ftable_lookup with MATCH_IRE_ILL only 2318 * for on-link hosts. We should never be here for onlink. 2319 * Thus, use MATCH_IRE_ILL_GROUP. 2320 */ 2321 if (pire->ire_ipif != NULL) 2322 match_flags |= MATCH_IRE_ILL_GROUP; 2323 /* 2324 * We know that the mask of the interface ire equals cire->ire_cmask. 2325 * (When ip_newroute_v6() created 'cire' for an on-link destn. it set 2326 * its cmask from the interface ire's mask) 2327 */ 2328 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 0, 2329 IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2330 NULL, match_flags); 2331 if (ire != NULL) 2332 return (ire); 2333 /* 2334 * If we didn't find an interface ire above, we can't declare failure. 2335 * For backwards compatibility, we need to support prefix routes 2336 * pointing to next hop gateways that are not on-link. 2337 * 2338 * Assume we are trying to ping some offlink destn, and we have the 2339 * routing table below. 2340 * 2341 * Eg. default - gw1 <--- pire (line 1) 2342 * gw1 - gw2 (line 2) 2343 * gw2 - hme0 (line 3) 2344 * 2345 * If we already have a cache ire for gw1 in 'cire', the 2346 * ire_ftable_lookup_v6 above would have failed, since there is no 2347 * interface ire to reach gw1. We will fallthru below. 2348 * 2349 * Here we duplicate the steps that ire_ftable_lookup_v6() did in 2350 * getting 'cire' from 'pire', in the MATCH_IRE_RECURSIVE case. 2351 * The differences are the following 2352 * i. We want the interface ire only, so we call 2353 * ire_ftable_lookup_v6() instead of ire_route_lookup_v6() 2354 * ii. We look for only prefix routes in the 1st call below. 2355 * ii. We want to match on the ihandle in the 2nd call below. 2356 */ 2357 match_flags = MATCH_IRE_TYPE; 2358 if (pire->ire_ipif != NULL) 2359 match_flags |= MATCH_IRE_ILL_GROUP; 2360 2361 mutex_enter(&pire->ire_lock); 2362 gw_addr = pire->ire_gateway_addr_v6; 2363 mutex_exit(&pire->ire_lock); 2364 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_OFFSUBNET, 2365 pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags); 2366 if (ire == NULL) 2367 return (NULL); 2368 /* 2369 * At this point 'ire' corresponds to the entry shown in line 2. 2370 * gw_addr is 'gw2' in the example above. 2371 */ 2372 mutex_enter(&ire->ire_lock); 2373 gw_addr = ire->ire_gateway_addr_v6; 2374 mutex_exit(&ire->ire_lock); 2375 gw_ipif = ire->ire_ipif; 2376 ire_refrele(ire); 2377 2378 match_flags |= MATCH_IRE_IHANDLE; 2379 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_INTERFACE, 2380 gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2381 NULL, match_flags); 2382 return (ire); 2383 } 2384 2385 /* 2386 * Return the IRE_LOOPBACK, IRE_IF_RESOLVER or IRE_IF_NORESOLVER 2387 * ire associated with the specified ipif. 2388 * 2389 * This might occasionally be called when IPIF_UP is not set since 2390 * the IPV6_MULTICAST_IF as well as creating interface routes 2391 * allows specifying a down ipif (ipif_lookup* match ipifs that are down). 2392 * 2393 * Note that if IPIF_NOLOCAL, IPIF_NOXMIT, or IPIF_DEPRECATED is set on 2394 * the ipif this routine might return NULL. 2395 * (Sometimes called as writer though not required by this function.) 2396 */ 2397 ire_t * 2398 ipif_to_ire_v6(const ipif_t *ipif) 2399 { 2400 ire_t *ire; 2401 2402 ASSERT(ipif->ipif_isv6); 2403 if (ipif->ipif_ire_type == IRE_LOOPBACK) { 2404 ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, NULL, 2405 IRE_LOOPBACK, ipif, ALL_ZONES, NULL, 2406 (MATCH_IRE_TYPE | MATCH_IRE_IPIF)); 2407 } else if (ipif->ipif_flags & IPIF_POINTOPOINT) { 2408 /* In this case we need to lookup destination address. */ 2409 ire = ire_ftable_lookup_v6(&ipif->ipif_v6pp_dst_addr, 2410 &ipv6_all_ones, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 2411 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 2412 MATCH_IRE_MASK)); 2413 } else { 2414 ire = ire_ftable_lookup_v6(&ipif->ipif_v6subnet, 2415 &ipif->ipif_v6net_mask, NULL, IRE_INTERFACE, ipif, NULL, 2416 ALL_ZONES, 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 2417 MATCH_IRE_MASK)); 2418 } 2419 return (ire); 2420 } 2421 2422 /* 2423 * Return B_TRUE if a multirt route is resolvable 2424 * (or if no route is resolved yet), B_FALSE otherwise. 2425 * This only works in the global zone. 2426 */ 2427 boolean_t 2428 ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl) 2429 { 2430 ire_t *first_fire; 2431 ire_t *first_cire; 2432 ire_t *fire; 2433 ire_t *cire; 2434 irb_t *firb; 2435 irb_t *cirb; 2436 int unres_cnt = 0; 2437 boolean_t resolvable = B_FALSE; 2438 2439 /* Retrieve the first IRE_HOST that matches the destination */ 2440 first_fire = ire_ftable_lookup_v6(v6dstp, &ipv6_all_ones, 0, IRE_HOST, 2441 NULL, NULL, ALL_ZONES, 0, tsl, MATCH_IRE_MASK | MATCH_IRE_TYPE | 2442 MATCH_IRE_SECATTR); 2443 2444 /* No route at all */ 2445 if (first_fire == NULL) { 2446 return (B_TRUE); 2447 } 2448 2449 firb = first_fire->ire_bucket; 2450 ASSERT(firb); 2451 2452 /* Retrieve the first IRE_CACHE ire for that destination. */ 2453 first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID, tsl); 2454 2455 /* No resolved route. */ 2456 if (first_cire == NULL) { 2457 ire_refrele(first_fire); 2458 return (B_TRUE); 2459 } 2460 2461 /* At least one route is resolved. */ 2462 2463 cirb = first_cire->ire_bucket; 2464 ASSERT(cirb); 2465 2466 /* Count the number of routes to that dest that are declared. */ 2467 IRB_REFHOLD(firb); 2468 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2469 if (!(fire->ire_flags & RTF_MULTIRT)) 2470 continue; 2471 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, v6dstp)) 2472 continue; 2473 unres_cnt++; 2474 } 2475 IRB_REFRELE(firb); 2476 2477 2478 /* Then subtract the number of routes to that dst that are resolved */ 2479 IRB_REFHOLD(cirb); 2480 for (cire = first_cire; cire != NULL; cire = cire->ire_next) { 2481 if (!(cire->ire_flags & RTF_MULTIRT)) 2482 continue; 2483 if (!IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, v6dstp)) 2484 continue; 2485 if (cire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 2486 continue; 2487 unres_cnt--; 2488 } 2489 IRB_REFRELE(cirb); 2490 2491 /* At least one route is unresolved; search for a resolvable route. */ 2492 if (unres_cnt > 0) 2493 resolvable = ire_multirt_lookup_v6(&first_cire, &first_fire, 2494 MULTIRT_USESTAMP|MULTIRT_CACHEGW, tsl); 2495 2496 if (first_fire) 2497 ire_refrele(first_fire); 2498 2499 if (first_cire) 2500 ire_refrele(first_cire); 2501 2502 return (resolvable); 2503 } 2504 2505 2506 /* 2507 * Return B_TRUE and update *ire_arg and *fire_arg 2508 * if at least one resolvable route is found. 2509 * Return B_FALSE otherwise (all routes are resolved or 2510 * the remaining unresolved routes are all unresolvable). 2511 * This only works in the global zone. 2512 */ 2513 boolean_t 2514 ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, 2515 const ts_label_t *tsl) 2516 { 2517 clock_t delta; 2518 ire_t *best_fire = NULL; 2519 ire_t *best_cire = NULL; 2520 ire_t *first_fire; 2521 ire_t *first_cire; 2522 ire_t *fire; 2523 ire_t *cire; 2524 irb_t *firb = NULL; 2525 irb_t *cirb = NULL; 2526 ire_t *gw_ire; 2527 boolean_t already_resolved; 2528 boolean_t res; 2529 in6_addr_t v6dst; 2530 in6_addr_t v6gw; 2531 2532 ip2dbg(("ire_multirt_lookup_v6: *ire_arg %p, *fire_arg %p, " 2533 "flags %04x\n", (void *)*ire_arg, (void *)*fire_arg, flags)); 2534 2535 ASSERT(ire_arg); 2536 ASSERT(fire_arg); 2537 2538 /* Not an IRE_HOST ire; give up. */ 2539 if ((*fire_arg == NULL) || 2540 ((*fire_arg)->ire_type != IRE_HOST)) { 2541 return (B_FALSE); 2542 } 2543 2544 /* This is the first IRE_HOST ire for that destination. */ 2545 first_fire = *fire_arg; 2546 firb = first_fire->ire_bucket; 2547 ASSERT(firb); 2548 2549 mutex_enter(&first_fire->ire_lock); 2550 v6dst = first_fire->ire_addr_v6; 2551 mutex_exit(&first_fire->ire_lock); 2552 2553 ip2dbg(("ire_multirt_lookup_v6: dst %08x\n", 2554 ntohl(V4_PART_OF_V6(v6dst)))); 2555 2556 /* 2557 * Retrieve the first IRE_CACHE ire for that destination; 2558 * if we don't find one, no route for that dest is 2559 * resolved yet. 2560 */ 2561 first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID, tsl); 2562 if (first_cire) { 2563 cirb = first_cire->ire_bucket; 2564 } 2565 2566 ip2dbg(("ire_multirt_lookup_v6: first_cire %p\n", (void *)first_cire)); 2567 2568 /* 2569 * Search for a resolvable route, giving the top priority 2570 * to routes that can be resolved without any call to the resolver. 2571 */ 2572 IRB_REFHOLD(firb); 2573 2574 if (!IN6_IS_ADDR_MULTICAST(&v6dst)) { 2575 /* 2576 * For all multiroute IRE_HOST ires for that destination, 2577 * check if the route via the IRE_HOST's gateway is 2578 * resolved yet. 2579 */ 2580 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2581 2582 if (!(fire->ire_flags & RTF_MULTIRT)) 2583 continue; 2584 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2585 continue; 2586 2587 if (fire->ire_gw_secattr != NULL && 2588 tsol_ire_match_gwattr(fire, tsl) != 0) { 2589 continue; 2590 } 2591 2592 mutex_enter(&fire->ire_lock); 2593 v6gw = fire->ire_gateway_addr_v6; 2594 mutex_exit(&fire->ire_lock); 2595 2596 ip2dbg(("ire_multirt_lookup_v6: fire %p, " 2597 "ire_addr %08x, ire_gateway_addr %08x\n", 2598 (void *)fire, 2599 ntohl(V4_PART_OF_V6(fire->ire_addr_v6)), 2600 ntohl(V4_PART_OF_V6(v6gw)))); 2601 2602 already_resolved = B_FALSE; 2603 2604 if (first_cire) { 2605 ASSERT(cirb); 2606 2607 IRB_REFHOLD(cirb); 2608 /* 2609 * For all IRE_CACHE ires for that 2610 * destination. 2611 */ 2612 for (cire = first_cire; 2613 cire != NULL; 2614 cire = cire->ire_next) { 2615 2616 if (!(cire->ire_flags & RTF_MULTIRT)) 2617 continue; 2618 if (!IN6_ARE_ADDR_EQUAL( 2619 &cire->ire_addr_v6, &v6dst)) 2620 continue; 2621 if (cire->ire_marks & 2622 (IRE_MARK_CONDEMNED| 2623 IRE_MARK_HIDDEN)) 2624 continue; 2625 2626 if (cire->ire_gw_secattr != NULL && 2627 tsol_ire_match_gwattr(cire, 2628 tsl) != 0) { 2629 continue; 2630 } 2631 2632 /* 2633 * Check if the IRE_CACHE's gateway 2634 * matches the IRE_HOST's gateway. 2635 */ 2636 if (IN6_ARE_ADDR_EQUAL( 2637 &cire->ire_gateway_addr_v6, 2638 &v6gw)) { 2639 already_resolved = B_TRUE; 2640 break; 2641 } 2642 } 2643 IRB_REFRELE(cirb); 2644 } 2645 2646 /* 2647 * This route is already resolved; 2648 * proceed with next one. 2649 */ 2650 if (already_resolved) { 2651 ip2dbg(("ire_multirt_lookup_v6: found cire %p, " 2652 "already resolved\n", (void *)cire)); 2653 continue; 2654 } 2655 2656 /* 2657 * The route is unresolved; is it actually 2658 * resolvable, i.e. is there a cache or a resolver 2659 * for the gateway? 2660 */ 2661 gw_ire = ire_route_lookup_v6(&v6gw, 0, 0, 0, NULL, NULL, 2662 ALL_ZONES, tsl, MATCH_IRE_RECURSIVE | 2663 MATCH_IRE_SECATTR); 2664 2665 ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n", 2666 (void *)gw_ire)); 2667 2668 /* 2669 * This route can be resolved without any call to the 2670 * resolver; if the MULTIRT_CACHEGW flag is set, 2671 * give the top priority to this ire and exit the 2672 * loop. 2673 * This occurs when an resolver reply is processed 2674 * through ip_wput_nondata() 2675 */ 2676 if ((flags & MULTIRT_CACHEGW) && 2677 (gw_ire != NULL) && 2678 (gw_ire->ire_type & IRE_CACHETABLE)) { 2679 /* 2680 * Release the resolver associated to the 2681 * previous candidate best ire, if any. 2682 */ 2683 if (best_cire) { 2684 ire_refrele(best_cire); 2685 ASSERT(best_fire); 2686 } 2687 2688 best_fire = fire; 2689 best_cire = gw_ire; 2690 2691 ip2dbg(("ire_multirt_lookup_v6: found top prio " 2692 "best_fire %p, best_cire %p\n", 2693 (void *)best_fire, (void *)best_cire)); 2694 break; 2695 } 2696 2697 /* 2698 * Compute the time elapsed since our preceding 2699 * attempt to resolve that route. 2700 * If the MULTIRT_USESTAMP flag is set, we take that 2701 * route into account only if this time interval 2702 * exceeds ip_multirt_resolution_interval; 2703 * this prevents us from attempting to resolve a 2704 * broken route upon each sending of a packet. 2705 */ 2706 delta = lbolt - fire->ire_last_used_time; 2707 delta = TICK_TO_MSEC(delta); 2708 2709 res = (boolean_t) 2710 ((delta > ip_multirt_resolution_interval) || 2711 (!(flags & MULTIRT_USESTAMP))); 2712 2713 ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, " 2714 "res %d\n", 2715 (void *)fire, delta, res)); 2716 2717 if (res) { 2718 /* 2719 * A resolver exists for the gateway: save 2720 * the current IRE_HOST ire as a candidate 2721 * best ire. If we later discover that a 2722 * top priority ire exists (i.e. no need to 2723 * call the resolver), then this new ire 2724 * will be preferred to the current one. 2725 */ 2726 if (gw_ire != NULL) { 2727 if (best_fire == NULL) { 2728 ASSERT(best_cire == NULL); 2729 2730 best_fire = fire; 2731 best_cire = gw_ire; 2732 2733 ip2dbg(("ire_multirt_lookup_v6:" 2734 "found candidate " 2735 "best_fire %p, " 2736 "best_cire %p\n", 2737 (void *)best_fire, 2738 (void *)best_cire)); 2739 2740 /* 2741 * If MULTIRT_CACHEGW is not 2742 * set, we ignore the top 2743 * priority ires that can 2744 * be resolved without any 2745 * call to the resolver; 2746 * In that case, there is 2747 * actually no need 2748 * to continue the loop. 2749 */ 2750 if (!(flags & 2751 MULTIRT_CACHEGW)) { 2752 break; 2753 } 2754 continue; 2755 } 2756 } else { 2757 /* 2758 * No resolver for the gateway: the 2759 * route is not resolvable. 2760 * If the MULTIRT_SETSTAMP flag is 2761 * set, we stamp the IRE_HOST ire, 2762 * so we will not select it again 2763 * during this resolution interval. 2764 */ 2765 if (flags & MULTIRT_SETSTAMP) 2766 fire->ire_last_used_time = 2767 lbolt; 2768 } 2769 } 2770 2771 if (gw_ire != NULL) 2772 ire_refrele(gw_ire); 2773 } 2774 } else { /* IN6_IS_ADDR_MULTICAST(&v6dst) */ 2775 2776 for (fire = first_fire; 2777 fire != NULL; 2778 fire = fire->ire_next) { 2779 2780 if (!(fire->ire_flags & RTF_MULTIRT)) 2781 continue; 2782 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2783 continue; 2784 2785 if (fire->ire_gw_secattr != NULL && 2786 tsol_ire_match_gwattr(fire, tsl) != 0) { 2787 continue; 2788 } 2789 2790 already_resolved = B_FALSE; 2791 2792 mutex_enter(&fire->ire_lock); 2793 v6gw = fire->ire_gateway_addr_v6; 2794 mutex_exit(&fire->ire_lock); 2795 2796 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2797 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, tsl, 2798 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE | 2799 MATCH_IRE_SECATTR); 2800 2801 /* No resolver for the gateway; we skip this ire. */ 2802 if (gw_ire == NULL) { 2803 continue; 2804 } 2805 2806 if (first_cire) { 2807 2808 IRB_REFHOLD(cirb); 2809 /* 2810 * For all IRE_CACHE ires for that 2811 * destination. 2812 */ 2813 for (cire = first_cire; 2814 cire != NULL; 2815 cire = cire->ire_next) { 2816 2817 if (!(cire->ire_flags & RTF_MULTIRT)) 2818 continue; 2819 if (!IN6_ARE_ADDR_EQUAL( 2820 &cire->ire_addr_v6, &v6dst)) 2821 continue; 2822 if (cire->ire_marks & 2823 (IRE_MARK_CONDEMNED| 2824 IRE_MARK_HIDDEN)) 2825 continue; 2826 2827 if (cire->ire_gw_secattr != NULL && 2828 tsol_ire_match_gwattr(cire, 2829 tsl) != 0) { 2830 continue; 2831 } 2832 2833 /* 2834 * Cache entries are linked to the 2835 * parent routes using the parent handle 2836 * (ire_phandle). If no cache entry has 2837 * the same handle as fire, fire is 2838 * still unresolved. 2839 */ 2840 ASSERT(cire->ire_phandle != 0); 2841 if (cire->ire_phandle == 2842 fire->ire_phandle) { 2843 already_resolved = B_TRUE; 2844 break; 2845 } 2846 } 2847 IRB_REFRELE(cirb); 2848 } 2849 2850 /* 2851 * This route is already resolved; proceed with 2852 * next one. 2853 */ 2854 if (already_resolved) { 2855 ire_refrele(gw_ire); 2856 continue; 2857 } 2858 2859 /* 2860 * Compute the time elapsed since our preceding 2861 * attempt to resolve that route. 2862 * If the MULTIRT_USESTAMP flag is set, we take 2863 * that route into account only if this time 2864 * interval exceeds ip_multirt_resolution_interval; 2865 * this prevents us from attempting to resolve a 2866 * broken route upon each sending of a packet. 2867 */ 2868 delta = lbolt - fire->ire_last_used_time; 2869 delta = TICK_TO_MSEC(delta); 2870 2871 res = (boolean_t) 2872 ((delta > ip_multirt_resolution_interval) || 2873 (!(flags & MULTIRT_USESTAMP))); 2874 2875 ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, " 2876 "flags %04x, res %d\n", 2877 (void *)fire, delta, flags, res)); 2878 2879 if (res) { 2880 if (best_cire) { 2881 /* 2882 * Release the resolver associated 2883 * to the preceding candidate best 2884 * ire, if any. 2885 */ 2886 ire_refrele(best_cire); 2887 ASSERT(best_fire); 2888 } 2889 best_fire = fire; 2890 best_cire = gw_ire; 2891 continue; 2892 } 2893 2894 ire_refrele(gw_ire); 2895 } 2896 } 2897 2898 if (best_fire) { 2899 IRE_REFHOLD(best_fire); 2900 } 2901 IRB_REFRELE(firb); 2902 2903 /* Release the first IRE_CACHE we initially looked up, if any. */ 2904 if (first_cire) 2905 ire_refrele(first_cire); 2906 2907 /* Found a resolvable route. */ 2908 if (best_fire) { 2909 ASSERT(best_cire); 2910 2911 if (*fire_arg) 2912 ire_refrele(*fire_arg); 2913 if (*ire_arg) 2914 ire_refrele(*ire_arg); 2915 2916 /* 2917 * Update the passed arguments with the 2918 * resolvable multirt route we found 2919 */ 2920 *fire_arg = best_fire; 2921 *ire_arg = best_cire; 2922 2923 ip2dbg(("ire_multirt_lookup_v6: returning B_TRUE, " 2924 "*fire_arg %p, *ire_arg %p\n", 2925 (void *)best_fire, (void *)best_cire)); 2926 2927 return (B_TRUE); 2928 } 2929 2930 ASSERT(best_cire == NULL); 2931 2932 ip2dbg(("ire_multirt_lookup_v6: returning B_FALSE, *fire_arg %p, " 2933 "*ire_arg %p\n", 2934 (void *)*fire_arg, (void *)*ire_arg)); 2935 2936 /* No resolvable route. */ 2937 return (B_FALSE); 2938 } 2939 2940 2941 /* 2942 * Find an IRE_OFFSUBNET IRE entry for the multicast address 'v6dstp' 2943 * that goes through 'ipif'. As a fallback, a route that goes through 2944 * ipif->ipif_ill can be returned. 2945 */ 2946 ire_t * 2947 ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp) 2948 { 2949 ire_t *ire; 2950 ire_t *save_ire = NULL; 2951 ire_t *gw_ire; 2952 irb_t *irb; 2953 in6_addr_t v6gw; 2954 int match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL; 2955 2956 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, NULL, ALL_ZONES, 0, 2957 NULL, MATCH_IRE_DEFAULT); 2958 2959 if (ire == NULL) 2960 return (NULL); 2961 2962 irb = ire->ire_bucket; 2963 ASSERT(irb); 2964 2965 IRB_REFHOLD(irb); 2966 ire_refrele(ire); 2967 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 2968 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) || 2969 (ipif->ipif_zoneid != ire->ire_zoneid && 2970 ire->ire_zoneid != ALL_ZONES)) { 2971 continue; 2972 } 2973 2974 switch (ire->ire_type) { 2975 case IRE_DEFAULT: 2976 case IRE_PREFIX: 2977 case IRE_HOST: 2978 mutex_enter(&ire->ire_lock); 2979 v6gw = ire->ire_gateway_addr_v6; 2980 mutex_exit(&ire->ire_lock); 2981 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2982 IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, 2983 NULL, match_flags); 2984 2985 if (gw_ire != NULL) { 2986 if (save_ire != NULL) { 2987 ire_refrele(save_ire); 2988 } 2989 IRE_REFHOLD(ire); 2990 if (gw_ire->ire_ipif == ipif) { 2991 ire_refrele(gw_ire); 2992 2993 IRB_REFRELE(irb); 2994 return (ire); 2995 } 2996 ire_refrele(gw_ire); 2997 save_ire = ire; 2998 } 2999 break; 3000 case IRE_IF_NORESOLVER: 3001 case IRE_IF_RESOLVER: 3002 if (ire->ire_ipif == ipif) { 3003 if (save_ire != NULL) { 3004 ire_refrele(save_ire); 3005 } 3006 IRE_REFHOLD(ire); 3007 3008 IRB_REFRELE(irb); 3009 return (ire); 3010 } 3011 break; 3012 } 3013 } 3014 IRB_REFRELE(irb); 3015 3016 return (save_ire); 3017 } 3018