1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 /* 32 * This file contains routines that manipulate Internet Routing Entries (IREs). 33 */ 34 #include <sys/types.h> 35 #include <sys/stream.h> 36 #include <sys/stropts.h> 37 #include <sys/ddi.h> 38 #include <sys/cmn_err.h> 39 40 #include <sys/systm.h> 41 #include <sys/param.h> 42 #include <sys/socket.h> 43 #include <net/if.h> 44 #include <net/route.h> 45 #include <netinet/in.h> 46 #include <net/if_dl.h> 47 #include <netinet/ip6.h> 48 #include <netinet/icmp6.h> 49 50 #include <inet/common.h> 51 #include <inet/mi.h> 52 #include <inet/ip.h> 53 #include <inet/ip6.h> 54 #include <inet/ip_ndp.h> 55 #include <inet/ip_if.h> 56 #include <inet/ip_ire.h> 57 #include <inet/ipclassifier.h> 58 #include <inet/nd.h> 59 #include <sys/kmem.h> 60 #include <sys/zone.h> 61 62 #include <sys/tsol/label.h> 63 #include <sys/tsol/tnet.h> 64 65 irb_t *ip_forwarding_table_v6[IP6_MASK_TABLE_SIZE]; 66 /* This is dynamically allocated in ip_ire_init */ 67 irb_t *ip_cache_table_v6; 68 static ire_t ire_null; 69 70 /* Defined in ip_ire.c */ 71 extern uint32_t ip6_cache_table_size; 72 extern uint32_t ip6_ftable_hash_size; 73 74 static ire_t *ire_ihandle_lookup_onlink_v6(ire_t *cire); 75 static void ire_report_ftable_v6(ire_t *ire, char *mp); 76 static void ire_report_ctable_v6(ire_t *ire, char *mp); 77 static boolean_t ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, 78 const in6_addr_t *mask, const in6_addr_t *gateway, int type, 79 const ipif_t *ipif, zoneid_t zoneid, uint32_t ihandle, 80 const ts_label_t *tsl, int match_flags); 81 82 /* 83 * Named Dispatch routine to produce a formatted report on all IREs. 84 * This report is accessed by using the ndd utility to "get" ND variable 85 * "ip_ire_status_v6". 86 */ 87 /* ARGSUSED */ 88 int 89 ip_ire_report_v6(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 90 { 91 zoneid_t zoneid; 92 93 (void) mi_mpprintf(mp, 94 "IRE " MI_COL_HDRPAD_STR 95 "rfq " MI_COL_HDRPAD_STR 96 "stq " MI_COL_HDRPAD_STR 97 " zone mxfrg rtt rtt_sd ssthresh ref " 98 "rtomax tstamp_ok wscale_ok ecn_ok pmtud_ok sack sendpipe recvpipe " 99 "in/out/forward type addr mask " 100 "src gateway"); 101 /* 102 * 01234567 01234567 01234567 12345 12345 12345 12345 12345678 123 103 * 123456 123456789 123456789 123456 12345678 1234 12345678 12345678 104 * in/out/forward xxxxxxxxxx 105 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 106 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 107 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 108 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 109 */ 110 111 /* 112 * Because of the ndd constraint, at most we can have 64K buffer 113 * to put in all IRE info. So to be more efficient, just 114 * allocate a 64K buffer here, assuming we need that large buffer. 115 * This should be OK as only root can do ndd /dev/ip. 116 */ 117 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 118 /* The following may work even if we cannot get a large buf. */ 119 (void) mi_mpprintf(mp, "<< Out of buffer >>\n"); 120 return (0); 121 } 122 zoneid = Q_TO_CONN(q)->conn_zoneid; 123 if (zoneid == GLOBAL_ZONEID) 124 zoneid = ALL_ZONES; 125 126 ire_walk_v6(ire_report_ftable_v6, (char *)mp->b_cont, zoneid); 127 ire_walk_v6(ire_report_ctable_v6, (char *)mp->b_cont, zoneid); 128 return (0); 129 } 130 131 /* 132 * ire_walk routine invoked for ip_ire_report_v6 for each IRE. 133 */ 134 static void 135 ire_report_ftable_v6(ire_t *ire, char *mp) 136 { 137 char buf1[INET6_ADDRSTRLEN]; 138 char buf2[INET6_ADDRSTRLEN]; 139 char buf3[INET6_ADDRSTRLEN]; 140 char buf4[INET6_ADDRSTRLEN]; 141 uint_t fo_pkt_count; 142 uint_t ib_pkt_count; 143 int ref; 144 in6_addr_t gw_addr_v6; 145 uint_t print_len, buf_len; 146 147 ASSERT(ire->ire_ipversion == IPV6_VERSION); 148 if (ire->ire_type & IRE_CACHETABLE) 149 return; 150 buf_len = ((mblk_t *)mp)->b_datap->db_lim - ((mblk_t *)mp)->b_wptr; 151 if (buf_len <= 0) 152 return; 153 154 /* Number of active references of this ire */ 155 ref = ire->ire_refcnt; 156 /* "inbound" to a non local address is a forward */ 157 ib_pkt_count = ire->ire_ib_pkt_count; 158 fo_pkt_count = 0; 159 ASSERT(!(ire->ire_type & IRE_BROADCAST)); 160 if (!(ire->ire_type & (IRE_LOCAL|IRE_BROADCAST))) { 161 fo_pkt_count = ib_pkt_count; 162 ib_pkt_count = 0; 163 } 164 165 mutex_enter(&ire->ire_lock); 166 gw_addr_v6 = ire->ire_gateway_addr_v6; 167 mutex_exit(&ire->ire_lock); 168 169 print_len = snprintf((char *)((mblk_t *)mp)->b_wptr, buf_len, 170 MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR "%5d " 171 "%05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d " 172 "%04d %08d %08d %d/%d/%d %s\n\t%s\n\t%s\n\t%s\n\t%s\n", 173 (void *)ire, (void *)ire->ire_rfq, (void *)ire->ire_stq, 174 (int)ire->ire_zoneid, 175 ire->ire_max_frag, ire->ire_uinfo.iulp_rtt, 176 ire->ire_uinfo.iulp_rtt_sd, 177 ire->ire_uinfo.iulp_ssthresh, ref, 178 ire->ire_uinfo.iulp_rtomax, 179 (ire->ire_uinfo.iulp_tstamp_ok ? 1: 0), 180 (ire->ire_uinfo.iulp_wscale_ok ? 1: 0), 181 (ire->ire_uinfo.iulp_ecn_ok ? 1: 0), 182 (ire->ire_uinfo.iulp_pmtud_ok ? 1: 0), 183 ire->ire_uinfo.iulp_sack, 184 ire->ire_uinfo.iulp_spipe, ire->ire_uinfo.iulp_rpipe, 185 ib_pkt_count, ire->ire_ob_pkt_count, fo_pkt_count, 186 ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type), 187 inet_ntop(AF_INET6, &ire->ire_addr_v6, buf1, sizeof (buf1)), 188 inet_ntop(AF_INET6, &ire->ire_mask_v6, buf2, sizeof (buf2)), 189 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, buf3, sizeof (buf3)), 190 inet_ntop(AF_INET6, &gw_addr_v6, buf4, sizeof (buf4))); 191 if (print_len < buf_len) { 192 ((mblk_t *)mp)->b_wptr += print_len; 193 } else { 194 ((mblk_t *)mp)->b_wptr += buf_len; 195 } 196 } 197 198 /* ire_walk routine invoked for ip_ire_report_v6 for each IRE. */ 199 static void 200 ire_report_ctable_v6(ire_t *ire, char *mp) 201 { 202 char buf1[INET6_ADDRSTRLEN]; 203 char buf2[INET6_ADDRSTRLEN]; 204 char buf3[INET6_ADDRSTRLEN]; 205 char buf4[INET6_ADDRSTRLEN]; 206 uint_t fo_pkt_count; 207 uint_t ib_pkt_count; 208 int ref; 209 in6_addr_t gw_addr_v6; 210 uint_t print_len, buf_len; 211 212 if ((ire->ire_type & IRE_CACHETABLE) == 0) 213 return; 214 buf_len = ((mblk_t *)mp)->b_datap->db_lim - ((mblk_t *)mp)->b_wptr; 215 if (buf_len <= 0) 216 return; 217 218 /* Number of active references of this ire */ 219 ref = ire->ire_refcnt; 220 /* "inbound" to a non local address is a forward */ 221 ib_pkt_count = ire->ire_ib_pkt_count; 222 fo_pkt_count = 0; 223 ASSERT(!(ire->ire_type & IRE_BROADCAST)); 224 if (ire->ire_type & IRE_LOCAL) { 225 fo_pkt_count = ib_pkt_count; 226 ib_pkt_count = 0; 227 } 228 229 mutex_enter(&ire->ire_lock); 230 gw_addr_v6 = ire->ire_gateway_addr_v6; 231 mutex_exit(&ire->ire_lock); 232 233 print_len = snprintf((char *)((mblk_t *)mp)->b_wptr, buf_len, 234 MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR "%5d " 235 "%05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d " 236 "%04d %08d %08d %d/%d/%d %s\n\t%s\n\t%s\n\t%s\n\t%s\n", 237 (void *)ire, (void *)ire->ire_rfq, (void *)ire->ire_stq, 238 (int)ire->ire_zoneid, 239 ire->ire_max_frag, ire->ire_uinfo.iulp_rtt, 240 ire->ire_uinfo.iulp_rtt_sd, ire->ire_uinfo.iulp_ssthresh, ref, 241 ire->ire_uinfo.iulp_rtomax, 242 (ire->ire_uinfo.iulp_tstamp_ok ? 1: 0), 243 (ire->ire_uinfo.iulp_wscale_ok ? 1: 0), 244 (ire->ire_uinfo.iulp_ecn_ok ? 1: 0), 245 (ire->ire_uinfo.iulp_pmtud_ok ? 1: 0), 246 ire->ire_uinfo.iulp_sack, 247 ire->ire_uinfo.iulp_spipe, ire->ire_uinfo.iulp_rpipe, 248 ib_pkt_count, ire->ire_ob_pkt_count, 249 fo_pkt_count, ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type), 250 inet_ntop(AF_INET6, &ire->ire_addr_v6, buf1, sizeof (buf1)), 251 inet_ntop(AF_INET6, &ire->ire_mask_v6, buf2, sizeof (buf2)), 252 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, buf3, sizeof (buf3)), 253 inet_ntop(AF_INET6, &gw_addr_v6, buf4, sizeof (buf4))); 254 if (print_len < buf_len) { 255 ((mblk_t *)mp)->b_wptr += print_len; 256 } else { 257 ((mblk_t *)mp)->b_wptr += buf_len; 258 } 259 } 260 261 262 /* 263 * Initialize the ire that is specific to IPv6 part and call 264 * ire_init_common to finish it. 265 */ 266 ire_t * 267 ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, 268 const in6_addr_t *v6mask, const in6_addr_t *v6src_addr, 269 const in6_addr_t *v6gateway, uint_t *max_fragp, 270 mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, 271 mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask, 272 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 273 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp) 274 { 275 /* 276 * Reject IRE security attribute creation/initialization 277 * if system is not running in Trusted mode. 278 */ 279 if ((gc != NULL || gcgrp != NULL) && !is_system_labeled()) 280 return (NULL); 281 282 if (fp_mp != NULL) { 283 /* 284 * We can't dupb() here as multiple threads could be 285 * calling dupb on the same mp which is incorrect. 286 * First dupb() should be called only by one thread. 287 */ 288 fp_mp = copyb(fp_mp); 289 if (fp_mp == NULL) 290 return (NULL); 291 } 292 293 if (dlureq_mp != NULL) { 294 /* 295 * We can't dupb() here as multiple threads could be 296 * calling dupb on the same mp which is incorrect. 297 * First dupb() should be called only by one thread. 298 */ 299 dlureq_mp = copyb(dlureq_mp); 300 if (dlureq_mp == NULL) { 301 if (fp_mp != NULL) 302 freeb(fp_mp); 303 return (NULL); 304 } 305 } 306 307 BUMP_IRE_STATS(ire_stats_v6, ire_stats_alloced); 308 ire->ire_addr_v6 = *v6addr; 309 310 if (v6src_addr != NULL) 311 ire->ire_src_addr_v6 = *v6src_addr; 312 if (v6mask != NULL) { 313 ire->ire_mask_v6 = *v6mask; 314 ire->ire_masklen = ip_mask_to_plen_v6(&ire->ire_mask_v6); 315 } 316 if (v6gateway != NULL) 317 ire->ire_gateway_addr_v6 = *v6gateway; 318 319 if (type == IRE_CACHE && v6cmask != NULL) 320 ire->ire_cmask_v6 = *v6cmask; 321 322 /* 323 * Multirouted packets need to have a fragment header added so that 324 * the receiver is able to discard duplicates according to their 325 * fragment identifier. 326 */ 327 if (type == IRE_CACHE && (flags & RTF_MULTIRT)) { 328 ire->ire_frag_flag = IPH_FRAG_HDR; 329 } 330 331 /* ire_init_common will free the mblks upon encountering any failure */ 332 if (!ire_init_common(ire, max_fragp, fp_mp, rfq, stq, type, dlureq_mp, 333 ipif, NULL, phandle, ihandle, flags, IPV6_VERSION, ulp_info, 334 gc, gcgrp)) 335 return (NULL); 336 337 return (ire); 338 } 339 340 /* 341 * Similar to ire_create_v6 except that it is called only when 342 * we want to allocate ire as an mblk e.g. we have a external 343 * resolver. Do we need this in IPv6 ? 344 */ 345 ire_t * 346 ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 347 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 348 mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, 349 mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask, 350 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 351 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp) 352 { 353 ire_t *ire; 354 ire_t *ret_ire; 355 mblk_t *mp; 356 357 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 358 359 /* Allocate the new IRE. */ 360 mp = allocb(sizeof (ire_t), BPRI_MED); 361 if (mp == NULL) { 362 ip1dbg(("ire_create_mp_v6: alloc failed\n")); 363 return (NULL); 364 } 365 366 ire = (ire_t *)mp->b_rptr; 367 mp->b_wptr = (uchar_t *)&ire[1]; 368 369 /* Start clean. */ 370 *ire = ire_null; 371 ire->ire_mp = mp; 372 mp->b_datap->db_type = IRE_DB_TYPE; 373 374 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 375 NULL, fp_mp, rfq, stq, type, dlureq_mp, ipif, v6cmask, phandle, 376 ihandle, flags, ulp_info, gc, gcgrp); 377 378 if (ret_ire == NULL) { 379 freeb(ire->ire_mp); 380 return (NULL); 381 } 382 return (ire); 383 } 384 385 /* 386 * ire_create_v6 is called to allocate and initialize a new IRE. 387 * 388 * NOTE : This is called as writer sometimes though not required 389 * by this function. 390 */ 391 ire_t * 392 ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 393 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 394 uint_t *max_fragp, mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, 395 mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask, 396 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 397 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp) 398 { 399 ire_t *ire; 400 ire_t *ret_ire; 401 402 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 403 404 ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP); 405 if (ire == NULL) { 406 ip1dbg(("ire_create_v6: alloc failed\n")); 407 return (NULL); 408 } 409 *ire = ire_null; 410 411 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 412 max_fragp, fp_mp, rfq, stq, type, dlureq_mp, ipif, v6cmask, phandle, 413 ihandle, flags, ulp_info, gc, gcgrp); 414 415 if (ret_ire == NULL) { 416 kmem_cache_free(ire_cache, ire); 417 return (NULL); 418 } 419 ASSERT(ret_ire == ire); 420 return (ire); 421 } 422 423 /* 424 * Find an IRE_INTERFACE for the multicast group. 425 * Allows different routes for multicast addresses 426 * in the unicast routing table (akin to FF::0/8 but could be more specific) 427 * which point at different interfaces. This is used when IPV6_MULTICAST_IF 428 * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't 429 * specify the interface to join on. 430 * 431 * Supports link-local addresses by following the ipif/ill when recursing. 432 */ 433 ire_t * 434 ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid) 435 { 436 ire_t *ire; 437 ipif_t *ipif = NULL; 438 int match_flags = MATCH_IRE_TYPE; 439 in6_addr_t gw_addr_v6; 440 441 ire = ire_ftable_lookup_v6(group, 0, 0, 0, NULL, NULL, 442 zoneid, 0, NULL, MATCH_IRE_DEFAULT); 443 444 /* We search a resolvable ire in case of multirouting. */ 445 if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) { 446 ire_t *cire = NULL; 447 /* 448 * If the route is not resolvable, the looked up ire 449 * may be changed here. In that case, ire_multirt_lookup() 450 * IRE_REFRELE the original ire and change it. 451 */ 452 (void) ire_multirt_lookup_v6(&cire, &ire, MULTIRT_CACHEGW, 453 NULL); 454 if (cire != NULL) 455 ire_refrele(cire); 456 } 457 if (ire == NULL) 458 return (NULL); 459 /* 460 * Make sure we follow ire_ipif. 461 * 462 * We need to determine the interface route through 463 * which the gateway will be reached. We don't really 464 * care which interface is picked if the interface is 465 * part of a group. 466 */ 467 if (ire->ire_ipif != NULL) { 468 ipif = ire->ire_ipif; 469 match_flags |= MATCH_IRE_ILL_GROUP; 470 } 471 472 switch (ire->ire_type) { 473 case IRE_DEFAULT: 474 case IRE_PREFIX: 475 case IRE_HOST: 476 mutex_enter(&ire->ire_lock); 477 gw_addr_v6 = ire->ire_gateway_addr_v6; 478 mutex_exit(&ire->ire_lock); 479 ire_refrele(ire); 480 ire = ire_ftable_lookup_v6(&gw_addr_v6, 0, 0, 481 IRE_INTERFACE, ipif, NULL, zoneid, 0, 482 NULL, match_flags); 483 return (ire); 484 case IRE_IF_NORESOLVER: 485 case IRE_IF_RESOLVER: 486 return (ire); 487 default: 488 ire_refrele(ire); 489 return (NULL); 490 } 491 } 492 493 /* 494 * Return any local address. We use this to target ourselves 495 * when the src address was specified as 'default'. 496 * Preference for IRE_LOCAL entries. 497 */ 498 ire_t * 499 ire_lookup_local_v6(zoneid_t zoneid) 500 { 501 ire_t *ire; 502 irb_t *irb; 503 ire_t *maybe = NULL; 504 int i; 505 506 for (i = 0; i < ip6_cache_table_size; i++) { 507 irb = &ip_cache_table_v6[i]; 508 if (irb->irb_ire == NULL) 509 continue; 510 rw_enter(&irb->irb_lock, RW_READER); 511 for (ire = irb->irb_ire; ire; ire = ire->ire_next) { 512 if ((ire->ire_marks & IRE_MARK_CONDEMNED) || 513 ire->ire_zoneid != zoneid && 514 ire->ire_zoneid != ALL_ZONES) 515 continue; 516 switch (ire->ire_type) { 517 case IRE_LOOPBACK: 518 if (maybe == NULL) { 519 IRE_REFHOLD(ire); 520 maybe = ire; 521 } 522 break; 523 case IRE_LOCAL: 524 if (maybe != NULL) { 525 ire_refrele(maybe); 526 } 527 IRE_REFHOLD(ire); 528 rw_exit(&irb->irb_lock); 529 return (ire); 530 } 531 } 532 rw_exit(&irb->irb_lock); 533 } 534 return (maybe); 535 } 536 537 /* 538 * This function takes a mask and returns number of bits set in the 539 * mask (the represented prefix length). Assumes a contiguous mask. 540 */ 541 int 542 ip_mask_to_plen_v6(const in6_addr_t *v6mask) 543 { 544 int bits; 545 int plen = IPV6_ABITS; 546 int i; 547 548 for (i = 3; i >= 0; i--) { 549 if (v6mask->s6_addr32[i] == 0) { 550 plen -= 32; 551 continue; 552 } 553 bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1; 554 if (bits == 0) 555 break; 556 plen -= bits; 557 } 558 559 return (plen); 560 } 561 562 /* 563 * Convert a prefix length to the mask for that prefix. 564 * Returns the argument bitmask. 565 */ 566 in6_addr_t * 567 ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask) 568 { 569 uint32_t *ptr; 570 571 if (plen < 0 || plen > IPV6_ABITS) 572 return (NULL); 573 *bitmask = ipv6_all_zeros; 574 575 ptr = (uint32_t *)bitmask; 576 while (plen > 32) { 577 *ptr++ = 0xffffffffU; 578 plen -= 32; 579 } 580 *ptr = htonl(0xffffffffU << (32 - plen)); 581 return (bitmask); 582 } 583 584 /* 585 * Add a fully initialized IRE to an appropriate 586 * table based on ire_type. 587 * 588 * The forward table contains IRE_PREFIX/IRE_HOST/IRE_HOST_REDIRECT 589 * IRE_IF_RESOLVER/IRE_IF_NORESOLVER and IRE_DEFAULT. 590 * 591 * The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK 592 * and IRE_CACHE. 593 * 594 * NOTE : This function is called as writer though not required 595 * by this function. 596 */ 597 int 598 ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func) 599 { 600 ire_t *ire1; 601 int mask_table_index; 602 irb_t *irb_ptr; 603 ire_t **irep; 604 int flags; 605 ire_t *pire = NULL; 606 ill_t *stq_ill; 607 boolean_t ndp_g_lock_held = B_FALSE; 608 ire_t *ire = *ire_p; 609 int error; 610 611 ASSERT(ire->ire_ipversion == IPV6_VERSION); 612 ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */ 613 ASSERT(ire->ire_nce == NULL); 614 615 /* Find the appropriate list head. */ 616 switch (ire->ire_type) { 617 case IRE_HOST: 618 ire->ire_mask_v6 = ipv6_all_ones; 619 ire->ire_masklen = IPV6_ABITS; 620 if ((ire->ire_flags & RTF_SETSRC) == 0) 621 ire->ire_src_addr_v6 = ipv6_all_zeros; 622 break; 623 case IRE_HOST_REDIRECT: 624 ire->ire_mask_v6 = ipv6_all_ones; 625 ire->ire_masklen = IPV6_ABITS; 626 ire->ire_src_addr_v6 = ipv6_all_zeros; 627 break; 628 case IRE_CACHE: 629 case IRE_LOCAL: 630 case IRE_LOOPBACK: 631 ire->ire_mask_v6 = ipv6_all_ones; 632 ire->ire_masklen = IPV6_ABITS; 633 break; 634 case IRE_PREFIX: 635 if ((ire->ire_flags & RTF_SETSRC) == 0) 636 ire->ire_src_addr_v6 = ipv6_all_zeros; 637 break; 638 case IRE_DEFAULT: 639 if ((ire->ire_flags & RTF_SETSRC) == 0) 640 ire->ire_src_addr_v6 = ipv6_all_zeros; 641 break; 642 case IRE_IF_RESOLVER: 643 case IRE_IF_NORESOLVER: 644 break; 645 default: 646 printf("ire_add_v6: ire %p has unrecognized IRE type (%d)\n", 647 (void *)ire, ire->ire_type); 648 ire_delete(ire); 649 *ire_p = NULL; 650 return (EINVAL); 651 } 652 653 /* Make sure the address is properly masked. */ 654 V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6); 655 656 if ((ire->ire_type & IRE_CACHETABLE) == 0) { 657 /* IRE goes into Forward Table */ 658 mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6); 659 if ((ip_forwarding_table_v6[mask_table_index]) == NULL) { 660 irb_t *ptr; 661 int i; 662 663 ptr = (irb_t *)mi_zalloc((ip6_ftable_hash_size * 664 sizeof (irb_t))); 665 if (ptr == NULL) { 666 ire_delete(ire); 667 *ire_p = NULL; 668 return (ENOMEM); 669 } 670 for (i = 0; i < ip6_ftable_hash_size; i++) { 671 rw_init(&ptr[i].irb_lock, NULL, 672 RW_DEFAULT, NULL); 673 } 674 mutex_enter(&ire_ft_init_lock); 675 if (ip_forwarding_table_v6[mask_table_index] == NULL) { 676 ip_forwarding_table_v6[mask_table_index] = ptr; 677 mutex_exit(&ire_ft_init_lock); 678 } else { 679 /* 680 * Some other thread won the race in 681 * initializing the forwarding table at the 682 * same index. 683 */ 684 mutex_exit(&ire_ft_init_lock); 685 for (i = 0; i < ip6_ftable_hash_size; i++) { 686 rw_destroy(&ptr[i].irb_lock); 687 } 688 mi_free(ptr); 689 } 690 } 691 irb_ptr = &(ip_forwarding_table_v6[mask_table_index][ 692 IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6, 693 ip6_ftable_hash_size)]); 694 } else { 695 irb_ptr = &(ip_cache_table_v6[IRE_ADDR_HASH_V6( 696 ire->ire_addr_v6, ip6_cache_table_size)]); 697 } 698 /* 699 * For xresolv interfaces (v6 interfaces with an external 700 * address resolver), ip_newroute_v6/ip_newroute_ipif_v6 701 * are unable to prevent the deletion of the interface route 702 * while adding an IRE_CACHE for an on-link destination 703 * in the IRE_IF_RESOLVER case, since the ire has to go to 704 * the external resolver and return. We can't do a REFHOLD on the 705 * associated interface ire for fear of the message being freed 706 * if the external resolver can't resolve the address. 707 * Here we look up the interface ire in the forwarding table 708 * and make sure that the interface route has not been deleted. 709 */ 710 if (ire->ire_type == IRE_CACHE && 711 IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6) && 712 (((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) && 713 (((ill_t *)ire->ire_stq->q_ptr)->ill_flags & ILLF_XRESOLV)) { 714 715 pire = ire_ihandle_lookup_onlink_v6(ire); 716 if (pire == NULL) { 717 ire_delete(ire); 718 *ire_p = NULL; 719 return (EINVAL); 720 } 721 /* Prevent pire from getting deleted */ 722 IRB_REFHOLD(pire->ire_bucket); 723 /* Has it been removed already? */ 724 if (pire->ire_marks & IRE_MARK_CONDEMNED) { 725 IRB_REFRELE(pire->ire_bucket); 726 ire_refrele(pire); 727 ire_delete(ire); 728 *ire_p = NULL; 729 return (EINVAL); 730 } 731 } 732 733 flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW); 734 /* 735 * For IRE_CACHES, MATCH_IRE_IPIF is not enough to check 736 * for duplicates because : 737 * 738 * 1) ire_ipif->ipif_ill and ire_stq->q_ptr could be 739 * pointing at different ills. A real duplicate is 740 * a match on both ire_ipif and ire_stq. 741 * 742 * 2) We could have multiple packets trying to create 743 * an IRE_CACHE for the same ill. 744 * 745 * Moreover, IPIF_NOFAILOVER and IPV6_BOUND_PIF endpoints wants 746 * to go out on a particular ill. Rather than looking at the 747 * packet, we depend on the above for MATCH_IRE_ILL here. 748 * 749 * Unlike IPv4, MATCH_IRE_IPIF is needed here as we could have 750 * multiple IRE_CACHES for an ill for the same destination 751 * with various scoped addresses i.e represented by ipifs. 752 * 753 * MATCH_IRE_ILL is done implicitly below for IRE_CACHES. 754 */ 755 if (ire->ire_ipif != NULL) 756 flags |= MATCH_IRE_IPIF; 757 /* 758 * If we are creating hidden ires, make sure we search on 759 * this ill (MATCH_IRE_ILL) and a hidden ire, while we are 760 * searching for duplicates below. Otherwise we could 761 * potentially find an IRE on some other interface 762 * and it may not be a IRE marked with IRE_MARK_HIDDEN. We 763 * shouldn't do this as this will lead to an infinite loop as 764 * eventually we need an hidden ire for this packet to go 765 * out. MATCH_IRE_ILL is already marked above. 766 */ 767 if (ire->ire_marks & IRE_MARK_HIDDEN) { 768 ASSERT(ire->ire_type == IRE_CACHE); 769 flags |= MATCH_IRE_MARK_HIDDEN; 770 } 771 772 /* 773 * Start the atomic add of the ire. Grab the ill locks, 774 * ill_g_usesrc_lock and the bucket lock. Check for condemned. 775 * To avoid lock order problems, get the ndp_g_lock now itself. 776 */ 777 if (ire->ire_type == IRE_CACHE) { 778 mutex_enter(&ndp_g_lock); 779 ndp_g_lock_held = B_TRUE; 780 } 781 782 /* 783 * If ipif or ill is changing ire_atomic_start() may queue the 784 * request and return EINPROGRESS. 785 */ 786 787 error = ire_atomic_start(irb_ptr, ire, q, mp, func); 788 if (error != 0) { 789 if (ndp_g_lock_held) 790 mutex_exit(&ndp_g_lock); 791 /* 792 * We don't know whether it is a valid ipif or not. 793 * So, set it to NULL. This assumes that the ire has not added 794 * a reference to the ipif. 795 */ 796 ire->ire_ipif = NULL; 797 ire_delete(ire); 798 if (pire != NULL) { 799 IRB_REFRELE(pire->ire_bucket); 800 ire_refrele(pire); 801 } 802 *ire_p = NULL; 803 return (error); 804 } 805 /* 806 * To avoid creating ires having stale values for the ire_max_frag 807 * we get the latest value atomically here. For more details 808 * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE 809 * in ip_rput_dlpi_writer 810 */ 811 if (ire->ire_max_fragp == NULL) { 812 if (IN6_IS_ADDR_MULTICAST(&ire->ire_addr_v6)) 813 ire->ire_max_frag = ire->ire_ipif->ipif_mtu; 814 else 815 ire->ire_max_frag = pire->ire_max_frag; 816 } else { 817 uint_t max_frag; 818 819 max_frag = *ire->ire_max_fragp; 820 ire->ire_max_fragp = NULL; 821 ire->ire_max_frag = max_frag; 822 } 823 824 /* 825 * Atomically check for duplicate and insert in the table. 826 */ 827 for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) { 828 if (ire1->ire_marks & IRE_MARK_CONDEMNED) 829 continue; 830 831 if (ire->ire_type == IRE_CACHE) { 832 /* 833 * We do MATCH_IRE_ILL implicitly here for IRE_CACHES. 834 * As ire_ipif and ire_stq could point to two 835 * different ills, we can't pass just ire_ipif to 836 * ire_match_args and get a match on both ills. 837 * This is just needed for duplicate checks here and 838 * so we don't add an extra argument to 839 * ire_match_args for this. Do it locally. 840 * 841 * NOTE : Currently there is no part of the code 842 * that asks for both MATH_IRE_IPIF and MATCH_IRE_ILL 843 * match for IRE_CACHEs. Thus we don't want to 844 * extend the arguments to ire_match_args_v6. 845 */ 846 if (ire1->ire_stq != ire->ire_stq) 847 continue; 848 /* 849 * Multiroute IRE_CACHEs for a given destination can 850 * have the same ire_ipif, typically if their source 851 * address is forced using RTF_SETSRC, and the same 852 * send-to queue. We differentiate them using the parent 853 * handle. 854 */ 855 if ((ire1->ire_flags & RTF_MULTIRT) && 856 (ire->ire_flags & RTF_MULTIRT) && 857 (ire1->ire_phandle != ire->ire_phandle)) 858 continue; 859 } 860 if (ire1->ire_zoneid != ire->ire_zoneid) 861 continue; 862 if (ire_match_args_v6(ire1, &ire->ire_addr_v6, 863 &ire->ire_mask_v6, &ire->ire_gateway_addr_v6, 864 ire->ire_type, ire->ire_ipif, ire->ire_zoneid, 0, NULL, 865 flags)) { 866 /* 867 * Return the old ire after doing a REFHOLD. 868 * As most of the callers continue to use the IRE 869 * after adding, we return a held ire. This will 870 * avoid a lookup in the caller again. If the callers 871 * don't want to use it, they need to do a REFRELE. 872 */ 873 ip1dbg(("found dup ire existing %p new %p", 874 (void *)ire1, (void *)ire)); 875 IRE_REFHOLD(ire1); 876 if (ndp_g_lock_held) 877 mutex_exit(&ndp_g_lock); 878 ire_atomic_end(irb_ptr, ire); 879 ire_delete(ire); 880 if (pire != NULL) { 881 /* 882 * Assert that it is 883 * not yet removed from the list. 884 */ 885 ASSERT(pire->ire_ptpn != NULL); 886 IRB_REFRELE(pire->ire_bucket); 887 ire_refrele(pire); 888 } 889 *ire_p = ire1; 890 return (0); 891 } 892 } 893 if (ire->ire_type == IRE_CACHE) { 894 in6_addr_t gw_addr_v6; 895 ill_t *ill = ire_to_ill(ire); 896 char buf[INET6_ADDRSTRLEN]; 897 nce_t *nce; 898 899 /* 900 * All IRE_CACHE types must have a nce. If this is 901 * not the case the entry will not be added. We need 902 * to make sure that if somebody deletes the nce 903 * after we looked up, they will find this ire and 904 * delete the ire. To delete this ire one needs the 905 * bucket lock which we are still holding here. So, 906 * even if the nce gets deleted after we looked up, 907 * this ire will get deleted. 908 * 909 * NOTE : Don't need the ire_lock for accessing 910 * ire_gateway_addr_v6 as it is appearing first 911 * time on the list and rts_setgwr_v6 could not 912 * be changing this. 913 */ 914 gw_addr_v6 = ire->ire_gateway_addr_v6; 915 if (IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 916 nce = ndp_lookup(ill, &ire->ire_addr_v6, B_TRUE); 917 } else { 918 nce = ndp_lookup(ill, &gw_addr_v6, B_TRUE); 919 } 920 if (nce == NULL) 921 goto failed; 922 923 /* Pair of refhold, refrele just to get the tracing right */ 924 NCE_REFHOLD_NOTR(nce); 925 NCE_REFRELE(nce); 926 /* 927 * Atomically make sure that new IREs don't point 928 * to an NCE that is logically deleted (CONDEMNED). 929 * ndp_delete() first marks the NCE CONDEMNED. 930 * This ensures that the nce_refcnt won't increase 931 * due to new nce_lookups or due to addition of new IREs 932 * pointing to this NCE. Then ndp_delete() cleans up 933 * existing references. If we don't do it atomically here, 934 * ndp_delete() -> nce_ire_delete() will not be able to 935 * clean up the IRE list completely, and the nce_refcnt 936 * won't go down to zero. 937 */ 938 mutex_enter(&nce->nce_lock); 939 if (ill->ill_flags & ILLF_XRESOLV) { 940 /* 941 * If we used an external resolver, we may not 942 * have gone through neighbor discovery to get here. 943 * Must update the nce_state before the next check. 944 */ 945 if (nce->nce_state == ND_INCOMPLETE) 946 nce->nce_state = ND_REACHABLE; 947 } 948 if (nce->nce_state == ND_INCOMPLETE || 949 (nce->nce_flags & NCE_F_CONDEMNED) || 950 (nce->nce_state == ND_UNREACHABLE)) { 951 failed: 952 if (ndp_g_lock_held) 953 mutex_exit(&ndp_g_lock); 954 if (nce != NULL) 955 mutex_exit(&nce->nce_lock); 956 ire_atomic_end(irb_ptr, ire); 957 ip1dbg(("ire_add_v6: No nce for dst %s \n", 958 inet_ntop(AF_INET6, &ire->ire_addr_v6, 959 buf, sizeof (buf)))); 960 ire_delete(ire); 961 if (pire != NULL) { 962 /* 963 * Assert that it is 964 * not yet removed from the list. 965 */ 966 ASSERT(pire->ire_ptpn != NULL); 967 IRB_REFRELE(pire->ire_bucket); 968 ire_refrele(pire); 969 } 970 if (nce != NULL) 971 NCE_REFRELE_NOTR(nce); 972 *ire_p = NULL; 973 return (EINVAL); 974 } else { 975 ire->ire_nce = nce; 976 } 977 mutex_exit(&nce->nce_lock); 978 } 979 /* 980 * Find the first entry that matches ire_addr - provides 981 * tail insertion. *irep will be null if no match. 982 */ 983 irep = (ire_t **)irb_ptr; 984 while ((ire1 = *irep) != NULL && 985 !IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6)) 986 irep = &ire1->ire_next; 987 ASSERT(!(ire->ire_type & IRE_BROADCAST)); 988 989 if (*irep != NULL) { 990 /* 991 * Find the last ire which matches ire_addr_v6. 992 * Needed to do tail insertion among entries with the same 993 * ire_addr_v6. 994 */ 995 while (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 996 &ire1->ire_addr_v6)) { 997 irep = &ire1->ire_next; 998 ire1 = *irep; 999 if (ire1 == NULL) 1000 break; 1001 } 1002 } 1003 1004 if (ire->ire_type == IRE_DEFAULT) { 1005 /* 1006 * We keep a count of default gateways which is used when 1007 * assigning them as routes. 1008 */ 1009 ipv6_ire_default_count++; 1010 ASSERT(ipv6_ire_default_count != 0); /* Wraparound */ 1011 } 1012 /* Insert at *irep */ 1013 ire1 = *irep; 1014 if (ire1 != NULL) 1015 ire1->ire_ptpn = &ire->ire_next; 1016 ire->ire_next = ire1; 1017 /* Link the new one in. */ 1018 ire->ire_ptpn = irep; 1019 /* 1020 * ire_walk routines de-reference ire_next without holding 1021 * a lock. Before we point to the new ire, we want to make 1022 * sure the store that sets the ire_next of the new ire 1023 * reaches global visibility, so that ire_walk routines 1024 * don't see a truncated list of ires i.e if the ire_next 1025 * of the new ire gets set after we do "*irep = ire" due 1026 * to re-ordering, the ire_walk thread will see a NULL 1027 * once it accesses the ire_next of the new ire. 1028 * membar_producer() makes sure that the following store 1029 * happens *after* all of the above stores. 1030 */ 1031 membar_producer(); 1032 *irep = ire; 1033 ire->ire_bucket = irb_ptr; 1034 /* 1035 * We return a bumped up IRE above. Keep it symmetrical 1036 * so that the callers will always have to release. This 1037 * helps the callers of this function because they continue 1038 * to use the IRE after adding and hence they don't have to 1039 * lookup again after we return the IRE. 1040 * 1041 * NOTE : We don't have to use atomics as this is appearing 1042 * in the list for the first time and no one else can bump 1043 * up the reference count on this yet. 1044 */ 1045 IRE_REFHOLD_LOCKED(ire); 1046 BUMP_IRE_STATS(ire_stats_v6, ire_stats_inserted); 1047 irb_ptr->irb_ire_cnt++; 1048 if (ire->ire_marks & IRE_MARK_TEMPORARY) 1049 irb_ptr->irb_tmp_ire_cnt++; 1050 1051 if (ire->ire_ipif != NULL) { 1052 ire->ire_ipif->ipif_ire_cnt++; 1053 if (ire->ire_stq != NULL) { 1054 stq_ill = (ill_t *)ire->ire_stq->q_ptr; 1055 stq_ill->ill_ire_cnt++; 1056 } 1057 } else { 1058 ASSERT(ire->ire_stq == NULL); 1059 } 1060 1061 if (ndp_g_lock_held) 1062 mutex_exit(&ndp_g_lock); 1063 ire_atomic_end(irb_ptr, ire); 1064 1065 if (pire != NULL) { 1066 /* Assert that it is not removed from the list yet */ 1067 ASSERT(pire->ire_ptpn != NULL); 1068 IRB_REFRELE(pire->ire_bucket); 1069 ire_refrele(pire); 1070 } 1071 1072 if (ire->ire_type != IRE_CACHE) { 1073 /* 1074 * For ire's with with host mask see if there is an entry 1075 * in the cache. If there is one flush the whole cache as 1076 * there might be multiple entries due to RTF_MULTIRT (CGTP). 1077 * If no entry is found than there is no need to flush the 1078 * cache. 1079 */ 1080 1081 if (ip_mask_to_plen_v6(&ire->ire_mask_v6) == IPV6_ABITS) { 1082 ire_t *lire; 1083 lire = ire_ctable_lookup_v6(&ire->ire_addr_v6, NULL, 1084 IRE_CACHE, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); 1085 if (lire != NULL) { 1086 ire_refrele(lire); 1087 ire_flush_cache_v6(ire, IRE_FLUSH_ADD); 1088 } 1089 } else { 1090 ire_flush_cache_v6(ire, IRE_FLUSH_ADD); 1091 } 1092 } 1093 1094 *ire_p = ire; 1095 return (0); 1096 } 1097 1098 /* 1099 * Search for all HOST REDIRECT routes that are 1100 * pointing at the specified gateway and 1101 * delete them. This routine is called only 1102 * when a default gateway is going away. 1103 */ 1104 static void 1105 ire_delete_host_redirects_v6(const in6_addr_t *gateway) 1106 { 1107 irb_t *irb_ptr; 1108 irb_t *irb; 1109 ire_t *ire; 1110 in6_addr_t gw_addr_v6; 1111 int i; 1112 1113 /* get the hash table for HOST routes */ 1114 irb_ptr = ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)]; 1115 if (irb_ptr == NULL) 1116 return; 1117 for (i = 0; (i < ip6_ftable_hash_size); i++) { 1118 irb = &irb_ptr[i]; 1119 IRB_REFHOLD(irb); 1120 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 1121 if (ire->ire_type != IRE_HOST_REDIRECT) 1122 continue; 1123 mutex_enter(&ire->ire_lock); 1124 gw_addr_v6 = ire->ire_gateway_addr_v6; 1125 mutex_exit(&ire->ire_lock); 1126 if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) 1127 ire_delete(ire); 1128 } 1129 IRB_REFRELE(irb); 1130 } 1131 } 1132 1133 /* 1134 * Delete all the cache entries with this 'addr'. This is the IPv6 counterpart 1135 * of ip_ire_clookup_and_delete. The difference being this function does not 1136 * return any value. IPv6 processing of a gratuitous ARP, as it stands, is 1137 * different than IPv4 in that, regardless of the presence of a cache entry 1138 * for this address, an ire_walk_v6 is done. Another difference is that unlike 1139 * in the case of IPv4 this does not take an ipif_t argument, since it is only 1140 * called by ip_arp_news and the match is always only on the address. 1141 */ 1142 void 1143 ip_ire_clookup_and_delete_v6(const in6_addr_t *addr) 1144 { 1145 irb_t *irb; 1146 ire_t *cire; 1147 boolean_t found = B_FALSE; 1148 1149 irb = &ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, ip6_cache_table_size)]; 1150 IRB_REFHOLD(irb); 1151 for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) { 1152 if (cire->ire_marks == IRE_MARK_CONDEMNED) 1153 continue; 1154 if (IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, addr)) { 1155 1156 /* This signifies start of a match */ 1157 if (!found) 1158 found = B_TRUE; 1159 if (cire->ire_type == IRE_CACHE) { 1160 if (cire->ire_nce != NULL) 1161 ndp_delete(cire->ire_nce); 1162 ire_delete_v6(cire); 1163 } 1164 /* End of the match */ 1165 } else if (found) 1166 break; 1167 } 1168 IRB_REFRELE(irb); 1169 } 1170 1171 /* 1172 * Delete the specified IRE. 1173 * All calls should use ire_delete(). 1174 * Sometimes called as writer though not required by this function. 1175 * 1176 * NOTE : This function is called only if the ire was added 1177 * in the list. 1178 */ 1179 void 1180 ire_delete_v6(ire_t *ire) 1181 { 1182 in6_addr_t gw_addr_v6; 1183 1184 ASSERT(ire->ire_refcnt >= 1); 1185 ASSERT(ire->ire_ipversion == IPV6_VERSION); 1186 1187 if (ire->ire_type != IRE_CACHE) 1188 ire_flush_cache_v6(ire, IRE_FLUSH_DELETE); 1189 if (ire->ire_type == IRE_DEFAULT) { 1190 /* 1191 * when a default gateway is going away 1192 * delete all the host redirects pointing at that 1193 * gateway. 1194 */ 1195 mutex_enter(&ire->ire_lock); 1196 gw_addr_v6 = ire->ire_gateway_addr_v6; 1197 mutex_exit(&ire->ire_lock); 1198 ire_delete_host_redirects_v6(&gw_addr_v6); 1199 } 1200 } 1201 1202 /* 1203 * ire_walk routine to delete all IRE_CACHE and IRE_HOST_REDIRECT 1204 * entries. 1205 */ 1206 /*ARGSUSED1*/ 1207 void 1208 ire_delete_cache_v6(ire_t *ire, char *arg) 1209 { 1210 char addrstr1[INET6_ADDRSTRLEN]; 1211 char addrstr2[INET6_ADDRSTRLEN]; 1212 1213 if (ire->ire_type & (IRE_CACHE | IRE_HOST_REDIRECT)) { 1214 ip1dbg(("ire_delete_cache_v6: deleted %s type %d through %s\n", 1215 inet_ntop(AF_INET6, &ire->ire_addr_v6, 1216 addrstr1, sizeof (addrstr1)), 1217 ire->ire_type, 1218 inet_ntop(AF_INET6, &ire->ire_gateway_addr_v6, 1219 addrstr2, sizeof (addrstr2)))); 1220 ire_delete(ire); 1221 } 1222 1223 } 1224 1225 /* 1226 * ire_walk routine to delete all IRE_CACHE/IRE_HOST_REDIRECT entries 1227 * that have a given gateway address. 1228 */ 1229 void 1230 ire_delete_cache_gw_v6(ire_t *ire, char *addr) 1231 { 1232 in6_addr_t *gw_addr = (in6_addr_t *)addr; 1233 char buf1[INET6_ADDRSTRLEN]; 1234 char buf2[INET6_ADDRSTRLEN]; 1235 in6_addr_t ire_gw_addr_v6; 1236 1237 if (!(ire->ire_type & (IRE_CACHE|IRE_HOST_REDIRECT))) 1238 return; 1239 1240 mutex_enter(&ire->ire_lock); 1241 ire_gw_addr_v6 = ire->ire_gateway_addr_v6; 1242 mutex_exit(&ire->ire_lock); 1243 1244 if (IN6_ARE_ADDR_EQUAL(&ire_gw_addr_v6, gw_addr)) { 1245 ip1dbg(("ire_delete_cache_gw_v6: deleted %s type %d to %s\n", 1246 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, 1247 buf1, sizeof (buf1)), 1248 ire->ire_type, 1249 inet_ntop(AF_INET6, &ire_gw_addr_v6, 1250 buf2, sizeof (buf2)))); 1251 ire_delete(ire); 1252 } 1253 } 1254 1255 /* 1256 * Remove all IRE_CACHE entries that match 1257 * the ire specified. (Sometimes called 1258 * as writer though not required by this function.) 1259 * 1260 * The flag argument indicates if the 1261 * flush request is due to addition 1262 * of new route (IRE_FLUSH_ADD) or deletion of old 1263 * route (IRE_FLUSH_DELETE). 1264 * 1265 * This routine takes only the IREs from the forwarding 1266 * table and flushes the corresponding entries from 1267 * the cache table. 1268 * 1269 * When flushing due to the deletion of an old route, it 1270 * just checks the cache handles (ire_phandle and ire_ihandle) and 1271 * deletes the ones that match. 1272 * 1273 * When flushing due to the creation of a new route, it checks 1274 * if a cache entry's address matches the one in the IRE and 1275 * that the cache entry's parent has a less specific mask than the 1276 * one in IRE. The destination of such a cache entry could be the 1277 * gateway for other cache entries, so we need to flush those as 1278 * well by looking for gateway addresses matching the IRE's address. 1279 */ 1280 void 1281 ire_flush_cache_v6(ire_t *ire, int flag) 1282 { 1283 int i; 1284 ire_t *cire; 1285 irb_t *irb; 1286 1287 if (ire->ire_type & IRE_CACHE) 1288 return; 1289 1290 /* 1291 * If a default is just created, there is no point 1292 * in going through the cache, as there will not be any 1293 * cached ires. 1294 */ 1295 if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD) 1296 return; 1297 if (flag == IRE_FLUSH_ADD) { 1298 /* 1299 * This selective flush is 1300 * due to the addition of 1301 * new IRE. 1302 */ 1303 for (i = 0; i < ip6_cache_table_size; i++) { 1304 irb = &ip_cache_table_v6[i]; 1305 if ((cire = irb->irb_ire) == NULL) 1306 continue; 1307 IRB_REFHOLD(irb); 1308 for (cire = irb->irb_ire; cire != NULL; 1309 cire = cire->ire_next) { 1310 if (cire->ire_type != IRE_CACHE) 1311 continue; 1312 /* 1313 * If 'cire' belongs to the same subnet 1314 * as the new ire being added, and 'cire' 1315 * is derived from a prefix that is less 1316 * specific than the new ire being added, 1317 * we need to flush 'cire'; for instance, 1318 * when a new interface comes up. 1319 */ 1320 if ((V6_MASK_EQ_2(cire->ire_addr_v6, 1321 ire->ire_mask_v6, ire->ire_addr_v6) && 1322 (ip_mask_to_plen_v6(&cire->ire_cmask_v6) <= 1323 ire->ire_masklen))) { 1324 ire_delete(cire); 1325 continue; 1326 } 1327 /* 1328 * This is the case when the ire_gateway_addr 1329 * of 'cire' belongs to the same subnet as 1330 * the new ire being added. 1331 * Flushing such ires is sometimes required to 1332 * avoid misrouting: say we have a machine with 1333 * two interfaces (I1 and I2), a default router 1334 * R on the I1 subnet, and a host route to an 1335 * off-link destination D with a gateway G on 1336 * the I2 subnet. 1337 * Under normal operation, we will have an 1338 * on-link cache entry for G and an off-link 1339 * cache entry for D with G as ire_gateway_addr, 1340 * traffic to D will reach its destination 1341 * through gateway G. 1342 * If the administrator does 'ifconfig I2 down', 1343 * the cache entries for D and G will be 1344 * flushed. However, G will now be resolved as 1345 * an off-link destination using R (the default 1346 * router) as gateway. Then D will also be 1347 * resolved as an off-link destination using G 1348 * as gateway - this behavior is due to 1349 * compatibility reasons, see comment in 1350 * ire_ihandle_lookup_offlink(). Traffic to D 1351 * will go to the router R and probably won't 1352 * reach the destination. 1353 * The administrator then does 'ifconfig I2 up'. 1354 * Since G is on the I2 subnet, this routine 1355 * will flush its cache entry. It must also 1356 * flush the cache entry for D, otherwise 1357 * traffic will stay misrouted until the IRE 1358 * times out. 1359 */ 1360 if (V6_MASK_EQ_2(cire->ire_gateway_addr_v6, 1361 ire->ire_mask_v6, ire->ire_addr_v6)) { 1362 ire_delete(cire); 1363 continue; 1364 } 1365 } 1366 IRB_REFRELE(irb); 1367 } 1368 } else { 1369 /* 1370 * delete the cache entries based on 1371 * handle in the IRE as this IRE is 1372 * being deleted/changed. 1373 */ 1374 for (i = 0; i < ip6_cache_table_size; i++) { 1375 irb = &ip_cache_table_v6[i]; 1376 if ((cire = irb->irb_ire) == NULL) 1377 continue; 1378 IRB_REFHOLD(irb); 1379 for (cire = irb->irb_ire; cire != NULL; 1380 cire = cire->ire_next) { 1381 if (cire->ire_type != IRE_CACHE) 1382 continue; 1383 if ((cire->ire_phandle == 0 || 1384 cire->ire_phandle != ire->ire_phandle) && 1385 (cire->ire_ihandle == 0 || 1386 cire->ire_ihandle != ire->ire_ihandle)) 1387 continue; 1388 ire_delete(cire); 1389 } 1390 IRB_REFRELE(irb); 1391 } 1392 } 1393 } 1394 1395 /* 1396 * Matches the arguments passed with the values in the ire. 1397 * 1398 * Note: for match types that match using "ipif" passed in, ipif 1399 * must be checked for non-NULL before calling this routine. 1400 */ 1401 static boolean_t 1402 ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask, 1403 const in6_addr_t *gateway, int type, const ipif_t *ipif, zoneid_t zoneid, 1404 uint32_t ihandle, const ts_label_t *tsl, int match_flags) 1405 { 1406 in6_addr_t masked_addr; 1407 in6_addr_t gw_addr_v6; 1408 ill_t *ire_ill = NULL, *dst_ill; 1409 ill_t *ipif_ill = NULL; 1410 ill_group_t *ire_ill_group = NULL; 1411 ill_group_t *ipif_ill_group = NULL; 1412 ipif_t *src_ipif; 1413 1414 ASSERT(ire->ire_ipversion == IPV6_VERSION); 1415 ASSERT(addr != NULL); 1416 ASSERT(mask != NULL); 1417 ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL); 1418 ASSERT((!(match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP))) || 1419 (ipif != NULL && ipif->ipif_isv6)); 1420 ASSERT(!(match_flags & MATCH_IRE_WQ)); 1421 1422 /* 1423 * HIDDEN cache entries have to be looked up specifically with 1424 * MATCH_IRE_MARK_HIDDEN. MATCH_IRE_MARK_HIDDEN is usually set 1425 * when the interface is FAILED or INACTIVE. In that case, 1426 * any IRE_CACHES that exists should be marked with 1427 * IRE_MARK_HIDDEN. So, we don't really need to match below 1428 * for IRE_MARK_HIDDEN. But we do so for consistency. 1429 */ 1430 if (!(match_flags & MATCH_IRE_MARK_HIDDEN) && 1431 (ire->ire_marks & IRE_MARK_HIDDEN)) 1432 return (B_FALSE); 1433 1434 if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid && 1435 ire->ire_zoneid != ALL_ZONES) { 1436 /* 1437 * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is 1438 * valid and does not match that of ire_zoneid, a failure to 1439 * match is reported at this point. Otherwise, since some IREs 1440 * that are available in the global zone can be used in local 1441 * zones, additional checks need to be performed: 1442 * 1443 * IRE_CACHE and IRE_LOOPBACK entries should 1444 * never be matched in this situation. 1445 * 1446 * IRE entries that have an interface associated with them 1447 * should in general not match unless they are an IRE_LOCAL 1448 * or in the case when MATCH_IRE_DEFAULT has been set in 1449 * the caller. In the case of the former, checking of the 1450 * other fields supplied should take place. 1451 * 1452 * In the case where MATCH_IRE_DEFAULT has been set, 1453 * all of the ipif's associated with the IRE's ill are 1454 * checked to see if there is a matching zoneid. If any 1455 * one ipif has a matching zoneid, this IRE is a 1456 * potential candidate so checking of the other fields 1457 * takes place. 1458 * 1459 * In the case where the IRE_INTERFACE has a usable source 1460 * address (indicated by ill_usesrc_ifindex) in the 1461 * correct zone then it's permitted to return this IRE 1462 */ 1463 if (match_flags & MATCH_IRE_ZONEONLY) 1464 return (B_FALSE); 1465 if (ire->ire_type & (IRE_CACHE | IRE_LOOPBACK)) 1466 return (B_FALSE); 1467 /* 1468 * Note, IRE_INTERFACE can have the stq as NULL. For 1469 * example, if the default multicast route is tied to 1470 * the loopback address. 1471 */ 1472 if ((ire->ire_type & IRE_INTERFACE) && 1473 (ire->ire_stq != NULL)) { 1474 dst_ill = (ill_t *)ire->ire_stq->q_ptr; 1475 /* 1476 * If there is a usable source address in the 1477 * zone, then it's ok to return an 1478 * IRE_INTERFACE 1479 */ 1480 if ((dst_ill->ill_usesrc_ifindex != 0) && 1481 (src_ipif = ipif_select_source_v6(dst_ill, addr, 1482 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid)) 1483 != NULL) { 1484 ip3dbg(("ire_match_args: src_ipif %p" 1485 " dst_ill %p", (void *)src_ipif, 1486 (void *)dst_ill)); 1487 ipif_refrele(src_ipif); 1488 } else { 1489 ip3dbg(("ire_match_args: src_ipif NULL" 1490 " dst_ill %p\n", (void *)dst_ill)); 1491 return (B_FALSE); 1492 } 1493 } 1494 if (ire->ire_ipif != NULL && ire->ire_type != IRE_LOCAL && 1495 !(ire->ire_type & IRE_INTERFACE)) { 1496 ipif_t *tipif; 1497 1498 if ((match_flags & MATCH_IRE_DEFAULT) == 0) 1499 return (B_FALSE); 1500 mutex_enter(&ire->ire_ipif->ipif_ill->ill_lock); 1501 for (tipif = ire->ire_ipif->ipif_ill->ill_ipif; 1502 tipif != NULL; tipif = tipif->ipif_next) { 1503 if (IPIF_CAN_LOOKUP(tipif) && 1504 (tipif->ipif_flags & IPIF_UP) && 1505 (tipif->ipif_zoneid == zoneid || 1506 tipif->ipif_zoneid == ALL_ZONES)) 1507 break; 1508 } 1509 mutex_exit(&ire->ire_ipif->ipif_ill->ill_lock); 1510 if (tipif == NULL) 1511 return (B_FALSE); 1512 } 1513 } 1514 1515 if (match_flags & MATCH_IRE_GW) { 1516 mutex_enter(&ire->ire_lock); 1517 gw_addr_v6 = ire->ire_gateway_addr_v6; 1518 mutex_exit(&ire->ire_lock); 1519 } 1520 /* 1521 * For IRE_CACHES, MATCH_IRE_ILL/ILL_GROUP really means that 1522 * somebody wants to send out on a particular interface which 1523 * is given by ire_stq and hence use ire_stq to derive the ill 1524 * value. ire_ipif for IRE_CACHES is just the 1525 * means of getting a source address i.e ire_src_addr_v6 = 1526 * ire->ire_ipif->ipif_src_addr_v6. 1527 */ 1528 if (match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP)) { 1529 ire_ill = ire_to_ill(ire); 1530 if (ire_ill != NULL) 1531 ire_ill_group = ire_ill->ill_group; 1532 ipif_ill = ipif->ipif_ill; 1533 ipif_ill_group = ipif_ill->ill_group; 1534 } 1535 1536 /* No ire_addr_v6 bits set past the mask */ 1537 ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6, 1538 ire->ire_addr_v6)); 1539 V6_MASK_COPY(*addr, *mask, masked_addr); 1540 1541 if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) && 1542 ((!(match_flags & MATCH_IRE_GW)) || 1543 IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) && 1544 ((!(match_flags & MATCH_IRE_TYPE)) || 1545 (ire->ire_type & type)) && 1546 ((!(match_flags & MATCH_IRE_SRC)) || 1547 IN6_ARE_ADDR_EQUAL(&ire->ire_src_addr_v6, 1548 &ipif->ipif_v6src_addr)) && 1549 ((!(match_flags & MATCH_IRE_IPIF)) || 1550 (ire->ire_ipif == ipif)) && 1551 ((!(match_flags & MATCH_IRE_MARK_HIDDEN)) || 1552 (ire->ire_type != IRE_CACHE || 1553 ire->ire_marks & IRE_MARK_HIDDEN)) && 1554 ((!(match_flags & MATCH_IRE_ILL)) || 1555 (ire_ill == ipif_ill)) && 1556 ((!(match_flags & MATCH_IRE_IHANDLE)) || 1557 (ire->ire_ihandle == ihandle)) && 1558 ((!(match_flags & MATCH_IRE_ILL_GROUP)) || 1559 (ire_ill == ipif_ill) || 1560 (ire_ill_group != NULL && 1561 ire_ill_group == ipif_ill_group)) && 1562 ((!(match_flags & MATCH_IRE_SECATTR)) || 1563 (!is_system_labeled()) || 1564 (tsol_ire_match_gwattr(ire, tsl) == 0))) { 1565 /* We found the matched IRE */ 1566 return (B_TRUE); 1567 } 1568 return (B_FALSE); 1569 } 1570 1571 /* 1572 * Lookup for a route in all the tables 1573 */ 1574 ire_t * 1575 ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 1576 const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 1577 zoneid_t zoneid, const ts_label_t *tsl, int flags) 1578 { 1579 ire_t *ire = NULL; 1580 1581 /* 1582 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 1583 * MATCH_IRE_ILL is set. 1584 */ 1585 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 1586 (ipif == NULL)) 1587 return (NULL); 1588 1589 /* 1590 * might be asking for a cache lookup, 1591 * This is not best way to lookup cache, 1592 * user should call ire_cache_lookup directly. 1593 * 1594 * If MATCH_IRE_TYPE was set, first lookup in the cache table and then 1595 * in the forwarding table, if the applicable type flags were set. 1596 */ 1597 if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) { 1598 ire = ire_ctable_lookup_v6(addr, gateway, type, ipif, zoneid, 1599 tsl, flags); 1600 if (ire != NULL) 1601 return (ire); 1602 } 1603 if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) { 1604 ire = ire_ftable_lookup_v6(addr, mask, gateway, type, ipif, 1605 pire, zoneid, 0, tsl, flags); 1606 } 1607 return (ire); 1608 } 1609 1610 /* 1611 * Lookup a route in forwarding table. 1612 * specific lookup is indicated by passing the 1613 * required parameters and indicating the 1614 * match required in flag field. 1615 * 1616 * Looking for default route can be done in three ways 1617 * 1) pass mask as ipv6_all_zeros and set MATCH_IRE_MASK in flags field 1618 * along with other matches. 1619 * 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags 1620 * field along with other matches. 1621 * 3) if the destination and mask are passed as zeros. 1622 * 1623 * A request to return a default route if no route 1624 * is found, can be specified by setting MATCH_IRE_DEFAULT 1625 * in flags. 1626 * 1627 * It does not support recursion more than one level. It 1628 * will do recursive lookup only when the lookup maps to 1629 * a prefix or default route and MATCH_IRE_RECURSIVE flag is passed. 1630 * 1631 * If the routing table is setup to allow more than one level 1632 * of recursion, the cleaning up cache table will not work resulting 1633 * in invalid routing. 1634 * 1635 * Supports link-local addresses by following the ipif/ill when recursing. 1636 * 1637 * NOTE : When this function returns NULL, pire has already been released. 1638 * pire is valid only when this function successfully returns an 1639 * ire. 1640 */ 1641 ire_t * 1642 ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 1643 const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 1644 zoneid_t zoneid, uint32_t ihandle, const ts_label_t *tsl, int flags) 1645 { 1646 irb_t *irb_ptr; 1647 ire_t *rire; 1648 ire_t *ire = NULL; 1649 ire_t *saved_ire; 1650 nce_t *nce; 1651 int i; 1652 in6_addr_t gw_addr_v6; 1653 1654 ASSERT(addr != NULL); 1655 ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL); 1656 ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 1657 ASSERT(ipif == NULL || ipif->ipif_isv6); 1658 ASSERT(!(flags & MATCH_IRE_WQ)); 1659 1660 /* 1661 * When we return NULL from this function, we should make 1662 * sure that *pire is NULL so that the callers will not 1663 * wrongly REFRELE the pire. 1664 */ 1665 if (pire != NULL) 1666 *pire = NULL; 1667 /* 1668 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 1669 * MATCH_IRE_ILL is set. 1670 */ 1671 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 1672 (ipif == NULL)) 1673 return (NULL); 1674 1675 /* 1676 * If the mask is known, the lookup 1677 * is simple, if the mask is not known 1678 * we need to search. 1679 */ 1680 if (flags & MATCH_IRE_MASK) { 1681 uint_t masklen; 1682 1683 masklen = ip_mask_to_plen_v6(mask); 1684 if (ip_forwarding_table_v6[masklen] == NULL) 1685 return (NULL); 1686 irb_ptr = &(ip_forwarding_table_v6[masklen][ 1687 IRE_ADDR_MASK_HASH_V6(*addr, *mask, ip6_ftable_hash_size)]); 1688 rw_enter(&irb_ptr->irb_lock, RW_READER); 1689 for (ire = irb_ptr->irb_ire; ire != NULL; 1690 ire = ire->ire_next) { 1691 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1692 continue; 1693 if (ire_match_args_v6(ire, addr, mask, gateway, type, 1694 ipif, zoneid, ihandle, tsl, flags)) 1695 goto found_ire; 1696 } 1697 rw_exit(&irb_ptr->irb_lock); 1698 } else { 1699 /* 1700 * In this case we don't know the mask, we need to 1701 * search the table assuming different mask sizes. 1702 * we start with 128 bit mask, we don't allow default here. 1703 */ 1704 for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) { 1705 in6_addr_t tmpmask; 1706 1707 if ((ip_forwarding_table_v6[i]) == NULL) 1708 continue; 1709 (void) ip_plen_to_mask_v6(i, &tmpmask); 1710 irb_ptr = &ip_forwarding_table_v6[i][ 1711 IRE_ADDR_MASK_HASH_V6(*addr, tmpmask, 1712 ip6_ftable_hash_size)]; 1713 rw_enter(&irb_ptr->irb_lock, RW_READER); 1714 for (ire = irb_ptr->irb_ire; ire != NULL; 1715 ire = ire->ire_next) { 1716 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1717 continue; 1718 if (ire_match_args_v6(ire, addr, 1719 &ire->ire_mask_v6, gateway, type, ipif, 1720 zoneid, ihandle, tsl, flags)) 1721 goto found_ire; 1722 } 1723 rw_exit(&irb_ptr->irb_lock); 1724 } 1725 } 1726 1727 /* 1728 * We come here if no route has yet been found. 1729 * 1730 * Handle the case where default route is 1731 * requested by specifying type as one of the possible 1732 * types for that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE). 1733 * 1734 * If MATCH_IRE_MASK is specified, then the appropriate default route 1735 * would have been found above if it exists so it isn't looked up here. 1736 * If MATCH_IRE_DEFAULT was also specified, then a default route will be 1737 * searched for later. 1738 */ 1739 if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE && 1740 (type & (IRE_DEFAULT | IRE_INTERFACE))) { 1741 if (ip_forwarding_table_v6[0] != NULL) { 1742 /* addr & mask is zero for defaults */ 1743 irb_ptr = &ip_forwarding_table_v6[0][ 1744 IRE_ADDR_HASH_V6(ipv6_all_zeros, 1745 ip6_ftable_hash_size)]; 1746 rw_enter(&irb_ptr->irb_lock, RW_READER); 1747 for (ire = irb_ptr->irb_ire; ire != NULL; 1748 ire = ire->ire_next) { 1749 1750 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1751 continue; 1752 1753 if (ire_match_args_v6(ire, addr, 1754 &ipv6_all_zeros, gateway, type, ipif, 1755 zoneid, ihandle, tsl, flags)) 1756 goto found_ire; 1757 } 1758 rw_exit(&irb_ptr->irb_lock); 1759 } 1760 } 1761 /* 1762 * We come here only if no route is found. 1763 * see if the default route can be used which is allowed 1764 * only if the default matching criteria is specified. 1765 * The ipv6_ire_default_count tracks the number of IRE_DEFAULT 1766 * entries. However, the ip_forwarding_table_v6[0] also contains 1767 * interface routes thus the count can be zero. 1768 */ 1769 saved_ire = NULL; 1770 if ((flags & (MATCH_IRE_DEFAULT | MATCH_IRE_MASK)) == 1771 MATCH_IRE_DEFAULT) { 1772 ire_t *ire_origin; 1773 uint_t g_index; 1774 uint_t index; 1775 1776 if (ip_forwarding_table_v6[0] == NULL) 1777 return (NULL); 1778 irb_ptr = &(ip_forwarding_table_v6[0])[0]; 1779 1780 /* 1781 * Keep a tab on the bucket while looking the IRE_DEFAULT 1782 * entries. We need to keep track of a particular IRE 1783 * (ire_origin) so this ensures that it will not be unlinked 1784 * from the hash list during the recursive lookup below. 1785 */ 1786 IRB_REFHOLD(irb_ptr); 1787 ire = irb_ptr->irb_ire; 1788 if (ire == NULL) { 1789 IRB_REFRELE(irb_ptr); 1790 return (NULL); 1791 } 1792 1793 /* 1794 * Get the index first, since it can be changed by other 1795 * threads. Then get to the right default route skipping 1796 * default interface routes if any. As we hold a reference on 1797 * the IRE bucket, ipv6_ire_default_count can only increase so 1798 * we can't reach the end of the hash list unexpectedly. 1799 */ 1800 if (ipv6_ire_default_count != 0) { 1801 g_index = ipv6_ire_default_index++; 1802 index = g_index % ipv6_ire_default_count; 1803 while (index != 0) { 1804 if (!(ire->ire_type & IRE_INTERFACE)) 1805 index--; 1806 ire = ire->ire_next; 1807 } 1808 ASSERT(ire != NULL); 1809 } else { 1810 /* 1811 * No default route, so we only have default interface 1812 * routes: don't enter the first loop. 1813 */ 1814 ire = NULL; 1815 } 1816 1817 /* 1818 * Round-robin the default routers list looking for a neighbor 1819 * that matches the passed in parameters and is reachable. If 1820 * none found, just return a route from the default router list 1821 * if it exists. If we can't find a default route (IRE_DEFAULT), 1822 * look for interface default routes. 1823 * We start with the ire we found above and we walk the hash 1824 * list until we're back where we started, see 1825 * ire_get_next_default_ire(). It doesn't matter if default 1826 * routes are added or deleted by other threads - we know this 1827 * ire will stay in the list because we hold a reference on the 1828 * ire bucket. 1829 * NB: if we only have interface default routes, ire is NULL so 1830 * we don't even enter this loop (see above). 1831 */ 1832 ire_origin = ire; 1833 for (; ire != NULL; 1834 ire = ire_get_next_default_ire(ire, ire_origin)) { 1835 1836 if (ire_match_args_v6(ire, addr, 1837 &ipv6_all_zeros, gateway, type, ipif, 1838 zoneid, ihandle, tsl, flags)) { 1839 int match_flags; 1840 1841 /* 1842 * We have something to work with. 1843 * If we can find a resolved/reachable 1844 * entry, we will use this. Otherwise 1845 * we'll try to find an entry that has 1846 * a resolved cache entry. We will fallback 1847 * on this if we don't find anything else. 1848 */ 1849 if (saved_ire == NULL) 1850 saved_ire = ire; 1851 mutex_enter(&ire->ire_lock); 1852 gw_addr_v6 = ire->ire_gateway_addr_v6; 1853 mutex_exit(&ire->ire_lock); 1854 match_flags = MATCH_IRE_ILL_GROUP | 1855 MATCH_IRE_SECATTR; 1856 rire = ire_ctable_lookup_v6(&gw_addr_v6, NULL, 1857 0, ire->ire_ipif, zoneid, tsl, match_flags); 1858 if (rire != NULL) { 1859 nce = rire->ire_nce; 1860 if (nce != NULL && 1861 NCE_ISREACHABLE(nce) && 1862 nce->nce_flags & NCE_F_ISROUTER) { 1863 ire_refrele(rire); 1864 IRE_REFHOLD(ire); 1865 IRB_REFRELE(irb_ptr); 1866 goto found_ire_held; 1867 } else if (nce != NULL && 1868 !(nce->nce_flags & 1869 NCE_F_ISROUTER)) { 1870 /* 1871 * Make sure we don't use 1872 * this ire 1873 */ 1874 if (saved_ire == ire) 1875 saved_ire = NULL; 1876 } 1877 ire_refrele(rire); 1878 } else if (ipv6_ire_default_count > 1 && 1879 zoneid != ALL_ZONES) { 1880 /* 1881 * When we're in a local zone, we're 1882 * only interested in default routers 1883 * that are reachable through ipifs 1884 * within our zone. 1885 * The potentially expensive call to 1886 * ire_route_lookup_v6() is avoided when 1887 * we have only one default route. 1888 */ 1889 rire = ire_route_lookup_v6(&gw_addr_v6, 1890 NULL, NULL, 0, ire->ire_ipif, NULL, 1891 zoneid, tsl, match_flags); 1892 if (rire != NULL) { 1893 ire_refrele(rire); 1894 saved_ire = ire; 1895 } else if (saved_ire == ire) { 1896 /* 1897 * Make sure we don't use 1898 * this ire 1899 */ 1900 saved_ire = NULL; 1901 } 1902 } 1903 } 1904 } 1905 if (saved_ire != NULL) { 1906 ire = saved_ire; 1907 IRE_REFHOLD(ire); 1908 IRB_REFRELE(irb_ptr); 1909 goto found_ire_held; 1910 } else { 1911 /* 1912 * Look for a interface default route matching the 1913 * args passed in. No round robin here. Just pick 1914 * the right one. 1915 */ 1916 for (ire = irb_ptr->irb_ire; ire != NULL; 1917 ire = ire->ire_next) { 1918 1919 if (!(ire->ire_type & IRE_INTERFACE)) 1920 continue; 1921 1922 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1923 continue; 1924 1925 if (ire_match_args_v6(ire, addr, 1926 &ipv6_all_zeros, gateway, type, ipif, 1927 zoneid, ihandle, tsl, flags)) { 1928 IRE_REFHOLD(ire); 1929 IRB_REFRELE(irb_ptr); 1930 goto found_ire_held; 1931 } 1932 } 1933 IRB_REFRELE(irb_ptr); 1934 } 1935 } 1936 ASSERT(ire == NULL); 1937 ip1dbg(("ire_ftable_lookup_v6: returning NULL ire")); 1938 return (NULL); 1939 found_ire: 1940 ASSERT((ire->ire_marks & IRE_MARK_CONDEMNED) == 0); 1941 IRE_REFHOLD(ire); 1942 rw_exit(&irb_ptr->irb_lock); 1943 1944 found_ire_held: 1945 if ((flags & MATCH_IRE_RJ_BHOLE) && 1946 (ire->ire_flags & (RTF_BLACKHOLE | RTF_REJECT))) { 1947 return (ire); 1948 } 1949 /* 1950 * At this point, IRE that was found must be an IRE_FORWARDTABLE 1951 * or IRE_CACHETABLE type. If this is a recursive lookup and an 1952 * IRE_INTERFACE type was found, return that. If it was some other 1953 * IRE_FORWARDTABLE type of IRE (one of the prefix types), then it 1954 * is necessary to fill in the parent IRE pointed to by pire, and 1955 * then lookup the gateway address of the parent. For backwards 1956 * compatiblity, if this lookup returns an 1957 * IRE other than a IRE_CACHETABLE or IRE_INTERFACE, then one more level 1958 * of lookup is done. 1959 */ 1960 if (flags & MATCH_IRE_RECURSIVE) { 1961 const ipif_t *gw_ipif; 1962 int match_flags = MATCH_IRE_DSTONLY; 1963 1964 if (ire->ire_type & IRE_INTERFACE) 1965 return (ire); 1966 if (pire != NULL) 1967 *pire = ire; 1968 /* 1969 * If we can't find an IRE_INTERFACE or the caller has not 1970 * asked for pire, we need to REFRELE the saved_ire. 1971 */ 1972 saved_ire = ire; 1973 1974 /* 1975 * Currently MATCH_IRE_ILL is never used with 1976 * (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT) while 1977 * sending out packets as MATCH_IRE_ILL is used only 1978 * for communicating with on-link hosts. We can't assert 1979 * that here as RTM_GET calls this function with 1980 * MATCH_IRE_ILL | MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE. 1981 * We have already used the MATCH_IRE_ILL in determining 1982 * the right prefix route at this point. To match the 1983 * behavior of how we locate routes while sending out 1984 * packets, we don't want to use MATCH_IRE_ILL below 1985 * while locating the interface route. 1986 */ 1987 if (ire->ire_ipif != NULL) 1988 match_flags |= MATCH_IRE_ILL_GROUP; 1989 1990 mutex_enter(&ire->ire_lock); 1991 gw_addr_v6 = ire->ire_gateway_addr_v6; 1992 mutex_exit(&ire->ire_lock); 1993 1994 ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 0, 1995 ire->ire_ipif, NULL, zoneid, tsl, match_flags); 1996 if (ire == NULL) { 1997 /* 1998 * In this case we have to deal with the 1999 * MATCH_IRE_PARENT flag, which means the 2000 * parent has to be returned if ire is NULL. 2001 * The aim of this is to have (at least) a starting 2002 * ire when we want to look at all of the ires in a 2003 * bucket aimed at a single destination (as is the 2004 * case in ip_newroute_v6 for the RTF_MULTIRT 2005 * flagged routes). 2006 */ 2007 if (flags & MATCH_IRE_PARENT) { 2008 if (pire != NULL) { 2009 /* 2010 * Need an extra REFHOLD, if the 2011 * parent ire is returned via both 2012 * ire and pire. 2013 */ 2014 IRE_REFHOLD(saved_ire); 2015 } 2016 ire = saved_ire; 2017 } else { 2018 ire_refrele(saved_ire); 2019 if (pire != NULL) 2020 *pire = NULL; 2021 } 2022 return (ire); 2023 } 2024 if (ire->ire_type & (IRE_CACHETABLE | IRE_INTERFACE)) { 2025 /* 2026 * If the caller did not ask for pire, release 2027 * it now. 2028 */ 2029 if (pire == NULL) { 2030 ire_refrele(saved_ire); 2031 } 2032 return (ire); 2033 } 2034 match_flags |= MATCH_IRE_TYPE; 2035 mutex_enter(&ire->ire_lock); 2036 gw_addr_v6 = ire->ire_gateway_addr_v6; 2037 mutex_exit(&ire->ire_lock); 2038 gw_ipif = ire->ire_ipif; 2039 ire_refrele(ire); 2040 ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 2041 (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid, 2042 NULL, match_flags); 2043 if (ire == NULL) { 2044 /* 2045 * In this case we have to deal with the 2046 * MATCH_IRE_PARENT flag, which means the 2047 * parent has to be returned if ire is NULL. 2048 * The aim of this is to have (at least) a starting 2049 * ire when we want to look at all of the ires in a 2050 * bucket aimed at a single destination (as is the 2051 * case in ip_newroute_v6 for the RTF_MULTIRT 2052 * flagged routes). 2053 */ 2054 if (flags & MATCH_IRE_PARENT) { 2055 if (pire != NULL) { 2056 /* 2057 * Need an extra REFHOLD, if the 2058 * parent ire is returned via both 2059 * ire and pire. 2060 */ 2061 IRE_REFHOLD(saved_ire); 2062 } 2063 ire = saved_ire; 2064 } else { 2065 ire_refrele(saved_ire); 2066 if (pire != NULL) 2067 *pire = NULL; 2068 } 2069 return (ire); 2070 } else if (pire == NULL) { 2071 /* 2072 * If the caller did not ask for pire, release 2073 * it now. 2074 */ 2075 ire_refrele(saved_ire); 2076 } 2077 return (ire); 2078 } 2079 2080 ASSERT(pire == NULL || *pire == NULL); 2081 return (ire); 2082 } 2083 2084 /* 2085 * Delete the IRE cache for the gateway and all IRE caches whose 2086 * ire_gateway_addr_v6 points to this gateway, and allow them to 2087 * be created on demand by ip_newroute_v6. 2088 */ 2089 void 2090 ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid) 2091 { 2092 irb_t *irb; 2093 ire_t *ire; 2094 2095 irb = &ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, ip6_cache_table_size)]; 2096 IRB_REFHOLD(irb); 2097 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 2098 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2099 continue; 2100 2101 ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 2102 if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, 0, 2103 IRE_CACHE, NULL, zoneid, 0, NULL, MATCH_IRE_TYPE)) { 2104 ire_delete(ire); 2105 } 2106 } 2107 IRB_REFRELE(irb); 2108 2109 ire_walk_v6(ire_delete_cache_gw_v6, (char *)addr, zoneid); 2110 } 2111 2112 /* 2113 * Looks up cache table for a route. 2114 * specific lookup can be indicated by 2115 * passing the MATCH_* flags and the 2116 * necessary parameters. 2117 */ 2118 ire_t * 2119 ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway, 2120 int type, const ipif_t *ipif, zoneid_t zoneid, const ts_label_t *tsl, 2121 int flags) 2122 { 2123 ire_t *ire; 2124 irb_t *irb_ptr; 2125 ASSERT(addr != NULL); 2126 ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 2127 2128 /* 2129 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 2130 * MATCH_IRE_ILL is set. 2131 */ 2132 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 2133 (ipif == NULL)) 2134 return (NULL); 2135 2136 irb_ptr = &ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 2137 ip6_cache_table_size)]; 2138 rw_enter(&irb_ptr->irb_lock, RW_READER); 2139 for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 2140 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2141 continue; 2142 2143 ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 2144 if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, gateway, 2145 type, ipif, zoneid, 0, tsl, flags)) { 2146 IRE_REFHOLD(ire); 2147 rw_exit(&irb_ptr->irb_lock); 2148 return (ire); 2149 } 2150 } 2151 rw_exit(&irb_ptr->irb_lock); 2152 return (NULL); 2153 } 2154 2155 /* 2156 * Lookup cache. Don't return IRE_MARK_HIDDEN entries. Callers 2157 * should use ire_ctable_lookup with MATCH_IRE_MARK_HIDDEN to get 2158 * to the hidden ones. 2159 */ 2160 ire_t * 2161 ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid, 2162 const ts_label_t *tsl) 2163 { 2164 irb_t *irb_ptr; 2165 ire_t *ire; 2166 2167 irb_ptr = &ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 2168 ip6_cache_table_size)]; 2169 rw_enter(&irb_ptr->irb_lock, RW_READER); 2170 for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 2171 if (ire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 2172 continue; 2173 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, addr)) { 2174 /* 2175 * Finally, check if the security policy has any 2176 * restriction on using this route for the specified 2177 * message. 2178 */ 2179 if (tsl != NULL && 2180 ire->ire_gw_secattr != NULL && 2181 tsol_ire_match_gwattr(ire, tsl) != 0) { 2182 continue; 2183 } 2184 2185 if (zoneid == ALL_ZONES || ire->ire_zoneid == zoneid || 2186 ire->ire_zoneid == ALL_ZONES || 2187 ire->ire_type == IRE_LOCAL) { 2188 IRE_REFHOLD(ire); 2189 rw_exit(&irb_ptr->irb_lock); 2190 return (ire); 2191 } 2192 } 2193 } 2194 rw_exit(&irb_ptr->irb_lock); 2195 return (NULL); 2196 } 2197 2198 /* 2199 * Locate the interface ire that is tied to the cache ire 'cire' via 2200 * cire->ire_ihandle. 2201 * 2202 * We are trying to create the cache ire for an onlink destn. or 2203 * gateway in 'cire'. We are called from ire_add_v6() in the IRE_IF_RESOLVER 2204 * case for xresolv interfaces, after the ire has come back from 2205 * an external resolver. 2206 */ 2207 static ire_t * 2208 ire_ihandle_lookup_onlink_v6(ire_t *cire) 2209 { 2210 ire_t *ire; 2211 int match_flags; 2212 int i; 2213 int j; 2214 irb_t *irb_ptr; 2215 2216 ASSERT(cire != NULL); 2217 2218 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2219 /* 2220 * We know that the mask of the interface ire equals cire->ire_cmask. 2221 * (When ip_newroute_v6() created 'cire' for an on-link destn. 2222 * it set its cmask from the interface ire's mask) 2223 */ 2224 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 2225 NULL, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle, 2226 NULL, match_flags); 2227 if (ire != NULL) 2228 return (ire); 2229 /* 2230 * If we didn't find an interface ire above, we can't declare failure. 2231 * For backwards compatibility, we need to support prefix routes 2232 * pointing to next hop gateways that are not on-link. 2233 * 2234 * In the resolver/noresolver case, ip_newroute_v6() thinks 2235 * it is creating the cache ire for an onlink destination in 'cire'. 2236 * But 'cire' is not actually onlink, because ire_ftable_lookup_v6() 2237 * cheated it, by doing ire_route_lookup_v6() twice and returning an 2238 * interface ire. 2239 * 2240 * Eg. default - gw1 (line 1) 2241 * gw1 - gw2 (line 2) 2242 * gw2 - hme0 (line 3) 2243 * 2244 * In the above example, ip_newroute_v6() tried to create the cache ire 2245 * 'cire' for gw1, based on the interface route in line 3. The 2246 * ire_ftable_lookup_v6() above fails, because there is 2247 * no interface route to reach gw1. (it is gw2). We fall thru below. 2248 * 2249 * Do a brute force search based on the ihandle in a subset of the 2250 * forwarding tables, corresponding to cire->ire_cmask_v6. Otherwise 2251 * things become very complex, since we don't have 'pire' in this 2252 * case. (Also note that this method is not possible in the offlink 2253 * case because we don't know the mask) 2254 */ 2255 i = ip_mask_to_plen_v6(&cire->ire_cmask_v6); 2256 if ((ip_forwarding_table_v6[i]) == NULL) 2257 return (NULL); 2258 for (j = 0; j < ip6_ftable_hash_size; j++) { 2259 irb_ptr = &ip_forwarding_table_v6[i][j]; 2260 rw_enter(&irb_ptr->irb_lock, RW_READER); 2261 for (ire = irb_ptr->irb_ire; ire != NULL; 2262 ire = ire->ire_next) { 2263 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2264 continue; 2265 if ((ire->ire_type & IRE_INTERFACE) && 2266 (ire->ire_ihandle == cire->ire_ihandle)) { 2267 IRE_REFHOLD(ire); 2268 rw_exit(&irb_ptr->irb_lock); 2269 return (ire); 2270 } 2271 } 2272 rw_exit(&irb_ptr->irb_lock); 2273 } 2274 return (NULL); 2275 } 2276 2277 2278 /* 2279 * Locate the interface ire that is tied to the cache ire 'cire' via 2280 * cire->ire_ihandle. 2281 * 2282 * We are trying to create the cache ire for an offlink destn based 2283 * on the cache ire of the gateway in 'cire'. 'pire' is the prefix ire 2284 * as found by ip_newroute_v6(). We are called from ip_newroute_v6() in 2285 * the IRE_CACHE case. 2286 */ 2287 ire_t * 2288 ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire) 2289 { 2290 ire_t *ire; 2291 int match_flags; 2292 in6_addr_t gw_addr; 2293 ipif_t *gw_ipif; 2294 2295 ASSERT(cire != NULL && pire != NULL); 2296 2297 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2298 /* 2299 * ip_newroute_v6 calls ire_ftable_lookup with MATCH_IRE_ILL only 2300 * for on-link hosts. We should never be here for onlink. 2301 * Thus, use MATCH_IRE_ILL_GROUP. 2302 */ 2303 if (pire->ire_ipif != NULL) 2304 match_flags |= MATCH_IRE_ILL_GROUP; 2305 /* 2306 * We know that the mask of the interface ire equals cire->ire_cmask. 2307 * (When ip_newroute_v6() created 'cire' for an on-link destn. it set 2308 * its cmask from the interface ire's mask) 2309 */ 2310 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 0, 2311 IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2312 NULL, match_flags); 2313 if (ire != NULL) 2314 return (ire); 2315 /* 2316 * If we didn't find an interface ire above, we can't declare failure. 2317 * For backwards compatibility, we need to support prefix routes 2318 * pointing to next hop gateways that are not on-link. 2319 * 2320 * Assume we are trying to ping some offlink destn, and we have the 2321 * routing table below. 2322 * 2323 * Eg. default - gw1 <--- pire (line 1) 2324 * gw1 - gw2 (line 2) 2325 * gw2 - hme0 (line 3) 2326 * 2327 * If we already have a cache ire for gw1 in 'cire', the 2328 * ire_ftable_lookup_v6 above would have failed, since there is no 2329 * interface ire to reach gw1. We will fallthru below. 2330 * 2331 * Here we duplicate the steps that ire_ftable_lookup_v6() did in 2332 * getting 'cire' from 'pire', in the MATCH_IRE_RECURSIVE case. 2333 * The differences are the following 2334 * i. We want the interface ire only, so we call 2335 * ire_ftable_lookup_v6() instead of ire_route_lookup_v6() 2336 * ii. We look for only prefix routes in the 1st call below. 2337 * ii. We want to match on the ihandle in the 2nd call below. 2338 */ 2339 match_flags = MATCH_IRE_TYPE; 2340 if (pire->ire_ipif != NULL) 2341 match_flags |= MATCH_IRE_ILL_GROUP; 2342 2343 mutex_enter(&pire->ire_lock); 2344 gw_addr = pire->ire_gateway_addr_v6; 2345 mutex_exit(&pire->ire_lock); 2346 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_OFFSUBNET, 2347 pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags); 2348 if (ire == NULL) 2349 return (NULL); 2350 /* 2351 * At this point 'ire' corresponds to the entry shown in line 2. 2352 * gw_addr is 'gw2' in the example above. 2353 */ 2354 mutex_enter(&ire->ire_lock); 2355 gw_addr = ire->ire_gateway_addr_v6; 2356 mutex_exit(&ire->ire_lock); 2357 gw_ipif = ire->ire_ipif; 2358 ire_refrele(ire); 2359 2360 match_flags |= MATCH_IRE_IHANDLE; 2361 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_INTERFACE, 2362 gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2363 NULL, match_flags); 2364 return (ire); 2365 } 2366 2367 /* 2368 * Return the IRE_LOOPBACK, IRE_IF_RESOLVER or IRE_IF_NORESOLVER 2369 * ire associated with the specified ipif. 2370 * 2371 * This might occasionally be called when IPIF_UP is not set since 2372 * the IPV6_MULTICAST_IF as well as creating interface routes 2373 * allows specifying a down ipif (ipif_lookup* match ipifs that are down). 2374 * 2375 * Note that if IPIF_NOLOCAL, IPIF_NOXMIT, or IPIF_DEPRECATED is set on 2376 * the ipif this routine might return NULL. 2377 * (Sometimes called as writer though not required by this function.) 2378 */ 2379 ire_t * 2380 ipif_to_ire_v6(const ipif_t *ipif) 2381 { 2382 ire_t *ire; 2383 2384 ASSERT(ipif->ipif_isv6); 2385 if (ipif->ipif_ire_type == IRE_LOOPBACK) { 2386 ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, NULL, 2387 IRE_LOOPBACK, ipif, ALL_ZONES, NULL, 2388 (MATCH_IRE_TYPE | MATCH_IRE_IPIF)); 2389 } else if (ipif->ipif_flags & IPIF_POINTOPOINT) { 2390 /* In this case we need to lookup destination address. */ 2391 ire = ire_ftable_lookup_v6(&ipif->ipif_v6pp_dst_addr, 2392 &ipv6_all_ones, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 2393 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 2394 MATCH_IRE_MASK)); 2395 } else { 2396 ire = ire_ftable_lookup_v6(&ipif->ipif_v6subnet, 2397 &ipif->ipif_v6net_mask, NULL, IRE_INTERFACE, ipif, NULL, 2398 ALL_ZONES, 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 2399 MATCH_IRE_MASK)); 2400 } 2401 return (ire); 2402 } 2403 2404 /* 2405 * Return B_TRUE if a multirt route is resolvable 2406 * (or if no route is resolved yet), B_FALSE otherwise. 2407 * This only works in the global zone. 2408 */ 2409 boolean_t 2410 ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl) 2411 { 2412 ire_t *first_fire; 2413 ire_t *first_cire; 2414 ire_t *fire; 2415 ire_t *cire; 2416 irb_t *firb; 2417 irb_t *cirb; 2418 int unres_cnt = 0; 2419 boolean_t resolvable = B_FALSE; 2420 2421 /* Retrieve the first IRE_HOST that matches the destination */ 2422 first_fire = ire_ftable_lookup_v6(v6dstp, &ipv6_all_ones, 0, IRE_HOST, 2423 NULL, NULL, ALL_ZONES, 0, tsl, MATCH_IRE_MASK | MATCH_IRE_TYPE | 2424 MATCH_IRE_SECATTR); 2425 2426 /* No route at all */ 2427 if (first_fire == NULL) { 2428 return (B_TRUE); 2429 } 2430 2431 firb = first_fire->ire_bucket; 2432 ASSERT(firb); 2433 2434 /* Retrieve the first IRE_CACHE ire for that destination. */ 2435 first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID, tsl); 2436 2437 /* No resolved route. */ 2438 if (first_cire == NULL) { 2439 ire_refrele(first_fire); 2440 return (B_TRUE); 2441 } 2442 2443 /* At least one route is resolved. */ 2444 2445 cirb = first_cire->ire_bucket; 2446 ASSERT(cirb); 2447 2448 /* Count the number of routes to that dest that are declared. */ 2449 IRB_REFHOLD(firb); 2450 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2451 if (!(fire->ire_flags & RTF_MULTIRT)) 2452 continue; 2453 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, v6dstp)) 2454 continue; 2455 unres_cnt++; 2456 } 2457 IRB_REFRELE(firb); 2458 2459 2460 /* Then subtract the number of routes to that dst that are resolved */ 2461 IRB_REFHOLD(cirb); 2462 for (cire = first_cire; cire != NULL; cire = cire->ire_next) { 2463 if (!(cire->ire_flags & RTF_MULTIRT)) 2464 continue; 2465 if (!IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, v6dstp)) 2466 continue; 2467 if (cire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 2468 continue; 2469 unres_cnt--; 2470 } 2471 IRB_REFRELE(cirb); 2472 2473 /* At least one route is unresolved; search for a resolvable route. */ 2474 if (unres_cnt > 0) 2475 resolvable = ire_multirt_lookup_v6(&first_cire, &first_fire, 2476 MULTIRT_USESTAMP|MULTIRT_CACHEGW, tsl); 2477 2478 if (first_fire) 2479 ire_refrele(first_fire); 2480 2481 if (first_cire) 2482 ire_refrele(first_cire); 2483 2484 return (resolvable); 2485 } 2486 2487 2488 /* 2489 * Return B_TRUE and update *ire_arg and *fire_arg 2490 * if at least one resolvable route is found. 2491 * Return B_FALSE otherwise (all routes are resolved or 2492 * the remaining unresolved routes are all unresolvable). 2493 * This only works in the global zone. 2494 */ 2495 boolean_t 2496 ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, 2497 const ts_label_t *tsl) 2498 { 2499 clock_t delta; 2500 ire_t *best_fire = NULL; 2501 ire_t *best_cire = NULL; 2502 ire_t *first_fire; 2503 ire_t *first_cire; 2504 ire_t *fire; 2505 ire_t *cire; 2506 irb_t *firb = NULL; 2507 irb_t *cirb = NULL; 2508 ire_t *gw_ire; 2509 boolean_t already_resolved; 2510 boolean_t res; 2511 in6_addr_t v6dst; 2512 in6_addr_t v6gw; 2513 2514 ip2dbg(("ire_multirt_lookup_v6: *ire_arg %p, *fire_arg %p, " 2515 "flags %04x\n", (void *)*ire_arg, (void *)*fire_arg, flags)); 2516 2517 ASSERT(ire_arg); 2518 ASSERT(fire_arg); 2519 2520 /* Not an IRE_HOST ire; give up. */ 2521 if ((*fire_arg == NULL) || 2522 ((*fire_arg)->ire_type != IRE_HOST)) { 2523 return (B_FALSE); 2524 } 2525 2526 /* This is the first IRE_HOST ire for that destination. */ 2527 first_fire = *fire_arg; 2528 firb = first_fire->ire_bucket; 2529 ASSERT(firb); 2530 2531 mutex_enter(&first_fire->ire_lock); 2532 v6dst = first_fire->ire_addr_v6; 2533 mutex_exit(&first_fire->ire_lock); 2534 2535 ip2dbg(("ire_multirt_lookup_v6: dst %08x\n", 2536 ntohl(V4_PART_OF_V6(v6dst)))); 2537 2538 /* 2539 * Retrieve the first IRE_CACHE ire for that destination; 2540 * if we don't find one, no route for that dest is 2541 * resolved yet. 2542 */ 2543 first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID, tsl); 2544 if (first_cire) { 2545 cirb = first_cire->ire_bucket; 2546 } 2547 2548 ip2dbg(("ire_multirt_lookup_v6: first_cire %p\n", (void *)first_cire)); 2549 2550 /* 2551 * Search for a resolvable route, giving the top priority 2552 * to routes that can be resolved without any call to the resolver. 2553 */ 2554 IRB_REFHOLD(firb); 2555 2556 if (!IN6_IS_ADDR_MULTICAST(&v6dst)) { 2557 /* 2558 * For all multiroute IRE_HOST ires for that destination, 2559 * check if the route via the IRE_HOST's gateway is 2560 * resolved yet. 2561 */ 2562 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2563 2564 if (!(fire->ire_flags & RTF_MULTIRT)) 2565 continue; 2566 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2567 continue; 2568 2569 if (fire->ire_gw_secattr != NULL && 2570 tsol_ire_match_gwattr(fire, tsl) != 0) { 2571 continue; 2572 } 2573 2574 mutex_enter(&fire->ire_lock); 2575 v6gw = fire->ire_gateway_addr_v6; 2576 mutex_exit(&fire->ire_lock); 2577 2578 ip2dbg(("ire_multirt_lookup_v6: fire %p, " 2579 "ire_addr %08x, ire_gateway_addr %08x\n", 2580 (void *)fire, 2581 ntohl(V4_PART_OF_V6(fire->ire_addr_v6)), 2582 ntohl(V4_PART_OF_V6(v6gw)))); 2583 2584 already_resolved = B_FALSE; 2585 2586 if (first_cire) { 2587 ASSERT(cirb); 2588 2589 IRB_REFHOLD(cirb); 2590 /* 2591 * For all IRE_CACHE ires for that 2592 * destination. 2593 */ 2594 for (cire = first_cire; 2595 cire != NULL; 2596 cire = cire->ire_next) { 2597 2598 if (!(cire->ire_flags & RTF_MULTIRT)) 2599 continue; 2600 if (!IN6_ARE_ADDR_EQUAL( 2601 &cire->ire_addr_v6, &v6dst)) 2602 continue; 2603 if (cire->ire_marks & 2604 (IRE_MARK_CONDEMNED| 2605 IRE_MARK_HIDDEN)) 2606 continue; 2607 2608 if (cire->ire_gw_secattr != NULL && 2609 tsol_ire_match_gwattr(cire, 2610 tsl) != 0) { 2611 continue; 2612 } 2613 2614 /* 2615 * Check if the IRE_CACHE's gateway 2616 * matches the IRE_HOST's gateway. 2617 */ 2618 if (IN6_ARE_ADDR_EQUAL( 2619 &cire->ire_gateway_addr_v6, 2620 &v6gw)) { 2621 already_resolved = B_TRUE; 2622 break; 2623 } 2624 } 2625 IRB_REFRELE(cirb); 2626 } 2627 2628 /* 2629 * This route is already resolved; 2630 * proceed with next one. 2631 */ 2632 if (already_resolved) { 2633 ip2dbg(("ire_multirt_lookup_v6: found cire %p, " 2634 "already resolved\n", (void *)cire)); 2635 continue; 2636 } 2637 2638 /* 2639 * The route is unresolved; is it actually 2640 * resolvable, i.e. is there a cache or a resolver 2641 * for the gateway? 2642 */ 2643 gw_ire = ire_route_lookup_v6(&v6gw, 0, 0, 0, NULL, NULL, 2644 ALL_ZONES, tsl, MATCH_IRE_RECURSIVE | 2645 MATCH_IRE_SECATTR); 2646 2647 ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n", 2648 (void *)gw_ire)); 2649 2650 /* 2651 * This route can be resolved without any call to the 2652 * resolver; if the MULTIRT_CACHEGW flag is set, 2653 * give the top priority to this ire and exit the 2654 * loop. 2655 * This occurs when an resolver reply is processed 2656 * through ip_wput_nondata() 2657 */ 2658 if ((flags & MULTIRT_CACHEGW) && 2659 (gw_ire != NULL) && 2660 (gw_ire->ire_type & IRE_CACHETABLE)) { 2661 /* 2662 * Release the resolver associated to the 2663 * previous candidate best ire, if any. 2664 */ 2665 if (best_cire) { 2666 ire_refrele(best_cire); 2667 ASSERT(best_fire); 2668 } 2669 2670 best_fire = fire; 2671 best_cire = gw_ire; 2672 2673 ip2dbg(("ire_multirt_lookup_v6: found top prio " 2674 "best_fire %p, best_cire %p\n", 2675 (void *)best_fire, (void *)best_cire)); 2676 break; 2677 } 2678 2679 /* 2680 * Compute the time elapsed since our preceding 2681 * attempt to resolve that route. 2682 * If the MULTIRT_USESTAMP flag is set, we take that 2683 * route into account only if this time interval 2684 * exceeds ip_multirt_resolution_interval; 2685 * this prevents us from attempting to resolve a 2686 * broken route upon each sending of a packet. 2687 */ 2688 delta = lbolt - fire->ire_last_used_time; 2689 delta = TICK_TO_MSEC(delta); 2690 2691 res = (boolean_t) 2692 ((delta > ip_multirt_resolution_interval) || 2693 (!(flags & MULTIRT_USESTAMP))); 2694 2695 ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, " 2696 "res %d\n", 2697 (void *)fire, delta, res)); 2698 2699 if (res) { 2700 /* 2701 * A resolver exists for the gateway: save 2702 * the current IRE_HOST ire as a candidate 2703 * best ire. If we later discover that a 2704 * top priority ire exists (i.e. no need to 2705 * call the resolver), then this new ire 2706 * will be preferred to the current one. 2707 */ 2708 if (gw_ire != NULL) { 2709 if (best_fire == NULL) { 2710 ASSERT(best_cire == NULL); 2711 2712 best_fire = fire; 2713 best_cire = gw_ire; 2714 2715 ip2dbg(("ire_multirt_lookup_v6:" 2716 "found candidate " 2717 "best_fire %p, " 2718 "best_cire %p\n", 2719 (void *)best_fire, 2720 (void *)best_cire)); 2721 2722 /* 2723 * If MULTIRT_CACHEGW is not 2724 * set, we ignore the top 2725 * priority ires that can 2726 * be resolved without any 2727 * call to the resolver; 2728 * In that case, there is 2729 * actually no need 2730 * to continue the loop. 2731 */ 2732 if (!(flags & 2733 MULTIRT_CACHEGW)) { 2734 break; 2735 } 2736 continue; 2737 } 2738 } else { 2739 /* 2740 * No resolver for the gateway: the 2741 * route is not resolvable. 2742 * If the MULTIRT_SETSTAMP flag is 2743 * set, we stamp the IRE_HOST ire, 2744 * so we will not select it again 2745 * during this resolution interval. 2746 */ 2747 if (flags & MULTIRT_SETSTAMP) 2748 fire->ire_last_used_time = 2749 lbolt; 2750 } 2751 } 2752 2753 if (gw_ire != NULL) 2754 ire_refrele(gw_ire); 2755 } 2756 } else { /* IN6_IS_ADDR_MULTICAST(&v6dst) */ 2757 2758 for (fire = first_fire; 2759 fire != NULL; 2760 fire = fire->ire_next) { 2761 2762 if (!(fire->ire_flags & RTF_MULTIRT)) 2763 continue; 2764 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2765 continue; 2766 2767 if (fire->ire_gw_secattr != NULL && 2768 tsol_ire_match_gwattr(fire, tsl) != 0) { 2769 continue; 2770 } 2771 2772 already_resolved = B_FALSE; 2773 2774 mutex_enter(&fire->ire_lock); 2775 v6gw = fire->ire_gateway_addr_v6; 2776 mutex_exit(&fire->ire_lock); 2777 2778 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2779 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, tsl, 2780 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE | 2781 MATCH_IRE_SECATTR); 2782 2783 /* No resolver for the gateway; we skip this ire. */ 2784 if (gw_ire == NULL) { 2785 continue; 2786 } 2787 2788 if (first_cire) { 2789 2790 IRB_REFHOLD(cirb); 2791 /* 2792 * For all IRE_CACHE ires for that 2793 * destination. 2794 */ 2795 for (cire = first_cire; 2796 cire != NULL; 2797 cire = cire->ire_next) { 2798 2799 if (!(cire->ire_flags & RTF_MULTIRT)) 2800 continue; 2801 if (!IN6_ARE_ADDR_EQUAL( 2802 &cire->ire_addr_v6, &v6dst)) 2803 continue; 2804 if (cire->ire_marks & 2805 (IRE_MARK_CONDEMNED| 2806 IRE_MARK_HIDDEN)) 2807 continue; 2808 2809 if (cire->ire_gw_secattr != NULL && 2810 tsol_ire_match_gwattr(cire, 2811 tsl) != 0) { 2812 continue; 2813 } 2814 2815 /* 2816 * Cache entries are linked to the 2817 * parent routes using the parent handle 2818 * (ire_phandle). If no cache entry has 2819 * the same handle as fire, fire is 2820 * still unresolved. 2821 */ 2822 ASSERT(cire->ire_phandle != 0); 2823 if (cire->ire_phandle == 2824 fire->ire_phandle) { 2825 already_resolved = B_TRUE; 2826 break; 2827 } 2828 } 2829 IRB_REFRELE(cirb); 2830 } 2831 2832 /* 2833 * This route is already resolved; proceed with 2834 * next one. 2835 */ 2836 if (already_resolved) { 2837 ire_refrele(gw_ire); 2838 continue; 2839 } 2840 2841 /* 2842 * Compute the time elapsed since our preceding 2843 * attempt to resolve that route. 2844 * If the MULTIRT_USESTAMP flag is set, we take 2845 * that route into account only if this time 2846 * interval exceeds ip_multirt_resolution_interval; 2847 * this prevents us from attempting to resolve a 2848 * broken route upon each sending of a packet. 2849 */ 2850 delta = lbolt - fire->ire_last_used_time; 2851 delta = TICK_TO_MSEC(delta); 2852 2853 res = (boolean_t) 2854 ((delta > ip_multirt_resolution_interval) || 2855 (!(flags & MULTIRT_USESTAMP))); 2856 2857 ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, " 2858 "flags %04x, res %d\n", 2859 (void *)fire, delta, flags, res)); 2860 2861 if (res) { 2862 if (best_cire) { 2863 /* 2864 * Release the resolver associated 2865 * to the preceding candidate best 2866 * ire, if any. 2867 */ 2868 ire_refrele(best_cire); 2869 ASSERT(best_fire); 2870 } 2871 best_fire = fire; 2872 best_cire = gw_ire; 2873 continue; 2874 } 2875 2876 ire_refrele(gw_ire); 2877 } 2878 } 2879 2880 if (best_fire) { 2881 IRE_REFHOLD(best_fire); 2882 } 2883 IRB_REFRELE(firb); 2884 2885 /* Release the first IRE_CACHE we initially looked up, if any. */ 2886 if (first_cire) 2887 ire_refrele(first_cire); 2888 2889 /* Found a resolvable route. */ 2890 if (best_fire) { 2891 ASSERT(best_cire); 2892 2893 if (*fire_arg) 2894 ire_refrele(*fire_arg); 2895 if (*ire_arg) 2896 ire_refrele(*ire_arg); 2897 2898 /* 2899 * Update the passed arguments with the 2900 * resolvable multirt route we found 2901 */ 2902 *fire_arg = best_fire; 2903 *ire_arg = best_cire; 2904 2905 ip2dbg(("ire_multirt_lookup_v6: returning B_TRUE, " 2906 "*fire_arg %p, *ire_arg %p\n", 2907 (void *)best_fire, (void *)best_cire)); 2908 2909 return (B_TRUE); 2910 } 2911 2912 ASSERT(best_cire == NULL); 2913 2914 ip2dbg(("ire_multirt_lookup_v6: returning B_FALSE, *fire_arg %p, " 2915 "*ire_arg %p\n", 2916 (void *)*fire_arg, (void *)*ire_arg)); 2917 2918 /* No resolvable route. */ 2919 return (B_FALSE); 2920 } 2921 2922 2923 /* 2924 * Find an IRE_OFFSUBNET IRE entry for the multicast address 'v6dstp' 2925 * that goes through 'ipif'. As a fallback, a route that goes through 2926 * ipif->ipif_ill can be returned. 2927 */ 2928 ire_t * 2929 ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp) 2930 { 2931 ire_t *ire; 2932 ire_t *save_ire = NULL; 2933 ire_t *gw_ire; 2934 irb_t *irb; 2935 in6_addr_t v6gw; 2936 int match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL; 2937 2938 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, NULL, ALL_ZONES, 0, 2939 NULL, MATCH_IRE_DEFAULT); 2940 2941 if (ire == NULL) 2942 return (NULL); 2943 2944 irb = ire->ire_bucket; 2945 ASSERT(irb); 2946 2947 IRB_REFHOLD(irb); 2948 ire_refrele(ire); 2949 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 2950 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) || 2951 (ipif->ipif_zoneid != ire->ire_zoneid && 2952 ire->ire_zoneid != ALL_ZONES)) { 2953 continue; 2954 } 2955 2956 switch (ire->ire_type) { 2957 case IRE_DEFAULT: 2958 case IRE_PREFIX: 2959 case IRE_HOST: 2960 mutex_enter(&ire->ire_lock); 2961 v6gw = ire->ire_gateway_addr_v6; 2962 mutex_exit(&ire->ire_lock); 2963 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2964 IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, 2965 NULL, match_flags); 2966 2967 if (gw_ire != NULL) { 2968 if (save_ire != NULL) { 2969 ire_refrele(save_ire); 2970 } 2971 IRE_REFHOLD(ire); 2972 if (gw_ire->ire_ipif == ipif) { 2973 ire_refrele(gw_ire); 2974 2975 IRB_REFRELE(irb); 2976 return (ire); 2977 } 2978 ire_refrele(gw_ire); 2979 save_ire = ire; 2980 } 2981 break; 2982 case IRE_IF_NORESOLVER: 2983 case IRE_IF_RESOLVER: 2984 if (ire->ire_ipif == ipif) { 2985 if (save_ire != NULL) { 2986 ire_refrele(save_ire); 2987 } 2988 IRE_REFHOLD(ire); 2989 2990 IRB_REFRELE(irb); 2991 return (ire); 2992 } 2993 break; 2994 } 2995 } 2996 IRB_REFRELE(irb); 2997 2998 return (save_ire); 2999 } 3000