1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 /* 32 * This file contains routines that manipulate Internet Routing Entries (IREs). 
33 */ 34 #include <sys/types.h> 35 #include <sys/stream.h> 36 #include <sys/stropts.h> 37 #include <sys/ddi.h> 38 #include <sys/cmn_err.h> 39 40 #include <sys/systm.h> 41 #include <sys/param.h> 42 #include <sys/socket.h> 43 #include <net/if.h> 44 #include <net/route.h> 45 #include <netinet/in.h> 46 #include <net/if_dl.h> 47 #include <netinet/ip6.h> 48 #include <netinet/icmp6.h> 49 50 #include <inet/common.h> 51 #include <inet/mi.h> 52 #include <inet/ip.h> 53 #include <inet/ip6.h> 54 #include <inet/ip_ndp.h> 55 #include <inet/ip_if.h> 56 #include <inet/ip_ire.h> 57 #include <inet/ipclassifier.h> 58 #include <inet/nd.h> 59 #include <sys/kmem.h> 60 #include <sys/zone.h> 61 62 #include <sys/tsol/label.h> 63 #include <sys/tsol/tnet.h> 64 65 static ire_t ire_null; 66 67 static ire_t *ire_ihandle_lookup_onlink_v6(ire_t *cire); 68 static void ire_report_ftable_v6(ire_t *ire, char *mp); 69 static void ire_report_ctable_v6(ire_t *ire, char *mp); 70 static boolean_t ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, 71 const in6_addr_t *mask, const in6_addr_t *gateway, int type, 72 const ipif_t *ipif, zoneid_t zoneid, uint32_t ihandle, 73 const ts_label_t *tsl, int match_flags); 74 static ire_t *ire_init_v6(ire_t *, const in6_addr_t *, const in6_addr_t *, 75 const in6_addr_t *, const in6_addr_t *, uint_t *, queue_t *, queue_t *, 76 ushort_t, ipif_t *, const in6_addr_t *, uint32_t, uint32_t, uint_t, 77 const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *); 78 79 /* 80 * Named Dispatch routine to produce a formatted report on all IREs. 81 * This report is accessed by using the ndd utility to "get" ND variable 82 * "ip_ire_status_v6". 
83 */ 84 /* ARGSUSED */ 85 int 86 ip_ire_report_v6(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 87 { 88 zoneid_t zoneid; 89 ip_stack_t *ipst; 90 91 (void) mi_mpprintf(mp, 92 "IRE " MI_COL_HDRPAD_STR 93 "rfq " MI_COL_HDRPAD_STR 94 "stq " MI_COL_HDRPAD_STR 95 " zone mxfrg rtt rtt_sd ssthresh ref " 96 "rtomax tstamp_ok wscale_ok ecn_ok pmtud_ok sack sendpipe recvpipe " 97 "in/out/forward type addr mask " 98 "src gateway"); 99 /* 100 * 01234567 01234567 01234567 12345 12345 12345 12345 12345678 123 101 * 123456 123456789 123456789 123456 12345678 1234 12345678 12345678 102 * in/out/forward xxxxxxxxxx 103 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 104 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 105 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 106 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 107 */ 108 109 /* 110 * Because of the ndd constraint, at most we can have 64K buffer 111 * to put in all IRE info. So to be more efficient, just 112 * allocate a 64K buffer here, assuming we need that large buffer. 113 * This should be OK as only root can do ndd /dev/ip. 114 */ 115 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 116 /* The following may work even if we cannot get a large buf. */ 117 (void) mi_mpprintf(mp, "<< Out of buffer >>\n"); 118 return (0); 119 } 120 zoneid = Q_TO_CONN(q)->conn_zoneid; 121 if (zoneid == GLOBAL_ZONEID) 122 zoneid = ALL_ZONES; 123 ipst = CONNQ_TO_IPST(q); 124 125 ire_walk_v6(ire_report_ftable_v6, (char *)mp->b_cont, zoneid, ipst); 126 ire_walk_v6(ire_report_ctable_v6, (char *)mp->b_cont, zoneid, ipst); 127 return (0); 128 } 129 130 /* 131 * ire_walk routine invoked for ip_ire_report_v6 for each IRE. 
132 */ 133 static void 134 ire_report_ftable_v6(ire_t *ire, char *mp) 135 { 136 char buf1[INET6_ADDRSTRLEN]; 137 char buf2[INET6_ADDRSTRLEN]; 138 char buf3[INET6_ADDRSTRLEN]; 139 char buf4[INET6_ADDRSTRLEN]; 140 uint_t fo_pkt_count; 141 uint_t ib_pkt_count; 142 int ref; 143 in6_addr_t gw_addr_v6; 144 uint_t print_len, buf_len; 145 146 ASSERT(ire->ire_ipversion == IPV6_VERSION); 147 if (ire->ire_type & IRE_CACHETABLE) 148 return; 149 buf_len = ((mblk_t *)mp)->b_datap->db_lim - ((mblk_t *)mp)->b_wptr; 150 if (buf_len <= 0) 151 return; 152 153 /* Number of active references of this ire */ 154 ref = ire->ire_refcnt; 155 /* "inbound" to a non local address is a forward */ 156 ib_pkt_count = ire->ire_ib_pkt_count; 157 fo_pkt_count = 0; 158 ASSERT(!(ire->ire_type & IRE_BROADCAST)); 159 if (!(ire->ire_type & (IRE_LOCAL|IRE_BROADCAST))) { 160 fo_pkt_count = ib_pkt_count; 161 ib_pkt_count = 0; 162 } 163 164 mutex_enter(&ire->ire_lock); 165 gw_addr_v6 = ire->ire_gateway_addr_v6; 166 mutex_exit(&ire->ire_lock); 167 168 print_len = snprintf((char *)((mblk_t *)mp)->b_wptr, buf_len, 169 MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR "%5d " 170 "%05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d " 171 "%04d %08d %08d %d/%d/%d %s\n\t%s\n\t%s\n\t%s\n\t%s\n", 172 (void *)ire, (void *)ire->ire_rfq, (void *)ire->ire_stq, 173 (int)ire->ire_zoneid, 174 ire->ire_max_frag, ire->ire_uinfo.iulp_rtt, 175 ire->ire_uinfo.iulp_rtt_sd, 176 ire->ire_uinfo.iulp_ssthresh, ref, 177 ire->ire_uinfo.iulp_rtomax, 178 (ire->ire_uinfo.iulp_tstamp_ok ? 1: 0), 179 (ire->ire_uinfo.iulp_wscale_ok ? 1: 0), 180 (ire->ire_uinfo.iulp_ecn_ok ? 1: 0), 181 (ire->ire_uinfo.iulp_pmtud_ok ? 
1: 0), 182 ire->ire_uinfo.iulp_sack, 183 ire->ire_uinfo.iulp_spipe, ire->ire_uinfo.iulp_rpipe, 184 ib_pkt_count, ire->ire_ob_pkt_count, fo_pkt_count, 185 ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type), 186 inet_ntop(AF_INET6, &ire->ire_addr_v6, buf1, sizeof (buf1)), 187 inet_ntop(AF_INET6, &ire->ire_mask_v6, buf2, sizeof (buf2)), 188 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, buf3, sizeof (buf3)), 189 inet_ntop(AF_INET6, &gw_addr_v6, buf4, sizeof (buf4))); 190 if (print_len < buf_len) { 191 ((mblk_t *)mp)->b_wptr += print_len; 192 } else { 193 ((mblk_t *)mp)->b_wptr += buf_len; 194 } 195 } 196 197 /* ire_walk routine invoked for ip_ire_report_v6 for each IRE. */ 198 static void 199 ire_report_ctable_v6(ire_t *ire, char *mp) 200 { 201 char buf1[INET6_ADDRSTRLEN]; 202 char buf2[INET6_ADDRSTRLEN]; 203 char buf3[INET6_ADDRSTRLEN]; 204 char buf4[INET6_ADDRSTRLEN]; 205 uint_t fo_pkt_count; 206 uint_t ib_pkt_count; 207 int ref; 208 in6_addr_t gw_addr_v6; 209 uint_t print_len, buf_len; 210 211 if ((ire->ire_type & IRE_CACHETABLE) == 0) 212 return; 213 buf_len = ((mblk_t *)mp)->b_datap->db_lim - ((mblk_t *)mp)->b_wptr; 214 if (buf_len <= 0) 215 return; 216 217 /* Number of active references of this ire */ 218 ref = ire->ire_refcnt; 219 /* "inbound" to a non local address is a forward */ 220 ib_pkt_count = ire->ire_ib_pkt_count; 221 fo_pkt_count = 0; 222 ASSERT(!(ire->ire_type & IRE_BROADCAST)); 223 if (ire->ire_type & IRE_LOCAL) { 224 fo_pkt_count = ib_pkt_count; 225 ib_pkt_count = 0; 226 } 227 228 mutex_enter(&ire->ire_lock); 229 gw_addr_v6 = ire->ire_gateway_addr_v6; 230 mutex_exit(&ire->ire_lock); 231 232 print_len = snprintf((char *)((mblk_t *)mp)->b_wptr, buf_len, 233 MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR "%5d " 234 "%05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d " 235 "%04d %08d %08d %d/%d/%d %s\n\t%s\n\t%s\n\t%s\n\t%s\n", 236 (void *)ire, (void *)ire->ire_rfq, (void *)ire->ire_stq, 237 (int)ire->ire_zoneid, 238 ire->ire_max_frag, 
ire->ire_uinfo.iulp_rtt, 239 ire->ire_uinfo.iulp_rtt_sd, ire->ire_uinfo.iulp_ssthresh, ref, 240 ire->ire_uinfo.iulp_rtomax, 241 (ire->ire_uinfo.iulp_tstamp_ok ? 1: 0), 242 (ire->ire_uinfo.iulp_wscale_ok ? 1: 0), 243 (ire->ire_uinfo.iulp_ecn_ok ? 1: 0), 244 (ire->ire_uinfo.iulp_pmtud_ok ? 1: 0), 245 ire->ire_uinfo.iulp_sack, 246 ire->ire_uinfo.iulp_spipe, ire->ire_uinfo.iulp_rpipe, 247 ib_pkt_count, ire->ire_ob_pkt_count, 248 fo_pkt_count, ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type), 249 inet_ntop(AF_INET6, &ire->ire_addr_v6, buf1, sizeof (buf1)), 250 inet_ntop(AF_INET6, &ire->ire_mask_v6, buf2, sizeof (buf2)), 251 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, buf3, sizeof (buf3)), 252 inet_ntop(AF_INET6, &gw_addr_v6, buf4, sizeof (buf4))); 253 if (print_len < buf_len) { 254 ((mblk_t *)mp)->b_wptr += print_len; 255 } else { 256 ((mblk_t *)mp)->b_wptr += buf_len; 257 } 258 } 259 260 261 /* 262 * Initialize the ire that is specific to IPv6 part and call 263 * ire_init_common to finish it. 264 */ 265 static ire_t * 266 ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, const in6_addr_t *v6mask, 267 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 268 uint_t *max_fragp, queue_t *rfq, queue_t *stq, ushort_t type, 269 ipif_t *ipif, const in6_addr_t *v6cmask, uint32_t phandle, 270 uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, tsol_gc_t *gc, 271 tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 272 { 273 274 /* 275 * Reject IRE security attribute creation/initialization 276 * if system is not running in Trusted mode. 
277 */ 278 if ((gc != NULL || gcgrp != NULL) && !is_system_labeled()) 279 return (NULL); 280 281 282 BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_alloced); 283 ire->ire_addr_v6 = *v6addr; 284 285 if (v6src_addr != NULL) 286 ire->ire_src_addr_v6 = *v6src_addr; 287 if (v6mask != NULL) { 288 ire->ire_mask_v6 = *v6mask; 289 ire->ire_masklen = ip_mask_to_plen_v6(&ire->ire_mask_v6); 290 } 291 if (v6gateway != NULL) 292 ire->ire_gateway_addr_v6 = *v6gateway; 293 294 if (type == IRE_CACHE && v6cmask != NULL) 295 ire->ire_cmask_v6 = *v6cmask; 296 297 /* 298 * Multirouted packets need to have a fragment header added so that 299 * the receiver is able to discard duplicates according to their 300 * fragment identifier. 301 */ 302 if (type == IRE_CACHE && (flags & RTF_MULTIRT)) { 303 ire->ire_frag_flag = IPH_FRAG_HDR; 304 } 305 306 /* ire_init_common will free the mblks upon encountering any failure */ 307 if (!ire_init_common(ire, max_fragp, NULL, rfq, stq, type, 308 ipif, NULL, phandle, ihandle, flags, IPV6_VERSION, ulp_info, 309 gc, gcgrp, ipst)) 310 return (NULL); 311 312 return (ire); 313 } 314 315 /* 316 * Similar to ire_create_v6 except that it is called only when 317 * we want to allocate ire as an mblk e.g. we have a external 318 * resolver. Do we need this in IPv6 ? 319 * 320 * IPv6 initializes the ire_nce in ire_add_v6, which expects to 321 * find the ire_nce to be null when it is called. So, although 322 * we have a src_nce parameter (in the interest of matching up with 323 * the argument list of the v4 version), we ignore the src_nce 324 * argument here. 
325 */ 326 /* ARGSUSED */ 327 ire_t * 328 ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 329 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 330 nce_t *src_nce, queue_t *rfq, queue_t *stq, ushort_t type, 331 ipif_t *ipif, const in6_addr_t *v6cmask, 332 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 333 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 334 { 335 ire_t *ire; 336 ire_t *ret_ire; 337 mblk_t *mp; 338 339 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 340 341 /* Allocate the new IRE. */ 342 mp = allocb(sizeof (ire_t), BPRI_MED); 343 if (mp == NULL) { 344 ip1dbg(("ire_create_mp_v6: alloc failed\n")); 345 return (NULL); 346 } 347 348 ire = (ire_t *)mp->b_rptr; 349 mp->b_wptr = (uchar_t *)&ire[1]; 350 351 /* Start clean. */ 352 *ire = ire_null; 353 ire->ire_mp = mp; 354 mp->b_datap->db_type = IRE_DB_TYPE; 355 356 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 357 NULL, rfq, stq, type, ipif, v6cmask, phandle, 358 ihandle, flags, ulp_info, gc, gcgrp, ipst); 359 360 if (ret_ire == NULL) { 361 freeb(ire->ire_mp); 362 return (NULL); 363 } 364 return (ire); 365 } 366 367 /* 368 * ire_create_v6 is called to allocate and initialize a new IRE. 369 * 370 * NOTE : This is called as writer sometimes though not required 371 * by this function. 372 * 373 * See comments above ire_create_mp_v6() for the rationale behind the 374 * unused src_nce argument. 
375 */ 376 /* ARGSUSED */ 377 ire_t * 378 ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 379 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 380 uint_t *max_fragp, nce_t *src_nce, queue_t *rfq, queue_t *stq, 381 ushort_t type, ipif_t *ipif, const in6_addr_t *v6cmask, 382 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 383 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 384 { 385 ire_t *ire; 386 ire_t *ret_ire; 387 388 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 389 390 ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP); 391 if (ire == NULL) { 392 ip1dbg(("ire_create_v6: alloc failed\n")); 393 return (NULL); 394 } 395 *ire = ire_null; 396 397 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 398 max_fragp, rfq, stq, type, ipif, v6cmask, phandle, 399 ihandle, flags, ulp_info, gc, gcgrp, ipst); 400 401 if (ret_ire == NULL) { 402 kmem_cache_free(ire_cache, ire); 403 return (NULL); 404 } 405 ASSERT(ret_ire == ire); 406 return (ire); 407 } 408 409 /* 410 * Find an IRE_INTERFACE for the multicast group. 411 * Allows different routes for multicast addresses 412 * in the unicast routing table (akin to FF::0/8 but could be more specific) 413 * which point at different interfaces. This is used when IPV6_MULTICAST_IF 414 * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't 415 * specify the interface to join on. 416 * 417 * Supports link-local addresses by following the ipif/ill when recursing. 418 */ 419 ire_t * 420 ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst) 421 { 422 ire_t *ire; 423 ipif_t *ipif = NULL; 424 int match_flags = MATCH_IRE_TYPE; 425 in6_addr_t gw_addr_v6; 426 427 ire = ire_ftable_lookup_v6(group, 0, 0, 0, NULL, NULL, 428 zoneid, 0, NULL, MATCH_IRE_DEFAULT, ipst); 429 430 /* We search a resolvable ire in case of multirouting. 
*/ 431 if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) { 432 ire_t *cire = NULL; 433 /* 434 * If the route is not resolvable, the looked up ire 435 * may be changed here. In that case, ire_multirt_lookup() 436 * IRE_REFRELE the original ire and change it. 437 */ 438 (void) ire_multirt_lookup_v6(&cire, &ire, MULTIRT_CACHEGW, 439 NULL, ipst); 440 if (cire != NULL) 441 ire_refrele(cire); 442 } 443 if (ire == NULL) 444 return (NULL); 445 /* 446 * Make sure we follow ire_ipif. 447 * 448 * We need to determine the interface route through 449 * which the gateway will be reached. We don't really 450 * care which interface is picked if the interface is 451 * part of a group. 452 */ 453 if (ire->ire_ipif != NULL) { 454 ipif = ire->ire_ipif; 455 match_flags |= MATCH_IRE_ILL_GROUP; 456 } 457 458 switch (ire->ire_type) { 459 case IRE_DEFAULT: 460 case IRE_PREFIX: 461 case IRE_HOST: 462 mutex_enter(&ire->ire_lock); 463 gw_addr_v6 = ire->ire_gateway_addr_v6; 464 mutex_exit(&ire->ire_lock); 465 ire_refrele(ire); 466 ire = ire_ftable_lookup_v6(&gw_addr_v6, 0, 0, 467 IRE_INTERFACE, ipif, NULL, zoneid, 0, 468 NULL, match_flags, ipst); 469 return (ire); 470 case IRE_IF_NORESOLVER: 471 case IRE_IF_RESOLVER: 472 return (ire); 473 default: 474 ire_refrele(ire); 475 return (NULL); 476 } 477 } 478 479 /* 480 * Return any local address. We use this to target ourselves 481 * when the src address was specified as 'default'. 482 * Preference for IRE_LOCAL entries. 
483 */ 484 ire_t * 485 ire_lookup_local_v6(zoneid_t zoneid, ip_stack_t *ipst) 486 { 487 ire_t *ire; 488 irb_t *irb; 489 ire_t *maybe = NULL; 490 int i; 491 492 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 493 irb = &ipst->ips_ip_cache_table_v6[i]; 494 if (irb->irb_ire == NULL) 495 continue; 496 rw_enter(&irb->irb_lock, RW_READER); 497 for (ire = irb->irb_ire; ire; ire = ire->ire_next) { 498 if ((ire->ire_marks & IRE_MARK_CONDEMNED) || 499 ire->ire_zoneid != zoneid && 500 ire->ire_zoneid != ALL_ZONES) 501 continue; 502 switch (ire->ire_type) { 503 case IRE_LOOPBACK: 504 if (maybe == NULL) { 505 IRE_REFHOLD(ire); 506 maybe = ire; 507 } 508 break; 509 case IRE_LOCAL: 510 if (maybe != NULL) { 511 ire_refrele(maybe); 512 } 513 IRE_REFHOLD(ire); 514 rw_exit(&irb->irb_lock); 515 return (ire); 516 } 517 } 518 rw_exit(&irb->irb_lock); 519 } 520 return (maybe); 521 } 522 523 /* 524 * This function takes a mask and returns number of bits set in the 525 * mask (the represented prefix length). Assumes a contiguous mask. 526 */ 527 int 528 ip_mask_to_plen_v6(const in6_addr_t *v6mask) 529 { 530 int bits; 531 int plen = IPV6_ABITS; 532 int i; 533 534 for (i = 3; i >= 0; i--) { 535 if (v6mask->s6_addr32[i] == 0) { 536 plen -= 32; 537 continue; 538 } 539 bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1; 540 if (bits == 0) 541 break; 542 plen -= bits; 543 } 544 545 return (plen); 546 } 547 548 /* 549 * Convert a prefix length to the mask for that prefix. 550 * Returns the argument bitmask. 551 */ 552 in6_addr_t * 553 ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask) 554 { 555 uint32_t *ptr; 556 557 if (plen < 0 || plen > IPV6_ABITS) 558 return (NULL); 559 *bitmask = ipv6_all_zeros; 560 561 ptr = (uint32_t *)bitmask; 562 while (plen > 32) { 563 *ptr++ = 0xffffffffU; 564 plen -= 32; 565 } 566 *ptr = htonl(0xffffffffU << (32 - plen)); 567 return (bitmask); 568 } 569 570 /* 571 * Add a fully initialized IRE to an appropriate 572 * table based on ire_type. 
*
 * The forward table contains IRE_PREFIX/IRE_HOST and
 * IRE_IF_RESOLVER/IRE_IF_NORESOLVER and IRE_DEFAULT.
 *
 * The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK
 * and IRE_CACHE.
 *
 * NOTE : This function is called as writer though not required
 * by this function.
 *
 * On success returns 0 and *ire_p points at a held ire: either the one
 * passed in, or an already existing duplicate (in which case the passed-in
 * ire has been deleted). On failure the passed-in ire is deleted, *ire_p
 * is set to NULL and an errno value is returned (EINVAL, ENOMEM, or
 * whatever ire_atomic_start() returned).
 */
int
ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func)
{
	ire_t	*ire1;
	int	mask_table_index;
	irb_t	*irb_ptr;
	ire_t	**irep;
	int	flags;
	ire_t	*pire = NULL;
	ill_t	*stq_ill;
	boolean_t ndp_g_lock_held = B_FALSE;
	ire_t	*ire = *ire_p;
	int	error;
	ip_stack_t	*ipst = ire->ire_ipst;

	ASSERT(ire->ire_ipversion == IPV6_VERSION);
	ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */
	ASSERT(ire->ire_nce == NULL);

	/*
	 * Normalize the mask and source address according to the IRE type;
	 * unknown types are rejected.
	 */
	switch (ire->ire_type) {
	case IRE_HOST:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_CACHE:
	case IRE_LOCAL:
	case IRE_LOOPBACK:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		break;
	case IRE_PREFIX:
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_DEFAULT:
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_IF_RESOLVER:
	case IRE_IF_NORESOLVER:
		break;
	default:
		printf("ire_add_v6: ire %p has unrecognized IRE type (%d)\n",
		    (void *)ire, ire->ire_type);
		ire_delete(ire);
		*ire_p = NULL;
		return (EINVAL);
	}

	/* Make sure the address is properly masked. */
	V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6);

	/* Find the appropriate list head (hash bucket). */
	if ((ire->ire_type & IRE_CACHETABLE) == 0) {
		/* IRE goes into Forward Table */
		mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6);
		/*
		 * The per-prefix-length bucket array is created on first
		 * use. It is allocated and initialized before taking
		 * ips_ire_ft_init_lock, then installed under the lock
		 * only if no other thread got there first.
		 */
		if ((ipst->ips_ip_forwarding_table_v6[mask_table_index]) ==
		    NULL) {
			irb_t *ptr;
			int i;

			ptr = (irb_t *)mi_zalloc((
			    ipst->ips_ip6_ftable_hash_size * sizeof (irb_t)));
			if (ptr == NULL) {
				ire_delete(ire);
				*ire_p = NULL;
				return (ENOMEM);
			}
			for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) {
				rw_init(&ptr[i].irb_lock, NULL,
				    RW_DEFAULT, NULL);
			}
			mutex_enter(&ipst->ips_ire_ft_init_lock);
			if (ipst->ips_ip_forwarding_table_v6[
			    mask_table_index] == NULL) {
				ipst->ips_ip_forwarding_table_v6[
				    mask_table_index] = ptr;
				mutex_exit(&ipst->ips_ire_ft_init_lock);
			} else {
				/*
				 * Some other thread won the race in
				 * initializing the forwarding table at the
				 * same index.
				 */
				mutex_exit(&ipst->ips_ire_ft_init_lock);
				for (i = 0; i < ipst->ips_ip6_ftable_hash_size;
				    i++) {
					rw_destroy(&ptr[i].irb_lock);
				}
				mi_free(ptr);
			}
		}
		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[mask_table_index][
		    IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6,
		    ipst->ips_ip6_ftable_hash_size)]);
	} else {
		irb_ptr = &(ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(
		    ire->ire_addr_v6, ipst->ips_ip6_cache_table_size)]);
	}
	/*
	 * For xresolv interfaces (v6 interfaces with an external
	 * address resolver), ip_newroute_v6/ip_newroute_ipif_v6
	 * are unable to prevent the deletion of the interface route
	 * while adding an IRE_CACHE for an on-link destination
	 * in the IRE_IF_RESOLVER case, since the ire has to go to
	 * the external resolver and return. We can't do a REFHOLD on the
	 * associated interface ire for fear of the message being freed
	 * if the external resolver can't resolve the address.
	 * Here we look up the interface ire in the forwarding table
	 * and make sure that the interface route has not been deleted.
	 */
	if (ire->ire_type == IRE_CACHE &&
	    IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6) &&
	    (((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) &&
	    (((ill_t *)ire->ire_stq->q_ptr)->ill_flags & ILLF_XRESOLV)) {

		pire = ire_ihandle_lookup_onlink_v6(ire);
		if (pire == NULL) {
			ire_delete(ire);
			*ire_p = NULL;
			return (EINVAL);
		}
		/* Prevent pire from getting deleted */
		IRB_REFHOLD(pire->ire_bucket);
		/* Has it been removed already? */
		if (pire->ire_marks & IRE_MARK_CONDEMNED) {
			IRB_REFRELE(pire->ire_bucket);
			ire_refrele(pire);
			ire_delete(ire);
			*ire_p = NULL;
			return (EINVAL);
		}
	}

	flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW);
	/*
	 * For IRE_CACHES, MATCH_IRE_IPIF is not enough to check
	 * for duplicates because :
	 *
	 * 1) ire_ipif->ipif_ill and ire_stq->q_ptr could be
	 *    pointing at different ills. A real duplicate is
	 *    a match on both ire_ipif and ire_stq.
	 *
	 * 2) We could have multiple packets trying to create
	 *    an IRE_CACHE for the same ill.
	 *
	 * Moreover, IPIF_NOFAILOVER and IPV6_BOUND_PIF endpoints want
	 * to go out on a particular ill. Rather than looking at the
	 * packet, we depend on the above for MATCH_IRE_ILL here.
	 *
	 * Unlike IPv4, MATCH_IRE_IPIF is needed here as we could have
	 * multiple IRE_CACHES for an ill for the same destination
	 * with various scoped addresses i.e represented by ipifs.
	 *
	 * MATCH_IRE_ILL is done implicitly below for IRE_CACHES.
	 */
	if (ire->ire_ipif != NULL)
		flags |= MATCH_IRE_IPIF;
	/*
	 * If we are creating hidden ires, make sure we search on
	 * this ill (MATCH_IRE_ILL) and a hidden ire, while we are
	 * searching for duplicates below. Otherwise we could
	 * potentially find an IRE on some other interface
	 * and it may not be a IRE marked with IRE_MARK_HIDDEN. We
	 * shouldn't do this as this will lead to an infinite loop as
	 * eventually we need an hidden ire for this packet to go
	 * out. MATCH_IRE_ILL is already marked above.
	 */
	if (ire->ire_marks & IRE_MARK_HIDDEN) {
		ASSERT(ire->ire_type == IRE_CACHE);
		flags |= MATCH_IRE_MARK_HIDDEN;
	}

	/*
	 * Start the atomic add of the ire. Grab the ill locks,
	 * ill_g_usesrc_lock and the bucket lock. Check for condemned.
	 * To avoid lock order problems, get the ndp6.ndp_g_lock now itself.
	 */
	if (ire->ire_type == IRE_CACHE) {
		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
		ndp_g_lock_held = B_TRUE;
	}

	/*
	 * If ipif or ill is changing ire_atomic_start() may queue the
	 * request and return EINPROGRESS.
	 */

	error = ire_atomic_start(irb_ptr, ire, q, mp, func);
	if (error != 0) {
		if (ndp_g_lock_held)
			mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
		/*
		 * We don't know whether it is a valid ipif or not.
		 * So, set it to NULL. This assumes that the ire has not added
		 * a reference to the ipif.
		 */
		ire->ire_ipif = NULL;
		ire_delete(ire);
		if (pire != NULL) {
			IRB_REFRELE(pire->ire_bucket);
			ire_refrele(pire);
		}
		*ire_p = NULL;
		return (error);
	}
	/*
	 * To avoid creating ires having stale values for the ire_max_frag
	 * we get the latest value atomically here. For more details
	 * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE
	 * in ip_rput_dlpi_writer
	 *
	 * NOTE(review): pire is only assigned in the xresolv branch above,
	 * yet the non-multicast arm below dereferences it unconditionally.
	 * Presumably only xresolv IRE_CACHEs arrive here with a NULL
	 * ire_max_fragp -- confirm against the callers.
	 */
	if (ire->ire_max_fragp == NULL) {
		if (IN6_IS_ADDR_MULTICAST(&ire->ire_addr_v6))
			ire->ire_max_frag = ire->ire_ipif->ipif_mtu;
		else
			ire->ire_max_frag = pire->ire_max_frag;
	} else {
		uint_t max_frag;

		max_frag = *ire->ire_max_fragp;
		ire->ire_max_fragp = NULL;
		ire->ire_max_frag = max_frag;
	}

	/*
	 * Atomically check for duplicate and insert in the table.
	 */
	for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
		if (ire1->ire_marks & IRE_MARK_CONDEMNED)
			continue;

		if (ire->ire_type == IRE_CACHE) {
			/*
			 * We do MATCH_IRE_ILL implicitly here for IRE_CACHES.
			 * As ire_ipif and ire_stq could point to two
			 * different ills, we can't pass just ire_ipif to
			 * ire_match_args and get a match on both ills.
			 * This is just needed for duplicate checks here and
			 * so we don't add an extra argument to
			 * ire_match_args for this. Do it locally.
			 *
			 * NOTE : Currently there is no part of the code
			 * that asks for both MATCH_IRE_IPIF and MATCH_IRE_ILL
			 * match for IRE_CACHEs. Thus we don't want to
			 * extend the arguments to ire_match_args_v6.
			 */
			if (ire1->ire_stq != ire->ire_stq)
				continue;
			/*
			 * Multiroute IRE_CACHEs for a given destination can
			 * have the same ire_ipif, typically if their source
			 * address is forced using RTF_SETSRC, and the same
			 * send-to queue. We differentiate them using the parent
			 * handle.
			 */
			if ((ire1->ire_flags & RTF_MULTIRT) &&
			    (ire->ire_flags & RTF_MULTIRT) &&
			    (ire1->ire_phandle != ire->ire_phandle))
				continue;
		}
		if (ire1->ire_zoneid != ire->ire_zoneid)
			continue;
		if (ire_match_args_v6(ire1, &ire->ire_addr_v6,
		    &ire->ire_mask_v6, &ire->ire_gateway_addr_v6,
		    ire->ire_type, ire->ire_ipif, ire->ire_zoneid, 0, NULL,
		    flags)) {
			/*
			 * Return the old ire after doing a REFHOLD.
			 * As most of the callers continue to use the IRE
			 * after adding, we return a held ire. This will
			 * avoid a lookup in the caller again. If the callers
			 * don't want to use it, they need to do a REFRELE.
			 */
			ip1dbg(("found dup ire existing %p new %p",
			    (void *)ire1, (void *)ire));
			IRE_REFHOLD(ire1);
			if (ndp_g_lock_held)
				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
			ire_atomic_end(irb_ptr, ire);
			ire_delete(ire);
			if (pire != NULL) {
				/*
				 * Assert that it is
				 * not yet removed from the list.
				 */
				ASSERT(pire->ire_ptpn != NULL);
				IRB_REFRELE(pire->ire_bucket);
				ire_refrele(pire);
			}
			*ire_p = ire1;
			return (0);
		}
	}
	if (ire->ire_type == IRE_CACHE) {
		in6_addr_t gw_addr_v6;
		ill_t	*ill = ire_to_ill(ire);
		char	buf[INET6_ADDRSTRLEN];
		nce_t	*nce;

		/*
		 * All IRE_CACHE types must have a nce.  If this is
		 * not the case the entry will not be added. We need
		 * to make sure that if somebody deletes the nce
		 * after we looked up, they will find this ire and
		 * delete the ire. To delete this ire one needs the
		 * bucket lock which we are still holding here. So,
		 * even if the nce gets deleted after we looked up,
		 * this ire will get deleted.
		 *
		 * NOTE : Don't need the ire_lock for accessing
		 * ire_gateway_addr_v6 as it is appearing first
		 * time on the list and rts_setgwr_v6 could not
		 * be changing this.
		 */
		gw_addr_v6 = ire->ire_gateway_addr_v6;
		if (IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) {
			nce = ndp_lookup_v6(ill, &ire->ire_addr_v6, B_TRUE);
		} else {
			nce = ndp_lookup_v6(ill, &gw_addr_v6, B_TRUE);
		}
		/* Jumps into the failure arm below with nce == NULL. */
		if (nce == NULL)
			goto failed;

		/* Pair of refhold, refrele just to get the tracing right */
		NCE_REFHOLD_TO_REFHOLD_NOTR(nce);
		/*
		 * Atomically make sure that new IREs don't point
		 * to an NCE that is logically deleted (CONDEMNED).
		 * ndp_delete() first marks the NCE CONDEMNED.
		 * This ensures that the nce_refcnt won't increase
		 * due to new nce_lookups or due to addition of new IREs
		 * pointing to this NCE. Then ndp_delete() cleans up
		 * existing references. If we don't do it atomically here,
		 * ndp_delete() -> nce_ire_delete() will not be able to
		 * clean up the IRE list completely, and the nce_refcnt
		 * won't go down to zero.
		 */
		mutex_enter(&nce->nce_lock);
		if (ill->ill_flags & ILLF_XRESOLV) {
			/*
			 * If we used an external resolver, we may not
			 * have gone through neighbor discovery to get here.
			 * Must update the nce_state before the next check.
			 */
			if (nce->nce_state == ND_INCOMPLETE)
				nce->nce_state = ND_REACHABLE;
		}
		if (nce->nce_state == ND_INCOMPLETE ||
		    (nce->nce_flags & NCE_F_CONDEMNED) ||
		    (nce->nce_state == ND_UNREACHABLE)) {
			/*
			 * Reached either by the goto above (nce is NULL and
			 * no nce_lock is held) or by falling in here with
			 * nce_lock held; hence the NULL checks below.
			 */
failed:
			if (ndp_g_lock_held)
				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
			if (nce != NULL)
				mutex_exit(&nce->nce_lock);
			ire_atomic_end(irb_ptr, ire);
			ip1dbg(("ire_add_v6: No nce for dst %s \n",
			    inet_ntop(AF_INET6, &ire->ire_addr_v6,
			    buf, sizeof (buf))));
			ire_delete(ire);
			if (pire != NULL) {
				/*
				 * Assert that it is
				 * not yet removed from the list.
				 */
				ASSERT(pire->ire_ptpn != NULL);
				IRB_REFRELE(pire->ire_bucket);
				ire_refrele(pire);
			}
			if (nce != NULL)
				NCE_REFRELE_NOTR(nce);
			*ire_p = NULL;
			return (EINVAL);
		} else {
			ire->ire_nce = nce;
		}
		mutex_exit(&nce->nce_lock);
	}
	/*
	 * Find the first entry that matches ire_addr - provides
	 * tail insertion. *irep will be null if no match.
	 */
	irep = (ire_t **)irb_ptr;
	while ((ire1 = *irep) != NULL &&
	    !IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6))
		irep = &ire1->ire_next;
	ASSERT(!(ire->ire_type & IRE_BROADCAST));

	if (*irep != NULL) {
		/*
		 * Find the last ire which matches ire_addr_v6.
		 * Needed to do tail insertion among entries with the same
		 * ire_addr_v6.
		 */
		while (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6,
		    &ire1->ire_addr_v6)) {
			irep = &ire1->ire_next;
			ire1 = *irep;
			if (ire1 == NULL)
				break;
		}
	}

	if (ire->ire_type == IRE_DEFAULT) {
		/*
		 * We keep a count of default gateways which is used when
		 * assigning them as routes.
		 */
		ipst->ips_ipv6_ire_default_count++;
		ASSERT(ipst->ips_ipv6_ire_default_count != 0); /* Wraparound */
	}
	/* Insert at *irep */
	ire1 = *irep;
	if (ire1 != NULL)
		ire1->ire_ptpn = &ire->ire_next;
	ire->ire_next = ire1;
	/* Link the new one in. */
	ire->ire_ptpn = irep;
	/*
	 * ire_walk routines de-reference ire_next without holding
	 * a lock. Before we point to the new ire, we want to make
	 * sure the store that sets the ire_next of the new ire
	 * reaches global visibility, so that ire_walk routines
	 * don't see a truncated list of ires i.e if the ire_next
	 * of the new ire gets set after we do "*irep = ire" due
	 * to re-ordering, the ire_walk thread will see a NULL
	 * once it accesses the ire_next of the new ire.
	 * membar_producer() makes sure that the following store
	 * happens *after* all of the above stores.
	 */
	membar_producer();
	*irep = ire;
	ire->ire_bucket = irb_ptr;
	/*
	 * We return a bumped up IRE above. Keep it symmetrical
	 * so that the callers will always have to release. This
	 * helps the callers of this function because they continue
	 * to use the IRE after adding and hence they don't have to
	 * lookup again after we return the IRE.
	 *
	 * NOTE : We don't have to use atomics as this is appearing
	 * in the list for the first time and no one else can bump
	 * up the reference count on this yet.
	 */
	IRE_REFHOLD_LOCKED(ire);
	BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_inserted);
	irb_ptr->irb_ire_cnt++;
	if (ire->ire_marks & IRE_MARK_TEMPORARY)
		irb_ptr->irb_tmp_ire_cnt++;

	/* Account for the new ire on its ipif and on the send-to ill. */
	if (ire->ire_ipif != NULL) {
		ire->ire_ipif->ipif_ire_cnt++;
		if (ire->ire_stq != NULL) {
			stq_ill = (ill_t *)ire->ire_stq->q_ptr;
			stq_ill->ill_ire_cnt++;
		}
	} else {
		ASSERT(ire->ire_stq == NULL);
	}

	if (ndp_g_lock_held)
		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
	ire_atomic_end(irb_ptr, ire);

	if (pire != NULL) {
		/* Assert that it is not removed from the list yet */
		ASSERT(pire->ire_ptpn != NULL);
		IRB_REFRELE(pire->ire_bucket);
		ire_refrele(pire);
	}

	if (ire->ire_type != IRE_CACHE) {
		/*
		 * For ires with a host mask see if there is an entry
		 * in the cache. If there is one flush the whole cache as
		 * there might be multiple entries due to RTF_MULTIRT (CGTP).
		 * If no entry is found then there is no need to flush the
		 * cache.
		 */

		if (ip_mask_to_plen_v6(&ire->ire_mask_v6) == IPV6_ABITS) {
			ire_t *lire;
			lire = ire_ctable_lookup_v6(&ire->ire_addr_v6, NULL,
			    IRE_CACHE, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
			    ipst);
			if (lire != NULL) {
				ire_refrele(lire);
				ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
			}
		} else {
			ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
		}
	}

	*ire_p = ire;
	return (0);
}

/*
 * Search for all HOST REDIRECT routes that are
 * pointing at the specified gateway and
 * delete them. This routine is called only
 * when a default gateway is going away.
 *
 * Only entries flagged RTF_DYNAMIC in the last slot of the forwarding
 * table (index IP6_MASK_TABLE_SIZE - 1) are examined.
 */
static void
ire_delete_host_redirects_v6(const in6_addr_t *gateway, ip_stack_t *ipst)
{
	irb_t *irb_ptr;
	irb_t *irb;
	ire_t *ire;
	in6_addr_t gw_addr_v6;
	int i;

	/* get the hash table for HOST routes */
	irb_ptr = ipst->ips_ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)];
	if (irb_ptr == NULL)
		return;
	for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) {
		irb = &irb_ptr[i];
		/* Hold the bucket for the duration of the walk. */
		IRB_REFHOLD(irb);
		for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
			if (!(ire->ire_flags & RTF_DYNAMIC))
				continue;
			/* Snapshot the gateway under ire_lock. */
			mutex_enter(&ire->ire_lock);
			gw_addr_v6 = ire->ire_gateway_addr_v6;
			mutex_exit(&ire->ire_lock);
			if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway))
				ire_delete(ire);
		}
		IRB_REFRELE(irb);
	}
}

/*
 * Delete all the cache entries with this 'addr'. This is the IPv6 counterpart
 * of ip_ire_clookup_and_delete. The difference being this function does not
 * return any value. IPv6 processing of a gratuitous ARP, as it stands, is
 * different than IPv4 in that, regardless of the presence of a cache entry
 * for this address, an ire_walk_v6 is done.
Another difference is that unlike 1125 * in the case of IPv4 this does not take an ipif_t argument, since it is only 1126 * called by ip_arp_news and the match is always only on the address. 1127 */ 1128 void 1129 ip_ire_clookup_and_delete_v6(const in6_addr_t *addr, ip_stack_t *ipst) 1130 { 1131 irb_t *irb; 1132 ire_t *cire; 1133 boolean_t found = B_FALSE; 1134 1135 irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 1136 ipst->ips_ip6_cache_table_size)]; 1137 IRB_REFHOLD(irb); 1138 for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) { 1139 if (cire->ire_marks & IRE_MARK_CONDEMNED) 1140 continue; 1141 if (IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, addr)) { 1142 1143 /* This signifies start of a match */ 1144 if (!found) 1145 found = B_TRUE; 1146 if (cire->ire_type == IRE_CACHE) { 1147 if (cire->ire_nce != NULL) 1148 ndp_delete(cire->ire_nce); 1149 ire_delete_v6(cire); 1150 } 1151 /* End of the match */ 1152 } else if (found) 1153 break; 1154 } 1155 IRB_REFRELE(irb); 1156 } 1157 1158 /* 1159 * Delete the specified IRE. 1160 * All calls should use ire_delete(). 1161 * Sometimes called as writer though not required by this function. 1162 * 1163 * NOTE : This function is called only if the ire was added 1164 * in the list. 1165 */ 1166 void 1167 ire_delete_v6(ire_t *ire) 1168 { 1169 in6_addr_t gw_addr_v6; 1170 ip_stack_t *ipst = ire->ire_ipst; 1171 1172 ASSERT(ire->ire_refcnt >= 1); 1173 ASSERT(ire->ire_ipversion == IPV6_VERSION); 1174 1175 if (ire->ire_type != IRE_CACHE) 1176 ire_flush_cache_v6(ire, IRE_FLUSH_DELETE); 1177 if (ire->ire_type == IRE_DEFAULT) { 1178 /* 1179 * when a default gateway is going away 1180 * delete all the host redirects pointing at that 1181 * gateway. 
1182 */ 1183 mutex_enter(&ire->ire_lock); 1184 gw_addr_v6 = ire->ire_gateway_addr_v6; 1185 mutex_exit(&ire->ire_lock); 1186 ire_delete_host_redirects_v6(&gw_addr_v6, ipst); 1187 } 1188 } 1189 1190 /* 1191 * ire_walk routine to delete all IRE_CACHE and IRE_HOST type redirect 1192 * entries. 1193 */ 1194 /*ARGSUSED1*/ 1195 void 1196 ire_delete_cache_v6(ire_t *ire, char *arg) 1197 { 1198 char addrstr1[INET6_ADDRSTRLEN]; 1199 char addrstr2[INET6_ADDRSTRLEN]; 1200 1201 if ((ire->ire_type & IRE_CACHE) || 1202 (ire->ire_flags & RTF_DYNAMIC)) { 1203 ip1dbg(("ire_delete_cache_v6: deleted %s type %d through %s\n", 1204 inet_ntop(AF_INET6, &ire->ire_addr_v6, 1205 addrstr1, sizeof (addrstr1)), 1206 ire->ire_type, 1207 inet_ntop(AF_INET6, &ire->ire_gateway_addr_v6, 1208 addrstr2, sizeof (addrstr2)))); 1209 ire_delete(ire); 1210 } 1211 1212 } 1213 1214 /* 1215 * ire_walk routine to delete all IRE_CACHE/IRE_HOST type redirect entries 1216 * that have a given gateway address. 1217 */ 1218 void 1219 ire_delete_cache_gw_v6(ire_t *ire, char *addr) 1220 { 1221 in6_addr_t *gw_addr = (in6_addr_t *)addr; 1222 char buf1[INET6_ADDRSTRLEN]; 1223 char buf2[INET6_ADDRSTRLEN]; 1224 in6_addr_t ire_gw_addr_v6; 1225 1226 if (!(ire->ire_type & IRE_CACHE) && 1227 !(ire->ire_flags & RTF_DYNAMIC)) 1228 return; 1229 1230 mutex_enter(&ire->ire_lock); 1231 ire_gw_addr_v6 = ire->ire_gateway_addr_v6; 1232 mutex_exit(&ire->ire_lock); 1233 1234 if (IN6_ARE_ADDR_EQUAL(&ire_gw_addr_v6, gw_addr)) { 1235 ip1dbg(("ire_delete_cache_gw_v6: deleted %s type %d to %s\n", 1236 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, 1237 buf1, sizeof (buf1)), 1238 ire->ire_type, 1239 inet_ntop(AF_INET6, &ire_gw_addr_v6, 1240 buf2, sizeof (buf2)))); 1241 ire_delete(ire); 1242 } 1243 } 1244 1245 /* 1246 * Remove all IRE_CACHE entries that match 1247 * the ire specified. (Sometimes called 1248 * as writer though not required by this function.) 
1249 * 1250 * The flag argument indicates if the 1251 * flush request is due to addition 1252 * of new route (IRE_FLUSH_ADD) or deletion of old 1253 * route (IRE_FLUSH_DELETE). 1254 * 1255 * This routine takes only the IREs from the forwarding 1256 * table and flushes the corresponding entries from 1257 * the cache table. 1258 * 1259 * When flushing due to the deletion of an old route, it 1260 * just checks the cache handles (ire_phandle and ire_ihandle) and 1261 * deletes the ones that match. 1262 * 1263 * When flushing due to the creation of a new route, it checks 1264 * if a cache entry's address matches the one in the IRE and 1265 * that the cache entry's parent has a less specific mask than the 1266 * one in IRE. The destination of such a cache entry could be the 1267 * gateway for other cache entries, so we need to flush those as 1268 * well by looking for gateway addresses matching the IRE's address. 1269 */ 1270 void 1271 ire_flush_cache_v6(ire_t *ire, int flag) 1272 { 1273 int i; 1274 ire_t *cire; 1275 irb_t *irb; 1276 ip_stack_t *ipst = ire->ire_ipst; 1277 1278 if (ire->ire_type & IRE_CACHE) 1279 return; 1280 1281 /* 1282 * If a default is just created, there is no point 1283 * in going through the cache, as there will not be any 1284 * cached ires. 1285 */ 1286 if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD) 1287 return; 1288 if (flag == IRE_FLUSH_ADD) { 1289 /* 1290 * This selective flush is 1291 * due to the addition of 1292 * new IRE. 
1293 */ 1294 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 1295 irb = &ipst->ips_ip_cache_table_v6[i]; 1296 if ((cire = irb->irb_ire) == NULL) 1297 continue; 1298 IRB_REFHOLD(irb); 1299 for (cire = irb->irb_ire; cire != NULL; 1300 cire = cire->ire_next) { 1301 if (cire->ire_type != IRE_CACHE) 1302 continue; 1303 /* 1304 * If 'cire' belongs to the same subnet 1305 * as the new ire being added, and 'cire' 1306 * is derived from a prefix that is less 1307 * specific than the new ire being added, 1308 * we need to flush 'cire'; for instance, 1309 * when a new interface comes up. 1310 */ 1311 if ((V6_MASK_EQ_2(cire->ire_addr_v6, 1312 ire->ire_mask_v6, ire->ire_addr_v6) && 1313 (ip_mask_to_plen_v6(&cire->ire_cmask_v6) <= 1314 ire->ire_masklen))) { 1315 ire_delete(cire); 1316 continue; 1317 } 1318 /* 1319 * This is the case when the ire_gateway_addr 1320 * of 'cire' belongs to the same subnet as 1321 * the new ire being added. 1322 * Flushing such ires is sometimes required to 1323 * avoid misrouting: say we have a machine with 1324 * two interfaces (I1 and I2), a default router 1325 * R on the I1 subnet, and a host route to an 1326 * off-link destination D with a gateway G on 1327 * the I2 subnet. 1328 * Under normal operation, we will have an 1329 * on-link cache entry for G and an off-link 1330 * cache entry for D with G as ire_gateway_addr, 1331 * traffic to D will reach its destination 1332 * through gateway G. 1333 * If the administrator does 'ifconfig I2 down', 1334 * the cache entries for D and G will be 1335 * flushed. However, G will now be resolved as 1336 * an off-link destination using R (the default 1337 * router) as gateway. Then D will also be 1338 * resolved as an off-link destination using G 1339 * as gateway - this behavior is due to 1340 * compatibility reasons, see comment in 1341 * ire_ihandle_lookup_offlink(). Traffic to D 1342 * will go to the router R and probably won't 1343 * reach the destination. 
1344 * The administrator then does 'ifconfig I2 up'. 1345 * Since G is on the I2 subnet, this routine 1346 * will flush its cache entry. It must also 1347 * flush the cache entry for D, otherwise 1348 * traffic will stay misrouted until the IRE 1349 * times out. 1350 */ 1351 if (V6_MASK_EQ_2(cire->ire_gateway_addr_v6, 1352 ire->ire_mask_v6, ire->ire_addr_v6)) { 1353 ire_delete(cire); 1354 continue; 1355 } 1356 } 1357 IRB_REFRELE(irb); 1358 } 1359 } else { 1360 /* 1361 * delete the cache entries based on 1362 * handle in the IRE as this IRE is 1363 * being deleted/changed. 1364 */ 1365 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 1366 irb = &ipst->ips_ip_cache_table_v6[i]; 1367 if ((cire = irb->irb_ire) == NULL) 1368 continue; 1369 IRB_REFHOLD(irb); 1370 for (cire = irb->irb_ire; cire != NULL; 1371 cire = cire->ire_next) { 1372 if (cire->ire_type != IRE_CACHE) 1373 continue; 1374 if ((cire->ire_phandle == 0 || 1375 cire->ire_phandle != ire->ire_phandle) && 1376 (cire->ire_ihandle == 0 || 1377 cire->ire_ihandle != ire->ire_ihandle)) 1378 continue; 1379 ire_delete(cire); 1380 } 1381 IRB_REFRELE(irb); 1382 } 1383 } 1384 } 1385 1386 /* 1387 * Matches the arguments passed with the values in the ire. 1388 * 1389 * Note: for match types that match using "ipif" passed in, ipif 1390 * must be checked for non-NULL before calling this routine. 
1391 */ 1392 static boolean_t 1393 ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask, 1394 const in6_addr_t *gateway, int type, const ipif_t *ipif, zoneid_t zoneid, 1395 uint32_t ihandle, const ts_label_t *tsl, int match_flags) 1396 { 1397 in6_addr_t masked_addr; 1398 in6_addr_t gw_addr_v6; 1399 ill_t *ire_ill = NULL, *dst_ill; 1400 ill_t *ipif_ill = NULL; 1401 ill_group_t *ire_ill_group = NULL; 1402 ill_group_t *ipif_ill_group = NULL; 1403 ipif_t *src_ipif; 1404 1405 ASSERT(ire->ire_ipversion == IPV6_VERSION); 1406 ASSERT(addr != NULL); 1407 ASSERT(mask != NULL); 1408 ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL); 1409 ASSERT((!(match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP))) || 1410 (ipif != NULL && ipif->ipif_isv6)); 1411 ASSERT(!(match_flags & MATCH_IRE_WQ)); 1412 1413 /* 1414 * HIDDEN cache entries have to be looked up specifically with 1415 * MATCH_IRE_MARK_HIDDEN. MATCH_IRE_MARK_HIDDEN is usually set 1416 * when the interface is FAILED or INACTIVE. In that case, 1417 * any IRE_CACHES that exists should be marked with 1418 * IRE_MARK_HIDDEN. So, we don't really need to match below 1419 * for IRE_MARK_HIDDEN. But we do so for consistency. 1420 */ 1421 if (!(match_flags & MATCH_IRE_MARK_HIDDEN) && 1422 (ire->ire_marks & IRE_MARK_HIDDEN)) 1423 return (B_FALSE); 1424 1425 if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid && 1426 ire->ire_zoneid != ALL_ZONES) { 1427 /* 1428 * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is 1429 * valid and does not match that of ire_zoneid, a failure to 1430 * match is reported at this point. Otherwise, since some IREs 1431 * that are available in the global zone can be used in local 1432 * zones, additional checks need to be performed: 1433 * 1434 * IRE_CACHE and IRE_LOOPBACK entries should 1435 * never be matched in this situation. 
1436 * 1437 * IRE entries that have an interface associated with them 1438 * should in general not match unless they are an IRE_LOCAL 1439 * or in the case when MATCH_IRE_DEFAULT has been set in 1440 * the caller. In the case of the former, checking of the 1441 * other fields supplied should take place. 1442 * 1443 * In the case where MATCH_IRE_DEFAULT has been set, 1444 * all of the ipif's associated with the IRE's ill are 1445 * checked to see if there is a matching zoneid. If any 1446 * one ipif has a matching zoneid, this IRE is a 1447 * potential candidate so checking of the other fields 1448 * takes place. 1449 * 1450 * In the case where the IRE_INTERFACE has a usable source 1451 * address (indicated by ill_usesrc_ifindex) in the 1452 * correct zone then it's permitted to return this IRE 1453 */ 1454 if (match_flags & MATCH_IRE_ZONEONLY) 1455 return (B_FALSE); 1456 if (ire->ire_type & (IRE_CACHE | IRE_LOOPBACK)) 1457 return (B_FALSE); 1458 /* 1459 * Note, IRE_INTERFACE can have the stq as NULL. For 1460 * example, if the default multicast route is tied to 1461 * the loopback address. 
1462 */ 1463 if ((ire->ire_type & IRE_INTERFACE) && 1464 (ire->ire_stq != NULL)) { 1465 dst_ill = (ill_t *)ire->ire_stq->q_ptr; 1466 /* 1467 * If there is a usable source address in the 1468 * zone, then it's ok to return an 1469 * IRE_INTERFACE 1470 */ 1471 if ((dst_ill->ill_usesrc_ifindex != 0) && 1472 (src_ipif = ipif_select_source_v6(dst_ill, addr, 1473 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid)) 1474 != NULL) { 1475 ip3dbg(("ire_match_args: src_ipif %p" 1476 " dst_ill %p", (void *)src_ipif, 1477 (void *)dst_ill)); 1478 ipif_refrele(src_ipif); 1479 } else { 1480 ip3dbg(("ire_match_args: src_ipif NULL" 1481 " dst_ill %p\n", (void *)dst_ill)); 1482 return (B_FALSE); 1483 } 1484 } 1485 if (ire->ire_ipif != NULL && ire->ire_type != IRE_LOCAL && 1486 !(ire->ire_type & IRE_INTERFACE)) { 1487 ipif_t *tipif; 1488 1489 if ((match_flags & MATCH_IRE_DEFAULT) == 0) 1490 return (B_FALSE); 1491 mutex_enter(&ire->ire_ipif->ipif_ill->ill_lock); 1492 for (tipif = ire->ire_ipif->ipif_ill->ill_ipif; 1493 tipif != NULL; tipif = tipif->ipif_next) { 1494 if (IPIF_CAN_LOOKUP(tipif) && 1495 (tipif->ipif_flags & IPIF_UP) && 1496 (tipif->ipif_zoneid == zoneid || 1497 tipif->ipif_zoneid == ALL_ZONES)) 1498 break; 1499 } 1500 mutex_exit(&ire->ire_ipif->ipif_ill->ill_lock); 1501 if (tipif == NULL) 1502 return (B_FALSE); 1503 } 1504 } 1505 1506 if (match_flags & MATCH_IRE_GW) { 1507 mutex_enter(&ire->ire_lock); 1508 gw_addr_v6 = ire->ire_gateway_addr_v6; 1509 mutex_exit(&ire->ire_lock); 1510 } 1511 /* 1512 * For IRE_CACHES, MATCH_IRE_ILL/ILL_GROUP really means that 1513 * somebody wants to send out on a particular interface which 1514 * is given by ire_stq and hence use ire_stq to derive the ill 1515 * value. ire_ipif for IRE_CACHES is just the 1516 * means of getting a source address i.e ire_src_addr_v6 = 1517 * ire->ire_ipif->ipif_src_addr_v6. 
1518 */ 1519 if (match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP)) { 1520 ire_ill = ire_to_ill(ire); 1521 if (ire_ill != NULL) 1522 ire_ill_group = ire_ill->ill_group; 1523 ipif_ill = ipif->ipif_ill; 1524 ipif_ill_group = ipif_ill->ill_group; 1525 } 1526 1527 /* No ire_addr_v6 bits set past the mask */ 1528 ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6, 1529 ire->ire_addr_v6)); 1530 V6_MASK_COPY(*addr, *mask, masked_addr); 1531 1532 if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) && 1533 ((!(match_flags & MATCH_IRE_GW)) || 1534 IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) && 1535 ((!(match_flags & MATCH_IRE_TYPE)) || 1536 (ire->ire_type & type)) && 1537 ((!(match_flags & MATCH_IRE_SRC)) || 1538 IN6_ARE_ADDR_EQUAL(&ire->ire_src_addr_v6, 1539 &ipif->ipif_v6src_addr)) && 1540 ((!(match_flags & MATCH_IRE_IPIF)) || 1541 (ire->ire_ipif == ipif)) && 1542 ((!(match_flags & MATCH_IRE_MARK_HIDDEN)) || 1543 (ire->ire_type != IRE_CACHE || 1544 ire->ire_marks & IRE_MARK_HIDDEN)) && 1545 ((!(match_flags & MATCH_IRE_ILL)) || 1546 (ire_ill == ipif_ill)) && 1547 ((!(match_flags & MATCH_IRE_IHANDLE)) || 1548 (ire->ire_ihandle == ihandle)) && 1549 ((!(match_flags & MATCH_IRE_ILL_GROUP)) || 1550 (ire_ill == ipif_ill) || 1551 (ire_ill_group != NULL && 1552 ire_ill_group == ipif_ill_group)) && 1553 ((!(match_flags & MATCH_IRE_SECATTR)) || 1554 (!is_system_labeled()) || 1555 (tsol_ire_match_gwattr(ire, tsl) == 0))) { 1556 /* We found the matched IRE */ 1557 return (B_TRUE); 1558 } 1559 return (B_FALSE); 1560 } 1561 1562 /* 1563 * Lookup for a route in all the tables 1564 */ 1565 ire_t * 1566 ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 1567 const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 1568 zoneid_t zoneid, const ts_label_t *tsl, int flags, ip_stack_t *ipst) 1569 { 1570 ire_t *ire = NULL; 1571 1572 /* 1573 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 1574 * MATCH_IRE_ILL is set. 
1575 */ 1576 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 1577 (ipif == NULL)) 1578 return (NULL); 1579 1580 /* 1581 * might be asking for a cache lookup, 1582 * This is not best way to lookup cache, 1583 * user should call ire_cache_lookup directly. 1584 * 1585 * If MATCH_IRE_TYPE was set, first lookup in the cache table and then 1586 * in the forwarding table, if the applicable type flags were set. 1587 */ 1588 if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) { 1589 ire = ire_ctable_lookup_v6(addr, gateway, type, ipif, zoneid, 1590 tsl, flags, ipst); 1591 if (ire != NULL) 1592 return (ire); 1593 } 1594 if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) { 1595 ire = ire_ftable_lookup_v6(addr, mask, gateway, type, ipif, 1596 pire, zoneid, 0, tsl, flags, ipst); 1597 } 1598 return (ire); 1599 } 1600 1601 /* 1602 * Lookup a route in forwarding table. 1603 * specific lookup is indicated by passing the 1604 * required parameters and indicating the 1605 * match required in flag field. 1606 * 1607 * Looking for default route can be done in three ways 1608 * 1) pass mask as ipv6_all_zeros and set MATCH_IRE_MASK in flags field 1609 * along with other matches. 1610 * 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags 1611 * field along with other matches. 1612 * 3) if the destination and mask are passed as zeros. 1613 * 1614 * A request to return a default route if no route 1615 * is found, can be specified by setting MATCH_IRE_DEFAULT 1616 * in flags. 1617 * 1618 * It does not support recursion more than one level. It 1619 * will do recursive lookup only when the lookup maps to 1620 * a prefix or default route and MATCH_IRE_RECURSIVE flag is passed. 1621 * 1622 * If the routing table is setup to allow more than one level 1623 * of recursion, the cleaning up cache table will not work resulting 1624 * in invalid routing. 1625 * 1626 * Supports link-local addresses by following the ipif/ill when recursing. 
1627 * 1628 * NOTE : When this function returns NULL, pire has already been released. 1629 * pire is valid only when this function successfully returns an 1630 * ire. 1631 */ 1632 ire_t * 1633 ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 1634 const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 1635 zoneid_t zoneid, uint32_t ihandle, const ts_label_t *tsl, int flags, 1636 ip_stack_t *ipst) 1637 { 1638 irb_t *irb_ptr; 1639 ire_t *rire; 1640 ire_t *ire = NULL; 1641 ire_t *saved_ire; 1642 nce_t *nce; 1643 int i; 1644 in6_addr_t gw_addr_v6; 1645 1646 ASSERT(addr != NULL); 1647 ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL); 1648 ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 1649 ASSERT(ipif == NULL || ipif->ipif_isv6); 1650 ASSERT(!(flags & MATCH_IRE_WQ)); 1651 1652 /* 1653 * When we return NULL from this function, we should make 1654 * sure that *pire is NULL so that the callers will not 1655 * wrongly REFRELE the pire. 1656 */ 1657 if (pire != NULL) 1658 *pire = NULL; 1659 /* 1660 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 1661 * MATCH_IRE_ILL is set. 1662 */ 1663 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 1664 (ipif == NULL)) 1665 return (NULL); 1666 1667 /* 1668 * If the mask is known, the lookup 1669 * is simple, if the mask is not known 1670 * we need to search. 
1671 */ 1672 if (flags & MATCH_IRE_MASK) { 1673 uint_t masklen; 1674 1675 masklen = ip_mask_to_plen_v6(mask); 1676 if (ipst->ips_ip_forwarding_table_v6[masklen] == NULL) 1677 return (NULL); 1678 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[masklen][ 1679 IRE_ADDR_MASK_HASH_V6(*addr, *mask, 1680 ipst->ips_ip6_ftable_hash_size)]); 1681 rw_enter(&irb_ptr->irb_lock, RW_READER); 1682 for (ire = irb_ptr->irb_ire; ire != NULL; 1683 ire = ire->ire_next) { 1684 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1685 continue; 1686 if (ire_match_args_v6(ire, addr, mask, gateway, type, 1687 ipif, zoneid, ihandle, tsl, flags)) 1688 goto found_ire; 1689 } 1690 rw_exit(&irb_ptr->irb_lock); 1691 } else { 1692 /* 1693 * In this case we don't know the mask, we need to 1694 * search the table assuming different mask sizes. 1695 * we start with 128 bit mask, we don't allow default here. 1696 */ 1697 for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) { 1698 in6_addr_t tmpmask; 1699 1700 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) 1701 continue; 1702 (void) ip_plen_to_mask_v6(i, &tmpmask); 1703 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][ 1704 IRE_ADDR_MASK_HASH_V6(*addr, tmpmask, 1705 ipst->ips_ip6_ftable_hash_size)]; 1706 rw_enter(&irb_ptr->irb_lock, RW_READER); 1707 for (ire = irb_ptr->irb_ire; ire != NULL; 1708 ire = ire->ire_next) { 1709 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1710 continue; 1711 if (ire_match_args_v6(ire, addr, 1712 &ire->ire_mask_v6, gateway, type, ipif, 1713 zoneid, ihandle, tsl, flags)) 1714 goto found_ire; 1715 } 1716 rw_exit(&irb_ptr->irb_lock); 1717 } 1718 } 1719 1720 /* 1721 * We come here if no route has yet been found. 1722 * 1723 * Handle the case where default route is 1724 * requested by specifying type as one of the possible 1725 * types for that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE). 
1726 * 1727 * If MATCH_IRE_MASK is specified, then the appropriate default route 1728 * would have been found above if it exists so it isn't looked up here. 1729 * If MATCH_IRE_DEFAULT was also specified, then a default route will be 1730 * searched for later. 1731 */ 1732 if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE && 1733 (type & (IRE_DEFAULT | IRE_INTERFACE))) { 1734 if (ipst->ips_ip_forwarding_table_v6[0] != NULL) { 1735 /* addr & mask is zero for defaults */ 1736 irb_ptr = &ipst->ips_ip_forwarding_table_v6[0][ 1737 IRE_ADDR_HASH_V6(ipv6_all_zeros, 1738 ipst->ips_ip6_ftable_hash_size)]; 1739 rw_enter(&irb_ptr->irb_lock, RW_READER); 1740 for (ire = irb_ptr->irb_ire; ire != NULL; 1741 ire = ire->ire_next) { 1742 1743 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1744 continue; 1745 1746 if (ire_match_args_v6(ire, addr, 1747 &ipv6_all_zeros, gateway, type, ipif, 1748 zoneid, ihandle, tsl, flags)) 1749 goto found_ire; 1750 } 1751 rw_exit(&irb_ptr->irb_lock); 1752 } 1753 } 1754 /* 1755 * We come here only if no route is found. 1756 * see if the default route can be used which is allowed 1757 * only if the default matching criteria is specified. 1758 * The ipv6_ire_default_count tracks the number of IRE_DEFAULT 1759 * entries. However, the ip_forwarding_table_v6[0] also contains 1760 * interface routes thus the count can be zero. 1761 */ 1762 saved_ire = NULL; 1763 if ((flags & (MATCH_IRE_DEFAULT | MATCH_IRE_MASK)) == 1764 MATCH_IRE_DEFAULT) { 1765 ire_t *ire_origin; 1766 uint_t g_index; 1767 uint_t index; 1768 1769 if (ipst->ips_ip_forwarding_table_v6[0] == NULL) 1770 return (NULL); 1771 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[0])[0]; 1772 1773 /* 1774 * Keep a tab on the bucket while looking the IRE_DEFAULT 1775 * entries. We need to keep track of a particular IRE 1776 * (ire_origin) so this ensures that it will not be unlinked 1777 * from the hash list during the recursive lookup below. 
1778 */ 1779 IRB_REFHOLD(irb_ptr); 1780 ire = irb_ptr->irb_ire; 1781 if (ire == NULL) { 1782 IRB_REFRELE(irb_ptr); 1783 return (NULL); 1784 } 1785 1786 /* 1787 * Get the index first, since it can be changed by other 1788 * threads. Then get to the right default route skipping 1789 * default interface routes if any. As we hold a reference on 1790 * the IRE bucket, ipv6_ire_default_count can only increase so 1791 * we can't reach the end of the hash list unexpectedly. 1792 */ 1793 if (ipst->ips_ipv6_ire_default_count != 0) { 1794 g_index = ipst->ips_ipv6_ire_default_index++; 1795 index = g_index % ipst->ips_ipv6_ire_default_count; 1796 while (index != 0) { 1797 if (!(ire->ire_type & IRE_INTERFACE)) 1798 index--; 1799 ire = ire->ire_next; 1800 } 1801 ASSERT(ire != NULL); 1802 } else { 1803 /* 1804 * No default route, so we only have default interface 1805 * routes: don't enter the first loop. 1806 */ 1807 ire = NULL; 1808 } 1809 1810 /* 1811 * Round-robin the default routers list looking for a neighbor 1812 * that matches the passed in parameters and is reachable. If 1813 * none found, just return a route from the default router list 1814 * if it exists. If we can't find a default route (IRE_DEFAULT), 1815 * look for interface default routes. 1816 * We start with the ire we found above and we walk the hash 1817 * list until we're back where we started, see 1818 * ire_get_next_default_ire(). It doesn't matter if default 1819 * routes are added or deleted by other threads - we know this 1820 * ire will stay in the list because we hold a reference on the 1821 * ire bucket. 1822 * NB: if we only have interface default routes, ire is NULL so 1823 * we don't even enter this loop (see above). 
1824 */ 1825 ire_origin = ire; 1826 for (; ire != NULL; 1827 ire = ire_get_next_default_ire(ire, ire_origin)) { 1828 1829 if (ire_match_args_v6(ire, addr, 1830 &ipv6_all_zeros, gateway, type, ipif, 1831 zoneid, ihandle, tsl, flags)) { 1832 int match_flags; 1833 1834 /* 1835 * We have something to work with. 1836 * If we can find a resolved/reachable 1837 * entry, we will use this. Otherwise 1838 * we'll try to find an entry that has 1839 * a resolved cache entry. We will fallback 1840 * on this if we don't find anything else. 1841 */ 1842 if (saved_ire == NULL) 1843 saved_ire = ire; 1844 mutex_enter(&ire->ire_lock); 1845 gw_addr_v6 = ire->ire_gateway_addr_v6; 1846 mutex_exit(&ire->ire_lock); 1847 match_flags = MATCH_IRE_ILL_GROUP | 1848 MATCH_IRE_SECATTR; 1849 rire = ire_ctable_lookup_v6(&gw_addr_v6, NULL, 1850 0, ire->ire_ipif, zoneid, tsl, match_flags, 1851 ipst); 1852 if (rire != NULL) { 1853 nce = rire->ire_nce; 1854 if (nce != NULL && 1855 NCE_ISREACHABLE(nce) && 1856 nce->nce_flags & NCE_F_ISROUTER) { 1857 ire_refrele(rire); 1858 IRE_REFHOLD(ire); 1859 IRB_REFRELE(irb_ptr); 1860 goto found_ire_held; 1861 } else if (nce != NULL && 1862 !(nce->nce_flags & 1863 NCE_F_ISROUTER)) { 1864 /* 1865 * Make sure we don't use 1866 * this ire 1867 */ 1868 if (saved_ire == ire) 1869 saved_ire = NULL; 1870 } 1871 ire_refrele(rire); 1872 } else if (ipst-> 1873 ips_ipv6_ire_default_count > 1 && 1874 zoneid != GLOBAL_ZONEID) { 1875 /* 1876 * When we're in a local zone, we're 1877 * only interested in default routers 1878 * that are reachable through ipifs 1879 * within our zone. 1880 * The potentially expensive call to 1881 * ire_route_lookup_v6() is avoided when 1882 * we have only one default route. 
1883 */ 1884 int ire_match_flags = MATCH_IRE_TYPE | 1885 MATCH_IRE_SECATTR; 1886 1887 if (ire->ire_ipif != NULL) { 1888 ire_match_flags |= 1889 MATCH_IRE_ILL_GROUP; 1890 } 1891 rire = ire_route_lookup_v6(&gw_addr_v6, 1892 NULL, NULL, IRE_INTERFACE, 1893 ire->ire_ipif, NULL, 1894 zoneid, tsl, ire_match_flags, ipst); 1895 if (rire != NULL) { 1896 ire_refrele(rire); 1897 saved_ire = ire; 1898 } else if (saved_ire == ire) { 1899 /* 1900 * Make sure we don't use 1901 * this ire 1902 */ 1903 saved_ire = NULL; 1904 } 1905 } 1906 } 1907 } 1908 if (saved_ire != NULL) { 1909 ire = saved_ire; 1910 IRE_REFHOLD(ire); 1911 IRB_REFRELE(irb_ptr); 1912 goto found_ire_held; 1913 } else { 1914 /* 1915 * Look for a interface default route matching the 1916 * args passed in. No round robin here. Just pick 1917 * the right one. 1918 */ 1919 for (ire = irb_ptr->irb_ire; ire != NULL; 1920 ire = ire->ire_next) { 1921 1922 if (!(ire->ire_type & IRE_INTERFACE)) 1923 continue; 1924 1925 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1926 continue; 1927 1928 if (ire_match_args_v6(ire, addr, 1929 &ipv6_all_zeros, gateway, type, ipif, 1930 zoneid, ihandle, tsl, flags)) { 1931 IRE_REFHOLD(ire); 1932 IRB_REFRELE(irb_ptr); 1933 goto found_ire_held; 1934 } 1935 } 1936 IRB_REFRELE(irb_ptr); 1937 } 1938 } 1939 ASSERT(ire == NULL); 1940 ip1dbg(("ire_ftable_lookup_v6: returning NULL ire")); 1941 return (NULL); 1942 found_ire: 1943 ASSERT((ire->ire_marks & IRE_MARK_CONDEMNED) == 0); 1944 IRE_REFHOLD(ire); 1945 rw_exit(&irb_ptr->irb_lock); 1946 1947 found_ire_held: 1948 if ((flags & MATCH_IRE_RJ_BHOLE) && 1949 (ire->ire_flags & (RTF_BLACKHOLE | RTF_REJECT))) { 1950 return (ire); 1951 } 1952 /* 1953 * At this point, IRE that was found must be an IRE_FORWARDTABLE 1954 * or IRE_CACHETABLE type. If this is a recursive lookup and an 1955 * IRE_INTERFACE type was found, return that. 
If it was some other 1956 * IRE_FORWARDTABLE type of IRE (one of the prefix types), then it 1957 * is necessary to fill in the parent IRE pointed to by pire, and 1958 * then lookup the gateway address of the parent. For backwards 1959 * compatiblity, if this lookup returns an 1960 * IRE other than a IRE_CACHETABLE or IRE_INTERFACE, then one more level 1961 * of lookup is done. 1962 */ 1963 if (flags & MATCH_IRE_RECURSIVE) { 1964 const ipif_t *gw_ipif; 1965 int match_flags = MATCH_IRE_DSTONLY; 1966 1967 if (ire->ire_type & IRE_INTERFACE) 1968 return (ire); 1969 if (pire != NULL) 1970 *pire = ire; 1971 /* 1972 * If we can't find an IRE_INTERFACE or the caller has not 1973 * asked for pire, we need to REFRELE the saved_ire. 1974 */ 1975 saved_ire = ire; 1976 1977 /* 1978 * Currently MATCH_IRE_ILL is never used with 1979 * (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT) while 1980 * sending out packets as MATCH_IRE_ILL is used only 1981 * for communicating with on-link hosts. We can't assert 1982 * that here as RTM_GET calls this function with 1983 * MATCH_IRE_ILL | MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE. 1984 * We have already used the MATCH_IRE_ILL in determining 1985 * the right prefix route at this point. To match the 1986 * behavior of how we locate routes while sending out 1987 * packets, we don't want to use MATCH_IRE_ILL below 1988 * while locating the interface route. 1989 */ 1990 if (ire->ire_ipif != NULL) 1991 match_flags |= MATCH_IRE_ILL_GROUP; 1992 1993 mutex_enter(&ire->ire_lock); 1994 gw_addr_v6 = ire->ire_gateway_addr_v6; 1995 mutex_exit(&ire->ire_lock); 1996 1997 ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 0, 1998 ire->ire_ipif, NULL, zoneid, tsl, match_flags, ipst); 1999 if (ire == NULL) { 2000 /* 2001 * In this case we have to deal with the 2002 * MATCH_IRE_PARENT flag, which means the 2003 * parent has to be returned if ire is NULL. 
2004 * The aim of this is to have (at least) a starting 2005 * ire when we want to look at all of the ires in a 2006 * bucket aimed at a single destination (as is the 2007 * case in ip_newroute_v6 for the RTF_MULTIRT 2008 * flagged routes). 2009 */ 2010 if (flags & MATCH_IRE_PARENT) { 2011 if (pire != NULL) { 2012 /* 2013 * Need an extra REFHOLD, if the 2014 * parent ire is returned via both 2015 * ire and pire. 2016 */ 2017 IRE_REFHOLD(saved_ire); 2018 } 2019 ire = saved_ire; 2020 } else { 2021 ire_refrele(saved_ire); 2022 if (pire != NULL) 2023 *pire = NULL; 2024 } 2025 return (ire); 2026 } 2027 if (ire->ire_type & (IRE_CACHETABLE | IRE_INTERFACE)) { 2028 /* 2029 * If the caller did not ask for pire, release 2030 * it now. 2031 */ 2032 if (pire == NULL) { 2033 ire_refrele(saved_ire); 2034 } 2035 return (ire); 2036 } 2037 match_flags |= MATCH_IRE_TYPE; 2038 mutex_enter(&ire->ire_lock); 2039 gw_addr_v6 = ire->ire_gateway_addr_v6; 2040 mutex_exit(&ire->ire_lock); 2041 gw_ipif = ire->ire_ipif; 2042 ire_refrele(ire); 2043 ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 2044 (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid, 2045 NULL, match_flags, ipst); 2046 if (ire == NULL) { 2047 /* 2048 * In this case we have to deal with the 2049 * MATCH_IRE_PARENT flag, which means the 2050 * parent has to be returned if ire is NULL. 2051 * The aim of this is to have (at least) a starting 2052 * ire when we want to look at all of the ires in a 2053 * bucket aimed at a single destination (as is the 2054 * case in ip_newroute_v6 for the RTF_MULTIRT 2055 * flagged routes). 2056 */ 2057 if (flags & MATCH_IRE_PARENT) { 2058 if (pire != NULL) { 2059 /* 2060 * Need an extra REFHOLD, if the 2061 * parent ire is returned via both 2062 * ire and pire. 
2063 */ 2064 IRE_REFHOLD(saved_ire); 2065 } 2066 ire = saved_ire; 2067 } else { 2068 ire_refrele(saved_ire); 2069 if (pire != NULL) 2070 *pire = NULL; 2071 } 2072 return (ire); 2073 } else if (pire == NULL) { 2074 /* 2075 * If the caller did not ask for pire, release 2076 * it now. 2077 */ 2078 ire_refrele(saved_ire); 2079 } 2080 return (ire); 2081 } 2082 2083 ASSERT(pire == NULL || *pire == NULL); 2084 return (ire); 2085 } 2086 2087 /* 2088 * Delete the IRE cache for the gateway and all IRE caches whose 2089 * ire_gateway_addr_v6 points to this gateway, and allow them to 2090 * be created on demand by ip_newroute_v6. 2091 */ 2092 void 2093 ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid, 2094 ip_stack_t *ipst) 2095 { 2096 irb_t *irb; 2097 ire_t *ire; 2098 2099 irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 2100 ipst->ips_ip6_cache_table_size)]; 2101 IRB_REFHOLD(irb); 2102 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 2103 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2104 continue; 2105 2106 ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 2107 if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, 0, 2108 IRE_CACHE, NULL, zoneid, 0, NULL, MATCH_IRE_TYPE)) { 2109 ire_delete(ire); 2110 } 2111 } 2112 IRB_REFRELE(irb); 2113 2114 ire_walk_v6(ire_delete_cache_gw_v6, (char *)addr, zoneid, ipst); 2115 } 2116 2117 /* 2118 * Looks up cache table for a route. 2119 * specific lookup can be indicated by 2120 * passing the MATCH_* flags and the 2121 * necessary parameters. 2122 */ 2123 ire_t * 2124 ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway, 2125 int type, const ipif_t *ipif, zoneid_t zoneid, const ts_label_t *tsl, 2126 int flags, ip_stack_t *ipst) 2127 { 2128 ire_t *ire; 2129 irb_t *irb_ptr; 2130 ASSERT(addr != NULL); 2131 ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 2132 2133 /* 2134 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 2135 * MATCH_IRE_ILL is set. 
2136 */ 2137 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 2138 (ipif == NULL)) 2139 return (NULL); 2140 2141 irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 2142 ipst->ips_ip6_cache_table_size)]; 2143 rw_enter(&irb_ptr->irb_lock, RW_READER); 2144 for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 2145 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2146 continue; 2147 2148 ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 2149 if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, gateway, 2150 type, ipif, zoneid, 0, tsl, flags)) { 2151 IRE_REFHOLD(ire); 2152 rw_exit(&irb_ptr->irb_lock); 2153 return (ire); 2154 } 2155 } 2156 rw_exit(&irb_ptr->irb_lock); 2157 return (NULL); 2158 } 2159 2160 /* 2161 * Lookup cache. Don't return IRE_MARK_HIDDEN entries. Callers 2162 * should use ire_ctable_lookup with MATCH_IRE_MARK_HIDDEN to get 2163 * to the hidden ones. 2164 * 2165 * In general the zoneid has to match (where ALL_ZONES match all of them). 2166 * But for IRE_LOCAL we also need to handle the case where L2 should 2167 * conceptually loop back the packet. This is necessary since neither 2168 * Ethernet drivers nor Ethernet hardware loops back packets sent to their 2169 * own MAC address. This loopback is needed when the normal 2170 * routes (ignoring IREs with different zoneids) would send out the packet on 2171 * the same ill (or ill group) as the ill with which this IRE_LOCAL is 2172 * associated. 2173 * 2174 * Earlier versions of this code always matched an IRE_LOCAL independently of 2175 * the zoneid. We preserve that earlier behavior when 2176 * ip_restrict_interzone_loopback is turned off. 
2177 */ 2178 ire_t * 2179 ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid, 2180 const ts_label_t *tsl, ip_stack_t *ipst) 2181 { 2182 irb_t *irb_ptr; 2183 ire_t *ire; 2184 2185 irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 2186 ipst->ips_ip6_cache_table_size)]; 2187 rw_enter(&irb_ptr->irb_lock, RW_READER); 2188 for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 2189 if (ire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 2190 continue; 2191 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, addr)) { 2192 /* 2193 * Finally, check if the security policy has any 2194 * restriction on using this route for the specified 2195 * message. 2196 */ 2197 if (tsl != NULL && 2198 ire->ire_gw_secattr != NULL && 2199 tsol_ire_match_gwattr(ire, tsl) != 0) { 2200 continue; 2201 } 2202 2203 if (zoneid == ALL_ZONES || ire->ire_zoneid == zoneid || 2204 ire->ire_zoneid == ALL_ZONES) { 2205 IRE_REFHOLD(ire); 2206 rw_exit(&irb_ptr->irb_lock); 2207 return (ire); 2208 } 2209 2210 if (ire->ire_type == IRE_LOCAL) { 2211 if (ipst->ips_ip_restrict_interzone_loopback && 2212 !ire_local_ok_across_zones(ire, zoneid, 2213 (void *)addr, tsl, ipst)) 2214 continue; 2215 2216 IRE_REFHOLD(ire); 2217 rw_exit(&irb_ptr->irb_lock); 2218 return (ire); 2219 } 2220 } 2221 } 2222 rw_exit(&irb_ptr->irb_lock); 2223 return (NULL); 2224 } 2225 2226 /* 2227 * Locate the interface ire that is tied to the cache ire 'cire' via 2228 * cire->ire_ihandle. 2229 * 2230 * We are trying to create the cache ire for an onlink destn. or 2231 * gateway in 'cire'. We are called from ire_add_v6() in the IRE_IF_RESOLVER 2232 * case for xresolv interfaces, after the ire has come back from 2233 * an external resolver. 
2234 */ 2235 static ire_t * 2236 ire_ihandle_lookup_onlink_v6(ire_t *cire) 2237 { 2238 ire_t *ire; 2239 int match_flags; 2240 int i; 2241 int j; 2242 irb_t *irb_ptr; 2243 ip_stack_t *ipst = cire->ire_ipst; 2244 2245 ASSERT(cire != NULL); 2246 2247 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2248 /* 2249 * We know that the mask of the interface ire equals cire->ire_cmask. 2250 * (When ip_newroute_v6() created 'cire' for an on-link destn. 2251 * it set its cmask from the interface ire's mask) 2252 */ 2253 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 2254 NULL, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle, 2255 NULL, match_flags, ipst); 2256 if (ire != NULL) 2257 return (ire); 2258 /* 2259 * If we didn't find an interface ire above, we can't declare failure. 2260 * For backwards compatibility, we need to support prefix routes 2261 * pointing to next hop gateways that are not on-link. 2262 * 2263 * In the resolver/noresolver case, ip_newroute_v6() thinks 2264 * it is creating the cache ire for an onlink destination in 'cire'. 2265 * But 'cire' is not actually onlink, because ire_ftable_lookup_v6() 2266 * cheated it, by doing ire_route_lookup_v6() twice and returning an 2267 * interface ire. 2268 * 2269 * Eg. default - gw1 (line 1) 2270 * gw1 - gw2 (line 2) 2271 * gw2 - hme0 (line 3) 2272 * 2273 * In the above example, ip_newroute_v6() tried to create the cache ire 2274 * 'cire' for gw1, based on the interface route in line 3. The 2275 * ire_ftable_lookup_v6() above fails, because there is 2276 * no interface route to reach gw1. (it is gw2). We fall thru below. 2277 * 2278 * Do a brute force search based on the ihandle in a subset of the 2279 * forwarding tables, corresponding to cire->ire_cmask_v6. Otherwise 2280 * things become very complex, since we don't have 'pire' in this 2281 * case. 
(Also note that this method is not possible in the offlink 2282 * case because we don't know the mask) 2283 */ 2284 i = ip_mask_to_plen_v6(&cire->ire_cmask_v6); 2285 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) 2286 return (NULL); 2287 for (j = 0; j < ipst->ips_ip6_ftable_hash_size; j++) { 2288 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][j]; 2289 rw_enter(&irb_ptr->irb_lock, RW_READER); 2290 for (ire = irb_ptr->irb_ire; ire != NULL; 2291 ire = ire->ire_next) { 2292 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2293 continue; 2294 if ((ire->ire_type & IRE_INTERFACE) && 2295 (ire->ire_ihandle == cire->ire_ihandle)) { 2296 IRE_REFHOLD(ire); 2297 rw_exit(&irb_ptr->irb_lock); 2298 return (ire); 2299 } 2300 } 2301 rw_exit(&irb_ptr->irb_lock); 2302 } 2303 return (NULL); 2304 } 2305 2306 2307 /* 2308 * Locate the interface ire that is tied to the cache ire 'cire' via 2309 * cire->ire_ihandle. 2310 * 2311 * We are trying to create the cache ire for an offlink destn based 2312 * on the cache ire of the gateway in 'cire'. 'pire' is the prefix ire 2313 * as found by ip_newroute_v6(). We are called from ip_newroute_v6() in 2314 * the IRE_CACHE case. 2315 */ 2316 ire_t * 2317 ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire) 2318 { 2319 ire_t *ire; 2320 int match_flags; 2321 in6_addr_t gw_addr; 2322 ipif_t *gw_ipif; 2323 ip_stack_t *ipst = cire->ire_ipst; 2324 2325 ASSERT(cire != NULL && pire != NULL); 2326 2327 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2328 /* 2329 * ip_newroute_v6 calls ire_ftable_lookup with MATCH_IRE_ILL only 2330 * for on-link hosts. We should never be here for onlink. 2331 * Thus, use MATCH_IRE_ILL_GROUP. 2332 */ 2333 if (pire->ire_ipif != NULL) 2334 match_flags |= MATCH_IRE_ILL_GROUP; 2335 /* 2336 * We know that the mask of the interface ire equals cire->ire_cmask. 2337 * (When ip_newroute_v6() created 'cire' for an on-link destn. 
it set 2338 * its cmask from the interface ire's mask) 2339 */ 2340 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 0, 2341 IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2342 NULL, match_flags, ipst); 2343 if (ire != NULL) 2344 return (ire); 2345 /* 2346 * If we didn't find an interface ire above, we can't declare failure. 2347 * For backwards compatibility, we need to support prefix routes 2348 * pointing to next hop gateways that are not on-link. 2349 * 2350 * Assume we are trying to ping some offlink destn, and we have the 2351 * routing table below. 2352 * 2353 * Eg. default - gw1 <--- pire (line 1) 2354 * gw1 - gw2 (line 2) 2355 * gw2 - hme0 (line 3) 2356 * 2357 * If we already have a cache ire for gw1 in 'cire', the 2358 * ire_ftable_lookup_v6 above would have failed, since there is no 2359 * interface ire to reach gw1. We will fallthru below. 2360 * 2361 * Here we duplicate the steps that ire_ftable_lookup_v6() did in 2362 * getting 'cire' from 'pire', in the MATCH_IRE_RECURSIVE case. 2363 * The differences are the following 2364 * i. We want the interface ire only, so we call 2365 * ire_ftable_lookup_v6() instead of ire_route_lookup_v6() 2366 * ii. We look for only prefix routes in the 1st call below. 2367 * ii. We want to match on the ihandle in the 2nd call below. 2368 */ 2369 match_flags = MATCH_IRE_TYPE; 2370 if (pire->ire_ipif != NULL) 2371 match_flags |= MATCH_IRE_ILL_GROUP; 2372 2373 mutex_enter(&pire->ire_lock); 2374 gw_addr = pire->ire_gateway_addr_v6; 2375 mutex_exit(&pire->ire_lock); 2376 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_OFFSUBNET, 2377 pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst); 2378 if (ire == NULL) 2379 return (NULL); 2380 /* 2381 * At this point 'ire' corresponds to the entry shown in line 2. 2382 * gw_addr is 'gw2' in the example above. 
2383 */ 2384 mutex_enter(&ire->ire_lock); 2385 gw_addr = ire->ire_gateway_addr_v6; 2386 mutex_exit(&ire->ire_lock); 2387 gw_ipif = ire->ire_ipif; 2388 ire_refrele(ire); 2389 2390 match_flags |= MATCH_IRE_IHANDLE; 2391 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_INTERFACE, 2392 gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2393 NULL, match_flags, ipst); 2394 return (ire); 2395 } 2396 2397 /* 2398 * Return the IRE_LOOPBACK, IRE_IF_RESOLVER or IRE_IF_NORESOLVER 2399 * ire associated with the specified ipif. 2400 * 2401 * This might occasionally be called when IPIF_UP is not set since 2402 * the IPV6_MULTICAST_IF as well as creating interface routes 2403 * allows specifying a down ipif (ipif_lookup* match ipifs that are down). 2404 * 2405 * Note that if IPIF_NOLOCAL, IPIF_NOXMIT, or IPIF_DEPRECATED is set on 2406 * the ipif this routine might return NULL. 2407 * (Sometimes called as writer though not required by this function.) 2408 */ 2409 ire_t * 2410 ipif_to_ire_v6(const ipif_t *ipif) 2411 { 2412 ire_t *ire; 2413 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2414 2415 ASSERT(ipif->ipif_isv6); 2416 if (ipif->ipif_ire_type == IRE_LOOPBACK) { 2417 ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, NULL, 2418 IRE_LOOPBACK, ipif, ALL_ZONES, NULL, 2419 (MATCH_IRE_TYPE | MATCH_IRE_IPIF), ipst); 2420 } else if (ipif->ipif_flags & IPIF_POINTOPOINT) { 2421 /* In this case we need to lookup destination address. 
*/ 2422 ire = ire_ftable_lookup_v6(&ipif->ipif_v6pp_dst_addr, 2423 &ipv6_all_ones, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 2424 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 2425 MATCH_IRE_MASK), ipst); 2426 } else { 2427 ire = ire_ftable_lookup_v6(&ipif->ipif_v6subnet, 2428 &ipif->ipif_v6net_mask, NULL, IRE_INTERFACE, ipif, NULL, 2429 ALL_ZONES, 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 2430 MATCH_IRE_MASK), ipst); 2431 } 2432 return (ire); 2433 } 2434 2435 /* 2436 * Return B_TRUE if a multirt route is resolvable 2437 * (or if no route is resolved yet), B_FALSE otherwise. 2438 * This only works in the global zone. 2439 */ 2440 boolean_t 2441 ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl, 2442 ip_stack_t *ipst) 2443 { 2444 ire_t *first_fire; 2445 ire_t *first_cire; 2446 ire_t *fire; 2447 ire_t *cire; 2448 irb_t *firb; 2449 irb_t *cirb; 2450 int unres_cnt = 0; 2451 boolean_t resolvable = B_FALSE; 2452 2453 /* Retrieve the first IRE_HOST that matches the destination */ 2454 first_fire = ire_ftable_lookup_v6(v6dstp, &ipv6_all_ones, 0, IRE_HOST, 2455 NULL, NULL, ALL_ZONES, 0, tsl, MATCH_IRE_MASK | MATCH_IRE_TYPE | 2456 MATCH_IRE_SECATTR, ipst); 2457 2458 /* No route at all */ 2459 if (first_fire == NULL) { 2460 return (B_TRUE); 2461 } 2462 2463 firb = first_fire->ire_bucket; 2464 ASSERT(firb); 2465 2466 /* Retrieve the first IRE_CACHE ire for that destination. */ 2467 first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID, tsl, ipst); 2468 2469 /* No resolved route. */ 2470 if (first_cire == NULL) { 2471 ire_refrele(first_fire); 2472 return (B_TRUE); 2473 } 2474 2475 /* At least one route is resolved. */ 2476 2477 cirb = first_cire->ire_bucket; 2478 ASSERT(cirb); 2479 2480 /* Count the number of routes to that dest that are declared. 
*/ 2481 IRB_REFHOLD(firb); 2482 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2483 if (!(fire->ire_flags & RTF_MULTIRT)) 2484 continue; 2485 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, v6dstp)) 2486 continue; 2487 unres_cnt++; 2488 } 2489 IRB_REFRELE(firb); 2490 2491 2492 /* Then subtract the number of routes to that dst that are resolved */ 2493 IRB_REFHOLD(cirb); 2494 for (cire = first_cire; cire != NULL; cire = cire->ire_next) { 2495 if (!(cire->ire_flags & RTF_MULTIRT)) 2496 continue; 2497 if (!IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, v6dstp)) 2498 continue; 2499 if (cire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 2500 continue; 2501 unres_cnt--; 2502 } 2503 IRB_REFRELE(cirb); 2504 2505 /* At least one route is unresolved; search for a resolvable route. */ 2506 if (unres_cnt > 0) 2507 resolvable = ire_multirt_lookup_v6(&first_cire, &first_fire, 2508 MULTIRT_USESTAMP|MULTIRT_CACHEGW, tsl, ipst); 2509 2510 if (first_fire) 2511 ire_refrele(first_fire); 2512 2513 if (first_cire) 2514 ire_refrele(first_cire); 2515 2516 return (resolvable); 2517 } 2518 2519 2520 /* 2521 * Return B_TRUE and update *ire_arg and *fire_arg 2522 * if at least one resolvable route is found. 2523 * Return B_FALSE otherwise (all routes are resolved or 2524 * the remaining unresolved routes are all unresolvable). 2525 * This only works in the global zone. 
2526 */ 2527 boolean_t 2528 ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, 2529 const ts_label_t *tsl, ip_stack_t *ipst) 2530 { 2531 clock_t delta; 2532 ire_t *best_fire = NULL; 2533 ire_t *best_cire = NULL; 2534 ire_t *first_fire; 2535 ire_t *first_cire; 2536 ire_t *fire; 2537 ire_t *cire; 2538 irb_t *firb = NULL; 2539 irb_t *cirb = NULL; 2540 ire_t *gw_ire; 2541 boolean_t already_resolved; 2542 boolean_t res; 2543 in6_addr_t v6dst; 2544 in6_addr_t v6gw; 2545 2546 ip2dbg(("ire_multirt_lookup_v6: *ire_arg %p, *fire_arg %p, " 2547 "flags %04x\n", (void *)*ire_arg, (void *)*fire_arg, flags)); 2548 2549 ASSERT(ire_arg); 2550 ASSERT(fire_arg); 2551 2552 /* Not an IRE_HOST ire; give up. */ 2553 if ((*fire_arg == NULL) || 2554 ((*fire_arg)->ire_type != IRE_HOST)) { 2555 return (B_FALSE); 2556 } 2557 2558 /* This is the first IRE_HOST ire for that destination. */ 2559 first_fire = *fire_arg; 2560 firb = first_fire->ire_bucket; 2561 ASSERT(firb); 2562 2563 mutex_enter(&first_fire->ire_lock); 2564 v6dst = first_fire->ire_addr_v6; 2565 mutex_exit(&first_fire->ire_lock); 2566 2567 ip2dbg(("ire_multirt_lookup_v6: dst %08x\n", 2568 ntohl(V4_PART_OF_V6(v6dst)))); 2569 2570 /* 2571 * Retrieve the first IRE_CACHE ire for that destination; 2572 * if we don't find one, no route for that dest is 2573 * resolved yet. 2574 */ 2575 first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID, tsl, ipst); 2576 if (first_cire) { 2577 cirb = first_cire->ire_bucket; 2578 } 2579 2580 ip2dbg(("ire_multirt_lookup_v6: first_cire %p\n", (void *)first_cire)); 2581 2582 /* 2583 * Search for a resolvable route, giving the top priority 2584 * to routes that can be resolved without any call to the resolver. 2585 */ 2586 IRB_REFHOLD(firb); 2587 2588 if (!IN6_IS_ADDR_MULTICAST(&v6dst)) { 2589 /* 2590 * For all multiroute IRE_HOST ires for that destination, 2591 * check if the route via the IRE_HOST's gateway is 2592 * resolved yet. 
2593 */ 2594 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2595 2596 if (!(fire->ire_flags & RTF_MULTIRT)) 2597 continue; 2598 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2599 continue; 2600 2601 if (fire->ire_gw_secattr != NULL && 2602 tsol_ire_match_gwattr(fire, tsl) != 0) { 2603 continue; 2604 } 2605 2606 mutex_enter(&fire->ire_lock); 2607 v6gw = fire->ire_gateway_addr_v6; 2608 mutex_exit(&fire->ire_lock); 2609 2610 ip2dbg(("ire_multirt_lookup_v6: fire %p, " 2611 "ire_addr %08x, ire_gateway_addr %08x\n", 2612 (void *)fire, 2613 ntohl(V4_PART_OF_V6(fire->ire_addr_v6)), 2614 ntohl(V4_PART_OF_V6(v6gw)))); 2615 2616 already_resolved = B_FALSE; 2617 2618 if (first_cire) { 2619 ASSERT(cirb); 2620 2621 IRB_REFHOLD(cirb); 2622 /* 2623 * For all IRE_CACHE ires for that 2624 * destination. 2625 */ 2626 for (cire = first_cire; 2627 cire != NULL; 2628 cire = cire->ire_next) { 2629 2630 if (!(cire->ire_flags & RTF_MULTIRT)) 2631 continue; 2632 if (!IN6_ARE_ADDR_EQUAL( 2633 &cire->ire_addr_v6, &v6dst)) 2634 continue; 2635 if (cire->ire_marks & 2636 (IRE_MARK_CONDEMNED| 2637 IRE_MARK_HIDDEN)) 2638 continue; 2639 2640 if (cire->ire_gw_secattr != NULL && 2641 tsol_ire_match_gwattr(cire, 2642 tsl) != 0) { 2643 continue; 2644 } 2645 2646 /* 2647 * Check if the IRE_CACHE's gateway 2648 * matches the IRE_HOST's gateway. 2649 */ 2650 if (IN6_ARE_ADDR_EQUAL( 2651 &cire->ire_gateway_addr_v6, 2652 &v6gw)) { 2653 already_resolved = B_TRUE; 2654 break; 2655 } 2656 } 2657 IRB_REFRELE(cirb); 2658 } 2659 2660 /* 2661 * This route is already resolved; 2662 * proceed with next one. 2663 */ 2664 if (already_resolved) { 2665 ip2dbg(("ire_multirt_lookup_v6: found cire %p, " 2666 "already resolved\n", (void *)cire)); 2667 continue; 2668 } 2669 2670 /* 2671 * The route is unresolved; is it actually 2672 * resolvable, i.e. is there a cache or a resolver 2673 * for the gateway? 
2674 */ 2675 gw_ire = ire_route_lookup_v6(&v6gw, 0, 0, 0, NULL, NULL, 2676 ALL_ZONES, tsl, MATCH_IRE_RECURSIVE | 2677 MATCH_IRE_SECATTR, ipst); 2678 2679 ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n", 2680 (void *)gw_ire)); 2681 2682 /* 2683 * This route can be resolved without any call to the 2684 * resolver; if the MULTIRT_CACHEGW flag is set, 2685 * give the top priority to this ire and exit the 2686 * loop. 2687 * This occurs when an resolver reply is processed 2688 * through ip_wput_nondata() 2689 */ 2690 if ((flags & MULTIRT_CACHEGW) && 2691 (gw_ire != NULL) && 2692 (gw_ire->ire_type & IRE_CACHETABLE)) { 2693 /* 2694 * Release the resolver associated to the 2695 * previous candidate best ire, if any. 2696 */ 2697 if (best_cire) { 2698 ire_refrele(best_cire); 2699 ASSERT(best_fire); 2700 } 2701 2702 best_fire = fire; 2703 best_cire = gw_ire; 2704 2705 ip2dbg(("ire_multirt_lookup_v6: found top prio " 2706 "best_fire %p, best_cire %p\n", 2707 (void *)best_fire, (void *)best_cire)); 2708 break; 2709 } 2710 2711 /* 2712 * Compute the time elapsed since our preceding 2713 * attempt to resolve that route. 2714 * If the MULTIRT_USESTAMP flag is set, we take that 2715 * route into account only if this time interval 2716 * exceeds ip_multirt_resolution_interval; 2717 * this prevents us from attempting to resolve a 2718 * broken route upon each sending of a packet. 2719 */ 2720 delta = lbolt - fire->ire_last_used_time; 2721 delta = TICK_TO_MSEC(delta); 2722 2723 res = (boolean_t) 2724 ((delta > ipst-> 2725 ips_ip_multirt_resolution_interval) || 2726 (!(flags & MULTIRT_USESTAMP))); 2727 2728 ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, " 2729 "res %d\n", 2730 (void *)fire, delta, res)); 2731 2732 if (res) { 2733 /* 2734 * A resolver exists for the gateway: save 2735 * the current IRE_HOST ire as a candidate 2736 * best ire. If we later discover that a 2737 * top priority ire exists (i.e. 
no need to 2738 * call the resolver), then this new ire 2739 * will be preferred to the current one. 2740 */ 2741 if (gw_ire != NULL) { 2742 if (best_fire == NULL) { 2743 ASSERT(best_cire == NULL); 2744 2745 best_fire = fire; 2746 best_cire = gw_ire; 2747 2748 ip2dbg(("ire_multirt_lookup_v6:" 2749 "found candidate " 2750 "best_fire %p, " 2751 "best_cire %p\n", 2752 (void *)best_fire, 2753 (void *)best_cire)); 2754 2755 /* 2756 * If MULTIRT_CACHEGW is not 2757 * set, we ignore the top 2758 * priority ires that can 2759 * be resolved without any 2760 * call to the resolver; 2761 * In that case, there is 2762 * actually no need 2763 * to continue the loop. 2764 */ 2765 if (!(flags & 2766 MULTIRT_CACHEGW)) { 2767 break; 2768 } 2769 continue; 2770 } 2771 } else { 2772 /* 2773 * No resolver for the gateway: the 2774 * route is not resolvable. 2775 * If the MULTIRT_SETSTAMP flag is 2776 * set, we stamp the IRE_HOST ire, 2777 * so we will not select it again 2778 * during this resolution interval. 2779 */ 2780 if (flags & MULTIRT_SETSTAMP) 2781 fire->ire_last_used_time = 2782 lbolt; 2783 } 2784 } 2785 2786 if (gw_ire != NULL) 2787 ire_refrele(gw_ire); 2788 } 2789 } else { /* IN6_IS_ADDR_MULTICAST(&v6dst) */ 2790 2791 for (fire = first_fire; 2792 fire != NULL; 2793 fire = fire->ire_next) { 2794 2795 if (!(fire->ire_flags & RTF_MULTIRT)) 2796 continue; 2797 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2798 continue; 2799 2800 if (fire->ire_gw_secattr != NULL && 2801 tsol_ire_match_gwattr(fire, tsl) != 0) { 2802 continue; 2803 } 2804 2805 already_resolved = B_FALSE; 2806 2807 mutex_enter(&fire->ire_lock); 2808 v6gw = fire->ire_gateway_addr_v6; 2809 mutex_exit(&fire->ire_lock); 2810 2811 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2812 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, tsl, 2813 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE | 2814 MATCH_IRE_SECATTR, ipst); 2815 2816 /* No resolver for the gateway; we skip this ire. 
*/ 2817 if (gw_ire == NULL) { 2818 continue; 2819 } 2820 2821 if (first_cire) { 2822 2823 IRB_REFHOLD(cirb); 2824 /* 2825 * For all IRE_CACHE ires for that 2826 * destination. 2827 */ 2828 for (cire = first_cire; 2829 cire != NULL; 2830 cire = cire->ire_next) { 2831 2832 if (!(cire->ire_flags & RTF_MULTIRT)) 2833 continue; 2834 if (!IN6_ARE_ADDR_EQUAL( 2835 &cire->ire_addr_v6, &v6dst)) 2836 continue; 2837 if (cire->ire_marks & 2838 (IRE_MARK_CONDEMNED| 2839 IRE_MARK_HIDDEN)) 2840 continue; 2841 2842 if (cire->ire_gw_secattr != NULL && 2843 tsol_ire_match_gwattr(cire, 2844 tsl) != 0) { 2845 continue; 2846 } 2847 2848 /* 2849 * Cache entries are linked to the 2850 * parent routes using the parent handle 2851 * (ire_phandle). If no cache entry has 2852 * the same handle as fire, fire is 2853 * still unresolved. 2854 */ 2855 ASSERT(cire->ire_phandle != 0); 2856 if (cire->ire_phandle == 2857 fire->ire_phandle) { 2858 already_resolved = B_TRUE; 2859 break; 2860 } 2861 } 2862 IRB_REFRELE(cirb); 2863 } 2864 2865 /* 2866 * This route is already resolved; proceed with 2867 * next one. 2868 */ 2869 if (already_resolved) { 2870 ire_refrele(gw_ire); 2871 continue; 2872 } 2873 2874 /* 2875 * Compute the time elapsed since our preceding 2876 * attempt to resolve that route. 2877 * If the MULTIRT_USESTAMP flag is set, we take 2878 * that route into account only if this time 2879 * interval exceeds ip_multirt_resolution_interval; 2880 * this prevents us from attempting to resolve a 2881 * broken route upon each sending of a packet. 
2882 */ 2883 delta = lbolt - fire->ire_last_used_time; 2884 delta = TICK_TO_MSEC(delta); 2885 2886 res = (boolean_t) 2887 ((delta > ipst-> 2888 ips_ip_multirt_resolution_interval) || 2889 (!(flags & MULTIRT_USESTAMP))); 2890 2891 ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, " 2892 "flags %04x, res %d\n", 2893 (void *)fire, delta, flags, res)); 2894 2895 if (res) { 2896 if (best_cire) { 2897 /* 2898 * Release the resolver associated 2899 * to the preceding candidate best 2900 * ire, if any. 2901 */ 2902 ire_refrele(best_cire); 2903 ASSERT(best_fire); 2904 } 2905 best_fire = fire; 2906 best_cire = gw_ire; 2907 continue; 2908 } 2909 2910 ire_refrele(gw_ire); 2911 } 2912 } 2913 2914 if (best_fire) { 2915 IRE_REFHOLD(best_fire); 2916 } 2917 IRB_REFRELE(firb); 2918 2919 /* Release the first IRE_CACHE we initially looked up, if any. */ 2920 if (first_cire) 2921 ire_refrele(first_cire); 2922 2923 /* Found a resolvable route. */ 2924 if (best_fire) { 2925 ASSERT(best_cire); 2926 2927 if (*fire_arg) 2928 ire_refrele(*fire_arg); 2929 if (*ire_arg) 2930 ire_refrele(*ire_arg); 2931 2932 /* 2933 * Update the passed arguments with the 2934 * resolvable multirt route we found 2935 */ 2936 *fire_arg = best_fire; 2937 *ire_arg = best_cire; 2938 2939 ip2dbg(("ire_multirt_lookup_v6: returning B_TRUE, " 2940 "*fire_arg %p, *ire_arg %p\n", 2941 (void *)best_fire, (void *)best_cire)); 2942 2943 return (B_TRUE); 2944 } 2945 2946 ASSERT(best_cire == NULL); 2947 2948 ip2dbg(("ire_multirt_lookup_v6: returning B_FALSE, *fire_arg %p, " 2949 "*ire_arg %p\n", 2950 (void *)*fire_arg, (void *)*ire_arg)); 2951 2952 /* No resolvable route. */ 2953 return (B_FALSE); 2954 } 2955 2956 2957 /* 2958 * Find an IRE_OFFSUBNET IRE entry for the multicast address 'v6dstp' 2959 * that goes through 'ipif'. As a fallback, a route that goes through 2960 * ipif->ipif_ill can be returned. 
2961 */ 2962 ire_t * 2963 ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp) 2964 { 2965 ire_t *ire; 2966 ire_t *save_ire = NULL; 2967 ire_t *gw_ire; 2968 irb_t *irb; 2969 in6_addr_t v6gw; 2970 int match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL; 2971 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2972 2973 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, NULL, ALL_ZONES, 0, 2974 NULL, MATCH_IRE_DEFAULT, ipst); 2975 2976 if (ire == NULL) 2977 return (NULL); 2978 2979 irb = ire->ire_bucket; 2980 ASSERT(irb); 2981 2982 IRB_REFHOLD(irb); 2983 ire_refrele(ire); 2984 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 2985 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) || 2986 (ipif->ipif_zoneid != ire->ire_zoneid && 2987 ire->ire_zoneid != ALL_ZONES)) { 2988 continue; 2989 } 2990 2991 switch (ire->ire_type) { 2992 case IRE_DEFAULT: 2993 case IRE_PREFIX: 2994 case IRE_HOST: 2995 mutex_enter(&ire->ire_lock); 2996 v6gw = ire->ire_gateway_addr_v6; 2997 mutex_exit(&ire->ire_lock); 2998 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2999 IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, 3000 NULL, match_flags, ipst); 3001 3002 if (gw_ire != NULL) { 3003 if (save_ire != NULL) { 3004 ire_refrele(save_ire); 3005 } 3006 IRE_REFHOLD(ire); 3007 if (gw_ire->ire_ipif == ipif) { 3008 ire_refrele(gw_ire); 3009 3010 IRB_REFRELE(irb); 3011 return (ire); 3012 } 3013 ire_refrele(gw_ire); 3014 save_ire = ire; 3015 } 3016 break; 3017 case IRE_IF_NORESOLVER: 3018 case IRE_IF_RESOLVER: 3019 if (ire->ire_ipif == ipif) { 3020 if (save_ire != NULL) { 3021 ire_refrele(save_ire); 3022 } 3023 IRE_REFHOLD(ire); 3024 3025 IRB_REFRELE(irb); 3026 return (ire); 3027 } 3028 break; 3029 } 3030 } 3031 IRB_REFRELE(irb); 3032 3033 return (save_ire); 3034 } 3035