1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 /* 32 * This file contains routines that manipulate Internet Routing Entries (IREs). 
 */
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/ddi.h>
#include <sys/cmn_err.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <net/if_dl.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>

#include <inet/common.h>
#include <inet/mi.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/ip_ndp.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/ipclassifier.h>
#include <inet/nd.h>
#include <sys/kmem.h>
#include <sys/zone.h>

#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>

/*
 * Never written in this file; having static storage it is zero-filled, and
 * the ire_create*_v6() routines copy it over a freshly allocated IRE to
 * "start clean".
 */
static ire_t	ire_null;

/* Forward declarations for the file-local helpers. */
static ire_t	*ire_ihandle_lookup_onlink_v6(ire_t *cire);
static void	ire_report_ftable_v6(ire_t *ire, char *mp);
static void	ire_report_ctable_v6(ire_t *ire, char *mp);
static boolean_t ire_match_args_v6(ire_t *ire, const in6_addr_t *addr,
    const in6_addr_t *mask, const in6_addr_t *gateway, int type,
    const ipif_t *ipif, zoneid_t zoneid, uint32_t ihandle,
    const ts_label_t *tsl, int match_flags);
static ire_t	*ire_init_v6(ire_t *, const in6_addr_t *, const in6_addr_t *,
    const in6_addr_t *, const in6_addr_t *, uint_t *, queue_t *, queue_t *,
    ushort_t, ipif_t *, const in6_addr_t *, uint32_t, uint32_t, uint_t,
    const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *);

/*
 * Named Dispatch routine to produce a formatted report on all IREs.
 * This report is accessed by using the ndd utility to "get" ND variable
 * "ip_ire_status_v6".
83 */ 84 /* ARGSUSED */ 85 int 86 ip_ire_report_v6(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 87 { 88 zoneid_t zoneid; 89 ip_stack_t *ipst; 90 91 (void) mi_mpprintf(mp, 92 "IRE " MI_COL_HDRPAD_STR 93 "rfq " MI_COL_HDRPAD_STR 94 "stq " MI_COL_HDRPAD_STR 95 " zone mxfrg rtt rtt_sd ssthresh ref " 96 "rtomax tstamp_ok wscale_ok ecn_ok pmtud_ok sack sendpipe recvpipe " 97 "in/out/forward type addr mask " 98 "src gateway"); 99 /* 100 * 01234567 01234567 01234567 12345 12345 12345 12345 12345678 123 101 * 123456 123456789 123456789 123456 12345678 1234 12345678 12345678 102 * in/out/forward xxxxxxxxxx 103 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 104 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 105 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 106 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 107 */ 108 109 /* 110 * Because of the ndd constraint, at most we can have 64K buffer 111 * to put in all IRE info. So to be more efficient, just 112 * allocate a 64K buffer here, assuming we need that large buffer. 113 * This should be OK as only root can do ndd /dev/ip. 114 */ 115 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 116 /* The following may work even if we cannot get a large buf. */ 117 (void) mi_mpprintf(mp, "<< Out of buffer >>\n"); 118 return (0); 119 } 120 zoneid = Q_TO_CONN(q)->conn_zoneid; 121 if (zoneid == GLOBAL_ZONEID) 122 zoneid = ALL_ZONES; 123 ipst = CONNQ_TO_IPST(q); 124 125 ire_walk_v6(ire_report_ftable_v6, (char *)mp->b_cont, zoneid, ipst); 126 ire_walk_v6(ire_report_ctable_v6, (char *)mp->b_cont, zoneid, ipst); 127 return (0); 128 } 129 130 /* 131 * ire_walk routine invoked for ip_ire_report_v6 for each IRE. 
132 */ 133 static void 134 ire_report_ftable_v6(ire_t *ire, char *mp) 135 { 136 char buf1[INET6_ADDRSTRLEN]; 137 char buf2[INET6_ADDRSTRLEN]; 138 char buf3[INET6_ADDRSTRLEN]; 139 char buf4[INET6_ADDRSTRLEN]; 140 uint_t fo_pkt_count; 141 uint_t ib_pkt_count; 142 int ref; 143 in6_addr_t gw_addr_v6; 144 uint_t print_len, buf_len; 145 146 ASSERT(ire->ire_ipversion == IPV6_VERSION); 147 if (ire->ire_type & IRE_CACHETABLE) 148 return; 149 buf_len = ((mblk_t *)mp)->b_datap->db_lim - ((mblk_t *)mp)->b_wptr; 150 if (buf_len <= 0) 151 return; 152 153 /* Number of active references of this ire */ 154 ref = ire->ire_refcnt; 155 /* "inbound" to a non local address is a forward */ 156 ib_pkt_count = ire->ire_ib_pkt_count; 157 fo_pkt_count = 0; 158 ASSERT(!(ire->ire_type & IRE_BROADCAST)); 159 if (!(ire->ire_type & (IRE_LOCAL|IRE_BROADCAST))) { 160 fo_pkt_count = ib_pkt_count; 161 ib_pkt_count = 0; 162 } 163 164 mutex_enter(&ire->ire_lock); 165 gw_addr_v6 = ire->ire_gateway_addr_v6; 166 mutex_exit(&ire->ire_lock); 167 168 print_len = snprintf((char *)((mblk_t *)mp)->b_wptr, buf_len, 169 MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR "%5d " 170 "%05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d " 171 "%04d %08d %08d %d/%d/%d %s\n\t%s\n\t%s\n\t%s\n\t%s\n", 172 (void *)ire, (void *)ire->ire_rfq, (void *)ire->ire_stq, 173 (int)ire->ire_zoneid, 174 ire->ire_max_frag, ire->ire_uinfo.iulp_rtt, 175 ire->ire_uinfo.iulp_rtt_sd, 176 ire->ire_uinfo.iulp_ssthresh, ref, 177 ire->ire_uinfo.iulp_rtomax, 178 (ire->ire_uinfo.iulp_tstamp_ok ? 1: 0), 179 (ire->ire_uinfo.iulp_wscale_ok ? 1: 0), 180 (ire->ire_uinfo.iulp_ecn_ok ? 1: 0), 181 (ire->ire_uinfo.iulp_pmtud_ok ? 
1: 0), 182 ire->ire_uinfo.iulp_sack, 183 ire->ire_uinfo.iulp_spipe, ire->ire_uinfo.iulp_rpipe, 184 ib_pkt_count, ire->ire_ob_pkt_count, fo_pkt_count, 185 ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type), 186 inet_ntop(AF_INET6, &ire->ire_addr_v6, buf1, sizeof (buf1)), 187 inet_ntop(AF_INET6, &ire->ire_mask_v6, buf2, sizeof (buf2)), 188 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, buf3, sizeof (buf3)), 189 inet_ntop(AF_INET6, &gw_addr_v6, buf4, sizeof (buf4))); 190 if (print_len < buf_len) { 191 ((mblk_t *)mp)->b_wptr += print_len; 192 } else { 193 ((mblk_t *)mp)->b_wptr += buf_len; 194 } 195 } 196 197 /* ire_walk routine invoked for ip_ire_report_v6 for each IRE. */ 198 static void 199 ire_report_ctable_v6(ire_t *ire, char *mp) 200 { 201 char buf1[INET6_ADDRSTRLEN]; 202 char buf2[INET6_ADDRSTRLEN]; 203 char buf3[INET6_ADDRSTRLEN]; 204 char buf4[INET6_ADDRSTRLEN]; 205 uint_t fo_pkt_count; 206 uint_t ib_pkt_count; 207 int ref; 208 in6_addr_t gw_addr_v6; 209 uint_t print_len, buf_len; 210 211 if ((ire->ire_type & IRE_CACHETABLE) == 0) 212 return; 213 buf_len = ((mblk_t *)mp)->b_datap->db_lim - ((mblk_t *)mp)->b_wptr; 214 if (buf_len <= 0) 215 return; 216 217 /* Number of active references of this ire */ 218 ref = ire->ire_refcnt; 219 /* "inbound" to a non local address is a forward */ 220 ib_pkt_count = ire->ire_ib_pkt_count; 221 fo_pkt_count = 0; 222 ASSERT(!(ire->ire_type & IRE_BROADCAST)); 223 if (ire->ire_type & IRE_LOCAL) { 224 fo_pkt_count = ib_pkt_count; 225 ib_pkt_count = 0; 226 } 227 228 mutex_enter(&ire->ire_lock); 229 gw_addr_v6 = ire->ire_gateway_addr_v6; 230 mutex_exit(&ire->ire_lock); 231 232 print_len = snprintf((char *)((mblk_t *)mp)->b_wptr, buf_len, 233 MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR "%5d " 234 "%05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d " 235 "%04d %08d %08d %d/%d/%d %s\n\t%s\n\t%s\n\t%s\n\t%s\n", 236 (void *)ire, (void *)ire->ire_rfq, (void *)ire->ire_stq, 237 (int)ire->ire_zoneid, 238 ire->ire_max_frag, 
ire->ire_uinfo.iulp_rtt, 239 ire->ire_uinfo.iulp_rtt_sd, ire->ire_uinfo.iulp_ssthresh, ref, 240 ire->ire_uinfo.iulp_rtomax, 241 (ire->ire_uinfo.iulp_tstamp_ok ? 1: 0), 242 (ire->ire_uinfo.iulp_wscale_ok ? 1: 0), 243 (ire->ire_uinfo.iulp_ecn_ok ? 1: 0), 244 (ire->ire_uinfo.iulp_pmtud_ok ? 1: 0), 245 ire->ire_uinfo.iulp_sack, 246 ire->ire_uinfo.iulp_spipe, ire->ire_uinfo.iulp_rpipe, 247 ib_pkt_count, ire->ire_ob_pkt_count, 248 fo_pkt_count, ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type), 249 inet_ntop(AF_INET6, &ire->ire_addr_v6, buf1, sizeof (buf1)), 250 inet_ntop(AF_INET6, &ire->ire_mask_v6, buf2, sizeof (buf2)), 251 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, buf3, sizeof (buf3)), 252 inet_ntop(AF_INET6, &gw_addr_v6, buf4, sizeof (buf4))); 253 if (print_len < buf_len) { 254 ((mblk_t *)mp)->b_wptr += print_len; 255 } else { 256 ((mblk_t *)mp)->b_wptr += buf_len; 257 } 258 } 259 260 261 /* 262 * Initialize the ire that is specific to IPv6 part and call 263 * ire_init_common to finish it. 264 */ 265 static ire_t * 266 ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, const in6_addr_t *v6mask, 267 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 268 uint_t *max_fragp, queue_t *rfq, queue_t *stq, ushort_t type, 269 ipif_t *ipif, const in6_addr_t *v6cmask, uint32_t phandle, 270 uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, tsol_gc_t *gc, 271 tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 272 { 273 274 /* 275 * Reject IRE security attribute creation/initialization 276 * if system is not running in Trusted mode. 
277 */ 278 if ((gc != NULL || gcgrp != NULL) && !is_system_labeled()) 279 return (NULL); 280 281 282 BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_alloced); 283 ire->ire_addr_v6 = *v6addr; 284 285 if (v6src_addr != NULL) 286 ire->ire_src_addr_v6 = *v6src_addr; 287 if (v6mask != NULL) { 288 ire->ire_mask_v6 = *v6mask; 289 ire->ire_masklen = ip_mask_to_plen_v6(&ire->ire_mask_v6); 290 } 291 if (v6gateway != NULL) 292 ire->ire_gateway_addr_v6 = *v6gateway; 293 294 if (type == IRE_CACHE && v6cmask != NULL) 295 ire->ire_cmask_v6 = *v6cmask; 296 297 /* 298 * Multirouted packets need to have a fragment header added so that 299 * the receiver is able to discard duplicates according to their 300 * fragment identifier. 301 */ 302 if (type == IRE_CACHE && (flags & RTF_MULTIRT)) { 303 ire->ire_frag_flag = IPH_FRAG_HDR; 304 } 305 306 /* ire_init_common will free the mblks upon encountering any failure */ 307 if (!ire_init_common(ire, max_fragp, NULL, rfq, stq, type, ipif, 308 phandle, ihandle, flags, IPV6_VERSION, ulp_info, gc, gcgrp, ipst)) 309 return (NULL); 310 311 return (ire); 312 } 313 314 /* 315 * Similar to ire_create_v6 except that it is called only when 316 * we want to allocate ire as an mblk e.g. we have a external 317 * resolver. Do we need this in IPv6 ? 318 * 319 * IPv6 initializes the ire_nce in ire_add_v6, which expects to 320 * find the ire_nce to be null when it is called. So, although 321 * we have a src_nce parameter (in the interest of matching up with 322 * the argument list of the v4 version), we ignore the src_nce 323 * argument here. 
324 */ 325 /* ARGSUSED */ 326 ire_t * 327 ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 328 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 329 nce_t *src_nce, queue_t *rfq, queue_t *stq, ushort_t type, 330 ipif_t *ipif, const in6_addr_t *v6cmask, 331 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 332 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 333 { 334 ire_t *ire; 335 ire_t *ret_ire; 336 mblk_t *mp; 337 338 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 339 340 /* Allocate the new IRE. */ 341 mp = allocb(sizeof (ire_t), BPRI_MED); 342 if (mp == NULL) { 343 ip1dbg(("ire_create_mp_v6: alloc failed\n")); 344 return (NULL); 345 } 346 347 ire = (ire_t *)mp->b_rptr; 348 mp->b_wptr = (uchar_t *)&ire[1]; 349 350 /* Start clean. */ 351 *ire = ire_null; 352 ire->ire_mp = mp; 353 mp->b_datap->db_type = IRE_DB_TYPE; 354 355 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 356 NULL, rfq, stq, type, ipif, v6cmask, phandle, 357 ihandle, flags, ulp_info, gc, gcgrp, ipst); 358 359 if (ret_ire == NULL) { 360 freeb(ire->ire_mp); 361 return (NULL); 362 } 363 return (ire); 364 } 365 366 /* 367 * ire_create_v6 is called to allocate and initialize a new IRE. 368 * 369 * NOTE : This is called as writer sometimes though not required 370 * by this function. 371 * 372 * See comments above ire_create_mp_v6() for the rationale behind the 373 * unused src_nce argument. 
374 */ 375 /* ARGSUSED */ 376 ire_t * 377 ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 378 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 379 uint_t *max_fragp, nce_t *src_nce, queue_t *rfq, queue_t *stq, 380 ushort_t type, ipif_t *ipif, const in6_addr_t *v6cmask, 381 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 382 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 383 { 384 ire_t *ire; 385 ire_t *ret_ire; 386 387 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 388 389 ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP); 390 if (ire == NULL) { 391 ip1dbg(("ire_create_v6: alloc failed\n")); 392 return (NULL); 393 } 394 *ire = ire_null; 395 396 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 397 max_fragp, rfq, stq, type, ipif, v6cmask, phandle, 398 ihandle, flags, ulp_info, gc, gcgrp, ipst); 399 400 if (ret_ire == NULL) { 401 kmem_cache_free(ire_cache, ire); 402 return (NULL); 403 } 404 ASSERT(ret_ire == ire); 405 return (ire); 406 } 407 408 /* 409 * Find an IRE_INTERFACE for the multicast group. 410 * Allows different routes for multicast addresses 411 * in the unicast routing table (akin to FF::0/8 but could be more specific) 412 * which point at different interfaces. This is used when IPV6_MULTICAST_IF 413 * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't 414 * specify the interface to join on. 415 * 416 * Supports link-local addresses by following the ipif/ill when recursing. 417 */ 418 ire_t * 419 ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst) 420 { 421 ire_t *ire; 422 ipif_t *ipif = NULL; 423 int match_flags = MATCH_IRE_TYPE; 424 in6_addr_t gw_addr_v6; 425 426 ire = ire_ftable_lookup_v6(group, 0, 0, 0, NULL, NULL, 427 zoneid, 0, NULL, MATCH_IRE_DEFAULT, ipst); 428 429 /* We search a resolvable ire in case of multirouting. 
*/ 430 if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) { 431 ire_t *cire = NULL; 432 /* 433 * If the route is not resolvable, the looked up ire 434 * may be changed here. In that case, ire_multirt_lookup() 435 * IRE_REFRELE the original ire and change it. 436 */ 437 (void) ire_multirt_lookup_v6(&cire, &ire, MULTIRT_CACHEGW, 438 NULL, ipst); 439 if (cire != NULL) 440 ire_refrele(cire); 441 } 442 if (ire == NULL) 443 return (NULL); 444 /* 445 * Make sure we follow ire_ipif. 446 * 447 * We need to determine the interface route through 448 * which the gateway will be reached. We don't really 449 * care which interface is picked if the interface is 450 * part of a group. 451 */ 452 if (ire->ire_ipif != NULL) { 453 ipif = ire->ire_ipif; 454 match_flags |= MATCH_IRE_ILL_GROUP; 455 } 456 457 switch (ire->ire_type) { 458 case IRE_DEFAULT: 459 case IRE_PREFIX: 460 case IRE_HOST: 461 mutex_enter(&ire->ire_lock); 462 gw_addr_v6 = ire->ire_gateway_addr_v6; 463 mutex_exit(&ire->ire_lock); 464 ire_refrele(ire); 465 ire = ire_ftable_lookup_v6(&gw_addr_v6, 0, 0, 466 IRE_INTERFACE, ipif, NULL, zoneid, 0, 467 NULL, match_flags, ipst); 468 return (ire); 469 case IRE_IF_NORESOLVER: 470 case IRE_IF_RESOLVER: 471 return (ire); 472 default: 473 ire_refrele(ire); 474 return (NULL); 475 } 476 } 477 478 /* 479 * Return any local address. We use this to target ourselves 480 * when the src address was specified as 'default'. 481 * Preference for IRE_LOCAL entries. 
482 */ 483 ire_t * 484 ire_lookup_local_v6(zoneid_t zoneid, ip_stack_t *ipst) 485 { 486 ire_t *ire; 487 irb_t *irb; 488 ire_t *maybe = NULL; 489 int i; 490 491 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 492 irb = &ipst->ips_ip_cache_table_v6[i]; 493 if (irb->irb_ire == NULL) 494 continue; 495 rw_enter(&irb->irb_lock, RW_READER); 496 for (ire = irb->irb_ire; ire; ire = ire->ire_next) { 497 if ((ire->ire_marks & IRE_MARK_CONDEMNED) || 498 ire->ire_zoneid != zoneid && 499 ire->ire_zoneid != ALL_ZONES) 500 continue; 501 switch (ire->ire_type) { 502 case IRE_LOOPBACK: 503 if (maybe == NULL) { 504 IRE_REFHOLD(ire); 505 maybe = ire; 506 } 507 break; 508 case IRE_LOCAL: 509 if (maybe != NULL) { 510 ire_refrele(maybe); 511 } 512 IRE_REFHOLD(ire); 513 rw_exit(&irb->irb_lock); 514 return (ire); 515 } 516 } 517 rw_exit(&irb->irb_lock); 518 } 519 return (maybe); 520 } 521 522 /* 523 * This function takes a mask and returns number of bits set in the 524 * mask (the represented prefix length). Assumes a contiguous mask. 525 */ 526 int 527 ip_mask_to_plen_v6(const in6_addr_t *v6mask) 528 { 529 int bits; 530 int plen = IPV6_ABITS; 531 int i; 532 533 for (i = 3; i >= 0; i--) { 534 if (v6mask->s6_addr32[i] == 0) { 535 plen -= 32; 536 continue; 537 } 538 bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1; 539 if (bits == 0) 540 break; 541 plen -= bits; 542 } 543 544 return (plen); 545 } 546 547 /* 548 * Convert a prefix length to the mask for that prefix. 549 * Returns the argument bitmask. 550 */ 551 in6_addr_t * 552 ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask) 553 { 554 uint32_t *ptr; 555 556 if (plen < 0 || plen > IPV6_ABITS) 557 return (NULL); 558 *bitmask = ipv6_all_zeros; 559 560 ptr = (uint32_t *)bitmask; 561 while (plen > 32) { 562 *ptr++ = 0xffffffffU; 563 plen -= 32; 564 } 565 *ptr = htonl(0xffffffffU << (32 - plen)); 566 return (bitmask); 567 } 568 569 /* 570 * Add a fully initialized IRE to an appropriate 571 * table based on ire_type. 
 *
 * The forward table contains IRE_PREFIX/IRE_HOST and
 * IRE_IF_RESOLVER/IRE_IF_NORESOLVER and IRE_DEFAULT.
 *
 * The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK
 * and IRE_CACHE.
 *
 * NOTE : This function is called as writer though not required
 * by this function.
 *
 * ire_p points at the IRE to add.  q, mp and func are only handed on to
 * ire_atomic_start(), which may queue the request.
 *
 * Returns 0 with *ire_p set to a held ire: either the one passed in, now
 * linked into its bucket, or an already existing duplicate, in which case
 * the new ire has been deleted.  On failure the ire is deleted, *ire_p is
 * set to NULL and EINVAL, ENOMEM or the error from ire_atomic_start() is
 * returned.
 */
int
ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func)
{
	ire_t	*ire1;
	int	mask_table_index;
	irb_t	*irb_ptr;
	ire_t	**irep;
	int	flags;
	ire_t	*pire = NULL;
	ill_t	*stq_ill;
	boolean_t	ndp_g_lock_held = B_FALSE;
	ire_t	*ire = *ire_p;
	int	error;
	ip_stack_t	*ipst = ire->ire_ipst;

	ASSERT(ire->ire_ipversion == IPV6_VERSION);
	ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */
	ASSERT(ire->ire_nce == NULL);

	/*
	 * Normalize the mask and source address according to the type;
	 * unknown types are rejected.
	 */
	switch (ire->ire_type) {
	case IRE_HOST:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_CACHE:
	case IRE_LOCAL:
	case IRE_LOOPBACK:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		break;
	case IRE_PREFIX:
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_DEFAULT:
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_IF_RESOLVER:
	case IRE_IF_NORESOLVER:
		break;
	default:
		printf("ire_add_v6: ire %p has unrecognized IRE type (%d)\n",
		    (void *)ire, ire->ire_type);
		ire_delete(ire);
		*ire_p = NULL;
		return (EINVAL);
	}

	/* Make sure the address is properly masked. */
	V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6);

	/* Find the appropriate list head. */
	if ((ire->ire_type & IRE_CACHETABLE) == 0) {
		/* IRE goes into Forward Table */
		mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6);
		if ((ipst->ips_ip_forwarding_table_v6[mask_table_index]) ==
		    NULL) {
			irb_t *ptr;
			int i;

			/*
			 * The hash table for this prefix length does not
			 * exist yet.  Build it without holding the init lock
			 * and publish it under the lock afterwards.
			 */
			ptr = (irb_t *)mi_zalloc((
			    ipst->ips_ip6_ftable_hash_size * sizeof (irb_t)));
			if (ptr == NULL) {
				ire_delete(ire);
				*ire_p = NULL;
				return (ENOMEM);
			}
			for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) {
				rw_init(&ptr[i].irb_lock, NULL,
				    RW_DEFAULT, NULL);
			}
			mutex_enter(&ipst->ips_ire_ft_init_lock);
			if (ipst->ips_ip_forwarding_table_v6[
			    mask_table_index] == NULL) {
				ipst->ips_ip_forwarding_table_v6[
				    mask_table_index] = ptr;
				mutex_exit(&ipst->ips_ire_ft_init_lock);
			} else {
				/*
				 * Some other thread won the race in
				 * initializing the forwarding table at the
				 * same index.
				 */
				mutex_exit(&ipst->ips_ire_ft_init_lock);
				for (i = 0; i < ipst->ips_ip6_ftable_hash_size;
				    i++) {
					rw_destroy(&ptr[i].irb_lock);
				}
				mi_free(ptr);
			}
		}
		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[mask_table_index][
		    IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6,
		    ipst->ips_ip6_ftable_hash_size)]);
	} else {
		/* IRE goes into the cache table, hashed on the address only */
		irb_ptr = &(ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(
		    ire->ire_addr_v6, ipst->ips_ip6_cache_table_size)]);
	}
	/*
	 * For xresolv interfaces (v6 interfaces with an external
	 * address resolver), ip_newroute_v6/ip_newroute_ipif_v6
	 * are unable to prevent the deletion of the interface route
	 * while adding an IRE_CACHE for an on-link destination
	 * in the IRE_IF_RESOLVER case, since the ire has to go to
	 * the external resolver and return. We can't do a REFHOLD on the
	 * associated interface ire for fear of the message being freed
	 * if the external resolver can't resolve the address.
	 * Here we look up the interface ire in the forwarding table
	 * and make sure that the interface route has not been deleted.
	 */
	if (ire->ire_type == IRE_CACHE &&
	    IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6) &&
	    (((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) &&
	    (((ill_t *)ire->ire_stq->q_ptr)->ill_flags & ILLF_XRESOLV)) {

		pire = ire_ihandle_lookup_onlink_v6(ire);
		if (pire == NULL) {
			ire_delete(ire);
			*ire_p = NULL;
			return (EINVAL);
		}
		/* Prevent pire from getting deleted */
		IRB_REFHOLD(pire->ire_bucket);
		/* Has it been removed already? */
		if (pire->ire_marks & IRE_MARK_CONDEMNED) {
			IRB_REFRELE(pire->ire_bucket);
			ire_refrele(pire);
			ire_delete(ire);
			*ire_p = NULL;
			return (EINVAL);
		}
	}

	flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW);
	/*
	 * For IRE_CACHES, MATCH_IRE_IPIF is not enough to check
	 * for duplicates because :
	 *
	 * 1) ire_ipif->ipif_ill and ire_stq->q_ptr could be
	 *    pointing at different ills. A real duplicate is
	 *    a match on both ire_ipif and ire_stq.
	 *
	 * 2) We could have multiple packets trying to create
	 *    an IRE_CACHE for the same ill.
	 *
	 * Moreover, IPIF_NOFAILOVER and IPV6_BOUND_PIF endpoints wants
	 * to go out on a particular ill. Rather than looking at the
	 * packet, we depend on the above for MATCH_IRE_ILL here.
	 *
	 * Unlike IPv4, MATCH_IRE_IPIF is needed here as we could have
	 * multiple IRE_CACHES for an ill for the same destination
	 * with various scoped addresses i.e represented by ipifs.
	 *
	 * MATCH_IRE_ILL is done implicitly below for IRE_CACHES.
	 */
	if (ire->ire_ipif != NULL)
		flags |= MATCH_IRE_IPIF;
	/*
	 * If we are creating hidden ires, make sure we search on
	 * this ill (MATCH_IRE_ILL) and a hidden ire, while we are
	 * searching for duplicates below. Otherwise we could
	 * potentially find an IRE on some other interface
	 * and it may not be a IRE marked with IRE_MARK_HIDDEN. We
	 * shouldn't do this as this will lead to an infinite loop as
	 * eventually we need an hidden ire for this packet to go
	 * out. MATCH_IRE_ILL is already marked above.
	 */
	if (ire->ire_marks & IRE_MARK_HIDDEN) {
		ASSERT(ire->ire_type == IRE_CACHE);
		flags |= MATCH_IRE_MARK_HIDDEN;
	}

	/*
	 * Start the atomic add of the ire. Grab the ill locks,
	 * ill_g_usesrc_lock and the bucket lock. Check for condemned.
	 * To avoid lock order problems, get the ndp6.ndp_g_lock now itself.
	 */
	if (ire->ire_type == IRE_CACHE) {
		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
		ndp_g_lock_held = B_TRUE;
	}

	/*
	 * If ipif or ill is changing ire_atomic_start() may queue the
	 * request and return EINPROGRESS.
	 */

	error = ire_atomic_start(irb_ptr, ire, q, mp, func);
	if (error != 0) {
		if (ndp_g_lock_held)
			mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
		/*
		 * We don't know whether it is a valid ipif or not.
		 * So, set it to NULL. This assumes that the ire has not added
		 * a reference to the ipif.
		 */
		ire->ire_ipif = NULL;
		ire_delete(ire);
		if (pire != NULL) {
			IRB_REFRELE(pire->ire_bucket);
			ire_refrele(pire);
		}
		*ire_p = NULL;
		return (error);
	}
	/*
	 * To avoid creating ires having stale values for the ire_max_frag
	 * we get the latest value atomically here. For more details
	 * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE
	 * in ip_rput_dlpi_writer
	 *
	 * NOTE(review): pire is only set in the xresolv branch above, so the
	 * non-multicast arm below presumably relies on ire_max_fragp being
	 * NULL only for such IREs -- confirm against the callers.
	 */
	if (ire->ire_max_fragp == NULL) {
		if (IN6_IS_ADDR_MULTICAST(&ire->ire_addr_v6))
			ire->ire_max_frag = ire->ire_ipif->ipif_mtu;
		else
			ire->ire_max_frag = pire->ire_max_frag;
	} else {
		uint_t  max_frag;

		max_frag = *ire->ire_max_fragp;
		ire->ire_max_fragp = NULL;
		ire->ire_max_frag = max_frag;
	}

	/*
	 * Atomically check for duplicate and insert in the table.
	 */
	for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
		if (ire1->ire_marks & IRE_MARK_CONDEMNED)
			continue;

		if (ire->ire_type == IRE_CACHE) {
			/*
			 * We do MATCH_IRE_ILL implicitly here for IRE_CACHES.
			 * As ire_ipif and ire_stq could point to two
			 * different ills, we can't pass just ire_ipif to
			 * ire_match_args and get a match on both ills.
			 * This is just needed for duplicate checks here and
			 * so we don't add an extra argument to
			 * ire_match_args for this. Do it locally.
			 *
			 * NOTE : Currently there is no part of the code
			 * that asks for both MATCH_IRE_IPIF and MATCH_IRE_ILL
			 * match for IRE_CACHEs. Thus we don't want to
			 * extend the arguments to ire_match_args_v6.
			 */
			if (ire1->ire_stq != ire->ire_stq)
				continue;
			/*
			 * Multiroute IRE_CACHEs for a given destination can
			 * have the same ire_ipif, typically if their source
			 * address is forced using RTF_SETSRC, and the same
			 * send-to queue. We differentiate them using the parent
			 * handle.
			 */
			if ((ire1->ire_flags & RTF_MULTIRT) &&
			    (ire->ire_flags & RTF_MULTIRT) &&
			    (ire1->ire_phandle != ire->ire_phandle))
				continue;
		}
		if (ire1->ire_zoneid != ire->ire_zoneid)
			continue;
		if (ire_match_args_v6(ire1, &ire->ire_addr_v6,
		    &ire->ire_mask_v6, &ire->ire_gateway_addr_v6,
		    ire->ire_type, ire->ire_ipif, ire->ire_zoneid, 0, NULL,
		    flags)) {
			/*
			 * Return the old ire after doing a REFHOLD.
			 * As most of the callers continue to use the IRE
			 * after adding, we return a held ire. This will
			 * avoid a lookup in the caller again. If the callers
			 * don't want to use it, they need to do a REFRELE.
			 */
			ip1dbg(("found dup ire existing %p new %p",
			    (void *)ire1, (void *)ire));
			IRE_REFHOLD(ire1);
			if (ndp_g_lock_held)
				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
			ire_atomic_end(irb_ptr, ire);
			ire_delete(ire);
			if (pire != NULL) {
				/*
				 * Assert that it is
				 * not yet removed from the list.
				 */
				ASSERT(pire->ire_ptpn != NULL);
				IRB_REFRELE(pire->ire_bucket);
				ire_refrele(pire);
			}
			*ire_p = ire1;
			return (0);
		}
	}
	if (ire->ire_type == IRE_CACHE) {
		in6_addr_t gw_addr_v6;
		ill_t	*ill = ire_to_ill(ire);
		char	buf[INET6_ADDRSTRLEN];
		nce_t	*nce;

		/*
		 * All IRE_CACHE types must have a nce.  If this is
		 * not the case the entry will not be added. We need
		 * to make sure that if somebody deletes the nce
		 * after we looked up, they will find this ire and
		 * delete the ire. To delete this ire one needs the
		 * bucket lock which we are still holding here. So,
		 * even if the nce gets deleted after we looked up,
		 * this ire  will get deleted.
		 *
		 * NOTE : Don't need the ire_lock for accessing
		 * ire_gateway_addr_v6 as it is appearing first
		 * time on the list and rts_setgwr_v6 could not
		 * be changing this.
		 */
		gw_addr_v6 = ire->ire_gateway_addr_v6;
		/* On-link destinations resolve themselves, others the gateway */
		if (IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) {
			nce = ndp_lookup_v6(ill, &ire->ire_addr_v6, B_TRUE);
		} else {
			nce = ndp_lookup_v6(ill, &gw_addr_v6, B_TRUE);
		}
		/* The shared failure path below copes with a NULL nce. */
		if (nce == NULL)
			goto failed;

		/* Pair of refhold, refrele just to get the tracing right */
		NCE_REFHOLD_TO_REFHOLD_NOTR(nce);
		/*
		 * Atomically make sure that new IREs don't point
		 * to an NCE that is logically deleted (CONDEMNED).
		 * ndp_delete() first marks the NCE CONDEMNED.
		 * This ensures that the nce_refcnt won't increase
		 * due to new nce_lookups or due to addition of new IREs
		 * pointing to this NCE. Then ndp_delete() cleans up
		 * existing references. If we don't do it atomically here,
		 * ndp_delete() -> nce_ire_delete() will not be able to
		 * clean up the IRE list completely, and the nce_refcnt
		 * won't go down to zero.
		 */
		mutex_enter(&nce->nce_lock);
		if (ill->ill_flags & ILLF_XRESOLV) {
			/*
			 * If we used an external resolver, we may not
			 * have gone through neighbor discovery to get here.
			 * Must update the nce_state before the next check.
			 */
			if (nce->nce_state == ND_INCOMPLETE)
				nce->nce_state = ND_REACHABLE;
		}
		if (nce->nce_state == ND_INCOMPLETE ||
		    (nce->nce_flags & NCE_F_CONDEMNED) ||
		    (nce->nce_state == ND_UNREACHABLE)) {
failed:
			/*
			 * Reached either by falling into this block with
			 * nce_lock held, or via the goto above with nce NULL.
			 */
			if (ndp_g_lock_held)
				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
			if (nce != NULL)
				mutex_exit(&nce->nce_lock);
			ire_atomic_end(irb_ptr, ire);
			ip1dbg(("ire_add_v6: No nce for dst %s \n",
			    inet_ntop(AF_INET6, &ire->ire_addr_v6,
			    buf, sizeof (buf))));
			ire_delete(ire);
			if (pire != NULL) {
				/*
				 * Assert that it is
				 * not yet removed from the list.
				 */
				ASSERT(pire->ire_ptpn != NULL);
				IRB_REFRELE(pire->ire_bucket);
				ire_refrele(pire);
			}
			if (nce != NULL)
				NCE_REFRELE_NOTR(nce);
			*ire_p = NULL;
			return (EINVAL);
		} else {
			ire->ire_nce = nce;
		}
		mutex_exit(&nce->nce_lock);
	}
	/*
	 * Find the first entry that matches ire_addr - provides
	 * tail insertion. *irep will be null if no match.
	 */
	irep = (ire_t **)irb_ptr;
	while ((ire1 = *irep) != NULL &&
	    !IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6))
		irep = &ire1->ire_next;
	ASSERT(!(ire->ire_type & IRE_BROADCAST));

	if (*irep != NULL) {
		/*
		 * Find the last ire which matches ire_addr_v6.
		 * Needed to do tail insertion among entries with the same
		 * ire_addr_v6.
		 */
		while (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6,
		    &ire1->ire_addr_v6)) {
			irep = &ire1->ire_next;
			ire1 = *irep;
			if (ire1 == NULL)
				break;
		}
	}

	if (ire->ire_type == IRE_DEFAULT) {
		/*
		 * We keep a count of default gateways which is used when
		 * assigning them as routes.
		 */
		ipst->ips_ipv6_ire_default_count++;
		ASSERT(ipst->ips_ipv6_ire_default_count != 0); /* Wraparound */
	}
	/* Insert at *irep */
	ire1 = *irep;
	if (ire1 != NULL)
		ire1->ire_ptpn = &ire->ire_next;
	ire->ire_next = ire1;
	/* Link the new one in. */
	ire->ire_ptpn = irep;
	/*
	 * ire_walk routines de-reference ire_next without holding
	 * a lock. Before we point to the new ire, we want to make
	 * sure the store that sets the ire_next of the new ire
	 * reaches global visibility, so that ire_walk routines
	 * don't see a truncated list of ires i.e if the ire_next
	 * of the new ire gets set after we do "*irep = ire" due
	 * to re-ordering, the ire_walk thread will see a NULL
	 * once it accesses the ire_next of the new ire.
	 * membar_producer() makes sure that the following store
	 * happens *after* all of the above stores.
	 */
	membar_producer();
	*irep = ire;
	ire->ire_bucket = irb_ptr;
	/*
	 * We return a bumped up IRE above. Keep it symmetrical
	 * so that the callers will always have to release. This
	 * helps the callers of this function because they continue
	 * to use the IRE after adding and hence they don't have to
	 * lookup again after we return the IRE.
	 *
	 * NOTE : We don't have to use atomics as this is appearing
	 * in the list for the first time and no one else can bump
	 * up the reference count on this yet.
	 */
	IRE_REFHOLD_LOCKED(ire);
	BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_inserted);
	/* Account for the new entry in its bucket, ipif and send-to ill. */
	irb_ptr->irb_ire_cnt++;
	if (ire->ire_marks & IRE_MARK_TEMPORARY)
		irb_ptr->irb_tmp_ire_cnt++;

	if (ire->ire_ipif != NULL) {
		ire->ire_ipif->ipif_ire_cnt++;
		if (ire->ire_stq != NULL) {
			stq_ill = (ill_t *)ire->ire_stq->q_ptr;
			stq_ill->ill_ire_cnt++;
		}
	} else {
		ASSERT(ire->ire_stq == NULL);
	}

	if (ndp_g_lock_held)
		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
	ire_atomic_end(irb_ptr, ire);

	if (pire != NULL) {
		/* Assert that it is not removed from the list yet */
		ASSERT(pire->ire_ptpn != NULL);
		IRB_REFRELE(pire->ire_bucket);
		ire_refrele(pire);
	}

	if (ire->ire_type != IRE_CACHE) {
		/*
		 * For ire's with host mask see if there is an entry
		 * in the cache. If there is one flush the whole cache as
		 * there might be multiple entries due to RTF_MULTIRT (CGTP).
		 * If no entry is found then there is no need to flush the
		 * cache.
		 */

		if (ip_mask_to_plen_v6(&ire->ire_mask_v6) == IPV6_ABITS) {
			ire_t *lire;
			lire = ire_ctable_lookup_v6(&ire->ire_addr_v6, NULL,
			    IRE_CACHE, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
			    ipst);
			if (lire != NULL) {
				ire_refrele(lire);
				ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
			}
		} else {
			ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
		}
	}

	*ire_p = ire;
	return (0);
}

/*
 * Search for all HOST REDIRECT routes that are
 * pointing at the specified gateway and
 * delete them. This routine is called only
 * when a default gateway is going away.
 */
static void
ire_delete_host_redirects_v6(const in6_addr_t *gateway, ip_stack_t *ipst)
{
	irb_t *irb_ptr;
	irb_t *irb;
	ire_t *ire;
	in6_addr_t gw_addr_v6;
	int i;

	/* get the hash table for HOST routes */
	irb_ptr = ipst->ips_ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)];
	/* The table may never have been created (see ire_add_v6()). */
	if (irb_ptr == NULL)
		return;
	for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) {
		irb = &irb_ptr[i];
		/* Hold the bucket across the walk and the deletions. */
		IRB_REFHOLD(irb);
		for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
			/* Only routes flagged RTF_DYNAMIC are candidates. */
			if (!(ire->ire_flags & RTF_DYNAMIC))
				continue;
			/* Snapshot the gateway address under ire_lock. */
			mutex_enter(&ire->ire_lock);
			gw_addr_v6 = ire->ire_gateway_addr_v6;
			mutex_exit(&ire->ire_lock);
			if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway))
				ire_delete(ire);
		}
		IRB_REFRELE(irb);
	}
}

/*
 * Delete all the cache entries with this 'addr'. This is the IPv6 counterpart
 * of ip_ire_clookup_and_delete. The difference being this function does not
 * return any value. IPv6 processing of a gratuitous ARP, as it stands, is
 * different than IPv4 in that, regardless of the presence of a cache entry
 * for this address, an ire_walk_v6 is done.
Another difference is that unlike 1124 * in the case of IPv4 this does not take an ipif_t argument, since it is only 1125 * called by ip_arp_news and the match is always only on the address. 1126 */ 1127 void 1128 ip_ire_clookup_and_delete_v6(const in6_addr_t *addr, ip_stack_t *ipst) 1129 { 1130 irb_t *irb; 1131 ire_t *cire; 1132 boolean_t found = B_FALSE; 1133 1134 irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 1135 ipst->ips_ip6_cache_table_size)]; 1136 IRB_REFHOLD(irb); 1137 for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) { 1138 if (cire->ire_marks & IRE_MARK_CONDEMNED) 1139 continue; 1140 if (IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, addr)) { 1141 1142 /* This signifies start of a match */ 1143 if (!found) 1144 found = B_TRUE; 1145 if (cire->ire_type == IRE_CACHE) { 1146 if (cire->ire_nce != NULL) 1147 ndp_delete(cire->ire_nce); 1148 ire_delete_v6(cire); 1149 } 1150 /* End of the match */ 1151 } else if (found) 1152 break; 1153 } 1154 IRB_REFRELE(irb); 1155 } 1156 1157 /* 1158 * Delete the specified IRE. 1159 * All calls should use ire_delete(). 1160 * Sometimes called as writer though not required by this function. 1161 * 1162 * NOTE : This function is called only if the ire was added 1163 * in the list. 1164 */ 1165 void 1166 ire_delete_v6(ire_t *ire) 1167 { 1168 in6_addr_t gw_addr_v6; 1169 ip_stack_t *ipst = ire->ire_ipst; 1170 1171 ASSERT(ire->ire_refcnt >= 1); 1172 ASSERT(ire->ire_ipversion == IPV6_VERSION); 1173 1174 if (ire->ire_type != IRE_CACHE) 1175 ire_flush_cache_v6(ire, IRE_FLUSH_DELETE); 1176 if (ire->ire_type == IRE_DEFAULT) { 1177 /* 1178 * when a default gateway is going away 1179 * delete all the host redirects pointing at that 1180 * gateway. 
1181 */ 1182 mutex_enter(&ire->ire_lock); 1183 gw_addr_v6 = ire->ire_gateway_addr_v6; 1184 mutex_exit(&ire->ire_lock); 1185 ire_delete_host_redirects_v6(&gw_addr_v6, ipst); 1186 } 1187 } 1188 1189 /* 1190 * ire_walk routine to delete all IRE_CACHE and IRE_HOST type redirect 1191 * entries. 1192 */ 1193 /*ARGSUSED1*/ 1194 void 1195 ire_delete_cache_v6(ire_t *ire, char *arg) 1196 { 1197 char addrstr1[INET6_ADDRSTRLEN]; 1198 char addrstr2[INET6_ADDRSTRLEN]; 1199 1200 if ((ire->ire_type & IRE_CACHE) || 1201 (ire->ire_flags & RTF_DYNAMIC)) { 1202 ip1dbg(("ire_delete_cache_v6: deleted %s type %d through %s\n", 1203 inet_ntop(AF_INET6, &ire->ire_addr_v6, 1204 addrstr1, sizeof (addrstr1)), 1205 ire->ire_type, 1206 inet_ntop(AF_INET6, &ire->ire_gateway_addr_v6, 1207 addrstr2, sizeof (addrstr2)))); 1208 ire_delete(ire); 1209 } 1210 1211 } 1212 1213 /* 1214 * ire_walk routine to delete all IRE_CACHE/IRE_HOST type redirect entries 1215 * that have a given gateway address. 1216 */ 1217 void 1218 ire_delete_cache_gw_v6(ire_t *ire, char *addr) 1219 { 1220 in6_addr_t *gw_addr = (in6_addr_t *)addr; 1221 char buf1[INET6_ADDRSTRLEN]; 1222 char buf2[INET6_ADDRSTRLEN]; 1223 in6_addr_t ire_gw_addr_v6; 1224 1225 if (!(ire->ire_type & IRE_CACHE) && 1226 !(ire->ire_flags & RTF_DYNAMIC)) 1227 return; 1228 1229 mutex_enter(&ire->ire_lock); 1230 ire_gw_addr_v6 = ire->ire_gateway_addr_v6; 1231 mutex_exit(&ire->ire_lock); 1232 1233 if (IN6_ARE_ADDR_EQUAL(&ire_gw_addr_v6, gw_addr)) { 1234 ip1dbg(("ire_delete_cache_gw_v6: deleted %s type %d to %s\n", 1235 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, 1236 buf1, sizeof (buf1)), 1237 ire->ire_type, 1238 inet_ntop(AF_INET6, &ire_gw_addr_v6, 1239 buf2, sizeof (buf2)))); 1240 ire_delete(ire); 1241 } 1242 } 1243 1244 /* 1245 * Remove all IRE_CACHE entries that match 1246 * the ire specified. (Sometimes called 1247 * as writer though not required by this function.) 
1248 * 1249 * The flag argument indicates if the 1250 * flush request is due to addition 1251 * of new route (IRE_FLUSH_ADD) or deletion of old 1252 * route (IRE_FLUSH_DELETE). 1253 * 1254 * This routine takes only the IREs from the forwarding 1255 * table and flushes the corresponding entries from 1256 * the cache table. 1257 * 1258 * When flushing due to the deletion of an old route, it 1259 * just checks the cache handles (ire_phandle and ire_ihandle) and 1260 * deletes the ones that match. 1261 * 1262 * When flushing due to the creation of a new route, it checks 1263 * if a cache entry's address matches the one in the IRE and 1264 * that the cache entry's parent has a less specific mask than the 1265 * one in IRE. The destination of such a cache entry could be the 1266 * gateway for other cache entries, so we need to flush those as 1267 * well by looking for gateway addresses matching the IRE's address. 1268 */ 1269 void 1270 ire_flush_cache_v6(ire_t *ire, int flag) 1271 { 1272 int i; 1273 ire_t *cire; 1274 irb_t *irb; 1275 ip_stack_t *ipst = ire->ire_ipst; 1276 1277 if (ire->ire_type & IRE_CACHE) 1278 return; 1279 1280 /* 1281 * If a default is just created, there is no point 1282 * in going through the cache, as there will not be any 1283 * cached ires. 1284 */ 1285 if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD) 1286 return; 1287 if (flag == IRE_FLUSH_ADD) { 1288 /* 1289 * This selective flush is 1290 * due to the addition of 1291 * new IRE. 
1292 */ 1293 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 1294 irb = &ipst->ips_ip_cache_table_v6[i]; 1295 if ((cire = irb->irb_ire) == NULL) 1296 continue; 1297 IRB_REFHOLD(irb); 1298 for (cire = irb->irb_ire; cire != NULL; 1299 cire = cire->ire_next) { 1300 if (cire->ire_type != IRE_CACHE) 1301 continue; 1302 /* 1303 * If 'cire' belongs to the same subnet 1304 * as the new ire being added, and 'cire' 1305 * is derived from a prefix that is less 1306 * specific than the new ire being added, 1307 * we need to flush 'cire'; for instance, 1308 * when a new interface comes up. 1309 */ 1310 if ((V6_MASK_EQ_2(cire->ire_addr_v6, 1311 ire->ire_mask_v6, ire->ire_addr_v6) && 1312 (ip_mask_to_plen_v6(&cire->ire_cmask_v6) <= 1313 ire->ire_masklen))) { 1314 ire_delete(cire); 1315 continue; 1316 } 1317 /* 1318 * This is the case when the ire_gateway_addr 1319 * of 'cire' belongs to the same subnet as 1320 * the new ire being added. 1321 * Flushing such ires is sometimes required to 1322 * avoid misrouting: say we have a machine with 1323 * two interfaces (I1 and I2), a default router 1324 * R on the I1 subnet, and a host route to an 1325 * off-link destination D with a gateway G on 1326 * the I2 subnet. 1327 * Under normal operation, we will have an 1328 * on-link cache entry for G and an off-link 1329 * cache entry for D with G as ire_gateway_addr, 1330 * traffic to D will reach its destination 1331 * through gateway G. 1332 * If the administrator does 'ifconfig I2 down', 1333 * the cache entries for D and G will be 1334 * flushed. However, G will now be resolved as 1335 * an off-link destination using R (the default 1336 * router) as gateway. Then D will also be 1337 * resolved as an off-link destination using G 1338 * as gateway - this behavior is due to 1339 * compatibility reasons, see comment in 1340 * ire_ihandle_lookup_offlink(). Traffic to D 1341 * will go to the router R and probably won't 1342 * reach the destination. 
1343 * The administrator then does 'ifconfig I2 up'. 1344 * Since G is on the I2 subnet, this routine 1345 * will flush its cache entry. It must also 1346 * flush the cache entry for D, otherwise 1347 * traffic will stay misrouted until the IRE 1348 * times out. 1349 */ 1350 if (V6_MASK_EQ_2(cire->ire_gateway_addr_v6, 1351 ire->ire_mask_v6, ire->ire_addr_v6)) { 1352 ire_delete(cire); 1353 continue; 1354 } 1355 } 1356 IRB_REFRELE(irb); 1357 } 1358 } else { 1359 /* 1360 * delete the cache entries based on 1361 * handle in the IRE as this IRE is 1362 * being deleted/changed. 1363 */ 1364 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 1365 irb = &ipst->ips_ip_cache_table_v6[i]; 1366 if ((cire = irb->irb_ire) == NULL) 1367 continue; 1368 IRB_REFHOLD(irb); 1369 for (cire = irb->irb_ire; cire != NULL; 1370 cire = cire->ire_next) { 1371 if (cire->ire_type != IRE_CACHE) 1372 continue; 1373 if ((cire->ire_phandle == 0 || 1374 cire->ire_phandle != ire->ire_phandle) && 1375 (cire->ire_ihandle == 0 || 1376 cire->ire_ihandle != ire->ire_ihandle)) 1377 continue; 1378 ire_delete(cire); 1379 } 1380 IRB_REFRELE(irb); 1381 } 1382 } 1383 } 1384 1385 /* 1386 * Matches the arguments passed with the values in the ire. 1387 * 1388 * Note: for match types that match using "ipif" passed in, ipif 1389 * must be checked for non-NULL before calling this routine. 
1390 */ 1391 static boolean_t 1392 ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask, 1393 const in6_addr_t *gateway, int type, const ipif_t *ipif, zoneid_t zoneid, 1394 uint32_t ihandle, const ts_label_t *tsl, int match_flags) 1395 { 1396 in6_addr_t masked_addr; 1397 in6_addr_t gw_addr_v6; 1398 ill_t *ire_ill = NULL, *dst_ill; 1399 ill_t *ipif_ill = NULL; 1400 ill_group_t *ire_ill_group = NULL; 1401 ill_group_t *ipif_ill_group = NULL; 1402 ipif_t *src_ipif; 1403 1404 ASSERT(ire->ire_ipversion == IPV6_VERSION); 1405 ASSERT(addr != NULL); 1406 ASSERT(mask != NULL); 1407 ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL); 1408 ASSERT((!(match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP))) || 1409 (ipif != NULL && ipif->ipif_isv6)); 1410 ASSERT(!(match_flags & MATCH_IRE_WQ)); 1411 1412 /* 1413 * HIDDEN cache entries have to be looked up specifically with 1414 * MATCH_IRE_MARK_HIDDEN. MATCH_IRE_MARK_HIDDEN is usually set 1415 * when the interface is FAILED or INACTIVE. In that case, 1416 * any IRE_CACHES that exists should be marked with 1417 * IRE_MARK_HIDDEN. So, we don't really need to match below 1418 * for IRE_MARK_HIDDEN. But we do so for consistency. 1419 */ 1420 if (!(match_flags & MATCH_IRE_MARK_HIDDEN) && 1421 (ire->ire_marks & IRE_MARK_HIDDEN)) 1422 return (B_FALSE); 1423 1424 if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid && 1425 ire->ire_zoneid != ALL_ZONES) { 1426 /* 1427 * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is 1428 * valid and does not match that of ire_zoneid, a failure to 1429 * match is reported at this point. Otherwise, since some IREs 1430 * that are available in the global zone can be used in local 1431 * zones, additional checks need to be performed: 1432 * 1433 * IRE_CACHE and IRE_LOOPBACK entries should 1434 * never be matched in this situation. 
1435 * 1436 * IRE entries that have an interface associated with them 1437 * should in general not match unless they are an IRE_LOCAL 1438 * or in the case when MATCH_IRE_DEFAULT has been set in 1439 * the caller. In the case of the former, checking of the 1440 * other fields supplied should take place. 1441 * 1442 * In the case where MATCH_IRE_DEFAULT has been set, 1443 * all of the ipif's associated with the IRE's ill are 1444 * checked to see if there is a matching zoneid. If any 1445 * one ipif has a matching zoneid, this IRE is a 1446 * potential candidate so checking of the other fields 1447 * takes place. 1448 * 1449 * In the case where the IRE_INTERFACE has a usable source 1450 * address (indicated by ill_usesrc_ifindex) in the 1451 * correct zone then it's permitted to return this IRE 1452 */ 1453 if (match_flags & MATCH_IRE_ZONEONLY) 1454 return (B_FALSE); 1455 if (ire->ire_type & (IRE_CACHE | IRE_LOOPBACK)) 1456 return (B_FALSE); 1457 /* 1458 * Note, IRE_INTERFACE can have the stq as NULL. For 1459 * example, if the default multicast route is tied to 1460 * the loopback address. 
1461 */ 1462 if ((ire->ire_type & IRE_INTERFACE) && 1463 (ire->ire_stq != NULL)) { 1464 dst_ill = (ill_t *)ire->ire_stq->q_ptr; 1465 /* 1466 * If there is a usable source address in the 1467 * zone, then it's ok to return an 1468 * IRE_INTERFACE 1469 */ 1470 if ((dst_ill->ill_usesrc_ifindex != 0) && 1471 (src_ipif = ipif_select_source_v6(dst_ill, addr, 1472 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid)) 1473 != NULL) { 1474 ip3dbg(("ire_match_args: src_ipif %p" 1475 " dst_ill %p", (void *)src_ipif, 1476 (void *)dst_ill)); 1477 ipif_refrele(src_ipif); 1478 } else { 1479 ip3dbg(("ire_match_args: src_ipif NULL" 1480 " dst_ill %p\n", (void *)dst_ill)); 1481 return (B_FALSE); 1482 } 1483 } 1484 if (ire->ire_ipif != NULL && ire->ire_type != IRE_LOCAL && 1485 !(ire->ire_type & IRE_INTERFACE)) { 1486 ipif_t *tipif; 1487 1488 if ((match_flags & MATCH_IRE_DEFAULT) == 0) 1489 return (B_FALSE); 1490 mutex_enter(&ire->ire_ipif->ipif_ill->ill_lock); 1491 for (tipif = ire->ire_ipif->ipif_ill->ill_ipif; 1492 tipif != NULL; tipif = tipif->ipif_next) { 1493 if (IPIF_CAN_LOOKUP(tipif) && 1494 (tipif->ipif_flags & IPIF_UP) && 1495 (tipif->ipif_zoneid == zoneid || 1496 tipif->ipif_zoneid == ALL_ZONES)) 1497 break; 1498 } 1499 mutex_exit(&ire->ire_ipif->ipif_ill->ill_lock); 1500 if (tipif == NULL) 1501 return (B_FALSE); 1502 } 1503 } 1504 1505 if (match_flags & MATCH_IRE_GW) { 1506 mutex_enter(&ire->ire_lock); 1507 gw_addr_v6 = ire->ire_gateway_addr_v6; 1508 mutex_exit(&ire->ire_lock); 1509 } 1510 /* 1511 * For IRE_CACHES, MATCH_IRE_ILL/ILL_GROUP really means that 1512 * somebody wants to send out on a particular interface which 1513 * is given by ire_stq and hence use ire_stq to derive the ill 1514 * value. ire_ipif for IRE_CACHES is just the 1515 * means of getting a source address i.e ire_src_addr_v6 = 1516 * ire->ire_ipif->ipif_src_addr_v6. 
1517 */ 1518 if (match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP)) { 1519 ire_ill = ire_to_ill(ire); 1520 if (ire_ill != NULL) 1521 ire_ill_group = ire_ill->ill_group; 1522 ipif_ill = ipif->ipif_ill; 1523 ipif_ill_group = ipif_ill->ill_group; 1524 } 1525 1526 /* No ire_addr_v6 bits set past the mask */ 1527 ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6, 1528 ire->ire_addr_v6)); 1529 V6_MASK_COPY(*addr, *mask, masked_addr); 1530 1531 if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) && 1532 ((!(match_flags & MATCH_IRE_GW)) || 1533 IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) && 1534 ((!(match_flags & MATCH_IRE_TYPE)) || 1535 (ire->ire_type & type)) && 1536 ((!(match_flags & MATCH_IRE_SRC)) || 1537 IN6_ARE_ADDR_EQUAL(&ire->ire_src_addr_v6, 1538 &ipif->ipif_v6src_addr)) && 1539 ((!(match_flags & MATCH_IRE_IPIF)) || 1540 (ire->ire_ipif == ipif)) && 1541 ((!(match_flags & MATCH_IRE_MARK_HIDDEN)) || 1542 (ire->ire_type != IRE_CACHE || 1543 ire->ire_marks & IRE_MARK_HIDDEN)) && 1544 ((!(match_flags & MATCH_IRE_ILL)) || 1545 (ire_ill == ipif_ill)) && 1546 ((!(match_flags & MATCH_IRE_IHANDLE)) || 1547 (ire->ire_ihandle == ihandle)) && 1548 ((!(match_flags & MATCH_IRE_ILL_GROUP)) || 1549 (ire_ill == ipif_ill) || 1550 (ire_ill_group != NULL && 1551 ire_ill_group == ipif_ill_group)) && 1552 ((!(match_flags & MATCH_IRE_SECATTR)) || 1553 (!is_system_labeled()) || 1554 (tsol_ire_match_gwattr(ire, tsl) == 0))) { 1555 /* We found the matched IRE */ 1556 return (B_TRUE); 1557 } 1558 return (B_FALSE); 1559 } 1560 1561 /* 1562 * Lookup for a route in all the tables 1563 */ 1564 ire_t * 1565 ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 1566 const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 1567 zoneid_t zoneid, const ts_label_t *tsl, int flags, ip_stack_t *ipst) 1568 { 1569 ire_t *ire = NULL; 1570 1571 /* 1572 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 1573 * MATCH_IRE_ILL is set. 
1574 */ 1575 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 1576 (ipif == NULL)) 1577 return (NULL); 1578 1579 /* 1580 * might be asking for a cache lookup, 1581 * This is not best way to lookup cache, 1582 * user should call ire_cache_lookup directly. 1583 * 1584 * If MATCH_IRE_TYPE was set, first lookup in the cache table and then 1585 * in the forwarding table, if the applicable type flags were set. 1586 */ 1587 if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) { 1588 ire = ire_ctable_lookup_v6(addr, gateway, type, ipif, zoneid, 1589 tsl, flags, ipst); 1590 if (ire != NULL) 1591 return (ire); 1592 } 1593 if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) { 1594 ire = ire_ftable_lookup_v6(addr, mask, gateway, type, ipif, 1595 pire, zoneid, 0, tsl, flags, ipst); 1596 } 1597 return (ire); 1598 } 1599 1600 /* 1601 * Lookup a route in forwarding table. 1602 * specific lookup is indicated by passing the 1603 * required parameters and indicating the 1604 * match required in flag field. 1605 * 1606 * Looking for default route can be done in three ways 1607 * 1) pass mask as ipv6_all_zeros and set MATCH_IRE_MASK in flags field 1608 * along with other matches. 1609 * 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags 1610 * field along with other matches. 1611 * 3) if the destination and mask are passed as zeros. 1612 * 1613 * A request to return a default route if no route 1614 * is found, can be specified by setting MATCH_IRE_DEFAULT 1615 * in flags. 1616 * 1617 * It does not support recursion more than one level. It 1618 * will do recursive lookup only when the lookup maps to 1619 * a prefix or default route and MATCH_IRE_RECURSIVE flag is passed. 1620 * 1621 * If the routing table is setup to allow more than one level 1622 * of recursion, the cleaning up cache table will not work resulting 1623 * in invalid routing. 1624 * 1625 * Supports link-local addresses by following the ipif/ill when recursing. 
1626 * 1627 * NOTE : When this function returns NULL, pire has already been released. 1628 * pire is valid only when this function successfully returns an 1629 * ire. 1630 */ 1631 ire_t * 1632 ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 1633 const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 1634 zoneid_t zoneid, uint32_t ihandle, const ts_label_t *tsl, int flags, 1635 ip_stack_t *ipst) 1636 { 1637 irb_t *irb_ptr; 1638 ire_t *rire; 1639 ire_t *ire = NULL; 1640 ire_t *saved_ire; 1641 nce_t *nce; 1642 int i; 1643 in6_addr_t gw_addr_v6; 1644 1645 ASSERT(addr != NULL); 1646 ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL); 1647 ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 1648 ASSERT(ipif == NULL || ipif->ipif_isv6); 1649 ASSERT(!(flags & MATCH_IRE_WQ)); 1650 1651 /* 1652 * When we return NULL from this function, we should make 1653 * sure that *pire is NULL so that the callers will not 1654 * wrongly REFRELE the pire. 1655 */ 1656 if (pire != NULL) 1657 *pire = NULL; 1658 /* 1659 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 1660 * MATCH_IRE_ILL is set. 1661 */ 1662 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 1663 (ipif == NULL)) 1664 return (NULL); 1665 1666 /* 1667 * If the mask is known, the lookup 1668 * is simple, if the mask is not known 1669 * we need to search. 
1670 */ 1671 if (flags & MATCH_IRE_MASK) { 1672 uint_t masklen; 1673 1674 masklen = ip_mask_to_plen_v6(mask); 1675 if (ipst->ips_ip_forwarding_table_v6[masklen] == NULL) 1676 return (NULL); 1677 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[masklen][ 1678 IRE_ADDR_MASK_HASH_V6(*addr, *mask, 1679 ipst->ips_ip6_ftable_hash_size)]); 1680 rw_enter(&irb_ptr->irb_lock, RW_READER); 1681 for (ire = irb_ptr->irb_ire; ire != NULL; 1682 ire = ire->ire_next) { 1683 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1684 continue; 1685 if (ire_match_args_v6(ire, addr, mask, gateway, type, 1686 ipif, zoneid, ihandle, tsl, flags)) 1687 goto found_ire; 1688 } 1689 rw_exit(&irb_ptr->irb_lock); 1690 } else { 1691 /* 1692 * In this case we don't know the mask, we need to 1693 * search the table assuming different mask sizes. 1694 * we start with 128 bit mask, we don't allow default here. 1695 */ 1696 for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) { 1697 in6_addr_t tmpmask; 1698 1699 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) 1700 continue; 1701 (void) ip_plen_to_mask_v6(i, &tmpmask); 1702 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][ 1703 IRE_ADDR_MASK_HASH_V6(*addr, tmpmask, 1704 ipst->ips_ip6_ftable_hash_size)]; 1705 rw_enter(&irb_ptr->irb_lock, RW_READER); 1706 for (ire = irb_ptr->irb_ire; ire != NULL; 1707 ire = ire->ire_next) { 1708 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1709 continue; 1710 if (ire_match_args_v6(ire, addr, 1711 &ire->ire_mask_v6, gateway, type, ipif, 1712 zoneid, ihandle, tsl, flags)) 1713 goto found_ire; 1714 } 1715 rw_exit(&irb_ptr->irb_lock); 1716 } 1717 } 1718 1719 /* 1720 * We come here if no route has yet been found. 1721 * 1722 * Handle the case where default route is 1723 * requested by specifying type as one of the possible 1724 * types for that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE). 
1725 * 1726 * If MATCH_IRE_MASK is specified, then the appropriate default route 1727 * would have been found above if it exists so it isn't looked up here. 1728 * If MATCH_IRE_DEFAULT was also specified, then a default route will be 1729 * searched for later. 1730 */ 1731 if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE && 1732 (type & (IRE_DEFAULT | IRE_INTERFACE))) { 1733 if (ipst->ips_ip_forwarding_table_v6[0] != NULL) { 1734 /* addr & mask is zero for defaults */ 1735 irb_ptr = &ipst->ips_ip_forwarding_table_v6[0][ 1736 IRE_ADDR_HASH_V6(ipv6_all_zeros, 1737 ipst->ips_ip6_ftable_hash_size)]; 1738 rw_enter(&irb_ptr->irb_lock, RW_READER); 1739 for (ire = irb_ptr->irb_ire; ire != NULL; 1740 ire = ire->ire_next) { 1741 1742 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1743 continue; 1744 1745 if (ire_match_args_v6(ire, addr, 1746 &ipv6_all_zeros, gateway, type, ipif, 1747 zoneid, ihandle, tsl, flags)) 1748 goto found_ire; 1749 } 1750 rw_exit(&irb_ptr->irb_lock); 1751 } 1752 } 1753 /* 1754 * We come here only if no route is found. 1755 * see if the default route can be used which is allowed 1756 * only if the default matching criteria is specified. 1757 * The ipv6_ire_default_count tracks the number of IRE_DEFAULT 1758 * entries. However, the ip_forwarding_table_v6[0] also contains 1759 * interface routes thus the count can be zero. 1760 */ 1761 saved_ire = NULL; 1762 if ((flags & (MATCH_IRE_DEFAULT | MATCH_IRE_MASK)) == 1763 MATCH_IRE_DEFAULT) { 1764 ire_t *ire_origin; 1765 uint_t g_index; 1766 uint_t index; 1767 1768 if (ipst->ips_ip_forwarding_table_v6[0] == NULL) 1769 return (NULL); 1770 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[0])[0]; 1771 1772 /* 1773 * Keep a tab on the bucket while looking the IRE_DEFAULT 1774 * entries. We need to keep track of a particular IRE 1775 * (ire_origin) so this ensures that it will not be unlinked 1776 * from the hash list during the recursive lookup below. 
1777 */ 1778 IRB_REFHOLD(irb_ptr); 1779 ire = irb_ptr->irb_ire; 1780 if (ire == NULL) { 1781 IRB_REFRELE(irb_ptr); 1782 return (NULL); 1783 } 1784 1785 /* 1786 * Get the index first, since it can be changed by other 1787 * threads. Then get to the right default route skipping 1788 * default interface routes if any. As we hold a reference on 1789 * the IRE bucket, ipv6_ire_default_count can only increase so 1790 * we can't reach the end of the hash list unexpectedly. 1791 */ 1792 if (ipst->ips_ipv6_ire_default_count != 0) { 1793 g_index = ipst->ips_ipv6_ire_default_index++; 1794 index = g_index % ipst->ips_ipv6_ire_default_count; 1795 while (index != 0) { 1796 if (!(ire->ire_type & IRE_INTERFACE)) 1797 index--; 1798 ire = ire->ire_next; 1799 } 1800 ASSERT(ire != NULL); 1801 } else { 1802 /* 1803 * No default route, so we only have default interface 1804 * routes: don't enter the first loop. 1805 */ 1806 ire = NULL; 1807 } 1808 1809 /* 1810 * Round-robin the default routers list looking for a neighbor 1811 * that matches the passed in parameters and is reachable. If 1812 * none found, just return a route from the default router list 1813 * if it exists. If we can't find a default route (IRE_DEFAULT), 1814 * look for interface default routes. 1815 * We start with the ire we found above and we walk the hash 1816 * list until we're back where we started, see 1817 * ire_get_next_default_ire(). It doesn't matter if default 1818 * routes are added or deleted by other threads - we know this 1819 * ire will stay in the list because we hold a reference on the 1820 * ire bucket. 1821 * NB: if we only have interface default routes, ire is NULL so 1822 * we don't even enter this loop (see above). 
1823 */ 1824 ire_origin = ire; 1825 for (; ire != NULL; 1826 ire = ire_get_next_default_ire(ire, ire_origin)) { 1827 1828 if (ire_match_args_v6(ire, addr, 1829 &ipv6_all_zeros, gateway, type, ipif, 1830 zoneid, ihandle, tsl, flags)) { 1831 int match_flags; 1832 1833 /* 1834 * We have something to work with. 1835 * If we can find a resolved/reachable 1836 * entry, we will use this. Otherwise 1837 * we'll try to find an entry that has 1838 * a resolved cache entry. We will fallback 1839 * on this if we don't find anything else. 1840 */ 1841 if (saved_ire == NULL) 1842 saved_ire = ire; 1843 mutex_enter(&ire->ire_lock); 1844 gw_addr_v6 = ire->ire_gateway_addr_v6; 1845 mutex_exit(&ire->ire_lock); 1846 match_flags = MATCH_IRE_ILL_GROUP | 1847 MATCH_IRE_SECATTR; 1848 rire = ire_ctable_lookup_v6(&gw_addr_v6, NULL, 1849 0, ire->ire_ipif, zoneid, tsl, match_flags, 1850 ipst); 1851 if (rire != NULL) { 1852 nce = rire->ire_nce; 1853 if (nce != NULL && 1854 NCE_ISREACHABLE(nce) && 1855 nce->nce_flags & NCE_F_ISROUTER) { 1856 ire_refrele(rire); 1857 IRE_REFHOLD(ire); 1858 IRB_REFRELE(irb_ptr); 1859 goto found_ire_held; 1860 } else if (nce != NULL && 1861 !(nce->nce_flags & 1862 NCE_F_ISROUTER)) { 1863 /* 1864 * Make sure we don't use 1865 * this ire 1866 */ 1867 if (saved_ire == ire) 1868 saved_ire = NULL; 1869 } 1870 ire_refrele(rire); 1871 } else if (ipst-> 1872 ips_ipv6_ire_default_count > 1 && 1873 zoneid != GLOBAL_ZONEID) { 1874 /* 1875 * When we're in a local zone, we're 1876 * only interested in default routers 1877 * that are reachable through ipifs 1878 * within our zone. 1879 * The potentially expensive call to 1880 * ire_route_lookup_v6() is avoided when 1881 * we have only one default route. 
1882 */ 1883 int ire_match_flags = MATCH_IRE_TYPE | 1884 MATCH_IRE_SECATTR; 1885 1886 if (ire->ire_ipif != NULL) { 1887 ire_match_flags |= 1888 MATCH_IRE_ILL_GROUP; 1889 } 1890 rire = ire_route_lookup_v6(&gw_addr_v6, 1891 NULL, NULL, IRE_INTERFACE, 1892 ire->ire_ipif, NULL, 1893 zoneid, tsl, ire_match_flags, ipst); 1894 if (rire != NULL) { 1895 ire_refrele(rire); 1896 saved_ire = ire; 1897 } else if (saved_ire == ire) { 1898 /* 1899 * Make sure we don't use 1900 * this ire 1901 */ 1902 saved_ire = NULL; 1903 } 1904 } 1905 } 1906 } 1907 if (saved_ire != NULL) { 1908 ire = saved_ire; 1909 IRE_REFHOLD(ire); 1910 IRB_REFRELE(irb_ptr); 1911 goto found_ire_held; 1912 } else { 1913 /* 1914 * Look for a interface default route matching the 1915 * args passed in. No round robin here. Just pick 1916 * the right one. 1917 */ 1918 for (ire = irb_ptr->irb_ire; ire != NULL; 1919 ire = ire->ire_next) { 1920 1921 if (!(ire->ire_type & IRE_INTERFACE)) 1922 continue; 1923 1924 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1925 continue; 1926 1927 if (ire_match_args_v6(ire, addr, 1928 &ipv6_all_zeros, gateway, type, ipif, 1929 zoneid, ihandle, tsl, flags)) { 1930 IRE_REFHOLD(ire); 1931 IRB_REFRELE(irb_ptr); 1932 goto found_ire_held; 1933 } 1934 } 1935 IRB_REFRELE(irb_ptr); 1936 } 1937 } 1938 ASSERT(ire == NULL); 1939 ip1dbg(("ire_ftable_lookup_v6: returning NULL ire")); 1940 return (NULL); 1941 found_ire: 1942 ASSERT((ire->ire_marks & IRE_MARK_CONDEMNED) == 0); 1943 IRE_REFHOLD(ire); 1944 rw_exit(&irb_ptr->irb_lock); 1945 1946 found_ire_held: 1947 if ((flags & MATCH_IRE_RJ_BHOLE) && 1948 (ire->ire_flags & (RTF_BLACKHOLE | RTF_REJECT))) { 1949 return (ire); 1950 } 1951 /* 1952 * At this point, IRE that was found must be an IRE_FORWARDTABLE 1953 * or IRE_CACHETABLE type. If this is a recursive lookup and an 1954 * IRE_INTERFACE type was found, return that. 
If it was some other 1955 * IRE_FORWARDTABLE type of IRE (one of the prefix types), then it 1956 * is necessary to fill in the parent IRE pointed to by pire, and 1957 * then lookup the gateway address of the parent. For backwards 1958 * compatiblity, if this lookup returns an 1959 * IRE other than a IRE_CACHETABLE or IRE_INTERFACE, then one more level 1960 * of lookup is done. 1961 */ 1962 if (flags & MATCH_IRE_RECURSIVE) { 1963 const ipif_t *gw_ipif; 1964 int match_flags = MATCH_IRE_DSTONLY; 1965 1966 if (ire->ire_type & IRE_INTERFACE) 1967 return (ire); 1968 if (pire != NULL) 1969 *pire = ire; 1970 /* 1971 * If we can't find an IRE_INTERFACE or the caller has not 1972 * asked for pire, we need to REFRELE the saved_ire. 1973 */ 1974 saved_ire = ire; 1975 1976 /* 1977 * Currently MATCH_IRE_ILL is never used with 1978 * (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT) while 1979 * sending out packets as MATCH_IRE_ILL is used only 1980 * for communicating with on-link hosts. We can't assert 1981 * that here as RTM_GET calls this function with 1982 * MATCH_IRE_ILL | MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE. 1983 * We have already used the MATCH_IRE_ILL in determining 1984 * the right prefix route at this point. To match the 1985 * behavior of how we locate routes while sending out 1986 * packets, we don't want to use MATCH_IRE_ILL below 1987 * while locating the interface route. 1988 */ 1989 if (ire->ire_ipif != NULL) 1990 match_flags |= MATCH_IRE_ILL_GROUP; 1991 1992 mutex_enter(&ire->ire_lock); 1993 gw_addr_v6 = ire->ire_gateway_addr_v6; 1994 mutex_exit(&ire->ire_lock); 1995 1996 ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 0, 1997 ire->ire_ipif, NULL, zoneid, tsl, match_flags, ipst); 1998 if (ire == NULL) { 1999 /* 2000 * In this case we have to deal with the 2001 * MATCH_IRE_PARENT flag, which means the 2002 * parent has to be returned if ire is NULL. 
2003 * The aim of this is to have (at least) a starting 2004 * ire when we want to look at all of the ires in a 2005 * bucket aimed at a single destination (as is the 2006 * case in ip_newroute_v6 for the RTF_MULTIRT 2007 * flagged routes). 2008 */ 2009 if (flags & MATCH_IRE_PARENT) { 2010 if (pire != NULL) { 2011 /* 2012 * Need an extra REFHOLD, if the 2013 * parent ire is returned via both 2014 * ire and pire. 2015 */ 2016 IRE_REFHOLD(saved_ire); 2017 } 2018 ire = saved_ire; 2019 } else { 2020 ire_refrele(saved_ire); 2021 if (pire != NULL) 2022 *pire = NULL; 2023 } 2024 return (ire); 2025 } 2026 if (ire->ire_type & (IRE_CACHETABLE | IRE_INTERFACE)) { 2027 /* 2028 * If the caller did not ask for pire, release 2029 * it now. 2030 */ 2031 if (pire == NULL) { 2032 ire_refrele(saved_ire); 2033 } 2034 return (ire); 2035 } 2036 match_flags |= MATCH_IRE_TYPE; 2037 mutex_enter(&ire->ire_lock); 2038 gw_addr_v6 = ire->ire_gateway_addr_v6; 2039 mutex_exit(&ire->ire_lock); 2040 gw_ipif = ire->ire_ipif; 2041 ire_refrele(ire); 2042 ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 2043 (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid, 2044 NULL, match_flags, ipst); 2045 if (ire == NULL) { 2046 /* 2047 * In this case we have to deal with the 2048 * MATCH_IRE_PARENT flag, which means the 2049 * parent has to be returned if ire is NULL. 2050 * The aim of this is to have (at least) a starting 2051 * ire when we want to look at all of the ires in a 2052 * bucket aimed at a single destination (as is the 2053 * case in ip_newroute_v6 for the RTF_MULTIRT 2054 * flagged routes). 2055 */ 2056 if (flags & MATCH_IRE_PARENT) { 2057 if (pire != NULL) { 2058 /* 2059 * Need an extra REFHOLD, if the 2060 * parent ire is returned via both 2061 * ire and pire. 
2062 */ 2063 IRE_REFHOLD(saved_ire); 2064 } 2065 ire = saved_ire; 2066 } else { 2067 ire_refrele(saved_ire); 2068 if (pire != NULL) 2069 *pire = NULL; 2070 } 2071 return (ire); 2072 } else if (pire == NULL) { 2073 /* 2074 * If the caller did not ask for pire, release 2075 * it now. 2076 */ 2077 ire_refrele(saved_ire); 2078 } 2079 return (ire); 2080 } 2081 2082 ASSERT(pire == NULL || *pire == NULL); 2083 return (ire); 2084 } 2085 2086 /* 2087 * Delete the IRE cache for the gateway and all IRE caches whose 2088 * ire_gateway_addr_v6 points to this gateway, and allow them to 2089 * be created on demand by ip_newroute_v6. 2090 */ 2091 void 2092 ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid, 2093 ip_stack_t *ipst) 2094 { 2095 irb_t *irb; 2096 ire_t *ire; 2097 2098 irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 2099 ipst->ips_ip6_cache_table_size)]; 2100 IRB_REFHOLD(irb); 2101 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 2102 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2103 continue; 2104 2105 ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 2106 if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, 0, 2107 IRE_CACHE, NULL, zoneid, 0, NULL, MATCH_IRE_TYPE)) { 2108 ire_delete(ire); 2109 } 2110 } 2111 IRB_REFRELE(irb); 2112 2113 ire_walk_v6(ire_delete_cache_gw_v6, (char *)addr, zoneid, ipst); 2114 } 2115 2116 /* 2117 * Looks up cache table for a route. 2118 * specific lookup can be indicated by 2119 * passing the MATCH_* flags and the 2120 * necessary parameters. 2121 */ 2122 ire_t * 2123 ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway, 2124 int type, const ipif_t *ipif, zoneid_t zoneid, const ts_label_t *tsl, 2125 int flags, ip_stack_t *ipst) 2126 { 2127 ire_t *ire; 2128 irb_t *irb_ptr; 2129 ASSERT(addr != NULL); 2130 ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 2131 2132 /* 2133 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 2134 * MATCH_IRE_ILL is set. 
2135 */ 2136 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 2137 (ipif == NULL)) 2138 return (NULL); 2139 2140 irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 2141 ipst->ips_ip6_cache_table_size)]; 2142 rw_enter(&irb_ptr->irb_lock, RW_READER); 2143 for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 2144 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2145 continue; 2146 2147 ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 2148 if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, gateway, 2149 type, ipif, zoneid, 0, tsl, flags)) { 2150 IRE_REFHOLD(ire); 2151 rw_exit(&irb_ptr->irb_lock); 2152 return (ire); 2153 } 2154 } 2155 rw_exit(&irb_ptr->irb_lock); 2156 return (NULL); 2157 } 2158 2159 /* 2160 * Lookup cache. Don't return IRE_MARK_HIDDEN entries. Callers 2161 * should use ire_ctable_lookup with MATCH_IRE_MARK_HIDDEN to get 2162 * to the hidden ones. 2163 * 2164 * In general the zoneid has to match (where ALL_ZONES match all of them). 2165 * But for IRE_LOCAL we also need to handle the case where L2 should 2166 * conceptually loop back the packet. This is necessary since neither 2167 * Ethernet drivers nor Ethernet hardware loops back packets sent to their 2168 * own MAC address. This loopback is needed when the normal 2169 * routes (ignoring IREs with different zoneids) would send out the packet on 2170 * the same ill (or ill group) as the ill with which this IRE_LOCAL is 2171 * associated. 2172 * 2173 * Earlier versions of this code always matched an IRE_LOCAL independently of 2174 * the zoneid. We preserve that earlier behavior when 2175 * ip_restrict_interzone_loopback is turned off. 
2176 */ 2177 ire_t * 2178 ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid, 2179 const ts_label_t *tsl, ip_stack_t *ipst) 2180 { 2181 irb_t *irb_ptr; 2182 ire_t *ire; 2183 2184 irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 2185 ipst->ips_ip6_cache_table_size)]; 2186 rw_enter(&irb_ptr->irb_lock, RW_READER); 2187 for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 2188 if (ire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 2189 continue; 2190 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, addr)) { 2191 /* 2192 * Finally, check if the security policy has any 2193 * restriction on using this route for the specified 2194 * message. 2195 */ 2196 if (tsl != NULL && 2197 ire->ire_gw_secattr != NULL && 2198 tsol_ire_match_gwattr(ire, tsl) != 0) { 2199 continue; 2200 } 2201 2202 if (zoneid == ALL_ZONES || ire->ire_zoneid == zoneid || 2203 ire->ire_zoneid == ALL_ZONES) { 2204 IRE_REFHOLD(ire); 2205 rw_exit(&irb_ptr->irb_lock); 2206 return (ire); 2207 } 2208 2209 if (ire->ire_type == IRE_LOCAL) { 2210 if (ipst->ips_ip_restrict_interzone_loopback && 2211 !ire_local_ok_across_zones(ire, zoneid, 2212 (void *)addr, tsl, ipst)) 2213 continue; 2214 2215 IRE_REFHOLD(ire); 2216 rw_exit(&irb_ptr->irb_lock); 2217 return (ire); 2218 } 2219 } 2220 } 2221 rw_exit(&irb_ptr->irb_lock); 2222 return (NULL); 2223 } 2224 2225 /* 2226 * Locate the interface ire that is tied to the cache ire 'cire' via 2227 * cire->ire_ihandle. 2228 * 2229 * We are trying to create the cache ire for an onlink destn. or 2230 * gateway in 'cire'. We are called from ire_add_v6() in the IRE_IF_RESOLVER 2231 * case for xresolv interfaces, after the ire has come back from 2232 * an external resolver. 
2233 */ 2234 static ire_t * 2235 ire_ihandle_lookup_onlink_v6(ire_t *cire) 2236 { 2237 ire_t *ire; 2238 int match_flags; 2239 int i; 2240 int j; 2241 irb_t *irb_ptr; 2242 ip_stack_t *ipst = cire->ire_ipst; 2243 2244 ASSERT(cire != NULL); 2245 2246 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2247 /* 2248 * We know that the mask of the interface ire equals cire->ire_cmask. 2249 * (When ip_newroute_v6() created 'cire' for an on-link destn. 2250 * it set its cmask from the interface ire's mask) 2251 */ 2252 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 2253 NULL, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle, 2254 NULL, match_flags, ipst); 2255 if (ire != NULL) 2256 return (ire); 2257 /* 2258 * If we didn't find an interface ire above, we can't declare failure. 2259 * For backwards compatibility, we need to support prefix routes 2260 * pointing to next hop gateways that are not on-link. 2261 * 2262 * In the resolver/noresolver case, ip_newroute_v6() thinks 2263 * it is creating the cache ire for an onlink destination in 'cire'. 2264 * But 'cire' is not actually onlink, because ire_ftable_lookup_v6() 2265 * cheated it, by doing ire_route_lookup_v6() twice and returning an 2266 * interface ire. 2267 * 2268 * Eg. default - gw1 (line 1) 2269 * gw1 - gw2 (line 2) 2270 * gw2 - hme0 (line 3) 2271 * 2272 * In the above example, ip_newroute_v6() tried to create the cache ire 2273 * 'cire' for gw1, based on the interface route in line 3. The 2274 * ire_ftable_lookup_v6() above fails, because there is 2275 * no interface route to reach gw1. (it is gw2). We fall thru below. 2276 * 2277 * Do a brute force search based on the ihandle in a subset of the 2278 * forwarding tables, corresponding to cire->ire_cmask_v6. Otherwise 2279 * things become very complex, since we don't have 'pire' in this 2280 * case. 
(Also note that this method is not possible in the offlink 2281 * case because we don't know the mask) 2282 */ 2283 i = ip_mask_to_plen_v6(&cire->ire_cmask_v6); 2284 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) 2285 return (NULL); 2286 for (j = 0; j < ipst->ips_ip6_ftable_hash_size; j++) { 2287 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][j]; 2288 rw_enter(&irb_ptr->irb_lock, RW_READER); 2289 for (ire = irb_ptr->irb_ire; ire != NULL; 2290 ire = ire->ire_next) { 2291 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2292 continue; 2293 if ((ire->ire_type & IRE_INTERFACE) && 2294 (ire->ire_ihandle == cire->ire_ihandle)) { 2295 IRE_REFHOLD(ire); 2296 rw_exit(&irb_ptr->irb_lock); 2297 return (ire); 2298 } 2299 } 2300 rw_exit(&irb_ptr->irb_lock); 2301 } 2302 return (NULL); 2303 } 2304 2305 2306 /* 2307 * Locate the interface ire that is tied to the cache ire 'cire' via 2308 * cire->ire_ihandle. 2309 * 2310 * We are trying to create the cache ire for an offlink destn based 2311 * on the cache ire of the gateway in 'cire'. 'pire' is the prefix ire 2312 * as found by ip_newroute_v6(). We are called from ip_newroute_v6() in 2313 * the IRE_CACHE case. 2314 */ 2315 ire_t * 2316 ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire) 2317 { 2318 ire_t *ire; 2319 int match_flags; 2320 in6_addr_t gw_addr; 2321 ipif_t *gw_ipif; 2322 ip_stack_t *ipst = cire->ire_ipst; 2323 2324 ASSERT(cire != NULL && pire != NULL); 2325 2326 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2327 /* 2328 * ip_newroute_v6 calls ire_ftable_lookup with MATCH_IRE_ILL only 2329 * for on-link hosts. We should never be here for onlink. 2330 * Thus, use MATCH_IRE_ILL_GROUP. 2331 */ 2332 if (pire->ire_ipif != NULL) 2333 match_flags |= MATCH_IRE_ILL_GROUP; 2334 /* 2335 * We know that the mask of the interface ire equals cire->ire_cmask. 2336 * (When ip_newroute_v6() created 'cire' for an on-link destn. 
it set 2337 * its cmask from the interface ire's mask) 2338 */ 2339 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 0, 2340 IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2341 NULL, match_flags, ipst); 2342 if (ire != NULL) 2343 return (ire); 2344 /* 2345 * If we didn't find an interface ire above, we can't declare failure. 2346 * For backwards compatibility, we need to support prefix routes 2347 * pointing to next hop gateways that are not on-link. 2348 * 2349 * Assume we are trying to ping some offlink destn, and we have the 2350 * routing table below. 2351 * 2352 * Eg. default - gw1 <--- pire (line 1) 2353 * gw1 - gw2 (line 2) 2354 * gw2 - hme0 (line 3) 2355 * 2356 * If we already have a cache ire for gw1 in 'cire', the 2357 * ire_ftable_lookup_v6 above would have failed, since there is no 2358 * interface ire to reach gw1. We will fallthru below. 2359 * 2360 * Here we duplicate the steps that ire_ftable_lookup_v6() did in 2361 * getting 'cire' from 'pire', in the MATCH_IRE_RECURSIVE case. 2362 * The differences are the following 2363 * i. We want the interface ire only, so we call 2364 * ire_ftable_lookup_v6() instead of ire_route_lookup_v6() 2365 * ii. We look for only prefix routes in the 1st call below. 2366 * ii. We want to match on the ihandle in the 2nd call below. 2367 */ 2368 match_flags = MATCH_IRE_TYPE; 2369 if (pire->ire_ipif != NULL) 2370 match_flags |= MATCH_IRE_ILL_GROUP; 2371 2372 mutex_enter(&pire->ire_lock); 2373 gw_addr = pire->ire_gateway_addr_v6; 2374 mutex_exit(&pire->ire_lock); 2375 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_OFFSUBNET, 2376 pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst); 2377 if (ire == NULL) 2378 return (NULL); 2379 /* 2380 * At this point 'ire' corresponds to the entry shown in line 2. 2381 * gw_addr is 'gw2' in the example above. 
2382 */ 2383 mutex_enter(&ire->ire_lock); 2384 gw_addr = ire->ire_gateway_addr_v6; 2385 mutex_exit(&ire->ire_lock); 2386 gw_ipif = ire->ire_ipif; 2387 ire_refrele(ire); 2388 2389 match_flags |= MATCH_IRE_IHANDLE; 2390 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_INTERFACE, 2391 gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2392 NULL, match_flags, ipst); 2393 return (ire); 2394 } 2395 2396 /* 2397 * Return the IRE_LOOPBACK, IRE_IF_RESOLVER or IRE_IF_NORESOLVER 2398 * ire associated with the specified ipif. 2399 * 2400 * This might occasionally be called when IPIF_UP is not set since 2401 * the IPV6_MULTICAST_IF as well as creating interface routes 2402 * allows specifying a down ipif (ipif_lookup* match ipifs that are down). 2403 * 2404 * Note that if IPIF_NOLOCAL, IPIF_NOXMIT, or IPIF_DEPRECATED is set on 2405 * the ipif this routine might return NULL. 2406 * (Sometimes called as writer though not required by this function.) 2407 */ 2408 ire_t * 2409 ipif_to_ire_v6(const ipif_t *ipif) 2410 { 2411 ire_t *ire; 2412 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2413 2414 ASSERT(ipif->ipif_isv6); 2415 if (ipif->ipif_ire_type == IRE_LOOPBACK) { 2416 ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, NULL, 2417 IRE_LOOPBACK, ipif, ALL_ZONES, NULL, 2418 (MATCH_IRE_TYPE | MATCH_IRE_IPIF), ipst); 2419 } else if (ipif->ipif_flags & IPIF_POINTOPOINT) { 2420 /* In this case we need to lookup destination address. 
*/ 2421 ire = ire_ftable_lookup_v6(&ipif->ipif_v6pp_dst_addr, 2422 &ipv6_all_ones, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 2423 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 2424 MATCH_IRE_MASK), ipst); 2425 } else { 2426 ire = ire_ftable_lookup_v6(&ipif->ipif_v6subnet, 2427 &ipif->ipif_v6net_mask, NULL, IRE_INTERFACE, ipif, NULL, 2428 ALL_ZONES, 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 2429 MATCH_IRE_MASK), ipst); 2430 } 2431 return (ire); 2432 } 2433 2434 /* 2435 * Return B_TRUE if a multirt route is resolvable 2436 * (or if no route is resolved yet), B_FALSE otherwise. 2437 * This only works in the global zone. 2438 */ 2439 boolean_t 2440 ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl, 2441 ip_stack_t *ipst) 2442 { 2443 ire_t *first_fire; 2444 ire_t *first_cire; 2445 ire_t *fire; 2446 ire_t *cire; 2447 irb_t *firb; 2448 irb_t *cirb; 2449 int unres_cnt = 0; 2450 boolean_t resolvable = B_FALSE; 2451 2452 /* Retrieve the first IRE_HOST that matches the destination */ 2453 first_fire = ire_ftable_lookup_v6(v6dstp, &ipv6_all_ones, 0, IRE_HOST, 2454 NULL, NULL, ALL_ZONES, 0, tsl, MATCH_IRE_MASK | MATCH_IRE_TYPE | 2455 MATCH_IRE_SECATTR, ipst); 2456 2457 /* No route at all */ 2458 if (first_fire == NULL) { 2459 return (B_TRUE); 2460 } 2461 2462 firb = first_fire->ire_bucket; 2463 ASSERT(firb); 2464 2465 /* Retrieve the first IRE_CACHE ire for that destination. */ 2466 first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID, tsl, ipst); 2467 2468 /* No resolved route. */ 2469 if (first_cire == NULL) { 2470 ire_refrele(first_fire); 2471 return (B_TRUE); 2472 } 2473 2474 /* At least one route is resolved. */ 2475 2476 cirb = first_cire->ire_bucket; 2477 ASSERT(cirb); 2478 2479 /* Count the number of routes to that dest that are declared. 
*/ 2480 IRB_REFHOLD(firb); 2481 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2482 if (!(fire->ire_flags & RTF_MULTIRT)) 2483 continue; 2484 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, v6dstp)) 2485 continue; 2486 unres_cnt++; 2487 } 2488 IRB_REFRELE(firb); 2489 2490 2491 /* Then subtract the number of routes to that dst that are resolved */ 2492 IRB_REFHOLD(cirb); 2493 for (cire = first_cire; cire != NULL; cire = cire->ire_next) { 2494 if (!(cire->ire_flags & RTF_MULTIRT)) 2495 continue; 2496 if (!IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, v6dstp)) 2497 continue; 2498 if (cire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 2499 continue; 2500 unres_cnt--; 2501 } 2502 IRB_REFRELE(cirb); 2503 2504 /* At least one route is unresolved; search for a resolvable route. */ 2505 if (unres_cnt > 0) 2506 resolvable = ire_multirt_lookup_v6(&first_cire, &first_fire, 2507 MULTIRT_USESTAMP|MULTIRT_CACHEGW, tsl, ipst); 2508 2509 if (first_fire) 2510 ire_refrele(first_fire); 2511 2512 if (first_cire) 2513 ire_refrele(first_cire); 2514 2515 return (resolvable); 2516 } 2517 2518 2519 /* 2520 * Return B_TRUE and update *ire_arg and *fire_arg 2521 * if at least one resolvable route is found. 2522 * Return B_FALSE otherwise (all routes are resolved or 2523 * the remaining unresolved routes are all unresolvable). 2524 * This only works in the global zone. 
2525 */ 2526 boolean_t 2527 ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, 2528 const ts_label_t *tsl, ip_stack_t *ipst) 2529 { 2530 clock_t delta; 2531 ire_t *best_fire = NULL; 2532 ire_t *best_cire = NULL; 2533 ire_t *first_fire; 2534 ire_t *first_cire; 2535 ire_t *fire; 2536 ire_t *cire; 2537 irb_t *firb = NULL; 2538 irb_t *cirb = NULL; 2539 ire_t *gw_ire; 2540 boolean_t already_resolved; 2541 boolean_t res; 2542 in6_addr_t v6dst; 2543 in6_addr_t v6gw; 2544 2545 ip2dbg(("ire_multirt_lookup_v6: *ire_arg %p, *fire_arg %p, " 2546 "flags %04x\n", (void *)*ire_arg, (void *)*fire_arg, flags)); 2547 2548 ASSERT(ire_arg); 2549 ASSERT(fire_arg); 2550 2551 /* Not an IRE_HOST ire; give up. */ 2552 if ((*fire_arg == NULL) || 2553 ((*fire_arg)->ire_type != IRE_HOST)) { 2554 return (B_FALSE); 2555 } 2556 2557 /* This is the first IRE_HOST ire for that destination. */ 2558 first_fire = *fire_arg; 2559 firb = first_fire->ire_bucket; 2560 ASSERT(firb); 2561 2562 mutex_enter(&first_fire->ire_lock); 2563 v6dst = first_fire->ire_addr_v6; 2564 mutex_exit(&first_fire->ire_lock); 2565 2566 ip2dbg(("ire_multirt_lookup_v6: dst %08x\n", 2567 ntohl(V4_PART_OF_V6(v6dst)))); 2568 2569 /* 2570 * Retrieve the first IRE_CACHE ire for that destination; 2571 * if we don't find one, no route for that dest is 2572 * resolved yet. 2573 */ 2574 first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID, tsl, ipst); 2575 if (first_cire) { 2576 cirb = first_cire->ire_bucket; 2577 } 2578 2579 ip2dbg(("ire_multirt_lookup_v6: first_cire %p\n", (void *)first_cire)); 2580 2581 /* 2582 * Search for a resolvable route, giving the top priority 2583 * to routes that can be resolved without any call to the resolver. 2584 */ 2585 IRB_REFHOLD(firb); 2586 2587 if (!IN6_IS_ADDR_MULTICAST(&v6dst)) { 2588 /* 2589 * For all multiroute IRE_HOST ires for that destination, 2590 * check if the route via the IRE_HOST's gateway is 2591 * resolved yet. 
2592 */ 2593 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2594 2595 if (!(fire->ire_flags & RTF_MULTIRT)) 2596 continue; 2597 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2598 continue; 2599 2600 if (fire->ire_gw_secattr != NULL && 2601 tsol_ire_match_gwattr(fire, tsl) != 0) { 2602 continue; 2603 } 2604 2605 mutex_enter(&fire->ire_lock); 2606 v6gw = fire->ire_gateway_addr_v6; 2607 mutex_exit(&fire->ire_lock); 2608 2609 ip2dbg(("ire_multirt_lookup_v6: fire %p, " 2610 "ire_addr %08x, ire_gateway_addr %08x\n", 2611 (void *)fire, 2612 ntohl(V4_PART_OF_V6(fire->ire_addr_v6)), 2613 ntohl(V4_PART_OF_V6(v6gw)))); 2614 2615 already_resolved = B_FALSE; 2616 2617 if (first_cire) { 2618 ASSERT(cirb); 2619 2620 IRB_REFHOLD(cirb); 2621 /* 2622 * For all IRE_CACHE ires for that 2623 * destination. 2624 */ 2625 for (cire = first_cire; 2626 cire != NULL; 2627 cire = cire->ire_next) { 2628 2629 if (!(cire->ire_flags & RTF_MULTIRT)) 2630 continue; 2631 if (!IN6_ARE_ADDR_EQUAL( 2632 &cire->ire_addr_v6, &v6dst)) 2633 continue; 2634 if (cire->ire_marks & 2635 (IRE_MARK_CONDEMNED| 2636 IRE_MARK_HIDDEN)) 2637 continue; 2638 2639 if (cire->ire_gw_secattr != NULL && 2640 tsol_ire_match_gwattr(cire, 2641 tsl) != 0) { 2642 continue; 2643 } 2644 2645 /* 2646 * Check if the IRE_CACHE's gateway 2647 * matches the IRE_HOST's gateway. 2648 */ 2649 if (IN6_ARE_ADDR_EQUAL( 2650 &cire->ire_gateway_addr_v6, 2651 &v6gw)) { 2652 already_resolved = B_TRUE; 2653 break; 2654 } 2655 } 2656 IRB_REFRELE(cirb); 2657 } 2658 2659 /* 2660 * This route is already resolved; 2661 * proceed with next one. 2662 */ 2663 if (already_resolved) { 2664 ip2dbg(("ire_multirt_lookup_v6: found cire %p, " 2665 "already resolved\n", (void *)cire)); 2666 continue; 2667 } 2668 2669 /* 2670 * The route is unresolved; is it actually 2671 * resolvable, i.e. is there a cache or a resolver 2672 * for the gateway? 
2673 */ 2674 gw_ire = ire_route_lookup_v6(&v6gw, 0, 0, 0, NULL, NULL, 2675 ALL_ZONES, tsl, MATCH_IRE_RECURSIVE | 2676 MATCH_IRE_SECATTR, ipst); 2677 2678 ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n", 2679 (void *)gw_ire)); 2680 2681 /* 2682 * This route can be resolved without any call to the 2683 * resolver; if the MULTIRT_CACHEGW flag is set, 2684 * give the top priority to this ire and exit the 2685 * loop. 2686 * This occurs when an resolver reply is processed 2687 * through ip_wput_nondata() 2688 */ 2689 if ((flags & MULTIRT_CACHEGW) && 2690 (gw_ire != NULL) && 2691 (gw_ire->ire_type & IRE_CACHETABLE)) { 2692 /* 2693 * Release the resolver associated to the 2694 * previous candidate best ire, if any. 2695 */ 2696 if (best_cire) { 2697 ire_refrele(best_cire); 2698 ASSERT(best_fire); 2699 } 2700 2701 best_fire = fire; 2702 best_cire = gw_ire; 2703 2704 ip2dbg(("ire_multirt_lookup_v6: found top prio " 2705 "best_fire %p, best_cire %p\n", 2706 (void *)best_fire, (void *)best_cire)); 2707 break; 2708 } 2709 2710 /* 2711 * Compute the time elapsed since our preceding 2712 * attempt to resolve that route. 2713 * If the MULTIRT_USESTAMP flag is set, we take that 2714 * route into account only if this time interval 2715 * exceeds ip_multirt_resolution_interval; 2716 * this prevents us from attempting to resolve a 2717 * broken route upon each sending of a packet. 2718 */ 2719 delta = lbolt - fire->ire_last_used_time; 2720 delta = TICK_TO_MSEC(delta); 2721 2722 res = (boolean_t) 2723 ((delta > ipst-> 2724 ips_ip_multirt_resolution_interval) || 2725 (!(flags & MULTIRT_USESTAMP))); 2726 2727 ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, " 2728 "res %d\n", 2729 (void *)fire, delta, res)); 2730 2731 if (res) { 2732 /* 2733 * A resolver exists for the gateway: save 2734 * the current IRE_HOST ire as a candidate 2735 * best ire. If we later discover that a 2736 * top priority ire exists (i.e. 
no need to 2737 * call the resolver), then this new ire 2738 * will be preferred to the current one. 2739 */ 2740 if (gw_ire != NULL) { 2741 if (best_fire == NULL) { 2742 ASSERT(best_cire == NULL); 2743 2744 best_fire = fire; 2745 best_cire = gw_ire; 2746 2747 ip2dbg(("ire_multirt_lookup_v6:" 2748 "found candidate " 2749 "best_fire %p, " 2750 "best_cire %p\n", 2751 (void *)best_fire, 2752 (void *)best_cire)); 2753 2754 /* 2755 * If MULTIRT_CACHEGW is not 2756 * set, we ignore the top 2757 * priority ires that can 2758 * be resolved without any 2759 * call to the resolver; 2760 * In that case, there is 2761 * actually no need 2762 * to continue the loop. 2763 */ 2764 if (!(flags & 2765 MULTIRT_CACHEGW)) { 2766 break; 2767 } 2768 continue; 2769 } 2770 } else { 2771 /* 2772 * No resolver for the gateway: the 2773 * route is not resolvable. 2774 * If the MULTIRT_SETSTAMP flag is 2775 * set, we stamp the IRE_HOST ire, 2776 * so we will not select it again 2777 * during this resolution interval. 2778 */ 2779 if (flags & MULTIRT_SETSTAMP) 2780 fire->ire_last_used_time = 2781 lbolt; 2782 } 2783 } 2784 2785 if (gw_ire != NULL) 2786 ire_refrele(gw_ire); 2787 } 2788 } else { /* IN6_IS_ADDR_MULTICAST(&v6dst) */ 2789 2790 for (fire = first_fire; 2791 fire != NULL; 2792 fire = fire->ire_next) { 2793 2794 if (!(fire->ire_flags & RTF_MULTIRT)) 2795 continue; 2796 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2797 continue; 2798 2799 if (fire->ire_gw_secattr != NULL && 2800 tsol_ire_match_gwattr(fire, tsl) != 0) { 2801 continue; 2802 } 2803 2804 already_resolved = B_FALSE; 2805 2806 mutex_enter(&fire->ire_lock); 2807 v6gw = fire->ire_gateway_addr_v6; 2808 mutex_exit(&fire->ire_lock); 2809 2810 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2811 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, tsl, 2812 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE | 2813 MATCH_IRE_SECATTR, ipst); 2814 2815 /* No resolver for the gateway; we skip this ire. 
*/ 2816 if (gw_ire == NULL) { 2817 continue; 2818 } 2819 2820 if (first_cire) { 2821 2822 IRB_REFHOLD(cirb); 2823 /* 2824 * For all IRE_CACHE ires for that 2825 * destination. 2826 */ 2827 for (cire = first_cire; 2828 cire != NULL; 2829 cire = cire->ire_next) { 2830 2831 if (!(cire->ire_flags & RTF_MULTIRT)) 2832 continue; 2833 if (!IN6_ARE_ADDR_EQUAL( 2834 &cire->ire_addr_v6, &v6dst)) 2835 continue; 2836 if (cire->ire_marks & 2837 (IRE_MARK_CONDEMNED| 2838 IRE_MARK_HIDDEN)) 2839 continue; 2840 2841 if (cire->ire_gw_secattr != NULL && 2842 tsol_ire_match_gwattr(cire, 2843 tsl) != 0) { 2844 continue; 2845 } 2846 2847 /* 2848 * Cache entries are linked to the 2849 * parent routes using the parent handle 2850 * (ire_phandle). If no cache entry has 2851 * the same handle as fire, fire is 2852 * still unresolved. 2853 */ 2854 ASSERT(cire->ire_phandle != 0); 2855 if (cire->ire_phandle == 2856 fire->ire_phandle) { 2857 already_resolved = B_TRUE; 2858 break; 2859 } 2860 } 2861 IRB_REFRELE(cirb); 2862 } 2863 2864 /* 2865 * This route is already resolved; proceed with 2866 * next one. 2867 */ 2868 if (already_resolved) { 2869 ire_refrele(gw_ire); 2870 continue; 2871 } 2872 2873 /* 2874 * Compute the time elapsed since our preceding 2875 * attempt to resolve that route. 2876 * If the MULTIRT_USESTAMP flag is set, we take 2877 * that route into account only if this time 2878 * interval exceeds ip_multirt_resolution_interval; 2879 * this prevents us from attempting to resolve a 2880 * broken route upon each sending of a packet. 
2881 */ 2882 delta = lbolt - fire->ire_last_used_time; 2883 delta = TICK_TO_MSEC(delta); 2884 2885 res = (boolean_t) 2886 ((delta > ipst-> 2887 ips_ip_multirt_resolution_interval) || 2888 (!(flags & MULTIRT_USESTAMP))); 2889 2890 ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, " 2891 "flags %04x, res %d\n", 2892 (void *)fire, delta, flags, res)); 2893 2894 if (res) { 2895 if (best_cire) { 2896 /* 2897 * Release the resolver associated 2898 * to the preceding candidate best 2899 * ire, if any. 2900 */ 2901 ire_refrele(best_cire); 2902 ASSERT(best_fire); 2903 } 2904 best_fire = fire; 2905 best_cire = gw_ire; 2906 continue; 2907 } 2908 2909 ire_refrele(gw_ire); 2910 } 2911 } 2912 2913 if (best_fire) { 2914 IRE_REFHOLD(best_fire); 2915 } 2916 IRB_REFRELE(firb); 2917 2918 /* Release the first IRE_CACHE we initially looked up, if any. */ 2919 if (first_cire) 2920 ire_refrele(first_cire); 2921 2922 /* Found a resolvable route. */ 2923 if (best_fire) { 2924 ASSERT(best_cire); 2925 2926 if (*fire_arg) 2927 ire_refrele(*fire_arg); 2928 if (*ire_arg) 2929 ire_refrele(*ire_arg); 2930 2931 /* 2932 * Update the passed arguments with the 2933 * resolvable multirt route we found 2934 */ 2935 *fire_arg = best_fire; 2936 *ire_arg = best_cire; 2937 2938 ip2dbg(("ire_multirt_lookup_v6: returning B_TRUE, " 2939 "*fire_arg %p, *ire_arg %p\n", 2940 (void *)best_fire, (void *)best_cire)); 2941 2942 return (B_TRUE); 2943 } 2944 2945 ASSERT(best_cire == NULL); 2946 2947 ip2dbg(("ire_multirt_lookup_v6: returning B_FALSE, *fire_arg %p, " 2948 "*ire_arg %p\n", 2949 (void *)*fire_arg, (void *)*ire_arg)); 2950 2951 /* No resolvable route. */ 2952 return (B_FALSE); 2953 } 2954 2955 2956 /* 2957 * Find an IRE_OFFSUBNET IRE entry for the multicast address 'v6dstp' 2958 * that goes through 'ipif'. As a fallback, a route that goes through 2959 * ipif->ipif_ill can be returned. 
2960 */ 2961 ire_t * 2962 ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp) 2963 { 2964 ire_t *ire; 2965 ire_t *save_ire = NULL; 2966 ire_t *gw_ire; 2967 irb_t *irb; 2968 in6_addr_t v6gw; 2969 int match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL; 2970 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2971 2972 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, NULL, ALL_ZONES, 0, 2973 NULL, MATCH_IRE_DEFAULT, ipst); 2974 2975 if (ire == NULL) 2976 return (NULL); 2977 2978 irb = ire->ire_bucket; 2979 ASSERT(irb); 2980 2981 IRB_REFHOLD(irb); 2982 ire_refrele(ire); 2983 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 2984 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) || 2985 (ipif->ipif_zoneid != ire->ire_zoneid && 2986 ire->ire_zoneid != ALL_ZONES)) { 2987 continue; 2988 } 2989 2990 switch (ire->ire_type) { 2991 case IRE_DEFAULT: 2992 case IRE_PREFIX: 2993 case IRE_HOST: 2994 mutex_enter(&ire->ire_lock); 2995 v6gw = ire->ire_gateway_addr_v6; 2996 mutex_exit(&ire->ire_lock); 2997 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2998 IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, 2999 NULL, match_flags, ipst); 3000 3001 if (gw_ire != NULL) { 3002 if (save_ire != NULL) { 3003 ire_refrele(save_ire); 3004 } 3005 IRE_REFHOLD(ire); 3006 if (gw_ire->ire_ipif == ipif) { 3007 ire_refrele(gw_ire); 3008 3009 IRB_REFRELE(irb); 3010 return (ire); 3011 } 3012 ire_refrele(gw_ire); 3013 save_ire = ire; 3014 } 3015 break; 3016 case IRE_IF_NORESOLVER: 3017 case IRE_IF_RESOLVER: 3018 if (ire->ire_ipif == ipif) { 3019 if (save_ire != NULL) { 3020 ire_refrele(save_ire); 3021 } 3022 IRE_REFHOLD(ire); 3023 3024 IRB_REFRELE(irb); 3025 return (ire); 3026 } 3027 break; 3028 } 3029 } 3030 IRB_REFRELE(irb); 3031 3032 return (save_ire); 3033 } 3034