1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 /* 32 * This file contains routines that manipulate Internet Routing Entries (IREs). 
33 */ 34 #include <sys/types.h> 35 #include <sys/stream.h> 36 #include <sys/stropts.h> 37 #include <sys/ddi.h> 38 #include <sys/cmn_err.h> 39 40 #include <sys/systm.h> 41 #include <sys/param.h> 42 #include <sys/socket.h> 43 #include <net/if.h> 44 #include <net/route.h> 45 #include <netinet/in.h> 46 #include <net/if_dl.h> 47 #include <netinet/ip6.h> 48 #include <netinet/icmp6.h> 49 50 #include <inet/common.h> 51 #include <inet/mi.h> 52 #include <inet/ip.h> 53 #include <inet/ip6.h> 54 #include <inet/ip_ndp.h> 55 #include <inet/ip_if.h> 56 #include <inet/ip_ire.h> 57 #include <inet/ipclassifier.h> 58 #include <inet/nd.h> 59 #include <sys/kmem.h> 60 #include <sys/zone.h> 61 62 #include <sys/tsol/label.h> 63 #include <sys/tsol/tnet.h> 64 65 static ire_t ire_null; 66 67 static ire_t *ire_ihandle_lookup_onlink_v6(ire_t *cire); 68 static void ire_report_ftable_v6(ire_t *ire, char *mp); 69 static void ire_report_ctable_v6(ire_t *ire, char *mp); 70 static boolean_t ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, 71 const in6_addr_t *mask, const in6_addr_t *gateway, int type, 72 const ipif_t *ipif, zoneid_t zoneid, uint32_t ihandle, 73 const ts_label_t *tsl, int match_flags); 74 75 /* 76 * Named Dispatch routine to produce a formatted report on all IREs. 77 * This report is accessed by using the ndd utility to "get" ND variable 78 * "ip_ire_status_v6". 
79 */ 80 /* ARGSUSED */ 81 int 82 ip_ire_report_v6(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 83 { 84 zoneid_t zoneid; 85 ip_stack_t *ipst; 86 87 (void) mi_mpprintf(mp, 88 "IRE " MI_COL_HDRPAD_STR 89 "rfq " MI_COL_HDRPAD_STR 90 "stq " MI_COL_HDRPAD_STR 91 " zone mxfrg rtt rtt_sd ssthresh ref " 92 "rtomax tstamp_ok wscale_ok ecn_ok pmtud_ok sack sendpipe recvpipe " 93 "in/out/forward type addr mask " 94 "src gateway"); 95 /* 96 * 01234567 01234567 01234567 12345 12345 12345 12345 12345678 123 97 * 123456 123456789 123456789 123456 12345678 1234 12345678 12345678 98 * in/out/forward xxxxxxxxxx 99 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 100 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 101 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 102 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 103 */ 104 105 /* 106 * Because of the ndd constraint, at most we can have 64K buffer 107 * to put in all IRE info. So to be more efficient, just 108 * allocate a 64K buffer here, assuming we need that large buffer. 109 * This should be OK as only root can do ndd /dev/ip. 110 */ 111 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 112 /* The following may work even if we cannot get a large buf. */ 113 (void) mi_mpprintf(mp, "<< Out of buffer >>\n"); 114 return (0); 115 } 116 zoneid = Q_TO_CONN(q)->conn_zoneid; 117 if (zoneid == GLOBAL_ZONEID) 118 zoneid = ALL_ZONES; 119 ipst = CONNQ_TO_IPST(q); 120 121 ire_walk_v6(ire_report_ftable_v6, (char *)mp->b_cont, zoneid, ipst); 122 ire_walk_v6(ire_report_ctable_v6, (char *)mp->b_cont, zoneid, ipst); 123 return (0); 124 } 125 126 /* 127 * ire_walk routine invoked for ip_ire_report_v6 for each IRE. 
128 */ 129 static void 130 ire_report_ftable_v6(ire_t *ire, char *mp) 131 { 132 char buf1[INET6_ADDRSTRLEN]; 133 char buf2[INET6_ADDRSTRLEN]; 134 char buf3[INET6_ADDRSTRLEN]; 135 char buf4[INET6_ADDRSTRLEN]; 136 uint_t fo_pkt_count; 137 uint_t ib_pkt_count; 138 int ref; 139 in6_addr_t gw_addr_v6; 140 uint_t print_len, buf_len; 141 142 ASSERT(ire->ire_ipversion == IPV6_VERSION); 143 if (ire->ire_type & IRE_CACHETABLE) 144 return; 145 buf_len = ((mblk_t *)mp)->b_datap->db_lim - ((mblk_t *)mp)->b_wptr; 146 if (buf_len <= 0) 147 return; 148 149 /* Number of active references of this ire */ 150 ref = ire->ire_refcnt; 151 /* "inbound" to a non local address is a forward */ 152 ib_pkt_count = ire->ire_ib_pkt_count; 153 fo_pkt_count = 0; 154 ASSERT(!(ire->ire_type & IRE_BROADCAST)); 155 if (!(ire->ire_type & (IRE_LOCAL|IRE_BROADCAST))) { 156 fo_pkt_count = ib_pkt_count; 157 ib_pkt_count = 0; 158 } 159 160 mutex_enter(&ire->ire_lock); 161 gw_addr_v6 = ire->ire_gateway_addr_v6; 162 mutex_exit(&ire->ire_lock); 163 164 print_len = snprintf((char *)((mblk_t *)mp)->b_wptr, buf_len, 165 MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR "%5d " 166 "%05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d " 167 "%04d %08d %08d %d/%d/%d %s\n\t%s\n\t%s\n\t%s\n\t%s\n", 168 (void *)ire, (void *)ire->ire_rfq, (void *)ire->ire_stq, 169 (int)ire->ire_zoneid, 170 ire->ire_max_frag, ire->ire_uinfo.iulp_rtt, 171 ire->ire_uinfo.iulp_rtt_sd, 172 ire->ire_uinfo.iulp_ssthresh, ref, 173 ire->ire_uinfo.iulp_rtomax, 174 (ire->ire_uinfo.iulp_tstamp_ok ? 1: 0), 175 (ire->ire_uinfo.iulp_wscale_ok ? 1: 0), 176 (ire->ire_uinfo.iulp_ecn_ok ? 1: 0), 177 (ire->ire_uinfo.iulp_pmtud_ok ? 
1: 0), 178 ire->ire_uinfo.iulp_sack, 179 ire->ire_uinfo.iulp_spipe, ire->ire_uinfo.iulp_rpipe, 180 ib_pkt_count, ire->ire_ob_pkt_count, fo_pkt_count, 181 ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type), 182 inet_ntop(AF_INET6, &ire->ire_addr_v6, buf1, sizeof (buf1)), 183 inet_ntop(AF_INET6, &ire->ire_mask_v6, buf2, sizeof (buf2)), 184 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, buf3, sizeof (buf3)), 185 inet_ntop(AF_INET6, &gw_addr_v6, buf4, sizeof (buf4))); 186 if (print_len < buf_len) { 187 ((mblk_t *)mp)->b_wptr += print_len; 188 } else { 189 ((mblk_t *)mp)->b_wptr += buf_len; 190 } 191 } 192 193 /* ire_walk routine invoked for ip_ire_report_v6 for each IRE. */ 194 static void 195 ire_report_ctable_v6(ire_t *ire, char *mp) 196 { 197 char buf1[INET6_ADDRSTRLEN]; 198 char buf2[INET6_ADDRSTRLEN]; 199 char buf3[INET6_ADDRSTRLEN]; 200 char buf4[INET6_ADDRSTRLEN]; 201 uint_t fo_pkt_count; 202 uint_t ib_pkt_count; 203 int ref; 204 in6_addr_t gw_addr_v6; 205 uint_t print_len, buf_len; 206 207 if ((ire->ire_type & IRE_CACHETABLE) == 0) 208 return; 209 buf_len = ((mblk_t *)mp)->b_datap->db_lim - ((mblk_t *)mp)->b_wptr; 210 if (buf_len <= 0) 211 return; 212 213 /* Number of active references of this ire */ 214 ref = ire->ire_refcnt; 215 /* "inbound" to a non local address is a forward */ 216 ib_pkt_count = ire->ire_ib_pkt_count; 217 fo_pkt_count = 0; 218 ASSERT(!(ire->ire_type & IRE_BROADCAST)); 219 if (ire->ire_type & IRE_LOCAL) { 220 fo_pkt_count = ib_pkt_count; 221 ib_pkt_count = 0; 222 } 223 224 mutex_enter(&ire->ire_lock); 225 gw_addr_v6 = ire->ire_gateway_addr_v6; 226 mutex_exit(&ire->ire_lock); 227 228 print_len = snprintf((char *)((mblk_t *)mp)->b_wptr, buf_len, 229 MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR "%5d " 230 "%05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d " 231 "%04d %08d %08d %d/%d/%d %s\n\t%s\n\t%s\n\t%s\n\t%s\n", 232 (void *)ire, (void *)ire->ire_rfq, (void *)ire->ire_stq, 233 (int)ire->ire_zoneid, 234 ire->ire_max_frag, 
ire->ire_uinfo.iulp_rtt, 235 ire->ire_uinfo.iulp_rtt_sd, ire->ire_uinfo.iulp_ssthresh, ref, 236 ire->ire_uinfo.iulp_rtomax, 237 (ire->ire_uinfo.iulp_tstamp_ok ? 1: 0), 238 (ire->ire_uinfo.iulp_wscale_ok ? 1: 0), 239 (ire->ire_uinfo.iulp_ecn_ok ? 1: 0), 240 (ire->ire_uinfo.iulp_pmtud_ok ? 1: 0), 241 ire->ire_uinfo.iulp_sack, 242 ire->ire_uinfo.iulp_spipe, ire->ire_uinfo.iulp_rpipe, 243 ib_pkt_count, ire->ire_ob_pkt_count, 244 fo_pkt_count, ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type), 245 inet_ntop(AF_INET6, &ire->ire_addr_v6, buf1, sizeof (buf1)), 246 inet_ntop(AF_INET6, &ire->ire_mask_v6, buf2, sizeof (buf2)), 247 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, buf3, sizeof (buf3)), 248 inet_ntop(AF_INET6, &gw_addr_v6, buf4, sizeof (buf4))); 249 if (print_len < buf_len) { 250 ((mblk_t *)mp)->b_wptr += print_len; 251 } else { 252 ((mblk_t *)mp)->b_wptr += buf_len; 253 } 254 } 255 256 257 /* 258 * Initialize the ire that is specific to IPv6 part and call 259 * ire_init_common to finish it. 260 */ 261 ire_t * 262 ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, 263 const in6_addr_t *v6mask, const in6_addr_t *v6src_addr, 264 const in6_addr_t *v6gateway, uint_t *max_fragp, 265 mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, 266 mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask, 267 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 268 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 269 { 270 271 /* 272 * Reject IRE security attribute creation/initialization 273 * if system is not running in Trusted mode. 274 */ 275 if ((gc != NULL || gcgrp != NULL) && !is_system_labeled()) 276 return (NULL); 277 278 if (fp_mp != NULL) { 279 /* 280 * We can't dupb() here as multiple threads could be 281 * calling dupb on the same mp which is incorrect. 282 * First dupb() should be called only by one thread. 
283 */ 284 fp_mp = copyb(fp_mp); 285 if (fp_mp == NULL) 286 return (NULL); 287 } 288 289 if (dlureq_mp != NULL) { 290 /* 291 * We can't dupb() here as multiple threads could be 292 * calling dupb on the same mp which is incorrect. 293 * First dupb() should be called only by one thread. 294 */ 295 dlureq_mp = copyb(dlureq_mp); 296 if (dlureq_mp == NULL) { 297 if (fp_mp != NULL) 298 freeb(fp_mp); 299 return (NULL); 300 } 301 } 302 303 BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_alloced); 304 ire->ire_addr_v6 = *v6addr; 305 306 if (v6src_addr != NULL) 307 ire->ire_src_addr_v6 = *v6src_addr; 308 if (v6mask != NULL) { 309 ire->ire_mask_v6 = *v6mask; 310 ire->ire_masklen = ip_mask_to_plen_v6(&ire->ire_mask_v6); 311 } 312 if (v6gateway != NULL) 313 ire->ire_gateway_addr_v6 = *v6gateway; 314 315 if (type == IRE_CACHE && v6cmask != NULL) 316 ire->ire_cmask_v6 = *v6cmask; 317 318 /* 319 * Multirouted packets need to have a fragment header added so that 320 * the receiver is able to discard duplicates according to their 321 * fragment identifier. 322 */ 323 if (type == IRE_CACHE && (flags & RTF_MULTIRT)) { 324 ire->ire_frag_flag = IPH_FRAG_HDR; 325 } 326 327 /* ire_init_common will free the mblks upon encountering any failure */ 328 if (!ire_init_common(ire, max_fragp, fp_mp, rfq, stq, type, dlureq_mp, 329 ipif, NULL, phandle, ihandle, flags, IPV6_VERSION, ulp_info, 330 gc, gcgrp, ipst)) 331 return (NULL); 332 333 return (ire); 334 } 335 336 /* 337 * Similar to ire_create_v6 except that it is called only when 338 * we want to allocate ire as an mblk e.g. we have a external 339 * resolver. Do we need this in IPv6 ? 
340 */ 341 ire_t * 342 ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 343 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 344 mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, 345 mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask, 346 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 347 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 348 { 349 ire_t *ire; 350 ire_t *ret_ire; 351 mblk_t *mp; 352 353 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 354 355 /* Allocate the new IRE. */ 356 mp = allocb(sizeof (ire_t), BPRI_MED); 357 if (mp == NULL) { 358 ip1dbg(("ire_create_mp_v6: alloc failed\n")); 359 return (NULL); 360 } 361 362 ire = (ire_t *)mp->b_rptr; 363 mp->b_wptr = (uchar_t *)&ire[1]; 364 365 /* Start clean. */ 366 *ire = ire_null; 367 ire->ire_mp = mp; 368 mp->b_datap->db_type = IRE_DB_TYPE; 369 370 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 371 NULL, fp_mp, rfq, stq, type, dlureq_mp, ipif, v6cmask, phandle, 372 ihandle, flags, ulp_info, gc, gcgrp, ipst); 373 374 if (ret_ire == NULL) { 375 freeb(ire->ire_mp); 376 return (NULL); 377 } 378 return (ire); 379 } 380 381 /* 382 * ire_create_v6 is called to allocate and initialize a new IRE. 383 * 384 * NOTE : This is called as writer sometimes though not required 385 * by this function. 
386 */ 387 ire_t * 388 ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 389 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 390 uint_t *max_fragp, mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, 391 mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask, 392 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, 393 tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst) 394 { 395 ire_t *ire; 396 ire_t *ret_ire; 397 398 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 399 400 ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP); 401 if (ire == NULL) { 402 ip1dbg(("ire_create_v6: alloc failed\n")); 403 return (NULL); 404 } 405 *ire = ire_null; 406 407 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 408 max_fragp, fp_mp, rfq, stq, type, dlureq_mp, ipif, v6cmask, phandle, 409 ihandle, flags, ulp_info, gc, gcgrp, ipst); 410 411 if (ret_ire == NULL) { 412 kmem_cache_free(ire_cache, ire); 413 return (NULL); 414 } 415 ASSERT(ret_ire == ire); 416 return (ire); 417 } 418 419 /* 420 * Find an IRE_INTERFACE for the multicast group. 421 * Allows different routes for multicast addresses 422 * in the unicast routing table (akin to FF::0/8 but could be more specific) 423 * which point at different interfaces. This is used when IPV6_MULTICAST_IF 424 * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't 425 * specify the interface to join on. 426 * 427 * Supports link-local addresses by following the ipif/ill when recursing. 428 */ 429 ire_t * 430 ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst) 431 { 432 ire_t *ire; 433 ipif_t *ipif = NULL; 434 int match_flags = MATCH_IRE_TYPE; 435 in6_addr_t gw_addr_v6; 436 437 ire = ire_ftable_lookup_v6(group, 0, 0, 0, NULL, NULL, 438 zoneid, 0, NULL, MATCH_IRE_DEFAULT, ipst); 439 440 /* We search a resolvable ire in case of multirouting. 
*/ 441 if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) { 442 ire_t *cire = NULL; 443 /* 444 * If the route is not resolvable, the looked up ire 445 * may be changed here. In that case, ire_multirt_lookup() 446 * IRE_REFRELE the original ire and change it. 447 */ 448 (void) ire_multirt_lookup_v6(&cire, &ire, MULTIRT_CACHEGW, 449 NULL, ipst); 450 if (cire != NULL) 451 ire_refrele(cire); 452 } 453 if (ire == NULL) 454 return (NULL); 455 /* 456 * Make sure we follow ire_ipif. 457 * 458 * We need to determine the interface route through 459 * which the gateway will be reached. We don't really 460 * care which interface is picked if the interface is 461 * part of a group. 462 */ 463 if (ire->ire_ipif != NULL) { 464 ipif = ire->ire_ipif; 465 match_flags |= MATCH_IRE_ILL_GROUP; 466 } 467 468 switch (ire->ire_type) { 469 case IRE_DEFAULT: 470 case IRE_PREFIX: 471 case IRE_HOST: 472 mutex_enter(&ire->ire_lock); 473 gw_addr_v6 = ire->ire_gateway_addr_v6; 474 mutex_exit(&ire->ire_lock); 475 ire_refrele(ire); 476 ire = ire_ftable_lookup_v6(&gw_addr_v6, 0, 0, 477 IRE_INTERFACE, ipif, NULL, zoneid, 0, 478 NULL, match_flags, ipst); 479 return (ire); 480 case IRE_IF_NORESOLVER: 481 case IRE_IF_RESOLVER: 482 return (ire); 483 default: 484 ire_refrele(ire); 485 return (NULL); 486 } 487 } 488 489 /* 490 * Return any local address. We use this to target ourselves 491 * when the src address was specified as 'default'. 492 * Preference for IRE_LOCAL entries. 
493 */ 494 ire_t * 495 ire_lookup_local_v6(zoneid_t zoneid, ip_stack_t *ipst) 496 { 497 ire_t *ire; 498 irb_t *irb; 499 ire_t *maybe = NULL; 500 int i; 501 502 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 503 irb = &ipst->ips_ip_cache_table_v6[i]; 504 if (irb->irb_ire == NULL) 505 continue; 506 rw_enter(&irb->irb_lock, RW_READER); 507 for (ire = irb->irb_ire; ire; ire = ire->ire_next) { 508 if ((ire->ire_marks & IRE_MARK_CONDEMNED) || 509 ire->ire_zoneid != zoneid && 510 ire->ire_zoneid != ALL_ZONES) 511 continue; 512 switch (ire->ire_type) { 513 case IRE_LOOPBACK: 514 if (maybe == NULL) { 515 IRE_REFHOLD(ire); 516 maybe = ire; 517 } 518 break; 519 case IRE_LOCAL: 520 if (maybe != NULL) { 521 ire_refrele(maybe); 522 } 523 IRE_REFHOLD(ire); 524 rw_exit(&irb->irb_lock); 525 return (ire); 526 } 527 } 528 rw_exit(&irb->irb_lock); 529 } 530 return (maybe); 531 } 532 533 /* 534 * This function takes a mask and returns number of bits set in the 535 * mask (the represented prefix length). Assumes a contiguous mask. 536 */ 537 int 538 ip_mask_to_plen_v6(const in6_addr_t *v6mask) 539 { 540 int bits; 541 int plen = IPV6_ABITS; 542 int i; 543 544 for (i = 3; i >= 0; i--) { 545 if (v6mask->s6_addr32[i] == 0) { 546 plen -= 32; 547 continue; 548 } 549 bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1; 550 if (bits == 0) 551 break; 552 plen -= bits; 553 } 554 555 return (plen); 556 } 557 558 /* 559 * Convert a prefix length to the mask for that prefix. 560 * Returns the argument bitmask. 561 */ 562 in6_addr_t * 563 ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask) 564 { 565 uint32_t *ptr; 566 567 if (plen < 0 || plen > IPV6_ABITS) 568 return (NULL); 569 *bitmask = ipv6_all_zeros; 570 571 ptr = (uint32_t *)bitmask; 572 while (plen > 32) { 573 *ptr++ = 0xffffffffU; 574 plen -= 32; 575 } 576 *ptr = htonl(0xffffffffU << (32 - plen)); 577 return (bitmask); 578 } 579 580 /* 581 * Add a fully initialized IRE to an appropriate 582 * table based on ire_type. 
*
 * The forward table contains IRE_PREFIX/IRE_HOST and
 * IRE_IF_RESOLVER/IRE_IF_NORESOLVER and IRE_DEFAULT.
 *
 * The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK
 * and IRE_CACHE.
 *
 * NOTE : This function is called as writer though not required
 * by this function.
 *
 * On entry *ire_p is the IRE to insert.  q, mp and func are handed
 * unchanged to ire_atomic_start().
 *
 * Returns 0 on success with *ire_p pointing at a held IRE: either the
 * IRE that was passed in, now linked into its bucket, or an already
 * existing duplicate, in which case the passed-in IRE has been deleted.
 * On failure the passed-in IRE is deleted, *ire_p is set to NULL and the
 * return value is EINVAL (unrecognized type, interface route gone, no
 * usable nce), ENOMEM (forwarding table allocation failed) or whatever
 * ire_atomic_start() returned.
 */
int
ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func)
{
	ire_t	*ire1;
	int	mask_table_index;
	irb_t	*irb_ptr;
	ire_t	**irep;
	int	flags;
	ire_t	*pire = NULL;
	ill_t	*stq_ill;
	boolean_t ndp_g_lock_held = B_FALSE;
	ire_t	*ire = *ire_p;
	int	error;
	ip_stack_t	*ipst = ire->ire_ipst;

	ASSERT(ire->ire_ipversion == IPV6_VERSION);
	ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */
	ASSERT(ire->ire_nce == NULL);

	/*
	 * Normalize the mask and source address according to the IRE type;
	 * unrecognized types are rejected.
	 */
	switch (ire->ire_type) {
	case IRE_HOST:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_CACHE:
	case IRE_LOCAL:
	case IRE_LOOPBACK:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		break;
	case IRE_PREFIX:
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_DEFAULT:
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_IF_RESOLVER:
	case IRE_IF_NORESOLVER:
		break;
	default:
		printf("ire_add_v6: ire %p has unrecognized IRE type (%d)\n",
		    (void *)ire, ire->ire_type);
		ire_delete(ire);
		*ire_p = NULL;
		return (EINVAL);
	}

	/* Make sure the address is properly masked. */
	V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6);

	/* Find the appropriate list head (hash bucket). */
	if ((ire->ire_type & IRE_CACHETABLE) == 0) {
		/* IRE goes into Forward Table */
		mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6);
		if ((ipst->ips_ip_forwarding_table_v6[mask_table_index]) ==
		    NULL) {
			irb_t *ptr;
			int i;

			/*
			 * No hash table yet for this prefix length.  Build
			 * one without holding the init lock, then install
			 * it under the lock if nobody beat us to it.
			 */
			ptr = (irb_t *)mi_zalloc((
			    ipst->ips_ip6_ftable_hash_size * sizeof (irb_t)));
			if (ptr == NULL) {
				ire_delete(ire);
				*ire_p = NULL;
				return (ENOMEM);
			}
			for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) {
				rw_init(&ptr[i].irb_lock, NULL,
				    RW_DEFAULT, NULL);
			}
			mutex_enter(&ipst->ips_ire_ft_init_lock);
			if (ipst->ips_ip_forwarding_table_v6[
			    mask_table_index] == NULL) {
				ipst->ips_ip_forwarding_table_v6[
				    mask_table_index] = ptr;
				mutex_exit(&ipst->ips_ire_ft_init_lock);
			} else {
				/*
				 * Some other thread won the race in
				 * initializing the forwarding table at the
				 * same index.
				 */
				mutex_exit(&ipst->ips_ire_ft_init_lock);
				for (i = 0; i < ipst->ips_ip6_ftable_hash_size;
				    i++) {
					rw_destroy(&ptr[i].irb_lock);
				}
				mi_free(ptr);
			}
		}
		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[mask_table_index][
		    IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6,
		    ipst->ips_ip6_ftable_hash_size)]);
	} else {
		irb_ptr = &(ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(
		    ire->ire_addr_v6, ipst->ips_ip6_cache_table_size)]);
	}
	/*
	 * For xresolv interfaces (v6 interfaces with an external
	 * address resolver), ip_newroute_v6/ip_newroute_ipif_v6
	 * are unable to prevent the deletion of the interface route
	 * while adding an IRE_CACHE for an on-link destination
	 * in the IRE_IF_RESOLVER case, since the ire has to go to
	 * the external resolver and return. We can't do a REFHOLD on the
	 * associated interface ire for fear of the message being freed
	 * if the external resolver can't resolve the address.
	 * Here we look up the interface ire in the forwarding table
	 * and make sure that the interface route has not been deleted.
	 */
	if (ire->ire_type == IRE_CACHE &&
	    IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6) &&
	    (((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) &&
	    (((ill_t *)ire->ire_stq->q_ptr)->ill_flags & ILLF_XRESOLV)) {

		pire = ire_ihandle_lookup_onlink_v6(ire);
		if (pire == NULL) {
			ire_delete(ire);
			*ire_p = NULL;
			return (EINVAL);
		}
		/* Prevent pire from getting deleted */
		IRB_REFHOLD(pire->ire_bucket);
		/* Has it been removed already? */
		if (pire->ire_marks & IRE_MARK_CONDEMNED) {
			IRB_REFRELE(pire->ire_bucket);
			ire_refrele(pire);
			ire_delete(ire);
			*ire_p = NULL;
			return (EINVAL);
		}
	}

	flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW);
	/*
	 * For IRE_CACHES, MATCH_IRE_IPIF is not enough to check
	 * for duplicates because :
	 *
	 * 1) ire_ipif->ipif_ill and ire_stq->q_ptr could be
	 *    pointing at different ills. A real duplicate is
	 *    a match on both ire_ipif and ire_stq.
	 *
	 * 2) We could have multiple packets trying to create
	 *    an IRE_CACHE for the same ill.
	 *
	 * Moreover, IPIF_NOFAILOVER and IPV6_BOUND_PIF endpoints wants
	 * to go out on a particular ill. Rather than looking at the
	 * packet, we depend on the above for MATCH_IRE_ILL here.
	 *
	 * Unlike IPv4, MATCH_IRE_IPIF is needed here as we could have
	 * multiple IRE_CACHES for an ill for the same destination
	 * with various scoped addresses i.e represented by ipifs.
	 *
	 * MATCH_IRE_ILL is done implicitly below for IRE_CACHES.
	 */
	if (ire->ire_ipif != NULL)
		flags |= MATCH_IRE_IPIF;
	/*
	 * If we are creating hidden ires, make sure we search on
	 * this ill (MATCH_IRE_ILL) and a hidden ire, while we are
	 * searching for duplicates below. Otherwise we could
	 * potentially find an IRE on some other interface
	 * and it may not be a IRE marked with IRE_MARK_HIDDEN. We
	 * shouldn't do this as this will lead to an infinite loop as
	 * eventually we need an hidden ire for this packet to go
	 * out. MATCH_IRE_ILL is already marked above.
	 */
	if (ire->ire_marks & IRE_MARK_HIDDEN) {
		ASSERT(ire->ire_type == IRE_CACHE);
		flags |= MATCH_IRE_MARK_HIDDEN;
	}

	/*
	 * Start the atomic add of the ire. Grab the ill locks,
	 * ill_g_usesrc_lock and the bucket lock. Check for condemned.
	 * To avoid lock order problems, get the ndp6.ndp_g_lock now itself.
	 */
	if (ire->ire_type == IRE_CACHE) {
		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
		ndp_g_lock_held = B_TRUE;
	}

	/*
	 * If ipif or ill is changing ire_atomic_start() may queue the
	 * request and return EINPROGRESS.
	 */

	error = ire_atomic_start(irb_ptr, ire, q, mp, func);
	if (error != 0) {
		if (ndp_g_lock_held)
			mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
		/*
		 * We don't know whether it is a valid ipif or not.
		 * So, set it to NULL. This assumes that the ire has not added
		 * a reference to the ipif.
		 */
		ire->ire_ipif = NULL;
		ire_delete(ire);
		if (pire != NULL) {
			IRB_REFRELE(pire->ire_bucket);
			ire_refrele(pire);
		}
		*ire_p = NULL;
		return (error);
	}
	/*
	 * To avoid creating ires having stale values for the ire_max_frag
	 * we get the latest value atomically here. For more details
	 * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE
	 * in ip_rput_dlpi_writer
	 *
	 * NOTE(review): pire is only looked up for the xresolv IRE_CACHE
	 * case above and is NULL otherwise.  The non-multicast branch below
	 * therefore relies on ire_max_fragp being NULL only for such IREs
	 * -- confirm against the callers.
	 */
	if (ire->ire_max_fragp == NULL) {
		if (IN6_IS_ADDR_MULTICAST(&ire->ire_addr_v6))
			ire->ire_max_frag = ire->ire_ipif->ipif_mtu;
		else
			ire->ire_max_frag = pire->ire_max_frag;
	} else {
		uint_t	max_frag;

		max_frag = *ire->ire_max_fragp;
		ire->ire_max_fragp = NULL;
		ire->ire_max_frag = max_frag;
	}

	/*
	 * Atomically check for duplicate and insert in the table.
	 */
	for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
		if (ire1->ire_marks & IRE_MARK_CONDEMNED)
			continue;

		if (ire->ire_type == IRE_CACHE) {
			/*
			 * We do MATCH_IRE_ILL implicitly here for IRE_CACHES.
			 * As ire_ipif and ire_stq could point to two
			 * different ills, we can't pass just ire_ipif to
			 * ire_match_args and get a match on both ills.
			 * This is just needed for duplicate checks here and
			 * so we don't add an extra argument to
			 * ire_match_args for this. Do it locally.
			 *
			 * NOTE : Currently there is no part of the code
			 * that asks for both MATCH_IRE_IPIF and MATCH_IRE_ILL
			 * match for IRE_CACHEs. Thus we don't want to
			 * extend the arguments to ire_match_args_v6.
			 */
			if (ire1->ire_stq != ire->ire_stq)
				continue;
			/*
			 * Multiroute IRE_CACHEs for a given destination can
			 * have the same ire_ipif, typically if their source
			 * address is forced using RTF_SETSRC, and the same
			 * send-to queue. We differentiate them using the parent
			 * handle.
			 */
			if ((ire1->ire_flags & RTF_MULTIRT) &&
			    (ire->ire_flags & RTF_MULTIRT) &&
			    (ire1->ire_phandle != ire->ire_phandle))
				continue;
		}
		if (ire1->ire_zoneid != ire->ire_zoneid)
			continue;
		if (ire_match_args_v6(ire1, &ire->ire_addr_v6,
		    &ire->ire_mask_v6, &ire->ire_gateway_addr_v6,
		    ire->ire_type, ire->ire_ipif, ire->ire_zoneid, 0, NULL,
		    flags)) {
			/*
			 * Return the old ire after doing a REFHOLD.
			 * As most of the callers continue to use the IRE
			 * after adding, we return a held ire. This will
			 * avoid a lookup in the caller again. If the callers
			 * don't want to use it, they need to do a REFRELE.
			 */
			ip1dbg(("found dup ire existing %p new %p",
			    (void *)ire1, (void *)ire));
			IRE_REFHOLD(ire1);
			if (ndp_g_lock_held)
				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
			ire_atomic_end(irb_ptr, ire);
			ire_delete(ire);
			if (pire != NULL) {
				/*
				 * Assert that it is
				 * not yet removed from the list.
				 */
				ASSERT(pire->ire_ptpn != NULL);
				IRB_REFRELE(pire->ire_bucket);
				ire_refrele(pire);
			}
			*ire_p = ire1;
			return (0);
		}
	}
	if (ire->ire_type == IRE_CACHE) {
		in6_addr_t gw_addr_v6;
		ill_t	*ill = ire_to_ill(ire);
		char	buf[INET6_ADDRSTRLEN];
		nce_t	*nce;

		/*
		 * All IRE_CACHE types must have a nce.  If this is
		 * not the case the entry will not be added. We need
		 * to make sure that if somebody deletes the nce
		 * after we looked up, they will find this ire and
		 * delete the ire. To delete this ire one needs the
		 * bucket lock which we are still holding here. So,
		 * even if the nce gets deleted after we looked up,
		 * this ire  will get deleted.
		 *
		 * NOTE : Don't need the ire_lock for accessing
		 * ire_gateway_addr_v6 as it is appearing first
		 * time on the list and rts_setgwr_v6 could not
		 * be changing this.
		 */
		gw_addr_v6 = ire->ire_gateway_addr_v6;
		/* On-link: look up the destination; else the gateway. */
		if (IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) {
			nce = ndp_lookup_v6(ill, &ire->ire_addr_v6, B_TRUE);
		} else {
			nce = ndp_lookup_v6(ill, &gw_addr_v6, B_TRUE);
		}
		/* The shared cleanup at 'failed' copes with nce == NULL. */
		if (nce == NULL)
			goto failed;

		/* Pair of refhold, refrele just to get the tracing right */
		NCE_REFHOLD_TO_REFHOLD_NOTR(nce);
		/*
		 * Atomically make sure that new IREs don't point
		 * to an NCE that is logically deleted (CONDEMNED).
		 * ndp_delete() first marks the NCE CONDEMNED.
		 * This ensures that the nce_refcnt won't increase
		 * due to new nce_lookups or due to addition of new IREs
		 * pointing to this NCE. Then ndp_delete() cleans up
		 * existing references. If we don't do it atomically here,
		 * ndp_delete() -> nce_ire_delete() will not be able to
		 * clean up the IRE list completely, and the nce_refcnt
		 * won't go down to zero.
		 */
		mutex_enter(&nce->nce_lock);
		if (ill->ill_flags & ILLF_XRESOLV) {
			/*
			 * If we used an external resolver, we may not
			 * have gone through neighbor discovery to get here.
			 * Must update the nce_state before the next check.
			 */
			if (nce->nce_state == ND_INCOMPLETE)
				nce->nce_state = ND_REACHABLE;
		}
		if (nce->nce_state == ND_INCOMPLETE ||
		    (nce->nce_flags & NCE_F_CONDEMNED) ||
		    (nce->nce_state == ND_UNREACHABLE)) {
failed:
			/*
			 * Reached either by falling into this branch with
			 * nce_lock held, or by the goto above with
			 * nce == NULL and no nce lock held.
			 */
			if (ndp_g_lock_held)
				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
			if (nce != NULL)
				mutex_exit(&nce->nce_lock);
			ire_atomic_end(irb_ptr, ire);
			ip1dbg(("ire_add_v6: No nce for dst %s \n",
			    inet_ntop(AF_INET6, &ire->ire_addr_v6,
			    buf, sizeof (buf))));
			ire_delete(ire);
			if (pire != NULL) {
				/*
				 * Assert that it is
				 * not yet removed from the list.
				 */
				ASSERT(pire->ire_ptpn != NULL);
				IRB_REFRELE(pire->ire_bucket);
				ire_refrele(pire);
			}
			if (nce != NULL)
				NCE_REFRELE_NOTR(nce);
			*ire_p = NULL;
			return (EINVAL);
		} else {
			ire->ire_nce = nce;
		}
		mutex_exit(&nce->nce_lock);
	}
	/*
	 * Find the first entry that matches ire_addr - provides
	 * tail insertion. *irep will be null if no match.
	 */
	irep = (ire_t **)irb_ptr;
	while ((ire1 = *irep) != NULL &&
	    !IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6))
		irep = &ire1->ire_next;
	ASSERT(!(ire->ire_type & IRE_BROADCAST));

	if (*irep != NULL) {
		/*
		 * Find the last ire which matches ire_addr_v6.
		 * Needed to do tail insertion among entries with the same
		 * ire_addr_v6.
		 */
		while (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6,
		    &ire1->ire_addr_v6)) {
			irep = &ire1->ire_next;
			ire1 = *irep;
			if (ire1 == NULL)
				break;
		}
	}

	if (ire->ire_type == IRE_DEFAULT) {
		/*
		 * We keep a count of default gateways which is used when
		 * assigning them as routes.
		 */
		ipst->ips_ipv6_ire_default_count++;
		ASSERT(ipst->ips_ipv6_ire_default_count != 0); /* Wraparound */
	}
	/* Insert at *irep */
	ire1 = *irep;
	if (ire1 != NULL)
		ire1->ire_ptpn = &ire->ire_next;
	ire->ire_next = ire1;
	/* Link the new one in. */
	ire->ire_ptpn = irep;
	/*
	 * ire_walk routines de-reference ire_next without holding
	 * a lock. Before we point to the new ire, we want to make
	 * sure the store that sets the ire_next of the new ire
	 * reaches global visibility, so that ire_walk routines
	 * don't see a truncated list of ires i.e if the ire_next
	 * of the new ire gets set after we do "*irep = ire" due
	 * to re-ordering, the ire_walk thread will see a NULL
	 * once it accesses the ire_next of the new ire.
	 * membar_producer() makes sure that the following store
	 * happens *after* all of the above stores.
	 */
	membar_producer();
	*irep = ire;
	ire->ire_bucket = irb_ptr;
	/*
	 * We return a bumped up IRE above. Keep it symmetrical
	 * so that the callers will always have to release. This
	 * helps the callers of this function because they continue
	 * to use the IRE after adding and hence they don't have to
	 * lookup again after we return the IRE.
	 *
	 * NOTE : We don't have to use atomics as this is appearing
	 * in the list for the first time and no one else can bump
	 * up the reference count on this yet.
	 */
	IRE_REFHOLD_LOCKED(ire);
	BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_inserted);
	irb_ptr->irb_ire_cnt++;
	if (ire->ire_marks & IRE_MARK_TEMPORARY)
		irb_ptr->irb_tmp_ire_cnt++;

	/* Account for the new IRE on its ipif and on the ill it sends to. */
	if (ire->ire_ipif != NULL) {
		ire->ire_ipif->ipif_ire_cnt++;
		if (ire->ire_stq != NULL) {
			stq_ill = (ill_t *)ire->ire_stq->q_ptr;
			stq_ill->ill_ire_cnt++;
		}
	} else {
		ASSERT(ire->ire_stq == NULL);
	}

	if (ndp_g_lock_held)
		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
	ire_atomic_end(irb_ptr, ire);

	if (pire != NULL) {
		/* Assert that it is not removed from the list yet */
		ASSERT(pire->ire_ptpn != NULL);
		IRB_REFRELE(pire->ire_bucket);
		ire_refrele(pire);
	}

	if (ire->ire_type != IRE_CACHE) {
		/*
		 * For ire's with host mask see if there is an entry
		 * in the cache. If there is one flush the whole cache as
		 * there might be multiple entries due to RTF_MULTIRT (CGTP).
		 * If no entry is found then there is no need to flush the
		 * cache.
		 */

		if (ip_mask_to_plen_v6(&ire->ire_mask_v6) == IPV6_ABITS) {
			ire_t *lire;
			lire = ire_ctable_lookup_v6(&ire->ire_addr_v6, NULL,
			    IRE_CACHE, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
			    ipst);
			if (lire != NULL) {
				ire_refrele(lire);
				ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
			}
		} else {
			ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
		}
	}

	*ire_p = ire;
	return (0);
}

/*
 * Search for all HOST REDIRECT routes that are
 * pointing at the specified gateway and
 * delete them. This routine is called only
 * when a default gateway is going away.
 *
 * Only the forwarding table for the longest prefix length (index
 * IP6_MASK_TABLE_SIZE - 1) is scanned, and only IREs flagged RTF_DYNAMIC
 * are candidates for deletion.
 */
static void
ire_delete_host_redirects_v6(const in6_addr_t *gateway, ip_stack_t *ipst)
{
	irb_t *irb_ptr;
	irb_t *irb;
	ire_t *ire;
	in6_addr_t gw_addr_v6;
	int i;

	/* get the hash table for HOST routes */
	irb_ptr = ipst->ips_ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)];
	if (irb_ptr == NULL)
		return;
	for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) {
		irb = &irb_ptr[i];
		/* Bucket hold taken around the walk, which deletes entries. */
		IRB_REFHOLD(irb);
		for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
			if (!(ire->ire_flags & RTF_DYNAMIC))
				continue;
			/* Read the gateway consistently under ire_lock. */
			mutex_enter(&ire->ire_lock);
			gw_addr_v6 = ire->ire_gateway_addr_v6;
			mutex_exit(&ire->ire_lock);
			if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway))
				ire_delete(ire);
		}
		IRB_REFRELE(irb);
	}
}

/*
 * Delete all the cache entries with this 'addr'. This is the IPv6 counterpart
 * of ip_ire_clookup_and_delete. The difference being this function does not
 * return any value. IPv6 processing of a gratuitous ARP, as it stands, is
 * different than IPv4 in that, regardless of the presence of a cache entry
 * for this address, an ire_walk_v6 is done.
Another difference is that unlike 1135 * in the case of IPv4 this does not take an ipif_t argument, since it is only 1136 * called by ip_arp_news and the match is always only on the address. 1137 */ 1138 void 1139 ip_ire_clookup_and_delete_v6(const in6_addr_t *addr, ip_stack_t *ipst) 1140 { 1141 irb_t *irb; 1142 ire_t *cire; 1143 boolean_t found = B_FALSE; 1144 1145 irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 1146 ipst->ips_ip6_cache_table_size)]; 1147 IRB_REFHOLD(irb); 1148 for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) { 1149 if (cire->ire_marks & IRE_MARK_CONDEMNED) 1150 continue; 1151 if (IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, addr)) { 1152 1153 /* This signifies start of a match */ 1154 if (!found) 1155 found = B_TRUE; 1156 if (cire->ire_type == IRE_CACHE) { 1157 if (cire->ire_nce != NULL) 1158 ndp_delete(cire->ire_nce); 1159 ire_delete_v6(cire); 1160 } 1161 /* End of the match */ 1162 } else if (found) 1163 break; 1164 } 1165 IRB_REFRELE(irb); 1166 } 1167 1168 /* 1169 * Delete the specified IRE. 1170 * All calls should use ire_delete(). 1171 * Sometimes called as writer though not required by this function. 1172 * 1173 * NOTE : This function is called only if the ire was added 1174 * in the list. 1175 */ 1176 void 1177 ire_delete_v6(ire_t *ire) 1178 { 1179 in6_addr_t gw_addr_v6; 1180 ip_stack_t *ipst = ire->ire_ipst; 1181 1182 ASSERT(ire->ire_refcnt >= 1); 1183 ASSERT(ire->ire_ipversion == IPV6_VERSION); 1184 1185 if (ire->ire_type != IRE_CACHE) 1186 ire_flush_cache_v6(ire, IRE_FLUSH_DELETE); 1187 if (ire->ire_type == IRE_DEFAULT) { 1188 /* 1189 * when a default gateway is going away 1190 * delete all the host redirects pointing at that 1191 * gateway. 
1192 */ 1193 mutex_enter(&ire->ire_lock); 1194 gw_addr_v6 = ire->ire_gateway_addr_v6; 1195 mutex_exit(&ire->ire_lock); 1196 ire_delete_host_redirects_v6(&gw_addr_v6, ipst); 1197 } 1198 } 1199 1200 /* 1201 * ire_walk routine to delete all IRE_CACHE and IRE_HOST type redirect 1202 * entries. 1203 */ 1204 /*ARGSUSED1*/ 1205 void 1206 ire_delete_cache_v6(ire_t *ire, char *arg) 1207 { 1208 char addrstr1[INET6_ADDRSTRLEN]; 1209 char addrstr2[INET6_ADDRSTRLEN]; 1210 1211 if ((ire->ire_type & IRE_CACHE) || 1212 (ire->ire_flags & RTF_DYNAMIC)) { 1213 ip1dbg(("ire_delete_cache_v6: deleted %s type %d through %s\n", 1214 inet_ntop(AF_INET6, &ire->ire_addr_v6, 1215 addrstr1, sizeof (addrstr1)), 1216 ire->ire_type, 1217 inet_ntop(AF_INET6, &ire->ire_gateway_addr_v6, 1218 addrstr2, sizeof (addrstr2)))); 1219 ire_delete(ire); 1220 } 1221 1222 } 1223 1224 /* 1225 * ire_walk routine to delete all IRE_CACHE/IRE_HOST type redirect entries 1226 * that have a given gateway address. 1227 */ 1228 void 1229 ire_delete_cache_gw_v6(ire_t *ire, char *addr) 1230 { 1231 in6_addr_t *gw_addr = (in6_addr_t *)addr; 1232 char buf1[INET6_ADDRSTRLEN]; 1233 char buf2[INET6_ADDRSTRLEN]; 1234 in6_addr_t ire_gw_addr_v6; 1235 1236 if (!(ire->ire_type & IRE_CACHE) && 1237 !(ire->ire_flags & RTF_DYNAMIC)) 1238 return; 1239 1240 mutex_enter(&ire->ire_lock); 1241 ire_gw_addr_v6 = ire->ire_gateway_addr_v6; 1242 mutex_exit(&ire->ire_lock); 1243 1244 if (IN6_ARE_ADDR_EQUAL(&ire_gw_addr_v6, gw_addr)) { 1245 ip1dbg(("ire_delete_cache_gw_v6: deleted %s type %d to %s\n", 1246 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, 1247 buf1, sizeof (buf1)), 1248 ire->ire_type, 1249 inet_ntop(AF_INET6, &ire_gw_addr_v6, 1250 buf2, sizeof (buf2)))); 1251 ire_delete(ire); 1252 } 1253 } 1254 1255 /* 1256 * Remove all IRE_CACHE entries that match 1257 * the ire specified. (Sometimes called 1258 * as writer though not required by this function.) 
1259 * 1260 * The flag argument indicates if the 1261 * flush request is due to addition 1262 * of new route (IRE_FLUSH_ADD) or deletion of old 1263 * route (IRE_FLUSH_DELETE). 1264 * 1265 * This routine takes only the IREs from the forwarding 1266 * table and flushes the corresponding entries from 1267 * the cache table. 1268 * 1269 * When flushing due to the deletion of an old route, it 1270 * just checks the cache handles (ire_phandle and ire_ihandle) and 1271 * deletes the ones that match. 1272 * 1273 * When flushing due to the creation of a new route, it checks 1274 * if a cache entry's address matches the one in the IRE and 1275 * that the cache entry's parent has a less specific mask than the 1276 * one in IRE. The destination of such a cache entry could be the 1277 * gateway for other cache entries, so we need to flush those as 1278 * well by looking for gateway addresses matching the IRE's address. 1279 */ 1280 void 1281 ire_flush_cache_v6(ire_t *ire, int flag) 1282 { 1283 int i; 1284 ire_t *cire; 1285 irb_t *irb; 1286 ip_stack_t *ipst = ire->ire_ipst; 1287 1288 if (ire->ire_type & IRE_CACHE) 1289 return; 1290 1291 /* 1292 * If a default is just created, there is no point 1293 * in going through the cache, as there will not be any 1294 * cached ires. 1295 */ 1296 if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD) 1297 return; 1298 if (flag == IRE_FLUSH_ADD) { 1299 /* 1300 * This selective flush is 1301 * due to the addition of 1302 * new IRE. 
1303 */ 1304 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 1305 irb = &ipst->ips_ip_cache_table_v6[i]; 1306 if ((cire = irb->irb_ire) == NULL) 1307 continue; 1308 IRB_REFHOLD(irb); 1309 for (cire = irb->irb_ire; cire != NULL; 1310 cire = cire->ire_next) { 1311 if (cire->ire_type != IRE_CACHE) 1312 continue; 1313 /* 1314 * If 'cire' belongs to the same subnet 1315 * as the new ire being added, and 'cire' 1316 * is derived from a prefix that is less 1317 * specific than the new ire being added, 1318 * we need to flush 'cire'; for instance, 1319 * when a new interface comes up. 1320 */ 1321 if ((V6_MASK_EQ_2(cire->ire_addr_v6, 1322 ire->ire_mask_v6, ire->ire_addr_v6) && 1323 (ip_mask_to_plen_v6(&cire->ire_cmask_v6) <= 1324 ire->ire_masklen))) { 1325 ire_delete(cire); 1326 continue; 1327 } 1328 /* 1329 * This is the case when the ire_gateway_addr 1330 * of 'cire' belongs to the same subnet as 1331 * the new ire being added. 1332 * Flushing such ires is sometimes required to 1333 * avoid misrouting: say we have a machine with 1334 * two interfaces (I1 and I2), a default router 1335 * R on the I1 subnet, and a host route to an 1336 * off-link destination D with a gateway G on 1337 * the I2 subnet. 1338 * Under normal operation, we will have an 1339 * on-link cache entry for G and an off-link 1340 * cache entry for D with G as ire_gateway_addr, 1341 * traffic to D will reach its destination 1342 * through gateway G. 1343 * If the administrator does 'ifconfig I2 down', 1344 * the cache entries for D and G will be 1345 * flushed. However, G will now be resolved as 1346 * an off-link destination using R (the default 1347 * router) as gateway. Then D will also be 1348 * resolved as an off-link destination using G 1349 * as gateway - this behavior is due to 1350 * compatibility reasons, see comment in 1351 * ire_ihandle_lookup_offlink(). Traffic to D 1352 * will go to the router R and probably won't 1353 * reach the destination. 
1354 * The administrator then does 'ifconfig I2 up'. 1355 * Since G is on the I2 subnet, this routine 1356 * will flush its cache entry. It must also 1357 * flush the cache entry for D, otherwise 1358 * traffic will stay misrouted until the IRE 1359 * times out. 1360 */ 1361 if (V6_MASK_EQ_2(cire->ire_gateway_addr_v6, 1362 ire->ire_mask_v6, ire->ire_addr_v6)) { 1363 ire_delete(cire); 1364 continue; 1365 } 1366 } 1367 IRB_REFRELE(irb); 1368 } 1369 } else { 1370 /* 1371 * delete the cache entries based on 1372 * handle in the IRE as this IRE is 1373 * being deleted/changed. 1374 */ 1375 for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) { 1376 irb = &ipst->ips_ip_cache_table_v6[i]; 1377 if ((cire = irb->irb_ire) == NULL) 1378 continue; 1379 IRB_REFHOLD(irb); 1380 for (cire = irb->irb_ire; cire != NULL; 1381 cire = cire->ire_next) { 1382 if (cire->ire_type != IRE_CACHE) 1383 continue; 1384 if ((cire->ire_phandle == 0 || 1385 cire->ire_phandle != ire->ire_phandle) && 1386 (cire->ire_ihandle == 0 || 1387 cire->ire_ihandle != ire->ire_ihandle)) 1388 continue; 1389 ire_delete(cire); 1390 } 1391 IRB_REFRELE(irb); 1392 } 1393 } 1394 } 1395 1396 /* 1397 * Matches the arguments passed with the values in the ire. 1398 * 1399 * Note: for match types that match using "ipif" passed in, ipif 1400 * must be checked for non-NULL before calling this routine. 
1401 */ 1402 static boolean_t 1403 ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask, 1404 const in6_addr_t *gateway, int type, const ipif_t *ipif, zoneid_t zoneid, 1405 uint32_t ihandle, const ts_label_t *tsl, int match_flags) 1406 { 1407 in6_addr_t masked_addr; 1408 in6_addr_t gw_addr_v6; 1409 ill_t *ire_ill = NULL, *dst_ill; 1410 ill_t *ipif_ill = NULL; 1411 ill_group_t *ire_ill_group = NULL; 1412 ill_group_t *ipif_ill_group = NULL; 1413 ipif_t *src_ipif; 1414 1415 ASSERT(ire->ire_ipversion == IPV6_VERSION); 1416 ASSERT(addr != NULL); 1417 ASSERT(mask != NULL); 1418 ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL); 1419 ASSERT((!(match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP))) || 1420 (ipif != NULL && ipif->ipif_isv6)); 1421 ASSERT(!(match_flags & MATCH_IRE_WQ)); 1422 1423 /* 1424 * HIDDEN cache entries have to be looked up specifically with 1425 * MATCH_IRE_MARK_HIDDEN. MATCH_IRE_MARK_HIDDEN is usually set 1426 * when the interface is FAILED or INACTIVE. In that case, 1427 * any IRE_CACHES that exists should be marked with 1428 * IRE_MARK_HIDDEN. So, we don't really need to match below 1429 * for IRE_MARK_HIDDEN. But we do so for consistency. 1430 */ 1431 if (!(match_flags & MATCH_IRE_MARK_HIDDEN) && 1432 (ire->ire_marks & IRE_MARK_HIDDEN)) 1433 return (B_FALSE); 1434 1435 if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid && 1436 ire->ire_zoneid != ALL_ZONES) { 1437 /* 1438 * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is 1439 * valid and does not match that of ire_zoneid, a failure to 1440 * match is reported at this point. Otherwise, since some IREs 1441 * that are available in the global zone can be used in local 1442 * zones, additional checks need to be performed: 1443 * 1444 * IRE_CACHE and IRE_LOOPBACK entries should 1445 * never be matched in this situation. 
1446 * 1447 * IRE entries that have an interface associated with them 1448 * should in general not match unless they are an IRE_LOCAL 1449 * or in the case when MATCH_IRE_DEFAULT has been set in 1450 * the caller. In the case of the former, checking of the 1451 * other fields supplied should take place. 1452 * 1453 * In the case where MATCH_IRE_DEFAULT has been set, 1454 * all of the ipif's associated with the IRE's ill are 1455 * checked to see if there is a matching zoneid. If any 1456 * one ipif has a matching zoneid, this IRE is a 1457 * potential candidate so checking of the other fields 1458 * takes place. 1459 * 1460 * In the case where the IRE_INTERFACE has a usable source 1461 * address (indicated by ill_usesrc_ifindex) in the 1462 * correct zone then it's permitted to return this IRE 1463 */ 1464 if (match_flags & MATCH_IRE_ZONEONLY) 1465 return (B_FALSE); 1466 if (ire->ire_type & (IRE_CACHE | IRE_LOOPBACK)) 1467 return (B_FALSE); 1468 /* 1469 * Note, IRE_INTERFACE can have the stq as NULL. For 1470 * example, if the default multicast route is tied to 1471 * the loopback address. 
1472 */ 1473 if ((ire->ire_type & IRE_INTERFACE) && 1474 (ire->ire_stq != NULL)) { 1475 dst_ill = (ill_t *)ire->ire_stq->q_ptr; 1476 /* 1477 * If there is a usable source address in the 1478 * zone, then it's ok to return an 1479 * IRE_INTERFACE 1480 */ 1481 if ((dst_ill->ill_usesrc_ifindex != 0) && 1482 (src_ipif = ipif_select_source_v6(dst_ill, addr, 1483 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid)) 1484 != NULL) { 1485 ip3dbg(("ire_match_args: src_ipif %p" 1486 " dst_ill %p", (void *)src_ipif, 1487 (void *)dst_ill)); 1488 ipif_refrele(src_ipif); 1489 } else { 1490 ip3dbg(("ire_match_args: src_ipif NULL" 1491 " dst_ill %p\n", (void *)dst_ill)); 1492 return (B_FALSE); 1493 } 1494 } 1495 if (ire->ire_ipif != NULL && ire->ire_type != IRE_LOCAL && 1496 !(ire->ire_type & IRE_INTERFACE)) { 1497 ipif_t *tipif; 1498 1499 if ((match_flags & MATCH_IRE_DEFAULT) == 0) 1500 return (B_FALSE); 1501 mutex_enter(&ire->ire_ipif->ipif_ill->ill_lock); 1502 for (tipif = ire->ire_ipif->ipif_ill->ill_ipif; 1503 tipif != NULL; tipif = tipif->ipif_next) { 1504 if (IPIF_CAN_LOOKUP(tipif) && 1505 (tipif->ipif_flags & IPIF_UP) && 1506 (tipif->ipif_zoneid == zoneid || 1507 tipif->ipif_zoneid == ALL_ZONES)) 1508 break; 1509 } 1510 mutex_exit(&ire->ire_ipif->ipif_ill->ill_lock); 1511 if (tipif == NULL) 1512 return (B_FALSE); 1513 } 1514 } 1515 1516 if (match_flags & MATCH_IRE_GW) { 1517 mutex_enter(&ire->ire_lock); 1518 gw_addr_v6 = ire->ire_gateway_addr_v6; 1519 mutex_exit(&ire->ire_lock); 1520 } 1521 /* 1522 * For IRE_CACHES, MATCH_IRE_ILL/ILL_GROUP really means that 1523 * somebody wants to send out on a particular interface which 1524 * is given by ire_stq and hence use ire_stq to derive the ill 1525 * value. ire_ipif for IRE_CACHES is just the 1526 * means of getting a source address i.e ire_src_addr_v6 = 1527 * ire->ire_ipif->ipif_src_addr_v6. 
1528 */ 1529 if (match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP)) { 1530 ire_ill = ire_to_ill(ire); 1531 if (ire_ill != NULL) 1532 ire_ill_group = ire_ill->ill_group; 1533 ipif_ill = ipif->ipif_ill; 1534 ipif_ill_group = ipif_ill->ill_group; 1535 } 1536 1537 /* No ire_addr_v6 bits set past the mask */ 1538 ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6, 1539 ire->ire_addr_v6)); 1540 V6_MASK_COPY(*addr, *mask, masked_addr); 1541 1542 if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) && 1543 ((!(match_flags & MATCH_IRE_GW)) || 1544 IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) && 1545 ((!(match_flags & MATCH_IRE_TYPE)) || 1546 (ire->ire_type & type)) && 1547 ((!(match_flags & MATCH_IRE_SRC)) || 1548 IN6_ARE_ADDR_EQUAL(&ire->ire_src_addr_v6, 1549 &ipif->ipif_v6src_addr)) && 1550 ((!(match_flags & MATCH_IRE_IPIF)) || 1551 (ire->ire_ipif == ipif)) && 1552 ((!(match_flags & MATCH_IRE_MARK_HIDDEN)) || 1553 (ire->ire_type != IRE_CACHE || 1554 ire->ire_marks & IRE_MARK_HIDDEN)) && 1555 ((!(match_flags & MATCH_IRE_ILL)) || 1556 (ire_ill == ipif_ill)) && 1557 ((!(match_flags & MATCH_IRE_IHANDLE)) || 1558 (ire->ire_ihandle == ihandle)) && 1559 ((!(match_flags & MATCH_IRE_ILL_GROUP)) || 1560 (ire_ill == ipif_ill) || 1561 (ire_ill_group != NULL && 1562 ire_ill_group == ipif_ill_group)) && 1563 ((!(match_flags & MATCH_IRE_SECATTR)) || 1564 (!is_system_labeled()) || 1565 (tsol_ire_match_gwattr(ire, tsl) == 0))) { 1566 /* We found the matched IRE */ 1567 return (B_TRUE); 1568 } 1569 return (B_FALSE); 1570 } 1571 1572 /* 1573 * Lookup for a route in all the tables 1574 */ 1575 ire_t * 1576 ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 1577 const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 1578 zoneid_t zoneid, const ts_label_t *tsl, int flags, ip_stack_t *ipst) 1579 { 1580 ire_t *ire = NULL; 1581 1582 /* 1583 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 1584 * MATCH_IRE_ILL is set. 
1585 */ 1586 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 1587 (ipif == NULL)) 1588 return (NULL); 1589 1590 /* 1591 * might be asking for a cache lookup, 1592 * This is not best way to lookup cache, 1593 * user should call ire_cache_lookup directly. 1594 * 1595 * If MATCH_IRE_TYPE was set, first lookup in the cache table and then 1596 * in the forwarding table, if the applicable type flags were set. 1597 */ 1598 if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) { 1599 ire = ire_ctable_lookup_v6(addr, gateway, type, ipif, zoneid, 1600 tsl, flags, ipst); 1601 if (ire != NULL) 1602 return (ire); 1603 } 1604 if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) { 1605 ire = ire_ftable_lookup_v6(addr, mask, gateway, type, ipif, 1606 pire, zoneid, 0, tsl, flags, ipst); 1607 } 1608 return (ire); 1609 } 1610 1611 /* 1612 * Lookup a route in forwarding table. 1613 * specific lookup is indicated by passing the 1614 * required parameters and indicating the 1615 * match required in flag field. 1616 * 1617 * Looking for default route can be done in three ways 1618 * 1) pass mask as ipv6_all_zeros and set MATCH_IRE_MASK in flags field 1619 * along with other matches. 1620 * 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags 1621 * field along with other matches. 1622 * 3) if the destination and mask are passed as zeros. 1623 * 1624 * A request to return a default route if no route 1625 * is found, can be specified by setting MATCH_IRE_DEFAULT 1626 * in flags. 1627 * 1628 * It does not support recursion more than one level. It 1629 * will do recursive lookup only when the lookup maps to 1630 * a prefix or default route and MATCH_IRE_RECURSIVE flag is passed. 1631 * 1632 * If the routing table is setup to allow more than one level 1633 * of recursion, the cleaning up cache table will not work resulting 1634 * in invalid routing. 1635 * 1636 * Supports link-local addresses by following the ipif/ill when recursing. 
1637 * 1638 * NOTE : When this function returns NULL, pire has already been released. 1639 * pire is valid only when this function successfully returns an 1640 * ire. 1641 */ 1642 ire_t * 1643 ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 1644 const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire, 1645 zoneid_t zoneid, uint32_t ihandle, const ts_label_t *tsl, int flags, 1646 ip_stack_t *ipst) 1647 { 1648 irb_t *irb_ptr; 1649 ire_t *rire; 1650 ire_t *ire = NULL; 1651 ire_t *saved_ire; 1652 nce_t *nce; 1653 int i; 1654 in6_addr_t gw_addr_v6; 1655 1656 ASSERT(addr != NULL); 1657 ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL); 1658 ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 1659 ASSERT(ipif == NULL || ipif->ipif_isv6); 1660 ASSERT(!(flags & MATCH_IRE_WQ)); 1661 1662 /* 1663 * When we return NULL from this function, we should make 1664 * sure that *pire is NULL so that the callers will not 1665 * wrongly REFRELE the pire. 1666 */ 1667 if (pire != NULL) 1668 *pire = NULL; 1669 /* 1670 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 1671 * MATCH_IRE_ILL is set. 1672 */ 1673 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 1674 (ipif == NULL)) 1675 return (NULL); 1676 1677 /* 1678 * If the mask is known, the lookup 1679 * is simple, if the mask is not known 1680 * we need to search. 
1681 */ 1682 if (flags & MATCH_IRE_MASK) { 1683 uint_t masklen; 1684 1685 masklen = ip_mask_to_plen_v6(mask); 1686 if (ipst->ips_ip_forwarding_table_v6[masklen] == NULL) 1687 return (NULL); 1688 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[masklen][ 1689 IRE_ADDR_MASK_HASH_V6(*addr, *mask, 1690 ipst->ips_ip6_ftable_hash_size)]); 1691 rw_enter(&irb_ptr->irb_lock, RW_READER); 1692 for (ire = irb_ptr->irb_ire; ire != NULL; 1693 ire = ire->ire_next) { 1694 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1695 continue; 1696 if (ire_match_args_v6(ire, addr, mask, gateway, type, 1697 ipif, zoneid, ihandle, tsl, flags)) 1698 goto found_ire; 1699 } 1700 rw_exit(&irb_ptr->irb_lock); 1701 } else { 1702 /* 1703 * In this case we don't know the mask, we need to 1704 * search the table assuming different mask sizes. 1705 * we start with 128 bit mask, we don't allow default here. 1706 */ 1707 for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) { 1708 in6_addr_t tmpmask; 1709 1710 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) 1711 continue; 1712 (void) ip_plen_to_mask_v6(i, &tmpmask); 1713 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][ 1714 IRE_ADDR_MASK_HASH_V6(*addr, tmpmask, 1715 ipst->ips_ip6_ftable_hash_size)]; 1716 rw_enter(&irb_ptr->irb_lock, RW_READER); 1717 for (ire = irb_ptr->irb_ire; ire != NULL; 1718 ire = ire->ire_next) { 1719 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1720 continue; 1721 if (ire_match_args_v6(ire, addr, 1722 &ire->ire_mask_v6, gateway, type, ipif, 1723 zoneid, ihandle, tsl, flags)) 1724 goto found_ire; 1725 } 1726 rw_exit(&irb_ptr->irb_lock); 1727 } 1728 } 1729 1730 /* 1731 * We come here if no route has yet been found. 1732 * 1733 * Handle the case where default route is 1734 * requested by specifying type as one of the possible 1735 * types for that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE). 
1736 * 1737 * If MATCH_IRE_MASK is specified, then the appropriate default route 1738 * would have been found above if it exists so it isn't looked up here. 1739 * If MATCH_IRE_DEFAULT was also specified, then a default route will be 1740 * searched for later. 1741 */ 1742 if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE && 1743 (type & (IRE_DEFAULT | IRE_INTERFACE))) { 1744 if (ipst->ips_ip_forwarding_table_v6[0] != NULL) { 1745 /* addr & mask is zero for defaults */ 1746 irb_ptr = &ipst->ips_ip_forwarding_table_v6[0][ 1747 IRE_ADDR_HASH_V6(ipv6_all_zeros, 1748 ipst->ips_ip6_ftable_hash_size)]; 1749 rw_enter(&irb_ptr->irb_lock, RW_READER); 1750 for (ire = irb_ptr->irb_ire; ire != NULL; 1751 ire = ire->ire_next) { 1752 1753 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1754 continue; 1755 1756 if (ire_match_args_v6(ire, addr, 1757 &ipv6_all_zeros, gateway, type, ipif, 1758 zoneid, ihandle, tsl, flags)) 1759 goto found_ire; 1760 } 1761 rw_exit(&irb_ptr->irb_lock); 1762 } 1763 } 1764 /* 1765 * We come here only if no route is found. 1766 * see if the default route can be used which is allowed 1767 * only if the default matching criteria is specified. 1768 * The ipv6_ire_default_count tracks the number of IRE_DEFAULT 1769 * entries. However, the ip_forwarding_table_v6[0] also contains 1770 * interface routes thus the count can be zero. 1771 */ 1772 saved_ire = NULL; 1773 if ((flags & (MATCH_IRE_DEFAULT | MATCH_IRE_MASK)) == 1774 MATCH_IRE_DEFAULT) { 1775 ire_t *ire_origin; 1776 uint_t g_index; 1777 uint_t index; 1778 1779 if (ipst->ips_ip_forwarding_table_v6[0] == NULL) 1780 return (NULL); 1781 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[0])[0]; 1782 1783 /* 1784 * Keep a tab on the bucket while looking the IRE_DEFAULT 1785 * entries. We need to keep track of a particular IRE 1786 * (ire_origin) so this ensures that it will not be unlinked 1787 * from the hash list during the recursive lookup below. 
1788 */ 1789 IRB_REFHOLD(irb_ptr); 1790 ire = irb_ptr->irb_ire; 1791 if (ire == NULL) { 1792 IRB_REFRELE(irb_ptr); 1793 return (NULL); 1794 } 1795 1796 /* 1797 * Get the index first, since it can be changed by other 1798 * threads. Then get to the right default route skipping 1799 * default interface routes if any. As we hold a reference on 1800 * the IRE bucket, ipv6_ire_default_count can only increase so 1801 * we can't reach the end of the hash list unexpectedly. 1802 */ 1803 if (ipst->ips_ipv6_ire_default_count != 0) { 1804 g_index = ipst->ips_ipv6_ire_default_index++; 1805 index = g_index % ipst->ips_ipv6_ire_default_count; 1806 while (index != 0) { 1807 if (!(ire->ire_type & IRE_INTERFACE)) 1808 index--; 1809 ire = ire->ire_next; 1810 } 1811 ASSERT(ire != NULL); 1812 } else { 1813 /* 1814 * No default route, so we only have default interface 1815 * routes: don't enter the first loop. 1816 */ 1817 ire = NULL; 1818 } 1819 1820 /* 1821 * Round-robin the default routers list looking for a neighbor 1822 * that matches the passed in parameters and is reachable. If 1823 * none found, just return a route from the default router list 1824 * if it exists. If we can't find a default route (IRE_DEFAULT), 1825 * look for interface default routes. 1826 * We start with the ire we found above and we walk the hash 1827 * list until we're back where we started, see 1828 * ire_get_next_default_ire(). It doesn't matter if default 1829 * routes are added or deleted by other threads - we know this 1830 * ire will stay in the list because we hold a reference on the 1831 * ire bucket. 1832 * NB: if we only have interface default routes, ire is NULL so 1833 * we don't even enter this loop (see above). 
1834 */ 1835 ire_origin = ire; 1836 for (; ire != NULL; 1837 ire = ire_get_next_default_ire(ire, ire_origin)) { 1838 1839 if (ire_match_args_v6(ire, addr, 1840 &ipv6_all_zeros, gateway, type, ipif, 1841 zoneid, ihandle, tsl, flags)) { 1842 int match_flags; 1843 1844 /* 1845 * We have something to work with. 1846 * If we can find a resolved/reachable 1847 * entry, we will use this. Otherwise 1848 * we'll try to find an entry that has 1849 * a resolved cache entry. We will fallback 1850 * on this if we don't find anything else. 1851 */ 1852 if (saved_ire == NULL) 1853 saved_ire = ire; 1854 mutex_enter(&ire->ire_lock); 1855 gw_addr_v6 = ire->ire_gateway_addr_v6; 1856 mutex_exit(&ire->ire_lock); 1857 match_flags = MATCH_IRE_ILL_GROUP | 1858 MATCH_IRE_SECATTR; 1859 rire = ire_ctable_lookup_v6(&gw_addr_v6, NULL, 1860 0, ire->ire_ipif, zoneid, tsl, match_flags, 1861 ipst); 1862 if (rire != NULL) { 1863 nce = rire->ire_nce; 1864 if (nce != NULL && 1865 NCE_ISREACHABLE(nce) && 1866 nce->nce_flags & NCE_F_ISROUTER) { 1867 ire_refrele(rire); 1868 IRE_REFHOLD(ire); 1869 IRB_REFRELE(irb_ptr); 1870 goto found_ire_held; 1871 } else if (nce != NULL && 1872 !(nce->nce_flags & 1873 NCE_F_ISROUTER)) { 1874 /* 1875 * Make sure we don't use 1876 * this ire 1877 */ 1878 if (saved_ire == ire) 1879 saved_ire = NULL; 1880 } 1881 ire_refrele(rire); 1882 } else if (ipst-> 1883 ips_ipv6_ire_default_count > 1 && 1884 zoneid != GLOBAL_ZONEID) { 1885 /* 1886 * When we're in a local zone, we're 1887 * only interested in default routers 1888 * that are reachable through ipifs 1889 * within our zone. 1890 * The potentially expensive call to 1891 * ire_route_lookup_v6() is avoided when 1892 * we have only one default route. 
1893 */ 1894 int ire_match_flags = MATCH_IRE_TYPE | 1895 MATCH_IRE_SECATTR; 1896 1897 if (ire->ire_ipif != NULL) { 1898 ire_match_flags |= 1899 MATCH_IRE_ILL_GROUP; 1900 } 1901 rire = ire_route_lookup_v6(&gw_addr_v6, 1902 NULL, NULL, IRE_INTERFACE, 1903 ire->ire_ipif, NULL, 1904 zoneid, tsl, ire_match_flags, ipst); 1905 if (rire != NULL) { 1906 ire_refrele(rire); 1907 saved_ire = ire; 1908 } else if (saved_ire == ire) { 1909 /* 1910 * Make sure we don't use 1911 * this ire 1912 */ 1913 saved_ire = NULL; 1914 } 1915 } 1916 } 1917 } 1918 if (saved_ire != NULL) { 1919 ire = saved_ire; 1920 IRE_REFHOLD(ire); 1921 IRB_REFRELE(irb_ptr); 1922 goto found_ire_held; 1923 } else { 1924 /* 1925 * Look for a interface default route matching the 1926 * args passed in. No round robin here. Just pick 1927 * the right one. 1928 */ 1929 for (ire = irb_ptr->irb_ire; ire != NULL; 1930 ire = ire->ire_next) { 1931 1932 if (!(ire->ire_type & IRE_INTERFACE)) 1933 continue; 1934 1935 if (ire->ire_marks & IRE_MARK_CONDEMNED) 1936 continue; 1937 1938 if (ire_match_args_v6(ire, addr, 1939 &ipv6_all_zeros, gateway, type, ipif, 1940 zoneid, ihandle, tsl, flags)) { 1941 IRE_REFHOLD(ire); 1942 IRB_REFRELE(irb_ptr); 1943 goto found_ire_held; 1944 } 1945 } 1946 IRB_REFRELE(irb_ptr); 1947 } 1948 } 1949 ASSERT(ire == NULL); 1950 ip1dbg(("ire_ftable_lookup_v6: returning NULL ire")); 1951 return (NULL); 1952 found_ire: 1953 ASSERT((ire->ire_marks & IRE_MARK_CONDEMNED) == 0); 1954 IRE_REFHOLD(ire); 1955 rw_exit(&irb_ptr->irb_lock); 1956 1957 found_ire_held: 1958 if ((flags & MATCH_IRE_RJ_BHOLE) && 1959 (ire->ire_flags & (RTF_BLACKHOLE | RTF_REJECT))) { 1960 return (ire); 1961 } 1962 /* 1963 * At this point, IRE that was found must be an IRE_FORWARDTABLE 1964 * or IRE_CACHETABLE type. If this is a recursive lookup and an 1965 * IRE_INTERFACE type was found, return that. 
If it was some other 1966 * IRE_FORWARDTABLE type of IRE (one of the prefix types), then it 1967 * is necessary to fill in the parent IRE pointed to by pire, and 1968 * then lookup the gateway address of the parent. For backwards 1969 * compatiblity, if this lookup returns an 1970 * IRE other than a IRE_CACHETABLE or IRE_INTERFACE, then one more level 1971 * of lookup is done. 1972 */ 1973 if (flags & MATCH_IRE_RECURSIVE) { 1974 const ipif_t *gw_ipif; 1975 int match_flags = MATCH_IRE_DSTONLY; 1976 1977 if (ire->ire_type & IRE_INTERFACE) 1978 return (ire); 1979 if (pire != NULL) 1980 *pire = ire; 1981 /* 1982 * If we can't find an IRE_INTERFACE or the caller has not 1983 * asked for pire, we need to REFRELE the saved_ire. 1984 */ 1985 saved_ire = ire; 1986 1987 /* 1988 * Currently MATCH_IRE_ILL is never used with 1989 * (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT) while 1990 * sending out packets as MATCH_IRE_ILL is used only 1991 * for communicating with on-link hosts. We can't assert 1992 * that here as RTM_GET calls this function with 1993 * MATCH_IRE_ILL | MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE. 1994 * We have already used the MATCH_IRE_ILL in determining 1995 * the right prefix route at this point. To match the 1996 * behavior of how we locate routes while sending out 1997 * packets, we don't want to use MATCH_IRE_ILL below 1998 * while locating the interface route. 1999 */ 2000 if (ire->ire_ipif != NULL) 2001 match_flags |= MATCH_IRE_ILL_GROUP; 2002 2003 mutex_enter(&ire->ire_lock); 2004 gw_addr_v6 = ire->ire_gateway_addr_v6; 2005 mutex_exit(&ire->ire_lock); 2006 2007 ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 0, 2008 ire->ire_ipif, NULL, zoneid, tsl, match_flags, ipst); 2009 if (ire == NULL) { 2010 /* 2011 * In this case we have to deal with the 2012 * MATCH_IRE_PARENT flag, which means the 2013 * parent has to be returned if ire is NULL. 
2014 * The aim of this is to have (at least) a starting 2015 * ire when we want to look at all of the ires in a 2016 * bucket aimed at a single destination (as is the 2017 * case in ip_newroute_v6 for the RTF_MULTIRT 2018 * flagged routes). 2019 */ 2020 if (flags & MATCH_IRE_PARENT) { 2021 if (pire != NULL) { 2022 /* 2023 * Need an extra REFHOLD, if the 2024 * parent ire is returned via both 2025 * ire and pire. 2026 */ 2027 IRE_REFHOLD(saved_ire); 2028 } 2029 ire = saved_ire; 2030 } else { 2031 ire_refrele(saved_ire); 2032 if (pire != NULL) 2033 *pire = NULL; 2034 } 2035 return (ire); 2036 } 2037 if (ire->ire_type & (IRE_CACHETABLE | IRE_INTERFACE)) { 2038 /* 2039 * If the caller did not ask for pire, release 2040 * it now. 2041 */ 2042 if (pire == NULL) { 2043 ire_refrele(saved_ire); 2044 } 2045 return (ire); 2046 } 2047 match_flags |= MATCH_IRE_TYPE; 2048 mutex_enter(&ire->ire_lock); 2049 gw_addr_v6 = ire->ire_gateway_addr_v6; 2050 mutex_exit(&ire->ire_lock); 2051 gw_ipif = ire->ire_ipif; 2052 ire_refrele(ire); 2053 ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 2054 (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid, 2055 NULL, match_flags, ipst); 2056 if (ire == NULL) { 2057 /* 2058 * In this case we have to deal with the 2059 * MATCH_IRE_PARENT flag, which means the 2060 * parent has to be returned if ire is NULL. 2061 * The aim of this is to have (at least) a starting 2062 * ire when we want to look at all of the ires in a 2063 * bucket aimed at a single destination (as is the 2064 * case in ip_newroute_v6 for the RTF_MULTIRT 2065 * flagged routes). 2066 */ 2067 if (flags & MATCH_IRE_PARENT) { 2068 if (pire != NULL) { 2069 /* 2070 * Need an extra REFHOLD, if the 2071 * parent ire is returned via both 2072 * ire and pire. 
2073 */ 2074 IRE_REFHOLD(saved_ire); 2075 } 2076 ire = saved_ire; 2077 } else { 2078 ire_refrele(saved_ire); 2079 if (pire != NULL) 2080 *pire = NULL; 2081 } 2082 return (ire); 2083 } else if (pire == NULL) { 2084 /* 2085 * If the caller did not ask for pire, release 2086 * it now. 2087 */ 2088 ire_refrele(saved_ire); 2089 } 2090 return (ire); 2091 } 2092 2093 ASSERT(pire == NULL || *pire == NULL); 2094 return (ire); 2095 } 2096 2097 /* 2098 * Delete the IRE cache for the gateway and all IRE caches whose 2099 * ire_gateway_addr_v6 points to this gateway, and allow them to 2100 * be created on demand by ip_newroute_v6. 2101 */ 2102 void 2103 ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid, 2104 ip_stack_t *ipst) 2105 { 2106 irb_t *irb; 2107 ire_t *ire; 2108 2109 irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 2110 ipst->ips_ip6_cache_table_size)]; 2111 IRB_REFHOLD(irb); 2112 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 2113 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2114 continue; 2115 2116 ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 2117 if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, 0, 2118 IRE_CACHE, NULL, zoneid, 0, NULL, MATCH_IRE_TYPE)) { 2119 ire_delete(ire); 2120 } 2121 } 2122 IRB_REFRELE(irb); 2123 2124 ire_walk_v6(ire_delete_cache_gw_v6, (char *)addr, zoneid, ipst); 2125 } 2126 2127 /* 2128 * Looks up cache table for a route. 2129 * specific lookup can be indicated by 2130 * passing the MATCH_* flags and the 2131 * necessary parameters. 2132 */ 2133 ire_t * 2134 ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway, 2135 int type, const ipif_t *ipif, zoneid_t zoneid, const ts_label_t *tsl, 2136 int flags, ip_stack_t *ipst) 2137 { 2138 ire_t *ire; 2139 irb_t *irb_ptr; 2140 ASSERT(addr != NULL); 2141 ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 2142 2143 /* 2144 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 2145 * MATCH_IRE_ILL is set. 
2146 */ 2147 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 2148 (ipif == NULL)) 2149 return (NULL); 2150 2151 irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 2152 ipst->ips_ip6_cache_table_size)]; 2153 rw_enter(&irb_ptr->irb_lock, RW_READER); 2154 for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 2155 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2156 continue; 2157 2158 ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 2159 if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, gateway, 2160 type, ipif, zoneid, 0, tsl, flags)) { 2161 IRE_REFHOLD(ire); 2162 rw_exit(&irb_ptr->irb_lock); 2163 return (ire); 2164 } 2165 } 2166 rw_exit(&irb_ptr->irb_lock); 2167 return (NULL); 2168 } 2169 2170 /* 2171 * Lookup cache. Don't return IRE_MARK_HIDDEN entries. Callers 2172 * should use ire_ctable_lookup with MATCH_IRE_MARK_HIDDEN to get 2173 * to the hidden ones. 2174 * 2175 * In general the zoneid has to match (where ALL_ZONES match all of them). 2176 * But for IRE_LOCAL we also need to handle the case where L2 should 2177 * conceptually loop back the packet. This is necessary since neither 2178 * Ethernet drivers nor Ethernet hardware loops back packets sent to their 2179 * own MAC address. This loopback is needed when the normal 2180 * routes (ignoring IREs with different zoneids) would send out the packet on 2181 * the same ill (or ill group) as the ill with which this IRE_LOCAL is 2182 * associated. 2183 * 2184 * Earlier versions of this code always matched an IRE_LOCAL independently of 2185 * the zoneid. We preserve that earlier behavior when 2186 * ip_restrict_interzone_loopback is turned off. 
2187 */ 2188 ire_t * 2189 ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid, 2190 const ts_label_t *tsl, ip_stack_t *ipst) 2191 { 2192 irb_t *irb_ptr; 2193 ire_t *ire; 2194 2195 irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 2196 ipst->ips_ip6_cache_table_size)]; 2197 rw_enter(&irb_ptr->irb_lock, RW_READER); 2198 for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 2199 if (ire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 2200 continue; 2201 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, addr)) { 2202 /* 2203 * Finally, check if the security policy has any 2204 * restriction on using this route for the specified 2205 * message. 2206 */ 2207 if (tsl != NULL && 2208 ire->ire_gw_secattr != NULL && 2209 tsol_ire_match_gwattr(ire, tsl) != 0) { 2210 continue; 2211 } 2212 2213 if (zoneid == ALL_ZONES || ire->ire_zoneid == zoneid || 2214 ire->ire_zoneid == ALL_ZONES) { 2215 IRE_REFHOLD(ire); 2216 rw_exit(&irb_ptr->irb_lock); 2217 return (ire); 2218 } 2219 2220 if (ire->ire_type == IRE_LOCAL) { 2221 if (ipst->ips_ip_restrict_interzone_loopback && 2222 !ire_local_ok_across_zones(ire, zoneid, 2223 (void *)addr, tsl, ipst)) 2224 continue; 2225 2226 IRE_REFHOLD(ire); 2227 rw_exit(&irb_ptr->irb_lock); 2228 return (ire); 2229 } 2230 } 2231 } 2232 rw_exit(&irb_ptr->irb_lock); 2233 return (NULL); 2234 } 2235 2236 /* 2237 * Locate the interface ire that is tied to the cache ire 'cire' via 2238 * cire->ire_ihandle. 2239 * 2240 * We are trying to create the cache ire for an onlink destn. or 2241 * gateway in 'cire'. We are called from ire_add_v6() in the IRE_IF_RESOLVER 2242 * case for xresolv interfaces, after the ire has come back from 2243 * an external resolver. 
2244 */ 2245 static ire_t * 2246 ire_ihandle_lookup_onlink_v6(ire_t *cire) 2247 { 2248 ire_t *ire; 2249 int match_flags; 2250 int i; 2251 int j; 2252 irb_t *irb_ptr; 2253 ip_stack_t *ipst = cire->ire_ipst; 2254 2255 ASSERT(cire != NULL); 2256 2257 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2258 /* 2259 * We know that the mask of the interface ire equals cire->ire_cmask. 2260 * (When ip_newroute_v6() created 'cire' for an on-link destn. 2261 * it set its cmask from the interface ire's mask) 2262 */ 2263 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 2264 NULL, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle, 2265 NULL, match_flags, ipst); 2266 if (ire != NULL) 2267 return (ire); 2268 /* 2269 * If we didn't find an interface ire above, we can't declare failure. 2270 * For backwards compatibility, we need to support prefix routes 2271 * pointing to next hop gateways that are not on-link. 2272 * 2273 * In the resolver/noresolver case, ip_newroute_v6() thinks 2274 * it is creating the cache ire for an onlink destination in 'cire'. 2275 * But 'cire' is not actually onlink, because ire_ftable_lookup_v6() 2276 * cheated it, by doing ire_route_lookup_v6() twice and returning an 2277 * interface ire. 2278 * 2279 * Eg. default - gw1 (line 1) 2280 * gw1 - gw2 (line 2) 2281 * gw2 - hme0 (line 3) 2282 * 2283 * In the above example, ip_newroute_v6() tried to create the cache ire 2284 * 'cire' for gw1, based on the interface route in line 3. The 2285 * ire_ftable_lookup_v6() above fails, because there is 2286 * no interface route to reach gw1. (it is gw2). We fall thru below. 2287 * 2288 * Do a brute force search based on the ihandle in a subset of the 2289 * forwarding tables, corresponding to cire->ire_cmask_v6. Otherwise 2290 * things become very complex, since we don't have 'pire' in this 2291 * case. 
(Also note that this method is not possible in the offlink 2292 * case because we don't know the mask) 2293 */ 2294 i = ip_mask_to_plen_v6(&cire->ire_cmask_v6); 2295 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) 2296 return (NULL); 2297 for (j = 0; j < ipst->ips_ip6_ftable_hash_size; j++) { 2298 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][j]; 2299 rw_enter(&irb_ptr->irb_lock, RW_READER); 2300 for (ire = irb_ptr->irb_ire; ire != NULL; 2301 ire = ire->ire_next) { 2302 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2303 continue; 2304 if ((ire->ire_type & IRE_INTERFACE) && 2305 (ire->ire_ihandle == cire->ire_ihandle)) { 2306 IRE_REFHOLD(ire); 2307 rw_exit(&irb_ptr->irb_lock); 2308 return (ire); 2309 } 2310 } 2311 rw_exit(&irb_ptr->irb_lock); 2312 } 2313 return (NULL); 2314 } 2315 2316 2317 /* 2318 * Locate the interface ire that is tied to the cache ire 'cire' via 2319 * cire->ire_ihandle. 2320 * 2321 * We are trying to create the cache ire for an offlink destn based 2322 * on the cache ire of the gateway in 'cire'. 'pire' is the prefix ire 2323 * as found by ip_newroute_v6(). We are called from ip_newroute_v6() in 2324 * the IRE_CACHE case. 2325 */ 2326 ire_t * 2327 ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire) 2328 { 2329 ire_t *ire; 2330 int match_flags; 2331 in6_addr_t gw_addr; 2332 ipif_t *gw_ipif; 2333 ip_stack_t *ipst = cire->ire_ipst; 2334 2335 ASSERT(cire != NULL && pire != NULL); 2336 2337 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2338 /* 2339 * ip_newroute_v6 calls ire_ftable_lookup with MATCH_IRE_ILL only 2340 * for on-link hosts. We should never be here for onlink. 2341 * Thus, use MATCH_IRE_ILL_GROUP. 2342 */ 2343 if (pire->ire_ipif != NULL) 2344 match_flags |= MATCH_IRE_ILL_GROUP; 2345 /* 2346 * We know that the mask of the interface ire equals cire->ire_cmask. 2347 * (When ip_newroute_v6() created 'cire' for an on-link destn. 
it set 2348 * its cmask from the interface ire's mask) 2349 */ 2350 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 0, 2351 IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2352 NULL, match_flags, ipst); 2353 if (ire != NULL) 2354 return (ire); 2355 /* 2356 * If we didn't find an interface ire above, we can't declare failure. 2357 * For backwards compatibility, we need to support prefix routes 2358 * pointing to next hop gateways that are not on-link. 2359 * 2360 * Assume we are trying to ping some offlink destn, and we have the 2361 * routing table below. 2362 * 2363 * Eg. default - gw1 <--- pire (line 1) 2364 * gw1 - gw2 (line 2) 2365 * gw2 - hme0 (line 3) 2366 * 2367 * If we already have a cache ire for gw1 in 'cire', the 2368 * ire_ftable_lookup_v6 above would have failed, since there is no 2369 * interface ire to reach gw1. We will fallthru below. 2370 * 2371 * Here we duplicate the steps that ire_ftable_lookup_v6() did in 2372 * getting 'cire' from 'pire', in the MATCH_IRE_RECURSIVE case. 2373 * The differences are the following 2374 * i. We want the interface ire only, so we call 2375 * ire_ftable_lookup_v6() instead of ire_route_lookup_v6() 2376 * ii. We look for only prefix routes in the 1st call below. 2377 * ii. We want to match on the ihandle in the 2nd call below. 2378 */ 2379 match_flags = MATCH_IRE_TYPE; 2380 if (pire->ire_ipif != NULL) 2381 match_flags |= MATCH_IRE_ILL_GROUP; 2382 2383 mutex_enter(&pire->ire_lock); 2384 gw_addr = pire->ire_gateway_addr_v6; 2385 mutex_exit(&pire->ire_lock); 2386 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_OFFSUBNET, 2387 pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst); 2388 if (ire == NULL) 2389 return (NULL); 2390 /* 2391 * At this point 'ire' corresponds to the entry shown in line 2. 2392 * gw_addr is 'gw2' in the example above. 
2393 */ 2394 mutex_enter(&ire->ire_lock); 2395 gw_addr = ire->ire_gateway_addr_v6; 2396 mutex_exit(&ire->ire_lock); 2397 gw_ipif = ire->ire_ipif; 2398 ire_refrele(ire); 2399 2400 match_flags |= MATCH_IRE_IHANDLE; 2401 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_INTERFACE, 2402 gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2403 NULL, match_flags, ipst); 2404 return (ire); 2405 } 2406 2407 /* 2408 * Return the IRE_LOOPBACK, IRE_IF_RESOLVER or IRE_IF_NORESOLVER 2409 * ire associated with the specified ipif. 2410 * 2411 * This might occasionally be called when IPIF_UP is not set since 2412 * the IPV6_MULTICAST_IF as well as creating interface routes 2413 * allows specifying a down ipif (ipif_lookup* match ipifs that are down). 2414 * 2415 * Note that if IPIF_NOLOCAL, IPIF_NOXMIT, or IPIF_DEPRECATED is set on 2416 * the ipif this routine might return NULL. 2417 * (Sometimes called as writer though not required by this function.) 2418 */ 2419 ire_t * 2420 ipif_to_ire_v6(const ipif_t *ipif) 2421 { 2422 ire_t *ire; 2423 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2424 2425 ASSERT(ipif->ipif_isv6); 2426 if (ipif->ipif_ire_type == IRE_LOOPBACK) { 2427 ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, NULL, 2428 IRE_LOOPBACK, ipif, ALL_ZONES, NULL, 2429 (MATCH_IRE_TYPE | MATCH_IRE_IPIF), ipst); 2430 } else if (ipif->ipif_flags & IPIF_POINTOPOINT) { 2431 /* In this case we need to lookup destination address. 
*/ 2432 ire = ire_ftable_lookup_v6(&ipif->ipif_v6pp_dst_addr, 2433 &ipv6_all_ones, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 2434 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 2435 MATCH_IRE_MASK), ipst); 2436 } else { 2437 ire = ire_ftable_lookup_v6(&ipif->ipif_v6subnet, 2438 &ipif->ipif_v6net_mask, NULL, IRE_INTERFACE, ipif, NULL, 2439 ALL_ZONES, 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 2440 MATCH_IRE_MASK), ipst); 2441 } 2442 return (ire); 2443 } 2444 2445 /* 2446 * Return B_TRUE if a multirt route is resolvable 2447 * (or if no route is resolved yet), B_FALSE otherwise. 2448 * This only works in the global zone. 2449 */ 2450 boolean_t 2451 ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl, 2452 ip_stack_t *ipst) 2453 { 2454 ire_t *first_fire; 2455 ire_t *first_cire; 2456 ire_t *fire; 2457 ire_t *cire; 2458 irb_t *firb; 2459 irb_t *cirb; 2460 int unres_cnt = 0; 2461 boolean_t resolvable = B_FALSE; 2462 2463 /* Retrieve the first IRE_HOST that matches the destination */ 2464 first_fire = ire_ftable_lookup_v6(v6dstp, &ipv6_all_ones, 0, IRE_HOST, 2465 NULL, NULL, ALL_ZONES, 0, tsl, MATCH_IRE_MASK | MATCH_IRE_TYPE | 2466 MATCH_IRE_SECATTR, ipst); 2467 2468 /* No route at all */ 2469 if (first_fire == NULL) { 2470 return (B_TRUE); 2471 } 2472 2473 firb = first_fire->ire_bucket; 2474 ASSERT(firb); 2475 2476 /* Retrieve the first IRE_CACHE ire for that destination. */ 2477 first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID, tsl, ipst); 2478 2479 /* No resolved route. */ 2480 if (first_cire == NULL) { 2481 ire_refrele(first_fire); 2482 return (B_TRUE); 2483 } 2484 2485 /* At least one route is resolved. */ 2486 2487 cirb = first_cire->ire_bucket; 2488 ASSERT(cirb); 2489 2490 /* Count the number of routes to that dest that are declared. 
*/ 2491 IRB_REFHOLD(firb); 2492 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2493 if (!(fire->ire_flags & RTF_MULTIRT)) 2494 continue; 2495 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, v6dstp)) 2496 continue; 2497 unres_cnt++; 2498 } 2499 IRB_REFRELE(firb); 2500 2501 2502 /* Then subtract the number of routes to that dst that are resolved */ 2503 IRB_REFHOLD(cirb); 2504 for (cire = first_cire; cire != NULL; cire = cire->ire_next) { 2505 if (!(cire->ire_flags & RTF_MULTIRT)) 2506 continue; 2507 if (!IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, v6dstp)) 2508 continue; 2509 if (cire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 2510 continue; 2511 unres_cnt--; 2512 } 2513 IRB_REFRELE(cirb); 2514 2515 /* At least one route is unresolved; search for a resolvable route. */ 2516 if (unres_cnt > 0) 2517 resolvable = ire_multirt_lookup_v6(&first_cire, &first_fire, 2518 MULTIRT_USESTAMP|MULTIRT_CACHEGW, tsl, ipst); 2519 2520 if (first_fire) 2521 ire_refrele(first_fire); 2522 2523 if (first_cire) 2524 ire_refrele(first_cire); 2525 2526 return (resolvable); 2527 } 2528 2529 2530 /* 2531 * Return B_TRUE and update *ire_arg and *fire_arg 2532 * if at least one resolvable route is found. 2533 * Return B_FALSE otherwise (all routes are resolved or 2534 * the remaining unresolved routes are all unresolvable). 2535 * This only works in the global zone. 
2536 */ 2537 boolean_t 2538 ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags, 2539 const ts_label_t *tsl, ip_stack_t *ipst) 2540 { 2541 clock_t delta; 2542 ire_t *best_fire = NULL; 2543 ire_t *best_cire = NULL; 2544 ire_t *first_fire; 2545 ire_t *first_cire; 2546 ire_t *fire; 2547 ire_t *cire; 2548 irb_t *firb = NULL; 2549 irb_t *cirb = NULL; 2550 ire_t *gw_ire; 2551 boolean_t already_resolved; 2552 boolean_t res; 2553 in6_addr_t v6dst; 2554 in6_addr_t v6gw; 2555 2556 ip2dbg(("ire_multirt_lookup_v6: *ire_arg %p, *fire_arg %p, " 2557 "flags %04x\n", (void *)*ire_arg, (void *)*fire_arg, flags)); 2558 2559 ASSERT(ire_arg); 2560 ASSERT(fire_arg); 2561 2562 /* Not an IRE_HOST ire; give up. */ 2563 if ((*fire_arg == NULL) || 2564 ((*fire_arg)->ire_type != IRE_HOST)) { 2565 return (B_FALSE); 2566 } 2567 2568 /* This is the first IRE_HOST ire for that destination. */ 2569 first_fire = *fire_arg; 2570 firb = first_fire->ire_bucket; 2571 ASSERT(firb); 2572 2573 mutex_enter(&first_fire->ire_lock); 2574 v6dst = first_fire->ire_addr_v6; 2575 mutex_exit(&first_fire->ire_lock); 2576 2577 ip2dbg(("ire_multirt_lookup_v6: dst %08x\n", 2578 ntohl(V4_PART_OF_V6(v6dst)))); 2579 2580 /* 2581 * Retrieve the first IRE_CACHE ire for that destination; 2582 * if we don't find one, no route for that dest is 2583 * resolved yet. 2584 */ 2585 first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID, tsl, ipst); 2586 if (first_cire) { 2587 cirb = first_cire->ire_bucket; 2588 } 2589 2590 ip2dbg(("ire_multirt_lookup_v6: first_cire %p\n", (void *)first_cire)); 2591 2592 /* 2593 * Search for a resolvable route, giving the top priority 2594 * to routes that can be resolved without any call to the resolver. 2595 */ 2596 IRB_REFHOLD(firb); 2597 2598 if (!IN6_IS_ADDR_MULTICAST(&v6dst)) { 2599 /* 2600 * For all multiroute IRE_HOST ires for that destination, 2601 * check if the route via the IRE_HOST's gateway is 2602 * resolved yet. 
2603 */ 2604 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2605 2606 if (!(fire->ire_flags & RTF_MULTIRT)) 2607 continue; 2608 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2609 continue; 2610 2611 if (fire->ire_gw_secattr != NULL && 2612 tsol_ire_match_gwattr(fire, tsl) != 0) { 2613 continue; 2614 } 2615 2616 mutex_enter(&fire->ire_lock); 2617 v6gw = fire->ire_gateway_addr_v6; 2618 mutex_exit(&fire->ire_lock); 2619 2620 ip2dbg(("ire_multirt_lookup_v6: fire %p, " 2621 "ire_addr %08x, ire_gateway_addr %08x\n", 2622 (void *)fire, 2623 ntohl(V4_PART_OF_V6(fire->ire_addr_v6)), 2624 ntohl(V4_PART_OF_V6(v6gw)))); 2625 2626 already_resolved = B_FALSE; 2627 2628 if (first_cire) { 2629 ASSERT(cirb); 2630 2631 IRB_REFHOLD(cirb); 2632 /* 2633 * For all IRE_CACHE ires for that 2634 * destination. 2635 */ 2636 for (cire = first_cire; 2637 cire != NULL; 2638 cire = cire->ire_next) { 2639 2640 if (!(cire->ire_flags & RTF_MULTIRT)) 2641 continue; 2642 if (!IN6_ARE_ADDR_EQUAL( 2643 &cire->ire_addr_v6, &v6dst)) 2644 continue; 2645 if (cire->ire_marks & 2646 (IRE_MARK_CONDEMNED| 2647 IRE_MARK_HIDDEN)) 2648 continue; 2649 2650 if (cire->ire_gw_secattr != NULL && 2651 tsol_ire_match_gwattr(cire, 2652 tsl) != 0) { 2653 continue; 2654 } 2655 2656 /* 2657 * Check if the IRE_CACHE's gateway 2658 * matches the IRE_HOST's gateway. 2659 */ 2660 if (IN6_ARE_ADDR_EQUAL( 2661 &cire->ire_gateway_addr_v6, 2662 &v6gw)) { 2663 already_resolved = B_TRUE; 2664 break; 2665 } 2666 } 2667 IRB_REFRELE(cirb); 2668 } 2669 2670 /* 2671 * This route is already resolved; 2672 * proceed with next one. 2673 */ 2674 if (already_resolved) { 2675 ip2dbg(("ire_multirt_lookup_v6: found cire %p, " 2676 "already resolved\n", (void *)cire)); 2677 continue; 2678 } 2679 2680 /* 2681 * The route is unresolved; is it actually 2682 * resolvable, i.e. is there a cache or a resolver 2683 * for the gateway? 
2684 */ 2685 gw_ire = ire_route_lookup_v6(&v6gw, 0, 0, 0, NULL, NULL, 2686 ALL_ZONES, tsl, MATCH_IRE_RECURSIVE | 2687 MATCH_IRE_SECATTR, ipst); 2688 2689 ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n", 2690 (void *)gw_ire)); 2691 2692 /* 2693 * This route can be resolved without any call to the 2694 * resolver; if the MULTIRT_CACHEGW flag is set, 2695 * give the top priority to this ire and exit the 2696 * loop. 2697 * This occurs when an resolver reply is processed 2698 * through ip_wput_nondata() 2699 */ 2700 if ((flags & MULTIRT_CACHEGW) && 2701 (gw_ire != NULL) && 2702 (gw_ire->ire_type & IRE_CACHETABLE)) { 2703 /* 2704 * Release the resolver associated to the 2705 * previous candidate best ire, if any. 2706 */ 2707 if (best_cire) { 2708 ire_refrele(best_cire); 2709 ASSERT(best_fire); 2710 } 2711 2712 best_fire = fire; 2713 best_cire = gw_ire; 2714 2715 ip2dbg(("ire_multirt_lookup_v6: found top prio " 2716 "best_fire %p, best_cire %p\n", 2717 (void *)best_fire, (void *)best_cire)); 2718 break; 2719 } 2720 2721 /* 2722 * Compute the time elapsed since our preceding 2723 * attempt to resolve that route. 2724 * If the MULTIRT_USESTAMP flag is set, we take that 2725 * route into account only if this time interval 2726 * exceeds ip_multirt_resolution_interval; 2727 * this prevents us from attempting to resolve a 2728 * broken route upon each sending of a packet. 2729 */ 2730 delta = lbolt - fire->ire_last_used_time; 2731 delta = TICK_TO_MSEC(delta); 2732 2733 res = (boolean_t) 2734 ((delta > ipst-> 2735 ips_ip_multirt_resolution_interval) || 2736 (!(flags & MULTIRT_USESTAMP))); 2737 2738 ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, " 2739 "res %d\n", 2740 (void *)fire, delta, res)); 2741 2742 if (res) { 2743 /* 2744 * A resolver exists for the gateway: save 2745 * the current IRE_HOST ire as a candidate 2746 * best ire. If we later discover that a 2747 * top priority ire exists (i.e. 
no need to 2748 * call the resolver), then this new ire 2749 * will be preferred to the current one. 2750 */ 2751 if (gw_ire != NULL) { 2752 if (best_fire == NULL) { 2753 ASSERT(best_cire == NULL); 2754 2755 best_fire = fire; 2756 best_cire = gw_ire; 2757 2758 ip2dbg(("ire_multirt_lookup_v6:" 2759 "found candidate " 2760 "best_fire %p, " 2761 "best_cire %p\n", 2762 (void *)best_fire, 2763 (void *)best_cire)); 2764 2765 /* 2766 * If MULTIRT_CACHEGW is not 2767 * set, we ignore the top 2768 * priority ires that can 2769 * be resolved without any 2770 * call to the resolver; 2771 * In that case, there is 2772 * actually no need 2773 * to continue the loop. 2774 */ 2775 if (!(flags & 2776 MULTIRT_CACHEGW)) { 2777 break; 2778 } 2779 continue; 2780 } 2781 } else { 2782 /* 2783 * No resolver for the gateway: the 2784 * route is not resolvable. 2785 * If the MULTIRT_SETSTAMP flag is 2786 * set, we stamp the IRE_HOST ire, 2787 * so we will not select it again 2788 * during this resolution interval. 2789 */ 2790 if (flags & MULTIRT_SETSTAMP) 2791 fire->ire_last_used_time = 2792 lbolt; 2793 } 2794 } 2795 2796 if (gw_ire != NULL) 2797 ire_refrele(gw_ire); 2798 } 2799 } else { /* IN6_IS_ADDR_MULTICAST(&v6dst) */ 2800 2801 for (fire = first_fire; 2802 fire != NULL; 2803 fire = fire->ire_next) { 2804 2805 if (!(fire->ire_flags & RTF_MULTIRT)) 2806 continue; 2807 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2808 continue; 2809 2810 if (fire->ire_gw_secattr != NULL && 2811 tsol_ire_match_gwattr(fire, tsl) != 0) { 2812 continue; 2813 } 2814 2815 already_resolved = B_FALSE; 2816 2817 mutex_enter(&fire->ire_lock); 2818 v6gw = fire->ire_gateway_addr_v6; 2819 mutex_exit(&fire->ire_lock); 2820 2821 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2822 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, tsl, 2823 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE | 2824 MATCH_IRE_SECATTR, ipst); 2825 2826 /* No resolver for the gateway; we skip this ire. 
*/ 2827 if (gw_ire == NULL) { 2828 continue; 2829 } 2830 2831 if (first_cire) { 2832 2833 IRB_REFHOLD(cirb); 2834 /* 2835 * For all IRE_CACHE ires for that 2836 * destination. 2837 */ 2838 for (cire = first_cire; 2839 cire != NULL; 2840 cire = cire->ire_next) { 2841 2842 if (!(cire->ire_flags & RTF_MULTIRT)) 2843 continue; 2844 if (!IN6_ARE_ADDR_EQUAL( 2845 &cire->ire_addr_v6, &v6dst)) 2846 continue; 2847 if (cire->ire_marks & 2848 (IRE_MARK_CONDEMNED| 2849 IRE_MARK_HIDDEN)) 2850 continue; 2851 2852 if (cire->ire_gw_secattr != NULL && 2853 tsol_ire_match_gwattr(cire, 2854 tsl) != 0) { 2855 continue; 2856 } 2857 2858 /* 2859 * Cache entries are linked to the 2860 * parent routes using the parent handle 2861 * (ire_phandle). If no cache entry has 2862 * the same handle as fire, fire is 2863 * still unresolved. 2864 */ 2865 ASSERT(cire->ire_phandle != 0); 2866 if (cire->ire_phandle == 2867 fire->ire_phandle) { 2868 already_resolved = B_TRUE; 2869 break; 2870 } 2871 } 2872 IRB_REFRELE(cirb); 2873 } 2874 2875 /* 2876 * This route is already resolved; proceed with 2877 * next one. 2878 */ 2879 if (already_resolved) { 2880 ire_refrele(gw_ire); 2881 continue; 2882 } 2883 2884 /* 2885 * Compute the time elapsed since our preceding 2886 * attempt to resolve that route. 2887 * If the MULTIRT_USESTAMP flag is set, we take 2888 * that route into account only if this time 2889 * interval exceeds ip_multirt_resolution_interval; 2890 * this prevents us from attempting to resolve a 2891 * broken route upon each sending of a packet. 
2892 */ 2893 delta = lbolt - fire->ire_last_used_time; 2894 delta = TICK_TO_MSEC(delta); 2895 2896 res = (boolean_t) 2897 ((delta > ipst-> 2898 ips_ip_multirt_resolution_interval) || 2899 (!(flags & MULTIRT_USESTAMP))); 2900 2901 ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, " 2902 "flags %04x, res %d\n", 2903 (void *)fire, delta, flags, res)); 2904 2905 if (res) { 2906 if (best_cire) { 2907 /* 2908 * Release the resolver associated 2909 * to the preceding candidate best 2910 * ire, if any. 2911 */ 2912 ire_refrele(best_cire); 2913 ASSERT(best_fire); 2914 } 2915 best_fire = fire; 2916 best_cire = gw_ire; 2917 continue; 2918 } 2919 2920 ire_refrele(gw_ire); 2921 } 2922 } 2923 2924 if (best_fire) { 2925 IRE_REFHOLD(best_fire); 2926 } 2927 IRB_REFRELE(firb); 2928 2929 /* Release the first IRE_CACHE we initially looked up, if any. */ 2930 if (first_cire) 2931 ire_refrele(first_cire); 2932 2933 /* Found a resolvable route. */ 2934 if (best_fire) { 2935 ASSERT(best_cire); 2936 2937 if (*fire_arg) 2938 ire_refrele(*fire_arg); 2939 if (*ire_arg) 2940 ire_refrele(*ire_arg); 2941 2942 /* 2943 * Update the passed arguments with the 2944 * resolvable multirt route we found 2945 */ 2946 *fire_arg = best_fire; 2947 *ire_arg = best_cire; 2948 2949 ip2dbg(("ire_multirt_lookup_v6: returning B_TRUE, " 2950 "*fire_arg %p, *ire_arg %p\n", 2951 (void *)best_fire, (void *)best_cire)); 2952 2953 return (B_TRUE); 2954 } 2955 2956 ASSERT(best_cire == NULL); 2957 2958 ip2dbg(("ire_multirt_lookup_v6: returning B_FALSE, *fire_arg %p, " 2959 "*ire_arg %p\n", 2960 (void *)*fire_arg, (void *)*ire_arg)); 2961 2962 /* No resolvable route. */ 2963 return (B_FALSE); 2964 } 2965 2966 2967 /* 2968 * Find an IRE_OFFSUBNET IRE entry for the multicast address 'v6dstp' 2969 * that goes through 'ipif'. As a fallback, a route that goes through 2970 * ipif->ipif_ill can be returned. 
2971 */ 2972 ire_t * 2973 ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp) 2974 { 2975 ire_t *ire; 2976 ire_t *save_ire = NULL; 2977 ire_t *gw_ire; 2978 irb_t *irb; 2979 in6_addr_t v6gw; 2980 int match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL; 2981 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2982 2983 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, NULL, ALL_ZONES, 0, 2984 NULL, MATCH_IRE_DEFAULT, ipst); 2985 2986 if (ire == NULL) 2987 return (NULL); 2988 2989 irb = ire->ire_bucket; 2990 ASSERT(irb); 2991 2992 IRB_REFHOLD(irb); 2993 ire_refrele(ire); 2994 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 2995 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) || 2996 (ipif->ipif_zoneid != ire->ire_zoneid && 2997 ire->ire_zoneid != ALL_ZONES)) { 2998 continue; 2999 } 3000 3001 switch (ire->ire_type) { 3002 case IRE_DEFAULT: 3003 case IRE_PREFIX: 3004 case IRE_HOST: 3005 mutex_enter(&ire->ire_lock); 3006 v6gw = ire->ire_gateway_addr_v6; 3007 mutex_exit(&ire->ire_lock); 3008 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 3009 IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, 3010 NULL, match_flags, ipst); 3011 3012 if (gw_ire != NULL) { 3013 if (save_ire != NULL) { 3014 ire_refrele(save_ire); 3015 } 3016 IRE_REFHOLD(ire); 3017 if (gw_ire->ire_ipif == ipif) { 3018 ire_refrele(gw_ire); 3019 3020 IRB_REFRELE(irb); 3021 return (ire); 3022 } 3023 ire_refrele(gw_ire); 3024 save_ire = ire; 3025 } 3026 break; 3027 case IRE_IF_NORESOLVER: 3028 case IRE_IF_RESOLVER: 3029 if (ire->ire_ipif == ipif) { 3030 if (save_ire != NULL) { 3031 ire_refrele(save_ire); 3032 } 3033 IRE_REFHOLD(ire); 3034 3035 IRB_REFRELE(irb); 3036 return (ire); 3037 } 3038 break; 3039 } 3040 } 3041 IRB_REFRELE(irb); 3042 3043 return (save_ire); 3044 } 3045