1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* 27 * Copyright (c) 1990 Mentat Inc. 28 */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 /* 33 * This file contains routines that manipulate Internet Routing Entries (IREs). 
34 */ 35 #include <sys/types.h> 36 #include <sys/stream.h> 37 #include <sys/stropts.h> 38 #include <sys/strlog.h> 39 #include <sys/dlpi.h> 40 #include <sys/ddi.h> 41 #include <sys/cmn_err.h> 42 43 #include <sys/systm.h> 44 #include <sys/param.h> 45 #include <sys/socket.h> 46 #include <net/if.h> 47 #include <net/route.h> 48 #include <netinet/in.h> 49 #include <net/if_dl.h> 50 #include <netinet/ip6.h> 51 #include <netinet/icmp6.h> 52 53 #include <inet/common.h> 54 #include <inet/mi.h> 55 #include <inet/ip.h> 56 #include <inet/ip6.h> 57 #include <inet/arp.h> 58 #include <inet/ip_ndp.h> 59 #include <inet/ip_if.h> 60 #include <inet/ip_ire.h> 61 #include <inet/ip_rts.h> 62 #include <inet/ipclassifier.h> 63 #include <inet/nd.h> 64 #include <sys/kmem.h> 65 #include <sys/zone.h> 66 67 irb_t *ip_forwarding_table_v6[IP6_MASK_TABLE_SIZE]; 68 /* This is dynamically allocated in ip_ire_init */ 69 irb_t *ip_cache_table_v6; 70 static ire_t ire_null; 71 72 /* Defined in ip_ire.c */ 73 extern uint32_t ip6_cache_table_size; 74 extern uint32_t ip6_ftable_hash_size; 75 76 static ire_t *ire_ihandle_lookup_onlink_v6(ire_t *cire); 77 static void ire_report_ftable_v6(ire_t *ire, char *mp); 78 static void ire_report_ctable_v6(ire_t *ire, char *mp); 79 static boolean_t ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, 80 const in6_addr_t *mask, const in6_addr_t *gateway, int type, ipif_t *ipif, 81 zoneid_t zoneid, uint32_t ihandle, int match_flags); 82 83 /* 84 * Named Dispatch routine to produce a formatted report on all IREs. 85 * This report is accessed by using the ndd utility to "get" ND variable 86 * "ip_ire_status_v6". 
87 */ 88 /* ARGSUSED */ 89 int 90 ip_ire_report_v6(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 91 { 92 zoneid_t zoneid; 93 94 (void) mi_mpprintf(mp, 95 "IRE " MI_COL_HDRPAD_STR 96 "rfq " MI_COL_HDRPAD_STR 97 "stq " MI_COL_HDRPAD_STR 98 " zone mxfrg rtt rtt_sd ssthresh ref " 99 "rtomax tstamp_ok wscale_ok ecn_ok pmtud_ok sack sendpipe recvpipe " 100 "in/out/forward type addr mask " 101 "src gateway"); 102 /* 103 * 01234567 01234567 01234567 12345 12345 12345 12345 12345678 123 104 * 123456 123456789 123456789 123456 12345678 1234 12345678 12345678 105 * in/out/forward xxxxxxxxxx 106 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 107 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 108 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 109 * xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx 110 */ 111 112 /* 113 * Because of the ndd constraint, at most we can have 64K buffer 114 * to put in all IRE info. So to be more efficient, just 115 * allocate a 64K buffer here, assuming we need that large buffer. 116 * This should be OK as only root can do ndd /dev/ip. 117 */ 118 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 119 /* The following may work even if we cannot get a large buf. */ 120 (void) mi_mpprintf(mp, "<< Out of buffer >>\n"); 121 return (0); 122 } 123 zoneid = Q_TO_CONN(q)->conn_zoneid; 124 if (zoneid == GLOBAL_ZONEID) 125 zoneid = ALL_ZONES; 126 127 ire_walk_v6(ire_report_ftable_v6, (char *)mp->b_cont, zoneid); 128 ire_walk_v6(ire_report_ctable_v6, (char *)mp->b_cont, zoneid); 129 return (0); 130 } 131 132 /* 133 * ire_walk routine invoked for ip_ire_report_v6 for each IRE. 
134 */ 135 static void 136 ire_report_ftable_v6(ire_t *ire, char *mp) 137 { 138 char buf1[INET6_ADDRSTRLEN]; 139 char buf2[INET6_ADDRSTRLEN]; 140 char buf3[INET6_ADDRSTRLEN]; 141 char buf4[INET6_ADDRSTRLEN]; 142 uint_t fo_pkt_count; 143 uint_t ib_pkt_count; 144 int ref; 145 in6_addr_t gw_addr_v6; 146 uint_t print_len, buf_len; 147 148 ASSERT(ire->ire_ipversion == IPV6_VERSION); 149 if (ire->ire_type & IRE_CACHETABLE) 150 return; 151 buf_len = ((mblk_t *)mp)->b_datap->db_lim - ((mblk_t *)mp)->b_wptr; 152 if (buf_len <= 0) 153 return; 154 155 /* Number of active references of this ire */ 156 ref = ire->ire_refcnt; 157 /* "inbound" to a non local address is a forward */ 158 ib_pkt_count = ire->ire_ib_pkt_count; 159 fo_pkt_count = 0; 160 ASSERT(!(ire->ire_type & IRE_BROADCAST)); 161 if (!(ire->ire_type & (IRE_LOCAL|IRE_BROADCAST))) { 162 fo_pkt_count = ib_pkt_count; 163 ib_pkt_count = 0; 164 } 165 166 mutex_enter(&ire->ire_lock); 167 gw_addr_v6 = ire->ire_gateway_addr_v6; 168 mutex_exit(&ire->ire_lock); 169 170 print_len = snprintf((char *)((mblk_t *)mp)->b_wptr, buf_len, 171 MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR "%5d " 172 "%05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d " 173 "%04d %08d %08d %d/%d/%d %s\n\t%s\n\t%s\n\t%s\n\t%s\n", 174 (void *)ire, (void *)ire->ire_rfq, (void *)ire->ire_stq, 175 (int)ire->ire_zoneid, 176 ire->ire_max_frag, ire->ire_uinfo.iulp_rtt, 177 ire->ire_uinfo.iulp_rtt_sd, 178 ire->ire_uinfo.iulp_ssthresh, ref, 179 ire->ire_uinfo.iulp_rtomax, 180 (ire->ire_uinfo.iulp_tstamp_ok ? 1: 0), 181 (ire->ire_uinfo.iulp_wscale_ok ? 1: 0), 182 (ire->ire_uinfo.iulp_ecn_ok ? 1: 0), 183 (ire->ire_uinfo.iulp_pmtud_ok ? 
1: 0), 184 ire->ire_uinfo.iulp_sack, 185 ire->ire_uinfo.iulp_spipe, ire->ire_uinfo.iulp_rpipe, 186 ib_pkt_count, ire->ire_ob_pkt_count, fo_pkt_count, 187 ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type), 188 inet_ntop(AF_INET6, &ire->ire_addr_v6, buf1, sizeof (buf1)), 189 inet_ntop(AF_INET6, &ire->ire_mask_v6, buf2, sizeof (buf2)), 190 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, buf3, sizeof (buf3)), 191 inet_ntop(AF_INET6, &gw_addr_v6, buf4, sizeof (buf4))); 192 if (print_len < buf_len) { 193 ((mblk_t *)mp)->b_wptr += print_len; 194 } else { 195 ((mblk_t *)mp)->b_wptr += buf_len; 196 } 197 } 198 199 /* ire_walk routine invoked for ip_ire_report_v6 for each IRE. */ 200 static void 201 ire_report_ctable_v6(ire_t *ire, char *mp) 202 { 203 char buf1[INET6_ADDRSTRLEN]; 204 char buf2[INET6_ADDRSTRLEN]; 205 char buf3[INET6_ADDRSTRLEN]; 206 char buf4[INET6_ADDRSTRLEN]; 207 uint_t fo_pkt_count; 208 uint_t ib_pkt_count; 209 int ref; 210 in6_addr_t gw_addr_v6; 211 uint_t print_len, buf_len; 212 213 if ((ire->ire_type & IRE_CACHETABLE) == 0) 214 return; 215 buf_len = ((mblk_t *)mp)->b_datap->db_lim - ((mblk_t *)mp)->b_wptr; 216 if (buf_len <= 0) 217 return; 218 219 /* Number of active references of this ire */ 220 ref = ire->ire_refcnt; 221 /* "inbound" to a non local address is a forward */ 222 ib_pkt_count = ire->ire_ib_pkt_count; 223 fo_pkt_count = 0; 224 ASSERT(!(ire->ire_type & IRE_BROADCAST)); 225 if (ire->ire_type & IRE_LOCAL) { 226 fo_pkt_count = ib_pkt_count; 227 ib_pkt_count = 0; 228 } 229 230 mutex_enter(&ire->ire_lock); 231 gw_addr_v6 = ire->ire_gateway_addr_v6; 232 mutex_exit(&ire->ire_lock); 233 234 print_len = snprintf((char *)((mblk_t *)mp)->b_wptr, buf_len, 235 MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR "%5d " 236 "%05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d " 237 "%04d %08d %08d %d/%d/%d %s\n\t%s\n\t%s\n\t%s\n\t%s\n", 238 (void *)ire, (void *)ire->ire_rfq, (void *)ire->ire_stq, 239 (int)ire->ire_zoneid, 240 ire->ire_max_frag, 
ire->ire_uinfo.iulp_rtt, 241 ire->ire_uinfo.iulp_rtt_sd, ire->ire_uinfo.iulp_ssthresh, ref, 242 ire->ire_uinfo.iulp_rtomax, 243 (ire->ire_uinfo.iulp_tstamp_ok ? 1: 0), 244 (ire->ire_uinfo.iulp_wscale_ok ? 1: 0), 245 (ire->ire_uinfo.iulp_ecn_ok ? 1: 0), 246 (ire->ire_uinfo.iulp_pmtud_ok ? 1: 0), 247 ire->ire_uinfo.iulp_sack, 248 ire->ire_uinfo.iulp_spipe, ire->ire_uinfo.iulp_rpipe, 249 ib_pkt_count, ire->ire_ob_pkt_count, 250 fo_pkt_count, ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type), 251 inet_ntop(AF_INET6, &ire->ire_addr_v6, buf1, sizeof (buf1)), 252 inet_ntop(AF_INET6, &ire->ire_mask_v6, buf2, sizeof (buf2)), 253 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, buf3, sizeof (buf3)), 254 inet_ntop(AF_INET6, &gw_addr_v6, buf4, sizeof (buf4))); 255 if (print_len < buf_len) { 256 ((mblk_t *)mp)->b_wptr += print_len; 257 } else { 258 ((mblk_t *)mp)->b_wptr += buf_len; 259 } 260 } 261 262 263 /* 264 * Initialize the ire that is specific to IPv6 part and call 265 * ire_init_common to finish it. 266 */ 267 ire_t * 268 ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, 269 const in6_addr_t *v6mask, const in6_addr_t *v6src_addr, 270 const in6_addr_t *v6gateway, uint_t *max_fragp, 271 mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, 272 mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask, 273 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info) 274 { 275 if (fp_mp != NULL) { 276 /* 277 * We can't dupb() here as multiple threads could be 278 * calling dupb on the same mp which is incorrect. 279 * First dupb() should be called only by one thread. 280 */ 281 fp_mp = copyb(fp_mp); 282 if (fp_mp == NULL) 283 return (NULL); 284 } 285 286 if (dlureq_mp != NULL) { 287 /* 288 * We can't dupb() here as multiple threads could be 289 * calling dupb on the same mp which is incorrect. 290 * First dupb() should be called only by one thread. 
291 */ 292 dlureq_mp = copyb(dlureq_mp); 293 if (dlureq_mp == NULL) { 294 if (fp_mp != NULL) 295 freeb(fp_mp); 296 return (NULL); 297 } 298 } 299 300 BUMP_IRE_STATS(ire_stats_v6, ire_stats_alloced); 301 ire->ire_addr_v6 = *v6addr; 302 303 if (v6src_addr != NULL) 304 ire->ire_src_addr_v6 = *v6src_addr; 305 if (v6mask != NULL) { 306 ire->ire_mask_v6 = *v6mask; 307 ire->ire_masklen = ip_mask_to_plen_v6(&ire->ire_mask_v6); 308 } 309 if (v6gateway != NULL) 310 ire->ire_gateway_addr_v6 = *v6gateway; 311 312 if (type == IRE_CACHE && v6cmask != NULL) 313 ire->ire_cmask_v6 = *v6cmask; 314 315 /* 316 * Multirouted packets need to have a fragment header added so that 317 * the receiver is able to discard duplicates according to their 318 * fragment identifier. 319 */ 320 if (type == IRE_CACHE && (flags & RTF_MULTIRT)) { 321 ire->ire_frag_flag = IPH_FRAG_HDR; 322 } 323 324 ire_init_common(ire, max_fragp, fp_mp, rfq, stq, type, dlureq_mp, 325 ipif, NULL, phandle, ihandle, flags, IPV6_VERSION, ulp_info); 326 327 return (ire); 328 } 329 330 /* 331 * Similar to ire_create_v6 except that it is called only when 332 * we want to allocate ire as an mblk e.g. we have a external 333 * resolver. Do we need this in IPv6 ? 334 */ 335 ire_t * 336 ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 337 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 338 mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, 339 mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask, 340 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info) 341 { 342 ire_t *ire; 343 ire_t *ret_ire; 344 mblk_t *mp; 345 346 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 347 348 /* Allocate the new IRE. */ 349 mp = allocb(sizeof (ire_t), BPRI_MED); 350 if (mp == NULL) { 351 ip1dbg(("ire_create_mp_v6: alloc failed\n")); 352 return (NULL); 353 } 354 355 ire = (ire_t *)mp->b_rptr; 356 mp->b_wptr = (uchar_t *)&ire[1]; 357 358 /* Start clean. 
*/ 359 *ire = ire_null; 360 ire->ire_mp = mp; 361 mp->b_datap->db_type = IRE_DB_TYPE; 362 363 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 364 NULL, fp_mp, rfq, stq, type, dlureq_mp, ipif, v6cmask, phandle, 365 ihandle, flags, ulp_info); 366 367 if (ret_ire == NULL) { 368 freeb(ire->ire_mp); 369 return (NULL); 370 } 371 return (ire); 372 } 373 374 /* 375 * ire_create_v6 is called to allocate and initialize a new IRE. 376 * 377 * NOTE : This is called as writer sometimes though not required 378 * by this function. 379 */ 380 ire_t * 381 ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 382 const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway, 383 uint_t *max_fragp, mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type, 384 mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask, 385 uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info) 386 { 387 ire_t *ire; 388 ire_t *ret_ire; 389 390 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 391 392 ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP); 393 if (ire == NULL) { 394 ip1dbg(("ire_create_v6: alloc failed\n")); 395 return (NULL); 396 } 397 *ire = ire_null; 398 399 ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway, 400 max_fragp, fp_mp, rfq, stq, type, dlureq_mp, ipif, v6cmask, phandle, 401 ihandle, flags, ulp_info); 402 403 if (ret_ire == NULL) { 404 kmem_cache_free(ire_cache, ire); 405 return (NULL); 406 } 407 ASSERT(ret_ire == ire); 408 return (ire); 409 } 410 411 /* 412 * Find an IRE_INTERFACE for the multicast group. 413 * Allows different routes for multicast addresses 414 * in the unicast routing table (akin to FF::0/8 but could be more specific) 415 * which point at different interfaces. This is used when IPV6_MULTICAST_IF 416 * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't 417 * specify the interface to join on. 418 * 419 * Supports link-local addresses by following the ipif/ill when recursing. 
420 */ 421 ire_t * 422 ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid) 423 { 424 ire_t *ire; 425 ipif_t *ipif = NULL; 426 int match_flags = MATCH_IRE_TYPE; 427 in6_addr_t gw_addr_v6; 428 429 ire = ire_ftable_lookup_v6(group, 0, 0, 0, NULL, NULL, 430 zoneid, 0, MATCH_IRE_DEFAULT); 431 432 /* We search a resolvable ire in case of multirouting. */ 433 if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) { 434 ire_t *cire = NULL; 435 /* 436 * If the route is not resolvable, the looked up ire 437 * may be changed here. In that case, ire_multirt_lookup() 438 * IRE_REFRELE the original ire and change it. 439 */ 440 (void) ire_multirt_lookup_v6(&cire, &ire, MULTIRT_CACHEGW); 441 if (cire != NULL) 442 ire_refrele(cire); 443 } 444 if (ire == NULL) 445 return (NULL); 446 /* 447 * Make sure we follow ire_ipif. 448 * 449 * We need to determine the interface route through 450 * which the gateway will be reached. We don't really 451 * care which interface is picked if the interface is 452 * part of a group. 453 */ 454 if (ire->ire_ipif != NULL) { 455 ipif = ire->ire_ipif; 456 match_flags |= MATCH_IRE_ILL_GROUP; 457 } 458 459 switch (ire->ire_type) { 460 case IRE_DEFAULT: 461 case IRE_PREFIX: 462 case IRE_HOST: 463 mutex_enter(&ire->ire_lock); 464 gw_addr_v6 = ire->ire_gateway_addr_v6; 465 mutex_exit(&ire->ire_lock); 466 ire_refrele(ire); 467 ire = ire_ftable_lookup_v6(&gw_addr_v6, 0, 0, 468 IRE_INTERFACE, ipif, NULL, zoneid, 0, 469 match_flags); 470 return (ire); 471 case IRE_IF_NORESOLVER: 472 case IRE_IF_RESOLVER: 473 return (ire); 474 default: 475 ire_refrele(ire); 476 return (NULL); 477 } 478 } 479 480 /* 481 * Return any local address. We use this to target ourselves 482 * when the src address was specified as 'default'. 483 * Preference for IRE_LOCAL entries. 
484 */ 485 ire_t * 486 ire_lookup_local_v6(zoneid_t zoneid) 487 { 488 ire_t *ire; 489 irb_t *irb; 490 ire_t *maybe = NULL; 491 int i; 492 493 for (i = 0; i < ip6_cache_table_size; i++) { 494 irb = &ip_cache_table_v6[i]; 495 if (irb->irb_ire == NULL) 496 continue; 497 rw_enter(&irb->irb_lock, RW_READER); 498 for (ire = irb->irb_ire; ire; ire = ire->ire_next) { 499 if ((ire->ire_marks & IRE_MARK_CONDEMNED) || 500 ire->ire_zoneid != zoneid) 501 continue; 502 switch (ire->ire_type) { 503 case IRE_LOOPBACK: 504 if (maybe == NULL) { 505 IRE_REFHOLD(ire); 506 maybe = ire; 507 } 508 break; 509 case IRE_LOCAL: 510 if (maybe != NULL) { 511 ire_refrele(maybe); 512 } 513 IRE_REFHOLD(ire); 514 rw_exit(&irb->irb_lock); 515 return (ire); 516 } 517 } 518 rw_exit(&irb->irb_lock); 519 } 520 return (maybe); 521 } 522 523 /* 524 * This function takes a mask and returns number of bits set in the 525 * mask (the represented prefix length). Assumes a contiguous mask. 526 */ 527 int 528 ip_mask_to_plen_v6(const in6_addr_t *v6mask) 529 { 530 int bits; 531 int plen = IPV6_ABITS; 532 int i; 533 534 for (i = 3; i >= 0; i--) { 535 if (v6mask->s6_addr32[i] == 0) { 536 plen -= 32; 537 continue; 538 } 539 bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1; 540 if (bits == 0) 541 break; 542 plen -= bits; 543 } 544 545 return (plen); 546 } 547 548 /* 549 * Convert a prefix length to the mask for that prefix. 550 * Returns the argument bitmask. 551 */ 552 in6_addr_t * 553 ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask) 554 { 555 uint32_t *ptr; 556 557 if (plen < 0 || plen > IPV6_ABITS) 558 return (NULL); 559 *bitmask = ipv6_all_zeros; 560 561 ptr = (uint32_t *)bitmask; 562 while (plen > 32) { 563 *ptr++ = 0xffffffffU; 564 plen -= 32; 565 } 566 *ptr = htonl(0xffffffffU << (32 - plen)); 567 return (bitmask); 568 } 569 570 /* 571 * Add a fully initialized IRE to an appropriate 572 * table based on ire_type. 
*
 * The forward table contains IRE_PREFIX/IRE_HOST/IRE_HOST_REDIRECT
 * IRE_IF_RESOLVER/IRE_IF_NORESOLVER and IRE_DEFAULT.
 *
 * The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK
 * and IRE_CACHE.
 *
 * Arguments:
 *	ire_p	- in: the IRE to add; out: the IRE now in the table (held),
 *		  or NULL on failure.
 *	q, mp, func - handed unchanged to ire_atomic_start(), which may
 *		  queue the request.
 *
 * Returns:
 *	0	- *ire_p is either the newly inserted IRE or, if an
 *		  equivalent entry already existed, that existing IRE (the
 *		  new one is deleted).  Either way a reference is held for
 *		  the caller.
 *	EINVAL	- unrecognized IRE type, the parent interface route is gone
 *		  (xresolv case), or no usable nce for an IRE_CACHE.
 *	ENOMEM	- could not allocate a forwarding-table hash array.
 *	other	- whatever ire_atomic_start() returned (e.g. EINPROGRESS).
 * On every failure the IRE has been passed to ire_delete() and *ire_p
 * is set to NULL.
 *
 * NOTE : This function is called as writer though not required
 * by this function.
 */
int
ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func)
{
	ire_t *ire1;
	int mask_table_index;
	irb_t *irb_ptr;
	ire_t **irep;
	int flags;
	ire_t *pire = NULL;
	ill_t *stq_ill;
	boolean_t ndp_g_lock_held = B_FALSE;
	ire_t *ire = *ire_p;
	int error;

	ASSERT(ire->ire_ipversion == IPV6_VERSION);
	ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */
	ASSERT(ire->ire_nce == NULL);

	/*
	 * Find the appropriate list head.  First normalize the mask and
	 * source address according to the IRE type.
	 */
	switch (ire->ire_type) {
	case IRE_HOST:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_HOST_REDIRECT:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_CACHE:
	case IRE_LOCAL:
	case IRE_LOOPBACK:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		break;
	case IRE_PREFIX:
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_DEFAULT:
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_IF_RESOLVER:
	case IRE_IF_NORESOLVER:
		break;
	default:
		printf("ire_add_v6: ire %p has unrecognized IRE type (%d)\n",
		    (void *)ire, ire->ire_type);
		ire_delete(ire);
		*ire_p = NULL;
		return (EINVAL);
	}

	/* Make sure the address is properly masked. */
	V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6);

	if ((ire->ire_type & IRE_CACHETABLE) == 0) {
		/* IRE goes into Forward Table */
		mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6);
		if ((ip_forwarding_table_v6[mask_table_index]) == NULL) {
			irb_t *ptr;
			int i;

			/*
			 * No hash array for this prefix length yet.  Build
			 * one outside the lock, then publish it under
			 * ire_ft_init_lock after re-checking the slot.
			 */
			ptr = (irb_t *)mi_zalloc((ip6_ftable_hash_size *
			    sizeof (irb_t)));
			if (ptr == NULL) {
				ire_delete(ire);
				*ire_p = NULL;
				return (ENOMEM);
			}
			for (i = 0; i < ip6_ftable_hash_size; i++) {
				rw_init(&ptr[i].irb_lock, NULL,
				    RW_DEFAULT, NULL);
			}
			mutex_enter(&ire_ft_init_lock);
			if (ip_forwarding_table_v6[mask_table_index] == NULL) {
				ip_forwarding_table_v6[mask_table_index] = ptr;
				mutex_exit(&ire_ft_init_lock);
			} else {
				/*
				 * Some other thread won the race in
				 * initializing the forwarding table at the
				 * same index.
				 */
				mutex_exit(&ire_ft_init_lock);
				for (i = 0; i < ip6_ftable_hash_size; i++) {
					rw_destroy(&ptr[i].irb_lock);
				}
				mi_free(ptr);
			}
		}
		irb_ptr = &(ip_forwarding_table_v6[mask_table_index][
		    IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6,
		    ip6_ftable_hash_size)]);
	} else {
		/* IRE goes into the cache table, hashed on address only. */
		irb_ptr = &(ip_cache_table_v6[IRE_ADDR_HASH_V6(
		    ire->ire_addr_v6, ip6_cache_table_size)]);
	}
	/*
	 * For xresolv interfaces (v6 interfaces with an external
	 * address resolver), ip_newroute_v6/ip_newroute_ipif_v6
	 * are unable to prevent the deletion of the interface route
	 * while adding an IRE_CACHE for an on-link destination
	 * in the IRE_IF_RESOLVER case, since the ire has to go to
	 * the external resolver and return. We can't do a REFHOLD on the
	 * associated interface ire for fear of the message being freed
	 * if the external resolver can't resolve the address.
	 * Here we look up the interface ire in the forwarding table
	 * and make sure that the interface route has not been deleted.
	 */
	if (ire->ire_type == IRE_CACHE &&
	    IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6) &&
	    (((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) &&
	    (((ill_t *)ire->ire_stq->q_ptr)->ill_flags & ILLF_XRESOLV)) {

		pire = ire_ihandle_lookup_onlink_v6(ire);
		if (pire == NULL) {
			ire_delete(ire);
			*ire_p = NULL;
			return (EINVAL);
		}
		/* Prevent pire from getting deleted */
		IRB_REFHOLD(pire->ire_bucket);
		/* Has it been removed already? */
		if (pire->ire_marks & IRE_MARK_CONDEMNED) {
			IRB_REFRELE(pire->ire_bucket);
			ire_refrele(pire);
			ire_delete(ire);
			*ire_p = NULL;
			return (EINVAL);
		}
	}

	flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW);
	/*
	 * For IRE_CACHES, MATCH_IRE_IPIF is not enough to check
	 * for duplicates because :
	 *
	 * 1) ire_ipif->ipif_ill and ire_stq->q_ptr could be
	 *    pointing at different ills. A real duplicate is
	 *    a match on both ire_ipif and ire_stq.
	 *
	 * 2) We could have multiple packets trying to create
	 *    an IRE_CACHE for the same ill.
	 *
	 * Moreover, IPIF_NOFAILOVER and IPV6_BOUND_PIF endpoints wants
	 * to go out on a particular ill. Rather than looking at the
	 * packet, we depend on the above for MATCH_IRE_ILL here.
	 *
	 * Unlike IPv4, MATCH_IRE_IPIF is needed here as we could have
	 * multiple IRE_CACHES for an ill for the same destination
	 * with various scoped addresses i.e represented by ipifs.
	 *
	 * MATCH_IRE_ILL is done implicitly below for IRE_CACHES.
	 */
	if (ire->ire_ipif != NULL)
		flags |= MATCH_IRE_IPIF;
	/*
	 * If we are creating hidden ires, make sure we search on
	 * this ill (MATCH_IRE_ILL) and a hidden ire, while we are
	 * searching for duplicates below. Otherwise we could
	 * potentially find an IRE on some other interface
	 * and it may not be a IRE marked with IRE_MARK_HIDDEN. We
	 * shouldn't do this as this will lead to an infinite loop as
	 * eventually we need an hidden ire for this packet to go
	 * out. MATCH_IRE_ILL is already marked above.
	 */
	if (ire->ire_marks & IRE_MARK_HIDDEN) {
		ASSERT(ire->ire_type == IRE_CACHE);
		flags |= MATCH_IRE_MARK_HIDDEN;
	}

	/*
	 * Start the atomic add of the ire. Grab the ill locks,
	 * ill_g_usesrc_lock and the bucket lock. Check for condemned.
	 * To avoid lock order problems, get the ndp_g_lock now itself.
	 */
	if (ire->ire_type == IRE_CACHE) {
		mutex_enter(&ndp_g_lock);
		ndp_g_lock_held = B_TRUE;
	}

	/*
	 * If ipif or ill is changing ire_atomic_start() may queue the
	 * request and return EINPROGRESS.
	 */

	error = ire_atomic_start(irb_ptr, ire, q, mp, func);
	if (error != 0) {
		if (ndp_g_lock_held)
			mutex_exit(&ndp_g_lock);
		/*
		 * We don't know whether it is a valid ipif or not.
		 * So, set it to NULL. This assumes that the ire has not added
		 * a reference to the ipif.
		 */
		ire->ire_ipif = NULL;
		ire_delete(ire);
		if (pire != NULL) {
			IRB_REFRELE(pire->ire_bucket);
			ire_refrele(pire);
		}
		*ire_p = NULL;
		return (error);
	}
	/*
	 * To avoid creating ires having stale values for the ire_max_frag
	 * we get the latest value atomically here. For more details
	 * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE
	 * in ip_rput_dlpi_writer
	 *
	 * NOTE(review): the non-multicast branch below dereferences pire,
	 * which is only set in the xresolv IRE_CACHE case above.  Presumably
	 * only IREs built by ire_create_mp_v6() (which passes a NULL
	 * max_fragp) arrive here without ire_max_fragp - confirm against
	 * the callers.
	 */
	if (ire->ire_max_fragp == NULL) {
		if (IN6_IS_ADDR_MULTICAST(&ire->ire_addr_v6))
			ire->ire_max_frag = ire->ire_ipif->ipif_mtu;
		else
			ire->ire_max_frag = pire->ire_max_frag;
	} else {
		uint_t max_frag;

		max_frag = *ire->ire_max_fragp;
		ire->ire_max_fragp = NULL;
		ire->ire_max_frag = max_frag;
	}

	/*
	 * Atomically check for duplicate and insert in the table.
	 */
	for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
		if (ire1->ire_marks & IRE_MARK_CONDEMNED)
			continue;

		if (ire->ire_type == IRE_CACHE) {
			/*
			 * We do MATCH_IRE_ILL implicitly here for IRE_CACHES.
			 * As ire_ipif and ire_stq could point to two
			 * different ills, we can't pass just ire_ipif to
			 * ire_match_args and get a match on both ills.
			 * This is just needed for duplicate checks here and
			 * so we don't add an extra argument to
			 * ire_match_args for this. Do it locally.
			 *
			 * NOTE : Currently there is no part of the code
			 * that asks for both MATCH_IRE_IPIF and MATCH_IRE_ILL
			 * match for IRE_CACHEs. Thus we don't want to
			 * extend the arguments to ire_match_args_v6.
			 */
			if (ire1->ire_stq != ire->ire_stq)
				continue;
			/*
			 * Multiroute IRE_CACHEs for a given destination can
			 * have the same ire_ipif, typically if their source
			 * address is forced using RTF_SETSRC, and the same
			 * send-to queue. We differentiate them using the parent
			 * handle.
			 */
			if ((ire1->ire_flags & RTF_MULTIRT) &&
			    (ire->ire_flags & RTF_MULTIRT) &&
			    (ire1->ire_phandle != ire->ire_phandle))
				continue;
		}
		if (ire1->ire_zoneid != ire->ire_zoneid)
			continue;
		if (ire_match_args_v6(ire1, &ire->ire_addr_v6,
		    &ire->ire_mask_v6, &ire->ire_gateway_addr_v6,
		    ire->ire_type, ire->ire_ipif, ire->ire_zoneid, 0, flags)) {
			/*
			 * Return the old ire after doing a REFHOLD.
			 * As most of the callers continue to use the IRE
			 * after adding, we return a held ire. This will
			 * avoid a lookup in the caller again. If the callers
			 * don't want to use it, they need to do a REFRELE.
			 */
			ip1dbg(("found dup ire existing %p new %p",
			    (void *)ire1, (void *)ire));
			IRE_REFHOLD(ire1);
			if (ndp_g_lock_held)
				mutex_exit(&ndp_g_lock);
			ire_atomic_end(irb_ptr, ire);
			ire_delete(ire);
			if (pire != NULL) {
				/*
				 * Assert that it is
				 * not yet removed from the list.
				 */
				ASSERT(pire->ire_ptpn != NULL);
				IRB_REFRELE(pire->ire_bucket);
				ire_refrele(pire);
			}
			*ire_p = ire1;
			return (0);
		}
	}
	if (ire->ire_type == IRE_CACHE) {
		in6_addr_t gw_addr_v6;
		ill_t	*ill = ire_to_ill(ire);
		char	buf[INET6_ADDRSTRLEN];
		nce_t	*nce;

		/*
		 * All IRE_CACHE types must have a nce.  If this is
		 * not the case the entry will not be added. We need
		 * to make sure that if somebody deletes the nce
		 * after we looked up, they will find this ire and
		 * delete the ire. To delete this ire one needs the
		 * bucket lock which we are still holding here. So,
		 * even if the nce gets deleted after we looked up,
		 * this ire  will get deleted.
		 *
		 * NOTE : Don't need the ire_lock for accessing
		 * ire_gateway_addr_v6 as it is appearing first
		 * time on the list and rts_setgwr_v6 could not
		 * be changing this.
		 */
		gw_addr_v6 = ire->ire_gateway_addr_v6;
		if (IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) {
			/* On-link destination: the nce is for the dst. */
			nce = ndp_lookup(ill, &ire->ire_addr_v6, B_TRUE);
		} else {
			/* Off-link destination: the nce is for the gateway. */
			nce = ndp_lookup(ill, &gw_addr_v6, B_TRUE);
		}
		if (nce == NULL)
			goto failed;

		/* Pair of refhold, refrele just to get the tracing right */
		NCE_REFHOLD_NOTR(nce);
		NCE_REFRELE(nce);
		/*
		 * Atomically make sure that new IREs don't point
		 * to an NCE that is logically deleted (CONDEMNED).
		 * ndp_delete() first marks the NCE CONDEMNED.
		 * This ensures that the nce_refcnt won't increase
		 * due to new nce_lookups or due to addition of new IREs
		 * pointing to this NCE. Then ndp_delete() cleans up
		 * existing references. If we don't do it atomically here,
		 * ndp_delete() -> nce_ire_delete() will not be able to
		 * clean up the IRE list completely, and the nce_refcnt
		 * won't go down to zero.
		 */
		mutex_enter(&nce->nce_lock);
		if (ill->ill_flags & ILLF_XRESOLV) {
			/*
			 * If we used an external resolver, we may not
			 * have gone through neighbor discovery to get here.
			 * Must update the nce_state before the next check.
			 */
			if (nce->nce_state == ND_INCOMPLETE)
				nce->nce_state = ND_REACHABLE;
		}
		if (nce->nce_state == ND_INCOMPLETE ||
		    (nce->nce_flags & NCE_F_CONDEMNED) ||
		    (nce->nce_state == ND_UNREACHABLE)) {
			/*
			 * Shared failure exit.  It is also reached by the
			 * goto above with nce == NULL, which is why the
			 * nce unlock and release below are conditional.
			 */
failed:
			if (ndp_g_lock_held)
				mutex_exit(&ndp_g_lock);
			if (nce != NULL)
				mutex_exit(&nce->nce_lock);
			ire_atomic_end(irb_ptr, ire);
			ip1dbg(("ire_add_v6: No nce for dst %s \n",
			    inet_ntop(AF_INET6, &ire->ire_addr_v6,
			    buf, sizeof (buf))));
			ire_delete(ire);
			if (pire != NULL) {
				/*
				 * Assert that it is
				 * not yet removed from the list.
				 */
				ASSERT(pire->ire_ptpn != NULL);
				IRB_REFRELE(pire->ire_bucket);
				ire_refrele(pire);
			}
			if (nce != NULL)
				NCE_REFRELE_NOTR(nce);
			*ire_p = NULL;
			return (EINVAL);
		} else {
			ire->ire_nce = nce;
		}
		mutex_exit(&nce->nce_lock);
	}
	/*
	 * Find the first entry that matches ire_addr - provides
	 * tail insertion. *irep will be null if no match.
	 */
	irep = (ire_t **)irb_ptr;
	while ((ire1 = *irep) != NULL &&
	    !IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6))
		irep = &ire1->ire_next;
	ASSERT(!(ire->ire_type & IRE_BROADCAST));

	if (*irep != NULL) {
		/*
		 * Find the last ire which matches ire_addr_v6.
		 * Needed to do tail insertion among entries with the same
		 * ire_addr_v6.
		 */
		while (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6,
		    &ire1->ire_addr_v6)) {
			irep = &ire1->ire_next;
			ire1 = *irep;
			if (ire1 == NULL)
				break;
		}
	}

	if (ire->ire_type == IRE_DEFAULT) {
		/*
		 * We keep a count of default gateways which is used when
		 * assigning them as routes.
		 */
		ipv6_ire_default_count++;
		ASSERT(ipv6_ire_default_count != 0);	/* Wraparound */
	}
	/* Insert at *irep */
	ire1 = *irep;
	if (ire1 != NULL)
		ire1->ire_ptpn = &ire->ire_next;
	ire->ire_next = ire1;
	/* Link the new one in. */
	ire->ire_ptpn = irep;
	/*
	 * ire_walk routines de-reference ire_next without holding
	 * a lock. Before we point to the new ire, we want to make
	 * sure the store that sets the ire_next of the new ire
	 * reaches global visibility, so that ire_walk routines
	 * don't see a truncated list of ires i.e if the ire_next
	 * of the new ire gets set after we do "*irep = ire" due
	 * to re-ordering, the ire_walk thread will see a NULL
	 * once it accesses the ire_next of the new ire.
	 * membar_producer() makes sure that the following store
	 * happens *after* all of the above stores.
	 */
	membar_producer();
	*irep = ire;
	ire->ire_bucket = irb_ptr;
	/*
	 * We return a bumped up IRE above. Keep it symmetrical
	 * so that the callers will always have to release. This
	 * helps the callers of this function because they continue
	 * to use the IRE after adding and hence they don't have to
	 * lookup again after we return the IRE.
	 *
	 * NOTE : We don't have to use atomics as this is appearing
	 * in the list for the first time and no one else can bump
	 * up the reference count on this yet.
	 */
	IRE_REFHOLD_LOCKED(ire);
	BUMP_IRE_STATS(ire_stats_v6, ire_stats_inserted);
	irb_ptr->irb_ire_cnt++;
	if (ire->ire_marks & IRE_MARK_TEMPORARY)
		irb_ptr->irb_tmp_ire_cnt++;

	/* Account for the new IRE on its ipif and on the send-to ill. */
	if (ire->ire_ipif != NULL) {
		ire->ire_ipif->ipif_ire_cnt++;
		if (ire->ire_stq != NULL) {
			stq_ill = (ill_t *)ire->ire_stq->q_ptr;
			stq_ill->ill_ire_cnt++;
		}
	} else {
		ASSERT(ire->ire_stq == NULL);
	}

	if (ndp_g_lock_held)
		mutex_exit(&ndp_g_lock);
	ire_atomic_end(irb_ptr, ire);

	if (pire != NULL) {
		/* Assert that it is not removed from the list yet */
		ASSERT(pire->ire_ptpn != NULL);
		IRB_REFRELE(pire->ire_bucket);
		ire_refrele(pire);
	}

	if (ire->ire_type != IRE_CACHE) {
		/*
		 * For ires with a host mask see if there is an entry
		 * in the cache. If there is one flush the whole cache as
		 * there might be multiple entries due to RTF_MULTIRT (CGTP).
		 * If no entry is found then there is no need to flush the
		 * cache.
		 */

		if (ip_mask_to_plen_v6(&ire->ire_mask_v6) == IPV6_ABITS) {
			ire_t *lire;
			lire = ire_ctable_lookup_v6(&ire->ire_addr_v6, NULL,
			    IRE_CACHE, NULL, ALL_ZONES, MATCH_IRE_TYPE);
			if (lire != NULL) {
				ire_refrele(lire);
				ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
			}
		} else {
			ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
		}
	}

	*ire_p = ire;
	return (0);
}

/*
 * Search for all HOST REDIRECT routes that are
 * pointing at the specified gateway and
 * delete them. This routine is called only
 * when a default gateway is going away.
1088 */ 1089 static void 1090 ire_delete_host_redirects_v6(const in6_addr_t *gateway) 1091 { 1092 irb_t *irb_ptr; 1093 irb_t *irb; 1094 ire_t *ire; 1095 in6_addr_t gw_addr_v6; 1096 int i; 1097 1098 /* get the hash table for HOST routes */ 1099 irb_ptr = ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)]; 1100 if (irb_ptr == NULL) 1101 return; 1102 for (i = 0; (i < ip6_ftable_hash_size); i++) { 1103 irb = &irb_ptr[i]; 1104 IRB_REFHOLD(irb); 1105 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 1106 if (ire->ire_type != IRE_HOST_REDIRECT) 1107 continue; 1108 mutex_enter(&ire->ire_lock); 1109 gw_addr_v6 = ire->ire_gateway_addr_v6; 1110 mutex_exit(&ire->ire_lock); 1111 if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) 1112 ire_delete(ire); 1113 } 1114 IRB_REFRELE(irb); 1115 } 1116 } 1117 1118 /* 1119 * Delete all the cache entries with this 'addr'. This is the IPv6 counterpart 1120 * of ip_ire_clookup_and_delete. The difference being this function does not 1121 * return any value. IPv6 processing of a gratuitous ARP, as it stands, is 1122 * different than IPv4 in that, regardless of the presence of a cache entry 1123 * for this address, an ire_walk_v6 is done. Another difference is that unlike 1124 * in the case of IPv4 this does not take an ipif_t argument, since it is only 1125 * called by ip_arp_news and the match is always only on the address. 
1126 */ 1127 void 1128 ip_ire_clookup_and_delete_v6(const in6_addr_t *addr) 1129 { 1130 irb_t *irb; 1131 ire_t *cire; 1132 boolean_t found = B_FALSE; 1133 1134 irb = &ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, ip6_cache_table_size)]; 1135 IRB_REFHOLD(irb); 1136 for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) { 1137 if (cire->ire_marks == IRE_MARK_CONDEMNED) 1138 continue; 1139 if (IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, addr)) { 1140 1141 /* This signifies start of a match */ 1142 if (!found) 1143 found = B_TRUE; 1144 if (cire->ire_type == IRE_CACHE) { 1145 if (cire->ire_nce != NULL) 1146 ndp_delete(cire->ire_nce); 1147 ire_delete_v6(cire); 1148 } 1149 /* End of the match */ 1150 } else if (found) 1151 break; 1152 } 1153 IRB_REFRELE(irb); 1154 } 1155 1156 /* 1157 * Delete the specified IRE. 1158 * All calls should use ire_delete(). 1159 * Sometimes called as writer though not required by this function. 1160 * 1161 * NOTE : This function is called only if the ire was added 1162 * in the list. 1163 */ 1164 void 1165 ire_delete_v6(ire_t *ire) 1166 { 1167 in6_addr_t gw_addr_v6; 1168 1169 ASSERT(ire->ire_refcnt >= 1); 1170 ASSERT(ire->ire_ipversion == IPV6_VERSION); 1171 1172 if (ire->ire_type != IRE_CACHE) 1173 ire_flush_cache_v6(ire, IRE_FLUSH_DELETE); 1174 if (ire->ire_type == IRE_DEFAULT) { 1175 /* 1176 * when a default gateway is going away 1177 * delete all the host redirects pointing at that 1178 * gateway. 1179 */ 1180 mutex_enter(&ire->ire_lock); 1181 gw_addr_v6 = ire->ire_gateway_addr_v6; 1182 mutex_exit(&ire->ire_lock); 1183 ire_delete_host_redirects_v6(&gw_addr_v6); 1184 } 1185 } 1186 1187 /* 1188 * ire_walk routine to delete all IRE_CACHE and IRE_HOST_REDIRECT 1189 * entries. 
1190 */ 1191 /*ARGSUSED1*/ 1192 void 1193 ire_delete_cache_v6(ire_t *ire, char *arg) 1194 { 1195 char addrstr1[INET6_ADDRSTRLEN]; 1196 char addrstr2[INET6_ADDRSTRLEN]; 1197 1198 if (ire->ire_type & (IRE_CACHE | IRE_HOST_REDIRECT)) { 1199 ip1dbg(("ire_delete_cache_v6: deleted %s type %d through %s\n", 1200 inet_ntop(AF_INET6, &ire->ire_addr_v6, 1201 addrstr1, sizeof (addrstr1)), 1202 ire->ire_type, 1203 inet_ntop(AF_INET6, &ire->ire_gateway_addr_v6, 1204 addrstr2, sizeof (addrstr2)))); 1205 ire_delete(ire); 1206 } 1207 1208 } 1209 1210 /* 1211 * ire_walk routine to delete all IRE_CACHE/IRE_HOST_REDIRECT entries 1212 * that have a given gateway address. 1213 */ 1214 void 1215 ire_delete_cache_gw_v6(ire_t *ire, char *addr) 1216 { 1217 in6_addr_t *gw_addr = (in6_addr_t *)addr; 1218 char buf1[INET6_ADDRSTRLEN]; 1219 char buf2[INET6_ADDRSTRLEN]; 1220 in6_addr_t ire_gw_addr_v6; 1221 1222 if (!(ire->ire_type & (IRE_CACHE|IRE_HOST_REDIRECT))) 1223 return; 1224 1225 mutex_enter(&ire->ire_lock); 1226 ire_gw_addr_v6 = ire->ire_gateway_addr_v6; 1227 mutex_exit(&ire->ire_lock); 1228 1229 if (IN6_ARE_ADDR_EQUAL(&ire_gw_addr_v6, gw_addr)) { 1230 ip1dbg(("ire_delete_cache_gw_v6: deleted %s type %d to %s\n", 1231 inet_ntop(AF_INET6, &ire->ire_src_addr_v6, 1232 buf1, sizeof (buf1)), 1233 ire->ire_type, 1234 inet_ntop(AF_INET6, &ire_gw_addr_v6, 1235 buf2, sizeof (buf2)))); 1236 ire_delete(ire); 1237 } 1238 } 1239 1240 /* 1241 * Remove all IRE_CACHE entries that match 1242 * the ire specified. (Sometimes called 1243 * as writer though not required by this function.) 1244 * 1245 * The flag argument indicates if the 1246 * flush request is due to addition 1247 * of new route (IRE_FLUSH_ADD) or deletion of old 1248 * route (IRE_FLUSH_DELETE). 1249 * 1250 * This routine takes only the IREs from the forwarding 1251 * table and flushes the corresponding entries from 1252 * the cache table. 
 *
 * When flushing due to the deletion of an old route, it
 * just checks the cache handles (ire_phandle and ire_ihandle) and
 * deletes the ones that match.
 *
 * When flushing due to the creation of a new route, it checks
 * if a cache entry's address matches the one in the IRE and
 * that the cache entry's parent has a less specific mask than the
 * one in IRE. The destination of such a cache entry could be the
 * gateway for other cache entries, so we need to flush those as
 * well by looking for gateway addresses matching the IRE's address.
 */
void
ire_flush_cache_v6(ire_t *ire, int flag)
{
	int i;
	ire_t *cire;
	irb_t *irb;

	/* A cache entry never drives a flush; only other IRE types do. */
	if (ire->ire_type & IRE_CACHE)
		return;

	/*
	 * If a default is just created, there is no point
	 * in going through the cache, as there will not be any
	 * cached ires.
	 */
	if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD)
		return;
	if (flag == IRE_FLUSH_ADD) {
		/*
		 * This selective flush is
		 * due to the addition of
		 * new IRE.
		 */
		for (i = 0; i < ip6_cache_table_size; i++) {
			irb = &ip_cache_table_v6[i];
			/* Skip empty buckets without taking a bucket hold. */
			if ((cire = irb->irb_ire) == NULL)
				continue;
			IRB_REFHOLD(irb);
			for (cire = irb->irb_ire; cire != NULL;
			    cire = cire->ire_next) {
				if (cire->ire_type != IRE_CACHE)
					continue;
				/*
				 * If 'cire' belongs to the same subnet
				 * as the new ire being added, and 'cire'
				 * is derived from a prefix that is less
				 * specific than the new ire being added,
				 * we need to flush 'cire'; for instance,
				 * when a new interface comes up.
				 */
				if ((V6_MASK_EQ_2(cire->ire_addr_v6,
				    ire->ire_mask_v6, ire->ire_addr_v6) &&
				    (ip_mask_to_plen_v6(&cire->ire_cmask_v6) <=
				    ire->ire_masklen))) {
					ire_delete(cire);
					continue;
				}
				/*
				 * This is the case when the ire_gateway_addr
				 * of 'cire' belongs to the same subnet as
				 * the new ire being added.
				 * Flushing such ires is sometimes required to
				 * avoid misrouting: say we have a machine with
				 * two interfaces (I1 and I2), a default router
				 * R on the I1 subnet, and a host route to an
				 * off-link destination D with a gateway G on
				 * the I2 subnet.
				 * Under normal operation, we will have an
				 * on-link cache entry for G and an off-link
				 * cache entry for D with G as ire_gateway_addr,
				 * traffic to D will reach its destination
				 * through gateway G.
				 * If the administrator does 'ifconfig I2 down',
				 * the cache entries for D and G will be
				 * flushed. However, G will now be resolved as
				 * an off-link destination using R (the default
				 * router) as gateway. Then D will also be
				 * resolved as an off-link destination using G
				 * as gateway - this behavior is due to
				 * compatibility reasons, see comment in
				 * ire_ihandle_lookup_offlink(). Traffic to D
				 * will go to the router R and probably won't
				 * reach the destination.
				 * The administrator then does 'ifconfig I2 up'.
				 * Since G is on the I2 subnet, this routine
				 * will flush its cache entry. It must also
				 * flush the cache entry for D, otherwise
				 * traffic will stay misrouted until the IRE
				 * times out.
				 */
				if (V6_MASK_EQ_2(cire->ire_gateway_addr_v6,
				    ire->ire_mask_v6, ire->ire_addr_v6)) {
					ire_delete(cire);
					continue;
				}
			}
			IRB_REFRELE(irb);
		}
	} else {
		/*
		 * delete the cache entries based on
		 * handle in the IRE as this IRE is
		 * being deleted/changed.
		 */
		for (i = 0; i < ip6_cache_table_size; i++) {
			irb = &ip_cache_table_v6[i];
			/* Skip empty buckets without taking a bucket hold. */
			if ((cire = irb->irb_ire) == NULL)
				continue;
			IRB_REFHOLD(irb);
			for (cire = irb->irb_ire; cire != NULL;
			    cire = cire->ire_next) {
				if (cire->ire_type != IRE_CACHE)
					continue;
				/*
				 * Keep 'cire' unless one of its non-zero
				 * handles equals the corresponding handle
				 * of 'ire'.
				 */
				if ((cire->ire_phandle == 0 ||
				    cire->ire_phandle != ire->ire_phandle) &&
				    (cire->ire_ihandle == 0 ||
				    cire->ire_ihandle != ire->ire_ihandle))
					continue;
				ire_delete(cire);
			}
			IRB_REFRELE(irb);
		}
	}
}

/*
 * Matches the arguments passed with the values in the ire.
 *
 * Returns B_TRUE when 'ire' satisfies the address/mask comparison and
 * every criterion selected in match_flags, B_FALSE otherwise.
 *
 *	addr, mask	must be non-NULL; compared against ire_addr_v6
 *			with V6_MASK_EQ.
 *	gateway		compared with the ire's gateway when MATCH_IRE_GW
 *			is set (must then be non-NULL).
 *	type		tested against ire_type when MATCH_IRE_TYPE is set.
 *	ipif		used by MATCH_IRE_SRC, MATCH_IRE_IPIF, MATCH_IRE_ILL
 *			and MATCH_IRE_ILL_GROUP.
 *	zoneid		ALL_ZONES disables the zone checks.
 *	ihandle		compared with ire_ihandle when MATCH_IRE_IHANDLE
 *			is set.
 *
 * Note: for match types that match using "ipif" passed in, ipif
 * must be checked for non-NULL before calling this routine.
 */
static boolean_t
ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask,
    const in6_addr_t *gateway, int type, ipif_t *ipif, zoneid_t zoneid,
    uint32_t ihandle, int match_flags)
{
	in6_addr_t masked_addr;
	in6_addr_t gw_addr_v6;
	ill_t *ire_ill = NULL, *dst_ill;
	ill_t *ipif_ill = NULL;
	ill_group_t *ire_ill_group = NULL;
	ill_group_t *ipif_ill_group = NULL;
	ipif_t *src_ipif;

	ASSERT(ire->ire_ipversion == IPV6_VERSION);
	ASSERT(addr != NULL);
	ASSERT(mask != NULL);
	ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL);
	ASSERT((!(match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP))) ||
	    (ipif != NULL && ipif->ipif_isv6));
	ASSERT(!(match_flags & MATCH_IRE_WQ));

	/*
	 * HIDDEN cache entries have to be looked up specifically with
	 * MATCH_IRE_MARK_HIDDEN. MATCH_IRE_MARK_HIDDEN is usually set
	 * when the interface is FAILED or INACTIVE. In that case,
	 * any IRE_CACHES that exists should be marked with
	 * IRE_MARK_HIDDEN. So, we don't really need to match below
	 * for IRE_MARK_HIDDEN. But we do so for consistency.
	 */
	if (!(match_flags & MATCH_IRE_MARK_HIDDEN) &&
	    (ire->ire_marks & IRE_MARK_HIDDEN))
		return (B_FALSE);

	if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid) {
		/*
		 * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is
		 * valid and does not match that of ire_zoneid, a failure to
		 * match is reported at this point. Otherwise, since some IREs
		 * that are available in the global zone can be used in local
		 * zones, additional checks need to be performed:
		 *
		 *	IRE_CACHE and IRE_LOOPBACK entries should
		 *	never be matched in this situation.
		 *
		 *	IRE entries that have an interface associated with them
		 *	should in general not match unless they are an IRE_LOCAL
		 *	or in the case when MATCH_IRE_DEFAULT has been set in
		 *	the caller. In the case of the former, checking of the
		 *	other fields supplied should take place.
		 *
		 *	In the case where MATCH_IRE_DEFAULT has been set,
		 *	all of the ipif's associated with the IRE's ill are
		 *	checked to see if there is a matching zoneid. If any
		 *	one ipif has a matching zoneid, this IRE is a
		 *	potential candidate so checking of the other fields
		 *	takes place.
		 *
		 *	In the case where the IRE_INTERFACE has a usable source
		 *	address (indicated by ill_usesrc_ifindex) in the
		 *	correct zone then it's permitted to return this IRE
		 */
		if (match_flags & MATCH_IRE_ZONEONLY)
			return (B_FALSE);
		if (ire->ire_type & (IRE_CACHE | IRE_LOOPBACK))
			return (B_FALSE);
		/*
		 * Note, IRE_INTERFACE can have the stq as NULL. For
		 * example, if the default multicast route is tied to
		 * the loopback address.
		 */
		if ((ire->ire_type & IRE_INTERFACE) &&
		    (ire->ire_stq != NULL)) {
			dst_ill = (ill_t *)ire->ire_stq->q_ptr;
			/*
			 * If there is a usable source address in the
			 * zone, then it's ok to return an
			 * IRE_INTERFACE
			 */
			if ((dst_ill->ill_usesrc_ifindex != 0) &&
			    (src_ipif = ipif_select_source_v6(dst_ill, addr,
			    B_FALSE, IPV6_PREFER_SRC_DEFAULT, zoneid))
			    != NULL) {
				ip3dbg(("ire_match_args: src_ipif %p"
				    " dst_ill %p", (void *)src_ipif,
				    (void *)dst_ill));
				/* Only existence mattered; drop the ipif. */
				ipif_refrele(src_ipif);
			} else {
				ip3dbg(("ire_match_args: src_ipif NULL"
				    " dst_ill %p\n", (void *)dst_ill));
				return (B_FALSE);
			}
		}
		if (ire->ire_ipif != NULL && ire->ire_type != IRE_LOCAL &&
		    !(ire->ire_type & IRE_INTERFACE)) {
			ipif_t *tipif;

			if ((match_flags & MATCH_IRE_DEFAULT) == 0)
				return (B_FALSE);
			/*
			 * Walk the ill's ipif list under ill_lock looking
			 * for an up ipif that belongs to the caller's zone.
			 */
			mutex_enter(&ire->ire_ipif->ipif_ill->ill_lock);
			for (tipif = ire->ire_ipif->ipif_ill->ill_ipif;
			    tipif != NULL; tipif = tipif->ipif_next) {
				if (IPIF_CAN_LOOKUP(tipif) &&
				    (tipif->ipif_flags & IPIF_UP) &&
				    (tipif->ipif_zoneid == zoneid))
					break;
			}
			mutex_exit(&ire->ire_ipif->ipif_ill->ill_lock);
			if (tipif == NULL)
				return (B_FALSE);
		}
	}

	/* gw_addr_v6 is only initialized (and only read) for MATCH_IRE_GW. */
	if (match_flags & MATCH_IRE_GW) {
		mutex_enter(&ire->ire_lock);
		gw_addr_v6 = ire->ire_gateway_addr_v6;
		mutex_exit(&ire->ire_lock);
	}
	/*
	 * For IRE_CACHES, MATCH_IRE_ILL/ILL_GROUP really means that
	 * somebody wants to send out on a particular interface which
	 * is given by ire_stq and hence use ire_stq to derive the ill
	 * value. ire_ipif for IRE_CACHES is just the
	 * means of getting a source address i.e ire_src_addr_v6 =
	 * ire->ire_ipif->ipif_src_addr_v6.
	 */
	if (match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP)) {
		ire_ill = ire_to_ill(ire);
		if (ire_ill != NULL)
			ire_ill_group = ire_ill->ill_group;
		ipif_ill = ipif->ipif_ill;
		ipif_ill_group = ipif_ill->ill_group;
	}

	/* No ire_addr_v6 bits set past the mask */
	ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6,
	    ire->ire_addr_v6));
	/*
	 * NOTE(review): masked_addr is not referenced again anywhere in
	 * this function after being filled in here.
	 */
	V6_MASK_COPY(*addr, *mask, masked_addr);

	/*
	 * The address/mask comparison is unconditional; each remaining
	 * clause applies only when its MATCH_IRE_* bit is set.
	 */
	if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) &&
	    ((!(match_flags & MATCH_IRE_GW)) ||
	    IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) &&
	    ((!(match_flags & MATCH_IRE_TYPE)) ||
	    (ire->ire_type & type)) &&
	    ((!(match_flags & MATCH_IRE_SRC)) ||
	    IN6_ARE_ADDR_EQUAL(&ire->ire_src_addr_v6,
	    &ipif->ipif_v6src_addr)) &&
	    ((!(match_flags & MATCH_IRE_IPIF)) ||
	    (ire->ire_ipif == ipif)) &&
	    ((!(match_flags & MATCH_IRE_MARK_HIDDEN)) ||
	    (ire->ire_type != IRE_CACHE ||
	    ire->ire_marks & IRE_MARK_HIDDEN)) &&
	    ((!(match_flags & MATCH_IRE_ILL)) ||
	    (ire_ill == ipif_ill)) &&
	    ((!(match_flags & MATCH_IRE_IHANDLE)) ||
	    (ire->ire_ihandle == ihandle)) &&
	    ((!(match_flags & MATCH_IRE_ILL_GROUP)) ||
	    (ire_ill == ipif_ill) ||
	    (ire_ill_group != NULL &&
	    ire_ill_group == ipif_ill_group))) {
		/* We found the matched IRE */
		return (B_TRUE);
	}
	return (B_FALSE);
}

/*
 * Lookup for a route in all the tables
 *
 * The cache table is searched first and the forwarding table second.
 * When MATCH_IRE_TYPE is set in flags, a table is searched only if
 * 'type' contains one of that table's type bits (IRE_CACHETABLE or
 * IRE_FORWARDTABLE respectively).  'mask' and 'pire' are handed only to
 * the forwarding table lookup.  Returns the ire found, or NULL.
 */
ire_t *
ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask,
    const in6_addr_t *gateway, int type, ipif_t *ipif, ire_t **pire,
    zoneid_t zoneid, int flags)
{
	ire_t *ire = NULL;

	/*
	 * ire_match_args_v6() will dereference ipif if MATCH_IRE_SRC,
	 * MATCH_IRE_ILL or MATCH_IRE_ILL_GROUP is set.
	 */
	if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) &&
	    (ipif == NULL))
		return (NULL);

	/*
	 * might be asking for a cache lookup,
	 * This is not best way to lookup cache,
	 * user should call ire_cache_lookup directly.
	 *
	 * If MATCH_IRE_TYPE was set, first lookup in the cache table and then
	 * in the forwarding table, if the applicable type flags were set.
	 */
	if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) {
		ire = ire_ctable_lookup_v6(addr, gateway, type, ipif, zoneid,
		    flags);
		if (ire != NULL)
			return (ire);
	}
	if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) {
		ire = ire_ftable_lookup_v6(addr, mask, gateway, type, ipif,
		    pire, zoneid, 0, flags);
	}
	return (ire);
}

/*
 * Lookup a route in forwarding table.
 * specific lookup is indicated by passing the
 * required parameters and indicating the
 * match required in flag field.
 *
 * Looking for default route can be done in three ways
 * 1) pass mask as ipv6_all_zeros and set MATCH_IRE_MASK in flags field
 *    along with other matches.
 * 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags
 *    field along with other matches.
 * 3) if the destination and mask are passed as zeros.
 *
 * A request to return a default route if no route
 * is found, can be specified by setting MATCH_IRE_DEFAULT
 * in flags.
 *
 * It does not support recursion more than one level. It
 * will do recursive lookup only when the lookup maps to
 * a prefix or default route and MATCH_IRE_RECURSIVE flag is passed.
 *
 * If the routing table is setup to allow more than one level
 * of recursion, the cleaning up cache table will not work resulting
 * in invalid routing.
 *
 * Supports link-local addresses by following the ipif/ill when recursing.
 *
 * NOTE : When this function returns NULL, pire has already been released.
 *	  pire is valid only when this function successfully returns an
 *	  ire.
 *
 * An ire returned from here has had a reference taken on it, which the
 * caller is expected to release.
 */
ire_t *
ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask,
    const in6_addr_t *gateway, int type, ipif_t *ipif, ire_t **pire,
    zoneid_t zoneid, uint32_t ihandle, int flags)
{
	irb_t *irb_ptr;
	ire_t *rire;
	ire_t *ire = NULL;
	ire_t *saved_ire;
	nce_t *nce;
	int i;
	in6_addr_t gw_addr_v6;

	ASSERT(addr != NULL);
	ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL);
	ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL);
	ASSERT(ipif == NULL || ipif->ipif_isv6);
	ASSERT(!(flags & MATCH_IRE_WQ));

	/*
	 * When we return NULL from this function, we should make
	 * sure that *pire is NULL so that the callers will not
	 * wrongly REFRELE the pire.
	 */
	if (pire != NULL)
		*pire = NULL;
	/*
	 * ire_match_args_v6() will dereference ipif if MATCH_IRE_SRC,
	 * MATCH_IRE_ILL or MATCH_IRE_ILL_GROUP is set.
	 */
	if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) &&
	    (ipif == NULL))
		return (NULL);

	/*
	 * If the mask is known, the lookup
	 * is simple, if the mask is not known
	 * we need to search.
	 */
	if (flags & MATCH_IRE_MASK) {
		uint_t masklen;

		masklen = ip_mask_to_plen_v6(mask);
		if (ip_forwarding_table_v6[masklen] == NULL)
			return (NULL);
		irb_ptr = &(ip_forwarding_table_v6[masklen][
		    IRE_ADDR_MASK_HASH_V6(*addr, *mask, ip6_ftable_hash_size)]);
		rw_enter(&irb_ptr->irb_lock, RW_READER);
		for (ire = irb_ptr->irb_ire; ire != NULL;
		    ire = ire->ire_next) {
			if (ire->ire_marks & IRE_MARK_CONDEMNED)
				continue;
			/* found_ire is entered with irb_lock still held. */
			if (ire_match_args_v6(ire, addr, mask, gateway, type,
			    ipif, zoneid, ihandle, flags))
				goto found_ire;
		}
		rw_exit(&irb_ptr->irb_lock);
	} else {
		/*
		 * In this case we don't know the mask, we need to
		 * search the table assuming different mask sizes.
		 * we start with 128 bit mask, we don't allow default here.
		 */
		for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) {
			in6_addr_t tmpmask;

			if ((ip_forwarding_table_v6[i]) == NULL)
				continue;
			(void) ip_plen_to_mask_v6(i, &tmpmask);
			irb_ptr = &ip_forwarding_table_v6[i][
			    IRE_ADDR_MASK_HASH_V6(*addr, tmpmask,
			    ip6_ftable_hash_size)];
			rw_enter(&irb_ptr->irb_lock, RW_READER);
			for (ire = irb_ptr->irb_ire; ire != NULL;
			    ire = ire->ire_next) {
				if (ire->ire_marks & IRE_MARK_CONDEMNED)
					continue;
				/*
				 * Each candidate is matched using its own
				 * mask, since the caller supplied none.
				 */
				if (ire_match_args_v6(ire, addr,
				    &ire->ire_mask_v6, gateway, type, ipif,
				    zoneid, ihandle, flags))
					goto found_ire;
			}
			rw_exit(&irb_ptr->irb_lock);
		}
	}

	/*
	 * We come here if no route has yet been found.
	 *
	 * Handle the case where default route is
	 * requested by specifying type as one of the possible
	 * types for that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE).
	 *
	 * If MATCH_IRE_MASK is specified, then the appropriate default route
	 * would have been found above if it exists so it isn't looked up here.
	 * If MATCH_IRE_DEFAULT was also specified, then a default route will be
	 * searched for later.
	 */
	if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE &&
	    (type & (IRE_DEFAULT | IRE_INTERFACE))) {
		if (ip_forwarding_table_v6[0] != NULL) {
			/* addr & mask is zero for defaults */
			irb_ptr = &ip_forwarding_table_v6[0][
			    IRE_ADDR_HASH_V6(ipv6_all_zeros,
			    ip6_ftable_hash_size)];
			rw_enter(&irb_ptr->irb_lock, RW_READER);
			for (ire = irb_ptr->irb_ire; ire != NULL;
			    ire = ire->ire_next) {

				if (ire->ire_marks & IRE_MARK_CONDEMNED)
					continue;

				if (ire_match_args_v6(ire, addr,
				    &ipv6_all_zeros, gateway, type, ipif,
				    zoneid, ihandle, flags))
					goto found_ire;
			}
			rw_exit(&irb_ptr->irb_lock);
		}
	}
	/*
	 * We come here only if no route is found.
	 * see if the default route can be used which is allowed
	 * only if the default matching criteria is specified.
	 * The ipv6_ire_default_count tracks the number of IRE_DEFAULT
	 * entries. However, the ip_forwarding_table_v6[0] also contains
	 * interface routes thus the count can be zero.
	 */
	saved_ire = NULL;
	if ((flags & (MATCH_IRE_DEFAULT | MATCH_IRE_MASK)) ==
	    MATCH_IRE_DEFAULT) {
		ire_t *ire_origin;
		uint_t g_index;
		uint_t index;

		if (ip_forwarding_table_v6[0] == NULL)
			return (NULL);
		irb_ptr = &(ip_forwarding_table_v6[0])[0];

		/*
		 * Keep a tab on the bucket while looking the IRE_DEFAULT
		 * entries. We need to keep track of a particular IRE
		 * (ire_origin) so this ensures that it will not be unlinked
		 * from the hash list during the recursive lookup below.
		 */
		IRB_REFHOLD(irb_ptr);
		ire = irb_ptr->irb_ire;
		if (ire == NULL) {
			IRB_REFRELE(irb_ptr);
			return (NULL);
		}

		/*
		 * Get the index first, since it can be changed by other
		 * threads. Then get to the right default route skipping
		 * default interface routes if any. As we hold a reference on
		 * the IRE bucket, ipv6_ire_default_count can only increase so
		 * we can't reach the end of the hash list unexpectedly.
		 */
		if (ipv6_ire_default_count != 0) {
			g_index = ipv6_ire_default_index++;
			index = g_index % ipv6_ire_default_count;
			while (index != 0) {
				if (!(ire->ire_type & IRE_INTERFACE))
					index--;
				ire = ire->ire_next;
			}
			ASSERT(ire != NULL);
		} else {
			/*
			 * No default route, so we only have default interface
			 * routes: don't enter the first loop.
			 */
			ire = NULL;
		}

		/*
		 * Round-robin the default routers list looking for a neighbor
		 * that matches the passed in parameters and is reachable. If
		 * none found, just return a route from the default router list
		 * if it exists. If we can't find a default route (IRE_DEFAULT),
		 * look for interface default routes.
		 * We start with the ire we found above and we walk the hash
		 * list until we're back where we started, see
		 * ire_get_next_default_ire(). It doesn't matter if default
		 * routes are added or deleted by other threads - we know this
		 * ire will stay in the list because we hold a reference on the
		 * ire bucket.
		 * NB: if we only have interface default routes, ire is NULL so
		 * we don't even enter this loop (see above).
		 */
		ire_origin = ire;
		for (; ire != NULL;
		    ire = ire_get_next_default_ire(ire, ire_origin)) {

			if (ire_match_args_v6(ire, addr,
			    &ipv6_all_zeros, gateway, type, ipif,
			    zoneid, ihandle, flags)) {
				int match_flags;

				/*
				 * We have something to work with.
				 * If we can find a resolved/reachable
				 * entry, we will use this. Otherwise
				 * we'll try to find an entry that has
				 * a resolved cache entry. We will fallback
				 * on this if we don't find anything else.
				 */
				if (saved_ire == NULL)
					saved_ire = ire;
				mutex_enter(&ire->ire_lock);
				gw_addr_v6 = ire->ire_gateway_addr_v6;
				mutex_exit(&ire->ire_lock);
				match_flags = MATCH_IRE_ILL_GROUP;
				rire = ire_ctable_lookup_v6(&gw_addr_v6, NULL,
				    0, ire->ire_ipif, zoneid, match_flags);
				if (rire != NULL) {
					nce = rire->ire_nce;
					if (nce != NULL &&
					    NCE_ISREACHABLE(nce) &&
					    nce->nce_flags & NCE_F_ISROUTER) {
						/*
						 * Reachable router: hold the
						 * chosen ire before dropping
						 * the bucket reference.
						 */
						ire_refrele(rire);
						IRE_REFHOLD(ire);
						IRB_REFRELE(irb_ptr);
						goto found_ire_held;
					} else if (nce != NULL &&
					    !(nce->nce_flags &
					    NCE_F_ISROUTER)) {
						/*
						 * Make sure we don't use
						 * this ire
						 */
						if (saved_ire == ire)
							saved_ire = NULL;
					}
					ire_refrele(rire);
				} else if (ipv6_ire_default_count > 1 &&
				    zoneid != ALL_ZONES) {
					/*
					 * When we're in a local zone, we're
					 * only interested in default routers
					 * that are reachable through ipifs
					 * within our zone.
					 * The potentially expensive call to
					 * ire_route_lookup_v6() is avoided when
					 * we have only one default route.
					 *
					 * NOTE(review): the test above is
					 * against ALL_ZONES; confirm that it
					 * expresses "local zone" as intended.
					 */
					rire = ire_route_lookup_v6(&gw_addr_v6,
					    NULL, NULL, 0, ire->ire_ipif, NULL,
					    zoneid, match_flags);
					if (rire != NULL) {
						ire_refrele(rire);
						saved_ire = ire;
					} else if (saved_ire == ire) {
						/*
						 * Make sure we don't use
						 * this ire
						 */
						saved_ire = NULL;
					}
				}
			}
		}
		if (saved_ire != NULL) {
			ire = saved_ire;
			IRE_REFHOLD(ire);
			IRB_REFRELE(irb_ptr);
			goto found_ire_held;
		} else {
			/*
			 * Look for a interface default route matching the
			 * args passed in. No round robin here. Just pick
			 * the right one.
			 */
			for (ire = irb_ptr->irb_ire; ire != NULL;
			    ire = ire->ire_next) {

				if (!(ire->ire_type & IRE_INTERFACE))
					continue;

				if (ire->ire_marks & IRE_MARK_CONDEMNED)
					continue;

				if (ire_match_args_v6(ire, addr,
				    &ipv6_all_zeros, gateway, type, ipif,
				    zoneid, ihandle, flags)) {
					IRE_REFHOLD(ire);
					IRB_REFRELE(irb_ptr);
					goto found_ire_held;
				}
			}
			IRB_REFRELE(irb_ptr);
		}
	}
	ASSERT(ire == NULL);
	ip1dbg(("ire_ftable_lookup_v6: returning NULL ire"));
	return (NULL);
found_ire:
	/* Reached with irb_ptr->irb_lock held as reader. */
	ASSERT((ire->ire_marks & IRE_MARK_CONDEMNED) == 0);
	IRE_REFHOLD(ire);
	rw_exit(&irb_ptr->irb_lock);

found_ire_held:
	/* Reached with a reference already taken on ire and no lock held. */
	if ((flags & MATCH_IRE_RJ_BHOLE) &&
	    (ire->ire_flags & (RTF_BLACKHOLE | RTF_REJECT))) {
		return (ire);
	}
	/*
	 * At this point, IRE that was found must be an IRE_FORWARDTABLE
	 * or IRE_CACHETABLE type.  If this is a recursive lookup and an
	 * IRE_INTERFACE type was found, return that.  If it was some other
	 * IRE_FORWARDTABLE type of IRE (one of the prefix types), then it
	 * is necessary to fill in the parent IRE pointed to by pire, and
	 * then lookup the gateway address of the parent.  For backwards
	 * compatibility, if this lookup returns an
	 * IRE other than a IRE_CACHETABLE or IRE_INTERFACE, then one more level
	 * of lookup is done.
	 */
	if (flags & MATCH_IRE_RECURSIVE) {
		ipif_t *gw_ipif;
		int match_flags = MATCH_IRE_DSTONLY;

		if (ire->ire_type & IRE_INTERFACE)
			return (ire);
		if (pire != NULL)
			*pire = ire;
		/*
		 * If we can't find an IRE_INTERFACE or the caller has not
		 * asked for pire, we need to REFRELE the saved_ire.
		 */
		saved_ire = ire;

		/*
		 * Currently MATCH_IRE_ILL is never used with
		 * (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT) while
		 * sending out packets as MATCH_IRE_ILL is used only
		 * for communicating with on-link hosts. We can't assert
		 * that here as RTM_GET calls this function with
		 * MATCH_IRE_ILL | MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE.
		 * We have already used the MATCH_IRE_ILL in determining
		 * the right prefix route at this point. To match the
		 * behavior of how we locate routes while sending out
		 * packets, we don't want to use MATCH_IRE_ILL below
		 * while locating the interface route.
		 */
		if (ire->ire_ipif != NULL)
			match_flags |= MATCH_IRE_ILL_GROUP;

		mutex_enter(&ire->ire_lock);
		gw_addr_v6 = ire->ire_gateway_addr_v6;
		mutex_exit(&ire->ire_lock);

		/* First level: resolve the parent's gateway address. */
		ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 0,
		    ire->ire_ipif, NULL, zoneid, match_flags);
		if (ire == NULL) {
			/*
			 * In this case we have to deal with the
			 * MATCH_IRE_PARENT flag, which means the
			 * parent has to be returned if ire is NULL.
			 * The aim of this is to have (at least) a starting
			 * ire when we want to look at all of the ires in a
			 * bucket aimed at a single destination (as is the
			 * case in ip_newroute_v6 for the RTF_MULTIRT
			 * flagged routes).
			 */
			if (flags & MATCH_IRE_PARENT) {
				if (pire != NULL) {
					/*
					 * Need an extra REFHOLD, if the
					 * parent ire is returned via both
					 * ire and pire.
					 */
					IRE_REFHOLD(saved_ire);
				}
				ire = saved_ire;
			} else {
				ire_refrele(saved_ire);
				if (pire != NULL)
					*pire = NULL;
			}
			return (ire);
		}
		if (ire->ire_type & (IRE_CACHETABLE | IRE_INTERFACE)) {
			/*
			 * If the caller did not ask for pire, release
			 * it now.
			 */
			if (pire == NULL) {
				ire_refrele(saved_ire);
			}
			return (ire);
		}
		/*
		 * Second (final) level: copy what is needed out of the
		 * intermediate ire before releasing it, then restrict the
		 * lookup to IRE_CACHETABLE/IRE_INTERFACE types.
		 */
		match_flags |= MATCH_IRE_TYPE;
		mutex_enter(&ire->ire_lock);
		gw_addr_v6 = ire->ire_gateway_addr_v6;
		mutex_exit(&ire->ire_lock);
		gw_ipif = ire->ire_ipif;
		ire_refrele(ire);
		ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL,
		    (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid,
		    match_flags);
		if (ire == NULL) {
			/*
			 * In this case we have to deal with the
			 * MATCH_IRE_PARENT flag, which means the
			 * parent has to be returned if ire is NULL.
			 * The aim of this is to have (at least) a starting
			 * ire when we want to look at all of the ires in a
			 * bucket aimed at a single destination (as is the
			 * case in ip_newroute_v6 for the RTF_MULTIRT
			 * flagged routes).
			 */
			if (flags & MATCH_IRE_PARENT) {
				if (pire != NULL) {
					/*
					 * Need an extra REFHOLD, if the
					 * parent ire is returned via both
					 * ire and pire.
					 */
					IRE_REFHOLD(saved_ire);
				}
				ire = saved_ire;
			} else {
				ire_refrele(saved_ire);
				if (pire != NULL)
					*pire = NULL;
			}
			return (ire);
		} else if (pire == NULL) {
			/*
			 * If the caller did not ask for pire, release
			 * it now.
			 */
			ire_refrele(saved_ire);
		}
		return (ire);
	}

	ASSERT(pire == NULL || *pire == NULL);
	return (ire);
}

/*
 * Looks up cache table for a route.
 * specific lookup can be indicated by
 * passing the MATCH_* flags and the
 * necessary parameters.
2068 */ 2069 ire_t * 2070 ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway, 2071 int type, ipif_t *ipif, zoneid_t zoneid, int flags) 2072 { 2073 ire_t *ire; 2074 irb_t *irb_ptr; 2075 ASSERT(addr != NULL); 2076 ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 2077 2078 /* 2079 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or 2080 * MATCH_IRE_ILL is set. 2081 */ 2082 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) && 2083 (ipif == NULL)) 2084 return (NULL); 2085 2086 irb_ptr = &ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 2087 ip6_cache_table_size)]; 2088 rw_enter(&irb_ptr->irb_lock, RW_READER); 2089 for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 2090 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2091 continue; 2092 2093 ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones)); 2094 if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, gateway, 2095 type, ipif, zoneid, 0, flags)) { 2096 IRE_REFHOLD(ire); 2097 rw_exit(&irb_ptr->irb_lock); 2098 return (ire); 2099 } 2100 } 2101 rw_exit(&irb_ptr->irb_lock); 2102 return (NULL); 2103 } 2104 2105 /* 2106 * Lookup cache. Don't return IRE_MARK_HIDDEN entries. Callers 2107 * should use ire_ctable_lookup with MATCH_IRE_MARK_HIDDEN to get 2108 * to the hidden ones. 
2109 */ 2110 ire_t * 2111 ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid) 2112 { 2113 irb_t *irb_ptr; 2114 ire_t *ire; 2115 2116 irb_ptr = &ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, 2117 ip6_cache_table_size)]; 2118 rw_enter(&irb_ptr->irb_lock, RW_READER); 2119 for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) { 2120 if (ire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 2121 continue; 2122 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, addr)) { 2123 if (zoneid == ALL_ZONES || ire->ire_zoneid == zoneid || 2124 ire->ire_type == IRE_LOCAL) { 2125 IRE_REFHOLD(ire); 2126 rw_exit(&irb_ptr->irb_lock); 2127 return (ire); 2128 } 2129 } 2130 } 2131 rw_exit(&irb_ptr->irb_lock); 2132 return (NULL); 2133 } 2134 2135 /* 2136 * Locate the interface ire that is tied to the cache ire 'cire' via 2137 * cire->ire_ihandle. 2138 * 2139 * We are trying to create the cache ire for an onlink destn. or 2140 * gateway in 'cire'. We are called from ire_add_v6() in the IRE_IF_RESOLVER 2141 * case for xresolv interfaces, after the ire has come back from 2142 * an external resolver. 2143 */ 2144 static ire_t * 2145 ire_ihandle_lookup_onlink_v6(ire_t *cire) 2146 { 2147 ire_t *ire; 2148 int match_flags; 2149 int i; 2150 int j; 2151 irb_t *irb_ptr; 2152 2153 ASSERT(cire != NULL); 2154 2155 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2156 /* 2157 * We know that the mask of the interface ire equals cire->ire_cmask. 2158 * (When ip_newroute_v6() created 'cire' for an on-link destn. 2159 * it set its cmask from the interface ire's mask) 2160 */ 2161 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 2162 NULL, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle, 2163 match_flags); 2164 if (ire != NULL) 2165 return (ire); 2166 /* 2167 * If we didn't find an interface ire above, we can't declare failure. 2168 * For backwards compatibility, we need to support prefix routes 2169 * pointing to next hop gateways that are not on-link. 
2170 * 2171 * In the resolver/noresolver case, ip_newroute_v6() thinks 2172 * it is creating the cache ire for an onlink destination in 'cire'. 2173 * But 'cire' is not actually onlink, because ire_ftable_lookup_v6() 2174 * cheated it, by doing ire_route_lookup_v6() twice and returning an 2175 * interface ire. 2176 * 2177 * Eg. default - gw1 (line 1) 2178 * gw1 - gw2 (line 2) 2179 * gw2 - hme0 (line 3) 2180 * 2181 * In the above example, ip_newroute_v6() tried to create the cache ire 2182 * 'cire' for gw1, based on the interface route in line 3. The 2183 * ire_ftable_lookup_v6() above fails, because there is 2184 * no interface route to reach gw1. (it is gw2). We fall thru below. 2185 * 2186 * Do a brute force search based on the ihandle in a subset of the 2187 * forwarding tables, corresponding to cire->ire_cmask_v6. Otherwise 2188 * things become very complex, since we don't have 'pire' in this 2189 * case. (Also note that this method is not possible in the offlink 2190 * case because we don't know the mask) 2191 */ 2192 i = ip_mask_to_plen_v6(&cire->ire_cmask_v6); 2193 if ((ip_forwarding_table_v6[i]) == NULL) 2194 return (NULL); 2195 for (j = 0; j < ip6_ftable_hash_size; j++) { 2196 irb_ptr = &ip_forwarding_table_v6[i][j]; 2197 rw_enter(&irb_ptr->irb_lock, RW_READER); 2198 for (ire = irb_ptr->irb_ire; ire != NULL; 2199 ire = ire->ire_next) { 2200 if (ire->ire_marks & IRE_MARK_CONDEMNED) 2201 continue; 2202 if ((ire->ire_type & IRE_INTERFACE) && 2203 (ire->ire_ihandle == cire->ire_ihandle)) { 2204 IRE_REFHOLD(ire); 2205 rw_exit(&irb_ptr->irb_lock); 2206 return (ire); 2207 } 2208 } 2209 rw_exit(&irb_ptr->irb_lock); 2210 } 2211 return (NULL); 2212 } 2213 2214 2215 /* 2216 * Locate the interface ire that is tied to the cache ire 'cire' via 2217 * cire->ire_ihandle. 2218 * 2219 * We are trying to create the cache ire for an offlink destn based 2220 * on the cache ire of the gateway in 'cire'. 'pire' is the prefix ire 2221 * as found by ip_newroute_v6(). 
We are called from ip_newroute_v6() in 2222 * the IRE_CACHE case. 2223 */ 2224 ire_t * 2225 ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire) 2226 { 2227 ire_t *ire; 2228 int match_flags; 2229 in6_addr_t gw_addr; 2230 ipif_t *gw_ipif; 2231 2232 ASSERT(cire != NULL && pire != NULL); 2233 2234 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK; 2235 /* 2236 * ip_newroute_v6 calls ire_ftable_lookup with MATCH_IRE_ILL only 2237 * for on-link hosts. We should never be here for onlink. 2238 * Thus, use MATCH_IRE_ILL_GROUP. 2239 */ 2240 if (pire->ire_ipif != NULL) 2241 match_flags |= MATCH_IRE_ILL_GROUP; 2242 /* 2243 * We know that the mask of the interface ire equals cire->ire_cmask. 2244 * (When ip_newroute_v6() created 'cire' for an on-link destn. it set 2245 * its cmask from the interface ire's mask) 2246 */ 2247 ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 0, 2248 IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle, 2249 match_flags); 2250 if (ire != NULL) 2251 return (ire); 2252 /* 2253 * If we didn't find an interface ire above, we can't declare failure. 2254 * For backwards compatibility, we need to support prefix routes 2255 * pointing to next hop gateways that are not on-link. 2256 * 2257 * Assume we are trying to ping some offlink destn, and we have the 2258 * routing table below. 2259 * 2260 * Eg. default - gw1 <--- pire (line 1) 2261 * gw1 - gw2 (line 2) 2262 * gw2 - hme0 (line 3) 2263 * 2264 * If we already have a cache ire for gw1 in 'cire', the 2265 * ire_ftable_lookup_v6 above would have failed, since there is no 2266 * interface ire to reach gw1. We will fallthru below. 2267 * 2268 * Here we duplicate the steps that ire_ftable_lookup_v6() did in 2269 * getting 'cire' from 'pire', in the MATCH_IRE_RECURSIVE case. 2270 * The differences are the following 2271 * i. We want the interface ire only, so we call 2272 * ire_ftable_lookup_v6() instead of ire_route_lookup_v6() 2273 * ii. 
We look for only prefix routes in the 1st call below. 2274 * ii. We want to match on the ihandle in the 2nd call below. 2275 */ 2276 match_flags = MATCH_IRE_TYPE; 2277 if (pire->ire_ipif != NULL) 2278 match_flags |= MATCH_IRE_ILL_GROUP; 2279 2280 mutex_enter(&pire->ire_lock); 2281 gw_addr = pire->ire_gateway_addr_v6; 2282 mutex_exit(&pire->ire_lock); 2283 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_OFFSUBNET, 2284 pire->ire_ipif, NULL, ALL_ZONES, 0, match_flags); 2285 if (ire == NULL) 2286 return (NULL); 2287 /* 2288 * At this point 'ire' corresponds to the entry shown in line 2. 2289 * gw_addr is 'gw2' in the example above. 2290 */ 2291 mutex_enter(&ire->ire_lock); 2292 gw_addr = ire->ire_gateway_addr_v6; 2293 mutex_exit(&ire->ire_lock); 2294 gw_ipif = ire->ire_ipif; 2295 ire_refrele(ire); 2296 2297 match_flags |= MATCH_IRE_IHANDLE; 2298 ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_INTERFACE, 2299 gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle, match_flags); 2300 return (ire); 2301 } 2302 2303 /* 2304 * Return the IRE_LOOPBACK, IRE_IF_RESOLVER or IRE_IF_NORESOLVER 2305 * ire associated with the specified ipif. 2306 * 2307 * This might occasionally be called when IPIF_UP is not set since 2308 * the IPV6_MULTICAST_IF as well as creating interface routes 2309 * allows specifying a down ipif (ipif_lookup* match ipifs that are down). 2310 * 2311 * Note that if IPIF_NOLOCAL, IPIF_NOXMIT, or IPIF_DEPRECATED is set on 2312 * the ipif this routine might return NULL. 2313 * (Sometimes called as writer though not required by this function.) 2314 */ 2315 ire_t * 2316 ipif_to_ire_v6(ipif_t *ipif) 2317 { 2318 ire_t *ire; 2319 2320 ASSERT(ipif->ipif_isv6); 2321 if (ipif->ipif_ire_type == IRE_LOOPBACK) { 2322 ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, NULL, 2323 IRE_LOOPBACK, ipif, ALL_ZONES, 2324 (MATCH_IRE_TYPE | MATCH_IRE_IPIF)); 2325 } else if (ipif->ipif_flags & IPIF_POINTOPOINT) { 2326 /* In this case we need to lookup destination address. 
*/ 2327 ire = ire_ftable_lookup_v6(&ipif->ipif_v6pp_dst_addr, 2328 &ipv6_all_ones, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES, 2329 0, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | MATCH_IRE_MASK)); 2330 } else { 2331 ire = ire_ftable_lookup_v6(&ipif->ipif_v6subnet, 2332 &ipif->ipif_v6net_mask, NULL, IRE_INTERFACE, ipif, NULL, 2333 ALL_ZONES, 0, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | 2334 MATCH_IRE_MASK)); 2335 } 2336 return (ire); 2337 } 2338 2339 /* 2340 * Return B_TRUE if a multirt route is resolvable 2341 * (or if no route is resolved yet), B_FALSE otherwise. 2342 * This only works in the global zone. 2343 */ 2344 boolean_t 2345 ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp) 2346 { 2347 ire_t *first_fire; 2348 ire_t *first_cire; 2349 ire_t *fire; 2350 ire_t *cire; 2351 irb_t *firb; 2352 irb_t *cirb; 2353 int unres_cnt = 0; 2354 boolean_t resolvable = B_FALSE; 2355 2356 /* Retrieve the first IRE_HOST that matches the destination */ 2357 first_fire = ire_ftable_lookup_v6(v6dstp, &ipv6_all_ones, 0, IRE_HOST, 2358 NULL, NULL, ALL_ZONES, 0, MATCH_IRE_MASK | MATCH_IRE_TYPE); 2359 2360 /* No route at all */ 2361 if (first_fire == NULL) { 2362 return (B_TRUE); 2363 } 2364 2365 firb = first_fire->ire_bucket; 2366 ASSERT(firb); 2367 2368 /* Retrieve the first IRE_CACHE ire for that destination. */ 2369 first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID); 2370 2371 /* No resolved route. */ 2372 if (first_cire == NULL) { 2373 ire_refrele(first_fire); 2374 return (B_TRUE); 2375 } 2376 2377 /* At least one route is resolved. */ 2378 2379 cirb = first_cire->ire_bucket; 2380 ASSERT(cirb); 2381 2382 /* Count the number of routes to that dest that are declared. 
*/ 2383 IRB_REFHOLD(firb); 2384 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2385 if (!(fire->ire_flags & RTF_MULTIRT)) 2386 continue; 2387 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, v6dstp)) 2388 continue; 2389 unres_cnt++; 2390 } 2391 IRB_REFRELE(firb); 2392 2393 2394 /* Then subtract the number of routes to that dst that are resolved */ 2395 IRB_REFHOLD(cirb); 2396 for (cire = first_cire; cire != NULL; cire = cire->ire_next) { 2397 if (!(cire->ire_flags & RTF_MULTIRT)) 2398 continue; 2399 if (!IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, v6dstp)) 2400 continue; 2401 if (cire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN)) 2402 continue; 2403 unres_cnt--; 2404 } 2405 IRB_REFRELE(cirb); 2406 2407 /* At least one route is unresolved; search for a resolvable route. */ 2408 if (unres_cnt > 0) 2409 resolvable = ire_multirt_lookup_v6(&first_cire, &first_fire, 2410 MULTIRT_USESTAMP|MULTIRT_CACHEGW); 2411 2412 if (first_fire) 2413 ire_refrele(first_fire); 2414 2415 if (first_cire) 2416 ire_refrele(first_cire); 2417 2418 return (resolvable); 2419 } 2420 2421 2422 /* 2423 * Return B_TRUE and update *ire_arg and *fire_arg 2424 * if at least one resolvable route is found. 2425 * Return B_FALSE otherwise (all routes are resolved or 2426 * the remaining unresolved routes are all unresolvable). 2427 * This only works in the global zone. 
2428 */ 2429 boolean_t 2430 ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags) 2431 { 2432 clock_t delta; 2433 ire_t *best_fire = NULL; 2434 ire_t *best_cire = NULL; 2435 ire_t *first_fire; 2436 ire_t *first_cire; 2437 ire_t *fire; 2438 ire_t *cire; 2439 irb_t *firb = NULL; 2440 irb_t *cirb = NULL; 2441 ire_t *gw_ire; 2442 boolean_t already_resolved; 2443 boolean_t res; 2444 in6_addr_t v6dst; 2445 in6_addr_t v6gw; 2446 2447 ip2dbg(("ire_multirt_lookup_v6: *ire_arg %p, *fire_arg %p, " 2448 "flags %04x\n", (void *)*ire_arg, (void *)*fire_arg, flags)); 2449 2450 ASSERT(ire_arg); 2451 ASSERT(fire_arg); 2452 2453 /* Not an IRE_HOST ire; give up. */ 2454 if ((*fire_arg == NULL) || 2455 ((*fire_arg)->ire_type != IRE_HOST)) { 2456 return (B_FALSE); 2457 } 2458 2459 /* This is the first IRE_HOST ire for that destination. */ 2460 first_fire = *fire_arg; 2461 firb = first_fire->ire_bucket; 2462 ASSERT(firb); 2463 2464 mutex_enter(&first_fire->ire_lock); 2465 v6dst = first_fire->ire_addr_v6; 2466 mutex_exit(&first_fire->ire_lock); 2467 2468 ip2dbg(("ire_multirt_lookup_v6: dst %08x\n", 2469 ntohl(V4_PART_OF_V6(v6dst)))); 2470 2471 /* 2472 * Retrieve the first IRE_CACHE ire for that destination; 2473 * if we don't find one, no route for that dest is 2474 * resolved yet. 2475 */ 2476 first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID); 2477 if (first_cire) { 2478 cirb = first_cire->ire_bucket; 2479 } 2480 2481 ip2dbg(("ire_multirt_lookup_v6: first_cire %p\n", (void *)first_cire)); 2482 2483 /* 2484 * Search for a resolvable route, giving the top priority 2485 * to routes that can be resolved without any call to the resolver. 2486 */ 2487 IRB_REFHOLD(firb); 2488 2489 if (!IN6_IS_ADDR_MULTICAST(&v6dst)) { 2490 /* 2491 * For all multiroute IRE_HOST ires for that destination, 2492 * check if the route via the IRE_HOST's gateway is 2493 * resolved yet. 
2494 */ 2495 for (fire = first_fire; fire != NULL; fire = fire->ire_next) { 2496 2497 if (!(fire->ire_flags & RTF_MULTIRT)) 2498 continue; 2499 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2500 continue; 2501 2502 mutex_enter(&fire->ire_lock); 2503 v6gw = fire->ire_gateway_addr_v6; 2504 mutex_exit(&fire->ire_lock); 2505 2506 ip2dbg(("ire_multirt_lookup_v6: fire %p, " 2507 "ire_addr %08x, ire_gateway_addr %08x\n", 2508 (void *)fire, 2509 ntohl(V4_PART_OF_V6(fire->ire_addr_v6)), 2510 ntohl(V4_PART_OF_V6(v6gw)))); 2511 2512 already_resolved = B_FALSE; 2513 2514 if (first_cire) { 2515 ASSERT(cirb); 2516 2517 IRB_REFHOLD(cirb); 2518 /* 2519 * For all IRE_CACHE ires for that 2520 * destination. 2521 */ 2522 for (cire = first_cire; 2523 cire != NULL; 2524 cire = cire->ire_next) { 2525 2526 if (!(cire->ire_flags & RTF_MULTIRT)) 2527 continue; 2528 if (!IN6_ARE_ADDR_EQUAL( 2529 &cire->ire_addr_v6, &v6dst)) 2530 continue; 2531 if (cire->ire_marks & 2532 (IRE_MARK_CONDEMNED| 2533 IRE_MARK_HIDDEN)) 2534 continue; 2535 /* 2536 * Check if the IRE_CACHE's gateway 2537 * matches the IRE_HOST's gateway. 2538 */ 2539 if (IN6_ARE_ADDR_EQUAL( 2540 &cire->ire_gateway_addr_v6, 2541 &v6gw)) { 2542 already_resolved = B_TRUE; 2543 break; 2544 } 2545 } 2546 IRB_REFRELE(cirb); 2547 } 2548 2549 /* 2550 * This route is already resolved; 2551 * proceed with next one. 2552 */ 2553 if (already_resolved) { 2554 ip2dbg(("ire_multirt_lookup_v6: found cire %p, " 2555 "already resolved\n", (void *)cire)); 2556 continue; 2557 } 2558 2559 /* 2560 * The route is unresolved; is it actually 2561 * resolvable, i.e. is there a cache or a resolver 2562 * for the gateway? 
2563 */ 2564 gw_ire = ire_route_lookup_v6(&v6gw, 0, 0, 0, NULL, NULL, 2565 ALL_ZONES, MATCH_IRE_RECURSIVE); 2566 2567 ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n", 2568 (void *)gw_ire)); 2569 2570 /* 2571 * This route can be resolved without any call to the 2572 * resolver; if the MULTIRT_CACHEGW flag is set, 2573 * give the top priority to this ire and exit the 2574 * loop. 2575 * This occurs when an resolver reply is processed 2576 * through ip_wput_nondata() 2577 */ 2578 if ((flags & MULTIRT_CACHEGW) && 2579 (gw_ire != NULL) && 2580 (gw_ire->ire_type & IRE_CACHETABLE)) { 2581 /* 2582 * Release the resolver associated to the 2583 * previous candidate best ire, if any. 2584 */ 2585 if (best_cire) { 2586 ire_refrele(best_cire); 2587 ASSERT(best_fire); 2588 } 2589 2590 best_fire = fire; 2591 best_cire = gw_ire; 2592 2593 ip2dbg(("ire_multirt_lookup_v6: found top prio " 2594 "best_fire %p, best_cire %p\n", 2595 (void *)best_fire, (void *)best_cire)); 2596 break; 2597 } 2598 2599 /* 2600 * Compute the time elapsed since our preceding 2601 * attempt to resolve that route. 2602 * If the MULTIRT_USESTAMP flag is set, we take that 2603 * route into account only if this time interval 2604 * exceeds ip_multirt_resolution_interval; 2605 * this prevents us from attempting to resolve a 2606 * broken route upon each sending of a packet. 2607 */ 2608 delta = lbolt - fire->ire_last_used_time; 2609 delta = TICK_TO_MSEC(delta); 2610 2611 res = (boolean_t) 2612 ((delta > ip_multirt_resolution_interval) || 2613 (!(flags & MULTIRT_USESTAMP))); 2614 2615 ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, " 2616 "res %d\n", 2617 (void *)fire, delta, res)); 2618 2619 if (res) { 2620 /* 2621 * A resolver exists for the gateway: save 2622 * the current IRE_HOST ire as a candidate 2623 * best ire. If we later discover that a 2624 * top priority ire exists (i.e. no need to 2625 * call the resolver), then this new ire 2626 * will be preferred to the current one. 
2627 */ 2628 if (gw_ire != NULL) { 2629 if (best_fire == NULL) { 2630 ASSERT(best_cire == NULL); 2631 2632 best_fire = fire; 2633 best_cire = gw_ire; 2634 2635 ip2dbg(("ire_multirt_lookup_v6:" 2636 "found candidate " 2637 "best_fire %p, " 2638 "best_cire %p\n", 2639 (void *)best_fire, 2640 (void *)best_cire)); 2641 2642 /* 2643 * If MULTIRT_CACHEGW is not 2644 * set, we ignore the top 2645 * priority ires that can 2646 * be resolved without any 2647 * call to the resolver; 2648 * In that case, there is 2649 * actually no need 2650 * to continue the loop. 2651 */ 2652 if (!(flags & 2653 MULTIRT_CACHEGW)) { 2654 break; 2655 } 2656 continue; 2657 } 2658 } else { 2659 /* 2660 * No resolver for the gateway: the 2661 * route is not resolvable. 2662 * If the MULTIRT_SETSTAMP flag is 2663 * set, we stamp the IRE_HOST ire, 2664 * so we will not select it again 2665 * during this resolution interval. 2666 */ 2667 if (flags & MULTIRT_SETSTAMP) 2668 fire->ire_last_used_time = 2669 lbolt; 2670 } 2671 } 2672 2673 if (gw_ire != NULL) 2674 ire_refrele(gw_ire); 2675 } 2676 } else { /* IN6_IS_ADDR_MULTICAST(&v6dst) */ 2677 2678 for (fire = first_fire; 2679 fire != NULL; 2680 fire = fire->ire_next) { 2681 2682 if (!(fire->ire_flags & RTF_MULTIRT)) 2683 continue; 2684 if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst)) 2685 continue; 2686 2687 already_resolved = B_FALSE; 2688 2689 mutex_enter(&fire->ire_lock); 2690 v6gw = fire->ire_gateway_addr_v6; 2691 mutex_exit(&fire->ire_lock); 2692 2693 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2694 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, 2695 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE); 2696 2697 /* No resolver for the gateway; we skip this ire. */ 2698 if (gw_ire == NULL) { 2699 continue; 2700 } 2701 2702 if (first_cire) { 2703 2704 IRB_REFHOLD(cirb); 2705 /* 2706 * For all IRE_CACHE ires for that 2707 * destination. 
2708 */ 2709 for (cire = first_cire; 2710 cire != NULL; 2711 cire = cire->ire_next) { 2712 2713 if (!(cire->ire_flags & RTF_MULTIRT)) 2714 continue; 2715 if (!IN6_ARE_ADDR_EQUAL( 2716 &cire->ire_addr_v6, &v6dst)) 2717 continue; 2718 if (cire->ire_marks & 2719 (IRE_MARK_CONDEMNED| 2720 IRE_MARK_HIDDEN)) 2721 continue; 2722 /* 2723 * Cache entries are linked to the 2724 * parent routes using the parent handle 2725 * (ire_phandle). If no cache entry has 2726 * the same handle as fire, fire is 2727 * still unresolved. 2728 */ 2729 ASSERT(cire->ire_phandle != 0); 2730 if (cire->ire_phandle == 2731 fire->ire_phandle) { 2732 already_resolved = B_TRUE; 2733 break; 2734 } 2735 } 2736 IRB_REFRELE(cirb); 2737 } 2738 2739 /* 2740 * This route is already resolved; proceed with 2741 * next one. 2742 */ 2743 if (already_resolved) { 2744 ire_refrele(gw_ire); 2745 continue; 2746 } 2747 2748 /* 2749 * Compute the time elapsed since our preceding 2750 * attempt to resolve that route. 2751 * If the MULTIRT_USESTAMP flag is set, we take 2752 * that route into account only if this time 2753 * interval exceeds ip_multirt_resolution_interval; 2754 * this prevents us from attempting to resolve a 2755 * broken route upon each sending of a packet. 2756 */ 2757 delta = lbolt - fire->ire_last_used_time; 2758 delta = TICK_TO_MSEC(delta); 2759 2760 res = (boolean_t) 2761 ((delta > ip_multirt_resolution_interval) || 2762 (!(flags & MULTIRT_USESTAMP))); 2763 2764 ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, " 2765 "flags %04x, res %d\n", 2766 (void *)fire, delta, flags, res)); 2767 2768 if (res) { 2769 if (best_cire) { 2770 /* 2771 * Release the resolver associated 2772 * to the preceding candidate best 2773 * ire, if any. 
2774 */ 2775 ire_refrele(best_cire); 2776 ASSERT(best_fire); 2777 } 2778 best_fire = fire; 2779 best_cire = gw_ire; 2780 continue; 2781 } 2782 2783 ire_refrele(gw_ire); 2784 } 2785 } 2786 2787 if (best_fire) { 2788 IRE_REFHOLD(best_fire); 2789 } 2790 IRB_REFRELE(firb); 2791 2792 /* Release the first IRE_CACHE we initially looked up, if any. */ 2793 if (first_cire) 2794 ire_refrele(first_cire); 2795 2796 /* Found a resolvable route. */ 2797 if (best_fire) { 2798 ASSERT(best_cire); 2799 2800 if (*fire_arg) 2801 ire_refrele(*fire_arg); 2802 if (*ire_arg) 2803 ire_refrele(*ire_arg); 2804 2805 /* 2806 * Update the passed arguments with the 2807 * resolvable multirt route we found 2808 */ 2809 *fire_arg = best_fire; 2810 *ire_arg = best_cire; 2811 2812 ip2dbg(("ire_multirt_lookup_v6: returning B_TRUE, " 2813 "*fire_arg %p, *ire_arg %p\n", 2814 (void *)best_fire, (void *)best_cire)); 2815 2816 return (B_TRUE); 2817 } 2818 2819 ASSERT(best_cire == NULL); 2820 2821 ip2dbg(("ire_multirt_lookup_v6: returning B_FALSE, *fire_arg %p, " 2822 "*ire_arg %p\n", 2823 (void *)*fire_arg, (void *)*ire_arg)); 2824 2825 /* No resolvable route. */ 2826 return (B_FALSE); 2827 } 2828 2829 2830 /* 2831 * Find an IRE_OFFSUBNET IRE entry for the multicast address 'v6dstp' 2832 * that goes through 'ipif'. As a fallback, a route that goes through 2833 * ipif->ipif_ill can be returned. 
2834 */ 2835 ire_t * 2836 ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp) 2837 { 2838 ire_t *ire; 2839 ire_t *save_ire = NULL; 2840 ire_t *gw_ire; 2841 irb_t *irb; 2842 in6_addr_t v6gw; 2843 int match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL; 2844 2845 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, NULL, ALL_ZONES, 0, 2846 MATCH_IRE_DEFAULT); 2847 2848 if (ire == NULL) 2849 return (NULL); 2850 2851 irb = ire->ire_bucket; 2852 ASSERT(irb); 2853 2854 IRB_REFHOLD(irb); 2855 ire_refrele(ire); 2856 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 2857 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) || 2858 ipif->ipif_zoneid != ire->ire_zoneid) { 2859 continue; 2860 } 2861 2862 switch (ire->ire_type) { 2863 case IRE_DEFAULT: 2864 case IRE_PREFIX: 2865 case IRE_HOST: 2866 mutex_enter(&ire->ire_lock); 2867 v6gw = ire->ire_gateway_addr_v6; 2868 mutex_exit(&ire->ire_lock); 2869 gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0, 2870 IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, 2871 match_flags); 2872 2873 if (gw_ire != NULL) { 2874 if (save_ire != NULL) { 2875 ire_refrele(save_ire); 2876 } 2877 IRE_REFHOLD(ire); 2878 if (gw_ire->ire_ipif == ipif) { 2879 ire_refrele(gw_ire); 2880 2881 IRB_REFRELE(irb); 2882 return (ire); 2883 } 2884 ire_refrele(gw_ire); 2885 save_ire = ire; 2886 } 2887 break; 2888 case IRE_IF_NORESOLVER: 2889 case IRE_IF_RESOLVER: 2890 if (ire->ire_ipif == ipif) { 2891 if (save_ire != NULL) { 2892 ire_refrele(save_ire); 2893 } 2894 IRE_REFHOLD(ire); 2895 2896 IRB_REFRELE(irb); 2897 return (ire); 2898 } 2899 break; 2900 } 2901 } 2902 IRB_REFRELE(irb); 2903 2904 return (save_ire); 2905 } 2906