1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 27 */ 28 29 /* 30 * This file contains the interface control functions for IPv6. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/sysmacros.h> 35 #include <sys/stream.h> 36 #include <sys/dlpi.h> 37 #include <sys/stropts.h> 38 #include <sys/ddi.h> 39 #include <sys/cmn_err.h> 40 #include <sys/kstat.h> 41 #include <sys/debug.h> 42 #include <sys/zone.h> 43 #include <sys/policy.h> 44 45 #include <sys/systm.h> 46 #include <sys/param.h> 47 #include <sys/socket.h> 48 #include <sys/isa_defs.h> 49 #include <net/if.h> 50 #include <net/if_dl.h> 51 #include <net/route.h> 52 #include <netinet/in.h> 53 #include <netinet/igmp_var.h> 54 #include <netinet/ip6.h> 55 #include <netinet/icmp6.h> 56 57 #include <inet/common.h> 58 #include <inet/nd.h> 59 #include <inet/tunables.h> 60 #include <inet/mib2.h> 61 #include <inet/ip.h> 62 #include <inet/ip6.h> 63 #include <inet/ip_multi.h> 64 #include <inet/ip_ire.h> 65 #include <inet/ip_rts.h> 66 #include <inet/ip_ndp.h> 67 #include <inet/ip_if.h> 68 #include <inet/ip6_asp.h> 69 #include <inet/ipclassifier.h> 70 #include <inet/sctp_ip.h> 71 72 #include <sys/tsol/tndb.h> 73 #include <sys/tsol/tnet.h> 74 75 static in6_addr_t ipv6_ll_template = 76 {(uint32_t)V6_LINKLOCAL, 0x0, 0x0, 0x0}; 77 78 static ipif_t * 79 ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst, 80 ip_stack_t *ipst); 81 82 static int ipif_add_ires_v6(ipif_t *, boolean_t); 83 84 /* 85 * This function is called when an application does not specify an interface 86 * to be used for multicast traffic. It calls ire_lookup_multi_v6() to look 87 * for an interface route for the specified multicast group. Doing 88 * this allows the administrator to add prefix routes for multicast to 89 * indicate which interface to be used for multicast traffic in the above 90 * scenario. The route could be for all multicast (ff00::/8), for a single 91 * multicast group (a /128 route) or anything in between. If there is no 92 * such multicast route, we just find any multicast capable interface and 93 * return it. 94 * 95 * We support MULTIRT and RTF_SETSRC on the multicast routes added to the 96 * unicast table. This is used by CGTP. 97 */ 98 ill_t * 99 ill_lookup_group_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst, 100 boolean_t *multirtp, in6_addr_t *setsrcp) 101 { 102 ill_t *ill; 103 104 ill = ire_lookup_multi_ill_v6(group, zoneid, ipst, multirtp, setsrcp); 105 if (ill != NULL) 106 return (ill); 107 108 return (ill_lookup_multicast(ipst, zoneid, B_TRUE)); 109 } 110 111 /* 112 * Look for an ipif with the specified interface address and destination. 113 * The destination address is used only for matching point-to-point interfaces. 114 */ 115 static ipif_t * 116 ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst, 117 ip_stack_t *ipst) 118 { 119 ipif_t *ipif; 120 ill_t *ill; 121 ill_walk_context_t ctx; 122 123 /* 124 * First match all the point-to-point interfaces 125 * before looking at non-point-to-point interfaces. 126 * This is done to avoid returning non-point-to-point 127 * ipif instead of unnumbered point-to-point ipif. 128 */ 129 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 130 ill = ILL_START_WALK_V6(&ctx, ipst); 131 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 132 mutex_enter(&ill->ill_lock); 133 for (ipif = ill->ill_ipif; ipif != NULL; 134 ipif = ipif->ipif_next) { 135 /* Allow the ipif to be down */ 136 if ((ipif->ipif_flags & IPIF_POINTOPOINT) && 137 (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 138 if_addr)) && 139 (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr, 140 dst))) { 141 if (!IPIF_IS_CONDEMNED(ipif)) { 142 ipif_refhold_locked(ipif); 143 mutex_exit(&ill->ill_lock); 144 rw_exit(&ipst->ips_ill_g_lock); 145 return (ipif); 146 } 147 } 148 } 149 mutex_exit(&ill->ill_lock); 150 } 151 rw_exit(&ipst->ips_ill_g_lock); 152 /* lookup the ipif based on interface address */ 153 ipif = ipif_lookup_addr_v6(if_addr, NULL, ALL_ZONES, ipst); 154 ASSERT(ipif == NULL || ipif->ipif_isv6); 155 return (ipif); 156 } 157 158 /* 159 * Common function for ipif_lookup_addr_v6() and ipif_lookup_addr_exact_v6(). 160 */ 161 static ipif_t * 162 ipif_lookup_addr_common_v6(const in6_addr_t *addr, ill_t *match_ill, 163 uint32_t match_flags, zoneid_t zoneid, ip_stack_t *ipst) 164 { 165 ipif_t *ipif; 166 ill_t *ill; 167 boolean_t ptp = B_FALSE; 168 ill_walk_context_t ctx; 169 boolean_t match_illgrp = (match_flags & IPIF_MATCH_ILLGRP); 170 boolean_t no_duplicate = (match_flags & IPIF_MATCH_NONDUP); 171 172 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 173 /* 174 * Repeat twice, first based on local addresses and 175 * next time for pointopoint. 176 */ 177 repeat: 178 ill = ILL_START_WALK_V6(&ctx, ipst); 179 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 180 if (match_ill != NULL && ill != match_ill && 181 (!match_illgrp || !IS_IN_SAME_ILLGRP(ill, match_ill))) { 182 continue; 183 } 184 mutex_enter(&ill->ill_lock); 185 for (ipif = ill->ill_ipif; ipif != NULL; 186 ipif = ipif->ipif_next) { 187 if (zoneid != ALL_ZONES && 188 ipif->ipif_zoneid != zoneid && 189 ipif->ipif_zoneid != ALL_ZONES) 190 continue; 191 192 if (no_duplicate && 193 !(ipif->ipif_flags & IPIF_UP)) { 194 continue; 195 } 196 197 /* Allow the ipif to be down */ 198 if ((!ptp && (IN6_ARE_ADDR_EQUAL( 199 &ipif->ipif_v6lcl_addr, addr) && 200 (ipif->ipif_flags & IPIF_UNNUMBERED) == 0)) || 201 (ptp && (ipif->ipif_flags & IPIF_POINTOPOINT) && 202 IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr, 203 addr))) { 204 if (!IPIF_IS_CONDEMNED(ipif)) { 205 ipif_refhold_locked(ipif); 206 mutex_exit(&ill->ill_lock); 207 rw_exit(&ipst->ips_ill_g_lock); 208 return (ipif); 209 } 210 } 211 } 212 mutex_exit(&ill->ill_lock); 213 } 214 215 /* If we already did the ptp case, then we are done */ 216 if (ptp) { 217 rw_exit(&ipst->ips_ill_g_lock); 218 return (NULL); 219 } 220 ptp = B_TRUE; 221 goto repeat; 222 } 223 224 /* 225 * Lookup an ipif with the specified address. For point-to-point links we 226 * look for matches on either the destination address or the local address, 227 * but we skip the local address check if IPIF_UNNUMBERED is set. If the 228 * `match_ill' argument is non-NULL, the lookup is restricted to that ill 229 * (or illgrp if `match_ill' is in an IPMP group). 230 */ 231 ipif_t * 232 ipif_lookup_addr_v6(const in6_addr_t *addr, ill_t *match_ill, zoneid_t zoneid, 233 ip_stack_t *ipst) 234 { 235 return (ipif_lookup_addr_common_v6(addr, match_ill, IPIF_MATCH_ILLGRP, 236 zoneid, ipst)); 237 } 238 239 /* 240 * Lookup an ipif with the specified address. Similar to ipif_lookup_addr, 241 * except that we will only return an address if it is not marked as 242 * IPIF_DUPLICATE 243 */ 244 ipif_t * 245 ipif_lookup_addr_nondup_v6(const in6_addr_t *addr, ill_t *match_ill, 246 zoneid_t zoneid, ip_stack_t *ipst) 247 { 248 return (ipif_lookup_addr_common_v6(addr, match_ill, 249 (IPIF_MATCH_ILLGRP | IPIF_MATCH_NONDUP), zoneid, 250 ipst)); 251 } 252 253 /* 254 * Special abbreviated version of ipif_lookup_addr_v6() that doesn't match 255 * `match_ill' across the IPMP group. This function is only needed in some 256 * corner-cases; almost everything should use ipif_lookup_addr_v6(). 257 */ 258 ipif_t * 259 ipif_lookup_addr_exact_v6(const in6_addr_t *addr, ill_t *match_ill, 260 ip_stack_t *ipst) 261 { 262 ASSERT(match_ill != NULL); 263 return (ipif_lookup_addr_common_v6(addr, match_ill, 0, ALL_ZONES, 264 ipst)); 265 } 266 267 /* 268 * Look for an ipif with the specified address. For point-point links 269 * we look for matches on either the destination address and the local 270 * address, but we ignore the check on the local address if IPIF_UNNUMBERED 271 * is set. 272 * If the `match_ill' argument is non-NULL, the lookup is restricted to that 273 * ill (or illgrp if `match_ill' is in an IPMP group). 274 * Return the zoneid for the ipif. ALL_ZONES if none found. 275 */ 276 zoneid_t 277 ipif_lookup_addr_zoneid_v6(const in6_addr_t *addr, ill_t *match_ill, 278 ip_stack_t *ipst) 279 { 280 ipif_t *ipif; 281 ill_t *ill; 282 boolean_t ptp = B_FALSE; 283 ill_walk_context_t ctx; 284 zoneid_t zoneid; 285 286 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 287 /* 288 * Repeat twice, first based on local addresses and 289 * next time for pointopoint. 290 */ 291 repeat: 292 ill = ILL_START_WALK_V6(&ctx, ipst); 293 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 294 if (match_ill != NULL && ill != match_ill && 295 !IS_IN_SAME_ILLGRP(ill, match_ill)) { 296 continue; 297 } 298 mutex_enter(&ill->ill_lock); 299 for (ipif = ill->ill_ipif; ipif != NULL; 300 ipif = ipif->ipif_next) { 301 /* Allow the ipif to be down */ 302 if ((!ptp && (IN6_ARE_ADDR_EQUAL( 303 &ipif->ipif_v6lcl_addr, addr) && 304 (ipif->ipif_flags & IPIF_UNNUMBERED) == 0)) || 305 (ptp && (ipif->ipif_flags & IPIF_POINTOPOINT) && 306 IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr, 307 addr)) && 308 !(ipif->ipif_state_flags & IPIF_CONDEMNED)) { 309 zoneid = ipif->ipif_zoneid; 310 mutex_exit(&ill->ill_lock); 311 rw_exit(&ipst->ips_ill_g_lock); 312 /* 313 * If ipif_zoneid was ALL_ZONES then we have 314 * a trusted extensions shared IP address. 315 * In that case GLOBAL_ZONEID works to send. 316 */ 317 if (zoneid == ALL_ZONES) 318 zoneid = GLOBAL_ZONEID; 319 return (zoneid); 320 } 321 } 322 mutex_exit(&ill->ill_lock); 323 } 324 325 /* If we already did the ptp case, then we are done */ 326 if (ptp) { 327 rw_exit(&ipst->ips_ill_g_lock); 328 return (ALL_ZONES); 329 } 330 ptp = B_TRUE; 331 goto repeat; 332 } 333 334 /* 335 * Perform various checks to verify that an address would make sense as a local 336 * interface address. This is currently only called when an attempt is made 337 * to set a local address. 338 * 339 * Does not allow a v4-mapped address, an address that equals the subnet 340 * anycast address, ... a multicast address, ... 341 */ 342 boolean_t 343 ip_local_addr_ok_v6(const in6_addr_t *addr, const in6_addr_t *subnet_mask) 344 { 345 in6_addr_t subnet; 346 347 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 348 return (B_TRUE); /* Allow all zeros */ 349 350 /* 351 * Don't allow all zeroes or host part, but allow 352 * all ones netmask. 353 */ 354 V6_MASK_COPY(*addr, *subnet_mask, subnet); 355 if (IN6_IS_ADDR_V4MAPPED(addr) || 356 (IN6_ARE_ADDR_EQUAL(addr, &subnet) && 357 !IN6_ARE_ADDR_EQUAL(subnet_mask, &ipv6_all_ones)) || 358 (IN6_IS_ADDR_V4COMPAT(addr) && CLASSD(V4_PART_OF_V6((*addr)))) || 359 IN6_IS_ADDR_MULTICAST(addr)) 360 return (B_FALSE); 361 362 return (B_TRUE); 363 } 364 365 /* 366 * Perform various checks to verify that an address would make sense as a 367 * remote/subnet interface address. 368 */ 369 boolean_t 370 ip_remote_addr_ok_v6(const in6_addr_t *addr, const in6_addr_t *subnet_mask) 371 { 372 in6_addr_t subnet; 373 374 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 375 return (B_TRUE); /* Allow all zeros */ 376 377 V6_MASK_COPY(*addr, *subnet_mask, subnet); 378 if (IN6_IS_ADDR_V4MAPPED(addr) || 379 (IN6_ARE_ADDR_EQUAL(addr, &subnet) && 380 !IN6_ARE_ADDR_EQUAL(subnet_mask, &ipv6_all_ones)) || 381 IN6_IS_ADDR_MULTICAST(addr) || 382 (IN6_IS_ADDR_V4COMPAT(addr) && CLASSD(V4_PART_OF_V6((*addr))))) 383 return (B_FALSE); 384 385 return (B_TRUE); 386 } 387 388 /* 389 * ip_rt_add_v6 is called to add an IPv6 route to the forwarding table. 390 * ill is passed in to associate it with the correct interface 391 * (for link-local destinations and gateways). 392 * If ire_arg is set, then we return the held IRE in that location. 393 */ 394 /* ARGSUSED1 */ 395 int 396 ip_rt_add_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, 397 const in6_addr_t *gw_addr, const in6_addr_t *src_addr, int flags, 398 ill_t *ill, ire_t **ire_arg, struct rtsa_s *sp, ip_stack_t *ipst, 399 zoneid_t zoneid) 400 { 401 ire_t *ire, *nire; 402 ire_t *gw_ire = NULL; 403 ipif_t *ipif; 404 uint_t type; 405 int match_flags = MATCH_IRE_TYPE; 406 tsol_gc_t *gc = NULL; 407 tsol_gcgrp_t *gcgrp = NULL; 408 boolean_t gcgrp_xtraref = B_FALSE; 409 boolean_t unbound = B_FALSE; 410 411 if (ire_arg != NULL) 412 *ire_arg = NULL; 413 414 /* 415 * Prevent routes with a zero gateway from being created (since 416 * interfaces can currently be plumbed and brought up with no assigned 417 * address). 418 */ 419 if (IN6_IS_ADDR_UNSPECIFIED(gw_addr)) 420 return (ENETUNREACH); 421 422 /* 423 * If this is the case of RTF_HOST being set, then we set the netmask 424 * to all ones (regardless if one was supplied). 425 */ 426 if (flags & RTF_HOST) 427 mask = &ipv6_all_ones; 428 429 /* 430 * Get the ipif, if any, corresponding to the gw_addr 431 * If -ifp was specified we restrict ourselves to the ill, otherwise 432 * we match on the gatway and destination to handle unnumbered pt-pt 433 * interfaces. 434 */ 435 if (ill != NULL) 436 ipif = ipif_lookup_addr_v6(gw_addr, ill, ALL_ZONES, ipst); 437 else 438 ipif = ipif_lookup_interface_v6(gw_addr, dst_addr, ipst); 439 if (ipif != NULL) { 440 if (IS_VNI(ipif->ipif_ill)) { 441 ipif_refrele(ipif); 442 return (EINVAL); 443 } 444 } 445 446 /* 447 * GateD will attempt to create routes with a loopback interface 448 * address as the gateway and with RTF_GATEWAY set. We allow 449 * these routes to be added, but create them as interface routes 450 * since the gateway is an interface address. 451 */ 452 if ((ipif != NULL) && (ipif->ipif_ire_type == IRE_LOOPBACK)) { 453 flags &= ~RTF_GATEWAY; 454 if (IN6_ARE_ADDR_EQUAL(gw_addr, &ipv6_loopback) && 455 IN6_ARE_ADDR_EQUAL(dst_addr, &ipv6_loopback) && 456 IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones)) { 457 ire = ire_ftable_lookup_v6(dst_addr, 0, 0, IRE_LOOPBACK, 458 NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, 0, ipst, 459 NULL); 460 if (ire != NULL) { 461 ire_refrele(ire); 462 ipif_refrele(ipif); 463 return (EEXIST); 464 } 465 ip1dbg(("ip_rt_add_v6: 0x%p creating IRE 0x%x" 466 "for 0x%x\n", (void *)ipif, 467 ipif->ipif_ire_type, 468 ntohl(ipif->ipif_lcl_addr))); 469 ire = ire_create_v6( 470 dst_addr, 471 mask, 472 NULL, 473 ipif->ipif_ire_type, /* LOOPBACK */ 474 ipif->ipif_ill, 475 zoneid, 476 (ipif->ipif_flags & IPIF_PRIVATE) ? RTF_PRIVATE : 0, 477 NULL, 478 ipst); 479 480 if (ire == NULL) { 481 ipif_refrele(ipif); 482 return (ENOMEM); 483 } 484 /* src address assigned by the caller? */ 485 if ((flags & RTF_SETSRC) && 486 !IN6_IS_ADDR_UNSPECIFIED(src_addr)) 487 ire->ire_setsrc_addr_v6 = *src_addr; 488 489 nire = ire_add(ire); 490 if (nire == NULL) { 491 /* 492 * In the result of failure, ire_add() will have 493 * already deleted the ire in question, so there 494 * is no need to do that here. 495 */ 496 ipif_refrele(ipif); 497 return (ENOMEM); 498 } 499 /* 500 * Check if it was a duplicate entry. This handles 501 * the case of two racing route adds for the same route 502 */ 503 if (nire != ire) { 504 ASSERT(nire->ire_identical_ref > 1); 505 ire_delete(nire); 506 ire_refrele(nire); 507 ipif_refrele(ipif); 508 return (EEXIST); 509 } 510 ire = nire; 511 goto save_ire; 512 } 513 } 514 515 /* 516 * The routes for multicast with CGTP are quite special in that 517 * the gateway is the local interface address, yet RTF_GATEWAY 518 * is set. We turn off RTF_GATEWAY to provide compatibility with 519 * this undocumented and unusual use of multicast routes. 520 */ 521 if ((flags & RTF_MULTIRT) && ipif != NULL) 522 flags &= ~RTF_GATEWAY; 523 524 /* 525 * Traditionally, interface routes are ones where RTF_GATEWAY isn't set 526 * and the gateway address provided is one of the system's interface 527 * addresses. By using the routing socket interface and supplying an 528 * RTA_IFP sockaddr with an interface index, an alternate method of 529 * specifying an interface route to be created is available which uses 530 * the interface index that specifies the outgoing interface rather than 531 * the address of an outgoing interface (which may not be able to 532 * uniquely identify an interface). When coupled with the RTF_GATEWAY 533 * flag, routes can be specified which not only specify the next-hop to 534 * be used when routing to a certain prefix, but also which outgoing 535 * interface should be used. 536 * 537 * Previously, interfaces would have unique addresses assigned to them 538 * and so the address assigned to a particular interface could be used 539 * to identify a particular interface. One exception to this was the 540 * case of an unnumbered interface (where IPIF_UNNUMBERED was set). 541 * 542 * With the advent of IPv6 and its link-local addresses, this 543 * restriction was relaxed and interfaces could share addresses between 544 * themselves. In fact, typically all of the link-local interfaces on 545 * an IPv6 node or router will have the same link-local address. In 546 * order to differentiate between these interfaces, the use of an 547 * interface index is necessary and this index can be carried inside a 548 * RTA_IFP sockaddr (which is actually a sockaddr_dl). One restriction 549 * of using the interface index, however, is that all of the ipif's that 550 * are part of an ill have the same index and so the RTA_IFP sockaddr 551 * cannot be used to differentiate between ipif's (or logical 552 * interfaces) that belong to the same ill (physical interface). 553 * 554 * For example, in the following case involving IPv4 interfaces and 555 * logical interfaces 556 * 557 * 192.0.2.32 255.255.255.224 192.0.2.33 U if0 558 * 192.0.2.32 255.255.255.224 192.0.2.34 U if0 559 * 192.0.2.32 255.255.255.224 192.0.2.35 U if0 560 * 561 * the ipif's corresponding to each of these interface routes can be 562 * uniquely identified by the "gateway" (actually interface address). 563 * 564 * In this case involving multiple IPv6 default routes to a particular 565 * link-local gateway, the use of RTA_IFP is necessary to specify which 566 * default route is of interest: 567 * 568 * default fe80::123:4567:89ab:cdef U if0 569 * default fe80::123:4567:89ab:cdef U if1 570 */ 571 572 /* RTF_GATEWAY not set */ 573 if (!(flags & RTF_GATEWAY)) { 574 if (sp != NULL) { 575 ip2dbg(("ip_rt_add_v6: gateway security attributes " 576 "cannot be set with interface route\n")); 577 if (ipif != NULL) 578 ipif_refrele(ipif); 579 return (EINVAL); 580 } 581 582 /* 583 * Whether or not ill (RTA_IFP) is set, we require that 584 * the gateway is one of our local addresses. 585 */ 586 if (ipif == NULL) 587 return (ENETUNREACH); 588 589 /* 590 * We use MATCH_IRE_ILL here. If the caller specified an 591 * interface (from the RTA_IFP sockaddr) we use it, otherwise 592 * we use the ill derived from the gateway address. 593 * We can always match the gateway address since we record it 594 * in ire_gateway_addr. 595 * We don't allow RTA_IFP to specify a different ill than the 596 * one matching the ipif to make sure we can delete the route. 597 */ 598 match_flags |= MATCH_IRE_GW | MATCH_IRE_ILL; 599 if (ill == NULL) { 600 ill = ipif->ipif_ill; 601 } else if (ill != ipif->ipif_ill) { 602 ipif_refrele(ipif); 603 return (EINVAL); 604 } 605 606 /* 607 * We check for an existing entry at this point. 608 */ 609 match_flags |= MATCH_IRE_MASK; 610 ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, 611 IRE_INTERFACE, ill, ALL_ZONES, NULL, match_flags, 0, ipst, 612 NULL); 613 if (ire != NULL) { 614 ire_refrele(ire); 615 ipif_refrele(ipif); 616 return (EEXIST); 617 } 618 619 /* 620 * Some software (for example, GateD and Sun Cluster) attempts 621 * to create (what amount to) IRE_PREFIX routes with the 622 * loopback address as the gateway. This is primarily done to 623 * set up prefixes with the RTF_REJECT flag set (for example, 624 * when generating aggregate routes). We also OR in the 625 * RTF_BLACKHOLE flag as these interface routes, by 626 * definition, can only be that. 627 * 628 * If the IRE type (as defined by ill->ill_net_type) would be 629 * IRE_LOOPBACK, then we map the request into a 630 * IRE_IF_NORESOLVER. 631 * 632 * Needless to say, the real IRE_LOOPBACK is NOT created by this 633 * routine, but rather using ire_create_v6() directly. 634 */ 635 type = ill->ill_net_type; 636 if (type == IRE_LOOPBACK) { 637 type = IRE_IF_NORESOLVER; 638 flags |= RTF_BLACKHOLE; 639 } 640 641 /* 642 * Create a copy of the IRE_IF_NORESOLVER or 643 * IRE_IF_RESOLVER with the modified address, netmask, and 644 * gateway. 645 */ 646 ire = ire_create_v6( 647 dst_addr, 648 mask, 649 gw_addr, 650 type, 651 ill, 652 zoneid, 653 flags, 654 NULL, 655 ipst); 656 if (ire == NULL) { 657 ipif_refrele(ipif); 658 return (ENOMEM); 659 } 660 661 /* src address assigned by the caller? */ 662 if ((flags & RTF_SETSRC) && !IN6_IS_ADDR_UNSPECIFIED(src_addr)) 663 ire->ire_setsrc_addr_v6 = *src_addr; 664 665 nire = ire_add(ire); 666 if (nire == NULL) { 667 /* 668 * In the result of failure, ire_add() will have 669 * already deleted the ire in question, so there 670 * is no need to do that here. 671 */ 672 ipif_refrele(ipif); 673 return (ENOMEM); 674 } 675 /* 676 * Check if it was a duplicate entry. This handles 677 * the case of two racing route adds for the same route 678 */ 679 if (nire != ire) { 680 ASSERT(nire->ire_identical_ref > 1); 681 ire_delete(nire); 682 ire_refrele(nire); 683 ipif_refrele(ipif); 684 return (EEXIST); 685 } 686 ire = nire; 687 goto save_ire; 688 } 689 690 /* 691 * Get an interface IRE for the specified gateway. 692 * If we don't have an IRE_IF_NORESOLVER or IRE_IF_RESOLVER for the 693 * gateway, it is currently unreachable and we fail the request 694 * accordingly. We reject any RTF_GATEWAY routes where the gateway 695 * is an IRE_LOCAL or IRE_LOOPBACK. 696 * If RTA_IFP was specified we look on that particular ill. 697 */ 698 if (ill != NULL) 699 match_flags |= MATCH_IRE_ILL; 700 701 /* Check whether the gateway is reachable. */ 702 again: 703 type = IRE_INTERFACE | IRE_LOCAL | IRE_LOOPBACK; 704 if (flags & RTF_INDIRECT) 705 type |= IRE_OFFLINK; 706 707 gw_ire = ire_ftable_lookup_v6(gw_addr, 0, 0, type, ill, 708 ALL_ZONES, NULL, match_flags, 0, ipst, NULL); 709 if (gw_ire == NULL) { 710 /* 711 * With IPMP, we allow host routes to influence in.mpathd's 712 * target selection. However, if the test addresses are on 713 * their own network, the above lookup will fail since the 714 * underlying IRE_INTERFACEs are marked hidden. So allow 715 * hidden test IREs to be found and try again. 716 */ 717 if (!(match_flags & MATCH_IRE_TESTHIDDEN)) { 718 match_flags |= MATCH_IRE_TESTHIDDEN; 719 goto again; 720 } 721 if (ipif != NULL) 722 ipif_refrele(ipif); 723 return (ENETUNREACH); 724 } 725 if (gw_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) { 726 ire_refrele(gw_ire); 727 if (ipif != NULL) 728 ipif_refrele(ipif); 729 return (ENETUNREACH); 730 } 731 if (ill == NULL && !(flags & RTF_INDIRECT)) { 732 unbound = B_TRUE; 733 if (ipst->ips_ipv6_strict_src_multihoming > 0) 734 ill = gw_ire->ire_ill; 735 } 736 737 /* 738 * We create one of three types of IREs as a result of this request 739 * based on the netmask. A netmask of all ones (which is automatically 740 * assumed when RTF_HOST is set) results in an IRE_HOST being created. 741 * An all zeroes netmask implies a default route so an IRE_DEFAULT is 742 * created. Otherwise, an IRE_PREFIX route is created for the 743 * destination prefix. 744 */ 745 if (IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones)) 746 type = IRE_HOST; 747 else if (IN6_IS_ADDR_UNSPECIFIED(mask)) 748 type = IRE_DEFAULT; 749 else 750 type = IRE_PREFIX; 751 752 /* check for a duplicate entry */ 753 ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, type, ill, 754 ALL_ZONES, NULL, 755 match_flags | MATCH_IRE_MASK | MATCH_IRE_GW, 0, ipst, NULL); 756 if (ire != NULL) { 757 if (ipif != NULL) 758 ipif_refrele(ipif); 759 ire_refrele(gw_ire); 760 ire_refrele(ire); 761 return (EEXIST); 762 } 763 764 /* Security attribute exists */ 765 if (sp != NULL) { 766 tsol_gcgrp_addr_t ga; 767 768 /* find or create the gateway credentials group */ 769 ga.ga_af = AF_INET6; 770 ga.ga_addr = *gw_addr; 771 772 /* we hold reference to it upon success */ 773 gcgrp = gcgrp_lookup(&ga, B_TRUE); 774 if (gcgrp == NULL) { 775 if (ipif != NULL) 776 ipif_refrele(ipif); 777 ire_refrele(gw_ire); 778 return (ENOMEM); 779 } 780 781 /* 782 * Create and add the security attribute to the group; a 783 * reference to the group is made upon allocating a new 784 * entry successfully. If it finds an already-existing 785 * entry for the security attribute in the group, it simply 786 * returns it and no new reference is made to the group. 787 */ 788 gc = gc_create(sp, gcgrp, &gcgrp_xtraref); 789 if (gc == NULL) { 790 /* release reference held by gcgrp_lookup */ 791 GCGRP_REFRELE(gcgrp); 792 if (ipif != NULL) 793 ipif_refrele(ipif); 794 ire_refrele(gw_ire); 795 return (ENOMEM); 796 } 797 } 798 799 /* Create the IRE. */ 800 ire = ire_create_v6( 801 dst_addr, /* dest address */ 802 mask, /* mask */ 803 gw_addr, /* gateway address */ 804 (ushort_t)type, /* IRE type */ 805 ill, 806 zoneid, 807 flags, 808 gc, /* security attribute */ 809 ipst); 810 811 /* 812 * The ire holds a reference to the 'gc' and the 'gc' holds a 813 * reference to the 'gcgrp'. We can now release the extra reference 814 * the 'gcgrp' acquired in the gcgrp_lookup, if it was not used. 815 */ 816 if (gcgrp_xtraref) 817 GCGRP_REFRELE(gcgrp); 818 if (ire == NULL) { 819 if (gc != NULL) 820 GC_REFRELE(gc); 821 if (ipif != NULL) 822 ipif_refrele(ipif); 823 ire_refrele(gw_ire); 824 return (ENOMEM); 825 } 826 827 /* src address assigned by the caller? */ 828 if ((flags & RTF_SETSRC) && !IN6_IS_ADDR_UNSPECIFIED(src_addr)) 829 ire->ire_setsrc_addr_v6 = *src_addr; 830 831 ire->ire_unbound = unbound; 832 833 /* 834 * POLICY: should we allow an RTF_HOST with address INADDR_ANY? 835 * SUN/OS socket stuff does but do we really want to allow ::0 ? 836 */ 837 838 /* Add the new IRE. */ 839 nire = ire_add(ire); 840 if (nire == NULL) { 841 /* 842 * In the result of failure, ire_add() will have 843 * already deleted the ire in question, so there 844 * is no need to do that here. 845 */ 846 if (ipif != NULL) 847 ipif_refrele(ipif); 848 ire_refrele(gw_ire); 849 return (ENOMEM); 850 } 851 /* 852 * Check if it was a duplicate entry. This handles 853 * the case of two racing route adds for the same route 854 */ 855 if (nire != ire) { 856 ASSERT(nire->ire_identical_ref > 1); 857 ire_delete(nire); 858 ire_refrele(nire); 859 if (ipif != NULL) 860 ipif_refrele(ipif); 861 ire_refrele(gw_ire); 862 return (EEXIST); 863 } 864 ire = nire; 865 866 if (flags & RTF_MULTIRT) { 867 /* 868 * Invoke the CGTP (multirouting) filtering module 869 * to add the dst address in the filtering database. 870 * Replicated inbound packets coming from that address 871 * will be filtered to discard the duplicates. 872 * It is not necessary to call the CGTP filter hook 873 * when the dst address is a multicast, because an 874 * IP source address cannot be a multicast. 875 */ 876 if (ipst->ips_ip_cgtp_filter_ops != NULL && 877 !IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))) { 878 int res; 879 ipif_t *src_ipif; 880 881 /* Find the source address corresponding to gw_ire */ 882 src_ipif = ipif_lookup_addr_v6( 883 &gw_ire->ire_gateway_addr_v6, NULL, zoneid, ipst); 884 if (src_ipif != NULL) { 885 res = ipst->ips_ip_cgtp_filter_ops-> 886 cfo_add_dest_v6( 887 ipst->ips_netstack->netstack_stackid, 888 &ire->ire_addr_v6, 889 &ire->ire_gateway_addr_v6, 890 &ire->ire_setsrc_addr_v6, 891 &src_ipif->ipif_v6lcl_addr); 892 ipif_refrele(src_ipif); 893 } else { 894 res = EADDRNOTAVAIL; 895 } 896 if (res != 0) { 897 if (ipif != NULL) 898 ipif_refrele(ipif); 899 ire_refrele(gw_ire); 900 ire_delete(ire); 901 ire_refrele(ire); /* Held in ire_add */ 902 return (res); 903 } 904 } 905 } 906 907 save_ire: 908 if (gw_ire != NULL) { 909 ire_refrele(gw_ire); 910 gw_ire = NULL; 911 } 912 if (ire->ire_ill != NULL) { 913 /* 914 * Save enough information so that we can recreate the IRE if 915 * the ILL goes down and then up. The metrics associated 916 * with the route will be saved as well when rts_setmetrics() is 917 * called after the IRE has been created. In the case where 918 * memory cannot be allocated, none of this information will be 919 * saved. 920 */ 921 ill_save_ire(ire->ire_ill, ire); 922 } 923 924 if (ire_arg != NULL) { 925 /* 926 * Store the ire that was successfully added into where ire_arg 927 * points to so that callers don't have to look it up 928 * themselves (but they are responsible for ire_refrele()ing 929 * the ire when they are finished with it). 930 */ 931 *ire_arg = ire; 932 } else { 933 ire_refrele(ire); /* Held in ire_add */ 934 } 935 if (ipif != NULL) 936 ipif_refrele(ipif); 937 return (0); 938 } 939 940 /* 941 * ip_rt_delete_v6 is called to delete an IPv6 route. 942 * ill is passed in to associate it with the correct interface. 943 * (for link-local destinations and gateways). 944 */ 945 /* ARGSUSED4 */ 946 int 947 ip_rt_delete_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, 948 const in6_addr_t *gw_addr, uint_t rtm_addrs, int flags, ill_t *ill, 949 ip_stack_t *ipst, zoneid_t zoneid) 950 { 951 ire_t *ire = NULL; 952 ipif_t *ipif; 953 uint_t type; 954 uint_t match_flags = MATCH_IRE_TYPE; 955 int err = 0; 956 957 /* 958 * If this is the case of RTF_HOST being set, then we set the netmask 959 * to all ones. Otherwise, we use the netmask if one was supplied. 960 */ 961 if (flags & RTF_HOST) { 962 mask = &ipv6_all_ones; 963 match_flags |= MATCH_IRE_MASK; 964 } else if (rtm_addrs & RTA_NETMASK) { 965 match_flags |= MATCH_IRE_MASK; 966 } 967 968 /* 969 * Note that RTF_GATEWAY is never set on a delete, therefore 970 * we check if the gateway address is one of our interfaces first, 971 * and fall back on RTF_GATEWAY routes. 972 * 973 * This makes it possible to delete an original 974 * IRE_IF_NORESOLVER/IRE_IF_RESOLVER - consistent with SunOS 4.1. 975 * However, we have RTF_KERNEL set on the ones created by ipif_up 976 * and those can not be deleted here. 977 * 978 * We use MATCH_IRE_ILL if we know the interface. If the caller 979 * specified an interface (from the RTA_IFP sockaddr) we use it, 980 * otherwise we use the ill derived from the gateway address. 981 * We can always match the gateway address since we record it 982 * in ire_gateway_addr. 983 * 984 * For more detail on specifying routes by gateway address and by 985 * interface index, see the comments in ip_rt_add_v6(). 986 */ 987 ipif = ipif_lookup_interface_v6(gw_addr, dst_addr, ipst); 988 if (ipif != NULL) { 989 ill_t *ill_match; 990 991 if (ill != NULL) 992 ill_match = ill; 993 else 994 ill_match = ipif->ipif_ill; 995 996 match_flags |= MATCH_IRE_ILL; 997 if (ipif->ipif_ire_type == IRE_LOOPBACK) { 998 ire = ire_ftable_lookup_v6(dst_addr, mask, 0, 999 IRE_LOOPBACK, ill_match, ALL_ZONES, NULL, 1000 match_flags, 0, ipst, NULL); 1001 } 1002 if (ire == NULL) { 1003 match_flags |= MATCH_IRE_GW; 1004 ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, 1005 IRE_INTERFACE, ill_match, ALL_ZONES, NULL, 1006 match_flags, 0, ipst, NULL); 1007 } 1008 /* Avoid deleting routes created by kernel from an ipif */ 1009 if (ire != NULL && (ire->ire_flags & RTF_KERNEL)) { 1010 ire_refrele(ire); 1011 ire = NULL; 1012 } 1013 1014 /* Restore in case we didn't find a match */ 1015 match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_ILL); 1016 } 1017 1018 if (ire == NULL) { 1019 /* 1020 * At this point, the gateway address is not one of our own 1021 * addresses or a matching interface route was not found. We 1022 * set the IRE type to lookup based on whether 1023 * this is a host route, a default route or just a prefix. 1024 * 1025 * If an ill was passed in, then the lookup is based on an 1026 * interface index so MATCH_IRE_ILL is added to match_flags. 1027 */ 1028 match_flags |= MATCH_IRE_GW; 1029 if (ill != NULL) 1030 match_flags |= MATCH_IRE_ILL; 1031 if (IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones)) 1032 type = IRE_HOST; 1033 else if (IN6_IS_ADDR_UNSPECIFIED(mask)) 1034 type = IRE_DEFAULT; 1035 else 1036 type = IRE_PREFIX; 1037 ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, type, 1038 ill, ALL_ZONES, NULL, match_flags, 0, ipst, NULL); 1039 } 1040 1041 if (ipif != NULL) { 1042 ipif_refrele(ipif); 1043 ipif = NULL; 1044 } 1045 if (ire == NULL) 1046 return (ESRCH); 1047 1048 if (ire->ire_flags & RTF_MULTIRT) { 1049 /* 1050 * Invoke the CGTP (multirouting) filtering module 1051 * to remove the dst address from the filtering database. 1052 * Packets coming from that address will no longer be 1053 * filtered to remove duplicates. 1054 */ 1055 if (ipst->ips_ip_cgtp_filter_ops != NULL) { 1056 err = ipst->ips_ip_cgtp_filter_ops->cfo_del_dest_v6( 1057 ipst->ips_netstack->netstack_stackid, 1058 &ire->ire_addr_v6, &ire->ire_gateway_addr_v6); 1059 } 1060 } 1061 1062 ill = ire->ire_ill; 1063 if (ill != NULL) 1064 ill_remove_saved_ire(ill, ire); 1065 ire_delete(ire); 1066 ire_refrele(ire); 1067 return (err); 1068 } 1069 1070 /* 1071 * Derive an interface id from the link layer address. 1072 */ 1073 void 1074 ill_setdefaulttoken(ill_t *ill) 1075 { 1076 if (!ill->ill_manual_token) { 1077 bzero(&ill->ill_token, sizeof (ill->ill_token)); 1078 MEDIA_V6INTFID(ill->ill_media, ill, &ill->ill_token); 1079 ill->ill_token_length = IPV6_TOKEN_LEN; 1080 } 1081 } 1082 1083 void 1084 ill_setdesttoken(ill_t *ill) 1085 { 1086 bzero(&ill->ill_dest_token, sizeof (ill->ill_dest_token)); 1087 MEDIA_V6DESTINTFID(ill->ill_media, ill, &ill->ill_dest_token); 1088 } 1089 1090 /* 1091 * Create a link-local address from a token. 1092 */ 1093 static void 1094 ipif_get_linklocal(in6_addr_t *dest, const in6_addr_t *token) 1095 { 1096 int i; 1097 1098 for (i = 0; i < 4; i++) { 1099 dest->s6_addr32[i] = 1100 token->s6_addr32[i] | ipv6_ll_template.s6_addr32[i]; 1101 } 1102 } 1103 1104 /* 1105 * Set a default IPv6 address for a 6to4 tunnel interface 2002:<tsrc>::1/16 1106 */ 1107 static void 1108 ipif_set6to4addr(ipif_t *ipif) 1109 { 1110 ill_t *ill = ipif->ipif_ill; 1111 struct in_addr v4phys; 1112 1113 ASSERT(ill->ill_mactype == DL_6TO4); 1114 ASSERT(ill->ill_phys_addr_length == sizeof (struct in_addr)); 1115 ASSERT(ipif->ipif_isv6); 1116 1117 if (ipif->ipif_flags & IPIF_UP) 1118 return; 1119 1120 (void) ip_plen_to_mask_v6(16, &ipif->ipif_v6net_mask); 1121 bcopy(ill->ill_phys_addr, &v4phys, sizeof (struct in_addr)); 1122 IN6_V4ADDR_TO_6TO4(&v4phys, &ipif->ipif_v6lcl_addr); 1123 V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask, 1124 ipif->ipif_v6subnet); 1125 } 1126 1127 /* 1128 * Is it not possible to set the link local address? 1129 * The address can be set if the token is set, and the token 1130 * isn't too long. 1131 * Return B_TRUE if the address can't be set, or B_FALSE if it can. 1132 */ 1133 boolean_t 1134 ipif_cant_setlinklocal(ipif_t *ipif) 1135 { 1136 ill_t *ill = ipif->ipif_ill; 1137 1138 if (IN6_IS_ADDR_UNSPECIFIED(&ill->ill_token) || 1139 ill->ill_token_length > IPV6_ABITS - IPV6_LL_PREFIXLEN) 1140 return (B_TRUE); 1141 1142 return (B_FALSE); 1143 } 1144 1145 /* 1146 * Generate a link-local address from the token. 1147 */ 1148 void 1149 ipif_setlinklocal(ipif_t *ipif) 1150 { 1151 ill_t *ill = ipif->ipif_ill; 1152 in6_addr_t ov6addr; 1153 1154 ASSERT(IAM_WRITER_ILL(ill)); 1155 1156 /* 1157 * If the interface was created with no link-local address 1158 * on it and the flag ILLF_NOLINKLOCAL was set, then we 1159 * dont want to update the link-local. 1160 */ 1161 if ((ill->ill_flags & ILLF_NOLINKLOCAL) && 1162 IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr)) 1163 return; 1164 /* 1165 * ill_manual_linklocal is set when the link-local address was 1166 * manually configured. 1167 */ 1168 if (ill->ill_manual_linklocal) 1169 return; 1170 1171 /* 1172 * IPv6 interfaces over 6to4 tunnels are special. They do not have 1173 * link-local addresses, but instead have a single automatically 1174 * generated global address. 1175 */ 1176 if (ill->ill_mactype == DL_6TO4) { 1177 ipif_set6to4addr(ipif); 1178 return; 1179 } 1180 1181 if (ipif_cant_setlinklocal(ipif)) 1182 return; 1183 1184 ov6addr = ipif->ipif_v6lcl_addr; 1185 ipif_get_linklocal(&ipif->ipif_v6lcl_addr, &ill->ill_token); 1186 sctp_update_ipif_addr(ipif, ov6addr); 1187 (void) ip_plen_to_mask_v6(IPV6_LL_PREFIXLEN, &ipif->ipif_v6net_mask); 1188 if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6pp_dst_addr)) { 1189 V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask, 1190 ipif->ipif_v6subnet); 1191 } 1192 1193 ip_rts_newaddrmsg(RTM_CHGADDR, 0, ipif, RTSQ_DEFAULT); 1194 } 1195 1196 /* 1197 * Generate a destination link-local address for a point-to-point IPv6 1198 * interface with a destination interface id (IP tunnels are such interfaces) 1199 * based on the destination token. 1200 */ 1201 void 1202 ipif_setdestlinklocal(ipif_t *ipif) 1203 { 1204 ill_t *ill = ipif->ipif_ill; 1205 1206 ASSERT(IAM_WRITER_ILL(ill)); 1207 1208 if (ill->ill_manual_dst_linklocal) 1209 return; 1210 1211 if (IN6_IS_ADDR_UNSPECIFIED(&ill->ill_dest_token)) 1212 return; 1213 1214 ipif_get_linklocal(&ipif->ipif_v6pp_dst_addr, &ill->ill_dest_token); 1215 ipif->ipif_v6subnet = ipif->ipif_v6pp_dst_addr; 1216 } 1217 1218 /* 1219 * Get the resolver set up for a new ipif. (Always called as writer.) 1220 */ 1221 int 1222 ipif_ndp_up(ipif_t *ipif, boolean_t initial) 1223 { 1224 ill_t *ill = ipif->ipif_ill; 1225 int err = 0; 1226 nce_t *nce = NULL; 1227 boolean_t added_ipif = B_FALSE; 1228 1229 DTRACE_PROBE3(ipif__downup, char *, "ipif_ndp_up", 1230 ill_t *, ill, ipif_t *, ipif); 1231 ip1dbg(("ipif_ndp_up(%s:%u)\n", ill->ill_name, ipif->ipif_id)); 1232 1233 if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr) || 1234 (!(ill->ill_net_type & IRE_INTERFACE))) { 1235 ipif->ipif_addr_ready = 1; 1236 return (0); 1237 } 1238 1239 if ((ipif->ipif_flags & (IPIF_UNNUMBERED|IPIF_NOLOCAL)) == 0) { 1240 uint16_t flags; 1241 uint16_t state; 1242 uchar_t *hw_addr; 1243 ill_t *bound_ill; 1244 ipmp_illgrp_t *illg = ill->ill_grp; 1245 uint_t hw_addr_len; 1246 1247 flags = NCE_F_MYADDR | NCE_F_NONUD | NCE_F_PUBLISH | 1248 NCE_F_AUTHORITY; 1249 if (ill->ill_flags & ILLF_ROUTER) 1250 flags |= NCE_F_ISROUTER; 1251 1252 if (ipif->ipif_flags & IPIF_ANYCAST) 1253 flags |= NCE_F_ANYCAST; 1254 1255 if (IS_IPMP(ill)) { 1256 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1257 /* 1258 * If we're here via ipif_up(), then the ipif won't be 1259 * bound yet -- add it to the group, which will bind 1260 * it if possible. (We would add it in ipif_up(), but 1261 * deleting on failure there is gruesome.) If we're 1262 * here via ipmp_ill_bind_ipif(), then the ipif has 1263 * already been added to the group and we just need to 1264 * use the binding. 1265 */ 1266 if ((bound_ill = ipmp_ipif_bound_ill(ipif)) == NULL) { 1267 bound_ill = ipmp_illgrp_add_ipif(illg, ipif); 1268 if (bound_ill == NULL) { 1269 /* 1270 * We couldn't bind the ipif to an ill 1271 * yet, so we have nothing to publish. 1272 * Set ipif_addr_ready so that this 1273 * address can be used locally for now. 1274 * The routing socket message will be 1275 * sent from ipif_up_done_v6(). 1276 */ 1277 ipif->ipif_addr_ready = 1; 1278 return (0); 1279 } 1280 added_ipif = B_TRUE; 1281 } 1282 hw_addr = bound_ill->ill_nd_lla; 1283 hw_addr_len = bound_ill->ill_phys_addr_length; 1284 } else { 1285 bound_ill = ill; 1286 hw_addr = ill->ill_nd_lla; 1287 hw_addr_len = ill->ill_phys_addr_length; 1288 } 1289 1290 /* 1291 * If this is an initial bring-up (or the ipif was never 1292 * completely brought up), do DAD. Otherwise, we're here 1293 * because IPMP has rebound an address to this ill: send 1294 * unsolicited advertisements to inform others. 1295 */ 1296 if (initial || !ipif->ipif_addr_ready) { 1297 /* Causes Duplicate Address Detection to run */ 1298 state = ND_PROBE; 1299 } else { 1300 state = ND_REACHABLE; 1301 flags |= NCE_F_UNSOL_ADV; 1302 } 1303 1304 retry: 1305 err = nce_lookup_then_add_v6(ill, hw_addr, hw_addr_len, 1306 &ipif->ipif_v6lcl_addr, flags, state, &nce); 1307 switch (err) { 1308 case 0: 1309 ip1dbg(("ipif_ndp_up: NCE created for %s\n", 1310 ill->ill_name)); 1311 ipif->ipif_addr_ready = 1; 1312 ipif->ipif_added_nce = 1; 1313 nce->nce_ipif_cnt++; 1314 break; 1315 case EINPROGRESS: 1316 ip1dbg(("ipif_ndp_up: running DAD now for %s\n", 1317 ill->ill_name)); 1318 ipif->ipif_added_nce = 1; 1319 nce->nce_ipif_cnt++; 1320 break; 1321 case EEXIST: 1322 ip1dbg(("ipif_ndp_up: NCE already exists for %s\n", 1323 ill->ill_name)); 1324 if (!NCE_MYADDR(nce->nce_common)) { 1325 /* 1326 * A leftover nce from before this address 1327 * existed 1328 */ 1329 ncec_delete(nce->nce_common); 1330 nce_refrele(nce); 1331 nce = NULL; 1332 goto retry; 1333 } 1334 if ((ipif->ipif_flags & IPIF_POINTOPOINT) == 0) { 1335 nce_refrele(nce); 1336 nce = NULL; 1337 ip1dbg(("ipif_ndp_up: NCE already exists " 1338 "for %s\n", ill->ill_name)); 1339 goto fail; 1340 } 1341 /* 1342 * Duplicate local addresses are permissible for 1343 * IPIF_POINTOPOINT interfaces which will get marked 1344 * IPIF_UNNUMBERED later in 1345 * ip_addr_availability_check(). 1346 * 1347 * The nce_ipif_cnt field tracks the number of 1348 * ipifs that have nce_addr as their local address. 1349 */ 1350 ipif->ipif_addr_ready = 1; 1351 ipif->ipif_added_nce = 1; 1352 nce->nce_ipif_cnt++; 1353 err = 0; 1354 break; 1355 default: 1356 ip1dbg(("ipif_ndp_up: NCE creation failed for %s\n", 1357 ill->ill_name)); 1358 goto fail; 1359 } 1360 } else { 1361 /* No local NCE for this entry */ 1362 ipif->ipif_addr_ready = 1; 1363 } 1364 if (nce != NULL) 1365 nce_refrele(nce); 1366 return (0); 1367 fail: 1368 if (added_ipif) 1369 ipmp_illgrp_del_ipif(ill->ill_grp, ipif); 1370 1371 return (err); 1372 } 1373 1374 /* Remove all cache entries for this logical interface */ 1375 void 1376 ipif_ndp_down(ipif_t *ipif) 1377 { 1378 ipif_nce_down(ipif); 1379 } 1380 1381 /* 1382 * Return the scope of the given IPv6 address. If the address is an 1383 * IPv4 mapped IPv6 address, return the scope of the corresponding 1384 * IPv4 address. 1385 */ 1386 in6addr_scope_t 1387 ip_addr_scope_v6(const in6_addr_t *addr) 1388 { 1389 static in6_addr_t ipv6loopback = IN6ADDR_LOOPBACK_INIT; 1390 1391 if (IN6_IS_ADDR_V4MAPPED(addr)) { 1392 in_addr_t v4addr_h = ntohl(V4_PART_OF_V6((*addr))); 1393 if ((v4addr_h >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 1394 (v4addr_h & IN_AUTOCONF_MASK) == IN_AUTOCONF_NET) 1395 return (IP6_SCOPE_LINKLOCAL); 1396 if ((v4addr_h & IN_PRIVATE8_MASK) == IN_PRIVATE8_NET || 1397 (v4addr_h & IN_PRIVATE12_MASK) == IN_PRIVATE12_NET || 1398 (v4addr_h & IN_PRIVATE16_MASK) == IN_PRIVATE16_NET) 1399 return (IP6_SCOPE_SITELOCAL); 1400 return (IP6_SCOPE_GLOBAL); 1401 } 1402 1403 if (IN6_IS_ADDR_MULTICAST(addr)) 1404 return (IN6_ADDR_MC_SCOPE(addr)); 1405 1406 /* link-local and loopback addresses are of link-local scope */ 1407 if (IN6_IS_ADDR_LINKLOCAL(addr) || 1408 IN6_ARE_ADDR_EQUAL(addr, &ipv6loopback)) 1409 return (IP6_SCOPE_LINKLOCAL); 1410 if (IN6_IS_ADDR_SITELOCAL(addr)) 1411 return (IP6_SCOPE_SITELOCAL); 1412 return (IP6_SCOPE_GLOBAL); 1413 } 1414 1415 1416 /* 1417 * Returns the length of the common prefix of a1 and a2, as per 1418 * CommonPrefixLen() defined in RFC 3484. 1419 */ 1420 static int 1421 ip_common_prefix_v6(const in6_addr_t *a1, const in6_addr_t *a2) 1422 { 1423 int i; 1424 uint32_t a1val, a2val, mask; 1425 1426 for (i = 0; i < 4; i++) { 1427 if ((a1val = a1->s6_addr32[i]) != (a2val = a2->s6_addr32[i])) { 1428 a1val ^= a2val; 1429 i *= 32; 1430 mask = 0x80000000u; 1431 while (!(a1val & mask)) { 1432 mask >>= 1; 1433 i++; 1434 } 1435 return (i); 1436 } 1437 } 1438 return (IPV6_ABITS); 1439 } 1440 1441 #define IPIF_VALID_IPV6_SOURCE(ipif) \ 1442 (((ipif)->ipif_flags & IPIF_UP) && \ 1443 !((ipif)->ipif_flags & (IPIF_NOLOCAL|IPIF_ANYCAST)) && \ 1444 !((ipif)->ipif_ill->ill_flags & ILLF_NOACCEPT)) 1445 1446 /* source address candidate */ 1447 typedef struct candidate { 1448 ipif_t *cand_ipif; 1449 /* The properties of this candidate */ 1450 boolean_t cand_isdst; 1451 boolean_t cand_isdst_set; 1452 in6addr_scope_t cand_scope; 1453 boolean_t cand_scope_set; 1454 boolean_t cand_isdeprecated; 1455 boolean_t cand_isdeprecated_set; 1456 boolean_t cand_ispreferred; 1457 boolean_t cand_ispreferred_set; 1458 boolean_t cand_matchedinterface; 1459 boolean_t cand_matchedinterface_set; 1460 boolean_t cand_matchedlabel; 1461 boolean_t cand_matchedlabel_set; 1462 boolean_t cand_istmp; 1463 boolean_t cand_istmp_set; 1464 int cand_common_pref; 1465 boolean_t cand_common_pref_set; 1466 boolean_t cand_pref_eq; 1467 boolean_t cand_pref_eq_set; 1468 int cand_pref_len; 1469 boolean_t cand_pref_len_set; 1470 } cand_t; 1471 #define cand_srcaddr cand_ipif->ipif_v6lcl_addr 1472 #define cand_mask cand_ipif->ipif_v6net_mask 1473 #define cand_flags cand_ipif->ipif_flags 1474 #define cand_ill cand_ipif->ipif_ill 1475 #define cand_zoneid cand_ipif->ipif_zoneid 1476 1477 /* information about the destination for source address selection */ 1478 typedef struct dstinfo { 1479 const in6_addr_t *dst_addr; 1480 ill_t *dst_ill; 1481 uint_t dst_restrict_ill; 1482 boolean_t dst_prefer_src_tmp; 1483 in6addr_scope_t dst_scope; 1484 char *dst_label; 1485 } dstinfo_t; 1486 1487 /* 1488 * The following functions are rules used to select a source address in 1489 * ipif_select_source_v6(). Each rule compares a current candidate (cc) 1490 * against the best candidate (bc). Each rule has three possible outcomes; 1491 * the candidate is preferred over the best candidate (CAND_PREFER), the 1492 * candidate is not preferred over the best candidate (CAND_AVOID), or the 1493 * candidate is of equal value as the best candidate (CAND_TIE). 1494 * 1495 * These rules are part of a greater "Default Address Selection for IPv6" 1496 * sheme, which is standards based work coming out of the IETF ipv6 working 1497 * group. The IETF document defines both IPv6 source address selection and 1498 * destination address ordering. The rules defined here implement the IPv6 1499 * source address selection. Destination address ordering is done by 1500 * libnsl, and uses a similar set of rules to implement the sorting. 1501 * 1502 * Most of the rules are defined by the RFC and are not typically altered. The 1503 * last rule, number 8, has language that allows for local preferences. In the 1504 * scheme below, this means that new Solaris rules should normally go between 1505 * rule_ifprefix and rule_prefix. 1506 */ 1507 typedef enum {CAND_AVOID, CAND_TIE, CAND_PREFER} rule_res_t; 1508 typedef rule_res_t (*rulef_t)(cand_t *, cand_t *, const dstinfo_t *, 1509 ip_stack_t *); 1510 1511 /* Prefer an address if it is equal to the destination address. */ 1512 /* ARGSUSED3 */ 1513 static rule_res_t 1514 rule_isdst(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 1515 { 1516 if (!bc->cand_isdst_set) { 1517 bc->cand_isdst = 1518 IN6_ARE_ADDR_EQUAL(&bc->cand_srcaddr, dstinfo->dst_addr); 1519 bc->cand_isdst_set = B_TRUE; 1520 } 1521 1522 cc->cand_isdst = 1523 IN6_ARE_ADDR_EQUAL(&cc->cand_srcaddr, dstinfo->dst_addr); 1524 cc->cand_isdst_set = B_TRUE; 1525 1526 if (cc->cand_isdst == bc->cand_isdst) 1527 return (CAND_TIE); 1528 else if (cc->cand_isdst) 1529 return (CAND_PREFER); 1530 else 1531 return (CAND_AVOID); 1532 } 1533 1534 /* 1535 * Prefer addresses that are of closest scope to the destination. Always 1536 * prefer addresses that are of greater scope than the destination over 1537 * those that are of lesser scope than the destination. 1538 */ 1539 /* ARGSUSED3 */ 1540 static rule_res_t 1541 rule_scope(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 1542 { 1543 if (!bc->cand_scope_set) { 1544 bc->cand_scope = ip_addr_scope_v6(&bc->cand_srcaddr); 1545 bc->cand_scope_set = B_TRUE; 1546 } 1547 1548 cc->cand_scope = ip_addr_scope_v6(&cc->cand_srcaddr); 1549 cc->cand_scope_set = B_TRUE; 1550 1551 if (cc->cand_scope < bc->cand_scope) { 1552 if (cc->cand_scope < dstinfo->dst_scope) 1553 return (CAND_AVOID); 1554 else 1555 return (CAND_PREFER); 1556 } else if (bc->cand_scope < cc->cand_scope) { 1557 if (bc->cand_scope < dstinfo->dst_scope) 1558 return (CAND_PREFER); 1559 else 1560 return (CAND_AVOID); 1561 } else { 1562 return (CAND_TIE); 1563 } 1564 } 1565 1566 /* 1567 * Prefer non-deprecated source addresses. 1568 */ 1569 /* ARGSUSED2 */ 1570 static rule_res_t 1571 rule_deprecated(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1572 ip_stack_t *ipst) 1573 { 1574 if (!bc->cand_isdeprecated_set) { 1575 bc->cand_isdeprecated = 1576 ((bc->cand_flags & IPIF_DEPRECATED) != 0); 1577 bc->cand_isdeprecated_set = B_TRUE; 1578 } 1579 1580 cc->cand_isdeprecated = ((cc->cand_flags & IPIF_DEPRECATED) != 0); 1581 cc->cand_isdeprecated_set = B_TRUE; 1582 1583 if (bc->cand_isdeprecated == cc->cand_isdeprecated) 1584 return (CAND_TIE); 1585 else if (cc->cand_isdeprecated) 1586 return (CAND_AVOID); 1587 else 1588 return (CAND_PREFER); 1589 } 1590 1591 /* 1592 * Prefer source addresses that have the IPIF_PREFERRED flag set. This 1593 * rule must be before rule_interface because the flag could be set on any 1594 * interface, not just the interface being used for outgoing packets (for 1595 * example, the IFF_PREFERRED could be set on an address assigned to the 1596 * loopback interface). 1597 */ 1598 /* ARGSUSED2 */ 1599 static rule_res_t 1600 rule_preferred(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1601 ip_stack_t *ipst) 1602 { 1603 if (!bc->cand_ispreferred_set) { 1604 bc->cand_ispreferred = ((bc->cand_flags & IPIF_PREFERRED) != 0); 1605 bc->cand_ispreferred_set = B_TRUE; 1606 } 1607 1608 cc->cand_ispreferred = ((cc->cand_flags & IPIF_PREFERRED) != 0); 1609 cc->cand_ispreferred_set = B_TRUE; 1610 1611 if (bc->cand_ispreferred == cc->cand_ispreferred) 1612 return (CAND_TIE); 1613 else if (cc->cand_ispreferred) 1614 return (CAND_PREFER); 1615 else 1616 return (CAND_AVOID); 1617 } 1618 1619 /* 1620 * Prefer source addresses that are assigned to the outgoing interface. 1621 */ 1622 /* ARGSUSED3 */ 1623 static rule_res_t 1624 rule_interface(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1625 ip_stack_t *ipst) 1626 { 1627 ill_t *dstill = dstinfo->dst_ill; 1628 1629 /* 1630 * If dstinfo->dst_restrict_ill is set, this rule is unnecessary 1631 * since we know all candidates will be on the same link. 1632 */ 1633 if (dstinfo->dst_restrict_ill) 1634 return (CAND_TIE); 1635 1636 if (!bc->cand_matchedinterface_set) { 1637 bc->cand_matchedinterface = bc->cand_ill == dstill; 1638 bc->cand_matchedinterface_set = B_TRUE; 1639 } 1640 1641 cc->cand_matchedinterface = cc->cand_ill == dstill; 1642 cc->cand_matchedinterface_set = B_TRUE; 1643 1644 if (bc->cand_matchedinterface == cc->cand_matchedinterface) 1645 return (CAND_TIE); 1646 else if (cc->cand_matchedinterface) 1647 return (CAND_PREFER); 1648 else 1649 return (CAND_AVOID); 1650 } 1651 1652 /* 1653 * Prefer source addresses whose label matches the destination's label. 1654 */ 1655 static rule_res_t 1656 rule_label(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 1657 { 1658 char *label; 1659 1660 if (!bc->cand_matchedlabel_set) { 1661 label = ip6_asp_lookup(&bc->cand_srcaddr, NULL, ipst); 1662 bc->cand_matchedlabel = 1663 ip6_asp_labelcmp(label, dstinfo->dst_label); 1664 bc->cand_matchedlabel_set = B_TRUE; 1665 } 1666 1667 label = ip6_asp_lookup(&cc->cand_srcaddr, NULL, ipst); 1668 cc->cand_matchedlabel = ip6_asp_labelcmp(label, dstinfo->dst_label); 1669 cc->cand_matchedlabel_set = B_TRUE; 1670 1671 if (bc->cand_matchedlabel == cc->cand_matchedlabel) 1672 return (CAND_TIE); 1673 else if (cc->cand_matchedlabel) 1674 return (CAND_PREFER); 1675 else 1676 return (CAND_AVOID); 1677 } 1678 1679 /* 1680 * Prefer public addresses over temporary ones. An application can reverse 1681 * the logic of this rule and prefer temporary addresses by using the 1682 * IPV6_SRC_PREFERENCES socket option. 1683 */ 1684 /* ARGSUSED3 */ 1685 static rule_res_t 1686 rule_temporary(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1687 ip_stack_t *ipst) 1688 { 1689 if (!bc->cand_istmp_set) { 1690 bc->cand_istmp = ((bc->cand_flags & IPIF_TEMPORARY) != 0); 1691 bc->cand_istmp_set = B_TRUE; 1692 } 1693 1694 cc->cand_istmp = ((cc->cand_flags & IPIF_TEMPORARY) != 0); 1695 cc->cand_istmp_set = B_TRUE; 1696 1697 if (bc->cand_istmp == cc->cand_istmp) 1698 return (CAND_TIE); 1699 1700 if (dstinfo->dst_prefer_src_tmp && cc->cand_istmp) 1701 return (CAND_PREFER); 1702 else if (!dstinfo->dst_prefer_src_tmp && !cc->cand_istmp) 1703 return (CAND_PREFER); 1704 else 1705 return (CAND_AVOID); 1706 } 1707 1708 /* 1709 * Prefer source addresses with longer matching prefix with the destination 1710 * under the interface mask. This gets us on the same subnet before applying 1711 * any Solaris-specific rules. 1712 */ 1713 /* ARGSUSED3 */ 1714 static rule_res_t 1715 rule_ifprefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1716 ip_stack_t *ipst) 1717 { 1718 if (!bc->cand_pref_eq_set) { 1719 bc->cand_pref_eq = V6_MASK_EQ_2(bc->cand_srcaddr, 1720 bc->cand_mask, *dstinfo->dst_addr); 1721 bc->cand_pref_eq_set = B_TRUE; 1722 } 1723 1724 cc->cand_pref_eq = V6_MASK_EQ_2(cc->cand_srcaddr, cc->cand_mask, 1725 *dstinfo->dst_addr); 1726 cc->cand_pref_eq_set = B_TRUE; 1727 1728 if (bc->cand_pref_eq) { 1729 if (cc->cand_pref_eq) { 1730 if (!bc->cand_pref_len_set) { 1731 bc->cand_pref_len = 1732 ip_mask_to_plen_v6(&bc->cand_mask); 1733 bc->cand_pref_len_set = B_TRUE; 1734 } 1735 cc->cand_pref_len = ip_mask_to_plen_v6(&cc->cand_mask); 1736 cc->cand_pref_len_set = B_TRUE; 1737 if (bc->cand_pref_len == cc->cand_pref_len) 1738 return (CAND_TIE); 1739 else if (bc->cand_pref_len > cc->cand_pref_len) 1740 return (CAND_AVOID); 1741 else 1742 return (CAND_PREFER); 1743 } else { 1744 return (CAND_AVOID); 1745 } 1746 } else { 1747 if (cc->cand_pref_eq) 1748 return (CAND_PREFER); 1749 else 1750 return (CAND_TIE); 1751 } 1752 } 1753 1754 /* 1755 * Prefer to use zone-specific addresses when possible instead of all-zones 1756 * addresses. 1757 */ 1758 /* ARGSUSED2 */ 1759 static rule_res_t 1760 rule_zone_specific(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1761 ip_stack_t *ipst) 1762 { 1763 if ((bc->cand_zoneid == ALL_ZONES) == 1764 (cc->cand_zoneid == ALL_ZONES)) 1765 return (CAND_TIE); 1766 else if (cc->cand_zoneid == ALL_ZONES) 1767 return (CAND_AVOID); 1768 else 1769 return (CAND_PREFER); 1770 } 1771 1772 /* 1773 * Prefer to use DHCPv6 (first) and static addresses (second) when possible 1774 * instead of statelessly autoconfigured addresses. 1775 * 1776 * This is done after trying all other preferences (and before the final tie 1777 * breaker) so that, if all else is equal, we select addresses configured by 1778 * DHCPv6 over other addresses. We presume that DHCPv6 addresses, unlike 1779 * stateless autoconfigured addresses, are deliberately configured by an 1780 * administrator, and thus are correctly set up in DNS and network packet 1781 * filters. 1782 */ 1783 /* ARGSUSED2 */ 1784 static rule_res_t 1785 rule_addr_type(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1786 ip_stack_t *ipst) 1787 { 1788 #define ATYPE(x) \ 1789 ((x) & IPIF_DHCPRUNNING) ? 1 : ((x) & IPIF_ADDRCONF) ? 3 : 2 1790 int bcval = ATYPE(bc->cand_flags); 1791 int ccval = ATYPE(cc->cand_flags); 1792 #undef ATYPE 1793 1794 if (bcval == ccval) 1795 return (CAND_TIE); 1796 else if (ccval < bcval) 1797 return (CAND_PREFER); 1798 else 1799 return (CAND_AVOID); 1800 } 1801 1802 /* 1803 * Prefer source addresses with longer matching prefix with the destination. 1804 * We do the longest matching prefix calculation by doing an xor of both 1805 * addresses with the destination, and pick the address with the longest string 1806 * of leading zeros, as per CommonPrefixLen() defined in RFC 3484. 1807 */ 1808 /* ARGSUSED3 */ 1809 static rule_res_t 1810 rule_prefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 1811 { 1812 if (!bc->cand_common_pref_set) { 1813 bc->cand_common_pref = ip_common_prefix_v6(&bc->cand_srcaddr, 1814 dstinfo->dst_addr); 1815 bc->cand_common_pref_set = B_TRUE; 1816 } 1817 1818 cc->cand_common_pref = ip_common_prefix_v6(&cc->cand_srcaddr, 1819 dstinfo->dst_addr); 1820 cc->cand_common_pref_set = B_TRUE; 1821 1822 if (bc->cand_common_pref == cc->cand_common_pref) 1823 return (CAND_TIE); 1824 else if (bc->cand_common_pref > cc->cand_common_pref) 1825 return (CAND_AVOID); 1826 else 1827 return (CAND_PREFER); 1828 } 1829 1830 /* 1831 * Last rule: we must pick something, so just prefer the current best 1832 * candidate. 1833 */ 1834 /* ARGSUSED */ 1835 static rule_res_t 1836 rule_must_be_last(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1837 ip_stack_t *ipst) 1838 { 1839 return (CAND_AVOID); 1840 } 1841 1842 /* 1843 * Determine the best source address given a destination address and a 1844 * destination ill. If no suitable source address is found, it returns 1845 * NULL. If there is a usable address pointed to by the usesrc 1846 * (i.e ill_usesrc_ifindex != 0) then return that first since it is more 1847 * fine grained (i.e per interface) 1848 * 1849 * This implementation is based on the "Default Address Selection for IPv6" 1850 * specification produced by the IETF IPv6 working group. It has been 1851 * implemented so that the list of addresses is only traversed once (the 1852 * specification's algorithm could traverse the list of addresses once for 1853 * every rule). 1854 * 1855 * The restrict_ill argument restricts the algorithm to choose a source 1856 * address that is assigned to the destination ill. This is used when 1857 * the destination address is a link-local or multicast address, and when 1858 * ipv6_strict_dst_multihoming is turned on. 1859 * 1860 * src_prefs is the caller's set of source address preferences. If source 1861 * address selection is being called to determine the source address of a 1862 * connected socket (from ip_set_destination_v6()), then the preferences are 1863 * taken from conn_ixa->ixa_src_preferences. These preferences can be set on a 1864 * per-socket basis using the IPV6_SRC_PREFERENCES socket option. The only 1865 * preference currently implemented is for rfc3041 temporary addresses. 1866 */ 1867 ipif_t * 1868 ipif_select_source_v6(ill_t *dstill, const in6_addr_t *dst, 1869 boolean_t restrict_ill, uint32_t src_prefs, zoneid_t zoneid, 1870 boolean_t allow_usesrc, boolean_t *notreadyp) 1871 { 1872 dstinfo_t dstinfo; 1873 char dstr[INET6_ADDRSTRLEN]; 1874 char sstr[INET6_ADDRSTRLEN]; 1875 ipif_t *ipif, *start_ipif, *next_ipif; 1876 ill_t *ill, *usesrc_ill = NULL, *ipmp_ill = NULL; 1877 ill_walk_context_t ctx; 1878 cand_t best_c; /* The best candidate */ 1879 cand_t curr_c; /* The current candidate */ 1880 uint_t index; 1881 boolean_t first_candidate = B_TRUE; 1882 rule_res_t rule_result; 1883 tsol_tpc_t *src_rhtp, *dst_rhtp; 1884 ip_stack_t *ipst = dstill->ill_ipst; 1885 1886 /* 1887 * The list of ordering rules. They are applied in the order they 1888 * appear in the list. 1889 * 1890 * Solaris doesn't currently support Mobile IPv6, so there's no 1891 * rule_mipv6 corresponding to rule 4 in the specification. 1892 */ 1893 rulef_t rules[] = { 1894 rule_isdst, 1895 rule_scope, 1896 rule_deprecated, 1897 rule_preferred, 1898 rule_interface, 1899 rule_label, 1900 rule_temporary, 1901 rule_ifprefix, /* local rules after this */ 1902 rule_zone_specific, 1903 rule_addr_type, 1904 rule_prefix, /* local rules before this */ 1905 rule_must_be_last, /* must always be last */ 1906 NULL 1907 }; 1908 1909 ASSERT(dstill->ill_isv6); 1910 ASSERT(!IN6_IS_ADDR_V4MAPPED(dst)); 1911 1912 /* 1913 * Check if there is a usable src address pointed to by the 1914 * usesrc ifindex. This has higher precedence since it is 1915 * finer grained (i.e per interface) v/s being system wide. 1916 */ 1917 if (dstill->ill_usesrc_ifindex != 0 && allow_usesrc) { 1918 if ((usesrc_ill = 1919 ill_lookup_on_ifindex(dstill->ill_usesrc_ifindex, B_TRUE, 1920 ipst)) != NULL) { 1921 dstinfo.dst_ill = usesrc_ill; 1922 } else { 1923 return (NULL); 1924 } 1925 } else if (IS_UNDER_IPMP(dstill)) { 1926 /* 1927 * Test addresses should never be used for source address 1928 * selection, so if we were passed an underlying ill, switch 1929 * to the IPMP meta-interface. 1930 */ 1931 if ((ipmp_ill = ipmp_ill_hold_ipmp_ill(dstill)) != NULL) 1932 dstinfo.dst_ill = ipmp_ill; 1933 else 1934 return (NULL); 1935 } else { 1936 dstinfo.dst_ill = dstill; 1937 } 1938 1939 /* 1940 * If we're dealing with an unlabeled destination on a labeled system, 1941 * make sure that we ignore source addresses that are incompatible with 1942 * the destination's default label. That destination's default label 1943 * must dominate the minimum label on the source address. 1944 * 1945 * (Note that this has to do with Trusted Solaris. It's not related to 1946 * the labels described by ip6_asp_lookup.) 1947 */ 1948 dst_rhtp = NULL; 1949 if (is_system_labeled()) { 1950 dst_rhtp = find_tpc(dst, IPV6_VERSION, B_FALSE); 1951 if (dst_rhtp == NULL) 1952 return (NULL); 1953 if (dst_rhtp->tpc_tp.host_type != UNLABELED) { 1954 TPC_RELE(dst_rhtp); 1955 dst_rhtp = NULL; 1956 } 1957 } 1958 1959 dstinfo.dst_addr = dst; 1960 dstinfo.dst_scope = ip_addr_scope_v6(dst); 1961 dstinfo.dst_label = ip6_asp_lookup(dst, NULL, ipst); 1962 dstinfo.dst_prefer_src_tmp = ((src_prefs & IPV6_PREFER_SRC_TMP) != 0); 1963 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1964 /* 1965 * Section three of the I-D states that for multicast and 1966 * link-local destinations, the candidate set must be restricted to 1967 * an interface that is on the same link as the outgoing interface. 1968 * Also, when ipv6_strict_dst_multihoming is turned on, always 1969 * restrict the source address to the destination link as doing 1970 * otherwise will almost certainly cause problems. 1971 */ 1972 if (IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst) || 1973 ipst->ips_ipv6_strict_dst_multihoming || usesrc_ill != NULL) { 1974 dstinfo.dst_restrict_ill = B_TRUE; 1975 } else { 1976 dstinfo.dst_restrict_ill = restrict_ill; 1977 } 1978 1979 bzero(&best_c, sizeof (cand_t)); 1980 1981 /* 1982 * Take a pass through the list of IPv6 interfaces to choose the best 1983 * possible source address. If restrict_ill is set, just use dst_ill. 1984 */ 1985 if (dstinfo.dst_restrict_ill) 1986 ill = dstinfo.dst_ill; 1987 else 1988 ill = ILL_START_WALK_V6(&ctx, ipst); 1989 1990 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 1991 ASSERT(ill->ill_isv6); 1992 1993 /* 1994 * Test addresses should never be used for source address 1995 * selection, so ignore underlying ills. 1996 */ 1997 if (IS_UNDER_IPMP(ill)) 1998 continue; 1999 2000 if (ill->ill_ipif == NULL) 2001 continue; 2002 /* 2003 * For source address selection, we treat the ipif list as 2004 * circular and continue until we get back to where we 2005 * started. This allows IPMP to vary source address selection 2006 * (which improves inbound load spreading) by caching its last 2007 * ending point and starting from there. NOTE: we don't have 2008 * to worry about ill_src_ipif changing ills since that can't 2009 * happen on the IPMP ill. 2010 */ 2011 start_ipif = ill->ill_ipif; 2012 if (IS_IPMP(ill) && ill->ill_src_ipif != NULL) 2013 start_ipif = ill->ill_src_ipif; 2014 2015 ipif = start_ipif; 2016 do { 2017 if ((next_ipif = ipif->ipif_next) == NULL) 2018 next_ipif = ill->ill_ipif; 2019 2020 if (!IPIF_VALID_IPV6_SOURCE(ipif)) 2021 continue; 2022 2023 if (!ipif->ipif_addr_ready) { 2024 if (notreadyp != NULL) 2025 *notreadyp = B_TRUE; 2026 continue; 2027 } 2028 2029 if (zoneid != ALL_ZONES && 2030 ipif->ipif_zoneid != zoneid && 2031 ipif->ipif_zoneid != ALL_ZONES) 2032 continue; 2033 2034 /* 2035 * Check compatibility of local address for 2036 * destination's default label if we're on a labeled 2037 * system. Incompatible addresses can't be used at 2038 * all and must be skipped over. 2039 */ 2040 if (dst_rhtp != NULL) { 2041 boolean_t incompat; 2042 2043 src_rhtp = find_tpc(&ipif->ipif_v6lcl_addr, 2044 IPV6_VERSION, B_FALSE); 2045 if (src_rhtp == NULL) 2046 continue; 2047 incompat = 2048 src_rhtp->tpc_tp.host_type != SUN_CIPSO || 2049 src_rhtp->tpc_tp.tp_doi != 2050 dst_rhtp->tpc_tp.tp_doi || 2051 (!_blinrange(&dst_rhtp->tpc_tp.tp_def_label, 2052 &src_rhtp->tpc_tp.tp_sl_range_cipso) && 2053 !blinlset(&dst_rhtp->tpc_tp.tp_def_label, 2054 src_rhtp->tpc_tp.tp_sl_set_cipso)); 2055 TPC_RELE(src_rhtp); 2056 if (incompat) 2057 continue; 2058 } 2059 2060 if (first_candidate) { 2061 /* 2062 * This is first valid address in the list. 2063 * It is automatically the best candidate 2064 * so far. 2065 */ 2066 best_c.cand_ipif = ipif; 2067 first_candidate = B_FALSE; 2068 continue; 2069 } 2070 2071 bzero(&curr_c, sizeof (cand_t)); 2072 curr_c.cand_ipif = ipif; 2073 2074 /* 2075 * Compare this current candidate (curr_c) with the 2076 * best candidate (best_c) by applying the 2077 * comparison rules in order until one breaks the 2078 * tie. 2079 */ 2080 for (index = 0; rules[index] != NULL; index++) { 2081 /* Apply a comparison rule. */ 2082 rule_result = (rules[index])(&best_c, &curr_c, 2083 &dstinfo, ipst); 2084 if (rule_result == CAND_AVOID) { 2085 /* 2086 * The best candidate is still the 2087 * best candidate. Forget about 2088 * this current candidate and go on 2089 * to the next one. 2090 */ 2091 break; 2092 } else if (rule_result == CAND_PREFER) { 2093 /* 2094 * This candidate is prefered. It 2095 * becomes the best candidate so 2096 * far. Go on to the next address. 2097 */ 2098 best_c = curr_c; 2099 break; 2100 } 2101 /* We have a tie, apply the next rule. */ 2102 } 2103 2104 /* 2105 * The last rule must be a tie breaker rule and 2106 * must never produce a tie. At this point, the 2107 * candidate should have either been rejected, or 2108 * have been prefered as the best candidate so far. 2109 */ 2110 ASSERT(rule_result != CAND_TIE); 2111 } while ((ipif = next_ipif) != start_ipif); 2112 2113 /* 2114 * For IPMP, update the source ipif rotor to the next ipif, 2115 * provided we can look it up. (We must not use it if it's 2116 * IPIF_CONDEMNED since we may have grabbed ill_g_lock after 2117 * ipif_free() checked ill_src_ipif.) 2118 */ 2119 if (IS_IPMP(ill) && ipif != NULL) { 2120 mutex_enter(&ipif->ipif_ill->ill_lock); 2121 next_ipif = ipif->ipif_next; 2122 if (next_ipif != NULL && !IPIF_IS_CONDEMNED(next_ipif)) 2123 ill->ill_src_ipif = next_ipif; 2124 else 2125 ill->ill_src_ipif = NULL; 2126 mutex_exit(&ipif->ipif_ill->ill_lock); 2127 } 2128 2129 /* 2130 * Only one ill to consider if dst_restrict_ill is set. 2131 */ 2132 if (dstinfo.dst_restrict_ill) 2133 break; 2134 } 2135 2136 ipif = best_c.cand_ipif; 2137 ip1dbg(("ipif_select_source_v6(%s, %s) -> %s\n", 2138 dstinfo.dst_ill->ill_name, 2139 inet_ntop(AF_INET6, dstinfo.dst_addr, dstr, sizeof (dstr)), 2140 (ipif == NULL ? "NULL" : 2141 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, sstr, sizeof (sstr))))); 2142 2143 if (usesrc_ill != NULL) 2144 ill_refrele(usesrc_ill); 2145 2146 if (ipmp_ill != NULL) 2147 ill_refrele(ipmp_ill); 2148 2149 if (dst_rhtp != NULL) 2150 TPC_RELE(dst_rhtp); 2151 2152 if (ipif == NULL) { 2153 rw_exit(&ipst->ips_ill_g_lock); 2154 return (NULL); 2155 } 2156 2157 mutex_enter(&ipif->ipif_ill->ill_lock); 2158 if (!IPIF_IS_CONDEMNED(ipif)) { 2159 ipif_refhold_locked(ipif); 2160 mutex_exit(&ipif->ipif_ill->ill_lock); 2161 rw_exit(&ipst->ips_ill_g_lock); 2162 return (ipif); 2163 } 2164 mutex_exit(&ipif->ipif_ill->ill_lock); 2165 rw_exit(&ipst->ips_ill_g_lock); 2166 ip1dbg(("ipif_select_source_v6 cannot lookup ipif %p" 2167 " returning null \n", (void *)ipif)); 2168 2169 return (NULL); 2170 } 2171 2172 /* 2173 * Pick a source address based on the destination ill and an optional setsrc 2174 * address. 2175 * The result is stored in srcp. If generation is set, then put the source 2176 * generation number there before we look for the source address (to avoid 2177 * missing changes in the set of source addresses. 2178 * If flagsp is set, then us it to pass back ipif_flags. 2179 * 2180 * If the caller wants to cache the returned source address and detect when 2181 * that might be stale, the caller should pass in a generation argument, 2182 * which the caller can later compare against ips_src_generation 2183 * 2184 * The precedence order for selecting an IPv6 source address is: 2185 * - RTF_SETSRC on the first ire in the recursive lookup always wins. 2186 * - If usrsrc is set, swap the ill to be the usesrc one. 2187 * - If IPMP is used on the ill, select a random address from the most 2188 * preferred ones below: 2189 * That is followed by the long list of IPv6 source address selection rules 2190 * starting with rule_isdst(), rule_scope(), etc. 2191 * 2192 * We have lower preference for ALL_ZONES IP addresses, 2193 * as they pose problems with unlabeled destinations. 2194 * 2195 * Note that when multiple IP addresses match e.g., with rule_scope() we pick 2196 * the first one if IPMP is not in use. With IPMP we randomize. 2197 */ 2198 int 2199 ip_select_source_v6(ill_t *ill, const in6_addr_t *setsrc, const in6_addr_t *dst, 2200 zoneid_t zoneid, ip_stack_t *ipst, uint_t restrict_ill, uint32_t src_prefs, 2201 in6_addr_t *srcp, uint32_t *generation, uint64_t *flagsp) 2202 { 2203 ipif_t *ipif; 2204 boolean_t notready = B_FALSE; /* Set if !ipif_addr_ready found */ 2205 2206 if (flagsp != NULL) 2207 *flagsp = 0; 2208 2209 /* 2210 * Need to grab the generation number before we check to 2211 * avoid a race with a change to the set of local addresses. 2212 * No lock needed since the thread which updates the set of local 2213 * addresses use ipif/ill locks and exit those (hence a store memory 2214 * barrier) before doing the atomic increase of ips_src_generation. 2215 */ 2216 if (generation != NULL) { 2217 *generation = ipst->ips_src_generation; 2218 } 2219 2220 /* Was RTF_SETSRC set on the first IRE in the recursive lookup? */ 2221 if (setsrc != NULL && !IN6_IS_ADDR_UNSPECIFIED(setsrc)) { 2222 *srcp = *setsrc; 2223 return (0); 2224 } 2225 2226 ipif = ipif_select_source_v6(ill, dst, restrict_ill, src_prefs, zoneid, 2227 B_TRUE, ¬ready); 2228 if (ipif == NULL) { 2229 if (notready) 2230 return (ENETDOWN); 2231 else 2232 return (EADDRNOTAVAIL); 2233 } 2234 *srcp = ipif->ipif_v6lcl_addr; 2235 if (flagsp != NULL) 2236 *flagsp = ipif->ipif_flags; 2237 ipif_refrele(ipif); 2238 return (0); 2239 } 2240 2241 /* 2242 * Perform an attach and bind to get phys addr plus info_req for 2243 * the physical device. 2244 * q and mp represents an ioctl which will be queued waiting for 2245 * completion of the DLPI message exchange. 2246 * MUST be called on an ill queue. 2247 * 2248 * Returns EINPROGRESS when mp has been consumed by queueing it. 2249 * The ioctl will complete in ip_rput. 2250 */ 2251 int 2252 ill_dl_phys(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q) 2253 { 2254 mblk_t *v6token_mp = NULL; 2255 mblk_t *v6lla_mp = NULL; 2256 mblk_t *dest_mp = NULL; 2257 mblk_t *phys_mp = NULL; 2258 mblk_t *info_mp = NULL; 2259 mblk_t *attach_mp = NULL; 2260 mblk_t *bind_mp = NULL; 2261 mblk_t *unbind_mp = NULL; 2262 mblk_t *notify_mp = NULL; 2263 mblk_t *capab_mp = NULL; 2264 2265 ip1dbg(("ill_dl_phys(%s:%u)\n", ill->ill_name, ipif->ipif_id)); 2266 ASSERT(ill->ill_dlpi_style_set); 2267 ASSERT(WR(q)->q_next != NULL); 2268 2269 if (ill->ill_isv6) { 2270 v6token_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2271 sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2272 if (v6token_mp == NULL) 2273 goto bad; 2274 ((dl_phys_addr_req_t *)v6token_mp->b_rptr)->dl_addr_type = 2275 DL_IPV6_TOKEN; 2276 2277 v6lla_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2278 sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2279 if (v6lla_mp == NULL) 2280 goto bad; 2281 ((dl_phys_addr_req_t *)v6lla_mp->b_rptr)->dl_addr_type = 2282 DL_IPV6_LINK_LAYER_ADDR; 2283 } 2284 2285 if (ill->ill_mactype == DL_IPV4 || ill->ill_mactype == DL_IPV6) { 2286 dest_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2287 sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2288 if (dest_mp == NULL) 2289 goto bad; 2290 ((dl_phys_addr_req_t *)dest_mp->b_rptr)->dl_addr_type = 2291 DL_CURR_DEST_ADDR; 2292 } 2293 2294 /* 2295 * Allocate a DL_NOTIFY_REQ and set the notifications we want. 2296 */ 2297 notify_mp = ip_dlpi_alloc(sizeof (dl_notify_req_t) + sizeof (long), 2298 DL_NOTIFY_REQ); 2299 if (notify_mp == NULL) 2300 goto bad; 2301 ((dl_notify_req_t *)notify_mp->b_rptr)->dl_notifications = 2302 (DL_NOTE_PHYS_ADDR | DL_NOTE_SDU_SIZE | DL_NOTE_FASTPATH_FLUSH | 2303 DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN | DL_NOTE_CAPAB_RENEG | 2304 DL_NOTE_PROMISC_ON_PHYS | DL_NOTE_PROMISC_OFF_PHYS | 2305 DL_NOTE_REPLUMB); 2306 2307 phys_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2308 sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2309 if (phys_mp == NULL) 2310 goto bad; 2311 ((dl_phys_addr_req_t *)phys_mp->b_rptr)->dl_addr_type = 2312 DL_CURR_PHYS_ADDR; 2313 2314 info_mp = ip_dlpi_alloc( 2315 sizeof (dl_info_req_t) + sizeof (dl_info_ack_t), 2316 DL_INFO_REQ); 2317 if (info_mp == NULL) 2318 goto bad; 2319 2320 ASSERT(ill->ill_dlpi_capab_state == IDCS_UNKNOWN); 2321 capab_mp = ip_dlpi_alloc(sizeof (dl_capability_req_t), 2322 DL_CAPABILITY_REQ); 2323 if (capab_mp == NULL) 2324 goto bad; 2325 2326 bind_mp = ip_dlpi_alloc(sizeof (dl_bind_req_t) + sizeof (long), 2327 DL_BIND_REQ); 2328 if (bind_mp == NULL) 2329 goto bad; 2330 ((dl_bind_req_t *)bind_mp->b_rptr)->dl_sap = ill->ill_sap; 2331 ((dl_bind_req_t *)bind_mp->b_rptr)->dl_service_mode = DL_CLDLS; 2332 2333 unbind_mp = ip_dlpi_alloc(sizeof (dl_unbind_req_t), DL_UNBIND_REQ); 2334 if (unbind_mp == NULL) 2335 goto bad; 2336 2337 /* If we need to attach, pre-alloc and initialize the mblk */ 2338 if (ill->ill_needs_attach) { 2339 attach_mp = ip_dlpi_alloc(sizeof (dl_attach_req_t), 2340 DL_ATTACH_REQ); 2341 if (attach_mp == NULL) 2342 goto bad; 2343 ((dl_attach_req_t *)attach_mp->b_rptr)->dl_ppa = ill->ill_ppa; 2344 } 2345 2346 /* 2347 * Here we are going to delay the ioctl ack until after 2348 * ACKs from DL_PHYS_ADDR_REQ. So need to save the 2349 * original ioctl message before sending the requests 2350 */ 2351 mutex_enter(&ill->ill_lock); 2352 /* ipsq_pending_mp_add won't fail since we pass in a NULL connp */ 2353 (void) ipsq_pending_mp_add(NULL, ipif, ill->ill_wq, mp, 0); 2354 /* 2355 * Set ill_phys_addr_pend to zero. It will be set to the addr_type of 2356 * the DL_PHYS_ADDR_REQ in ill_dlpi_send() and ill_dlpi_done(). It will 2357 * be used to track which DL_PHYS_ADDR_REQ is being ACK'd/NAK'd. 2358 */ 2359 ill->ill_phys_addr_pend = 0; 2360 mutex_exit(&ill->ill_lock); 2361 2362 if (attach_mp != NULL) { 2363 ip1dbg(("ill_dl_phys: attach\n")); 2364 ill_dlpi_send(ill, attach_mp); 2365 } 2366 ill_dlpi_send(ill, bind_mp); 2367 ill_dlpi_send(ill, info_mp); 2368 2369 /* 2370 * Send the capability request to get the VRRP capability information. 2371 */ 2372 ill_capability_send(ill, capab_mp); 2373 2374 if (v6token_mp != NULL) 2375 ill_dlpi_send(ill, v6token_mp); 2376 if (v6lla_mp != NULL) 2377 ill_dlpi_send(ill, v6lla_mp); 2378 if (dest_mp != NULL) 2379 ill_dlpi_send(ill, dest_mp); 2380 ill_dlpi_send(ill, phys_mp); 2381 ill_dlpi_send(ill, notify_mp); 2382 ill_dlpi_send(ill, unbind_mp); 2383 2384 /* 2385 * This operation will complete in ip_rput_dlpi_writer with either 2386 * a DL_PHYS_ADDR_ACK or DL_ERROR_ACK. 2387 */ 2388 return (EINPROGRESS); 2389 bad: 2390 freemsg(v6token_mp); 2391 freemsg(v6lla_mp); 2392 freemsg(dest_mp); 2393 freemsg(phys_mp); 2394 freemsg(info_mp); 2395 freemsg(attach_mp); 2396 freemsg(bind_mp); 2397 freemsg(capab_mp); 2398 freemsg(unbind_mp); 2399 freemsg(notify_mp); 2400 return (ENOMEM); 2401 } 2402 2403 /* Add room for tcp+ip headers */ 2404 uint_t ip_loopback_mtu_v6plus = IP_LOOPBACK_MTU + IPV6_HDR_LEN + 20; 2405 2406 /* 2407 * DLPI is up. 2408 * Create all the IREs associated with an interface bring up multicast. 2409 * Set the interface flag and finish other initialization 2410 * that potentially had to be differed to after DL_BIND_ACK. 2411 */ 2412 int 2413 ipif_up_done_v6(ipif_t *ipif) 2414 { 2415 ill_t *ill = ipif->ipif_ill; 2416 int err; 2417 boolean_t loopback = B_FALSE; 2418 2419 ip1dbg(("ipif_up_done_v6(%s:%u)\n", 2420 ipif->ipif_ill->ill_name, ipif->ipif_id)); 2421 DTRACE_PROBE3(ipif__downup, char *, "ipif_up_done_v6", 2422 ill_t *, ill, ipif_t *, ipif); 2423 2424 /* Check if this is a loopback interface */ 2425 if (ipif->ipif_ill->ill_wq == NULL) 2426 loopback = B_TRUE; 2427 2428 ASSERT(ipif->ipif_isv6); 2429 ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock)); 2430 2431 if (IS_LOOPBACK(ill) || ill->ill_net_type == IRE_IF_NORESOLVER) { 2432 nce_t *loop_nce = NULL; 2433 uint16_t flags = (NCE_F_MYADDR | NCE_F_NONUD | NCE_F_AUTHORITY); 2434 2435 /* 2436 * lo0:1 and subsequent ipifs were marked IRE_LOCAL in 2437 * ipif_lookup_on_name(), but in the case of zones we can have 2438 * several loopback addresses on lo0. So all the interfaces with 2439 * loopback addresses need to be marked IRE_LOOPBACK. 2440 */ 2441 if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, &ipv6_loopback)) 2442 ipif->ipif_ire_type = IRE_LOOPBACK; 2443 else 2444 ipif->ipif_ire_type = IRE_LOCAL; 2445 if (ill->ill_net_type != IRE_LOOPBACK) 2446 flags |= NCE_F_PUBLISH; 2447 err = nce_lookup_then_add_v6(ill, NULL, 2448 ill->ill_phys_addr_length, 2449 &ipif->ipif_v6lcl_addr, flags, ND_REACHABLE, &loop_nce); 2450 2451 /* A shared-IP zone sees EEXIST for lo0:N */ 2452 if (err == 0 || err == EEXIST) { 2453 ipif->ipif_added_nce = 1; 2454 loop_nce->nce_ipif_cnt++; 2455 nce_refrele(loop_nce); 2456 err = 0; 2457 } else { 2458 ASSERT(loop_nce == NULL); 2459 return (err); 2460 } 2461 } 2462 2463 err = ipif_add_ires_v6(ipif, loopback); 2464 if (err != 0) { 2465 /* 2466 * See comments about return value from 2467 * ipif_addr_availability_check() in ipif_add_ires_v6(). 2468 */ 2469 if (err != EADDRINUSE) { 2470 ipif_ndp_down(ipif); 2471 } else { 2472 /* 2473 * Make IPMP aware of the deleted ipif so that 2474 * the needed ipmp cleanup (e.g., of ipif_bound_ill) 2475 * can be completed. Note that we do not want to 2476 * destroy the nce that was created on the ipmp_ill 2477 * for the active copy of the duplicate address in 2478 * use. 2479 */ 2480 if (IS_IPMP(ill)) 2481 ipmp_illgrp_del_ipif(ill->ill_grp, ipif); 2482 err = EADDRNOTAVAIL; 2483 } 2484 return (err); 2485 } 2486 2487 if (ill->ill_ipif_up_count == 1 && !loopback) { 2488 /* Recover any additional IREs entries for this ill */ 2489 (void) ill_recover_saved_ire(ill); 2490 } 2491 2492 if (ill->ill_need_recover_multicast) { 2493 /* 2494 * Need to recover all multicast memberships in the driver. 2495 * This had to be deferred until we had attached. 2496 */ 2497 ill_recover_multicast(ill); 2498 } 2499 2500 if (ill->ill_ipif_up_count == 1) { 2501 /* 2502 * Since the interface is now up, it may now be active. 2503 */ 2504 if (IS_UNDER_IPMP(ill)) 2505 ipmp_ill_refresh_active(ill); 2506 } 2507 2508 /* Join the allhosts multicast address and the solicited node MC */ 2509 ipif_multicast_up(ipif); 2510 2511 /* Perhaps ilgs should use this ill */ 2512 update_conn_ill(NULL, ill->ill_ipst); 2513 2514 if (ipif->ipif_addr_ready) 2515 ipif_up_notify(ipif); 2516 2517 return (0); 2518 } 2519 2520 /* 2521 * Add the IREs associated with the ipif. 2522 * Those MUST be explicitly removed in ipif_delete_ires_v6. 2523 */ 2524 static int 2525 ipif_add_ires_v6(ipif_t *ipif, boolean_t loopback) 2526 { 2527 ill_t *ill = ipif->ipif_ill; 2528 ip_stack_t *ipst = ill->ill_ipst; 2529 in6_addr_t v6addr; 2530 in6_addr_t route_mask; 2531 int err; 2532 char buf[INET6_ADDRSTRLEN]; 2533 ire_t *ire_local = NULL; /* LOCAL or LOOPBACK */ 2534 ire_t *ire_if = NULL; 2535 in6_addr_t *gw; 2536 2537 if (!IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr) && 2538 !(ipif->ipif_flags & IPIF_NOLOCAL)) { 2539 2540 /* 2541 * If we're on a labeled system then make sure that zone- 2542 * private addresses have proper remote host database entries. 2543 */ 2544 if (is_system_labeled() && 2545 ipif->ipif_ire_type != IRE_LOOPBACK) { 2546 if (ip6opt_ls == 0) { 2547 cmn_err(CE_WARN, "IPv6 not enabled " 2548 "via /etc/system"); 2549 return (EINVAL); 2550 } 2551 if (!tsol_check_interface_address(ipif)) 2552 return (EINVAL); 2553 } 2554 2555 if (loopback) 2556 gw = &ipif->ipif_v6lcl_addr; 2557 else 2558 gw = NULL; 2559 2560 /* Register the source address for __sin6_src_id */ 2561 err = ip_srcid_insert(&ipif->ipif_v6lcl_addr, 2562 ipif->ipif_zoneid, ipst); 2563 if (err != 0) { 2564 ip0dbg(("ipif_add_ires_v6: srcid_insert %d\n", err)); 2565 return (err); 2566 } 2567 /* 2568 * If the interface address is set, create the LOCAL 2569 * or LOOPBACK IRE. 2570 */ 2571 ip1dbg(("ipif_add_ires_v6: creating IRE %d for %s\n", 2572 ipif->ipif_ire_type, 2573 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, 2574 buf, sizeof (buf)))); 2575 2576 ire_local = ire_create_v6( 2577 &ipif->ipif_v6lcl_addr, /* dest address */ 2578 &ipv6_all_ones, /* mask */ 2579 gw, /* gateway */ 2580 ipif->ipif_ire_type, /* LOCAL or LOOPBACK */ 2581 ipif->ipif_ill, /* interface */ 2582 ipif->ipif_zoneid, 2583 ((ipif->ipif_flags & IPIF_PRIVATE) ? 2584 RTF_PRIVATE : 0) | RTF_KERNEL, 2585 NULL, 2586 ipst); 2587 if (ire_local == NULL) { 2588 ip1dbg(("ipif_up_done_v6: NULL ire_local\n")); 2589 err = ENOMEM; 2590 goto bad; 2591 } 2592 } 2593 2594 /* Set up the IRE_IF_RESOLVER or IRE_IF_NORESOLVER, as appropriate. */ 2595 if (!loopback && !(ipif->ipif_flags & IPIF_NOXMIT) && 2596 !(IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6subnet) && 2597 IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6net_mask))) { 2598 /* ipif_v6subnet is ipif_v6pp_dst_addr for pt-pt */ 2599 v6addr = ipif->ipif_v6subnet; 2600 2601 if (ipif->ipif_flags & IPIF_POINTOPOINT) { 2602 route_mask = ipv6_all_ones; 2603 } else { 2604 route_mask = ipif->ipif_v6net_mask; 2605 } 2606 2607 ip1dbg(("ipif_add_ires_v6: creating if IRE %d for %s\n", 2608 ill->ill_net_type, 2609 inet_ntop(AF_INET6, &v6addr, buf, sizeof (buf)))); 2610 2611 ire_if = ire_create_v6( 2612 &v6addr, /* dest pref */ 2613 &route_mask, /* mask */ 2614 &ipif->ipif_v6lcl_addr, /* gateway */ 2615 ill->ill_net_type, /* IF_[NO]RESOLVER */ 2616 ipif->ipif_ill, 2617 ipif->ipif_zoneid, 2618 ((ipif->ipif_flags & IPIF_PRIVATE) ? 2619 RTF_PRIVATE : 0) | RTF_KERNEL, 2620 NULL, 2621 ipst); 2622 if (ire_if == NULL) { 2623 ip1dbg(("ipif_up_done: NULL ire_if\n")); 2624 err = ENOMEM; 2625 goto bad; 2626 } 2627 } 2628 2629 /* 2630 * Need to atomically check for IP address availability under 2631 * ip_addr_avail_lock. ill_g_lock is held as reader to ensure no new 2632 * ills or new ipifs can be added while we are checking availability. 2633 */ 2634 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2635 mutex_enter(&ipst->ips_ip_addr_avail_lock); 2636 ill->ill_ipif_up_count++; 2637 ipif->ipif_flags |= IPIF_UP; 2638 err = ip_addr_availability_check(ipif); 2639 mutex_exit(&ipst->ips_ip_addr_avail_lock); 2640 rw_exit(&ipst->ips_ill_g_lock); 2641 2642 if (err != 0) { 2643 /* 2644 * Our address may already be up on the same ill. In this case, 2645 * the external resolver entry for our ipif replaced the one for 2646 * the other ipif. So we don't want to delete it (otherwise the 2647 * other ipif would be unable to send packets). 2648 * ip_addr_availability_check() identifies this case for us and 2649 * returns EADDRINUSE; Caller must turn it into EADDRNOTAVAIL 2650 * which is the expected error code. 2651 * 2652 * Note that ipif_ndp_down() will only delete the nce in the 2653 * case when the nce_ipif_cnt drops to 0. 2654 */ 2655 ill->ill_ipif_up_count--; 2656 ipif->ipif_flags &= ~IPIF_UP; 2657 goto bad; 2658 } 2659 2660 /* 2661 * Add in all newly created IREs. 2662 * We add the IRE_INTERFACE before the IRE_LOCAL to ensure 2663 * that lookups find the IRE_LOCAL even if the IRE_INTERFACE is 2664 * a /128 route. 2665 */ 2666 if (ire_if != NULL) { 2667 ire_if = ire_add(ire_if); 2668 if (ire_if == NULL) { 2669 err = ENOMEM; 2670 goto bad2; 2671 } 2672 #ifdef DEBUG 2673 ire_refhold_notr(ire_if); 2674 ire_refrele(ire_if); 2675 #endif 2676 } 2677 if (ire_local != NULL) { 2678 ire_local = ire_add(ire_local); 2679 if (ire_local == NULL) { 2680 err = ENOMEM; 2681 goto bad2; 2682 } 2683 #ifdef DEBUG 2684 ire_refhold_notr(ire_local); 2685 ire_refrele(ire_local); 2686 #endif 2687 } 2688 rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); 2689 if (ire_local != NULL) 2690 ipif->ipif_ire_local = ire_local; 2691 if (ire_if != NULL) 2692 ipif->ipif_ire_if = ire_if; 2693 rw_exit(&ipst->ips_ill_g_lock); 2694 ire_local = NULL; 2695 ire_if = NULL; 2696 2697 if (ipif->ipif_addr_ready) 2698 ipif_up_notify(ipif); 2699 return (0); 2700 2701 bad2: 2702 ill->ill_ipif_up_count--; 2703 ipif->ipif_flags &= ~IPIF_UP; 2704 2705 bad: 2706 if (ire_local != NULL) 2707 ire_delete(ire_local); 2708 if (ire_if != NULL) 2709 ire_delete(ire_if); 2710 2711 rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); 2712 ire_local = ipif->ipif_ire_local; 2713 ipif->ipif_ire_local = NULL; 2714 ire_if = ipif->ipif_ire_if; 2715 ipif->ipif_ire_if = NULL; 2716 rw_exit(&ipst->ips_ill_g_lock); 2717 if (ire_local != NULL) { 2718 ire_delete(ire_local); 2719 ire_refrele_notr(ire_local); 2720 } 2721 if (ire_if != NULL) { 2722 ire_delete(ire_if); 2723 ire_refrele_notr(ire_if); 2724 } 2725 (void) ip_srcid_remove(&ipif->ipif_v6lcl_addr, ipif->ipif_zoneid, ipst); 2726 2727 return (err); 2728 } 2729 2730 /* Remove all the IREs created by ipif_add_ires_v6 */ 2731 void 2732 ipif_delete_ires_v6(ipif_t *ipif) 2733 { 2734 ill_t *ill = ipif->ipif_ill; 2735 ip_stack_t *ipst = ill->ill_ipst; 2736 ire_t *ire; 2737 2738 rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); 2739 ire = ipif->ipif_ire_local; 2740 ipif->ipif_ire_local = NULL; 2741 rw_exit(&ipst->ips_ill_g_lock); 2742 if (ire != NULL) { 2743 /* 2744 * Move count to ipif so we don't loose the count due to 2745 * a down/up dance. 2746 */ 2747 atomic_add_32(&ipif->ipif_ib_pkt_count, ire->ire_ib_pkt_count); 2748 2749 ire_delete(ire); 2750 ire_refrele_notr(ire); 2751 } 2752 rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); 2753 ire = ipif->ipif_ire_if; 2754 ipif->ipif_ire_if = NULL; 2755 rw_exit(&ipst->ips_ill_g_lock); 2756 if (ire != NULL) { 2757 ire_delete(ire); 2758 ire_refrele_notr(ire); 2759 } 2760 } 2761 2762 /* 2763 * Delete an ND entry if it exists. 2764 */ 2765 /* ARGSUSED */ 2766 int 2767 ip_siocdelndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2768 ip_ioctl_cmd_t *ipip, void *dummy_ifreq) 2769 { 2770 sin6_t *sin6; 2771 struct lifreq *lifr; 2772 lif_nd_req_t *lnr; 2773 ill_t *ill = ipif->ipif_ill; 2774 nce_t *nce; 2775 2776 lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr; 2777 lnr = &lifr->lifr_nd; 2778 /* Only allow for logical unit zero i.e. not on "le0:17" */ 2779 if (ipif->ipif_id != 0) 2780 return (EINVAL); 2781 2782 if (!ipif->ipif_isv6) 2783 return (EINVAL); 2784 2785 if (lnr->lnr_addr.ss_family != AF_INET6) 2786 return (EAFNOSUPPORT); 2787 2788 sin6 = (sin6_t *)&lnr->lnr_addr; 2789 2790 /* 2791 * Since ND mappings must be consistent across an IPMP group, prohibit 2792 * deleting ND mappings on underlying interfaces. 2793 * Don't allow deletion of mappings for local addresses. 2794 */ 2795 if (IS_UNDER_IPMP(ill)) 2796 return (EPERM); 2797 2798 nce = nce_lookup_v6(ill, &sin6->sin6_addr); 2799 if (nce == NULL) 2800 return (ESRCH); 2801 2802 if (NCE_MYADDR(nce->nce_common)) { 2803 nce_refrele(nce); 2804 return (EPERM); 2805 } 2806 2807 /* 2808 * delete the nce_common which will also delete the nces on any 2809 * under_ill in the case of ipmp. 2810 */ 2811 ncec_delete(nce->nce_common); 2812 nce_refrele(nce); 2813 return (0); 2814 } 2815 2816 /* 2817 * Return nbr cache info. 2818 */ 2819 /* ARGSUSED */ 2820 int 2821 ip_siocqueryndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2822 ip_ioctl_cmd_t *ipip, void *dummy_ifreq) 2823 { 2824 ill_t *ill = ipif->ipif_ill; 2825 struct lifreq *lifr; 2826 lif_nd_req_t *lnr; 2827 2828 lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr; 2829 lnr = &lifr->lifr_nd; 2830 /* Only allow for logical unit zero i.e. not on "le0:17" */ 2831 if (ipif->ipif_id != 0) 2832 return (EINVAL); 2833 2834 if (!ipif->ipif_isv6) 2835 return (EINVAL); 2836 2837 if (lnr->lnr_addr.ss_family != AF_INET6) 2838 return (EAFNOSUPPORT); 2839 2840 if (ill->ill_phys_addr_length > sizeof (lnr->lnr_hdw_addr)) 2841 return (EINVAL); 2842 2843 return (ndp_query(ill, lnr)); 2844 } 2845 2846 /* 2847 * Perform an update of the nd entry for the specified address. 2848 */ 2849 /* ARGSUSED */ 2850 int 2851 ip_siocsetndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2852 ip_ioctl_cmd_t *ipip, void *dummy_ifreq) 2853 { 2854 sin6_t *sin6; 2855 ill_t *ill = ipif->ipif_ill; 2856 struct lifreq *lifr; 2857 lif_nd_req_t *lnr; 2858 ire_t *ire; 2859 2860 lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr; 2861 lnr = &lifr->lifr_nd; 2862 /* Only allow for logical unit zero i.e. not on "le0:17" */ 2863 if (ipif->ipif_id != 0) 2864 return (EINVAL); 2865 2866 if (!ipif->ipif_isv6) 2867 return (EINVAL); 2868 2869 if (lnr->lnr_addr.ss_family != AF_INET6) 2870 return (EAFNOSUPPORT); 2871 2872 sin6 = (sin6_t *)&lnr->lnr_addr; 2873 2874 /* 2875 * Since ND mappings must be consistent across an IPMP group, prohibit 2876 * updating ND mappings on underlying interfaces. Also, since ND 2877 * mappings for IPMP data addresses are owned by IP itself, prohibit 2878 * updating them. 2879 */ 2880 if (IS_UNDER_IPMP(ill)) 2881 return (EPERM); 2882 2883 if (IS_IPMP(ill)) { 2884 ire = ire_ftable_lookup_v6(&sin6->sin6_addr, NULL, NULL, 2885 IRE_LOCAL, ill, ALL_ZONES, NULL, 2886 MATCH_IRE_TYPE | MATCH_IRE_ILL, 0, ill->ill_ipst, NULL); 2887 if (ire != NULL) { 2888 ire_refrele(ire); 2889 return (EPERM); 2890 } 2891 } 2892 2893 return (ndp_sioc_update(ill, lnr)); 2894 } 2895