1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 27 */ 28 29 /* 30 * This file contains the interface control functions for IPv6. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/sysmacros.h> 35 #include <sys/stream.h> 36 #include <sys/dlpi.h> 37 #include <sys/stropts.h> 38 #include <sys/ddi.h> 39 #include <sys/cmn_err.h> 40 #include <sys/kstat.h> 41 #include <sys/debug.h> 42 #include <sys/zone.h> 43 #include <sys/policy.h> 44 45 #include <sys/systm.h> 46 #include <sys/param.h> 47 #include <sys/socket.h> 48 #include <sys/isa_defs.h> 49 #include <net/if.h> 50 #include <net/if_dl.h> 51 #include <net/route.h> 52 #include <netinet/in.h> 53 #include <netinet/igmp_var.h> 54 #include <netinet/ip6.h> 55 #include <netinet/icmp6.h> 56 57 #include <inet/common.h> 58 #include <inet/nd.h> 59 #include <inet/mib2.h> 60 #include <inet/ip.h> 61 #include <inet/ip6.h> 62 #include <inet/ip_multi.h> 63 #include <inet/ip_ire.h> 64 #include <inet/ip_rts.h> 65 #include <inet/ip_ndp.h> 66 #include <inet/ip_if.h> 67 #include <inet/ip6_asp.h> 68 #include <inet/ipclassifier.h> 69 #include <inet/sctp_ip.h> 70 71 #include <sys/tsol/tndb.h> 72 #include <sys/tsol/tnet.h> 73 74 static in6_addr_t ipv6_ll_template = 75 {(uint32_t)V6_LINKLOCAL, 0x0, 0x0, 0x0}; 76 77 static ipif_t * 78 ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst, 79 ip_stack_t *ipst); 80 81 static int ipif_add_ires_v6(ipif_t *, boolean_t); 82 83 /* 84 * This function is called when an application does not specify an interface 85 * to be used for multicast traffic. It calls ire_lookup_multi_v6() to look 86 * for an interface route for the specified multicast group. Doing 87 * this allows the administrator to add prefix routes for multicast to 88 * indicate which interface to be used for multicast traffic in the above 89 * scenario. The route could be for all multicast (ff00::/8), for a single 90 * multicast group (a /128 route) or anything in between. If there is no 91 * such multicast route, we just find any multicast capable interface and 92 * return it. 93 * 94 * We support MULTIRT and RTF_SETSRC on the multicast routes added to the 95 * unicast table. This is used by CGTP. 96 */ 97 ill_t * 98 ill_lookup_group_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst, 99 boolean_t *multirtp, in6_addr_t *setsrcp) 100 { 101 ill_t *ill; 102 103 ill = ire_lookup_multi_ill_v6(group, zoneid, ipst, multirtp, setsrcp); 104 if (ill != NULL) 105 return (ill); 106 107 return (ill_lookup_multicast(ipst, zoneid, B_TRUE)); 108 } 109 110 /* 111 * Look for an ipif with the specified interface address and destination. 112 * The destination address is used only for matching point-to-point interfaces. 113 */ 114 static ipif_t * 115 ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst, 116 ip_stack_t *ipst) 117 { 118 ipif_t *ipif; 119 ill_t *ill; 120 ill_walk_context_t ctx; 121 122 /* 123 * First match all the point-to-point interfaces 124 * before looking at non-point-to-point interfaces. 125 * This is done to avoid returning non-point-to-point 126 * ipif instead of unnumbered point-to-point ipif. 127 */ 128 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 129 ill = ILL_START_WALK_V6(&ctx, ipst); 130 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 131 mutex_enter(&ill->ill_lock); 132 for (ipif = ill->ill_ipif; ipif != NULL; 133 ipif = ipif->ipif_next) { 134 /* Allow the ipif to be down */ 135 if ((ipif->ipif_flags & IPIF_POINTOPOINT) && 136 (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 137 if_addr)) && 138 (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr, 139 dst))) { 140 if (!IPIF_IS_CONDEMNED(ipif)) { 141 ipif_refhold_locked(ipif); 142 mutex_exit(&ill->ill_lock); 143 rw_exit(&ipst->ips_ill_g_lock); 144 return (ipif); 145 } 146 } 147 } 148 mutex_exit(&ill->ill_lock); 149 } 150 rw_exit(&ipst->ips_ill_g_lock); 151 /* lookup the ipif based on interface address */ 152 ipif = ipif_lookup_addr_v6(if_addr, NULL, ALL_ZONES, ipst); 153 ASSERT(ipif == NULL || ipif->ipif_isv6); 154 return (ipif); 155 } 156 157 /* 158 * Common function for ipif_lookup_addr_v6() and ipif_lookup_addr_exact_v6(). 159 */ 160 static ipif_t * 161 ipif_lookup_addr_common_v6(const in6_addr_t *addr, ill_t *match_ill, 162 uint32_t match_flags, zoneid_t zoneid, ip_stack_t *ipst) 163 { 164 ipif_t *ipif; 165 ill_t *ill; 166 boolean_t ptp = B_FALSE; 167 ill_walk_context_t ctx; 168 boolean_t match_illgrp = (match_flags & IPIF_MATCH_ILLGRP); 169 boolean_t no_duplicate = (match_flags & IPIF_MATCH_NONDUP); 170 171 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 172 /* 173 * Repeat twice, first based on local addresses and 174 * next time for pointopoint. 175 */ 176 repeat: 177 ill = ILL_START_WALK_V6(&ctx, ipst); 178 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 179 if (match_ill != NULL && ill != match_ill && 180 (!match_illgrp || !IS_IN_SAME_ILLGRP(ill, match_ill))) { 181 continue; 182 } 183 mutex_enter(&ill->ill_lock); 184 for (ipif = ill->ill_ipif; ipif != NULL; 185 ipif = ipif->ipif_next) { 186 if (zoneid != ALL_ZONES && 187 ipif->ipif_zoneid != zoneid && 188 ipif->ipif_zoneid != ALL_ZONES) 189 continue; 190 191 if (no_duplicate && 192 !(ipif->ipif_flags & IPIF_UP)) { 193 continue; 194 } 195 196 /* Allow the ipif to be down */ 197 if ((!ptp && (IN6_ARE_ADDR_EQUAL( 198 &ipif->ipif_v6lcl_addr, addr) && 199 (ipif->ipif_flags & IPIF_UNNUMBERED) == 0)) || 200 (ptp && (ipif->ipif_flags & IPIF_POINTOPOINT) && 201 IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr, 202 addr))) { 203 if (!IPIF_IS_CONDEMNED(ipif)) { 204 ipif_refhold_locked(ipif); 205 mutex_exit(&ill->ill_lock); 206 rw_exit(&ipst->ips_ill_g_lock); 207 return (ipif); 208 } 209 } 210 } 211 mutex_exit(&ill->ill_lock); 212 } 213 214 /* If we already did the ptp case, then we are done */ 215 if (ptp) { 216 rw_exit(&ipst->ips_ill_g_lock); 217 return (NULL); 218 } 219 ptp = B_TRUE; 220 goto repeat; 221 } 222 223 /* 224 * Lookup an ipif with the specified address. For point-to-point links we 225 * look for matches on either the destination address or the local address, 226 * but we skip the local address check if IPIF_UNNUMBERED is set. If the 227 * `match_ill' argument is non-NULL, the lookup is restricted to that ill 228 * (or illgrp if `match_ill' is in an IPMP group). 229 */ 230 ipif_t * 231 ipif_lookup_addr_v6(const in6_addr_t *addr, ill_t *match_ill, zoneid_t zoneid, 232 ip_stack_t *ipst) 233 { 234 return (ipif_lookup_addr_common_v6(addr, match_ill, IPIF_MATCH_ILLGRP, 235 zoneid, ipst)); 236 } 237 238 /* 239 * Lookup an ipif with the specified address. Similar to ipif_lookup_addr, 240 * except that we will only return an address if it is not marked as 241 * IPIF_DUPLICATE 242 */ 243 ipif_t * 244 ipif_lookup_addr_nondup_v6(const in6_addr_t *addr, ill_t *match_ill, 245 zoneid_t zoneid, ip_stack_t *ipst) 246 { 247 return (ipif_lookup_addr_common_v6(addr, match_ill, 248 (IPIF_MATCH_ILLGRP | IPIF_MATCH_NONDUP), zoneid, 249 ipst)); 250 } 251 252 /* 253 * Special abbreviated version of ipif_lookup_addr_v6() that doesn't match 254 * `match_ill' across the IPMP group. This function is only needed in some 255 * corner-cases; almost everything should use ipif_lookup_addr_v6(). 256 */ 257 ipif_t * 258 ipif_lookup_addr_exact_v6(const in6_addr_t *addr, ill_t *match_ill, 259 ip_stack_t *ipst) 260 { 261 ASSERT(match_ill != NULL); 262 return (ipif_lookup_addr_common_v6(addr, match_ill, 0, ALL_ZONES, 263 ipst)); 264 } 265 266 /* 267 * Look for an ipif with the specified address. For point-point links 268 * we look for matches on either the destination address and the local 269 * address, but we ignore the check on the local address if IPIF_UNNUMBERED 270 * is set. 271 * If the `match_ill' argument is non-NULL, the lookup is restricted to that 272 * ill (or illgrp if `match_ill' is in an IPMP group). 273 * Return the zoneid for the ipif. ALL_ZONES if none found. 274 */ 275 zoneid_t 276 ipif_lookup_addr_zoneid_v6(const in6_addr_t *addr, ill_t *match_ill, 277 ip_stack_t *ipst) 278 { 279 ipif_t *ipif; 280 ill_t *ill; 281 boolean_t ptp = B_FALSE; 282 ill_walk_context_t ctx; 283 zoneid_t zoneid; 284 285 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 286 /* 287 * Repeat twice, first based on local addresses and 288 * next time for pointopoint. 289 */ 290 repeat: 291 ill = ILL_START_WALK_V6(&ctx, ipst); 292 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 293 if (match_ill != NULL && ill != match_ill && 294 !IS_IN_SAME_ILLGRP(ill, match_ill)) { 295 continue; 296 } 297 mutex_enter(&ill->ill_lock); 298 for (ipif = ill->ill_ipif; ipif != NULL; 299 ipif = ipif->ipif_next) { 300 /* Allow the ipif to be down */ 301 if ((!ptp && (IN6_ARE_ADDR_EQUAL( 302 &ipif->ipif_v6lcl_addr, addr) && 303 (ipif->ipif_flags & IPIF_UNNUMBERED) == 0)) || 304 (ptp && (ipif->ipif_flags & IPIF_POINTOPOINT) && 305 IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr, 306 addr)) && 307 !(ipif->ipif_state_flags & IPIF_CONDEMNED)) { 308 zoneid = ipif->ipif_zoneid; 309 mutex_exit(&ill->ill_lock); 310 rw_exit(&ipst->ips_ill_g_lock); 311 /* 312 * If ipif_zoneid was ALL_ZONES then we have 313 * a trusted extensions shared IP address. 314 * In that case GLOBAL_ZONEID works to send. 315 */ 316 if (zoneid == ALL_ZONES) 317 zoneid = GLOBAL_ZONEID; 318 return (zoneid); 319 } 320 } 321 mutex_exit(&ill->ill_lock); 322 } 323 324 /* If we already did the ptp case, then we are done */ 325 if (ptp) { 326 rw_exit(&ipst->ips_ill_g_lock); 327 return (ALL_ZONES); 328 } 329 ptp = B_TRUE; 330 goto repeat; 331 } 332 333 /* 334 * Perform various checks to verify that an address would make sense as a local 335 * interface address. This is currently only called when an attempt is made 336 * to set a local address. 337 * 338 * Does not allow a v4-mapped address, an address that equals the subnet 339 * anycast address, ... a multicast address, ... 340 */ 341 boolean_t 342 ip_local_addr_ok_v6(const in6_addr_t *addr, const in6_addr_t *subnet_mask) 343 { 344 in6_addr_t subnet; 345 346 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 347 return (B_TRUE); /* Allow all zeros */ 348 349 /* 350 * Don't allow all zeroes or host part, but allow 351 * all ones netmask. 352 */ 353 V6_MASK_COPY(*addr, *subnet_mask, subnet); 354 if (IN6_IS_ADDR_V4MAPPED(addr) || 355 (IN6_ARE_ADDR_EQUAL(addr, &subnet) && 356 !IN6_ARE_ADDR_EQUAL(subnet_mask, &ipv6_all_ones)) || 357 (IN6_IS_ADDR_V4COMPAT(addr) && CLASSD(V4_PART_OF_V6((*addr)))) || 358 IN6_IS_ADDR_MULTICAST(addr)) 359 return (B_FALSE); 360 361 return (B_TRUE); 362 } 363 364 /* 365 * Perform various checks to verify that an address would make sense as a 366 * remote/subnet interface address. 367 */ 368 boolean_t 369 ip_remote_addr_ok_v6(const in6_addr_t *addr, const in6_addr_t *subnet_mask) 370 { 371 in6_addr_t subnet; 372 373 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 374 return (B_TRUE); /* Allow all zeros */ 375 376 V6_MASK_COPY(*addr, *subnet_mask, subnet); 377 if (IN6_IS_ADDR_V4MAPPED(addr) || 378 (IN6_ARE_ADDR_EQUAL(addr, &subnet) && 379 !IN6_ARE_ADDR_EQUAL(subnet_mask, &ipv6_all_ones)) || 380 IN6_IS_ADDR_MULTICAST(addr) || 381 (IN6_IS_ADDR_V4COMPAT(addr) && CLASSD(V4_PART_OF_V6((*addr))))) 382 return (B_FALSE); 383 384 return (B_TRUE); 385 } 386 387 /* 388 * ip_rt_add_v6 is called to add an IPv6 route to the forwarding table. 389 * ill is passed in to associate it with the correct interface 390 * (for link-local destinations and gateways). 391 * If ire_arg is set, then we return the held IRE in that location. 392 */ 393 /* ARGSUSED1 */ 394 int 395 ip_rt_add_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, 396 const in6_addr_t *gw_addr, const in6_addr_t *src_addr, int flags, 397 ill_t *ill, ire_t **ire_arg, struct rtsa_s *sp, ip_stack_t *ipst, 398 zoneid_t zoneid) 399 { 400 ire_t *ire, *nire; 401 ire_t *gw_ire = NULL; 402 ipif_t *ipif; 403 uint_t type; 404 int match_flags = MATCH_IRE_TYPE; 405 tsol_gc_t *gc = NULL; 406 tsol_gcgrp_t *gcgrp = NULL; 407 boolean_t gcgrp_xtraref = B_FALSE; 408 409 if (ire_arg != NULL) 410 *ire_arg = NULL; 411 412 /* 413 * Prevent routes with a zero gateway from being created (since 414 * interfaces can currently be plumbed and brought up with no assigned 415 * address). 416 */ 417 if (IN6_IS_ADDR_UNSPECIFIED(gw_addr)) 418 return (ENETUNREACH); 419 420 /* 421 * If this is the case of RTF_HOST being set, then we set the netmask 422 * to all ones (regardless if one was supplied). 423 */ 424 if (flags & RTF_HOST) 425 mask = &ipv6_all_ones; 426 427 /* 428 * Get the ipif, if any, corresponding to the gw_addr 429 * If -ifp was specified we restrict ourselves to the ill, otherwise 430 * we match on the gatway and destination to handle unnumbered pt-pt 431 * interfaces. 432 */ 433 if (ill != NULL) 434 ipif = ipif_lookup_addr_v6(gw_addr, ill, ALL_ZONES, ipst); 435 else 436 ipif = ipif_lookup_interface_v6(gw_addr, dst_addr, ipst); 437 if (ipif != NULL) { 438 if (IS_VNI(ipif->ipif_ill)) { 439 ipif_refrele(ipif); 440 return (EINVAL); 441 } 442 } 443 444 /* 445 * GateD will attempt to create routes with a loopback interface 446 * address as the gateway and with RTF_GATEWAY set. We allow 447 * these routes to be added, but create them as interface routes 448 * since the gateway is an interface address. 449 */ 450 if ((ipif != NULL) && (ipif->ipif_ire_type == IRE_LOOPBACK)) { 451 flags &= ~RTF_GATEWAY; 452 if (IN6_ARE_ADDR_EQUAL(gw_addr, &ipv6_loopback) && 453 IN6_ARE_ADDR_EQUAL(dst_addr, &ipv6_loopback) && 454 IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones)) { 455 ire = ire_ftable_lookup_v6(dst_addr, 0, 0, IRE_LOOPBACK, 456 NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, 0, ipst, 457 NULL); 458 if (ire != NULL) { 459 ire_refrele(ire); 460 ipif_refrele(ipif); 461 return (EEXIST); 462 } 463 ip1dbg(("ip_rt_add_v6: 0x%p creating IRE 0x%x" 464 "for 0x%x\n", (void *)ipif, 465 ipif->ipif_ire_type, 466 ntohl(ipif->ipif_lcl_addr))); 467 ire = ire_create_v6( 468 dst_addr, 469 mask, 470 NULL, 471 ipif->ipif_ire_type, /* LOOPBACK */ 472 ipif->ipif_ill, 473 zoneid, 474 (ipif->ipif_flags & IPIF_PRIVATE) ? RTF_PRIVATE : 0, 475 NULL, 476 ipst); 477 478 if (ire == NULL) { 479 ipif_refrele(ipif); 480 return (ENOMEM); 481 } 482 /* src address assigned by the caller? */ 483 if ((flags & RTF_SETSRC) && 484 !IN6_IS_ADDR_UNSPECIFIED(src_addr)) 485 ire->ire_setsrc_addr_v6 = *src_addr; 486 487 nire = ire_add(ire); 488 if (nire == NULL) { 489 /* 490 * In the result of failure, ire_add() will have 491 * already deleted the ire in question, so there 492 * is no need to do that here. 493 */ 494 ipif_refrele(ipif); 495 return (ENOMEM); 496 } 497 /* 498 * Check if it was a duplicate entry. This handles 499 * the case of two racing route adds for the same route 500 */ 501 if (nire != ire) { 502 ASSERT(nire->ire_identical_ref > 1); 503 ire_delete(nire); 504 ire_refrele(nire); 505 ipif_refrele(ipif); 506 return (EEXIST); 507 } 508 ire = nire; 509 goto save_ire; 510 } 511 } 512 513 /* 514 * The routes for multicast with CGTP are quite special in that 515 * the gateway is the local interface address, yet RTF_GATEWAY 516 * is set. We turn off RTF_GATEWAY to provide compatibility with 517 * this undocumented and unusual use of multicast routes. 518 */ 519 if ((flags & RTF_MULTIRT) && ipif != NULL) 520 flags &= ~RTF_GATEWAY; 521 522 /* 523 * Traditionally, interface routes are ones where RTF_GATEWAY isn't set 524 * and the gateway address provided is one of the system's interface 525 * addresses. By using the routing socket interface and supplying an 526 * RTA_IFP sockaddr with an interface index, an alternate method of 527 * specifying an interface route to be created is available which uses 528 * the interface index that specifies the outgoing interface rather than 529 * the address of an outgoing interface (which may not be able to 530 * uniquely identify an interface). When coupled with the RTF_GATEWAY 531 * flag, routes can be specified which not only specify the next-hop to 532 * be used when routing to a certain prefix, but also which outgoing 533 * interface should be used. 534 * 535 * Previously, interfaces would have unique addresses assigned to them 536 * and so the address assigned to a particular interface could be used 537 * to identify a particular interface. One exception to this was the 538 * case of an unnumbered interface (where IPIF_UNNUMBERED was set). 539 * 540 * With the advent of IPv6 and its link-local addresses, this 541 * restriction was relaxed and interfaces could share addresses between 542 * themselves. In fact, typically all of the link-local interfaces on 543 * an IPv6 node or router will have the same link-local address. In 544 * order to differentiate between these interfaces, the use of an 545 * interface index is necessary and this index can be carried inside a 546 * RTA_IFP sockaddr (which is actually a sockaddr_dl). One restriction 547 * of using the interface index, however, is that all of the ipif's that 548 * are part of an ill have the same index and so the RTA_IFP sockaddr 549 * cannot be used to differentiate between ipif's (or logical 550 * interfaces) that belong to the same ill (physical interface). 551 * 552 * For example, in the following case involving IPv4 interfaces and 553 * logical interfaces 554 * 555 * 192.0.2.32 255.255.255.224 192.0.2.33 U if0 556 * 192.0.2.32 255.255.255.224 192.0.2.34 U if0 557 * 192.0.2.32 255.255.255.224 192.0.2.35 U if0 558 * 559 * the ipif's corresponding to each of these interface routes can be 560 * uniquely identified by the "gateway" (actually interface address). 561 * 562 * In this case involving multiple IPv6 default routes to a particular 563 * link-local gateway, the use of RTA_IFP is necessary to specify which 564 * default route is of interest: 565 * 566 * default fe80::123:4567:89ab:cdef U if0 567 * default fe80::123:4567:89ab:cdef U if1 568 */ 569 570 /* RTF_GATEWAY not set */ 571 if (!(flags & RTF_GATEWAY)) { 572 if (sp != NULL) { 573 ip2dbg(("ip_rt_add_v6: gateway security attributes " 574 "cannot be set with interface route\n")); 575 if (ipif != NULL) 576 ipif_refrele(ipif); 577 return (EINVAL); 578 } 579 580 /* 581 * Whether or not ill (RTA_IFP) is set, we require that 582 * the gateway is one of our local addresses. 583 */ 584 if (ipif == NULL) 585 return (ENETUNREACH); 586 587 /* 588 * We use MATCH_IRE_ILL here. If the caller specified an 589 * interface (from the RTA_IFP sockaddr) we use it, otherwise 590 * we use the ill derived from the gateway address. 591 * We can always match the gateway address since we record it 592 * in ire_gateway_addr. 593 * We don't allow RTA_IFP to specify a different ill than the 594 * one matching the ipif to make sure we can delete the route. 595 */ 596 match_flags |= MATCH_IRE_GW | MATCH_IRE_ILL; 597 if (ill == NULL) { 598 ill = ipif->ipif_ill; 599 } else if (ill != ipif->ipif_ill) { 600 ipif_refrele(ipif); 601 return (EINVAL); 602 } 603 604 /* 605 * We check for an existing entry at this point. 606 */ 607 match_flags |= MATCH_IRE_MASK; 608 ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, 609 IRE_INTERFACE, ill, ALL_ZONES, NULL, match_flags, 0, ipst, 610 NULL); 611 if (ire != NULL) { 612 ire_refrele(ire); 613 ipif_refrele(ipif); 614 return (EEXIST); 615 } 616 617 /* 618 * Create a copy of the IRE_LOOPBACK, IRE_IF_NORESOLVER or 619 * IRE_IF_RESOLVER with the modified address, netmask, and 620 * gateway. 621 */ 622 ire = ire_create_v6( 623 dst_addr, 624 mask, 625 gw_addr, 626 ill->ill_net_type, 627 ill, 628 zoneid, 629 flags, 630 NULL, 631 ipst); 632 if (ire == NULL) { 633 ipif_refrele(ipif); 634 return (ENOMEM); 635 } 636 637 /* 638 * Some software (for example, GateD and Sun Cluster) attempts 639 * to create (what amount to) IRE_PREFIX routes with the 640 * loopback address as the gateway. This is primarily done to 641 * set up prefixes with the RTF_REJECT flag set (for example, 642 * when generating aggregate routes). We also OR in the 643 * RTF_BLACKHOLE flag as these interface routes, by 644 * definition, can only be that. 645 * 646 * If the IRE type (as defined by ill->ill_net_type) is 647 * IRE_LOOPBACK, then we map the request into a 648 * IRE_IF_NORESOLVER. 649 * 650 * Needless to say, the real IRE_LOOPBACK is NOT created by this 651 * routine, but rather using ire_create_v6() directly. 652 */ 653 if (ill->ill_net_type == IRE_LOOPBACK) { 654 ire->ire_type = IRE_IF_NORESOLVER; 655 ire->ire_flags |= RTF_BLACKHOLE; 656 } 657 /* src address assigned by the caller? */ 658 if ((flags & RTF_SETSRC) && !IN6_IS_ADDR_UNSPECIFIED(src_addr)) 659 ire->ire_setsrc_addr_v6 = *src_addr; 660 661 nire = ire_add(ire); 662 if (nire == NULL) { 663 /* 664 * In the result of failure, ire_add() will have 665 * already deleted the ire in question, so there 666 * is no need to do that here. 667 */ 668 ipif_refrele(ipif); 669 return (ENOMEM); 670 } 671 /* 672 * Check if it was a duplicate entry. This handles 673 * the case of two racing route adds for the same route 674 */ 675 if (nire != ire) { 676 ASSERT(nire->ire_identical_ref > 1); 677 ire_delete(nire); 678 ire_refrele(nire); 679 ipif_refrele(ipif); 680 return (EEXIST); 681 } 682 ire = nire; 683 goto save_ire; 684 } 685 686 /* 687 * Get an interface IRE for the specified gateway. 688 * If we don't have an IRE_IF_NORESOLVER or IRE_IF_RESOLVER for the 689 * gateway, it is currently unreachable and we fail the request 690 * accordingly. We reject any RTF_GATEWAY routes where the gateway 691 * is an IRE_LOCAL or IRE_LOOPBACK. 692 * If RTA_IFP was specified we look on that particular ill. 693 */ 694 if (ill != NULL) 695 match_flags |= MATCH_IRE_ILL; 696 697 /* Check whether the gateway is reachable. */ 698 again: 699 type = IRE_INTERFACE | IRE_LOCAL | IRE_LOOPBACK; 700 if (flags & RTF_INDIRECT) 701 type |= IRE_OFFLINK; 702 703 gw_ire = ire_ftable_lookup_v6(gw_addr, 0, 0, type, ill, 704 ALL_ZONES, NULL, match_flags, 0, ipst, NULL); 705 if (gw_ire == NULL) { 706 /* 707 * With IPMP, we allow host routes to influence in.mpathd's 708 * target selection. However, if the test addresses are on 709 * their own network, the above lookup will fail since the 710 * underlying IRE_INTERFACEs are marked hidden. So allow 711 * hidden test IREs to be found and try again. 712 */ 713 if (!(match_flags & MATCH_IRE_TESTHIDDEN)) { 714 match_flags |= MATCH_IRE_TESTHIDDEN; 715 goto again; 716 } 717 if (ipif != NULL) 718 ipif_refrele(ipif); 719 return (ENETUNREACH); 720 } 721 if (gw_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) { 722 ire_refrele(gw_ire); 723 if (ipif != NULL) 724 ipif_refrele(ipif); 725 return (ENETUNREACH); 726 } 727 728 /* 729 * We create one of three types of IREs as a result of this request 730 * based on the netmask. A netmask of all ones (which is automatically 731 * assumed when RTF_HOST is set) results in an IRE_HOST being created. 732 * An all zeroes netmask implies a default route so an IRE_DEFAULT is 733 * created. Otherwise, an IRE_PREFIX route is created for the 734 * destination prefix. 735 */ 736 if (IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones)) 737 type = IRE_HOST; 738 else if (IN6_IS_ADDR_UNSPECIFIED(mask)) 739 type = IRE_DEFAULT; 740 else 741 type = IRE_PREFIX; 742 743 /* check for a duplicate entry */ 744 ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, type, ill, 745 ALL_ZONES, NULL, 746 match_flags | MATCH_IRE_MASK | MATCH_IRE_GW, 0, ipst, NULL); 747 if (ire != NULL) { 748 if (ipif != NULL) 749 ipif_refrele(ipif); 750 ire_refrele(gw_ire); 751 ire_refrele(ire); 752 return (EEXIST); 753 } 754 755 /* Security attribute exists */ 756 if (sp != NULL) { 757 tsol_gcgrp_addr_t ga; 758 759 /* find or create the gateway credentials group */ 760 ga.ga_af = AF_INET6; 761 ga.ga_addr = *gw_addr; 762 763 /* we hold reference to it upon success */ 764 gcgrp = gcgrp_lookup(&ga, B_TRUE); 765 if (gcgrp == NULL) { 766 if (ipif != NULL) 767 ipif_refrele(ipif); 768 ire_refrele(gw_ire); 769 return (ENOMEM); 770 } 771 772 /* 773 * Create and add the security attribute to the group; a 774 * reference to the group is made upon allocating a new 775 * entry successfully. If it finds an already-existing 776 * entry for the security attribute in the group, it simply 777 * returns it and no new reference is made to the group. 778 */ 779 gc = gc_create(sp, gcgrp, &gcgrp_xtraref); 780 if (gc == NULL) { 781 /* release reference held by gcgrp_lookup */ 782 GCGRP_REFRELE(gcgrp); 783 if (ipif != NULL) 784 ipif_refrele(ipif); 785 ire_refrele(gw_ire); 786 return (ENOMEM); 787 } 788 } 789 790 /* Create the IRE. */ 791 ire = ire_create_v6( 792 dst_addr, /* dest address */ 793 mask, /* mask */ 794 gw_addr, /* gateway address */ 795 (ushort_t)type, /* IRE type */ 796 ill, 797 zoneid, 798 flags, 799 gc, /* security attribute */ 800 ipst); 801 802 /* 803 * The ire holds a reference to the 'gc' and the 'gc' holds a 804 * reference to the 'gcgrp'. We can now release the extra reference 805 * the 'gcgrp' acquired in the gcgrp_lookup, if it was not used. 806 */ 807 if (gcgrp_xtraref) 808 GCGRP_REFRELE(gcgrp); 809 if (ire == NULL) { 810 if (gc != NULL) 811 GC_REFRELE(gc); 812 if (ipif != NULL) 813 ipif_refrele(ipif); 814 ire_refrele(gw_ire); 815 return (ENOMEM); 816 } 817 818 /* src address assigned by the caller? */ 819 if ((flags & RTF_SETSRC) && !IN6_IS_ADDR_UNSPECIFIED(src_addr)) 820 ire->ire_setsrc_addr_v6 = *src_addr; 821 822 /* 823 * POLICY: should we allow an RTF_HOST with address INADDR_ANY? 824 * SUN/OS socket stuff does but do we really want to allow ::0 ? 825 */ 826 827 /* Add the new IRE. */ 828 nire = ire_add(ire); 829 if (nire == NULL) { 830 /* 831 * In the result of failure, ire_add() will have 832 * already deleted the ire in question, so there 833 * is no need to do that here. 834 */ 835 if (ipif != NULL) 836 ipif_refrele(ipif); 837 ire_refrele(gw_ire); 838 return (ENOMEM); 839 } 840 /* 841 * Check if it was a duplicate entry. This handles 842 * the case of two racing route adds for the same route 843 */ 844 if (nire != ire) { 845 ASSERT(nire->ire_identical_ref > 1); 846 ire_delete(nire); 847 ire_refrele(nire); 848 if (ipif != NULL) 849 ipif_refrele(ipif); 850 ire_refrele(gw_ire); 851 return (EEXIST); 852 } 853 ire = nire; 854 855 if (flags & RTF_MULTIRT) { 856 /* 857 * Invoke the CGTP (multirouting) filtering module 858 * to add the dst address in the filtering database. 859 * Replicated inbound packets coming from that address 860 * will be filtered to discard the duplicates. 861 * It is not necessary to call the CGTP filter hook 862 * when the dst address is a multicast, because an 863 * IP source address cannot be a multicast. 864 */ 865 if (ipst->ips_ip_cgtp_filter_ops != NULL && 866 !IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))) { 867 int res; 868 ipif_t *src_ipif; 869 870 /* Find the source address corresponding to gw_ire */ 871 src_ipif = ipif_lookup_addr_v6( 872 &gw_ire->ire_gateway_addr_v6, NULL, zoneid, ipst); 873 if (src_ipif != NULL) { 874 res = ipst->ips_ip_cgtp_filter_ops-> 875 cfo_add_dest_v6( 876 ipst->ips_netstack->netstack_stackid, 877 &ire->ire_addr_v6, 878 &ire->ire_gateway_addr_v6, 879 &ire->ire_setsrc_addr_v6, 880 &src_ipif->ipif_v6lcl_addr); 881 ipif_refrele(src_ipif); 882 } else { 883 res = EADDRNOTAVAIL; 884 } 885 if (res != 0) { 886 if (ipif != NULL) 887 ipif_refrele(ipif); 888 ire_refrele(gw_ire); 889 ire_delete(ire); 890 ire_refrele(ire); /* Held in ire_add */ 891 return (res); 892 } 893 } 894 } 895 896 save_ire: 897 if (gw_ire != NULL) { 898 ire_refrele(gw_ire); 899 gw_ire = NULL; 900 } 901 if (ire->ire_ill != NULL) { 902 /* 903 * Save enough information so that we can recreate the IRE if 904 * the ILL goes down and then up. The metrics associated 905 * with the route will be saved as well when rts_setmetrics() is 906 * called after the IRE has been created. In the case where 907 * memory cannot be allocated, none of this information will be 908 * saved. 909 */ 910 ill_save_ire(ire->ire_ill, ire); 911 } 912 913 if (ire_arg != NULL) { 914 /* 915 * Store the ire that was successfully added into where ire_arg 916 * points to so that callers don't have to look it up 917 * themselves (but they are responsible for ire_refrele()ing 918 * the ire when they are finished with it). 919 */ 920 *ire_arg = ire; 921 } else { 922 ire_refrele(ire); /* Held in ire_add */ 923 } 924 if (ipif != NULL) 925 ipif_refrele(ipif); 926 return (0); 927 } 928 929 /* 930 * ip_rt_delete_v6 is called to delete an IPv6 route. 931 * ill is passed in to associate it with the correct interface. 932 * (for link-local destinations and gateways). 933 */ 934 /* ARGSUSED4 */ 935 int 936 ip_rt_delete_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, 937 const in6_addr_t *gw_addr, uint_t rtm_addrs, int flags, ill_t *ill, 938 ip_stack_t *ipst, zoneid_t zoneid) 939 { 940 ire_t *ire = NULL; 941 ipif_t *ipif; 942 uint_t type; 943 uint_t match_flags = MATCH_IRE_TYPE; 944 int err = 0; 945 946 /* 947 * If this is the case of RTF_HOST being set, then we set the netmask 948 * to all ones. Otherwise, we use the netmask if one was supplied. 949 */ 950 if (flags & RTF_HOST) { 951 mask = &ipv6_all_ones; 952 match_flags |= MATCH_IRE_MASK; 953 } else if (rtm_addrs & RTA_NETMASK) { 954 match_flags |= MATCH_IRE_MASK; 955 } 956 957 /* 958 * Note that RTF_GATEWAY is never set on a delete, therefore 959 * we check if the gateway address is one of our interfaces first, 960 * and fall back on RTF_GATEWAY routes. 961 * 962 * This makes it possible to delete an original 963 * IRE_IF_NORESOLVER/IRE_IF_RESOLVER - consistent with SunOS 4.1. 964 * However, we have RTF_KERNEL set on the ones created by ipif_up 965 * and those can not be deleted here. 966 * 967 * We use MATCH_IRE_ILL if we know the interface. If the caller 968 * specified an interface (from the RTA_IFP sockaddr) we use it, 969 * otherwise we use the ill derived from the gateway address. 970 * We can always match the gateway address since we record it 971 * in ire_gateway_addr. 972 * 973 * For more detail on specifying routes by gateway address and by 974 * interface index, see the comments in ip_rt_add_v6(). 975 */ 976 ipif = ipif_lookup_interface_v6(gw_addr, dst_addr, ipst); 977 if (ipif != NULL) { 978 ill_t *ill_match; 979 980 if (ill != NULL) 981 ill_match = ill; 982 else 983 ill_match = ipif->ipif_ill; 984 985 match_flags |= MATCH_IRE_ILL; 986 if (ipif->ipif_ire_type == IRE_LOOPBACK) { 987 ire = ire_ftable_lookup_v6(dst_addr, 0, 0, IRE_LOOPBACK, 988 ill_match, ALL_ZONES, NULL, match_flags, 0, ipst, 989 NULL); 990 } 991 if (ire == NULL) { 992 match_flags |= MATCH_IRE_GW; 993 ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, 994 IRE_INTERFACE, ill_match, ALL_ZONES, NULL, 995 match_flags, 0, ipst, NULL); 996 } 997 /* Avoid deleting routes created by kernel from an ipif */ 998 if (ire != NULL && (ire->ire_flags & RTF_KERNEL)) { 999 ire_refrele(ire); 1000 ire = NULL; 1001 } 1002 1003 /* Restore in case we didn't find a match */ 1004 match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_ILL); 1005 } 1006 1007 if (ire == NULL) { 1008 /* 1009 * At this point, the gateway address is not one of our own 1010 * addresses or a matching interface route was not found. We 1011 * set the IRE type to lookup based on whether 1012 * this is a host route, a default route or just a prefix. 1013 * 1014 * If an ill was passed in, then the lookup is based on an 1015 * interface index so MATCH_IRE_ILL is added to match_flags. 1016 */ 1017 match_flags |= MATCH_IRE_GW; 1018 if (ill != NULL) 1019 match_flags |= MATCH_IRE_ILL; 1020 if (IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones)) 1021 type = IRE_HOST; 1022 else if (IN6_IS_ADDR_UNSPECIFIED(mask)) 1023 type = IRE_DEFAULT; 1024 else 1025 type = IRE_PREFIX; 1026 ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, type, 1027 ill, ALL_ZONES, NULL, match_flags, 0, ipst, NULL); 1028 } 1029 1030 if (ipif != NULL) { 1031 ipif_refrele(ipif); 1032 ipif = NULL; 1033 } 1034 if (ire == NULL) 1035 return (ESRCH); 1036 1037 if (ire->ire_flags & RTF_MULTIRT) { 1038 /* 1039 * Invoke the CGTP (multirouting) filtering module 1040 * to remove the dst address from the filtering database. 1041 * Packets coming from that address will no longer be 1042 * filtered to remove duplicates. 1043 */ 1044 if (ipst->ips_ip_cgtp_filter_ops != NULL) { 1045 err = ipst->ips_ip_cgtp_filter_ops->cfo_del_dest_v6( 1046 ipst->ips_netstack->netstack_stackid, 1047 &ire->ire_addr_v6, &ire->ire_gateway_addr_v6); 1048 } 1049 } 1050 1051 ill = ire->ire_ill; 1052 if (ill != NULL) 1053 ill_remove_saved_ire(ill, ire); 1054 ire_delete(ire); 1055 ire_refrele(ire); 1056 return (err); 1057 } 1058 1059 /* 1060 * Derive an interface id from the link layer address. 1061 */ 1062 void 1063 ill_setdefaulttoken(ill_t *ill) 1064 { 1065 if (!ill->ill_manual_token) { 1066 bzero(&ill->ill_token, sizeof (ill->ill_token)); 1067 MEDIA_V6INTFID(ill->ill_media, ill, &ill->ill_token); 1068 ill->ill_token_length = IPV6_TOKEN_LEN; 1069 } 1070 } 1071 1072 void 1073 ill_setdesttoken(ill_t *ill) 1074 { 1075 bzero(&ill->ill_dest_token, sizeof (ill->ill_dest_token)); 1076 MEDIA_V6DESTINTFID(ill->ill_media, ill, &ill->ill_dest_token); 1077 } 1078 1079 /* 1080 * Create a link-local address from a token. 1081 */ 1082 static void 1083 ipif_get_linklocal(in6_addr_t *dest, const in6_addr_t *token) 1084 { 1085 int i; 1086 1087 for (i = 0; i < 4; i++) { 1088 dest->s6_addr32[i] = 1089 token->s6_addr32[i] | ipv6_ll_template.s6_addr32[i]; 1090 } 1091 } 1092 1093 /* 1094 * Set a default IPv6 address for a 6to4 tunnel interface 2002:<tsrc>::1/16 1095 */ 1096 static void 1097 ipif_set6to4addr(ipif_t *ipif) 1098 { 1099 ill_t *ill = ipif->ipif_ill; 1100 struct in_addr v4phys; 1101 1102 ASSERT(ill->ill_mactype == DL_6TO4); 1103 ASSERT(ill->ill_phys_addr_length == sizeof (struct in_addr)); 1104 ASSERT(ipif->ipif_isv6); 1105 1106 if (ipif->ipif_flags & IPIF_UP) 1107 return; 1108 1109 (void) ip_plen_to_mask_v6(16, &ipif->ipif_v6net_mask); 1110 bcopy(ill->ill_phys_addr, &v4phys, sizeof (struct in_addr)); 1111 IN6_V4ADDR_TO_6TO4(&v4phys, &ipif->ipif_v6lcl_addr); 1112 V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask, 1113 ipif->ipif_v6subnet); 1114 } 1115 1116 /* 1117 * Is it not possible to set the link local address? 1118 * The address can be set if the token is set, and the token 1119 * isn't too long. 1120 * Return B_TRUE if the address can't be set, or B_FALSE if it can. 1121 */ 1122 boolean_t 1123 ipif_cant_setlinklocal(ipif_t *ipif) 1124 { 1125 ill_t *ill = ipif->ipif_ill; 1126 1127 if (IN6_IS_ADDR_UNSPECIFIED(&ill->ill_token) || 1128 ill->ill_token_length > IPV6_ABITS - IPV6_LL_PREFIXLEN) 1129 return (B_TRUE); 1130 1131 return (B_FALSE); 1132 } 1133 1134 /* 1135 * Generate a link-local address from the token. 1136 */ 1137 void 1138 ipif_setlinklocal(ipif_t *ipif) 1139 { 1140 ill_t *ill = ipif->ipif_ill; 1141 in6_addr_t ov6addr; 1142 1143 ASSERT(IAM_WRITER_ILL(ill)); 1144 1145 /* 1146 * ill_manual_linklocal is set when the link-local address was 1147 * manually configured. 1148 */ 1149 if (ill->ill_manual_linklocal) 1150 return; 1151 1152 /* 1153 * IPv6 interfaces over 6to4 tunnels are special. They do not have 1154 * link-local addresses, but instead have a single automatically 1155 * generated global address. 1156 */ 1157 if (ill->ill_mactype == DL_6TO4) { 1158 ipif_set6to4addr(ipif); 1159 return; 1160 } 1161 1162 if (ipif_cant_setlinklocal(ipif)) 1163 return; 1164 1165 ov6addr = ipif->ipif_v6lcl_addr; 1166 ipif_get_linklocal(&ipif->ipif_v6lcl_addr, &ill->ill_token); 1167 sctp_update_ipif_addr(ipif, ov6addr); 1168 (void) ip_plen_to_mask_v6(IPV6_LL_PREFIXLEN, &ipif->ipif_v6net_mask); 1169 if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6pp_dst_addr)) { 1170 V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask, 1171 ipif->ipif_v6subnet); 1172 } 1173 1174 ip_rts_newaddrmsg(RTM_CHGADDR, 0, ipif, RTSQ_DEFAULT); 1175 } 1176 1177 /* 1178 * Generate a destination link-local address for a point-to-point IPv6 1179 * interface with a destination interface id (IP tunnels are such interfaces) 1180 * based on the destination token. 1181 */ 1182 void 1183 ipif_setdestlinklocal(ipif_t *ipif) 1184 { 1185 ill_t *ill = ipif->ipif_ill; 1186 1187 ASSERT(IAM_WRITER_ILL(ill)); 1188 1189 if (ill->ill_manual_dst_linklocal) 1190 return; 1191 1192 if (IN6_IS_ADDR_UNSPECIFIED(&ill->ill_dest_token)) 1193 return; 1194 1195 ipif_get_linklocal(&ipif->ipif_v6pp_dst_addr, &ill->ill_dest_token); 1196 ipif->ipif_v6subnet = ipif->ipif_v6pp_dst_addr; 1197 } 1198 1199 /* 1200 * Get the resolver set up for a new ipif. (Always called as writer.) 1201 */ 1202 int 1203 ipif_ndp_up(ipif_t *ipif, boolean_t initial) 1204 { 1205 ill_t *ill = ipif->ipif_ill; 1206 int err = 0; 1207 nce_t *nce = NULL; 1208 boolean_t added_ipif = B_FALSE; 1209 1210 DTRACE_PROBE3(ipif__downup, char *, "ipif_ndp_up", 1211 ill_t *, ill, ipif_t *, ipif); 1212 ip1dbg(("ipif_ndp_up(%s:%u)\n", ill->ill_name, ipif->ipif_id)); 1213 1214 if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr) || 1215 (!(ill->ill_net_type & IRE_INTERFACE))) { 1216 ipif->ipif_addr_ready = 1; 1217 return (0); 1218 } 1219 1220 if ((ipif->ipif_flags & (IPIF_UNNUMBERED|IPIF_NOLOCAL)) == 0) { 1221 uint16_t flags; 1222 uint16_t state; 1223 uchar_t *hw_addr; 1224 ill_t *bound_ill; 1225 ipmp_illgrp_t *illg = ill->ill_grp; 1226 uint_t hw_addr_len; 1227 1228 flags = NCE_F_MYADDR | NCE_F_NONUD | NCE_F_PUBLISH | 1229 NCE_F_AUTHORITY; 1230 if (ill->ill_flags & ILLF_ROUTER) 1231 flags |= NCE_F_ISROUTER; 1232 1233 if (ipif->ipif_flags & IPIF_ANYCAST) 1234 flags |= NCE_F_ANYCAST; 1235 1236 if (IS_IPMP(ill)) { 1237 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1238 /* 1239 * If we're here via ipif_up(), then the ipif won't be 1240 * bound yet -- add it to the group, which will bind 1241 * it if possible. (We would add it in ipif_up(), but 1242 * deleting on failure there is gruesome.) If we're 1243 * here via ipmp_ill_bind_ipif(), then the ipif has 1244 * already been added to the group and we just need to 1245 * use the binding. 1246 */ 1247 if ((bound_ill = ipmp_ipif_bound_ill(ipif)) == NULL) { 1248 bound_ill = ipmp_illgrp_add_ipif(illg, ipif); 1249 if (bound_ill == NULL) { 1250 /* 1251 * We couldn't bind the ipif to an ill 1252 * yet, so we have nothing to publish. 1253 * Set ipif_addr_ready so that this 1254 * address can be used locally for now. 1255 * The routing socket message will be 1256 * sent from ipif_up_done_v6(). 1257 */ 1258 ipif->ipif_addr_ready = 1; 1259 return (0); 1260 } 1261 added_ipif = B_TRUE; 1262 } 1263 hw_addr = bound_ill->ill_nd_lla; 1264 hw_addr_len = bound_ill->ill_phys_addr_length; 1265 } else { 1266 bound_ill = ill; 1267 hw_addr = ill->ill_nd_lla; 1268 hw_addr_len = ill->ill_phys_addr_length; 1269 } 1270 1271 /* 1272 * If this is an initial bring-up (or the ipif was never 1273 * completely brought up), do DAD. Otherwise, we're here 1274 * because IPMP has rebound an address to this ill: send 1275 * unsolicited advertisements to inform others. 1276 */ 1277 if (initial || !ipif->ipif_addr_ready) { 1278 /* Causes Duplicate Address Detection to run */ 1279 state = ND_PROBE; 1280 } else { 1281 state = ND_REACHABLE; 1282 flags |= NCE_F_UNSOL_ADV; 1283 } 1284 1285 retry: 1286 err = nce_lookup_then_add_v6(ill, hw_addr, hw_addr_len, 1287 &ipif->ipif_v6lcl_addr, flags, state, &nce); 1288 switch (err) { 1289 case 0: 1290 ip1dbg(("ipif_ndp_up: NCE created for %s\n", 1291 ill->ill_name)); 1292 ipif->ipif_addr_ready = 1; 1293 ipif->ipif_added_nce = 1; 1294 nce->nce_ipif_cnt++; 1295 break; 1296 case EINPROGRESS: 1297 ip1dbg(("ipif_ndp_up: running DAD now for %s\n", 1298 ill->ill_name)); 1299 ipif->ipif_added_nce = 1; 1300 nce->nce_ipif_cnt++; 1301 break; 1302 case EEXIST: 1303 ip1dbg(("ipif_ndp_up: NCE already exists for %s\n", 1304 ill->ill_name)); 1305 if (!NCE_MYADDR(nce->nce_common)) { 1306 /* 1307 * A leftover nce from before this address 1308 * existed 1309 */ 1310 ncec_delete(nce->nce_common); 1311 nce_refrele(nce); 1312 nce = NULL; 1313 goto retry; 1314 } 1315 if ((ipif->ipif_flags & IPIF_POINTOPOINT) == 0) { 1316 nce_refrele(nce); 1317 nce = NULL; 1318 ip1dbg(("ipif_ndp_up: NCE already exists " 1319 "for %s\n", ill->ill_name)); 1320 goto fail; 1321 } 1322 /* 1323 * Duplicate local addresses are permissible for 1324 * IPIF_POINTOPOINT interfaces which will get marked 1325 * IPIF_UNNUMBERED later in 1326 * ip_addr_availability_check(). 1327 * 1328 * The nce_ipif_cnt field tracks the number of 1329 * ipifs that have nce_addr as their local address. 1330 */ 1331 ipif->ipif_addr_ready = 1; 1332 ipif->ipif_added_nce = 1; 1333 nce->nce_ipif_cnt++; 1334 err = 0; 1335 break; 1336 default: 1337 ip1dbg(("ipif_ndp_up: NCE creation failed for %s\n", 1338 ill->ill_name)); 1339 goto fail; 1340 } 1341 } else { 1342 /* No local NCE for this entry */ 1343 ipif->ipif_addr_ready = 1; 1344 } 1345 if (nce != NULL) 1346 nce_refrele(nce); 1347 return (0); 1348 fail: 1349 if (added_ipif) 1350 ipmp_illgrp_del_ipif(ill->ill_grp, ipif); 1351 1352 return (err); 1353 } 1354 1355 /* Remove all cache entries for this logical interface */ 1356 void 1357 ipif_ndp_down(ipif_t *ipif) 1358 { 1359 ipif_nce_down(ipif); 1360 } 1361 1362 /* 1363 * Return the scope of the given IPv6 address. If the address is an 1364 * IPv4 mapped IPv6 address, return the scope of the corresponding 1365 * IPv4 address. 1366 */ 1367 in6addr_scope_t 1368 ip_addr_scope_v6(const in6_addr_t *addr) 1369 { 1370 static in6_addr_t ipv6loopback = IN6ADDR_LOOPBACK_INIT; 1371 1372 if (IN6_IS_ADDR_V4MAPPED(addr)) { 1373 in_addr_t v4addr_h = ntohl(V4_PART_OF_V6((*addr))); 1374 if ((v4addr_h >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 1375 (v4addr_h & IN_AUTOCONF_MASK) == IN_AUTOCONF_NET) 1376 return (IP6_SCOPE_LINKLOCAL); 1377 if ((v4addr_h & IN_PRIVATE8_MASK) == IN_PRIVATE8_NET || 1378 (v4addr_h & IN_PRIVATE12_MASK) == IN_PRIVATE12_NET || 1379 (v4addr_h & IN_PRIVATE16_MASK) == IN_PRIVATE16_NET) 1380 return (IP6_SCOPE_SITELOCAL); 1381 return (IP6_SCOPE_GLOBAL); 1382 } 1383 1384 if (IN6_IS_ADDR_MULTICAST(addr)) 1385 return (IN6_ADDR_MC_SCOPE(addr)); 1386 1387 /* link-local and loopback addresses are of link-local scope */ 1388 if (IN6_IS_ADDR_LINKLOCAL(addr) || 1389 IN6_ARE_ADDR_EQUAL(addr, &ipv6loopback)) 1390 return (IP6_SCOPE_LINKLOCAL); 1391 if (IN6_IS_ADDR_SITELOCAL(addr)) 1392 return (IP6_SCOPE_SITELOCAL); 1393 return (IP6_SCOPE_GLOBAL); 1394 } 1395 1396 1397 /* 1398 * Returns the length of the common prefix of a1 and a2, as per 1399 * CommonPrefixLen() defined in RFC 3484. 1400 */ 1401 static int 1402 ip_common_prefix_v6(const in6_addr_t *a1, const in6_addr_t *a2) 1403 { 1404 int i; 1405 uint32_t a1val, a2val, mask; 1406 1407 for (i = 0; i < 4; i++) { 1408 if ((a1val = a1->s6_addr32[i]) != (a2val = a2->s6_addr32[i])) { 1409 a1val ^= a2val; 1410 i *= 32; 1411 mask = 0x80000000u; 1412 while (!(a1val & mask)) { 1413 mask >>= 1; 1414 i++; 1415 } 1416 return (i); 1417 } 1418 } 1419 return (IPV6_ABITS); 1420 } 1421 1422 #define IPIF_VALID_IPV6_SOURCE(ipif) \ 1423 (((ipif)->ipif_flags & IPIF_UP) && \ 1424 !((ipif)->ipif_flags & (IPIF_NOLOCAL|IPIF_ANYCAST)) && \ 1425 !((ipif)->ipif_ill->ill_flags & ILLF_NOACCEPT)) 1426 1427 /* source address candidate */ 1428 typedef struct candidate { 1429 ipif_t *cand_ipif; 1430 /* The properties of this candidate */ 1431 boolean_t cand_isdst; 1432 boolean_t cand_isdst_set; 1433 in6addr_scope_t cand_scope; 1434 boolean_t cand_scope_set; 1435 boolean_t cand_isdeprecated; 1436 boolean_t cand_isdeprecated_set; 1437 boolean_t cand_ispreferred; 1438 boolean_t cand_ispreferred_set; 1439 boolean_t cand_matchedinterface; 1440 boolean_t cand_matchedinterface_set; 1441 boolean_t cand_matchedlabel; 1442 boolean_t cand_matchedlabel_set; 1443 boolean_t cand_istmp; 1444 boolean_t cand_istmp_set; 1445 int cand_common_pref; 1446 boolean_t cand_common_pref_set; 1447 boolean_t cand_pref_eq; 1448 boolean_t cand_pref_eq_set; 1449 int cand_pref_len; 1450 boolean_t cand_pref_len_set; 1451 } cand_t; 1452 #define cand_srcaddr cand_ipif->ipif_v6lcl_addr 1453 #define cand_mask cand_ipif->ipif_v6net_mask 1454 #define cand_flags cand_ipif->ipif_flags 1455 #define cand_ill cand_ipif->ipif_ill 1456 #define cand_zoneid cand_ipif->ipif_zoneid 1457 1458 /* information about the destination for source address selection */ 1459 typedef struct dstinfo { 1460 const in6_addr_t *dst_addr; 1461 ill_t *dst_ill; 1462 uint_t dst_restrict_ill; 1463 boolean_t dst_prefer_src_tmp; 1464 in6addr_scope_t dst_scope; 1465 char *dst_label; 1466 } dstinfo_t; 1467 1468 /* 1469 * The following functions are rules used to select a source address in 1470 * ipif_select_source_v6(). Each rule compares a current candidate (cc) 1471 * against the best candidate (bc). Each rule has three possible outcomes; 1472 * the candidate is preferred over the best candidate (CAND_PREFER), the 1473 * candidate is not preferred over the best candidate (CAND_AVOID), or the 1474 * candidate is of equal value as the best candidate (CAND_TIE). 1475 * 1476 * These rules are part of a greater "Default Address Selection for IPv6" 1477 * sheme, which is standards based work coming out of the IETF ipv6 working 1478 * group. The IETF document defines both IPv6 source address selection and 1479 * destination address ordering. The rules defined here implement the IPv6 1480 * source address selection. Destination address ordering is done by 1481 * libnsl, and uses a similar set of rules to implement the sorting. 1482 * 1483 * Most of the rules are defined by the RFC and are not typically altered. The 1484 * last rule, number 8, has language that allows for local preferences. In the 1485 * scheme below, this means that new Solaris rules should normally go between 1486 * rule_ifprefix and rule_prefix. 1487 */ 1488 typedef enum {CAND_AVOID, CAND_TIE, CAND_PREFER} rule_res_t; 1489 typedef rule_res_t (*rulef_t)(cand_t *, cand_t *, const dstinfo_t *, 1490 ip_stack_t *); 1491 1492 /* Prefer an address if it is equal to the destination address. */ 1493 /* ARGSUSED3 */ 1494 static rule_res_t 1495 rule_isdst(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 1496 { 1497 if (!bc->cand_isdst_set) { 1498 bc->cand_isdst = 1499 IN6_ARE_ADDR_EQUAL(&bc->cand_srcaddr, dstinfo->dst_addr); 1500 bc->cand_isdst_set = B_TRUE; 1501 } 1502 1503 cc->cand_isdst = 1504 IN6_ARE_ADDR_EQUAL(&cc->cand_srcaddr, dstinfo->dst_addr); 1505 cc->cand_isdst_set = B_TRUE; 1506 1507 if (cc->cand_isdst == bc->cand_isdst) 1508 return (CAND_TIE); 1509 else if (cc->cand_isdst) 1510 return (CAND_PREFER); 1511 else 1512 return (CAND_AVOID); 1513 } 1514 1515 /* 1516 * Prefer addresses that are of closest scope to the destination. Always 1517 * prefer addresses that are of greater scope than the destination over 1518 * those that are of lesser scope than the destination. 1519 */ 1520 /* ARGSUSED3 */ 1521 static rule_res_t 1522 rule_scope(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 1523 { 1524 if (!bc->cand_scope_set) { 1525 bc->cand_scope = ip_addr_scope_v6(&bc->cand_srcaddr); 1526 bc->cand_scope_set = B_TRUE; 1527 } 1528 1529 cc->cand_scope = ip_addr_scope_v6(&cc->cand_srcaddr); 1530 cc->cand_scope_set = B_TRUE; 1531 1532 if (cc->cand_scope < bc->cand_scope) { 1533 if (cc->cand_scope < dstinfo->dst_scope) 1534 return (CAND_AVOID); 1535 else 1536 return (CAND_PREFER); 1537 } else if (bc->cand_scope < cc->cand_scope) { 1538 if (bc->cand_scope < dstinfo->dst_scope) 1539 return (CAND_PREFER); 1540 else 1541 return (CAND_AVOID); 1542 } else { 1543 return (CAND_TIE); 1544 } 1545 } 1546 1547 /* 1548 * Prefer non-deprecated source addresses. 1549 */ 1550 /* ARGSUSED2 */ 1551 static rule_res_t 1552 rule_deprecated(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1553 ip_stack_t *ipst) 1554 { 1555 if (!bc->cand_isdeprecated_set) { 1556 bc->cand_isdeprecated = 1557 ((bc->cand_flags & IPIF_DEPRECATED) != 0); 1558 bc->cand_isdeprecated_set = B_TRUE; 1559 } 1560 1561 cc->cand_isdeprecated = ((cc->cand_flags & IPIF_DEPRECATED) != 0); 1562 cc->cand_isdeprecated_set = B_TRUE; 1563 1564 if (bc->cand_isdeprecated == cc->cand_isdeprecated) 1565 return (CAND_TIE); 1566 else if (cc->cand_isdeprecated) 1567 return (CAND_AVOID); 1568 else 1569 return (CAND_PREFER); 1570 } 1571 1572 /* 1573 * Prefer source addresses that have the IPIF_PREFERRED flag set. This 1574 * rule must be before rule_interface because the flag could be set on any 1575 * interface, not just the interface being used for outgoing packets (for 1576 * example, the IFF_PREFERRED could be set on an address assigned to the 1577 * loopback interface). 1578 */ 1579 /* ARGSUSED2 */ 1580 static rule_res_t 1581 rule_preferred(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1582 ip_stack_t *ipst) 1583 { 1584 if (!bc->cand_ispreferred_set) { 1585 bc->cand_ispreferred = ((bc->cand_flags & IPIF_PREFERRED) != 0); 1586 bc->cand_ispreferred_set = B_TRUE; 1587 } 1588 1589 cc->cand_ispreferred = ((cc->cand_flags & IPIF_PREFERRED) != 0); 1590 cc->cand_ispreferred_set = B_TRUE; 1591 1592 if (bc->cand_ispreferred == cc->cand_ispreferred) 1593 return (CAND_TIE); 1594 else if (cc->cand_ispreferred) 1595 return (CAND_PREFER); 1596 else 1597 return (CAND_AVOID); 1598 } 1599 1600 /* 1601 * Prefer source addresses that are assigned to the outgoing interface. 1602 */ 1603 /* ARGSUSED3 */ 1604 static rule_res_t 1605 rule_interface(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1606 ip_stack_t *ipst) 1607 { 1608 ill_t *dstill = dstinfo->dst_ill; 1609 1610 /* 1611 * If dstinfo->dst_restrict_ill is set, this rule is unnecessary 1612 * since we know all candidates will be on the same link. 1613 */ 1614 if (dstinfo->dst_restrict_ill) 1615 return (CAND_TIE); 1616 1617 if (!bc->cand_matchedinterface_set) { 1618 bc->cand_matchedinterface = bc->cand_ill == dstill; 1619 bc->cand_matchedinterface_set = B_TRUE; 1620 } 1621 1622 cc->cand_matchedinterface = cc->cand_ill == dstill; 1623 cc->cand_matchedinterface_set = B_TRUE; 1624 1625 if (bc->cand_matchedinterface == cc->cand_matchedinterface) 1626 return (CAND_TIE); 1627 else if (cc->cand_matchedinterface) 1628 return (CAND_PREFER); 1629 else 1630 return (CAND_AVOID); 1631 } 1632 1633 /* 1634 * Prefer source addresses whose label matches the destination's label. 1635 */ 1636 static rule_res_t 1637 rule_label(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 1638 { 1639 char *label; 1640 1641 if (!bc->cand_matchedlabel_set) { 1642 label = ip6_asp_lookup(&bc->cand_srcaddr, NULL, ipst); 1643 bc->cand_matchedlabel = 1644 ip6_asp_labelcmp(label, dstinfo->dst_label); 1645 bc->cand_matchedlabel_set = B_TRUE; 1646 } 1647 1648 label = ip6_asp_lookup(&cc->cand_srcaddr, NULL, ipst); 1649 cc->cand_matchedlabel = ip6_asp_labelcmp(label, dstinfo->dst_label); 1650 cc->cand_matchedlabel_set = B_TRUE; 1651 1652 if (bc->cand_matchedlabel == cc->cand_matchedlabel) 1653 return (CAND_TIE); 1654 else if (cc->cand_matchedlabel) 1655 return (CAND_PREFER); 1656 else 1657 return (CAND_AVOID); 1658 } 1659 1660 /* 1661 * Prefer public addresses over temporary ones. An application can reverse 1662 * the logic of this rule and prefer temporary addresses by using the 1663 * IPV6_SRC_PREFERENCES socket option. 1664 */ 1665 /* ARGSUSED3 */ 1666 static rule_res_t 1667 rule_temporary(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1668 ip_stack_t *ipst) 1669 { 1670 if (!bc->cand_istmp_set) { 1671 bc->cand_istmp = ((bc->cand_flags & IPIF_TEMPORARY) != 0); 1672 bc->cand_istmp_set = B_TRUE; 1673 } 1674 1675 cc->cand_istmp = ((cc->cand_flags & IPIF_TEMPORARY) != 0); 1676 cc->cand_istmp_set = B_TRUE; 1677 1678 if (bc->cand_istmp == cc->cand_istmp) 1679 return (CAND_TIE); 1680 1681 if (dstinfo->dst_prefer_src_tmp && cc->cand_istmp) 1682 return (CAND_PREFER); 1683 else if (!dstinfo->dst_prefer_src_tmp && !cc->cand_istmp) 1684 return (CAND_PREFER); 1685 else 1686 return (CAND_AVOID); 1687 } 1688 1689 /* 1690 * Prefer source addresses with longer matching prefix with the destination 1691 * under the interface mask. This gets us on the same subnet before applying 1692 * any Solaris-specific rules. 1693 */ 1694 /* ARGSUSED3 */ 1695 static rule_res_t 1696 rule_ifprefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1697 ip_stack_t *ipst) 1698 { 1699 if (!bc->cand_pref_eq_set) { 1700 bc->cand_pref_eq = V6_MASK_EQ_2(bc->cand_srcaddr, 1701 bc->cand_mask, *dstinfo->dst_addr); 1702 bc->cand_pref_eq_set = B_TRUE; 1703 } 1704 1705 cc->cand_pref_eq = V6_MASK_EQ_2(cc->cand_srcaddr, cc->cand_mask, 1706 *dstinfo->dst_addr); 1707 cc->cand_pref_eq_set = B_TRUE; 1708 1709 if (bc->cand_pref_eq) { 1710 if (cc->cand_pref_eq) { 1711 if (!bc->cand_pref_len_set) { 1712 bc->cand_pref_len = 1713 ip_mask_to_plen_v6(&bc->cand_mask); 1714 bc->cand_pref_len_set = B_TRUE; 1715 } 1716 cc->cand_pref_len = ip_mask_to_plen_v6(&cc->cand_mask); 1717 cc->cand_pref_len_set = B_TRUE; 1718 if (bc->cand_pref_len == cc->cand_pref_len) 1719 return (CAND_TIE); 1720 else if (bc->cand_pref_len > cc->cand_pref_len) 1721 return (CAND_AVOID); 1722 else 1723 return (CAND_PREFER); 1724 } else { 1725 return (CAND_AVOID); 1726 } 1727 } else { 1728 if (cc->cand_pref_eq) 1729 return (CAND_PREFER); 1730 else 1731 return (CAND_TIE); 1732 } 1733 } 1734 1735 /* 1736 * Prefer to use zone-specific addresses when possible instead of all-zones 1737 * addresses. 1738 */ 1739 /* ARGSUSED2 */ 1740 static rule_res_t 1741 rule_zone_specific(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1742 ip_stack_t *ipst) 1743 { 1744 if ((bc->cand_zoneid == ALL_ZONES) == 1745 (cc->cand_zoneid == ALL_ZONES)) 1746 return (CAND_TIE); 1747 else if (cc->cand_zoneid == ALL_ZONES) 1748 return (CAND_AVOID); 1749 else 1750 return (CAND_PREFER); 1751 } 1752 1753 /* 1754 * Prefer to use DHCPv6 (first) and static addresses (second) when possible 1755 * instead of statelessly autoconfigured addresses. 1756 * 1757 * This is done after trying all other preferences (and before the final tie 1758 * breaker) so that, if all else is equal, we select addresses configured by 1759 * DHCPv6 over other addresses. We presume that DHCPv6 addresses, unlike 1760 * stateless autoconfigured addresses, are deliberately configured by an 1761 * administrator, and thus are correctly set up in DNS and network packet 1762 * filters. 1763 */ 1764 /* ARGSUSED2 */ 1765 static rule_res_t 1766 rule_addr_type(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1767 ip_stack_t *ipst) 1768 { 1769 #define ATYPE(x) \ 1770 ((x) & IPIF_DHCPRUNNING) ? 1 : ((x) & IPIF_ADDRCONF) ? 3 : 2 1771 int bcval = ATYPE(bc->cand_flags); 1772 int ccval = ATYPE(cc->cand_flags); 1773 #undef ATYPE 1774 1775 if (bcval == ccval) 1776 return (CAND_TIE); 1777 else if (ccval < bcval) 1778 return (CAND_PREFER); 1779 else 1780 return (CAND_AVOID); 1781 } 1782 1783 /* 1784 * Prefer source addresses with longer matching prefix with the destination. 1785 * We do the longest matching prefix calculation by doing an xor of both 1786 * addresses with the destination, and pick the address with the longest string 1787 * of leading zeros, as per CommonPrefixLen() defined in RFC 3484. 1788 */ 1789 /* ARGSUSED3 */ 1790 static rule_res_t 1791 rule_prefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 1792 { 1793 if (!bc->cand_common_pref_set) { 1794 bc->cand_common_pref = ip_common_prefix_v6(&bc->cand_srcaddr, 1795 dstinfo->dst_addr); 1796 bc->cand_common_pref_set = B_TRUE; 1797 } 1798 1799 cc->cand_common_pref = ip_common_prefix_v6(&cc->cand_srcaddr, 1800 dstinfo->dst_addr); 1801 cc->cand_common_pref_set = B_TRUE; 1802 1803 if (bc->cand_common_pref == cc->cand_common_pref) 1804 return (CAND_TIE); 1805 else if (bc->cand_common_pref > cc->cand_common_pref) 1806 return (CAND_AVOID); 1807 else 1808 return (CAND_PREFER); 1809 } 1810 1811 /* 1812 * Last rule: we must pick something, so just prefer the current best 1813 * candidate. 1814 */ 1815 /* ARGSUSED */ 1816 static rule_res_t 1817 rule_must_be_last(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1818 ip_stack_t *ipst) 1819 { 1820 return (CAND_AVOID); 1821 } 1822 1823 /* 1824 * Determine the best source address given a destination address and a 1825 * destination ill. If no suitable source address is found, it returns 1826 * NULL. If there is a usable address pointed to by the usesrc 1827 * (i.e ill_usesrc_ifindex != 0) then return that first since it is more 1828 * fine grained (i.e per interface) 1829 * 1830 * This implementation is based on the "Default Address Selection for IPv6" 1831 * specification produced by the IETF IPv6 working group. It has been 1832 * implemented so that the list of addresses is only traversed once (the 1833 * specification's algorithm could traverse the list of addresses once for 1834 * every rule). 1835 * 1836 * The restrict_ill argument restricts the algorithm to choose a source 1837 * address that is assigned to the destination ill. This is used when 1838 * the destination address is a link-local or multicast address, and when 1839 * ipv6_strict_dst_multihoming is turned on. 1840 * 1841 * src_prefs is the caller's set of source address preferences. If source 1842 * address selection is being called to determine the source address of a 1843 * connected socket (from ip_set_destination_v6()), then the preferences are 1844 * taken from conn_ixa->ixa_src_preferences. These preferences can be set on a 1845 * per-socket basis using the IPV6_SRC_PREFERENCES socket option. The only 1846 * preference currently implemented is for rfc3041 temporary addresses. 1847 */ 1848 ipif_t * 1849 ipif_select_source_v6(ill_t *dstill, const in6_addr_t *dst, 1850 boolean_t restrict_ill, uint32_t src_prefs, zoneid_t zoneid, 1851 boolean_t allow_usesrc, boolean_t *notreadyp) 1852 { 1853 dstinfo_t dstinfo; 1854 char dstr[INET6_ADDRSTRLEN]; 1855 char sstr[INET6_ADDRSTRLEN]; 1856 ipif_t *ipif, *start_ipif, *next_ipif; 1857 ill_t *ill, *usesrc_ill = NULL, *ipmp_ill = NULL; 1858 ill_walk_context_t ctx; 1859 cand_t best_c; /* The best candidate */ 1860 cand_t curr_c; /* The current candidate */ 1861 uint_t index; 1862 boolean_t first_candidate = B_TRUE; 1863 rule_res_t rule_result; 1864 tsol_tpc_t *src_rhtp, *dst_rhtp; 1865 ip_stack_t *ipst = dstill->ill_ipst; 1866 1867 /* 1868 * The list of ordering rules. They are applied in the order they 1869 * appear in the list. 1870 * 1871 * Solaris doesn't currently support Mobile IPv6, so there's no 1872 * rule_mipv6 corresponding to rule 4 in the specification. 1873 */ 1874 rulef_t rules[] = { 1875 rule_isdst, 1876 rule_scope, 1877 rule_deprecated, 1878 rule_preferred, 1879 rule_interface, 1880 rule_label, 1881 rule_temporary, 1882 rule_ifprefix, /* local rules after this */ 1883 rule_zone_specific, 1884 rule_addr_type, 1885 rule_prefix, /* local rules before this */ 1886 rule_must_be_last, /* must always be last */ 1887 NULL 1888 }; 1889 1890 ASSERT(dstill->ill_isv6); 1891 ASSERT(!IN6_IS_ADDR_V4MAPPED(dst)); 1892 1893 /* 1894 * Check if there is a usable src address pointed to by the 1895 * usesrc ifindex. This has higher precedence since it is 1896 * finer grained (i.e per interface) v/s being system wide. 1897 */ 1898 if (dstill->ill_usesrc_ifindex != 0 && allow_usesrc) { 1899 if ((usesrc_ill = 1900 ill_lookup_on_ifindex(dstill->ill_usesrc_ifindex, B_TRUE, 1901 ipst)) != NULL) { 1902 dstinfo.dst_ill = usesrc_ill; 1903 } else { 1904 return (NULL); 1905 } 1906 } else if (IS_UNDER_IPMP(dstill)) { 1907 /* 1908 * Test addresses should never be used for source address 1909 * selection, so if we were passed an underlying ill, switch 1910 * to the IPMP meta-interface. 1911 */ 1912 if ((ipmp_ill = ipmp_ill_hold_ipmp_ill(dstill)) != NULL) 1913 dstinfo.dst_ill = ipmp_ill; 1914 else 1915 return (NULL); 1916 } else { 1917 dstinfo.dst_ill = dstill; 1918 } 1919 1920 /* 1921 * If we're dealing with an unlabeled destination on a labeled system, 1922 * make sure that we ignore source addresses that are incompatible with 1923 * the destination's default label. That destination's default label 1924 * must dominate the minimum label on the source address. 1925 * 1926 * (Note that this has to do with Trusted Solaris. It's not related to 1927 * the labels described by ip6_asp_lookup.) 1928 */ 1929 dst_rhtp = NULL; 1930 if (is_system_labeled()) { 1931 dst_rhtp = find_tpc(dst, IPV6_VERSION, B_FALSE); 1932 if (dst_rhtp == NULL) 1933 return (NULL); 1934 if (dst_rhtp->tpc_tp.host_type != UNLABELED) { 1935 TPC_RELE(dst_rhtp); 1936 dst_rhtp = NULL; 1937 } 1938 } 1939 1940 dstinfo.dst_addr = dst; 1941 dstinfo.dst_scope = ip_addr_scope_v6(dst); 1942 dstinfo.dst_label = ip6_asp_lookup(dst, NULL, ipst); 1943 dstinfo.dst_prefer_src_tmp = ((src_prefs & IPV6_PREFER_SRC_TMP) != 0); 1944 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1945 /* 1946 * Section three of the I-D states that for multicast and 1947 * link-local destinations, the candidate set must be restricted to 1948 * an interface that is on the same link as the outgoing interface. 1949 * Also, when ipv6_strict_dst_multihoming is turned on, always 1950 * restrict the source address to the destination link as doing 1951 * otherwise will almost certainly cause problems. 1952 */ 1953 if (IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst) || 1954 ipst->ips_ipv6_strict_dst_multihoming || usesrc_ill != NULL) { 1955 dstinfo.dst_restrict_ill = B_TRUE; 1956 } else { 1957 dstinfo.dst_restrict_ill = restrict_ill; 1958 } 1959 1960 bzero(&best_c, sizeof (cand_t)); 1961 1962 /* 1963 * Take a pass through the list of IPv6 interfaces to choose the best 1964 * possible source address. If restrict_ill is set, just use dst_ill. 1965 */ 1966 if (dstinfo.dst_restrict_ill) 1967 ill = dstinfo.dst_ill; 1968 else 1969 ill = ILL_START_WALK_V6(&ctx, ipst); 1970 1971 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 1972 ASSERT(ill->ill_isv6); 1973 1974 /* 1975 * Test addresses should never be used for source address 1976 * selection, so ignore underlying ills. 1977 */ 1978 if (IS_UNDER_IPMP(ill)) 1979 continue; 1980 1981 if (ill->ill_ipif == NULL) 1982 continue; 1983 /* 1984 * For source address selection, we treat the ipif list as 1985 * circular and continue until we get back to where we 1986 * started. This allows IPMP to vary source address selection 1987 * (which improves inbound load spreading) by caching its last 1988 * ending point and starting from there. NOTE: we don't have 1989 * to worry about ill_src_ipif changing ills since that can't 1990 * happen on the IPMP ill. 1991 */ 1992 start_ipif = ill->ill_ipif; 1993 if (IS_IPMP(ill) && ill->ill_src_ipif != NULL) 1994 start_ipif = ill->ill_src_ipif; 1995 1996 ipif = start_ipif; 1997 do { 1998 if ((next_ipif = ipif->ipif_next) == NULL) 1999 next_ipif = ill->ill_ipif; 2000 2001 if (!IPIF_VALID_IPV6_SOURCE(ipif)) 2002 continue; 2003 2004 if (!ipif->ipif_addr_ready) { 2005 if (notreadyp != NULL) 2006 *notreadyp = B_TRUE; 2007 continue; 2008 } 2009 2010 if (zoneid != ALL_ZONES && 2011 ipif->ipif_zoneid != zoneid && 2012 ipif->ipif_zoneid != ALL_ZONES) 2013 continue; 2014 2015 /* 2016 * Check compatibility of local address for 2017 * destination's default label if we're on a labeled 2018 * system. Incompatible addresses can't be used at 2019 * all and must be skipped over. 2020 */ 2021 if (dst_rhtp != NULL) { 2022 boolean_t incompat; 2023 2024 src_rhtp = find_tpc(&ipif->ipif_v6lcl_addr, 2025 IPV6_VERSION, B_FALSE); 2026 if (src_rhtp == NULL) 2027 continue; 2028 incompat = 2029 src_rhtp->tpc_tp.host_type != SUN_CIPSO || 2030 src_rhtp->tpc_tp.tp_doi != 2031 dst_rhtp->tpc_tp.tp_doi || 2032 (!_blinrange(&dst_rhtp->tpc_tp.tp_def_label, 2033 &src_rhtp->tpc_tp.tp_sl_range_cipso) && 2034 !blinlset(&dst_rhtp->tpc_tp.tp_def_label, 2035 src_rhtp->tpc_tp.tp_sl_set_cipso)); 2036 TPC_RELE(src_rhtp); 2037 if (incompat) 2038 continue; 2039 } 2040 2041 if (first_candidate) { 2042 /* 2043 * This is first valid address in the list. 2044 * It is automatically the best candidate 2045 * so far. 2046 */ 2047 best_c.cand_ipif = ipif; 2048 first_candidate = B_FALSE; 2049 continue; 2050 } 2051 2052 bzero(&curr_c, sizeof (cand_t)); 2053 curr_c.cand_ipif = ipif; 2054 2055 /* 2056 * Compare this current candidate (curr_c) with the 2057 * best candidate (best_c) by applying the 2058 * comparison rules in order until one breaks the 2059 * tie. 2060 */ 2061 for (index = 0; rules[index] != NULL; index++) { 2062 /* Apply a comparison rule. */ 2063 rule_result = (rules[index])(&best_c, &curr_c, 2064 &dstinfo, ipst); 2065 if (rule_result == CAND_AVOID) { 2066 /* 2067 * The best candidate is still the 2068 * best candidate. Forget about 2069 * this current candidate and go on 2070 * to the next one. 2071 */ 2072 break; 2073 } else if (rule_result == CAND_PREFER) { 2074 /* 2075 * This candidate is prefered. It 2076 * becomes the best candidate so 2077 * far. Go on to the next address. 2078 */ 2079 best_c = curr_c; 2080 break; 2081 } 2082 /* We have a tie, apply the next rule. */ 2083 } 2084 2085 /* 2086 * The last rule must be a tie breaker rule and 2087 * must never produce a tie. At this point, the 2088 * candidate should have either been rejected, or 2089 * have been prefered as the best candidate so far. 2090 */ 2091 ASSERT(rule_result != CAND_TIE); 2092 } while ((ipif = next_ipif) != start_ipif); 2093 2094 /* 2095 * For IPMP, update the source ipif rotor to the next ipif, 2096 * provided we can look it up. (We must not use it if it's 2097 * IPIF_CONDEMNED since we may have grabbed ill_g_lock after 2098 * ipif_free() checked ill_src_ipif.) 2099 */ 2100 if (IS_IPMP(ill) && ipif != NULL) { 2101 mutex_enter(&ipif->ipif_ill->ill_lock); 2102 next_ipif = ipif->ipif_next; 2103 if (next_ipif != NULL && !IPIF_IS_CONDEMNED(next_ipif)) 2104 ill->ill_src_ipif = next_ipif; 2105 else 2106 ill->ill_src_ipif = NULL; 2107 mutex_exit(&ipif->ipif_ill->ill_lock); 2108 } 2109 2110 /* 2111 * Only one ill to consider if dst_restrict_ill is set. 2112 */ 2113 if (dstinfo.dst_restrict_ill) 2114 break; 2115 } 2116 2117 ipif = best_c.cand_ipif; 2118 ip1dbg(("ipif_select_source_v6(%s, %s) -> %s\n", 2119 dstinfo.dst_ill->ill_name, 2120 inet_ntop(AF_INET6, dstinfo.dst_addr, dstr, sizeof (dstr)), 2121 (ipif == NULL ? "NULL" : 2122 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, sstr, sizeof (sstr))))); 2123 2124 if (usesrc_ill != NULL) 2125 ill_refrele(usesrc_ill); 2126 2127 if (ipmp_ill != NULL) 2128 ill_refrele(ipmp_ill); 2129 2130 if (dst_rhtp != NULL) 2131 TPC_RELE(dst_rhtp); 2132 2133 if (ipif == NULL) { 2134 rw_exit(&ipst->ips_ill_g_lock); 2135 return (NULL); 2136 } 2137 2138 mutex_enter(&ipif->ipif_ill->ill_lock); 2139 if (!IPIF_IS_CONDEMNED(ipif)) { 2140 ipif_refhold_locked(ipif); 2141 mutex_exit(&ipif->ipif_ill->ill_lock); 2142 rw_exit(&ipst->ips_ill_g_lock); 2143 return (ipif); 2144 } 2145 mutex_exit(&ipif->ipif_ill->ill_lock); 2146 rw_exit(&ipst->ips_ill_g_lock); 2147 ip1dbg(("ipif_select_source_v6 cannot lookup ipif %p" 2148 " returning null \n", (void *)ipif)); 2149 2150 return (NULL); 2151 } 2152 2153 /* 2154 * Pick a source address based on the destination ill and an optional setsrc 2155 * address. 2156 * The result is stored in srcp. If generation is set, then put the source 2157 * generation number there before we look for the source address (to avoid 2158 * missing changes in the set of source addresses. 2159 * If flagsp is set, then us it to pass back ipif_flags. 2160 * 2161 * If the caller wants to cache the returned source address and detect when 2162 * that might be stale, the caller should pass in a generation argument, 2163 * which the caller can later compare against ips_src_generation 2164 * 2165 * The precedence order for selecting an IPv6 source address is: 2166 * - RTF_SETSRC on the first ire in the recursive lookup always wins. 2167 * - If usrsrc is set, swap the ill to be the usesrc one. 2168 * - If IPMP is used on the ill, select a random address from the most 2169 * preferred ones below: 2170 * That is followed by the long list of IPv6 source address selection rules 2171 * starting with rule_isdst(), rule_scope(), etc. 2172 * 2173 * We have lower preference for ALL_ZONES IP addresses, 2174 * as they pose problems with unlabeled destinations. 2175 * 2176 * Note that when multiple IP addresses match e.g., with rule_scope() we pick 2177 * the first one if IPMP is not in use. With IPMP we randomize. 2178 */ 2179 int 2180 ip_select_source_v6(ill_t *ill, const in6_addr_t *setsrc, const in6_addr_t *dst, 2181 zoneid_t zoneid, ip_stack_t *ipst, uint_t restrict_ill, uint32_t src_prefs, 2182 in6_addr_t *srcp, uint32_t *generation, uint64_t *flagsp) 2183 { 2184 ipif_t *ipif; 2185 boolean_t notready = B_FALSE; /* Set if !ipif_addr_ready found */ 2186 2187 if (flagsp != NULL) 2188 *flagsp = 0; 2189 2190 /* 2191 * Need to grab the generation number before we check to 2192 * avoid a race with a change to the set of local addresses. 2193 * No lock needed since the thread which updates the set of local 2194 * addresses use ipif/ill locks and exit those (hence a store memory 2195 * barrier) before doing the atomic increase of ips_src_generation. 2196 */ 2197 if (generation != NULL) { 2198 *generation = ipst->ips_src_generation; 2199 } 2200 2201 /* Was RTF_SETSRC set on the first IRE in the recursive lookup? */ 2202 if (setsrc != NULL && !IN6_IS_ADDR_UNSPECIFIED(setsrc)) { 2203 *srcp = *setsrc; 2204 return (0); 2205 } 2206 2207 ipif = ipif_select_source_v6(ill, dst, restrict_ill, src_prefs, zoneid, 2208 B_TRUE, ¬ready); 2209 if (ipif == NULL) { 2210 if (notready) 2211 return (ENETDOWN); 2212 else 2213 return (EADDRNOTAVAIL); 2214 } 2215 *srcp = ipif->ipif_v6lcl_addr; 2216 if (flagsp != NULL) 2217 *flagsp = ipif->ipif_flags; 2218 ipif_refrele(ipif); 2219 return (0); 2220 } 2221 2222 /* 2223 * Perform an attach and bind to get phys addr plus info_req for 2224 * the physical device. 2225 * q and mp represents an ioctl which will be queued waiting for 2226 * completion of the DLPI message exchange. 2227 * MUST be called on an ill queue. 2228 * 2229 * Returns EINPROGRESS when mp has been consumed by queueing it. 2230 * The ioctl will complete in ip_rput. 2231 */ 2232 int 2233 ill_dl_phys(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q) 2234 { 2235 mblk_t *v6token_mp = NULL; 2236 mblk_t *v6lla_mp = NULL; 2237 mblk_t *dest_mp = NULL; 2238 mblk_t *phys_mp = NULL; 2239 mblk_t *info_mp = NULL; 2240 mblk_t *attach_mp = NULL; 2241 mblk_t *bind_mp = NULL; 2242 mblk_t *unbind_mp = NULL; 2243 mblk_t *notify_mp = NULL; 2244 mblk_t *capab_mp = NULL; 2245 2246 ip1dbg(("ill_dl_phys(%s:%u)\n", ill->ill_name, ipif->ipif_id)); 2247 ASSERT(ill->ill_dlpi_style_set); 2248 ASSERT(WR(q)->q_next != NULL); 2249 2250 if (ill->ill_isv6) { 2251 v6token_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2252 sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2253 if (v6token_mp == NULL) 2254 goto bad; 2255 ((dl_phys_addr_req_t *)v6token_mp->b_rptr)->dl_addr_type = 2256 DL_IPV6_TOKEN; 2257 2258 v6lla_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2259 sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2260 if (v6lla_mp == NULL) 2261 goto bad; 2262 ((dl_phys_addr_req_t *)v6lla_mp->b_rptr)->dl_addr_type = 2263 DL_IPV6_LINK_LAYER_ADDR; 2264 } 2265 2266 if (ill->ill_mactype == DL_IPV4 || ill->ill_mactype == DL_IPV6) { 2267 dest_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2268 sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2269 if (dest_mp == NULL) 2270 goto bad; 2271 ((dl_phys_addr_req_t *)dest_mp->b_rptr)->dl_addr_type = 2272 DL_CURR_DEST_ADDR; 2273 } 2274 2275 /* 2276 * Allocate a DL_NOTIFY_REQ and set the notifications we want. 2277 */ 2278 notify_mp = ip_dlpi_alloc(sizeof (dl_notify_req_t) + sizeof (long), 2279 DL_NOTIFY_REQ); 2280 if (notify_mp == NULL) 2281 goto bad; 2282 ((dl_notify_req_t *)notify_mp->b_rptr)->dl_notifications = 2283 (DL_NOTE_PHYS_ADDR | DL_NOTE_SDU_SIZE | DL_NOTE_FASTPATH_FLUSH | 2284 DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN | DL_NOTE_CAPAB_RENEG | 2285 DL_NOTE_PROMISC_ON_PHYS | DL_NOTE_PROMISC_OFF_PHYS | 2286 DL_NOTE_REPLUMB); 2287 2288 phys_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2289 sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2290 if (phys_mp == NULL) 2291 goto bad; 2292 ((dl_phys_addr_req_t *)phys_mp->b_rptr)->dl_addr_type = 2293 DL_CURR_PHYS_ADDR; 2294 2295 info_mp = ip_dlpi_alloc( 2296 sizeof (dl_info_req_t) + sizeof (dl_info_ack_t), 2297 DL_INFO_REQ); 2298 if (info_mp == NULL) 2299 goto bad; 2300 2301 ASSERT(ill->ill_dlpi_capab_state == IDCS_UNKNOWN); 2302 capab_mp = ip_dlpi_alloc(sizeof (dl_capability_req_t), 2303 DL_CAPABILITY_REQ); 2304 if (capab_mp == NULL) 2305 goto bad; 2306 2307 bind_mp = ip_dlpi_alloc(sizeof (dl_bind_req_t) + sizeof (long), 2308 DL_BIND_REQ); 2309 if (bind_mp == NULL) 2310 goto bad; 2311 ((dl_bind_req_t *)bind_mp->b_rptr)->dl_sap = ill->ill_sap; 2312 ((dl_bind_req_t *)bind_mp->b_rptr)->dl_service_mode = DL_CLDLS; 2313 2314 unbind_mp = ip_dlpi_alloc(sizeof (dl_unbind_req_t), DL_UNBIND_REQ); 2315 if (unbind_mp == NULL) 2316 goto bad; 2317 2318 /* If we need to attach, pre-alloc and initialize the mblk */ 2319 if (ill->ill_needs_attach) { 2320 attach_mp = ip_dlpi_alloc(sizeof (dl_attach_req_t), 2321 DL_ATTACH_REQ); 2322 if (attach_mp == NULL) 2323 goto bad; 2324 ((dl_attach_req_t *)attach_mp->b_rptr)->dl_ppa = ill->ill_ppa; 2325 } 2326 2327 /* 2328 * Here we are going to delay the ioctl ack until after 2329 * ACKs from DL_PHYS_ADDR_REQ. So need to save the 2330 * original ioctl message before sending the requests 2331 */ 2332 mutex_enter(&ill->ill_lock); 2333 /* ipsq_pending_mp_add won't fail since we pass in a NULL connp */ 2334 (void) ipsq_pending_mp_add(NULL, ipif, ill->ill_wq, mp, 0); 2335 /* 2336 * Set ill_phys_addr_pend to zero. It will be set to the addr_type of 2337 * the DL_PHYS_ADDR_REQ in ill_dlpi_send() and ill_dlpi_done(). It will 2338 * be used to track which DL_PHYS_ADDR_REQ is being ACK'd/NAK'd. 2339 */ 2340 ill->ill_phys_addr_pend = 0; 2341 mutex_exit(&ill->ill_lock); 2342 2343 if (attach_mp != NULL) { 2344 ip1dbg(("ill_dl_phys: attach\n")); 2345 ill_dlpi_send(ill, attach_mp); 2346 } 2347 ill_dlpi_send(ill, bind_mp); 2348 ill_dlpi_send(ill, info_mp); 2349 2350 /* 2351 * Send the capability request to get the VRRP capability information. 2352 */ 2353 ill_capability_send(ill, capab_mp); 2354 2355 if (v6token_mp != NULL) 2356 ill_dlpi_send(ill, v6token_mp); 2357 if (v6lla_mp != NULL) 2358 ill_dlpi_send(ill, v6lla_mp); 2359 if (dest_mp != NULL) 2360 ill_dlpi_send(ill, dest_mp); 2361 ill_dlpi_send(ill, phys_mp); 2362 ill_dlpi_send(ill, notify_mp); 2363 ill_dlpi_send(ill, unbind_mp); 2364 2365 /* 2366 * This operation will complete in ip_rput_dlpi_writer with either 2367 * a DL_PHYS_ADDR_ACK or DL_ERROR_ACK. 2368 */ 2369 return (EINPROGRESS); 2370 bad: 2371 freemsg(v6token_mp); 2372 freemsg(v6lla_mp); 2373 freemsg(dest_mp); 2374 freemsg(phys_mp); 2375 freemsg(info_mp); 2376 freemsg(attach_mp); 2377 freemsg(bind_mp); 2378 freemsg(capab_mp); 2379 freemsg(unbind_mp); 2380 freemsg(notify_mp); 2381 return (ENOMEM); 2382 } 2383 2384 /* Add room for tcp+ip headers */ 2385 uint_t ip_loopback_mtu_v6plus = IP_LOOPBACK_MTU + IPV6_HDR_LEN + 20; 2386 2387 /* 2388 * DLPI is up. 2389 * Create all the IREs associated with an interface bring up multicast. 2390 * Set the interface flag and finish other initialization 2391 * that potentially had to be differed to after DL_BIND_ACK. 2392 */ 2393 int 2394 ipif_up_done_v6(ipif_t *ipif) 2395 { 2396 ill_t *ill = ipif->ipif_ill; 2397 int err; 2398 boolean_t loopback = B_FALSE; 2399 2400 ip1dbg(("ipif_up_done_v6(%s:%u)\n", 2401 ipif->ipif_ill->ill_name, ipif->ipif_id)); 2402 DTRACE_PROBE3(ipif__downup, char *, "ipif_up_done_v6", 2403 ill_t *, ill, ipif_t *, ipif); 2404 2405 /* Check if this is a loopback interface */ 2406 if (ipif->ipif_ill->ill_wq == NULL) 2407 loopback = B_TRUE; 2408 2409 ASSERT(ipif->ipif_isv6); 2410 ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock)); 2411 2412 if (IS_LOOPBACK(ill) || ill->ill_net_type == IRE_IF_NORESOLVER) { 2413 nce_t *loop_nce = NULL; 2414 uint16_t flags = (NCE_F_MYADDR | NCE_F_NONUD | NCE_F_AUTHORITY); 2415 2416 /* 2417 * lo0:1 and subsequent ipifs were marked IRE_LOCAL in 2418 * ipif_lookup_on_name(), but in the case of zones we can have 2419 * several loopback addresses on lo0. So all the interfaces with 2420 * loopback addresses need to be marked IRE_LOOPBACK. 2421 */ 2422 if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, &ipv6_loopback)) 2423 ipif->ipif_ire_type = IRE_LOOPBACK; 2424 else 2425 ipif->ipif_ire_type = IRE_LOCAL; 2426 if (ill->ill_net_type != IRE_LOOPBACK) 2427 flags |= NCE_F_PUBLISH; 2428 err = nce_lookup_then_add_v6(ill, NULL, 2429 ill->ill_phys_addr_length, 2430 &ipif->ipif_v6lcl_addr, flags, ND_REACHABLE, &loop_nce); 2431 2432 /* A shared-IP zone sees EEXIST for lo0:N */ 2433 if (err == 0 || err == EEXIST) { 2434 ipif->ipif_added_nce = 1; 2435 loop_nce->nce_ipif_cnt++; 2436 nce_refrele(loop_nce); 2437 err = 0; 2438 } else { 2439 ASSERT(loop_nce == NULL); 2440 return (err); 2441 } 2442 } 2443 2444 err = ipif_add_ires_v6(ipif, loopback); 2445 if (err != 0) { 2446 /* 2447 * See comments about return value from 2448 * ipif_addr_availability_check() in ipif_add_ires_v6(). 2449 */ 2450 if (err != EADDRINUSE) { 2451 ipif_ndp_down(ipif); 2452 } else { 2453 /* 2454 * Make IPMP aware of the deleted ipif so that 2455 * the needed ipmp cleanup (e.g., of ipif_bound_ill) 2456 * can be completed. Note that we do not want to 2457 * destroy the nce that was created on the ipmp_ill 2458 * for the active copy of the duplicate address in 2459 * use. 2460 */ 2461 if (IS_IPMP(ill)) 2462 ipmp_illgrp_del_ipif(ill->ill_grp, ipif); 2463 err = EADDRNOTAVAIL; 2464 } 2465 return (err); 2466 } 2467 2468 if (ill->ill_ipif_up_count == 1 && !loopback) { 2469 /* Recover any additional IREs entries for this ill */ 2470 (void) ill_recover_saved_ire(ill); 2471 } 2472 2473 if (ill->ill_need_recover_multicast) { 2474 /* 2475 * Need to recover all multicast memberships in the driver. 2476 * This had to be deferred until we had attached. 2477 */ 2478 ill_recover_multicast(ill); 2479 } 2480 2481 if (ill->ill_ipif_up_count == 1) { 2482 /* 2483 * Since the interface is now up, it may now be active. 2484 */ 2485 if (IS_UNDER_IPMP(ill)) 2486 ipmp_ill_refresh_active(ill); 2487 } 2488 2489 /* Join the allhosts multicast address and the solicited node MC */ 2490 ipif_multicast_up(ipif); 2491 2492 /* Perhaps ilgs should use this ill */ 2493 update_conn_ill(NULL, ill->ill_ipst); 2494 2495 if (ipif->ipif_addr_ready) 2496 ipif_up_notify(ipif); 2497 2498 return (0); 2499 } 2500 2501 /* 2502 * Add the IREs associated with the ipif. 2503 * Those MUST be explicitly removed in ipif_delete_ires_v6. 2504 */ 2505 static int 2506 ipif_add_ires_v6(ipif_t *ipif, boolean_t loopback) 2507 { 2508 ill_t *ill = ipif->ipif_ill; 2509 ip_stack_t *ipst = ill->ill_ipst; 2510 in6_addr_t v6addr; 2511 in6_addr_t route_mask; 2512 int err; 2513 char buf[INET6_ADDRSTRLEN]; 2514 ire_t *ire_local = NULL; /* LOCAL or LOOPBACK */ 2515 ire_t *ire_if = NULL; 2516 2517 if (!IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr) && 2518 !(ipif->ipif_flags & IPIF_NOLOCAL)) { 2519 2520 /* 2521 * If we're on a labeled system then make sure that zone- 2522 * private addresses have proper remote host database entries. 2523 */ 2524 if (is_system_labeled() && 2525 ipif->ipif_ire_type != IRE_LOOPBACK) { 2526 if (ip6opt_ls == 0) { 2527 cmn_err(CE_WARN, "IPv6 not enabled " 2528 "via /etc/system"); 2529 return (EINVAL); 2530 } 2531 if (!tsol_check_interface_address(ipif)) 2532 return (EINVAL); 2533 } 2534 2535 /* Register the source address for __sin6_src_id */ 2536 err = ip_srcid_insert(&ipif->ipif_v6lcl_addr, 2537 ipif->ipif_zoneid, ipst); 2538 if (err != 0) { 2539 ip0dbg(("ipif_add_ires_v6: srcid_insert %d\n", err)); 2540 return (err); 2541 } 2542 /* 2543 * If the interface address is set, create the LOCAL 2544 * or LOOPBACK IRE. 2545 */ 2546 ip1dbg(("ipif_add_ires_v6: creating IRE %d for %s\n", 2547 ipif->ipif_ire_type, 2548 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, 2549 buf, sizeof (buf)))); 2550 2551 ire_local = ire_create_v6( 2552 &ipif->ipif_v6lcl_addr, /* dest address */ 2553 &ipv6_all_ones, /* mask */ 2554 NULL, /* no gateway */ 2555 ipif->ipif_ire_type, /* LOCAL or LOOPBACK */ 2556 ipif->ipif_ill, /* interface */ 2557 ipif->ipif_zoneid, 2558 ((ipif->ipif_flags & IPIF_PRIVATE) ? 2559 RTF_PRIVATE : 0) | RTF_KERNEL, 2560 NULL, 2561 ipst); 2562 if (ire_local == NULL) { 2563 ip1dbg(("ipif_up_done_v6: NULL ire_local\n")); 2564 err = ENOMEM; 2565 goto bad; 2566 } 2567 } 2568 2569 /* Set up the IRE_IF_RESOLVER or IRE_IF_NORESOLVER, as appropriate. */ 2570 if (!loopback && !(ipif->ipif_flags & IPIF_NOXMIT) && 2571 !(IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6subnet) && 2572 IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6net_mask))) { 2573 /* ipif_v6subnet is ipif_v6pp_dst_addr for pt-pt */ 2574 v6addr = ipif->ipif_v6subnet; 2575 2576 if (ipif->ipif_flags & IPIF_POINTOPOINT) { 2577 route_mask = ipv6_all_ones; 2578 } else { 2579 route_mask = ipif->ipif_v6net_mask; 2580 } 2581 2582 ip1dbg(("ipif_add_ires_v6: creating if IRE %d for %s\n", 2583 ill->ill_net_type, 2584 inet_ntop(AF_INET6, &v6addr, buf, sizeof (buf)))); 2585 2586 ire_if = ire_create_v6( 2587 &v6addr, /* dest pref */ 2588 &route_mask, /* mask */ 2589 &ipif->ipif_v6lcl_addr, /* gateway */ 2590 ill->ill_net_type, /* IF_[NO]RESOLVER */ 2591 ipif->ipif_ill, 2592 ipif->ipif_zoneid, 2593 ((ipif->ipif_flags & IPIF_PRIVATE) ? 2594 RTF_PRIVATE : 0) | RTF_KERNEL, 2595 NULL, 2596 ipst); 2597 if (ire_if == NULL) { 2598 ip1dbg(("ipif_up_done: NULL ire_if\n")); 2599 err = ENOMEM; 2600 goto bad; 2601 } 2602 } 2603 2604 /* 2605 * Need to atomically check for IP address availability under 2606 * ip_addr_avail_lock. ill_g_lock is held as reader to ensure no new 2607 * ills or new ipifs can be added while we are checking availability. 2608 */ 2609 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2610 mutex_enter(&ipst->ips_ip_addr_avail_lock); 2611 ill->ill_ipif_up_count++; 2612 ipif->ipif_flags |= IPIF_UP; 2613 err = ip_addr_availability_check(ipif); 2614 mutex_exit(&ipst->ips_ip_addr_avail_lock); 2615 rw_exit(&ipst->ips_ill_g_lock); 2616 2617 if (err != 0) { 2618 /* 2619 * Our address may already be up on the same ill. In this case, 2620 * the external resolver entry for our ipif replaced the one for 2621 * the other ipif. So we don't want to delete it (otherwise the 2622 * other ipif would be unable to send packets). 2623 * ip_addr_availability_check() identifies this case for us and 2624 * returns EADDRINUSE; Caller must turn it into EADDRNOTAVAIL 2625 * which is the expected error code. 2626 * 2627 * Note that ipif_ndp_down() will only delete the nce in the 2628 * case when the nce_ipif_cnt drops to 0. 2629 */ 2630 ill->ill_ipif_up_count--; 2631 ipif->ipif_flags &= ~IPIF_UP; 2632 goto bad; 2633 } 2634 2635 /* 2636 * Add in all newly created IREs. 2637 * We add the IRE_INTERFACE before the IRE_LOCAL to ensure 2638 * that lookups find the IRE_LOCAL even if the IRE_INTERFACE is 2639 * a /128 route. 2640 */ 2641 if (ire_if != NULL) { 2642 ire_if = ire_add(ire_if); 2643 if (ire_if == NULL) { 2644 err = ENOMEM; 2645 goto bad2; 2646 } 2647 #ifdef DEBUG 2648 ire_refhold_notr(ire_if); 2649 ire_refrele(ire_if); 2650 #endif 2651 } 2652 if (ire_local != NULL) { 2653 ire_local = ire_add(ire_local); 2654 if (ire_local == NULL) { 2655 err = ENOMEM; 2656 goto bad2; 2657 } 2658 #ifdef DEBUG 2659 ire_refhold_notr(ire_local); 2660 ire_refrele(ire_local); 2661 #endif 2662 } 2663 rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); 2664 if (ire_local != NULL) 2665 ipif->ipif_ire_local = ire_local; 2666 if (ire_if != NULL) 2667 ipif->ipif_ire_if = ire_if; 2668 rw_exit(&ipst->ips_ill_g_lock); 2669 ire_local = NULL; 2670 ire_if = NULL; 2671 2672 if (ipif->ipif_addr_ready) 2673 ipif_up_notify(ipif); 2674 return (0); 2675 2676 bad2: 2677 ill->ill_ipif_up_count--; 2678 ipif->ipif_flags &= ~IPIF_UP; 2679 2680 bad: 2681 if (ire_local != NULL) 2682 ire_delete(ire_local); 2683 if (ire_if != NULL) 2684 ire_delete(ire_if); 2685 2686 rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); 2687 ire_local = ipif->ipif_ire_local; 2688 ipif->ipif_ire_local = NULL; 2689 ire_if = ipif->ipif_ire_if; 2690 ipif->ipif_ire_if = NULL; 2691 rw_exit(&ipst->ips_ill_g_lock); 2692 if (ire_local != NULL) { 2693 ire_delete(ire_local); 2694 ire_refrele_notr(ire_local); 2695 } 2696 if (ire_if != NULL) { 2697 ire_delete(ire_if); 2698 ire_refrele_notr(ire_if); 2699 } 2700 (void) ip_srcid_remove(&ipif->ipif_v6lcl_addr, ipif->ipif_zoneid, ipst); 2701 2702 return (err); 2703 } 2704 2705 /* Remove all the IREs created by ipif_add_ires_v6 */ 2706 void 2707 ipif_delete_ires_v6(ipif_t *ipif) 2708 { 2709 ill_t *ill = ipif->ipif_ill; 2710 ip_stack_t *ipst = ill->ill_ipst; 2711 ire_t *ire; 2712 2713 rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); 2714 ire = ipif->ipif_ire_local; 2715 ipif->ipif_ire_local = NULL; 2716 rw_exit(&ipst->ips_ill_g_lock); 2717 if (ire != NULL) { 2718 /* 2719 * Move count to ipif so we don't loose the count due to 2720 * a down/up dance. 2721 */ 2722 atomic_add_32(&ipif->ipif_ib_pkt_count, ire->ire_ib_pkt_count); 2723 2724 ire_delete(ire); 2725 ire_refrele_notr(ire); 2726 } 2727 rw_enter(&ipst->ips_ill_g_lock, RW_WRITER); 2728 ire = ipif->ipif_ire_if; 2729 ipif->ipif_ire_if = NULL; 2730 rw_exit(&ipst->ips_ill_g_lock); 2731 if (ire != NULL) { 2732 ire_delete(ire); 2733 ire_refrele_notr(ire); 2734 } 2735 } 2736 2737 /* 2738 * Delete an ND entry if it exists. 2739 */ 2740 /* ARGSUSED */ 2741 int 2742 ip_siocdelndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2743 ip_ioctl_cmd_t *ipip, void *dummy_ifreq) 2744 { 2745 sin6_t *sin6; 2746 struct lifreq *lifr; 2747 lif_nd_req_t *lnr; 2748 ill_t *ill = ipif->ipif_ill; 2749 nce_t *nce; 2750 2751 lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr; 2752 lnr = &lifr->lifr_nd; 2753 /* Only allow for logical unit zero i.e. not on "le0:17" */ 2754 if (ipif->ipif_id != 0) 2755 return (EINVAL); 2756 2757 if (!ipif->ipif_isv6) 2758 return (EINVAL); 2759 2760 if (lnr->lnr_addr.ss_family != AF_INET6) 2761 return (EAFNOSUPPORT); 2762 2763 sin6 = (sin6_t *)&lnr->lnr_addr; 2764 2765 /* 2766 * Since ND mappings must be consistent across an IPMP group, prohibit 2767 * deleting ND mappings on underlying interfaces. 2768 * Don't allow deletion of mappings for local addresses. 2769 */ 2770 if (IS_UNDER_IPMP(ill)) 2771 return (EPERM); 2772 2773 nce = nce_lookup_v6(ill, &sin6->sin6_addr); 2774 if (nce == NULL) 2775 return (ESRCH); 2776 2777 if (NCE_MYADDR(nce->nce_common)) { 2778 nce_refrele(nce); 2779 return (EPERM); 2780 } 2781 2782 /* 2783 * delete the nce_common which will also delete the nces on any 2784 * under_ill in the case of ipmp. 2785 */ 2786 ncec_delete(nce->nce_common); 2787 nce_refrele(nce); 2788 return (0); 2789 } 2790 2791 /* 2792 * Return nbr cache info. 2793 */ 2794 /* ARGSUSED */ 2795 int 2796 ip_siocqueryndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2797 ip_ioctl_cmd_t *ipip, void *dummy_ifreq) 2798 { 2799 ill_t *ill = ipif->ipif_ill; 2800 struct lifreq *lifr; 2801 lif_nd_req_t *lnr; 2802 2803 lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr; 2804 lnr = &lifr->lifr_nd; 2805 /* Only allow for logical unit zero i.e. not on "le0:17" */ 2806 if (ipif->ipif_id != 0) 2807 return (EINVAL); 2808 2809 if (!ipif->ipif_isv6) 2810 return (EINVAL); 2811 2812 if (lnr->lnr_addr.ss_family != AF_INET6) 2813 return (EAFNOSUPPORT); 2814 2815 if (ill->ill_phys_addr_length > sizeof (lnr->lnr_hdw_addr)) 2816 return (EINVAL); 2817 2818 return (ndp_query(ill, lnr)); 2819 } 2820 2821 /* 2822 * Perform an update of the nd entry for the specified address. 2823 */ 2824 /* ARGSUSED */ 2825 int 2826 ip_siocsetndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2827 ip_ioctl_cmd_t *ipip, void *dummy_ifreq) 2828 { 2829 sin6_t *sin6; 2830 ill_t *ill = ipif->ipif_ill; 2831 struct lifreq *lifr; 2832 lif_nd_req_t *lnr; 2833 ire_t *ire; 2834 2835 lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr; 2836 lnr = &lifr->lifr_nd; 2837 /* Only allow for logical unit zero i.e. not on "le0:17" */ 2838 if (ipif->ipif_id != 0) 2839 return (EINVAL); 2840 2841 if (!ipif->ipif_isv6) 2842 return (EINVAL); 2843 2844 if (lnr->lnr_addr.ss_family != AF_INET6) 2845 return (EAFNOSUPPORT); 2846 2847 sin6 = (sin6_t *)&lnr->lnr_addr; 2848 2849 /* 2850 * Since ND mappings must be consistent across an IPMP group, prohibit 2851 * updating ND mappings on underlying interfaces. Also, since ND 2852 * mappings for IPMP data addresses are owned by IP itself, prohibit 2853 * updating them. 2854 */ 2855 if (IS_UNDER_IPMP(ill)) 2856 return (EPERM); 2857 2858 if (IS_IPMP(ill)) { 2859 ire = ire_ftable_lookup_v6(&sin6->sin6_addr, NULL, NULL, 2860 IRE_LOCAL, ill, ALL_ZONES, NULL, 2861 MATCH_IRE_TYPE | MATCH_IRE_ILL, 0, ill->ill_ipst, NULL); 2862 if (ire != NULL) { 2863 ire_refrele(ire); 2864 return (EPERM); 2865 } 2866 } 2867 2868 return (ndp_sioc_update(ill, lnr)); 2869 } 2870