1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 27 */ 28 29 /* 30 * This file contains the interface control functions for IPv6. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/sysmacros.h> 35 #include <sys/stream.h> 36 #include <sys/dlpi.h> 37 #include <sys/stropts.h> 38 #include <sys/ddi.h> 39 #include <sys/cmn_err.h> 40 #include <sys/kstat.h> 41 #include <sys/debug.h> 42 #include <sys/zone.h> 43 #include <sys/policy.h> 44 45 #include <sys/systm.h> 46 #include <sys/param.h> 47 #include <sys/socket.h> 48 #include <sys/isa_defs.h> 49 #include <net/if.h> 50 #include <net/if_dl.h> 51 #include <net/route.h> 52 #include <netinet/in.h> 53 #include <netinet/igmp_var.h> 54 #include <netinet/ip6.h> 55 #include <netinet/icmp6.h> 56 #include <netinet/in.h> 57 58 #include <inet/common.h> 59 #include <inet/nd.h> 60 #include <inet/mib2.h> 61 #include <inet/ip.h> 62 #include <inet/ip6.h> 63 #include <inet/ip_multi.h> 64 #include <inet/ip_ire.h> 65 #include <inet/ip_rts.h> 66 #include <inet/ip_ndp.h> 67 #include <inet/ip_if.h> 68 #include <inet/ip6_asp.h> 69 #include <inet/tun.h> 70 #include <inet/ipclassifier.h> 71 #include <inet/sctp_ip.h> 72 73 #include <sys/tsol/tndb.h> 74 #include <sys/tsol/tnet.h> 75 76 static in6_addr_t ipv6_ll_template = 77 {(uint32_t)V6_LINKLOCAL, 0x0, 0x0, 0x0}; 78 79 static ipif_t * 80 ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst, 81 queue_t *q, mblk_t *mp, ipsq_func_t func, int *error, ip_stack_t *ipst); 82 83 /* 84 * These two functions, ipif_lookup_group_v6() and ill_lookup_group_v6(), 85 * are called when an application does not specify an interface to be 86 * used for multicast traffic. It calls ire_lookup_multi_v6() to look 87 * for an interface route for the specified multicast group. Doing 88 * this allows the administrator to add prefix routes for multicast to 89 * indicate which interface to be used for multicast traffic in the above 90 * scenario. The route could be for all multicast (ff00::/8), for a single 91 * multicast group (a /128 route) or anything in between. If there is no 92 * such multicast route, we just find any multicast capable interface and 93 * return it. 94 */ 95 ipif_t * 96 ipif_lookup_group_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst) 97 { 98 ire_t *ire; 99 ipif_t *ipif; 100 101 ire = ire_lookup_multi_v6(group, zoneid, ipst); 102 if (ire != NULL) { 103 ipif = ire->ire_ipif; 104 ipif_refhold(ipif); 105 ire_refrele(ire); 106 return (ipif); 107 } 108 109 return (ipif_lookup_multicast(ipst, zoneid, B_TRUE)); 110 } 111 112 ill_t * 113 ill_lookup_group_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst) 114 { 115 ire_t *ire; 116 ill_t *ill; 117 ipif_t *ipif; 118 119 ire = ire_lookup_multi_v6(group, zoneid, ipst); 120 if (ire != NULL) { 121 ill = ire->ire_ipif->ipif_ill; 122 ill_refhold(ill); 123 ire_refrele(ire); 124 return (ill); 125 } 126 127 ipif = ipif_lookup_multicast(ipst, zoneid, B_TRUE); 128 if (ipif == NULL) 129 return (NULL); 130 131 ill = ipif->ipif_ill; 132 ill_refhold(ill); 133 ipif_refrele(ipif); 134 return (ill); 135 } 136 137 /* 138 * Look for an ipif with the specified interface address and destination. 139 * The destination address is used only for matching point-to-point interfaces. 140 */ 141 static ipif_t * 142 ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst, 143 queue_t *q, mblk_t *mp, ipsq_func_t func, int *error, ip_stack_t *ipst) 144 { 145 ipif_t *ipif; 146 ill_t *ill; 147 ipsq_t *ipsq; 148 ill_walk_context_t ctx; 149 150 if (error != NULL) 151 *error = 0; 152 153 /* 154 * First match all the point-to-point interfaces 155 * before looking at non-point-to-point interfaces. 156 * This is done to avoid returning non-point-to-point 157 * ipif instead of unnumbered point-to-point ipif. 158 */ 159 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 160 ill = ILL_START_WALK_V6(&ctx, ipst); 161 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 162 GRAB_CONN_LOCK(q); 163 mutex_enter(&ill->ill_lock); 164 for (ipif = ill->ill_ipif; ipif != NULL; 165 ipif = ipif->ipif_next) { 166 /* Allow the ipif to be down */ 167 if ((ipif->ipif_flags & IPIF_POINTOPOINT) && 168 (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 169 if_addr)) && 170 (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr, 171 dst))) { 172 if (IPIF_CAN_LOOKUP(ipif)) { 173 ipif_refhold_locked(ipif); 174 mutex_exit(&ill->ill_lock); 175 RELEASE_CONN_LOCK(q); 176 rw_exit(&ipst->ips_ill_g_lock); 177 return (ipif); 178 } else if (IPIF_CAN_WAIT(ipif, q)) { 179 ipsq = ill->ill_phyint->phyint_ipsq; 180 mutex_enter(&ipsq->ipsq_lock); 181 mutex_exit(&ill->ill_lock); 182 rw_exit(&ipst->ips_ill_g_lock); 183 ipsq_enq(ipsq, q, mp, func, NEW_OP, 184 ill); 185 mutex_exit(&ipsq->ipsq_lock); 186 RELEASE_CONN_LOCK(q); 187 if (error != NULL) 188 *error = EINPROGRESS; 189 return (NULL); 190 } 191 } 192 } 193 mutex_exit(&ill->ill_lock); 194 RELEASE_CONN_LOCK(q); 195 } 196 rw_exit(&ipst->ips_ill_g_lock); 197 /* lookup the ipif based on interface address */ 198 ipif = ipif_lookup_addr_v6(if_addr, NULL, ALL_ZONES, q, mp, func, 199 error, ipst); 200 ASSERT(ipif == NULL || ipif->ipif_isv6); 201 return (ipif); 202 } 203 204 /* 205 * Look for an ipif with the specified address. For point-point links 206 * we look for matches on either the destination address and the local 207 * address, but we ignore the check on the local address if IPIF_UNNUMBERED 208 * is set. 209 * Matches on a specific ill if match_ill is set. 210 */ 211 /* ARGSUSED */ 212 ipif_t * 213 ipif_lookup_addr_v6(const in6_addr_t *addr, ill_t *match_ill, zoneid_t zoneid, 214 queue_t *q, mblk_t *mp, ipsq_func_t func, int *error, ip_stack_t *ipst) 215 { 216 ipif_t *ipif; 217 ill_t *ill; 218 boolean_t ptp = B_FALSE; 219 ipsq_t *ipsq; 220 ill_walk_context_t ctx; 221 222 if (error != NULL) 223 *error = 0; 224 225 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 226 /* 227 * Repeat twice, first based on local addresses and 228 * next time for pointopoint. 229 */ 230 repeat: 231 ill = ILL_START_WALK_V6(&ctx, ipst); 232 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 233 if (match_ill != NULL && ill != match_ill) { 234 continue; 235 } 236 GRAB_CONN_LOCK(q); 237 mutex_enter(&ill->ill_lock); 238 for (ipif = ill->ill_ipif; ipif != NULL; 239 ipif = ipif->ipif_next) { 240 if (zoneid != ALL_ZONES && 241 ipif->ipif_zoneid != zoneid && 242 ipif->ipif_zoneid != ALL_ZONES) 243 continue; 244 /* Allow the ipif to be down */ 245 if ((!ptp && (IN6_ARE_ADDR_EQUAL( 246 &ipif->ipif_v6lcl_addr, addr) && 247 (ipif->ipif_flags & IPIF_UNNUMBERED) == 0)) || 248 (ptp && (ipif->ipif_flags & IPIF_POINTOPOINT) && 249 IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr, 250 addr))) { 251 if (IPIF_CAN_LOOKUP(ipif)) { 252 ipif_refhold_locked(ipif); 253 mutex_exit(&ill->ill_lock); 254 RELEASE_CONN_LOCK(q); 255 rw_exit(&ipst->ips_ill_g_lock); 256 return (ipif); 257 } else if (IPIF_CAN_WAIT(ipif, q)) { 258 ipsq = ill->ill_phyint->phyint_ipsq; 259 mutex_enter(&ipsq->ipsq_lock); 260 mutex_exit(&ill->ill_lock); 261 rw_exit(&ipst->ips_ill_g_lock); 262 ipsq_enq(ipsq, q, mp, func, NEW_OP, 263 ill); 264 mutex_exit(&ipsq->ipsq_lock); 265 RELEASE_CONN_LOCK(q); 266 if (error != NULL) 267 *error = EINPROGRESS; 268 return (NULL); 269 } 270 } 271 } 272 mutex_exit(&ill->ill_lock); 273 RELEASE_CONN_LOCK(q); 274 } 275 276 /* If we already did the ptp case, then we are done */ 277 if (ptp) { 278 rw_exit(&ipst->ips_ill_g_lock); 279 if (error != NULL) 280 *error = ENXIO; 281 return (NULL); 282 } 283 ptp = B_TRUE; 284 goto repeat; 285 } 286 287 boolean_t 288 ip_addr_exists_v6(const in6_addr_t *addr, zoneid_t zoneid, 289 ip_stack_t *ipst) 290 { 291 ipif_t *ipif; 292 ill_t *ill; 293 ill_walk_context_t ctx; 294 295 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 296 297 ill = ILL_START_WALK_V6(&ctx, ipst); 298 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 299 mutex_enter(&ill->ill_lock); 300 for (ipif = ill->ill_ipif; ipif != NULL; 301 ipif = ipif->ipif_next) { 302 if (zoneid != ALL_ZONES && 303 ipif->ipif_zoneid != zoneid && 304 ipif->ipif_zoneid != ALL_ZONES) 305 continue; 306 /* Allow the ipif to be down */ 307 if (((IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 308 addr) && 309 (ipif->ipif_flags & IPIF_UNNUMBERED) == 0)) || 310 ((ipif->ipif_flags & IPIF_POINTOPOINT) && 311 IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr, 312 addr))) { 313 mutex_exit(&ill->ill_lock); 314 rw_exit(&ipst->ips_ill_g_lock); 315 return (B_TRUE); 316 } 317 } 318 mutex_exit(&ill->ill_lock); 319 } 320 321 rw_exit(&ipst->ips_ill_g_lock); 322 return (B_FALSE); 323 } 324 325 /* 326 * Look for an ipif with the specified address. For point-point links 327 * we look for matches on either the destination address and the local 328 * address, but we ignore the check on the local address if IPIF_UNNUMBERED 329 * is set. 330 * Matches on a specific ill if match_ill is set. 331 * Return the zoneid for the ipif. ALL_ZONES if none found. 332 */ 333 zoneid_t 334 ipif_lookup_addr_zoneid_v6(const in6_addr_t *addr, ill_t *match_ill, 335 ip_stack_t *ipst) 336 { 337 ipif_t *ipif; 338 ill_t *ill; 339 boolean_t ptp = B_FALSE; 340 ill_walk_context_t ctx; 341 zoneid_t zoneid; 342 343 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 344 /* 345 * Repeat twice, first based on local addresses and 346 * next time for pointopoint. 347 */ 348 repeat: 349 ill = ILL_START_WALK_V6(&ctx, ipst); 350 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 351 if (match_ill != NULL && ill != match_ill) { 352 continue; 353 } 354 mutex_enter(&ill->ill_lock); 355 for (ipif = ill->ill_ipif; ipif != NULL; 356 ipif = ipif->ipif_next) { 357 /* Allow the ipif to be down */ 358 if ((!ptp && (IN6_ARE_ADDR_EQUAL( 359 &ipif->ipif_v6lcl_addr, addr) && 360 (ipif->ipif_flags & IPIF_UNNUMBERED) == 0)) || 361 (ptp && (ipif->ipif_flags & IPIF_POINTOPOINT) && 362 IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr, 363 addr)) && 364 !(ipif->ipif_state_flags & IPIF_CONDEMNED)) { 365 zoneid = ipif->ipif_zoneid; 366 mutex_exit(&ill->ill_lock); 367 rw_exit(&ipst->ips_ill_g_lock); 368 /* 369 * If ipif_zoneid was ALL_ZONES then we have 370 * a trusted extensions shared IP address. 371 * In that case GLOBAL_ZONEID works to send. 372 */ 373 if (zoneid == ALL_ZONES) 374 zoneid = GLOBAL_ZONEID; 375 return (zoneid); 376 } 377 } 378 mutex_exit(&ill->ill_lock); 379 } 380 381 /* If we already did the ptp case, then we are done */ 382 if (ptp) { 383 rw_exit(&ipst->ips_ill_g_lock); 384 return (ALL_ZONES); 385 } 386 ptp = B_TRUE; 387 goto repeat; 388 } 389 390 /* 391 * Perform various checks to verify that an address would make sense as a local 392 * interface address. This is currently only called when an attempt is made 393 * to set a local address. 394 * 395 * Does not allow a v4-mapped address, an address that equals the subnet 396 * anycast address, ... a multicast address, ... 397 */ 398 boolean_t 399 ip_local_addr_ok_v6(const in6_addr_t *addr, const in6_addr_t *subnet_mask) 400 { 401 in6_addr_t subnet; 402 403 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 404 return (B_TRUE); /* Allow all zeros */ 405 406 /* 407 * Don't allow all zeroes or host part, but allow 408 * all ones netmask. 409 */ 410 V6_MASK_COPY(*addr, *subnet_mask, subnet); 411 if (IN6_IS_ADDR_V4MAPPED(addr) || 412 (IN6_ARE_ADDR_EQUAL(addr, &subnet) && 413 !IN6_ARE_ADDR_EQUAL(subnet_mask, &ipv6_all_ones)) || 414 (IN6_IS_ADDR_V4COMPAT(addr) && CLASSD(V4_PART_OF_V6((*addr)))) || 415 IN6_IS_ADDR_MULTICAST(addr)) 416 return (B_FALSE); 417 418 return (B_TRUE); 419 } 420 421 /* 422 * Perform various checks to verify that an address would make sense as a 423 * remote/subnet interface address. 424 */ 425 boolean_t 426 ip_remote_addr_ok_v6(const in6_addr_t *addr, const in6_addr_t *subnet_mask) 427 { 428 in6_addr_t subnet; 429 430 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 431 return (B_TRUE); /* Allow all zeros */ 432 433 V6_MASK_COPY(*addr, *subnet_mask, subnet); 434 if (IN6_IS_ADDR_V4MAPPED(addr) || 435 (IN6_ARE_ADDR_EQUAL(addr, &subnet) && 436 !IN6_ARE_ADDR_EQUAL(subnet_mask, &ipv6_all_ones)) || 437 IN6_IS_ADDR_MULTICAST(addr) || 438 (IN6_IS_ADDR_V4COMPAT(addr) && CLASSD(V4_PART_OF_V6((*addr))))) 439 return (B_FALSE); 440 441 return (B_TRUE); 442 } 443 444 /* 445 * ip_rt_add_v6 is called to add an IPv6 route to the forwarding table. 446 * ipif_arg is passed in to associate it with the correct interface 447 * (for link-local destinations and gateways). 448 */ 449 /* ARGSUSED1 */ 450 int 451 ip_rt_add_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, 452 const in6_addr_t *gw_addr, const in6_addr_t *src_addr, int flags, 453 ipif_t *ipif_arg, ire_t **ire_arg, queue_t *q, mblk_t *mp, ipsq_func_t func, 454 struct rtsa_s *sp, ip_stack_t *ipst) 455 { 456 ire_t *ire; 457 ire_t *gw_ire = NULL; 458 ipif_t *ipif; 459 boolean_t ipif_refheld = B_FALSE; 460 uint_t type; 461 int match_flags = MATCH_IRE_TYPE; 462 int error; 463 tsol_gc_t *gc = NULL; 464 tsol_gcgrp_t *gcgrp = NULL; 465 boolean_t gcgrp_xtraref = B_FALSE; 466 467 if (ire_arg != NULL) 468 *ire_arg = NULL; 469 470 /* 471 * Prevent routes with a zero gateway from being created (since 472 * interfaces can currently be plumbed and brought up with no assigned 473 * address). 474 */ 475 if (IN6_IS_ADDR_UNSPECIFIED(gw_addr)) 476 return (ENETUNREACH); 477 478 /* 479 * If this is the case of RTF_HOST being set, then we set the netmask 480 * to all ones (regardless if one was supplied). 481 */ 482 if (flags & RTF_HOST) 483 mask = &ipv6_all_ones; 484 485 /* 486 * Get the ipif, if any, corresponding to the gw_addr 487 */ 488 ipif = ipif_lookup_interface_v6(gw_addr, dst_addr, q, mp, func, 489 &error, ipst); 490 if (ipif != NULL) 491 ipif_refheld = B_TRUE; 492 else if (error == EINPROGRESS) { 493 ip1dbg(("ip_rt_add_v6: null and EINPROGRESS")); 494 return (error); 495 } 496 497 /* 498 * GateD will attempt to create routes with a loopback interface 499 * address as the gateway and with RTF_GATEWAY set. We allow 500 * these routes to be added, but create them as interface routes 501 * since the gateway is an interface address. 502 */ 503 if ((ipif != NULL) && (ipif->ipif_ire_type == IRE_LOOPBACK)) { 504 flags &= ~RTF_GATEWAY; 505 if (IN6_ARE_ADDR_EQUAL(gw_addr, &ipv6_loopback) && 506 IN6_ARE_ADDR_EQUAL(dst_addr, &ipv6_loopback) && 507 IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones)) { 508 ire = ire_ctable_lookup_v6(dst_addr, 0, IRE_LOOPBACK, 509 ipif, ALL_ZONES, NULL, match_flags, ipst); 510 if (ire != NULL) { 511 ire_refrele(ire); 512 if (ipif_refheld) 513 ipif_refrele(ipif); 514 return (EEXIST); 515 } 516 ip1dbg(("ipif_up_done: 0x%p creating IRE 0x%x" 517 "for 0x%x\n", (void *)ipif, 518 ipif->ipif_ire_type, 519 ntohl(ipif->ipif_lcl_addr))); 520 ire = ire_create_v6( 521 dst_addr, 522 mask, 523 &ipif->ipif_v6src_addr, 524 NULL, 525 &ipif->ipif_mtu, 526 NULL, 527 NULL, 528 NULL, 529 ipif->ipif_net_type, 530 ipif, 531 NULL, 532 0, 533 0, 534 flags, 535 &ire_uinfo_null, 536 NULL, 537 NULL, 538 ipst); 539 if (ire == NULL) { 540 if (ipif_refheld) 541 ipif_refrele(ipif); 542 return (ENOMEM); 543 } 544 error = ire_add(&ire, q, mp, func, B_FALSE); 545 if (error == 0) 546 goto save_ire; 547 /* 548 * In the result of failure, ire_add() will have already 549 * deleted the ire in question, so there is no need to 550 * do that here. 551 */ 552 if (ipif_refheld) 553 ipif_refrele(ipif); 554 return (error); 555 } 556 } 557 558 /* 559 * Traditionally, interface routes are ones where RTF_GATEWAY isn't set 560 * and the gateway address provided is one of the system's interface 561 * addresses. By using the routing socket interface and supplying an 562 * RTA_IFP sockaddr with an interface index, an alternate method of 563 * specifying an interface route to be created is available which uses 564 * the interface index that specifies the outgoing interface rather than 565 * the address of an outgoing interface (which may not be able to 566 * uniquely identify an interface). When coupled with the RTF_GATEWAY 567 * flag, routes can be specified which not only specify the next-hop to 568 * be used when routing to a certain prefix, but also which outgoing 569 * interface should be used. 570 * 571 * Previously, interfaces would have unique addresses assigned to them 572 * and so the address assigned to a particular interface could be used 573 * to identify a particular interface. One exception to this was the 574 * case of an unnumbered interface (where IPIF_UNNUMBERED was set). 575 * 576 * With the advent of IPv6 and its link-local addresses, this 577 * restriction was relaxed and interfaces could share addresses between 578 * themselves. In fact, typically all of the link-local interfaces on 579 * an IPv6 node or router will have the same link-local address. In 580 * order to differentiate between these interfaces, the use of an 581 * interface index is necessary and this index can be carried inside a 582 * RTA_IFP sockaddr (which is actually a sockaddr_dl). One restriction 583 * of using the interface index, however, is that all of the ipif's that 584 * are part of an ill have the same index and so the RTA_IFP sockaddr 585 * cannot be used to differentiate between ipif's (or logical 586 * interfaces) that belong to the same ill (physical interface). 587 * 588 * For example, in the following case involving IPv4 interfaces and 589 * logical interfaces 590 * 591 * 192.0.2.32 255.255.255.224 192.0.2.33 U if0 592 * 192.0.2.32 255.255.255.224 192.0.2.34 U if0:1 593 * 192.0.2.32 255.255.255.224 192.0.2.35 U if0:2 594 * 595 * the ipif's corresponding to each of these interface routes can be 596 * uniquely identified by the "gateway" (actually interface address). 597 * 598 * In this case involving multiple IPv6 default routes to a particular 599 * link-local gateway, the use of RTA_IFP is necessary to specify which 600 * default route is of interest: 601 * 602 * default fe80::123:4567:89ab:cdef U if0 603 * default fe80::123:4567:89ab:cdef U if1 604 */ 605 606 /* RTF_GATEWAY not set */ 607 if (!(flags & RTF_GATEWAY)) { 608 queue_t *stq; 609 610 if (sp != NULL) { 611 ip2dbg(("ip_rt_add_v6: gateway security attributes " 612 "cannot be set with interface route\n")); 613 if (ipif_refheld) 614 ipif_refrele(ipif); 615 return (EINVAL); 616 } 617 618 /* 619 * As the interface index specified with the RTA_IFP sockaddr is 620 * the same for all ipif's off of an ill, the matching logic 621 * below uses MATCH_IRE_ILL if such an index was specified. 622 * This means that routes sharing the same prefix when added 623 * using a RTA_IFP sockaddr must have distinct interface 624 * indices (namely, they must be on distinct ill's). 625 * 626 * On the other hand, since the gateway address will usually be 627 * different for each ipif on the system, the matching logic 628 * uses MATCH_IRE_IPIF in the case of a traditional interface 629 * route. This means that interface routes for the same prefix 630 * can be created if they belong to distinct ipif's and if a 631 * RTA_IFP sockaddr is not present. 632 */ 633 if (ipif_arg != NULL) { 634 if (ipif_refheld) { 635 ipif_refrele(ipif); 636 ipif_refheld = B_FALSE; 637 } 638 ipif = ipif_arg; 639 match_flags |= MATCH_IRE_ILL; 640 } else { 641 /* 642 * Check the ipif corresponding to the gw_addr 643 */ 644 if (ipif == NULL) 645 return (ENETUNREACH); 646 match_flags |= MATCH_IRE_IPIF; 647 } 648 649 ASSERT(ipif != NULL); 650 /* 651 * We check for an existing entry at this point. 652 */ 653 match_flags |= MATCH_IRE_MASK; 654 ire = ire_ftable_lookup_v6(dst_addr, mask, 0, IRE_INTERFACE, 655 ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst); 656 if (ire != NULL) { 657 ire_refrele(ire); 658 if (ipif_refheld) 659 ipif_refrele(ipif); 660 return (EEXIST); 661 } 662 663 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 664 ? ipif->ipif_rq : ipif->ipif_wq; 665 666 /* 667 * Create a copy of the IRE_LOOPBACK, IRE_IF_NORESOLVER or 668 * IRE_IF_RESOLVER with the modified address and netmask. 669 */ 670 ire = ire_create_v6( 671 dst_addr, 672 mask, 673 &ipif->ipif_v6src_addr, 674 NULL, 675 &ipif->ipif_mtu, 676 NULL, 677 NULL, 678 stq, 679 ipif->ipif_net_type, 680 ipif, 681 NULL, 682 0, 683 0, 684 flags, 685 &ire_uinfo_null, 686 NULL, 687 NULL, 688 ipst); 689 if (ire == NULL) { 690 if (ipif_refheld) 691 ipif_refrele(ipif); 692 return (ENOMEM); 693 } 694 695 /* 696 * Some software (for example, GateD and Sun Cluster) attempts 697 * to create (what amount to) IRE_PREFIX routes with the 698 * loopback address as the gateway. This is primarily done to 699 * set up prefixes with the RTF_REJECT flag set (for example, 700 * when generating aggregate routes). We also OR in the 701 * RTF_BLACKHOLE flag as these interface routes, by 702 * definition, can only be that. 703 * 704 * If the IRE type (as defined by ipif->ipif_net_type) is 705 * IRE_LOOPBACK, then we map the request into a 706 * IRE_IF_NORESOLVER. 707 * 708 * Needless to say, the real IRE_LOOPBACK is NOT created by this 709 * routine, but rather using ire_create_v6() directly. 710 */ 711 if (ipif->ipif_net_type == IRE_LOOPBACK) { 712 ire->ire_type = IRE_IF_NORESOLVER; 713 ire->ire_flags |= RTF_BLACKHOLE; 714 } 715 error = ire_add(&ire, q, mp, func, B_FALSE); 716 if (error == 0) 717 goto save_ire; 718 /* 719 * In the result of failure, ire_add() will have already 720 * deleted the ire in question, so there is no need to 721 * do that here. 722 */ 723 if (ipif_refheld) 724 ipif_refrele(ipif); 725 return (error); 726 } 727 if (ipif_refheld) { 728 ipif_refrele(ipif); 729 ipif_refheld = B_FALSE; 730 } 731 732 /* 733 * Get an interface IRE for the specified gateway. 734 * If we don't have an IRE_IF_NORESOLVER or IRE_IF_RESOLVER for the 735 * gateway, it is currently unreachable and we fail the request 736 * accordingly. 737 */ 738 ipif = ipif_arg; 739 if (ipif_arg != NULL) 740 match_flags |= MATCH_IRE_ILL; 741 gw_ire = ire_ftable_lookup_v6(gw_addr, 0, 0, IRE_INTERFACE, ipif_arg, 742 NULL, ALL_ZONES, 0, NULL, match_flags, ipst); 743 if (gw_ire == NULL) 744 return (ENETUNREACH); 745 746 /* 747 * We create one of three types of IREs as a result of this request 748 * based on the netmask. A netmask of all ones (which is automatically 749 * assumed when RTF_HOST is set) results in an IRE_HOST being created. 750 * An all zeroes netmask implies a default route so an IRE_DEFAULT is 751 * created. Otherwise, an IRE_PREFIX route is created for the 752 * destination prefix. 753 */ 754 if (IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones)) 755 type = IRE_HOST; 756 else if (IN6_IS_ADDR_UNSPECIFIED(mask)) 757 type = IRE_DEFAULT; 758 else 759 type = IRE_PREFIX; 760 761 /* check for a duplicate entry */ 762 ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, type, ipif_arg, 763 NULL, ALL_ZONES, 0, NULL, 764 match_flags | MATCH_IRE_MASK | MATCH_IRE_GW, ipst); 765 if (ire != NULL) { 766 ire_refrele(gw_ire); 767 ire_refrele(ire); 768 return (EEXIST); 769 } 770 771 /* Security attribute exists */ 772 if (sp != NULL) { 773 tsol_gcgrp_addr_t ga; 774 775 /* find or create the gateway credentials group */ 776 ga.ga_af = AF_INET6; 777 ga.ga_addr = *gw_addr; 778 779 /* we hold reference to it upon success */ 780 gcgrp = gcgrp_lookup(&ga, B_TRUE); 781 if (gcgrp == NULL) { 782 ire_refrele(gw_ire); 783 return (ENOMEM); 784 } 785 786 /* 787 * Create and add the security attribute to the group; a 788 * reference to the group is made upon allocating a new 789 * entry successfully. If it finds an already-existing 790 * entry for the security attribute in the group, it simply 791 * returns it and no new reference is made to the group. 792 */ 793 gc = gc_create(sp, gcgrp, &gcgrp_xtraref); 794 if (gc == NULL) { 795 /* release reference held by gcgrp_lookup */ 796 GCGRP_REFRELE(gcgrp); 797 ire_refrele(gw_ire); 798 return (ENOMEM); 799 } 800 } 801 802 /* Create the IRE. */ 803 ire = ire_create_v6( 804 dst_addr, /* dest address */ 805 mask, /* mask */ 806 /* src address assigned by the caller? */ 807 (((flags & RTF_SETSRC) && !IN6_IS_ADDR_UNSPECIFIED(src_addr)) ? 808 src_addr : NULL), 809 gw_addr, /* gateway address */ 810 &gw_ire->ire_max_frag, 811 NULL, /* no src nce */ 812 NULL, /* no recv-from queue */ 813 NULL, /* no send-to queue */ 814 (ushort_t)type, /* IRE type */ 815 ipif_arg, 816 NULL, 817 0, 818 0, 819 flags, 820 &gw_ire->ire_uinfo, /* Inherit ULP info from gw */ 821 gc, /* security attribute */ 822 NULL, 823 ipst); 824 825 /* 826 * The ire holds a reference to the 'gc' and the 'gc' holds a 827 * reference to the 'gcgrp'. We can now release the extra reference 828 * the 'gcgrp' acquired in the gcgrp_lookup, if it was not used. 829 */ 830 if (gcgrp_xtraref) 831 GCGRP_REFRELE(gcgrp); 832 if (ire == NULL) { 833 if (gc != NULL) 834 GC_REFRELE(gc); 835 ire_refrele(gw_ire); 836 return (ENOMEM); 837 } 838 839 /* 840 * POLICY: should we allow an RTF_HOST with address INADDR_ANY? 841 * SUN/OS socket stuff does but do we really want to allow ::0 ? 842 */ 843 844 /* Add the new IRE. */ 845 error = ire_add(&ire, q, mp, func, B_FALSE); 846 /* 847 * In the result of failure, ire_add() will have already 848 * deleted the ire in question, so there is no need to 849 * do that here. 850 */ 851 if (error != 0) { 852 ire_refrele(gw_ire); 853 return (error); 854 } 855 856 if (flags & RTF_MULTIRT) { 857 /* 858 * Invoke the CGTP (multirouting) filtering module 859 * to add the dst address in the filtering database. 860 * Replicated inbound packets coming from that address 861 * will be filtered to discard the duplicates. 862 * It is not necessary to call the CGTP filter hook 863 * when the dst address is a multicast, because an 864 * IP source address cannot be a multicast. 865 */ 866 if (ipst->ips_ip_cgtp_filter_ops != NULL && 867 !IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))) { 868 int res; 869 870 res = ipst->ips_ip_cgtp_filter_ops->cfo_add_dest_v6( 871 ipst->ips_netstack->netstack_stackid, 872 &ire->ire_addr_v6, 873 &ire->ire_gateway_addr_v6, 874 &ire->ire_src_addr_v6, 875 &gw_ire->ire_src_addr_v6); 876 if (res != 0) { 877 ire_refrele(gw_ire); 878 ire_delete(ire); 879 return (res); 880 } 881 } 882 } 883 884 /* 885 * Now that the prefix IRE entry has been created, delete any 886 * existing gateway IRE cache entries as well as any IRE caches 887 * using the gateway, and force them to be created through 888 * ip_newroute_v6. 889 */ 890 if (gc != NULL) { 891 ASSERT(gcgrp != NULL); 892 ire_clookup_delete_cache_gw_v6(gw_addr, ALL_ZONES, ipst); 893 } 894 895 save_ire: 896 if (gw_ire != NULL) { 897 ire_refrele(gw_ire); 898 } 899 if (ipif != NULL) { 900 mblk_t *save_mp; 901 902 /* 903 * Save enough information so that we can recreate the IRE if 904 * the interface goes down and then up. The metrics associated 905 * with the route will be saved as well when rts_setmetrics() is 906 * called after the IRE has been created. In the case where 907 * memory cannot be allocated, none of this information will be 908 * saved. 909 */ 910 save_mp = allocb(sizeof (ifrt_t), BPRI_MED); 911 if (save_mp != NULL) { 912 ifrt_t *ifrt; 913 914 save_mp->b_wptr += sizeof (ifrt_t); 915 ifrt = (ifrt_t *)save_mp->b_rptr; 916 bzero(ifrt, sizeof (ifrt_t)); 917 ifrt->ifrt_type = ire->ire_type; 918 ifrt->ifrt_v6addr = ire->ire_addr_v6; 919 mutex_enter(&ire->ire_lock); 920 ifrt->ifrt_v6gateway_addr = ire->ire_gateway_addr_v6; 921 ifrt->ifrt_v6src_addr = ire->ire_src_addr_v6; 922 mutex_exit(&ire->ire_lock); 923 ifrt->ifrt_v6mask = ire->ire_mask_v6; 924 ifrt->ifrt_flags = ire->ire_flags; 925 ifrt->ifrt_max_frag = ire->ire_max_frag; 926 mutex_enter(&ipif->ipif_saved_ire_lock); 927 save_mp->b_cont = ipif->ipif_saved_ire_mp; 928 ipif->ipif_saved_ire_mp = save_mp; 929 ipif->ipif_saved_ire_cnt++; 930 mutex_exit(&ipif->ipif_saved_ire_lock); 931 } 932 } 933 if (ire_arg != NULL) { 934 /* 935 * Store the ire that was successfully added into where ire_arg 936 * points to so that callers don't have to look it up 937 * themselves (but they are responsible for ire_refrele()ing 938 * the ire when they are finished with it). 939 */ 940 *ire_arg = ire; 941 } else { 942 ire_refrele(ire); /* Held in ire_add */ 943 } 944 if (ipif_refheld) 945 ipif_refrele(ipif); 946 return (0); 947 } 948 949 /* 950 * ip_rt_delete_v6 is called to delete an IPv6 route. 951 * ipif_arg is passed in to associate it with the correct interface 952 * (for link-local destinations and gateways). 953 */ 954 /* ARGSUSED4 */ 955 int 956 ip_rt_delete_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, 957 const in6_addr_t *gw_addr, uint_t rtm_addrs, int flags, ipif_t *ipif_arg, 958 queue_t *q, mblk_t *mp, ipsq_func_t func, ip_stack_t *ipst) 959 { 960 ire_t *ire = NULL; 961 ipif_t *ipif; 962 uint_t type; 963 uint_t match_flags = MATCH_IRE_TYPE; 964 int err = 0; 965 boolean_t ipif_refheld = B_FALSE; 966 967 /* 968 * If this is the case of RTF_HOST being set, then we set the netmask 969 * to all ones. Otherwise, we use the netmask if one was supplied. 970 */ 971 if (flags & RTF_HOST) { 972 mask = &ipv6_all_ones; 973 match_flags |= MATCH_IRE_MASK; 974 } else if (rtm_addrs & RTA_NETMASK) { 975 match_flags |= MATCH_IRE_MASK; 976 } 977 978 /* 979 * Note that RTF_GATEWAY is never set on a delete, therefore 980 * we check if the gateway address is one of our interfaces first, 981 * and fall back on RTF_GATEWAY routes. 982 * 983 * This makes it possible to delete an original 984 * IRE_IF_NORESOLVER/IRE_IF_RESOLVER - consistent with SunOS 4.1. 985 * 986 * As the interface index specified with the RTA_IFP sockaddr is the 987 * same for all ipif's off of an ill, the matching logic below uses 988 * MATCH_IRE_ILL if such an index was specified. This means a route 989 * sharing the same prefix and interface index as the the route 990 * intended to be deleted might be deleted instead if a RTA_IFP sockaddr 991 * is specified in the request. 992 * 993 * On the other hand, since the gateway address will usually be 994 * different for each ipif on the system, the matching logic 995 * uses MATCH_IRE_IPIF in the case of a traditional interface 996 * route. This means that interface routes for the same prefix can be 997 * uniquely identified if they belong to distinct ipif's and if a 998 * RTA_IFP sockaddr is not present. 999 * 1000 * For more detail on specifying routes by gateway address and by 1001 * interface index, see the comments in ip_rt_add_v6(). 1002 */ 1003 ipif = ipif_lookup_interface_v6(gw_addr, dst_addr, q, mp, func, &err, 1004 ipst); 1005 if (ipif != NULL) { 1006 ipif_refheld = B_TRUE; 1007 if (ipif_arg != NULL) { 1008 ipif_refrele(ipif); 1009 ipif_refheld = B_FALSE; 1010 ipif = ipif_arg; 1011 match_flags |= MATCH_IRE_ILL; 1012 } else { 1013 match_flags |= MATCH_IRE_IPIF; 1014 } 1015 1016 if (ipif->ipif_ire_type == IRE_LOOPBACK) 1017 ire = ire_ctable_lookup_v6(dst_addr, 0, IRE_LOOPBACK, 1018 ipif, ALL_ZONES, NULL, match_flags, ipst); 1019 if (ire == NULL) 1020 ire = ire_ftable_lookup_v6(dst_addr, mask, 0, 1021 IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, NULL, 1022 match_flags, ipst); 1023 } else if (err == EINPROGRESS) { 1024 return (err); 1025 } else { 1026 err = 0; 1027 } 1028 if (ire == NULL) { 1029 /* 1030 * At this point, the gateway address is not one of our own 1031 * addresses or a matching interface route was not found. We 1032 * set the IRE type to lookup based on whether 1033 * this is a host route, a default route or just a prefix. 1034 * 1035 * If an ipif_arg was passed in, then the lookup is based on an 1036 * interface index so MATCH_IRE_ILL is added to match_flags. 1037 * In any case, MATCH_IRE_IPIF is cleared and MATCH_IRE_GW is 1038 * set as the route being looked up is not a traditional 1039 * interface route. 1040 */ 1041 match_flags &= ~MATCH_IRE_IPIF; 1042 match_flags |= MATCH_IRE_GW; 1043 if (ipif_arg != NULL) 1044 match_flags |= MATCH_IRE_ILL; 1045 if (IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones)) 1046 type = IRE_HOST; 1047 else if (IN6_IS_ADDR_UNSPECIFIED(mask)) 1048 type = IRE_DEFAULT; 1049 else 1050 type = IRE_PREFIX; 1051 ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, type, 1052 ipif_arg, NULL, ALL_ZONES, 0, NULL, match_flags, ipst); 1053 } 1054 1055 if (ipif_refheld) { 1056 ipif_refrele(ipif); 1057 ipif_refheld = B_FALSE; 1058 } 1059 if (ire == NULL) 1060 return (ESRCH); 1061 1062 if (ire->ire_flags & RTF_MULTIRT) { 1063 /* 1064 * Invoke the CGTP (multirouting) filtering module 1065 * to remove the dst address from the filtering database. 1066 * Packets coming from that address will no longer be 1067 * filtered to remove duplicates. 1068 */ 1069 if (ipst->ips_ip_cgtp_filter_ops != NULL) { 1070 err = ipst->ips_ip_cgtp_filter_ops->cfo_del_dest_v6( 1071 ipst->ips_netstack->netstack_stackid, 1072 &ire->ire_addr_v6, &ire->ire_gateway_addr_v6); 1073 } 1074 } 1075 1076 ipif = ire->ire_ipif; 1077 if (ipif != NULL) { 1078 mblk_t **mpp; 1079 mblk_t *mp; 1080 ifrt_t *ifrt; 1081 in6_addr_t gw_addr_v6; 1082 1083 /* Remove from ipif_saved_ire_mp list if it is there */ 1084 mutex_enter(&ire->ire_lock); 1085 gw_addr_v6 = ire->ire_gateway_addr_v6; 1086 mutex_exit(&ire->ire_lock); 1087 mutex_enter(&ipif->ipif_saved_ire_lock); 1088 for (mpp = &ipif->ipif_saved_ire_mp; *mpp != NULL; 1089 mpp = &(*mpp)->b_cont) { 1090 /* 1091 * On a given ipif, the triple of address, gateway and 1092 * mask is unique for each saved IRE (in the case of 1093 * ordinary interface routes, the gateway address is 1094 * all-zeroes). 1095 */ 1096 mp = *mpp; 1097 ifrt = (ifrt_t *)mp->b_rptr; 1098 if (IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr, 1099 &ire->ire_addr_v6) && 1100 IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr, 1101 &gw_addr_v6) && 1102 IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask, 1103 &ire->ire_mask_v6)) { 1104 *mpp = mp->b_cont; 1105 ipif->ipif_saved_ire_cnt--; 1106 freeb(mp); 1107 break; 1108 } 1109 } 1110 mutex_exit(&ipif->ipif_saved_ire_lock); 1111 } 1112 ire_delete(ire); 1113 ire_refrele(ire); 1114 return (err); 1115 } 1116 1117 /* 1118 * Derive a token from the link layer address. 1119 */ 1120 boolean_t 1121 ill_setdefaulttoken(ill_t *ill) 1122 { 1123 int i; 1124 in6_addr_t v6addr, v6mask; 1125 1126 if (!MEDIA_V6INTFID(ill->ill_media, ill->ill_phys_addr_length, 1127 ill->ill_phys_addr, &v6addr)) 1128 return (B_FALSE); 1129 1130 (void) ip_plen_to_mask_v6(IPV6_TOKEN_LEN, &v6mask); 1131 1132 for (i = 0; i < 4; i++) 1133 v6mask.s6_addr32[i] = v6mask.s6_addr32[i] ^ 1134 (uint32_t)0xffffffff; 1135 1136 V6_MASK_COPY(v6addr, v6mask, ill->ill_token); 1137 ill->ill_token_length = IPV6_TOKEN_LEN; 1138 return (B_TRUE); 1139 } 1140 1141 /* 1142 * Create a link-local address from a token. 1143 */ 1144 static void 1145 ipif_get_linklocal(in6_addr_t *dest, const in6_addr_t *token) 1146 { 1147 int i; 1148 1149 for (i = 0; i < 4; i++) { 1150 dest->s6_addr32[i] = 1151 token->s6_addr32[i] | ipv6_ll_template.s6_addr32[i]; 1152 } 1153 } 1154 1155 /* 1156 * Set a nice default address for either automatic tunnels tsrc/96 or 1157 * 6to4 tunnels 2002:<tsrc>::1/64 1158 */ 1159 static void 1160 ipif_set_tun_auto_addr(ipif_t *ipif, struct iftun_req *ta) 1161 { 1162 sin6_t sin6; 1163 sin_t *sin; 1164 ill_t *ill = ipif->ipif_ill; 1165 tun_t *tp = (tun_t *)ill->ill_wq->q_next->q_ptr; 1166 1167 if (ta->ifta_saddr.ss_family != AF_INET || 1168 (ipif->ipif_flags & IPIF_UP) || !ipif->ipif_isv6 || 1169 (ta->ifta_flags & IFTUN_SRC) == 0) 1170 return; 1171 1172 /* 1173 * Check the tunnel type by examining q_next->q_ptr 1174 */ 1175 if (tp->tun_flags & TUN_AUTOMATIC) { 1176 /* this is an automatic tunnel */ 1177 (void) ip_plen_to_mask_v6(IPV6_ABITS - IP_ABITS, 1178 &ipif->ipif_v6net_mask); 1179 bzero(&sin6, sizeof (sin6_t)); 1180 sin = (sin_t *)&ta->ifta_saddr; 1181 V4_PART_OF_V6(sin6.sin6_addr) = sin->sin_addr.s_addr; 1182 sin6.sin6_family = AF_INET6; 1183 (void) ip_sioctl_addr(ipif, (sin_t *)&sin6, 1184 NULL, NULL, NULL, NULL); 1185 } else if (tp->tun_flags & TUN_6TO4) { 1186 /* this is a 6to4 tunnel */ 1187 (void) ip_plen_to_mask_v6(IPV6_PREFIX_LEN, 1188 &ipif->ipif_v6net_mask); 1189 sin = (sin_t *)&ta->ifta_saddr; 1190 /* create a 6to4 address from the IPv4 tsrc */ 1191 IN6_V4ADDR_TO_6TO4(&sin->sin_addr, &sin6.sin6_addr); 1192 sin6.sin6_family = AF_INET6; 1193 (void) ip_sioctl_addr(ipif, (sin_t *)&sin6, 1194 NULL, NULL, NULL, NULL); 1195 } else { 1196 ip1dbg(("ipif_set_tun_auto_addr: Unknown tunnel type")); 1197 return; 1198 } 1199 } 1200 1201 /* 1202 * Set link local for ipif_id 0 of a configured tunnel based on the 1203 * tsrc or tdst parameter 1204 * For tunnels over IPv4 use the IPv4 address prepended with 32 zeros as 1205 * the token. 1206 * For tunnels over IPv6 use the low-order 64 bits of the "inner" IPv6 address 1207 * as the token for the "outer" link. 1208 */ 1209 void 1210 ipif_set_tun_llink(ill_t *ill, struct iftun_req *ta) 1211 { 1212 ipif_t *ipif; 1213 sin_t *sin; 1214 in6_addr_t *s6addr; 1215 1216 ASSERT(IAM_WRITER_ILL(ill)); 1217 1218 /* The first ipif must be id zero. */ 1219 ipif = ill->ill_ipif; 1220 ASSERT(ipif->ipif_id == 0); 1221 1222 /* no link local for automatic tunnels */ 1223 if (!(ipif->ipif_flags & IPIF_POINTOPOINT)) { 1224 ipif_set_tun_auto_addr(ipif, ta); 1225 return; 1226 } 1227 1228 if ((ta->ifta_flags & IFTUN_DST) && 1229 IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6pp_dst_addr)) { 1230 sin6_t sin6; 1231 1232 ASSERT(!(ipif->ipif_flags & IPIF_UP)); 1233 bzero(&sin6, sizeof (sin6_t)); 1234 if ((ta->ifta_saddr.ss_family == AF_INET)) { 1235 sin = (sin_t *)&ta->ifta_daddr; 1236 V4_PART_OF_V6(sin6.sin6_addr) = 1237 sin->sin_addr.s_addr; 1238 } else { 1239 s6addr = 1240 &((sin6_t *)&ta->ifta_daddr)->sin6_addr; 1241 sin6.sin6_addr.s6_addr32[3] = s6addr->s6_addr32[3]; 1242 sin6.sin6_addr.s6_addr32[2] = s6addr->s6_addr32[2]; 1243 } 1244 ipif_get_linklocal(&ipif->ipif_v6pp_dst_addr, 1245 &sin6.sin6_addr); 1246 ipif->ipif_v6subnet = ipif->ipif_v6pp_dst_addr; 1247 } 1248 if ((ta->ifta_flags & IFTUN_SRC)) { 1249 ASSERT(!(ipif->ipif_flags & IPIF_UP)); 1250 1251 /* Set the token if it isn't already set */ 1252 if (IN6_IS_ADDR_UNSPECIFIED(&ill->ill_token)) { 1253 if ((ta->ifta_saddr.ss_family == AF_INET)) { 1254 sin = (sin_t *)&ta->ifta_saddr; 1255 V4_PART_OF_V6(ill->ill_token) = 1256 sin->sin_addr.s_addr; 1257 } else { 1258 s6addr = 1259 &((sin6_t *)&ta->ifta_saddr)->sin6_addr; 1260 ill->ill_token.s6_addr32[3] = 1261 s6addr->s6_addr32[3]; 1262 ill->ill_token.s6_addr32[2] = 1263 s6addr->s6_addr32[2]; 1264 } 1265 ill->ill_token_length = IPV6_TOKEN_LEN; 1266 } 1267 /* 1268 * Attempt to set the link local address if it isn't set. 1269 */ 1270 if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr)) 1271 (void) ipif_setlinklocal(ipif); 1272 } 1273 } 1274 1275 /* 1276 * Is it not possible to set the link local address? 1277 * The address can be set if the token is set, and the token 1278 * isn't too long. 1279 * Return B_TRUE if the address can't be set, or B_FALSE if it can. 1280 */ 1281 boolean_t 1282 ipif_cant_setlinklocal(ipif_t *ipif) 1283 { 1284 ill_t *ill = ipif->ipif_ill; 1285 1286 if (IN6_IS_ADDR_UNSPECIFIED(&ill->ill_token) || 1287 ill->ill_token_length > IPV6_ABITS - IPV6_LL_PREFIXLEN) 1288 return (B_TRUE); 1289 1290 return (B_FALSE); 1291 } 1292 1293 /* 1294 * Generate a link-local address from the token. 1295 * Return zero if the address was set, or non-zero if it couldn't be set. 1296 */ 1297 int 1298 ipif_setlinklocal(ipif_t *ipif) 1299 { 1300 ill_t *ill = ipif->ipif_ill; 1301 in6_addr_t ov6addr; 1302 1303 ASSERT(IAM_WRITER_ILL(ill)); 1304 1305 if (ipif_cant_setlinklocal(ipif)) 1306 return (-1); 1307 1308 ov6addr = ipif->ipif_v6lcl_addr; 1309 ipif_get_linklocal(&ipif->ipif_v6lcl_addr, &ill->ill_token); 1310 sctp_update_ipif_addr(ipif, ov6addr); 1311 (void) ip_plen_to_mask_v6(IPV6_LL_PREFIXLEN, &ipif->ipif_v6net_mask); 1312 V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask, 1313 ipif->ipif_v6subnet); 1314 1315 if (ipif->ipif_flags & IPIF_NOLOCAL) { 1316 ipif->ipif_v6src_addr = ipv6_all_zeros; 1317 } else { 1318 ipif->ipif_v6src_addr = ipif->ipif_v6lcl_addr; 1319 } 1320 return (0); 1321 } 1322 1323 /* 1324 * This function sets up the multicast mappings in NDP. 1325 * Unlike ARP, there are no mapping_mps here. We delete the 1326 * mapping nces and add a new one. 1327 * 1328 * Returns non-zero on error and 0 on success. 1329 */ 1330 int 1331 ipif_ndp_setup_multicast(ipif_t *ipif, nce_t **ret_nce) 1332 { 1333 ill_t *ill = ipif->ipif_ill; 1334 in6_addr_t v6_mcast_addr = {(uint32_t)V6_MCAST, 0, 0, 0}; 1335 in6_addr_t v6_mcast_mask = {(uint32_t)V6_MCAST, 0, 0, 0}; 1336 in6_addr_t v6_extract_mask; 1337 uchar_t *phys_addr, *bphys_addr, *alloc_phys; 1338 nce_t *mnce = NULL; 1339 int err = 0; 1340 phyint_t *phyi = ill->ill_phyint; 1341 uint32_t hw_extract_start; 1342 dl_unitdata_req_t *dlur; 1343 ip_stack_t *ipst = ill->ill_ipst; 1344 1345 if (ret_nce != NULL) 1346 *ret_nce = NULL; 1347 /* 1348 * Delete the mapping nce. Normally these should not exist 1349 * as a previous ipif_down -> ipif_ndp_down should have deleted 1350 * all the nces. But they can exist if ip_rput_dlpi_writer 1351 * calls this when PHYI_MULTI_BCAST is set. 1352 */ 1353 mnce = ndp_lookup_v6(ill, &v6_mcast_addr, B_FALSE); 1354 if (mnce != NULL) { 1355 ndp_delete(mnce); 1356 NCE_REFRELE(mnce); 1357 mnce = NULL; 1358 } 1359 1360 /* 1361 * Get media specific v6 mapping information. Note that 1362 * nd_lla_len can be 0 for tunnels. 1363 */ 1364 alloc_phys = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1365 if ((alloc_phys == NULL) && (ill->ill_nd_lla_len != 0)) 1366 return (ENOMEM); 1367 /* 1368 * Determine the broadcast address. 1369 */ 1370 dlur = (dl_unitdata_req_t *)ill->ill_bcast_mp->b_rptr; 1371 if (ill->ill_sap_length < 0) 1372 bphys_addr = (uchar_t *)dlur + dlur->dl_dest_addr_offset; 1373 else 1374 bphys_addr = (uchar_t *)dlur + 1375 dlur->dl_dest_addr_offset + ill->ill_sap_length; 1376 1377 /* 1378 * Check PHYI_MULTI_BCAST and possible length of physical 1379 * address to determine if we use the mapping or the 1380 * broadcast address. 1381 */ 1382 if ((phyi->phyint_flags & PHYI_MULTI_BCAST) || 1383 (!MEDIA_V6MINFO(ill->ill_media, ill->ill_nd_lla_len, 1384 bphys_addr, alloc_phys, &hw_extract_start, 1385 &v6_extract_mask))) { 1386 if (ill->ill_phys_addr_length > IP_MAX_HW_LEN) { 1387 kmem_free(alloc_phys, ill->ill_nd_lla_len); 1388 return (E2BIG); 1389 } 1390 /* Use the link-layer broadcast address for MULTI_BCAST */ 1391 phys_addr = bphys_addr; 1392 bzero(&v6_extract_mask, sizeof (v6_extract_mask)); 1393 hw_extract_start = ill->ill_nd_lla_len; 1394 } else { 1395 phys_addr = alloc_phys; 1396 } 1397 if ((ipif->ipif_flags & IPIF_BROADCAST) || 1398 (ill->ill_flags & ILLF_MULTICAST) || 1399 (phyi->phyint_flags & PHYI_MULTI_BCAST)) { 1400 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1401 err = ndp_add_v6(ill, 1402 phys_addr, 1403 &v6_mcast_addr, /* v6 address */ 1404 &v6_mcast_mask, /* v6 mask */ 1405 &v6_extract_mask, 1406 hw_extract_start, 1407 NCE_F_MAPPING | NCE_F_PERMANENT | NCE_F_NONUD, 1408 ND_REACHABLE, 1409 &mnce); 1410 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1411 if (err == 0) { 1412 if (ret_nce != NULL) { 1413 *ret_nce = mnce; 1414 } else { 1415 NCE_REFRELE(mnce); 1416 } 1417 } 1418 } 1419 kmem_free(alloc_phys, ill->ill_nd_lla_len); 1420 return (err); 1421 } 1422 1423 /* 1424 * Get the resolver set up for a new ipif. (Always called as writer.) 1425 */ 1426 int 1427 ipif_ndp_up(ipif_t *ipif) 1428 { 1429 ill_t *ill = ipif->ipif_ill; 1430 int err = 0; 1431 nce_t *nce = NULL; 1432 nce_t *mnce = NULL; 1433 1434 ip1dbg(("ipif_ndp_up(%s:%u)\n", ill->ill_name, ipif->ipif_id)); 1435 1436 /* 1437 * ND not supported on XRESOLV interfaces. If ND support (multicast) 1438 * added later, take out this check. 1439 */ 1440 if ((ill->ill_flags & ILLF_XRESOLV) || 1441 IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr) || 1442 (!(ill->ill_net_type & IRE_INTERFACE))) { 1443 ipif->ipif_addr_ready = 1; 1444 return (0); 1445 } 1446 1447 /* 1448 * Need to setup multicast mapping only when the first 1449 * interface is coming UP. 1450 */ 1451 if (ill->ill_ipif_up_count == 0 && 1452 (ill->ill_flags & ILLF_MULTICAST)) { 1453 /* 1454 * We set the multicast before setting up the mapping for 1455 * local address because ipif_ndp_setup_multicast does 1456 * ndp_walk to delete nces which will delete the mapping 1457 * for local address also if we added the mapping for 1458 * local address first. 1459 */ 1460 err = ipif_ndp_setup_multicast(ipif, &mnce); 1461 if (err != 0) 1462 return (err); 1463 } 1464 1465 if ((ipif->ipif_flags & (IPIF_UNNUMBERED|IPIF_NOLOCAL)) == 0) { 1466 uint16_t flags; 1467 uchar_t *hw_addr = NULL; 1468 1469 /* Permanent entries don't need NUD */ 1470 flags = NCE_F_PERMANENT | NCE_F_NONUD; 1471 if (ill->ill_flags & ILLF_ROUTER) 1472 flags |= NCE_F_ISROUTER; 1473 1474 if (ipif->ipif_flags & IPIF_ANYCAST) 1475 flags |= NCE_F_ANYCAST; 1476 1477 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1478 hw_addr = ill->ill_nd_lla; 1479 1480 if (ill->ill_move_in_progress) { 1481 /* 1482 * Addresses are failing over to this ill. 1483 * Don't wait for NUD to see this change. 1484 * Publish our new link-layer address. 1485 */ 1486 flags |= NCE_F_UNSOL_ADV; 1487 } 1488 } 1489 err = ndp_lookup_then_add_v6(ill, 1490 hw_addr, 1491 &ipif->ipif_v6lcl_addr, 1492 &ipv6_all_ones, 1493 &ipv6_all_zeros, 1494 0, 1495 flags, 1496 ND_PROBE, /* Causes Duplicate Address Detection to run */ 1497 &nce); 1498 switch (err) { 1499 case 0: 1500 ip1dbg(("ipif_ndp_up: NCE created for %s\n", 1501 ill->ill_name)); 1502 ipif->ipif_addr_ready = 1; 1503 break; 1504 case EINPROGRESS: 1505 ip1dbg(("ipif_ndp_up: running DAD now for %s\n", 1506 ill->ill_name)); 1507 break; 1508 case EEXIST: 1509 NCE_REFRELE(nce); 1510 ip1dbg(("ipif_ndp_up: NCE already exists for %s\n", 1511 ill->ill_name)); 1512 if (mnce != NULL) { 1513 ndp_delete(mnce); 1514 NCE_REFRELE(mnce); 1515 } 1516 return (err); 1517 default: 1518 ip1dbg(("ipif_ndp_up: NCE creation failed %s\n", 1519 ill->ill_name)); 1520 if (mnce != NULL) { 1521 ndp_delete(mnce); 1522 NCE_REFRELE(mnce); 1523 } 1524 return (err); 1525 } 1526 } else { 1527 /* No local NCE for this entry */ 1528 ipif->ipif_addr_ready = 1; 1529 } 1530 if (nce != NULL) 1531 NCE_REFRELE(nce); 1532 if (mnce != NULL) 1533 NCE_REFRELE(mnce); 1534 return (0); 1535 } 1536 1537 /* Remove all cache entries for this logical interface */ 1538 void 1539 ipif_ndp_down(ipif_t *ipif) 1540 { 1541 nce_t *nce; 1542 1543 if (ipif->ipif_isv6) { 1544 nce = ndp_lookup_v6(ipif->ipif_ill, &ipif->ipif_v6lcl_addr, 1545 B_FALSE); 1546 if (nce != NULL) { 1547 ndp_delete(nce); 1548 NCE_REFRELE(nce); 1549 } 1550 } 1551 /* 1552 * Remove mapping and all other nces dependent on this ill 1553 * when the last ipif is going away. 1554 */ 1555 if (ipif->ipif_ill->ill_ipif_up_count == 0) { 1556 ndp_walk(ipif->ipif_ill, (pfi_t)ndp_delete_per_ill, 1557 (uchar_t *)ipif->ipif_ill, ipif->ipif_ill->ill_ipst); 1558 } 1559 } 1560 1561 /* 1562 * Used when an interface comes up to recreate any extra routes on this 1563 * interface. 1564 */ 1565 static ire_t ** 1566 ipif_recover_ire_v6(ipif_t *ipif) 1567 { 1568 mblk_t *mp; 1569 ire_t **ipif_saved_irep; 1570 ire_t **irep; 1571 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 1572 1573 ip1dbg(("ipif_recover_ire_v6(%s:%u)", ipif->ipif_ill->ill_name, 1574 ipif->ipif_id)); 1575 1576 ASSERT(ipif->ipif_isv6); 1577 1578 mutex_enter(&ipif->ipif_saved_ire_lock); 1579 ipif_saved_irep = (ire_t **)kmem_zalloc(sizeof (ire_t *) * 1580 ipif->ipif_saved_ire_cnt, KM_NOSLEEP); 1581 if (ipif_saved_irep == NULL) { 1582 mutex_exit(&ipif->ipif_saved_ire_lock); 1583 return (NULL); 1584 } 1585 1586 irep = ipif_saved_irep; 1587 1588 for (mp = ipif->ipif_saved_ire_mp; mp != NULL; mp = mp->b_cont) { 1589 ire_t *ire; 1590 queue_t *rfq; 1591 queue_t *stq; 1592 ifrt_t *ifrt; 1593 in6_addr_t *src_addr; 1594 in6_addr_t *gateway_addr; 1595 char buf[INET6_ADDRSTRLEN]; 1596 ushort_t type; 1597 1598 /* 1599 * When the ire was initially created and then added in 1600 * ip_rt_add_v6(), it was created either using 1601 * ipif->ipif_net_type in the case of a traditional interface 1602 * route, or as one of the IRE_OFFSUBNET types (with the 1603 * exception of IRE_HOST type redirect ire which is created by 1604 * icmp_redirect_v6() and which we don't need to save or 1605 * recover). In the case where ipif->ipif_net_type was 1606 * IRE_LOOPBACK, ip_rt_add_v6() will update the ire_type to 1607 * IRE_IF_NORESOLVER before calling ire_add_v6() to satisfy 1608 * software like GateD and Sun Cluster which creates routes 1609 * using the the loopback interface's address as a gateway. 1610 * 1611 * As ifrt->ifrt_type reflects the already updated ire_type, 1612 * ire_create_v6() will be called in the same way here as in 1613 * ip_rt_add_v6(), namely using ipif->ipif_net_type when the 1614 * route looks like a traditional interface route (where 1615 * ifrt->ifrt_type & IRE_INTERFACE is true) and otherwise 1616 * using the saved ifrt->ifrt_type. This means that in 1617 * the case where ipif->ipif_net_type is IRE_LOOPBACK, 1618 * the ire created by ire_create_v6() will be an IRE_LOOPBACK, 1619 * it will then be turned into an IRE_IF_NORESOLVER and then 1620 * added by ire_add_v6(). 1621 */ 1622 ifrt = (ifrt_t *)mp->b_rptr; 1623 if (ifrt->ifrt_type & IRE_INTERFACE) { 1624 rfq = NULL; 1625 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1626 ? ipif->ipif_rq : ipif->ipif_wq; 1627 src_addr = (ifrt->ifrt_flags & RTF_SETSRC) 1628 ? &ifrt->ifrt_v6src_addr 1629 : &ipif->ipif_v6src_addr; 1630 gateway_addr = NULL; 1631 type = ipif->ipif_net_type; 1632 } else { 1633 rfq = NULL; 1634 stq = NULL; 1635 src_addr = (ifrt->ifrt_flags & RTF_SETSRC) 1636 ? &ifrt->ifrt_v6src_addr : NULL; 1637 gateway_addr = &ifrt->ifrt_v6gateway_addr; 1638 type = ifrt->ifrt_type; 1639 } 1640 1641 /* 1642 * Create a copy of the IRE with the saved address and netmask. 1643 */ 1644 ip1dbg(("ipif_recover_ire_v6: creating IRE %s (%d) for %s/%d\n", 1645 ip_nv_lookup(ire_nv_tbl, ifrt->ifrt_type), ifrt->ifrt_type, 1646 inet_ntop(AF_INET6, &ifrt->ifrt_v6addr, buf, sizeof (buf)), 1647 ip_mask_to_plen_v6(&ifrt->ifrt_v6mask))); 1648 ire = ire_create_v6( 1649 &ifrt->ifrt_v6addr, 1650 &ifrt->ifrt_v6mask, 1651 src_addr, 1652 gateway_addr, 1653 &ifrt->ifrt_max_frag, 1654 NULL, 1655 rfq, 1656 stq, 1657 type, 1658 ipif, 1659 NULL, 1660 0, 1661 0, 1662 ifrt->ifrt_flags, 1663 &ifrt->ifrt_iulp_info, 1664 NULL, 1665 NULL, 1666 ipst); 1667 if (ire == NULL) { 1668 mutex_exit(&ipif->ipif_saved_ire_lock); 1669 kmem_free(ipif_saved_irep, 1670 ipif->ipif_saved_ire_cnt * sizeof (ire_t *)); 1671 return (NULL); 1672 } 1673 1674 /* 1675 * Some software (for example, GateD and Sun Cluster) attempts 1676 * to create (what amount to) IRE_PREFIX routes with the 1677 * loopback address as the gateway. This is primarily done to 1678 * set up prefixes with the RTF_REJECT flag set (for example, 1679 * when generating aggregate routes.) 1680 * 1681 * If the IRE type (as defined by ipif->ipif_net_type) is 1682 * IRE_LOOPBACK, then we map the request into a 1683 * IRE_IF_NORESOLVER. 1684 */ 1685 if (ipif->ipif_net_type == IRE_LOOPBACK) 1686 ire->ire_type = IRE_IF_NORESOLVER; 1687 /* 1688 * ire held by ire_add, will be refreled' in ipif_up_done 1689 * towards the end 1690 */ 1691 (void) ire_add(&ire, NULL, NULL, NULL, B_FALSE); 1692 *irep = ire; 1693 irep++; 1694 ip1dbg(("ipif_recover_ire_v6: added ire %p\n", (void *)ire)); 1695 } 1696 mutex_exit(&ipif->ipif_saved_ire_lock); 1697 return (ipif_saved_irep); 1698 } 1699 1700 /* 1701 * Return the scope of the given IPv6 address. If the address is an 1702 * IPv4 mapped IPv6 address, return the scope of the corresponding 1703 * IPv4 address. 1704 */ 1705 in6addr_scope_t 1706 ip_addr_scope_v6(const in6_addr_t *addr) 1707 { 1708 static in6_addr_t ipv6loopback = IN6ADDR_LOOPBACK_INIT; 1709 1710 if (IN6_IS_ADDR_V4MAPPED(addr)) { 1711 in_addr_t v4addr_h = ntohl(V4_PART_OF_V6((*addr))); 1712 if ((v4addr_h >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 1713 (v4addr_h & IN_AUTOCONF_MASK) == IN_AUTOCONF_NET) 1714 return (IP6_SCOPE_LINKLOCAL); 1715 if ((v4addr_h & IN_PRIVATE8_MASK) == IN_PRIVATE8_NET || 1716 (v4addr_h & IN_PRIVATE12_MASK) == IN_PRIVATE12_NET || 1717 (v4addr_h & IN_PRIVATE16_MASK) == IN_PRIVATE16_NET) 1718 return (IP6_SCOPE_SITELOCAL); 1719 return (IP6_SCOPE_GLOBAL); 1720 } 1721 1722 if (IN6_IS_ADDR_MULTICAST(addr)) 1723 return (IN6_ADDR_MC_SCOPE(addr)); 1724 1725 /* link-local and loopback addresses are of link-local scope */ 1726 if (IN6_IS_ADDR_LINKLOCAL(addr) || 1727 IN6_ARE_ADDR_EQUAL(addr, &ipv6loopback)) 1728 return (IP6_SCOPE_LINKLOCAL); 1729 if (IN6_IS_ADDR_SITELOCAL(addr)) 1730 return (IP6_SCOPE_SITELOCAL); 1731 return (IP6_SCOPE_GLOBAL); 1732 } 1733 1734 1735 /* 1736 * Returns the length of the common prefix of a1 and a2, as per 1737 * CommonPrefixLen() defined in RFC 3484. 1738 */ 1739 static int 1740 ip_common_prefix_v6(const in6_addr_t *a1, const in6_addr_t *a2) 1741 { 1742 int i; 1743 uint32_t a1val, a2val, mask; 1744 1745 for (i = 0; i < 4; i++) { 1746 if ((a1val = a1->s6_addr32[i]) != (a2val = a2->s6_addr32[i])) { 1747 a1val ^= a2val; 1748 i *= 32; 1749 mask = 0x80000000u; 1750 while (!(a1val & mask)) { 1751 mask >>= 1; 1752 i++; 1753 } 1754 return (i); 1755 } 1756 } 1757 return (IPV6_ABITS); 1758 } 1759 1760 #define IPIF_VALID_IPV6_SOURCE(ipif) \ 1761 (((ipif)->ipif_flags & IPIF_UP) && \ 1762 !((ipif)->ipif_flags & (IPIF_NOLOCAL|IPIF_ANYCAST)) && \ 1763 (ipif)->ipif_addr_ready) 1764 1765 /* source address candidate */ 1766 typedef struct candidate { 1767 ipif_t *cand_ipif; 1768 /* The properties of this candidate */ 1769 boolean_t cand_isdst; 1770 boolean_t cand_isdst_set; 1771 in6addr_scope_t cand_scope; 1772 boolean_t cand_scope_set; 1773 boolean_t cand_isdeprecated; 1774 boolean_t cand_isdeprecated_set; 1775 boolean_t cand_ispreferred; 1776 boolean_t cand_ispreferred_set; 1777 boolean_t cand_matchedinterface; 1778 boolean_t cand_matchedinterface_set; 1779 boolean_t cand_matchedlabel; 1780 boolean_t cand_matchedlabel_set; 1781 boolean_t cand_istmp; 1782 boolean_t cand_istmp_set; 1783 int cand_common_pref; 1784 boolean_t cand_common_pref_set; 1785 boolean_t cand_pref_eq; 1786 boolean_t cand_pref_eq_set; 1787 int cand_pref_len; 1788 boolean_t cand_pref_len_set; 1789 } cand_t; 1790 #define cand_srcaddr cand_ipif->ipif_v6lcl_addr 1791 #define cand_mask cand_ipif->ipif_v6net_mask 1792 #define cand_flags cand_ipif->ipif_flags 1793 #define cand_ill cand_ipif->ipif_ill 1794 #define cand_zoneid cand_ipif->ipif_zoneid 1795 1796 /* information about the destination for source address selection */ 1797 typedef struct dstinfo { 1798 const in6_addr_t *dst_addr; 1799 ill_t *dst_ill; 1800 uint_t dst_restrict_ill; 1801 boolean_t dst_prefer_src_tmp; 1802 in6addr_scope_t dst_scope; 1803 char *dst_label; 1804 } dstinfo_t; 1805 1806 /* 1807 * The following functions are rules used to select a source address in 1808 * ipif_select_source_v6(). Each rule compares a current candidate (cc) 1809 * against the best candidate (bc). Each rule has three possible outcomes; 1810 * the candidate is preferred over the best candidate (CAND_PREFER), the 1811 * candidate is not preferred over the best candidate (CAND_AVOID), or the 1812 * candidate is of equal value as the best candidate (CAND_TIE). 1813 * 1814 * These rules are part of a greater "Default Address Selection for IPv6" 1815 * sheme, which is standards based work coming out of the IETF ipv6 working 1816 * group. The IETF document defines both IPv6 source address selection and 1817 * destination address ordering. The rules defined here implement the IPv6 1818 * source address selection. Destination address ordering is done by 1819 * libnsl, and uses a similar set of rules to implement the sorting. 1820 * 1821 * Most of the rules are defined by the RFC and are not typically altered. The 1822 * last rule, number 8, has language that allows for local preferences. In the 1823 * scheme below, this means that new Solaris rules should normally go between 1824 * rule_ifprefix and rule_prefix. 1825 */ 1826 typedef enum {CAND_AVOID, CAND_TIE, CAND_PREFER} rule_res_t; 1827 typedef rule_res_t (*rulef_t)(cand_t *, cand_t *, const dstinfo_t *, 1828 ip_stack_t *); 1829 1830 /* Prefer an address if it is equal to the destination address. */ 1831 /* ARGSUSED3 */ 1832 static rule_res_t 1833 rule_isdst(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 1834 { 1835 if (!bc->cand_isdst_set) { 1836 bc->cand_isdst = 1837 IN6_ARE_ADDR_EQUAL(&bc->cand_srcaddr, dstinfo->dst_addr); 1838 bc->cand_isdst_set = B_TRUE; 1839 } 1840 1841 cc->cand_isdst = 1842 IN6_ARE_ADDR_EQUAL(&cc->cand_srcaddr, dstinfo->dst_addr); 1843 cc->cand_isdst_set = B_TRUE; 1844 1845 if (cc->cand_isdst == bc->cand_isdst) 1846 return (CAND_TIE); 1847 else if (cc->cand_isdst) 1848 return (CAND_PREFER); 1849 else 1850 return (CAND_AVOID); 1851 } 1852 1853 /* 1854 * Prefer addresses that are of closest scope to the destination. Always 1855 * prefer addresses that are of greater scope than the destination over 1856 * those that are of lesser scope than the destination. 1857 */ 1858 /* ARGSUSED3 */ 1859 static rule_res_t 1860 rule_scope(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 1861 { 1862 if (!bc->cand_scope_set) { 1863 bc->cand_scope = ip_addr_scope_v6(&bc->cand_srcaddr); 1864 bc->cand_scope_set = B_TRUE; 1865 } 1866 1867 cc->cand_scope = ip_addr_scope_v6(&cc->cand_srcaddr); 1868 cc->cand_scope_set = B_TRUE; 1869 1870 if (cc->cand_scope < bc->cand_scope) { 1871 if (cc->cand_scope < dstinfo->dst_scope) 1872 return (CAND_AVOID); 1873 else 1874 return (CAND_PREFER); 1875 } else if (bc->cand_scope < cc->cand_scope) { 1876 if (bc->cand_scope < dstinfo->dst_scope) 1877 return (CAND_PREFER); 1878 else 1879 return (CAND_AVOID); 1880 } else { 1881 return (CAND_TIE); 1882 } 1883 } 1884 1885 /* 1886 * Prefer non-deprecated source addresses. 1887 */ 1888 /* ARGSUSED2 */ 1889 static rule_res_t 1890 rule_deprecated(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1891 ip_stack_t *ipst) 1892 { 1893 if (!bc->cand_isdeprecated_set) { 1894 bc->cand_isdeprecated = 1895 ((bc->cand_flags & IPIF_DEPRECATED) != 0); 1896 bc->cand_isdeprecated_set = B_TRUE; 1897 } 1898 1899 cc->cand_isdeprecated = ((cc->cand_flags & IPIF_DEPRECATED) != 0); 1900 cc->cand_isdeprecated_set = B_TRUE; 1901 1902 if (bc->cand_isdeprecated == cc->cand_isdeprecated) 1903 return (CAND_TIE); 1904 else if (cc->cand_isdeprecated) 1905 return (CAND_AVOID); 1906 else 1907 return (CAND_PREFER); 1908 } 1909 1910 /* 1911 * Prefer source addresses that have the IPIF_PREFERRED flag set. This 1912 * rule must be before rule_interface because the flag could be set on any 1913 * interface, not just the interface being used for outgoing packets (for 1914 * example, the IFF_PREFERRED could be set on an address assigned to the 1915 * loopback interface). 1916 */ 1917 /* ARGSUSED2 */ 1918 static rule_res_t 1919 rule_preferred(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1920 ip_stack_t *ipst) 1921 { 1922 if (!bc->cand_ispreferred_set) { 1923 bc->cand_ispreferred = ((bc->cand_flags & IPIF_PREFERRED) != 0); 1924 bc->cand_ispreferred_set = B_TRUE; 1925 } 1926 1927 cc->cand_ispreferred = ((cc->cand_flags & IPIF_PREFERRED) != 0); 1928 cc->cand_ispreferred_set = B_TRUE; 1929 1930 if (bc->cand_ispreferred == cc->cand_ispreferred) 1931 return (CAND_TIE); 1932 else if (cc->cand_ispreferred) 1933 return (CAND_PREFER); 1934 else 1935 return (CAND_AVOID); 1936 } 1937 1938 /* 1939 * Prefer source addresses that are assigned to the outgoing interface, or 1940 * to an interface that is in the same IPMP group as the outgoing 1941 * interface. 1942 */ 1943 /* ARGSUSED3 */ 1944 static rule_res_t 1945 rule_interface(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1946 ip_stack_t *ipst) 1947 { 1948 ill_t *dstill = dstinfo->dst_ill; 1949 1950 /* 1951 * If dstinfo->dst_restrict_ill is set, this rule is unnecessary 1952 * since we know all candidates will be on the same link. 1953 */ 1954 if (dstinfo->dst_restrict_ill) 1955 return (CAND_TIE); 1956 1957 if (!bc->cand_matchedinterface_set) { 1958 bc->cand_matchedinterface = (bc->cand_ill == dstill || 1959 (dstill->ill_group != NULL && 1960 dstill->ill_group == bc->cand_ill->ill_group)); 1961 bc->cand_matchedinterface_set = B_TRUE; 1962 } 1963 1964 cc->cand_matchedinterface = (cc->cand_ill == dstill || 1965 (dstill->ill_group != NULL && 1966 dstill->ill_group == cc->cand_ill->ill_group)); 1967 cc->cand_matchedinterface_set = B_TRUE; 1968 1969 if (bc->cand_matchedinterface == cc->cand_matchedinterface) 1970 return (CAND_TIE); 1971 else if (cc->cand_matchedinterface) 1972 return (CAND_PREFER); 1973 else 1974 return (CAND_AVOID); 1975 } 1976 1977 /* 1978 * Prefer source addresses whose label matches the destination's label. 1979 */ 1980 static rule_res_t 1981 rule_label(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 1982 { 1983 char *label; 1984 1985 if (!bc->cand_matchedlabel_set) { 1986 label = ip6_asp_lookup(&bc->cand_srcaddr, NULL, ipst); 1987 bc->cand_matchedlabel = 1988 ip6_asp_labelcmp(label, dstinfo->dst_label); 1989 bc->cand_matchedlabel_set = B_TRUE; 1990 } 1991 1992 label = ip6_asp_lookup(&cc->cand_srcaddr, NULL, ipst); 1993 cc->cand_matchedlabel = ip6_asp_labelcmp(label, dstinfo->dst_label); 1994 cc->cand_matchedlabel_set = B_TRUE; 1995 1996 if (bc->cand_matchedlabel == cc->cand_matchedlabel) 1997 return (CAND_TIE); 1998 else if (cc->cand_matchedlabel) 1999 return (CAND_PREFER); 2000 else 2001 return (CAND_AVOID); 2002 } 2003 2004 /* 2005 * Prefer public addresses over temporary ones. An application can reverse 2006 * the logic of this rule and prefer temporary addresses by using the 2007 * IPV6_SRC_PREFERENCES socket option. 2008 */ 2009 /* ARGSUSED3 */ 2010 static rule_res_t 2011 rule_temporary(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 2012 ip_stack_t *ipst) 2013 { 2014 if (!bc->cand_istmp_set) { 2015 bc->cand_istmp = ((bc->cand_flags & IPIF_TEMPORARY) != 0); 2016 bc->cand_istmp_set = B_TRUE; 2017 } 2018 2019 cc->cand_istmp = ((cc->cand_flags & IPIF_TEMPORARY) != 0); 2020 cc->cand_istmp_set = B_TRUE; 2021 2022 if (bc->cand_istmp == cc->cand_istmp) 2023 return (CAND_TIE); 2024 2025 if (dstinfo->dst_prefer_src_tmp && cc->cand_istmp) 2026 return (CAND_PREFER); 2027 else if (!dstinfo->dst_prefer_src_tmp && !cc->cand_istmp) 2028 return (CAND_PREFER); 2029 else 2030 return (CAND_AVOID); 2031 } 2032 2033 /* 2034 * Prefer source addresses with longer matching prefix with the destination 2035 * under the interface mask. This gets us on the same subnet before applying 2036 * any Solaris-specific rules. 2037 */ 2038 /* ARGSUSED3 */ 2039 static rule_res_t 2040 rule_ifprefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 2041 ip_stack_t *ipst) 2042 { 2043 if (!bc->cand_pref_eq_set) { 2044 bc->cand_pref_eq = V6_MASK_EQ_2(bc->cand_srcaddr, 2045 bc->cand_mask, *dstinfo->dst_addr); 2046 bc->cand_pref_eq_set = B_TRUE; 2047 } 2048 2049 cc->cand_pref_eq = V6_MASK_EQ_2(cc->cand_srcaddr, cc->cand_mask, 2050 *dstinfo->dst_addr); 2051 cc->cand_pref_eq_set = B_TRUE; 2052 2053 if (bc->cand_pref_eq) { 2054 if (cc->cand_pref_eq) { 2055 if (!bc->cand_pref_len_set) { 2056 bc->cand_pref_len = 2057 ip_mask_to_plen_v6(&bc->cand_mask); 2058 bc->cand_pref_len_set = B_TRUE; 2059 } 2060 cc->cand_pref_len = ip_mask_to_plen_v6(&cc->cand_mask); 2061 cc->cand_pref_len_set = B_TRUE; 2062 if (bc->cand_pref_len == cc->cand_pref_len) 2063 return (CAND_TIE); 2064 else if (bc->cand_pref_len > cc->cand_pref_len) 2065 return (CAND_AVOID); 2066 else 2067 return (CAND_PREFER); 2068 } else { 2069 return (CAND_AVOID); 2070 } 2071 } else { 2072 if (cc->cand_pref_eq) 2073 return (CAND_PREFER); 2074 else 2075 return (CAND_TIE); 2076 } 2077 } 2078 2079 /* 2080 * Prefer to use zone-specific addresses when possible instead of all-zones 2081 * addresses. 2082 */ 2083 /* ARGSUSED2 */ 2084 static rule_res_t 2085 rule_zone_specific(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 2086 ip_stack_t *ipst) 2087 { 2088 if ((bc->cand_zoneid == ALL_ZONES) == 2089 (cc->cand_zoneid == ALL_ZONES)) 2090 return (CAND_TIE); 2091 else if (cc->cand_zoneid == ALL_ZONES) 2092 return (CAND_AVOID); 2093 else 2094 return (CAND_PREFER); 2095 } 2096 2097 /* 2098 * Prefer to use DHCPv6 (first) and static addresses (second) when possible 2099 * instead of statelessly autoconfigured addresses. 2100 * 2101 * This is done after trying all other preferences (and before the final tie 2102 * breaker) so that, if all else is equal, we select addresses configured by 2103 * DHCPv6 over other addresses. We presume that DHCPv6 addresses, unlike 2104 * stateless autoconfigured addresses, are deliberately configured by an 2105 * administrator, and thus are correctly set up in DNS and network packet 2106 * filters. 2107 */ 2108 /* ARGSUSED2 */ 2109 static rule_res_t 2110 rule_addr_type(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 2111 ip_stack_t *ipst) 2112 { 2113 #define ATYPE(x) \ 2114 ((x) & IPIF_DHCPRUNNING) ? 1 : ((x) & IPIF_ADDRCONF) ? 3 : 2 2115 int bcval = ATYPE(bc->cand_flags); 2116 int ccval = ATYPE(cc->cand_flags); 2117 #undef ATYPE 2118 2119 if (bcval == ccval) 2120 return (CAND_TIE); 2121 else if (ccval < bcval) 2122 return (CAND_PREFER); 2123 else 2124 return (CAND_AVOID); 2125 } 2126 2127 /* 2128 * Prefer source addresses with longer matching prefix with the destination. 2129 * We do the longest matching prefix calculation by doing an xor of both 2130 * addresses with the destination, and pick the address with the longest string 2131 * of leading zeros, as per CommonPrefixLen() defined in RFC 3484. 2132 */ 2133 /* ARGSUSED3 */ 2134 static rule_res_t 2135 rule_prefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 2136 { 2137 if (!bc->cand_common_pref_set) { 2138 bc->cand_common_pref = ip_common_prefix_v6(&bc->cand_srcaddr, 2139 dstinfo->dst_addr); 2140 bc->cand_common_pref_set = B_TRUE; 2141 } 2142 2143 cc->cand_common_pref = ip_common_prefix_v6(&cc->cand_srcaddr, 2144 dstinfo->dst_addr); 2145 cc->cand_common_pref_set = B_TRUE; 2146 2147 if (bc->cand_common_pref == cc->cand_common_pref) 2148 return (CAND_TIE); 2149 else if (bc->cand_common_pref > cc->cand_common_pref) 2150 return (CAND_AVOID); 2151 else 2152 return (CAND_PREFER); 2153 } 2154 2155 /* 2156 * Last rule: we must pick something, so just prefer the current best 2157 * candidate. 2158 */ 2159 /* ARGSUSED */ 2160 static rule_res_t 2161 rule_must_be_last(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 2162 ip_stack_t *ipst) 2163 { 2164 return (CAND_AVOID); 2165 } 2166 2167 /* 2168 * Determine the best source address given a destination address and a 2169 * destination ill. If no suitable source address is found, it returns 2170 * NULL. If there is a usable address pointed to by the usesrc 2171 * (i.e ill_usesrc_ifindex != 0) then return that first since it is more 2172 * fine grained (i.e per interface) 2173 * 2174 * This implementation is based on the "Default Address Selection for IPv6" 2175 * specification produced by the IETF IPv6 working group. It has been 2176 * implemented so that the list of addresses is only traversed once (the 2177 * specification's algorithm could traverse the list of addresses once for 2178 * every rule). 2179 * 2180 * The restrict_ill argument restricts the algorithm to chose a source 2181 * address that is assigned to the destination ill or an ill in the same 2182 * IPMP group as the destination ill. This is used when the destination 2183 * address is a link-local or multicast address, and when 2184 * ipv6_strict_dst_multihoming is turned on. 2185 * 2186 * src_prefs is the caller's set of source address preferences. If source 2187 * address selection is being called to determine the source address of a 2188 * connected socket (from ip_bind_connected_v6()), then the preferences are 2189 * taken from conn_src_preferences. These preferences can be set on a 2190 * per-socket basis using the IPV6_SRC_PREFERENCES socket option. The only 2191 * preference currently implemented is for rfc3041 temporary addresses. 2192 */ 2193 ipif_t * 2194 ipif_select_source_v6(ill_t *dstill, const in6_addr_t *dst, 2195 uint_t restrict_ill, uint32_t src_prefs, zoneid_t zoneid) 2196 { 2197 dstinfo_t dstinfo; 2198 char dstr[INET6_ADDRSTRLEN]; 2199 char sstr[INET6_ADDRSTRLEN]; 2200 ipif_t *ipif; 2201 ill_t *ill, *usesrc_ill = NULL; 2202 ill_walk_context_t ctx; 2203 cand_t best_c; /* The best candidate */ 2204 cand_t curr_c; /* The current candidate */ 2205 uint_t index; 2206 boolean_t first_candidate = B_TRUE; 2207 rule_res_t rule_result; 2208 tsol_tpc_t *src_rhtp, *dst_rhtp; 2209 ip_stack_t *ipst = dstill->ill_ipst; 2210 2211 /* 2212 * The list of ordering rules. They are applied in the order they 2213 * appear in the list. 2214 * 2215 * Solaris doesn't currently support Mobile IPv6, so there's no 2216 * rule_mipv6 corresponding to rule 4 in the specification. 2217 */ 2218 rulef_t rules[] = { 2219 rule_isdst, 2220 rule_scope, 2221 rule_deprecated, 2222 rule_preferred, 2223 rule_interface, 2224 rule_label, 2225 rule_temporary, 2226 rule_ifprefix, /* local rules after this */ 2227 rule_zone_specific, 2228 rule_addr_type, 2229 rule_prefix, /* local rules before this */ 2230 rule_must_be_last, /* must always be last */ 2231 NULL 2232 }; 2233 2234 ASSERT(dstill->ill_isv6); 2235 ASSERT(!IN6_IS_ADDR_V4MAPPED(dst)); 2236 2237 /* 2238 * Check if there is a usable src address pointed to by the 2239 * usesrc ifindex. This has higher precedence since it is 2240 * finer grained (i.e per interface) v/s being system wide. 2241 */ 2242 if (dstill->ill_usesrc_ifindex != 0) { 2243 if ((usesrc_ill = 2244 ill_lookup_on_ifindex(dstill->ill_usesrc_ifindex, B_TRUE, 2245 NULL, NULL, NULL, NULL, ipst)) != NULL) { 2246 dstinfo.dst_ill = usesrc_ill; 2247 } else { 2248 return (NULL); 2249 } 2250 } else { 2251 dstinfo.dst_ill = dstill; 2252 } 2253 2254 /* 2255 * If we're dealing with an unlabeled destination on a labeled system, 2256 * make sure that we ignore source addresses that are incompatible with 2257 * the destination's default label. That destination's default label 2258 * must dominate the minimum label on the source address. 2259 * 2260 * (Note that this has to do with Trusted Solaris. It's not related to 2261 * the labels described by ip6_asp_lookup.) 2262 */ 2263 dst_rhtp = NULL; 2264 if (is_system_labeled()) { 2265 dst_rhtp = find_tpc(dst, IPV6_VERSION, B_FALSE); 2266 if (dst_rhtp == NULL) 2267 return (NULL); 2268 if (dst_rhtp->tpc_tp.host_type != UNLABELED) { 2269 TPC_RELE(dst_rhtp); 2270 dst_rhtp = NULL; 2271 } 2272 } 2273 2274 dstinfo.dst_addr = dst; 2275 dstinfo.dst_scope = ip_addr_scope_v6(dst); 2276 dstinfo.dst_label = ip6_asp_lookup(dst, NULL, ipst); 2277 dstinfo.dst_prefer_src_tmp = ((src_prefs & IPV6_PREFER_SRC_TMP) != 0); 2278 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2279 /* 2280 * Section three of the I-D states that for multicast and 2281 * link-local destinations, the candidate set must be restricted to 2282 * an interface that is on the same link as the outgoing interface. 2283 * Also, when ipv6_strict_dst_multihoming is turned on, always 2284 * restrict the source address to the destination link as doing 2285 * otherwise will almost certainly cause problems. 2286 */ 2287 if (IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst) || 2288 ipst->ips_ipv6_strict_dst_multihoming || usesrc_ill != NULL) { 2289 if (restrict_ill == RESTRICT_TO_NONE) 2290 dstinfo.dst_restrict_ill = RESTRICT_TO_GROUP; 2291 else 2292 dstinfo.dst_restrict_ill = restrict_ill; 2293 } else { 2294 dstinfo.dst_restrict_ill = restrict_ill; 2295 } 2296 2297 bzero(&best_c, sizeof (cand_t)); 2298 2299 /* 2300 * Take a pass through the list of IPv6 interfaces to chose the 2301 * best possible source address. If restrict_ill is true, we only 2302 * iterate through the ill's that are in the same IPMP group as the 2303 * destination's outgoing ill. If restrict_ill is false, we walk 2304 * the entire list of IPv6 ill's. 2305 */ 2306 if (dstinfo.dst_restrict_ill != RESTRICT_TO_NONE) { 2307 if (dstinfo.dst_ill->ill_group != NULL && 2308 dstinfo.dst_restrict_ill == RESTRICT_TO_GROUP) { 2309 ill = dstinfo.dst_ill->ill_group->illgrp_ill; 2310 } else { 2311 ill = dstinfo.dst_ill; 2312 } 2313 } else { 2314 ill = ILL_START_WALK_V6(&ctx, ipst); 2315 } 2316 2317 while (ill != NULL) { 2318 ASSERT(ill->ill_isv6); 2319 2320 /* 2321 * Avoid FAILED/OFFLINE ills. 2322 * Global and site local addresses will failover and 2323 * will be available on the new ill. 2324 * But link local addresses don't move. 2325 */ 2326 if (dstinfo.dst_restrict_ill != RESTRICT_TO_ILL && 2327 ill->ill_phyint->phyint_flags & 2328 (PHYI_OFFLINE | PHYI_FAILED)) 2329 goto next_ill; 2330 2331 for (ipif = ill->ill_ipif; ipif != NULL; 2332 ipif = ipif->ipif_next) { 2333 2334 if (!IPIF_VALID_IPV6_SOURCE(ipif)) 2335 continue; 2336 2337 if (zoneid != ALL_ZONES && 2338 ipif->ipif_zoneid != zoneid && 2339 ipif->ipif_zoneid != ALL_ZONES) 2340 continue; 2341 2342 /* 2343 * Check compatibility of local address for 2344 * destination's default label if we're on a labeled 2345 * system. Incompatible addresses can't be used at 2346 * all and must be skipped over. 2347 */ 2348 if (dst_rhtp != NULL) { 2349 boolean_t incompat; 2350 2351 src_rhtp = find_tpc(&ipif->ipif_v6lcl_addr, 2352 IPV6_VERSION, B_FALSE); 2353 if (src_rhtp == NULL) 2354 continue; 2355 incompat = 2356 src_rhtp->tpc_tp.host_type != SUN_CIPSO || 2357 src_rhtp->tpc_tp.tp_doi != 2358 dst_rhtp->tpc_tp.tp_doi || 2359 (!_blinrange(&dst_rhtp->tpc_tp.tp_def_label, 2360 &src_rhtp->tpc_tp.tp_sl_range_cipso) && 2361 !blinlset(&dst_rhtp->tpc_tp.tp_def_label, 2362 src_rhtp->tpc_tp.tp_sl_set_cipso)); 2363 TPC_RELE(src_rhtp); 2364 if (incompat) 2365 continue; 2366 } 2367 2368 if (first_candidate) { 2369 /* 2370 * This is first valid address in the list. 2371 * It is automatically the best candidate 2372 * so far. 2373 */ 2374 best_c.cand_ipif = ipif; 2375 first_candidate = B_FALSE; 2376 continue; 2377 } 2378 2379 bzero(&curr_c, sizeof (cand_t)); 2380 curr_c.cand_ipif = ipif; 2381 2382 /* 2383 * Compare this current candidate (curr_c) with the 2384 * best candidate (best_c) by applying the 2385 * comparison rules in order until one breaks the 2386 * tie. 2387 */ 2388 for (index = 0; rules[index] != NULL; index++) { 2389 /* Apply a comparison rule. */ 2390 rule_result = 2391 (rules[index])(&best_c, &curr_c, &dstinfo, 2392 ipst); 2393 if (rule_result == CAND_AVOID) { 2394 /* 2395 * The best candidate is still the 2396 * best candidate. Forget about 2397 * this current candidate and go on 2398 * to the next one. 2399 */ 2400 break; 2401 } else if (rule_result == CAND_PREFER) { 2402 /* 2403 * This candidate is prefered. It 2404 * becomes the best candidate so 2405 * far. Go on to the next address. 2406 */ 2407 best_c = curr_c; 2408 break; 2409 } 2410 /* We have a tie, apply the next rule. */ 2411 } 2412 2413 /* 2414 * The last rule must be a tie breaker rule and 2415 * must never produce a tie. At this point, the 2416 * candidate should have either been rejected, or 2417 * have been prefered as the best candidate so far. 2418 */ 2419 ASSERT(rule_result != CAND_TIE); 2420 } 2421 2422 /* 2423 * We may be walking the linked-list of ill's in an 2424 * IPMP group or traversing the IPv6 ill avl tree. If it is a 2425 * usesrc ILL then it can't be part of IPMP group and we 2426 * will exit the while loop. 2427 */ 2428 next_ill: 2429 if (dstinfo.dst_restrict_ill == RESTRICT_TO_ILL) 2430 ill = NULL; 2431 else if (dstinfo.dst_restrict_ill == RESTRICT_TO_GROUP) 2432 ill = ill->ill_group_next; 2433 else 2434 ill = ill_next(&ctx, ill); 2435 } 2436 2437 ipif = best_c.cand_ipif; 2438 ip1dbg(("ipif_select_source_v6(%s, %s) -> %s\n", 2439 dstinfo.dst_ill->ill_name, 2440 inet_ntop(AF_INET6, dstinfo.dst_addr, dstr, sizeof (dstr)), 2441 (ipif == NULL ? "NULL" : 2442 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, sstr, sizeof (sstr))))); 2443 2444 if (usesrc_ill != NULL) 2445 ill_refrele(usesrc_ill); 2446 2447 if (dst_rhtp != NULL) 2448 TPC_RELE(dst_rhtp); 2449 2450 if (ipif == NULL) { 2451 rw_exit(&ipst->ips_ill_g_lock); 2452 return (NULL); 2453 } 2454 2455 mutex_enter(&ipif->ipif_ill->ill_lock); 2456 if (IPIF_CAN_LOOKUP(ipif)) { 2457 ipif_refhold_locked(ipif); 2458 mutex_exit(&ipif->ipif_ill->ill_lock); 2459 rw_exit(&ipst->ips_ill_g_lock); 2460 return (ipif); 2461 } 2462 mutex_exit(&ipif->ipif_ill->ill_lock); 2463 rw_exit(&ipst->ips_ill_g_lock); 2464 ip1dbg(("ipif_select_source_v6 cannot lookup ipif %p" 2465 " returning null \n", (void *)ipif)); 2466 2467 return (NULL); 2468 } 2469 2470 /* 2471 * If old_ipif is not NULL, see if ipif was derived from old 2472 * ipif and if so, recreate the interface route by re-doing 2473 * source address selection. This happens when ipif_down -> 2474 * ipif_update_other_ipifs calls us. 2475 * 2476 * If old_ipif is NULL, just redo the source address selection 2477 * if needed. This happens when illgrp_insert or ipif_up_done_v6 2478 * calls us. 2479 */ 2480 void 2481 ipif_recreate_interface_routes_v6(ipif_t *old_ipif, ipif_t *ipif) 2482 { 2483 ire_t *ire; 2484 ire_t *ipif_ire; 2485 queue_t *stq; 2486 ill_t *ill; 2487 ipif_t *nipif = NULL; 2488 boolean_t nipif_refheld = B_FALSE; 2489 boolean_t ip6_asp_table_held = B_FALSE; 2490 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2491 2492 ill = ipif->ipif_ill; 2493 2494 if (!(ipif->ipif_flags & 2495 (IPIF_NOLOCAL|IPIF_ANYCAST|IPIF_DEPRECATED))) { 2496 /* 2497 * Can't possibly have borrowed the source 2498 * from old_ipif. 2499 */ 2500 return; 2501 } 2502 2503 /* 2504 * Is there any work to be done? No work if the address 2505 * is INADDR_ANY, loopback or NOLOCAL or ANYCAST ( 2506 * ipif_select_source_v6() does not borrow addresses from 2507 * NOLOCAL and ANYCAST interfaces). 2508 */ 2509 if ((old_ipif != NULL) && 2510 ((IN6_IS_ADDR_UNSPECIFIED(&old_ipif->ipif_v6lcl_addr)) || 2511 (old_ipif->ipif_ill->ill_wq == NULL) || 2512 (old_ipif->ipif_flags & 2513 (IPIF_NOLOCAL|IPIF_ANYCAST)))) { 2514 return; 2515 } 2516 2517 /* 2518 * Perform the same checks as when creating the 2519 * IRE_INTERFACE in ipif_up_done_v6. 2520 */ 2521 if (!(ipif->ipif_flags & IPIF_UP)) 2522 return; 2523 2524 if ((ipif->ipif_flags & IPIF_NOXMIT)) 2525 return; 2526 2527 if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6subnet) && 2528 IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6net_mask)) 2529 return; 2530 2531 /* 2532 * We know that ipif uses some other source for its 2533 * IRE_INTERFACE. Is it using the source of this 2534 * old_ipif? 2535 */ 2536 ipif_ire = ipif_to_ire_v6(ipif); 2537 if (ipif_ire == NULL) 2538 return; 2539 2540 if (old_ipif != NULL && 2541 !IN6_ARE_ADDR_EQUAL(&old_ipif->ipif_v6lcl_addr, 2542 &ipif_ire->ire_src_addr_v6)) { 2543 ire_refrele(ipif_ire); 2544 return; 2545 } 2546 2547 if (ip_debug > 2) { 2548 /* ip1dbg */ 2549 pr_addr_dbg("ipif_recreate_interface_routes_v6: deleting IRE" 2550 " for src %s\n", AF_INET6, &ipif_ire->ire_src_addr_v6); 2551 } 2552 2553 stq = ipif_ire->ire_stq; 2554 2555 /* 2556 * Can't use our source address. Select a different source address 2557 * for the IRE_INTERFACE. We restrict interface route source 2558 * address selection to ipif's assigned to the same link as the 2559 * interface. 2560 */ 2561 if (ip6_asp_can_lookup(ipst)) { 2562 ip6_asp_table_held = B_TRUE; 2563 nipif = ipif_select_source_v6(ill, &ipif->ipif_v6subnet, 2564 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, 2565 ipif->ipif_zoneid); 2566 } 2567 if (nipif == NULL) { 2568 /* Last resort - all ipif's have IPIF_NOLOCAL */ 2569 nipif = ipif; 2570 } else { 2571 nipif_refheld = B_TRUE; 2572 } 2573 2574 ire = ire_create_v6( 2575 &ipif->ipif_v6subnet, /* dest pref */ 2576 &ipif->ipif_v6net_mask, /* mask */ 2577 &nipif->ipif_v6src_addr, /* src addr */ 2578 NULL, /* no gateway */ 2579 &ipif->ipif_mtu, /* max frag */ 2580 NULL, /* no src nce */ 2581 NULL, /* no recv from queue */ 2582 stq, /* send-to queue */ 2583 ill->ill_net_type, /* IF_[NO]RESOLVER */ 2584 ipif, 2585 NULL, 2586 0, 2587 0, 2588 0, 2589 &ire_uinfo_null, 2590 NULL, 2591 NULL, 2592 ipst); 2593 2594 if (ire != NULL) { 2595 ire_t *ret_ire; 2596 int error; 2597 2598 /* 2599 * We don't need ipif_ire anymore. We need to delete 2600 * before we add so that ire_add does not detect 2601 * duplicates. 2602 */ 2603 ire_delete(ipif_ire); 2604 ret_ire = ire; 2605 error = ire_add(&ret_ire, NULL, NULL, NULL, B_FALSE); 2606 ASSERT(error == 0); 2607 ASSERT(ret_ire == ire); 2608 if (ret_ire != NULL) { 2609 /* Held in ire_add */ 2610 ire_refrele(ret_ire); 2611 } 2612 } 2613 /* 2614 * Either we are falling through from above or could not 2615 * allocate a replacement. 2616 */ 2617 ire_refrele(ipif_ire); 2618 if (ip6_asp_table_held) 2619 ip6_asp_table_refrele(ipst); 2620 if (nipif_refheld) 2621 ipif_refrele(nipif); 2622 } 2623 2624 /* 2625 * This old_ipif is going away. 2626 * 2627 * Determine if any other ipif's are using our address as 2628 * ipif_v6lcl_addr (due to those being IPIF_NOLOCAL, IPIF_ANYCAST, or 2629 * IPIF_DEPRECATED). 2630 * Find the IRE_INTERFACE for such ipif's and recreate them 2631 * to use an different source address following the rules in 2632 * ipif_up_done_v6. 2633 * 2634 * This function takes an illgrp as an argument so that illgrp_delete 2635 * can call this to update source address even after deleting the 2636 * old_ipif->ipif_ill from the ill group. 2637 */ 2638 void 2639 ipif_update_other_ipifs_v6(ipif_t *old_ipif, ill_group_t *illgrp) 2640 { 2641 ipif_t *ipif; 2642 ill_t *ill; 2643 char buf[INET6_ADDRSTRLEN]; 2644 2645 ASSERT(IAM_WRITER_IPIF(old_ipif)); 2646 2647 ill = old_ipif->ipif_ill; 2648 2649 ip1dbg(("ipif_update_other_ipifs_v6(%s, %s)\n", 2650 ill->ill_name, 2651 inet_ntop(AF_INET6, &old_ipif->ipif_v6lcl_addr, 2652 buf, sizeof (buf)))); 2653 2654 /* 2655 * If this part of a group, look at all ills as ipif_select_source 2656 * borrows a source address across all the ills in the group. 2657 */ 2658 if (illgrp != NULL) 2659 ill = illgrp->illgrp_ill; 2660 2661 /* Don't need a lock since this is a writer */ 2662 for (; ill != NULL; ill = ill->ill_group_next) { 2663 for (ipif = ill->ill_ipif; ipif != NULL; 2664 ipif = ipif->ipif_next) { 2665 2666 if (ipif == old_ipif) 2667 continue; 2668 2669 ipif_recreate_interface_routes_v6(old_ipif, ipif); 2670 } 2671 } 2672 } 2673 2674 /* 2675 * Perform an attach and bind to get phys addr plus info_req for 2676 * the physical device. 2677 * q and mp represents an ioctl which will be queued waiting for 2678 * completion of the DLPI message exchange. 2679 * MUST be called on an ill queue. Can not set conn_pending_ill for that 2680 * reason thus the DL_PHYS_ADDR_ACK code does not assume ill_pending_q. 2681 * 2682 * Returns EINPROGRESS when mp has been consumed by queueing it on 2683 * ill_pending_mp and the ioctl will complete in ip_rput. 2684 */ 2685 int 2686 ill_dl_phys(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q) 2687 { 2688 mblk_t *v6token_mp = NULL; 2689 mblk_t *v6lla_mp = NULL; 2690 mblk_t *phys_mp = NULL; 2691 mblk_t *info_mp = NULL; 2692 mblk_t *attach_mp = NULL; 2693 mblk_t *bind_mp = NULL; 2694 mblk_t *unbind_mp = NULL; 2695 mblk_t *notify_mp = NULL; 2696 2697 ip1dbg(("ill_dl_phys(%s:%u)\n", ill->ill_name, ipif->ipif_id)); 2698 ASSERT(ill->ill_dlpi_style_set); 2699 ASSERT(WR(q)->q_next != NULL); 2700 2701 if (ill->ill_isv6) { 2702 v6token_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2703 sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2704 if (v6token_mp == NULL) 2705 goto bad; 2706 ((dl_phys_addr_req_t *)v6token_mp->b_rptr)->dl_addr_type = 2707 DL_IPV6_TOKEN; 2708 2709 v6lla_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2710 sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2711 if (v6lla_mp == NULL) 2712 goto bad; 2713 ((dl_phys_addr_req_t *)v6lla_mp->b_rptr)->dl_addr_type = 2714 DL_IPV6_LINK_LAYER_ADDR; 2715 } 2716 2717 /* 2718 * Allocate a DL_NOTIFY_REQ and set the notifications we want. 2719 */ 2720 notify_mp = ip_dlpi_alloc(sizeof (dl_notify_req_t) + sizeof (long), 2721 DL_NOTIFY_REQ); 2722 if (notify_mp == NULL) 2723 goto bad; 2724 ((dl_notify_req_t *)notify_mp->b_rptr)->dl_notifications = 2725 (DL_NOTE_PHYS_ADDR | DL_NOTE_SDU_SIZE | DL_NOTE_FASTPATH_FLUSH | 2726 DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN | DL_NOTE_CAPAB_RENEG); 2727 2728 phys_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2729 sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2730 if (phys_mp == NULL) 2731 goto bad; 2732 ((dl_phys_addr_req_t *)phys_mp->b_rptr)->dl_addr_type = 2733 DL_CURR_PHYS_ADDR; 2734 2735 info_mp = ip_dlpi_alloc( 2736 sizeof (dl_info_req_t) + sizeof (dl_info_ack_t), 2737 DL_INFO_REQ); 2738 if (info_mp == NULL) 2739 goto bad; 2740 2741 bind_mp = ip_dlpi_alloc(sizeof (dl_bind_req_t) + sizeof (long), 2742 DL_BIND_REQ); 2743 if (bind_mp == NULL) 2744 goto bad; 2745 ((dl_bind_req_t *)bind_mp->b_rptr)->dl_sap = ill->ill_sap; 2746 ((dl_bind_req_t *)bind_mp->b_rptr)->dl_service_mode = DL_CLDLS; 2747 2748 unbind_mp = ip_dlpi_alloc(sizeof (dl_unbind_req_t), DL_UNBIND_REQ); 2749 if (unbind_mp == NULL) 2750 goto bad; 2751 2752 /* If we need to attach, pre-alloc and initialize the mblk */ 2753 if (ill->ill_needs_attach) { 2754 attach_mp = ip_dlpi_alloc(sizeof (dl_attach_req_t), 2755 DL_ATTACH_REQ); 2756 if (attach_mp == NULL) 2757 goto bad; 2758 ((dl_attach_req_t *)attach_mp->b_rptr)->dl_ppa = ill->ill_ppa; 2759 } 2760 2761 /* 2762 * Here we are going to delay the ioctl ack until after 2763 * ACKs from DL_PHYS_ADDR_REQ. So need to save the 2764 * original ioctl message before sending the requests 2765 */ 2766 mutex_enter(&ill->ill_lock); 2767 /* ipsq_pending_mp_add won't fail since we pass in a NULL connp */ 2768 (void) ipsq_pending_mp_add(NULL, ipif, ill->ill_wq, mp, 0); 2769 /* 2770 * Set ill_phys_addr_pend to zero. It will be set to the addr_type of 2771 * the DL_PHYS_ADDR_REQ in ill_dlpi_send() and ill_dlpi_done(). It will 2772 * be used to track which DL_PHYS_ADDR_REQ is being ACK'd/NAK'd. 2773 */ 2774 ill->ill_phys_addr_pend = 0; 2775 mutex_exit(&ill->ill_lock); 2776 2777 if (attach_mp != NULL) { 2778 ip1dbg(("ill_dl_phys: attach\n")); 2779 ill_dlpi_send(ill, attach_mp); 2780 } 2781 ill_dlpi_send(ill, bind_mp); 2782 ill_dlpi_send(ill, info_mp); 2783 if (ill->ill_isv6) { 2784 ill_dlpi_send(ill, v6token_mp); 2785 ill_dlpi_send(ill, v6lla_mp); 2786 } 2787 ill_dlpi_send(ill, phys_mp); 2788 ill_dlpi_send(ill, notify_mp); 2789 ill_dlpi_send(ill, unbind_mp); 2790 2791 /* 2792 * This operation will complete in ip_rput_dlpi_writer with either 2793 * a DL_PHYS_ADDR_ACK or DL_ERROR_ACK. 2794 */ 2795 return (EINPROGRESS); 2796 bad: 2797 freemsg(v6token_mp); 2798 freemsg(v6lla_mp); 2799 freemsg(phys_mp); 2800 freemsg(info_mp); 2801 freemsg(attach_mp); 2802 freemsg(bind_mp); 2803 freemsg(unbind_mp); 2804 freemsg(notify_mp); 2805 return (ENOMEM); 2806 } 2807 2808 uint_t ip_loopback_mtu_v6plus = IP_LOOPBACK_MTU + IPV6_HDR_LEN + 20; 2809 2810 /* 2811 * DLPI is up. 2812 * Create all the IREs associated with an interface bring up multicast. 2813 * Set the interface flag and finish other initialization 2814 * that potentially had to be differed to after DL_BIND_ACK. 2815 */ 2816 int 2817 ipif_up_done_v6(ipif_t *ipif) 2818 { 2819 ire_t *ire_array[20]; 2820 ire_t **irep = ire_array; 2821 ire_t **irep1; 2822 ill_t *ill = ipif->ipif_ill; 2823 queue_t *stq; 2824 in6_addr_t v6addr; 2825 in6_addr_t route_mask; 2826 ipif_t *src_ipif = NULL; 2827 ipif_t *tmp_ipif; 2828 boolean_t flush_ire_cache = B_TRUE; 2829 int err; 2830 char buf[INET6_ADDRSTRLEN]; 2831 phyint_t *phyi; 2832 ire_t **ipif_saved_irep = NULL; 2833 int ipif_saved_ire_cnt; 2834 int cnt; 2835 boolean_t src_ipif_held = B_FALSE; 2836 boolean_t ire_added = B_FALSE; 2837 boolean_t loopback = B_FALSE; 2838 boolean_t ip6_asp_table_held = B_FALSE; 2839 ip_stack_t *ipst = ill->ill_ipst; 2840 2841 ip1dbg(("ipif_up_done_v6(%s:%u)\n", 2842 ipif->ipif_ill->ill_name, ipif->ipif_id)); 2843 2844 /* Check if this is a loopback interface */ 2845 if (ipif->ipif_ill->ill_wq == NULL) 2846 loopback = B_TRUE; 2847 2848 ASSERT(ipif->ipif_isv6); 2849 ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock)); 2850 2851 /* 2852 * If all other interfaces for this ill are down or DEPRECATED, 2853 * or otherwise unsuitable for source address selection, remove 2854 * any IRE_CACHE entries for this ill to make sure source 2855 * address selection gets to take this new ipif into account. 2856 * No need to hold ill_lock while traversing the ipif list since 2857 * we are writer 2858 */ 2859 for (tmp_ipif = ill->ill_ipif; tmp_ipif; 2860 tmp_ipif = tmp_ipif->ipif_next) { 2861 if (((tmp_ipif->ipif_flags & 2862 (IPIF_NOXMIT|IPIF_ANYCAST|IPIF_NOLOCAL|IPIF_DEPRECATED)) || 2863 !(tmp_ipif->ipif_flags & IPIF_UP)) || 2864 (tmp_ipif == ipif)) 2865 continue; 2866 /* first useable pre-existing interface */ 2867 flush_ire_cache = B_FALSE; 2868 break; 2869 } 2870 if (flush_ire_cache) 2871 ire_walk_ill_v6(MATCH_IRE_ILL_GROUP | MATCH_IRE_TYPE, 2872 IRE_CACHE, ill_ipif_cache_delete, (char *)ill, ill); 2873 2874 /* 2875 * Figure out which way the send-to queue should go. Only 2876 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER should show up here. 2877 */ 2878 switch (ill->ill_net_type) { 2879 case IRE_IF_RESOLVER: 2880 stq = ill->ill_rq; 2881 break; 2882 case IRE_IF_NORESOLVER: 2883 case IRE_LOOPBACK: 2884 stq = ill->ill_wq; 2885 break; 2886 default: 2887 return (EINVAL); 2888 } 2889 2890 if (IS_LOOPBACK(ill)) { 2891 /* 2892 * lo0:1 and subsequent ipifs were marked IRE_LOCAL in 2893 * ipif_lookup_on_name(), but in the case of zones we can have 2894 * several loopback addresses on lo0. So all the interfaces with 2895 * loopback addresses need to be marked IRE_LOOPBACK. 2896 */ 2897 if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, &ipv6_loopback)) 2898 ipif->ipif_ire_type = IRE_LOOPBACK; 2899 else 2900 ipif->ipif_ire_type = IRE_LOCAL; 2901 } 2902 2903 if (ipif->ipif_flags & (IPIF_NOLOCAL|IPIF_ANYCAST|IPIF_DEPRECATED)) { 2904 /* 2905 * Can't use our source address. Select a different 2906 * source address for the IRE_INTERFACE and IRE_LOCAL 2907 */ 2908 if (ip6_asp_can_lookup(ipst)) { 2909 ip6_asp_table_held = B_TRUE; 2910 src_ipif = ipif_select_source_v6(ipif->ipif_ill, 2911 &ipif->ipif_v6subnet, RESTRICT_TO_NONE, 2912 IPV6_PREFER_SRC_DEFAULT, ipif->ipif_zoneid); 2913 } 2914 if (src_ipif == NULL) 2915 src_ipif = ipif; /* Last resort */ 2916 else 2917 src_ipif_held = B_TRUE; 2918 } else { 2919 src_ipif = ipif; 2920 } 2921 2922 if (!IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr) && 2923 !(ipif->ipif_flags & IPIF_NOLOCAL)) { 2924 2925 /* 2926 * If we're on a labeled system then make sure that zone- 2927 * private addresses have proper remote host database entries. 2928 */ 2929 if (is_system_labeled() && 2930 ipif->ipif_ire_type != IRE_LOOPBACK) { 2931 if (ip6opt_ls == 0) { 2932 cmn_err(CE_WARN, "IPv6 not enabled " 2933 "via /etc/system"); 2934 return (EINVAL); 2935 } 2936 if (!tsol_check_interface_address(ipif)) 2937 return (EINVAL); 2938 } 2939 2940 /* Register the source address for __sin6_src_id */ 2941 err = ip_srcid_insert(&ipif->ipif_v6lcl_addr, 2942 ipif->ipif_zoneid, ipst); 2943 if (err != 0) { 2944 ip0dbg(("ipif_up_done_v6: srcid_insert %d\n", err)); 2945 if (src_ipif_held) 2946 ipif_refrele(src_ipif); 2947 if (ip6_asp_table_held) 2948 ip6_asp_table_refrele(ipst); 2949 return (err); 2950 } 2951 /* 2952 * If the interface address is set, create the LOCAL 2953 * or LOOPBACK IRE. 2954 */ 2955 ip1dbg(("ipif_up_done_v6: creating IRE %d for %s\n", 2956 ipif->ipif_ire_type, 2957 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, 2958 buf, sizeof (buf)))); 2959 2960 *irep++ = ire_create_v6( 2961 &ipif->ipif_v6lcl_addr, /* dest address */ 2962 &ipv6_all_ones, /* mask */ 2963 &src_ipif->ipif_v6src_addr, /* source address */ 2964 NULL, /* no gateway */ 2965 &ip_loopback_mtu_v6plus, /* max frag size */ 2966 NULL, 2967 ipif->ipif_rq, /* recv-from queue */ 2968 NULL, /* no send-to queue */ 2969 ipif->ipif_ire_type, /* LOCAL or LOOPBACK */ 2970 ipif, /* interface */ 2971 NULL, 2972 0, 2973 0, 2974 (ipif->ipif_flags & IPIF_PRIVATE) ? RTF_PRIVATE : 0, 2975 &ire_uinfo_null, 2976 NULL, 2977 NULL, 2978 ipst); 2979 } 2980 2981 /* 2982 * Set up the IRE_IF_RESOLVER or IRE_IF_NORESOLVER, as appropriate. 2983 * Note that atun interfaces have an all-zero ipif_v6subnet. 2984 * Thus we allow a zero subnet as long as the mask is non-zero. 2985 */ 2986 if (stq != NULL && !(ipif->ipif_flags & IPIF_NOXMIT) && 2987 !(IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6subnet) && 2988 IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6net_mask))) { 2989 /* ipif_v6subnet is ipif_v6pp_dst_addr for pt-pt */ 2990 v6addr = ipif->ipif_v6subnet; 2991 2992 if (ipif->ipif_flags & IPIF_POINTOPOINT) { 2993 route_mask = ipv6_all_ones; 2994 } else { 2995 route_mask = ipif->ipif_v6net_mask; 2996 } 2997 2998 ip1dbg(("ipif_up_done_v6: creating if IRE %d for %s\n", 2999 ill->ill_net_type, 3000 inet_ntop(AF_INET6, &v6addr, buf, sizeof (buf)))); 3001 3002 *irep++ = ire_create_v6( 3003 &v6addr, /* dest pref */ 3004 &route_mask, /* mask */ 3005 &src_ipif->ipif_v6src_addr, /* src addr */ 3006 NULL, /* no gateway */ 3007 &ipif->ipif_mtu, /* max frag */ 3008 NULL, /* no src nce */ 3009 NULL, /* no recv from queue */ 3010 stq, /* send-to queue */ 3011 ill->ill_net_type, /* IF_[NO]RESOLVER */ 3012 ipif, 3013 NULL, 3014 0, 3015 0, 3016 (ipif->ipif_flags & IPIF_PRIVATE) ? RTF_PRIVATE : 0, 3017 &ire_uinfo_null, 3018 NULL, 3019 NULL, 3020 ipst); 3021 } 3022 3023 /* 3024 * Setup 2002::/16 route, if this interface is a 6to4 tunnel 3025 */ 3026 if (IN6_IS_ADDR_6TO4(&ipif->ipif_v6lcl_addr) && 3027 (ill->ill_is_6to4tun)) { 3028 /* 3029 * Destination address is 2002::/16 3030 */ 3031 #ifdef _BIG_ENDIAN 3032 const in6_addr_t prefix_addr = { 0x20020000U, 0, 0, 0 }; 3033 const in6_addr_t prefix_mask = { 0xffff0000U, 0, 0, 0 }; 3034 #else 3035 const in6_addr_t prefix_addr = { 0x00000220U, 0, 0, 0 }; 3036 const in6_addr_t prefix_mask = { 0x0000ffffU, 0, 0, 0 }; 3037 #endif /* _BIG_ENDIAN */ 3038 char buf2[INET6_ADDRSTRLEN]; 3039 ire_t *isdup; 3040 in6_addr_t *first_addr = &ill->ill_ipif->ipif_v6lcl_addr; 3041 3042 /* 3043 * check to see if this route has already been added for 3044 * this tunnel interface. 3045 */ 3046 isdup = ire_ftable_lookup_v6(first_addr, &prefix_mask, 0, 3047 IRE_IF_NORESOLVER, ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 3048 (MATCH_IRE_SRC | MATCH_IRE_MASK), ipst); 3049 3050 if (isdup == NULL) { 3051 ip1dbg(("ipif_up_done_v6: creating if IRE %d for %s", 3052 IRE_IF_NORESOLVER, inet_ntop(AF_INET6, &v6addr, 3053 buf2, sizeof (buf2)))); 3054 3055 *irep++ = ire_create_v6( 3056 &prefix_addr, /* 2002:: */ 3057 &prefix_mask, /* ffff:: */ 3058 &ipif->ipif_v6lcl_addr, /* src addr */ 3059 NULL, /* gateway */ 3060 &ipif->ipif_mtu, /* max_frag */ 3061 NULL, /* no src nce */ 3062 NULL, /* no rfq */ 3063 ill->ill_wq, /* stq */ 3064 IRE_IF_NORESOLVER, /* type */ 3065 ipif, /* interface */ 3066 NULL, /* v6cmask */ 3067 0, 3068 0, 3069 RTF_UP, 3070 &ire_uinfo_null, 3071 NULL, 3072 NULL, 3073 ipst); 3074 } else { 3075 ire_refrele(isdup); 3076 } 3077 } 3078 3079 /* If an earlier ire_create failed, get out now */ 3080 for (irep1 = irep; irep1 > ire_array; ) { 3081 irep1--; 3082 if (*irep1 == NULL) { 3083 ip1dbg(("ipif_up_done_v6: NULL ire found in" 3084 " ire_array\n")); 3085 err = ENOMEM; 3086 goto bad; 3087 } 3088 } 3089 3090 ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock)); 3091 3092 /* 3093 * Need to atomically check for ip_addr_availablity_check 3094 * now under ill_g_lock, and if it fails got bad, and remove 3095 * from group also 3096 */ 3097 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 3098 mutex_enter(&ipst->ips_ip_addr_avail_lock); 3099 ill->ill_ipif_up_count++; 3100 ipif->ipif_flags |= IPIF_UP; 3101 err = ip_addr_availability_check(ipif); 3102 mutex_exit(&ipst->ips_ip_addr_avail_lock); 3103 rw_exit(&ipst->ips_ill_g_lock); 3104 3105 if (err != 0) { 3106 /* 3107 * Our address may already be up on the same ill. In this case, 3108 * the external resolver entry for our ipif replaced the one for 3109 * the other ipif. So we don't want to delete it (otherwise the 3110 * other ipif would be unable to send packets). 3111 * ip_addr_availability_check() identifies this case for us and 3112 * returns EADDRINUSE; we need to turn it into EADDRNOTAVAIL 3113 * which is the expected error code. 3114 */ 3115 if (err == EADDRINUSE) { 3116 if (ipif->ipif_ill->ill_flags & ILLF_XRESOLV) { 3117 freemsg(ipif->ipif_arp_del_mp); 3118 ipif->ipif_arp_del_mp = NULL; 3119 } 3120 err = EADDRNOTAVAIL; 3121 } 3122 ill->ill_ipif_up_count--; 3123 ipif->ipif_flags &= ~IPIF_UP; 3124 goto bad; 3125 } 3126 3127 /* 3128 * Add in all newly created IREs. We want to add before 3129 * we call ifgrp_insert which wants to know whether 3130 * IRE_IF_RESOLVER exists or not. 3131 * 3132 * NOTE : We refrele the ire though we may branch to "bad" 3133 * later on where we do ire_delete. This is okay 3134 * because nobody can delete it as we are running 3135 * exclusively. 3136 */ 3137 for (irep1 = irep; irep1 > ire_array; ) { 3138 irep1--; 3139 /* Shouldn't be adding any bcast ire's */ 3140 ASSERT((*irep1)->ire_type != IRE_BROADCAST); 3141 ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock)); 3142 /* 3143 * refheld by ire_add. refele towards the end of the func 3144 */ 3145 (void) ire_add(irep1, NULL, NULL, NULL, B_FALSE); 3146 } 3147 if (ip6_asp_table_held) { 3148 ip6_asp_table_refrele(ipst); 3149 ip6_asp_table_held = B_FALSE; 3150 } 3151 ire_added = B_TRUE; 3152 3153 /* 3154 * Form groups if possible. 3155 * 3156 * If we are supposed to be in a ill_group with a name, insert it 3157 * now as we know that at least one ipif is UP. Otherwise form 3158 * nameless groups. 3159 * 3160 * If ip_enable_group_ifs is set and ipif address is not ::0, insert 3161 * this ipif into the appropriate interface group, or create a 3162 * new one. If this is already in a nameless group, we try to form 3163 * a bigger group looking at other ills potentially sharing this 3164 * ipif's prefix. 3165 */ 3166 phyi = ill->ill_phyint; 3167 if (phyi->phyint_groupname_len != 0) { 3168 ASSERT(phyi->phyint_groupname != NULL); 3169 if (ill->ill_ipif_up_count == 1) { 3170 ASSERT(ill->ill_group == NULL); 3171 err = illgrp_insert(&ipst->ips_illgrp_head_v6, ill, 3172 phyi->phyint_groupname, NULL, B_TRUE); 3173 if (err != 0) { 3174 ip1dbg(("ipif_up_done_v6: illgrp allocation " 3175 "failed, error %d\n", err)); 3176 goto bad; 3177 } 3178 } 3179 ASSERT(ill->ill_group != NULL); 3180 } 3181 3182 /* Recover any additional IRE_IF_[NO]RESOLVER entries for this ipif */ 3183 ipif_saved_ire_cnt = ipif->ipif_saved_ire_cnt; 3184 ipif_saved_irep = ipif_recover_ire_v6(ipif); 3185 3186 if (ill->ill_need_recover_multicast) { 3187 /* 3188 * Need to recover all multicast memberships in the driver. 3189 * This had to be deferred until we had attached. 3190 */ 3191 ill_recover_multicast(ill); 3192 } 3193 /* Join the allhosts multicast address and the solicited node MC */ 3194 ipif_multicast_up(ipif); 3195 3196 if (!loopback) { 3197 /* 3198 * See whether anybody else would benefit from the 3199 * new ipif that we added. We call this always rather 3200 * than while adding a non-IPIF_NOLOCAL/DEPRECATED/ANYCAST 3201 * ipif for the benefit of illgrp_insert (done above) 3202 * which does not do source address selection as it does 3203 * not want to re-create interface routes that we are 3204 * having reference to it here. 3205 */ 3206 ill_update_source_selection(ill); 3207 } 3208 3209 for (irep1 = irep; irep1 > ire_array; ) { 3210 irep1--; 3211 if (*irep1 != NULL) { 3212 /* was held in ire_add */ 3213 ire_refrele(*irep1); 3214 } 3215 } 3216 3217 cnt = ipif_saved_ire_cnt; 3218 for (irep1 = ipif_saved_irep; cnt > 0; irep1++, cnt--) { 3219 if (*irep1 != NULL) { 3220 /* was held in ire_add */ 3221 ire_refrele(*irep1); 3222 } 3223 } 3224 3225 if (ipif->ipif_addr_ready) 3226 ipif_up_notify(ipif); 3227 3228 if (ipif_saved_irep != NULL) { 3229 kmem_free(ipif_saved_irep, 3230 ipif_saved_ire_cnt * sizeof (ire_t *)); 3231 } 3232 3233 if (src_ipif_held) 3234 ipif_refrele(src_ipif); 3235 3236 return (0); 3237 3238 bad: 3239 if (ip6_asp_table_held) 3240 ip6_asp_table_refrele(ipst); 3241 /* 3242 * We don't have to bother removing from ill groups because 3243 * 3244 * 1) For groups with names, we insert only when the first ipif 3245 * comes up. In that case if it fails, it will not be in any 3246 * group. So, we need not try to remove for that case. 3247 * 3248 * 2) For groups without names, either we tried to insert ipif_ill 3249 * in a group as singleton or found some other group to become 3250 * a bigger group. For the former, if it fails we don't have 3251 * anything to do as ipif_ill is not in the group and for the 3252 * latter, there are no failures in illgrp_insert/illgrp_delete 3253 * (ENOMEM can't occur for this. Check ifgrp_insert). 3254 */ 3255 3256 while (irep > ire_array) { 3257 irep--; 3258 if (*irep != NULL) { 3259 ire_delete(*irep); 3260 if (ire_added) 3261 ire_refrele(*irep); 3262 } 3263 3264 } 3265 (void) ip_srcid_remove(&ipif->ipif_v6lcl_addr, ipif->ipif_zoneid, ipst); 3266 3267 if (ipif_saved_irep != NULL) { 3268 kmem_free(ipif_saved_irep, 3269 ipif_saved_ire_cnt * sizeof (ire_t *)); 3270 } 3271 if (src_ipif_held) 3272 ipif_refrele(src_ipif); 3273 3274 ipif_ndp_down(ipif); 3275 if (ipif->ipif_ill->ill_flags & ILLF_XRESOLV) 3276 ipif_arp_down(ipif); 3277 3278 return (err); 3279 } 3280 3281 /* 3282 * Delete an ND entry and the corresponding IRE_CACHE entry if it exists. 3283 */ 3284 /* ARGSUSED */ 3285 int 3286 ip_siocdelndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 3287 ip_ioctl_cmd_t *ipip, void *dummy_ifreq) 3288 { 3289 in6_addr_t addr; 3290 sin6_t *sin6; 3291 nce_t *nce; 3292 struct lifreq *lifr; 3293 lif_nd_req_t *lnr; 3294 mblk_t *mp1; 3295 3296 mp1 = mp->b_cont->b_cont; 3297 lifr = (struct lifreq *)mp1->b_rptr; 3298 lnr = &lifr->lifr_nd; 3299 /* Only allow for logical unit zero i.e. not on "le0:17" */ 3300 if (ipif->ipif_id != 0) 3301 return (EINVAL); 3302 3303 if (!ipif->ipif_isv6) 3304 return (EINVAL); 3305 3306 if (lnr->lnr_addr.ss_family != AF_INET6) 3307 return (EAFNOSUPPORT); 3308 3309 sin6 = (sin6_t *)&lnr->lnr_addr; 3310 addr = sin6->sin6_addr; 3311 nce = ndp_lookup_v6(ipif->ipif_ill, &addr, B_FALSE); 3312 if (nce == NULL) 3313 return (ESRCH); 3314 ndp_delete(nce); 3315 NCE_REFRELE(nce); 3316 return (0); 3317 } 3318 3319 /* 3320 * Return nbr cache info. 3321 */ 3322 /* ARGSUSED */ 3323 int 3324 ip_siocqueryndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 3325 ip_ioctl_cmd_t *ipip, void *dummy_ifreq) 3326 { 3327 ill_t *ill = ipif->ipif_ill; 3328 struct lifreq *lifr; 3329 lif_nd_req_t *lnr; 3330 3331 lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr; 3332 lnr = &lifr->lifr_nd; 3333 /* Only allow for logical unit zero i.e. not on "le0:17" */ 3334 if (ipif->ipif_id != 0) 3335 return (EINVAL); 3336 3337 if (!ipif->ipif_isv6) 3338 return (EINVAL); 3339 3340 if (lnr->lnr_addr.ss_family != AF_INET6) 3341 return (EAFNOSUPPORT); 3342 3343 if (ill->ill_phys_addr_length > sizeof (lnr->lnr_hdw_addr)) 3344 return (EINVAL); 3345 3346 return (ndp_query(ill, lnr)); 3347 } 3348 3349 /* 3350 * Perform an update of the nd entry for the specified address. 3351 */ 3352 /* ARGSUSED */ 3353 int 3354 ip_siocsetndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 3355 ip_ioctl_cmd_t *ipip, void *dummy_ifreq) 3356 { 3357 ill_t *ill = ipif->ipif_ill; 3358 struct lifreq *lifr; 3359 lif_nd_req_t *lnr; 3360 3361 ASSERT(!(q->q_flag & QREADR) && q->q_next == NULL); 3362 3363 lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr; 3364 lnr = &lifr->lifr_nd; 3365 /* Only allow for logical unit zero i.e. not on "le0:17" */ 3366 if (ipif->ipif_id != 0) 3367 return (EINVAL); 3368 3369 if (!ipif->ipif_isv6) 3370 return (EINVAL); 3371 3372 if (lnr->lnr_addr.ss_family != AF_INET6) 3373 return (EAFNOSUPPORT); 3374 3375 return (ndp_sioc_update(ill, lnr)); 3376 } 3377