1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 27 */ 28 29 /* 30 * This file contains the interface control functions for IPv6. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/sysmacros.h> 35 #include <sys/stream.h> 36 #include <sys/dlpi.h> 37 #include <sys/stropts.h> 38 #include <sys/ddi.h> 39 #include <sys/cmn_err.h> 40 #include <sys/kstat.h> 41 #include <sys/debug.h> 42 #include <sys/zone.h> 43 #include <sys/policy.h> 44 45 #include <sys/systm.h> 46 #include <sys/param.h> 47 #include <sys/socket.h> 48 #include <sys/isa_defs.h> 49 #include <net/if.h> 50 #include <net/if_dl.h> 51 #include <net/route.h> 52 #include <netinet/in.h> 53 #include <netinet/igmp_var.h> 54 #include <netinet/ip6.h> 55 #include <netinet/icmp6.h> 56 57 #include <inet/common.h> 58 #include <inet/nd.h> 59 #include <inet/mib2.h> 60 #include <inet/ip.h> 61 #include <inet/ip6.h> 62 #include <inet/ip_multi.h> 63 #include <inet/ip_ire.h> 64 #include <inet/ip_rts.h> 65 #include <inet/ip_ndp.h> 66 #include <inet/ip_if.h> 67 #include <inet/ip6_asp.h> 68 #include <inet/ipclassifier.h> 69 #include <inet/sctp_ip.h> 70 71 #include <sys/tsol/tndb.h> 72 #include <sys/tsol/tnet.h> 73 74 static in6_addr_t ipv6_ll_template = 75 {(uint32_t)V6_LINKLOCAL, 0x0, 0x0, 0x0}; 76 77 static ipif_t * 78 ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst, 79 queue_t *q, mblk_t *mp, ipsq_func_t func, int *error, ip_stack_t *ipst); 80 81 /* 82 * These two functions, ipif_lookup_group_v6() and ill_lookup_group_v6(), 83 * are called when an application does not specify an interface to be 84 * used for multicast traffic. It calls ire_lookup_multi_v6() to look 85 * for an interface route for the specified multicast group. Doing 86 * this allows the administrator to add prefix routes for multicast to 87 * indicate which interface to be used for multicast traffic in the above 88 * scenario. The route could be for all multicast (ff00::/8), for a single 89 * multicast group (a /128 route) or anything in between. If there is no 90 * such multicast route, we just find any multicast capable interface and 91 * return it. 92 */ 93 ipif_t * 94 ipif_lookup_group_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst) 95 { 96 ire_t *ire; 97 ipif_t *ipif; 98 99 ire = ire_lookup_multi_v6(group, zoneid, ipst); 100 if (ire != NULL) { 101 ipif = ire->ire_ipif; 102 ipif_refhold(ipif); 103 ire_refrele(ire); 104 return (ipif); 105 } 106 107 return (ipif_lookup_multicast(ipst, zoneid, B_TRUE)); 108 } 109 110 ill_t * 111 ill_lookup_group_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst) 112 { 113 ire_t *ire; 114 ill_t *ill; 115 ipif_t *ipif; 116 117 ire = ire_lookup_multi_v6(group, zoneid, ipst); 118 if (ire != NULL) { 119 ill = ire->ire_ipif->ipif_ill; 120 ill_refhold(ill); 121 ire_refrele(ire); 122 return (ill); 123 } 124 125 ipif = ipif_lookup_multicast(ipst, zoneid, B_TRUE); 126 if (ipif == NULL) 127 return (NULL); 128 129 ill = ipif->ipif_ill; 130 ill_refhold(ill); 131 ipif_refrele(ipif); 132 return (ill); 133 } 134 135 /* 136 * Look for an ipif with the specified interface address and destination. 137 * The destination address is used only for matching point-to-point interfaces. 138 */ 139 static ipif_t * 140 ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst, 141 queue_t *q, mblk_t *mp, ipsq_func_t func, int *error, ip_stack_t *ipst) 142 { 143 ipif_t *ipif; 144 ill_t *ill; 145 ipsq_t *ipsq; 146 ill_walk_context_t ctx; 147 148 if (error != NULL) 149 *error = 0; 150 151 /* 152 * First match all the point-to-point interfaces 153 * before looking at non-point-to-point interfaces. 154 * This is done to avoid returning non-point-to-point 155 * ipif instead of unnumbered point-to-point ipif. 156 */ 157 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 158 ill = ILL_START_WALK_V6(&ctx, ipst); 159 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 160 GRAB_CONN_LOCK(q); 161 mutex_enter(&ill->ill_lock); 162 for (ipif = ill->ill_ipif; ipif != NULL; 163 ipif = ipif->ipif_next) { 164 /* Allow the ipif to be down */ 165 if ((ipif->ipif_flags & IPIF_POINTOPOINT) && 166 (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 167 if_addr)) && 168 (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr, 169 dst))) { 170 if (IPIF_CAN_LOOKUP(ipif)) { 171 ipif_refhold_locked(ipif); 172 mutex_exit(&ill->ill_lock); 173 RELEASE_CONN_LOCK(q); 174 rw_exit(&ipst->ips_ill_g_lock); 175 return (ipif); 176 } else if (IPIF_CAN_WAIT(ipif, q)) { 177 ipsq = ill->ill_phyint->phyint_ipsq; 178 mutex_enter(&ipsq->ipsq_lock); 179 mutex_enter(&ipsq->ipsq_xop->ipx_lock); 180 mutex_exit(&ill->ill_lock); 181 rw_exit(&ipst->ips_ill_g_lock); 182 ipsq_enq(ipsq, q, mp, func, NEW_OP, 183 ill); 184 mutex_exit(&ipsq->ipsq_xop->ipx_lock); 185 mutex_exit(&ipsq->ipsq_lock); 186 RELEASE_CONN_LOCK(q); 187 if (error != NULL) 188 *error = EINPROGRESS; 189 return (NULL); 190 } 191 } 192 } 193 mutex_exit(&ill->ill_lock); 194 RELEASE_CONN_LOCK(q); 195 } 196 rw_exit(&ipst->ips_ill_g_lock); 197 /* lookup the ipif based on interface address */ 198 ipif = ipif_lookup_addr_v6(if_addr, NULL, ALL_ZONES, q, mp, func, 199 error, ipst); 200 ASSERT(ipif == NULL || ipif->ipif_isv6); 201 return (ipif); 202 } 203 204 /* 205 * Common function for ipif_lookup_addr_v6() and ipif_lookup_addr_exact_v6(). 206 */ 207 static ipif_t * 208 ipif_lookup_addr_common_v6(const in6_addr_t *addr, ill_t *match_ill, 209 boolean_t match_illgrp, zoneid_t zoneid, queue_t *q, mblk_t *mp, 210 ipsq_func_t func, int *error, ip_stack_t *ipst) 211 { 212 ipif_t *ipif; 213 ill_t *ill; 214 boolean_t ptp = B_FALSE; 215 ipsq_t *ipsq; 216 ill_walk_context_t ctx; 217 218 if (error != NULL) 219 *error = 0; 220 221 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 222 /* 223 * Repeat twice, first based on local addresses and 224 * next time for pointopoint. 225 */ 226 repeat: 227 ill = ILL_START_WALK_V6(&ctx, ipst); 228 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 229 if (match_ill != NULL && ill != match_ill && 230 (!match_illgrp || !IS_IN_SAME_ILLGRP(ill, match_ill))) { 231 continue; 232 } 233 GRAB_CONN_LOCK(q); 234 mutex_enter(&ill->ill_lock); 235 for (ipif = ill->ill_ipif; ipif != NULL; 236 ipif = ipif->ipif_next) { 237 if (zoneid != ALL_ZONES && 238 ipif->ipif_zoneid != zoneid && 239 ipif->ipif_zoneid != ALL_ZONES) 240 continue; 241 /* Allow the ipif to be down */ 242 if ((!ptp && (IN6_ARE_ADDR_EQUAL( 243 &ipif->ipif_v6lcl_addr, addr) && 244 (ipif->ipif_flags & IPIF_UNNUMBERED) == 0)) || 245 (ptp && (ipif->ipif_flags & IPIF_POINTOPOINT) && 246 IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr, 247 addr))) { 248 if (IPIF_CAN_LOOKUP(ipif)) { 249 ipif_refhold_locked(ipif); 250 mutex_exit(&ill->ill_lock); 251 RELEASE_CONN_LOCK(q); 252 rw_exit(&ipst->ips_ill_g_lock); 253 return (ipif); 254 } else if (IPIF_CAN_WAIT(ipif, q)) { 255 ipsq = ill->ill_phyint->phyint_ipsq; 256 mutex_enter(&ipsq->ipsq_lock); 257 mutex_enter(&ipsq->ipsq_xop->ipx_lock); 258 mutex_exit(&ill->ill_lock); 259 rw_exit(&ipst->ips_ill_g_lock); 260 ipsq_enq(ipsq, q, mp, func, NEW_OP, 261 ill); 262 mutex_exit(&ipsq->ipsq_xop->ipx_lock); 263 mutex_exit(&ipsq->ipsq_lock); 264 RELEASE_CONN_LOCK(q); 265 if (error != NULL) 266 *error = EINPROGRESS; 267 return (NULL); 268 } 269 } 270 } 271 mutex_exit(&ill->ill_lock); 272 RELEASE_CONN_LOCK(q); 273 } 274 275 /* If we already did the ptp case, then we are done */ 276 if (ptp) { 277 rw_exit(&ipst->ips_ill_g_lock); 278 if (error != NULL) 279 *error = ENXIO; 280 return (NULL); 281 } 282 ptp = B_TRUE; 283 goto repeat; 284 } 285 286 boolean_t 287 ip_addr_exists_v6(const in6_addr_t *addr, zoneid_t zoneid, 288 ip_stack_t *ipst) 289 { 290 ipif_t *ipif; 291 ill_t *ill; 292 ill_walk_context_t ctx; 293 294 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 295 296 ill = ILL_START_WALK_V6(&ctx, ipst); 297 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 298 mutex_enter(&ill->ill_lock); 299 for (ipif = ill->ill_ipif; ipif != NULL; 300 ipif = ipif->ipif_next) { 301 if (zoneid != ALL_ZONES && 302 ipif->ipif_zoneid != zoneid && 303 ipif->ipif_zoneid != ALL_ZONES) 304 continue; 305 /* Allow the ipif to be down */ 306 if (((IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 307 addr) && 308 (ipif->ipif_flags & IPIF_UNNUMBERED) == 0)) || 309 ((ipif->ipif_flags & IPIF_POINTOPOINT) && 310 IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr, 311 addr))) { 312 mutex_exit(&ill->ill_lock); 313 rw_exit(&ipst->ips_ill_g_lock); 314 return (B_TRUE); 315 } 316 } 317 mutex_exit(&ill->ill_lock); 318 } 319 320 rw_exit(&ipst->ips_ill_g_lock); 321 return (B_FALSE); 322 } 323 324 /* 325 * Lookup an ipif with the specified address. For point-to-point links we 326 * look for matches on either the destination address or the local address, 327 * but we skip the local address check if IPIF_UNNUMBERED is set. If the 328 * `match_ill' argument is non-NULL, the lookup is restricted to that ill 329 * (or illgrp if `match_ill' is in an IPMP group). 330 */ 331 ipif_t * 332 ipif_lookup_addr_v6(const in6_addr_t *addr, ill_t *match_ill, zoneid_t zoneid, 333 queue_t *q, mblk_t *mp, ipsq_func_t func, int *error, ip_stack_t *ipst) 334 { 335 return (ipif_lookup_addr_common_v6(addr, match_ill, B_TRUE, zoneid, q, 336 mp, func, error, ipst)); 337 } 338 339 /* 340 * Special abbreviated version of ipif_lookup_addr_v6() that doesn't match 341 * `match_ill' across the IPMP group. This function is only needed in some 342 * corner-cases; almost everything should use ipif_lookup_addr_v6(). 343 */ 344 ipif_t * 345 ipif_lookup_addr_exact_v6(const in6_addr_t *addr, ill_t *match_ill, 346 ip_stack_t *ipst) 347 { 348 ASSERT(match_ill != NULL); 349 return (ipif_lookup_addr_common_v6(addr, match_ill, B_FALSE, ALL_ZONES, 350 NULL, NULL, NULL, NULL, ipst)); 351 } 352 353 /* 354 * Look for an ipif with the specified address. For point-point links 355 * we look for matches on either the destination address and the local 356 * address, but we ignore the check on the local address if IPIF_UNNUMBERED 357 * is set. 358 * If the `match_ill' argument is non-NULL, the lookup is restricted to that 359 * ill (or illgrp if `match_ill' is in an IPMP group). 360 * Return the zoneid for the ipif. ALL_ZONES if none found. 361 */ 362 zoneid_t 363 ipif_lookup_addr_zoneid_v6(const in6_addr_t *addr, ill_t *match_ill, 364 ip_stack_t *ipst) 365 { 366 ipif_t *ipif; 367 ill_t *ill; 368 boolean_t ptp = B_FALSE; 369 ill_walk_context_t ctx; 370 zoneid_t zoneid; 371 372 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 373 /* 374 * Repeat twice, first based on local addresses and 375 * next time for pointopoint. 376 */ 377 repeat: 378 ill = ILL_START_WALK_V6(&ctx, ipst); 379 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 380 if (match_ill != NULL && ill != match_ill && 381 !IS_IN_SAME_ILLGRP(ill, match_ill)) { 382 continue; 383 } 384 mutex_enter(&ill->ill_lock); 385 for (ipif = ill->ill_ipif; ipif != NULL; 386 ipif = ipif->ipif_next) { 387 /* Allow the ipif to be down */ 388 if ((!ptp && (IN6_ARE_ADDR_EQUAL( 389 &ipif->ipif_v6lcl_addr, addr) && 390 (ipif->ipif_flags & IPIF_UNNUMBERED) == 0)) || 391 (ptp && (ipif->ipif_flags & IPIF_POINTOPOINT) && 392 IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr, 393 addr)) && 394 !(ipif->ipif_state_flags & IPIF_CONDEMNED)) { 395 zoneid = ipif->ipif_zoneid; 396 mutex_exit(&ill->ill_lock); 397 rw_exit(&ipst->ips_ill_g_lock); 398 /* 399 * If ipif_zoneid was ALL_ZONES then we have 400 * a trusted extensions shared IP address. 401 * In that case GLOBAL_ZONEID works to send. 402 */ 403 if (zoneid == ALL_ZONES) 404 zoneid = GLOBAL_ZONEID; 405 return (zoneid); 406 } 407 } 408 mutex_exit(&ill->ill_lock); 409 } 410 411 /* If we already did the ptp case, then we are done */ 412 if (ptp) { 413 rw_exit(&ipst->ips_ill_g_lock); 414 return (ALL_ZONES); 415 } 416 ptp = B_TRUE; 417 goto repeat; 418 } 419 420 /* 421 * Perform various checks to verify that an address would make sense as a local 422 * interface address. This is currently only called when an attempt is made 423 * to set a local address. 424 * 425 * Does not allow a v4-mapped address, an address that equals the subnet 426 * anycast address, ... a multicast address, ... 427 */ 428 boolean_t 429 ip_local_addr_ok_v6(const in6_addr_t *addr, const in6_addr_t *subnet_mask) 430 { 431 in6_addr_t subnet; 432 433 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 434 return (B_TRUE); /* Allow all zeros */ 435 436 /* 437 * Don't allow all zeroes or host part, but allow 438 * all ones netmask. 439 */ 440 V6_MASK_COPY(*addr, *subnet_mask, subnet); 441 if (IN6_IS_ADDR_V4MAPPED(addr) || 442 (IN6_ARE_ADDR_EQUAL(addr, &subnet) && 443 !IN6_ARE_ADDR_EQUAL(subnet_mask, &ipv6_all_ones)) || 444 (IN6_IS_ADDR_V4COMPAT(addr) && CLASSD(V4_PART_OF_V6((*addr)))) || 445 IN6_IS_ADDR_MULTICAST(addr)) 446 return (B_FALSE); 447 448 return (B_TRUE); 449 } 450 451 /* 452 * Perform various checks to verify that an address would make sense as a 453 * remote/subnet interface address. 454 */ 455 boolean_t 456 ip_remote_addr_ok_v6(const in6_addr_t *addr, const in6_addr_t *subnet_mask) 457 { 458 in6_addr_t subnet; 459 460 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 461 return (B_TRUE); /* Allow all zeros */ 462 463 V6_MASK_COPY(*addr, *subnet_mask, subnet); 464 if (IN6_IS_ADDR_V4MAPPED(addr) || 465 (IN6_ARE_ADDR_EQUAL(addr, &subnet) && 466 !IN6_ARE_ADDR_EQUAL(subnet_mask, &ipv6_all_ones)) || 467 IN6_IS_ADDR_MULTICAST(addr) || 468 (IN6_IS_ADDR_V4COMPAT(addr) && CLASSD(V4_PART_OF_V6((*addr))))) 469 return (B_FALSE); 470 471 return (B_TRUE); 472 } 473 474 /* 475 * ip_rt_add_v6 is called to add an IPv6 route to the forwarding table. 476 * ipif_arg is passed in to associate it with the correct interface 477 * (for link-local destinations and gateways). 478 */ 479 /* ARGSUSED1 */ 480 int 481 ip_rt_add_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, 482 const in6_addr_t *gw_addr, const in6_addr_t *src_addr, int flags, 483 ipif_t *ipif_arg, ire_t **ire_arg, queue_t *q, mblk_t *mp, ipsq_func_t func, 484 struct rtsa_s *sp, ip_stack_t *ipst) 485 { 486 ire_t *ire; 487 ire_t *gw_ire = NULL; 488 ipif_t *ipif; 489 boolean_t ipif_refheld = B_FALSE; 490 uint_t type; 491 int match_flags = MATCH_IRE_TYPE; 492 int error; 493 tsol_gc_t *gc = NULL; 494 tsol_gcgrp_t *gcgrp = NULL; 495 boolean_t gcgrp_xtraref = B_FALSE; 496 497 if (ire_arg != NULL) 498 *ire_arg = NULL; 499 500 /* 501 * Prevent routes with a zero gateway from being created (since 502 * interfaces can currently be plumbed and brought up with no assigned 503 * address). 504 */ 505 if (IN6_IS_ADDR_UNSPECIFIED(gw_addr)) 506 return (ENETUNREACH); 507 508 /* 509 * If this is the case of RTF_HOST being set, then we set the netmask 510 * to all ones (regardless if one was supplied). 511 */ 512 if (flags & RTF_HOST) 513 mask = &ipv6_all_ones; 514 515 /* 516 * Get the ipif, if any, corresponding to the gw_addr 517 */ 518 ipif = ipif_lookup_interface_v6(gw_addr, dst_addr, q, mp, func, 519 &error, ipst); 520 if (ipif != NULL) 521 ipif_refheld = B_TRUE; 522 else if (error == EINPROGRESS) { 523 ip1dbg(("ip_rt_add_v6: null and EINPROGRESS")); 524 return (error); 525 } 526 527 /* 528 * GateD will attempt to create routes with a loopback interface 529 * address as the gateway and with RTF_GATEWAY set. We allow 530 * these routes to be added, but create them as interface routes 531 * since the gateway is an interface address. 532 */ 533 if ((ipif != NULL) && (ipif->ipif_ire_type == IRE_LOOPBACK)) { 534 flags &= ~RTF_GATEWAY; 535 if (IN6_ARE_ADDR_EQUAL(gw_addr, &ipv6_loopback) && 536 IN6_ARE_ADDR_EQUAL(dst_addr, &ipv6_loopback) && 537 IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones)) { 538 ire = ire_ctable_lookup_v6(dst_addr, 0, IRE_LOOPBACK, 539 ipif, ALL_ZONES, NULL, match_flags, ipst); 540 if (ire != NULL) { 541 ire_refrele(ire); 542 if (ipif_refheld) 543 ipif_refrele(ipif); 544 return (EEXIST); 545 } 546 ip1dbg(("ipif_up_done: 0x%p creating IRE 0x%x" 547 "for 0x%x\n", (void *)ipif, 548 ipif->ipif_ire_type, 549 ntohl(ipif->ipif_lcl_addr))); 550 ire = ire_create_v6( 551 dst_addr, 552 mask, 553 &ipif->ipif_v6src_addr, 554 NULL, 555 &ipif->ipif_mtu, 556 NULL, 557 NULL, 558 NULL, 559 ipif->ipif_net_type, 560 ipif, 561 NULL, 562 0, 563 0, 564 flags, 565 &ire_uinfo_null, 566 NULL, 567 NULL, 568 ipst); 569 if (ire == NULL) { 570 if (ipif_refheld) 571 ipif_refrele(ipif); 572 return (ENOMEM); 573 } 574 error = ire_add(&ire, q, mp, func, B_FALSE); 575 if (error == 0) 576 goto save_ire; 577 /* 578 * In the result of failure, ire_add() will have already 579 * deleted the ire in question, so there is no need to 580 * do that here. 581 */ 582 if (ipif_refheld) 583 ipif_refrele(ipif); 584 return (error); 585 } 586 } 587 588 /* 589 * Traditionally, interface routes are ones where RTF_GATEWAY isn't set 590 * and the gateway address provided is one of the system's interface 591 * addresses. By using the routing socket interface and supplying an 592 * RTA_IFP sockaddr with an interface index, an alternate method of 593 * specifying an interface route to be created is available which uses 594 * the interface index that specifies the outgoing interface rather than 595 * the address of an outgoing interface (which may not be able to 596 * uniquely identify an interface). When coupled with the RTF_GATEWAY 597 * flag, routes can be specified which not only specify the next-hop to 598 * be used when routing to a certain prefix, but also which outgoing 599 * interface should be used. 600 * 601 * Previously, interfaces would have unique addresses assigned to them 602 * and so the address assigned to a particular interface could be used 603 * to identify a particular interface. One exception to this was the 604 * case of an unnumbered interface (where IPIF_UNNUMBERED was set). 605 * 606 * With the advent of IPv6 and its link-local addresses, this 607 * restriction was relaxed and interfaces could share addresses between 608 * themselves. In fact, typically all of the link-local interfaces on 609 * an IPv6 node or router will have the same link-local address. In 610 * order to differentiate between these interfaces, the use of an 611 * interface index is necessary and this index can be carried inside a 612 * RTA_IFP sockaddr (which is actually a sockaddr_dl). One restriction 613 * of using the interface index, however, is that all of the ipif's that 614 * are part of an ill have the same index and so the RTA_IFP sockaddr 615 * cannot be used to differentiate between ipif's (or logical 616 * interfaces) that belong to the same ill (physical interface). 617 * 618 * For example, in the following case involving IPv4 interfaces and 619 * logical interfaces 620 * 621 * 192.0.2.32 255.255.255.224 192.0.2.33 U if0 622 * 192.0.2.32 255.255.255.224 192.0.2.34 U if0:1 623 * 192.0.2.32 255.255.255.224 192.0.2.35 U if0:2 624 * 625 * the ipif's corresponding to each of these interface routes can be 626 * uniquely identified by the "gateway" (actually interface address). 627 * 628 * In this case involving multiple IPv6 default routes to a particular 629 * link-local gateway, the use of RTA_IFP is necessary to specify which 630 * default route is of interest: 631 * 632 * default fe80::123:4567:89ab:cdef U if0 633 * default fe80::123:4567:89ab:cdef U if1 634 */ 635 636 /* RTF_GATEWAY not set */ 637 if (!(flags & RTF_GATEWAY)) { 638 queue_t *stq; 639 640 if (sp != NULL) { 641 ip2dbg(("ip_rt_add_v6: gateway security attributes " 642 "cannot be set with interface route\n")); 643 if (ipif_refheld) 644 ipif_refrele(ipif); 645 return (EINVAL); 646 } 647 648 /* 649 * As the interface index specified with the RTA_IFP sockaddr is 650 * the same for all ipif's off of an ill, the matching logic 651 * below uses MATCH_IRE_ILL if such an index was specified. 652 * This means that routes sharing the same prefix when added 653 * using a RTA_IFP sockaddr must have distinct interface 654 * indices (namely, they must be on distinct ill's). 655 * 656 * On the other hand, since the gateway address will usually be 657 * different for each ipif on the system, the matching logic 658 * uses MATCH_IRE_IPIF in the case of a traditional interface 659 * route. This means that interface routes for the same prefix 660 * can be created if they belong to distinct ipif's and if a 661 * RTA_IFP sockaddr is not present. 662 */ 663 if (ipif_arg != NULL) { 664 if (ipif_refheld) { 665 ipif_refrele(ipif); 666 ipif_refheld = B_FALSE; 667 } 668 ipif = ipif_arg; 669 match_flags |= MATCH_IRE_ILL; 670 } else { 671 /* 672 * Check the ipif corresponding to the gw_addr 673 */ 674 if (ipif == NULL) 675 return (ENETUNREACH); 676 match_flags |= MATCH_IRE_IPIF; 677 } 678 679 ASSERT(ipif != NULL); 680 /* 681 * We check for an existing entry at this point. 682 */ 683 match_flags |= MATCH_IRE_MASK; 684 ire = ire_ftable_lookup_v6(dst_addr, mask, 0, IRE_INTERFACE, 685 ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst); 686 if (ire != NULL) { 687 ire_refrele(ire); 688 if (ipif_refheld) 689 ipif_refrele(ipif); 690 return (EEXIST); 691 } 692 693 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 694 ? ipif->ipif_rq : ipif->ipif_wq; 695 696 /* 697 * Create a copy of the IRE_LOOPBACK, IRE_IF_NORESOLVER or 698 * IRE_IF_RESOLVER with the modified address and netmask. 699 */ 700 ire = ire_create_v6( 701 dst_addr, 702 mask, 703 &ipif->ipif_v6src_addr, 704 NULL, 705 &ipif->ipif_mtu, 706 NULL, 707 NULL, 708 stq, 709 ipif->ipif_net_type, 710 ipif, 711 NULL, 712 0, 713 0, 714 flags, 715 &ire_uinfo_null, 716 NULL, 717 NULL, 718 ipst); 719 if (ire == NULL) { 720 if (ipif_refheld) 721 ipif_refrele(ipif); 722 return (ENOMEM); 723 } 724 725 /* 726 * Some software (for example, GateD and Sun Cluster) attempts 727 * to create (what amount to) IRE_PREFIX routes with the 728 * loopback address as the gateway. This is primarily done to 729 * set up prefixes with the RTF_REJECT flag set (for example, 730 * when generating aggregate routes). We also OR in the 731 * RTF_BLACKHOLE flag as these interface routes, by 732 * definition, can only be that. 733 * 734 * If the IRE type (as defined by ipif->ipif_net_type) is 735 * IRE_LOOPBACK, then we map the request into a 736 * IRE_IF_NORESOLVER. 737 * 738 * Needless to say, the real IRE_LOOPBACK is NOT created by this 739 * routine, but rather using ire_create_v6() directly. 740 */ 741 if (ipif->ipif_net_type == IRE_LOOPBACK) { 742 ire->ire_type = IRE_IF_NORESOLVER; 743 ire->ire_flags |= RTF_BLACKHOLE; 744 } 745 error = ire_add(&ire, q, mp, func, B_FALSE); 746 if (error == 0) 747 goto save_ire; 748 /* 749 * In the result of failure, ire_add() will have already 750 * deleted the ire in question, so there is no need to 751 * do that here. 752 */ 753 if (ipif_refheld) 754 ipif_refrele(ipif); 755 return (error); 756 } 757 if (ipif_refheld) { 758 ipif_refrele(ipif); 759 ipif_refheld = B_FALSE; 760 } 761 762 /* 763 * Get an interface IRE for the specified gateway. 764 * If we don't have an IRE_IF_NORESOLVER or IRE_IF_RESOLVER for the 765 * gateway, it is currently unreachable and we fail the request 766 * accordingly. 767 */ 768 ipif = ipif_arg; 769 if (ipif_arg != NULL) 770 match_flags |= MATCH_IRE_ILL; 771 gw_ire = ire_ftable_lookup_v6(gw_addr, 0, 0, IRE_INTERFACE, ipif_arg, 772 NULL, ALL_ZONES, 0, NULL, match_flags, ipst); 773 if (gw_ire == NULL) 774 return (ENETUNREACH); 775 776 /* 777 * We create one of three types of IREs as a result of this request 778 * based on the netmask. A netmask of all ones (which is automatically 779 * assumed when RTF_HOST is set) results in an IRE_HOST being created. 780 * An all zeroes netmask implies a default route so an IRE_DEFAULT is 781 * created. Otherwise, an IRE_PREFIX route is created for the 782 * destination prefix. 783 */ 784 if (IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones)) 785 type = IRE_HOST; 786 else if (IN6_IS_ADDR_UNSPECIFIED(mask)) 787 type = IRE_DEFAULT; 788 else 789 type = IRE_PREFIX; 790 791 /* check for a duplicate entry */ 792 ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, type, ipif_arg, 793 NULL, ALL_ZONES, 0, NULL, 794 match_flags | MATCH_IRE_MASK | MATCH_IRE_GW, ipst); 795 if (ire != NULL) { 796 ire_refrele(gw_ire); 797 ire_refrele(ire); 798 return (EEXIST); 799 } 800 801 /* Security attribute exists */ 802 if (sp != NULL) { 803 tsol_gcgrp_addr_t ga; 804 805 /* find or create the gateway credentials group */ 806 ga.ga_af = AF_INET6; 807 ga.ga_addr = *gw_addr; 808 809 /* we hold reference to it upon success */ 810 gcgrp = gcgrp_lookup(&ga, B_TRUE); 811 if (gcgrp == NULL) { 812 ire_refrele(gw_ire); 813 return (ENOMEM); 814 } 815 816 /* 817 * Create and add the security attribute to the group; a 818 * reference to the group is made upon allocating a new 819 * entry successfully. If it finds an already-existing 820 * entry for the security attribute in the group, it simply 821 * returns it and no new reference is made to the group. 822 */ 823 gc = gc_create(sp, gcgrp, &gcgrp_xtraref); 824 if (gc == NULL) { 825 /* release reference held by gcgrp_lookup */ 826 GCGRP_REFRELE(gcgrp); 827 ire_refrele(gw_ire); 828 return (ENOMEM); 829 } 830 } 831 832 /* Create the IRE. */ 833 ire = ire_create_v6( 834 dst_addr, /* dest address */ 835 mask, /* mask */ 836 /* src address assigned by the caller? */ 837 (((flags & RTF_SETSRC) && !IN6_IS_ADDR_UNSPECIFIED(src_addr)) ? 838 src_addr : NULL), 839 gw_addr, /* gateway address */ 840 &gw_ire->ire_max_frag, 841 NULL, /* no src nce */ 842 NULL, /* no recv-from queue */ 843 NULL, /* no send-to queue */ 844 (ushort_t)type, /* IRE type */ 845 ipif_arg, 846 NULL, 847 0, 848 0, 849 flags, 850 &gw_ire->ire_uinfo, /* Inherit ULP info from gw */ 851 gc, /* security attribute */ 852 NULL, 853 ipst); 854 855 /* 856 * The ire holds a reference to the 'gc' and the 'gc' holds a 857 * reference to the 'gcgrp'. We can now release the extra reference 858 * the 'gcgrp' acquired in the gcgrp_lookup, if it was not used. 859 */ 860 if (gcgrp_xtraref) 861 GCGRP_REFRELE(gcgrp); 862 if (ire == NULL) { 863 if (gc != NULL) 864 GC_REFRELE(gc); 865 ire_refrele(gw_ire); 866 return (ENOMEM); 867 } 868 869 /* 870 * POLICY: should we allow an RTF_HOST with address INADDR_ANY? 871 * SUN/OS socket stuff does but do we really want to allow ::0 ? 872 */ 873 874 /* Add the new IRE. */ 875 error = ire_add(&ire, q, mp, func, B_FALSE); 876 /* 877 * In the result of failure, ire_add() will have already 878 * deleted the ire in question, so there is no need to 879 * do that here. 880 */ 881 if (error != 0) { 882 ire_refrele(gw_ire); 883 return (error); 884 } 885 886 if (flags & RTF_MULTIRT) { 887 /* 888 * Invoke the CGTP (multirouting) filtering module 889 * to add the dst address in the filtering database. 890 * Replicated inbound packets coming from that address 891 * will be filtered to discard the duplicates. 892 * It is not necessary to call the CGTP filter hook 893 * when the dst address is a multicast, because an 894 * IP source address cannot be a multicast. 895 */ 896 if (ipst->ips_ip_cgtp_filter_ops != NULL && 897 !IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))) { 898 int res; 899 900 res = ipst->ips_ip_cgtp_filter_ops->cfo_add_dest_v6( 901 ipst->ips_netstack->netstack_stackid, 902 &ire->ire_addr_v6, 903 &ire->ire_gateway_addr_v6, 904 &ire->ire_src_addr_v6, 905 &gw_ire->ire_src_addr_v6); 906 if (res != 0) { 907 ire_refrele(gw_ire); 908 ire_delete(ire); 909 return (res); 910 } 911 } 912 } 913 914 /* 915 * Now that the prefix IRE entry has been created, delete any 916 * existing gateway IRE cache entries as well as any IRE caches 917 * using the gateway, and force them to be created through 918 * ip_newroute_v6. 919 */ 920 if (gc != NULL) { 921 ASSERT(gcgrp != NULL); 922 ire_clookup_delete_cache_gw_v6(gw_addr, ALL_ZONES, ipst); 923 } 924 925 save_ire: 926 if (gw_ire != NULL) { 927 ire_refrele(gw_ire); 928 } 929 if (ipif != NULL) { 930 mblk_t *save_mp; 931 932 /* 933 * Save enough information so that we can recreate the IRE if 934 * the interface goes down and then up. The metrics associated 935 * with the route will be saved as well when rts_setmetrics() is 936 * called after the IRE has been created. In the case where 937 * memory cannot be allocated, none of this information will be 938 * saved. 939 */ 940 save_mp = allocb(sizeof (ifrt_t), BPRI_MED); 941 if (save_mp != NULL) { 942 ifrt_t *ifrt; 943 944 save_mp->b_wptr += sizeof (ifrt_t); 945 ifrt = (ifrt_t *)save_mp->b_rptr; 946 bzero(ifrt, sizeof (ifrt_t)); 947 ifrt->ifrt_type = ire->ire_type; 948 ifrt->ifrt_v6addr = ire->ire_addr_v6; 949 mutex_enter(&ire->ire_lock); 950 ifrt->ifrt_v6gateway_addr = ire->ire_gateway_addr_v6; 951 ifrt->ifrt_v6src_addr = ire->ire_src_addr_v6; 952 mutex_exit(&ire->ire_lock); 953 ifrt->ifrt_v6mask = ire->ire_mask_v6; 954 ifrt->ifrt_flags = ire->ire_flags; 955 ifrt->ifrt_max_frag = ire->ire_max_frag; 956 mutex_enter(&ipif->ipif_saved_ire_lock); 957 save_mp->b_cont = ipif->ipif_saved_ire_mp; 958 ipif->ipif_saved_ire_mp = save_mp; 959 ipif->ipif_saved_ire_cnt++; 960 mutex_exit(&ipif->ipif_saved_ire_lock); 961 } 962 } 963 if (ire_arg != NULL) { 964 /* 965 * Store the ire that was successfully added into where ire_arg 966 * points to so that callers don't have to look it up 967 * themselves (but they are responsible for ire_refrele()ing 968 * the ire when they are finished with it). 969 */ 970 *ire_arg = ire; 971 } else { 972 ire_refrele(ire); /* Held in ire_add */ 973 } 974 if (ipif_refheld) 975 ipif_refrele(ipif); 976 return (0); 977 } 978 979 /* 980 * ip_rt_delete_v6 is called to delete an IPv6 route. 981 * ipif_arg is passed in to associate it with the correct interface 982 * (for link-local destinations and gateways). 983 */ 984 /* ARGSUSED4 */ 985 int 986 ip_rt_delete_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask, 987 const in6_addr_t *gw_addr, uint_t rtm_addrs, int flags, ipif_t *ipif_arg, 988 queue_t *q, mblk_t *mp, ipsq_func_t func, ip_stack_t *ipst) 989 { 990 ire_t *ire = NULL; 991 ipif_t *ipif; 992 uint_t type; 993 uint_t match_flags = MATCH_IRE_TYPE; 994 int err = 0; 995 boolean_t ipif_refheld = B_FALSE; 996 997 /* 998 * If this is the case of RTF_HOST being set, then we set the netmask 999 * to all ones. Otherwise, we use the netmask if one was supplied. 1000 */ 1001 if (flags & RTF_HOST) { 1002 mask = &ipv6_all_ones; 1003 match_flags |= MATCH_IRE_MASK; 1004 } else if (rtm_addrs & RTA_NETMASK) { 1005 match_flags |= MATCH_IRE_MASK; 1006 } 1007 1008 /* 1009 * Note that RTF_GATEWAY is never set on a delete, therefore 1010 * we check if the gateway address is one of our interfaces first, 1011 * and fall back on RTF_GATEWAY routes. 1012 * 1013 * This makes it possible to delete an original 1014 * IRE_IF_NORESOLVER/IRE_IF_RESOLVER - consistent with SunOS 4.1. 1015 * 1016 * As the interface index specified with the RTA_IFP sockaddr is the 1017 * same for all ipif's off of an ill, the matching logic below uses 1018 * MATCH_IRE_ILL if such an index was specified. This means a route 1019 * sharing the same prefix and interface index as the the route 1020 * intended to be deleted might be deleted instead if a RTA_IFP sockaddr 1021 * is specified in the request. 1022 * 1023 * On the other hand, since the gateway address will usually be 1024 * different for each ipif on the system, the matching logic 1025 * uses MATCH_IRE_IPIF in the case of a traditional interface 1026 * route. This means that interface routes for the same prefix can be 1027 * uniquely identified if they belong to distinct ipif's and if a 1028 * RTA_IFP sockaddr is not present. 1029 * 1030 * For more detail on specifying routes by gateway address and by 1031 * interface index, see the comments in ip_rt_add_v6(). 1032 */ 1033 ipif = ipif_lookup_interface_v6(gw_addr, dst_addr, q, mp, func, &err, 1034 ipst); 1035 if (ipif != NULL) { 1036 ipif_refheld = B_TRUE; 1037 if (ipif_arg != NULL) { 1038 ipif_refrele(ipif); 1039 ipif_refheld = B_FALSE; 1040 ipif = ipif_arg; 1041 match_flags |= MATCH_IRE_ILL; 1042 } else { 1043 match_flags |= MATCH_IRE_IPIF; 1044 } 1045 1046 if (ipif->ipif_ire_type == IRE_LOOPBACK) 1047 ire = ire_ctable_lookup_v6(dst_addr, 0, IRE_LOOPBACK, 1048 ipif, ALL_ZONES, NULL, match_flags, ipst); 1049 if (ire == NULL) 1050 ire = ire_ftable_lookup_v6(dst_addr, mask, 0, 1051 IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, NULL, 1052 match_flags, ipst); 1053 } else if (err == EINPROGRESS) { 1054 return (err); 1055 } else { 1056 err = 0; 1057 } 1058 if (ire == NULL) { 1059 /* 1060 * At this point, the gateway address is not one of our own 1061 * addresses or a matching interface route was not found. We 1062 * set the IRE type to lookup based on whether 1063 * this is a host route, a default route or just a prefix. 1064 * 1065 * If an ipif_arg was passed in, then the lookup is based on an 1066 * interface index so MATCH_IRE_ILL is added to match_flags. 1067 * In any case, MATCH_IRE_IPIF is cleared and MATCH_IRE_GW is 1068 * set as the route being looked up is not a traditional 1069 * interface route. 1070 */ 1071 match_flags &= ~MATCH_IRE_IPIF; 1072 match_flags |= MATCH_IRE_GW; 1073 if (ipif_arg != NULL) 1074 match_flags |= MATCH_IRE_ILL; 1075 if (IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones)) 1076 type = IRE_HOST; 1077 else if (IN6_IS_ADDR_UNSPECIFIED(mask)) 1078 type = IRE_DEFAULT; 1079 else 1080 type = IRE_PREFIX; 1081 ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, type, 1082 ipif_arg, NULL, ALL_ZONES, 0, NULL, match_flags, ipst); 1083 } 1084 1085 if (ipif_refheld) { 1086 ipif_refrele(ipif); 1087 ipif_refheld = B_FALSE; 1088 } 1089 if (ire == NULL) 1090 return (ESRCH); 1091 1092 if (ire->ire_flags & RTF_MULTIRT) { 1093 /* 1094 * Invoke the CGTP (multirouting) filtering module 1095 * to remove the dst address from the filtering database. 1096 * Packets coming from that address will no longer be 1097 * filtered to remove duplicates. 1098 */ 1099 if (ipst->ips_ip_cgtp_filter_ops != NULL) { 1100 err = ipst->ips_ip_cgtp_filter_ops->cfo_del_dest_v6( 1101 ipst->ips_netstack->netstack_stackid, 1102 &ire->ire_addr_v6, &ire->ire_gateway_addr_v6); 1103 } 1104 } 1105 1106 ipif = ire->ire_ipif; 1107 if (ipif != NULL) { 1108 mblk_t **mpp; 1109 mblk_t *mp; 1110 ifrt_t *ifrt; 1111 in6_addr_t gw_addr_v6; 1112 1113 /* Remove from ipif_saved_ire_mp list if it is there */ 1114 mutex_enter(&ire->ire_lock); 1115 gw_addr_v6 = ire->ire_gateway_addr_v6; 1116 mutex_exit(&ire->ire_lock); 1117 mutex_enter(&ipif->ipif_saved_ire_lock); 1118 for (mpp = &ipif->ipif_saved_ire_mp; *mpp != NULL; 1119 mpp = &(*mpp)->b_cont) { 1120 /* 1121 * On a given ipif, the triple of address, gateway and 1122 * mask is unique for each saved IRE (in the case of 1123 * ordinary interface routes, the gateway address is 1124 * all-zeroes). 1125 */ 1126 mp = *mpp; 1127 ifrt = (ifrt_t *)mp->b_rptr; 1128 if (IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr, 1129 &ire->ire_addr_v6) && 1130 IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr, 1131 &gw_addr_v6) && 1132 IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask, 1133 &ire->ire_mask_v6)) { 1134 *mpp = mp->b_cont; 1135 ipif->ipif_saved_ire_cnt--; 1136 freeb(mp); 1137 break; 1138 } 1139 } 1140 mutex_exit(&ipif->ipif_saved_ire_lock); 1141 } 1142 ire_delete(ire); 1143 ire_refrele(ire); 1144 return (err); 1145 } 1146 1147 /* 1148 * Derive an interface id from the link layer address. 1149 */ 1150 void 1151 ill_setdefaulttoken(ill_t *ill) 1152 { 1153 if (!ill->ill_manual_token) { 1154 bzero(&ill->ill_token, sizeof (ill->ill_token)); 1155 MEDIA_V6INTFID(ill->ill_media, ill, &ill->ill_token); 1156 ill->ill_token_length = IPV6_TOKEN_LEN; 1157 } 1158 } 1159 1160 void 1161 ill_setdesttoken(ill_t *ill) 1162 { 1163 bzero(&ill->ill_dest_token, sizeof (ill->ill_dest_token)); 1164 MEDIA_V6DESTINTFID(ill->ill_media, ill, &ill->ill_dest_token); 1165 } 1166 1167 /* 1168 * Create a link-local address from a token. 1169 */ 1170 static void 1171 ipif_get_linklocal(in6_addr_t *dest, const in6_addr_t *token) 1172 { 1173 int i; 1174 1175 for (i = 0; i < 4; i++) { 1176 dest->s6_addr32[i] = 1177 token->s6_addr32[i] | ipv6_ll_template.s6_addr32[i]; 1178 } 1179 } 1180 1181 /* 1182 * Set a default IPv6 address for a 6to4 tunnel interface 2002:<tsrc>::1/16 1183 */ 1184 static void 1185 ipif_set6to4addr(ipif_t *ipif) 1186 { 1187 ill_t *ill = ipif->ipif_ill; 1188 struct in_addr v4phys; 1189 1190 ASSERT(ill->ill_mactype == DL_6TO4); 1191 ASSERT(ill->ill_phys_addr_length == sizeof (struct in_addr)); 1192 ASSERT(ipif->ipif_isv6); 1193 1194 if (ipif->ipif_flags & IPIF_UP) 1195 return; 1196 1197 (void) ip_plen_to_mask_v6(16, &ipif->ipif_v6net_mask); 1198 bcopy(ill->ill_phys_addr, &v4phys, sizeof (struct in_addr)); 1199 IN6_V4ADDR_TO_6TO4(&v4phys, &ipif->ipif_v6lcl_addr); 1200 ipif->ipif_v6src_addr = ipif->ipif_v6lcl_addr; 1201 V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask, 1202 ipif->ipif_v6subnet); 1203 } 1204 1205 /* 1206 * Is it not possible to set the link local address? 1207 * The address can be set if the token is set, and the token 1208 * isn't too long. 1209 * Return B_TRUE if the address can't be set, or B_FALSE if it can. 1210 */ 1211 boolean_t 1212 ipif_cant_setlinklocal(ipif_t *ipif) 1213 { 1214 ill_t *ill = ipif->ipif_ill; 1215 1216 if (IN6_IS_ADDR_UNSPECIFIED(&ill->ill_token) || 1217 ill->ill_token_length > IPV6_ABITS - IPV6_LL_PREFIXLEN) 1218 return (B_TRUE); 1219 1220 return (B_FALSE); 1221 } 1222 1223 /* 1224 * Generate a link-local address from the token. 1225 */ 1226 void 1227 ipif_setlinklocal(ipif_t *ipif) 1228 { 1229 ill_t *ill = ipif->ipif_ill; 1230 in6_addr_t ov6addr; 1231 1232 ASSERT(IAM_WRITER_ILL(ill)); 1233 1234 /* 1235 * ill_manual_linklocal is set when the link-local address was 1236 * manually configured. 1237 */ 1238 if (ill->ill_manual_linklocal) 1239 return; 1240 1241 /* 1242 * IPv6 interfaces over 6to4 tunnels are special. They do not have 1243 * link-local addresses, but instead have a single automatically 1244 * generated global address. 1245 */ 1246 if (ill->ill_mactype == DL_6TO4) { 1247 ipif_set6to4addr(ipif); 1248 return; 1249 } 1250 1251 if (ipif_cant_setlinklocal(ipif)) 1252 return; 1253 1254 ov6addr = ipif->ipif_v6lcl_addr; 1255 ipif_get_linklocal(&ipif->ipif_v6lcl_addr, &ill->ill_token); 1256 sctp_update_ipif_addr(ipif, ov6addr); 1257 (void) ip_plen_to_mask_v6(IPV6_LL_PREFIXLEN, &ipif->ipif_v6net_mask); 1258 if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6pp_dst_addr)) { 1259 V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask, 1260 ipif->ipif_v6subnet); 1261 } 1262 1263 if (ipif->ipif_flags & IPIF_NOLOCAL) { 1264 ipif->ipif_v6src_addr = ipv6_all_zeros; 1265 } else { 1266 ipif->ipif_v6src_addr = ipif->ipif_v6lcl_addr; 1267 } 1268 } 1269 1270 /* 1271 * Set the destination link-local address for a point-to-point IPv6 1272 * interface with a destination interface id (IP tunnels are such 1273 * interfaces). 1274 */ 1275 void 1276 ipif_setdestlinklocal(ipif_t *ipif) 1277 { 1278 ill_t *ill = ipif->ipif_ill; 1279 1280 ASSERT(IAM_WRITER_ILL(ill)); 1281 if (IN6_IS_ADDR_UNSPECIFIED(&ill->ill_dest_token)) 1282 return; 1283 ipif_get_linklocal(&ipif->ipif_v6pp_dst_addr, &ill->ill_dest_token); 1284 ipif->ipif_v6subnet = ipif->ipif_v6pp_dst_addr; 1285 } 1286 1287 /* 1288 * This function sets up the multicast mappings in NDP. 1289 * Unlike ARP, there are no mapping_mps here. We delete the 1290 * mapping nces and add a new one. 1291 * 1292 * Returns non-zero on error and 0 on success. 1293 */ 1294 int 1295 ipif_ndp_setup_multicast(ipif_t *ipif, nce_t **ret_nce) 1296 { 1297 ill_t *ill = ipif->ipif_ill; 1298 in6_addr_t v6_mcast_addr = {(uint32_t)V6_MCAST, 0, 0, 0}; 1299 in6_addr_t v6_mcast_mask = {(uint32_t)V6_MCAST, 0, 0, 0}; 1300 in6_addr_t v6_extract_mask; 1301 uchar_t *phys_addr, *bphys_addr, *alloc_phys; 1302 nce_t *mnce = NULL; 1303 int err = 0; 1304 phyint_t *phyi = ill->ill_phyint; 1305 uint32_t hw_extract_start; 1306 dl_unitdata_req_t *dlur; 1307 ip_stack_t *ipst = ill->ill_ipst; 1308 1309 if (ret_nce != NULL) 1310 *ret_nce = NULL; 1311 1312 if (ipif->ipif_flags & IPIF_POINTOPOINT) 1313 return (0); 1314 1315 /* 1316 * IPMP meta-interfaces don't have any inherent multicast mappings, 1317 * and instead use the ones on the underlying interfaces. 1318 */ 1319 if (IS_IPMP(ill)) 1320 return (0); 1321 1322 /* 1323 * Delete the mapping nce. Normally these should not exist 1324 * as a previous ipif_down -> ipif_ndp_down should have deleted 1325 * all the nces. But they can exist if ip_rput_dlpi_writer 1326 * calls this when PHYI_MULTI_BCAST is set. Mappings are always 1327 * tied to the underlying ill, so don't match across the illgrp. 1328 */ 1329 mnce = ndp_lookup_v6(ill, B_FALSE, &v6_mcast_addr, B_FALSE); 1330 if (mnce != NULL) { 1331 ndp_delete(mnce); 1332 NCE_REFRELE(mnce); 1333 mnce = NULL; 1334 } 1335 1336 /* 1337 * Get media specific v6 mapping information. Note that 1338 * nd_lla_len can be 0 for tunnels. 1339 */ 1340 alloc_phys = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1341 if ((alloc_phys == NULL) && (ill->ill_nd_lla_len != 0)) 1342 return (ENOMEM); 1343 /* 1344 * Determine the broadcast address. 1345 */ 1346 dlur = (dl_unitdata_req_t *)ill->ill_bcast_mp->b_rptr; 1347 if (ill->ill_sap_length < 0) 1348 bphys_addr = (uchar_t *)dlur + dlur->dl_dest_addr_offset; 1349 else 1350 bphys_addr = (uchar_t *)dlur + 1351 dlur->dl_dest_addr_offset + ill->ill_sap_length; 1352 1353 /* 1354 * Check PHYI_MULTI_BCAST and possible length of physical 1355 * address to determine if we use the mapping or the 1356 * broadcast address. 1357 */ 1358 if ((phyi->phyint_flags & PHYI_MULTI_BCAST) || 1359 (!MEDIA_V6MINFO(ill->ill_media, ill->ill_nd_lla_len, 1360 bphys_addr, alloc_phys, &hw_extract_start, 1361 &v6_extract_mask))) { 1362 if (ill->ill_phys_addr_length > IP_MAX_HW_LEN) { 1363 kmem_free(alloc_phys, ill->ill_nd_lla_len); 1364 return (E2BIG); 1365 } 1366 /* Use the link-layer broadcast address for MULTI_BCAST */ 1367 phys_addr = bphys_addr; 1368 bzero(&v6_extract_mask, sizeof (v6_extract_mask)); 1369 hw_extract_start = ill->ill_nd_lla_len; 1370 } else { 1371 phys_addr = alloc_phys; 1372 } 1373 if ((ipif->ipif_flags & IPIF_BROADCAST) || 1374 (ill->ill_flags & ILLF_MULTICAST) || 1375 (phyi->phyint_flags & PHYI_MULTI_BCAST)) { 1376 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1377 err = ndp_add_v6(ill, 1378 phys_addr, 1379 &v6_mcast_addr, /* v6 address */ 1380 &v6_mcast_mask, /* v6 mask */ 1381 &v6_extract_mask, 1382 hw_extract_start, 1383 NCE_F_MAPPING | NCE_F_PERMANENT | NCE_F_NONUD, 1384 ND_REACHABLE, 1385 &mnce); 1386 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1387 if (err == 0) { 1388 if (ret_nce != NULL) { 1389 *ret_nce = mnce; 1390 } else { 1391 NCE_REFRELE(mnce); 1392 } 1393 } 1394 } 1395 kmem_free(alloc_phys, ill->ill_nd_lla_len); 1396 return (err); 1397 } 1398 1399 /* 1400 * Get the resolver set up for a new ipif. (Always called as writer.) 1401 */ 1402 int 1403 ipif_ndp_up(ipif_t *ipif, boolean_t initial) 1404 { 1405 ill_t *ill = ipif->ipif_ill; 1406 int err = 0; 1407 nce_t *nce = NULL; 1408 nce_t *mnce = NULL; 1409 boolean_t added_ipif = B_FALSE; 1410 1411 ASSERT(IAM_WRITER_ILL(ill)); 1412 ip1dbg(("ipif_ndp_up(%s:%u)\n", ill->ill_name, ipif->ipif_id)); 1413 1414 /* 1415 * ND not supported on XRESOLV interfaces. If ND support (multicast) 1416 * added later, take out this check. 1417 */ 1418 if ((ill->ill_flags & ILLF_XRESOLV) || 1419 IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr) || 1420 (!(ill->ill_net_type & IRE_INTERFACE))) { 1421 ipif->ipif_addr_ready = 1; 1422 return (0); 1423 } 1424 1425 /* 1426 * Need to setup multicast mapping only when the first 1427 * interface is coming UP. 1428 */ 1429 if (ill->ill_ipif_up_count == 0 && 1430 (ill->ill_flags & ILLF_MULTICAST)) { 1431 /* 1432 * We set the multicast before setting up the mapping for 1433 * local address because ipif_ndp_setup_multicast does 1434 * ndp_walk to delete nces which will delete the mapping 1435 * for local address also if we added the mapping for 1436 * local address first. 1437 */ 1438 err = ipif_ndp_setup_multicast(ipif, &mnce); 1439 if (err != 0) 1440 return (err); 1441 } 1442 1443 if ((ipif->ipif_flags & (IPIF_UNNUMBERED|IPIF_NOLOCAL)) == 0) { 1444 uint16_t flags; 1445 uint16_t state; 1446 uchar_t *hw_addr = NULL; 1447 ill_t *bound_ill; 1448 ipmp_illgrp_t *illg = ill->ill_grp; 1449 1450 /* Permanent entries don't need NUD */ 1451 flags = NCE_F_PERMANENT | NCE_F_NONUD; 1452 if (ill->ill_flags & ILLF_ROUTER) 1453 flags |= NCE_F_ISROUTER; 1454 1455 if (ipif->ipif_flags & IPIF_ANYCAST) 1456 flags |= NCE_F_ANYCAST; 1457 1458 if (IS_IPMP(ill)) { 1459 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1460 /* 1461 * If we're here via ipif_up(), then the ipif won't be 1462 * bound yet -- add it to the group, which will bind 1463 * it if possible. (We would add it in ipif_up(), but 1464 * deleting on failure there is gruesome.) If we're 1465 * here via ipmp_ill_bind_ipif(), then the ipif has 1466 * already been added to the group and we just need to 1467 * use the binding. 1468 */ 1469 if ((bound_ill = ipmp_ipif_bound_ill(ipif)) == NULL) { 1470 bound_ill = ipmp_illgrp_add_ipif(illg, ipif); 1471 if (bound_ill == NULL) { 1472 /* 1473 * We couldn't bind the ipif to an ill 1474 * yet, so we have nothing to publish. 1475 * Set ipif_addr_ready so that this 1476 * address can be used locally for now. 1477 * The routing socket message will be 1478 * sent from ipif_up_done_v6(). 1479 */ 1480 ipif->ipif_addr_ready = 1; 1481 return (0); 1482 } 1483 added_ipif = B_TRUE; 1484 } 1485 hw_addr = bound_ill->ill_nd_lla; 1486 } else { 1487 bound_ill = ill; 1488 if (ill->ill_net_type == IRE_IF_RESOLVER) 1489 hw_addr = ill->ill_nd_lla; 1490 } 1491 1492 /* 1493 * If this is an initial bring-up (or the ipif was never 1494 * completely brought up), do DAD. Otherwise, we're here 1495 * because IPMP has rebound an address to this ill: send 1496 * unsolicited advertisements to inform others. 1497 */ 1498 if (initial || !ipif->ipif_addr_ready) { 1499 state = ND_PROBE; 1500 } else { 1501 state = ND_REACHABLE; 1502 flags |= NCE_F_UNSOL_ADV; 1503 } 1504 retry: 1505 /* 1506 * Create an nce for the local address. We pass a match_illgrp 1507 * of B_TRUE because the local address must be unique across 1508 * the illgrp, and the existence of an nce with nce_ill set 1509 * to any ill in the group is indicative of a duplicate address 1510 */ 1511 err = ndp_lookup_then_add_v6(bound_ill, 1512 B_TRUE, 1513 hw_addr, 1514 &ipif->ipif_v6lcl_addr, 1515 &ipv6_all_ones, 1516 &ipv6_all_zeros, 1517 0, 1518 flags, 1519 state, 1520 &nce); 1521 switch (err) { 1522 case 0: 1523 ip1dbg(("ipif_ndp_up: NCE created for %s\n", 1524 ill->ill_name)); 1525 ipif->ipif_addr_ready = 1; 1526 ipif->ipif_added_nce = 1; 1527 nce->nce_ipif_cnt++; 1528 break; 1529 case EINPROGRESS: 1530 ip1dbg(("ipif_ndp_up: running DAD now for %s\n", 1531 ill->ill_name)); 1532 ipif->ipif_added_nce = 1; 1533 nce->nce_ipif_cnt++; 1534 break; 1535 case EEXIST: 1536 ip1dbg(("ipif_ndp_up: NCE already exists for %s\n", 1537 ill->ill_name)); 1538 if (!(nce->nce_flags & NCE_F_PERMANENT)) { 1539 ndp_delete(nce); 1540 NCE_REFRELE(nce); 1541 nce = NULL; 1542 goto retry; 1543 } 1544 if ((ipif->ipif_flags & IPIF_POINTOPOINT) == 0) { 1545 NCE_REFRELE(nce); 1546 goto fail; 1547 } 1548 /* 1549 * Duplicate local addresses are permissible for 1550 * IPIF_POINTOPOINT interfaces which will get marked 1551 * IPIF_UNNUMBERED later in 1552 * ip_addr_availability_check(). 1553 * 1554 * The nce_ipif_cnt field tracks the number of 1555 * ipifs that have nce_addr as their local address. 1556 */ 1557 ipif->ipif_addr_ready = 1; 1558 ipif->ipif_added_nce = 1; 1559 nce->nce_ipif_cnt++; 1560 break; 1561 default: 1562 ip1dbg(("ipif_ndp_up: NCE creation failed for %s\n", 1563 ill->ill_name)); 1564 goto fail; 1565 } 1566 } else { 1567 /* No local NCE for this entry */ 1568 ipif->ipif_addr_ready = 1; 1569 } 1570 if (nce != NULL) 1571 NCE_REFRELE(nce); 1572 if (mnce != NULL) 1573 NCE_REFRELE(mnce); 1574 return (0); 1575 fail: 1576 if (mnce != NULL) { 1577 ndp_delete(mnce); 1578 NCE_REFRELE(mnce); 1579 } 1580 if (added_ipif) 1581 ipmp_illgrp_del_ipif(ill->ill_grp, ipif); 1582 1583 return (err); 1584 } 1585 1586 /* Remove all cache entries for this logical interface */ 1587 void 1588 ipif_ndp_down(ipif_t *ipif) 1589 { 1590 nce_t *nce; 1591 ill_t *ill = ipif->ipif_ill; 1592 1593 ASSERT(IAM_WRITER_ILL(ill)); 1594 1595 if (ipif->ipif_isv6) { 1596 if (ipif->ipif_added_nce) { 1597 /* 1598 * For IPMP, `ill' can be the IPMP ill but the NCE will 1599 * always be tied to an underlying IP interface, so we 1600 * match across the illgrp. This is safe since we 1601 * ensure uniqueness across the group in ipif_ndp_up(). 1602 */ 1603 nce = ndp_lookup_v6(ill, B_TRUE, &ipif->ipif_v6lcl_addr, 1604 B_FALSE); 1605 if (nce != NULL) { 1606 if (--nce->nce_ipif_cnt == 0) 1607 ndp_delete(nce); /* last ipif for nce */ 1608 NCE_REFRELE(nce); 1609 } 1610 ipif->ipif_added_nce = 0; 1611 } 1612 1613 /* 1614 * Make IPMP aware of the deleted data address. 1615 */ 1616 if (IS_IPMP(ill)) 1617 ipmp_illgrp_del_ipif(ill->ill_grp, ipif); 1618 } 1619 1620 /* 1621 * Remove mapping and all other nces dependent on this ill 1622 * when the last ipif is going away. 1623 */ 1624 if (ill->ill_ipif_up_count == 0) 1625 ndp_walk(ill, (pfi_t)ndp_delete_per_ill, ill, ill->ill_ipst); 1626 } 1627 1628 /* 1629 * Used when an interface comes up to recreate any extra routes on this 1630 * interface. 1631 */ 1632 static ire_t ** 1633 ipif_recover_ire_v6(ipif_t *ipif) 1634 { 1635 mblk_t *mp; 1636 ire_t **ipif_saved_irep; 1637 ire_t **irep; 1638 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 1639 1640 ip1dbg(("ipif_recover_ire_v6(%s:%u)", ipif->ipif_ill->ill_name, 1641 ipif->ipif_id)); 1642 1643 ASSERT(ipif->ipif_isv6); 1644 1645 mutex_enter(&ipif->ipif_saved_ire_lock); 1646 ipif_saved_irep = (ire_t **)kmem_zalloc(sizeof (ire_t *) * 1647 ipif->ipif_saved_ire_cnt, KM_NOSLEEP); 1648 if (ipif_saved_irep == NULL) { 1649 mutex_exit(&ipif->ipif_saved_ire_lock); 1650 return (NULL); 1651 } 1652 1653 irep = ipif_saved_irep; 1654 1655 for (mp = ipif->ipif_saved_ire_mp; mp != NULL; mp = mp->b_cont) { 1656 ire_t *ire; 1657 queue_t *rfq; 1658 queue_t *stq; 1659 ifrt_t *ifrt; 1660 in6_addr_t *src_addr; 1661 in6_addr_t *gateway_addr; 1662 char buf[INET6_ADDRSTRLEN]; 1663 ushort_t type; 1664 1665 /* 1666 * When the ire was initially created and then added in 1667 * ip_rt_add_v6(), it was created either using 1668 * ipif->ipif_net_type in the case of a traditional interface 1669 * route, or as one of the IRE_OFFSUBNET types (with the 1670 * exception of IRE_HOST type redirect ire which is created by 1671 * icmp_redirect_v6() and which we don't need to save or 1672 * recover). In the case where ipif->ipif_net_type was 1673 * IRE_LOOPBACK, ip_rt_add_v6() will update the ire_type to 1674 * IRE_IF_NORESOLVER before calling ire_add_v6() to satisfy 1675 * software like GateD and Sun Cluster which creates routes 1676 * using the the loopback interface's address as a gateway. 1677 * 1678 * As ifrt->ifrt_type reflects the already updated ire_type, 1679 * ire_create_v6() will be called in the same way here as in 1680 * ip_rt_add_v6(), namely using ipif->ipif_net_type when the 1681 * route looks like a traditional interface route (where 1682 * ifrt->ifrt_type & IRE_INTERFACE is true) and otherwise 1683 * using the saved ifrt->ifrt_type. This means that in 1684 * the case where ipif->ipif_net_type is IRE_LOOPBACK, 1685 * the ire created by ire_create_v6() will be an IRE_LOOPBACK, 1686 * it will then be turned into an IRE_IF_NORESOLVER and then 1687 * added by ire_add_v6(). 1688 */ 1689 ifrt = (ifrt_t *)mp->b_rptr; 1690 if (ifrt->ifrt_type & IRE_INTERFACE) { 1691 rfq = NULL; 1692 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1693 ? ipif->ipif_rq : ipif->ipif_wq; 1694 src_addr = (ifrt->ifrt_flags & RTF_SETSRC) 1695 ? &ifrt->ifrt_v6src_addr 1696 : &ipif->ipif_v6src_addr; 1697 gateway_addr = NULL; 1698 type = ipif->ipif_net_type; 1699 } else { 1700 rfq = NULL; 1701 stq = NULL; 1702 src_addr = (ifrt->ifrt_flags & RTF_SETSRC) 1703 ? &ifrt->ifrt_v6src_addr : NULL; 1704 gateway_addr = &ifrt->ifrt_v6gateway_addr; 1705 type = ifrt->ifrt_type; 1706 } 1707 1708 /* 1709 * Create a copy of the IRE with the saved address and netmask. 1710 */ 1711 ip1dbg(("ipif_recover_ire_v6: creating IRE %s (%d) for %s/%d\n", 1712 ip_nv_lookup(ire_nv_tbl, ifrt->ifrt_type), ifrt->ifrt_type, 1713 inet_ntop(AF_INET6, &ifrt->ifrt_v6addr, buf, sizeof (buf)), 1714 ip_mask_to_plen_v6(&ifrt->ifrt_v6mask))); 1715 ire = ire_create_v6( 1716 &ifrt->ifrt_v6addr, 1717 &ifrt->ifrt_v6mask, 1718 src_addr, 1719 gateway_addr, 1720 &ifrt->ifrt_max_frag, 1721 NULL, 1722 rfq, 1723 stq, 1724 type, 1725 ipif, 1726 NULL, 1727 0, 1728 0, 1729 ifrt->ifrt_flags, 1730 &ifrt->ifrt_iulp_info, 1731 NULL, 1732 NULL, 1733 ipst); 1734 if (ire == NULL) { 1735 mutex_exit(&ipif->ipif_saved_ire_lock); 1736 kmem_free(ipif_saved_irep, 1737 ipif->ipif_saved_ire_cnt * sizeof (ire_t *)); 1738 return (NULL); 1739 } 1740 1741 /* 1742 * Some software (for example, GateD and Sun Cluster) attempts 1743 * to create (what amount to) IRE_PREFIX routes with the 1744 * loopback address as the gateway. This is primarily done to 1745 * set up prefixes with the RTF_REJECT flag set (for example, 1746 * when generating aggregate routes.) 1747 * 1748 * If the IRE type (as defined by ipif->ipif_net_type) is 1749 * IRE_LOOPBACK, then we map the request into a 1750 * IRE_IF_NORESOLVER. 1751 */ 1752 if (ipif->ipif_net_type == IRE_LOOPBACK) 1753 ire->ire_type = IRE_IF_NORESOLVER; 1754 /* 1755 * ire held by ire_add, will be refreled' in ipif_up_done 1756 * towards the end 1757 */ 1758 (void) ire_add(&ire, NULL, NULL, NULL, B_FALSE); 1759 *irep = ire; 1760 irep++; 1761 ip1dbg(("ipif_recover_ire_v6: added ire %p\n", (void *)ire)); 1762 } 1763 mutex_exit(&ipif->ipif_saved_ire_lock); 1764 return (ipif_saved_irep); 1765 } 1766 1767 /* 1768 * Return the scope of the given IPv6 address. If the address is an 1769 * IPv4 mapped IPv6 address, return the scope of the corresponding 1770 * IPv4 address. 1771 */ 1772 in6addr_scope_t 1773 ip_addr_scope_v6(const in6_addr_t *addr) 1774 { 1775 static in6_addr_t ipv6loopback = IN6ADDR_LOOPBACK_INIT; 1776 1777 if (IN6_IS_ADDR_V4MAPPED(addr)) { 1778 in_addr_t v4addr_h = ntohl(V4_PART_OF_V6((*addr))); 1779 if ((v4addr_h >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 1780 (v4addr_h & IN_AUTOCONF_MASK) == IN_AUTOCONF_NET) 1781 return (IP6_SCOPE_LINKLOCAL); 1782 if ((v4addr_h & IN_PRIVATE8_MASK) == IN_PRIVATE8_NET || 1783 (v4addr_h & IN_PRIVATE12_MASK) == IN_PRIVATE12_NET || 1784 (v4addr_h & IN_PRIVATE16_MASK) == IN_PRIVATE16_NET) 1785 return (IP6_SCOPE_SITELOCAL); 1786 return (IP6_SCOPE_GLOBAL); 1787 } 1788 1789 if (IN6_IS_ADDR_MULTICAST(addr)) 1790 return (IN6_ADDR_MC_SCOPE(addr)); 1791 1792 /* link-local and loopback addresses are of link-local scope */ 1793 if (IN6_IS_ADDR_LINKLOCAL(addr) || 1794 IN6_ARE_ADDR_EQUAL(addr, &ipv6loopback)) 1795 return (IP6_SCOPE_LINKLOCAL); 1796 if (IN6_IS_ADDR_SITELOCAL(addr)) 1797 return (IP6_SCOPE_SITELOCAL); 1798 return (IP6_SCOPE_GLOBAL); 1799 } 1800 1801 1802 /* 1803 * Returns the length of the common prefix of a1 and a2, as per 1804 * CommonPrefixLen() defined in RFC 3484. 1805 */ 1806 static int 1807 ip_common_prefix_v6(const in6_addr_t *a1, const in6_addr_t *a2) 1808 { 1809 int i; 1810 uint32_t a1val, a2val, mask; 1811 1812 for (i = 0; i < 4; i++) { 1813 if ((a1val = a1->s6_addr32[i]) != (a2val = a2->s6_addr32[i])) { 1814 a1val ^= a2val; 1815 i *= 32; 1816 mask = 0x80000000u; 1817 while (!(a1val & mask)) { 1818 mask >>= 1; 1819 i++; 1820 } 1821 return (i); 1822 } 1823 } 1824 return (IPV6_ABITS); 1825 } 1826 1827 #define IPIF_VALID_IPV6_SOURCE(ipif) \ 1828 (((ipif)->ipif_flags & IPIF_UP) && \ 1829 !((ipif)->ipif_flags & (IPIF_NOLOCAL|IPIF_ANYCAST)) && \ 1830 (ipif)->ipif_addr_ready) 1831 1832 /* source address candidate */ 1833 typedef struct candidate { 1834 ipif_t *cand_ipif; 1835 /* The properties of this candidate */ 1836 boolean_t cand_isdst; 1837 boolean_t cand_isdst_set; 1838 in6addr_scope_t cand_scope; 1839 boolean_t cand_scope_set; 1840 boolean_t cand_isdeprecated; 1841 boolean_t cand_isdeprecated_set; 1842 boolean_t cand_ispreferred; 1843 boolean_t cand_ispreferred_set; 1844 boolean_t cand_matchedinterface; 1845 boolean_t cand_matchedinterface_set; 1846 boolean_t cand_matchedlabel; 1847 boolean_t cand_matchedlabel_set; 1848 boolean_t cand_istmp; 1849 boolean_t cand_istmp_set; 1850 int cand_common_pref; 1851 boolean_t cand_common_pref_set; 1852 boolean_t cand_pref_eq; 1853 boolean_t cand_pref_eq_set; 1854 int cand_pref_len; 1855 boolean_t cand_pref_len_set; 1856 } cand_t; 1857 #define cand_srcaddr cand_ipif->ipif_v6lcl_addr 1858 #define cand_mask cand_ipif->ipif_v6net_mask 1859 #define cand_flags cand_ipif->ipif_flags 1860 #define cand_ill cand_ipif->ipif_ill 1861 #define cand_zoneid cand_ipif->ipif_zoneid 1862 1863 /* information about the destination for source address selection */ 1864 typedef struct dstinfo { 1865 const in6_addr_t *dst_addr; 1866 ill_t *dst_ill; 1867 uint_t dst_restrict_ill; 1868 boolean_t dst_prefer_src_tmp; 1869 in6addr_scope_t dst_scope; 1870 char *dst_label; 1871 } dstinfo_t; 1872 1873 /* 1874 * The following functions are rules used to select a source address in 1875 * ipif_select_source_v6(). Each rule compares a current candidate (cc) 1876 * against the best candidate (bc). Each rule has three possible outcomes; 1877 * the candidate is preferred over the best candidate (CAND_PREFER), the 1878 * candidate is not preferred over the best candidate (CAND_AVOID), or the 1879 * candidate is of equal value as the best candidate (CAND_TIE). 1880 * 1881 * These rules are part of a greater "Default Address Selection for IPv6" 1882 * sheme, which is standards based work coming out of the IETF ipv6 working 1883 * group. The IETF document defines both IPv6 source address selection and 1884 * destination address ordering. The rules defined here implement the IPv6 1885 * source address selection. Destination address ordering is done by 1886 * libnsl, and uses a similar set of rules to implement the sorting. 1887 * 1888 * Most of the rules are defined by the RFC and are not typically altered. The 1889 * last rule, number 8, has language that allows for local preferences. In the 1890 * scheme below, this means that new Solaris rules should normally go between 1891 * rule_ifprefix and rule_prefix. 1892 */ 1893 typedef enum {CAND_AVOID, CAND_TIE, CAND_PREFER} rule_res_t; 1894 typedef rule_res_t (*rulef_t)(cand_t *, cand_t *, const dstinfo_t *, 1895 ip_stack_t *); 1896 1897 /* Prefer an address if it is equal to the destination address. */ 1898 /* ARGSUSED3 */ 1899 static rule_res_t 1900 rule_isdst(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 1901 { 1902 if (!bc->cand_isdst_set) { 1903 bc->cand_isdst = 1904 IN6_ARE_ADDR_EQUAL(&bc->cand_srcaddr, dstinfo->dst_addr); 1905 bc->cand_isdst_set = B_TRUE; 1906 } 1907 1908 cc->cand_isdst = 1909 IN6_ARE_ADDR_EQUAL(&cc->cand_srcaddr, dstinfo->dst_addr); 1910 cc->cand_isdst_set = B_TRUE; 1911 1912 if (cc->cand_isdst == bc->cand_isdst) 1913 return (CAND_TIE); 1914 else if (cc->cand_isdst) 1915 return (CAND_PREFER); 1916 else 1917 return (CAND_AVOID); 1918 } 1919 1920 /* 1921 * Prefer addresses that are of closest scope to the destination. Always 1922 * prefer addresses that are of greater scope than the destination over 1923 * those that are of lesser scope than the destination. 1924 */ 1925 /* ARGSUSED3 */ 1926 static rule_res_t 1927 rule_scope(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 1928 { 1929 if (!bc->cand_scope_set) { 1930 bc->cand_scope = ip_addr_scope_v6(&bc->cand_srcaddr); 1931 bc->cand_scope_set = B_TRUE; 1932 } 1933 1934 cc->cand_scope = ip_addr_scope_v6(&cc->cand_srcaddr); 1935 cc->cand_scope_set = B_TRUE; 1936 1937 if (cc->cand_scope < bc->cand_scope) { 1938 if (cc->cand_scope < dstinfo->dst_scope) 1939 return (CAND_AVOID); 1940 else 1941 return (CAND_PREFER); 1942 } else if (bc->cand_scope < cc->cand_scope) { 1943 if (bc->cand_scope < dstinfo->dst_scope) 1944 return (CAND_PREFER); 1945 else 1946 return (CAND_AVOID); 1947 } else { 1948 return (CAND_TIE); 1949 } 1950 } 1951 1952 /* 1953 * Prefer non-deprecated source addresses. 1954 */ 1955 /* ARGSUSED2 */ 1956 static rule_res_t 1957 rule_deprecated(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1958 ip_stack_t *ipst) 1959 { 1960 if (!bc->cand_isdeprecated_set) { 1961 bc->cand_isdeprecated = 1962 ((bc->cand_flags & IPIF_DEPRECATED) != 0); 1963 bc->cand_isdeprecated_set = B_TRUE; 1964 } 1965 1966 cc->cand_isdeprecated = ((cc->cand_flags & IPIF_DEPRECATED) != 0); 1967 cc->cand_isdeprecated_set = B_TRUE; 1968 1969 if (bc->cand_isdeprecated == cc->cand_isdeprecated) 1970 return (CAND_TIE); 1971 else if (cc->cand_isdeprecated) 1972 return (CAND_AVOID); 1973 else 1974 return (CAND_PREFER); 1975 } 1976 1977 /* 1978 * Prefer source addresses that have the IPIF_PREFERRED flag set. This 1979 * rule must be before rule_interface because the flag could be set on any 1980 * interface, not just the interface being used for outgoing packets (for 1981 * example, the IFF_PREFERRED could be set on an address assigned to the 1982 * loopback interface). 1983 */ 1984 /* ARGSUSED2 */ 1985 static rule_res_t 1986 rule_preferred(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 1987 ip_stack_t *ipst) 1988 { 1989 if (!bc->cand_ispreferred_set) { 1990 bc->cand_ispreferred = ((bc->cand_flags & IPIF_PREFERRED) != 0); 1991 bc->cand_ispreferred_set = B_TRUE; 1992 } 1993 1994 cc->cand_ispreferred = ((cc->cand_flags & IPIF_PREFERRED) != 0); 1995 cc->cand_ispreferred_set = B_TRUE; 1996 1997 if (bc->cand_ispreferred == cc->cand_ispreferred) 1998 return (CAND_TIE); 1999 else if (cc->cand_ispreferred) 2000 return (CAND_PREFER); 2001 else 2002 return (CAND_AVOID); 2003 } 2004 2005 /* 2006 * Prefer source addresses that are assigned to the outgoing interface. 2007 */ 2008 /* ARGSUSED3 */ 2009 static rule_res_t 2010 rule_interface(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 2011 ip_stack_t *ipst) 2012 { 2013 ill_t *dstill = dstinfo->dst_ill; 2014 2015 /* 2016 * If dstinfo->dst_restrict_ill is set, this rule is unnecessary 2017 * since we know all candidates will be on the same link. 2018 */ 2019 if (dstinfo->dst_restrict_ill) 2020 return (CAND_TIE); 2021 2022 if (!bc->cand_matchedinterface_set) { 2023 bc->cand_matchedinterface = bc->cand_ill == dstill; 2024 bc->cand_matchedinterface_set = B_TRUE; 2025 } 2026 2027 cc->cand_matchedinterface = cc->cand_ill == dstill; 2028 cc->cand_matchedinterface_set = B_TRUE; 2029 2030 if (bc->cand_matchedinterface == cc->cand_matchedinterface) 2031 return (CAND_TIE); 2032 else if (cc->cand_matchedinterface) 2033 return (CAND_PREFER); 2034 else 2035 return (CAND_AVOID); 2036 } 2037 2038 /* 2039 * Prefer source addresses whose label matches the destination's label. 2040 */ 2041 static rule_res_t 2042 rule_label(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 2043 { 2044 char *label; 2045 2046 if (!bc->cand_matchedlabel_set) { 2047 label = ip6_asp_lookup(&bc->cand_srcaddr, NULL, ipst); 2048 bc->cand_matchedlabel = 2049 ip6_asp_labelcmp(label, dstinfo->dst_label); 2050 bc->cand_matchedlabel_set = B_TRUE; 2051 } 2052 2053 label = ip6_asp_lookup(&cc->cand_srcaddr, NULL, ipst); 2054 cc->cand_matchedlabel = ip6_asp_labelcmp(label, dstinfo->dst_label); 2055 cc->cand_matchedlabel_set = B_TRUE; 2056 2057 if (bc->cand_matchedlabel == cc->cand_matchedlabel) 2058 return (CAND_TIE); 2059 else if (cc->cand_matchedlabel) 2060 return (CAND_PREFER); 2061 else 2062 return (CAND_AVOID); 2063 } 2064 2065 /* 2066 * Prefer public addresses over temporary ones. An application can reverse 2067 * the logic of this rule and prefer temporary addresses by using the 2068 * IPV6_SRC_PREFERENCES socket option. 2069 */ 2070 /* ARGSUSED3 */ 2071 static rule_res_t 2072 rule_temporary(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 2073 ip_stack_t *ipst) 2074 { 2075 if (!bc->cand_istmp_set) { 2076 bc->cand_istmp = ((bc->cand_flags & IPIF_TEMPORARY) != 0); 2077 bc->cand_istmp_set = B_TRUE; 2078 } 2079 2080 cc->cand_istmp = ((cc->cand_flags & IPIF_TEMPORARY) != 0); 2081 cc->cand_istmp_set = B_TRUE; 2082 2083 if (bc->cand_istmp == cc->cand_istmp) 2084 return (CAND_TIE); 2085 2086 if (dstinfo->dst_prefer_src_tmp && cc->cand_istmp) 2087 return (CAND_PREFER); 2088 else if (!dstinfo->dst_prefer_src_tmp && !cc->cand_istmp) 2089 return (CAND_PREFER); 2090 else 2091 return (CAND_AVOID); 2092 } 2093 2094 /* 2095 * Prefer source addresses with longer matching prefix with the destination 2096 * under the interface mask. This gets us on the same subnet before applying 2097 * any Solaris-specific rules. 2098 */ 2099 /* ARGSUSED3 */ 2100 static rule_res_t 2101 rule_ifprefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 2102 ip_stack_t *ipst) 2103 { 2104 if (!bc->cand_pref_eq_set) { 2105 bc->cand_pref_eq = V6_MASK_EQ_2(bc->cand_srcaddr, 2106 bc->cand_mask, *dstinfo->dst_addr); 2107 bc->cand_pref_eq_set = B_TRUE; 2108 } 2109 2110 cc->cand_pref_eq = V6_MASK_EQ_2(cc->cand_srcaddr, cc->cand_mask, 2111 *dstinfo->dst_addr); 2112 cc->cand_pref_eq_set = B_TRUE; 2113 2114 if (bc->cand_pref_eq) { 2115 if (cc->cand_pref_eq) { 2116 if (!bc->cand_pref_len_set) { 2117 bc->cand_pref_len = 2118 ip_mask_to_plen_v6(&bc->cand_mask); 2119 bc->cand_pref_len_set = B_TRUE; 2120 } 2121 cc->cand_pref_len = ip_mask_to_plen_v6(&cc->cand_mask); 2122 cc->cand_pref_len_set = B_TRUE; 2123 if (bc->cand_pref_len == cc->cand_pref_len) 2124 return (CAND_TIE); 2125 else if (bc->cand_pref_len > cc->cand_pref_len) 2126 return (CAND_AVOID); 2127 else 2128 return (CAND_PREFER); 2129 } else { 2130 return (CAND_AVOID); 2131 } 2132 } else { 2133 if (cc->cand_pref_eq) 2134 return (CAND_PREFER); 2135 else 2136 return (CAND_TIE); 2137 } 2138 } 2139 2140 /* 2141 * Prefer to use zone-specific addresses when possible instead of all-zones 2142 * addresses. 2143 */ 2144 /* ARGSUSED2 */ 2145 static rule_res_t 2146 rule_zone_specific(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 2147 ip_stack_t *ipst) 2148 { 2149 if ((bc->cand_zoneid == ALL_ZONES) == 2150 (cc->cand_zoneid == ALL_ZONES)) 2151 return (CAND_TIE); 2152 else if (cc->cand_zoneid == ALL_ZONES) 2153 return (CAND_AVOID); 2154 else 2155 return (CAND_PREFER); 2156 } 2157 2158 /* 2159 * Prefer to use DHCPv6 (first) and static addresses (second) when possible 2160 * instead of statelessly autoconfigured addresses. 2161 * 2162 * This is done after trying all other preferences (and before the final tie 2163 * breaker) so that, if all else is equal, we select addresses configured by 2164 * DHCPv6 over other addresses. We presume that DHCPv6 addresses, unlike 2165 * stateless autoconfigured addresses, are deliberately configured by an 2166 * administrator, and thus are correctly set up in DNS and network packet 2167 * filters. 2168 */ 2169 /* ARGSUSED2 */ 2170 static rule_res_t 2171 rule_addr_type(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 2172 ip_stack_t *ipst) 2173 { 2174 #define ATYPE(x) \ 2175 ((x) & IPIF_DHCPRUNNING) ? 1 : ((x) & IPIF_ADDRCONF) ? 3 : 2 2176 int bcval = ATYPE(bc->cand_flags); 2177 int ccval = ATYPE(cc->cand_flags); 2178 #undef ATYPE 2179 2180 if (bcval == ccval) 2181 return (CAND_TIE); 2182 else if (ccval < bcval) 2183 return (CAND_PREFER); 2184 else 2185 return (CAND_AVOID); 2186 } 2187 2188 /* 2189 * Prefer source addresses with longer matching prefix with the destination. 2190 * We do the longest matching prefix calculation by doing an xor of both 2191 * addresses with the destination, and pick the address with the longest string 2192 * of leading zeros, as per CommonPrefixLen() defined in RFC 3484. 2193 */ 2194 /* ARGSUSED3 */ 2195 static rule_res_t 2196 rule_prefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst) 2197 { 2198 /* 2199 * For IPMP, we always want to choose a random source address from 2200 * among any equally usable addresses, so always report a tie. 2201 */ 2202 if (IS_IPMP(dstinfo->dst_ill)) 2203 return (CAND_TIE); 2204 2205 if (!bc->cand_common_pref_set) { 2206 bc->cand_common_pref = ip_common_prefix_v6(&bc->cand_srcaddr, 2207 dstinfo->dst_addr); 2208 bc->cand_common_pref_set = B_TRUE; 2209 } 2210 2211 cc->cand_common_pref = ip_common_prefix_v6(&cc->cand_srcaddr, 2212 dstinfo->dst_addr); 2213 cc->cand_common_pref_set = B_TRUE; 2214 2215 if (bc->cand_common_pref == cc->cand_common_pref) 2216 return (CAND_TIE); 2217 else if (bc->cand_common_pref > cc->cand_common_pref) 2218 return (CAND_AVOID); 2219 else 2220 return (CAND_PREFER); 2221 } 2222 2223 /* 2224 * Last rule: we must pick something, so just prefer the current best 2225 * candidate. 2226 */ 2227 /* ARGSUSED */ 2228 static rule_res_t 2229 rule_must_be_last(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, 2230 ip_stack_t *ipst) 2231 { 2232 return (CAND_AVOID); 2233 } 2234 2235 /* 2236 * Determine the best source address given a destination address and a 2237 * destination ill. If no suitable source address is found, it returns 2238 * NULL. If there is a usable address pointed to by the usesrc 2239 * (i.e ill_usesrc_ifindex != 0) then return that first since it is more 2240 * fine grained (i.e per interface) 2241 * 2242 * This implementation is based on the "Default Address Selection for IPv6" 2243 * specification produced by the IETF IPv6 working group. It has been 2244 * implemented so that the list of addresses is only traversed once (the 2245 * specification's algorithm could traverse the list of addresses once for 2246 * every rule). 2247 * 2248 * The restrict_ill argument restricts the algorithm to choose a source 2249 * address that is assigned to the destination ill. This is used when 2250 * the destination address is a link-local or multicast address, and when 2251 * ipv6_strict_dst_multihoming is turned on. 2252 * 2253 * src_prefs is the caller's set of source address preferences. If source 2254 * address selection is being called to determine the source address of a 2255 * connected socket (from ip_bind_connected_v6()), then the preferences are 2256 * taken from conn_src_preferences. These preferences can be set on a 2257 * per-socket basis using the IPV6_SRC_PREFERENCES socket option. The only 2258 * preference currently implemented is for rfc3041 temporary addresses. 2259 */ 2260 ipif_t * 2261 ipif_select_source_v6(ill_t *dstill, const in6_addr_t *dst, 2262 boolean_t restrict_ill, uint32_t src_prefs, zoneid_t zoneid) 2263 { 2264 dstinfo_t dstinfo; 2265 char dstr[INET6_ADDRSTRLEN]; 2266 char sstr[INET6_ADDRSTRLEN]; 2267 ipif_t *ipif, *start_ipif, *next_ipif; 2268 ill_t *ill, *usesrc_ill = NULL, *ipmp_ill = NULL; 2269 ill_walk_context_t ctx; 2270 cand_t best_c; /* The best candidate */ 2271 cand_t curr_c; /* The current candidate */ 2272 uint_t index; 2273 boolean_t first_candidate = B_TRUE; 2274 rule_res_t rule_result; 2275 tsol_tpc_t *src_rhtp, *dst_rhtp; 2276 ip_stack_t *ipst = dstill->ill_ipst; 2277 2278 /* 2279 * The list of ordering rules. They are applied in the order they 2280 * appear in the list. 2281 * 2282 * Solaris doesn't currently support Mobile IPv6, so there's no 2283 * rule_mipv6 corresponding to rule 4 in the specification. 2284 */ 2285 rulef_t rules[] = { 2286 rule_isdst, 2287 rule_scope, 2288 rule_deprecated, 2289 rule_preferred, 2290 rule_interface, 2291 rule_label, 2292 rule_temporary, 2293 rule_ifprefix, /* local rules after this */ 2294 rule_zone_specific, 2295 rule_addr_type, 2296 rule_prefix, /* local rules before this */ 2297 rule_must_be_last, /* must always be last */ 2298 NULL 2299 }; 2300 2301 ASSERT(dstill->ill_isv6); 2302 ASSERT(!IN6_IS_ADDR_V4MAPPED(dst)); 2303 2304 /* 2305 * Check if there is a usable src address pointed to by the 2306 * usesrc ifindex. This has higher precedence since it is 2307 * finer grained (i.e per interface) v/s being system wide. 2308 */ 2309 if (dstill->ill_usesrc_ifindex != 0) { 2310 if ((usesrc_ill = 2311 ill_lookup_on_ifindex(dstill->ill_usesrc_ifindex, B_TRUE, 2312 NULL, NULL, NULL, NULL, ipst)) != NULL) { 2313 dstinfo.dst_ill = usesrc_ill; 2314 } else { 2315 return (NULL); 2316 } 2317 } else if (IS_UNDER_IPMP(dstill)) { 2318 /* 2319 * Test addresses should never be used for source address 2320 * selection, so if we were passed an underlying ill, switch 2321 * to the IPMP meta-interface. 2322 */ 2323 if ((ipmp_ill = ipmp_ill_hold_ipmp_ill(dstill)) != NULL) 2324 dstinfo.dst_ill = ipmp_ill; 2325 else 2326 return (NULL); 2327 } else { 2328 dstinfo.dst_ill = dstill; 2329 } 2330 2331 /* 2332 * If we're dealing with an unlabeled destination on a labeled system, 2333 * make sure that we ignore source addresses that are incompatible with 2334 * the destination's default label. That destination's default label 2335 * must dominate the minimum label on the source address. 2336 * 2337 * (Note that this has to do with Trusted Solaris. It's not related to 2338 * the labels described by ip6_asp_lookup.) 2339 */ 2340 dst_rhtp = NULL; 2341 if (is_system_labeled()) { 2342 dst_rhtp = find_tpc(dst, IPV6_VERSION, B_FALSE); 2343 if (dst_rhtp == NULL) 2344 return (NULL); 2345 if (dst_rhtp->tpc_tp.host_type != UNLABELED) { 2346 TPC_RELE(dst_rhtp); 2347 dst_rhtp = NULL; 2348 } 2349 } 2350 2351 dstinfo.dst_addr = dst; 2352 dstinfo.dst_scope = ip_addr_scope_v6(dst); 2353 dstinfo.dst_label = ip6_asp_lookup(dst, NULL, ipst); 2354 dstinfo.dst_prefer_src_tmp = ((src_prefs & IPV6_PREFER_SRC_TMP) != 0); 2355 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2356 /* 2357 * Section three of the I-D states that for multicast and 2358 * link-local destinations, the candidate set must be restricted to 2359 * an interface that is on the same link as the outgoing interface. 2360 * Also, when ipv6_strict_dst_multihoming is turned on, always 2361 * restrict the source address to the destination link as doing 2362 * otherwise will almost certainly cause problems. 2363 */ 2364 if (IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst) || 2365 ipst->ips_ipv6_strict_dst_multihoming || usesrc_ill != NULL) { 2366 dstinfo.dst_restrict_ill = B_TRUE; 2367 } else { 2368 dstinfo.dst_restrict_ill = restrict_ill; 2369 } 2370 2371 bzero(&best_c, sizeof (cand_t)); 2372 2373 /* 2374 * Take a pass through the list of IPv6 interfaces to choose the best 2375 * possible source address. If restrict_ill is set, just use dst_ill. 2376 */ 2377 if (dstinfo.dst_restrict_ill) 2378 ill = dstinfo.dst_ill; 2379 else 2380 ill = ILL_START_WALK_V6(&ctx, ipst); 2381 2382 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 2383 ASSERT(ill->ill_isv6); 2384 2385 /* 2386 * Test addresses should never be used for source address 2387 * selection, so ignore underlying ills. 2388 */ 2389 if (IS_UNDER_IPMP(ill)) 2390 continue; 2391 2392 if (ill->ill_ipif == NULL) 2393 continue; 2394 /* 2395 * For source address selection, we treat the ipif list as 2396 * circular and continue until we get back to where we 2397 * started. This allows IPMP to vary source address selection 2398 * (which improves inbound load spreading) by caching its last 2399 * ending point and starting from there. NOTE: we don't have 2400 * to worry about ill_src_ipif changing ills since that can't 2401 * happen on the IPMP ill. 2402 */ 2403 start_ipif = ill->ill_ipif; 2404 if (IS_IPMP(ill) && ill->ill_src_ipif != NULL) 2405 start_ipif = ill->ill_src_ipif; 2406 2407 ipif = start_ipif; 2408 do { 2409 if ((next_ipif = ipif->ipif_next) == NULL) 2410 next_ipif = ill->ill_ipif; 2411 2412 if (!IPIF_VALID_IPV6_SOURCE(ipif)) 2413 continue; 2414 2415 if (zoneid != ALL_ZONES && 2416 ipif->ipif_zoneid != zoneid && 2417 ipif->ipif_zoneid != ALL_ZONES) 2418 continue; 2419 2420 /* 2421 * Check compatibility of local address for 2422 * destination's default label if we're on a labeled 2423 * system. Incompatible addresses can't be used at 2424 * all and must be skipped over. 2425 */ 2426 if (dst_rhtp != NULL) { 2427 boolean_t incompat; 2428 2429 src_rhtp = find_tpc(&ipif->ipif_v6lcl_addr, 2430 IPV6_VERSION, B_FALSE); 2431 if (src_rhtp == NULL) 2432 continue; 2433 incompat = 2434 src_rhtp->tpc_tp.host_type != SUN_CIPSO || 2435 src_rhtp->tpc_tp.tp_doi != 2436 dst_rhtp->tpc_tp.tp_doi || 2437 (!_blinrange(&dst_rhtp->tpc_tp.tp_def_label, 2438 &src_rhtp->tpc_tp.tp_sl_range_cipso) && 2439 !blinlset(&dst_rhtp->tpc_tp.tp_def_label, 2440 src_rhtp->tpc_tp.tp_sl_set_cipso)); 2441 TPC_RELE(src_rhtp); 2442 if (incompat) 2443 continue; 2444 } 2445 2446 if (first_candidate) { 2447 /* 2448 * This is first valid address in the list. 2449 * It is automatically the best candidate 2450 * so far. 2451 */ 2452 best_c.cand_ipif = ipif; 2453 first_candidate = B_FALSE; 2454 continue; 2455 } 2456 2457 bzero(&curr_c, sizeof (cand_t)); 2458 curr_c.cand_ipif = ipif; 2459 2460 /* 2461 * Compare this current candidate (curr_c) with the 2462 * best candidate (best_c) by applying the 2463 * comparison rules in order until one breaks the 2464 * tie. 2465 */ 2466 for (index = 0; rules[index] != NULL; index++) { 2467 /* Apply a comparison rule. */ 2468 rule_result = (rules[index])(&best_c, &curr_c, 2469 &dstinfo, ipst); 2470 if (rule_result == CAND_AVOID) { 2471 /* 2472 * The best candidate is still the 2473 * best candidate. Forget about 2474 * this current candidate and go on 2475 * to the next one. 2476 */ 2477 break; 2478 } else if (rule_result == CAND_PREFER) { 2479 /* 2480 * This candidate is prefered. It 2481 * becomes the best candidate so 2482 * far. Go on to the next address. 2483 */ 2484 best_c = curr_c; 2485 break; 2486 } 2487 /* We have a tie, apply the next rule. */ 2488 } 2489 2490 /* 2491 * The last rule must be a tie breaker rule and 2492 * must never produce a tie. At this point, the 2493 * candidate should have either been rejected, or 2494 * have been prefered as the best candidate so far. 2495 */ 2496 ASSERT(rule_result != CAND_TIE); 2497 } while ((ipif = next_ipif) != start_ipif); 2498 2499 /* 2500 * For IPMP, update the source ipif rotor to the next ipif, 2501 * provided we can look it up. (We must not use it if it's 2502 * IPIF_CONDEMNED since we may have grabbed ill_g_lock after 2503 * ipif_free() checked ill_src_ipif.) 2504 */ 2505 if (IS_IPMP(ill) && ipif != NULL) { 2506 mutex_enter(&ipif->ipif_ill->ill_lock); 2507 next_ipif = ipif->ipif_next; 2508 if (next_ipif != NULL && IPIF_CAN_LOOKUP(next_ipif)) 2509 ill->ill_src_ipif = next_ipif; 2510 else 2511 ill->ill_src_ipif = NULL; 2512 mutex_exit(&ipif->ipif_ill->ill_lock); 2513 } 2514 2515 /* 2516 * Only one ill to consider if dst_restrict_ill is set. 2517 */ 2518 if (dstinfo.dst_restrict_ill) 2519 break; 2520 } 2521 2522 ipif = best_c.cand_ipif; 2523 ip1dbg(("ipif_select_source_v6(%s, %s) -> %s\n", 2524 dstinfo.dst_ill->ill_name, 2525 inet_ntop(AF_INET6, dstinfo.dst_addr, dstr, sizeof (dstr)), 2526 (ipif == NULL ? "NULL" : 2527 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, sstr, sizeof (sstr))))); 2528 2529 if (usesrc_ill != NULL) 2530 ill_refrele(usesrc_ill); 2531 2532 if (ipmp_ill != NULL) 2533 ill_refrele(ipmp_ill); 2534 2535 if (dst_rhtp != NULL) 2536 TPC_RELE(dst_rhtp); 2537 2538 if (ipif == NULL) { 2539 rw_exit(&ipst->ips_ill_g_lock); 2540 return (NULL); 2541 } 2542 2543 mutex_enter(&ipif->ipif_ill->ill_lock); 2544 if (IPIF_CAN_LOOKUP(ipif)) { 2545 ipif_refhold_locked(ipif); 2546 mutex_exit(&ipif->ipif_ill->ill_lock); 2547 rw_exit(&ipst->ips_ill_g_lock); 2548 return (ipif); 2549 } 2550 mutex_exit(&ipif->ipif_ill->ill_lock); 2551 rw_exit(&ipst->ips_ill_g_lock); 2552 ip1dbg(("ipif_select_source_v6 cannot lookup ipif %p" 2553 " returning null \n", (void *)ipif)); 2554 2555 return (NULL); 2556 } 2557 2558 /* 2559 * If old_ipif is not NULL, see if ipif was derived from old 2560 * ipif and if so, recreate the interface route by re-doing 2561 * source address selection. This happens when ipif_down -> 2562 * ipif_update_other_ipifs calls us. 2563 * 2564 * If old_ipif is NULL, just redo the source address selection 2565 * if needed. This happens when ipif_up_done_v6 calls us. 2566 */ 2567 void 2568 ipif_recreate_interface_routes_v6(ipif_t *old_ipif, ipif_t *ipif) 2569 { 2570 ire_t *ire; 2571 ire_t *ipif_ire; 2572 queue_t *stq; 2573 ill_t *ill; 2574 ipif_t *nipif = NULL; 2575 boolean_t nipif_refheld = B_FALSE; 2576 boolean_t ip6_asp_table_held = B_FALSE; 2577 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 2578 2579 ill = ipif->ipif_ill; 2580 2581 if (!(ipif->ipif_flags & 2582 (IPIF_NOLOCAL|IPIF_ANYCAST|IPIF_DEPRECATED))) { 2583 /* 2584 * Can't possibly have borrowed the source 2585 * from old_ipif. 2586 */ 2587 return; 2588 } 2589 2590 /* 2591 * Is there any work to be done? No work if the address 2592 * is INADDR_ANY, loopback or NOLOCAL or ANYCAST ( 2593 * ipif_select_source_v6() does not borrow addresses from 2594 * NOLOCAL and ANYCAST interfaces). 2595 */ 2596 if ((old_ipif != NULL) && 2597 ((IN6_IS_ADDR_UNSPECIFIED(&old_ipif->ipif_v6lcl_addr)) || 2598 (old_ipif->ipif_ill->ill_wq == NULL) || 2599 (old_ipif->ipif_flags & 2600 (IPIF_NOLOCAL|IPIF_ANYCAST)))) { 2601 return; 2602 } 2603 2604 /* 2605 * Perform the same checks as when creating the 2606 * IRE_INTERFACE in ipif_up_done_v6. 2607 */ 2608 if (!(ipif->ipif_flags & IPIF_UP)) 2609 return; 2610 2611 if ((ipif->ipif_flags & IPIF_NOXMIT)) 2612 return; 2613 2614 if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6subnet) && 2615 IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6net_mask)) 2616 return; 2617 2618 /* 2619 * We know that ipif uses some other source for its 2620 * IRE_INTERFACE. Is it using the source of this 2621 * old_ipif? 2622 */ 2623 ipif_ire = ipif_to_ire_v6(ipif); 2624 if (ipif_ire == NULL) 2625 return; 2626 2627 if (old_ipif != NULL && 2628 !IN6_ARE_ADDR_EQUAL(&old_ipif->ipif_v6lcl_addr, 2629 &ipif_ire->ire_src_addr_v6)) { 2630 ire_refrele(ipif_ire); 2631 return; 2632 } 2633 2634 if (ip_debug > 2) { 2635 /* ip1dbg */ 2636 pr_addr_dbg("ipif_recreate_interface_routes_v6: deleting IRE" 2637 " for src %s\n", AF_INET6, &ipif_ire->ire_src_addr_v6); 2638 } 2639 2640 stq = ipif_ire->ire_stq; 2641 2642 /* 2643 * Can't use our source address. Select a different source address 2644 * for the IRE_INTERFACE. We restrict interface route source 2645 * address selection to ipif's assigned to the same link as the 2646 * interface. 2647 */ 2648 if (ip6_asp_can_lookup(ipst)) { 2649 ip6_asp_table_held = B_TRUE; 2650 nipif = ipif_select_source_v6(ill, &ipif->ipif_v6subnet, 2651 B_TRUE, IPV6_PREFER_SRC_DEFAULT, ipif->ipif_zoneid); 2652 } 2653 if (nipif == NULL) { 2654 /* Last resort - all ipif's have IPIF_NOLOCAL */ 2655 nipif = ipif; 2656 } else { 2657 nipif_refheld = B_TRUE; 2658 } 2659 2660 ire = ire_create_v6( 2661 &ipif->ipif_v6subnet, /* dest pref */ 2662 &ipif->ipif_v6net_mask, /* mask */ 2663 &nipif->ipif_v6src_addr, /* src addr */ 2664 NULL, /* no gateway */ 2665 &ipif->ipif_mtu, /* max frag */ 2666 NULL, /* no src nce */ 2667 NULL, /* no recv from queue */ 2668 stq, /* send-to queue */ 2669 ill->ill_net_type, /* IF_[NO]RESOLVER */ 2670 ipif, 2671 NULL, 2672 0, 2673 0, 2674 0, 2675 &ire_uinfo_null, 2676 NULL, 2677 NULL, 2678 ipst); 2679 2680 if (ire != NULL) { 2681 ire_t *ret_ire; 2682 int error; 2683 2684 /* 2685 * We don't need ipif_ire anymore. We need to delete 2686 * before we add so that ire_add does not detect 2687 * duplicates. 2688 */ 2689 ire_delete(ipif_ire); 2690 ret_ire = ire; 2691 error = ire_add(&ret_ire, NULL, NULL, NULL, B_FALSE); 2692 ASSERT(error == 0); 2693 ASSERT(ret_ire == ire); 2694 if (ret_ire != NULL) { 2695 /* Held in ire_add */ 2696 ire_refrele(ret_ire); 2697 } 2698 } 2699 /* 2700 * Either we are falling through from above or could not 2701 * allocate a replacement. 2702 */ 2703 ire_refrele(ipif_ire); 2704 if (ip6_asp_table_held) 2705 ip6_asp_table_refrele(ipst); 2706 if (nipif_refheld) 2707 ipif_refrele(nipif); 2708 } 2709 2710 /* 2711 * This old_ipif is going away. 2712 * 2713 * Determine if any other ipif's are using our address as 2714 * ipif_v6lcl_addr (due to those being IPIF_NOLOCAL, IPIF_ANYCAST, or 2715 * IPIF_DEPRECATED). 2716 * Find the IRE_INTERFACE for such ipif's and recreate them 2717 * to use an different source address following the rules in 2718 * ipif_up_done_v6. 2719 */ 2720 void 2721 ipif_update_other_ipifs_v6(ipif_t *old_ipif) 2722 { 2723 ipif_t *ipif; 2724 ill_t *ill; 2725 char buf[INET6_ADDRSTRLEN]; 2726 2727 ASSERT(IAM_WRITER_IPIF(old_ipif)); 2728 2729 ill = old_ipif->ipif_ill; 2730 2731 ip1dbg(("ipif_update_other_ipifs_v6(%s, %s)\n", 2732 ill->ill_name, 2733 inet_ntop(AF_INET6, &old_ipif->ipif_v6lcl_addr, 2734 buf, sizeof (buf)))); 2735 2736 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 2737 if (ipif != old_ipif) 2738 ipif_recreate_interface_routes_v6(old_ipif, ipif); 2739 } 2740 } 2741 2742 /* 2743 * Perform an attach and bind to get phys addr plus info_req for 2744 * the physical device. 2745 * q and mp represents an ioctl which will be queued waiting for 2746 * completion of the DLPI message exchange. 2747 * MUST be called on an ill queue. Can not set conn_pending_ill for that 2748 * reason thus the DL_PHYS_ADDR_ACK code does not assume ill_pending_q. 2749 * 2750 * Returns EINPROGRESS when mp has been consumed by queueing it on 2751 * ill_pending_mp and the ioctl will complete in ip_rput. 2752 */ 2753 int 2754 ill_dl_phys(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q) 2755 { 2756 mblk_t *v6token_mp = NULL; 2757 mblk_t *v6lla_mp = NULL; 2758 mblk_t *dest_mp = NULL; 2759 mblk_t *phys_mp = NULL; 2760 mblk_t *info_mp = NULL; 2761 mblk_t *attach_mp = NULL; 2762 mblk_t *bind_mp = NULL; 2763 mblk_t *unbind_mp = NULL; 2764 mblk_t *notify_mp = NULL; 2765 2766 ip1dbg(("ill_dl_phys(%s:%u)\n", ill->ill_name, ipif->ipif_id)); 2767 ASSERT(ill->ill_dlpi_style_set); 2768 ASSERT(WR(q)->q_next != NULL); 2769 2770 if (ill->ill_isv6) { 2771 v6token_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2772 sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2773 if (v6token_mp == NULL) 2774 goto bad; 2775 ((dl_phys_addr_req_t *)v6token_mp->b_rptr)->dl_addr_type = 2776 DL_IPV6_TOKEN; 2777 2778 v6lla_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2779 sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2780 if (v6lla_mp == NULL) 2781 goto bad; 2782 ((dl_phys_addr_req_t *)v6lla_mp->b_rptr)->dl_addr_type = 2783 DL_IPV6_LINK_LAYER_ADDR; 2784 } 2785 2786 if (ill->ill_mactype == DL_IPV4 || ill->ill_mactype == DL_IPV6) { 2787 dest_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2788 sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2789 if (dest_mp == NULL) 2790 goto bad; 2791 ((dl_phys_addr_req_t *)dest_mp->b_rptr)->dl_addr_type = 2792 DL_CURR_DEST_ADDR; 2793 } 2794 2795 /* 2796 * Allocate a DL_NOTIFY_REQ and set the notifications we want. 2797 */ 2798 notify_mp = ip_dlpi_alloc(sizeof (dl_notify_req_t) + sizeof (long), 2799 DL_NOTIFY_REQ); 2800 if (notify_mp == NULL) 2801 goto bad; 2802 ((dl_notify_req_t *)notify_mp->b_rptr)->dl_notifications = 2803 (DL_NOTE_PHYS_ADDR | DL_NOTE_SDU_SIZE | DL_NOTE_FASTPATH_FLUSH | 2804 DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN | DL_NOTE_CAPAB_RENEG | 2805 DL_NOTE_PROMISC_ON_PHYS | DL_NOTE_PROMISC_OFF_PHYS | 2806 DL_NOTE_REPLUMB); 2807 2808 phys_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + 2809 sizeof (t_scalar_t), DL_PHYS_ADDR_REQ); 2810 if (phys_mp == NULL) 2811 goto bad; 2812 ((dl_phys_addr_req_t *)phys_mp->b_rptr)->dl_addr_type = 2813 DL_CURR_PHYS_ADDR; 2814 2815 info_mp = ip_dlpi_alloc( 2816 sizeof (dl_info_req_t) + sizeof (dl_info_ack_t), 2817 DL_INFO_REQ); 2818 if (info_mp == NULL) 2819 goto bad; 2820 2821 bind_mp = ip_dlpi_alloc(sizeof (dl_bind_req_t) + sizeof (long), 2822 DL_BIND_REQ); 2823 if (bind_mp == NULL) 2824 goto bad; 2825 ((dl_bind_req_t *)bind_mp->b_rptr)->dl_sap = ill->ill_sap; 2826 ((dl_bind_req_t *)bind_mp->b_rptr)->dl_service_mode = DL_CLDLS; 2827 2828 unbind_mp = ip_dlpi_alloc(sizeof (dl_unbind_req_t), DL_UNBIND_REQ); 2829 if (unbind_mp == NULL) 2830 goto bad; 2831 2832 /* If we need to attach, pre-alloc and initialize the mblk */ 2833 if (ill->ill_needs_attach) { 2834 attach_mp = ip_dlpi_alloc(sizeof (dl_attach_req_t), 2835 DL_ATTACH_REQ); 2836 if (attach_mp == NULL) 2837 goto bad; 2838 ((dl_attach_req_t *)attach_mp->b_rptr)->dl_ppa = ill->ill_ppa; 2839 } 2840 2841 /* 2842 * Here we are going to delay the ioctl ack until after 2843 * ACKs from DL_PHYS_ADDR_REQ. So need to save the 2844 * original ioctl message before sending the requests 2845 */ 2846 mutex_enter(&ill->ill_lock); 2847 /* ipsq_pending_mp_add won't fail since we pass in a NULL connp */ 2848 (void) ipsq_pending_mp_add(NULL, ipif, ill->ill_wq, mp, 0); 2849 /* 2850 * Set ill_phys_addr_pend to zero. It will be set to the addr_type of 2851 * the DL_PHYS_ADDR_REQ in ill_dlpi_send() and ill_dlpi_done(). It will 2852 * be used to track which DL_PHYS_ADDR_REQ is being ACK'd/NAK'd. 2853 */ 2854 ill->ill_phys_addr_pend = 0; 2855 mutex_exit(&ill->ill_lock); 2856 2857 if (attach_mp != NULL) { 2858 ip1dbg(("ill_dl_phys: attach\n")); 2859 ill_dlpi_send(ill, attach_mp); 2860 } 2861 ill_dlpi_send(ill, bind_mp); 2862 ill_dlpi_send(ill, info_mp); 2863 if (v6token_mp != NULL) 2864 ill_dlpi_send(ill, v6token_mp); 2865 if (v6lla_mp != NULL) 2866 ill_dlpi_send(ill, v6lla_mp); 2867 if (dest_mp != NULL) 2868 ill_dlpi_send(ill, dest_mp); 2869 ill_dlpi_send(ill, phys_mp); 2870 ill_dlpi_send(ill, notify_mp); 2871 ill_dlpi_send(ill, unbind_mp); 2872 2873 /* 2874 * This operation will complete in ip_rput_dlpi_writer with either 2875 * a DL_PHYS_ADDR_ACK or DL_ERROR_ACK. 2876 */ 2877 return (EINPROGRESS); 2878 bad: 2879 freemsg(v6token_mp); 2880 freemsg(v6lla_mp); 2881 freemsg(dest_mp); 2882 freemsg(phys_mp); 2883 freemsg(info_mp); 2884 freemsg(attach_mp); 2885 freemsg(bind_mp); 2886 freemsg(unbind_mp); 2887 freemsg(notify_mp); 2888 return (ENOMEM); 2889 } 2890 2891 uint_t ip_loopback_mtu_v6plus = IP_LOOPBACK_MTU + IPV6_HDR_LEN + 20; 2892 2893 /* 2894 * DLPI is up. 2895 * Create all the IREs associated with an interface bring up multicast. 2896 * Set the interface flag and finish other initialization 2897 * that potentially had to be differed to after DL_BIND_ACK. 2898 */ 2899 int 2900 ipif_up_done_v6(ipif_t *ipif) 2901 { 2902 ire_t *ire_array[20]; 2903 ire_t **irep = ire_array; 2904 ire_t **irep1; 2905 ill_t *ill = ipif->ipif_ill; 2906 queue_t *stq; 2907 in6_addr_t v6addr; 2908 in6_addr_t route_mask; 2909 ipif_t *src_ipif = NULL; 2910 ipif_t *tmp_ipif; 2911 boolean_t flush_ire_cache = B_TRUE; 2912 int err; 2913 char buf[INET6_ADDRSTRLEN]; 2914 ire_t **ipif_saved_irep = NULL; 2915 int ipif_saved_ire_cnt; 2916 int cnt; 2917 boolean_t src_ipif_held = B_FALSE; 2918 boolean_t loopback = B_FALSE; 2919 boolean_t ip6_asp_table_held = B_FALSE; 2920 ip_stack_t *ipst = ill->ill_ipst; 2921 2922 ip1dbg(("ipif_up_done_v6(%s:%u)\n", 2923 ipif->ipif_ill->ill_name, ipif->ipif_id)); 2924 2925 /* Check if this is a loopback interface */ 2926 if (ipif->ipif_ill->ill_wq == NULL) 2927 loopback = B_TRUE; 2928 2929 ASSERT(ipif->ipif_isv6); 2930 ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock)); 2931 2932 /* 2933 * If all other interfaces for this ill are down or DEPRECATED, 2934 * or otherwise unsuitable for source address selection, remove 2935 * any IRE_CACHE entries for this ill to make sure source 2936 * address selection gets to take this new ipif into account. 2937 * No need to hold ill_lock while traversing the ipif list since 2938 * we are writer 2939 */ 2940 for (tmp_ipif = ill->ill_ipif; tmp_ipif; 2941 tmp_ipif = tmp_ipif->ipif_next) { 2942 if (((tmp_ipif->ipif_flags & 2943 (IPIF_NOXMIT|IPIF_ANYCAST|IPIF_NOLOCAL|IPIF_DEPRECATED)) || 2944 !(tmp_ipif->ipif_flags & IPIF_UP)) || 2945 (tmp_ipif == ipif)) 2946 continue; 2947 /* first useable pre-existing interface */ 2948 flush_ire_cache = B_FALSE; 2949 break; 2950 } 2951 if (flush_ire_cache) 2952 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 2953 IRE_CACHE, ill_ipif_cache_delete, ill, ill); 2954 2955 /* 2956 * Figure out which way the send-to queue should go. Only 2957 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER should show up here. 2958 */ 2959 switch (ill->ill_net_type) { 2960 case IRE_IF_RESOLVER: 2961 stq = ill->ill_rq; 2962 break; 2963 case IRE_IF_NORESOLVER: 2964 case IRE_LOOPBACK: 2965 stq = ill->ill_wq; 2966 break; 2967 default: 2968 return (EINVAL); 2969 } 2970 2971 if (IS_LOOPBACK(ill)) { 2972 /* 2973 * lo0:1 and subsequent ipifs were marked IRE_LOCAL in 2974 * ipif_lookup_on_name(), but in the case of zones we can have 2975 * several loopback addresses on lo0. So all the interfaces with 2976 * loopback addresses need to be marked IRE_LOOPBACK. 2977 */ 2978 if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, &ipv6_loopback)) 2979 ipif->ipif_ire_type = IRE_LOOPBACK; 2980 else 2981 ipif->ipif_ire_type = IRE_LOCAL; 2982 } 2983 2984 if (ipif->ipif_flags & (IPIF_NOLOCAL|IPIF_ANYCAST) || 2985 ((ipif->ipif_flags & IPIF_DEPRECATED) && 2986 !(ipif->ipif_flags & IPIF_NOFAILOVER))) { 2987 /* 2988 * Can't use our source address. Select a different 2989 * source address for the IRE_INTERFACE and IRE_LOCAL 2990 */ 2991 if (ip6_asp_can_lookup(ipst)) { 2992 ip6_asp_table_held = B_TRUE; 2993 src_ipif = ipif_select_source_v6(ipif->ipif_ill, 2994 &ipif->ipif_v6subnet, B_FALSE, 2995 IPV6_PREFER_SRC_DEFAULT, ipif->ipif_zoneid); 2996 } 2997 if (src_ipif == NULL) 2998 src_ipif = ipif; /* Last resort */ 2999 else 3000 src_ipif_held = B_TRUE; 3001 } else { 3002 src_ipif = ipif; 3003 } 3004 3005 if (!IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr) && 3006 !(ipif->ipif_flags & IPIF_NOLOCAL)) { 3007 3008 /* 3009 * If we're on a labeled system then make sure that zone- 3010 * private addresses have proper remote host database entries. 3011 */ 3012 if (is_system_labeled() && 3013 ipif->ipif_ire_type != IRE_LOOPBACK) { 3014 if (ip6opt_ls == 0) { 3015 cmn_err(CE_WARN, "IPv6 not enabled " 3016 "via /etc/system"); 3017 return (EINVAL); 3018 } 3019 if (!tsol_check_interface_address(ipif)) 3020 return (EINVAL); 3021 } 3022 3023 /* Register the source address for __sin6_src_id */ 3024 err = ip_srcid_insert(&ipif->ipif_v6lcl_addr, 3025 ipif->ipif_zoneid, ipst); 3026 if (err != 0) { 3027 ip0dbg(("ipif_up_done_v6: srcid_insert %d\n", err)); 3028 if (src_ipif_held) 3029 ipif_refrele(src_ipif); 3030 if (ip6_asp_table_held) 3031 ip6_asp_table_refrele(ipst); 3032 return (err); 3033 } 3034 /* 3035 * If the interface address is set, create the LOCAL 3036 * or LOOPBACK IRE. 3037 */ 3038 ip1dbg(("ipif_up_done_v6: creating IRE %d for %s\n", 3039 ipif->ipif_ire_type, 3040 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, 3041 buf, sizeof (buf)))); 3042 3043 *irep++ = ire_create_v6( 3044 &ipif->ipif_v6lcl_addr, /* dest address */ 3045 &ipv6_all_ones, /* mask */ 3046 &src_ipif->ipif_v6src_addr, /* source address */ 3047 NULL, /* no gateway */ 3048 &ip_loopback_mtu_v6plus, /* max frag size */ 3049 NULL, 3050 ipif->ipif_rq, /* recv-from queue */ 3051 NULL, /* no send-to queue */ 3052 ipif->ipif_ire_type, /* LOCAL or LOOPBACK */ 3053 ipif, /* interface */ 3054 NULL, 3055 0, 3056 0, 3057 (ipif->ipif_flags & IPIF_PRIVATE) ? RTF_PRIVATE : 0, 3058 &ire_uinfo_null, 3059 NULL, 3060 NULL, 3061 ipst); 3062 } 3063 3064 /* Set up the IRE_IF_RESOLVER or IRE_IF_NORESOLVER, as appropriate. */ 3065 if (stq != NULL && !(ipif->ipif_flags & IPIF_NOXMIT) && 3066 !(IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6subnet) && 3067 IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6net_mask))) { 3068 /* ipif_v6subnet is ipif_v6pp_dst_addr for pt-pt */ 3069 v6addr = ipif->ipif_v6subnet; 3070 3071 if (ipif->ipif_flags & IPIF_POINTOPOINT) { 3072 route_mask = ipv6_all_ones; 3073 } else { 3074 route_mask = ipif->ipif_v6net_mask; 3075 } 3076 3077 ip1dbg(("ipif_up_done_v6: creating if IRE %d for %s\n", 3078 ill->ill_net_type, 3079 inet_ntop(AF_INET6, &v6addr, buf, sizeof (buf)))); 3080 3081 *irep++ = ire_create_v6( 3082 &v6addr, /* dest pref */ 3083 &route_mask, /* mask */ 3084 &src_ipif->ipif_v6src_addr, /* src addr */ 3085 NULL, /* no gateway */ 3086 &ipif->ipif_mtu, /* max frag */ 3087 NULL, /* no src nce */ 3088 NULL, /* no recv from queue */ 3089 stq, /* send-to queue */ 3090 ill->ill_net_type, /* IF_[NO]RESOLVER */ 3091 ipif, 3092 NULL, 3093 0, 3094 0, 3095 (ipif->ipif_flags & IPIF_PRIVATE) ? RTF_PRIVATE : 0, 3096 &ire_uinfo_null, 3097 NULL, 3098 NULL, 3099 ipst); 3100 } 3101 3102 /* If an earlier ire_create failed, get out now */ 3103 for (irep1 = irep; irep1 > ire_array; ) { 3104 irep1--; 3105 if (*irep1 == NULL) { 3106 ip1dbg(("ipif_up_done_v6: NULL ire found in" 3107 " ire_array\n")); 3108 err = ENOMEM; 3109 goto bad; 3110 } 3111 } 3112 3113 ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock)); 3114 3115 /* 3116 * Need to atomically check for IP address availability under 3117 * ip_addr_avail_lock. ill_g_lock is held as reader to ensure no new 3118 * ills or new ipifs can be added while we are checking availability. 3119 */ 3120 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 3121 mutex_enter(&ipst->ips_ip_addr_avail_lock); 3122 ill->ill_ipif_up_count++; 3123 ipif->ipif_flags |= IPIF_UP; 3124 err = ip_addr_availability_check(ipif); 3125 mutex_exit(&ipst->ips_ip_addr_avail_lock); 3126 rw_exit(&ipst->ips_ill_g_lock); 3127 3128 if (err != 0) { 3129 /* 3130 * Our address may already be up on the same ill. In this case, 3131 * the external resolver entry for our ipif replaced the one for 3132 * the other ipif. So we don't want to delete it (otherwise the 3133 * other ipif would be unable to send packets). 3134 * ip_addr_availability_check() identifies this case for us and 3135 * returns EADDRINUSE; we need to turn it into EADDRNOTAVAIL 3136 * which is the expected error code. 3137 * 3138 * Note that, for the non-XRESOLV case, ipif_ndp_down() will 3139 * only delete the nce in the case when the nce_ipif_cnt drops 3140 * to 0. 3141 */ 3142 if (err == EADDRINUSE) { 3143 if (ipif->ipif_ill->ill_flags & ILLF_XRESOLV) { 3144 freemsg(ipif->ipif_arp_del_mp); 3145 ipif->ipif_arp_del_mp = NULL; 3146 } 3147 err = EADDRNOTAVAIL; 3148 } 3149 ill->ill_ipif_up_count--; 3150 ipif->ipif_flags &= ~IPIF_UP; 3151 goto bad; 3152 } 3153 3154 /* 3155 * Add in all newly created IREs. 3156 * 3157 * NOTE : We refrele the ire though we may branch to "bad" 3158 * later on where we do ire_delete. This is okay 3159 * because nobody can delete it as we are running 3160 * exclusively. 3161 */ 3162 for (irep1 = irep; irep1 > ire_array; ) { 3163 irep1--; 3164 /* Shouldn't be adding any bcast ire's */ 3165 ASSERT((*irep1)->ire_type != IRE_BROADCAST); 3166 ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock)); 3167 /* 3168 * refheld by ire_add. refele towards the end of the func 3169 */ 3170 (void) ire_add(irep1, NULL, NULL, NULL, B_FALSE); 3171 } 3172 if (ip6_asp_table_held) { 3173 ip6_asp_table_refrele(ipst); 3174 ip6_asp_table_held = B_FALSE; 3175 } 3176 3177 /* Recover any additional IRE_IF_[NO]RESOLVER entries for this ipif */ 3178 ipif_saved_ire_cnt = ipif->ipif_saved_ire_cnt; 3179 ipif_saved_irep = ipif_recover_ire_v6(ipif); 3180 3181 if (ill->ill_need_recover_multicast) { 3182 /* 3183 * Need to recover all multicast memberships in the driver. 3184 * This had to be deferred until we had attached. 3185 */ 3186 ill_recover_multicast(ill); 3187 } 3188 3189 if (ill->ill_ipif_up_count == 1) { 3190 /* 3191 * Since the interface is now up, it may now be active. 3192 */ 3193 if (IS_UNDER_IPMP(ill)) 3194 ipmp_ill_refresh_active(ill); 3195 } 3196 3197 /* Join the allhosts multicast address and the solicited node MC */ 3198 ipif_multicast_up(ipif); 3199 3200 /* 3201 * See if anybody else would benefit from our new ipif. 3202 */ 3203 if (!loopback && 3204 !(ipif->ipif_flags & (IPIF_NOLOCAL|IPIF_ANYCAST|IPIF_DEPRECATED))) { 3205 ill_update_source_selection(ill); 3206 } 3207 3208 for (irep1 = irep; irep1 > ire_array; ) { 3209 irep1--; 3210 if (*irep1 != NULL) { 3211 /* was held in ire_add */ 3212 ire_refrele(*irep1); 3213 } 3214 } 3215 3216 cnt = ipif_saved_ire_cnt; 3217 for (irep1 = ipif_saved_irep; cnt > 0; irep1++, cnt--) { 3218 if (*irep1 != NULL) { 3219 /* was held in ire_add */ 3220 ire_refrele(*irep1); 3221 } 3222 } 3223 3224 if (ipif->ipif_addr_ready) 3225 ipif_up_notify(ipif); 3226 3227 if (ipif_saved_irep != NULL) { 3228 kmem_free(ipif_saved_irep, 3229 ipif_saved_ire_cnt * sizeof (ire_t *)); 3230 } 3231 3232 if (src_ipif_held) 3233 ipif_refrele(src_ipif); 3234 3235 return (0); 3236 3237 bad: 3238 if (ip6_asp_table_held) 3239 ip6_asp_table_refrele(ipst); 3240 3241 while (irep > ire_array) { 3242 irep--; 3243 if (*irep != NULL) 3244 ire_delete(*irep); 3245 } 3246 (void) ip_srcid_remove(&ipif->ipif_v6lcl_addr, ipif->ipif_zoneid, ipst); 3247 3248 if (ipif_saved_irep != NULL) { 3249 kmem_free(ipif_saved_irep, 3250 ipif_saved_ire_cnt * sizeof (ire_t *)); 3251 } 3252 if (src_ipif_held) 3253 ipif_refrele(src_ipif); 3254 3255 ipif_ndp_down(ipif); 3256 ipif_resolver_down(ipif); 3257 3258 return (err); 3259 } 3260 3261 /* 3262 * Delete an ND entry and the corresponding IRE_CACHE entry if it exists. 3263 */ 3264 /* ARGSUSED */ 3265 int 3266 ip_siocdelndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 3267 ip_ioctl_cmd_t *ipip, void *dummy_ifreq) 3268 { 3269 sin6_t *sin6; 3270 nce_t *nce; 3271 struct lifreq *lifr; 3272 lif_nd_req_t *lnr; 3273 ill_t *ill = ipif->ipif_ill; 3274 ire_t *ire; 3275 3276 lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr; 3277 lnr = &lifr->lifr_nd; 3278 /* Only allow for logical unit zero i.e. not on "le0:17" */ 3279 if (ipif->ipif_id != 0) 3280 return (EINVAL); 3281 3282 if (!ipif->ipif_isv6) 3283 return (EINVAL); 3284 3285 if (lnr->lnr_addr.ss_family != AF_INET6) 3286 return (EAFNOSUPPORT); 3287 3288 sin6 = (sin6_t *)&lnr->lnr_addr; 3289 3290 /* 3291 * Since ND mappings must be consistent across an IPMP group, prohibit 3292 * deleting ND mappings on underlying interfaces. Also, since ND 3293 * mappings for IPMP data addresses are owned by IP itself, prohibit 3294 * deleting them. 3295 */ 3296 if (IS_UNDER_IPMP(ill)) 3297 return (EPERM); 3298 3299 if (IS_IPMP(ill)) { 3300 ire = ire_ctable_lookup_v6(&sin6->sin6_addr, NULL, IRE_LOCAL, 3301 ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE | MATCH_IRE_ILL, 3302 ill->ill_ipst); 3303 if (ire != NULL) { 3304 ire_refrele(ire); 3305 return (EPERM); 3306 } 3307 } 3308 3309 /* See comment in ndp_query() regarding IS_IPMP(ill) usage */ 3310 nce = ndp_lookup_v6(ill, IS_IPMP(ill), &sin6->sin6_addr, B_FALSE); 3311 if (nce == NULL) 3312 return (ESRCH); 3313 ndp_delete(nce); 3314 NCE_REFRELE(nce); 3315 return (0); 3316 } 3317 3318 /* 3319 * Return nbr cache info. 3320 */ 3321 /* ARGSUSED */ 3322 int 3323 ip_siocqueryndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 3324 ip_ioctl_cmd_t *ipip, void *dummy_ifreq) 3325 { 3326 ill_t *ill = ipif->ipif_ill; 3327 struct lifreq *lifr; 3328 lif_nd_req_t *lnr; 3329 3330 lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr; 3331 lnr = &lifr->lifr_nd; 3332 /* Only allow for logical unit zero i.e. not on "le0:17" */ 3333 if (ipif->ipif_id != 0) 3334 return (EINVAL); 3335 3336 if (!ipif->ipif_isv6) 3337 return (EINVAL); 3338 3339 if (lnr->lnr_addr.ss_family != AF_INET6) 3340 return (EAFNOSUPPORT); 3341 3342 if (ill->ill_phys_addr_length > sizeof (lnr->lnr_hdw_addr)) 3343 return (EINVAL); 3344 3345 return (ndp_query(ill, lnr)); 3346 } 3347 3348 /* 3349 * Perform an update of the nd entry for the specified address. 3350 */ 3351 /* ARGSUSED */ 3352 int 3353 ip_siocsetndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 3354 ip_ioctl_cmd_t *ipip, void *dummy_ifreq) 3355 { 3356 sin6_t *sin6; 3357 ill_t *ill = ipif->ipif_ill; 3358 struct lifreq *lifr; 3359 lif_nd_req_t *lnr; 3360 ire_t *ire; 3361 3362 lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr; 3363 lnr = &lifr->lifr_nd; 3364 /* Only allow for logical unit zero i.e. not on "le0:17" */ 3365 if (ipif->ipif_id != 0) 3366 return (EINVAL); 3367 3368 if (!ipif->ipif_isv6) 3369 return (EINVAL); 3370 3371 if (lnr->lnr_addr.ss_family != AF_INET6) 3372 return (EAFNOSUPPORT); 3373 3374 sin6 = (sin6_t *)&lnr->lnr_addr; 3375 3376 /* 3377 * Since ND mappings must be consistent across an IPMP group, prohibit 3378 * updating ND mappings on underlying interfaces. Also, since ND 3379 * mappings for IPMP data addresses are owned by IP itself, prohibit 3380 * updating them. 3381 */ 3382 if (IS_UNDER_IPMP(ill)) 3383 return (EPERM); 3384 3385 if (IS_IPMP(ill)) { 3386 ire = ire_ctable_lookup_v6(&sin6->sin6_addr, NULL, IRE_LOCAL, 3387 ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE | MATCH_IRE_ILL, 3388 ill->ill_ipst); 3389 if (ire != NULL) { 3390 ire_refrele(ire); 3391 return (EPERM); 3392 } 3393 } 3394 3395 return (ndp_sioc_update(ill, lnr)); 3396 } 3397