1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/stropts.h> 29 #include <sys/strsun.h> 30 #include <sys/sysmacros.h> 31 #include <sys/errno.h> 32 #include <sys/dlpi.h> 33 #include <sys/socket.h> 34 #include <sys/ddi.h> 35 #include <sys/sunddi.h> 36 #include <sys/cmn_err.h> 37 #include <sys/debug.h> 38 #include <sys/vtrace.h> 39 #include <sys/kmem.h> 40 #include <sys/zone.h> 41 #include <sys/ethernet.h> 42 #include <sys/sdt.h> 43 44 #include <net/if.h> 45 #include <net/if_types.h> 46 #include <net/if_dl.h> 47 #include <net/route.h> 48 #include <netinet/in.h> 49 #include <netinet/ip6.h> 50 #include <netinet/icmp6.h> 51 52 #include <inet/common.h> 53 #include <inet/mi.h> 54 #include <inet/mib2.h> 55 #include <inet/nd.h> 56 #include <inet/ip.h> 57 #include <inet/ip_impl.h> 58 #include <inet/ipclassifier.h> 59 #include <inet/ip_if.h> 60 #include <inet/ip_ire.h> 61 #include <inet/ip_rts.h> 62 #include <inet/ip6.h> 63 #include <inet/ip_ndp.h> 64 #include <inet/ipsec_impl.h> 65 #include 
<inet/ipsec_info.h> 66 #include <inet/sctp_ip.h> 67 #include <inet/ip2mac_impl.h> 68 69 /* 70 * Function names with nce_ prefix are static while function 71 * names with ndp_ prefix are used by rest of the IP. 72 * 73 * Lock ordering: 74 * 75 * ndp_g_lock -> ill_lock -> nce_lock 76 * 77 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 78 * nce_next. Nce_lock protects the contents of the NCE (particularly 79 * nce_refcnt). 80 */ 81 82 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 83 uint32_t ll_addr_len); 84 static void nce_ire_delete(nce_t *nce); 85 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 86 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 87 static nce_t *nce_lookup_addr(ill_t *, boolean_t, const in6_addr_t *, 88 nce_t *); 89 static nce_t *nce_lookup_mapping(ill_t *, const in6_addr_t *); 90 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 91 uchar_t *addr); 92 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 93 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 94 static mblk_t *nce_udreq_alloc(ill_t *ill); 95 static void nce_update(nce_t *nce, uint16_t new_state, 96 uchar_t *new_ll_addr); 97 static uint32_t nce_solicit(nce_t *nce, in6_addr_t src); 98 static boolean_t nce_xmit(ill_t *ill, uint8_t type, 99 boolean_t use_lla_addr, const in6_addr_t *sender, 100 const in6_addr_t *target, int flag); 101 static boolean_t nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, 102 const in6_addr_t *target, uint_t flags); 103 static boolean_t nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, 104 const in6_addr_t *src, uint_t flags); 105 static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t, 106 nce_t **, nce_t *); 107 static ipif_t *ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill); 108 109 #ifdef DEBUG 110 static void nce_trace_cleanup(const nce_t *); 111 #endif 112 113 #define NCE_HASH_PTR_V4(ipst, addr) \ 114 
(&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 115 116 #define NCE_HASH_PTR_V6(ipst, addr) \ 117 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ 118 NCE_TABLE_SIZE)])) 119 120 /* Non-tunable probe interval, based on link capabilities */ 121 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 122 123 /* 124 * NDP Cache Entry creation routine. 125 * Mapped entries will never do NUD . 126 * This routine must always be called with ndp6->ndp_g_lock held. 127 * Prior to return, nce_refcnt is incremented. 128 */ 129 int 130 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 131 const in6_addr_t *mask, const in6_addr_t *extract_mask, 132 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 133 nce_t **newnce) 134 { 135 static nce_t nce_nil; 136 nce_t *nce; 137 mblk_t *mp; 138 mblk_t *template; 139 nce_t **ncep; 140 int err; 141 boolean_t dropped = B_FALSE; 142 ip_stack_t *ipst = ill->ill_ipst; 143 144 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 145 ASSERT(ill != NULL && ill->ill_isv6); 146 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 147 ip0dbg(("ndp_add_v6: no addr\n")); 148 return (EINVAL); 149 } 150 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 151 ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags)); 152 return (EINVAL); 153 } 154 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 155 (flags & NCE_F_MAPPING)) { 156 ip0dbg(("ndp_add_v6: extract mask zero for mapping")); 157 return (EINVAL); 158 } 159 /* 160 * Allocate the mblk to hold the nce. 161 * 162 * XXX This can come out of a separate cache - nce_cache. 
163 * We don't need the mp anymore as there are no more 164 * "qwriter"s 165 */ 166 mp = allocb(sizeof (nce_t), BPRI_MED); 167 if (mp == NULL) 168 return (ENOMEM); 169 170 nce = (nce_t *)mp->b_rptr; 171 mp->b_wptr = (uchar_t *)&nce[1]; 172 *nce = nce_nil; 173 174 /* 175 * This one holds link layer address 176 */ 177 if (ill->ill_net_type == IRE_IF_RESOLVER) { 178 template = nce_udreq_alloc(ill); 179 } else { 180 if (ill->ill_phys_addr_length == IPV6_ADDR_LEN && 181 ill->ill_mactype != DL_IPV6) { 182 /* 183 * We create a nce_res_mp with the IP nexthop address 184 * as the destination address if the physical length 185 * is exactly 16 bytes for point-to-multipoint links 186 * that do their own resolution from IP to link-layer 187 * address. 188 */ 189 template = ill_dlur_gen((uchar_t *)addr, 190 ill->ill_phys_addr_length, ill->ill_sap, 191 ill->ill_sap_length); 192 } else { 193 if (ill->ill_resolver_mp == NULL) { 194 freeb(mp); 195 return (EINVAL); 196 } 197 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 198 template = copyb(ill->ill_resolver_mp); 199 } 200 } 201 if (template == NULL) { 202 freeb(mp); 203 return (ENOMEM); 204 } 205 nce->nce_ill = ill; 206 nce->nce_ipversion = IPV6_VERSION; 207 nce->nce_flags = flags; 208 nce->nce_state = state; 209 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 210 nce->nce_rcnt = ill->ill_xmit_count; 211 nce->nce_addr = *addr; 212 nce->nce_mask = *mask; 213 nce->nce_extract_mask = *extract_mask; 214 nce->nce_ll_extract_start = hw_extract_start; 215 nce->nce_fp_mp = NULL; 216 nce->nce_res_mp = template; 217 if (state == ND_REACHABLE) 218 nce->nce_last = TICK_TO_MSEC(lbolt64); 219 else 220 nce->nce_last = 0; 221 nce->nce_qd_mp = NULL; 222 nce->nce_mp = mp; 223 if (hw_addr != NULL) 224 nce_set_ll(nce, hw_addr); 225 /* This one is for nce getting created */ 226 nce->nce_refcnt = 1; 227 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 228 if (nce->nce_flags & NCE_F_MAPPING) { 229 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 230 
ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 231 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 232 ncep = &ipst->ips_ndp6->nce_mask_entries; 233 } else { 234 ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 235 } 236 237 nce->nce_trace_disable = B_FALSE; 238 239 list_create(&nce->nce_cb, sizeof (nce_cb_t), 240 offsetof(nce_cb_t, nce_cb_node)); 241 /* 242 * Atomically ensure that the ill is not CONDEMNED, before 243 * adding the NCE. 244 */ 245 mutex_enter(&ill->ill_lock); 246 if (ill->ill_state_flags & ILL_CONDEMNED) { 247 mutex_exit(&ill->ill_lock); 248 freeb(mp); 249 freeb(template); 250 return (EINVAL); 251 } 252 if ((nce->nce_next = *ncep) != NULL) 253 nce->nce_next->nce_ptpn = &nce->nce_next; 254 *ncep = nce; 255 nce->nce_ptpn = ncep; 256 *newnce = nce; 257 /* This one is for nce being used by an active thread */ 258 NCE_REFHOLD(*newnce); 259 260 /* Bump up the number of nce's referencing this ill */ 261 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 262 (char *), "nce", (void *), nce); 263 ill->ill_nce_cnt++; 264 mutex_exit(&ill->ill_lock); 265 266 err = 0; 267 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 268 mutex_enter(&nce->nce_lock); 269 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 270 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 271 mutex_exit(&nce->nce_lock); 272 dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE); 273 if (dropped) { 274 mutex_enter(&nce->nce_lock); 275 nce->nce_pcnt++; 276 mutex_exit(&nce->nce_lock); 277 } 278 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 279 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 280 err = EINPROGRESS; 281 } else if (flags & NCE_F_UNSOL_ADV) { 282 /* 283 * We account for the transmit below by assigning one 284 * less than the ndd variable. Subsequent decrements 285 * are done in ndp_timer. 
286 */ 287 mutex_enter(&nce->nce_lock); 288 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 289 nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1; 290 mutex_exit(&nce->nce_lock); 291 dropped = nce_xmit_advert(nce, B_TRUE, &ipv6_all_hosts_mcast, 292 0); 293 mutex_enter(&nce->nce_lock); 294 if (dropped) 295 nce->nce_unsolicit_count++; 296 if (nce->nce_unsolicit_count != 0) { 297 ASSERT(nce->nce_timeout_id == 0); 298 nce->nce_timeout_id = timeout(ndp_timer, nce, 299 MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval)); 300 } 301 mutex_exit(&nce->nce_lock); 302 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 303 } 304 305 /* 306 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 307 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 308 * We call nce_fastpath from nce_update if the link layer address of 309 * the peer changes from nce_update 310 */ 311 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 312 nce_fastpath(nce); 313 return (err); 314 } 315 316 int 317 ndp_lookup_then_add_v6(ill_t *ill, boolean_t match_illgrp, uchar_t *hw_addr, 318 const in6_addr_t *addr, const in6_addr_t *mask, 319 const in6_addr_t *extract_mask, uint32_t hw_extract_start, uint16_t flags, 320 uint16_t state, nce_t **newnce) 321 { 322 int err = 0; 323 nce_t *nce; 324 ip_stack_t *ipst = ill->ill_ipst; 325 326 ASSERT(ill->ill_isv6); 327 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 328 329 /* Get head of v6 hash table */ 330 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 331 nce = nce_lookup_addr(ill, match_illgrp, addr, nce); 332 if (nce == NULL) { 333 err = ndp_add_v6(ill, 334 hw_addr, 335 addr, 336 mask, 337 extract_mask, 338 hw_extract_start, 339 flags, 340 state, 341 newnce); 342 } else { 343 *newnce = nce; 344 err = EEXIST; 345 } 346 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 347 return (err); 348 } 349 350 /* 351 * Remove all the CONDEMNED nces from the appropriate hash table. 
352 * We create a private list of NCEs, these may have ires pointing 353 * to them, so the list will be passed through to clean up dependent 354 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 355 */ 356 static void 357 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 358 { 359 nce_t *nce1; 360 nce_t **ptpn; 361 362 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 363 ASSERT(ndp->ndp_g_walker == 0); 364 for (; nce; nce = nce1) { 365 nce1 = nce->nce_next; 366 mutex_enter(&nce->nce_lock); 367 if (nce->nce_flags & NCE_F_CONDEMNED) { 368 ptpn = nce->nce_ptpn; 369 nce1 = nce->nce_next; 370 if (nce1 != NULL) 371 nce1->nce_ptpn = ptpn; 372 *ptpn = nce1; 373 nce->nce_ptpn = NULL; 374 nce->nce_next = NULL; 375 nce->nce_next = *free_nce_list; 376 *free_nce_list = nce; 377 } 378 mutex_exit(&nce->nce_lock); 379 } 380 } 381 382 /* 383 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 384 * will return this NCE. Also no new IREs will be created that 385 * point to this NCE (See ire_add_v6). Also no new timeouts will 386 * be started (See NDP_RESTART_TIMER). 387 * 2. Cancel any currently running timeouts. 388 * 3. If there is an ndp walker, return. The walker will do the cleanup. 389 * This ensures that walkers see a consistent list of NCEs while walking. 390 * 4. Otherwise remove the NCE from the list of NCEs 391 * 5. Delete all IREs pointing to this NCE. 392 */ 393 void 394 ndp_delete(nce_t *nce) 395 { 396 nce_t **ptpn; 397 nce_t *nce1; 398 int ipversion = nce->nce_ipversion; 399 ndp_g_t *ndp; 400 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 401 402 if (ipversion == IPV4_VERSION) 403 ndp = ipst->ips_ndp4; 404 else 405 ndp = ipst->ips_ndp6; 406 407 /* Serialize deletes */ 408 mutex_enter(&nce->nce_lock); 409 if (nce->nce_flags & NCE_F_CONDEMNED) { 410 /* Some other thread is doing the delete */ 411 mutex_exit(&nce->nce_lock); 412 return; 413 } 414 /* 415 * Caller has a refhold. Also 1 ref for being in the list. 
Thus 416 * refcnt has to be >= 2 417 */ 418 ASSERT(nce->nce_refcnt >= 2); 419 nce->nce_flags |= NCE_F_CONDEMNED; 420 mutex_exit(&nce->nce_lock); 421 422 nce_fastpath_list_delete(nce); 423 424 /* Complete any waiting callbacks */ 425 nce_cb_dispatch(nce); 426 427 /* 428 * Cancel any running timer. Timeout can't be restarted 429 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 430 * Passing invalid timeout id is fine. 431 */ 432 if (nce->nce_timeout_id != 0) { 433 (void) untimeout(nce->nce_timeout_id); 434 nce->nce_timeout_id = 0; 435 } 436 437 mutex_enter(&ndp->ndp_g_lock); 438 if (nce->nce_ptpn == NULL) { 439 /* 440 * The last ndp walker has already removed this nce from 441 * the list after we marked the nce CONDEMNED and before 442 * we grabbed the global lock. 443 */ 444 mutex_exit(&ndp->ndp_g_lock); 445 return; 446 } 447 if (ndp->ndp_g_walker > 0) { 448 /* 449 * Can't unlink. The walker will clean up 450 */ 451 ndp->ndp_g_walker_cleanup = B_TRUE; 452 mutex_exit(&ndp->ndp_g_lock); 453 return; 454 } 455 456 /* 457 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 458 * the timer since it is marked CONDEMNED. 
459 */ 460 ptpn = nce->nce_ptpn; 461 nce1 = nce->nce_next; 462 if (nce1 != NULL) 463 nce1->nce_ptpn = ptpn; 464 *ptpn = nce1; 465 nce->nce_ptpn = NULL; 466 nce->nce_next = NULL; 467 mutex_exit(&ndp->ndp_g_lock); 468 469 nce_ire_delete(nce); 470 } 471 472 void 473 ndp_inactive(nce_t *nce) 474 { 475 mblk_t **mpp; 476 ill_t *ill; 477 478 ASSERT(nce->nce_refcnt == 0); 479 ASSERT(MUTEX_HELD(&nce->nce_lock)); 480 ASSERT(nce->nce_fastpath == NULL); 481 482 /* Free all nce allocated messages */ 483 mpp = &nce->nce_first_mp_to_free; 484 do { 485 while (*mpp != NULL) { 486 mblk_t *mp; 487 488 mp = *mpp; 489 *mpp = mp->b_next; 490 491 inet_freemsg(mp); 492 } 493 } while (mpp++ != &nce->nce_last_mp_to_free); 494 495 if (nce->nce_ipversion == IPV6_VERSION) { 496 /* 497 * must have been cleaned up in nce_delete 498 */ 499 ASSERT(list_is_empty(&nce->nce_cb)); 500 list_destroy(&nce->nce_cb); 501 } 502 #ifdef DEBUG 503 nce_trace_cleanup(nce); 504 #endif 505 506 ill = nce->nce_ill; 507 mutex_enter(&ill->ill_lock); 508 DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill, 509 (char *), "nce", (void *), nce); 510 ill->ill_nce_cnt--; 511 /* 512 * If the number of nce's associated with this ill have dropped 513 * to zero, check whether we need to restart any operation that 514 * is waiting for this to happen. 515 */ 516 if (ILL_DOWN_OK(ill)) { 517 /* ipif_ill_refrele_tail drops the ill_lock */ 518 ipif_ill_refrele_tail(ill); 519 } else { 520 mutex_exit(&ill->ill_lock); 521 } 522 mutex_destroy(&nce->nce_lock); 523 if (nce->nce_mp != NULL) 524 inet_freemsg(nce->nce_mp); 525 } 526 527 /* 528 * ndp_walk routine. Delete the nce if it is associated with the ill 529 * that is going away. Always called as a writer. 530 */ 531 void 532 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 533 { 534 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 535 ndp_delete(nce); 536 } 537 } 538 539 /* 540 * Walk a list of to be inactive NCEs and blow away all the ires. 
541 */ 542 static void 543 nce_ire_delete_list(nce_t *nce) 544 { 545 nce_t *nce_next; 546 547 ASSERT(nce != NULL); 548 while (nce != NULL) { 549 nce_next = nce->nce_next; 550 nce->nce_next = NULL; 551 552 /* 553 * It is possible for the last ndp walker (this thread) 554 * to come here after ndp_delete has marked the nce CONDEMNED 555 * and before it has removed the nce from the fastpath list 556 * or called untimeout. So we need to do it here. It is safe 557 * for both ndp_delete and this thread to do it twice or 558 * even simultaneously since each of the threads has a 559 * reference on the nce. 560 */ 561 nce_fastpath_list_delete(nce); 562 /* 563 * Cancel any running timer. Timeout can't be restarted 564 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 565 * Passing invalid timeout id is fine. 566 */ 567 if (nce->nce_timeout_id != 0) { 568 (void) untimeout(nce->nce_timeout_id); 569 nce->nce_timeout_id = 0; 570 } 571 /* 572 * We might hit this func thus in the v4 case: 573 * ipif_down->ipif_ndp_down->ndp_walk 574 */ 575 576 if (nce->nce_ipversion == IPV4_VERSION) { 577 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 578 IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill); 579 } else { 580 ASSERT(nce->nce_ipversion == IPV6_VERSION); 581 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 582 IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill); 583 } 584 NCE_REFRELE_NOTR(nce); 585 nce = nce_next; 586 } 587 } 588 589 /* 590 * Delete an ire when the nce goes away. 
591 */ 592 /* ARGSUSED */ 593 static void 594 nce_ire_delete(nce_t *nce) 595 { 596 if (nce->nce_ipversion == IPV6_VERSION) { 597 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 598 nce_ire_delete1, (char *)nce, nce->nce_ill); 599 NCE_REFRELE_NOTR(nce); 600 } else { 601 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 602 nce_ire_delete1, (char *)nce, nce->nce_ill); 603 NCE_REFRELE_NOTR(nce); 604 } 605 } 606 607 /* 608 * ire_walk routine used to delete every IRE that shares this nce 609 */ 610 static void 611 nce_ire_delete1(ire_t *ire, char *nce_arg) 612 { 613 nce_t *nce = (nce_t *)nce_arg; 614 615 ASSERT(ire->ire_type == IRE_CACHE); 616 617 if (ire->ire_nce == nce) { 618 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 619 ire_delete(ire); 620 } 621 } 622 623 /* 624 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 625 */ 626 boolean_t 627 ndp_restart_dad(nce_t *nce) 628 { 629 boolean_t started; 630 boolean_t dropped; 631 632 if (nce == NULL) 633 return (B_FALSE); 634 mutex_enter(&nce->nce_lock); 635 if (nce->nce_state == ND_PROBE) { 636 mutex_exit(&nce->nce_lock); 637 started = B_TRUE; 638 } else if (nce->nce_state == ND_REACHABLE) { 639 nce->nce_state = ND_PROBE; 640 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 641 mutex_exit(&nce->nce_lock); 642 dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE); 643 if (dropped) { 644 mutex_enter(&nce->nce_lock); 645 nce->nce_pcnt++; 646 mutex_exit(&nce->nce_lock); 647 } 648 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 649 started = B_TRUE; 650 } else { 651 mutex_exit(&nce->nce_lock); 652 started = B_FALSE; 653 } 654 return (started); 655 } 656 657 /* 658 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 659 * If one is found, the refcnt on the nce will be incremented. 
660 */ 661 nce_t * 662 ndp_lookup_v6(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr, 663 boolean_t caller_holds_lock) 664 { 665 nce_t *nce; 666 ip_stack_t *ipst = ill->ill_ipst; 667 668 ASSERT(ill->ill_isv6); 669 if (!caller_holds_lock) 670 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 671 672 /* Get head of v6 hash table */ 673 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 674 nce = nce_lookup_addr(ill, match_illgrp, addr, nce); 675 if (nce == NULL) 676 nce = nce_lookup_mapping(ill, addr); 677 if (!caller_holds_lock) 678 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 679 return (nce); 680 } 681 /* 682 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 683 * If one is found, the refcnt on the nce will be incremented. 684 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 685 * so we skip the nce_lookup_mapping call. 686 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 687 */ 688 nce_t * 689 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 690 { 691 nce_t *nce; 692 in6_addr_t addr6; 693 ip_stack_t *ipst = ill->ill_ipst; 694 695 if (!caller_holds_lock) 696 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 697 698 /* Get head of v4 hash table */ 699 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 700 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 701 /* 702 * NOTE: IPv4 never matches across the illgrp since the NCE's we're 703 * looking up have fastpath headers that are inherently per-ill. 704 */ 705 nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce); 706 if (!caller_holds_lock) 707 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 708 return (nce); 709 } 710 711 /* 712 * Cache entry lookup. Try to find an nce matching the parameters passed. 713 * Look only for exact entries (no mappings). If an nce is found, increment 714 * the hold count on that nce. 
The caller passes in the start of the 715 * appropriate hash table, and must be holding the appropriate global 716 * lock (ndp_g_lock). 717 */ 718 static nce_t * 719 nce_lookup_addr(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr, 720 nce_t *nce) 721 { 722 ndp_g_t *ndp; 723 ip_stack_t *ipst = ill->ill_ipst; 724 725 if (ill->ill_isv6) 726 ndp = ipst->ips_ndp6; 727 else 728 ndp = ipst->ips_ndp4; 729 730 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 731 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 732 return (NULL); 733 for (; nce != NULL; nce = nce->nce_next) { 734 if (nce->nce_ill == ill || 735 match_illgrp && IS_IN_SAME_ILLGRP(ill, nce->nce_ill)) { 736 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 737 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 738 &ipv6_all_ones)) { 739 mutex_enter(&nce->nce_lock); 740 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 741 NCE_REFHOLD_LOCKED(nce); 742 mutex_exit(&nce->nce_lock); 743 break; 744 } 745 mutex_exit(&nce->nce_lock); 746 } 747 } 748 } 749 return (nce); 750 } 751 752 /* 753 * Cache entry lookup. Try to find an nce matching the parameters passed. 754 * Look only for mappings. 755 */ 756 static nce_t * 757 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 758 { 759 nce_t *nce; 760 ip_stack_t *ipst = ill->ill_ipst; 761 762 ASSERT(ill != NULL && ill->ill_isv6); 763 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 764 if (!IN6_IS_ADDR_MULTICAST(addr)) 765 return (NULL); 766 nce = ipst->ips_ndp6->nce_mask_entries; 767 for (; nce != NULL; nce = nce->nce_next) 768 if (nce->nce_ill == ill && 769 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 770 mutex_enter(&nce->nce_lock); 771 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 772 NCE_REFHOLD_LOCKED(nce); 773 mutex_exit(&nce->nce_lock); 774 break; 775 } 776 mutex_exit(&nce->nce_lock); 777 } 778 return (nce); 779 } 780 781 /* 782 * Process passed in parameters either from an incoming packet or via 783 * user ioctl. 
784 */ 785 static void 786 nce_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 787 { 788 ill_t *ill = nce->nce_ill; 789 uint32_t hw_addr_len = ill->ill_nd_lla_len; 790 mblk_t *mp; 791 boolean_t ll_updated = B_FALSE; 792 boolean_t ll_changed; 793 ip_stack_t *ipst = ill->ill_ipst; 794 795 ASSERT(nce->nce_ipversion == IPV6_VERSION); 796 /* 797 * No updates of link layer address or the neighbor state is 798 * allowed, when the cache is in NONUD state. This still 799 * allows for responding to reachability solicitation. 800 */ 801 mutex_enter(&nce->nce_lock); 802 if (nce->nce_state == ND_INCOMPLETE) { 803 if (hw_addr == NULL) { 804 mutex_exit(&nce->nce_lock); 805 return; 806 } 807 nce_set_ll(nce, hw_addr); 808 /* 809 * Update nce state and send the queued packets 810 * back to ip this time ire will be added. 811 */ 812 if (flag & ND_NA_FLAG_SOLICITED) { 813 nce_update(nce, ND_REACHABLE, NULL); 814 } else { 815 nce_update(nce, ND_STALE, NULL); 816 } 817 mutex_exit(&nce->nce_lock); 818 nce_fastpath(nce); 819 nce_cb_dispatch(nce); /* complete callbacks */ 820 mutex_enter(&nce->nce_lock); 821 mp = nce->nce_qd_mp; 822 nce->nce_qd_mp = NULL; 823 mutex_exit(&nce->nce_lock); 824 while (mp != NULL) { 825 mblk_t *nxt_mp, *data_mp; 826 827 nxt_mp = mp->b_next; 828 mp->b_next = NULL; 829 830 if (mp->b_datap->db_type == M_CTL) 831 data_mp = mp->b_cont; 832 else 833 data_mp = mp; 834 if (data_mp->b_prev != NULL) { 835 ill_t *inbound_ill; 836 queue_t *fwdq = NULL; 837 uint_t ifindex; 838 839 ifindex = (uint_t)(uintptr_t)data_mp->b_prev; 840 inbound_ill = ill_lookup_on_ifindex(ifindex, 841 B_TRUE, NULL, NULL, NULL, NULL, ipst); 842 if (inbound_ill == NULL) { 843 data_mp->b_prev = NULL; 844 freemsg(mp); 845 return; 846 } else { 847 fwdq = inbound_ill->ill_rq; 848 } 849 data_mp->b_prev = NULL; 850 /* 851 * Send a forwarded packet back into ip_rput_v6 852 * just as in ire_send_v6(). 853 * Extract the queue from b_prev (set in 854 * ip_rput_data_v6). 
855 */ 856 if (fwdq != NULL) { 857 /* 858 * Forwarded packets hop count will 859 * get decremented in ip_rput_data_v6 860 */ 861 if (data_mp != mp) 862 freeb(mp); 863 put(fwdq, data_mp); 864 } else { 865 /* 866 * Send locally originated packets back 867 * into ip_wput_v6. 868 */ 869 put(ill->ill_wq, mp); 870 } 871 ill_refrele(inbound_ill); 872 } else { 873 put(ill->ill_wq, mp); 874 } 875 mp = nxt_mp; 876 } 877 return; 878 } 879 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 880 if (!is_adv) { 881 /* If this is a SOLICITATION request only */ 882 if (ll_changed) 883 nce_update(nce, ND_STALE, hw_addr); 884 mutex_exit(&nce->nce_lock); 885 nce_cb_dispatch(nce); 886 return; 887 } 888 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 889 /* If in any other state than REACHABLE, ignore */ 890 if (nce->nce_state == ND_REACHABLE) { 891 nce_update(nce, ND_STALE, NULL); 892 } 893 mutex_exit(&nce->nce_lock); 894 nce_cb_dispatch(nce); 895 return; 896 } else { 897 if (ll_changed) { 898 nce_update(nce, ND_UNCHANGED, hw_addr); 899 ll_updated = B_TRUE; 900 } 901 if (flag & ND_NA_FLAG_SOLICITED) { 902 nce_update(nce, ND_REACHABLE, NULL); 903 } else { 904 if (ll_updated) { 905 nce_update(nce, ND_STALE, NULL); 906 } 907 } 908 mutex_exit(&nce->nce_lock); 909 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 910 NCE_F_ISROUTER)) { 911 ire_t *ire; 912 913 /* 914 * Router turned to host. We need to remove the 915 * entry as well as any default route that may be 916 * using this as a next hop. This is required by 917 * section 7.2.5 of RFC 2461. 
918 */ 919 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 920 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 921 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 922 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 923 MATCH_IRE_DEFAULT, ipst); 924 if (ire != NULL) { 925 ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); 926 ire_delete(ire); 927 ire_refrele(ire); 928 } 929 ndp_delete(nce); /* will do nce_cb_dispatch */ 930 } else { 931 nce_cb_dispatch(nce); 932 } 933 } 934 } 935 936 /* 937 * Walker state structure used by ndp_process() / ndp_process_entry(). 938 */ 939 typedef struct ndp_process_data { 940 ill_t *np_ill; /* ill/illgrp to match against */ 941 const in6_addr_t *np_addr; /* IPv6 address to match */ 942 uchar_t *np_hw_addr; /* passed to nce_process() */ 943 uint32_t np_flag; /* passed to nce_process() */ 944 boolean_t np_is_adv; /* passed to nce_process() */ 945 } ndp_process_data_t; 946 947 /* 948 * Walker callback used by ndp_process() for IPMP groups: calls nce_process() 949 * for each NCE with a matching address that's in the same IPMP group. 950 */ 951 static void 952 ndp_process_entry(nce_t *nce, void *arg) 953 { 954 ndp_process_data_t *npp = arg; 955 956 if (IS_IN_SAME_ILLGRP(nce->nce_ill, npp->np_ill) && 957 IN6_ARE_ADDR_EQUAL(&nce->nce_addr, npp->np_addr) && 958 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 959 nce_process(nce, npp->np_hw_addr, npp->np_flag, npp->np_is_adv); 960 } 961 } 962 963 /* 964 * Wrapper around nce_process() that handles IPMP. In particular, for IPMP, 965 * NCEs are per-underlying-ill (because of nce_fp_mp) and thus we may have 966 * more than one NCE for a given IPv6 address to tend to. In that case, we 967 * need to walk all NCEs and callback nce_process() for each one. Since this 968 * is expensive, in the non-IPMP case we just directly call nce_process(). 
969 * Ultimately, nce_fp_mp needs to be moved out of the nce_t so that all IP 970 * interfaces in an IPMP group share the same NCEs -- at which point this 971 * function can be removed entirely. 972 */ 973 void 974 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 975 { 976 ill_t *ill = nce->nce_ill; 977 struct ndp_g_s *ndp = ill->ill_ipst->ips_ndp6; 978 ndp_process_data_t np; 979 980 if (ill->ill_grp == NULL) { 981 nce_process(nce, hw_addr, flag, is_adv); 982 return; 983 } 984 985 /* IPMP case: walk all NCEs */ 986 np.np_ill = ill; 987 np.np_addr = &nce->nce_addr; 988 np.np_flag = flag; 989 np.np_is_adv = is_adv; 990 np.np_hw_addr = hw_addr; 991 992 ndp_walk_common(ndp, NULL, (pfi_t)ndp_process_entry, &np, ALL_ZONES); 993 } 994 995 /* 996 * Pass arg1 to the pfi supplied, along with each nce in existence. 997 * ndp_walk() places a REFHOLD on the nce and drops the lock when 998 * walking the hash list. 999 */ 1000 void 1001 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 1002 boolean_t trace) 1003 { 1004 nce_t *nce; 1005 nce_t *nce1; 1006 nce_t **ncep; 1007 nce_t *free_nce_list = NULL; 1008 1009 mutex_enter(&ndp->ndp_g_lock); 1010 /* Prevent ndp_delete from unlink and free of NCE */ 1011 ndp->ndp_g_walker++; 1012 mutex_exit(&ndp->ndp_g_lock); 1013 for (ncep = ndp->nce_hash_tbl; 1014 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 1015 for (nce = *ncep; nce != NULL; nce = nce1) { 1016 nce1 = nce->nce_next; 1017 if (ill == NULL || nce->nce_ill == ill) { 1018 if (trace) { 1019 NCE_REFHOLD(nce); 1020 (*pfi)(nce, arg1); 1021 NCE_REFRELE(nce); 1022 } else { 1023 NCE_REFHOLD_NOTR(nce); 1024 (*pfi)(nce, arg1); 1025 NCE_REFRELE_NOTR(nce); 1026 } 1027 } 1028 } 1029 } 1030 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 1031 nce1 = nce->nce_next; 1032 if (ill == NULL || nce->nce_ill == ill) { 1033 if (trace) { 1034 NCE_REFHOLD(nce); 1035 (*pfi)(nce, arg1); 1036 NCE_REFRELE(nce); 1037 } else { 1038 NCE_REFHOLD_NOTR(nce); 1039 
(*pfi)(nce, arg1); 1040 NCE_REFRELE_NOTR(nce); 1041 } 1042 } 1043 } 1044 mutex_enter(&ndp->ndp_g_lock); 1045 ndp->ndp_g_walker--; 1046 /* 1047 * While NCE's are removed from global list they are placed 1048 * in a private list, to be passed to nce_ire_delete_list(). 1049 * The reason is, there may be ires pointing to this nce 1050 * which needs to cleaned up. 1051 */ 1052 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 1053 /* Time to delete condemned entries */ 1054 for (ncep = ndp->nce_hash_tbl; 1055 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 1056 nce = *ncep; 1057 if (nce != NULL) { 1058 nce_remove(ndp, nce, &free_nce_list); 1059 } 1060 } 1061 nce = ndp->nce_mask_entries; 1062 if (nce != NULL) { 1063 nce_remove(ndp, nce, &free_nce_list); 1064 } 1065 ndp->ndp_g_walker_cleanup = B_FALSE; 1066 } 1067 1068 mutex_exit(&ndp->ndp_g_lock); 1069 1070 if (free_nce_list != NULL) { 1071 nce_ire_delete_list(free_nce_list); 1072 } 1073 } 1074 1075 /* 1076 * Walk everything. 1077 * Note that ill can be NULL hence can't derive the ipst from it. 1078 */ 1079 void 1080 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst) 1081 { 1082 ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE); 1083 ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE); 1084 } 1085 1086 /* 1087 * Process resolve requests. Handles both mapped entries 1088 * as well as cases that needs to be send out on the wire. 1089 * Lookup a NCE for a given IRE. Regardless of whether one exists 1090 * or one is created, we defer making ire point to nce until the 1091 * ire is actually added at which point the nce_refcnt on the nce is 1092 * incremented. This is done primarily to have symmetry between ire_add() 1093 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 
1094 */ 1095 int 1096 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 1097 { 1098 nce_t *nce, *hw_nce = NULL; 1099 int err; 1100 ill_t *ipmp_ill; 1101 uint16_t nce_flags; 1102 mblk_t *mp_nce = NULL; 1103 ip_stack_t *ipst = ill->ill_ipst; 1104 uchar_t *hwaddr = NULL; 1105 1106 ASSERT(ill->ill_isv6); 1107 1108 if (IN6_IS_ADDR_MULTICAST(dst)) 1109 return (nce_set_multicast(ill, dst)); 1110 1111 nce_flags = (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0; 1112 1113 /* 1114 * If `ill' is under IPMP, then first check to see if there's an NCE 1115 * for `dst' on the IPMP meta-interface (e.g., because an application 1116 * explicitly did an SIOCLIFSETND to tie a hardware address to `dst'). 1117 * If so, we use that hardware address when creating the NCE below. 1118 * Note that we don't yet have a mechanism to remove these NCEs if the 1119 * NCE for `dst' on the IPMP meta-interface is subsequently removed -- 1120 * but rather than build such a beast, we should fix NCEs so that they 1121 * can be properly shared across an IPMP group. 1122 */ 1123 if (IS_UNDER_IPMP(ill)) { 1124 if ((ipmp_ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) { 1125 hw_nce = ndp_lookup_v6(ipmp_ill, B_FALSE, dst, B_FALSE); 1126 if (hw_nce != NULL && hw_nce->nce_res_mp != NULL) { 1127 hwaddr = hw_nce->nce_res_mp->b_rptr + 1128 NCE_LL_ADDR_OFFSET(ipmp_ill); 1129 nce_flags |= hw_nce->nce_flags; 1130 } 1131 ill_refrele(ipmp_ill); 1132 } 1133 } 1134 1135 err = ndp_lookup_then_add_v6(ill, 1136 B_FALSE, /* NCE fastpath is per ill; don't match across group */ 1137 hwaddr, 1138 dst, 1139 &ipv6_all_ones, 1140 &ipv6_all_zeros, 1141 0, 1142 nce_flags, 1143 hwaddr != NULL ? ND_REACHABLE : ND_INCOMPLETE, 1144 &nce); 1145 1146 if (hw_nce != NULL) 1147 NCE_REFRELE(hw_nce); 1148 1149 switch (err) { 1150 case 0: 1151 /* 1152 * New cache entry was created. Make sure that the state 1153 * is not ND_INCOMPLETE. 
It can be in some other state 1154 * even before we send out the solicitation as we could 1155 * get un-solicited advertisements. 1156 * 1157 * If this is an XRESOLV interface, simply return 0, 1158 * since we don't want to solicit just yet. 1159 */ 1160 if (ill->ill_flags & ILLF_XRESOLV) { 1161 NCE_REFRELE(nce); 1162 return (0); 1163 } 1164 1165 mutex_enter(&nce->nce_lock); 1166 if (nce->nce_state != ND_INCOMPLETE) { 1167 mutex_exit(&nce->nce_lock); 1168 NCE_REFRELE(nce); 1169 return (0); 1170 } 1171 if (nce->nce_rcnt == 0) { 1172 /* The caller will free mp */ 1173 mutex_exit(&nce->nce_lock); 1174 ndp_delete(nce); 1175 NCE_REFRELE(nce); 1176 return (ESRCH); 1177 } 1178 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1179 if (mp_nce == NULL) { 1180 /* The caller will free mp */ 1181 mutex_exit(&nce->nce_lock); 1182 ndp_delete(nce); 1183 NCE_REFRELE(nce); 1184 return (ENOMEM); 1185 } 1186 nce_queue_mp(nce, mp_nce); 1187 ip_ndp_resolve(nce); 1188 mutex_exit(&nce->nce_lock); 1189 NCE_REFRELE(nce); 1190 return (EINPROGRESS); 1191 case EEXIST: 1192 /* Resolution in progress just queue the packet */ 1193 mutex_enter(&nce->nce_lock); 1194 if (nce->nce_state == ND_INCOMPLETE) { 1195 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1196 if (mp_nce == NULL) { 1197 err = ENOMEM; 1198 } else { 1199 nce_queue_mp(nce, mp_nce); 1200 err = EINPROGRESS; 1201 } 1202 } else { 1203 /* 1204 * Any other state implies we have 1205 * a nce but IRE needs to be added ... 1206 * ire_add_v6() will take care of the 1207 * the case when the nce becomes CONDEMNED 1208 * before the ire is added to the table. 1209 */ 1210 err = 0; 1211 } 1212 mutex_exit(&nce->nce_lock); 1213 NCE_REFRELE(nce); 1214 break; 1215 default: 1216 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 1217 break; 1218 } 1219 return (err); 1220 } 1221 1222 /* 1223 * When there is no resolver, the link layer template is passed in 1224 * the IRE. 1225 * Lookup a NCE for a given IRE. 
Regardless of whether one exists 1226 * or one is created, we defer making ire point to nce until the 1227 * ire is actually added at which point the nce_refcnt on the nce is 1228 * incremented. This is done primarily to have symmetry between ire_add() 1229 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1230 */ 1231 int 1232 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1233 { 1234 nce_t *nce; 1235 int err = 0; 1236 1237 ASSERT(ill != NULL); 1238 ASSERT(ill->ill_isv6); 1239 if (IN6_IS_ADDR_MULTICAST(dst)) { 1240 err = nce_set_multicast(ill, dst); 1241 return (err); 1242 } 1243 1244 err = ndp_lookup_then_add_v6(ill, 1245 B_FALSE, /* NCE fastpath is per ill; don't match across group */ 1246 ill->ill_dest_addr, /* hardware address is NULL in most cases */ 1247 dst, 1248 &ipv6_all_ones, 1249 &ipv6_all_zeros, 1250 0, 1251 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1252 ND_REACHABLE, 1253 &nce); 1254 1255 switch (err) { 1256 case 0: 1257 /* 1258 * Cache entry with a proper resolver cookie was 1259 * created. 1260 */ 1261 NCE_REFRELE(nce); 1262 break; 1263 case EEXIST: 1264 err = 0; 1265 NCE_REFRELE(nce); 1266 break; 1267 default: 1268 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1269 break; 1270 } 1271 return (err); 1272 } 1273 1274 /* 1275 * For each interface an entry is added for the unspecified multicast group. 1276 * Here that mapping is used to form the multicast cache entry for a particular 1277 * multicast destination. 
1278 */ 1279 static int 1280 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1281 { 1282 nce_t *mnce; /* Multicast mapping entry */ 1283 nce_t *nce; 1284 uchar_t *hw_addr = NULL; 1285 int err = 0; 1286 ip_stack_t *ipst = ill->ill_ipst; 1287 1288 ASSERT(ill != NULL); 1289 ASSERT(ill->ill_isv6); 1290 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1291 1292 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1293 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst)); 1294 nce = nce_lookup_addr(ill, B_FALSE, dst, nce); 1295 if (nce != NULL) { 1296 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1297 NCE_REFRELE(nce); 1298 return (0); 1299 } 1300 /* No entry, now lookup for a mapping this should never fail */ 1301 mnce = nce_lookup_mapping(ill, dst); 1302 if (mnce == NULL) { 1303 /* Something broken for the interface. */ 1304 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1305 return (ESRCH); 1306 } 1307 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1308 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1309 /* 1310 * For IRE_IF_RESOLVER a hardware mapping can be 1311 * generated, for IRE_IF_NORESOLVER, resolution cookie 1312 * in the ill is copied in ndp_add_v6(). 1313 */ 1314 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1315 if (hw_addr == NULL) { 1316 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1317 NCE_REFRELE(mnce); 1318 return (ENOMEM); 1319 } 1320 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1321 } 1322 NCE_REFRELE(mnce); 1323 /* 1324 * IRE_IF_NORESOLVER type simply copies the resolution 1325 * cookie passed in. So no hw_addr is needed. 
1326 */ 1327 err = ndp_add_v6(ill, 1328 hw_addr, 1329 dst, 1330 &ipv6_all_ones, 1331 &ipv6_all_zeros, 1332 0, 1333 NCE_F_NONUD, 1334 ND_REACHABLE, 1335 &nce); 1336 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1337 if (hw_addr != NULL) 1338 kmem_free(hw_addr, ill->ill_nd_lla_len); 1339 if (err != 0) { 1340 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1341 return (err); 1342 } 1343 NCE_REFRELE(nce); 1344 return (0); 1345 } 1346 1347 /* 1348 * Return the link layer address, and any flags of a nce. 1349 */ 1350 int 1351 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1352 { 1353 nce_t *nce; 1354 in6_addr_t *addr; 1355 sin6_t *sin6; 1356 dl_unitdata_req_t *dl; 1357 1358 ASSERT(ill != NULL && ill->ill_isv6); 1359 sin6 = (sin6_t *)&lnr->lnr_addr; 1360 addr = &sin6->sin6_addr; 1361 1362 /* 1363 * NOTE: if the ill is an IPMP interface, then match against the whole 1364 * illgrp. This e.g. allows in.ndpd to retrieve the link layer 1365 * addresses for the data addresses on an IPMP interface even though 1366 * ipif_ndp_up() created them with an nce_ill of ipif_bound_ill. 
1367 */ 1368 nce = ndp_lookup_v6(ill, IS_IPMP(ill), addr, B_FALSE); 1369 if (nce == NULL) 1370 return (ESRCH); 1371 /* If in INCOMPLETE state, no link layer address is available yet */ 1372 if (!NCE_ISREACHABLE(nce)) { 1373 NCE_REFRELE(nce); 1374 return (ESRCH); 1375 } 1376 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1377 if (ill->ill_flags & ILLF_XRESOLV) 1378 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1379 else 1380 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1381 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1382 sizeof (lnr->lnr_hdw_addr)); 1383 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1384 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1385 if (nce->nce_flags & NCE_F_ISROUTER) 1386 lnr->lnr_flags = NDF_ISROUTER_ON; 1387 if (nce->nce_flags & NCE_F_ANYCAST) 1388 lnr->lnr_flags |= NDF_ANYCAST_ON; 1389 NCE_REFRELE(nce); 1390 return (0); 1391 } 1392 1393 /* 1394 * Send Enable/Disable multicast reqs to driver. 1395 */ 1396 int 1397 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1398 uint32_t hw_addr_offset, mblk_t *mp) 1399 { 1400 nce_t *nce; 1401 uchar_t *hw_addr; 1402 ip_stack_t *ipst = ill->ill_ipst; 1403 1404 ASSERT(ill != NULL && ill->ill_isv6); 1405 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1406 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1407 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1408 freemsg(mp); 1409 return (EINVAL); 1410 } 1411 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1412 nce = nce_lookup_mapping(ill, addr); 1413 if (nce == NULL) { 1414 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1415 freemsg(mp); 1416 return (ESRCH); 1417 } 1418 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1419 /* 1420 * Update dl_addr_length and dl_addr_offset for primitives that 1421 * have physical addresses as opposed to full saps 1422 */ 1423 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1424 case DL_ENABMULTI_REQ: 1425 /* Track the state if this is the first enabmulti */ 1426 if 
(ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1427 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1428 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1429 break; 1430 case DL_DISABMULTI_REQ: 1431 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1432 break; 1433 default: 1434 NCE_REFRELE(nce); 1435 ip1dbg(("ndp_mcastreq: default\n")); 1436 return (EINVAL); 1437 } 1438 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1439 NCE_REFRELE(nce); 1440 ill_dlpi_send(ill, mp); 1441 return (0); 1442 } 1443 1444 1445 /* 1446 * Send out a NS for resolving the ip address in nce. 1447 */ 1448 void 1449 ip_ndp_resolve(nce_t *nce) 1450 { 1451 in6_addr_t sender6 = ipv6_all_zeros; 1452 uint32_t ms; 1453 mblk_t *mp; 1454 ip6_t *ip6h; 1455 1456 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1457 /* 1458 * Pick the src from outgoing packet, if one is available. 1459 * Otherwise let nce_xmit figure out the src. 1460 */ 1461 if ((mp = nce->nce_qd_mp) != NULL) { 1462 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1463 if (mp->b_datap->db_type == M_CTL) 1464 mp = mp->b_cont; 1465 ip6h = (ip6_t *)mp->b_rptr; 1466 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1467 /* 1468 * This message should have been pulled up already in 1469 * ip_wput_v6. We can't do pullups here because 1470 * the message could be from the nce_qd_mp which could 1471 * have b_next/b_prev non-NULL. 1472 */ 1473 ASSERT(MBLKL(mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN); 1474 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1475 } 1476 sender6 = ip6h->ip6_src; 1477 } 1478 ms = nce_solicit(nce, sender6); 1479 mutex_exit(&nce->nce_lock); 1480 if (ms == 0) { 1481 if (nce->nce_state != ND_REACHABLE) { 1482 nce_resolv_failed(nce); 1483 ndp_delete(nce); 1484 } 1485 } else { 1486 NDP_RESTART_TIMER(nce, (clock_t)ms); 1487 } 1488 mutex_enter(&nce->nce_lock); 1489 } 1490 1491 /* 1492 * Send a neighbor solicitation. 1493 * Returns number of milliseconds after which we should either rexmit or abort. 1494 * Return of zero means we should abort. 
1495 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1496 * 1497 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1498 * the packet. 1499 */ 1500 uint32_t 1501 nce_solicit(nce_t *nce, in6_addr_t sender) 1502 { 1503 boolean_t dropped; 1504 1505 ASSERT(nce->nce_ipversion == IPV6_VERSION); 1506 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1507 1508 if (nce->nce_rcnt == 0) 1509 return (0); 1510 1511 nce->nce_rcnt--; 1512 mutex_exit(&nce->nce_lock); 1513 dropped = nce_xmit_solicit(nce, B_TRUE, &sender, 0); 1514 mutex_enter(&nce->nce_lock); 1515 if (dropped) 1516 nce->nce_rcnt++; 1517 return (nce->nce_ill->ill_reachable_retrans_time); 1518 } 1519 1520 /* 1521 * Attempt to recover an address on an interface that's been marked as a 1522 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1523 * no easy way to just probe the address and have the right thing happen if 1524 * it's no longer in use. Instead, we just bring it up normally and allow the 1525 * regular interface start-up logic to probe for a remaining duplicate and take 1526 * us back down if necessary. 1527 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1528 * ip_ndp_excl. 1529 */ 1530 /* ARGSUSED */ 1531 static void 1532 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1533 { 1534 ill_t *ill = rq->q_ptr; 1535 ipif_t *ipif; 1536 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1537 1538 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1539 /* 1540 * We do not support recovery of proxy ARP'd interfaces, 1541 * because the system lacks a complete proxy ARP mechanism. 1542 */ 1543 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1544 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1545 continue; 1546 } 1547 1548 /* 1549 * If we have already recovered or if the interface is going 1550 * away, then ignore. 
1551 */ 1552 mutex_enter(&ill->ill_lock); 1553 if (!(ipif->ipif_flags & IPIF_DUPLICATE) || 1554 (ipif->ipif_state_flags & IPIF_CONDEMNED)) { 1555 mutex_exit(&ill->ill_lock); 1556 continue; 1557 } 1558 1559 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1560 ill->ill_ipif_dup_count--; 1561 mutex_exit(&ill->ill_lock); 1562 ipif->ipif_was_dup = B_TRUE; 1563 1564 VERIFY(ipif_ndp_up(ipif, B_TRUE) != EINPROGRESS); 1565 (void) ipif_up_done_v6(ipif); 1566 } 1567 freeb(mp); 1568 } 1569 1570 /* 1571 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1572 * As long as someone else holds the address, the interface will stay down. 1573 * When that conflict goes away, the interface is brought back up. This is 1574 * done so that accidental shutdowns of addresses aren't made permanent. Your 1575 * server will recover from a failure. 1576 * 1577 * For DHCP and temporary addresses, recovery is not done in the kernel. 1578 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1579 * 1580 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1581 */ 1582 static void 1583 ipif6_dup_recovery(void *arg) 1584 { 1585 ipif_t *ipif = arg; 1586 1587 ipif->ipif_recovery_id = 0; 1588 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1589 return; 1590 1591 /* 1592 * No lock, because this is just an optimization. 1593 */ 1594 if (ipif->ipif_state_flags & IPIF_CONDEMNED) 1595 return; 1596 1597 /* If the link is down, we'll retry this later */ 1598 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1599 return; 1600 1601 ndp_do_recovery(ipif); 1602 } 1603 1604 /* 1605 * Perform interface recovery by forcing the duplicate interfaces up and 1606 * allowing the system to determine which ones should stay up. 1607 * 1608 * Called both by recovery timer expiry and link-up notification. 
1609 */ 1610 void 1611 ndp_do_recovery(ipif_t *ipif) 1612 { 1613 ill_t *ill = ipif->ipif_ill; 1614 mblk_t *mp; 1615 ip_stack_t *ipst = ill->ill_ipst; 1616 1617 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1618 if (mp == NULL) { 1619 mutex_enter(&ill->ill_lock); 1620 if (ipif->ipif_recovery_id == 0 && 1621 !(ipif->ipif_state_flags & IPIF_CONDEMNED)) { 1622 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1623 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1624 } 1625 mutex_exit(&ill->ill_lock); 1626 } else { 1627 /* 1628 * A recovery timer may still be running if we got here from 1629 * ill_restart_dad(); cancel that timer. 1630 */ 1631 if (ipif->ipif_recovery_id != 0) 1632 (void) untimeout(ipif->ipif_recovery_id); 1633 ipif->ipif_recovery_id = 0; 1634 1635 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1636 sizeof (ipif->ipif_v6lcl_addr)); 1637 ill_refhold(ill); 1638 qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP, 1639 B_FALSE); 1640 } 1641 } 1642 1643 /* 1644 * Find the MAC and IP addresses in an NA/NS message. 1645 */ 1646 static void 1647 ip_ndp_find_addresses(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, in6_addr_t *targp, 1648 uchar_t **haddr, uint_t *haddrlenp) 1649 { 1650 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1651 icmp6_t *icmp6 = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1652 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 1653 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 1654 uchar_t *addr; 1655 int alen = 0; 1656 1657 if (dl_mp == NULL) { 1658 nd_opt_hdr_t *opt = NULL; 1659 int len; 1660 1661 /* 1662 * If it's from the fast-path, then it can't be a probe 1663 * message, and thus must include a linkaddr option. 1664 * Extract that here. 
1665 */ 1666 switch (icmp6->icmp6_type) { 1667 case ND_NEIGHBOR_SOLICIT: 1668 len = mp->b_wptr - (uchar_t *)ns; 1669 if ((len -= sizeof (*ns)) > 0) { 1670 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), 1671 len, ND_OPT_SOURCE_LINKADDR); 1672 } 1673 break; 1674 case ND_NEIGHBOR_ADVERT: 1675 len = mp->b_wptr - (uchar_t *)na; 1676 if ((len -= sizeof (*na)) > 0) { 1677 opt = ndp_get_option((nd_opt_hdr_t *)(na + 1), 1678 len, ND_OPT_TARGET_LINKADDR); 1679 } 1680 break; 1681 } 1682 1683 if (opt != NULL && opt->nd_opt_len * 8 - sizeof (*opt) >= 1684 ill->ill_nd_lla_len) { 1685 addr = (uchar_t *)(opt + 1); 1686 alen = ill->ill_nd_lla_len; 1687 } 1688 1689 /* 1690 * We cheat a bit here for the sake of printing usable log 1691 * messages in the rare case where the reply we got was unicast 1692 * without a source linkaddr option, and the interface is in 1693 * fastpath mode. (Sigh.) 1694 */ 1695 if (alen == 0 && ill->ill_type == IFT_ETHER && 1696 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1697 struct ether_header *pether; 1698 1699 pether = (struct ether_header *)((char *)ip6h - 1700 sizeof (*pether)); 1701 addr = pether->ether_shost.ether_addr_octet; 1702 alen = ETHERADDRL; 1703 } 1704 } else { 1705 dl_unitdata_ind_t *dlu; 1706 1707 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1708 alen = dlu->dl_src_addr_length; 1709 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1710 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1711 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1712 if (ill->ill_sap_length < 0) { 1713 alen += ill->ill_sap_length; 1714 } else { 1715 addr += ill->ill_sap_length; 1716 alen -= ill->ill_sap_length; 1717 } 1718 } 1719 } 1720 1721 if (alen > 0) { 1722 *haddr = addr; 1723 *haddrlenp = alen; 1724 } else { 1725 *haddr = NULL; 1726 *haddrlenp = 0; 1727 } 1728 1729 /* nd_ns_target and nd_na_target are at the same offset, so we cheat */ 1730 *targp = ns->nd_ns_target; 1731 } 1732 1733 /* 1734 * This is for exclusive changes due to NDP duplicate 
address detection 1735 * failure. 1736 */ 1737 /* ARGSUSED */ 1738 static void 1739 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1740 { 1741 ill_t *ill = rq->q_ptr; 1742 ipif_t *ipif; 1743 mblk_t *dl_mp = NULL; 1744 uchar_t *haddr; 1745 uint_t haddrlen; 1746 ip_stack_t *ipst = ill->ill_ipst; 1747 in6_addr_t targ; 1748 1749 if (DB_TYPE(mp) != M_DATA) { 1750 dl_mp = mp; 1751 mp = mp->b_cont; 1752 } 1753 1754 ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen); 1755 if (haddr != NULL && haddrlen == ill->ill_phys_addr_length) { 1756 /* 1757 * Ignore conflicts generated by misbehaving switches that 1758 * just reflect our own messages back to us. For IPMP, we may 1759 * see reflections across any ill in the illgrp. 1760 */ 1761 if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 || 1762 IS_UNDER_IPMP(ill) && 1763 ipmp_illgrp_find_ill(ill->ill_grp, haddr, haddrlen) != NULL) 1764 goto ignore_conflict; 1765 } 1766 1767 /* 1768 * Look up the appropriate ipif. 1769 */ 1770 ipif = ipif_lookup_addr_v6(&targ, ill, ALL_ZONES, NULL, NULL, NULL, 1771 NULL, ipst); 1772 if (ipif == NULL) 1773 goto ignore_conflict; 1774 1775 /* Reload the ill to match the ipif */ 1776 ill = ipif->ipif_ill; 1777 1778 /* If it's already duplicate or ineligible, then don't do anything. */ 1779 if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) { 1780 ipif_refrele(ipif); 1781 goto ignore_conflict; 1782 } 1783 1784 /* 1785 * If this is a failure during duplicate recovery, then don't 1786 * complain. It may take a long time to recover. 
1787 */ 1788 if (!ipif->ipif_was_dup) { 1789 char ibuf[LIFNAMSIZ]; 1790 char hbuf[MAC_STR_LEN]; 1791 char sbuf[INET6_ADDRSTRLEN]; 1792 1793 ipif_get_name(ipif, ibuf, sizeof (ibuf)); 1794 cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);" 1795 " disabled", ibuf, 1796 inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)), 1797 mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf))); 1798 } 1799 mutex_enter(&ill->ill_lock); 1800 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1801 ipif->ipif_flags |= IPIF_DUPLICATE; 1802 ill->ill_ipif_dup_count++; 1803 mutex_exit(&ill->ill_lock); 1804 (void) ipif_down(ipif, NULL, NULL); 1805 ipif_down_tail(ipif); 1806 mutex_enter(&ill->ill_lock); 1807 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1808 ill->ill_net_type == IRE_IF_RESOLVER && 1809 !(ipif->ipif_state_flags & IPIF_CONDEMNED) && 1810 ipst->ips_ip_dup_recovery > 0) { 1811 ASSERT(ipif->ipif_recovery_id == 0); 1812 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1813 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1814 } 1815 mutex_exit(&ill->ill_lock); 1816 ipif_refrele(ipif); 1817 ignore_conflict: 1818 if (dl_mp != NULL) 1819 freeb(dl_mp); 1820 freemsg(mp); 1821 } 1822 1823 /* 1824 * Handle failure by tearing down the ipifs with the specified address. Note 1825 * that tearing down the ipif also means deleting the nce through ipif_down, so 1826 * it's not possible to do recovery by just restarting the nce timer. Instead, 1827 * we start a timer on the ipif. 
1828 */ 1829 static void 1830 ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1831 { 1832 if ((mp = copymsg(mp)) != NULL) { 1833 if (dl_mp == NULL) 1834 dl_mp = mp; 1835 else if ((dl_mp = copyb(dl_mp)) != NULL) 1836 dl_mp->b_cont = mp; 1837 if (dl_mp == NULL) { 1838 freemsg(mp); 1839 } else { 1840 ill_refhold(ill); 1841 qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP, 1842 B_FALSE); 1843 } 1844 } 1845 } 1846 1847 /* 1848 * Handle a discovered conflict: some other system is advertising that it owns 1849 * one of our IP addresses. We need to defend ourselves, or just shut down the 1850 * interface. 1851 */ 1852 static void 1853 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1854 { 1855 ipif_t *ipif; 1856 uint32_t now; 1857 uint_t maxdefense; 1858 uint_t defs; 1859 ip_stack_t *ipst = ill->ill_ipst; 1860 1861 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 1862 NULL, NULL, ipst); 1863 if (ipif == NULL) 1864 return; 1865 1866 /* 1867 * First, figure out if this address is disposable. 1868 */ 1869 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1870 maxdefense = ipst->ips_ip_max_temp_defend; 1871 else 1872 maxdefense = ipst->ips_ip_max_defend; 1873 1874 /* 1875 * Now figure out how many times we've defended ourselves. Ignore 1876 * defenses that happened long in the past. 1877 */ 1878 now = gethrestime_sec(); 1879 mutex_enter(&nce->nce_lock); 1880 if ((defs = nce->nce_defense_count) > 0 && 1881 now - nce->nce_defense_time > ipst->ips_ip_defend_interval) { 1882 nce->nce_defense_count = defs = 0; 1883 } 1884 nce->nce_defense_count++; 1885 nce->nce_defense_time = now; 1886 mutex_exit(&nce->nce_lock); 1887 ipif_refrele(ipif); 1888 1889 /* 1890 * If we've defended ourselves too many times already, then give up and 1891 * tear down the interface(s) using this address. Otherwise, defend by 1892 * sending out an unsolicited Neighbor Advertisement. 
1893 */ 1894 if (defs >= maxdefense) { 1895 ip_ndp_failure(ill, mp, dl_mp); 1896 } else { 1897 char hbuf[MAC_STR_LEN]; 1898 char sbuf[INET6_ADDRSTRLEN]; 1899 uchar_t *haddr; 1900 uint_t haddrlen; 1901 in6_addr_t targ; 1902 1903 ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen); 1904 cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1905 mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)), 1906 inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)), 1907 ill->ill_name); 1908 1909 (void) nce_xmit_advert(nce, B_FALSE, &ipv6_all_hosts_mcast, 0); 1910 } 1911 } 1912 1913 static void 1914 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1915 { 1916 nd_neighbor_solicit_t *ns; 1917 uint32_t hlen = ill->ill_nd_lla_len; 1918 uchar_t *haddr = NULL; 1919 icmp6_t *icmp_nd; 1920 ip6_t *ip6h; 1921 nce_t *our_nce = NULL; 1922 in6_addr_t target; 1923 in6_addr_t src; 1924 int len; 1925 int flag = 0; 1926 nd_opt_hdr_t *opt = NULL; 1927 boolean_t bad_solicit = B_FALSE; 1928 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1929 1930 ip6h = (ip6_t *)mp->b_rptr; 1931 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1932 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1933 src = ip6h->ip6_src; 1934 ns = (nd_neighbor_solicit_t *)icmp_nd; 1935 target = ns->nd_ns_target; 1936 if (IN6_IS_ADDR_MULTICAST(&target)) { 1937 if (ip_debug > 2) { 1938 /* ip1dbg */ 1939 pr_addr_dbg("ndp_input_solicit: Target is" 1940 " multicast! 
%s\n", AF_INET6, &target); 1941 } 1942 bad_solicit = B_TRUE; 1943 goto done; 1944 } 1945 if (len > sizeof (nd_neighbor_solicit_t)) { 1946 /* Options present */ 1947 opt = (nd_opt_hdr_t *)&ns[1]; 1948 len -= sizeof (nd_neighbor_solicit_t); 1949 if (!ndp_verify_optlen(opt, len)) { 1950 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1951 bad_solicit = B_TRUE; 1952 goto done; 1953 } 1954 1955 } 1956 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1957 /* Check to see if this is a valid DAD solicitation */ 1958 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1959 if (ip_debug > 2) { 1960 /* ip1dbg */ 1961 pr_addr_dbg("ndp_input_solicit: IPv6 " 1962 "Destination is not solicited node " 1963 "multicast %s\n", AF_INET6, 1964 &ip6h->ip6_dst); 1965 } 1966 bad_solicit = B_TRUE; 1967 goto done; 1968 } 1969 } 1970 1971 /* 1972 * NOTE: with IPMP, it's possible the nominated multicast ill (which 1973 * received this packet if it's multicast) is not the ill tied to 1974 * e.g. the IPMP ill's data link-local. So we match across the illgrp 1975 * to ensure we find the associated NCE. 1976 */ 1977 our_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE); 1978 /* 1979 * If this is a valid Solicitation, a permanent 1980 * entry should exist in the cache 1981 */ 1982 if (our_nce == NULL || 1983 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1984 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 
1985 "ifname=%s ", ill->ill_name)); 1986 if (ip_debug > 2) { 1987 /* ip1dbg */ 1988 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1989 } 1990 bad_solicit = B_TRUE; 1991 goto done; 1992 } 1993 1994 /* At this point we should have a verified NS per spec */ 1995 if (opt != NULL) { 1996 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1997 if (opt != NULL) { 1998 haddr = (uchar_t *)&opt[1]; 1999 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 2000 hlen == 0) { 2001 ip1dbg(("ndp_input_solicit: bad SLLA\n")); 2002 bad_solicit = B_TRUE; 2003 goto done; 2004 } 2005 } 2006 } 2007 2008 /* If sending directly to peer, set the unicast flag */ 2009 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 2010 flag |= NDP_UNICAST; 2011 2012 /* 2013 * Create/update the entry for the soliciting node. 2014 * or respond to outstanding queries, don't if 2015 * the source is unspecified address. 2016 */ 2017 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 2018 int err; 2019 nce_t *nnce; 2020 2021 ASSERT(ill->ill_isv6); 2022 /* 2023 * Regular solicitations *must* include the Source Link-Layer 2024 * Address option. Ignore messages that do not. 2025 */ 2026 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 2027 ip1dbg(("ndp_input_solicit: source link-layer address " 2028 "option missing with a specified source.\n")); 2029 bad_solicit = B_TRUE; 2030 goto done; 2031 } 2032 2033 /* 2034 * This is a regular solicitation. If we're still in the 2035 * process of verifying the address, then don't respond at all 2036 * and don't keep track of the sender. 2037 */ 2038 if (our_nce->nce_state == ND_PROBE) 2039 goto done; 2040 2041 /* 2042 * If the solicitation doesn't have sender hardware address 2043 * (legal for unicast solicitation), then process without 2044 * installing the return NCE. Either we already know it, or 2045 * we'll be forced to look it up when (and if) we reply to the 2046 * packet. 
2047 */ 2048 if (haddr == NULL) 2049 goto no_source; 2050 2051 err = ndp_lookup_then_add_v6(ill, 2052 B_FALSE, 2053 haddr, 2054 &src, /* Soliciting nodes address */ 2055 &ipv6_all_ones, 2056 &ipv6_all_zeros, 2057 0, 2058 0, 2059 ND_STALE, 2060 &nnce); 2061 switch (err) { 2062 case 0: 2063 /* done with this entry */ 2064 NCE_REFRELE(nnce); 2065 break; 2066 case EEXIST: 2067 /* 2068 * B_FALSE indicates this is not an an advertisement. 2069 */ 2070 ndp_process(nnce, haddr, 0, B_FALSE); 2071 NCE_REFRELE(nnce); 2072 break; 2073 default: 2074 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 2075 err)); 2076 goto done; 2077 } 2078 no_source: 2079 flag |= NDP_SOLICITED; 2080 } else { 2081 /* 2082 * No source link layer address option should be present in a 2083 * valid DAD request. 2084 */ 2085 if (haddr != NULL) { 2086 ip1dbg(("ndp_input_solicit: source link-layer address " 2087 "option present with an unspecified source.\n")); 2088 bad_solicit = B_TRUE; 2089 goto done; 2090 } 2091 if (our_nce->nce_state == ND_PROBE) { 2092 /* 2093 * Internally looped-back probes won't have DLPI 2094 * attached to them. External ones (which are sent by 2095 * multicast) always will. Just ignore our own 2096 * transmissions. 2097 */ 2098 if (dl_mp != NULL) { 2099 /* 2100 * If someone else is probing our address, then 2101 * we've crossed wires. Declare failure. 2102 */ 2103 ip_ndp_failure(ill, mp, dl_mp); 2104 } 2105 goto done; 2106 } 2107 /* 2108 * This is a DAD probe. Multicast the advertisement to the 2109 * all-nodes address. 
*/
		src = ipv6_all_hosts_mcast;
	}
	/* Response to a solicitation */
	(void) nce_xmit_advert(our_nce, B_TRUE, &src, flag);
done:
	if (bad_solicit)
		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations);
	if (our_nce != NULL)
		NCE_REFRELE(our_nce);
}

/*
 * Process a received neighbor advertisement.
 *
 * The advertisement is dropped, and ipv6IfIcmpInBadNeighborAdvertisements is
 * bumped, when the IP destination is multicast while the solicited flag is
 * set, when the target is multicast, or when the options cannot be verified.
 * Otherwise the target is looked up in the neighbor cache:
 *
 *  - No entry: nothing to do.
 *  - NCE_F_PERMANENT entry (one of our own addresses): decide whether this
 *    is a genuine address conflict and report it via ip_ndp_failure() or
 *    ip_ndp_conflict().
 *  - Any other entry: record the router flag and hand the advertisement to
 *    ndp_process().
 *
 * `mp' is not freed here; ndp_input(), the caller visible in this file,
 * frees it once this function returns.
 */
void
ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
	nd_neighbor_advert_t *na;
	uint32_t	hlen = ill->ill_nd_lla_len;
	uchar_t		*haddr = NULL;
	icmp6_t		*icmp_nd;
	ip6_t		*ip6h;
	nce_t		*dst_nce = NULL;
	in6_addr_t	target;
	nd_opt_hdr_t	*opt = NULL;
	int		len;
	ip_stack_t	*ipst = ill->ill_ipst;
	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;

	ip6h = (ip6_t *)mp->b_rptr;
	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
	/* Length of the ICMPv6 portion (everything after the IPv6 header) */
	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
	na = (nd_neighbor_advert_t *)icmp_nd;
	/*
	 * Note that this test is on the IP destination address, even though
	 * the debug message below speaks of the "Target".
	 */
	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
	    (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) {
		ip1dbg(("ndp_input_advert: Target is multicast but the "
		    "solicited flag is not zero\n"));
		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
		return;
	}
	target = na->nd_na_target;
	if (IN6_IS_ADDR_MULTICAST(&target)) {
		ip1dbg(("ndp_input_advert: Target is multicast!\n"));
		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
		return;
	}
	/* Anything beyond the fixed NA header is options */
	if (len > sizeof (nd_neighbor_advert_t)) {
		opt = (nd_opt_hdr_t *)&na[1];
		if (!ndp_verify_optlen(opt,
		    len - sizeof (nd_neighbor_advert_t))) {
			ip1dbg(("ndp_input_advert: cannot verify SLLA\n"));
			BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
			return;
		}
		/* At this point we have a verified NA per spec */
		len -= sizeof (nd_neighbor_advert_t);
		opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR);
		if (opt != NULL) {
			haddr = (uchar_t *)&opt[1];
			/*
			 * nd_opt_len is in units of 8 bytes and includes the
			 * option header; the option must be able to hold a
			 * full link-layer address for this ill.
			 */
			if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) ||
			    hlen == 0) {
				ip1dbg(("ndp_input_advert: bad SLLA\n"));
				BUMP_MIB(mib,
				    ipv6IfIcmpInBadNeighborAdvertisements);
				return;
			}
		}
	}

	/*
	 * NOTE: we match across the illgrp since we need to do DAD for all of
	 * our local addresses, and those are spread across all the active
	 * ills in the group.
	 */
	if ((dst_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE)) == NULL)
		return;

	if (dst_nce->nce_flags & NCE_F_PERMANENT) {
		/*
		 * Someone just advertised one of our local addresses.  First,
		 * check if it was us -- if so, we can safely ignore it.
		 */
		if (haddr != NULL) {
			/* nce_cmp_ll_addr() returns B_FALSE on a match */
			if (!nce_cmp_ll_addr(dst_nce, haddr, hlen))
				goto out;	/* from us -- no conflict */

			/*
			 * If we're in an IPMP group, check if this is an echo
			 * from another ill in the group.  Use the double-
			 * checked locking pattern to avoid grabbing
			 * ill_g_lock in the non-IPMP case.
			 */
			if (IS_UNDER_IPMP(ill)) {
				rw_enter(&ipst->ips_ill_g_lock, RW_READER);
				if (IS_UNDER_IPMP(ill) && ipmp_illgrp_find_ill(
				    ill->ill_grp, haddr, hlen) != NULL) {
					rw_exit(&ipst->ips_ill_g_lock);
					goto out;
				}
				rw_exit(&ipst->ips_ill_g_lock);
			}
		}

		/*
		 * Our own (looped-back) unsolicited neighbor advertisements
		 * will get here with dl_mp == NULL.  (These will usually be
		 * filtered by the `haddr' checks above, but point-to-point
		 * links have no hardware address and thus make it here.)
		 */
		if (dl_mp == NULL && dst_nce->nce_state != ND_PROBE)
			goto out;

		/*
		 * This appears to be a real conflict.  If we're trying to
		 * configure this NCE (ND_PROBE), then shut it down.
		 * Otherwise, handle the discovered conflict.
		 *
		 * In the ND_PROBE case, dl_mp might be NULL if we're getting
		 * a unicast reply.  This isn't typically done (multicast is
		 * the norm in response to a probe), but we can handle it.
		 */
		if (dst_nce->nce_state == ND_PROBE)
			ip_ndp_failure(ill, mp, dl_mp);
		else
			ip_ndp_conflict(ill, mp, dl_mp, dst_nce);
	} else {
		if (na->nd_na_flags_reserved & ND_NA_FLAG_ROUTER)
			dst_nce->nce_flags |= NCE_F_ISROUTER;

		/* B_TRUE indicates this is an advertisement */
		ndp_process(dst_nce, haddr, na->nd_na_flags_reserved, B_TRUE);
	}
out:
	/* Drop the hold obtained from ndp_lookup_v6() */
	NCE_REFRELE(dst_nce);
}

/*
 * Process NDP neighbor solicitation/advertisement messages.
 * The checksum has already been verified before reaching here.
 *
 * The message is pulled up into a single block and sanity-checked (hop
 * limit, no extension headers, ICMPv6 code, minimum length) before being
 * dispatched on the ICMPv6 type to ndp_input_solicit() or
 * ndp_input_advert().  `mp' is always freed before returning.
 */
void
ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
	icmp6_t		*icmp_nd;
	ip6_t		*ip6h;
	int		len;
	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;


	if (!pullupmsg(mp, -1)) {
		ip1dbg(("ndp_input: pullupmsg failed\n"));
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
		goto done;
	}
	ip6h = (ip6_t *)mp->b_rptr;
	/* Reject anything whose hop limit is not IPV6_MAX_HOPS */
	if (ip6h->ip6_hops != IPV6_MAX_HOPS) {
		ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n"));
		BUMP_MIB(mib, ipv6IfIcmpBadHoplimit);
		goto done;
	}
	/*
	 * NDP does not accept any extension headers between the
	 * IP header and the ICMP header since e.g. a routing
	 * header could be dangerous.
	 * This assumes that any AH or ESP headers are removed
	 * by ip prior to passing the packet to ndp_input.
	 */
	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
		ip1dbg(("ndp_input: Wrong next header 0x%x\n",
		    ip6h->ip6_nxt));
		BUMP_MIB(mib, ipv6IfIcmpInErrors);
		goto done;
	}
	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
	ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT ||
	    icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT);
	if (icmp_nd->icmp6_code != 0) {
		ip1dbg(("ndp_input: icmp6 code != 0 \n"));
		BUMP_MIB(mib, ipv6IfIcmpInErrors);
		goto done;
	}
	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
	/*
	 * Make sure packet length is large enough for either
	 * a NS or a NA icmp packet.
	 */
	if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) {
		ip1dbg(("ndp_input: packet too short\n"));
		BUMP_MIB(mib, ipv6IfIcmpInErrors);
		goto done;
	}
	if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) {
		ndp_input_solicit(ill, mp, dl_mp);
	} else {
		ndp_input_advert(ill, mp, dl_mp);
	}
done:
	freemsg(mp);
}

/*
 * Utility routine to send an advertisement.  Assumes that the NCE cannot
 * go away (e.g., because it's refheld).
 *
 * NDP_ISROUTER is added to `flags' when the NCE is marked NCE_F_ISROUTER and
 * NDP_ORIDE is added unless the NCE is NCE_F_ANYCAST.  The advertisement is
 * sent from the NCE's own address (nce_addr) to `target'.  The return value
 * is that of nce_xmit().
 */
static boolean_t
nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *target,
    uint_t flags)
{
	/* Probes are solicitations; they never go out as advertisements */
	ASSERT((flags & NDP_PROBE) == 0);

	if (nce->nce_flags & NCE_F_ISROUTER)
		flags |= NDP_ISROUTER;
	if (!(nce->nce_flags & NCE_F_ANYCAST))
		flags |= NDP_ORIDE;

	return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_ADVERT, use_nd_lla,
	    &nce->nce_addr, target, flags));
}

/*
 * Utility routine to send a solicitation.  Assumes that the NCE cannot
 * go away (e.g., because it's refheld).
*/
static boolean_t
nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *sender,
    uint_t flags)
{
	/* A probe (DAD) is always sent from the unspecified address */
	if (flags & NDP_PROBE)
		sender = &ipv6_all_zeros;

	return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, use_nd_lla,
	    sender, &nce->nce_addr, flags));
}

/*
 * nce_xmit is called to form and transmit a ND solicitation or
 * advertisement ICMP packet.
 *
 * If the source address is unspecified and this isn't a probe (used for
 * duplicate address detection), an appropriate source address and link layer
 * address will be chosen here.  The link layer address option is included if
 * the source is specified (i.e., all non-probe packets), and omitted (per the
 * specification) otherwise.
 *
 * Returns B_FALSE only if the packet was built and handed to ip_output_v6()
 * on the ill's ill_wq; returns B_TRUE if it was dropped beforehand (no usable
 * source address, allocation failure, or no bound ill for an IPMP source).
 */
static boolean_t
nce_xmit(ill_t *ill, uint8_t type, boolean_t use_nd_lla,
    const in6_addr_t *sender, const in6_addr_t *target, int flag)
{
	ill_t		*hwaddr_ill;
	uint32_t	len;
	icmp6_t		*icmp6;
	mblk_t		*mp;
	ip6_t		*ip6h;
	nd_opt_hdr_t	*opt;
	uint_t		plen, maxplen;
	ip6i_t		*ip6i;
	ipif_t		*src_ipif = NULL;
	uint8_t		*hw_addr;
	zoneid_t	zoneid = GLOBAL_ZONEID;
	char		buf[INET6_ADDRSTRLEN];

	ASSERT(!IS_IPMP(ill));

	/*
	 * Check that the sender is actually a usable address on `ill', and if
	 * so, track that as the src_ipif.  If not, for solicitations, set the
	 * sender to :: so that a new one will be picked below; for adverts,
	 * drop the packet since we expect nce_xmit_advert() to always provide
	 * a valid sender.
	 */
	if (!IN6_IS_ADDR_UNSPECIFIED(sender)) {
		if ((src_ipif = ip_ndp_lookup_addr_v6(sender, ill)) == NULL ||
		    !src_ipif->ipif_addr_ready) {
			/* Found but not ready: give back the hold */
			if (src_ipif != NULL) {
				ipif_refrele(src_ipif);
				src_ipif = NULL;
			}
			if (type == ND_NEIGHBOR_ADVERT) {
				ip1dbg(("nce_xmit: No source ipif for src %s\n",
				    inet_ntop(AF_INET6, sender, buf,
				    sizeof (buf))));
				return (B_TRUE);
			}
			sender = &ipv6_all_zeros;
		}
	}

	/*
	 * If we still have an unspecified source (sender) address and this
	 * isn't a probe, select a source address from `ill'.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) {
		ASSERT(type != ND_NEIGHBOR_ADVERT);
		/*
		 * Pick a source address for this solicitation, but restrict
		 * the selection to addresses assigned to the output
		 * interface.  We do this because the destination will create
		 * a neighbor cache entry for the source address of this
		 * packet, so the source address needs to be a valid neighbor.
		 */
		src_ipif = ipif_select_source_v6(ill, target, B_TRUE,
		    IPV6_PREFER_SRC_DEFAULT, ALL_ZONES);
		if (src_ipif == NULL) {
			ip1dbg(("nce_xmit: No source ipif for dst %s\n",
			    inet_ntop(AF_INET6, target, buf, sizeof (buf))));
			return (B_TRUE);
		}
		sender = &src_ipif->ipif_v6src_addr;
	}

	/*
	 * We're either sending a probe or we have a source address.
	 */
	ASSERT((flag & NDP_PROBE) || src_ipif != NULL);

	/*
	 * Allocate for the largest possible link-layer address option; the
	 * message is trimmed to the real option size further down.
	 */
	maxplen = roundup(sizeof (nd_opt_hdr_t) + ND_MAX_HDW_LEN, 8);
	len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) +
	    maxplen;
	mp = allocb(len, BPRI_LO);
	if (mp == NULL) {
		if (src_ipif != NULL)
			ipif_refrele(src_ipif);
		return (B_TRUE);
	}
	bzero((char *)mp->b_rptr, len);
	mp->b_wptr = mp->b_rptr + len;

	/* The ip6i_t precedes the real IPv6 header in the message */
	ip6i = (ip6i_t *)mp->b_rptr;
	ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6i->ip6i_nxt = IPPROTO_RAW;
	ip6i->ip6i_flags = IP6I_HOPLIMIT;
	if (flag & NDP_PROBE)
		ip6i->ip6i_flags |= IP6I_UNSPEC_SRC;

	ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t));
	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t));
	ip6h->ip6_nxt = IPPROTO_ICMPV6;
	ip6h->ip6_hops = IPV6_MAX_HOPS;
	ip6h->ip6_src = *sender;
	ip6h->ip6_dst = *target;
	icmp6 = (icmp6_t *)&ip6h[1];

	/*
	 * The option is placed sizeof (nd_neighbor_advert_t) bytes past the
	 * IPv6 header for both message types.
	 */
	opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN +
	    sizeof (nd_neighbor_advert_t));

	if (type == ND_NEIGHBOR_SOLICIT) {
		nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6;

		if (!(flag & NDP_PROBE))
			opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
		ns->nd_ns_target = *target;
		if (!(flag & NDP_UNICAST)) {
			/* Form multicast address of the target */
			ip6h->ip6_dst = ipv6_solicited_node_mcast;
			ip6h->ip6_dst.s6_addr32[3] |=
			    ns->nd_ns_target.s6_addr32[3];
		}
	} else {
		nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6;

		ASSERT(!(flag & NDP_PROBE));
		opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
		na->nd_na_target = *sender;
		if (flag & NDP_ISROUTER)
			na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER;
		if (flag & NDP_SOLICITED)
			na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED;
		if (flag & NDP_ORIDE)
			na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE;
	}

	hw_addr = NULL;
	if (!(flag & NDP_PROBE)) {
		/*
		 * Use our source address to find the hardware address to put
		 * in the packet, so that the hardware address and IP address
		 * will match up -- even if that hardware address doesn't
		 * match the ill we actually transmit the packet through.
		 */
		if (IS_IPMP(src_ipif->ipif_ill)) {
			hwaddr_ill = ipmp_ipif_hold_bound_ill(src_ipif);
			if (hwaddr_ill == NULL) {
				ip1dbg(("nce_xmit: no bound ill!\n"));
				ipif_refrele(src_ipif);
				freemsg(mp);
				return (B_TRUE);
			}
		} else {
			hwaddr_ill = src_ipif->ipif_ill;
			ill_refhold(hwaddr_ill);	/* for symmetry */
		}

		plen = roundup(sizeof (nd_opt_hdr_t) +
		    hwaddr_ill->ill_nd_lla_len, 8);

		hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla :
		    hwaddr_ill->ill_phys_addr;
		if (hw_addr != NULL) {
			/* Fill in link layer address and option len */
			opt->nd_opt_len = (uint8_t)(plen / 8);
			bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len);
		}

		ill_refrele(hwaddr_ill);
	}

	/* No link-layer address (or a probe): send no option at all */
	if (hw_addr == NULL)
		plen = 0;

	/* Fix up the length of the packet now that plen is known */
	len -= (maxplen - plen);
	mp->b_wptr = mp->b_rptr + len;
	ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t));

	icmp6->icmp6_type = type;
	icmp6->icmp6_code = 0;
	/*
	 * Prepare for checksum by putting icmp length in the icmp
	 * checksum field. The checksum is calculated in ip_wput_v6.
	 */
	icmp6->icmp6_cksum = ip6h->ip6_plen;

	/*
	 * Before we toss the src_ipif, look up the zoneid to pass to
	 * ip_output_v6().  This is to ensure unicast ND_NEIGHBOR_ADVERT
	 * packets to be routed correctly by IP (we cannot guarantee that the
	 * global zone has an interface route to the destination).
	 */
	if (src_ipif != NULL) {
		if ((zoneid = src_ipif->ipif_zoneid) == ALL_ZONES)
			zoneid = GLOBAL_ZONEID;
		ipif_refrele(src_ipif);
	}

	ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT);
	return (B_FALSE);
}

/*
 * Make a link layer address (does not include the SAP) from an nce.
 * To form the link layer address, use the last four bytes of ipv6
 * address passed in and the fixed offset stored in nce.
 *
 * `addrpos' receives the link layer address: it is first seeded from the
 * NCE's nce_res_mp template and then the bytes of `addr' selected by
 * nce_extract_mask are OR-ed in starting at nce_ll_extract_start.  Nothing
 * is done for IRE_IF_NORESOLVER interfaces.
 */
static void
nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr)
{
	uchar_t	*mask, *to;
	ill_t	*ill = nce->nce_ill;
	int	len;

	if (ill->ill_net_type == IRE_IF_NORESOLVER)
		return;
	ASSERT(nce->nce_res_mp != NULL);
	ASSERT(ill->ill_net_type == IRE_IF_RESOLVER);
	ASSERT(nce->nce_flags & NCE_F_MAPPING);
	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask));
	ASSERT(addr != NULL);
	bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill),
	    addrpos, ill->ill_nd_lla_len);
	/* Number of trailing bytes of the IPv6 address to merge in */
	len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start,
	    IPV6_ADDR_LEN);
	mask = (uchar_t *)&nce->nce_extract_mask;
	mask += (IPV6_ADDR_LEN - len);
	addr += (IPV6_ADDR_LEN - len);
	to = addrpos + nce->nce_ll_extract_start;
	while (len-- > 0)
		*to++ |= *mask++ & *addr++;
}

/*
 * Allocate a DL_UNITDATA_REQ template for `ill', sized to hold the
 * request header followed by a link layer address and the SAP.  The
 * priority fields are zeroed, the destination address length/offset are
 * filled in and the SAP is copied in; the link layer address itself is left
 * for the caller to set.  Returns NULL if the allocation fails.
 */
mblk_t *
nce_udreq_alloc(ill_t *ill)
{
	mblk_t	*template_mp = NULL;
	dl_unitdata_req_t *dlur;
	int	sap_length;

	ASSERT(ill->ill_isv6);

	sap_length = ill->ill_sap_length;
	/* ill_sap_length may be negative, hence ABS() */
	template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) +
	    ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ);
	if (template_mp == NULL)
		return (NULL);

	dlur = (dl_unitdata_req_t *)template_mp->b_rptr;
	dlur->dl_priority.dl_min = 0;
	dlur->dl_priority.dl_max = 0;
	dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len;
	dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t);

	/* Copy in the SAP value. */
	NCE_LL_SAP_COPY(ill, template_mp);

	return (template_mp);
}

/*
 * NDP retransmit timer.
 * This timer goes off when:
 * a. It is time to retransmit NS for resolver.
 * b. It is time to send reachability probes.
 *
 * `arg' is the nce_t the timer was armed for.  A hold is taken on entry
 * (under nce_lock) and released on every path out of the function.
 */
void
ndp_timer(void *arg)
{
	nce_t		*nce = arg;
	ill_t		*ill = nce->nce_ill;
	char		addrbuf[INET6_ADDRSTRLEN];
	boolean_t	dropped = B_FALSE;
	ip_stack_t	*ipst = ill->ill_ipst;

	/*
	 * The timer has to be cancelled by ndp_delete before doing the final
	 * refrele. So the NCE is guaranteed to exist when the timer runs
	 * until it clears the timeout_id. Before clearing the timeout_id
	 * bump up the refcnt so that we can continue to use the nce
	 */
	ASSERT(nce != NULL);

	mutex_enter(&nce->nce_lock);
	NCE_REFHOLD_LOCKED(nce);
	nce->nce_timeout_id = 0;

	/*
	 * Check the reachability state first.
	 */
	switch (nce->nce_state) {
	case ND_DELAY:
		/* Delay expired: start probing with a unicast solicitation */
		nce->nce_state = ND_PROBE;
		mutex_exit(&nce->nce_lock);
		(void) nce_xmit_solicit(nce, B_FALSE, &ipv6_all_zeros,
		    NDP_UNICAST);
		if (ip_debug > 3) {
			/* ip2dbg */
			pr_addr_dbg("ndp_timer: state for %s changed "
			    "to PROBE\n", AF_INET6, &nce->nce_addr);
		}
		NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
		NCE_REFRELE(nce);
		return;
	case ND_PROBE:
		/* must be retransmit timer */
		nce->nce_pcnt--;
		ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT &&
		    nce->nce_pcnt >= -1);
		if (nce->nce_pcnt > 0) {
			/*
			 * As per RFC2461, the nce gets deleted after
			 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions.
			 * Note that the first unicast solicitation is sent
			 * during the DELAY state.
			 */
			ip2dbg(("ndp_timer: pcount=%x dst %s\n",
			    nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr,
			    addrbuf, sizeof (addrbuf))));
			mutex_exit(&nce->nce_lock);
			dropped = nce_xmit_solicit(nce, B_FALSE,
			    &ipv6_all_zeros,
			    (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE :
			    NDP_UNICAST);
			/* A dropped transmission does not use up a retry */
			if (dropped) {
				mutex_enter(&nce->nce_lock);
				nce->nce_pcnt++;
				mutex_exit(&nce->nce_lock);
			}
			NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill));
		} else if (nce->nce_pcnt < 0) {
			/* No hope, delete the nce */
			nce->nce_state = ND_UNREACHABLE;
			mutex_exit(&nce->nce_lock);
			if (ip_debug > 2) {
				/* ip1dbg */
				pr_addr_dbg("ndp_timer: Delete IRE for"
				    " dst %s\n", AF_INET6, &nce->nce_addr);
			}
			ndp_delete(nce);
		} else if (!(nce->nce_flags & NCE_F_PERMANENT)) {
			/* Wait RetransTimer, before deleting the entry */
			ip2dbg(("ndp_timer: pcount=%x dst %s\n",
			    nce->nce_pcnt, inet_ntop(AF_INET6,
			    &nce->nce_addr, addrbuf, sizeof (addrbuf))));
			mutex_exit(&nce->nce_lock);
			/* Wait one interval before killing */
			NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
		} else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) {
			ipif_t *ipif;

			/*
			 * We're done probing, and we can now declare this
			 * address to be usable.  Let IP know that it's ok to
			 * use.
			 */
			nce->nce_state = ND_REACHABLE;
			mutex_exit(&nce->nce_lock);
			ipif = ip_ndp_lookup_addr_v6(&nce->nce_addr,
			    nce->nce_ill);
			if (ipif != NULL) {
				if (ipif->ipif_was_dup) {
					char ibuf[LIFNAMSIZ + 10];
					char sbuf[INET6_ADDRSTRLEN];

					ipif->ipif_was_dup = B_FALSE;
					(void) inet_ntop(AF_INET6,
					    &ipif->ipif_v6lcl_addr,
					    sbuf, sizeof (sbuf));
					ipif_get_name(ipif, ibuf,
					    sizeof (ibuf));
					cmn_err(CE_NOTE, "recovered address "
					    "%s on %s", sbuf, ibuf);
				}
				if ((ipif->ipif_flags & IPIF_UP) &&
				    !ipif->ipif_addr_ready)
					ipif_up_notify(ipif);
				ipif->ipif_addr_ready = 1;
				ipif_refrele(ipif);
			}
			/* Begin defending our new address */
			nce->nce_unsolicit_count = 0;
			dropped = nce_xmit_advert(nce, B_FALSE,
			    &ipv6_all_hosts_mcast, 0);
			if (dropped) {
				/* Retry the advertisement on the next tick */
				nce->nce_unsolicit_count = 1;
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_unsolicit_interval);
			} else if (ipst->ips_ip_ndp_defense_interval != 0) {
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_defense_interval);
			}
		} else {
			/*
			 * This is an address we're probing to be our own, but
			 * the ill is down.  Wait until it comes back before
			 * doing anything, but switch to reachable state so
			 * that the restart will work.
			 */
			nce->nce_state = ND_REACHABLE;
			mutex_exit(&nce->nce_lock);
		}
		NCE_REFRELE(nce);
		return;
	case ND_INCOMPLETE: {
		ip6_t	*ip6h;
		ip6i_t	*ip6i;
		mblk_t	*mp, *datamp, *nextmp, **prevmpp;

		/*
		 * Per case (2) in the nce_queue_mp() comments, scan nce_qd_mp
		 * for any IPMP probe packets, and toss 'em.  IPMP probe
		 * packets will always be at the head of nce_qd_mp and always
		 * have an ip6i_t header, so we can stop at the first queued
		 * ND packet without an ip6i_t.
		 */
		prevmpp = &nce->nce_qd_mp;
		for (mp = nce->nce_qd_mp; mp != NULL; mp = nextmp) {
			nextmp = mp->b_next;
			datamp = (DB_TYPE(mp) == M_CTL) ? mp->b_cont : mp;
			ip6h = (ip6_t *)datamp->b_rptr;
			if (ip6h->ip6_nxt != IPPROTO_RAW)
				break;

			ip6i = (ip6i_t *)ip6h;
			if (ip6i->ip6i_flags & IP6I_IPMP_PROBE) {
				/* Unlink and free the probe */
				inet_freemsg(mp);
				*prevmpp = nextmp;
			} else {
				prevmpp = &mp->b_next;
			}
		}
		/* Note: called with nce_lock still held */
		ip_ndp_resolve(nce);
		mutex_exit(&nce->nce_lock);
		NCE_REFRELE(nce);
		break;
	}
	case ND_REACHABLE:
		/*
		 * Send another advertisement if unsolicited ones are still
		 * owed, or if this is one of our own addresses and periodic
		 * defense is enabled.
		 */
		if (((nce->nce_flags & NCE_F_UNSOL_ADV) &&
		    nce->nce_unsolicit_count != 0) ||
		    ((nce->nce_flags & NCE_F_PERMANENT) &&
		    ipst->ips_ip_ndp_defense_interval != 0)) {
			if (nce->nce_unsolicit_count > 0)
				nce->nce_unsolicit_count--;
			mutex_exit(&nce->nce_lock);
			dropped = nce_xmit_advert(nce, B_FALSE,
			    &ipv6_all_hosts_mcast, 0);
			if (dropped) {
				mutex_enter(&nce->nce_lock);
				nce->nce_unsolicit_count++;
				mutex_exit(&nce->nce_lock);
			}
			if (nce->nce_unsolicit_count != 0) {
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_unsolicit_interval);
			} else {
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_defense_interval);
			}
		} else {
			mutex_exit(&nce->nce_lock);
		}
		NCE_REFRELE(nce);
		break;
	default:
		mutex_exit(&nce->nce_lock);
		NCE_REFRELE(nce);
		break;
	}
}

/*
 * Set a link layer address from the ll_addr passed in.
 * Copy SAP from ill.
 */
static void
nce_set_ll(nce_t *nce, uchar_t *ll_addr)
{
	ill_t	*ill = nce->nce_ill;
	uchar_t	*woffset;

	ASSERT(ll_addr != NULL);
	/* Always called before fast_path_probe */
	ASSERT(nce->nce_fp_mp == NULL);
	if (ill->ill_sap_length != 0) {
		/*
		 * Copy the SAP type specified in the
		 * request into the xmit template.
		 */
		NCE_LL_SAP_COPY(ill, nce->nce_res_mp);
	}
	if (ill->ill_phys_addr_length > 0) {
		/*
		 * The bcopy() below used to be called for the physical address
		 * length rather than the link layer address length. For
		 * ethernet and many other media, the phys_addr and lla are
		 * identical.
		 * However, with xresolv interfaces being introduced, the
		 * phys_addr and lla are no longer the same, and the physical
		 * address may not have any useful meaning, so we use the lla
		 * for IPv6 address resolution and destination addressing.
		 *
		 * For PPP or other interfaces with a zero length
		 * physical address, don't do anything here.
		 * The bcopy() with a zero phys_addr length was previously
		 * a no-op for interfaces with a zero-length physical address.
		 * Using the lla for them would change the way they operate.
		 * Doing nothing in such cases preserves expected behavior.
		 */
		woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
		bcopy(ll_addr, woffset, ill->ill_nd_lla_len);
	}
}

/*
 * Compare `ll_addr' (of length `ll_addr_len') with the link layer address
 * stored in the NCE's nce_res_mp template.
 *
 * Returns B_TRUE if the addresses differ, and B_FALSE if they are the same
 * or if `ll_addr' is NULL.
 */
static boolean_t
nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len)
{
	ill_t	*ill = nce->nce_ill;
	uchar_t	*ll_offset;

	ASSERT(nce->nce_res_mp != NULL);
	if (ll_addr == NULL)
		return (B_FALSE);
	ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
	if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0)
		return (B_TRUE);
	return (B_FALSE);
}

/*
 * Updates the link layer address or the reachability state of
 * a cache entry.  Reset probe counter if needed.
*/
static void
nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr)
{
	ill_t		*ill = nce->nce_ill;
	boolean_t	need_stop_timer = B_FALSE;
	boolean_t	need_fastpath_update = B_FALSE;

	/*
	 * Entered and exited with nce_lock held, but note that the lock is
	 * dropped and re-acquired near the end of the function.
	 */
	ASSERT(MUTEX_HELD(&nce->nce_lock));
	ASSERT(nce->nce_ipversion == IPV6_VERSION);
	/*
	 * If this interface does not do NUD, there is no point
	 * in allowing an update to the cache entry.  Although
	 * we will respond to NS.
	 * The only time we accept an update for a resolver when
	 * NUD is turned off is when it has just been created.
	 * Non-Resolvers will always be created as REACHABLE.
	 */
	if (new_state != ND_UNCHANGED) {
		if ((nce->nce_flags & NCE_F_NONUD) &&
		    (nce->nce_state != ND_INCOMPLETE))
			return;
		ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN);
		ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX);
		need_stop_timer = B_TRUE;
		if (new_state == ND_REACHABLE)
			nce->nce_last = TICK_TO_MSEC(lbolt64);
		else {
			/* We force NUD in this case */
			nce->nce_last = 0;
		}
		nce->nce_state = new_state;
		nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
	}
	/*
	 * In case of fast path we need to free the fastpath
	 * M_DATA and do another probe.  Otherwise we can just
	 * overwrite the DL_UNITDATA_REQ data, noting we'll lose
	 * whatever packets that happens to be transmitting at the time.
	 */
	if (new_ll_addr != NULL) {
		ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) +
		    ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr);
		bcopy(new_ll_addr, nce->nce_res_mp->b_rptr +
		    NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len);
		if (nce->nce_fp_mp != NULL) {
			freemsg(nce->nce_fp_mp);
			nce->nce_fp_mp = NULL;
		}
		need_fastpath_update = B_TRUE;
	}
	/*
	 * The timer cancellation and the fastpath probe below are done
	 * without nce_lock; the lock is retaken before returning.
	 */
	mutex_exit(&nce->nce_lock);
	if (need_stop_timer) {
		(void) untimeout(nce->nce_timeout_id);
		nce->nce_timeout_id = 0;
	}
	if (need_fastpath_update)
		nce_fastpath(nce);
	mutex_enter(&nce->nce_lock);
}

/*
 * Queue `mp' on the NCE's list of packets awaiting address resolution
 * (nce_qd_mp, linked through b_next), either at the head or at the tail
 * according to `head_insert'.  While walking to the tail, every step taken
 * beyond the ill's ill_max_buf limit frees the packet currently at the head
 * of the queue.  The caller must hold nce_lock.
 */
void
nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert)
{
	uint_t	count = 0;
	mblk_t	**mpp, *tmp;

	ASSERT(MUTEX_HELD(&nce->nce_lock));

	for (mpp = &nce->nce_qd_mp; *mpp != NULL; mpp = &(*mpp)->b_next) {
		if (++count > nce->nce_ill->ill_max_buf) {
			/* Over the limit: discard the head of the queue */
			tmp = nce->nce_qd_mp->b_next;
			nce->nce_qd_mp->b_next = NULL;
			nce->nce_qd_mp->b_prev = NULL;
			freemsg(nce->nce_qd_mp);
			nce->nce_qd_mp = tmp;
		}
	}

	if (head_insert) {
		mp->b_next = nce->nce_qd_mp;
		nce->nce_qd_mp = mp;
	} else {
		/* mpp was left pointing at the tail's b_next link */
		*mpp = mp;
	}
}

/*
 * Queue `mp' on the NCE until address resolution completes.  Packets
 * carrying an ip6i_t marked IP6I_IPMP_PROBE are inserted at the head of the
 * queue; everything else is appended.  The caller must hold nce_lock.
 */
static void
nce_queue_mp(nce_t *nce, mblk_t *mp)
{
	boolean_t	head_insert = B_FALSE;
	ip6_t		*ip6h;
	ip6i_t		*ip6i;
	mblk_t		*data_mp;

	ASSERT(MUTEX_HELD(&nce->nce_lock));

	/* Skip over a leading M_CTL block to reach the packet itself */
	if (mp->b_datap->db_type == M_CTL)
		data_mp = mp->b_cont;
	else
		data_mp = mp;
	ip6h = (ip6_t *)data_mp->b_rptr;
	if (ip6h->ip6_nxt == IPPROTO_RAW) {
		/*
		 * This message should have been pulled up already in
		 * ip_wput_v6. We can't do pullups here because the message
		 * could be from the nce_qd_mp which could have b_next/b_prev
		 * non-NULL.
		 */
		ip6i = (ip6i_t *)ip6h;
		ASSERT(MBLKL(data_mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN);

		/*
		 * If this packet is marked IP6I_IPMP_PROBE, then we need to:
		 *
		 * 1. Insert it at the head of the nce_qd_mp list.  Consider
		 *    the normal (non-probe) load-spreading case where the
		 *    source address of the ND packet is not tied to nce_ill.
		 *    If the ill bound to the source address cannot receive,
		 *    the response to the ND packet will not be received.
		 *    However, if ND packets for nce_ill's probes are queued
		 *    behind that ND packet, those probes will also fail to
		 *    be sent, and thus in.mpathd will erroneously conclude
		 *    that nce_ill has also failed.
		 *
		 * 2. Drop the probe packet in ndp_timer() if the ND did
		 *    not succeed on the first attempt.  This ensures that
		 *    ND problems do not manifest as probe RTT spikes.
		 */
		if (ip6i->ip6i_flags & IP6I_IPMP_PROBE)
			head_insert = B_TRUE;
	}
	nce_queue_mp_common(nce, mp, head_insert);
}

/*
 * Called when address resolution failed due to a timeout.
 * Send an ICMP unreachable in response to all queued packets.
3018 */ 3019 void 3020 nce_resolv_failed(nce_t *nce) 3021 { 3022 mblk_t *mp, *nxt_mp, *first_mp; 3023 char buf[INET6_ADDRSTRLEN]; 3024 ip6_t *ip6h; 3025 zoneid_t zoneid = GLOBAL_ZONEID; 3026 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3027 3028 ip1dbg(("nce_resolv_failed: dst %s\n", 3029 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 3030 mutex_enter(&nce->nce_lock); 3031 mp = nce->nce_qd_mp; 3032 nce->nce_qd_mp = NULL; 3033 mutex_exit(&nce->nce_lock); 3034 while (mp != NULL) { 3035 nxt_mp = mp->b_next; 3036 mp->b_next = NULL; 3037 mp->b_prev = NULL; 3038 3039 first_mp = mp; 3040 if (mp->b_datap->db_type == M_CTL) { 3041 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 3042 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3043 zoneid = io->ipsec_out_zoneid; 3044 ASSERT(zoneid != ALL_ZONES); 3045 mp = mp->b_cont; 3046 mp->b_next = NULL; 3047 mp->b_prev = NULL; 3048 } 3049 3050 ip6h = (ip6_t *)mp->b_rptr; 3051 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3052 ip6i_t *ip6i; 3053 /* 3054 * This message should have been pulled up already 3055 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 3056 * the header is pulled up. 3057 */ 3058 ip6i = (ip6i_t *)ip6h; 3059 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 3060 sizeof (ip6i_t) + IPV6_HDR_LEN); 3061 mp->b_rptr += sizeof (ip6i_t); 3062 } 3063 /* 3064 * Ignore failure since icmp_unreachable_v6 will silently 3065 * drop packets with an unspecified source address. 3066 */ 3067 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst); 3068 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 3069 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst); 3070 mp = nxt_mp; 3071 } 3072 nce_cb_dispatch(nce); 3073 } 3074 3075 /* 3076 * Called by SIOCSNDP* ioctl to add/change an nce entry 3077 * and the corresponding attributes. 3078 * Disallow states other than ND_REACHABLE or ND_STALE. 
3079 */ 3080 int 3081 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 3082 { 3083 sin6_t *sin6; 3084 in6_addr_t *addr; 3085 nce_t *nce; 3086 int err; 3087 uint16_t new_flags = 0; 3088 uint16_t old_flags = 0; 3089 int inflags = lnr->lnr_flags; 3090 ip_stack_t *ipst = ill->ill_ipst; 3091 3092 ASSERT(ill->ill_isv6); 3093 if ((lnr->lnr_state_create != ND_REACHABLE) && 3094 (lnr->lnr_state_create != ND_STALE)) 3095 return (EINVAL); 3096 3097 if (lnr->lnr_hdw_len > ND_MAX_HDW_LEN) 3098 return (EINVAL); 3099 3100 sin6 = (sin6_t *)&lnr->lnr_addr; 3101 addr = &sin6->sin6_addr; 3102 3103 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 3104 /* We know it can not be mapping so just look in the hash table */ 3105 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 3106 /* See comment in ndp_query() regarding IS_IPMP(ill) usage */ 3107 nce = nce_lookup_addr(ill, IS_IPMP(ill), addr, nce); 3108 if (nce != NULL) 3109 new_flags = nce->nce_flags; 3110 3111 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 3112 case NDF_ISROUTER_ON: 3113 new_flags |= NCE_F_ISROUTER; 3114 break; 3115 case NDF_ISROUTER_OFF: 3116 new_flags &= ~NCE_F_ISROUTER; 3117 break; 3118 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3119 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3120 if (nce != NULL) 3121 NCE_REFRELE(nce); 3122 return (EINVAL); 3123 } 3124 3125 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3126 case NDF_ANYCAST_ON: 3127 new_flags |= NCE_F_ANYCAST; 3128 break; 3129 case NDF_ANYCAST_OFF: 3130 new_flags &= ~NCE_F_ANYCAST; 3131 break; 3132 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3133 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3134 if (nce != NULL) 3135 NCE_REFRELE(nce); 3136 return (EINVAL); 3137 } 3138 3139 if (nce == NULL) { 3140 err = ndp_add_v6(ill, 3141 (uchar_t *)lnr->lnr_hdw_addr, 3142 addr, 3143 &ipv6_all_ones, 3144 &ipv6_all_zeros, 3145 0, 3146 new_flags, 3147 lnr->lnr_state_create, 3148 &nce); 3149 if (err != 0) { 3150 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3151 ip1dbg(("ndp_sioc_update: Can't 
create NCE %d\n", err)); 3152 return (err); 3153 } 3154 } 3155 old_flags = nce->nce_flags; 3156 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3157 /* 3158 * Router turned to host, delete all ires. 3159 * XXX Just delete the entry, but we need to add too. 3160 */ 3161 nce->nce_flags &= ~NCE_F_ISROUTER; 3162 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3163 ndp_delete(nce); 3164 NCE_REFRELE(nce); 3165 return (0); 3166 } 3167 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3168 3169 mutex_enter(&nce->nce_lock); 3170 nce->nce_flags = new_flags; 3171 mutex_exit(&nce->nce_lock); 3172 /* 3173 * Note that we ignore the state at this point, which 3174 * should be either STALE or REACHABLE. Instead we let 3175 * the link layer address passed in to determine the state 3176 * much like incoming packets. 3177 */ 3178 nce_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3179 NCE_REFRELE(nce); 3180 return (0); 3181 } 3182 3183 /* 3184 * If the device driver supports it, we make nce_fp_mp to have 3185 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 3186 * The caller ensures there is hold on nce for this function. 3187 * Note that since ill_fastpath_probe() copies the mblk there is 3188 * no need for the hold beyond this function. 3189 */ 3190 void 3191 nce_fastpath(nce_t *nce) 3192 { 3193 ill_t *ill = nce->nce_ill; 3194 int res; 3195 3196 ASSERT(ill != NULL); 3197 ASSERT(nce->nce_state != ND_INITIAL && nce->nce_state != ND_INCOMPLETE); 3198 3199 if (nce->nce_fp_mp != NULL) { 3200 /* Already contains fastpath info */ 3201 return; 3202 } 3203 if (nce->nce_res_mp != NULL) { 3204 nce_fastpath_list_add(nce); 3205 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3206 /* 3207 * EAGAIN is an indication of a transient error 3208 * i.e. allocation failure etc. leave the nce in the list it 3209 * will be updated when another probe happens for another ire 3210 * if not it will be taken out of the list when the ire is 3211 * deleted. 
3212 */ 3213 3214 if (res != 0 && res != EAGAIN) 3215 nce_fastpath_list_delete(nce); 3216 } 3217 } 3218 3219 /* 3220 * Drain the list of nce's waiting for fastpath response. 3221 */ 3222 void 3223 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3224 void *arg) 3225 { 3226 3227 nce_t *next_nce; 3228 nce_t *current_nce; 3229 nce_t *first_nce; 3230 nce_t *prev_nce = NULL; 3231 3232 mutex_enter(&ill->ill_lock); 3233 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3234 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3235 next_nce = current_nce->nce_fastpath; 3236 /* 3237 * Take it off the list if we're flushing, or if the callback 3238 * routine tells us to do so. Otherwise, leave the nce in the 3239 * fastpath list to handle any pending response from the lower 3240 * layer. We can't drain the list when the callback routine 3241 * comparison failed, because the response is asynchronous in 3242 * nature, and may not arrive in the same order as the list 3243 * insertion. 3244 */ 3245 if (func == NULL || func(current_nce, arg)) { 3246 current_nce->nce_fastpath = NULL; 3247 if (current_nce == first_nce) 3248 ill->ill_fastpath_list = first_nce = next_nce; 3249 else 3250 prev_nce->nce_fastpath = next_nce; 3251 } else { 3252 /* previous element that is still in the list */ 3253 prev_nce = current_nce; 3254 } 3255 current_nce = next_nce; 3256 } 3257 mutex_exit(&ill->ill_lock); 3258 } 3259 3260 /* 3261 * Add nce to the nce fastpath list. 3262 */ 3263 void 3264 nce_fastpath_list_add(nce_t *nce) 3265 { 3266 ill_t *ill; 3267 3268 ill = nce->nce_ill; 3269 3270 mutex_enter(&ill->ill_lock); 3271 mutex_enter(&nce->nce_lock); 3272 3273 /* 3274 * if nce has not been deleted and 3275 * is not already in the list add it. 
3276 */ 3277 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3278 (nce->nce_fastpath == NULL)) { 3279 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3280 ill->ill_fastpath_list = nce; 3281 } 3282 3283 mutex_exit(&nce->nce_lock); 3284 mutex_exit(&ill->ill_lock); 3285 } 3286 3287 /* 3288 * remove nce from the nce fastpath list. 3289 */ 3290 void 3291 nce_fastpath_list_delete(nce_t *nce) 3292 { 3293 nce_t *nce_ptr; 3294 3295 ill_t *ill; 3296 3297 ill = nce->nce_ill; 3298 ASSERT(ill != NULL); 3299 3300 mutex_enter(&ill->ill_lock); 3301 if (nce->nce_fastpath == NULL) 3302 goto done; 3303 3304 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3305 3306 if (ill->ill_fastpath_list == nce) { 3307 ill->ill_fastpath_list = nce->nce_fastpath; 3308 } else { 3309 nce_ptr = ill->ill_fastpath_list; 3310 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3311 if (nce_ptr->nce_fastpath == nce) { 3312 nce_ptr->nce_fastpath = nce->nce_fastpath; 3313 break; 3314 } 3315 nce_ptr = nce_ptr->nce_fastpath; 3316 } 3317 } 3318 3319 nce->nce_fastpath = NULL; 3320 done: 3321 mutex_exit(&ill->ill_lock); 3322 } 3323 3324 /* 3325 * Update all NCE's that are not in fastpath mode and 3326 * have an nce_fp_mp that matches mp. mp->b_cont contains 3327 * the fastpath header. 3328 * 3329 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 
3330 */ 3331 boolean_t 3332 ndp_fastpath_update(nce_t *nce, void *arg) 3333 { 3334 mblk_t *mp, *fp_mp; 3335 uchar_t *mp_rptr, *ud_mp_rptr; 3336 mblk_t *ud_mp = nce->nce_res_mp; 3337 ptrdiff_t cmplen; 3338 3339 if (nce->nce_flags & NCE_F_MAPPING) 3340 return (B_TRUE); 3341 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3342 return (B_TRUE); 3343 3344 ip2dbg(("ndp_fastpath_update: trying\n")); 3345 mp = (mblk_t *)arg; 3346 mp_rptr = mp->b_rptr; 3347 cmplen = mp->b_wptr - mp_rptr; 3348 ASSERT(cmplen >= 0); 3349 ud_mp_rptr = ud_mp->b_rptr; 3350 /* 3351 * The nce is locked here to prevent any other threads 3352 * from accessing and changing nce_res_mp when the IPv6 address 3353 * becomes resolved to an lla while we're in the middle 3354 * of looking at and comparing the hardware address (lla). 3355 * It is also locked to prevent multiple threads in nce_fastpath_update 3356 * from examining nce_res_mp atthe same time. 3357 */ 3358 mutex_enter(&nce->nce_lock); 3359 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3360 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3361 mutex_exit(&nce->nce_lock); 3362 /* 3363 * Don't take the ire off the fastpath list yet, 3364 * since the response may come later. 3365 */ 3366 return (B_FALSE); 3367 } 3368 /* Matched - install mp as the fastpath mp */ 3369 ip1dbg(("ndp_fastpath_update: match\n")); 3370 fp_mp = dupb(mp->b_cont); 3371 if (fp_mp != NULL) { 3372 nce->nce_fp_mp = fp_mp; 3373 } 3374 mutex_exit(&nce->nce_lock); 3375 return (B_TRUE); 3376 } 3377 3378 /* 3379 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3380 * driver. Note that it assumes IP is exclusive... 3381 */ 3382 /* ARGSUSED */ 3383 void 3384 ndp_fastpath_flush(nce_t *nce, char *arg) 3385 { 3386 if (nce->nce_flags & NCE_F_MAPPING) 3387 return; 3388 /* No fastpath info? 
*/ 3389 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3390 return; 3391 3392 if (nce->nce_ipversion == IPV4_VERSION && 3393 nce->nce_flags & NCE_F_BCAST) { 3394 /* 3395 * IPv4 BROADCAST entries: 3396 * We can't delete the nce since it is difficult to 3397 * recreate these without going through the 3398 * ipif down/up dance. 3399 * 3400 * All access to nce->nce_fp_mp in the case of these 3401 * is protected by nce_lock. 3402 */ 3403 mutex_enter(&nce->nce_lock); 3404 if (nce->nce_fp_mp != NULL) { 3405 freeb(nce->nce_fp_mp); 3406 nce->nce_fp_mp = NULL; 3407 mutex_exit(&nce->nce_lock); 3408 nce_fastpath(nce); 3409 } else { 3410 mutex_exit(&nce->nce_lock); 3411 } 3412 } else { 3413 /* Just delete the NCE... */ 3414 ndp_delete(nce); 3415 } 3416 } 3417 3418 /* 3419 * Return a pointer to a given option in the packet. 3420 * Assumes that option part of the packet have already been validated. 3421 */ 3422 nd_opt_hdr_t * 3423 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3424 { 3425 while (optlen > 0) { 3426 if (opt->nd_opt_type == opt_type) 3427 return (opt); 3428 optlen -= 8 * opt->nd_opt_len; 3429 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3430 } 3431 return (NULL); 3432 } 3433 3434 /* 3435 * Verify all option lengths present are > 0, also check to see 3436 * if the option lengths and packet length are consistent. 3437 */ 3438 boolean_t 3439 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3440 { 3441 ASSERT(opt != NULL); 3442 while (optlen > 0) { 3443 if (opt->nd_opt_len == 0) 3444 return (B_FALSE); 3445 optlen -= 8 * opt->nd_opt_len; 3446 if (optlen < 0) 3447 return (B_FALSE); 3448 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3449 } 3450 return (B_TRUE); 3451 } 3452 3453 /* 3454 * ndp_walk function. 3455 * Free a fraction of the NCE cache entries. 3456 * A fraction of zero means to not free any in that category. 
3457 */ 3458 void 3459 ndp_cache_reclaim(nce_t *nce, char *arg) 3460 { 3461 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3462 uint_t rand; 3463 3464 if (nce->nce_flags & NCE_F_PERMANENT) 3465 return; 3466 3467 rand = (uint_t)lbolt + 3468 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3469 if (ncr->ncr_host != 0 && 3470 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3471 ndp_delete(nce); 3472 return; 3473 } 3474 } 3475 3476 /* 3477 * ndp_walk function. 3478 * Count the number of NCEs that can be deleted. 3479 * These would be hosts but not routers. 3480 */ 3481 void 3482 ndp_cache_count(nce_t *nce, char *arg) 3483 { 3484 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3485 3486 if (nce->nce_flags & NCE_F_PERMANENT) 3487 return; 3488 3489 ncc->ncc_total++; 3490 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3491 ncc->ncc_host++; 3492 } 3493 3494 #ifdef DEBUG 3495 void 3496 nce_trace_ref(nce_t *nce) 3497 { 3498 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3499 3500 if (nce->nce_trace_disable) 3501 return; 3502 3503 if (!th_trace_ref(nce, nce->nce_ill->ill_ipst)) { 3504 nce->nce_trace_disable = B_TRUE; 3505 nce_trace_cleanup(nce); 3506 } 3507 } 3508 3509 void 3510 nce_untrace_ref(nce_t *nce) 3511 { 3512 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3513 3514 if (!nce->nce_trace_disable) 3515 th_trace_unref(nce); 3516 } 3517 3518 static void 3519 nce_trace_cleanup(const nce_t *nce) 3520 { 3521 th_trace_cleanup(nce, nce->nce_trace_disable); 3522 } 3523 #endif 3524 3525 /* 3526 * Called when address resolution fails due to a timeout. 3527 * Send an ICMP unreachable in response to all queued packets. 
3528 */ 3529 void 3530 arp_resolv_failed(nce_t *nce) 3531 { 3532 mblk_t *mp, *nxt_mp, *first_mp; 3533 char buf[INET6_ADDRSTRLEN]; 3534 zoneid_t zoneid = GLOBAL_ZONEID; 3535 struct in_addr ipv4addr; 3536 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3537 3538 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3539 ip3dbg(("arp_resolv_failed: dst %s\n", 3540 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3541 mutex_enter(&nce->nce_lock); 3542 mp = nce->nce_qd_mp; 3543 nce->nce_qd_mp = NULL; 3544 mutex_exit(&nce->nce_lock); 3545 3546 while (mp != NULL) { 3547 nxt_mp = mp->b_next; 3548 mp->b_next = NULL; 3549 mp->b_prev = NULL; 3550 3551 first_mp = mp; 3552 /* 3553 * Send icmp unreachable messages 3554 * to the hosts. 3555 */ 3556 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst); 3557 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3558 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3559 ICMP_HOST_UNREACHABLE, zoneid, ipst); 3560 mp = nxt_mp; 3561 } 3562 } 3563 3564 int 3565 ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3566 nce_t **newnce, nce_t *src_nce) 3567 { 3568 int err; 3569 nce_t *nce; 3570 in6_addr_t addr6; 3571 ip_stack_t *ipst = ill->ill_ipst; 3572 3573 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3574 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3575 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3576 /* 3577 * NOTE: IPv4 never matches across the illgrp since the NCE's we're 3578 * looking up have fastpath headers that are inherently per-ill. 3579 */ 3580 nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce); 3581 if (nce == NULL) { 3582 err = ndp_add_v4(ill, addr, flags, newnce, src_nce); 3583 } else { 3584 *newnce = nce; 3585 err = EEXIST; 3586 } 3587 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3588 return (err); 3589 } 3590 3591 /* 3592 * NDP Cache Entry creation routine for IPv4. 3593 * Mapped entries are handled in arp. 3594 * This routine must always be called with ndp4->ndp_g_lock held. 
3595 * Prior to return, nce_refcnt is incremented. 3596 */ 3597 static int 3598 ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3599 nce_t **newnce, nce_t *src_nce) 3600 { 3601 static nce_t nce_nil; 3602 nce_t *nce; 3603 mblk_t *mp; 3604 mblk_t *template = NULL; 3605 nce_t **ncep; 3606 ip_stack_t *ipst = ill->ill_ipst; 3607 uint16_t state = ND_INITIAL; 3608 int err; 3609 3610 ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock)); 3611 ASSERT(!ill->ill_isv6); 3612 ASSERT((flags & NCE_F_MAPPING) == 0); 3613 3614 if (ill->ill_resolver_mp == NULL) 3615 return (EINVAL); 3616 /* 3617 * Allocate the mblk to hold the nce. 3618 */ 3619 mp = allocb(sizeof (nce_t), BPRI_MED); 3620 if (mp == NULL) 3621 return (ENOMEM); 3622 3623 nce = (nce_t *)mp->b_rptr; 3624 mp->b_wptr = (uchar_t *)&nce[1]; 3625 *nce = nce_nil; 3626 nce->nce_ill = ill; 3627 nce->nce_ipversion = IPV4_VERSION; 3628 nce->nce_flags = flags; 3629 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3630 nce->nce_rcnt = ill->ill_xmit_count; 3631 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3632 nce->nce_mask = ipv6_all_ones; 3633 nce->nce_extract_mask = ipv6_all_zeros; 3634 nce->nce_ll_extract_start = 0; 3635 nce->nce_qd_mp = NULL; 3636 nce->nce_mp = mp; 3637 /* This one is for nce getting created */ 3638 nce->nce_refcnt = 1; 3639 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3640 ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3641 3642 nce->nce_trace_disable = B_FALSE; 3643 3644 if (src_nce != NULL) { 3645 /* 3646 * src_nce has been provided by the caller. 
The only 3647 * caller who provides a non-null, non-broadcast 3648 * src_nce is from ip_newroute() which must pass in 3649 * a ND_REACHABLE src_nce (this condition is verified 3650 * via an ASSERT for the save_ire->ire_nce in ip_newroute()) 3651 */ 3652 mutex_enter(&src_nce->nce_lock); 3653 state = src_nce->nce_state; 3654 if ((src_nce->nce_flags & NCE_F_CONDEMNED) || 3655 (ipst->ips_ndp4->ndp_g_hw_change > 0)) { 3656 /* 3657 * src_nce has been deleted, or 3658 * ip_arp_news is in the middle of 3659 * flushing entries in the the nce. 3660 * Fail the add, since we don't know 3661 * if it is safe to copy the contents of 3662 * src_nce 3663 */ 3664 DTRACE_PROBE2(nce__bad__src__nce, 3665 nce_t *, src_nce, ill_t *, ill); 3666 mutex_exit(&src_nce->nce_lock); 3667 err = EINVAL; 3668 goto err_ret; 3669 } 3670 template = copyb(src_nce->nce_res_mp); 3671 mutex_exit(&src_nce->nce_lock); 3672 if (template == NULL) { 3673 err = ENOMEM; 3674 goto err_ret; 3675 } 3676 } else if (flags & NCE_F_BCAST) { 3677 /* 3678 * broadcast nce. 3679 */ 3680 template = copyb(ill->ill_bcast_mp); 3681 if (template == NULL) { 3682 err = ENOMEM; 3683 goto err_ret; 3684 } 3685 state = ND_REACHABLE; 3686 } else if (ill->ill_net_type == IRE_IF_NORESOLVER) { 3687 /* 3688 * NORESOLVER entries are always created in the REACHABLE 3689 * state. 3690 */ 3691 if (ill->ill_phys_addr_length == IP_ADDR_LEN && 3692 ill->ill_mactype != DL_IPV4 && 3693 ill->ill_mactype != DL_6TO4) { 3694 /* 3695 * We create a nce_res_mp with the IP nexthop address 3696 * as the destination address if the physical length 3697 * is exactly 4 bytes for point-to-multipoint links 3698 * that do their own resolution from IP to link-layer 3699 * address (e.g. IP over X.25). 
3700 */ 3701 template = ill_dlur_gen((uchar_t *)addr, 3702 ill->ill_phys_addr_length, 3703 ill->ill_sap, ill->ill_sap_length); 3704 } else { 3705 template = copyb(ill->ill_resolver_mp); 3706 } 3707 if (template == NULL) { 3708 err = ENOMEM; 3709 goto err_ret; 3710 } 3711 state = ND_REACHABLE; 3712 } 3713 nce->nce_fp_mp = NULL; 3714 nce->nce_res_mp = template; 3715 nce->nce_state = state; 3716 if (state == ND_REACHABLE) { 3717 nce->nce_last = TICK_TO_MSEC(lbolt64); 3718 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3719 } else { 3720 nce->nce_last = 0; 3721 if (state == ND_INITIAL) 3722 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3723 } 3724 3725 ASSERT((nce->nce_res_mp == NULL && nce->nce_state == ND_INITIAL) || 3726 (nce->nce_res_mp != NULL && nce->nce_state == ND_REACHABLE)); 3727 /* 3728 * Atomically ensure that the ill is not CONDEMNED, before 3729 * adding the NCE. 3730 */ 3731 mutex_enter(&ill->ill_lock); 3732 if (ill->ill_state_flags & ILL_CONDEMNED) { 3733 mutex_exit(&ill->ill_lock); 3734 err = EINVAL; 3735 goto err_ret; 3736 } 3737 if ((nce->nce_next = *ncep) != NULL) 3738 nce->nce_next->nce_ptpn = &nce->nce_next; 3739 *ncep = nce; 3740 nce->nce_ptpn = ncep; 3741 *newnce = nce; 3742 /* This one is for nce being used by an active thread */ 3743 NCE_REFHOLD(*newnce); 3744 3745 /* Bump up the number of nce's referencing this ill */ 3746 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 3747 (char *), "nce", (void *), nce); 3748 ill->ill_nce_cnt++; 3749 mutex_exit(&ill->ill_lock); 3750 DTRACE_PROBE1(ndp__add__v4, nce_t *, nce); 3751 return (0); 3752 err_ret: 3753 freeb(mp); 3754 freemsg(template); 3755 return (err); 3756 } 3757 3758 /* 3759 * ndp_walk routine to delete all entries that have a given destination or 3760 * gateway address and cached link layer (MAC) address. This is used when ARP 3761 * informs us that a network-to-link-layer mapping may have changed. 
3762 */ 3763 void 3764 nce_delete_hw_changed(nce_t *nce, void *arg) 3765 { 3766 nce_hw_map_t *hwm = arg; 3767 mblk_t *mp; 3768 dl_unitdata_req_t *dlu; 3769 uchar_t *macaddr; 3770 ill_t *ill; 3771 int saplen; 3772 ipaddr_t nce_addr; 3773 3774 if (nce->nce_state != ND_REACHABLE) 3775 return; 3776 3777 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3778 if (nce_addr != hwm->hwm_addr) 3779 return; 3780 3781 mutex_enter(&nce->nce_lock); 3782 if ((mp = nce->nce_res_mp) == NULL) { 3783 mutex_exit(&nce->nce_lock); 3784 return; 3785 } 3786 dlu = (dl_unitdata_req_t *)mp->b_rptr; 3787 macaddr = (uchar_t *)(dlu + 1); 3788 ill = nce->nce_ill; 3789 if ((saplen = ill->ill_sap_length) > 0) 3790 macaddr += saplen; 3791 else 3792 saplen = -saplen; 3793 3794 /* 3795 * If the hardware address is unchanged, then leave this one alone. 3796 * Note that saplen == abs(saplen) now. 3797 */ 3798 if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && 3799 bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { 3800 mutex_exit(&nce->nce_lock); 3801 return; 3802 } 3803 mutex_exit(&nce->nce_lock); 3804 3805 DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); 3806 ndp_delete(nce); 3807 } 3808 3809 /* 3810 * This function verifies whether a given IPv4 address is potentially known to 3811 * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t, 3812 * so that it can continue to look for hardware changes on that address. 3813 */ 3814 boolean_t 3815 ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns) 3816 { 3817 nce_t *nce; 3818 struct in_addr nceaddr; 3819 ip_stack_t *ipst = ns->netstack_ip; 3820 3821 if (addr == INADDR_ANY) 3822 return (B_FALSE); 3823 3824 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3825 nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr); 3826 for (; nce != NULL; nce = nce->nce_next) { 3827 /* Note that only v4 mapped entries are in the table. 
*/ 3828 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 3829 if (addr == nceaddr.s_addr && 3830 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 3831 /* Single flag check; no lock needed */ 3832 if (!(nce->nce_flags & NCE_F_CONDEMNED)) 3833 break; 3834 } 3835 } 3836 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3837 return (nce != NULL); 3838 } 3839 3840 /* 3841 * Wrapper around ipif_lookup_addr_exact_v6() that allows ND to work properly 3842 * with IPMP. Specifically, since neighbor discovery is always done on 3843 * underlying interfaces (even for addresses owned by an IPMP interface), we 3844 * need to check for `v6addrp' on both `ill' and on the IPMP meta-interface 3845 * associated with `ill' (if it exists). 3846 */ 3847 static ipif_t * 3848 ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill) 3849 { 3850 ipif_t *ipif; 3851 ip_stack_t *ipst = ill->ill_ipst; 3852 3853 ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst); 3854 if (ipif == NULL && IS_UNDER_IPMP(ill)) { 3855 if ((ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) { 3856 ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst); 3857 ill_refrele(ill); 3858 } 3859 } 3860 return (ipif); 3861 } 3862