1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/stropts.h> 29 #include <sys/strsun.h> 30 #include <sys/sysmacros.h> 31 #include <sys/errno.h> 32 #include <sys/dlpi.h> 33 #include <sys/socket.h> 34 #include <sys/ddi.h> 35 #include <sys/sunddi.h> 36 #include <sys/cmn_err.h> 37 #include <sys/debug.h> 38 #include <sys/vtrace.h> 39 #include <sys/kmem.h> 40 #include <sys/zone.h> 41 #include <sys/ethernet.h> 42 #include <sys/sdt.h> 43 44 #include <net/if.h> 45 #include <net/if_types.h> 46 #include <net/if_dl.h> 47 #include <net/route.h> 48 #include <netinet/in.h> 49 #include <netinet/ip6.h> 50 #include <netinet/icmp6.h> 51 52 #include <inet/common.h> 53 #include <inet/mi.h> 54 #include <inet/mib2.h> 55 #include <inet/nd.h> 56 #include <inet/ip.h> 57 #include <inet/ip_impl.h> 58 #include <inet/ipclassifier.h> 59 #include <inet/ip_if.h> 60 #include <inet/ip_ire.h> 61 #include <inet/ip_rts.h> 62 #include <inet/ip6.h> 63 #include <inet/ip_ndp.h> 64 #include <inet/ipsec_impl.h> 65 #include 
<inet/ipsec_info.h> 66 #include <inet/sctp_ip.h> 67 #include <inet/ip2mac_impl.h> 68 69 /* 70 * Function names with nce_ prefix are static while function 71 * names with ndp_ prefix are used by rest of the IP. 72 * 73 * Lock ordering: 74 * 75 * ndp_g_lock -> ill_lock -> nce_lock 76 * 77 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 78 * nce_next. Nce_lock protects the contents of the NCE (particularly 79 * nce_refcnt). 80 */ 81 82 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 83 uint32_t ll_addr_len); 84 static void nce_ire_delete(nce_t *nce); 85 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 86 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 87 static nce_t *nce_lookup_addr(ill_t *, boolean_t, const in6_addr_t *, 88 nce_t *); 89 static nce_t *nce_lookup_mapping(ill_t *, const in6_addr_t *); 90 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 91 uchar_t *addr); 92 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 93 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 94 static mblk_t *nce_udreq_alloc(ill_t *ill); 95 static void nce_update(nce_t *nce, uint16_t new_state, 96 uchar_t *new_ll_addr); 97 static uint32_t nce_solicit(nce_t *nce, in6_addr_t src); 98 static boolean_t nce_xmit(ill_t *ill, uint8_t type, 99 boolean_t use_lla_addr, const in6_addr_t *sender, 100 const in6_addr_t *target, int flag); 101 static boolean_t nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, 102 const in6_addr_t *target, uint_t flags); 103 static boolean_t nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, 104 const in6_addr_t *src, uint_t flags); 105 static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t, 106 nce_t **, nce_t *); 107 static ipif_t *ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill); 108 109 #ifdef DEBUG 110 static void nce_trace_cleanup(const nce_t *); 111 #endif 112 113 #define NCE_HASH_PTR_V4(ipst, addr) \ 114 
(&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 115 116 #define NCE_HASH_PTR_V6(ipst, addr) \ 117 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ 118 NCE_TABLE_SIZE)])) 119 120 /* Non-tunable probe interval, based on link capabilities */ 121 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 122 123 /* 124 * NDP Cache Entry creation routine. 125 * Mapped entries will never do NUD . 126 * This routine must always be called with ndp6->ndp_g_lock held. 127 * Prior to return, nce_refcnt is incremented. 128 */ 129 int 130 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 131 const in6_addr_t *mask, const in6_addr_t *extract_mask, 132 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 133 nce_t **newnce) 134 { 135 static nce_t nce_nil; 136 nce_t *nce; 137 mblk_t *mp; 138 mblk_t *template; 139 nce_t **ncep; 140 int err; 141 boolean_t dropped = B_FALSE; 142 ip_stack_t *ipst = ill->ill_ipst; 143 144 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 145 ASSERT(ill != NULL && ill->ill_isv6); 146 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 147 ip0dbg(("ndp_add_v6: no addr\n")); 148 return (EINVAL); 149 } 150 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 151 ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags)); 152 return (EINVAL); 153 } 154 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 155 (flags & NCE_F_MAPPING)) { 156 ip0dbg(("ndp_add_v6: extract mask zero for mapping")); 157 return (EINVAL); 158 } 159 /* 160 * Allocate the mblk to hold the nce. 161 * 162 * XXX This can come out of a separate cache - nce_cache. 
163 * We don't need the mp anymore as there are no more 164 * "qwriter"s 165 */ 166 mp = allocb(sizeof (nce_t), BPRI_MED); 167 if (mp == NULL) 168 return (ENOMEM); 169 170 nce = (nce_t *)mp->b_rptr; 171 mp->b_wptr = (uchar_t *)&nce[1]; 172 *nce = nce_nil; 173 174 /* 175 * This one holds link layer address 176 */ 177 if (ill->ill_net_type == IRE_IF_RESOLVER) { 178 template = nce_udreq_alloc(ill); 179 } else { 180 if (ill->ill_resolver_mp == NULL) { 181 freeb(mp); 182 return (EINVAL); 183 } 184 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 185 template = copyb(ill->ill_resolver_mp); 186 } 187 if (template == NULL) { 188 freeb(mp); 189 return (ENOMEM); 190 } 191 nce->nce_ill = ill; 192 nce->nce_ipversion = IPV6_VERSION; 193 nce->nce_flags = flags; 194 nce->nce_state = state; 195 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 196 nce->nce_rcnt = ill->ill_xmit_count; 197 nce->nce_addr = *addr; 198 nce->nce_mask = *mask; 199 nce->nce_extract_mask = *extract_mask; 200 nce->nce_ll_extract_start = hw_extract_start; 201 nce->nce_fp_mp = NULL; 202 nce->nce_res_mp = template; 203 if (state == ND_REACHABLE) 204 nce->nce_last = TICK_TO_MSEC(lbolt64); 205 else 206 nce->nce_last = 0; 207 nce->nce_qd_mp = NULL; 208 nce->nce_mp = mp; 209 if (hw_addr != NULL) 210 nce_set_ll(nce, hw_addr); 211 /* This one is for nce getting created */ 212 nce->nce_refcnt = 1; 213 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 214 if (nce->nce_flags & NCE_F_MAPPING) { 215 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 216 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 217 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 218 ncep = &ipst->ips_ndp6->nce_mask_entries; 219 } else { 220 ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 221 } 222 223 nce->nce_trace_disable = B_FALSE; 224 225 list_create(&nce->nce_cb, sizeof (nce_cb_t), 226 offsetof(nce_cb_t, nce_cb_node)); 227 /* 228 * Atomically ensure that the ill is not CONDEMNED, before 229 * adding the NCE. 
230 */ 231 mutex_enter(&ill->ill_lock); 232 if (ill->ill_state_flags & ILL_CONDEMNED) { 233 mutex_exit(&ill->ill_lock); 234 freeb(mp); 235 freeb(template); 236 return (EINVAL); 237 } 238 if ((nce->nce_next = *ncep) != NULL) 239 nce->nce_next->nce_ptpn = &nce->nce_next; 240 *ncep = nce; 241 nce->nce_ptpn = ncep; 242 *newnce = nce; 243 /* This one is for nce being used by an active thread */ 244 NCE_REFHOLD(*newnce); 245 246 /* Bump up the number of nce's referencing this ill */ 247 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 248 (char *), "nce", (void *), nce); 249 ill->ill_nce_cnt++; 250 mutex_exit(&ill->ill_lock); 251 252 err = 0; 253 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 254 mutex_enter(&nce->nce_lock); 255 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 256 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 257 mutex_exit(&nce->nce_lock); 258 dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE); 259 if (dropped) { 260 mutex_enter(&nce->nce_lock); 261 nce->nce_pcnt++; 262 mutex_exit(&nce->nce_lock); 263 } 264 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 265 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 266 err = EINPROGRESS; 267 } else if (flags & NCE_F_UNSOL_ADV) { 268 /* 269 * We account for the transmit below by assigning one 270 * less than the ndd variable. Subsequent decrements 271 * are done in ndp_timer. 
272 */ 273 mutex_enter(&nce->nce_lock); 274 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 275 nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1; 276 mutex_exit(&nce->nce_lock); 277 dropped = nce_xmit_advert(nce, B_TRUE, &ipv6_all_hosts_mcast, 278 0); 279 mutex_enter(&nce->nce_lock); 280 if (dropped) 281 nce->nce_unsolicit_count++; 282 if (nce->nce_unsolicit_count != 0) { 283 ASSERT(nce->nce_timeout_id == 0); 284 nce->nce_timeout_id = timeout(ndp_timer, nce, 285 MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval)); 286 } 287 mutex_exit(&nce->nce_lock); 288 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 289 } 290 291 /* 292 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 293 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 294 * We call nce_fastpath from nce_update if the link layer address of 295 * the peer changes from nce_update 296 */ 297 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 298 nce_fastpath(nce); 299 return (err); 300 } 301 302 int 303 ndp_lookup_then_add_v6(ill_t *ill, boolean_t match_illgrp, uchar_t *hw_addr, 304 const in6_addr_t *addr, const in6_addr_t *mask, 305 const in6_addr_t *extract_mask, uint32_t hw_extract_start, uint16_t flags, 306 uint16_t state, nce_t **newnce) 307 { 308 int err = 0; 309 nce_t *nce; 310 ip_stack_t *ipst = ill->ill_ipst; 311 312 ASSERT(ill->ill_isv6); 313 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 314 315 /* Get head of v6 hash table */ 316 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 317 nce = nce_lookup_addr(ill, match_illgrp, addr, nce); 318 if (nce == NULL) { 319 err = ndp_add_v6(ill, 320 hw_addr, 321 addr, 322 mask, 323 extract_mask, 324 hw_extract_start, 325 flags, 326 state, 327 newnce); 328 } else { 329 *newnce = nce; 330 err = EEXIST; 331 } 332 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 333 return (err); 334 } 335 336 /* 337 * Remove all the CONDEMNED nces from the appropriate hash table. 
338 * We create a private list of NCEs, these may have ires pointing 339 * to them, so the list will be passed through to clean up dependent 340 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 341 */ 342 static void 343 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 344 { 345 nce_t *nce1; 346 nce_t **ptpn; 347 348 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 349 ASSERT(ndp->ndp_g_walker == 0); 350 for (; nce; nce = nce1) { 351 nce1 = nce->nce_next; 352 mutex_enter(&nce->nce_lock); 353 if (nce->nce_flags & NCE_F_CONDEMNED) { 354 ptpn = nce->nce_ptpn; 355 nce1 = nce->nce_next; 356 if (nce1 != NULL) 357 nce1->nce_ptpn = ptpn; 358 *ptpn = nce1; 359 nce->nce_ptpn = NULL; 360 nce->nce_next = NULL; 361 nce->nce_next = *free_nce_list; 362 *free_nce_list = nce; 363 } 364 mutex_exit(&nce->nce_lock); 365 } 366 } 367 368 /* 369 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 370 * will return this NCE. Also no new IREs will be created that 371 * point to this NCE (See ire_add_v6). Also no new timeouts will 372 * be started (See NDP_RESTART_TIMER). 373 * 2. Cancel any currently running timeouts. 374 * 3. If there is an ndp walker, return. The walker will do the cleanup. 375 * This ensures that walkers see a consistent list of NCEs while walking. 376 * 4. Otherwise remove the NCE from the list of NCEs 377 * 5. Delete all IREs pointing to this NCE. 378 */ 379 void 380 ndp_delete(nce_t *nce) 381 { 382 nce_t **ptpn; 383 nce_t *nce1; 384 int ipversion = nce->nce_ipversion; 385 ndp_g_t *ndp; 386 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 387 388 if (ipversion == IPV4_VERSION) 389 ndp = ipst->ips_ndp4; 390 else 391 ndp = ipst->ips_ndp6; 392 393 /* Serialize deletes */ 394 mutex_enter(&nce->nce_lock); 395 if (nce->nce_flags & NCE_F_CONDEMNED) { 396 /* Some other thread is doing the delete */ 397 mutex_exit(&nce->nce_lock); 398 return; 399 } 400 /* 401 * Caller has a refhold. Also 1 ref for being in the list. 
Thus 402 * refcnt has to be >= 2 403 */ 404 ASSERT(nce->nce_refcnt >= 2); 405 nce->nce_flags |= NCE_F_CONDEMNED; 406 mutex_exit(&nce->nce_lock); 407 408 nce_fastpath_list_delete(nce); 409 410 /* Complete any waiting callbacks */ 411 nce_cb_dispatch(nce); 412 413 /* 414 * Cancel any running timer. Timeout can't be restarted 415 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 416 * Passing invalid timeout id is fine. 417 */ 418 if (nce->nce_timeout_id != 0) { 419 (void) untimeout(nce->nce_timeout_id); 420 nce->nce_timeout_id = 0; 421 } 422 423 mutex_enter(&ndp->ndp_g_lock); 424 if (nce->nce_ptpn == NULL) { 425 /* 426 * The last ndp walker has already removed this nce from 427 * the list after we marked the nce CONDEMNED and before 428 * we grabbed the global lock. 429 */ 430 mutex_exit(&ndp->ndp_g_lock); 431 return; 432 } 433 if (ndp->ndp_g_walker > 0) { 434 /* 435 * Can't unlink. The walker will clean up 436 */ 437 ndp->ndp_g_walker_cleanup = B_TRUE; 438 mutex_exit(&ndp->ndp_g_lock); 439 return; 440 } 441 442 /* 443 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 444 * the timer since it is marked CONDEMNED. 
445 */ 446 ptpn = nce->nce_ptpn; 447 nce1 = nce->nce_next; 448 if (nce1 != NULL) 449 nce1->nce_ptpn = ptpn; 450 *ptpn = nce1; 451 nce->nce_ptpn = NULL; 452 nce->nce_next = NULL; 453 mutex_exit(&ndp->ndp_g_lock); 454 455 nce_ire_delete(nce); 456 } 457 458 void 459 ndp_inactive(nce_t *nce) 460 { 461 mblk_t **mpp; 462 ill_t *ill; 463 464 ASSERT(nce->nce_refcnt == 0); 465 ASSERT(MUTEX_HELD(&nce->nce_lock)); 466 ASSERT(nce->nce_fastpath == NULL); 467 468 /* Free all nce allocated messages */ 469 mpp = &nce->nce_first_mp_to_free; 470 do { 471 while (*mpp != NULL) { 472 mblk_t *mp; 473 474 mp = *mpp; 475 *mpp = mp->b_next; 476 477 inet_freemsg(mp); 478 } 479 } while (mpp++ != &nce->nce_last_mp_to_free); 480 481 if (nce->nce_ipversion == IPV6_VERSION) { 482 /* 483 * must have been cleaned up in nce_delete 484 */ 485 ASSERT(list_is_empty(&nce->nce_cb)); 486 list_destroy(&nce->nce_cb); 487 } 488 #ifdef DEBUG 489 nce_trace_cleanup(nce); 490 #endif 491 492 ill = nce->nce_ill; 493 mutex_enter(&ill->ill_lock); 494 DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill, 495 (char *), "nce", (void *), nce); 496 ill->ill_nce_cnt--; 497 /* 498 * If the number of nce's associated with this ill have dropped 499 * to zero, check whether we need to restart any operation that 500 * is waiting for this to happen. 501 */ 502 if (ILL_DOWN_OK(ill)) { 503 /* ipif_ill_refrele_tail drops the ill_lock */ 504 ipif_ill_refrele_tail(ill); 505 } else { 506 mutex_exit(&ill->ill_lock); 507 } 508 mutex_destroy(&nce->nce_lock); 509 if (nce->nce_mp != NULL) 510 inet_freemsg(nce->nce_mp); 511 } 512 513 /* 514 * ndp_walk routine. Delete the nce if it is associated with the ill 515 * that is going away. Always called as a writer. 516 */ 517 void 518 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 519 { 520 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 521 ndp_delete(nce); 522 } 523 } 524 525 /* 526 * Walk a list of to be inactive NCEs and blow away all the ires. 
527 */ 528 static void 529 nce_ire_delete_list(nce_t *nce) 530 { 531 nce_t *nce_next; 532 533 ASSERT(nce != NULL); 534 while (nce != NULL) { 535 nce_next = nce->nce_next; 536 nce->nce_next = NULL; 537 538 /* 539 * It is possible for the last ndp walker (this thread) 540 * to come here after ndp_delete has marked the nce CONDEMNED 541 * and before it has removed the nce from the fastpath list 542 * or called untimeout. So we need to do it here. It is safe 543 * for both ndp_delete and this thread to do it twice or 544 * even simultaneously since each of the threads has a 545 * reference on the nce. 546 */ 547 nce_fastpath_list_delete(nce); 548 /* 549 * Cancel any running timer. Timeout can't be restarted 550 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 551 * Passing invalid timeout id is fine. 552 */ 553 if (nce->nce_timeout_id != 0) { 554 (void) untimeout(nce->nce_timeout_id); 555 nce->nce_timeout_id = 0; 556 } 557 /* 558 * We might hit this func thus in the v4 case: 559 * ipif_down->ipif_ndp_down->ndp_walk 560 */ 561 562 if (nce->nce_ipversion == IPV4_VERSION) { 563 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 564 IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill); 565 } else { 566 ASSERT(nce->nce_ipversion == IPV6_VERSION); 567 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 568 IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill); 569 } 570 NCE_REFRELE_NOTR(nce); 571 nce = nce_next; 572 } 573 } 574 575 /* 576 * Delete an ire when the nce goes away. 
577 */ 578 /* ARGSUSED */ 579 static void 580 nce_ire_delete(nce_t *nce) 581 { 582 if (nce->nce_ipversion == IPV6_VERSION) { 583 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 584 nce_ire_delete1, (char *)nce, nce->nce_ill); 585 NCE_REFRELE_NOTR(nce); 586 } else { 587 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 588 nce_ire_delete1, (char *)nce, nce->nce_ill); 589 NCE_REFRELE_NOTR(nce); 590 } 591 } 592 593 /* 594 * ire_walk routine used to delete every IRE that shares this nce 595 */ 596 static void 597 nce_ire_delete1(ire_t *ire, char *nce_arg) 598 { 599 nce_t *nce = (nce_t *)nce_arg; 600 601 ASSERT(ire->ire_type == IRE_CACHE); 602 603 if (ire->ire_nce == nce) { 604 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 605 ire_delete(ire); 606 } 607 } 608 609 /* 610 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 611 */ 612 boolean_t 613 ndp_restart_dad(nce_t *nce) 614 { 615 boolean_t started; 616 boolean_t dropped; 617 618 if (nce == NULL) 619 return (B_FALSE); 620 mutex_enter(&nce->nce_lock); 621 if (nce->nce_state == ND_PROBE) { 622 mutex_exit(&nce->nce_lock); 623 started = B_TRUE; 624 } else if (nce->nce_state == ND_REACHABLE) { 625 nce->nce_state = ND_PROBE; 626 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 627 mutex_exit(&nce->nce_lock); 628 dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE); 629 if (dropped) { 630 mutex_enter(&nce->nce_lock); 631 nce->nce_pcnt++; 632 mutex_exit(&nce->nce_lock); 633 } 634 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 635 started = B_TRUE; 636 } else { 637 mutex_exit(&nce->nce_lock); 638 started = B_FALSE; 639 } 640 return (started); 641 } 642 643 /* 644 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 645 * If one is found, the refcnt on the nce will be incremented. 
646 */ 647 nce_t * 648 ndp_lookup_v6(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr, 649 boolean_t caller_holds_lock) 650 { 651 nce_t *nce; 652 ip_stack_t *ipst = ill->ill_ipst; 653 654 ASSERT(ill->ill_isv6); 655 if (!caller_holds_lock) 656 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 657 658 /* Get head of v6 hash table */ 659 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 660 nce = nce_lookup_addr(ill, match_illgrp, addr, nce); 661 if (nce == NULL) 662 nce = nce_lookup_mapping(ill, addr); 663 if (!caller_holds_lock) 664 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 665 return (nce); 666 } 667 /* 668 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 669 * If one is found, the refcnt on the nce will be incremented. 670 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 671 * so we skip the nce_lookup_mapping call. 672 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 673 */ 674 nce_t * 675 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 676 { 677 nce_t *nce; 678 in6_addr_t addr6; 679 ip_stack_t *ipst = ill->ill_ipst; 680 681 if (!caller_holds_lock) 682 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 683 684 /* Get head of v4 hash table */ 685 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 686 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 687 /* 688 * NOTE: IPv4 never matches across the illgrp since the NCE's we're 689 * looking up have fastpath headers that are inherently per-ill. 690 */ 691 nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce); 692 if (!caller_holds_lock) 693 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 694 return (nce); 695 } 696 697 /* 698 * Cache entry lookup. Try to find an nce matching the parameters passed. 699 * Look only for exact entries (no mappings). If an nce is found, increment 700 * the hold count on that nce. 
The caller passes in the start of the 701 * appropriate hash table, and must be holding the appropriate global 702 * lock (ndp_g_lock). 703 */ 704 static nce_t * 705 nce_lookup_addr(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr, 706 nce_t *nce) 707 { 708 ndp_g_t *ndp; 709 ip_stack_t *ipst = ill->ill_ipst; 710 711 if (ill->ill_isv6) 712 ndp = ipst->ips_ndp6; 713 else 714 ndp = ipst->ips_ndp4; 715 716 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 717 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 718 return (NULL); 719 for (; nce != NULL; nce = nce->nce_next) { 720 if (nce->nce_ill == ill || 721 match_illgrp && IS_IN_SAME_ILLGRP(ill, nce->nce_ill)) { 722 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 723 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 724 &ipv6_all_ones)) { 725 mutex_enter(&nce->nce_lock); 726 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 727 NCE_REFHOLD_LOCKED(nce); 728 mutex_exit(&nce->nce_lock); 729 break; 730 } 731 mutex_exit(&nce->nce_lock); 732 } 733 } 734 } 735 return (nce); 736 } 737 738 /* 739 * Cache entry lookup. Try to find an nce matching the parameters passed. 740 * Look only for mappings. 741 */ 742 static nce_t * 743 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 744 { 745 nce_t *nce; 746 ip_stack_t *ipst = ill->ill_ipst; 747 748 ASSERT(ill != NULL && ill->ill_isv6); 749 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 750 if (!IN6_IS_ADDR_MULTICAST(addr)) 751 return (NULL); 752 nce = ipst->ips_ndp6->nce_mask_entries; 753 for (; nce != NULL; nce = nce->nce_next) 754 if (nce->nce_ill == ill && 755 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 756 mutex_enter(&nce->nce_lock); 757 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 758 NCE_REFHOLD_LOCKED(nce); 759 mutex_exit(&nce->nce_lock); 760 break; 761 } 762 mutex_exit(&nce->nce_lock); 763 } 764 return (nce); 765 } 766 767 /* 768 * Process passed in parameters either from an incoming packet or via 769 * user ioctl. 
770 */ 771 static void 772 nce_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 773 { 774 ill_t *ill = nce->nce_ill; 775 uint32_t hw_addr_len = ill->ill_nd_lla_len; 776 mblk_t *mp; 777 boolean_t ll_updated = B_FALSE; 778 boolean_t ll_changed; 779 ip_stack_t *ipst = ill->ill_ipst; 780 781 ASSERT(nce->nce_ipversion == IPV6_VERSION); 782 /* 783 * No updates of link layer address or the neighbor state is 784 * allowed, when the cache is in NONUD state. This still 785 * allows for responding to reachability solicitation. 786 */ 787 mutex_enter(&nce->nce_lock); 788 if (nce->nce_state == ND_INCOMPLETE) { 789 if (hw_addr == NULL) { 790 mutex_exit(&nce->nce_lock); 791 return; 792 } 793 nce_set_ll(nce, hw_addr); 794 /* 795 * Update nce state and send the queued packets 796 * back to ip this time ire will be added. 797 */ 798 if (flag & ND_NA_FLAG_SOLICITED) { 799 nce_update(nce, ND_REACHABLE, NULL); 800 } else { 801 nce_update(nce, ND_STALE, NULL); 802 } 803 mutex_exit(&nce->nce_lock); 804 nce_fastpath(nce); 805 nce_cb_dispatch(nce); /* complete callbacks */ 806 mutex_enter(&nce->nce_lock); 807 mp = nce->nce_qd_mp; 808 nce->nce_qd_mp = NULL; 809 mutex_exit(&nce->nce_lock); 810 while (mp != NULL) { 811 mblk_t *nxt_mp, *data_mp; 812 813 nxt_mp = mp->b_next; 814 mp->b_next = NULL; 815 816 if (mp->b_datap->db_type == M_CTL) 817 data_mp = mp->b_cont; 818 else 819 data_mp = mp; 820 if (data_mp->b_prev != NULL) { 821 ill_t *inbound_ill; 822 queue_t *fwdq = NULL; 823 uint_t ifindex; 824 825 ifindex = (uint_t)(uintptr_t)data_mp->b_prev; 826 inbound_ill = ill_lookup_on_ifindex(ifindex, 827 B_TRUE, NULL, NULL, NULL, NULL, ipst); 828 if (inbound_ill == NULL) { 829 data_mp->b_prev = NULL; 830 freemsg(mp); 831 return; 832 } else { 833 fwdq = inbound_ill->ill_rq; 834 } 835 data_mp->b_prev = NULL; 836 /* 837 * Send a forwarded packet back into ip_rput_v6 838 * just as in ire_send_v6(). 839 * Extract the queue from b_prev (set in 840 * ip_rput_data_v6). 
841 */ 842 if (fwdq != NULL) { 843 /* 844 * Forwarded packets hop count will 845 * get decremented in ip_rput_data_v6 846 */ 847 if (data_mp != mp) 848 freeb(mp); 849 put(fwdq, data_mp); 850 } else { 851 /* 852 * Send locally originated packets back 853 * into ip_wput_v6. 854 */ 855 put(ill->ill_wq, mp); 856 } 857 ill_refrele(inbound_ill); 858 } else { 859 put(ill->ill_wq, mp); 860 } 861 mp = nxt_mp; 862 } 863 return; 864 } 865 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 866 if (!is_adv) { 867 /* If this is a SOLICITATION request only */ 868 if (ll_changed) 869 nce_update(nce, ND_STALE, hw_addr); 870 mutex_exit(&nce->nce_lock); 871 nce_cb_dispatch(nce); 872 return; 873 } 874 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 875 /* If in any other state than REACHABLE, ignore */ 876 if (nce->nce_state == ND_REACHABLE) { 877 nce_update(nce, ND_STALE, NULL); 878 } 879 mutex_exit(&nce->nce_lock); 880 nce_cb_dispatch(nce); 881 return; 882 } else { 883 if (ll_changed) { 884 nce_update(nce, ND_UNCHANGED, hw_addr); 885 ll_updated = B_TRUE; 886 } 887 if (flag & ND_NA_FLAG_SOLICITED) { 888 nce_update(nce, ND_REACHABLE, NULL); 889 } else { 890 if (ll_updated) { 891 nce_update(nce, ND_STALE, NULL); 892 } 893 } 894 mutex_exit(&nce->nce_lock); 895 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 896 NCE_F_ISROUTER)) { 897 ire_t *ire; 898 899 /* 900 * Router turned to host. We need to remove the 901 * entry as well as any default route that may be 902 * using this as a next hop. This is required by 903 * section 7.2.5 of RFC 2461. 
904 */ 905 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 906 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 907 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 908 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 909 MATCH_IRE_DEFAULT, ipst); 910 if (ire != NULL) { 911 ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); 912 ire_delete(ire); 913 ire_refrele(ire); 914 } 915 ndp_delete(nce); /* will do nce_cb_dispatch */ 916 } else { 917 nce_cb_dispatch(nce); 918 } 919 } 920 } 921 922 /* 923 * Walker state structure used by ndp_process() / ndp_process_entry(). 924 */ 925 typedef struct ndp_process_data { 926 ill_t *np_ill; /* ill/illgrp to match against */ 927 const in6_addr_t *np_addr; /* IPv6 address to match */ 928 uchar_t *np_hw_addr; /* passed to nce_process() */ 929 uint32_t np_flag; /* passed to nce_process() */ 930 boolean_t np_is_adv; /* passed to nce_process() */ 931 } ndp_process_data_t; 932 933 /* 934 * Walker callback used by ndp_process() for IPMP groups: calls nce_process() 935 * for each NCE with a matching address that's in the same IPMP group. 936 */ 937 static void 938 ndp_process_entry(nce_t *nce, void *arg) 939 { 940 ndp_process_data_t *npp = arg; 941 942 if (IS_IN_SAME_ILLGRP(nce->nce_ill, npp->np_ill) && 943 IN6_ARE_ADDR_EQUAL(&nce->nce_addr, npp->np_addr) && 944 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 945 nce_process(nce, npp->np_hw_addr, npp->np_flag, npp->np_is_adv); 946 } 947 } 948 949 /* 950 * Wrapper around nce_process() that handles IPMP. In particular, for IPMP, 951 * NCEs are per-underlying-ill (because of nce_fp_mp) and thus we may have 952 * more than one NCE for a given IPv6 address to tend to. In that case, we 953 * need to walk all NCEs and callback nce_process() for each one. Since this 954 * is expensive, in the non-IPMP case we just directly call nce_process(). 
955 * Ultimately, nce_fp_mp needs to be moved out of the nce_t so that all IP 956 * interfaces in an IPMP group share the same NCEs -- at which point this 957 * function can be removed entirely. 958 */ 959 void 960 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 961 { 962 ill_t *ill = nce->nce_ill; 963 struct ndp_g_s *ndp = ill->ill_ipst->ips_ndp6; 964 ndp_process_data_t np; 965 966 if (ill->ill_grp == NULL) { 967 nce_process(nce, hw_addr, flag, is_adv); 968 return; 969 } 970 971 /* IPMP case: walk all NCEs */ 972 np.np_ill = ill; 973 np.np_addr = &nce->nce_addr; 974 np.np_flag = flag; 975 np.np_is_adv = is_adv; 976 np.np_hw_addr = hw_addr; 977 978 ndp_walk_common(ndp, NULL, (pfi_t)ndp_process_entry, &np, ALL_ZONES); 979 } 980 981 /* 982 * Pass arg1 to the pfi supplied, along with each nce in existence. 983 * ndp_walk() places a REFHOLD on the nce and drops the lock when 984 * walking the hash list. 985 */ 986 void 987 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 988 boolean_t trace) 989 { 990 nce_t *nce; 991 nce_t *nce1; 992 nce_t **ncep; 993 nce_t *free_nce_list = NULL; 994 995 mutex_enter(&ndp->ndp_g_lock); 996 /* Prevent ndp_delete from unlink and free of NCE */ 997 ndp->ndp_g_walker++; 998 mutex_exit(&ndp->ndp_g_lock); 999 for (ncep = ndp->nce_hash_tbl; 1000 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 1001 for (nce = *ncep; nce != NULL; nce = nce1) { 1002 nce1 = nce->nce_next; 1003 if (ill == NULL || nce->nce_ill == ill) { 1004 if (trace) { 1005 NCE_REFHOLD(nce); 1006 (*pfi)(nce, arg1); 1007 NCE_REFRELE(nce); 1008 } else { 1009 NCE_REFHOLD_NOTR(nce); 1010 (*pfi)(nce, arg1); 1011 NCE_REFRELE_NOTR(nce); 1012 } 1013 } 1014 } 1015 } 1016 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 1017 nce1 = nce->nce_next; 1018 if (ill == NULL || nce->nce_ill == ill) { 1019 if (trace) { 1020 NCE_REFHOLD(nce); 1021 (*pfi)(nce, arg1); 1022 NCE_REFRELE(nce); 1023 } else { 1024 NCE_REFHOLD_NOTR(nce); 1025 (*pfi)(nce, 
arg1); 1026 NCE_REFRELE_NOTR(nce); 1027 } 1028 } 1029 } 1030 mutex_enter(&ndp->ndp_g_lock); 1031 ndp->ndp_g_walker--; 1032 /* 1033 * While NCE's are removed from global list they are placed 1034 * in a private list, to be passed to nce_ire_delete_list(). 1035 * The reason is, there may be ires pointing to this nce 1036 * which needs to cleaned up. 1037 */ 1038 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 1039 /* Time to delete condemned entries */ 1040 for (ncep = ndp->nce_hash_tbl; 1041 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 1042 nce = *ncep; 1043 if (nce != NULL) { 1044 nce_remove(ndp, nce, &free_nce_list); 1045 } 1046 } 1047 nce = ndp->nce_mask_entries; 1048 if (nce != NULL) { 1049 nce_remove(ndp, nce, &free_nce_list); 1050 } 1051 ndp->ndp_g_walker_cleanup = B_FALSE; 1052 } 1053 1054 mutex_exit(&ndp->ndp_g_lock); 1055 1056 if (free_nce_list != NULL) { 1057 nce_ire_delete_list(free_nce_list); 1058 } 1059 } 1060 1061 /* 1062 * Walk everything. 1063 * Note that ill can be NULL hence can't derive the ipst from it. 1064 */ 1065 void 1066 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst) 1067 { 1068 ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE); 1069 ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE); 1070 } 1071 1072 /* 1073 * Process resolve requests. Handles both mapped entries 1074 * as well as cases that needs to be send out on the wire. 1075 * Lookup a NCE for a given IRE. Regardless of whether one exists 1076 * or one is created, we defer making ire point to nce until the 1077 * ire is actually added at which point the nce_refcnt on the nce is 1078 * incremented. This is done primarily to have symmetry between ire_add() 1079 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 
1080 */ 1081 int 1082 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 1083 { 1084 nce_t *nce, *hw_nce = NULL; 1085 int err; 1086 ill_t *ipmp_ill; 1087 uint16_t nce_flags; 1088 mblk_t *mp_nce = NULL; 1089 ip_stack_t *ipst = ill->ill_ipst; 1090 uchar_t *hwaddr = NULL; 1091 1092 ASSERT(ill->ill_isv6); 1093 1094 if (IN6_IS_ADDR_MULTICAST(dst)) 1095 return (nce_set_multicast(ill, dst)); 1096 1097 nce_flags = (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0; 1098 1099 /* 1100 * If `ill' is under IPMP, then first check to see if there's an NCE 1101 * for `dst' on the IPMP meta-interface (e.g., because an application 1102 * explicitly did an SIOCLIFSETND to tie a hardware address to `dst'). 1103 * If so, we use that hardware address when creating the NCE below. 1104 * Note that we don't yet have a mechanism to remove these NCEs if the 1105 * NCE for `dst' on the IPMP meta-interface is subsequently removed -- 1106 * but rather than build such a beast, we should fix NCEs so that they 1107 * can be properly shared across an IPMP group. 1108 */ 1109 if (IS_UNDER_IPMP(ill)) { 1110 if ((ipmp_ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) { 1111 hw_nce = ndp_lookup_v6(ipmp_ill, B_FALSE, dst, B_FALSE); 1112 if (hw_nce != NULL && hw_nce->nce_res_mp != NULL) { 1113 hwaddr = hw_nce->nce_res_mp->b_rptr + 1114 NCE_LL_ADDR_OFFSET(ipmp_ill); 1115 nce_flags |= hw_nce->nce_flags; 1116 } 1117 ill_refrele(ipmp_ill); 1118 } 1119 } 1120 1121 err = ndp_lookup_then_add_v6(ill, 1122 B_FALSE, /* NCE fastpath is per ill; don't match across group */ 1123 hwaddr, 1124 dst, 1125 &ipv6_all_ones, 1126 &ipv6_all_zeros, 1127 0, 1128 nce_flags, 1129 hwaddr != NULL ? ND_REACHABLE : ND_INCOMPLETE, 1130 &nce); 1131 1132 if (hw_nce != NULL) 1133 NCE_REFRELE(hw_nce); 1134 1135 switch (err) { 1136 case 0: 1137 /* 1138 * New cache entry was created. Make sure that the state 1139 * is not ND_INCOMPLETE. 
It can be in some other state 1140 * even before we send out the solicitation as we could 1141 * get un-solicited advertisements. 1142 * 1143 * If this is an XRESOLV interface, simply return 0, 1144 * since we don't want to solicit just yet. 1145 */ 1146 if (ill->ill_flags & ILLF_XRESOLV) { 1147 NCE_REFRELE(nce); 1148 return (0); 1149 } 1150 1151 mutex_enter(&nce->nce_lock); 1152 if (nce->nce_state != ND_INCOMPLETE) { 1153 mutex_exit(&nce->nce_lock); 1154 NCE_REFRELE(nce); 1155 return (0); 1156 } 1157 if (nce->nce_rcnt == 0) { 1158 /* The caller will free mp */ 1159 mutex_exit(&nce->nce_lock); 1160 ndp_delete(nce); 1161 NCE_REFRELE(nce); 1162 return (ESRCH); 1163 } 1164 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1165 if (mp_nce == NULL) { 1166 /* The caller will free mp */ 1167 mutex_exit(&nce->nce_lock); 1168 ndp_delete(nce); 1169 NCE_REFRELE(nce); 1170 return (ENOMEM); 1171 } 1172 nce_queue_mp(nce, mp_nce); 1173 ip_ndp_resolve(nce); 1174 mutex_exit(&nce->nce_lock); 1175 NCE_REFRELE(nce); 1176 return (EINPROGRESS); 1177 case EEXIST: 1178 /* Resolution in progress just queue the packet */ 1179 mutex_enter(&nce->nce_lock); 1180 if (nce->nce_state == ND_INCOMPLETE) { 1181 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1182 if (mp_nce == NULL) { 1183 err = ENOMEM; 1184 } else { 1185 nce_queue_mp(nce, mp_nce); 1186 err = EINPROGRESS; 1187 } 1188 } else { 1189 /* 1190 * Any other state implies we have 1191 * a nce but IRE needs to be added ... 1192 * ire_add_v6() will take care of the 1193 * the case when the nce becomes CONDEMNED 1194 * before the ire is added to the table. 1195 */ 1196 err = 0; 1197 } 1198 mutex_exit(&nce->nce_lock); 1199 NCE_REFRELE(nce); 1200 break; 1201 default: 1202 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 1203 break; 1204 } 1205 return (err); 1206 } 1207 1208 /* 1209 * When there is no resolver, the link layer template is passed in 1210 * the IRE. 1211 * Lookup a NCE for a given IRE. 
Regardless of whether one exists 1212 * or one is created, we defer making ire point to nce until the 1213 * ire is actually added at which point the nce_refcnt on the nce is 1214 * incremented. This is done primarily to have symmetry between ire_add() 1215 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1216 */ 1217 int 1218 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1219 { 1220 nce_t *nce; 1221 int err = 0; 1222 1223 ASSERT(ill != NULL); 1224 ASSERT(ill->ill_isv6); 1225 if (IN6_IS_ADDR_MULTICAST(dst)) { 1226 err = nce_set_multicast(ill, dst); 1227 return (err); 1228 } 1229 1230 err = ndp_lookup_then_add_v6(ill, 1231 B_FALSE, /* NCE fastpath is per ill; don't match across group */ 1232 NULL, /* hardware address */ 1233 dst, 1234 &ipv6_all_ones, 1235 &ipv6_all_zeros, 1236 0, 1237 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1238 ND_REACHABLE, 1239 &nce); 1240 1241 switch (err) { 1242 case 0: 1243 /* 1244 * Cache entry with a proper resolver cookie was 1245 * created. 1246 */ 1247 NCE_REFRELE(nce); 1248 break; 1249 case EEXIST: 1250 err = 0; 1251 NCE_REFRELE(nce); 1252 break; 1253 default: 1254 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1255 break; 1256 } 1257 return (err); 1258 } 1259 1260 /* 1261 * For each interface an entry is added for the unspecified multicast group. 1262 * Here that mapping is used to form the multicast cache entry for a particular 1263 * multicast destination. 
1264 */ 1265 static int 1266 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1267 { 1268 nce_t *mnce; /* Multicast mapping entry */ 1269 nce_t *nce; 1270 uchar_t *hw_addr = NULL; 1271 int err = 0; 1272 ip_stack_t *ipst = ill->ill_ipst; 1273 1274 ASSERT(ill != NULL); 1275 ASSERT(ill->ill_isv6); 1276 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1277 1278 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1279 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst)); 1280 nce = nce_lookup_addr(ill, B_FALSE, dst, nce); 1281 if (nce != NULL) { 1282 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1283 NCE_REFRELE(nce); 1284 return (0); 1285 } 1286 /* No entry, now lookup for a mapping this should never fail */ 1287 mnce = nce_lookup_mapping(ill, dst); 1288 if (mnce == NULL) { 1289 /* Something broken for the interface. */ 1290 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1291 return (ESRCH); 1292 } 1293 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1294 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1295 /* 1296 * For IRE_IF_RESOLVER a hardware mapping can be 1297 * generated, for IRE_IF_NORESOLVER, resolution cookie 1298 * in the ill is copied in ndp_add_v6(). 1299 */ 1300 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1301 if (hw_addr == NULL) { 1302 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1303 NCE_REFRELE(mnce); 1304 return (ENOMEM); 1305 } 1306 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1307 } 1308 NCE_REFRELE(mnce); 1309 /* 1310 * IRE_IF_NORESOLVER type simply copies the resolution 1311 * cookie passed in. So no hw_addr is needed. 
1312 */ 1313 err = ndp_add_v6(ill, 1314 hw_addr, 1315 dst, 1316 &ipv6_all_ones, 1317 &ipv6_all_zeros, 1318 0, 1319 NCE_F_NONUD, 1320 ND_REACHABLE, 1321 &nce); 1322 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1323 if (hw_addr != NULL) 1324 kmem_free(hw_addr, ill->ill_nd_lla_len); 1325 if (err != 0) { 1326 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1327 return (err); 1328 } 1329 NCE_REFRELE(nce); 1330 return (0); 1331 } 1332 1333 /* 1334 * Return the link layer address, and any flags of a nce. 1335 */ 1336 int 1337 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1338 { 1339 nce_t *nce; 1340 in6_addr_t *addr; 1341 sin6_t *sin6; 1342 dl_unitdata_req_t *dl; 1343 1344 ASSERT(ill != NULL && ill->ill_isv6); 1345 sin6 = (sin6_t *)&lnr->lnr_addr; 1346 addr = &sin6->sin6_addr; 1347 1348 /* 1349 * NOTE: if the ill is an IPMP interface, then match against the whole 1350 * illgrp. This e.g. allows in.ndpd to retrieve the link layer 1351 * addresses for the data addresses on an IPMP interface even though 1352 * ipif_ndp_up() created them with an nce_ill of ipif_bound_ill. 
1353 */ 1354 nce = ndp_lookup_v6(ill, IS_IPMP(ill), addr, B_FALSE); 1355 if (nce == NULL) 1356 return (ESRCH); 1357 /* If in INCOMPLETE state, no link layer address is available yet */ 1358 if (!NCE_ISREACHABLE(nce)) { 1359 NCE_REFRELE(nce); 1360 return (ESRCH); 1361 } 1362 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1363 if (ill->ill_flags & ILLF_XRESOLV) 1364 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1365 else 1366 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1367 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1368 sizeof (lnr->lnr_hdw_addr)); 1369 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1370 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1371 if (nce->nce_flags & NCE_F_ISROUTER) 1372 lnr->lnr_flags = NDF_ISROUTER_ON; 1373 if (nce->nce_flags & NCE_F_ANYCAST) 1374 lnr->lnr_flags |= NDF_ANYCAST_ON; 1375 NCE_REFRELE(nce); 1376 return (0); 1377 } 1378 1379 /* 1380 * Send Enable/Disable multicast reqs to driver. 1381 */ 1382 int 1383 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1384 uint32_t hw_addr_offset, mblk_t *mp) 1385 { 1386 nce_t *nce; 1387 uchar_t *hw_addr; 1388 ip_stack_t *ipst = ill->ill_ipst; 1389 1390 ASSERT(ill != NULL && ill->ill_isv6); 1391 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1392 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1393 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1394 freemsg(mp); 1395 return (EINVAL); 1396 } 1397 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1398 nce = nce_lookup_mapping(ill, addr); 1399 if (nce == NULL) { 1400 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1401 freemsg(mp); 1402 return (ESRCH); 1403 } 1404 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1405 /* 1406 * Update dl_addr_length and dl_addr_offset for primitives that 1407 * have physical addresses as opposed to full saps 1408 */ 1409 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1410 case DL_ENABMULTI_REQ: 1411 /* Track the state if this is the first enabmulti */ 1412 if 
(ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1413 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1414 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1415 break; 1416 case DL_DISABMULTI_REQ: 1417 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1418 break; 1419 default: 1420 NCE_REFRELE(nce); 1421 ip1dbg(("ndp_mcastreq: default\n")); 1422 return (EINVAL); 1423 } 1424 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1425 NCE_REFRELE(nce); 1426 ill_dlpi_send(ill, mp); 1427 return (0); 1428 } 1429 1430 1431 /* 1432 * Send out a NS for resolving the ip address in nce. 1433 */ 1434 void 1435 ip_ndp_resolve(nce_t *nce) 1436 { 1437 in6_addr_t sender6 = ipv6_all_zeros; 1438 uint32_t ms; 1439 mblk_t *mp; 1440 ip6_t *ip6h; 1441 1442 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1443 /* 1444 * Pick the src from outgoing packet, if one is available. 1445 * Otherwise let nce_xmit figure out the src. 1446 */ 1447 if ((mp = nce->nce_qd_mp) != NULL) { 1448 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1449 if (mp->b_datap->db_type == M_CTL) 1450 mp = mp->b_cont; 1451 ip6h = (ip6_t *)mp->b_rptr; 1452 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1453 /* 1454 * This message should have been pulled up already in 1455 * ip_wput_v6. We can't do pullups here because 1456 * the message could be from the nce_qd_mp which could 1457 * have b_next/b_prev non-NULL. 1458 */ 1459 ASSERT(MBLKL(mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN); 1460 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1461 } 1462 sender6 = ip6h->ip6_src; 1463 } 1464 ms = nce_solicit(nce, sender6); 1465 mutex_exit(&nce->nce_lock); 1466 if (ms == 0) { 1467 if (nce->nce_state != ND_REACHABLE) { 1468 nce_resolv_failed(nce); 1469 ndp_delete(nce); 1470 } 1471 } else { 1472 NDP_RESTART_TIMER(nce, (clock_t)ms); 1473 } 1474 mutex_enter(&nce->nce_lock); 1475 } 1476 1477 /* 1478 * Send a neighbor solicitation. 1479 * Returns number of milliseconds after which we should either rexmit or abort. 1480 * Return of zero means we should abort. 
1481 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1482 * 1483 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1484 * the packet. 1485 */ 1486 uint32_t 1487 nce_solicit(nce_t *nce, in6_addr_t sender) 1488 { 1489 boolean_t dropped; 1490 1491 ASSERT(nce->nce_ipversion == IPV6_VERSION); 1492 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1493 1494 if (nce->nce_rcnt == 0) 1495 return (0); 1496 1497 nce->nce_rcnt--; 1498 mutex_exit(&nce->nce_lock); 1499 dropped = nce_xmit_solicit(nce, B_TRUE, &sender, 0); 1500 mutex_enter(&nce->nce_lock); 1501 if (dropped) 1502 nce->nce_rcnt++; 1503 return (nce->nce_ill->ill_reachable_retrans_time); 1504 } 1505 1506 /* 1507 * Attempt to recover an address on an interface that's been marked as a 1508 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1509 * no easy way to just probe the address and have the right thing happen if 1510 * it's no longer in use. Instead, we just bring it up normally and allow the 1511 * regular interface start-up logic to probe for a remaining duplicate and take 1512 * us back down if necessary. 1513 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1514 * ip_ndp_excl. 1515 */ 1516 /* ARGSUSED */ 1517 static void 1518 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1519 { 1520 ill_t *ill = rq->q_ptr; 1521 ipif_t *ipif; 1522 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1523 1524 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1525 /* 1526 * We do not support recovery of proxy ARP'd interfaces, 1527 * because the system lacks a complete proxy ARP mechanism. 1528 */ 1529 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1530 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1531 continue; 1532 } 1533 1534 /* 1535 * If we have already recovered or if the interface is going 1536 * away, then ignore. 
1537 */ 1538 mutex_enter(&ill->ill_lock); 1539 if (!(ipif->ipif_flags & IPIF_DUPLICATE) || 1540 (ipif->ipif_state_flags & IPIF_CONDEMNED)) { 1541 mutex_exit(&ill->ill_lock); 1542 continue; 1543 } 1544 1545 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1546 ill->ill_ipif_dup_count--; 1547 mutex_exit(&ill->ill_lock); 1548 ipif->ipif_was_dup = B_TRUE; 1549 1550 VERIFY(ipif_ndp_up(ipif, B_TRUE) != EINPROGRESS); 1551 (void) ipif_up_done_v6(ipif); 1552 } 1553 freeb(mp); 1554 } 1555 1556 /* 1557 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1558 * As long as someone else holds the address, the interface will stay down. 1559 * When that conflict goes away, the interface is brought back up. This is 1560 * done so that accidental shutdowns of addresses aren't made permanent. Your 1561 * server will recover from a failure. 1562 * 1563 * For DHCP and temporary addresses, recovery is not done in the kernel. 1564 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1565 * 1566 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1567 */ 1568 static void 1569 ipif6_dup_recovery(void *arg) 1570 { 1571 ipif_t *ipif = arg; 1572 1573 ipif->ipif_recovery_id = 0; 1574 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1575 return; 1576 1577 /* 1578 * No lock, because this is just an optimization. 1579 */ 1580 if (ipif->ipif_state_flags & IPIF_CONDEMNED) 1581 return; 1582 1583 /* If the link is down, we'll retry this later */ 1584 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1585 return; 1586 1587 ndp_do_recovery(ipif); 1588 } 1589 1590 /* 1591 * Perform interface recovery by forcing the duplicate interfaces up and 1592 * allowing the system to determine which ones should stay up. 1593 * 1594 * Called both by recovery timer expiry and link-up notification. 
1595 */ 1596 void 1597 ndp_do_recovery(ipif_t *ipif) 1598 { 1599 ill_t *ill = ipif->ipif_ill; 1600 mblk_t *mp; 1601 ip_stack_t *ipst = ill->ill_ipst; 1602 1603 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1604 if (mp == NULL) { 1605 mutex_enter(&ill->ill_lock); 1606 if (ipif->ipif_recovery_id == 0 && 1607 !(ipif->ipif_state_flags & IPIF_CONDEMNED)) { 1608 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1609 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1610 } 1611 mutex_exit(&ill->ill_lock); 1612 } else { 1613 /* 1614 * A recovery timer may still be running if we got here from 1615 * ill_restart_dad(); cancel that timer. 1616 */ 1617 if (ipif->ipif_recovery_id != 0) 1618 (void) untimeout(ipif->ipif_recovery_id); 1619 ipif->ipif_recovery_id = 0; 1620 1621 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1622 sizeof (ipif->ipif_v6lcl_addr)); 1623 ill_refhold(ill); 1624 qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP, 1625 B_FALSE); 1626 } 1627 } 1628 1629 /* 1630 * Find the MAC and IP addresses in an NA/NS message. 1631 */ 1632 static void 1633 ip_ndp_find_addresses(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, in6_addr_t *targp, 1634 uchar_t **haddr, uint_t *haddrlenp) 1635 { 1636 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1637 icmp6_t *icmp6 = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1638 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 1639 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 1640 uchar_t *addr; 1641 int alen = 0; 1642 1643 if (dl_mp == NULL) { 1644 nd_opt_hdr_t *opt = NULL; 1645 int len; 1646 1647 /* 1648 * If it's from the fast-path, then it can't be a probe 1649 * message, and thus must include a linkaddr option. 1650 * Extract that here. 
1651 */ 1652 switch (icmp6->icmp6_type) { 1653 case ND_NEIGHBOR_SOLICIT: 1654 len = mp->b_wptr - (uchar_t *)ns; 1655 if ((len -= sizeof (*ns)) > 0) { 1656 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), 1657 len, ND_OPT_SOURCE_LINKADDR); 1658 } 1659 break; 1660 case ND_NEIGHBOR_ADVERT: 1661 len = mp->b_wptr - (uchar_t *)na; 1662 if ((len -= sizeof (*na)) > 0) { 1663 opt = ndp_get_option((nd_opt_hdr_t *)(na + 1), 1664 len, ND_OPT_TARGET_LINKADDR); 1665 } 1666 break; 1667 } 1668 1669 if (opt != NULL && opt->nd_opt_len * 8 - sizeof (*opt) >= 1670 ill->ill_nd_lla_len) { 1671 addr = (uchar_t *)(opt + 1); 1672 alen = ill->ill_nd_lla_len; 1673 } 1674 1675 /* 1676 * We cheat a bit here for the sake of printing usable log 1677 * messages in the rare case where the reply we got was unicast 1678 * without a source linkaddr option, and the interface is in 1679 * fastpath mode. (Sigh.) 1680 */ 1681 if (alen == 0 && ill->ill_type == IFT_ETHER && 1682 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1683 struct ether_header *pether; 1684 1685 pether = (struct ether_header *)((char *)ip6h - 1686 sizeof (*pether)); 1687 addr = pether->ether_shost.ether_addr_octet; 1688 alen = ETHERADDRL; 1689 } 1690 } else { 1691 dl_unitdata_ind_t *dlu; 1692 1693 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1694 alen = dlu->dl_src_addr_length; 1695 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1696 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1697 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1698 if (ill->ill_sap_length < 0) { 1699 alen += ill->ill_sap_length; 1700 } else { 1701 addr += ill->ill_sap_length; 1702 alen -= ill->ill_sap_length; 1703 } 1704 } 1705 } 1706 1707 if (alen > 0) { 1708 *haddr = addr; 1709 *haddrlenp = alen; 1710 } else { 1711 *haddr = NULL; 1712 *haddrlenp = 0; 1713 } 1714 1715 /* nd_ns_target and nd_na_target are at the same offset, so we cheat */ 1716 *targp = ns->nd_ns_target; 1717 } 1718 1719 /* 1720 * This is for exclusive changes due to NDP duplicate 
address detection 1721 * failure. 1722 */ 1723 /* ARGSUSED */ 1724 static void 1725 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1726 { 1727 ill_t *ill = rq->q_ptr; 1728 ipif_t *ipif; 1729 mblk_t *dl_mp = NULL; 1730 uchar_t *haddr; 1731 uint_t haddrlen; 1732 ip_stack_t *ipst = ill->ill_ipst; 1733 in6_addr_t targ; 1734 1735 if (DB_TYPE(mp) != M_DATA) { 1736 dl_mp = mp; 1737 mp = mp->b_cont; 1738 } 1739 1740 ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen); 1741 if (haddr != NULL && haddrlen == ill->ill_phys_addr_length) { 1742 /* 1743 * Ignore conflicts generated by misbehaving switches that 1744 * just reflect our own messages back to us. For IPMP, we may 1745 * see reflections across any ill in the illgrp. 1746 */ 1747 if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 || 1748 IS_UNDER_IPMP(ill) && 1749 ipmp_illgrp_find_ill(ill->ill_grp, haddr, haddrlen) != NULL) 1750 goto ignore_conflict; 1751 } 1752 1753 /* 1754 * Look up the appropriate ipif. 1755 */ 1756 ipif = ipif_lookup_addr_v6(&targ, ill, ALL_ZONES, NULL, NULL, NULL, 1757 NULL, ipst); 1758 if (ipif == NULL) 1759 goto ignore_conflict; 1760 1761 /* Reload the ill to match the ipif */ 1762 ill = ipif->ipif_ill; 1763 1764 /* If it's already duplicate or ineligible, then don't do anything. */ 1765 if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) { 1766 ipif_refrele(ipif); 1767 goto ignore_conflict; 1768 } 1769 1770 /* 1771 * If this is a failure during duplicate recovery, then don't 1772 * complain. It may take a long time to recover. 
1773 */ 1774 if (!ipif->ipif_was_dup) { 1775 char ibuf[LIFNAMSIZ]; 1776 char hbuf[MAC_STR_LEN]; 1777 char sbuf[INET6_ADDRSTRLEN]; 1778 1779 ipif_get_name(ipif, ibuf, sizeof (ibuf)); 1780 cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);" 1781 " disabled", ibuf, 1782 inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)), 1783 mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf))); 1784 } 1785 mutex_enter(&ill->ill_lock); 1786 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1787 ipif->ipif_flags |= IPIF_DUPLICATE; 1788 ill->ill_ipif_dup_count++; 1789 mutex_exit(&ill->ill_lock); 1790 (void) ipif_down(ipif, NULL, NULL); 1791 ipif_down_tail(ipif); 1792 mutex_enter(&ill->ill_lock); 1793 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1794 ill->ill_net_type == IRE_IF_RESOLVER && 1795 !(ipif->ipif_state_flags & IPIF_CONDEMNED) && 1796 ipst->ips_ip_dup_recovery > 0) { 1797 ASSERT(ipif->ipif_recovery_id == 0); 1798 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1799 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1800 } 1801 mutex_exit(&ill->ill_lock); 1802 ipif_refrele(ipif); 1803 ignore_conflict: 1804 if (dl_mp != NULL) 1805 freeb(dl_mp); 1806 freemsg(mp); 1807 } 1808 1809 /* 1810 * Handle failure by tearing down the ipifs with the specified address. Note 1811 * that tearing down the ipif also means deleting the nce through ipif_down, so 1812 * it's not possible to do recovery by just restarting the nce timer. Instead, 1813 * we start a timer on the ipif. 
 */
static void
ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
	/*
	 * The caller's messages are left alone; copies are handed to
	 * ip_ndp_excl() via qwriter_ip().  If anything cannot be copied the
	 * failure is silently not reported.
	 */
	if ((mp = copymsg(mp)) != NULL) {
		if (dl_mp == NULL)
			dl_mp = mp;
		else if ((dl_mp = copyb(dl_mp)) != NULL)
			dl_mp->b_cont = mp;
		if (dl_mp == NULL) {
			/* copyb() failed; drop the copy of the packet. */
			freemsg(mp);
		} else {
			ill_refhold(ill);
			qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP,
			    B_FALSE);
		}
	}
}

/*
 * Handle a discovered conflict: some other system is advertising that it owns
 * one of our IP addresses.  We need to defend ourselves, or just shut down the
 * interface.
 *
 * `nce' is the entry for our own address; `mp' and `dl_mp' are the
 * conflicting message and are not consumed here.
 */
static void
ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce)
{
	ipif_t	*ipif;
	uint32_t now;
	uint_t	maxdefense;
	uint_t	defs;
	ip_stack_t	*ipst = ill->ill_ipst;

	ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL,
	    NULL, NULL, ipst);
	if (ipif == NULL)
		return;

	/*
	 * First, figure out if this address is disposable.
	 */
	if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY))
		maxdefense = ipst->ips_ip_max_temp_defend;
	else
		maxdefense = ipst->ips_ip_max_defend;

	/*
	 * Now figure out how many times we've defended ourselves.  Ignore
	 * defenses that happened long in the past.
	 *
	 * Note that `defs' is the count from before this conflict; the
	 * counter stored in the nce is then bumped to include it.
	 */
	now = gethrestime_sec();
	mutex_enter(&nce->nce_lock);
	if ((defs = nce->nce_defense_count) > 0 &&
	    now - nce->nce_defense_time > ipst->ips_ip_defend_interval) {
		nce->nce_defense_count = defs = 0;
	}
	nce->nce_defense_count++;
	nce->nce_defense_time = now;
	mutex_exit(&nce->nce_lock);
	/* The ipif was only needed to pick maxdefense. */
	ipif_refrele(ipif);

	/*
	 * If we've defended ourselves too many times already, then give up and
	 * tear down the interface(s) using this address.  Otherwise, defend by
	 * sending out an unsolicited Neighbor Advertisement.
	 */
	if (defs >= maxdefense) {
		ip_ndp_failure(ill, mp, dl_mp);
	} else {
		char hbuf[MAC_STR_LEN];
		char sbuf[INET6_ADDRSTRLEN];
		uchar_t *haddr;
		uint_t haddrlen;
		in6_addr_t targ;

		/* The addresses are only needed for the warning below. */
		ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen);
		cmn_err(CE_WARN, "node %s is using our IP address %s on %s",
		    mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)),
		    inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)),
		    ill->ill_name);

		(void) nce_xmit_advert(nce, B_FALSE, &ipv6_all_hosts_mcast, 0);
	}
}

/*
 * Process a received Neighbor Solicitation.
 *
 * `mp' is the IPv6 packet and `dl_mp' the DLPI message that came with it,
 * if any; neither is freed here.  Solicitations that fail validation are
 * counted in ipv6IfIcmpInBadNeighborSolicitations.  A valid solicitation
 * for one of our permanent entries is answered with an advertisement,
 * unless that entry is still being probed (ND_PROBE).
 */
static void
ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
	nd_neighbor_solicit_t *ns;
	uint32_t	hlen = ill->ill_nd_lla_len;
	uchar_t		*haddr = NULL;
	icmp6_t		*icmp_nd;
	ip6_t		*ip6h;
	nce_t		*our_nce = NULL;
	in6_addr_t	target;
	in6_addr_t	src;
	int		len;
	int		flag = 0;
	nd_opt_hdr_t	*opt = NULL;
	boolean_t	bad_solicit = B_FALSE;
	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;

	ip6h = (ip6_t *)mp->b_rptr;
	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
	/* Length of everything that follows the IPv6 header. */
	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
	src = ip6h->ip6_src;
	ns = (nd_neighbor_solicit_t *)icmp_nd;
	target = ns->nd_ns_target;
	if (IN6_IS_ADDR_MULTICAST(&target)) {
		if (ip_debug > 2) {
			/* ip1dbg */
			pr_addr_dbg("ndp_input_solicit: Target is"
			    " multicast! %s\n", AF_INET6, &target);
		}
		bad_solicit = B_TRUE;
		goto done;
	}
	if (len > sizeof (nd_neighbor_solicit_t)) {
		/* Options present */
		opt = (nd_opt_hdr_t *)&ns[1];
		/* From here on `len' is the length of the options only. */
		len -= sizeof (nd_neighbor_solicit_t);
		if (!ndp_verify_optlen(opt, len)) {
			ip1dbg(("ndp_input_solicit: Bad opt len\n"));
			bad_solicit = B_TRUE;
			goto done;
		}

	}
	if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
		/* Check to see if this is a valid DAD solicitation */
		if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) {
			if (ip_debug > 2) {
				/* ip1dbg */
				pr_addr_dbg("ndp_input_solicit: IPv6 "
				    "Destination is not solicited node "
				    "multicast %s\n", AF_INET6,
				    &ip6h->ip6_dst);
			}
			bad_solicit = B_TRUE;
			goto done;
		}
	}

	/*
	 * NOTE: with IPMP, it's possible the nominated multicast ill (which
	 * received this packet if it's multicast) is not the ill tied to
	 * e.g. the IPMP ill's data link-local.  So we match across the illgrp
	 * to ensure we find the associated NCE.
	 */
	our_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE);
	/*
	 * If this is a valid Solicitation, a permanent
	 * entry should exist in the cache
	 */
	if (our_nce == NULL ||
	    !(our_nce->nce_flags & NCE_F_PERMANENT)) {
		ip1dbg(("ndp_input_solicit: Wrong target in NS?!"
		    "ifname=%s ", ill->ill_name));
		if (ip_debug > 2) {
			/* ip1dbg */
			pr_addr_dbg(" dst %s\n", AF_INET6, &target);
		}
		bad_solicit = B_TRUE;
		goto done;
	}

	/* At this point we should have a verified NS per spec */
	if (opt != NULL) {
		opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR);
		if (opt != NULL) {
			haddr = (uchar_t *)&opt[1];
			/*
			 * The option must be able to hold a link-layer
			 * address of the size this ill uses.
			 */
			if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) ||
			    hlen == 0) {
				ip1dbg(("ndp_input_solicit: bad SLLA\n"));
				bad_solicit = B_TRUE;
				goto done;
			}
		}
	}

	/* If sending directly to peer, set the unicast flag */
	if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))
		flag |= NDP_UNICAST;

	/*
	 * Create/update the entry for the soliciting node.
	 * or respond to outstanding queries, don't if
	 * the source is unspecified address.
	 */
	if (!IN6_IS_ADDR_UNSPECIFIED(&src)) {
		int	err;
		nce_t	*nnce;

		ASSERT(ill->ill_isv6);
		/*
		 * Regular solicitations *must* include the Source Link-Layer
		 * Address option.  Ignore messages that do not.
		 */
		if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
			ip1dbg(("ndp_input_solicit: source link-layer address "
			    "option missing with a specified source.\n"));
			bad_solicit = B_TRUE;
			goto done;
		}

		/*
		 * This is a regular solicitation.  If we're still in the
		 * process of verifying the address, then don't respond at all
		 * and don't keep track of the sender.
		 */
		if (our_nce->nce_state == ND_PROBE)
			goto done;

		/*
		 * If the solicitation doesn't have sender hardware address
		 * (legal for unicast solicitation), then process without
		 * installing the return NCE.  Either we already know it, or
		 * we'll be forced to look it up when (and if) we reply to the
		 * packet.
		 */
		if (haddr == NULL)
			goto no_source;

		err = ndp_lookup_then_add_v6(ill,
		    B_FALSE,
		    haddr,
		    &src,	/* Soliciting nodes address */
		    &ipv6_all_ones,
		    &ipv6_all_zeros,
		    0,
		    0,
		    ND_STALE,
		    &nnce);
		switch (err) {
		case 0:
			/* done with this entry */
			NCE_REFRELE(nnce);
			break;
		case EEXIST:
			/*
			 * B_FALSE indicates this is not an advertisement.
			 */
			ndp_process(nnce, haddr, 0, B_FALSE);
			NCE_REFRELE(nnce);
			break;
		default:
			ip1dbg(("ndp_input_solicit: Can't create NCE %d\n",
			    err));
			goto done;
		}
no_source:
		flag |= NDP_SOLICITED;
	} else {
		/*
		 * No source link layer address option should be present in a
		 * valid DAD request.
		 */
		if (haddr != NULL) {
			ip1dbg(("ndp_input_solicit: source link-layer address "
			    "option present with an unspecified source.\n"));
			bad_solicit = B_TRUE;
			goto done;
		}
		if (our_nce->nce_state == ND_PROBE) {
			/*
			 * Internally looped-back probes won't have DLPI
			 * attached to them.  External ones (which are sent by
			 * multicast) always will.  Just ignore our own
			 * transmissions.
			 */
			if (dl_mp != NULL) {
				/*
				 * If someone else is probing our address, then
				 * we've crossed wires.  Declare failure.
				 */
				ip_ndp_failure(ill, mp, dl_mp);
			}
			goto done;
		}
		/*
		 * This is a DAD probe.  Multicast the advertisement to the
		 * all-nodes address.
		 */
		src = ipv6_all_hosts_mcast;
	}
	/* Response to a solicitation */
	(void) nce_xmit_advert(our_nce, B_TRUE, &src, flag);
done:
	/* Common exit: count bad solicitations and drop our reference. */
	if (bad_solicit)
		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations);
	if (our_nce != NULL)
		NCE_REFRELE(our_nce);
}

/*
 * Process a received Neighbor Advertisement.
 *
 * `mp' is the IPv6 packet and `dl_mp' the DLPI message that came with it,
 * if any; neither is freed here.  Advertisements that fail validation are
 * counted in ipv6IfIcmpInBadNeighborAdvertisements.  An advertisement for
 * one of our own (permanent) addresses is treated as a potential address
 * conflict; any other advertisement updates the matching cache entry.
 */
void
ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
	nd_neighbor_advert_t *na;
	uint32_t	hlen = ill->ill_nd_lla_len;
	uchar_t		*haddr = NULL;
	icmp6_t		*icmp_nd;
	ip6_t		*ip6h;
	nce_t		*dst_nce = NULL;
	in6_addr_t	target;
	nd_opt_hdr_t	*opt = NULL;
	int		len;
	ip_stack_t	*ipst = ill->ill_ipst;
	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;

	ip6h = (ip6_t *)mp->b_rptr;
	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
	/* Length of everything that follows the IPv6 header. */
	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
	na = (nd_neighbor_advert_t *)icmp_nd;
	/* A multicast advertisement must not have the solicited flag set. */
	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
	    (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) {
		ip1dbg(("ndp_input_advert: Target is multicast but the "
		    "solicited flag is not zero\n"));
		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
		return;
	}
	target = na->nd_na_target;
	if (IN6_IS_ADDR_MULTICAST(&target)) {
		ip1dbg(("ndp_input_advert: Target is multicast!\n"));
		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
		return;
	}
	if (len > sizeof (nd_neighbor_advert_t)) {
		/* Options present */
		opt = (nd_opt_hdr_t *)&na[1];
		if (!ndp_verify_optlen(opt,
		    len - sizeof (nd_neighbor_advert_t))) {
			ip1dbg(("ndp_input_advert: cannot verify SLLA\n"));
			BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
			return;
		}
		/* At this point we have a verified NA per spec */
		len -= sizeof (nd_neighbor_advert_t);
		opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR);
		if (opt != NULL) {
			haddr = (uchar_t *)&opt[1];
			/*
			 * The option must be able to hold a link-layer
			 * address of the size this ill uses.
			 */
			if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) ||
			    hlen == 0) {
				ip1dbg(("ndp_input_advert: bad SLLA\n"));
				BUMP_MIB(mib,
				    ipv6IfIcmpInBadNeighborAdvertisements);
				return;
			}
		}
	}

	/*
	 * NOTE: we match across the illgrp since we need to do DAD for all of
	 * our local addresses, and those are spread across all the active
	 * ills in the group.
	 */
	if ((dst_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE)) == NULL)
		return;

	if (dst_nce->nce_flags & NCE_F_PERMANENT) {
		/*
		 * Someone just advertised one of our local addresses.  First,
		 * check if it was us -- if so, we can safely ignore it.
		 */
		if (haddr != NULL) {
			if (!nce_cmp_ll_addr(dst_nce, haddr, hlen))
				goto out;	/* from us -- no conflict */

			/*
			 * If we're in an IPMP group, check if this is an echo
			 * from another ill in the group.  Use the double-
			 * checked locking pattern to avoid grabbing
			 * ill_g_lock in the non-IPMP case.
			 */
			if (IS_UNDER_IPMP(ill)) {
				rw_enter(&ipst->ips_ill_g_lock, RW_READER);
				if (IS_UNDER_IPMP(ill) && ipmp_illgrp_find_ill(
				    ill->ill_grp, haddr, hlen) != NULL) {
					rw_exit(&ipst->ips_ill_g_lock);
					goto out;
				}
				rw_exit(&ipst->ips_ill_g_lock);
			}
		}

		/*
		 * Our own (looped-back) unsolicited neighbor advertisements
		 * will get here with dl_mp == NULL.  (These will usually be
		 * filtered by the `haddr' checks above, but point-to-point
		 * links have no hardware address and thus make it here.)
		 */
		if (dl_mp == NULL && dst_nce->nce_state != ND_PROBE)
			goto out;

		/*
		 * This appears to be a real conflict.  If we're trying to
		 * configure this NCE (ND_PROBE), then shut it down.
		 * Otherwise, handle the discovered conflict.
		 *
		 * In the ND_PROBE case, dl_mp might be NULL if we're getting
		 * a unicast reply.  This isn't typically done (multicast is
		 * the norm in response to a probe), but we can handle it.
		 */
		if (dst_nce->nce_state == ND_PROBE)
			ip_ndp_failure(ill, mp, dl_mp);
		else
			ip_ndp_conflict(ill, mp, dl_mp, dst_nce);
	} else {
		/* Remember that the advertising neighbor is a router. */
		if (na->nd_na_flags_reserved & ND_NA_FLAG_ROUTER)
			dst_nce->nce_flags |= NCE_F_ISROUTER;

		/* B_TRUE indicates this is an advertisement */
		ndp_process(dst_nce, haddr, na->nd_na_flags_reserved, B_TRUE);
	}
out:
	NCE_REFRELE(dst_nce);
}

/*
 * Process NDP neighbor solicitation/advertisement messages.
 * The checksum has already checked o.k before reaching here.
 */
void
ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
	icmp6_t	*icmp_nd;
	ip6_t	*ip6h;
	int	len;
	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;


	if (!pullupmsg(mp, -1)) {
		ip1dbg(("ndp_input: pullupmsg failed\n"));
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
		goto done;
	}
	ip6h = (ip6_t *)mp->b_rptr;
	if (ip6h->ip6_hops != IPV6_MAX_HOPS) {
		ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n"));
		BUMP_MIB(mib, ipv6IfIcmpBadHoplimit);
		goto done;
	}
	/*
	 * NDP does not accept any extension headers between the
	 * IP header and the ICMP header since e.g. a routing
	 * header could be dangerous.
	 * This assumes that any AH or ESP headers are removed
	 * by ip prior to passing the packet to ndp_input.
2260 */ 2261 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 2262 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 2263 ip6h->ip6_nxt)); 2264 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2265 goto done; 2266 } 2267 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2268 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 2269 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 2270 if (icmp_nd->icmp6_code != 0) { 2271 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 2272 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2273 goto done; 2274 } 2275 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2276 /* 2277 * Make sure packet length is large enough for either 2278 * a NS or a NA icmp packet. 2279 */ 2280 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 2281 ip1dbg(("ndp_input: packet too short\n")); 2282 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2283 goto done; 2284 } 2285 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 2286 ndp_input_solicit(ill, mp, dl_mp); 2287 } else { 2288 ndp_input_advert(ill, mp, dl_mp); 2289 } 2290 done: 2291 freemsg(mp); 2292 } 2293 2294 /* 2295 * Utility routine to send an advertisement. Assumes that the NCE cannot 2296 * go away (e.g., because it's refheld). 2297 */ 2298 static boolean_t 2299 nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *target, 2300 uint_t flags) 2301 { 2302 ASSERT((flags & NDP_PROBE) == 0); 2303 2304 if (nce->nce_flags & NCE_F_ISROUTER) 2305 flags |= NDP_ISROUTER; 2306 if (!(nce->nce_flags & NCE_F_ANYCAST)) 2307 flags |= NDP_ORIDE; 2308 2309 return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_ADVERT, use_nd_lla, 2310 &nce->nce_addr, target, flags)); 2311 } 2312 2313 /* 2314 * Utility routine to send a solicitation. Assumes that the NCE cannot 2315 * go away (e.g., because it's refheld). 
2316 */ 2317 static boolean_t 2318 nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *sender, 2319 uint_t flags) 2320 { 2321 if (flags & NDP_PROBE) 2322 sender = &ipv6_all_zeros; 2323 2324 return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, use_nd_lla, 2325 sender, &nce->nce_addr, flags)); 2326 } 2327 2328 /* 2329 * nce_xmit is called to form and transmit a ND solicitation or 2330 * advertisement ICMP packet. 2331 * 2332 * If the source address is unspecified and this isn't a probe (used for 2333 * duplicate address detection), an appropriate source address and link layer 2334 * address will be chosen here. The link layer address option is included if 2335 * the source is specified (i.e., all non-probe packets), and omitted (per the 2336 * specification) otherwise. 2337 * 2338 * It returns B_FALSE only if it does a successful put() to the 2339 * corresponding ill's ill_wq otherwise returns B_TRUE. 2340 */ 2341 static boolean_t 2342 nce_xmit(ill_t *ill, uint8_t type, boolean_t use_nd_lla, 2343 const in6_addr_t *sender, const in6_addr_t *target, int flag) 2344 { 2345 ill_t *hwaddr_ill; 2346 uint32_t len; 2347 icmp6_t *icmp6; 2348 mblk_t *mp; 2349 ip6_t *ip6h; 2350 nd_opt_hdr_t *opt; 2351 uint_t plen, maxplen; 2352 ip6i_t *ip6i; 2353 ipif_t *src_ipif = NULL; 2354 uint8_t *hw_addr; 2355 zoneid_t zoneid = GLOBAL_ZONEID; 2356 char buf[INET6_ADDRSTRLEN]; 2357 2358 ASSERT(!IS_IPMP(ill)); 2359 2360 /* 2361 * Check that the sender is actually a usable address on `ill', and if 2362 * so, track that as the src_ipif. If not, for solicitations, set the 2363 * sender to :: so that a new one will be picked below; for adverts, 2364 * drop the packet since we expect nce_xmit_advert() to always provide 2365 * a valid sender. 
2366 */ 2367 if (!IN6_IS_ADDR_UNSPECIFIED(sender)) { 2368 if ((src_ipif = ip_ndp_lookup_addr_v6(sender, ill)) == NULL || 2369 !src_ipif->ipif_addr_ready) { 2370 if (src_ipif != NULL) { 2371 ipif_refrele(src_ipif); 2372 src_ipif = NULL; 2373 } 2374 if (type == ND_NEIGHBOR_ADVERT) { 2375 ip1dbg(("nce_xmit: No source ipif for src %s\n", 2376 inet_ntop(AF_INET6, sender, buf, 2377 sizeof (buf)))); 2378 return (B_TRUE); 2379 } 2380 sender = &ipv6_all_zeros; 2381 } 2382 } 2383 2384 /* 2385 * If we still have an unspecified source (sender) address and this 2386 * isn't a probe, select a source address from `ill'. 2387 */ 2388 if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { 2389 ASSERT(type != ND_NEIGHBOR_ADVERT); 2390 /* 2391 * Pick a source address for this solicitation, but restrict 2392 * the selection to addresses assigned to the output 2393 * interface. We do this because the destination will create 2394 * a neighbor cache entry for the source address of this 2395 * packet, so the source address needs to be a valid neighbor. 2396 */ 2397 src_ipif = ipif_select_source_v6(ill, target, B_TRUE, 2398 IPV6_PREFER_SRC_DEFAULT, ALL_ZONES); 2399 if (src_ipif == NULL) { 2400 ip1dbg(("nce_xmit: No source ipif for dst %s\n", 2401 inet_ntop(AF_INET6, target, buf, sizeof (buf)))); 2402 return (B_TRUE); 2403 } 2404 sender = &src_ipif->ipif_v6src_addr; 2405 } 2406 2407 /* 2408 * We're either sending a probe or we have a source address. 
2409 */ 2410 ASSERT((flag & NDP_PROBE) || src_ipif != NULL); 2411 2412 maxplen = roundup(sizeof (nd_opt_hdr_t) + ND_MAX_HDW_LEN, 8); 2413 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 2414 maxplen; 2415 mp = allocb(len, BPRI_LO); 2416 if (mp == NULL) { 2417 if (src_ipif != NULL) 2418 ipif_refrele(src_ipif); 2419 return (B_TRUE); 2420 } 2421 bzero((char *)mp->b_rptr, len); 2422 mp->b_wptr = mp->b_rptr + len; 2423 2424 ip6i = (ip6i_t *)mp->b_rptr; 2425 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2426 ip6i->ip6i_nxt = IPPROTO_RAW; 2427 ip6i->ip6i_flags = IP6I_HOPLIMIT; 2428 if (flag & NDP_PROBE) 2429 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 2430 2431 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 2432 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2433 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2434 ip6h->ip6_nxt = IPPROTO_ICMPV6; 2435 ip6h->ip6_hops = IPV6_MAX_HOPS; 2436 ip6h->ip6_src = *sender; 2437 ip6h->ip6_dst = *target; 2438 icmp6 = (icmp6_t *)&ip6h[1]; 2439 2440 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 2441 sizeof (nd_neighbor_advert_t)); 2442 2443 if (type == ND_NEIGHBOR_SOLICIT) { 2444 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 2445 2446 if (!(flag & NDP_PROBE)) 2447 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 2448 ns->nd_ns_target = *target; 2449 if (!(flag & NDP_UNICAST)) { 2450 /* Form multicast address of the target */ 2451 ip6h->ip6_dst = ipv6_solicited_node_mcast; 2452 ip6h->ip6_dst.s6_addr32[3] |= 2453 ns->nd_ns_target.s6_addr32[3]; 2454 } 2455 } else { 2456 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 2457 2458 ASSERT(!(flag & NDP_PROBE)); 2459 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2460 na->nd_na_target = *sender; 2461 if (flag & NDP_ISROUTER) 2462 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 2463 if (flag & NDP_SOLICITED) 2464 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 2465 if (flag & NDP_ORIDE) 2466 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 2467 } 
2468 2469 hw_addr = NULL; 2470 if (!(flag & NDP_PROBE)) { 2471 /* 2472 * Use our source address to find the hardware address to put 2473 * in the packet, so that the hardware address and IP address 2474 * will match up -- even if that hardware address doesn't 2475 * match the ill we actually transmit the packet through. 2476 */ 2477 if (IS_IPMP(src_ipif->ipif_ill)) { 2478 hwaddr_ill = ipmp_ipif_hold_bound_ill(src_ipif); 2479 if (hwaddr_ill == NULL) { 2480 ip1dbg(("nce_xmit: no bound ill!\n")); 2481 ipif_refrele(src_ipif); 2482 freemsg(mp); 2483 return (B_TRUE); 2484 } 2485 } else { 2486 hwaddr_ill = src_ipif->ipif_ill; 2487 ill_refhold(hwaddr_ill); /* for symmetry */ 2488 } 2489 2490 plen = roundup(sizeof (nd_opt_hdr_t) + 2491 hwaddr_ill->ill_nd_lla_len, 8); 2492 2493 hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : 2494 hwaddr_ill->ill_phys_addr; 2495 if (hw_addr != NULL) { 2496 /* Fill in link layer address and option len */ 2497 opt->nd_opt_len = (uint8_t)(plen / 8); 2498 bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); 2499 } 2500 2501 ill_refrele(hwaddr_ill); 2502 } 2503 2504 if (hw_addr == NULL) 2505 plen = 0; 2506 2507 /* Fix up the length of the packet now that plen is known */ 2508 len -= (maxplen - plen); 2509 mp->b_wptr = mp->b_rptr + len; 2510 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2511 2512 icmp6->icmp6_type = type; 2513 icmp6->icmp6_code = 0; 2514 /* 2515 * Prepare for checksum by putting icmp length in the icmp 2516 * checksum field. The checksum is calculated in ip_wput_v6. 2517 */ 2518 icmp6->icmp6_cksum = ip6h->ip6_plen; 2519 2520 /* 2521 * Before we toss the src_ipif, look up the zoneid to pass to 2522 * ip_output_v6(). This is to ensure unicast ND_NEIGHBOR_ADVERT 2523 * packets to be routed correctly by IP (we cannot guarantee that the 2524 * global zone has an interface route to the destination). 
2525 */ 2526 if (src_ipif != NULL) { 2527 if ((zoneid = src_ipif->ipif_zoneid) == ALL_ZONES) 2528 zoneid = GLOBAL_ZONEID; 2529 ipif_refrele(src_ipif); 2530 } 2531 2532 ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT); 2533 return (B_FALSE); 2534 } 2535 2536 /* 2537 * Make a link layer address (does not include the SAP) from an nce. 2538 * To form the link layer address, use the last four bytes of ipv6 2539 * address passed in and the fixed offset stored in nce. 2540 */ 2541 static void 2542 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 2543 { 2544 uchar_t *mask, *to; 2545 ill_t *ill = nce->nce_ill; 2546 int len; 2547 2548 if (ill->ill_net_type == IRE_IF_NORESOLVER) 2549 return; 2550 ASSERT(nce->nce_res_mp != NULL); 2551 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 2552 ASSERT(nce->nce_flags & NCE_F_MAPPING); 2553 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 2554 ASSERT(addr != NULL); 2555 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 2556 addrpos, ill->ill_nd_lla_len); 2557 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 2558 IPV6_ADDR_LEN); 2559 mask = (uchar_t *)&nce->nce_extract_mask; 2560 mask += (IPV6_ADDR_LEN - len); 2561 addr += (IPV6_ADDR_LEN - len); 2562 to = addrpos + nce->nce_ll_extract_start; 2563 while (len-- > 0) 2564 *to++ |= *mask++ & *addr++; 2565 } 2566 2567 mblk_t * 2568 nce_udreq_alloc(ill_t *ill) 2569 { 2570 mblk_t *template_mp = NULL; 2571 dl_unitdata_req_t *dlur; 2572 int sap_length; 2573 2574 ASSERT(ill->ill_isv6); 2575 2576 sap_length = ill->ill_sap_length; 2577 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 2578 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 2579 if (template_mp == NULL) 2580 return (NULL); 2581 2582 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 2583 dlur->dl_priority.dl_min = 0; 2584 dlur->dl_priority.dl_max = 0; 2585 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 2586 dlur->dl_dest_addr_offset = sizeof 
(dl_unitdata_req_t); 2587 2588 /* Copy in the SAP value. */ 2589 NCE_LL_SAP_COPY(ill, template_mp); 2590 2591 return (template_mp); 2592 } 2593 2594 /* 2595 * NDP retransmit timer. 2596 * This timer goes off when: 2597 * a. It is time to retransmit NS for resolver. 2598 * b. It is time to send reachability probes. 2599 */ 2600 void 2601 ndp_timer(void *arg) 2602 { 2603 nce_t *nce = arg; 2604 ill_t *ill = nce->nce_ill; 2605 char addrbuf[INET6_ADDRSTRLEN]; 2606 boolean_t dropped = B_FALSE; 2607 ip_stack_t *ipst = ill->ill_ipst; 2608 2609 /* 2610 * The timer has to be cancelled by ndp_delete before doing the final 2611 * refrele. So the NCE is guaranteed to exist when the timer runs 2612 * until it clears the timeout_id. Before clearing the timeout_id 2613 * bump up the refcnt so that we can continue to use the nce 2614 */ 2615 ASSERT(nce != NULL); 2616 2617 mutex_enter(&nce->nce_lock); 2618 NCE_REFHOLD_LOCKED(nce); 2619 nce->nce_timeout_id = 0; 2620 2621 /* 2622 * Check the reachability state first. 2623 */ 2624 switch (nce->nce_state) { 2625 case ND_DELAY: 2626 nce->nce_state = ND_PROBE; 2627 mutex_exit(&nce->nce_lock); 2628 (void) nce_xmit_solicit(nce, B_FALSE, &ipv6_all_zeros, 2629 NDP_UNICAST); 2630 if (ip_debug > 3) { 2631 /* ip2dbg */ 2632 pr_addr_dbg("ndp_timer: state for %s changed " 2633 "to PROBE\n", AF_INET6, &nce->nce_addr); 2634 } 2635 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2636 NCE_REFRELE(nce); 2637 return; 2638 case ND_PROBE: 2639 /* must be retransmit timer */ 2640 nce->nce_pcnt--; 2641 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2642 nce->nce_pcnt >= -1); 2643 if (nce->nce_pcnt > 0) { 2644 /* 2645 * As per RFC2461, the nce gets deleted after 2646 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2647 * Note that the first unicast solicitation is sent 2648 * during the DELAY state. 
2649 */ 2650 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2651 nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, 2652 addrbuf, sizeof (addrbuf)))); 2653 mutex_exit(&nce->nce_lock); 2654 dropped = nce_xmit_solicit(nce, B_FALSE, 2655 &ipv6_all_zeros, 2656 (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE : 2657 NDP_UNICAST); 2658 if (dropped) { 2659 mutex_enter(&nce->nce_lock); 2660 nce->nce_pcnt++; 2661 mutex_exit(&nce->nce_lock); 2662 } 2663 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 2664 } else if (nce->nce_pcnt < 0) { 2665 /* No hope, delete the nce */ 2666 nce->nce_state = ND_UNREACHABLE; 2667 mutex_exit(&nce->nce_lock); 2668 if (ip_debug > 2) { 2669 /* ip1dbg */ 2670 pr_addr_dbg("ndp_timer: Delete IRE for" 2671 " dst %s\n", AF_INET6, &nce->nce_addr); 2672 } 2673 ndp_delete(nce); 2674 } else if (!(nce->nce_flags & NCE_F_PERMANENT)) { 2675 /* Wait RetransTimer, before deleting the entry */ 2676 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2677 nce->nce_pcnt, inet_ntop(AF_INET6, 2678 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2679 mutex_exit(&nce->nce_lock); 2680 /* Wait one interval before killing */ 2681 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2682 } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { 2683 ipif_t *ipif; 2684 2685 /* 2686 * We're done probing, and we can now declare this 2687 * address to be usable. Let IP know that it's ok to 2688 * use. 
2689 */ 2690 nce->nce_state = ND_REACHABLE; 2691 mutex_exit(&nce->nce_lock); 2692 ipif = ip_ndp_lookup_addr_v6(&nce->nce_addr, 2693 nce->nce_ill); 2694 if (ipif != NULL) { 2695 if (ipif->ipif_was_dup) { 2696 char ibuf[LIFNAMSIZ + 10]; 2697 char sbuf[INET6_ADDRSTRLEN]; 2698 2699 ipif->ipif_was_dup = B_FALSE; 2700 (void) inet_ntop(AF_INET6, 2701 &ipif->ipif_v6lcl_addr, 2702 sbuf, sizeof (sbuf)); 2703 ipif_get_name(ipif, ibuf, 2704 sizeof (ibuf)); 2705 cmn_err(CE_NOTE, "recovered address " 2706 "%s on %s", sbuf, ibuf); 2707 } 2708 if ((ipif->ipif_flags & IPIF_UP) && 2709 !ipif->ipif_addr_ready) 2710 ipif_up_notify(ipif); 2711 ipif->ipif_addr_ready = 1; 2712 ipif_refrele(ipif); 2713 } 2714 /* Begin defending our new address */ 2715 nce->nce_unsolicit_count = 0; 2716 dropped = nce_xmit_advert(nce, B_FALSE, 2717 &ipv6_all_hosts_mcast, 0); 2718 if (dropped) { 2719 nce->nce_unsolicit_count = 1; 2720 NDP_RESTART_TIMER(nce, 2721 ipst->ips_ip_ndp_unsolicit_interval); 2722 } else if (ipst->ips_ip_ndp_defense_interval != 0) { 2723 NDP_RESTART_TIMER(nce, 2724 ipst->ips_ip_ndp_defense_interval); 2725 } 2726 } else { 2727 /* 2728 * This is an address we're probing to be our own, but 2729 * the ill is down. Wait until it comes back before 2730 * doing anything, but switch to reachable state so 2731 * that the restart will work. 2732 */ 2733 nce->nce_state = ND_REACHABLE; 2734 mutex_exit(&nce->nce_lock); 2735 } 2736 NCE_REFRELE(nce); 2737 return; 2738 case ND_INCOMPLETE: { 2739 ip6_t *ip6h; 2740 ip6i_t *ip6i; 2741 mblk_t *mp, *datamp, *nextmp, **prevmpp; 2742 2743 /* 2744 * Per case (2) in the nce_queue_mp() comments, scan nce_qd_mp 2745 * for any IPMP probe packets, and toss 'em. IPMP probe 2746 * packets will always be at the head of nce_qd_mp and always 2747 * have an ip6i_t header, so we can stop at the first queued 2748 * ND packet without an ip6i_t. 
2749 */ 2750 prevmpp = &nce->nce_qd_mp; 2751 for (mp = nce->nce_qd_mp; mp != NULL; mp = nextmp) { 2752 nextmp = mp->b_next; 2753 datamp = (DB_TYPE(mp) == M_CTL) ? mp->b_cont : mp; 2754 ip6h = (ip6_t *)datamp->b_rptr; 2755 if (ip6h->ip6_nxt != IPPROTO_RAW) 2756 break; 2757 2758 ip6i = (ip6i_t *)ip6h; 2759 if (ip6i->ip6i_flags & IP6I_IPMP_PROBE) { 2760 inet_freemsg(mp); 2761 *prevmpp = nextmp; 2762 } else { 2763 prevmpp = &mp->b_next; 2764 } 2765 } 2766 ip_ndp_resolve(nce); 2767 mutex_exit(&nce->nce_lock); 2768 NCE_REFRELE(nce); 2769 break; 2770 } 2771 case ND_REACHABLE: 2772 if (((nce->nce_flags & NCE_F_UNSOL_ADV) && 2773 nce->nce_unsolicit_count != 0) || 2774 ((nce->nce_flags & NCE_F_PERMANENT) && 2775 ipst->ips_ip_ndp_defense_interval != 0)) { 2776 if (nce->nce_unsolicit_count > 0) 2777 nce->nce_unsolicit_count--; 2778 mutex_exit(&nce->nce_lock); 2779 dropped = nce_xmit_advert(nce, B_FALSE, 2780 &ipv6_all_hosts_mcast, 0); 2781 if (dropped) { 2782 mutex_enter(&nce->nce_lock); 2783 nce->nce_unsolicit_count++; 2784 mutex_exit(&nce->nce_lock); 2785 } 2786 if (nce->nce_unsolicit_count != 0) { 2787 NDP_RESTART_TIMER(nce, 2788 ipst->ips_ip_ndp_unsolicit_interval); 2789 } else { 2790 NDP_RESTART_TIMER(nce, 2791 ipst->ips_ip_ndp_defense_interval); 2792 } 2793 } else { 2794 mutex_exit(&nce->nce_lock); 2795 } 2796 NCE_REFRELE(nce); 2797 break; 2798 default: 2799 mutex_exit(&nce->nce_lock); 2800 NCE_REFRELE(nce); 2801 break; 2802 } 2803 } 2804 2805 /* 2806 * Set a link layer address from the ll_addr passed in. 2807 * Copy SAP from ill. 2808 */ 2809 static void 2810 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2811 { 2812 ill_t *ill = nce->nce_ill; 2813 uchar_t *woffset; 2814 2815 ASSERT(ll_addr != NULL); 2816 /* Always called before fast_path_probe */ 2817 ASSERT(nce->nce_fp_mp == NULL); 2818 if (ill->ill_sap_length != 0) { 2819 /* 2820 * Copy the SAP type specified in the 2821 * request into the xmit template. 
2822 */ 2823 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2824 } 2825 if (ill->ill_phys_addr_length > 0) { 2826 /* 2827 * The bcopy() below used to be called for the physical address 2828 * length rather than the link layer address length. For 2829 * ethernet and many other media, the phys_addr and lla are 2830 * identical. 2831 * However, with xresolv interfaces being introduced, the 2832 * phys_addr and lla are no longer the same, and the physical 2833 * address may not have any useful meaning, so we use the lla 2834 * for IPv6 address resolution and destination addressing. 2835 * 2836 * For PPP or other interfaces with a zero length 2837 * physical address, don't do anything here. 2838 * The bcopy() with a zero phys_addr length was previously 2839 * a no-op for interfaces with a zero-length physical address. 2840 * Using the lla for them would change the way they operate. 2841 * Doing nothing in such cases preserves expected behavior. 2842 */ 2843 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2844 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2845 } 2846 } 2847 2848 static boolean_t 2849 nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) 2850 { 2851 ill_t *ill = nce->nce_ill; 2852 uchar_t *ll_offset; 2853 2854 ASSERT(nce->nce_res_mp != NULL); 2855 if (ll_addr == NULL) 2856 return (B_FALSE); 2857 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2858 if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) 2859 return (B_TRUE); 2860 return (B_FALSE); 2861 } 2862 2863 /* 2864 * Updates the link layer address or the reachability state of 2865 * a cache entry. Reset probe counter if needed. 
2866 */ 2867 static void 2868 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2869 { 2870 ill_t *ill = nce->nce_ill; 2871 boolean_t need_stop_timer = B_FALSE; 2872 boolean_t need_fastpath_update = B_FALSE; 2873 2874 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2875 ASSERT(nce->nce_ipversion == IPV6_VERSION); 2876 /* 2877 * If this interface does not do NUD, there is no point 2878 * in allowing an update to the cache entry. Although 2879 * we will respond to NS. 2880 * The only time we accept an update for a resolver when 2881 * NUD is turned off is when it has just been created. 2882 * Non-Resolvers will always be created as REACHABLE. 2883 */ 2884 if (new_state != ND_UNCHANGED) { 2885 if ((nce->nce_flags & NCE_F_NONUD) && 2886 (nce->nce_state != ND_INCOMPLETE)) 2887 return; 2888 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2889 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2890 need_stop_timer = B_TRUE; 2891 if (new_state == ND_REACHABLE) 2892 nce->nce_last = TICK_TO_MSEC(lbolt64); 2893 else { 2894 /* We force NUD in this case */ 2895 nce->nce_last = 0; 2896 } 2897 nce->nce_state = new_state; 2898 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2899 } 2900 /* 2901 * In case of fast path we need to free the the fastpath 2902 * M_DATA and do another probe. Otherwise we can just 2903 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2904 * whatever packets that happens to be transmitting at the time. 
2905 */ 2906 if (new_ll_addr != NULL) { 2907 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2908 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2909 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2910 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2911 if (nce->nce_fp_mp != NULL) { 2912 freemsg(nce->nce_fp_mp); 2913 nce->nce_fp_mp = NULL; 2914 } 2915 need_fastpath_update = B_TRUE; 2916 } 2917 mutex_exit(&nce->nce_lock); 2918 if (need_stop_timer) { 2919 (void) untimeout(nce->nce_timeout_id); 2920 nce->nce_timeout_id = 0; 2921 } 2922 if (need_fastpath_update) 2923 nce_fastpath(nce); 2924 mutex_enter(&nce->nce_lock); 2925 } 2926 2927 void 2928 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 2929 { 2930 uint_t count = 0; 2931 mblk_t **mpp, *tmp; 2932 2933 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2934 2935 for (mpp = &nce->nce_qd_mp; *mpp != NULL; mpp = &(*mpp)->b_next) { 2936 if (++count > nce->nce_ill->ill_max_buf) { 2937 tmp = nce->nce_qd_mp->b_next; 2938 nce->nce_qd_mp->b_next = NULL; 2939 nce->nce_qd_mp->b_prev = NULL; 2940 freemsg(nce->nce_qd_mp); 2941 nce->nce_qd_mp = tmp; 2942 } 2943 } 2944 2945 if (head_insert) { 2946 mp->b_next = nce->nce_qd_mp; 2947 nce->nce_qd_mp = mp; 2948 } else { 2949 *mpp = mp; 2950 } 2951 } 2952 2953 static void 2954 nce_queue_mp(nce_t *nce, mblk_t *mp) 2955 { 2956 boolean_t head_insert = B_FALSE; 2957 ip6_t *ip6h; 2958 ip6i_t *ip6i; 2959 mblk_t *data_mp; 2960 2961 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2962 2963 if (mp->b_datap->db_type == M_CTL) 2964 data_mp = mp->b_cont; 2965 else 2966 data_mp = mp; 2967 ip6h = (ip6_t *)data_mp->b_rptr; 2968 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2969 /* 2970 * This message should have been pulled up already in 2971 * ip_wput_v6. We can't do pullups here because the message 2972 * could be from the nce_qd_mp which could have b_next/b_prev 2973 * non-NULL. 
2974 */ 2975 ip6i = (ip6i_t *)ip6h; 2976 ASSERT(MBLKL(data_mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN); 2977 2978 /* 2979 * If this packet is marked IP6I_IPMP_PROBE, then we need to: 2980 * 2981 * 1. Insert it at the head of the nce_qd_mp list. Consider 2982 * the normal (non-probe) load-speading case where the 2983 * source address of the ND packet is not tied to nce_ill. 2984 * If the ill bound to the source address cannot receive, 2985 * the response to the ND packet will not be received. 2986 * However, if ND packets for nce_ill's probes are queued 2987 * behind that ND packet, those probes will also fail to 2988 * be sent, and thus in.mpathd will erroneously conclude 2989 * that nce_ill has also failed. 2990 * 2991 * 2. Drop the probe packet in ndp_timer() if the ND did 2992 * not succeed on the first attempt. This ensures that 2993 * ND problems do not manifest as probe RTT spikes. 2994 */ 2995 if (ip6i->ip6i_flags & IP6I_IPMP_PROBE) 2996 head_insert = B_TRUE; 2997 } 2998 nce_queue_mp_common(nce, mp, head_insert); 2999 } 3000 3001 /* 3002 * Called when address resolution failed due to a timeout. 3003 * Send an ICMP unreachable in response to all queued packets. 
3004 */ 3005 void 3006 nce_resolv_failed(nce_t *nce) 3007 { 3008 mblk_t *mp, *nxt_mp, *first_mp; 3009 char buf[INET6_ADDRSTRLEN]; 3010 ip6_t *ip6h; 3011 zoneid_t zoneid = GLOBAL_ZONEID; 3012 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3013 3014 ip1dbg(("nce_resolv_failed: dst %s\n", 3015 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 3016 mutex_enter(&nce->nce_lock); 3017 mp = nce->nce_qd_mp; 3018 nce->nce_qd_mp = NULL; 3019 mutex_exit(&nce->nce_lock); 3020 while (mp != NULL) { 3021 nxt_mp = mp->b_next; 3022 mp->b_next = NULL; 3023 mp->b_prev = NULL; 3024 3025 first_mp = mp; 3026 if (mp->b_datap->db_type == M_CTL) { 3027 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 3028 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3029 zoneid = io->ipsec_out_zoneid; 3030 ASSERT(zoneid != ALL_ZONES); 3031 mp = mp->b_cont; 3032 mp->b_next = NULL; 3033 mp->b_prev = NULL; 3034 } 3035 3036 ip6h = (ip6_t *)mp->b_rptr; 3037 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3038 ip6i_t *ip6i; 3039 /* 3040 * This message should have been pulled up already 3041 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 3042 * the header is pulled up. 3043 */ 3044 ip6i = (ip6i_t *)ip6h; 3045 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 3046 sizeof (ip6i_t) + IPV6_HDR_LEN); 3047 mp->b_rptr += sizeof (ip6i_t); 3048 } 3049 /* 3050 * Ignore failure since icmp_unreachable_v6 will silently 3051 * drop packets with an unspecified source address. 3052 */ 3053 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst); 3054 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 3055 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst); 3056 mp = nxt_mp; 3057 } 3058 nce_cb_dispatch(nce); 3059 } 3060 3061 /* 3062 * Called by SIOCSNDP* ioctl to add/change an nce entry 3063 * and the corresponding attributes. 3064 * Disallow states other than ND_REACHABLE or ND_STALE. 
3065 */ 3066 int 3067 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 3068 { 3069 sin6_t *sin6; 3070 in6_addr_t *addr; 3071 nce_t *nce; 3072 int err; 3073 uint16_t new_flags = 0; 3074 uint16_t old_flags = 0; 3075 int inflags = lnr->lnr_flags; 3076 ip_stack_t *ipst = ill->ill_ipst; 3077 3078 ASSERT(ill->ill_isv6); 3079 if ((lnr->lnr_state_create != ND_REACHABLE) && 3080 (lnr->lnr_state_create != ND_STALE)) 3081 return (EINVAL); 3082 3083 if (lnr->lnr_hdw_len > ND_MAX_HDW_LEN) 3084 return (EINVAL); 3085 3086 sin6 = (sin6_t *)&lnr->lnr_addr; 3087 addr = &sin6->sin6_addr; 3088 3089 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 3090 /* We know it can not be mapping so just look in the hash table */ 3091 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 3092 /* See comment in ndp_query() regarding IS_IPMP(ill) usage */ 3093 nce = nce_lookup_addr(ill, IS_IPMP(ill), addr, nce); 3094 if (nce != NULL) 3095 new_flags = nce->nce_flags; 3096 3097 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 3098 case NDF_ISROUTER_ON: 3099 new_flags |= NCE_F_ISROUTER; 3100 break; 3101 case NDF_ISROUTER_OFF: 3102 new_flags &= ~NCE_F_ISROUTER; 3103 break; 3104 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3105 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3106 if (nce != NULL) 3107 NCE_REFRELE(nce); 3108 return (EINVAL); 3109 } 3110 3111 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3112 case NDF_ANYCAST_ON: 3113 new_flags |= NCE_F_ANYCAST; 3114 break; 3115 case NDF_ANYCAST_OFF: 3116 new_flags &= ~NCE_F_ANYCAST; 3117 break; 3118 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3119 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3120 if (nce != NULL) 3121 NCE_REFRELE(nce); 3122 return (EINVAL); 3123 } 3124 3125 if (nce == NULL) { 3126 err = ndp_add_v6(ill, 3127 (uchar_t *)lnr->lnr_hdw_addr, 3128 addr, 3129 &ipv6_all_ones, 3130 &ipv6_all_zeros, 3131 0, 3132 new_flags, 3133 lnr->lnr_state_create, 3134 &nce); 3135 if (err != 0) { 3136 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3137 ip1dbg(("ndp_sioc_update: Can't 
create NCE %d\n", err)); 3138 return (err); 3139 } 3140 } 3141 old_flags = nce->nce_flags; 3142 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3143 /* 3144 * Router turned to host, delete all ires. 3145 * XXX Just delete the entry, but we need to add too. 3146 */ 3147 nce->nce_flags &= ~NCE_F_ISROUTER; 3148 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3149 ndp_delete(nce); 3150 NCE_REFRELE(nce); 3151 return (0); 3152 } 3153 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3154 3155 mutex_enter(&nce->nce_lock); 3156 nce->nce_flags = new_flags; 3157 mutex_exit(&nce->nce_lock); 3158 /* 3159 * Note that we ignore the state at this point, which 3160 * should be either STALE or REACHABLE. Instead we let 3161 * the link layer address passed in to determine the state 3162 * much like incoming packets. 3163 */ 3164 nce_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3165 NCE_REFRELE(nce); 3166 return (0); 3167 } 3168 3169 /* 3170 * If the device driver supports it, we make nce_fp_mp to have 3171 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 3172 * The caller ensures there is hold on nce for this function. 3173 * Note that since ill_fastpath_probe() copies the mblk there is 3174 * no need for the hold beyond this function. 3175 */ 3176 void 3177 nce_fastpath(nce_t *nce) 3178 { 3179 ill_t *ill = nce->nce_ill; 3180 int res; 3181 3182 ASSERT(ill != NULL); 3183 ASSERT(nce->nce_state != ND_INITIAL && nce->nce_state != ND_INCOMPLETE); 3184 3185 if (nce->nce_fp_mp != NULL) { 3186 /* Already contains fastpath info */ 3187 return; 3188 } 3189 if (nce->nce_res_mp != NULL) { 3190 nce_fastpath_list_add(nce); 3191 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3192 /* 3193 * EAGAIN is an indication of a transient error 3194 * i.e. allocation failure etc. leave the nce in the list it 3195 * will be updated when another probe happens for another ire 3196 * if not it will be taken out of the list when the ire is 3197 * deleted. 
3198 */ 3199 3200 if (res != 0 && res != EAGAIN) 3201 nce_fastpath_list_delete(nce); 3202 } 3203 } 3204 3205 /* 3206 * Drain the list of nce's waiting for fastpath response. 3207 */ 3208 void 3209 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3210 void *arg) 3211 { 3212 3213 nce_t *next_nce; 3214 nce_t *current_nce; 3215 nce_t *first_nce; 3216 nce_t *prev_nce = NULL; 3217 3218 mutex_enter(&ill->ill_lock); 3219 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3220 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3221 next_nce = current_nce->nce_fastpath; 3222 /* 3223 * Take it off the list if we're flushing, or if the callback 3224 * routine tells us to do so. Otherwise, leave the nce in the 3225 * fastpath list to handle any pending response from the lower 3226 * layer. We can't drain the list when the callback routine 3227 * comparison failed, because the response is asynchronous in 3228 * nature, and may not arrive in the same order as the list 3229 * insertion. 3230 */ 3231 if (func == NULL || func(current_nce, arg)) { 3232 current_nce->nce_fastpath = NULL; 3233 if (current_nce == first_nce) 3234 ill->ill_fastpath_list = first_nce = next_nce; 3235 else 3236 prev_nce->nce_fastpath = next_nce; 3237 } else { 3238 /* previous element that is still in the list */ 3239 prev_nce = current_nce; 3240 } 3241 current_nce = next_nce; 3242 } 3243 mutex_exit(&ill->ill_lock); 3244 } 3245 3246 /* 3247 * Add nce to the nce fastpath list. 3248 */ 3249 void 3250 nce_fastpath_list_add(nce_t *nce) 3251 { 3252 ill_t *ill; 3253 3254 ill = nce->nce_ill; 3255 3256 mutex_enter(&ill->ill_lock); 3257 mutex_enter(&nce->nce_lock); 3258 3259 /* 3260 * if nce has not been deleted and 3261 * is not already in the list add it. 
3262 */ 3263 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3264 (nce->nce_fastpath == NULL)) { 3265 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3266 ill->ill_fastpath_list = nce; 3267 } 3268 3269 mutex_exit(&nce->nce_lock); 3270 mutex_exit(&ill->ill_lock); 3271 } 3272 3273 /* 3274 * remove nce from the nce fastpath list. 3275 */ 3276 void 3277 nce_fastpath_list_delete(nce_t *nce) 3278 { 3279 nce_t *nce_ptr; 3280 3281 ill_t *ill; 3282 3283 ill = nce->nce_ill; 3284 ASSERT(ill != NULL); 3285 3286 mutex_enter(&ill->ill_lock); 3287 if (nce->nce_fastpath == NULL) 3288 goto done; 3289 3290 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3291 3292 if (ill->ill_fastpath_list == nce) { 3293 ill->ill_fastpath_list = nce->nce_fastpath; 3294 } else { 3295 nce_ptr = ill->ill_fastpath_list; 3296 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3297 if (nce_ptr->nce_fastpath == nce) { 3298 nce_ptr->nce_fastpath = nce->nce_fastpath; 3299 break; 3300 } 3301 nce_ptr = nce_ptr->nce_fastpath; 3302 } 3303 } 3304 3305 nce->nce_fastpath = NULL; 3306 done: 3307 mutex_exit(&ill->ill_lock); 3308 } 3309 3310 /* 3311 * Update all NCE's that are not in fastpath mode and 3312 * have an nce_fp_mp that matches mp. mp->b_cont contains 3313 * the fastpath header. 3314 * 3315 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 
3316 */ 3317 boolean_t 3318 ndp_fastpath_update(nce_t *nce, void *arg) 3319 { 3320 mblk_t *mp, *fp_mp; 3321 uchar_t *mp_rptr, *ud_mp_rptr; 3322 mblk_t *ud_mp = nce->nce_res_mp; 3323 ptrdiff_t cmplen; 3324 3325 if (nce->nce_flags & NCE_F_MAPPING) 3326 return (B_TRUE); 3327 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3328 return (B_TRUE); 3329 3330 ip2dbg(("ndp_fastpath_update: trying\n")); 3331 mp = (mblk_t *)arg; 3332 mp_rptr = mp->b_rptr; 3333 cmplen = mp->b_wptr - mp_rptr; 3334 ASSERT(cmplen >= 0); 3335 ud_mp_rptr = ud_mp->b_rptr; 3336 /* 3337 * The nce is locked here to prevent any other threads 3338 * from accessing and changing nce_res_mp when the IPv6 address 3339 * becomes resolved to an lla while we're in the middle 3340 * of looking at and comparing the hardware address (lla). 3341 * It is also locked to prevent multiple threads in nce_fastpath_update 3342 * from examining nce_res_mp atthe same time. 3343 */ 3344 mutex_enter(&nce->nce_lock); 3345 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3346 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3347 mutex_exit(&nce->nce_lock); 3348 /* 3349 * Don't take the ire off the fastpath list yet, 3350 * since the response may come later. 3351 */ 3352 return (B_FALSE); 3353 } 3354 /* Matched - install mp as the fastpath mp */ 3355 ip1dbg(("ndp_fastpath_update: match\n")); 3356 fp_mp = dupb(mp->b_cont); 3357 if (fp_mp != NULL) { 3358 nce->nce_fp_mp = fp_mp; 3359 } 3360 mutex_exit(&nce->nce_lock); 3361 return (B_TRUE); 3362 } 3363 3364 /* 3365 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3366 * driver. Note that it assumes IP is exclusive... 3367 */ 3368 /* ARGSUSED */ 3369 void 3370 ndp_fastpath_flush(nce_t *nce, char *arg) 3371 { 3372 if (nce->nce_flags & NCE_F_MAPPING) 3373 return; 3374 /* No fastpath info? 
*/ 3375 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3376 return; 3377 3378 if (nce->nce_ipversion == IPV4_VERSION && 3379 nce->nce_flags & NCE_F_BCAST) { 3380 /* 3381 * IPv4 BROADCAST entries: 3382 * We can't delete the nce since it is difficult to 3383 * recreate these without going through the 3384 * ipif down/up dance. 3385 * 3386 * All access to nce->nce_fp_mp in the case of these 3387 * is protected by nce_lock. 3388 */ 3389 mutex_enter(&nce->nce_lock); 3390 if (nce->nce_fp_mp != NULL) { 3391 freeb(nce->nce_fp_mp); 3392 nce->nce_fp_mp = NULL; 3393 mutex_exit(&nce->nce_lock); 3394 nce_fastpath(nce); 3395 } else { 3396 mutex_exit(&nce->nce_lock); 3397 } 3398 } else { 3399 /* Just delete the NCE... */ 3400 ndp_delete(nce); 3401 } 3402 } 3403 3404 /* 3405 * Return a pointer to a given option in the packet. 3406 * Assumes that option part of the packet have already been validated. 3407 */ 3408 nd_opt_hdr_t * 3409 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3410 { 3411 while (optlen > 0) { 3412 if (opt->nd_opt_type == opt_type) 3413 return (opt); 3414 optlen -= 8 * opt->nd_opt_len; 3415 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3416 } 3417 return (NULL); 3418 } 3419 3420 /* 3421 * Verify all option lengths present are > 0, also check to see 3422 * if the option lengths and packet length are consistent. 3423 */ 3424 boolean_t 3425 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3426 { 3427 ASSERT(opt != NULL); 3428 while (optlen > 0) { 3429 if (opt->nd_opt_len == 0) 3430 return (B_FALSE); 3431 optlen -= 8 * opt->nd_opt_len; 3432 if (optlen < 0) 3433 return (B_FALSE); 3434 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3435 } 3436 return (B_TRUE); 3437 } 3438 3439 /* 3440 * ndp_walk function. 3441 * Free a fraction of the NCE cache entries. 3442 * A fraction of zero means to not free any in that category. 
3443 */ 3444 void 3445 ndp_cache_reclaim(nce_t *nce, char *arg) 3446 { 3447 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3448 uint_t rand; 3449 3450 if (nce->nce_flags & NCE_F_PERMANENT) 3451 return; 3452 3453 rand = (uint_t)lbolt + 3454 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3455 if (ncr->ncr_host != 0 && 3456 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3457 ndp_delete(nce); 3458 return; 3459 } 3460 } 3461 3462 /* 3463 * ndp_walk function. 3464 * Count the number of NCEs that can be deleted. 3465 * These would be hosts but not routers. 3466 */ 3467 void 3468 ndp_cache_count(nce_t *nce, char *arg) 3469 { 3470 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3471 3472 if (nce->nce_flags & NCE_F_PERMANENT) 3473 return; 3474 3475 ncc->ncc_total++; 3476 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3477 ncc->ncc_host++; 3478 } 3479 3480 #ifdef DEBUG 3481 void 3482 nce_trace_ref(nce_t *nce) 3483 { 3484 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3485 3486 if (nce->nce_trace_disable) 3487 return; 3488 3489 if (!th_trace_ref(nce, nce->nce_ill->ill_ipst)) { 3490 nce->nce_trace_disable = B_TRUE; 3491 nce_trace_cleanup(nce); 3492 } 3493 } 3494 3495 void 3496 nce_untrace_ref(nce_t *nce) 3497 { 3498 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3499 3500 if (!nce->nce_trace_disable) 3501 th_trace_unref(nce); 3502 } 3503 3504 static void 3505 nce_trace_cleanup(const nce_t *nce) 3506 { 3507 th_trace_cleanup(nce, nce->nce_trace_disable); 3508 } 3509 #endif 3510 3511 /* 3512 * Called when address resolution fails due to a timeout. 3513 * Send an ICMP unreachable in response to all queued packets. 
3514 */ 3515 void 3516 arp_resolv_failed(nce_t *nce) 3517 { 3518 mblk_t *mp, *nxt_mp, *first_mp; 3519 char buf[INET6_ADDRSTRLEN]; 3520 zoneid_t zoneid = GLOBAL_ZONEID; 3521 struct in_addr ipv4addr; 3522 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3523 3524 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3525 ip3dbg(("arp_resolv_failed: dst %s\n", 3526 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3527 mutex_enter(&nce->nce_lock); 3528 mp = nce->nce_qd_mp; 3529 nce->nce_qd_mp = NULL; 3530 mutex_exit(&nce->nce_lock); 3531 3532 while (mp != NULL) { 3533 nxt_mp = mp->b_next; 3534 mp->b_next = NULL; 3535 mp->b_prev = NULL; 3536 3537 first_mp = mp; 3538 /* 3539 * Send icmp unreachable messages 3540 * to the hosts. 3541 */ 3542 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst); 3543 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3544 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3545 ICMP_HOST_UNREACHABLE, zoneid, ipst); 3546 mp = nxt_mp; 3547 } 3548 } 3549 3550 int 3551 ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3552 nce_t **newnce, nce_t *src_nce) 3553 { 3554 int err; 3555 nce_t *nce; 3556 in6_addr_t addr6; 3557 ip_stack_t *ipst = ill->ill_ipst; 3558 3559 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3560 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3561 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3562 /* 3563 * NOTE: IPv4 never matches across the illgrp since the NCE's we're 3564 * looking up have fastpath headers that are inherently per-ill. 3565 */ 3566 nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce); 3567 if (nce == NULL) { 3568 err = ndp_add_v4(ill, addr, flags, newnce, src_nce); 3569 } else { 3570 *newnce = nce; 3571 err = EEXIST; 3572 } 3573 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3574 return (err); 3575 } 3576 3577 /* 3578 * NDP Cache Entry creation routine for IPv4. 3579 * Mapped entries are handled in arp. 3580 * This routine must always be called with ndp4->ndp_g_lock held. 
3581 * Prior to return, nce_refcnt is incremented. 3582 */ 3583 static int 3584 ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3585 nce_t **newnce, nce_t *src_nce) 3586 { 3587 static nce_t nce_nil; 3588 nce_t *nce; 3589 mblk_t *mp; 3590 mblk_t *template = NULL; 3591 nce_t **ncep; 3592 ip_stack_t *ipst = ill->ill_ipst; 3593 uint16_t state = ND_INITIAL; 3594 int err; 3595 3596 ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock)); 3597 ASSERT(!ill->ill_isv6); 3598 ASSERT((flags & NCE_F_MAPPING) == 0); 3599 3600 if (ill->ill_resolver_mp == NULL) 3601 return (EINVAL); 3602 /* 3603 * Allocate the mblk to hold the nce. 3604 */ 3605 mp = allocb(sizeof (nce_t), BPRI_MED); 3606 if (mp == NULL) 3607 return (ENOMEM); 3608 3609 nce = (nce_t *)mp->b_rptr; 3610 mp->b_wptr = (uchar_t *)&nce[1]; 3611 *nce = nce_nil; 3612 nce->nce_ill = ill; 3613 nce->nce_ipversion = IPV4_VERSION; 3614 nce->nce_flags = flags; 3615 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3616 nce->nce_rcnt = ill->ill_xmit_count; 3617 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3618 nce->nce_mask = ipv6_all_ones; 3619 nce->nce_extract_mask = ipv6_all_zeros; 3620 nce->nce_ll_extract_start = 0; 3621 nce->nce_qd_mp = NULL; 3622 nce->nce_mp = mp; 3623 /* This one is for nce getting created */ 3624 nce->nce_refcnt = 1; 3625 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3626 ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3627 3628 nce->nce_trace_disable = B_FALSE; 3629 3630 if (src_nce != NULL) { 3631 /* 3632 * src_nce has been provided by the caller. 
The only 3633 * caller who provides a non-null, non-broadcast 3634 * src_nce is from ip_newroute() which must pass in 3635 * a ND_REACHABLE src_nce (this condition is verified 3636 * via an ASSERT for the save_ire->ire_nce in ip_newroute()) 3637 */ 3638 mutex_enter(&src_nce->nce_lock); 3639 state = src_nce->nce_state; 3640 if ((src_nce->nce_flags & NCE_F_CONDEMNED) || 3641 (ipst->ips_ndp4->ndp_g_hw_change > 0)) { 3642 /* 3643 * src_nce has been deleted, or 3644 * ip_arp_news is in the middle of 3645 * flushing entries in the the nce. 3646 * Fail the add, since we don't know 3647 * if it is safe to copy the contents of 3648 * src_nce 3649 */ 3650 DTRACE_PROBE2(nce__bad__src__nce, 3651 nce_t *, src_nce, ill_t *, ill); 3652 mutex_exit(&src_nce->nce_lock); 3653 err = EINVAL; 3654 goto err_ret; 3655 } 3656 template = copyb(src_nce->nce_res_mp); 3657 mutex_exit(&src_nce->nce_lock); 3658 if (template == NULL) { 3659 err = ENOMEM; 3660 goto err_ret; 3661 } 3662 } else if (flags & NCE_F_BCAST) { 3663 /* 3664 * broadcast nce. 3665 */ 3666 template = copyb(ill->ill_bcast_mp); 3667 if (template == NULL) { 3668 err = ENOMEM; 3669 goto err_ret; 3670 } 3671 state = ND_REACHABLE; 3672 } else if (ill->ill_net_type == IRE_IF_NORESOLVER) { 3673 /* 3674 * NORESOLVER entries are always created in the REACHABLE 3675 * state. We create a nce_res_mp with the IP nexthop address 3676 * in the destination address in the DLPI hdr if the 3677 * physical length is exactly 4 bytes. 3678 * 3679 * XXX not clear which drivers set ill_phys_addr_length to 3680 * IP_ADDR_LEN. 
3681 */ 3682 if (ill->ill_phys_addr_length == IP_ADDR_LEN) { 3683 template = ill_dlur_gen((uchar_t *)addr, 3684 ill->ill_phys_addr_length, 3685 ill->ill_sap, ill->ill_sap_length); 3686 } else { 3687 template = copyb(ill->ill_resolver_mp); 3688 } 3689 if (template == NULL) { 3690 err = ENOMEM; 3691 goto err_ret; 3692 } 3693 state = ND_REACHABLE; 3694 } 3695 nce->nce_fp_mp = NULL; 3696 nce->nce_res_mp = template; 3697 nce->nce_state = state; 3698 if (state == ND_REACHABLE) { 3699 nce->nce_last = TICK_TO_MSEC(lbolt64); 3700 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3701 } else { 3702 nce->nce_last = 0; 3703 if (state == ND_INITIAL) 3704 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3705 } 3706 3707 ASSERT((nce->nce_res_mp == NULL && nce->nce_state == ND_INITIAL) || 3708 (nce->nce_res_mp != NULL && nce->nce_state == ND_REACHABLE)); 3709 /* 3710 * Atomically ensure that the ill is not CONDEMNED, before 3711 * adding the NCE. 3712 */ 3713 mutex_enter(&ill->ill_lock); 3714 if (ill->ill_state_flags & ILL_CONDEMNED) { 3715 mutex_exit(&ill->ill_lock); 3716 err = EINVAL; 3717 goto err_ret; 3718 } 3719 if ((nce->nce_next = *ncep) != NULL) 3720 nce->nce_next->nce_ptpn = &nce->nce_next; 3721 *ncep = nce; 3722 nce->nce_ptpn = ncep; 3723 *newnce = nce; 3724 /* This one is for nce being used by an active thread */ 3725 NCE_REFHOLD(*newnce); 3726 3727 /* Bump up the number of nce's referencing this ill */ 3728 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 3729 (char *), "nce", (void *), nce); 3730 ill->ill_nce_cnt++; 3731 mutex_exit(&ill->ill_lock); 3732 DTRACE_PROBE1(ndp__add__v4, nce_t *, nce); 3733 return (0); 3734 err_ret: 3735 freeb(mp); 3736 freemsg(template); 3737 return (err); 3738 } 3739 3740 /* 3741 * ndp_walk routine to delete all entries that have a given destination or 3742 * gateway address and cached link layer (MAC) address. This is used when ARP 3743 * informs us that a network-to-link-layer mapping may have changed. 
3744 */ 3745 void 3746 nce_delete_hw_changed(nce_t *nce, void *arg) 3747 { 3748 nce_hw_map_t *hwm = arg; 3749 mblk_t *mp; 3750 dl_unitdata_req_t *dlu; 3751 uchar_t *macaddr; 3752 ill_t *ill; 3753 int saplen; 3754 ipaddr_t nce_addr; 3755 3756 if (nce->nce_state != ND_REACHABLE) 3757 return; 3758 3759 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3760 if (nce_addr != hwm->hwm_addr) 3761 return; 3762 3763 mutex_enter(&nce->nce_lock); 3764 if ((mp = nce->nce_res_mp) == NULL) { 3765 mutex_exit(&nce->nce_lock); 3766 return; 3767 } 3768 dlu = (dl_unitdata_req_t *)mp->b_rptr; 3769 macaddr = (uchar_t *)(dlu + 1); 3770 ill = nce->nce_ill; 3771 if ((saplen = ill->ill_sap_length) > 0) 3772 macaddr += saplen; 3773 else 3774 saplen = -saplen; 3775 3776 /* 3777 * If the hardware address is unchanged, then leave this one alone. 3778 * Note that saplen == abs(saplen) now. 3779 */ 3780 if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && 3781 bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { 3782 mutex_exit(&nce->nce_lock); 3783 return; 3784 } 3785 mutex_exit(&nce->nce_lock); 3786 3787 DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); 3788 ndp_delete(nce); 3789 } 3790 3791 /* 3792 * This function verifies whether a given IPv4 address is potentially known to 3793 * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t, 3794 * so that it can continue to look for hardware changes on that address. 3795 */ 3796 boolean_t 3797 ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns) 3798 { 3799 nce_t *nce; 3800 struct in_addr nceaddr; 3801 ip_stack_t *ipst = ns->netstack_ip; 3802 3803 if (addr == INADDR_ANY) 3804 return (B_FALSE); 3805 3806 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3807 nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr); 3808 for (; nce != NULL; nce = nce->nce_next) { 3809 /* Note that only v4 mapped entries are in the table. 
*/ 3810 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 3811 if (addr == nceaddr.s_addr && 3812 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 3813 /* Single flag check; no lock needed */ 3814 if (!(nce->nce_flags & NCE_F_CONDEMNED)) 3815 break; 3816 } 3817 } 3818 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3819 return (nce != NULL); 3820 } 3821 3822 /* 3823 * Wrapper around ipif_lookup_addr_exact_v6() that allows ND to work properly 3824 * with IPMP. Specifically, since neighbor discovery is always done on 3825 * underlying interfaces (even for addresses owned by an IPMP interface), we 3826 * need to check for `v6addrp' on both `ill' and on the IPMP meta-interface 3827 * associated with `ill' (if it exists). 3828 */ 3829 static ipif_t * 3830 ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill) 3831 { 3832 ipif_t *ipif; 3833 ip_stack_t *ipst = ill->ill_ipst; 3834 3835 ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst); 3836 if (ipif == NULL && IS_UNDER_IPMP(ill)) { 3837 if ((ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) { 3838 ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst); 3839 ill_refrele(ill); 3840 } 3841 } 3842 return (ipif); 3843 } 3844