1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/stropts.h> 29 #include <sys/strsun.h> 30 #include <sys/sysmacros.h> 31 #include <sys/errno.h> 32 #include <sys/dlpi.h> 33 #include <sys/socket.h> 34 #include <sys/ddi.h> 35 #include <sys/sunddi.h> 36 #include <sys/cmn_err.h> 37 #include <sys/debug.h> 38 #include <sys/vtrace.h> 39 #include <sys/kmem.h> 40 #include <sys/zone.h> 41 #include <sys/ethernet.h> 42 #include <sys/sdt.h> 43 44 #include <net/if.h> 45 #include <net/if_types.h> 46 #include <net/if_dl.h> 47 #include <net/route.h> 48 #include <netinet/in.h> 49 #include <netinet/ip6.h> 50 #include <netinet/icmp6.h> 51 52 #include <inet/common.h> 53 #include <inet/mi.h> 54 #include <inet/mib2.h> 55 #include <inet/nd.h> 56 #include <inet/ip.h> 57 #include <inet/ip_impl.h> 58 #include <inet/ipclassifier.h> 59 #include <inet/ip_if.h> 60 #include <inet/ip_ire.h> 61 #include <inet/ip_rts.h> 62 #include <inet/ip6.h> 63 #include <inet/ip_ndp.h> 64 #include <inet/ipsec_impl.h> 65 #include <inet/ipsec_info.h> 66 #include <inet/sctp_ip.h> 67 #include <inet/ip2mac_impl.h> 68 69 /* 70 * Function names with nce_ prefix are static while function 71 * names with ndp_ prefix are used by rest of the IP. 72 * 73 * Lock ordering: 74 * 75 * ndp_g_lock -> ill_lock -> nce_lock 76 * 77 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 78 * nce_next. Nce_lock protects the contents of the NCE (particularly 79 * nce_refcnt). 80 */ 81 82 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 83 uint32_t ll_addr_len); 84 static void nce_ire_delete(nce_t *nce); 85 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 86 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 87 static nce_t *nce_lookup_addr(ill_t *, boolean_t, const in6_addr_t *, 88 nce_t *); 89 static nce_t *nce_lookup_mapping(ill_t *, const in6_addr_t *); 90 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 91 uchar_t *addr); 92 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 93 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 94 static mblk_t *nce_udreq_alloc(ill_t *ill); 95 static void nce_update(nce_t *nce, uint16_t new_state, 96 uchar_t *new_ll_addr); 97 static uint32_t nce_solicit(nce_t *nce, in6_addr_t src); 98 static boolean_t nce_xmit(ill_t *ill, uint8_t type, 99 boolean_t use_lla_addr, const in6_addr_t *sender, 100 const in6_addr_t *target, int flag); 101 static boolean_t nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, 102 const in6_addr_t *target, uint_t flags); 103 static boolean_t nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, 104 const in6_addr_t *src, uint_t flags); 105 static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t, 106 nce_t **, nce_t *); 107 static ipif_t *ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill); 108 109 #ifdef DEBUG 110 static void nce_trace_cleanup(const nce_t *); 111 #endif 112 113 #define NCE_HASH_PTR_V4(ipst, addr) \ 114 (&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 115 116 #define NCE_HASH_PTR_V6(ipst, addr) \ 117 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ 118 NCE_TABLE_SIZE)])) 119 120 /* Non-tunable probe interval, based on link capabilities */ 121 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 122 123 /* 124 * NDP Cache Entry creation routine. 125 * Mapped entries will never do NUD . 126 * This routine must always be called with ndp6->ndp_g_lock held. 127 * Prior to return, nce_refcnt is incremented. 128 */ 129 int 130 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 131 const in6_addr_t *mask, const in6_addr_t *extract_mask, 132 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 133 nce_t **newnce) 134 { 135 static nce_t nce_nil; 136 nce_t *nce; 137 mblk_t *mp; 138 mblk_t *template; 139 nce_t **ncep; 140 int err; 141 boolean_t dropped = B_FALSE; 142 ip_stack_t *ipst = ill->ill_ipst; 143 144 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 145 ASSERT(ill != NULL && ill->ill_isv6); 146 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 147 ip0dbg(("ndp_add_v6: no addr\n")); 148 return (EINVAL); 149 } 150 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 151 ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags)); 152 return (EINVAL); 153 } 154 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 155 (flags & NCE_F_MAPPING)) { 156 ip0dbg(("ndp_add_v6: extract mask zero for mapping")); 157 return (EINVAL); 158 } 159 /* 160 * Allocate the mblk to hold the nce. 161 * 162 * XXX This can come out of a separate cache - nce_cache. 163 * We don't need the mp anymore as there are no more 164 * "qwriter"s 165 */ 166 mp = allocb(sizeof (nce_t), BPRI_MED); 167 if (mp == NULL) 168 return (ENOMEM); 169 170 nce = (nce_t *)mp->b_rptr; 171 mp->b_wptr = (uchar_t *)&nce[1]; 172 *nce = nce_nil; 173 174 /* 175 * This one holds link layer address 176 */ 177 if (ill->ill_net_type == IRE_IF_RESOLVER) { 178 template = nce_udreq_alloc(ill); 179 } else { 180 if (ill->ill_resolver_mp == NULL) { 181 freeb(mp); 182 return (EINVAL); 183 } 184 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 185 template = copyb(ill->ill_resolver_mp); 186 } 187 if (template == NULL) { 188 freeb(mp); 189 return (ENOMEM); 190 } 191 nce->nce_ill = ill; 192 nce->nce_ipversion = IPV6_VERSION; 193 nce->nce_flags = flags; 194 nce->nce_state = state; 195 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 196 nce->nce_rcnt = ill->ill_xmit_count; 197 nce->nce_addr = *addr; 198 nce->nce_mask = *mask; 199 nce->nce_extract_mask = *extract_mask; 200 nce->nce_ll_extract_start = hw_extract_start; 201 nce->nce_fp_mp = NULL; 202 nce->nce_res_mp = template; 203 if (state == ND_REACHABLE) 204 nce->nce_last = TICK_TO_MSEC(lbolt64); 205 else 206 nce->nce_last = 0; 207 nce->nce_qd_mp = NULL; 208 nce->nce_mp = mp; 209 if (hw_addr != NULL) 210 nce_set_ll(nce, hw_addr); 211 /* This one is for nce getting created */ 212 nce->nce_refcnt = 1; 213 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 214 if (nce->nce_flags & NCE_F_MAPPING) { 215 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 216 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 217 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 218 ncep = &ipst->ips_ndp6->nce_mask_entries; 219 } else { 220 ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 221 } 222 223 nce->nce_trace_disable = B_FALSE; 224 225 list_create(&nce->nce_cb, sizeof (nce_cb_t), 226 offsetof(nce_cb_t, nce_cb_node)); 227 /* 228 * Atomically ensure that the ill is not CONDEMNED, before 229 * adding the NCE. 230 */ 231 mutex_enter(&ill->ill_lock); 232 if (ill->ill_state_flags & ILL_CONDEMNED) { 233 mutex_exit(&ill->ill_lock); 234 freeb(mp); 235 freeb(template); 236 return (EINVAL); 237 } 238 if ((nce->nce_next = *ncep) != NULL) 239 nce->nce_next->nce_ptpn = &nce->nce_next; 240 *ncep = nce; 241 nce->nce_ptpn = ncep; 242 *newnce = nce; 243 /* This one is for nce being used by an active thread */ 244 NCE_REFHOLD(*newnce); 245 246 /* Bump up the number of nce's referencing this ill */ 247 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 248 (char *), "nce", (void *), nce); 249 ill->ill_nce_cnt++; 250 mutex_exit(&ill->ill_lock); 251 252 err = 0; 253 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 254 mutex_enter(&nce->nce_lock); 255 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 256 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 257 mutex_exit(&nce->nce_lock); 258 dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE); 259 if (dropped) { 260 mutex_enter(&nce->nce_lock); 261 nce->nce_pcnt++; 262 mutex_exit(&nce->nce_lock); 263 } 264 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 265 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 266 err = EINPROGRESS; 267 } else if (flags & NCE_F_UNSOL_ADV) { 268 /* 269 * We account for the transmit below by assigning one 270 * less than the ndd variable. Subsequent decrements 271 * are done in ndp_timer. 272 */ 273 mutex_enter(&nce->nce_lock); 274 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 275 nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1; 276 mutex_exit(&nce->nce_lock); 277 dropped = nce_xmit_advert(nce, B_TRUE, &ipv6_all_hosts_mcast, 278 0); 279 mutex_enter(&nce->nce_lock); 280 if (dropped) 281 nce->nce_unsolicit_count++; 282 if (nce->nce_unsolicit_count != 0) { 283 ASSERT(nce->nce_timeout_id == 0); 284 nce->nce_timeout_id = timeout(ndp_timer, nce, 285 MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval)); 286 } 287 mutex_exit(&nce->nce_lock); 288 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 289 } 290 291 /* 292 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 293 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 294 * We call nce_fastpath from nce_update if the link layer address of 295 * the peer changes from nce_update 296 */ 297 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 298 nce_fastpath(nce); 299 return (err); 300 } 301 302 int 303 ndp_lookup_then_add_v6(ill_t *ill, boolean_t match_illgrp, uchar_t *hw_addr, 304 const in6_addr_t *addr, const in6_addr_t *mask, 305 const in6_addr_t *extract_mask, uint32_t hw_extract_start, uint16_t flags, 306 uint16_t state, nce_t **newnce) 307 { 308 int err = 0; 309 nce_t *nce; 310 ip_stack_t *ipst = ill->ill_ipst; 311 312 ASSERT(ill->ill_isv6); 313 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 314 315 /* Get head of v6 hash table */ 316 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 317 nce = nce_lookup_addr(ill, match_illgrp, addr, nce); 318 if (nce == NULL) { 319 err = ndp_add_v6(ill, 320 hw_addr, 321 addr, 322 mask, 323 extract_mask, 324 hw_extract_start, 325 flags, 326 state, 327 newnce); 328 } else { 329 *newnce = nce; 330 err = EEXIST; 331 } 332 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 333 return (err); 334 } 335 336 /* 337 * Remove all the CONDEMNED nces from the appropriate hash table. 338 * We create a private list of NCEs, these may have ires pointing 339 * to them, so the list will be passed through to clean up dependent 340 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 341 */ 342 static void 343 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 344 { 345 nce_t *nce1; 346 nce_t **ptpn; 347 348 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 349 ASSERT(ndp->ndp_g_walker == 0); 350 for (; nce; nce = nce1) { 351 nce1 = nce->nce_next; 352 mutex_enter(&nce->nce_lock); 353 if (nce->nce_flags & NCE_F_CONDEMNED) { 354 ptpn = nce->nce_ptpn; 355 nce1 = nce->nce_next; 356 if (nce1 != NULL) 357 nce1->nce_ptpn = ptpn; 358 *ptpn = nce1; 359 nce->nce_ptpn = NULL; 360 nce->nce_next = NULL; 361 nce->nce_next = *free_nce_list; 362 *free_nce_list = nce; 363 } 364 mutex_exit(&nce->nce_lock); 365 } 366 } 367 368 /* 369 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 370 * will return this NCE. Also no new IREs will be created that 371 * point to this NCE (See ire_add_v6). Also no new timeouts will 372 * be started (See NDP_RESTART_TIMER). 373 * 2. Cancel any currently running timeouts. 374 * 3. If there is an ndp walker, return. The walker will do the cleanup. 375 * This ensures that walkers see a consistent list of NCEs while walking. 376 * 4. Otherwise remove the NCE from the list of NCEs 377 * 5. Delete all IREs pointing to this NCE. 378 */ 379 void 380 ndp_delete(nce_t *nce) 381 { 382 nce_t **ptpn; 383 nce_t *nce1; 384 int ipversion = nce->nce_ipversion; 385 ndp_g_t *ndp; 386 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 387 388 if (ipversion == IPV4_VERSION) 389 ndp = ipst->ips_ndp4; 390 else 391 ndp = ipst->ips_ndp6; 392 393 /* Serialize deletes */ 394 mutex_enter(&nce->nce_lock); 395 if (nce->nce_flags & NCE_F_CONDEMNED) { 396 /* Some other thread is doing the delete */ 397 mutex_exit(&nce->nce_lock); 398 return; 399 } 400 /* 401 * Caller has a refhold. Also 1 ref for being in the list. Thus 402 * refcnt has to be >= 2 403 */ 404 ASSERT(nce->nce_refcnt >= 2); 405 nce->nce_flags |= NCE_F_CONDEMNED; 406 mutex_exit(&nce->nce_lock); 407 408 nce_fastpath_list_delete(nce); 409 410 /* Complete any waiting callbacks */ 411 nce_cb_dispatch(nce); 412 413 /* 414 * Cancel any running timer. Timeout can't be restarted 415 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 416 * Passing invalid timeout id is fine. 417 */ 418 if (nce->nce_timeout_id != 0) { 419 (void) untimeout(nce->nce_timeout_id); 420 nce->nce_timeout_id = 0; 421 } 422 423 mutex_enter(&ndp->ndp_g_lock); 424 if (nce->nce_ptpn == NULL) { 425 /* 426 * The last ndp walker has already removed this nce from 427 * the list after we marked the nce CONDEMNED and before 428 * we grabbed the global lock. 429 */ 430 mutex_exit(&ndp->ndp_g_lock); 431 return; 432 } 433 if (ndp->ndp_g_walker > 0) { 434 /* 435 * Can't unlink. The walker will clean up 436 */ 437 ndp->ndp_g_walker_cleanup = B_TRUE; 438 mutex_exit(&ndp->ndp_g_lock); 439 return; 440 } 441 442 /* 443 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 444 * the timer since it is marked CONDEMNED. 445 */ 446 ptpn = nce->nce_ptpn; 447 nce1 = nce->nce_next; 448 if (nce1 != NULL) 449 nce1->nce_ptpn = ptpn; 450 *ptpn = nce1; 451 nce->nce_ptpn = NULL; 452 nce->nce_next = NULL; 453 mutex_exit(&ndp->ndp_g_lock); 454 455 nce_ire_delete(nce); 456 } 457 458 void 459 ndp_inactive(nce_t *nce) 460 { 461 mblk_t **mpp; 462 ill_t *ill; 463 464 ASSERT(nce->nce_refcnt == 0); 465 ASSERT(MUTEX_HELD(&nce->nce_lock)); 466 ASSERT(nce->nce_fastpath == NULL); 467 468 /* Free all nce allocated messages */ 469 mpp = &nce->nce_first_mp_to_free; 470 do { 471 while (*mpp != NULL) { 472 mblk_t *mp; 473 474 mp = *mpp; 475 *mpp = mp->b_next; 476 477 inet_freemsg(mp); 478 } 479 } while (mpp++ != &nce->nce_last_mp_to_free); 480 481 if (nce->nce_ipversion == IPV6_VERSION) { 482 /* 483 * must have been cleaned up in nce_delete 484 */ 485 ASSERT(list_is_empty(&nce->nce_cb)); 486 list_destroy(&nce->nce_cb); 487 } 488 #ifdef DEBUG 489 nce_trace_cleanup(nce); 490 #endif 491 492 ill = nce->nce_ill; 493 mutex_enter(&ill->ill_lock); 494 DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill, 495 (char *), "nce", (void *), nce); 496 ill->ill_nce_cnt--; 497 /* 498 * If the number of nce's associated with this ill have dropped 499 * to zero, check whether we need to restart any operation that 500 * is waiting for this to happen. 501 */ 502 if (ILL_DOWN_OK(ill)) { 503 /* ipif_ill_refrele_tail drops the ill_lock */ 504 ipif_ill_refrele_tail(ill); 505 } else { 506 mutex_exit(&ill->ill_lock); 507 } 508 mutex_destroy(&nce->nce_lock); 509 if (nce->nce_mp != NULL) 510 inet_freemsg(nce->nce_mp); 511 } 512 513 /* 514 * ndp_walk routine. Delete the nce if it is associated with the ill 515 * that is going away. Always called as a writer. 516 */ 517 void 518 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 519 { 520 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 521 ndp_delete(nce); 522 } 523 } 524 525 /* 526 * Walk a list of to be inactive NCEs and blow away all the ires. 527 */ 528 static void 529 nce_ire_delete_list(nce_t *nce) 530 { 531 nce_t *nce_next; 532 533 ASSERT(nce != NULL); 534 while (nce != NULL) { 535 nce_next = nce->nce_next; 536 nce->nce_next = NULL; 537 538 /* 539 * It is possible for the last ndp walker (this thread) 540 * to come here after ndp_delete has marked the nce CONDEMNED 541 * and before it has removed the nce from the fastpath list 542 * or called untimeout. So we need to do it here. It is safe 543 * for both ndp_delete and this thread to do it twice or 544 * even simultaneously since each of the threads has a 545 * reference on the nce. 546 */ 547 nce_fastpath_list_delete(nce); 548 /* 549 * Cancel any running timer. Timeout can't be restarted 550 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 551 * Passing invalid timeout id is fine. 552 */ 553 if (nce->nce_timeout_id != 0) { 554 (void) untimeout(nce->nce_timeout_id); 555 nce->nce_timeout_id = 0; 556 } 557 /* 558 * We might hit this func thus in the v4 case: 559 * ipif_down->ipif_ndp_down->ndp_walk 560 */ 561 562 if (nce->nce_ipversion == IPV4_VERSION) { 563 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 564 IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill); 565 } else { 566 ASSERT(nce->nce_ipversion == IPV6_VERSION); 567 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 568 IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill); 569 } 570 NCE_REFRELE_NOTR(nce); 571 nce = nce_next; 572 } 573 } 574 575 /* 576 * Delete an ire when the nce goes away. 577 */ 578 /* ARGSUSED */ 579 static void 580 nce_ire_delete(nce_t *nce) 581 { 582 if (nce->nce_ipversion == IPV6_VERSION) { 583 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 584 nce_ire_delete1, (char *)nce, nce->nce_ill); 585 NCE_REFRELE_NOTR(nce); 586 } else { 587 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 588 nce_ire_delete1, (char *)nce, nce->nce_ill); 589 NCE_REFRELE_NOTR(nce); 590 } 591 } 592 593 /* 594 * ire_walk routine used to delete every IRE that shares this nce 595 */ 596 static void 597 nce_ire_delete1(ire_t *ire, char *nce_arg) 598 { 599 nce_t *nce = (nce_t *)nce_arg; 600 601 ASSERT(ire->ire_type == IRE_CACHE); 602 603 if (ire->ire_nce == nce) { 604 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 605 ire_delete(ire); 606 } 607 } 608 609 /* 610 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 611 */ 612 boolean_t 613 ndp_restart_dad(nce_t *nce) 614 { 615 boolean_t started; 616 boolean_t dropped; 617 618 if (nce == NULL) 619 return (B_FALSE); 620 mutex_enter(&nce->nce_lock); 621 if (nce->nce_state == ND_PROBE) { 622 mutex_exit(&nce->nce_lock); 623 started = B_TRUE; 624 } else if (nce->nce_state == ND_REACHABLE) { 625 nce->nce_state = ND_PROBE; 626 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 627 mutex_exit(&nce->nce_lock); 628 dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE); 629 if (dropped) { 630 mutex_enter(&nce->nce_lock); 631 nce->nce_pcnt++; 632 mutex_exit(&nce->nce_lock); 633 } 634 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 635 started = B_TRUE; 636 } else { 637 mutex_exit(&nce->nce_lock); 638 started = B_FALSE; 639 } 640 return (started); 641 } 642 643 /* 644 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 645 * If one is found, the refcnt on the nce will be incremented. 646 */ 647 nce_t * 648 ndp_lookup_v6(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr, 649 boolean_t caller_holds_lock) 650 { 651 nce_t *nce; 652 ip_stack_t *ipst = ill->ill_ipst; 653 654 ASSERT(ill->ill_isv6); 655 if (!caller_holds_lock) 656 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 657 658 /* Get head of v6 hash table */ 659 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 660 nce = nce_lookup_addr(ill, match_illgrp, addr, nce); 661 if (nce == NULL) 662 nce = nce_lookup_mapping(ill, addr); 663 if (!caller_holds_lock) 664 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 665 return (nce); 666 } 667 /* 668 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 669 * If one is found, the refcnt on the nce will be incremented. 670 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 671 * so we skip the nce_lookup_mapping call. 672 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 673 */ 674 nce_t * 675 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 676 { 677 nce_t *nce; 678 in6_addr_t addr6; 679 ip_stack_t *ipst = ill->ill_ipst; 680 681 if (!caller_holds_lock) 682 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 683 684 /* Get head of v4 hash table */ 685 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 686 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 687 /* 688 * NOTE: IPv4 never matches across the illgrp since the NCE's we're 689 * looking up have fastpath headers that are inherently per-ill. 690 */ 691 nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce); 692 if (!caller_holds_lock) 693 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 694 return (nce); 695 } 696 697 /* 698 * Cache entry lookup. Try to find an nce matching the parameters passed. 699 * Look only for exact entries (no mappings). If an nce is found, increment 700 * the hold count on that nce. The caller passes in the start of the 701 * appropriate hash table, and must be holding the appropriate global 702 * lock (ndp_g_lock). 703 */ 704 static nce_t * 705 nce_lookup_addr(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr, 706 nce_t *nce) 707 { 708 ndp_g_t *ndp; 709 ip_stack_t *ipst = ill->ill_ipst; 710 711 if (ill->ill_isv6) 712 ndp = ipst->ips_ndp6; 713 else 714 ndp = ipst->ips_ndp4; 715 716 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 717 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 718 return (NULL); 719 for (; nce != NULL; nce = nce->nce_next) { 720 if (nce->nce_ill == ill || 721 match_illgrp && IS_IN_SAME_ILLGRP(ill, nce->nce_ill)) { 722 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 723 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 724 &ipv6_all_ones)) { 725 mutex_enter(&nce->nce_lock); 726 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 727 NCE_REFHOLD_LOCKED(nce); 728 mutex_exit(&nce->nce_lock); 729 break; 730 } 731 mutex_exit(&nce->nce_lock); 732 } 733 } 734 } 735 return (nce); 736 } 737 738 /* 739 * Cache entry lookup. Try to find an nce matching the parameters passed. 740 * Look only for mappings. 741 */ 742 static nce_t * 743 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 744 { 745 nce_t *nce; 746 ip_stack_t *ipst = ill->ill_ipst; 747 748 ASSERT(ill != NULL && ill->ill_isv6); 749 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 750 if (!IN6_IS_ADDR_MULTICAST(addr)) 751 return (NULL); 752 nce = ipst->ips_ndp6->nce_mask_entries; 753 for (; nce != NULL; nce = nce->nce_next) 754 if (nce->nce_ill == ill && 755 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 756 mutex_enter(&nce->nce_lock); 757 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 758 NCE_REFHOLD_LOCKED(nce); 759 mutex_exit(&nce->nce_lock); 760 break; 761 } 762 mutex_exit(&nce->nce_lock); 763 } 764 return (nce); 765 } 766 767 /* 768 * Process passed in parameters either from an incoming packet or via 769 * user ioctl. 770 */ 771 static void 772 nce_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 773 { 774 ill_t *ill = nce->nce_ill; 775 uint32_t hw_addr_len = ill->ill_nd_lla_len; 776 mblk_t *mp; 777 boolean_t ll_updated = B_FALSE; 778 boolean_t ll_changed; 779 ip_stack_t *ipst = ill->ill_ipst; 780 781 ASSERT(nce->nce_ipversion == IPV6_VERSION); 782 /* 783 * No updates of link layer address or the neighbor state is 784 * allowed, when the cache is in NONUD state. This still 785 * allows for responding to reachability solicitation. 786 */ 787 mutex_enter(&nce->nce_lock); 788 if (nce->nce_state == ND_INCOMPLETE) { 789 if (hw_addr == NULL) { 790 mutex_exit(&nce->nce_lock); 791 return; 792 } 793 nce_set_ll(nce, hw_addr); 794 /* 795 * Update nce state and send the queued packets 796 * back to ip this time ire will be added. 797 */ 798 if (flag & ND_NA_FLAG_SOLICITED) { 799 nce_update(nce, ND_REACHABLE, NULL); 800 } else { 801 nce_update(nce, ND_STALE, NULL); 802 } 803 mutex_exit(&nce->nce_lock); 804 nce_fastpath(nce); 805 nce_cb_dispatch(nce); /* complete callbacks */ 806 mutex_enter(&nce->nce_lock); 807 mp = nce->nce_qd_mp; 808 nce->nce_qd_mp = NULL; 809 mutex_exit(&nce->nce_lock); 810 while (mp != NULL) { 811 mblk_t *nxt_mp, *data_mp; 812 813 nxt_mp = mp->b_next; 814 mp->b_next = NULL; 815 816 if (mp->b_datap->db_type == M_CTL) 817 data_mp = mp->b_cont; 818 else 819 data_mp = mp; 820 if (data_mp->b_prev != NULL) { 821 ill_t *inbound_ill; 822 queue_t *fwdq = NULL; 823 uint_t ifindex; 824 825 ifindex = (uint_t)(uintptr_t)data_mp->b_prev; 826 inbound_ill = ill_lookup_on_ifindex(ifindex, 827 B_TRUE, NULL, NULL, NULL, NULL, ipst); 828 if (inbound_ill == NULL) { 829 data_mp->b_prev = NULL; 830 freemsg(mp); 831 return; 832 } else { 833 fwdq = inbound_ill->ill_rq; 834 } 835 data_mp->b_prev = NULL; 836 /* 837 * Send a forwarded packet back into ip_rput_v6 838 * just as in ire_send_v6(). 839 * Extract the queue from b_prev (set in 840 * ip_rput_data_v6). 841 */ 842 if (fwdq != NULL) { 843 /* 844 * Forwarded packets hop count will 845 * get decremented in ip_rput_data_v6 846 */ 847 if (data_mp != mp) 848 freeb(mp); 849 put(fwdq, data_mp); 850 } else { 851 /* 852 * Send locally originated packets back 853 * into ip_wput_v6. 854 */ 855 put(ill->ill_wq, mp); 856 } 857 ill_refrele(inbound_ill); 858 } else { 859 put(ill->ill_wq, mp); 860 } 861 mp = nxt_mp; 862 } 863 return; 864 } 865 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 866 if (!is_adv) { 867 /* If this is a SOLICITATION request only */ 868 if (ll_changed) 869 nce_update(nce, ND_STALE, hw_addr); 870 mutex_exit(&nce->nce_lock); 871 nce_cb_dispatch(nce); 872 return; 873 } 874 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 875 /* If in any other state than REACHABLE, ignore */ 876 if (nce->nce_state == ND_REACHABLE) { 877 nce_update(nce, ND_STALE, NULL); 878 } 879 mutex_exit(&nce->nce_lock); 880 nce_cb_dispatch(nce); 881 return; 882 } else { 883 if (ll_changed) { 884 nce_update(nce, ND_UNCHANGED, hw_addr); 885 ll_updated = B_TRUE; 886 } 887 if (flag & ND_NA_FLAG_SOLICITED) { 888 nce_update(nce, ND_REACHABLE, NULL); 889 } else { 890 if (ll_updated) { 891 nce_update(nce, ND_STALE, NULL); 892 } 893 } 894 mutex_exit(&nce->nce_lock); 895 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 896 NCE_F_ISROUTER)) { 897 ire_t *ire; 898 899 /* 900 * Router turned to host. We need to remove the 901 * entry as well as any default route that may be 902 * using this as a next hop. This is required by 903 * section 7.2.5 of RFC 2461. 904 */ 905 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 906 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 907 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 908 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 909 MATCH_IRE_DEFAULT, ipst); 910 if (ire != NULL) { 911 ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); 912 ire_delete(ire); 913 ire_refrele(ire); 914 } 915 ndp_delete(nce); /* will do nce_cb_dispatch */ 916 } else { 917 nce_cb_dispatch(nce); 918 } 919 } 920 } 921 922 /* 923 * Walker state structure used by ndp_process() / ndp_process_entry(). 924 */ 925 typedef struct ndp_process_data { 926 ill_t *np_ill; /* ill/illgrp to match against */ 927 const in6_addr_t *np_addr; /* IPv6 address to match */ 928 uchar_t *np_hw_addr; /* passed to nce_process() */ 929 uint32_t np_flag; /* passed to nce_process() */ 930 boolean_t np_is_adv; /* passed to nce_process() */ 931 } ndp_process_data_t; 932 933 /* 934 * Walker callback used by ndp_process() for IPMP groups: calls nce_process() 935 * for each NCE with a matching address that's in the same IPMP group. 936 */ 937 static void 938 ndp_process_entry(nce_t *nce, void *arg) 939 { 940 ndp_process_data_t *npp = arg; 941 942 if (IS_IN_SAME_ILLGRP(nce->nce_ill, npp->np_ill) && 943 IN6_ARE_ADDR_EQUAL(&nce->nce_addr, npp->np_addr) && 944 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 945 nce_process(nce, npp->np_hw_addr, npp->np_flag, npp->np_is_adv); 946 } 947 } 948 949 /* 950 * Wrapper around nce_process() that handles IPMP. In particular, for IPMP, 951 * NCEs are per-underlying-ill (because of nce_fp_mp) and thus we may have 952 * more than one NCE for a given IPv6 address to tend to. In that case, we 953 * need to walk all NCEs and callback nce_process() for each one. Since this 954 * is expensive, in the non-IPMP case we just directly call nce_process(). 955 * Ultimately, nce_fp_mp needs to be moved out of the nce_t so that all IP 956 * interfaces in an IPMP group share the same NCEs -- at which point this 957 * function can be removed entirely. 958 */ 959 void 960 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 961 { 962 ill_t *ill = nce->nce_ill; 963 struct ndp_g_s *ndp = ill->ill_ipst->ips_ndp6; 964 ndp_process_data_t np; 965 966 if (ill->ill_grp == NULL) { 967 nce_process(nce, hw_addr, flag, is_adv); 968 return; 969 } 970 971 /* IPMP case: walk all NCEs */ 972 np.np_ill = ill; 973 np.np_addr = &nce->nce_addr; 974 np.np_flag = flag; 975 np.np_is_adv = is_adv; 976 np.np_hw_addr = hw_addr; 977 978 ndp_walk_common(ndp, NULL, (pfi_t)ndp_process_entry, &np, ALL_ZONES); 979 } 980 981 /* 982 * Pass arg1 to the pfi supplied, along with each nce in existence. 983 * ndp_walk() places a REFHOLD on the nce and drops the lock when 984 * walking the hash list. 985 */ 986 void 987 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 988 boolean_t trace) 989 { 990 nce_t *nce; 991 nce_t *nce1; 992 nce_t **ncep; 993 nce_t *free_nce_list = NULL; 994 995 mutex_enter(&ndp->ndp_g_lock); 996 /* Prevent ndp_delete from unlink and free of NCE */ 997 ndp->ndp_g_walker++; 998 mutex_exit(&ndp->ndp_g_lock); 999 for (ncep = ndp->nce_hash_tbl; 1000 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 1001 for (nce = *ncep; nce != NULL; nce = nce1) { 1002 nce1 = nce->nce_next; 1003 if (ill == NULL || nce->nce_ill == ill) { 1004 if (trace) { 1005 NCE_REFHOLD(nce); 1006 (*pfi)(nce, arg1); 1007 NCE_REFRELE(nce); 1008 } else { 1009 NCE_REFHOLD_NOTR(nce); 1010 (*pfi)(nce, arg1); 1011 NCE_REFRELE_NOTR(nce); 1012 } 1013 } 1014 } 1015 } 1016 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 1017 nce1 = nce->nce_next; 1018 if (ill == NULL || nce->nce_ill == ill) { 1019 if (trace) { 1020 NCE_REFHOLD(nce); 1021 (*pfi)(nce, arg1); 1022 NCE_REFRELE(nce); 1023 } else { 1024 NCE_REFHOLD_NOTR(nce); 1025 (*pfi)(nce, arg1); 1026 NCE_REFRELE_NOTR(nce); 1027 } 1028 } 1029 } 1030 mutex_enter(&ndp->ndp_g_lock); 1031 ndp->ndp_g_walker--; 1032 /* 1033 * While NCE's are removed from global list they are placed 1034 * in a private list, to be passed to nce_ire_delete_list(). 1035 * The reason is, there may be ires pointing to this nce 1036 * which needs to cleaned up. 1037 */ 1038 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 1039 /* Time to delete condemned entries */ 1040 for (ncep = ndp->nce_hash_tbl; 1041 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 1042 nce = *ncep; 1043 if (nce != NULL) { 1044 nce_remove(ndp, nce, &free_nce_list); 1045 } 1046 } 1047 nce = ndp->nce_mask_entries; 1048 if (nce != NULL) { 1049 nce_remove(ndp, nce, &free_nce_list); 1050 } 1051 ndp->ndp_g_walker_cleanup = B_FALSE; 1052 } 1053 1054 mutex_exit(&ndp->ndp_g_lock); 1055 1056 if (free_nce_list != NULL) { 1057 nce_ire_delete_list(free_nce_list); 1058 } 1059 } 1060 1061 /* 1062 * Walk everything. 1063 * Note that ill can be NULL hence can't derive the ipst from it. 1064 */ 1065 void 1066 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst) 1067 { 1068 ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE); 1069 ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE); 1070 } 1071 1072 /* 1073 * Process resolve requests. Handles both mapped entries 1074 * as well as cases that needs to be send out on the wire. 1075 * Lookup a NCE for a given IRE. Regardless of whether one exists 1076 * or one is created, we defer making ire point to nce until the 1077 * ire is actually added at which point the nce_refcnt on the nce is 1078 * incremented. This is done primarily to have symmetry between ire_add() 1079 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1080 */ 1081 int 1082 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 1083 { 1084 nce_t *nce, *hw_nce = NULL; 1085 int err; 1086 ill_t *ipmp_ill; 1087 uint16_t nce_flags; 1088 mblk_t *mp_nce = NULL; 1089 ip_stack_t *ipst = ill->ill_ipst; 1090 uchar_t *hwaddr = NULL; 1091 1092 ASSERT(ill->ill_isv6); 1093 1094 if (IN6_IS_ADDR_MULTICAST(dst)) 1095 return (nce_set_multicast(ill, dst)); 1096 1097 nce_flags = (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0; 1098 1099 /* 1100 * If `ill' is under IPMP, then first check to see if there's an NCE 1101 * for `dst' on the IPMP meta-interface (e.g., because an application 1102 * explicitly did an SIOCLIFSETND to tie a hardware address to `dst'). 1103 * If so, we use that hardware address when creating the NCE below. 1104 * Note that we don't yet have a mechanism to remove these NCEs if the 1105 * NCE for `dst' on the IPMP meta-interface is subsequently removed -- 1106 * but rather than build such a beast, we should fix NCEs so that they 1107 * can be properly shared across an IPMP group. 1108 */ 1109 if (IS_UNDER_IPMP(ill)) { 1110 if ((ipmp_ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) { 1111 hw_nce = ndp_lookup_v6(ipmp_ill, B_FALSE, dst, B_FALSE); 1112 if (hw_nce != NULL && hw_nce->nce_res_mp != NULL) { 1113 hwaddr = hw_nce->nce_res_mp->b_rptr + 1114 NCE_LL_ADDR_OFFSET(ipmp_ill); 1115 nce_flags |= hw_nce->nce_flags; 1116 } 1117 ill_refrele(ipmp_ill); 1118 } 1119 } 1120 1121 err = ndp_lookup_then_add_v6(ill, 1122 B_FALSE, /* NCE fastpath is per ill; don't match across group */ 1123 hwaddr, 1124 dst, 1125 &ipv6_all_ones, 1126 &ipv6_all_zeros, 1127 0, 1128 nce_flags, 1129 hwaddr != NULL ? ND_REACHABLE : ND_INCOMPLETE, 1130 &nce); 1131 1132 if (hw_nce != NULL) 1133 NCE_REFRELE(hw_nce); 1134 1135 switch (err) { 1136 case 0: 1137 /* 1138 * New cache entry was created. Make sure that the state 1139 * is not ND_INCOMPLETE. It can be in some other state 1140 * even before we send out the solicitation as we could 1141 * get un-solicited advertisements. 1142 * 1143 * If this is an XRESOLV interface, simply return 0, 1144 * since we don't want to solicit just yet. 1145 */ 1146 if (ill->ill_flags & ILLF_XRESOLV) { 1147 NCE_REFRELE(nce); 1148 return (0); 1149 } 1150 1151 mutex_enter(&nce->nce_lock); 1152 if (nce->nce_state != ND_INCOMPLETE) { 1153 mutex_exit(&nce->nce_lock); 1154 NCE_REFRELE(nce); 1155 return (0); 1156 } 1157 if (nce->nce_rcnt == 0) { 1158 /* The caller will free mp */ 1159 mutex_exit(&nce->nce_lock); 1160 ndp_delete(nce); 1161 NCE_REFRELE(nce); 1162 return (ESRCH); 1163 } 1164 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1165 if (mp_nce == NULL) { 1166 /* The caller will free mp */ 1167 mutex_exit(&nce->nce_lock); 1168 ndp_delete(nce); 1169 NCE_REFRELE(nce); 1170 return (ENOMEM); 1171 } 1172 nce_queue_mp(nce, mp_nce); 1173 ip_ndp_resolve(nce); 1174 mutex_exit(&nce->nce_lock); 1175 NCE_REFRELE(nce); 1176 return (EINPROGRESS); 1177 case EEXIST: 1178 /* Resolution in progress just queue the packet */ 1179 mutex_enter(&nce->nce_lock); 1180 if (nce->nce_state == ND_INCOMPLETE) { 1181 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1182 if (mp_nce == NULL) { 1183 err = ENOMEM; 1184 } else { 1185 nce_queue_mp(nce, mp_nce); 1186 err = EINPROGRESS; 1187 } 1188 } else { 1189 /* 1190 * Any other state implies we have 1191 * a nce but IRE needs to be added ... 1192 * ire_add_v6() will take care of the 1193 * the case when the nce becomes CONDEMNED 1194 * before the ire is added to the table. 1195 */ 1196 err = 0; 1197 } 1198 mutex_exit(&nce->nce_lock); 1199 NCE_REFRELE(nce); 1200 break; 1201 default: 1202 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 1203 break; 1204 } 1205 return (err); 1206 } 1207 1208 /* 1209 * When there is no resolver, the link layer template is passed in 1210 * the IRE. 1211 * Lookup a NCE for a given IRE. Regardless of whether one exists 1212 * or one is created, we defer making ire point to nce until the 1213 * ire is actually added at which point the nce_refcnt on the nce is 1214 * incremented. This is done primarily to have symmetry between ire_add() 1215 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1216 */ 1217 int 1218 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1219 { 1220 nce_t *nce; 1221 int err = 0; 1222 1223 ASSERT(ill != NULL); 1224 ASSERT(ill->ill_isv6); 1225 if (IN6_IS_ADDR_MULTICAST(dst)) { 1226 err = nce_set_multicast(ill, dst); 1227 return (err); 1228 } 1229 1230 err = ndp_lookup_then_add_v6(ill, 1231 B_FALSE, /* NCE fastpath is per ill; don't match across group */ 1232 NULL, /* hardware address */ 1233 dst, 1234 &ipv6_all_ones, 1235 &ipv6_all_zeros, 1236 0, 1237 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1238 ND_REACHABLE, 1239 &nce); 1240 1241 switch (err) { 1242 case 0: 1243 /* 1244 * Cache entry with a proper resolver cookie was 1245 * created. 1246 */ 1247 NCE_REFRELE(nce); 1248 break; 1249 case EEXIST: 1250 err = 0; 1251 NCE_REFRELE(nce); 1252 break; 1253 default: 1254 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1255 break; 1256 } 1257 return (err); 1258 } 1259 1260 /* 1261 * For each interface an entry is added for the unspecified multicast group. 1262 * Here that mapping is used to form the multicast cache entry for a particular 1263 * multicast destination. 1264 */ 1265 static int 1266 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1267 { 1268 nce_t *mnce; /* Multicast mapping entry */ 1269 nce_t *nce; 1270 uchar_t *hw_addr = NULL; 1271 int err = 0; 1272 ip_stack_t *ipst = ill->ill_ipst; 1273 1274 ASSERT(ill != NULL); 1275 ASSERT(ill->ill_isv6); 1276 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1277 1278 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1279 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst)); 1280 nce = nce_lookup_addr(ill, B_FALSE, dst, nce); 1281 if (nce != NULL) { 1282 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1283 NCE_REFRELE(nce); 1284 return (0); 1285 } 1286 /* No entry, now lookup for a mapping this should never fail */ 1287 mnce = nce_lookup_mapping(ill, dst); 1288 if (mnce == NULL) { 1289 /* Something broken for the interface. */ 1290 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1291 return (ESRCH); 1292 } 1293 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1294 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1295 /* 1296 * For IRE_IF_RESOLVER a hardware mapping can be 1297 * generated, for IRE_IF_NORESOLVER, resolution cookie 1298 * in the ill is copied in ndp_add_v6(). 1299 */ 1300 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1301 if (hw_addr == NULL) { 1302 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1303 NCE_REFRELE(mnce); 1304 return (ENOMEM); 1305 } 1306 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1307 } 1308 NCE_REFRELE(mnce); 1309 /* 1310 * IRE_IF_NORESOLVER type simply copies the resolution 1311 * cookie passed in. So no hw_addr is needed. 1312 */ 1313 err = ndp_add_v6(ill, 1314 hw_addr, 1315 dst, 1316 &ipv6_all_ones, 1317 &ipv6_all_zeros, 1318 0, 1319 NCE_F_NONUD, 1320 ND_REACHABLE, 1321 &nce); 1322 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1323 if (hw_addr != NULL) 1324 kmem_free(hw_addr, ill->ill_nd_lla_len); 1325 if (err != 0) { 1326 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1327 return (err); 1328 } 1329 NCE_REFRELE(nce); 1330 return (0); 1331 } 1332 1333 /* 1334 * Return the link layer address, and any flags of a nce. 1335 */ 1336 int 1337 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1338 { 1339 nce_t *nce; 1340 in6_addr_t *addr; 1341 sin6_t *sin6; 1342 dl_unitdata_req_t *dl; 1343 1344 ASSERT(ill != NULL && ill->ill_isv6); 1345 sin6 = (sin6_t *)&lnr->lnr_addr; 1346 addr = &sin6->sin6_addr; 1347 1348 /* 1349 * NOTE: if the ill is an IPMP interface, then match against the whole 1350 * illgrp. This e.g. allows in.ndpd to retrieve the link layer 1351 * addresses for the data addresses on an IPMP interface even though 1352 * ipif_ndp_up() created them with an nce_ill of ipif_bound_ill. 1353 */ 1354 nce = ndp_lookup_v6(ill, IS_IPMP(ill), addr, B_FALSE); 1355 if (nce == NULL) 1356 return (ESRCH); 1357 /* If in INCOMPLETE state, no link layer address is available yet */ 1358 if (nce->nce_state == ND_INCOMPLETE) 1359 goto done; 1360 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1361 if (ill->ill_flags & ILLF_XRESOLV) 1362 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1363 else 1364 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1365 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1366 sizeof (lnr->lnr_hdw_addr)); 1367 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1368 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1369 if (nce->nce_flags & NCE_F_ISROUTER) 1370 lnr->lnr_flags = NDF_ISROUTER_ON; 1371 if (nce->nce_flags & NCE_F_ANYCAST) 1372 lnr->lnr_flags |= NDF_ANYCAST_ON; 1373 done: 1374 NCE_REFRELE(nce); 1375 return (0); 1376 } 1377 1378 /* 1379 * Send Enable/Disable multicast reqs to driver. 1380 */ 1381 int 1382 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1383 uint32_t hw_addr_offset, mblk_t *mp) 1384 { 1385 nce_t *nce; 1386 uchar_t *hw_addr; 1387 ip_stack_t *ipst = ill->ill_ipst; 1388 1389 ASSERT(ill != NULL && ill->ill_isv6); 1390 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1391 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1392 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1393 freemsg(mp); 1394 return (EINVAL); 1395 } 1396 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1397 nce = nce_lookup_mapping(ill, addr); 1398 if (nce == NULL) { 1399 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1400 freemsg(mp); 1401 return (ESRCH); 1402 } 1403 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1404 /* 1405 * Update dl_addr_length and dl_addr_offset for primitives that 1406 * have physical addresses as opposed to full saps 1407 */ 1408 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1409 case DL_ENABMULTI_REQ: 1410 /* Track the state if this is the first enabmulti */ 1411 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1412 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1413 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1414 break; 1415 case DL_DISABMULTI_REQ: 1416 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1417 break; 1418 default: 1419 NCE_REFRELE(nce); 1420 ip1dbg(("ndp_mcastreq: default\n")); 1421 return (EINVAL); 1422 } 1423 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1424 NCE_REFRELE(nce); 1425 ill_dlpi_send(ill, mp); 1426 return (0); 1427 } 1428 1429 1430 /* 1431 * Send out a NS for resolving the ip address in nce. 1432 */ 1433 void 1434 ip_ndp_resolve(nce_t *nce) 1435 { 1436 in6_addr_t sender6 = ipv6_all_zeros; 1437 uint32_t ms; 1438 mblk_t *mp; 1439 ip6_t *ip6h; 1440 1441 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1442 /* 1443 * Pick the src from outgoing packet, if one is available. 1444 * Otherwise let nce_xmit figure out the src. 1445 */ 1446 if ((mp = nce->nce_qd_mp) != NULL) { 1447 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1448 if (mp->b_datap->db_type == M_CTL) 1449 mp = mp->b_cont; 1450 ip6h = (ip6_t *)mp->b_rptr; 1451 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1452 /* 1453 * This message should have been pulled up already in 1454 * ip_wput_v6. We can't do pullups here because 1455 * the message could be from the nce_qd_mp which could 1456 * have b_next/b_prev non-NULL. 1457 */ 1458 ASSERT(MBLKL(mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN); 1459 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1460 } 1461 sender6 = ip6h->ip6_src; 1462 } 1463 ms = nce_solicit(nce, sender6); 1464 mutex_exit(&nce->nce_lock); 1465 if (ms == 0) { 1466 if (nce->nce_state != ND_REACHABLE) { 1467 nce_resolv_failed(nce); 1468 ndp_delete(nce); 1469 } 1470 } else { 1471 NDP_RESTART_TIMER(nce, (clock_t)ms); 1472 } 1473 mutex_enter(&nce->nce_lock); 1474 } 1475 1476 /* 1477 * Send a neighbor solicitation. 1478 * Returns number of milliseconds after which we should either rexmit or abort. 1479 * Return of zero means we should abort. 1480 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1481 * 1482 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1483 * the packet. 1484 */ 1485 uint32_t 1486 nce_solicit(nce_t *nce, in6_addr_t sender) 1487 { 1488 boolean_t dropped; 1489 1490 ASSERT(nce->nce_ipversion == IPV6_VERSION); 1491 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1492 1493 if (nce->nce_rcnt == 0) 1494 return (0); 1495 1496 nce->nce_rcnt--; 1497 mutex_exit(&nce->nce_lock); 1498 dropped = nce_xmit_solicit(nce, B_TRUE, &sender, 0); 1499 mutex_enter(&nce->nce_lock); 1500 if (dropped) 1501 nce->nce_rcnt++; 1502 return (nce->nce_ill->ill_reachable_retrans_time); 1503 } 1504 1505 /* 1506 * Attempt to recover an address on an interface that's been marked as a 1507 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1508 * no easy way to just probe the address and have the right thing happen if 1509 * it's no longer in use. Instead, we just bring it up normally and allow the 1510 * regular interface start-up logic to probe for a remaining duplicate and take 1511 * us back down if necessary. 1512 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1513 * ip_ndp_excl. 1514 */ 1515 /* ARGSUSED */ 1516 static void 1517 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1518 { 1519 ill_t *ill = rq->q_ptr; 1520 ipif_t *ipif; 1521 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1522 1523 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1524 /* 1525 * We do not support recovery of proxy ARP'd interfaces, 1526 * because the system lacks a complete proxy ARP mechanism. 1527 */ 1528 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1529 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1530 continue; 1531 } 1532 1533 /* 1534 * If we have already recovered or if the interface is going 1535 * away, then ignore. 1536 */ 1537 mutex_enter(&ill->ill_lock); 1538 if (!(ipif->ipif_flags & IPIF_DUPLICATE) || 1539 (ipif->ipif_state_flags & IPIF_CONDEMNED)) { 1540 mutex_exit(&ill->ill_lock); 1541 continue; 1542 } 1543 1544 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1545 ill->ill_ipif_dup_count--; 1546 mutex_exit(&ill->ill_lock); 1547 ipif->ipif_was_dup = B_TRUE; 1548 1549 VERIFY(ipif_ndp_up(ipif, B_TRUE) != EINPROGRESS); 1550 (void) ipif_up_done_v6(ipif); 1551 } 1552 freeb(mp); 1553 } 1554 1555 /* 1556 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1557 * As long as someone else holds the address, the interface will stay down. 1558 * When that conflict goes away, the interface is brought back up. This is 1559 * done so that accidental shutdowns of addresses aren't made permanent. Your 1560 * server will recover from a failure. 1561 * 1562 * For DHCP and temporary addresses, recovery is not done in the kernel. 1563 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1564 * 1565 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1566 */ 1567 static void 1568 ipif6_dup_recovery(void *arg) 1569 { 1570 ipif_t *ipif = arg; 1571 1572 ipif->ipif_recovery_id = 0; 1573 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1574 return; 1575 1576 /* 1577 * No lock, because this is just an optimization. 1578 */ 1579 if (ipif->ipif_state_flags & IPIF_CONDEMNED) 1580 return; 1581 1582 /* If the link is down, we'll retry this later */ 1583 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1584 return; 1585 1586 ndp_do_recovery(ipif); 1587 } 1588 1589 /* 1590 * Perform interface recovery by forcing the duplicate interfaces up and 1591 * allowing the system to determine which ones should stay up. 1592 * 1593 * Called both by recovery timer expiry and link-up notification. 1594 */ 1595 void 1596 ndp_do_recovery(ipif_t *ipif) 1597 { 1598 ill_t *ill = ipif->ipif_ill; 1599 mblk_t *mp; 1600 ip_stack_t *ipst = ill->ill_ipst; 1601 1602 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1603 if (mp == NULL) { 1604 mutex_enter(&ill->ill_lock); 1605 if (ipif->ipif_recovery_id == 0 && 1606 !(ipif->ipif_state_flags & IPIF_CONDEMNED)) { 1607 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1608 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1609 } 1610 mutex_exit(&ill->ill_lock); 1611 } else { 1612 /* 1613 * A recovery timer may still be running if we got here from 1614 * ill_restart_dad(); cancel that timer. 1615 */ 1616 if (ipif->ipif_recovery_id != 0) 1617 (void) untimeout(ipif->ipif_recovery_id); 1618 ipif->ipif_recovery_id = 0; 1619 1620 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1621 sizeof (ipif->ipif_v6lcl_addr)); 1622 ill_refhold(ill); 1623 qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP, 1624 B_FALSE); 1625 } 1626 } 1627 1628 /* 1629 * Find the MAC and IP addresses in an NA/NS message. 1630 */ 1631 static void 1632 ip_ndp_find_addresses(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, in6_addr_t *targp, 1633 uchar_t **haddr, uint_t *haddrlenp) 1634 { 1635 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1636 icmp6_t *icmp6 = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1637 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 1638 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 1639 uchar_t *addr; 1640 int alen = 0; 1641 1642 if (dl_mp == NULL) { 1643 nd_opt_hdr_t *opt = NULL; 1644 int len; 1645 1646 /* 1647 * If it's from the fast-path, then it can't be a probe 1648 * message, and thus must include a linkaddr option. 1649 * Extract that here. 1650 */ 1651 switch (icmp6->icmp6_type) { 1652 case ND_NEIGHBOR_SOLICIT: 1653 len = mp->b_wptr - (uchar_t *)ns; 1654 if ((len -= sizeof (*ns)) > 0) { 1655 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), 1656 len, ND_OPT_SOURCE_LINKADDR); 1657 } 1658 break; 1659 case ND_NEIGHBOR_ADVERT: 1660 len = mp->b_wptr - (uchar_t *)na; 1661 if ((len -= sizeof (*na)) > 0) { 1662 opt = ndp_get_option((nd_opt_hdr_t *)(na + 1), 1663 len, ND_OPT_TARGET_LINKADDR); 1664 } 1665 break; 1666 } 1667 1668 if (opt != NULL && opt->nd_opt_len * 8 - sizeof (*opt) >= 1669 ill->ill_nd_lla_len) { 1670 addr = (uchar_t *)(opt + 1); 1671 alen = ill->ill_nd_lla_len; 1672 } 1673 1674 /* 1675 * We cheat a bit here for the sake of printing usable log 1676 * messages in the rare case where the reply we got was unicast 1677 * without a source linkaddr option, and the interface is in 1678 * fastpath mode. (Sigh.) 1679 */ 1680 if (alen == 0 && ill->ill_type == IFT_ETHER && 1681 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1682 struct ether_header *pether; 1683 1684 pether = (struct ether_header *)((char *)ip6h - 1685 sizeof (*pether)); 1686 addr = pether->ether_shost.ether_addr_octet; 1687 alen = ETHERADDRL; 1688 } 1689 } else { 1690 dl_unitdata_ind_t *dlu; 1691 1692 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1693 alen = dlu->dl_src_addr_length; 1694 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1695 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1696 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1697 if (ill->ill_sap_length < 0) { 1698 alen += ill->ill_sap_length; 1699 } else { 1700 addr += ill->ill_sap_length; 1701 alen -= ill->ill_sap_length; 1702 } 1703 } 1704 } 1705 1706 if (alen > 0) { 1707 *haddr = addr; 1708 *haddrlenp = alen; 1709 } else { 1710 *haddr = NULL; 1711 *haddrlenp = 0; 1712 } 1713 1714 /* nd_ns_target and nd_na_target are at the same offset, so we cheat */ 1715 *targp = ns->nd_ns_target; 1716 } 1717 1718 /* 1719 * This is for exclusive changes due to NDP duplicate address detection 1720 * failure. 1721 */ 1722 /* ARGSUSED */ 1723 static void 1724 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1725 { 1726 ill_t *ill = rq->q_ptr; 1727 ipif_t *ipif; 1728 mblk_t *dl_mp = NULL; 1729 uchar_t *haddr; 1730 uint_t haddrlen; 1731 ip_stack_t *ipst = ill->ill_ipst; 1732 in6_addr_t targ; 1733 1734 if (DB_TYPE(mp) != M_DATA) { 1735 dl_mp = mp; 1736 mp = mp->b_cont; 1737 } 1738 1739 ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen); 1740 if (haddr != NULL && haddrlen == ill->ill_phys_addr_length) { 1741 /* 1742 * Ignore conflicts generated by misbehaving switches that 1743 * just reflect our own messages back to us. For IPMP, we may 1744 * see reflections across any ill in the illgrp. 1745 */ 1746 if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 || 1747 IS_UNDER_IPMP(ill) && 1748 ipmp_illgrp_find_ill(ill->ill_grp, haddr, haddrlen) != NULL) 1749 goto ignore_conflict; 1750 } 1751 1752 /* 1753 * Look up the appropriate ipif. 1754 */ 1755 ipif = ipif_lookup_addr_v6(&targ, ill, ALL_ZONES, NULL, NULL, NULL, 1756 NULL, ipst); 1757 if (ipif == NULL) 1758 goto ignore_conflict; 1759 1760 /* Reload the ill to match the ipif */ 1761 ill = ipif->ipif_ill; 1762 1763 /* If it's already duplicate or ineligible, then don't do anything. */ 1764 if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) { 1765 ipif_refrele(ipif); 1766 goto ignore_conflict; 1767 } 1768 1769 /* 1770 * If this is a failure during duplicate recovery, then don't 1771 * complain. It may take a long time to recover. 1772 */ 1773 if (!ipif->ipif_was_dup) { 1774 char ibuf[LIFNAMSIZ]; 1775 char hbuf[MAC_STR_LEN]; 1776 char sbuf[INET6_ADDRSTRLEN]; 1777 1778 ipif_get_name(ipif, ibuf, sizeof (ibuf)); 1779 cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);" 1780 " disabled", ibuf, 1781 inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)), 1782 mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf))); 1783 } 1784 mutex_enter(&ill->ill_lock); 1785 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1786 ipif->ipif_flags |= IPIF_DUPLICATE; 1787 ill->ill_ipif_dup_count++; 1788 mutex_exit(&ill->ill_lock); 1789 (void) ipif_down(ipif, NULL, NULL); 1790 ipif_down_tail(ipif); 1791 mutex_enter(&ill->ill_lock); 1792 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1793 ill->ill_net_type == IRE_IF_RESOLVER && 1794 !(ipif->ipif_state_flags & IPIF_CONDEMNED) && 1795 ipst->ips_ip_dup_recovery > 0) { 1796 ASSERT(ipif->ipif_recovery_id == 0); 1797 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1798 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1799 } 1800 mutex_exit(&ill->ill_lock); 1801 ipif_refrele(ipif); 1802 ignore_conflict: 1803 if (dl_mp != NULL) 1804 freeb(dl_mp); 1805 freemsg(mp); 1806 } 1807 1808 /* 1809 * Handle failure by tearing down the ipifs with the specified address. Note 1810 * that tearing down the ipif also means deleting the nce through ipif_down, so 1811 * it's not possible to do recovery by just restarting the nce timer. Instead, 1812 * we start a timer on the ipif. 1813 */ 1814 static void 1815 ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1816 { 1817 if ((mp = copymsg(mp)) != NULL) { 1818 if (dl_mp == NULL) 1819 dl_mp = mp; 1820 else if ((dl_mp = copyb(dl_mp)) != NULL) 1821 dl_mp->b_cont = mp; 1822 if (dl_mp == NULL) { 1823 freemsg(mp); 1824 } else { 1825 ill_refhold(ill); 1826 qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP, 1827 B_FALSE); 1828 } 1829 } 1830 } 1831 1832 /* 1833 * Handle a discovered conflict: some other system is advertising that it owns 1834 * one of our IP addresses. We need to defend ourselves, or just shut down the 1835 * interface. 1836 */ 1837 static void 1838 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1839 { 1840 ipif_t *ipif; 1841 uint32_t now; 1842 uint_t maxdefense; 1843 uint_t defs; 1844 ip_stack_t *ipst = ill->ill_ipst; 1845 1846 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 1847 NULL, NULL, ipst); 1848 if (ipif == NULL) 1849 return; 1850 1851 /* 1852 * First, figure out if this address is disposable. 1853 */ 1854 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1855 maxdefense = ipst->ips_ip_max_temp_defend; 1856 else 1857 maxdefense = ipst->ips_ip_max_defend; 1858 1859 /* 1860 * Now figure out how many times we've defended ourselves. Ignore 1861 * defenses that happened long in the past. 1862 */ 1863 now = gethrestime_sec(); 1864 mutex_enter(&nce->nce_lock); 1865 if ((defs = nce->nce_defense_count) > 0 && 1866 now - nce->nce_defense_time > ipst->ips_ip_defend_interval) { 1867 nce->nce_defense_count = defs = 0; 1868 } 1869 nce->nce_defense_count++; 1870 nce->nce_defense_time = now; 1871 mutex_exit(&nce->nce_lock); 1872 ipif_refrele(ipif); 1873 1874 /* 1875 * If we've defended ourselves too many times already, then give up and 1876 * tear down the interface(s) using this address. Otherwise, defend by 1877 * sending out an unsolicited Neighbor Advertisement. 1878 */ 1879 if (defs >= maxdefense) { 1880 ip_ndp_failure(ill, mp, dl_mp); 1881 } else { 1882 char hbuf[MAC_STR_LEN]; 1883 char sbuf[INET6_ADDRSTRLEN]; 1884 uchar_t *haddr; 1885 uint_t haddrlen; 1886 in6_addr_t targ; 1887 1888 ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen); 1889 cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1890 mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)), 1891 inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)), 1892 ill->ill_name); 1893 1894 (void) nce_xmit_advert(nce, B_FALSE, &ipv6_all_hosts_mcast, 0); 1895 } 1896 } 1897 1898 static void 1899 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1900 { 1901 nd_neighbor_solicit_t *ns; 1902 uint32_t hlen = ill->ill_nd_lla_len; 1903 uchar_t *haddr = NULL; 1904 icmp6_t *icmp_nd; 1905 ip6_t *ip6h; 1906 nce_t *our_nce = NULL; 1907 in6_addr_t target; 1908 in6_addr_t src; 1909 int len; 1910 int flag = 0; 1911 nd_opt_hdr_t *opt = NULL; 1912 boolean_t bad_solicit = B_FALSE; 1913 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1914 1915 ip6h = (ip6_t *)mp->b_rptr; 1916 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1917 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1918 src = ip6h->ip6_src; 1919 ns = (nd_neighbor_solicit_t *)icmp_nd; 1920 target = ns->nd_ns_target; 1921 if (IN6_IS_ADDR_MULTICAST(&target)) { 1922 if (ip_debug > 2) { 1923 /* ip1dbg */ 1924 pr_addr_dbg("ndp_input_solicit: Target is" 1925 " multicast! %s\n", AF_INET6, &target); 1926 } 1927 bad_solicit = B_TRUE; 1928 goto done; 1929 } 1930 if (len > sizeof (nd_neighbor_solicit_t)) { 1931 /* Options present */ 1932 opt = (nd_opt_hdr_t *)&ns[1]; 1933 len -= sizeof (nd_neighbor_solicit_t); 1934 if (!ndp_verify_optlen(opt, len)) { 1935 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1936 bad_solicit = B_TRUE; 1937 goto done; 1938 } 1939 1940 } 1941 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1942 /* Check to see if this is a valid DAD solicitation */ 1943 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1944 if (ip_debug > 2) { 1945 /* ip1dbg */ 1946 pr_addr_dbg("ndp_input_solicit: IPv6 " 1947 "Destination is not solicited node " 1948 "multicast %s\n", AF_INET6, 1949 &ip6h->ip6_dst); 1950 } 1951 bad_solicit = B_TRUE; 1952 goto done; 1953 } 1954 } 1955 1956 /* 1957 * NOTE: with IPMP, it's possible the nominated multicast ill (which 1958 * received this packet if it's multicast) is not the ill tied to 1959 * e.g. the IPMP ill's data link-local. So we match across the illgrp 1960 * to ensure we find the associated NCE. 1961 */ 1962 our_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE); 1963 /* 1964 * If this is a valid Solicitation, a permanent 1965 * entry should exist in the cache 1966 */ 1967 if (our_nce == NULL || 1968 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1969 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1970 "ifname=%s ", ill->ill_name)); 1971 if (ip_debug > 2) { 1972 /* ip1dbg */ 1973 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1974 } 1975 bad_solicit = B_TRUE; 1976 goto done; 1977 } 1978 1979 /* At this point we should have a verified NS per spec */ 1980 if (opt != NULL) { 1981 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1982 if (opt != NULL) { 1983 haddr = (uchar_t *)&opt[1]; 1984 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 1985 hlen == 0) { 1986 ip1dbg(("ndp_input_solicit: bad SLLA\n")); 1987 bad_solicit = B_TRUE; 1988 goto done; 1989 } 1990 } 1991 } 1992 1993 /* If sending directly to peer, set the unicast flag */ 1994 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 1995 flag |= NDP_UNICAST; 1996 1997 /* 1998 * Create/update the entry for the soliciting node. 1999 * or respond to outstanding queries, don't if 2000 * the source is unspecified address. 2001 */ 2002 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 2003 int err; 2004 nce_t *nnce; 2005 2006 ASSERT(ill->ill_isv6); 2007 /* 2008 * Regular solicitations *must* include the Source Link-Layer 2009 * Address option. Ignore messages that do not. 2010 */ 2011 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 2012 ip1dbg(("ndp_input_solicit: source link-layer address " 2013 "option missing with a specified source.\n")); 2014 bad_solicit = B_TRUE; 2015 goto done; 2016 } 2017 2018 /* 2019 * This is a regular solicitation. If we're still in the 2020 * process of verifying the address, then don't respond at all 2021 * and don't keep track of the sender. 2022 */ 2023 if (our_nce->nce_state == ND_PROBE) 2024 goto done; 2025 2026 /* 2027 * If the solicitation doesn't have sender hardware address 2028 * (legal for unicast solicitation), then process without 2029 * installing the return NCE. Either we already know it, or 2030 * we'll be forced to look it up when (and if) we reply to the 2031 * packet. 2032 */ 2033 if (haddr == NULL) 2034 goto no_source; 2035 2036 err = ndp_lookup_then_add_v6(ill, 2037 B_FALSE, 2038 haddr, 2039 &src, /* Soliciting nodes address */ 2040 &ipv6_all_ones, 2041 &ipv6_all_zeros, 2042 0, 2043 0, 2044 ND_STALE, 2045 &nnce); 2046 switch (err) { 2047 case 0: 2048 /* done with this entry */ 2049 NCE_REFRELE(nnce); 2050 break; 2051 case EEXIST: 2052 /* 2053 * B_FALSE indicates this is not an an advertisement. 2054 */ 2055 ndp_process(nnce, haddr, 0, B_FALSE); 2056 NCE_REFRELE(nnce); 2057 break; 2058 default: 2059 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 2060 err)); 2061 goto done; 2062 } 2063 no_source: 2064 flag |= NDP_SOLICITED; 2065 } else { 2066 /* 2067 * No source link layer address option should be present in a 2068 * valid DAD request. 2069 */ 2070 if (haddr != NULL) { 2071 ip1dbg(("ndp_input_solicit: source link-layer address " 2072 "option present with an unspecified source.\n")); 2073 bad_solicit = B_TRUE; 2074 goto done; 2075 } 2076 if (our_nce->nce_state == ND_PROBE) { 2077 /* 2078 * Internally looped-back probes won't have DLPI 2079 * attached to them. External ones (which are sent by 2080 * multicast) always will. Just ignore our own 2081 * transmissions. 2082 */ 2083 if (dl_mp != NULL) { 2084 /* 2085 * If someone else is probing our address, then 2086 * we've crossed wires. Declare failure. 2087 */ 2088 ip_ndp_failure(ill, mp, dl_mp); 2089 } 2090 goto done; 2091 } 2092 /* 2093 * This is a DAD probe. Multicast the advertisement to the 2094 * all-nodes address. 2095 */ 2096 src = ipv6_all_hosts_mcast; 2097 } 2098 /* Response to a solicitation */ 2099 (void) nce_xmit_advert(our_nce, B_TRUE, &src, flag); 2100 done: 2101 if (bad_solicit) 2102 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 2103 if (our_nce != NULL) 2104 NCE_REFRELE(our_nce); 2105 } 2106 2107 void 2108 ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2109 { 2110 nd_neighbor_advert_t *na; 2111 uint32_t hlen = ill->ill_nd_lla_len; 2112 uchar_t *haddr = NULL; 2113 icmp6_t *icmp_nd; 2114 ip6_t *ip6h; 2115 nce_t *dst_nce = NULL; 2116 in6_addr_t target; 2117 nd_opt_hdr_t *opt = NULL; 2118 int len; 2119 ip_stack_t *ipst = ill->ill_ipst; 2120 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2121 2122 ip6h = (ip6_t *)mp->b_rptr; 2123 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2124 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2125 na = (nd_neighbor_advert_t *)icmp_nd; 2126 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 2127 (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 2128 ip1dbg(("ndp_input_advert: Target is multicast but the " 2129 "solicited flag is not zero\n")); 2130 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2131 return; 2132 } 2133 target = na->nd_na_target; 2134 if (IN6_IS_ADDR_MULTICAST(&target)) { 2135 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 2136 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2137 return; 2138 } 2139 if (len > sizeof (nd_neighbor_advert_t)) { 2140 opt = (nd_opt_hdr_t *)&na[1]; 2141 if (!ndp_verify_optlen(opt, 2142 len - sizeof (nd_neighbor_advert_t))) { 2143 ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); 2144 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2145 return; 2146 } 2147 /* At this point we have a verified NA per spec */ 2148 len -= sizeof (nd_neighbor_advert_t); 2149 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 2150 if (opt != NULL) { 2151 haddr = (uchar_t *)&opt[1]; 2152 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 2153 hlen == 0) { 2154 ip1dbg(("ndp_input_advert: bad SLLA\n")); 2155 BUMP_MIB(mib, 2156 ipv6IfIcmpInBadNeighborAdvertisements); 2157 return; 2158 } 2159 } 2160 } 2161 2162 /* 2163 * NOTE: we match across the illgrp since we need to do DAD for all of 2164 * our local addresses, and those are spread across all the active 2165 * ills in the group. 2166 */ 2167 if ((dst_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE)) == NULL) 2168 return; 2169 2170 if (dst_nce->nce_flags & NCE_F_PERMANENT) { 2171 /* 2172 * Someone just advertised one of our local addresses. First, 2173 * check it it was us -- if so, we can safely ignore it. 2174 */ 2175 if (haddr != NULL) { 2176 if (!nce_cmp_ll_addr(dst_nce, haddr, hlen)) 2177 goto out; /* from us -- no conflict */ 2178 2179 /* 2180 * If we're in an IPMP group, check if this is an echo 2181 * from another ill in the group. Use the double- 2182 * checked locking pattern to avoid grabbing 2183 * ill_g_lock in the non-IPMP case. 2184 */ 2185 if (IS_UNDER_IPMP(ill)) { 2186 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2187 if (IS_UNDER_IPMP(ill) && ipmp_illgrp_find_ill( 2188 ill->ill_grp, haddr, hlen) != NULL) { 2189 rw_exit(&ipst->ips_ill_g_lock); 2190 goto out; 2191 } 2192 rw_exit(&ipst->ips_ill_g_lock); 2193 } 2194 } 2195 2196 /* 2197 * Our own (looped-back) unsolicited neighbor advertisements 2198 * will get here with dl_mp == NULL. (These will usually be 2199 * filtered by the `haddr' checks above, but point-to-point 2200 * links have no hardware address and thus make it here.) 2201 */ 2202 if (dl_mp == NULL && dst_nce->nce_state != ND_PROBE) 2203 goto out; 2204 2205 /* 2206 * This appears to be a real conflict. If we're trying to 2207 * configure this NCE (ND_PROBE), then shut it down. 2208 * Otherwise, handle the discovered conflict. 2209 * 2210 * In the ND_PROBE case, dl_mp might be NULL if we're getting 2211 * a unicast reply. This isn't typically done (multicast is 2212 * the norm in response to a probe), but we can handle it. 2213 */ 2214 if (dst_nce->nce_state == ND_PROBE) 2215 ip_ndp_failure(ill, mp, dl_mp); 2216 else 2217 ip_ndp_conflict(ill, mp, dl_mp, dst_nce); 2218 } else { 2219 if (na->nd_na_flags_reserved & ND_NA_FLAG_ROUTER) 2220 dst_nce->nce_flags |= NCE_F_ISROUTER; 2221 2222 /* B_TRUE indicates this an advertisement */ 2223 ndp_process(dst_nce, haddr, na->nd_na_flags_reserved, B_TRUE); 2224 } 2225 out: 2226 NCE_REFRELE(dst_nce); 2227 } 2228 2229 /* 2230 * Process NDP neighbor solicitation/advertisement messages. 2231 * The checksum has already checked o.k before reaching here. 2232 */ 2233 void 2234 ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2235 { 2236 icmp6_t *icmp_nd; 2237 ip6_t *ip6h; 2238 int len; 2239 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2240 2241 2242 if (!pullupmsg(mp, -1)) { 2243 ip1dbg(("ndp_input: pullupmsg failed\n")); 2244 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2245 goto done; 2246 } 2247 ip6h = (ip6_t *)mp->b_rptr; 2248 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 2249 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 2250 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 2251 goto done; 2252 } 2253 /* 2254 * NDP does not accept any extension headers between the 2255 * IP header and the ICMP header since e.g. a routing 2256 * header could be dangerous. 2257 * This assumes that any AH or ESP headers are removed 2258 * by ip prior to passing the packet to ndp_input. 2259 */ 2260 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 2261 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 2262 ip6h->ip6_nxt)); 2263 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2264 goto done; 2265 } 2266 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2267 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 2268 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 2269 if (icmp_nd->icmp6_code != 0) { 2270 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 2271 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2272 goto done; 2273 } 2274 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2275 /* 2276 * Make sure packet length is large enough for either 2277 * a NS or a NA icmp packet. 2278 */ 2279 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 2280 ip1dbg(("ndp_input: packet too short\n")); 2281 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2282 goto done; 2283 } 2284 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 2285 ndp_input_solicit(ill, mp, dl_mp); 2286 } else { 2287 ndp_input_advert(ill, mp, dl_mp); 2288 } 2289 done: 2290 freemsg(mp); 2291 } 2292 2293 /* 2294 * Utility routine to send an advertisement. Assumes that the NCE cannot 2295 * go away (e.g., because it's refheld). 2296 */ 2297 static boolean_t 2298 nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *target, 2299 uint_t flags) 2300 { 2301 ASSERT((flags & NDP_PROBE) == 0); 2302 2303 if (nce->nce_flags & NCE_F_ISROUTER) 2304 flags |= NDP_ISROUTER; 2305 if (!(nce->nce_flags & NCE_F_ANYCAST)) 2306 flags |= NDP_ORIDE; 2307 2308 return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_ADVERT, use_nd_lla, 2309 &nce->nce_addr, target, flags)); 2310 } 2311 2312 /* 2313 * Utility routine to send a solicitation. Assumes that the NCE cannot 2314 * go away (e.g., because it's refheld). 2315 */ 2316 static boolean_t 2317 nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *sender, 2318 uint_t flags) 2319 { 2320 if (flags & NDP_PROBE) 2321 sender = &ipv6_all_zeros; 2322 2323 return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, use_nd_lla, 2324 sender, &nce->nce_addr, flags)); 2325 } 2326 2327 /* 2328 * nce_xmit is called to form and transmit a ND solicitation or 2329 * advertisement ICMP packet. 2330 * 2331 * If the source address is unspecified and this isn't a probe (used for 2332 * duplicate address detection), an appropriate source address and link layer 2333 * address will be chosen here. The link layer address option is included if 2334 * the source is specified (i.e., all non-probe packets), and omitted (per the 2335 * specification) otherwise. 2336 * 2337 * It returns B_FALSE only if it does a successful put() to the 2338 * corresponding ill's ill_wq otherwise returns B_TRUE. 2339 */ 2340 static boolean_t 2341 nce_xmit(ill_t *ill, uint8_t type, boolean_t use_nd_lla, 2342 const in6_addr_t *sender, const in6_addr_t *target, int flag) 2343 { 2344 ill_t *hwaddr_ill; 2345 uint32_t len; 2346 icmp6_t *icmp6; 2347 mblk_t *mp; 2348 ip6_t *ip6h; 2349 nd_opt_hdr_t *opt; 2350 uint_t plen, maxplen; 2351 ip6i_t *ip6i; 2352 ipif_t *src_ipif = NULL; 2353 uint8_t *hw_addr; 2354 zoneid_t zoneid = GLOBAL_ZONEID; 2355 char buf[INET6_ADDRSTRLEN]; 2356 2357 ASSERT(!IS_IPMP(ill)); 2358 2359 /* 2360 * Check that the sender is actually a usable address on `ill', and if 2361 * so, track that as the src_ipif. If not, for solicitations, set the 2362 * sender to :: so that a new one will be picked below; for adverts, 2363 * drop the packet since we expect nce_xmit_advert() to always provide 2364 * a valid sender. 2365 */ 2366 if (!IN6_IS_ADDR_UNSPECIFIED(sender)) { 2367 if ((src_ipif = ip_ndp_lookup_addr_v6(sender, ill)) == NULL || 2368 !src_ipif->ipif_addr_ready) { 2369 if (src_ipif != NULL) { 2370 ipif_refrele(src_ipif); 2371 src_ipif = NULL; 2372 } 2373 if (type == ND_NEIGHBOR_ADVERT) { 2374 ip1dbg(("nce_xmit: No source ipif for src %s\n", 2375 inet_ntop(AF_INET6, sender, buf, 2376 sizeof (buf)))); 2377 return (B_TRUE); 2378 } 2379 sender = &ipv6_all_zeros; 2380 } 2381 } 2382 2383 /* 2384 * If we still have an unspecified source (sender) address and this 2385 * isn't a probe, select a source address from `ill'. 2386 */ 2387 if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { 2388 ASSERT(type != ND_NEIGHBOR_ADVERT); 2389 /* 2390 * Pick a source address for this solicitation, but restrict 2391 * the selection to addresses assigned to the output 2392 * interface. We do this because the destination will create 2393 * a neighbor cache entry for the source address of this 2394 * packet, so the source address needs to be a valid neighbor. 2395 */ 2396 src_ipif = ipif_select_source_v6(ill, target, B_TRUE, 2397 IPV6_PREFER_SRC_DEFAULT, ALL_ZONES); 2398 if (src_ipif == NULL) { 2399 ip1dbg(("nce_xmit: No source ipif for dst %s\n", 2400 inet_ntop(AF_INET6, target, buf, sizeof (buf)))); 2401 return (B_TRUE); 2402 } 2403 sender = &src_ipif->ipif_v6src_addr; 2404 } 2405 2406 /* 2407 * We're either sending a probe or we have a source address. 2408 */ 2409 ASSERT((flag & NDP_PROBE) || src_ipif != NULL); 2410 2411 maxplen = roundup(sizeof (nd_opt_hdr_t) + ND_MAX_HDW_LEN, 8); 2412 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 2413 maxplen; 2414 mp = allocb(len, BPRI_LO); 2415 if (mp == NULL) { 2416 if (src_ipif != NULL) 2417 ipif_refrele(src_ipif); 2418 return (B_TRUE); 2419 } 2420 bzero((char *)mp->b_rptr, len); 2421 mp->b_wptr = mp->b_rptr + len; 2422 2423 ip6i = (ip6i_t *)mp->b_rptr; 2424 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2425 ip6i->ip6i_nxt = IPPROTO_RAW; 2426 ip6i->ip6i_flags = IP6I_HOPLIMIT; 2427 if (flag & NDP_PROBE) 2428 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 2429 2430 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 2431 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2432 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2433 ip6h->ip6_nxt = IPPROTO_ICMPV6; 2434 ip6h->ip6_hops = IPV6_MAX_HOPS; 2435 ip6h->ip6_src = *sender; 2436 ip6h->ip6_dst = *target; 2437 icmp6 = (icmp6_t *)&ip6h[1]; 2438 2439 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 2440 sizeof (nd_neighbor_advert_t)); 2441 2442 if (type == ND_NEIGHBOR_SOLICIT) { 2443 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 2444 2445 if (!(flag & NDP_PROBE)) 2446 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 2447 ns->nd_ns_target = *target; 2448 if (!(flag & NDP_UNICAST)) { 2449 /* Form multicast address of the target */ 2450 ip6h->ip6_dst = ipv6_solicited_node_mcast; 2451 ip6h->ip6_dst.s6_addr32[3] |= 2452 ns->nd_ns_target.s6_addr32[3]; 2453 } 2454 } else { 2455 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 2456 2457 ASSERT(!(flag & NDP_PROBE)); 2458 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2459 na->nd_na_target = *sender; 2460 if (flag & NDP_ISROUTER) 2461 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 2462 if (flag & NDP_SOLICITED) 2463 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 2464 if (flag & NDP_ORIDE) 2465 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 2466 } 2467 2468 hw_addr = NULL; 2469 if (!(flag & NDP_PROBE)) { 2470 /* 2471 * Use our source address to find the hardware address to put 2472 * in the packet, so that the hardware address and IP address 2473 * will match up -- even if that hardware address doesn't 2474 * match the ill we actually transmit the packet through. 2475 */ 2476 if (IS_IPMP(src_ipif->ipif_ill)) { 2477 hwaddr_ill = ipmp_ipif_hold_bound_ill(src_ipif); 2478 if (hwaddr_ill == NULL) { 2479 ip1dbg(("nce_xmit: no bound ill!\n")); 2480 ipif_refrele(src_ipif); 2481 freemsg(mp); 2482 return (B_TRUE); 2483 } 2484 } else { 2485 hwaddr_ill = src_ipif->ipif_ill; 2486 ill_refhold(hwaddr_ill); /* for symmetry */ 2487 } 2488 2489 plen = roundup(sizeof (nd_opt_hdr_t) + 2490 hwaddr_ill->ill_nd_lla_len, 8); 2491 2492 hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : 2493 hwaddr_ill->ill_phys_addr; 2494 if (hw_addr != NULL) { 2495 /* Fill in link layer address and option len */ 2496 opt->nd_opt_len = (uint8_t)(plen / 8); 2497 bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); 2498 } 2499 2500 ill_refrele(hwaddr_ill); 2501 } 2502 2503 if (hw_addr == NULL) 2504 plen = 0; 2505 2506 /* Fix up the length of the packet now that plen is known */ 2507 len -= (maxplen - plen); 2508 mp->b_wptr = mp->b_rptr + len; 2509 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2510 2511 icmp6->icmp6_type = type; 2512 icmp6->icmp6_code = 0; 2513 /* 2514 * Prepare for checksum by putting icmp length in the icmp 2515 * checksum field. The checksum is calculated in ip_wput_v6. 2516 */ 2517 icmp6->icmp6_cksum = ip6h->ip6_plen; 2518 2519 /* 2520 * Before we toss the src_ipif, look up the zoneid to pass to 2521 * ip_output_v6(). This is to ensure unicast ND_NEIGHBOR_ADVERT 2522 * packets to be routed correctly by IP (we cannot guarantee that the 2523 * global zone has an interface route to the destination). 2524 */ 2525 if (src_ipif != NULL) { 2526 if ((zoneid = src_ipif->ipif_zoneid) == ALL_ZONES) 2527 zoneid = GLOBAL_ZONEID; 2528 ipif_refrele(src_ipif); 2529 } 2530 2531 ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT); 2532 return (B_FALSE); 2533 } 2534 2535 /* 2536 * Make a link layer address (does not include the SAP) from an nce. 2537 * To form the link layer address, use the last four bytes of ipv6 2538 * address passed in and the fixed offset stored in nce. 2539 */ 2540 static void 2541 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 2542 { 2543 uchar_t *mask, *to; 2544 ill_t *ill = nce->nce_ill; 2545 int len; 2546 2547 if (ill->ill_net_type == IRE_IF_NORESOLVER) 2548 return; 2549 ASSERT(nce->nce_res_mp != NULL); 2550 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 2551 ASSERT(nce->nce_flags & NCE_F_MAPPING); 2552 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 2553 ASSERT(addr != NULL); 2554 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 2555 addrpos, ill->ill_nd_lla_len); 2556 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 2557 IPV6_ADDR_LEN); 2558 mask = (uchar_t *)&nce->nce_extract_mask; 2559 mask += (IPV6_ADDR_LEN - len); 2560 addr += (IPV6_ADDR_LEN - len); 2561 to = addrpos + nce->nce_ll_extract_start; 2562 while (len-- > 0) 2563 *to++ |= *mask++ & *addr++; 2564 } 2565 2566 mblk_t * 2567 nce_udreq_alloc(ill_t *ill) 2568 { 2569 mblk_t *template_mp = NULL; 2570 dl_unitdata_req_t *dlur; 2571 int sap_length; 2572 2573 ASSERT(ill->ill_isv6); 2574 2575 sap_length = ill->ill_sap_length; 2576 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 2577 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 2578 if (template_mp == NULL) 2579 return (NULL); 2580 2581 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 2582 dlur->dl_priority.dl_min = 0; 2583 dlur->dl_priority.dl_max = 0; 2584 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 2585 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 2586 2587 /* Copy in the SAP value. */ 2588 NCE_LL_SAP_COPY(ill, template_mp); 2589 2590 return (template_mp); 2591 } 2592 2593 /* 2594 * NDP retransmit timer. 2595 * This timer goes off when: 2596 * a. It is time to retransmit NS for resolver. 2597 * b. It is time to send reachability probes. 2598 */ 2599 void 2600 ndp_timer(void *arg) 2601 { 2602 nce_t *nce = arg; 2603 ill_t *ill = nce->nce_ill; 2604 char addrbuf[INET6_ADDRSTRLEN]; 2605 boolean_t dropped = B_FALSE; 2606 ip_stack_t *ipst = ill->ill_ipst; 2607 2608 /* 2609 * The timer has to be cancelled by ndp_delete before doing the final 2610 * refrele. So the NCE is guaranteed to exist when the timer runs 2611 * until it clears the timeout_id. Before clearing the timeout_id 2612 * bump up the refcnt so that we can continue to use the nce 2613 */ 2614 ASSERT(nce != NULL); 2615 2616 mutex_enter(&nce->nce_lock); 2617 NCE_REFHOLD_LOCKED(nce); 2618 nce->nce_timeout_id = 0; 2619 2620 /* 2621 * Check the reachability state first. 2622 */ 2623 switch (nce->nce_state) { 2624 case ND_DELAY: 2625 nce->nce_state = ND_PROBE; 2626 mutex_exit(&nce->nce_lock); 2627 (void) nce_xmit_solicit(nce, B_FALSE, &ipv6_all_zeros, 2628 NDP_UNICAST); 2629 if (ip_debug > 3) { 2630 /* ip2dbg */ 2631 pr_addr_dbg("ndp_timer: state for %s changed " 2632 "to PROBE\n", AF_INET6, &nce->nce_addr); 2633 } 2634 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2635 NCE_REFRELE(nce); 2636 return; 2637 case ND_PROBE: 2638 /* must be retransmit timer */ 2639 nce->nce_pcnt--; 2640 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2641 nce->nce_pcnt >= -1); 2642 if (nce->nce_pcnt > 0) { 2643 /* 2644 * As per RFC2461, the nce gets deleted after 2645 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2646 * Note that the first unicast solicitation is sent 2647 * during the DELAY state. 2648 */ 2649 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2650 nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, 2651 addrbuf, sizeof (addrbuf)))); 2652 mutex_exit(&nce->nce_lock); 2653 dropped = nce_xmit_solicit(nce, B_FALSE, 2654 &ipv6_all_zeros, 2655 (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE : 2656 NDP_UNICAST); 2657 if (dropped) { 2658 mutex_enter(&nce->nce_lock); 2659 nce->nce_pcnt++; 2660 mutex_exit(&nce->nce_lock); 2661 } 2662 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 2663 } else if (nce->nce_pcnt < 0) { 2664 /* No hope, delete the nce */ 2665 nce->nce_state = ND_UNREACHABLE; 2666 mutex_exit(&nce->nce_lock); 2667 if (ip_debug > 2) { 2668 /* ip1dbg */ 2669 pr_addr_dbg("ndp_timer: Delete IRE for" 2670 " dst %s\n", AF_INET6, &nce->nce_addr); 2671 } 2672 ndp_delete(nce); 2673 } else if (!(nce->nce_flags & NCE_F_PERMANENT)) { 2674 /* Wait RetransTimer, before deleting the entry */ 2675 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2676 nce->nce_pcnt, inet_ntop(AF_INET6, 2677 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2678 mutex_exit(&nce->nce_lock); 2679 /* Wait one interval before killing */ 2680 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2681 } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { 2682 ipif_t *ipif; 2683 2684 /* 2685 * We're done probing, and we can now declare this 2686 * address to be usable. Let IP know that it's ok to 2687 * use. 2688 */ 2689 nce->nce_state = ND_REACHABLE; 2690 mutex_exit(&nce->nce_lock); 2691 ipif = ip_ndp_lookup_addr_v6(&nce->nce_addr, 2692 nce->nce_ill); 2693 if (ipif != NULL) { 2694 if (ipif->ipif_was_dup) { 2695 char ibuf[LIFNAMSIZ + 10]; 2696 char sbuf[INET6_ADDRSTRLEN]; 2697 2698 ipif->ipif_was_dup = B_FALSE; 2699 (void) inet_ntop(AF_INET6, 2700 &ipif->ipif_v6lcl_addr, 2701 sbuf, sizeof (sbuf)); 2702 ipif_get_name(ipif, ibuf, 2703 sizeof (ibuf)); 2704 cmn_err(CE_NOTE, "recovered address " 2705 "%s on %s", sbuf, ibuf); 2706 } 2707 if ((ipif->ipif_flags & IPIF_UP) && 2708 !ipif->ipif_addr_ready) 2709 ipif_up_notify(ipif); 2710 ipif->ipif_addr_ready = 1; 2711 ipif_refrele(ipif); 2712 } 2713 /* Begin defending our new address */ 2714 nce->nce_unsolicit_count = 0; 2715 dropped = nce_xmit_advert(nce, B_FALSE, 2716 &ipv6_all_hosts_mcast, 0); 2717 if (dropped) { 2718 nce->nce_unsolicit_count = 1; 2719 NDP_RESTART_TIMER(nce, 2720 ipst->ips_ip_ndp_unsolicit_interval); 2721 } else if (ipst->ips_ip_ndp_defense_interval != 0) { 2722 NDP_RESTART_TIMER(nce, 2723 ipst->ips_ip_ndp_defense_interval); 2724 } 2725 } else { 2726 /* 2727 * This is an address we're probing to be our own, but 2728 * the ill is down. Wait until it comes back before 2729 * doing anything, but switch to reachable state so 2730 * that the restart will work. 2731 */ 2732 nce->nce_state = ND_REACHABLE; 2733 mutex_exit(&nce->nce_lock); 2734 } 2735 NCE_REFRELE(nce); 2736 return; 2737 case ND_INCOMPLETE: { 2738 ip6_t *ip6h; 2739 ip6i_t *ip6i; 2740 mblk_t *mp, *datamp, *nextmp, **prevmpp; 2741 2742 /* 2743 * Per case (2) in the nce_queue_mp() comments, scan nce_qd_mp 2744 * for any IPMP probe packets, and toss 'em. IPMP probe 2745 * packets will always be at the head of nce_qd_mp and always 2746 * have an ip6i_t header, so we can stop at the first queued 2747 * ND packet without an ip6i_t. 2748 */ 2749 prevmpp = &nce->nce_qd_mp; 2750 for (mp = nce->nce_qd_mp; mp != NULL; mp = nextmp) { 2751 nextmp = mp->b_next; 2752 datamp = (DB_TYPE(mp) == M_CTL) ? mp->b_cont : mp; 2753 ip6h = (ip6_t *)datamp->b_rptr; 2754 if (ip6h->ip6_nxt != IPPROTO_RAW) 2755 break; 2756 2757 ip6i = (ip6i_t *)ip6h; 2758 if (ip6i->ip6i_flags & IP6I_IPMP_PROBE) { 2759 inet_freemsg(mp); 2760 *prevmpp = nextmp; 2761 } else { 2762 prevmpp = &mp->b_next; 2763 } 2764 } 2765 ip_ndp_resolve(nce); 2766 mutex_exit(&nce->nce_lock); 2767 NCE_REFRELE(nce); 2768 break; 2769 } 2770 case ND_REACHABLE: 2771 if (((nce->nce_flags & NCE_F_UNSOL_ADV) && 2772 nce->nce_unsolicit_count != 0) || 2773 ((nce->nce_flags & NCE_F_PERMANENT) && 2774 ipst->ips_ip_ndp_defense_interval != 0)) { 2775 if (nce->nce_unsolicit_count > 0) 2776 nce->nce_unsolicit_count--; 2777 mutex_exit(&nce->nce_lock); 2778 dropped = nce_xmit_advert(nce, B_FALSE, 2779 &ipv6_all_hosts_mcast, 0); 2780 if (dropped) { 2781 mutex_enter(&nce->nce_lock); 2782 nce->nce_unsolicit_count++; 2783 mutex_exit(&nce->nce_lock); 2784 } 2785 if (nce->nce_unsolicit_count != 0) { 2786 NDP_RESTART_TIMER(nce, 2787 ipst->ips_ip_ndp_unsolicit_interval); 2788 } else { 2789 NDP_RESTART_TIMER(nce, 2790 ipst->ips_ip_ndp_defense_interval); 2791 } 2792 } else { 2793 mutex_exit(&nce->nce_lock); 2794 } 2795 NCE_REFRELE(nce); 2796 break; 2797 default: 2798 mutex_exit(&nce->nce_lock); 2799 NCE_REFRELE(nce); 2800 break; 2801 } 2802 } 2803 2804 /* 2805 * Set a link layer address from the ll_addr passed in. 2806 * Copy SAP from ill. 2807 */ 2808 static void 2809 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2810 { 2811 ill_t *ill = nce->nce_ill; 2812 uchar_t *woffset; 2813 2814 ASSERT(ll_addr != NULL); 2815 /* Always called before fast_path_probe */ 2816 ASSERT(nce->nce_fp_mp == NULL); 2817 if (ill->ill_sap_length != 0) { 2818 /* 2819 * Copy the SAP type specified in the 2820 * request into the xmit template. 2821 */ 2822 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2823 } 2824 if (ill->ill_phys_addr_length > 0) { 2825 /* 2826 * The bcopy() below used to be called for the physical address 2827 * length rather than the link layer address length. For 2828 * ethernet and many other media, the phys_addr and lla are 2829 * identical. 2830 * However, with xresolv interfaces being introduced, the 2831 * phys_addr and lla are no longer the same, and the physical 2832 * address may not have any useful meaning, so we use the lla 2833 * for IPv6 address resolution and destination addressing. 2834 * 2835 * For PPP or other interfaces with a zero length 2836 * physical address, don't do anything here. 2837 * The bcopy() with a zero phys_addr length was previously 2838 * a no-op for interfaces with a zero-length physical address. 2839 * Using the lla for them would change the way they operate. 2840 * Doing nothing in such cases preserves expected behavior. 2841 */ 2842 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2843 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2844 } 2845 } 2846 2847 static boolean_t 2848 nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) 2849 { 2850 ill_t *ill = nce->nce_ill; 2851 uchar_t *ll_offset; 2852 2853 ASSERT(nce->nce_res_mp != NULL); 2854 if (ll_addr == NULL) 2855 return (B_FALSE); 2856 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2857 if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) 2858 return (B_TRUE); 2859 return (B_FALSE); 2860 } 2861 2862 /* 2863 * Updates the link layer address or the reachability state of 2864 * a cache entry. Reset probe counter if needed. 2865 */ 2866 static void 2867 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2868 { 2869 ill_t *ill = nce->nce_ill; 2870 boolean_t need_stop_timer = B_FALSE; 2871 boolean_t need_fastpath_update = B_FALSE; 2872 2873 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2874 ASSERT(nce->nce_ipversion == IPV6_VERSION); 2875 /* 2876 * If this interface does not do NUD, there is no point 2877 * in allowing an update to the cache entry. Although 2878 * we will respond to NS. 2879 * The only time we accept an update for a resolver when 2880 * NUD is turned off is when it has just been created. 2881 * Non-Resolvers will always be created as REACHABLE. 2882 */ 2883 if (new_state != ND_UNCHANGED) { 2884 if ((nce->nce_flags & NCE_F_NONUD) && 2885 (nce->nce_state != ND_INCOMPLETE)) 2886 return; 2887 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2888 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2889 need_stop_timer = B_TRUE; 2890 if (new_state == ND_REACHABLE) 2891 nce->nce_last = TICK_TO_MSEC(lbolt64); 2892 else { 2893 /* We force NUD in this case */ 2894 nce->nce_last = 0; 2895 } 2896 nce->nce_state = new_state; 2897 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2898 } 2899 /* 2900 * In case of fast path we need to free the the fastpath 2901 * M_DATA and do another probe. Otherwise we can just 2902 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2903 * whatever packets that happens to be transmitting at the time. 2904 */ 2905 if (new_ll_addr != NULL) { 2906 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2907 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2908 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2909 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2910 if (nce->nce_fp_mp != NULL) { 2911 freemsg(nce->nce_fp_mp); 2912 nce->nce_fp_mp = NULL; 2913 } 2914 need_fastpath_update = B_TRUE; 2915 } 2916 mutex_exit(&nce->nce_lock); 2917 if (need_stop_timer) { 2918 (void) untimeout(nce->nce_timeout_id); 2919 nce->nce_timeout_id = 0; 2920 } 2921 if (need_fastpath_update) 2922 nce_fastpath(nce); 2923 mutex_enter(&nce->nce_lock); 2924 } 2925 2926 void 2927 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 2928 { 2929 uint_t count = 0; 2930 mblk_t **mpp, *tmp; 2931 2932 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2933 2934 for (mpp = &nce->nce_qd_mp; *mpp != NULL; mpp = &(*mpp)->b_next) { 2935 if (++count > nce->nce_ill->ill_max_buf) { 2936 tmp = nce->nce_qd_mp->b_next; 2937 nce->nce_qd_mp->b_next = NULL; 2938 nce->nce_qd_mp->b_prev = NULL; 2939 freemsg(nce->nce_qd_mp); 2940 nce->nce_qd_mp = tmp; 2941 } 2942 } 2943 2944 if (head_insert) { 2945 mp->b_next = nce->nce_qd_mp; 2946 nce->nce_qd_mp = mp; 2947 } else { 2948 *mpp = mp; 2949 } 2950 } 2951 2952 static void 2953 nce_queue_mp(nce_t *nce, mblk_t *mp) 2954 { 2955 boolean_t head_insert = B_FALSE; 2956 ip6_t *ip6h; 2957 ip6i_t *ip6i; 2958 mblk_t *data_mp; 2959 2960 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2961 2962 if (mp->b_datap->db_type == M_CTL) 2963 data_mp = mp->b_cont; 2964 else 2965 data_mp = mp; 2966 ip6h = (ip6_t *)data_mp->b_rptr; 2967 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2968 /* 2969 * This message should have been pulled up already in 2970 * ip_wput_v6. We can't do pullups here because the message 2971 * could be from the nce_qd_mp which could have b_next/b_prev 2972 * non-NULL. 2973 */ 2974 ip6i = (ip6i_t *)ip6h; 2975 ASSERT(MBLKL(data_mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN); 2976 2977 /* 2978 * If this packet is marked IP6I_IPMP_PROBE, then we need to: 2979 * 2980 * 1. Insert it at the head of the nce_qd_mp list. Consider 2981 * the normal (non-probe) load-speading case where the 2982 * source address of the ND packet is not tied to nce_ill. 2983 * If the ill bound to the source address cannot receive, 2984 * the response to the ND packet will not be received. 2985 * However, if ND packets for nce_ill's probes are queued 2986 * behind that ND packet, those probes will also fail to 2987 * be sent, and thus in.mpathd will erroneously conclude 2988 * that nce_ill has also failed. 2989 * 2990 * 2. Drop the probe packet in ndp_timer() if the ND did 2991 * not succeed on the first attempt. This ensures that 2992 * ND problems do not manifest as probe RTT spikes. 2993 */ 2994 if (ip6i->ip6i_flags & IP6I_IPMP_PROBE) 2995 head_insert = B_TRUE; 2996 } 2997 nce_queue_mp_common(nce, mp, head_insert); 2998 } 2999 3000 /* 3001 * Called when address resolution failed due to a timeout. 3002 * Send an ICMP unreachable in response to all queued packets. 3003 */ 3004 void 3005 nce_resolv_failed(nce_t *nce) 3006 { 3007 mblk_t *mp, *nxt_mp, *first_mp; 3008 char buf[INET6_ADDRSTRLEN]; 3009 ip6_t *ip6h; 3010 zoneid_t zoneid = GLOBAL_ZONEID; 3011 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3012 3013 ip1dbg(("nce_resolv_failed: dst %s\n", 3014 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 3015 mutex_enter(&nce->nce_lock); 3016 mp = nce->nce_qd_mp; 3017 nce->nce_qd_mp = NULL; 3018 mutex_exit(&nce->nce_lock); 3019 while (mp != NULL) { 3020 nxt_mp = mp->b_next; 3021 mp->b_next = NULL; 3022 mp->b_prev = NULL; 3023 3024 first_mp = mp; 3025 if (mp->b_datap->db_type == M_CTL) { 3026 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 3027 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3028 zoneid = io->ipsec_out_zoneid; 3029 ASSERT(zoneid != ALL_ZONES); 3030 mp = mp->b_cont; 3031 mp->b_next = NULL; 3032 mp->b_prev = NULL; 3033 } 3034 3035 ip6h = (ip6_t *)mp->b_rptr; 3036 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3037 ip6i_t *ip6i; 3038 /* 3039 * This message should have been pulled up already 3040 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 3041 * the header is pulled up. 3042 */ 3043 ip6i = (ip6i_t *)ip6h; 3044 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 3045 sizeof (ip6i_t) + IPV6_HDR_LEN); 3046 mp->b_rptr += sizeof (ip6i_t); 3047 } 3048 /* 3049 * Ignore failure since icmp_unreachable_v6 will silently 3050 * drop packets with an unspecified source address. 3051 */ 3052 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst); 3053 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 3054 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst); 3055 mp = nxt_mp; 3056 } 3057 nce_cb_dispatch(nce); 3058 } 3059 3060 /* 3061 * Called by SIOCSNDP* ioctl to add/change an nce entry 3062 * and the corresponding attributes. 3063 * Disallow states other than ND_REACHABLE or ND_STALE. 3064 */ 3065 int 3066 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 3067 { 3068 sin6_t *sin6; 3069 in6_addr_t *addr; 3070 nce_t *nce; 3071 int err; 3072 uint16_t new_flags = 0; 3073 uint16_t old_flags = 0; 3074 int inflags = lnr->lnr_flags; 3075 ip_stack_t *ipst = ill->ill_ipst; 3076 3077 ASSERT(ill->ill_isv6); 3078 if ((lnr->lnr_state_create != ND_REACHABLE) && 3079 (lnr->lnr_state_create != ND_STALE)) 3080 return (EINVAL); 3081 3082 if (lnr->lnr_hdw_len > ND_MAX_HDW_LEN) 3083 return (EINVAL); 3084 3085 sin6 = (sin6_t *)&lnr->lnr_addr; 3086 addr = &sin6->sin6_addr; 3087 3088 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 3089 /* We know it can not be mapping so just look in the hash table */ 3090 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 3091 /* See comment in ndp_query() regarding IS_IPMP(ill) usage */ 3092 nce = nce_lookup_addr(ill, IS_IPMP(ill), addr, nce); 3093 if (nce != NULL) 3094 new_flags = nce->nce_flags; 3095 3096 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 3097 case NDF_ISROUTER_ON: 3098 new_flags |= NCE_F_ISROUTER; 3099 break; 3100 case NDF_ISROUTER_OFF: 3101 new_flags &= ~NCE_F_ISROUTER; 3102 break; 3103 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3104 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3105 if (nce != NULL) 3106 NCE_REFRELE(nce); 3107 return (EINVAL); 3108 } 3109 3110 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3111 case NDF_ANYCAST_ON: 3112 new_flags |= NCE_F_ANYCAST; 3113 break; 3114 case NDF_ANYCAST_OFF: 3115 new_flags &= ~NCE_F_ANYCAST; 3116 break; 3117 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3118 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3119 if (nce != NULL) 3120 NCE_REFRELE(nce); 3121 return (EINVAL); 3122 } 3123 3124 if (nce == NULL) { 3125 err = ndp_add_v6(ill, 3126 (uchar_t *)lnr->lnr_hdw_addr, 3127 addr, 3128 &ipv6_all_ones, 3129 &ipv6_all_zeros, 3130 0, 3131 new_flags, 3132 lnr->lnr_state_create, 3133 &nce); 3134 if (err != 0) { 3135 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3136 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 3137 return (err); 3138 } 3139 } 3140 old_flags = nce->nce_flags; 3141 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3142 /* 3143 * Router turned to host, delete all ires. 3144 * XXX Just delete the entry, but we need to add too. 3145 */ 3146 nce->nce_flags &= ~NCE_F_ISROUTER; 3147 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3148 ndp_delete(nce); 3149 NCE_REFRELE(nce); 3150 return (0); 3151 } 3152 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3153 3154 mutex_enter(&nce->nce_lock); 3155 nce->nce_flags = new_flags; 3156 mutex_exit(&nce->nce_lock); 3157 /* 3158 * Note that we ignore the state at this point, which 3159 * should be either STALE or REACHABLE. Instead we let 3160 * the link layer address passed in to determine the state 3161 * much like incoming packets. 3162 */ 3163 nce_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3164 NCE_REFRELE(nce); 3165 return (0); 3166 } 3167 3168 /* 3169 * If the device driver supports it, we make nce_fp_mp to have 3170 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 3171 * The caller ensures there is hold on nce for this function. 3172 * Note that since ill_fastpath_probe() copies the mblk there is 3173 * no need for the hold beyond this function. 3174 */ 3175 void 3176 nce_fastpath(nce_t *nce) 3177 { 3178 ill_t *ill = nce->nce_ill; 3179 int res; 3180 3181 ASSERT(ill != NULL); 3182 ASSERT(nce->nce_state != ND_INITIAL && nce->nce_state != ND_INCOMPLETE); 3183 3184 if (nce->nce_fp_mp != NULL) { 3185 /* Already contains fastpath info */ 3186 return; 3187 } 3188 if (nce->nce_res_mp != NULL) { 3189 nce_fastpath_list_add(nce); 3190 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3191 /* 3192 * EAGAIN is an indication of a transient error 3193 * i.e. allocation failure etc. leave the nce in the list it 3194 * will be updated when another probe happens for another ire 3195 * if not it will be taken out of the list when the ire is 3196 * deleted. 3197 */ 3198 3199 if (res != 0 && res != EAGAIN) 3200 nce_fastpath_list_delete(nce); 3201 } 3202 } 3203 3204 /* 3205 * Drain the list of nce's waiting for fastpath response. 3206 */ 3207 void 3208 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3209 void *arg) 3210 { 3211 3212 nce_t *next_nce; 3213 nce_t *current_nce; 3214 nce_t *first_nce; 3215 nce_t *prev_nce = NULL; 3216 3217 mutex_enter(&ill->ill_lock); 3218 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3219 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3220 next_nce = current_nce->nce_fastpath; 3221 /* 3222 * Take it off the list if we're flushing, or if the callback 3223 * routine tells us to do so. Otherwise, leave the nce in the 3224 * fastpath list to handle any pending response from the lower 3225 * layer. We can't drain the list when the callback routine 3226 * comparison failed, because the response is asynchronous in 3227 * nature, and may not arrive in the same order as the list 3228 * insertion. 3229 */ 3230 if (func == NULL || func(current_nce, arg)) { 3231 current_nce->nce_fastpath = NULL; 3232 if (current_nce == first_nce) 3233 ill->ill_fastpath_list = first_nce = next_nce; 3234 else 3235 prev_nce->nce_fastpath = next_nce; 3236 } else { 3237 /* previous element that is still in the list */ 3238 prev_nce = current_nce; 3239 } 3240 current_nce = next_nce; 3241 } 3242 mutex_exit(&ill->ill_lock); 3243 } 3244 3245 /* 3246 * Add nce to the nce fastpath list. 3247 */ 3248 void 3249 nce_fastpath_list_add(nce_t *nce) 3250 { 3251 ill_t *ill; 3252 3253 ill = nce->nce_ill; 3254 3255 mutex_enter(&ill->ill_lock); 3256 mutex_enter(&nce->nce_lock); 3257 3258 /* 3259 * if nce has not been deleted and 3260 * is not already in the list add it. 3261 */ 3262 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3263 (nce->nce_fastpath == NULL)) { 3264 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3265 ill->ill_fastpath_list = nce; 3266 } 3267 3268 mutex_exit(&nce->nce_lock); 3269 mutex_exit(&ill->ill_lock); 3270 } 3271 3272 /* 3273 * remove nce from the nce fastpath list. 3274 */ 3275 void 3276 nce_fastpath_list_delete(nce_t *nce) 3277 { 3278 nce_t *nce_ptr; 3279 3280 ill_t *ill; 3281 3282 ill = nce->nce_ill; 3283 ASSERT(ill != NULL); 3284 3285 mutex_enter(&ill->ill_lock); 3286 if (nce->nce_fastpath == NULL) 3287 goto done; 3288 3289 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3290 3291 if (ill->ill_fastpath_list == nce) { 3292 ill->ill_fastpath_list = nce->nce_fastpath; 3293 } else { 3294 nce_ptr = ill->ill_fastpath_list; 3295 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3296 if (nce_ptr->nce_fastpath == nce) { 3297 nce_ptr->nce_fastpath = nce->nce_fastpath; 3298 break; 3299 } 3300 nce_ptr = nce_ptr->nce_fastpath; 3301 } 3302 } 3303 3304 nce->nce_fastpath = NULL; 3305 done: 3306 mutex_exit(&ill->ill_lock); 3307 } 3308 3309 /* 3310 * Update all NCE's that are not in fastpath mode and 3311 * have an nce_fp_mp that matches mp. mp->b_cont contains 3312 * the fastpath header. 3313 * 3314 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 3315 */ 3316 boolean_t 3317 ndp_fastpath_update(nce_t *nce, void *arg) 3318 { 3319 mblk_t *mp, *fp_mp; 3320 uchar_t *mp_rptr, *ud_mp_rptr; 3321 mblk_t *ud_mp = nce->nce_res_mp; 3322 ptrdiff_t cmplen; 3323 3324 if (nce->nce_flags & NCE_F_MAPPING) 3325 return (B_TRUE); 3326 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3327 return (B_TRUE); 3328 3329 ip2dbg(("ndp_fastpath_update: trying\n")); 3330 mp = (mblk_t *)arg; 3331 mp_rptr = mp->b_rptr; 3332 cmplen = mp->b_wptr - mp_rptr; 3333 ASSERT(cmplen >= 0); 3334 ud_mp_rptr = ud_mp->b_rptr; 3335 /* 3336 * The nce is locked here to prevent any other threads 3337 * from accessing and changing nce_res_mp when the IPv6 address 3338 * becomes resolved to an lla while we're in the middle 3339 * of looking at and comparing the hardware address (lla). 3340 * It is also locked to prevent multiple threads in nce_fastpath_update 3341 * from examining nce_res_mp atthe same time. 3342 */ 3343 mutex_enter(&nce->nce_lock); 3344 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3345 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3346 mutex_exit(&nce->nce_lock); 3347 /* 3348 * Don't take the ire off the fastpath list yet, 3349 * since the response may come later. 3350 */ 3351 return (B_FALSE); 3352 } 3353 /* Matched - install mp as the fastpath mp */ 3354 ip1dbg(("ndp_fastpath_update: match\n")); 3355 fp_mp = dupb(mp->b_cont); 3356 if (fp_mp != NULL) { 3357 nce->nce_fp_mp = fp_mp; 3358 } 3359 mutex_exit(&nce->nce_lock); 3360 return (B_TRUE); 3361 } 3362 3363 /* 3364 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3365 * driver. Note that it assumes IP is exclusive... 3366 */ 3367 /* ARGSUSED */ 3368 void 3369 ndp_fastpath_flush(nce_t *nce, char *arg) 3370 { 3371 if (nce->nce_flags & NCE_F_MAPPING) 3372 return; 3373 /* No fastpath info? */ 3374 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3375 return; 3376 3377 if (nce->nce_ipversion == IPV4_VERSION && 3378 nce->nce_flags & NCE_F_BCAST) { 3379 /* 3380 * IPv4 BROADCAST entries: 3381 * We can't delete the nce since it is difficult to 3382 * recreate these without going through the 3383 * ipif down/up dance. 3384 * 3385 * All access to nce->nce_fp_mp in the case of these 3386 * is protected by nce_lock. 3387 */ 3388 mutex_enter(&nce->nce_lock); 3389 if (nce->nce_fp_mp != NULL) { 3390 freeb(nce->nce_fp_mp); 3391 nce->nce_fp_mp = NULL; 3392 mutex_exit(&nce->nce_lock); 3393 nce_fastpath(nce); 3394 } else { 3395 mutex_exit(&nce->nce_lock); 3396 } 3397 } else { 3398 /* Just delete the NCE... */ 3399 ndp_delete(nce); 3400 } 3401 } 3402 3403 /* 3404 * Return a pointer to a given option in the packet. 3405 * Assumes that option part of the packet have already been validated. 3406 */ 3407 nd_opt_hdr_t * 3408 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3409 { 3410 while (optlen > 0) { 3411 if (opt->nd_opt_type == opt_type) 3412 return (opt); 3413 optlen -= 8 * opt->nd_opt_len; 3414 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3415 } 3416 return (NULL); 3417 } 3418 3419 /* 3420 * Verify all option lengths present are > 0, also check to see 3421 * if the option lengths and packet length are consistent. 3422 */ 3423 boolean_t 3424 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3425 { 3426 ASSERT(opt != NULL); 3427 while (optlen > 0) { 3428 if (opt->nd_opt_len == 0) 3429 return (B_FALSE); 3430 optlen -= 8 * opt->nd_opt_len; 3431 if (optlen < 0) 3432 return (B_FALSE); 3433 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3434 } 3435 return (B_TRUE); 3436 } 3437 3438 /* 3439 * ndp_walk function. 3440 * Free a fraction of the NCE cache entries. 3441 * A fraction of zero means to not free any in that category. 3442 */ 3443 void 3444 ndp_cache_reclaim(nce_t *nce, char *arg) 3445 { 3446 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3447 uint_t rand; 3448 3449 if (nce->nce_flags & NCE_F_PERMANENT) 3450 return; 3451 3452 rand = (uint_t)lbolt + 3453 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3454 if (ncr->ncr_host != 0 && 3455 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3456 ndp_delete(nce); 3457 return; 3458 } 3459 } 3460 3461 /* 3462 * ndp_walk function. 3463 * Count the number of NCEs that can be deleted. 3464 * These would be hosts but not routers. 3465 */ 3466 void 3467 ndp_cache_count(nce_t *nce, char *arg) 3468 { 3469 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3470 3471 if (nce->nce_flags & NCE_F_PERMANENT) 3472 return; 3473 3474 ncc->ncc_total++; 3475 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3476 ncc->ncc_host++; 3477 } 3478 3479 #ifdef DEBUG 3480 void 3481 nce_trace_ref(nce_t *nce) 3482 { 3483 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3484 3485 if (nce->nce_trace_disable) 3486 return; 3487 3488 if (!th_trace_ref(nce, nce->nce_ill->ill_ipst)) { 3489 nce->nce_trace_disable = B_TRUE; 3490 nce_trace_cleanup(nce); 3491 } 3492 } 3493 3494 void 3495 nce_untrace_ref(nce_t *nce) 3496 { 3497 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3498 3499 if (!nce->nce_trace_disable) 3500 th_trace_unref(nce); 3501 } 3502 3503 static void 3504 nce_trace_cleanup(const nce_t *nce) 3505 { 3506 th_trace_cleanup(nce, nce->nce_trace_disable); 3507 } 3508 #endif 3509 3510 /* 3511 * Called when address resolution fails due to a timeout. 3512 * Send an ICMP unreachable in response to all queued packets. 3513 */ 3514 void 3515 arp_resolv_failed(nce_t *nce) 3516 { 3517 mblk_t *mp, *nxt_mp, *first_mp; 3518 char buf[INET6_ADDRSTRLEN]; 3519 zoneid_t zoneid = GLOBAL_ZONEID; 3520 struct in_addr ipv4addr; 3521 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3522 3523 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3524 ip3dbg(("arp_resolv_failed: dst %s\n", 3525 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3526 mutex_enter(&nce->nce_lock); 3527 mp = nce->nce_qd_mp; 3528 nce->nce_qd_mp = NULL; 3529 mutex_exit(&nce->nce_lock); 3530 3531 while (mp != NULL) { 3532 nxt_mp = mp->b_next; 3533 mp->b_next = NULL; 3534 mp->b_prev = NULL; 3535 3536 first_mp = mp; 3537 /* 3538 * Send icmp unreachable messages 3539 * to the hosts. 3540 */ 3541 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst); 3542 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3543 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3544 ICMP_HOST_UNREACHABLE, zoneid, ipst); 3545 mp = nxt_mp; 3546 } 3547 } 3548 3549 int 3550 ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3551 nce_t **newnce, nce_t *src_nce) 3552 { 3553 int err; 3554 nce_t *nce; 3555 in6_addr_t addr6; 3556 ip_stack_t *ipst = ill->ill_ipst; 3557 3558 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3559 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3560 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3561 /* 3562 * NOTE: IPv4 never matches across the illgrp since the NCE's we're 3563 * looking up have fastpath headers that are inherently per-ill. 3564 */ 3565 nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce); 3566 if (nce == NULL) { 3567 err = ndp_add_v4(ill, addr, flags, newnce, src_nce); 3568 } else { 3569 *newnce = nce; 3570 err = EEXIST; 3571 } 3572 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3573 return (err); 3574 } 3575 3576 /* 3577 * NDP Cache Entry creation routine for IPv4. 3578 * Mapped entries are handled in arp. 3579 * This routine must always be called with ndp4->ndp_g_lock held. 3580 * Prior to return, nce_refcnt is incremented. 3581 */ 3582 static int 3583 ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3584 nce_t **newnce, nce_t *src_nce) 3585 { 3586 static nce_t nce_nil; 3587 nce_t *nce; 3588 mblk_t *mp; 3589 mblk_t *template = NULL; 3590 nce_t **ncep; 3591 ip_stack_t *ipst = ill->ill_ipst; 3592 uint16_t state = ND_INITIAL; 3593 int err; 3594 3595 ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock)); 3596 ASSERT(!ill->ill_isv6); 3597 ASSERT((flags & NCE_F_MAPPING) == 0); 3598 3599 if (ill->ill_resolver_mp == NULL) 3600 return (EINVAL); 3601 /* 3602 * Allocate the mblk to hold the nce. 3603 */ 3604 mp = allocb(sizeof (nce_t), BPRI_MED); 3605 if (mp == NULL) 3606 return (ENOMEM); 3607 3608 nce = (nce_t *)mp->b_rptr; 3609 mp->b_wptr = (uchar_t *)&nce[1]; 3610 *nce = nce_nil; 3611 nce->nce_ill = ill; 3612 nce->nce_ipversion = IPV4_VERSION; 3613 nce->nce_flags = flags; 3614 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3615 nce->nce_rcnt = ill->ill_xmit_count; 3616 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3617 nce->nce_mask = ipv6_all_ones; 3618 nce->nce_extract_mask = ipv6_all_zeros; 3619 nce->nce_ll_extract_start = 0; 3620 nce->nce_qd_mp = NULL; 3621 nce->nce_mp = mp; 3622 /* This one is for nce getting created */ 3623 nce->nce_refcnt = 1; 3624 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3625 ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3626 3627 nce->nce_trace_disable = B_FALSE; 3628 3629 if (src_nce != NULL) { 3630 /* 3631 * src_nce has been provided by the caller. The only 3632 * caller who provides a non-null, non-broadcast 3633 * src_nce is from ip_newroute() which must pass in 3634 * a ND_REACHABLE src_nce (this condition is verified 3635 * via an ASSERT for the save_ire->ire_nce in ip_newroute()) 3636 */ 3637 mutex_enter(&src_nce->nce_lock); 3638 state = src_nce->nce_state; 3639 if ((src_nce->nce_flags & NCE_F_CONDEMNED) || 3640 (ipst->ips_ndp4->ndp_g_hw_change > 0)) { 3641 /* 3642 * src_nce has been deleted, or 3643 * ip_arp_news is in the middle of 3644 * flushing entries in the the nce. 3645 * Fail the add, since we don't know 3646 * if it is safe to copy the contents of 3647 * src_nce 3648 */ 3649 DTRACE_PROBE2(nce__bad__src__nce, 3650 nce_t *, src_nce, ill_t *, ill); 3651 mutex_exit(&src_nce->nce_lock); 3652 err = EINVAL; 3653 goto err_ret; 3654 } 3655 template = copyb(src_nce->nce_res_mp); 3656 mutex_exit(&src_nce->nce_lock); 3657 if (template == NULL) { 3658 err = ENOMEM; 3659 goto err_ret; 3660 } 3661 } else if (flags & NCE_F_BCAST) { 3662 /* 3663 * broadcast nce. 3664 */ 3665 template = copyb(ill->ill_bcast_mp); 3666 if (template == NULL) { 3667 err = ENOMEM; 3668 goto err_ret; 3669 } 3670 state = ND_REACHABLE; 3671 } else if (ill->ill_net_type == IRE_IF_NORESOLVER) { 3672 /* 3673 * NORESOLVER entries are always created in the REACHABLE 3674 * state. We create a nce_res_mp with the IP nexthop address 3675 * in the destination address in the DLPI hdr if the 3676 * physical length is exactly 4 bytes. 3677 * 3678 * XXX not clear which drivers set ill_phys_addr_length to 3679 * IP_ADDR_LEN. 3680 */ 3681 if (ill->ill_phys_addr_length == IP_ADDR_LEN) { 3682 template = ill_dlur_gen((uchar_t *)addr, 3683 ill->ill_phys_addr_length, 3684 ill->ill_sap, ill->ill_sap_length); 3685 } else { 3686 template = copyb(ill->ill_resolver_mp); 3687 } 3688 if (template == NULL) { 3689 err = ENOMEM; 3690 goto err_ret; 3691 } 3692 state = ND_REACHABLE; 3693 } 3694 nce->nce_fp_mp = NULL; 3695 nce->nce_res_mp = template; 3696 nce->nce_state = state; 3697 if (state == ND_REACHABLE) { 3698 nce->nce_last = TICK_TO_MSEC(lbolt64); 3699 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3700 } else { 3701 nce->nce_last = 0; 3702 if (state == ND_INITIAL) 3703 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3704 } 3705 3706 ASSERT((nce->nce_res_mp == NULL && nce->nce_state == ND_INITIAL) || 3707 (nce->nce_res_mp != NULL && nce->nce_state == ND_REACHABLE)); 3708 /* 3709 * Atomically ensure that the ill is not CONDEMNED, before 3710 * adding the NCE. 3711 */ 3712 mutex_enter(&ill->ill_lock); 3713 if (ill->ill_state_flags & ILL_CONDEMNED) { 3714 mutex_exit(&ill->ill_lock); 3715 err = EINVAL; 3716 goto err_ret; 3717 } 3718 if ((nce->nce_next = *ncep) != NULL) 3719 nce->nce_next->nce_ptpn = &nce->nce_next; 3720 *ncep = nce; 3721 nce->nce_ptpn = ncep; 3722 *newnce = nce; 3723 /* This one is for nce being used by an active thread */ 3724 NCE_REFHOLD(*newnce); 3725 3726 /* Bump up the number of nce's referencing this ill */ 3727 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 3728 (char *), "nce", (void *), nce); 3729 ill->ill_nce_cnt++; 3730 mutex_exit(&ill->ill_lock); 3731 DTRACE_PROBE1(ndp__add__v4, nce_t *, nce); 3732 return (0); 3733 err_ret: 3734 freeb(mp); 3735 freemsg(template); 3736 return (err); 3737 } 3738 3739 /* 3740 * ndp_walk routine to delete all entries that have a given destination or 3741 * gateway address and cached link layer (MAC) address. This is used when ARP 3742 * informs us that a network-to-link-layer mapping may have changed. 3743 */ 3744 void 3745 nce_delete_hw_changed(nce_t *nce, void *arg) 3746 { 3747 nce_hw_map_t *hwm = arg; 3748 mblk_t *mp; 3749 dl_unitdata_req_t *dlu; 3750 uchar_t *macaddr; 3751 ill_t *ill; 3752 int saplen; 3753 ipaddr_t nce_addr; 3754 3755 if (nce->nce_state != ND_REACHABLE) 3756 return; 3757 3758 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3759 if (nce_addr != hwm->hwm_addr) 3760 return; 3761 3762 mutex_enter(&nce->nce_lock); 3763 if ((mp = nce->nce_res_mp) == NULL) { 3764 mutex_exit(&nce->nce_lock); 3765 return; 3766 } 3767 dlu = (dl_unitdata_req_t *)mp->b_rptr; 3768 macaddr = (uchar_t *)(dlu + 1); 3769 ill = nce->nce_ill; 3770 if ((saplen = ill->ill_sap_length) > 0) 3771 macaddr += saplen; 3772 else 3773 saplen = -saplen; 3774 3775 /* 3776 * If the hardware address is unchanged, then leave this one alone. 3777 * Note that saplen == abs(saplen) now. 3778 */ 3779 if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && 3780 bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { 3781 mutex_exit(&nce->nce_lock); 3782 return; 3783 } 3784 mutex_exit(&nce->nce_lock); 3785 3786 DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); 3787 ndp_delete(nce); 3788 } 3789 3790 /* 3791 * This function verifies whether a given IPv4 address is potentially known to 3792 * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t, 3793 * so that it can continue to look for hardware changes on that address. 3794 */ 3795 boolean_t 3796 ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns) 3797 { 3798 nce_t *nce; 3799 struct in_addr nceaddr; 3800 ip_stack_t *ipst = ns->netstack_ip; 3801 3802 if (addr == INADDR_ANY) 3803 return (B_FALSE); 3804 3805 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3806 nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr); 3807 for (; nce != NULL; nce = nce->nce_next) { 3808 /* Note that only v4 mapped entries are in the table. */ 3809 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 3810 if (addr == nceaddr.s_addr && 3811 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 3812 /* Single flag check; no lock needed */ 3813 if (!(nce->nce_flags & NCE_F_CONDEMNED)) 3814 break; 3815 } 3816 } 3817 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3818 return (nce != NULL); 3819 } 3820 3821 /* 3822 * Wrapper around ipif_lookup_addr_exact_v6() that allows ND to work properly 3823 * with IPMP. Specifically, since neighbor discovery is always done on 3824 * underlying interfaces (even for addresses owned by an IPMP interface), we 3825 * need to check for `v6addrp' on both `ill' and on the IPMP meta-interface 3826 * associated with `ill' (if it exists). 3827 */ 3828 static ipif_t * 3829 ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill) 3830 { 3831 ipif_t *ipif; 3832 ip_stack_t *ipst = ill->ill_ipst; 3833 3834 ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst); 3835 if (ipif == NULL && IS_UNDER_IPMP(ill)) { 3836 if ((ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) { 3837 ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst); 3838 ill_refrele(ill); 3839 } 3840 } 3841 return (ipif); 3842 } 3843