1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/sysmacros.h> 33 #include <sys/errno.h> 34 #include <sys/dlpi.h> 35 #include <sys/socket.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/vtrace.h> 41 #include <sys/kmem.h> 42 #include <sys/zone.h> 43 #include <sys/ethernet.h> 44 #include <sys/sdt.h> 45 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/if_dl.h> 49 #include <net/route.h> 50 #include <netinet/in.h> 51 #include <netinet/ip6.h> 52 #include <netinet/icmp6.h> 53 54 #include <inet/common.h> 55 #include <inet/mi.h> 56 #include <inet/mib2.h> 57 #include <inet/nd.h> 58 #include <inet/ip.h> 59 #include <inet/ip_impl.h> 60 #include <inet/ipclassifier.h> 61 #include <inet/ip_if.h> 62 #include <inet/ip_ire.h> 63 #include <inet/ip_rts.h> 64 #include <inet/ip6.h> 65 #include <inet/ip_ndp.h> 66 #include <inet/ipsec_impl.h> 67 #include <inet/ipsec_info.h> 68 #include <inet/sctp_ip.h> 69 70 /* 71 * Function names with nce_ prefix are static while function 72 * names with ndp_ prefix are used by rest of the IP. 73 * 74 * Lock ordering: 75 * 76 * ndp_g_lock -> ill_lock -> nce_lock 77 * 78 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 79 * nce_next. Nce_lock protects the contents of the NCE (particularly 80 * nce_refcnt). 81 */ 82 83 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 84 uint32_t ll_addr_len); 85 static void nce_ire_delete(nce_t *nce); 86 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 87 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 88 static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *, nce_t *); 89 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 90 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 91 uchar_t *addr); 92 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 93 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 94 static mblk_t *nce_udreq_alloc(ill_t *ill); 95 static void nce_update(nce_t *nce, uint16_t new_state, 96 uchar_t *new_ll_addr); 97 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 98 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 99 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 100 const in6_addr_t *target, int flag); 101 static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t, 102 nce_t **, nce_t *); 103 104 #ifdef DEBUG 105 static void nce_trace_cleanup(const nce_t *); 106 #endif 107 108 #define NCE_HASH_PTR_V4(ipst, addr) \ 109 (&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 110 111 #define NCE_HASH_PTR_V6(ipst, addr) \ 112 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ 113 NCE_TABLE_SIZE)])) 114 115 /* 116 * Compute default flags to use for an advertisement of this nce's address. 117 */ 118 static int 119 nce_advert_flags(const nce_t *nce) 120 { 121 int flag = 0; 122 123 if (nce->nce_flags & NCE_F_ISROUTER) 124 flag |= NDP_ISROUTER; 125 if (!(nce->nce_flags & NCE_F_ANYCAST)) 126 flag |= NDP_ORIDE; 127 128 return (flag); 129 } 130 131 /* Non-tunable probe interval, based on link capabilities */ 132 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 133 134 /* 135 * NDP Cache Entry creation routine. 136 * Mapped entries will never do NUD . 137 * This routine must always be called with ndp6->ndp_g_lock held. 138 * Prior to return, nce_refcnt is incremented. 139 */ 140 int 141 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 142 const in6_addr_t *mask, const in6_addr_t *extract_mask, 143 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 144 nce_t **newnce) 145 { 146 static nce_t nce_nil; 147 nce_t *nce; 148 mblk_t *mp; 149 mblk_t *template; 150 nce_t **ncep; 151 int err; 152 boolean_t dropped = B_FALSE; 153 ip_stack_t *ipst = ill->ill_ipst; 154 155 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 156 ASSERT(ill != NULL && ill->ill_isv6); 157 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 158 ip0dbg(("ndp_add_v6: no addr\n")); 159 return (EINVAL); 160 } 161 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 162 ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags)); 163 return (EINVAL); 164 } 165 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 166 (flags & NCE_F_MAPPING)) { 167 ip0dbg(("ndp_add_v6: extract mask zero for mapping")); 168 return (EINVAL); 169 } 170 /* 171 * Allocate the mblk to hold the nce. 172 * 173 * XXX This can come out of a separate cache - nce_cache. 174 * We don't need the mp anymore as there are no more 175 * "qwriter"s 176 */ 177 mp = allocb(sizeof (nce_t), BPRI_MED); 178 if (mp == NULL) 179 return (ENOMEM); 180 181 nce = (nce_t *)mp->b_rptr; 182 mp->b_wptr = (uchar_t *)&nce[1]; 183 *nce = nce_nil; 184 185 /* 186 * This one holds link layer address 187 */ 188 if (ill->ill_net_type == IRE_IF_RESOLVER) { 189 template = nce_udreq_alloc(ill); 190 } else { 191 if (ill->ill_resolver_mp == NULL) { 192 freeb(mp); 193 return (EINVAL); 194 } 195 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 196 template = copyb(ill->ill_resolver_mp); 197 } 198 if (template == NULL) { 199 freeb(mp); 200 return (ENOMEM); 201 } 202 nce->nce_ill = ill; 203 nce->nce_ipversion = IPV6_VERSION; 204 nce->nce_flags = flags; 205 nce->nce_state = state; 206 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 207 nce->nce_rcnt = ill->ill_xmit_count; 208 nce->nce_addr = *addr; 209 nce->nce_mask = *mask; 210 nce->nce_extract_mask = *extract_mask; 211 nce->nce_ll_extract_start = hw_extract_start; 212 nce->nce_fp_mp = NULL; 213 nce->nce_res_mp = template; 214 if (state == ND_REACHABLE) 215 nce->nce_last = TICK_TO_MSEC(lbolt64); 216 else 217 nce->nce_last = 0; 218 nce->nce_qd_mp = NULL; 219 nce->nce_mp = mp; 220 if (hw_addr != NULL) 221 nce_set_ll(nce, hw_addr); 222 /* This one is for nce getting created */ 223 nce->nce_refcnt = 1; 224 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 225 if (nce->nce_flags & NCE_F_MAPPING) { 226 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 227 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 228 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 229 ncep = &ipst->ips_ndp6->nce_mask_entries; 230 } else { 231 ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 232 } 233 234 nce->nce_trace_disable = B_FALSE; 235 236 /* 237 * Atomically ensure that the ill is not CONDEMNED, before 238 * adding the NCE. 239 */ 240 mutex_enter(&ill->ill_lock); 241 if (ill->ill_state_flags & ILL_CONDEMNED) { 242 mutex_exit(&ill->ill_lock); 243 freeb(mp); 244 freeb(template); 245 return (EINVAL); 246 } 247 if ((nce->nce_next = *ncep) != NULL) 248 nce->nce_next->nce_ptpn = &nce->nce_next; 249 *ncep = nce; 250 nce->nce_ptpn = ncep; 251 *newnce = nce; 252 /* This one is for nce being used by an active thread */ 253 NCE_REFHOLD(*newnce); 254 255 /* Bump up the number of nce's referencing this ill */ 256 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 257 (char *), "nce", (void *), nce); 258 ill->ill_nce_cnt++; 259 mutex_exit(&ill->ill_lock); 260 261 err = 0; 262 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 263 mutex_enter(&nce->nce_lock); 264 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 265 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 266 mutex_exit(&nce->nce_lock); 267 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 268 &ipv6_all_zeros, addr, NDP_PROBE); 269 if (dropped) { 270 mutex_enter(&nce->nce_lock); 271 nce->nce_pcnt++; 272 mutex_exit(&nce->nce_lock); 273 } 274 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 275 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 276 err = EINPROGRESS; 277 } else if (flags & NCE_F_UNSOL_ADV) { 278 /* 279 * We account for the transmit below by assigning one 280 * less than the ndd variable. Subsequent decrements 281 * are done in ndp_timer. 282 */ 283 mutex_enter(&nce->nce_lock); 284 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 285 nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1; 286 mutex_exit(&nce->nce_lock); 287 dropped = nce_xmit(ill, 288 ND_NEIGHBOR_ADVERT, 289 ill, /* ill to be used for extracting ill_nd_lla */ 290 B_TRUE, /* use ill_nd_lla */ 291 addr, /* Source and target of the advertisement pkt */ 292 &ipv6_all_hosts_mcast, /* Destination of the packet */ 293 nce_advert_flags(nce)); 294 mutex_enter(&nce->nce_lock); 295 if (dropped) 296 nce->nce_unsolicit_count++; 297 if (nce->nce_unsolicit_count != 0) { 298 nce->nce_timeout_id = timeout(ndp_timer, nce, 299 MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval)); 300 } 301 mutex_exit(&nce->nce_lock); 302 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 303 } 304 /* 305 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 306 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 307 * We call nce_fastpath from nce_update if the link layer address of 308 * the peer changes from nce_update 309 */ 310 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 311 nce_fastpath(nce); 312 return (err); 313 } 314 315 int 316 ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 317 const in6_addr_t *mask, const in6_addr_t *extract_mask, 318 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 319 nce_t **newnce) 320 { 321 int err = 0; 322 nce_t *nce; 323 ip_stack_t *ipst = ill->ill_ipst; 324 325 ASSERT(ill->ill_isv6); 326 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 327 328 /* Get head of v6 hash table */ 329 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 330 nce = nce_lookup_addr(ill, addr, nce); 331 if (nce == NULL) { 332 err = ndp_add_v6(ill, 333 hw_addr, 334 addr, 335 mask, 336 extract_mask, 337 hw_extract_start, 338 flags, 339 state, 340 newnce); 341 } else { 342 *newnce = nce; 343 err = EEXIST; 344 } 345 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 346 return (err); 347 } 348 349 /* 350 * Remove all the CONDEMNED nces from the appropriate hash table. 351 * We create a private list of NCEs, these may have ires pointing 352 * to them, so the list will be passed through to clean up dependent 353 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 354 */ 355 static void 356 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 357 { 358 nce_t *nce1; 359 nce_t **ptpn; 360 361 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 362 ASSERT(ndp->ndp_g_walker == 0); 363 for (; nce; nce = nce1) { 364 nce1 = nce->nce_next; 365 mutex_enter(&nce->nce_lock); 366 if (nce->nce_flags & NCE_F_CONDEMNED) { 367 ptpn = nce->nce_ptpn; 368 nce1 = nce->nce_next; 369 if (nce1 != NULL) 370 nce1->nce_ptpn = ptpn; 371 *ptpn = nce1; 372 nce->nce_ptpn = NULL; 373 nce->nce_next = NULL; 374 nce->nce_next = *free_nce_list; 375 *free_nce_list = nce; 376 } 377 mutex_exit(&nce->nce_lock); 378 } 379 } 380 381 /* 382 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 383 * will return this NCE. Also no new IREs will be created that 384 * point to this NCE (See ire_add_v6). Also no new timeouts will 385 * be started (See NDP_RESTART_TIMER). 386 * 2. Cancel any currently running timeouts. 387 * 3. If there is an ndp walker, return. The walker will do the cleanup. 388 * This ensures that walkers see a consistent list of NCEs while walking. 389 * 4. Otherwise remove the NCE from the list of NCEs 390 * 5. Delete all IREs pointing to this NCE. 391 */ 392 void 393 ndp_delete(nce_t *nce) 394 { 395 nce_t **ptpn; 396 nce_t *nce1; 397 int ipversion = nce->nce_ipversion; 398 ndp_g_t *ndp; 399 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 400 401 if (ipversion == IPV4_VERSION) 402 ndp = ipst->ips_ndp4; 403 else 404 ndp = ipst->ips_ndp6; 405 406 /* Serialize deletes */ 407 mutex_enter(&nce->nce_lock); 408 if (nce->nce_flags & NCE_F_CONDEMNED) { 409 /* Some other thread is doing the delete */ 410 mutex_exit(&nce->nce_lock); 411 return; 412 } 413 /* 414 * Caller has a refhold. Also 1 ref for being in the list. Thus 415 * refcnt has to be >= 2 416 */ 417 ASSERT(nce->nce_refcnt >= 2); 418 nce->nce_flags |= NCE_F_CONDEMNED; 419 mutex_exit(&nce->nce_lock); 420 421 nce_fastpath_list_delete(nce); 422 423 /* 424 * Cancel any running timer. Timeout can't be restarted 425 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 426 * Passing invalid timeout id is fine. 427 */ 428 if (nce->nce_timeout_id != 0) { 429 (void) untimeout(nce->nce_timeout_id); 430 nce->nce_timeout_id = 0; 431 } 432 433 mutex_enter(&ndp->ndp_g_lock); 434 if (nce->nce_ptpn == NULL) { 435 /* 436 * The last ndp walker has already removed this nce from 437 * the list after we marked the nce CONDEMNED and before 438 * we grabbed the global lock. 439 */ 440 mutex_exit(&ndp->ndp_g_lock); 441 return; 442 } 443 if (ndp->ndp_g_walker > 0) { 444 /* 445 * Can't unlink. The walker will clean up 446 */ 447 ndp->ndp_g_walker_cleanup = B_TRUE; 448 mutex_exit(&ndp->ndp_g_lock); 449 return; 450 } 451 452 /* 453 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 454 * the timer since it is marked CONDEMNED. 455 */ 456 ptpn = nce->nce_ptpn; 457 nce1 = nce->nce_next; 458 if (nce1 != NULL) 459 nce1->nce_ptpn = ptpn; 460 *ptpn = nce1; 461 nce->nce_ptpn = NULL; 462 nce->nce_next = NULL; 463 mutex_exit(&ndp->ndp_g_lock); 464 465 nce_ire_delete(nce); 466 } 467 468 void 469 ndp_inactive(nce_t *nce) 470 { 471 mblk_t **mpp; 472 ill_t *ill; 473 474 ASSERT(nce->nce_refcnt == 0); 475 ASSERT(MUTEX_HELD(&nce->nce_lock)); 476 ASSERT(nce->nce_fastpath == NULL); 477 478 /* Free all nce allocated messages */ 479 mpp = &nce->nce_first_mp_to_free; 480 do { 481 while (*mpp != NULL) { 482 mblk_t *mp; 483 484 mp = *mpp; 485 *mpp = mp->b_next; 486 487 inet_freemsg(mp); 488 } 489 } while (mpp++ != &nce->nce_last_mp_to_free); 490 491 #ifdef DEBUG 492 nce_trace_cleanup(nce); 493 #endif 494 495 ill = nce->nce_ill; 496 mutex_enter(&ill->ill_lock); 497 DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill, 498 (char *), "nce", (void *), nce); 499 ill->ill_nce_cnt--; 500 /* 501 * If the number of nce's associated with this ill have dropped 502 * to zero, check whether we need to restart any operation that 503 * is waiting for this to happen. 504 */ 505 if (ILL_DOWN_OK(ill)) { 506 /* ipif_ill_refrele_tail drops the ill_lock */ 507 ipif_ill_refrele_tail(ill); 508 } else { 509 mutex_exit(&ill->ill_lock); 510 } 511 mutex_destroy(&nce->nce_lock); 512 if (nce->nce_mp != NULL) 513 inet_freemsg(nce->nce_mp); 514 } 515 516 /* 517 * ndp_walk routine. Delete the nce if it is associated with the ill 518 * that is going away. Always called as a writer. 519 */ 520 void 521 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 522 { 523 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 524 ndp_delete(nce); 525 } 526 } 527 528 /* 529 * Walk a list of to be inactive NCEs and blow away all the ires. 530 */ 531 static void 532 nce_ire_delete_list(nce_t *nce) 533 { 534 nce_t *nce_next; 535 536 ASSERT(nce != NULL); 537 while (nce != NULL) { 538 nce_next = nce->nce_next; 539 nce->nce_next = NULL; 540 541 /* 542 * It is possible for the last ndp walker (this thread) 543 * to come here after ndp_delete has marked the nce CONDEMNED 544 * and before it has removed the nce from the fastpath list 545 * or called untimeout. So we need to do it here. It is safe 546 * for both ndp_delete and this thread to do it twice or 547 * even simultaneously since each of the threads has a 548 * reference on the nce. 549 */ 550 nce_fastpath_list_delete(nce); 551 /* 552 * Cancel any running timer. Timeout can't be restarted 553 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 554 * Passing invalid timeout id is fine. 555 */ 556 if (nce->nce_timeout_id != 0) { 557 (void) untimeout(nce->nce_timeout_id); 558 nce->nce_timeout_id = 0; 559 } 560 /* 561 * We might hit this func thus in the v4 case: 562 * ipif_down->ipif_ndp_down->ndp_walk 563 */ 564 565 if (nce->nce_ipversion == IPV4_VERSION) { 566 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 567 IRE_CACHE, nce_ire_delete1, 568 (char *)nce, nce->nce_ill); 569 } else { 570 ASSERT(nce->nce_ipversion == IPV6_VERSION); 571 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 572 IRE_CACHE, nce_ire_delete1, 573 (char *)nce, nce->nce_ill); 574 } 575 NCE_REFRELE_NOTR(nce); 576 nce = nce_next; 577 } 578 } 579 580 /* 581 * Delete an ire when the nce goes away. 582 */ 583 /* ARGSUSED */ 584 static void 585 nce_ire_delete(nce_t *nce) 586 { 587 if (nce->nce_ipversion == IPV6_VERSION) { 588 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 589 nce_ire_delete1, (char *)nce, nce->nce_ill); 590 NCE_REFRELE_NOTR(nce); 591 } else { 592 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 593 nce_ire_delete1, (char *)nce, nce->nce_ill); 594 NCE_REFRELE_NOTR(nce); 595 } 596 } 597 598 /* 599 * ire_walk routine used to delete every IRE that shares this nce 600 */ 601 static void 602 nce_ire_delete1(ire_t *ire, char *nce_arg) 603 { 604 nce_t *nce = (nce_t *)nce_arg; 605 606 ASSERT(ire->ire_type == IRE_CACHE); 607 608 if (ire->ire_nce == nce) { 609 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 610 ire_delete(ire); 611 } 612 } 613 614 /* 615 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 616 */ 617 boolean_t 618 ndp_restart_dad(nce_t *nce) 619 { 620 boolean_t started; 621 boolean_t dropped; 622 623 if (nce == NULL) 624 return (B_FALSE); 625 mutex_enter(&nce->nce_lock); 626 if (nce->nce_state == ND_PROBE) { 627 mutex_exit(&nce->nce_lock); 628 started = B_TRUE; 629 } else if (nce->nce_state == ND_REACHABLE) { 630 nce->nce_state = ND_PROBE; 631 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 632 mutex_exit(&nce->nce_lock); 633 dropped = nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, NULL, 634 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, NDP_PROBE); 635 if (dropped) { 636 mutex_enter(&nce->nce_lock); 637 nce->nce_pcnt++; 638 mutex_exit(&nce->nce_lock); 639 } 640 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 641 started = B_TRUE; 642 } else { 643 mutex_exit(&nce->nce_lock); 644 started = B_FALSE; 645 } 646 return (started); 647 } 648 649 /* 650 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 651 * If one is found, the refcnt on the nce will be incremented. 652 */ 653 nce_t * 654 ndp_lookup_v6(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 655 { 656 nce_t *nce; 657 ip_stack_t *ipst; 658 659 ASSERT(ill != NULL); 660 ipst = ill->ill_ipst; 661 662 ASSERT(ill != NULL && ill->ill_isv6); 663 if (!caller_holds_lock) { 664 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 665 } 666 667 /* Get head of v6 hash table */ 668 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 669 nce = nce_lookup_addr(ill, addr, nce); 670 if (nce == NULL) 671 nce = nce_lookup_mapping(ill, addr); 672 if (!caller_holds_lock) 673 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 674 return (nce); 675 } 676 /* 677 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 678 * If one is found, the refcnt on the nce will be incremented. 679 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 680 * so we skip the nce_lookup_mapping call. 681 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 682 */ 683 nce_t * 684 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 685 { 686 nce_t *nce; 687 in6_addr_t addr6; 688 ip_stack_t *ipst = ill->ill_ipst; 689 690 if (!caller_holds_lock) { 691 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 692 } 693 694 /* Get head of v4 hash table */ 695 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 696 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 697 nce = nce_lookup_addr(ill, &addr6, nce); 698 if (!caller_holds_lock) 699 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 700 return (nce); 701 } 702 703 /* 704 * Cache entry lookup. Try to find an nce matching the parameters passed. 705 * Look only for exact entries (no mappings). If an nce is found, increment 706 * the hold count on that nce. The caller passes in the start of the 707 * appropriate hash table, and must be holding the appropriate global 708 * lock (ndp_g_lock). 709 */ 710 static nce_t * 711 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr, nce_t *nce) 712 { 713 ndp_g_t *ndp; 714 ip_stack_t *ipst = ill->ill_ipst; 715 716 if (ill->ill_isv6) 717 ndp = ipst->ips_ndp6; 718 else 719 ndp = ipst->ips_ndp4; 720 721 ASSERT(ill != NULL); 722 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 723 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 724 return (NULL); 725 for (; nce != NULL; nce = nce->nce_next) { 726 if (nce->nce_ill == ill) { 727 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 728 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 729 &ipv6_all_ones)) { 730 mutex_enter(&nce->nce_lock); 731 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 732 NCE_REFHOLD_LOCKED(nce); 733 mutex_exit(&nce->nce_lock); 734 break; 735 } 736 mutex_exit(&nce->nce_lock); 737 } 738 } 739 } 740 return (nce); 741 } 742 743 /* 744 * Cache entry lookup. Try to find an nce matching the parameters passed. 745 * Look only for mappings. 746 */ 747 static nce_t * 748 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 749 { 750 nce_t *nce; 751 ip_stack_t *ipst = ill->ill_ipst; 752 753 ASSERT(ill != NULL && ill->ill_isv6); 754 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 755 if (!IN6_IS_ADDR_MULTICAST(addr)) 756 return (NULL); 757 nce = ipst->ips_ndp6->nce_mask_entries; 758 for (; nce != NULL; nce = nce->nce_next) 759 if (nce->nce_ill == ill && 760 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 761 mutex_enter(&nce->nce_lock); 762 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 763 NCE_REFHOLD_LOCKED(nce); 764 mutex_exit(&nce->nce_lock); 765 break; 766 } 767 mutex_exit(&nce->nce_lock); 768 } 769 return (nce); 770 } 771 772 /* 773 * Process passed in parameters either from an incoming packet or via 774 * user ioctl. 775 */ 776 void 777 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 778 { 779 ill_t *ill = nce->nce_ill; 780 uint32_t hw_addr_len = ill->ill_nd_lla_len; 781 mblk_t *mp; 782 boolean_t ll_updated = B_FALSE; 783 boolean_t ll_changed; 784 ip_stack_t *ipst = ill->ill_ipst; 785 786 ASSERT(nce->nce_ipversion == IPV6_VERSION); 787 /* 788 * No updates of link layer address or the neighbor state is 789 * allowed, when the cache is in NONUD state. This still 790 * allows for responding to reachability solicitation. 791 */ 792 mutex_enter(&nce->nce_lock); 793 if (nce->nce_state == ND_INCOMPLETE) { 794 if (hw_addr == NULL) { 795 mutex_exit(&nce->nce_lock); 796 return; 797 } 798 nce_set_ll(nce, hw_addr); 799 /* 800 * Update nce state and send the queued packets 801 * back to ip this time ire will be added. 802 */ 803 if (flag & ND_NA_FLAG_SOLICITED) { 804 nce_update(nce, ND_REACHABLE, NULL); 805 } else { 806 nce_update(nce, ND_STALE, NULL); 807 } 808 mutex_exit(&nce->nce_lock); 809 nce_fastpath(nce); 810 mutex_enter(&nce->nce_lock); 811 mp = nce->nce_qd_mp; 812 nce->nce_qd_mp = NULL; 813 mutex_exit(&nce->nce_lock); 814 while (mp != NULL) { 815 mblk_t *nxt_mp, *data_mp; 816 817 nxt_mp = mp->b_next; 818 mp->b_next = NULL; 819 820 if (mp->b_datap->db_type == M_CTL) 821 data_mp = mp->b_cont; 822 else 823 data_mp = mp; 824 if (data_mp->b_prev != NULL) { 825 ill_t *inbound_ill; 826 queue_t *fwdq = NULL; 827 uint_t ifindex; 828 829 ifindex = (uint_t)(uintptr_t)data_mp->b_prev; 830 inbound_ill = ill_lookup_on_ifindex(ifindex, 831 B_TRUE, NULL, NULL, NULL, NULL, ipst); 832 if (inbound_ill == NULL) { 833 data_mp->b_prev = NULL; 834 freemsg(mp); 835 return; 836 } else { 837 fwdq = inbound_ill->ill_rq; 838 } 839 data_mp->b_prev = NULL; 840 /* 841 * Send a forwarded packet back into ip_rput_v6 842 * just as in ire_send_v6(). 843 * Extract the queue from b_prev (set in 844 * ip_rput_data_v6). 845 */ 846 if (fwdq != NULL) { 847 /* 848 * Forwarded packets hop count will 849 * get decremented in ip_rput_data_v6 850 */ 851 if (data_mp != mp) 852 freeb(mp); 853 put(fwdq, data_mp); 854 } else { 855 /* 856 * Send locally originated packets back 857 * into * ip_wput_v6. 858 */ 859 put(ill->ill_wq, mp); 860 } 861 ill_refrele(inbound_ill); 862 } else { 863 put(ill->ill_wq, mp); 864 } 865 mp = nxt_mp; 866 } 867 return; 868 } 869 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 870 if (!is_adv) { 871 /* If this is a SOLICITATION request only */ 872 if (ll_changed) 873 nce_update(nce, ND_STALE, hw_addr); 874 mutex_exit(&nce->nce_lock); 875 return; 876 } 877 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 878 /* If in any other state than REACHABLE, ignore */ 879 if (nce->nce_state == ND_REACHABLE) { 880 nce_update(nce, ND_STALE, NULL); 881 } 882 mutex_exit(&nce->nce_lock); 883 return; 884 } else { 885 if (ll_changed) { 886 nce_update(nce, ND_UNCHANGED, hw_addr); 887 ll_updated = B_TRUE; 888 } 889 if (flag & ND_NA_FLAG_SOLICITED) { 890 nce_update(nce, ND_REACHABLE, NULL); 891 } else { 892 if (ll_updated) { 893 nce_update(nce, ND_STALE, NULL); 894 } 895 } 896 mutex_exit(&nce->nce_lock); 897 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 898 NCE_F_ISROUTER)) { 899 ire_t *ire; 900 901 /* 902 * Router turned to host. We need to remove the 903 * entry as well as any default route that may be 904 * using this as a next hop. This is required by 905 * section 7.2.5 of RFC 2461. 906 */ 907 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 908 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 909 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 910 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 911 MATCH_IRE_DEFAULT, ipst); 912 if (ire != NULL) { 913 ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); 914 ire_delete(ire); 915 ire_refrele(ire); 916 } 917 ndp_delete(nce); 918 } 919 } 920 } 921 922 /* 923 * Pass arg1 to the pfi supplied, along with each nce in existence. 924 * ndp_walk() places a REFHOLD on the nce and drops the lock when 925 * walking the hash list. 926 */ 927 void 928 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 929 boolean_t trace) 930 { 931 932 nce_t *nce; 933 nce_t *nce1; 934 nce_t **ncep; 935 nce_t *free_nce_list = NULL; 936 937 mutex_enter(&ndp->ndp_g_lock); 938 /* Prevent ndp_delete from unlink and free of NCE */ 939 ndp->ndp_g_walker++; 940 mutex_exit(&ndp->ndp_g_lock); 941 for (ncep = ndp->nce_hash_tbl; 942 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 943 for (nce = *ncep; nce != NULL; nce = nce1) { 944 nce1 = nce->nce_next; 945 if (ill == NULL || nce->nce_ill == ill) { 946 if (trace) { 947 NCE_REFHOLD(nce); 948 (*pfi)(nce, arg1); 949 NCE_REFRELE(nce); 950 } else { 951 NCE_REFHOLD_NOTR(nce); 952 (*pfi)(nce, arg1); 953 NCE_REFRELE_NOTR(nce); 954 } 955 } 956 } 957 } 958 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 959 nce1 = nce->nce_next; 960 if (ill == NULL || nce->nce_ill == ill) { 961 if (trace) { 962 NCE_REFHOLD(nce); 963 (*pfi)(nce, arg1); 964 NCE_REFRELE(nce); 965 } else { 966 NCE_REFHOLD_NOTR(nce); 967 (*pfi)(nce, arg1); 968 NCE_REFRELE_NOTR(nce); 969 } 970 } 971 } 972 mutex_enter(&ndp->ndp_g_lock); 973 ndp->ndp_g_walker--; 974 /* 975 * While NCE's are removed from global list they are placed 976 * in a private list, to be passed to nce_ire_delete_list(). 977 * The reason is, there may be ires pointing to this nce 978 * which needs to cleaned up. 979 */ 980 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 981 /* Time to delete condemned entries */ 982 for (ncep = ndp->nce_hash_tbl; 983 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 984 nce = *ncep; 985 if (nce != NULL) { 986 nce_remove(ndp, nce, &free_nce_list); 987 } 988 } 989 nce = ndp->nce_mask_entries; 990 if (nce != NULL) { 991 nce_remove(ndp, nce, &free_nce_list); 992 } 993 ndp->ndp_g_walker_cleanup = B_FALSE; 994 } 995 996 mutex_exit(&ndp->ndp_g_lock); 997 998 if (free_nce_list != NULL) { 999 nce_ire_delete_list(free_nce_list); 1000 } 1001 } 1002 1003 /* 1004 * Walk everything. 1005 * Note that ill can be NULL hence can't derive the ipst from it. 1006 */ 1007 void 1008 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst) 1009 { 1010 ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE); 1011 ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE); 1012 } 1013 1014 /* 1015 * Process resolve requests. Handles both mapped entries 1016 * as well as cases that needs to be send out on the wire. 1017 * Lookup a NCE for a given IRE. Regardless of whether one exists 1018 * or one is created, we defer making ire point to nce until the 1019 * ire is actually added at which point the nce_refcnt on the nce is 1020 * incremented. This is done primarily to have symmetry between ire_add() 1021 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1022 */ 1023 int 1024 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 1025 { 1026 nce_t *nce; 1027 int err = 0; 1028 uint32_t ms; 1029 mblk_t *mp_nce = NULL; 1030 ip_stack_t *ipst = ill->ill_ipst; 1031 1032 ASSERT(ill->ill_isv6); 1033 if (IN6_IS_ADDR_MULTICAST(dst)) { 1034 err = nce_set_multicast(ill, dst); 1035 return (err); 1036 } 1037 err = ndp_lookup_then_add_v6(ill, 1038 NULL, /* No hardware address */ 1039 dst, 1040 &ipv6_all_ones, 1041 &ipv6_all_zeros, 1042 0, 1043 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1044 ND_INCOMPLETE, 1045 &nce); 1046 1047 switch (err) { 1048 case 0: 1049 /* 1050 * New cache entry was created. Make sure that the state 1051 * is not ND_INCOMPLETE. It can be in some other state 1052 * even before we send out the solicitation as we could 1053 * get un-solicited advertisements. 1054 * 1055 * If this is an XRESOLV interface, simply return 0, 1056 * since we don't want to solicit just yet. 1057 */ 1058 if (ill->ill_flags & ILLF_XRESOLV) { 1059 NCE_REFRELE(nce); 1060 return (0); 1061 } 1062 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1063 mutex_enter(&nce->nce_lock); 1064 if (nce->nce_state != ND_INCOMPLETE) { 1065 mutex_exit(&nce->nce_lock); 1066 rw_exit(&ipst->ips_ill_g_lock); 1067 NCE_REFRELE(nce); 1068 return (0); 1069 } 1070 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1071 if (mp_nce == NULL) { 1072 /* The caller will free mp */ 1073 mutex_exit(&nce->nce_lock); 1074 rw_exit(&ipst->ips_ill_g_lock); 1075 ndp_delete(nce); 1076 NCE_REFRELE(nce); 1077 return (ENOMEM); 1078 } 1079 ms = nce_solicit(nce, mp_nce); 1080 rw_exit(&ipst->ips_ill_g_lock); 1081 if (ms == 0) { 1082 /* The caller will free mp */ 1083 if (mp_nce != mp) 1084 freeb(mp_nce); 1085 mutex_exit(&nce->nce_lock); 1086 ndp_delete(nce); 1087 NCE_REFRELE(nce); 1088 return (EBUSY); 1089 } 1090 mutex_exit(&nce->nce_lock); 1091 NDP_RESTART_TIMER(nce, (clock_t)ms); 1092 NCE_REFRELE(nce); 1093 return (EINPROGRESS); 1094 case EEXIST: 1095 /* Resolution in progress just queue the packet */ 1096 mutex_enter(&nce->nce_lock); 1097 if (nce->nce_state == ND_INCOMPLETE) { 1098 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1099 if (mp_nce == NULL) { 1100 err = ENOMEM; 1101 } else { 1102 nce_queue_mp(nce, mp_nce); 1103 err = EINPROGRESS; 1104 } 1105 } else { 1106 /* 1107 * Any other state implies we have 1108 * a nce but IRE needs to be added ... 1109 * ire_add_v6() will take care of the 1110 * the case when the nce becomes CONDEMNED 1111 * before the ire is added to the table. 1112 */ 1113 err = 0; 1114 } 1115 mutex_exit(&nce->nce_lock); 1116 NCE_REFRELE(nce); 1117 break; 1118 default: 1119 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 1120 break; 1121 } 1122 return (err); 1123 } 1124 1125 /* 1126 * When there is no resolver, the link layer template is passed in 1127 * the IRE. 1128 * Lookup a NCE for a given IRE. Regardless of whether one exists 1129 * or one is created, we defer making ire point to nce until the 1130 * ire is actually added at which point the nce_refcnt on the nce is 1131 * incremented. This is done primarily to have symmetry between ire_add() 1132 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1133 */ 1134 int 1135 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1136 { 1137 nce_t *nce; 1138 int err = 0; 1139 1140 ASSERT(ill != NULL); 1141 ASSERT(ill->ill_isv6); 1142 if (IN6_IS_ADDR_MULTICAST(dst)) { 1143 err = nce_set_multicast(ill, dst); 1144 return (err); 1145 } 1146 1147 err = ndp_lookup_then_add_v6(ill, 1148 NULL, /* hardware address */ 1149 dst, 1150 &ipv6_all_ones, 1151 &ipv6_all_zeros, 1152 0, 1153 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1154 ND_REACHABLE, 1155 &nce); 1156 1157 switch (err) { 1158 case 0: 1159 /* 1160 * Cache entry with a proper resolver cookie was 1161 * created. 1162 */ 1163 NCE_REFRELE(nce); 1164 break; 1165 case EEXIST: 1166 err = 0; 1167 NCE_REFRELE(nce); 1168 break; 1169 default: 1170 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1171 break; 1172 } 1173 return (err); 1174 } 1175 1176 /* 1177 * For each interface an entry is added for the unspecified multicast group. 1178 * Here that mapping is used to form the multicast cache entry for a particular 1179 * multicast destination. 1180 */ 1181 static int 1182 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1183 { 1184 nce_t *mnce; /* Multicast mapping entry */ 1185 nce_t *nce; 1186 uchar_t *hw_addr = NULL; 1187 int err = 0; 1188 ip_stack_t *ipst = ill->ill_ipst; 1189 1190 ASSERT(ill != NULL); 1191 ASSERT(ill->ill_isv6); 1192 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1193 1194 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1195 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst)); 1196 nce = nce_lookup_addr(ill, dst, nce); 1197 if (nce != NULL) { 1198 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1199 NCE_REFRELE(nce); 1200 return (0); 1201 } 1202 /* No entry, now lookup for a mapping this should never fail */ 1203 mnce = nce_lookup_mapping(ill, dst); 1204 if (mnce == NULL) { 1205 /* Something broken for the interface. */ 1206 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1207 return (ESRCH); 1208 } 1209 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1210 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1211 /* 1212 * For IRE_IF_RESOLVER a hardware mapping can be 1213 * generated, for IRE_IF_NORESOLVER, resolution cookie 1214 * in the ill is copied in ndp_add_v6(). 1215 */ 1216 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1217 if (hw_addr == NULL) { 1218 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1219 NCE_REFRELE(mnce); 1220 return (ENOMEM); 1221 } 1222 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1223 } 1224 NCE_REFRELE(mnce); 1225 /* 1226 * IRE_IF_NORESOLVER type simply copies the resolution 1227 * cookie passed in. So no hw_addr is needed. 1228 */ 1229 err = ndp_add_v6(ill, 1230 hw_addr, 1231 dst, 1232 &ipv6_all_ones, 1233 &ipv6_all_zeros, 1234 0, 1235 NCE_F_NONUD, 1236 ND_REACHABLE, 1237 &nce); 1238 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1239 if (hw_addr != NULL) 1240 kmem_free(hw_addr, ill->ill_nd_lla_len); 1241 if (err != 0) { 1242 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1243 return (err); 1244 } 1245 NCE_REFRELE(nce); 1246 return (0); 1247 } 1248 1249 /* 1250 * Return the link layer address, and any flags of a nce. 1251 */ 1252 int 1253 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1254 { 1255 nce_t *nce; 1256 in6_addr_t *addr; 1257 sin6_t *sin6; 1258 dl_unitdata_req_t *dl; 1259 1260 ASSERT(ill != NULL && ill->ill_isv6); 1261 sin6 = (sin6_t *)&lnr->lnr_addr; 1262 addr = &sin6->sin6_addr; 1263 1264 nce = ndp_lookup_v6(ill, addr, B_FALSE); 1265 if (nce == NULL) 1266 return (ESRCH); 1267 /* If in INCOMPLETE state, no link layer address is available yet */ 1268 if (nce->nce_state == ND_INCOMPLETE) 1269 goto done; 1270 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1271 if (ill->ill_flags & ILLF_XRESOLV) 1272 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1273 else 1274 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1275 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1276 sizeof (lnr->lnr_hdw_addr)); 1277 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1278 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1279 if (nce->nce_flags & NCE_F_ISROUTER) 1280 lnr->lnr_flags = NDF_ISROUTER_ON; 1281 if (nce->nce_flags & NCE_F_ANYCAST) 1282 lnr->lnr_flags |= NDF_ANYCAST_ON; 1283 done: 1284 NCE_REFRELE(nce); 1285 return (0); 1286 } 1287 1288 /* 1289 * Send Enable/Disable multicast reqs to driver. 1290 */ 1291 int 1292 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1293 uint32_t hw_addr_offset, mblk_t *mp) 1294 { 1295 nce_t *nce; 1296 uchar_t *hw_addr; 1297 ip_stack_t *ipst = ill->ill_ipst; 1298 1299 ASSERT(ill != NULL && ill->ill_isv6); 1300 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1301 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1302 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1303 freemsg(mp); 1304 return (EINVAL); 1305 } 1306 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1307 nce = nce_lookup_mapping(ill, addr); 1308 if (nce == NULL) { 1309 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1310 freemsg(mp); 1311 return (ESRCH); 1312 } 1313 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1314 /* 1315 * Update dl_addr_length and dl_addr_offset for primitives that 1316 * have physical addresses as opposed to full saps 1317 */ 1318 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1319 case DL_ENABMULTI_REQ: 1320 /* Track the state if this is the first enabmulti */ 1321 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1322 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1323 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1324 break; 1325 case DL_DISABMULTI_REQ: 1326 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1327 break; 1328 default: 1329 NCE_REFRELE(nce); 1330 ip1dbg(("ndp_mcastreq: default\n")); 1331 return (EINVAL); 1332 } 1333 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1334 NCE_REFRELE(nce); 1335 ill_dlpi_send(ill, mp); 1336 return (0); 1337 } 1338 1339 /* 1340 * Send a neighbor solicitation. 1341 * Returns number of milliseconds after which we should either rexmit or abort. 1342 * Return of zero means we should abort. 1343 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1344 * 1345 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1346 * the packet. 1347 * NOTE: This routine does not consume mp. 1348 */ 1349 uint32_t 1350 nce_solicit(nce_t *nce, mblk_t *mp) 1351 { 1352 ill_t *ill; 1353 ill_t *src_ill; 1354 ip6_t *ip6h; 1355 in6_addr_t src; 1356 in6_addr_t dst; 1357 ipif_t *ipif; 1358 ip6i_t *ip6i; 1359 boolean_t dropped = B_FALSE; 1360 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 1361 1362 ASSERT(RW_READ_HELD(&ipst->ips_ill_g_lock)); 1363 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1364 ill = nce->nce_ill; 1365 ASSERT(ill != NULL); 1366 1367 if (nce->nce_rcnt == 0) { 1368 return (0); 1369 } 1370 1371 if (mp == NULL) { 1372 ASSERT(nce->nce_qd_mp != NULL); 1373 mp = nce->nce_qd_mp; 1374 } else { 1375 nce_queue_mp(nce, mp); 1376 } 1377 1378 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1379 if (mp->b_datap->db_type == M_CTL) 1380 mp = mp->b_cont; 1381 1382 ip6h = (ip6_t *)mp->b_rptr; 1383 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1384 /* 1385 * This message should have been pulled up already in 1386 * ip_wput_v6. We can't do pullups here because the message 1387 * could be from the nce_qd_mp which could have b_next/b_prev 1388 * non-NULL. 1389 */ 1390 ip6i = (ip6i_t *)ip6h; 1391 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1392 sizeof (ip6i_t) + IPV6_HDR_LEN); 1393 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1394 } 1395 src = ip6h->ip6_src; 1396 /* 1397 * If the src of outgoing packet is one of the assigned interface 1398 * addresses use it, otherwise we will pick the source address below. 1399 */ 1400 src_ill = ill; 1401 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1402 if (ill->ill_group != NULL) 1403 src_ill = ill->ill_group->illgrp_ill; 1404 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1405 for (ipif = src_ill->ill_ipif; ipif != NULL; 1406 ipif = ipif->ipif_next) { 1407 if (IN6_ARE_ADDR_EQUAL(&src, 1408 &ipif->ipif_v6lcl_addr)) { 1409 break; 1410 } 1411 } 1412 if (ipif != NULL) 1413 break; 1414 } 1415 /* 1416 * If no relevant ipif can be found, then it's not one of our 1417 * addresses. Reset to :: and let nce_xmit. If an ipif can be 1418 * found, but it's not yet done with DAD verification, then 1419 * just postpone this transmission until later. 1420 */ 1421 if (src_ill == NULL) 1422 src = ipv6_all_zeros; 1423 else if (!ipif->ipif_addr_ready) 1424 return (ill->ill_reachable_retrans_time); 1425 } 1426 dst = nce->nce_addr; 1427 /* 1428 * If source address is unspecified, nce_xmit will choose 1429 * one for us and initialize the hardware address also 1430 * appropriately. 1431 */ 1432 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1433 src_ill = NULL; 1434 nce->nce_rcnt--; 1435 mutex_exit(&nce->nce_lock); 1436 rw_exit(&ipst->ips_ill_g_lock); 1437 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1438 &dst, 0); 1439 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1440 mutex_enter(&nce->nce_lock); 1441 if (dropped) 1442 nce->nce_rcnt++; 1443 return (ill->ill_reachable_retrans_time); 1444 } 1445 1446 /* 1447 * Attempt to recover an address on an interface that's been marked as a 1448 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1449 * no easy way to just probe the address and have the right thing happen if 1450 * it's no longer in use. Instead, we just bring it up normally and allow the 1451 * regular interface start-up logic to probe for a remaining duplicate and take 1452 * us back down if necessary. 1453 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1454 * ip_ndp_excl. 1455 */ 1456 /* ARGSUSED */ 1457 static void 1458 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1459 { 1460 ill_t *ill = rq->q_ptr; 1461 ipif_t *ipif; 1462 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1463 1464 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1465 /* 1466 * We do not support recovery of proxy ARP'd interfaces, 1467 * because the system lacks a complete proxy ARP mechanism. 1468 */ 1469 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1470 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1471 continue; 1472 } 1473 1474 /* 1475 * If we have already recovered or if the interface is going 1476 * away, then ignore. 1477 */ 1478 mutex_enter(&ill->ill_lock); 1479 if (!(ipif->ipif_flags & IPIF_DUPLICATE) || 1480 (ipif->ipif_flags & (IPIF_MOVING | IPIF_CONDEMNED))) { 1481 mutex_exit(&ill->ill_lock); 1482 continue; 1483 } 1484 1485 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1486 ill->ill_ipif_dup_count--; 1487 mutex_exit(&ill->ill_lock); 1488 ipif->ipif_was_dup = B_TRUE; 1489 1490 if (ipif_ndp_up(ipif) != EINPROGRESS) 1491 (void) ipif_up_done_v6(ipif); 1492 } 1493 freeb(mp); 1494 } 1495 1496 /* 1497 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1498 * As long as someone else holds the address, the interface will stay down. 1499 * When that conflict goes away, the interface is brought back up. This is 1500 * done so that accidental shutdowns of addresses aren't made permanent. Your 1501 * server will recover from a failure. 1502 * 1503 * For DHCP and temporary addresses, recovery is not done in the kernel. 1504 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1505 * 1506 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1507 */ 1508 static void 1509 ipif6_dup_recovery(void *arg) 1510 { 1511 ipif_t *ipif = arg; 1512 1513 ipif->ipif_recovery_id = 0; 1514 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1515 return; 1516 1517 /* 1518 * No lock, because this is just an optimization. 1519 */ 1520 if (ipif->ipif_state_flags & (IPIF_MOVING | IPIF_CONDEMNED)) 1521 return; 1522 1523 /* If the link is down, we'll retry this later */ 1524 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1525 return; 1526 1527 ndp_do_recovery(ipif); 1528 } 1529 1530 /* 1531 * Perform interface recovery by forcing the duplicate interfaces up and 1532 * allowing the system to determine which ones should stay up. 1533 * 1534 * Called both by recovery timer expiry and link-up notification. 1535 */ 1536 void 1537 ndp_do_recovery(ipif_t *ipif) 1538 { 1539 ill_t *ill = ipif->ipif_ill; 1540 mblk_t *mp; 1541 ip_stack_t *ipst = ill->ill_ipst; 1542 1543 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1544 if (mp == NULL) { 1545 mutex_enter(&ill->ill_lock); 1546 if (ipif->ipif_recovery_id == 0 && 1547 !(ipif->ipif_state_flags & (IPIF_MOVING | 1548 IPIF_CONDEMNED))) { 1549 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1550 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1551 } 1552 mutex_exit(&ill->ill_lock); 1553 } else { 1554 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1555 sizeof (ipif->ipif_v6lcl_addr)); 1556 ill_refhold(ill); 1557 qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP, 1558 B_FALSE); 1559 } 1560 } 1561 1562 /* 1563 * Find the solicitation in the given message, and extract printable details 1564 * (MAC and IP addresses) from it. 1565 */ 1566 static nd_neighbor_solicit_t * 1567 ip_ndp_find_solicitation(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, char *hbuf, 1568 size_t hlen, char *sbuf, size_t slen, uchar_t **haddr) 1569 { 1570 nd_neighbor_solicit_t *ns; 1571 ip6_t *ip6h; 1572 uchar_t *addr; 1573 int alen; 1574 1575 alen = 0; 1576 ip6h = (ip6_t *)mp->b_rptr; 1577 if (dl_mp == NULL) { 1578 nd_opt_hdr_t *opt; 1579 int nslen; 1580 1581 /* 1582 * If it's from the fast-path, then it can't be a probe 1583 * message, and thus must include the source linkaddr option. 1584 * Extract that here. 1585 */ 1586 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1587 nslen = mp->b_wptr - (uchar_t *)ns; 1588 if ((nslen -= sizeof (*ns)) > 0) { 1589 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), nslen, 1590 ND_OPT_SOURCE_LINKADDR); 1591 if (opt != NULL && 1592 opt->nd_opt_len * 8 - sizeof (*opt) >= 1593 ill->ill_nd_lla_len) { 1594 addr = (uchar_t *)(opt + 1); 1595 alen = ill->ill_nd_lla_len; 1596 } 1597 } 1598 /* 1599 * We cheat a bit here for the sake of printing usable log 1600 * messages in the rare case where the reply we got was unicast 1601 * without a source linkaddr option, and the interface is in 1602 * fastpath mode. (Sigh.) 1603 */ 1604 if (alen == 0 && ill->ill_type == IFT_ETHER && 1605 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1606 struct ether_header *pether; 1607 1608 pether = (struct ether_header *)((char *)ip6h - 1609 sizeof (*pether)); 1610 addr = pether->ether_shost.ether_addr_octet; 1611 alen = ETHERADDRL; 1612 } 1613 } else { 1614 dl_unitdata_ind_t *dlu; 1615 1616 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1617 alen = dlu->dl_src_addr_length; 1618 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1619 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1620 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1621 if (ill->ill_sap_length < 0) { 1622 alen += ill->ill_sap_length; 1623 } else { 1624 addr += ill->ill_sap_length; 1625 alen -= ill->ill_sap_length; 1626 } 1627 } 1628 } 1629 if (alen > 0) { 1630 *haddr = addr; 1631 (void) mac_colon_addr(addr, alen, hbuf, hlen); 1632 } else { 1633 *haddr = NULL; 1634 (void) strcpy(hbuf, "?"); 1635 } 1636 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1637 (void) inet_ntop(AF_INET6, &ns->nd_ns_target, sbuf, slen); 1638 return (ns); 1639 } 1640 1641 /* 1642 * This is for exclusive changes due to NDP duplicate address detection 1643 * failure. 1644 */ 1645 /* ARGSUSED */ 1646 static void 1647 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1648 { 1649 ill_t *ill = rq->q_ptr; 1650 ipif_t *ipif; 1651 char ibuf[LIFNAMSIZ + 10]; /* 10 digits for logical i/f number */ 1652 char hbuf[MAC_STR_LEN]; 1653 char sbuf[INET6_ADDRSTRLEN]; 1654 nd_neighbor_solicit_t *ns; 1655 mblk_t *dl_mp = NULL; 1656 uchar_t *haddr; 1657 ip_stack_t *ipst = ill->ill_ipst; 1658 1659 if (DB_TYPE(mp) != M_DATA) { 1660 dl_mp = mp; 1661 mp = mp->b_cont; 1662 } 1663 ns = ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, sizeof (hbuf), sbuf, 1664 sizeof (sbuf), &haddr); 1665 if (haddr != NULL && 1666 bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) { 1667 /* 1668 * Ignore conflicts generated by misbehaving switches that just 1669 * reflect our own messages back to us. 1670 */ 1671 goto ignore_conflict; 1672 } 1673 1674 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1675 1676 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1677 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 1678 &ns->nd_ns_target)) { 1679 continue; 1680 } 1681 1682 /* If it's already marked, then don't do anything. */ 1683 if (ipif->ipif_flags & IPIF_DUPLICATE) 1684 continue; 1685 1686 /* 1687 * If this is a failure during duplicate recovery, then don't 1688 * complain. It may take a long time to recover. 1689 */ 1690 if (!ipif->ipif_was_dup) { 1691 ipif_get_name(ipif, ibuf, sizeof (ibuf)); 1692 cmn_err(CE_WARN, "%s has duplicate address %s (in " 1693 "use by %s); disabled", ibuf, sbuf, hbuf); 1694 } 1695 mutex_enter(&ill->ill_lock); 1696 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1697 ipif->ipif_flags |= IPIF_DUPLICATE; 1698 ill->ill_ipif_dup_count++; 1699 mutex_exit(&ill->ill_lock); 1700 (void) ipif_down(ipif, NULL, NULL); 1701 ipif_down_tail(ipif); 1702 mutex_enter(&ill->ill_lock); 1703 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1704 ill->ill_net_type == IRE_IF_RESOLVER && 1705 !(ipif->ipif_state_flags & (IPIF_MOVING | 1706 IPIF_CONDEMNED)) && 1707 ipst->ips_ip_dup_recovery > 0) { 1708 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1709 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1710 } 1711 mutex_exit(&ill->ill_lock); 1712 } 1713 ignore_conflict: 1714 if (dl_mp != NULL) 1715 freeb(dl_mp); 1716 freemsg(mp); 1717 } 1718 1719 /* 1720 * Handle failure by tearing down the ipifs with the specified address. Note 1721 * that tearing down the ipif also means deleting the nce through ipif_down, so 1722 * it's not possible to do recovery by just restarting the nce timer. Instead, 1723 * we start a timer on the ipif. 1724 */ 1725 static void 1726 ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1727 { 1728 if ((mp = copymsg(mp)) != NULL) { 1729 if (dl_mp == NULL) 1730 dl_mp = mp; 1731 else if ((dl_mp = copyb(dl_mp)) != NULL) 1732 dl_mp->b_cont = mp; 1733 if (dl_mp == NULL) { 1734 freemsg(mp); 1735 } else { 1736 ill_refhold(ill); 1737 qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP, 1738 B_FALSE); 1739 } 1740 } 1741 ndp_delete(nce); 1742 } 1743 1744 /* 1745 * Handle a discovered conflict: some other system is advertising that it owns 1746 * one of our IP addresses. We need to defend ourselves, or just shut down the 1747 * interface. 1748 */ 1749 static void 1750 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1751 { 1752 ipif_t *ipif; 1753 uint32_t now; 1754 uint_t maxdefense; 1755 uint_t defs; 1756 ip_stack_t *ipst = ill->ill_ipst; 1757 1758 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 1759 NULL, NULL, ipst); 1760 if (ipif == NULL) 1761 return; 1762 /* 1763 * First, figure out if this address is disposable. 1764 */ 1765 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1766 maxdefense = ipst->ips_ip_max_temp_defend; 1767 else 1768 maxdefense = ipst->ips_ip_max_defend; 1769 1770 /* 1771 * Now figure out how many times we've defended ourselves. Ignore 1772 * defenses that happened long in the past. 1773 */ 1774 now = gethrestime_sec(); 1775 mutex_enter(&nce->nce_lock); 1776 if ((defs = nce->nce_defense_count) > 0 && 1777 now - nce->nce_defense_time > ipst->ips_ip_defend_interval) { 1778 nce->nce_defense_count = defs = 0; 1779 } 1780 nce->nce_defense_count++; 1781 nce->nce_defense_time = now; 1782 mutex_exit(&nce->nce_lock); 1783 ipif_refrele(ipif); 1784 1785 /* 1786 * If we've defended ourselves too many times already, then give up and 1787 * tear down the interface(s) using this address. Otherwise, defend by 1788 * sending out an unsolicited Neighbor Advertisement. 1789 */ 1790 if (defs >= maxdefense) { 1791 ip_ndp_failure(ill, mp, dl_mp, nce); 1792 } else { 1793 char hbuf[MAC_STR_LEN]; 1794 char sbuf[INET6_ADDRSTRLEN]; 1795 uchar_t *haddr; 1796 1797 (void) ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, 1798 sizeof (hbuf), sbuf, sizeof (sbuf), &haddr); 1799 cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1800 hbuf, sbuf, ill->ill_name); 1801 (void) nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, B_FALSE, 1802 &nce->nce_addr, &ipv6_all_hosts_mcast, 1803 nce_advert_flags(nce)); 1804 } 1805 } 1806 1807 static void 1808 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1809 { 1810 nd_neighbor_solicit_t *ns; 1811 uint32_t hlen = ill->ill_nd_lla_len; 1812 uchar_t *haddr = NULL; 1813 icmp6_t *icmp_nd; 1814 ip6_t *ip6h; 1815 nce_t *our_nce = NULL; 1816 in6_addr_t target; 1817 in6_addr_t src; 1818 int len; 1819 int flag = 0; 1820 nd_opt_hdr_t *opt = NULL; 1821 boolean_t bad_solicit = B_FALSE; 1822 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1823 1824 ip6h = (ip6_t *)mp->b_rptr; 1825 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1826 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1827 src = ip6h->ip6_src; 1828 ns = (nd_neighbor_solicit_t *)icmp_nd; 1829 target = ns->nd_ns_target; 1830 if (IN6_IS_ADDR_MULTICAST(&target)) { 1831 if (ip_debug > 2) { 1832 /* ip1dbg */ 1833 pr_addr_dbg("ndp_input_solicit: Target is" 1834 " multicast! %s\n", AF_INET6, &target); 1835 } 1836 bad_solicit = B_TRUE; 1837 goto done; 1838 } 1839 if (len > sizeof (nd_neighbor_solicit_t)) { 1840 /* Options present */ 1841 opt = (nd_opt_hdr_t *)&ns[1]; 1842 len -= sizeof (nd_neighbor_solicit_t); 1843 if (!ndp_verify_optlen(opt, len)) { 1844 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1845 bad_solicit = B_TRUE; 1846 goto done; 1847 } 1848 } 1849 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1850 /* Check to see if this is a valid DAD solicitation */ 1851 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1852 if (ip_debug > 2) { 1853 /* ip1dbg */ 1854 pr_addr_dbg("ndp_input_solicit: IPv6 " 1855 "Destination is not solicited node " 1856 "multicast %s\n", AF_INET6, 1857 &ip6h->ip6_dst); 1858 } 1859 bad_solicit = B_TRUE; 1860 goto done; 1861 } 1862 } 1863 1864 our_nce = ndp_lookup_v6(ill, &target, B_FALSE); 1865 /* 1866 * If this is a valid Solicitation, a permanent 1867 * entry should exist in the cache 1868 */ 1869 if (our_nce == NULL || 1870 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1871 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1872 "ifname=%s ", ill->ill_name)); 1873 if (ip_debug > 2) { 1874 /* ip1dbg */ 1875 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1876 } 1877 bad_solicit = B_TRUE; 1878 goto done; 1879 } 1880 1881 /* At this point we should have a verified NS per spec */ 1882 if (opt != NULL) { 1883 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1884 if (opt != NULL) { 1885 haddr = (uchar_t *)&opt[1]; 1886 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 1887 hlen == 0) { 1888 ip1dbg(("ndp_input_advert: bad SLLA\n")); 1889 bad_solicit = B_TRUE; 1890 goto done; 1891 } 1892 } 1893 } 1894 1895 /* If sending directly to peer, set the unicast flag */ 1896 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 1897 flag |= NDP_UNICAST; 1898 1899 /* 1900 * Create/update the entry for the soliciting node. 1901 * or respond to outstanding queries, don't if 1902 * the source is unspecified address. 1903 */ 1904 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1905 int err; 1906 nce_t *nnce; 1907 1908 ASSERT(ill->ill_isv6); 1909 /* 1910 * Regular solicitations *must* include the Source Link-Layer 1911 * Address option. Ignore messages that do not. 1912 */ 1913 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1914 ip1dbg(("ndp_input_solicit: source link-layer address " 1915 "option missing with a specified source.\n")); 1916 bad_solicit = B_TRUE; 1917 goto done; 1918 } 1919 1920 /* 1921 * This is a regular solicitation. If we're still in the 1922 * process of verifying the address, then don't respond at all 1923 * and don't keep track of the sender. 1924 */ 1925 if (our_nce->nce_state == ND_PROBE) 1926 goto done; 1927 1928 /* 1929 * If the solicitation doesn't have sender hardware address 1930 * (legal for unicast solicitation), then process without 1931 * installing the return NCE. Either we already know it, or 1932 * we'll be forced to look it up when (and if) we reply to the 1933 * packet. 1934 */ 1935 if (haddr == NULL) 1936 goto no_source; 1937 1938 err = ndp_lookup_then_add_v6(ill, 1939 haddr, 1940 &src, /* Soliciting nodes address */ 1941 &ipv6_all_ones, 1942 &ipv6_all_zeros, 1943 0, 1944 0, 1945 ND_STALE, 1946 &nnce); 1947 switch (err) { 1948 case 0: 1949 /* done with this entry */ 1950 NCE_REFRELE(nnce); 1951 break; 1952 case EEXIST: 1953 /* 1954 * B_FALSE indicates this is not an 1955 * an advertisement. 1956 */ 1957 ndp_process(nnce, haddr, 0, B_FALSE); 1958 NCE_REFRELE(nnce); 1959 break; 1960 default: 1961 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1962 err)); 1963 goto done; 1964 } 1965 no_source: 1966 flag |= NDP_SOLICITED; 1967 } else { 1968 /* 1969 * No source link layer address option should be present in a 1970 * valid DAD request. 1971 */ 1972 if (haddr != NULL) { 1973 ip1dbg(("ndp_input_solicit: source link-layer address " 1974 "option present with an unspecified source.\n")); 1975 bad_solicit = B_TRUE; 1976 goto done; 1977 } 1978 if (our_nce->nce_state == ND_PROBE) { 1979 /* 1980 * Internally looped-back probes won't have DLPI 1981 * attached to them. External ones (which are sent by 1982 * multicast) always will. Just ignore our own 1983 * transmissions. 1984 */ 1985 if (dl_mp != NULL) { 1986 /* 1987 * If someone else is probing our address, then 1988 * we've crossed wires. Declare failure. 1989 */ 1990 ip_ndp_failure(ill, mp, dl_mp, our_nce); 1991 } 1992 goto done; 1993 } 1994 /* 1995 * This is a DAD probe. Multicast the advertisement to the 1996 * all-nodes address. 1997 */ 1998 src = ipv6_all_hosts_mcast; 1999 } 2000 flag |= nce_advert_flags(our_nce); 2001 /* Response to a solicitation */ 2002 (void) nce_xmit(ill, 2003 ND_NEIGHBOR_ADVERT, 2004 ill, /* ill to be used for extracting ill_nd_lla */ 2005 B_TRUE, /* use ill_nd_lla */ 2006 &target, /* Source and target of the advertisement pkt */ 2007 &src, /* IP Destination (source of original pkt) */ 2008 flag); 2009 done: 2010 if (bad_solicit) 2011 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 2012 if (our_nce != NULL) 2013 NCE_REFRELE(our_nce); 2014 } 2015 2016 void 2017 ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2018 { 2019 nd_neighbor_advert_t *na; 2020 uint32_t hlen = ill->ill_nd_lla_len; 2021 uchar_t *haddr = NULL; 2022 icmp6_t *icmp_nd; 2023 ip6_t *ip6h; 2024 nce_t *dst_nce = NULL; 2025 in6_addr_t target; 2026 nd_opt_hdr_t *opt = NULL; 2027 int len; 2028 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2029 ip_stack_t *ipst = ill->ill_ipst; 2030 2031 ip6h = (ip6_t *)mp->b_rptr; 2032 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2033 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2034 na = (nd_neighbor_advert_t *)icmp_nd; 2035 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 2036 (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 2037 ip1dbg(("ndp_input_advert: Target is multicast but the " 2038 "solicited flag is not zero\n")); 2039 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2040 return; 2041 } 2042 target = na->nd_na_target; 2043 if (IN6_IS_ADDR_MULTICAST(&target)) { 2044 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 2045 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2046 return; 2047 } 2048 if (len > sizeof (nd_neighbor_advert_t)) { 2049 opt = (nd_opt_hdr_t *)&na[1]; 2050 if (!ndp_verify_optlen(opt, 2051 len - sizeof (nd_neighbor_advert_t))) { 2052 ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); 2053 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2054 return; 2055 } 2056 /* At this point we have a verified NA per spec */ 2057 len -= sizeof (nd_neighbor_advert_t); 2058 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 2059 if (opt != NULL) { 2060 haddr = (uchar_t *)&opt[1]; 2061 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 2062 hlen == 0) { 2063 ip1dbg(("ndp_input_advert: bad SLLA\n")); 2064 BUMP_MIB(mib, 2065 ipv6IfIcmpInBadNeighborAdvertisements); 2066 return; 2067 } 2068 } 2069 } 2070 2071 /* 2072 * If this interface is part of the group look at all the 2073 * ills in the group. 2074 */ 2075 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2076 if (ill->ill_group != NULL) 2077 ill = ill->ill_group->illgrp_ill; 2078 2079 for (; ill != NULL; ill = ill->ill_group_next) { 2080 mutex_enter(&ill->ill_lock); 2081 if (!ILL_CAN_LOOKUP(ill)) { 2082 mutex_exit(&ill->ill_lock); 2083 continue; 2084 } 2085 ill_refhold_locked(ill); 2086 mutex_exit(&ill->ill_lock); 2087 dst_nce = ndp_lookup_v6(ill, &target, B_FALSE); 2088 /* We have to drop the lock since ndp_process calls put* */ 2089 rw_exit(&ipst->ips_ill_g_lock); 2090 if (dst_nce != NULL) { 2091 if ((dst_nce->nce_flags & NCE_F_PERMANENT) && 2092 dst_nce->nce_state == ND_PROBE) { 2093 /* 2094 * Someone else sent an advertisement for an 2095 * address that we're trying to configure. 2096 * Tear it down. Note that dl_mp might be NULL 2097 * if we're getting a unicast reply. This 2098 * isn't typically done (multicast is the norm 2099 * in response to a probe), but ip_ndp_failure 2100 * will handle the dl_mp == NULL case as well. 2101 */ 2102 ip_ndp_failure(ill, mp, dl_mp, dst_nce); 2103 } else if (dst_nce->nce_flags & NCE_F_PERMANENT) { 2104 /* 2105 * Someone just announced one of our local 2106 * addresses. If it wasn't us, then this is a 2107 * conflict. Defend the address or shut it 2108 * down. 2109 */ 2110 if (dl_mp != NULL && 2111 (haddr == NULL || 2112 nce_cmp_ll_addr(dst_nce, haddr, 2113 ill->ill_nd_lla_len))) { 2114 ip_ndp_conflict(ill, mp, dl_mp, 2115 dst_nce); 2116 } 2117 } else { 2118 if (na->nd_na_flags_reserved & 2119 ND_NA_FLAG_ROUTER) { 2120 dst_nce->nce_flags |= NCE_F_ISROUTER; 2121 } 2122 /* B_TRUE indicates this an advertisement */ 2123 ndp_process(dst_nce, haddr, 2124 na->nd_na_flags_reserved, B_TRUE); 2125 } 2126 NCE_REFRELE(dst_nce); 2127 } 2128 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2129 ill_refrele(ill); 2130 } 2131 rw_exit(&ipst->ips_ill_g_lock); 2132 } 2133 2134 /* 2135 * Process NDP neighbor solicitation/advertisement messages. 2136 * The checksum has already checked o.k before reaching here. 2137 */ 2138 void 2139 ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2140 { 2141 icmp6_t *icmp_nd; 2142 ip6_t *ip6h; 2143 int len; 2144 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2145 2146 2147 if (!pullupmsg(mp, -1)) { 2148 ip1dbg(("ndp_input: pullupmsg failed\n")); 2149 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2150 goto done; 2151 } 2152 ip6h = (ip6_t *)mp->b_rptr; 2153 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 2154 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 2155 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 2156 goto done; 2157 } 2158 /* 2159 * NDP does not accept any extension headers between the 2160 * IP header and the ICMP header since e.g. a routing 2161 * header could be dangerous. 2162 * This assumes that any AH or ESP headers are removed 2163 * by ip prior to passing the packet to ndp_input. 2164 */ 2165 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 2166 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 2167 ip6h->ip6_nxt)); 2168 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2169 goto done; 2170 } 2171 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2172 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 2173 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 2174 if (icmp_nd->icmp6_code != 0) { 2175 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 2176 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2177 goto done; 2178 } 2179 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2180 /* 2181 * Make sure packet length is large enough for either 2182 * a NS or a NA icmp packet. 2183 */ 2184 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 2185 ip1dbg(("ndp_input: packet too short\n")); 2186 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2187 goto done; 2188 } 2189 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 2190 ndp_input_solicit(ill, mp, dl_mp); 2191 } else { 2192 ndp_input_advert(ill, mp, dl_mp); 2193 } 2194 done: 2195 freemsg(mp); 2196 } 2197 2198 /* 2199 * nce_xmit is called to form and transmit a ND solicitation or 2200 * advertisement ICMP packet. 2201 * 2202 * If the source address is unspecified and this isn't a probe (used for 2203 * duplicate address detection), an appropriate source address and link layer 2204 * address will be chosen here. The link layer address option is included if 2205 * the source is specified (i.e., all non-probe packets), and omitted (per the 2206 * specification) otherwise. 2207 * 2208 * It returns B_FALSE only if it does a successful put() to the 2209 * corresponding ill's ill_wq otherwise returns B_TRUE. 2210 */ 2211 static boolean_t 2212 nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, 2213 boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, 2214 int flag) 2215 { 2216 uint32_t len; 2217 icmp6_t *icmp6; 2218 mblk_t *mp; 2219 ip6_t *ip6h; 2220 nd_opt_hdr_t *opt; 2221 uint_t plen; 2222 ip6i_t *ip6i; 2223 ipif_t *src_ipif = NULL; 2224 uint8_t *hw_addr; 2225 zoneid_t zoneid = GLOBAL_ZONEID; 2226 2227 /* 2228 * If we have a unspecified source(sender) address, select a 2229 * proper source address for the solicitation here itself so 2230 * that we can initialize the h/w address correctly. This is 2231 * needed for interface groups as source address can come from 2232 * the whole group and the h/w address initialized from ill will 2233 * be wrong if the source address comes from a different ill. 2234 * 2235 * If the sender is specified then we use this address in order 2236 * to lookup the zoneid before calling ip_output_v6(). This is to 2237 * enable unicast ND_NEIGHBOR_ADVERT packets to be routed correctly 2238 * by IP (we cannot guarantee that the global zone has an interface 2239 * route to the destination). 2240 * 2241 * Note that the NA never comes here with the unspecified source 2242 * address. The following asserts that whenever the source 2243 * address is specified, the haddr also should be specified. 2244 */ 2245 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 2246 2247 if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { 2248 ASSERT(operation != ND_NEIGHBOR_ADVERT); 2249 /* 2250 * Pick a source address for this solicitation, but 2251 * restrict the selection to addresses assigned to the 2252 * output interface (or interface group). We do this 2253 * because the destination will create a neighbor cache 2254 * entry for the source address of this packet, so the 2255 * source address had better be a valid neighbor. 2256 */ 2257 src_ipif = ipif_select_source_v6(ill, target, RESTRICT_TO_ILL, 2258 IPV6_PREFER_SRC_DEFAULT, ALL_ZONES); 2259 if (src_ipif == NULL) { 2260 char buf[INET6_ADDRSTRLEN]; 2261 2262 ip1dbg(("nce_xmit: No source ipif for dst %s\n", 2263 inet_ntop(AF_INET6, (char *)target, buf, 2264 sizeof (buf)))); 2265 return (B_TRUE); 2266 } 2267 sender = &src_ipif->ipif_v6src_addr; 2268 hwaddr_ill = src_ipif->ipif_ill; 2269 } else if (!(IN6_IS_ADDR_UNSPECIFIED(sender))) { 2270 zoneid = ipif_lookup_addr_zoneid_v6(sender, ill, ill->ill_ipst); 2271 /* 2272 * It's possible for ipif_lookup_addr_zoneid_v6() to return 2273 * ALL_ZONES if it cannot find a matching ipif for the address 2274 * we are trying to use. In this case we err on the side of 2275 * trying to send the packet by defaulting to the GLOBAL_ZONEID. 2276 */ 2277 if (zoneid == ALL_ZONES) 2278 zoneid = GLOBAL_ZONEID; 2279 } 2280 2281 /* 2282 * Always make sure that the NS/NA packets don't get load 2283 * spread. This is needed so that the probe packets sent 2284 * by the in.mpathd daemon can really go out on the desired 2285 * interface. Probe packets are made to go out on a desired 2286 * interface by including a ip6i with ATTACH_IF flag. As these 2287 * packets indirectly end up sending/receiving NS/NA packets 2288 * (neighbor doing NUD), we have to make sure that NA 2289 * also go out on the same interface. 2290 */ 2291 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7) / 8; 2292 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 2293 plen * 8; 2294 mp = allocb(len, BPRI_LO); 2295 if (mp == NULL) { 2296 if (src_ipif != NULL) 2297 ipif_refrele(src_ipif); 2298 return (B_TRUE); 2299 } 2300 bzero((char *)mp->b_rptr, len); 2301 mp->b_wptr = mp->b_rptr + len; 2302 2303 ip6i = (ip6i_t *)mp->b_rptr; 2304 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2305 ip6i->ip6i_nxt = IPPROTO_RAW; 2306 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2307 if (flag & NDP_PROBE) 2308 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 2309 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2310 2311 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 2312 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2313 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2314 ip6h->ip6_nxt = IPPROTO_ICMPV6; 2315 ip6h->ip6_hops = IPV6_MAX_HOPS; 2316 ip6h->ip6_dst = *target; 2317 icmp6 = (icmp6_t *)&ip6h[1]; 2318 2319 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 2320 sizeof (nd_neighbor_advert_t)); 2321 2322 if (operation == ND_NEIGHBOR_SOLICIT) { 2323 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 2324 2325 if (!(flag & NDP_PROBE)) 2326 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 2327 ip6h->ip6_src = *sender; 2328 ns->nd_ns_target = *target; 2329 if (!(flag & NDP_UNICAST)) { 2330 /* Form multicast address of the target */ 2331 ip6h->ip6_dst = ipv6_solicited_node_mcast; 2332 ip6h->ip6_dst.s6_addr32[3] |= 2333 ns->nd_ns_target.s6_addr32[3]; 2334 } 2335 } else { 2336 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 2337 2338 ASSERT(!(flag & NDP_PROBE)); 2339 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2340 ip6h->ip6_src = *sender; 2341 na->nd_na_target = *sender; 2342 if (flag & NDP_ISROUTER) 2343 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 2344 if (flag & NDP_SOLICITED) 2345 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 2346 if (flag & NDP_ORIDE) 2347 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 2348 } 2349 2350 hw_addr = NULL; 2351 if (!(flag & NDP_PROBE)) { 2352 hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : 2353 hwaddr_ill->ill_phys_addr; 2354 if (hw_addr != NULL) { 2355 /* Fill in link layer address and option len */ 2356 opt->nd_opt_len = (uint8_t)plen; 2357 bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); 2358 } 2359 } 2360 if (hw_addr == NULL) { 2361 /* If there's no link layer address option, then strip it. */ 2362 len -= plen * 8; 2363 mp->b_wptr = mp->b_rptr + len; 2364 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2365 } 2366 2367 icmp6->icmp6_type = (uint8_t)operation; 2368 icmp6->icmp6_code = 0; 2369 /* 2370 * Prepare for checksum by putting icmp length in the icmp 2371 * checksum field. The checksum is calculated in ip_wput_v6. 2372 */ 2373 icmp6->icmp6_cksum = ip6h->ip6_plen; 2374 2375 if (src_ipif != NULL) 2376 ipif_refrele(src_ipif); 2377 2378 ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT); 2379 return (B_FALSE); 2380 } 2381 2382 /* 2383 * Make a link layer address (does not include the SAP) from an nce. 2384 * To form the link layer address, use the last four bytes of ipv6 2385 * address passed in and the fixed offset stored in nce. 2386 */ 2387 static void 2388 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 2389 { 2390 uchar_t *mask, *to; 2391 ill_t *ill = nce->nce_ill; 2392 int len; 2393 2394 if (ill->ill_net_type == IRE_IF_NORESOLVER) 2395 return; 2396 ASSERT(nce->nce_res_mp != NULL); 2397 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 2398 ASSERT(nce->nce_flags & NCE_F_MAPPING); 2399 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 2400 ASSERT(addr != NULL); 2401 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 2402 addrpos, ill->ill_nd_lla_len); 2403 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 2404 IPV6_ADDR_LEN); 2405 mask = (uchar_t *)&nce->nce_extract_mask; 2406 mask += (IPV6_ADDR_LEN - len); 2407 addr += (IPV6_ADDR_LEN - len); 2408 to = addrpos + nce->nce_ll_extract_start; 2409 while (len-- > 0) 2410 *to++ |= *mask++ & *addr++; 2411 } 2412 2413 mblk_t * 2414 nce_udreq_alloc(ill_t *ill) 2415 { 2416 mblk_t *template_mp = NULL; 2417 dl_unitdata_req_t *dlur; 2418 int sap_length; 2419 2420 ASSERT(ill->ill_isv6); 2421 2422 sap_length = ill->ill_sap_length; 2423 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 2424 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 2425 if (template_mp == NULL) 2426 return (NULL); 2427 2428 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 2429 dlur->dl_priority.dl_min = 0; 2430 dlur->dl_priority.dl_max = 0; 2431 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 2432 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 2433 2434 /* Copy in the SAP value. */ 2435 NCE_LL_SAP_COPY(ill, template_mp); 2436 2437 return (template_mp); 2438 } 2439 2440 /* 2441 * NDP retransmit timer. 2442 * This timer goes off when: 2443 * a. It is time to retransmit NS for resolver. 2444 * b. It is time to send reachability probes. 2445 */ 2446 void 2447 ndp_timer(void *arg) 2448 { 2449 nce_t *nce = arg; 2450 ill_t *ill = nce->nce_ill; 2451 uint32_t ms; 2452 char addrbuf[INET6_ADDRSTRLEN]; 2453 mblk_t *mp; 2454 boolean_t dropped = B_FALSE; 2455 ip_stack_t *ipst = ill->ill_ipst; 2456 2457 /* 2458 * The timer has to be cancelled by ndp_delete before doing the final 2459 * refrele. So the NCE is guaranteed to exist when the timer runs 2460 * until it clears the timeout_id. Before clearing the timeout_id 2461 * bump up the refcnt so that we can continue to use the nce 2462 */ 2463 ASSERT(nce != NULL); 2464 2465 /* 2466 * Grab the ill_g_lock now itself to avoid lock order problems. 2467 * nce_solicit needs ill_g_lock to be able to traverse ills 2468 */ 2469 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2470 mutex_enter(&nce->nce_lock); 2471 NCE_REFHOLD_LOCKED(nce); 2472 nce->nce_timeout_id = 0; 2473 2474 /* 2475 * Check the reachability state first. 2476 */ 2477 switch (nce->nce_state) { 2478 case ND_DELAY: 2479 rw_exit(&ipst->ips_ill_g_lock); 2480 nce->nce_state = ND_PROBE; 2481 mutex_exit(&nce->nce_lock); 2482 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2483 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2484 if (ip_debug > 3) { 2485 /* ip2dbg */ 2486 pr_addr_dbg("ndp_timer: state for %s changed " 2487 "to PROBE\n", AF_INET6, &nce->nce_addr); 2488 } 2489 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2490 NCE_REFRELE(nce); 2491 return; 2492 case ND_PROBE: 2493 /* must be retransmit timer */ 2494 rw_exit(&ipst->ips_ill_g_lock); 2495 nce->nce_pcnt--; 2496 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2497 nce->nce_pcnt >= -1); 2498 if (nce->nce_pcnt > 0) { 2499 /* 2500 * As per RFC2461, the nce gets deleted after 2501 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2502 * Note that the first unicast solicitation is sent 2503 * during the DELAY state. 2504 */ 2505 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2506 nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, 2507 addrbuf, sizeof (addrbuf)))); 2508 mutex_exit(&nce->nce_lock); 2509 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, 2510 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, 2511 (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE : 2512 NDP_UNICAST); 2513 if (dropped) { 2514 mutex_enter(&nce->nce_lock); 2515 nce->nce_pcnt++; 2516 mutex_exit(&nce->nce_lock); 2517 } 2518 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 2519 } else if (nce->nce_pcnt < 0) { 2520 /* No hope, delete the nce */ 2521 nce->nce_state = ND_UNREACHABLE; 2522 mutex_exit(&nce->nce_lock); 2523 if (ip_debug > 2) { 2524 /* ip1dbg */ 2525 pr_addr_dbg("ndp_timer: Delete IRE for" 2526 " dst %s\n", AF_INET6, &nce->nce_addr); 2527 } 2528 ndp_delete(nce); 2529 } else if (!(nce->nce_flags & NCE_F_PERMANENT)) { 2530 /* Wait RetransTimer, before deleting the entry */ 2531 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2532 nce->nce_pcnt, inet_ntop(AF_INET6, 2533 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2534 mutex_exit(&nce->nce_lock); 2535 /* Wait one interval before killing */ 2536 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2537 } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { 2538 ipif_t *ipif; 2539 2540 /* 2541 * We're done probing, and we can now declare this 2542 * address to be usable. Let IP know that it's ok to 2543 * use. 2544 */ 2545 nce->nce_state = ND_REACHABLE; 2546 mutex_exit(&nce->nce_lock); 2547 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, 2548 ALL_ZONES, NULL, NULL, NULL, NULL, ipst); 2549 if (ipif != NULL) { 2550 if (ipif->ipif_was_dup) { 2551 char ibuf[LIFNAMSIZ + 10]; 2552 char sbuf[INET6_ADDRSTRLEN]; 2553 2554 ipif->ipif_was_dup = B_FALSE; 2555 (void) inet_ntop(AF_INET6, 2556 &ipif->ipif_v6lcl_addr, 2557 sbuf, sizeof (sbuf)); 2558 ipif_get_name(ipif, ibuf, 2559 sizeof (ibuf)); 2560 cmn_err(CE_NOTE, "recovered address " 2561 "%s on %s", sbuf, ibuf); 2562 } 2563 if ((ipif->ipif_flags & IPIF_UP) && 2564 !ipif->ipif_addr_ready) { 2565 ip_rts_ifmsg(ipif); 2566 ip_rts_newaddrmsg(RTM_ADD, 0, ipif); 2567 sctp_update_ipif(ipif, SCTP_IPIF_UP); 2568 } 2569 ipif->ipif_addr_ready = 1; 2570 ipif_refrele(ipif); 2571 } 2572 /* Begin defending our new address */ 2573 nce->nce_unsolicit_count = 0; 2574 dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, 2575 B_FALSE, &nce->nce_addr, &ipv6_all_hosts_mcast, 2576 nce_advert_flags(nce)); 2577 if (dropped) { 2578 nce->nce_unsolicit_count = 1; 2579 NDP_RESTART_TIMER(nce, 2580 ipst->ips_ip_ndp_unsolicit_interval); 2581 } else if (ipst->ips_ip_ndp_defense_interval != 0) { 2582 NDP_RESTART_TIMER(nce, 2583 ipst->ips_ip_ndp_defense_interval); 2584 } 2585 } else { 2586 /* 2587 * This is an address we're probing to be our own, but 2588 * the ill is down. Wait until it comes back before 2589 * doing anything, but switch to reachable state so 2590 * that the restart will work. 2591 */ 2592 nce->nce_state = ND_REACHABLE; 2593 mutex_exit(&nce->nce_lock); 2594 } 2595 NCE_REFRELE(nce); 2596 return; 2597 case ND_INCOMPLETE: 2598 /* 2599 * Must be resolvers retransmit timer. 2600 */ 2601 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2602 ip6i_t *ip6i; 2603 ip6_t *ip6h; 2604 mblk_t *data_mp; 2605 2606 /* 2607 * Walk the list of packets queued, and see if there 2608 * are any multipathing probe packets. Such packets 2609 * are always queued at the head. Since this is a 2610 * retransmit timer firing, mark such packets as 2611 * delayed in ND resolution. This info will be used 2612 * in ip_wput_v6(). Multipathing probe packets will 2613 * always have an ip6i_t. Once we hit a packet without 2614 * it, we can break out of this loop. 2615 */ 2616 if (mp->b_datap->db_type == M_CTL) 2617 data_mp = mp->b_cont; 2618 else 2619 data_mp = mp; 2620 2621 ip6h = (ip6_t *)data_mp->b_rptr; 2622 if (ip6h->ip6_nxt != IPPROTO_RAW) 2623 break; 2624 2625 /* 2626 * This message should have been pulled up already in 2627 * ip_wput_v6. We can't do pullups here because the 2628 * b_next/b_prev is non-NULL. 2629 */ 2630 ip6i = (ip6i_t *)ip6h; 2631 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2632 sizeof (ip6i_t) + IPV6_HDR_LEN); 2633 2634 /* Mark this packet as delayed due to ND resolution */ 2635 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2636 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2637 } 2638 if (nce->nce_qd_mp != NULL) { 2639 ms = nce_solicit(nce, NULL); 2640 rw_exit(&ipst->ips_ill_g_lock); 2641 if (ms == 0) { 2642 if (nce->nce_state != ND_REACHABLE) { 2643 mutex_exit(&nce->nce_lock); 2644 nce_resolv_failed(nce); 2645 ndp_delete(nce); 2646 } else { 2647 mutex_exit(&nce->nce_lock); 2648 } 2649 } else { 2650 mutex_exit(&nce->nce_lock); 2651 NDP_RESTART_TIMER(nce, (clock_t)ms); 2652 } 2653 NCE_REFRELE(nce); 2654 return; 2655 } 2656 mutex_exit(&nce->nce_lock); 2657 rw_exit(&ipst->ips_ill_g_lock); 2658 NCE_REFRELE(nce); 2659 break; 2660 case ND_REACHABLE : 2661 rw_exit(&ipst->ips_ill_g_lock); 2662 if (((nce->nce_flags & NCE_F_UNSOL_ADV) && 2663 nce->nce_unsolicit_count != 0) || 2664 ((nce->nce_flags & NCE_F_PERMANENT) && 2665 ipst->ips_ip_ndp_defense_interval != 0)) { 2666 if (nce->nce_unsolicit_count > 0) 2667 nce->nce_unsolicit_count--; 2668 mutex_exit(&nce->nce_lock); 2669 dropped = nce_xmit(ill, 2670 ND_NEIGHBOR_ADVERT, 2671 ill, /* ill to be used for hw addr */ 2672 B_FALSE, /* use ill_phys_addr */ 2673 &nce->nce_addr, 2674 &ipv6_all_hosts_mcast, 2675 nce_advert_flags(nce)); 2676 if (dropped) { 2677 mutex_enter(&nce->nce_lock); 2678 nce->nce_unsolicit_count++; 2679 mutex_exit(&nce->nce_lock); 2680 } 2681 if (nce->nce_unsolicit_count != 0) { 2682 NDP_RESTART_TIMER(nce, 2683 ipst->ips_ip_ndp_unsolicit_interval); 2684 } else { 2685 NDP_RESTART_TIMER(nce, 2686 ipst->ips_ip_ndp_defense_interval); 2687 } 2688 } else { 2689 mutex_exit(&nce->nce_lock); 2690 } 2691 NCE_REFRELE(nce); 2692 break; 2693 default: 2694 rw_exit(&ipst->ips_ill_g_lock); 2695 mutex_exit(&nce->nce_lock); 2696 NCE_REFRELE(nce); 2697 break; 2698 } 2699 } 2700 2701 /* 2702 * Set a link layer address from the ll_addr passed in. 2703 * Copy SAP from ill. 2704 */ 2705 static void 2706 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2707 { 2708 ill_t *ill = nce->nce_ill; 2709 uchar_t *woffset; 2710 2711 ASSERT(ll_addr != NULL); 2712 /* Always called before fast_path_probe */ 2713 ASSERT(nce->nce_fp_mp == NULL); 2714 if (ill->ill_sap_length != 0) { 2715 /* 2716 * Copy the SAP type specified in the 2717 * request into the xmit template. 2718 */ 2719 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2720 } 2721 if (ill->ill_phys_addr_length > 0) { 2722 /* 2723 * The bcopy() below used to be called for the physical address 2724 * length rather than the link layer address length. For 2725 * ethernet and many other media, the phys_addr and lla are 2726 * identical. 2727 * However, with xresolv interfaces being introduced, the 2728 * phys_addr and lla are no longer the same, and the physical 2729 * address may not have any useful meaning, so we use the lla 2730 * for IPv6 address resolution and destination addressing. 2731 * 2732 * For PPP or other interfaces with a zero length 2733 * physical address, don't do anything here. 2734 * The bcopy() with a zero phys_addr length was previously 2735 * a no-op for interfaces with a zero-length physical address. 2736 * Using the lla for them would change the way they operate. 2737 * Doing nothing in such cases preserves expected behavior. 2738 */ 2739 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2740 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2741 } 2742 } 2743 2744 static boolean_t 2745 nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) 2746 { 2747 ill_t *ill = nce->nce_ill; 2748 uchar_t *ll_offset; 2749 2750 ASSERT(nce->nce_res_mp != NULL); 2751 if (ll_addr == NULL) 2752 return (B_FALSE); 2753 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2754 if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) 2755 return (B_TRUE); 2756 return (B_FALSE); 2757 } 2758 2759 /* 2760 * Updates the link layer address or the reachability state of 2761 * a cache entry. Reset probe counter if needed. 2762 */ 2763 static void 2764 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2765 { 2766 ill_t *ill = nce->nce_ill; 2767 boolean_t need_stop_timer = B_FALSE; 2768 boolean_t need_fastpath_update = B_FALSE; 2769 2770 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2771 ASSERT(nce->nce_ipversion == IPV6_VERSION); 2772 /* 2773 * If this interface does not do NUD, there is no point 2774 * in allowing an update to the cache entry. Although 2775 * we will respond to NS. 2776 * The only time we accept an update for a resolver when 2777 * NUD is turned off is when it has just been created. 2778 * Non-Resolvers will always be created as REACHABLE. 2779 */ 2780 if (new_state != ND_UNCHANGED) { 2781 if ((nce->nce_flags & NCE_F_NONUD) && 2782 (nce->nce_state != ND_INCOMPLETE)) 2783 return; 2784 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2785 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2786 need_stop_timer = B_TRUE; 2787 if (new_state == ND_REACHABLE) 2788 nce->nce_last = TICK_TO_MSEC(lbolt64); 2789 else { 2790 /* We force NUD in this case */ 2791 nce->nce_last = 0; 2792 } 2793 nce->nce_state = new_state; 2794 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2795 } 2796 /* 2797 * In case of fast path we need to free the the fastpath 2798 * M_DATA and do another probe. Otherwise we can just 2799 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2800 * whatever packets that happens to be transmitting at the time. 2801 */ 2802 if (new_ll_addr != NULL) { 2803 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2804 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2805 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2806 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2807 if (nce->nce_fp_mp != NULL) { 2808 freemsg(nce->nce_fp_mp); 2809 nce->nce_fp_mp = NULL; 2810 } 2811 need_fastpath_update = B_TRUE; 2812 } 2813 mutex_exit(&nce->nce_lock); 2814 if (need_stop_timer) { 2815 (void) untimeout(nce->nce_timeout_id); 2816 nce->nce_timeout_id = 0; 2817 } 2818 if (need_fastpath_update) 2819 nce_fastpath(nce); 2820 mutex_enter(&nce->nce_lock); 2821 } 2822 2823 void 2824 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 2825 { 2826 uint_t count = 0; 2827 mblk_t **mpp; 2828 2829 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2830 2831 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2832 mpp = &(*mpp)->b_next) { 2833 if (++count > 2834 nce->nce_ill->ill_max_buf) { 2835 mblk_t *tmp = nce->nce_qd_mp->b_next; 2836 2837 nce->nce_qd_mp->b_next = NULL; 2838 nce->nce_qd_mp->b_prev = NULL; 2839 freemsg(nce->nce_qd_mp); 2840 nce->nce_qd_mp = tmp; 2841 } 2842 } 2843 /* put this on the list */ 2844 if (head_insert) { 2845 mp->b_next = nce->nce_qd_mp; 2846 nce->nce_qd_mp = mp; 2847 } else { 2848 *mpp = mp; 2849 } 2850 } 2851 2852 static void 2853 nce_queue_mp(nce_t *nce, mblk_t *mp) 2854 { 2855 boolean_t head_insert = B_FALSE; 2856 ip6_t *ip6h; 2857 ip6i_t *ip6i; 2858 mblk_t *data_mp; 2859 2860 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2861 2862 if (mp->b_datap->db_type == M_CTL) 2863 data_mp = mp->b_cont; 2864 else 2865 data_mp = mp; 2866 ip6h = (ip6_t *)data_mp->b_rptr; 2867 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2868 /* 2869 * This message should have been pulled up already in 2870 * ip_wput_v6. We can't do pullups here because the message 2871 * could be from the nce_qd_mp which could have b_next/b_prev 2872 * non-NULL. 2873 */ 2874 ip6i = (ip6i_t *)ip6h; 2875 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2876 sizeof (ip6i_t) + IPV6_HDR_LEN); 2877 /* 2878 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 2879 * This has 2 aspects mentioned below. 2880 * 1. Perform head insertion in the nce_qd_mp for these packets. 2881 * This ensures that next retransmit of ND solicitation 2882 * will use the interface specified by the probe packet, 2883 * for both NS and NA. This corresponds to the src address 2884 * in the IPv6 packet. If we insert at tail, we will be 2885 * depending on the packet at the head for successful 2886 * ND resolution. This is not reliable, because the interface 2887 * on which the NA arrives could be different from the interface 2888 * on which the NS was sent, and if the receiving interface is 2889 * failed, it will appear that the sending interface is also 2890 * failed, causing in.mpathd to misdiagnose this as link 2891 * failure. 2892 * 2. Drop the original packet, if the ND resolution did not 2893 * succeed in the first attempt. However we will create the 2894 * nce and the ire, as soon as the ND resolution succeeds. 2895 * We don't gain anything by queueing multiple probe packets 2896 * and sending them back-to-back once resolution succeeds. 2897 * It is sufficient to send just 1 packet after ND resolution 2898 * succeeds. Since mpathd is sending down probe packets at a 2899 * constant rate, we don't need to send the queued packet. We 2900 * need to queue it only for NDP resolution. The benefit of 2901 * dropping the probe packets that were delayed in ND 2902 * resolution, is that in.mpathd will not see inflated 2903 * RTT. If the ND resolution does not succeed within 2904 * in.mpathd's failure detection time, mpathd may detect 2905 * a failure, and it does not matter whether the packet 2906 * was queued or dropped. 2907 */ 2908 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2909 head_insert = B_TRUE; 2910 } 2911 2912 nce_queue_mp_common(nce, mp, head_insert); 2913 } 2914 2915 /* 2916 * Called when address resolution failed due to a timeout. 2917 * Send an ICMP unreachable in response to all queued packets. 2918 */ 2919 void 2920 nce_resolv_failed(nce_t *nce) 2921 { 2922 mblk_t *mp, *nxt_mp, *first_mp; 2923 char buf[INET6_ADDRSTRLEN]; 2924 ip6_t *ip6h; 2925 zoneid_t zoneid = GLOBAL_ZONEID; 2926 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 2927 2928 ip1dbg(("nce_resolv_failed: dst %s\n", 2929 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 2930 mutex_enter(&nce->nce_lock); 2931 mp = nce->nce_qd_mp; 2932 nce->nce_qd_mp = NULL; 2933 mutex_exit(&nce->nce_lock); 2934 while (mp != NULL) { 2935 nxt_mp = mp->b_next; 2936 mp->b_next = NULL; 2937 mp->b_prev = NULL; 2938 2939 first_mp = mp; 2940 if (mp->b_datap->db_type == M_CTL) { 2941 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 2942 ASSERT(io->ipsec_out_type == IPSEC_OUT); 2943 zoneid = io->ipsec_out_zoneid; 2944 ASSERT(zoneid != ALL_ZONES); 2945 mp = mp->b_cont; 2946 mp->b_next = NULL; 2947 mp->b_prev = NULL; 2948 } 2949 2950 ip6h = (ip6_t *)mp->b_rptr; 2951 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2952 ip6i_t *ip6i; 2953 /* 2954 * This message should have been pulled up already 2955 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 2956 * the header is pulled up. 2957 */ 2958 ip6i = (ip6i_t *)ip6h; 2959 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 2960 sizeof (ip6i_t) + IPV6_HDR_LEN); 2961 mp->b_rptr += sizeof (ip6i_t); 2962 } 2963 /* 2964 * Ignore failure since icmp_unreachable_v6 will silently 2965 * drop packets with an unspecified source address. 2966 */ 2967 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst); 2968 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 2969 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst); 2970 mp = nxt_mp; 2971 } 2972 } 2973 2974 /* 2975 * Called by SIOCSNDP* ioctl to add/change an nce entry 2976 * and the corresponding attributes. 2977 * Disallow states other than ND_REACHABLE or ND_STALE. 2978 */ 2979 int 2980 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 2981 { 2982 sin6_t *sin6; 2983 in6_addr_t *addr; 2984 nce_t *nce; 2985 int err; 2986 uint16_t new_flags = 0; 2987 uint16_t old_flags = 0; 2988 int inflags = lnr->lnr_flags; 2989 ip_stack_t *ipst = ill->ill_ipst; 2990 2991 ASSERT(ill->ill_isv6); 2992 if ((lnr->lnr_state_create != ND_REACHABLE) && 2993 (lnr->lnr_state_create != ND_STALE)) 2994 return (EINVAL); 2995 2996 sin6 = (sin6_t *)&lnr->lnr_addr; 2997 addr = &sin6->sin6_addr; 2998 2999 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 3000 /* We know it can not be mapping so just look in the hash table */ 3001 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 3002 nce = nce_lookup_addr(ill, addr, nce); 3003 if (nce != NULL) 3004 new_flags = nce->nce_flags; 3005 3006 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 3007 case NDF_ISROUTER_ON: 3008 new_flags |= NCE_F_ISROUTER; 3009 break; 3010 case NDF_ISROUTER_OFF: 3011 new_flags &= ~NCE_F_ISROUTER; 3012 break; 3013 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3014 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3015 if (nce != NULL) 3016 NCE_REFRELE(nce); 3017 return (EINVAL); 3018 } 3019 3020 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3021 case NDF_ANYCAST_ON: 3022 new_flags |= NCE_F_ANYCAST; 3023 break; 3024 case NDF_ANYCAST_OFF: 3025 new_flags &= ~NCE_F_ANYCAST; 3026 break; 3027 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3028 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3029 if (nce != NULL) 3030 NCE_REFRELE(nce); 3031 return (EINVAL); 3032 } 3033 3034 if (nce == NULL) { 3035 err = ndp_add_v6(ill, 3036 (uchar_t *)lnr->lnr_hdw_addr, 3037 addr, 3038 &ipv6_all_ones, 3039 &ipv6_all_zeros, 3040 0, 3041 new_flags, 3042 lnr->lnr_state_create, 3043 &nce); 3044 if (err != 0) { 3045 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3046 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 3047 return (err); 3048 } 3049 } 3050 old_flags = nce->nce_flags; 3051 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3052 /* 3053 * Router turned to host, delete all ires. 3054 * XXX Just delete the entry, but we need to add too. 3055 */ 3056 nce->nce_flags &= ~NCE_F_ISROUTER; 3057 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3058 ndp_delete(nce); 3059 NCE_REFRELE(nce); 3060 return (0); 3061 } 3062 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3063 3064 mutex_enter(&nce->nce_lock); 3065 nce->nce_flags = new_flags; 3066 mutex_exit(&nce->nce_lock); 3067 /* 3068 * Note that we ignore the state at this point, which 3069 * should be either STALE or REACHABLE. Instead we let 3070 * the link layer address passed in to determine the state 3071 * much like incoming packets. 3072 */ 3073 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3074 NCE_REFRELE(nce); 3075 return (0); 3076 } 3077 3078 /* 3079 * If the device driver supports it, we make nce_fp_mp to have 3080 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 3081 * The caller ensures there is hold on nce for this function. 3082 * Note that since ill_fastpath_probe() copies the mblk there is 3083 * no need for the hold beyond this function. 3084 */ 3085 void 3086 nce_fastpath(nce_t *nce) 3087 { 3088 ill_t *ill = nce->nce_ill; 3089 int res; 3090 3091 ASSERT(ill != NULL); 3092 ASSERT(nce->nce_state != ND_INITIAL && nce->nce_state != ND_INCOMPLETE); 3093 3094 if (nce->nce_fp_mp != NULL) { 3095 /* Already contains fastpath info */ 3096 return; 3097 } 3098 if (nce->nce_res_mp != NULL) { 3099 nce_fastpath_list_add(nce); 3100 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3101 /* 3102 * EAGAIN is an indication of a transient error 3103 * i.e. allocation failure etc. leave the nce in the list it 3104 * will be updated when another probe happens for another ire 3105 * if not it will be taken out of the list when the ire is 3106 * deleted. 3107 */ 3108 3109 if (res != 0 && res != EAGAIN) 3110 nce_fastpath_list_delete(nce); 3111 } 3112 } 3113 3114 /* 3115 * Drain the list of nce's waiting for fastpath response. 3116 */ 3117 void 3118 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3119 void *arg) 3120 { 3121 3122 nce_t *next_nce; 3123 nce_t *current_nce; 3124 nce_t *first_nce; 3125 nce_t *prev_nce = NULL; 3126 3127 mutex_enter(&ill->ill_lock); 3128 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3129 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3130 next_nce = current_nce->nce_fastpath; 3131 /* 3132 * Take it off the list if we're flushing, or if the callback 3133 * routine tells us to do so. Otherwise, leave the nce in the 3134 * fastpath list to handle any pending response from the lower 3135 * layer. We can't drain the list when the callback routine 3136 * comparison failed, because the response is asynchronous in 3137 * nature, and may not arrive in the same order as the list 3138 * insertion. 3139 */ 3140 if (func == NULL || func(current_nce, arg)) { 3141 current_nce->nce_fastpath = NULL; 3142 if (current_nce == first_nce) 3143 ill->ill_fastpath_list = first_nce = next_nce; 3144 else 3145 prev_nce->nce_fastpath = next_nce; 3146 } else { 3147 /* previous element that is still in the list */ 3148 prev_nce = current_nce; 3149 } 3150 current_nce = next_nce; 3151 } 3152 mutex_exit(&ill->ill_lock); 3153 } 3154 3155 /* 3156 * Add nce to the nce fastpath list. 3157 */ 3158 void 3159 nce_fastpath_list_add(nce_t *nce) 3160 { 3161 ill_t *ill; 3162 3163 ill = nce->nce_ill; 3164 3165 mutex_enter(&ill->ill_lock); 3166 mutex_enter(&nce->nce_lock); 3167 3168 /* 3169 * if nce has not been deleted and 3170 * is not already in the list add it. 3171 */ 3172 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3173 (nce->nce_fastpath == NULL)) { 3174 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3175 ill->ill_fastpath_list = nce; 3176 } 3177 3178 mutex_exit(&nce->nce_lock); 3179 mutex_exit(&ill->ill_lock); 3180 } 3181 3182 /* 3183 * remove nce from the nce fastpath list. 3184 */ 3185 void 3186 nce_fastpath_list_delete(nce_t *nce) 3187 { 3188 nce_t *nce_ptr; 3189 3190 ill_t *ill; 3191 3192 ill = nce->nce_ill; 3193 ASSERT(ill != NULL); 3194 3195 mutex_enter(&ill->ill_lock); 3196 if (nce->nce_fastpath == NULL) 3197 goto done; 3198 3199 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3200 3201 if (ill->ill_fastpath_list == nce) { 3202 ill->ill_fastpath_list = nce->nce_fastpath; 3203 } else { 3204 nce_ptr = ill->ill_fastpath_list; 3205 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3206 if (nce_ptr->nce_fastpath == nce) { 3207 nce_ptr->nce_fastpath = nce->nce_fastpath; 3208 break; 3209 } 3210 nce_ptr = nce_ptr->nce_fastpath; 3211 } 3212 } 3213 3214 nce->nce_fastpath = NULL; 3215 done: 3216 mutex_exit(&ill->ill_lock); 3217 } 3218 3219 /* 3220 * Update all NCE's that are not in fastpath mode and 3221 * have an nce_fp_mp that matches mp. mp->b_cont contains 3222 * the fastpath header. 3223 * 3224 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 3225 */ 3226 boolean_t 3227 ndp_fastpath_update(nce_t *nce, void *arg) 3228 { 3229 mblk_t *mp, *fp_mp; 3230 uchar_t *mp_rptr, *ud_mp_rptr; 3231 mblk_t *ud_mp = nce->nce_res_mp; 3232 ptrdiff_t cmplen; 3233 3234 if (nce->nce_flags & NCE_F_MAPPING) 3235 return (B_TRUE); 3236 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3237 return (B_TRUE); 3238 3239 ip2dbg(("ndp_fastpath_update: trying\n")); 3240 mp = (mblk_t *)arg; 3241 mp_rptr = mp->b_rptr; 3242 cmplen = mp->b_wptr - mp_rptr; 3243 ASSERT(cmplen >= 0); 3244 ud_mp_rptr = ud_mp->b_rptr; 3245 /* 3246 * The nce is locked here to prevent any other threads 3247 * from accessing and changing nce_res_mp when the IPv6 address 3248 * becomes resolved to an lla while we're in the middle 3249 * of looking at and comparing the hardware address (lla). 3250 * It is also locked to prevent multiple threads in nce_fastpath_update 3251 * from examining nce_res_mp atthe same time. 3252 */ 3253 mutex_enter(&nce->nce_lock); 3254 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3255 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3256 mutex_exit(&nce->nce_lock); 3257 /* 3258 * Don't take the ire off the fastpath list yet, 3259 * since the response may come later. 3260 */ 3261 return (B_FALSE); 3262 } 3263 /* Matched - install mp as the fastpath mp */ 3264 ip1dbg(("ndp_fastpath_update: match\n")); 3265 fp_mp = dupb(mp->b_cont); 3266 if (fp_mp != NULL) { 3267 nce->nce_fp_mp = fp_mp; 3268 } 3269 mutex_exit(&nce->nce_lock); 3270 return (B_TRUE); 3271 } 3272 3273 /* 3274 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3275 * driver. Note that it assumes IP is exclusive... 3276 */ 3277 /* ARGSUSED */ 3278 void 3279 ndp_fastpath_flush(nce_t *nce, char *arg) 3280 { 3281 if (nce->nce_flags & NCE_F_MAPPING) 3282 return; 3283 /* No fastpath info? */ 3284 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3285 return; 3286 3287 if (nce->nce_ipversion == IPV4_VERSION && 3288 nce->nce_flags & NCE_F_BCAST) { 3289 /* 3290 * IPv4 BROADCAST entries: 3291 * We can't delete the nce since it is difficult to 3292 * recreate these without going through the 3293 * ipif down/up dance. 3294 * 3295 * All access to nce->nce_fp_mp in the case of these 3296 * is protected by nce_lock. 3297 */ 3298 mutex_enter(&nce->nce_lock); 3299 if (nce->nce_fp_mp != NULL) { 3300 freeb(nce->nce_fp_mp); 3301 nce->nce_fp_mp = NULL; 3302 mutex_exit(&nce->nce_lock); 3303 nce_fastpath(nce); 3304 } else { 3305 mutex_exit(&nce->nce_lock); 3306 } 3307 } else { 3308 /* Just delete the NCE... */ 3309 ndp_delete(nce); 3310 } 3311 } 3312 3313 /* 3314 * Return a pointer to a given option in the packet. 3315 * Assumes that option part of the packet have already been validated. 3316 */ 3317 nd_opt_hdr_t * 3318 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3319 { 3320 while (optlen > 0) { 3321 if (opt->nd_opt_type == opt_type) 3322 return (opt); 3323 optlen -= 8 * opt->nd_opt_len; 3324 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3325 } 3326 return (NULL); 3327 } 3328 3329 /* 3330 * Verify all option lengths present are > 0, also check to see 3331 * if the option lengths and packet length are consistent. 3332 */ 3333 boolean_t 3334 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3335 { 3336 ASSERT(opt != NULL); 3337 while (optlen > 0) { 3338 if (opt->nd_opt_len == 0) 3339 return (B_FALSE); 3340 optlen -= 8 * opt->nd_opt_len; 3341 if (optlen < 0) 3342 return (B_FALSE); 3343 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3344 } 3345 return (B_TRUE); 3346 } 3347 3348 /* 3349 * ndp_walk function. 3350 * Free a fraction of the NCE cache entries. 3351 * A fraction of zero means to not free any in that category. 3352 */ 3353 void 3354 ndp_cache_reclaim(nce_t *nce, char *arg) 3355 { 3356 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3357 uint_t rand; 3358 3359 if (nce->nce_flags & NCE_F_PERMANENT) 3360 return; 3361 3362 rand = (uint_t)lbolt + 3363 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3364 if (ncr->ncr_host != 0 && 3365 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3366 ndp_delete(nce); 3367 return; 3368 } 3369 } 3370 3371 /* 3372 * ndp_walk function. 3373 * Count the number of NCEs that can be deleted. 3374 * These would be hosts but not routers. 3375 */ 3376 void 3377 ndp_cache_count(nce_t *nce, char *arg) 3378 { 3379 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3380 3381 if (nce->nce_flags & NCE_F_PERMANENT) 3382 return; 3383 3384 ncc->ncc_total++; 3385 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3386 ncc->ncc_host++; 3387 } 3388 3389 #ifdef DEBUG 3390 void 3391 nce_trace_ref(nce_t *nce) 3392 { 3393 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3394 3395 if (nce->nce_trace_disable) 3396 return; 3397 3398 if (!th_trace_ref(nce, nce->nce_ill->ill_ipst)) { 3399 nce->nce_trace_disable = B_TRUE; 3400 nce_trace_cleanup(nce); 3401 } 3402 } 3403 3404 void 3405 nce_untrace_ref(nce_t *nce) 3406 { 3407 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3408 3409 if (!nce->nce_trace_disable) 3410 th_trace_unref(nce); 3411 } 3412 3413 static void 3414 nce_trace_cleanup(const nce_t *nce) 3415 { 3416 th_trace_cleanup(nce, nce->nce_trace_disable); 3417 } 3418 #endif 3419 3420 /* 3421 * Called when address resolution fails due to a timeout. 3422 * Send an ICMP unreachable in response to all queued packets. 3423 */ 3424 void 3425 arp_resolv_failed(nce_t *nce) 3426 { 3427 mblk_t *mp, *nxt_mp, *first_mp; 3428 char buf[INET6_ADDRSTRLEN]; 3429 zoneid_t zoneid = GLOBAL_ZONEID; 3430 struct in_addr ipv4addr; 3431 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3432 3433 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3434 ip3dbg(("arp_resolv_failed: dst %s\n", 3435 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3436 mutex_enter(&nce->nce_lock); 3437 mp = nce->nce_qd_mp; 3438 nce->nce_qd_mp = NULL; 3439 mutex_exit(&nce->nce_lock); 3440 3441 while (mp != NULL) { 3442 nxt_mp = mp->b_next; 3443 mp->b_next = NULL; 3444 mp->b_prev = NULL; 3445 3446 first_mp = mp; 3447 /* 3448 * Send icmp unreachable messages 3449 * to the hosts. 3450 */ 3451 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst); 3452 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3453 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3454 ICMP_HOST_UNREACHABLE, zoneid, ipst); 3455 mp = nxt_mp; 3456 } 3457 } 3458 3459 int 3460 ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3461 nce_t **newnce, nce_t *src_nce) 3462 { 3463 int err; 3464 nce_t *nce; 3465 in6_addr_t addr6; 3466 ip_stack_t *ipst = ill->ill_ipst; 3467 3468 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3469 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3470 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3471 nce = nce_lookup_addr(ill, &addr6, nce); 3472 if (nce == NULL) { 3473 err = ndp_add_v4(ill, addr, flags, newnce, src_nce); 3474 } else { 3475 *newnce = nce; 3476 err = EEXIST; 3477 } 3478 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3479 return (err); 3480 } 3481 3482 /* 3483 * NDP Cache Entry creation routine for IPv4. 3484 * Mapped entries are handled in arp. 3485 * This routine must always be called with ndp4->ndp_g_lock held. 3486 * Prior to return, nce_refcnt is incremented. 3487 */ 3488 static int 3489 ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3490 nce_t **newnce, nce_t *src_nce) 3491 { 3492 static nce_t nce_nil; 3493 nce_t *nce; 3494 mblk_t *mp; 3495 mblk_t *template = NULL; 3496 nce_t **ncep; 3497 ip_stack_t *ipst = ill->ill_ipst; 3498 uint16_t state = ND_INITIAL; 3499 int err; 3500 3501 ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock)); 3502 ASSERT(!ill->ill_isv6); 3503 ASSERT((flags & NCE_F_MAPPING) == 0); 3504 3505 if (ill->ill_resolver_mp == NULL) 3506 return (EINVAL); 3507 /* 3508 * Allocate the mblk to hold the nce. 3509 */ 3510 mp = allocb(sizeof (nce_t), BPRI_MED); 3511 if (mp == NULL) 3512 return (ENOMEM); 3513 3514 nce = (nce_t *)mp->b_rptr; 3515 mp->b_wptr = (uchar_t *)&nce[1]; 3516 *nce = nce_nil; 3517 nce->nce_ill = ill; 3518 nce->nce_ipversion = IPV4_VERSION; 3519 nce->nce_flags = flags; 3520 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3521 nce->nce_rcnt = ill->ill_xmit_count; 3522 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3523 nce->nce_mask = ipv6_all_ones; 3524 nce->nce_extract_mask = ipv6_all_zeros; 3525 nce->nce_ll_extract_start = 0; 3526 nce->nce_qd_mp = NULL; 3527 nce->nce_mp = mp; 3528 /* This one is for nce getting created */ 3529 nce->nce_refcnt = 1; 3530 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3531 ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3532 3533 nce->nce_trace_disable = B_FALSE; 3534 3535 if (src_nce != NULL) { 3536 /* 3537 * src_nce has been provided by the caller. The only 3538 * caller who provides a non-null, non-broadcast 3539 * src_nce is from ip_newroute() which must pass in 3540 * a ND_REACHABLE src_nce (this condition is verified 3541 * via an ASSERT for the save_ire->ire_nce in ip_newroute()) 3542 */ 3543 mutex_enter(&src_nce->nce_lock); 3544 state = src_nce->nce_state; 3545 if ((src_nce->nce_flags & NCE_F_CONDEMNED) || 3546 (ipst->ips_ndp4->ndp_g_hw_change > 0)) { 3547 /* 3548 * src_nce has been deleted, or 3549 * ip_arp_news is in the middle of 3550 * flushing entries in the the nce. 3551 * Fail the add, since we don't know 3552 * if it is safe to copy the contents of 3553 * src_nce 3554 */ 3555 DTRACE_PROBE2(nce__bad__src__nce, 3556 nce_t *, src_nce, ill_t *, ill); 3557 mutex_exit(&src_nce->nce_lock); 3558 err = EINVAL; 3559 goto err_ret; 3560 } 3561 template = copyb(src_nce->nce_res_mp); 3562 mutex_exit(&src_nce->nce_lock); 3563 if (template == NULL) { 3564 err = ENOMEM; 3565 goto err_ret; 3566 } 3567 } else if (flags & NCE_F_BCAST) { 3568 /* 3569 * broadcast nce. 3570 */ 3571 template = copyb(ill->ill_bcast_mp); 3572 if (template == NULL) { 3573 err = ENOMEM; 3574 goto err_ret; 3575 } 3576 state = ND_REACHABLE; 3577 } else if (ill->ill_net_type == IRE_IF_NORESOLVER) { 3578 /* 3579 * NORESOLVER entries are always created in the REACHABLE 3580 * state. We create a nce_res_mp with the IP nexthop address 3581 * in the destination address in the DLPI hdr if the 3582 * physical length is exactly 4 bytes. 3583 * 3584 * XXX not clear which drivers set ill_phys_addr_length to 3585 * IP_ADDR_LEN. 3586 */ 3587 if (ill->ill_phys_addr_length == IP_ADDR_LEN) { 3588 template = ill_dlur_gen((uchar_t *)addr, 3589 ill->ill_phys_addr_length, 3590 ill->ill_sap, ill->ill_sap_length); 3591 } else { 3592 template = copyb(ill->ill_resolver_mp); 3593 } 3594 if (template == NULL) { 3595 err = ENOMEM; 3596 goto err_ret; 3597 } 3598 state = ND_REACHABLE; 3599 } 3600 nce->nce_fp_mp = NULL; 3601 nce->nce_res_mp = template; 3602 nce->nce_state = state; 3603 if (state == ND_REACHABLE) { 3604 nce->nce_last = TICK_TO_MSEC(lbolt64); 3605 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3606 } else { 3607 nce->nce_last = 0; 3608 if (state == ND_INITIAL) 3609 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3610 } 3611 3612 ASSERT((nce->nce_res_mp == NULL && nce->nce_state == ND_INITIAL) || 3613 (nce->nce_res_mp != NULL && nce->nce_state == ND_REACHABLE)); 3614 /* 3615 * Atomically ensure that the ill is not CONDEMNED, before 3616 * adding the NCE. 3617 */ 3618 mutex_enter(&ill->ill_lock); 3619 if (ill->ill_state_flags & ILL_CONDEMNED) { 3620 mutex_exit(&ill->ill_lock); 3621 err = EINVAL; 3622 goto err_ret; 3623 } 3624 if ((nce->nce_next = *ncep) != NULL) 3625 nce->nce_next->nce_ptpn = &nce->nce_next; 3626 *ncep = nce; 3627 nce->nce_ptpn = ncep; 3628 *newnce = nce; 3629 /* This one is for nce being used by an active thread */ 3630 NCE_REFHOLD(*newnce); 3631 3632 /* Bump up the number of nce's referencing this ill */ 3633 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 3634 (char *), "nce", (void *), nce); 3635 ill->ill_nce_cnt++; 3636 mutex_exit(&ill->ill_lock); 3637 DTRACE_PROBE1(ndp__add__v4, nce_t *, nce); 3638 return (0); 3639 err_ret: 3640 freeb(mp); 3641 freemsg(template); 3642 return (err); 3643 } 3644 3645 /* 3646 * ndp_walk routine to delete all entries that have a given destination or 3647 * gateway address and cached link layer (MAC) address. This is used when ARP 3648 * informs us that a network-to-link-layer mapping may have changed. 3649 */ 3650 void 3651 nce_delete_hw_changed(nce_t *nce, void *arg) 3652 { 3653 nce_hw_map_t *hwm = arg; 3654 mblk_t *mp; 3655 dl_unitdata_req_t *dlu; 3656 uchar_t *macaddr; 3657 ill_t *ill; 3658 int saplen; 3659 ipaddr_t nce_addr; 3660 3661 if (nce->nce_state != ND_REACHABLE) 3662 return; 3663 3664 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3665 if (nce_addr != hwm->hwm_addr) 3666 return; 3667 3668 mutex_enter(&nce->nce_lock); 3669 if ((mp = nce->nce_res_mp) == NULL) { 3670 mutex_exit(&nce->nce_lock); 3671 return; 3672 } 3673 dlu = (dl_unitdata_req_t *)mp->b_rptr; 3674 macaddr = (uchar_t *)(dlu + 1); 3675 ill = nce->nce_ill; 3676 if ((saplen = ill->ill_sap_length) > 0) 3677 macaddr += saplen; 3678 else 3679 saplen = -saplen; 3680 3681 /* 3682 * If the hardware address is unchanged, then leave this one alone. 3683 * Note that saplen == abs(saplen) now. 3684 */ 3685 if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && 3686 bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { 3687 mutex_exit(&nce->nce_lock); 3688 return; 3689 } 3690 mutex_exit(&nce->nce_lock); 3691 3692 DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); 3693 ndp_delete(nce); 3694 } 3695 3696 /* 3697 * This function verifies whether a given IPv4 address is potentially known to 3698 * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t, 3699 * so that it can continue to look for hardware changes on that address. 3700 */ 3701 boolean_t 3702 ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns) 3703 { 3704 nce_t *nce; 3705 struct in_addr nceaddr; 3706 ip_stack_t *ipst = ns->netstack_ip; 3707 3708 if (addr == INADDR_ANY) 3709 return (B_FALSE); 3710 3711 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3712 nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr); 3713 for (; nce != NULL; nce = nce->nce_next) { 3714 /* Note that only v4 mapped entries are in the table. */ 3715 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 3716 if (addr == nceaddr.s_addr && 3717 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 3718 /* Single flag check; no lock needed */ 3719 if (!(nce->nce_flags & NCE_F_CONDEMNED)) 3720 break; 3721 } 3722 } 3723 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3724 return (nce != NULL); 3725 } 3726