1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/sysmacros.h> 33 #include <sys/errno.h> 34 #include <sys/dlpi.h> 35 #include <sys/socket.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/vtrace.h> 41 #include <sys/kmem.h> 42 #include <sys/zone.h> 43 #include <sys/ethernet.h> 44 #include <sys/sdt.h> 45 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/if_dl.h> 49 #include <net/route.h> 50 #include <netinet/in.h> 51 #include <netinet/ip6.h> 52 #include <netinet/icmp6.h> 53 54 #include <inet/common.h> 55 #include <inet/mi.h> 56 #include <inet/mib2.h> 57 #include <inet/nd.h> 58 #include <inet/ip.h> 59 #include <inet/ip_impl.h> 60 #include <inet/ipclassifier.h> 61 #include <inet/ip_if.h> 62 #include <inet/ip_ire.h> 63 #include <inet/ip_rts.h> 64 #include <inet/ip6.h> 65 #include <inet/ip_ndp.h> 66 #include 
<inet/ipsec_impl.h> 67 #include <inet/ipsec_info.h> 68 #include <inet/sctp_ip.h> 69 70 /* 71 * Function names with nce_ prefix are static while function 72 * names with ndp_ prefix are used by rest of the IP. 73 * 74 * Lock ordering: 75 * 76 * ndp_g_lock -> ill_lock -> nce_lock 77 * 78 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 79 * nce_next. Nce_lock protects the contents of the NCE (particularly 80 * nce_refcnt). 81 */ 82 83 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 84 uint32_t ll_addr_len); 85 static void nce_ire_delete(nce_t *nce); 86 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 87 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 88 static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *, nce_t *); 89 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 90 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 91 uchar_t *addr); 92 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 93 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 94 static mblk_t *nce_udreq_alloc(ill_t *ill); 95 static void nce_update(nce_t *nce, uint16_t new_state, 96 uchar_t *new_ll_addr); 97 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 98 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 99 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 100 const in6_addr_t *target, int flag); 101 static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t, 102 nce_t **, nce_t *); 103 104 #ifdef DEBUG 105 static void nce_trace_cleanup(const nce_t *); 106 #endif 107 108 #define NCE_HASH_PTR_V4(ipst, addr) \ 109 (&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 110 111 #define NCE_HASH_PTR_V6(ipst, addr) \ 112 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ 113 NCE_TABLE_SIZE)])) 114 115 /* 116 * Compute default flags to use for an advertisement of this nce's address. 
117 */ 118 static int 119 nce_advert_flags(const nce_t *nce) 120 { 121 int flag = 0; 122 123 if (nce->nce_flags & NCE_F_ISROUTER) 124 flag |= NDP_ISROUTER; 125 return (flag); 126 } 127 128 /* Non-tunable probe interval, based on link capabilities */ 129 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 130 131 /* 132 * NDP Cache Entry creation routine. 133 * Mapped entries will never do NUD . 134 * This routine must always be called with ndp6->ndp_g_lock held. 135 * Prior to return, nce_refcnt is incremented. 136 */ 137 int 138 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 139 const in6_addr_t *mask, const in6_addr_t *extract_mask, 140 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 141 nce_t **newnce) 142 { 143 static nce_t nce_nil; 144 nce_t *nce; 145 mblk_t *mp; 146 mblk_t *template; 147 nce_t **ncep; 148 int err; 149 boolean_t dropped = B_FALSE; 150 ip_stack_t *ipst = ill->ill_ipst; 151 152 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 153 ASSERT(ill != NULL && ill->ill_isv6); 154 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 155 ip0dbg(("ndp_add_v6: no addr\n")); 156 return (EINVAL); 157 } 158 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 159 ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags)); 160 return (EINVAL); 161 } 162 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 163 (flags & NCE_F_MAPPING)) { 164 ip0dbg(("ndp_add_v6: extract mask zero for mapping")); 165 return (EINVAL); 166 } 167 /* 168 * Allocate the mblk to hold the nce. 169 * 170 * XXX This can come out of a separate cache - nce_cache. 
171 * We don't need the mp anymore as there are no more 172 * "qwriter"s 173 */ 174 mp = allocb(sizeof (nce_t), BPRI_MED); 175 if (mp == NULL) 176 return (ENOMEM); 177 178 nce = (nce_t *)mp->b_rptr; 179 mp->b_wptr = (uchar_t *)&nce[1]; 180 *nce = nce_nil; 181 182 /* 183 * This one holds link layer address 184 */ 185 if (ill->ill_net_type == IRE_IF_RESOLVER) { 186 template = nce_udreq_alloc(ill); 187 } else { 188 if (ill->ill_resolver_mp == NULL) { 189 freeb(mp); 190 return (EINVAL); 191 } 192 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 193 template = copyb(ill->ill_resolver_mp); 194 } 195 if (template == NULL) { 196 freeb(mp); 197 return (ENOMEM); 198 } 199 nce->nce_ill = ill; 200 nce->nce_ipversion = IPV6_VERSION; 201 nce->nce_flags = flags; 202 nce->nce_state = state; 203 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 204 nce->nce_rcnt = ill->ill_xmit_count; 205 nce->nce_addr = *addr; 206 nce->nce_mask = *mask; 207 nce->nce_extract_mask = *extract_mask; 208 nce->nce_ll_extract_start = hw_extract_start; 209 nce->nce_fp_mp = NULL; 210 nce->nce_res_mp = template; 211 if (state == ND_REACHABLE) 212 nce->nce_last = TICK_TO_MSEC(lbolt64); 213 else 214 nce->nce_last = 0; 215 nce->nce_qd_mp = NULL; 216 nce->nce_mp = mp; 217 if (hw_addr != NULL) 218 nce_set_ll(nce, hw_addr); 219 /* This one is for nce getting created */ 220 nce->nce_refcnt = 1; 221 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 222 if (nce->nce_flags & NCE_F_MAPPING) { 223 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 224 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 225 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 226 ncep = &ipst->ips_ndp6->nce_mask_entries; 227 } else { 228 ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 229 } 230 231 nce->nce_trace_disable = B_FALSE; 232 233 /* 234 * Atomically ensure that the ill is not CONDEMNED, before 235 * adding the NCE. 
236 */ 237 mutex_enter(&ill->ill_lock); 238 if (ill->ill_state_flags & ILL_CONDEMNED) { 239 mutex_exit(&ill->ill_lock); 240 freeb(mp); 241 freeb(template); 242 return (EINVAL); 243 } 244 if ((nce->nce_next = *ncep) != NULL) 245 nce->nce_next->nce_ptpn = &nce->nce_next; 246 *ncep = nce; 247 nce->nce_ptpn = ncep; 248 *newnce = nce; 249 /* This one is for nce being used by an active thread */ 250 NCE_REFHOLD(*newnce); 251 252 /* Bump up the number of nce's referencing this ill */ 253 ill->ill_nce_cnt++; 254 mutex_exit(&ill->ill_lock); 255 256 err = 0; 257 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 258 mutex_enter(&nce->nce_lock); 259 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 260 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 261 mutex_exit(&nce->nce_lock); 262 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 263 &ipv6_all_zeros, addr, NDP_PROBE); 264 if (dropped) { 265 mutex_enter(&nce->nce_lock); 266 nce->nce_pcnt++; 267 mutex_exit(&nce->nce_lock); 268 } 269 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 270 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 271 err = EINPROGRESS; 272 } else if (flags & NCE_F_UNSOL_ADV) { 273 /* 274 * We account for the transmit below by assigning one 275 * less than the ndd variable. Subsequent decrements 276 * are done in ndp_timer. 
277 */ 278 mutex_enter(&nce->nce_lock); 279 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 280 nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1; 281 mutex_exit(&nce->nce_lock); 282 dropped = nce_xmit(ill, 283 ND_NEIGHBOR_ADVERT, 284 ill, /* ill to be used for extracting ill_nd_lla */ 285 B_TRUE, /* use ill_nd_lla */ 286 addr, /* Source and target of the advertisement pkt */ 287 &ipv6_all_hosts_mcast, /* Destination of the packet */ 288 nce_advert_flags(nce)); 289 mutex_enter(&nce->nce_lock); 290 if (dropped) 291 nce->nce_unsolicit_count++; 292 if (nce->nce_unsolicit_count != 0) { 293 nce->nce_timeout_id = timeout(ndp_timer, nce, 294 MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval)); 295 } 296 mutex_exit(&nce->nce_lock); 297 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 298 } 299 /* 300 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 301 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 302 * We call nce_fastpath from nce_update if the link layer address of 303 * the peer changes from nce_update 304 */ 305 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 306 nce_fastpath(nce); 307 return (err); 308 } 309 310 int 311 ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 312 const in6_addr_t *mask, const in6_addr_t *extract_mask, 313 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 314 nce_t **newnce) 315 { 316 int err = 0; 317 nce_t *nce; 318 ip_stack_t *ipst = ill->ill_ipst; 319 320 ASSERT(ill->ill_isv6); 321 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 322 323 /* Get head of v6 hash table */ 324 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 325 nce = nce_lookup_addr(ill, addr, nce); 326 if (nce == NULL) { 327 err = ndp_add_v6(ill, 328 hw_addr, 329 addr, 330 mask, 331 extract_mask, 332 hw_extract_start, 333 flags, 334 state, 335 newnce); 336 } else { 337 *newnce = nce; 338 err = EEXIST; 339 } 340 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 341 return (err); 342 } 343 344 /* 345 * 
Remove all the CONDEMNED nces from the appropriate hash table. 346 * We create a private list of NCEs, these may have ires pointing 347 * to them, so the list will be passed through to clean up dependent 348 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 349 */ 350 static void 351 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 352 { 353 nce_t *nce1; 354 nce_t **ptpn; 355 356 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 357 ASSERT(ndp->ndp_g_walker == 0); 358 for (; nce; nce = nce1) { 359 nce1 = nce->nce_next; 360 mutex_enter(&nce->nce_lock); 361 if (nce->nce_flags & NCE_F_CONDEMNED) { 362 ptpn = nce->nce_ptpn; 363 nce1 = nce->nce_next; 364 if (nce1 != NULL) 365 nce1->nce_ptpn = ptpn; 366 *ptpn = nce1; 367 nce->nce_ptpn = NULL; 368 nce->nce_next = NULL; 369 nce->nce_next = *free_nce_list; 370 *free_nce_list = nce; 371 } 372 mutex_exit(&nce->nce_lock); 373 } 374 } 375 376 /* 377 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 378 * will return this NCE. Also no new IREs will be created that 379 * point to this NCE (See ire_add_v6). Also no new timeouts will 380 * be started (See NDP_RESTART_TIMER). 381 * 2. Cancel any currently running timeouts. 382 * 3. If there is an ndp walker, return. The walker will do the cleanup. 383 * This ensures that walkers see a consistent list of NCEs while walking. 384 * 4. Otherwise remove the NCE from the list of NCEs 385 * 5. Delete all IREs pointing to this NCE. 386 */ 387 void 388 ndp_delete(nce_t *nce) 389 { 390 nce_t **ptpn; 391 nce_t *nce1; 392 int ipversion = nce->nce_ipversion; 393 ndp_g_t *ndp; 394 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 395 396 if (ipversion == IPV4_VERSION) 397 ndp = ipst->ips_ndp4; 398 else 399 ndp = ipst->ips_ndp6; 400 401 /* Serialize deletes */ 402 mutex_enter(&nce->nce_lock); 403 if (nce->nce_flags & NCE_F_CONDEMNED) { 404 /* Some other thread is doing the delete */ 405 mutex_exit(&nce->nce_lock); 406 return; 407 } 408 /* 409 * Caller has a refhold. 
Also 1 ref for being in the list. Thus 410 * refcnt has to be >= 2 411 */ 412 ASSERT(nce->nce_refcnt >= 2); 413 nce->nce_flags |= NCE_F_CONDEMNED; 414 mutex_exit(&nce->nce_lock); 415 416 nce_fastpath_list_delete(nce); 417 418 /* 419 * Cancel any running timer. Timeout can't be restarted 420 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 421 * Passing invalid timeout id is fine. 422 */ 423 if (nce->nce_timeout_id != 0) { 424 (void) untimeout(nce->nce_timeout_id); 425 nce->nce_timeout_id = 0; 426 } 427 428 mutex_enter(&ndp->ndp_g_lock); 429 if (nce->nce_ptpn == NULL) { 430 /* 431 * The last ndp walker has already removed this nce from 432 * the list after we marked the nce CONDEMNED and before 433 * we grabbed the global lock. 434 */ 435 mutex_exit(&ndp->ndp_g_lock); 436 return; 437 } 438 if (ndp->ndp_g_walker > 0) { 439 /* 440 * Can't unlink. The walker will clean up 441 */ 442 ndp->ndp_g_walker_cleanup = B_TRUE; 443 mutex_exit(&ndp->ndp_g_lock); 444 return; 445 } 446 447 /* 448 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 449 * the timer since it is marked CONDEMNED. 
450 */ 451 ptpn = nce->nce_ptpn; 452 nce1 = nce->nce_next; 453 if (nce1 != NULL) 454 nce1->nce_ptpn = ptpn; 455 *ptpn = nce1; 456 nce->nce_ptpn = NULL; 457 nce->nce_next = NULL; 458 mutex_exit(&ndp->ndp_g_lock); 459 460 nce_ire_delete(nce); 461 } 462 463 void 464 ndp_inactive(nce_t *nce) 465 { 466 mblk_t **mpp; 467 ill_t *ill; 468 469 ASSERT(nce->nce_refcnt == 0); 470 ASSERT(MUTEX_HELD(&nce->nce_lock)); 471 ASSERT(nce->nce_fastpath == NULL); 472 473 /* Free all nce allocated messages */ 474 mpp = &nce->nce_first_mp_to_free; 475 do { 476 while (*mpp != NULL) { 477 mblk_t *mp; 478 479 mp = *mpp; 480 *mpp = mp->b_next; 481 482 inet_freemsg(mp); 483 } 484 } while (mpp++ != &nce->nce_last_mp_to_free); 485 486 #ifdef DEBUG 487 nce_trace_cleanup(nce); 488 #endif 489 490 ill = nce->nce_ill; 491 mutex_enter(&ill->ill_lock); 492 ill->ill_nce_cnt--; 493 /* 494 * If the number of nce's associated with this ill have dropped 495 * to zero, check whether we need to restart any operation that 496 * is waiting for this to happen. 497 */ 498 if (ill->ill_nce_cnt == 0) { 499 /* ipif_ill_refrele_tail drops the ill_lock */ 500 ipif_ill_refrele_tail(ill); 501 } else { 502 mutex_exit(&ill->ill_lock); 503 } 504 mutex_destroy(&nce->nce_lock); 505 if (nce->nce_mp != NULL) 506 inet_freemsg(nce->nce_mp); 507 } 508 509 /* 510 * ndp_walk routine. Delete the nce if it is associated with the ill 511 * that is going away. Always called as a writer. 512 */ 513 void 514 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 515 { 516 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 517 ndp_delete(nce); 518 } 519 } 520 521 /* 522 * Walk a list of to be inactive NCEs and blow away all the ires. 
523 */ 524 static void 525 nce_ire_delete_list(nce_t *nce) 526 { 527 nce_t *nce_next; 528 529 ASSERT(nce != NULL); 530 while (nce != NULL) { 531 nce_next = nce->nce_next; 532 nce->nce_next = NULL; 533 534 /* 535 * It is possible for the last ndp walker (this thread) 536 * to come here after ndp_delete has marked the nce CONDEMNED 537 * and before it has removed the nce from the fastpath list 538 * or called untimeout. So we need to do it here. It is safe 539 * for both ndp_delete and this thread to do it twice or 540 * even simultaneously since each of the threads has a 541 * reference on the nce. 542 */ 543 nce_fastpath_list_delete(nce); 544 /* 545 * Cancel any running timer. Timeout can't be restarted 546 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 547 * Passing invalid timeout id is fine. 548 */ 549 if (nce->nce_timeout_id != 0) { 550 (void) untimeout(nce->nce_timeout_id); 551 nce->nce_timeout_id = 0; 552 } 553 /* 554 * We might hit this func thus in the v4 case: 555 * ipif_down->ipif_ndp_down->ndp_walk 556 */ 557 558 if (nce->nce_ipversion == IPV4_VERSION) { 559 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 560 IRE_CACHE, nce_ire_delete1, 561 (char *)nce, nce->nce_ill); 562 } else { 563 ASSERT(nce->nce_ipversion == IPV6_VERSION); 564 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 565 IRE_CACHE, nce_ire_delete1, 566 (char *)nce, nce->nce_ill); 567 } 568 NCE_REFRELE_NOTR(nce); 569 nce = nce_next; 570 } 571 } 572 573 /* 574 * Delete an ire when the nce goes away. 
575 */ 576 /* ARGSUSED */ 577 static void 578 nce_ire_delete(nce_t *nce) 579 { 580 if (nce->nce_ipversion == IPV6_VERSION) { 581 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 582 nce_ire_delete1, (char *)nce, nce->nce_ill); 583 NCE_REFRELE_NOTR(nce); 584 } else { 585 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 586 nce_ire_delete1, (char *)nce, nce->nce_ill); 587 NCE_REFRELE_NOTR(nce); 588 } 589 } 590 591 /* 592 * ire_walk routine used to delete every IRE that shares this nce 593 */ 594 static void 595 nce_ire_delete1(ire_t *ire, char *nce_arg) 596 { 597 nce_t *nce = (nce_t *)nce_arg; 598 599 ASSERT(ire->ire_type == IRE_CACHE); 600 601 if (ire->ire_nce == nce) { 602 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 603 ire_delete(ire); 604 } 605 } 606 607 /* 608 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 609 */ 610 boolean_t 611 ndp_restart_dad(nce_t *nce) 612 { 613 boolean_t started; 614 boolean_t dropped; 615 616 if (nce == NULL) 617 return (B_FALSE); 618 mutex_enter(&nce->nce_lock); 619 if (nce->nce_state == ND_PROBE) { 620 mutex_exit(&nce->nce_lock); 621 started = B_TRUE; 622 } else if (nce->nce_state == ND_REACHABLE) { 623 nce->nce_state = ND_PROBE; 624 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 625 mutex_exit(&nce->nce_lock); 626 dropped = nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, NULL, 627 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, NDP_PROBE); 628 if (dropped) { 629 mutex_enter(&nce->nce_lock); 630 nce->nce_pcnt++; 631 mutex_exit(&nce->nce_lock); 632 } 633 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 634 started = B_TRUE; 635 } else { 636 mutex_exit(&nce->nce_lock); 637 started = B_FALSE; 638 } 639 return (started); 640 } 641 642 /* 643 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 644 * If one is found, the refcnt on the nce will be incremented. 
645 */ 646 nce_t * 647 ndp_lookup_v6(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 648 { 649 nce_t *nce; 650 ip_stack_t *ipst; 651 652 ASSERT(ill != NULL); 653 ipst = ill->ill_ipst; 654 655 ASSERT(ill != NULL && ill->ill_isv6); 656 if (!caller_holds_lock) { 657 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 658 } 659 660 /* Get head of v6 hash table */ 661 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 662 nce = nce_lookup_addr(ill, addr, nce); 663 if (nce == NULL) 664 nce = nce_lookup_mapping(ill, addr); 665 if (!caller_holds_lock) 666 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 667 return (nce); 668 } 669 /* 670 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 671 * If one is found, the refcnt on the nce will be incremented. 672 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 673 * so we skip the nce_lookup_mapping call. 674 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 675 */ 676 nce_t * 677 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 678 { 679 nce_t *nce; 680 in6_addr_t addr6; 681 ip_stack_t *ipst = ill->ill_ipst; 682 683 if (!caller_holds_lock) { 684 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 685 } 686 687 /* Get head of v4 hash table */ 688 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 689 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 690 nce = nce_lookup_addr(ill, &addr6, nce); 691 if (!caller_holds_lock) 692 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 693 return (nce); 694 } 695 696 /* 697 * Cache entry lookup. Try to find an nce matching the parameters passed. 698 * Look only for exact entries (no mappings). If an nce is found, increment 699 * the hold count on that nce. The caller passes in the start of the 700 * appropriate hash table, and must be holding the appropriate global 701 * lock (ndp_g_lock). 
702 */ 703 static nce_t * 704 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr, nce_t *nce) 705 { 706 ndp_g_t *ndp; 707 ip_stack_t *ipst = ill->ill_ipst; 708 709 if (ill->ill_isv6) 710 ndp = ipst->ips_ndp6; 711 else 712 ndp = ipst->ips_ndp4; 713 714 ASSERT(ill != NULL); 715 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 716 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 717 return (NULL); 718 for (; nce != NULL; nce = nce->nce_next) { 719 if (nce->nce_ill == ill) { 720 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 721 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 722 &ipv6_all_ones)) { 723 mutex_enter(&nce->nce_lock); 724 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 725 NCE_REFHOLD_LOCKED(nce); 726 mutex_exit(&nce->nce_lock); 727 break; 728 } 729 mutex_exit(&nce->nce_lock); 730 } 731 } 732 } 733 return (nce); 734 } 735 736 /* 737 * Cache entry lookup. Try to find an nce matching the parameters passed. 738 * Look only for mappings. 739 */ 740 static nce_t * 741 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 742 { 743 nce_t *nce; 744 ip_stack_t *ipst = ill->ill_ipst; 745 746 ASSERT(ill != NULL && ill->ill_isv6); 747 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 748 if (!IN6_IS_ADDR_MULTICAST(addr)) 749 return (NULL); 750 nce = ipst->ips_ndp6->nce_mask_entries; 751 for (; nce != NULL; nce = nce->nce_next) 752 if (nce->nce_ill == ill && 753 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 754 mutex_enter(&nce->nce_lock); 755 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 756 NCE_REFHOLD_LOCKED(nce); 757 mutex_exit(&nce->nce_lock); 758 break; 759 } 760 mutex_exit(&nce->nce_lock); 761 } 762 return (nce); 763 } 764 765 /* 766 * Process passed in parameters either from an incoming packet or via 767 * user ioctl. 
768 */ 769 void 770 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 771 { 772 ill_t *ill = nce->nce_ill; 773 uint32_t hw_addr_len = ill->ill_nd_lla_len; 774 mblk_t *mp; 775 boolean_t ll_updated = B_FALSE; 776 boolean_t ll_changed; 777 ip_stack_t *ipst = ill->ill_ipst; 778 779 ASSERT(nce->nce_ipversion == IPV6_VERSION); 780 /* 781 * No updates of link layer address or the neighbor state is 782 * allowed, when the cache is in NONUD state. This still 783 * allows for responding to reachability solicitation. 784 */ 785 mutex_enter(&nce->nce_lock); 786 if (nce->nce_state == ND_INCOMPLETE) { 787 if (hw_addr == NULL) { 788 mutex_exit(&nce->nce_lock); 789 return; 790 } 791 nce_set_ll(nce, hw_addr); 792 /* 793 * Update nce state and send the queued packets 794 * back to ip this time ire will be added. 795 */ 796 if (flag & ND_NA_FLAG_SOLICITED) { 797 nce_update(nce, ND_REACHABLE, NULL); 798 } else { 799 nce_update(nce, ND_STALE, NULL); 800 } 801 mutex_exit(&nce->nce_lock); 802 nce_fastpath(nce); 803 mutex_enter(&nce->nce_lock); 804 mp = nce->nce_qd_mp; 805 nce->nce_qd_mp = NULL; 806 mutex_exit(&nce->nce_lock); 807 while (mp != NULL) { 808 mblk_t *nxt_mp, *data_mp; 809 810 nxt_mp = mp->b_next; 811 mp->b_next = NULL; 812 813 if (mp->b_datap->db_type == M_CTL) 814 data_mp = mp->b_cont; 815 else 816 data_mp = mp; 817 if (data_mp->b_prev != NULL) { 818 ill_t *inbound_ill; 819 queue_t *fwdq = NULL; 820 uint_t ifindex; 821 822 ifindex = (uint_t)(uintptr_t)data_mp->b_prev; 823 inbound_ill = ill_lookup_on_ifindex(ifindex, 824 B_TRUE, NULL, NULL, NULL, NULL, ipst); 825 if (inbound_ill == NULL) { 826 data_mp->b_prev = NULL; 827 freemsg(mp); 828 return; 829 } else { 830 fwdq = inbound_ill->ill_rq; 831 } 832 data_mp->b_prev = NULL; 833 /* 834 * Send a forwarded packet back into ip_rput_v6 835 * just as in ire_send_v6(). 836 * Extract the queue from b_prev (set in 837 * ip_rput_data_v6). 
838 */ 839 if (fwdq != NULL) { 840 /* 841 * Forwarded packets hop count will 842 * get decremented in ip_rput_data_v6 843 */ 844 if (data_mp != mp) 845 freeb(mp); 846 put(fwdq, data_mp); 847 } else { 848 /* 849 * Send locally originated packets back 850 * into * ip_wput_v6. 851 */ 852 put(ill->ill_wq, mp); 853 } 854 ill_refrele(inbound_ill); 855 } else { 856 put(ill->ill_wq, mp); 857 } 858 mp = nxt_mp; 859 } 860 return; 861 } 862 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 863 if (!is_adv) { 864 /* If this is a SOLICITATION request only */ 865 if (ll_changed) 866 nce_update(nce, ND_STALE, hw_addr); 867 mutex_exit(&nce->nce_lock); 868 return; 869 } 870 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 871 /* If in any other state than REACHABLE, ignore */ 872 if (nce->nce_state == ND_REACHABLE) { 873 nce_update(nce, ND_STALE, NULL); 874 } 875 mutex_exit(&nce->nce_lock); 876 return; 877 } else { 878 if (ll_changed) { 879 nce_update(nce, ND_UNCHANGED, hw_addr); 880 ll_updated = B_TRUE; 881 } 882 if (flag & ND_NA_FLAG_SOLICITED) { 883 nce_update(nce, ND_REACHABLE, NULL); 884 } else { 885 if (ll_updated) { 886 nce_update(nce, ND_STALE, NULL); 887 } 888 } 889 mutex_exit(&nce->nce_lock); 890 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 891 NCE_F_ISROUTER)) { 892 ire_t *ire; 893 894 /* 895 * Router turned to host. We need to remove the 896 * entry as well as any default route that may be 897 * using this as a next hop. This is required by 898 * section 7.2.5 of RFC 2461. 899 */ 900 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 901 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 902 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 903 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 904 MATCH_IRE_DEFAULT, ipst); 905 if (ire != NULL) { 906 ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); 907 ire_delete(ire); 908 ire_refrele(ire); 909 } 910 ndp_delete(nce); 911 } 912 } 913 } 914 915 /* 916 * Pass arg1 to the pfi supplied, along with each nce in existence. 
* ndp_walk() places a REFHOLD on the nce and drops the lock when
 * walking the hash list.
 *
 * ndp   - the per-version NDP state (hash table and mapping list) to walk.
 * ill   - if non-NULL, only entries on this ill are visited.
 * pfi   - callback invoked as (*pfi)(nce, arg1) for each visited entry.
 * trace - selects the traced (NCE_REFHOLD/NCE_REFRELE) or untraced
 *	   (_NOTR) reference macros around the callback.
 *
 * The walker count is raised under ndp_g_lock for the duration of the
 * walk so that ndp_delete() will not unlink entries underneath it.  The
 * last walker out performs any cleanup ndp_delete() deferred.
 */
void
ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1,
    boolean_t trace)
{

	nce_t	*nce;
	nce_t	*nce1;
	nce_t	**ncep;
	nce_t	*free_nce_list = NULL;

	mutex_enter(&ndp->ndp_g_lock);
	/* Prevent ndp_delete from unlink and free of NCE */
	ndp->ndp_g_walker++;
	mutex_exit(&ndp->ndp_g_lock);
	/* First pass: every bucket of the hash table. */
	for (ncep = ndp->nce_hash_tbl;
	    ncep < A_END(ndp->nce_hash_tbl); ncep++) {
		for (nce = *ncep; nce != NULL; nce = nce1) {
			/* Fetch the successor before the callback runs. */
			nce1 = nce->nce_next;
			if (ill == NULL || nce->nce_ill == ill) {
				if (trace) {
					NCE_REFHOLD(nce);
					(*pfi)(nce, arg1);
					NCE_REFRELE(nce);
				} else {
					NCE_REFHOLD_NOTR(nce);
					(*pfi)(nce, arg1);
					NCE_REFRELE_NOTR(nce);
				}
			}
		}
	}
	/* Second pass: the list of mapping entries. */
	for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) {
		nce1 = nce->nce_next;
		if (ill == NULL || nce->nce_ill == ill) {
			if (trace) {
				NCE_REFHOLD(nce);
				(*pfi)(nce, arg1);
				NCE_REFRELE(nce);
			} else {
				NCE_REFHOLD_NOTR(nce);
				(*pfi)(nce, arg1);
				NCE_REFRELE_NOTR(nce);
			}
		}
	}
	mutex_enter(&ndp->ndp_g_lock);
	ndp->ndp_g_walker--;
	/*
	 * While NCE's are removed from global list they are placed
	 * in a private list, to be passed to nce_ire_delete_list().
	 * The reason is, there may be ires pointing to this nce
	 * which needs to cleaned up.
	 */
	if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) {
		/* Time to delete condemned entries */
		for (ncep = ndp->nce_hash_tbl;
		    ncep < A_END(ndp->nce_hash_tbl); ncep++) {
			nce = *ncep;
			if (nce != NULL) {
				nce_remove(ndp, nce, &free_nce_list);
			}
		}
		nce = ndp->nce_mask_entries;
		if (nce != NULL) {
			nce_remove(ndp, nce, &free_nce_list);
		}
		ndp->ndp_g_walker_cleanup = B_FALSE;
	}

	mutex_exit(&ndp->ndp_g_lock);

	/* The ire cleanup is done after ndp_g_lock has been dropped. */
	if (free_nce_list != NULL) {
		nce_ire_delete_list(free_nce_list);
	}
}

/*
 * Walk everything: the IPv4 table first, then the IPv6 table, both with
 * traced references.
 * Note that ill can be NULL hence can't derive the ipst from it.
 */
void
ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst)
{
	ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE);
	ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE);
}

/*
 * Process resolve requests.  Handles both mapped entries
 * as well as cases that needs to be send out on the wire.
 * Lookup a NCE for a given IRE.  Regardless of whether one exists
 * or one is created, we defer making ire point to nce until the
 * ire is actually added at which point the nce_refcnt on the nce is
 * incremented.  This is done primarily to have symmetry between ire_add()
 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
 *
 * Returns:
 *	0		the nce exists and is past ND_INCOMPLETE (or the ill
 *			is XRESOLV and the entry was just created); for a
 *			multicast dst, the result of nce_set_multicast().
 *	EINPROGRESS	resolution is under way and mp has been queued.
 *	ENOMEM		ip_prepend_zoneid() failed.
 *	EBUSY		nce_solicit() returned 0; the new nce is deleted.
 *	other		error from ndp_lookup_then_add_v6().
 * The "caller will free mp" notes below apply to the ENOMEM and EBUSY
 * returns for a newly created entry.
 */
int
ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid)
{
	nce_t		*nce;
	int		err = 0;
	uint32_t	ms;
	mblk_t		*mp_nce = NULL;
	ip_stack_t	*ipst = ill->ill_ipst;

	ASSERT(ill->ill_isv6);
	if (IN6_IS_ADDR_MULTICAST(dst)) {
		err = nce_set_multicast(ill, dst);
		return (err);
	}
	err = ndp_lookup_then_add_v6(ill,
	    NULL,	/* No hardware address */
	    dst,
	    &ipv6_all_ones,
	    &ipv6_all_zeros,
	    0,
	    (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0,
	    ND_INCOMPLETE,
	    &nce);

	switch (err) {
	case 0:
		/*
		 * New cache entry was created.  Make sure that the state
		 * is still ND_INCOMPLETE.  It can be in some other state
		 * even before we send out the solicitation as we could
		 * get un-solicited advertisements.
		 *
		 * If this is an XRESOLV interface, simply return 0,
		 * since we don't want to solicit just yet.
		 */
		if (ill->ill_flags & ILLF_XRESOLV) {
			NCE_REFRELE(nce);
			return (0);
		}
		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		mutex_enter(&nce->nce_lock);
		if (nce->nce_state != ND_INCOMPLETE) {
			/* Already resolved by the time we got here. */
			mutex_exit(&nce->nce_lock);
			rw_exit(&ipst->ips_ill_g_lock);
			NCE_REFRELE(nce);
			return (0);
		}
		mp_nce = ip_prepend_zoneid(mp, zoneid, ipst);
		if (mp_nce == NULL) {
			/* The caller will free mp */
			mutex_exit(&nce->nce_lock);
			rw_exit(&ipst->ips_ill_g_lock);
			ndp_delete(nce);
			NCE_REFRELE(nce);
			return (ENOMEM);
		}
		/* A non-zero result is used as the timer interval below. */
		ms = nce_solicit(nce, mp_nce);
		rw_exit(&ipst->ips_ill_g_lock);
		if (ms == 0) {
			/* The caller will free mp */
			if (mp_nce != mp)
				freeb(mp_nce);
			mutex_exit(&nce->nce_lock);
			ndp_delete(nce);
			NCE_REFRELE(nce);
			return (EBUSY);
		}
		mutex_exit(&nce->nce_lock);
		NDP_RESTART_TIMER(nce, (clock_t)ms);
		NCE_REFRELE(nce);
		return (EINPROGRESS);
	case EEXIST:
		/* Resolution in progress just queue the packet */
		mutex_enter(&nce->nce_lock);
		if (nce->nce_state == ND_INCOMPLETE) {
			mp_nce = ip_prepend_zoneid(mp, zoneid, ipst);
			if (mp_nce == NULL) {
				err = ENOMEM;
			} else {
				nce_queue_mp(nce, mp_nce);
				err = EINPROGRESS;
			}
		} else {
			/*
			 * Any other state implies we have
			 * a nce but IRE needs to be added ...
			 * ire_add_v6() will take care of the
			 * the case when the nce becomes CONDEMNED
			 * before the ire is added to the table.
			 */
			err = 0;
		}
		mutex_exit(&nce->nce_lock);
		NCE_REFRELE(nce);
		break;
	default:
		ip1dbg(("ndp_resolver: Can't create NCE %d\n", err));
		break;
	}
	return (err);
}

/*
 * When there is no resolver, the link layer template is passed in
 * the IRE.
 * Lookup a NCE for a given IRE.  Regardless of whether one exists
 * or one is created, we defer making ire point to nce until the
 * ire is actually added at which point the nce_refcnt on the nce is
 * incremented.  This is done primarily to have symmetry between ire_add()
 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
 *
 * Returns 0 when the entry exists or was created (EEXIST is mapped to 0),
 * the result of nce_set_multicast() for a multicast dst, or the error
 * from ndp_lookup_then_add_v6().
 */
int
ndp_noresolver(ill_t *ill, const in6_addr_t *dst)
{
	nce_t		*nce;
	int		err = 0;

	ASSERT(ill != NULL);
	ASSERT(ill->ill_isv6);
	if (IN6_IS_ADDR_MULTICAST(dst)) {
		err = nce_set_multicast(ill, dst);
		return (err);
	}

	err = ndp_lookup_then_add_v6(ill,
	    NULL,	/* hardware address */
	    dst,
	    &ipv6_all_ones,
	    &ipv6_all_zeros,
	    0,
	    (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0,
	    ND_REACHABLE,
	    &nce);

	switch (err) {
	case 0:
		/*
		 * Cache entry with a proper resolver cookie was
		 * created.
		 */
		NCE_REFRELE(nce);
		break;
	case EEXIST:
		/* An existing entry is as good as a new one here. */
		err = 0;
		NCE_REFRELE(nce);
		break;
	default:
		ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err));
		break;
	}
	return (err);
}

/*
 * For each interface an entry is added for the unspecified multicast group.
 * Here that mapping is used to form the multicast cache entry for a particular
 * multicast destination.
1173 */ 1174 static int 1175 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1176 { 1177 nce_t *mnce; /* Multicast mapping entry */ 1178 nce_t *nce; 1179 uchar_t *hw_addr = NULL; 1180 int err = 0; 1181 ip_stack_t *ipst = ill->ill_ipst; 1182 1183 ASSERT(ill != NULL); 1184 ASSERT(ill->ill_isv6); 1185 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1186 1187 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1188 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst)); 1189 nce = nce_lookup_addr(ill, dst, nce); 1190 if (nce != NULL) { 1191 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1192 NCE_REFRELE(nce); 1193 return (0); 1194 } 1195 /* No entry, now lookup for a mapping this should never fail */ 1196 mnce = nce_lookup_mapping(ill, dst); 1197 if (mnce == NULL) { 1198 /* Something broken for the interface. */ 1199 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1200 return (ESRCH); 1201 } 1202 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1203 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1204 /* 1205 * For IRE_IF_RESOLVER a hardware mapping can be 1206 * generated, for IRE_IF_NORESOLVER, resolution cookie 1207 * in the ill is copied in ndp_add_v6(). 1208 */ 1209 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1210 if (hw_addr == NULL) { 1211 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1212 NCE_REFRELE(mnce); 1213 return (ENOMEM); 1214 } 1215 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1216 } 1217 NCE_REFRELE(mnce); 1218 /* 1219 * IRE_IF_NORESOLVER type simply copies the resolution 1220 * cookie passed in. So no hw_addr is needed. 
1221 */ 1222 err = ndp_add_v6(ill, 1223 hw_addr, 1224 dst, 1225 &ipv6_all_ones, 1226 &ipv6_all_zeros, 1227 0, 1228 NCE_F_NONUD, 1229 ND_REACHABLE, 1230 &nce); 1231 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1232 if (hw_addr != NULL) 1233 kmem_free(hw_addr, ill->ill_nd_lla_len); 1234 if (err != 0) { 1235 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1236 return (err); 1237 } 1238 NCE_REFRELE(nce); 1239 return (0); 1240 } 1241 1242 /* 1243 * Return the link layer address, and any flags of a nce. 1244 */ 1245 int 1246 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1247 { 1248 nce_t *nce; 1249 in6_addr_t *addr; 1250 sin6_t *sin6; 1251 dl_unitdata_req_t *dl; 1252 1253 ASSERT(ill != NULL && ill->ill_isv6); 1254 sin6 = (sin6_t *)&lnr->lnr_addr; 1255 addr = &sin6->sin6_addr; 1256 1257 nce = ndp_lookup_v6(ill, addr, B_FALSE); 1258 if (nce == NULL) 1259 return (ESRCH); 1260 /* If in INCOMPLETE state, no link layer address is available yet */ 1261 if (nce->nce_state == ND_INCOMPLETE) 1262 goto done; 1263 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1264 if (ill->ill_flags & ILLF_XRESOLV) 1265 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1266 else 1267 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1268 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1269 sizeof (lnr->lnr_hdw_addr)); 1270 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1271 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1272 if (nce->nce_flags & NCE_F_ISROUTER) 1273 lnr->lnr_flags = NDF_ISROUTER_ON; 1274 if (nce->nce_flags & NCE_F_ANYCAST) 1275 lnr->lnr_flags |= NDF_ANYCAST_ON; 1276 done: 1277 NCE_REFRELE(nce); 1278 return (0); 1279 } 1280 1281 /* 1282 * Send Enable/Disable multicast reqs to driver. 
1283 */ 1284 int 1285 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1286 uint32_t hw_addr_offset, mblk_t *mp) 1287 { 1288 nce_t *nce; 1289 uchar_t *hw_addr; 1290 ip_stack_t *ipst = ill->ill_ipst; 1291 1292 ASSERT(ill != NULL && ill->ill_isv6); 1293 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1294 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1295 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1296 freemsg(mp); 1297 return (EINVAL); 1298 } 1299 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1300 nce = nce_lookup_mapping(ill, addr); 1301 if (nce == NULL) { 1302 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1303 freemsg(mp); 1304 return (ESRCH); 1305 } 1306 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1307 /* 1308 * Update dl_addr_length and dl_addr_offset for primitives that 1309 * have physical addresses as opposed to full saps 1310 */ 1311 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1312 case DL_ENABMULTI_REQ: 1313 /* Track the state if this is the first enabmulti */ 1314 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1315 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1316 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1317 break; 1318 case DL_DISABMULTI_REQ: 1319 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1320 break; 1321 default: 1322 NCE_REFRELE(nce); 1323 ip1dbg(("ndp_mcastreq: default\n")); 1324 return (EINVAL); 1325 } 1326 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1327 NCE_REFRELE(nce); 1328 ill_dlpi_send(ill, mp); 1329 return (0); 1330 } 1331 1332 /* 1333 * Send a neighbor solicitation. 1334 * Returns number of milliseconds after which we should either rexmit or abort. 1335 * Return of zero means we should abort. 1336 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1337 * 1338 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1339 * the packet. 1340 * NOTE: This routine does not consume mp. 
1341 */ 1342 uint32_t 1343 nce_solicit(nce_t *nce, mblk_t *mp) 1344 { 1345 ill_t *ill; 1346 ill_t *src_ill; 1347 ip6_t *ip6h; 1348 in6_addr_t src; 1349 in6_addr_t dst; 1350 ipif_t *ipif; 1351 ip6i_t *ip6i; 1352 boolean_t dropped = B_FALSE; 1353 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 1354 1355 ASSERT(RW_READ_HELD(&ipst->ips_ill_g_lock)); 1356 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1357 ill = nce->nce_ill; 1358 ASSERT(ill != NULL); 1359 1360 if (nce->nce_rcnt == 0) { 1361 return (0); 1362 } 1363 1364 if (mp == NULL) { 1365 ASSERT(nce->nce_qd_mp != NULL); 1366 mp = nce->nce_qd_mp; 1367 } else { 1368 nce_queue_mp(nce, mp); 1369 } 1370 1371 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1372 if (mp->b_datap->db_type == M_CTL) 1373 mp = mp->b_cont; 1374 1375 ip6h = (ip6_t *)mp->b_rptr; 1376 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1377 /* 1378 * This message should have been pulled up already in 1379 * ip_wput_v6. We can't do pullups here because the message 1380 * could be from the nce_qd_mp which could have b_next/b_prev 1381 * non-NULL. 1382 */ 1383 ip6i = (ip6i_t *)ip6h; 1384 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1385 sizeof (ip6i_t) + IPV6_HDR_LEN); 1386 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1387 } 1388 src = ip6h->ip6_src; 1389 /* 1390 * If the src of outgoing packet is one of the assigned interface 1391 * addresses use it, otherwise we will pick the source address below. 1392 */ 1393 src_ill = ill; 1394 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1395 if (ill->ill_group != NULL) 1396 src_ill = ill->ill_group->illgrp_ill; 1397 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1398 for (ipif = src_ill->ill_ipif; ipif != NULL; 1399 ipif = ipif->ipif_next) { 1400 if (IN6_ARE_ADDR_EQUAL(&src, 1401 &ipif->ipif_v6lcl_addr)) { 1402 break; 1403 } 1404 } 1405 if (ipif != NULL) 1406 break; 1407 } 1408 /* 1409 * If no relevant ipif can be found, then it's not one of our 1410 * addresses. Reset to :: and let nce_xmit. 
If an ipif can be 1411 * found, but it's not yet done with DAD verification, then 1412 * just postpone this transmission until later. 1413 */ 1414 if (src_ill == NULL) 1415 src = ipv6_all_zeros; 1416 else if (!ipif->ipif_addr_ready) 1417 return (ill->ill_reachable_retrans_time); 1418 } 1419 dst = nce->nce_addr; 1420 /* 1421 * If source address is unspecified, nce_xmit will choose 1422 * one for us and initialize the hardware address also 1423 * appropriately. 1424 */ 1425 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1426 src_ill = NULL; 1427 nce->nce_rcnt--; 1428 mutex_exit(&nce->nce_lock); 1429 rw_exit(&ipst->ips_ill_g_lock); 1430 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1431 &dst, 0); 1432 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1433 mutex_enter(&nce->nce_lock); 1434 if (dropped) 1435 nce->nce_rcnt++; 1436 return (ill->ill_reachable_retrans_time); 1437 } 1438 1439 /* 1440 * Attempt to recover an address on an interface that's been marked as a 1441 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1442 * no easy way to just probe the address and have the right thing happen if 1443 * it's no longer in use. Instead, we just bring it up normally and allow the 1444 * regular interface start-up logic to probe for a remaining duplicate and take 1445 * us back down if necessary. 1446 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1447 * ip_ndp_excl. 1448 */ 1449 /* ARGSUSED */ 1450 static void 1451 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1452 { 1453 ill_t *ill = rq->q_ptr; 1454 ipif_t *ipif; 1455 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1456 1457 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1458 /* 1459 * We do not support recovery of proxy ARP'd interfaces, 1460 * because the system lacks a complete proxy ARP mechanism. 
1461 */ 1462 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1463 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1464 continue; 1465 } 1466 1467 /* 1468 * If we have already recovered or if the interface is going 1469 * away, then ignore. 1470 */ 1471 mutex_enter(&ill->ill_lock); 1472 if (!(ipif->ipif_flags & IPIF_DUPLICATE) || 1473 (ipif->ipif_flags & (IPIF_MOVING | IPIF_CONDEMNED))) { 1474 mutex_exit(&ill->ill_lock); 1475 continue; 1476 } 1477 1478 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1479 ill->ill_ipif_dup_count--; 1480 mutex_exit(&ill->ill_lock); 1481 ipif->ipif_was_dup = B_TRUE; 1482 1483 if (ipif_ndp_up(ipif) != EINPROGRESS) 1484 (void) ipif_up_done_v6(ipif); 1485 } 1486 freeb(mp); 1487 } 1488 1489 /* 1490 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1491 * As long as someone else holds the address, the interface will stay down. 1492 * When that conflict goes away, the interface is brought back up. This is 1493 * done so that accidental shutdowns of addresses aren't made permanent. Your 1494 * server will recover from a failure. 1495 * 1496 * For DHCP and temporary addresses, recovery is not done in the kernel. 1497 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1498 * 1499 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1500 */ 1501 static void 1502 ipif6_dup_recovery(void *arg) 1503 { 1504 ipif_t *ipif = arg; 1505 1506 ipif->ipif_recovery_id = 0; 1507 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1508 return; 1509 1510 /* 1511 * No lock, because this is just an optimization. 
1512 */ 1513 if (ipif->ipif_state_flags & (IPIF_MOVING | IPIF_CONDEMNED)) 1514 return; 1515 1516 /* If the link is down, we'll retry this later */ 1517 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1518 return; 1519 1520 ndp_do_recovery(ipif); 1521 } 1522 1523 /* 1524 * Perform interface recovery by forcing the duplicate interfaces up and 1525 * allowing the system to determine which ones should stay up. 1526 * 1527 * Called both by recovery timer expiry and link-up notification. 1528 */ 1529 void 1530 ndp_do_recovery(ipif_t *ipif) 1531 { 1532 ill_t *ill = ipif->ipif_ill; 1533 mblk_t *mp; 1534 ip_stack_t *ipst = ill->ill_ipst; 1535 1536 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1537 if (mp == NULL) { 1538 mutex_enter(&ill->ill_lock); 1539 if (ipif->ipif_recovery_id == 0 && 1540 !(ipif->ipif_state_flags & (IPIF_MOVING | 1541 IPIF_CONDEMNED))) { 1542 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1543 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1544 } 1545 mutex_exit(&ill->ill_lock); 1546 } else { 1547 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1548 sizeof (ipif->ipif_v6lcl_addr)); 1549 ill_refhold(ill); 1550 qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP, 1551 B_FALSE); 1552 } 1553 } 1554 1555 /* 1556 * Find the solicitation in the given message, and extract printable details 1557 * (MAC and IP addresses) from it. 1558 */ 1559 static nd_neighbor_solicit_t * 1560 ip_ndp_find_solicitation(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, char *hbuf, 1561 size_t hlen, char *sbuf, size_t slen, uchar_t **haddr) 1562 { 1563 nd_neighbor_solicit_t *ns; 1564 ip6_t *ip6h; 1565 uchar_t *addr; 1566 int alen; 1567 1568 alen = 0; 1569 ip6h = (ip6_t *)mp->b_rptr; 1570 if (dl_mp == NULL) { 1571 nd_opt_hdr_t *opt; 1572 int nslen; 1573 1574 /* 1575 * If it's from the fast-path, then it can't be a probe 1576 * message, and thus must include the source linkaddr option. 1577 * Extract that here. 
1578 */ 1579 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1580 nslen = mp->b_wptr - (uchar_t *)ns; 1581 if ((nslen -= sizeof (*ns)) > 0) { 1582 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), nslen, 1583 ND_OPT_SOURCE_LINKADDR); 1584 if (opt != NULL && 1585 opt->nd_opt_len * 8 - sizeof (*opt) >= 1586 ill->ill_nd_lla_len) { 1587 addr = (uchar_t *)(opt + 1); 1588 alen = ill->ill_nd_lla_len; 1589 } 1590 } 1591 /* 1592 * We cheat a bit here for the sake of printing usable log 1593 * messages in the rare case where the reply we got was unicast 1594 * without a source linkaddr option, and the interface is in 1595 * fastpath mode. (Sigh.) 1596 */ 1597 if (alen == 0 && ill->ill_type == IFT_ETHER && 1598 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1599 struct ether_header *pether; 1600 1601 pether = (struct ether_header *)((char *)ip6h - 1602 sizeof (*pether)); 1603 addr = pether->ether_shost.ether_addr_octet; 1604 alen = ETHERADDRL; 1605 } 1606 } else { 1607 dl_unitdata_ind_t *dlu; 1608 1609 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1610 alen = dlu->dl_src_addr_length; 1611 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1612 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1613 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1614 if (ill->ill_sap_length < 0) { 1615 alen += ill->ill_sap_length; 1616 } else { 1617 addr += ill->ill_sap_length; 1618 alen -= ill->ill_sap_length; 1619 } 1620 } 1621 } 1622 if (alen > 0) { 1623 *haddr = addr; 1624 (void) mac_colon_addr(addr, alen, hbuf, hlen); 1625 } else { 1626 *haddr = NULL; 1627 (void) strcpy(hbuf, "?"); 1628 } 1629 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1630 (void) inet_ntop(AF_INET6, &ns->nd_ns_target, sbuf, slen); 1631 return (ns); 1632 } 1633 1634 /* 1635 * This is for exclusive changes due to NDP duplicate address detection 1636 * failure. 
1637 */ 1638 /* ARGSUSED */ 1639 static void 1640 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1641 { 1642 ill_t *ill = rq->q_ptr; 1643 ipif_t *ipif; 1644 char ibuf[LIFNAMSIZ + 10]; /* 10 digits for logical i/f number */ 1645 char hbuf[MAC_STR_LEN]; 1646 char sbuf[INET6_ADDRSTRLEN]; 1647 nd_neighbor_solicit_t *ns; 1648 mblk_t *dl_mp = NULL; 1649 uchar_t *haddr; 1650 ip_stack_t *ipst = ill->ill_ipst; 1651 1652 if (DB_TYPE(mp) != M_DATA) { 1653 dl_mp = mp; 1654 mp = mp->b_cont; 1655 } 1656 ns = ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, sizeof (hbuf), sbuf, 1657 sizeof (sbuf), &haddr); 1658 if (haddr != NULL && 1659 bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) { 1660 /* 1661 * Ignore conflicts generated by misbehaving switches that just 1662 * reflect our own messages back to us. 1663 */ 1664 goto ignore_conflict; 1665 } 1666 1667 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1668 1669 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1670 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 1671 &ns->nd_ns_target)) { 1672 continue; 1673 } 1674 1675 /* If it's already marked, then don't do anything. */ 1676 if (ipif->ipif_flags & IPIF_DUPLICATE) 1677 continue; 1678 1679 /* 1680 * If this is a failure during duplicate recovery, then don't 1681 * complain. It may take a long time to recover. 
1682 */ 1683 if (!ipif->ipif_was_dup) { 1684 ipif_get_name(ipif, ibuf, sizeof (ibuf)); 1685 cmn_err(CE_WARN, "%s has duplicate address %s (in " 1686 "use by %s); disabled", ibuf, sbuf, hbuf); 1687 } 1688 mutex_enter(&ill->ill_lock); 1689 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1690 ipif->ipif_flags |= IPIF_DUPLICATE; 1691 ill->ill_ipif_dup_count++; 1692 mutex_exit(&ill->ill_lock); 1693 (void) ipif_down(ipif, NULL, NULL); 1694 ipif_down_tail(ipif); 1695 mutex_enter(&ill->ill_lock); 1696 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1697 ill->ill_net_type == IRE_IF_RESOLVER && 1698 !(ipif->ipif_state_flags & (IPIF_MOVING | 1699 IPIF_CONDEMNED)) && 1700 ipst->ips_ip_dup_recovery > 0) { 1701 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1702 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1703 } 1704 mutex_exit(&ill->ill_lock); 1705 } 1706 ignore_conflict: 1707 if (dl_mp != NULL) 1708 freeb(dl_mp); 1709 freemsg(mp); 1710 } 1711 1712 /* 1713 * Handle failure by tearing down the ipifs with the specified address. Note 1714 * that tearing down the ipif also means deleting the nce through ipif_down, so 1715 * it's not possible to do recovery by just restarting the nce timer. Instead, 1716 * we start a timer on the ipif. 1717 */ 1718 static void 1719 ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1720 { 1721 if ((mp = copymsg(mp)) != NULL) { 1722 if (dl_mp == NULL) 1723 dl_mp = mp; 1724 else if ((dl_mp = copyb(dl_mp)) != NULL) 1725 dl_mp->b_cont = mp; 1726 if (dl_mp == NULL) { 1727 freemsg(mp); 1728 } else { 1729 ill_refhold(ill); 1730 qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP, 1731 B_FALSE); 1732 } 1733 } 1734 ndp_delete(nce); 1735 } 1736 1737 /* 1738 * Handle a discovered conflict: some other system is advertising that it owns 1739 * one of our IP addresses. We need to defend ourselves, or just shut down the 1740 * interface. 
1741 */ 1742 static void 1743 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1744 { 1745 ipif_t *ipif; 1746 uint32_t now; 1747 uint_t maxdefense; 1748 uint_t defs; 1749 ip_stack_t *ipst = ill->ill_ipst; 1750 1751 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 1752 NULL, NULL, ipst); 1753 if (ipif == NULL) 1754 return; 1755 /* 1756 * First, figure out if this address is disposable. 1757 */ 1758 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1759 maxdefense = ipst->ips_ip_max_temp_defend; 1760 else 1761 maxdefense = ipst->ips_ip_max_defend; 1762 1763 /* 1764 * Now figure out how many times we've defended ourselves. Ignore 1765 * defenses that happened long in the past. 1766 */ 1767 now = gethrestime_sec(); 1768 mutex_enter(&nce->nce_lock); 1769 if ((defs = nce->nce_defense_count) > 0 && 1770 now - nce->nce_defense_time > ipst->ips_ip_defend_interval) { 1771 nce->nce_defense_count = defs = 0; 1772 } 1773 nce->nce_defense_count++; 1774 nce->nce_defense_time = now; 1775 mutex_exit(&nce->nce_lock); 1776 ipif_refrele(ipif); 1777 1778 /* 1779 * If we've defended ourselves too many times already, then give up and 1780 * tear down the interface(s) using this address. Otherwise, defend by 1781 * sending out an unsolicited Neighbor Advertisement. 
1782 */ 1783 if (defs >= maxdefense) { 1784 ip_ndp_failure(ill, mp, dl_mp, nce); 1785 } else { 1786 char hbuf[MAC_STR_LEN]; 1787 char sbuf[INET6_ADDRSTRLEN]; 1788 uchar_t *haddr; 1789 1790 (void) ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, 1791 sizeof (hbuf), sbuf, sizeof (sbuf), &haddr); 1792 cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1793 hbuf, sbuf, ill->ill_name); 1794 (void) nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, B_FALSE, 1795 &nce->nce_addr, &ipv6_all_hosts_mcast, 1796 nce_advert_flags(nce)); 1797 } 1798 } 1799 1800 static void 1801 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1802 { 1803 nd_neighbor_solicit_t *ns; 1804 uint32_t hlen = ill->ill_nd_lla_len; 1805 uchar_t *haddr = NULL; 1806 icmp6_t *icmp_nd; 1807 ip6_t *ip6h; 1808 nce_t *our_nce = NULL; 1809 in6_addr_t target; 1810 in6_addr_t src; 1811 int len; 1812 int flag = 0; 1813 nd_opt_hdr_t *opt = NULL; 1814 boolean_t bad_solicit = B_FALSE; 1815 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1816 1817 ip6h = (ip6_t *)mp->b_rptr; 1818 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1819 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1820 src = ip6h->ip6_src; 1821 ns = (nd_neighbor_solicit_t *)icmp_nd; 1822 target = ns->nd_ns_target; 1823 if (IN6_IS_ADDR_MULTICAST(&target)) { 1824 if (ip_debug > 2) { 1825 /* ip1dbg */ 1826 pr_addr_dbg("ndp_input_solicit: Target is" 1827 " multicast! 
%s\n", AF_INET6, &target); 1828 } 1829 bad_solicit = B_TRUE; 1830 goto done; 1831 } 1832 if (len > sizeof (nd_neighbor_solicit_t)) { 1833 /* Options present */ 1834 opt = (nd_opt_hdr_t *)&ns[1]; 1835 len -= sizeof (nd_neighbor_solicit_t); 1836 if (!ndp_verify_optlen(opt, len)) { 1837 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1838 bad_solicit = B_TRUE; 1839 goto done; 1840 } 1841 } 1842 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1843 /* Check to see if this is a valid DAD solicitation */ 1844 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1845 if (ip_debug > 2) { 1846 /* ip1dbg */ 1847 pr_addr_dbg("ndp_input_solicit: IPv6 " 1848 "Destination is not solicited node " 1849 "multicast %s\n", AF_INET6, 1850 &ip6h->ip6_dst); 1851 } 1852 bad_solicit = B_TRUE; 1853 goto done; 1854 } 1855 } 1856 1857 our_nce = ndp_lookup_v6(ill, &target, B_FALSE); 1858 /* 1859 * If this is a valid Solicitation, a permanent 1860 * entry should exist in the cache 1861 */ 1862 if (our_nce == NULL || 1863 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1864 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1865 "ifname=%s ", ill->ill_name)); 1866 if (ip_debug > 2) { 1867 /* ip1dbg */ 1868 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1869 } 1870 bad_solicit = B_TRUE; 1871 goto done; 1872 } 1873 1874 /* At this point we should have a verified NS per spec */ 1875 if (opt != NULL) { 1876 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1877 if (opt != NULL) { 1878 haddr = (uchar_t *)&opt[1]; 1879 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 1880 hlen == 0) { 1881 ip1dbg(("ndp_input_advert: bad SLLA\n")); 1882 bad_solicit = B_TRUE; 1883 goto done; 1884 } 1885 } 1886 } 1887 1888 /* If sending directly to peer, set the unicast flag */ 1889 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 1890 flag |= NDP_UNICAST; 1891 1892 /* 1893 * Create/update the entry for the soliciting node. 1894 * or respond to outstanding queries, don't if 1895 * the source is unspecified address. 
1896 */ 1897 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1898 int err; 1899 nce_t *nnce; 1900 1901 ASSERT(ill->ill_isv6); 1902 /* 1903 * Regular solicitations *must* include the Source Link-Layer 1904 * Address option. Ignore messages that do not. 1905 */ 1906 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1907 ip1dbg(("ndp_input_solicit: source link-layer address " 1908 "option missing with a specified source.\n")); 1909 bad_solicit = B_TRUE; 1910 goto done; 1911 } 1912 1913 /* 1914 * This is a regular solicitation. If we're still in the 1915 * process of verifying the address, then don't respond at all 1916 * and don't keep track of the sender. 1917 */ 1918 if (our_nce->nce_state == ND_PROBE) 1919 goto done; 1920 1921 /* 1922 * If the solicitation doesn't have sender hardware address 1923 * (legal for unicast solicitation), then process without 1924 * installing the return NCE. Either we already know it, or 1925 * we'll be forced to look it up when (and if) we reply to the 1926 * packet. 1927 */ 1928 if (haddr == NULL) 1929 goto no_source; 1930 1931 err = ndp_lookup_then_add_v6(ill, 1932 haddr, 1933 &src, /* Soliciting nodes address */ 1934 &ipv6_all_ones, 1935 &ipv6_all_zeros, 1936 0, 1937 0, 1938 ND_STALE, 1939 &nnce); 1940 switch (err) { 1941 case 0: 1942 /* done with this entry */ 1943 NCE_REFRELE(nnce); 1944 break; 1945 case EEXIST: 1946 /* 1947 * B_FALSE indicates this is not an 1948 * an advertisement. 1949 */ 1950 ndp_process(nnce, haddr, 0, B_FALSE); 1951 NCE_REFRELE(nnce); 1952 break; 1953 default: 1954 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1955 err)); 1956 goto done; 1957 } 1958 no_source: 1959 flag |= NDP_SOLICITED; 1960 } else { 1961 /* 1962 * No source link layer address option should be present in a 1963 * valid DAD request. 
1964 */ 1965 if (haddr != NULL) { 1966 ip1dbg(("ndp_input_solicit: source link-layer address " 1967 "option present with an unspecified source.\n")); 1968 bad_solicit = B_TRUE; 1969 goto done; 1970 } 1971 if (our_nce->nce_state == ND_PROBE) { 1972 /* 1973 * Internally looped-back probes won't have DLPI 1974 * attached to them. External ones (which are sent by 1975 * multicast) always will. Just ignore our own 1976 * transmissions. 1977 */ 1978 if (dl_mp != NULL) { 1979 /* 1980 * If someone else is probing our address, then 1981 * we've crossed wires. Declare failure. 1982 */ 1983 ip_ndp_failure(ill, mp, dl_mp, our_nce); 1984 } 1985 goto done; 1986 } 1987 /* 1988 * This is a DAD probe. Multicast the advertisement to the 1989 * all-nodes address. 1990 */ 1991 src = ipv6_all_hosts_mcast; 1992 } 1993 flag |= nce_advert_flags(our_nce); 1994 /* Response to a solicitation */ 1995 (void) nce_xmit(ill, 1996 ND_NEIGHBOR_ADVERT, 1997 ill, /* ill to be used for extracting ill_nd_lla */ 1998 B_TRUE, /* use ill_nd_lla */ 1999 &target, /* Source and target of the advertisement pkt */ 2000 &src, /* IP Destination (source of original pkt) */ 2001 flag); 2002 done: 2003 if (bad_solicit) 2004 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 2005 if (our_nce != NULL) 2006 NCE_REFRELE(our_nce); 2007 } 2008 2009 void 2010 ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2011 { 2012 nd_neighbor_advert_t *na; 2013 uint32_t hlen = ill->ill_nd_lla_len; 2014 uchar_t *haddr = NULL; 2015 icmp6_t *icmp_nd; 2016 ip6_t *ip6h; 2017 nce_t *dst_nce = NULL; 2018 in6_addr_t target; 2019 nd_opt_hdr_t *opt = NULL; 2020 int len; 2021 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2022 ip_stack_t *ipst = ill->ill_ipst; 2023 2024 ip6h = (ip6_t *)mp->b_rptr; 2025 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2026 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2027 na = (nd_neighbor_advert_t *)icmp_nd; 2028 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 2029 (na->nd_na_flags_reserved & 
ND_NA_FLAG_SOLICITED)) { 2030 ip1dbg(("ndp_input_advert: Target is multicast but the " 2031 "solicited flag is not zero\n")); 2032 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2033 return; 2034 } 2035 target = na->nd_na_target; 2036 if (IN6_IS_ADDR_MULTICAST(&target)) { 2037 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 2038 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2039 return; 2040 } 2041 if (len > sizeof (nd_neighbor_advert_t)) { 2042 opt = (nd_opt_hdr_t *)&na[1]; 2043 if (!ndp_verify_optlen(opt, 2044 len - sizeof (nd_neighbor_advert_t))) { 2045 ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); 2046 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2047 return; 2048 } 2049 /* At this point we have a verified NA per spec */ 2050 len -= sizeof (nd_neighbor_advert_t); 2051 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 2052 if (opt != NULL) { 2053 haddr = (uchar_t *)&opt[1]; 2054 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 2055 hlen == 0) { 2056 ip1dbg(("ndp_input_advert: bad SLLA\n")); 2057 BUMP_MIB(mib, 2058 ipv6IfIcmpInBadNeighborAdvertisements); 2059 return; 2060 } 2061 } 2062 } 2063 2064 /* 2065 * If this interface is part of the group look at all the 2066 * ills in the group. 
2067 */ 2068 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2069 if (ill->ill_group != NULL) 2070 ill = ill->ill_group->illgrp_ill; 2071 2072 for (; ill != NULL; ill = ill->ill_group_next) { 2073 mutex_enter(&ill->ill_lock); 2074 if (!ILL_CAN_LOOKUP(ill)) { 2075 mutex_exit(&ill->ill_lock); 2076 continue; 2077 } 2078 ill_refhold_locked(ill); 2079 mutex_exit(&ill->ill_lock); 2080 dst_nce = ndp_lookup_v6(ill, &target, B_FALSE); 2081 /* We have to drop the lock since ndp_process calls put* */ 2082 rw_exit(&ipst->ips_ill_g_lock); 2083 if (dst_nce != NULL) { 2084 if ((dst_nce->nce_flags & NCE_F_PERMANENT) && 2085 dst_nce->nce_state == ND_PROBE) { 2086 /* 2087 * Someone else sent an advertisement for an 2088 * address that we're trying to configure. 2089 * Tear it down. Note that dl_mp might be NULL 2090 * if we're getting a unicast reply. This 2091 * isn't typically done (multicast is the norm 2092 * in response to a probe), but ip_ndp_failure 2093 * will handle the dl_mp == NULL case as well. 2094 */ 2095 ip_ndp_failure(ill, mp, dl_mp, dst_nce); 2096 } else if (dst_nce->nce_flags & NCE_F_PERMANENT) { 2097 /* 2098 * Someone just announced one of our local 2099 * addresses. If it wasn't us, then this is a 2100 * conflict. Defend the address or shut it 2101 * down. 2102 */ 2103 if (dl_mp != NULL && 2104 (haddr == NULL || 2105 nce_cmp_ll_addr(dst_nce, haddr, 2106 ill->ill_nd_lla_len))) { 2107 ip_ndp_conflict(ill, mp, dl_mp, 2108 dst_nce); 2109 } 2110 } else { 2111 if (na->nd_na_flags_reserved & 2112 ND_NA_FLAG_ROUTER) { 2113 dst_nce->nce_flags |= NCE_F_ISROUTER; 2114 } 2115 /* B_TRUE indicates this an advertisement */ 2116 ndp_process(dst_nce, haddr, 2117 na->nd_na_flags_reserved, B_TRUE); 2118 } 2119 NCE_REFRELE(dst_nce); 2120 } 2121 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2122 ill_refrele(ill); 2123 } 2124 rw_exit(&ipst->ips_ill_g_lock); 2125 } 2126 2127 /* 2128 * Process NDP neighbor solicitation/advertisement messages. 
2129 * The checksum has already checked o.k before reaching here. 2130 */ 2131 void 2132 ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2133 { 2134 icmp6_t *icmp_nd; 2135 ip6_t *ip6h; 2136 int len; 2137 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2138 2139 2140 if (!pullupmsg(mp, -1)) { 2141 ip1dbg(("ndp_input: pullupmsg failed\n")); 2142 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2143 goto done; 2144 } 2145 ip6h = (ip6_t *)mp->b_rptr; 2146 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 2147 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 2148 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 2149 goto done; 2150 } 2151 /* 2152 * NDP does not accept any extension headers between the 2153 * IP header and the ICMP header since e.g. a routing 2154 * header could be dangerous. 2155 * This assumes that any AH or ESP headers are removed 2156 * by ip prior to passing the packet to ndp_input. 2157 */ 2158 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 2159 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 2160 ip6h->ip6_nxt)); 2161 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2162 goto done; 2163 } 2164 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2165 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 2166 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 2167 if (icmp_nd->icmp6_code != 0) { 2168 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 2169 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2170 goto done; 2171 } 2172 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2173 /* 2174 * Make sure packet length is large enough for either 2175 * a NS or a NA icmp packet. 
2176 */ 2177 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 2178 ip1dbg(("ndp_input: packet too short\n")); 2179 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2180 goto done; 2181 } 2182 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 2183 ndp_input_solicit(ill, mp, dl_mp); 2184 } else { 2185 ndp_input_advert(ill, mp, dl_mp); 2186 } 2187 done: 2188 freemsg(mp); 2189 } 2190 2191 /* 2192 * nce_xmit is called to form and transmit a ND solicitation or 2193 * advertisement ICMP packet. 2194 * 2195 * If the source address is unspecified and this isn't a probe (used for 2196 * duplicate address detection), an appropriate source address and link layer 2197 * address will be chosen here. The link layer address option is included if 2198 * the source is specified (i.e., all non-probe packets), and omitted (per the 2199 * specification) otherwise. 2200 * 2201 * It returns B_FALSE only if it does a successful put() to the 2202 * corresponding ill's ill_wq otherwise returns B_TRUE. 2203 */ 2204 static boolean_t 2205 nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, 2206 boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, 2207 int flag) 2208 { 2209 uint32_t len; 2210 icmp6_t *icmp6; 2211 mblk_t *mp; 2212 ip6_t *ip6h; 2213 nd_opt_hdr_t *opt; 2214 uint_t plen; 2215 ip6i_t *ip6i; 2216 ipif_t *src_ipif = NULL; 2217 uint8_t *hw_addr; 2218 zoneid_t zoneid = GLOBAL_ZONEID; 2219 2220 /* 2221 * If we have a unspecified source(sender) address, select a 2222 * proper source address for the solicitation here itself so 2223 * that we can initialize the h/w address correctly. This is 2224 * needed for interface groups as source address can come from 2225 * the whole group and the h/w address initialized from ill will 2226 * be wrong if the source address comes from a different ill. 2227 * 2228 * If the sender is specified then we use this address in order 2229 * to lookup the zoneid before calling ip_output_v6(). 
This is to 2230 * enable unicast ND_NEIGHBOR_ADVERT packets to be routed correctly 2231 * by IP (we cannot guarantee that the global zone has an interface 2232 * route to the destination). 2233 * 2234 * Note that the NA never comes here with the unspecified source 2235 * address. The following asserts that whenever the source 2236 * address is specified, the haddr also should be specified. 2237 */ 2238 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 2239 2240 if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { 2241 ASSERT(operation != ND_NEIGHBOR_ADVERT); 2242 /* 2243 * Pick a source address for this solicitation, but 2244 * restrict the selection to addresses assigned to the 2245 * output interface (or interface group). We do this 2246 * because the destination will create a neighbor cache 2247 * entry for the source address of this packet, so the 2248 * source address had better be a valid neighbor. 2249 */ 2250 src_ipif = ipif_select_source_v6(ill, target, RESTRICT_TO_ILL, 2251 IPV6_PREFER_SRC_DEFAULT, ALL_ZONES); 2252 if (src_ipif == NULL) { 2253 char buf[INET6_ADDRSTRLEN]; 2254 2255 ip1dbg(("nce_xmit: No source ipif for dst %s\n", 2256 inet_ntop(AF_INET6, (char *)target, buf, 2257 sizeof (buf)))); 2258 return (B_TRUE); 2259 } 2260 sender = &src_ipif->ipif_v6src_addr; 2261 hwaddr_ill = src_ipif->ipif_ill; 2262 } else if (!(IN6_IS_ADDR_UNSPECIFIED(sender))) { 2263 zoneid = ipif_lookup_addr_zoneid_v6(sender, ill, ill->ill_ipst); 2264 /* 2265 * It's possible for ipif_lookup_addr_zoneid_v6() to return 2266 * ALL_ZONES if it cannot find a matching ipif for the address 2267 * we are trying to use. In this case we err on the side of 2268 * trying to send the packet by defaulting to the GLOBAL_ZONEID. 2269 */ 2270 if (zoneid == ALL_ZONES) 2271 zoneid = GLOBAL_ZONEID; 2272 } 2273 2274 /* 2275 * Always make sure that the NS/NA packets don't get load 2276 * spread. 
This is needed so that the probe packets sent 2277 * by the in.mpathd daemon can really go out on the desired 2278 * interface. Probe packets are made to go out on a desired 2279 * interface by including a ip6i with ATTACH_IF flag. As these 2280 * packets indirectly end up sending/receiving NS/NA packets 2281 * (neighbor doing NUD), we have to make sure that NA 2282 * also go out on the same interface. 2283 */ 2284 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7) / 8; 2285 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 2286 plen * 8; 2287 mp = allocb(len, BPRI_LO); 2288 if (mp == NULL) { 2289 if (src_ipif != NULL) 2290 ipif_refrele(src_ipif); 2291 return (B_TRUE); 2292 } 2293 bzero((char *)mp->b_rptr, len); 2294 mp->b_wptr = mp->b_rptr + len; 2295 2296 ip6i = (ip6i_t *)mp->b_rptr; 2297 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2298 ip6i->ip6i_nxt = IPPROTO_RAW; 2299 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2300 if (flag & NDP_PROBE) 2301 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 2302 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2303 2304 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 2305 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2306 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2307 ip6h->ip6_nxt = IPPROTO_ICMPV6; 2308 ip6h->ip6_hops = IPV6_MAX_HOPS; 2309 ip6h->ip6_dst = *target; 2310 icmp6 = (icmp6_t *)&ip6h[1]; 2311 2312 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 2313 sizeof (nd_neighbor_advert_t)); 2314 2315 if (operation == ND_NEIGHBOR_SOLICIT) { 2316 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 2317 2318 if (!(flag & NDP_PROBE)) 2319 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 2320 ip6h->ip6_src = *sender; 2321 ns->nd_ns_target = *target; 2322 if (!(flag & NDP_UNICAST)) { 2323 /* Form multicast address of the target */ 2324 ip6h->ip6_dst = ipv6_solicited_node_mcast; 2325 ip6h->ip6_dst.s6_addr32[3] |= 2326 ns->nd_ns_target.s6_addr32[3]; 2327 } 2328 } else { 2329 
nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 2330 2331 ASSERT(!(flag & NDP_PROBE)); 2332 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2333 ip6h->ip6_src = *sender; 2334 na->nd_na_target = *sender; 2335 if (flag & NDP_ISROUTER) 2336 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 2337 if (flag & NDP_SOLICITED) 2338 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 2339 if (flag & NDP_ORIDE) 2340 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 2341 } 2342 2343 hw_addr = NULL; 2344 if (!(flag & NDP_PROBE)) { 2345 hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : 2346 hwaddr_ill->ill_phys_addr; 2347 if (hw_addr != NULL) { 2348 /* Fill in link layer address and option len */ 2349 opt->nd_opt_len = (uint8_t)plen; 2350 bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); 2351 } 2352 } 2353 if (hw_addr == NULL) { 2354 /* If there's no link layer address option, then strip it. */ 2355 len -= plen * 8; 2356 mp->b_wptr = mp->b_rptr + len; 2357 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2358 } 2359 2360 icmp6->icmp6_type = (uint8_t)operation; 2361 icmp6->icmp6_code = 0; 2362 /* 2363 * Prepare for checksum by putting icmp length in the icmp 2364 * checksum field. The checksum is calculated in ip_wput_v6. 2365 */ 2366 icmp6->icmp6_cksum = ip6h->ip6_plen; 2367 2368 if (src_ipif != NULL) 2369 ipif_refrele(src_ipif); 2370 2371 ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT); 2372 return (B_FALSE); 2373 } 2374 2375 /* 2376 * Make a link layer address (does not include the SAP) from an nce. 2377 * To form the link layer address, use the last four bytes of ipv6 2378 * address passed in and the fixed offset stored in nce. 
2379 */ 2380 static void 2381 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 2382 { 2383 uchar_t *mask, *to; 2384 ill_t *ill = nce->nce_ill; 2385 int len; 2386 2387 if (ill->ill_net_type == IRE_IF_NORESOLVER) 2388 return; 2389 ASSERT(nce->nce_res_mp != NULL); 2390 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 2391 ASSERT(nce->nce_flags & NCE_F_MAPPING); 2392 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 2393 ASSERT(addr != NULL); 2394 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 2395 addrpos, ill->ill_nd_lla_len); 2396 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 2397 IPV6_ADDR_LEN); 2398 mask = (uchar_t *)&nce->nce_extract_mask; 2399 mask += (IPV6_ADDR_LEN - len); 2400 addr += (IPV6_ADDR_LEN - len); 2401 to = addrpos + nce->nce_ll_extract_start; 2402 while (len-- > 0) 2403 *to++ |= *mask++ & *addr++; 2404 } 2405 2406 mblk_t * 2407 nce_udreq_alloc(ill_t *ill) 2408 { 2409 mblk_t *template_mp = NULL; 2410 dl_unitdata_req_t *dlur; 2411 int sap_length; 2412 2413 ASSERT(ill->ill_isv6); 2414 2415 sap_length = ill->ill_sap_length; 2416 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 2417 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 2418 if (template_mp == NULL) 2419 return (NULL); 2420 2421 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 2422 dlur->dl_priority.dl_min = 0; 2423 dlur->dl_priority.dl_max = 0; 2424 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 2425 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 2426 2427 /* Copy in the SAP value. */ 2428 NCE_LL_SAP_COPY(ill, template_mp); 2429 2430 return (template_mp); 2431 } 2432 2433 /* 2434 * NDP retransmit timer. 2435 * This timer goes off when: 2436 * a. It is time to retransmit NS for resolver. 2437 * b. It is time to send reachability probes. 
2438 */ 2439 void 2440 ndp_timer(void *arg) 2441 { 2442 nce_t *nce = arg; 2443 ill_t *ill = nce->nce_ill; 2444 uint32_t ms; 2445 char addrbuf[INET6_ADDRSTRLEN]; 2446 mblk_t *mp; 2447 boolean_t dropped = B_FALSE; 2448 ip_stack_t *ipst = ill->ill_ipst; 2449 2450 /* 2451 * The timer has to be cancelled by ndp_delete before doing the final 2452 * refrele. So the NCE is guaranteed to exist when the timer runs 2453 * until it clears the timeout_id. Before clearing the timeout_id 2454 * bump up the refcnt so that we can continue to use the nce 2455 */ 2456 ASSERT(nce != NULL); 2457 2458 /* 2459 * Grab the ill_g_lock now itself to avoid lock order problems. 2460 * nce_solicit needs ill_g_lock to be able to traverse ills 2461 */ 2462 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2463 mutex_enter(&nce->nce_lock); 2464 NCE_REFHOLD_LOCKED(nce); 2465 nce->nce_timeout_id = 0; 2466 2467 /* 2468 * Check the reachability state first. 2469 */ 2470 switch (nce->nce_state) { 2471 case ND_DELAY: 2472 rw_exit(&ipst->ips_ill_g_lock); 2473 nce->nce_state = ND_PROBE; 2474 mutex_exit(&nce->nce_lock); 2475 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2476 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2477 if (ip_debug > 3) { 2478 /* ip2dbg */ 2479 pr_addr_dbg("ndp_timer: state for %s changed " 2480 "to PROBE\n", AF_INET6, &nce->nce_addr); 2481 } 2482 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2483 NCE_REFRELE(nce); 2484 return; 2485 case ND_PROBE: 2486 /* must be retransmit timer */ 2487 rw_exit(&ipst->ips_ill_g_lock); 2488 nce->nce_pcnt--; 2489 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2490 nce->nce_pcnt >= -1); 2491 if (nce->nce_pcnt > 0) { 2492 /* 2493 * As per RFC2461, the nce gets deleted after 2494 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2495 * Note that the first unicast solicitation is sent 2496 * during the DELAY state. 
2497 */ 2498 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2499 nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, 2500 addrbuf, sizeof (addrbuf)))); 2501 mutex_exit(&nce->nce_lock); 2502 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, 2503 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, 2504 (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE : 2505 NDP_UNICAST); 2506 if (dropped) { 2507 mutex_enter(&nce->nce_lock); 2508 nce->nce_pcnt++; 2509 mutex_exit(&nce->nce_lock); 2510 } 2511 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 2512 } else if (nce->nce_pcnt < 0) { 2513 /* No hope, delete the nce */ 2514 nce->nce_state = ND_UNREACHABLE; 2515 mutex_exit(&nce->nce_lock); 2516 if (ip_debug > 2) { 2517 /* ip1dbg */ 2518 pr_addr_dbg("ndp_timer: Delete IRE for" 2519 " dst %s\n", AF_INET6, &nce->nce_addr); 2520 } 2521 ndp_delete(nce); 2522 } else if (!(nce->nce_flags & NCE_F_PERMANENT)) { 2523 /* Wait RetransTimer, before deleting the entry */ 2524 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2525 nce->nce_pcnt, inet_ntop(AF_INET6, 2526 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2527 mutex_exit(&nce->nce_lock); 2528 /* Wait one interval before killing */ 2529 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2530 } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { 2531 ipif_t *ipif; 2532 2533 /* 2534 * We're done probing, and we can now declare this 2535 * address to be usable. Let IP know that it's ok to 2536 * use. 
2537 */ 2538 nce->nce_state = ND_REACHABLE; 2539 mutex_exit(&nce->nce_lock); 2540 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, 2541 ALL_ZONES, NULL, NULL, NULL, NULL, ipst); 2542 if (ipif != NULL) { 2543 if (ipif->ipif_was_dup) { 2544 char ibuf[LIFNAMSIZ + 10]; 2545 char sbuf[INET6_ADDRSTRLEN]; 2546 2547 ipif->ipif_was_dup = B_FALSE; 2548 (void) inet_ntop(AF_INET6, 2549 &ipif->ipif_v6lcl_addr, 2550 sbuf, sizeof (sbuf)); 2551 ipif_get_name(ipif, ibuf, 2552 sizeof (ibuf)); 2553 cmn_err(CE_NOTE, "recovered address " 2554 "%s on %s", sbuf, ibuf); 2555 } 2556 if ((ipif->ipif_flags & IPIF_UP) && 2557 !ipif->ipif_addr_ready) { 2558 ip_rts_ifmsg(ipif); 2559 ip_rts_newaddrmsg(RTM_ADD, 0, ipif); 2560 sctp_update_ipif(ipif, SCTP_IPIF_UP); 2561 } 2562 ipif->ipif_addr_ready = 1; 2563 ipif_refrele(ipif); 2564 } 2565 /* Begin defending our new address */ 2566 nce->nce_unsolicit_count = 0; 2567 dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, 2568 B_FALSE, &nce->nce_addr, &ipv6_all_hosts_mcast, 2569 nce_advert_flags(nce)); 2570 if (dropped) { 2571 nce->nce_unsolicit_count = 1; 2572 NDP_RESTART_TIMER(nce, 2573 ipst->ips_ip_ndp_unsolicit_interval); 2574 } else if (ipst->ips_ip_ndp_defense_interval != 0) { 2575 NDP_RESTART_TIMER(nce, 2576 ipst->ips_ip_ndp_defense_interval); 2577 } 2578 } else { 2579 /* 2580 * This is an address we're probing to be our own, but 2581 * the ill is down. Wait until it comes back before 2582 * doing anything, but switch to reachable state so 2583 * that the restart will work. 2584 */ 2585 nce->nce_state = ND_REACHABLE; 2586 mutex_exit(&nce->nce_lock); 2587 } 2588 NCE_REFRELE(nce); 2589 return; 2590 case ND_INCOMPLETE: 2591 /* 2592 * Must be resolvers retransmit timer. 2593 */ 2594 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2595 ip6i_t *ip6i; 2596 ip6_t *ip6h; 2597 mblk_t *data_mp; 2598 2599 /* 2600 * Walk the list of packets queued, and see if there 2601 * are any multipathing probe packets. 
Such packets 2602 * are always queued at the head. Since this is a 2603 * retransmit timer firing, mark such packets as 2604 * delayed in ND resolution. This info will be used 2605 * in ip_wput_v6(). Multipathing probe packets will 2606 * always have an ip6i_t. Once we hit a packet without 2607 * it, we can break out of this loop. 2608 */ 2609 if (mp->b_datap->db_type == M_CTL) 2610 data_mp = mp->b_cont; 2611 else 2612 data_mp = mp; 2613 2614 ip6h = (ip6_t *)data_mp->b_rptr; 2615 if (ip6h->ip6_nxt != IPPROTO_RAW) 2616 break; 2617 2618 /* 2619 * This message should have been pulled up already in 2620 * ip_wput_v6. We can't do pullups here because the 2621 * b_next/b_prev is non-NULL. 2622 */ 2623 ip6i = (ip6i_t *)ip6h; 2624 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2625 sizeof (ip6i_t) + IPV6_HDR_LEN); 2626 2627 /* Mark this packet as delayed due to ND resolution */ 2628 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2629 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2630 } 2631 if (nce->nce_qd_mp != NULL) { 2632 ms = nce_solicit(nce, NULL); 2633 rw_exit(&ipst->ips_ill_g_lock); 2634 if (ms == 0) { 2635 if (nce->nce_state != ND_REACHABLE) { 2636 mutex_exit(&nce->nce_lock); 2637 nce_resolv_failed(nce); 2638 ndp_delete(nce); 2639 } else { 2640 mutex_exit(&nce->nce_lock); 2641 } 2642 } else { 2643 mutex_exit(&nce->nce_lock); 2644 NDP_RESTART_TIMER(nce, (clock_t)ms); 2645 } 2646 NCE_REFRELE(nce); 2647 return; 2648 } 2649 mutex_exit(&nce->nce_lock); 2650 rw_exit(&ipst->ips_ill_g_lock); 2651 NCE_REFRELE(nce); 2652 break; 2653 case ND_REACHABLE : 2654 rw_exit(&ipst->ips_ill_g_lock); 2655 if (((nce->nce_flags & NCE_F_UNSOL_ADV) && 2656 nce->nce_unsolicit_count != 0) || 2657 ((nce->nce_flags & NCE_F_PERMANENT) && 2658 ipst->ips_ip_ndp_defense_interval != 0)) { 2659 if (nce->nce_unsolicit_count > 0) 2660 nce->nce_unsolicit_count--; 2661 mutex_exit(&nce->nce_lock); 2662 dropped = nce_xmit(ill, 2663 ND_NEIGHBOR_ADVERT, 2664 ill, /* ill to be used for hw addr */ 2665 B_FALSE, /* use 
ill_phys_addr */ 2666 &nce->nce_addr, 2667 &ipv6_all_hosts_mcast, 2668 nce_advert_flags(nce)); 2669 if (dropped) { 2670 mutex_enter(&nce->nce_lock); 2671 nce->nce_unsolicit_count++; 2672 mutex_exit(&nce->nce_lock); 2673 } 2674 if (nce->nce_unsolicit_count != 0) { 2675 NDP_RESTART_TIMER(nce, 2676 ipst->ips_ip_ndp_unsolicit_interval); 2677 } else { 2678 NDP_RESTART_TIMER(nce, 2679 ipst->ips_ip_ndp_defense_interval); 2680 } 2681 } else { 2682 mutex_exit(&nce->nce_lock); 2683 } 2684 NCE_REFRELE(nce); 2685 break; 2686 default: 2687 rw_exit(&ipst->ips_ill_g_lock); 2688 mutex_exit(&nce->nce_lock); 2689 NCE_REFRELE(nce); 2690 break; 2691 } 2692 } 2693 2694 /* 2695 * Set a link layer address from the ll_addr passed in. 2696 * Copy SAP from ill. 2697 */ 2698 static void 2699 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2700 { 2701 ill_t *ill = nce->nce_ill; 2702 uchar_t *woffset; 2703 2704 ASSERT(ll_addr != NULL); 2705 /* Always called before fast_path_probe */ 2706 ASSERT(nce->nce_fp_mp == NULL); 2707 if (ill->ill_sap_length != 0) { 2708 /* 2709 * Copy the SAP type specified in the 2710 * request into the xmit template. 2711 */ 2712 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2713 } 2714 if (ill->ill_phys_addr_length > 0) { 2715 /* 2716 * The bcopy() below used to be called for the physical address 2717 * length rather than the link layer address length. For 2718 * ethernet and many other media, the phys_addr and lla are 2719 * identical. 2720 * However, with xresolv interfaces being introduced, the 2721 * phys_addr and lla are no longer the same, and the physical 2722 * address may not have any useful meaning, so we use the lla 2723 * for IPv6 address resolution and destination addressing. 2724 * 2725 * For PPP or other interfaces with a zero length 2726 * physical address, don't do anything here. 2727 * The bcopy() with a zero phys_addr length was previously 2728 * a no-op for interfaces with a zero-length physical address. 
2729 * Using the lla for them would change the way they operate. 2730 * Doing nothing in such cases preserves expected behavior. 2731 */ 2732 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2733 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2734 } 2735 } 2736 2737 static boolean_t 2738 nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) 2739 { 2740 ill_t *ill = nce->nce_ill; 2741 uchar_t *ll_offset; 2742 2743 ASSERT(nce->nce_res_mp != NULL); 2744 if (ll_addr == NULL) 2745 return (B_FALSE); 2746 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2747 if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) 2748 return (B_TRUE); 2749 return (B_FALSE); 2750 } 2751 2752 /* 2753 * Updates the link layer address or the reachability state of 2754 * a cache entry. Reset probe counter if needed. 2755 */ 2756 static void 2757 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2758 { 2759 ill_t *ill = nce->nce_ill; 2760 boolean_t need_stop_timer = B_FALSE; 2761 boolean_t need_fastpath_update = B_FALSE; 2762 2763 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2764 ASSERT(nce->nce_ipversion == IPV6_VERSION); 2765 /* 2766 * If this interface does not do NUD, there is no point 2767 * in allowing an update to the cache entry. Although 2768 * we will respond to NS. 2769 * The only time we accept an update for a resolver when 2770 * NUD is turned off is when it has just been created. 2771 * Non-Resolvers will always be created as REACHABLE. 
2772 */ 2773 if (new_state != ND_UNCHANGED) { 2774 if ((nce->nce_flags & NCE_F_NONUD) && 2775 (nce->nce_state != ND_INCOMPLETE)) 2776 return; 2777 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2778 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2779 need_stop_timer = B_TRUE; 2780 if (new_state == ND_REACHABLE) 2781 nce->nce_last = TICK_TO_MSEC(lbolt64); 2782 else { 2783 /* We force NUD in this case */ 2784 nce->nce_last = 0; 2785 } 2786 nce->nce_state = new_state; 2787 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2788 } 2789 /* 2790 * In case of fast path we need to free the the fastpath 2791 * M_DATA and do another probe. Otherwise we can just 2792 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2793 * whatever packets that happens to be transmitting at the time. 2794 */ 2795 if (new_ll_addr != NULL) { 2796 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2797 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2798 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2799 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2800 if (nce->nce_fp_mp != NULL) { 2801 freemsg(nce->nce_fp_mp); 2802 nce->nce_fp_mp = NULL; 2803 } 2804 need_fastpath_update = B_TRUE; 2805 } 2806 mutex_exit(&nce->nce_lock); 2807 if (need_stop_timer) { 2808 (void) untimeout(nce->nce_timeout_id); 2809 nce->nce_timeout_id = 0; 2810 } 2811 if (need_fastpath_update) 2812 nce_fastpath(nce); 2813 mutex_enter(&nce->nce_lock); 2814 } 2815 2816 void 2817 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 2818 { 2819 uint_t count = 0; 2820 mblk_t **mpp; 2821 2822 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2823 2824 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2825 mpp = &(*mpp)->b_next) { 2826 if (++count > 2827 nce->nce_ill->ill_max_buf) { 2828 mblk_t *tmp = nce->nce_qd_mp->b_next; 2829 2830 nce->nce_qd_mp->b_next = NULL; 2831 nce->nce_qd_mp->b_prev = NULL; 2832 freemsg(nce->nce_qd_mp); 2833 nce->nce_qd_mp = tmp; 2834 } 2835 } 2836 /* put this on the list */ 2837 if (head_insert) { 2838 
mp->b_next = nce->nce_qd_mp; 2839 nce->nce_qd_mp = mp; 2840 } else { 2841 *mpp = mp; 2842 } 2843 } 2844 2845 static void 2846 nce_queue_mp(nce_t *nce, mblk_t *mp) 2847 { 2848 boolean_t head_insert = B_FALSE; 2849 ip6_t *ip6h; 2850 ip6i_t *ip6i; 2851 mblk_t *data_mp; 2852 2853 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2854 2855 if (mp->b_datap->db_type == M_CTL) 2856 data_mp = mp->b_cont; 2857 else 2858 data_mp = mp; 2859 ip6h = (ip6_t *)data_mp->b_rptr; 2860 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2861 /* 2862 * This message should have been pulled up already in 2863 * ip_wput_v6. We can't do pullups here because the message 2864 * could be from the nce_qd_mp which could have b_next/b_prev 2865 * non-NULL. 2866 */ 2867 ip6i = (ip6i_t *)ip6h; 2868 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2869 sizeof (ip6i_t) + IPV6_HDR_LEN); 2870 /* 2871 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 2872 * This has 2 aspects mentioned below. 2873 * 1. Perform head insertion in the nce_qd_mp for these packets. 2874 * This ensures that next retransmit of ND solicitation 2875 * will use the interface specified by the probe packet, 2876 * for both NS and NA. This corresponds to the src address 2877 * in the IPv6 packet. If we insert at tail, we will be 2878 * depending on the packet at the head for successful 2879 * ND resolution. This is not reliable, because the interface 2880 * on which the NA arrives could be different from the interface 2881 * on which the NS was sent, and if the receiving interface is 2882 * failed, it will appear that the sending interface is also 2883 * failed, causing in.mpathd to misdiagnose this as link 2884 * failure. 2885 * 2. Drop the original packet, if the ND resolution did not 2886 * succeed in the first attempt. However we will create the 2887 * nce and the ire, as soon as the ND resolution succeeds. 2888 * We don't gain anything by queueing multiple probe packets 2889 * and sending them back-to-back once resolution succeeds. 
2890 * It is sufficient to send just 1 packet after ND resolution 2891 * succeeds. Since mpathd is sending down probe packets at a 2892 * constant rate, we don't need to send the queued packet. We 2893 * need to queue it only for NDP resolution. The benefit of 2894 * dropping the probe packets that were delayed in ND 2895 * resolution, is that in.mpathd will not see inflated 2896 * RTT. If the ND resolution does not succeed within 2897 * in.mpathd's failure detection time, mpathd may detect 2898 * a failure, and it does not matter whether the packet 2899 * was queued or dropped. 2900 */ 2901 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2902 head_insert = B_TRUE; 2903 } 2904 2905 nce_queue_mp_common(nce, mp, head_insert); 2906 } 2907 2908 /* 2909 * Called when address resolution failed due to a timeout. 2910 * Send an ICMP unreachable in response to all queued packets. 2911 */ 2912 void 2913 nce_resolv_failed(nce_t *nce) 2914 { 2915 mblk_t *mp, *nxt_mp, *first_mp; 2916 char buf[INET6_ADDRSTRLEN]; 2917 ip6_t *ip6h; 2918 zoneid_t zoneid = GLOBAL_ZONEID; 2919 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 2920 2921 ip1dbg(("nce_resolv_failed: dst %s\n", 2922 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 2923 mutex_enter(&nce->nce_lock); 2924 mp = nce->nce_qd_mp; 2925 nce->nce_qd_mp = NULL; 2926 mutex_exit(&nce->nce_lock); 2927 while (mp != NULL) { 2928 nxt_mp = mp->b_next; 2929 mp->b_next = NULL; 2930 mp->b_prev = NULL; 2931 2932 first_mp = mp; 2933 if (mp->b_datap->db_type == M_CTL) { 2934 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 2935 ASSERT(io->ipsec_out_type == IPSEC_OUT); 2936 zoneid = io->ipsec_out_zoneid; 2937 ASSERT(zoneid != ALL_ZONES); 2938 mp = mp->b_cont; 2939 } 2940 2941 ip6h = (ip6_t *)mp->b_rptr; 2942 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2943 ip6i_t *ip6i; 2944 /* 2945 * This message should have been pulled up already 2946 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 2947 * the header is pulled up. 
2948 */ 2949 ip6i = (ip6i_t *)ip6h; 2950 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 2951 sizeof (ip6i_t) + IPV6_HDR_LEN); 2952 mp->b_rptr += sizeof (ip6i_t); 2953 } 2954 /* 2955 * Ignore failure since icmp_unreachable_v6 will silently 2956 * drop packets with an unspecified source address. 2957 */ 2958 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst); 2959 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 2960 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst); 2961 mp = nxt_mp; 2962 } 2963 } 2964 2965 /* 2966 * Called by SIOCSNDP* ioctl to add/change an nce entry 2967 * and the corresponding attributes. 2968 * Disallow states other than ND_REACHABLE or ND_STALE. 2969 */ 2970 int 2971 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 2972 { 2973 sin6_t *sin6; 2974 in6_addr_t *addr; 2975 nce_t *nce; 2976 int err; 2977 uint16_t new_flags = 0; 2978 uint16_t old_flags = 0; 2979 int inflags = lnr->lnr_flags; 2980 ip_stack_t *ipst = ill->ill_ipst; 2981 2982 ASSERT(ill->ill_isv6); 2983 if ((lnr->lnr_state_create != ND_REACHABLE) && 2984 (lnr->lnr_state_create != ND_STALE)) 2985 return (EINVAL); 2986 2987 sin6 = (sin6_t *)&lnr->lnr_addr; 2988 addr = &sin6->sin6_addr; 2989 2990 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 2991 /* We know it can not be mapping so just look in the hash table */ 2992 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 2993 nce = nce_lookup_addr(ill, addr, nce); 2994 if (nce != NULL) 2995 new_flags = nce->nce_flags; 2996 2997 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 2998 case NDF_ISROUTER_ON: 2999 new_flags |= NCE_F_ISROUTER; 3000 break; 3001 case NDF_ISROUTER_OFF: 3002 new_flags &= ~NCE_F_ISROUTER; 3003 break; 3004 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3005 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3006 if (nce != NULL) 3007 NCE_REFRELE(nce); 3008 return (EINVAL); 3009 } 3010 3011 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3012 case NDF_ANYCAST_ON: 3013 new_flags |= NCE_F_ANYCAST; 3014 break; 3015 case 
NDF_ANYCAST_OFF: 3016 new_flags &= ~NCE_F_ANYCAST; 3017 break; 3018 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3019 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3020 if (nce != NULL) 3021 NCE_REFRELE(nce); 3022 return (EINVAL); 3023 } 3024 3025 if (nce == NULL) { 3026 err = ndp_add_v6(ill, 3027 (uchar_t *)lnr->lnr_hdw_addr, 3028 addr, 3029 &ipv6_all_ones, 3030 &ipv6_all_zeros, 3031 0, 3032 new_flags, 3033 lnr->lnr_state_create, 3034 &nce); 3035 if (err != 0) { 3036 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3037 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 3038 return (err); 3039 } 3040 } 3041 old_flags = nce->nce_flags; 3042 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3043 /* 3044 * Router turned to host, delete all ires. 3045 * XXX Just delete the entry, but we need to add too. 3046 */ 3047 nce->nce_flags &= ~NCE_F_ISROUTER; 3048 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3049 ndp_delete(nce); 3050 NCE_REFRELE(nce); 3051 return (0); 3052 } 3053 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3054 3055 mutex_enter(&nce->nce_lock); 3056 nce->nce_flags = new_flags; 3057 mutex_exit(&nce->nce_lock); 3058 /* 3059 * Note that we ignore the state at this point, which 3060 * should be either STALE or REACHABLE. Instead we let 3061 * the link layer address passed in to determine the state 3062 * much like incoming packets. 3063 */ 3064 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3065 NCE_REFRELE(nce); 3066 return (0); 3067 } 3068 3069 /* 3070 * If the device driver supports it, we make nce_fp_mp to have 3071 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 3072 * The caller ensures there is hold on nce for this function. 3073 * Note that since ill_fastpath_probe() copies the mblk there is 3074 * no need for the hold beyond this function. 
3075 */ 3076 void 3077 nce_fastpath(nce_t *nce) 3078 { 3079 ill_t *ill = nce->nce_ill; 3080 int res; 3081 3082 ASSERT(ill != NULL); 3083 ASSERT(nce->nce_state != ND_INITIAL && nce->nce_state != ND_INCOMPLETE); 3084 3085 if (nce->nce_fp_mp != NULL) { 3086 /* Already contains fastpath info */ 3087 return; 3088 } 3089 if (nce->nce_res_mp != NULL) { 3090 nce_fastpath_list_add(nce); 3091 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3092 /* 3093 * EAGAIN is an indication of a transient error 3094 * i.e. allocation failure etc. leave the nce in the list it 3095 * will be updated when another probe happens for another ire 3096 * if not it will be taken out of the list when the ire is 3097 * deleted. 3098 */ 3099 3100 if (res != 0 && res != EAGAIN) 3101 nce_fastpath_list_delete(nce); 3102 } 3103 } 3104 3105 /* 3106 * Drain the list of nce's waiting for fastpath response. 3107 */ 3108 void 3109 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3110 void *arg) 3111 { 3112 3113 nce_t *next_nce; 3114 nce_t *current_nce; 3115 nce_t *first_nce; 3116 nce_t *prev_nce = NULL; 3117 3118 mutex_enter(&ill->ill_lock); 3119 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3120 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3121 next_nce = current_nce->nce_fastpath; 3122 /* 3123 * Take it off the list if we're flushing, or if the callback 3124 * routine tells us to do so. Otherwise, leave the nce in the 3125 * fastpath list to handle any pending response from the lower 3126 * layer. We can't drain the list when the callback routine 3127 * comparison failed, because the response is asynchronous in 3128 * nature, and may not arrive in the same order as the list 3129 * insertion. 
3130 */ 3131 if (func == NULL || func(current_nce, arg)) { 3132 current_nce->nce_fastpath = NULL; 3133 if (current_nce == first_nce) 3134 ill->ill_fastpath_list = first_nce = next_nce; 3135 else 3136 prev_nce->nce_fastpath = next_nce; 3137 } else { 3138 /* previous element that is still in the list */ 3139 prev_nce = current_nce; 3140 } 3141 current_nce = next_nce; 3142 } 3143 mutex_exit(&ill->ill_lock); 3144 } 3145 3146 /* 3147 * Add nce to the nce fastpath list. 3148 */ 3149 void 3150 nce_fastpath_list_add(nce_t *nce) 3151 { 3152 ill_t *ill; 3153 3154 ill = nce->nce_ill; 3155 3156 mutex_enter(&ill->ill_lock); 3157 mutex_enter(&nce->nce_lock); 3158 3159 /* 3160 * if nce has not been deleted and 3161 * is not already in the list add it. 3162 */ 3163 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3164 (nce->nce_fastpath == NULL)) { 3165 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3166 ill->ill_fastpath_list = nce; 3167 } 3168 3169 mutex_exit(&nce->nce_lock); 3170 mutex_exit(&ill->ill_lock); 3171 } 3172 3173 /* 3174 * remove nce from the nce fastpath list. 3175 */ 3176 void 3177 nce_fastpath_list_delete(nce_t *nce) 3178 { 3179 nce_t *nce_ptr; 3180 3181 ill_t *ill; 3182 3183 ill = nce->nce_ill; 3184 ASSERT(ill != NULL); 3185 3186 mutex_enter(&ill->ill_lock); 3187 if (nce->nce_fastpath == NULL) 3188 goto done; 3189 3190 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3191 3192 if (ill->ill_fastpath_list == nce) { 3193 ill->ill_fastpath_list = nce->nce_fastpath; 3194 } else { 3195 nce_ptr = ill->ill_fastpath_list; 3196 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3197 if (nce_ptr->nce_fastpath == nce) { 3198 nce_ptr->nce_fastpath = nce->nce_fastpath; 3199 break; 3200 } 3201 nce_ptr = nce_ptr->nce_fastpath; 3202 } 3203 } 3204 3205 nce->nce_fastpath = NULL; 3206 done: 3207 mutex_exit(&ill->ill_lock); 3208 } 3209 3210 /* 3211 * Update all NCE's that are not in fastpath mode and 3212 * have an nce_fp_mp that matches mp. 
mp->b_cont contains 3213 * the fastpath header. 3214 * 3215 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 3216 */ 3217 boolean_t 3218 ndp_fastpath_update(nce_t *nce, void *arg) 3219 { 3220 mblk_t *mp, *fp_mp; 3221 uchar_t *mp_rptr, *ud_mp_rptr; 3222 mblk_t *ud_mp = nce->nce_res_mp; 3223 ptrdiff_t cmplen; 3224 3225 if (nce->nce_flags & NCE_F_MAPPING) 3226 return (B_TRUE); 3227 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3228 return (B_TRUE); 3229 3230 ip2dbg(("ndp_fastpath_update: trying\n")); 3231 mp = (mblk_t *)arg; 3232 mp_rptr = mp->b_rptr; 3233 cmplen = mp->b_wptr - mp_rptr; 3234 ASSERT(cmplen >= 0); 3235 ud_mp_rptr = ud_mp->b_rptr; 3236 /* 3237 * The nce is locked here to prevent any other threads 3238 * from accessing and changing nce_res_mp when the IPv6 address 3239 * becomes resolved to an lla while we're in the middle 3240 * of looking at and comparing the hardware address (lla). 3241 * It is also locked to prevent multiple threads in nce_fastpath_update 3242 * from examining nce_res_mp atthe same time. 3243 */ 3244 mutex_enter(&nce->nce_lock); 3245 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3246 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3247 mutex_exit(&nce->nce_lock); 3248 /* 3249 * Don't take the ire off the fastpath list yet, 3250 * since the response may come later. 3251 */ 3252 return (B_FALSE); 3253 } 3254 /* Matched - install mp as the fastpath mp */ 3255 ip1dbg(("ndp_fastpath_update: match\n")); 3256 fp_mp = dupb(mp->b_cont); 3257 if (fp_mp != NULL) { 3258 nce->nce_fp_mp = fp_mp; 3259 } 3260 mutex_exit(&nce->nce_lock); 3261 return (B_TRUE); 3262 } 3263 3264 /* 3265 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3266 * driver. Note that it assumes IP is exclusive... 3267 */ 3268 /* ARGSUSED */ 3269 void 3270 ndp_fastpath_flush(nce_t *nce, char *arg) 3271 { 3272 if (nce->nce_flags & NCE_F_MAPPING) 3273 return; 3274 /* No fastpath info? 
*/ 3275 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3276 return; 3277 3278 if (nce->nce_ipversion == IPV4_VERSION && 3279 nce->nce_flags & NCE_F_BCAST) { 3280 /* 3281 * IPv4 BROADCAST entries: 3282 * We can't delete the nce since it is difficult to 3283 * recreate these without going through the 3284 * ipif down/up dance. 3285 * 3286 * All access to nce->nce_fp_mp in the case of these 3287 * is protected by nce_lock. 3288 */ 3289 mutex_enter(&nce->nce_lock); 3290 if (nce->nce_fp_mp != NULL) { 3291 freeb(nce->nce_fp_mp); 3292 nce->nce_fp_mp = NULL; 3293 mutex_exit(&nce->nce_lock); 3294 nce_fastpath(nce); 3295 } else { 3296 mutex_exit(&nce->nce_lock); 3297 } 3298 } else { 3299 /* Just delete the NCE... */ 3300 ndp_delete(nce); 3301 } 3302 } 3303 3304 /* 3305 * Return a pointer to a given option in the packet. 3306 * Assumes that option part of the packet have already been validated. 3307 */ 3308 nd_opt_hdr_t * 3309 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3310 { 3311 while (optlen > 0) { 3312 if (opt->nd_opt_type == opt_type) 3313 return (opt); 3314 optlen -= 8 * opt->nd_opt_len; 3315 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3316 } 3317 return (NULL); 3318 } 3319 3320 /* 3321 * Verify all option lengths present are > 0, also check to see 3322 * if the option lengths and packet length are consistent. 3323 */ 3324 boolean_t 3325 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3326 { 3327 ASSERT(opt != NULL); 3328 while (optlen > 0) { 3329 if (opt->nd_opt_len == 0) 3330 return (B_FALSE); 3331 optlen -= 8 * opt->nd_opt_len; 3332 if (optlen < 0) 3333 return (B_FALSE); 3334 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3335 } 3336 return (B_TRUE); 3337 } 3338 3339 /* 3340 * ndp_walk function. 3341 * Free a fraction of the NCE cache entries. 3342 * A fraction of zero means to not free any in that category. 
3343 */ 3344 void 3345 ndp_cache_reclaim(nce_t *nce, char *arg) 3346 { 3347 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3348 uint_t rand; 3349 3350 if (nce->nce_flags & NCE_F_PERMANENT) 3351 return; 3352 3353 rand = (uint_t)lbolt + 3354 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3355 if (ncr->ncr_host != 0 && 3356 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3357 ndp_delete(nce); 3358 return; 3359 } 3360 } 3361 3362 /* 3363 * ndp_walk function. 3364 * Count the number of NCEs that can be deleted. 3365 * These would be hosts but not routers. 3366 */ 3367 void 3368 ndp_cache_count(nce_t *nce, char *arg) 3369 { 3370 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3371 3372 if (nce->nce_flags & NCE_F_PERMANENT) 3373 return; 3374 3375 ncc->ncc_total++; 3376 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3377 ncc->ncc_host++; 3378 } 3379 3380 #ifdef DEBUG 3381 void 3382 nce_trace_ref(nce_t *nce) 3383 { 3384 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3385 3386 if (nce->nce_trace_disable) 3387 return; 3388 3389 if (!th_trace_ref(nce, nce->nce_ill->ill_ipst)) { 3390 nce->nce_trace_disable = B_TRUE; 3391 nce_trace_cleanup(nce); 3392 } 3393 } 3394 3395 void 3396 nce_untrace_ref(nce_t *nce) 3397 { 3398 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3399 3400 if (!nce->nce_trace_disable) 3401 th_trace_unref(nce); 3402 } 3403 3404 static void 3405 nce_trace_cleanup(const nce_t *nce) 3406 { 3407 th_trace_cleanup(nce, nce->nce_trace_disable); 3408 } 3409 #endif 3410 3411 /* 3412 * Called when address resolution fails due to a timeout. 3413 * Send an ICMP unreachable in response to all queued packets. 
3414 */ 3415 void 3416 arp_resolv_failed(nce_t *nce) 3417 { 3418 mblk_t *mp, *nxt_mp, *first_mp; 3419 char buf[INET6_ADDRSTRLEN]; 3420 zoneid_t zoneid = GLOBAL_ZONEID; 3421 struct in_addr ipv4addr; 3422 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3423 3424 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3425 ip3dbg(("arp_resolv_failed: dst %s\n", 3426 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3427 mutex_enter(&nce->nce_lock); 3428 mp = nce->nce_qd_mp; 3429 nce->nce_qd_mp = NULL; 3430 mutex_exit(&nce->nce_lock); 3431 3432 while (mp != NULL) { 3433 nxt_mp = mp->b_next; 3434 mp->b_next = NULL; 3435 mp->b_prev = NULL; 3436 3437 first_mp = mp; 3438 /* 3439 * Send icmp unreachable messages 3440 * to the hosts. 3441 */ 3442 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst); 3443 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3444 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3445 ICMP_HOST_UNREACHABLE, zoneid, ipst); 3446 mp = nxt_mp; 3447 } 3448 } 3449 3450 int 3451 ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3452 nce_t **newnce, nce_t *src_nce) 3453 { 3454 int err; 3455 nce_t *nce; 3456 in6_addr_t addr6; 3457 ip_stack_t *ipst = ill->ill_ipst; 3458 3459 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3460 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3461 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3462 nce = nce_lookup_addr(ill, &addr6, nce); 3463 if (nce == NULL) { 3464 err = ndp_add_v4(ill, addr, flags, newnce, src_nce); 3465 } else { 3466 *newnce = nce; 3467 err = EEXIST; 3468 } 3469 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3470 return (err); 3471 } 3472 3473 /* 3474 * NDP Cache Entry creation routine for IPv4. 3475 * Mapped entries are handled in arp. 3476 * This routine must always be called with ndp4->ndp_g_lock held. 3477 * Prior to return, nce_refcnt is incremented. 
3478 */ 3479 static int 3480 ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3481 nce_t **newnce, nce_t *src_nce) 3482 { 3483 static nce_t nce_nil; 3484 nce_t *nce; 3485 mblk_t *mp; 3486 mblk_t *template = NULL; 3487 nce_t **ncep; 3488 ip_stack_t *ipst = ill->ill_ipst; 3489 uint16_t state = ND_INITIAL; 3490 int err; 3491 3492 ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock)); 3493 ASSERT(!ill->ill_isv6); 3494 ASSERT((flags & NCE_F_MAPPING) == 0); 3495 3496 if (ill->ill_resolver_mp == NULL) 3497 return (EINVAL); 3498 /* 3499 * Allocate the mblk to hold the nce. 3500 */ 3501 mp = allocb(sizeof (nce_t), BPRI_MED); 3502 if (mp == NULL) 3503 return (ENOMEM); 3504 3505 nce = (nce_t *)mp->b_rptr; 3506 mp->b_wptr = (uchar_t *)&nce[1]; 3507 *nce = nce_nil; 3508 nce->nce_ill = ill; 3509 nce->nce_ipversion = IPV4_VERSION; 3510 nce->nce_flags = flags; 3511 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3512 nce->nce_rcnt = ill->ill_xmit_count; 3513 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3514 nce->nce_mask = ipv6_all_ones; 3515 nce->nce_extract_mask = ipv6_all_zeros; 3516 nce->nce_ll_extract_start = 0; 3517 nce->nce_qd_mp = NULL; 3518 nce->nce_mp = mp; 3519 /* This one is for nce getting created */ 3520 nce->nce_refcnt = 1; 3521 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3522 ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3523 3524 nce->nce_trace_disable = B_FALSE; 3525 3526 if (src_nce != NULL) { 3527 /* 3528 * src_nce has been provided by the caller. 
The only 3529 * caller who provides a non-null, non-broadcast 3530 * src_nce is from ip_newroute() which must pass in 3531 * a ND_REACHABLE src_nce (this condition is verified 3532 * via an ASSERT for the save_ire->ire_nce in ip_newroute()) 3533 */ 3534 mutex_enter(&src_nce->nce_lock); 3535 state = src_nce->nce_state; 3536 if ((src_nce->nce_flags & NCE_F_CONDEMNED) || 3537 (ipst->ips_ndp4->ndp_g_hw_change > 0)) { 3538 /* 3539 * src_nce has been deleted, or 3540 * ip_arp_news is in the middle of 3541 * flushing entries in the the nce. 3542 * Fail the add, since we don't know 3543 * if it is safe to copy the contents of 3544 * src_nce 3545 */ 3546 DTRACE_PROBE2(nce__bad__src__nce, 3547 nce_t *, src_nce, ill_t *, ill); 3548 mutex_exit(&src_nce->nce_lock); 3549 err = EINVAL; 3550 goto err_ret; 3551 } 3552 template = copyb(src_nce->nce_res_mp); 3553 mutex_exit(&src_nce->nce_lock); 3554 if (template == NULL) { 3555 err = ENOMEM; 3556 goto err_ret; 3557 } 3558 } else if (flags & NCE_F_BCAST) { 3559 /* 3560 * broadcast nce. 3561 */ 3562 template = copyb(ill->ill_bcast_mp); 3563 if (template == NULL) { 3564 err = ENOMEM; 3565 goto err_ret; 3566 } 3567 state = ND_REACHABLE; 3568 } else if (ill->ill_net_type == IRE_IF_NORESOLVER) { 3569 /* 3570 * NORESOLVER entries are always created in the REACHABLE 3571 * state. We create a nce_res_mp with the IP nexthop address 3572 * in the destination address in the DLPI hdr if the 3573 * physical length is exactly 4 bytes. 3574 * 3575 * XXX not clear which drivers set ill_phys_addr_length to 3576 * IP_ADDR_LEN. 
3577 */ 3578 if (ill->ill_phys_addr_length == IP_ADDR_LEN) { 3579 template = ill_dlur_gen((uchar_t *)addr, 3580 ill->ill_phys_addr_length, 3581 ill->ill_sap, ill->ill_sap_length); 3582 } else { 3583 template = copyb(ill->ill_resolver_mp); 3584 } 3585 if (template == NULL) { 3586 err = ENOMEM; 3587 goto err_ret; 3588 } 3589 state = ND_REACHABLE; 3590 } 3591 nce->nce_fp_mp = NULL; 3592 nce->nce_res_mp = template; 3593 nce->nce_state = state; 3594 if (state == ND_REACHABLE) { 3595 nce->nce_last = TICK_TO_MSEC(lbolt64); 3596 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3597 } else { 3598 nce->nce_last = 0; 3599 if (state == ND_INITIAL) 3600 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3601 } 3602 3603 ASSERT((nce->nce_res_mp == NULL && nce->nce_state == ND_INITIAL) || 3604 (nce->nce_res_mp != NULL && nce->nce_state == ND_REACHABLE)); 3605 /* 3606 * Atomically ensure that the ill is not CONDEMNED, before 3607 * adding the NCE. 3608 */ 3609 mutex_enter(&ill->ill_lock); 3610 if (ill->ill_state_flags & ILL_CONDEMNED) { 3611 mutex_exit(&ill->ill_lock); 3612 err = EINVAL; 3613 goto err_ret; 3614 } 3615 if ((nce->nce_next = *ncep) != NULL) 3616 nce->nce_next->nce_ptpn = &nce->nce_next; 3617 *ncep = nce; 3618 nce->nce_ptpn = ncep; 3619 *newnce = nce; 3620 /* This one is for nce being used by an active thread */ 3621 NCE_REFHOLD(*newnce); 3622 3623 /* Bump up the number of nce's referencing this ill */ 3624 ill->ill_nce_cnt++; 3625 mutex_exit(&ill->ill_lock); 3626 DTRACE_PROBE1(ndp__add__v4, nce_t *, nce); 3627 return (0); 3628 err_ret: 3629 freeb(mp); 3630 freemsg(template); 3631 return (err); 3632 } 3633 3634 void 3635 ndp_flush_qd_mp(nce_t *nce) 3636 { 3637 mblk_t *qd_mp, *qd_next; 3638 3639 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3640 qd_mp = nce->nce_qd_mp; 3641 nce->nce_qd_mp = NULL; 3642 while (qd_mp != NULL) { 3643 qd_next = qd_mp->b_next; 3644 qd_mp->b_next = NULL; 3645 qd_mp->b_prev = NULL; 3646 freemsg(qd_mp); 3647 qd_mp = qd_next; 3648 } 3649 } 3650 3651 3652 /* 3653 * 
ndp_walk routine to delete all entries that have a given destination or 3654 * gateway address and cached link layer (MAC) address. This is used when ARP 3655 * informs us that a network-to-link-layer mapping may have changed. 3656 */ 3657 void 3658 nce_delete_hw_changed(nce_t *nce, void *arg) 3659 { 3660 nce_hw_map_t *hwm = arg; 3661 mblk_t *mp; 3662 dl_unitdata_req_t *dlu; 3663 uchar_t *macaddr; 3664 ill_t *ill; 3665 int saplen; 3666 ipaddr_t nce_addr; 3667 3668 if (nce->nce_state != ND_REACHABLE) 3669 return; 3670 3671 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3672 if (nce_addr != hwm->hwm_addr) 3673 return; 3674 3675 mutex_enter(&nce->nce_lock); 3676 if ((mp = nce->nce_res_mp) == NULL) { 3677 mutex_exit(&nce->nce_lock); 3678 return; 3679 } 3680 dlu = (dl_unitdata_req_t *)mp->b_rptr; 3681 macaddr = (uchar_t *)(dlu + 1); 3682 ill = nce->nce_ill; 3683 if ((saplen = ill->ill_sap_length) > 0) 3684 macaddr += saplen; 3685 else 3686 saplen = -saplen; 3687 3688 /* 3689 * If the hardware address is unchanged, then leave this one alone. 3690 * Note that saplen == abs(saplen) now. 3691 */ 3692 if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && 3693 bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { 3694 mutex_exit(&nce->nce_lock); 3695 return; 3696 } 3697 mutex_exit(&nce->nce_lock); 3698 3699 DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); 3700 ndp_delete(nce); 3701 } 3702 3703 /* 3704 * This function verifies whether a given IPv4 address is potentially known to 3705 * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t, 3706 * so that it can continue to look for hardware changes on that address. 
3707 */ 3708 boolean_t 3709 ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns) 3710 { 3711 nce_t *nce; 3712 struct in_addr nceaddr; 3713 ip_stack_t *ipst = ns->netstack_ip; 3714 3715 if (addr == INADDR_ANY) 3716 return (B_FALSE); 3717 3718 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3719 nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr); 3720 for (; nce != NULL; nce = nce->nce_next) { 3721 /* Note that only v4 mapped entries are in the table. */ 3722 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 3723 if (addr == nceaddr.s_addr && 3724 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 3725 /* Single flag check; no lock needed */ 3726 if (!(nce->nce_flags & NCE_F_CONDEMNED)) 3727 break; 3728 } 3729 } 3730 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3731 return (nce != NULL); 3732 } 3733