1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/sysmacros.h> 33 #include <sys/errno.h> 34 #include <sys/dlpi.h> 35 #include <sys/socket.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/vtrace.h> 41 #include <sys/kmem.h> 42 #include <sys/zone.h> 43 #include <sys/ethernet.h> 44 #include <sys/sdt.h> 45 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/if_dl.h> 49 #include <net/route.h> 50 #include <netinet/in.h> 51 #include <netinet/ip6.h> 52 #include <netinet/icmp6.h> 53 54 #include <inet/common.h> 55 #include <inet/mi.h> 56 #include <inet/mib2.h> 57 #include <inet/nd.h> 58 #include <inet/ip.h> 59 #include <inet/ip_impl.h> 60 #include <inet/ipclassifier.h> 61 #include <inet/ip_if.h> 62 #include <inet/ip_ire.h> 63 #include <inet/ip_rts.h> 64 #include <inet/ip6.h> 65 #include <inet/ip_ndp.h> 66 #include 
<inet/ipsec_impl.h> 67 #include <inet/ipsec_info.h> 68 #include <inet/sctp_ip.h> 69 70 /* 71 * Function names with nce_ prefix are static while function 72 * names with ndp_ prefix are used by rest of the IP. 73 * 74 * Lock ordering: 75 * 76 * ndp_g_lock -> ill_lock -> nce_lock 77 * 78 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 79 * nce_next. Nce_lock protects the contents of the NCE (particularly 80 * nce_refcnt). 81 */ 82 83 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 84 uint32_t ll_addr_len); 85 static void nce_ire_delete(nce_t *nce); 86 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 87 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 88 static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *, nce_t *); 89 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 90 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 91 uchar_t *addr); 92 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 93 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 94 static mblk_t *nce_udreq_alloc(ill_t *ill); 95 static void nce_update(nce_t *nce, uint16_t new_state, 96 uchar_t *new_ll_addr); 97 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 98 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 99 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 100 const in6_addr_t *target, int flag); 101 static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t, 102 nce_t **, nce_t *); 103 104 #ifdef DEBUG 105 static void nce_trace_cleanup(const nce_t *); 106 #endif 107 108 #define NCE_HASH_PTR_V4(ipst, addr) \ 109 (&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 110 111 #define NCE_HASH_PTR_V6(ipst, addr) \ 112 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ 113 NCE_TABLE_SIZE)])) 114 115 /* 116 * Compute default flags to use for an advertisement of this nce's address. 
117 */ 118 static int 119 nce_advert_flags(const nce_t *nce) 120 { 121 int flag = 0; 122 123 if (nce->nce_flags & NCE_F_ISROUTER) 124 flag |= NDP_ISROUTER; 125 if (!(nce->nce_flags & NCE_F_ANYCAST)) 126 flag |= NDP_ORIDE; 127 128 return (flag); 129 } 130 131 /* Non-tunable probe interval, based on link capabilities */ 132 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 133 134 /* 135 * NDP Cache Entry creation routine. 136 * Mapped entries will never do NUD . 137 * This routine must always be called with ndp6->ndp_g_lock held. 138 * Prior to return, nce_refcnt is incremented. 139 */ 140 int 141 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 142 const in6_addr_t *mask, const in6_addr_t *extract_mask, 143 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 144 nce_t **newnce) 145 { 146 static nce_t nce_nil; 147 nce_t *nce; 148 mblk_t *mp; 149 mblk_t *template; 150 nce_t **ncep; 151 int err; 152 boolean_t dropped = B_FALSE; 153 ip_stack_t *ipst = ill->ill_ipst; 154 155 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 156 ASSERT(ill != NULL && ill->ill_isv6); 157 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 158 ip0dbg(("ndp_add_v6: no addr\n")); 159 return (EINVAL); 160 } 161 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 162 ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags)); 163 return (EINVAL); 164 } 165 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 166 (flags & NCE_F_MAPPING)) { 167 ip0dbg(("ndp_add_v6: extract mask zero for mapping")); 168 return (EINVAL); 169 } 170 /* 171 * Allocate the mblk to hold the nce. 172 * 173 * XXX This can come out of a separate cache - nce_cache. 
174 * We don't need the mp anymore as there are no more 175 * "qwriter"s 176 */ 177 mp = allocb(sizeof (nce_t), BPRI_MED); 178 if (mp == NULL) 179 return (ENOMEM); 180 181 nce = (nce_t *)mp->b_rptr; 182 mp->b_wptr = (uchar_t *)&nce[1]; 183 *nce = nce_nil; 184 185 /* 186 * This one holds link layer address 187 */ 188 if (ill->ill_net_type == IRE_IF_RESOLVER) { 189 template = nce_udreq_alloc(ill); 190 } else { 191 if (ill->ill_resolver_mp == NULL) { 192 freeb(mp); 193 return (EINVAL); 194 } 195 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 196 template = copyb(ill->ill_resolver_mp); 197 } 198 if (template == NULL) { 199 freeb(mp); 200 return (ENOMEM); 201 } 202 nce->nce_ill = ill; 203 nce->nce_ipversion = IPV6_VERSION; 204 nce->nce_flags = flags; 205 nce->nce_state = state; 206 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 207 nce->nce_rcnt = ill->ill_xmit_count; 208 nce->nce_addr = *addr; 209 nce->nce_mask = *mask; 210 nce->nce_extract_mask = *extract_mask; 211 nce->nce_ll_extract_start = hw_extract_start; 212 nce->nce_fp_mp = NULL; 213 nce->nce_res_mp = template; 214 if (state == ND_REACHABLE) 215 nce->nce_last = TICK_TO_MSEC(lbolt64); 216 else 217 nce->nce_last = 0; 218 nce->nce_qd_mp = NULL; 219 nce->nce_mp = mp; 220 if (hw_addr != NULL) 221 nce_set_ll(nce, hw_addr); 222 /* This one is for nce getting created */ 223 nce->nce_refcnt = 1; 224 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 225 if (nce->nce_flags & NCE_F_MAPPING) { 226 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 227 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 228 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 229 ncep = &ipst->ips_ndp6->nce_mask_entries; 230 } else { 231 ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 232 } 233 234 nce->nce_trace_disable = B_FALSE; 235 236 /* 237 * Atomically ensure that the ill is not CONDEMNED, before 238 * adding the NCE. 
239 */ 240 mutex_enter(&ill->ill_lock); 241 if (ill->ill_state_flags & ILL_CONDEMNED) { 242 mutex_exit(&ill->ill_lock); 243 freeb(mp); 244 freeb(template); 245 return (EINVAL); 246 } 247 if ((nce->nce_next = *ncep) != NULL) 248 nce->nce_next->nce_ptpn = &nce->nce_next; 249 *ncep = nce; 250 nce->nce_ptpn = ncep; 251 *newnce = nce; 252 /* This one is for nce being used by an active thread */ 253 NCE_REFHOLD(*newnce); 254 255 /* Bump up the number of nce's referencing this ill */ 256 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 257 (char *), "nce", (void *), nce); 258 ill->ill_cnt_nce++; 259 mutex_exit(&ill->ill_lock); 260 261 err = 0; 262 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 263 mutex_enter(&nce->nce_lock); 264 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 265 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 266 mutex_exit(&nce->nce_lock); 267 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 268 &ipv6_all_zeros, addr, NDP_PROBE); 269 if (dropped) { 270 mutex_enter(&nce->nce_lock); 271 nce->nce_pcnt++; 272 mutex_exit(&nce->nce_lock); 273 } 274 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 275 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 276 err = EINPROGRESS; 277 } else if (flags & NCE_F_UNSOL_ADV) { 278 /* 279 * We account for the transmit below by assigning one 280 * less than the ndd variable. Subsequent decrements 281 * are done in ndp_timer. 
282 */ 283 mutex_enter(&nce->nce_lock); 284 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 285 nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1; 286 mutex_exit(&nce->nce_lock); 287 dropped = nce_xmit(ill, 288 ND_NEIGHBOR_ADVERT, 289 ill, /* ill to be used for extracting ill_nd_lla */ 290 B_TRUE, /* use ill_nd_lla */ 291 addr, /* Source and target of the advertisement pkt */ 292 &ipv6_all_hosts_mcast, /* Destination of the packet */ 293 nce_advert_flags(nce)); 294 mutex_enter(&nce->nce_lock); 295 if (dropped) 296 nce->nce_unsolicit_count++; 297 if (nce->nce_unsolicit_count != 0) { 298 nce->nce_timeout_id = timeout(ndp_timer, nce, 299 MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval)); 300 } 301 mutex_exit(&nce->nce_lock); 302 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 303 } 304 /* 305 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 306 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 307 * We call nce_fastpath from nce_update if the link layer address of 308 * the peer changes from nce_update 309 */ 310 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 311 nce_fastpath(nce); 312 return (err); 313 } 314 315 int 316 ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 317 const in6_addr_t *mask, const in6_addr_t *extract_mask, 318 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 319 nce_t **newnce) 320 { 321 int err = 0; 322 nce_t *nce; 323 ip_stack_t *ipst = ill->ill_ipst; 324 325 ASSERT(ill->ill_isv6); 326 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 327 328 /* Get head of v6 hash table */ 329 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 330 nce = nce_lookup_addr(ill, addr, nce); 331 if (nce == NULL) { 332 err = ndp_add_v6(ill, 333 hw_addr, 334 addr, 335 mask, 336 extract_mask, 337 hw_extract_start, 338 flags, 339 state, 340 newnce); 341 } else { 342 *newnce = nce; 343 err = EEXIST; 344 } 345 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 346 return (err); 347 } 348 349 /* 350 * 
Remove all the CONDEMNED nces from the appropriate hash table. 351 * We create a private list of NCEs, these may have ires pointing 352 * to them, so the list will be passed through to clean up dependent 353 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 354 */ 355 static void 356 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 357 { 358 nce_t *nce1; 359 nce_t **ptpn; 360 361 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 362 ASSERT(ndp->ndp_g_walker == 0); 363 for (; nce; nce = nce1) { 364 nce1 = nce->nce_next; 365 mutex_enter(&nce->nce_lock); 366 if (nce->nce_flags & NCE_F_CONDEMNED) { 367 ptpn = nce->nce_ptpn; 368 nce1 = nce->nce_next; 369 if (nce1 != NULL) 370 nce1->nce_ptpn = ptpn; 371 *ptpn = nce1; 372 nce->nce_ptpn = NULL; 373 nce->nce_next = NULL; 374 nce->nce_next = *free_nce_list; 375 *free_nce_list = nce; 376 } 377 mutex_exit(&nce->nce_lock); 378 } 379 } 380 381 /* 382 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 383 * will return this NCE. Also no new IREs will be created that 384 * point to this NCE (See ire_add_v6). Also no new timeouts will 385 * be started (See NDP_RESTART_TIMER). 386 * 2. Cancel any currently running timeouts. 387 * 3. If there is an ndp walker, return. The walker will do the cleanup. 388 * This ensures that walkers see a consistent list of NCEs while walking. 389 * 4. Otherwise remove the NCE from the list of NCEs 390 * 5. Delete all IREs pointing to this NCE. 391 */ 392 void 393 ndp_delete(nce_t *nce) 394 { 395 nce_t **ptpn; 396 nce_t *nce1; 397 int ipversion = nce->nce_ipversion; 398 ndp_g_t *ndp; 399 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 400 401 if (ipversion == IPV4_VERSION) 402 ndp = ipst->ips_ndp4; 403 else 404 ndp = ipst->ips_ndp6; 405 406 /* Serialize deletes */ 407 mutex_enter(&nce->nce_lock); 408 if (nce->nce_flags & NCE_F_CONDEMNED) { 409 /* Some other thread is doing the delete */ 410 mutex_exit(&nce->nce_lock); 411 return; 412 } 413 /* 414 * Caller has a refhold. 
Also 1 ref for being in the list. Thus 415 * refcnt has to be >= 2 416 */ 417 ASSERT(nce->nce_refcnt >= 2); 418 nce->nce_flags |= NCE_F_CONDEMNED; 419 mutex_exit(&nce->nce_lock); 420 421 nce_fastpath_list_delete(nce); 422 423 /* 424 * Cancel any running timer. Timeout can't be restarted 425 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 426 * Passing invalid timeout id is fine. 427 */ 428 if (nce->nce_timeout_id != 0) { 429 (void) untimeout(nce->nce_timeout_id); 430 nce->nce_timeout_id = 0; 431 } 432 433 mutex_enter(&ndp->ndp_g_lock); 434 if (nce->nce_ptpn == NULL) { 435 /* 436 * The last ndp walker has already removed this nce from 437 * the list after we marked the nce CONDEMNED and before 438 * we grabbed the global lock. 439 */ 440 mutex_exit(&ndp->ndp_g_lock); 441 return; 442 } 443 if (ndp->ndp_g_walker > 0) { 444 /* 445 * Can't unlink. The walker will clean up 446 */ 447 ndp->ndp_g_walker_cleanup = B_TRUE; 448 mutex_exit(&ndp->ndp_g_lock); 449 return; 450 } 451 452 /* 453 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 454 * the timer since it is marked CONDEMNED. 
455 */ 456 ptpn = nce->nce_ptpn; 457 nce1 = nce->nce_next; 458 if (nce1 != NULL) 459 nce1->nce_ptpn = ptpn; 460 *ptpn = nce1; 461 nce->nce_ptpn = NULL; 462 nce->nce_next = NULL; 463 mutex_exit(&ndp->ndp_g_lock); 464 465 nce_ire_delete(nce); 466 } 467 468 void 469 ndp_inactive(nce_t *nce) 470 { 471 mblk_t **mpp; 472 ill_t *ill; 473 474 ASSERT(nce->nce_refcnt == 0); 475 ASSERT(MUTEX_HELD(&nce->nce_lock)); 476 ASSERT(nce->nce_fastpath == NULL); 477 478 /* Free all nce allocated messages */ 479 mpp = &nce->nce_first_mp_to_free; 480 do { 481 while (*mpp != NULL) { 482 mblk_t *mp; 483 484 mp = *mpp; 485 *mpp = mp->b_next; 486 487 inet_freemsg(mp); 488 } 489 } while (mpp++ != &nce->nce_last_mp_to_free); 490 491 #ifdef DEBUG 492 nce_trace_cleanup(nce); 493 #endif 494 495 ill = nce->nce_ill; 496 mutex_enter(&ill->ill_lock); 497 DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill, 498 (char *), "nce", (void *), nce); 499 ill->ill_cnt_nce--; 500 /* 501 * If the number of nce's associated with this ill have dropped 502 * to zero, check whether we need to restart any operation that 503 * is waiting for this to happen. 504 */ 505 if (ILL_DOWN_OK(ill)) { 506 /* ipif_ill_refrele_tail drops the ill_lock */ 507 ipif_ill_refrele_tail(ill); 508 } else { 509 mutex_exit(&ill->ill_lock); 510 } 511 mutex_destroy(&nce->nce_lock); 512 if (nce->nce_mp != NULL) 513 inet_freemsg(nce->nce_mp); 514 } 515 516 /* 517 * ndp_walk routine. Delete the nce if it is associated with the ill 518 * that is going away. Always called as a writer. 519 */ 520 void 521 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 522 { 523 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 524 ndp_delete(nce); 525 } 526 } 527 528 /* 529 * Walk a list of to be inactive NCEs and blow away all the ires. 
530 */ 531 static void 532 nce_ire_delete_list(nce_t *nce) 533 { 534 nce_t *nce_next; 535 536 ASSERT(nce != NULL); 537 while (nce != NULL) { 538 nce_next = nce->nce_next; 539 nce->nce_next = NULL; 540 541 /* 542 * It is possible for the last ndp walker (this thread) 543 * to come here after ndp_delete has marked the nce CONDEMNED 544 * and before it has removed the nce from the fastpath list 545 * or called untimeout. So we need to do it here. It is safe 546 * for both ndp_delete and this thread to do it twice or 547 * even simultaneously since each of the threads has a 548 * reference on the nce. 549 */ 550 nce_fastpath_list_delete(nce); 551 /* 552 * Cancel any running timer. Timeout can't be restarted 553 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 554 * Passing invalid timeout id is fine. 555 */ 556 if (nce->nce_timeout_id != 0) { 557 (void) untimeout(nce->nce_timeout_id); 558 nce->nce_timeout_id = 0; 559 } 560 /* 561 * We might hit this func thus in the v4 case: 562 * ipif_down->ipif_ndp_down->ndp_walk 563 */ 564 565 if (nce->nce_ipversion == IPV4_VERSION) { 566 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 567 IRE_CACHE, nce_ire_delete1, 568 (char *)nce, nce->nce_ill); 569 } else { 570 ASSERT(nce->nce_ipversion == IPV6_VERSION); 571 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 572 IRE_CACHE, nce_ire_delete1, 573 (char *)nce, nce->nce_ill); 574 } 575 NCE_REFRELE_NOTR(nce); 576 nce = nce_next; 577 } 578 } 579 580 /* 581 * Delete an ire when the nce goes away. 
582 */ 583 /* ARGSUSED */ 584 static void 585 nce_ire_delete(nce_t *nce) 586 { 587 if (nce->nce_ipversion == IPV6_VERSION) { 588 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 589 nce_ire_delete1, (char *)nce, nce->nce_ill); 590 NCE_REFRELE_NOTR(nce); 591 } else { 592 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 593 nce_ire_delete1, (char *)nce, nce->nce_ill); 594 NCE_REFRELE_NOTR(nce); 595 } 596 } 597 598 /* 599 * ire_walk routine used to delete every IRE that shares this nce 600 */ 601 static void 602 nce_ire_delete1(ire_t *ire, char *nce_arg) 603 { 604 nce_t *nce = (nce_t *)nce_arg; 605 606 ASSERT(ire->ire_type == IRE_CACHE); 607 608 if (ire->ire_nce == nce) { 609 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 610 ire_delete(ire); 611 } 612 } 613 614 /* 615 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 616 */ 617 boolean_t 618 ndp_restart_dad(nce_t *nce) 619 { 620 boolean_t started; 621 boolean_t dropped; 622 623 if (nce == NULL) 624 return (B_FALSE); 625 mutex_enter(&nce->nce_lock); 626 if (nce->nce_state == ND_PROBE) { 627 mutex_exit(&nce->nce_lock); 628 started = B_TRUE; 629 } else if (nce->nce_state == ND_REACHABLE) { 630 nce->nce_state = ND_PROBE; 631 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 632 mutex_exit(&nce->nce_lock); 633 dropped = nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, NULL, 634 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, NDP_PROBE); 635 if (dropped) { 636 mutex_enter(&nce->nce_lock); 637 nce->nce_pcnt++; 638 mutex_exit(&nce->nce_lock); 639 } 640 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 641 started = B_TRUE; 642 } else { 643 mutex_exit(&nce->nce_lock); 644 started = B_FALSE; 645 } 646 return (started); 647 } 648 649 /* 650 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 651 * If one is found, the refcnt on the nce will be incremented. 
652 */ 653 nce_t * 654 ndp_lookup_v6(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 655 { 656 nce_t *nce; 657 ip_stack_t *ipst; 658 659 ASSERT(ill != NULL); 660 ipst = ill->ill_ipst; 661 662 ASSERT(ill != NULL && ill->ill_isv6); 663 if (!caller_holds_lock) { 664 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 665 } 666 667 /* Get head of v6 hash table */ 668 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 669 nce = nce_lookup_addr(ill, addr, nce); 670 if (nce == NULL) 671 nce = nce_lookup_mapping(ill, addr); 672 if (!caller_holds_lock) 673 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 674 return (nce); 675 } 676 /* 677 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 678 * If one is found, the refcnt on the nce will be incremented. 679 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 680 * so we skip the nce_lookup_mapping call. 681 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 682 */ 683 nce_t * 684 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 685 { 686 nce_t *nce; 687 in6_addr_t addr6; 688 ip_stack_t *ipst = ill->ill_ipst; 689 690 if (!caller_holds_lock) { 691 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 692 } 693 694 /* Get head of v4 hash table */ 695 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 696 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 697 nce = nce_lookup_addr(ill, &addr6, nce); 698 if (!caller_holds_lock) 699 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 700 return (nce); 701 } 702 703 /* 704 * Cache entry lookup. Try to find an nce matching the parameters passed. 705 * Look only for exact entries (no mappings). If an nce is found, increment 706 * the hold count on that nce. The caller passes in the start of the 707 * appropriate hash table, and must be holding the appropriate global 708 * lock (ndp_g_lock). 
709 */ 710 static nce_t * 711 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr, nce_t *nce) 712 { 713 ndp_g_t *ndp; 714 ip_stack_t *ipst = ill->ill_ipst; 715 716 if (ill->ill_isv6) 717 ndp = ipst->ips_ndp6; 718 else 719 ndp = ipst->ips_ndp4; 720 721 ASSERT(ill != NULL); 722 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 723 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 724 return (NULL); 725 for (; nce != NULL; nce = nce->nce_next) { 726 if (nce->nce_ill == ill) { 727 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 728 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 729 &ipv6_all_ones)) { 730 mutex_enter(&nce->nce_lock); 731 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 732 NCE_REFHOLD_LOCKED(nce); 733 mutex_exit(&nce->nce_lock); 734 break; 735 } 736 mutex_exit(&nce->nce_lock); 737 } 738 } 739 } 740 return (nce); 741 } 742 743 /* 744 * Cache entry lookup. Try to find an nce matching the parameters passed. 745 * Look only for mappings. 746 */ 747 static nce_t * 748 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 749 { 750 nce_t *nce; 751 ip_stack_t *ipst = ill->ill_ipst; 752 753 ASSERT(ill != NULL && ill->ill_isv6); 754 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 755 if (!IN6_IS_ADDR_MULTICAST(addr)) 756 return (NULL); 757 nce = ipst->ips_ndp6->nce_mask_entries; 758 for (; nce != NULL; nce = nce->nce_next) 759 if (nce->nce_ill == ill && 760 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 761 mutex_enter(&nce->nce_lock); 762 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 763 NCE_REFHOLD_LOCKED(nce); 764 mutex_exit(&nce->nce_lock); 765 break; 766 } 767 mutex_exit(&nce->nce_lock); 768 } 769 return (nce); 770 } 771 772 /* 773 * Process passed in parameters either from an incoming packet or via 774 * user ioctl. 
775 */ 776 void 777 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 778 { 779 ill_t *ill = nce->nce_ill; 780 uint32_t hw_addr_len = ill->ill_nd_lla_len; 781 mblk_t *mp; 782 boolean_t ll_updated = B_FALSE; 783 boolean_t ll_changed; 784 ip_stack_t *ipst = ill->ill_ipst; 785 786 ASSERT(nce->nce_ipversion == IPV6_VERSION); 787 /* 788 * No updates of link layer address or the neighbor state is 789 * allowed, when the cache is in NONUD state. This still 790 * allows for responding to reachability solicitation. 791 */ 792 mutex_enter(&nce->nce_lock); 793 if (nce->nce_state == ND_INCOMPLETE) { 794 if (hw_addr == NULL) { 795 mutex_exit(&nce->nce_lock); 796 return; 797 } 798 nce_set_ll(nce, hw_addr); 799 /* 800 * Update nce state and send the queued packets 801 * back to ip this time ire will be added. 802 */ 803 if (flag & ND_NA_FLAG_SOLICITED) { 804 nce_update(nce, ND_REACHABLE, NULL); 805 } else { 806 nce_update(nce, ND_STALE, NULL); 807 } 808 mutex_exit(&nce->nce_lock); 809 nce_fastpath(nce); 810 mutex_enter(&nce->nce_lock); 811 mp = nce->nce_qd_mp; 812 nce->nce_qd_mp = NULL; 813 mutex_exit(&nce->nce_lock); 814 while (mp != NULL) { 815 mblk_t *nxt_mp, *data_mp; 816 817 nxt_mp = mp->b_next; 818 mp->b_next = NULL; 819 820 if (mp->b_datap->db_type == M_CTL) 821 data_mp = mp->b_cont; 822 else 823 data_mp = mp; 824 if (data_mp->b_prev != NULL) { 825 ill_t *inbound_ill; 826 queue_t *fwdq = NULL; 827 uint_t ifindex; 828 829 ifindex = (uint_t)(uintptr_t)data_mp->b_prev; 830 inbound_ill = ill_lookup_on_ifindex(ifindex, 831 B_TRUE, NULL, NULL, NULL, NULL, ipst); 832 if (inbound_ill == NULL) { 833 data_mp->b_prev = NULL; 834 freemsg(mp); 835 return; 836 } else { 837 fwdq = inbound_ill->ill_rq; 838 } 839 data_mp->b_prev = NULL; 840 /* 841 * Send a forwarded packet back into ip_rput_v6 842 * just as in ire_send_v6(). 843 * Extract the queue from b_prev (set in 844 * ip_rput_data_v6). 
845 */ 846 if (fwdq != NULL) { 847 /* 848 * Forwarded packets hop count will 849 * get decremented in ip_rput_data_v6 850 */ 851 if (data_mp != mp) 852 freeb(mp); 853 put(fwdq, data_mp); 854 } else { 855 /* 856 * Send locally originated packets back 857 * into * ip_wput_v6. 858 */ 859 put(ill->ill_wq, mp); 860 } 861 ill_refrele(inbound_ill); 862 } else { 863 put(ill->ill_wq, mp); 864 } 865 mp = nxt_mp; 866 } 867 return; 868 } 869 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 870 if (!is_adv) { 871 /* If this is a SOLICITATION request only */ 872 if (ll_changed) 873 nce_update(nce, ND_STALE, hw_addr); 874 mutex_exit(&nce->nce_lock); 875 return; 876 } 877 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 878 /* If in any other state than REACHABLE, ignore */ 879 if (nce->nce_state == ND_REACHABLE) { 880 nce_update(nce, ND_STALE, NULL); 881 } 882 mutex_exit(&nce->nce_lock); 883 return; 884 } else { 885 if (ll_changed) { 886 nce_update(nce, ND_UNCHANGED, hw_addr); 887 ll_updated = B_TRUE; 888 } 889 if (flag & ND_NA_FLAG_SOLICITED) { 890 nce_update(nce, ND_REACHABLE, NULL); 891 } else { 892 if (ll_updated) { 893 nce_update(nce, ND_STALE, NULL); 894 } 895 } 896 mutex_exit(&nce->nce_lock); 897 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 898 NCE_F_ISROUTER)) { 899 ire_t *ire; 900 901 /* 902 * Router turned to host. We need to remove the 903 * entry as well as any default route that may be 904 * using this as a next hop. This is required by 905 * section 7.2.5 of RFC 2461. 906 */ 907 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 908 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 909 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 910 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 911 MATCH_IRE_DEFAULT, ipst); 912 if (ire != NULL) { 913 ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); 914 ire_delete(ire); 915 ire_refrele(ire); 916 } 917 ndp_delete(nce); 918 } 919 } 920 } 921 922 /* 923 * Pass arg1 to the pfi supplied, along with each nce in existence. 
924 * ndp_walk() places a REFHOLD on the nce and drops the lock when 925 * walking the hash list. 926 */ 927 void 928 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 929 boolean_t trace) 930 { 931 932 nce_t *nce; 933 nce_t *nce1; 934 nce_t **ncep; 935 nce_t *free_nce_list = NULL; 936 937 mutex_enter(&ndp->ndp_g_lock); 938 /* Prevent ndp_delete from unlink and free of NCE */ 939 ndp->ndp_g_walker++; 940 mutex_exit(&ndp->ndp_g_lock); 941 for (ncep = ndp->nce_hash_tbl; 942 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 943 for (nce = *ncep; nce != NULL; nce = nce1) { 944 nce1 = nce->nce_next; 945 if (ill == NULL || nce->nce_ill == ill) { 946 if (trace) { 947 NCE_REFHOLD(nce); 948 (*pfi)(nce, arg1); 949 NCE_REFRELE(nce); 950 } else { 951 NCE_REFHOLD_NOTR(nce); 952 (*pfi)(nce, arg1); 953 NCE_REFRELE_NOTR(nce); 954 } 955 } 956 } 957 } 958 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 959 nce1 = nce->nce_next; 960 if (ill == NULL || nce->nce_ill == ill) { 961 if (trace) { 962 NCE_REFHOLD(nce); 963 (*pfi)(nce, arg1); 964 NCE_REFRELE(nce); 965 } else { 966 NCE_REFHOLD_NOTR(nce); 967 (*pfi)(nce, arg1); 968 NCE_REFRELE_NOTR(nce); 969 } 970 } 971 } 972 mutex_enter(&ndp->ndp_g_lock); 973 ndp->ndp_g_walker--; 974 /* 975 * While NCE's are removed from global list they are placed 976 * in a private list, to be passed to nce_ire_delete_list(). 977 * The reason is, there may be ires pointing to this nce 978 * which needs to cleaned up. 
*/
	if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) {
		/* Time to delete condemned entries */
		for (ncep = ndp->nce_hash_tbl;
		    ncep < A_END(ndp->nce_hash_tbl); ncep++) {
			nce = *ncep;
			if (nce != NULL) {
				nce_remove(ndp, nce, &free_nce_list);
			}
		}
		nce = ndp->nce_mask_entries;
		if (nce != NULL) {
			nce_remove(ndp, nce, &free_nce_list);
		}
		ndp->ndp_g_walker_cleanup = B_FALSE;
	}

	mutex_exit(&ndp->ndp_g_lock);

	/* The collected entries are handed off only after the lock is dropped */
	if (free_nce_list != NULL) {
		nce_ire_delete_list(free_nce_list);
	}
}

/*
 * Walk everything.
 * Note that ill can be NULL hence can't derive the ipst from it.
 *
 * Runs ndp_walk_common() over both the IPv4 (ips_ndp4) and the IPv6
 * (ips_ndp6) tables of the given stack, passing ill, pfi and arg1 through
 * unchanged.
 */
void
ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst)
{
	ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE);
	ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE);
}

/*
 * Process resolve requests.  Handles both mapped entries
 * as well as cases that need to be sent out on the wire.
 * Lookup a NCE for a given IRE.  Regardless of whether one exists
 * or one is created, we defer making ire point to nce until the
 * ire is actually added at which point the nce_refcnt on the nce is
 * incremented.  This is done primarily to have symmetry between ire_add()
 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
 *
 * Return values:
 *	0		the entry is usable as is (it is not, or is no longer,
 *			ND_INCOMPLETE), or the ill is ILLF_XRESOLV and a new
 *			entry was just created.
 *	EINPROGRESS	mp (with the zoneid prepended) was queued on the nce
 *			and resolution is under way.
 *	ENOMEM		ip_prepend_zoneid() failed.
 *	EBUSY		nce_solicit() returned zero for a newly created entry.
 *	other		for a multicast dst, whatever nce_set_multicast()
 *			returned; otherwise the error returned by
 *			ndp_lookup_then_add_v6().
 *
 * On the ENOMEM and EBUSY paths the caller remains responsible for freeing
 * mp.  The reference obtained from ndp_lookup_then_add_v6() is always
 * released before returning.
 */
int
ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid)
{
	nce_t		*nce;
	int		err = 0;
	uint32_t	ms;
	mblk_t		*mp_nce = NULL;
	ip_stack_t	*ipst = ill->ill_ipst;

	ASSERT(ill->ill_isv6);
	/* Multicast destinations are derived from the mapping entry instead */
	if (IN6_IS_ADDR_MULTICAST(dst)) {
		err = nce_set_multicast(ill, dst);
		return (err);
	}
	err = ndp_lookup_then_add_v6(ill,
	    NULL,	/* No hardware address */
	    dst,
	    &ipv6_all_ones,
	    &ipv6_all_zeros,
	    0,
	    (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0,
	    ND_INCOMPLETE,
	    &nce);

	switch (err) {
	case 0:
		/*
		 * New cache entry was created.  Check whether the state
		 * is still ND_INCOMPLETE.  It can be in some other state
		 * even before we send out the solicitation as we could
		 * get un-solicited advertisements.
		 *
		 * If this is an XRESOLV interface, simply return 0,
		 * since we don't want to solicit just yet.
		 */
		if (ill->ill_flags & ILLF_XRESOLV) {
			NCE_REFRELE(nce);
			return (0);
		}
		/*
		 * nce_solicit() asserts that ill_g_lock is held as reader
		 * and that nce_lock is held, so take both here.
		 */
		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		mutex_enter(&nce->nce_lock);
		if (nce->nce_state != ND_INCOMPLETE) {
			mutex_exit(&nce->nce_lock);
			rw_exit(&ipst->ips_ill_g_lock);
			NCE_REFRELE(nce);
			return (0);
		}
		mp_nce = ip_prepend_zoneid(mp, zoneid, ipst);
		if (mp_nce == NULL) {
			/* The caller will free mp */
			mutex_exit(&nce->nce_lock);
			rw_exit(&ipst->ips_ill_g_lock);
			ndp_delete(nce);
			NCE_REFRELE(nce);
			return (ENOMEM);
		}
		ms = nce_solicit(nce, mp_nce);
		rw_exit(&ipst->ips_ill_g_lock);
		if (ms == 0) {
			/* The caller will free mp */
			if (mp_nce != mp)
				freeb(mp_nce);
			mutex_exit(&nce->nce_lock);
			ndp_delete(nce);
			NCE_REFRELE(nce);
			return (EBUSY);
		}
		mutex_exit(&nce->nce_lock);
		/* ms is the retransmit interval returned by nce_solicit() */
		NDP_RESTART_TIMER(nce, (clock_t)ms);
		NCE_REFRELE(nce);
		return (EINPROGRESS);
	case EEXIST:
		/* Resolution in progress just queue the packet */
		mutex_enter(&nce->nce_lock);
		if (nce->nce_state == ND_INCOMPLETE) {
			mp_nce = ip_prepend_zoneid(mp, zoneid, ipst);
			if (mp_nce == NULL) {
				err = ENOMEM;
			} else {
				nce_queue_mp(nce, mp_nce);
				err = EINPROGRESS;
			}
		} else {
			/*
			 * Any other state implies we have
			 * a nce but IRE needs to be added ...
			 * ire_add_v6() will take care of the
			 * the case when the nce becomes CONDEMNED
			 * before the ire is added to the table.
			 */
			err = 0;
		}
		mutex_exit(&nce->nce_lock);
		NCE_REFRELE(nce);
		break;
	default:
		ip1dbg(("ndp_resolver: Can't create NCE %d\n", err));
		break;
	}
	return (err);
}

/*
 * When there is no resolver, the link layer template is passed in
 * the IRE.
 * Lookup a NCE for a given IRE.  Regardless of whether one exists
 * or one is created, we defer making ire point to nce until the
 * ire is actually added at which point the nce_refcnt on the nce is
 * incremented.  This is done primarily to have symmetry between ire_add()
 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
 *
 * Returns 0 when an entry was created or already existed (EEXIST is mapped
 * to 0); otherwise the error from ndp_lookup_then_add_v6(), or for a
 * multicast dst the result of nce_set_multicast().
 */
int
ndp_noresolver(ill_t *ill, const in6_addr_t *dst)
{
	nce_t		*nce;
	int		err = 0;

	ASSERT(ill != NULL);
	ASSERT(ill->ill_isv6);
	if (IN6_IS_ADDR_MULTICAST(dst)) {
		err = nce_set_multicast(ill, dst);
		return (err);
	}

	/* Entries are created directly in the ND_REACHABLE state */
	err = ndp_lookup_then_add_v6(ill,
	    NULL,	/* hardware address */
	    dst,
	    &ipv6_all_ones,
	    &ipv6_all_zeros,
	    0,
	    (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0,
	    ND_REACHABLE,
	    &nce);

	switch (err) {
	case 0:
		/*
		 * Cache entry with a proper resolver cookie was
		 * created.
		 */
		NCE_REFRELE(nce);
		break;
	case EEXIST:
		/* An existing entry is just as good; drop our reference */
		err = 0;
		NCE_REFRELE(nce);
		break;
	default:
		ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err));
		break;
	}
	return (err);
}

/*
 * For each interface an entry is added for the unspecified multicast group.
 * Here that mapping is used to form the multicast cache entry for a particular
 * multicast destination.
1180 */ 1181 static int 1182 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1183 { 1184 nce_t *mnce; /* Multicast mapping entry */ 1185 nce_t *nce; 1186 uchar_t *hw_addr = NULL; 1187 int err = 0; 1188 ip_stack_t *ipst = ill->ill_ipst; 1189 1190 ASSERT(ill != NULL); 1191 ASSERT(ill->ill_isv6); 1192 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1193 1194 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1195 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst)); 1196 nce = nce_lookup_addr(ill, dst, nce); 1197 if (nce != NULL) { 1198 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1199 NCE_REFRELE(nce); 1200 return (0); 1201 } 1202 /* No entry, now lookup for a mapping this should never fail */ 1203 mnce = nce_lookup_mapping(ill, dst); 1204 if (mnce == NULL) { 1205 /* Something broken for the interface. */ 1206 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1207 return (ESRCH); 1208 } 1209 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1210 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1211 /* 1212 * For IRE_IF_RESOLVER a hardware mapping can be 1213 * generated, for IRE_IF_NORESOLVER, resolution cookie 1214 * in the ill is copied in ndp_add_v6(). 1215 */ 1216 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1217 if (hw_addr == NULL) { 1218 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1219 NCE_REFRELE(mnce); 1220 return (ENOMEM); 1221 } 1222 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1223 } 1224 NCE_REFRELE(mnce); 1225 /* 1226 * IRE_IF_NORESOLVER type simply copies the resolution 1227 * cookie passed in. So no hw_addr is needed. 
1228 */ 1229 err = ndp_add_v6(ill, 1230 hw_addr, 1231 dst, 1232 &ipv6_all_ones, 1233 &ipv6_all_zeros, 1234 0, 1235 NCE_F_NONUD, 1236 ND_REACHABLE, 1237 &nce); 1238 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1239 if (hw_addr != NULL) 1240 kmem_free(hw_addr, ill->ill_nd_lla_len); 1241 if (err != 0) { 1242 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1243 return (err); 1244 } 1245 NCE_REFRELE(nce); 1246 return (0); 1247 } 1248 1249 /* 1250 * Return the link layer address, and any flags of a nce. 1251 */ 1252 int 1253 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1254 { 1255 nce_t *nce; 1256 in6_addr_t *addr; 1257 sin6_t *sin6; 1258 dl_unitdata_req_t *dl; 1259 1260 ASSERT(ill != NULL && ill->ill_isv6); 1261 sin6 = (sin6_t *)&lnr->lnr_addr; 1262 addr = &sin6->sin6_addr; 1263 1264 nce = ndp_lookup_v6(ill, addr, B_FALSE); 1265 if (nce == NULL) 1266 return (ESRCH); 1267 /* If in INCOMPLETE state, no link layer address is available yet */ 1268 if (nce->nce_state == ND_INCOMPLETE) 1269 goto done; 1270 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1271 if (ill->ill_flags & ILLF_XRESOLV) 1272 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1273 else 1274 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1275 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1276 sizeof (lnr->lnr_hdw_addr)); 1277 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1278 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1279 if (nce->nce_flags & NCE_F_ISROUTER) 1280 lnr->lnr_flags = NDF_ISROUTER_ON; 1281 if (nce->nce_flags & NCE_F_ANYCAST) 1282 lnr->lnr_flags |= NDF_ANYCAST_ON; 1283 done: 1284 NCE_REFRELE(nce); 1285 return (0); 1286 } 1287 1288 /* 1289 * Send Enable/Disable multicast reqs to driver. 
1290 */ 1291 int 1292 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1293 uint32_t hw_addr_offset, mblk_t *mp) 1294 { 1295 nce_t *nce; 1296 uchar_t *hw_addr; 1297 ip_stack_t *ipst = ill->ill_ipst; 1298 1299 ASSERT(ill != NULL && ill->ill_isv6); 1300 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1301 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1302 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1303 freemsg(mp); 1304 return (EINVAL); 1305 } 1306 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1307 nce = nce_lookup_mapping(ill, addr); 1308 if (nce == NULL) { 1309 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1310 freemsg(mp); 1311 return (ESRCH); 1312 } 1313 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1314 /* 1315 * Update dl_addr_length and dl_addr_offset for primitives that 1316 * have physical addresses as opposed to full saps 1317 */ 1318 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1319 case DL_ENABMULTI_REQ: 1320 /* Track the state if this is the first enabmulti */ 1321 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1322 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1323 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1324 break; 1325 case DL_DISABMULTI_REQ: 1326 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1327 break; 1328 default: 1329 NCE_REFRELE(nce); 1330 ip1dbg(("ndp_mcastreq: default\n")); 1331 return (EINVAL); 1332 } 1333 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1334 NCE_REFRELE(nce); 1335 ill_dlpi_send(ill, mp); 1336 return (0); 1337 } 1338 1339 /* 1340 * Send a neighbor solicitation. 1341 * Returns number of milliseconds after which we should either rexmit or abort. 1342 * Return of zero means we should abort. 1343 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1344 * 1345 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1346 * the packet. 1347 * NOTE: This routine does not consume mp. 
1348 */ 1349 uint32_t 1350 nce_solicit(nce_t *nce, mblk_t *mp) 1351 { 1352 ill_t *ill; 1353 ill_t *src_ill; 1354 ip6_t *ip6h; 1355 in6_addr_t src; 1356 in6_addr_t dst; 1357 ipif_t *ipif; 1358 ip6i_t *ip6i; 1359 boolean_t dropped = B_FALSE; 1360 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 1361 1362 ASSERT(RW_READ_HELD(&ipst->ips_ill_g_lock)); 1363 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1364 ill = nce->nce_ill; 1365 ASSERT(ill != NULL); 1366 1367 if (nce->nce_rcnt == 0) { 1368 return (0); 1369 } 1370 1371 if (mp == NULL) { 1372 ASSERT(nce->nce_qd_mp != NULL); 1373 mp = nce->nce_qd_mp; 1374 } else { 1375 nce_queue_mp(nce, mp); 1376 } 1377 1378 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1379 if (mp->b_datap->db_type == M_CTL) 1380 mp = mp->b_cont; 1381 1382 ip6h = (ip6_t *)mp->b_rptr; 1383 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1384 /* 1385 * This message should have been pulled up already in 1386 * ip_wput_v6. We can't do pullups here because the message 1387 * could be from the nce_qd_mp which could have b_next/b_prev 1388 * non-NULL. 1389 */ 1390 ip6i = (ip6i_t *)ip6h; 1391 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1392 sizeof (ip6i_t) + IPV6_HDR_LEN); 1393 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1394 } 1395 src = ip6h->ip6_src; 1396 /* 1397 * If the src of outgoing packet is one of the assigned interface 1398 * addresses use it, otherwise we will pick the source address below. 1399 */ 1400 src_ill = ill; 1401 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1402 if (ill->ill_group != NULL) 1403 src_ill = ill->ill_group->illgrp_ill; 1404 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1405 for (ipif = src_ill->ill_ipif; ipif != NULL; 1406 ipif = ipif->ipif_next) { 1407 if (IN6_ARE_ADDR_EQUAL(&src, 1408 &ipif->ipif_v6lcl_addr)) { 1409 break; 1410 } 1411 } 1412 if (ipif != NULL) 1413 break; 1414 } 1415 /* 1416 * If no relevant ipif can be found, then it's not one of our 1417 * addresses. Reset to :: and let nce_xmit. 
If an ipif can be 1418 * found, but it's not yet done with DAD verification, then 1419 * just postpone this transmission until later. 1420 */ 1421 if (src_ill == NULL) 1422 src = ipv6_all_zeros; 1423 else if (!ipif->ipif_addr_ready) 1424 return (ill->ill_reachable_retrans_time); 1425 } 1426 dst = nce->nce_addr; 1427 /* 1428 * If source address is unspecified, nce_xmit will choose 1429 * one for us and initialize the hardware address also 1430 * appropriately. 1431 */ 1432 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1433 src_ill = NULL; 1434 nce->nce_rcnt--; 1435 mutex_exit(&nce->nce_lock); 1436 rw_exit(&ipst->ips_ill_g_lock); 1437 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1438 &dst, 0); 1439 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1440 mutex_enter(&nce->nce_lock); 1441 if (dropped) 1442 nce->nce_rcnt++; 1443 return (ill->ill_reachable_retrans_time); 1444 } 1445 1446 /* 1447 * Attempt to recover an address on an interface that's been marked as a 1448 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1449 * no easy way to just probe the address and have the right thing happen if 1450 * it's no longer in use. Instead, we just bring it up normally and allow the 1451 * regular interface start-up logic to probe for a remaining duplicate and take 1452 * us back down if necessary. 1453 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1454 * ip_ndp_excl. 1455 */ 1456 /* ARGSUSED */ 1457 static void 1458 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1459 { 1460 ill_t *ill = rq->q_ptr; 1461 ipif_t *ipif; 1462 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1463 1464 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1465 /* 1466 * We do not support recovery of proxy ARP'd interfaces, 1467 * because the system lacks a complete proxy ARP mechanism. 
1468 */ 1469 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1470 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1471 continue; 1472 } 1473 1474 /* 1475 * If we have already recovered or if the interface is going 1476 * away, then ignore. 1477 */ 1478 mutex_enter(&ill->ill_lock); 1479 if (!(ipif->ipif_flags & IPIF_DUPLICATE) || 1480 (ipif->ipif_flags & (IPIF_MOVING | IPIF_CONDEMNED))) { 1481 mutex_exit(&ill->ill_lock); 1482 continue; 1483 } 1484 1485 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1486 ill->ill_ipif_dup_count--; 1487 mutex_exit(&ill->ill_lock); 1488 ipif->ipif_was_dup = B_TRUE; 1489 1490 if (ipif_ndp_up(ipif) != EINPROGRESS) 1491 (void) ipif_up_done_v6(ipif); 1492 } 1493 freeb(mp); 1494 } 1495 1496 /* 1497 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1498 * As long as someone else holds the address, the interface will stay down. 1499 * When that conflict goes away, the interface is brought back up. This is 1500 * done so that accidental shutdowns of addresses aren't made permanent. Your 1501 * server will recover from a failure. 1502 * 1503 * For DHCP and temporary addresses, recovery is not done in the kernel. 1504 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1505 * 1506 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1507 */ 1508 static void 1509 ipif6_dup_recovery(void *arg) 1510 { 1511 ipif_t *ipif = arg; 1512 1513 ipif->ipif_recovery_id = 0; 1514 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1515 return; 1516 1517 /* 1518 * No lock, because this is just an optimization. 
1519 */ 1520 if (ipif->ipif_state_flags & (IPIF_MOVING | IPIF_CONDEMNED)) 1521 return; 1522 1523 /* If the link is down, we'll retry this later */ 1524 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1525 return; 1526 1527 ndp_do_recovery(ipif); 1528 } 1529 1530 /* 1531 * Perform interface recovery by forcing the duplicate interfaces up and 1532 * allowing the system to determine which ones should stay up. 1533 * 1534 * Called both by recovery timer expiry and link-up notification. 1535 */ 1536 void 1537 ndp_do_recovery(ipif_t *ipif) 1538 { 1539 ill_t *ill = ipif->ipif_ill; 1540 mblk_t *mp; 1541 ip_stack_t *ipst = ill->ill_ipst; 1542 1543 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1544 if (mp == NULL) { 1545 mutex_enter(&ill->ill_lock); 1546 if (ipif->ipif_recovery_id == 0 && 1547 !(ipif->ipif_state_flags & (IPIF_MOVING | 1548 IPIF_CONDEMNED))) { 1549 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1550 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1551 } 1552 mutex_exit(&ill->ill_lock); 1553 } else { 1554 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1555 sizeof (ipif->ipif_v6lcl_addr)); 1556 ill_refhold(ill); 1557 qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP, 1558 B_FALSE); 1559 } 1560 } 1561 1562 /* 1563 * Find the solicitation in the given message, and extract printable details 1564 * (MAC and IP addresses) from it. 1565 */ 1566 static nd_neighbor_solicit_t * 1567 ip_ndp_find_solicitation(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, char *hbuf, 1568 size_t hlen, char *sbuf, size_t slen, uchar_t **haddr) 1569 { 1570 nd_neighbor_solicit_t *ns; 1571 ip6_t *ip6h; 1572 uchar_t *addr; 1573 int alen; 1574 1575 alen = 0; 1576 ip6h = (ip6_t *)mp->b_rptr; 1577 if (dl_mp == NULL) { 1578 nd_opt_hdr_t *opt; 1579 int nslen; 1580 1581 /* 1582 * If it's from the fast-path, then it can't be a probe 1583 * message, and thus must include the source linkaddr option. 1584 * Extract that here. 
1585 */ 1586 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1587 nslen = mp->b_wptr - (uchar_t *)ns; 1588 if ((nslen -= sizeof (*ns)) > 0) { 1589 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), nslen, 1590 ND_OPT_SOURCE_LINKADDR); 1591 if (opt != NULL && 1592 opt->nd_opt_len * 8 - sizeof (*opt) >= 1593 ill->ill_nd_lla_len) { 1594 addr = (uchar_t *)(opt + 1); 1595 alen = ill->ill_nd_lla_len; 1596 } 1597 } 1598 /* 1599 * We cheat a bit here for the sake of printing usable log 1600 * messages in the rare case where the reply we got was unicast 1601 * without a source linkaddr option, and the interface is in 1602 * fastpath mode. (Sigh.) 1603 */ 1604 if (alen == 0 && ill->ill_type == IFT_ETHER && 1605 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1606 struct ether_header *pether; 1607 1608 pether = (struct ether_header *)((char *)ip6h - 1609 sizeof (*pether)); 1610 addr = pether->ether_shost.ether_addr_octet; 1611 alen = ETHERADDRL; 1612 } 1613 } else { 1614 dl_unitdata_ind_t *dlu; 1615 1616 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1617 alen = dlu->dl_src_addr_length; 1618 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1619 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1620 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1621 if (ill->ill_sap_length < 0) { 1622 alen += ill->ill_sap_length; 1623 } else { 1624 addr += ill->ill_sap_length; 1625 alen -= ill->ill_sap_length; 1626 } 1627 } 1628 } 1629 if (alen > 0) { 1630 *haddr = addr; 1631 (void) mac_colon_addr(addr, alen, hbuf, hlen); 1632 } else { 1633 *haddr = NULL; 1634 (void) strcpy(hbuf, "?"); 1635 } 1636 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1637 (void) inet_ntop(AF_INET6, &ns->nd_ns_target, sbuf, slen); 1638 return (ns); 1639 } 1640 1641 /* 1642 * This is for exclusive changes due to NDP duplicate address detection 1643 * failure. 
*/
/*
 * ip_ndp_excl() is scheduled through qwriter_ip() by ip_ndp_failure().
 * mp is either the copied packet itself (M_DATA) or a copied DLPI header
 * with the packet linked on b_cont; both are freed here.
 *
 * Every non-point-to-point ipif on the ill whose local address equals the
 * target of the offending message, and which is not already marked, is
 * flagged IPIF_DUPLICATE and brought down.  Where eligible (not DHCP or
 * temporary, resolver-type ill, not moving/condemned, ip_dup_recovery
 * enabled) a recovery timer is started for it.
 */
/* ARGSUSED */
static void
ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
{
	ill_t	*ill = rq->q_ptr;
	ipif_t	*ipif;
	char ibuf[LIFNAMSIZ + 10];	/* 10 digits for logical i/f number */
	char hbuf[MAC_STR_LEN];
	char sbuf[INET6_ADDRSTRLEN];
	nd_neighbor_solicit_t *ns;
	mblk_t *dl_mp = NULL;
	uchar_t *haddr;
	ip_stack_t *ipst = ill->ill_ipst;

	/* Separate the DLPI header, if one was attached, from the packet */
	if (DB_TYPE(mp) != M_DATA) {
		dl_mp = mp;
		mp = mp->b_cont;
	}
	ns = ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, sizeof (hbuf), sbuf,
	    sizeof (sbuf), &haddr);
	if (haddr != NULL &&
	    bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) {
		/*
		 * Ignore conflicts generated by misbehaving switches that just
		 * reflect our own messages back to us.
		 */
		goto ignore_conflict;
	}

	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {

		if ((ipif->ipif_flags & IPIF_POINTOPOINT) ||
		    !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
		    &ns->nd_ns_target)) {
			continue;
		}

		/* If it's already marked, then don't do anything. */
		if (ipif->ipif_flags & IPIF_DUPLICATE)
			continue;

		/*
		 * If this is a failure during duplicate recovery, then don't
		 * complain.  It may take a long time to recover.
		 */
		if (!ipif->ipif_was_dup) {
			ipif_get_name(ipif, ibuf, sizeof (ibuf));
			cmn_err(CE_WARN, "%s has duplicate address %s (in "
			    "use by %s); disabled", ibuf, sbuf, hbuf);
		}
		mutex_enter(&ill->ill_lock);
		ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE));
		ipif->ipif_flags |= IPIF_DUPLICATE;
		ill->ill_ipif_dup_count++;
		mutex_exit(&ill->ill_lock);
		/* ill_lock is not held across the calls that take the ipif down */
		(void) ipif_down(ipif, NULL, NULL);
		ipif_down_tail(ipif);
		mutex_enter(&ill->ill_lock);
		if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) &&
		    ill->ill_net_type == IRE_IF_RESOLVER &&
		    !(ipif->ipif_state_flags & (IPIF_MOVING |
		    IPIF_CONDEMNED)) &&
		    ipst->ips_ip_dup_recovery > 0) {
			ipif->ipif_recovery_id = timeout(ipif6_dup_recovery,
			    ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
		}
		mutex_exit(&ill->ill_lock);
	}
ignore_conflict:
	/* dl_mp is a single block; mp (its b_cont) is freed as a message */
	if (dl_mp != NULL)
		freeb(dl_mp);
	freemsg(mp);
}

/*
 * Handle failure by tearing down the ipifs with the specified address.  Note
 * that tearing down the ipif also means deleting the nce through ipif_down, so
 * it's not possible to do recovery by just restarting the nce timer.  Instead,
 * we start a timer on the ipif.
 *
 * mp and dl_mp still belong to the caller: only copies are handed to
 * ip_ndp_excl().  If the copies cannot be allocated the teardown is not
 * scheduled, but the nce is deleted regardless.
 */
static void
ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce)
{
	if ((mp = copymsg(mp)) != NULL) {
		/* Without a DLPI header the packet copy itself is passed on */
		if (dl_mp == NULL)
			dl_mp = mp;
		else if ((dl_mp = copyb(dl_mp)) != NULL)
			dl_mp->b_cont = mp;
		if (dl_mp == NULL) {
			/* copyb() failed; drop the packet copy as well */
			freemsg(mp);
		} else {
			ill_refhold(ill);
			qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP,
			    B_FALSE);
		}
	}
	ndp_delete(nce);
}

/*
 * Handle a discovered conflict: some other system is advertising that it owns
 * one of our IP addresses.  We need to defend ourselves, or just shut down the
 * interface.
1748 */ 1749 static void 1750 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1751 { 1752 ipif_t *ipif; 1753 uint32_t now; 1754 uint_t maxdefense; 1755 uint_t defs; 1756 ip_stack_t *ipst = ill->ill_ipst; 1757 1758 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 1759 NULL, NULL, ipst); 1760 if (ipif == NULL) 1761 return; 1762 /* 1763 * First, figure out if this address is disposable. 1764 */ 1765 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1766 maxdefense = ipst->ips_ip_max_temp_defend; 1767 else 1768 maxdefense = ipst->ips_ip_max_defend; 1769 1770 /* 1771 * Now figure out how many times we've defended ourselves. Ignore 1772 * defenses that happened long in the past. 1773 */ 1774 now = gethrestime_sec(); 1775 mutex_enter(&nce->nce_lock); 1776 if ((defs = nce->nce_defense_count) > 0 && 1777 now - nce->nce_defense_time > ipst->ips_ip_defend_interval) { 1778 nce->nce_defense_count = defs = 0; 1779 } 1780 nce->nce_defense_count++; 1781 nce->nce_defense_time = now; 1782 mutex_exit(&nce->nce_lock); 1783 ipif_refrele(ipif); 1784 1785 /* 1786 * If we've defended ourselves too many times already, then give up and 1787 * tear down the interface(s) using this address. Otherwise, defend by 1788 * sending out an unsolicited Neighbor Advertisement. 
1789 */ 1790 if (defs >= maxdefense) { 1791 ip_ndp_failure(ill, mp, dl_mp, nce); 1792 } else { 1793 char hbuf[MAC_STR_LEN]; 1794 char sbuf[INET6_ADDRSTRLEN]; 1795 uchar_t *haddr; 1796 1797 (void) ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, 1798 sizeof (hbuf), sbuf, sizeof (sbuf), &haddr); 1799 cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1800 hbuf, sbuf, ill->ill_name); 1801 (void) nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, B_FALSE, 1802 &nce->nce_addr, &ipv6_all_hosts_mcast, 1803 nce_advert_flags(nce)); 1804 } 1805 } 1806 1807 static void 1808 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1809 { 1810 nd_neighbor_solicit_t *ns; 1811 uint32_t hlen = ill->ill_nd_lla_len; 1812 uchar_t *haddr = NULL; 1813 icmp6_t *icmp_nd; 1814 ip6_t *ip6h; 1815 nce_t *our_nce = NULL; 1816 in6_addr_t target; 1817 in6_addr_t src; 1818 int len; 1819 int flag = 0; 1820 nd_opt_hdr_t *opt = NULL; 1821 boolean_t bad_solicit = B_FALSE; 1822 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1823 1824 ip6h = (ip6_t *)mp->b_rptr; 1825 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1826 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1827 src = ip6h->ip6_src; 1828 ns = (nd_neighbor_solicit_t *)icmp_nd; 1829 target = ns->nd_ns_target; 1830 if (IN6_IS_ADDR_MULTICAST(&target)) { 1831 if (ip_debug > 2) { 1832 /* ip1dbg */ 1833 pr_addr_dbg("ndp_input_solicit: Target is" 1834 " multicast! 
%s\n", AF_INET6, &target); 1835 } 1836 bad_solicit = B_TRUE; 1837 goto done; 1838 } 1839 if (len > sizeof (nd_neighbor_solicit_t)) { 1840 /* Options present */ 1841 opt = (nd_opt_hdr_t *)&ns[1]; 1842 len -= sizeof (nd_neighbor_solicit_t); 1843 if (!ndp_verify_optlen(opt, len)) { 1844 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1845 bad_solicit = B_TRUE; 1846 goto done; 1847 } 1848 } 1849 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1850 /* Check to see if this is a valid DAD solicitation */ 1851 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1852 if (ip_debug > 2) { 1853 /* ip1dbg */ 1854 pr_addr_dbg("ndp_input_solicit: IPv6 " 1855 "Destination is not solicited node " 1856 "multicast %s\n", AF_INET6, 1857 &ip6h->ip6_dst); 1858 } 1859 bad_solicit = B_TRUE; 1860 goto done; 1861 } 1862 } 1863 1864 our_nce = ndp_lookup_v6(ill, &target, B_FALSE); 1865 /* 1866 * If this is a valid Solicitation, a permanent 1867 * entry should exist in the cache 1868 */ 1869 if (our_nce == NULL || 1870 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1871 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1872 "ifname=%s ", ill->ill_name)); 1873 if (ip_debug > 2) { 1874 /* ip1dbg */ 1875 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1876 } 1877 bad_solicit = B_TRUE; 1878 goto done; 1879 } 1880 1881 /* At this point we should have a verified NS per spec */ 1882 if (opt != NULL) { 1883 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1884 if (opt != NULL) { 1885 haddr = (uchar_t *)&opt[1]; 1886 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 1887 hlen == 0) { 1888 ip1dbg(("ndp_input_advert: bad SLLA\n")); 1889 bad_solicit = B_TRUE; 1890 goto done; 1891 } 1892 } 1893 } 1894 1895 /* If sending directly to peer, set the unicast flag */ 1896 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 1897 flag |= NDP_UNICAST; 1898 1899 /* 1900 * Create/update the entry for the soliciting node. 1901 * or respond to outstanding queries, don't if 1902 * the source is unspecified address. 
1903 */ 1904 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1905 int err; 1906 nce_t *nnce; 1907 1908 ASSERT(ill->ill_isv6); 1909 /* 1910 * Regular solicitations *must* include the Source Link-Layer 1911 * Address option. Ignore messages that do not. 1912 */ 1913 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1914 ip1dbg(("ndp_input_solicit: source link-layer address " 1915 "option missing with a specified source.\n")); 1916 bad_solicit = B_TRUE; 1917 goto done; 1918 } 1919 1920 /* 1921 * This is a regular solicitation. If we're still in the 1922 * process of verifying the address, then don't respond at all 1923 * and don't keep track of the sender. 1924 */ 1925 if (our_nce->nce_state == ND_PROBE) 1926 goto done; 1927 1928 /* 1929 * If the solicitation doesn't have sender hardware address 1930 * (legal for unicast solicitation), then process without 1931 * installing the return NCE. Either we already know it, or 1932 * we'll be forced to look it up when (and if) we reply to the 1933 * packet. 1934 */ 1935 if (haddr == NULL) 1936 goto no_source; 1937 1938 err = ndp_lookup_then_add_v6(ill, 1939 haddr, 1940 &src, /* Soliciting nodes address */ 1941 &ipv6_all_ones, 1942 &ipv6_all_zeros, 1943 0, 1944 0, 1945 ND_STALE, 1946 &nnce); 1947 switch (err) { 1948 case 0: 1949 /* done with this entry */ 1950 NCE_REFRELE(nnce); 1951 break; 1952 case EEXIST: 1953 /* 1954 * B_FALSE indicates this is not an 1955 * an advertisement. 1956 */ 1957 ndp_process(nnce, haddr, 0, B_FALSE); 1958 NCE_REFRELE(nnce); 1959 break; 1960 default: 1961 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1962 err)); 1963 goto done; 1964 } 1965 no_source: 1966 flag |= NDP_SOLICITED; 1967 } else { 1968 /* 1969 * No source link layer address option should be present in a 1970 * valid DAD request. 
1971 */ 1972 if (haddr != NULL) { 1973 ip1dbg(("ndp_input_solicit: source link-layer address " 1974 "option present with an unspecified source.\n")); 1975 bad_solicit = B_TRUE; 1976 goto done; 1977 } 1978 if (our_nce->nce_state == ND_PROBE) { 1979 /* 1980 * Internally looped-back probes won't have DLPI 1981 * attached to them. External ones (which are sent by 1982 * multicast) always will. Just ignore our own 1983 * transmissions. 1984 */ 1985 if (dl_mp != NULL) { 1986 /* 1987 * If someone else is probing our address, then 1988 * we've crossed wires. Declare failure. 1989 */ 1990 ip_ndp_failure(ill, mp, dl_mp, our_nce); 1991 } 1992 goto done; 1993 } 1994 /* 1995 * This is a DAD probe. Multicast the advertisement to the 1996 * all-nodes address. 1997 */ 1998 src = ipv6_all_hosts_mcast; 1999 } 2000 flag |= nce_advert_flags(our_nce); 2001 /* Response to a solicitation */ 2002 (void) nce_xmit(ill, 2003 ND_NEIGHBOR_ADVERT, 2004 ill, /* ill to be used for extracting ill_nd_lla */ 2005 B_TRUE, /* use ill_nd_lla */ 2006 &target, /* Source and target of the advertisement pkt */ 2007 &src, /* IP Destination (source of original pkt) */ 2008 flag); 2009 done: 2010 if (bad_solicit) 2011 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 2012 if (our_nce != NULL) 2013 NCE_REFRELE(our_nce); 2014 } 2015 2016 void 2017 ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2018 { 2019 nd_neighbor_advert_t *na; 2020 uint32_t hlen = ill->ill_nd_lla_len; 2021 uchar_t *haddr = NULL; 2022 icmp6_t *icmp_nd; 2023 ip6_t *ip6h; 2024 nce_t *dst_nce = NULL; 2025 in6_addr_t target; 2026 nd_opt_hdr_t *opt = NULL; 2027 int len; 2028 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2029 ip_stack_t *ipst = ill->ill_ipst; 2030 2031 ip6h = (ip6_t *)mp->b_rptr; 2032 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2033 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2034 na = (nd_neighbor_advert_t *)icmp_nd; 2035 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 2036 (na->nd_na_flags_reserved & 
ND_NA_FLAG_SOLICITED)) { 2037 ip1dbg(("ndp_input_advert: Target is multicast but the " 2038 "solicited flag is not zero\n")); 2039 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2040 return; 2041 } 2042 target = na->nd_na_target; 2043 if (IN6_IS_ADDR_MULTICAST(&target)) { 2044 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 2045 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2046 return; 2047 } 2048 if (len > sizeof (nd_neighbor_advert_t)) { 2049 opt = (nd_opt_hdr_t *)&na[1]; 2050 if (!ndp_verify_optlen(opt, 2051 len - sizeof (nd_neighbor_advert_t))) { 2052 ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); 2053 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2054 return; 2055 } 2056 /* At this point we have a verified NA per spec */ 2057 len -= sizeof (nd_neighbor_advert_t); 2058 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 2059 if (opt != NULL) { 2060 haddr = (uchar_t *)&opt[1]; 2061 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 2062 hlen == 0) { 2063 ip1dbg(("ndp_input_advert: bad SLLA\n")); 2064 BUMP_MIB(mib, 2065 ipv6IfIcmpInBadNeighborAdvertisements); 2066 return; 2067 } 2068 } 2069 } 2070 2071 /* 2072 * If this interface is part of the group look at all the 2073 * ills in the group. 
2074 */ 2075 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2076 if (ill->ill_group != NULL) 2077 ill = ill->ill_group->illgrp_ill; 2078 2079 for (; ill != NULL; ill = ill->ill_group_next) { 2080 mutex_enter(&ill->ill_lock); 2081 if (!ILL_CAN_LOOKUP(ill)) { 2082 mutex_exit(&ill->ill_lock); 2083 continue; 2084 } 2085 ill_refhold_locked(ill); 2086 mutex_exit(&ill->ill_lock); 2087 dst_nce = ndp_lookup_v6(ill, &target, B_FALSE); 2088 /* We have to drop the lock since ndp_process calls put* */ 2089 rw_exit(&ipst->ips_ill_g_lock); 2090 if (dst_nce != NULL) { 2091 if ((dst_nce->nce_flags & NCE_F_PERMANENT) && 2092 dst_nce->nce_state == ND_PROBE) { 2093 /* 2094 * Someone else sent an advertisement for an 2095 * address that we're trying to configure. 2096 * Tear it down. Note that dl_mp might be NULL 2097 * if we're getting a unicast reply. This 2098 * isn't typically done (multicast is the norm 2099 * in response to a probe), but ip_ndp_failure 2100 * will handle the dl_mp == NULL case as well. 2101 */ 2102 ip_ndp_failure(ill, mp, dl_mp, dst_nce); 2103 } else if (dst_nce->nce_flags & NCE_F_PERMANENT) { 2104 /* 2105 * Someone just announced one of our local 2106 * addresses. If it wasn't us, then this is a 2107 * conflict. Defend the address or shut it 2108 * down. 2109 */ 2110 if (dl_mp != NULL && 2111 (haddr == NULL || 2112 nce_cmp_ll_addr(dst_nce, haddr, 2113 ill->ill_nd_lla_len))) { 2114 ip_ndp_conflict(ill, mp, dl_mp, 2115 dst_nce); 2116 } 2117 } else { 2118 if (na->nd_na_flags_reserved & 2119 ND_NA_FLAG_ROUTER) { 2120 dst_nce->nce_flags |= NCE_F_ISROUTER; 2121 } 2122 /* B_TRUE indicates this an advertisement */ 2123 ndp_process(dst_nce, haddr, 2124 na->nd_na_flags_reserved, B_TRUE); 2125 } 2126 NCE_REFRELE(dst_nce); 2127 } 2128 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2129 ill_refrele(ill); 2130 } 2131 rw_exit(&ipst->ips_ill_g_lock); 2132 } 2133 2134 /* 2135 * Process NDP neighbor solicitation/advertisement messages. 
* The checksum has already been verified before reaching here.
 *
 * mp is always freed here; dl_mp is only passed through to the
 * solicitation/advertisement handlers.  The message is pulled up into a
 * single block and dropped unless the hop limit is IPV6_MAX_HOPS, ICMPv6
 * directly follows the IPv6 header, the ICMPv6 code is zero, and the
 * payload is at least an ICMPv6 header plus one IPv6 address long.
 */
void
ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
	icmp6_t		*icmp_nd;
	ip6_t		*ip6h;
	int		len;
	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;


	/* The handlers read the whole message through b_rptr/b_wptr */
	if (!pullupmsg(mp, -1)) {
		ip1dbg(("ndp_input: pullupmsg failed\n"));
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
		goto done;
	}
	ip6h = (ip6_t *)mp->b_rptr;
	if (ip6h->ip6_hops != IPV6_MAX_HOPS) {
		ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n"));
		BUMP_MIB(mib, ipv6IfIcmpBadHoplimit);
		goto done;
	}
	/*
	 * NDP does not accept any extension headers between the
	 * IP header and the ICMP header since e.g. a routing
	 * header could be dangerous.
	 * This assumes that any AH or ESP headers are removed
	 * by ip prior to passing the packet to ndp_input.
	 */
	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
		ip1dbg(("ndp_input: Wrong next header 0x%x\n",
		    ip6h->ip6_nxt));
		BUMP_MIB(mib, ipv6IfIcmpInErrors);
		goto done;
	}
	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
	ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT ||
	    icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT);
	if (icmp_nd->icmp6_code != 0) {
		ip1dbg(("ndp_input: icmp6 code != 0 \n"));
		BUMP_MIB(mib, ipv6IfIcmpInErrors);
		goto done;
	}
	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
	/*
	 * Make sure packet length is large enough for either
	 * a NS or a NA icmp packet.
	 */
	if (len <  sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) {
		ip1dbg(("ndp_input: packet too short\n"));
		BUMP_MIB(mib, ipv6IfIcmpInErrors);
		goto done;
	}
	/* Anything that is not a solicitation is handled as an advertisement */
	if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) {
		ndp_input_solicit(ill, mp, dl_mp);
	} else {
		ndp_input_advert(ill, mp, dl_mp);
	}
done:
	freemsg(mp);
}

/*
 * nce_xmit is called to form and transmit a ND solicitation or
 * advertisement ICMP packet.
 *
 * If the source address is unspecified and this isn't a probe (used for
 * duplicate address detection), an appropriate source address and link layer
 * address will be chosen here.  The link layer address option is included if
 * the source is specified (i.e., all non-probe packets), and omitted (per the
 * specification) otherwise.
 *
 * It returns B_FALSE only if it does a successful put() to the
 * corresponding ill's ill_wq otherwise returns B_TRUE.
 */
static boolean_t
nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill,
    boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target,
    int flag)
{
	uint32_t	len;
	icmp6_t 	*icmp6;
	mblk_t		*mp;
	ip6_t		*ip6h;
	nd_opt_hdr_t	*opt;
	uint_t		plen;
	ip6i_t		*ip6i;
	ipif_t		*src_ipif = NULL;
	uint8_t		*hw_addr;
	zoneid_t	zoneid = GLOBAL_ZONEID;

	/*
	 * If we have a unspecified source(sender) address, select a
	 * proper source address for the solicitation here itself so
	 * that we can initialize the h/w address correctly. This is
	 * needed for interface groups as source address can come from
	 * the whole group and the h/w address initialized from ill will
	 * be wrong if the source address comes from a different ill.
	 *
	 * If the sender is specified then we use this address in order
	 * to lookup the zoneid before calling ip_output_v6().
This is to 2237 * enable unicast ND_NEIGHBOR_ADVERT packets to be routed correctly 2238 * by IP (we cannot guarantee that the global zone has an interface 2239 * route to the destination). 2240 * 2241 * Note that the NA never comes here with the unspecified source 2242 * address. The following asserts that whenever the source 2243 * address is specified, the haddr also should be specified. 2244 */ 2245 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 2246 2247 if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { 2248 ASSERT(operation != ND_NEIGHBOR_ADVERT); 2249 /* 2250 * Pick a source address for this solicitation, but 2251 * restrict the selection to addresses assigned to the 2252 * output interface (or interface group). We do this 2253 * because the destination will create a neighbor cache 2254 * entry for the source address of this packet, so the 2255 * source address had better be a valid neighbor. 2256 */ 2257 src_ipif = ipif_select_source_v6(ill, target, RESTRICT_TO_ILL, 2258 IPV6_PREFER_SRC_DEFAULT, ALL_ZONES); 2259 if (src_ipif == NULL) { 2260 char buf[INET6_ADDRSTRLEN]; 2261 2262 ip1dbg(("nce_xmit: No source ipif for dst %s\n", 2263 inet_ntop(AF_INET6, (char *)target, buf, 2264 sizeof (buf)))); 2265 return (B_TRUE); 2266 } 2267 sender = &src_ipif->ipif_v6src_addr; 2268 hwaddr_ill = src_ipif->ipif_ill; 2269 } else if (!(IN6_IS_ADDR_UNSPECIFIED(sender))) { 2270 zoneid = ipif_lookup_addr_zoneid_v6(sender, ill, ill->ill_ipst); 2271 /* 2272 * It's possible for ipif_lookup_addr_zoneid_v6() to return 2273 * ALL_ZONES if it cannot find a matching ipif for the address 2274 * we are trying to use. In this case we err on the side of 2275 * trying to send the packet by defaulting to the GLOBAL_ZONEID. 2276 */ 2277 if (zoneid == ALL_ZONES) 2278 zoneid = GLOBAL_ZONEID; 2279 } 2280 2281 /* 2282 * Always make sure that the NS/NA packets don't get load 2283 * spread. 
This is needed so that the probe packets sent 2284 * by the in.mpathd daemon can really go out on the desired 2285 * interface. Probe packets are made to go out on a desired 2286 * interface by including a ip6i with ATTACH_IF flag. As these 2287 * packets indirectly end up sending/receiving NS/NA packets 2288 * (neighbor doing NUD), we have to make sure that NA 2289 * also go out on the same interface. 2290 */ 2291 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7) / 8; 2292 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 2293 plen * 8; 2294 mp = allocb(len, BPRI_LO); 2295 if (mp == NULL) { 2296 if (src_ipif != NULL) 2297 ipif_refrele(src_ipif); 2298 return (B_TRUE); 2299 } 2300 bzero((char *)mp->b_rptr, len); 2301 mp->b_wptr = mp->b_rptr + len; 2302 2303 ip6i = (ip6i_t *)mp->b_rptr; 2304 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2305 ip6i->ip6i_nxt = IPPROTO_RAW; 2306 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2307 if (flag & NDP_PROBE) 2308 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 2309 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2310 2311 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 2312 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2313 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2314 ip6h->ip6_nxt = IPPROTO_ICMPV6; 2315 ip6h->ip6_hops = IPV6_MAX_HOPS; 2316 ip6h->ip6_dst = *target; 2317 icmp6 = (icmp6_t *)&ip6h[1]; 2318 2319 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 2320 sizeof (nd_neighbor_advert_t)); 2321 2322 if (operation == ND_NEIGHBOR_SOLICIT) { 2323 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 2324 2325 if (!(flag & NDP_PROBE)) 2326 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 2327 ip6h->ip6_src = *sender; 2328 ns->nd_ns_target = *target; 2329 if (!(flag & NDP_UNICAST)) { 2330 /* Form multicast address of the target */ 2331 ip6h->ip6_dst = ipv6_solicited_node_mcast; 2332 ip6h->ip6_dst.s6_addr32[3] |= 2333 ns->nd_ns_target.s6_addr32[3]; 2334 } 2335 } else { 2336 
nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 2337 2338 ASSERT(!(flag & NDP_PROBE)); 2339 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2340 ip6h->ip6_src = *sender; 2341 na->nd_na_target = *sender; 2342 if (flag & NDP_ISROUTER) 2343 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 2344 if (flag & NDP_SOLICITED) 2345 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 2346 if (flag & NDP_ORIDE) 2347 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 2348 } 2349 2350 hw_addr = NULL; 2351 if (!(flag & NDP_PROBE)) { 2352 hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : 2353 hwaddr_ill->ill_phys_addr; 2354 if (hw_addr != NULL) { 2355 /* Fill in link layer address and option len */ 2356 opt->nd_opt_len = (uint8_t)plen; 2357 bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); 2358 } 2359 } 2360 if (hw_addr == NULL) { 2361 /* If there's no link layer address option, then strip it. */ 2362 len -= plen * 8; 2363 mp->b_wptr = mp->b_rptr + len; 2364 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2365 } 2366 2367 icmp6->icmp6_type = (uint8_t)operation; 2368 icmp6->icmp6_code = 0; 2369 /* 2370 * Prepare for checksum by putting icmp length in the icmp 2371 * checksum field. The checksum is calculated in ip_wput_v6. 2372 */ 2373 icmp6->icmp6_cksum = ip6h->ip6_plen; 2374 2375 if (src_ipif != NULL) 2376 ipif_refrele(src_ipif); 2377 2378 ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT); 2379 return (B_FALSE); 2380 } 2381 2382 /* 2383 * Make a link layer address (does not include the SAP) from an nce. 2384 * To form the link layer address, use the last four bytes of ipv6 2385 * address passed in and the fixed offset stored in nce. 
2386 */ 2387 static void 2388 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 2389 { 2390 uchar_t *mask, *to; 2391 ill_t *ill = nce->nce_ill; 2392 int len; 2393 2394 if (ill->ill_net_type == IRE_IF_NORESOLVER) 2395 return; 2396 ASSERT(nce->nce_res_mp != NULL); 2397 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 2398 ASSERT(nce->nce_flags & NCE_F_MAPPING); 2399 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 2400 ASSERT(addr != NULL); 2401 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 2402 addrpos, ill->ill_nd_lla_len); 2403 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 2404 IPV6_ADDR_LEN); 2405 mask = (uchar_t *)&nce->nce_extract_mask; 2406 mask += (IPV6_ADDR_LEN - len); 2407 addr += (IPV6_ADDR_LEN - len); 2408 to = addrpos + nce->nce_ll_extract_start; 2409 while (len-- > 0) 2410 *to++ |= *mask++ & *addr++; 2411 } 2412 2413 mblk_t * 2414 nce_udreq_alloc(ill_t *ill) 2415 { 2416 mblk_t *template_mp = NULL; 2417 dl_unitdata_req_t *dlur; 2418 int sap_length; 2419 2420 ASSERT(ill->ill_isv6); 2421 2422 sap_length = ill->ill_sap_length; 2423 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 2424 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 2425 if (template_mp == NULL) 2426 return (NULL); 2427 2428 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 2429 dlur->dl_priority.dl_min = 0; 2430 dlur->dl_priority.dl_max = 0; 2431 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 2432 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 2433 2434 /* Copy in the SAP value. */ 2435 NCE_LL_SAP_COPY(ill, template_mp); 2436 2437 return (template_mp); 2438 } 2439 2440 /* 2441 * NDP retransmit timer. 2442 * This timer goes off when: 2443 * a. It is time to retransmit NS for resolver. 2444 * b. It is time to send reachability probes. 
2445 */ 2446 void 2447 ndp_timer(void *arg) 2448 { 2449 nce_t *nce = arg; 2450 ill_t *ill = nce->nce_ill; 2451 uint32_t ms; 2452 char addrbuf[INET6_ADDRSTRLEN]; 2453 mblk_t *mp; 2454 boolean_t dropped = B_FALSE; 2455 ip_stack_t *ipst = ill->ill_ipst; 2456 2457 /* 2458 * The timer has to be cancelled by ndp_delete before doing the final 2459 * refrele. So the NCE is guaranteed to exist when the timer runs 2460 * until it clears the timeout_id. Before clearing the timeout_id 2461 * bump up the refcnt so that we can continue to use the nce 2462 */ 2463 ASSERT(nce != NULL); 2464 2465 /* 2466 * Grab the ill_g_lock now itself to avoid lock order problems. 2467 * nce_solicit needs ill_g_lock to be able to traverse ills 2468 */ 2469 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2470 mutex_enter(&nce->nce_lock); 2471 NCE_REFHOLD_LOCKED(nce); 2472 nce->nce_timeout_id = 0; 2473 2474 /* 2475 * Check the reachability state first. 2476 */ 2477 switch (nce->nce_state) { 2478 case ND_DELAY: 2479 rw_exit(&ipst->ips_ill_g_lock); 2480 nce->nce_state = ND_PROBE; 2481 mutex_exit(&nce->nce_lock); 2482 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2483 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2484 if (ip_debug > 3) { 2485 /* ip2dbg */ 2486 pr_addr_dbg("ndp_timer: state for %s changed " 2487 "to PROBE\n", AF_INET6, &nce->nce_addr); 2488 } 2489 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2490 NCE_REFRELE(nce); 2491 return; 2492 case ND_PROBE: 2493 /* must be retransmit timer */ 2494 rw_exit(&ipst->ips_ill_g_lock); 2495 nce->nce_pcnt--; 2496 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2497 nce->nce_pcnt >= -1); 2498 if (nce->nce_pcnt > 0) { 2499 /* 2500 * As per RFC2461, the nce gets deleted after 2501 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2502 * Note that the first unicast solicitation is sent 2503 * during the DELAY state. 
2504 */ 2505 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2506 nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, 2507 addrbuf, sizeof (addrbuf)))); 2508 mutex_exit(&nce->nce_lock); 2509 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, 2510 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, 2511 (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE : 2512 NDP_UNICAST); 2513 if (dropped) { 2514 mutex_enter(&nce->nce_lock); 2515 nce->nce_pcnt++; 2516 mutex_exit(&nce->nce_lock); 2517 } 2518 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 2519 } else if (nce->nce_pcnt < 0) { 2520 /* No hope, delete the nce */ 2521 nce->nce_state = ND_UNREACHABLE; 2522 mutex_exit(&nce->nce_lock); 2523 if (ip_debug > 2) { 2524 /* ip1dbg */ 2525 pr_addr_dbg("ndp_timer: Delete IRE for" 2526 " dst %s\n", AF_INET6, &nce->nce_addr); 2527 } 2528 ndp_delete(nce); 2529 } else if (!(nce->nce_flags & NCE_F_PERMANENT)) { 2530 /* Wait RetransTimer, before deleting the entry */ 2531 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2532 nce->nce_pcnt, inet_ntop(AF_INET6, 2533 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2534 mutex_exit(&nce->nce_lock); 2535 /* Wait one interval before killing */ 2536 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2537 } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { 2538 ipif_t *ipif; 2539 2540 /* 2541 * We're done probing, and we can now declare this 2542 * address to be usable. Let IP know that it's ok to 2543 * use. 
2544 */ 2545 nce->nce_state = ND_REACHABLE; 2546 mutex_exit(&nce->nce_lock); 2547 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, 2548 ALL_ZONES, NULL, NULL, NULL, NULL, ipst); 2549 if (ipif != NULL) { 2550 if (ipif->ipif_was_dup) { 2551 char ibuf[LIFNAMSIZ + 10]; 2552 char sbuf[INET6_ADDRSTRLEN]; 2553 2554 ipif->ipif_was_dup = B_FALSE; 2555 (void) inet_ntop(AF_INET6, 2556 &ipif->ipif_v6lcl_addr, 2557 sbuf, sizeof (sbuf)); 2558 ipif_get_name(ipif, ibuf, 2559 sizeof (ibuf)); 2560 cmn_err(CE_NOTE, "recovered address " 2561 "%s on %s", sbuf, ibuf); 2562 } 2563 if ((ipif->ipif_flags & IPIF_UP) && 2564 !ipif->ipif_addr_ready) { 2565 ip_rts_ifmsg(ipif); 2566 ip_rts_newaddrmsg(RTM_ADD, 0, ipif); 2567 sctp_update_ipif(ipif, SCTP_IPIF_UP); 2568 } 2569 ipif->ipif_addr_ready = 1; 2570 ipif_refrele(ipif); 2571 } 2572 /* Begin defending our new address */ 2573 nce->nce_unsolicit_count = 0; 2574 dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, 2575 B_FALSE, &nce->nce_addr, &ipv6_all_hosts_mcast, 2576 nce_advert_flags(nce)); 2577 if (dropped) { 2578 nce->nce_unsolicit_count = 1; 2579 NDP_RESTART_TIMER(nce, 2580 ipst->ips_ip_ndp_unsolicit_interval); 2581 } else if (ipst->ips_ip_ndp_defense_interval != 0) { 2582 NDP_RESTART_TIMER(nce, 2583 ipst->ips_ip_ndp_defense_interval); 2584 } 2585 } else { 2586 /* 2587 * This is an address we're probing to be our own, but 2588 * the ill is down. Wait until it comes back before 2589 * doing anything, but switch to reachable state so 2590 * that the restart will work. 2591 */ 2592 nce->nce_state = ND_REACHABLE; 2593 mutex_exit(&nce->nce_lock); 2594 } 2595 NCE_REFRELE(nce); 2596 return; 2597 case ND_INCOMPLETE: 2598 /* 2599 * Must be resolvers retransmit timer. 2600 */ 2601 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2602 ip6i_t *ip6i; 2603 ip6_t *ip6h; 2604 mblk_t *data_mp; 2605 2606 /* 2607 * Walk the list of packets queued, and see if there 2608 * are any multipathing probe packets. 
Such packets 2609 * are always queued at the head. Since this is a 2610 * retransmit timer firing, mark such packets as 2611 * delayed in ND resolution. This info will be used 2612 * in ip_wput_v6(). Multipathing probe packets will 2613 * always have an ip6i_t. Once we hit a packet without 2614 * it, we can break out of this loop. 2615 */ 2616 if (mp->b_datap->db_type == M_CTL) 2617 data_mp = mp->b_cont; 2618 else 2619 data_mp = mp; 2620 2621 ip6h = (ip6_t *)data_mp->b_rptr; 2622 if (ip6h->ip6_nxt != IPPROTO_RAW) 2623 break; 2624 2625 /* 2626 * This message should have been pulled up already in 2627 * ip_wput_v6. We can't do pullups here because the 2628 * b_next/b_prev is non-NULL. 2629 */ 2630 ip6i = (ip6i_t *)ip6h; 2631 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2632 sizeof (ip6i_t) + IPV6_HDR_LEN); 2633 2634 /* Mark this packet as delayed due to ND resolution */ 2635 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2636 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2637 } 2638 if (nce->nce_qd_mp != NULL) { 2639 ms = nce_solicit(nce, NULL); 2640 rw_exit(&ipst->ips_ill_g_lock); 2641 if (ms == 0) { 2642 if (nce->nce_state != ND_REACHABLE) { 2643 mutex_exit(&nce->nce_lock); 2644 nce_resolv_failed(nce); 2645 ndp_delete(nce); 2646 } else { 2647 mutex_exit(&nce->nce_lock); 2648 } 2649 } else { 2650 mutex_exit(&nce->nce_lock); 2651 NDP_RESTART_TIMER(nce, (clock_t)ms); 2652 } 2653 NCE_REFRELE(nce); 2654 return; 2655 } 2656 mutex_exit(&nce->nce_lock); 2657 rw_exit(&ipst->ips_ill_g_lock); 2658 NCE_REFRELE(nce); 2659 break; 2660 case ND_REACHABLE : 2661 rw_exit(&ipst->ips_ill_g_lock); 2662 if (((nce->nce_flags & NCE_F_UNSOL_ADV) && 2663 nce->nce_unsolicit_count != 0) || 2664 ((nce->nce_flags & NCE_F_PERMANENT) && 2665 ipst->ips_ip_ndp_defense_interval != 0)) { 2666 if (nce->nce_unsolicit_count > 0) 2667 nce->nce_unsolicit_count--; 2668 mutex_exit(&nce->nce_lock); 2669 dropped = nce_xmit(ill, 2670 ND_NEIGHBOR_ADVERT, 2671 ill, /* ill to be used for hw addr */ 2672 B_FALSE, /* use 
ill_phys_addr */ 2673 &nce->nce_addr, 2674 &ipv6_all_hosts_mcast, 2675 nce_advert_flags(nce)); 2676 if (dropped) { 2677 mutex_enter(&nce->nce_lock); 2678 nce->nce_unsolicit_count++; 2679 mutex_exit(&nce->nce_lock); 2680 } 2681 if (nce->nce_unsolicit_count != 0) { 2682 NDP_RESTART_TIMER(nce, 2683 ipst->ips_ip_ndp_unsolicit_interval); 2684 } else { 2685 NDP_RESTART_TIMER(nce, 2686 ipst->ips_ip_ndp_defense_interval); 2687 } 2688 } else { 2689 mutex_exit(&nce->nce_lock); 2690 } 2691 NCE_REFRELE(nce); 2692 break; 2693 default: 2694 rw_exit(&ipst->ips_ill_g_lock); 2695 mutex_exit(&nce->nce_lock); 2696 NCE_REFRELE(nce); 2697 break; 2698 } 2699 } 2700 2701 /* 2702 * Set a link layer address from the ll_addr passed in. 2703 * Copy SAP from ill. 2704 */ 2705 static void 2706 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2707 { 2708 ill_t *ill = nce->nce_ill; 2709 uchar_t *woffset; 2710 2711 ASSERT(ll_addr != NULL); 2712 /* Always called before fast_path_probe */ 2713 ASSERT(nce->nce_fp_mp == NULL); 2714 if (ill->ill_sap_length != 0) { 2715 /* 2716 * Copy the SAP type specified in the 2717 * request into the xmit template. 2718 */ 2719 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2720 } 2721 if (ill->ill_phys_addr_length > 0) { 2722 /* 2723 * The bcopy() below used to be called for the physical address 2724 * length rather than the link layer address length. For 2725 * ethernet and many other media, the phys_addr and lla are 2726 * identical. 2727 * However, with xresolv interfaces being introduced, the 2728 * phys_addr and lla are no longer the same, and the physical 2729 * address may not have any useful meaning, so we use the lla 2730 * for IPv6 address resolution and destination addressing. 2731 * 2732 * For PPP or other interfaces with a zero length 2733 * physical address, don't do anything here. 2734 * The bcopy() with a zero phys_addr length was previously 2735 * a no-op for interfaces with a zero-length physical address. 
2736 * Using the lla for them would change the way they operate. 2737 * Doing nothing in such cases preserves expected behavior. 2738 */ 2739 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2740 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2741 } 2742 } 2743 2744 static boolean_t 2745 nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) 2746 { 2747 ill_t *ill = nce->nce_ill; 2748 uchar_t *ll_offset; 2749 2750 ASSERT(nce->nce_res_mp != NULL); 2751 if (ll_addr == NULL) 2752 return (B_FALSE); 2753 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2754 if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) 2755 return (B_TRUE); 2756 return (B_FALSE); 2757 } 2758 2759 /* 2760 * Updates the link layer address or the reachability state of 2761 * a cache entry. Reset probe counter if needed. 2762 */ 2763 static void 2764 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2765 { 2766 ill_t *ill = nce->nce_ill; 2767 boolean_t need_stop_timer = B_FALSE; 2768 boolean_t need_fastpath_update = B_FALSE; 2769 2770 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2771 ASSERT(nce->nce_ipversion == IPV6_VERSION); 2772 /* 2773 * If this interface does not do NUD, there is no point 2774 * in allowing an update to the cache entry. Although 2775 * we will respond to NS. 2776 * The only time we accept an update for a resolver when 2777 * NUD is turned off is when it has just been created. 2778 * Non-Resolvers will always be created as REACHABLE. 
2779 */ 2780 if (new_state != ND_UNCHANGED) { 2781 if ((nce->nce_flags & NCE_F_NONUD) && 2782 (nce->nce_state != ND_INCOMPLETE)) 2783 return; 2784 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2785 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2786 need_stop_timer = B_TRUE; 2787 if (new_state == ND_REACHABLE) 2788 nce->nce_last = TICK_TO_MSEC(lbolt64); 2789 else { 2790 /* We force NUD in this case */ 2791 nce->nce_last = 0; 2792 } 2793 nce->nce_state = new_state; 2794 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2795 } 2796 /* 2797 * In case of fast path we need to free the the fastpath 2798 * M_DATA and do another probe. Otherwise we can just 2799 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2800 * whatever packets that happens to be transmitting at the time. 2801 */ 2802 if (new_ll_addr != NULL) { 2803 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2804 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2805 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2806 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2807 if (nce->nce_fp_mp != NULL) { 2808 freemsg(nce->nce_fp_mp); 2809 nce->nce_fp_mp = NULL; 2810 } 2811 need_fastpath_update = B_TRUE; 2812 } 2813 mutex_exit(&nce->nce_lock); 2814 if (need_stop_timer) { 2815 (void) untimeout(nce->nce_timeout_id); 2816 nce->nce_timeout_id = 0; 2817 } 2818 if (need_fastpath_update) 2819 nce_fastpath(nce); 2820 mutex_enter(&nce->nce_lock); 2821 } 2822 2823 void 2824 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 2825 { 2826 uint_t count = 0; 2827 mblk_t **mpp; 2828 2829 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2830 2831 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2832 mpp = &(*mpp)->b_next) { 2833 if (++count > 2834 nce->nce_ill->ill_max_buf) { 2835 mblk_t *tmp = nce->nce_qd_mp->b_next; 2836 2837 nce->nce_qd_mp->b_next = NULL; 2838 nce->nce_qd_mp->b_prev = NULL; 2839 freemsg(nce->nce_qd_mp); 2840 nce->nce_qd_mp = tmp; 2841 } 2842 } 2843 /* put this on the list */ 2844 if (head_insert) { 2845 
mp->b_next = nce->nce_qd_mp; 2846 nce->nce_qd_mp = mp; 2847 } else { 2848 *mpp = mp; 2849 } 2850 } 2851 2852 static void 2853 nce_queue_mp(nce_t *nce, mblk_t *mp) 2854 { 2855 boolean_t head_insert = B_FALSE; 2856 ip6_t *ip6h; 2857 ip6i_t *ip6i; 2858 mblk_t *data_mp; 2859 2860 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2861 2862 if (mp->b_datap->db_type == M_CTL) 2863 data_mp = mp->b_cont; 2864 else 2865 data_mp = mp; 2866 ip6h = (ip6_t *)data_mp->b_rptr; 2867 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2868 /* 2869 * This message should have been pulled up already in 2870 * ip_wput_v6. We can't do pullups here because the message 2871 * could be from the nce_qd_mp which could have b_next/b_prev 2872 * non-NULL. 2873 */ 2874 ip6i = (ip6i_t *)ip6h; 2875 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2876 sizeof (ip6i_t) + IPV6_HDR_LEN); 2877 /* 2878 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 2879 * This has 2 aspects mentioned below. 2880 * 1. Perform head insertion in the nce_qd_mp for these packets. 2881 * This ensures that next retransmit of ND solicitation 2882 * will use the interface specified by the probe packet, 2883 * for both NS and NA. This corresponds to the src address 2884 * in the IPv6 packet. If we insert at tail, we will be 2885 * depending on the packet at the head for successful 2886 * ND resolution. This is not reliable, because the interface 2887 * on which the NA arrives could be different from the interface 2888 * on which the NS was sent, and if the receiving interface is 2889 * failed, it will appear that the sending interface is also 2890 * failed, causing in.mpathd to misdiagnose this as link 2891 * failure. 2892 * 2. Drop the original packet, if the ND resolution did not 2893 * succeed in the first attempt. However we will create the 2894 * nce and the ire, as soon as the ND resolution succeeds. 2895 * We don't gain anything by queueing multiple probe packets 2896 * and sending them back-to-back once resolution succeeds. 
2897 * It is sufficient to send just 1 packet after ND resolution 2898 * succeeds. Since mpathd is sending down probe packets at a 2899 * constant rate, we don't need to send the queued packet. We 2900 * need to queue it only for NDP resolution. The benefit of 2901 * dropping the probe packets that were delayed in ND 2902 * resolution, is that in.mpathd will not see inflated 2903 * RTT. If the ND resolution does not succeed within 2904 * in.mpathd's failure detection time, mpathd may detect 2905 * a failure, and it does not matter whether the packet 2906 * was queued or dropped. 2907 */ 2908 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2909 head_insert = B_TRUE; 2910 } 2911 2912 nce_queue_mp_common(nce, mp, head_insert); 2913 } 2914 2915 /* 2916 * Called when address resolution failed due to a timeout. 2917 * Send an ICMP unreachable in response to all queued packets. 2918 */ 2919 void 2920 nce_resolv_failed(nce_t *nce) 2921 { 2922 mblk_t *mp, *nxt_mp, *first_mp; 2923 char buf[INET6_ADDRSTRLEN]; 2924 ip6_t *ip6h; 2925 zoneid_t zoneid = GLOBAL_ZONEID; 2926 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 2927 2928 ip1dbg(("nce_resolv_failed: dst %s\n", 2929 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 2930 mutex_enter(&nce->nce_lock); 2931 mp = nce->nce_qd_mp; 2932 nce->nce_qd_mp = NULL; 2933 mutex_exit(&nce->nce_lock); 2934 while (mp != NULL) { 2935 nxt_mp = mp->b_next; 2936 mp->b_next = NULL; 2937 mp->b_prev = NULL; 2938 2939 first_mp = mp; 2940 if (mp->b_datap->db_type == M_CTL) { 2941 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 2942 ASSERT(io->ipsec_out_type == IPSEC_OUT); 2943 zoneid = io->ipsec_out_zoneid; 2944 ASSERT(zoneid != ALL_ZONES); 2945 mp = mp->b_cont; 2946 } 2947 2948 ip6h = (ip6_t *)mp->b_rptr; 2949 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2950 ip6i_t *ip6i; 2951 /* 2952 * This message should have been pulled up already 2953 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 2954 * the header is pulled up. 
2955 */ 2956 ip6i = (ip6i_t *)ip6h; 2957 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 2958 sizeof (ip6i_t) + IPV6_HDR_LEN); 2959 mp->b_rptr += sizeof (ip6i_t); 2960 } 2961 /* 2962 * Ignore failure since icmp_unreachable_v6 will silently 2963 * drop packets with an unspecified source address. 2964 */ 2965 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst); 2966 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 2967 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst); 2968 mp = nxt_mp; 2969 } 2970 } 2971 2972 /* 2973 * Called by SIOCSNDP* ioctl to add/change an nce entry 2974 * and the corresponding attributes. 2975 * Disallow states other than ND_REACHABLE or ND_STALE. 2976 */ 2977 int 2978 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 2979 { 2980 sin6_t *sin6; 2981 in6_addr_t *addr; 2982 nce_t *nce; 2983 int err; 2984 uint16_t new_flags = 0; 2985 uint16_t old_flags = 0; 2986 int inflags = lnr->lnr_flags; 2987 ip_stack_t *ipst = ill->ill_ipst; 2988 2989 ASSERT(ill->ill_isv6); 2990 if ((lnr->lnr_state_create != ND_REACHABLE) && 2991 (lnr->lnr_state_create != ND_STALE)) 2992 return (EINVAL); 2993 2994 sin6 = (sin6_t *)&lnr->lnr_addr; 2995 addr = &sin6->sin6_addr; 2996 2997 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 2998 /* We know it can not be mapping so just look in the hash table */ 2999 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 3000 nce = nce_lookup_addr(ill, addr, nce); 3001 if (nce != NULL) 3002 new_flags = nce->nce_flags; 3003 3004 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 3005 case NDF_ISROUTER_ON: 3006 new_flags |= NCE_F_ISROUTER; 3007 break; 3008 case NDF_ISROUTER_OFF: 3009 new_flags &= ~NCE_F_ISROUTER; 3010 break; 3011 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3012 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3013 if (nce != NULL) 3014 NCE_REFRELE(nce); 3015 return (EINVAL); 3016 } 3017 3018 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3019 case NDF_ANYCAST_ON: 3020 new_flags |= NCE_F_ANYCAST; 3021 break; 3022 case 
NDF_ANYCAST_OFF: 3023 new_flags &= ~NCE_F_ANYCAST; 3024 break; 3025 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3026 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3027 if (nce != NULL) 3028 NCE_REFRELE(nce); 3029 return (EINVAL); 3030 } 3031 3032 if (nce == NULL) { 3033 err = ndp_add_v6(ill, 3034 (uchar_t *)lnr->lnr_hdw_addr, 3035 addr, 3036 &ipv6_all_ones, 3037 &ipv6_all_zeros, 3038 0, 3039 new_flags, 3040 lnr->lnr_state_create, 3041 &nce); 3042 if (err != 0) { 3043 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3044 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 3045 return (err); 3046 } 3047 } 3048 old_flags = nce->nce_flags; 3049 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3050 /* 3051 * Router turned to host, delete all ires. 3052 * XXX Just delete the entry, but we need to add too. 3053 */ 3054 nce->nce_flags &= ~NCE_F_ISROUTER; 3055 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3056 ndp_delete(nce); 3057 NCE_REFRELE(nce); 3058 return (0); 3059 } 3060 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3061 3062 mutex_enter(&nce->nce_lock); 3063 nce->nce_flags = new_flags; 3064 mutex_exit(&nce->nce_lock); 3065 /* 3066 * Note that we ignore the state at this point, which 3067 * should be either STALE or REACHABLE. Instead we let 3068 * the link layer address passed in to determine the state 3069 * much like incoming packets. 3070 */ 3071 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3072 NCE_REFRELE(nce); 3073 return (0); 3074 } 3075 3076 /* 3077 * If the device driver supports it, we make nce_fp_mp to have 3078 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 3079 * The caller ensures there is hold on nce for this function. 3080 * Note that since ill_fastpath_probe() copies the mblk there is 3081 * no need for the hold beyond this function. 
3082 */ 3083 void 3084 nce_fastpath(nce_t *nce) 3085 { 3086 ill_t *ill = nce->nce_ill; 3087 int res; 3088 3089 ASSERT(ill != NULL); 3090 ASSERT(nce->nce_state != ND_INITIAL && nce->nce_state != ND_INCOMPLETE); 3091 3092 if (nce->nce_fp_mp != NULL) { 3093 /* Already contains fastpath info */ 3094 return; 3095 } 3096 if (nce->nce_res_mp != NULL) { 3097 nce_fastpath_list_add(nce); 3098 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3099 /* 3100 * EAGAIN is an indication of a transient error 3101 * i.e. allocation failure etc. leave the nce in the list it 3102 * will be updated when another probe happens for another ire 3103 * if not it will be taken out of the list when the ire is 3104 * deleted. 3105 */ 3106 3107 if (res != 0 && res != EAGAIN) 3108 nce_fastpath_list_delete(nce); 3109 } 3110 } 3111 3112 /* 3113 * Drain the list of nce's waiting for fastpath response. 3114 */ 3115 void 3116 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3117 void *arg) 3118 { 3119 3120 nce_t *next_nce; 3121 nce_t *current_nce; 3122 nce_t *first_nce; 3123 nce_t *prev_nce = NULL; 3124 3125 mutex_enter(&ill->ill_lock); 3126 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3127 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3128 next_nce = current_nce->nce_fastpath; 3129 /* 3130 * Take it off the list if we're flushing, or if the callback 3131 * routine tells us to do so. Otherwise, leave the nce in the 3132 * fastpath list to handle any pending response from the lower 3133 * layer. We can't drain the list when the callback routine 3134 * comparison failed, because the response is asynchronous in 3135 * nature, and may not arrive in the same order as the list 3136 * insertion. 
3137 */ 3138 if (func == NULL || func(current_nce, arg)) { 3139 current_nce->nce_fastpath = NULL; 3140 if (current_nce == first_nce) 3141 ill->ill_fastpath_list = first_nce = next_nce; 3142 else 3143 prev_nce->nce_fastpath = next_nce; 3144 } else { 3145 /* previous element that is still in the list */ 3146 prev_nce = current_nce; 3147 } 3148 current_nce = next_nce; 3149 } 3150 mutex_exit(&ill->ill_lock); 3151 } 3152 3153 /* 3154 * Add nce to the nce fastpath list. 3155 */ 3156 void 3157 nce_fastpath_list_add(nce_t *nce) 3158 { 3159 ill_t *ill; 3160 3161 ill = nce->nce_ill; 3162 3163 mutex_enter(&ill->ill_lock); 3164 mutex_enter(&nce->nce_lock); 3165 3166 /* 3167 * if nce has not been deleted and 3168 * is not already in the list add it. 3169 */ 3170 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3171 (nce->nce_fastpath == NULL)) { 3172 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3173 ill->ill_fastpath_list = nce; 3174 } 3175 3176 mutex_exit(&nce->nce_lock); 3177 mutex_exit(&ill->ill_lock); 3178 } 3179 3180 /* 3181 * remove nce from the nce fastpath list. 3182 */ 3183 void 3184 nce_fastpath_list_delete(nce_t *nce) 3185 { 3186 nce_t *nce_ptr; 3187 3188 ill_t *ill; 3189 3190 ill = nce->nce_ill; 3191 ASSERT(ill != NULL); 3192 3193 mutex_enter(&ill->ill_lock); 3194 if (nce->nce_fastpath == NULL) 3195 goto done; 3196 3197 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3198 3199 if (ill->ill_fastpath_list == nce) { 3200 ill->ill_fastpath_list = nce->nce_fastpath; 3201 } else { 3202 nce_ptr = ill->ill_fastpath_list; 3203 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3204 if (nce_ptr->nce_fastpath == nce) { 3205 nce_ptr->nce_fastpath = nce->nce_fastpath; 3206 break; 3207 } 3208 nce_ptr = nce_ptr->nce_fastpath; 3209 } 3210 } 3211 3212 nce->nce_fastpath = NULL; 3213 done: 3214 mutex_exit(&ill->ill_lock); 3215 } 3216 3217 /* 3218 * Update all NCE's that are not in fastpath mode and 3219 * have an nce_fp_mp that matches mp. 
mp->b_cont contains 3220 * the fastpath header. 3221 * 3222 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 3223 */ 3224 boolean_t 3225 ndp_fastpath_update(nce_t *nce, void *arg) 3226 { 3227 mblk_t *mp, *fp_mp; 3228 uchar_t *mp_rptr, *ud_mp_rptr; 3229 mblk_t *ud_mp = nce->nce_res_mp; 3230 ptrdiff_t cmplen; 3231 3232 if (nce->nce_flags & NCE_F_MAPPING) 3233 return (B_TRUE); 3234 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3235 return (B_TRUE); 3236 3237 ip2dbg(("ndp_fastpath_update: trying\n")); 3238 mp = (mblk_t *)arg; 3239 mp_rptr = mp->b_rptr; 3240 cmplen = mp->b_wptr - mp_rptr; 3241 ASSERT(cmplen >= 0); 3242 ud_mp_rptr = ud_mp->b_rptr; 3243 /* 3244 * The nce is locked here to prevent any other threads 3245 * from accessing and changing nce_res_mp when the IPv6 address 3246 * becomes resolved to an lla while we're in the middle 3247 * of looking at and comparing the hardware address (lla). 3248 * It is also locked to prevent multiple threads in nce_fastpath_update 3249 * from examining nce_res_mp atthe same time. 3250 */ 3251 mutex_enter(&nce->nce_lock); 3252 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3253 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3254 mutex_exit(&nce->nce_lock); 3255 /* 3256 * Don't take the ire off the fastpath list yet, 3257 * since the response may come later. 3258 */ 3259 return (B_FALSE); 3260 } 3261 /* Matched - install mp as the fastpath mp */ 3262 ip1dbg(("ndp_fastpath_update: match\n")); 3263 fp_mp = dupb(mp->b_cont); 3264 if (fp_mp != NULL) { 3265 nce->nce_fp_mp = fp_mp; 3266 } 3267 mutex_exit(&nce->nce_lock); 3268 return (B_TRUE); 3269 } 3270 3271 /* 3272 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3273 * driver. Note that it assumes IP is exclusive... 3274 */ 3275 /* ARGSUSED */ 3276 void 3277 ndp_fastpath_flush(nce_t *nce, char *arg) 3278 { 3279 if (nce->nce_flags & NCE_F_MAPPING) 3280 return; 3281 /* No fastpath info? 
*/ 3282 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3283 return; 3284 3285 if (nce->nce_ipversion == IPV4_VERSION && 3286 nce->nce_flags & NCE_F_BCAST) { 3287 /* 3288 * IPv4 BROADCAST entries: 3289 * We can't delete the nce since it is difficult to 3290 * recreate these without going through the 3291 * ipif down/up dance. 3292 * 3293 * All access to nce->nce_fp_mp in the case of these 3294 * is protected by nce_lock. 3295 */ 3296 mutex_enter(&nce->nce_lock); 3297 if (nce->nce_fp_mp != NULL) { 3298 freeb(nce->nce_fp_mp); 3299 nce->nce_fp_mp = NULL; 3300 mutex_exit(&nce->nce_lock); 3301 nce_fastpath(nce); 3302 } else { 3303 mutex_exit(&nce->nce_lock); 3304 } 3305 } else { 3306 /* Just delete the NCE... */ 3307 ndp_delete(nce); 3308 } 3309 } 3310 3311 /* 3312 * Return a pointer to a given option in the packet. 3313 * Assumes that option part of the packet have already been validated. 3314 */ 3315 nd_opt_hdr_t * 3316 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3317 { 3318 while (optlen > 0) { 3319 if (opt->nd_opt_type == opt_type) 3320 return (opt); 3321 optlen -= 8 * opt->nd_opt_len; 3322 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3323 } 3324 return (NULL); 3325 } 3326 3327 /* 3328 * Verify all option lengths present are > 0, also check to see 3329 * if the option lengths and packet length are consistent. 3330 */ 3331 boolean_t 3332 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3333 { 3334 ASSERT(opt != NULL); 3335 while (optlen > 0) { 3336 if (opt->nd_opt_len == 0) 3337 return (B_FALSE); 3338 optlen -= 8 * opt->nd_opt_len; 3339 if (optlen < 0) 3340 return (B_FALSE); 3341 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3342 } 3343 return (B_TRUE); 3344 } 3345 3346 /* 3347 * ndp_walk function. 3348 * Free a fraction of the NCE cache entries. 3349 * A fraction of zero means to not free any in that category. 
3350 */ 3351 void 3352 ndp_cache_reclaim(nce_t *nce, char *arg) 3353 { 3354 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3355 uint_t rand; 3356 3357 if (nce->nce_flags & NCE_F_PERMANENT) 3358 return; 3359 3360 rand = (uint_t)lbolt + 3361 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3362 if (ncr->ncr_host != 0 && 3363 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3364 ndp_delete(nce); 3365 return; 3366 } 3367 } 3368 3369 /* 3370 * ndp_walk function. 3371 * Count the number of NCEs that can be deleted. 3372 * These would be hosts but not routers. 3373 */ 3374 void 3375 ndp_cache_count(nce_t *nce, char *arg) 3376 { 3377 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3378 3379 if (nce->nce_flags & NCE_F_PERMANENT) 3380 return; 3381 3382 ncc->ncc_total++; 3383 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3384 ncc->ncc_host++; 3385 } 3386 3387 #ifdef DEBUG 3388 void 3389 nce_trace_ref(nce_t *nce) 3390 { 3391 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3392 3393 if (nce->nce_trace_disable) 3394 return; 3395 3396 if (!th_trace_ref(nce, nce->nce_ill->ill_ipst)) { 3397 nce->nce_trace_disable = B_TRUE; 3398 nce_trace_cleanup(nce); 3399 } 3400 } 3401 3402 void 3403 nce_untrace_ref(nce_t *nce) 3404 { 3405 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3406 3407 if (!nce->nce_trace_disable) 3408 th_trace_unref(nce); 3409 } 3410 3411 static void 3412 nce_trace_cleanup(const nce_t *nce) 3413 { 3414 th_trace_cleanup(nce, nce->nce_trace_disable); 3415 } 3416 #endif 3417 3418 /* 3419 * Called when address resolution fails due to a timeout. 3420 * Send an ICMP unreachable in response to all queued packets. 
3421 */ 3422 void 3423 arp_resolv_failed(nce_t *nce) 3424 { 3425 mblk_t *mp, *nxt_mp, *first_mp; 3426 char buf[INET6_ADDRSTRLEN]; 3427 zoneid_t zoneid = GLOBAL_ZONEID; 3428 struct in_addr ipv4addr; 3429 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3430 3431 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3432 ip3dbg(("arp_resolv_failed: dst %s\n", 3433 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3434 mutex_enter(&nce->nce_lock); 3435 mp = nce->nce_qd_mp; 3436 nce->nce_qd_mp = NULL; 3437 mutex_exit(&nce->nce_lock); 3438 3439 while (mp != NULL) { 3440 nxt_mp = mp->b_next; 3441 mp->b_next = NULL; 3442 mp->b_prev = NULL; 3443 3444 first_mp = mp; 3445 /* 3446 * Send icmp unreachable messages 3447 * to the hosts. 3448 */ 3449 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst); 3450 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3451 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3452 ICMP_HOST_UNREACHABLE, zoneid, ipst); 3453 mp = nxt_mp; 3454 } 3455 } 3456 3457 int 3458 ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3459 nce_t **newnce, nce_t *src_nce) 3460 { 3461 int err; 3462 nce_t *nce; 3463 in6_addr_t addr6; 3464 ip_stack_t *ipst = ill->ill_ipst; 3465 3466 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3467 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3468 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3469 nce = nce_lookup_addr(ill, &addr6, nce); 3470 if (nce == NULL) { 3471 err = ndp_add_v4(ill, addr, flags, newnce, src_nce); 3472 } else { 3473 *newnce = nce; 3474 err = EEXIST; 3475 } 3476 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3477 return (err); 3478 } 3479 3480 /* 3481 * NDP Cache Entry creation routine for IPv4. 3482 * Mapped entries are handled in arp. 3483 * This routine must always be called with ndp4->ndp_g_lock held. 3484 * Prior to return, nce_refcnt is incremented. 
3485 */ 3486 static int 3487 ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3488 nce_t **newnce, nce_t *src_nce) 3489 { 3490 static nce_t nce_nil; 3491 nce_t *nce; 3492 mblk_t *mp; 3493 mblk_t *template = NULL; 3494 nce_t **ncep; 3495 ip_stack_t *ipst = ill->ill_ipst; 3496 uint16_t state = ND_INITIAL; 3497 int err; 3498 3499 ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock)); 3500 ASSERT(!ill->ill_isv6); 3501 ASSERT((flags & NCE_F_MAPPING) == 0); 3502 3503 if (ill->ill_resolver_mp == NULL) 3504 return (EINVAL); 3505 /* 3506 * Allocate the mblk to hold the nce. 3507 */ 3508 mp = allocb(sizeof (nce_t), BPRI_MED); 3509 if (mp == NULL) 3510 return (ENOMEM); 3511 3512 nce = (nce_t *)mp->b_rptr; 3513 mp->b_wptr = (uchar_t *)&nce[1]; 3514 *nce = nce_nil; 3515 nce->nce_ill = ill; 3516 nce->nce_ipversion = IPV4_VERSION; 3517 nce->nce_flags = flags; 3518 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3519 nce->nce_rcnt = ill->ill_xmit_count; 3520 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3521 nce->nce_mask = ipv6_all_ones; 3522 nce->nce_extract_mask = ipv6_all_zeros; 3523 nce->nce_ll_extract_start = 0; 3524 nce->nce_qd_mp = NULL; 3525 nce->nce_mp = mp; 3526 /* This one is for nce getting created */ 3527 nce->nce_refcnt = 1; 3528 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3529 ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3530 3531 nce->nce_trace_disable = B_FALSE; 3532 3533 if (src_nce != NULL) { 3534 /* 3535 * src_nce has been provided by the caller. 
The only 3536 * caller who provides a non-null, non-broadcast 3537 * src_nce is from ip_newroute() which must pass in 3538 * a ND_REACHABLE src_nce (this condition is verified 3539 * via an ASSERT for the save_ire->ire_nce in ip_newroute()) 3540 */ 3541 mutex_enter(&src_nce->nce_lock); 3542 state = src_nce->nce_state; 3543 if ((src_nce->nce_flags & NCE_F_CONDEMNED) || 3544 (ipst->ips_ndp4->ndp_g_hw_change > 0)) { 3545 /* 3546 * src_nce has been deleted, or 3547 * ip_arp_news is in the middle of 3548 * flushing entries in the the nce. 3549 * Fail the add, since we don't know 3550 * if it is safe to copy the contents of 3551 * src_nce 3552 */ 3553 DTRACE_PROBE2(nce__bad__src__nce, 3554 nce_t *, src_nce, ill_t *, ill); 3555 mutex_exit(&src_nce->nce_lock); 3556 err = EINVAL; 3557 goto err_ret; 3558 } 3559 template = copyb(src_nce->nce_res_mp); 3560 mutex_exit(&src_nce->nce_lock); 3561 if (template == NULL) { 3562 err = ENOMEM; 3563 goto err_ret; 3564 } 3565 } else if (flags & NCE_F_BCAST) { 3566 /* 3567 * broadcast nce. 3568 */ 3569 template = copyb(ill->ill_bcast_mp); 3570 if (template == NULL) { 3571 err = ENOMEM; 3572 goto err_ret; 3573 } 3574 state = ND_REACHABLE; 3575 } else if (ill->ill_net_type == IRE_IF_NORESOLVER) { 3576 /* 3577 * NORESOLVER entries are always created in the REACHABLE 3578 * state. We create a nce_res_mp with the IP nexthop address 3579 * in the destination address in the DLPI hdr if the 3580 * physical length is exactly 4 bytes. 3581 * 3582 * XXX not clear which drivers set ill_phys_addr_length to 3583 * IP_ADDR_LEN. 
3584 */ 3585 if (ill->ill_phys_addr_length == IP_ADDR_LEN) { 3586 template = ill_dlur_gen((uchar_t *)addr, 3587 ill->ill_phys_addr_length, 3588 ill->ill_sap, ill->ill_sap_length); 3589 } else { 3590 template = copyb(ill->ill_resolver_mp); 3591 } 3592 if (template == NULL) { 3593 err = ENOMEM; 3594 goto err_ret; 3595 } 3596 state = ND_REACHABLE; 3597 } 3598 nce->nce_fp_mp = NULL; 3599 nce->nce_res_mp = template; 3600 nce->nce_state = state; 3601 if (state == ND_REACHABLE) { 3602 nce->nce_last = TICK_TO_MSEC(lbolt64); 3603 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3604 } else { 3605 nce->nce_last = 0; 3606 if (state == ND_INITIAL) 3607 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3608 } 3609 3610 ASSERT((nce->nce_res_mp == NULL && nce->nce_state == ND_INITIAL) || 3611 (nce->nce_res_mp != NULL && nce->nce_state == ND_REACHABLE)); 3612 /* 3613 * Atomically ensure that the ill is not CONDEMNED, before 3614 * adding the NCE. 3615 */ 3616 mutex_enter(&ill->ill_lock); 3617 if (ill->ill_state_flags & ILL_CONDEMNED) { 3618 mutex_exit(&ill->ill_lock); 3619 err = EINVAL; 3620 goto err_ret; 3621 } 3622 if ((nce->nce_next = *ncep) != NULL) 3623 nce->nce_next->nce_ptpn = &nce->nce_next; 3624 *ncep = nce; 3625 nce->nce_ptpn = ncep; 3626 *newnce = nce; 3627 /* This one is for nce being used by an active thread */ 3628 NCE_REFHOLD(*newnce); 3629 3630 /* Bump up the number of nce's referencing this ill */ 3631 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 3632 (char *), "nce", (void *), nce); 3633 ill->ill_cnt_nce++; 3634 mutex_exit(&ill->ill_lock); 3635 DTRACE_PROBE1(ndp__add__v4, nce_t *, nce); 3636 return (0); 3637 err_ret: 3638 freeb(mp); 3639 freemsg(template); 3640 return (err); 3641 } 3642 3643 void 3644 ndp_flush_qd_mp(nce_t *nce) 3645 { 3646 mblk_t *qd_mp, *qd_next; 3647 3648 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3649 qd_mp = nce->nce_qd_mp; 3650 nce->nce_qd_mp = NULL; 3651 while (qd_mp != NULL) { 3652 qd_next = qd_mp->b_next; 3653 qd_mp->b_next = NULL; 3654 qd_mp->b_prev = 
NULL; 3655 freemsg(qd_mp); 3656 qd_mp = qd_next; 3657 } 3658 } 3659 3660 3661 /* 3662 * ndp_walk routine to delete all entries that have a given destination or 3663 * gateway address and cached link layer (MAC) address. This is used when ARP 3664 * informs us that a network-to-link-layer mapping may have changed. 3665 */ 3666 void 3667 nce_delete_hw_changed(nce_t *nce, void *arg) 3668 { 3669 nce_hw_map_t *hwm = arg; 3670 mblk_t *mp; 3671 dl_unitdata_req_t *dlu; 3672 uchar_t *macaddr; 3673 ill_t *ill; 3674 int saplen; 3675 ipaddr_t nce_addr; 3676 3677 if (nce->nce_state != ND_REACHABLE) 3678 return; 3679 3680 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3681 if (nce_addr != hwm->hwm_addr) 3682 return; 3683 3684 mutex_enter(&nce->nce_lock); 3685 if ((mp = nce->nce_res_mp) == NULL) { 3686 mutex_exit(&nce->nce_lock); 3687 return; 3688 } 3689 dlu = (dl_unitdata_req_t *)mp->b_rptr; 3690 macaddr = (uchar_t *)(dlu + 1); 3691 ill = nce->nce_ill; 3692 if ((saplen = ill->ill_sap_length) > 0) 3693 macaddr += saplen; 3694 else 3695 saplen = -saplen; 3696 3697 /* 3698 * If the hardware address is unchanged, then leave this one alone. 3699 * Note that saplen == abs(saplen) now. 3700 */ 3701 if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && 3702 bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { 3703 mutex_exit(&nce->nce_lock); 3704 return; 3705 } 3706 mutex_exit(&nce->nce_lock); 3707 3708 DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); 3709 ndp_delete(nce); 3710 } 3711 3712 /* 3713 * This function verifies whether a given IPv4 address is potentially known to 3714 * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t, 3715 * so that it can continue to look for hardware changes on that address. 
3716 */ 3717 boolean_t 3718 ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns) 3719 { 3720 nce_t *nce; 3721 struct in_addr nceaddr; 3722 ip_stack_t *ipst = ns->netstack_ip; 3723 3724 if (addr == INADDR_ANY) 3725 return (B_FALSE); 3726 3727 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3728 nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr); 3729 for (; nce != NULL; nce = nce->nce_next) { 3730 /* Note that only v4 mapped entries are in the table. */ 3731 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 3732 if (addr == nceaddr.s_addr && 3733 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 3734 /* Single flag check; no lock needed */ 3735 if (!(nce->nce_flags & NCE_F_CONDEMNED)) 3736 break; 3737 } 3738 } 3739 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3740 return (nce != NULL); 3741 } 3742