1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/sysmacros.h> 33 #include <sys/errno.h> 34 #include <sys/dlpi.h> 35 #include <sys/socket.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/vtrace.h> 41 #include <sys/kmem.h> 42 #include <sys/zone.h> 43 #include <sys/ethernet.h> 44 #include <sys/sdt.h> 45 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/if_dl.h> 49 #include <net/route.h> 50 #include <netinet/in.h> 51 #include <netinet/ip6.h> 52 #include <netinet/icmp6.h> 53 54 #include <inet/common.h> 55 #include <inet/mi.h> 56 #include <inet/mib2.h> 57 #include <inet/nd.h> 58 #include <inet/ip.h> 59 #include <inet/ip_impl.h> 60 #include <inet/ipclassifier.h> 61 #include <inet/ip_if.h> 62 #include <inet/ip_ire.h> 63 #include <inet/ip_rts.h> 64 #include <inet/ip6.h> 65 #include <inet/ip_ndp.h> 66 #include 
<inet/ipsec_impl.h> 67 #include <inet/ipsec_info.h> 68 #include <inet/sctp_ip.h> 69 70 /* 71 * Function names with nce_ prefix are static while function 72 * names with ndp_ prefix are used by rest of the IP. 73 * 74 * Lock ordering: 75 * 76 * ndp_g_lock -> ill_lock -> nce_lock 77 * 78 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 79 * nce_next. Nce_lock protects the contents of the NCE (particularly 80 * nce_refcnt). 81 */ 82 83 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 84 uint32_t ll_addr_len); 85 static void nce_ire_delete(nce_t *nce); 86 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 87 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 88 static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *, nce_t *); 89 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 90 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 91 uchar_t *addr); 92 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 93 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 94 static mblk_t *nce_udreq_alloc(ill_t *ill); 95 static void nce_update(nce_t *nce, uint16_t new_state, 96 uchar_t *new_ll_addr); 97 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 98 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 99 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 100 const in6_addr_t *target, int flag); 101 static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t, 102 nce_t **, nce_t *); 103 104 #ifdef DEBUG 105 static void nce_trace_cleanup(const nce_t *); 106 #endif 107 108 #define NCE_HASH_PTR_V4(ipst, addr) \ 109 (&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 110 111 #define NCE_HASH_PTR_V6(ipst, addr) \ 112 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ 113 NCE_TABLE_SIZE)])) 114 115 /* 116 * Compute default flags to use for an advertisement of this nce's address. 
117 */ 118 static int 119 nce_advert_flags(const nce_t *nce) 120 { 121 int flag = 0; 122 123 if (nce->nce_flags & NCE_F_ISROUTER) 124 flag |= NDP_ISROUTER; 125 if (!(nce->nce_flags & NCE_F_ANYCAST)) 126 flag |= NDP_ORIDE; 127 128 return (flag); 129 } 130 131 /* Non-tunable probe interval, based on link capabilities */ 132 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 133 134 /* 135 * NDP Cache Entry creation routine. 136 * Mapped entries will never do NUD . 137 * This routine must always be called with ndp6->ndp_g_lock held. 138 * Prior to return, nce_refcnt is incremented. 139 */ 140 int 141 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 142 const in6_addr_t *mask, const in6_addr_t *extract_mask, 143 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 144 nce_t **newnce) 145 { 146 static nce_t nce_nil; 147 nce_t *nce; 148 mblk_t *mp; 149 mblk_t *template; 150 nce_t **ncep; 151 int err; 152 boolean_t dropped = B_FALSE; 153 ip_stack_t *ipst = ill->ill_ipst; 154 155 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 156 ASSERT(ill != NULL && ill->ill_isv6); 157 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 158 ip0dbg(("ndp_add_v6: no addr\n")); 159 return (EINVAL); 160 } 161 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 162 ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags)); 163 return (EINVAL); 164 } 165 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 166 (flags & NCE_F_MAPPING)) { 167 ip0dbg(("ndp_add_v6: extract mask zero for mapping")); 168 return (EINVAL); 169 } 170 /* 171 * Allocate the mblk to hold the nce. 172 * 173 * XXX This can come out of a separate cache - nce_cache. 
174 * We don't need the mp anymore as there are no more 175 * "qwriter"s 176 */ 177 mp = allocb(sizeof (nce_t), BPRI_MED); 178 if (mp == NULL) 179 return (ENOMEM); 180 181 nce = (nce_t *)mp->b_rptr; 182 mp->b_wptr = (uchar_t *)&nce[1]; 183 *nce = nce_nil; 184 185 /* 186 * This one holds link layer address 187 */ 188 if (ill->ill_net_type == IRE_IF_RESOLVER) { 189 template = nce_udreq_alloc(ill); 190 } else { 191 if (ill->ill_resolver_mp == NULL) { 192 freeb(mp); 193 return (EINVAL); 194 } 195 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 196 template = copyb(ill->ill_resolver_mp); 197 } 198 if (template == NULL) { 199 freeb(mp); 200 return (ENOMEM); 201 } 202 nce->nce_ill = ill; 203 nce->nce_ipversion = IPV6_VERSION; 204 nce->nce_flags = flags; 205 nce->nce_state = state; 206 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 207 nce->nce_rcnt = ill->ill_xmit_count; 208 nce->nce_addr = *addr; 209 nce->nce_mask = *mask; 210 nce->nce_extract_mask = *extract_mask; 211 nce->nce_ll_extract_start = hw_extract_start; 212 nce->nce_fp_mp = NULL; 213 nce->nce_res_mp = template; 214 if (state == ND_REACHABLE) 215 nce->nce_last = TICK_TO_MSEC(lbolt64); 216 else 217 nce->nce_last = 0; 218 nce->nce_qd_mp = NULL; 219 nce->nce_mp = mp; 220 if (hw_addr != NULL) 221 nce_set_ll(nce, hw_addr); 222 /* This one is for nce getting created */ 223 nce->nce_refcnt = 1; 224 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 225 if (nce->nce_flags & NCE_F_MAPPING) { 226 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 227 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 228 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 229 ncep = &ipst->ips_ndp6->nce_mask_entries; 230 } else { 231 ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 232 } 233 234 nce->nce_trace_disable = B_FALSE; 235 236 /* 237 * Atomically ensure that the ill is not CONDEMNED, before 238 * adding the NCE. 
239 */ 240 mutex_enter(&ill->ill_lock); 241 if (ill->ill_state_flags & ILL_CONDEMNED) { 242 mutex_exit(&ill->ill_lock); 243 freeb(mp); 244 freeb(template); 245 return (EINVAL); 246 } 247 if ((nce->nce_next = *ncep) != NULL) 248 nce->nce_next->nce_ptpn = &nce->nce_next; 249 *ncep = nce; 250 nce->nce_ptpn = ncep; 251 *newnce = nce; 252 /* This one is for nce being used by an active thread */ 253 NCE_REFHOLD(*newnce); 254 255 /* Bump up the number of nce's referencing this ill */ 256 ill->ill_nce_cnt++; 257 mutex_exit(&ill->ill_lock); 258 259 err = 0; 260 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 261 mutex_enter(&nce->nce_lock); 262 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 263 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 264 mutex_exit(&nce->nce_lock); 265 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 266 &ipv6_all_zeros, addr, NDP_PROBE); 267 if (dropped) { 268 mutex_enter(&nce->nce_lock); 269 nce->nce_pcnt++; 270 mutex_exit(&nce->nce_lock); 271 } 272 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 273 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 274 err = EINPROGRESS; 275 } else if (flags & NCE_F_UNSOL_ADV) { 276 /* 277 * We account for the transmit below by assigning one 278 * less than the ndd variable. Subsequent decrements 279 * are done in ndp_timer. 
280 */ 281 mutex_enter(&nce->nce_lock); 282 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 283 nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1; 284 mutex_exit(&nce->nce_lock); 285 dropped = nce_xmit(ill, 286 ND_NEIGHBOR_ADVERT, 287 ill, /* ill to be used for extracting ill_nd_lla */ 288 B_TRUE, /* use ill_nd_lla */ 289 addr, /* Source and target of the advertisement pkt */ 290 &ipv6_all_hosts_mcast, /* Destination of the packet */ 291 nce_advert_flags(nce)); 292 mutex_enter(&nce->nce_lock); 293 if (dropped) 294 nce->nce_unsolicit_count++; 295 if (nce->nce_unsolicit_count != 0) { 296 nce->nce_timeout_id = timeout(ndp_timer, nce, 297 MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval)); 298 } 299 mutex_exit(&nce->nce_lock); 300 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 301 } 302 /* 303 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 304 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 305 * We call nce_fastpath from nce_update if the link layer address of 306 * the peer changes from nce_update 307 */ 308 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 309 nce_fastpath(nce); 310 return (err); 311 } 312 313 int 314 ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 315 const in6_addr_t *mask, const in6_addr_t *extract_mask, 316 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 317 nce_t **newnce) 318 { 319 int err = 0; 320 nce_t *nce; 321 ip_stack_t *ipst = ill->ill_ipst; 322 323 ASSERT(ill->ill_isv6); 324 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 325 326 /* Get head of v6 hash table */ 327 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 328 nce = nce_lookup_addr(ill, addr, nce); 329 if (nce == NULL) { 330 err = ndp_add_v6(ill, 331 hw_addr, 332 addr, 333 mask, 334 extract_mask, 335 hw_extract_start, 336 flags, 337 state, 338 newnce); 339 } else { 340 *newnce = nce; 341 err = EEXIST; 342 } 343 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 344 return (err); 345 } 346 347 /* 348 * 
Remove all the CONDEMNED nces from the appropriate hash table. 349 * We create a private list of NCEs, these may have ires pointing 350 * to them, so the list will be passed through to clean up dependent 351 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 352 */ 353 static void 354 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 355 { 356 nce_t *nce1; 357 nce_t **ptpn; 358 359 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 360 ASSERT(ndp->ndp_g_walker == 0); 361 for (; nce; nce = nce1) { 362 nce1 = nce->nce_next; 363 mutex_enter(&nce->nce_lock); 364 if (nce->nce_flags & NCE_F_CONDEMNED) { 365 ptpn = nce->nce_ptpn; 366 nce1 = nce->nce_next; 367 if (nce1 != NULL) 368 nce1->nce_ptpn = ptpn; 369 *ptpn = nce1; 370 nce->nce_ptpn = NULL; 371 nce->nce_next = NULL; 372 nce->nce_next = *free_nce_list; 373 *free_nce_list = nce; 374 } 375 mutex_exit(&nce->nce_lock); 376 } 377 } 378 379 /* 380 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 381 * will return this NCE. Also no new IREs will be created that 382 * point to this NCE (See ire_add_v6). Also no new timeouts will 383 * be started (See NDP_RESTART_TIMER). 384 * 2. Cancel any currently running timeouts. 385 * 3. If there is an ndp walker, return. The walker will do the cleanup. 386 * This ensures that walkers see a consistent list of NCEs while walking. 387 * 4. Otherwise remove the NCE from the list of NCEs 388 * 5. Delete all IREs pointing to this NCE. 389 */ 390 void 391 ndp_delete(nce_t *nce) 392 { 393 nce_t **ptpn; 394 nce_t *nce1; 395 int ipversion = nce->nce_ipversion; 396 ndp_g_t *ndp; 397 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 398 399 if (ipversion == IPV4_VERSION) 400 ndp = ipst->ips_ndp4; 401 else 402 ndp = ipst->ips_ndp6; 403 404 /* Serialize deletes */ 405 mutex_enter(&nce->nce_lock); 406 if (nce->nce_flags & NCE_F_CONDEMNED) { 407 /* Some other thread is doing the delete */ 408 mutex_exit(&nce->nce_lock); 409 return; 410 } 411 /* 412 * Caller has a refhold. 
Also 1 ref for being in the list. Thus 413 * refcnt has to be >= 2 414 */ 415 ASSERT(nce->nce_refcnt >= 2); 416 nce->nce_flags |= NCE_F_CONDEMNED; 417 mutex_exit(&nce->nce_lock); 418 419 nce_fastpath_list_delete(nce); 420 421 /* 422 * Cancel any running timer. Timeout can't be restarted 423 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 424 * Passing invalid timeout id is fine. 425 */ 426 if (nce->nce_timeout_id != 0) { 427 (void) untimeout(nce->nce_timeout_id); 428 nce->nce_timeout_id = 0; 429 } 430 431 mutex_enter(&ndp->ndp_g_lock); 432 if (nce->nce_ptpn == NULL) { 433 /* 434 * The last ndp walker has already removed this nce from 435 * the list after we marked the nce CONDEMNED and before 436 * we grabbed the global lock. 437 */ 438 mutex_exit(&ndp->ndp_g_lock); 439 return; 440 } 441 if (ndp->ndp_g_walker > 0) { 442 /* 443 * Can't unlink. The walker will clean up 444 */ 445 ndp->ndp_g_walker_cleanup = B_TRUE; 446 mutex_exit(&ndp->ndp_g_lock); 447 return; 448 } 449 450 /* 451 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 452 * the timer since it is marked CONDEMNED. 
453 */ 454 ptpn = nce->nce_ptpn; 455 nce1 = nce->nce_next; 456 if (nce1 != NULL) 457 nce1->nce_ptpn = ptpn; 458 *ptpn = nce1; 459 nce->nce_ptpn = NULL; 460 nce->nce_next = NULL; 461 mutex_exit(&ndp->ndp_g_lock); 462 463 nce_ire_delete(nce); 464 } 465 466 void 467 ndp_inactive(nce_t *nce) 468 { 469 mblk_t **mpp; 470 ill_t *ill; 471 472 ASSERT(nce->nce_refcnt == 0); 473 ASSERT(MUTEX_HELD(&nce->nce_lock)); 474 ASSERT(nce->nce_fastpath == NULL); 475 476 /* Free all nce allocated messages */ 477 mpp = &nce->nce_first_mp_to_free; 478 do { 479 while (*mpp != NULL) { 480 mblk_t *mp; 481 482 mp = *mpp; 483 *mpp = mp->b_next; 484 485 inet_freemsg(mp); 486 } 487 } while (mpp++ != &nce->nce_last_mp_to_free); 488 489 #ifdef DEBUG 490 nce_trace_cleanup(nce); 491 #endif 492 493 ill = nce->nce_ill; 494 mutex_enter(&ill->ill_lock); 495 ill->ill_nce_cnt--; 496 /* 497 * If the number of nce's associated with this ill have dropped 498 * to zero, check whether we need to restart any operation that 499 * is waiting for this to happen. 500 */ 501 if (ill->ill_nce_cnt == 0) { 502 /* ipif_ill_refrele_tail drops the ill_lock */ 503 ipif_ill_refrele_tail(ill); 504 } else { 505 mutex_exit(&ill->ill_lock); 506 } 507 mutex_destroy(&nce->nce_lock); 508 if (nce->nce_mp != NULL) 509 inet_freemsg(nce->nce_mp); 510 } 511 512 /* 513 * ndp_walk routine. Delete the nce if it is associated with the ill 514 * that is going away. Always called as a writer. 515 */ 516 void 517 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 518 { 519 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 520 ndp_delete(nce); 521 } 522 } 523 524 /* 525 * Walk a list of to be inactive NCEs and blow away all the ires. 
526 */ 527 static void 528 nce_ire_delete_list(nce_t *nce) 529 { 530 nce_t *nce_next; 531 532 ASSERT(nce != NULL); 533 while (nce != NULL) { 534 nce_next = nce->nce_next; 535 nce->nce_next = NULL; 536 537 /* 538 * It is possible for the last ndp walker (this thread) 539 * to come here after ndp_delete has marked the nce CONDEMNED 540 * and before it has removed the nce from the fastpath list 541 * or called untimeout. So we need to do it here. It is safe 542 * for both ndp_delete and this thread to do it twice or 543 * even simultaneously since each of the threads has a 544 * reference on the nce. 545 */ 546 nce_fastpath_list_delete(nce); 547 /* 548 * Cancel any running timer. Timeout can't be restarted 549 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 550 * Passing invalid timeout id is fine. 551 */ 552 if (nce->nce_timeout_id != 0) { 553 (void) untimeout(nce->nce_timeout_id); 554 nce->nce_timeout_id = 0; 555 } 556 /* 557 * We might hit this func thus in the v4 case: 558 * ipif_down->ipif_ndp_down->ndp_walk 559 */ 560 561 if (nce->nce_ipversion == IPV4_VERSION) { 562 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 563 IRE_CACHE, nce_ire_delete1, 564 (char *)nce, nce->nce_ill); 565 } else { 566 ASSERT(nce->nce_ipversion == IPV6_VERSION); 567 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 568 IRE_CACHE, nce_ire_delete1, 569 (char *)nce, nce->nce_ill); 570 } 571 NCE_REFRELE_NOTR(nce); 572 nce = nce_next; 573 } 574 } 575 576 /* 577 * Delete an ire when the nce goes away. 
578 */ 579 /* ARGSUSED */ 580 static void 581 nce_ire_delete(nce_t *nce) 582 { 583 if (nce->nce_ipversion == IPV6_VERSION) { 584 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 585 nce_ire_delete1, (char *)nce, nce->nce_ill); 586 NCE_REFRELE_NOTR(nce); 587 } else { 588 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 589 nce_ire_delete1, (char *)nce, nce->nce_ill); 590 NCE_REFRELE_NOTR(nce); 591 } 592 } 593 594 /* 595 * ire_walk routine used to delete every IRE that shares this nce 596 */ 597 static void 598 nce_ire_delete1(ire_t *ire, char *nce_arg) 599 { 600 nce_t *nce = (nce_t *)nce_arg; 601 602 ASSERT(ire->ire_type == IRE_CACHE); 603 604 if (ire->ire_nce == nce) { 605 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 606 ire_delete(ire); 607 } 608 } 609 610 /* 611 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 612 */ 613 boolean_t 614 ndp_restart_dad(nce_t *nce) 615 { 616 boolean_t started; 617 boolean_t dropped; 618 619 if (nce == NULL) 620 return (B_FALSE); 621 mutex_enter(&nce->nce_lock); 622 if (nce->nce_state == ND_PROBE) { 623 mutex_exit(&nce->nce_lock); 624 started = B_TRUE; 625 } else if (nce->nce_state == ND_REACHABLE) { 626 nce->nce_state = ND_PROBE; 627 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 628 mutex_exit(&nce->nce_lock); 629 dropped = nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, NULL, 630 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, NDP_PROBE); 631 if (dropped) { 632 mutex_enter(&nce->nce_lock); 633 nce->nce_pcnt++; 634 mutex_exit(&nce->nce_lock); 635 } 636 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 637 started = B_TRUE; 638 } else { 639 mutex_exit(&nce->nce_lock); 640 started = B_FALSE; 641 } 642 return (started); 643 } 644 645 /* 646 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 647 * If one is found, the refcnt on the nce will be incremented. 
648 */ 649 nce_t * 650 ndp_lookup_v6(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 651 { 652 nce_t *nce; 653 ip_stack_t *ipst; 654 655 ASSERT(ill != NULL); 656 ipst = ill->ill_ipst; 657 658 ASSERT(ill != NULL && ill->ill_isv6); 659 if (!caller_holds_lock) { 660 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 661 } 662 663 /* Get head of v6 hash table */ 664 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 665 nce = nce_lookup_addr(ill, addr, nce); 666 if (nce == NULL) 667 nce = nce_lookup_mapping(ill, addr); 668 if (!caller_holds_lock) 669 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 670 return (nce); 671 } 672 /* 673 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 674 * If one is found, the refcnt on the nce will be incremented. 675 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 676 * so we skip the nce_lookup_mapping call. 677 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 678 */ 679 nce_t * 680 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 681 { 682 nce_t *nce; 683 in6_addr_t addr6; 684 ip_stack_t *ipst = ill->ill_ipst; 685 686 if (!caller_holds_lock) { 687 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 688 } 689 690 /* Get head of v4 hash table */ 691 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 692 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 693 nce = nce_lookup_addr(ill, &addr6, nce); 694 if (!caller_holds_lock) 695 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 696 return (nce); 697 } 698 699 /* 700 * Cache entry lookup. Try to find an nce matching the parameters passed. 701 * Look only for exact entries (no mappings). If an nce is found, increment 702 * the hold count on that nce. The caller passes in the start of the 703 * appropriate hash table, and must be holding the appropriate global 704 * lock (ndp_g_lock). 
705 */ 706 static nce_t * 707 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr, nce_t *nce) 708 { 709 ndp_g_t *ndp; 710 ip_stack_t *ipst = ill->ill_ipst; 711 712 if (ill->ill_isv6) 713 ndp = ipst->ips_ndp6; 714 else 715 ndp = ipst->ips_ndp4; 716 717 ASSERT(ill != NULL); 718 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 719 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 720 return (NULL); 721 for (; nce != NULL; nce = nce->nce_next) { 722 if (nce->nce_ill == ill) { 723 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 724 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 725 &ipv6_all_ones)) { 726 mutex_enter(&nce->nce_lock); 727 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 728 NCE_REFHOLD_LOCKED(nce); 729 mutex_exit(&nce->nce_lock); 730 break; 731 } 732 mutex_exit(&nce->nce_lock); 733 } 734 } 735 } 736 return (nce); 737 } 738 739 /* 740 * Cache entry lookup. Try to find an nce matching the parameters passed. 741 * Look only for mappings. 742 */ 743 static nce_t * 744 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 745 { 746 nce_t *nce; 747 ip_stack_t *ipst = ill->ill_ipst; 748 749 ASSERT(ill != NULL && ill->ill_isv6); 750 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 751 if (!IN6_IS_ADDR_MULTICAST(addr)) 752 return (NULL); 753 nce = ipst->ips_ndp6->nce_mask_entries; 754 for (; nce != NULL; nce = nce->nce_next) 755 if (nce->nce_ill == ill && 756 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 757 mutex_enter(&nce->nce_lock); 758 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 759 NCE_REFHOLD_LOCKED(nce); 760 mutex_exit(&nce->nce_lock); 761 break; 762 } 763 mutex_exit(&nce->nce_lock); 764 } 765 return (nce); 766 } 767 768 /* 769 * Process passed in parameters either from an incoming packet or via 770 * user ioctl. 
771 */ 772 void 773 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 774 { 775 ill_t *ill = nce->nce_ill; 776 uint32_t hw_addr_len = ill->ill_nd_lla_len; 777 mblk_t *mp; 778 boolean_t ll_updated = B_FALSE; 779 boolean_t ll_changed; 780 ip_stack_t *ipst = ill->ill_ipst; 781 782 ASSERT(nce->nce_ipversion == IPV6_VERSION); 783 /* 784 * No updates of link layer address or the neighbor state is 785 * allowed, when the cache is in NONUD state. This still 786 * allows for responding to reachability solicitation. 787 */ 788 mutex_enter(&nce->nce_lock); 789 if (nce->nce_state == ND_INCOMPLETE) { 790 if (hw_addr == NULL) { 791 mutex_exit(&nce->nce_lock); 792 return; 793 } 794 nce_set_ll(nce, hw_addr); 795 /* 796 * Update nce state and send the queued packets 797 * back to ip this time ire will be added. 798 */ 799 if (flag & ND_NA_FLAG_SOLICITED) { 800 nce_update(nce, ND_REACHABLE, NULL); 801 } else { 802 nce_update(nce, ND_STALE, NULL); 803 } 804 mutex_exit(&nce->nce_lock); 805 nce_fastpath(nce); 806 mutex_enter(&nce->nce_lock); 807 mp = nce->nce_qd_mp; 808 nce->nce_qd_mp = NULL; 809 mutex_exit(&nce->nce_lock); 810 while (mp != NULL) { 811 mblk_t *nxt_mp, *data_mp; 812 813 nxt_mp = mp->b_next; 814 mp->b_next = NULL; 815 816 if (mp->b_datap->db_type == M_CTL) 817 data_mp = mp->b_cont; 818 else 819 data_mp = mp; 820 if (data_mp->b_prev != NULL) { 821 ill_t *inbound_ill; 822 queue_t *fwdq = NULL; 823 uint_t ifindex; 824 825 ifindex = (uint_t)(uintptr_t)data_mp->b_prev; 826 inbound_ill = ill_lookup_on_ifindex(ifindex, 827 B_TRUE, NULL, NULL, NULL, NULL, ipst); 828 if (inbound_ill == NULL) { 829 data_mp->b_prev = NULL; 830 freemsg(mp); 831 return; 832 } else { 833 fwdq = inbound_ill->ill_rq; 834 } 835 data_mp->b_prev = NULL; 836 /* 837 * Send a forwarded packet back into ip_rput_v6 838 * just as in ire_send_v6(). 839 * Extract the queue from b_prev (set in 840 * ip_rput_data_v6). 
841 */ 842 if (fwdq != NULL) { 843 /* 844 * Forwarded packets hop count will 845 * get decremented in ip_rput_data_v6 846 */ 847 if (data_mp != mp) 848 freeb(mp); 849 put(fwdq, data_mp); 850 } else { 851 /* 852 * Send locally originated packets back 853 * into * ip_wput_v6. 854 */ 855 put(ill->ill_wq, mp); 856 } 857 ill_refrele(inbound_ill); 858 } else { 859 put(ill->ill_wq, mp); 860 } 861 mp = nxt_mp; 862 } 863 return; 864 } 865 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 866 if (!is_adv) { 867 /* If this is a SOLICITATION request only */ 868 if (ll_changed) 869 nce_update(nce, ND_STALE, hw_addr); 870 mutex_exit(&nce->nce_lock); 871 return; 872 } 873 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 874 /* If in any other state than REACHABLE, ignore */ 875 if (nce->nce_state == ND_REACHABLE) { 876 nce_update(nce, ND_STALE, NULL); 877 } 878 mutex_exit(&nce->nce_lock); 879 return; 880 } else { 881 if (ll_changed) { 882 nce_update(nce, ND_UNCHANGED, hw_addr); 883 ll_updated = B_TRUE; 884 } 885 if (flag & ND_NA_FLAG_SOLICITED) { 886 nce_update(nce, ND_REACHABLE, NULL); 887 } else { 888 if (ll_updated) { 889 nce_update(nce, ND_STALE, NULL); 890 } 891 } 892 mutex_exit(&nce->nce_lock); 893 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 894 NCE_F_ISROUTER)) { 895 ire_t *ire; 896 897 /* 898 * Router turned to host. We need to remove the 899 * entry as well as any default route that may be 900 * using this as a next hop. This is required by 901 * section 7.2.5 of RFC 2461. 902 */ 903 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 904 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 905 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 906 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 907 MATCH_IRE_DEFAULT, ipst); 908 if (ire != NULL) { 909 ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); 910 ire_delete(ire); 911 ire_refrele(ire); 912 } 913 ndp_delete(nce); 914 } 915 } 916 } 917 918 /* 919 * Pass arg1 to the pfi supplied, along with each nce in existence. 
920 * ndp_walk() places a REFHOLD on the nce and drops the lock when 921 * walking the hash list. 922 */ 923 void 924 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 925 boolean_t trace) 926 { 927 928 nce_t *nce; 929 nce_t *nce1; 930 nce_t **ncep; 931 nce_t *free_nce_list = NULL; 932 933 mutex_enter(&ndp->ndp_g_lock); 934 /* Prevent ndp_delete from unlink and free of NCE */ 935 ndp->ndp_g_walker++; 936 mutex_exit(&ndp->ndp_g_lock); 937 for (ncep = ndp->nce_hash_tbl; 938 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 939 for (nce = *ncep; nce != NULL; nce = nce1) { 940 nce1 = nce->nce_next; 941 if (ill == NULL || nce->nce_ill == ill) { 942 if (trace) { 943 NCE_REFHOLD(nce); 944 (*pfi)(nce, arg1); 945 NCE_REFRELE(nce); 946 } else { 947 NCE_REFHOLD_NOTR(nce); 948 (*pfi)(nce, arg1); 949 NCE_REFRELE_NOTR(nce); 950 } 951 } 952 } 953 } 954 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 955 nce1 = nce->nce_next; 956 if (ill == NULL || nce->nce_ill == ill) { 957 if (trace) { 958 NCE_REFHOLD(nce); 959 (*pfi)(nce, arg1); 960 NCE_REFRELE(nce); 961 } else { 962 NCE_REFHOLD_NOTR(nce); 963 (*pfi)(nce, arg1); 964 NCE_REFRELE_NOTR(nce); 965 } 966 } 967 } 968 mutex_enter(&ndp->ndp_g_lock); 969 ndp->ndp_g_walker--; 970 /* 971 * While NCE's are removed from global list they are placed 972 * in a private list, to be passed to nce_ire_delete_list(). 973 * The reason is, there may be ires pointing to this nce 974 * which needs to cleaned up. 
975 */ 976 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 977 /* Time to delete condemned entries */ 978 for (ncep = ndp->nce_hash_tbl; 979 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 980 nce = *ncep; 981 if (nce != NULL) { 982 nce_remove(ndp, nce, &free_nce_list); 983 } 984 } 985 nce = ndp->nce_mask_entries; 986 if (nce != NULL) { 987 nce_remove(ndp, nce, &free_nce_list); 988 } 989 ndp->ndp_g_walker_cleanup = B_FALSE; 990 } 991 992 mutex_exit(&ndp->ndp_g_lock); 993 994 if (free_nce_list != NULL) { 995 nce_ire_delete_list(free_nce_list); 996 } 997 } 998 999 /* 1000 * Walk everything. 1001 * Note that ill can be NULL hence can't derive the ipst from it. 1002 */ 1003 void 1004 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst) 1005 { 1006 ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE); 1007 ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE); 1008 } 1009 1010 /* 1011 * Process resolve requests. Handles both mapped entries 1012 * as well as cases that needs to be send out on the wire. 1013 * Lookup a NCE for a given IRE. Regardless of whether one exists 1014 * or one is created, we defer making ire point to nce until the 1015 * ire is actually added at which point the nce_refcnt on the nce is 1016 * incremented. This is done primarily to have symmetry between ire_add() 1017 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1018 */ 1019 int 1020 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 1021 { 1022 nce_t *nce; 1023 int err = 0; 1024 uint32_t ms; 1025 mblk_t *mp_nce = NULL; 1026 ip_stack_t *ipst = ill->ill_ipst; 1027 1028 ASSERT(ill->ill_isv6); 1029 if (IN6_IS_ADDR_MULTICAST(dst)) { 1030 err = nce_set_multicast(ill, dst); 1031 return (err); 1032 } 1033 err = ndp_lookup_then_add_v6(ill, 1034 NULL, /* No hardware address */ 1035 dst, 1036 &ipv6_all_ones, 1037 &ipv6_all_zeros, 1038 0, 1039 (ill->ill_flags & ILLF_NONUD) ? 
NCE_F_NONUD : 0, 1040 ND_INCOMPLETE, 1041 &nce); 1042 1043 switch (err) { 1044 case 0: 1045 /* 1046 * New cache entry was created. Make sure that the state 1047 * is not ND_INCOMPLETE. It can be in some other state 1048 * even before we send out the solicitation as we could 1049 * get un-solicited advertisements. 1050 * 1051 * If this is an XRESOLV interface, simply return 0, 1052 * since we don't want to solicit just yet. 1053 */ 1054 if (ill->ill_flags & ILLF_XRESOLV) { 1055 NCE_REFRELE(nce); 1056 return (0); 1057 } 1058 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1059 mutex_enter(&nce->nce_lock); 1060 if (nce->nce_state != ND_INCOMPLETE) { 1061 mutex_exit(&nce->nce_lock); 1062 rw_exit(&ipst->ips_ill_g_lock); 1063 NCE_REFRELE(nce); 1064 return (0); 1065 } 1066 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1067 if (mp_nce == NULL) { 1068 /* The caller will free mp */ 1069 mutex_exit(&nce->nce_lock); 1070 rw_exit(&ipst->ips_ill_g_lock); 1071 ndp_delete(nce); 1072 NCE_REFRELE(nce); 1073 return (ENOMEM); 1074 } 1075 ms = nce_solicit(nce, mp_nce); 1076 rw_exit(&ipst->ips_ill_g_lock); 1077 if (ms == 0) { 1078 /* The caller will free mp */ 1079 if (mp_nce != mp) 1080 freeb(mp_nce); 1081 mutex_exit(&nce->nce_lock); 1082 ndp_delete(nce); 1083 NCE_REFRELE(nce); 1084 return (EBUSY); 1085 } 1086 mutex_exit(&nce->nce_lock); 1087 NDP_RESTART_TIMER(nce, (clock_t)ms); 1088 NCE_REFRELE(nce); 1089 return (EINPROGRESS); 1090 case EEXIST: 1091 /* Resolution in progress just queue the packet */ 1092 mutex_enter(&nce->nce_lock); 1093 if (nce->nce_state == ND_INCOMPLETE) { 1094 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1095 if (mp_nce == NULL) { 1096 err = ENOMEM; 1097 } else { 1098 nce_queue_mp(nce, mp_nce); 1099 err = EINPROGRESS; 1100 } 1101 } else { 1102 /* 1103 * Any other state implies we have 1104 * a nce but IRE needs to be added ... 1105 * ire_add_v6() will take care of the 1106 * the case when the nce becomes CONDEMNED 1107 * before the ire is added to the table. 
1108 */ 1109 err = 0; 1110 } 1111 mutex_exit(&nce->nce_lock); 1112 NCE_REFRELE(nce); 1113 break; 1114 default: 1115 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 1116 break; 1117 } 1118 return (err); 1119 } 1120 1121 /* 1122 * When there is no resolver, the link layer template is passed in 1123 * the IRE. 1124 * Lookup a NCE for a given IRE. Regardless of whether one exists 1125 * or one is created, we defer making ire point to nce until the 1126 * ire is actually added at which point the nce_refcnt on the nce is 1127 * incremented. This is done primarily to have symmetry between ire_add() 1128 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1129 */ 1130 int 1131 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1132 { 1133 nce_t *nce; 1134 int err = 0; 1135 1136 ASSERT(ill != NULL); 1137 ASSERT(ill->ill_isv6); 1138 if (IN6_IS_ADDR_MULTICAST(dst)) { 1139 err = nce_set_multicast(ill, dst); 1140 return (err); 1141 } 1142 1143 err = ndp_lookup_then_add_v6(ill, 1144 NULL, /* hardware address */ 1145 dst, 1146 &ipv6_all_ones, 1147 &ipv6_all_zeros, 1148 0, 1149 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1150 ND_REACHABLE, 1151 &nce); 1152 1153 switch (err) { 1154 case 0: 1155 /* 1156 * Cache entry with a proper resolver cookie was 1157 * created. 1158 */ 1159 NCE_REFRELE(nce); 1160 break; 1161 case EEXIST: 1162 err = 0; 1163 NCE_REFRELE(nce); 1164 break; 1165 default: 1166 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1167 break; 1168 } 1169 return (err); 1170 } 1171 1172 /* 1173 * For each interface an entry is added for the unspecified multicast group. 1174 * Here that mapping is used to form the multicast cache entry for a particular 1175 * multicast destination. 
1176 */ 1177 static int 1178 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1179 { 1180 nce_t *mnce; /* Multicast mapping entry */ 1181 nce_t *nce; 1182 uchar_t *hw_addr = NULL; 1183 int err = 0; 1184 ip_stack_t *ipst = ill->ill_ipst; 1185 1186 ASSERT(ill != NULL); 1187 ASSERT(ill->ill_isv6); 1188 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1189 1190 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1191 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst)); 1192 nce = nce_lookup_addr(ill, dst, nce); 1193 if (nce != NULL) { 1194 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1195 NCE_REFRELE(nce); 1196 return (0); 1197 } 1198 /* No entry, now lookup for a mapping this should never fail */ 1199 mnce = nce_lookup_mapping(ill, dst); 1200 if (mnce == NULL) { 1201 /* Something broken for the interface. */ 1202 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1203 return (ESRCH); 1204 } 1205 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1206 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1207 /* 1208 * For IRE_IF_RESOLVER a hardware mapping can be 1209 * generated, for IRE_IF_NORESOLVER, resolution cookie 1210 * in the ill is copied in ndp_add_v6(). 1211 */ 1212 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1213 if (hw_addr == NULL) { 1214 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1215 NCE_REFRELE(mnce); 1216 return (ENOMEM); 1217 } 1218 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1219 } 1220 NCE_REFRELE(mnce); 1221 /* 1222 * IRE_IF_NORESOLVER type simply copies the resolution 1223 * cookie passed in. So no hw_addr is needed. 
1224 */ 1225 err = ndp_add_v6(ill, 1226 hw_addr, 1227 dst, 1228 &ipv6_all_ones, 1229 &ipv6_all_zeros, 1230 0, 1231 NCE_F_NONUD, 1232 ND_REACHABLE, 1233 &nce); 1234 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1235 if (hw_addr != NULL) 1236 kmem_free(hw_addr, ill->ill_nd_lla_len); 1237 if (err != 0) { 1238 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1239 return (err); 1240 } 1241 NCE_REFRELE(nce); 1242 return (0); 1243 } 1244 1245 /* 1246 * Return the link layer address, and any flags of a nce. 1247 */ 1248 int 1249 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1250 { 1251 nce_t *nce; 1252 in6_addr_t *addr; 1253 sin6_t *sin6; 1254 dl_unitdata_req_t *dl; 1255 1256 ASSERT(ill != NULL && ill->ill_isv6); 1257 sin6 = (sin6_t *)&lnr->lnr_addr; 1258 addr = &sin6->sin6_addr; 1259 1260 nce = ndp_lookup_v6(ill, addr, B_FALSE); 1261 if (nce == NULL) 1262 return (ESRCH); 1263 /* If in INCOMPLETE state, no link layer address is available yet */ 1264 if (nce->nce_state == ND_INCOMPLETE) 1265 goto done; 1266 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1267 if (ill->ill_flags & ILLF_XRESOLV) 1268 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1269 else 1270 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1271 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1272 sizeof (lnr->lnr_hdw_addr)); 1273 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1274 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1275 if (nce->nce_flags & NCE_F_ISROUTER) 1276 lnr->lnr_flags = NDF_ISROUTER_ON; 1277 if (nce->nce_flags & NCE_F_ANYCAST) 1278 lnr->lnr_flags |= NDF_ANYCAST_ON; 1279 done: 1280 NCE_REFRELE(nce); 1281 return (0); 1282 } 1283 1284 /* 1285 * Send Enable/Disable multicast reqs to driver. 
1286 */ 1287 int 1288 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1289 uint32_t hw_addr_offset, mblk_t *mp) 1290 { 1291 nce_t *nce; 1292 uchar_t *hw_addr; 1293 ip_stack_t *ipst = ill->ill_ipst; 1294 1295 ASSERT(ill != NULL && ill->ill_isv6); 1296 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1297 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1298 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1299 freemsg(mp); 1300 return (EINVAL); 1301 } 1302 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1303 nce = nce_lookup_mapping(ill, addr); 1304 if (nce == NULL) { 1305 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1306 freemsg(mp); 1307 return (ESRCH); 1308 } 1309 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1310 /* 1311 * Update dl_addr_length and dl_addr_offset for primitives that 1312 * have physical addresses as opposed to full saps 1313 */ 1314 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1315 case DL_ENABMULTI_REQ: 1316 /* Track the state if this is the first enabmulti */ 1317 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1318 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1319 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1320 break; 1321 case DL_DISABMULTI_REQ: 1322 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1323 break; 1324 default: 1325 NCE_REFRELE(nce); 1326 ip1dbg(("ndp_mcastreq: default\n")); 1327 return (EINVAL); 1328 } 1329 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1330 NCE_REFRELE(nce); 1331 ill_dlpi_send(ill, mp); 1332 return (0); 1333 } 1334 1335 /* 1336 * Send a neighbor solicitation. 1337 * Returns number of milliseconds after which we should either rexmit or abort. 1338 * Return of zero means we should abort. 1339 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1340 * 1341 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1342 * the packet. 1343 * NOTE: This routine does not consume mp. 
1344 */ 1345 uint32_t 1346 nce_solicit(nce_t *nce, mblk_t *mp) 1347 { 1348 ill_t *ill; 1349 ill_t *src_ill; 1350 ip6_t *ip6h; 1351 in6_addr_t src; 1352 in6_addr_t dst; 1353 ipif_t *ipif; 1354 ip6i_t *ip6i; 1355 boolean_t dropped = B_FALSE; 1356 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 1357 1358 ASSERT(RW_READ_HELD(&ipst->ips_ill_g_lock)); 1359 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1360 ill = nce->nce_ill; 1361 ASSERT(ill != NULL); 1362 1363 if (nce->nce_rcnt == 0) { 1364 return (0); 1365 } 1366 1367 if (mp == NULL) { 1368 ASSERT(nce->nce_qd_mp != NULL); 1369 mp = nce->nce_qd_mp; 1370 } else { 1371 nce_queue_mp(nce, mp); 1372 } 1373 1374 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1375 if (mp->b_datap->db_type == M_CTL) 1376 mp = mp->b_cont; 1377 1378 ip6h = (ip6_t *)mp->b_rptr; 1379 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1380 /* 1381 * This message should have been pulled up already in 1382 * ip_wput_v6. We can't do pullups here because the message 1383 * could be from the nce_qd_mp which could have b_next/b_prev 1384 * non-NULL. 1385 */ 1386 ip6i = (ip6i_t *)ip6h; 1387 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1388 sizeof (ip6i_t) + IPV6_HDR_LEN); 1389 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1390 } 1391 src = ip6h->ip6_src; 1392 /* 1393 * If the src of outgoing packet is one of the assigned interface 1394 * addresses use it, otherwise we will pick the source address below. 1395 */ 1396 src_ill = ill; 1397 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1398 if (ill->ill_group != NULL) 1399 src_ill = ill->ill_group->illgrp_ill; 1400 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1401 for (ipif = src_ill->ill_ipif; ipif != NULL; 1402 ipif = ipif->ipif_next) { 1403 if (IN6_ARE_ADDR_EQUAL(&src, 1404 &ipif->ipif_v6lcl_addr)) { 1405 break; 1406 } 1407 } 1408 if (ipif != NULL) 1409 break; 1410 } 1411 /* 1412 * If no relevant ipif can be found, then it's not one of our 1413 * addresses. Reset to :: and let nce_xmit. 
If an ipif can be 1414 * found, but it's not yet done with DAD verification, then 1415 * just postpone this transmission until later. 1416 */ 1417 if (src_ill == NULL) 1418 src = ipv6_all_zeros; 1419 else if (!ipif->ipif_addr_ready) 1420 return (ill->ill_reachable_retrans_time); 1421 } 1422 dst = nce->nce_addr; 1423 /* 1424 * If source address is unspecified, nce_xmit will choose 1425 * one for us and initialize the hardware address also 1426 * appropriately. 1427 */ 1428 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1429 src_ill = NULL; 1430 nce->nce_rcnt--; 1431 mutex_exit(&nce->nce_lock); 1432 rw_exit(&ipst->ips_ill_g_lock); 1433 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1434 &dst, 0); 1435 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1436 mutex_enter(&nce->nce_lock); 1437 if (dropped) 1438 nce->nce_rcnt++; 1439 return (ill->ill_reachable_retrans_time); 1440 } 1441 1442 /* 1443 * Attempt to recover an address on an interface that's been marked as a 1444 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1445 * no easy way to just probe the address and have the right thing happen if 1446 * it's no longer in use. Instead, we just bring it up normally and allow the 1447 * regular interface start-up logic to probe for a remaining duplicate and take 1448 * us back down if necessary. 1449 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1450 * ip_ndp_excl. 1451 */ 1452 /* ARGSUSED */ 1453 static void 1454 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1455 { 1456 ill_t *ill = rq->q_ptr; 1457 ipif_t *ipif; 1458 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1459 1460 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1461 /* 1462 * We do not support recovery of proxy ARP'd interfaces, 1463 * because the system lacks a complete proxy ARP mechanism. 
1464 */ 1465 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1466 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1467 continue; 1468 } 1469 1470 /* 1471 * If we have already recovered or if the interface is going 1472 * away, then ignore. 1473 */ 1474 mutex_enter(&ill->ill_lock); 1475 if (!(ipif->ipif_flags & IPIF_DUPLICATE) || 1476 (ipif->ipif_flags & (IPIF_MOVING | IPIF_CONDEMNED))) { 1477 mutex_exit(&ill->ill_lock); 1478 continue; 1479 } 1480 1481 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1482 ill->ill_ipif_dup_count--; 1483 mutex_exit(&ill->ill_lock); 1484 ipif->ipif_was_dup = B_TRUE; 1485 1486 if (ipif_ndp_up(ipif) != EINPROGRESS) 1487 (void) ipif_up_done_v6(ipif); 1488 } 1489 freeb(mp); 1490 } 1491 1492 /* 1493 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1494 * As long as someone else holds the address, the interface will stay down. 1495 * When that conflict goes away, the interface is brought back up. This is 1496 * done so that accidental shutdowns of addresses aren't made permanent. Your 1497 * server will recover from a failure. 1498 * 1499 * For DHCP and temporary addresses, recovery is not done in the kernel. 1500 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1501 * 1502 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1503 */ 1504 static void 1505 ipif6_dup_recovery(void *arg) 1506 { 1507 ipif_t *ipif = arg; 1508 1509 ipif->ipif_recovery_id = 0; 1510 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1511 return; 1512 1513 /* 1514 * No lock, because this is just an optimization. 
1515 */ 1516 if (ipif->ipif_state_flags & (IPIF_MOVING | IPIF_CONDEMNED)) 1517 return; 1518 1519 /* If the link is down, we'll retry this later */ 1520 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1521 return; 1522 1523 ndp_do_recovery(ipif); 1524 } 1525 1526 /* 1527 * Perform interface recovery by forcing the duplicate interfaces up and 1528 * allowing the system to determine which ones should stay up. 1529 * 1530 * Called both by recovery timer expiry and link-up notification. 1531 */ 1532 void 1533 ndp_do_recovery(ipif_t *ipif) 1534 { 1535 ill_t *ill = ipif->ipif_ill; 1536 mblk_t *mp; 1537 ip_stack_t *ipst = ill->ill_ipst; 1538 1539 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1540 if (mp == NULL) { 1541 mutex_enter(&ill->ill_lock); 1542 if (ipif->ipif_recovery_id == 0 && 1543 !(ipif->ipif_state_flags & (IPIF_MOVING | 1544 IPIF_CONDEMNED))) { 1545 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1546 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1547 } 1548 mutex_exit(&ill->ill_lock); 1549 } else { 1550 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1551 sizeof (ipif->ipif_v6lcl_addr)); 1552 ill_refhold(ill); 1553 qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP, 1554 B_FALSE); 1555 } 1556 } 1557 1558 /* 1559 * Find the solicitation in the given message, and extract printable details 1560 * (MAC and IP addresses) from it. 1561 */ 1562 static nd_neighbor_solicit_t * 1563 ip_ndp_find_solicitation(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, char *hbuf, 1564 size_t hlen, char *sbuf, size_t slen, uchar_t **haddr) 1565 { 1566 nd_neighbor_solicit_t *ns; 1567 ip6_t *ip6h; 1568 uchar_t *addr; 1569 int alen; 1570 1571 alen = 0; 1572 ip6h = (ip6_t *)mp->b_rptr; 1573 if (dl_mp == NULL) { 1574 nd_opt_hdr_t *opt; 1575 int nslen; 1576 1577 /* 1578 * If it's from the fast-path, then it can't be a probe 1579 * message, and thus must include the source linkaddr option. 1580 * Extract that here. 
1581 */ 1582 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1583 nslen = mp->b_wptr - (uchar_t *)ns; 1584 if ((nslen -= sizeof (*ns)) > 0) { 1585 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), nslen, 1586 ND_OPT_SOURCE_LINKADDR); 1587 if (opt != NULL && 1588 opt->nd_opt_len * 8 - sizeof (*opt) >= 1589 ill->ill_nd_lla_len) { 1590 addr = (uchar_t *)(opt + 1); 1591 alen = ill->ill_nd_lla_len; 1592 } 1593 } 1594 /* 1595 * We cheat a bit here for the sake of printing usable log 1596 * messages in the rare case where the reply we got was unicast 1597 * without a source linkaddr option, and the interface is in 1598 * fastpath mode. (Sigh.) 1599 */ 1600 if (alen == 0 && ill->ill_type == IFT_ETHER && 1601 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1602 struct ether_header *pether; 1603 1604 pether = (struct ether_header *)((char *)ip6h - 1605 sizeof (*pether)); 1606 addr = pether->ether_shost.ether_addr_octet; 1607 alen = ETHERADDRL; 1608 } 1609 } else { 1610 dl_unitdata_ind_t *dlu; 1611 1612 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1613 alen = dlu->dl_src_addr_length; 1614 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1615 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1616 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1617 if (ill->ill_sap_length < 0) { 1618 alen += ill->ill_sap_length; 1619 } else { 1620 addr += ill->ill_sap_length; 1621 alen -= ill->ill_sap_length; 1622 } 1623 } 1624 } 1625 if (alen > 0) { 1626 *haddr = addr; 1627 (void) mac_colon_addr(addr, alen, hbuf, hlen); 1628 } else { 1629 *haddr = NULL; 1630 (void) strcpy(hbuf, "?"); 1631 } 1632 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1633 (void) inet_ntop(AF_INET6, &ns->nd_ns_target, sbuf, slen); 1634 return (ns); 1635 } 1636 1637 /* 1638 * This is for exclusive changes due to NDP duplicate address detection 1639 * failure. 
1640 */ 1641 /* ARGSUSED */ 1642 static void 1643 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1644 { 1645 ill_t *ill = rq->q_ptr; 1646 ipif_t *ipif; 1647 char ibuf[LIFNAMSIZ + 10]; /* 10 digits for logical i/f number */ 1648 char hbuf[MAC_STR_LEN]; 1649 char sbuf[INET6_ADDRSTRLEN]; 1650 nd_neighbor_solicit_t *ns; 1651 mblk_t *dl_mp = NULL; 1652 uchar_t *haddr; 1653 ip_stack_t *ipst = ill->ill_ipst; 1654 1655 if (DB_TYPE(mp) != M_DATA) { 1656 dl_mp = mp; 1657 mp = mp->b_cont; 1658 } 1659 ns = ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, sizeof (hbuf), sbuf, 1660 sizeof (sbuf), &haddr); 1661 if (haddr != NULL && 1662 bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) { 1663 /* 1664 * Ignore conflicts generated by misbehaving switches that just 1665 * reflect our own messages back to us. 1666 */ 1667 goto ignore_conflict; 1668 } 1669 1670 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1671 1672 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1673 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 1674 &ns->nd_ns_target)) { 1675 continue; 1676 } 1677 1678 /* If it's already marked, then don't do anything. */ 1679 if (ipif->ipif_flags & IPIF_DUPLICATE) 1680 continue; 1681 1682 /* 1683 * If this is a failure during duplicate recovery, then don't 1684 * complain. It may take a long time to recover. 
1685 */ 1686 if (!ipif->ipif_was_dup) { 1687 ipif_get_name(ipif, ibuf, sizeof (ibuf)); 1688 cmn_err(CE_WARN, "%s has duplicate address %s (in " 1689 "use by %s); disabled", ibuf, sbuf, hbuf); 1690 } 1691 mutex_enter(&ill->ill_lock); 1692 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1693 ipif->ipif_flags |= IPIF_DUPLICATE; 1694 ill->ill_ipif_dup_count++; 1695 mutex_exit(&ill->ill_lock); 1696 (void) ipif_down(ipif, NULL, NULL); 1697 ipif_down_tail(ipif); 1698 mutex_enter(&ill->ill_lock); 1699 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1700 ill->ill_net_type == IRE_IF_RESOLVER && 1701 !(ipif->ipif_state_flags & (IPIF_MOVING | 1702 IPIF_CONDEMNED)) && 1703 ipst->ips_ip_dup_recovery > 0) { 1704 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1705 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1706 } 1707 mutex_exit(&ill->ill_lock); 1708 } 1709 ignore_conflict: 1710 if (dl_mp != NULL) 1711 freeb(dl_mp); 1712 freemsg(mp); 1713 } 1714 1715 /* 1716 * Handle failure by tearing down the ipifs with the specified address. Note 1717 * that tearing down the ipif also means deleting the nce through ipif_down, so 1718 * it's not possible to do recovery by just restarting the nce timer. Instead, 1719 * we start a timer on the ipif. 1720 */ 1721 static void 1722 ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1723 { 1724 if ((mp = copymsg(mp)) != NULL) { 1725 if (dl_mp == NULL) 1726 dl_mp = mp; 1727 else if ((dl_mp = copyb(dl_mp)) != NULL) 1728 dl_mp->b_cont = mp; 1729 if (dl_mp == NULL) { 1730 freemsg(mp); 1731 } else { 1732 ill_refhold(ill); 1733 qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP, 1734 B_FALSE); 1735 } 1736 } 1737 ndp_delete(nce); 1738 } 1739 1740 /* 1741 * Handle a discovered conflict: some other system is advertising that it owns 1742 * one of our IP addresses. We need to defend ourselves, or just shut down the 1743 * interface. 
1744 */ 1745 static void 1746 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1747 { 1748 ipif_t *ipif; 1749 uint32_t now; 1750 uint_t maxdefense; 1751 uint_t defs; 1752 ip_stack_t *ipst = ill->ill_ipst; 1753 1754 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 1755 NULL, NULL, ipst); 1756 if (ipif == NULL) 1757 return; 1758 /* 1759 * First, figure out if this address is disposable. 1760 */ 1761 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1762 maxdefense = ipst->ips_ip_max_temp_defend; 1763 else 1764 maxdefense = ipst->ips_ip_max_defend; 1765 1766 /* 1767 * Now figure out how many times we've defended ourselves. Ignore 1768 * defenses that happened long in the past. 1769 */ 1770 now = gethrestime_sec(); 1771 mutex_enter(&nce->nce_lock); 1772 if ((defs = nce->nce_defense_count) > 0 && 1773 now - nce->nce_defense_time > ipst->ips_ip_defend_interval) { 1774 nce->nce_defense_count = defs = 0; 1775 } 1776 nce->nce_defense_count++; 1777 nce->nce_defense_time = now; 1778 mutex_exit(&nce->nce_lock); 1779 ipif_refrele(ipif); 1780 1781 /* 1782 * If we've defended ourselves too many times already, then give up and 1783 * tear down the interface(s) using this address. Otherwise, defend by 1784 * sending out an unsolicited Neighbor Advertisement. 
1785 */ 1786 if (defs >= maxdefense) { 1787 ip_ndp_failure(ill, mp, dl_mp, nce); 1788 } else { 1789 char hbuf[MAC_STR_LEN]; 1790 char sbuf[INET6_ADDRSTRLEN]; 1791 uchar_t *haddr; 1792 1793 (void) ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, 1794 sizeof (hbuf), sbuf, sizeof (sbuf), &haddr); 1795 cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1796 hbuf, sbuf, ill->ill_name); 1797 (void) nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, B_FALSE, 1798 &nce->nce_addr, &ipv6_all_hosts_mcast, 1799 nce_advert_flags(nce)); 1800 } 1801 } 1802 1803 static void 1804 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1805 { 1806 nd_neighbor_solicit_t *ns; 1807 uint32_t hlen = ill->ill_nd_lla_len; 1808 uchar_t *haddr = NULL; 1809 icmp6_t *icmp_nd; 1810 ip6_t *ip6h; 1811 nce_t *our_nce = NULL; 1812 in6_addr_t target; 1813 in6_addr_t src; 1814 int len; 1815 int flag = 0; 1816 nd_opt_hdr_t *opt = NULL; 1817 boolean_t bad_solicit = B_FALSE; 1818 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1819 1820 ip6h = (ip6_t *)mp->b_rptr; 1821 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1822 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1823 src = ip6h->ip6_src; 1824 ns = (nd_neighbor_solicit_t *)icmp_nd; 1825 target = ns->nd_ns_target; 1826 if (IN6_IS_ADDR_MULTICAST(&target)) { 1827 if (ip_debug > 2) { 1828 /* ip1dbg */ 1829 pr_addr_dbg("ndp_input_solicit: Target is" 1830 " multicast! 
%s\n", AF_INET6, &target); 1831 } 1832 bad_solicit = B_TRUE; 1833 goto done; 1834 } 1835 if (len > sizeof (nd_neighbor_solicit_t)) { 1836 /* Options present */ 1837 opt = (nd_opt_hdr_t *)&ns[1]; 1838 len -= sizeof (nd_neighbor_solicit_t); 1839 if (!ndp_verify_optlen(opt, len)) { 1840 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1841 bad_solicit = B_TRUE; 1842 goto done; 1843 } 1844 } 1845 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1846 /* Check to see if this is a valid DAD solicitation */ 1847 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1848 if (ip_debug > 2) { 1849 /* ip1dbg */ 1850 pr_addr_dbg("ndp_input_solicit: IPv6 " 1851 "Destination is not solicited node " 1852 "multicast %s\n", AF_INET6, 1853 &ip6h->ip6_dst); 1854 } 1855 bad_solicit = B_TRUE; 1856 goto done; 1857 } 1858 } 1859 1860 our_nce = ndp_lookup_v6(ill, &target, B_FALSE); 1861 /* 1862 * If this is a valid Solicitation, a permanent 1863 * entry should exist in the cache 1864 */ 1865 if (our_nce == NULL || 1866 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1867 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1868 "ifname=%s ", ill->ill_name)); 1869 if (ip_debug > 2) { 1870 /* ip1dbg */ 1871 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1872 } 1873 bad_solicit = B_TRUE; 1874 goto done; 1875 } 1876 1877 /* At this point we should have a verified NS per spec */ 1878 if (opt != NULL) { 1879 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1880 if (opt != NULL) { 1881 haddr = (uchar_t *)&opt[1]; 1882 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 1883 hlen == 0) { 1884 ip1dbg(("ndp_input_advert: bad SLLA\n")); 1885 bad_solicit = B_TRUE; 1886 goto done; 1887 } 1888 } 1889 } 1890 1891 /* If sending directly to peer, set the unicast flag */ 1892 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 1893 flag |= NDP_UNICAST; 1894 1895 /* 1896 * Create/update the entry for the soliciting node. 1897 * or respond to outstanding queries, don't if 1898 * the source is unspecified address. 
1899 */ 1900 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1901 int err; 1902 nce_t *nnce; 1903 1904 ASSERT(ill->ill_isv6); 1905 /* 1906 * Regular solicitations *must* include the Source Link-Layer 1907 * Address option. Ignore messages that do not. 1908 */ 1909 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1910 ip1dbg(("ndp_input_solicit: source link-layer address " 1911 "option missing with a specified source.\n")); 1912 bad_solicit = B_TRUE; 1913 goto done; 1914 } 1915 1916 /* 1917 * This is a regular solicitation. If we're still in the 1918 * process of verifying the address, then don't respond at all 1919 * and don't keep track of the sender. 1920 */ 1921 if (our_nce->nce_state == ND_PROBE) 1922 goto done; 1923 1924 /* 1925 * If the solicitation doesn't have sender hardware address 1926 * (legal for unicast solicitation), then process without 1927 * installing the return NCE. Either we already know it, or 1928 * we'll be forced to look it up when (and if) we reply to the 1929 * packet. 1930 */ 1931 if (haddr == NULL) 1932 goto no_source; 1933 1934 err = ndp_lookup_then_add_v6(ill, 1935 haddr, 1936 &src, /* Soliciting nodes address */ 1937 &ipv6_all_ones, 1938 &ipv6_all_zeros, 1939 0, 1940 0, 1941 ND_STALE, 1942 &nnce); 1943 switch (err) { 1944 case 0: 1945 /* done with this entry */ 1946 NCE_REFRELE(nnce); 1947 break; 1948 case EEXIST: 1949 /* 1950 * B_FALSE indicates this is not an 1951 * an advertisement. 1952 */ 1953 ndp_process(nnce, haddr, 0, B_FALSE); 1954 NCE_REFRELE(nnce); 1955 break; 1956 default: 1957 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1958 err)); 1959 goto done; 1960 } 1961 no_source: 1962 flag |= NDP_SOLICITED; 1963 } else { 1964 /* 1965 * No source link layer address option should be present in a 1966 * valid DAD request. 
1967 */ 1968 if (haddr != NULL) { 1969 ip1dbg(("ndp_input_solicit: source link-layer address " 1970 "option present with an unspecified source.\n")); 1971 bad_solicit = B_TRUE; 1972 goto done; 1973 } 1974 if (our_nce->nce_state == ND_PROBE) { 1975 /* 1976 * Internally looped-back probes won't have DLPI 1977 * attached to them. External ones (which are sent by 1978 * multicast) always will. Just ignore our own 1979 * transmissions. 1980 */ 1981 if (dl_mp != NULL) { 1982 /* 1983 * If someone else is probing our address, then 1984 * we've crossed wires. Declare failure. 1985 */ 1986 ip_ndp_failure(ill, mp, dl_mp, our_nce); 1987 } 1988 goto done; 1989 } 1990 /* 1991 * This is a DAD probe. Multicast the advertisement to the 1992 * all-nodes address. 1993 */ 1994 src = ipv6_all_hosts_mcast; 1995 } 1996 flag |= nce_advert_flags(our_nce); 1997 /* Response to a solicitation */ 1998 (void) nce_xmit(ill, 1999 ND_NEIGHBOR_ADVERT, 2000 ill, /* ill to be used for extracting ill_nd_lla */ 2001 B_TRUE, /* use ill_nd_lla */ 2002 &target, /* Source and target of the advertisement pkt */ 2003 &src, /* IP Destination (source of original pkt) */ 2004 flag); 2005 done: 2006 if (bad_solicit) 2007 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 2008 if (our_nce != NULL) 2009 NCE_REFRELE(our_nce); 2010 } 2011 2012 void 2013 ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2014 { 2015 nd_neighbor_advert_t *na; 2016 uint32_t hlen = ill->ill_nd_lla_len; 2017 uchar_t *haddr = NULL; 2018 icmp6_t *icmp_nd; 2019 ip6_t *ip6h; 2020 nce_t *dst_nce = NULL; 2021 in6_addr_t target; 2022 nd_opt_hdr_t *opt = NULL; 2023 int len; 2024 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2025 ip_stack_t *ipst = ill->ill_ipst; 2026 2027 ip6h = (ip6_t *)mp->b_rptr; 2028 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2029 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2030 na = (nd_neighbor_advert_t *)icmp_nd; 2031 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 2032 (na->nd_na_flags_reserved & 
ND_NA_FLAG_SOLICITED)) { 2033 ip1dbg(("ndp_input_advert: Target is multicast but the " 2034 "solicited flag is not zero\n")); 2035 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2036 return; 2037 } 2038 target = na->nd_na_target; 2039 if (IN6_IS_ADDR_MULTICAST(&target)) { 2040 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 2041 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2042 return; 2043 } 2044 if (len > sizeof (nd_neighbor_advert_t)) { 2045 opt = (nd_opt_hdr_t *)&na[1]; 2046 if (!ndp_verify_optlen(opt, 2047 len - sizeof (nd_neighbor_advert_t))) { 2048 ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); 2049 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2050 return; 2051 } 2052 /* At this point we have a verified NA per spec */ 2053 len -= sizeof (nd_neighbor_advert_t); 2054 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 2055 if (opt != NULL) { 2056 haddr = (uchar_t *)&opt[1]; 2057 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 2058 hlen == 0) { 2059 ip1dbg(("ndp_input_advert: bad SLLA\n")); 2060 BUMP_MIB(mib, 2061 ipv6IfIcmpInBadNeighborAdvertisements); 2062 return; 2063 } 2064 } 2065 } 2066 2067 /* 2068 * If this interface is part of the group look at all the 2069 * ills in the group. 
2070 */ 2071 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2072 if (ill->ill_group != NULL) 2073 ill = ill->ill_group->illgrp_ill; 2074 2075 for (; ill != NULL; ill = ill->ill_group_next) { 2076 mutex_enter(&ill->ill_lock); 2077 if (!ILL_CAN_LOOKUP(ill)) { 2078 mutex_exit(&ill->ill_lock); 2079 continue; 2080 } 2081 ill_refhold_locked(ill); 2082 mutex_exit(&ill->ill_lock); 2083 dst_nce = ndp_lookup_v6(ill, &target, B_FALSE); 2084 /* We have to drop the lock since ndp_process calls put* */ 2085 rw_exit(&ipst->ips_ill_g_lock); 2086 if (dst_nce != NULL) { 2087 if ((dst_nce->nce_flags & NCE_F_PERMANENT) && 2088 dst_nce->nce_state == ND_PROBE) { 2089 /* 2090 * Someone else sent an advertisement for an 2091 * address that we're trying to configure. 2092 * Tear it down. Note that dl_mp might be NULL 2093 * if we're getting a unicast reply. This 2094 * isn't typically done (multicast is the norm 2095 * in response to a probe), but ip_ndp_failure 2096 * will handle the dl_mp == NULL case as well. 2097 */ 2098 ip_ndp_failure(ill, mp, dl_mp, dst_nce); 2099 } else if (dst_nce->nce_flags & NCE_F_PERMANENT) { 2100 /* 2101 * Someone just announced one of our local 2102 * addresses. If it wasn't us, then this is a 2103 * conflict. Defend the address or shut it 2104 * down. 2105 */ 2106 if (dl_mp != NULL && 2107 (haddr == NULL || 2108 nce_cmp_ll_addr(dst_nce, haddr, 2109 ill->ill_nd_lla_len))) { 2110 ip_ndp_conflict(ill, mp, dl_mp, 2111 dst_nce); 2112 } 2113 } else { 2114 if (na->nd_na_flags_reserved & 2115 ND_NA_FLAG_ROUTER) { 2116 dst_nce->nce_flags |= NCE_F_ISROUTER; 2117 } 2118 /* B_TRUE indicates this an advertisement */ 2119 ndp_process(dst_nce, haddr, 2120 na->nd_na_flags_reserved, B_TRUE); 2121 } 2122 NCE_REFRELE(dst_nce); 2123 } 2124 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2125 ill_refrele(ill); 2126 } 2127 rw_exit(&ipst->ips_ill_g_lock); 2128 } 2129 2130 /* 2131 * Process NDP neighbor solicitation/advertisement messages. 
2132 * The checksum has already checked o.k before reaching here. 2133 */ 2134 void 2135 ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2136 { 2137 icmp6_t *icmp_nd; 2138 ip6_t *ip6h; 2139 int len; 2140 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2141 2142 2143 if (!pullupmsg(mp, -1)) { 2144 ip1dbg(("ndp_input: pullupmsg failed\n")); 2145 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2146 goto done; 2147 } 2148 ip6h = (ip6_t *)mp->b_rptr; 2149 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 2150 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 2151 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 2152 goto done; 2153 } 2154 /* 2155 * NDP does not accept any extension headers between the 2156 * IP header and the ICMP header since e.g. a routing 2157 * header could be dangerous. 2158 * This assumes that any AH or ESP headers are removed 2159 * by ip prior to passing the packet to ndp_input. 2160 */ 2161 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 2162 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 2163 ip6h->ip6_nxt)); 2164 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2165 goto done; 2166 } 2167 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2168 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 2169 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 2170 if (icmp_nd->icmp6_code != 0) { 2171 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 2172 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2173 goto done; 2174 } 2175 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2176 /* 2177 * Make sure packet length is large enough for either 2178 * a NS or a NA icmp packet. 
2179 */ 2180 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 2181 ip1dbg(("ndp_input: packet too short\n")); 2182 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2183 goto done; 2184 } 2185 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 2186 ndp_input_solicit(ill, mp, dl_mp); 2187 } else { 2188 ndp_input_advert(ill, mp, dl_mp); 2189 } 2190 done: 2191 freemsg(mp); 2192 } 2193 2194 /* 2195 * nce_xmit is called to form and transmit a ND solicitation or 2196 * advertisement ICMP packet. 2197 * 2198 * If the source address is unspecified and this isn't a probe (used for 2199 * duplicate address detection), an appropriate source address and link layer 2200 * address will be chosen here. The link layer address option is included if 2201 * the source is specified (i.e., all non-probe packets), and omitted (per the 2202 * specification) otherwise. 2203 * 2204 * It returns B_FALSE only if it does a successful put() to the 2205 * corresponding ill's ill_wq otherwise returns B_TRUE. 2206 */ 2207 static boolean_t 2208 nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, 2209 boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, 2210 int flag) 2211 { 2212 uint32_t len; 2213 icmp6_t *icmp6; 2214 mblk_t *mp; 2215 ip6_t *ip6h; 2216 nd_opt_hdr_t *opt; 2217 uint_t plen; 2218 ip6i_t *ip6i; 2219 ipif_t *src_ipif = NULL; 2220 uint8_t *hw_addr; 2221 zoneid_t zoneid = GLOBAL_ZONEID; 2222 2223 /* 2224 * If we have a unspecified source(sender) address, select a 2225 * proper source address for the solicitation here itself so 2226 * that we can initialize the h/w address correctly. This is 2227 * needed for interface groups as source address can come from 2228 * the whole group and the h/w address initialized from ill will 2229 * be wrong if the source address comes from a different ill. 2230 * 2231 * If the sender is specified then we use this address in order 2232 * to lookup the zoneid before calling ip_output_v6(). 
This is to 2233 * enable unicast ND_NEIGHBOR_ADVERT packets to be routed correctly 2234 * by IP (we cannot guarantee that the global zone has an interface 2235 * route to the destination). 2236 * 2237 * Note that the NA never comes here with the unspecified source 2238 * address. The following asserts that whenever the source 2239 * address is specified, the haddr also should be specified. 2240 */ 2241 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 2242 2243 if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { 2244 ASSERT(operation != ND_NEIGHBOR_ADVERT); 2245 /* 2246 * Pick a source address for this solicitation, but 2247 * restrict the selection to addresses assigned to the 2248 * output interface (or interface group). We do this 2249 * because the destination will create a neighbor cache 2250 * entry for the source address of this packet, so the 2251 * source address had better be a valid neighbor. 2252 */ 2253 src_ipif = ipif_select_source_v6(ill, target, RESTRICT_TO_ILL, 2254 IPV6_PREFER_SRC_DEFAULT, ALL_ZONES); 2255 if (src_ipif == NULL) { 2256 char buf[INET6_ADDRSTRLEN]; 2257 2258 ip1dbg(("nce_xmit: No source ipif for dst %s\n", 2259 inet_ntop(AF_INET6, (char *)target, buf, 2260 sizeof (buf)))); 2261 return (B_TRUE); 2262 } 2263 sender = &src_ipif->ipif_v6src_addr; 2264 hwaddr_ill = src_ipif->ipif_ill; 2265 } else if (!(IN6_IS_ADDR_UNSPECIFIED(sender))) { 2266 zoneid = ipif_lookup_addr_zoneid_v6(sender, ill, ill->ill_ipst); 2267 /* 2268 * It's possible for ipif_lookup_addr_zoneid_v6() to return 2269 * ALL_ZONES if it cannot find a matching ipif for the address 2270 * we are trying to use. In this case we err on the side of 2271 * trying to send the packet by defaulting to the GLOBAL_ZONEID. 2272 */ 2273 if (zoneid == ALL_ZONES) 2274 zoneid = GLOBAL_ZONEID; 2275 } 2276 2277 /* 2278 * Always make sure that the NS/NA packets don't get load 2279 * spread. 
This is needed so that the probe packets sent 2280 * by the in.mpathd daemon can really go out on the desired 2281 * interface. Probe packets are made to go out on a desired 2282 * interface by including a ip6i with ATTACH_IF flag. As these 2283 * packets indirectly end up sending/receiving NS/NA packets 2284 * (neighbor doing NUD), we have to make sure that NA 2285 * also go out on the same interface. 2286 */ 2287 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7) / 8; 2288 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 2289 plen * 8; 2290 mp = allocb(len, BPRI_LO); 2291 if (mp == NULL) { 2292 if (src_ipif != NULL) 2293 ipif_refrele(src_ipif); 2294 return (B_TRUE); 2295 } 2296 bzero((char *)mp->b_rptr, len); 2297 mp->b_wptr = mp->b_rptr + len; 2298 2299 ip6i = (ip6i_t *)mp->b_rptr; 2300 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2301 ip6i->ip6i_nxt = IPPROTO_RAW; 2302 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2303 if (flag & NDP_PROBE) 2304 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 2305 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2306 2307 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 2308 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2309 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2310 ip6h->ip6_nxt = IPPROTO_ICMPV6; 2311 ip6h->ip6_hops = IPV6_MAX_HOPS; 2312 ip6h->ip6_dst = *target; 2313 icmp6 = (icmp6_t *)&ip6h[1]; 2314 2315 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 2316 sizeof (nd_neighbor_advert_t)); 2317 2318 if (operation == ND_NEIGHBOR_SOLICIT) { 2319 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 2320 2321 if (!(flag & NDP_PROBE)) 2322 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 2323 ip6h->ip6_src = *sender; 2324 ns->nd_ns_target = *target; 2325 if (!(flag & NDP_UNICAST)) { 2326 /* Form multicast address of the target */ 2327 ip6h->ip6_dst = ipv6_solicited_node_mcast; 2328 ip6h->ip6_dst.s6_addr32[3] |= 2329 ns->nd_ns_target.s6_addr32[3]; 2330 } 2331 } else { 2332 
nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 2333 2334 ASSERT(!(flag & NDP_PROBE)); 2335 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2336 ip6h->ip6_src = *sender; 2337 na->nd_na_target = *sender; 2338 if (flag & NDP_ISROUTER) 2339 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 2340 if (flag & NDP_SOLICITED) 2341 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 2342 if (flag & NDP_ORIDE) 2343 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 2344 } 2345 2346 hw_addr = NULL; 2347 if (!(flag & NDP_PROBE)) { 2348 hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : 2349 hwaddr_ill->ill_phys_addr; 2350 if (hw_addr != NULL) { 2351 /* Fill in link layer address and option len */ 2352 opt->nd_opt_len = (uint8_t)plen; 2353 bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); 2354 } 2355 } 2356 if (hw_addr == NULL) { 2357 /* If there's no link layer address option, then strip it. */ 2358 len -= plen * 8; 2359 mp->b_wptr = mp->b_rptr + len; 2360 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2361 } 2362 2363 icmp6->icmp6_type = (uint8_t)operation; 2364 icmp6->icmp6_code = 0; 2365 /* 2366 * Prepare for checksum by putting icmp length in the icmp 2367 * checksum field. The checksum is calculated in ip_wput_v6. 2368 */ 2369 icmp6->icmp6_cksum = ip6h->ip6_plen; 2370 2371 if (src_ipif != NULL) 2372 ipif_refrele(src_ipif); 2373 2374 ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT); 2375 return (B_FALSE); 2376 } 2377 2378 /* 2379 * Make a link layer address (does not include the SAP) from an nce. 2380 * To form the link layer address, use the last four bytes of ipv6 2381 * address passed in and the fixed offset stored in nce. 
2382 */ 2383 static void 2384 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 2385 { 2386 uchar_t *mask, *to; 2387 ill_t *ill = nce->nce_ill; 2388 int len; 2389 2390 if (ill->ill_net_type == IRE_IF_NORESOLVER) 2391 return; 2392 ASSERT(nce->nce_res_mp != NULL); 2393 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 2394 ASSERT(nce->nce_flags & NCE_F_MAPPING); 2395 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 2396 ASSERT(addr != NULL); 2397 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 2398 addrpos, ill->ill_nd_lla_len); 2399 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 2400 IPV6_ADDR_LEN); 2401 mask = (uchar_t *)&nce->nce_extract_mask; 2402 mask += (IPV6_ADDR_LEN - len); 2403 addr += (IPV6_ADDR_LEN - len); 2404 to = addrpos + nce->nce_ll_extract_start; 2405 while (len-- > 0) 2406 *to++ |= *mask++ & *addr++; 2407 } 2408 2409 mblk_t * 2410 nce_udreq_alloc(ill_t *ill) 2411 { 2412 mblk_t *template_mp = NULL; 2413 dl_unitdata_req_t *dlur; 2414 int sap_length; 2415 2416 ASSERT(ill->ill_isv6); 2417 2418 sap_length = ill->ill_sap_length; 2419 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 2420 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 2421 if (template_mp == NULL) 2422 return (NULL); 2423 2424 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 2425 dlur->dl_priority.dl_min = 0; 2426 dlur->dl_priority.dl_max = 0; 2427 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 2428 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 2429 2430 /* Copy in the SAP value. */ 2431 NCE_LL_SAP_COPY(ill, template_mp); 2432 2433 return (template_mp); 2434 } 2435 2436 /* 2437 * NDP retransmit timer. 2438 * This timer goes off when: 2439 * a. It is time to retransmit NS for resolver. 2440 * b. It is time to send reachability probes. 
2441 */ 2442 void 2443 ndp_timer(void *arg) 2444 { 2445 nce_t *nce = arg; 2446 ill_t *ill = nce->nce_ill; 2447 uint32_t ms; 2448 char addrbuf[INET6_ADDRSTRLEN]; 2449 mblk_t *mp; 2450 boolean_t dropped = B_FALSE; 2451 ip_stack_t *ipst = ill->ill_ipst; 2452 2453 /* 2454 * The timer has to be cancelled by ndp_delete before doing the final 2455 * refrele. So the NCE is guaranteed to exist when the timer runs 2456 * until it clears the timeout_id. Before clearing the timeout_id 2457 * bump up the refcnt so that we can continue to use the nce 2458 */ 2459 ASSERT(nce != NULL); 2460 2461 /* 2462 * Grab the ill_g_lock now itself to avoid lock order problems. 2463 * nce_solicit needs ill_g_lock to be able to traverse ills 2464 */ 2465 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2466 mutex_enter(&nce->nce_lock); 2467 NCE_REFHOLD_LOCKED(nce); 2468 nce->nce_timeout_id = 0; 2469 2470 /* 2471 * Check the reachability state first. 2472 */ 2473 switch (nce->nce_state) { 2474 case ND_DELAY: 2475 rw_exit(&ipst->ips_ill_g_lock); 2476 nce->nce_state = ND_PROBE; 2477 mutex_exit(&nce->nce_lock); 2478 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2479 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2480 if (ip_debug > 3) { 2481 /* ip2dbg */ 2482 pr_addr_dbg("ndp_timer: state for %s changed " 2483 "to PROBE\n", AF_INET6, &nce->nce_addr); 2484 } 2485 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2486 NCE_REFRELE(nce); 2487 return; 2488 case ND_PROBE: 2489 /* must be retransmit timer */ 2490 rw_exit(&ipst->ips_ill_g_lock); 2491 nce->nce_pcnt--; 2492 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2493 nce->nce_pcnt >= -1); 2494 if (nce->nce_pcnt > 0) { 2495 /* 2496 * As per RFC2461, the nce gets deleted after 2497 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2498 * Note that the first unicast solicitation is sent 2499 * during the DELAY state. 
2500 */ 2501 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2502 nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, 2503 addrbuf, sizeof (addrbuf)))); 2504 mutex_exit(&nce->nce_lock); 2505 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, 2506 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, 2507 (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE : 2508 NDP_UNICAST); 2509 if (dropped) { 2510 mutex_enter(&nce->nce_lock); 2511 nce->nce_pcnt++; 2512 mutex_exit(&nce->nce_lock); 2513 } 2514 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 2515 } else if (nce->nce_pcnt < 0) { 2516 /* No hope, delete the nce */ 2517 nce->nce_state = ND_UNREACHABLE; 2518 mutex_exit(&nce->nce_lock); 2519 if (ip_debug > 2) { 2520 /* ip1dbg */ 2521 pr_addr_dbg("ndp_timer: Delete IRE for" 2522 " dst %s\n", AF_INET6, &nce->nce_addr); 2523 } 2524 ndp_delete(nce); 2525 } else if (!(nce->nce_flags & NCE_F_PERMANENT)) { 2526 /* Wait RetransTimer, before deleting the entry */ 2527 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2528 nce->nce_pcnt, inet_ntop(AF_INET6, 2529 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2530 mutex_exit(&nce->nce_lock); 2531 /* Wait one interval before killing */ 2532 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2533 } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { 2534 ipif_t *ipif; 2535 2536 /* 2537 * We're done probing, and we can now declare this 2538 * address to be usable. Let IP know that it's ok to 2539 * use. 
2540 */ 2541 nce->nce_state = ND_REACHABLE; 2542 mutex_exit(&nce->nce_lock); 2543 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, 2544 ALL_ZONES, NULL, NULL, NULL, NULL, ipst); 2545 if (ipif != NULL) { 2546 if (ipif->ipif_was_dup) { 2547 char ibuf[LIFNAMSIZ + 10]; 2548 char sbuf[INET6_ADDRSTRLEN]; 2549 2550 ipif->ipif_was_dup = B_FALSE; 2551 (void) inet_ntop(AF_INET6, 2552 &ipif->ipif_v6lcl_addr, 2553 sbuf, sizeof (sbuf)); 2554 ipif_get_name(ipif, ibuf, 2555 sizeof (ibuf)); 2556 cmn_err(CE_NOTE, "recovered address " 2557 "%s on %s", sbuf, ibuf); 2558 } 2559 if ((ipif->ipif_flags & IPIF_UP) && 2560 !ipif->ipif_addr_ready) { 2561 ip_rts_ifmsg(ipif); 2562 ip_rts_newaddrmsg(RTM_ADD, 0, ipif); 2563 sctp_update_ipif(ipif, SCTP_IPIF_UP); 2564 } 2565 ipif->ipif_addr_ready = 1; 2566 ipif_refrele(ipif); 2567 } 2568 /* Begin defending our new address */ 2569 nce->nce_unsolicit_count = 0; 2570 dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, 2571 B_FALSE, &nce->nce_addr, &ipv6_all_hosts_mcast, 2572 nce_advert_flags(nce)); 2573 if (dropped) { 2574 nce->nce_unsolicit_count = 1; 2575 NDP_RESTART_TIMER(nce, 2576 ipst->ips_ip_ndp_unsolicit_interval); 2577 } else if (ipst->ips_ip_ndp_defense_interval != 0) { 2578 NDP_RESTART_TIMER(nce, 2579 ipst->ips_ip_ndp_defense_interval); 2580 } 2581 } else { 2582 /* 2583 * This is an address we're probing to be our own, but 2584 * the ill is down. Wait until it comes back before 2585 * doing anything, but switch to reachable state so 2586 * that the restart will work. 2587 */ 2588 nce->nce_state = ND_REACHABLE; 2589 mutex_exit(&nce->nce_lock); 2590 } 2591 NCE_REFRELE(nce); 2592 return; 2593 case ND_INCOMPLETE: 2594 /* 2595 * Must be resolvers retransmit timer. 2596 */ 2597 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2598 ip6i_t *ip6i; 2599 ip6_t *ip6h; 2600 mblk_t *data_mp; 2601 2602 /* 2603 * Walk the list of packets queued, and see if there 2604 * are any multipathing probe packets. 
Such packets 2605 * are always queued at the head. Since this is a 2606 * retransmit timer firing, mark such packets as 2607 * delayed in ND resolution. This info will be used 2608 * in ip_wput_v6(). Multipathing probe packets will 2609 * always have an ip6i_t. Once we hit a packet without 2610 * it, we can break out of this loop. 2611 */ 2612 if (mp->b_datap->db_type == M_CTL) 2613 data_mp = mp->b_cont; 2614 else 2615 data_mp = mp; 2616 2617 ip6h = (ip6_t *)data_mp->b_rptr; 2618 if (ip6h->ip6_nxt != IPPROTO_RAW) 2619 break; 2620 2621 /* 2622 * This message should have been pulled up already in 2623 * ip_wput_v6. We can't do pullups here because the 2624 * b_next/b_prev is non-NULL. 2625 */ 2626 ip6i = (ip6i_t *)ip6h; 2627 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2628 sizeof (ip6i_t) + IPV6_HDR_LEN); 2629 2630 /* Mark this packet as delayed due to ND resolution */ 2631 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2632 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2633 } 2634 if (nce->nce_qd_mp != NULL) { 2635 ms = nce_solicit(nce, NULL); 2636 rw_exit(&ipst->ips_ill_g_lock); 2637 if (ms == 0) { 2638 if (nce->nce_state != ND_REACHABLE) { 2639 mutex_exit(&nce->nce_lock); 2640 nce_resolv_failed(nce); 2641 ndp_delete(nce); 2642 } else { 2643 mutex_exit(&nce->nce_lock); 2644 } 2645 } else { 2646 mutex_exit(&nce->nce_lock); 2647 NDP_RESTART_TIMER(nce, (clock_t)ms); 2648 } 2649 NCE_REFRELE(nce); 2650 return; 2651 } 2652 mutex_exit(&nce->nce_lock); 2653 rw_exit(&ipst->ips_ill_g_lock); 2654 NCE_REFRELE(nce); 2655 break; 2656 case ND_REACHABLE : 2657 rw_exit(&ipst->ips_ill_g_lock); 2658 if (((nce->nce_flags & NCE_F_UNSOL_ADV) && 2659 nce->nce_unsolicit_count != 0) || 2660 ((nce->nce_flags & NCE_F_PERMANENT) && 2661 ipst->ips_ip_ndp_defense_interval != 0)) { 2662 if (nce->nce_unsolicit_count > 0) 2663 nce->nce_unsolicit_count--; 2664 mutex_exit(&nce->nce_lock); 2665 dropped = nce_xmit(ill, 2666 ND_NEIGHBOR_ADVERT, 2667 ill, /* ill to be used for hw addr */ 2668 B_FALSE, /* use 
ill_phys_addr */ 2669 &nce->nce_addr, 2670 &ipv6_all_hosts_mcast, 2671 nce_advert_flags(nce)); 2672 if (dropped) { 2673 mutex_enter(&nce->nce_lock); 2674 nce->nce_unsolicit_count++; 2675 mutex_exit(&nce->nce_lock); 2676 } 2677 if (nce->nce_unsolicit_count != 0) { 2678 NDP_RESTART_TIMER(nce, 2679 ipst->ips_ip_ndp_unsolicit_interval); 2680 } else { 2681 NDP_RESTART_TIMER(nce, 2682 ipst->ips_ip_ndp_defense_interval); 2683 } 2684 } else { 2685 mutex_exit(&nce->nce_lock); 2686 } 2687 NCE_REFRELE(nce); 2688 break; 2689 default: 2690 rw_exit(&ipst->ips_ill_g_lock); 2691 mutex_exit(&nce->nce_lock); 2692 NCE_REFRELE(nce); 2693 break; 2694 } 2695 } 2696 2697 /* 2698 * Set a link layer address from the ll_addr passed in. 2699 * Copy SAP from ill. 2700 */ 2701 static void 2702 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2703 { 2704 ill_t *ill = nce->nce_ill; 2705 uchar_t *woffset; 2706 2707 ASSERT(ll_addr != NULL); 2708 /* Always called before fast_path_probe */ 2709 ASSERT(nce->nce_fp_mp == NULL); 2710 if (ill->ill_sap_length != 0) { 2711 /* 2712 * Copy the SAP type specified in the 2713 * request into the xmit template. 2714 */ 2715 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2716 } 2717 if (ill->ill_phys_addr_length > 0) { 2718 /* 2719 * The bcopy() below used to be called for the physical address 2720 * length rather than the link layer address length. For 2721 * ethernet and many other media, the phys_addr and lla are 2722 * identical. 2723 * However, with xresolv interfaces being introduced, the 2724 * phys_addr and lla are no longer the same, and the physical 2725 * address may not have any useful meaning, so we use the lla 2726 * for IPv6 address resolution and destination addressing. 2727 * 2728 * For PPP or other interfaces with a zero length 2729 * physical address, don't do anything here. 2730 * The bcopy() with a zero phys_addr length was previously 2731 * a no-op for interfaces with a zero-length physical address. 
2732 * Using the lla for them would change the way they operate. 2733 * Doing nothing in such cases preserves expected behavior. 2734 */ 2735 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2736 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2737 } 2738 } 2739 2740 static boolean_t 2741 nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) 2742 { 2743 ill_t *ill = nce->nce_ill; 2744 uchar_t *ll_offset; 2745 2746 ASSERT(nce->nce_res_mp != NULL); 2747 if (ll_addr == NULL) 2748 return (B_FALSE); 2749 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2750 if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) 2751 return (B_TRUE); 2752 return (B_FALSE); 2753 } 2754 2755 /* 2756 * Updates the link layer address or the reachability state of 2757 * a cache entry. Reset probe counter if needed. 2758 */ 2759 static void 2760 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2761 { 2762 ill_t *ill = nce->nce_ill; 2763 boolean_t need_stop_timer = B_FALSE; 2764 boolean_t need_fastpath_update = B_FALSE; 2765 2766 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2767 ASSERT(nce->nce_ipversion == IPV6_VERSION); 2768 /* 2769 * If this interface does not do NUD, there is no point 2770 * in allowing an update to the cache entry. Although 2771 * we will respond to NS. 2772 * The only time we accept an update for a resolver when 2773 * NUD is turned off is when it has just been created. 2774 * Non-Resolvers will always be created as REACHABLE. 
2775 */ 2776 if (new_state != ND_UNCHANGED) { 2777 if ((nce->nce_flags & NCE_F_NONUD) && 2778 (nce->nce_state != ND_INCOMPLETE)) 2779 return; 2780 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2781 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2782 need_stop_timer = B_TRUE; 2783 if (new_state == ND_REACHABLE) 2784 nce->nce_last = TICK_TO_MSEC(lbolt64); 2785 else { 2786 /* We force NUD in this case */ 2787 nce->nce_last = 0; 2788 } 2789 nce->nce_state = new_state; 2790 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2791 } 2792 /* 2793 * In case of fast path we need to free the the fastpath 2794 * M_DATA and do another probe. Otherwise we can just 2795 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2796 * whatever packets that happens to be transmitting at the time. 2797 */ 2798 if (new_ll_addr != NULL) { 2799 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2800 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2801 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2802 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2803 if (nce->nce_fp_mp != NULL) { 2804 freemsg(nce->nce_fp_mp); 2805 nce->nce_fp_mp = NULL; 2806 } 2807 need_fastpath_update = B_TRUE; 2808 } 2809 mutex_exit(&nce->nce_lock); 2810 if (need_stop_timer) { 2811 (void) untimeout(nce->nce_timeout_id); 2812 nce->nce_timeout_id = 0; 2813 } 2814 if (need_fastpath_update) 2815 nce_fastpath(nce); 2816 mutex_enter(&nce->nce_lock); 2817 } 2818 2819 void 2820 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 2821 { 2822 uint_t count = 0; 2823 mblk_t **mpp; 2824 2825 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2826 2827 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2828 mpp = &(*mpp)->b_next) { 2829 if (++count > 2830 nce->nce_ill->ill_max_buf) { 2831 mblk_t *tmp = nce->nce_qd_mp->b_next; 2832 2833 nce->nce_qd_mp->b_next = NULL; 2834 nce->nce_qd_mp->b_prev = NULL; 2835 freemsg(nce->nce_qd_mp); 2836 nce->nce_qd_mp = tmp; 2837 } 2838 } 2839 /* put this on the list */ 2840 if (head_insert) { 2841 
mp->b_next = nce->nce_qd_mp; 2842 nce->nce_qd_mp = mp; 2843 } else { 2844 *mpp = mp; 2845 } 2846 } 2847 2848 static void 2849 nce_queue_mp(nce_t *nce, mblk_t *mp) 2850 { 2851 boolean_t head_insert = B_FALSE; 2852 ip6_t *ip6h; 2853 ip6i_t *ip6i; 2854 mblk_t *data_mp; 2855 2856 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2857 2858 if (mp->b_datap->db_type == M_CTL) 2859 data_mp = mp->b_cont; 2860 else 2861 data_mp = mp; 2862 ip6h = (ip6_t *)data_mp->b_rptr; 2863 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2864 /* 2865 * This message should have been pulled up already in 2866 * ip_wput_v6. We can't do pullups here because the message 2867 * could be from the nce_qd_mp which could have b_next/b_prev 2868 * non-NULL. 2869 */ 2870 ip6i = (ip6i_t *)ip6h; 2871 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2872 sizeof (ip6i_t) + IPV6_HDR_LEN); 2873 /* 2874 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 2875 * This has 2 aspects mentioned below. 2876 * 1. Perform head insertion in the nce_qd_mp for these packets. 2877 * This ensures that next retransmit of ND solicitation 2878 * will use the interface specified by the probe packet, 2879 * for both NS and NA. This corresponds to the src address 2880 * in the IPv6 packet. If we insert at tail, we will be 2881 * depending on the packet at the head for successful 2882 * ND resolution. This is not reliable, because the interface 2883 * on which the NA arrives could be different from the interface 2884 * on which the NS was sent, and if the receiving interface is 2885 * failed, it will appear that the sending interface is also 2886 * failed, causing in.mpathd to misdiagnose this as link 2887 * failure. 2888 * 2. Drop the original packet, if the ND resolution did not 2889 * succeed in the first attempt. However we will create the 2890 * nce and the ire, as soon as the ND resolution succeeds. 2891 * We don't gain anything by queueing multiple probe packets 2892 * and sending them back-to-back once resolution succeeds. 
2893 * It is sufficient to send just 1 packet after ND resolution 2894 * succeeds. Since mpathd is sending down probe packets at a 2895 * constant rate, we don't need to send the queued packet. We 2896 * need to queue it only for NDP resolution. The benefit of 2897 * dropping the probe packets that were delayed in ND 2898 * resolution, is that in.mpathd will not see inflated 2899 * RTT. If the ND resolution does not succeed within 2900 * in.mpathd's failure detection time, mpathd may detect 2901 * a failure, and it does not matter whether the packet 2902 * was queued or dropped. 2903 */ 2904 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2905 head_insert = B_TRUE; 2906 } 2907 2908 nce_queue_mp_common(nce, mp, head_insert); 2909 } 2910 2911 /* 2912 * Called when address resolution failed due to a timeout. 2913 * Send an ICMP unreachable in response to all queued packets. 2914 */ 2915 void 2916 nce_resolv_failed(nce_t *nce) 2917 { 2918 mblk_t *mp, *nxt_mp, *first_mp; 2919 char buf[INET6_ADDRSTRLEN]; 2920 ip6_t *ip6h; 2921 zoneid_t zoneid = GLOBAL_ZONEID; 2922 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 2923 2924 ip1dbg(("nce_resolv_failed: dst %s\n", 2925 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 2926 mutex_enter(&nce->nce_lock); 2927 mp = nce->nce_qd_mp; 2928 nce->nce_qd_mp = NULL; 2929 mutex_exit(&nce->nce_lock); 2930 while (mp != NULL) { 2931 nxt_mp = mp->b_next; 2932 mp->b_next = NULL; 2933 mp->b_prev = NULL; 2934 2935 first_mp = mp; 2936 if (mp->b_datap->db_type == M_CTL) { 2937 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 2938 ASSERT(io->ipsec_out_type == IPSEC_OUT); 2939 zoneid = io->ipsec_out_zoneid; 2940 ASSERT(zoneid != ALL_ZONES); 2941 mp = mp->b_cont; 2942 } 2943 2944 ip6h = (ip6_t *)mp->b_rptr; 2945 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2946 ip6i_t *ip6i; 2947 /* 2948 * This message should have been pulled up already 2949 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 2950 * the header is pulled up. 
2951 */ 2952 ip6i = (ip6i_t *)ip6h; 2953 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 2954 sizeof (ip6i_t) + IPV6_HDR_LEN); 2955 mp->b_rptr += sizeof (ip6i_t); 2956 } 2957 /* 2958 * Ignore failure since icmp_unreachable_v6 will silently 2959 * drop packets with an unspecified source address. 2960 */ 2961 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst); 2962 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 2963 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst); 2964 mp = nxt_mp; 2965 } 2966 } 2967 2968 /* 2969 * Called by SIOCSNDP* ioctl to add/change an nce entry 2970 * and the corresponding attributes. 2971 * Disallow states other than ND_REACHABLE or ND_STALE. 2972 */ 2973 int 2974 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 2975 { 2976 sin6_t *sin6; 2977 in6_addr_t *addr; 2978 nce_t *nce; 2979 int err; 2980 uint16_t new_flags = 0; 2981 uint16_t old_flags = 0; 2982 int inflags = lnr->lnr_flags; 2983 ip_stack_t *ipst = ill->ill_ipst; 2984 2985 ASSERT(ill->ill_isv6); 2986 if ((lnr->lnr_state_create != ND_REACHABLE) && 2987 (lnr->lnr_state_create != ND_STALE)) 2988 return (EINVAL); 2989 2990 sin6 = (sin6_t *)&lnr->lnr_addr; 2991 addr = &sin6->sin6_addr; 2992 2993 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 2994 /* We know it can not be mapping so just look in the hash table */ 2995 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 2996 nce = nce_lookup_addr(ill, addr, nce); 2997 if (nce != NULL) 2998 new_flags = nce->nce_flags; 2999 3000 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 3001 case NDF_ISROUTER_ON: 3002 new_flags |= NCE_F_ISROUTER; 3003 break; 3004 case NDF_ISROUTER_OFF: 3005 new_flags &= ~NCE_F_ISROUTER; 3006 break; 3007 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3008 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3009 if (nce != NULL) 3010 NCE_REFRELE(nce); 3011 return (EINVAL); 3012 } 3013 3014 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3015 case NDF_ANYCAST_ON: 3016 new_flags |= NCE_F_ANYCAST; 3017 break; 3018 case 
NDF_ANYCAST_OFF: 3019 new_flags &= ~NCE_F_ANYCAST; 3020 break; 3021 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3022 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3023 if (nce != NULL) 3024 NCE_REFRELE(nce); 3025 return (EINVAL); 3026 } 3027 3028 if (nce == NULL) { 3029 err = ndp_add_v6(ill, 3030 (uchar_t *)lnr->lnr_hdw_addr, 3031 addr, 3032 &ipv6_all_ones, 3033 &ipv6_all_zeros, 3034 0, 3035 new_flags, 3036 lnr->lnr_state_create, 3037 &nce); 3038 if (err != 0) { 3039 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3040 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 3041 return (err); 3042 } 3043 } 3044 old_flags = nce->nce_flags; 3045 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3046 /* 3047 * Router turned to host, delete all ires. 3048 * XXX Just delete the entry, but we need to add too. 3049 */ 3050 nce->nce_flags &= ~NCE_F_ISROUTER; 3051 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3052 ndp_delete(nce); 3053 NCE_REFRELE(nce); 3054 return (0); 3055 } 3056 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3057 3058 mutex_enter(&nce->nce_lock); 3059 nce->nce_flags = new_flags; 3060 mutex_exit(&nce->nce_lock); 3061 /* 3062 * Note that we ignore the state at this point, which 3063 * should be either STALE or REACHABLE. Instead we let 3064 * the link layer address passed in to determine the state 3065 * much like incoming packets. 3066 */ 3067 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3068 NCE_REFRELE(nce); 3069 return (0); 3070 } 3071 3072 /* 3073 * If the device driver supports it, we make nce_fp_mp to have 3074 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 3075 * The caller ensures there is hold on nce for this function. 3076 * Note that since ill_fastpath_probe() copies the mblk there is 3077 * no need for the hold beyond this function. 
3078 */ 3079 void 3080 nce_fastpath(nce_t *nce) 3081 { 3082 ill_t *ill = nce->nce_ill; 3083 int res; 3084 3085 ASSERT(ill != NULL); 3086 ASSERT(nce->nce_state != ND_INITIAL && nce->nce_state != ND_INCOMPLETE); 3087 3088 if (nce->nce_fp_mp != NULL) { 3089 /* Already contains fastpath info */ 3090 return; 3091 } 3092 if (nce->nce_res_mp != NULL) { 3093 nce_fastpath_list_add(nce); 3094 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3095 /* 3096 * EAGAIN is an indication of a transient error 3097 * i.e. allocation failure etc. leave the nce in the list it 3098 * will be updated when another probe happens for another ire 3099 * if not it will be taken out of the list when the ire is 3100 * deleted. 3101 */ 3102 3103 if (res != 0 && res != EAGAIN) 3104 nce_fastpath_list_delete(nce); 3105 } 3106 } 3107 3108 /* 3109 * Drain the list of nce's waiting for fastpath response. 3110 */ 3111 void 3112 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3113 void *arg) 3114 { 3115 3116 nce_t *next_nce; 3117 nce_t *current_nce; 3118 nce_t *first_nce; 3119 nce_t *prev_nce = NULL; 3120 3121 mutex_enter(&ill->ill_lock); 3122 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3123 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3124 next_nce = current_nce->nce_fastpath; 3125 /* 3126 * Take it off the list if we're flushing, or if the callback 3127 * routine tells us to do so. Otherwise, leave the nce in the 3128 * fastpath list to handle any pending response from the lower 3129 * layer. We can't drain the list when the callback routine 3130 * comparison failed, because the response is asynchronous in 3131 * nature, and may not arrive in the same order as the list 3132 * insertion. 
3133 */ 3134 if (func == NULL || func(current_nce, arg)) { 3135 current_nce->nce_fastpath = NULL; 3136 if (current_nce == first_nce) 3137 ill->ill_fastpath_list = first_nce = next_nce; 3138 else 3139 prev_nce->nce_fastpath = next_nce; 3140 } else { 3141 /* previous element that is still in the list */ 3142 prev_nce = current_nce; 3143 } 3144 current_nce = next_nce; 3145 } 3146 mutex_exit(&ill->ill_lock); 3147 } 3148 3149 /* 3150 * Add nce to the nce fastpath list. 3151 */ 3152 void 3153 nce_fastpath_list_add(nce_t *nce) 3154 { 3155 ill_t *ill; 3156 3157 ill = nce->nce_ill; 3158 3159 mutex_enter(&ill->ill_lock); 3160 mutex_enter(&nce->nce_lock); 3161 3162 /* 3163 * if nce has not been deleted and 3164 * is not already in the list add it. 3165 */ 3166 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3167 (nce->nce_fastpath == NULL)) { 3168 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3169 ill->ill_fastpath_list = nce; 3170 } 3171 3172 mutex_exit(&nce->nce_lock); 3173 mutex_exit(&ill->ill_lock); 3174 } 3175 3176 /* 3177 * remove nce from the nce fastpath list. 3178 */ 3179 void 3180 nce_fastpath_list_delete(nce_t *nce) 3181 { 3182 nce_t *nce_ptr; 3183 3184 ill_t *ill; 3185 3186 ill = nce->nce_ill; 3187 ASSERT(ill != NULL); 3188 3189 mutex_enter(&ill->ill_lock); 3190 if (nce->nce_fastpath == NULL) 3191 goto done; 3192 3193 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3194 3195 if (ill->ill_fastpath_list == nce) { 3196 ill->ill_fastpath_list = nce->nce_fastpath; 3197 } else { 3198 nce_ptr = ill->ill_fastpath_list; 3199 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3200 if (nce_ptr->nce_fastpath == nce) { 3201 nce_ptr->nce_fastpath = nce->nce_fastpath; 3202 break; 3203 } 3204 nce_ptr = nce_ptr->nce_fastpath; 3205 } 3206 } 3207 3208 nce->nce_fastpath = NULL; 3209 done: 3210 mutex_exit(&ill->ill_lock); 3211 } 3212 3213 /* 3214 * Update all NCE's that are not in fastpath mode and 3215 * have an nce_fp_mp that matches mp. 
mp->b_cont contains 3216 * the fastpath header. 3217 * 3218 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 3219 */ 3220 boolean_t 3221 ndp_fastpath_update(nce_t *nce, void *arg) 3222 { 3223 mblk_t *mp, *fp_mp; 3224 uchar_t *mp_rptr, *ud_mp_rptr; 3225 mblk_t *ud_mp = nce->nce_res_mp; 3226 ptrdiff_t cmplen; 3227 3228 if (nce->nce_flags & NCE_F_MAPPING) 3229 return (B_TRUE); 3230 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3231 return (B_TRUE); 3232 3233 ip2dbg(("ndp_fastpath_update: trying\n")); 3234 mp = (mblk_t *)arg; 3235 mp_rptr = mp->b_rptr; 3236 cmplen = mp->b_wptr - mp_rptr; 3237 ASSERT(cmplen >= 0); 3238 ud_mp_rptr = ud_mp->b_rptr; 3239 /* 3240 * The nce is locked here to prevent any other threads 3241 * from accessing and changing nce_res_mp when the IPv6 address 3242 * becomes resolved to an lla while we're in the middle 3243 * of looking at and comparing the hardware address (lla). 3244 * It is also locked to prevent multiple threads in nce_fastpath_update 3245 * from examining nce_res_mp atthe same time. 3246 */ 3247 mutex_enter(&nce->nce_lock); 3248 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3249 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3250 mutex_exit(&nce->nce_lock); 3251 /* 3252 * Don't take the ire off the fastpath list yet, 3253 * since the response may come later. 3254 */ 3255 return (B_FALSE); 3256 } 3257 /* Matched - install mp as the fastpath mp */ 3258 ip1dbg(("ndp_fastpath_update: match\n")); 3259 fp_mp = dupb(mp->b_cont); 3260 if (fp_mp != NULL) { 3261 nce->nce_fp_mp = fp_mp; 3262 } 3263 mutex_exit(&nce->nce_lock); 3264 return (B_TRUE); 3265 } 3266 3267 /* 3268 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3269 * driver. Note that it assumes IP is exclusive... 3270 */ 3271 /* ARGSUSED */ 3272 void 3273 ndp_fastpath_flush(nce_t *nce, char *arg) 3274 { 3275 if (nce->nce_flags & NCE_F_MAPPING) 3276 return; 3277 /* No fastpath info? 
*/ 3278 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3279 return; 3280 3281 if (nce->nce_ipversion == IPV4_VERSION && 3282 nce->nce_flags & NCE_F_BCAST) { 3283 /* 3284 * IPv4 BROADCAST entries: 3285 * We can't delete the nce since it is difficult to 3286 * recreate these without going through the 3287 * ipif down/up dance. 3288 * 3289 * All access to nce->nce_fp_mp in the case of these 3290 * is protected by nce_lock. 3291 */ 3292 mutex_enter(&nce->nce_lock); 3293 if (nce->nce_fp_mp != NULL) { 3294 freeb(nce->nce_fp_mp); 3295 nce->nce_fp_mp = NULL; 3296 mutex_exit(&nce->nce_lock); 3297 nce_fastpath(nce); 3298 } else { 3299 mutex_exit(&nce->nce_lock); 3300 } 3301 } else { 3302 /* Just delete the NCE... */ 3303 ndp_delete(nce); 3304 } 3305 } 3306 3307 /* 3308 * Return a pointer to a given option in the packet. 3309 * Assumes that option part of the packet have already been validated. 3310 */ 3311 nd_opt_hdr_t * 3312 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3313 { 3314 while (optlen > 0) { 3315 if (opt->nd_opt_type == opt_type) 3316 return (opt); 3317 optlen -= 8 * opt->nd_opt_len; 3318 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3319 } 3320 return (NULL); 3321 } 3322 3323 /* 3324 * Verify all option lengths present are > 0, also check to see 3325 * if the option lengths and packet length are consistent. 3326 */ 3327 boolean_t 3328 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3329 { 3330 ASSERT(opt != NULL); 3331 while (optlen > 0) { 3332 if (opt->nd_opt_len == 0) 3333 return (B_FALSE); 3334 optlen -= 8 * opt->nd_opt_len; 3335 if (optlen < 0) 3336 return (B_FALSE); 3337 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3338 } 3339 return (B_TRUE); 3340 } 3341 3342 /* 3343 * ndp_walk function. 3344 * Free a fraction of the NCE cache entries. 3345 * A fraction of zero means to not free any in that category. 
3346 */ 3347 void 3348 ndp_cache_reclaim(nce_t *nce, char *arg) 3349 { 3350 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3351 uint_t rand; 3352 3353 if (nce->nce_flags & NCE_F_PERMANENT) 3354 return; 3355 3356 rand = (uint_t)lbolt + 3357 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3358 if (ncr->ncr_host != 0 && 3359 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3360 ndp_delete(nce); 3361 return; 3362 } 3363 } 3364 3365 /* 3366 * ndp_walk function. 3367 * Count the number of NCEs that can be deleted. 3368 * These would be hosts but not routers. 3369 */ 3370 void 3371 ndp_cache_count(nce_t *nce, char *arg) 3372 { 3373 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3374 3375 if (nce->nce_flags & NCE_F_PERMANENT) 3376 return; 3377 3378 ncc->ncc_total++; 3379 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3380 ncc->ncc_host++; 3381 } 3382 3383 #ifdef DEBUG 3384 void 3385 nce_trace_ref(nce_t *nce) 3386 { 3387 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3388 3389 if (nce->nce_trace_disable) 3390 return; 3391 3392 if (!th_trace_ref(nce, nce->nce_ill->ill_ipst)) { 3393 nce->nce_trace_disable = B_TRUE; 3394 nce_trace_cleanup(nce); 3395 } 3396 } 3397 3398 void 3399 nce_untrace_ref(nce_t *nce) 3400 { 3401 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3402 3403 if (!nce->nce_trace_disable) 3404 th_trace_unref(nce); 3405 } 3406 3407 static void 3408 nce_trace_cleanup(const nce_t *nce) 3409 { 3410 th_trace_cleanup(nce, nce->nce_trace_disable); 3411 } 3412 #endif 3413 3414 /* 3415 * Called when address resolution fails due to a timeout. 3416 * Send an ICMP unreachable in response to all queued packets. 
3417 */ 3418 void 3419 arp_resolv_failed(nce_t *nce) 3420 { 3421 mblk_t *mp, *nxt_mp, *first_mp; 3422 char buf[INET6_ADDRSTRLEN]; 3423 zoneid_t zoneid = GLOBAL_ZONEID; 3424 struct in_addr ipv4addr; 3425 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3426 3427 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3428 ip3dbg(("arp_resolv_failed: dst %s\n", 3429 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3430 mutex_enter(&nce->nce_lock); 3431 mp = nce->nce_qd_mp; 3432 nce->nce_qd_mp = NULL; 3433 mutex_exit(&nce->nce_lock); 3434 3435 while (mp != NULL) { 3436 nxt_mp = mp->b_next; 3437 mp->b_next = NULL; 3438 mp->b_prev = NULL; 3439 3440 first_mp = mp; 3441 /* 3442 * Send icmp unreachable messages 3443 * to the hosts. 3444 */ 3445 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst); 3446 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3447 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3448 ICMP_HOST_UNREACHABLE, zoneid, ipst); 3449 mp = nxt_mp; 3450 } 3451 } 3452 3453 int 3454 ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3455 nce_t **newnce, nce_t *src_nce) 3456 { 3457 int err; 3458 nce_t *nce; 3459 in6_addr_t addr6; 3460 ip_stack_t *ipst = ill->ill_ipst; 3461 3462 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3463 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3464 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3465 nce = nce_lookup_addr(ill, &addr6, nce); 3466 if (nce == NULL) { 3467 err = ndp_add_v4(ill, addr, flags, newnce, src_nce); 3468 } else { 3469 *newnce = nce; 3470 err = EEXIST; 3471 } 3472 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3473 return (err); 3474 } 3475 3476 /* 3477 * NDP Cache Entry creation routine for IPv4. 3478 * Mapped entries are handled in arp. 3479 * This routine must always be called with ndp4->ndp_g_lock held. 3480 * Prior to return, nce_refcnt is incremented. 
3481 */ 3482 static int 3483 ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3484 nce_t **newnce, nce_t *src_nce) 3485 { 3486 static nce_t nce_nil; 3487 nce_t *nce; 3488 mblk_t *mp; 3489 mblk_t *template = NULL; 3490 nce_t **ncep; 3491 ip_stack_t *ipst = ill->ill_ipst; 3492 uint16_t state = ND_INITIAL; 3493 int err; 3494 3495 ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock)); 3496 ASSERT(!ill->ill_isv6); 3497 ASSERT((flags & NCE_F_MAPPING) == 0); 3498 3499 if (ill->ill_resolver_mp == NULL) 3500 return (EINVAL); 3501 /* 3502 * Allocate the mblk to hold the nce. 3503 */ 3504 mp = allocb(sizeof (nce_t), BPRI_MED); 3505 if (mp == NULL) 3506 return (ENOMEM); 3507 3508 nce = (nce_t *)mp->b_rptr; 3509 mp->b_wptr = (uchar_t *)&nce[1]; 3510 *nce = nce_nil; 3511 nce->nce_ill = ill; 3512 nce->nce_ipversion = IPV4_VERSION; 3513 nce->nce_flags = flags; 3514 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3515 nce->nce_rcnt = ill->ill_xmit_count; 3516 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3517 nce->nce_mask = ipv6_all_ones; 3518 nce->nce_extract_mask = ipv6_all_zeros; 3519 nce->nce_ll_extract_start = 0; 3520 nce->nce_qd_mp = NULL; 3521 nce->nce_mp = mp; 3522 /* This one is for nce getting created */ 3523 nce->nce_refcnt = 1; 3524 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3525 ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3526 3527 nce->nce_trace_disable = B_FALSE; 3528 3529 if (src_nce != NULL) { 3530 /* 3531 * src_nce has been provided by the caller. 
The only 3532 * caller who provides a non-null, non-broadcast 3533 * src_nce is from ip_newroute() which must pass in 3534 * a ND_REACHABLE src_nce (this condition is verified 3535 * via an ASSERT for the save_ire->ire_nce in ip_newroute()) 3536 */ 3537 mutex_enter(&src_nce->nce_lock); 3538 state = src_nce->nce_state; 3539 if ((src_nce->nce_flags & NCE_F_CONDEMNED) || 3540 (ipst->ips_ndp4->ndp_g_hw_change > 0)) { 3541 /* 3542 * src_nce has been deleted, or 3543 * ip_arp_news is in the middle of 3544 * flushing entries in the the nce. 3545 * Fail the add, since we don't know 3546 * if it is safe to copy the contents of 3547 * src_nce 3548 */ 3549 DTRACE_PROBE2(nce__bad__src__nce, 3550 nce_t *, src_nce, ill_t *, ill); 3551 mutex_exit(&src_nce->nce_lock); 3552 err = EINVAL; 3553 goto err_ret; 3554 } 3555 template = copyb(src_nce->nce_res_mp); 3556 mutex_exit(&src_nce->nce_lock); 3557 if (template == NULL) { 3558 err = ENOMEM; 3559 goto err_ret; 3560 } 3561 } else if (flags & NCE_F_BCAST) { 3562 /* 3563 * broadcast nce. 3564 */ 3565 template = copyb(ill->ill_bcast_mp); 3566 if (template == NULL) { 3567 err = ENOMEM; 3568 goto err_ret; 3569 } 3570 state = ND_REACHABLE; 3571 } else if (ill->ill_net_type == IRE_IF_NORESOLVER) { 3572 /* 3573 * NORESOLVER entries are always created in the REACHABLE 3574 * state. We create a nce_res_mp with the IP nexthop address 3575 * in the destination address in the DLPI hdr if the 3576 * physical length is exactly 4 bytes. 3577 * 3578 * XXX not clear which drivers set ill_phys_addr_length to 3579 * IP_ADDR_LEN. 
3580 */ 3581 if (ill->ill_phys_addr_length == IP_ADDR_LEN) { 3582 template = ill_dlur_gen((uchar_t *)addr, 3583 ill->ill_phys_addr_length, 3584 ill->ill_sap, ill->ill_sap_length); 3585 } else { 3586 template = copyb(ill->ill_resolver_mp); 3587 } 3588 if (template == NULL) { 3589 err = ENOMEM; 3590 goto err_ret; 3591 } 3592 state = ND_REACHABLE; 3593 } 3594 nce->nce_fp_mp = NULL; 3595 nce->nce_res_mp = template; 3596 nce->nce_state = state; 3597 if (state == ND_REACHABLE) { 3598 nce->nce_last = TICK_TO_MSEC(lbolt64); 3599 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3600 } else { 3601 nce->nce_last = 0; 3602 if (state == ND_INITIAL) 3603 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3604 } 3605 3606 ASSERT((nce->nce_res_mp == NULL && nce->nce_state == ND_INITIAL) || 3607 (nce->nce_res_mp != NULL && nce->nce_state == ND_REACHABLE)); 3608 /* 3609 * Atomically ensure that the ill is not CONDEMNED, before 3610 * adding the NCE. 3611 */ 3612 mutex_enter(&ill->ill_lock); 3613 if (ill->ill_state_flags & ILL_CONDEMNED) { 3614 mutex_exit(&ill->ill_lock); 3615 err = EINVAL; 3616 goto err_ret; 3617 } 3618 if ((nce->nce_next = *ncep) != NULL) 3619 nce->nce_next->nce_ptpn = &nce->nce_next; 3620 *ncep = nce; 3621 nce->nce_ptpn = ncep; 3622 *newnce = nce; 3623 /* This one is for nce being used by an active thread */ 3624 NCE_REFHOLD(*newnce); 3625 3626 /* Bump up the number of nce's referencing this ill */ 3627 ill->ill_nce_cnt++; 3628 mutex_exit(&ill->ill_lock); 3629 DTRACE_PROBE1(ndp__add__v4, nce_t *, nce); 3630 return (0); 3631 err_ret: 3632 freeb(mp); 3633 freemsg(template); 3634 return (err); 3635 } 3636 3637 void 3638 ndp_flush_qd_mp(nce_t *nce) 3639 { 3640 mblk_t *qd_mp, *qd_next; 3641 3642 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3643 qd_mp = nce->nce_qd_mp; 3644 nce->nce_qd_mp = NULL; 3645 while (qd_mp != NULL) { 3646 qd_next = qd_mp->b_next; 3647 qd_mp->b_next = NULL; 3648 qd_mp->b_prev = NULL; 3649 freemsg(qd_mp); 3650 qd_mp = qd_next; 3651 } 3652 } 3653 3654 3655 /* 3656 * 
ndp_walk routine to delete all REACHABLE entries that have a given
 * IPv4 address but whose cached link layer (MAC) address differs from the
 * one supplied.  This is used when ARP informs us that a
 * network-to-link-layer mapping may have changed.
 *
 * arg points to an nce_hw_map_t carrying the address (hwm_addr) and the
 * new hardware address and its length (hwm_hwaddr, hwm_hwlen).
 */
void
nce_delete_hw_changed(nce_t *nce, void *arg)
{
	nce_hw_map_t		*hwm = arg;
	dl_unitdata_req_t	*dlu;
	mblk_t			*res_mp;
	uchar_t			*macaddr;
	ipaddr_t		nce_addr;
	int			saplen;
	int			unchanged;

	if (nce->nce_state != ND_REACHABLE)
		return;

	IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr);
	if (nce_addr != hwm->hwm_addr)
		return;

	mutex_enter(&nce->nce_lock);
	res_mp = nce->nce_res_mp;
	if (res_mp == NULL) {
		mutex_exit(&nce->nce_lock);
		return;
	}

	/* The destination address is taken to follow the request header */
	dlu = (dl_unitdata_req_t *)res_mp->b_rptr;
	macaddr = (uchar_t *)(dlu + 1);

	/*
	 * A positive ill_sap_length means the MAC address is found saplen
	 * bytes further in; otherwise just take the magnitude.
	 */
	saplen = nce->nce_ill->ill_sap_length;
	if (saplen > 0)
		macaddr += saplen;
	else
		saplen = -saplen;

	/*
	 * If the hardware address is unchanged, then leave this one alone.
	 * Note that saplen == abs(saplen) now.
	 */
	unchanged = (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen &&
	    bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0);
	mutex_exit(&nce->nce_lock);
	if (unchanged)
		return;

	DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce);
	ndp_delete(nce);
}

/*
 * This function verifies whether a given IPv4 address is potentially known to
 * the NCE subsystem.  If so, then ARP must not delete the corresponding ace_t,
 * so that it can continue to look for hardware changes on that address.
3710 */ 3711 boolean_t 3712 ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns) 3713 { 3714 nce_t *nce; 3715 struct in_addr nceaddr; 3716 ip_stack_t *ipst = ns->netstack_ip; 3717 3718 if (addr == INADDR_ANY) 3719 return (B_FALSE); 3720 3721 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3722 nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr); 3723 for (; nce != NULL; nce = nce->nce_next) { 3724 /* Note that only v4 mapped entries are in the table. */ 3725 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 3726 if (addr == nceaddr.s_addr && 3727 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 3728 /* Single flag check; no lock needed */ 3729 if (!(nce->nce_flags & NCE_F_CONDEMNED)) 3730 break; 3731 } 3732 } 3733 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3734 return (nce != NULL); 3735 } 3736