1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/stropts.h> 29 #include <sys/strsun.h> 30 #include <sys/sysmacros.h> 31 #include <sys/errno.h> 32 #include <sys/dlpi.h> 33 #include <sys/socket.h> 34 #include <sys/ddi.h> 35 #include <sys/sunddi.h> 36 #include <sys/cmn_err.h> 37 #include <sys/debug.h> 38 #include <sys/vtrace.h> 39 #include <sys/kmem.h> 40 #include <sys/zone.h> 41 #include <sys/ethernet.h> 42 #include <sys/sdt.h> 43 44 #include <net/if.h> 45 #include <net/if_types.h> 46 #include <net/if_dl.h> 47 #include <net/route.h> 48 #include <netinet/in.h> 49 #include <netinet/ip6.h> 50 #include <netinet/icmp6.h> 51 52 #include <inet/common.h> 53 #include <inet/mi.h> 54 #include <inet/mib2.h> 55 #include <inet/nd.h> 56 #include <inet/ip.h> 57 #include <inet/ip_impl.h> 58 #include <inet/ipclassifier.h> 59 #include <inet/ip_if.h> 60 #include <inet/ip_ire.h> 61 #include <inet/ip_rts.h> 62 #include <inet/ip6.h> 63 #include <inet/ip_ndp.h> 64 #include <inet/ipsec_impl.h> 65 #include <inet/ipsec_info.h> 66 #include <inet/sctp_ip.h> 67 68 /* 69 * Function names with nce_ prefix are static while function 70 * names with ndp_ prefix are used by rest of the IP. 71 * 72 * Lock ordering: 73 * 74 * ndp_g_lock -> ill_lock -> nce_lock 75 * 76 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 77 * nce_next. Nce_lock protects the contents of the NCE (particularly 78 * nce_refcnt). 79 */ 80 81 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 82 uint32_t ll_addr_len); 83 static void nce_ire_delete(nce_t *nce); 84 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 85 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 86 static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *, nce_t *); 87 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 88 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 89 uchar_t *addr); 90 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 91 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 92 static mblk_t *nce_udreq_alloc(ill_t *ill); 93 static void nce_update(nce_t *nce, uint16_t new_state, 94 uchar_t *new_ll_addr); 95 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 96 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 97 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 98 const in6_addr_t *target, int flag); 99 static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t, 100 nce_t **, nce_t *); 101 102 #ifdef DEBUG 103 static void nce_trace_cleanup(const nce_t *); 104 #endif 105 106 #define NCE_HASH_PTR_V4(ipst, addr) \ 107 (&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 108 109 #define NCE_HASH_PTR_V6(ipst, addr) \ 110 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ 111 NCE_TABLE_SIZE)])) 112 113 /* 114 * Compute default flags to use for an advertisement of this nce's address. 115 */ 116 static int 117 nce_advert_flags(const nce_t *nce) 118 { 119 int flag = 0; 120 121 if (nce->nce_flags & NCE_F_ISROUTER) 122 flag |= NDP_ISROUTER; 123 if (!(nce->nce_flags & NCE_F_ANYCAST)) 124 flag |= NDP_ORIDE; 125 126 return (flag); 127 } 128 129 /* Non-tunable probe interval, based on link capabilities */ 130 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 131 132 /* 133 * NDP Cache Entry creation routine. 134 * Mapped entries will never do NUD . 135 * This routine must always be called with ndp6->ndp_g_lock held. 136 * Prior to return, nce_refcnt is incremented. 137 */ 138 int 139 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 140 const in6_addr_t *mask, const in6_addr_t *extract_mask, 141 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 142 nce_t **newnce) 143 { 144 static nce_t nce_nil; 145 nce_t *nce; 146 mblk_t *mp; 147 mblk_t *template; 148 nce_t **ncep; 149 int err; 150 boolean_t dropped = B_FALSE; 151 ip_stack_t *ipst = ill->ill_ipst; 152 153 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 154 ASSERT(ill != NULL && ill->ill_isv6); 155 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 156 ip0dbg(("ndp_add_v6: no addr\n")); 157 return (EINVAL); 158 } 159 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 160 ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags)); 161 return (EINVAL); 162 } 163 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 164 (flags & NCE_F_MAPPING)) { 165 ip0dbg(("ndp_add_v6: extract mask zero for mapping")); 166 return (EINVAL); 167 } 168 /* 169 * Allocate the mblk to hold the nce. 170 * 171 * XXX This can come out of a separate cache - nce_cache. 172 * We don't need the mp anymore as there are no more 173 * "qwriter"s 174 */ 175 mp = allocb(sizeof (nce_t), BPRI_MED); 176 if (mp == NULL) 177 return (ENOMEM); 178 179 nce = (nce_t *)mp->b_rptr; 180 mp->b_wptr = (uchar_t *)&nce[1]; 181 *nce = nce_nil; 182 183 /* 184 * This one holds link layer address 185 */ 186 if (ill->ill_net_type == IRE_IF_RESOLVER) { 187 template = nce_udreq_alloc(ill); 188 } else { 189 if (ill->ill_resolver_mp == NULL) { 190 freeb(mp); 191 return (EINVAL); 192 } 193 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 194 template = copyb(ill->ill_resolver_mp); 195 } 196 if (template == NULL) { 197 freeb(mp); 198 return (ENOMEM); 199 } 200 nce->nce_ill = ill; 201 nce->nce_ipversion = IPV6_VERSION; 202 nce->nce_flags = flags; 203 nce->nce_state = state; 204 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 205 nce->nce_rcnt = ill->ill_xmit_count; 206 nce->nce_addr = *addr; 207 nce->nce_mask = *mask; 208 nce->nce_extract_mask = *extract_mask; 209 nce->nce_ll_extract_start = hw_extract_start; 210 nce->nce_fp_mp = NULL; 211 nce->nce_res_mp = template; 212 if (state == ND_REACHABLE) 213 nce->nce_last = TICK_TO_MSEC(lbolt64); 214 else 215 nce->nce_last = 0; 216 nce->nce_qd_mp = NULL; 217 nce->nce_mp = mp; 218 if (hw_addr != NULL) 219 nce_set_ll(nce, hw_addr); 220 /* This one is for nce getting created */ 221 nce->nce_refcnt = 1; 222 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 223 if (nce->nce_flags & NCE_F_MAPPING) { 224 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 225 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 226 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 227 ncep = &ipst->ips_ndp6->nce_mask_entries; 228 } else { 229 ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 230 } 231 232 nce->nce_trace_disable = B_FALSE; 233 234 /* 235 * Atomically ensure that the ill is not CONDEMNED, before 236 * adding the NCE. 237 */ 238 mutex_enter(&ill->ill_lock); 239 if (ill->ill_state_flags & ILL_CONDEMNED) { 240 mutex_exit(&ill->ill_lock); 241 freeb(mp); 242 freeb(template); 243 return (EINVAL); 244 } 245 if ((nce->nce_next = *ncep) != NULL) 246 nce->nce_next->nce_ptpn = &nce->nce_next; 247 *ncep = nce; 248 nce->nce_ptpn = ncep; 249 *newnce = nce; 250 /* This one is for nce being used by an active thread */ 251 NCE_REFHOLD(*newnce); 252 253 /* Bump up the number of nce's referencing this ill */ 254 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 255 (char *), "nce", (void *), nce); 256 ill->ill_nce_cnt++; 257 mutex_exit(&ill->ill_lock); 258 259 err = 0; 260 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 261 mutex_enter(&nce->nce_lock); 262 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 263 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 264 mutex_exit(&nce->nce_lock); 265 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 266 &ipv6_all_zeros, addr, NDP_PROBE); 267 if (dropped) { 268 mutex_enter(&nce->nce_lock); 269 nce->nce_pcnt++; 270 mutex_exit(&nce->nce_lock); 271 } 272 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 273 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 274 err = EINPROGRESS; 275 } else if (flags & NCE_F_UNSOL_ADV) { 276 /* 277 * We account for the transmit below by assigning one 278 * less than the ndd variable. Subsequent decrements 279 * are done in ndp_timer. 280 */ 281 mutex_enter(&nce->nce_lock); 282 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 283 nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1; 284 mutex_exit(&nce->nce_lock); 285 dropped = nce_xmit(ill, 286 ND_NEIGHBOR_ADVERT, 287 ill, /* ill to be used for extracting ill_nd_lla */ 288 B_TRUE, /* use ill_nd_lla */ 289 addr, /* Source and target of the advertisement pkt */ 290 &ipv6_all_hosts_mcast, /* Destination of the packet */ 291 nce_advert_flags(nce)); 292 mutex_enter(&nce->nce_lock); 293 if (dropped) 294 nce->nce_unsolicit_count++; 295 if (nce->nce_unsolicit_count != 0) { 296 nce->nce_timeout_id = timeout(ndp_timer, nce, 297 MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval)); 298 } 299 mutex_exit(&nce->nce_lock); 300 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 301 } 302 /* 303 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 304 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 305 * We call nce_fastpath from nce_update if the link layer address of 306 * the peer changes from nce_update 307 */ 308 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 309 nce_fastpath(nce); 310 return (err); 311 } 312 313 int 314 ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 315 const in6_addr_t *mask, const in6_addr_t *extract_mask, 316 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 317 nce_t **newnce) 318 { 319 int err = 0; 320 nce_t *nce; 321 ip_stack_t *ipst = ill->ill_ipst; 322 323 ASSERT(ill->ill_isv6); 324 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 325 326 /* Get head of v6 hash table */ 327 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 328 nce = nce_lookup_addr(ill, addr, nce); 329 if (nce == NULL) { 330 err = ndp_add_v6(ill, 331 hw_addr, 332 addr, 333 mask, 334 extract_mask, 335 hw_extract_start, 336 flags, 337 state, 338 newnce); 339 } else { 340 *newnce = nce; 341 err = EEXIST; 342 } 343 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 344 return (err); 345 } 346 347 /* 348 * Remove all the CONDEMNED nces from the appropriate hash table. 349 * We create a private list of NCEs, these may have ires pointing 350 * to them, so the list will be passed through to clean up dependent 351 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 352 */ 353 static void 354 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 355 { 356 nce_t *nce1; 357 nce_t **ptpn; 358 359 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 360 ASSERT(ndp->ndp_g_walker == 0); 361 for (; nce; nce = nce1) { 362 nce1 = nce->nce_next; 363 mutex_enter(&nce->nce_lock); 364 if (nce->nce_flags & NCE_F_CONDEMNED) { 365 ptpn = nce->nce_ptpn; 366 nce1 = nce->nce_next; 367 if (nce1 != NULL) 368 nce1->nce_ptpn = ptpn; 369 *ptpn = nce1; 370 nce->nce_ptpn = NULL; 371 nce->nce_next = NULL; 372 nce->nce_next = *free_nce_list; 373 *free_nce_list = nce; 374 } 375 mutex_exit(&nce->nce_lock); 376 } 377 } 378 379 /* 380 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 381 * will return this NCE. Also no new IREs will be created that 382 * point to this NCE (See ire_add_v6). Also no new timeouts will 383 * be started (See NDP_RESTART_TIMER). 384 * 2. Cancel any currently running timeouts. 385 * 3. If there is an ndp walker, return. The walker will do the cleanup. 386 * This ensures that walkers see a consistent list of NCEs while walking. 387 * 4. Otherwise remove the NCE from the list of NCEs 388 * 5. Delete all IREs pointing to this NCE. 389 */ 390 void 391 ndp_delete(nce_t *nce) 392 { 393 nce_t **ptpn; 394 nce_t *nce1; 395 int ipversion = nce->nce_ipversion; 396 ndp_g_t *ndp; 397 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 398 399 if (ipversion == IPV4_VERSION) 400 ndp = ipst->ips_ndp4; 401 else 402 ndp = ipst->ips_ndp6; 403 404 /* Serialize deletes */ 405 mutex_enter(&nce->nce_lock); 406 if (nce->nce_flags & NCE_F_CONDEMNED) { 407 /* Some other thread is doing the delete */ 408 mutex_exit(&nce->nce_lock); 409 return; 410 } 411 /* 412 * Caller has a refhold. Also 1 ref for being in the list. Thus 413 * refcnt has to be >= 2 414 */ 415 ASSERT(nce->nce_refcnt >= 2); 416 nce->nce_flags |= NCE_F_CONDEMNED; 417 mutex_exit(&nce->nce_lock); 418 419 nce_fastpath_list_delete(nce); 420 421 /* 422 * Cancel any running timer. Timeout can't be restarted 423 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 424 * Passing invalid timeout id is fine. 425 */ 426 if (nce->nce_timeout_id != 0) { 427 (void) untimeout(nce->nce_timeout_id); 428 nce->nce_timeout_id = 0; 429 } 430 431 mutex_enter(&ndp->ndp_g_lock); 432 if (nce->nce_ptpn == NULL) { 433 /* 434 * The last ndp walker has already removed this nce from 435 * the list after we marked the nce CONDEMNED and before 436 * we grabbed the global lock. 437 */ 438 mutex_exit(&ndp->ndp_g_lock); 439 return; 440 } 441 if (ndp->ndp_g_walker > 0) { 442 /* 443 * Can't unlink. The walker will clean up 444 */ 445 ndp->ndp_g_walker_cleanup = B_TRUE; 446 mutex_exit(&ndp->ndp_g_lock); 447 return; 448 } 449 450 /* 451 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 452 * the timer since it is marked CONDEMNED. 453 */ 454 ptpn = nce->nce_ptpn; 455 nce1 = nce->nce_next; 456 if (nce1 != NULL) 457 nce1->nce_ptpn = ptpn; 458 *ptpn = nce1; 459 nce->nce_ptpn = NULL; 460 nce->nce_next = NULL; 461 mutex_exit(&ndp->ndp_g_lock); 462 463 nce_ire_delete(nce); 464 } 465 466 void 467 ndp_inactive(nce_t *nce) 468 { 469 mblk_t **mpp; 470 ill_t *ill; 471 472 ASSERT(nce->nce_refcnt == 0); 473 ASSERT(MUTEX_HELD(&nce->nce_lock)); 474 ASSERT(nce->nce_fastpath == NULL); 475 476 /* Free all nce allocated messages */ 477 mpp = &nce->nce_first_mp_to_free; 478 do { 479 while (*mpp != NULL) { 480 mblk_t *mp; 481 482 mp = *mpp; 483 *mpp = mp->b_next; 484 485 inet_freemsg(mp); 486 } 487 } while (mpp++ != &nce->nce_last_mp_to_free); 488 489 #ifdef DEBUG 490 nce_trace_cleanup(nce); 491 #endif 492 493 ill = nce->nce_ill; 494 mutex_enter(&ill->ill_lock); 495 DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill, 496 (char *), "nce", (void *), nce); 497 ill->ill_nce_cnt--; 498 /* 499 * If the number of nce's associated with this ill have dropped 500 * to zero, check whether we need to restart any operation that 501 * is waiting for this to happen. 502 */ 503 if (ILL_DOWN_OK(ill)) { 504 /* ipif_ill_refrele_tail drops the ill_lock */ 505 ipif_ill_refrele_tail(ill); 506 } else { 507 mutex_exit(&ill->ill_lock); 508 } 509 mutex_destroy(&nce->nce_lock); 510 if (nce->nce_mp != NULL) 511 inet_freemsg(nce->nce_mp); 512 } 513 514 /* 515 * ndp_walk routine. Delete the nce if it is associated with the ill 516 * that is going away. Always called as a writer. 517 */ 518 void 519 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 520 { 521 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 522 ndp_delete(nce); 523 } 524 } 525 526 /* 527 * Walk a list of to be inactive NCEs and blow away all the ires. 528 */ 529 static void 530 nce_ire_delete_list(nce_t *nce) 531 { 532 nce_t *nce_next; 533 534 ASSERT(nce != NULL); 535 while (nce != NULL) { 536 nce_next = nce->nce_next; 537 nce->nce_next = NULL; 538 539 /* 540 * It is possible for the last ndp walker (this thread) 541 * to come here after ndp_delete has marked the nce CONDEMNED 542 * and before it has removed the nce from the fastpath list 543 * or called untimeout. So we need to do it here. It is safe 544 * for both ndp_delete and this thread to do it twice or 545 * even simultaneously since each of the threads has a 546 * reference on the nce. 547 */ 548 nce_fastpath_list_delete(nce); 549 /* 550 * Cancel any running timer. Timeout can't be restarted 551 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 552 * Passing invalid timeout id is fine. 553 */ 554 if (nce->nce_timeout_id != 0) { 555 (void) untimeout(nce->nce_timeout_id); 556 nce->nce_timeout_id = 0; 557 } 558 /* 559 * We might hit this func thus in the v4 case: 560 * ipif_down->ipif_ndp_down->ndp_walk 561 */ 562 563 if (nce->nce_ipversion == IPV4_VERSION) { 564 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 565 IRE_CACHE, nce_ire_delete1, 566 (char *)nce, nce->nce_ill); 567 } else { 568 ASSERT(nce->nce_ipversion == IPV6_VERSION); 569 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 570 IRE_CACHE, nce_ire_delete1, 571 (char *)nce, nce->nce_ill); 572 } 573 NCE_REFRELE_NOTR(nce); 574 nce = nce_next; 575 } 576 } 577 578 /* 579 * Delete an ire when the nce goes away. 580 */ 581 /* ARGSUSED */ 582 static void 583 nce_ire_delete(nce_t *nce) 584 { 585 if (nce->nce_ipversion == IPV6_VERSION) { 586 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 587 nce_ire_delete1, (char *)nce, nce->nce_ill); 588 NCE_REFRELE_NOTR(nce); 589 } else { 590 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 591 nce_ire_delete1, (char *)nce, nce->nce_ill); 592 NCE_REFRELE_NOTR(nce); 593 } 594 } 595 596 /* 597 * ire_walk routine used to delete every IRE that shares this nce 598 */ 599 static void 600 nce_ire_delete1(ire_t *ire, char *nce_arg) 601 { 602 nce_t *nce = (nce_t *)nce_arg; 603 604 ASSERT(ire->ire_type == IRE_CACHE); 605 606 if (ire->ire_nce == nce) { 607 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 608 ire_delete(ire); 609 } 610 } 611 612 /* 613 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 614 */ 615 boolean_t 616 ndp_restart_dad(nce_t *nce) 617 { 618 boolean_t started; 619 boolean_t dropped; 620 621 if (nce == NULL) 622 return (B_FALSE); 623 mutex_enter(&nce->nce_lock); 624 if (nce->nce_state == ND_PROBE) { 625 mutex_exit(&nce->nce_lock); 626 started = B_TRUE; 627 } else if (nce->nce_state == ND_REACHABLE) { 628 nce->nce_state = ND_PROBE; 629 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 630 mutex_exit(&nce->nce_lock); 631 dropped = nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, NULL, 632 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, NDP_PROBE); 633 if (dropped) { 634 mutex_enter(&nce->nce_lock); 635 nce->nce_pcnt++; 636 mutex_exit(&nce->nce_lock); 637 } 638 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 639 started = B_TRUE; 640 } else { 641 mutex_exit(&nce->nce_lock); 642 started = B_FALSE; 643 } 644 return (started); 645 } 646 647 /* 648 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 649 * If one is found, the refcnt on the nce will be incremented. 650 */ 651 nce_t * 652 ndp_lookup_v6(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 653 { 654 nce_t *nce; 655 ip_stack_t *ipst; 656 657 ASSERT(ill != NULL); 658 ipst = ill->ill_ipst; 659 660 ASSERT(ill != NULL && ill->ill_isv6); 661 if (!caller_holds_lock) { 662 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 663 } 664 665 /* Get head of v6 hash table */ 666 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 667 nce = nce_lookup_addr(ill, addr, nce); 668 if (nce == NULL) 669 nce = nce_lookup_mapping(ill, addr); 670 if (!caller_holds_lock) 671 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 672 return (nce); 673 } 674 /* 675 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 676 * If one is found, the refcnt on the nce will be incremented. 677 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 678 * so we skip the nce_lookup_mapping call. 679 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 680 */ 681 nce_t * 682 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 683 { 684 nce_t *nce; 685 in6_addr_t addr6; 686 ip_stack_t *ipst = ill->ill_ipst; 687 688 if (!caller_holds_lock) { 689 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 690 } 691 692 /* Get head of v4 hash table */ 693 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 694 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 695 nce = nce_lookup_addr(ill, &addr6, nce); 696 if (!caller_holds_lock) 697 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 698 return (nce); 699 } 700 701 /* 702 * Cache entry lookup. Try to find an nce matching the parameters passed. 703 * Look only for exact entries (no mappings). If an nce is found, increment 704 * the hold count on that nce. The caller passes in the start of the 705 * appropriate hash table, and must be holding the appropriate global 706 * lock (ndp_g_lock). 707 */ 708 static nce_t * 709 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr, nce_t *nce) 710 { 711 ndp_g_t *ndp; 712 ip_stack_t *ipst = ill->ill_ipst; 713 714 if (ill->ill_isv6) 715 ndp = ipst->ips_ndp6; 716 else 717 ndp = ipst->ips_ndp4; 718 719 ASSERT(ill != NULL); 720 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 721 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 722 return (NULL); 723 for (; nce != NULL; nce = nce->nce_next) { 724 if (nce->nce_ill == ill) { 725 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 726 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 727 &ipv6_all_ones)) { 728 mutex_enter(&nce->nce_lock); 729 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 730 NCE_REFHOLD_LOCKED(nce); 731 mutex_exit(&nce->nce_lock); 732 break; 733 } 734 mutex_exit(&nce->nce_lock); 735 } 736 } 737 } 738 return (nce); 739 } 740 741 /* 742 * Cache entry lookup. Try to find an nce matching the parameters passed. 743 * Look only for mappings. 744 */ 745 static nce_t * 746 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 747 { 748 nce_t *nce; 749 ip_stack_t *ipst = ill->ill_ipst; 750 751 ASSERT(ill != NULL && ill->ill_isv6); 752 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 753 if (!IN6_IS_ADDR_MULTICAST(addr)) 754 return (NULL); 755 nce = ipst->ips_ndp6->nce_mask_entries; 756 for (; nce != NULL; nce = nce->nce_next) 757 if (nce->nce_ill == ill && 758 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 759 mutex_enter(&nce->nce_lock); 760 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 761 NCE_REFHOLD_LOCKED(nce); 762 mutex_exit(&nce->nce_lock); 763 break; 764 } 765 mutex_exit(&nce->nce_lock); 766 } 767 return (nce); 768 } 769 770 /* 771 * Process passed in parameters either from an incoming packet or via 772 * user ioctl. 773 */ 774 void 775 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 776 { 777 ill_t *ill = nce->nce_ill; 778 uint32_t hw_addr_len = ill->ill_nd_lla_len; 779 mblk_t *mp; 780 boolean_t ll_updated = B_FALSE; 781 boolean_t ll_changed; 782 ip_stack_t *ipst = ill->ill_ipst; 783 784 ASSERT(nce->nce_ipversion == IPV6_VERSION); 785 /* 786 * No updates of link layer address or the neighbor state is 787 * allowed, when the cache is in NONUD state. This still 788 * allows for responding to reachability solicitation. 789 */ 790 mutex_enter(&nce->nce_lock); 791 if (nce->nce_state == ND_INCOMPLETE) { 792 if (hw_addr == NULL) { 793 mutex_exit(&nce->nce_lock); 794 return; 795 } 796 nce_set_ll(nce, hw_addr); 797 /* 798 * Update nce state and send the queued packets 799 * back to ip this time ire will be added. 800 */ 801 if (flag & ND_NA_FLAG_SOLICITED) { 802 nce_update(nce, ND_REACHABLE, NULL); 803 } else { 804 nce_update(nce, ND_STALE, NULL); 805 } 806 mutex_exit(&nce->nce_lock); 807 nce_fastpath(nce); 808 mutex_enter(&nce->nce_lock); 809 mp = nce->nce_qd_mp; 810 nce->nce_qd_mp = NULL; 811 mutex_exit(&nce->nce_lock); 812 while (mp != NULL) { 813 mblk_t *nxt_mp, *data_mp; 814 815 nxt_mp = mp->b_next; 816 mp->b_next = NULL; 817 818 if (mp->b_datap->db_type == M_CTL) 819 data_mp = mp->b_cont; 820 else 821 data_mp = mp; 822 if (data_mp->b_prev != NULL) { 823 ill_t *inbound_ill; 824 queue_t *fwdq = NULL; 825 uint_t ifindex; 826 827 ifindex = (uint_t)(uintptr_t)data_mp->b_prev; 828 inbound_ill = ill_lookup_on_ifindex(ifindex, 829 B_TRUE, NULL, NULL, NULL, NULL, ipst); 830 if (inbound_ill == NULL) { 831 data_mp->b_prev = NULL; 832 freemsg(mp); 833 return; 834 } else { 835 fwdq = inbound_ill->ill_rq; 836 } 837 data_mp->b_prev = NULL; 838 /* 839 * Send a forwarded packet back into ip_rput_v6 840 * just as in ire_send_v6(). 841 * Extract the queue from b_prev (set in 842 * ip_rput_data_v6). 843 */ 844 if (fwdq != NULL) { 845 /* 846 * Forwarded packets hop count will 847 * get decremented in ip_rput_data_v6 848 */ 849 if (data_mp != mp) 850 freeb(mp); 851 put(fwdq, data_mp); 852 } else { 853 /* 854 * Send locally originated packets back 855 * into * ip_wput_v6. 856 */ 857 put(ill->ill_wq, mp); 858 } 859 ill_refrele(inbound_ill); 860 } else { 861 put(ill->ill_wq, mp); 862 } 863 mp = nxt_mp; 864 } 865 return; 866 } 867 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 868 if (!is_adv) { 869 /* If this is a SOLICITATION request only */ 870 if (ll_changed) 871 nce_update(nce, ND_STALE, hw_addr); 872 mutex_exit(&nce->nce_lock); 873 return; 874 } 875 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 876 /* If in any other state than REACHABLE, ignore */ 877 if (nce->nce_state == ND_REACHABLE) { 878 nce_update(nce, ND_STALE, NULL); 879 } 880 mutex_exit(&nce->nce_lock); 881 return; 882 } else { 883 if (ll_changed) { 884 nce_update(nce, ND_UNCHANGED, hw_addr); 885 ll_updated = B_TRUE; 886 } 887 if (flag & ND_NA_FLAG_SOLICITED) { 888 nce_update(nce, ND_REACHABLE, NULL); 889 } else { 890 if (ll_updated) { 891 nce_update(nce, ND_STALE, NULL); 892 } 893 } 894 mutex_exit(&nce->nce_lock); 895 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 896 NCE_F_ISROUTER)) { 897 ire_t *ire; 898 899 /* 900 * Router turned to host. We need to remove the 901 * entry as well as any default route that may be 902 * using this as a next hop. This is required by 903 * section 7.2.5 of RFC 2461. 904 */ 905 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 906 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 907 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 908 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 909 MATCH_IRE_DEFAULT, ipst); 910 if (ire != NULL) { 911 ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); 912 ire_delete(ire); 913 ire_refrele(ire); 914 } 915 ndp_delete(nce); 916 } 917 } 918 } 919 920 /* 921 * Pass arg1 to the pfi supplied, along with each nce in existence. 922 * ndp_walk() places a REFHOLD on the nce and drops the lock when 923 * walking the hash list. 924 */ 925 void 926 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 927 boolean_t trace) 928 { 929 930 nce_t *nce; 931 nce_t *nce1; 932 nce_t **ncep; 933 nce_t *free_nce_list = NULL; 934 935 mutex_enter(&ndp->ndp_g_lock); 936 /* Prevent ndp_delete from unlink and free of NCE */ 937 ndp->ndp_g_walker++; 938 mutex_exit(&ndp->ndp_g_lock); 939 for (ncep = ndp->nce_hash_tbl; 940 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 941 for (nce = *ncep; nce != NULL; nce = nce1) { 942 nce1 = nce->nce_next; 943 if (ill == NULL || nce->nce_ill == ill) { 944 if (trace) { 945 NCE_REFHOLD(nce); 946 (*pfi)(nce, arg1); 947 NCE_REFRELE(nce); 948 } else { 949 NCE_REFHOLD_NOTR(nce); 950 (*pfi)(nce, arg1); 951 NCE_REFRELE_NOTR(nce); 952 } 953 } 954 } 955 } 956 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 957 nce1 = nce->nce_next; 958 if (ill == NULL || nce->nce_ill == ill) { 959 if (trace) { 960 NCE_REFHOLD(nce); 961 (*pfi)(nce, arg1); 962 NCE_REFRELE(nce); 963 } else { 964 NCE_REFHOLD_NOTR(nce); 965 (*pfi)(nce, arg1); 966 NCE_REFRELE_NOTR(nce); 967 } 968 } 969 } 970 mutex_enter(&ndp->ndp_g_lock); 971 ndp->ndp_g_walker--; 972 /* 973 * While NCE's are removed from global list they are placed 974 * in a private list, to be passed to nce_ire_delete_list(). 975 * The reason is, there may be ires pointing to this nce 976 * which needs to cleaned up. 977 */ 978 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 979 /* Time to delete condemned entries */ 980 for (ncep = ndp->nce_hash_tbl; 981 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 982 nce = *ncep; 983 if (nce != NULL) { 984 nce_remove(ndp, nce, &free_nce_list); 985 } 986 } 987 nce = ndp->nce_mask_entries; 988 if (nce != NULL) { 989 nce_remove(ndp, nce, &free_nce_list); 990 } 991 ndp->ndp_g_walker_cleanup = B_FALSE; 992 } 993 994 mutex_exit(&ndp->ndp_g_lock); 995 996 if (free_nce_list != NULL) { 997 nce_ire_delete_list(free_nce_list); 998 } 999 } 1000 1001 /* 1002 * Walk everything. 1003 * Note that ill can be NULL hence can't derive the ipst from it. 1004 */ 1005 void 1006 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst) 1007 { 1008 ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE); 1009 ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE); 1010 } 1011 1012 /* 1013 * Process resolve requests. Handles both mapped entries 1014 * as well as cases that needs to be send out on the wire. 1015 * Lookup a NCE for a given IRE. Regardless of whether one exists 1016 * or one is created, we defer making ire point to nce until the 1017 * ire is actually added at which point the nce_refcnt on the nce is 1018 * incremented. This is done primarily to have symmetry between ire_add() 1019 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1020 */ 1021 int 1022 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 1023 { 1024 nce_t *nce; 1025 int err = 0; 1026 uint32_t ms; 1027 mblk_t *mp_nce = NULL; 1028 ip_stack_t *ipst = ill->ill_ipst; 1029 1030 ASSERT(ill->ill_isv6); 1031 if (IN6_IS_ADDR_MULTICAST(dst)) { 1032 err = nce_set_multicast(ill, dst); 1033 return (err); 1034 } 1035 err = ndp_lookup_then_add_v6(ill, 1036 NULL, /* No hardware address */ 1037 dst, 1038 &ipv6_all_ones, 1039 &ipv6_all_zeros, 1040 0, 1041 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1042 ND_INCOMPLETE, 1043 &nce); 1044 1045 switch (err) { 1046 case 0: 1047 /* 1048 * New cache entry was created. Make sure that the state 1049 * is not ND_INCOMPLETE. It can be in some other state 1050 * even before we send out the solicitation as we could 1051 * get un-solicited advertisements. 1052 * 1053 * If this is an XRESOLV interface, simply return 0, 1054 * since we don't want to solicit just yet. 1055 */ 1056 if (ill->ill_flags & ILLF_XRESOLV) { 1057 NCE_REFRELE(nce); 1058 return (0); 1059 } 1060 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1061 mutex_enter(&nce->nce_lock); 1062 if (nce->nce_state != ND_INCOMPLETE) { 1063 mutex_exit(&nce->nce_lock); 1064 rw_exit(&ipst->ips_ill_g_lock); 1065 NCE_REFRELE(nce); 1066 return (0); 1067 } 1068 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1069 if (mp_nce == NULL) { 1070 /* The caller will free mp */ 1071 mutex_exit(&nce->nce_lock); 1072 rw_exit(&ipst->ips_ill_g_lock); 1073 ndp_delete(nce); 1074 NCE_REFRELE(nce); 1075 return (ENOMEM); 1076 } 1077 ms = nce_solicit(nce, mp_nce); 1078 rw_exit(&ipst->ips_ill_g_lock); 1079 if (ms == 0) { 1080 /* The caller will free mp */ 1081 if (mp_nce != mp) 1082 freeb(mp_nce); 1083 mutex_exit(&nce->nce_lock); 1084 ndp_delete(nce); 1085 NCE_REFRELE(nce); 1086 return (EBUSY); 1087 } 1088 mutex_exit(&nce->nce_lock); 1089 NDP_RESTART_TIMER(nce, (clock_t)ms); 1090 NCE_REFRELE(nce); 1091 return (EINPROGRESS); 1092 case EEXIST: 1093 /* Resolution in progress just queue the packet */ 1094 mutex_enter(&nce->nce_lock); 1095 if (nce->nce_state == ND_INCOMPLETE) { 1096 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1097 if (mp_nce == NULL) { 1098 err = ENOMEM; 1099 } else { 1100 nce_queue_mp(nce, mp_nce); 1101 err = EINPROGRESS; 1102 } 1103 } else { 1104 /* 1105 * Any other state implies we have 1106 * a nce but IRE needs to be added ... 1107 * ire_add_v6() will take care of the 1108 * the case when the nce becomes CONDEMNED 1109 * before the ire is added to the table. 1110 */ 1111 err = 0; 1112 } 1113 mutex_exit(&nce->nce_lock); 1114 NCE_REFRELE(nce); 1115 break; 1116 default: 1117 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 1118 break; 1119 } 1120 return (err); 1121 } 1122 1123 /* 1124 * When there is no resolver, the link layer template is passed in 1125 * the IRE. 1126 * Lookup a NCE for a given IRE. Regardless of whether one exists 1127 * or one is created, we defer making ire point to nce until the 1128 * ire is actually added at which point the nce_refcnt on the nce is 1129 * incremented. This is done primarily to have symmetry between ire_add() 1130 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1131 */ 1132 int 1133 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1134 { 1135 nce_t *nce; 1136 int err = 0; 1137 1138 ASSERT(ill != NULL); 1139 ASSERT(ill->ill_isv6); 1140 if (IN6_IS_ADDR_MULTICAST(dst)) { 1141 err = nce_set_multicast(ill, dst); 1142 return (err); 1143 } 1144 1145 err = ndp_lookup_then_add_v6(ill, 1146 NULL, /* hardware address */ 1147 dst, 1148 &ipv6_all_ones, 1149 &ipv6_all_zeros, 1150 0, 1151 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1152 ND_REACHABLE, 1153 &nce); 1154 1155 switch (err) { 1156 case 0: 1157 /* 1158 * Cache entry with a proper resolver cookie was 1159 * created. 1160 */ 1161 NCE_REFRELE(nce); 1162 break; 1163 case EEXIST: 1164 err = 0; 1165 NCE_REFRELE(nce); 1166 break; 1167 default: 1168 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1169 break; 1170 } 1171 return (err); 1172 } 1173 1174 /* 1175 * For each interface an entry is added for the unspecified multicast group. 1176 * Here that mapping is used to form the multicast cache entry for a particular 1177 * multicast destination. 1178 */ 1179 static int 1180 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1181 { 1182 nce_t *mnce; /* Multicast mapping entry */ 1183 nce_t *nce; 1184 uchar_t *hw_addr = NULL; 1185 int err = 0; 1186 ip_stack_t *ipst = ill->ill_ipst; 1187 1188 ASSERT(ill != NULL); 1189 ASSERT(ill->ill_isv6); 1190 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1191 1192 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1193 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst)); 1194 nce = nce_lookup_addr(ill, dst, nce); 1195 if (nce != NULL) { 1196 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1197 NCE_REFRELE(nce); 1198 return (0); 1199 } 1200 /* No entry, now lookup for a mapping this should never fail */ 1201 mnce = nce_lookup_mapping(ill, dst); 1202 if (mnce == NULL) { 1203 /* Something broken for the interface. */ 1204 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1205 return (ESRCH); 1206 } 1207 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1208 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1209 /* 1210 * For IRE_IF_RESOLVER a hardware mapping can be 1211 * generated, for IRE_IF_NORESOLVER, resolution cookie 1212 * in the ill is copied in ndp_add_v6(). 1213 */ 1214 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1215 if (hw_addr == NULL) { 1216 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1217 NCE_REFRELE(mnce); 1218 return (ENOMEM); 1219 } 1220 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1221 } 1222 NCE_REFRELE(mnce); 1223 /* 1224 * IRE_IF_NORESOLVER type simply copies the resolution 1225 * cookie passed in. So no hw_addr is needed. 1226 */ 1227 err = ndp_add_v6(ill, 1228 hw_addr, 1229 dst, 1230 &ipv6_all_ones, 1231 &ipv6_all_zeros, 1232 0, 1233 NCE_F_NONUD, 1234 ND_REACHABLE, 1235 &nce); 1236 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1237 if (hw_addr != NULL) 1238 kmem_free(hw_addr, ill->ill_nd_lla_len); 1239 if (err != 0) { 1240 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1241 return (err); 1242 } 1243 NCE_REFRELE(nce); 1244 return (0); 1245 } 1246 1247 /* 1248 * Return the link layer address, and any flags of a nce. 1249 */ 1250 int 1251 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1252 { 1253 nce_t *nce; 1254 in6_addr_t *addr; 1255 sin6_t *sin6; 1256 dl_unitdata_req_t *dl; 1257 1258 ASSERT(ill != NULL && ill->ill_isv6); 1259 sin6 = (sin6_t *)&lnr->lnr_addr; 1260 addr = &sin6->sin6_addr; 1261 1262 nce = ndp_lookup_v6(ill, addr, B_FALSE); 1263 if (nce == NULL) 1264 return (ESRCH); 1265 /* If in INCOMPLETE state, no link layer address is available yet */ 1266 if (nce->nce_state == ND_INCOMPLETE) 1267 goto done; 1268 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1269 if (ill->ill_flags & ILLF_XRESOLV) 1270 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1271 else 1272 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1273 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1274 sizeof (lnr->lnr_hdw_addr)); 1275 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1276 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1277 if (nce->nce_flags & NCE_F_ISROUTER) 1278 lnr->lnr_flags = NDF_ISROUTER_ON; 1279 if (nce->nce_flags & NCE_F_ANYCAST) 1280 lnr->lnr_flags |= NDF_ANYCAST_ON; 1281 done: 1282 NCE_REFRELE(nce); 1283 return (0); 1284 } 1285 1286 /* 1287 * Send Enable/Disable multicast reqs to driver. 1288 */ 1289 int 1290 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1291 uint32_t hw_addr_offset, mblk_t *mp) 1292 { 1293 nce_t *nce; 1294 uchar_t *hw_addr; 1295 ip_stack_t *ipst = ill->ill_ipst; 1296 1297 ASSERT(ill != NULL && ill->ill_isv6); 1298 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1299 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1300 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1301 freemsg(mp); 1302 return (EINVAL); 1303 } 1304 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1305 nce = nce_lookup_mapping(ill, addr); 1306 if (nce == NULL) { 1307 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1308 freemsg(mp); 1309 return (ESRCH); 1310 } 1311 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1312 /* 1313 * Update dl_addr_length and dl_addr_offset for primitives that 1314 * have physical addresses as opposed to full saps 1315 */ 1316 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1317 case DL_ENABMULTI_REQ: 1318 /* Track the state if this is the first enabmulti */ 1319 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1320 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1321 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1322 break; 1323 case DL_DISABMULTI_REQ: 1324 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1325 break; 1326 default: 1327 NCE_REFRELE(nce); 1328 ip1dbg(("ndp_mcastreq: default\n")); 1329 return (EINVAL); 1330 } 1331 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1332 NCE_REFRELE(nce); 1333 ill_dlpi_send(ill, mp); 1334 return (0); 1335 } 1336 1337 /* 1338 * Send a neighbor solicitation. 1339 * Returns number of milliseconds after which we should either rexmit or abort. 1340 * Return of zero means we should abort. 1341 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1342 * 1343 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1344 * the packet. 1345 * NOTE: This routine does not consume mp. 1346 */ 1347 uint32_t 1348 nce_solicit(nce_t *nce, mblk_t *mp) 1349 { 1350 ill_t *ill; 1351 ill_t *src_ill; 1352 ip6_t *ip6h; 1353 in6_addr_t src; 1354 in6_addr_t dst; 1355 ipif_t *ipif; 1356 ip6i_t *ip6i; 1357 boolean_t dropped = B_FALSE; 1358 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 1359 1360 ASSERT(RW_READ_HELD(&ipst->ips_ill_g_lock)); 1361 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1362 ill = nce->nce_ill; 1363 ASSERT(ill != NULL); 1364 1365 if (nce->nce_rcnt == 0) { 1366 return (0); 1367 } 1368 1369 if (mp == NULL) { 1370 ASSERT(nce->nce_qd_mp != NULL); 1371 mp = nce->nce_qd_mp; 1372 } else { 1373 nce_queue_mp(nce, mp); 1374 } 1375 1376 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1377 if (mp->b_datap->db_type == M_CTL) 1378 mp = mp->b_cont; 1379 1380 ip6h = (ip6_t *)mp->b_rptr; 1381 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1382 /* 1383 * This message should have been pulled up already in 1384 * ip_wput_v6. We can't do pullups here because the message 1385 * could be from the nce_qd_mp which could have b_next/b_prev 1386 * non-NULL. 1387 */ 1388 ip6i = (ip6i_t *)ip6h; 1389 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1390 sizeof (ip6i_t) + IPV6_HDR_LEN); 1391 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1392 } 1393 src = ip6h->ip6_src; 1394 /* 1395 * If the src of outgoing packet is one of the assigned interface 1396 * addresses use it, otherwise we will pick the source address below. 1397 */ 1398 src_ill = ill; 1399 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1400 if (ill->ill_group != NULL) 1401 src_ill = ill->ill_group->illgrp_ill; 1402 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1403 for (ipif = src_ill->ill_ipif; ipif != NULL; 1404 ipif = ipif->ipif_next) { 1405 if (IN6_ARE_ADDR_EQUAL(&src, 1406 &ipif->ipif_v6lcl_addr)) { 1407 break; 1408 } 1409 } 1410 if (ipif != NULL) 1411 break; 1412 } 1413 /* 1414 * If no relevant ipif can be found, then it's not one of our 1415 * addresses. Reset to :: and let nce_xmit. If an ipif can be 1416 * found, but it's not yet done with DAD verification, then 1417 * just postpone this transmission until later. 1418 */ 1419 if (src_ill == NULL) 1420 src = ipv6_all_zeros; 1421 else if (!ipif->ipif_addr_ready) 1422 return (ill->ill_reachable_retrans_time); 1423 } 1424 dst = nce->nce_addr; 1425 /* 1426 * If source address is unspecified, nce_xmit will choose 1427 * one for us and initialize the hardware address also 1428 * appropriately. 1429 */ 1430 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1431 src_ill = NULL; 1432 nce->nce_rcnt--; 1433 mutex_exit(&nce->nce_lock); 1434 rw_exit(&ipst->ips_ill_g_lock); 1435 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1436 &dst, 0); 1437 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1438 mutex_enter(&nce->nce_lock); 1439 if (dropped) 1440 nce->nce_rcnt++; 1441 return (ill->ill_reachable_retrans_time); 1442 } 1443 1444 /* 1445 * Attempt to recover an address on an interface that's been marked as a 1446 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1447 * no easy way to just probe the address and have the right thing happen if 1448 * it's no longer in use. Instead, we just bring it up normally and allow the 1449 * regular interface start-up logic to probe for a remaining duplicate and take 1450 * us back down if necessary. 1451 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1452 * ip_ndp_excl. 1453 */ 1454 /* ARGSUSED */ 1455 static void 1456 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1457 { 1458 ill_t *ill = rq->q_ptr; 1459 ipif_t *ipif; 1460 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1461 1462 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1463 /* 1464 * We do not support recovery of proxy ARP'd interfaces, 1465 * because the system lacks a complete proxy ARP mechanism. 1466 */ 1467 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1468 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1469 continue; 1470 } 1471 1472 /* 1473 * If we have already recovered or if the interface is going 1474 * away, then ignore. 1475 */ 1476 mutex_enter(&ill->ill_lock); 1477 if (!(ipif->ipif_flags & IPIF_DUPLICATE) || 1478 (ipif->ipif_flags & (IPIF_MOVING | IPIF_CONDEMNED))) { 1479 mutex_exit(&ill->ill_lock); 1480 continue; 1481 } 1482 1483 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1484 ill->ill_ipif_dup_count--; 1485 mutex_exit(&ill->ill_lock); 1486 ipif->ipif_was_dup = B_TRUE; 1487 1488 if (ipif_ndp_up(ipif) != EINPROGRESS) 1489 (void) ipif_up_done_v6(ipif); 1490 } 1491 freeb(mp); 1492 } 1493 1494 /* 1495 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1496 * As long as someone else holds the address, the interface will stay down. 1497 * When that conflict goes away, the interface is brought back up. This is 1498 * done so that accidental shutdowns of addresses aren't made permanent. Your 1499 * server will recover from a failure. 1500 * 1501 * For DHCP and temporary addresses, recovery is not done in the kernel. 1502 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1503 * 1504 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1505 */ 1506 static void 1507 ipif6_dup_recovery(void *arg) 1508 { 1509 ipif_t *ipif = arg; 1510 1511 ipif->ipif_recovery_id = 0; 1512 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1513 return; 1514 1515 /* 1516 * No lock, because this is just an optimization. 1517 */ 1518 if (ipif->ipif_state_flags & (IPIF_MOVING | IPIF_CONDEMNED)) 1519 return; 1520 1521 /* If the link is down, we'll retry this later */ 1522 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1523 return; 1524 1525 ndp_do_recovery(ipif); 1526 } 1527 1528 /* 1529 * Perform interface recovery by forcing the duplicate interfaces up and 1530 * allowing the system to determine which ones should stay up. 1531 * 1532 * Called both by recovery timer expiry and link-up notification. 1533 */ 1534 void 1535 ndp_do_recovery(ipif_t *ipif) 1536 { 1537 ill_t *ill = ipif->ipif_ill; 1538 mblk_t *mp; 1539 ip_stack_t *ipst = ill->ill_ipst; 1540 1541 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1542 if (mp == NULL) { 1543 mutex_enter(&ill->ill_lock); 1544 if (ipif->ipif_recovery_id == 0 && 1545 !(ipif->ipif_state_flags & (IPIF_MOVING | 1546 IPIF_CONDEMNED))) { 1547 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1548 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1549 } 1550 mutex_exit(&ill->ill_lock); 1551 } else { 1552 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1553 sizeof (ipif->ipif_v6lcl_addr)); 1554 ill_refhold(ill); 1555 qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP, 1556 B_FALSE); 1557 } 1558 } 1559 1560 /* 1561 * Find the solicitation in the given message, and extract printable details 1562 * (MAC and IP addresses) from it. 1563 */ 1564 static nd_neighbor_solicit_t * 1565 ip_ndp_find_solicitation(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, char *hbuf, 1566 size_t hlen, char *sbuf, size_t slen, uchar_t **haddr) 1567 { 1568 nd_neighbor_solicit_t *ns; 1569 ip6_t *ip6h; 1570 uchar_t *addr; 1571 int alen; 1572 1573 alen = 0; 1574 ip6h = (ip6_t *)mp->b_rptr; 1575 if (dl_mp == NULL) { 1576 nd_opt_hdr_t *opt; 1577 int nslen; 1578 1579 /* 1580 * If it's from the fast-path, then it can't be a probe 1581 * message, and thus must include the source linkaddr option. 1582 * Extract that here. 1583 */ 1584 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1585 nslen = mp->b_wptr - (uchar_t *)ns; 1586 if ((nslen -= sizeof (*ns)) > 0) { 1587 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), nslen, 1588 ND_OPT_SOURCE_LINKADDR); 1589 if (opt != NULL && 1590 opt->nd_opt_len * 8 - sizeof (*opt) >= 1591 ill->ill_nd_lla_len) { 1592 addr = (uchar_t *)(opt + 1); 1593 alen = ill->ill_nd_lla_len; 1594 } 1595 } 1596 /* 1597 * We cheat a bit here for the sake of printing usable log 1598 * messages in the rare case where the reply we got was unicast 1599 * without a source linkaddr option, and the interface is in 1600 * fastpath mode. (Sigh.) 1601 */ 1602 if (alen == 0 && ill->ill_type == IFT_ETHER && 1603 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1604 struct ether_header *pether; 1605 1606 pether = (struct ether_header *)((char *)ip6h - 1607 sizeof (*pether)); 1608 addr = pether->ether_shost.ether_addr_octet; 1609 alen = ETHERADDRL; 1610 } 1611 } else { 1612 dl_unitdata_ind_t *dlu; 1613 1614 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1615 alen = dlu->dl_src_addr_length; 1616 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1617 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1618 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1619 if (ill->ill_sap_length < 0) { 1620 alen += ill->ill_sap_length; 1621 } else { 1622 addr += ill->ill_sap_length; 1623 alen -= ill->ill_sap_length; 1624 } 1625 } 1626 } 1627 if (alen > 0) { 1628 *haddr = addr; 1629 (void) mac_colon_addr(addr, alen, hbuf, hlen); 1630 } else { 1631 *haddr = NULL; 1632 (void) strcpy(hbuf, "?"); 1633 } 1634 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1635 (void) inet_ntop(AF_INET6, &ns->nd_ns_target, sbuf, slen); 1636 return (ns); 1637 } 1638 1639 /* 1640 * This is for exclusive changes due to NDP duplicate address detection 1641 * failure. 1642 */ 1643 /* ARGSUSED */ 1644 static void 1645 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1646 { 1647 ill_t *ill = rq->q_ptr; 1648 ipif_t *ipif; 1649 char ibuf[LIFNAMSIZ + 10]; /* 10 digits for logical i/f number */ 1650 char hbuf[MAC_STR_LEN]; 1651 char sbuf[INET6_ADDRSTRLEN]; 1652 nd_neighbor_solicit_t *ns; 1653 mblk_t *dl_mp = NULL; 1654 uchar_t *haddr; 1655 ip_stack_t *ipst = ill->ill_ipst; 1656 1657 if (DB_TYPE(mp) != M_DATA) { 1658 dl_mp = mp; 1659 mp = mp->b_cont; 1660 } 1661 ns = ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, sizeof (hbuf), sbuf, 1662 sizeof (sbuf), &haddr); 1663 if (haddr != NULL && 1664 bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) { 1665 /* 1666 * Ignore conflicts generated by misbehaving switches that just 1667 * reflect our own messages back to us. 1668 */ 1669 goto ignore_conflict; 1670 } 1671 1672 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1673 1674 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1675 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 1676 &ns->nd_ns_target)) { 1677 continue; 1678 } 1679 1680 /* If it's already marked, then don't do anything. */ 1681 if (ipif->ipif_flags & IPIF_DUPLICATE) 1682 continue; 1683 1684 /* 1685 * If this is a failure during duplicate recovery, then don't 1686 * complain. It may take a long time to recover. 1687 */ 1688 if (!ipif->ipif_was_dup) { 1689 ipif_get_name(ipif, ibuf, sizeof (ibuf)); 1690 cmn_err(CE_WARN, "%s has duplicate address %s (in " 1691 "use by %s); disabled", ibuf, sbuf, hbuf); 1692 } 1693 mutex_enter(&ill->ill_lock); 1694 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1695 ipif->ipif_flags |= IPIF_DUPLICATE; 1696 ill->ill_ipif_dup_count++; 1697 mutex_exit(&ill->ill_lock); 1698 (void) ipif_down(ipif, NULL, NULL); 1699 ipif_down_tail(ipif); 1700 mutex_enter(&ill->ill_lock); 1701 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1702 ill->ill_net_type == IRE_IF_RESOLVER && 1703 !(ipif->ipif_state_flags & (IPIF_MOVING | 1704 IPIF_CONDEMNED)) && 1705 ipst->ips_ip_dup_recovery > 0) { 1706 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1707 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1708 } 1709 mutex_exit(&ill->ill_lock); 1710 } 1711 ignore_conflict: 1712 if (dl_mp != NULL) 1713 freeb(dl_mp); 1714 freemsg(mp); 1715 } 1716 1717 /* 1718 * Handle failure by tearing down the ipifs with the specified address. Note 1719 * that tearing down the ipif also means deleting the nce through ipif_down, so 1720 * it's not possible to do recovery by just restarting the nce timer. Instead, 1721 * we start a timer on the ipif. 1722 */ 1723 static void 1724 ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1725 { 1726 if ((mp = copymsg(mp)) != NULL) { 1727 if (dl_mp == NULL) 1728 dl_mp = mp; 1729 else if ((dl_mp = copyb(dl_mp)) != NULL) 1730 dl_mp->b_cont = mp; 1731 if (dl_mp == NULL) { 1732 freemsg(mp); 1733 } else { 1734 ill_refhold(ill); 1735 qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP, 1736 B_FALSE); 1737 } 1738 } 1739 ndp_delete(nce); 1740 } 1741 1742 /* 1743 * Handle a discovered conflict: some other system is advertising that it owns 1744 * one of our IP addresses. We need to defend ourselves, or just shut down the 1745 * interface. 1746 */ 1747 static void 1748 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1749 { 1750 ipif_t *ipif; 1751 uint32_t now; 1752 uint_t maxdefense; 1753 uint_t defs; 1754 ip_stack_t *ipst = ill->ill_ipst; 1755 1756 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 1757 NULL, NULL, ipst); 1758 if (ipif == NULL) 1759 return; 1760 /* 1761 * First, figure out if this address is disposable. 1762 */ 1763 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1764 maxdefense = ipst->ips_ip_max_temp_defend; 1765 else 1766 maxdefense = ipst->ips_ip_max_defend; 1767 1768 /* 1769 * Now figure out how many times we've defended ourselves. Ignore 1770 * defenses that happened long in the past. 1771 */ 1772 now = gethrestime_sec(); 1773 mutex_enter(&nce->nce_lock); 1774 if ((defs = nce->nce_defense_count) > 0 && 1775 now - nce->nce_defense_time > ipst->ips_ip_defend_interval) { 1776 nce->nce_defense_count = defs = 0; 1777 } 1778 nce->nce_defense_count++; 1779 nce->nce_defense_time = now; 1780 mutex_exit(&nce->nce_lock); 1781 ipif_refrele(ipif); 1782 1783 /* 1784 * If we've defended ourselves too many times already, then give up and 1785 * tear down the interface(s) using this address. Otherwise, defend by 1786 * sending out an unsolicited Neighbor Advertisement. 1787 */ 1788 if (defs >= maxdefense) { 1789 ip_ndp_failure(ill, mp, dl_mp, nce); 1790 } else { 1791 char hbuf[MAC_STR_LEN]; 1792 char sbuf[INET6_ADDRSTRLEN]; 1793 uchar_t *haddr; 1794 1795 (void) ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, 1796 sizeof (hbuf), sbuf, sizeof (sbuf), &haddr); 1797 cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1798 hbuf, sbuf, ill->ill_name); 1799 (void) nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, B_FALSE, 1800 &nce->nce_addr, &ipv6_all_hosts_mcast, 1801 nce_advert_flags(nce)); 1802 } 1803 } 1804 1805 static void 1806 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1807 { 1808 nd_neighbor_solicit_t *ns; 1809 uint32_t hlen = ill->ill_nd_lla_len; 1810 uchar_t *haddr = NULL; 1811 icmp6_t *icmp_nd; 1812 ip6_t *ip6h; 1813 nce_t *our_nce = NULL; 1814 in6_addr_t target; 1815 in6_addr_t src; 1816 int len; 1817 int flag = 0; 1818 nd_opt_hdr_t *opt = NULL; 1819 boolean_t bad_solicit = B_FALSE; 1820 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1821 1822 ip6h = (ip6_t *)mp->b_rptr; 1823 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1824 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1825 src = ip6h->ip6_src; 1826 ns = (nd_neighbor_solicit_t *)icmp_nd; 1827 target = ns->nd_ns_target; 1828 if (IN6_IS_ADDR_MULTICAST(&target)) { 1829 if (ip_debug > 2) { 1830 /* ip1dbg */ 1831 pr_addr_dbg("ndp_input_solicit: Target is" 1832 " multicast! %s\n", AF_INET6, &target); 1833 } 1834 bad_solicit = B_TRUE; 1835 goto done; 1836 } 1837 if (len > sizeof (nd_neighbor_solicit_t)) { 1838 /* Options present */ 1839 opt = (nd_opt_hdr_t *)&ns[1]; 1840 len -= sizeof (nd_neighbor_solicit_t); 1841 if (!ndp_verify_optlen(opt, len)) { 1842 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1843 bad_solicit = B_TRUE; 1844 goto done; 1845 } 1846 } 1847 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1848 /* Check to see if this is a valid DAD solicitation */ 1849 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1850 if (ip_debug > 2) { 1851 /* ip1dbg */ 1852 pr_addr_dbg("ndp_input_solicit: IPv6 " 1853 "Destination is not solicited node " 1854 "multicast %s\n", AF_INET6, 1855 &ip6h->ip6_dst); 1856 } 1857 bad_solicit = B_TRUE; 1858 goto done; 1859 } 1860 } 1861 1862 our_nce = ndp_lookup_v6(ill, &target, B_FALSE); 1863 /* 1864 * If this is a valid Solicitation, a permanent 1865 * entry should exist in the cache 1866 */ 1867 if (our_nce == NULL || 1868 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1869 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1870 "ifname=%s ", ill->ill_name)); 1871 if (ip_debug > 2) { 1872 /* ip1dbg */ 1873 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1874 } 1875 bad_solicit = B_TRUE; 1876 goto done; 1877 } 1878 1879 /* At this point we should have a verified NS per spec */ 1880 if (opt != NULL) { 1881 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1882 if (opt != NULL) { 1883 haddr = (uchar_t *)&opt[1]; 1884 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 1885 hlen == 0) { 1886 ip1dbg(("ndp_input_advert: bad SLLA\n")); 1887 bad_solicit = B_TRUE; 1888 goto done; 1889 } 1890 } 1891 } 1892 1893 /* If sending directly to peer, set the unicast flag */ 1894 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 1895 flag |= NDP_UNICAST; 1896 1897 /* 1898 * Create/update the entry for the soliciting node. 1899 * or respond to outstanding queries, don't if 1900 * the source is unspecified address. 1901 */ 1902 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1903 int err; 1904 nce_t *nnce; 1905 1906 ASSERT(ill->ill_isv6); 1907 /* 1908 * Regular solicitations *must* include the Source Link-Layer 1909 * Address option. Ignore messages that do not. 1910 */ 1911 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1912 ip1dbg(("ndp_input_solicit: source link-layer address " 1913 "option missing with a specified source.\n")); 1914 bad_solicit = B_TRUE; 1915 goto done; 1916 } 1917 1918 /* 1919 * This is a regular solicitation. If we're still in the 1920 * process of verifying the address, then don't respond at all 1921 * and don't keep track of the sender. 1922 */ 1923 if (our_nce->nce_state == ND_PROBE) 1924 goto done; 1925 1926 /* 1927 * If the solicitation doesn't have sender hardware address 1928 * (legal for unicast solicitation), then process without 1929 * installing the return NCE. Either we already know it, or 1930 * we'll be forced to look it up when (and if) we reply to the 1931 * packet. 1932 */ 1933 if (haddr == NULL) 1934 goto no_source; 1935 1936 err = ndp_lookup_then_add_v6(ill, 1937 haddr, 1938 &src, /* Soliciting nodes address */ 1939 &ipv6_all_ones, 1940 &ipv6_all_zeros, 1941 0, 1942 0, 1943 ND_STALE, 1944 &nnce); 1945 switch (err) { 1946 case 0: 1947 /* done with this entry */ 1948 NCE_REFRELE(nnce); 1949 break; 1950 case EEXIST: 1951 /* 1952 * B_FALSE indicates this is not an 1953 * an advertisement. 1954 */ 1955 ndp_process(nnce, haddr, 0, B_FALSE); 1956 NCE_REFRELE(nnce); 1957 break; 1958 default: 1959 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1960 err)); 1961 goto done; 1962 } 1963 no_source: 1964 flag |= NDP_SOLICITED; 1965 } else { 1966 /* 1967 * No source link layer address option should be present in a 1968 * valid DAD request. 1969 */ 1970 if (haddr != NULL) { 1971 ip1dbg(("ndp_input_solicit: source link-layer address " 1972 "option present with an unspecified source.\n")); 1973 bad_solicit = B_TRUE; 1974 goto done; 1975 } 1976 if (our_nce->nce_state == ND_PROBE) { 1977 /* 1978 * Internally looped-back probes won't have DLPI 1979 * attached to them. External ones (which are sent by 1980 * multicast) always will. Just ignore our own 1981 * transmissions. 1982 */ 1983 if (dl_mp != NULL) { 1984 /* 1985 * If someone else is probing our address, then 1986 * we've crossed wires. Declare failure. 1987 */ 1988 ip_ndp_failure(ill, mp, dl_mp, our_nce); 1989 } 1990 goto done; 1991 } 1992 /* 1993 * This is a DAD probe. Multicast the advertisement to the 1994 * all-nodes address. 1995 */ 1996 src = ipv6_all_hosts_mcast; 1997 } 1998 flag |= nce_advert_flags(our_nce); 1999 /* Response to a solicitation */ 2000 (void) nce_xmit(ill, 2001 ND_NEIGHBOR_ADVERT, 2002 ill, /* ill to be used for extracting ill_nd_lla */ 2003 B_TRUE, /* use ill_nd_lla */ 2004 &target, /* Source and target of the advertisement pkt */ 2005 &src, /* IP Destination (source of original pkt) */ 2006 flag); 2007 done: 2008 if (bad_solicit) 2009 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 2010 if (our_nce != NULL) 2011 NCE_REFRELE(our_nce); 2012 } 2013 2014 void 2015 ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2016 { 2017 nd_neighbor_advert_t *na; 2018 uint32_t hlen = ill->ill_nd_lla_len; 2019 uchar_t *haddr = NULL; 2020 icmp6_t *icmp_nd; 2021 ip6_t *ip6h; 2022 nce_t *dst_nce = NULL; 2023 in6_addr_t target; 2024 nd_opt_hdr_t *opt = NULL; 2025 int len; 2026 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2027 ip_stack_t *ipst = ill->ill_ipst; 2028 2029 ip6h = (ip6_t *)mp->b_rptr; 2030 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2031 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2032 na = (nd_neighbor_advert_t *)icmp_nd; 2033 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 2034 (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 2035 ip1dbg(("ndp_input_advert: Target is multicast but the " 2036 "solicited flag is not zero\n")); 2037 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2038 return; 2039 } 2040 target = na->nd_na_target; 2041 if (IN6_IS_ADDR_MULTICAST(&target)) { 2042 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 2043 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2044 return; 2045 } 2046 if (len > sizeof (nd_neighbor_advert_t)) { 2047 opt = (nd_opt_hdr_t *)&na[1]; 2048 if (!ndp_verify_optlen(opt, 2049 len - sizeof (nd_neighbor_advert_t))) { 2050 ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); 2051 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2052 return; 2053 } 2054 /* At this point we have a verified NA per spec */ 2055 len -= sizeof (nd_neighbor_advert_t); 2056 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 2057 if (opt != NULL) { 2058 haddr = (uchar_t *)&opt[1]; 2059 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 2060 hlen == 0) { 2061 ip1dbg(("ndp_input_advert: bad SLLA\n")); 2062 BUMP_MIB(mib, 2063 ipv6IfIcmpInBadNeighborAdvertisements); 2064 return; 2065 } 2066 } 2067 } 2068 2069 /* 2070 * If this interface is part of the group look at all the 2071 * ills in the group. 2072 */ 2073 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2074 if (ill->ill_group != NULL) 2075 ill = ill->ill_group->illgrp_ill; 2076 2077 for (; ill != NULL; ill = ill->ill_group_next) { 2078 mutex_enter(&ill->ill_lock); 2079 if (!ILL_CAN_LOOKUP(ill)) { 2080 mutex_exit(&ill->ill_lock); 2081 continue; 2082 } 2083 ill_refhold_locked(ill); 2084 mutex_exit(&ill->ill_lock); 2085 dst_nce = ndp_lookup_v6(ill, &target, B_FALSE); 2086 /* We have to drop the lock since ndp_process calls put* */ 2087 rw_exit(&ipst->ips_ill_g_lock); 2088 if (dst_nce != NULL) { 2089 if ((dst_nce->nce_flags & NCE_F_PERMANENT) && 2090 dst_nce->nce_state == ND_PROBE) { 2091 /* 2092 * Someone else sent an advertisement for an 2093 * address that we're trying to configure. 2094 * Tear it down. Note that dl_mp might be NULL 2095 * if we're getting a unicast reply. This 2096 * isn't typically done (multicast is the norm 2097 * in response to a probe), but ip_ndp_failure 2098 * will handle the dl_mp == NULL case as well. 2099 */ 2100 ip_ndp_failure(ill, mp, dl_mp, dst_nce); 2101 } else if (dst_nce->nce_flags & NCE_F_PERMANENT) { 2102 /* 2103 * Someone just announced one of our local 2104 * addresses. If it wasn't us, then this is a 2105 * conflict. Defend the address or shut it 2106 * down. 2107 */ 2108 if (dl_mp != NULL && 2109 (haddr == NULL || 2110 nce_cmp_ll_addr(dst_nce, haddr, 2111 ill->ill_nd_lla_len))) { 2112 ip_ndp_conflict(ill, mp, dl_mp, 2113 dst_nce); 2114 } 2115 } else { 2116 if (na->nd_na_flags_reserved & 2117 ND_NA_FLAG_ROUTER) { 2118 dst_nce->nce_flags |= NCE_F_ISROUTER; 2119 } 2120 /* B_TRUE indicates this an advertisement */ 2121 ndp_process(dst_nce, haddr, 2122 na->nd_na_flags_reserved, B_TRUE); 2123 } 2124 NCE_REFRELE(dst_nce); 2125 } 2126 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2127 ill_refrele(ill); 2128 } 2129 rw_exit(&ipst->ips_ill_g_lock); 2130 } 2131 2132 /* 2133 * Process NDP neighbor solicitation/advertisement messages. 2134 * The checksum has already checked o.k before reaching here. 2135 */ 2136 void 2137 ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2138 { 2139 icmp6_t *icmp_nd; 2140 ip6_t *ip6h; 2141 int len; 2142 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2143 2144 2145 if (!pullupmsg(mp, -1)) { 2146 ip1dbg(("ndp_input: pullupmsg failed\n")); 2147 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2148 goto done; 2149 } 2150 ip6h = (ip6_t *)mp->b_rptr; 2151 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 2152 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 2153 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 2154 goto done; 2155 } 2156 /* 2157 * NDP does not accept any extension headers between the 2158 * IP header and the ICMP header since e.g. a routing 2159 * header could be dangerous. 2160 * This assumes that any AH or ESP headers are removed 2161 * by ip prior to passing the packet to ndp_input. 2162 */ 2163 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 2164 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 2165 ip6h->ip6_nxt)); 2166 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2167 goto done; 2168 } 2169 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2170 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 2171 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 2172 if (icmp_nd->icmp6_code != 0) { 2173 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 2174 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2175 goto done; 2176 } 2177 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2178 /* 2179 * Make sure packet length is large enough for either 2180 * a NS or a NA icmp packet. 2181 */ 2182 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 2183 ip1dbg(("ndp_input: packet too short\n")); 2184 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2185 goto done; 2186 } 2187 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 2188 ndp_input_solicit(ill, mp, dl_mp); 2189 } else { 2190 ndp_input_advert(ill, mp, dl_mp); 2191 } 2192 done: 2193 freemsg(mp); 2194 } 2195 2196 /* 2197 * nce_xmit is called to form and transmit a ND solicitation or 2198 * advertisement ICMP packet. 2199 * 2200 * If the source address is unspecified and this isn't a probe (used for 2201 * duplicate address detection), an appropriate source address and link layer 2202 * address will be chosen here. The link layer address option is included if 2203 * the source is specified (i.e., all non-probe packets), and omitted (per the 2204 * specification) otherwise. 2205 * 2206 * It returns B_FALSE only if it does a successful put() to the 2207 * corresponding ill's ill_wq otherwise returns B_TRUE. 2208 */ 2209 static boolean_t 2210 nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, 2211 boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, 2212 int flag) 2213 { 2214 uint32_t len; 2215 icmp6_t *icmp6; 2216 mblk_t *mp; 2217 ip6_t *ip6h; 2218 nd_opt_hdr_t *opt; 2219 uint_t plen; 2220 ip6i_t *ip6i; 2221 ipif_t *src_ipif = NULL; 2222 uint8_t *hw_addr; 2223 zoneid_t zoneid = GLOBAL_ZONEID; 2224 2225 /* 2226 * If we have a unspecified source(sender) address, select a 2227 * proper source address for the solicitation here itself so 2228 * that we can initialize the h/w address correctly. This is 2229 * needed for interface groups as source address can come from 2230 * the whole group and the h/w address initialized from ill will 2231 * be wrong if the source address comes from a different ill. 2232 * 2233 * If the sender is specified then we use this address in order 2234 * to lookup the zoneid before calling ip_output_v6(). This is to 2235 * enable unicast ND_NEIGHBOR_ADVERT packets to be routed correctly 2236 * by IP (we cannot guarantee that the global zone has an interface 2237 * route to the destination). 2238 * 2239 * Note that the NA never comes here with the unspecified source 2240 * address. The following asserts that whenever the source 2241 * address is specified, the haddr also should be specified. 2242 */ 2243 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 2244 2245 if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { 2246 ASSERT(operation != ND_NEIGHBOR_ADVERT); 2247 /* 2248 * Pick a source address for this solicitation, but 2249 * restrict the selection to addresses assigned to the 2250 * output interface (or interface group). We do this 2251 * because the destination will create a neighbor cache 2252 * entry for the source address of this packet, so the 2253 * source address had better be a valid neighbor. 2254 */ 2255 src_ipif = ipif_select_source_v6(ill, target, RESTRICT_TO_ILL, 2256 IPV6_PREFER_SRC_DEFAULT, ALL_ZONES); 2257 if (src_ipif == NULL) { 2258 char buf[INET6_ADDRSTRLEN]; 2259 2260 ip1dbg(("nce_xmit: No source ipif for dst %s\n", 2261 inet_ntop(AF_INET6, (char *)target, buf, 2262 sizeof (buf)))); 2263 return (B_TRUE); 2264 } 2265 sender = &src_ipif->ipif_v6src_addr; 2266 hwaddr_ill = src_ipif->ipif_ill; 2267 } else if (!(IN6_IS_ADDR_UNSPECIFIED(sender))) { 2268 zoneid = ipif_lookup_addr_zoneid_v6(sender, ill, ill->ill_ipst); 2269 /* 2270 * It's possible for ipif_lookup_addr_zoneid_v6() to return 2271 * ALL_ZONES if it cannot find a matching ipif for the address 2272 * we are trying to use. In this case we err on the side of 2273 * trying to send the packet by defaulting to the GLOBAL_ZONEID. 2274 */ 2275 if (zoneid == ALL_ZONES) 2276 zoneid = GLOBAL_ZONEID; 2277 } 2278 2279 /* 2280 * Always make sure that the NS/NA packets don't get load 2281 * spread. This is needed so that the probe packets sent 2282 * by the in.mpathd daemon can really go out on the desired 2283 * interface. Probe packets are made to go out on a desired 2284 * interface by including a ip6i with ATTACH_IF flag. As these 2285 * packets indirectly end up sending/receiving NS/NA packets 2286 * (neighbor doing NUD), we have to make sure that NA 2287 * also go out on the same interface. 2288 */ 2289 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7) / 8; 2290 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 2291 plen * 8; 2292 mp = allocb(len, BPRI_LO); 2293 if (mp == NULL) { 2294 if (src_ipif != NULL) 2295 ipif_refrele(src_ipif); 2296 return (B_TRUE); 2297 } 2298 bzero((char *)mp->b_rptr, len); 2299 mp->b_wptr = mp->b_rptr + len; 2300 2301 ip6i = (ip6i_t *)mp->b_rptr; 2302 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2303 ip6i->ip6i_nxt = IPPROTO_RAW; 2304 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2305 if (flag & NDP_PROBE) 2306 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 2307 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2308 2309 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 2310 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2311 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2312 ip6h->ip6_nxt = IPPROTO_ICMPV6; 2313 ip6h->ip6_hops = IPV6_MAX_HOPS; 2314 ip6h->ip6_dst = *target; 2315 icmp6 = (icmp6_t *)&ip6h[1]; 2316 2317 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 2318 sizeof (nd_neighbor_advert_t)); 2319 2320 if (operation == ND_NEIGHBOR_SOLICIT) { 2321 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 2322 2323 if (!(flag & NDP_PROBE)) 2324 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 2325 ip6h->ip6_src = *sender; 2326 ns->nd_ns_target = *target; 2327 if (!(flag & NDP_UNICAST)) { 2328 /* Form multicast address of the target */ 2329 ip6h->ip6_dst = ipv6_solicited_node_mcast; 2330 ip6h->ip6_dst.s6_addr32[3] |= 2331 ns->nd_ns_target.s6_addr32[3]; 2332 } 2333 } else { 2334 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 2335 2336 ASSERT(!(flag & NDP_PROBE)); 2337 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2338 ip6h->ip6_src = *sender; 2339 na->nd_na_target = *sender; 2340 if (flag & NDP_ISROUTER) 2341 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 2342 if (flag & NDP_SOLICITED) 2343 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 2344 if (flag & NDP_ORIDE) 2345 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 2346 } 2347 2348 hw_addr = NULL; 2349 if (!(flag & NDP_PROBE)) { 2350 hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : 2351 hwaddr_ill->ill_phys_addr; 2352 if (hw_addr != NULL) { 2353 /* Fill in link layer address and option len */ 2354 opt->nd_opt_len = (uint8_t)plen; 2355 bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); 2356 } 2357 } 2358 if (hw_addr == NULL) { 2359 /* If there's no link layer address option, then strip it. */ 2360 len -= plen * 8; 2361 mp->b_wptr = mp->b_rptr + len; 2362 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2363 } 2364 2365 icmp6->icmp6_type = (uint8_t)operation; 2366 icmp6->icmp6_code = 0; 2367 /* 2368 * Prepare for checksum by putting icmp length in the icmp 2369 * checksum field. The checksum is calculated in ip_wput_v6. 2370 */ 2371 icmp6->icmp6_cksum = ip6h->ip6_plen; 2372 2373 if (src_ipif != NULL) 2374 ipif_refrele(src_ipif); 2375 2376 ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT); 2377 return (B_FALSE); 2378 } 2379 2380 /* 2381 * Make a link layer address (does not include the SAP) from an nce. 2382 * To form the link layer address, use the last four bytes of ipv6 2383 * address passed in and the fixed offset stored in nce. 2384 */ 2385 static void 2386 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 2387 { 2388 uchar_t *mask, *to; 2389 ill_t *ill = nce->nce_ill; 2390 int len; 2391 2392 if (ill->ill_net_type == IRE_IF_NORESOLVER) 2393 return; 2394 ASSERT(nce->nce_res_mp != NULL); 2395 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 2396 ASSERT(nce->nce_flags & NCE_F_MAPPING); 2397 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 2398 ASSERT(addr != NULL); 2399 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 2400 addrpos, ill->ill_nd_lla_len); 2401 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 2402 IPV6_ADDR_LEN); 2403 mask = (uchar_t *)&nce->nce_extract_mask; 2404 mask += (IPV6_ADDR_LEN - len); 2405 addr += (IPV6_ADDR_LEN - len); 2406 to = addrpos + nce->nce_ll_extract_start; 2407 while (len-- > 0) 2408 *to++ |= *mask++ & *addr++; 2409 } 2410 2411 mblk_t * 2412 nce_udreq_alloc(ill_t *ill) 2413 { 2414 mblk_t *template_mp = NULL; 2415 dl_unitdata_req_t *dlur; 2416 int sap_length; 2417 2418 ASSERT(ill->ill_isv6); 2419 2420 sap_length = ill->ill_sap_length; 2421 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 2422 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 2423 if (template_mp == NULL) 2424 return (NULL); 2425 2426 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 2427 dlur->dl_priority.dl_min = 0; 2428 dlur->dl_priority.dl_max = 0; 2429 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 2430 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 2431 2432 /* Copy in the SAP value. */ 2433 NCE_LL_SAP_COPY(ill, template_mp); 2434 2435 return (template_mp); 2436 } 2437 2438 /* 2439 * NDP retransmit timer. 2440 * This timer goes off when: 2441 * a. It is time to retransmit NS for resolver. 2442 * b. It is time to send reachability probes. 2443 */ 2444 void 2445 ndp_timer(void *arg) 2446 { 2447 nce_t *nce = arg; 2448 ill_t *ill = nce->nce_ill; 2449 uint32_t ms; 2450 char addrbuf[INET6_ADDRSTRLEN]; 2451 mblk_t *mp; 2452 boolean_t dropped = B_FALSE; 2453 ip_stack_t *ipst = ill->ill_ipst; 2454 2455 /* 2456 * The timer has to be cancelled by ndp_delete before doing the final 2457 * refrele. So the NCE is guaranteed to exist when the timer runs 2458 * until it clears the timeout_id. Before clearing the timeout_id 2459 * bump up the refcnt so that we can continue to use the nce 2460 */ 2461 ASSERT(nce != NULL); 2462 2463 /* 2464 * Grab the ill_g_lock now itself to avoid lock order problems. 2465 * nce_solicit needs ill_g_lock to be able to traverse ills 2466 */ 2467 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2468 mutex_enter(&nce->nce_lock); 2469 NCE_REFHOLD_LOCKED(nce); 2470 nce->nce_timeout_id = 0; 2471 2472 /* 2473 * Check the reachability state first. 2474 */ 2475 switch (nce->nce_state) { 2476 case ND_DELAY: 2477 rw_exit(&ipst->ips_ill_g_lock); 2478 nce->nce_state = ND_PROBE; 2479 mutex_exit(&nce->nce_lock); 2480 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2481 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2482 if (ip_debug > 3) { 2483 /* ip2dbg */ 2484 pr_addr_dbg("ndp_timer: state for %s changed " 2485 "to PROBE\n", AF_INET6, &nce->nce_addr); 2486 } 2487 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2488 NCE_REFRELE(nce); 2489 return; 2490 case ND_PROBE: 2491 /* must be retransmit timer */ 2492 rw_exit(&ipst->ips_ill_g_lock); 2493 nce->nce_pcnt--; 2494 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2495 nce->nce_pcnt >= -1); 2496 if (nce->nce_pcnt > 0) { 2497 /* 2498 * As per RFC2461, the nce gets deleted after 2499 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2500 * Note that the first unicast solicitation is sent 2501 * during the DELAY state. 2502 */ 2503 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2504 nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, 2505 addrbuf, sizeof (addrbuf)))); 2506 mutex_exit(&nce->nce_lock); 2507 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, 2508 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, 2509 (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE : 2510 NDP_UNICAST); 2511 if (dropped) { 2512 mutex_enter(&nce->nce_lock); 2513 nce->nce_pcnt++; 2514 mutex_exit(&nce->nce_lock); 2515 } 2516 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 2517 } else if (nce->nce_pcnt < 0) { 2518 /* No hope, delete the nce */ 2519 nce->nce_state = ND_UNREACHABLE; 2520 mutex_exit(&nce->nce_lock); 2521 if (ip_debug > 2) { 2522 /* ip1dbg */ 2523 pr_addr_dbg("ndp_timer: Delete IRE for" 2524 " dst %s\n", AF_INET6, &nce->nce_addr); 2525 } 2526 ndp_delete(nce); 2527 } else if (!(nce->nce_flags & NCE_F_PERMANENT)) { 2528 /* Wait RetransTimer, before deleting the entry */ 2529 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2530 nce->nce_pcnt, inet_ntop(AF_INET6, 2531 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2532 mutex_exit(&nce->nce_lock); 2533 /* Wait one interval before killing */ 2534 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2535 } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { 2536 ipif_t *ipif; 2537 2538 /* 2539 * We're done probing, and we can now declare this 2540 * address to be usable. Let IP know that it's ok to 2541 * use. 2542 */ 2543 nce->nce_state = ND_REACHABLE; 2544 mutex_exit(&nce->nce_lock); 2545 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, 2546 ALL_ZONES, NULL, NULL, NULL, NULL, ipst); 2547 if (ipif != NULL) { 2548 if (ipif->ipif_was_dup) { 2549 char ibuf[LIFNAMSIZ + 10]; 2550 char sbuf[INET6_ADDRSTRLEN]; 2551 2552 ipif->ipif_was_dup = B_FALSE; 2553 (void) inet_ntop(AF_INET6, 2554 &ipif->ipif_v6lcl_addr, 2555 sbuf, sizeof (sbuf)); 2556 ipif_get_name(ipif, ibuf, 2557 sizeof (ibuf)); 2558 cmn_err(CE_NOTE, "recovered address " 2559 "%s on %s", sbuf, ibuf); 2560 } 2561 if ((ipif->ipif_flags & IPIF_UP) && 2562 !ipif->ipif_addr_ready) 2563 ipif_up_notify(ipif); 2564 ipif->ipif_addr_ready = 1; 2565 ipif_refrele(ipif); 2566 } 2567 /* Begin defending our new address */ 2568 nce->nce_unsolicit_count = 0; 2569 dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, 2570 B_FALSE, &nce->nce_addr, &ipv6_all_hosts_mcast, 2571 nce_advert_flags(nce)); 2572 if (dropped) { 2573 nce->nce_unsolicit_count = 1; 2574 NDP_RESTART_TIMER(nce, 2575 ipst->ips_ip_ndp_unsolicit_interval); 2576 } else if (ipst->ips_ip_ndp_defense_interval != 0) { 2577 NDP_RESTART_TIMER(nce, 2578 ipst->ips_ip_ndp_defense_interval); 2579 } 2580 } else { 2581 /* 2582 * This is an address we're probing to be our own, but 2583 * the ill is down. Wait until it comes back before 2584 * doing anything, but switch to reachable state so 2585 * that the restart will work. 2586 */ 2587 nce->nce_state = ND_REACHABLE; 2588 mutex_exit(&nce->nce_lock); 2589 } 2590 NCE_REFRELE(nce); 2591 return; 2592 case ND_INCOMPLETE: 2593 /* 2594 * Must be resolvers retransmit timer. 2595 */ 2596 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2597 ip6i_t *ip6i; 2598 ip6_t *ip6h; 2599 mblk_t *data_mp; 2600 2601 /* 2602 * Walk the list of packets queued, and see if there 2603 * are any multipathing probe packets. Such packets 2604 * are always queued at the head. Since this is a 2605 * retransmit timer firing, mark such packets as 2606 * delayed in ND resolution. This info will be used 2607 * in ip_wput_v6(). Multipathing probe packets will 2608 * always have an ip6i_t. Once we hit a packet without 2609 * it, we can break out of this loop. 2610 */ 2611 if (mp->b_datap->db_type == M_CTL) 2612 data_mp = mp->b_cont; 2613 else 2614 data_mp = mp; 2615 2616 ip6h = (ip6_t *)data_mp->b_rptr; 2617 if (ip6h->ip6_nxt != IPPROTO_RAW) 2618 break; 2619 2620 /* 2621 * This message should have been pulled up already in 2622 * ip_wput_v6. We can't do pullups here because the 2623 * b_next/b_prev is non-NULL. 2624 */ 2625 ip6i = (ip6i_t *)ip6h; 2626 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2627 sizeof (ip6i_t) + IPV6_HDR_LEN); 2628 2629 /* Mark this packet as delayed due to ND resolution */ 2630 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2631 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2632 } 2633 if (nce->nce_qd_mp != NULL) { 2634 ms = nce_solicit(nce, NULL); 2635 rw_exit(&ipst->ips_ill_g_lock); 2636 if (ms == 0) { 2637 if (nce->nce_state != ND_REACHABLE) { 2638 mutex_exit(&nce->nce_lock); 2639 nce_resolv_failed(nce); 2640 ndp_delete(nce); 2641 } else { 2642 mutex_exit(&nce->nce_lock); 2643 } 2644 } else { 2645 mutex_exit(&nce->nce_lock); 2646 NDP_RESTART_TIMER(nce, (clock_t)ms); 2647 } 2648 NCE_REFRELE(nce); 2649 return; 2650 } 2651 mutex_exit(&nce->nce_lock); 2652 rw_exit(&ipst->ips_ill_g_lock); 2653 NCE_REFRELE(nce); 2654 break; 2655 case ND_REACHABLE : 2656 rw_exit(&ipst->ips_ill_g_lock); 2657 if (((nce->nce_flags & NCE_F_UNSOL_ADV) && 2658 nce->nce_unsolicit_count != 0) || 2659 ((nce->nce_flags & NCE_F_PERMANENT) && 2660 ipst->ips_ip_ndp_defense_interval != 0)) { 2661 if (nce->nce_unsolicit_count > 0) 2662 nce->nce_unsolicit_count--; 2663 mutex_exit(&nce->nce_lock); 2664 dropped = nce_xmit(ill, 2665 ND_NEIGHBOR_ADVERT, 2666 ill, /* ill to be used for hw addr */ 2667 B_FALSE, /* use ill_phys_addr */ 2668 &nce->nce_addr, 2669 &ipv6_all_hosts_mcast, 2670 nce_advert_flags(nce)); 2671 if (dropped) { 2672 mutex_enter(&nce->nce_lock); 2673 nce->nce_unsolicit_count++; 2674 mutex_exit(&nce->nce_lock); 2675 } 2676 if (nce->nce_unsolicit_count != 0) { 2677 NDP_RESTART_TIMER(nce, 2678 ipst->ips_ip_ndp_unsolicit_interval); 2679 } else { 2680 NDP_RESTART_TIMER(nce, 2681 ipst->ips_ip_ndp_defense_interval); 2682 } 2683 } else { 2684 mutex_exit(&nce->nce_lock); 2685 } 2686 NCE_REFRELE(nce); 2687 break; 2688 default: 2689 rw_exit(&ipst->ips_ill_g_lock); 2690 mutex_exit(&nce->nce_lock); 2691 NCE_REFRELE(nce); 2692 break; 2693 } 2694 } 2695 2696 /* 2697 * Set a link layer address from the ll_addr passed in. 2698 * Copy SAP from ill. 2699 */ 2700 static void 2701 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2702 { 2703 ill_t *ill = nce->nce_ill; 2704 uchar_t *woffset; 2705 2706 ASSERT(ll_addr != NULL); 2707 /* Always called before fast_path_probe */ 2708 ASSERT(nce->nce_fp_mp == NULL); 2709 if (ill->ill_sap_length != 0) { 2710 /* 2711 * Copy the SAP type specified in the 2712 * request into the xmit template. 2713 */ 2714 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2715 } 2716 if (ill->ill_phys_addr_length > 0) { 2717 /* 2718 * The bcopy() below used to be called for the physical address 2719 * length rather than the link layer address length. For 2720 * ethernet and many other media, the phys_addr and lla are 2721 * identical. 2722 * However, with xresolv interfaces being introduced, the 2723 * phys_addr and lla are no longer the same, and the physical 2724 * address may not have any useful meaning, so we use the lla 2725 * for IPv6 address resolution and destination addressing. 2726 * 2727 * For PPP or other interfaces with a zero length 2728 * physical address, don't do anything here. 2729 * The bcopy() with a zero phys_addr length was previously 2730 * a no-op for interfaces with a zero-length physical address. 2731 * Using the lla for them would change the way they operate. 2732 * Doing nothing in such cases preserves expected behavior. 2733 */ 2734 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2735 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2736 } 2737 } 2738 2739 static boolean_t 2740 nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) 2741 { 2742 ill_t *ill = nce->nce_ill; 2743 uchar_t *ll_offset; 2744 2745 ASSERT(nce->nce_res_mp != NULL); 2746 if (ll_addr == NULL) 2747 return (B_FALSE); 2748 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2749 if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) 2750 return (B_TRUE); 2751 return (B_FALSE); 2752 } 2753 2754 /* 2755 * Updates the link layer address or the reachability state of 2756 * a cache entry. Reset probe counter if needed. 2757 */ 2758 static void 2759 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2760 { 2761 ill_t *ill = nce->nce_ill; 2762 boolean_t need_stop_timer = B_FALSE; 2763 boolean_t need_fastpath_update = B_FALSE; 2764 2765 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2766 ASSERT(nce->nce_ipversion == IPV6_VERSION); 2767 /* 2768 * If this interface does not do NUD, there is no point 2769 * in allowing an update to the cache entry. Although 2770 * we will respond to NS. 2771 * The only time we accept an update for a resolver when 2772 * NUD is turned off is when it has just been created. 2773 * Non-Resolvers will always be created as REACHABLE. 2774 */ 2775 if (new_state != ND_UNCHANGED) { 2776 if ((nce->nce_flags & NCE_F_NONUD) && 2777 (nce->nce_state != ND_INCOMPLETE)) 2778 return; 2779 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2780 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2781 need_stop_timer = B_TRUE; 2782 if (new_state == ND_REACHABLE) 2783 nce->nce_last = TICK_TO_MSEC(lbolt64); 2784 else { 2785 /* We force NUD in this case */ 2786 nce->nce_last = 0; 2787 } 2788 nce->nce_state = new_state; 2789 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2790 } 2791 /* 2792 * In case of fast path we need to free the the fastpath 2793 * M_DATA and do another probe. Otherwise we can just 2794 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2795 * whatever packets that happens to be transmitting at the time. 2796 */ 2797 if (new_ll_addr != NULL) { 2798 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2799 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2800 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2801 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2802 if (nce->nce_fp_mp != NULL) { 2803 freemsg(nce->nce_fp_mp); 2804 nce->nce_fp_mp = NULL; 2805 } 2806 need_fastpath_update = B_TRUE; 2807 } 2808 mutex_exit(&nce->nce_lock); 2809 if (need_stop_timer) { 2810 (void) untimeout(nce->nce_timeout_id); 2811 nce->nce_timeout_id = 0; 2812 } 2813 if (need_fastpath_update) 2814 nce_fastpath(nce); 2815 mutex_enter(&nce->nce_lock); 2816 } 2817 2818 void 2819 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 2820 { 2821 uint_t count = 0; 2822 mblk_t **mpp; 2823 2824 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2825 2826 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2827 mpp = &(*mpp)->b_next) { 2828 if (++count > 2829 nce->nce_ill->ill_max_buf) { 2830 mblk_t *tmp = nce->nce_qd_mp->b_next; 2831 2832 nce->nce_qd_mp->b_next = NULL; 2833 nce->nce_qd_mp->b_prev = NULL; 2834 freemsg(nce->nce_qd_mp); 2835 nce->nce_qd_mp = tmp; 2836 } 2837 } 2838 /* put this on the list */ 2839 if (head_insert) { 2840 mp->b_next = nce->nce_qd_mp; 2841 nce->nce_qd_mp = mp; 2842 } else { 2843 *mpp = mp; 2844 } 2845 } 2846 2847 static void 2848 nce_queue_mp(nce_t *nce, mblk_t *mp) 2849 { 2850 boolean_t head_insert = B_FALSE; 2851 ip6_t *ip6h; 2852 ip6i_t *ip6i; 2853 mblk_t *data_mp; 2854 2855 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2856 2857 if (mp->b_datap->db_type == M_CTL) 2858 data_mp = mp->b_cont; 2859 else 2860 data_mp = mp; 2861 ip6h = (ip6_t *)data_mp->b_rptr; 2862 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2863 /* 2864 * This message should have been pulled up already in 2865 * ip_wput_v6. We can't do pullups here because the message 2866 * could be from the nce_qd_mp which could have b_next/b_prev 2867 * non-NULL. 2868 */ 2869 ip6i = (ip6i_t *)ip6h; 2870 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2871 sizeof (ip6i_t) + IPV6_HDR_LEN); 2872 /* 2873 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 2874 * This has 2 aspects mentioned below. 2875 * 1. Perform head insertion in the nce_qd_mp for these packets. 2876 * This ensures that next retransmit of ND solicitation 2877 * will use the interface specified by the probe packet, 2878 * for both NS and NA. This corresponds to the src address 2879 * in the IPv6 packet. If we insert at tail, we will be 2880 * depending on the packet at the head for successful 2881 * ND resolution. This is not reliable, because the interface 2882 * on which the NA arrives could be different from the interface 2883 * on which the NS was sent, and if the receiving interface is 2884 * failed, it will appear that the sending interface is also 2885 * failed, causing in.mpathd to misdiagnose this as link 2886 * failure. 2887 * 2. Drop the original packet, if the ND resolution did not 2888 * succeed in the first attempt. However we will create the 2889 * nce and the ire, as soon as the ND resolution succeeds. 2890 * We don't gain anything by queueing multiple probe packets 2891 * and sending them back-to-back once resolution succeeds. 2892 * It is sufficient to send just 1 packet after ND resolution 2893 * succeeds. Since mpathd is sending down probe packets at a 2894 * constant rate, we don't need to send the queued packet. We 2895 * need to queue it only for NDP resolution. The benefit of 2896 * dropping the probe packets that were delayed in ND 2897 * resolution, is that in.mpathd will not see inflated 2898 * RTT. If the ND resolution does not succeed within 2899 * in.mpathd's failure detection time, mpathd may detect 2900 * a failure, and it does not matter whether the packet 2901 * was queued or dropped. 2902 */ 2903 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2904 head_insert = B_TRUE; 2905 } 2906 2907 nce_queue_mp_common(nce, mp, head_insert); 2908 } 2909 2910 /* 2911 * Called when address resolution failed due to a timeout. 2912 * Send an ICMP unreachable in response to all queued packets. 2913 */ 2914 void 2915 nce_resolv_failed(nce_t *nce) 2916 { 2917 mblk_t *mp, *nxt_mp, *first_mp; 2918 char buf[INET6_ADDRSTRLEN]; 2919 ip6_t *ip6h; 2920 zoneid_t zoneid = GLOBAL_ZONEID; 2921 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 2922 2923 ip1dbg(("nce_resolv_failed: dst %s\n", 2924 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 2925 mutex_enter(&nce->nce_lock); 2926 mp = nce->nce_qd_mp; 2927 nce->nce_qd_mp = NULL; 2928 mutex_exit(&nce->nce_lock); 2929 while (mp != NULL) { 2930 nxt_mp = mp->b_next; 2931 mp->b_next = NULL; 2932 mp->b_prev = NULL; 2933 2934 first_mp = mp; 2935 if (mp->b_datap->db_type == M_CTL) { 2936 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 2937 ASSERT(io->ipsec_out_type == IPSEC_OUT); 2938 zoneid = io->ipsec_out_zoneid; 2939 ASSERT(zoneid != ALL_ZONES); 2940 mp = mp->b_cont; 2941 mp->b_next = NULL; 2942 mp->b_prev = NULL; 2943 } 2944 2945 ip6h = (ip6_t *)mp->b_rptr; 2946 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2947 ip6i_t *ip6i; 2948 /* 2949 * This message should have been pulled up already 2950 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 2951 * the header is pulled up. 2952 */ 2953 ip6i = (ip6i_t *)ip6h; 2954 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 2955 sizeof (ip6i_t) + IPV6_HDR_LEN); 2956 mp->b_rptr += sizeof (ip6i_t); 2957 } 2958 /* 2959 * Ignore failure since icmp_unreachable_v6 will silently 2960 * drop packets with an unspecified source address. 2961 */ 2962 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst); 2963 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 2964 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst); 2965 mp = nxt_mp; 2966 } 2967 } 2968 2969 /* 2970 * Called by SIOCSNDP* ioctl to add/change an nce entry 2971 * and the corresponding attributes. 2972 * Disallow states other than ND_REACHABLE or ND_STALE. 2973 */ 2974 int 2975 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 2976 { 2977 sin6_t *sin6; 2978 in6_addr_t *addr; 2979 nce_t *nce; 2980 int err; 2981 uint16_t new_flags = 0; 2982 uint16_t old_flags = 0; 2983 int inflags = lnr->lnr_flags; 2984 ip_stack_t *ipst = ill->ill_ipst; 2985 2986 ASSERT(ill->ill_isv6); 2987 if ((lnr->lnr_state_create != ND_REACHABLE) && 2988 (lnr->lnr_state_create != ND_STALE)) 2989 return (EINVAL); 2990 2991 sin6 = (sin6_t *)&lnr->lnr_addr; 2992 addr = &sin6->sin6_addr; 2993 2994 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 2995 /* We know it can not be mapping so just look in the hash table */ 2996 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 2997 nce = nce_lookup_addr(ill, addr, nce); 2998 if (nce != NULL) 2999 new_flags = nce->nce_flags; 3000 3001 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 3002 case NDF_ISROUTER_ON: 3003 new_flags |= NCE_F_ISROUTER; 3004 break; 3005 case NDF_ISROUTER_OFF: 3006 new_flags &= ~NCE_F_ISROUTER; 3007 break; 3008 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3009 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3010 if (nce != NULL) 3011 NCE_REFRELE(nce); 3012 return (EINVAL); 3013 } 3014 3015 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3016 case NDF_ANYCAST_ON: 3017 new_flags |= NCE_F_ANYCAST; 3018 break; 3019 case NDF_ANYCAST_OFF: 3020 new_flags &= ~NCE_F_ANYCAST; 3021 break; 3022 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3023 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3024 if (nce != NULL) 3025 NCE_REFRELE(nce); 3026 return (EINVAL); 3027 } 3028 3029 if (nce == NULL) { 3030 err = ndp_add_v6(ill, 3031 (uchar_t *)lnr->lnr_hdw_addr, 3032 addr, 3033 &ipv6_all_ones, 3034 &ipv6_all_zeros, 3035 0, 3036 new_flags, 3037 lnr->lnr_state_create, 3038 &nce); 3039 if (err != 0) { 3040 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3041 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 3042 return (err); 3043 } 3044 } 3045 old_flags = nce->nce_flags; 3046 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3047 /* 3048 * Router turned to host, delete all ires. 3049 * XXX Just delete the entry, but we need to add too. 3050 */ 3051 nce->nce_flags &= ~NCE_F_ISROUTER; 3052 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3053 ndp_delete(nce); 3054 NCE_REFRELE(nce); 3055 return (0); 3056 } 3057 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3058 3059 mutex_enter(&nce->nce_lock); 3060 nce->nce_flags = new_flags; 3061 mutex_exit(&nce->nce_lock); 3062 /* 3063 * Note that we ignore the state at this point, which 3064 * should be either STALE or REACHABLE. Instead we let 3065 * the link layer address passed in to determine the state 3066 * much like incoming packets. 3067 */ 3068 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3069 NCE_REFRELE(nce); 3070 return (0); 3071 } 3072 3073 /* 3074 * If the device driver supports it, we make nce_fp_mp to have 3075 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 3076 * The caller ensures there is hold on nce for this function. 3077 * Note that since ill_fastpath_probe() copies the mblk there is 3078 * no need for the hold beyond this function. 3079 */ 3080 void 3081 nce_fastpath(nce_t *nce) 3082 { 3083 ill_t *ill = nce->nce_ill; 3084 int res; 3085 3086 ASSERT(ill != NULL); 3087 ASSERT(nce->nce_state != ND_INITIAL && nce->nce_state != ND_INCOMPLETE); 3088 3089 if (nce->nce_fp_mp != NULL) { 3090 /* Already contains fastpath info */ 3091 return; 3092 } 3093 if (nce->nce_res_mp != NULL) { 3094 nce_fastpath_list_add(nce); 3095 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3096 /* 3097 * EAGAIN is an indication of a transient error 3098 * i.e. allocation failure etc. leave the nce in the list it 3099 * will be updated when another probe happens for another ire 3100 * if not it will be taken out of the list when the ire is 3101 * deleted. 3102 */ 3103 3104 if (res != 0 && res != EAGAIN) 3105 nce_fastpath_list_delete(nce); 3106 } 3107 } 3108 3109 /* 3110 * Drain the list of nce's waiting for fastpath response. 3111 */ 3112 void 3113 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3114 void *arg) 3115 { 3116 3117 nce_t *next_nce; 3118 nce_t *current_nce; 3119 nce_t *first_nce; 3120 nce_t *prev_nce = NULL; 3121 3122 mutex_enter(&ill->ill_lock); 3123 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3124 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3125 next_nce = current_nce->nce_fastpath; 3126 /* 3127 * Take it off the list if we're flushing, or if the callback 3128 * routine tells us to do so. Otherwise, leave the nce in the 3129 * fastpath list to handle any pending response from the lower 3130 * layer. We can't drain the list when the callback routine 3131 * comparison failed, because the response is asynchronous in 3132 * nature, and may not arrive in the same order as the list 3133 * insertion. 3134 */ 3135 if (func == NULL || func(current_nce, arg)) { 3136 current_nce->nce_fastpath = NULL; 3137 if (current_nce == first_nce) 3138 ill->ill_fastpath_list = first_nce = next_nce; 3139 else 3140 prev_nce->nce_fastpath = next_nce; 3141 } else { 3142 /* previous element that is still in the list */ 3143 prev_nce = current_nce; 3144 } 3145 current_nce = next_nce; 3146 } 3147 mutex_exit(&ill->ill_lock); 3148 } 3149 3150 /* 3151 * Add nce to the nce fastpath list. 3152 */ 3153 void 3154 nce_fastpath_list_add(nce_t *nce) 3155 { 3156 ill_t *ill; 3157 3158 ill = nce->nce_ill; 3159 3160 mutex_enter(&ill->ill_lock); 3161 mutex_enter(&nce->nce_lock); 3162 3163 /* 3164 * if nce has not been deleted and 3165 * is not already in the list add it. 3166 */ 3167 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3168 (nce->nce_fastpath == NULL)) { 3169 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3170 ill->ill_fastpath_list = nce; 3171 } 3172 3173 mutex_exit(&nce->nce_lock); 3174 mutex_exit(&ill->ill_lock); 3175 } 3176 3177 /* 3178 * remove nce from the nce fastpath list. 3179 */ 3180 void 3181 nce_fastpath_list_delete(nce_t *nce) 3182 { 3183 nce_t *nce_ptr; 3184 3185 ill_t *ill; 3186 3187 ill = nce->nce_ill; 3188 ASSERT(ill != NULL); 3189 3190 mutex_enter(&ill->ill_lock); 3191 if (nce->nce_fastpath == NULL) 3192 goto done; 3193 3194 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3195 3196 if (ill->ill_fastpath_list == nce) { 3197 ill->ill_fastpath_list = nce->nce_fastpath; 3198 } else { 3199 nce_ptr = ill->ill_fastpath_list; 3200 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3201 if (nce_ptr->nce_fastpath == nce) { 3202 nce_ptr->nce_fastpath = nce->nce_fastpath; 3203 break; 3204 } 3205 nce_ptr = nce_ptr->nce_fastpath; 3206 } 3207 } 3208 3209 nce->nce_fastpath = NULL; 3210 done: 3211 mutex_exit(&ill->ill_lock); 3212 } 3213 3214 /* 3215 * Update all NCE's that are not in fastpath mode and 3216 * have an nce_fp_mp that matches mp. mp->b_cont contains 3217 * the fastpath header. 3218 * 3219 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 3220 */ 3221 boolean_t 3222 ndp_fastpath_update(nce_t *nce, void *arg) 3223 { 3224 mblk_t *mp, *fp_mp; 3225 uchar_t *mp_rptr, *ud_mp_rptr; 3226 mblk_t *ud_mp = nce->nce_res_mp; 3227 ptrdiff_t cmplen; 3228 3229 if (nce->nce_flags & NCE_F_MAPPING) 3230 return (B_TRUE); 3231 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3232 return (B_TRUE); 3233 3234 ip2dbg(("ndp_fastpath_update: trying\n")); 3235 mp = (mblk_t *)arg; 3236 mp_rptr = mp->b_rptr; 3237 cmplen = mp->b_wptr - mp_rptr; 3238 ASSERT(cmplen >= 0); 3239 ud_mp_rptr = ud_mp->b_rptr; 3240 /* 3241 * The nce is locked here to prevent any other threads 3242 * from accessing and changing nce_res_mp when the IPv6 address 3243 * becomes resolved to an lla while we're in the middle 3244 * of looking at and comparing the hardware address (lla). 3245 * It is also locked to prevent multiple threads in nce_fastpath_update 3246 * from examining nce_res_mp atthe same time. 3247 */ 3248 mutex_enter(&nce->nce_lock); 3249 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3250 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3251 mutex_exit(&nce->nce_lock); 3252 /* 3253 * Don't take the ire off the fastpath list yet, 3254 * since the response may come later. 3255 */ 3256 return (B_FALSE); 3257 } 3258 /* Matched - install mp as the fastpath mp */ 3259 ip1dbg(("ndp_fastpath_update: match\n")); 3260 fp_mp = dupb(mp->b_cont); 3261 if (fp_mp != NULL) { 3262 nce->nce_fp_mp = fp_mp; 3263 } 3264 mutex_exit(&nce->nce_lock); 3265 return (B_TRUE); 3266 } 3267 3268 /* 3269 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3270 * driver. Note that it assumes IP is exclusive... 3271 */ 3272 /* ARGSUSED */ 3273 void 3274 ndp_fastpath_flush(nce_t *nce, char *arg) 3275 { 3276 if (nce->nce_flags & NCE_F_MAPPING) 3277 return; 3278 /* No fastpath info? */ 3279 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3280 return; 3281 3282 if (nce->nce_ipversion == IPV4_VERSION && 3283 nce->nce_flags & NCE_F_BCAST) { 3284 /* 3285 * IPv4 BROADCAST entries: 3286 * We can't delete the nce since it is difficult to 3287 * recreate these without going through the 3288 * ipif down/up dance. 3289 * 3290 * All access to nce->nce_fp_mp in the case of these 3291 * is protected by nce_lock. 3292 */ 3293 mutex_enter(&nce->nce_lock); 3294 if (nce->nce_fp_mp != NULL) { 3295 freeb(nce->nce_fp_mp); 3296 nce->nce_fp_mp = NULL; 3297 mutex_exit(&nce->nce_lock); 3298 nce_fastpath(nce); 3299 } else { 3300 mutex_exit(&nce->nce_lock); 3301 } 3302 } else { 3303 /* Just delete the NCE... */ 3304 ndp_delete(nce); 3305 } 3306 } 3307 3308 /* 3309 * Return a pointer to a given option in the packet. 3310 * Assumes that option part of the packet have already been validated. 3311 */ 3312 nd_opt_hdr_t * 3313 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3314 { 3315 while (optlen > 0) { 3316 if (opt->nd_opt_type == opt_type) 3317 return (opt); 3318 optlen -= 8 * opt->nd_opt_len; 3319 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3320 } 3321 return (NULL); 3322 } 3323 3324 /* 3325 * Verify all option lengths present are > 0, also check to see 3326 * if the option lengths and packet length are consistent. 3327 */ 3328 boolean_t 3329 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3330 { 3331 ASSERT(opt != NULL); 3332 while (optlen > 0) { 3333 if (opt->nd_opt_len == 0) 3334 return (B_FALSE); 3335 optlen -= 8 * opt->nd_opt_len; 3336 if (optlen < 0) 3337 return (B_FALSE); 3338 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3339 } 3340 return (B_TRUE); 3341 } 3342 3343 /* 3344 * ndp_walk function. 3345 * Free a fraction of the NCE cache entries. 3346 * A fraction of zero means to not free any in that category. 3347 */ 3348 void 3349 ndp_cache_reclaim(nce_t *nce, char *arg) 3350 { 3351 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3352 uint_t rand; 3353 3354 if (nce->nce_flags & NCE_F_PERMANENT) 3355 return; 3356 3357 rand = (uint_t)lbolt + 3358 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3359 if (ncr->ncr_host != 0 && 3360 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3361 ndp_delete(nce); 3362 return; 3363 } 3364 } 3365 3366 /* 3367 * ndp_walk function. 3368 * Count the number of NCEs that can be deleted. 3369 * These would be hosts but not routers. 3370 */ 3371 void 3372 ndp_cache_count(nce_t *nce, char *arg) 3373 { 3374 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3375 3376 if (nce->nce_flags & NCE_F_PERMANENT) 3377 return; 3378 3379 ncc->ncc_total++; 3380 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3381 ncc->ncc_host++; 3382 } 3383 3384 #ifdef DEBUG 3385 void 3386 nce_trace_ref(nce_t *nce) 3387 { 3388 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3389 3390 if (nce->nce_trace_disable) 3391 return; 3392 3393 if (!th_trace_ref(nce, nce->nce_ill->ill_ipst)) { 3394 nce->nce_trace_disable = B_TRUE; 3395 nce_trace_cleanup(nce); 3396 } 3397 } 3398 3399 void 3400 nce_untrace_ref(nce_t *nce) 3401 { 3402 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3403 3404 if (!nce->nce_trace_disable) 3405 th_trace_unref(nce); 3406 } 3407 3408 static void 3409 nce_trace_cleanup(const nce_t *nce) 3410 { 3411 th_trace_cleanup(nce, nce->nce_trace_disable); 3412 } 3413 #endif 3414 3415 /* 3416 * Called when address resolution fails due to a timeout. 3417 * Send an ICMP unreachable in response to all queued packets. 3418 */ 3419 void 3420 arp_resolv_failed(nce_t *nce) 3421 { 3422 mblk_t *mp, *nxt_mp, *first_mp; 3423 char buf[INET6_ADDRSTRLEN]; 3424 zoneid_t zoneid = GLOBAL_ZONEID; 3425 struct in_addr ipv4addr; 3426 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3427 3428 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3429 ip3dbg(("arp_resolv_failed: dst %s\n", 3430 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3431 mutex_enter(&nce->nce_lock); 3432 mp = nce->nce_qd_mp; 3433 nce->nce_qd_mp = NULL; 3434 mutex_exit(&nce->nce_lock); 3435 3436 while (mp != NULL) { 3437 nxt_mp = mp->b_next; 3438 mp->b_next = NULL; 3439 mp->b_prev = NULL; 3440 3441 first_mp = mp; 3442 /* 3443 * Send icmp unreachable messages 3444 * to the hosts. 3445 */ 3446 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst); 3447 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3448 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3449 ICMP_HOST_UNREACHABLE, zoneid, ipst); 3450 mp = nxt_mp; 3451 } 3452 } 3453 3454 int 3455 ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3456 nce_t **newnce, nce_t *src_nce) 3457 { 3458 int err; 3459 nce_t *nce; 3460 in6_addr_t addr6; 3461 ip_stack_t *ipst = ill->ill_ipst; 3462 3463 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3464 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3465 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3466 nce = nce_lookup_addr(ill, &addr6, nce); 3467 if (nce == NULL) { 3468 err = ndp_add_v4(ill, addr, flags, newnce, src_nce); 3469 } else { 3470 *newnce = nce; 3471 err = EEXIST; 3472 } 3473 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3474 return (err); 3475 } 3476 3477 /* 3478 * NDP Cache Entry creation routine for IPv4. 3479 * Mapped entries are handled in arp. 3480 * This routine must always be called with ndp4->ndp_g_lock held. 3481 * Prior to return, nce_refcnt is incremented. 3482 */ 3483 static int 3484 ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3485 nce_t **newnce, nce_t *src_nce) 3486 { 3487 static nce_t nce_nil; 3488 nce_t *nce; 3489 mblk_t *mp; 3490 mblk_t *template = NULL; 3491 nce_t **ncep; 3492 ip_stack_t *ipst = ill->ill_ipst; 3493 uint16_t state = ND_INITIAL; 3494 int err; 3495 3496 ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock)); 3497 ASSERT(!ill->ill_isv6); 3498 ASSERT((flags & NCE_F_MAPPING) == 0); 3499 3500 if (ill->ill_resolver_mp == NULL) 3501 return (EINVAL); 3502 /* 3503 * Allocate the mblk to hold the nce. 3504 */ 3505 mp = allocb(sizeof (nce_t), BPRI_MED); 3506 if (mp == NULL) 3507 return (ENOMEM); 3508 3509 nce = (nce_t *)mp->b_rptr; 3510 mp->b_wptr = (uchar_t *)&nce[1]; 3511 *nce = nce_nil; 3512 nce->nce_ill = ill; 3513 nce->nce_ipversion = IPV4_VERSION; 3514 nce->nce_flags = flags; 3515 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3516 nce->nce_rcnt = ill->ill_xmit_count; 3517 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3518 nce->nce_mask = ipv6_all_ones; 3519 nce->nce_extract_mask = ipv6_all_zeros; 3520 nce->nce_ll_extract_start = 0; 3521 nce->nce_qd_mp = NULL; 3522 nce->nce_mp = mp; 3523 /* This one is for nce getting created */ 3524 nce->nce_refcnt = 1; 3525 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3526 ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3527 3528 nce->nce_trace_disable = B_FALSE; 3529 3530 if (src_nce != NULL) { 3531 /* 3532 * src_nce has been provided by the caller. The only 3533 * caller who provides a non-null, non-broadcast 3534 * src_nce is from ip_newroute() which must pass in 3535 * a ND_REACHABLE src_nce (this condition is verified 3536 * via an ASSERT for the save_ire->ire_nce in ip_newroute()) 3537 */ 3538 mutex_enter(&src_nce->nce_lock); 3539 state = src_nce->nce_state; 3540 if ((src_nce->nce_flags & NCE_F_CONDEMNED) || 3541 (ipst->ips_ndp4->ndp_g_hw_change > 0)) { 3542 /* 3543 * src_nce has been deleted, or 3544 * ip_arp_news is in the middle of 3545 * flushing entries in the the nce. 3546 * Fail the add, since we don't know 3547 * if it is safe to copy the contents of 3548 * src_nce 3549 */ 3550 DTRACE_PROBE2(nce__bad__src__nce, 3551 nce_t *, src_nce, ill_t *, ill); 3552 mutex_exit(&src_nce->nce_lock); 3553 err = EINVAL; 3554 goto err_ret; 3555 } 3556 template = copyb(src_nce->nce_res_mp); 3557 mutex_exit(&src_nce->nce_lock); 3558 if (template == NULL) { 3559 err = ENOMEM; 3560 goto err_ret; 3561 } 3562 } else if (flags & NCE_F_BCAST) { 3563 /* 3564 * broadcast nce. 3565 */ 3566 template = copyb(ill->ill_bcast_mp); 3567 if (template == NULL) { 3568 err = ENOMEM; 3569 goto err_ret; 3570 } 3571 state = ND_REACHABLE; 3572 } else if (ill->ill_net_type == IRE_IF_NORESOLVER) { 3573 /* 3574 * NORESOLVER entries are always created in the REACHABLE 3575 * state. We create a nce_res_mp with the IP nexthop address 3576 * in the destination address in the DLPI hdr if the 3577 * physical length is exactly 4 bytes. 3578 * 3579 * XXX not clear which drivers set ill_phys_addr_length to 3580 * IP_ADDR_LEN. 3581 */ 3582 if (ill->ill_phys_addr_length == IP_ADDR_LEN) { 3583 template = ill_dlur_gen((uchar_t *)addr, 3584 ill->ill_phys_addr_length, 3585 ill->ill_sap, ill->ill_sap_length); 3586 } else { 3587 template = copyb(ill->ill_resolver_mp); 3588 } 3589 if (template == NULL) { 3590 err = ENOMEM; 3591 goto err_ret; 3592 } 3593 state = ND_REACHABLE; 3594 } 3595 nce->nce_fp_mp = NULL; 3596 nce->nce_res_mp = template; 3597 nce->nce_state = state; 3598 if (state == ND_REACHABLE) { 3599 nce->nce_last = TICK_TO_MSEC(lbolt64); 3600 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3601 } else { 3602 nce->nce_last = 0; 3603 if (state == ND_INITIAL) 3604 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3605 } 3606 3607 ASSERT((nce->nce_res_mp == NULL && nce->nce_state == ND_INITIAL) || 3608 (nce->nce_res_mp != NULL && nce->nce_state == ND_REACHABLE)); 3609 /* 3610 * Atomically ensure that the ill is not CONDEMNED, before 3611 * adding the NCE. 3612 */ 3613 mutex_enter(&ill->ill_lock); 3614 if (ill->ill_state_flags & ILL_CONDEMNED) { 3615 mutex_exit(&ill->ill_lock); 3616 err = EINVAL; 3617 goto err_ret; 3618 } 3619 if ((nce->nce_next = *ncep) != NULL) 3620 nce->nce_next->nce_ptpn = &nce->nce_next; 3621 *ncep = nce; 3622 nce->nce_ptpn = ncep; 3623 *newnce = nce; 3624 /* This one is for nce being used by an active thread */ 3625 NCE_REFHOLD(*newnce); 3626 3627 /* Bump up the number of nce's referencing this ill */ 3628 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 3629 (char *), "nce", (void *), nce); 3630 ill->ill_nce_cnt++; 3631 mutex_exit(&ill->ill_lock); 3632 DTRACE_PROBE1(ndp__add__v4, nce_t *, nce); 3633 return (0); 3634 err_ret: 3635 freeb(mp); 3636 freemsg(template); 3637 return (err); 3638 } 3639 3640 /* 3641 * ndp_walk routine to delete all entries that have a given destination or 3642 * gateway address and cached link layer (MAC) address. This is used when ARP 3643 * informs us that a network-to-link-layer mapping may have changed. 3644 */ 3645 void 3646 nce_delete_hw_changed(nce_t *nce, void *arg) 3647 { 3648 nce_hw_map_t *hwm = arg; 3649 mblk_t *mp; 3650 dl_unitdata_req_t *dlu; 3651 uchar_t *macaddr; 3652 ill_t *ill; 3653 int saplen; 3654 ipaddr_t nce_addr; 3655 3656 if (nce->nce_state != ND_REACHABLE) 3657 return; 3658 3659 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3660 if (nce_addr != hwm->hwm_addr) 3661 return; 3662 3663 mutex_enter(&nce->nce_lock); 3664 if ((mp = nce->nce_res_mp) == NULL) { 3665 mutex_exit(&nce->nce_lock); 3666 return; 3667 } 3668 dlu = (dl_unitdata_req_t *)mp->b_rptr; 3669 macaddr = (uchar_t *)(dlu + 1); 3670 ill = nce->nce_ill; 3671 if ((saplen = ill->ill_sap_length) > 0) 3672 macaddr += saplen; 3673 else 3674 saplen = -saplen; 3675 3676 /* 3677 * If the hardware address is unchanged, then leave this one alone. 3678 * Note that saplen == abs(saplen) now. 3679 */ 3680 if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && 3681 bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { 3682 mutex_exit(&nce->nce_lock); 3683 return; 3684 } 3685 mutex_exit(&nce->nce_lock); 3686 3687 DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); 3688 ndp_delete(nce); 3689 } 3690 3691 /* 3692 * This function verifies whether a given IPv4 address is potentially known to 3693 * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t, 3694 * so that it can continue to look for hardware changes on that address. 3695 */ 3696 boolean_t 3697 ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns) 3698 { 3699 nce_t *nce; 3700 struct in_addr nceaddr; 3701 ip_stack_t *ipst = ns->netstack_ip; 3702 3703 if (addr == INADDR_ANY) 3704 return (B_FALSE); 3705 3706 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3707 nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr); 3708 for (; nce != NULL; nce = nce->nce_next) { 3709 /* Note that only v4 mapped entries are in the table. */ 3710 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 3711 if (addr == nceaddr.s_addr && 3712 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 3713 /* Single flag check; no lock needed */ 3714 if (!(nce->nce_flags & NCE_F_CONDEMNED)) 3715 break; 3716 } 3717 } 3718 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3719 return (nce != NULL); 3720 } 3721