1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/sysmacros.h> 33 #include <sys/errno.h> 34 #include <sys/dlpi.h> 35 #include <sys/socket.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/vtrace.h> 41 #include <sys/kmem.h> 42 #include <sys/zone.h> 43 #include <sys/ethernet.h> 44 #include <sys/sdt.h> 45 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/if_dl.h> 49 #include <net/route.h> 50 #include <netinet/in.h> 51 #include <netinet/ip6.h> 52 #include <netinet/icmp6.h> 53 54 #include <inet/common.h> 55 #include <inet/mi.h> 56 #include <inet/mib2.h> 57 #include <inet/nd.h> 58 #include <inet/ip.h> 59 #include <inet/ip_impl.h> 60 #include <inet/ipclassifier.h> 61 #include <inet/ip_if.h> 62 #include <inet/ip_ire.h> 63 #include <inet/ip_rts.h> 64 #include <inet/ip6.h> 65 #include <inet/ip_ndp.h> 66 #include <inet/ipsec_impl.h> 67 #include <inet/ipsec_info.h> 68 #include <inet/sctp_ip.h> 69 70 /* 71 * Function names with nce_ prefix are static while function 72 * names with ndp_ prefix are used by rest of the IP. 73 * 74 * Lock ordering: 75 * 76 * ndp_g_lock -> ill_lock -> nce_lock 77 * 78 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 79 * nce_next. Nce_lock protects the contents of the NCE (particularly 80 * nce_refcnt). 81 */ 82 83 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 84 uint32_t ll_addr_len); 85 static void nce_ire_delete(nce_t *nce); 86 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 87 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 88 static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *, nce_t *); 89 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 90 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 91 uchar_t *addr); 92 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 93 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 94 static void nce_report1(nce_t *nce, uchar_t *mp_arg); 95 static mblk_t *nce_udreq_alloc(ill_t *ill); 96 static void nce_update(nce_t *nce, uint16_t new_state, 97 uchar_t *new_ll_addr); 98 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 99 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 100 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 101 const in6_addr_t *target, int flag); 102 extern void th_trace_rrecord(th_trace_t *); 103 static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t, 104 nce_t **, nce_t *); 105 106 /* 107 * We track the time of creation of the nce in the nce_init_time field 108 * of IPv4 nce_t entries. If an nce is stuck in the ND_INITIAL state for 109 * more than NCE_STUCK_TIMEOUT milliseconds, trigger the nce-stuck dtrace 110 * probe to assist in debugging. This probe will be fired from 111 * nce_thread_exit() for debug kernels, and from nce_report1() when 112 * 'ndd -get /dev/ip ip_ndp_cache_report' is invoked on both debug and 113 * non-debug kernels. 114 */ 115 #define NCE_STUCK_TIMEOUT 120000 116 117 #ifdef NCE_DEBUG 118 void nce_trace_inactive(nce_t *); 119 #endif 120 121 #define NCE_HASH_PTR_V4(ipst, addr) \ 122 (&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 123 124 #define NCE_HASH_PTR_V6(ipst, addr) \ 125 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ 126 NCE_TABLE_SIZE)])) 127 128 /* 129 * Compute default flags to use for an advertisement of this nce's address. 130 */ 131 static int 132 nce_advert_flags(const nce_t *nce) 133 { 134 int flag = 0; 135 136 if (nce->nce_flags & NCE_F_ISROUTER) 137 flag |= NDP_ISROUTER; 138 return (flag); 139 } 140 141 /* Non-tunable probe interval, based on link capabilities */ 142 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 143 144 /* 145 * NDP Cache Entry creation routine. 146 * Mapped entries will never do NUD . 147 * This routine must always be called with ndp6->ndp_g_lock held. 148 * Prior to return, nce_refcnt is incremented. 149 */ 150 int 151 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 152 const in6_addr_t *mask, const in6_addr_t *extract_mask, 153 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 154 nce_t **newnce) 155 { 156 static nce_t nce_nil; 157 nce_t *nce; 158 mblk_t *mp; 159 mblk_t *template; 160 nce_t **ncep; 161 int err; 162 boolean_t dropped = B_FALSE; 163 ip_stack_t *ipst = ill->ill_ipst; 164 165 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 166 ASSERT(ill != NULL && ill->ill_isv6); 167 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 168 ip0dbg(("ndp_add_v6: no addr\n")); 169 return (EINVAL); 170 } 171 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 172 ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags)); 173 return (EINVAL); 174 } 175 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 176 (flags & NCE_F_MAPPING)) { 177 ip0dbg(("ndp_add_v6: extract mask zero for mapping")); 178 return (EINVAL); 179 } 180 /* 181 * Allocate the mblk to hold the nce. 182 * 183 * XXX This can come out of a separate cache - nce_cache. 184 * We don't need the mp anymore as there are no more 185 * "qwriter"s 186 */ 187 mp = allocb(sizeof (nce_t), BPRI_MED); 188 if (mp == NULL) 189 return (ENOMEM); 190 191 nce = (nce_t *)mp->b_rptr; 192 mp->b_wptr = (uchar_t *)&nce[1]; 193 *nce = nce_nil; 194 195 /* 196 * This one holds link layer address 197 */ 198 if (ill->ill_net_type == IRE_IF_RESOLVER) { 199 template = nce_udreq_alloc(ill); 200 } else { 201 if (ill->ill_resolver_mp == NULL) { 202 freeb(mp); 203 return (EINVAL); 204 } 205 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 206 template = copyb(ill->ill_resolver_mp); 207 } 208 if (template == NULL) { 209 freeb(mp); 210 return (ENOMEM); 211 } 212 nce->nce_ill = ill; 213 nce->nce_ipversion = IPV6_VERSION; 214 nce->nce_flags = flags; 215 nce->nce_state = state; 216 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 217 nce->nce_rcnt = ill->ill_xmit_count; 218 nce->nce_addr = *addr; 219 nce->nce_mask = *mask; 220 nce->nce_extract_mask = *extract_mask; 221 nce->nce_ll_extract_start = hw_extract_start; 222 nce->nce_fp_mp = NULL; 223 nce->nce_res_mp = template; 224 if (state == ND_REACHABLE) 225 nce->nce_last = TICK_TO_MSEC(lbolt64); 226 else 227 nce->nce_last = 0; 228 nce->nce_qd_mp = NULL; 229 nce->nce_mp = mp; 230 if (hw_addr != NULL) 231 nce_set_ll(nce, hw_addr); 232 /* This one is for nce getting created */ 233 nce->nce_refcnt = 1; 234 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 235 if (nce->nce_flags & NCE_F_MAPPING) { 236 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 237 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 238 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 239 ncep = &ipst->ips_ndp6->nce_mask_entries; 240 } else { 241 ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 242 } 243 244 #ifdef NCE_DEBUG 245 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 246 #endif 247 /* 248 * Atomically ensure that the ill is not CONDEMNED, before 249 * adding the NCE. 250 */ 251 mutex_enter(&ill->ill_lock); 252 if (ill->ill_state_flags & ILL_CONDEMNED) { 253 mutex_exit(&ill->ill_lock); 254 freeb(mp); 255 freeb(template); 256 return (EINVAL); 257 } 258 if ((nce->nce_next = *ncep) != NULL) 259 nce->nce_next->nce_ptpn = &nce->nce_next; 260 *ncep = nce; 261 nce->nce_ptpn = ncep; 262 *newnce = nce; 263 /* This one is for nce being used by an active thread */ 264 NCE_REFHOLD(*newnce); 265 266 /* Bump up the number of nce's referencing this ill */ 267 ill->ill_nce_cnt++; 268 mutex_exit(&ill->ill_lock); 269 270 err = 0; 271 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 272 mutex_enter(&nce->nce_lock); 273 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 274 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 275 mutex_exit(&nce->nce_lock); 276 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 277 &ipv6_all_zeros, addr, NDP_PROBE); 278 if (dropped) { 279 mutex_enter(&nce->nce_lock); 280 nce->nce_pcnt++; 281 mutex_exit(&nce->nce_lock); 282 } 283 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 284 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 285 err = EINPROGRESS; 286 } else if (flags & NCE_F_UNSOL_ADV) { 287 /* 288 * We account for the transmit below by assigning one 289 * less than the ndd variable. Subsequent decrements 290 * are done in ndp_timer. 291 */ 292 mutex_enter(&nce->nce_lock); 293 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 294 nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1; 295 mutex_exit(&nce->nce_lock); 296 dropped = nce_xmit(ill, 297 ND_NEIGHBOR_ADVERT, 298 ill, /* ill to be used for extracting ill_nd_lla */ 299 B_TRUE, /* use ill_nd_lla */ 300 addr, /* Source and target of the advertisement pkt */ 301 &ipv6_all_hosts_mcast, /* Destination of the packet */ 302 nce_advert_flags(nce)); 303 mutex_enter(&nce->nce_lock); 304 if (dropped) 305 nce->nce_unsolicit_count++; 306 if (nce->nce_unsolicit_count != 0) { 307 nce->nce_timeout_id = timeout(ndp_timer, nce, 308 MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval)); 309 } 310 mutex_exit(&nce->nce_lock); 311 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 312 } 313 /* 314 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 315 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 316 * We call nce_fastpath from nce_update if the link layer address of 317 * the peer changes from nce_update 318 */ 319 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 320 nce_fastpath(nce); 321 return (err); 322 } 323 324 int 325 ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 326 const in6_addr_t *mask, const in6_addr_t *extract_mask, 327 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 328 nce_t **newnce) 329 { 330 int err = 0; 331 nce_t *nce; 332 ip_stack_t *ipst = ill->ill_ipst; 333 334 ASSERT(ill->ill_isv6); 335 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 336 337 /* Get head of v6 hash table */ 338 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 339 nce = nce_lookup_addr(ill, addr, nce); 340 if (nce == NULL) { 341 err = ndp_add_v6(ill, 342 hw_addr, 343 addr, 344 mask, 345 extract_mask, 346 hw_extract_start, 347 flags, 348 state, 349 newnce); 350 } else { 351 *newnce = nce; 352 err = EEXIST; 353 } 354 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 355 return (err); 356 } 357 358 /* 359 * Remove all the CONDEMNED nces from the appropriate hash table. 360 * We create a private list of NCEs, these may have ires pointing 361 * to them, so the list will be passed through to clean up dependent 362 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 363 */ 364 static void 365 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 366 { 367 nce_t *nce1; 368 nce_t **ptpn; 369 370 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 371 ASSERT(ndp->ndp_g_walker == 0); 372 for (; nce; nce = nce1) { 373 nce1 = nce->nce_next; 374 mutex_enter(&nce->nce_lock); 375 if (nce->nce_flags & NCE_F_CONDEMNED) { 376 ptpn = nce->nce_ptpn; 377 nce1 = nce->nce_next; 378 if (nce1 != NULL) 379 nce1->nce_ptpn = ptpn; 380 *ptpn = nce1; 381 nce->nce_ptpn = NULL; 382 nce->nce_next = NULL; 383 nce->nce_next = *free_nce_list; 384 *free_nce_list = nce; 385 } 386 mutex_exit(&nce->nce_lock); 387 } 388 } 389 390 /* 391 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 392 * will return this NCE. Also no new IREs will be created that 393 * point to this NCE (See ire_add_v6). Also no new timeouts will 394 * be started (See NDP_RESTART_TIMER). 395 * 2. Cancel any currently running timeouts. 396 * 3. If there is an ndp walker, return. The walker will do the cleanup. 397 * This ensures that walkers see a consistent list of NCEs while walking. 398 * 4. Otherwise remove the NCE from the list of NCEs 399 * 5. Delete all IREs pointing to this NCE. 400 */ 401 void 402 ndp_delete(nce_t *nce) 403 { 404 nce_t **ptpn; 405 nce_t *nce1; 406 int ipversion = nce->nce_ipversion; 407 ndp_g_t *ndp; 408 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 409 410 if (ipversion == IPV4_VERSION) 411 ndp = ipst->ips_ndp4; 412 else 413 ndp = ipst->ips_ndp6; 414 415 /* Serialize deletes */ 416 mutex_enter(&nce->nce_lock); 417 if (nce->nce_flags & NCE_F_CONDEMNED) { 418 /* Some other thread is doing the delete */ 419 mutex_exit(&nce->nce_lock); 420 return; 421 } 422 /* 423 * Caller has a refhold. Also 1 ref for being in the list. Thus 424 * refcnt has to be >= 2 425 */ 426 ASSERT(nce->nce_refcnt >= 2); 427 nce->nce_flags |= NCE_F_CONDEMNED; 428 mutex_exit(&nce->nce_lock); 429 430 nce_fastpath_list_delete(nce); 431 432 /* 433 * Cancel any running timer. Timeout can't be restarted 434 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 435 * Passing invalid timeout id is fine. 436 */ 437 if (nce->nce_timeout_id != 0) { 438 (void) untimeout(nce->nce_timeout_id); 439 nce->nce_timeout_id = 0; 440 } 441 442 mutex_enter(&ndp->ndp_g_lock); 443 if (nce->nce_ptpn == NULL) { 444 /* 445 * The last ndp walker has already removed this nce from 446 * the list after we marked the nce CONDEMNED and before 447 * we grabbed the global lock. 448 */ 449 mutex_exit(&ndp->ndp_g_lock); 450 return; 451 } 452 if (ndp->ndp_g_walker > 0) { 453 /* 454 * Can't unlink. The walker will clean up 455 */ 456 ndp->ndp_g_walker_cleanup = B_TRUE; 457 mutex_exit(&ndp->ndp_g_lock); 458 return; 459 } 460 461 /* 462 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 463 * the timer since it is marked CONDEMNED. 464 */ 465 ptpn = nce->nce_ptpn; 466 nce1 = nce->nce_next; 467 if (nce1 != NULL) 468 nce1->nce_ptpn = ptpn; 469 *ptpn = nce1; 470 nce->nce_ptpn = NULL; 471 nce->nce_next = NULL; 472 mutex_exit(&ndp->ndp_g_lock); 473 474 nce_ire_delete(nce); 475 } 476 477 void 478 ndp_inactive(nce_t *nce) 479 { 480 mblk_t **mpp; 481 ill_t *ill; 482 483 ASSERT(nce->nce_refcnt == 0); 484 ASSERT(MUTEX_HELD(&nce->nce_lock)); 485 ASSERT(nce->nce_fastpath == NULL); 486 487 /* Free all nce allocated messages */ 488 mpp = &nce->nce_first_mp_to_free; 489 do { 490 while (*mpp != NULL) { 491 mblk_t *mp; 492 493 mp = *mpp; 494 *mpp = mp->b_next; 495 496 inet_freemsg(mp); 497 } 498 } while (mpp++ != &nce->nce_last_mp_to_free); 499 500 #ifdef NCE_DEBUG 501 nce_trace_inactive(nce); 502 #endif 503 504 ill = nce->nce_ill; 505 mutex_enter(&ill->ill_lock); 506 ill->ill_nce_cnt--; 507 /* 508 * If the number of nce's associated with this ill have dropped 509 * to zero, check whether we need to restart any operation that 510 * is waiting for this to happen. 511 */ 512 if (ill->ill_nce_cnt == 0) { 513 /* ipif_ill_refrele_tail drops the ill_lock */ 514 ipif_ill_refrele_tail(ill); 515 } else { 516 mutex_exit(&ill->ill_lock); 517 } 518 mutex_destroy(&nce->nce_lock); 519 if (nce->nce_mp != NULL) 520 inet_freemsg(nce->nce_mp); 521 } 522 523 /* 524 * ndp_walk routine. Delete the nce if it is associated with the ill 525 * that is going away. Always called as a writer. 526 */ 527 void 528 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 529 { 530 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 531 ndp_delete(nce); 532 } 533 } 534 535 /* 536 * Walk a list of to be inactive NCEs and blow away all the ires. 537 */ 538 static void 539 nce_ire_delete_list(nce_t *nce) 540 { 541 nce_t *nce_next; 542 543 ASSERT(nce != NULL); 544 while (nce != NULL) { 545 nce_next = nce->nce_next; 546 nce->nce_next = NULL; 547 548 /* 549 * It is possible for the last ndp walker (this thread) 550 * to come here after ndp_delete has marked the nce CONDEMNED 551 * and before it has removed the nce from the fastpath list 552 * or called untimeout. So we need to do it here. It is safe 553 * for both ndp_delete and this thread to do it twice or 554 * even simultaneously since each of the threads has a 555 * reference on the nce. 556 */ 557 nce_fastpath_list_delete(nce); 558 /* 559 * Cancel any running timer. Timeout can't be restarted 560 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 561 * Passing invalid timeout id is fine. 562 */ 563 if (nce->nce_timeout_id != 0) { 564 (void) untimeout(nce->nce_timeout_id); 565 nce->nce_timeout_id = 0; 566 } 567 /* 568 * We might hit this func thus in the v4 case: 569 * ipif_down->ipif_ndp_down->ndp_walk 570 */ 571 572 if (nce->nce_ipversion == IPV4_VERSION) { 573 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 574 IRE_CACHE, nce_ire_delete1, 575 (char *)nce, nce->nce_ill); 576 } else { 577 ASSERT(nce->nce_ipversion == IPV6_VERSION); 578 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 579 IRE_CACHE, nce_ire_delete1, 580 (char *)nce, nce->nce_ill); 581 } 582 NCE_REFRELE_NOTR(nce); 583 nce = nce_next; 584 } 585 } 586 587 /* 588 * Delete an ire when the nce goes away. 589 */ 590 /* ARGSUSED */ 591 static void 592 nce_ire_delete(nce_t *nce) 593 { 594 if (nce->nce_ipversion == IPV6_VERSION) { 595 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 596 nce_ire_delete1, (char *)nce, nce->nce_ill); 597 NCE_REFRELE_NOTR(nce); 598 } else { 599 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 600 nce_ire_delete1, (char *)nce, nce->nce_ill); 601 NCE_REFRELE_NOTR(nce); 602 } 603 } 604 605 /* 606 * ire_walk routine used to delete every IRE that shares this nce 607 */ 608 static void 609 nce_ire_delete1(ire_t *ire, char *nce_arg) 610 { 611 nce_t *nce = (nce_t *)nce_arg; 612 613 ASSERT(ire->ire_type == IRE_CACHE); 614 615 if (ire->ire_nce == nce) { 616 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 617 ire_delete(ire); 618 } 619 } 620 621 /* 622 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 623 */ 624 boolean_t 625 ndp_restart_dad(nce_t *nce) 626 { 627 boolean_t started; 628 boolean_t dropped; 629 630 if (nce == NULL) 631 return (B_FALSE); 632 mutex_enter(&nce->nce_lock); 633 if (nce->nce_state == ND_PROBE) { 634 mutex_exit(&nce->nce_lock); 635 started = B_TRUE; 636 } else if (nce->nce_state == ND_REACHABLE) { 637 nce->nce_state = ND_PROBE; 638 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 639 mutex_exit(&nce->nce_lock); 640 dropped = nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, NULL, 641 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, NDP_PROBE); 642 if (dropped) { 643 mutex_enter(&nce->nce_lock); 644 nce->nce_pcnt++; 645 mutex_exit(&nce->nce_lock); 646 } 647 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 648 started = B_TRUE; 649 } else { 650 mutex_exit(&nce->nce_lock); 651 started = B_FALSE; 652 } 653 return (started); 654 } 655 656 /* 657 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 658 * If one is found, the refcnt on the nce will be incremented. 659 */ 660 nce_t * 661 ndp_lookup_v6(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 662 { 663 nce_t *nce; 664 ip_stack_t *ipst; 665 666 ASSERT(ill != NULL); 667 ipst = ill->ill_ipst; 668 669 ASSERT(ill != NULL && ill->ill_isv6); 670 if (!caller_holds_lock) { 671 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 672 } 673 674 /* Get head of v6 hash table */ 675 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 676 nce = nce_lookup_addr(ill, addr, nce); 677 if (nce == NULL) 678 nce = nce_lookup_mapping(ill, addr); 679 if (!caller_holds_lock) 680 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 681 return (nce); 682 } 683 /* 684 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 685 * If one is found, the refcnt on the nce will be incremented. 686 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 687 * so we skip the nce_lookup_mapping call. 688 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 689 */ 690 nce_t * 691 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 692 { 693 nce_t *nce; 694 in6_addr_t addr6; 695 ip_stack_t *ipst = ill->ill_ipst; 696 697 if (!caller_holds_lock) { 698 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 699 } 700 701 /* Get head of v4 hash table */ 702 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 703 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 704 nce = nce_lookup_addr(ill, &addr6, nce); 705 if (!caller_holds_lock) 706 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 707 return (nce); 708 } 709 710 /* 711 * Cache entry lookup. Try to find an nce matching the parameters passed. 712 * Look only for exact entries (no mappings). If an nce is found, increment 713 * the hold count on that nce. The caller passes in the start of the 714 * appropriate hash table, and must be holding the appropriate global 715 * lock (ndp_g_lock). 716 */ 717 static nce_t * 718 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr, nce_t *nce) 719 { 720 ndp_g_t *ndp; 721 ip_stack_t *ipst = ill->ill_ipst; 722 723 if (ill->ill_isv6) 724 ndp = ipst->ips_ndp6; 725 else 726 ndp = ipst->ips_ndp4; 727 728 ASSERT(ill != NULL); 729 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 730 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 731 return (NULL); 732 for (; nce != NULL; nce = nce->nce_next) { 733 if (nce->nce_ill == ill) { 734 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 735 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 736 &ipv6_all_ones)) { 737 mutex_enter(&nce->nce_lock); 738 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 739 NCE_REFHOLD_LOCKED(nce); 740 mutex_exit(&nce->nce_lock); 741 break; 742 } 743 mutex_exit(&nce->nce_lock); 744 } 745 } 746 } 747 return (nce); 748 } 749 750 /* 751 * Cache entry lookup. Try to find an nce matching the parameters passed. 752 * Look only for mappings. 753 */ 754 static nce_t * 755 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 756 { 757 nce_t *nce; 758 ip_stack_t *ipst = ill->ill_ipst; 759 760 ASSERT(ill != NULL && ill->ill_isv6); 761 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 762 if (!IN6_IS_ADDR_MULTICAST(addr)) 763 return (NULL); 764 nce = ipst->ips_ndp6->nce_mask_entries; 765 for (; nce != NULL; nce = nce->nce_next) 766 if (nce->nce_ill == ill && 767 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 768 mutex_enter(&nce->nce_lock); 769 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 770 NCE_REFHOLD_LOCKED(nce); 771 mutex_exit(&nce->nce_lock); 772 break; 773 } 774 mutex_exit(&nce->nce_lock); 775 } 776 return (nce); 777 } 778 779 /* 780 * Process passed in parameters either from an incoming packet or via 781 * user ioctl. 782 */ 783 void 784 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 785 { 786 ill_t *ill = nce->nce_ill; 787 uint32_t hw_addr_len = ill->ill_nd_lla_len; 788 mblk_t *mp; 789 boolean_t ll_updated = B_FALSE; 790 boolean_t ll_changed; 791 ip_stack_t *ipst = ill->ill_ipst; 792 793 ASSERT(nce->nce_ipversion == IPV6_VERSION); 794 /* 795 * No updates of link layer address or the neighbor state is 796 * allowed, when the cache is in NONUD state. This still 797 * allows for responding to reachability solicitation. 798 */ 799 mutex_enter(&nce->nce_lock); 800 if (nce->nce_state == ND_INCOMPLETE) { 801 if (hw_addr == NULL) { 802 mutex_exit(&nce->nce_lock); 803 return; 804 } 805 nce_set_ll(nce, hw_addr); 806 /* 807 * Update nce state and send the queued packets 808 * back to ip this time ire will be added. 809 */ 810 if (flag & ND_NA_FLAG_SOLICITED) { 811 nce_update(nce, ND_REACHABLE, NULL); 812 } else { 813 nce_update(nce, ND_STALE, NULL); 814 } 815 mutex_exit(&nce->nce_lock); 816 nce_fastpath(nce); 817 mutex_enter(&nce->nce_lock); 818 mp = nce->nce_qd_mp; 819 nce->nce_qd_mp = NULL; 820 mutex_exit(&nce->nce_lock); 821 while (mp != NULL) { 822 mblk_t *nxt_mp, *data_mp; 823 824 nxt_mp = mp->b_next; 825 mp->b_next = NULL; 826 827 if (mp->b_datap->db_type == M_CTL) 828 data_mp = mp->b_cont; 829 else 830 data_mp = mp; 831 if (data_mp->b_prev != NULL) { 832 ill_t *inbound_ill; 833 queue_t *fwdq = NULL; 834 uint_t ifindex; 835 836 ifindex = (uint_t)(uintptr_t)data_mp->b_prev; 837 inbound_ill = ill_lookup_on_ifindex(ifindex, 838 B_TRUE, NULL, NULL, NULL, NULL, ipst); 839 if (inbound_ill == NULL) { 840 data_mp->b_prev = NULL; 841 freemsg(mp); 842 return; 843 } else { 844 fwdq = inbound_ill->ill_rq; 845 } 846 data_mp->b_prev = NULL; 847 /* 848 * Send a forwarded packet back into ip_rput_v6 849 * just as in ire_send_v6(). 850 * Extract the queue from b_prev (set in 851 * ip_rput_data_v6). 852 */ 853 if (fwdq != NULL) { 854 /* 855 * Forwarded packets hop count will 856 * get decremented in ip_rput_data_v6 857 */ 858 if (data_mp != mp) 859 freeb(mp); 860 put(fwdq, data_mp); 861 } else { 862 /* 863 * Send locally originated packets back 864 * into * ip_wput_v6. 865 */ 866 put(ill->ill_wq, mp); 867 } 868 ill_refrele(inbound_ill); 869 } else { 870 put(ill->ill_wq, mp); 871 } 872 mp = nxt_mp; 873 } 874 return; 875 } 876 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 877 if (!is_adv) { 878 /* If this is a SOLICITATION request only */ 879 if (ll_changed) 880 nce_update(nce, ND_STALE, hw_addr); 881 mutex_exit(&nce->nce_lock); 882 return; 883 } 884 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 885 /* If in any other state than REACHABLE, ignore */ 886 if (nce->nce_state == ND_REACHABLE) { 887 nce_update(nce, ND_STALE, NULL); 888 } 889 mutex_exit(&nce->nce_lock); 890 return; 891 } else { 892 if (ll_changed) { 893 nce_update(nce, ND_UNCHANGED, hw_addr); 894 ll_updated = B_TRUE; 895 } 896 if (flag & ND_NA_FLAG_SOLICITED) { 897 nce_update(nce, ND_REACHABLE, NULL); 898 } else { 899 if (ll_updated) { 900 nce_update(nce, ND_STALE, NULL); 901 } 902 } 903 mutex_exit(&nce->nce_lock); 904 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 905 NCE_F_ISROUTER)) { 906 ire_t *ire; 907 908 /* 909 * Router turned to host. We need to remove the 910 * entry as well as any default route that may be 911 * using this as a next hop. This is required by 912 * section 7.2.5 of RFC 2461. 913 */ 914 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 915 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 916 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 917 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 918 MATCH_IRE_DEFAULT, ipst); 919 if (ire != NULL) { 920 ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); 921 ire_delete(ire); 922 ire_refrele(ire); 923 } 924 ndp_delete(nce); 925 } 926 } 927 } 928 929 /* 930 * Pass arg1 to the pfi supplied, along with each nce in existence. 931 * ndp_walk() places a REFHOLD on the nce and drops the lock when 932 * walking the hash list. 933 */ 934 void 935 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 936 boolean_t trace) 937 { 938 939 nce_t *nce; 940 nce_t *nce1; 941 nce_t **ncep; 942 nce_t *free_nce_list = NULL; 943 944 mutex_enter(&ndp->ndp_g_lock); 945 /* Prevent ndp_delete from unlink and free of NCE */ 946 ndp->ndp_g_walker++; 947 mutex_exit(&ndp->ndp_g_lock); 948 for (ncep = ndp->nce_hash_tbl; 949 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 950 for (nce = *ncep; nce != NULL; nce = nce1) { 951 nce1 = nce->nce_next; 952 if (ill == NULL || nce->nce_ill == ill) { 953 if (trace) { 954 NCE_REFHOLD(nce); 955 (*pfi)(nce, arg1); 956 NCE_REFRELE(nce); 957 } else { 958 NCE_REFHOLD_NOTR(nce); 959 (*pfi)(nce, arg1); 960 NCE_REFRELE_NOTR(nce); 961 } 962 } 963 } 964 } 965 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 966 nce1 = nce->nce_next; 967 if (ill == NULL || nce->nce_ill == ill) { 968 if (trace) { 969 NCE_REFHOLD(nce); 970 (*pfi)(nce, arg1); 971 NCE_REFRELE(nce); 972 } else { 973 NCE_REFHOLD_NOTR(nce); 974 (*pfi)(nce, arg1); 975 NCE_REFRELE_NOTR(nce); 976 } 977 } 978 } 979 mutex_enter(&ndp->ndp_g_lock); 980 ndp->ndp_g_walker--; 981 /* 982 * While NCE's are removed from global list they are placed 983 * in a private list, to be passed to nce_ire_delete_list(). 984 * The reason is, there may be ires pointing to this nce 985 * which needs to cleaned up. 986 */ 987 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 988 /* Time to delete condemned entries */ 989 for (ncep = ndp->nce_hash_tbl; 990 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 991 nce = *ncep; 992 if (nce != NULL) { 993 nce_remove(ndp, nce, &free_nce_list); 994 } 995 } 996 nce = ndp->nce_mask_entries; 997 if (nce != NULL) { 998 nce_remove(ndp, nce, &free_nce_list); 999 } 1000 ndp->ndp_g_walker_cleanup = B_FALSE; 1001 } 1002 1003 mutex_exit(&ndp->ndp_g_lock); 1004 1005 if (free_nce_list != NULL) { 1006 nce_ire_delete_list(free_nce_list); 1007 } 1008 } 1009 1010 /* 1011 * Walk everything. 1012 * Note that ill can be NULL hence can't derive the ipst from it. 1013 */ 1014 void 1015 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst) 1016 { 1017 ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE); 1018 ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE); 1019 } 1020 1021 /* 1022 * Process resolve requests. Handles both mapped entries 1023 * as well as cases that needs to be send out on the wire. 1024 * Lookup a NCE for a given IRE. Regardless of whether one exists 1025 * or one is created, we defer making ire point to nce until the 1026 * ire is actually added at which point the nce_refcnt on the nce is 1027 * incremented. This is done primarily to have symmetry between ire_add() 1028 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1029 */ 1030 int 1031 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 1032 { 1033 nce_t *nce; 1034 int err = 0; 1035 uint32_t ms; 1036 mblk_t *mp_nce = NULL; 1037 ip_stack_t *ipst = ill->ill_ipst; 1038 1039 ASSERT(ill->ill_isv6); 1040 if (IN6_IS_ADDR_MULTICAST(dst)) { 1041 err = nce_set_multicast(ill, dst); 1042 return (err); 1043 } 1044 err = ndp_lookup_then_add_v6(ill, 1045 NULL, /* No hardware address */ 1046 dst, 1047 &ipv6_all_ones, 1048 &ipv6_all_zeros, 1049 0, 1050 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1051 ND_INCOMPLETE, 1052 &nce); 1053 1054 switch (err) { 1055 case 0: 1056 /* 1057 * New cache entry was created. Make sure that the state 1058 * is not ND_INCOMPLETE. It can be in some other state 1059 * even before we send out the solicitation as we could 1060 * get un-solicited advertisements. 1061 * 1062 * If this is an XRESOLV interface, simply return 0, 1063 * since we don't want to solicit just yet. 1064 */ 1065 if (ill->ill_flags & ILLF_XRESOLV) { 1066 NCE_REFRELE(nce); 1067 return (0); 1068 } 1069 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1070 mutex_enter(&nce->nce_lock); 1071 if (nce->nce_state != ND_INCOMPLETE) { 1072 mutex_exit(&nce->nce_lock); 1073 rw_exit(&ipst->ips_ill_g_lock); 1074 NCE_REFRELE(nce); 1075 return (0); 1076 } 1077 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1078 if (mp_nce == NULL) { 1079 /* The caller will free mp */ 1080 mutex_exit(&nce->nce_lock); 1081 rw_exit(&ipst->ips_ill_g_lock); 1082 ndp_delete(nce); 1083 NCE_REFRELE(nce); 1084 return (ENOMEM); 1085 } 1086 ms = nce_solicit(nce, mp_nce); 1087 rw_exit(&ipst->ips_ill_g_lock); 1088 if (ms == 0) { 1089 /* The caller will free mp */ 1090 if (mp_nce != mp) 1091 freeb(mp_nce); 1092 mutex_exit(&nce->nce_lock); 1093 ndp_delete(nce); 1094 NCE_REFRELE(nce); 1095 return (EBUSY); 1096 } 1097 mutex_exit(&nce->nce_lock); 1098 NDP_RESTART_TIMER(nce, (clock_t)ms); 1099 NCE_REFRELE(nce); 1100 return (EINPROGRESS); 1101 case EEXIST: 1102 /* Resolution in progress just queue the packet */ 1103 mutex_enter(&nce->nce_lock); 1104 if (nce->nce_state == ND_INCOMPLETE) { 1105 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1106 if (mp_nce == NULL) { 1107 err = ENOMEM; 1108 } else { 1109 nce_queue_mp(nce, mp_nce); 1110 err = EINPROGRESS; 1111 } 1112 } else { 1113 /* 1114 * Any other state implies we have 1115 * a nce but IRE needs to be added ... 1116 * ire_add_v6() will take care of the 1117 * the case when the nce becomes CONDEMNED 1118 * before the ire is added to the table. 1119 */ 1120 err = 0; 1121 } 1122 mutex_exit(&nce->nce_lock); 1123 NCE_REFRELE(nce); 1124 break; 1125 default: 1126 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 1127 break; 1128 } 1129 return (err); 1130 } 1131 1132 /* 1133 * When there is no resolver, the link layer template is passed in 1134 * the IRE. 1135 * Lookup a NCE for a given IRE. Regardless of whether one exists 1136 * or one is created, we defer making ire point to nce until the 1137 * ire is actually added at which point the nce_refcnt on the nce is 1138 * incremented. This is done primarily to have symmetry between ire_add() 1139 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1140 */ 1141 int 1142 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1143 { 1144 nce_t *nce; 1145 int err = 0; 1146 1147 ASSERT(ill != NULL); 1148 ASSERT(ill->ill_isv6); 1149 if (IN6_IS_ADDR_MULTICAST(dst)) { 1150 err = nce_set_multicast(ill, dst); 1151 return (err); 1152 } 1153 1154 err = ndp_lookup_then_add_v6(ill, 1155 NULL, /* hardware address */ 1156 dst, 1157 &ipv6_all_ones, 1158 &ipv6_all_zeros, 1159 0, 1160 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1161 ND_REACHABLE, 1162 &nce); 1163 1164 switch (err) { 1165 case 0: 1166 /* 1167 * Cache entry with a proper resolver cookie was 1168 * created. 1169 */ 1170 NCE_REFRELE(nce); 1171 break; 1172 case EEXIST: 1173 err = 0; 1174 NCE_REFRELE(nce); 1175 break; 1176 default: 1177 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1178 break; 1179 } 1180 return (err); 1181 } 1182 1183 /* 1184 * For each interface an entry is added for the unspecified multicast group. 1185 * Here that mapping is used to form the multicast cache entry for a particular 1186 * multicast destination. 1187 */ 1188 static int 1189 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1190 { 1191 nce_t *mnce; /* Multicast mapping entry */ 1192 nce_t *nce; 1193 uchar_t *hw_addr = NULL; 1194 int err = 0; 1195 ip_stack_t *ipst = ill->ill_ipst; 1196 1197 ASSERT(ill != NULL); 1198 ASSERT(ill->ill_isv6); 1199 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1200 1201 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1202 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst)); 1203 nce = nce_lookup_addr(ill, dst, nce); 1204 if (nce != NULL) { 1205 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1206 NCE_REFRELE(nce); 1207 return (0); 1208 } 1209 /* No entry, now lookup for a mapping this should never fail */ 1210 mnce = nce_lookup_mapping(ill, dst); 1211 if (mnce == NULL) { 1212 /* Something broken for the interface. */ 1213 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1214 return (ESRCH); 1215 } 1216 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1217 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1218 /* 1219 * For IRE_IF_RESOLVER a hardware mapping can be 1220 * generated, for IRE_IF_NORESOLVER, resolution cookie 1221 * in the ill is copied in ndp_add_v6(). 1222 */ 1223 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1224 if (hw_addr == NULL) { 1225 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1226 NCE_REFRELE(mnce); 1227 return (ENOMEM); 1228 } 1229 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1230 } 1231 NCE_REFRELE(mnce); 1232 /* 1233 * IRE_IF_NORESOLVER type simply copies the resolution 1234 * cookie passed in. So no hw_addr is needed. 1235 */ 1236 err = ndp_add_v6(ill, 1237 hw_addr, 1238 dst, 1239 &ipv6_all_ones, 1240 &ipv6_all_zeros, 1241 0, 1242 NCE_F_NONUD, 1243 ND_REACHABLE, 1244 &nce); 1245 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1246 if (hw_addr != NULL) 1247 kmem_free(hw_addr, ill->ill_nd_lla_len); 1248 if (err != 0) { 1249 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1250 return (err); 1251 } 1252 NCE_REFRELE(nce); 1253 return (0); 1254 } 1255 1256 /* 1257 * Return the link layer address, and any flags of a nce. 1258 */ 1259 int 1260 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1261 { 1262 nce_t *nce; 1263 in6_addr_t *addr; 1264 sin6_t *sin6; 1265 dl_unitdata_req_t *dl; 1266 1267 ASSERT(ill != NULL && ill->ill_isv6); 1268 sin6 = (sin6_t *)&lnr->lnr_addr; 1269 addr = &sin6->sin6_addr; 1270 1271 nce = ndp_lookup_v6(ill, addr, B_FALSE); 1272 if (nce == NULL) 1273 return (ESRCH); 1274 /* If in INCOMPLETE state, no link layer address is available yet */ 1275 if (nce->nce_state == ND_INCOMPLETE) 1276 goto done; 1277 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1278 if (ill->ill_flags & ILLF_XRESOLV) 1279 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1280 else 1281 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1282 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1283 sizeof (lnr->lnr_hdw_addr)); 1284 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1285 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1286 if (nce->nce_flags & NCE_F_ISROUTER) 1287 lnr->lnr_flags = NDF_ISROUTER_ON; 1288 if (nce->nce_flags & NCE_F_ANYCAST) 1289 lnr->lnr_flags |= NDF_ANYCAST_ON; 1290 done: 1291 NCE_REFRELE(nce); 1292 return (0); 1293 } 1294 1295 /* 1296 * Send Enable/Disable multicast reqs to driver. 1297 */ 1298 int 1299 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1300 uint32_t hw_addr_offset, mblk_t *mp) 1301 { 1302 nce_t *nce; 1303 uchar_t *hw_addr; 1304 ip_stack_t *ipst = ill->ill_ipst; 1305 1306 ASSERT(ill != NULL && ill->ill_isv6); 1307 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1308 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1309 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1310 freemsg(mp); 1311 return (EINVAL); 1312 } 1313 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1314 nce = nce_lookup_mapping(ill, addr); 1315 if (nce == NULL) { 1316 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1317 freemsg(mp); 1318 return (ESRCH); 1319 } 1320 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1321 /* 1322 * Update dl_addr_length and dl_addr_offset for primitives that 1323 * have physical addresses as opposed to full saps 1324 */ 1325 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1326 case DL_ENABMULTI_REQ: 1327 /* Track the state if this is the first enabmulti */ 1328 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1329 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1330 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1331 break; 1332 case DL_DISABMULTI_REQ: 1333 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1334 break; 1335 default: 1336 NCE_REFRELE(nce); 1337 ip1dbg(("ndp_mcastreq: default\n")); 1338 return (EINVAL); 1339 } 1340 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1341 NCE_REFRELE(nce); 1342 ill_dlpi_send(ill, mp); 1343 return (0); 1344 } 1345 1346 /* 1347 * Send a neighbor solicitation. 1348 * Returns number of milliseconds after which we should either rexmit or abort. 1349 * Return of zero means we should abort. 1350 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1351 * 1352 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1353 * the packet. 1354 * NOTE: This routine does not consume mp. 1355 */ 1356 uint32_t 1357 nce_solicit(nce_t *nce, mblk_t *mp) 1358 { 1359 ill_t *ill; 1360 ill_t *src_ill; 1361 ip6_t *ip6h; 1362 in6_addr_t src; 1363 in6_addr_t dst; 1364 ipif_t *ipif; 1365 ip6i_t *ip6i; 1366 boolean_t dropped = B_FALSE; 1367 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 1368 1369 ASSERT(RW_READ_HELD(&ipst->ips_ill_g_lock)); 1370 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1371 ill = nce->nce_ill; 1372 ASSERT(ill != NULL); 1373 1374 if (nce->nce_rcnt == 0) { 1375 return (0); 1376 } 1377 1378 if (mp == NULL) { 1379 ASSERT(nce->nce_qd_mp != NULL); 1380 mp = nce->nce_qd_mp; 1381 } else { 1382 nce_queue_mp(nce, mp); 1383 } 1384 1385 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1386 if (mp->b_datap->db_type == M_CTL) 1387 mp = mp->b_cont; 1388 1389 ip6h = (ip6_t *)mp->b_rptr; 1390 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1391 /* 1392 * This message should have been pulled up already in 1393 * ip_wput_v6. We can't do pullups here because the message 1394 * could be from the nce_qd_mp which could have b_next/b_prev 1395 * non-NULL. 1396 */ 1397 ip6i = (ip6i_t *)ip6h; 1398 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1399 sizeof (ip6i_t) + IPV6_HDR_LEN); 1400 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1401 } 1402 src = ip6h->ip6_src; 1403 /* 1404 * If the src of outgoing packet is one of the assigned interface 1405 * addresses use it, otherwise we will pick the source address below. 1406 */ 1407 src_ill = ill; 1408 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1409 if (ill->ill_group != NULL) 1410 src_ill = ill->ill_group->illgrp_ill; 1411 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1412 for (ipif = src_ill->ill_ipif; ipif != NULL; 1413 ipif = ipif->ipif_next) { 1414 if (IN6_ARE_ADDR_EQUAL(&src, 1415 &ipif->ipif_v6lcl_addr)) { 1416 break; 1417 } 1418 } 1419 if (ipif != NULL) 1420 break; 1421 } 1422 /* 1423 * If no relevant ipif can be found, then it's not one of our 1424 * addresses. Reset to :: and let nce_xmit. If an ipif can be 1425 * found, but it's not yet done with DAD verification, then 1426 * just postpone this transmission until later. 1427 */ 1428 if (src_ill == NULL) 1429 src = ipv6_all_zeros; 1430 else if (!ipif->ipif_addr_ready) 1431 return (ill->ill_reachable_retrans_time); 1432 } 1433 dst = nce->nce_addr; 1434 /* 1435 * If source address is unspecified, nce_xmit will choose 1436 * one for us and initialize the hardware address also 1437 * appropriately. 1438 */ 1439 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1440 src_ill = NULL; 1441 nce->nce_rcnt--; 1442 mutex_exit(&nce->nce_lock); 1443 rw_exit(&ipst->ips_ill_g_lock); 1444 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1445 &dst, 0); 1446 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1447 mutex_enter(&nce->nce_lock); 1448 if (dropped) 1449 nce->nce_rcnt++; 1450 return (ill->ill_reachable_retrans_time); 1451 } 1452 1453 /* 1454 * Attempt to recover an address on an interface that's been marked as a 1455 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1456 * no easy way to just probe the address and have the right thing happen if 1457 * it's no longer in use. Instead, we just bring it up normally and allow the 1458 * regular interface start-up logic to probe for a remaining duplicate and take 1459 * us back down if necessary. 1460 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1461 * ip_ndp_excl. 1462 */ 1463 /* ARGSUSED */ 1464 static void 1465 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1466 { 1467 ill_t *ill = rq->q_ptr; 1468 ipif_t *ipif; 1469 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1470 1471 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1472 /* 1473 * We do not support recovery of proxy ARP'd interfaces, 1474 * because the system lacks a complete proxy ARP mechanism. 1475 */ 1476 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1477 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1478 continue; 1479 } 1480 1481 /* 1482 * If we have already recovered or if the interface is going 1483 * away, then ignore. 1484 */ 1485 mutex_enter(&ill->ill_lock); 1486 if (!(ipif->ipif_flags & IPIF_DUPLICATE) || 1487 (ipif->ipif_flags & (IPIF_MOVING | IPIF_CONDEMNED))) { 1488 mutex_exit(&ill->ill_lock); 1489 continue; 1490 } 1491 1492 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1493 ill->ill_ipif_dup_count--; 1494 mutex_exit(&ill->ill_lock); 1495 ipif->ipif_was_dup = B_TRUE; 1496 1497 if (ipif_ndp_up(ipif) != EINPROGRESS) 1498 (void) ipif_up_done_v6(ipif); 1499 } 1500 freeb(mp); 1501 } 1502 1503 /* 1504 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1505 * As long as someone else holds the address, the interface will stay down. 1506 * When that conflict goes away, the interface is brought back up. This is 1507 * done so that accidental shutdowns of addresses aren't made permanent. Your 1508 * server will recover from a failure. 1509 * 1510 * For DHCP and temporary addresses, recovery is not done in the kernel. 1511 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1512 * 1513 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1514 */ 1515 static void 1516 ipif6_dup_recovery(void *arg) 1517 { 1518 ipif_t *ipif = arg; 1519 1520 ipif->ipif_recovery_id = 0; 1521 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1522 return; 1523 1524 /* 1525 * No lock, because this is just an optimization. 1526 */ 1527 if (ipif->ipif_state_flags & (IPIF_MOVING | IPIF_CONDEMNED)) 1528 return; 1529 1530 /* If the link is down, we'll retry this later */ 1531 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1532 return; 1533 1534 ndp_do_recovery(ipif); 1535 } 1536 1537 /* 1538 * Perform interface recovery by forcing the duplicate interfaces up and 1539 * allowing the system to determine which ones should stay up. 1540 * 1541 * Called both by recovery timer expiry and link-up notification. 1542 */ 1543 void 1544 ndp_do_recovery(ipif_t *ipif) 1545 { 1546 ill_t *ill = ipif->ipif_ill; 1547 mblk_t *mp; 1548 ip_stack_t *ipst = ill->ill_ipst; 1549 1550 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1551 if (mp == NULL) { 1552 mutex_enter(&ill->ill_lock); 1553 if (ipif->ipif_recovery_id == 0 && 1554 !(ipif->ipif_state_flags & (IPIF_MOVING | 1555 IPIF_CONDEMNED))) { 1556 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1557 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1558 } 1559 mutex_exit(&ill->ill_lock); 1560 } else { 1561 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1562 sizeof (ipif->ipif_v6lcl_addr)); 1563 ill_refhold(ill); 1564 qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP, 1565 B_FALSE); 1566 } 1567 } 1568 1569 /* 1570 * Find the solicitation in the given message, and extract printable details 1571 * (MAC and IP addresses) from it. 1572 */ 1573 static nd_neighbor_solicit_t * 1574 ip_ndp_find_solicitation(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, char *hbuf, 1575 size_t hlen, char *sbuf, size_t slen, uchar_t **haddr) 1576 { 1577 nd_neighbor_solicit_t *ns; 1578 ip6_t *ip6h; 1579 uchar_t *addr; 1580 int alen; 1581 1582 alen = 0; 1583 ip6h = (ip6_t *)mp->b_rptr; 1584 if (dl_mp == NULL) { 1585 nd_opt_hdr_t *opt; 1586 int nslen; 1587 1588 /* 1589 * If it's from the fast-path, then it can't be a probe 1590 * message, and thus must include the source linkaddr option. 1591 * Extract that here. 1592 */ 1593 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1594 nslen = mp->b_wptr - (uchar_t *)ns; 1595 if ((nslen -= sizeof (*ns)) > 0) { 1596 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), nslen, 1597 ND_OPT_SOURCE_LINKADDR); 1598 if (opt != NULL && 1599 opt->nd_opt_len * 8 - sizeof (*opt) >= 1600 ill->ill_nd_lla_len) { 1601 addr = (uchar_t *)(opt + 1); 1602 alen = ill->ill_nd_lla_len; 1603 } 1604 } 1605 /* 1606 * We cheat a bit here for the sake of printing usable log 1607 * messages in the rare case where the reply we got was unicast 1608 * without a source linkaddr option, and the interface is in 1609 * fastpath mode. (Sigh.) 1610 */ 1611 if (alen == 0 && ill->ill_type == IFT_ETHER && 1612 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1613 struct ether_header *pether; 1614 1615 pether = (struct ether_header *)((char *)ip6h - 1616 sizeof (*pether)); 1617 addr = pether->ether_shost.ether_addr_octet; 1618 alen = ETHERADDRL; 1619 } 1620 } else { 1621 dl_unitdata_ind_t *dlu; 1622 1623 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1624 alen = dlu->dl_src_addr_length; 1625 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1626 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1627 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1628 if (ill->ill_sap_length < 0) { 1629 alen += ill->ill_sap_length; 1630 } else { 1631 addr += ill->ill_sap_length; 1632 alen -= ill->ill_sap_length; 1633 } 1634 } 1635 } 1636 if (alen > 0) { 1637 *haddr = addr; 1638 (void) mac_colon_addr(addr, alen, hbuf, hlen); 1639 } else { 1640 *haddr = NULL; 1641 (void) strcpy(hbuf, "?"); 1642 } 1643 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1644 (void) inet_ntop(AF_INET6, &ns->nd_ns_target, sbuf, slen); 1645 return (ns); 1646 } 1647 1648 /* 1649 * This is for exclusive changes due to NDP duplicate address detection 1650 * failure. 1651 */ 1652 /* ARGSUSED */ 1653 static void 1654 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1655 { 1656 ill_t *ill = rq->q_ptr; 1657 ipif_t *ipif; 1658 char ibuf[LIFNAMSIZ + 10]; /* 10 digits for logical i/f number */ 1659 char hbuf[MAC_STR_LEN]; 1660 char sbuf[INET6_ADDRSTRLEN]; 1661 nd_neighbor_solicit_t *ns; 1662 mblk_t *dl_mp = NULL; 1663 uchar_t *haddr; 1664 ip_stack_t *ipst = ill->ill_ipst; 1665 1666 if (DB_TYPE(mp) != M_DATA) { 1667 dl_mp = mp; 1668 mp = mp->b_cont; 1669 } 1670 ns = ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, sizeof (hbuf), sbuf, 1671 sizeof (sbuf), &haddr); 1672 if (haddr != NULL && 1673 bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) { 1674 /* 1675 * Ignore conflicts generated by misbehaving switches that just 1676 * reflect our own messages back to us. 1677 */ 1678 goto ignore_conflict; 1679 } 1680 1681 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1682 1683 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1684 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 1685 &ns->nd_ns_target)) { 1686 continue; 1687 } 1688 1689 /* If it's already marked, then don't do anything. */ 1690 if (ipif->ipif_flags & IPIF_DUPLICATE) 1691 continue; 1692 1693 /* 1694 * If this is a failure during duplicate recovery, then don't 1695 * complain. It may take a long time to recover. 1696 */ 1697 if (!ipif->ipif_was_dup) { 1698 ipif_get_name(ipif, ibuf, sizeof (ibuf)); 1699 cmn_err(CE_WARN, "%s has duplicate address %s (in " 1700 "use by %s); disabled", ibuf, sbuf, hbuf); 1701 } 1702 mutex_enter(&ill->ill_lock); 1703 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1704 ipif->ipif_flags |= IPIF_DUPLICATE; 1705 ill->ill_ipif_dup_count++; 1706 mutex_exit(&ill->ill_lock); 1707 (void) ipif_down(ipif, NULL, NULL); 1708 ipif_down_tail(ipif); 1709 mutex_enter(&ill->ill_lock); 1710 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1711 ill->ill_net_type == IRE_IF_RESOLVER && 1712 !(ipif->ipif_state_flags & (IPIF_MOVING | 1713 IPIF_CONDEMNED)) && 1714 ipst->ips_ip_dup_recovery > 0) { 1715 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1716 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1717 } 1718 mutex_exit(&ill->ill_lock); 1719 } 1720 ignore_conflict: 1721 if (dl_mp != NULL) 1722 freeb(dl_mp); 1723 freemsg(mp); 1724 } 1725 1726 /* 1727 * Handle failure by tearing down the ipifs with the specified address. Note 1728 * that tearing down the ipif also means deleting the nce through ipif_down, so 1729 * it's not possible to do recovery by just restarting the nce timer. Instead, 1730 * we start a timer on the ipif. 1731 */ 1732 static void 1733 ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1734 { 1735 if ((mp = copymsg(mp)) != NULL) { 1736 if (dl_mp == NULL) 1737 dl_mp = mp; 1738 else if ((dl_mp = copyb(dl_mp)) != NULL) 1739 dl_mp->b_cont = mp; 1740 if (dl_mp == NULL) { 1741 freemsg(mp); 1742 } else { 1743 ill_refhold(ill); 1744 qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP, 1745 B_FALSE); 1746 } 1747 } 1748 ndp_delete(nce); 1749 } 1750 1751 /* 1752 * Handle a discovered conflict: some other system is advertising that it owns 1753 * one of our IP addresses. We need to defend ourselves, or just shut down the 1754 * interface. 1755 */ 1756 static void 1757 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1758 { 1759 ipif_t *ipif; 1760 uint32_t now; 1761 uint_t maxdefense; 1762 uint_t defs; 1763 ip_stack_t *ipst = ill->ill_ipst; 1764 1765 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 1766 NULL, NULL, ipst); 1767 if (ipif == NULL) 1768 return; 1769 /* 1770 * First, figure out if this address is disposable. 1771 */ 1772 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1773 maxdefense = ipst->ips_ip_max_temp_defend; 1774 else 1775 maxdefense = ipst->ips_ip_max_defend; 1776 1777 /* 1778 * Now figure out how many times we've defended ourselves. Ignore 1779 * defenses that happened long in the past. 1780 */ 1781 now = gethrestime_sec(); 1782 mutex_enter(&nce->nce_lock); 1783 if ((defs = nce->nce_defense_count) > 0 && 1784 now - nce->nce_defense_time > ipst->ips_ip_defend_interval) { 1785 nce->nce_defense_count = defs = 0; 1786 } 1787 nce->nce_defense_count++; 1788 nce->nce_defense_time = now; 1789 mutex_exit(&nce->nce_lock); 1790 ipif_refrele(ipif); 1791 1792 /* 1793 * If we've defended ourselves too many times already, then give up and 1794 * tear down the interface(s) using this address. Otherwise, defend by 1795 * sending out an unsolicited Neighbor Advertisement. 1796 */ 1797 if (defs >= maxdefense) { 1798 ip_ndp_failure(ill, mp, dl_mp, nce); 1799 } else { 1800 char hbuf[MAC_STR_LEN]; 1801 char sbuf[INET6_ADDRSTRLEN]; 1802 uchar_t *haddr; 1803 1804 (void) ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, 1805 sizeof (hbuf), sbuf, sizeof (sbuf), &haddr); 1806 cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1807 hbuf, sbuf, ill->ill_name); 1808 (void) nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, B_FALSE, 1809 &nce->nce_addr, &ipv6_all_hosts_mcast, 1810 nce_advert_flags(nce)); 1811 } 1812 } 1813 1814 static void 1815 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1816 { 1817 nd_neighbor_solicit_t *ns; 1818 uint32_t hlen = ill->ill_nd_lla_len; 1819 uchar_t *haddr = NULL; 1820 icmp6_t *icmp_nd; 1821 ip6_t *ip6h; 1822 nce_t *our_nce = NULL; 1823 in6_addr_t target; 1824 in6_addr_t src; 1825 int len; 1826 int flag = 0; 1827 nd_opt_hdr_t *opt = NULL; 1828 boolean_t bad_solicit = B_FALSE; 1829 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1830 1831 ip6h = (ip6_t *)mp->b_rptr; 1832 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1833 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1834 src = ip6h->ip6_src; 1835 ns = (nd_neighbor_solicit_t *)icmp_nd; 1836 target = ns->nd_ns_target; 1837 if (IN6_IS_ADDR_MULTICAST(&target)) { 1838 if (ip_debug > 2) { 1839 /* ip1dbg */ 1840 pr_addr_dbg("ndp_input_solicit: Target is" 1841 " multicast! %s\n", AF_INET6, &target); 1842 } 1843 bad_solicit = B_TRUE; 1844 goto done; 1845 } 1846 if (len > sizeof (nd_neighbor_solicit_t)) { 1847 /* Options present */ 1848 opt = (nd_opt_hdr_t *)&ns[1]; 1849 len -= sizeof (nd_neighbor_solicit_t); 1850 if (!ndp_verify_optlen(opt, len)) { 1851 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1852 bad_solicit = B_TRUE; 1853 goto done; 1854 } 1855 } 1856 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1857 /* Check to see if this is a valid DAD solicitation */ 1858 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1859 if (ip_debug > 2) { 1860 /* ip1dbg */ 1861 pr_addr_dbg("ndp_input_solicit: IPv6 " 1862 "Destination is not solicited node " 1863 "multicast %s\n", AF_INET6, 1864 &ip6h->ip6_dst); 1865 } 1866 bad_solicit = B_TRUE; 1867 goto done; 1868 } 1869 } 1870 1871 our_nce = ndp_lookup_v6(ill, &target, B_FALSE); 1872 /* 1873 * If this is a valid Solicitation, a permanent 1874 * entry should exist in the cache 1875 */ 1876 if (our_nce == NULL || 1877 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1878 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1879 "ifname=%s ", ill->ill_name)); 1880 if (ip_debug > 2) { 1881 /* ip1dbg */ 1882 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1883 } 1884 bad_solicit = B_TRUE; 1885 goto done; 1886 } 1887 1888 /* At this point we should have a verified NS per spec */ 1889 if (opt != NULL) { 1890 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1891 if (opt != NULL) { 1892 haddr = (uchar_t *)&opt[1]; 1893 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 1894 hlen == 0) { 1895 ip1dbg(("ndp_input_advert: bad SLLA\n")); 1896 bad_solicit = B_TRUE; 1897 goto done; 1898 } 1899 } 1900 } 1901 1902 /* If sending directly to peer, set the unicast flag */ 1903 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 1904 flag |= NDP_UNICAST; 1905 1906 /* 1907 * Create/update the entry for the soliciting node. 1908 * or respond to outstanding queries, don't if 1909 * the source is unspecified address. 1910 */ 1911 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1912 int err; 1913 nce_t *nnce; 1914 1915 ASSERT(ill->ill_isv6); 1916 /* 1917 * Regular solicitations *must* include the Source Link-Layer 1918 * Address option. Ignore messages that do not. 1919 */ 1920 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1921 ip1dbg(("ndp_input_solicit: source link-layer address " 1922 "option missing with a specified source.\n")); 1923 bad_solicit = B_TRUE; 1924 goto done; 1925 } 1926 1927 /* 1928 * This is a regular solicitation. If we're still in the 1929 * process of verifying the address, then don't respond at all 1930 * and don't keep track of the sender. 1931 */ 1932 if (our_nce->nce_state == ND_PROBE) 1933 goto done; 1934 1935 /* 1936 * If the solicitation doesn't have sender hardware address 1937 * (legal for unicast solicitation), then process without 1938 * installing the return NCE. Either we already know it, or 1939 * we'll be forced to look it up when (and if) we reply to the 1940 * packet. 1941 */ 1942 if (haddr == NULL) 1943 goto no_source; 1944 1945 err = ndp_lookup_then_add_v6(ill, 1946 haddr, 1947 &src, /* Soliciting nodes address */ 1948 &ipv6_all_ones, 1949 &ipv6_all_zeros, 1950 0, 1951 0, 1952 ND_STALE, 1953 &nnce); 1954 switch (err) { 1955 case 0: 1956 /* done with this entry */ 1957 NCE_REFRELE(nnce); 1958 break; 1959 case EEXIST: 1960 /* 1961 * B_FALSE indicates this is not an 1962 * an advertisement. 1963 */ 1964 ndp_process(nnce, haddr, 0, B_FALSE); 1965 NCE_REFRELE(nnce); 1966 break; 1967 default: 1968 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1969 err)); 1970 goto done; 1971 } 1972 no_source: 1973 flag |= NDP_SOLICITED; 1974 } else { 1975 /* 1976 * No source link layer address option should be present in a 1977 * valid DAD request. 1978 */ 1979 if (haddr != NULL) { 1980 ip1dbg(("ndp_input_solicit: source link-layer address " 1981 "option present with an unspecified source.\n")); 1982 bad_solicit = B_TRUE; 1983 goto done; 1984 } 1985 if (our_nce->nce_state == ND_PROBE) { 1986 /* 1987 * Internally looped-back probes won't have DLPI 1988 * attached to them. External ones (which are sent by 1989 * multicast) always will. Just ignore our own 1990 * transmissions. 1991 */ 1992 if (dl_mp != NULL) { 1993 /* 1994 * If someone else is probing our address, then 1995 * we've crossed wires. Declare failure. 1996 */ 1997 ip_ndp_failure(ill, mp, dl_mp, our_nce); 1998 } 1999 goto done; 2000 } 2001 /* 2002 * This is a DAD probe. Multicast the advertisement to the 2003 * all-nodes address. 2004 */ 2005 src = ipv6_all_hosts_mcast; 2006 } 2007 flag |= nce_advert_flags(our_nce); 2008 /* Response to a solicitation */ 2009 (void) nce_xmit(ill, 2010 ND_NEIGHBOR_ADVERT, 2011 ill, /* ill to be used for extracting ill_nd_lla */ 2012 B_TRUE, /* use ill_nd_lla */ 2013 &target, /* Source and target of the advertisement pkt */ 2014 &src, /* IP Destination (source of original pkt) */ 2015 flag); 2016 done: 2017 if (bad_solicit) 2018 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 2019 if (our_nce != NULL) 2020 NCE_REFRELE(our_nce); 2021 } 2022 2023 void 2024 ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2025 { 2026 nd_neighbor_advert_t *na; 2027 uint32_t hlen = ill->ill_nd_lla_len; 2028 uchar_t *haddr = NULL; 2029 icmp6_t *icmp_nd; 2030 ip6_t *ip6h; 2031 nce_t *dst_nce = NULL; 2032 in6_addr_t target; 2033 nd_opt_hdr_t *opt = NULL; 2034 int len; 2035 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2036 ip_stack_t *ipst = ill->ill_ipst; 2037 2038 ip6h = (ip6_t *)mp->b_rptr; 2039 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2040 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2041 na = (nd_neighbor_advert_t *)icmp_nd; 2042 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 2043 (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 2044 ip1dbg(("ndp_input_advert: Target is multicast but the " 2045 "solicited flag is not zero\n")); 2046 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2047 return; 2048 } 2049 target = na->nd_na_target; 2050 if (IN6_IS_ADDR_MULTICAST(&target)) { 2051 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 2052 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2053 return; 2054 } 2055 if (len > sizeof (nd_neighbor_advert_t)) { 2056 opt = (nd_opt_hdr_t *)&na[1]; 2057 if (!ndp_verify_optlen(opt, 2058 len - sizeof (nd_neighbor_advert_t))) { 2059 ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); 2060 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2061 return; 2062 } 2063 /* At this point we have a verified NA per spec */ 2064 len -= sizeof (nd_neighbor_advert_t); 2065 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 2066 if (opt != NULL) { 2067 haddr = (uchar_t *)&opt[1]; 2068 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 2069 hlen == 0) { 2070 ip1dbg(("ndp_input_advert: bad SLLA\n")); 2071 BUMP_MIB(mib, 2072 ipv6IfIcmpInBadNeighborAdvertisements); 2073 return; 2074 } 2075 } 2076 } 2077 2078 /* 2079 * If this interface is part of the group look at all the 2080 * ills in the group. 2081 */ 2082 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2083 if (ill->ill_group != NULL) 2084 ill = ill->ill_group->illgrp_ill; 2085 2086 for (; ill != NULL; ill = ill->ill_group_next) { 2087 mutex_enter(&ill->ill_lock); 2088 if (!ILL_CAN_LOOKUP(ill)) { 2089 mutex_exit(&ill->ill_lock); 2090 continue; 2091 } 2092 ill_refhold_locked(ill); 2093 mutex_exit(&ill->ill_lock); 2094 dst_nce = ndp_lookup_v6(ill, &target, B_FALSE); 2095 /* We have to drop the lock since ndp_process calls put* */ 2096 rw_exit(&ipst->ips_ill_g_lock); 2097 if (dst_nce != NULL) { 2098 if ((dst_nce->nce_flags & NCE_F_PERMANENT) && 2099 dst_nce->nce_state == ND_PROBE) { 2100 /* 2101 * Someone else sent an advertisement for an 2102 * address that we're trying to configure. 2103 * Tear it down. Note that dl_mp might be NULL 2104 * if we're getting a unicast reply. This 2105 * isn't typically done (multicast is the norm 2106 * in response to a probe), but ip_ndp_failure 2107 * will handle the dl_mp == NULL case as well. 2108 */ 2109 ip_ndp_failure(ill, mp, dl_mp, dst_nce); 2110 } else if (dst_nce->nce_flags & NCE_F_PERMANENT) { 2111 /* 2112 * Someone just announced one of our local 2113 * addresses. If it wasn't us, then this is a 2114 * conflict. Defend the address or shut it 2115 * down. 2116 */ 2117 if (dl_mp != NULL && 2118 (haddr == NULL || 2119 nce_cmp_ll_addr(dst_nce, haddr, 2120 ill->ill_nd_lla_len))) { 2121 ip_ndp_conflict(ill, mp, dl_mp, 2122 dst_nce); 2123 } 2124 } else { 2125 if (na->nd_na_flags_reserved & 2126 ND_NA_FLAG_ROUTER) { 2127 dst_nce->nce_flags |= NCE_F_ISROUTER; 2128 } 2129 /* B_TRUE indicates this an advertisement */ 2130 ndp_process(dst_nce, haddr, 2131 na->nd_na_flags_reserved, B_TRUE); 2132 } 2133 NCE_REFRELE(dst_nce); 2134 } 2135 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2136 ill_refrele(ill); 2137 } 2138 rw_exit(&ipst->ips_ill_g_lock); 2139 } 2140 2141 /* 2142 * Process NDP neighbor solicitation/advertisement messages. 2143 * The checksum has already checked o.k before reaching here. 2144 */ 2145 void 2146 ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2147 { 2148 icmp6_t *icmp_nd; 2149 ip6_t *ip6h; 2150 int len; 2151 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2152 2153 2154 if (!pullupmsg(mp, -1)) { 2155 ip1dbg(("ndp_input: pullupmsg failed\n")); 2156 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2157 goto done; 2158 } 2159 ip6h = (ip6_t *)mp->b_rptr; 2160 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 2161 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 2162 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 2163 goto done; 2164 } 2165 /* 2166 * NDP does not accept any extension headers between the 2167 * IP header and the ICMP header since e.g. a routing 2168 * header could be dangerous. 2169 * This assumes that any AH or ESP headers are removed 2170 * by ip prior to passing the packet to ndp_input. 2171 */ 2172 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 2173 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 2174 ip6h->ip6_nxt)); 2175 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2176 goto done; 2177 } 2178 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2179 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 2180 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 2181 if (icmp_nd->icmp6_code != 0) { 2182 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 2183 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2184 goto done; 2185 } 2186 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2187 /* 2188 * Make sure packet length is large enough for either 2189 * a NS or a NA icmp packet. 2190 */ 2191 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 2192 ip1dbg(("ndp_input: packet too short\n")); 2193 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2194 goto done; 2195 } 2196 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 2197 ndp_input_solicit(ill, mp, dl_mp); 2198 } else { 2199 ndp_input_advert(ill, mp, dl_mp); 2200 } 2201 done: 2202 freemsg(mp); 2203 } 2204 2205 /* 2206 * nce_xmit is called to form and transmit a ND solicitation or 2207 * advertisement ICMP packet. 2208 * 2209 * If the source address is unspecified and this isn't a probe (used for 2210 * duplicate address detection), an appropriate source address and link layer 2211 * address will be chosen here. The link layer address option is included if 2212 * the source is specified (i.e., all non-probe packets), and omitted (per the 2213 * specification) otherwise. 2214 * 2215 * It returns B_FALSE only if it does a successful put() to the 2216 * corresponding ill's ill_wq otherwise returns B_TRUE. 2217 */ 2218 static boolean_t 2219 nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, 2220 boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, 2221 int flag) 2222 { 2223 uint32_t len; 2224 icmp6_t *icmp6; 2225 mblk_t *mp; 2226 ip6_t *ip6h; 2227 nd_opt_hdr_t *opt; 2228 uint_t plen; 2229 ip6i_t *ip6i; 2230 ipif_t *src_ipif = NULL; 2231 uint8_t *hw_addr; 2232 zoneid_t zoneid = GLOBAL_ZONEID; 2233 2234 /* 2235 * If we have a unspecified source(sender) address, select a 2236 * proper source address for the solicitation here itself so 2237 * that we can initialize the h/w address correctly. This is 2238 * needed for interface groups as source address can come from 2239 * the whole group and the h/w address initialized from ill will 2240 * be wrong if the source address comes from a different ill. 2241 * 2242 * If the sender is specified then we use this address in order 2243 * to lookup the zoneid before calling ip_output_v6(). This is to 2244 * enable unicast ND_NEIGHBOR_ADVERT packets to be routed correctly 2245 * by IP (we cannot guarantee that the global zone has an interface 2246 * route to the destination). 2247 * 2248 * Note that the NA never comes here with the unspecified source 2249 * address. The following asserts that whenever the source 2250 * address is specified, the haddr also should be specified. 2251 */ 2252 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 2253 2254 if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { 2255 ASSERT(operation != ND_NEIGHBOR_ADVERT); 2256 /* 2257 * Pick a source address for this solicitation, but 2258 * restrict the selection to addresses assigned to the 2259 * output interface (or interface group). We do this 2260 * because the destination will create a neighbor cache 2261 * entry for the source address of this packet, so the 2262 * source address had better be a valid neighbor. 2263 */ 2264 src_ipif = ipif_select_source_v6(ill, target, RESTRICT_TO_ILL, 2265 IPV6_PREFER_SRC_DEFAULT, ALL_ZONES); 2266 if (src_ipif == NULL) { 2267 char buf[INET6_ADDRSTRLEN]; 2268 2269 ip1dbg(("nce_xmit: No source ipif for dst %s\n", 2270 inet_ntop(AF_INET6, (char *)target, buf, 2271 sizeof (buf)))); 2272 return (B_TRUE); 2273 } 2274 sender = &src_ipif->ipif_v6src_addr; 2275 hwaddr_ill = src_ipif->ipif_ill; 2276 } else if (!(IN6_IS_ADDR_UNSPECIFIED(sender))) { 2277 zoneid = ipif_lookup_addr_zoneid_v6(sender, ill, ill->ill_ipst); 2278 /* 2279 * It's possible for ipif_lookup_addr_zoneid_v6() to return 2280 * ALL_ZONES if it cannot find a matching ipif for the address 2281 * we are trying to use. In this case we err on the side of 2282 * trying to send the packet by defaulting to the GLOBAL_ZONEID. 2283 */ 2284 if (zoneid == ALL_ZONES) 2285 zoneid = GLOBAL_ZONEID; 2286 } 2287 2288 /* 2289 * Always make sure that the NS/NA packets don't get load 2290 * spread. This is needed so that the probe packets sent 2291 * by the in.mpathd daemon can really go out on the desired 2292 * interface. Probe packets are made to go out on a desired 2293 * interface by including a ip6i with ATTACH_IF flag. As these 2294 * packets indirectly end up sending/receiving NS/NA packets 2295 * (neighbor doing NUD), we have to make sure that NA 2296 * also go out on the same interface. 2297 */ 2298 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7) / 8; 2299 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 2300 plen * 8; 2301 mp = allocb(len, BPRI_LO); 2302 if (mp == NULL) { 2303 if (src_ipif != NULL) 2304 ipif_refrele(src_ipif); 2305 return (B_TRUE); 2306 } 2307 bzero((char *)mp->b_rptr, len); 2308 mp->b_wptr = mp->b_rptr + len; 2309 2310 ip6i = (ip6i_t *)mp->b_rptr; 2311 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2312 ip6i->ip6i_nxt = IPPROTO_RAW; 2313 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2314 if (flag & NDP_PROBE) 2315 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 2316 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2317 2318 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 2319 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2320 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2321 ip6h->ip6_nxt = IPPROTO_ICMPV6; 2322 ip6h->ip6_hops = IPV6_MAX_HOPS; 2323 ip6h->ip6_dst = *target; 2324 icmp6 = (icmp6_t *)&ip6h[1]; 2325 2326 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 2327 sizeof (nd_neighbor_advert_t)); 2328 2329 if (operation == ND_NEIGHBOR_SOLICIT) { 2330 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 2331 2332 if (!(flag & NDP_PROBE)) 2333 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 2334 ip6h->ip6_src = *sender; 2335 ns->nd_ns_target = *target; 2336 if (!(flag & NDP_UNICAST)) { 2337 /* Form multicast address of the target */ 2338 ip6h->ip6_dst = ipv6_solicited_node_mcast; 2339 ip6h->ip6_dst.s6_addr32[3] |= 2340 ns->nd_ns_target.s6_addr32[3]; 2341 } 2342 } else { 2343 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 2344 2345 ASSERT(!(flag & NDP_PROBE)); 2346 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2347 ip6h->ip6_src = *sender; 2348 na->nd_na_target = *sender; 2349 if (flag & NDP_ISROUTER) 2350 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 2351 if (flag & NDP_SOLICITED) 2352 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 2353 if (flag & NDP_ORIDE) 2354 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 2355 } 2356 2357 hw_addr = NULL; 2358 if (!(flag & NDP_PROBE)) { 2359 hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : 2360 hwaddr_ill->ill_phys_addr; 2361 if (hw_addr != NULL) { 2362 /* Fill in link layer address and option len */ 2363 opt->nd_opt_len = (uint8_t)plen; 2364 bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); 2365 } 2366 } 2367 if (hw_addr == NULL) { 2368 /* If there's no link layer address option, then strip it. */ 2369 len -= plen * 8; 2370 mp->b_wptr = mp->b_rptr + len; 2371 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2372 } 2373 2374 icmp6->icmp6_type = (uint8_t)operation; 2375 icmp6->icmp6_code = 0; 2376 /* 2377 * Prepare for checksum by putting icmp length in the icmp 2378 * checksum field. The checksum is calculated in ip_wput_v6. 2379 */ 2380 icmp6->icmp6_cksum = ip6h->ip6_plen; 2381 2382 if (src_ipif != NULL) 2383 ipif_refrele(src_ipif); 2384 2385 ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT); 2386 return (B_FALSE); 2387 } 2388 2389 /* 2390 * Make a link layer address (does not include the SAP) from an nce. 2391 * To form the link layer address, use the last four bytes of ipv6 2392 * address passed in and the fixed offset stored in nce. 2393 */ 2394 static void 2395 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 2396 { 2397 uchar_t *mask, *to; 2398 ill_t *ill = nce->nce_ill; 2399 int len; 2400 2401 if (ill->ill_net_type == IRE_IF_NORESOLVER) 2402 return; 2403 ASSERT(nce->nce_res_mp != NULL); 2404 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 2405 ASSERT(nce->nce_flags & NCE_F_MAPPING); 2406 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 2407 ASSERT(addr != NULL); 2408 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 2409 addrpos, ill->ill_nd_lla_len); 2410 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 2411 IPV6_ADDR_LEN); 2412 mask = (uchar_t *)&nce->nce_extract_mask; 2413 mask += (IPV6_ADDR_LEN - len); 2414 addr += (IPV6_ADDR_LEN - len); 2415 to = addrpos + nce->nce_ll_extract_start; 2416 while (len-- > 0) 2417 *to++ |= *mask++ & *addr++; 2418 } 2419 2420 /* 2421 * Pass a cache report back out via NDD. 2422 */ 2423 /* ARGSUSED */ 2424 int 2425 ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 2426 { 2427 ip_stack_t *ipst; 2428 2429 if (CONN_Q(q)) 2430 ipst = CONNQ_TO_IPST(q); 2431 else 2432 ipst = ILLQ_TO_IPST(q); 2433 2434 (void) mi_mpprintf(mp, "ifname hardware addr flags" 2435 " proto addr/mask"); 2436 ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp, ipst); 2437 return (0); 2438 } 2439 2440 /* 2441 * Add a single line to the NDP Cache Entry Report. 2442 */ 2443 static void 2444 nce_report1(nce_t *nce, uchar_t *mp_arg) 2445 { 2446 ill_t *ill = nce->nce_ill; 2447 char local_buf[INET6_ADDRSTRLEN]; 2448 uchar_t flags_buf[10]; 2449 uint32_t flags = nce->nce_flags; 2450 mblk_t *mp = (mblk_t *)mp_arg; 2451 uchar_t *h; 2452 uchar_t *m = flags_buf; 2453 in6_addr_t v6addr; 2454 uint64_t now; 2455 2456 /* 2457 * Lock the nce to protect nce_res_mp from being changed 2458 * if an external resolver address resolution completes 2459 * while nce_res_mp is being accessed here. 2460 * 2461 * Deal with all address formats, not just Ethernet-specific 2462 * In addition, make sure that the mblk has enough space 2463 * before writing to it. If is doesn't, allocate a new one. 2464 */ 2465 if (nce->nce_ipversion == IPV4_VERSION) { 2466 /* 2467 * Don't include v4 NCEs in NDP cache entry report. 2468 * But sanity check for lingering ND_INITIAL entries 2469 * when we do 'ndd -get /dev/ip ip_ndp_cache_report' 2470 */ 2471 if (nce->nce_state == ND_INITIAL) { 2472 2473 now = TICK_TO_MSEC(lbolt64); 2474 if (now - nce->nce_init_time > NCE_STUCK_TIMEOUT) { 2475 DTRACE_PROBE1(nce__stuck, nce_t *, nce); 2476 } 2477 } 2478 return; 2479 } 2480 2481 ASSERT(ill != NULL); 2482 v6addr = nce->nce_mask; 2483 if (flags & NCE_F_PERMANENT) 2484 *m++ = 'P'; 2485 if (flags & NCE_F_ISROUTER) 2486 *m++ = 'R'; 2487 if (flags & NCE_F_MAPPING) 2488 *m++ = 'M'; 2489 *m = '\0'; 2490 2491 if (ill->ill_net_type == IRE_IF_RESOLVER) { 2492 size_t addrlen; 2493 char *addr_buf; 2494 dl_unitdata_req_t *dl; 2495 2496 mutex_enter(&nce->nce_lock); 2497 h = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2498 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 2499 if (ill->ill_flags & ILLF_XRESOLV) 2500 addrlen = (3 * (dl->dl_dest_addr_length)); 2501 else 2502 addrlen = (3 * (ill->ill_nd_lla_len)); 2503 if (addrlen <= 0) { 2504 mutex_exit(&nce->nce_lock); 2505 (void) mi_mpprintf(mp, 2506 "%8s %9s %5s %s/%d", 2507 ill->ill_name, 2508 "None", 2509 (uchar_t *)&flags_buf, 2510 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2511 (char *)local_buf, sizeof (local_buf)), 2512 ip_mask_to_plen_v6(&v6addr)); 2513 } else { 2514 /* 2515 * Convert the hardware/lla address to ascii 2516 */ 2517 addr_buf = kmem_zalloc(addrlen, KM_NOSLEEP); 2518 if (addr_buf == NULL) { 2519 mutex_exit(&nce->nce_lock); 2520 return; 2521 } 2522 (void) mac_colon_addr((uint8_t *)h, 2523 (ill->ill_flags & ILLF_XRESOLV) ? 2524 dl->dl_dest_addr_length : ill->ill_nd_lla_len, 2525 addr_buf, addrlen); 2526 mutex_exit(&nce->nce_lock); 2527 (void) mi_mpprintf(mp, "%8s %17s %5s %s/%d", 2528 ill->ill_name, addr_buf, (uchar_t *)&flags_buf, 2529 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2530 (char *)local_buf, sizeof (local_buf)), 2531 ip_mask_to_plen_v6(&v6addr)); 2532 kmem_free(addr_buf, addrlen); 2533 } 2534 } else { 2535 (void) mi_mpprintf(mp, 2536 "%8s %9s %5s %s/%d", 2537 ill->ill_name, 2538 "None", 2539 (uchar_t *)&flags_buf, 2540 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2541 (char *)local_buf, sizeof (local_buf)), 2542 ip_mask_to_plen_v6(&v6addr)); 2543 } 2544 } 2545 2546 mblk_t * 2547 nce_udreq_alloc(ill_t *ill) 2548 { 2549 mblk_t *template_mp = NULL; 2550 dl_unitdata_req_t *dlur; 2551 int sap_length; 2552 2553 ASSERT(ill->ill_isv6); 2554 2555 sap_length = ill->ill_sap_length; 2556 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 2557 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 2558 if (template_mp == NULL) 2559 return (NULL); 2560 2561 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 2562 dlur->dl_priority.dl_min = 0; 2563 dlur->dl_priority.dl_max = 0; 2564 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 2565 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 2566 2567 /* Copy in the SAP value. */ 2568 NCE_LL_SAP_COPY(ill, template_mp); 2569 2570 return (template_mp); 2571 } 2572 2573 /* 2574 * NDP retransmit timer. 2575 * This timer goes off when: 2576 * a. It is time to retransmit NS for resolver. 2577 * b. It is time to send reachability probes. 2578 */ 2579 void 2580 ndp_timer(void *arg) 2581 { 2582 nce_t *nce = arg; 2583 ill_t *ill = nce->nce_ill; 2584 uint32_t ms; 2585 char addrbuf[INET6_ADDRSTRLEN]; 2586 mblk_t *mp; 2587 boolean_t dropped = B_FALSE; 2588 ip_stack_t *ipst = ill->ill_ipst; 2589 2590 /* 2591 * The timer has to be cancelled by ndp_delete before doing the final 2592 * refrele. So the NCE is guaranteed to exist when the timer runs 2593 * until it clears the timeout_id. Before clearing the timeout_id 2594 * bump up the refcnt so that we can continue to use the nce 2595 */ 2596 ASSERT(nce != NULL); 2597 2598 /* 2599 * Grab the ill_g_lock now itself to avoid lock order problems. 2600 * nce_solicit needs ill_g_lock to be able to traverse ills 2601 */ 2602 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2603 mutex_enter(&nce->nce_lock); 2604 NCE_REFHOLD_LOCKED(nce); 2605 nce->nce_timeout_id = 0; 2606 2607 /* 2608 * Check the reachability state first. 2609 */ 2610 switch (nce->nce_state) { 2611 case ND_DELAY: 2612 rw_exit(&ipst->ips_ill_g_lock); 2613 nce->nce_state = ND_PROBE; 2614 mutex_exit(&nce->nce_lock); 2615 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2616 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2617 if (ip_debug > 3) { 2618 /* ip2dbg */ 2619 pr_addr_dbg("ndp_timer: state for %s changed " 2620 "to PROBE\n", AF_INET6, &nce->nce_addr); 2621 } 2622 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2623 NCE_REFRELE(nce); 2624 return; 2625 case ND_PROBE: 2626 /* must be retransmit timer */ 2627 rw_exit(&ipst->ips_ill_g_lock); 2628 nce->nce_pcnt--; 2629 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2630 nce->nce_pcnt >= -1); 2631 if (nce->nce_pcnt > 0) { 2632 /* 2633 * As per RFC2461, the nce gets deleted after 2634 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2635 * Note that the first unicast solicitation is sent 2636 * during the DELAY state. 2637 */ 2638 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2639 nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, 2640 addrbuf, sizeof (addrbuf)))); 2641 mutex_exit(&nce->nce_lock); 2642 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, 2643 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, 2644 (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE : 2645 NDP_UNICAST); 2646 if (dropped) { 2647 mutex_enter(&nce->nce_lock); 2648 nce->nce_pcnt++; 2649 mutex_exit(&nce->nce_lock); 2650 } 2651 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 2652 } else if (nce->nce_pcnt < 0) { 2653 /* No hope, delete the nce */ 2654 nce->nce_state = ND_UNREACHABLE; 2655 mutex_exit(&nce->nce_lock); 2656 if (ip_debug > 2) { 2657 /* ip1dbg */ 2658 pr_addr_dbg("ndp_timer: Delete IRE for" 2659 " dst %s\n", AF_INET6, &nce->nce_addr); 2660 } 2661 ndp_delete(nce); 2662 } else if (!(nce->nce_flags & NCE_F_PERMANENT)) { 2663 /* Wait RetransTimer, before deleting the entry */ 2664 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2665 nce->nce_pcnt, inet_ntop(AF_INET6, 2666 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2667 mutex_exit(&nce->nce_lock); 2668 /* Wait one interval before killing */ 2669 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2670 } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { 2671 ipif_t *ipif; 2672 2673 /* 2674 * We're done probing, and we can now declare this 2675 * address to be usable. Let IP know that it's ok to 2676 * use. 2677 */ 2678 nce->nce_state = ND_REACHABLE; 2679 mutex_exit(&nce->nce_lock); 2680 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, 2681 ALL_ZONES, NULL, NULL, NULL, NULL, ipst); 2682 if (ipif != NULL) { 2683 if (ipif->ipif_was_dup) { 2684 char ibuf[LIFNAMSIZ + 10]; 2685 char sbuf[INET6_ADDRSTRLEN]; 2686 2687 ipif->ipif_was_dup = B_FALSE; 2688 (void) inet_ntop(AF_INET6, 2689 &ipif->ipif_v6lcl_addr, 2690 sbuf, sizeof (sbuf)); 2691 ipif_get_name(ipif, ibuf, 2692 sizeof (ibuf)); 2693 cmn_err(CE_NOTE, "recovered address " 2694 "%s on %s", sbuf, ibuf); 2695 } 2696 if ((ipif->ipif_flags & IPIF_UP) && 2697 !ipif->ipif_addr_ready) { 2698 ip_rts_ifmsg(ipif); 2699 ip_rts_newaddrmsg(RTM_ADD, 0, ipif); 2700 sctp_update_ipif(ipif, SCTP_IPIF_UP); 2701 } 2702 ipif->ipif_addr_ready = 1; 2703 ipif_refrele(ipif); 2704 } 2705 /* Begin defending our new address */ 2706 nce->nce_unsolicit_count = 0; 2707 dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, 2708 B_FALSE, &nce->nce_addr, &ipv6_all_hosts_mcast, 2709 nce_advert_flags(nce)); 2710 if (dropped) { 2711 nce->nce_unsolicit_count = 1; 2712 NDP_RESTART_TIMER(nce, 2713 ipst->ips_ip_ndp_unsolicit_interval); 2714 } else if (ipst->ips_ip_ndp_defense_interval != 0) { 2715 NDP_RESTART_TIMER(nce, 2716 ipst->ips_ip_ndp_defense_interval); 2717 } 2718 } else { 2719 /* 2720 * This is an address we're probing to be our own, but 2721 * the ill is down. Wait until it comes back before 2722 * doing anything, but switch to reachable state so 2723 * that the restart will work. 2724 */ 2725 nce->nce_state = ND_REACHABLE; 2726 mutex_exit(&nce->nce_lock); 2727 } 2728 NCE_REFRELE(nce); 2729 return; 2730 case ND_INCOMPLETE: 2731 /* 2732 * Must be resolvers retransmit timer. 2733 */ 2734 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2735 ip6i_t *ip6i; 2736 ip6_t *ip6h; 2737 mblk_t *data_mp; 2738 2739 /* 2740 * Walk the list of packets queued, and see if there 2741 * are any multipathing probe packets. Such packets 2742 * are always queued at the head. Since this is a 2743 * retransmit timer firing, mark such packets as 2744 * delayed in ND resolution. This info will be used 2745 * in ip_wput_v6(). Multipathing probe packets will 2746 * always have an ip6i_t. Once we hit a packet without 2747 * it, we can break out of this loop. 2748 */ 2749 if (mp->b_datap->db_type == M_CTL) 2750 data_mp = mp->b_cont; 2751 else 2752 data_mp = mp; 2753 2754 ip6h = (ip6_t *)data_mp->b_rptr; 2755 if (ip6h->ip6_nxt != IPPROTO_RAW) 2756 break; 2757 2758 /* 2759 * This message should have been pulled up already in 2760 * ip_wput_v6. We can't do pullups here because the 2761 * b_next/b_prev is non-NULL. 2762 */ 2763 ip6i = (ip6i_t *)ip6h; 2764 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2765 sizeof (ip6i_t) + IPV6_HDR_LEN); 2766 2767 /* Mark this packet as delayed due to ND resolution */ 2768 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2769 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2770 } 2771 if (nce->nce_qd_mp != NULL) { 2772 ms = nce_solicit(nce, NULL); 2773 rw_exit(&ipst->ips_ill_g_lock); 2774 if (ms == 0) { 2775 if (nce->nce_state != ND_REACHABLE) { 2776 mutex_exit(&nce->nce_lock); 2777 nce_resolv_failed(nce); 2778 ndp_delete(nce); 2779 } else { 2780 mutex_exit(&nce->nce_lock); 2781 } 2782 } else { 2783 mutex_exit(&nce->nce_lock); 2784 NDP_RESTART_TIMER(nce, (clock_t)ms); 2785 } 2786 NCE_REFRELE(nce); 2787 return; 2788 } 2789 mutex_exit(&nce->nce_lock); 2790 rw_exit(&ipst->ips_ill_g_lock); 2791 NCE_REFRELE(nce); 2792 break; 2793 case ND_REACHABLE : 2794 rw_exit(&ipst->ips_ill_g_lock); 2795 if (((nce->nce_flags & NCE_F_UNSOL_ADV) && 2796 nce->nce_unsolicit_count != 0) || 2797 ((nce->nce_flags & NCE_F_PERMANENT) && 2798 ipst->ips_ip_ndp_defense_interval != 0)) { 2799 if (nce->nce_unsolicit_count > 0) 2800 nce->nce_unsolicit_count--; 2801 mutex_exit(&nce->nce_lock); 2802 dropped = nce_xmit(ill, 2803 ND_NEIGHBOR_ADVERT, 2804 ill, /* ill to be used for hw addr */ 2805 B_FALSE, /* use ill_phys_addr */ 2806 &nce->nce_addr, 2807 &ipv6_all_hosts_mcast, 2808 nce_advert_flags(nce)); 2809 if (dropped) { 2810 mutex_enter(&nce->nce_lock); 2811 nce->nce_unsolicit_count++; 2812 mutex_exit(&nce->nce_lock); 2813 } 2814 if (nce->nce_unsolicit_count != 0) { 2815 NDP_RESTART_TIMER(nce, 2816 ipst->ips_ip_ndp_unsolicit_interval); 2817 } else { 2818 NDP_RESTART_TIMER(nce, 2819 ipst->ips_ip_ndp_defense_interval); 2820 } 2821 } else { 2822 mutex_exit(&nce->nce_lock); 2823 } 2824 NCE_REFRELE(nce); 2825 break; 2826 default: 2827 rw_exit(&ipst->ips_ill_g_lock); 2828 mutex_exit(&nce->nce_lock); 2829 NCE_REFRELE(nce); 2830 break; 2831 } 2832 } 2833 2834 /* 2835 * Set a link layer address from the ll_addr passed in. 2836 * Copy SAP from ill. 2837 */ 2838 static void 2839 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2840 { 2841 ill_t *ill = nce->nce_ill; 2842 uchar_t *woffset; 2843 2844 ASSERT(ll_addr != NULL); 2845 /* Always called before fast_path_probe */ 2846 ASSERT(nce->nce_fp_mp == NULL); 2847 if (ill->ill_sap_length != 0) { 2848 /* 2849 * Copy the SAP type specified in the 2850 * request into the xmit template. 2851 */ 2852 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2853 } 2854 if (ill->ill_phys_addr_length > 0) { 2855 /* 2856 * The bcopy() below used to be called for the physical address 2857 * length rather than the link layer address length. For 2858 * ethernet and many other media, the phys_addr and lla are 2859 * identical. 2860 * However, with xresolv interfaces being introduced, the 2861 * phys_addr and lla are no longer the same, and the physical 2862 * address may not have any useful meaning, so we use the lla 2863 * for IPv6 address resolution and destination addressing. 2864 * 2865 * For PPP or other interfaces with a zero length 2866 * physical address, don't do anything here. 2867 * The bcopy() with a zero phys_addr length was previously 2868 * a no-op for interfaces with a zero-length physical address. 2869 * Using the lla for them would change the way they operate. 2870 * Doing nothing in such cases preserves expected behavior. 2871 */ 2872 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2873 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2874 } 2875 } 2876 2877 static boolean_t 2878 nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) 2879 { 2880 ill_t *ill = nce->nce_ill; 2881 uchar_t *ll_offset; 2882 2883 ASSERT(nce->nce_res_mp != NULL); 2884 if (ll_addr == NULL) 2885 return (B_FALSE); 2886 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2887 if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) 2888 return (B_TRUE); 2889 return (B_FALSE); 2890 } 2891 2892 /* 2893 * Updates the link layer address or the reachability state of 2894 * a cache entry. Reset probe counter if needed. 2895 */ 2896 static void 2897 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2898 { 2899 ill_t *ill = nce->nce_ill; 2900 boolean_t need_stop_timer = B_FALSE; 2901 boolean_t need_fastpath_update = B_FALSE; 2902 2903 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2904 ASSERT(nce->nce_ipversion == IPV6_VERSION); 2905 /* 2906 * If this interface does not do NUD, there is no point 2907 * in allowing an update to the cache entry. Although 2908 * we will respond to NS. 2909 * The only time we accept an update for a resolver when 2910 * NUD is turned off is when it has just been created. 2911 * Non-Resolvers will always be created as REACHABLE. 2912 */ 2913 if (new_state != ND_UNCHANGED) { 2914 if ((nce->nce_flags & NCE_F_NONUD) && 2915 (nce->nce_state != ND_INCOMPLETE)) 2916 return; 2917 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2918 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2919 need_stop_timer = B_TRUE; 2920 if (new_state == ND_REACHABLE) 2921 nce->nce_last = TICK_TO_MSEC(lbolt64); 2922 else { 2923 /* We force NUD in this case */ 2924 nce->nce_last = 0; 2925 } 2926 nce->nce_state = new_state; 2927 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2928 } 2929 /* 2930 * In case of fast path we need to free the the fastpath 2931 * M_DATA and do another probe. Otherwise we can just 2932 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2933 * whatever packets that happens to be transmitting at the time. 2934 */ 2935 if (new_ll_addr != NULL) { 2936 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2937 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2938 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2939 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2940 if (nce->nce_fp_mp != NULL) { 2941 freemsg(nce->nce_fp_mp); 2942 nce->nce_fp_mp = NULL; 2943 } 2944 need_fastpath_update = B_TRUE; 2945 } 2946 mutex_exit(&nce->nce_lock); 2947 if (need_stop_timer) { 2948 (void) untimeout(nce->nce_timeout_id); 2949 nce->nce_timeout_id = 0; 2950 } 2951 if (need_fastpath_update) 2952 nce_fastpath(nce); 2953 mutex_enter(&nce->nce_lock); 2954 } 2955 2956 void 2957 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 2958 { 2959 uint_t count = 0; 2960 mblk_t **mpp; 2961 2962 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2963 2964 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2965 mpp = &(*mpp)->b_next) { 2966 if (++count > 2967 nce->nce_ill->ill_max_buf) { 2968 mblk_t *tmp = nce->nce_qd_mp->b_next; 2969 2970 nce->nce_qd_mp->b_next = NULL; 2971 nce->nce_qd_mp->b_prev = NULL; 2972 freemsg(nce->nce_qd_mp); 2973 nce->nce_qd_mp = tmp; 2974 } 2975 } 2976 /* put this on the list */ 2977 if (head_insert) { 2978 mp->b_next = nce->nce_qd_mp; 2979 nce->nce_qd_mp = mp; 2980 } else { 2981 *mpp = mp; 2982 } 2983 } 2984 2985 static void 2986 nce_queue_mp(nce_t *nce, mblk_t *mp) 2987 { 2988 boolean_t head_insert = B_FALSE; 2989 ip6_t *ip6h; 2990 ip6i_t *ip6i; 2991 mblk_t *data_mp; 2992 2993 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2994 2995 if (mp->b_datap->db_type == M_CTL) 2996 data_mp = mp->b_cont; 2997 else 2998 data_mp = mp; 2999 ip6h = (ip6_t *)data_mp->b_rptr; 3000 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3001 /* 3002 * This message should have been pulled up already in 3003 * ip_wput_v6. We can't do pullups here because the message 3004 * could be from the nce_qd_mp which could have b_next/b_prev 3005 * non-NULL. 3006 */ 3007 ip6i = (ip6i_t *)ip6h; 3008 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 3009 sizeof (ip6i_t) + IPV6_HDR_LEN); 3010 /* 3011 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 3012 * This has 2 aspects mentioned below. 3013 * 1. Perform head insertion in the nce_qd_mp for these packets. 3014 * This ensures that next retransmit of ND solicitation 3015 * will use the interface specified by the probe packet, 3016 * for both NS and NA. This corresponds to the src address 3017 * in the IPv6 packet. If we insert at tail, we will be 3018 * depending on the packet at the head for successful 3019 * ND resolution. This is not reliable, because the interface 3020 * on which the NA arrives could be different from the interface 3021 * on which the NS was sent, and if the receiving interface is 3022 * failed, it will appear that the sending interface is also 3023 * failed, causing in.mpathd to misdiagnose this as link 3024 * failure. 3025 * 2. Drop the original packet, if the ND resolution did not 3026 * succeed in the first attempt. However we will create the 3027 * nce and the ire, as soon as the ND resolution succeeds. 3028 * We don't gain anything by queueing multiple probe packets 3029 * and sending them back-to-back once resolution succeeds. 3030 * It is sufficient to send just 1 packet after ND resolution 3031 * succeeds. Since mpathd is sending down probe packets at a 3032 * constant rate, we don't need to send the queued packet. We 3033 * need to queue it only for NDP resolution. The benefit of 3034 * dropping the probe packets that were delayed in ND 3035 * resolution, is that in.mpathd will not see inflated 3036 * RTT. If the ND resolution does not succeed within 3037 * in.mpathd's failure detection time, mpathd may detect 3038 * a failure, and it does not matter whether the packet 3039 * was queued or dropped. 3040 */ 3041 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 3042 head_insert = B_TRUE; 3043 } 3044 3045 nce_queue_mp_common(nce, mp, head_insert); 3046 } 3047 3048 /* 3049 * Called when address resolution failed due to a timeout. 3050 * Send an ICMP unreachable in response to all queued packets. 3051 */ 3052 void 3053 nce_resolv_failed(nce_t *nce) 3054 { 3055 mblk_t *mp, *nxt_mp, *first_mp; 3056 char buf[INET6_ADDRSTRLEN]; 3057 ip6_t *ip6h; 3058 zoneid_t zoneid = GLOBAL_ZONEID; 3059 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3060 3061 ip1dbg(("nce_resolv_failed: dst %s\n", 3062 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 3063 mutex_enter(&nce->nce_lock); 3064 mp = nce->nce_qd_mp; 3065 nce->nce_qd_mp = NULL; 3066 mutex_exit(&nce->nce_lock); 3067 while (mp != NULL) { 3068 nxt_mp = mp->b_next; 3069 mp->b_next = NULL; 3070 mp->b_prev = NULL; 3071 3072 first_mp = mp; 3073 if (mp->b_datap->db_type == M_CTL) { 3074 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 3075 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3076 zoneid = io->ipsec_out_zoneid; 3077 ASSERT(zoneid != ALL_ZONES); 3078 mp = mp->b_cont; 3079 } 3080 3081 ip6h = (ip6_t *)mp->b_rptr; 3082 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3083 ip6i_t *ip6i; 3084 /* 3085 * This message should have been pulled up already 3086 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 3087 * the header is pulled up. 3088 */ 3089 ip6i = (ip6i_t *)ip6h; 3090 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 3091 sizeof (ip6i_t) + IPV6_HDR_LEN); 3092 mp->b_rptr += sizeof (ip6i_t); 3093 } 3094 /* 3095 * Ignore failure since icmp_unreachable_v6 will silently 3096 * drop packets with an unspecified source address. 3097 */ 3098 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst); 3099 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 3100 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst); 3101 mp = nxt_mp; 3102 } 3103 } 3104 3105 /* 3106 * Called by SIOCSNDP* ioctl to add/change an nce entry 3107 * and the corresponding attributes. 3108 * Disallow states other than ND_REACHABLE or ND_STALE. 3109 */ 3110 int 3111 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 3112 { 3113 sin6_t *sin6; 3114 in6_addr_t *addr; 3115 nce_t *nce; 3116 int err; 3117 uint16_t new_flags = 0; 3118 uint16_t old_flags = 0; 3119 int inflags = lnr->lnr_flags; 3120 ip_stack_t *ipst = ill->ill_ipst; 3121 3122 ASSERT(ill->ill_isv6); 3123 if ((lnr->lnr_state_create != ND_REACHABLE) && 3124 (lnr->lnr_state_create != ND_STALE)) 3125 return (EINVAL); 3126 3127 sin6 = (sin6_t *)&lnr->lnr_addr; 3128 addr = &sin6->sin6_addr; 3129 3130 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 3131 /* We know it can not be mapping so just look in the hash table */ 3132 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 3133 nce = nce_lookup_addr(ill, addr, nce); 3134 if (nce != NULL) 3135 new_flags = nce->nce_flags; 3136 3137 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 3138 case NDF_ISROUTER_ON: 3139 new_flags |= NCE_F_ISROUTER; 3140 break; 3141 case NDF_ISROUTER_OFF: 3142 new_flags &= ~NCE_F_ISROUTER; 3143 break; 3144 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3145 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3146 if (nce != NULL) 3147 NCE_REFRELE(nce); 3148 return (EINVAL); 3149 } 3150 3151 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3152 case NDF_ANYCAST_ON: 3153 new_flags |= NCE_F_ANYCAST; 3154 break; 3155 case NDF_ANYCAST_OFF: 3156 new_flags &= ~NCE_F_ANYCAST; 3157 break; 3158 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3159 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3160 if (nce != NULL) 3161 NCE_REFRELE(nce); 3162 return (EINVAL); 3163 } 3164 3165 if (nce == NULL) { 3166 err = ndp_add_v6(ill, 3167 (uchar_t *)lnr->lnr_hdw_addr, 3168 addr, 3169 &ipv6_all_ones, 3170 &ipv6_all_zeros, 3171 0, 3172 new_flags, 3173 lnr->lnr_state_create, 3174 &nce); 3175 if (err != 0) { 3176 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3177 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 3178 return (err); 3179 } 3180 } 3181 old_flags = nce->nce_flags; 3182 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3183 /* 3184 * Router turned to host, delete all ires. 3185 * XXX Just delete the entry, but we need to add too. 3186 */ 3187 nce->nce_flags &= ~NCE_F_ISROUTER; 3188 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3189 ndp_delete(nce); 3190 NCE_REFRELE(nce); 3191 return (0); 3192 } 3193 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3194 3195 mutex_enter(&nce->nce_lock); 3196 nce->nce_flags = new_flags; 3197 mutex_exit(&nce->nce_lock); 3198 /* 3199 * Note that we ignore the state at this point, which 3200 * should be either STALE or REACHABLE. Instead we let 3201 * the link layer address passed in to determine the state 3202 * much like incoming packets. 3203 */ 3204 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3205 NCE_REFRELE(nce); 3206 return (0); 3207 } 3208 3209 /* 3210 * If the device driver supports it, we make nce_fp_mp to have 3211 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 3212 * The caller ensures there is hold on nce for this function. 3213 * Note that since ill_fastpath_probe() copies the mblk there is 3214 * no need for the hold beyond this function. 3215 */ 3216 void 3217 nce_fastpath(nce_t *nce) 3218 { 3219 ill_t *ill = nce->nce_ill; 3220 int res; 3221 3222 ASSERT(ill != NULL); 3223 ASSERT(nce->nce_state != ND_INITIAL && nce->nce_state != ND_INCOMPLETE); 3224 3225 if (nce->nce_fp_mp != NULL) { 3226 /* Already contains fastpath info */ 3227 return; 3228 } 3229 if (nce->nce_res_mp != NULL) { 3230 nce_fastpath_list_add(nce); 3231 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3232 /* 3233 * EAGAIN is an indication of a transient error 3234 * i.e. allocation failure etc. leave the nce in the list it 3235 * will be updated when another probe happens for another ire 3236 * if not it will be taken out of the list when the ire is 3237 * deleted. 3238 */ 3239 3240 if (res != 0 && res != EAGAIN) 3241 nce_fastpath_list_delete(nce); 3242 } 3243 } 3244 3245 /* 3246 * Drain the list of nce's waiting for fastpath response. 3247 */ 3248 void 3249 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3250 void *arg) 3251 { 3252 3253 nce_t *next_nce; 3254 nce_t *current_nce; 3255 nce_t *first_nce; 3256 nce_t *prev_nce = NULL; 3257 3258 mutex_enter(&ill->ill_lock); 3259 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3260 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3261 next_nce = current_nce->nce_fastpath; 3262 /* 3263 * Take it off the list if we're flushing, or if the callback 3264 * routine tells us to do so. Otherwise, leave the nce in the 3265 * fastpath list to handle any pending response from the lower 3266 * layer. We can't drain the list when the callback routine 3267 * comparison failed, because the response is asynchronous in 3268 * nature, and may not arrive in the same order as the list 3269 * insertion. 3270 */ 3271 if (func == NULL || func(current_nce, arg)) { 3272 current_nce->nce_fastpath = NULL; 3273 if (current_nce == first_nce) 3274 ill->ill_fastpath_list = first_nce = next_nce; 3275 else 3276 prev_nce->nce_fastpath = next_nce; 3277 } else { 3278 /* previous element that is still in the list */ 3279 prev_nce = current_nce; 3280 } 3281 current_nce = next_nce; 3282 } 3283 mutex_exit(&ill->ill_lock); 3284 } 3285 3286 /* 3287 * Add nce to the nce fastpath list. 3288 */ 3289 void 3290 nce_fastpath_list_add(nce_t *nce) 3291 { 3292 ill_t *ill; 3293 3294 ill = nce->nce_ill; 3295 3296 mutex_enter(&ill->ill_lock); 3297 mutex_enter(&nce->nce_lock); 3298 3299 /* 3300 * if nce has not been deleted and 3301 * is not already in the list add it. 3302 */ 3303 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3304 (nce->nce_fastpath == NULL)) { 3305 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3306 ill->ill_fastpath_list = nce; 3307 } 3308 3309 mutex_exit(&nce->nce_lock); 3310 mutex_exit(&ill->ill_lock); 3311 } 3312 3313 /* 3314 * remove nce from the nce fastpath list. 3315 */ 3316 void 3317 nce_fastpath_list_delete(nce_t *nce) 3318 { 3319 nce_t *nce_ptr; 3320 3321 ill_t *ill; 3322 3323 ill = nce->nce_ill; 3324 ASSERT(ill != NULL); 3325 3326 mutex_enter(&ill->ill_lock); 3327 if (nce->nce_fastpath == NULL) 3328 goto done; 3329 3330 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3331 3332 if (ill->ill_fastpath_list == nce) { 3333 ill->ill_fastpath_list = nce->nce_fastpath; 3334 } else { 3335 nce_ptr = ill->ill_fastpath_list; 3336 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3337 if (nce_ptr->nce_fastpath == nce) { 3338 nce_ptr->nce_fastpath = nce->nce_fastpath; 3339 break; 3340 } 3341 nce_ptr = nce_ptr->nce_fastpath; 3342 } 3343 } 3344 3345 nce->nce_fastpath = NULL; 3346 done: 3347 mutex_exit(&ill->ill_lock); 3348 } 3349 3350 /* 3351 * Update all NCE's that are not in fastpath mode and 3352 * have an nce_fp_mp that matches mp. mp->b_cont contains 3353 * the fastpath header. 3354 * 3355 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 3356 */ 3357 boolean_t 3358 ndp_fastpath_update(nce_t *nce, void *arg) 3359 { 3360 mblk_t *mp, *fp_mp; 3361 uchar_t *mp_rptr, *ud_mp_rptr; 3362 mblk_t *ud_mp = nce->nce_res_mp; 3363 ptrdiff_t cmplen; 3364 3365 if (nce->nce_flags & NCE_F_MAPPING) 3366 return (B_TRUE); 3367 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3368 return (B_TRUE); 3369 3370 ip2dbg(("ndp_fastpath_update: trying\n")); 3371 mp = (mblk_t *)arg; 3372 mp_rptr = mp->b_rptr; 3373 cmplen = mp->b_wptr - mp_rptr; 3374 ASSERT(cmplen >= 0); 3375 ud_mp_rptr = ud_mp->b_rptr; 3376 /* 3377 * The nce is locked here to prevent any other threads 3378 * from accessing and changing nce_res_mp when the IPv6 address 3379 * becomes resolved to an lla while we're in the middle 3380 * of looking at and comparing the hardware address (lla). 3381 * It is also locked to prevent multiple threads in nce_fastpath_update 3382 * from examining nce_res_mp atthe same time. 3383 */ 3384 mutex_enter(&nce->nce_lock); 3385 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3386 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3387 mutex_exit(&nce->nce_lock); 3388 /* 3389 * Don't take the ire off the fastpath list yet, 3390 * since the response may come later. 3391 */ 3392 return (B_FALSE); 3393 } 3394 /* Matched - install mp as the fastpath mp */ 3395 ip1dbg(("ndp_fastpath_update: match\n")); 3396 fp_mp = dupb(mp->b_cont); 3397 if (fp_mp != NULL) { 3398 nce->nce_fp_mp = fp_mp; 3399 } 3400 mutex_exit(&nce->nce_lock); 3401 return (B_TRUE); 3402 } 3403 3404 /* 3405 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3406 * driver. Note that it assumes IP is exclusive... 3407 */ 3408 /* ARGSUSED */ 3409 void 3410 ndp_fastpath_flush(nce_t *nce, char *arg) 3411 { 3412 if (nce->nce_flags & NCE_F_MAPPING) 3413 return; 3414 /* No fastpath info? */ 3415 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3416 return; 3417 3418 if (nce->nce_ipversion == IPV4_VERSION && 3419 nce->nce_flags & NCE_F_BCAST) { 3420 /* 3421 * IPv4 BROADCAST entries: 3422 * We can't delete the nce since it is difficult to 3423 * recreate these without going through the 3424 * ipif down/up dance. 3425 * 3426 * All access to nce->nce_fp_mp in the case of these 3427 * is protected by nce_lock. 3428 */ 3429 mutex_enter(&nce->nce_lock); 3430 if (nce->nce_fp_mp != NULL) { 3431 freeb(nce->nce_fp_mp); 3432 nce->nce_fp_mp = NULL; 3433 mutex_exit(&nce->nce_lock); 3434 nce_fastpath(nce); 3435 } else { 3436 mutex_exit(&nce->nce_lock); 3437 } 3438 } else { 3439 /* Just delete the NCE... */ 3440 ndp_delete(nce); 3441 } 3442 } 3443 3444 /* 3445 * Return a pointer to a given option in the packet. 3446 * Assumes that option part of the packet have already been validated. 3447 */ 3448 nd_opt_hdr_t * 3449 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3450 { 3451 while (optlen > 0) { 3452 if (opt->nd_opt_type == opt_type) 3453 return (opt); 3454 optlen -= 8 * opt->nd_opt_len; 3455 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3456 } 3457 return (NULL); 3458 } 3459 3460 /* 3461 * Verify all option lengths present are > 0, also check to see 3462 * if the option lengths and packet length are consistent. 3463 */ 3464 boolean_t 3465 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3466 { 3467 ASSERT(opt != NULL); 3468 while (optlen > 0) { 3469 if (opt->nd_opt_len == 0) 3470 return (B_FALSE); 3471 optlen -= 8 * opt->nd_opt_len; 3472 if (optlen < 0) 3473 return (B_FALSE); 3474 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3475 } 3476 return (B_TRUE); 3477 } 3478 3479 /* 3480 * ndp_walk function. 3481 * Free a fraction of the NCE cache entries. 3482 * A fraction of zero means to not free any in that category. 3483 */ 3484 void 3485 ndp_cache_reclaim(nce_t *nce, char *arg) 3486 { 3487 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3488 uint_t rand; 3489 3490 if (nce->nce_flags & NCE_F_PERMANENT) 3491 return; 3492 3493 rand = (uint_t)lbolt + 3494 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3495 if (ncr->ncr_host != 0 && 3496 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3497 ndp_delete(nce); 3498 return; 3499 } 3500 } 3501 3502 /* 3503 * ndp_walk function. 3504 * Count the number of NCEs that can be deleted. 3505 * These would be hosts but not routers. 3506 */ 3507 void 3508 ndp_cache_count(nce_t *nce, char *arg) 3509 { 3510 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3511 3512 if (nce->nce_flags & NCE_F_PERMANENT) 3513 return; 3514 3515 ncc->ncc_total++; 3516 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3517 ncc->ncc_host++; 3518 } 3519 3520 #ifdef NCE_DEBUG 3521 th_trace_t * 3522 th_trace_nce_lookup(nce_t *nce) 3523 { 3524 int bucket_id; 3525 th_trace_t *th_trace; 3526 3527 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3528 3529 bucket_id = IP_TR_HASH(curthread); 3530 ASSERT(bucket_id < IP_TR_HASH_MAX); 3531 3532 for (th_trace = nce->nce_trace[bucket_id]; th_trace != NULL; 3533 th_trace = th_trace->th_next) { 3534 if (th_trace->th_id == curthread) 3535 return (th_trace); 3536 } 3537 return (NULL); 3538 } 3539 3540 void 3541 nce_trace_ref(nce_t *nce) 3542 { 3543 int bucket_id; 3544 th_trace_t *th_trace; 3545 3546 /* 3547 * Attempt to locate the trace buffer for the curthread. 3548 * If it does not exist, then allocate a new trace buffer 3549 * and link it in list of trace bufs for this ipif, at the head 3550 */ 3551 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3552 3553 if (nce->nce_trace_disable == B_TRUE) 3554 return; 3555 3556 th_trace = th_trace_nce_lookup(nce); 3557 if (th_trace == NULL) { 3558 bucket_id = IP_TR_HASH(curthread); 3559 th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), 3560 KM_NOSLEEP); 3561 if (th_trace == NULL) { 3562 nce->nce_trace_disable = B_TRUE; 3563 nce_trace_inactive(nce); 3564 return; 3565 } 3566 th_trace->th_id = curthread; 3567 th_trace->th_next = nce->nce_trace[bucket_id]; 3568 th_trace->th_prev = &nce->nce_trace[bucket_id]; 3569 if (th_trace->th_next != NULL) 3570 th_trace->th_next->th_prev = &th_trace->th_next; 3571 nce->nce_trace[bucket_id] = th_trace; 3572 } 3573 ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1); 3574 th_trace->th_refcnt++; 3575 th_trace_rrecord(th_trace); 3576 } 3577 3578 void 3579 nce_untrace_ref(nce_t *nce) 3580 { 3581 th_trace_t *th_trace; 3582 3583 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3584 3585 if (nce->nce_trace_disable == B_TRUE) 3586 return; 3587 3588 th_trace = th_trace_nce_lookup(nce); 3589 ASSERT(th_trace != NULL && th_trace->th_refcnt > 0); 3590 3591 th_trace_rrecord(th_trace); 3592 th_trace->th_refcnt--; 3593 } 3594 3595 void 3596 nce_trace_inactive(nce_t *nce) 3597 { 3598 th_trace_t *th_trace; 3599 int i; 3600 3601 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3602 3603 for (i = 0; i < IP_TR_HASH_MAX; i++) { 3604 while (nce->nce_trace[i] != NULL) { 3605 th_trace = nce->nce_trace[i]; 3606 3607 /* unlink th_trace and free it */ 3608 nce->nce_trace[i] = th_trace->th_next; 3609 if (th_trace->th_next != NULL) 3610 th_trace->th_next->th_prev = 3611 &nce->nce_trace[i]; 3612 3613 th_trace->th_next = NULL; 3614 th_trace->th_prev = NULL; 3615 kmem_free(th_trace, sizeof (th_trace_t)); 3616 } 3617 } 3618 3619 } 3620 3621 /* ARGSUSED */ 3622 int 3623 nce_thread_exit(nce_t *nce, caddr_t arg) 3624 { 3625 th_trace_t *th_trace; 3626 uint64_t now; 3627 3628 mutex_enter(&nce->nce_lock); 3629 if (nce->nce_state == ND_INITIAL) { 3630 3631 now = TICK_TO_MSEC(lbolt64); 3632 if (now - nce->nce_init_time > NCE_STUCK_TIMEOUT) { 3633 DTRACE_PROBE1(nce__stuck, nce_t *, nce); 3634 } 3635 } 3636 th_trace = th_trace_nce_lookup(nce); 3637 3638 if (th_trace == NULL) { 3639 mutex_exit(&nce->nce_lock); 3640 return (0); 3641 } 3642 3643 ASSERT(th_trace->th_refcnt == 0); 3644 3645 /* unlink th_trace and free it */ 3646 *th_trace->th_prev = th_trace->th_next; 3647 if (th_trace->th_next != NULL) 3648 th_trace->th_next->th_prev = th_trace->th_prev; 3649 th_trace->th_next = NULL; 3650 th_trace->th_prev = NULL; 3651 kmem_free(th_trace, sizeof (th_trace_t)); 3652 mutex_exit(&nce->nce_lock); 3653 return (0); 3654 } 3655 #endif 3656 3657 /* 3658 * Called when address resolution fails due to a timeout. 3659 * Send an ICMP unreachable in response to all queued packets. 3660 */ 3661 void 3662 arp_resolv_failed(nce_t *nce) 3663 { 3664 mblk_t *mp, *nxt_mp, *first_mp; 3665 char buf[INET6_ADDRSTRLEN]; 3666 zoneid_t zoneid = GLOBAL_ZONEID; 3667 struct in_addr ipv4addr; 3668 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3669 3670 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3671 ip3dbg(("arp_resolv_failed: dst %s\n", 3672 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3673 mutex_enter(&nce->nce_lock); 3674 mp = nce->nce_qd_mp; 3675 nce->nce_qd_mp = NULL; 3676 mutex_exit(&nce->nce_lock); 3677 3678 while (mp != NULL) { 3679 nxt_mp = mp->b_next; 3680 mp->b_next = NULL; 3681 mp->b_prev = NULL; 3682 3683 first_mp = mp; 3684 /* 3685 * Send icmp unreachable messages 3686 * to the hosts. 3687 */ 3688 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst); 3689 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3690 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3691 ICMP_HOST_UNREACHABLE, zoneid, ipst); 3692 mp = nxt_mp; 3693 } 3694 } 3695 3696 int 3697 ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3698 nce_t **newnce, nce_t *src_nce) 3699 { 3700 int err; 3701 nce_t *nce; 3702 in6_addr_t addr6; 3703 ip_stack_t *ipst = ill->ill_ipst; 3704 3705 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3706 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3707 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3708 nce = nce_lookup_addr(ill, &addr6, nce); 3709 if (nce == NULL) { 3710 err = ndp_add_v4(ill, addr, flags, newnce, src_nce); 3711 } else { 3712 *newnce = nce; 3713 err = EEXIST; 3714 } 3715 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3716 return (err); 3717 } 3718 3719 /* 3720 * NDP Cache Entry creation routine for IPv4. 3721 * Mapped entries are handled in arp. 3722 * This routine must always be called with ndp4->ndp_g_lock held. 3723 * Prior to return, nce_refcnt is incremented. 3724 */ 3725 static int 3726 ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3727 nce_t **newnce, nce_t *src_nce) 3728 { 3729 static nce_t nce_nil; 3730 nce_t *nce; 3731 mblk_t *mp; 3732 mblk_t *template = NULL; 3733 nce_t **ncep; 3734 ip_stack_t *ipst = ill->ill_ipst; 3735 uint16_t state = ND_INITIAL; 3736 int err; 3737 3738 ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock)); 3739 ASSERT(!ill->ill_isv6); 3740 ASSERT((flags & NCE_F_MAPPING) == 0); 3741 3742 if (ill->ill_resolver_mp == NULL) 3743 return (EINVAL); 3744 /* 3745 * Allocate the mblk to hold the nce. 3746 */ 3747 mp = allocb(sizeof (nce_t), BPRI_MED); 3748 if (mp == NULL) 3749 return (ENOMEM); 3750 3751 nce = (nce_t *)mp->b_rptr; 3752 mp->b_wptr = (uchar_t *)&nce[1]; 3753 *nce = nce_nil; 3754 nce->nce_ill = ill; 3755 nce->nce_ipversion = IPV4_VERSION; 3756 nce->nce_flags = flags; 3757 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3758 nce->nce_rcnt = ill->ill_xmit_count; 3759 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3760 nce->nce_mask = ipv6_all_ones; 3761 nce->nce_extract_mask = ipv6_all_zeros; 3762 nce->nce_ll_extract_start = 0; 3763 nce->nce_qd_mp = NULL; 3764 nce->nce_mp = mp; 3765 /* This one is for nce getting created */ 3766 nce->nce_refcnt = 1; 3767 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3768 ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3769 3770 #ifdef NCE_DEBUG 3771 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 3772 #endif 3773 if (src_nce != NULL) { 3774 /* 3775 * src_nce has been provided by the caller. The only 3776 * caller who provides a non-null, non-broadcast 3777 * src_nce is from ip_newroute() which must pass in 3778 * a ND_REACHABLE src_nce (this condition is verified 3779 * via an ASSERT for the save_ire->ire_nce in ip_newroute()) 3780 */ 3781 mutex_enter(&src_nce->nce_lock); 3782 state = src_nce->nce_state; 3783 if ((src_nce->nce_flags & NCE_F_CONDEMNED) || 3784 (ipst->ips_ndp4->ndp_g_hw_change > 0)) { 3785 /* 3786 * src_nce has been deleted, or 3787 * ip_arp_news is in the middle of 3788 * flushing entries in the the nce. 3789 * Fail the add, since we don't know 3790 * if it is safe to copy the contents of 3791 * src_nce 3792 */ 3793 DTRACE_PROBE2(nce__bad__src__nce, 3794 nce_t *, src_nce, ill_t *, ill); 3795 mutex_exit(&src_nce->nce_lock); 3796 err = EINVAL; 3797 goto err_ret; 3798 } 3799 template = copyb(src_nce->nce_res_mp); 3800 mutex_exit(&src_nce->nce_lock); 3801 if (template == NULL) { 3802 err = ENOMEM; 3803 goto err_ret; 3804 } 3805 } else if (flags & NCE_F_BCAST) { 3806 /* 3807 * broadcast nce. 3808 */ 3809 template = copyb(ill->ill_bcast_mp); 3810 if (template == NULL) { 3811 err = ENOMEM; 3812 goto err_ret; 3813 } 3814 state = ND_REACHABLE; 3815 } else if (ill->ill_net_type == IRE_IF_NORESOLVER) { 3816 /* 3817 * NORESOLVER entries are always created in the REACHABLE 3818 * state. We create a nce_res_mp with the IP nexthop address 3819 * in the destination address in the DLPI hdr if the 3820 * physical length is exactly 4 bytes. 3821 * 3822 * XXX not clear which drivers set ill_phys_addr_length to 3823 * IP_ADDR_LEN. 3824 */ 3825 if (ill->ill_phys_addr_length == IP_ADDR_LEN) { 3826 template = ill_dlur_gen((uchar_t *)addr, 3827 ill->ill_phys_addr_length, 3828 ill->ill_sap, ill->ill_sap_length); 3829 } else { 3830 template = copyb(ill->ill_resolver_mp); 3831 } 3832 if (template == NULL) { 3833 err = ENOMEM; 3834 goto err_ret; 3835 } 3836 state = ND_REACHABLE; 3837 } 3838 nce->nce_fp_mp = NULL; 3839 nce->nce_res_mp = template; 3840 nce->nce_state = state; 3841 if (state == ND_REACHABLE) { 3842 nce->nce_last = TICK_TO_MSEC(lbolt64); 3843 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3844 } else { 3845 nce->nce_last = 0; 3846 if (state == ND_INITIAL) 3847 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3848 } 3849 3850 ASSERT((nce->nce_res_mp == NULL && nce->nce_state == ND_INITIAL) || 3851 (nce->nce_res_mp != NULL && nce->nce_state == ND_REACHABLE)); 3852 /* 3853 * Atomically ensure that the ill is not CONDEMNED, before 3854 * adding the NCE. 3855 */ 3856 mutex_enter(&ill->ill_lock); 3857 if (ill->ill_state_flags & ILL_CONDEMNED) { 3858 mutex_exit(&ill->ill_lock); 3859 err = EINVAL; 3860 goto err_ret; 3861 } 3862 if ((nce->nce_next = *ncep) != NULL) 3863 nce->nce_next->nce_ptpn = &nce->nce_next; 3864 *ncep = nce; 3865 nce->nce_ptpn = ncep; 3866 *newnce = nce; 3867 /* This one is for nce being used by an active thread */ 3868 NCE_REFHOLD(*newnce); 3869 3870 /* Bump up the number of nce's referencing this ill */ 3871 ill->ill_nce_cnt++; 3872 mutex_exit(&ill->ill_lock); 3873 DTRACE_PROBE1(ndp__add__v4, nce_t *, nce); 3874 return (0); 3875 err_ret: 3876 freeb(mp); 3877 freemsg(template); 3878 return (err); 3879 } 3880 3881 void 3882 ndp_flush_qd_mp(nce_t *nce) 3883 { 3884 mblk_t *qd_mp, *qd_next; 3885 3886 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3887 qd_mp = nce->nce_qd_mp; 3888 nce->nce_qd_mp = NULL; 3889 while (qd_mp != NULL) { 3890 qd_next = qd_mp->b_next; 3891 qd_mp->b_next = NULL; 3892 qd_mp->b_prev = NULL; 3893 freemsg(qd_mp); 3894 qd_mp = qd_next; 3895 } 3896 } 3897 3898 3899 /* 3900 * ndp_walk routine to delete all entries that have a given destination or 3901 * gateway address and cached link layer (MAC) address. This is used when ARP 3902 * informs us that a network-to-link-layer mapping may have changed. 3903 */ 3904 void 3905 nce_delete_hw_changed(nce_t *nce, void *arg) 3906 { 3907 nce_hw_map_t *hwm = arg; 3908 mblk_t *mp; 3909 dl_unitdata_req_t *dlu; 3910 uchar_t *macaddr; 3911 ill_t *ill; 3912 int saplen; 3913 ipaddr_t nce_addr; 3914 3915 if (nce->nce_state != ND_REACHABLE) 3916 return; 3917 3918 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3919 if (nce_addr != hwm->hwm_addr) 3920 return; 3921 3922 mutex_enter(&nce->nce_lock); 3923 if ((mp = nce->nce_res_mp) == NULL) { 3924 mutex_exit(&nce->nce_lock); 3925 return; 3926 } 3927 dlu = (dl_unitdata_req_t *)mp->b_rptr; 3928 macaddr = (uchar_t *)(dlu + 1); 3929 ill = nce->nce_ill; 3930 if ((saplen = ill->ill_sap_length) > 0) 3931 macaddr += saplen; 3932 else 3933 saplen = -saplen; 3934 3935 /* 3936 * If the hardware address is unchanged, then leave this one alone. 3937 * Note that saplen == abs(saplen) now. 3938 */ 3939 if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && 3940 bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { 3941 mutex_exit(&nce->nce_lock); 3942 return; 3943 } 3944 mutex_exit(&nce->nce_lock); 3945 3946 DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); 3947 ndp_delete(nce); 3948 } 3949 3950 /* 3951 * This function verifies whether a given IPv4 address is potentially known to 3952 * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t, 3953 * so that it can continue to look for hardware changes on that address. 3954 */ 3955 boolean_t 3956 ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns) 3957 { 3958 nce_t *nce; 3959 struct in_addr nceaddr; 3960 ip_stack_t *ipst = ns->netstack_ip; 3961 3962 if (addr == INADDR_ANY) 3963 return (B_FALSE); 3964 3965 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3966 nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr); 3967 for (; nce != NULL; nce = nce->nce_next) { 3968 /* Note that only v4 mapped entries are in the table. */ 3969 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 3970 if (addr == nceaddr.s_addr && 3971 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 3972 /* Single flag check; no lock needed */ 3973 if (!(nce->nce_flags & NCE_F_CONDEMNED)) 3974 break; 3975 } 3976 } 3977 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3978 return (nce != NULL); 3979 } 3980