1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/stropts.h> 29 #include <sys/strsun.h> 30 #include <sys/sysmacros.h> 31 #include <sys/errno.h> 32 #include <sys/dlpi.h> 33 #include <sys/socket.h> 34 #include <sys/ddi.h> 35 #include <sys/sunddi.h> 36 #include <sys/cmn_err.h> 37 #include <sys/debug.h> 38 #include <sys/vtrace.h> 39 #include <sys/kmem.h> 40 #include <sys/zone.h> 41 #include <sys/ethernet.h> 42 #include <sys/sdt.h> 43 44 #include <net/if.h> 45 #include <net/if_types.h> 46 #include <net/if_dl.h> 47 #include <net/route.h> 48 #include <netinet/in.h> 49 #include <netinet/ip6.h> 50 #include <netinet/icmp6.h> 51 52 #include <inet/common.h> 53 #include <inet/mi.h> 54 #include <inet/mib2.h> 55 #include <inet/nd.h> 56 #include <inet/ip.h> 57 #include <inet/ip_impl.h> 58 #include <inet/ipclassifier.h> 59 #include <inet/ip_if.h> 60 #include <inet/ip_ire.h> 61 #include <inet/ip_rts.h> 62 #include <inet/ip6.h> 63 #include <inet/ip_ndp.h> 64 #include <inet/ipsec_impl.h> 65 #include 
<inet/ipsec_info.h> 66 #include <inet/sctp_ip.h> 67 68 /* 69 * Function names with nce_ prefix are static while function 70 * names with ndp_ prefix are used by rest of the IP. 71 * 72 * Lock ordering: 73 * 74 * ndp_g_lock -> ill_lock -> nce_lock 75 * 76 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 77 * nce_next. Nce_lock protects the contents of the NCE (particularly 78 * nce_refcnt). 79 */ 80 81 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 82 uint32_t ll_addr_len); 83 static void nce_ire_delete(nce_t *nce); 84 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 85 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 86 static nce_t *nce_lookup_addr(ill_t *, boolean_t, const in6_addr_t *, 87 nce_t *); 88 static nce_t *nce_lookup_mapping(ill_t *, const in6_addr_t *); 89 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 90 uchar_t *addr); 91 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 92 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 93 static mblk_t *nce_udreq_alloc(ill_t *ill); 94 static void nce_update(nce_t *nce, uint16_t new_state, 95 uchar_t *new_ll_addr); 96 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 97 static boolean_t nce_xmit(ill_t *ill, uint8_t type, 98 boolean_t use_lla_addr, const in6_addr_t *sender, 99 const in6_addr_t *target, int flag); 100 static boolean_t nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, 101 const in6_addr_t *target, uint_t flags); 102 static boolean_t nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, 103 const in6_addr_t *src, uint_t flags); 104 static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t, 105 nce_t **, nce_t *); 106 static ipif_t *ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill); 107 108 #ifdef DEBUG 109 static void nce_trace_cleanup(const nce_t *); 110 #endif 111 112 #define NCE_HASH_PTR_V4(ipst, addr) \ 113 (&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 114 115 
#define NCE_HASH_PTR_V6(ipst, addr) \ 116 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ 117 NCE_TABLE_SIZE)])) 118 119 /* Non-tunable probe interval, based on link capabilities */ 120 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 121 122 /* 123 * NDP Cache Entry creation routine. 124 * Mapped entries will never do NUD . 125 * This routine must always be called with ndp6->ndp_g_lock held. 126 * Prior to return, nce_refcnt is incremented. 127 */ 128 int 129 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 130 const in6_addr_t *mask, const in6_addr_t *extract_mask, 131 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 132 nce_t **newnce) 133 { 134 static nce_t nce_nil; 135 nce_t *nce; 136 mblk_t *mp; 137 mblk_t *template; 138 nce_t **ncep; 139 int err; 140 boolean_t dropped = B_FALSE; 141 ip_stack_t *ipst = ill->ill_ipst; 142 143 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 144 ASSERT(ill != NULL && ill->ill_isv6); 145 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 146 ip0dbg(("ndp_add_v6: no addr\n")); 147 return (EINVAL); 148 } 149 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 150 ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags)); 151 return (EINVAL); 152 } 153 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 154 (flags & NCE_F_MAPPING)) { 155 ip0dbg(("ndp_add_v6: extract mask zero for mapping")); 156 return (EINVAL); 157 } 158 /* 159 * Allocate the mblk to hold the nce. 160 * 161 * XXX This can come out of a separate cache - nce_cache. 
162 * We don't need the mp anymore as there are no more 163 * "qwriter"s 164 */ 165 mp = allocb(sizeof (nce_t), BPRI_MED); 166 if (mp == NULL) 167 return (ENOMEM); 168 169 nce = (nce_t *)mp->b_rptr; 170 mp->b_wptr = (uchar_t *)&nce[1]; 171 *nce = nce_nil; 172 173 /* 174 * This one holds link layer address 175 */ 176 if (ill->ill_net_type == IRE_IF_RESOLVER) { 177 template = nce_udreq_alloc(ill); 178 } else { 179 if (ill->ill_resolver_mp == NULL) { 180 freeb(mp); 181 return (EINVAL); 182 } 183 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 184 template = copyb(ill->ill_resolver_mp); 185 } 186 if (template == NULL) { 187 freeb(mp); 188 return (ENOMEM); 189 } 190 nce->nce_ill = ill; 191 nce->nce_ipversion = IPV6_VERSION; 192 nce->nce_flags = flags; 193 nce->nce_state = state; 194 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 195 nce->nce_rcnt = ill->ill_xmit_count; 196 nce->nce_addr = *addr; 197 nce->nce_mask = *mask; 198 nce->nce_extract_mask = *extract_mask; 199 nce->nce_ll_extract_start = hw_extract_start; 200 nce->nce_fp_mp = NULL; 201 nce->nce_res_mp = template; 202 if (state == ND_REACHABLE) 203 nce->nce_last = TICK_TO_MSEC(lbolt64); 204 else 205 nce->nce_last = 0; 206 nce->nce_qd_mp = NULL; 207 nce->nce_mp = mp; 208 if (hw_addr != NULL) 209 nce_set_ll(nce, hw_addr); 210 /* This one is for nce getting created */ 211 nce->nce_refcnt = 1; 212 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 213 if (nce->nce_flags & NCE_F_MAPPING) { 214 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 215 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 216 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 217 ncep = &ipst->ips_ndp6->nce_mask_entries; 218 } else { 219 ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 220 } 221 222 nce->nce_trace_disable = B_FALSE; 223 224 /* 225 * Atomically ensure that the ill is not CONDEMNED, before 226 * adding the NCE. 
227 */ 228 mutex_enter(&ill->ill_lock); 229 if (ill->ill_state_flags & ILL_CONDEMNED) { 230 mutex_exit(&ill->ill_lock); 231 freeb(mp); 232 freeb(template); 233 return (EINVAL); 234 } 235 if ((nce->nce_next = *ncep) != NULL) 236 nce->nce_next->nce_ptpn = &nce->nce_next; 237 *ncep = nce; 238 nce->nce_ptpn = ncep; 239 *newnce = nce; 240 /* This one is for nce being used by an active thread */ 241 NCE_REFHOLD(*newnce); 242 243 /* Bump up the number of nce's referencing this ill */ 244 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 245 (char *), "nce", (void *), nce); 246 ill->ill_nce_cnt++; 247 mutex_exit(&ill->ill_lock); 248 249 err = 0; 250 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 251 mutex_enter(&nce->nce_lock); 252 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 253 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 254 mutex_exit(&nce->nce_lock); 255 dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE); 256 if (dropped) { 257 mutex_enter(&nce->nce_lock); 258 nce->nce_pcnt++; 259 mutex_exit(&nce->nce_lock); 260 } 261 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 262 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 263 err = EINPROGRESS; 264 } else if (flags & NCE_F_UNSOL_ADV) { 265 /* 266 * We account for the transmit below by assigning one 267 * less than the ndd variable. Subsequent decrements 268 * are done in ndp_timer. 
269 */ 270 mutex_enter(&nce->nce_lock); 271 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 272 nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1; 273 mutex_exit(&nce->nce_lock); 274 dropped = nce_xmit_advert(nce, B_TRUE, &ipv6_all_hosts_mcast, 275 0); 276 mutex_enter(&nce->nce_lock); 277 if (dropped) 278 nce->nce_unsolicit_count++; 279 if (nce->nce_unsolicit_count != 0) { 280 ASSERT(nce->nce_timeout_id == 0); 281 nce->nce_timeout_id = timeout(ndp_timer, nce, 282 MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval)); 283 } 284 mutex_exit(&nce->nce_lock); 285 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 286 } 287 288 /* 289 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 290 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 291 * We call nce_fastpath from nce_update if the link layer address of 292 * the peer changes from nce_update 293 */ 294 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 295 nce_fastpath(nce); 296 return (err); 297 } 298 299 int 300 ndp_lookup_then_add_v6(ill_t *ill, boolean_t match_illgrp, uchar_t *hw_addr, 301 const in6_addr_t *addr, const in6_addr_t *mask, 302 const in6_addr_t *extract_mask, uint32_t hw_extract_start, uint16_t flags, 303 uint16_t state, nce_t **newnce) 304 { 305 int err = 0; 306 nce_t *nce; 307 ip_stack_t *ipst = ill->ill_ipst; 308 309 ASSERT(ill->ill_isv6); 310 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 311 312 /* Get head of v6 hash table */ 313 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 314 nce = nce_lookup_addr(ill, match_illgrp, addr, nce); 315 if (nce == NULL) { 316 err = ndp_add_v6(ill, 317 hw_addr, 318 addr, 319 mask, 320 extract_mask, 321 hw_extract_start, 322 flags, 323 state, 324 newnce); 325 } else { 326 *newnce = nce; 327 err = EEXIST; 328 } 329 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 330 return (err); 331 } 332 333 /* 334 * Remove all the CONDEMNED nces from the appropriate hash table. 
335 * We create a private list of NCEs, these may have ires pointing 336 * to them, so the list will be passed through to clean up dependent 337 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 338 */ 339 static void 340 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 341 { 342 nce_t *nce1; 343 nce_t **ptpn; 344 345 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 346 ASSERT(ndp->ndp_g_walker == 0); 347 for (; nce; nce = nce1) { 348 nce1 = nce->nce_next; 349 mutex_enter(&nce->nce_lock); 350 if (nce->nce_flags & NCE_F_CONDEMNED) { 351 ptpn = nce->nce_ptpn; 352 nce1 = nce->nce_next; 353 if (nce1 != NULL) 354 nce1->nce_ptpn = ptpn; 355 *ptpn = nce1; 356 nce->nce_ptpn = NULL; 357 nce->nce_next = NULL; 358 nce->nce_next = *free_nce_list; 359 *free_nce_list = nce; 360 } 361 mutex_exit(&nce->nce_lock); 362 } 363 } 364 365 /* 366 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 367 * will return this NCE. Also no new IREs will be created that 368 * point to this NCE (See ire_add_v6). Also no new timeouts will 369 * be started (See NDP_RESTART_TIMER). 370 * 2. Cancel any currently running timeouts. 371 * 3. If there is an ndp walker, return. The walker will do the cleanup. 372 * This ensures that walkers see a consistent list of NCEs while walking. 373 * 4. Otherwise remove the NCE from the list of NCEs 374 * 5. Delete all IREs pointing to this NCE. 375 */ 376 void 377 ndp_delete(nce_t *nce) 378 { 379 nce_t **ptpn; 380 nce_t *nce1; 381 int ipversion = nce->nce_ipversion; 382 ndp_g_t *ndp; 383 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 384 385 if (ipversion == IPV4_VERSION) 386 ndp = ipst->ips_ndp4; 387 else 388 ndp = ipst->ips_ndp6; 389 390 /* Serialize deletes */ 391 mutex_enter(&nce->nce_lock); 392 if (nce->nce_flags & NCE_F_CONDEMNED) { 393 /* Some other thread is doing the delete */ 394 mutex_exit(&nce->nce_lock); 395 return; 396 } 397 /* 398 * Caller has a refhold. Also 1 ref for being in the list. 
Thus 399 * refcnt has to be >= 2 400 */ 401 ASSERT(nce->nce_refcnt >= 2); 402 nce->nce_flags |= NCE_F_CONDEMNED; 403 mutex_exit(&nce->nce_lock); 404 405 nce_fastpath_list_delete(nce); 406 407 /* 408 * Cancel any running timer. Timeout can't be restarted 409 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 410 * Passing invalid timeout id is fine. 411 */ 412 if (nce->nce_timeout_id != 0) { 413 (void) untimeout(nce->nce_timeout_id); 414 nce->nce_timeout_id = 0; 415 } 416 417 mutex_enter(&ndp->ndp_g_lock); 418 if (nce->nce_ptpn == NULL) { 419 /* 420 * The last ndp walker has already removed this nce from 421 * the list after we marked the nce CONDEMNED and before 422 * we grabbed the global lock. 423 */ 424 mutex_exit(&ndp->ndp_g_lock); 425 return; 426 } 427 if (ndp->ndp_g_walker > 0) { 428 /* 429 * Can't unlink. The walker will clean up 430 */ 431 ndp->ndp_g_walker_cleanup = B_TRUE; 432 mutex_exit(&ndp->ndp_g_lock); 433 return; 434 } 435 436 /* 437 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 438 * the timer since it is marked CONDEMNED. 
439 */ 440 ptpn = nce->nce_ptpn; 441 nce1 = nce->nce_next; 442 if (nce1 != NULL) 443 nce1->nce_ptpn = ptpn; 444 *ptpn = nce1; 445 nce->nce_ptpn = NULL; 446 nce->nce_next = NULL; 447 mutex_exit(&ndp->ndp_g_lock); 448 449 nce_ire_delete(nce); 450 } 451 452 void 453 ndp_inactive(nce_t *nce) 454 { 455 mblk_t **mpp; 456 ill_t *ill; 457 458 ASSERT(nce->nce_refcnt == 0); 459 ASSERT(MUTEX_HELD(&nce->nce_lock)); 460 ASSERT(nce->nce_fastpath == NULL); 461 462 /* Free all nce allocated messages */ 463 mpp = &nce->nce_first_mp_to_free; 464 do { 465 while (*mpp != NULL) { 466 mblk_t *mp; 467 468 mp = *mpp; 469 *mpp = mp->b_next; 470 471 inet_freemsg(mp); 472 } 473 } while (mpp++ != &nce->nce_last_mp_to_free); 474 475 #ifdef DEBUG 476 nce_trace_cleanup(nce); 477 #endif 478 479 ill = nce->nce_ill; 480 mutex_enter(&ill->ill_lock); 481 DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill, 482 (char *), "nce", (void *), nce); 483 ill->ill_nce_cnt--; 484 /* 485 * If the number of nce's associated with this ill have dropped 486 * to zero, check whether we need to restart any operation that 487 * is waiting for this to happen. 488 */ 489 if (ILL_DOWN_OK(ill)) { 490 /* ipif_ill_refrele_tail drops the ill_lock */ 491 ipif_ill_refrele_tail(ill); 492 } else { 493 mutex_exit(&ill->ill_lock); 494 } 495 mutex_destroy(&nce->nce_lock); 496 if (nce->nce_mp != NULL) 497 inet_freemsg(nce->nce_mp); 498 } 499 500 /* 501 * ndp_walk routine. Delete the nce if it is associated with the ill 502 * that is going away. Always called as a writer. 503 */ 504 void 505 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 506 { 507 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 508 ndp_delete(nce); 509 } 510 } 511 512 /* 513 * Walk a list of to be inactive NCEs and blow away all the ires. 
514 */ 515 static void 516 nce_ire_delete_list(nce_t *nce) 517 { 518 nce_t *nce_next; 519 520 ASSERT(nce != NULL); 521 while (nce != NULL) { 522 nce_next = nce->nce_next; 523 nce->nce_next = NULL; 524 525 /* 526 * It is possible for the last ndp walker (this thread) 527 * to come here after ndp_delete has marked the nce CONDEMNED 528 * and before it has removed the nce from the fastpath list 529 * or called untimeout. So we need to do it here. It is safe 530 * for both ndp_delete and this thread to do it twice or 531 * even simultaneously since each of the threads has a 532 * reference on the nce. 533 */ 534 nce_fastpath_list_delete(nce); 535 /* 536 * Cancel any running timer. Timeout can't be restarted 537 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 538 * Passing invalid timeout id is fine. 539 */ 540 if (nce->nce_timeout_id != 0) { 541 (void) untimeout(nce->nce_timeout_id); 542 nce->nce_timeout_id = 0; 543 } 544 /* 545 * We might hit this func thus in the v4 case: 546 * ipif_down->ipif_ndp_down->ndp_walk 547 */ 548 549 if (nce->nce_ipversion == IPV4_VERSION) { 550 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 551 IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill); 552 } else { 553 ASSERT(nce->nce_ipversion == IPV6_VERSION); 554 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 555 IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill); 556 } 557 NCE_REFRELE_NOTR(nce); 558 nce = nce_next; 559 } 560 } 561 562 /* 563 * Delete an ire when the nce goes away. 
564 */ 565 /* ARGSUSED */ 566 static void 567 nce_ire_delete(nce_t *nce) 568 { 569 if (nce->nce_ipversion == IPV6_VERSION) { 570 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 571 nce_ire_delete1, (char *)nce, nce->nce_ill); 572 NCE_REFRELE_NOTR(nce); 573 } else { 574 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 575 nce_ire_delete1, (char *)nce, nce->nce_ill); 576 NCE_REFRELE_NOTR(nce); 577 } 578 } 579 580 /* 581 * ire_walk routine used to delete every IRE that shares this nce 582 */ 583 static void 584 nce_ire_delete1(ire_t *ire, char *nce_arg) 585 { 586 nce_t *nce = (nce_t *)nce_arg; 587 588 ASSERT(ire->ire_type == IRE_CACHE); 589 590 if (ire->ire_nce == nce) { 591 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 592 ire_delete(ire); 593 } 594 } 595 596 /* 597 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 598 */ 599 boolean_t 600 ndp_restart_dad(nce_t *nce) 601 { 602 boolean_t started; 603 boolean_t dropped; 604 605 if (nce == NULL) 606 return (B_FALSE); 607 mutex_enter(&nce->nce_lock); 608 if (nce->nce_state == ND_PROBE) { 609 mutex_exit(&nce->nce_lock); 610 started = B_TRUE; 611 } else if (nce->nce_state == ND_REACHABLE) { 612 nce->nce_state = ND_PROBE; 613 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 614 mutex_exit(&nce->nce_lock); 615 dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE); 616 if (dropped) { 617 mutex_enter(&nce->nce_lock); 618 nce->nce_pcnt++; 619 mutex_exit(&nce->nce_lock); 620 } 621 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 622 started = B_TRUE; 623 } else { 624 mutex_exit(&nce->nce_lock); 625 started = B_FALSE; 626 } 627 return (started); 628 } 629 630 /* 631 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 632 * If one is found, the refcnt on the nce will be incremented. 
633 */ 634 nce_t * 635 ndp_lookup_v6(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr, 636 boolean_t caller_holds_lock) 637 { 638 nce_t *nce; 639 ip_stack_t *ipst = ill->ill_ipst; 640 641 ASSERT(ill->ill_isv6); 642 if (!caller_holds_lock) 643 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 644 645 /* Get head of v6 hash table */ 646 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 647 nce = nce_lookup_addr(ill, match_illgrp, addr, nce); 648 if (nce == NULL) 649 nce = nce_lookup_mapping(ill, addr); 650 if (!caller_holds_lock) 651 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 652 return (nce); 653 } 654 /* 655 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 656 * If one is found, the refcnt on the nce will be incremented. 657 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 658 * so we skip the nce_lookup_mapping call. 659 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 660 */ 661 nce_t * 662 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 663 { 664 nce_t *nce; 665 in6_addr_t addr6; 666 ip_stack_t *ipst = ill->ill_ipst; 667 668 if (!caller_holds_lock) 669 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 670 671 /* Get head of v4 hash table */ 672 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 673 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 674 /* 675 * NOTE: IPv4 never matches across the illgrp since the NCE's we're 676 * looking up have fastpath headers that are inherently per-ill. 677 */ 678 nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce); 679 if (!caller_holds_lock) 680 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 681 return (nce); 682 } 683 684 /* 685 * Cache entry lookup. Try to find an nce matching the parameters passed. 686 * Look only for exact entries (no mappings). If an nce is found, increment 687 * the hold count on that nce. 
The caller passes in the start of the 688 * appropriate hash table, and must be holding the appropriate global 689 * lock (ndp_g_lock). 690 */ 691 static nce_t * 692 nce_lookup_addr(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr, 693 nce_t *nce) 694 { 695 ndp_g_t *ndp; 696 ip_stack_t *ipst = ill->ill_ipst; 697 698 if (ill->ill_isv6) 699 ndp = ipst->ips_ndp6; 700 else 701 ndp = ipst->ips_ndp4; 702 703 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 704 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 705 return (NULL); 706 for (; nce != NULL; nce = nce->nce_next) { 707 if (nce->nce_ill == ill || 708 match_illgrp && IS_IN_SAME_ILLGRP(ill, nce->nce_ill)) { 709 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 710 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 711 &ipv6_all_ones)) { 712 mutex_enter(&nce->nce_lock); 713 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 714 NCE_REFHOLD_LOCKED(nce); 715 mutex_exit(&nce->nce_lock); 716 break; 717 } 718 mutex_exit(&nce->nce_lock); 719 } 720 } 721 } 722 return (nce); 723 } 724 725 /* 726 * Cache entry lookup. Try to find an nce matching the parameters passed. 727 * Look only for mappings. 728 */ 729 static nce_t * 730 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 731 { 732 nce_t *nce; 733 ip_stack_t *ipst = ill->ill_ipst; 734 735 ASSERT(ill != NULL && ill->ill_isv6); 736 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 737 if (!IN6_IS_ADDR_MULTICAST(addr)) 738 return (NULL); 739 nce = ipst->ips_ndp6->nce_mask_entries; 740 for (; nce != NULL; nce = nce->nce_next) 741 if (nce->nce_ill == ill && 742 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 743 mutex_enter(&nce->nce_lock); 744 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 745 NCE_REFHOLD_LOCKED(nce); 746 mutex_exit(&nce->nce_lock); 747 break; 748 } 749 mutex_exit(&nce->nce_lock); 750 } 751 return (nce); 752 } 753 754 /* 755 * Process passed in parameters either from an incoming packet or via 756 * user ioctl. 
757 */ 758 static void 759 nce_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 760 { 761 ill_t *ill = nce->nce_ill; 762 uint32_t hw_addr_len = ill->ill_nd_lla_len; 763 mblk_t *mp; 764 boolean_t ll_updated = B_FALSE; 765 boolean_t ll_changed; 766 ip_stack_t *ipst = ill->ill_ipst; 767 768 ASSERT(nce->nce_ipversion == IPV6_VERSION); 769 /* 770 * No updates of link layer address or the neighbor state is 771 * allowed, when the cache is in NONUD state. This still 772 * allows for responding to reachability solicitation. 773 */ 774 mutex_enter(&nce->nce_lock); 775 if (nce->nce_state == ND_INCOMPLETE) { 776 if (hw_addr == NULL) { 777 mutex_exit(&nce->nce_lock); 778 return; 779 } 780 nce_set_ll(nce, hw_addr); 781 /* 782 * Update nce state and send the queued packets 783 * back to ip this time ire will be added. 784 */ 785 if (flag & ND_NA_FLAG_SOLICITED) { 786 nce_update(nce, ND_REACHABLE, NULL); 787 } else { 788 nce_update(nce, ND_STALE, NULL); 789 } 790 mutex_exit(&nce->nce_lock); 791 nce_fastpath(nce); 792 mutex_enter(&nce->nce_lock); 793 mp = nce->nce_qd_mp; 794 nce->nce_qd_mp = NULL; 795 mutex_exit(&nce->nce_lock); 796 while (mp != NULL) { 797 mblk_t *nxt_mp, *data_mp; 798 799 nxt_mp = mp->b_next; 800 mp->b_next = NULL; 801 802 if (mp->b_datap->db_type == M_CTL) 803 data_mp = mp->b_cont; 804 else 805 data_mp = mp; 806 if (data_mp->b_prev != NULL) { 807 ill_t *inbound_ill; 808 queue_t *fwdq = NULL; 809 uint_t ifindex; 810 811 ifindex = (uint_t)(uintptr_t)data_mp->b_prev; 812 inbound_ill = ill_lookup_on_ifindex(ifindex, 813 B_TRUE, NULL, NULL, NULL, NULL, ipst); 814 if (inbound_ill == NULL) { 815 data_mp->b_prev = NULL; 816 freemsg(mp); 817 return; 818 } else { 819 fwdq = inbound_ill->ill_rq; 820 } 821 data_mp->b_prev = NULL; 822 /* 823 * Send a forwarded packet back into ip_rput_v6 824 * just as in ire_send_v6(). 825 * Extract the queue from b_prev (set in 826 * ip_rput_data_v6). 
827 */ 828 if (fwdq != NULL) { 829 /* 830 * Forwarded packets hop count will 831 * get decremented in ip_rput_data_v6 832 */ 833 if (data_mp != mp) 834 freeb(mp); 835 put(fwdq, data_mp); 836 } else { 837 /* 838 * Send locally originated packets back 839 * into ip_wput_v6. 840 */ 841 put(ill->ill_wq, mp); 842 } 843 ill_refrele(inbound_ill); 844 } else { 845 put(ill->ill_wq, mp); 846 } 847 mp = nxt_mp; 848 } 849 return; 850 } 851 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 852 if (!is_adv) { 853 /* If this is a SOLICITATION request only */ 854 if (ll_changed) 855 nce_update(nce, ND_STALE, hw_addr); 856 mutex_exit(&nce->nce_lock); 857 return; 858 } 859 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 860 /* If in any other state than REACHABLE, ignore */ 861 if (nce->nce_state == ND_REACHABLE) { 862 nce_update(nce, ND_STALE, NULL); 863 } 864 mutex_exit(&nce->nce_lock); 865 return; 866 } else { 867 if (ll_changed) { 868 nce_update(nce, ND_UNCHANGED, hw_addr); 869 ll_updated = B_TRUE; 870 } 871 if (flag & ND_NA_FLAG_SOLICITED) { 872 nce_update(nce, ND_REACHABLE, NULL); 873 } else { 874 if (ll_updated) { 875 nce_update(nce, ND_STALE, NULL); 876 } 877 } 878 mutex_exit(&nce->nce_lock); 879 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 880 NCE_F_ISROUTER)) { 881 ire_t *ire; 882 883 /* 884 * Router turned to host. We need to remove the 885 * entry as well as any default route that may be 886 * using this as a next hop. This is required by 887 * section 7.2.5 of RFC 2461. 888 */ 889 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 890 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 891 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 892 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 893 MATCH_IRE_DEFAULT, ipst); 894 if (ire != NULL) { 895 ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); 896 ire_delete(ire); 897 ire_refrele(ire); 898 } 899 ndp_delete(nce); 900 } 901 } 902 } 903 904 /* 905 * Walker state structure used by ndp_process() / ndp_process_entry(). 
906 */ 907 typedef struct ndp_process_data { 908 ill_t *np_ill; /* ill/illgrp to match against */ 909 const in6_addr_t *np_addr; /* IPv6 address to match */ 910 uchar_t *np_hw_addr; /* passed to nce_process() */ 911 uint32_t np_flag; /* passed to nce_process() */ 912 boolean_t np_is_adv; /* passed to nce_process() */ 913 } ndp_process_data_t; 914 915 /* 916 * Walker callback used by ndp_process() for IPMP groups: calls nce_process() 917 * for each NCE with a matching address that's in the same IPMP group. 918 */ 919 static void 920 ndp_process_entry(nce_t *nce, void *arg) 921 { 922 ndp_process_data_t *npp = arg; 923 924 if (IS_IN_SAME_ILLGRP(nce->nce_ill, npp->np_ill) && 925 IN6_ARE_ADDR_EQUAL(&nce->nce_addr, npp->np_addr) && 926 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 927 nce_process(nce, npp->np_hw_addr, npp->np_flag, npp->np_is_adv); 928 } 929 } 930 931 /* 932 * Wrapper around nce_process() that handles IPMP. In particular, for IPMP, 933 * NCEs are per-underlying-ill (because of nce_fp_mp) and thus we may have 934 * more than one NCE for a given IPv6 address to tend to. In that case, we 935 * need to walk all NCEs and callback nce_process() for each one. Since this 936 * is expensive, in the non-IPMP case we just directly call nce_process(). 937 * Ultimately, nce_fp_mp needs to be moved out of the nce_t so that all IP 938 * interfaces in an IPMP group share the same NCEs -- at which point this 939 * function can be removed entirely. 
940 */ 941 void 942 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 943 { 944 ill_t *ill = nce->nce_ill; 945 struct ndp_g_s *ndp = ill->ill_ipst->ips_ndp6; 946 ndp_process_data_t np; 947 948 if (ill->ill_grp == NULL) { 949 nce_process(nce, hw_addr, flag, is_adv); 950 return; 951 } 952 953 /* IPMP case: walk all NCEs */ 954 np.np_ill = ill; 955 np.np_addr = &nce->nce_addr; 956 np.np_flag = flag; 957 np.np_is_adv = is_adv; 958 np.np_hw_addr = hw_addr; 959 960 ndp_walk_common(ndp, NULL, (pfi_t)ndp_process_entry, &np, ALL_ZONES); 961 } 962 963 /* 964 * Pass arg1 to the pfi supplied, along with each nce in existence. 965 * ndp_walk() places a REFHOLD on the nce and drops the lock when 966 * walking the hash list. 967 */ 968 void 969 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 970 boolean_t trace) 971 { 972 nce_t *nce; 973 nce_t *nce1; 974 nce_t **ncep; 975 nce_t *free_nce_list = NULL; 976 977 mutex_enter(&ndp->ndp_g_lock); 978 /* Prevent ndp_delete from unlink and free of NCE */ 979 ndp->ndp_g_walker++; 980 mutex_exit(&ndp->ndp_g_lock); 981 for (ncep = ndp->nce_hash_tbl; 982 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 983 for (nce = *ncep; nce != NULL; nce = nce1) { 984 nce1 = nce->nce_next; 985 if (ill == NULL || nce->nce_ill == ill) { 986 if (trace) { 987 NCE_REFHOLD(nce); 988 (*pfi)(nce, arg1); 989 NCE_REFRELE(nce); 990 } else { 991 NCE_REFHOLD_NOTR(nce); 992 (*pfi)(nce, arg1); 993 NCE_REFRELE_NOTR(nce); 994 } 995 } 996 } 997 } 998 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 999 nce1 = nce->nce_next; 1000 if (ill == NULL || nce->nce_ill == ill) { 1001 if (trace) { 1002 NCE_REFHOLD(nce); 1003 (*pfi)(nce, arg1); 1004 NCE_REFRELE(nce); 1005 } else { 1006 NCE_REFHOLD_NOTR(nce); 1007 (*pfi)(nce, arg1); 1008 NCE_REFRELE_NOTR(nce); 1009 } 1010 } 1011 } 1012 mutex_enter(&ndp->ndp_g_lock); 1013 ndp->ndp_g_walker--; 1014 /* 1015 * While NCE's are removed from global list they are placed 1016 * in a private list, 
to be passed to nce_ire_delete_list(). 1017 * The reason is, there may be ires pointing to this nce 1018 * which needs to cleaned up. 1019 */ 1020 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 1021 /* Time to delete condemned entries */ 1022 for (ncep = ndp->nce_hash_tbl; 1023 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 1024 nce = *ncep; 1025 if (nce != NULL) { 1026 nce_remove(ndp, nce, &free_nce_list); 1027 } 1028 } 1029 nce = ndp->nce_mask_entries; 1030 if (nce != NULL) { 1031 nce_remove(ndp, nce, &free_nce_list); 1032 } 1033 ndp->ndp_g_walker_cleanup = B_FALSE; 1034 } 1035 1036 mutex_exit(&ndp->ndp_g_lock); 1037 1038 if (free_nce_list != NULL) { 1039 nce_ire_delete_list(free_nce_list); 1040 } 1041 } 1042 1043 /* 1044 * Walk everything. 1045 * Note that ill can be NULL hence can't derive the ipst from it. 1046 */ 1047 void 1048 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst) 1049 { 1050 ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE); 1051 ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE); 1052 } 1053 1054 /* 1055 * Process resolve requests. Handles both mapped entries 1056 * as well as cases that needs to be send out on the wire. 1057 * Lookup a NCE for a given IRE. Regardless of whether one exists 1058 * or one is created, we defer making ire point to nce until the 1059 * ire is actually added at which point the nce_refcnt on the nce is 1060 * incremented. This is done primarily to have symmetry between ire_add() 1061 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 
1062 */ 1063 int 1064 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 1065 { 1066 nce_t *nce, *hw_nce = NULL; 1067 int err; 1068 ill_t *ipmp_ill; 1069 uint16_t nce_flags; 1070 uint32_t ms; 1071 mblk_t *mp_nce = NULL; 1072 ip_stack_t *ipst = ill->ill_ipst; 1073 uchar_t *hwaddr = NULL; 1074 1075 ASSERT(ill->ill_isv6); 1076 1077 if (IN6_IS_ADDR_MULTICAST(dst)) 1078 return (nce_set_multicast(ill, dst)); 1079 1080 nce_flags = (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0; 1081 1082 /* 1083 * If `ill' is under IPMP, then first check to see if there's an NCE 1084 * for `dst' on the IPMP meta-interface (e.g., because an application 1085 * explicitly did an SIOCLIFSETND to tie a hardware address to `dst'). 1086 * If so, we use that hardware address when creating the NCE below. 1087 * Note that we don't yet have a mechanism to remove these NCEs if the 1088 * NCE for `dst' on the IPMP meta-interface is subsequently removed -- 1089 * but rather than build such a beast, we should fix NCEs so that they 1090 * can be properly shared across an IPMP group. 1091 */ 1092 if (IS_UNDER_IPMP(ill)) { 1093 if ((ipmp_ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) { 1094 hw_nce = ndp_lookup_v6(ipmp_ill, B_FALSE, dst, B_FALSE); 1095 if (hw_nce != NULL && hw_nce->nce_res_mp != NULL) { 1096 hwaddr = hw_nce->nce_res_mp->b_rptr + 1097 NCE_LL_ADDR_OFFSET(ipmp_ill); 1098 nce_flags |= hw_nce->nce_flags; 1099 } 1100 ill_refrele(ipmp_ill); 1101 } 1102 } 1103 1104 err = ndp_lookup_then_add_v6(ill, 1105 B_FALSE, /* NCE fastpath is per ill; don't match across group */ 1106 hwaddr, 1107 dst, 1108 &ipv6_all_ones, 1109 &ipv6_all_zeros, 1110 0, 1111 nce_flags, 1112 hwaddr != NULL ? ND_REACHABLE : ND_INCOMPLETE, 1113 &nce); 1114 1115 if (hw_nce != NULL) 1116 NCE_REFRELE(hw_nce); 1117 1118 switch (err) { 1119 case 0: 1120 /* 1121 * New cache entry was created. Make sure that the state 1122 * is not ND_INCOMPLETE. 
It can be in some other state 1123 * even before we send out the solicitation as we could 1124 * get un-solicited advertisements. 1125 * 1126 * If this is an XRESOLV interface, simply return 0, 1127 * since we don't want to solicit just yet. 1128 */ 1129 if (ill->ill_flags & ILLF_XRESOLV) { 1130 NCE_REFRELE(nce); 1131 return (0); 1132 } 1133 1134 mutex_enter(&nce->nce_lock); 1135 if (nce->nce_state != ND_INCOMPLETE) { 1136 mutex_exit(&nce->nce_lock); 1137 NCE_REFRELE(nce); 1138 return (0); 1139 } 1140 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1141 if (mp_nce == NULL) { 1142 /* The caller will free mp */ 1143 mutex_exit(&nce->nce_lock); 1144 ndp_delete(nce); 1145 NCE_REFRELE(nce); 1146 return (ENOMEM); 1147 } 1148 if ((ms = nce_solicit(nce, mp_nce)) == 0) { 1149 /* The caller will free mp */ 1150 if (mp_nce != mp) 1151 freeb(mp_nce); 1152 mutex_exit(&nce->nce_lock); 1153 ndp_delete(nce); 1154 NCE_REFRELE(nce); 1155 return (EBUSY); 1156 } 1157 mutex_exit(&nce->nce_lock); 1158 NDP_RESTART_TIMER(nce, (clock_t)ms); 1159 NCE_REFRELE(nce); 1160 return (EINPROGRESS); 1161 case EEXIST: 1162 /* Resolution in progress just queue the packet */ 1163 mutex_enter(&nce->nce_lock); 1164 if (nce->nce_state == ND_INCOMPLETE) { 1165 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1166 if (mp_nce == NULL) { 1167 err = ENOMEM; 1168 } else { 1169 nce_queue_mp(nce, mp_nce); 1170 err = EINPROGRESS; 1171 } 1172 } else { 1173 /* 1174 * Any other state implies we have 1175 * a nce but IRE needs to be added ... 1176 * ire_add_v6() will take care of the 1177 * the case when the nce becomes CONDEMNED 1178 * before the ire is added to the table. 1179 */ 1180 err = 0; 1181 } 1182 mutex_exit(&nce->nce_lock); 1183 NCE_REFRELE(nce); 1184 break; 1185 default: 1186 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 1187 break; 1188 } 1189 return (err); 1190 } 1191 1192 /* 1193 * When there is no resolver, the link layer template is passed in 1194 * the IRE. 1195 * Lookup a NCE for a given IRE. 
Regardless of whether one exists 1196 * or one is created, we defer making ire point to nce until the 1197 * ire is actually added at which point the nce_refcnt on the nce is 1198 * incremented. This is done primarily to have symmetry between ire_add() 1199 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1200 */ 1201 int 1202 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1203 { 1204 nce_t *nce; 1205 int err = 0; 1206 1207 ASSERT(ill != NULL); 1208 ASSERT(ill->ill_isv6); 1209 if (IN6_IS_ADDR_MULTICAST(dst)) { 1210 err = nce_set_multicast(ill, dst); 1211 return (err); 1212 } 1213 1214 err = ndp_lookup_then_add_v6(ill, 1215 B_FALSE, /* NCE fastpath is per ill; don't match across group */ 1216 NULL, /* hardware address */ 1217 dst, 1218 &ipv6_all_ones, 1219 &ipv6_all_zeros, 1220 0, 1221 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1222 ND_REACHABLE, 1223 &nce); 1224 1225 switch (err) { 1226 case 0: 1227 /* 1228 * Cache entry with a proper resolver cookie was 1229 * created. 1230 */ 1231 NCE_REFRELE(nce); 1232 break; 1233 case EEXIST: 1234 err = 0; 1235 NCE_REFRELE(nce); 1236 break; 1237 default: 1238 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1239 break; 1240 } 1241 return (err); 1242 } 1243 1244 /* 1245 * For each interface an entry is added for the unspecified multicast group. 1246 * Here that mapping is used to form the multicast cache entry for a particular 1247 * multicast destination. 
1248 */ 1249 static int 1250 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1251 { 1252 nce_t *mnce; /* Multicast mapping entry */ 1253 nce_t *nce; 1254 uchar_t *hw_addr = NULL; 1255 int err = 0; 1256 ip_stack_t *ipst = ill->ill_ipst; 1257 1258 ASSERT(ill != NULL); 1259 ASSERT(ill->ill_isv6); 1260 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1261 1262 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1263 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst)); 1264 nce = nce_lookup_addr(ill, B_FALSE, dst, nce); 1265 if (nce != NULL) { 1266 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1267 NCE_REFRELE(nce); 1268 return (0); 1269 } 1270 /* No entry, now lookup for a mapping this should never fail */ 1271 mnce = nce_lookup_mapping(ill, dst); 1272 if (mnce == NULL) { 1273 /* Something broken for the interface. */ 1274 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1275 return (ESRCH); 1276 } 1277 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1278 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1279 /* 1280 * For IRE_IF_RESOLVER a hardware mapping can be 1281 * generated, for IRE_IF_NORESOLVER, resolution cookie 1282 * in the ill is copied in ndp_add_v6(). 1283 */ 1284 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1285 if (hw_addr == NULL) { 1286 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1287 NCE_REFRELE(mnce); 1288 return (ENOMEM); 1289 } 1290 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1291 } 1292 NCE_REFRELE(mnce); 1293 /* 1294 * IRE_IF_NORESOLVER type simply copies the resolution 1295 * cookie passed in. So no hw_addr is needed. 
1296 */ 1297 err = ndp_add_v6(ill, 1298 hw_addr, 1299 dst, 1300 &ipv6_all_ones, 1301 &ipv6_all_zeros, 1302 0, 1303 NCE_F_NONUD, 1304 ND_REACHABLE, 1305 &nce); 1306 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1307 if (hw_addr != NULL) 1308 kmem_free(hw_addr, ill->ill_nd_lla_len); 1309 if (err != 0) { 1310 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1311 return (err); 1312 } 1313 NCE_REFRELE(nce); 1314 return (0); 1315 } 1316 1317 /* 1318 * Return the link layer address, and any flags of a nce. 1319 */ 1320 int 1321 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1322 { 1323 nce_t *nce; 1324 in6_addr_t *addr; 1325 sin6_t *sin6; 1326 dl_unitdata_req_t *dl; 1327 1328 ASSERT(ill != NULL && ill->ill_isv6); 1329 sin6 = (sin6_t *)&lnr->lnr_addr; 1330 addr = &sin6->sin6_addr; 1331 1332 /* 1333 * NOTE: if the ill is an IPMP interface, then match against the whole 1334 * illgrp. This e.g. allows in.ndpd to retrieve the link layer 1335 * addresses for the data addresses on an IPMP interface even though 1336 * ipif_ndp_up() created them with an nce_ill of ipif_bound_ill. 
1337 */ 1338 nce = ndp_lookup_v6(ill, IS_IPMP(ill), addr, B_FALSE); 1339 if (nce == NULL) 1340 return (ESRCH); 1341 /* If in INCOMPLETE state, no link layer address is available yet */ 1342 if (nce->nce_state == ND_INCOMPLETE) 1343 goto done; 1344 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1345 if (ill->ill_flags & ILLF_XRESOLV) 1346 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1347 else 1348 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1349 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1350 sizeof (lnr->lnr_hdw_addr)); 1351 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1352 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1353 if (nce->nce_flags & NCE_F_ISROUTER) 1354 lnr->lnr_flags = NDF_ISROUTER_ON; 1355 if (nce->nce_flags & NCE_F_ANYCAST) 1356 lnr->lnr_flags |= NDF_ANYCAST_ON; 1357 done: 1358 NCE_REFRELE(nce); 1359 return (0); 1360 } 1361 1362 /* 1363 * Send Enable/Disable multicast reqs to driver. 1364 */ 1365 int 1366 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1367 uint32_t hw_addr_offset, mblk_t *mp) 1368 { 1369 nce_t *nce; 1370 uchar_t *hw_addr; 1371 ip_stack_t *ipst = ill->ill_ipst; 1372 1373 ASSERT(ill != NULL && ill->ill_isv6); 1374 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1375 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1376 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1377 freemsg(mp); 1378 return (EINVAL); 1379 } 1380 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1381 nce = nce_lookup_mapping(ill, addr); 1382 if (nce == NULL) { 1383 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1384 freemsg(mp); 1385 return (ESRCH); 1386 } 1387 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1388 /* 1389 * Update dl_addr_length and dl_addr_offset for primitives that 1390 * have physical addresses as opposed to full saps 1391 */ 1392 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1393 case DL_ENABMULTI_REQ: 1394 /* Track the state if this is the first enabmulti */ 1395 if 
(ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1396 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1397 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1398 break; 1399 case DL_DISABMULTI_REQ: 1400 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1401 break; 1402 default: 1403 NCE_REFRELE(nce); 1404 ip1dbg(("ndp_mcastreq: default\n")); 1405 return (EINVAL); 1406 } 1407 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1408 NCE_REFRELE(nce); 1409 ill_dlpi_send(ill, mp); 1410 return (0); 1411 } 1412 1413 /* 1414 * Send a neighbor solicitation. 1415 * Returns number of milliseconds after which we should either rexmit or abort. 1416 * Return of zero means we should abort. 1417 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1418 * 1419 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1420 * the packet. 1421 * NOTE: This routine does not consume mp. 1422 */ 1423 uint32_t 1424 nce_solicit(nce_t *nce, mblk_t *mp) 1425 { 1426 ip6_t *ip6h; 1427 in6_addr_t sender; 1428 boolean_t dropped; 1429 1430 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1431 1432 if (nce->nce_rcnt == 0) 1433 return (0); 1434 1435 if (mp == NULL) { 1436 ASSERT(nce->nce_qd_mp != NULL); 1437 mp = nce->nce_qd_mp; 1438 } else { 1439 nce_queue_mp(nce, mp); 1440 } 1441 1442 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1443 if (mp->b_datap->db_type == M_CTL) 1444 mp = mp->b_cont; 1445 1446 ip6h = (ip6_t *)mp->b_rptr; 1447 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1448 /* 1449 * This message should have been pulled up already in 1450 * ip_wput_v6. We can't do pullups here because the message 1451 * could be from the nce_qd_mp which could have b_next/b_prev 1452 * non-NULL. 1453 */ 1454 ASSERT(MBLKL(mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN); 1455 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1456 } 1457 1458 /* 1459 * Need to copy the sender address into a local since `mp' can 1460 * go away once we drop nce_lock. 
1461 */ 1462 sender = ip6h->ip6_src; 1463 nce->nce_rcnt--; 1464 mutex_exit(&nce->nce_lock); 1465 dropped = nce_xmit_solicit(nce, B_TRUE, &sender, 0); 1466 mutex_enter(&nce->nce_lock); 1467 if (dropped) 1468 nce->nce_rcnt++; 1469 return (nce->nce_ill->ill_reachable_retrans_time); 1470 } 1471 1472 /* 1473 * Attempt to recover an address on an interface that's been marked as a 1474 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1475 * no easy way to just probe the address and have the right thing happen if 1476 * it's no longer in use. Instead, we just bring it up normally and allow the 1477 * regular interface start-up logic to probe for a remaining duplicate and take 1478 * us back down if necessary. 1479 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1480 * ip_ndp_excl. 1481 */ 1482 /* ARGSUSED */ 1483 static void 1484 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1485 { 1486 ill_t *ill = rq->q_ptr; 1487 ipif_t *ipif; 1488 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1489 1490 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1491 /* 1492 * We do not support recovery of proxy ARP'd interfaces, 1493 * because the system lacks a complete proxy ARP mechanism. 1494 */ 1495 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1496 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1497 continue; 1498 } 1499 1500 /* 1501 * If we have already recovered or if the interface is going 1502 * away, then ignore. 
1503 */ 1504 mutex_enter(&ill->ill_lock); 1505 if (!(ipif->ipif_flags & IPIF_DUPLICATE) || 1506 (ipif->ipif_state_flags & IPIF_CONDEMNED)) { 1507 mutex_exit(&ill->ill_lock); 1508 continue; 1509 } 1510 1511 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1512 ill->ill_ipif_dup_count--; 1513 mutex_exit(&ill->ill_lock); 1514 ipif->ipif_was_dup = B_TRUE; 1515 1516 VERIFY(ipif_ndp_up(ipif, B_TRUE) != EINPROGRESS); 1517 (void) ipif_up_done_v6(ipif); 1518 } 1519 freeb(mp); 1520 } 1521 1522 /* 1523 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1524 * As long as someone else holds the address, the interface will stay down. 1525 * When that conflict goes away, the interface is brought back up. This is 1526 * done so that accidental shutdowns of addresses aren't made permanent. Your 1527 * server will recover from a failure. 1528 * 1529 * For DHCP and temporary addresses, recovery is not done in the kernel. 1530 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1531 * 1532 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1533 */ 1534 static void 1535 ipif6_dup_recovery(void *arg) 1536 { 1537 ipif_t *ipif = arg; 1538 1539 ipif->ipif_recovery_id = 0; 1540 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1541 return; 1542 1543 /* 1544 * No lock, because this is just an optimization. 1545 */ 1546 if (ipif->ipif_state_flags & IPIF_CONDEMNED) 1547 return; 1548 1549 /* If the link is down, we'll retry this later */ 1550 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1551 return; 1552 1553 ndp_do_recovery(ipif); 1554 } 1555 1556 /* 1557 * Perform interface recovery by forcing the duplicate interfaces up and 1558 * allowing the system to determine which ones should stay up. 1559 * 1560 * Called both by recovery timer expiry and link-up notification. 
1561 */ 1562 void 1563 ndp_do_recovery(ipif_t *ipif) 1564 { 1565 ill_t *ill = ipif->ipif_ill; 1566 mblk_t *mp; 1567 ip_stack_t *ipst = ill->ill_ipst; 1568 1569 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1570 if (mp == NULL) { 1571 mutex_enter(&ill->ill_lock); 1572 if (ipif->ipif_recovery_id == 0 && 1573 !(ipif->ipif_state_flags & IPIF_CONDEMNED)) { 1574 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1575 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1576 } 1577 mutex_exit(&ill->ill_lock); 1578 } else { 1579 /* 1580 * A recovery timer may still be running if we got here from 1581 * ill_restart_dad(); cancel that timer. 1582 */ 1583 if (ipif->ipif_recovery_id != 0) 1584 (void) untimeout(ipif->ipif_recovery_id); 1585 ipif->ipif_recovery_id = 0; 1586 1587 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1588 sizeof (ipif->ipif_v6lcl_addr)); 1589 ill_refhold(ill); 1590 qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP, 1591 B_FALSE); 1592 } 1593 } 1594 1595 /* 1596 * Find the MAC and IP addresses in an NA/NS message. 1597 */ 1598 static void 1599 ip_ndp_find_addresses(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, in6_addr_t *targp, 1600 uchar_t **haddr, uint_t *haddrlenp) 1601 { 1602 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1603 icmp6_t *icmp6 = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1604 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 1605 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 1606 uchar_t *addr; 1607 int alen = 0; 1608 1609 if (dl_mp == NULL) { 1610 nd_opt_hdr_t *opt; 1611 int len; 1612 1613 /* 1614 * If it's from the fast-path, then it can't be a probe 1615 * message, and thus must include a linkaddr option. 1616 * Extract that here. 
1617 */ 1618 switch (icmp6->icmp6_type) { 1619 case ND_NEIGHBOR_SOLICIT: 1620 len = mp->b_wptr - (uchar_t *)ns; 1621 if ((len -= sizeof (*ns)) > 0) { 1622 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), 1623 len, ND_OPT_SOURCE_LINKADDR); 1624 } 1625 break; 1626 case ND_NEIGHBOR_ADVERT: 1627 len = mp->b_wptr - (uchar_t *)na; 1628 if ((len -= sizeof (*na)) > 0) { 1629 opt = ndp_get_option((nd_opt_hdr_t *)(na + 1), 1630 len, ND_OPT_TARGET_LINKADDR); 1631 } 1632 break; 1633 } 1634 1635 if (opt != NULL && opt->nd_opt_len * 8 - sizeof (*opt) >= 1636 ill->ill_nd_lla_len) { 1637 addr = (uchar_t *)(opt + 1); 1638 alen = ill->ill_nd_lla_len; 1639 } 1640 1641 /* 1642 * We cheat a bit here for the sake of printing usable log 1643 * messages in the rare case where the reply we got was unicast 1644 * without a source linkaddr option, and the interface is in 1645 * fastpath mode. (Sigh.) 1646 */ 1647 if (alen == 0 && ill->ill_type == IFT_ETHER && 1648 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1649 struct ether_header *pether; 1650 1651 pether = (struct ether_header *)((char *)ip6h - 1652 sizeof (*pether)); 1653 addr = pether->ether_shost.ether_addr_octet; 1654 alen = ETHERADDRL; 1655 } 1656 } else { 1657 dl_unitdata_ind_t *dlu; 1658 1659 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1660 alen = dlu->dl_src_addr_length; 1661 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1662 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1663 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1664 if (ill->ill_sap_length < 0) { 1665 alen += ill->ill_sap_length; 1666 } else { 1667 addr += ill->ill_sap_length; 1668 alen -= ill->ill_sap_length; 1669 } 1670 } 1671 } 1672 1673 if (alen > 0) { 1674 *haddr = addr; 1675 *haddrlenp = alen; 1676 } else { 1677 *haddr = NULL; 1678 *haddrlenp = 0; 1679 } 1680 1681 /* nd_ns_target and nd_na_target are at the same offset, so we cheat */ 1682 *targp = ns->nd_ns_target; 1683 } 1684 1685 /* 1686 * This is for exclusive changes due to NDP duplicate 
address detection 1687 * failure. 1688 */ 1689 /* ARGSUSED */ 1690 static void 1691 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1692 { 1693 ill_t *ill = rq->q_ptr; 1694 ipif_t *ipif; 1695 mblk_t *dl_mp = NULL; 1696 uchar_t *haddr; 1697 uint_t haddrlen; 1698 ip_stack_t *ipst = ill->ill_ipst; 1699 in6_addr_t targ; 1700 1701 if (DB_TYPE(mp) != M_DATA) { 1702 dl_mp = mp; 1703 mp = mp->b_cont; 1704 } 1705 1706 ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen); 1707 if (haddr != NULL && haddrlen == ill->ill_phys_addr_length) { 1708 /* 1709 * Ignore conflicts generated by misbehaving switches that 1710 * just reflect our own messages back to us. For IPMP, we may 1711 * see reflections across any ill in the illgrp. 1712 */ 1713 if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 || 1714 IS_UNDER_IPMP(ill) && 1715 ipmp_illgrp_find_ill(ill->ill_grp, haddr, haddrlen) != NULL) 1716 goto ignore_conflict; 1717 } 1718 1719 /* 1720 * Look up the appropriate ipif. 1721 */ 1722 ipif = ipif_lookup_addr_v6(&targ, ill, ALL_ZONES, NULL, NULL, NULL, 1723 NULL, ipst); 1724 if (ipif == NULL) 1725 goto ignore_conflict; 1726 1727 /* Reload the ill to match the ipif */ 1728 ill = ipif->ipif_ill; 1729 1730 /* If it's already duplicate or ineligible, then don't do anything. */ 1731 if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) { 1732 ipif_refrele(ipif); 1733 goto ignore_conflict; 1734 } 1735 1736 /* 1737 * If this is a failure during duplicate recovery, then don't 1738 * complain. It may take a long time to recover. 
1739 */ 1740 if (!ipif->ipif_was_dup) { 1741 char ibuf[LIFNAMSIZ]; 1742 char hbuf[MAC_STR_LEN]; 1743 char sbuf[INET6_ADDRSTRLEN]; 1744 1745 ipif_get_name(ipif, ibuf, sizeof (ibuf)); 1746 cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);" 1747 " disabled", ibuf, 1748 inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)), 1749 mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf))); 1750 } 1751 mutex_enter(&ill->ill_lock); 1752 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1753 ipif->ipif_flags |= IPIF_DUPLICATE; 1754 ill->ill_ipif_dup_count++; 1755 mutex_exit(&ill->ill_lock); 1756 (void) ipif_down(ipif, NULL, NULL); 1757 ipif_down_tail(ipif); 1758 mutex_enter(&ill->ill_lock); 1759 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1760 ill->ill_net_type == IRE_IF_RESOLVER && 1761 !(ipif->ipif_state_flags & IPIF_CONDEMNED) && 1762 ipst->ips_ip_dup_recovery > 0) { 1763 ASSERT(ipif->ipif_recovery_id == 0); 1764 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1765 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1766 } 1767 mutex_exit(&ill->ill_lock); 1768 ipif_refrele(ipif); 1769 ignore_conflict: 1770 if (dl_mp != NULL) 1771 freeb(dl_mp); 1772 freemsg(mp); 1773 } 1774 1775 /* 1776 * Handle failure by tearing down the ipifs with the specified address. Note 1777 * that tearing down the ipif also means deleting the nce through ipif_down, so 1778 * it's not possible to do recovery by just restarting the nce timer. Instead, 1779 * we start a timer on the ipif. 
1780 */ 1781 static void 1782 ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1783 { 1784 if ((mp = copymsg(mp)) != NULL) { 1785 if (dl_mp == NULL) 1786 dl_mp = mp; 1787 else if ((dl_mp = copyb(dl_mp)) != NULL) 1788 dl_mp->b_cont = mp; 1789 if (dl_mp == NULL) { 1790 freemsg(mp); 1791 } else { 1792 ill_refhold(ill); 1793 qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP, 1794 B_FALSE); 1795 } 1796 } 1797 } 1798 1799 /* 1800 * Handle a discovered conflict: some other system is advertising that it owns 1801 * one of our IP addresses. We need to defend ourselves, or just shut down the 1802 * interface. 1803 */ 1804 static void 1805 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1806 { 1807 ipif_t *ipif; 1808 uint32_t now; 1809 uint_t maxdefense; 1810 uint_t defs; 1811 ip_stack_t *ipst = ill->ill_ipst; 1812 1813 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 1814 NULL, NULL, ipst); 1815 if (ipif == NULL) 1816 return; 1817 1818 /* 1819 * First, figure out if this address is disposable. 1820 */ 1821 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1822 maxdefense = ipst->ips_ip_max_temp_defend; 1823 else 1824 maxdefense = ipst->ips_ip_max_defend; 1825 1826 /* 1827 * Now figure out how many times we've defended ourselves. Ignore 1828 * defenses that happened long in the past. 1829 */ 1830 now = gethrestime_sec(); 1831 mutex_enter(&nce->nce_lock); 1832 if ((defs = nce->nce_defense_count) > 0 && 1833 now - nce->nce_defense_time > ipst->ips_ip_defend_interval) { 1834 nce->nce_defense_count = defs = 0; 1835 } 1836 nce->nce_defense_count++; 1837 nce->nce_defense_time = now; 1838 mutex_exit(&nce->nce_lock); 1839 ipif_refrele(ipif); 1840 1841 /* 1842 * If we've defended ourselves too many times already, then give up and 1843 * tear down the interface(s) using this address. Otherwise, defend by 1844 * sending out an unsolicited Neighbor Advertisement. 
1845 */ 1846 if (defs >= maxdefense) { 1847 ip_ndp_failure(ill, mp, dl_mp); 1848 } else { 1849 char hbuf[MAC_STR_LEN]; 1850 char sbuf[INET6_ADDRSTRLEN]; 1851 uchar_t *haddr; 1852 uint_t haddrlen; 1853 in6_addr_t targ; 1854 1855 ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen); 1856 cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1857 mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)), 1858 inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)), 1859 ill->ill_name); 1860 1861 (void) nce_xmit_advert(nce, B_FALSE, &ipv6_all_hosts_mcast, 0); 1862 } 1863 } 1864 1865 static void 1866 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1867 { 1868 nd_neighbor_solicit_t *ns; 1869 uint32_t hlen = ill->ill_nd_lla_len; 1870 uchar_t *haddr = NULL; 1871 icmp6_t *icmp_nd; 1872 ip6_t *ip6h; 1873 nce_t *our_nce = NULL; 1874 in6_addr_t target; 1875 in6_addr_t src; 1876 int len; 1877 int flag = 0; 1878 nd_opt_hdr_t *opt = NULL; 1879 boolean_t bad_solicit = B_FALSE; 1880 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1881 1882 ip6h = (ip6_t *)mp->b_rptr; 1883 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1884 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1885 src = ip6h->ip6_src; 1886 ns = (nd_neighbor_solicit_t *)icmp_nd; 1887 target = ns->nd_ns_target; 1888 if (IN6_IS_ADDR_MULTICAST(&target)) { 1889 if (ip_debug > 2) { 1890 /* ip1dbg */ 1891 pr_addr_dbg("ndp_input_solicit: Target is" 1892 " multicast! 
%s\n", AF_INET6, &target); 1893 } 1894 bad_solicit = B_TRUE; 1895 goto done; 1896 } 1897 if (len > sizeof (nd_neighbor_solicit_t)) { 1898 /* Options present */ 1899 opt = (nd_opt_hdr_t *)&ns[1]; 1900 len -= sizeof (nd_neighbor_solicit_t); 1901 if (!ndp_verify_optlen(opt, len)) { 1902 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1903 bad_solicit = B_TRUE; 1904 goto done; 1905 } 1906 1907 } 1908 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1909 /* Check to see if this is a valid DAD solicitation */ 1910 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1911 if (ip_debug > 2) { 1912 /* ip1dbg */ 1913 pr_addr_dbg("ndp_input_solicit: IPv6 " 1914 "Destination is not solicited node " 1915 "multicast %s\n", AF_INET6, 1916 &ip6h->ip6_dst); 1917 } 1918 bad_solicit = B_TRUE; 1919 goto done; 1920 } 1921 } 1922 1923 /* 1924 * NOTE: with IPMP, it's possible the nominated multicast ill (which 1925 * received this packet if it's multicast) is not the ill tied to 1926 * e.g. the IPMP ill's data link-local. So we match across the illgrp 1927 * to ensure we find the associated NCE. 1928 */ 1929 our_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE); 1930 /* 1931 * If this is a valid Solicitation, a permanent 1932 * entry should exist in the cache 1933 */ 1934 if (our_nce == NULL || 1935 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1936 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 
1937 "ifname=%s ", ill->ill_name)); 1938 if (ip_debug > 2) { 1939 /* ip1dbg */ 1940 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1941 } 1942 bad_solicit = B_TRUE; 1943 goto done; 1944 } 1945 1946 /* At this point we should have a verified NS per spec */ 1947 if (opt != NULL) { 1948 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1949 if (opt != NULL) { 1950 haddr = (uchar_t *)&opt[1]; 1951 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 1952 hlen == 0) { 1953 ip1dbg(("ndp_input_solicit: bad SLLA\n")); 1954 bad_solicit = B_TRUE; 1955 goto done; 1956 } 1957 } 1958 } 1959 1960 /* If sending directly to peer, set the unicast flag */ 1961 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 1962 flag |= NDP_UNICAST; 1963 1964 /* 1965 * Create/update the entry for the soliciting node. 1966 * or respond to outstanding queries, don't if 1967 * the source is unspecified address. 1968 */ 1969 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1970 int err; 1971 nce_t *nnce; 1972 1973 ASSERT(ill->ill_isv6); 1974 /* 1975 * Regular solicitations *must* include the Source Link-Layer 1976 * Address option. Ignore messages that do not. 1977 */ 1978 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1979 ip1dbg(("ndp_input_solicit: source link-layer address " 1980 "option missing with a specified source.\n")); 1981 bad_solicit = B_TRUE; 1982 goto done; 1983 } 1984 1985 /* 1986 * This is a regular solicitation. If we're still in the 1987 * process of verifying the address, then don't respond at all 1988 * and don't keep track of the sender. 1989 */ 1990 if (our_nce->nce_state == ND_PROBE) 1991 goto done; 1992 1993 /* 1994 * If the solicitation doesn't have sender hardware address 1995 * (legal for unicast solicitation), then process without 1996 * installing the return NCE. Either we already know it, or 1997 * we'll be forced to look it up when (and if) we reply to the 1998 * packet. 
1999 */ 2000 if (haddr == NULL) 2001 goto no_source; 2002 2003 err = ndp_lookup_then_add_v6(ill, 2004 B_FALSE, 2005 haddr, 2006 &src, /* Soliciting nodes address */ 2007 &ipv6_all_ones, 2008 &ipv6_all_zeros, 2009 0, 2010 0, 2011 ND_STALE, 2012 &nnce); 2013 switch (err) { 2014 case 0: 2015 /* done with this entry */ 2016 NCE_REFRELE(nnce); 2017 break; 2018 case EEXIST: 2019 /* 2020 * B_FALSE indicates this is not an an advertisement. 2021 */ 2022 ndp_process(nnce, haddr, 0, B_FALSE); 2023 NCE_REFRELE(nnce); 2024 break; 2025 default: 2026 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 2027 err)); 2028 goto done; 2029 } 2030 no_source: 2031 flag |= NDP_SOLICITED; 2032 } else { 2033 /* 2034 * No source link layer address option should be present in a 2035 * valid DAD request. 2036 */ 2037 if (haddr != NULL) { 2038 ip1dbg(("ndp_input_solicit: source link-layer address " 2039 "option present with an unspecified source.\n")); 2040 bad_solicit = B_TRUE; 2041 goto done; 2042 } 2043 if (our_nce->nce_state == ND_PROBE) { 2044 /* 2045 * Internally looped-back probes won't have DLPI 2046 * attached to them. External ones (which are sent by 2047 * multicast) always will. Just ignore our own 2048 * transmissions. 2049 */ 2050 if (dl_mp != NULL) { 2051 /* 2052 * If someone else is probing our address, then 2053 * we've crossed wires. Declare failure. 2054 */ 2055 ip_ndp_failure(ill, mp, dl_mp); 2056 } 2057 goto done; 2058 } 2059 /* 2060 * This is a DAD probe. Multicast the advertisement to the 2061 * all-nodes address. 
2062 */ 2063 src = ipv6_all_hosts_mcast; 2064 } 2065 /* Response to a solicitation */ 2066 (void) nce_xmit_advert(our_nce, B_TRUE, &src, flag); 2067 done: 2068 if (bad_solicit) 2069 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 2070 if (our_nce != NULL) 2071 NCE_REFRELE(our_nce); 2072 } 2073 2074 void 2075 ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2076 { 2077 nd_neighbor_advert_t *na; 2078 uint32_t hlen = ill->ill_nd_lla_len; 2079 uchar_t *haddr = NULL; 2080 icmp6_t *icmp_nd; 2081 ip6_t *ip6h; 2082 nce_t *dst_nce = NULL; 2083 in6_addr_t target; 2084 nd_opt_hdr_t *opt = NULL; 2085 int len; 2086 ip_stack_t *ipst = ill->ill_ipst; 2087 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2088 2089 ip6h = (ip6_t *)mp->b_rptr; 2090 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2091 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2092 na = (nd_neighbor_advert_t *)icmp_nd; 2093 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 2094 (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 2095 ip1dbg(("ndp_input_advert: Target is multicast but the " 2096 "solicited flag is not zero\n")); 2097 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2098 return; 2099 } 2100 target = na->nd_na_target; 2101 if (IN6_IS_ADDR_MULTICAST(&target)) { 2102 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 2103 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2104 return; 2105 } 2106 if (len > sizeof (nd_neighbor_advert_t)) { 2107 opt = (nd_opt_hdr_t *)&na[1]; 2108 if (!ndp_verify_optlen(opt, 2109 len - sizeof (nd_neighbor_advert_t))) { 2110 ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); 2111 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2112 return; 2113 } 2114 /* At this point we have a verified NA per spec */ 2115 len -= sizeof (nd_neighbor_advert_t); 2116 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 2117 if (opt != NULL) { 2118 haddr = (uchar_t *)&opt[1]; 2119 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 2120 hlen == 0) { 2121 
ip1dbg(("ndp_input_advert: bad SLLA\n")); 2122 BUMP_MIB(mib, 2123 ipv6IfIcmpInBadNeighborAdvertisements); 2124 return; 2125 } 2126 } 2127 } 2128 2129 /* 2130 * NOTE: we match across the illgrp since we need to do DAD for all of 2131 * our local addresses, and those are spread across all the active 2132 * ills in the group. 2133 */ 2134 if ((dst_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE)) == NULL) 2135 return; 2136 2137 if (dst_nce->nce_flags & NCE_F_PERMANENT) { 2138 /* 2139 * Someone just advertised one of our local addresses. First, 2140 * check it it was us -- if so, we can safely ignore it. 2141 */ 2142 if (haddr != NULL) { 2143 if (!nce_cmp_ll_addr(dst_nce, haddr, hlen)) 2144 goto out; /* from us -- no conflict */ 2145 2146 /* 2147 * If we're in an IPMP group, check if this is an echo 2148 * from another ill in the group. Use the double- 2149 * checked locking pattern to avoid grabbing 2150 * ill_g_lock in the non-IPMP case. 2151 */ 2152 if (IS_UNDER_IPMP(ill)) { 2153 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2154 if (IS_UNDER_IPMP(ill) && ipmp_illgrp_find_ill( 2155 ill->ill_grp, haddr, hlen) != NULL) { 2156 rw_exit(&ipst->ips_ill_g_lock); 2157 goto out; 2158 } 2159 rw_exit(&ipst->ips_ill_g_lock); 2160 } 2161 } 2162 2163 /* 2164 * This appears to be a real conflict. If we're trying to 2165 * configure this NCE (ND_PROBE), then shut it down. 2166 * Otherwise, handle the discovered conflict. 2167 * 2168 * Note that dl_mp might be NULL if we're getting a unicast 2169 * reply. This isn't typically done (multicast is the norm in 2170 * response to a probe), but we can handle the dl_mp == NULL 2171 * case as well. 
		 */
		if (dst_nce->nce_state == ND_PROBE)
			ip_ndp_failure(ill, mp, dl_mp);
		else
			ip_ndp_conflict(ill, mp, dl_mp, dst_nce);
	} else {
		if (na->nd_na_flags_reserved & ND_NA_FLAG_ROUTER)
			dst_nce->nce_flags |= NCE_F_ISROUTER;

		/* B_TRUE indicates this is an advertisement */
		ndp_process(dst_nce, haddr, na->nd_na_flags_reserved, B_TRUE);
	}
out:
	NCE_REFRELE(dst_nce);
}

/*
 * Process NDP neighbor solicitation/advertisement messages.
 * The checksum has already been verified before reaching here.
 *
 * The message is pulled up into a single block and then sanity checked:
 * the hop limit must be IPV6_MAX_HOPS, the ICMPv6 header must immediately
 * follow the IPv6 header, the ICMPv6 code must be zero, and the ICMPv6
 * portion must be at least an ICMPv6 header plus one IPv6 address long.
 * Packets that pass are handed to ndp_input_solicit() or
 * ndp_input_advert() together with `dl_mp', which is passed through
 * untouched.
 *
 * `mp' is always freed here before returning, on both the success and the
 * error paths; failures are only reported by bumping MIB counters on `ill'.
 */
void
ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
	icmp6_t		*icmp_nd;
	ip6_t		*ip6h;
	int		len;
	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;


	/* Make the whole packet contiguous so the headers can be overlaid */
	if (!pullupmsg(mp, -1)) {
		ip1dbg(("ndp_input: pullupmsg failed\n"));
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
		goto done;
	}
	ip6h = (ip6_t *)mp->b_rptr;
	if (ip6h->ip6_hops != IPV6_MAX_HOPS) {
		ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n"));
		BUMP_MIB(mib, ipv6IfIcmpBadHoplimit);
		goto done;
	}
	/*
	 * NDP does not accept any extension headers between the
	 * IP header and the ICMP header since e.g. a routing
	 * header could be dangerous.
	 * This assumes that any AH or ESP headers are removed
	 * by ip prior to passing the packet to ndp_input.
	 */
	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
		ip1dbg(("ndp_input: Wrong next header 0x%x\n",
		    ip6h->ip6_nxt));
		BUMP_MIB(mib, ipv6IfIcmpInErrors);
		goto done;
	}
	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
	ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT ||
	    icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT);
	if (icmp_nd->icmp6_code != 0) {
		ip1dbg(("ndp_input: icmp6 code != 0 \n"));
		BUMP_MIB(mib, ipv6IfIcmpInErrors);
		goto done;
	}
	/* Length of everything that follows the fixed IPv6 header */
	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
	/*
	 * Make sure packet length is large enough for either
	 * a NS or a NA icmp packet.
	 */
	if (len <  sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) {
		ip1dbg(("ndp_input: packet too short\n"));
		BUMP_MIB(mib, ipv6IfIcmpInErrors);
		goto done;
	}
	if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) {
		ndp_input_solicit(ill, mp, dl_mp);
	} else {
		ndp_input_advert(ill, mp, dl_mp);
	}
done:
	/* Single exit point: the message is consumed in every case */
	freemsg(mp);
}

/*
 * Utility routine to send an advertisement.  Assumes that the NCE cannot
 * go away (e.g., because it's refheld).
 *
 * The advertisement is sent on nce->nce_ill with nce->nce_addr as the
 * sender and `target' as the destination.  NDP_ISROUTER is added to
 * `flags' when the NCE is marked NCE_F_ISROUTER, and NDP_ORIDE is added
 * unless the NCE is marked NCE_F_ANYCAST.  The caller must not pass
 * NDP_PROBE.  The return value is that of nce_xmit().
 */
static boolean_t
nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *target,
    uint_t flags)
{
	ASSERT((flags & NDP_PROBE) == 0);

	if (nce->nce_flags & NCE_F_ISROUTER)
		flags |= NDP_ISROUTER;
	if (!(nce->nce_flags & NCE_F_ANYCAST))
		flags |= NDP_ORIDE;

	return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_ADVERT, use_nd_lla,
	    &nce->nce_addr, target, flags));
}

/*
 * Utility routine to send a solicitation.  Assumes that the NCE cannot
 * go away (e.g., because it's refheld).
 */
static boolean_t
nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *sender,
    uint_t flags)
{
	/* A probe is always sent from the unspecified address */
	if (flags & NDP_PROBE)
		sender = &ipv6_all_zeros;

	return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, use_nd_lla,
	    sender, &nce->nce_addr, flags));
}

/*
 * nce_xmit is called to form and transmit a ND solicitation or
 * advertisement ICMP packet.
 *
 * If the source address is unspecified and this isn't a probe (used for
 * duplicate address detection), an appropriate source address and link layer
 * address will be chosen here.  The link layer address option is included if
 * the source is specified (i.e., all non-probe packets), and omitted (per the
 * specification) otherwise.
 *
 * It returns B_FALSE only if it does a successful put() to the
 * corresponding ill's ill_wq otherwise returns B_TRUE.
 *
 * Arguments:
 *	ill		ill to transmit on; must not be an IPMP meta-interface.
 *	type		ND_NEIGHBOR_SOLICIT or ND_NEIGHBOR_ADVERT.
 *	use_nd_lla	B_TRUE to advertise ill_nd_lla, B_FALSE to advertise
 *			ill_phys_addr, in the link layer address option.
 *	sender		source address, or the unspecified address.
 *	target		address being solicited, or destination of the advert.
 *	flag		NDP_PROBE, NDP_UNICAST, NDP_ISROUTER, NDP_SOLICITED
 *			and NDP_ORIDE are examined here.
 */
static boolean_t
nce_xmit(ill_t *ill, uint8_t type, boolean_t use_nd_lla,
    const in6_addr_t *sender, const in6_addr_t *target, int flag)
{
	ill_t		*hwaddr_ill;
	uint32_t	len;
	icmp6_t		*icmp6;
	mblk_t		*mp;
	ip6_t		*ip6h;
	nd_opt_hdr_t	*opt;
	uint_t		plen, maxplen;
	ip6i_t		*ip6i;
	ipif_t		*src_ipif = NULL;
	uint8_t		*hw_addr;
	zoneid_t	zoneid = GLOBAL_ZONEID;
	char		buf[INET6_ADDRSTRLEN];

	ASSERT(!IS_IPMP(ill));

	/*
	 * Check that the sender is actually a usable address on `ill', and if
	 * so, track that as the src_ipif.  If not, for solicitations, set the
	 * sender to :: so that a new one will be picked below; for adverts,
	 * drop the packet since we expect nce_xmit_advert() to always provide
	 * a valid sender.
	 */
	if (!IN6_IS_ADDR_UNSPECIFIED(sender)) {
		if ((src_ipif = ip_ndp_lookup_addr_v6(sender, ill)) == NULL ||
		    !src_ipif->ipif_addr_ready) {
			if (src_ipif != NULL) {
				ipif_refrele(src_ipif);
				src_ipif = NULL;
			}
			if (type == ND_NEIGHBOR_ADVERT) {
				ip1dbg(("nce_xmit: No source ipif for src %s\n",
				    inet_ntop(AF_INET6, sender, buf,
				    sizeof (buf))));
				return (B_TRUE);
			}
			sender = &ipv6_all_zeros;
		}
	}

	/*
	 * If we still have an unspecified source (sender) address and this
	 * isn't a probe, select a source address from `ill'.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) {
		ASSERT(type != ND_NEIGHBOR_ADVERT);
		/*
		 * Pick a source address for this solicitation, but restrict
		 * the selection to addresses assigned to the output
		 * interface.  We do this because the destination will create
		 * a neighbor cache entry for the source address of this
		 * packet, so the source address needs to be a valid neighbor.
		 */
		src_ipif = ipif_select_source_v6(ill, target, B_TRUE,
		    IPV6_PREFER_SRC_DEFAULT, ALL_ZONES);
		if (src_ipif == NULL) {
			ip1dbg(("nce_xmit: No source ipif for dst %s\n",
			    inet_ntop(AF_INET6, target, buf, sizeof (buf))));
			return (B_TRUE);
		}
		sender = &src_ipif->ipif_v6src_addr;
	}

	/*
	 * We're either sending a probe or we have a source address.
	 */
	ASSERT((flag & NDP_PROBE) || src_ipif != NULL);

	/*
	 * Allocate for the largest possible link layer address option; the
	 * message is trimmed further down once the real option length
	 * (plen) is known.
	 */
	maxplen = roundup(sizeof (nd_opt_hdr_t) + ND_MAX_HDW_LEN, 8);
	len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) +
	    maxplen;
	mp = allocb(len, BPRI_LO);
	if (mp == NULL) {
		if (src_ipif != NULL)
			ipif_refrele(src_ipif);
		return (B_TRUE);
	}
	bzero((char *)mp->b_rptr, len);
	mp->b_wptr = mp->b_rptr + len;

	/* The ip6i_t is placed in front of the real IPv6 header */
	ip6i = (ip6i_t *)mp->b_rptr;
	ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6i->ip6i_nxt = IPPROTO_RAW;
	ip6i->ip6i_flags = IP6I_HOPLIMIT;
	if (flag & NDP_PROBE)
		ip6i->ip6i_flags |= IP6I_UNSPEC_SRC;

	ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t));
	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t));
	ip6h->ip6_nxt = IPPROTO_ICMPV6;
	ip6h->ip6_hops = IPV6_MAX_HOPS;
	ip6h->ip6_src = *sender;
	ip6h->ip6_dst = *target;
	icmp6 = (icmp6_t *)&ip6h[1];

	/* The option is located after a nd_neighbor_advert_t sized header */
	opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN +
	    sizeof (nd_neighbor_advert_t));

	if (type == ND_NEIGHBOR_SOLICIT) {
		nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6;

		if (!(flag & NDP_PROBE))
			opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
		ns->nd_ns_target = *target;
		if (!(flag & NDP_UNICAST)) {
			/* Form multicast address of the target */
			ip6h->ip6_dst = ipv6_solicited_node_mcast;
			ip6h->ip6_dst.s6_addr32[3] |=
			    ns->nd_ns_target.s6_addr32[3];
		}
	} else {
		nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6;

		ASSERT(!(flag & NDP_PROBE));
		opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
		na->nd_na_target = *sender;
		if (flag & NDP_ISROUTER)
			na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER;
		if (flag & NDP_SOLICITED)
			na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED;
		if (flag & NDP_ORIDE)
			na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE;
	}

	hw_addr = NULL;
	if (!(flag & NDP_PROBE)) {
		/*
		 * Use our source address to find the hardware address to put
		 * in the packet, so that the hardware address and IP address
		 * will match up -- even if that hardware address doesn't
		 * match the ill we actually transmit the packet through.
		 */
		if (IS_IPMP(src_ipif->ipif_ill)) {
			hwaddr_ill = ipmp_ipif_hold_bound_ill(src_ipif);
			if (hwaddr_ill == NULL) {
				ip1dbg(("nce_xmit: no bound ill!\n"));
				ipif_refrele(src_ipif);
				freemsg(mp);
				return (B_TRUE);
			}
		} else {
			hwaddr_ill = src_ipif->ipif_ill;
			ill_refhold(hwaddr_ill);	/* for symmetry */
		}

		plen = roundup(sizeof (nd_opt_hdr_t) +
		    hwaddr_ill->ill_nd_lla_len, 8);

		hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla :
		    hwaddr_ill->ill_phys_addr;
		if (hw_addr != NULL) {
			/* Fill in link layer address and option len */
			opt->nd_opt_len = (uint8_t)(plen / 8);
			bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len);
		}

		ill_refrele(hwaddr_ill);
	}

	/* No address was copied in, so no option is sent at all */
	if (hw_addr == NULL)
		plen = 0;

	/* Fix up the length of the packet now that plen is known */
	len -= (maxplen - plen);
	mp->b_wptr = mp->b_rptr + len;
	ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t));

	icmp6->icmp6_type = type;
	icmp6->icmp6_code = 0;
	/*
	 * Prepare for checksum by putting icmp length in the icmp
	 * checksum field. The checksum is calculated in ip_wput_v6.
	 */
	icmp6->icmp6_cksum = ip6h->ip6_plen;

	/*
	 * Before we toss the src_ipif, look up the zoneid to pass to
	 * ip_output_v6().  This is to ensure unicast ND_NEIGHBOR_ADVERT
	 * packets to be routed correctly by IP (we cannot guarantee that the
	 * global zone has an interface route to the destination).
	 */
	if (src_ipif != NULL) {
		if ((zoneid = src_ipif->ipif_zoneid) == ALL_ZONES)
			zoneid = GLOBAL_ZONEID;
		ipif_refrele(src_ipif);
	}

	ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT);
	return (B_FALSE);
}

/*
 * Make a link layer address (does not include the SAP) from an nce.
 * To form the link layer address, use the trailing bytes of the ipv6
 * address passed in and the fixed offset stored in nce.
 *
 * The link layer address template held in nce_res_mp is first copied to
 * `addrpos'.  Then the last `len' bytes of `addr', masked with the
 * corresponding bytes of nce_extract_mask, are OR-ed into `addrpos'
 * starting at nce_ll_extract_start, where `len' is the smaller of
 * (ill_nd_lla_len - nce_ll_extract_start) and IPV6_ADDR_LEN.
 *
 * Nothing is done for IRE_IF_NORESOLVER interfaces.
 */
static void
nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr)
{
	uchar_t	*mask, *to;
	ill_t	*ill = nce->nce_ill;
	int	len;

	if (ill->ill_net_type == IRE_IF_NORESOLVER)
		return;
	ASSERT(nce->nce_res_mp != NULL);
	ASSERT(ill->ill_net_type == IRE_IF_RESOLVER);
	ASSERT(nce->nce_flags & NCE_F_MAPPING);
	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask));
	ASSERT(addr != NULL);
	bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill),
	    addrpos, ill->ill_nd_lla_len);
	len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start,
	    IPV6_ADDR_LEN);
	/* Line mask and addr up on their last `len' bytes */
	mask = (uchar_t *)&nce->nce_extract_mask;
	mask += (IPV6_ADDR_LEN - len);
	addr += (IPV6_ADDR_LEN - len);
	to = addrpos + nce->nce_ll_extract_start;
	while (len-- > 0)
		*to++ |= *mask++ & *addr++;
}

/*
 * Allocate a DL_UNITDATA_REQ template for `ill', sized to hold the
 * dl_unitdata_req_t header followed by a destination address of
 * ABS(ill_sap_length) + ill_nd_lla_len bytes.  The priority fields are
 * zeroed, the destination address length/offset are set, and the SAP is
 * copied in with NCE_LL_SAP_COPY(); the link layer address bytes are not
 * written here.
 *
 * Returns the new mblk, or NULL if ip_dlpi_alloc() fails.
 */
mblk_t *
nce_udreq_alloc(ill_t *ill)
{
	mblk_t	*template_mp = NULL;
	dl_unitdata_req_t *dlur;
	int	sap_length;

	ASSERT(ill->ill_isv6);

	sap_length = ill->ill_sap_length;
	template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) +
	    ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ);
	if (template_mp == NULL)
		return (NULL);

	dlur = (dl_unitdata_req_t *)template_mp->b_rptr;
	dlur->dl_priority.dl_min = 0;
	dlur->dl_priority.dl_max = 0;
	dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len;
	dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t);

	/* Copy in the SAP value. */
	NCE_LL_SAP_COPY(ill, template_mp);

	return (template_mp);
}

/*
 * NDP retransmit timer.
 * This timer goes off when:
 * a. It is time to retransmit NS for resolver.
 * b. It is time to send reachability probes.
 *
 * `arg' is the nce_t the timeout was armed for.  A reference is taken on
 * entry (under nce_lock) and dropped on every exit path.  nce_lock is
 * always released before transmitting, restarting the timer, or deleting
 * the entry.
 */
void
ndp_timer(void *arg)
{
	nce_t		*nce = arg;
	ill_t		*ill = nce->nce_ill;
	uint32_t	ms;
	char		addrbuf[INET6_ADDRSTRLEN];
	boolean_t	dropped = B_FALSE;
	ip_stack_t	*ipst = ill->ill_ipst;

	/*
	 * The timer has to be cancelled by ndp_delete before doing the final
	 * refrele. So the NCE is guaranteed to exist when the timer runs
	 * until it clears the timeout_id. Before clearing the timeout_id
	 * bump up the refcnt so that we can continue to use the nce
	 */
	ASSERT(nce != NULL);

	mutex_enter(&nce->nce_lock);
	NCE_REFHOLD_LOCKED(nce);
	nce->nce_timeout_id = 0;

	/*
	 * Check the reachability state first.
	 */
	switch (nce->nce_state) {
	case ND_DELAY:
		/* The delay expired: move to PROBE and send the first NS */
		nce->nce_state = ND_PROBE;
		mutex_exit(&nce->nce_lock);
		(void) nce_xmit_solicit(nce, B_FALSE, &ipv6_all_zeros,
		    NDP_UNICAST);
		if (ip_debug > 3) {
			/* ip2dbg */
			pr_addr_dbg("ndp_timer: state for %s changed "
			    "to PROBE\n", AF_INET6, &nce->nce_addr);
		}
		NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
		NCE_REFRELE(nce);
		return;
	case ND_PROBE:
		/* must be retransmit timer */
		nce->nce_pcnt--;
		ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT &&
		    nce->nce_pcnt >= -1);
		if (nce->nce_pcnt > 0) {
			/*
			 * As per RFC2461, the nce gets deleted after
			 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions.
			 * Note that the first unicast solicitation is sent
			 * during the DELAY state.
			 */
			ip2dbg(("ndp_timer: pcount=%x dst %s\n",
			    nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr,
			    addrbuf, sizeof (addrbuf))));
			mutex_exit(&nce->nce_lock);
			dropped = nce_xmit_solicit(nce, B_FALSE,
			    &ipv6_all_zeros,
			    (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE :
			    NDP_UNICAST);
			if (dropped) {
				/* Not sent: give the attempt back */
				mutex_enter(&nce->nce_lock);
				nce->nce_pcnt++;
				mutex_exit(&nce->nce_lock);
			}
			NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill));
		} else if (nce->nce_pcnt < 0) {
			/* No hope, delete the nce */
			nce->nce_state = ND_UNREACHABLE;
			mutex_exit(&nce->nce_lock);
			if (ip_debug > 2) {
				/* ip1dbg */
				pr_addr_dbg("ndp_timer: Delete IRE for"
				    " dst %s\n", AF_INET6, &nce->nce_addr);
			}
			ndp_delete(nce);
		} else if (!(nce->nce_flags & NCE_F_PERMANENT)) {
			/* Wait RetransTimer, before deleting the entry */
			ip2dbg(("ndp_timer: pcount=%x dst %s\n",
			    nce->nce_pcnt, inet_ntop(AF_INET6,
			    &nce->nce_addr, addrbuf, sizeof (addrbuf))));
			mutex_exit(&nce->nce_lock);
			/* Wait one interval before killing */
			NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
		} else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) {
			ipif_t *ipif;

			/*
			 * We're done probing, and we can now declare this
			 * address to be usable.  Let IP know that it's ok to
			 * use.
			 */
			nce->nce_state = ND_REACHABLE;
			mutex_exit(&nce->nce_lock);
			ipif = ip_ndp_lookup_addr_v6(&nce->nce_addr,
			    nce->nce_ill);
			if (ipif != NULL) {
				if (ipif->ipif_was_dup) {
					char ibuf[LIFNAMSIZ + 10];
					char sbuf[INET6_ADDRSTRLEN];

					ipif->ipif_was_dup = B_FALSE;
					(void) inet_ntop(AF_INET6,
					    &ipif->ipif_v6lcl_addr,
					    sbuf, sizeof (sbuf));
					ipif_get_name(ipif, ibuf,
					    sizeof (ibuf));
					cmn_err(CE_NOTE, "recovered address "
					    "%s on %s", sbuf, ibuf);
				}
				if ((ipif->ipif_flags & IPIF_UP) &&
				    !ipif->ipif_addr_ready)
					ipif_up_notify(ipif);
				ipif->ipif_addr_ready = 1;
				ipif_refrele(ipif);
			}
			/* Begin defending our new address */
			nce->nce_unsolicit_count = 0;
			dropped = nce_xmit_advert(nce, B_FALSE,
			    &ipv6_all_hosts_mcast, 0);
			if (dropped) {
				/* Retry the advert from the REACHABLE case */
				nce->nce_unsolicit_count = 1;
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_unsolicit_interval);
			} else if (ipst->ips_ip_ndp_defense_interval != 0) {
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_defense_interval);
			}
		} else {
			/*
			 * This is an address we're probing to be our own, but
			 * the ill is down.  Wait until it comes back before
			 * doing anything, but switch to reachable state so
			 * that the restart will work.
			 */
			nce->nce_state = ND_REACHABLE;
			mutex_exit(&nce->nce_lock);
		}
		NCE_REFRELE(nce);
		return;
	case ND_INCOMPLETE: {
		ip6_t	*ip6h;
		ip6i_t	*ip6i;
		mblk_t	*mp, *datamp, *nextmp, **prevmpp;

		/*
		 * Per case (2) in the nce_queue_mp() comments, scan nce_qd_mp
		 * for any IPMP probe packets, and toss 'em.  IPMP probe
		 * packets will always be at the head of nce_qd_mp and always
		 * have an ip6i_t header, so we can stop at the first queued
		 * ND packet without an ip6i_t.
		 */
		prevmpp = &nce->nce_qd_mp;
		for (mp = nce->nce_qd_mp; mp != NULL; mp = nextmp) {
			nextmp = mp->b_next;
			datamp = (DB_TYPE(mp) == M_CTL) ? mp->b_cont : mp;
			ip6h = (ip6_t *)datamp->b_rptr;
			if (ip6h->ip6_nxt != IPPROTO_RAW)
				break;

			ip6i = (ip6i_t *)ip6h;
			if (ip6i->ip6i_flags & IP6I_IPMP_PROBE) {
				/* Unlink by pointing the previous link past mp */
				inet_freemsg(mp);
				*prevmpp = nextmp;
			} else {
				prevmpp = &mp->b_next;
			}
		}

		/*
		 * Must be resolver's retransmit timer.
		 */
		if (nce->nce_qd_mp != NULL) {
			if ((ms = nce_solicit(nce, NULL)) == 0) {
				if (nce->nce_state != ND_REACHABLE) {
					mutex_exit(&nce->nce_lock);
					nce_resolv_failed(nce);
					ndp_delete(nce);
				} else {
					mutex_exit(&nce->nce_lock);
				}
			} else {
				mutex_exit(&nce->nce_lock);
				NDP_RESTART_TIMER(nce, (clock_t)ms);
			}
			NCE_REFRELE(nce);
			return;
		}
		/* Nothing left queued, so there is nothing to resolve for */
		mutex_exit(&nce->nce_lock);
		NCE_REFRELE(nce);
		break;
	}
	case ND_REACHABLE:
		/*
		 * Send another unsolicited advertisement if some are still
		 * owed, or if this is one of our own (permanent) addresses
		 * and periodic defense is enabled.
		 */
		if (((nce->nce_flags & NCE_F_UNSOL_ADV) &&
		    nce->nce_unsolicit_count != 0) ||
		    ((nce->nce_flags & NCE_F_PERMANENT) &&
		    ipst->ips_ip_ndp_defense_interval != 0)) {
			if (nce->nce_unsolicit_count > 0)
				nce->nce_unsolicit_count--;
			mutex_exit(&nce->nce_lock);
			dropped = nce_xmit_advert(nce, B_FALSE,
			    &ipv6_all_hosts_mcast, 0);
			if (dropped) {
				/* Not sent: this advert is still owed */
				mutex_enter(&nce->nce_lock);
				nce->nce_unsolicit_count++;
				mutex_exit(&nce->nce_lock);
			}
			if (nce->nce_unsolicit_count != 0) {
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_unsolicit_interval);
			} else {
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_defense_interval);
			}
		} else {
			mutex_exit(&nce->nce_lock);
		}
		NCE_REFRELE(nce);
		break;
	default:
		mutex_exit(&nce->nce_lock);
		NCE_REFRELE(nce);
		break;
	}
}

/*
 * Set a link layer address from the ll_addr passed in.
 * Copy SAP from ill.
2786 */ 2787 static void 2788 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2789 { 2790 ill_t *ill = nce->nce_ill; 2791 uchar_t *woffset; 2792 2793 ASSERT(ll_addr != NULL); 2794 /* Always called before fast_path_probe */ 2795 ASSERT(nce->nce_fp_mp == NULL); 2796 if (ill->ill_sap_length != 0) { 2797 /* 2798 * Copy the SAP type specified in the 2799 * request into the xmit template. 2800 */ 2801 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2802 } 2803 if (ill->ill_phys_addr_length > 0) { 2804 /* 2805 * The bcopy() below used to be called for the physical address 2806 * length rather than the link layer address length. For 2807 * ethernet and many other media, the phys_addr and lla are 2808 * identical. 2809 * However, with xresolv interfaces being introduced, the 2810 * phys_addr and lla are no longer the same, and the physical 2811 * address may not have any useful meaning, so we use the lla 2812 * for IPv6 address resolution and destination addressing. 2813 * 2814 * For PPP or other interfaces with a zero length 2815 * physical address, don't do anything here. 2816 * The bcopy() with a zero phys_addr length was previously 2817 * a no-op for interfaces with a zero-length physical address. 2818 * Using the lla for them would change the way they operate. 2819 * Doing nothing in such cases preserves expected behavior. 
2820 */ 2821 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2822 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2823 } 2824 } 2825 2826 static boolean_t 2827 nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) 2828 { 2829 ill_t *ill = nce->nce_ill; 2830 uchar_t *ll_offset; 2831 2832 ASSERT(nce->nce_res_mp != NULL); 2833 if (ll_addr == NULL) 2834 return (B_FALSE); 2835 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2836 if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) 2837 return (B_TRUE); 2838 return (B_FALSE); 2839 } 2840 2841 /* 2842 * Updates the link layer address or the reachability state of 2843 * a cache entry. Reset probe counter if needed. 2844 */ 2845 static void 2846 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2847 { 2848 ill_t *ill = nce->nce_ill; 2849 boolean_t need_stop_timer = B_FALSE; 2850 boolean_t need_fastpath_update = B_FALSE; 2851 2852 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2853 ASSERT(nce->nce_ipversion == IPV6_VERSION); 2854 /* 2855 * If this interface does not do NUD, there is no point 2856 * in allowing an update to the cache entry. Although 2857 * we will respond to NS. 2858 * The only time we accept an update for a resolver when 2859 * NUD is turned off is when it has just been created. 2860 * Non-Resolvers will always be created as REACHABLE. 2861 */ 2862 if (new_state != ND_UNCHANGED) { 2863 if ((nce->nce_flags & NCE_F_NONUD) && 2864 (nce->nce_state != ND_INCOMPLETE)) 2865 return; 2866 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2867 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2868 need_stop_timer = B_TRUE; 2869 if (new_state == ND_REACHABLE) 2870 nce->nce_last = TICK_TO_MSEC(lbolt64); 2871 else { 2872 /* We force NUD in this case */ 2873 nce->nce_last = 0; 2874 } 2875 nce->nce_state = new_state; 2876 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2877 } 2878 /* 2879 * In case of fast path we need to free the the fastpath 2880 * M_DATA and do another probe. 
Otherwise we can just 2881 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2882 * whatever packets that happens to be transmitting at the time. 2883 */ 2884 if (new_ll_addr != NULL) { 2885 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2886 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2887 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2888 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2889 if (nce->nce_fp_mp != NULL) { 2890 freemsg(nce->nce_fp_mp); 2891 nce->nce_fp_mp = NULL; 2892 } 2893 need_fastpath_update = B_TRUE; 2894 } 2895 mutex_exit(&nce->nce_lock); 2896 if (need_stop_timer) { 2897 (void) untimeout(nce->nce_timeout_id); 2898 nce->nce_timeout_id = 0; 2899 } 2900 if (need_fastpath_update) 2901 nce_fastpath(nce); 2902 mutex_enter(&nce->nce_lock); 2903 } 2904 2905 void 2906 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 2907 { 2908 uint_t count = 0; 2909 mblk_t **mpp, *tmp; 2910 2911 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2912 2913 for (mpp = &nce->nce_qd_mp; *mpp != NULL; mpp = &(*mpp)->b_next) { 2914 if (++count > nce->nce_ill->ill_max_buf) { 2915 tmp = nce->nce_qd_mp->b_next; 2916 nce->nce_qd_mp->b_next = NULL; 2917 nce->nce_qd_mp->b_prev = NULL; 2918 freemsg(nce->nce_qd_mp); 2919 nce->nce_qd_mp = tmp; 2920 } 2921 } 2922 2923 if (head_insert) { 2924 mp->b_next = nce->nce_qd_mp; 2925 nce->nce_qd_mp = mp; 2926 } else { 2927 *mpp = mp; 2928 } 2929 } 2930 2931 static void 2932 nce_queue_mp(nce_t *nce, mblk_t *mp) 2933 { 2934 boolean_t head_insert = B_FALSE; 2935 ip6_t *ip6h; 2936 ip6i_t *ip6i; 2937 mblk_t *data_mp; 2938 2939 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2940 2941 if (mp->b_datap->db_type == M_CTL) 2942 data_mp = mp->b_cont; 2943 else 2944 data_mp = mp; 2945 ip6h = (ip6_t *)data_mp->b_rptr; 2946 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2947 /* 2948 * This message should have been pulled up already in 2949 * ip_wput_v6. 
We can't do pullups here because the message 2950 * could be from the nce_qd_mp which could have b_next/b_prev 2951 * non-NULL. 2952 */ 2953 ip6i = (ip6i_t *)ip6h; 2954 ASSERT(MBLKL(data_mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN); 2955 2956 /* 2957 * If this packet is marked IP6I_IPMP_PROBE, then we need to: 2958 * 2959 * 1. Insert it at the head of the nce_qd_mp list. Consider 2960 * the normal (non-probe) load-speading case where the 2961 * source address of the ND packet is not tied to nce_ill. 2962 * If the ill bound to the source address cannot receive, 2963 * the response to the ND packet will not be received. 2964 * However, if ND packets for nce_ill's probes are queued 2965 * behind that ND packet, those probes will also fail to 2966 * be sent, and thus in.mpathd will erroneously conclude 2967 * that nce_ill has also failed. 2968 * 2969 * 2. Drop the probe packet in ndp_timer() if the ND did 2970 * not succeed on the first attempt. This ensures that 2971 * ND problems do not manifest as probe RTT spikes. 2972 */ 2973 if (ip6i->ip6i_flags & IP6I_IPMP_PROBE) 2974 head_insert = B_TRUE; 2975 } 2976 nce_queue_mp_common(nce, mp, head_insert); 2977 } 2978 2979 /* 2980 * Called when address resolution failed due to a timeout. 2981 * Send an ICMP unreachable in response to all queued packets. 
2982 */ 2983 void 2984 nce_resolv_failed(nce_t *nce) 2985 { 2986 mblk_t *mp, *nxt_mp, *first_mp; 2987 char buf[INET6_ADDRSTRLEN]; 2988 ip6_t *ip6h; 2989 zoneid_t zoneid = GLOBAL_ZONEID; 2990 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 2991 2992 ip1dbg(("nce_resolv_failed: dst %s\n", 2993 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 2994 mutex_enter(&nce->nce_lock); 2995 mp = nce->nce_qd_mp; 2996 nce->nce_qd_mp = NULL; 2997 mutex_exit(&nce->nce_lock); 2998 while (mp != NULL) { 2999 nxt_mp = mp->b_next; 3000 mp->b_next = NULL; 3001 mp->b_prev = NULL; 3002 3003 first_mp = mp; 3004 if (mp->b_datap->db_type == M_CTL) { 3005 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 3006 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3007 zoneid = io->ipsec_out_zoneid; 3008 ASSERT(zoneid != ALL_ZONES); 3009 mp = mp->b_cont; 3010 mp->b_next = NULL; 3011 mp->b_prev = NULL; 3012 } 3013 3014 ip6h = (ip6_t *)mp->b_rptr; 3015 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3016 ip6i_t *ip6i; 3017 /* 3018 * This message should have been pulled up already 3019 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 3020 * the header is pulled up. 3021 */ 3022 ip6i = (ip6i_t *)ip6h; 3023 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 3024 sizeof (ip6i_t) + IPV6_HDR_LEN); 3025 mp->b_rptr += sizeof (ip6i_t); 3026 } 3027 /* 3028 * Ignore failure since icmp_unreachable_v6 will silently 3029 * drop packets with an unspecified source address. 3030 */ 3031 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst); 3032 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 3033 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst); 3034 mp = nxt_mp; 3035 } 3036 } 3037 3038 /* 3039 * Called by SIOCSNDP* ioctl to add/change an nce entry 3040 * and the corresponding attributes. 3041 * Disallow states other than ND_REACHABLE or ND_STALE. 
3042 */ 3043 int 3044 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 3045 { 3046 sin6_t *sin6; 3047 in6_addr_t *addr; 3048 nce_t *nce; 3049 int err; 3050 uint16_t new_flags = 0; 3051 uint16_t old_flags = 0; 3052 int inflags = lnr->lnr_flags; 3053 ip_stack_t *ipst = ill->ill_ipst; 3054 3055 ASSERT(ill->ill_isv6); 3056 if ((lnr->lnr_state_create != ND_REACHABLE) && 3057 (lnr->lnr_state_create != ND_STALE)) 3058 return (EINVAL); 3059 3060 if (lnr->lnr_hdw_len > ND_MAX_HDW_LEN) 3061 return (EINVAL); 3062 3063 sin6 = (sin6_t *)&lnr->lnr_addr; 3064 addr = &sin6->sin6_addr; 3065 3066 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 3067 /* We know it can not be mapping so just look in the hash table */ 3068 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 3069 /* See comment in ndp_query() regarding IS_IPMP(ill) usage */ 3070 nce = nce_lookup_addr(ill, IS_IPMP(ill), addr, nce); 3071 if (nce != NULL) 3072 new_flags = nce->nce_flags; 3073 3074 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 3075 case NDF_ISROUTER_ON: 3076 new_flags |= NCE_F_ISROUTER; 3077 break; 3078 case NDF_ISROUTER_OFF: 3079 new_flags &= ~NCE_F_ISROUTER; 3080 break; 3081 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3082 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3083 if (nce != NULL) 3084 NCE_REFRELE(nce); 3085 return (EINVAL); 3086 } 3087 3088 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3089 case NDF_ANYCAST_ON: 3090 new_flags |= NCE_F_ANYCAST; 3091 break; 3092 case NDF_ANYCAST_OFF: 3093 new_flags &= ~NCE_F_ANYCAST; 3094 break; 3095 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3096 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3097 if (nce != NULL) 3098 NCE_REFRELE(nce); 3099 return (EINVAL); 3100 } 3101 3102 if (nce == NULL) { 3103 err = ndp_add_v6(ill, 3104 (uchar_t *)lnr->lnr_hdw_addr, 3105 addr, 3106 &ipv6_all_ones, 3107 &ipv6_all_zeros, 3108 0, 3109 new_flags, 3110 lnr->lnr_state_create, 3111 &nce); 3112 if (err != 0) { 3113 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3114 ip1dbg(("ndp_sioc_update: Can't 
create NCE %d\n", err)); 3115 return (err); 3116 } 3117 } 3118 old_flags = nce->nce_flags; 3119 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3120 /* 3121 * Router turned to host, delete all ires. 3122 * XXX Just delete the entry, but we need to add too. 3123 */ 3124 nce->nce_flags &= ~NCE_F_ISROUTER; 3125 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3126 ndp_delete(nce); 3127 NCE_REFRELE(nce); 3128 return (0); 3129 } 3130 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3131 3132 mutex_enter(&nce->nce_lock); 3133 nce->nce_flags = new_flags; 3134 mutex_exit(&nce->nce_lock); 3135 /* 3136 * Note that we ignore the state at this point, which 3137 * should be either STALE or REACHABLE. Instead we let 3138 * the link layer address passed in to determine the state 3139 * much like incoming packets. 3140 */ 3141 nce_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3142 NCE_REFRELE(nce); 3143 return (0); 3144 } 3145 3146 /* 3147 * If the device driver supports it, we make nce_fp_mp to have 3148 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 3149 * The caller ensures there is hold on nce for this function. 3150 * Note that since ill_fastpath_probe() copies the mblk there is 3151 * no need for the hold beyond this function. 3152 */ 3153 void 3154 nce_fastpath(nce_t *nce) 3155 { 3156 ill_t *ill = nce->nce_ill; 3157 int res; 3158 3159 ASSERT(ill != NULL); 3160 ASSERT(nce->nce_state != ND_INITIAL && nce->nce_state != ND_INCOMPLETE); 3161 3162 if (nce->nce_fp_mp != NULL) { 3163 /* Already contains fastpath info */ 3164 return; 3165 } 3166 if (nce->nce_res_mp != NULL) { 3167 nce_fastpath_list_add(nce); 3168 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3169 /* 3170 * EAGAIN is an indication of a transient error 3171 * i.e. allocation failure etc. leave the nce in the list it 3172 * will be updated when another probe happens for another ire 3173 * if not it will be taken out of the list when the ire is 3174 * deleted. 
3175 */ 3176 3177 if (res != 0 && res != EAGAIN) 3178 nce_fastpath_list_delete(nce); 3179 } 3180 } 3181 3182 /* 3183 * Drain the list of nce's waiting for fastpath response. 3184 */ 3185 void 3186 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3187 void *arg) 3188 { 3189 3190 nce_t *next_nce; 3191 nce_t *current_nce; 3192 nce_t *first_nce; 3193 nce_t *prev_nce = NULL; 3194 3195 mutex_enter(&ill->ill_lock); 3196 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3197 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3198 next_nce = current_nce->nce_fastpath; 3199 /* 3200 * Take it off the list if we're flushing, or if the callback 3201 * routine tells us to do so. Otherwise, leave the nce in the 3202 * fastpath list to handle any pending response from the lower 3203 * layer. We can't drain the list when the callback routine 3204 * comparison failed, because the response is asynchronous in 3205 * nature, and may not arrive in the same order as the list 3206 * insertion. 3207 */ 3208 if (func == NULL || func(current_nce, arg)) { 3209 current_nce->nce_fastpath = NULL; 3210 if (current_nce == first_nce) 3211 ill->ill_fastpath_list = first_nce = next_nce; 3212 else 3213 prev_nce->nce_fastpath = next_nce; 3214 } else { 3215 /* previous element that is still in the list */ 3216 prev_nce = current_nce; 3217 } 3218 current_nce = next_nce; 3219 } 3220 mutex_exit(&ill->ill_lock); 3221 } 3222 3223 /* 3224 * Add nce to the nce fastpath list. 3225 */ 3226 void 3227 nce_fastpath_list_add(nce_t *nce) 3228 { 3229 ill_t *ill; 3230 3231 ill = nce->nce_ill; 3232 3233 mutex_enter(&ill->ill_lock); 3234 mutex_enter(&nce->nce_lock); 3235 3236 /* 3237 * if nce has not been deleted and 3238 * is not already in the list add it. 
3239 */ 3240 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3241 (nce->nce_fastpath == NULL)) { 3242 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3243 ill->ill_fastpath_list = nce; 3244 } 3245 3246 mutex_exit(&nce->nce_lock); 3247 mutex_exit(&ill->ill_lock); 3248 } 3249 3250 /* 3251 * remove nce from the nce fastpath list. 3252 */ 3253 void 3254 nce_fastpath_list_delete(nce_t *nce) 3255 { 3256 nce_t *nce_ptr; 3257 3258 ill_t *ill; 3259 3260 ill = nce->nce_ill; 3261 ASSERT(ill != NULL); 3262 3263 mutex_enter(&ill->ill_lock); 3264 if (nce->nce_fastpath == NULL) 3265 goto done; 3266 3267 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3268 3269 if (ill->ill_fastpath_list == nce) { 3270 ill->ill_fastpath_list = nce->nce_fastpath; 3271 } else { 3272 nce_ptr = ill->ill_fastpath_list; 3273 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3274 if (nce_ptr->nce_fastpath == nce) { 3275 nce_ptr->nce_fastpath = nce->nce_fastpath; 3276 break; 3277 } 3278 nce_ptr = nce_ptr->nce_fastpath; 3279 } 3280 } 3281 3282 nce->nce_fastpath = NULL; 3283 done: 3284 mutex_exit(&ill->ill_lock); 3285 } 3286 3287 /* 3288 * Update all NCE's that are not in fastpath mode and 3289 * have an nce_fp_mp that matches mp. mp->b_cont contains 3290 * the fastpath header. 3291 * 3292 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 
3293 */ 3294 boolean_t 3295 ndp_fastpath_update(nce_t *nce, void *arg) 3296 { 3297 mblk_t *mp, *fp_mp; 3298 uchar_t *mp_rptr, *ud_mp_rptr; 3299 mblk_t *ud_mp = nce->nce_res_mp; 3300 ptrdiff_t cmplen; 3301 3302 if (nce->nce_flags & NCE_F_MAPPING) 3303 return (B_TRUE); 3304 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3305 return (B_TRUE); 3306 3307 ip2dbg(("ndp_fastpath_update: trying\n")); 3308 mp = (mblk_t *)arg; 3309 mp_rptr = mp->b_rptr; 3310 cmplen = mp->b_wptr - mp_rptr; 3311 ASSERT(cmplen >= 0); 3312 ud_mp_rptr = ud_mp->b_rptr; 3313 /* 3314 * The nce is locked here to prevent any other threads 3315 * from accessing and changing nce_res_mp when the IPv6 address 3316 * becomes resolved to an lla while we're in the middle 3317 * of looking at and comparing the hardware address (lla). 3318 * It is also locked to prevent multiple threads in nce_fastpath_update 3319 * from examining nce_res_mp atthe same time. 3320 */ 3321 mutex_enter(&nce->nce_lock); 3322 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3323 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3324 mutex_exit(&nce->nce_lock); 3325 /* 3326 * Don't take the ire off the fastpath list yet, 3327 * since the response may come later. 3328 */ 3329 return (B_FALSE); 3330 } 3331 /* Matched - install mp as the fastpath mp */ 3332 ip1dbg(("ndp_fastpath_update: match\n")); 3333 fp_mp = dupb(mp->b_cont); 3334 if (fp_mp != NULL) { 3335 nce->nce_fp_mp = fp_mp; 3336 } 3337 mutex_exit(&nce->nce_lock); 3338 return (B_TRUE); 3339 } 3340 3341 /* 3342 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3343 * driver. Note that it assumes IP is exclusive... 3344 */ 3345 /* ARGSUSED */ 3346 void 3347 ndp_fastpath_flush(nce_t *nce, char *arg) 3348 { 3349 if (nce->nce_flags & NCE_F_MAPPING) 3350 return; 3351 /* No fastpath info? 
*/ 3352 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3353 return; 3354 3355 if (nce->nce_ipversion == IPV4_VERSION && 3356 nce->nce_flags & NCE_F_BCAST) { 3357 /* 3358 * IPv4 BROADCAST entries: 3359 * We can't delete the nce since it is difficult to 3360 * recreate these without going through the 3361 * ipif down/up dance. 3362 * 3363 * All access to nce->nce_fp_mp in the case of these 3364 * is protected by nce_lock. 3365 */ 3366 mutex_enter(&nce->nce_lock); 3367 if (nce->nce_fp_mp != NULL) { 3368 freeb(nce->nce_fp_mp); 3369 nce->nce_fp_mp = NULL; 3370 mutex_exit(&nce->nce_lock); 3371 nce_fastpath(nce); 3372 } else { 3373 mutex_exit(&nce->nce_lock); 3374 } 3375 } else { 3376 /* Just delete the NCE... */ 3377 ndp_delete(nce); 3378 } 3379 } 3380 3381 /* 3382 * Return a pointer to a given option in the packet. 3383 * Assumes that option part of the packet have already been validated. 3384 */ 3385 nd_opt_hdr_t * 3386 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3387 { 3388 while (optlen > 0) { 3389 if (opt->nd_opt_type == opt_type) 3390 return (opt); 3391 optlen -= 8 * opt->nd_opt_len; 3392 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3393 } 3394 return (NULL); 3395 } 3396 3397 /* 3398 * Verify all option lengths present are > 0, also check to see 3399 * if the option lengths and packet length are consistent. 3400 */ 3401 boolean_t 3402 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3403 { 3404 ASSERT(opt != NULL); 3405 while (optlen > 0) { 3406 if (opt->nd_opt_len == 0) 3407 return (B_FALSE); 3408 optlen -= 8 * opt->nd_opt_len; 3409 if (optlen < 0) 3410 return (B_FALSE); 3411 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3412 } 3413 return (B_TRUE); 3414 } 3415 3416 /* 3417 * ndp_walk function. 3418 * Free a fraction of the NCE cache entries. 3419 * A fraction of zero means to not free any in that category. 
3420 */ 3421 void 3422 ndp_cache_reclaim(nce_t *nce, char *arg) 3423 { 3424 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3425 uint_t rand; 3426 3427 if (nce->nce_flags & NCE_F_PERMANENT) 3428 return; 3429 3430 rand = (uint_t)lbolt + 3431 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3432 if (ncr->ncr_host != 0 && 3433 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3434 ndp_delete(nce); 3435 return; 3436 } 3437 } 3438 3439 /* 3440 * ndp_walk function. 3441 * Count the number of NCEs that can be deleted. 3442 * These would be hosts but not routers. 3443 */ 3444 void 3445 ndp_cache_count(nce_t *nce, char *arg) 3446 { 3447 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3448 3449 if (nce->nce_flags & NCE_F_PERMANENT) 3450 return; 3451 3452 ncc->ncc_total++; 3453 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3454 ncc->ncc_host++; 3455 } 3456 3457 #ifdef DEBUG 3458 void 3459 nce_trace_ref(nce_t *nce) 3460 { 3461 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3462 3463 if (nce->nce_trace_disable) 3464 return; 3465 3466 if (!th_trace_ref(nce, nce->nce_ill->ill_ipst)) { 3467 nce->nce_trace_disable = B_TRUE; 3468 nce_trace_cleanup(nce); 3469 } 3470 } 3471 3472 void 3473 nce_untrace_ref(nce_t *nce) 3474 { 3475 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3476 3477 if (!nce->nce_trace_disable) 3478 th_trace_unref(nce); 3479 } 3480 3481 static void 3482 nce_trace_cleanup(const nce_t *nce) 3483 { 3484 th_trace_cleanup(nce, nce->nce_trace_disable); 3485 } 3486 #endif 3487 3488 /* 3489 * Called when address resolution fails due to a timeout. 3490 * Send an ICMP unreachable in response to all queued packets. 
3491 */ 3492 void 3493 arp_resolv_failed(nce_t *nce) 3494 { 3495 mblk_t *mp, *nxt_mp, *first_mp; 3496 char buf[INET6_ADDRSTRLEN]; 3497 zoneid_t zoneid = GLOBAL_ZONEID; 3498 struct in_addr ipv4addr; 3499 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3500 3501 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3502 ip3dbg(("arp_resolv_failed: dst %s\n", 3503 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3504 mutex_enter(&nce->nce_lock); 3505 mp = nce->nce_qd_mp; 3506 nce->nce_qd_mp = NULL; 3507 mutex_exit(&nce->nce_lock); 3508 3509 while (mp != NULL) { 3510 nxt_mp = mp->b_next; 3511 mp->b_next = NULL; 3512 mp->b_prev = NULL; 3513 3514 first_mp = mp; 3515 /* 3516 * Send icmp unreachable messages 3517 * to the hosts. 3518 */ 3519 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst); 3520 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3521 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3522 ICMP_HOST_UNREACHABLE, zoneid, ipst); 3523 mp = nxt_mp; 3524 } 3525 } 3526 3527 int 3528 ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3529 nce_t **newnce, nce_t *src_nce) 3530 { 3531 int err; 3532 nce_t *nce; 3533 in6_addr_t addr6; 3534 ip_stack_t *ipst = ill->ill_ipst; 3535 3536 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3537 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3538 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3539 /* 3540 * NOTE: IPv4 never matches across the illgrp since the NCE's we're 3541 * looking up have fastpath headers that are inherently per-ill. 3542 */ 3543 nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce); 3544 if (nce == NULL) { 3545 err = ndp_add_v4(ill, addr, flags, newnce, src_nce); 3546 } else { 3547 *newnce = nce; 3548 err = EEXIST; 3549 } 3550 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3551 return (err); 3552 } 3553 3554 /* 3555 * NDP Cache Entry creation routine for IPv4. 3556 * Mapped entries are handled in arp. 3557 * This routine must always be called with ndp4->ndp_g_lock held. 
3558 * Prior to return, nce_refcnt is incremented. 3559 */ 3560 static int 3561 ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3562 nce_t **newnce, nce_t *src_nce) 3563 { 3564 static nce_t nce_nil; 3565 nce_t *nce; 3566 mblk_t *mp; 3567 mblk_t *template = NULL; 3568 nce_t **ncep; 3569 ip_stack_t *ipst = ill->ill_ipst; 3570 uint16_t state = ND_INITIAL; 3571 int err; 3572 3573 ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock)); 3574 ASSERT(!ill->ill_isv6); 3575 ASSERT((flags & NCE_F_MAPPING) == 0); 3576 3577 if (ill->ill_resolver_mp == NULL) 3578 return (EINVAL); 3579 /* 3580 * Allocate the mblk to hold the nce. 3581 */ 3582 mp = allocb(sizeof (nce_t), BPRI_MED); 3583 if (mp == NULL) 3584 return (ENOMEM); 3585 3586 nce = (nce_t *)mp->b_rptr; 3587 mp->b_wptr = (uchar_t *)&nce[1]; 3588 *nce = nce_nil; 3589 nce->nce_ill = ill; 3590 nce->nce_ipversion = IPV4_VERSION; 3591 nce->nce_flags = flags; 3592 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3593 nce->nce_rcnt = ill->ill_xmit_count; 3594 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3595 nce->nce_mask = ipv6_all_ones; 3596 nce->nce_extract_mask = ipv6_all_zeros; 3597 nce->nce_ll_extract_start = 0; 3598 nce->nce_qd_mp = NULL; 3599 nce->nce_mp = mp; 3600 /* This one is for nce getting created */ 3601 nce->nce_refcnt = 1; 3602 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3603 ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3604 3605 nce->nce_trace_disable = B_FALSE; 3606 3607 if (src_nce != NULL) { 3608 /* 3609 * src_nce has been provided by the caller. 
The only 3610 * caller who provides a non-null, non-broadcast 3611 * src_nce is from ip_newroute() which must pass in 3612 * a ND_REACHABLE src_nce (this condition is verified 3613 * via an ASSERT for the save_ire->ire_nce in ip_newroute()) 3614 */ 3615 mutex_enter(&src_nce->nce_lock); 3616 state = src_nce->nce_state; 3617 if ((src_nce->nce_flags & NCE_F_CONDEMNED) || 3618 (ipst->ips_ndp4->ndp_g_hw_change > 0)) { 3619 /* 3620 * src_nce has been deleted, or 3621 * ip_arp_news is in the middle of 3622 * flushing entries in the the nce. 3623 * Fail the add, since we don't know 3624 * if it is safe to copy the contents of 3625 * src_nce 3626 */ 3627 DTRACE_PROBE2(nce__bad__src__nce, 3628 nce_t *, src_nce, ill_t *, ill); 3629 mutex_exit(&src_nce->nce_lock); 3630 err = EINVAL; 3631 goto err_ret; 3632 } 3633 template = copyb(src_nce->nce_res_mp); 3634 mutex_exit(&src_nce->nce_lock); 3635 if (template == NULL) { 3636 err = ENOMEM; 3637 goto err_ret; 3638 } 3639 } else if (flags & NCE_F_BCAST) { 3640 /* 3641 * broadcast nce. 3642 */ 3643 template = copyb(ill->ill_bcast_mp); 3644 if (template == NULL) { 3645 err = ENOMEM; 3646 goto err_ret; 3647 } 3648 state = ND_REACHABLE; 3649 } else if (ill->ill_net_type == IRE_IF_NORESOLVER) { 3650 /* 3651 * NORESOLVER entries are always created in the REACHABLE 3652 * state. We create a nce_res_mp with the IP nexthop address 3653 * in the destination address in the DLPI hdr if the 3654 * physical length is exactly 4 bytes. 3655 * 3656 * XXX not clear which drivers set ill_phys_addr_length to 3657 * IP_ADDR_LEN. 
3658 */ 3659 if (ill->ill_phys_addr_length == IP_ADDR_LEN) { 3660 template = ill_dlur_gen((uchar_t *)addr, 3661 ill->ill_phys_addr_length, 3662 ill->ill_sap, ill->ill_sap_length); 3663 } else { 3664 template = copyb(ill->ill_resolver_mp); 3665 } 3666 if (template == NULL) { 3667 err = ENOMEM; 3668 goto err_ret; 3669 } 3670 state = ND_REACHABLE; 3671 } 3672 nce->nce_fp_mp = NULL; 3673 nce->nce_res_mp = template; 3674 nce->nce_state = state; 3675 if (state == ND_REACHABLE) { 3676 nce->nce_last = TICK_TO_MSEC(lbolt64); 3677 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3678 } else { 3679 nce->nce_last = 0; 3680 if (state == ND_INITIAL) 3681 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3682 } 3683 3684 ASSERT((nce->nce_res_mp == NULL && nce->nce_state == ND_INITIAL) || 3685 (nce->nce_res_mp != NULL && nce->nce_state == ND_REACHABLE)); 3686 /* 3687 * Atomically ensure that the ill is not CONDEMNED, before 3688 * adding the NCE. 3689 */ 3690 mutex_enter(&ill->ill_lock); 3691 if (ill->ill_state_flags & ILL_CONDEMNED) { 3692 mutex_exit(&ill->ill_lock); 3693 err = EINVAL; 3694 goto err_ret; 3695 } 3696 if ((nce->nce_next = *ncep) != NULL) 3697 nce->nce_next->nce_ptpn = &nce->nce_next; 3698 *ncep = nce; 3699 nce->nce_ptpn = ncep; 3700 *newnce = nce; 3701 /* This one is for nce being used by an active thread */ 3702 NCE_REFHOLD(*newnce); 3703 3704 /* Bump up the number of nce's referencing this ill */ 3705 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 3706 (char *), "nce", (void *), nce); 3707 ill->ill_nce_cnt++; 3708 mutex_exit(&ill->ill_lock); 3709 DTRACE_PROBE1(ndp__add__v4, nce_t *, nce); 3710 return (0); 3711 err_ret: 3712 freeb(mp); 3713 freemsg(template); 3714 return (err); 3715 } 3716 3717 /* 3718 * ndp_walk routine to delete all entries that have a given destination or 3719 * gateway address and cached link layer (MAC) address. This is used when ARP 3720 * informs us that a network-to-link-layer mapping may have changed. 
3721 */ 3722 void 3723 nce_delete_hw_changed(nce_t *nce, void *arg) 3724 { 3725 nce_hw_map_t *hwm = arg; 3726 mblk_t *mp; 3727 dl_unitdata_req_t *dlu; 3728 uchar_t *macaddr; 3729 ill_t *ill; 3730 int saplen; 3731 ipaddr_t nce_addr; 3732 3733 if (nce->nce_state != ND_REACHABLE) 3734 return; 3735 3736 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3737 if (nce_addr != hwm->hwm_addr) 3738 return; 3739 3740 mutex_enter(&nce->nce_lock); 3741 if ((mp = nce->nce_res_mp) == NULL) { 3742 mutex_exit(&nce->nce_lock); 3743 return; 3744 } 3745 dlu = (dl_unitdata_req_t *)mp->b_rptr; 3746 macaddr = (uchar_t *)(dlu + 1); 3747 ill = nce->nce_ill; 3748 if ((saplen = ill->ill_sap_length) > 0) 3749 macaddr += saplen; 3750 else 3751 saplen = -saplen; 3752 3753 /* 3754 * If the hardware address is unchanged, then leave this one alone. 3755 * Note that saplen == abs(saplen) now. 3756 */ 3757 if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && 3758 bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { 3759 mutex_exit(&nce->nce_lock); 3760 return; 3761 } 3762 mutex_exit(&nce->nce_lock); 3763 3764 DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); 3765 ndp_delete(nce); 3766 } 3767 3768 /* 3769 * This function verifies whether a given IPv4 address is potentially known to 3770 * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t, 3771 * so that it can continue to look for hardware changes on that address. 3772 */ 3773 boolean_t 3774 ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns) 3775 { 3776 nce_t *nce; 3777 struct in_addr nceaddr; 3778 ip_stack_t *ipst = ns->netstack_ip; 3779 3780 if (addr == INADDR_ANY) 3781 return (B_FALSE); 3782 3783 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3784 nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr); 3785 for (; nce != NULL; nce = nce->nce_next) { 3786 /* Note that only v4 mapped entries are in the table. 
*/ 3787 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 3788 if (addr == nceaddr.s_addr && 3789 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 3790 /* Single flag check; no lock needed */ 3791 if (!(nce->nce_flags & NCE_F_CONDEMNED)) 3792 break; 3793 } 3794 } 3795 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3796 return (nce != NULL); 3797 } 3798 3799 /* 3800 * Wrapper around ipif_lookup_addr_exact_v6() that allows ND to work properly 3801 * with IPMP. Specifically, since neighbor discovery is always done on 3802 * underlying interfaces (even for addresses owned by an IPMP interface), we 3803 * need to check for `v6addrp' on both `ill' and on the IPMP meta-interface 3804 * associated with `ill' (if it exists). 3805 */ 3806 static ipif_t * 3807 ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill) 3808 { 3809 ipif_t *ipif; 3810 ip_stack_t *ipst = ill->ill_ipst; 3811 3812 ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst); 3813 if (ipif == NULL && IS_UNDER_IPMP(ill)) { 3814 if ((ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) { 3815 ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst); 3816 ill_refrele(ill); 3817 } 3818 } 3819 return (ipif); 3820 } 3821