1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/stropts.h> 29 #include <sys/strsun.h> 30 #include <sys/sysmacros.h> 31 #include <sys/errno.h> 32 #include <sys/dlpi.h> 33 #include <sys/socket.h> 34 #include <sys/ddi.h> 35 #include <sys/sunddi.h> 36 #include <sys/cmn_err.h> 37 #include <sys/debug.h> 38 #include <sys/vtrace.h> 39 #include <sys/kmem.h> 40 #include <sys/zone.h> 41 #include <sys/ethernet.h> 42 #include <sys/sdt.h> 43 44 #include <net/if.h> 45 #include <net/if_types.h> 46 #include <net/if_dl.h> 47 #include <net/route.h> 48 #include <netinet/in.h> 49 #include <netinet/ip6.h> 50 #include <netinet/icmp6.h> 51 52 #include <inet/common.h> 53 #include <inet/mi.h> 54 #include <inet/mib2.h> 55 #include <inet/nd.h> 56 #include <inet/ip.h> 57 #include <inet/ip_impl.h> 58 #include <inet/ipclassifier.h> 59 #include <inet/ip_if.h> 60 #include <inet/ip_ire.h> 61 #include <inet/ip_rts.h> 62 #include <inet/ip6.h> 63 #include <inet/ip_ndp.h> 64 #include <inet/ipsec_impl.h> 65 #include 
<inet/ipsec_info.h> 66 #include <inet/sctp_ip.h> 67 68 /* 69 * Function names with nce_ prefix are static while function 70 * names with ndp_ prefix are used by rest of the IP. 71 * 72 * Lock ordering: 73 * 74 * ndp_g_lock -> ill_lock -> nce_lock 75 * 76 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 77 * nce_next. Nce_lock protects the contents of the NCE (particularly 78 * nce_refcnt). 79 */ 80 81 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 82 uint32_t ll_addr_len); 83 static void nce_ire_delete(nce_t *nce); 84 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 85 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 86 static nce_t *nce_lookup_addr(ill_t *, boolean_t, const in6_addr_t *, 87 nce_t *); 88 static nce_t *nce_lookup_mapping(ill_t *, const in6_addr_t *); 89 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 90 uchar_t *addr); 91 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 92 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 93 static mblk_t *nce_udreq_alloc(ill_t *ill); 94 static void nce_update(nce_t *nce, uint16_t new_state, 95 uchar_t *new_ll_addr); 96 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 97 static boolean_t nce_xmit(ill_t *ill, uint8_t type, 98 boolean_t use_lla_addr, const in6_addr_t *sender, 99 const in6_addr_t *target, int flag); 100 static boolean_t nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, 101 const in6_addr_t *target, uint_t flags); 102 static boolean_t nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, 103 const in6_addr_t *src, uint_t flags); 104 static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t, 105 nce_t **, nce_t *); 106 static ipif_t *ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill); 107 108 #ifdef DEBUG 109 static void nce_trace_cleanup(const nce_t *); 110 #endif 111 112 #define NCE_HASH_PTR_V4(ipst, addr) \ 113 (&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 114 115 
#define NCE_HASH_PTR_V6(ipst, addr) \ 116 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ 117 NCE_TABLE_SIZE)])) 118 119 /* Non-tunable probe interval, based on link capabilities */ 120 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 121 122 /* 123 * NDP Cache Entry creation routine. 124 * Mapped entries will never do NUD . 125 * This routine must always be called with ndp6->ndp_g_lock held. 126 * Prior to return, nce_refcnt is incremented. 127 */ 128 int 129 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 130 const in6_addr_t *mask, const in6_addr_t *extract_mask, 131 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 132 nce_t **newnce) 133 { 134 static nce_t nce_nil; 135 nce_t *nce; 136 mblk_t *mp; 137 mblk_t *template; 138 nce_t **ncep; 139 int err; 140 boolean_t dropped = B_FALSE; 141 ip_stack_t *ipst = ill->ill_ipst; 142 143 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 144 ASSERT(ill != NULL && ill->ill_isv6); 145 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 146 ip0dbg(("ndp_add_v6: no addr\n")); 147 return (EINVAL); 148 } 149 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 150 ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags)); 151 return (EINVAL); 152 } 153 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 154 (flags & NCE_F_MAPPING)) { 155 ip0dbg(("ndp_add_v6: extract mask zero for mapping")); 156 return (EINVAL); 157 } 158 /* 159 * Allocate the mblk to hold the nce. 160 * 161 * XXX This can come out of a separate cache - nce_cache. 
162 * We don't need the mp anymore as there are no more 163 * "qwriter"s 164 */ 165 mp = allocb(sizeof (nce_t), BPRI_MED); 166 if (mp == NULL) 167 return (ENOMEM); 168 169 nce = (nce_t *)mp->b_rptr; 170 mp->b_wptr = (uchar_t *)&nce[1]; 171 *nce = nce_nil; 172 173 /* 174 * This one holds link layer address 175 */ 176 if (ill->ill_net_type == IRE_IF_RESOLVER) { 177 template = nce_udreq_alloc(ill); 178 } else { 179 if (ill->ill_resolver_mp == NULL) { 180 freeb(mp); 181 return (EINVAL); 182 } 183 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 184 template = copyb(ill->ill_resolver_mp); 185 } 186 if (template == NULL) { 187 freeb(mp); 188 return (ENOMEM); 189 } 190 nce->nce_ill = ill; 191 nce->nce_ipversion = IPV6_VERSION; 192 nce->nce_flags = flags; 193 nce->nce_state = state; 194 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 195 nce->nce_rcnt = ill->ill_xmit_count; 196 nce->nce_addr = *addr; 197 nce->nce_mask = *mask; 198 nce->nce_extract_mask = *extract_mask; 199 nce->nce_ll_extract_start = hw_extract_start; 200 nce->nce_fp_mp = NULL; 201 nce->nce_res_mp = template; 202 if (state == ND_REACHABLE) 203 nce->nce_last = TICK_TO_MSEC(lbolt64); 204 else 205 nce->nce_last = 0; 206 nce->nce_qd_mp = NULL; 207 nce->nce_mp = mp; 208 if (hw_addr != NULL) 209 nce_set_ll(nce, hw_addr); 210 /* This one is for nce getting created */ 211 nce->nce_refcnt = 1; 212 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 213 if (nce->nce_flags & NCE_F_MAPPING) { 214 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 215 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 216 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 217 ncep = &ipst->ips_ndp6->nce_mask_entries; 218 } else { 219 ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 220 } 221 222 nce->nce_trace_disable = B_FALSE; 223 224 /* 225 * Atomically ensure that the ill is not CONDEMNED, before 226 * adding the NCE. 
227 */ 228 mutex_enter(&ill->ill_lock); 229 if (ill->ill_state_flags & ILL_CONDEMNED) { 230 mutex_exit(&ill->ill_lock); 231 freeb(mp); 232 freeb(template); 233 return (EINVAL); 234 } 235 if ((nce->nce_next = *ncep) != NULL) 236 nce->nce_next->nce_ptpn = &nce->nce_next; 237 *ncep = nce; 238 nce->nce_ptpn = ncep; 239 *newnce = nce; 240 /* This one is for nce being used by an active thread */ 241 NCE_REFHOLD(*newnce); 242 243 /* Bump up the number of nce's referencing this ill */ 244 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 245 (char *), "nce", (void *), nce); 246 ill->ill_nce_cnt++; 247 mutex_exit(&ill->ill_lock); 248 249 err = 0; 250 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 251 mutex_enter(&nce->nce_lock); 252 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 253 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 254 mutex_exit(&nce->nce_lock); 255 dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE); 256 if (dropped) { 257 mutex_enter(&nce->nce_lock); 258 nce->nce_pcnt++; 259 mutex_exit(&nce->nce_lock); 260 } 261 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 262 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 263 err = EINPROGRESS; 264 } else if (flags & NCE_F_UNSOL_ADV) { 265 /* 266 * We account for the transmit below by assigning one 267 * less than the ndd variable. Subsequent decrements 268 * are done in ndp_timer. 
269 */ 270 mutex_enter(&nce->nce_lock); 271 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 272 nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1; 273 mutex_exit(&nce->nce_lock); 274 dropped = nce_xmit_advert(nce, B_TRUE, &ipv6_all_hosts_mcast, 275 0); 276 mutex_enter(&nce->nce_lock); 277 if (dropped) 278 nce->nce_unsolicit_count++; 279 if (nce->nce_unsolicit_count != 0) { 280 ASSERT(nce->nce_timeout_id == 0); 281 nce->nce_timeout_id = timeout(ndp_timer, nce, 282 MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval)); 283 } 284 mutex_exit(&nce->nce_lock); 285 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 286 } 287 288 /* 289 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 290 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 291 * We call nce_fastpath from nce_update if the link layer address of 292 * the peer changes from nce_update 293 */ 294 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 295 nce_fastpath(nce); 296 return (err); 297 } 298 299 int 300 ndp_lookup_then_add_v6(ill_t *ill, boolean_t match_illgrp, uchar_t *hw_addr, 301 const in6_addr_t *addr, const in6_addr_t *mask, 302 const in6_addr_t *extract_mask, uint32_t hw_extract_start, uint16_t flags, 303 uint16_t state, nce_t **newnce) 304 { 305 int err = 0; 306 nce_t *nce; 307 ip_stack_t *ipst = ill->ill_ipst; 308 309 ASSERT(ill->ill_isv6); 310 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 311 312 /* Get head of v6 hash table */ 313 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 314 nce = nce_lookup_addr(ill, match_illgrp, addr, nce); 315 if (nce == NULL) { 316 err = ndp_add_v6(ill, 317 hw_addr, 318 addr, 319 mask, 320 extract_mask, 321 hw_extract_start, 322 flags, 323 state, 324 newnce); 325 } else { 326 *newnce = nce; 327 err = EEXIST; 328 } 329 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 330 return (err); 331 } 332 333 /* 334 * Remove all the CONDEMNED nces from the appropriate hash table. 
335 * We create a private list of NCEs, these may have ires pointing 336 * to them, so the list will be passed through to clean up dependent 337 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 338 */ 339 static void 340 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 341 { 342 nce_t *nce1; 343 nce_t **ptpn; 344 345 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 346 ASSERT(ndp->ndp_g_walker == 0); 347 for (; nce; nce = nce1) { 348 nce1 = nce->nce_next; 349 mutex_enter(&nce->nce_lock); 350 if (nce->nce_flags & NCE_F_CONDEMNED) { 351 ptpn = nce->nce_ptpn; 352 nce1 = nce->nce_next; 353 if (nce1 != NULL) 354 nce1->nce_ptpn = ptpn; 355 *ptpn = nce1; 356 nce->nce_ptpn = NULL; 357 nce->nce_next = NULL; 358 nce->nce_next = *free_nce_list; 359 *free_nce_list = nce; 360 } 361 mutex_exit(&nce->nce_lock); 362 } 363 } 364 365 /* 366 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 367 * will return this NCE. Also no new IREs will be created that 368 * point to this NCE (See ire_add_v6). Also no new timeouts will 369 * be started (See NDP_RESTART_TIMER). 370 * 2. Cancel any currently running timeouts. 371 * 3. If there is an ndp walker, return. The walker will do the cleanup. 372 * This ensures that walkers see a consistent list of NCEs while walking. 373 * 4. Otherwise remove the NCE from the list of NCEs 374 * 5. Delete all IREs pointing to this NCE. 375 */ 376 void 377 ndp_delete(nce_t *nce) 378 { 379 nce_t **ptpn; 380 nce_t *nce1; 381 int ipversion = nce->nce_ipversion; 382 ndp_g_t *ndp; 383 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 384 385 if (ipversion == IPV4_VERSION) 386 ndp = ipst->ips_ndp4; 387 else 388 ndp = ipst->ips_ndp6; 389 390 /* Serialize deletes */ 391 mutex_enter(&nce->nce_lock); 392 if (nce->nce_flags & NCE_F_CONDEMNED) { 393 /* Some other thread is doing the delete */ 394 mutex_exit(&nce->nce_lock); 395 return; 396 } 397 /* 398 * Caller has a refhold. Also 1 ref for being in the list. 
Thus 399 * refcnt has to be >= 2 400 */ 401 ASSERT(nce->nce_refcnt >= 2); 402 nce->nce_flags |= NCE_F_CONDEMNED; 403 mutex_exit(&nce->nce_lock); 404 405 nce_fastpath_list_delete(nce); 406 407 /* 408 * Cancel any running timer. Timeout can't be restarted 409 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 410 * Passing invalid timeout id is fine. 411 */ 412 if (nce->nce_timeout_id != 0) { 413 (void) untimeout(nce->nce_timeout_id); 414 nce->nce_timeout_id = 0; 415 } 416 417 mutex_enter(&ndp->ndp_g_lock); 418 if (nce->nce_ptpn == NULL) { 419 /* 420 * The last ndp walker has already removed this nce from 421 * the list after we marked the nce CONDEMNED and before 422 * we grabbed the global lock. 423 */ 424 mutex_exit(&ndp->ndp_g_lock); 425 return; 426 } 427 if (ndp->ndp_g_walker > 0) { 428 /* 429 * Can't unlink. The walker will clean up 430 */ 431 ndp->ndp_g_walker_cleanup = B_TRUE; 432 mutex_exit(&ndp->ndp_g_lock); 433 return; 434 } 435 436 /* 437 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 438 * the timer since it is marked CONDEMNED. 
439 */ 440 ptpn = nce->nce_ptpn; 441 nce1 = nce->nce_next; 442 if (nce1 != NULL) 443 nce1->nce_ptpn = ptpn; 444 *ptpn = nce1; 445 nce->nce_ptpn = NULL; 446 nce->nce_next = NULL; 447 mutex_exit(&ndp->ndp_g_lock); 448 449 nce_ire_delete(nce); 450 } 451 452 void 453 ndp_inactive(nce_t *nce) 454 { 455 mblk_t **mpp; 456 ill_t *ill; 457 458 ASSERT(nce->nce_refcnt == 0); 459 ASSERT(MUTEX_HELD(&nce->nce_lock)); 460 ASSERT(nce->nce_fastpath == NULL); 461 462 /* Free all nce allocated messages */ 463 mpp = &nce->nce_first_mp_to_free; 464 do { 465 while (*mpp != NULL) { 466 mblk_t *mp; 467 468 mp = *mpp; 469 *mpp = mp->b_next; 470 471 inet_freemsg(mp); 472 } 473 } while (mpp++ != &nce->nce_last_mp_to_free); 474 475 #ifdef DEBUG 476 nce_trace_cleanup(nce); 477 #endif 478 479 ill = nce->nce_ill; 480 mutex_enter(&ill->ill_lock); 481 DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill, 482 (char *), "nce", (void *), nce); 483 ill->ill_nce_cnt--; 484 /* 485 * If the number of nce's associated with this ill have dropped 486 * to zero, check whether we need to restart any operation that 487 * is waiting for this to happen. 488 */ 489 if (ILL_DOWN_OK(ill)) { 490 /* ipif_ill_refrele_tail drops the ill_lock */ 491 ipif_ill_refrele_tail(ill); 492 } else { 493 mutex_exit(&ill->ill_lock); 494 } 495 mutex_destroy(&nce->nce_lock); 496 if (nce->nce_mp != NULL) 497 inet_freemsg(nce->nce_mp); 498 } 499 500 /* 501 * ndp_walk routine. Delete the nce if it is associated with the ill 502 * that is going away. Always called as a writer. 503 */ 504 void 505 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 506 { 507 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 508 ndp_delete(nce); 509 } 510 } 511 512 /* 513 * Walk a list of to be inactive NCEs and blow away all the ires. 
514 */ 515 static void 516 nce_ire_delete_list(nce_t *nce) 517 { 518 nce_t *nce_next; 519 520 ASSERT(nce != NULL); 521 while (nce != NULL) { 522 nce_next = nce->nce_next; 523 nce->nce_next = NULL; 524 525 /* 526 * It is possible for the last ndp walker (this thread) 527 * to come here after ndp_delete has marked the nce CONDEMNED 528 * and before it has removed the nce from the fastpath list 529 * or called untimeout. So we need to do it here. It is safe 530 * for both ndp_delete and this thread to do it twice or 531 * even simultaneously since each of the threads has a 532 * reference on the nce. 533 */ 534 nce_fastpath_list_delete(nce); 535 /* 536 * Cancel any running timer. Timeout can't be restarted 537 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 538 * Passing invalid timeout id is fine. 539 */ 540 if (nce->nce_timeout_id != 0) { 541 (void) untimeout(nce->nce_timeout_id); 542 nce->nce_timeout_id = 0; 543 } 544 /* 545 * We might hit this func thus in the v4 case: 546 * ipif_down->ipif_ndp_down->ndp_walk 547 */ 548 549 if (nce->nce_ipversion == IPV4_VERSION) { 550 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 551 IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill); 552 } else { 553 ASSERT(nce->nce_ipversion == IPV6_VERSION); 554 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 555 IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill); 556 } 557 NCE_REFRELE_NOTR(nce); 558 nce = nce_next; 559 } 560 } 561 562 /* 563 * Delete an ire when the nce goes away. 
564 */ 565 /* ARGSUSED */ 566 static void 567 nce_ire_delete(nce_t *nce) 568 { 569 if (nce->nce_ipversion == IPV6_VERSION) { 570 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 571 nce_ire_delete1, (char *)nce, nce->nce_ill); 572 NCE_REFRELE_NOTR(nce); 573 } else { 574 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 575 nce_ire_delete1, (char *)nce, nce->nce_ill); 576 NCE_REFRELE_NOTR(nce); 577 } 578 } 579 580 /* 581 * ire_walk routine used to delete every IRE that shares this nce 582 */ 583 static void 584 nce_ire_delete1(ire_t *ire, char *nce_arg) 585 { 586 nce_t *nce = (nce_t *)nce_arg; 587 588 ASSERT(ire->ire_type == IRE_CACHE); 589 590 if (ire->ire_nce == nce) { 591 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 592 ire_delete(ire); 593 } 594 } 595 596 /* 597 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 598 */ 599 boolean_t 600 ndp_restart_dad(nce_t *nce) 601 { 602 boolean_t started; 603 boolean_t dropped; 604 605 if (nce == NULL) 606 return (B_FALSE); 607 mutex_enter(&nce->nce_lock); 608 if (nce->nce_state == ND_PROBE) { 609 mutex_exit(&nce->nce_lock); 610 started = B_TRUE; 611 } else if (nce->nce_state == ND_REACHABLE) { 612 nce->nce_state = ND_PROBE; 613 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 614 mutex_exit(&nce->nce_lock); 615 dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE); 616 if (dropped) { 617 mutex_enter(&nce->nce_lock); 618 nce->nce_pcnt++; 619 mutex_exit(&nce->nce_lock); 620 } 621 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 622 started = B_TRUE; 623 } else { 624 mutex_exit(&nce->nce_lock); 625 started = B_FALSE; 626 } 627 return (started); 628 } 629 630 /* 631 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 632 * If one is found, the refcnt on the nce will be incremented. 
633 */ 634 nce_t * 635 ndp_lookup_v6(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr, 636 boolean_t caller_holds_lock) 637 { 638 nce_t *nce; 639 ip_stack_t *ipst = ill->ill_ipst; 640 641 ASSERT(ill->ill_isv6); 642 if (!caller_holds_lock) 643 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 644 645 /* Get head of v6 hash table */ 646 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 647 nce = nce_lookup_addr(ill, match_illgrp, addr, nce); 648 if (nce == NULL) 649 nce = nce_lookup_mapping(ill, addr); 650 if (!caller_holds_lock) 651 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 652 return (nce); 653 } 654 /* 655 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 656 * If one is found, the refcnt on the nce will be incremented. 657 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 658 * so we skip the nce_lookup_mapping call. 659 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 660 */ 661 nce_t * 662 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 663 { 664 nce_t *nce; 665 in6_addr_t addr6; 666 ip_stack_t *ipst = ill->ill_ipst; 667 668 if (!caller_holds_lock) 669 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 670 671 /* Get head of v4 hash table */ 672 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 673 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 674 /* 675 * NOTE: IPv4 never matches across the illgrp since the NCE's we're 676 * looking up have fastpath headers that are inherently per-ill. 677 */ 678 nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce); 679 if (!caller_holds_lock) 680 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 681 return (nce); 682 } 683 684 /* 685 * Cache entry lookup. Try to find an nce matching the parameters passed. 686 * Look only for exact entries (no mappings). If an nce is found, increment 687 * the hold count on that nce. 
The caller passes in the start of the 688 * appropriate hash table, and must be holding the appropriate global 689 * lock (ndp_g_lock). 690 */ 691 static nce_t * 692 nce_lookup_addr(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr, 693 nce_t *nce) 694 { 695 ndp_g_t *ndp; 696 ip_stack_t *ipst = ill->ill_ipst; 697 698 if (ill->ill_isv6) 699 ndp = ipst->ips_ndp6; 700 else 701 ndp = ipst->ips_ndp4; 702 703 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 704 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 705 return (NULL); 706 for (; nce != NULL; nce = nce->nce_next) { 707 if (nce->nce_ill == ill || 708 match_illgrp && IS_IN_SAME_ILLGRP(ill, nce->nce_ill)) { 709 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 710 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 711 &ipv6_all_ones)) { 712 mutex_enter(&nce->nce_lock); 713 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 714 NCE_REFHOLD_LOCKED(nce); 715 mutex_exit(&nce->nce_lock); 716 break; 717 } 718 mutex_exit(&nce->nce_lock); 719 } 720 } 721 } 722 return (nce); 723 } 724 725 /* 726 * Cache entry lookup. Try to find an nce matching the parameters passed. 727 * Look only for mappings. 728 */ 729 static nce_t * 730 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 731 { 732 nce_t *nce; 733 ip_stack_t *ipst = ill->ill_ipst; 734 735 ASSERT(ill != NULL && ill->ill_isv6); 736 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 737 if (!IN6_IS_ADDR_MULTICAST(addr)) 738 return (NULL); 739 nce = ipst->ips_ndp6->nce_mask_entries; 740 for (; nce != NULL; nce = nce->nce_next) 741 if (nce->nce_ill == ill && 742 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 743 mutex_enter(&nce->nce_lock); 744 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 745 NCE_REFHOLD_LOCKED(nce); 746 mutex_exit(&nce->nce_lock); 747 break; 748 } 749 mutex_exit(&nce->nce_lock); 750 } 751 return (nce); 752 } 753 754 /* 755 * Process passed in parameters either from an incoming packet or via 756 * user ioctl. 
757 */ 758 static void 759 nce_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 760 { 761 ill_t *ill = nce->nce_ill; 762 uint32_t hw_addr_len = ill->ill_nd_lla_len; 763 mblk_t *mp; 764 boolean_t ll_updated = B_FALSE; 765 boolean_t ll_changed; 766 ip_stack_t *ipst = ill->ill_ipst; 767 768 ASSERT(nce->nce_ipversion == IPV6_VERSION); 769 /* 770 * No updates of link layer address or the neighbor state is 771 * allowed, when the cache is in NONUD state. This still 772 * allows for responding to reachability solicitation. 773 */ 774 mutex_enter(&nce->nce_lock); 775 if (nce->nce_state == ND_INCOMPLETE) { 776 if (hw_addr == NULL) { 777 mutex_exit(&nce->nce_lock); 778 return; 779 } 780 nce_set_ll(nce, hw_addr); 781 /* 782 * Update nce state and send the queued packets 783 * back to ip this time ire will be added. 784 */ 785 if (flag & ND_NA_FLAG_SOLICITED) { 786 nce_update(nce, ND_REACHABLE, NULL); 787 } else { 788 nce_update(nce, ND_STALE, NULL); 789 } 790 mutex_exit(&nce->nce_lock); 791 nce_fastpath(nce); 792 mutex_enter(&nce->nce_lock); 793 mp = nce->nce_qd_mp; 794 nce->nce_qd_mp = NULL; 795 mutex_exit(&nce->nce_lock); 796 while (mp != NULL) { 797 mblk_t *nxt_mp, *data_mp; 798 799 nxt_mp = mp->b_next; 800 mp->b_next = NULL; 801 802 if (mp->b_datap->db_type == M_CTL) 803 data_mp = mp->b_cont; 804 else 805 data_mp = mp; 806 if (data_mp->b_prev != NULL) { 807 ill_t *inbound_ill; 808 queue_t *fwdq = NULL; 809 uint_t ifindex; 810 811 ifindex = (uint_t)(uintptr_t)data_mp->b_prev; 812 inbound_ill = ill_lookup_on_ifindex(ifindex, 813 B_TRUE, NULL, NULL, NULL, NULL, ipst); 814 if (inbound_ill == NULL) { 815 data_mp->b_prev = NULL; 816 freemsg(mp); 817 return; 818 } else { 819 fwdq = inbound_ill->ill_rq; 820 } 821 data_mp->b_prev = NULL; 822 /* 823 * Send a forwarded packet back into ip_rput_v6 824 * just as in ire_send_v6(). 825 * Extract the queue from b_prev (set in 826 * ip_rput_data_v6). 
827 */ 828 if (fwdq != NULL) { 829 /* 830 * Forwarded packets hop count will 831 * get decremented in ip_rput_data_v6 832 */ 833 if (data_mp != mp) 834 freeb(mp); 835 put(fwdq, data_mp); 836 } else { 837 /* 838 * Send locally originated packets back 839 * into ip_wput_v6. 840 */ 841 put(ill->ill_wq, mp); 842 } 843 ill_refrele(inbound_ill); 844 } else { 845 put(ill->ill_wq, mp); 846 } 847 mp = nxt_mp; 848 } 849 return; 850 } 851 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 852 if (!is_adv) { 853 /* If this is a SOLICITATION request only */ 854 if (ll_changed) 855 nce_update(nce, ND_STALE, hw_addr); 856 mutex_exit(&nce->nce_lock); 857 return; 858 } 859 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 860 /* If in any other state than REACHABLE, ignore */ 861 if (nce->nce_state == ND_REACHABLE) { 862 nce_update(nce, ND_STALE, NULL); 863 } 864 mutex_exit(&nce->nce_lock); 865 return; 866 } else { 867 if (ll_changed) { 868 nce_update(nce, ND_UNCHANGED, hw_addr); 869 ll_updated = B_TRUE; 870 } 871 if (flag & ND_NA_FLAG_SOLICITED) { 872 nce_update(nce, ND_REACHABLE, NULL); 873 } else { 874 if (ll_updated) { 875 nce_update(nce, ND_STALE, NULL); 876 } 877 } 878 mutex_exit(&nce->nce_lock); 879 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 880 NCE_F_ISROUTER)) { 881 ire_t *ire; 882 883 /* 884 * Router turned to host. We need to remove the 885 * entry as well as any default route that may be 886 * using this as a next hop. This is required by 887 * section 7.2.5 of RFC 2461. 888 */ 889 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 890 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 891 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 892 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 893 MATCH_IRE_DEFAULT, ipst); 894 if (ire != NULL) { 895 ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); 896 ire_delete(ire); 897 ire_refrele(ire); 898 } 899 ndp_delete(nce); 900 } 901 } 902 } 903 904 /* 905 * Walker state structure used by ndp_process() / ndp_process_entry(). 
906 */ 907 typedef struct ndp_process_data { 908 ill_t *np_ill; /* ill/illgrp to match against */ 909 const in6_addr_t *np_addr; /* IPv6 address to match */ 910 uchar_t *np_hw_addr; /* passed to nce_process() */ 911 uint32_t np_flag; /* passed to nce_process() */ 912 boolean_t np_is_adv; /* passed to nce_process() */ 913 } ndp_process_data_t; 914 915 /* 916 * Walker callback used by ndp_process() for IPMP groups: calls nce_process() 917 * for each NCE with a matching address that's in the same IPMP group. 918 */ 919 static void 920 ndp_process_entry(nce_t *nce, void *arg) 921 { 922 ndp_process_data_t *npp = arg; 923 924 if (IS_IN_SAME_ILLGRP(nce->nce_ill, npp->np_ill) && 925 IN6_ARE_ADDR_EQUAL(&nce->nce_addr, npp->np_addr) && 926 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 927 nce_process(nce, npp->np_hw_addr, npp->np_flag, npp->np_is_adv); 928 } 929 } 930 931 /* 932 * Wrapper around nce_process() that handles IPMP. In particular, for IPMP, 933 * NCEs are per-underlying-ill (because of nce_fp_mp) and thus we may have 934 * more than one NCE for a given IPv6 address to tend to. In that case, we 935 * need to walk all NCEs and callback nce_process() for each one. Since this 936 * is expensive, in the non-IPMP case we just directly call nce_process(). 937 * Ultimately, nce_fp_mp needs to be moved out of the nce_t so that all IP 938 * interfaces in an IPMP group share the same NCEs -- at which point this 939 * function can be removed entirely. 
940 */ 941 void 942 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 943 { 944 ill_t *ill = nce->nce_ill; 945 struct ndp_g_s *ndp = ill->ill_ipst->ips_ndp6; 946 ndp_process_data_t np; 947 948 if (ill->ill_grp == NULL) { 949 nce_process(nce, hw_addr, flag, is_adv); 950 return; 951 } 952 953 /* IPMP case: walk all NCEs */ 954 np.np_ill = ill; 955 np.np_addr = &nce->nce_addr; 956 np.np_flag = flag; 957 np.np_is_adv = is_adv; 958 np.np_hw_addr = hw_addr; 959 960 ndp_walk_common(ndp, NULL, (pfi_t)ndp_process_entry, &np, ALL_ZONES); 961 } 962 963 /* 964 * Pass arg1 to the pfi supplied, along with each nce in existence. 965 * ndp_walk() places a REFHOLD on the nce and drops the lock when 966 * walking the hash list. 967 */ 968 void 969 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 970 boolean_t trace) 971 { 972 nce_t *nce; 973 nce_t *nce1; 974 nce_t **ncep; 975 nce_t *free_nce_list = NULL; 976 977 mutex_enter(&ndp->ndp_g_lock); 978 /* Prevent ndp_delete from unlink and free of NCE */ 979 ndp->ndp_g_walker++; 980 mutex_exit(&ndp->ndp_g_lock); 981 for (ncep = ndp->nce_hash_tbl; 982 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 983 for (nce = *ncep; nce != NULL; nce = nce1) { 984 nce1 = nce->nce_next; 985 if (ill == NULL || nce->nce_ill == ill) { 986 if (trace) { 987 NCE_REFHOLD(nce); 988 (*pfi)(nce, arg1); 989 NCE_REFRELE(nce); 990 } else { 991 NCE_REFHOLD_NOTR(nce); 992 (*pfi)(nce, arg1); 993 NCE_REFRELE_NOTR(nce); 994 } 995 } 996 } 997 } 998 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 999 nce1 = nce->nce_next; 1000 if (ill == NULL || nce->nce_ill == ill) { 1001 if (trace) { 1002 NCE_REFHOLD(nce); 1003 (*pfi)(nce, arg1); 1004 NCE_REFRELE(nce); 1005 } else { 1006 NCE_REFHOLD_NOTR(nce); 1007 (*pfi)(nce, arg1); 1008 NCE_REFRELE_NOTR(nce); 1009 } 1010 } 1011 } 1012 mutex_enter(&ndp->ndp_g_lock); 1013 ndp->ndp_g_walker--; 1014 /* 1015 * While NCE's are removed from global list they are placed 1016 * in a private list, 
to be passed to nce_ire_delete_list(). 1017 * The reason is, there may be ires pointing to this nce 1018 * which needs to cleaned up. 1019 */ 1020 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 1021 /* Time to delete condemned entries */ 1022 for (ncep = ndp->nce_hash_tbl; 1023 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 1024 nce = *ncep; 1025 if (nce != NULL) { 1026 nce_remove(ndp, nce, &free_nce_list); 1027 } 1028 } 1029 nce = ndp->nce_mask_entries; 1030 if (nce != NULL) { 1031 nce_remove(ndp, nce, &free_nce_list); 1032 } 1033 ndp->ndp_g_walker_cleanup = B_FALSE; 1034 } 1035 1036 mutex_exit(&ndp->ndp_g_lock); 1037 1038 if (free_nce_list != NULL) { 1039 nce_ire_delete_list(free_nce_list); 1040 } 1041 } 1042 1043 /* 1044 * Walk everything. 1045 * Note that ill can be NULL hence can't derive the ipst from it. 1046 */ 1047 void 1048 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst) 1049 { 1050 ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE); 1051 ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE); 1052 } 1053 1054 /* 1055 * Process resolve requests. Handles both mapped entries 1056 * as well as cases that needs to be send out on the wire. 1057 * Lookup a NCE for a given IRE. Regardless of whether one exists 1058 * or one is created, we defer making ire point to nce until the 1059 * ire is actually added at which point the nce_refcnt on the nce is 1060 * incremented. This is done primarily to have symmetry between ire_add() 1061 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 
1062 */ 1063 int 1064 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 1065 { 1066 nce_t *nce, *hw_nce = NULL; 1067 int err; 1068 ill_t *ipmp_ill; 1069 uint16_t nce_flags; 1070 uint32_t ms; 1071 mblk_t *mp_nce = NULL; 1072 ip_stack_t *ipst = ill->ill_ipst; 1073 uchar_t *hwaddr = NULL; 1074 1075 ASSERT(ill->ill_isv6); 1076 1077 if (IN6_IS_ADDR_MULTICAST(dst)) 1078 return (nce_set_multicast(ill, dst)); 1079 1080 nce_flags = (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0; 1081 1082 /* 1083 * If `ill' is under IPMP, then first check to see if there's an NCE 1084 * for `dst' on the IPMP meta-interface (e.g., because an application 1085 * explicitly did an SIOCLIFSETND to tie a hardware address to `dst'). 1086 * If so, we use that hardware address when creating the NCE below. 1087 * Note that we don't yet have a mechanism to remove these NCEs if the 1088 * NCE for `dst' on the IPMP meta-interface is subsequently removed -- 1089 * but rather than build such a beast, we should fix NCEs so that they 1090 * can be properly shared across an IPMP group. 1091 */ 1092 if (IS_UNDER_IPMP(ill)) { 1093 if ((ipmp_ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) { 1094 hw_nce = ndp_lookup_v6(ipmp_ill, B_FALSE, dst, B_FALSE); 1095 if (hw_nce != NULL && hw_nce->nce_res_mp != NULL) { 1096 hwaddr = hw_nce->nce_res_mp->b_rptr + 1097 NCE_LL_ADDR_OFFSET(ipmp_ill); 1098 nce_flags |= hw_nce->nce_flags; 1099 } 1100 ill_refrele(ipmp_ill); 1101 } 1102 } 1103 1104 err = ndp_lookup_then_add_v6(ill, 1105 B_FALSE, /* NCE fastpath is per ill; don't match across group */ 1106 hwaddr, 1107 dst, 1108 &ipv6_all_ones, 1109 &ipv6_all_zeros, 1110 0, 1111 nce_flags, 1112 hwaddr != NULL ? ND_REACHABLE : ND_INCOMPLETE, 1113 &nce); 1114 1115 if (hw_nce != NULL) 1116 NCE_REFRELE(hw_nce); 1117 1118 switch (err) { 1119 case 0: 1120 /* 1121 * New cache entry was created. Make sure that the state 1122 * is not ND_INCOMPLETE. 
It can be in some other state 1123 * even before we send out the solicitation as we could 1124 * get un-solicited advertisements. 1125 * 1126 * If this is an XRESOLV interface, simply return 0, 1127 * since we don't want to solicit just yet. 1128 */ 1129 if (ill->ill_flags & ILLF_XRESOLV) { 1130 NCE_REFRELE(nce); 1131 return (0); 1132 } 1133 1134 mutex_enter(&nce->nce_lock); 1135 if (nce->nce_state != ND_INCOMPLETE) { 1136 mutex_exit(&nce->nce_lock); 1137 NCE_REFRELE(nce); 1138 return (0); 1139 } 1140 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1141 if (mp_nce == NULL) { 1142 /* The caller will free mp */ 1143 mutex_exit(&nce->nce_lock); 1144 ndp_delete(nce); 1145 NCE_REFRELE(nce); 1146 return (ENOMEM); 1147 } 1148 if ((ms = nce_solicit(nce, mp_nce)) == 0) { 1149 /* The caller will free mp */ 1150 if (mp_nce != mp) 1151 freeb(mp_nce); 1152 mutex_exit(&nce->nce_lock); 1153 ndp_delete(nce); 1154 NCE_REFRELE(nce); 1155 return (EBUSY); 1156 } 1157 mutex_exit(&nce->nce_lock); 1158 NDP_RESTART_TIMER(nce, (clock_t)ms); 1159 NCE_REFRELE(nce); 1160 return (EINPROGRESS); 1161 case EEXIST: 1162 /* Resolution in progress just queue the packet */ 1163 mutex_enter(&nce->nce_lock); 1164 if (nce->nce_state == ND_INCOMPLETE) { 1165 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1166 if (mp_nce == NULL) { 1167 err = ENOMEM; 1168 } else { 1169 nce_queue_mp(nce, mp_nce); 1170 err = EINPROGRESS; 1171 } 1172 } else { 1173 /* 1174 * Any other state implies we have 1175 * a nce but IRE needs to be added ... 1176 * ire_add_v6() will take care of the 1177 * the case when the nce becomes CONDEMNED 1178 * before the ire is added to the table. 1179 */ 1180 err = 0; 1181 } 1182 mutex_exit(&nce->nce_lock); 1183 NCE_REFRELE(nce); 1184 break; 1185 default: 1186 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 1187 break; 1188 } 1189 return (err); 1190 } 1191 1192 /* 1193 * When there is no resolver, the link layer template is passed in 1194 * the IRE. 1195 * Lookup a NCE for a given IRE. 
Regardless of whether one exists 1196 * or one is created, we defer making ire point to nce until the 1197 * ire is actually added at which point the nce_refcnt on the nce is 1198 * incremented. This is done primarily to have symmetry between ire_add() 1199 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1200 */ 1201 int 1202 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1203 { 1204 nce_t *nce; 1205 int err = 0; 1206 1207 ASSERT(ill != NULL); 1208 ASSERT(ill->ill_isv6); 1209 if (IN6_IS_ADDR_MULTICAST(dst)) { 1210 err = nce_set_multicast(ill, dst); 1211 return (err); 1212 } 1213 1214 err = ndp_lookup_then_add_v6(ill, 1215 B_FALSE, /* NCE fastpath is per ill; don't match across group */ 1216 NULL, /* hardware address */ 1217 dst, 1218 &ipv6_all_ones, 1219 &ipv6_all_zeros, 1220 0, 1221 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1222 ND_REACHABLE, 1223 &nce); 1224 1225 switch (err) { 1226 case 0: 1227 /* 1228 * Cache entry with a proper resolver cookie was 1229 * created. 1230 */ 1231 NCE_REFRELE(nce); 1232 break; 1233 case EEXIST: 1234 err = 0; 1235 NCE_REFRELE(nce); 1236 break; 1237 default: 1238 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1239 break; 1240 } 1241 return (err); 1242 } 1243 1244 /* 1245 * For each interface an entry is added for the unspecified multicast group. 1246 * Here that mapping is used to form the multicast cache entry for a particular 1247 * multicast destination. 
1248 */ 1249 static int 1250 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1251 { 1252 nce_t *mnce; /* Multicast mapping entry */ 1253 nce_t *nce; 1254 uchar_t *hw_addr = NULL; 1255 int err = 0; 1256 ip_stack_t *ipst = ill->ill_ipst; 1257 1258 ASSERT(ill != NULL); 1259 ASSERT(ill->ill_isv6); 1260 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1261 1262 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1263 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst)); 1264 nce = nce_lookup_addr(ill, B_FALSE, dst, nce); 1265 if (nce != NULL) { 1266 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1267 NCE_REFRELE(nce); 1268 return (0); 1269 } 1270 /* No entry, now lookup for a mapping this should never fail */ 1271 mnce = nce_lookup_mapping(ill, dst); 1272 if (mnce == NULL) { 1273 /* Something broken for the interface. */ 1274 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1275 return (ESRCH); 1276 } 1277 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1278 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1279 /* 1280 * For IRE_IF_RESOLVER a hardware mapping can be 1281 * generated, for IRE_IF_NORESOLVER, resolution cookie 1282 * in the ill is copied in ndp_add_v6(). 1283 */ 1284 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1285 if (hw_addr == NULL) { 1286 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1287 NCE_REFRELE(mnce); 1288 return (ENOMEM); 1289 } 1290 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1291 } 1292 NCE_REFRELE(mnce); 1293 /* 1294 * IRE_IF_NORESOLVER type simply copies the resolution 1295 * cookie passed in. So no hw_addr is needed. 
1296 */ 1297 err = ndp_add_v6(ill, 1298 hw_addr, 1299 dst, 1300 &ipv6_all_ones, 1301 &ipv6_all_zeros, 1302 0, 1303 NCE_F_NONUD, 1304 ND_REACHABLE, 1305 &nce); 1306 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1307 if (hw_addr != NULL) 1308 kmem_free(hw_addr, ill->ill_nd_lla_len); 1309 if (err != 0) { 1310 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1311 return (err); 1312 } 1313 NCE_REFRELE(nce); 1314 return (0); 1315 } 1316 1317 /* 1318 * Return the link layer address, and any flags of a nce. 1319 */ 1320 int 1321 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1322 { 1323 nce_t *nce; 1324 in6_addr_t *addr; 1325 sin6_t *sin6; 1326 dl_unitdata_req_t *dl; 1327 1328 ASSERT(ill != NULL && ill->ill_isv6); 1329 sin6 = (sin6_t *)&lnr->lnr_addr; 1330 addr = &sin6->sin6_addr; 1331 1332 /* 1333 * NOTE: if the ill is an IPMP interface, then match against the whole 1334 * illgrp. This e.g. allows in.ndpd to retrieve the link layer 1335 * addresses for the data addresses on an IPMP interface even though 1336 * ipif_ndp_up() created them with an nce_ill of ipif_bound_ill. 
1337 */ 1338 nce = ndp_lookup_v6(ill, IS_IPMP(ill), addr, B_FALSE); 1339 if (nce == NULL) 1340 return (ESRCH); 1341 /* If in INCOMPLETE state, no link layer address is available yet */ 1342 if (nce->nce_state == ND_INCOMPLETE) 1343 goto done; 1344 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1345 if (ill->ill_flags & ILLF_XRESOLV) 1346 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1347 else 1348 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1349 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1350 sizeof (lnr->lnr_hdw_addr)); 1351 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1352 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1353 if (nce->nce_flags & NCE_F_ISROUTER) 1354 lnr->lnr_flags = NDF_ISROUTER_ON; 1355 if (nce->nce_flags & NCE_F_ANYCAST) 1356 lnr->lnr_flags |= NDF_ANYCAST_ON; 1357 done: 1358 NCE_REFRELE(nce); 1359 return (0); 1360 } 1361 1362 /* 1363 * Send Enable/Disable multicast reqs to driver. 1364 */ 1365 int 1366 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1367 uint32_t hw_addr_offset, mblk_t *mp) 1368 { 1369 nce_t *nce; 1370 uchar_t *hw_addr; 1371 ip_stack_t *ipst = ill->ill_ipst; 1372 1373 ASSERT(ill != NULL && ill->ill_isv6); 1374 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1375 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1376 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1377 freemsg(mp); 1378 return (EINVAL); 1379 } 1380 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1381 nce = nce_lookup_mapping(ill, addr); 1382 if (nce == NULL) { 1383 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1384 freemsg(mp); 1385 return (ESRCH); 1386 } 1387 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1388 /* 1389 * Update dl_addr_length and dl_addr_offset for primitives that 1390 * have physical addresses as opposed to full saps 1391 */ 1392 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1393 case DL_ENABMULTI_REQ: 1394 /* Track the state if this is the first enabmulti */ 1395 if 
(ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1396 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1397 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1398 break; 1399 case DL_DISABMULTI_REQ: 1400 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1401 break; 1402 default: 1403 NCE_REFRELE(nce); 1404 ip1dbg(("ndp_mcastreq: default\n")); 1405 return (EINVAL); 1406 } 1407 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1408 NCE_REFRELE(nce); 1409 ill_dlpi_send(ill, mp); 1410 return (0); 1411 } 1412 1413 /* 1414 * Send a neighbor solicitation. 1415 * Returns number of milliseconds after which we should either rexmit or abort. 1416 * Return of zero means we should abort. 1417 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1418 * 1419 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1420 * the packet. 1421 * NOTE: This routine does not consume mp. 1422 */ 1423 uint32_t 1424 nce_solicit(nce_t *nce, mblk_t *mp) 1425 { 1426 ip6_t *ip6h; 1427 in6_addr_t sender; 1428 boolean_t dropped; 1429 1430 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1431 1432 if (nce->nce_rcnt == 0) 1433 return (0); 1434 1435 if (mp == NULL) { 1436 ASSERT(nce->nce_qd_mp != NULL); 1437 mp = nce->nce_qd_mp; 1438 } else { 1439 nce_queue_mp(nce, mp); 1440 } 1441 1442 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1443 if (mp->b_datap->db_type == M_CTL) 1444 mp = mp->b_cont; 1445 1446 ip6h = (ip6_t *)mp->b_rptr; 1447 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1448 /* 1449 * This message should have been pulled up already in 1450 * ip_wput_v6. We can't do pullups here because the message 1451 * could be from the nce_qd_mp which could have b_next/b_prev 1452 * non-NULL. 1453 */ 1454 ASSERT(MBLKL(mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN); 1455 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1456 } 1457 1458 /* 1459 * Need to copy the sender address into a local since `mp' can 1460 * go away once we drop nce_lock. 
1461 */ 1462 sender = ip6h->ip6_src; 1463 nce->nce_rcnt--; 1464 mutex_exit(&nce->nce_lock); 1465 dropped = nce_xmit_solicit(nce, B_TRUE, &sender, 0); 1466 mutex_enter(&nce->nce_lock); 1467 if (dropped) 1468 nce->nce_rcnt++; 1469 return (nce->nce_ill->ill_reachable_retrans_time); 1470 } 1471 1472 /* 1473 * Attempt to recover an address on an interface that's been marked as a 1474 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1475 * no easy way to just probe the address and have the right thing happen if 1476 * it's no longer in use. Instead, we just bring it up normally and allow the 1477 * regular interface start-up logic to probe for a remaining duplicate and take 1478 * us back down if necessary. 1479 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1480 * ip_ndp_excl. 1481 */ 1482 /* ARGSUSED */ 1483 static void 1484 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1485 { 1486 ill_t *ill = rq->q_ptr; 1487 ipif_t *ipif; 1488 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1489 1490 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1491 /* 1492 * We do not support recovery of proxy ARP'd interfaces, 1493 * because the system lacks a complete proxy ARP mechanism. 1494 */ 1495 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1496 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1497 continue; 1498 } 1499 1500 /* 1501 * If we have already recovered or if the interface is going 1502 * away, then ignore. 
1503 */ 1504 mutex_enter(&ill->ill_lock); 1505 if (!(ipif->ipif_flags & IPIF_DUPLICATE) || 1506 (ipif->ipif_state_flags & IPIF_CONDEMNED)) { 1507 mutex_exit(&ill->ill_lock); 1508 continue; 1509 } 1510 1511 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1512 ill->ill_ipif_dup_count--; 1513 mutex_exit(&ill->ill_lock); 1514 ipif->ipif_was_dup = B_TRUE; 1515 1516 VERIFY(ipif_ndp_up(ipif, B_TRUE) != EINPROGRESS); 1517 (void) ipif_up_done_v6(ipif); 1518 } 1519 freeb(mp); 1520 } 1521 1522 /* 1523 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1524 * As long as someone else holds the address, the interface will stay down. 1525 * When that conflict goes away, the interface is brought back up. This is 1526 * done so that accidental shutdowns of addresses aren't made permanent. Your 1527 * server will recover from a failure. 1528 * 1529 * For DHCP and temporary addresses, recovery is not done in the kernel. 1530 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1531 * 1532 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1533 */ 1534 static void 1535 ipif6_dup_recovery(void *arg) 1536 { 1537 ipif_t *ipif = arg; 1538 1539 ipif->ipif_recovery_id = 0; 1540 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1541 return; 1542 1543 /* 1544 * No lock, because this is just an optimization. 1545 */ 1546 if (ipif->ipif_state_flags & IPIF_CONDEMNED) 1547 return; 1548 1549 /* If the link is down, we'll retry this later */ 1550 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1551 return; 1552 1553 ndp_do_recovery(ipif); 1554 } 1555 1556 /* 1557 * Perform interface recovery by forcing the duplicate interfaces up and 1558 * allowing the system to determine which ones should stay up. 1559 * 1560 * Called both by recovery timer expiry and link-up notification. 
1561 */ 1562 void 1563 ndp_do_recovery(ipif_t *ipif) 1564 { 1565 ill_t *ill = ipif->ipif_ill; 1566 mblk_t *mp; 1567 ip_stack_t *ipst = ill->ill_ipst; 1568 1569 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1570 if (mp == NULL) { 1571 mutex_enter(&ill->ill_lock); 1572 if (ipif->ipif_recovery_id == 0 && 1573 !(ipif->ipif_state_flags & IPIF_CONDEMNED)) { 1574 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1575 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1576 } 1577 mutex_exit(&ill->ill_lock); 1578 } else { 1579 /* 1580 * A recovery timer may still be running if we got here from 1581 * ill_restart_dad(); cancel that timer. 1582 */ 1583 if (ipif->ipif_recovery_id != 0) 1584 (void) untimeout(ipif->ipif_recovery_id); 1585 ipif->ipif_recovery_id = 0; 1586 1587 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1588 sizeof (ipif->ipif_v6lcl_addr)); 1589 ill_refhold(ill); 1590 qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP, 1591 B_FALSE); 1592 } 1593 } 1594 1595 /* 1596 * Find the MAC and IP addresses in an NA/NS message. 1597 */ 1598 static void 1599 ip_ndp_find_addresses(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, in6_addr_t *targp, 1600 uchar_t **haddr, uint_t *haddrlenp) 1601 { 1602 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1603 icmp6_t *icmp6 = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1604 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 1605 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 1606 uchar_t *addr; 1607 int alen = 0; 1608 1609 if (dl_mp == NULL) { 1610 nd_opt_hdr_t *opt = NULL; 1611 int len; 1612 1613 /* 1614 * If it's from the fast-path, then it can't be a probe 1615 * message, and thus must include a linkaddr option. 1616 * Extract that here. 
1617 */ 1618 switch (icmp6->icmp6_type) { 1619 case ND_NEIGHBOR_SOLICIT: 1620 len = mp->b_wptr - (uchar_t *)ns; 1621 if ((len -= sizeof (*ns)) > 0) { 1622 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), 1623 len, ND_OPT_SOURCE_LINKADDR); 1624 } 1625 break; 1626 case ND_NEIGHBOR_ADVERT: 1627 len = mp->b_wptr - (uchar_t *)na; 1628 if ((len -= sizeof (*na)) > 0) { 1629 opt = ndp_get_option((nd_opt_hdr_t *)(na + 1), 1630 len, ND_OPT_TARGET_LINKADDR); 1631 } 1632 break; 1633 } 1634 1635 if (opt != NULL && opt->nd_opt_len * 8 - sizeof (*opt) >= 1636 ill->ill_nd_lla_len) { 1637 addr = (uchar_t *)(opt + 1); 1638 alen = ill->ill_nd_lla_len; 1639 } 1640 1641 /* 1642 * We cheat a bit here for the sake of printing usable log 1643 * messages in the rare case where the reply we got was unicast 1644 * without a source linkaddr option, and the interface is in 1645 * fastpath mode. (Sigh.) 1646 */ 1647 if (alen == 0 && ill->ill_type == IFT_ETHER && 1648 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1649 struct ether_header *pether; 1650 1651 pether = (struct ether_header *)((char *)ip6h - 1652 sizeof (*pether)); 1653 addr = pether->ether_shost.ether_addr_octet; 1654 alen = ETHERADDRL; 1655 } 1656 } else { 1657 dl_unitdata_ind_t *dlu; 1658 1659 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1660 alen = dlu->dl_src_addr_length; 1661 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1662 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1663 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1664 if (ill->ill_sap_length < 0) { 1665 alen += ill->ill_sap_length; 1666 } else { 1667 addr += ill->ill_sap_length; 1668 alen -= ill->ill_sap_length; 1669 } 1670 } 1671 } 1672 1673 if (alen > 0) { 1674 *haddr = addr; 1675 *haddrlenp = alen; 1676 } else { 1677 *haddr = NULL; 1678 *haddrlenp = 0; 1679 } 1680 1681 /* nd_ns_target and nd_na_target are at the same offset, so we cheat */ 1682 *targp = ns->nd_ns_target; 1683 } 1684 1685 /* 1686 * This is for exclusive changes due to NDP duplicate 
address detection 1687 * failure. 1688 */ 1689 /* ARGSUSED */ 1690 static void 1691 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1692 { 1693 ill_t *ill = rq->q_ptr; 1694 ipif_t *ipif; 1695 mblk_t *dl_mp = NULL; 1696 uchar_t *haddr; 1697 uint_t haddrlen; 1698 ip_stack_t *ipst = ill->ill_ipst; 1699 in6_addr_t targ; 1700 1701 if (DB_TYPE(mp) != M_DATA) { 1702 dl_mp = mp; 1703 mp = mp->b_cont; 1704 } 1705 1706 ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen); 1707 if (haddr != NULL && haddrlen == ill->ill_phys_addr_length) { 1708 /* 1709 * Ignore conflicts generated by misbehaving switches that 1710 * just reflect our own messages back to us. For IPMP, we may 1711 * see reflections across any ill in the illgrp. 1712 */ 1713 if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 || 1714 IS_UNDER_IPMP(ill) && 1715 ipmp_illgrp_find_ill(ill->ill_grp, haddr, haddrlen) != NULL) 1716 goto ignore_conflict; 1717 } 1718 1719 /* 1720 * Look up the appropriate ipif. 1721 */ 1722 ipif = ipif_lookup_addr_v6(&targ, ill, ALL_ZONES, NULL, NULL, NULL, 1723 NULL, ipst); 1724 if (ipif == NULL) 1725 goto ignore_conflict; 1726 1727 /* Reload the ill to match the ipif */ 1728 ill = ipif->ipif_ill; 1729 1730 /* If it's already duplicate or ineligible, then don't do anything. */ 1731 if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) { 1732 ipif_refrele(ipif); 1733 goto ignore_conflict; 1734 } 1735 1736 /* 1737 * If this is a failure during duplicate recovery, then don't 1738 * complain. It may take a long time to recover. 
1739 */ 1740 if (!ipif->ipif_was_dup) { 1741 char ibuf[LIFNAMSIZ]; 1742 char hbuf[MAC_STR_LEN]; 1743 char sbuf[INET6_ADDRSTRLEN]; 1744 1745 ipif_get_name(ipif, ibuf, sizeof (ibuf)); 1746 cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);" 1747 " disabled", ibuf, 1748 inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)), 1749 mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf))); 1750 } 1751 mutex_enter(&ill->ill_lock); 1752 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1753 ipif->ipif_flags |= IPIF_DUPLICATE; 1754 ill->ill_ipif_dup_count++; 1755 mutex_exit(&ill->ill_lock); 1756 (void) ipif_down(ipif, NULL, NULL); 1757 ipif_down_tail(ipif); 1758 mutex_enter(&ill->ill_lock); 1759 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1760 ill->ill_net_type == IRE_IF_RESOLVER && 1761 !(ipif->ipif_state_flags & IPIF_CONDEMNED) && 1762 ipst->ips_ip_dup_recovery > 0) { 1763 ASSERT(ipif->ipif_recovery_id == 0); 1764 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1765 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1766 } 1767 mutex_exit(&ill->ill_lock); 1768 ipif_refrele(ipif); 1769 ignore_conflict: 1770 if (dl_mp != NULL) 1771 freeb(dl_mp); 1772 freemsg(mp); 1773 } 1774 1775 /* 1776 * Handle failure by tearing down the ipifs with the specified address. Note 1777 * that tearing down the ipif also means deleting the nce through ipif_down, so 1778 * it's not possible to do recovery by just restarting the nce timer. Instead, 1779 * we start a timer on the ipif. 
1780 */ 1781 static void 1782 ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1783 { 1784 if ((mp = copymsg(mp)) != NULL) { 1785 if (dl_mp == NULL) 1786 dl_mp = mp; 1787 else if ((dl_mp = copyb(dl_mp)) != NULL) 1788 dl_mp->b_cont = mp; 1789 if (dl_mp == NULL) { 1790 freemsg(mp); 1791 } else { 1792 ill_refhold(ill); 1793 qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP, 1794 B_FALSE); 1795 } 1796 } 1797 } 1798 1799 /* 1800 * Handle a discovered conflict: some other system is advertising that it owns 1801 * one of our IP addresses. We need to defend ourselves, or just shut down the 1802 * interface. 1803 */ 1804 static void 1805 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1806 { 1807 ipif_t *ipif; 1808 uint32_t now; 1809 uint_t maxdefense; 1810 uint_t defs; 1811 ip_stack_t *ipst = ill->ill_ipst; 1812 1813 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 1814 NULL, NULL, ipst); 1815 if (ipif == NULL) 1816 return; 1817 1818 /* 1819 * First, figure out if this address is disposable. 1820 */ 1821 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1822 maxdefense = ipst->ips_ip_max_temp_defend; 1823 else 1824 maxdefense = ipst->ips_ip_max_defend; 1825 1826 /* 1827 * Now figure out how many times we've defended ourselves. Ignore 1828 * defenses that happened long in the past. 1829 */ 1830 now = gethrestime_sec(); 1831 mutex_enter(&nce->nce_lock); 1832 if ((defs = nce->nce_defense_count) > 0 && 1833 now - nce->nce_defense_time > ipst->ips_ip_defend_interval) { 1834 nce->nce_defense_count = defs = 0; 1835 } 1836 nce->nce_defense_count++; 1837 nce->nce_defense_time = now; 1838 mutex_exit(&nce->nce_lock); 1839 ipif_refrele(ipif); 1840 1841 /* 1842 * If we've defended ourselves too many times already, then give up and 1843 * tear down the interface(s) using this address. Otherwise, defend by 1844 * sending out an unsolicited Neighbor Advertisement. 
1845 */ 1846 if (defs >= maxdefense) { 1847 ip_ndp_failure(ill, mp, dl_mp); 1848 } else { 1849 char hbuf[MAC_STR_LEN]; 1850 char sbuf[INET6_ADDRSTRLEN]; 1851 uchar_t *haddr; 1852 uint_t haddrlen; 1853 in6_addr_t targ; 1854 1855 ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen); 1856 cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1857 mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)), 1858 inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)), 1859 ill->ill_name); 1860 1861 (void) nce_xmit_advert(nce, B_FALSE, &ipv6_all_hosts_mcast, 0); 1862 } 1863 } 1864 1865 static void 1866 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1867 { 1868 nd_neighbor_solicit_t *ns; 1869 uint32_t hlen = ill->ill_nd_lla_len; 1870 uchar_t *haddr = NULL; 1871 icmp6_t *icmp_nd; 1872 ip6_t *ip6h; 1873 nce_t *our_nce = NULL; 1874 in6_addr_t target; 1875 in6_addr_t src; 1876 int len; 1877 int flag = 0; 1878 nd_opt_hdr_t *opt = NULL; 1879 boolean_t bad_solicit = B_FALSE; 1880 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1881 1882 ip6h = (ip6_t *)mp->b_rptr; 1883 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1884 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1885 src = ip6h->ip6_src; 1886 ns = (nd_neighbor_solicit_t *)icmp_nd; 1887 target = ns->nd_ns_target; 1888 if (IN6_IS_ADDR_MULTICAST(&target)) { 1889 if (ip_debug > 2) { 1890 /* ip1dbg */ 1891 pr_addr_dbg("ndp_input_solicit: Target is" 1892 " multicast! 
%s\n", AF_INET6, &target); 1893 } 1894 bad_solicit = B_TRUE; 1895 goto done; 1896 } 1897 if (len > sizeof (nd_neighbor_solicit_t)) { 1898 /* Options present */ 1899 opt = (nd_opt_hdr_t *)&ns[1]; 1900 len -= sizeof (nd_neighbor_solicit_t); 1901 if (!ndp_verify_optlen(opt, len)) { 1902 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1903 bad_solicit = B_TRUE; 1904 goto done; 1905 } 1906 1907 } 1908 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1909 /* Check to see if this is a valid DAD solicitation */ 1910 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1911 if (ip_debug > 2) { 1912 /* ip1dbg */ 1913 pr_addr_dbg("ndp_input_solicit: IPv6 " 1914 "Destination is not solicited node " 1915 "multicast %s\n", AF_INET6, 1916 &ip6h->ip6_dst); 1917 } 1918 bad_solicit = B_TRUE; 1919 goto done; 1920 } 1921 } 1922 1923 /* 1924 * NOTE: with IPMP, it's possible the nominated multicast ill (which 1925 * received this packet if it's multicast) is not the ill tied to 1926 * e.g. the IPMP ill's data link-local. So we match across the illgrp 1927 * to ensure we find the associated NCE. 1928 */ 1929 our_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE); 1930 /* 1931 * If this is a valid Solicitation, a permanent 1932 * entry should exist in the cache 1933 */ 1934 if (our_nce == NULL || 1935 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1936 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 
1937 "ifname=%s ", ill->ill_name)); 1938 if (ip_debug > 2) { 1939 /* ip1dbg */ 1940 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1941 } 1942 bad_solicit = B_TRUE; 1943 goto done; 1944 } 1945 1946 /* At this point we should have a verified NS per spec */ 1947 if (opt != NULL) { 1948 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1949 if (opt != NULL) { 1950 haddr = (uchar_t *)&opt[1]; 1951 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 1952 hlen == 0) { 1953 ip1dbg(("ndp_input_solicit: bad SLLA\n")); 1954 bad_solicit = B_TRUE; 1955 goto done; 1956 } 1957 } 1958 } 1959 1960 /* If sending directly to peer, set the unicast flag */ 1961 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 1962 flag |= NDP_UNICAST; 1963 1964 /* 1965 * Create/update the entry for the soliciting node. 1966 * or respond to outstanding queries, don't if 1967 * the source is unspecified address. 1968 */ 1969 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1970 int err; 1971 nce_t *nnce; 1972 1973 ASSERT(ill->ill_isv6); 1974 /* 1975 * Regular solicitations *must* include the Source Link-Layer 1976 * Address option. Ignore messages that do not. 1977 */ 1978 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1979 ip1dbg(("ndp_input_solicit: source link-layer address " 1980 "option missing with a specified source.\n")); 1981 bad_solicit = B_TRUE; 1982 goto done; 1983 } 1984 1985 /* 1986 * This is a regular solicitation. If we're still in the 1987 * process of verifying the address, then don't respond at all 1988 * and don't keep track of the sender. 1989 */ 1990 if (our_nce->nce_state == ND_PROBE) 1991 goto done; 1992 1993 /* 1994 * If the solicitation doesn't have sender hardware address 1995 * (legal for unicast solicitation), then process without 1996 * installing the return NCE. Either we already know it, or 1997 * we'll be forced to look it up when (and if) we reply to the 1998 * packet. 
1999 */ 2000 if (haddr == NULL) 2001 goto no_source; 2002 2003 err = ndp_lookup_then_add_v6(ill, 2004 B_FALSE, 2005 haddr, 2006 &src, /* Soliciting nodes address */ 2007 &ipv6_all_ones, 2008 &ipv6_all_zeros, 2009 0, 2010 0, 2011 ND_STALE, 2012 &nnce); 2013 switch (err) { 2014 case 0: 2015 /* done with this entry */ 2016 NCE_REFRELE(nnce); 2017 break; 2018 case EEXIST: 2019 /* 2020 * B_FALSE indicates this is not an an advertisement. 2021 */ 2022 ndp_process(nnce, haddr, 0, B_FALSE); 2023 NCE_REFRELE(nnce); 2024 break; 2025 default: 2026 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 2027 err)); 2028 goto done; 2029 } 2030 no_source: 2031 flag |= NDP_SOLICITED; 2032 } else { 2033 /* 2034 * No source link layer address option should be present in a 2035 * valid DAD request. 2036 */ 2037 if (haddr != NULL) { 2038 ip1dbg(("ndp_input_solicit: source link-layer address " 2039 "option present with an unspecified source.\n")); 2040 bad_solicit = B_TRUE; 2041 goto done; 2042 } 2043 if (our_nce->nce_state == ND_PROBE) { 2044 /* 2045 * Internally looped-back probes won't have DLPI 2046 * attached to them. External ones (which are sent by 2047 * multicast) always will. Just ignore our own 2048 * transmissions. 2049 */ 2050 if (dl_mp != NULL) { 2051 /* 2052 * If someone else is probing our address, then 2053 * we've crossed wires. Declare failure. 2054 */ 2055 ip_ndp_failure(ill, mp, dl_mp); 2056 } 2057 goto done; 2058 } 2059 /* 2060 * This is a DAD probe. Multicast the advertisement to the 2061 * all-nodes address. 
2062 */ 2063 src = ipv6_all_hosts_mcast; 2064 } 2065 /* Response to a solicitation */ 2066 (void) nce_xmit_advert(our_nce, B_TRUE, &src, flag); 2067 done: 2068 if (bad_solicit) 2069 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 2070 if (our_nce != NULL) 2071 NCE_REFRELE(our_nce); 2072 } 2073 2074 void 2075 ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2076 { 2077 nd_neighbor_advert_t *na; 2078 uint32_t hlen = ill->ill_nd_lla_len; 2079 uchar_t *haddr = NULL; 2080 icmp6_t *icmp_nd; 2081 ip6_t *ip6h; 2082 nce_t *dst_nce = NULL; 2083 in6_addr_t target; 2084 nd_opt_hdr_t *opt = NULL; 2085 int len; 2086 ip_stack_t *ipst = ill->ill_ipst; 2087 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2088 2089 ip6h = (ip6_t *)mp->b_rptr; 2090 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2091 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2092 na = (nd_neighbor_advert_t *)icmp_nd; 2093 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 2094 (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 2095 ip1dbg(("ndp_input_advert: Target is multicast but the " 2096 "solicited flag is not zero\n")); 2097 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2098 return; 2099 } 2100 target = na->nd_na_target; 2101 if (IN6_IS_ADDR_MULTICAST(&target)) { 2102 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 2103 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2104 return; 2105 } 2106 if (len > sizeof (nd_neighbor_advert_t)) { 2107 opt = (nd_opt_hdr_t *)&na[1]; 2108 if (!ndp_verify_optlen(opt, 2109 len - sizeof (nd_neighbor_advert_t))) { 2110 ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); 2111 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2112 return; 2113 } 2114 /* At this point we have a verified NA per spec */ 2115 len -= sizeof (nd_neighbor_advert_t); 2116 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 2117 if (opt != NULL) { 2118 haddr = (uchar_t *)&opt[1]; 2119 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 2120 hlen == 0) { 2121 
ip1dbg(("ndp_input_advert: bad SLLA\n")); 2122 BUMP_MIB(mib, 2123 ipv6IfIcmpInBadNeighborAdvertisements); 2124 return; 2125 } 2126 } 2127 } 2128 2129 /* 2130 * NOTE: we match across the illgrp since we need to do DAD for all of 2131 * our local addresses, and those are spread across all the active 2132 * ills in the group. 2133 */ 2134 if ((dst_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE)) == NULL) 2135 return; 2136 2137 if (dst_nce->nce_flags & NCE_F_PERMANENT) { 2138 /* 2139 * Someone just advertised one of our local addresses. First, 2140 * check it it was us -- if so, we can safely ignore it. 2141 */ 2142 if (haddr != NULL) { 2143 if (!nce_cmp_ll_addr(dst_nce, haddr, hlen)) 2144 goto out; /* from us -- no conflict */ 2145 2146 /* 2147 * If we're in an IPMP group, check if this is an echo 2148 * from another ill in the group. Use the double- 2149 * checked locking pattern to avoid grabbing 2150 * ill_g_lock in the non-IPMP case. 2151 */ 2152 if (IS_UNDER_IPMP(ill)) { 2153 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2154 if (IS_UNDER_IPMP(ill) && ipmp_illgrp_find_ill( 2155 ill->ill_grp, haddr, hlen) != NULL) { 2156 rw_exit(&ipst->ips_ill_g_lock); 2157 goto out; 2158 } 2159 rw_exit(&ipst->ips_ill_g_lock); 2160 } 2161 } 2162 2163 /* 2164 * Our own (looped-back) unsolicited neighbor advertisements 2165 * will get here with dl_mp == NULL. (These will usually be 2166 * filtered by the `haddr' checks above, but point-to-point 2167 * links have no hardware address and thus make it here.) 2168 */ 2169 if (dl_mp == NULL && dst_nce->nce_state != ND_PROBE) 2170 goto out; 2171 2172 /* 2173 * This appears to be a real conflict. If we're trying to 2174 * configure this NCE (ND_PROBE), then shut it down. 2175 * Otherwise, handle the discovered conflict. 2176 * 2177 * In the ND_PROBE case, dl_mp might be NULL if we're getting 2178 * a unicast reply. This isn't typically done (multicast is 2179 * the norm in response to a probe), but we can handle it. 
2180 */ 2181 if (dst_nce->nce_state == ND_PROBE) 2182 ip_ndp_failure(ill, mp, dl_mp); 2183 else 2184 ip_ndp_conflict(ill, mp, dl_mp, dst_nce); 2185 } else { 2186 if (na->nd_na_flags_reserved & ND_NA_FLAG_ROUTER) 2187 dst_nce->nce_flags |= NCE_F_ISROUTER; 2188 2189 /* B_TRUE indicates this an advertisement */ 2190 ndp_process(dst_nce, haddr, na->nd_na_flags_reserved, B_TRUE); 2191 } 2192 out: 2193 NCE_REFRELE(dst_nce); 2194 } 2195 2196 /* 2197 * Process NDP neighbor solicitation/advertisement messages. 2198 * The checksum has already checked o.k before reaching here. 2199 */ 2200 void 2201 ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2202 { 2203 icmp6_t *icmp_nd; 2204 ip6_t *ip6h; 2205 int len; 2206 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2207 2208 2209 if (!pullupmsg(mp, -1)) { 2210 ip1dbg(("ndp_input: pullupmsg failed\n")); 2211 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2212 goto done; 2213 } 2214 ip6h = (ip6_t *)mp->b_rptr; 2215 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 2216 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 2217 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 2218 goto done; 2219 } 2220 /* 2221 * NDP does not accept any extension headers between the 2222 * IP header and the ICMP header since e.g. a routing 2223 * header could be dangerous. 2224 * This assumes that any AH or ESP headers are removed 2225 * by ip prior to passing the packet to ndp_input. 
2226 */ 2227 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 2228 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 2229 ip6h->ip6_nxt)); 2230 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2231 goto done; 2232 } 2233 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2234 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 2235 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 2236 if (icmp_nd->icmp6_code != 0) { 2237 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 2238 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2239 goto done; 2240 } 2241 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2242 /* 2243 * Make sure packet length is large enough for either 2244 * a NS or a NA icmp packet. 2245 */ 2246 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 2247 ip1dbg(("ndp_input: packet too short\n")); 2248 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2249 goto done; 2250 } 2251 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 2252 ndp_input_solicit(ill, mp, dl_mp); 2253 } else { 2254 ndp_input_advert(ill, mp, dl_mp); 2255 } 2256 done: 2257 freemsg(mp); 2258 } 2259 2260 /* 2261 * Utility routine to send an advertisement. Assumes that the NCE cannot 2262 * go away (e.g., because it's refheld). 2263 */ 2264 static boolean_t 2265 nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *target, 2266 uint_t flags) 2267 { 2268 ASSERT((flags & NDP_PROBE) == 0); 2269 2270 if (nce->nce_flags & NCE_F_ISROUTER) 2271 flags |= NDP_ISROUTER; 2272 if (!(nce->nce_flags & NCE_F_ANYCAST)) 2273 flags |= NDP_ORIDE; 2274 2275 return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_ADVERT, use_nd_lla, 2276 &nce->nce_addr, target, flags)); 2277 } 2278 2279 /* 2280 * Utility routine to send a solicitation. Assumes that the NCE cannot 2281 * go away (e.g., because it's refheld). 
*/
static boolean_t
nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *sender,
    uint_t flags)
{
	/* Probes always go out with the unspecified (all-zeros) source */
	if (flags & NDP_PROBE)
		sender = &ipv6_all_zeros;

	return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, use_nd_lla,
	    sender, &nce->nce_addr, flags));
}

/*
 * nce_xmit is called to form and transmit a ND solicitation or
 * advertisement ICMP packet.
 *
 * If the source address is unspecified and this isn't a probe (used for
 * duplicate address detection), an appropriate source address and link layer
 * address will be chosen here.  The link layer address option is included if
 * the source is specified (i.e., all non-probe packets), and omitted (per the
 * specification) otherwise.
 *
 * It returns B_FALSE only if it does a successful put() to the
 * corresponding ill's ill_wq otherwise returns B_TRUE.
 *
 *	ill		interface to transmit on (must not be an IPMP ill)
 *	type		ND_NEIGHBOR_SOLICIT or ND_NEIGHBOR_ADVERT
 *	use_nd_lla	B_TRUE to advertise ill_nd_lla, B_FALSE to advertise
 *			ill_phys_addr
 *	sender		source address, or the unspecified address
 *	target		address being solicited, or destination of the advert
 *	flag		NDP_* flags (NDP_PROBE, NDP_UNICAST, NDP_ISROUTER,
 *			NDP_SOLICITED, NDP_ORIDE)
 */
static boolean_t
nce_xmit(ill_t *ill, uint8_t type, boolean_t use_nd_lla,
    const in6_addr_t *sender, const in6_addr_t *target, int flag)
{
	ill_t		*hwaddr_ill;
	uint32_t	len;
	icmp6_t		*icmp6;
	mblk_t		*mp;
	ip6_t		*ip6h;
	nd_opt_hdr_t	*opt;
	uint_t		plen, maxplen;
	ip6i_t		*ip6i;
	ipif_t		*src_ipif = NULL;
	uint8_t		*hw_addr;
	zoneid_t	zoneid = GLOBAL_ZONEID;
	char		buf[INET6_ADDRSTRLEN];

	ASSERT(!IS_IPMP(ill));

	/*
	 * Check that the sender is actually a usable address on `ill', and if
	 * so, track that as the src_ipif.  If not, for solicitations, set the
	 * sender to :: so that a new one will be picked below; for adverts,
	 * drop the packet since we expect nce_xmit_advert() to always provide
	 * a valid sender.
	 */
	if (!IN6_IS_ADDR_UNSPECIFIED(sender)) {
		if ((src_ipif = ip_ndp_lookup_addr_v6(sender, ill)) == NULL ||
		    !src_ipif->ipif_addr_ready) {
			if (src_ipif != NULL) {
				ipif_refrele(src_ipif);
				src_ipif = NULL;
			}
			if (type == ND_NEIGHBOR_ADVERT) {
				ip1dbg(("nce_xmit: No source ipif for src %s\n",
				    inet_ntop(AF_INET6, sender, buf,
				    sizeof (buf))));
				return (B_TRUE);
			}
			sender = &ipv6_all_zeros;
		}
	}

	/*
	 * If we still have an unspecified source (sender) address and this
	 * isn't a probe, select a source address from `ill'.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) {
		ASSERT(type != ND_NEIGHBOR_ADVERT);
		/*
		 * Pick a source address for this solicitation, but restrict
		 * the selection to addresses assigned to the output
		 * interface.  We do this because the destination will create
		 * a neighbor cache entry for the source address of this
		 * packet, so the source address needs to be a valid neighbor.
		 */
		src_ipif = ipif_select_source_v6(ill, target, B_TRUE,
		    IPV6_PREFER_SRC_DEFAULT, ALL_ZONES);
		if (src_ipif == NULL) {
			ip1dbg(("nce_xmit: No source ipif for dst %s\n",
			    inet_ntop(AF_INET6, target, buf, sizeof (buf))));
			return (B_TRUE);
		}
		sender = &src_ipif->ipif_v6src_addr;
	}

	/*
	 * We're either sending a probe or we have a source address.
	 */
	ASSERT((flag & NDP_PROBE) || src_ipif != NULL);

	/*
	 * Allocate for the largest possible link-layer address option; the
	 * packet is trimmed further down once the real option length (plen)
	 * is known.
	 */
	maxplen = roundup(sizeof (nd_opt_hdr_t) + ND_MAX_HDW_LEN, 8);
	len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) +
	    maxplen;
	mp = allocb(len, BPRI_LO);
	if (mp == NULL) {
		if (src_ipif != NULL)
			ipif_refrele(src_ipif);
		return (B_TRUE);
	}
	bzero((char *)mp->b_rptr, len);
	mp->b_wptr = mp->b_rptr + len;

	/* The packet starts with an ip6i_t, followed by the IPv6 header */
	ip6i = (ip6i_t *)mp->b_rptr;
	ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6i->ip6i_nxt = IPPROTO_RAW;
	ip6i->ip6i_flags = IP6I_HOPLIMIT;
	if (flag & NDP_PROBE)
		ip6i->ip6i_flags |= IP6I_UNSPEC_SRC;

	ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t));
	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t));
	ip6h->ip6_nxt = IPPROTO_ICMPV6;
	ip6h->ip6_hops = IPV6_MAX_HOPS;
	ip6h->ip6_src = *sender;
	ip6h->ip6_dst = *target;
	icmp6 = (icmp6_t *)&ip6h[1];

	/* The option, if any, sits right after the fixed ND message */
	opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN +
	    sizeof (nd_neighbor_advert_t));

	if (type == ND_NEIGHBOR_SOLICIT) {
		nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6;

		if (!(flag & NDP_PROBE))
			opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
		ns->nd_ns_target = *target;
		if (!(flag & NDP_UNICAST)) {
			/* Form multicast address of the target */
			ip6h->ip6_dst = ipv6_solicited_node_mcast;
			ip6h->ip6_dst.s6_addr32[3] |=
			    ns->nd_ns_target.s6_addr32[3];
		}
	} else {
		nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6;

		ASSERT(!(flag & NDP_PROBE));
		opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
		na->nd_na_target = *sender;
		if (flag & NDP_ISROUTER)
			na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER;
		if (flag & NDP_SOLICITED)
			na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED;
		if (flag & NDP_ORIDE)
			na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE;
	}

	hw_addr = NULL;
	if (!(flag & NDP_PROBE)) {
		/*
		 * Use our source address to find the hardware address to put
		 * in the packet, so that the hardware address and IP address
		 * will match up -- even if that hardware address doesn't
		 * match the ill we actually transmit the packet through.
		 */
		if (IS_IPMP(src_ipif->ipif_ill)) {
			hwaddr_ill = ipmp_ipif_hold_bound_ill(src_ipif);
			if (hwaddr_ill == NULL) {
				ip1dbg(("nce_xmit: no bound ill!\n"));
				ipif_refrele(src_ipif);
				freemsg(mp);
				return (B_TRUE);
			}
		} else {
			hwaddr_ill = src_ipif->ipif_ill;
			ill_refhold(hwaddr_ill);	/* for symmetry */
		}

		plen = roundup(sizeof (nd_opt_hdr_t) +
		    hwaddr_ill->ill_nd_lla_len, 8);

		hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla :
		    hwaddr_ill->ill_phys_addr;
		if (hw_addr != NULL) {
			/* Fill in link layer address and option len */
			opt->nd_opt_len = (uint8_t)(plen / 8);
			bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len);
		}

		ill_refrele(hwaddr_ill);
	}

	/* No hardware address (or a probe): send no option at all */
	if (hw_addr == NULL)
		plen = 0;

	/* Fix up the length of the packet now that plen is known */
	len -= (maxplen - plen);
	mp->b_wptr = mp->b_rptr + len;
	ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t));

	icmp6->icmp6_type = type;
	icmp6->icmp6_code = 0;
	/*
	 * Prepare for checksum by putting icmp length in the icmp
	 * checksum field. The checksum is calculated in ip_wput_v6.
	 */
	icmp6->icmp6_cksum = ip6h->ip6_plen;

	/*
	 * Before we toss the src_ipif, look up the zoneid to pass to
	 * ip_output_v6().  This is to ensure unicast ND_NEIGHBOR_ADVERT
	 * packets to be routed correctly by IP (we cannot guarantee that the
	 * global zone has an interface route to the destination).
	 */
	if (src_ipif != NULL) {
		if ((zoneid = src_ipif->ipif_zoneid) == ALL_ZONES)
			zoneid = GLOBAL_ZONEID;
		ipif_refrele(src_ipif);
	}

	ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT);
	return (B_FALSE);
}

/*
 * Make a link layer address (does not include the SAP) from an nce.
 * To form the link layer address, use the last four bytes of ipv6
 * address passed in and the fixed offset stored in nce.
 *
 * The template link layer address held in nce_res_mp is copied to
 * `addrpos', and the trailing bytes of `addr', masked by
 * nce_extract_mask, are then OR-ed in starting at nce_ll_extract_start.
 * Nothing is done for IRE_IF_NORESOLVER interfaces.
 */
static void
nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr)
{
	uchar_t *mask, *to;
	ill_t	*ill = nce->nce_ill;
	int	len;

	if (ill->ill_net_type == IRE_IF_NORESOLVER)
		return;
	ASSERT(nce->nce_res_mp != NULL);
	ASSERT(ill->ill_net_type == IRE_IF_RESOLVER);
	ASSERT(nce->nce_flags & NCE_F_MAPPING);
	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask));
	ASSERT(addr != NULL);
	bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill),
	    addrpos, ill->ill_nd_lla_len);
	/* Number of bytes to merge, capped at the size of an IPv6 address */
	len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start,
	    IPV6_ADDR_LEN);
	mask = (uchar_t *)&nce->nce_extract_mask;
	/* Line up on the last `len' bytes of both the mask and the address */
	mask += (IPV6_ADDR_LEN - len);
	addr += (IPV6_ADDR_LEN - len);
	to = addrpos + nce->nce_ll_extract_start;
	while (len-- > 0)
		*to++ |= *mask++ & *addr++;
}

/*
 * Allocate a DL_UNITDATA_REQ template for `ill'.  The message is sized to
 * hold the request header plus a destination address made up of the link
 * layer address and the SAP; the priority, destination address length and
 * offset are filled in and the SAP value is copied in.
 *
 * Returns the template, or NULL if the allocation failed.
 */
mblk_t *
nce_udreq_alloc(ill_t *ill)
{
	mblk_t	*template_mp = NULL;
	dl_unitdata_req_t *dlur;
	int	sap_length;

	ASSERT(ill->ill_isv6);

	sap_length = ill->ill_sap_length;
	template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) +
	    ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ);
	if (template_mp == NULL)
		return (NULL);

	dlur = (dl_unitdata_req_t *)template_mp->b_rptr;
	dlur->dl_priority.dl_min = 0;
	dlur->dl_priority.dl_max = 0;
	dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len;
	dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t);

	/* Copy in the SAP value. */
	NCE_LL_SAP_COPY(ill, template_mp);

	return (template_mp);
}

/*
 * NDP retransmit timer.
 * This timer goes off when:
 * a. It is time to retransmit NS for resolver.
 * b. It is time to send reachability probes.
 *
 * `arg' is the nce_t the timer was armed for.  The handler takes its own
 * reference on the NCE for the duration of the call and drops it on every
 * exit path.
 */
void
ndp_timer(void *arg)
{
	nce_t		*nce = arg;
	ill_t		*ill = nce->nce_ill;
	uint32_t	ms;
	char		addrbuf[INET6_ADDRSTRLEN];
	boolean_t	dropped = B_FALSE;
	ip_stack_t	*ipst = ill->ill_ipst;

	/*
	 * The timer has to be cancelled by ndp_delete before doing the final
	 * refrele. So the NCE is guaranteed to exist when the timer runs
	 * until it clears the timeout_id. Before clearing the timeout_id
	 * bump up the refcnt so that we can continue to use the nce
	 */
	ASSERT(nce != NULL);

	mutex_enter(&nce->nce_lock);
	NCE_REFHOLD_LOCKED(nce);
	nce->nce_timeout_id = 0;

	/*
	 * Check the reachability state first.
	 */
	switch (nce->nce_state) {
	case ND_DELAY:
		/* The delay has expired: start probing with a unicast NS */
		nce->nce_state = ND_PROBE;
		mutex_exit(&nce->nce_lock);
		(void) nce_xmit_solicit(nce, B_FALSE, &ipv6_all_zeros,
		    NDP_UNICAST);
		if (ip_debug > 3) {
			/* ip2dbg */
			pr_addr_dbg("ndp_timer: state for %s changed "
			    "to PROBE\n", AF_INET6, &nce->nce_addr);
		}
		NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
		NCE_REFRELE(nce);
		return;
	case ND_PROBE:
		/* must be retransmit timer */
		nce->nce_pcnt--;
		ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT &&
		    nce->nce_pcnt >= -1);
		if (nce->nce_pcnt > 0) {
			/*
			 * As per RFC2461, the nce gets deleted after
			 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions.
			 * Note that the first unicast solicitation is sent
			 * during the DELAY state.
			 */
			ip2dbg(("ndp_timer: pcount=%x dst %s\n",
			    nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr,
			    addrbuf, sizeof (addrbuf))));
			mutex_exit(&nce->nce_lock);
			dropped = nce_xmit_solicit(nce, B_FALSE,
			    &ipv6_all_zeros,
			    (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE :
			    NDP_UNICAST);
			if (dropped) {
				/* Not sent: don't count this attempt */
				mutex_enter(&nce->nce_lock);
				nce->nce_pcnt++;
				mutex_exit(&nce->nce_lock);
			}
			NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill));
		} else if (nce->nce_pcnt < 0) {
			/* No hope, delete the nce */
			nce->nce_state = ND_UNREACHABLE;
			mutex_exit(&nce->nce_lock);
			if (ip_debug > 2) {
				/* ip1dbg */
				pr_addr_dbg("ndp_timer: Delete IRE for"
				    " dst %s\n", AF_INET6, &nce->nce_addr);
			}
			ndp_delete(nce);
		} else if (!(nce->nce_flags & NCE_F_PERMANENT)) {
			/* Wait RetransTimer, before deleting the entry */
			ip2dbg(("ndp_timer: pcount=%x dst %s\n",
			    nce->nce_pcnt, inet_ntop(AF_INET6,
			    &nce->nce_addr, addrbuf, sizeof (addrbuf))));
			mutex_exit(&nce->nce_lock);
			/* Wait one interval before killing */
			NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
		} else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) {
			ipif_t *ipif;

			/*
			 * We're done probing, and we can now declare this
			 * address to be usable.  Let IP know that it's ok to
			 * use.
			 */
			nce->nce_state = ND_REACHABLE;
			mutex_exit(&nce->nce_lock);
			ipif = ip_ndp_lookup_addr_v6(&nce->nce_addr,
			    nce->nce_ill);
			if (ipif != NULL) {
				if (ipif->ipif_was_dup) {
					char ibuf[LIFNAMSIZ + 10];
					char sbuf[INET6_ADDRSTRLEN];

					ipif->ipif_was_dup = B_FALSE;
					(void) inet_ntop(AF_INET6,
					    &ipif->ipif_v6lcl_addr,
					    sbuf, sizeof (sbuf));
					ipif_get_name(ipif, ibuf,
					    sizeof (ibuf));
					cmn_err(CE_NOTE, "recovered address "
					    "%s on %s", sbuf, ibuf);
				}
				if ((ipif->ipif_flags & IPIF_UP) &&
				    !ipif->ipif_addr_ready)
					ipif_up_notify(ipif);
				ipif->ipif_addr_ready = 1;
				ipif_refrele(ipif);
			}
			/* Begin defending our new address */
			nce->nce_unsolicit_count = 0;
			dropped = nce_xmit_advert(nce, B_FALSE,
			    &ipv6_all_hosts_mcast, 0);
			if (dropped) {
				/* Retry the advertisement shortly */
				nce->nce_unsolicit_count = 1;
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_unsolicit_interval);
			} else if (ipst->ips_ip_ndp_defense_interval != 0) {
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_defense_interval);
			}
		} else {
			/*
			 * This is an address we're probing to be our own, but
			 * the ill is down.  Wait until it comes back before
			 * doing anything, but switch to reachable state so
			 * that the restart will work.
			 */
			nce->nce_state = ND_REACHABLE;
			mutex_exit(&nce->nce_lock);
		}
		NCE_REFRELE(nce);
		return;
	case ND_INCOMPLETE: {
		ip6_t	*ip6h;
		ip6i_t	*ip6i;
		mblk_t	*mp, *datamp, *nextmp, **prevmpp;

		/*
		 * Per case (2) in the nce_queue_mp() comments, scan nce_qd_mp
		 * for any IPMP probe packets, and toss 'em.  IPMP probe
		 * packets will always be at the head of nce_qd_mp and always
		 * have an ip6i_t header, so we can stop at the first queued
		 * ND packet without an ip6i_t.
		 */
		prevmpp = &nce->nce_qd_mp;
		for (mp = nce->nce_qd_mp; mp != NULL; mp = nextmp) {
			nextmp = mp->b_next;
			datamp = (DB_TYPE(mp) == M_CTL) ? mp->b_cont : mp;
			ip6h = (ip6_t *)datamp->b_rptr;
			if (ip6h->ip6_nxt != IPPROTO_RAW)
				break;

			ip6i = (ip6i_t *)ip6h;
			if (ip6i->ip6i_flags & IP6I_IPMP_PROBE) {
				/* Unlink and free the probe */
				inet_freemsg(mp);
				*prevmpp = nextmp;
			} else {
				prevmpp = &mp->b_next;
			}
		}

		/*
		 * Must be resolver's retransmit timer.
		 */
		if (nce->nce_qd_mp != NULL) {
			/* A zero return from nce_solicit() ends resolution */
			if ((ms = nce_solicit(nce, NULL)) == 0) {
				if (nce->nce_state != ND_REACHABLE) {
					mutex_exit(&nce->nce_lock);
					nce_resolv_failed(nce);
					ndp_delete(nce);
				} else {
					mutex_exit(&nce->nce_lock);
				}
			} else {
				mutex_exit(&nce->nce_lock);
				NDP_RESTART_TIMER(nce, (clock_t)ms);
			}
			NCE_REFRELE(nce);
			return;
		}
		mutex_exit(&nce->nce_lock);
		NCE_REFRELE(nce);
		break;
	}
	case ND_REACHABLE:
		/*
		 * Send an advertisement if unsolicited advertisements are
		 * still owed for this entry, or if this is one of our own
		 * addresses and periodic defense is enabled.
		 */
		if (((nce->nce_flags & NCE_F_UNSOL_ADV) &&
		    nce->nce_unsolicit_count != 0) ||
		    ((nce->nce_flags & NCE_F_PERMANENT) &&
		    ipst->ips_ip_ndp_defense_interval != 0)) {
			if (nce->nce_unsolicit_count > 0)
				nce->nce_unsolicit_count--;
			mutex_exit(&nce->nce_lock);
			dropped = nce_xmit_advert(nce, B_FALSE,
			    &ipv6_all_hosts_mcast, 0);
			if (dropped) {
				/* Not sent: put the count back */
				mutex_enter(&nce->nce_lock);
				nce->nce_unsolicit_count++;
				mutex_exit(&nce->nce_lock);
			}
			if (nce->nce_unsolicit_count != 0) {
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_unsolicit_interval);
			} else {
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_defense_interval);
			}
		} else {
			mutex_exit(&nce->nce_lock);
		}
		NCE_REFRELE(nce);
		break;
	default:
		mutex_exit(&nce->nce_lock);
		NCE_REFRELE(nce);
		break;
	}
}

/*
 * Set a link layer address from the ll_addr passed in.
 * Copy SAP from ill.
2794 */ 2795 static void 2796 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2797 { 2798 ill_t *ill = nce->nce_ill; 2799 uchar_t *woffset; 2800 2801 ASSERT(ll_addr != NULL); 2802 /* Always called before fast_path_probe */ 2803 ASSERT(nce->nce_fp_mp == NULL); 2804 if (ill->ill_sap_length != 0) { 2805 /* 2806 * Copy the SAP type specified in the 2807 * request into the xmit template. 2808 */ 2809 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2810 } 2811 if (ill->ill_phys_addr_length > 0) { 2812 /* 2813 * The bcopy() below used to be called for the physical address 2814 * length rather than the link layer address length. For 2815 * ethernet and many other media, the phys_addr and lla are 2816 * identical. 2817 * However, with xresolv interfaces being introduced, the 2818 * phys_addr and lla are no longer the same, and the physical 2819 * address may not have any useful meaning, so we use the lla 2820 * for IPv6 address resolution and destination addressing. 2821 * 2822 * For PPP or other interfaces with a zero length 2823 * physical address, don't do anything here. 2824 * The bcopy() with a zero phys_addr length was previously 2825 * a no-op for interfaces with a zero-length physical address. 2826 * Using the lla for them would change the way they operate. 2827 * Doing nothing in such cases preserves expected behavior. 
2828 */ 2829 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2830 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2831 } 2832 } 2833 2834 static boolean_t 2835 nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) 2836 { 2837 ill_t *ill = nce->nce_ill; 2838 uchar_t *ll_offset; 2839 2840 ASSERT(nce->nce_res_mp != NULL); 2841 if (ll_addr == NULL) 2842 return (B_FALSE); 2843 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2844 if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) 2845 return (B_TRUE); 2846 return (B_FALSE); 2847 } 2848 2849 /* 2850 * Updates the link layer address or the reachability state of 2851 * a cache entry. Reset probe counter if needed. 2852 */ 2853 static void 2854 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2855 { 2856 ill_t *ill = nce->nce_ill; 2857 boolean_t need_stop_timer = B_FALSE; 2858 boolean_t need_fastpath_update = B_FALSE; 2859 2860 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2861 ASSERT(nce->nce_ipversion == IPV6_VERSION); 2862 /* 2863 * If this interface does not do NUD, there is no point 2864 * in allowing an update to the cache entry. Although 2865 * we will respond to NS. 2866 * The only time we accept an update for a resolver when 2867 * NUD is turned off is when it has just been created. 2868 * Non-Resolvers will always be created as REACHABLE. 2869 */ 2870 if (new_state != ND_UNCHANGED) { 2871 if ((nce->nce_flags & NCE_F_NONUD) && 2872 (nce->nce_state != ND_INCOMPLETE)) 2873 return; 2874 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2875 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2876 need_stop_timer = B_TRUE; 2877 if (new_state == ND_REACHABLE) 2878 nce->nce_last = TICK_TO_MSEC(lbolt64); 2879 else { 2880 /* We force NUD in this case */ 2881 nce->nce_last = 0; 2882 } 2883 nce->nce_state = new_state; 2884 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2885 } 2886 /* 2887 * In case of fast path we need to free the the fastpath 2888 * M_DATA and do another probe. 
Otherwise we can just 2889 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2890 * whatever packets that happens to be transmitting at the time. 2891 */ 2892 if (new_ll_addr != NULL) { 2893 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2894 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2895 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2896 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2897 if (nce->nce_fp_mp != NULL) { 2898 freemsg(nce->nce_fp_mp); 2899 nce->nce_fp_mp = NULL; 2900 } 2901 need_fastpath_update = B_TRUE; 2902 } 2903 mutex_exit(&nce->nce_lock); 2904 if (need_stop_timer) { 2905 (void) untimeout(nce->nce_timeout_id); 2906 nce->nce_timeout_id = 0; 2907 } 2908 if (need_fastpath_update) 2909 nce_fastpath(nce); 2910 mutex_enter(&nce->nce_lock); 2911 } 2912 2913 void 2914 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 2915 { 2916 uint_t count = 0; 2917 mblk_t **mpp, *tmp; 2918 2919 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2920 2921 for (mpp = &nce->nce_qd_mp; *mpp != NULL; mpp = &(*mpp)->b_next) { 2922 if (++count > nce->nce_ill->ill_max_buf) { 2923 tmp = nce->nce_qd_mp->b_next; 2924 nce->nce_qd_mp->b_next = NULL; 2925 nce->nce_qd_mp->b_prev = NULL; 2926 freemsg(nce->nce_qd_mp); 2927 nce->nce_qd_mp = tmp; 2928 } 2929 } 2930 2931 if (head_insert) { 2932 mp->b_next = nce->nce_qd_mp; 2933 nce->nce_qd_mp = mp; 2934 } else { 2935 *mpp = mp; 2936 } 2937 } 2938 2939 static void 2940 nce_queue_mp(nce_t *nce, mblk_t *mp) 2941 { 2942 boolean_t head_insert = B_FALSE; 2943 ip6_t *ip6h; 2944 ip6i_t *ip6i; 2945 mblk_t *data_mp; 2946 2947 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2948 2949 if (mp->b_datap->db_type == M_CTL) 2950 data_mp = mp->b_cont; 2951 else 2952 data_mp = mp; 2953 ip6h = (ip6_t *)data_mp->b_rptr; 2954 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2955 /* 2956 * This message should have been pulled up already in 2957 * ip_wput_v6. 
We can't do pullups here because the message 2958 * could be from the nce_qd_mp which could have b_next/b_prev 2959 * non-NULL. 2960 */ 2961 ip6i = (ip6i_t *)ip6h; 2962 ASSERT(MBLKL(data_mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN); 2963 2964 /* 2965 * If this packet is marked IP6I_IPMP_PROBE, then we need to: 2966 * 2967 * 1. Insert it at the head of the nce_qd_mp list. Consider 2968 * the normal (non-probe) load-speading case where the 2969 * source address of the ND packet is not tied to nce_ill. 2970 * If the ill bound to the source address cannot receive, 2971 * the response to the ND packet will not be received. 2972 * However, if ND packets for nce_ill's probes are queued 2973 * behind that ND packet, those probes will also fail to 2974 * be sent, and thus in.mpathd will erroneously conclude 2975 * that nce_ill has also failed. 2976 * 2977 * 2. Drop the probe packet in ndp_timer() if the ND did 2978 * not succeed on the first attempt. This ensures that 2979 * ND problems do not manifest as probe RTT spikes. 2980 */ 2981 if (ip6i->ip6i_flags & IP6I_IPMP_PROBE) 2982 head_insert = B_TRUE; 2983 } 2984 nce_queue_mp_common(nce, mp, head_insert); 2985 } 2986 2987 /* 2988 * Called when address resolution failed due to a timeout. 2989 * Send an ICMP unreachable in response to all queued packets. 
2990 */ 2991 void 2992 nce_resolv_failed(nce_t *nce) 2993 { 2994 mblk_t *mp, *nxt_mp, *first_mp; 2995 char buf[INET6_ADDRSTRLEN]; 2996 ip6_t *ip6h; 2997 zoneid_t zoneid = GLOBAL_ZONEID; 2998 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 2999 3000 ip1dbg(("nce_resolv_failed: dst %s\n", 3001 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 3002 mutex_enter(&nce->nce_lock); 3003 mp = nce->nce_qd_mp; 3004 nce->nce_qd_mp = NULL; 3005 mutex_exit(&nce->nce_lock); 3006 while (mp != NULL) { 3007 nxt_mp = mp->b_next; 3008 mp->b_next = NULL; 3009 mp->b_prev = NULL; 3010 3011 first_mp = mp; 3012 if (mp->b_datap->db_type == M_CTL) { 3013 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 3014 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3015 zoneid = io->ipsec_out_zoneid; 3016 ASSERT(zoneid != ALL_ZONES); 3017 mp = mp->b_cont; 3018 mp->b_next = NULL; 3019 mp->b_prev = NULL; 3020 } 3021 3022 ip6h = (ip6_t *)mp->b_rptr; 3023 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3024 ip6i_t *ip6i; 3025 /* 3026 * This message should have been pulled up already 3027 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 3028 * the header is pulled up. 3029 */ 3030 ip6i = (ip6i_t *)ip6h; 3031 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 3032 sizeof (ip6i_t) + IPV6_HDR_LEN); 3033 mp->b_rptr += sizeof (ip6i_t); 3034 } 3035 /* 3036 * Ignore failure since icmp_unreachable_v6 will silently 3037 * drop packets with an unspecified source address. 3038 */ 3039 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst); 3040 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 3041 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst); 3042 mp = nxt_mp; 3043 } 3044 } 3045 3046 /* 3047 * Called by SIOCSNDP* ioctl to add/change an nce entry 3048 * and the corresponding attributes. 3049 * Disallow states other than ND_REACHABLE or ND_STALE. 
3050 */ 3051 int 3052 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 3053 { 3054 sin6_t *sin6; 3055 in6_addr_t *addr; 3056 nce_t *nce; 3057 int err; 3058 uint16_t new_flags = 0; 3059 uint16_t old_flags = 0; 3060 int inflags = lnr->lnr_flags; 3061 ip_stack_t *ipst = ill->ill_ipst; 3062 3063 ASSERT(ill->ill_isv6); 3064 if ((lnr->lnr_state_create != ND_REACHABLE) && 3065 (lnr->lnr_state_create != ND_STALE)) 3066 return (EINVAL); 3067 3068 if (lnr->lnr_hdw_len > ND_MAX_HDW_LEN) 3069 return (EINVAL); 3070 3071 sin6 = (sin6_t *)&lnr->lnr_addr; 3072 addr = &sin6->sin6_addr; 3073 3074 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 3075 /* We know it can not be mapping so just look in the hash table */ 3076 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 3077 /* See comment in ndp_query() regarding IS_IPMP(ill) usage */ 3078 nce = nce_lookup_addr(ill, IS_IPMP(ill), addr, nce); 3079 if (nce != NULL) 3080 new_flags = nce->nce_flags; 3081 3082 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 3083 case NDF_ISROUTER_ON: 3084 new_flags |= NCE_F_ISROUTER; 3085 break; 3086 case NDF_ISROUTER_OFF: 3087 new_flags &= ~NCE_F_ISROUTER; 3088 break; 3089 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3090 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3091 if (nce != NULL) 3092 NCE_REFRELE(nce); 3093 return (EINVAL); 3094 } 3095 3096 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3097 case NDF_ANYCAST_ON: 3098 new_flags |= NCE_F_ANYCAST; 3099 break; 3100 case NDF_ANYCAST_OFF: 3101 new_flags &= ~NCE_F_ANYCAST; 3102 break; 3103 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3104 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3105 if (nce != NULL) 3106 NCE_REFRELE(nce); 3107 return (EINVAL); 3108 } 3109 3110 if (nce == NULL) { 3111 err = ndp_add_v6(ill, 3112 (uchar_t *)lnr->lnr_hdw_addr, 3113 addr, 3114 &ipv6_all_ones, 3115 &ipv6_all_zeros, 3116 0, 3117 new_flags, 3118 lnr->lnr_state_create, 3119 &nce); 3120 if (err != 0) { 3121 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3122 ip1dbg(("ndp_sioc_update: Can't 
create NCE %d\n", err)); 3123 return (err); 3124 } 3125 } 3126 old_flags = nce->nce_flags; 3127 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3128 /* 3129 * Router turned to host, delete all ires. 3130 * XXX Just delete the entry, but we need to add too. 3131 */ 3132 nce->nce_flags &= ~NCE_F_ISROUTER; 3133 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3134 ndp_delete(nce); 3135 NCE_REFRELE(nce); 3136 return (0); 3137 } 3138 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3139 3140 mutex_enter(&nce->nce_lock); 3141 nce->nce_flags = new_flags; 3142 mutex_exit(&nce->nce_lock); 3143 /* 3144 * Note that we ignore the state at this point, which 3145 * should be either STALE or REACHABLE. Instead we let 3146 * the link layer address passed in to determine the state 3147 * much like incoming packets. 3148 */ 3149 nce_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3150 NCE_REFRELE(nce); 3151 return (0); 3152 } 3153 3154 /* 3155 * If the device driver supports it, we make nce_fp_mp to have 3156 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 3157 * The caller ensures there is hold on nce for this function. 3158 * Note that since ill_fastpath_probe() copies the mblk there is 3159 * no need for the hold beyond this function. 3160 */ 3161 void 3162 nce_fastpath(nce_t *nce) 3163 { 3164 ill_t *ill = nce->nce_ill; 3165 int res; 3166 3167 ASSERT(ill != NULL); 3168 ASSERT(nce->nce_state != ND_INITIAL && nce->nce_state != ND_INCOMPLETE); 3169 3170 if (nce->nce_fp_mp != NULL) { 3171 /* Already contains fastpath info */ 3172 return; 3173 } 3174 if (nce->nce_res_mp != NULL) { 3175 nce_fastpath_list_add(nce); 3176 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3177 /* 3178 * EAGAIN is an indication of a transient error 3179 * i.e. allocation failure etc. leave the nce in the list it 3180 * will be updated when another probe happens for another ire 3181 * if not it will be taken out of the list when the ire is 3182 * deleted. 
3183 */ 3184 3185 if (res != 0 && res != EAGAIN) 3186 nce_fastpath_list_delete(nce); 3187 } 3188 } 3189 3190 /* 3191 * Drain the list of nce's waiting for fastpath response. 3192 */ 3193 void 3194 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3195 void *arg) 3196 { 3197 3198 nce_t *next_nce; 3199 nce_t *current_nce; 3200 nce_t *first_nce; 3201 nce_t *prev_nce = NULL; 3202 3203 mutex_enter(&ill->ill_lock); 3204 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3205 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3206 next_nce = current_nce->nce_fastpath; 3207 /* 3208 * Take it off the list if we're flushing, or if the callback 3209 * routine tells us to do so. Otherwise, leave the nce in the 3210 * fastpath list to handle any pending response from the lower 3211 * layer. We can't drain the list when the callback routine 3212 * comparison failed, because the response is asynchronous in 3213 * nature, and may not arrive in the same order as the list 3214 * insertion. 3215 */ 3216 if (func == NULL || func(current_nce, arg)) { 3217 current_nce->nce_fastpath = NULL; 3218 if (current_nce == first_nce) 3219 ill->ill_fastpath_list = first_nce = next_nce; 3220 else 3221 prev_nce->nce_fastpath = next_nce; 3222 } else { 3223 /* previous element that is still in the list */ 3224 prev_nce = current_nce; 3225 } 3226 current_nce = next_nce; 3227 } 3228 mutex_exit(&ill->ill_lock); 3229 } 3230 3231 /* 3232 * Add nce to the nce fastpath list. 3233 */ 3234 void 3235 nce_fastpath_list_add(nce_t *nce) 3236 { 3237 ill_t *ill; 3238 3239 ill = nce->nce_ill; 3240 3241 mutex_enter(&ill->ill_lock); 3242 mutex_enter(&nce->nce_lock); 3243 3244 /* 3245 * if nce has not been deleted and 3246 * is not already in the list add it. 
3247 */ 3248 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3249 (nce->nce_fastpath == NULL)) { 3250 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3251 ill->ill_fastpath_list = nce; 3252 } 3253 3254 mutex_exit(&nce->nce_lock); 3255 mutex_exit(&ill->ill_lock); 3256 } 3257 3258 /* 3259 * remove nce from the nce fastpath list. 3260 */ 3261 void 3262 nce_fastpath_list_delete(nce_t *nce) 3263 { 3264 nce_t *nce_ptr; 3265 3266 ill_t *ill; 3267 3268 ill = nce->nce_ill; 3269 ASSERT(ill != NULL); 3270 3271 mutex_enter(&ill->ill_lock); 3272 if (nce->nce_fastpath == NULL) 3273 goto done; 3274 3275 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3276 3277 if (ill->ill_fastpath_list == nce) { 3278 ill->ill_fastpath_list = nce->nce_fastpath; 3279 } else { 3280 nce_ptr = ill->ill_fastpath_list; 3281 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3282 if (nce_ptr->nce_fastpath == nce) { 3283 nce_ptr->nce_fastpath = nce->nce_fastpath; 3284 break; 3285 } 3286 nce_ptr = nce_ptr->nce_fastpath; 3287 } 3288 } 3289 3290 nce->nce_fastpath = NULL; 3291 done: 3292 mutex_exit(&ill->ill_lock); 3293 } 3294 3295 /* 3296 * Update all NCE's that are not in fastpath mode and 3297 * have an nce_fp_mp that matches mp. mp->b_cont contains 3298 * the fastpath header. 3299 * 3300 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 
3301 */ 3302 boolean_t 3303 ndp_fastpath_update(nce_t *nce, void *arg) 3304 { 3305 mblk_t *mp, *fp_mp; 3306 uchar_t *mp_rptr, *ud_mp_rptr; 3307 mblk_t *ud_mp = nce->nce_res_mp; 3308 ptrdiff_t cmplen; 3309 3310 if (nce->nce_flags & NCE_F_MAPPING) 3311 return (B_TRUE); 3312 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3313 return (B_TRUE); 3314 3315 ip2dbg(("ndp_fastpath_update: trying\n")); 3316 mp = (mblk_t *)arg; 3317 mp_rptr = mp->b_rptr; 3318 cmplen = mp->b_wptr - mp_rptr; 3319 ASSERT(cmplen >= 0); 3320 ud_mp_rptr = ud_mp->b_rptr; 3321 /* 3322 * The nce is locked here to prevent any other threads 3323 * from accessing and changing nce_res_mp when the IPv6 address 3324 * becomes resolved to an lla while we're in the middle 3325 * of looking at and comparing the hardware address (lla). 3326 * It is also locked to prevent multiple threads in nce_fastpath_update 3327 * from examining nce_res_mp atthe same time. 3328 */ 3329 mutex_enter(&nce->nce_lock); 3330 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3331 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3332 mutex_exit(&nce->nce_lock); 3333 /* 3334 * Don't take the ire off the fastpath list yet, 3335 * since the response may come later. 3336 */ 3337 return (B_FALSE); 3338 } 3339 /* Matched - install mp as the fastpath mp */ 3340 ip1dbg(("ndp_fastpath_update: match\n")); 3341 fp_mp = dupb(mp->b_cont); 3342 if (fp_mp != NULL) { 3343 nce->nce_fp_mp = fp_mp; 3344 } 3345 mutex_exit(&nce->nce_lock); 3346 return (B_TRUE); 3347 } 3348 3349 /* 3350 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3351 * driver. Note that it assumes IP is exclusive... 3352 */ 3353 /* ARGSUSED */ 3354 void 3355 ndp_fastpath_flush(nce_t *nce, char *arg) 3356 { 3357 if (nce->nce_flags & NCE_F_MAPPING) 3358 return; 3359 /* No fastpath info? 
*/ 3360 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3361 return; 3362 3363 if (nce->nce_ipversion == IPV4_VERSION && 3364 nce->nce_flags & NCE_F_BCAST) { 3365 /* 3366 * IPv4 BROADCAST entries: 3367 * We can't delete the nce since it is difficult to 3368 * recreate these without going through the 3369 * ipif down/up dance. 3370 * 3371 * All access to nce->nce_fp_mp in the case of these 3372 * is protected by nce_lock. 3373 */ 3374 mutex_enter(&nce->nce_lock); 3375 if (nce->nce_fp_mp != NULL) { 3376 freeb(nce->nce_fp_mp); 3377 nce->nce_fp_mp = NULL; 3378 mutex_exit(&nce->nce_lock); 3379 nce_fastpath(nce); 3380 } else { 3381 mutex_exit(&nce->nce_lock); 3382 } 3383 } else { 3384 /* Just delete the NCE... */ 3385 ndp_delete(nce); 3386 } 3387 } 3388 3389 /* 3390 * Return a pointer to a given option in the packet. 3391 * Assumes that option part of the packet have already been validated. 3392 */ 3393 nd_opt_hdr_t * 3394 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3395 { 3396 while (optlen > 0) { 3397 if (opt->nd_opt_type == opt_type) 3398 return (opt); 3399 optlen -= 8 * opt->nd_opt_len; 3400 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3401 } 3402 return (NULL); 3403 } 3404 3405 /* 3406 * Verify all option lengths present are > 0, also check to see 3407 * if the option lengths and packet length are consistent. 3408 */ 3409 boolean_t 3410 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3411 { 3412 ASSERT(opt != NULL); 3413 while (optlen > 0) { 3414 if (opt->nd_opt_len == 0) 3415 return (B_FALSE); 3416 optlen -= 8 * opt->nd_opt_len; 3417 if (optlen < 0) 3418 return (B_FALSE); 3419 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3420 } 3421 return (B_TRUE); 3422 } 3423 3424 /* 3425 * ndp_walk function. 3426 * Free a fraction of the NCE cache entries. 3427 * A fraction of zero means to not free any in that category. 
3428 */ 3429 void 3430 ndp_cache_reclaim(nce_t *nce, char *arg) 3431 { 3432 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3433 uint_t rand; 3434 3435 if (nce->nce_flags & NCE_F_PERMANENT) 3436 return; 3437 3438 rand = (uint_t)lbolt + 3439 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3440 if (ncr->ncr_host != 0 && 3441 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3442 ndp_delete(nce); 3443 return; 3444 } 3445 } 3446 3447 /* 3448 * ndp_walk function. 3449 * Count the number of NCEs that can be deleted. 3450 * These would be hosts but not routers. 3451 */ 3452 void 3453 ndp_cache_count(nce_t *nce, char *arg) 3454 { 3455 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3456 3457 if (nce->nce_flags & NCE_F_PERMANENT) 3458 return; 3459 3460 ncc->ncc_total++; 3461 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3462 ncc->ncc_host++; 3463 } 3464 3465 #ifdef DEBUG 3466 void 3467 nce_trace_ref(nce_t *nce) 3468 { 3469 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3470 3471 if (nce->nce_trace_disable) 3472 return; 3473 3474 if (!th_trace_ref(nce, nce->nce_ill->ill_ipst)) { 3475 nce->nce_trace_disable = B_TRUE; 3476 nce_trace_cleanup(nce); 3477 } 3478 } 3479 3480 void 3481 nce_untrace_ref(nce_t *nce) 3482 { 3483 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3484 3485 if (!nce->nce_trace_disable) 3486 th_trace_unref(nce); 3487 } 3488 3489 static void 3490 nce_trace_cleanup(const nce_t *nce) 3491 { 3492 th_trace_cleanup(nce, nce->nce_trace_disable); 3493 } 3494 #endif 3495 3496 /* 3497 * Called when address resolution fails due to a timeout. 3498 * Send an ICMP unreachable in response to all queued packets. 
3499 */ 3500 void 3501 arp_resolv_failed(nce_t *nce) 3502 { 3503 mblk_t *mp, *nxt_mp, *first_mp; 3504 char buf[INET6_ADDRSTRLEN]; 3505 zoneid_t zoneid = GLOBAL_ZONEID; 3506 struct in_addr ipv4addr; 3507 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3508 3509 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3510 ip3dbg(("arp_resolv_failed: dst %s\n", 3511 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3512 mutex_enter(&nce->nce_lock); 3513 mp = nce->nce_qd_mp; 3514 nce->nce_qd_mp = NULL; 3515 mutex_exit(&nce->nce_lock); 3516 3517 while (mp != NULL) { 3518 nxt_mp = mp->b_next; 3519 mp->b_next = NULL; 3520 mp->b_prev = NULL; 3521 3522 first_mp = mp; 3523 /* 3524 * Send icmp unreachable messages 3525 * to the hosts. 3526 */ 3527 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst); 3528 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3529 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3530 ICMP_HOST_UNREACHABLE, zoneid, ipst); 3531 mp = nxt_mp; 3532 } 3533 } 3534 3535 int 3536 ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3537 nce_t **newnce, nce_t *src_nce) 3538 { 3539 int err; 3540 nce_t *nce; 3541 in6_addr_t addr6; 3542 ip_stack_t *ipst = ill->ill_ipst; 3543 3544 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3545 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3546 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3547 /* 3548 * NOTE: IPv4 never matches across the illgrp since the NCE's we're 3549 * looking up have fastpath headers that are inherently per-ill. 3550 */ 3551 nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce); 3552 if (nce == NULL) { 3553 err = ndp_add_v4(ill, addr, flags, newnce, src_nce); 3554 } else { 3555 *newnce = nce; 3556 err = EEXIST; 3557 } 3558 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3559 return (err); 3560 } 3561 3562 /* 3563 * NDP Cache Entry creation routine for IPv4. 3564 * Mapped entries are handled in arp. 3565 * This routine must always be called with ndp4->ndp_g_lock held. 
3566 * Prior to return, nce_refcnt is incremented. 3567 */ 3568 static int 3569 ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3570 nce_t **newnce, nce_t *src_nce) 3571 { 3572 static nce_t nce_nil; 3573 nce_t *nce; 3574 mblk_t *mp; 3575 mblk_t *template = NULL; 3576 nce_t **ncep; 3577 ip_stack_t *ipst = ill->ill_ipst; 3578 uint16_t state = ND_INITIAL; 3579 int err; 3580 3581 ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock)); 3582 ASSERT(!ill->ill_isv6); 3583 ASSERT((flags & NCE_F_MAPPING) == 0); 3584 3585 if (ill->ill_resolver_mp == NULL) 3586 return (EINVAL); 3587 /* 3588 * Allocate the mblk to hold the nce. 3589 */ 3590 mp = allocb(sizeof (nce_t), BPRI_MED); 3591 if (mp == NULL) 3592 return (ENOMEM); 3593 3594 nce = (nce_t *)mp->b_rptr; 3595 mp->b_wptr = (uchar_t *)&nce[1]; 3596 *nce = nce_nil; 3597 nce->nce_ill = ill; 3598 nce->nce_ipversion = IPV4_VERSION; 3599 nce->nce_flags = flags; 3600 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3601 nce->nce_rcnt = ill->ill_xmit_count; 3602 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3603 nce->nce_mask = ipv6_all_ones; 3604 nce->nce_extract_mask = ipv6_all_zeros; 3605 nce->nce_ll_extract_start = 0; 3606 nce->nce_qd_mp = NULL; 3607 nce->nce_mp = mp; 3608 /* This one is for nce getting created */ 3609 nce->nce_refcnt = 1; 3610 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3611 ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3612 3613 nce->nce_trace_disable = B_FALSE; 3614 3615 if (src_nce != NULL) { 3616 /* 3617 * src_nce has been provided by the caller. 
The only 3618 * caller who provides a non-null, non-broadcast 3619 * src_nce is from ip_newroute() which must pass in 3620 * a ND_REACHABLE src_nce (this condition is verified 3621 * via an ASSERT for the save_ire->ire_nce in ip_newroute()) 3622 */ 3623 mutex_enter(&src_nce->nce_lock); 3624 state = src_nce->nce_state; 3625 if ((src_nce->nce_flags & NCE_F_CONDEMNED) || 3626 (ipst->ips_ndp4->ndp_g_hw_change > 0)) { 3627 /* 3628 * src_nce has been deleted, or 3629 * ip_arp_news is in the middle of 3630 * flushing entries in the the nce. 3631 * Fail the add, since we don't know 3632 * if it is safe to copy the contents of 3633 * src_nce 3634 */ 3635 DTRACE_PROBE2(nce__bad__src__nce, 3636 nce_t *, src_nce, ill_t *, ill); 3637 mutex_exit(&src_nce->nce_lock); 3638 err = EINVAL; 3639 goto err_ret; 3640 } 3641 template = copyb(src_nce->nce_res_mp); 3642 mutex_exit(&src_nce->nce_lock); 3643 if (template == NULL) { 3644 err = ENOMEM; 3645 goto err_ret; 3646 } 3647 } else if (flags & NCE_F_BCAST) { 3648 /* 3649 * broadcast nce. 3650 */ 3651 template = copyb(ill->ill_bcast_mp); 3652 if (template == NULL) { 3653 err = ENOMEM; 3654 goto err_ret; 3655 } 3656 state = ND_REACHABLE; 3657 } else if (ill->ill_net_type == IRE_IF_NORESOLVER) { 3658 /* 3659 * NORESOLVER entries are always created in the REACHABLE 3660 * state. We create a nce_res_mp with the IP nexthop address 3661 * in the destination address in the DLPI hdr if the 3662 * physical length is exactly 4 bytes. 3663 * 3664 * XXX not clear which drivers set ill_phys_addr_length to 3665 * IP_ADDR_LEN. 
3666 */ 3667 if (ill->ill_phys_addr_length == IP_ADDR_LEN) { 3668 template = ill_dlur_gen((uchar_t *)addr, 3669 ill->ill_phys_addr_length, 3670 ill->ill_sap, ill->ill_sap_length); 3671 } else { 3672 template = copyb(ill->ill_resolver_mp); 3673 } 3674 if (template == NULL) { 3675 err = ENOMEM; 3676 goto err_ret; 3677 } 3678 state = ND_REACHABLE; 3679 } 3680 nce->nce_fp_mp = NULL; 3681 nce->nce_res_mp = template; 3682 nce->nce_state = state; 3683 if (state == ND_REACHABLE) { 3684 nce->nce_last = TICK_TO_MSEC(lbolt64); 3685 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3686 } else { 3687 nce->nce_last = 0; 3688 if (state == ND_INITIAL) 3689 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3690 } 3691 3692 ASSERT((nce->nce_res_mp == NULL && nce->nce_state == ND_INITIAL) || 3693 (nce->nce_res_mp != NULL && nce->nce_state == ND_REACHABLE)); 3694 /* 3695 * Atomically ensure that the ill is not CONDEMNED, before 3696 * adding the NCE. 3697 */ 3698 mutex_enter(&ill->ill_lock); 3699 if (ill->ill_state_flags & ILL_CONDEMNED) { 3700 mutex_exit(&ill->ill_lock); 3701 err = EINVAL; 3702 goto err_ret; 3703 } 3704 if ((nce->nce_next = *ncep) != NULL) 3705 nce->nce_next->nce_ptpn = &nce->nce_next; 3706 *ncep = nce; 3707 nce->nce_ptpn = ncep; 3708 *newnce = nce; 3709 /* This one is for nce being used by an active thread */ 3710 NCE_REFHOLD(*newnce); 3711 3712 /* Bump up the number of nce's referencing this ill */ 3713 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 3714 (char *), "nce", (void *), nce); 3715 ill->ill_nce_cnt++; 3716 mutex_exit(&ill->ill_lock); 3717 DTRACE_PROBE1(ndp__add__v4, nce_t *, nce); 3718 return (0); 3719 err_ret: 3720 freeb(mp); 3721 freemsg(template); 3722 return (err); 3723 } 3724 3725 /* 3726 * ndp_walk routine to delete all entries that have a given destination or 3727 * gateway address and cached link layer (MAC) address. This is used when ARP 3728 * informs us that a network-to-link-layer mapping may have changed. 
3729 */ 3730 void 3731 nce_delete_hw_changed(nce_t *nce, void *arg) 3732 { 3733 nce_hw_map_t *hwm = arg; 3734 mblk_t *mp; 3735 dl_unitdata_req_t *dlu; 3736 uchar_t *macaddr; 3737 ill_t *ill; 3738 int saplen; 3739 ipaddr_t nce_addr; 3740 3741 if (nce->nce_state != ND_REACHABLE) 3742 return; 3743 3744 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3745 if (nce_addr != hwm->hwm_addr) 3746 return; 3747 3748 mutex_enter(&nce->nce_lock); 3749 if ((mp = nce->nce_res_mp) == NULL) { 3750 mutex_exit(&nce->nce_lock); 3751 return; 3752 } 3753 dlu = (dl_unitdata_req_t *)mp->b_rptr; 3754 macaddr = (uchar_t *)(dlu + 1); 3755 ill = nce->nce_ill; 3756 if ((saplen = ill->ill_sap_length) > 0) 3757 macaddr += saplen; 3758 else 3759 saplen = -saplen; 3760 3761 /* 3762 * If the hardware address is unchanged, then leave this one alone. 3763 * Note that saplen == abs(saplen) now. 3764 */ 3765 if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && 3766 bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { 3767 mutex_exit(&nce->nce_lock); 3768 return; 3769 } 3770 mutex_exit(&nce->nce_lock); 3771 3772 DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); 3773 ndp_delete(nce); 3774 } 3775 3776 /* 3777 * This function verifies whether a given IPv4 address is potentially known to 3778 * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t, 3779 * so that it can continue to look for hardware changes on that address. 3780 */ 3781 boolean_t 3782 ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns) 3783 { 3784 nce_t *nce; 3785 struct in_addr nceaddr; 3786 ip_stack_t *ipst = ns->netstack_ip; 3787 3788 if (addr == INADDR_ANY) 3789 return (B_FALSE); 3790 3791 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3792 nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr); 3793 for (; nce != NULL; nce = nce->nce_next) { 3794 /* Note that only v4 mapped entries are in the table. 
*/ 3795 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 3796 if (addr == nceaddr.s_addr && 3797 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 3798 /* Single flag check; no lock needed */ 3799 if (!(nce->nce_flags & NCE_F_CONDEMNED)) 3800 break; 3801 } 3802 } 3803 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3804 return (nce != NULL); 3805 } 3806 3807 /* 3808 * Wrapper around ipif_lookup_addr_exact_v6() that allows ND to work properly 3809 * with IPMP. Specifically, since neighbor discovery is always done on 3810 * underlying interfaces (even for addresses owned by an IPMP interface), we 3811 * need to check for `v6addrp' on both `ill' and on the IPMP meta-interface 3812 * associated with `ill' (if it exists). 3813 */ 3814 static ipif_t * 3815 ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill) 3816 { 3817 ipif_t *ipif; 3818 ip_stack_t *ipst = ill->ill_ipst; 3819 3820 ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst); 3821 if (ipif == NULL && IS_UNDER_IPMP(ill)) { 3822 if ((ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) { 3823 ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst); 3824 ill_refrele(ill); 3825 } 3826 } 3827 return (ipif); 3828 } 3829