1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/sysmacros.h> 33 #include <sys/errno.h> 34 #include <sys/dlpi.h> 35 #include <sys/socket.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/vtrace.h> 41 #include <sys/kmem.h> 42 #include <sys/zone.h> 43 #include <sys/ethernet.h> 44 #include <sys/sdt.h> 45 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/if_dl.h> 49 #include <net/route.h> 50 #include <netinet/in.h> 51 #include <netinet/ip6.h> 52 #include <netinet/icmp6.h> 53 54 #include <inet/common.h> 55 #include <inet/mi.h> 56 #include <inet/mib2.h> 57 #include <inet/nd.h> 58 #include <inet/ip.h> 59 #include <inet/ip_impl.h> 60 #include <inet/ipclassifier.h> 61 #include <inet/ip_if.h> 62 #include <inet/ip_ire.h> 63 #include <inet/ip_rts.h> 64 #include <inet/ip6.h> 65 #include <inet/ip_ndp.h> 66 #include 
<inet/ipsec_impl.h> 67 #include <inet/ipsec_info.h> 68 #include <inet/sctp_ip.h> 69 70 /* 71 * Function names with nce_ prefix are static while function 72 * names with ndp_ prefix are used by rest of the IP. 73 * 74 * Lock ordering: 75 * 76 * ndp_g_lock -> ill_lock -> nce_lock 77 * 78 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 79 * nce_next. Nce_lock protects the contents of the NCE (particularly 80 * nce_refcnt). 81 */ 82 83 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 84 uint32_t ll_addr_len); 85 static void nce_ire_delete(nce_t *nce); 86 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 87 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 88 static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *, nce_t *); 89 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 90 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 91 uchar_t *addr); 92 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 93 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 94 static void nce_report1(nce_t *nce, uchar_t *mp_arg); 95 static mblk_t *nce_udreq_alloc(ill_t *ill); 96 static void nce_update(nce_t *nce, uint16_t new_state, 97 uchar_t *new_ll_addr); 98 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 99 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 100 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 101 const in6_addr_t *target, int flag); 102 static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t, 103 nce_t **, nce_t *); 104 105 /* 106 * We track the time of creation of the nce in the nce_init_time field 107 * of IPv4 nce_t entries. If an nce is stuck in the ND_INITIAL state for 108 * more than NCE_STUCK_TIMEOUT milliseconds, trigger the nce-stuck dtrace 109 * probe to assist in debugging. This probe is fired from from nce_report1() 110 * when 'ndd -get /dev/ip ip_ndp_cache_report' is invoked. 
111 */ 112 #define NCE_STUCK_TIMEOUT 120000 113 114 #ifdef DEBUG 115 static void nce_trace_cleanup(const nce_t *); 116 #endif 117 118 #define NCE_HASH_PTR_V4(ipst, addr) \ 119 (&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 120 121 #define NCE_HASH_PTR_V6(ipst, addr) \ 122 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ 123 NCE_TABLE_SIZE)])) 124 125 /* 126 * Compute default flags to use for an advertisement of this nce's address. 127 */ 128 static int 129 nce_advert_flags(const nce_t *nce) 130 { 131 int flag = 0; 132 133 if (nce->nce_flags & NCE_F_ISROUTER) 134 flag |= NDP_ISROUTER; 135 return (flag); 136 } 137 138 /* Non-tunable probe interval, based on link capabilities */ 139 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 140 141 /* 142 * NDP Cache Entry creation routine. 143 * Mapped entries will never do NUD . 144 * This routine must always be called with ndp6->ndp_g_lock held. 145 * Prior to return, nce_refcnt is incremented. 146 */ 147 int 148 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 149 const in6_addr_t *mask, const in6_addr_t *extract_mask, 150 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 151 nce_t **newnce) 152 { 153 static nce_t nce_nil; 154 nce_t *nce; 155 mblk_t *mp; 156 mblk_t *template; 157 nce_t **ncep; 158 int err; 159 boolean_t dropped = B_FALSE; 160 ip_stack_t *ipst = ill->ill_ipst; 161 162 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 163 ASSERT(ill != NULL && ill->ill_isv6); 164 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 165 ip0dbg(("ndp_add_v6: no addr\n")); 166 return (EINVAL); 167 } 168 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 169 ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags)); 170 return (EINVAL); 171 } 172 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 173 (flags & NCE_F_MAPPING)) { 174 ip0dbg(("ndp_add_v6: extract mask zero for mapping")); 175 return (EINVAL); 176 } 177 /* 178 * Allocate the mblk to hold the nce. 
179 * 180 * XXX This can come out of a separate cache - nce_cache. 181 * We don't need the mp anymore as there are no more 182 * "qwriter"s 183 */ 184 mp = allocb(sizeof (nce_t), BPRI_MED); 185 if (mp == NULL) 186 return (ENOMEM); 187 188 nce = (nce_t *)mp->b_rptr; 189 mp->b_wptr = (uchar_t *)&nce[1]; 190 *nce = nce_nil; 191 192 /* 193 * This one holds link layer address 194 */ 195 if (ill->ill_net_type == IRE_IF_RESOLVER) { 196 template = nce_udreq_alloc(ill); 197 } else { 198 if (ill->ill_resolver_mp == NULL) { 199 freeb(mp); 200 return (EINVAL); 201 } 202 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 203 template = copyb(ill->ill_resolver_mp); 204 } 205 if (template == NULL) { 206 freeb(mp); 207 return (ENOMEM); 208 } 209 nce->nce_ill = ill; 210 nce->nce_ipversion = IPV6_VERSION; 211 nce->nce_flags = flags; 212 nce->nce_state = state; 213 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 214 nce->nce_rcnt = ill->ill_xmit_count; 215 nce->nce_addr = *addr; 216 nce->nce_mask = *mask; 217 nce->nce_extract_mask = *extract_mask; 218 nce->nce_ll_extract_start = hw_extract_start; 219 nce->nce_fp_mp = NULL; 220 nce->nce_res_mp = template; 221 if (state == ND_REACHABLE) 222 nce->nce_last = TICK_TO_MSEC(lbolt64); 223 else 224 nce->nce_last = 0; 225 nce->nce_qd_mp = NULL; 226 nce->nce_mp = mp; 227 if (hw_addr != NULL) 228 nce_set_ll(nce, hw_addr); 229 /* This one is for nce getting created */ 230 nce->nce_refcnt = 1; 231 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 232 if (nce->nce_flags & NCE_F_MAPPING) { 233 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 234 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 235 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 236 ncep = &ipst->ips_ndp6->nce_mask_entries; 237 } else { 238 ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 239 } 240 241 nce->nce_trace_disable = B_FALSE; 242 243 /* 244 * Atomically ensure that the ill is not CONDEMNED, before 245 * adding the NCE. 
246 */ 247 mutex_enter(&ill->ill_lock); 248 if (ill->ill_state_flags & ILL_CONDEMNED) { 249 mutex_exit(&ill->ill_lock); 250 freeb(mp); 251 freeb(template); 252 return (EINVAL); 253 } 254 if ((nce->nce_next = *ncep) != NULL) 255 nce->nce_next->nce_ptpn = &nce->nce_next; 256 *ncep = nce; 257 nce->nce_ptpn = ncep; 258 *newnce = nce; 259 /* This one is for nce being used by an active thread */ 260 NCE_REFHOLD(*newnce); 261 262 /* Bump up the number of nce's referencing this ill */ 263 ill->ill_nce_cnt++; 264 mutex_exit(&ill->ill_lock); 265 266 err = 0; 267 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 268 mutex_enter(&nce->nce_lock); 269 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 270 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 271 mutex_exit(&nce->nce_lock); 272 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 273 &ipv6_all_zeros, addr, NDP_PROBE); 274 if (dropped) { 275 mutex_enter(&nce->nce_lock); 276 nce->nce_pcnt++; 277 mutex_exit(&nce->nce_lock); 278 } 279 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 280 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 281 err = EINPROGRESS; 282 } else if (flags & NCE_F_UNSOL_ADV) { 283 /* 284 * We account for the transmit below by assigning one 285 * less than the ndd variable. Subsequent decrements 286 * are done in ndp_timer. 
287 */ 288 mutex_enter(&nce->nce_lock); 289 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 290 nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1; 291 mutex_exit(&nce->nce_lock); 292 dropped = nce_xmit(ill, 293 ND_NEIGHBOR_ADVERT, 294 ill, /* ill to be used for extracting ill_nd_lla */ 295 B_TRUE, /* use ill_nd_lla */ 296 addr, /* Source and target of the advertisement pkt */ 297 &ipv6_all_hosts_mcast, /* Destination of the packet */ 298 nce_advert_flags(nce)); 299 mutex_enter(&nce->nce_lock); 300 if (dropped) 301 nce->nce_unsolicit_count++; 302 if (nce->nce_unsolicit_count != 0) { 303 nce->nce_timeout_id = timeout(ndp_timer, nce, 304 MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval)); 305 } 306 mutex_exit(&nce->nce_lock); 307 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 308 } 309 /* 310 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 311 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 312 * We call nce_fastpath from nce_update if the link layer address of 313 * the peer changes from nce_update 314 */ 315 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 316 nce_fastpath(nce); 317 return (err); 318 } 319 320 int 321 ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 322 const in6_addr_t *mask, const in6_addr_t *extract_mask, 323 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 324 nce_t **newnce) 325 { 326 int err = 0; 327 nce_t *nce; 328 ip_stack_t *ipst = ill->ill_ipst; 329 330 ASSERT(ill->ill_isv6); 331 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 332 333 /* Get head of v6 hash table */ 334 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 335 nce = nce_lookup_addr(ill, addr, nce); 336 if (nce == NULL) { 337 err = ndp_add_v6(ill, 338 hw_addr, 339 addr, 340 mask, 341 extract_mask, 342 hw_extract_start, 343 flags, 344 state, 345 newnce); 346 } else { 347 *newnce = nce; 348 err = EEXIST; 349 } 350 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 351 return (err); 352 } 353 354 /* 355 * 
Remove all the CONDEMNED nces from the appropriate hash table. 356 * We create a private list of NCEs, these may have ires pointing 357 * to them, so the list will be passed through to clean up dependent 358 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 359 */ 360 static void 361 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 362 { 363 nce_t *nce1; 364 nce_t **ptpn; 365 366 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 367 ASSERT(ndp->ndp_g_walker == 0); 368 for (; nce; nce = nce1) { 369 nce1 = nce->nce_next; 370 mutex_enter(&nce->nce_lock); 371 if (nce->nce_flags & NCE_F_CONDEMNED) { 372 ptpn = nce->nce_ptpn; 373 nce1 = nce->nce_next; 374 if (nce1 != NULL) 375 nce1->nce_ptpn = ptpn; 376 *ptpn = nce1; 377 nce->nce_ptpn = NULL; 378 nce->nce_next = NULL; 379 nce->nce_next = *free_nce_list; 380 *free_nce_list = nce; 381 } 382 mutex_exit(&nce->nce_lock); 383 } 384 } 385 386 /* 387 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 388 * will return this NCE. Also no new IREs will be created that 389 * point to this NCE (See ire_add_v6). Also no new timeouts will 390 * be started (See NDP_RESTART_TIMER). 391 * 2. Cancel any currently running timeouts. 392 * 3. If there is an ndp walker, return. The walker will do the cleanup. 393 * This ensures that walkers see a consistent list of NCEs while walking. 394 * 4. Otherwise remove the NCE from the list of NCEs 395 * 5. Delete all IREs pointing to this NCE. 396 */ 397 void 398 ndp_delete(nce_t *nce) 399 { 400 nce_t **ptpn; 401 nce_t *nce1; 402 int ipversion = nce->nce_ipversion; 403 ndp_g_t *ndp; 404 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 405 406 if (ipversion == IPV4_VERSION) 407 ndp = ipst->ips_ndp4; 408 else 409 ndp = ipst->ips_ndp6; 410 411 /* Serialize deletes */ 412 mutex_enter(&nce->nce_lock); 413 if (nce->nce_flags & NCE_F_CONDEMNED) { 414 /* Some other thread is doing the delete */ 415 mutex_exit(&nce->nce_lock); 416 return; 417 } 418 /* 419 * Caller has a refhold. 
Also 1 ref for being in the list. Thus 420 * refcnt has to be >= 2 421 */ 422 ASSERT(nce->nce_refcnt >= 2); 423 nce->nce_flags |= NCE_F_CONDEMNED; 424 mutex_exit(&nce->nce_lock); 425 426 nce_fastpath_list_delete(nce); 427 428 /* 429 * Cancel any running timer. Timeout can't be restarted 430 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 431 * Passing invalid timeout id is fine. 432 */ 433 if (nce->nce_timeout_id != 0) { 434 (void) untimeout(nce->nce_timeout_id); 435 nce->nce_timeout_id = 0; 436 } 437 438 mutex_enter(&ndp->ndp_g_lock); 439 if (nce->nce_ptpn == NULL) { 440 /* 441 * The last ndp walker has already removed this nce from 442 * the list after we marked the nce CONDEMNED and before 443 * we grabbed the global lock. 444 */ 445 mutex_exit(&ndp->ndp_g_lock); 446 return; 447 } 448 if (ndp->ndp_g_walker > 0) { 449 /* 450 * Can't unlink. The walker will clean up 451 */ 452 ndp->ndp_g_walker_cleanup = B_TRUE; 453 mutex_exit(&ndp->ndp_g_lock); 454 return; 455 } 456 457 /* 458 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 459 * the timer since it is marked CONDEMNED. 
460 */ 461 ptpn = nce->nce_ptpn; 462 nce1 = nce->nce_next; 463 if (nce1 != NULL) 464 nce1->nce_ptpn = ptpn; 465 *ptpn = nce1; 466 nce->nce_ptpn = NULL; 467 nce->nce_next = NULL; 468 mutex_exit(&ndp->ndp_g_lock); 469 470 nce_ire_delete(nce); 471 } 472 473 void 474 ndp_inactive(nce_t *nce) 475 { 476 mblk_t **mpp; 477 ill_t *ill; 478 479 ASSERT(nce->nce_refcnt == 0); 480 ASSERT(MUTEX_HELD(&nce->nce_lock)); 481 ASSERT(nce->nce_fastpath == NULL); 482 483 /* Free all nce allocated messages */ 484 mpp = &nce->nce_first_mp_to_free; 485 do { 486 while (*mpp != NULL) { 487 mblk_t *mp; 488 489 mp = *mpp; 490 *mpp = mp->b_next; 491 492 inet_freemsg(mp); 493 } 494 } while (mpp++ != &nce->nce_last_mp_to_free); 495 496 #ifdef DEBUG 497 nce_trace_cleanup(nce); 498 #endif 499 500 ill = nce->nce_ill; 501 mutex_enter(&ill->ill_lock); 502 ill->ill_nce_cnt--; 503 /* 504 * If the number of nce's associated with this ill have dropped 505 * to zero, check whether we need to restart any operation that 506 * is waiting for this to happen. 507 */ 508 if (ill->ill_nce_cnt == 0) { 509 /* ipif_ill_refrele_tail drops the ill_lock */ 510 ipif_ill_refrele_tail(ill); 511 } else { 512 mutex_exit(&ill->ill_lock); 513 } 514 mutex_destroy(&nce->nce_lock); 515 if (nce->nce_mp != NULL) 516 inet_freemsg(nce->nce_mp); 517 } 518 519 /* 520 * ndp_walk routine. Delete the nce if it is associated with the ill 521 * that is going away. Always called as a writer. 522 */ 523 void 524 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 525 { 526 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 527 ndp_delete(nce); 528 } 529 } 530 531 /* 532 * Walk a list of to be inactive NCEs and blow away all the ires. 
533 */ 534 static void 535 nce_ire_delete_list(nce_t *nce) 536 { 537 nce_t *nce_next; 538 539 ASSERT(nce != NULL); 540 while (nce != NULL) { 541 nce_next = nce->nce_next; 542 nce->nce_next = NULL; 543 544 /* 545 * It is possible for the last ndp walker (this thread) 546 * to come here after ndp_delete has marked the nce CONDEMNED 547 * and before it has removed the nce from the fastpath list 548 * or called untimeout. So we need to do it here. It is safe 549 * for both ndp_delete and this thread to do it twice or 550 * even simultaneously since each of the threads has a 551 * reference on the nce. 552 */ 553 nce_fastpath_list_delete(nce); 554 /* 555 * Cancel any running timer. Timeout can't be restarted 556 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 557 * Passing invalid timeout id is fine. 558 */ 559 if (nce->nce_timeout_id != 0) { 560 (void) untimeout(nce->nce_timeout_id); 561 nce->nce_timeout_id = 0; 562 } 563 /* 564 * We might hit this func thus in the v4 case: 565 * ipif_down->ipif_ndp_down->ndp_walk 566 */ 567 568 if (nce->nce_ipversion == IPV4_VERSION) { 569 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 570 IRE_CACHE, nce_ire_delete1, 571 (char *)nce, nce->nce_ill); 572 } else { 573 ASSERT(nce->nce_ipversion == IPV6_VERSION); 574 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 575 IRE_CACHE, nce_ire_delete1, 576 (char *)nce, nce->nce_ill); 577 } 578 NCE_REFRELE_NOTR(nce); 579 nce = nce_next; 580 } 581 } 582 583 /* 584 * Delete an ire when the nce goes away. 
585 */ 586 /* ARGSUSED */ 587 static void 588 nce_ire_delete(nce_t *nce) 589 { 590 if (nce->nce_ipversion == IPV6_VERSION) { 591 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 592 nce_ire_delete1, (char *)nce, nce->nce_ill); 593 NCE_REFRELE_NOTR(nce); 594 } else { 595 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 596 nce_ire_delete1, (char *)nce, nce->nce_ill); 597 NCE_REFRELE_NOTR(nce); 598 } 599 } 600 601 /* 602 * ire_walk routine used to delete every IRE that shares this nce 603 */ 604 static void 605 nce_ire_delete1(ire_t *ire, char *nce_arg) 606 { 607 nce_t *nce = (nce_t *)nce_arg; 608 609 ASSERT(ire->ire_type == IRE_CACHE); 610 611 if (ire->ire_nce == nce) { 612 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 613 ire_delete(ire); 614 } 615 } 616 617 /* 618 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 619 */ 620 boolean_t 621 ndp_restart_dad(nce_t *nce) 622 { 623 boolean_t started; 624 boolean_t dropped; 625 626 if (nce == NULL) 627 return (B_FALSE); 628 mutex_enter(&nce->nce_lock); 629 if (nce->nce_state == ND_PROBE) { 630 mutex_exit(&nce->nce_lock); 631 started = B_TRUE; 632 } else if (nce->nce_state == ND_REACHABLE) { 633 nce->nce_state = ND_PROBE; 634 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 635 mutex_exit(&nce->nce_lock); 636 dropped = nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, NULL, 637 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, NDP_PROBE); 638 if (dropped) { 639 mutex_enter(&nce->nce_lock); 640 nce->nce_pcnt++; 641 mutex_exit(&nce->nce_lock); 642 } 643 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 644 started = B_TRUE; 645 } else { 646 mutex_exit(&nce->nce_lock); 647 started = B_FALSE; 648 } 649 return (started); 650 } 651 652 /* 653 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 654 * If one is found, the refcnt on the nce will be incremented. 
655 */ 656 nce_t * 657 ndp_lookup_v6(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 658 { 659 nce_t *nce; 660 ip_stack_t *ipst; 661 662 ASSERT(ill != NULL); 663 ipst = ill->ill_ipst; 664 665 ASSERT(ill != NULL && ill->ill_isv6); 666 if (!caller_holds_lock) { 667 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 668 } 669 670 /* Get head of v6 hash table */ 671 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 672 nce = nce_lookup_addr(ill, addr, nce); 673 if (nce == NULL) 674 nce = nce_lookup_mapping(ill, addr); 675 if (!caller_holds_lock) 676 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 677 return (nce); 678 } 679 /* 680 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 681 * If one is found, the refcnt on the nce will be incremented. 682 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 683 * so we skip the nce_lookup_mapping call. 684 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 685 */ 686 nce_t * 687 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 688 { 689 nce_t *nce; 690 in6_addr_t addr6; 691 ip_stack_t *ipst = ill->ill_ipst; 692 693 if (!caller_holds_lock) { 694 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 695 } 696 697 /* Get head of v4 hash table */ 698 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 699 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 700 nce = nce_lookup_addr(ill, &addr6, nce); 701 if (!caller_holds_lock) 702 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 703 return (nce); 704 } 705 706 /* 707 * Cache entry lookup. Try to find an nce matching the parameters passed. 708 * Look only for exact entries (no mappings). If an nce is found, increment 709 * the hold count on that nce. The caller passes in the start of the 710 * appropriate hash table, and must be holding the appropriate global 711 * lock (ndp_g_lock). 
712 */ 713 static nce_t * 714 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr, nce_t *nce) 715 { 716 ndp_g_t *ndp; 717 ip_stack_t *ipst = ill->ill_ipst; 718 719 if (ill->ill_isv6) 720 ndp = ipst->ips_ndp6; 721 else 722 ndp = ipst->ips_ndp4; 723 724 ASSERT(ill != NULL); 725 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 726 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 727 return (NULL); 728 for (; nce != NULL; nce = nce->nce_next) { 729 if (nce->nce_ill == ill) { 730 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 731 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 732 &ipv6_all_ones)) { 733 mutex_enter(&nce->nce_lock); 734 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 735 NCE_REFHOLD_LOCKED(nce); 736 mutex_exit(&nce->nce_lock); 737 break; 738 } 739 mutex_exit(&nce->nce_lock); 740 } 741 } 742 } 743 return (nce); 744 } 745 746 /* 747 * Cache entry lookup. Try to find an nce matching the parameters passed. 748 * Look only for mappings. 749 */ 750 static nce_t * 751 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 752 { 753 nce_t *nce; 754 ip_stack_t *ipst = ill->ill_ipst; 755 756 ASSERT(ill != NULL && ill->ill_isv6); 757 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 758 if (!IN6_IS_ADDR_MULTICAST(addr)) 759 return (NULL); 760 nce = ipst->ips_ndp6->nce_mask_entries; 761 for (; nce != NULL; nce = nce->nce_next) 762 if (nce->nce_ill == ill && 763 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 764 mutex_enter(&nce->nce_lock); 765 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 766 NCE_REFHOLD_LOCKED(nce); 767 mutex_exit(&nce->nce_lock); 768 break; 769 } 770 mutex_exit(&nce->nce_lock); 771 } 772 return (nce); 773 } 774 775 /* 776 * Process passed in parameters either from an incoming packet or via 777 * user ioctl. 
778 */ 779 void 780 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 781 { 782 ill_t *ill = nce->nce_ill; 783 uint32_t hw_addr_len = ill->ill_nd_lla_len; 784 mblk_t *mp; 785 boolean_t ll_updated = B_FALSE; 786 boolean_t ll_changed; 787 ip_stack_t *ipst = ill->ill_ipst; 788 789 ASSERT(nce->nce_ipversion == IPV6_VERSION); 790 /* 791 * No updates of link layer address or the neighbor state is 792 * allowed, when the cache is in NONUD state. This still 793 * allows for responding to reachability solicitation. 794 */ 795 mutex_enter(&nce->nce_lock); 796 if (nce->nce_state == ND_INCOMPLETE) { 797 if (hw_addr == NULL) { 798 mutex_exit(&nce->nce_lock); 799 return; 800 } 801 nce_set_ll(nce, hw_addr); 802 /* 803 * Update nce state and send the queued packets 804 * back to ip this time ire will be added. 805 */ 806 if (flag & ND_NA_FLAG_SOLICITED) { 807 nce_update(nce, ND_REACHABLE, NULL); 808 } else { 809 nce_update(nce, ND_STALE, NULL); 810 } 811 mutex_exit(&nce->nce_lock); 812 nce_fastpath(nce); 813 mutex_enter(&nce->nce_lock); 814 mp = nce->nce_qd_mp; 815 nce->nce_qd_mp = NULL; 816 mutex_exit(&nce->nce_lock); 817 while (mp != NULL) { 818 mblk_t *nxt_mp, *data_mp; 819 820 nxt_mp = mp->b_next; 821 mp->b_next = NULL; 822 823 if (mp->b_datap->db_type == M_CTL) 824 data_mp = mp->b_cont; 825 else 826 data_mp = mp; 827 if (data_mp->b_prev != NULL) { 828 ill_t *inbound_ill; 829 queue_t *fwdq = NULL; 830 uint_t ifindex; 831 832 ifindex = (uint_t)(uintptr_t)data_mp->b_prev; 833 inbound_ill = ill_lookup_on_ifindex(ifindex, 834 B_TRUE, NULL, NULL, NULL, NULL, ipst); 835 if (inbound_ill == NULL) { 836 data_mp->b_prev = NULL; 837 freemsg(mp); 838 return; 839 } else { 840 fwdq = inbound_ill->ill_rq; 841 } 842 data_mp->b_prev = NULL; 843 /* 844 * Send a forwarded packet back into ip_rput_v6 845 * just as in ire_send_v6(). 846 * Extract the queue from b_prev (set in 847 * ip_rput_data_v6). 
848 */ 849 if (fwdq != NULL) { 850 /* 851 * Forwarded packets hop count will 852 * get decremented in ip_rput_data_v6 853 */ 854 if (data_mp != mp) 855 freeb(mp); 856 put(fwdq, data_mp); 857 } else { 858 /* 859 * Send locally originated packets back 860 * into * ip_wput_v6. 861 */ 862 put(ill->ill_wq, mp); 863 } 864 ill_refrele(inbound_ill); 865 } else { 866 put(ill->ill_wq, mp); 867 } 868 mp = nxt_mp; 869 } 870 return; 871 } 872 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 873 if (!is_adv) { 874 /* If this is a SOLICITATION request only */ 875 if (ll_changed) 876 nce_update(nce, ND_STALE, hw_addr); 877 mutex_exit(&nce->nce_lock); 878 return; 879 } 880 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 881 /* If in any other state than REACHABLE, ignore */ 882 if (nce->nce_state == ND_REACHABLE) { 883 nce_update(nce, ND_STALE, NULL); 884 } 885 mutex_exit(&nce->nce_lock); 886 return; 887 } else { 888 if (ll_changed) { 889 nce_update(nce, ND_UNCHANGED, hw_addr); 890 ll_updated = B_TRUE; 891 } 892 if (flag & ND_NA_FLAG_SOLICITED) { 893 nce_update(nce, ND_REACHABLE, NULL); 894 } else { 895 if (ll_updated) { 896 nce_update(nce, ND_STALE, NULL); 897 } 898 } 899 mutex_exit(&nce->nce_lock); 900 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 901 NCE_F_ISROUTER)) { 902 ire_t *ire; 903 904 /* 905 * Router turned to host. We need to remove the 906 * entry as well as any default route that may be 907 * using this as a next hop. This is required by 908 * section 7.2.5 of RFC 2461. 909 */ 910 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 911 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 912 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 913 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 914 MATCH_IRE_DEFAULT, ipst); 915 if (ire != NULL) { 916 ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); 917 ire_delete(ire); 918 ire_refrele(ire); 919 } 920 ndp_delete(nce); 921 } 922 } 923 } 924 925 /* 926 * Pass arg1 to the pfi supplied, along with each nce in existence. 
927 * ndp_walk() places a REFHOLD on the nce and drops the lock when 928 * walking the hash list. 929 */ 930 void 931 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 932 boolean_t trace) 933 { 934 935 nce_t *nce; 936 nce_t *nce1; 937 nce_t **ncep; 938 nce_t *free_nce_list = NULL; 939 940 mutex_enter(&ndp->ndp_g_lock); 941 /* Prevent ndp_delete from unlink and free of NCE */ 942 ndp->ndp_g_walker++; 943 mutex_exit(&ndp->ndp_g_lock); 944 for (ncep = ndp->nce_hash_tbl; 945 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 946 for (nce = *ncep; nce != NULL; nce = nce1) { 947 nce1 = nce->nce_next; 948 if (ill == NULL || nce->nce_ill == ill) { 949 if (trace) { 950 NCE_REFHOLD(nce); 951 (*pfi)(nce, arg1); 952 NCE_REFRELE(nce); 953 } else { 954 NCE_REFHOLD_NOTR(nce); 955 (*pfi)(nce, arg1); 956 NCE_REFRELE_NOTR(nce); 957 } 958 } 959 } 960 } 961 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 962 nce1 = nce->nce_next; 963 if (ill == NULL || nce->nce_ill == ill) { 964 if (trace) { 965 NCE_REFHOLD(nce); 966 (*pfi)(nce, arg1); 967 NCE_REFRELE(nce); 968 } else { 969 NCE_REFHOLD_NOTR(nce); 970 (*pfi)(nce, arg1); 971 NCE_REFRELE_NOTR(nce); 972 } 973 } 974 } 975 mutex_enter(&ndp->ndp_g_lock); 976 ndp->ndp_g_walker--; 977 /* 978 * While NCE's are removed from global list they are placed 979 * in a private list, to be passed to nce_ire_delete_list(). 980 * The reason is, there may be ires pointing to this nce 981 * which needs to cleaned up. 
*/
	if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) {
		/* Time to delete condemned entries */
		for (ncep = ndp->nce_hash_tbl;
		    ncep < A_END(ndp->nce_hash_tbl); ncep++) {
			nce = *ncep;
			if (nce != NULL) {
				nce_remove(ndp, nce, &free_nce_list);
			}
		}
		nce = ndp->nce_mask_entries;
		if (nce != NULL) {
			nce_remove(ndp, nce, &free_nce_list);
		}
		ndp->ndp_g_walker_cleanup = B_FALSE;
	}

	mutex_exit(&ndp->ndp_g_lock);

	if (free_nce_list != NULL) {
		nce_ire_delete_list(free_nce_list);
	}
}

/*
 * Walk everything.
 * Note that ill can be NULL hence can't derive the ipst from it.
 *
 * Runs ndp_walk_common() once over ipst->ips_ndp4 and once over
 * ipst->ips_ndp6, passing ill, pfi and arg1 through unchanged.
 */
void
ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst)
{
	ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE);
	ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE);
}

/*
 * Process resolve requests. Handles both mapped entries
 * as well as cases that needs to be send out on the wire.
 * Lookup a NCE for a given IRE.  Regardless of whether one exists
 * or one is created, we defer making ire point to nce until the
 * ire is actually added at which point the nce_refcnt on the nce is
 * incremented.  This is done primarily to have symmetry between ire_add()
 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
 *
 * Return values:
 *	- for a multicast dst, whatever nce_set_multicast() returns;
 *	- 0 when a usable entry exists (state other than ND_INCOMPLETE), or
 *	  when a new entry was created on an ILLF_XRESOLV interface;
 *	- EINPROGRESS when mp was queued on the nce pending resolution;
 *	- ENOMEM when ip_prepend_zoneid() fails;
 *	- EBUSY when nce_solicit() reports that the solicitation must be
 *	  aborted (the newly created nce is deleted);
 *	- any other error from ndp_lookup_then_add_v6().
 * On ENOMEM and EBUSY the caller remains responsible for freeing mp.
 */
int
ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid)
{
	nce_t		*nce;
	int		err = 0;
	uint32_t	ms;
	mblk_t		*mp_nce = NULL;
	ip_stack_t	*ipst = ill->ill_ipst;

	ASSERT(ill->ill_isv6);
	if (IN6_IS_ADDR_MULTICAST(dst)) {
		err = nce_set_multicast(ill, dst);
		return (err);
	}
	err = ndp_lookup_then_add_v6(ill,
	    NULL,	/* No hardware address */
	    dst,
	    &ipv6_all_ones,
	    &ipv6_all_zeros,
	    0,
	    (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0,
	    ND_INCOMPLETE,
	    &nce);

	switch (err) {
	case 0:
		/*
		 * New cache entry was created. Make sure that the state
		 * is not ND_INCOMPLETE. It can be in some other state
		 * even before we send out the solicitation as we could
		 * get un-solicited advertisements.
		 *
		 * If this is an XRESOLV interface, simply return 0,
		 * since we don't want to solicit just yet.
		 */
		if (ill->ill_flags & ILLF_XRESOLV) {
			NCE_REFRELE(nce);
			return (0);
		}
		/* nce_solicit() asserts both of these locks are held. */
		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		mutex_enter(&nce->nce_lock);
		if (nce->nce_state != ND_INCOMPLETE) {
			/* Already resolved behind our back; nothing to send. */
			mutex_exit(&nce->nce_lock);
			rw_exit(&ipst->ips_ill_g_lock);
			NCE_REFRELE(nce);
			return (0);
		}
		mp_nce = ip_prepend_zoneid(mp, zoneid, ipst);
		if (mp_nce == NULL) {
			/* The caller will free mp */
			mutex_exit(&nce->nce_lock);
			rw_exit(&ipst->ips_ill_g_lock);
			ndp_delete(nce);
			NCE_REFRELE(nce);
			return (ENOMEM);
		}
		ms = nce_solicit(nce, mp_nce);
		rw_exit(&ipst->ips_ill_g_lock);
		if (ms == 0) {
			/* The caller will free mp */
			/* Only the block added by ip_prepend_zoneid is ours. */
			if (mp_nce != mp)
				freeb(mp_nce);
			mutex_exit(&nce->nce_lock);
			ndp_delete(nce);
			NCE_REFRELE(nce);
			return (EBUSY);
		}
		mutex_exit(&nce->nce_lock);
		/* ms is the retransmit interval returned by nce_solicit(). */
		NDP_RESTART_TIMER(nce, (clock_t)ms);
		NCE_REFRELE(nce);
		return (EINPROGRESS);
	case EEXIST:
		/* Resolution in progress just queue the packet */
		mutex_enter(&nce->nce_lock);
		if (nce->nce_state == ND_INCOMPLETE) {
			mp_nce = ip_prepend_zoneid(mp, zoneid, ipst);
			if (mp_nce == NULL) {
				err = ENOMEM;
			} else {
				nce_queue_mp(nce, mp_nce);
				err = EINPROGRESS;
			}
		} else {
			/*
			 * Any other state implies we have
			 * a nce but IRE needs to be added ...
			 * ire_add_v6() will take care of the
			 * the case when the nce becomes CONDEMNED
			 * before the ire is added to the table.
			 */
			err = 0;
		}
		mutex_exit(&nce->nce_lock);
		NCE_REFRELE(nce);
		break;
	default:
		ip1dbg(("ndp_resolver: Can't create NCE %d\n", err));
		break;
	}
	return (err);
}

/*
 * When there is no resolver, the link layer template is passed in
 * the IRE.
 * Lookup a NCE for a given IRE.  Regardless of whether one exists
 * or one is created, we defer making ire point to nce until the
 * ire is actually added at which point the nce_refcnt on the nce is
 * incremented.  This is done primarily to have symmetry between ire_add()
 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
 *
 * Returns 0 when an entry for dst exists or was created (EEXIST from the
 * lookup is mapped to 0), the result of nce_set_multicast() for a multicast
 * dst, or the error from ndp_lookup_then_add_v6() otherwise.
 */
int
ndp_noresolver(ill_t *ill, const in6_addr_t *dst)
{
	nce_t		*nce;
	int		err = 0;

	ASSERT(ill != NULL);
	ASSERT(ill->ill_isv6);
	if (IN6_IS_ADDR_MULTICAST(dst)) {
		err = nce_set_multicast(ill, dst);
		return (err);
	}

	err = ndp_lookup_then_add_v6(ill,
	    NULL,	/* hardware address */
	    dst,
	    &ipv6_all_ones,
	    &ipv6_all_zeros,
	    0,
	    (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0,
	    ND_REACHABLE,
	    &nce);

	switch (err) {
	case 0:
		/*
		 * Cache entry with a proper resolver cookie was
		 * created.
		 */
		NCE_REFRELE(nce);
		break;
	case EEXIST:
		/* An existing entry is just as good; drop our hold on it. */
		err = 0;
		NCE_REFRELE(nce);
		break;
	default:
		ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err));
		break;
	}
	return (err);
}

/*
 * For each interface an entry is added for the unspecified multicast group.
 * Here that mapping is used to form the multicast cache entry for a particular
 * multicast destination.
1183 */ 1184 static int 1185 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1186 { 1187 nce_t *mnce; /* Multicast mapping entry */ 1188 nce_t *nce; 1189 uchar_t *hw_addr = NULL; 1190 int err = 0; 1191 ip_stack_t *ipst = ill->ill_ipst; 1192 1193 ASSERT(ill != NULL); 1194 ASSERT(ill->ill_isv6); 1195 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1196 1197 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1198 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst)); 1199 nce = nce_lookup_addr(ill, dst, nce); 1200 if (nce != NULL) { 1201 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1202 NCE_REFRELE(nce); 1203 return (0); 1204 } 1205 /* No entry, now lookup for a mapping this should never fail */ 1206 mnce = nce_lookup_mapping(ill, dst); 1207 if (mnce == NULL) { 1208 /* Something broken for the interface. */ 1209 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1210 return (ESRCH); 1211 } 1212 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1213 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1214 /* 1215 * For IRE_IF_RESOLVER a hardware mapping can be 1216 * generated, for IRE_IF_NORESOLVER, resolution cookie 1217 * in the ill is copied in ndp_add_v6(). 1218 */ 1219 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1220 if (hw_addr == NULL) { 1221 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1222 NCE_REFRELE(mnce); 1223 return (ENOMEM); 1224 } 1225 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1226 } 1227 NCE_REFRELE(mnce); 1228 /* 1229 * IRE_IF_NORESOLVER type simply copies the resolution 1230 * cookie passed in. So no hw_addr is needed. 
1231 */ 1232 err = ndp_add_v6(ill, 1233 hw_addr, 1234 dst, 1235 &ipv6_all_ones, 1236 &ipv6_all_zeros, 1237 0, 1238 NCE_F_NONUD, 1239 ND_REACHABLE, 1240 &nce); 1241 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1242 if (hw_addr != NULL) 1243 kmem_free(hw_addr, ill->ill_nd_lla_len); 1244 if (err != 0) { 1245 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1246 return (err); 1247 } 1248 NCE_REFRELE(nce); 1249 return (0); 1250 } 1251 1252 /* 1253 * Return the link layer address, and any flags of a nce. 1254 */ 1255 int 1256 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1257 { 1258 nce_t *nce; 1259 in6_addr_t *addr; 1260 sin6_t *sin6; 1261 dl_unitdata_req_t *dl; 1262 1263 ASSERT(ill != NULL && ill->ill_isv6); 1264 sin6 = (sin6_t *)&lnr->lnr_addr; 1265 addr = &sin6->sin6_addr; 1266 1267 nce = ndp_lookup_v6(ill, addr, B_FALSE); 1268 if (nce == NULL) 1269 return (ESRCH); 1270 /* If in INCOMPLETE state, no link layer address is available yet */ 1271 if (nce->nce_state == ND_INCOMPLETE) 1272 goto done; 1273 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1274 if (ill->ill_flags & ILLF_XRESOLV) 1275 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1276 else 1277 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1278 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1279 sizeof (lnr->lnr_hdw_addr)); 1280 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1281 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1282 if (nce->nce_flags & NCE_F_ISROUTER) 1283 lnr->lnr_flags = NDF_ISROUTER_ON; 1284 if (nce->nce_flags & NCE_F_ANYCAST) 1285 lnr->lnr_flags |= NDF_ANYCAST_ON; 1286 done: 1287 NCE_REFRELE(nce); 1288 return (0); 1289 } 1290 1291 /* 1292 * Send Enable/Disable multicast reqs to driver. 
1293 */ 1294 int 1295 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1296 uint32_t hw_addr_offset, mblk_t *mp) 1297 { 1298 nce_t *nce; 1299 uchar_t *hw_addr; 1300 ip_stack_t *ipst = ill->ill_ipst; 1301 1302 ASSERT(ill != NULL && ill->ill_isv6); 1303 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1304 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1305 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1306 freemsg(mp); 1307 return (EINVAL); 1308 } 1309 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1310 nce = nce_lookup_mapping(ill, addr); 1311 if (nce == NULL) { 1312 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1313 freemsg(mp); 1314 return (ESRCH); 1315 } 1316 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1317 /* 1318 * Update dl_addr_length and dl_addr_offset for primitives that 1319 * have physical addresses as opposed to full saps 1320 */ 1321 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1322 case DL_ENABMULTI_REQ: 1323 /* Track the state if this is the first enabmulti */ 1324 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1325 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1326 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1327 break; 1328 case DL_DISABMULTI_REQ: 1329 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1330 break; 1331 default: 1332 NCE_REFRELE(nce); 1333 ip1dbg(("ndp_mcastreq: default\n")); 1334 return (EINVAL); 1335 } 1336 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1337 NCE_REFRELE(nce); 1338 ill_dlpi_send(ill, mp); 1339 return (0); 1340 } 1341 1342 /* 1343 * Send a neighbor solicitation. 1344 * Returns number of milliseconds after which we should either rexmit or abort. 1345 * Return of zero means we should abort. 1346 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1347 * 1348 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1349 * the packet. 1350 * NOTE: This routine does not consume mp. 
1351 */ 1352 uint32_t 1353 nce_solicit(nce_t *nce, mblk_t *mp) 1354 { 1355 ill_t *ill; 1356 ill_t *src_ill; 1357 ip6_t *ip6h; 1358 in6_addr_t src; 1359 in6_addr_t dst; 1360 ipif_t *ipif; 1361 ip6i_t *ip6i; 1362 boolean_t dropped = B_FALSE; 1363 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 1364 1365 ASSERT(RW_READ_HELD(&ipst->ips_ill_g_lock)); 1366 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1367 ill = nce->nce_ill; 1368 ASSERT(ill != NULL); 1369 1370 if (nce->nce_rcnt == 0) { 1371 return (0); 1372 } 1373 1374 if (mp == NULL) { 1375 ASSERT(nce->nce_qd_mp != NULL); 1376 mp = nce->nce_qd_mp; 1377 } else { 1378 nce_queue_mp(nce, mp); 1379 } 1380 1381 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1382 if (mp->b_datap->db_type == M_CTL) 1383 mp = mp->b_cont; 1384 1385 ip6h = (ip6_t *)mp->b_rptr; 1386 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1387 /* 1388 * This message should have been pulled up already in 1389 * ip_wput_v6. We can't do pullups here because the message 1390 * could be from the nce_qd_mp which could have b_next/b_prev 1391 * non-NULL. 1392 */ 1393 ip6i = (ip6i_t *)ip6h; 1394 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1395 sizeof (ip6i_t) + IPV6_HDR_LEN); 1396 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1397 } 1398 src = ip6h->ip6_src; 1399 /* 1400 * If the src of outgoing packet is one of the assigned interface 1401 * addresses use it, otherwise we will pick the source address below. 1402 */ 1403 src_ill = ill; 1404 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1405 if (ill->ill_group != NULL) 1406 src_ill = ill->ill_group->illgrp_ill; 1407 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1408 for (ipif = src_ill->ill_ipif; ipif != NULL; 1409 ipif = ipif->ipif_next) { 1410 if (IN6_ARE_ADDR_EQUAL(&src, 1411 &ipif->ipif_v6lcl_addr)) { 1412 break; 1413 } 1414 } 1415 if (ipif != NULL) 1416 break; 1417 } 1418 /* 1419 * If no relevant ipif can be found, then it's not one of our 1420 * addresses. Reset to :: and let nce_xmit. 
If an ipif can be 1421 * found, but it's not yet done with DAD verification, then 1422 * just postpone this transmission until later. 1423 */ 1424 if (src_ill == NULL) 1425 src = ipv6_all_zeros; 1426 else if (!ipif->ipif_addr_ready) 1427 return (ill->ill_reachable_retrans_time); 1428 } 1429 dst = nce->nce_addr; 1430 /* 1431 * If source address is unspecified, nce_xmit will choose 1432 * one for us and initialize the hardware address also 1433 * appropriately. 1434 */ 1435 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1436 src_ill = NULL; 1437 nce->nce_rcnt--; 1438 mutex_exit(&nce->nce_lock); 1439 rw_exit(&ipst->ips_ill_g_lock); 1440 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1441 &dst, 0); 1442 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1443 mutex_enter(&nce->nce_lock); 1444 if (dropped) 1445 nce->nce_rcnt++; 1446 return (ill->ill_reachable_retrans_time); 1447 } 1448 1449 /* 1450 * Attempt to recover an address on an interface that's been marked as a 1451 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1452 * no easy way to just probe the address and have the right thing happen if 1453 * it's no longer in use. Instead, we just bring it up normally and allow the 1454 * regular interface start-up logic to probe for a remaining duplicate and take 1455 * us back down if necessary. 1456 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1457 * ip_ndp_excl. 1458 */ 1459 /* ARGSUSED */ 1460 static void 1461 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1462 { 1463 ill_t *ill = rq->q_ptr; 1464 ipif_t *ipif; 1465 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1466 1467 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1468 /* 1469 * We do not support recovery of proxy ARP'd interfaces, 1470 * because the system lacks a complete proxy ARP mechanism. 
1471 */ 1472 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1473 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1474 continue; 1475 } 1476 1477 /* 1478 * If we have already recovered or if the interface is going 1479 * away, then ignore. 1480 */ 1481 mutex_enter(&ill->ill_lock); 1482 if (!(ipif->ipif_flags & IPIF_DUPLICATE) || 1483 (ipif->ipif_flags & (IPIF_MOVING | IPIF_CONDEMNED))) { 1484 mutex_exit(&ill->ill_lock); 1485 continue; 1486 } 1487 1488 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1489 ill->ill_ipif_dup_count--; 1490 mutex_exit(&ill->ill_lock); 1491 ipif->ipif_was_dup = B_TRUE; 1492 1493 if (ipif_ndp_up(ipif) != EINPROGRESS) 1494 (void) ipif_up_done_v6(ipif); 1495 } 1496 freeb(mp); 1497 } 1498 1499 /* 1500 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1501 * As long as someone else holds the address, the interface will stay down. 1502 * When that conflict goes away, the interface is brought back up. This is 1503 * done so that accidental shutdowns of addresses aren't made permanent. Your 1504 * server will recover from a failure. 1505 * 1506 * For DHCP and temporary addresses, recovery is not done in the kernel. 1507 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1508 * 1509 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1510 */ 1511 static void 1512 ipif6_dup_recovery(void *arg) 1513 { 1514 ipif_t *ipif = arg; 1515 1516 ipif->ipif_recovery_id = 0; 1517 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1518 return; 1519 1520 /* 1521 * No lock, because this is just an optimization. 
1522 */ 1523 if (ipif->ipif_state_flags & (IPIF_MOVING | IPIF_CONDEMNED)) 1524 return; 1525 1526 /* If the link is down, we'll retry this later */ 1527 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1528 return; 1529 1530 ndp_do_recovery(ipif); 1531 } 1532 1533 /* 1534 * Perform interface recovery by forcing the duplicate interfaces up and 1535 * allowing the system to determine which ones should stay up. 1536 * 1537 * Called both by recovery timer expiry and link-up notification. 1538 */ 1539 void 1540 ndp_do_recovery(ipif_t *ipif) 1541 { 1542 ill_t *ill = ipif->ipif_ill; 1543 mblk_t *mp; 1544 ip_stack_t *ipst = ill->ill_ipst; 1545 1546 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1547 if (mp == NULL) { 1548 mutex_enter(&ill->ill_lock); 1549 if (ipif->ipif_recovery_id == 0 && 1550 !(ipif->ipif_state_flags & (IPIF_MOVING | 1551 IPIF_CONDEMNED))) { 1552 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1553 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1554 } 1555 mutex_exit(&ill->ill_lock); 1556 } else { 1557 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1558 sizeof (ipif->ipif_v6lcl_addr)); 1559 ill_refhold(ill); 1560 qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP, 1561 B_FALSE); 1562 } 1563 } 1564 1565 /* 1566 * Find the solicitation in the given message, and extract printable details 1567 * (MAC and IP addresses) from it. 1568 */ 1569 static nd_neighbor_solicit_t * 1570 ip_ndp_find_solicitation(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, char *hbuf, 1571 size_t hlen, char *sbuf, size_t slen, uchar_t **haddr) 1572 { 1573 nd_neighbor_solicit_t *ns; 1574 ip6_t *ip6h; 1575 uchar_t *addr; 1576 int alen; 1577 1578 alen = 0; 1579 ip6h = (ip6_t *)mp->b_rptr; 1580 if (dl_mp == NULL) { 1581 nd_opt_hdr_t *opt; 1582 int nslen; 1583 1584 /* 1585 * If it's from the fast-path, then it can't be a probe 1586 * message, and thus must include the source linkaddr option. 1587 * Extract that here. 
1588 */ 1589 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1590 nslen = mp->b_wptr - (uchar_t *)ns; 1591 if ((nslen -= sizeof (*ns)) > 0) { 1592 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), nslen, 1593 ND_OPT_SOURCE_LINKADDR); 1594 if (opt != NULL && 1595 opt->nd_opt_len * 8 - sizeof (*opt) >= 1596 ill->ill_nd_lla_len) { 1597 addr = (uchar_t *)(opt + 1); 1598 alen = ill->ill_nd_lla_len; 1599 } 1600 } 1601 /* 1602 * We cheat a bit here for the sake of printing usable log 1603 * messages in the rare case where the reply we got was unicast 1604 * without a source linkaddr option, and the interface is in 1605 * fastpath mode. (Sigh.) 1606 */ 1607 if (alen == 0 && ill->ill_type == IFT_ETHER && 1608 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1609 struct ether_header *pether; 1610 1611 pether = (struct ether_header *)((char *)ip6h - 1612 sizeof (*pether)); 1613 addr = pether->ether_shost.ether_addr_octet; 1614 alen = ETHERADDRL; 1615 } 1616 } else { 1617 dl_unitdata_ind_t *dlu; 1618 1619 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1620 alen = dlu->dl_src_addr_length; 1621 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1622 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1623 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1624 if (ill->ill_sap_length < 0) { 1625 alen += ill->ill_sap_length; 1626 } else { 1627 addr += ill->ill_sap_length; 1628 alen -= ill->ill_sap_length; 1629 } 1630 } 1631 } 1632 if (alen > 0) { 1633 *haddr = addr; 1634 (void) mac_colon_addr(addr, alen, hbuf, hlen); 1635 } else { 1636 *haddr = NULL; 1637 (void) strcpy(hbuf, "?"); 1638 } 1639 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1640 (void) inet_ntop(AF_INET6, &ns->nd_ns_target, sbuf, slen); 1641 return (ns); 1642 } 1643 1644 /* 1645 * This is for exclusive changes due to NDP duplicate address detection 1646 * failure. 
*/
/* ARGSUSED */
static void
ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
{
	ill_t	*ill = rq->q_ptr;
	ipif_t	*ipif;
	char	ibuf[LIFNAMSIZ + 10];	/* 10 digits for logical i/f number */
	char	hbuf[MAC_STR_LEN];
	char	sbuf[INET6_ADDRSTRLEN];
	nd_neighbor_solicit_t *ns;
	mblk_t	*dl_mp = NULL;
	uchar_t	*haddr;
	ip_stack_t	*ipst = ill->ill_ipst;

	/*
	 * ip_ndp_failure() hands us either the bare packet (M_DATA) or a
	 * copied DLPI header block with the packet chained on b_cont.
	 */
	if (DB_TYPE(mp) != M_DATA) {
		dl_mp = mp;
		mp = mp->b_cont;
	}
	ns = ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, sizeof (hbuf), sbuf,
	    sizeof (sbuf), &haddr);
	if (haddr != NULL &&
	    bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) {
		/*
		 * Ignore conflicts generated by misbehaving switches that just
		 * reflect our own messages back to us.
		 */
		goto ignore_conflict;
	}

	/* Mark down every non-point-to-point ipif that owns the target. */
	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {

		if ((ipif->ipif_flags & IPIF_POINTOPOINT) ||
		    !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
		    &ns->nd_ns_target)) {
			continue;
		}

		/* If it's already marked, then don't do anything. */
		if (ipif->ipif_flags & IPIF_DUPLICATE)
			continue;

		/*
		 * If this is a failure during duplicate recovery, then don't
		 * complain.  It may take a long time to recover.
		 */
		if (!ipif->ipif_was_dup) {
			ipif_get_name(ipif, ibuf, sizeof (ibuf));
			cmn_err(CE_WARN, "%s has duplicate address %s (in "
			    "use by %s); disabled", ibuf, sbuf, hbuf);
		}
		mutex_enter(&ill->ill_lock);
		ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE));
		ipif->ipif_flags |= IPIF_DUPLICATE;
		ill->ill_ipif_dup_count++;
		mutex_exit(&ill->ill_lock);
		/* ill_lock is dropped while the ipif is taken down. */
		(void) ipif_down(ipif, NULL, NULL);
		ipif_down_tail(ipif);
		mutex_enter(&ill->ill_lock);
		/*
		 * Arm the in-kernel recovery timer only for addresses that
		 * are neither DHCP-managed nor temporary, on resolver-type
		 * interfaces, when the ipif is not going away and the
		 * ip_dup_recovery tunable is non-zero.
		 */
		if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) &&
		    ill->ill_net_type == IRE_IF_RESOLVER &&
		    !(ipif->ipif_state_flags & (IPIF_MOVING |
		    IPIF_CONDEMNED)) &&
		    ipst->ips_ip_dup_recovery > 0) {
			ipif->ipif_recovery_id = timeout(ipif6_dup_recovery,
			    ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
		}
		mutex_exit(&ill->ill_lock);
	}
ignore_conflict:
	/* freeb() releases only the DLPI block; freemsg() the packet chain. */
	if (dl_mp != NULL)
		freeb(dl_mp);
	freemsg(mp);
}

/*
 * Handle failure by tearing down the ipifs with the specified address.  Note
 * that tearing down the ipif also means deleting the nce through ipif_down, so
 * it's not possible to do recovery by just restarting the nce timer.  Instead,
 * we start a timer on the ipif.
 *
 * The caller's mp and dl_mp are not consumed: copies are made and handed to
 * ip_ndp_excl() through qwriter_ip().  If the copies cannot be made the
 * tear-down is skipped, but the nce is deleted in every case.
 */
static void
ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce)
{
	if ((mp = copymsg(mp)) != NULL) {
		/* With no DLPI header, the packet copy itself is passed on. */
		if (dl_mp == NULL)
			dl_mp = mp;
		else if ((dl_mp = copyb(dl_mp)) != NULL)
			dl_mp->b_cont = mp;
		if (dl_mp == NULL) {
			/* copyb() failed; drop the packet copy as well. */
			freemsg(mp);
		} else {
			ill_refhold(ill);
			qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP,
			    B_FALSE);
		}
	}
	ndp_delete(nce);
}

/*
 * Handle a discovered conflict: some other system is advertising that it owns
 * one of our IP addresses.  We need to defend ourselves, or just shut down the
 * interface.
1751 */ 1752 static void 1753 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1754 { 1755 ipif_t *ipif; 1756 uint32_t now; 1757 uint_t maxdefense; 1758 uint_t defs; 1759 ip_stack_t *ipst = ill->ill_ipst; 1760 1761 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 1762 NULL, NULL, ipst); 1763 if (ipif == NULL) 1764 return; 1765 /* 1766 * First, figure out if this address is disposable. 1767 */ 1768 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1769 maxdefense = ipst->ips_ip_max_temp_defend; 1770 else 1771 maxdefense = ipst->ips_ip_max_defend; 1772 1773 /* 1774 * Now figure out how many times we've defended ourselves. Ignore 1775 * defenses that happened long in the past. 1776 */ 1777 now = gethrestime_sec(); 1778 mutex_enter(&nce->nce_lock); 1779 if ((defs = nce->nce_defense_count) > 0 && 1780 now - nce->nce_defense_time > ipst->ips_ip_defend_interval) { 1781 nce->nce_defense_count = defs = 0; 1782 } 1783 nce->nce_defense_count++; 1784 nce->nce_defense_time = now; 1785 mutex_exit(&nce->nce_lock); 1786 ipif_refrele(ipif); 1787 1788 /* 1789 * If we've defended ourselves too many times already, then give up and 1790 * tear down the interface(s) using this address. Otherwise, defend by 1791 * sending out an unsolicited Neighbor Advertisement. 
1792 */ 1793 if (defs >= maxdefense) { 1794 ip_ndp_failure(ill, mp, dl_mp, nce); 1795 } else { 1796 char hbuf[MAC_STR_LEN]; 1797 char sbuf[INET6_ADDRSTRLEN]; 1798 uchar_t *haddr; 1799 1800 (void) ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, 1801 sizeof (hbuf), sbuf, sizeof (sbuf), &haddr); 1802 cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1803 hbuf, sbuf, ill->ill_name); 1804 (void) nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, B_FALSE, 1805 &nce->nce_addr, &ipv6_all_hosts_mcast, 1806 nce_advert_flags(nce)); 1807 } 1808 } 1809 1810 static void 1811 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1812 { 1813 nd_neighbor_solicit_t *ns; 1814 uint32_t hlen = ill->ill_nd_lla_len; 1815 uchar_t *haddr = NULL; 1816 icmp6_t *icmp_nd; 1817 ip6_t *ip6h; 1818 nce_t *our_nce = NULL; 1819 in6_addr_t target; 1820 in6_addr_t src; 1821 int len; 1822 int flag = 0; 1823 nd_opt_hdr_t *opt = NULL; 1824 boolean_t bad_solicit = B_FALSE; 1825 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1826 1827 ip6h = (ip6_t *)mp->b_rptr; 1828 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1829 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1830 src = ip6h->ip6_src; 1831 ns = (nd_neighbor_solicit_t *)icmp_nd; 1832 target = ns->nd_ns_target; 1833 if (IN6_IS_ADDR_MULTICAST(&target)) { 1834 if (ip_debug > 2) { 1835 /* ip1dbg */ 1836 pr_addr_dbg("ndp_input_solicit: Target is" 1837 " multicast! 
%s\n", AF_INET6, &target); 1838 } 1839 bad_solicit = B_TRUE; 1840 goto done; 1841 } 1842 if (len > sizeof (nd_neighbor_solicit_t)) { 1843 /* Options present */ 1844 opt = (nd_opt_hdr_t *)&ns[1]; 1845 len -= sizeof (nd_neighbor_solicit_t); 1846 if (!ndp_verify_optlen(opt, len)) { 1847 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1848 bad_solicit = B_TRUE; 1849 goto done; 1850 } 1851 } 1852 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1853 /* Check to see if this is a valid DAD solicitation */ 1854 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1855 if (ip_debug > 2) { 1856 /* ip1dbg */ 1857 pr_addr_dbg("ndp_input_solicit: IPv6 " 1858 "Destination is not solicited node " 1859 "multicast %s\n", AF_INET6, 1860 &ip6h->ip6_dst); 1861 } 1862 bad_solicit = B_TRUE; 1863 goto done; 1864 } 1865 } 1866 1867 our_nce = ndp_lookup_v6(ill, &target, B_FALSE); 1868 /* 1869 * If this is a valid Solicitation, a permanent 1870 * entry should exist in the cache 1871 */ 1872 if (our_nce == NULL || 1873 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1874 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1875 "ifname=%s ", ill->ill_name)); 1876 if (ip_debug > 2) { 1877 /* ip1dbg */ 1878 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1879 } 1880 bad_solicit = B_TRUE; 1881 goto done; 1882 } 1883 1884 /* At this point we should have a verified NS per spec */ 1885 if (opt != NULL) { 1886 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1887 if (opt != NULL) { 1888 haddr = (uchar_t *)&opt[1]; 1889 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 1890 hlen == 0) { 1891 ip1dbg(("ndp_input_advert: bad SLLA\n")); 1892 bad_solicit = B_TRUE; 1893 goto done; 1894 } 1895 } 1896 } 1897 1898 /* If sending directly to peer, set the unicast flag */ 1899 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 1900 flag |= NDP_UNICAST; 1901 1902 /* 1903 * Create/update the entry for the soliciting node. 1904 * or respond to outstanding queries, don't if 1905 * the source is unspecified address. 
1906 */ 1907 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1908 int err; 1909 nce_t *nnce; 1910 1911 ASSERT(ill->ill_isv6); 1912 /* 1913 * Regular solicitations *must* include the Source Link-Layer 1914 * Address option. Ignore messages that do not. 1915 */ 1916 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1917 ip1dbg(("ndp_input_solicit: source link-layer address " 1918 "option missing with a specified source.\n")); 1919 bad_solicit = B_TRUE; 1920 goto done; 1921 } 1922 1923 /* 1924 * This is a regular solicitation. If we're still in the 1925 * process of verifying the address, then don't respond at all 1926 * and don't keep track of the sender. 1927 */ 1928 if (our_nce->nce_state == ND_PROBE) 1929 goto done; 1930 1931 /* 1932 * If the solicitation doesn't have sender hardware address 1933 * (legal for unicast solicitation), then process without 1934 * installing the return NCE. Either we already know it, or 1935 * we'll be forced to look it up when (and if) we reply to the 1936 * packet. 1937 */ 1938 if (haddr == NULL) 1939 goto no_source; 1940 1941 err = ndp_lookup_then_add_v6(ill, 1942 haddr, 1943 &src, /* Soliciting nodes address */ 1944 &ipv6_all_ones, 1945 &ipv6_all_zeros, 1946 0, 1947 0, 1948 ND_STALE, 1949 &nnce); 1950 switch (err) { 1951 case 0: 1952 /* done with this entry */ 1953 NCE_REFRELE(nnce); 1954 break; 1955 case EEXIST: 1956 /* 1957 * B_FALSE indicates this is not an 1958 * an advertisement. 1959 */ 1960 ndp_process(nnce, haddr, 0, B_FALSE); 1961 NCE_REFRELE(nnce); 1962 break; 1963 default: 1964 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1965 err)); 1966 goto done; 1967 } 1968 no_source: 1969 flag |= NDP_SOLICITED; 1970 } else { 1971 /* 1972 * No source link layer address option should be present in a 1973 * valid DAD request. 
1974 */ 1975 if (haddr != NULL) { 1976 ip1dbg(("ndp_input_solicit: source link-layer address " 1977 "option present with an unspecified source.\n")); 1978 bad_solicit = B_TRUE; 1979 goto done; 1980 } 1981 if (our_nce->nce_state == ND_PROBE) { 1982 /* 1983 * Internally looped-back probes won't have DLPI 1984 * attached to them. External ones (which are sent by 1985 * multicast) always will. Just ignore our own 1986 * transmissions. 1987 */ 1988 if (dl_mp != NULL) { 1989 /* 1990 * If someone else is probing our address, then 1991 * we've crossed wires. Declare failure. 1992 */ 1993 ip_ndp_failure(ill, mp, dl_mp, our_nce); 1994 } 1995 goto done; 1996 } 1997 /* 1998 * This is a DAD probe. Multicast the advertisement to the 1999 * all-nodes address. 2000 */ 2001 src = ipv6_all_hosts_mcast; 2002 } 2003 flag |= nce_advert_flags(our_nce); 2004 /* Response to a solicitation */ 2005 (void) nce_xmit(ill, 2006 ND_NEIGHBOR_ADVERT, 2007 ill, /* ill to be used for extracting ill_nd_lla */ 2008 B_TRUE, /* use ill_nd_lla */ 2009 &target, /* Source and target of the advertisement pkt */ 2010 &src, /* IP Destination (source of original pkt) */ 2011 flag); 2012 done: 2013 if (bad_solicit) 2014 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 2015 if (our_nce != NULL) 2016 NCE_REFRELE(our_nce); 2017 } 2018 2019 void 2020 ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2021 { 2022 nd_neighbor_advert_t *na; 2023 uint32_t hlen = ill->ill_nd_lla_len; 2024 uchar_t *haddr = NULL; 2025 icmp6_t *icmp_nd; 2026 ip6_t *ip6h; 2027 nce_t *dst_nce = NULL; 2028 in6_addr_t target; 2029 nd_opt_hdr_t *opt = NULL; 2030 int len; 2031 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2032 ip_stack_t *ipst = ill->ill_ipst; 2033 2034 ip6h = (ip6_t *)mp->b_rptr; 2035 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2036 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2037 na = (nd_neighbor_advert_t *)icmp_nd; 2038 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 2039 (na->nd_na_flags_reserved & 
ND_NA_FLAG_SOLICITED)) { 2040 ip1dbg(("ndp_input_advert: Target is multicast but the " 2041 "solicited flag is not zero\n")); 2042 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2043 return; 2044 } 2045 target = na->nd_na_target; 2046 if (IN6_IS_ADDR_MULTICAST(&target)) { 2047 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 2048 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2049 return; 2050 } 2051 if (len > sizeof (nd_neighbor_advert_t)) { 2052 opt = (nd_opt_hdr_t *)&na[1]; 2053 if (!ndp_verify_optlen(opt, 2054 len - sizeof (nd_neighbor_advert_t))) { 2055 ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); 2056 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2057 return; 2058 } 2059 /* At this point we have a verified NA per spec */ 2060 len -= sizeof (nd_neighbor_advert_t); 2061 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 2062 if (opt != NULL) { 2063 haddr = (uchar_t *)&opt[1]; 2064 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 2065 hlen == 0) { 2066 ip1dbg(("ndp_input_advert: bad SLLA\n")); 2067 BUMP_MIB(mib, 2068 ipv6IfIcmpInBadNeighborAdvertisements); 2069 return; 2070 } 2071 } 2072 } 2073 2074 /* 2075 * If this interface is part of the group look at all the 2076 * ills in the group. 
2077 */ 2078 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2079 if (ill->ill_group != NULL) 2080 ill = ill->ill_group->illgrp_ill; 2081 2082 for (; ill != NULL; ill = ill->ill_group_next) { 2083 mutex_enter(&ill->ill_lock); 2084 if (!ILL_CAN_LOOKUP(ill)) { 2085 mutex_exit(&ill->ill_lock); 2086 continue; 2087 } 2088 ill_refhold_locked(ill); 2089 mutex_exit(&ill->ill_lock); 2090 dst_nce = ndp_lookup_v6(ill, &target, B_FALSE); 2091 /* We have to drop the lock since ndp_process calls put* */ 2092 rw_exit(&ipst->ips_ill_g_lock); 2093 if (dst_nce != NULL) { 2094 if ((dst_nce->nce_flags & NCE_F_PERMANENT) && 2095 dst_nce->nce_state == ND_PROBE) { 2096 /* 2097 * Someone else sent an advertisement for an 2098 * address that we're trying to configure. 2099 * Tear it down. Note that dl_mp might be NULL 2100 * if we're getting a unicast reply. This 2101 * isn't typically done (multicast is the norm 2102 * in response to a probe), but ip_ndp_failure 2103 * will handle the dl_mp == NULL case as well. 2104 */ 2105 ip_ndp_failure(ill, mp, dl_mp, dst_nce); 2106 } else if (dst_nce->nce_flags & NCE_F_PERMANENT) { 2107 /* 2108 * Someone just announced one of our local 2109 * addresses. If it wasn't us, then this is a 2110 * conflict. Defend the address or shut it 2111 * down. 2112 */ 2113 if (dl_mp != NULL && 2114 (haddr == NULL || 2115 nce_cmp_ll_addr(dst_nce, haddr, 2116 ill->ill_nd_lla_len))) { 2117 ip_ndp_conflict(ill, mp, dl_mp, 2118 dst_nce); 2119 } 2120 } else { 2121 if (na->nd_na_flags_reserved & 2122 ND_NA_FLAG_ROUTER) { 2123 dst_nce->nce_flags |= NCE_F_ISROUTER; 2124 } 2125 /* B_TRUE indicates this an advertisement */ 2126 ndp_process(dst_nce, haddr, 2127 na->nd_na_flags_reserved, B_TRUE); 2128 } 2129 NCE_REFRELE(dst_nce); 2130 } 2131 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2132 ill_refrele(ill); 2133 } 2134 rw_exit(&ipst->ips_ill_g_lock); 2135 } 2136 2137 /* 2138 * Process NDP neighbor solicitation/advertisement messages. 
* The checksum has already been verified before reaching here.
 *
 * mp is always freed before returning, whether or not the message was
 * accepted; dl_mp is passed through to the solicit/advert handlers and is
 * not freed here.
 */
void
ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
	icmp6_t		*icmp_nd;
	ip6_t		*ip6h;
	int		len;
	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;


	/* The handlers parse the packet through a single contiguous block. */
	if (!pullupmsg(mp, -1)) {
		ip1dbg(("ndp_input: pullupmsg failed\n"));
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
		goto done;
	}
	ip6h = (ip6_t *)mp->b_rptr;
	/* Only messages whose hop limit is still IPV6_MAX_HOPS are accepted. */
	if (ip6h->ip6_hops != IPV6_MAX_HOPS) {
		ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n"));
		BUMP_MIB(mib, ipv6IfIcmpBadHoplimit);
		goto done;
	}
	/*
	 * NDP does not accept any extension headers between the
	 * IP header and the ICMP header since e.g. a routing
	 * header could be dangerous.
	 * This assumes that any AH or ESP headers are removed
	 * by ip prior to passing the packet to ndp_input.
	 */
	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
		ip1dbg(("ndp_input: Wrong next header 0x%x\n",
		    ip6h->ip6_nxt));
		BUMP_MIB(mib, ipv6IfIcmpInErrors);
		goto done;
	}
	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
	ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT ||
	    icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT);
	if (icmp_nd->icmp6_code != 0) {
		ip1dbg(("ndp_input: icmp6 code != 0 \n"));
		BUMP_MIB(mib, ipv6IfIcmpInErrors);
		goto done;
	}
	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
	/*
	 * Make sure packet length is large enough for either
	 * a NS or a NA icmp packet.
	 */
	if (len <  sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) {
		ip1dbg(("ndp_input: packet too short\n"));
		BUMP_MIB(mib, ipv6IfIcmpInErrors);
		goto done;
	}
	/* The ASSERT above limits the type to solicit or advert. */
	if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) {
		ndp_input_solicit(ill, mp, dl_mp);
	} else {
		ndp_input_advert(ill, mp, dl_mp);
	}
done:
	freemsg(mp);
}

/*
 * nce_xmit is called to form and transmit a ND solicitation or
 * advertisement ICMP packet.
 *
 * If the source address is unspecified and this isn't a probe (used for
 * duplicate address detection), an appropriate source address and link layer
 * address will be chosen here.  The link layer address option is included if
 * the source is specified (i.e., all non-probe packets), and omitted (per the
 * specification) otherwise.
 *
 * It returns B_FALSE only if it does a successful put() to the
 * corresponding ill's ill_wq otherwise returns B_TRUE.
 */
static boolean_t
nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill,
    boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target,
    int flag)
{
	uint32_t	len;
	icmp6_t 	*icmp6;
	mblk_t		*mp;
	ip6_t		*ip6h;
	nd_opt_hdr_t	*opt;
	uint_t		plen;
	ip6i_t		*ip6i;
	ipif_t		*src_ipif = NULL;
	uint8_t		*hw_addr;
	zoneid_t	zoneid = GLOBAL_ZONEID;

	/*
	 * If we have a unspecified source(sender) address, select a
	 * proper source address for the solicitation here itself so
	 * that we can initialize the h/w address correctly. This is
	 * needed for interface groups as source address can come from
	 * the whole group and the h/w address initialized from ill will
	 * be wrong if the source address comes from a different ill.
	 *
	 * If the sender is specified then we use this address in order
	 * to lookup the zoneid before calling ip_output_v6().
This is to 2240 * enable unicast ND_NEIGHBOR_ADVERT packets to be routed correctly 2241 * by IP (we cannot guarantee that the global zone has an interface 2242 * route to the destination). 2243 * 2244 * Note that the NA never comes here with the unspecified source 2245 * address. The following asserts that whenever the source 2246 * address is specified, the haddr also should be specified. 2247 */ 2248 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 2249 2250 if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { 2251 ASSERT(operation != ND_NEIGHBOR_ADVERT); 2252 /* 2253 * Pick a source address for this solicitation, but 2254 * restrict the selection to addresses assigned to the 2255 * output interface (or interface group). We do this 2256 * because the destination will create a neighbor cache 2257 * entry for the source address of this packet, so the 2258 * source address had better be a valid neighbor. 2259 */ 2260 src_ipif = ipif_select_source_v6(ill, target, RESTRICT_TO_ILL, 2261 IPV6_PREFER_SRC_DEFAULT, ALL_ZONES); 2262 if (src_ipif == NULL) { 2263 char buf[INET6_ADDRSTRLEN]; 2264 2265 ip1dbg(("nce_xmit: No source ipif for dst %s\n", 2266 inet_ntop(AF_INET6, (char *)target, buf, 2267 sizeof (buf)))); 2268 return (B_TRUE); 2269 } 2270 sender = &src_ipif->ipif_v6src_addr; 2271 hwaddr_ill = src_ipif->ipif_ill; 2272 } else if (!(IN6_IS_ADDR_UNSPECIFIED(sender))) { 2273 zoneid = ipif_lookup_addr_zoneid_v6(sender, ill, ill->ill_ipst); 2274 /* 2275 * It's possible for ipif_lookup_addr_zoneid_v6() to return 2276 * ALL_ZONES if it cannot find a matching ipif for the address 2277 * we are trying to use. In this case we err on the side of 2278 * trying to send the packet by defaulting to the GLOBAL_ZONEID. 2279 */ 2280 if (zoneid == ALL_ZONES) 2281 zoneid = GLOBAL_ZONEID; 2282 } 2283 2284 /* 2285 * Always make sure that the NS/NA packets don't get load 2286 * spread. 
This is needed so that the probe packets sent 2287 * by the in.mpathd daemon can really go out on the desired 2288 * interface. Probe packets are made to go out on a desired 2289 * interface by including a ip6i with ATTACH_IF flag. As these 2290 * packets indirectly end up sending/receiving NS/NA packets 2291 * (neighbor doing NUD), we have to make sure that NA 2292 * also go out on the same interface. 2293 */ 2294 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7) / 8; 2295 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 2296 plen * 8; 2297 mp = allocb(len, BPRI_LO); 2298 if (mp == NULL) { 2299 if (src_ipif != NULL) 2300 ipif_refrele(src_ipif); 2301 return (B_TRUE); 2302 } 2303 bzero((char *)mp->b_rptr, len); 2304 mp->b_wptr = mp->b_rptr + len; 2305 2306 ip6i = (ip6i_t *)mp->b_rptr; 2307 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2308 ip6i->ip6i_nxt = IPPROTO_RAW; 2309 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2310 if (flag & NDP_PROBE) 2311 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 2312 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2313 2314 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 2315 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2316 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2317 ip6h->ip6_nxt = IPPROTO_ICMPV6; 2318 ip6h->ip6_hops = IPV6_MAX_HOPS; 2319 ip6h->ip6_dst = *target; 2320 icmp6 = (icmp6_t *)&ip6h[1]; 2321 2322 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 2323 sizeof (nd_neighbor_advert_t)); 2324 2325 if (operation == ND_NEIGHBOR_SOLICIT) { 2326 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 2327 2328 if (!(flag & NDP_PROBE)) 2329 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 2330 ip6h->ip6_src = *sender; 2331 ns->nd_ns_target = *target; 2332 if (!(flag & NDP_UNICAST)) { 2333 /* Form multicast address of the target */ 2334 ip6h->ip6_dst = ipv6_solicited_node_mcast; 2335 ip6h->ip6_dst.s6_addr32[3] |= 2336 ns->nd_ns_target.s6_addr32[3]; 2337 } 2338 } else { 2339 
nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 2340 2341 ASSERT(!(flag & NDP_PROBE)); 2342 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2343 ip6h->ip6_src = *sender; 2344 na->nd_na_target = *sender; 2345 if (flag & NDP_ISROUTER) 2346 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 2347 if (flag & NDP_SOLICITED) 2348 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 2349 if (flag & NDP_ORIDE) 2350 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 2351 } 2352 2353 hw_addr = NULL; 2354 if (!(flag & NDP_PROBE)) { 2355 hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : 2356 hwaddr_ill->ill_phys_addr; 2357 if (hw_addr != NULL) { 2358 /* Fill in link layer address and option len */ 2359 opt->nd_opt_len = (uint8_t)plen; 2360 bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); 2361 } 2362 } 2363 if (hw_addr == NULL) { 2364 /* If there's no link layer address option, then strip it. */ 2365 len -= plen * 8; 2366 mp->b_wptr = mp->b_rptr + len; 2367 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2368 } 2369 2370 icmp6->icmp6_type = (uint8_t)operation; 2371 icmp6->icmp6_code = 0; 2372 /* 2373 * Prepare for checksum by putting icmp length in the icmp 2374 * checksum field. The checksum is calculated in ip_wput_v6. 2375 */ 2376 icmp6->icmp6_cksum = ip6h->ip6_plen; 2377 2378 if (src_ipif != NULL) 2379 ipif_refrele(src_ipif); 2380 2381 ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT); 2382 return (B_FALSE); 2383 } 2384 2385 /* 2386 * Make a link layer address (does not include the SAP) from an nce. 2387 * To form the link layer address, use the last four bytes of ipv6 2388 * address passed in and the fixed offset stored in nce. 
2389 */ 2390 static void 2391 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 2392 { 2393 uchar_t *mask, *to; 2394 ill_t *ill = nce->nce_ill; 2395 int len; 2396 2397 if (ill->ill_net_type == IRE_IF_NORESOLVER) 2398 return; 2399 ASSERT(nce->nce_res_mp != NULL); 2400 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 2401 ASSERT(nce->nce_flags & NCE_F_MAPPING); 2402 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 2403 ASSERT(addr != NULL); 2404 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 2405 addrpos, ill->ill_nd_lla_len); 2406 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 2407 IPV6_ADDR_LEN); 2408 mask = (uchar_t *)&nce->nce_extract_mask; 2409 mask += (IPV6_ADDR_LEN - len); 2410 addr += (IPV6_ADDR_LEN - len); 2411 to = addrpos + nce->nce_ll_extract_start; 2412 while (len-- > 0) 2413 *to++ |= *mask++ & *addr++; 2414 } 2415 2416 /* 2417 * Pass a cache report back out via NDD. 2418 */ 2419 /* ARGSUSED */ 2420 int 2421 ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 2422 { 2423 ip_stack_t *ipst; 2424 2425 if (CONN_Q(q)) 2426 ipst = CONNQ_TO_IPST(q); 2427 else 2428 ipst = ILLQ_TO_IPST(q); 2429 2430 (void) mi_mpprintf(mp, "ifname hardware addr flags" 2431 " proto addr/mask"); 2432 ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp, ipst); 2433 return (0); 2434 } 2435 2436 /* 2437 * Add a single line to the NDP Cache Entry Report. 2438 */ 2439 static void 2440 nce_report1(nce_t *nce, uchar_t *mp_arg) 2441 { 2442 ill_t *ill = nce->nce_ill; 2443 char local_buf[INET6_ADDRSTRLEN]; 2444 uchar_t flags_buf[10]; 2445 uint32_t flags = nce->nce_flags; 2446 mblk_t *mp = (mblk_t *)mp_arg; 2447 uchar_t *h; 2448 uchar_t *m = flags_buf; 2449 in6_addr_t v6addr; 2450 uint64_t now; 2451 2452 /* 2453 * Lock the nce to protect nce_res_mp from being changed 2454 * if an external resolver address resolution completes 2455 * while nce_res_mp is being accessed here. 
2456 * 2457 * Deal with all address formats, not just Ethernet-specific 2458 * In addition, make sure that the mblk has enough space 2459 * before writing to it. If is doesn't, allocate a new one. 2460 */ 2461 if (nce->nce_ipversion == IPV4_VERSION) { 2462 /* 2463 * Don't include v4 NCEs in NDP cache entry report. 2464 * But sanity check for lingering ND_INITIAL entries 2465 * when we do 'ndd -get /dev/ip ip_ndp_cache_report' 2466 */ 2467 if (nce->nce_state == ND_INITIAL) { 2468 2469 now = TICK_TO_MSEC(lbolt64); 2470 if (now - nce->nce_init_time > NCE_STUCK_TIMEOUT) { 2471 DTRACE_PROBE1(nce__stuck, nce_t *, nce); 2472 } 2473 } 2474 return; 2475 } 2476 2477 ASSERT(ill != NULL); 2478 v6addr = nce->nce_mask; 2479 if (flags & NCE_F_PERMANENT) 2480 *m++ = 'P'; 2481 if (flags & NCE_F_ISROUTER) 2482 *m++ = 'R'; 2483 if (flags & NCE_F_MAPPING) 2484 *m++ = 'M'; 2485 *m = '\0'; 2486 2487 if (ill->ill_net_type == IRE_IF_RESOLVER) { 2488 size_t addrlen; 2489 char *addr_buf; 2490 dl_unitdata_req_t *dl; 2491 2492 mutex_enter(&nce->nce_lock); 2493 h = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2494 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 2495 if (ill->ill_flags & ILLF_XRESOLV) 2496 addrlen = (3 * (dl->dl_dest_addr_length)); 2497 else 2498 addrlen = (3 * (ill->ill_nd_lla_len)); 2499 if (addrlen <= 0) { 2500 mutex_exit(&nce->nce_lock); 2501 (void) mi_mpprintf(mp, 2502 "%8s %9s %5s %s/%d", 2503 ill->ill_name, 2504 "None", 2505 (uchar_t *)&flags_buf, 2506 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2507 (char *)local_buf, sizeof (local_buf)), 2508 ip_mask_to_plen_v6(&v6addr)); 2509 } else { 2510 /* 2511 * Convert the hardware/lla address to ascii 2512 */ 2513 addr_buf = kmem_zalloc(addrlen, KM_NOSLEEP); 2514 if (addr_buf == NULL) { 2515 mutex_exit(&nce->nce_lock); 2516 return; 2517 } 2518 (void) mac_colon_addr((uint8_t *)h, 2519 (ill->ill_flags & ILLF_XRESOLV) ? 
2520 dl->dl_dest_addr_length : ill->ill_nd_lla_len, 2521 addr_buf, addrlen); 2522 mutex_exit(&nce->nce_lock); 2523 (void) mi_mpprintf(mp, "%8s %17s %5s %s/%d", 2524 ill->ill_name, addr_buf, (uchar_t *)&flags_buf, 2525 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2526 (char *)local_buf, sizeof (local_buf)), 2527 ip_mask_to_plen_v6(&v6addr)); 2528 kmem_free(addr_buf, addrlen); 2529 } 2530 } else { 2531 (void) mi_mpprintf(mp, 2532 "%8s %9s %5s %s/%d", 2533 ill->ill_name, 2534 "None", 2535 (uchar_t *)&flags_buf, 2536 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2537 (char *)local_buf, sizeof (local_buf)), 2538 ip_mask_to_plen_v6(&v6addr)); 2539 } 2540 } 2541 2542 mblk_t * 2543 nce_udreq_alloc(ill_t *ill) 2544 { 2545 mblk_t *template_mp = NULL; 2546 dl_unitdata_req_t *dlur; 2547 int sap_length; 2548 2549 ASSERT(ill->ill_isv6); 2550 2551 sap_length = ill->ill_sap_length; 2552 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 2553 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 2554 if (template_mp == NULL) 2555 return (NULL); 2556 2557 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 2558 dlur->dl_priority.dl_min = 0; 2559 dlur->dl_priority.dl_max = 0; 2560 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 2561 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 2562 2563 /* Copy in the SAP value. */ 2564 NCE_LL_SAP_COPY(ill, template_mp); 2565 2566 return (template_mp); 2567 } 2568 2569 /* 2570 * NDP retransmit timer. 2571 * This timer goes off when: 2572 * a. It is time to retransmit NS for resolver. 2573 * b. It is time to send reachability probes. 2574 */ 2575 void 2576 ndp_timer(void *arg) 2577 { 2578 nce_t *nce = arg; 2579 ill_t *ill = nce->nce_ill; 2580 uint32_t ms; 2581 char addrbuf[INET6_ADDRSTRLEN]; 2582 mblk_t *mp; 2583 boolean_t dropped = B_FALSE; 2584 ip_stack_t *ipst = ill->ill_ipst; 2585 2586 /* 2587 * The timer has to be cancelled by ndp_delete before doing the final 2588 * refrele. 
So the NCE is guaranteed to exist when the timer runs 2589 * until it clears the timeout_id. Before clearing the timeout_id 2590 * bump up the refcnt so that we can continue to use the nce 2591 */ 2592 ASSERT(nce != NULL); 2593 2594 /* 2595 * Grab the ill_g_lock now itself to avoid lock order problems. 2596 * nce_solicit needs ill_g_lock to be able to traverse ills 2597 */ 2598 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2599 mutex_enter(&nce->nce_lock); 2600 NCE_REFHOLD_LOCKED(nce); 2601 nce->nce_timeout_id = 0; 2602 2603 /* 2604 * Check the reachability state first. 2605 */ 2606 switch (nce->nce_state) { 2607 case ND_DELAY: 2608 rw_exit(&ipst->ips_ill_g_lock); 2609 nce->nce_state = ND_PROBE; 2610 mutex_exit(&nce->nce_lock); 2611 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2612 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2613 if (ip_debug > 3) { 2614 /* ip2dbg */ 2615 pr_addr_dbg("ndp_timer: state for %s changed " 2616 "to PROBE\n", AF_INET6, &nce->nce_addr); 2617 } 2618 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2619 NCE_REFRELE(nce); 2620 return; 2621 case ND_PROBE: 2622 /* must be retransmit timer */ 2623 rw_exit(&ipst->ips_ill_g_lock); 2624 nce->nce_pcnt--; 2625 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2626 nce->nce_pcnt >= -1); 2627 if (nce->nce_pcnt > 0) { 2628 /* 2629 * As per RFC2461, the nce gets deleted after 2630 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2631 * Note that the first unicast solicitation is sent 2632 * during the DELAY state. 2633 */ 2634 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2635 nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, 2636 addrbuf, sizeof (addrbuf)))); 2637 mutex_exit(&nce->nce_lock); 2638 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, 2639 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, 2640 (nce->nce_flags & NCE_F_PERMANENT) ? 
NDP_PROBE : 2641 NDP_UNICAST); 2642 if (dropped) { 2643 mutex_enter(&nce->nce_lock); 2644 nce->nce_pcnt++; 2645 mutex_exit(&nce->nce_lock); 2646 } 2647 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 2648 } else if (nce->nce_pcnt < 0) { 2649 /* No hope, delete the nce */ 2650 nce->nce_state = ND_UNREACHABLE; 2651 mutex_exit(&nce->nce_lock); 2652 if (ip_debug > 2) { 2653 /* ip1dbg */ 2654 pr_addr_dbg("ndp_timer: Delete IRE for" 2655 " dst %s\n", AF_INET6, &nce->nce_addr); 2656 } 2657 ndp_delete(nce); 2658 } else if (!(nce->nce_flags & NCE_F_PERMANENT)) { 2659 /* Wait RetransTimer, before deleting the entry */ 2660 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2661 nce->nce_pcnt, inet_ntop(AF_INET6, 2662 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2663 mutex_exit(&nce->nce_lock); 2664 /* Wait one interval before killing */ 2665 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2666 } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { 2667 ipif_t *ipif; 2668 2669 /* 2670 * We're done probing, and we can now declare this 2671 * address to be usable. Let IP know that it's ok to 2672 * use. 
2673 */ 2674 nce->nce_state = ND_REACHABLE; 2675 mutex_exit(&nce->nce_lock); 2676 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, 2677 ALL_ZONES, NULL, NULL, NULL, NULL, ipst); 2678 if (ipif != NULL) { 2679 if (ipif->ipif_was_dup) { 2680 char ibuf[LIFNAMSIZ + 10]; 2681 char sbuf[INET6_ADDRSTRLEN]; 2682 2683 ipif->ipif_was_dup = B_FALSE; 2684 (void) inet_ntop(AF_INET6, 2685 &ipif->ipif_v6lcl_addr, 2686 sbuf, sizeof (sbuf)); 2687 ipif_get_name(ipif, ibuf, 2688 sizeof (ibuf)); 2689 cmn_err(CE_NOTE, "recovered address " 2690 "%s on %s", sbuf, ibuf); 2691 } 2692 if ((ipif->ipif_flags & IPIF_UP) && 2693 !ipif->ipif_addr_ready) { 2694 ip_rts_ifmsg(ipif); 2695 ip_rts_newaddrmsg(RTM_ADD, 0, ipif); 2696 sctp_update_ipif(ipif, SCTP_IPIF_UP); 2697 } 2698 ipif->ipif_addr_ready = 1; 2699 ipif_refrele(ipif); 2700 } 2701 /* Begin defending our new address */ 2702 nce->nce_unsolicit_count = 0; 2703 dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, 2704 B_FALSE, &nce->nce_addr, &ipv6_all_hosts_mcast, 2705 nce_advert_flags(nce)); 2706 if (dropped) { 2707 nce->nce_unsolicit_count = 1; 2708 NDP_RESTART_TIMER(nce, 2709 ipst->ips_ip_ndp_unsolicit_interval); 2710 } else if (ipst->ips_ip_ndp_defense_interval != 0) { 2711 NDP_RESTART_TIMER(nce, 2712 ipst->ips_ip_ndp_defense_interval); 2713 } 2714 } else { 2715 /* 2716 * This is an address we're probing to be our own, but 2717 * the ill is down. Wait until it comes back before 2718 * doing anything, but switch to reachable state so 2719 * that the restart will work. 2720 */ 2721 nce->nce_state = ND_REACHABLE; 2722 mutex_exit(&nce->nce_lock); 2723 } 2724 NCE_REFRELE(nce); 2725 return; 2726 case ND_INCOMPLETE: 2727 /* 2728 * Must be resolvers retransmit timer. 2729 */ 2730 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2731 ip6i_t *ip6i; 2732 ip6_t *ip6h; 2733 mblk_t *data_mp; 2734 2735 /* 2736 * Walk the list of packets queued, and see if there 2737 * are any multipathing probe packets. 
Such packets 2738 * are always queued at the head. Since this is a 2739 * retransmit timer firing, mark such packets as 2740 * delayed in ND resolution. This info will be used 2741 * in ip_wput_v6(). Multipathing probe packets will 2742 * always have an ip6i_t. Once we hit a packet without 2743 * it, we can break out of this loop. 2744 */ 2745 if (mp->b_datap->db_type == M_CTL) 2746 data_mp = mp->b_cont; 2747 else 2748 data_mp = mp; 2749 2750 ip6h = (ip6_t *)data_mp->b_rptr; 2751 if (ip6h->ip6_nxt != IPPROTO_RAW) 2752 break; 2753 2754 /* 2755 * This message should have been pulled up already in 2756 * ip_wput_v6. We can't do pullups here because the 2757 * b_next/b_prev is non-NULL. 2758 */ 2759 ip6i = (ip6i_t *)ip6h; 2760 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2761 sizeof (ip6i_t) + IPV6_HDR_LEN); 2762 2763 /* Mark this packet as delayed due to ND resolution */ 2764 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2765 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2766 } 2767 if (nce->nce_qd_mp != NULL) { 2768 ms = nce_solicit(nce, NULL); 2769 rw_exit(&ipst->ips_ill_g_lock); 2770 if (ms == 0) { 2771 if (nce->nce_state != ND_REACHABLE) { 2772 mutex_exit(&nce->nce_lock); 2773 nce_resolv_failed(nce); 2774 ndp_delete(nce); 2775 } else { 2776 mutex_exit(&nce->nce_lock); 2777 } 2778 } else { 2779 mutex_exit(&nce->nce_lock); 2780 NDP_RESTART_TIMER(nce, (clock_t)ms); 2781 } 2782 NCE_REFRELE(nce); 2783 return; 2784 } 2785 mutex_exit(&nce->nce_lock); 2786 rw_exit(&ipst->ips_ill_g_lock); 2787 NCE_REFRELE(nce); 2788 break; 2789 case ND_REACHABLE : 2790 rw_exit(&ipst->ips_ill_g_lock); 2791 if (((nce->nce_flags & NCE_F_UNSOL_ADV) && 2792 nce->nce_unsolicit_count != 0) || 2793 ((nce->nce_flags & NCE_F_PERMANENT) && 2794 ipst->ips_ip_ndp_defense_interval != 0)) { 2795 if (nce->nce_unsolicit_count > 0) 2796 nce->nce_unsolicit_count--; 2797 mutex_exit(&nce->nce_lock); 2798 dropped = nce_xmit(ill, 2799 ND_NEIGHBOR_ADVERT, 2800 ill, /* ill to be used for hw addr */ 2801 B_FALSE, /* use 
ill_phys_addr */ 2802 &nce->nce_addr, 2803 &ipv6_all_hosts_mcast, 2804 nce_advert_flags(nce)); 2805 if (dropped) { 2806 mutex_enter(&nce->nce_lock); 2807 nce->nce_unsolicit_count++; 2808 mutex_exit(&nce->nce_lock); 2809 } 2810 if (nce->nce_unsolicit_count != 0) { 2811 NDP_RESTART_TIMER(nce, 2812 ipst->ips_ip_ndp_unsolicit_interval); 2813 } else { 2814 NDP_RESTART_TIMER(nce, 2815 ipst->ips_ip_ndp_defense_interval); 2816 } 2817 } else { 2818 mutex_exit(&nce->nce_lock); 2819 } 2820 NCE_REFRELE(nce); 2821 break; 2822 default: 2823 rw_exit(&ipst->ips_ill_g_lock); 2824 mutex_exit(&nce->nce_lock); 2825 NCE_REFRELE(nce); 2826 break; 2827 } 2828 } 2829 2830 /* 2831 * Set a link layer address from the ll_addr passed in. 2832 * Copy SAP from ill. 2833 */ 2834 static void 2835 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2836 { 2837 ill_t *ill = nce->nce_ill; 2838 uchar_t *woffset; 2839 2840 ASSERT(ll_addr != NULL); 2841 /* Always called before fast_path_probe */ 2842 ASSERT(nce->nce_fp_mp == NULL); 2843 if (ill->ill_sap_length != 0) { 2844 /* 2845 * Copy the SAP type specified in the 2846 * request into the xmit template. 2847 */ 2848 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2849 } 2850 if (ill->ill_phys_addr_length > 0) { 2851 /* 2852 * The bcopy() below used to be called for the physical address 2853 * length rather than the link layer address length. For 2854 * ethernet and many other media, the phys_addr and lla are 2855 * identical. 2856 * However, with xresolv interfaces being introduced, the 2857 * phys_addr and lla are no longer the same, and the physical 2858 * address may not have any useful meaning, so we use the lla 2859 * for IPv6 address resolution and destination addressing. 2860 * 2861 * For PPP or other interfaces with a zero length 2862 * physical address, don't do anything here. 2863 * The bcopy() with a zero phys_addr length was previously 2864 * a no-op for interfaces with a zero-length physical address. 
2865 * Using the lla for them would change the way they operate. 2866 * Doing nothing in such cases preserves expected behavior. 2867 */ 2868 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2869 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2870 } 2871 } 2872 2873 static boolean_t 2874 nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) 2875 { 2876 ill_t *ill = nce->nce_ill; 2877 uchar_t *ll_offset; 2878 2879 ASSERT(nce->nce_res_mp != NULL); 2880 if (ll_addr == NULL) 2881 return (B_FALSE); 2882 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2883 if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) 2884 return (B_TRUE); 2885 return (B_FALSE); 2886 } 2887 2888 /* 2889 * Updates the link layer address or the reachability state of 2890 * a cache entry. Reset probe counter if needed. 2891 */ 2892 static void 2893 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2894 { 2895 ill_t *ill = nce->nce_ill; 2896 boolean_t need_stop_timer = B_FALSE; 2897 boolean_t need_fastpath_update = B_FALSE; 2898 2899 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2900 ASSERT(nce->nce_ipversion == IPV6_VERSION); 2901 /* 2902 * If this interface does not do NUD, there is no point 2903 * in allowing an update to the cache entry. Although 2904 * we will respond to NS. 2905 * The only time we accept an update for a resolver when 2906 * NUD is turned off is when it has just been created. 2907 * Non-Resolvers will always be created as REACHABLE. 
2908 */ 2909 if (new_state != ND_UNCHANGED) { 2910 if ((nce->nce_flags & NCE_F_NONUD) && 2911 (nce->nce_state != ND_INCOMPLETE)) 2912 return; 2913 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2914 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2915 need_stop_timer = B_TRUE; 2916 if (new_state == ND_REACHABLE) 2917 nce->nce_last = TICK_TO_MSEC(lbolt64); 2918 else { 2919 /* We force NUD in this case */ 2920 nce->nce_last = 0; 2921 } 2922 nce->nce_state = new_state; 2923 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2924 } 2925 /* 2926 * In case of fast path we need to free the the fastpath 2927 * M_DATA and do another probe. Otherwise we can just 2928 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2929 * whatever packets that happens to be transmitting at the time. 2930 */ 2931 if (new_ll_addr != NULL) { 2932 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2933 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2934 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2935 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2936 if (nce->nce_fp_mp != NULL) { 2937 freemsg(nce->nce_fp_mp); 2938 nce->nce_fp_mp = NULL; 2939 } 2940 need_fastpath_update = B_TRUE; 2941 } 2942 mutex_exit(&nce->nce_lock); 2943 if (need_stop_timer) { 2944 (void) untimeout(nce->nce_timeout_id); 2945 nce->nce_timeout_id = 0; 2946 } 2947 if (need_fastpath_update) 2948 nce_fastpath(nce); 2949 mutex_enter(&nce->nce_lock); 2950 } 2951 2952 void 2953 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 2954 { 2955 uint_t count = 0; 2956 mblk_t **mpp; 2957 2958 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2959 2960 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2961 mpp = &(*mpp)->b_next) { 2962 if (++count > 2963 nce->nce_ill->ill_max_buf) { 2964 mblk_t *tmp = nce->nce_qd_mp->b_next; 2965 2966 nce->nce_qd_mp->b_next = NULL; 2967 nce->nce_qd_mp->b_prev = NULL; 2968 freemsg(nce->nce_qd_mp); 2969 nce->nce_qd_mp = tmp; 2970 } 2971 } 2972 /* put this on the list */ 2973 if (head_insert) { 2974 
mp->b_next = nce->nce_qd_mp; 2975 nce->nce_qd_mp = mp; 2976 } else { 2977 *mpp = mp; 2978 } 2979 } 2980 2981 static void 2982 nce_queue_mp(nce_t *nce, mblk_t *mp) 2983 { 2984 boolean_t head_insert = B_FALSE; 2985 ip6_t *ip6h; 2986 ip6i_t *ip6i; 2987 mblk_t *data_mp; 2988 2989 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2990 2991 if (mp->b_datap->db_type == M_CTL) 2992 data_mp = mp->b_cont; 2993 else 2994 data_mp = mp; 2995 ip6h = (ip6_t *)data_mp->b_rptr; 2996 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2997 /* 2998 * This message should have been pulled up already in 2999 * ip_wput_v6. We can't do pullups here because the message 3000 * could be from the nce_qd_mp which could have b_next/b_prev 3001 * non-NULL. 3002 */ 3003 ip6i = (ip6i_t *)ip6h; 3004 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 3005 sizeof (ip6i_t) + IPV6_HDR_LEN); 3006 /* 3007 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 3008 * This has 2 aspects mentioned below. 3009 * 1. Perform head insertion in the nce_qd_mp for these packets. 3010 * This ensures that next retransmit of ND solicitation 3011 * will use the interface specified by the probe packet, 3012 * for both NS and NA. This corresponds to the src address 3013 * in the IPv6 packet. If we insert at tail, we will be 3014 * depending on the packet at the head for successful 3015 * ND resolution. This is not reliable, because the interface 3016 * on which the NA arrives could be different from the interface 3017 * on which the NS was sent, and if the receiving interface is 3018 * failed, it will appear that the sending interface is also 3019 * failed, causing in.mpathd to misdiagnose this as link 3020 * failure. 3021 * 2. Drop the original packet, if the ND resolution did not 3022 * succeed in the first attempt. However we will create the 3023 * nce and the ire, as soon as the ND resolution succeeds. 3024 * We don't gain anything by queueing multiple probe packets 3025 * and sending them back-to-back once resolution succeeds. 
3026 * It is sufficient to send just 1 packet after ND resolution 3027 * succeeds. Since mpathd is sending down probe packets at a 3028 * constant rate, we don't need to send the queued packet. We 3029 * need to queue it only for NDP resolution. The benefit of 3030 * dropping the probe packets that were delayed in ND 3031 * resolution, is that in.mpathd will not see inflated 3032 * RTT. If the ND resolution does not succeed within 3033 * in.mpathd's failure detection time, mpathd may detect 3034 * a failure, and it does not matter whether the packet 3035 * was queued or dropped. 3036 */ 3037 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 3038 head_insert = B_TRUE; 3039 } 3040 3041 nce_queue_mp_common(nce, mp, head_insert); 3042 } 3043 3044 /* 3045 * Called when address resolution failed due to a timeout. 3046 * Send an ICMP unreachable in response to all queued packets. 3047 */ 3048 void 3049 nce_resolv_failed(nce_t *nce) 3050 { 3051 mblk_t *mp, *nxt_mp, *first_mp; 3052 char buf[INET6_ADDRSTRLEN]; 3053 ip6_t *ip6h; 3054 zoneid_t zoneid = GLOBAL_ZONEID; 3055 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3056 3057 ip1dbg(("nce_resolv_failed: dst %s\n", 3058 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 3059 mutex_enter(&nce->nce_lock); 3060 mp = nce->nce_qd_mp; 3061 nce->nce_qd_mp = NULL; 3062 mutex_exit(&nce->nce_lock); 3063 while (mp != NULL) { 3064 nxt_mp = mp->b_next; 3065 mp->b_next = NULL; 3066 mp->b_prev = NULL; 3067 3068 first_mp = mp; 3069 if (mp->b_datap->db_type == M_CTL) { 3070 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 3071 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3072 zoneid = io->ipsec_out_zoneid; 3073 ASSERT(zoneid != ALL_ZONES); 3074 mp = mp->b_cont; 3075 } 3076 3077 ip6h = (ip6_t *)mp->b_rptr; 3078 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3079 ip6i_t *ip6i; 3080 /* 3081 * This message should have been pulled up already 3082 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 3083 * the header is pulled up. 
3084 */ 3085 ip6i = (ip6i_t *)ip6h; 3086 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 3087 sizeof (ip6i_t) + IPV6_HDR_LEN); 3088 mp->b_rptr += sizeof (ip6i_t); 3089 } 3090 /* 3091 * Ignore failure since icmp_unreachable_v6 will silently 3092 * drop packets with an unspecified source address. 3093 */ 3094 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst); 3095 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 3096 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst); 3097 mp = nxt_mp; 3098 } 3099 } 3100 3101 /* 3102 * Called by SIOCSNDP* ioctl to add/change an nce entry 3103 * and the corresponding attributes. 3104 * Disallow states other than ND_REACHABLE or ND_STALE. 3105 */ 3106 int 3107 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 3108 { 3109 sin6_t *sin6; 3110 in6_addr_t *addr; 3111 nce_t *nce; 3112 int err; 3113 uint16_t new_flags = 0; 3114 uint16_t old_flags = 0; 3115 int inflags = lnr->lnr_flags; 3116 ip_stack_t *ipst = ill->ill_ipst; 3117 3118 ASSERT(ill->ill_isv6); 3119 if ((lnr->lnr_state_create != ND_REACHABLE) && 3120 (lnr->lnr_state_create != ND_STALE)) 3121 return (EINVAL); 3122 3123 sin6 = (sin6_t *)&lnr->lnr_addr; 3124 addr = &sin6->sin6_addr; 3125 3126 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 3127 /* We know it can not be mapping so just look in the hash table */ 3128 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 3129 nce = nce_lookup_addr(ill, addr, nce); 3130 if (nce != NULL) 3131 new_flags = nce->nce_flags; 3132 3133 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 3134 case NDF_ISROUTER_ON: 3135 new_flags |= NCE_F_ISROUTER; 3136 break; 3137 case NDF_ISROUTER_OFF: 3138 new_flags &= ~NCE_F_ISROUTER; 3139 break; 3140 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3141 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3142 if (nce != NULL) 3143 NCE_REFRELE(nce); 3144 return (EINVAL); 3145 } 3146 3147 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3148 case NDF_ANYCAST_ON: 3149 new_flags |= NCE_F_ANYCAST; 3150 break; 3151 case 
NDF_ANYCAST_OFF: 3152 new_flags &= ~NCE_F_ANYCAST; 3153 break; 3154 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3155 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3156 if (nce != NULL) 3157 NCE_REFRELE(nce); 3158 return (EINVAL); 3159 } 3160 3161 if (nce == NULL) { 3162 err = ndp_add_v6(ill, 3163 (uchar_t *)lnr->lnr_hdw_addr, 3164 addr, 3165 &ipv6_all_ones, 3166 &ipv6_all_zeros, 3167 0, 3168 new_flags, 3169 lnr->lnr_state_create, 3170 &nce); 3171 if (err != 0) { 3172 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3173 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 3174 return (err); 3175 } 3176 } 3177 old_flags = nce->nce_flags; 3178 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3179 /* 3180 * Router turned to host, delete all ires. 3181 * XXX Just delete the entry, but we need to add too. 3182 */ 3183 nce->nce_flags &= ~NCE_F_ISROUTER; 3184 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3185 ndp_delete(nce); 3186 NCE_REFRELE(nce); 3187 return (0); 3188 } 3189 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3190 3191 mutex_enter(&nce->nce_lock); 3192 nce->nce_flags = new_flags; 3193 mutex_exit(&nce->nce_lock); 3194 /* 3195 * Note that we ignore the state at this point, which 3196 * should be either STALE or REACHABLE. Instead we let 3197 * the link layer address passed in to determine the state 3198 * much like incoming packets. 3199 */ 3200 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3201 NCE_REFRELE(nce); 3202 return (0); 3203 } 3204 3205 /* 3206 * If the device driver supports it, we make nce_fp_mp to have 3207 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 3208 * The caller ensures there is hold on nce for this function. 3209 * Note that since ill_fastpath_probe() copies the mblk there is 3210 * no need for the hold beyond this function. 
3211 */ 3212 void 3213 nce_fastpath(nce_t *nce) 3214 { 3215 ill_t *ill = nce->nce_ill; 3216 int res; 3217 3218 ASSERT(ill != NULL); 3219 ASSERT(nce->nce_state != ND_INITIAL && nce->nce_state != ND_INCOMPLETE); 3220 3221 if (nce->nce_fp_mp != NULL) { 3222 /* Already contains fastpath info */ 3223 return; 3224 } 3225 if (nce->nce_res_mp != NULL) { 3226 nce_fastpath_list_add(nce); 3227 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3228 /* 3229 * EAGAIN is an indication of a transient error 3230 * i.e. allocation failure etc. leave the nce in the list it 3231 * will be updated when another probe happens for another ire 3232 * if not it will be taken out of the list when the ire is 3233 * deleted. 3234 */ 3235 3236 if (res != 0 && res != EAGAIN) 3237 nce_fastpath_list_delete(nce); 3238 } 3239 } 3240 3241 /* 3242 * Drain the list of nce's waiting for fastpath response. 3243 */ 3244 void 3245 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3246 void *arg) 3247 { 3248 3249 nce_t *next_nce; 3250 nce_t *current_nce; 3251 nce_t *first_nce; 3252 nce_t *prev_nce = NULL; 3253 3254 mutex_enter(&ill->ill_lock); 3255 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3256 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3257 next_nce = current_nce->nce_fastpath; 3258 /* 3259 * Take it off the list if we're flushing, or if the callback 3260 * routine tells us to do so. Otherwise, leave the nce in the 3261 * fastpath list to handle any pending response from the lower 3262 * layer. We can't drain the list when the callback routine 3263 * comparison failed, because the response is asynchronous in 3264 * nature, and may not arrive in the same order as the list 3265 * insertion. 
3266 */ 3267 if (func == NULL || func(current_nce, arg)) { 3268 current_nce->nce_fastpath = NULL; 3269 if (current_nce == first_nce) 3270 ill->ill_fastpath_list = first_nce = next_nce; 3271 else 3272 prev_nce->nce_fastpath = next_nce; 3273 } else { 3274 /* previous element that is still in the list */ 3275 prev_nce = current_nce; 3276 } 3277 current_nce = next_nce; 3278 } 3279 mutex_exit(&ill->ill_lock); 3280 } 3281 3282 /* 3283 * Add nce to the nce fastpath list. 3284 */ 3285 void 3286 nce_fastpath_list_add(nce_t *nce) 3287 { 3288 ill_t *ill; 3289 3290 ill = nce->nce_ill; 3291 3292 mutex_enter(&ill->ill_lock); 3293 mutex_enter(&nce->nce_lock); 3294 3295 /* 3296 * if nce has not been deleted and 3297 * is not already in the list add it. 3298 */ 3299 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3300 (nce->nce_fastpath == NULL)) { 3301 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3302 ill->ill_fastpath_list = nce; 3303 } 3304 3305 mutex_exit(&nce->nce_lock); 3306 mutex_exit(&ill->ill_lock); 3307 } 3308 3309 /* 3310 * remove nce from the nce fastpath list. 3311 */ 3312 void 3313 nce_fastpath_list_delete(nce_t *nce) 3314 { 3315 nce_t *nce_ptr; 3316 3317 ill_t *ill; 3318 3319 ill = nce->nce_ill; 3320 ASSERT(ill != NULL); 3321 3322 mutex_enter(&ill->ill_lock); 3323 if (nce->nce_fastpath == NULL) 3324 goto done; 3325 3326 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3327 3328 if (ill->ill_fastpath_list == nce) { 3329 ill->ill_fastpath_list = nce->nce_fastpath; 3330 } else { 3331 nce_ptr = ill->ill_fastpath_list; 3332 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3333 if (nce_ptr->nce_fastpath == nce) { 3334 nce_ptr->nce_fastpath = nce->nce_fastpath; 3335 break; 3336 } 3337 nce_ptr = nce_ptr->nce_fastpath; 3338 } 3339 } 3340 3341 nce->nce_fastpath = NULL; 3342 done: 3343 mutex_exit(&ill->ill_lock); 3344 } 3345 3346 /* 3347 * Update all NCE's that are not in fastpath mode and 3348 * have an nce_fp_mp that matches mp. 
mp->b_cont contains 3349 * the fastpath header. 3350 * 3351 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 3352 */ 3353 boolean_t 3354 ndp_fastpath_update(nce_t *nce, void *arg) 3355 { 3356 mblk_t *mp, *fp_mp; 3357 uchar_t *mp_rptr, *ud_mp_rptr; 3358 mblk_t *ud_mp = nce->nce_res_mp; 3359 ptrdiff_t cmplen; 3360 3361 if (nce->nce_flags & NCE_F_MAPPING) 3362 return (B_TRUE); 3363 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3364 return (B_TRUE); 3365 3366 ip2dbg(("ndp_fastpath_update: trying\n")); 3367 mp = (mblk_t *)arg; 3368 mp_rptr = mp->b_rptr; 3369 cmplen = mp->b_wptr - mp_rptr; 3370 ASSERT(cmplen >= 0); 3371 ud_mp_rptr = ud_mp->b_rptr; 3372 /* 3373 * The nce is locked here to prevent any other threads 3374 * from accessing and changing nce_res_mp when the IPv6 address 3375 * becomes resolved to an lla while we're in the middle 3376 * of looking at and comparing the hardware address (lla). 3377 * It is also locked to prevent multiple threads in nce_fastpath_update 3378 * from examining nce_res_mp atthe same time. 3379 */ 3380 mutex_enter(&nce->nce_lock); 3381 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3382 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3383 mutex_exit(&nce->nce_lock); 3384 /* 3385 * Don't take the ire off the fastpath list yet, 3386 * since the response may come later. 3387 */ 3388 return (B_FALSE); 3389 } 3390 /* Matched - install mp as the fastpath mp */ 3391 ip1dbg(("ndp_fastpath_update: match\n")); 3392 fp_mp = dupb(mp->b_cont); 3393 if (fp_mp != NULL) { 3394 nce->nce_fp_mp = fp_mp; 3395 } 3396 mutex_exit(&nce->nce_lock); 3397 return (B_TRUE); 3398 } 3399 3400 /* 3401 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3402 * driver. Note that it assumes IP is exclusive... 3403 */ 3404 /* ARGSUSED */ 3405 void 3406 ndp_fastpath_flush(nce_t *nce, char *arg) 3407 { 3408 if (nce->nce_flags & NCE_F_MAPPING) 3409 return; 3410 /* No fastpath info? 
*/ 3411 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3412 return; 3413 3414 if (nce->nce_ipversion == IPV4_VERSION && 3415 nce->nce_flags & NCE_F_BCAST) { 3416 /* 3417 * IPv4 BROADCAST entries: 3418 * We can't delete the nce since it is difficult to 3419 * recreate these without going through the 3420 * ipif down/up dance. 3421 * 3422 * All access to nce->nce_fp_mp in the case of these 3423 * is protected by nce_lock. 3424 */ 3425 mutex_enter(&nce->nce_lock); 3426 if (nce->nce_fp_mp != NULL) { 3427 freeb(nce->nce_fp_mp); 3428 nce->nce_fp_mp = NULL; 3429 mutex_exit(&nce->nce_lock); 3430 nce_fastpath(nce); 3431 } else { 3432 mutex_exit(&nce->nce_lock); 3433 } 3434 } else { 3435 /* Just delete the NCE... */ 3436 ndp_delete(nce); 3437 } 3438 } 3439 3440 /* 3441 * Return a pointer to a given option in the packet. 3442 * Assumes that option part of the packet have already been validated. 3443 */ 3444 nd_opt_hdr_t * 3445 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3446 { 3447 while (optlen > 0) { 3448 if (opt->nd_opt_type == opt_type) 3449 return (opt); 3450 optlen -= 8 * opt->nd_opt_len; 3451 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3452 } 3453 return (NULL); 3454 } 3455 3456 /* 3457 * Verify all option lengths present are > 0, also check to see 3458 * if the option lengths and packet length are consistent. 3459 */ 3460 boolean_t 3461 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3462 { 3463 ASSERT(opt != NULL); 3464 while (optlen > 0) { 3465 if (opt->nd_opt_len == 0) 3466 return (B_FALSE); 3467 optlen -= 8 * opt->nd_opt_len; 3468 if (optlen < 0) 3469 return (B_FALSE); 3470 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3471 } 3472 return (B_TRUE); 3473 } 3474 3475 /* 3476 * ndp_walk function. 3477 * Free a fraction of the NCE cache entries. 3478 * A fraction of zero means to not free any in that category. 
3479 */ 3480 void 3481 ndp_cache_reclaim(nce_t *nce, char *arg) 3482 { 3483 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3484 uint_t rand; 3485 3486 if (nce->nce_flags & NCE_F_PERMANENT) 3487 return; 3488 3489 rand = (uint_t)lbolt + 3490 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3491 if (ncr->ncr_host != 0 && 3492 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3493 ndp_delete(nce); 3494 return; 3495 } 3496 } 3497 3498 /* 3499 * ndp_walk function. 3500 * Count the number of NCEs that can be deleted. 3501 * These would be hosts but not routers. 3502 */ 3503 void 3504 ndp_cache_count(nce_t *nce, char *arg) 3505 { 3506 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3507 3508 if (nce->nce_flags & NCE_F_PERMANENT) 3509 return; 3510 3511 ncc->ncc_total++; 3512 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3513 ncc->ncc_host++; 3514 } 3515 3516 #ifdef DEBUG 3517 void 3518 nce_trace_ref(nce_t *nce) 3519 { 3520 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3521 3522 if (nce->nce_trace_disable) 3523 return; 3524 3525 if (!th_trace_ref(nce, nce->nce_ill->ill_ipst)) { 3526 nce->nce_trace_disable = B_TRUE; 3527 nce_trace_cleanup(nce); 3528 } 3529 } 3530 3531 void 3532 nce_untrace_ref(nce_t *nce) 3533 { 3534 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3535 3536 if (!nce->nce_trace_disable) 3537 th_trace_unref(nce); 3538 } 3539 3540 static void 3541 nce_trace_cleanup(const nce_t *nce) 3542 { 3543 th_trace_cleanup(nce, nce->nce_trace_disable); 3544 } 3545 #endif 3546 3547 /* 3548 * Called when address resolution fails due to a timeout. 3549 * Send an ICMP unreachable in response to all queued packets. 
3550 */ 3551 void 3552 arp_resolv_failed(nce_t *nce) 3553 { 3554 mblk_t *mp, *nxt_mp, *first_mp; 3555 char buf[INET6_ADDRSTRLEN]; 3556 zoneid_t zoneid = GLOBAL_ZONEID; 3557 struct in_addr ipv4addr; 3558 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3559 3560 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3561 ip3dbg(("arp_resolv_failed: dst %s\n", 3562 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3563 mutex_enter(&nce->nce_lock); 3564 mp = nce->nce_qd_mp; 3565 nce->nce_qd_mp = NULL; 3566 mutex_exit(&nce->nce_lock); 3567 3568 while (mp != NULL) { 3569 nxt_mp = mp->b_next; 3570 mp->b_next = NULL; 3571 mp->b_prev = NULL; 3572 3573 first_mp = mp; 3574 /* 3575 * Send icmp unreachable messages 3576 * to the hosts. 3577 */ 3578 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst); 3579 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3580 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3581 ICMP_HOST_UNREACHABLE, zoneid, ipst); 3582 mp = nxt_mp; 3583 } 3584 } 3585 3586 int 3587 ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3588 nce_t **newnce, nce_t *src_nce) 3589 { 3590 int err; 3591 nce_t *nce; 3592 in6_addr_t addr6; 3593 ip_stack_t *ipst = ill->ill_ipst; 3594 3595 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3596 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3597 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3598 nce = nce_lookup_addr(ill, &addr6, nce); 3599 if (nce == NULL) { 3600 err = ndp_add_v4(ill, addr, flags, newnce, src_nce); 3601 } else { 3602 *newnce = nce; 3603 err = EEXIST; 3604 } 3605 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3606 return (err); 3607 } 3608 3609 /* 3610 * NDP Cache Entry creation routine for IPv4. 3611 * Mapped entries are handled in arp. 3612 * This routine must always be called with ndp4->ndp_g_lock held. 3613 * Prior to return, nce_refcnt is incremented. 
3614 */ 3615 static int 3616 ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 3617 nce_t **newnce, nce_t *src_nce) 3618 { 3619 static nce_t nce_nil; 3620 nce_t *nce; 3621 mblk_t *mp; 3622 mblk_t *template = NULL; 3623 nce_t **ncep; 3624 ip_stack_t *ipst = ill->ill_ipst; 3625 uint16_t state = ND_INITIAL; 3626 int err; 3627 3628 ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock)); 3629 ASSERT(!ill->ill_isv6); 3630 ASSERT((flags & NCE_F_MAPPING) == 0); 3631 3632 if (ill->ill_resolver_mp == NULL) 3633 return (EINVAL); 3634 /* 3635 * Allocate the mblk to hold the nce. 3636 */ 3637 mp = allocb(sizeof (nce_t), BPRI_MED); 3638 if (mp == NULL) 3639 return (ENOMEM); 3640 3641 nce = (nce_t *)mp->b_rptr; 3642 mp->b_wptr = (uchar_t *)&nce[1]; 3643 *nce = nce_nil; 3644 nce->nce_ill = ill; 3645 nce->nce_ipversion = IPV4_VERSION; 3646 nce->nce_flags = flags; 3647 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3648 nce->nce_rcnt = ill->ill_xmit_count; 3649 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3650 nce->nce_mask = ipv6_all_ones; 3651 nce->nce_extract_mask = ipv6_all_zeros; 3652 nce->nce_ll_extract_start = 0; 3653 nce->nce_qd_mp = NULL; 3654 nce->nce_mp = mp; 3655 /* This one is for nce getting created */ 3656 nce->nce_refcnt = 1; 3657 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3658 ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3659 3660 nce->nce_trace_disable = B_FALSE; 3661 3662 if (src_nce != NULL) { 3663 /* 3664 * src_nce has been provided by the caller. 
The only 3665 * caller who provides a non-null, non-broadcast 3666 * src_nce is from ip_newroute() which must pass in 3667 * a ND_REACHABLE src_nce (this condition is verified 3668 * via an ASSERT for the save_ire->ire_nce in ip_newroute()) 3669 */ 3670 mutex_enter(&src_nce->nce_lock); 3671 state = src_nce->nce_state; 3672 if ((src_nce->nce_flags & NCE_F_CONDEMNED) || 3673 (ipst->ips_ndp4->ndp_g_hw_change > 0)) { 3674 /* 3675 * src_nce has been deleted, or 3676 * ip_arp_news is in the middle of 3677 * flushing entries in the the nce. 3678 * Fail the add, since we don't know 3679 * if it is safe to copy the contents of 3680 * src_nce 3681 */ 3682 DTRACE_PROBE2(nce__bad__src__nce, 3683 nce_t *, src_nce, ill_t *, ill); 3684 mutex_exit(&src_nce->nce_lock); 3685 err = EINVAL; 3686 goto err_ret; 3687 } 3688 template = copyb(src_nce->nce_res_mp); 3689 mutex_exit(&src_nce->nce_lock); 3690 if (template == NULL) { 3691 err = ENOMEM; 3692 goto err_ret; 3693 } 3694 } else if (flags & NCE_F_BCAST) { 3695 /* 3696 * broadcast nce. 3697 */ 3698 template = copyb(ill->ill_bcast_mp); 3699 if (template == NULL) { 3700 err = ENOMEM; 3701 goto err_ret; 3702 } 3703 state = ND_REACHABLE; 3704 } else if (ill->ill_net_type == IRE_IF_NORESOLVER) { 3705 /* 3706 * NORESOLVER entries are always created in the REACHABLE 3707 * state. We create a nce_res_mp with the IP nexthop address 3708 * in the destination address in the DLPI hdr if the 3709 * physical length is exactly 4 bytes. 3710 * 3711 * XXX not clear which drivers set ill_phys_addr_length to 3712 * IP_ADDR_LEN. 
3713 */ 3714 if (ill->ill_phys_addr_length == IP_ADDR_LEN) { 3715 template = ill_dlur_gen((uchar_t *)addr, 3716 ill->ill_phys_addr_length, 3717 ill->ill_sap, ill->ill_sap_length); 3718 } else { 3719 template = copyb(ill->ill_resolver_mp); 3720 } 3721 if (template == NULL) { 3722 err = ENOMEM; 3723 goto err_ret; 3724 } 3725 state = ND_REACHABLE; 3726 } 3727 nce->nce_fp_mp = NULL; 3728 nce->nce_res_mp = template; 3729 nce->nce_state = state; 3730 if (state == ND_REACHABLE) { 3731 nce->nce_last = TICK_TO_MSEC(lbolt64); 3732 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3733 } else { 3734 nce->nce_last = 0; 3735 if (state == ND_INITIAL) 3736 nce->nce_init_time = TICK_TO_MSEC(lbolt64); 3737 } 3738 3739 ASSERT((nce->nce_res_mp == NULL && nce->nce_state == ND_INITIAL) || 3740 (nce->nce_res_mp != NULL && nce->nce_state == ND_REACHABLE)); 3741 /* 3742 * Atomically ensure that the ill is not CONDEMNED, before 3743 * adding the NCE. 3744 */ 3745 mutex_enter(&ill->ill_lock); 3746 if (ill->ill_state_flags & ILL_CONDEMNED) { 3747 mutex_exit(&ill->ill_lock); 3748 err = EINVAL; 3749 goto err_ret; 3750 } 3751 if ((nce->nce_next = *ncep) != NULL) 3752 nce->nce_next->nce_ptpn = &nce->nce_next; 3753 *ncep = nce; 3754 nce->nce_ptpn = ncep; 3755 *newnce = nce; 3756 /* This one is for nce being used by an active thread */ 3757 NCE_REFHOLD(*newnce); 3758 3759 /* Bump up the number of nce's referencing this ill */ 3760 ill->ill_nce_cnt++; 3761 mutex_exit(&ill->ill_lock); 3762 DTRACE_PROBE1(ndp__add__v4, nce_t *, nce); 3763 return (0); 3764 err_ret: 3765 freeb(mp); 3766 freemsg(template); 3767 return (err); 3768 } 3769 3770 void 3771 ndp_flush_qd_mp(nce_t *nce) 3772 { 3773 mblk_t *qd_mp, *qd_next; 3774 3775 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3776 qd_mp = nce->nce_qd_mp; 3777 nce->nce_qd_mp = NULL; 3778 while (qd_mp != NULL) { 3779 qd_next = qd_mp->b_next; 3780 qd_mp->b_next = NULL; 3781 qd_mp->b_prev = NULL; 3782 freemsg(qd_mp); 3783 qd_mp = qd_next; 3784 } 3785 } 3786 3787 3788 /* 3789 * 
ndp_walk routine to delete all entries that have a given destination or
 * gateway address and cached link layer (MAC) address. This is used when ARP
 * informs us that a network-to-link-layer mapping may have changed.
 */
void
nce_delete_hw_changed(nce_t *nce, void *arg)
{
	nce_hw_map_t		*hwm = arg;
	dl_unitdata_req_t	*dlu;
	mblk_t			*res_mp;
	uchar_t			*macaddr;
	ipaddr_t		nce_addr;
	int			saplen;
	int			unchanged;

	/* Only reachable entries for the address in question matter. */
	if (nce->nce_state != ND_REACHABLE)
		return;

	IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr);
	if (nce_addr != hwm->hwm_addr)
		return;

	mutex_enter(&nce->nce_lock);
	res_mp = nce->nce_res_mp;
	if (res_mp == NULL) {
		mutex_exit(&nce->nce_lock);
		return;
	}

	/*
	 * The address bytes follow the unitdata request header. A positive
	 * SAP length means the MAC address sits after the SAP; otherwise
	 * the MAC address comes first. Either way saplen ends up as the
	 * absolute SAP length.
	 */
	dlu = (dl_unitdata_req_t *)res_mp->b_rptr;
	macaddr = (uchar_t *)(dlu + 1);
	saplen = nce->nce_ill->ill_sap_length;
	if (saplen > 0)
		macaddr += saplen;
	else
		saplen = -saplen;

	/* Same length and same bytes: the hardware address is unchanged. */
	unchanged = (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen &&
	    bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0);
	mutex_exit(&nce->nce_lock);
	if (unchanged)
		return;

	DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce);
	ndp_delete(nce);
}

/*
 * This function verifies whether a given IPv4 address is potentially known to
 * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t,
 * so that it can continue to look for hardware changes on that address.
3843 */ 3844 boolean_t 3845 ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns) 3846 { 3847 nce_t *nce; 3848 struct in_addr nceaddr; 3849 ip_stack_t *ipst = ns->netstack_ip; 3850 3851 if (addr == INADDR_ANY) 3852 return (B_FALSE); 3853 3854 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3855 nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr); 3856 for (; nce != NULL; nce = nce->nce_next) { 3857 /* Note that only v4 mapped entries are in the table. */ 3858 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 3859 if (addr == nceaddr.s_addr && 3860 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 3861 /* Single flag check; no lock needed */ 3862 if (!(nce->nce_flags & NCE_F_CONDEMNED)) 3863 break; 3864 } 3865 } 3866 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3867 return (nce != NULL); 3868 } 3869