1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/sysmacros.h> 33 #include <sys/errno.h> 34 #include <sys/dlpi.h> 35 #include <sys/socket.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/vtrace.h> 41 #include <sys/kmem.h> 42 #include <sys/zone.h> 43 #include <sys/ethernet.h> 44 #include <sys/sdt.h> 45 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/if_dl.h> 49 #include <net/route.h> 50 #include <netinet/in.h> 51 #include <netinet/ip6.h> 52 #include <netinet/icmp6.h> 53 54 #include <inet/common.h> 55 #include <inet/mi.h> 56 #include <inet/mib2.h> 57 #include <inet/nd.h> 58 #include <inet/ip.h> 59 #include <inet/ip_if.h> 60 #include <inet/ip_ire.h> 61 #include <inet/ip_rts.h> 62 #include <inet/ip6.h> 63 #include <inet/ip_ndp.h> 64 #include <inet/ipsec_impl.h> 65 #include <inet/ipsec_info.h> 66 #include <inet/sctp_ip.h> 67 68 /* 69 * Function names with nce_ prefix are static while function 70 * names with ndp_ prefix are used by rest of the IP. 71 * 72 * Lock ordering: 73 * 74 * ndp_g_lock -> ill_lock -> nce_lock 75 * 76 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 77 * nce_next. Nce_lock protects the contents of the NCE (particularly 78 * nce_refcnt). 79 */ 80 81 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 82 uint32_t ll_addr_len); 83 static void nce_fastpath(nce_t *nce); 84 static void nce_ire_delete(nce_t *nce); 85 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 86 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 87 static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *, nce_t *); 88 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 89 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 90 uchar_t *addr); 91 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 92 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 93 static void nce_report1(nce_t *nce, uchar_t *mp_arg); 94 static mblk_t *nce_udreq_alloc(ill_t *ill); 95 static void nce_update(nce_t *nce, uint16_t new_state, 96 uchar_t *new_ll_addr); 97 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 98 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 99 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 100 const in6_addr_t *target, int flag); 101 extern void th_trace_rrecord(th_trace_t *); 102 static int ndp_lookup_then_add_v6(ill_t *, uchar_t *, 103 const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, 104 uint32_t, uint16_t, uint16_t, nce_t **, mblk_t *, mblk_t *); 105 static int ndp_lookup_then_add_v4(ill_t *, uchar_t *, 106 const in_addr_t *, const in_addr_t *, const in_addr_t *, 107 uint32_t, uint16_t, uint16_t, nce_t **, mblk_t *, mblk_t *); 108 static int ndp_add_v6(ill_t *, uchar_t *, const in6_addr_t *, 109 const in6_addr_t *, const in6_addr_t *, uint32_t, uint16_t, uint16_t, 110 nce_t **); 111 static int ndp_add_v4(ill_t *, uchar_t *, const in_addr_t *, 112 const in_addr_t *, const in_addr_t *, uint32_t, uint16_t, uint16_t, 113 nce_t **, mblk_t *, mblk_t *); 114 115 116 #ifdef NCE_DEBUG 117 void nce_trace_inactive(nce_t *); 118 #endif 119 120 ndp_g_t ndp4, ndp6; 121 122 #define NCE_HASH_PTR_V4(addr) \ 123 (&(ndp4.nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 124 125 #define NCE_HASH_PTR_V6(addr) \ 126 (&(ndp6.nce_hash_tbl[NCE_ADDR_HASH_V6(addr, NCE_TABLE_SIZE)])) 127 128 /* 129 * Compute default flags to use for an advertisement of this nce's address. 130 */ 131 static int 132 nce_advert_flags(const nce_t *nce) 133 { 134 int flag = 0; 135 136 if (nce->nce_flags & NCE_F_ISROUTER) 137 flag |= NDP_ISROUTER; 138 if (!(nce->nce_flags & NCE_F_PROXY)) 139 flag |= NDP_ORIDE; 140 return (flag); 141 } 142 143 int 144 ndp_add(ill_t *ill, uchar_t *hw_addr, const void *addr, 145 const void *mask, const void *extract_mask, 146 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 147 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 148 { 149 int status; 150 151 if (ill->ill_isv6) 152 status = ndp_add_v6(ill, hw_addr, (in6_addr_t *)addr, 153 (in6_addr_t *)mask, (in6_addr_t *)extract_mask, 154 hw_extract_start, flags, state, newnce); 155 else 156 status = ndp_add_v4(ill, hw_addr, (in_addr_t *)addr, 157 (in_addr_t *)mask, (in_addr_t *)extract_mask, 158 hw_extract_start, flags, state, newnce, fp_mp, res_mp); 159 return (status); 160 } 161 162 /* Non-tunable probe interval, based on link capabilities */ 163 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 164 165 /* 166 * NDP Cache Entry creation routine. 167 * Mapped entries will never do NUD . 168 * This routine must always be called with ndp6.ndp_g_lock held. 169 * Prior to return, nce_refcnt is incremented. 170 */ 171 static int 172 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 173 const in6_addr_t *mask, const in6_addr_t *extract_mask, 174 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 175 nce_t **newnce) 176 { 177 static nce_t nce_nil; 178 nce_t *nce; 179 mblk_t *mp; 180 mblk_t *template; 181 nce_t **ncep; 182 int err; 183 boolean_t dropped = B_FALSE; 184 185 ASSERT(MUTEX_HELD(&ndp6.ndp_g_lock)); 186 ASSERT(ill != NULL && ill->ill_isv6); 187 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 188 ip0dbg(("ndp_add: no addr\n")); 189 return (EINVAL); 190 } 191 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 192 ip0dbg(("ndp_add: flags = %x\n", (int)flags)); 193 return (EINVAL); 194 } 195 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 196 (flags & NCE_F_MAPPING)) { 197 ip0dbg(("ndp_add: extract mask zero for mapping")); 198 return (EINVAL); 199 } 200 /* 201 * Allocate the mblk to hold the nce. 202 * 203 * XXX This can come out of a separate cache - nce_cache. 204 * We don't need the mp anymore as there are no more 205 * "qwriter"s 206 */ 207 mp = allocb(sizeof (nce_t), BPRI_MED); 208 if (mp == NULL) 209 return (ENOMEM); 210 211 nce = (nce_t *)mp->b_rptr; 212 mp->b_wptr = (uchar_t *)&nce[1]; 213 *nce = nce_nil; 214 215 /* 216 * This one holds link layer address 217 */ 218 if (ill->ill_net_type == IRE_IF_RESOLVER) { 219 template = nce_udreq_alloc(ill); 220 } else { 221 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 222 ASSERT((ill->ill_resolver_mp != NULL)); 223 template = copyb(ill->ill_resolver_mp); 224 } 225 if (template == NULL) { 226 freeb(mp); 227 return (ENOMEM); 228 } 229 nce->nce_ill = ill; 230 nce->nce_ipversion = IPV6_VERSION; 231 nce->nce_flags = flags; 232 nce->nce_state = state; 233 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 234 nce->nce_rcnt = ill->ill_xmit_count; 235 nce->nce_addr = *addr; 236 nce->nce_mask = *mask; 237 nce->nce_extract_mask = *extract_mask; 238 nce->nce_ll_extract_start = hw_extract_start; 239 nce->nce_fp_mp = NULL; 240 nce->nce_res_mp = template; 241 if (state == ND_REACHABLE) 242 nce->nce_last = TICK_TO_MSEC(lbolt64); 243 else 244 nce->nce_last = 0; 245 nce->nce_qd_mp = NULL; 246 nce->nce_mp = mp; 247 if (hw_addr != NULL) 248 nce_set_ll(nce, hw_addr); 249 /* This one is for nce getting created */ 250 nce->nce_refcnt = 1; 251 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 252 if (nce->nce_flags & NCE_F_MAPPING) { 253 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 254 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 255 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 256 ncep = &ndp6.nce_mask_entries; 257 } else { 258 ncep = ((nce_t **)NCE_HASH_PTR_V6(*addr)); 259 } 260 261 #ifdef NCE_DEBUG 262 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 263 #endif 264 /* 265 * Atomically ensure that the ill is not CONDEMNED, before 266 * adding the NCE. 267 */ 268 mutex_enter(&ill->ill_lock); 269 if (ill->ill_state_flags & ILL_CONDEMNED) { 270 mutex_exit(&ill->ill_lock); 271 freeb(mp); 272 freeb(template); 273 return (EINVAL); 274 } 275 if ((nce->nce_next = *ncep) != NULL) 276 nce->nce_next->nce_ptpn = &nce->nce_next; 277 *ncep = nce; 278 nce->nce_ptpn = ncep; 279 *newnce = nce; 280 /* This one is for nce being used by an active thread */ 281 NCE_REFHOLD(*newnce); 282 283 /* Bump up the number of nce's referencing this ill */ 284 ill->ill_nce_cnt++; 285 mutex_exit(&ill->ill_lock); 286 287 err = 0; 288 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 289 mutex_enter(&nce->nce_lock); 290 mutex_exit(&ndp6.ndp_g_lock); 291 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 292 mutex_exit(&nce->nce_lock); 293 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 294 &ipv6_all_zeros, addr, NDP_PROBE); 295 if (dropped) { 296 mutex_enter(&nce->nce_lock); 297 nce->nce_pcnt++; 298 mutex_exit(&nce->nce_lock); 299 } 300 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 301 mutex_enter(&ndp6.ndp_g_lock); 302 err = EINPROGRESS; 303 } else if (flags & NCE_F_UNSOL_ADV) { 304 /* 305 * We account for the transmit below by assigning one 306 * less than the ndd variable. Subsequent decrements 307 * are done in ndp_timer. 308 */ 309 mutex_enter(&nce->nce_lock); 310 mutex_exit(&ndp6.ndp_g_lock); 311 nce->nce_unsolicit_count = ip_ndp_unsolicit_count - 1; 312 mutex_exit(&nce->nce_lock); 313 dropped = nce_xmit(ill, 314 ND_NEIGHBOR_ADVERT, 315 ill, /* ill to be used for extracting ill_nd_lla */ 316 B_TRUE, /* use ill_nd_lla */ 317 addr, /* Source and target of the advertisement pkt */ 318 &ipv6_all_hosts_mcast, /* Destination of the packet */ 319 nce_advert_flags(nce)); 320 mutex_enter(&nce->nce_lock); 321 if (dropped) 322 nce->nce_unsolicit_count++; 323 if (nce->nce_unsolicit_count != 0) { 324 nce->nce_timeout_id = timeout(ndp_timer, nce, 325 MSEC_TO_TICK(ip_ndp_unsolicit_interval)); 326 } 327 mutex_exit(&nce->nce_lock); 328 mutex_enter(&ndp6.ndp_g_lock); 329 } 330 /* 331 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 332 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 333 * We call nce_fastpath from nce_update if the link layer address of 334 * the peer changes from nce_update 335 */ 336 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 337 nce_fastpath(nce); 338 return (err); 339 } 340 341 int 342 ndp_lookup_then_add(ill_t *ill, uchar_t *hw_addr, const void *addr, 343 const void *mask, const void *extract_mask, 344 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 345 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 346 { 347 int status; 348 349 if (ill->ill_isv6) { 350 status = ndp_lookup_then_add_v6(ill, hw_addr, 351 (in6_addr_t *)addr, (in6_addr_t *)mask, 352 (in6_addr_t *)extract_mask, hw_extract_start, flags, 353 state, newnce, fp_mp, res_mp); 354 } else { 355 status = ndp_lookup_then_add_v4(ill, hw_addr, 356 (in_addr_t *)addr, (in_addr_t *)mask, 357 (in_addr_t *)extract_mask, hw_extract_start, flags, 358 state, newnce, fp_mp, res_mp); 359 } 360 361 return (status); 362 } 363 364 static int 365 ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 366 const in6_addr_t *mask, const in6_addr_t *extract_mask, 367 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 368 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 369 { 370 int err = 0; 371 nce_t *nce; 372 373 ASSERT(ill != NULL && ill->ill_isv6); 374 mutex_enter(&ndp6.ndp_g_lock); 375 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); /* head of v6 hash table */ 376 nce = nce_lookup_addr(ill, addr, nce); 377 if (nce == NULL) { 378 err = ndp_add(ill, 379 hw_addr, 380 addr, 381 mask, 382 extract_mask, 383 hw_extract_start, 384 flags, 385 state, 386 newnce, 387 fp_mp, 388 res_mp); 389 } else { 390 *newnce = nce; 391 err = EEXIST; 392 } 393 mutex_exit(&ndp6.ndp_g_lock); 394 return (err); 395 } 396 397 /* 398 * Remove all the CONDEMNED nces from the appropriate hash table. 399 * We create a private list of NCEs, these may have ires pointing 400 * to them, so the list will be passed through to clean up dependent 401 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 402 */ 403 static void 404 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 405 { 406 nce_t *nce1; 407 nce_t **ptpn; 408 409 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 410 ASSERT(ndp->ndp_g_walker == 0); 411 for (; nce; nce = nce1) { 412 nce1 = nce->nce_next; 413 mutex_enter(&nce->nce_lock); 414 if (nce->nce_flags & NCE_F_CONDEMNED) { 415 ptpn = nce->nce_ptpn; 416 nce1 = nce->nce_next; 417 if (nce1 != NULL) 418 nce1->nce_ptpn = ptpn; 419 *ptpn = nce1; 420 nce->nce_ptpn = NULL; 421 nce->nce_next = NULL; 422 nce->nce_next = *free_nce_list; 423 *free_nce_list = nce; 424 } 425 mutex_exit(&nce->nce_lock); 426 } 427 } 428 429 /* 430 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 431 * will return this NCE. Also no new IREs will be created that 432 * point to this NCE (See ire_add_v6). Also no new timeouts will 433 * be started (See NDP_RESTART_TIMER). 434 * 2. Cancel any currently running timeouts. 435 * 3. If there is an ndp walker, return. The walker will do the cleanup. 436 * This ensures that walkers see a consistent list of NCEs while walking. 437 * 4. Otherwise remove the NCE from the list of NCEs 438 * 5. Delete all IREs pointing to this NCE. 439 */ 440 void 441 ndp_delete(nce_t *nce) 442 { 443 nce_t **ptpn; 444 nce_t *nce1; 445 int ipversion = nce->nce_ipversion; 446 ndp_g_t *ndp = (ipversion == IPV4_VERSION ? &ndp4 : &ndp6); 447 448 /* Serialize deletes */ 449 mutex_enter(&nce->nce_lock); 450 if (nce->nce_flags & NCE_F_CONDEMNED) { 451 /* Some other thread is doing the delete */ 452 mutex_exit(&nce->nce_lock); 453 return; 454 } 455 /* 456 * Caller has a refhold. Also 1 ref for being in the list. Thus 457 * refcnt has to be >= 2 458 */ 459 ASSERT(nce->nce_refcnt >= 2); 460 nce->nce_flags |= NCE_F_CONDEMNED; 461 mutex_exit(&nce->nce_lock); 462 463 nce_fastpath_list_delete(nce); 464 465 /* 466 * Cancel any running timer. Timeout can't be restarted 467 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 468 * Passing invalid timeout id is fine. 469 */ 470 if (nce->nce_timeout_id != 0) { 471 (void) untimeout(nce->nce_timeout_id); 472 nce->nce_timeout_id = 0; 473 } 474 475 mutex_enter(&ndp->ndp_g_lock); 476 if (nce->nce_ptpn == NULL) { 477 /* 478 * The last ndp walker has already removed this nce from 479 * the list after we marked the nce CONDEMNED and before 480 * we grabbed the global lock. 481 */ 482 mutex_exit(&ndp->ndp_g_lock); 483 return; 484 } 485 if (ndp->ndp_g_walker > 0) { 486 /* 487 * Can't unlink. The walker will clean up 488 */ 489 ndp->ndp_g_walker_cleanup = B_TRUE; 490 mutex_exit(&ndp->ndp_g_lock); 491 return; 492 } 493 494 /* 495 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 496 * the timer since it is marked CONDEMNED. 497 */ 498 ptpn = nce->nce_ptpn; 499 nce1 = nce->nce_next; 500 if (nce1 != NULL) 501 nce1->nce_ptpn = ptpn; 502 *ptpn = nce1; 503 nce->nce_ptpn = NULL; 504 nce->nce_next = NULL; 505 mutex_exit(&ndp->ndp_g_lock); 506 507 nce_ire_delete(nce); 508 } 509 510 void 511 ndp_inactive(nce_t *nce) 512 { 513 mblk_t **mpp; 514 ill_t *ill; 515 516 ASSERT(nce->nce_refcnt == 0); 517 ASSERT(MUTEX_HELD(&nce->nce_lock)); 518 ASSERT(nce->nce_fastpath == NULL); 519 520 /* Free all nce allocated messages */ 521 mpp = &nce->nce_first_mp_to_free; 522 do { 523 while (*mpp != NULL) { 524 mblk_t *mp; 525 526 mp = *mpp; 527 *mpp = mp->b_next; 528 mp->b_next = NULL; 529 mp->b_prev = NULL; 530 freemsg(mp); 531 } 532 } while (mpp++ != &nce->nce_last_mp_to_free); 533 534 #ifdef NCE_DEBUG 535 nce_trace_inactive(nce); 536 #endif 537 538 ill = nce->nce_ill; 539 mutex_enter(&ill->ill_lock); 540 ill->ill_nce_cnt--; 541 /* 542 * If the number of nce's associated with this ill have dropped 543 * to zero, check whether we need to restart any operation that 544 * is waiting for this to happen. 545 */ 546 if (ill->ill_nce_cnt == 0) { 547 /* ipif_ill_refrele_tail drops the ill_lock */ 548 ipif_ill_refrele_tail(ill); 549 } else { 550 mutex_exit(&ill->ill_lock); 551 } 552 mutex_destroy(&nce->nce_lock); 553 freeb(nce->nce_mp); 554 } 555 556 /* 557 * ndp_walk routine. Delete the nce if it is associated with the ill 558 * that is going away. Always called as a writer. 559 */ 560 void 561 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 562 { 563 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 564 ndp_delete(nce); 565 } 566 } 567 568 /* 569 * Walk a list of to be inactive NCEs and blow away all the ires. 570 */ 571 static void 572 nce_ire_delete_list(nce_t *nce) 573 { 574 nce_t *nce_next; 575 576 ASSERT(nce != NULL); 577 while (nce != NULL) { 578 nce_next = nce->nce_next; 579 nce->nce_next = NULL; 580 581 /* 582 * It is possible for the last ndp walker (this thread) 583 * to come here after ndp_delete has marked the nce CONDEMNED 584 * and before it has removed the nce from the fastpath list 585 * or called untimeout. So we need to do it here. It is safe 586 * for both ndp_delete and this thread to do it twice or 587 * even simultaneously since each of the threads has a 588 * reference on the nce. 589 */ 590 nce_fastpath_list_delete(nce); 591 /* 592 * Cancel any running timer. Timeout can't be restarted 593 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 594 * Passing invalid timeout id is fine. 595 */ 596 if (nce->nce_timeout_id != 0) { 597 (void) untimeout(nce->nce_timeout_id); 598 nce->nce_timeout_id = 0; 599 } 600 /* 601 * We might hit this func thus in the v4 case: 602 * ipif_down->ipif_ndp_down->ndp_walk 603 */ 604 605 if (nce->nce_ipversion == IPV4_VERSION) { 606 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 607 IRE_CACHE, nce_ire_delete1, 608 (char *)nce, nce->nce_ill); 609 } else { 610 ASSERT(nce->nce_ipversion == IPV6_VERSION); 611 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 612 IRE_CACHE, nce_ire_delete1, 613 (char *)nce, nce->nce_ill); 614 } 615 NCE_REFRELE_NOTR(nce); 616 nce = nce_next; 617 } 618 } 619 620 /* 621 * Delete an ire when the nce goes away. 622 */ 623 /* ARGSUSED */ 624 static void 625 nce_ire_delete(nce_t *nce) 626 { 627 if (nce->nce_ipversion == IPV6_VERSION) { 628 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 629 nce_ire_delete1, (char *)nce, nce->nce_ill); 630 NCE_REFRELE_NOTR(nce); 631 } else { 632 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 633 nce_ire_delete1, (char *)nce, nce->nce_ill); 634 NCE_REFRELE_NOTR(nce); 635 } 636 } 637 638 /* 639 * ire_walk routine used to delete every IRE that shares this nce 640 */ 641 static void 642 nce_ire_delete1(ire_t *ire, char *nce_arg) 643 { 644 nce_t *nce = (nce_t *)nce_arg; 645 646 ASSERT(ire->ire_type == IRE_CACHE); 647 648 if (ire->ire_nce == nce) { 649 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 650 ire_delete(ire); 651 } 652 } 653 654 /* 655 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 656 */ 657 boolean_t 658 ndp_restart_dad(nce_t *nce) 659 { 660 boolean_t started; 661 boolean_t dropped; 662 663 if (nce == NULL) 664 return (B_FALSE); 665 mutex_enter(&nce->nce_lock); 666 if (nce->nce_state == ND_PROBE) { 667 mutex_exit(&nce->nce_lock); 668 started = B_TRUE; 669 } else if (nce->nce_state == ND_REACHABLE) { 670 nce->nce_state = ND_PROBE; 671 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 672 mutex_exit(&nce->nce_lock); 673 dropped = nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, NULL, 674 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, NDP_PROBE); 675 if (dropped) { 676 mutex_enter(&nce->nce_lock); 677 nce->nce_pcnt++; 678 mutex_exit(&nce->nce_lock); 679 } 680 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 681 started = B_TRUE; 682 } else { 683 mutex_exit(&nce->nce_lock); 684 started = B_FALSE; 685 } 686 return (started); 687 } 688 689 /* 690 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 691 * If one is found, the refcnt on the nce will be incremented. 692 */ 693 nce_t * 694 ndp_lookup_v6(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 695 { 696 nce_t *nce; 697 698 ASSERT(ill != NULL && ill->ill_isv6); 699 if (!caller_holds_lock) { 700 mutex_enter(&ndp6.ndp_g_lock); 701 } 702 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); /* head of v6 hash table */ 703 nce = nce_lookup_addr(ill, addr, nce); 704 if (nce == NULL) 705 nce = nce_lookup_mapping(ill, addr); 706 if (!caller_holds_lock) 707 mutex_exit(&ndp6.ndp_g_lock); 708 return (nce); 709 } 710 /* 711 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 712 * If one is found, the refcnt on the nce will be incremented. 713 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 714 * so we skip the nce_lookup_mapping call. 715 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 716 */ 717 nce_t * 718 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 719 { 720 nce_t *nce; 721 in6_addr_t addr6; 722 723 if (!caller_holds_lock) { 724 mutex_enter(&ndp4.ndp_g_lock); 725 } 726 nce = *((nce_t **)NCE_HASH_PTR_V4(*addr)); /* head of v6 hash table */ 727 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 728 nce = nce_lookup_addr(ill, &addr6, nce); 729 if (!caller_holds_lock) 730 mutex_exit(&ndp4.ndp_g_lock); 731 return (nce); 732 } 733 734 /* 735 * Cache entry lookup. Try to find an nce matching the parameters passed. 736 * Look only for exact entries (no mappings). If an nce is found, increment 737 * the hold count on that nce. The caller passes in the start of the 738 * appropriate hash table, and must be holding the appropriate global 739 * lock (ndp_g_lock). 740 */ 741 static nce_t * 742 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr, nce_t *nce) 743 { 744 ndp_g_t *ndp = (ill->ill_isv6 ? &ndp6 : &ndp4); 745 746 ASSERT(ill != NULL); 747 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 748 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 749 return (NULL); 750 for (; nce != NULL; nce = nce->nce_next) { 751 if (nce->nce_ill == ill) { 752 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 753 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 754 &ipv6_all_ones)) { 755 mutex_enter(&nce->nce_lock); 756 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 757 NCE_REFHOLD_LOCKED(nce); 758 mutex_exit(&nce->nce_lock); 759 break; 760 } 761 mutex_exit(&nce->nce_lock); 762 } 763 } 764 } 765 return (nce); 766 } 767 768 /* 769 * Cache entry lookup. Try to find an nce matching the parameters passed. 770 * Look only for mappings. 771 */ 772 static nce_t * 773 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 774 { 775 nce_t *nce; 776 777 ASSERT(ill != NULL && ill->ill_isv6); 778 ASSERT(MUTEX_HELD(&ndp6.ndp_g_lock)); 779 if (!IN6_IS_ADDR_MULTICAST(addr)) 780 return (NULL); 781 nce = ndp6.nce_mask_entries; 782 for (; nce != NULL; nce = nce->nce_next) 783 if (nce->nce_ill == ill && 784 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 785 mutex_enter(&nce->nce_lock); 786 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 787 NCE_REFHOLD_LOCKED(nce); 788 mutex_exit(&nce->nce_lock); 789 break; 790 } 791 mutex_exit(&nce->nce_lock); 792 } 793 return (nce); 794 } 795 796 /* 797 * Process passed in parameters either from an incoming packet or via 798 * user ioctl. 799 */ 800 void 801 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 802 { 803 ill_t *ill = nce->nce_ill; 804 uint32_t hw_addr_len = ill->ill_nd_lla_len; 805 mblk_t *mp; 806 boolean_t ll_updated = B_FALSE; 807 boolean_t ll_changed; 808 809 ASSERT(nce->nce_ipversion == IPV6_VERSION); 810 /* 811 * No updates of link layer address or the neighbor state is 812 * allowed, when the cache is in NONUD state. This still 813 * allows for responding to reachability solicitation. 814 */ 815 mutex_enter(&nce->nce_lock); 816 if (nce->nce_state == ND_INCOMPLETE) { 817 if (hw_addr == NULL) { 818 mutex_exit(&nce->nce_lock); 819 return; 820 } 821 nce_set_ll(nce, hw_addr); 822 /* 823 * Update nce state and send the queued packets 824 * back to ip this time ire will be added. 825 */ 826 if (flag & ND_NA_FLAG_SOLICITED) { 827 nce_update(nce, ND_REACHABLE, NULL); 828 } else { 829 nce_update(nce, ND_STALE, NULL); 830 } 831 mutex_exit(&nce->nce_lock); 832 nce_fastpath(nce); 833 mutex_enter(&nce->nce_lock); 834 mp = nce->nce_qd_mp; 835 nce->nce_qd_mp = NULL; 836 mutex_exit(&nce->nce_lock); 837 while (mp != NULL) { 838 mblk_t *nxt_mp; 839 840 nxt_mp = mp->b_next; 841 mp->b_next = NULL; 842 if (mp->b_prev != NULL) { 843 ill_t *inbound_ill; 844 queue_t *fwdq = NULL; 845 uint_t ifindex; 846 847 ifindex = (uint_t)(uintptr_t)mp->b_prev; 848 inbound_ill = ill_lookup_on_ifindex(ifindex, 849 B_TRUE, NULL, NULL, NULL, NULL); 850 if (inbound_ill == NULL) { 851 mp->b_prev = NULL; 852 freemsg(mp); 853 return; 854 } else { 855 fwdq = inbound_ill->ill_rq; 856 } 857 mp->b_prev = NULL; 858 /* 859 * Send a forwarded packet back into ip_rput_v6 860 * just as in ire_send_v6(). 861 * Extract the queue from b_prev (set in 862 * ip_rput_data_v6). 863 */ 864 if (fwdq != NULL) { 865 /* 866 * Forwarded packets hop count will 867 * get decremented in ip_rput_data_v6 868 */ 869 put(fwdq, mp); 870 } else { 871 /* 872 * Send locally originated packets back 873 * into * ip_wput_v6. 874 */ 875 put(ill->ill_wq, mp); 876 } 877 ill_refrele(inbound_ill); 878 } else { 879 put(ill->ill_wq, mp); 880 } 881 mp = nxt_mp; 882 } 883 return; 884 } 885 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 886 if (!is_adv) { 887 /* If this is a SOLICITATION request only */ 888 if (ll_changed) 889 nce_update(nce, ND_STALE, hw_addr); 890 mutex_exit(&nce->nce_lock); 891 return; 892 } 893 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 894 /* If in any other state than REACHABLE, ignore */ 895 if (nce->nce_state == ND_REACHABLE) { 896 nce_update(nce, ND_STALE, NULL); 897 } 898 mutex_exit(&nce->nce_lock); 899 return; 900 } else { 901 if (ll_changed) { 902 nce_update(nce, ND_UNCHANGED, hw_addr); 903 ll_updated = B_TRUE; 904 } 905 if (flag & ND_NA_FLAG_SOLICITED) { 906 nce_update(nce, ND_REACHABLE, NULL); 907 } else { 908 if (ll_updated) { 909 nce_update(nce, ND_STALE, NULL); 910 } 911 } 912 mutex_exit(&nce->nce_lock); 913 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 914 NCE_F_ISROUTER)) { 915 ire_t *ire; 916 917 /* 918 * Router turned to host. We need to remove the 919 * entry as well as any default route that may be 920 * using this as a next hop. This is required by 921 * section 7.2.5 of RFC 2461. 922 */ 923 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 924 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 925 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 926 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 927 MATCH_IRE_DEFAULT); 928 if (ire != NULL) { 929 ip_rts_rtmsg(RTM_DELETE, ire, 0); 930 ire_delete(ire); 931 ire_refrele(ire); 932 } 933 ndp_delete(nce); 934 } 935 } 936 } 937 938 /* 939 * Pass arg1 to the pfi supplied, along with each nce in existence. 940 * ndp_walk() places a REFHOLD on the nce and drops the lock when 941 * walking the hash list. 942 */ 943 void 944 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 945 boolean_t trace) 946 { 947 948 nce_t *nce; 949 nce_t *nce1; 950 nce_t **ncep; 951 nce_t *free_nce_list = NULL; 952 953 mutex_enter(&ndp->ndp_g_lock); 954 /* Prevent ndp_delete from unlink and free of NCE */ 955 ndp->ndp_g_walker++; 956 mutex_exit(&ndp->ndp_g_lock); 957 for (ncep = ndp->nce_hash_tbl; 958 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 959 for (nce = *ncep; nce != NULL; nce = nce1) { 960 nce1 = nce->nce_next; 961 if (ill == NULL || nce->nce_ill == ill) { 962 if (trace) { 963 NCE_REFHOLD(nce); 964 (*pfi)(nce, arg1); 965 NCE_REFRELE(nce); 966 } else { 967 NCE_REFHOLD_NOTR(nce); 968 (*pfi)(nce, arg1); 969 NCE_REFRELE_NOTR(nce); 970 } 971 } 972 } 973 } 974 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 975 nce1 = nce->nce_next; 976 if (ill == NULL || nce->nce_ill == ill) { 977 if (trace) { 978 NCE_REFHOLD(nce); 979 (*pfi)(nce, arg1); 980 NCE_REFRELE(nce); 981 } else { 982 NCE_REFHOLD_NOTR(nce); 983 (*pfi)(nce, arg1); 984 NCE_REFRELE_NOTR(nce); 985 } 986 } 987 } 988 mutex_enter(&ndp->ndp_g_lock); 989 ndp->ndp_g_walker--; 990 /* 991 * While NCE's are removed from global list they are placed 992 * in a private list, to be passed to nce_ire_delete_list(). 993 * The reason is, there may be ires pointing to this nce 994 * which needs to cleaned up. 995 */ 996 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 997 /* Time to delete condemned entries */ 998 for (ncep = ndp->nce_hash_tbl; 999 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 1000 nce = *ncep; 1001 if (nce != NULL) { 1002 nce_remove(ndp, nce, &free_nce_list); 1003 } 1004 } 1005 nce = ndp->nce_mask_entries; 1006 if (nce != NULL) { 1007 nce_remove(ndp, nce, &free_nce_list); 1008 } 1009 ndp->ndp_g_walker_cleanup = B_FALSE; 1010 } 1011 mutex_exit(&ndp->ndp_g_lock); 1012 1013 if (free_nce_list != NULL) { 1014 nce_ire_delete_list(free_nce_list); 1015 } 1016 } 1017 1018 void 1019 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1) 1020 { 1021 ndp_walk_common(&ndp4, ill, pfi, arg1, B_TRUE); 1022 ndp_walk_common(&ndp6, ill, pfi, arg1, B_TRUE); 1023 } 1024 1025 /* 1026 * Prepend the zoneid using an ipsec_out_t for later use by functions like 1027 * ip_rput_v6() after neighbor discovery has taken place. If the message 1028 * block already has a M_CTL at the front of it, then simply set the zoneid 1029 * appropriately. 1030 */ 1031 static mblk_t * 1032 ndp_prepend_zone(mblk_t *mp, zoneid_t zoneid) 1033 { 1034 mblk_t *first_mp; 1035 ipsec_out_t *io; 1036 1037 ASSERT(zoneid != ALL_ZONES); 1038 if (mp->b_datap->db_type == M_CTL) { 1039 io = (ipsec_out_t *)mp->b_rptr; 1040 ASSERT(io->ipsec_out_type == IPSEC_OUT); 1041 io->ipsec_out_zoneid = zoneid; 1042 return (mp); 1043 } 1044 1045 first_mp = ipsec_alloc_ipsec_out(); 1046 if (first_mp == NULL) 1047 return (NULL); 1048 io = (ipsec_out_t *)first_mp->b_rptr; 1049 /* This is not a secure packet */ 1050 io->ipsec_out_secure = B_FALSE; 1051 io->ipsec_out_zoneid = zoneid; 1052 first_mp->b_cont = mp; 1053 return (first_mp); 1054 } 1055 1056 /* 1057 * Process resolve requests. Handles both mapped entries 1058 * as well as cases that needs to be send out on the wire. 1059 * Lookup a NCE for a given IRE. Regardless of whether one exists 1060 * or one is created, we defer making ire point to nce until the 1061 * ire is actually added at which point the nce_refcnt on the nce is 1062 * incremented. This is done primarily to have symmetry between ire_add() 1063 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1064 */ 1065 int 1066 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 1067 { 1068 nce_t *nce; 1069 int err = 0; 1070 uint32_t ms; 1071 mblk_t *mp_nce = NULL; 1072 1073 ASSERT(ill != NULL); 1074 ASSERT(ill->ill_isv6); 1075 if (IN6_IS_ADDR_MULTICAST(dst)) { 1076 err = nce_set_multicast(ill, dst); 1077 return (err); 1078 } 1079 err = ndp_lookup_then_add(ill, 1080 NULL, /* No hardware address */ 1081 dst, 1082 &ipv6_all_ones, 1083 &ipv6_all_zeros, 1084 0, 1085 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1086 ND_INCOMPLETE, 1087 &nce, 1088 NULL, /* let ndp_add figure out fastpath mp and dlureq_mp for v6 */ 1089 NULL); 1090 1091 switch (err) { 1092 case 0: 1093 /* 1094 * New cache entry was created. Make sure that the state 1095 * is not ND_INCOMPLETE. It can be in some other state 1096 * even before we send out the solicitation as we could 1097 * get un-solicited advertisements. 1098 * 1099 * If this is an XRESOLV interface, simply return 0, 1100 * since we don't want to solicit just yet. 1101 */ 1102 if (ill->ill_flags & ILLF_XRESOLV) { 1103 NCE_REFRELE(nce); 1104 return (0); 1105 } 1106 rw_enter(&ill_g_lock, RW_READER); 1107 mutex_enter(&nce->nce_lock); 1108 if (nce->nce_state != ND_INCOMPLETE) { 1109 mutex_exit(&nce->nce_lock); 1110 rw_exit(&ill_g_lock); 1111 NCE_REFRELE(nce); 1112 return (0); 1113 } 1114 mp_nce = ndp_prepend_zone(mp, zoneid); 1115 if (mp_nce == NULL) { 1116 /* The caller will free mp */ 1117 mutex_exit(&nce->nce_lock); 1118 rw_exit(&ill_g_lock); 1119 ndp_delete(nce); 1120 NCE_REFRELE(nce); 1121 return (ENOMEM); 1122 } 1123 ms = nce_solicit(nce, mp_nce); 1124 rw_exit(&ill_g_lock); 1125 if (ms == 0) { 1126 /* The caller will free mp */ 1127 if (mp_nce != mp) 1128 freeb(mp_nce); 1129 mutex_exit(&nce->nce_lock); 1130 ndp_delete(nce); 1131 NCE_REFRELE(nce); 1132 return (EBUSY); 1133 } 1134 mutex_exit(&nce->nce_lock); 1135 NDP_RESTART_TIMER(nce, (clock_t)ms); 1136 NCE_REFRELE(nce); 1137 return (EINPROGRESS); 1138 case EEXIST: 1139 /* Resolution in progress just queue the packet */ 1140 mutex_enter(&nce->nce_lock); 1141 if (nce->nce_state == ND_INCOMPLETE) { 1142 mp_nce = ndp_prepend_zone(mp, zoneid); 1143 if (mp_nce == NULL) { 1144 err = ENOMEM; 1145 } else { 1146 nce_queue_mp(nce, mp_nce); 1147 err = EINPROGRESS; 1148 } 1149 } else { 1150 /* 1151 * Any other state implies we have 1152 * a nce but IRE needs to be added ... 1153 * ire_add_v6() will take care of the 1154 * the case when the nce becomes CONDEMNED 1155 * before the ire is added to the table. 1156 */ 1157 err = 0; 1158 } 1159 mutex_exit(&nce->nce_lock); 1160 NCE_REFRELE(nce); 1161 break; 1162 default: 1163 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 1164 break; 1165 } 1166 return (err); 1167 } 1168 1169 /* 1170 * When there is no resolver, the link layer template is passed in 1171 * the IRE. 1172 * Lookup a NCE for a given IRE. Regardless of whether one exists 1173 * or one is created, we defer making ire point to nce until the 1174 * ire is actually added at which point the nce_refcnt on the nce is 1175 * incremented. This is done primarily to have symmetry between ire_add() 1176 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1177 */ 1178 int 1179 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1180 { 1181 nce_t *nce; 1182 int err = 0; 1183 1184 ASSERT(ill != NULL); 1185 ASSERT(ill->ill_isv6); 1186 if (IN6_IS_ADDR_MULTICAST(dst)) { 1187 err = nce_set_multicast(ill, dst); 1188 return (err); 1189 } 1190 1191 err = ndp_lookup_then_add(ill, 1192 NULL, /* hardware address */ 1193 dst, 1194 &ipv6_all_ones, 1195 &ipv6_all_zeros, 1196 0, 1197 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1198 ND_REACHABLE, 1199 &nce, 1200 NULL, /* let ndp_add figure out fp_mp/dlureq_mp for v6 */ 1201 NULL); 1202 1203 switch (err) { 1204 case 0: 1205 /* 1206 * Cache entry with a proper resolver cookie was 1207 * created. 1208 */ 1209 NCE_REFRELE(nce); 1210 break; 1211 case EEXIST: 1212 err = 0; 1213 NCE_REFRELE(nce); 1214 break; 1215 default: 1216 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1217 break; 1218 } 1219 return (err); 1220 } 1221 1222 /* 1223 * For each interface an entry is added for the unspecified multicast group. 1224 * Here that mapping is used to form the multicast cache entry for a particular 1225 * multicast destination. 1226 */ 1227 static int 1228 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1229 { 1230 nce_t *mnce; /* Multicast mapping entry */ 1231 nce_t *nce; 1232 uchar_t *hw_addr = NULL; 1233 int err = 0; 1234 1235 ASSERT(ill != NULL); 1236 ASSERT(ill->ill_isv6); 1237 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1238 1239 mutex_enter(&ndp6.ndp_g_lock); 1240 nce = *((nce_t **)NCE_HASH_PTR_V6(*dst)); 1241 nce = nce_lookup_addr(ill, dst, nce); 1242 if (nce != NULL) { 1243 mutex_exit(&ndp6.ndp_g_lock); 1244 NCE_REFRELE(nce); 1245 return (0); 1246 } 1247 /* No entry, now lookup for a mapping this should never fail */ 1248 mnce = nce_lookup_mapping(ill, dst); 1249 if (mnce == NULL) { 1250 /* Something broken for the interface. */ 1251 mutex_exit(&ndp6.ndp_g_lock); 1252 return (ESRCH); 1253 } 1254 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1255 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1256 /* 1257 * For IRE_IF_RESOLVER a hardware mapping can be 1258 * generated, for IRE_IF_NORESOLVER, resolution cookie 1259 * in the ill is copied in ndp_add(). 1260 */ 1261 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1262 if (hw_addr == NULL) { 1263 mutex_exit(&ndp6.ndp_g_lock); 1264 NCE_REFRELE(mnce); 1265 return (ENOMEM); 1266 } 1267 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1268 } 1269 NCE_REFRELE(mnce); 1270 /* 1271 * IRE_IF_NORESOLVER type simply copies the resolution 1272 * cookie passed in. So no hw_addr is needed. 1273 */ 1274 err = ndp_add(ill, 1275 hw_addr, 1276 dst, 1277 &ipv6_all_ones, 1278 &ipv6_all_zeros, 1279 0, 1280 NCE_F_NONUD, 1281 ND_REACHABLE, 1282 &nce, 1283 NULL, 1284 NULL); 1285 mutex_exit(&ndp6.ndp_g_lock); 1286 if (hw_addr != NULL) 1287 kmem_free(hw_addr, ill->ill_nd_lla_len); 1288 if (err != 0) { 1289 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1290 return (err); 1291 } 1292 NCE_REFRELE(nce); 1293 return (0); 1294 } 1295 1296 /* 1297 * Return the link layer address, and any flags of a nce. 1298 */ 1299 int 1300 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1301 { 1302 nce_t *nce; 1303 in6_addr_t *addr; 1304 sin6_t *sin6; 1305 dl_unitdata_req_t *dl; 1306 1307 ASSERT(ill != NULL && ill->ill_isv6); 1308 sin6 = (sin6_t *)&lnr->lnr_addr; 1309 addr = &sin6->sin6_addr; 1310 1311 nce = ndp_lookup_v6(ill, addr, B_FALSE); 1312 if (nce == NULL) 1313 return (ESRCH); 1314 /* If in INCOMPLETE state, no link layer address is available yet */ 1315 if (nce->nce_state == ND_INCOMPLETE) 1316 goto done; 1317 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1318 if (ill->ill_flags & ILLF_XRESOLV) 1319 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1320 else 1321 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1322 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1323 sizeof (lnr->lnr_hdw_addr)); 1324 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1325 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1326 if (nce->nce_flags & NCE_F_ISROUTER) 1327 lnr->lnr_flags = NDF_ISROUTER_ON; 1328 if (nce->nce_flags & NCE_F_PROXY) 1329 lnr->lnr_flags |= NDF_PROXY_ON; 1330 if (nce->nce_flags & NCE_F_ANYCAST) 1331 lnr->lnr_flags |= NDF_ANYCAST_ON; 1332 done: 1333 NCE_REFRELE(nce); 1334 return (0); 1335 } 1336 1337 /* 1338 * Send Enable/Disable multicast reqs to driver. 1339 */ 1340 int 1341 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1342 uint32_t hw_addr_offset, mblk_t *mp) 1343 { 1344 nce_t *nce; 1345 uchar_t *hw_addr; 1346 1347 ASSERT(ill != NULL && ill->ill_isv6); 1348 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1349 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1350 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1351 freemsg(mp); 1352 return (EINVAL); 1353 } 1354 mutex_enter(&ndp6.ndp_g_lock); 1355 nce = nce_lookup_mapping(ill, addr); 1356 if (nce == NULL) { 1357 mutex_exit(&ndp6.ndp_g_lock); 1358 freemsg(mp); 1359 return (ESRCH); 1360 } 1361 mutex_exit(&ndp6.ndp_g_lock); 1362 /* 1363 * Update dl_addr_length and dl_addr_offset for primitives that 1364 * have physical addresses as opposed to full saps 1365 */ 1366 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1367 case DL_ENABMULTI_REQ: 1368 /* Track the state if this is the first enabmulti */ 1369 if (ill->ill_dlpi_multicast_state == IDMS_UNKNOWN) 1370 ill->ill_dlpi_multicast_state = IDMS_INPROGRESS; 1371 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1372 break; 1373 case DL_DISABMULTI_REQ: 1374 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1375 break; 1376 default: 1377 NCE_REFRELE(nce); 1378 ip1dbg(("ndp_mcastreq: default\n")); 1379 return (EINVAL); 1380 } 1381 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1382 NCE_REFRELE(nce); 1383 putnext(ill->ill_wq, mp); 1384 return (0); 1385 } 1386 1387 /* 1388 * Send a neighbor solicitation. 1389 * Returns number of milliseconds after which we should either rexmit or abort. 1390 * Return of zero means we should abort. 1391 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1392 * 1393 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1394 * the packet. 1395 * NOTE: This routine does not consume mp. 1396 */ 1397 uint32_t 1398 nce_solicit(nce_t *nce, mblk_t *mp) 1399 { 1400 ill_t *ill; 1401 ill_t *src_ill; 1402 ip6_t *ip6h; 1403 in6_addr_t src; 1404 in6_addr_t dst; 1405 ipif_t *ipif; 1406 ip6i_t *ip6i; 1407 boolean_t dropped = B_FALSE; 1408 1409 ASSERT(RW_READ_HELD(&ill_g_lock)); 1410 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1411 ill = nce->nce_ill; 1412 ASSERT(ill != NULL); 1413 1414 if (nce->nce_rcnt == 0) { 1415 return (0); 1416 } 1417 1418 if (mp == NULL) { 1419 ASSERT(nce->nce_qd_mp != NULL); 1420 mp = nce->nce_qd_mp; 1421 } else { 1422 nce_queue_mp(nce, mp); 1423 } 1424 1425 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1426 if (mp->b_datap->db_type == M_CTL) 1427 mp = mp->b_cont; 1428 1429 ip6h = (ip6_t *)mp->b_rptr; 1430 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1431 /* 1432 * This message should have been pulled up already in 1433 * ip_wput_v6. We can't do pullups here because the message 1434 * could be from the nce_qd_mp which could have b_next/b_prev 1435 * non-NULL. 1436 */ 1437 ip6i = (ip6i_t *)ip6h; 1438 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1439 sizeof (ip6i_t) + IPV6_HDR_LEN); 1440 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1441 } 1442 src = ip6h->ip6_src; 1443 /* 1444 * If the src of outgoing packet is one of the assigned interface 1445 * addresses use it, otherwise we will pick the source address below. 1446 */ 1447 src_ill = ill; 1448 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1449 if (ill->ill_group != NULL) 1450 src_ill = ill->ill_group->illgrp_ill; 1451 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1452 for (ipif = src_ill->ill_ipif; ipif != NULL; 1453 ipif = ipif->ipif_next) { 1454 if (IN6_ARE_ADDR_EQUAL(&src, 1455 &ipif->ipif_v6lcl_addr)) { 1456 break; 1457 } 1458 } 1459 if (ipif != NULL) 1460 break; 1461 } 1462 /* 1463 * If no relevant ipif can be found, then it's not one of our 1464 * addresses. Reset to :: and let nce_xmit. If an ipif can be 1465 * found, but it's not yet done with DAD verification, then 1466 * just postpone this transmission until later. 1467 */ 1468 if (src_ill == NULL) 1469 src = ipv6_all_zeros; 1470 else if (!ipif->ipif_addr_ready) 1471 return (ill->ill_reachable_retrans_time); 1472 } 1473 dst = nce->nce_addr; 1474 /* 1475 * If source address is unspecified, nce_xmit will choose 1476 * one for us and initialize the hardware address also 1477 * appropriately. 1478 */ 1479 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1480 src_ill = NULL; 1481 nce->nce_rcnt--; 1482 mutex_exit(&nce->nce_lock); 1483 rw_exit(&ill_g_lock); 1484 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1485 &dst, 0); 1486 rw_enter(&ill_g_lock, RW_READER); 1487 mutex_enter(&nce->nce_lock); 1488 if (dropped) 1489 nce->nce_rcnt++; 1490 return (ill->ill_reachable_retrans_time); 1491 } 1492 1493 /* 1494 * Attempt to recover an address on an interface that's been marked as a 1495 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1496 * no easy way to just probe the address and have the right thing happen if 1497 * it's no longer in use. Instead, we just bring it up normally and allow the 1498 * regular interface start-up logic to probe for a remaining duplicate and take 1499 * us back down if necessary. 1500 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1501 * ip_ndp_excl. 1502 */ 1503 /* ARGSUSED */ 1504 static void 1505 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1506 { 1507 ill_t *ill = rq->q_ptr; 1508 ipif_t *ipif; 1509 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1510 1511 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1512 /* 1513 * We do not support recovery of proxy ARP'd interfaces, 1514 * because the system lacks a complete proxy ARP mechanism. 1515 */ 1516 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1517 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1518 continue; 1519 } 1520 1521 /* 1522 * If we have already recovered, then ignore. 1523 */ 1524 mutex_enter(&ill->ill_lock); 1525 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) { 1526 mutex_exit(&ill->ill_lock); 1527 continue; 1528 } 1529 1530 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1531 ill->ill_ipif_dup_count--; 1532 mutex_exit(&ill->ill_lock); 1533 ipif->ipif_was_dup = B_TRUE; 1534 1535 if (ipif_ndp_up(ipif, addr, B_FALSE) != EINPROGRESS) 1536 (void) ipif_up_done_v6(ipif); 1537 } 1538 freeb(mp); 1539 } 1540 1541 /* 1542 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1543 * As long as someone else holds the address, the interface will stay down. 1544 * When that conflict goes away, the interface is brought back up. This is 1545 * done so that accidental shutdowns of addresses aren't made permanent. Your 1546 * server will recover from a failure. 1547 * 1548 * For DHCP and temporary addresses, recovery is not done in the kernel. 1549 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1550 * 1551 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1552 */ 1553 static void 1554 ipif6_dup_recovery(void *arg) 1555 { 1556 ipif_t *ipif = arg; 1557 1558 ipif->ipif_recovery_id = 0; 1559 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1560 return; 1561 1562 /* If the link is down, we'll retry this later */ 1563 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1564 return; 1565 1566 ndp_do_recovery(ipif); 1567 } 1568 1569 /* 1570 * Perform interface recovery by forcing the duplicate interfaces up and 1571 * allowing the system to determine which ones should stay up. 1572 * 1573 * Called both by recovery timer expiry and link-up notification. 1574 */ 1575 void 1576 ndp_do_recovery(ipif_t *ipif) 1577 { 1578 ill_t *ill = ipif->ipif_ill; 1579 mblk_t *mp; 1580 1581 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1582 if (mp == NULL) { 1583 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1584 ipif, MSEC_TO_TICK(ip_dup_recovery)); 1585 } else { 1586 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1587 sizeof (ipif->ipif_v6lcl_addr)); 1588 ill_refhold(ill); 1589 (void) qwriter_ip(NULL, ill, ill->ill_rq, mp, ip_ndp_recover, 1590 CUR_OP, B_FALSE); 1591 } 1592 } 1593 1594 /* 1595 * Find the solicitation in the given message, and extract printable details 1596 * (MAC and IP addresses) from it. 1597 */ 1598 static nd_neighbor_solicit_t * 1599 ip_ndp_find_solicitation(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, char *hbuf, 1600 size_t hlen, char *sbuf, size_t slen, uchar_t **haddr) 1601 { 1602 nd_neighbor_solicit_t *ns; 1603 ip6_t *ip6h; 1604 uchar_t *addr; 1605 int alen; 1606 1607 alen = 0; 1608 ip6h = (ip6_t *)mp->b_rptr; 1609 if (dl_mp == NULL) { 1610 nd_opt_hdr_t *opt; 1611 int nslen; 1612 1613 /* 1614 * If it's from the fast-path, then it can't be a probe 1615 * message, and thus must include the source linkaddr option. 1616 * Extract that here. 1617 */ 1618 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1619 nslen = mp->b_wptr - (uchar_t *)ns; 1620 if ((nslen -= sizeof (*ns)) > 0) { 1621 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), nslen, 1622 ND_OPT_SOURCE_LINKADDR); 1623 if (opt != NULL && 1624 opt->nd_opt_len * 8 - sizeof (*opt) >= 1625 ill->ill_nd_lla_len) { 1626 addr = (uchar_t *)(opt + 1); 1627 alen = ill->ill_nd_lla_len; 1628 } 1629 } 1630 /* 1631 * We cheat a bit here for the sake of printing usable log 1632 * messages in the rare case where the reply we got was unicast 1633 * without a source linkaddr option, and the interface is in 1634 * fastpath mode. (Sigh.) 1635 */ 1636 if (alen == 0 && ill->ill_type == IFT_ETHER && 1637 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1638 struct ether_header *pether; 1639 1640 pether = (struct ether_header *)((char *)ip6h - 1641 sizeof (*pether)); 1642 addr = pether->ether_shost.ether_addr_octet; 1643 alen = ETHERADDRL; 1644 } 1645 } else { 1646 dl_unitdata_ind_t *dlu; 1647 1648 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1649 alen = dlu->dl_src_addr_length; 1650 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1651 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1652 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1653 if (ill->ill_sap_length < 0) { 1654 alen += ill->ill_sap_length; 1655 } else { 1656 addr += ill->ill_sap_length; 1657 alen -= ill->ill_sap_length; 1658 } 1659 } 1660 } 1661 if (alen > 0) { 1662 *haddr = addr; 1663 (void) mac_colon_addr(addr, alen, hbuf, hlen); 1664 } else { 1665 *haddr = NULL; 1666 (void) strcpy(hbuf, "?"); 1667 } 1668 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1669 (void) inet_ntop(AF_INET6, &ns->nd_ns_target, sbuf, slen); 1670 return (ns); 1671 } 1672 1673 /* 1674 * This is for exclusive changes due to NDP duplicate address detection 1675 * failure. 1676 */ 1677 /* ARGSUSED */ 1678 static void 1679 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1680 { 1681 ill_t *ill = rq->q_ptr; 1682 ipif_t *ipif; 1683 char ibuf[LIFNAMSIZ + 10]; /* 10 digits for logical i/f number */ 1684 char hbuf[MAC_STR_LEN]; 1685 char sbuf[INET6_ADDRSTRLEN]; 1686 nd_neighbor_solicit_t *ns; 1687 mblk_t *dl_mp = NULL; 1688 uchar_t *haddr; 1689 1690 if (DB_TYPE(mp) != M_DATA) { 1691 dl_mp = mp; 1692 mp = mp->b_cont; 1693 } 1694 ns = ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, sizeof (hbuf), sbuf, 1695 sizeof (sbuf), &haddr); 1696 if (haddr != NULL && 1697 bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) { 1698 /* 1699 * Ignore conflicts generated by misbehaving switches that just 1700 * reflect our own messages back to us. 1701 */ 1702 goto ignore_conflict; 1703 } 1704 (void) strlcpy(ibuf, ill->ill_name, sizeof (ibuf)); 1705 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1706 1707 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1708 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 1709 &ns->nd_ns_target)) { 1710 continue; 1711 } 1712 1713 /* If it's already marked, then don't do anything. */ 1714 if (ipif->ipif_flags & IPIF_DUPLICATE) 1715 continue; 1716 1717 /* 1718 * If this is a failure during duplicate recovery, then don't 1719 * complain. It may take a long time to recover. 1720 */ 1721 if (!ipif->ipif_was_dup) { 1722 if (ipif->ipif_id != 0) { 1723 (void) snprintf(ibuf + ill->ill_name_length - 1, 1724 sizeof (ibuf) - ill->ill_name_length + 1, 1725 ":%d", ipif->ipif_id); 1726 } 1727 cmn_err(CE_WARN, "%s has duplicate address %s (in " 1728 "use by %s); disabled", ibuf, sbuf, hbuf); 1729 } 1730 mutex_enter(&ill->ill_lock); 1731 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1732 ipif->ipif_flags |= IPIF_DUPLICATE; 1733 ill->ill_ipif_dup_count++; 1734 mutex_exit(&ill->ill_lock); 1735 (void) ipif_down(ipif, NULL, NULL); 1736 ipif_down_tail(ipif); 1737 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1738 ill->ill_net_type == IRE_IF_RESOLVER && 1739 ip_dup_recovery > 0) 1740 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1741 ipif, MSEC_TO_TICK(ip_dup_recovery)); 1742 } 1743 ignore_conflict: 1744 if (dl_mp != NULL) 1745 freeb(dl_mp); 1746 freemsg(mp); 1747 } 1748 1749 /* 1750 * Handle failure by tearing down the ipifs with the specified address. Note 1751 * that tearing down the ipif also means deleting the nce through ipif_down, so 1752 * it's not possible to do recovery by just restarting the nce timer. Instead, 1753 * we start a timer on the ipif. 1754 */ 1755 static void 1756 ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1757 { 1758 if ((mp = copymsg(mp)) != NULL) { 1759 if (dl_mp == NULL) 1760 dl_mp = mp; 1761 else if ((dl_mp = copyb(dl_mp)) != NULL) 1762 dl_mp->b_cont = mp; 1763 if (dl_mp == NULL) { 1764 freemsg(mp); 1765 } else { 1766 ill_refhold(ill); 1767 (void) qwriter_ip(NULL, ill, ill->ill_rq, dl_mp, 1768 ip_ndp_excl, CUR_OP, B_FALSE); 1769 } 1770 } 1771 ndp_delete(nce); 1772 } 1773 1774 /* 1775 * Handle a discovered conflict: some other system is advertising that it owns 1776 * one of our IP addresses. We need to defend ourselves, or just shut down the 1777 * interface. 1778 */ 1779 static void 1780 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1781 { 1782 ipif_t *ipif; 1783 uint32_t now; 1784 uint_t maxdefense; 1785 uint_t defs; 1786 1787 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 1788 NULL, NULL); 1789 if (ipif == NULL) 1790 return; 1791 /* 1792 * First, figure out if this address is disposable. 1793 */ 1794 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1795 maxdefense = ip_max_temp_defend; 1796 else 1797 maxdefense = ip_max_defend; 1798 1799 /* 1800 * Now figure out how many times we've defended ourselves. Ignore 1801 * defenses that happened long in the past. 1802 */ 1803 now = gethrestime_sec(); 1804 mutex_enter(&nce->nce_lock); 1805 if ((defs = nce->nce_defense_count) > 0 && 1806 now - nce->nce_defense_time > ip_defend_interval) { 1807 nce->nce_defense_count = defs = 0; 1808 } 1809 nce->nce_defense_count++; 1810 nce->nce_defense_time = now; 1811 mutex_exit(&nce->nce_lock); 1812 ipif_refrele(ipif); 1813 1814 /* 1815 * If we've defended ourselves too many times already, then give up and 1816 * tear down the interface(s) using this address. Otherwise, defend by 1817 * sending out an unsolicited Neighbor Advertisement. 1818 */ 1819 if (defs >= maxdefense) { 1820 ip_ndp_failure(ill, mp, dl_mp, nce); 1821 } else { 1822 char hbuf[MAC_STR_LEN]; 1823 char sbuf[INET6_ADDRSTRLEN]; 1824 uchar_t *haddr; 1825 1826 (void) ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, 1827 sizeof (hbuf), sbuf, sizeof (sbuf), &haddr); 1828 cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1829 hbuf, sbuf, ill->ill_name); 1830 (void) nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, B_FALSE, 1831 &nce->nce_addr, &ipv6_all_hosts_mcast, 1832 nce_advert_flags(nce)); 1833 } 1834 } 1835 1836 static void 1837 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1838 { 1839 nd_neighbor_solicit_t *ns; 1840 uint32_t hlen = ill->ill_nd_lla_len; 1841 uchar_t *haddr = NULL; 1842 icmp6_t *icmp_nd; 1843 ip6_t *ip6h; 1844 nce_t *our_nce = NULL; 1845 in6_addr_t target; 1846 in6_addr_t src; 1847 int len; 1848 int flag = 0; 1849 nd_opt_hdr_t *opt = NULL; 1850 boolean_t bad_solicit = B_FALSE; 1851 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1852 1853 ip6h = (ip6_t *)mp->b_rptr; 1854 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1855 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1856 src = ip6h->ip6_src; 1857 ns = (nd_neighbor_solicit_t *)icmp_nd; 1858 target = ns->nd_ns_target; 1859 if (IN6_IS_ADDR_MULTICAST(&target)) { 1860 if (ip_debug > 2) { 1861 /* ip1dbg */ 1862 pr_addr_dbg("ndp_input_solicit: Target is" 1863 " multicast! %s\n", AF_INET6, &target); 1864 } 1865 bad_solicit = B_TRUE; 1866 goto done; 1867 } 1868 if (len > sizeof (nd_neighbor_solicit_t)) { 1869 /* Options present */ 1870 opt = (nd_opt_hdr_t *)&ns[1]; 1871 len -= sizeof (nd_neighbor_solicit_t); 1872 if (!ndp_verify_optlen(opt, len)) { 1873 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1874 bad_solicit = B_TRUE; 1875 goto done; 1876 } 1877 } 1878 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1879 /* Check to see if this is a valid DAD solicitation */ 1880 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1881 if (ip_debug > 2) { 1882 /* ip1dbg */ 1883 pr_addr_dbg("ndp_input_solicit: IPv6 " 1884 "Destination is not solicited node " 1885 "multicast %s\n", AF_INET6, 1886 &ip6h->ip6_dst); 1887 } 1888 bad_solicit = B_TRUE; 1889 goto done; 1890 } 1891 } 1892 1893 our_nce = ndp_lookup_v6(ill, &target, B_FALSE); 1894 /* 1895 * If this is a valid Solicitation, a permanent 1896 * entry should exist in the cache 1897 */ 1898 if (our_nce == NULL || 1899 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1900 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1901 "ifname=%s ", ill->ill_name)); 1902 if (ip_debug > 2) { 1903 /* ip1dbg */ 1904 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1905 } 1906 bad_solicit = B_TRUE; 1907 goto done; 1908 } 1909 1910 /* At this point we should have a verified NS per spec */ 1911 if (opt != NULL) { 1912 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1913 if (opt != NULL) { 1914 haddr = (uchar_t *)&opt[1]; 1915 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 1916 hlen == 0) { 1917 ip1dbg(("ndp_input_advert: bad SLLA\n")); 1918 bad_solicit = B_TRUE; 1919 goto done; 1920 } 1921 } 1922 } 1923 1924 /* If sending directly to peer, set the unicast flag */ 1925 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 1926 flag |= NDP_UNICAST; 1927 1928 /* 1929 * Create/update the entry for the soliciting node. 1930 * or respond to outstanding queries, don't if 1931 * the source is unspecified address. 1932 */ 1933 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1934 int err; 1935 nce_t *nnce; 1936 1937 ASSERT(ill->ill_isv6); 1938 /* 1939 * Regular solicitations *must* include the Source Link-Layer 1940 * Address option. Ignore messages that do not. 1941 */ 1942 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1943 ip1dbg(("ndp_input_solicit: source link-layer address " 1944 "option missing with a specified source.\n")); 1945 bad_solicit = B_TRUE; 1946 goto done; 1947 } 1948 1949 /* 1950 * This is a regular solicitation. If we're still in the 1951 * process of verifying the address, then don't respond at all 1952 * and don't keep track of the sender. 1953 */ 1954 if (our_nce->nce_state == ND_PROBE) 1955 goto done; 1956 1957 /* 1958 * If the solicitation doesn't have sender hardware address 1959 * (legal for unicast solicitation), then process without 1960 * installing the return NCE. Either we already know it, or 1961 * we'll be forced to look it up when (and if) we reply to the 1962 * packet. 1963 */ 1964 if (haddr == NULL) 1965 goto no_source; 1966 1967 err = ndp_lookup_then_add(ill, 1968 haddr, 1969 &src, /* Soliciting nodes address */ 1970 &ipv6_all_ones, 1971 &ipv6_all_zeros, 1972 0, 1973 0, 1974 ND_STALE, 1975 &nnce, 1976 NULL, 1977 NULL); 1978 switch (err) { 1979 case 0: 1980 /* done with this entry */ 1981 NCE_REFRELE(nnce); 1982 break; 1983 case EEXIST: 1984 /* 1985 * B_FALSE indicates this is not an 1986 * an advertisement. 1987 */ 1988 ndp_process(nnce, haddr, 0, B_FALSE); 1989 NCE_REFRELE(nnce); 1990 break; 1991 default: 1992 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1993 err)); 1994 goto done; 1995 } 1996 no_source: 1997 flag |= NDP_SOLICITED; 1998 } else { 1999 /* 2000 * No source link layer address option should be present in a 2001 * valid DAD request. 2002 */ 2003 if (haddr != NULL) { 2004 ip1dbg(("ndp_input_solicit: source link-layer address " 2005 "option present with an unspecified source.\n")); 2006 bad_solicit = B_TRUE; 2007 goto done; 2008 } 2009 if (our_nce->nce_state == ND_PROBE) { 2010 /* 2011 * Internally looped-back probes won't have DLPI 2012 * attached to them. External ones (which are sent by 2013 * multicast) always will. Just ignore our own 2014 * transmissions. 2015 */ 2016 if (dl_mp != NULL) { 2017 /* 2018 * If someone else is probing our address, then 2019 * we've crossed wires. Declare failure. 2020 */ 2021 ip_ndp_failure(ill, mp, dl_mp, our_nce); 2022 } 2023 goto done; 2024 } 2025 /* 2026 * This is a DAD probe. Multicast the advertisement to the 2027 * all-nodes address. 2028 */ 2029 src = ipv6_all_hosts_mcast; 2030 } 2031 flag |= nce_advert_flags(our_nce); 2032 /* Response to a solicitation */ 2033 (void) nce_xmit(ill, 2034 ND_NEIGHBOR_ADVERT, 2035 ill, /* ill to be used for extracting ill_nd_lla */ 2036 B_TRUE, /* use ill_nd_lla */ 2037 &target, /* Source and target of the advertisement pkt */ 2038 &src, /* IP Destination (source of original pkt) */ 2039 flag); 2040 done: 2041 if (bad_solicit) 2042 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 2043 if (our_nce != NULL) 2044 NCE_REFRELE(our_nce); 2045 } 2046 2047 void 2048 ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2049 { 2050 nd_neighbor_advert_t *na; 2051 uint32_t hlen = ill->ill_nd_lla_len; 2052 uchar_t *haddr = NULL; 2053 icmp6_t *icmp_nd; 2054 ip6_t *ip6h; 2055 nce_t *dst_nce = NULL; 2056 in6_addr_t target; 2057 nd_opt_hdr_t *opt = NULL; 2058 int len; 2059 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2060 2061 ip6h = (ip6_t *)mp->b_rptr; 2062 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2063 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2064 na = (nd_neighbor_advert_t *)icmp_nd; 2065 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 2066 (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 2067 ip1dbg(("ndp_input_advert: Target is multicast but the " 2068 "solicited flag is not zero\n")); 2069 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2070 return; 2071 } 2072 target = na->nd_na_target; 2073 if (IN6_IS_ADDR_MULTICAST(&target)) { 2074 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 2075 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2076 return; 2077 } 2078 if (len > sizeof (nd_neighbor_advert_t)) { 2079 opt = (nd_opt_hdr_t *)&na[1]; 2080 if (!ndp_verify_optlen(opt, 2081 len - sizeof (nd_neighbor_advert_t))) { 2082 ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); 2083 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2084 return; 2085 } 2086 /* At this point we have a verified NA per spec */ 2087 len -= sizeof (nd_neighbor_advert_t); 2088 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 2089 if (opt != NULL) { 2090 haddr = (uchar_t *)&opt[1]; 2091 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 2092 hlen == 0) { 2093 ip1dbg(("ndp_input_advert: bad SLLA\n")); 2094 BUMP_MIB(mib, 2095 ipv6IfIcmpInBadNeighborAdvertisements); 2096 return; 2097 } 2098 } 2099 } 2100 2101 /* 2102 * If this interface is part of the group look at all the 2103 * ills in the group. 2104 */ 2105 rw_enter(&ill_g_lock, RW_READER); 2106 if (ill->ill_group != NULL) 2107 ill = ill->ill_group->illgrp_ill; 2108 2109 for (; ill != NULL; ill = ill->ill_group_next) { 2110 mutex_enter(&ill->ill_lock); 2111 if (!ILL_CAN_LOOKUP(ill)) { 2112 mutex_exit(&ill->ill_lock); 2113 continue; 2114 } 2115 ill_refhold_locked(ill); 2116 mutex_exit(&ill->ill_lock); 2117 dst_nce = ndp_lookup_v6(ill, &target, B_FALSE); 2118 /* We have to drop the lock since ndp_process calls put* */ 2119 rw_exit(&ill_g_lock); 2120 if (dst_nce != NULL) { 2121 if ((dst_nce->nce_flags & NCE_F_PERMANENT) && 2122 dst_nce->nce_state == ND_PROBE) { 2123 /* 2124 * Someone else sent an advertisement for an 2125 * address that we're trying to configure. 2126 * Tear it down. Note that dl_mp might be NULL 2127 * if we're getting a unicast reply. This 2128 * isn't typically done (multicast is the norm 2129 * in response to a probe), but ip_ndp_failure 2130 * will handle the dl_mp == NULL case as well. 2131 */ 2132 ip_ndp_failure(ill, mp, dl_mp, dst_nce); 2133 } else if (dst_nce->nce_flags & NCE_F_PERMANENT) { 2134 /* 2135 * Someone just announced one of our local 2136 * addresses. If it wasn't us, then this is a 2137 * conflict. Defend the address or shut it 2138 * down. 2139 */ 2140 if (dl_mp != NULL && 2141 (haddr == NULL || 2142 nce_cmp_ll_addr(dst_nce, haddr, 2143 ill->ill_nd_lla_len))) { 2144 ip_ndp_conflict(ill, mp, dl_mp, 2145 dst_nce); 2146 } 2147 } else { 2148 if (na->nd_na_flags_reserved & 2149 ND_NA_FLAG_ROUTER) { 2150 dst_nce->nce_flags |= NCE_F_ISROUTER; 2151 } 2152 /* B_TRUE indicates this an advertisement */ 2153 ndp_process(dst_nce, haddr, 2154 na->nd_na_flags_reserved, B_TRUE); 2155 } 2156 NCE_REFRELE(dst_nce); 2157 } 2158 rw_enter(&ill_g_lock, RW_READER); 2159 ill_refrele(ill); 2160 } 2161 rw_exit(&ill_g_lock); 2162 } 2163 2164 /* 2165 * Process NDP neighbor solicitation/advertisement messages. 2166 * The checksum has already checked o.k before reaching here. 2167 */ 2168 void 2169 ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2170 { 2171 icmp6_t *icmp_nd; 2172 ip6_t *ip6h; 2173 int len; 2174 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2175 2176 2177 if (!pullupmsg(mp, -1)) { 2178 ip1dbg(("ndp_input: pullupmsg failed\n")); 2179 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 2180 goto done; 2181 } 2182 ip6h = (ip6_t *)mp->b_rptr; 2183 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 2184 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 2185 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 2186 goto done; 2187 } 2188 /* 2189 * NDP does not accept any extension headers between the 2190 * IP header and the ICMP header since e.g. a routing 2191 * header could be dangerous. 2192 * This assumes that any AH or ESP headers are removed 2193 * by ip prior to passing the packet to ndp_input. 2194 */ 2195 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 2196 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 2197 ip6h->ip6_nxt)); 2198 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2199 goto done; 2200 } 2201 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2202 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 2203 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 2204 if (icmp_nd->icmp6_code != 0) { 2205 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 2206 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2207 goto done; 2208 } 2209 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2210 /* 2211 * Make sure packet length is large enough for either 2212 * a NS or a NA icmp packet. 2213 */ 2214 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 2215 ip1dbg(("ndp_input: packet too short\n")); 2216 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2217 goto done; 2218 } 2219 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 2220 ndp_input_solicit(ill, mp, dl_mp); 2221 } else { 2222 ndp_input_advert(ill, mp, dl_mp); 2223 } 2224 done: 2225 freemsg(mp); 2226 } 2227 2228 /* 2229 * nce_xmit is called to form and transmit a ND solicitation or 2230 * advertisement ICMP packet. 2231 * 2232 * If the source address is unspecified and this isn't a probe (used for 2233 * duplicate address detection), an appropriate source address and link layer 2234 * address will be chosen here. The link layer address option is included if 2235 * the source is specified (i.e., all non-probe packets), and omitted (per the 2236 * specification) otherwise. 2237 * 2238 * It returns B_FALSE only if it does a successful put() to the 2239 * corresponding ill's ill_wq otherwise returns B_TRUE. 2240 */ 2241 static boolean_t 2242 nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, 2243 boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, 2244 int flag) 2245 { 2246 uint32_t len; 2247 icmp6_t *icmp6; 2248 mblk_t *mp; 2249 ip6_t *ip6h; 2250 nd_opt_hdr_t *opt; 2251 uint_t plen; 2252 ip6i_t *ip6i; 2253 ipif_t *src_ipif = NULL; 2254 uint8_t *hw_addr; 2255 2256 /* 2257 * If we have a unspecified source(sender) address, select a 2258 * proper source address for the solicitation here itself so 2259 * that we can initialize the h/w address correctly. This is 2260 * needed for interface groups as source address can come from 2261 * the whole group and the h/w address initialized from ill will 2262 * be wrong if the source address comes from a different ill. 2263 * 2264 * Note that the NA never comes here with the unspecified source 2265 * address. The following asserts that whenever the source 2266 * address is specified, the haddr also should be specified. 2267 */ 2268 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 2269 2270 if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { 2271 ASSERT(operation != ND_NEIGHBOR_ADVERT); 2272 /* 2273 * Pick a source address for this solicitation, but 2274 * restrict the selection to addresses assigned to the 2275 * output interface (or interface group). We do this 2276 * because the destination will create a neighbor cache 2277 * entry for the source address of this packet, so the 2278 * source address had better be a valid neighbor. 2279 */ 2280 src_ipif = ipif_select_source_v6(ill, target, RESTRICT_TO_ILL, 2281 IPV6_PREFER_SRC_DEFAULT, GLOBAL_ZONEID); 2282 if (src_ipif == NULL) { 2283 char buf[INET6_ADDRSTRLEN]; 2284 2285 ip1dbg(("nce_xmit: No source ipif for dst %s\n", 2286 inet_ntop(AF_INET6, (char *)target, buf, 2287 sizeof (buf)))); 2288 return (B_TRUE); 2289 } 2290 sender = &src_ipif->ipif_v6src_addr; 2291 hwaddr_ill = src_ipif->ipif_ill; 2292 } 2293 2294 /* 2295 * Always make sure that the NS/NA packets don't get load 2296 * spread. This is needed so that the probe packets sent 2297 * by the in.mpathd daemon can really go out on the desired 2298 * interface. Probe packets are made to go out on a desired 2299 * interface by including a ip6i with ATTACH_IF flag. As these 2300 * packets indirectly end up sending/receiving NS/NA packets 2301 * (neighbor doing NUD), we have to make sure that NA 2302 * also go out on the same interface. 2303 */ 2304 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7) / 8; 2305 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 2306 plen * 8; 2307 mp = allocb(len, BPRI_LO); 2308 if (mp == NULL) { 2309 if (src_ipif != NULL) 2310 ipif_refrele(src_ipif); 2311 return (B_TRUE); 2312 } 2313 bzero((char *)mp->b_rptr, len); 2314 mp->b_wptr = mp->b_rptr + len; 2315 2316 ip6i = (ip6i_t *)mp->b_rptr; 2317 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2318 ip6i->ip6i_nxt = IPPROTO_RAW; 2319 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2320 if (flag & NDP_PROBE) 2321 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 2322 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2323 2324 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 2325 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2326 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2327 ip6h->ip6_nxt = IPPROTO_ICMPV6; 2328 ip6h->ip6_hops = IPV6_MAX_HOPS; 2329 ip6h->ip6_dst = *target; 2330 icmp6 = (icmp6_t *)&ip6h[1]; 2331 2332 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 2333 sizeof (nd_neighbor_advert_t)); 2334 2335 if (operation == ND_NEIGHBOR_SOLICIT) { 2336 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 2337 2338 if (!(flag & NDP_PROBE)) 2339 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 2340 ip6h->ip6_src = *sender; 2341 ns->nd_ns_target = *target; 2342 if (!(flag & NDP_UNICAST)) { 2343 /* Form multicast address of the target */ 2344 ip6h->ip6_dst = ipv6_solicited_node_mcast; 2345 ip6h->ip6_dst.s6_addr32[3] |= 2346 ns->nd_ns_target.s6_addr32[3]; 2347 } 2348 } else { 2349 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 2350 2351 ASSERT(!(flag & NDP_PROBE)); 2352 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2353 ip6h->ip6_src = *sender; 2354 na->nd_na_target = *sender; 2355 if (flag & NDP_ISROUTER) 2356 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 2357 if (flag & NDP_SOLICITED) 2358 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 2359 if (flag & NDP_ORIDE) 2360 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 2361 } 2362 2363 hw_addr = NULL; 2364 if (!(flag & NDP_PROBE)) { 2365 mutex_enter(&hwaddr_ill->ill_lock); 2366 hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : 2367 hwaddr_ill->ill_phys_addr; 2368 if (hw_addr != NULL) { 2369 /* Fill in link layer address and option len */ 2370 opt->nd_opt_len = (uint8_t)plen; 2371 bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); 2372 } 2373 mutex_exit(&hwaddr_ill->ill_lock); 2374 } 2375 if (hw_addr == NULL) { 2376 /* If there's no link layer address option, then strip it. */ 2377 len -= plen * 8; 2378 mp->b_wptr = mp->b_rptr + len; 2379 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2380 } 2381 2382 icmp6->icmp6_type = (uint8_t)operation; 2383 icmp6->icmp6_code = 0; 2384 /* 2385 * Prepare for checksum by putting icmp length in the icmp 2386 * checksum field. The checksum is calculated in ip_wput_v6. 2387 */ 2388 icmp6->icmp6_cksum = ip6h->ip6_plen; 2389 2390 if (src_ipif != NULL) 2391 ipif_refrele(src_ipif); 2392 if (canput(ill->ill_wq)) { 2393 put(ill->ill_wq, mp); 2394 return (B_FALSE); 2395 } 2396 freemsg(mp); 2397 return (B_TRUE); 2398 } 2399 2400 /* 2401 * Make a link layer address (does not include the SAP) from an nce. 2402 * To form the link layer address, use the last four bytes of ipv6 2403 * address passed in and the fixed offset stored in nce. 2404 */ 2405 static void 2406 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 2407 { 2408 uchar_t *mask, *to; 2409 ill_t *ill = nce->nce_ill; 2410 int len; 2411 2412 if (ill->ill_net_type == IRE_IF_NORESOLVER) 2413 return; 2414 ASSERT(nce->nce_res_mp != NULL); 2415 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 2416 ASSERT(nce->nce_flags & NCE_F_MAPPING); 2417 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 2418 ASSERT(addr != NULL); 2419 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 2420 addrpos, ill->ill_nd_lla_len); 2421 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 2422 IPV6_ADDR_LEN); 2423 mask = (uchar_t *)&nce->nce_extract_mask; 2424 mask += (IPV6_ADDR_LEN - len); 2425 addr += (IPV6_ADDR_LEN - len); 2426 to = addrpos + nce->nce_ll_extract_start; 2427 while (len-- > 0) 2428 *to++ |= *mask++ & *addr++; 2429 } 2430 2431 /* 2432 * Pass a cache report back out via NDD. 2433 */ 2434 /* ARGSUSED */ 2435 int 2436 ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 2437 { 2438 (void) mi_mpprintf(mp, "ifname hardware addr flags" 2439 " proto addr/mask"); 2440 ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp); 2441 return (0); 2442 } 2443 2444 /* 2445 * Add a single line to the NDP Cache Entry Report. 2446 */ 2447 static void 2448 nce_report1(nce_t *nce, uchar_t *mp_arg) 2449 { 2450 ill_t *ill = nce->nce_ill; 2451 char local_buf[INET6_ADDRSTRLEN]; 2452 uchar_t flags_buf[10]; 2453 uint32_t flags = nce->nce_flags; 2454 mblk_t *mp = (mblk_t *)mp_arg; 2455 uchar_t *h; 2456 uchar_t *m = flags_buf; 2457 in6_addr_t v6addr; 2458 2459 /* 2460 * Lock the nce to protect nce_res_mp from being changed 2461 * if an external resolver address resolution completes 2462 * while nce_res_mp is being accessed here. 2463 * 2464 * Deal with all address formats, not just Ethernet-specific 2465 * In addition, make sure that the mblk has enough space 2466 * before writing to it. If is doesn't, allocate a new one. 2467 */ 2468 if (nce->nce_ipversion == IPV4_VERSION) 2469 /* Don't include v4 nce_ts in NDP cache entry report */ 2470 return; 2471 2472 ASSERT(ill != NULL); 2473 v6addr = nce->nce_mask; 2474 if (flags & NCE_F_PERMANENT) 2475 *m++ = 'P'; 2476 if (flags & NCE_F_ISROUTER) 2477 *m++ = 'R'; 2478 if (flags & NCE_F_MAPPING) 2479 *m++ = 'M'; 2480 *m = '\0'; 2481 2482 if (ill->ill_net_type == IRE_IF_RESOLVER) { 2483 size_t addrlen; 2484 char *addr_buf; 2485 dl_unitdata_req_t *dl; 2486 2487 mutex_enter(&nce->nce_lock); 2488 h = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2489 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 2490 if (ill->ill_flags & ILLF_XRESOLV) 2491 addrlen = (3 * (dl->dl_dest_addr_length)); 2492 else 2493 addrlen = (3 * (ill->ill_nd_lla_len)); 2494 if (addrlen <= 0) { 2495 mutex_exit(&nce->nce_lock); 2496 (void) mi_mpprintf(mp, 2497 "%8s %9s %5s %s/%d", 2498 ill->ill_name, 2499 "None", 2500 (uchar_t *)&flags_buf, 2501 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2502 (char *)local_buf, sizeof (local_buf)), 2503 ip_mask_to_plen_v6(&v6addr)); 2504 } else { 2505 /* 2506 * Convert the hardware/lla address to ascii 2507 */ 2508 addr_buf = kmem_zalloc(addrlen, KM_NOSLEEP); 2509 if (addr_buf == NULL) { 2510 mutex_exit(&nce->nce_lock); 2511 return; 2512 } 2513 (void) mac_colon_addr((uint8_t *)h, 2514 (ill->ill_flags & ILLF_XRESOLV) ? 2515 dl->dl_dest_addr_length : ill->ill_nd_lla_len, 2516 addr_buf, addrlen); 2517 mutex_exit(&nce->nce_lock); 2518 (void) mi_mpprintf(mp, "%8s %17s %5s %s/%d", 2519 ill->ill_name, addr_buf, (uchar_t *)&flags_buf, 2520 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2521 (char *)local_buf, sizeof (local_buf)), 2522 ip_mask_to_plen_v6(&v6addr)); 2523 kmem_free(addr_buf, addrlen); 2524 } 2525 } else { 2526 (void) mi_mpprintf(mp, 2527 "%8s %9s %5s %s/%d", 2528 ill->ill_name, 2529 "None", 2530 (uchar_t *)&flags_buf, 2531 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2532 (char *)local_buf, sizeof (local_buf)), 2533 ip_mask_to_plen_v6(&v6addr)); 2534 } 2535 } 2536 2537 mblk_t * 2538 nce_udreq_alloc(ill_t *ill) 2539 { 2540 mblk_t *template_mp = NULL; 2541 dl_unitdata_req_t *dlur; 2542 int sap_length; 2543 2544 ASSERT(ill->ill_isv6); 2545 2546 sap_length = ill->ill_sap_length; 2547 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 2548 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 2549 if (template_mp == NULL) 2550 return (NULL); 2551 2552 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 2553 dlur->dl_priority.dl_min = 0; 2554 dlur->dl_priority.dl_max = 0; 2555 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 2556 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 2557 2558 /* Copy in the SAP value. */ 2559 NCE_LL_SAP_COPY(ill, template_mp); 2560 2561 return (template_mp); 2562 } 2563 2564 /* 2565 * NDP retransmit timer. 2566 * This timer goes off when: 2567 * a. It is time to retransmit NS for resolver. 2568 * b. It is time to send reachability probes. 2569 */ 2570 void 2571 ndp_timer(void *arg) 2572 { 2573 nce_t *nce = arg; 2574 ill_t *ill = nce->nce_ill; 2575 uint32_t ms; 2576 char addrbuf[INET6_ADDRSTRLEN]; 2577 mblk_t *mp; 2578 boolean_t dropped = B_FALSE; 2579 2580 /* 2581 * The timer has to be cancelled by ndp_delete before doing the final 2582 * refrele. So the NCE is guaranteed to exist when the timer runs 2583 * until it clears the timeout_id. Before clearing the timeout_id 2584 * bump up the refcnt so that we can continue to use the nce 2585 */ 2586 ASSERT(nce != NULL); 2587 2588 /* 2589 * Grab the ill_g_lock now itself to avoid lock order problems. 2590 * nce_solicit needs ill_g_lock to be able to traverse ills 2591 */ 2592 rw_enter(&ill_g_lock, RW_READER); 2593 mutex_enter(&nce->nce_lock); 2594 NCE_REFHOLD_LOCKED(nce); 2595 nce->nce_timeout_id = 0; 2596 2597 /* 2598 * Check the reachability state first. 2599 */ 2600 switch (nce->nce_state) { 2601 case ND_DELAY: 2602 rw_exit(&ill_g_lock); 2603 nce->nce_state = ND_PROBE; 2604 mutex_exit(&nce->nce_lock); 2605 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2606 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2607 if (ip_debug > 3) { 2608 /* ip2dbg */ 2609 pr_addr_dbg("ndp_timer: state for %s changed " 2610 "to PROBE\n", AF_INET6, &nce->nce_addr); 2611 } 2612 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2613 NCE_REFRELE(nce); 2614 return; 2615 case ND_PROBE: 2616 /* must be retransmit timer */ 2617 rw_exit(&ill_g_lock); 2618 nce->nce_pcnt--; 2619 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2620 nce->nce_pcnt >= -1); 2621 if (nce->nce_pcnt > 0) { 2622 /* 2623 * As per RFC2461, the nce gets deleted after 2624 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2625 * Note that the first unicast solicitation is sent 2626 * during the DELAY state. 2627 */ 2628 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2629 nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, 2630 addrbuf, sizeof (addrbuf)))); 2631 mutex_exit(&nce->nce_lock); 2632 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, 2633 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, 2634 (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE : 2635 NDP_UNICAST); 2636 if (dropped) { 2637 mutex_enter(&nce->nce_lock); 2638 nce->nce_pcnt++; 2639 mutex_exit(&nce->nce_lock); 2640 } 2641 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 2642 } else if (nce->nce_pcnt < 0) { 2643 /* No hope, delete the nce */ 2644 nce->nce_state = ND_UNREACHABLE; 2645 mutex_exit(&nce->nce_lock); 2646 if (ip_debug > 2) { 2647 /* ip1dbg */ 2648 pr_addr_dbg("ndp_timer: Delete IRE for" 2649 " dst %s\n", AF_INET6, &nce->nce_addr); 2650 } 2651 ndp_delete(nce); 2652 } else if (!(nce->nce_flags & NCE_F_PERMANENT)) { 2653 /* Wait RetransTimer, before deleting the entry */ 2654 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2655 nce->nce_pcnt, inet_ntop(AF_INET6, 2656 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2657 mutex_exit(&nce->nce_lock); 2658 /* Wait one interval before killing */ 2659 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2660 } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { 2661 ipif_t *ipif; 2662 2663 /* 2664 * We're done probing, and we can now declare this 2665 * address to be usable. Let IP know that it's ok to 2666 * use. 2667 */ 2668 nce->nce_state = ND_REACHABLE; 2669 mutex_exit(&nce->nce_lock); 2670 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, 2671 ALL_ZONES, NULL, NULL, NULL, NULL); 2672 if (ipif != NULL) { 2673 if (ipif->ipif_was_dup) { 2674 char ibuf[LIFNAMSIZ + 10]; 2675 char sbuf[INET6_ADDRSTRLEN]; 2676 2677 ipif->ipif_was_dup = B_FALSE; 2678 (void) strlcpy(ibuf, ill->ill_name, 2679 sizeof (ibuf)); 2680 (void) inet_ntop(AF_INET6, 2681 &ipif->ipif_v6lcl_addr, 2682 sbuf, sizeof (sbuf)); 2683 if (ipif->ipif_id != 0) { 2684 (void) snprintf(ibuf + 2685 ill->ill_name_length - 1, 2686 sizeof (ibuf) - 2687 ill->ill_name_length + 1, 2688 ":%d", ipif->ipif_id); 2689 } 2690 cmn_err(CE_NOTE, "recovered address " 2691 "%s on %s", sbuf, ibuf); 2692 } 2693 if ((ipif->ipif_flags & IPIF_UP) && 2694 !ipif->ipif_addr_ready) { 2695 ip_rts_ifmsg(ipif); 2696 ip_rts_newaddrmsg(RTM_ADD, 0, ipif); 2697 sctp_update_ipif(ipif, SCTP_IPIF_UP); 2698 } 2699 ipif->ipif_addr_ready = 1; 2700 ipif_refrele(ipif); 2701 } 2702 /* Begin defending our new address */ 2703 nce->nce_unsolicit_count = 0; 2704 dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, 2705 B_FALSE, &nce->nce_addr, &ipv6_all_hosts_mcast, 2706 nce_advert_flags(nce)); 2707 if (dropped) { 2708 nce->nce_unsolicit_count = 1; 2709 NDP_RESTART_TIMER(nce, 2710 ip_ndp_unsolicit_interval); 2711 } else if (ip_ndp_defense_interval != 0) { 2712 NDP_RESTART_TIMER(nce, ip_ndp_defense_interval); 2713 } 2714 } else { 2715 /* 2716 * This is an address we're probing to be our own, but 2717 * the ill is down. Wait until it comes back before 2718 * doing anything, but switch to reachable state so 2719 * that the restart will work. 2720 */ 2721 nce->nce_state = ND_REACHABLE; 2722 mutex_exit(&nce->nce_lock); 2723 } 2724 NCE_REFRELE(nce); 2725 return; 2726 case ND_INCOMPLETE: 2727 /* 2728 * Must be resolvers retransmit timer. 2729 */ 2730 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2731 ip6i_t *ip6i; 2732 ip6_t *ip6h; 2733 mblk_t *data_mp; 2734 2735 /* 2736 * Walk the list of packets queued, and see if there 2737 * are any multipathing probe packets. Such packets 2738 * are always queued at the head. Since this is a 2739 * retransmit timer firing, mark such packets as 2740 * delayed in ND resolution. This info will be used 2741 * in ip_wput_v6(). Multipathing probe packets will 2742 * always have an ip6i_t. Once we hit a packet without 2743 * it, we can break out of this loop. 2744 */ 2745 if (mp->b_datap->db_type == M_CTL) 2746 data_mp = mp->b_cont; 2747 else 2748 data_mp = mp; 2749 2750 ip6h = (ip6_t *)data_mp->b_rptr; 2751 if (ip6h->ip6_nxt != IPPROTO_RAW) 2752 break; 2753 2754 /* 2755 * This message should have been pulled up already in 2756 * ip_wput_v6. We can't do pullups here because the 2757 * b_next/b_prev is non-NULL. 2758 */ 2759 ip6i = (ip6i_t *)ip6h; 2760 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2761 sizeof (ip6i_t) + IPV6_HDR_LEN); 2762 2763 /* Mark this packet as delayed due to ND resolution */ 2764 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2765 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2766 } 2767 if (nce->nce_qd_mp != NULL) { 2768 ms = nce_solicit(nce, NULL); 2769 rw_exit(&ill_g_lock); 2770 if (ms == 0) { 2771 if (nce->nce_state != ND_REACHABLE) { 2772 mutex_exit(&nce->nce_lock); 2773 nce_resolv_failed(nce); 2774 ndp_delete(nce); 2775 } else { 2776 mutex_exit(&nce->nce_lock); 2777 } 2778 } else { 2779 mutex_exit(&nce->nce_lock); 2780 NDP_RESTART_TIMER(nce, (clock_t)ms); 2781 } 2782 NCE_REFRELE(nce); 2783 return; 2784 } 2785 mutex_exit(&nce->nce_lock); 2786 rw_exit(&ill_g_lock); 2787 NCE_REFRELE(nce); 2788 break; 2789 case ND_REACHABLE : 2790 rw_exit(&ill_g_lock); 2791 if (((nce->nce_flags & NCE_F_UNSOL_ADV) && 2792 nce->nce_unsolicit_count != 0) || 2793 ((nce->nce_flags & NCE_F_PERMANENT) && 2794 ip_ndp_defense_interval != 0)) { 2795 if (nce->nce_unsolicit_count > 0) 2796 nce->nce_unsolicit_count--; 2797 mutex_exit(&nce->nce_lock); 2798 dropped = nce_xmit(ill, 2799 ND_NEIGHBOR_ADVERT, 2800 ill, /* ill to be used for hw addr */ 2801 B_FALSE, /* use ill_phys_addr */ 2802 &nce->nce_addr, 2803 &ipv6_all_hosts_mcast, 2804 nce_advert_flags(nce)); 2805 if (dropped) { 2806 mutex_enter(&nce->nce_lock); 2807 nce->nce_unsolicit_count++; 2808 mutex_exit(&nce->nce_lock); 2809 } 2810 if (nce->nce_unsolicit_count != 0) { 2811 NDP_RESTART_TIMER(nce, 2812 ip_ndp_unsolicit_interval); 2813 } else { 2814 NDP_RESTART_TIMER(nce, 2815 ip_ndp_defense_interval); 2816 } 2817 } else { 2818 mutex_exit(&nce->nce_lock); 2819 } 2820 NCE_REFRELE(nce); 2821 break; 2822 default: 2823 rw_exit(&ill_g_lock); 2824 mutex_exit(&nce->nce_lock); 2825 NCE_REFRELE(nce); 2826 break; 2827 } 2828 } 2829 2830 /* 2831 * Set a link layer address from the ll_addr passed in. 2832 * Copy SAP from ill. 2833 */ 2834 static void 2835 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2836 { 2837 ill_t *ill = nce->nce_ill; 2838 uchar_t *woffset; 2839 2840 ASSERT(ll_addr != NULL); 2841 /* Always called before fast_path_probe */ 2842 ASSERT(nce->nce_fp_mp == NULL); 2843 if (ill->ill_sap_length != 0) { 2844 /* 2845 * Copy the SAP type specified in the 2846 * request into the xmit template. 2847 */ 2848 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2849 } 2850 if (ill->ill_phys_addr_length > 0) { 2851 /* 2852 * The bcopy() below used to be called for the physical address 2853 * length rather than the link layer address length. For 2854 * ethernet and many other media, the phys_addr and lla are 2855 * identical. 2856 * However, with xresolv interfaces being introduced, the 2857 * phys_addr and lla are no longer the same, and the physical 2858 * address may not have any useful meaning, so we use the lla 2859 * for IPv6 address resolution and destination addressing. 2860 * 2861 * For PPP or other interfaces with a zero length 2862 * physical address, don't do anything here. 2863 * The bcopy() with a zero phys_addr length was previously 2864 * a no-op for interfaces with a zero-length physical address. 2865 * Using the lla for them would change the way they operate. 2866 * Doing nothing in such cases preserves expected behavior. 2867 */ 2868 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2869 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2870 } 2871 } 2872 2873 static boolean_t 2874 nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) 2875 { 2876 ill_t *ill = nce->nce_ill; 2877 uchar_t *ll_offset; 2878 2879 ASSERT(nce->nce_res_mp != NULL); 2880 if (ll_addr == NULL) 2881 return (B_FALSE); 2882 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2883 if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) 2884 return (B_TRUE); 2885 return (B_FALSE); 2886 } 2887 2888 /* 2889 * Updates the link layer address or the reachability state of 2890 * a cache entry. Reset probe counter if needed. 2891 */ 2892 static void 2893 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2894 { 2895 ill_t *ill = nce->nce_ill; 2896 boolean_t need_stop_timer = B_FALSE; 2897 boolean_t need_fastpath_update = B_FALSE; 2898 2899 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2900 ASSERT(nce->nce_ipversion == IPV6_VERSION); 2901 /* 2902 * If this interface does not do NUD, there is no point 2903 * in allowing an update to the cache entry. Although 2904 * we will respond to NS. 2905 * The only time we accept an update for a resolver when 2906 * NUD is turned off is when it has just been created. 2907 * Non-Resolvers will always be created as REACHABLE. 2908 */ 2909 if (new_state != ND_UNCHANGED) { 2910 if ((nce->nce_flags & NCE_F_NONUD) && 2911 (nce->nce_state != ND_INCOMPLETE)) 2912 return; 2913 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2914 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2915 need_stop_timer = B_TRUE; 2916 if (new_state == ND_REACHABLE) 2917 nce->nce_last = TICK_TO_MSEC(lbolt64); 2918 else { 2919 /* We force NUD in this case */ 2920 nce->nce_last = 0; 2921 } 2922 nce->nce_state = new_state; 2923 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2924 } 2925 /* 2926 * In case of fast path we need to free the the fastpath 2927 * M_DATA and do another probe. Otherwise we can just 2928 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2929 * whatever packets that happens to be transmitting at the time. 2930 */ 2931 if (new_ll_addr != NULL) { 2932 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2933 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2934 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2935 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2936 if (nce->nce_fp_mp != NULL) { 2937 freemsg(nce->nce_fp_mp); 2938 nce->nce_fp_mp = NULL; 2939 } 2940 need_fastpath_update = B_TRUE; 2941 } 2942 mutex_exit(&nce->nce_lock); 2943 if (need_stop_timer) { 2944 (void) untimeout(nce->nce_timeout_id); 2945 nce->nce_timeout_id = 0; 2946 } 2947 if (need_fastpath_update) 2948 nce_fastpath(nce); 2949 mutex_enter(&nce->nce_lock); 2950 } 2951 2952 void 2953 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 2954 { 2955 uint_t count = 0; 2956 mblk_t **mpp; 2957 2958 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2959 2960 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2961 mpp = &(*mpp)->b_next) { 2962 if (++count > 2963 nce->nce_ill->ill_max_buf) { 2964 mblk_t *tmp = nce->nce_qd_mp->b_next; 2965 2966 nce->nce_qd_mp->b_next = NULL; 2967 nce->nce_qd_mp->b_prev = NULL; 2968 freemsg(nce->nce_qd_mp); 2969 nce->nce_qd_mp = tmp; 2970 } 2971 } 2972 /* put this on the list */ 2973 if (head_insert) { 2974 mp->b_next = nce->nce_qd_mp; 2975 nce->nce_qd_mp = mp; 2976 } else { 2977 *mpp = mp; 2978 } 2979 } 2980 2981 static void 2982 nce_queue_mp(nce_t *nce, mblk_t *mp) 2983 { 2984 boolean_t head_insert = B_FALSE; 2985 ip6_t *ip6h; 2986 ip6i_t *ip6i; 2987 mblk_t *data_mp; 2988 2989 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2990 2991 if (mp->b_datap->db_type == M_CTL) 2992 data_mp = mp->b_cont; 2993 else 2994 data_mp = mp; 2995 ip6h = (ip6_t *)data_mp->b_rptr; 2996 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2997 /* 2998 * This message should have been pulled up already in 2999 * ip_wput_v6. We can't do pullups here because the message 3000 * could be from the nce_qd_mp which could have b_next/b_prev 3001 * non-NULL. 3002 */ 3003 ip6i = (ip6i_t *)ip6h; 3004 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 3005 sizeof (ip6i_t) + IPV6_HDR_LEN); 3006 /* 3007 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 3008 * This has 2 aspects mentioned below. 3009 * 1. Perform head insertion in the nce_qd_mp for these packets. 3010 * This ensures that next retransmit of ND solicitation 3011 * will use the interface specified by the probe packet, 3012 * for both NS and NA. This corresponds to the src address 3013 * in the IPv6 packet. If we insert at tail, we will be 3014 * depending on the packet at the head for successful 3015 * ND resolution. This is not reliable, because the interface 3016 * on which the NA arrives could be different from the interface 3017 * on which the NS was sent, and if the receiving interface is 3018 * failed, it will appear that the sending interface is also 3019 * failed, causing in.mpathd to misdiagnose this as link 3020 * failure. 3021 * 2. Drop the original packet, if the ND resolution did not 3022 * succeed in the first attempt. However we will create the 3023 * nce and the ire, as soon as the ND resolution succeeds. 3024 * We don't gain anything by queueing multiple probe packets 3025 * and sending them back-to-back once resolution succeeds. 3026 * It is sufficient to send just 1 packet after ND resolution 3027 * succeeds. Since mpathd is sending down probe packets at a 3028 * constant rate, we don't need to send the queued packet. We 3029 * need to queue it only for NDP resolution. The benefit of 3030 * dropping the probe packets that were delayed in ND 3031 * resolution, is that in.mpathd will not see inflated 3032 * RTT. If the ND resolution does not succeed within 3033 * in.mpathd's failure detection time, mpathd may detect 3034 * a failure, and it does not matter whether the packet 3035 * was queued or dropped. 3036 */ 3037 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 3038 head_insert = B_TRUE; 3039 } 3040 3041 nce_queue_mp_common(nce, mp, head_insert); 3042 } 3043 3044 /* 3045 * Called when address resolution failed due to a timeout. 3046 * Send an ICMP unreachable in response to all queued packets. 3047 */ 3048 void 3049 nce_resolv_failed(nce_t *nce) 3050 { 3051 mblk_t *mp, *nxt_mp, *first_mp; 3052 char buf[INET6_ADDRSTRLEN]; 3053 ip6_t *ip6h; 3054 zoneid_t zoneid = GLOBAL_ZONEID; 3055 3056 ip1dbg(("nce_resolv_failed: dst %s\n", 3057 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 3058 mutex_enter(&nce->nce_lock); 3059 mp = nce->nce_qd_mp; 3060 nce->nce_qd_mp = NULL; 3061 mutex_exit(&nce->nce_lock); 3062 while (mp != NULL) { 3063 nxt_mp = mp->b_next; 3064 mp->b_next = NULL; 3065 mp->b_prev = NULL; 3066 3067 first_mp = mp; 3068 if (mp->b_datap->db_type == M_CTL) { 3069 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 3070 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3071 zoneid = io->ipsec_out_zoneid; 3072 ASSERT(zoneid != ALL_ZONES); 3073 mp = mp->b_cont; 3074 } 3075 3076 ip6h = (ip6_t *)mp->b_rptr; 3077 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3078 ip6i_t *ip6i; 3079 /* 3080 * This message should have been pulled up already 3081 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 3082 * the header is pulled up. 3083 */ 3084 ip6i = (ip6i_t *)ip6h; 3085 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 3086 sizeof (ip6i_t) + IPV6_HDR_LEN); 3087 mp->b_rptr += sizeof (ip6i_t); 3088 } 3089 /* 3090 * Ignore failure since icmp_unreachable_v6 will silently 3091 * drop packets with an unspecified source address. 3092 */ 3093 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid); 3094 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 3095 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE); 3096 mp = nxt_mp; 3097 } 3098 } 3099 3100 /* 3101 * Called by SIOCSNDP* ioctl to add/change an nce entry 3102 * and the corresponding attributes. 3103 * Disallow states other than ND_REACHABLE or ND_STALE. 3104 */ 3105 int 3106 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 3107 { 3108 sin6_t *sin6; 3109 in6_addr_t *addr; 3110 nce_t *nce; 3111 int err; 3112 uint16_t new_flags = 0; 3113 uint16_t old_flags = 0; 3114 int inflags = lnr->lnr_flags; 3115 3116 ASSERT(ill->ill_isv6); 3117 if ((lnr->lnr_state_create != ND_REACHABLE) && 3118 (lnr->lnr_state_create != ND_STALE)) 3119 return (EINVAL); 3120 3121 sin6 = (sin6_t *)&lnr->lnr_addr; 3122 addr = &sin6->sin6_addr; 3123 3124 mutex_enter(&ndp6.ndp_g_lock); 3125 /* We know it can not be mapping so just look in the hash table */ 3126 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); 3127 nce = nce_lookup_addr(ill, addr, nce); 3128 if (nce != NULL) 3129 new_flags = nce->nce_flags; 3130 3131 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 3132 case NDF_ISROUTER_ON: 3133 new_flags |= NCE_F_ISROUTER; 3134 break; 3135 case NDF_ISROUTER_OFF: 3136 new_flags &= ~NCE_F_ISROUTER; 3137 break; 3138 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3139 mutex_exit(&ndp6.ndp_g_lock); 3140 if (nce != NULL) 3141 NCE_REFRELE(nce); 3142 return (EINVAL); 3143 } 3144 3145 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3146 case NDF_ANYCAST_ON: 3147 new_flags |= NCE_F_ANYCAST; 3148 break; 3149 case NDF_ANYCAST_OFF: 3150 new_flags &= ~NCE_F_ANYCAST; 3151 break; 3152 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3153 mutex_exit(&ndp6.ndp_g_lock); 3154 if (nce != NULL) 3155 NCE_REFRELE(nce); 3156 return (EINVAL); 3157 } 3158 3159 switch (inflags & (NDF_PROXY_ON|NDF_PROXY_OFF)) { 3160 case NDF_PROXY_ON: 3161 new_flags |= NCE_F_PROXY; 3162 break; 3163 case NDF_PROXY_OFF: 3164 new_flags &= ~NCE_F_PROXY; 3165 break; 3166 case (NDF_PROXY_OFF|NDF_PROXY_ON): 3167 mutex_exit(&ndp6.ndp_g_lock); 3168 if (nce != NULL) 3169 NCE_REFRELE(nce); 3170 return (EINVAL); 3171 } 3172 3173 if (nce == NULL) { 3174 err = ndp_add(ill, 3175 (uchar_t *)lnr->lnr_hdw_addr, 3176 addr, 3177 &ipv6_all_ones, 3178 &ipv6_all_zeros, 3179 0, 3180 new_flags, 3181 lnr->lnr_state_create, 3182 &nce, 3183 NULL, 3184 NULL); 3185 if (err != 0) { 3186 mutex_exit(&ndp6.ndp_g_lock); 3187 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 3188 return (err); 3189 } 3190 } 3191 old_flags = nce->nce_flags; 3192 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3193 /* 3194 * Router turned to host, delete all ires. 3195 * XXX Just delete the entry, but we need to add too. 3196 */ 3197 nce->nce_flags &= ~NCE_F_ISROUTER; 3198 mutex_exit(&ndp6.ndp_g_lock); 3199 ndp_delete(nce); 3200 NCE_REFRELE(nce); 3201 return (0); 3202 } 3203 mutex_exit(&ndp6.ndp_g_lock); 3204 3205 mutex_enter(&nce->nce_lock); 3206 nce->nce_flags = new_flags; 3207 mutex_exit(&nce->nce_lock); 3208 /* 3209 * Note that we ignore the state at this point, which 3210 * should be either STALE or REACHABLE. Instead we let 3211 * the link layer address passed in to determine the state 3212 * much like incoming packets. 3213 */ 3214 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3215 NCE_REFRELE(nce); 3216 return (0); 3217 } 3218 3219 /* 3220 * If the device driver supports it, we make nce_fp_mp to have 3221 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 3222 * The caller insures there is hold on nce for this function. 3223 * Note that since ill_fastpath_probe() copies the mblk there is 3224 * no need for the hold beyond this function. 3225 */ 3226 static void 3227 nce_fastpath(nce_t *nce) 3228 { 3229 ill_t *ill = nce->nce_ill; 3230 int res; 3231 3232 ASSERT(ill != NULL); 3233 if (nce->nce_fp_mp != NULL) { 3234 /* Already contains fastpath info */ 3235 return; 3236 } 3237 if (nce->nce_res_mp != NULL) { 3238 nce_fastpath_list_add(nce); 3239 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3240 /* 3241 * EAGAIN is an indication of a transient error 3242 * i.e. allocation failure etc. leave the nce in the list it 3243 * will be updated when another probe happens for another ire 3244 * if not it will be taken out of the list when the ire is 3245 * deleted. 3246 */ 3247 3248 if (res != 0 && res != EAGAIN) 3249 nce_fastpath_list_delete(nce); 3250 } 3251 } 3252 3253 /* 3254 * Drain the list of nce's waiting for fastpath response. 3255 */ 3256 void 3257 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3258 void *arg) 3259 { 3260 3261 nce_t *next_nce; 3262 nce_t *current_nce; 3263 nce_t *first_nce; 3264 nce_t *prev_nce = NULL; 3265 3266 ASSERT(ill != NULL && ill->ill_isv6); 3267 3268 mutex_enter(&ill->ill_lock); 3269 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3270 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3271 next_nce = current_nce->nce_fastpath; 3272 /* 3273 * Take it off the list if we're flushing, or if the callback 3274 * routine tells us to do so. Otherwise, leave the nce in the 3275 * fastpath list to handle any pending response from the lower 3276 * layer. We can't drain the list when the callback routine 3277 * comparison failed, because the response is asynchronous in 3278 * nature, and may not arrive in the same order as the list 3279 * insertion. 3280 */ 3281 if (func == NULL || func(current_nce, arg)) { 3282 current_nce->nce_fastpath = NULL; 3283 if (current_nce == first_nce) 3284 ill->ill_fastpath_list = first_nce = next_nce; 3285 else 3286 prev_nce->nce_fastpath = next_nce; 3287 } else { 3288 /* previous element that is still in the list */ 3289 prev_nce = current_nce; 3290 } 3291 current_nce = next_nce; 3292 } 3293 mutex_exit(&ill->ill_lock); 3294 } 3295 3296 /* 3297 * Add nce to the nce fastpath list. 3298 */ 3299 void 3300 nce_fastpath_list_add(nce_t *nce) 3301 { 3302 ill_t *ill; 3303 3304 ill = nce->nce_ill; 3305 ASSERT(ill != NULL && ill->ill_isv6); 3306 3307 mutex_enter(&ill->ill_lock); 3308 mutex_enter(&nce->nce_lock); 3309 3310 /* 3311 * if nce has not been deleted and 3312 * is not already in the list add it. 3313 */ 3314 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3315 (nce->nce_fastpath == NULL)) { 3316 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3317 ill->ill_fastpath_list = nce; 3318 } 3319 3320 mutex_exit(&nce->nce_lock); 3321 mutex_exit(&ill->ill_lock); 3322 } 3323 3324 /* 3325 * remove nce from the nce fastpath list. 3326 */ 3327 void 3328 nce_fastpath_list_delete(nce_t *nce) 3329 { 3330 nce_t *nce_ptr; 3331 3332 ill_t *ill; 3333 3334 ill = nce->nce_ill; 3335 ASSERT(ill != NULL); 3336 if (!ill->ill_isv6) { 3337 /* 3338 * v4 nce_t's do not have nce_fastpath set. 3339 */ 3340 return; 3341 } 3342 3343 mutex_enter(&ill->ill_lock); 3344 if (nce->nce_fastpath == NULL) 3345 goto done; 3346 3347 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3348 3349 if (ill->ill_fastpath_list == nce) { 3350 ill->ill_fastpath_list = nce->nce_fastpath; 3351 } else { 3352 nce_ptr = ill->ill_fastpath_list; 3353 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3354 if (nce_ptr->nce_fastpath == nce) { 3355 nce_ptr->nce_fastpath = nce->nce_fastpath; 3356 break; 3357 } 3358 nce_ptr = nce_ptr->nce_fastpath; 3359 } 3360 } 3361 3362 nce->nce_fastpath = NULL; 3363 done: 3364 mutex_exit(&ill->ill_lock); 3365 } 3366 3367 /* 3368 * Update all NCE's that are not in fastpath mode and 3369 * have an nce_fp_mp that matches mp. mp->b_cont contains 3370 * the fastpath header. 3371 * 3372 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 3373 */ 3374 boolean_t 3375 ndp_fastpath_update(nce_t *nce, void *arg) 3376 { 3377 mblk_t *mp, *fp_mp; 3378 uchar_t *mp_rptr, *ud_mp_rptr; 3379 mblk_t *ud_mp = nce->nce_res_mp; 3380 ptrdiff_t cmplen; 3381 3382 if (nce->nce_flags & NCE_F_MAPPING) 3383 return (B_TRUE); 3384 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3385 return (B_TRUE); 3386 3387 ip2dbg(("ndp_fastpath_update: trying\n")); 3388 mp = (mblk_t *)arg; 3389 mp_rptr = mp->b_rptr; 3390 cmplen = mp->b_wptr - mp_rptr; 3391 ASSERT(cmplen >= 0); 3392 ud_mp_rptr = ud_mp->b_rptr; 3393 /* 3394 * The nce is locked here to prevent any other threads 3395 * from accessing and changing nce_res_mp when the IPv6 address 3396 * becomes resolved to an lla while we're in the middle 3397 * of looking at and comparing the hardware address (lla). 3398 * It is also locked to prevent multiple threads in nce_fastpath_update 3399 * from examining nce_res_mp atthe same time. 3400 */ 3401 mutex_enter(&nce->nce_lock); 3402 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3403 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3404 mutex_exit(&nce->nce_lock); 3405 /* 3406 * Don't take the ire off the fastpath list yet, 3407 * since the response may come later. 3408 */ 3409 return (B_FALSE); 3410 } 3411 /* Matched - install mp as the fastpath mp */ 3412 ip1dbg(("ndp_fastpath_update: match\n")); 3413 fp_mp = dupb(mp->b_cont); 3414 if (fp_mp != NULL) { 3415 nce->nce_fp_mp = fp_mp; 3416 } 3417 mutex_exit(&nce->nce_lock); 3418 return (B_TRUE); 3419 } 3420 3421 /* 3422 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3423 * driver. Note that it assumes IP is exclusive... 3424 */ 3425 /* ARGSUSED */ 3426 void 3427 ndp_fastpath_flush(nce_t *nce, char *arg) 3428 { 3429 if (nce->nce_flags & NCE_F_MAPPING) 3430 return; 3431 /* No fastpath info? */ 3432 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3433 return; 3434 3435 /* Just delete the NCE... */ 3436 ndp_delete(nce); 3437 } 3438 3439 /* 3440 * Return a pointer to a given option in the packet. 3441 * Assumes that option part of the packet have already been validated. 3442 */ 3443 nd_opt_hdr_t * 3444 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3445 { 3446 while (optlen > 0) { 3447 if (opt->nd_opt_type == opt_type) 3448 return (opt); 3449 optlen -= 8 * opt->nd_opt_len; 3450 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3451 } 3452 return (NULL); 3453 } 3454 3455 /* 3456 * Verify all option lengths present are > 0, also check to see 3457 * if the option lengths and packet length are consistent. 3458 */ 3459 boolean_t 3460 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3461 { 3462 ASSERT(opt != NULL); 3463 while (optlen > 0) { 3464 if (opt->nd_opt_len == 0) 3465 return (B_FALSE); 3466 optlen -= 8 * opt->nd_opt_len; 3467 if (optlen < 0) 3468 return (B_FALSE); 3469 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3470 } 3471 return (B_TRUE); 3472 } 3473 3474 /* 3475 * ndp_walk function. 3476 * Free a fraction of the NCE cache entries. 3477 * A fraction of zero means to not free any in that category. 3478 */ 3479 void 3480 ndp_cache_reclaim(nce_t *nce, char *arg) 3481 { 3482 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3483 uint_t rand; 3484 3485 if (nce->nce_flags & NCE_F_PERMANENT) 3486 return; 3487 3488 rand = (uint_t)lbolt + 3489 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3490 if (ncr->ncr_host != 0 && 3491 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3492 ndp_delete(nce); 3493 return; 3494 } 3495 } 3496 3497 /* 3498 * ndp_walk function. 3499 * Count the number of NCEs that can be deleted. 3500 * These would be hosts but not routers. 3501 */ 3502 void 3503 ndp_cache_count(nce_t *nce, char *arg) 3504 { 3505 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3506 3507 if (nce->nce_flags & NCE_F_PERMANENT) 3508 return; 3509 3510 ncc->ncc_total++; 3511 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3512 ncc->ncc_host++; 3513 } 3514 3515 #ifdef NCE_DEBUG 3516 th_trace_t * 3517 th_trace_nce_lookup(nce_t *nce) 3518 { 3519 int bucket_id; 3520 th_trace_t *th_trace; 3521 3522 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3523 3524 bucket_id = IP_TR_HASH(curthread); 3525 ASSERT(bucket_id < IP_TR_HASH_MAX); 3526 3527 for (th_trace = nce->nce_trace[bucket_id]; th_trace != NULL; 3528 th_trace = th_trace->th_next) { 3529 if (th_trace->th_id == curthread) 3530 return (th_trace); 3531 } 3532 return (NULL); 3533 } 3534 3535 void 3536 nce_trace_ref(nce_t *nce) 3537 { 3538 int bucket_id; 3539 th_trace_t *th_trace; 3540 3541 /* 3542 * Attempt to locate the trace buffer for the curthread. 3543 * If it does not exist, then allocate a new trace buffer 3544 * and link it in list of trace bufs for this ipif, at the head 3545 */ 3546 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3547 3548 if (nce->nce_trace_disable == B_TRUE) 3549 return; 3550 3551 th_trace = th_trace_nce_lookup(nce); 3552 if (th_trace == NULL) { 3553 bucket_id = IP_TR_HASH(curthread); 3554 th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), 3555 KM_NOSLEEP); 3556 if (th_trace == NULL) { 3557 nce->nce_trace_disable = B_TRUE; 3558 nce_trace_inactive(nce); 3559 return; 3560 } 3561 th_trace->th_id = curthread; 3562 th_trace->th_next = nce->nce_trace[bucket_id]; 3563 th_trace->th_prev = &nce->nce_trace[bucket_id]; 3564 if (th_trace->th_next != NULL) 3565 th_trace->th_next->th_prev = &th_trace->th_next; 3566 nce->nce_trace[bucket_id] = th_trace; 3567 } 3568 ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1); 3569 th_trace->th_refcnt++; 3570 th_trace_rrecord(th_trace); 3571 } 3572 3573 void 3574 nce_untrace_ref(nce_t *nce) 3575 { 3576 th_trace_t *th_trace; 3577 3578 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3579 3580 if (nce->nce_trace_disable == B_TRUE) 3581 return; 3582 3583 th_trace = th_trace_nce_lookup(nce); 3584 ASSERT(th_trace != NULL && th_trace->th_refcnt > 0); 3585 3586 th_trace_rrecord(th_trace); 3587 th_trace->th_refcnt--; 3588 } 3589 3590 void 3591 nce_trace_inactive(nce_t *nce) 3592 { 3593 th_trace_t *th_trace; 3594 int i; 3595 3596 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3597 3598 for (i = 0; i < IP_TR_HASH_MAX; i++) { 3599 while (nce->nce_trace[i] != NULL) { 3600 th_trace = nce->nce_trace[i]; 3601 3602 /* unlink th_trace and free it */ 3603 nce->nce_trace[i] = th_trace->th_next; 3604 if (th_trace->th_next != NULL) 3605 th_trace->th_next->th_prev = 3606 &nce->nce_trace[i]; 3607 3608 th_trace->th_next = NULL; 3609 th_trace->th_prev = NULL; 3610 kmem_free(th_trace, sizeof (th_trace_t)); 3611 } 3612 } 3613 3614 } 3615 3616 /* ARGSUSED */ 3617 int 3618 nce_thread_exit(nce_t *nce, caddr_t arg) 3619 { 3620 th_trace_t *th_trace; 3621 3622 mutex_enter(&nce->nce_lock); 3623 th_trace = th_trace_nce_lookup(nce); 3624 3625 if (th_trace == NULL) { 3626 mutex_exit(&nce->nce_lock); 3627 return (0); 3628 } 3629 3630 ASSERT(th_trace->th_refcnt == 0); 3631 3632 /* unlink th_trace and free it */ 3633 *th_trace->th_prev = th_trace->th_next; 3634 if (th_trace->th_next != NULL) 3635 th_trace->th_next->th_prev = th_trace->th_prev; 3636 th_trace->th_next = NULL; 3637 th_trace->th_prev = NULL; 3638 kmem_free(th_trace, sizeof (th_trace_t)); 3639 mutex_exit(&nce->nce_lock); 3640 return (0); 3641 } 3642 #endif 3643 3644 /* 3645 * Called when address resolution fails due to a timeout. 3646 * Send an ICMP unreachable in response to all queued packets. 3647 */ 3648 void 3649 arp_resolv_failed(nce_t *nce) 3650 { 3651 mblk_t *mp, *nxt_mp, *first_mp; 3652 char buf[INET6_ADDRSTRLEN]; 3653 zoneid_t zoneid = GLOBAL_ZONEID; 3654 struct in_addr ipv4addr; 3655 3656 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3657 ip3dbg(("arp_resolv_failed: dst %s\n", 3658 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3659 mutex_enter(&nce->nce_lock); 3660 mp = nce->nce_qd_mp; 3661 nce->nce_qd_mp = NULL; 3662 mutex_exit(&nce->nce_lock); 3663 3664 while (mp != NULL) { 3665 nxt_mp = mp->b_next; 3666 mp->b_next = NULL; 3667 mp->b_prev = NULL; 3668 3669 first_mp = mp; 3670 /* 3671 * Send icmp unreachable messages 3672 * to the hosts. 3673 */ 3674 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid); 3675 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3676 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3677 ICMP_HOST_UNREACHABLE); 3678 mp = nxt_mp; 3679 } 3680 } 3681 3682 static int 3683 ndp_lookup_then_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, 3684 const in_addr_t *mask, const in_addr_t *extract_mask, 3685 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 3686 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 3687 { 3688 int err = 0; 3689 nce_t *nce; 3690 in6_addr_t addr6; 3691 3692 mutex_enter(&ndp4.ndp_g_lock); 3693 nce = *((nce_t **)NCE_HASH_PTR_V4(*addr)); 3694 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3695 nce = nce_lookup_addr(ill, &addr6, nce); 3696 if (nce == NULL) { 3697 err = ndp_add_v4(ill, 3698 hw_addr, 3699 addr, 3700 mask, 3701 extract_mask, 3702 hw_extract_start, 3703 flags, 3704 state, 3705 newnce, 3706 fp_mp, 3707 res_mp); 3708 } else { 3709 *newnce = nce; 3710 err = EEXIST; 3711 } 3712 mutex_exit(&ndp4.ndp_g_lock); 3713 return (err); 3714 } 3715 3716 /* 3717 * NDP Cache Entry creation routine for IPv4. 3718 * Mapped entries are handled in arp. 3719 * This routine must always be called with ndp4.ndp_g_lock held. 3720 * Prior to return, nce_refcnt is incremented. 3721 */ 3722 static int 3723 ndp_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, 3724 const in_addr_t *mask, const in_addr_t *extract_mask, 3725 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 3726 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 3727 { 3728 static nce_t nce_nil; 3729 nce_t *nce; 3730 mblk_t *mp; 3731 mblk_t *template; 3732 nce_t **ncep; 3733 3734 ASSERT(MUTEX_HELD(&ndp4.ndp_g_lock)); 3735 ASSERT(ill != NULL); 3736 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 3737 return (EINVAL); 3738 } 3739 ASSERT((flags & NCE_F_MAPPING) == 0); 3740 ASSERT(extract_mask == NULL); 3741 /* 3742 * Allocate the mblk to hold the nce. 3743 */ 3744 mp = allocb(sizeof (nce_t), BPRI_MED); 3745 if (mp == NULL) 3746 return (ENOMEM); 3747 3748 nce = (nce_t *)mp->b_rptr; 3749 mp->b_wptr = (uchar_t *)&nce[1]; 3750 *nce = nce_nil; 3751 3752 /* 3753 * This one holds link layer address; if res_mp has been provided 3754 * by the caller, accept it without any further checks. Otherwise, 3755 * for V4, we fill it up with ill_resolver_mp here, then in 3756 * in ire_arpresolve(), we fill it up with the ARP query 3757 * once its formulated. 3758 */ 3759 if (res_mp != NULL) { 3760 template = res_mp; 3761 } else { 3762 template = copyb(ill->ill_resolver_mp); 3763 } 3764 if (template == NULL) { 3765 freeb(mp); 3766 return (ENOMEM); 3767 } 3768 nce->nce_ill = ill; 3769 nce->nce_ipversion = IPV4_VERSION; 3770 nce->nce_flags = flags; 3771 nce->nce_state = state; 3772 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3773 nce->nce_rcnt = ill->ill_xmit_count; 3774 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3775 if (*mask == IP_HOST_MASK) { 3776 nce->nce_mask = ipv6_all_ones; 3777 } else { 3778 IN6_IPADDR_TO_V4MAPPED(*mask, &nce->nce_mask); 3779 } 3780 nce->nce_extract_mask = ipv6_all_zeros; 3781 nce->nce_ll_extract_start = hw_extract_start; 3782 nce->nce_fp_mp = (fp_mp? fp_mp : NULL); 3783 nce->nce_res_mp = template; 3784 if (state == ND_REACHABLE) 3785 nce->nce_last = TICK_TO_MSEC(lbolt64); 3786 else 3787 nce->nce_last = 0; 3788 nce->nce_qd_mp = NULL; 3789 nce->nce_mp = mp; 3790 if (hw_addr != NULL) 3791 nce_set_ll(nce, hw_addr); 3792 /* This one is for nce getting created */ 3793 nce->nce_refcnt = 1; 3794 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3795 ncep = ((nce_t **)NCE_HASH_PTR_V4(*addr)); 3796 3797 #ifdef NCE_DEBUG 3798 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 3799 #endif 3800 /* 3801 * Atomically ensure that the ill is not CONDEMNED, before 3802 * adding the NCE. 3803 */ 3804 mutex_enter(&ill->ill_lock); 3805 if (ill->ill_state_flags & ILL_CONDEMNED) { 3806 mutex_exit(&ill->ill_lock); 3807 freeb(mp); 3808 if (res_mp == NULL) { 3809 /* 3810 * template was locally allocated. need to free it. 3811 */ 3812 freeb(template); 3813 } 3814 return (EINVAL); 3815 } 3816 if ((nce->nce_next = *ncep) != NULL) 3817 nce->nce_next->nce_ptpn = &nce->nce_next; 3818 *ncep = nce; 3819 nce->nce_ptpn = ncep; 3820 *newnce = nce; 3821 /* This one is for nce being used by an active thread */ 3822 NCE_REFHOLD(*newnce); 3823 3824 /* Bump up the number of nce's referencing this ill */ 3825 ill->ill_nce_cnt++; 3826 mutex_exit(&ill->ill_lock); 3827 return (0); 3828 } 3829 3830 void 3831 ndp_flush_qd_mp(nce_t *nce) 3832 { 3833 mblk_t *qd_mp, *qd_next; 3834 3835 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3836 qd_mp = nce->nce_qd_mp; 3837 nce->nce_qd_mp = NULL; 3838 while (qd_mp != NULL) { 3839 qd_next = qd_mp->b_next; 3840 qd_mp->b_next = NULL; 3841 qd_mp->b_prev = NULL; 3842 freemsg(qd_mp); 3843 qd_mp = qd_next; 3844 } 3845 } 3846 3847 nce_t * 3848 nce_reinit(nce_t *nce) 3849 { 3850 nce_t *newnce = NULL; 3851 in_addr_t nce_addr, nce_mask; 3852 3853 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3854 IN6_V4MAPPED_TO_IPADDR(&nce->nce_mask, nce_mask); 3855 /* 3856 * delete the old one. this will get rid of any ire's pointing 3857 * at this nce. 3858 */ 3859 ndp_delete(nce); 3860 /* 3861 * create a new nce with the same addr and mask. 3862 */ 3863 mutex_enter(&ndp4.ndp_g_lock); 3864 (void) ndp_add_v4(nce->nce_ill, NULL, &nce_addr, &nce_mask, NULL, 0, 0, 3865 ND_INITIAL, &newnce, NULL, NULL); 3866 mutex_exit(&ndp4.ndp_g_lock); 3867 /* 3868 * refrele the old nce. 3869 */ 3870 NCE_REFRELE(nce); 3871 return (newnce); 3872 } 3873 3874 /* 3875 * ndp_walk routine to delete all entries that have a given destination or 3876 * gateway address and cached link layer (MAC) address. This is used when ARP 3877 * informs us that a network-to-link-layer mapping may have changed. 3878 */ 3879 void 3880 nce_delete_hw_changed(nce_t *nce, void *arg) 3881 { 3882 nce_hw_map_t *hwm = arg; 3883 mblk_t *mp; 3884 dl_unitdata_req_t *dlu; 3885 uchar_t *macaddr; 3886 ill_t *ill; 3887 int saplen; 3888 ipaddr_t nce_addr; 3889 3890 if (nce->nce_state != ND_REACHABLE) 3891 return; 3892 3893 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3894 if (nce_addr != hwm->hwm_addr) 3895 return; 3896 3897 mutex_enter(&nce->nce_lock); 3898 if ((mp = nce->nce_res_mp) == NULL) { 3899 mutex_exit(&nce->nce_lock); 3900 return; 3901 } 3902 dlu = (dl_unitdata_req_t *)mp->b_rptr; 3903 macaddr = (uchar_t *)(dlu + 1); 3904 ill = nce->nce_ill; 3905 if ((saplen = ill->ill_sap_length) > 0) 3906 macaddr += saplen; 3907 else 3908 saplen = -saplen; 3909 3910 /* 3911 * If the hardware address is unchanged, then leave this one alone. 3912 * Note that saplen == abs(saplen) now. 3913 */ 3914 if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && 3915 bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { 3916 mutex_exit(&nce->nce_lock); 3917 return; 3918 } 3919 mutex_exit(&nce->nce_lock); 3920 3921 DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); 3922 ndp_delete(nce); 3923 } 3924 3925 /* 3926 * This function verifies whether a given IPv4 address is potentially known to 3927 * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t, 3928 * so that it can continue to look for hardware changes on that address. 3929 */ 3930 boolean_t 3931 ndp_lookup_ipaddr(in_addr_t addr) 3932 { 3933 nce_t *nce; 3934 struct in_addr nceaddr; 3935 3936 if (addr == INADDR_ANY) 3937 return (B_FALSE); 3938 3939 mutex_enter(&ndp4.ndp_g_lock); 3940 nce = *(nce_t **)NCE_HASH_PTR_V4(addr); 3941 for (; nce != NULL; nce = nce->nce_next) { 3942 /* Note that only v4 mapped entries are in the table. */ 3943 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 3944 if (addr == nceaddr.s_addr && 3945 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 3946 /* Single flag check; no lock needed */ 3947 if (!(nce->nce_flags & NCE_F_CONDEMNED)) 3948 break; 3949 } 3950 } 3951 mutex_exit(&ndp4.ndp_g_lock); 3952 return (nce != NULL); 3953 } 3954