1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/sysmacros.h> 33 #include <sys/errno.h> 34 #include <sys/dlpi.h> 35 #include <sys/socket.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/vtrace.h> 41 #include <sys/kmem.h> 42 #include <sys/zone.h> 43 #include <sys/ethernet.h> 44 #include <sys/sdt.h> 45 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/if_dl.h> 49 #include <net/route.h> 50 #include <netinet/in.h> 51 #include <netinet/ip6.h> 52 #include <netinet/icmp6.h> 53 54 #include <inet/common.h> 55 #include <inet/mi.h> 56 #include <inet/mib2.h> 57 #include <inet/nd.h> 58 #include <inet/ip.h> 59 #include <inet/ip_impl.h> 60 #include <inet/ip_if.h> 61 #include <inet/ip_ire.h> 62 #include <inet/ip_rts.h> 63 #include <inet/ip6.h> 64 #include <inet/ip_ndp.h> 65 #include <inet/ipsec_impl.h> 66 #include <inet/ipsec_info.h> 67 #include <inet/sctp_ip.h> 68 69 /* 70 * Function names with nce_ prefix are static while function 71 * names with ndp_ prefix are used by rest of the IP. 72 * 73 * Lock ordering: 74 * 75 * ndp_g_lock -> ill_lock -> nce_lock 76 * 77 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 78 * nce_next. Nce_lock protects the contents of the NCE (particularly 79 * nce_refcnt). 80 */ 81 82 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 83 uint32_t ll_addr_len); 84 static void nce_fastpath(nce_t *nce); 85 static void nce_ire_delete(nce_t *nce); 86 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 87 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 88 static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *, nce_t *); 89 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 90 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 91 uchar_t *addr); 92 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 93 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 94 static void nce_report1(nce_t *nce, uchar_t *mp_arg); 95 static mblk_t *nce_udreq_alloc(ill_t *ill); 96 static void nce_update(nce_t *nce, uint16_t new_state, 97 uchar_t *new_ll_addr); 98 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 99 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 100 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 101 const in6_addr_t *target, int flag); 102 extern void th_trace_rrecord(th_trace_t *); 103 static int ndp_lookup_then_add_v6(ill_t *, uchar_t *, 104 const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, 105 uint32_t, uint16_t, uint16_t, nce_t **, mblk_t *, mblk_t *); 106 static int ndp_lookup_then_add_v4(ill_t *, uchar_t *, 107 const in_addr_t *, const in_addr_t *, const in_addr_t *, 108 uint32_t, uint16_t, uint16_t, nce_t **, mblk_t *, mblk_t *); 109 static int ndp_add_v6(ill_t *, uchar_t *, const in6_addr_t *, 110 const in6_addr_t *, const in6_addr_t *, uint32_t, uint16_t, uint16_t, 111 nce_t **); 112 static int ndp_add_v4(ill_t *, uchar_t *, const in_addr_t *, 113 const in_addr_t *, const in_addr_t *, uint32_t, uint16_t, uint16_t, 114 nce_t **, mblk_t *, mblk_t *); 115 116 117 #ifdef NCE_DEBUG 118 void nce_trace_inactive(nce_t *); 119 #endif 120 121 ndp_g_t ndp4, ndp6; 122 123 #define NCE_HASH_PTR_V4(addr) \ 124 (&(ndp4.nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 125 126 #define NCE_HASH_PTR_V6(addr) \ 127 (&(ndp6.nce_hash_tbl[NCE_ADDR_HASH_V6(addr, NCE_TABLE_SIZE)])) 128 129 /* 130 * Compute default flags to use for an advertisement of this nce's address. 131 */ 132 static int 133 nce_advert_flags(const nce_t *nce) 134 { 135 int flag = 0; 136 137 if (nce->nce_flags & NCE_F_ISROUTER) 138 flag |= NDP_ISROUTER; 139 if (!(nce->nce_flags & NCE_F_PROXY)) 140 flag |= NDP_ORIDE; 141 return (flag); 142 } 143 144 int 145 ndp_add(ill_t *ill, uchar_t *hw_addr, const void *addr, 146 const void *mask, const void *extract_mask, 147 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 148 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 149 { 150 int status; 151 152 if (ill->ill_isv6) 153 status = ndp_add_v6(ill, hw_addr, (in6_addr_t *)addr, 154 (in6_addr_t *)mask, (in6_addr_t *)extract_mask, 155 hw_extract_start, flags, state, newnce); 156 else 157 status = ndp_add_v4(ill, hw_addr, (in_addr_t *)addr, 158 (in_addr_t *)mask, (in_addr_t *)extract_mask, 159 hw_extract_start, flags, state, newnce, fp_mp, res_mp); 160 return (status); 161 } 162 163 /* Non-tunable probe interval, based on link capabilities */ 164 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 165 166 /* 167 * NDP Cache Entry creation routine. 168 * Mapped entries will never do NUD . 169 * This routine must always be called with ndp6.ndp_g_lock held. 170 * Prior to return, nce_refcnt is incremented. 171 */ 172 static int 173 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 174 const in6_addr_t *mask, const in6_addr_t *extract_mask, 175 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 176 nce_t **newnce) 177 { 178 static nce_t nce_nil; 179 nce_t *nce; 180 mblk_t *mp; 181 mblk_t *template; 182 nce_t **ncep; 183 int err; 184 boolean_t dropped = B_FALSE; 185 186 ASSERT(MUTEX_HELD(&ndp6.ndp_g_lock)); 187 ASSERT(ill != NULL && ill->ill_isv6); 188 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 189 ip0dbg(("ndp_add: no addr\n")); 190 return (EINVAL); 191 } 192 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 193 ip0dbg(("ndp_add: flags = %x\n", (int)flags)); 194 return (EINVAL); 195 } 196 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 197 (flags & NCE_F_MAPPING)) { 198 ip0dbg(("ndp_add: extract mask zero for mapping")); 199 return (EINVAL); 200 } 201 /* 202 * Allocate the mblk to hold the nce. 203 * 204 * XXX This can come out of a separate cache - nce_cache. 205 * We don't need the mp anymore as there are no more 206 * "qwriter"s 207 */ 208 mp = allocb(sizeof (nce_t), BPRI_MED); 209 if (mp == NULL) 210 return (ENOMEM); 211 212 nce = (nce_t *)mp->b_rptr; 213 mp->b_wptr = (uchar_t *)&nce[1]; 214 *nce = nce_nil; 215 216 /* 217 * This one holds link layer address 218 */ 219 if (ill->ill_net_type == IRE_IF_RESOLVER) { 220 template = nce_udreq_alloc(ill); 221 } else { 222 if (ill->ill_resolver_mp == NULL) { 223 freeb(mp); 224 return (EINVAL); 225 } 226 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 227 template = copyb(ill->ill_resolver_mp); 228 } 229 if (template == NULL) { 230 freeb(mp); 231 return (ENOMEM); 232 } 233 nce->nce_ill = ill; 234 nce->nce_ipversion = IPV6_VERSION; 235 nce->nce_flags = flags; 236 nce->nce_state = state; 237 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 238 nce->nce_rcnt = ill->ill_xmit_count; 239 nce->nce_addr = *addr; 240 nce->nce_mask = *mask; 241 nce->nce_extract_mask = *extract_mask; 242 nce->nce_ll_extract_start = hw_extract_start; 243 nce->nce_fp_mp = NULL; 244 nce->nce_res_mp = template; 245 if (state == ND_REACHABLE) 246 nce->nce_last = TICK_TO_MSEC(lbolt64); 247 else 248 nce->nce_last = 0; 249 nce->nce_qd_mp = NULL; 250 nce->nce_mp = mp; 251 if (hw_addr != NULL) 252 nce_set_ll(nce, hw_addr); 253 /* This one is for nce getting created */ 254 nce->nce_refcnt = 1; 255 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 256 if (nce->nce_flags & NCE_F_MAPPING) { 257 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 258 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 259 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 260 ncep = &ndp6.nce_mask_entries; 261 } else { 262 ncep = ((nce_t **)NCE_HASH_PTR_V6(*addr)); 263 } 264 265 #ifdef NCE_DEBUG 266 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 267 #endif 268 /* 269 * Atomically ensure that the ill is not CONDEMNED, before 270 * adding the NCE. 271 */ 272 mutex_enter(&ill->ill_lock); 273 if (ill->ill_state_flags & ILL_CONDEMNED) { 274 mutex_exit(&ill->ill_lock); 275 freeb(mp); 276 freeb(template); 277 return (EINVAL); 278 } 279 if ((nce->nce_next = *ncep) != NULL) 280 nce->nce_next->nce_ptpn = &nce->nce_next; 281 *ncep = nce; 282 nce->nce_ptpn = ncep; 283 *newnce = nce; 284 /* This one is for nce being used by an active thread */ 285 NCE_REFHOLD(*newnce); 286 287 /* Bump up the number of nce's referencing this ill */ 288 ill->ill_nce_cnt++; 289 mutex_exit(&ill->ill_lock); 290 291 err = 0; 292 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 293 mutex_enter(&nce->nce_lock); 294 mutex_exit(&ndp6.ndp_g_lock); 295 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 296 mutex_exit(&nce->nce_lock); 297 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 298 &ipv6_all_zeros, addr, NDP_PROBE); 299 if (dropped) { 300 mutex_enter(&nce->nce_lock); 301 nce->nce_pcnt++; 302 mutex_exit(&nce->nce_lock); 303 } 304 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 305 mutex_enter(&ndp6.ndp_g_lock); 306 err = EINPROGRESS; 307 } else if (flags & NCE_F_UNSOL_ADV) { 308 /* 309 * We account for the transmit below by assigning one 310 * less than the ndd variable. Subsequent decrements 311 * are done in ndp_timer. 312 */ 313 mutex_enter(&nce->nce_lock); 314 mutex_exit(&ndp6.ndp_g_lock); 315 nce->nce_unsolicit_count = ip_ndp_unsolicit_count - 1; 316 mutex_exit(&nce->nce_lock); 317 dropped = nce_xmit(ill, 318 ND_NEIGHBOR_ADVERT, 319 ill, /* ill to be used for extracting ill_nd_lla */ 320 B_TRUE, /* use ill_nd_lla */ 321 addr, /* Source and target of the advertisement pkt */ 322 &ipv6_all_hosts_mcast, /* Destination of the packet */ 323 nce_advert_flags(nce)); 324 mutex_enter(&nce->nce_lock); 325 if (dropped) 326 nce->nce_unsolicit_count++; 327 if (nce->nce_unsolicit_count != 0) { 328 nce->nce_timeout_id = timeout(ndp_timer, nce, 329 MSEC_TO_TICK(ip_ndp_unsolicit_interval)); 330 } 331 mutex_exit(&nce->nce_lock); 332 mutex_enter(&ndp6.ndp_g_lock); 333 } 334 /* 335 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 336 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 337 * We call nce_fastpath from nce_update if the link layer address of 338 * the peer changes from nce_update 339 */ 340 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 341 nce_fastpath(nce); 342 return (err); 343 } 344 345 int 346 ndp_lookup_then_add(ill_t *ill, uchar_t *hw_addr, const void *addr, 347 const void *mask, const void *extract_mask, 348 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 349 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 350 { 351 int status; 352 353 if (ill->ill_isv6) { 354 status = ndp_lookup_then_add_v6(ill, hw_addr, 355 (in6_addr_t *)addr, (in6_addr_t *)mask, 356 (in6_addr_t *)extract_mask, hw_extract_start, flags, 357 state, newnce, fp_mp, res_mp); 358 } else { 359 status = ndp_lookup_then_add_v4(ill, hw_addr, 360 (in_addr_t *)addr, (in_addr_t *)mask, 361 (in_addr_t *)extract_mask, hw_extract_start, flags, 362 state, newnce, fp_mp, res_mp); 363 } 364 365 return (status); 366 } 367 368 static int 369 ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 370 const in6_addr_t *mask, const in6_addr_t *extract_mask, 371 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 372 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 373 { 374 int err = 0; 375 nce_t *nce; 376 377 ASSERT(ill != NULL && ill->ill_isv6); 378 mutex_enter(&ndp6.ndp_g_lock); 379 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); /* head of v6 hash table */ 380 nce = nce_lookup_addr(ill, addr, nce); 381 if (nce == NULL) { 382 err = ndp_add(ill, 383 hw_addr, 384 addr, 385 mask, 386 extract_mask, 387 hw_extract_start, 388 flags, 389 state, 390 newnce, 391 fp_mp, 392 res_mp); 393 } else { 394 *newnce = nce; 395 err = EEXIST; 396 } 397 mutex_exit(&ndp6.ndp_g_lock); 398 return (err); 399 } 400 401 /* 402 * Remove all the CONDEMNED nces from the appropriate hash table. 403 * We create a private list of NCEs, these may have ires pointing 404 * to them, so the list will be passed through to clean up dependent 405 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 406 */ 407 static void 408 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 409 { 410 nce_t *nce1; 411 nce_t **ptpn; 412 413 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 414 ASSERT(ndp->ndp_g_walker == 0); 415 for (; nce; nce = nce1) { 416 nce1 = nce->nce_next; 417 mutex_enter(&nce->nce_lock); 418 if (nce->nce_flags & NCE_F_CONDEMNED) { 419 ptpn = nce->nce_ptpn; 420 nce1 = nce->nce_next; 421 if (nce1 != NULL) 422 nce1->nce_ptpn = ptpn; 423 *ptpn = nce1; 424 nce->nce_ptpn = NULL; 425 nce->nce_next = NULL; 426 nce->nce_next = *free_nce_list; 427 *free_nce_list = nce; 428 } 429 mutex_exit(&nce->nce_lock); 430 } 431 } 432 433 /* 434 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 435 * will return this NCE. Also no new IREs will be created that 436 * point to this NCE (See ire_add_v6). Also no new timeouts will 437 * be started (See NDP_RESTART_TIMER). 438 * 2. Cancel any currently running timeouts. 439 * 3. If there is an ndp walker, return. The walker will do the cleanup. 440 * This ensures that walkers see a consistent list of NCEs while walking. 441 * 4. Otherwise remove the NCE from the list of NCEs 442 * 5. Delete all IREs pointing to this NCE. 443 */ 444 void 445 ndp_delete(nce_t *nce) 446 { 447 nce_t **ptpn; 448 nce_t *nce1; 449 int ipversion = nce->nce_ipversion; 450 ndp_g_t *ndp = (ipversion == IPV4_VERSION ? &ndp4 : &ndp6); 451 452 /* Serialize deletes */ 453 mutex_enter(&nce->nce_lock); 454 if (nce->nce_flags & NCE_F_CONDEMNED) { 455 /* Some other thread is doing the delete */ 456 mutex_exit(&nce->nce_lock); 457 return; 458 } 459 /* 460 * Caller has a refhold. Also 1 ref for being in the list. Thus 461 * refcnt has to be >= 2 462 */ 463 ASSERT(nce->nce_refcnt >= 2); 464 nce->nce_flags |= NCE_F_CONDEMNED; 465 mutex_exit(&nce->nce_lock); 466 467 nce_fastpath_list_delete(nce); 468 469 /* 470 * Cancel any running timer. Timeout can't be restarted 471 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 472 * Passing invalid timeout id is fine. 473 */ 474 if (nce->nce_timeout_id != 0) { 475 (void) untimeout(nce->nce_timeout_id); 476 nce->nce_timeout_id = 0; 477 } 478 479 mutex_enter(&ndp->ndp_g_lock); 480 if (nce->nce_ptpn == NULL) { 481 /* 482 * The last ndp walker has already removed this nce from 483 * the list after we marked the nce CONDEMNED and before 484 * we grabbed the global lock. 485 */ 486 mutex_exit(&ndp->ndp_g_lock); 487 return; 488 } 489 if (ndp->ndp_g_walker > 0) { 490 /* 491 * Can't unlink. The walker will clean up 492 */ 493 ndp->ndp_g_walker_cleanup = B_TRUE; 494 mutex_exit(&ndp->ndp_g_lock); 495 return; 496 } 497 498 /* 499 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 500 * the timer since it is marked CONDEMNED. 501 */ 502 ptpn = nce->nce_ptpn; 503 nce1 = nce->nce_next; 504 if (nce1 != NULL) 505 nce1->nce_ptpn = ptpn; 506 *ptpn = nce1; 507 nce->nce_ptpn = NULL; 508 nce->nce_next = NULL; 509 mutex_exit(&ndp->ndp_g_lock); 510 511 nce_ire_delete(nce); 512 } 513 514 void 515 ndp_inactive(nce_t *nce) 516 { 517 mblk_t **mpp; 518 ill_t *ill; 519 520 ASSERT(nce->nce_refcnt == 0); 521 ASSERT(MUTEX_HELD(&nce->nce_lock)); 522 ASSERT(nce->nce_fastpath == NULL); 523 524 /* Free all nce allocated messages */ 525 mpp = &nce->nce_first_mp_to_free; 526 do { 527 while (*mpp != NULL) { 528 mblk_t *mp; 529 530 mp = *mpp; 531 *mpp = mp->b_next; 532 533 inet_freemsg(mp); 534 } 535 } while (mpp++ != &nce->nce_last_mp_to_free); 536 537 #ifdef NCE_DEBUG 538 nce_trace_inactive(nce); 539 #endif 540 541 ill = nce->nce_ill; 542 mutex_enter(&ill->ill_lock); 543 ill->ill_nce_cnt--; 544 /* 545 * If the number of nce's associated with this ill have dropped 546 * to zero, check whether we need to restart any operation that 547 * is waiting for this to happen. 548 */ 549 if (ill->ill_nce_cnt == 0) { 550 /* ipif_ill_refrele_tail drops the ill_lock */ 551 ipif_ill_refrele_tail(ill); 552 } else { 553 mutex_exit(&ill->ill_lock); 554 } 555 mutex_destroy(&nce->nce_lock); 556 if (nce->nce_mp != NULL) 557 inet_freemsg(nce->nce_mp); 558 } 559 560 /* 561 * ndp_walk routine. Delete the nce if it is associated with the ill 562 * that is going away. Always called as a writer. 563 */ 564 void 565 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 566 { 567 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 568 ndp_delete(nce); 569 } 570 } 571 572 /* 573 * Walk a list of to be inactive NCEs and blow away all the ires. 574 */ 575 static void 576 nce_ire_delete_list(nce_t *nce) 577 { 578 nce_t *nce_next; 579 580 ASSERT(nce != NULL); 581 while (nce != NULL) { 582 nce_next = nce->nce_next; 583 nce->nce_next = NULL; 584 585 /* 586 * It is possible for the last ndp walker (this thread) 587 * to come here after ndp_delete has marked the nce CONDEMNED 588 * and before it has removed the nce from the fastpath list 589 * or called untimeout. So we need to do it here. It is safe 590 * for both ndp_delete and this thread to do it twice or 591 * even simultaneously since each of the threads has a 592 * reference on the nce. 593 */ 594 nce_fastpath_list_delete(nce); 595 /* 596 * Cancel any running timer. Timeout can't be restarted 597 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 598 * Passing invalid timeout id is fine. 599 */ 600 if (nce->nce_timeout_id != 0) { 601 (void) untimeout(nce->nce_timeout_id); 602 nce->nce_timeout_id = 0; 603 } 604 /* 605 * We might hit this func thus in the v4 case: 606 * ipif_down->ipif_ndp_down->ndp_walk 607 */ 608 609 if (nce->nce_ipversion == IPV4_VERSION) { 610 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 611 IRE_CACHE, nce_ire_delete1, 612 (char *)nce, nce->nce_ill); 613 } else { 614 ASSERT(nce->nce_ipversion == IPV6_VERSION); 615 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 616 IRE_CACHE, nce_ire_delete1, 617 (char *)nce, nce->nce_ill); 618 } 619 NCE_REFRELE_NOTR(nce); 620 nce = nce_next; 621 } 622 } 623 624 /* 625 * Delete an ire when the nce goes away. 626 */ 627 /* ARGSUSED */ 628 static void 629 nce_ire_delete(nce_t *nce) 630 { 631 if (nce->nce_ipversion == IPV6_VERSION) { 632 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 633 nce_ire_delete1, (char *)nce, nce->nce_ill); 634 NCE_REFRELE_NOTR(nce); 635 } else { 636 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 637 nce_ire_delete1, (char *)nce, nce->nce_ill); 638 NCE_REFRELE_NOTR(nce); 639 } 640 } 641 642 /* 643 * ire_walk routine used to delete every IRE that shares this nce 644 */ 645 static void 646 nce_ire_delete1(ire_t *ire, char *nce_arg) 647 { 648 nce_t *nce = (nce_t *)nce_arg; 649 650 ASSERT(ire->ire_type == IRE_CACHE); 651 652 if (ire->ire_nce == nce) { 653 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 654 ire_delete(ire); 655 } 656 } 657 658 /* 659 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 660 */ 661 boolean_t 662 ndp_restart_dad(nce_t *nce) 663 { 664 boolean_t started; 665 boolean_t dropped; 666 667 if (nce == NULL) 668 return (B_FALSE); 669 mutex_enter(&nce->nce_lock); 670 if (nce->nce_state == ND_PROBE) { 671 mutex_exit(&nce->nce_lock); 672 started = B_TRUE; 673 } else if (nce->nce_state == ND_REACHABLE) { 674 nce->nce_state = ND_PROBE; 675 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 676 mutex_exit(&nce->nce_lock); 677 dropped = nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, NULL, 678 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, NDP_PROBE); 679 if (dropped) { 680 mutex_enter(&nce->nce_lock); 681 nce->nce_pcnt++; 682 mutex_exit(&nce->nce_lock); 683 } 684 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 685 started = B_TRUE; 686 } else { 687 mutex_exit(&nce->nce_lock); 688 started = B_FALSE; 689 } 690 return (started); 691 } 692 693 /* 694 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 695 * If one is found, the refcnt on the nce will be incremented. 696 */ 697 nce_t * 698 ndp_lookup_v6(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 699 { 700 nce_t *nce; 701 702 ASSERT(ill != NULL && ill->ill_isv6); 703 if (!caller_holds_lock) { 704 mutex_enter(&ndp6.ndp_g_lock); 705 } 706 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); /* head of v6 hash table */ 707 nce = nce_lookup_addr(ill, addr, nce); 708 if (nce == NULL) 709 nce = nce_lookup_mapping(ill, addr); 710 if (!caller_holds_lock) 711 mutex_exit(&ndp6.ndp_g_lock); 712 return (nce); 713 } 714 /* 715 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 716 * If one is found, the refcnt on the nce will be incremented. 717 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 718 * so we skip the nce_lookup_mapping call. 719 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 720 */ 721 nce_t * 722 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 723 { 724 nce_t *nce; 725 in6_addr_t addr6; 726 727 if (!caller_holds_lock) { 728 mutex_enter(&ndp4.ndp_g_lock); 729 } 730 nce = *((nce_t **)NCE_HASH_PTR_V4(*addr)); /* head of v6 hash table */ 731 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 732 nce = nce_lookup_addr(ill, &addr6, nce); 733 if (!caller_holds_lock) 734 mutex_exit(&ndp4.ndp_g_lock); 735 return (nce); 736 } 737 738 /* 739 * Cache entry lookup. Try to find an nce matching the parameters passed. 740 * Look only for exact entries (no mappings). If an nce is found, increment 741 * the hold count on that nce. The caller passes in the start of the 742 * appropriate hash table, and must be holding the appropriate global 743 * lock (ndp_g_lock). 744 */ 745 static nce_t * 746 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr, nce_t *nce) 747 { 748 ndp_g_t *ndp = (ill->ill_isv6 ? &ndp6 : &ndp4); 749 750 ASSERT(ill != NULL); 751 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 752 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 753 return (NULL); 754 for (; nce != NULL; nce = nce->nce_next) { 755 if (nce->nce_ill == ill) { 756 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 757 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 758 &ipv6_all_ones)) { 759 mutex_enter(&nce->nce_lock); 760 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 761 NCE_REFHOLD_LOCKED(nce); 762 mutex_exit(&nce->nce_lock); 763 break; 764 } 765 mutex_exit(&nce->nce_lock); 766 } 767 } 768 } 769 return (nce); 770 } 771 772 /* 773 * Cache entry lookup. Try to find an nce matching the parameters passed. 774 * Look only for mappings. 775 */ 776 static nce_t * 777 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 778 { 779 nce_t *nce; 780 781 ASSERT(ill != NULL && ill->ill_isv6); 782 ASSERT(MUTEX_HELD(&ndp6.ndp_g_lock)); 783 if (!IN6_IS_ADDR_MULTICAST(addr)) 784 return (NULL); 785 nce = ndp6.nce_mask_entries; 786 for (; nce != NULL; nce = nce->nce_next) 787 if (nce->nce_ill == ill && 788 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 789 mutex_enter(&nce->nce_lock); 790 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 791 NCE_REFHOLD_LOCKED(nce); 792 mutex_exit(&nce->nce_lock); 793 break; 794 } 795 mutex_exit(&nce->nce_lock); 796 } 797 return (nce); 798 } 799 800 /* 801 * Process passed in parameters either from an incoming packet or via 802 * user ioctl. 803 */ 804 void 805 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 806 { 807 ill_t *ill = nce->nce_ill; 808 uint32_t hw_addr_len = ill->ill_nd_lla_len; 809 mblk_t *mp; 810 boolean_t ll_updated = B_FALSE; 811 boolean_t ll_changed; 812 813 ASSERT(nce->nce_ipversion == IPV6_VERSION); 814 /* 815 * No updates of link layer address or the neighbor state is 816 * allowed, when the cache is in NONUD state. This still 817 * allows for responding to reachability solicitation. 818 */ 819 mutex_enter(&nce->nce_lock); 820 if (nce->nce_state == ND_INCOMPLETE) { 821 if (hw_addr == NULL) { 822 mutex_exit(&nce->nce_lock); 823 return; 824 } 825 nce_set_ll(nce, hw_addr); 826 /* 827 * Update nce state and send the queued packets 828 * back to ip this time ire will be added. 829 */ 830 if (flag & ND_NA_FLAG_SOLICITED) { 831 nce_update(nce, ND_REACHABLE, NULL); 832 } else { 833 nce_update(nce, ND_STALE, NULL); 834 } 835 mutex_exit(&nce->nce_lock); 836 nce_fastpath(nce); 837 mutex_enter(&nce->nce_lock); 838 mp = nce->nce_qd_mp; 839 nce->nce_qd_mp = NULL; 840 mutex_exit(&nce->nce_lock); 841 while (mp != NULL) { 842 mblk_t *nxt_mp, *data_mp; 843 844 nxt_mp = mp->b_next; 845 mp->b_next = NULL; 846 847 if (mp->b_datap->db_type == M_CTL) 848 data_mp = mp->b_cont; 849 else 850 data_mp = mp; 851 if (data_mp->b_prev != NULL) { 852 ill_t *inbound_ill; 853 queue_t *fwdq = NULL; 854 uint_t ifindex; 855 856 ifindex = (uint_t)(uintptr_t)data_mp->b_prev; 857 inbound_ill = ill_lookup_on_ifindex(ifindex, 858 B_TRUE, NULL, NULL, NULL, NULL); 859 if (inbound_ill == NULL) { 860 data_mp->b_prev = NULL; 861 freemsg(mp); 862 return; 863 } else { 864 fwdq = inbound_ill->ill_rq; 865 } 866 data_mp->b_prev = NULL; 867 /* 868 * Send a forwarded packet back into ip_rput_v6 869 * just as in ire_send_v6(). 870 * Extract the queue from b_prev (set in 871 * ip_rput_data_v6). 872 */ 873 if (fwdq != NULL) { 874 /* 875 * Forwarded packets hop count will 876 * get decremented in ip_rput_data_v6 877 */ 878 if (data_mp != mp) 879 freeb(mp); 880 put(fwdq, data_mp); 881 } else { 882 /* 883 * Send locally originated packets back 884 * into * ip_wput_v6. 885 */ 886 put(ill->ill_wq, mp); 887 } 888 ill_refrele(inbound_ill); 889 } else { 890 put(ill->ill_wq, mp); 891 } 892 mp = nxt_mp; 893 } 894 return; 895 } 896 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 897 if (!is_adv) { 898 /* If this is a SOLICITATION request only */ 899 if (ll_changed) 900 nce_update(nce, ND_STALE, hw_addr); 901 mutex_exit(&nce->nce_lock); 902 return; 903 } 904 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 905 /* If in any other state than REACHABLE, ignore */ 906 if (nce->nce_state == ND_REACHABLE) { 907 nce_update(nce, ND_STALE, NULL); 908 } 909 mutex_exit(&nce->nce_lock); 910 return; 911 } else { 912 if (ll_changed) { 913 nce_update(nce, ND_UNCHANGED, hw_addr); 914 ll_updated = B_TRUE; 915 } 916 if (flag & ND_NA_FLAG_SOLICITED) { 917 nce_update(nce, ND_REACHABLE, NULL); 918 } else { 919 if (ll_updated) { 920 nce_update(nce, ND_STALE, NULL); 921 } 922 } 923 mutex_exit(&nce->nce_lock); 924 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 925 NCE_F_ISROUTER)) { 926 ire_t *ire; 927 928 /* 929 * Router turned to host. We need to remove the 930 * entry as well as any default route that may be 931 * using this as a next hop. This is required by 932 * section 7.2.5 of RFC 2461. 933 */ 934 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 935 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 936 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 937 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 938 MATCH_IRE_DEFAULT); 939 if (ire != NULL) { 940 ip_rts_rtmsg(RTM_DELETE, ire, 0); 941 ire_delete(ire); 942 ire_refrele(ire); 943 } 944 ndp_delete(nce); 945 } 946 } 947 } 948 949 /* 950 * Pass arg1 to the pfi supplied, along with each nce in existence. 951 * ndp_walk() places a REFHOLD on the nce and drops the lock when 952 * walking the hash list. 953 */ 954 void 955 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 956 boolean_t trace) 957 { 958 959 nce_t *nce; 960 nce_t *nce1; 961 nce_t **ncep; 962 nce_t *free_nce_list = NULL; 963 964 mutex_enter(&ndp->ndp_g_lock); 965 /* Prevent ndp_delete from unlink and free of NCE */ 966 ndp->ndp_g_walker++; 967 mutex_exit(&ndp->ndp_g_lock); 968 for (ncep = ndp->nce_hash_tbl; 969 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 970 for (nce = *ncep; nce != NULL; nce = nce1) { 971 nce1 = nce->nce_next; 972 if (ill == NULL || nce->nce_ill == ill) { 973 if (trace) { 974 NCE_REFHOLD(nce); 975 (*pfi)(nce, arg1); 976 NCE_REFRELE(nce); 977 } else { 978 NCE_REFHOLD_NOTR(nce); 979 (*pfi)(nce, arg1); 980 NCE_REFRELE_NOTR(nce); 981 } 982 } 983 } 984 } 985 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 986 nce1 = nce->nce_next; 987 if (ill == NULL || nce->nce_ill == ill) { 988 if (trace) { 989 NCE_REFHOLD(nce); 990 (*pfi)(nce, arg1); 991 NCE_REFRELE(nce); 992 } else { 993 NCE_REFHOLD_NOTR(nce); 994 (*pfi)(nce, arg1); 995 NCE_REFRELE_NOTR(nce); 996 } 997 } 998 } 999 mutex_enter(&ndp->ndp_g_lock); 1000 ndp->ndp_g_walker--; 1001 /* 1002 * While NCE's are removed from global list they are placed 1003 * in a private list, to be passed to nce_ire_delete_list(). 1004 * The reason is, there may be ires pointing to this nce 1005 * which needs to cleaned up. 1006 */ 1007 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 1008 /* Time to delete condemned entries */ 1009 for (ncep = ndp->nce_hash_tbl; 1010 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 1011 nce = *ncep; 1012 if (nce != NULL) { 1013 nce_remove(ndp, nce, &free_nce_list); 1014 } 1015 } 1016 nce = ndp->nce_mask_entries; 1017 if (nce != NULL) { 1018 nce_remove(ndp, nce, &free_nce_list); 1019 } 1020 ndp->ndp_g_walker_cleanup = B_FALSE; 1021 } 1022 mutex_exit(&ndp->ndp_g_lock); 1023 1024 if (free_nce_list != NULL) { 1025 nce_ire_delete_list(free_nce_list); 1026 } 1027 } 1028 1029 void 1030 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1) 1031 { 1032 ndp_walk_common(&ndp4, ill, pfi, arg1, B_TRUE); 1033 ndp_walk_common(&ndp6, ill, pfi, arg1, B_TRUE); 1034 } 1035 1036 /* 1037 * Process resolve requests. Handles both mapped entries 1038 * as well as cases that needs to be send out on the wire. 1039 * Lookup a NCE for a given IRE. Regardless of whether one exists 1040 * or one is created, we defer making ire point to nce until the 1041 * ire is actually added at which point the nce_refcnt on the nce is 1042 * incremented. This is done primarily to have symmetry between ire_add() 1043 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1044 */ 1045 int 1046 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 1047 { 1048 nce_t *nce; 1049 int err = 0; 1050 uint32_t ms; 1051 mblk_t *mp_nce = NULL; 1052 1053 ASSERT(ill != NULL); 1054 ASSERT(ill->ill_isv6); 1055 if (IN6_IS_ADDR_MULTICAST(dst)) { 1056 err = nce_set_multicast(ill, dst); 1057 return (err); 1058 } 1059 err = ndp_lookup_then_add(ill, 1060 NULL, /* No hardware address */ 1061 dst, 1062 &ipv6_all_ones, 1063 &ipv6_all_zeros, 1064 0, 1065 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1066 ND_INCOMPLETE, 1067 &nce, 1068 NULL, /* let ndp_add figure out fastpath mp and dlureq_mp for v6 */ 1069 NULL); 1070 1071 switch (err) { 1072 case 0: 1073 /* 1074 * New cache entry was created. Make sure that the state 1075 * is not ND_INCOMPLETE. It can be in some other state 1076 * even before we send out the solicitation as we could 1077 * get un-solicited advertisements. 1078 * 1079 * If this is an XRESOLV interface, simply return 0, 1080 * since we don't want to solicit just yet. 1081 */ 1082 if (ill->ill_flags & ILLF_XRESOLV) { 1083 NCE_REFRELE(nce); 1084 return (0); 1085 } 1086 rw_enter(&ill_g_lock, RW_READER); 1087 mutex_enter(&nce->nce_lock); 1088 if (nce->nce_state != ND_INCOMPLETE) { 1089 mutex_exit(&nce->nce_lock); 1090 rw_exit(&ill_g_lock); 1091 NCE_REFRELE(nce); 1092 return (0); 1093 } 1094 mp_nce = ip_prepend_zoneid(mp, zoneid); 1095 if (mp_nce == NULL) { 1096 /* The caller will free mp */ 1097 mutex_exit(&nce->nce_lock); 1098 rw_exit(&ill_g_lock); 1099 ndp_delete(nce); 1100 NCE_REFRELE(nce); 1101 return (ENOMEM); 1102 } 1103 ms = nce_solicit(nce, mp_nce); 1104 rw_exit(&ill_g_lock); 1105 if (ms == 0) { 1106 /* The caller will free mp */ 1107 if (mp_nce != mp) 1108 freeb(mp_nce); 1109 mutex_exit(&nce->nce_lock); 1110 ndp_delete(nce); 1111 NCE_REFRELE(nce); 1112 return (EBUSY); 1113 } 1114 mutex_exit(&nce->nce_lock); 1115 NDP_RESTART_TIMER(nce, (clock_t)ms); 1116 NCE_REFRELE(nce); 1117 return (EINPROGRESS); 1118 case EEXIST: 1119 /* Resolution in progress just queue the packet */ 1120 mutex_enter(&nce->nce_lock); 1121 if (nce->nce_state == ND_INCOMPLETE) { 1122 mp_nce = ip_prepend_zoneid(mp, zoneid); 1123 if (mp_nce == NULL) { 1124 err = ENOMEM; 1125 } else { 1126 nce_queue_mp(nce, mp_nce); 1127 err = EINPROGRESS; 1128 } 1129 } else { 1130 /* 1131 * Any other state implies we have 1132 * a nce but IRE needs to be added ... 1133 * ire_add_v6() will take care of the 1134 * the case when the nce becomes CONDEMNED 1135 * before the ire is added to the table. 1136 */ 1137 err = 0; 1138 } 1139 mutex_exit(&nce->nce_lock); 1140 NCE_REFRELE(nce); 1141 break; 1142 default: 1143 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 1144 break; 1145 } 1146 return (err); 1147 } 1148 1149 /* 1150 * When there is no resolver, the link layer template is passed in 1151 * the IRE. 1152 * Lookup a NCE for a given IRE. Regardless of whether one exists 1153 * or one is created, we defer making ire point to nce until the 1154 * ire is actually added at which point the nce_refcnt on the nce is 1155 * incremented. This is done primarily to have symmetry between ire_add() 1156 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1157 */ 1158 int 1159 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1160 { 1161 nce_t *nce; 1162 int err = 0; 1163 1164 ASSERT(ill != NULL); 1165 ASSERT(ill->ill_isv6); 1166 if (IN6_IS_ADDR_MULTICAST(dst)) { 1167 err = nce_set_multicast(ill, dst); 1168 return (err); 1169 } 1170 1171 err = ndp_lookup_then_add(ill, 1172 NULL, /* hardware address */ 1173 dst, 1174 &ipv6_all_ones, 1175 &ipv6_all_zeros, 1176 0, 1177 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1178 ND_REACHABLE, 1179 &nce, 1180 NULL, /* let ndp_add figure out fp_mp/dlureq_mp for v6 */ 1181 NULL); 1182 1183 switch (err) { 1184 case 0: 1185 /* 1186 * Cache entry with a proper resolver cookie was 1187 * created. 1188 */ 1189 NCE_REFRELE(nce); 1190 break; 1191 case EEXIST: 1192 err = 0; 1193 NCE_REFRELE(nce); 1194 break; 1195 default: 1196 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1197 break; 1198 } 1199 return (err); 1200 } 1201 1202 /* 1203 * For each interface an entry is added for the unspecified multicast group. 1204 * Here that mapping is used to form the multicast cache entry for a particular 1205 * multicast destination. 1206 */ 1207 static int 1208 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1209 { 1210 nce_t *mnce; /* Multicast mapping entry */ 1211 nce_t *nce; 1212 uchar_t *hw_addr = NULL; 1213 int err = 0; 1214 1215 ASSERT(ill != NULL); 1216 ASSERT(ill->ill_isv6); 1217 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1218 1219 mutex_enter(&ndp6.ndp_g_lock); 1220 nce = *((nce_t **)NCE_HASH_PTR_V6(*dst)); 1221 nce = nce_lookup_addr(ill, dst, nce); 1222 if (nce != NULL) { 1223 mutex_exit(&ndp6.ndp_g_lock); 1224 NCE_REFRELE(nce); 1225 return (0); 1226 } 1227 /* No entry, now lookup for a mapping this should never fail */ 1228 mnce = nce_lookup_mapping(ill, dst); 1229 if (mnce == NULL) { 1230 /* Something broken for the interface. */ 1231 mutex_exit(&ndp6.ndp_g_lock); 1232 return (ESRCH); 1233 } 1234 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1235 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1236 /* 1237 * For IRE_IF_RESOLVER a hardware mapping can be 1238 * generated, for IRE_IF_NORESOLVER, resolution cookie 1239 * in the ill is copied in ndp_add(). 1240 */ 1241 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1242 if (hw_addr == NULL) { 1243 mutex_exit(&ndp6.ndp_g_lock); 1244 NCE_REFRELE(mnce); 1245 return (ENOMEM); 1246 } 1247 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1248 } 1249 NCE_REFRELE(mnce); 1250 /* 1251 * IRE_IF_NORESOLVER type simply copies the resolution 1252 * cookie passed in. So no hw_addr is needed. 1253 */ 1254 err = ndp_add(ill, 1255 hw_addr, 1256 dst, 1257 &ipv6_all_ones, 1258 &ipv6_all_zeros, 1259 0, 1260 NCE_F_NONUD, 1261 ND_REACHABLE, 1262 &nce, 1263 NULL, 1264 NULL); 1265 mutex_exit(&ndp6.ndp_g_lock); 1266 if (hw_addr != NULL) 1267 kmem_free(hw_addr, ill->ill_nd_lla_len); 1268 if (err != 0) { 1269 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1270 return (err); 1271 } 1272 NCE_REFRELE(nce); 1273 return (0); 1274 } 1275 1276 /* 1277 * Return the link layer address, and any flags of a nce. 1278 */ 1279 int 1280 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1281 { 1282 nce_t *nce; 1283 in6_addr_t *addr; 1284 sin6_t *sin6; 1285 dl_unitdata_req_t *dl; 1286 1287 ASSERT(ill != NULL && ill->ill_isv6); 1288 sin6 = (sin6_t *)&lnr->lnr_addr; 1289 addr = &sin6->sin6_addr; 1290 1291 nce = ndp_lookup_v6(ill, addr, B_FALSE); 1292 if (nce == NULL) 1293 return (ESRCH); 1294 /* If in INCOMPLETE state, no link layer address is available yet */ 1295 if (nce->nce_state == ND_INCOMPLETE) 1296 goto done; 1297 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1298 if (ill->ill_flags & ILLF_XRESOLV) 1299 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1300 else 1301 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1302 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1303 sizeof (lnr->lnr_hdw_addr)); 1304 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1305 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1306 if (nce->nce_flags & NCE_F_ISROUTER) 1307 lnr->lnr_flags = NDF_ISROUTER_ON; 1308 if (nce->nce_flags & NCE_F_PROXY) 1309 lnr->lnr_flags |= NDF_PROXY_ON; 1310 if (nce->nce_flags & NCE_F_ANYCAST) 1311 lnr->lnr_flags |= NDF_ANYCAST_ON; 1312 done: 1313 NCE_REFRELE(nce); 1314 return (0); 1315 } 1316 1317 /* 1318 * Send Enable/Disable multicast reqs to driver. 1319 */ 1320 int 1321 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1322 uint32_t hw_addr_offset, mblk_t *mp) 1323 { 1324 nce_t *nce; 1325 uchar_t *hw_addr; 1326 1327 ASSERT(ill != NULL && ill->ill_isv6); 1328 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1329 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1330 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1331 freemsg(mp); 1332 return (EINVAL); 1333 } 1334 mutex_enter(&ndp6.ndp_g_lock); 1335 nce = nce_lookup_mapping(ill, addr); 1336 if (nce == NULL) { 1337 mutex_exit(&ndp6.ndp_g_lock); 1338 freemsg(mp); 1339 return (ESRCH); 1340 } 1341 mutex_exit(&ndp6.ndp_g_lock); 1342 /* 1343 * Update dl_addr_length and dl_addr_offset for primitives that 1344 * have physical addresses as opposed to full saps 1345 */ 1346 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1347 case DL_ENABMULTI_REQ: 1348 /* Track the state if this is the first enabmulti */ 1349 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1350 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1351 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1352 break; 1353 case DL_DISABMULTI_REQ: 1354 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1355 break; 1356 default: 1357 NCE_REFRELE(nce); 1358 ip1dbg(("ndp_mcastreq: default\n")); 1359 return (EINVAL); 1360 } 1361 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1362 NCE_REFRELE(nce); 1363 putnext(ill->ill_wq, mp); 1364 return (0); 1365 } 1366 1367 /* 1368 * Send a neighbor solicitation. 1369 * Returns number of milliseconds after which we should either rexmit or abort. 1370 * Return of zero means we should abort. 1371 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1372 * 1373 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1374 * the packet. 1375 * NOTE: This routine does not consume mp. 1376 */ 1377 uint32_t 1378 nce_solicit(nce_t *nce, mblk_t *mp) 1379 { 1380 ill_t *ill; 1381 ill_t *src_ill; 1382 ip6_t *ip6h; 1383 in6_addr_t src; 1384 in6_addr_t dst; 1385 ipif_t *ipif; 1386 ip6i_t *ip6i; 1387 boolean_t dropped = B_FALSE; 1388 1389 ASSERT(RW_READ_HELD(&ill_g_lock)); 1390 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1391 ill = nce->nce_ill; 1392 ASSERT(ill != NULL); 1393 1394 if (nce->nce_rcnt == 0) { 1395 return (0); 1396 } 1397 1398 if (mp == NULL) { 1399 ASSERT(nce->nce_qd_mp != NULL); 1400 mp = nce->nce_qd_mp; 1401 } else { 1402 nce_queue_mp(nce, mp); 1403 } 1404 1405 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1406 if (mp->b_datap->db_type == M_CTL) 1407 mp = mp->b_cont; 1408 1409 ip6h = (ip6_t *)mp->b_rptr; 1410 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1411 /* 1412 * This message should have been pulled up already in 1413 * ip_wput_v6. We can't do pullups here because the message 1414 * could be from the nce_qd_mp which could have b_next/b_prev 1415 * non-NULL. 1416 */ 1417 ip6i = (ip6i_t *)ip6h; 1418 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1419 sizeof (ip6i_t) + IPV6_HDR_LEN); 1420 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1421 } 1422 src = ip6h->ip6_src; 1423 /* 1424 * If the src of outgoing packet is one of the assigned interface 1425 * addresses use it, otherwise we will pick the source address below. 1426 */ 1427 src_ill = ill; 1428 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1429 if (ill->ill_group != NULL) 1430 src_ill = ill->ill_group->illgrp_ill; 1431 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1432 for (ipif = src_ill->ill_ipif; ipif != NULL; 1433 ipif = ipif->ipif_next) { 1434 if (IN6_ARE_ADDR_EQUAL(&src, 1435 &ipif->ipif_v6lcl_addr)) { 1436 break; 1437 } 1438 } 1439 if (ipif != NULL) 1440 break; 1441 } 1442 /* 1443 * If no relevant ipif can be found, then it's not one of our 1444 * addresses. Reset to :: and let nce_xmit. If an ipif can be 1445 * found, but it's not yet done with DAD verification, then 1446 * just postpone this transmission until later. 1447 */ 1448 if (src_ill == NULL) 1449 src = ipv6_all_zeros; 1450 else if (!ipif->ipif_addr_ready) 1451 return (ill->ill_reachable_retrans_time); 1452 } 1453 dst = nce->nce_addr; 1454 /* 1455 * If source address is unspecified, nce_xmit will choose 1456 * one for us and initialize the hardware address also 1457 * appropriately. 1458 */ 1459 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1460 src_ill = NULL; 1461 nce->nce_rcnt--; 1462 mutex_exit(&nce->nce_lock); 1463 rw_exit(&ill_g_lock); 1464 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1465 &dst, 0); 1466 rw_enter(&ill_g_lock, RW_READER); 1467 mutex_enter(&nce->nce_lock); 1468 if (dropped) 1469 nce->nce_rcnt++; 1470 return (ill->ill_reachable_retrans_time); 1471 } 1472 1473 /* 1474 * Attempt to recover an address on an interface that's been marked as a 1475 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1476 * no easy way to just probe the address and have the right thing happen if 1477 * it's no longer in use. Instead, we just bring it up normally and allow the 1478 * regular interface start-up logic to probe for a remaining duplicate and take 1479 * us back down if necessary. 1480 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1481 * ip_ndp_excl. 1482 */ 1483 /* ARGSUSED */ 1484 static void 1485 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1486 { 1487 ill_t *ill = rq->q_ptr; 1488 ipif_t *ipif; 1489 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1490 1491 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1492 /* 1493 * We do not support recovery of proxy ARP'd interfaces, 1494 * because the system lacks a complete proxy ARP mechanism. 1495 */ 1496 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1497 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1498 continue; 1499 } 1500 1501 /* 1502 * If we have already recovered or if the interface is going 1503 * away, then ignore. 1504 */ 1505 mutex_enter(&ill->ill_lock); 1506 if (!(ipif->ipif_flags & IPIF_DUPLICATE) || 1507 (ipif->ipif_flags & (IPIF_MOVING | IPIF_CONDEMNED))) { 1508 mutex_exit(&ill->ill_lock); 1509 continue; 1510 } 1511 1512 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1513 ill->ill_ipif_dup_count--; 1514 mutex_exit(&ill->ill_lock); 1515 ipif->ipif_was_dup = B_TRUE; 1516 1517 if (ipif_ndp_up(ipif, addr, B_FALSE) != EINPROGRESS) 1518 (void) ipif_up_done_v6(ipif); 1519 } 1520 freeb(mp); 1521 } 1522 1523 /* 1524 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1525 * As long as someone else holds the address, the interface will stay down. 1526 * When that conflict goes away, the interface is brought back up. This is 1527 * done so that accidental shutdowns of addresses aren't made permanent. Your 1528 * server will recover from a failure. 1529 * 1530 * For DHCP and temporary addresses, recovery is not done in the kernel. 1531 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1532 * 1533 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1534 */ 1535 static void 1536 ipif6_dup_recovery(void *arg) 1537 { 1538 ipif_t *ipif = arg; 1539 1540 ipif->ipif_recovery_id = 0; 1541 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1542 return; 1543 1544 /* 1545 * No lock, because this is just an optimization. 1546 */ 1547 if (ipif->ipif_state_flags & (IPIF_MOVING | IPIF_CONDEMNED)) 1548 return; 1549 1550 /* If the link is down, we'll retry this later */ 1551 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1552 return; 1553 1554 ndp_do_recovery(ipif); 1555 } 1556 1557 /* 1558 * Perform interface recovery by forcing the duplicate interfaces up and 1559 * allowing the system to determine which ones should stay up. 1560 * 1561 * Called both by recovery timer expiry and link-up notification. 1562 */ 1563 void 1564 ndp_do_recovery(ipif_t *ipif) 1565 { 1566 ill_t *ill = ipif->ipif_ill; 1567 mblk_t *mp; 1568 1569 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1570 if (mp == NULL) { 1571 mutex_enter(&ill->ill_lock); 1572 if (ipif->ipif_recovery_id == 0 && 1573 !(ipif->ipif_state_flags & (IPIF_MOVING | 1574 IPIF_CONDEMNED))) { 1575 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1576 ipif, MSEC_TO_TICK(ip_dup_recovery)); 1577 } 1578 mutex_exit(&ill->ill_lock); 1579 } else { 1580 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1581 sizeof (ipif->ipif_v6lcl_addr)); 1582 ill_refhold(ill); 1583 (void) qwriter_ip(NULL, ill, ill->ill_rq, mp, ip_ndp_recover, 1584 CUR_OP, B_FALSE); 1585 } 1586 } 1587 1588 /* 1589 * Find the solicitation in the given message, and extract printable details 1590 * (MAC and IP addresses) from it. 1591 */ 1592 static nd_neighbor_solicit_t * 1593 ip_ndp_find_solicitation(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, char *hbuf, 1594 size_t hlen, char *sbuf, size_t slen, uchar_t **haddr) 1595 { 1596 nd_neighbor_solicit_t *ns; 1597 ip6_t *ip6h; 1598 uchar_t *addr; 1599 int alen; 1600 1601 alen = 0; 1602 ip6h = (ip6_t *)mp->b_rptr; 1603 if (dl_mp == NULL) { 1604 nd_opt_hdr_t *opt; 1605 int nslen; 1606 1607 /* 1608 * If it's from the fast-path, then it can't be a probe 1609 * message, and thus must include the source linkaddr option. 1610 * Extract that here. 1611 */ 1612 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1613 nslen = mp->b_wptr - (uchar_t *)ns; 1614 if ((nslen -= sizeof (*ns)) > 0) { 1615 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), nslen, 1616 ND_OPT_SOURCE_LINKADDR); 1617 if (opt != NULL && 1618 opt->nd_opt_len * 8 - sizeof (*opt) >= 1619 ill->ill_nd_lla_len) { 1620 addr = (uchar_t *)(opt + 1); 1621 alen = ill->ill_nd_lla_len; 1622 } 1623 } 1624 /* 1625 * We cheat a bit here for the sake of printing usable log 1626 * messages in the rare case where the reply we got was unicast 1627 * without a source linkaddr option, and the interface is in 1628 * fastpath mode. (Sigh.) 1629 */ 1630 if (alen == 0 && ill->ill_type == IFT_ETHER && 1631 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1632 struct ether_header *pether; 1633 1634 pether = (struct ether_header *)((char *)ip6h - 1635 sizeof (*pether)); 1636 addr = pether->ether_shost.ether_addr_octet; 1637 alen = ETHERADDRL; 1638 } 1639 } else { 1640 dl_unitdata_ind_t *dlu; 1641 1642 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1643 alen = dlu->dl_src_addr_length; 1644 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1645 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1646 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1647 if (ill->ill_sap_length < 0) { 1648 alen += ill->ill_sap_length; 1649 } else { 1650 addr += ill->ill_sap_length; 1651 alen -= ill->ill_sap_length; 1652 } 1653 } 1654 } 1655 if (alen > 0) { 1656 *haddr = addr; 1657 (void) mac_colon_addr(addr, alen, hbuf, hlen); 1658 } else { 1659 *haddr = NULL; 1660 (void) strcpy(hbuf, "?"); 1661 } 1662 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1663 (void) inet_ntop(AF_INET6, &ns->nd_ns_target, sbuf, slen); 1664 return (ns); 1665 } 1666 1667 /* 1668 * This is for exclusive changes due to NDP duplicate address detection 1669 * failure. 1670 */ 1671 /* ARGSUSED */ 1672 static void 1673 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1674 { 1675 ill_t *ill = rq->q_ptr; 1676 ipif_t *ipif; 1677 char ibuf[LIFNAMSIZ + 10]; /* 10 digits for logical i/f number */ 1678 char hbuf[MAC_STR_LEN]; 1679 char sbuf[INET6_ADDRSTRLEN]; 1680 nd_neighbor_solicit_t *ns; 1681 mblk_t *dl_mp = NULL; 1682 uchar_t *haddr; 1683 1684 if (DB_TYPE(mp) != M_DATA) { 1685 dl_mp = mp; 1686 mp = mp->b_cont; 1687 } 1688 ns = ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, sizeof (hbuf), sbuf, 1689 sizeof (sbuf), &haddr); 1690 if (haddr != NULL && 1691 bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) { 1692 /* 1693 * Ignore conflicts generated by misbehaving switches that just 1694 * reflect our own messages back to us. 1695 */ 1696 goto ignore_conflict; 1697 } 1698 (void) strlcpy(ibuf, ill->ill_name, sizeof (ibuf)); 1699 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1700 1701 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1702 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 1703 &ns->nd_ns_target)) { 1704 continue; 1705 } 1706 1707 /* If it's already marked, then don't do anything. */ 1708 if (ipif->ipif_flags & IPIF_DUPLICATE) 1709 continue; 1710 1711 /* 1712 * If this is a failure during duplicate recovery, then don't 1713 * complain. It may take a long time to recover. 1714 */ 1715 if (!ipif->ipif_was_dup) { 1716 if (ipif->ipif_id != 0) { 1717 (void) snprintf(ibuf + ill->ill_name_length - 1, 1718 sizeof (ibuf) - ill->ill_name_length + 1, 1719 ":%d", ipif->ipif_id); 1720 } 1721 cmn_err(CE_WARN, "%s has duplicate address %s (in " 1722 "use by %s); disabled", ibuf, sbuf, hbuf); 1723 } 1724 mutex_enter(&ill->ill_lock); 1725 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1726 ipif->ipif_flags |= IPIF_DUPLICATE; 1727 ill->ill_ipif_dup_count++; 1728 mutex_exit(&ill->ill_lock); 1729 (void) ipif_down(ipif, NULL, NULL); 1730 ipif_down_tail(ipif); 1731 mutex_enter(&ill->ill_lock); 1732 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1733 ill->ill_net_type == IRE_IF_RESOLVER && 1734 !(ipif->ipif_state_flags & (IPIF_MOVING | 1735 IPIF_CONDEMNED)) && 1736 ip_dup_recovery > 0) { 1737 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1738 ipif, MSEC_TO_TICK(ip_dup_recovery)); 1739 } 1740 mutex_exit(&ill->ill_lock); 1741 } 1742 ignore_conflict: 1743 if (dl_mp != NULL) 1744 freeb(dl_mp); 1745 freemsg(mp); 1746 } 1747 1748 /* 1749 * Handle failure by tearing down the ipifs with the specified address. Note 1750 * that tearing down the ipif also means deleting the nce through ipif_down, so 1751 * it's not possible to do recovery by just restarting the nce timer. Instead, 1752 * we start a timer on the ipif. 1753 */ 1754 static void 1755 ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1756 { 1757 if ((mp = copymsg(mp)) != NULL) { 1758 if (dl_mp == NULL) 1759 dl_mp = mp; 1760 else if ((dl_mp = copyb(dl_mp)) != NULL) 1761 dl_mp->b_cont = mp; 1762 if (dl_mp == NULL) { 1763 freemsg(mp); 1764 } else { 1765 ill_refhold(ill); 1766 (void) qwriter_ip(NULL, ill, ill->ill_rq, dl_mp, 1767 ip_ndp_excl, CUR_OP, B_FALSE); 1768 } 1769 } 1770 ndp_delete(nce); 1771 } 1772 1773 /* 1774 * Handle a discovered conflict: some other system is advertising that it owns 1775 * one of our IP addresses. We need to defend ourselves, or just shut down the 1776 * interface. 1777 */ 1778 static void 1779 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1780 { 1781 ipif_t *ipif; 1782 uint32_t now; 1783 uint_t maxdefense; 1784 uint_t defs; 1785 1786 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 1787 NULL, NULL); 1788 if (ipif == NULL) 1789 return; 1790 /* 1791 * First, figure out if this address is disposable. 1792 */ 1793 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1794 maxdefense = ip_max_temp_defend; 1795 else 1796 maxdefense = ip_max_defend; 1797 1798 /* 1799 * Now figure out how many times we've defended ourselves. Ignore 1800 * defenses that happened long in the past. 1801 */ 1802 now = gethrestime_sec(); 1803 mutex_enter(&nce->nce_lock); 1804 if ((defs = nce->nce_defense_count) > 0 && 1805 now - nce->nce_defense_time > ip_defend_interval) { 1806 nce->nce_defense_count = defs = 0; 1807 } 1808 nce->nce_defense_count++; 1809 nce->nce_defense_time = now; 1810 mutex_exit(&nce->nce_lock); 1811 ipif_refrele(ipif); 1812 1813 /* 1814 * If we've defended ourselves too many times already, then give up and 1815 * tear down the interface(s) using this address. Otherwise, defend by 1816 * sending out an unsolicited Neighbor Advertisement. 1817 */ 1818 if (defs >= maxdefense) { 1819 ip_ndp_failure(ill, mp, dl_mp, nce); 1820 } else { 1821 char hbuf[MAC_STR_LEN]; 1822 char sbuf[INET6_ADDRSTRLEN]; 1823 uchar_t *haddr; 1824 1825 (void) ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, 1826 sizeof (hbuf), sbuf, sizeof (sbuf), &haddr); 1827 cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1828 hbuf, sbuf, ill->ill_name); 1829 (void) nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, B_FALSE, 1830 &nce->nce_addr, &ipv6_all_hosts_mcast, 1831 nce_advert_flags(nce)); 1832 } 1833 } 1834 1835 static void 1836 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1837 { 1838 nd_neighbor_solicit_t *ns; 1839 uint32_t hlen = ill->ill_nd_lla_len; 1840 uchar_t *haddr = NULL; 1841 icmp6_t *icmp_nd; 1842 ip6_t *ip6h; 1843 nce_t *our_nce = NULL; 1844 in6_addr_t target; 1845 in6_addr_t src; 1846 int len; 1847 int flag = 0; 1848 nd_opt_hdr_t *opt = NULL; 1849 boolean_t bad_solicit = B_FALSE; 1850 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1851 1852 ip6h = (ip6_t *)mp->b_rptr; 1853 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1854 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1855 src = ip6h->ip6_src; 1856 ns = (nd_neighbor_solicit_t *)icmp_nd; 1857 target = ns->nd_ns_target; 1858 if (IN6_IS_ADDR_MULTICAST(&target)) { 1859 if (ip_debug > 2) { 1860 /* ip1dbg */ 1861 pr_addr_dbg("ndp_input_solicit: Target is" 1862 " multicast! %s\n", AF_INET6, &target); 1863 } 1864 bad_solicit = B_TRUE; 1865 goto done; 1866 } 1867 if (len > sizeof (nd_neighbor_solicit_t)) { 1868 /* Options present */ 1869 opt = (nd_opt_hdr_t *)&ns[1]; 1870 len -= sizeof (nd_neighbor_solicit_t); 1871 if (!ndp_verify_optlen(opt, len)) { 1872 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1873 bad_solicit = B_TRUE; 1874 goto done; 1875 } 1876 } 1877 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1878 /* Check to see if this is a valid DAD solicitation */ 1879 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1880 if (ip_debug > 2) { 1881 /* ip1dbg */ 1882 pr_addr_dbg("ndp_input_solicit: IPv6 " 1883 "Destination is not solicited node " 1884 "multicast %s\n", AF_INET6, 1885 &ip6h->ip6_dst); 1886 } 1887 bad_solicit = B_TRUE; 1888 goto done; 1889 } 1890 } 1891 1892 our_nce = ndp_lookup_v6(ill, &target, B_FALSE); 1893 /* 1894 * If this is a valid Solicitation, a permanent 1895 * entry should exist in the cache 1896 */ 1897 if (our_nce == NULL || 1898 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1899 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1900 "ifname=%s ", ill->ill_name)); 1901 if (ip_debug > 2) { 1902 /* ip1dbg */ 1903 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1904 } 1905 bad_solicit = B_TRUE; 1906 goto done; 1907 } 1908 1909 /* At this point we should have a verified NS per spec */ 1910 if (opt != NULL) { 1911 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1912 if (opt != NULL) { 1913 haddr = (uchar_t *)&opt[1]; 1914 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 1915 hlen == 0) { 1916 ip1dbg(("ndp_input_advert: bad SLLA\n")); 1917 bad_solicit = B_TRUE; 1918 goto done; 1919 } 1920 } 1921 } 1922 1923 /* If sending directly to peer, set the unicast flag */ 1924 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 1925 flag |= NDP_UNICAST; 1926 1927 /* 1928 * Create/update the entry for the soliciting node. 1929 * or respond to outstanding queries, don't if 1930 * the source is unspecified address. 1931 */ 1932 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1933 int err; 1934 nce_t *nnce; 1935 1936 ASSERT(ill->ill_isv6); 1937 /* 1938 * Regular solicitations *must* include the Source Link-Layer 1939 * Address option. Ignore messages that do not. 1940 */ 1941 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1942 ip1dbg(("ndp_input_solicit: source link-layer address " 1943 "option missing with a specified source.\n")); 1944 bad_solicit = B_TRUE; 1945 goto done; 1946 } 1947 1948 /* 1949 * This is a regular solicitation. If we're still in the 1950 * process of verifying the address, then don't respond at all 1951 * and don't keep track of the sender. 1952 */ 1953 if (our_nce->nce_state == ND_PROBE) 1954 goto done; 1955 1956 /* 1957 * If the solicitation doesn't have sender hardware address 1958 * (legal for unicast solicitation), then process without 1959 * installing the return NCE. Either we already know it, or 1960 * we'll be forced to look it up when (and if) we reply to the 1961 * packet. 1962 */ 1963 if (haddr == NULL) 1964 goto no_source; 1965 1966 err = ndp_lookup_then_add(ill, 1967 haddr, 1968 &src, /* Soliciting nodes address */ 1969 &ipv6_all_ones, 1970 &ipv6_all_zeros, 1971 0, 1972 0, 1973 ND_STALE, 1974 &nnce, 1975 NULL, 1976 NULL); 1977 switch (err) { 1978 case 0: 1979 /* done with this entry */ 1980 NCE_REFRELE(nnce); 1981 break; 1982 case EEXIST: 1983 /* 1984 * B_FALSE indicates this is not an 1985 * an advertisement. 1986 */ 1987 ndp_process(nnce, haddr, 0, B_FALSE); 1988 NCE_REFRELE(nnce); 1989 break; 1990 default: 1991 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1992 err)); 1993 goto done; 1994 } 1995 no_source: 1996 flag |= NDP_SOLICITED; 1997 } else { 1998 /* 1999 * No source link layer address option should be present in a 2000 * valid DAD request. 2001 */ 2002 if (haddr != NULL) { 2003 ip1dbg(("ndp_input_solicit: source link-layer address " 2004 "option present with an unspecified source.\n")); 2005 bad_solicit = B_TRUE; 2006 goto done; 2007 } 2008 if (our_nce->nce_state == ND_PROBE) { 2009 /* 2010 * Internally looped-back probes won't have DLPI 2011 * attached to them. External ones (which are sent by 2012 * multicast) always will. Just ignore our own 2013 * transmissions. 2014 */ 2015 if (dl_mp != NULL) { 2016 /* 2017 * If someone else is probing our address, then 2018 * we've crossed wires. Declare failure. 2019 */ 2020 ip_ndp_failure(ill, mp, dl_mp, our_nce); 2021 } 2022 goto done; 2023 } 2024 /* 2025 * This is a DAD probe. Multicast the advertisement to the 2026 * all-nodes address. 2027 */ 2028 src = ipv6_all_hosts_mcast; 2029 } 2030 flag |= nce_advert_flags(our_nce); 2031 /* Response to a solicitation */ 2032 (void) nce_xmit(ill, 2033 ND_NEIGHBOR_ADVERT, 2034 ill, /* ill to be used for extracting ill_nd_lla */ 2035 B_TRUE, /* use ill_nd_lla */ 2036 &target, /* Source and target of the advertisement pkt */ 2037 &src, /* IP Destination (source of original pkt) */ 2038 flag); 2039 done: 2040 if (bad_solicit) 2041 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 2042 if (our_nce != NULL) 2043 NCE_REFRELE(our_nce); 2044 } 2045 2046 void 2047 ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2048 { 2049 nd_neighbor_advert_t *na; 2050 uint32_t hlen = ill->ill_nd_lla_len; 2051 uchar_t *haddr = NULL; 2052 icmp6_t *icmp_nd; 2053 ip6_t *ip6h; 2054 nce_t *dst_nce = NULL; 2055 in6_addr_t target; 2056 nd_opt_hdr_t *opt = NULL; 2057 int len; 2058 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2059 2060 ip6h = (ip6_t *)mp->b_rptr; 2061 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2062 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2063 na = (nd_neighbor_advert_t *)icmp_nd; 2064 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 2065 (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 2066 ip1dbg(("ndp_input_advert: Target is multicast but the " 2067 "solicited flag is not zero\n")); 2068 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2069 return; 2070 } 2071 target = na->nd_na_target; 2072 if (IN6_IS_ADDR_MULTICAST(&target)) { 2073 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 2074 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2075 return; 2076 } 2077 if (len > sizeof (nd_neighbor_advert_t)) { 2078 opt = (nd_opt_hdr_t *)&na[1]; 2079 if (!ndp_verify_optlen(opt, 2080 len - sizeof (nd_neighbor_advert_t))) { 2081 ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); 2082 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2083 return; 2084 } 2085 /* At this point we have a verified NA per spec */ 2086 len -= sizeof (nd_neighbor_advert_t); 2087 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 2088 if (opt != NULL) { 2089 haddr = (uchar_t *)&opt[1]; 2090 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 2091 hlen == 0) { 2092 ip1dbg(("ndp_input_advert: bad SLLA\n")); 2093 BUMP_MIB(mib, 2094 ipv6IfIcmpInBadNeighborAdvertisements); 2095 return; 2096 } 2097 } 2098 } 2099 2100 /* 2101 * If this interface is part of the group look at all the 2102 * ills in the group. 2103 */ 2104 rw_enter(&ill_g_lock, RW_READER); 2105 if (ill->ill_group != NULL) 2106 ill = ill->ill_group->illgrp_ill; 2107 2108 for (; ill != NULL; ill = ill->ill_group_next) { 2109 mutex_enter(&ill->ill_lock); 2110 if (!ILL_CAN_LOOKUP(ill)) { 2111 mutex_exit(&ill->ill_lock); 2112 continue; 2113 } 2114 ill_refhold_locked(ill); 2115 mutex_exit(&ill->ill_lock); 2116 dst_nce = ndp_lookup_v6(ill, &target, B_FALSE); 2117 /* We have to drop the lock since ndp_process calls put* */ 2118 rw_exit(&ill_g_lock); 2119 if (dst_nce != NULL) { 2120 if ((dst_nce->nce_flags & NCE_F_PERMANENT) && 2121 dst_nce->nce_state == ND_PROBE) { 2122 /* 2123 * Someone else sent an advertisement for an 2124 * address that we're trying to configure. 2125 * Tear it down. Note that dl_mp might be NULL 2126 * if we're getting a unicast reply. This 2127 * isn't typically done (multicast is the norm 2128 * in response to a probe), but ip_ndp_failure 2129 * will handle the dl_mp == NULL case as well. 2130 */ 2131 ip_ndp_failure(ill, mp, dl_mp, dst_nce); 2132 } else if (dst_nce->nce_flags & NCE_F_PERMANENT) { 2133 /* 2134 * Someone just announced one of our local 2135 * addresses. If it wasn't us, then this is a 2136 * conflict. Defend the address or shut it 2137 * down. 2138 */ 2139 if (dl_mp != NULL && 2140 (haddr == NULL || 2141 nce_cmp_ll_addr(dst_nce, haddr, 2142 ill->ill_nd_lla_len))) { 2143 ip_ndp_conflict(ill, mp, dl_mp, 2144 dst_nce); 2145 } 2146 } else { 2147 if (na->nd_na_flags_reserved & 2148 ND_NA_FLAG_ROUTER) { 2149 dst_nce->nce_flags |= NCE_F_ISROUTER; 2150 } 2151 /* B_TRUE indicates this an advertisement */ 2152 ndp_process(dst_nce, haddr, 2153 na->nd_na_flags_reserved, B_TRUE); 2154 } 2155 NCE_REFRELE(dst_nce); 2156 } 2157 rw_enter(&ill_g_lock, RW_READER); 2158 ill_refrele(ill); 2159 } 2160 rw_exit(&ill_g_lock); 2161 } 2162 2163 /* 2164 * Process NDP neighbor solicitation/advertisement messages. 2165 * The checksum has already checked o.k before reaching here. 2166 */ 2167 void 2168 ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2169 { 2170 icmp6_t *icmp_nd; 2171 ip6_t *ip6h; 2172 int len; 2173 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2174 2175 2176 if (!pullupmsg(mp, -1)) { 2177 ip1dbg(("ndp_input: pullupmsg failed\n")); 2178 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2179 goto done; 2180 } 2181 ip6h = (ip6_t *)mp->b_rptr; 2182 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 2183 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 2184 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 2185 goto done; 2186 } 2187 /* 2188 * NDP does not accept any extension headers between the 2189 * IP header and the ICMP header since e.g. a routing 2190 * header could be dangerous. 2191 * This assumes that any AH or ESP headers are removed 2192 * by ip prior to passing the packet to ndp_input. 2193 */ 2194 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 2195 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 2196 ip6h->ip6_nxt)); 2197 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2198 goto done; 2199 } 2200 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2201 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 2202 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 2203 if (icmp_nd->icmp6_code != 0) { 2204 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 2205 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2206 goto done; 2207 } 2208 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2209 /* 2210 * Make sure packet length is large enough for either 2211 * a NS or a NA icmp packet. 2212 */ 2213 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 2214 ip1dbg(("ndp_input: packet too short\n")); 2215 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2216 goto done; 2217 } 2218 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 2219 ndp_input_solicit(ill, mp, dl_mp); 2220 } else { 2221 ndp_input_advert(ill, mp, dl_mp); 2222 } 2223 done: 2224 freemsg(mp); 2225 } 2226 2227 /* 2228 * nce_xmit is called to form and transmit a ND solicitation or 2229 * advertisement ICMP packet. 2230 * 2231 * If the source address is unspecified and this isn't a probe (used for 2232 * duplicate address detection), an appropriate source address and link layer 2233 * address will be chosen here. The link layer address option is included if 2234 * the source is specified (i.e., all non-probe packets), and omitted (per the 2235 * specification) otherwise. 2236 * 2237 * It returns B_FALSE only if it does a successful put() to the 2238 * corresponding ill's ill_wq otherwise returns B_TRUE. 2239 */ 2240 static boolean_t 2241 nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, 2242 boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, 2243 int flag) 2244 { 2245 uint32_t len; 2246 icmp6_t *icmp6; 2247 mblk_t *mp; 2248 ip6_t *ip6h; 2249 nd_opt_hdr_t *opt; 2250 uint_t plen; 2251 ip6i_t *ip6i; 2252 ipif_t *src_ipif = NULL; 2253 uint8_t *hw_addr; 2254 2255 /* 2256 * If we have a unspecified source(sender) address, select a 2257 * proper source address for the solicitation here itself so 2258 * that we can initialize the h/w address correctly. This is 2259 * needed for interface groups as source address can come from 2260 * the whole group and the h/w address initialized from ill will 2261 * be wrong if the source address comes from a different ill. 2262 * 2263 * Note that the NA never comes here with the unspecified source 2264 * address. The following asserts that whenever the source 2265 * address is specified, the haddr also should be specified. 2266 */ 2267 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 2268 2269 if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { 2270 ASSERT(operation != ND_NEIGHBOR_ADVERT); 2271 /* 2272 * Pick a source address for this solicitation, but 2273 * restrict the selection to addresses assigned to the 2274 * output interface (or interface group). We do this 2275 * because the destination will create a neighbor cache 2276 * entry for the source address of this packet, so the 2277 * source address had better be a valid neighbor. 2278 */ 2279 src_ipif = ipif_select_source_v6(ill, target, RESTRICT_TO_ILL, 2280 IPV6_PREFER_SRC_DEFAULT, GLOBAL_ZONEID); 2281 if (src_ipif == NULL) { 2282 char buf[INET6_ADDRSTRLEN]; 2283 2284 ip1dbg(("nce_xmit: No source ipif for dst %s\n", 2285 inet_ntop(AF_INET6, (char *)target, buf, 2286 sizeof (buf)))); 2287 return (B_TRUE); 2288 } 2289 sender = &src_ipif->ipif_v6src_addr; 2290 hwaddr_ill = src_ipif->ipif_ill; 2291 } 2292 2293 /* 2294 * Always make sure that the NS/NA packets don't get load 2295 * spread. This is needed so that the probe packets sent 2296 * by the in.mpathd daemon can really go out on the desired 2297 * interface. Probe packets are made to go out on a desired 2298 * interface by including a ip6i with ATTACH_IF flag. As these 2299 * packets indirectly end up sending/receiving NS/NA packets 2300 * (neighbor doing NUD), we have to make sure that NA 2301 * also go out on the same interface. 2302 */ 2303 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7) / 8; 2304 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 2305 plen * 8; 2306 mp = allocb(len, BPRI_LO); 2307 if (mp == NULL) { 2308 if (src_ipif != NULL) 2309 ipif_refrele(src_ipif); 2310 return (B_TRUE); 2311 } 2312 bzero((char *)mp->b_rptr, len); 2313 mp->b_wptr = mp->b_rptr + len; 2314 2315 ip6i = (ip6i_t *)mp->b_rptr; 2316 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2317 ip6i->ip6i_nxt = IPPROTO_RAW; 2318 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2319 if (flag & NDP_PROBE) 2320 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 2321 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2322 2323 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 2324 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2325 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2326 ip6h->ip6_nxt = IPPROTO_ICMPV6; 2327 ip6h->ip6_hops = IPV6_MAX_HOPS; 2328 ip6h->ip6_dst = *target; 2329 icmp6 = (icmp6_t *)&ip6h[1]; 2330 2331 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 2332 sizeof (nd_neighbor_advert_t)); 2333 2334 if (operation == ND_NEIGHBOR_SOLICIT) { 2335 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 2336 2337 if (!(flag & NDP_PROBE)) 2338 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 2339 ip6h->ip6_src = *sender; 2340 ns->nd_ns_target = *target; 2341 if (!(flag & NDP_UNICAST)) { 2342 /* Form multicast address of the target */ 2343 ip6h->ip6_dst = ipv6_solicited_node_mcast; 2344 ip6h->ip6_dst.s6_addr32[3] |= 2345 ns->nd_ns_target.s6_addr32[3]; 2346 } 2347 } else { 2348 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 2349 2350 ASSERT(!(flag & NDP_PROBE)); 2351 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2352 ip6h->ip6_src = *sender; 2353 na->nd_na_target = *sender; 2354 if (flag & NDP_ISROUTER) 2355 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 2356 if (flag & NDP_SOLICITED) 2357 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 2358 if (flag & NDP_ORIDE) 2359 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 2360 } 2361 2362 hw_addr = NULL; 2363 if (!(flag & NDP_PROBE)) { 2364 mutex_enter(&hwaddr_ill->ill_lock); 2365 hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : 2366 hwaddr_ill->ill_phys_addr; 2367 if (hw_addr != NULL) { 2368 /* Fill in link layer address and option len */ 2369 opt->nd_opt_len = (uint8_t)plen; 2370 bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); 2371 } 2372 mutex_exit(&hwaddr_ill->ill_lock); 2373 } 2374 if (hw_addr == NULL) { 2375 /* If there's no link layer address option, then strip it. */ 2376 len -= plen * 8; 2377 mp->b_wptr = mp->b_rptr + len; 2378 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2379 } 2380 2381 icmp6->icmp6_type = (uint8_t)operation; 2382 icmp6->icmp6_code = 0; 2383 /* 2384 * Prepare for checksum by putting icmp length in the icmp 2385 * checksum field. The checksum is calculated in ip_wput_v6. 2386 */ 2387 icmp6->icmp6_cksum = ip6h->ip6_plen; 2388 2389 if (src_ipif != NULL) 2390 ipif_refrele(src_ipif); 2391 if (canput(ill->ill_wq)) { 2392 put(ill->ill_wq, mp); 2393 return (B_FALSE); 2394 } 2395 freemsg(mp); 2396 return (B_TRUE); 2397 } 2398 2399 /* 2400 * Make a link layer address (does not include the SAP) from an nce. 2401 * To form the link layer address, use the last four bytes of ipv6 2402 * address passed in and the fixed offset stored in nce. 2403 */ 2404 static void 2405 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 2406 { 2407 uchar_t *mask, *to; 2408 ill_t *ill = nce->nce_ill; 2409 int len; 2410 2411 if (ill->ill_net_type == IRE_IF_NORESOLVER) 2412 return; 2413 ASSERT(nce->nce_res_mp != NULL); 2414 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 2415 ASSERT(nce->nce_flags & NCE_F_MAPPING); 2416 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 2417 ASSERT(addr != NULL); 2418 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 2419 addrpos, ill->ill_nd_lla_len); 2420 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 2421 IPV6_ADDR_LEN); 2422 mask = (uchar_t *)&nce->nce_extract_mask; 2423 mask += (IPV6_ADDR_LEN - len); 2424 addr += (IPV6_ADDR_LEN - len); 2425 to = addrpos + nce->nce_ll_extract_start; 2426 while (len-- > 0) 2427 *to++ |= *mask++ & *addr++; 2428 } 2429 2430 /* 2431 * Pass a cache report back out via NDD. 2432 */ 2433 /* ARGSUSED */ 2434 int 2435 ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 2436 { 2437 (void) mi_mpprintf(mp, "ifname hardware addr flags" 2438 " proto addr/mask"); 2439 ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp); 2440 return (0); 2441 } 2442 2443 /* 2444 * Add a single line to the NDP Cache Entry Report. 2445 */ 2446 static void 2447 nce_report1(nce_t *nce, uchar_t *mp_arg) 2448 { 2449 ill_t *ill = nce->nce_ill; 2450 char local_buf[INET6_ADDRSTRLEN]; 2451 uchar_t flags_buf[10]; 2452 uint32_t flags = nce->nce_flags; 2453 mblk_t *mp = (mblk_t *)mp_arg; 2454 uchar_t *h; 2455 uchar_t *m = flags_buf; 2456 in6_addr_t v6addr; 2457 2458 /* 2459 * Lock the nce to protect nce_res_mp from being changed 2460 * if an external resolver address resolution completes 2461 * while nce_res_mp is being accessed here. 2462 * 2463 * Deal with all address formats, not just Ethernet-specific 2464 * In addition, make sure that the mblk has enough space 2465 * before writing to it. If is doesn't, allocate a new one. 2466 */ 2467 if (nce->nce_ipversion == IPV4_VERSION) 2468 /* Don't include v4 nce_ts in NDP cache entry report */ 2469 return; 2470 2471 ASSERT(ill != NULL); 2472 v6addr = nce->nce_mask; 2473 if (flags & NCE_F_PERMANENT) 2474 *m++ = 'P'; 2475 if (flags & NCE_F_ISROUTER) 2476 *m++ = 'R'; 2477 if (flags & NCE_F_MAPPING) 2478 *m++ = 'M'; 2479 *m = '\0'; 2480 2481 if (ill->ill_net_type == IRE_IF_RESOLVER) { 2482 size_t addrlen; 2483 char *addr_buf; 2484 dl_unitdata_req_t *dl; 2485 2486 mutex_enter(&nce->nce_lock); 2487 h = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2488 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 2489 if (ill->ill_flags & ILLF_XRESOLV) 2490 addrlen = (3 * (dl->dl_dest_addr_length)); 2491 else 2492 addrlen = (3 * (ill->ill_nd_lla_len)); 2493 if (addrlen <= 0) { 2494 mutex_exit(&nce->nce_lock); 2495 (void) mi_mpprintf(mp, 2496 "%8s %9s %5s %s/%d", 2497 ill->ill_name, 2498 "None", 2499 (uchar_t *)&flags_buf, 2500 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2501 (char *)local_buf, sizeof (local_buf)), 2502 ip_mask_to_plen_v6(&v6addr)); 2503 } else { 2504 /* 2505 * Convert the hardware/lla address to ascii 2506 */ 2507 addr_buf = kmem_zalloc(addrlen, KM_NOSLEEP); 2508 if (addr_buf == NULL) { 2509 mutex_exit(&nce->nce_lock); 2510 return; 2511 } 2512 (void) mac_colon_addr((uint8_t *)h, 2513 (ill->ill_flags & ILLF_XRESOLV) ? 2514 dl->dl_dest_addr_length : ill->ill_nd_lla_len, 2515 addr_buf, addrlen); 2516 mutex_exit(&nce->nce_lock); 2517 (void) mi_mpprintf(mp, "%8s %17s %5s %s/%d", 2518 ill->ill_name, addr_buf, (uchar_t *)&flags_buf, 2519 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2520 (char *)local_buf, sizeof (local_buf)), 2521 ip_mask_to_plen_v6(&v6addr)); 2522 kmem_free(addr_buf, addrlen); 2523 } 2524 } else { 2525 (void) mi_mpprintf(mp, 2526 "%8s %9s %5s %s/%d", 2527 ill->ill_name, 2528 "None", 2529 (uchar_t *)&flags_buf, 2530 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2531 (char *)local_buf, sizeof (local_buf)), 2532 ip_mask_to_plen_v6(&v6addr)); 2533 } 2534 } 2535 2536 mblk_t * 2537 nce_udreq_alloc(ill_t *ill) 2538 { 2539 mblk_t *template_mp = NULL; 2540 dl_unitdata_req_t *dlur; 2541 int sap_length; 2542 2543 ASSERT(ill->ill_isv6); 2544 2545 sap_length = ill->ill_sap_length; 2546 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 2547 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 2548 if (template_mp == NULL) 2549 return (NULL); 2550 2551 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 2552 dlur->dl_priority.dl_min = 0; 2553 dlur->dl_priority.dl_max = 0; 2554 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 2555 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 2556 2557 /* Copy in the SAP value. */ 2558 NCE_LL_SAP_COPY(ill, template_mp); 2559 2560 return (template_mp); 2561 } 2562 2563 /* 2564 * NDP retransmit timer. 2565 * This timer goes off when: 2566 * a. It is time to retransmit NS for resolver. 2567 * b. It is time to send reachability probes. 2568 */ 2569 void 2570 ndp_timer(void *arg) 2571 { 2572 nce_t *nce = arg; 2573 ill_t *ill = nce->nce_ill; 2574 uint32_t ms; 2575 char addrbuf[INET6_ADDRSTRLEN]; 2576 mblk_t *mp; 2577 boolean_t dropped = B_FALSE; 2578 2579 /* 2580 * The timer has to be cancelled by ndp_delete before doing the final 2581 * refrele. So the NCE is guaranteed to exist when the timer runs 2582 * until it clears the timeout_id. Before clearing the timeout_id 2583 * bump up the refcnt so that we can continue to use the nce 2584 */ 2585 ASSERT(nce != NULL); 2586 2587 /* 2588 * Grab the ill_g_lock now itself to avoid lock order problems. 2589 * nce_solicit needs ill_g_lock to be able to traverse ills 2590 */ 2591 rw_enter(&ill_g_lock, RW_READER); 2592 mutex_enter(&nce->nce_lock); 2593 NCE_REFHOLD_LOCKED(nce); 2594 nce->nce_timeout_id = 0; 2595 2596 /* 2597 * Check the reachability state first. 2598 */ 2599 switch (nce->nce_state) { 2600 case ND_DELAY: 2601 rw_exit(&ill_g_lock); 2602 nce->nce_state = ND_PROBE; 2603 mutex_exit(&nce->nce_lock); 2604 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2605 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2606 if (ip_debug > 3) { 2607 /* ip2dbg */ 2608 pr_addr_dbg("ndp_timer: state for %s changed " 2609 "to PROBE\n", AF_INET6, &nce->nce_addr); 2610 } 2611 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2612 NCE_REFRELE(nce); 2613 return; 2614 case ND_PROBE: 2615 /* must be retransmit timer */ 2616 rw_exit(&ill_g_lock); 2617 nce->nce_pcnt--; 2618 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2619 nce->nce_pcnt >= -1); 2620 if (nce->nce_pcnt > 0) { 2621 /* 2622 * As per RFC2461, the nce gets deleted after 2623 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2624 * Note that the first unicast solicitation is sent 2625 * during the DELAY state. 2626 */ 2627 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2628 nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, 2629 addrbuf, sizeof (addrbuf)))); 2630 mutex_exit(&nce->nce_lock); 2631 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, 2632 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, 2633 (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE : 2634 NDP_UNICAST); 2635 if (dropped) { 2636 mutex_enter(&nce->nce_lock); 2637 nce->nce_pcnt++; 2638 mutex_exit(&nce->nce_lock); 2639 } 2640 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 2641 } else if (nce->nce_pcnt < 0) { 2642 /* No hope, delete the nce */ 2643 nce->nce_state = ND_UNREACHABLE; 2644 mutex_exit(&nce->nce_lock); 2645 if (ip_debug > 2) { 2646 /* ip1dbg */ 2647 pr_addr_dbg("ndp_timer: Delete IRE for" 2648 " dst %s\n", AF_INET6, &nce->nce_addr); 2649 } 2650 ndp_delete(nce); 2651 } else if (!(nce->nce_flags & NCE_F_PERMANENT)) { 2652 /* Wait RetransTimer, before deleting the entry */ 2653 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2654 nce->nce_pcnt, inet_ntop(AF_INET6, 2655 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2656 mutex_exit(&nce->nce_lock); 2657 /* Wait one interval before killing */ 2658 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2659 } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { 2660 ipif_t *ipif; 2661 2662 /* 2663 * We're done probing, and we can now declare this 2664 * address to be usable. Let IP know that it's ok to 2665 * use. 2666 */ 2667 nce->nce_state = ND_REACHABLE; 2668 mutex_exit(&nce->nce_lock); 2669 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, 2670 ALL_ZONES, NULL, NULL, NULL, NULL); 2671 if (ipif != NULL) { 2672 if (ipif->ipif_was_dup) { 2673 char ibuf[LIFNAMSIZ + 10]; 2674 char sbuf[INET6_ADDRSTRLEN]; 2675 2676 ipif->ipif_was_dup = B_FALSE; 2677 (void) strlcpy(ibuf, ill->ill_name, 2678 sizeof (ibuf)); 2679 (void) inet_ntop(AF_INET6, 2680 &ipif->ipif_v6lcl_addr, 2681 sbuf, sizeof (sbuf)); 2682 if (ipif->ipif_id != 0) { 2683 (void) snprintf(ibuf + 2684 ill->ill_name_length - 1, 2685 sizeof (ibuf) - 2686 ill->ill_name_length + 1, 2687 ":%d", ipif->ipif_id); 2688 } 2689 cmn_err(CE_NOTE, "recovered address " 2690 "%s on %s", sbuf, ibuf); 2691 } 2692 if ((ipif->ipif_flags & IPIF_UP) && 2693 !ipif->ipif_addr_ready) { 2694 ip_rts_ifmsg(ipif); 2695 ip_rts_newaddrmsg(RTM_ADD, 0, ipif); 2696 sctp_update_ipif(ipif, SCTP_IPIF_UP); 2697 } 2698 ipif->ipif_addr_ready = 1; 2699 ipif_refrele(ipif); 2700 } 2701 /* Begin defending our new address */ 2702 nce->nce_unsolicit_count = 0; 2703 dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, 2704 B_FALSE, &nce->nce_addr, &ipv6_all_hosts_mcast, 2705 nce_advert_flags(nce)); 2706 if (dropped) { 2707 nce->nce_unsolicit_count = 1; 2708 NDP_RESTART_TIMER(nce, 2709 ip_ndp_unsolicit_interval); 2710 } else if (ip_ndp_defense_interval != 0) { 2711 NDP_RESTART_TIMER(nce, ip_ndp_defense_interval); 2712 } 2713 } else { 2714 /* 2715 * This is an address we're probing to be our own, but 2716 * the ill is down. Wait until it comes back before 2717 * doing anything, but switch to reachable state so 2718 * that the restart will work. 2719 */ 2720 nce->nce_state = ND_REACHABLE; 2721 mutex_exit(&nce->nce_lock); 2722 } 2723 NCE_REFRELE(nce); 2724 return; 2725 case ND_INCOMPLETE: 2726 /* 2727 * Must be resolvers retransmit timer. 2728 */ 2729 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2730 ip6i_t *ip6i; 2731 ip6_t *ip6h; 2732 mblk_t *data_mp; 2733 2734 /* 2735 * Walk the list of packets queued, and see if there 2736 * are any multipathing probe packets. Such packets 2737 * are always queued at the head. Since this is a 2738 * retransmit timer firing, mark such packets as 2739 * delayed in ND resolution. This info will be used 2740 * in ip_wput_v6(). Multipathing probe packets will 2741 * always have an ip6i_t. Once we hit a packet without 2742 * it, we can break out of this loop. 2743 */ 2744 if (mp->b_datap->db_type == M_CTL) 2745 data_mp = mp->b_cont; 2746 else 2747 data_mp = mp; 2748 2749 ip6h = (ip6_t *)data_mp->b_rptr; 2750 if (ip6h->ip6_nxt != IPPROTO_RAW) 2751 break; 2752 2753 /* 2754 * This message should have been pulled up already in 2755 * ip_wput_v6. We can't do pullups here because the 2756 * b_next/b_prev is non-NULL. 2757 */ 2758 ip6i = (ip6i_t *)ip6h; 2759 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2760 sizeof (ip6i_t) + IPV6_HDR_LEN); 2761 2762 /* Mark this packet as delayed due to ND resolution */ 2763 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2764 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2765 } 2766 if (nce->nce_qd_mp != NULL) { 2767 ms = nce_solicit(nce, NULL); 2768 rw_exit(&ill_g_lock); 2769 if (ms == 0) { 2770 if (nce->nce_state != ND_REACHABLE) { 2771 mutex_exit(&nce->nce_lock); 2772 nce_resolv_failed(nce); 2773 ndp_delete(nce); 2774 } else { 2775 mutex_exit(&nce->nce_lock); 2776 } 2777 } else { 2778 mutex_exit(&nce->nce_lock); 2779 NDP_RESTART_TIMER(nce, (clock_t)ms); 2780 } 2781 NCE_REFRELE(nce); 2782 return; 2783 } 2784 mutex_exit(&nce->nce_lock); 2785 rw_exit(&ill_g_lock); 2786 NCE_REFRELE(nce); 2787 break; 2788 case ND_REACHABLE : 2789 rw_exit(&ill_g_lock); 2790 if (((nce->nce_flags & NCE_F_UNSOL_ADV) && 2791 nce->nce_unsolicit_count != 0) || 2792 ((nce->nce_flags & NCE_F_PERMANENT) && 2793 ip_ndp_defense_interval != 0)) { 2794 if (nce->nce_unsolicit_count > 0) 2795 nce->nce_unsolicit_count--; 2796 mutex_exit(&nce->nce_lock); 2797 dropped = nce_xmit(ill, 2798 ND_NEIGHBOR_ADVERT, 2799 ill, /* ill to be used for hw addr */ 2800 B_FALSE, /* use ill_phys_addr */ 2801 &nce->nce_addr, 2802 &ipv6_all_hosts_mcast, 2803 nce_advert_flags(nce)); 2804 if (dropped) { 2805 mutex_enter(&nce->nce_lock); 2806 nce->nce_unsolicit_count++; 2807 mutex_exit(&nce->nce_lock); 2808 } 2809 if (nce->nce_unsolicit_count != 0) { 2810 NDP_RESTART_TIMER(nce, 2811 ip_ndp_unsolicit_interval); 2812 } else { 2813 NDP_RESTART_TIMER(nce, 2814 ip_ndp_defense_interval); 2815 } 2816 } else { 2817 mutex_exit(&nce->nce_lock); 2818 } 2819 NCE_REFRELE(nce); 2820 break; 2821 default: 2822 rw_exit(&ill_g_lock); 2823 mutex_exit(&nce->nce_lock); 2824 NCE_REFRELE(nce); 2825 break; 2826 } 2827 } 2828 2829 /* 2830 * Set a link layer address from the ll_addr passed in. 2831 * Copy SAP from ill. 2832 */ 2833 static void 2834 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2835 { 2836 ill_t *ill = nce->nce_ill; 2837 uchar_t *woffset; 2838 2839 ASSERT(ll_addr != NULL); 2840 /* Always called before fast_path_probe */ 2841 ASSERT(nce->nce_fp_mp == NULL); 2842 if (ill->ill_sap_length != 0) { 2843 /* 2844 * Copy the SAP type specified in the 2845 * request into the xmit template. 2846 */ 2847 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2848 } 2849 if (ill->ill_phys_addr_length > 0) { 2850 /* 2851 * The bcopy() below used to be called for the physical address 2852 * length rather than the link layer address length. For 2853 * ethernet and many other media, the phys_addr and lla are 2854 * identical. 2855 * However, with xresolv interfaces being introduced, the 2856 * phys_addr and lla are no longer the same, and the physical 2857 * address may not have any useful meaning, so we use the lla 2858 * for IPv6 address resolution and destination addressing. 2859 * 2860 * For PPP or other interfaces with a zero length 2861 * physical address, don't do anything here. 2862 * The bcopy() with a zero phys_addr length was previously 2863 * a no-op for interfaces with a zero-length physical address. 2864 * Using the lla for them would change the way they operate. 2865 * Doing nothing in such cases preserves expected behavior. 2866 */ 2867 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2868 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2869 } 2870 } 2871 2872 static boolean_t 2873 nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) 2874 { 2875 ill_t *ill = nce->nce_ill; 2876 uchar_t *ll_offset; 2877 2878 ASSERT(nce->nce_res_mp != NULL); 2879 if (ll_addr == NULL) 2880 return (B_FALSE); 2881 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2882 if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) 2883 return (B_TRUE); 2884 return (B_FALSE); 2885 } 2886 2887 /* 2888 * Updates the link layer address or the reachability state of 2889 * a cache entry. Reset probe counter if needed. 2890 */ 2891 static void 2892 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2893 { 2894 ill_t *ill = nce->nce_ill; 2895 boolean_t need_stop_timer = B_FALSE; 2896 boolean_t need_fastpath_update = B_FALSE; 2897 2898 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2899 ASSERT(nce->nce_ipversion == IPV6_VERSION); 2900 /* 2901 * If this interface does not do NUD, there is no point 2902 * in allowing an update to the cache entry. Although 2903 * we will respond to NS. 2904 * The only time we accept an update for a resolver when 2905 * NUD is turned off is when it has just been created. 2906 * Non-Resolvers will always be created as REACHABLE. 2907 */ 2908 if (new_state != ND_UNCHANGED) { 2909 if ((nce->nce_flags & NCE_F_NONUD) && 2910 (nce->nce_state != ND_INCOMPLETE)) 2911 return; 2912 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2913 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2914 need_stop_timer = B_TRUE; 2915 if (new_state == ND_REACHABLE) 2916 nce->nce_last = TICK_TO_MSEC(lbolt64); 2917 else { 2918 /* We force NUD in this case */ 2919 nce->nce_last = 0; 2920 } 2921 nce->nce_state = new_state; 2922 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2923 } 2924 /* 2925 * In case of fast path we need to free the the fastpath 2926 * M_DATA and do another probe. Otherwise we can just 2927 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2928 * whatever packets that happens to be transmitting at the time. 2929 */ 2930 if (new_ll_addr != NULL) { 2931 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2932 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2933 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2934 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2935 if (nce->nce_fp_mp != NULL) { 2936 freemsg(nce->nce_fp_mp); 2937 nce->nce_fp_mp = NULL; 2938 } 2939 need_fastpath_update = B_TRUE; 2940 } 2941 mutex_exit(&nce->nce_lock); 2942 if (need_stop_timer) { 2943 (void) untimeout(nce->nce_timeout_id); 2944 nce->nce_timeout_id = 0; 2945 } 2946 if (need_fastpath_update) 2947 nce_fastpath(nce); 2948 mutex_enter(&nce->nce_lock); 2949 } 2950 2951 void 2952 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 2953 { 2954 uint_t count = 0; 2955 mblk_t **mpp; 2956 2957 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2958 2959 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2960 mpp = &(*mpp)->b_next) { 2961 if (++count > 2962 nce->nce_ill->ill_max_buf) { 2963 mblk_t *tmp = nce->nce_qd_mp->b_next; 2964 2965 nce->nce_qd_mp->b_next = NULL; 2966 nce->nce_qd_mp->b_prev = NULL; 2967 freemsg(nce->nce_qd_mp); 2968 nce->nce_qd_mp = tmp; 2969 } 2970 } 2971 /* put this on the list */ 2972 if (head_insert) { 2973 mp->b_next = nce->nce_qd_mp; 2974 nce->nce_qd_mp = mp; 2975 } else { 2976 *mpp = mp; 2977 } 2978 } 2979 2980 static void 2981 nce_queue_mp(nce_t *nce, mblk_t *mp) 2982 { 2983 boolean_t head_insert = B_FALSE; 2984 ip6_t *ip6h; 2985 ip6i_t *ip6i; 2986 mblk_t *data_mp; 2987 2988 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2989 2990 if (mp->b_datap->db_type == M_CTL) 2991 data_mp = mp->b_cont; 2992 else 2993 data_mp = mp; 2994 ip6h = (ip6_t *)data_mp->b_rptr; 2995 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2996 /* 2997 * This message should have been pulled up already in 2998 * ip_wput_v6. We can't do pullups here because the message 2999 * could be from the nce_qd_mp which could have b_next/b_prev 3000 * non-NULL. 3001 */ 3002 ip6i = (ip6i_t *)ip6h; 3003 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 3004 sizeof (ip6i_t) + IPV6_HDR_LEN); 3005 /* 3006 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 3007 * This has 2 aspects mentioned below. 3008 * 1. Perform head insertion in the nce_qd_mp for these packets. 3009 * This ensures that next retransmit of ND solicitation 3010 * will use the interface specified by the probe packet, 3011 * for both NS and NA. This corresponds to the src address 3012 * in the IPv6 packet. If we insert at tail, we will be 3013 * depending on the packet at the head for successful 3014 * ND resolution. This is not reliable, because the interface 3015 * on which the NA arrives could be different from the interface 3016 * on which the NS was sent, and if the receiving interface is 3017 * failed, it will appear that the sending interface is also 3018 * failed, causing in.mpathd to misdiagnose this as link 3019 * failure. 3020 * 2. Drop the original packet, if the ND resolution did not 3021 * succeed in the first attempt. However we will create the 3022 * nce and the ire, as soon as the ND resolution succeeds. 3023 * We don't gain anything by queueing multiple probe packets 3024 * and sending them back-to-back once resolution succeeds. 3025 * It is sufficient to send just 1 packet after ND resolution 3026 * succeeds. Since mpathd is sending down probe packets at a 3027 * constant rate, we don't need to send the queued packet. We 3028 * need to queue it only for NDP resolution. The benefit of 3029 * dropping the probe packets that were delayed in ND 3030 * resolution, is that in.mpathd will not see inflated 3031 * RTT. If the ND resolution does not succeed within 3032 * in.mpathd's failure detection time, mpathd may detect 3033 * a failure, and it does not matter whether the packet 3034 * was queued or dropped. 3035 */ 3036 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 3037 head_insert = B_TRUE; 3038 } 3039 3040 nce_queue_mp_common(nce, mp, head_insert); 3041 } 3042 3043 /* 3044 * Called when address resolution failed due to a timeout. 3045 * Send an ICMP unreachable in response to all queued packets. 3046 */ 3047 void 3048 nce_resolv_failed(nce_t *nce) 3049 { 3050 mblk_t *mp, *nxt_mp, *first_mp; 3051 char buf[INET6_ADDRSTRLEN]; 3052 ip6_t *ip6h; 3053 zoneid_t zoneid = GLOBAL_ZONEID; 3054 3055 ip1dbg(("nce_resolv_failed: dst %s\n", 3056 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 3057 mutex_enter(&nce->nce_lock); 3058 mp = nce->nce_qd_mp; 3059 nce->nce_qd_mp = NULL; 3060 mutex_exit(&nce->nce_lock); 3061 while (mp != NULL) { 3062 nxt_mp = mp->b_next; 3063 mp->b_next = NULL; 3064 mp->b_prev = NULL; 3065 3066 first_mp = mp; 3067 if (mp->b_datap->db_type == M_CTL) { 3068 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 3069 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3070 zoneid = io->ipsec_out_zoneid; 3071 ASSERT(zoneid != ALL_ZONES); 3072 mp = mp->b_cont; 3073 } 3074 3075 ip6h = (ip6_t *)mp->b_rptr; 3076 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3077 ip6i_t *ip6i; 3078 /* 3079 * This message should have been pulled up already 3080 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 3081 * the header is pulled up. 3082 */ 3083 ip6i = (ip6i_t *)ip6h; 3084 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 3085 sizeof (ip6i_t) + IPV6_HDR_LEN); 3086 mp->b_rptr += sizeof (ip6i_t); 3087 } 3088 /* 3089 * Ignore failure since icmp_unreachable_v6 will silently 3090 * drop packets with an unspecified source address. 3091 */ 3092 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid); 3093 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 3094 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid); 3095 mp = nxt_mp; 3096 } 3097 } 3098 3099 /* 3100 * Called by SIOCSNDP* ioctl to add/change an nce entry 3101 * and the corresponding attributes. 3102 * Disallow states other than ND_REACHABLE or ND_STALE. 3103 */ 3104 int 3105 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 3106 { 3107 sin6_t *sin6; 3108 in6_addr_t *addr; 3109 nce_t *nce; 3110 int err; 3111 uint16_t new_flags = 0; 3112 uint16_t old_flags = 0; 3113 int inflags = lnr->lnr_flags; 3114 3115 ASSERT(ill->ill_isv6); 3116 if ((lnr->lnr_state_create != ND_REACHABLE) && 3117 (lnr->lnr_state_create != ND_STALE)) 3118 return (EINVAL); 3119 3120 sin6 = (sin6_t *)&lnr->lnr_addr; 3121 addr = &sin6->sin6_addr; 3122 3123 mutex_enter(&ndp6.ndp_g_lock); 3124 /* We know it can not be mapping so just look in the hash table */ 3125 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); 3126 nce = nce_lookup_addr(ill, addr, nce); 3127 if (nce != NULL) 3128 new_flags = nce->nce_flags; 3129 3130 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 3131 case NDF_ISROUTER_ON: 3132 new_flags |= NCE_F_ISROUTER; 3133 break; 3134 case NDF_ISROUTER_OFF: 3135 new_flags &= ~NCE_F_ISROUTER; 3136 break; 3137 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3138 mutex_exit(&ndp6.ndp_g_lock); 3139 if (nce != NULL) 3140 NCE_REFRELE(nce); 3141 return (EINVAL); 3142 } 3143 3144 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3145 case NDF_ANYCAST_ON: 3146 new_flags |= NCE_F_ANYCAST; 3147 break; 3148 case NDF_ANYCAST_OFF: 3149 new_flags &= ~NCE_F_ANYCAST; 3150 break; 3151 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3152 mutex_exit(&ndp6.ndp_g_lock); 3153 if (nce != NULL) 3154 NCE_REFRELE(nce); 3155 return (EINVAL); 3156 } 3157 3158 switch (inflags & (NDF_PROXY_ON|NDF_PROXY_OFF)) { 3159 case NDF_PROXY_ON: 3160 new_flags |= NCE_F_PROXY; 3161 break; 3162 case NDF_PROXY_OFF: 3163 new_flags &= ~NCE_F_PROXY; 3164 break; 3165 case (NDF_PROXY_OFF|NDF_PROXY_ON): 3166 mutex_exit(&ndp6.ndp_g_lock); 3167 if (nce != NULL) 3168 NCE_REFRELE(nce); 3169 return (EINVAL); 3170 } 3171 3172 if (nce == NULL) { 3173 err = ndp_add(ill, 3174 (uchar_t *)lnr->lnr_hdw_addr, 3175 addr, 3176 &ipv6_all_ones, 3177 &ipv6_all_zeros, 3178 0, 3179 new_flags, 3180 lnr->lnr_state_create, 3181 &nce, 3182 NULL, 3183 NULL); 3184 if (err != 0) { 3185 mutex_exit(&ndp6.ndp_g_lock); 3186 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 3187 return (err); 3188 } 3189 } 3190 old_flags = nce->nce_flags; 3191 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3192 /* 3193 * Router turned to host, delete all ires. 3194 * XXX Just delete the entry, but we need to add too. 3195 */ 3196 nce->nce_flags &= ~NCE_F_ISROUTER; 3197 mutex_exit(&ndp6.ndp_g_lock); 3198 ndp_delete(nce); 3199 NCE_REFRELE(nce); 3200 return (0); 3201 } 3202 mutex_exit(&ndp6.ndp_g_lock); 3203 3204 mutex_enter(&nce->nce_lock); 3205 nce->nce_flags = new_flags; 3206 mutex_exit(&nce->nce_lock); 3207 /* 3208 * Note that we ignore the state at this point, which 3209 * should be either STALE or REACHABLE. Instead we let 3210 * the link layer address passed in to determine the state 3211 * much like incoming packets. 3212 */ 3213 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3214 NCE_REFRELE(nce); 3215 return (0); 3216 } 3217 3218 /* 3219 * If the device driver supports it, we make nce_fp_mp to have 3220 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 3221 * The caller insures there is hold on nce for this function. 3222 * Note that since ill_fastpath_probe() copies the mblk there is 3223 * no need for the hold beyond this function. 3224 */ 3225 static void 3226 nce_fastpath(nce_t *nce) 3227 { 3228 ill_t *ill = nce->nce_ill; 3229 int res; 3230 3231 ASSERT(ill != NULL); 3232 if (nce->nce_fp_mp != NULL) { 3233 /* Already contains fastpath info */ 3234 return; 3235 } 3236 if (nce->nce_res_mp != NULL) { 3237 nce_fastpath_list_add(nce); 3238 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3239 /* 3240 * EAGAIN is an indication of a transient error 3241 * i.e. allocation failure etc. leave the nce in the list it 3242 * will be updated when another probe happens for another ire 3243 * if not it will be taken out of the list when the ire is 3244 * deleted. 3245 */ 3246 3247 if (res != 0 && res != EAGAIN) 3248 nce_fastpath_list_delete(nce); 3249 } 3250 } 3251 3252 /* 3253 * Drain the list of nce's waiting for fastpath response. 3254 */ 3255 void 3256 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3257 void *arg) 3258 { 3259 3260 nce_t *next_nce; 3261 nce_t *current_nce; 3262 nce_t *first_nce; 3263 nce_t *prev_nce = NULL; 3264 3265 ASSERT(ill != NULL && ill->ill_isv6); 3266 3267 mutex_enter(&ill->ill_lock); 3268 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3269 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3270 next_nce = current_nce->nce_fastpath; 3271 /* 3272 * Take it off the list if we're flushing, or if the callback 3273 * routine tells us to do so. Otherwise, leave the nce in the 3274 * fastpath list to handle any pending response from the lower 3275 * layer. We can't drain the list when the callback routine 3276 * comparison failed, because the response is asynchronous in 3277 * nature, and may not arrive in the same order as the list 3278 * insertion. 3279 */ 3280 if (func == NULL || func(current_nce, arg)) { 3281 current_nce->nce_fastpath = NULL; 3282 if (current_nce == first_nce) 3283 ill->ill_fastpath_list = first_nce = next_nce; 3284 else 3285 prev_nce->nce_fastpath = next_nce; 3286 } else { 3287 /* previous element that is still in the list */ 3288 prev_nce = current_nce; 3289 } 3290 current_nce = next_nce; 3291 } 3292 mutex_exit(&ill->ill_lock); 3293 } 3294 3295 /* 3296 * Add nce to the nce fastpath list. 3297 */ 3298 void 3299 nce_fastpath_list_add(nce_t *nce) 3300 { 3301 ill_t *ill; 3302 3303 ill = nce->nce_ill; 3304 ASSERT(ill != NULL && ill->ill_isv6); 3305 3306 mutex_enter(&ill->ill_lock); 3307 mutex_enter(&nce->nce_lock); 3308 3309 /* 3310 * if nce has not been deleted and 3311 * is not already in the list add it. 3312 */ 3313 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3314 (nce->nce_fastpath == NULL)) { 3315 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3316 ill->ill_fastpath_list = nce; 3317 } 3318 3319 mutex_exit(&nce->nce_lock); 3320 mutex_exit(&ill->ill_lock); 3321 } 3322 3323 /* 3324 * remove nce from the nce fastpath list. 3325 */ 3326 void 3327 nce_fastpath_list_delete(nce_t *nce) 3328 { 3329 nce_t *nce_ptr; 3330 3331 ill_t *ill; 3332 3333 ill = nce->nce_ill; 3334 ASSERT(ill != NULL); 3335 if (!ill->ill_isv6) { 3336 /* 3337 * v4 nce_t's do not have nce_fastpath set. 3338 */ 3339 return; 3340 } 3341 3342 mutex_enter(&ill->ill_lock); 3343 if (nce->nce_fastpath == NULL) 3344 goto done; 3345 3346 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3347 3348 if (ill->ill_fastpath_list == nce) { 3349 ill->ill_fastpath_list = nce->nce_fastpath; 3350 } else { 3351 nce_ptr = ill->ill_fastpath_list; 3352 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3353 if (nce_ptr->nce_fastpath == nce) { 3354 nce_ptr->nce_fastpath = nce->nce_fastpath; 3355 break; 3356 } 3357 nce_ptr = nce_ptr->nce_fastpath; 3358 } 3359 } 3360 3361 nce->nce_fastpath = NULL; 3362 done: 3363 mutex_exit(&ill->ill_lock); 3364 } 3365 3366 /* 3367 * Update all NCE's that are not in fastpath mode and 3368 * have an nce_fp_mp that matches mp. mp->b_cont contains 3369 * the fastpath header. 3370 * 3371 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 3372 */ 3373 boolean_t 3374 ndp_fastpath_update(nce_t *nce, void *arg) 3375 { 3376 mblk_t *mp, *fp_mp; 3377 uchar_t *mp_rptr, *ud_mp_rptr; 3378 mblk_t *ud_mp = nce->nce_res_mp; 3379 ptrdiff_t cmplen; 3380 3381 if (nce->nce_flags & NCE_F_MAPPING) 3382 return (B_TRUE); 3383 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3384 return (B_TRUE); 3385 3386 ip2dbg(("ndp_fastpath_update: trying\n")); 3387 mp = (mblk_t *)arg; 3388 mp_rptr = mp->b_rptr; 3389 cmplen = mp->b_wptr - mp_rptr; 3390 ASSERT(cmplen >= 0); 3391 ud_mp_rptr = ud_mp->b_rptr; 3392 /* 3393 * The nce is locked here to prevent any other threads 3394 * from accessing and changing nce_res_mp when the IPv6 address 3395 * becomes resolved to an lla while we're in the middle 3396 * of looking at and comparing the hardware address (lla). 3397 * It is also locked to prevent multiple threads in nce_fastpath_update 3398 * from examining nce_res_mp atthe same time. 3399 */ 3400 mutex_enter(&nce->nce_lock); 3401 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3402 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3403 mutex_exit(&nce->nce_lock); 3404 /* 3405 * Don't take the ire off the fastpath list yet, 3406 * since the response may come later. 3407 */ 3408 return (B_FALSE); 3409 } 3410 /* Matched - install mp as the fastpath mp */ 3411 ip1dbg(("ndp_fastpath_update: match\n")); 3412 fp_mp = dupb(mp->b_cont); 3413 if (fp_mp != NULL) { 3414 nce->nce_fp_mp = fp_mp; 3415 } 3416 mutex_exit(&nce->nce_lock); 3417 return (B_TRUE); 3418 } 3419 3420 /* 3421 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3422 * driver. Note that it assumes IP is exclusive... 3423 */ 3424 /* ARGSUSED */ 3425 void 3426 ndp_fastpath_flush(nce_t *nce, char *arg) 3427 { 3428 if (nce->nce_flags & NCE_F_MAPPING) 3429 return; 3430 /* No fastpath info? */ 3431 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3432 return; 3433 3434 /* Just delete the NCE... */ 3435 ndp_delete(nce); 3436 } 3437 3438 /* 3439 * Return a pointer to a given option in the packet. 3440 * Assumes that option part of the packet have already been validated. 3441 */ 3442 nd_opt_hdr_t * 3443 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3444 { 3445 while (optlen > 0) { 3446 if (opt->nd_opt_type == opt_type) 3447 return (opt); 3448 optlen -= 8 * opt->nd_opt_len; 3449 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3450 } 3451 return (NULL); 3452 } 3453 3454 /* 3455 * Verify all option lengths present are > 0, also check to see 3456 * if the option lengths and packet length are consistent. 3457 */ 3458 boolean_t 3459 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3460 { 3461 ASSERT(opt != NULL); 3462 while (optlen > 0) { 3463 if (opt->nd_opt_len == 0) 3464 return (B_FALSE); 3465 optlen -= 8 * opt->nd_opt_len; 3466 if (optlen < 0) 3467 return (B_FALSE); 3468 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3469 } 3470 return (B_TRUE); 3471 } 3472 3473 /* 3474 * ndp_walk function. 3475 * Free a fraction of the NCE cache entries. 3476 * A fraction of zero means to not free any in that category. 3477 */ 3478 void 3479 ndp_cache_reclaim(nce_t *nce, char *arg) 3480 { 3481 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3482 uint_t rand; 3483 3484 if (nce->nce_flags & NCE_F_PERMANENT) 3485 return; 3486 3487 rand = (uint_t)lbolt + 3488 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3489 if (ncr->ncr_host != 0 && 3490 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3491 ndp_delete(nce); 3492 return; 3493 } 3494 } 3495 3496 /* 3497 * ndp_walk function. 3498 * Count the number of NCEs that can be deleted. 3499 * These would be hosts but not routers. 3500 */ 3501 void 3502 ndp_cache_count(nce_t *nce, char *arg) 3503 { 3504 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3505 3506 if (nce->nce_flags & NCE_F_PERMANENT) 3507 return; 3508 3509 ncc->ncc_total++; 3510 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3511 ncc->ncc_host++; 3512 } 3513 3514 #ifdef NCE_DEBUG 3515 th_trace_t * 3516 th_trace_nce_lookup(nce_t *nce) 3517 { 3518 int bucket_id; 3519 th_trace_t *th_trace; 3520 3521 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3522 3523 bucket_id = IP_TR_HASH(curthread); 3524 ASSERT(bucket_id < IP_TR_HASH_MAX); 3525 3526 for (th_trace = nce->nce_trace[bucket_id]; th_trace != NULL; 3527 th_trace = th_trace->th_next) { 3528 if (th_trace->th_id == curthread) 3529 return (th_trace); 3530 } 3531 return (NULL); 3532 } 3533 3534 void 3535 nce_trace_ref(nce_t *nce) 3536 { 3537 int bucket_id; 3538 th_trace_t *th_trace; 3539 3540 /* 3541 * Attempt to locate the trace buffer for the curthread. 3542 * If it does not exist, then allocate a new trace buffer 3543 * and link it in list of trace bufs for this ipif, at the head 3544 */ 3545 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3546 3547 if (nce->nce_trace_disable == B_TRUE) 3548 return; 3549 3550 th_trace = th_trace_nce_lookup(nce); 3551 if (th_trace == NULL) { 3552 bucket_id = IP_TR_HASH(curthread); 3553 th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), 3554 KM_NOSLEEP); 3555 if (th_trace == NULL) { 3556 nce->nce_trace_disable = B_TRUE; 3557 nce_trace_inactive(nce); 3558 return; 3559 } 3560 th_trace->th_id = curthread; 3561 th_trace->th_next = nce->nce_trace[bucket_id]; 3562 th_trace->th_prev = &nce->nce_trace[bucket_id]; 3563 if (th_trace->th_next != NULL) 3564 th_trace->th_next->th_prev = &th_trace->th_next; 3565 nce->nce_trace[bucket_id] = th_trace; 3566 } 3567 ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1); 3568 th_trace->th_refcnt++; 3569 th_trace_rrecord(th_trace); 3570 } 3571 3572 void 3573 nce_untrace_ref(nce_t *nce) 3574 { 3575 th_trace_t *th_trace; 3576 3577 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3578 3579 if (nce->nce_trace_disable == B_TRUE) 3580 return; 3581 3582 th_trace = th_trace_nce_lookup(nce); 3583 ASSERT(th_trace != NULL && th_trace->th_refcnt > 0); 3584 3585 th_trace_rrecord(th_trace); 3586 th_trace->th_refcnt--; 3587 } 3588 3589 void 3590 nce_trace_inactive(nce_t *nce) 3591 { 3592 th_trace_t *th_trace; 3593 int i; 3594 3595 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3596 3597 for (i = 0; i < IP_TR_HASH_MAX; i++) { 3598 while (nce->nce_trace[i] != NULL) { 3599 th_trace = nce->nce_trace[i]; 3600 3601 /* unlink th_trace and free it */ 3602 nce->nce_trace[i] = th_trace->th_next; 3603 if (th_trace->th_next != NULL) 3604 th_trace->th_next->th_prev = 3605 &nce->nce_trace[i]; 3606 3607 th_trace->th_next = NULL; 3608 th_trace->th_prev = NULL; 3609 kmem_free(th_trace, sizeof (th_trace_t)); 3610 } 3611 } 3612 3613 } 3614 3615 /* ARGSUSED */ 3616 int 3617 nce_thread_exit(nce_t *nce, caddr_t arg) 3618 { 3619 th_trace_t *th_trace; 3620 3621 mutex_enter(&nce->nce_lock); 3622 th_trace = th_trace_nce_lookup(nce); 3623 3624 if (th_trace == NULL) { 3625 mutex_exit(&nce->nce_lock); 3626 return (0); 3627 } 3628 3629 ASSERT(th_trace->th_refcnt == 0); 3630 3631 /* unlink th_trace and free it */ 3632 *th_trace->th_prev = th_trace->th_next; 3633 if (th_trace->th_next != NULL) 3634 th_trace->th_next->th_prev = th_trace->th_prev; 3635 th_trace->th_next = NULL; 3636 th_trace->th_prev = NULL; 3637 kmem_free(th_trace, sizeof (th_trace_t)); 3638 mutex_exit(&nce->nce_lock); 3639 return (0); 3640 } 3641 #endif 3642 3643 /* 3644 * Called when address resolution fails due to a timeout. 3645 * Send an ICMP unreachable in response to all queued packets. 3646 */ 3647 void 3648 arp_resolv_failed(nce_t *nce) 3649 { 3650 mblk_t *mp, *nxt_mp, *first_mp; 3651 char buf[INET6_ADDRSTRLEN]; 3652 zoneid_t zoneid = GLOBAL_ZONEID; 3653 struct in_addr ipv4addr; 3654 3655 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3656 ip3dbg(("arp_resolv_failed: dst %s\n", 3657 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3658 mutex_enter(&nce->nce_lock); 3659 mp = nce->nce_qd_mp; 3660 nce->nce_qd_mp = NULL; 3661 mutex_exit(&nce->nce_lock); 3662 3663 while (mp != NULL) { 3664 nxt_mp = mp->b_next; 3665 mp->b_next = NULL; 3666 mp->b_prev = NULL; 3667 3668 first_mp = mp; 3669 /* 3670 * Send icmp unreachable messages 3671 * to the hosts. 3672 */ 3673 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid); 3674 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3675 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3676 ICMP_HOST_UNREACHABLE, zoneid); 3677 mp = nxt_mp; 3678 } 3679 } 3680 3681 static int 3682 ndp_lookup_then_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, 3683 const in_addr_t *mask, const in_addr_t *extract_mask, 3684 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 3685 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 3686 { 3687 int err = 0; 3688 nce_t *nce; 3689 in6_addr_t addr6; 3690 3691 mutex_enter(&ndp4.ndp_g_lock); 3692 nce = *((nce_t **)NCE_HASH_PTR_V4(*addr)); 3693 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3694 nce = nce_lookup_addr(ill, &addr6, nce); 3695 if (nce == NULL) { 3696 err = ndp_add_v4(ill, 3697 hw_addr, 3698 addr, 3699 mask, 3700 extract_mask, 3701 hw_extract_start, 3702 flags, 3703 state, 3704 newnce, 3705 fp_mp, 3706 res_mp); 3707 } else { 3708 *newnce = nce; 3709 err = EEXIST; 3710 } 3711 mutex_exit(&ndp4.ndp_g_lock); 3712 return (err); 3713 } 3714 3715 /* 3716 * NDP Cache Entry creation routine for IPv4. 3717 * Mapped entries are handled in arp. 3718 * This routine must always be called with ndp4.ndp_g_lock held. 3719 * Prior to return, nce_refcnt is incremented. 3720 */ 3721 static int 3722 ndp_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, 3723 const in_addr_t *mask, const in_addr_t *extract_mask, 3724 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 3725 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 3726 { 3727 static nce_t nce_nil; 3728 nce_t *nce; 3729 mblk_t *mp; 3730 mblk_t *template; 3731 nce_t **ncep; 3732 3733 ASSERT(MUTEX_HELD(&ndp4.ndp_g_lock)); 3734 ASSERT(ill != NULL); 3735 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 3736 return (EINVAL); 3737 } 3738 ASSERT((flags & NCE_F_MAPPING) == 0); 3739 ASSERT(extract_mask == NULL); 3740 /* 3741 * Allocate the mblk to hold the nce. 3742 */ 3743 mp = allocb(sizeof (nce_t), BPRI_MED); 3744 if (mp == NULL) 3745 return (ENOMEM); 3746 3747 nce = (nce_t *)mp->b_rptr; 3748 mp->b_wptr = (uchar_t *)&nce[1]; 3749 *nce = nce_nil; 3750 3751 /* 3752 * This one holds link layer address; if res_mp has been provided 3753 * by the caller, accept it without any further checks. Otherwise, 3754 * for V4, we fill it up with ill_resolver_mp here, then in 3755 * in ire_arpresolve(), we fill it up with the ARP query 3756 * once its formulated. 3757 */ 3758 if (res_mp != NULL) { 3759 template = res_mp; 3760 } else { 3761 if (ill->ill_resolver_mp == NULL) { 3762 freeb(mp); 3763 return (EINVAL); 3764 } 3765 template = copyb(ill->ill_resolver_mp); 3766 } 3767 if (template == NULL) { 3768 freeb(mp); 3769 return (ENOMEM); 3770 } 3771 nce->nce_ill = ill; 3772 nce->nce_ipversion = IPV4_VERSION; 3773 nce->nce_flags = flags; 3774 nce->nce_state = state; 3775 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3776 nce->nce_rcnt = ill->ill_xmit_count; 3777 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3778 if (*mask == IP_HOST_MASK) { 3779 nce->nce_mask = ipv6_all_ones; 3780 } else { 3781 IN6_IPADDR_TO_V4MAPPED(*mask, &nce->nce_mask); 3782 } 3783 nce->nce_extract_mask = ipv6_all_zeros; 3784 nce->nce_ll_extract_start = hw_extract_start; 3785 nce->nce_fp_mp = (fp_mp? fp_mp : NULL); 3786 nce->nce_res_mp = template; 3787 if (state == ND_REACHABLE) 3788 nce->nce_last = TICK_TO_MSEC(lbolt64); 3789 else 3790 nce->nce_last = 0; 3791 nce->nce_qd_mp = NULL; 3792 nce->nce_mp = mp; 3793 if (hw_addr != NULL) 3794 nce_set_ll(nce, hw_addr); 3795 /* This one is for nce getting created */ 3796 nce->nce_refcnt = 1; 3797 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3798 ncep = ((nce_t **)NCE_HASH_PTR_V4(*addr)); 3799 3800 #ifdef NCE_DEBUG 3801 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 3802 #endif 3803 /* 3804 * Atomically ensure that the ill is not CONDEMNED, before 3805 * adding the NCE. 3806 */ 3807 mutex_enter(&ill->ill_lock); 3808 if (ill->ill_state_flags & ILL_CONDEMNED) { 3809 mutex_exit(&ill->ill_lock); 3810 freeb(mp); 3811 if (res_mp == NULL) { 3812 /* 3813 * template was locally allocated. need to free it. 3814 */ 3815 freeb(template); 3816 } 3817 return (EINVAL); 3818 } 3819 if ((nce->nce_next = *ncep) != NULL) 3820 nce->nce_next->nce_ptpn = &nce->nce_next; 3821 *ncep = nce; 3822 nce->nce_ptpn = ncep; 3823 *newnce = nce; 3824 /* This one is for nce being used by an active thread */ 3825 NCE_REFHOLD(*newnce); 3826 3827 /* Bump up the number of nce's referencing this ill */ 3828 ill->ill_nce_cnt++; 3829 mutex_exit(&ill->ill_lock); 3830 return (0); 3831 } 3832 3833 void 3834 ndp_flush_qd_mp(nce_t *nce) 3835 { 3836 mblk_t *qd_mp, *qd_next; 3837 3838 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3839 qd_mp = nce->nce_qd_mp; 3840 nce->nce_qd_mp = NULL; 3841 while (qd_mp != NULL) { 3842 qd_next = qd_mp->b_next; 3843 qd_mp->b_next = NULL; 3844 qd_mp->b_prev = NULL; 3845 freemsg(qd_mp); 3846 qd_mp = qd_next; 3847 } 3848 } 3849 3850 nce_t * 3851 nce_reinit(nce_t *nce) 3852 { 3853 nce_t *newnce = NULL; 3854 in_addr_t nce_addr, nce_mask; 3855 3856 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3857 IN6_V4MAPPED_TO_IPADDR(&nce->nce_mask, nce_mask); 3858 /* 3859 * delete the old one. this will get rid of any ire's pointing 3860 * at this nce. 3861 */ 3862 ndp_delete(nce); 3863 /* 3864 * create a new nce with the same addr and mask. 3865 */ 3866 mutex_enter(&ndp4.ndp_g_lock); 3867 (void) ndp_add_v4(nce->nce_ill, NULL, &nce_addr, &nce_mask, NULL, 0, 0, 3868 ND_INITIAL, &newnce, NULL, NULL); 3869 mutex_exit(&ndp4.ndp_g_lock); 3870 /* 3871 * refrele the old nce. 3872 */ 3873 NCE_REFRELE(nce); 3874 return (newnce); 3875 } 3876 3877 /* 3878 * ndp_walk routine to delete all entries that have a given destination or 3879 * gateway address and cached link layer (MAC) address. This is used when ARP 3880 * informs us that a network-to-link-layer mapping may have changed. 3881 */ 3882 void 3883 nce_delete_hw_changed(nce_t *nce, void *arg) 3884 { 3885 nce_hw_map_t *hwm = arg; 3886 mblk_t *mp; 3887 dl_unitdata_req_t *dlu; 3888 uchar_t *macaddr; 3889 ill_t *ill; 3890 int saplen; 3891 ipaddr_t nce_addr; 3892 3893 if (nce->nce_state != ND_REACHABLE) 3894 return; 3895 3896 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3897 if (nce_addr != hwm->hwm_addr) 3898 return; 3899 3900 mutex_enter(&nce->nce_lock); 3901 if ((mp = nce->nce_res_mp) == NULL) { 3902 mutex_exit(&nce->nce_lock); 3903 return; 3904 } 3905 dlu = (dl_unitdata_req_t *)mp->b_rptr; 3906 macaddr = (uchar_t *)(dlu + 1); 3907 ill = nce->nce_ill; 3908 if ((saplen = ill->ill_sap_length) > 0) 3909 macaddr += saplen; 3910 else 3911 saplen = -saplen; 3912 3913 /* 3914 * If the hardware address is unchanged, then leave this one alone. 3915 * Note that saplen == abs(saplen) now. 3916 */ 3917 if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && 3918 bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { 3919 mutex_exit(&nce->nce_lock); 3920 return; 3921 } 3922 mutex_exit(&nce->nce_lock); 3923 3924 DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); 3925 ndp_delete(nce); 3926 } 3927 3928 /* 3929 * This function verifies whether a given IPv4 address is potentially known to 3930 * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t, 3931 * so that it can continue to look for hardware changes on that address. 3932 */ 3933 boolean_t 3934 ndp_lookup_ipaddr(in_addr_t addr) 3935 { 3936 nce_t *nce; 3937 struct in_addr nceaddr; 3938 3939 if (addr == INADDR_ANY) 3940 return (B_FALSE); 3941 3942 mutex_enter(&ndp4.ndp_g_lock); 3943 nce = *(nce_t **)NCE_HASH_PTR_V4(addr); 3944 for (; nce != NULL; nce = nce->nce_next) { 3945 /* Note that only v4 mapped entries are in the table. */ 3946 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 3947 if (addr == nceaddr.s_addr && 3948 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 3949 /* Single flag check; no lock needed */ 3950 if (!(nce->nce_flags & NCE_F_CONDEMNED)) 3951 break; 3952 } 3953 } 3954 mutex_exit(&ndp4.ndp_g_lock); 3955 return (nce != NULL); 3956 } 3957