1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/sysmacros.h> 33 #include <sys/errno.h> 34 #include <sys/dlpi.h> 35 #include <sys/socket.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/vtrace.h> 41 #include <sys/kmem.h> 42 #include <sys/zone.h> 43 #include <sys/ethernet.h> 44 #include <sys/sdt.h> 45 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/if_dl.h> 49 #include <net/route.h> 50 #include <netinet/in.h> 51 #include <netinet/ip6.h> 52 #include <netinet/icmp6.h> 53 54 #include <inet/common.h> 55 #include <inet/mi.h> 56 #include <inet/mib2.h> 57 #include <inet/nd.h> 58 #include <inet/ip.h> 59 #include <inet/ip_impl.h> 60 #include <inet/ip_if.h> 61 #include <inet/ip_ire.h> 62 #include <inet/ip_rts.h> 63 #include <inet/ip6.h> 64 #include <inet/ip_ndp.h> 65 #include <inet/ipsec_impl.h> 66 #include <inet/ipsec_info.h> 67 #include <inet/sctp_ip.h> 68 69 /* 70 * Function names with nce_ prefix are static while function 71 * names with ndp_ prefix are used by rest of the IP. 72 * 73 * Lock ordering: 74 * 75 * ndp_g_lock -> ill_lock -> nce_lock 76 * 77 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 78 * nce_next. Nce_lock protects the contents of the NCE (particularly 79 * nce_refcnt). 80 */ 81 82 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 83 uint32_t ll_addr_len); 84 static void nce_fastpath(nce_t *nce); 85 static void nce_ire_delete(nce_t *nce); 86 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 87 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 88 static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *, nce_t *); 89 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 90 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 91 uchar_t *addr); 92 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 93 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 94 static void nce_report1(nce_t *nce, uchar_t *mp_arg); 95 static mblk_t *nce_udreq_alloc(ill_t *ill); 96 static void nce_update(nce_t *nce, uint16_t new_state, 97 uchar_t *new_ll_addr); 98 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 99 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 100 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 101 const in6_addr_t *target, int flag); 102 extern void th_trace_rrecord(th_trace_t *); 103 static int ndp_lookup_then_add_v6(ill_t *, uchar_t *, 104 const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, 105 uint32_t, uint16_t, uint16_t, nce_t **, mblk_t *, mblk_t *); 106 static int ndp_lookup_then_add_v4(ill_t *, uchar_t *, 107 const in_addr_t *, const in_addr_t *, const in_addr_t *, 108 uint32_t, uint16_t, uint16_t, nce_t **, mblk_t *, mblk_t *); 109 static int ndp_add_v6(ill_t *, uchar_t *, const in6_addr_t *, 110 const in6_addr_t *, const in6_addr_t *, uint32_t, uint16_t, uint16_t, 111 nce_t **); 112 static int ndp_add_v4(ill_t *, uchar_t *, const in_addr_t *, 113 const in_addr_t *, const in_addr_t *, uint32_t, uint16_t, uint16_t, 114 nce_t **, mblk_t *, mblk_t *); 115 116 117 #ifdef NCE_DEBUG 118 void nce_trace_inactive(nce_t *); 119 #endif 120 121 ndp_g_t ndp4, ndp6; 122 123 #define NCE_HASH_PTR_V4(addr) \ 124 (&(ndp4.nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 125 126 #define NCE_HASH_PTR_V6(addr) \ 127 (&(ndp6.nce_hash_tbl[NCE_ADDR_HASH_V6(addr, NCE_TABLE_SIZE)])) 128 129 /* 130 * Compute default flags to use for an advertisement of this nce's address. 131 */ 132 static int 133 nce_advert_flags(const nce_t *nce) 134 { 135 int flag = 0; 136 137 if (nce->nce_flags & NCE_F_ISROUTER) 138 flag |= NDP_ISROUTER; 139 if (!(nce->nce_flags & NCE_F_PROXY)) 140 flag |= NDP_ORIDE; 141 return (flag); 142 } 143 144 int 145 ndp_add(ill_t *ill, uchar_t *hw_addr, const void *addr, 146 const void *mask, const void *extract_mask, 147 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 148 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 149 { 150 int status; 151 152 if (ill->ill_isv6) 153 status = ndp_add_v6(ill, hw_addr, (in6_addr_t *)addr, 154 (in6_addr_t *)mask, (in6_addr_t *)extract_mask, 155 hw_extract_start, flags, state, newnce); 156 else 157 status = ndp_add_v4(ill, hw_addr, (in_addr_t *)addr, 158 (in_addr_t *)mask, (in_addr_t *)extract_mask, 159 hw_extract_start, flags, state, newnce, fp_mp, res_mp); 160 return (status); 161 } 162 163 /* Non-tunable probe interval, based on link capabilities */ 164 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 165 166 /* 167 * NDP Cache Entry creation routine. 168 * Mapped entries will never do NUD . 169 * This routine must always be called with ndp6.ndp_g_lock held. 170 * Prior to return, nce_refcnt is incremented. 171 */ 172 static int 173 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 174 const in6_addr_t *mask, const in6_addr_t *extract_mask, 175 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 176 nce_t **newnce) 177 { 178 static nce_t nce_nil; 179 nce_t *nce; 180 mblk_t *mp; 181 mblk_t *template; 182 nce_t **ncep; 183 int err; 184 boolean_t dropped = B_FALSE; 185 186 ASSERT(MUTEX_HELD(&ndp6.ndp_g_lock)); 187 ASSERT(ill != NULL && ill->ill_isv6); 188 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 189 ip0dbg(("ndp_add: no addr\n")); 190 return (EINVAL); 191 } 192 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 193 ip0dbg(("ndp_add: flags = %x\n", (int)flags)); 194 return (EINVAL); 195 } 196 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 197 (flags & NCE_F_MAPPING)) { 198 ip0dbg(("ndp_add: extract mask zero for mapping")); 199 return (EINVAL); 200 } 201 /* 202 * Allocate the mblk to hold the nce. 203 * 204 * XXX This can come out of a separate cache - nce_cache. 205 * We don't need the mp anymore as there are no more 206 * "qwriter"s 207 */ 208 mp = allocb(sizeof (nce_t), BPRI_MED); 209 if (mp == NULL) 210 return (ENOMEM); 211 212 nce = (nce_t *)mp->b_rptr; 213 mp->b_wptr = (uchar_t *)&nce[1]; 214 *nce = nce_nil; 215 216 /* 217 * This one holds link layer address 218 */ 219 if (ill->ill_net_type == IRE_IF_RESOLVER) { 220 template = nce_udreq_alloc(ill); 221 } else { 222 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 223 ASSERT((ill->ill_resolver_mp != NULL)); 224 template = copyb(ill->ill_resolver_mp); 225 } 226 if (template == NULL) { 227 freeb(mp); 228 return (ENOMEM); 229 } 230 nce->nce_ill = ill; 231 nce->nce_ipversion = IPV6_VERSION; 232 nce->nce_flags = flags; 233 nce->nce_state = state; 234 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 235 nce->nce_rcnt = ill->ill_xmit_count; 236 nce->nce_addr = *addr; 237 nce->nce_mask = *mask; 238 nce->nce_extract_mask = *extract_mask; 239 nce->nce_ll_extract_start = hw_extract_start; 240 nce->nce_fp_mp = NULL; 241 nce->nce_res_mp = template; 242 if (state == ND_REACHABLE) 243 nce->nce_last = TICK_TO_MSEC(lbolt64); 244 else 245 nce->nce_last = 0; 246 nce->nce_qd_mp = NULL; 247 nce->nce_mp = mp; 248 if (hw_addr != NULL) 249 nce_set_ll(nce, hw_addr); 250 /* This one is for nce getting created */ 251 nce->nce_refcnt = 1; 252 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 253 if (nce->nce_flags & NCE_F_MAPPING) { 254 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 255 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 256 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 257 ncep = &ndp6.nce_mask_entries; 258 } else { 259 ncep = ((nce_t **)NCE_HASH_PTR_V6(*addr)); 260 } 261 262 #ifdef NCE_DEBUG 263 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 264 #endif 265 /* 266 * Atomically ensure that the ill is not CONDEMNED, before 267 * adding the NCE. 268 */ 269 mutex_enter(&ill->ill_lock); 270 if (ill->ill_state_flags & ILL_CONDEMNED) { 271 mutex_exit(&ill->ill_lock); 272 freeb(mp); 273 freeb(template); 274 return (EINVAL); 275 } 276 if ((nce->nce_next = *ncep) != NULL) 277 nce->nce_next->nce_ptpn = &nce->nce_next; 278 *ncep = nce; 279 nce->nce_ptpn = ncep; 280 *newnce = nce; 281 /* This one is for nce being used by an active thread */ 282 NCE_REFHOLD(*newnce); 283 284 /* Bump up the number of nce's referencing this ill */ 285 ill->ill_nce_cnt++; 286 mutex_exit(&ill->ill_lock); 287 288 err = 0; 289 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 290 mutex_enter(&nce->nce_lock); 291 mutex_exit(&ndp6.ndp_g_lock); 292 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 293 mutex_exit(&nce->nce_lock); 294 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 295 &ipv6_all_zeros, addr, NDP_PROBE); 296 if (dropped) { 297 mutex_enter(&nce->nce_lock); 298 nce->nce_pcnt++; 299 mutex_exit(&nce->nce_lock); 300 } 301 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 302 mutex_enter(&ndp6.ndp_g_lock); 303 err = EINPROGRESS; 304 } else if (flags & NCE_F_UNSOL_ADV) { 305 /* 306 * We account for the transmit below by assigning one 307 * less than the ndd variable. Subsequent decrements 308 * are done in ndp_timer. 309 */ 310 mutex_enter(&nce->nce_lock); 311 mutex_exit(&ndp6.ndp_g_lock); 312 nce->nce_unsolicit_count = ip_ndp_unsolicit_count - 1; 313 mutex_exit(&nce->nce_lock); 314 dropped = nce_xmit(ill, 315 ND_NEIGHBOR_ADVERT, 316 ill, /* ill to be used for extracting ill_nd_lla */ 317 B_TRUE, /* use ill_nd_lla */ 318 addr, /* Source and target of the advertisement pkt */ 319 &ipv6_all_hosts_mcast, /* Destination of the packet */ 320 nce_advert_flags(nce)); 321 mutex_enter(&nce->nce_lock); 322 if (dropped) 323 nce->nce_unsolicit_count++; 324 if (nce->nce_unsolicit_count != 0) { 325 nce->nce_timeout_id = timeout(ndp_timer, nce, 326 MSEC_TO_TICK(ip_ndp_unsolicit_interval)); 327 } 328 mutex_exit(&nce->nce_lock); 329 mutex_enter(&ndp6.ndp_g_lock); 330 } 331 /* 332 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 333 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 334 * We call nce_fastpath from nce_update if the link layer address of 335 * the peer changes from nce_update 336 */ 337 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 338 nce_fastpath(nce); 339 return (err); 340 } 341 342 int 343 ndp_lookup_then_add(ill_t *ill, uchar_t *hw_addr, const void *addr, 344 const void *mask, const void *extract_mask, 345 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 346 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 347 { 348 int status; 349 350 if (ill->ill_isv6) { 351 status = ndp_lookup_then_add_v6(ill, hw_addr, 352 (in6_addr_t *)addr, (in6_addr_t *)mask, 353 (in6_addr_t *)extract_mask, hw_extract_start, flags, 354 state, newnce, fp_mp, res_mp); 355 } else { 356 status = ndp_lookup_then_add_v4(ill, hw_addr, 357 (in_addr_t *)addr, (in_addr_t *)mask, 358 (in_addr_t *)extract_mask, hw_extract_start, flags, 359 state, newnce, fp_mp, res_mp); 360 } 361 362 return (status); 363 } 364 365 static int 366 ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 367 const in6_addr_t *mask, const in6_addr_t *extract_mask, 368 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 369 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 370 { 371 int err = 0; 372 nce_t *nce; 373 374 ASSERT(ill != NULL && ill->ill_isv6); 375 mutex_enter(&ndp6.ndp_g_lock); 376 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); /* head of v6 hash table */ 377 nce = nce_lookup_addr(ill, addr, nce); 378 if (nce == NULL) { 379 err = ndp_add(ill, 380 hw_addr, 381 addr, 382 mask, 383 extract_mask, 384 hw_extract_start, 385 flags, 386 state, 387 newnce, 388 fp_mp, 389 res_mp); 390 } else { 391 *newnce = nce; 392 err = EEXIST; 393 } 394 mutex_exit(&ndp6.ndp_g_lock); 395 return (err); 396 } 397 398 /* 399 * Remove all the CONDEMNED nces from the appropriate hash table. 400 * We create a private list of NCEs, these may have ires pointing 401 * to them, so the list will be passed through to clean up dependent 402 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 403 */ 404 static void 405 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 406 { 407 nce_t *nce1; 408 nce_t **ptpn; 409 410 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 411 ASSERT(ndp->ndp_g_walker == 0); 412 for (; nce; nce = nce1) { 413 nce1 = nce->nce_next; 414 mutex_enter(&nce->nce_lock); 415 if (nce->nce_flags & NCE_F_CONDEMNED) { 416 ptpn = nce->nce_ptpn; 417 nce1 = nce->nce_next; 418 if (nce1 != NULL) 419 nce1->nce_ptpn = ptpn; 420 *ptpn = nce1; 421 nce->nce_ptpn = NULL; 422 nce->nce_next = NULL; 423 nce->nce_next = *free_nce_list; 424 *free_nce_list = nce; 425 } 426 mutex_exit(&nce->nce_lock); 427 } 428 } 429 430 /* 431 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 432 * will return this NCE. Also no new IREs will be created that 433 * point to this NCE (See ire_add_v6). Also no new timeouts will 434 * be started (See NDP_RESTART_TIMER). 435 * 2. Cancel any currently running timeouts. 436 * 3. If there is an ndp walker, return. The walker will do the cleanup. 437 * This ensures that walkers see a consistent list of NCEs while walking. 438 * 4. Otherwise remove the NCE from the list of NCEs 439 * 5. Delete all IREs pointing to this NCE. 440 */ 441 void 442 ndp_delete(nce_t *nce) 443 { 444 nce_t **ptpn; 445 nce_t *nce1; 446 int ipversion = nce->nce_ipversion; 447 ndp_g_t *ndp = (ipversion == IPV4_VERSION ? &ndp4 : &ndp6); 448 449 /* Serialize deletes */ 450 mutex_enter(&nce->nce_lock); 451 if (nce->nce_flags & NCE_F_CONDEMNED) { 452 /* Some other thread is doing the delete */ 453 mutex_exit(&nce->nce_lock); 454 return; 455 } 456 /* 457 * Caller has a refhold. Also 1 ref for being in the list. Thus 458 * refcnt has to be >= 2 459 */ 460 ASSERT(nce->nce_refcnt >= 2); 461 nce->nce_flags |= NCE_F_CONDEMNED; 462 mutex_exit(&nce->nce_lock); 463 464 nce_fastpath_list_delete(nce); 465 466 /* 467 * Cancel any running timer. Timeout can't be restarted 468 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 469 * Passing invalid timeout id is fine. 470 */ 471 if (nce->nce_timeout_id != 0) { 472 (void) untimeout(nce->nce_timeout_id); 473 nce->nce_timeout_id = 0; 474 } 475 476 mutex_enter(&ndp->ndp_g_lock); 477 if (nce->nce_ptpn == NULL) { 478 /* 479 * The last ndp walker has already removed this nce from 480 * the list after we marked the nce CONDEMNED and before 481 * we grabbed the global lock. 482 */ 483 mutex_exit(&ndp->ndp_g_lock); 484 return; 485 } 486 if (ndp->ndp_g_walker > 0) { 487 /* 488 * Can't unlink. The walker will clean up 489 */ 490 ndp->ndp_g_walker_cleanup = B_TRUE; 491 mutex_exit(&ndp->ndp_g_lock); 492 return; 493 } 494 495 /* 496 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 497 * the timer since it is marked CONDEMNED. 498 */ 499 ptpn = nce->nce_ptpn; 500 nce1 = nce->nce_next; 501 if (nce1 != NULL) 502 nce1->nce_ptpn = ptpn; 503 *ptpn = nce1; 504 nce->nce_ptpn = NULL; 505 nce->nce_next = NULL; 506 mutex_exit(&ndp->ndp_g_lock); 507 508 nce_ire_delete(nce); 509 } 510 511 void 512 ndp_inactive(nce_t *nce) 513 { 514 mblk_t **mpp; 515 ill_t *ill; 516 517 ASSERT(nce->nce_refcnt == 0); 518 ASSERT(MUTEX_HELD(&nce->nce_lock)); 519 ASSERT(nce->nce_fastpath == NULL); 520 521 /* Free all nce allocated messages */ 522 mpp = &nce->nce_first_mp_to_free; 523 do { 524 while (*mpp != NULL) { 525 mblk_t *mp; 526 527 mp = *mpp; 528 *mpp = mp->b_next; 529 mp->b_next = NULL; 530 mp->b_prev = NULL; 531 freemsg(mp); 532 } 533 } while (mpp++ != &nce->nce_last_mp_to_free); 534 535 #ifdef NCE_DEBUG 536 nce_trace_inactive(nce); 537 #endif 538 539 ill = nce->nce_ill; 540 mutex_enter(&ill->ill_lock); 541 ill->ill_nce_cnt--; 542 /* 543 * If the number of nce's associated with this ill have dropped 544 * to zero, check whether we need to restart any operation that 545 * is waiting for this to happen. 546 */ 547 if (ill->ill_nce_cnt == 0) { 548 /* ipif_ill_refrele_tail drops the ill_lock */ 549 ipif_ill_refrele_tail(ill); 550 } else { 551 mutex_exit(&ill->ill_lock); 552 } 553 mutex_destroy(&nce->nce_lock); 554 freeb(nce->nce_mp); 555 } 556 557 /* 558 * ndp_walk routine. Delete the nce if it is associated with the ill 559 * that is going away. Always called as a writer. 560 */ 561 void 562 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 563 { 564 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 565 ndp_delete(nce); 566 } 567 } 568 569 /* 570 * Walk a list of to be inactive NCEs and blow away all the ires. 571 */ 572 static void 573 nce_ire_delete_list(nce_t *nce) 574 { 575 nce_t *nce_next; 576 577 ASSERT(nce != NULL); 578 while (nce != NULL) { 579 nce_next = nce->nce_next; 580 nce->nce_next = NULL; 581 582 /* 583 * It is possible for the last ndp walker (this thread) 584 * to come here after ndp_delete has marked the nce CONDEMNED 585 * and before it has removed the nce from the fastpath list 586 * or called untimeout. So we need to do it here. It is safe 587 * for both ndp_delete and this thread to do it twice or 588 * even simultaneously since each of the threads has a 589 * reference on the nce. 590 */ 591 nce_fastpath_list_delete(nce); 592 /* 593 * Cancel any running timer. Timeout can't be restarted 594 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 595 * Passing invalid timeout id is fine. 596 */ 597 if (nce->nce_timeout_id != 0) { 598 (void) untimeout(nce->nce_timeout_id); 599 nce->nce_timeout_id = 0; 600 } 601 /* 602 * We might hit this func thus in the v4 case: 603 * ipif_down->ipif_ndp_down->ndp_walk 604 */ 605 606 if (nce->nce_ipversion == IPV4_VERSION) { 607 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 608 IRE_CACHE, nce_ire_delete1, 609 (char *)nce, nce->nce_ill); 610 } else { 611 ASSERT(nce->nce_ipversion == IPV6_VERSION); 612 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 613 IRE_CACHE, nce_ire_delete1, 614 (char *)nce, nce->nce_ill); 615 } 616 NCE_REFRELE_NOTR(nce); 617 nce = nce_next; 618 } 619 } 620 621 /* 622 * Delete an ire when the nce goes away. 623 */ 624 /* ARGSUSED */ 625 static void 626 nce_ire_delete(nce_t *nce) 627 { 628 if (nce->nce_ipversion == IPV6_VERSION) { 629 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 630 nce_ire_delete1, (char *)nce, nce->nce_ill); 631 NCE_REFRELE_NOTR(nce); 632 } else { 633 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 634 nce_ire_delete1, (char *)nce, nce->nce_ill); 635 NCE_REFRELE_NOTR(nce); 636 } 637 } 638 639 /* 640 * ire_walk routine used to delete every IRE that shares this nce 641 */ 642 static void 643 nce_ire_delete1(ire_t *ire, char *nce_arg) 644 { 645 nce_t *nce = (nce_t *)nce_arg; 646 647 ASSERT(ire->ire_type == IRE_CACHE); 648 649 if (ire->ire_nce == nce) { 650 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 651 ire_delete(ire); 652 } 653 } 654 655 /* 656 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 657 */ 658 boolean_t 659 ndp_restart_dad(nce_t *nce) 660 { 661 boolean_t started; 662 boolean_t dropped; 663 664 if (nce == NULL) 665 return (B_FALSE); 666 mutex_enter(&nce->nce_lock); 667 if (nce->nce_state == ND_PROBE) { 668 mutex_exit(&nce->nce_lock); 669 started = B_TRUE; 670 } else if (nce->nce_state == ND_REACHABLE) { 671 nce->nce_state = ND_PROBE; 672 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 673 mutex_exit(&nce->nce_lock); 674 dropped = nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, NULL, 675 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, NDP_PROBE); 676 if (dropped) { 677 mutex_enter(&nce->nce_lock); 678 nce->nce_pcnt++; 679 mutex_exit(&nce->nce_lock); 680 } 681 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 682 started = B_TRUE; 683 } else { 684 mutex_exit(&nce->nce_lock); 685 started = B_FALSE; 686 } 687 return (started); 688 } 689 690 /* 691 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 692 * If one is found, the refcnt on the nce will be incremented. 693 */ 694 nce_t * 695 ndp_lookup_v6(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 696 { 697 nce_t *nce; 698 699 ASSERT(ill != NULL && ill->ill_isv6); 700 if (!caller_holds_lock) { 701 mutex_enter(&ndp6.ndp_g_lock); 702 } 703 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); /* head of v6 hash table */ 704 nce = nce_lookup_addr(ill, addr, nce); 705 if (nce == NULL) 706 nce = nce_lookup_mapping(ill, addr); 707 if (!caller_holds_lock) 708 mutex_exit(&ndp6.ndp_g_lock); 709 return (nce); 710 } 711 /* 712 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 713 * If one is found, the refcnt on the nce will be incremented. 714 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 715 * so we skip the nce_lookup_mapping call. 716 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 717 */ 718 nce_t * 719 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 720 { 721 nce_t *nce; 722 in6_addr_t addr6; 723 724 if (!caller_holds_lock) { 725 mutex_enter(&ndp4.ndp_g_lock); 726 } 727 nce = *((nce_t **)NCE_HASH_PTR_V4(*addr)); /* head of v6 hash table */ 728 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 729 nce = nce_lookup_addr(ill, &addr6, nce); 730 if (!caller_holds_lock) 731 mutex_exit(&ndp4.ndp_g_lock); 732 return (nce); 733 } 734 735 /* 736 * Cache entry lookup. Try to find an nce matching the parameters passed. 737 * Look only for exact entries (no mappings). If an nce is found, increment 738 * the hold count on that nce. The caller passes in the start of the 739 * appropriate hash table, and must be holding the appropriate global 740 * lock (ndp_g_lock). 741 */ 742 static nce_t * 743 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr, nce_t *nce) 744 { 745 ndp_g_t *ndp = (ill->ill_isv6 ? &ndp6 : &ndp4); 746 747 ASSERT(ill != NULL); 748 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 749 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 750 return (NULL); 751 for (; nce != NULL; nce = nce->nce_next) { 752 if (nce->nce_ill == ill) { 753 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 754 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 755 &ipv6_all_ones)) { 756 mutex_enter(&nce->nce_lock); 757 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 758 NCE_REFHOLD_LOCKED(nce); 759 mutex_exit(&nce->nce_lock); 760 break; 761 } 762 mutex_exit(&nce->nce_lock); 763 } 764 } 765 } 766 return (nce); 767 } 768 769 /* 770 * Cache entry lookup. Try to find an nce matching the parameters passed. 771 * Look only for mappings. 772 */ 773 static nce_t * 774 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 775 { 776 nce_t *nce; 777 778 ASSERT(ill != NULL && ill->ill_isv6); 779 ASSERT(MUTEX_HELD(&ndp6.ndp_g_lock)); 780 if (!IN6_IS_ADDR_MULTICAST(addr)) 781 return (NULL); 782 nce = ndp6.nce_mask_entries; 783 for (; nce != NULL; nce = nce->nce_next) 784 if (nce->nce_ill == ill && 785 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 786 mutex_enter(&nce->nce_lock); 787 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 788 NCE_REFHOLD_LOCKED(nce); 789 mutex_exit(&nce->nce_lock); 790 break; 791 } 792 mutex_exit(&nce->nce_lock); 793 } 794 return (nce); 795 } 796 797 /* 798 * Process passed in parameters either from an incoming packet or via 799 * user ioctl. 800 */ 801 void 802 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 803 { 804 ill_t *ill = nce->nce_ill; 805 uint32_t hw_addr_len = ill->ill_nd_lla_len; 806 mblk_t *mp; 807 boolean_t ll_updated = B_FALSE; 808 boolean_t ll_changed; 809 810 ASSERT(nce->nce_ipversion == IPV6_VERSION); 811 /* 812 * No updates of link layer address or the neighbor state is 813 * allowed, when the cache is in NONUD state. This still 814 * allows for responding to reachability solicitation. 815 */ 816 mutex_enter(&nce->nce_lock); 817 if (nce->nce_state == ND_INCOMPLETE) { 818 if (hw_addr == NULL) { 819 mutex_exit(&nce->nce_lock); 820 return; 821 } 822 nce_set_ll(nce, hw_addr); 823 /* 824 * Update nce state and send the queued packets 825 * back to ip this time ire will be added. 826 */ 827 if (flag & ND_NA_FLAG_SOLICITED) { 828 nce_update(nce, ND_REACHABLE, NULL); 829 } else { 830 nce_update(nce, ND_STALE, NULL); 831 } 832 mutex_exit(&nce->nce_lock); 833 nce_fastpath(nce); 834 mutex_enter(&nce->nce_lock); 835 mp = nce->nce_qd_mp; 836 nce->nce_qd_mp = NULL; 837 mutex_exit(&nce->nce_lock); 838 while (mp != NULL) { 839 mblk_t *nxt_mp; 840 841 nxt_mp = mp->b_next; 842 mp->b_next = NULL; 843 if (mp->b_prev != NULL) { 844 ill_t *inbound_ill; 845 queue_t *fwdq = NULL; 846 uint_t ifindex; 847 848 ifindex = (uint_t)(uintptr_t)mp->b_prev; 849 inbound_ill = ill_lookup_on_ifindex(ifindex, 850 B_TRUE, NULL, NULL, NULL, NULL); 851 if (inbound_ill == NULL) { 852 mp->b_prev = NULL; 853 freemsg(mp); 854 return; 855 } else { 856 fwdq = inbound_ill->ill_rq; 857 } 858 mp->b_prev = NULL; 859 /* 860 * Send a forwarded packet back into ip_rput_v6 861 * just as in ire_send_v6(). 862 * Extract the queue from b_prev (set in 863 * ip_rput_data_v6). 864 */ 865 if (fwdq != NULL) { 866 /* 867 * Forwarded packets hop count will 868 * get decremented in ip_rput_data_v6 869 */ 870 put(fwdq, mp); 871 } else { 872 /* 873 * Send locally originated packets back 874 * into * ip_wput_v6. 875 */ 876 put(ill->ill_wq, mp); 877 } 878 ill_refrele(inbound_ill); 879 } else { 880 put(ill->ill_wq, mp); 881 } 882 mp = nxt_mp; 883 } 884 return; 885 } 886 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 887 if (!is_adv) { 888 /* If this is a SOLICITATION request only */ 889 if (ll_changed) 890 nce_update(nce, ND_STALE, hw_addr); 891 mutex_exit(&nce->nce_lock); 892 return; 893 } 894 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 895 /* If in any other state than REACHABLE, ignore */ 896 if (nce->nce_state == ND_REACHABLE) { 897 nce_update(nce, ND_STALE, NULL); 898 } 899 mutex_exit(&nce->nce_lock); 900 return; 901 } else { 902 if (ll_changed) { 903 nce_update(nce, ND_UNCHANGED, hw_addr); 904 ll_updated = B_TRUE; 905 } 906 if (flag & ND_NA_FLAG_SOLICITED) { 907 nce_update(nce, ND_REACHABLE, NULL); 908 } else { 909 if (ll_updated) { 910 nce_update(nce, ND_STALE, NULL); 911 } 912 } 913 mutex_exit(&nce->nce_lock); 914 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 915 NCE_F_ISROUTER)) { 916 ire_t *ire; 917 918 /* 919 * Router turned to host. We need to remove the 920 * entry as well as any default route that may be 921 * using this as a next hop. This is required by 922 * section 7.2.5 of RFC 2461. 923 */ 924 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 925 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 926 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 927 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 928 MATCH_IRE_DEFAULT); 929 if (ire != NULL) { 930 ip_rts_rtmsg(RTM_DELETE, ire, 0); 931 ire_delete(ire); 932 ire_refrele(ire); 933 } 934 ndp_delete(nce); 935 } 936 } 937 } 938 939 /* 940 * Pass arg1 to the pfi supplied, along with each nce in existence. 941 * ndp_walk() places a REFHOLD on the nce and drops the lock when 942 * walking the hash list. 943 */ 944 void 945 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 946 boolean_t trace) 947 { 948 949 nce_t *nce; 950 nce_t *nce1; 951 nce_t **ncep; 952 nce_t *free_nce_list = NULL; 953 954 mutex_enter(&ndp->ndp_g_lock); 955 /* Prevent ndp_delete from unlink and free of NCE */ 956 ndp->ndp_g_walker++; 957 mutex_exit(&ndp->ndp_g_lock); 958 for (ncep = ndp->nce_hash_tbl; 959 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 960 for (nce = *ncep; nce != NULL; nce = nce1) { 961 nce1 = nce->nce_next; 962 if (ill == NULL || nce->nce_ill == ill) { 963 if (trace) { 964 NCE_REFHOLD(nce); 965 (*pfi)(nce, arg1); 966 NCE_REFRELE(nce); 967 } else { 968 NCE_REFHOLD_NOTR(nce); 969 (*pfi)(nce, arg1); 970 NCE_REFRELE_NOTR(nce); 971 } 972 } 973 } 974 } 975 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 976 nce1 = nce->nce_next; 977 if (ill == NULL || nce->nce_ill == ill) { 978 if (trace) { 979 NCE_REFHOLD(nce); 980 (*pfi)(nce, arg1); 981 NCE_REFRELE(nce); 982 } else { 983 NCE_REFHOLD_NOTR(nce); 984 (*pfi)(nce, arg1); 985 NCE_REFRELE_NOTR(nce); 986 } 987 } 988 } 989 mutex_enter(&ndp->ndp_g_lock); 990 ndp->ndp_g_walker--; 991 /* 992 * While NCE's are removed from global list they are placed 993 * in a private list, to be passed to nce_ire_delete_list(). 994 * The reason is, there may be ires pointing to this nce 995 * which needs to cleaned up. 996 */ 997 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 998 /* Time to delete condemned entries */ 999 for (ncep = ndp->nce_hash_tbl; 1000 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 1001 nce = *ncep; 1002 if (nce != NULL) { 1003 nce_remove(ndp, nce, &free_nce_list); 1004 } 1005 } 1006 nce = ndp->nce_mask_entries; 1007 if (nce != NULL) { 1008 nce_remove(ndp, nce, &free_nce_list); 1009 } 1010 ndp->ndp_g_walker_cleanup = B_FALSE; 1011 } 1012 mutex_exit(&ndp->ndp_g_lock); 1013 1014 if (free_nce_list != NULL) { 1015 nce_ire_delete_list(free_nce_list); 1016 } 1017 } 1018 1019 void 1020 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1) 1021 { 1022 ndp_walk_common(&ndp4, ill, pfi, arg1, B_TRUE); 1023 ndp_walk_common(&ndp6, ill, pfi, arg1, B_TRUE); 1024 } 1025 1026 /* 1027 * Process resolve requests. Handles both mapped entries 1028 * as well as cases that needs to be send out on the wire. 1029 * Lookup a NCE for a given IRE. Regardless of whether one exists 1030 * or one is created, we defer making ire point to nce until the 1031 * ire is actually added at which point the nce_refcnt on the nce is 1032 * incremented. This is done primarily to have symmetry between ire_add() 1033 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1034 */ 1035 int 1036 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 1037 { 1038 nce_t *nce; 1039 int err = 0; 1040 uint32_t ms; 1041 mblk_t *mp_nce = NULL; 1042 1043 ASSERT(ill != NULL); 1044 ASSERT(ill->ill_isv6); 1045 if (IN6_IS_ADDR_MULTICAST(dst)) { 1046 err = nce_set_multicast(ill, dst); 1047 return (err); 1048 } 1049 err = ndp_lookup_then_add(ill, 1050 NULL, /* No hardware address */ 1051 dst, 1052 &ipv6_all_ones, 1053 &ipv6_all_zeros, 1054 0, 1055 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1056 ND_INCOMPLETE, 1057 &nce, 1058 NULL, /* let ndp_add figure out fastpath mp and dlureq_mp for v6 */ 1059 NULL); 1060 1061 switch (err) { 1062 case 0: 1063 /* 1064 * New cache entry was created. Make sure that the state 1065 * is not ND_INCOMPLETE. It can be in some other state 1066 * even before we send out the solicitation as we could 1067 * get un-solicited advertisements. 1068 * 1069 * If this is an XRESOLV interface, simply return 0, 1070 * since we don't want to solicit just yet. 1071 */ 1072 if (ill->ill_flags & ILLF_XRESOLV) { 1073 NCE_REFRELE(nce); 1074 return (0); 1075 } 1076 rw_enter(&ill_g_lock, RW_READER); 1077 mutex_enter(&nce->nce_lock); 1078 if (nce->nce_state != ND_INCOMPLETE) { 1079 mutex_exit(&nce->nce_lock); 1080 rw_exit(&ill_g_lock); 1081 NCE_REFRELE(nce); 1082 return (0); 1083 } 1084 mp_nce = ip_prepend_zoneid(mp, zoneid); 1085 if (mp_nce == NULL) { 1086 /* The caller will free mp */ 1087 mutex_exit(&nce->nce_lock); 1088 rw_exit(&ill_g_lock); 1089 ndp_delete(nce); 1090 NCE_REFRELE(nce); 1091 return (ENOMEM); 1092 } 1093 ms = nce_solicit(nce, mp_nce); 1094 rw_exit(&ill_g_lock); 1095 if (ms == 0) { 1096 /* The caller will free mp */ 1097 if (mp_nce != mp) 1098 freeb(mp_nce); 1099 mutex_exit(&nce->nce_lock); 1100 ndp_delete(nce); 1101 NCE_REFRELE(nce); 1102 return (EBUSY); 1103 } 1104 mutex_exit(&nce->nce_lock); 1105 NDP_RESTART_TIMER(nce, (clock_t)ms); 1106 NCE_REFRELE(nce); 1107 return (EINPROGRESS); 1108 case EEXIST: 1109 /* Resolution in progress just queue the packet */ 1110 mutex_enter(&nce->nce_lock); 1111 if (nce->nce_state == ND_INCOMPLETE) { 1112 mp_nce = ip_prepend_zoneid(mp, zoneid); 1113 if (mp_nce == NULL) { 1114 err = ENOMEM; 1115 } else { 1116 nce_queue_mp(nce, mp_nce); 1117 err = EINPROGRESS; 1118 } 1119 } else { 1120 /* 1121 * Any other state implies we have 1122 * a nce but IRE needs to be added ... 1123 * ire_add_v6() will take care of the 1124 * the case when the nce becomes CONDEMNED 1125 * before the ire is added to the table. 1126 */ 1127 err = 0; 1128 } 1129 mutex_exit(&nce->nce_lock); 1130 NCE_REFRELE(nce); 1131 break; 1132 default: 1133 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 1134 break; 1135 } 1136 return (err); 1137 } 1138 1139 /* 1140 * When there is no resolver, the link layer template is passed in 1141 * the IRE. 1142 * Lookup a NCE for a given IRE. Regardless of whether one exists 1143 * or one is created, we defer making ire point to nce until the 1144 * ire is actually added at which point the nce_refcnt on the nce is 1145 * incremented. This is done primarily to have symmetry between ire_add() 1146 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1147 */ 1148 int 1149 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1150 { 1151 nce_t *nce; 1152 int err = 0; 1153 1154 ASSERT(ill != NULL); 1155 ASSERT(ill->ill_isv6); 1156 if (IN6_IS_ADDR_MULTICAST(dst)) { 1157 err = nce_set_multicast(ill, dst); 1158 return (err); 1159 } 1160 1161 err = ndp_lookup_then_add(ill, 1162 NULL, /* hardware address */ 1163 dst, 1164 &ipv6_all_ones, 1165 &ipv6_all_zeros, 1166 0, 1167 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1168 ND_REACHABLE, 1169 &nce, 1170 NULL, /* let ndp_add figure out fp_mp/dlureq_mp for v6 */ 1171 NULL); 1172 1173 switch (err) { 1174 case 0: 1175 /* 1176 * Cache entry with a proper resolver cookie was 1177 * created. 1178 */ 1179 NCE_REFRELE(nce); 1180 break; 1181 case EEXIST: 1182 err = 0; 1183 NCE_REFRELE(nce); 1184 break; 1185 default: 1186 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1187 break; 1188 } 1189 return (err); 1190 } 1191 1192 /* 1193 * For each interface an entry is added for the unspecified multicast group. 1194 * Here that mapping is used to form the multicast cache entry for a particular 1195 * multicast destination. 1196 */ 1197 static int 1198 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1199 { 1200 nce_t *mnce; /* Multicast mapping entry */ 1201 nce_t *nce; 1202 uchar_t *hw_addr = NULL; 1203 int err = 0; 1204 1205 ASSERT(ill != NULL); 1206 ASSERT(ill->ill_isv6); 1207 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1208 1209 mutex_enter(&ndp6.ndp_g_lock); 1210 nce = *((nce_t **)NCE_HASH_PTR_V6(*dst)); 1211 nce = nce_lookup_addr(ill, dst, nce); 1212 if (nce != NULL) { 1213 mutex_exit(&ndp6.ndp_g_lock); 1214 NCE_REFRELE(nce); 1215 return (0); 1216 } 1217 /* No entry, now lookup for a mapping this should never fail */ 1218 mnce = nce_lookup_mapping(ill, dst); 1219 if (mnce == NULL) { 1220 /* Something broken for the interface. */ 1221 mutex_exit(&ndp6.ndp_g_lock); 1222 return (ESRCH); 1223 } 1224 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1225 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1226 /* 1227 * For IRE_IF_RESOLVER a hardware mapping can be 1228 * generated, for IRE_IF_NORESOLVER, resolution cookie 1229 * in the ill is copied in ndp_add(). 1230 */ 1231 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1232 if (hw_addr == NULL) { 1233 mutex_exit(&ndp6.ndp_g_lock); 1234 NCE_REFRELE(mnce); 1235 return (ENOMEM); 1236 } 1237 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1238 } 1239 NCE_REFRELE(mnce); 1240 /* 1241 * IRE_IF_NORESOLVER type simply copies the resolution 1242 * cookie passed in. So no hw_addr is needed. 1243 */ 1244 err = ndp_add(ill, 1245 hw_addr, 1246 dst, 1247 &ipv6_all_ones, 1248 &ipv6_all_zeros, 1249 0, 1250 NCE_F_NONUD, 1251 ND_REACHABLE, 1252 &nce, 1253 NULL, 1254 NULL); 1255 mutex_exit(&ndp6.ndp_g_lock); 1256 if (hw_addr != NULL) 1257 kmem_free(hw_addr, ill->ill_nd_lla_len); 1258 if (err != 0) { 1259 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1260 return (err); 1261 } 1262 NCE_REFRELE(nce); 1263 return (0); 1264 } 1265 1266 /* 1267 * Return the link layer address, and any flags of a nce. 1268 */ 1269 int 1270 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1271 { 1272 nce_t *nce; 1273 in6_addr_t *addr; 1274 sin6_t *sin6; 1275 dl_unitdata_req_t *dl; 1276 1277 ASSERT(ill != NULL && ill->ill_isv6); 1278 sin6 = (sin6_t *)&lnr->lnr_addr; 1279 addr = &sin6->sin6_addr; 1280 1281 nce = ndp_lookup_v6(ill, addr, B_FALSE); 1282 if (nce == NULL) 1283 return (ESRCH); 1284 /* If in INCOMPLETE state, no link layer address is available yet */ 1285 if (nce->nce_state == ND_INCOMPLETE) 1286 goto done; 1287 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1288 if (ill->ill_flags & ILLF_XRESOLV) 1289 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1290 else 1291 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1292 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1293 sizeof (lnr->lnr_hdw_addr)); 1294 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1295 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1296 if (nce->nce_flags & NCE_F_ISROUTER) 1297 lnr->lnr_flags = NDF_ISROUTER_ON; 1298 if (nce->nce_flags & NCE_F_PROXY) 1299 lnr->lnr_flags |= NDF_PROXY_ON; 1300 if (nce->nce_flags & NCE_F_ANYCAST) 1301 lnr->lnr_flags |= NDF_ANYCAST_ON; 1302 done: 1303 NCE_REFRELE(nce); 1304 return (0); 1305 } 1306 1307 /* 1308 * Send Enable/Disable multicast reqs to driver. 1309 */ 1310 int 1311 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1312 uint32_t hw_addr_offset, mblk_t *mp) 1313 { 1314 nce_t *nce; 1315 uchar_t *hw_addr; 1316 1317 ASSERT(ill != NULL && ill->ill_isv6); 1318 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1319 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1320 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1321 freemsg(mp); 1322 return (EINVAL); 1323 } 1324 mutex_enter(&ndp6.ndp_g_lock); 1325 nce = nce_lookup_mapping(ill, addr); 1326 if (nce == NULL) { 1327 mutex_exit(&ndp6.ndp_g_lock); 1328 freemsg(mp); 1329 return (ESRCH); 1330 } 1331 mutex_exit(&ndp6.ndp_g_lock); 1332 /* 1333 * Update dl_addr_length and dl_addr_offset for primitives that 1334 * have physical addresses as opposed to full saps 1335 */ 1336 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1337 case DL_ENABMULTI_REQ: 1338 /* Track the state if this is the first enabmulti */ 1339 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1340 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1341 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1342 break; 1343 case DL_DISABMULTI_REQ: 1344 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1345 break; 1346 default: 1347 NCE_REFRELE(nce); 1348 ip1dbg(("ndp_mcastreq: default\n")); 1349 return (EINVAL); 1350 } 1351 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1352 NCE_REFRELE(nce); 1353 putnext(ill->ill_wq, mp); 1354 return (0); 1355 } 1356 1357 /* 1358 * Send a neighbor solicitation. 1359 * Returns number of milliseconds after which we should either rexmit or abort. 1360 * Return of zero means we should abort. 1361 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1362 * 1363 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1364 * the packet. 1365 * NOTE: This routine does not consume mp. 1366 */ 1367 uint32_t 1368 nce_solicit(nce_t *nce, mblk_t *mp) 1369 { 1370 ill_t *ill; 1371 ill_t *src_ill; 1372 ip6_t *ip6h; 1373 in6_addr_t src; 1374 in6_addr_t dst; 1375 ipif_t *ipif; 1376 ip6i_t *ip6i; 1377 boolean_t dropped = B_FALSE; 1378 1379 ASSERT(RW_READ_HELD(&ill_g_lock)); 1380 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1381 ill = nce->nce_ill; 1382 ASSERT(ill != NULL); 1383 1384 if (nce->nce_rcnt == 0) { 1385 return (0); 1386 } 1387 1388 if (mp == NULL) { 1389 ASSERT(nce->nce_qd_mp != NULL); 1390 mp = nce->nce_qd_mp; 1391 } else { 1392 nce_queue_mp(nce, mp); 1393 } 1394 1395 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1396 if (mp->b_datap->db_type == M_CTL) 1397 mp = mp->b_cont; 1398 1399 ip6h = (ip6_t *)mp->b_rptr; 1400 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1401 /* 1402 * This message should have been pulled up already in 1403 * ip_wput_v6. We can't do pullups here because the message 1404 * could be from the nce_qd_mp which could have b_next/b_prev 1405 * non-NULL. 1406 */ 1407 ip6i = (ip6i_t *)ip6h; 1408 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1409 sizeof (ip6i_t) + IPV6_HDR_LEN); 1410 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1411 } 1412 src = ip6h->ip6_src; 1413 /* 1414 * If the src of outgoing packet is one of the assigned interface 1415 * addresses use it, otherwise we will pick the source address below. 1416 */ 1417 src_ill = ill; 1418 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1419 if (ill->ill_group != NULL) 1420 src_ill = ill->ill_group->illgrp_ill; 1421 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1422 for (ipif = src_ill->ill_ipif; ipif != NULL; 1423 ipif = ipif->ipif_next) { 1424 if (IN6_ARE_ADDR_EQUAL(&src, 1425 &ipif->ipif_v6lcl_addr)) { 1426 break; 1427 } 1428 } 1429 if (ipif != NULL) 1430 break; 1431 } 1432 /* 1433 * If no relevant ipif can be found, then it's not one of our 1434 * addresses. Reset to :: and let nce_xmit. If an ipif can be 1435 * found, but it's not yet done with DAD verification, then 1436 * just postpone this transmission until later. 1437 */ 1438 if (src_ill == NULL) 1439 src = ipv6_all_zeros; 1440 else if (!ipif->ipif_addr_ready) 1441 return (ill->ill_reachable_retrans_time); 1442 } 1443 dst = nce->nce_addr; 1444 /* 1445 * If source address is unspecified, nce_xmit will choose 1446 * one for us and initialize the hardware address also 1447 * appropriately. 1448 */ 1449 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1450 src_ill = NULL; 1451 nce->nce_rcnt--; 1452 mutex_exit(&nce->nce_lock); 1453 rw_exit(&ill_g_lock); 1454 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1455 &dst, 0); 1456 rw_enter(&ill_g_lock, RW_READER); 1457 mutex_enter(&nce->nce_lock); 1458 if (dropped) 1459 nce->nce_rcnt++; 1460 return (ill->ill_reachable_retrans_time); 1461 } 1462 1463 /* 1464 * Attempt to recover an address on an interface that's been marked as a 1465 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1466 * no easy way to just probe the address and have the right thing happen if 1467 * it's no longer in use. Instead, we just bring it up normally and allow the 1468 * regular interface start-up logic to probe for a remaining duplicate and take 1469 * us back down if necessary. 1470 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1471 * ip_ndp_excl. 1472 */ 1473 /* ARGSUSED */ 1474 static void 1475 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1476 { 1477 ill_t *ill = rq->q_ptr; 1478 ipif_t *ipif; 1479 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1480 1481 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1482 /* 1483 * We do not support recovery of proxy ARP'd interfaces, 1484 * because the system lacks a complete proxy ARP mechanism. 1485 */ 1486 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1487 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1488 continue; 1489 } 1490 1491 /* 1492 * If we have already recovered, then ignore. 1493 */ 1494 mutex_enter(&ill->ill_lock); 1495 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) { 1496 mutex_exit(&ill->ill_lock); 1497 continue; 1498 } 1499 1500 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1501 ill->ill_ipif_dup_count--; 1502 mutex_exit(&ill->ill_lock); 1503 ipif->ipif_was_dup = B_TRUE; 1504 1505 if (ipif_ndp_up(ipif, addr, B_FALSE) != EINPROGRESS) 1506 (void) ipif_up_done_v6(ipif); 1507 } 1508 freeb(mp); 1509 } 1510 1511 /* 1512 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1513 * As long as someone else holds the address, the interface will stay down. 1514 * When that conflict goes away, the interface is brought back up. This is 1515 * done so that accidental shutdowns of addresses aren't made permanent. Your 1516 * server will recover from a failure. 1517 * 1518 * For DHCP and temporary addresses, recovery is not done in the kernel. 1519 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1520 * 1521 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1522 */ 1523 static void 1524 ipif6_dup_recovery(void *arg) 1525 { 1526 ipif_t *ipif = arg; 1527 1528 ipif->ipif_recovery_id = 0; 1529 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1530 return; 1531 1532 /* If the link is down, we'll retry this later */ 1533 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1534 return; 1535 1536 ndp_do_recovery(ipif); 1537 } 1538 1539 /* 1540 * Perform interface recovery by forcing the duplicate interfaces up and 1541 * allowing the system to determine which ones should stay up. 1542 * 1543 * Called both by recovery timer expiry and link-up notification. 1544 */ 1545 void 1546 ndp_do_recovery(ipif_t *ipif) 1547 { 1548 ill_t *ill = ipif->ipif_ill; 1549 mblk_t *mp; 1550 1551 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1552 if (mp == NULL) { 1553 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1554 ipif, MSEC_TO_TICK(ip_dup_recovery)); 1555 } else { 1556 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1557 sizeof (ipif->ipif_v6lcl_addr)); 1558 ill_refhold(ill); 1559 (void) qwriter_ip(NULL, ill, ill->ill_rq, mp, ip_ndp_recover, 1560 CUR_OP, B_FALSE); 1561 } 1562 } 1563 1564 /* 1565 * Find the solicitation in the given message, and extract printable details 1566 * (MAC and IP addresses) from it. 1567 */ 1568 static nd_neighbor_solicit_t * 1569 ip_ndp_find_solicitation(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, char *hbuf, 1570 size_t hlen, char *sbuf, size_t slen, uchar_t **haddr) 1571 { 1572 nd_neighbor_solicit_t *ns; 1573 ip6_t *ip6h; 1574 uchar_t *addr; 1575 int alen; 1576 1577 alen = 0; 1578 ip6h = (ip6_t *)mp->b_rptr; 1579 if (dl_mp == NULL) { 1580 nd_opt_hdr_t *opt; 1581 int nslen; 1582 1583 /* 1584 * If it's from the fast-path, then it can't be a probe 1585 * message, and thus must include the source linkaddr option. 1586 * Extract that here. 1587 */ 1588 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1589 nslen = mp->b_wptr - (uchar_t *)ns; 1590 if ((nslen -= sizeof (*ns)) > 0) { 1591 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), nslen, 1592 ND_OPT_SOURCE_LINKADDR); 1593 if (opt != NULL && 1594 opt->nd_opt_len * 8 - sizeof (*opt) >= 1595 ill->ill_nd_lla_len) { 1596 addr = (uchar_t *)(opt + 1); 1597 alen = ill->ill_nd_lla_len; 1598 } 1599 } 1600 /* 1601 * We cheat a bit here for the sake of printing usable log 1602 * messages in the rare case where the reply we got was unicast 1603 * without a source linkaddr option, and the interface is in 1604 * fastpath mode. (Sigh.) 1605 */ 1606 if (alen == 0 && ill->ill_type == IFT_ETHER && 1607 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1608 struct ether_header *pether; 1609 1610 pether = (struct ether_header *)((char *)ip6h - 1611 sizeof (*pether)); 1612 addr = pether->ether_shost.ether_addr_octet; 1613 alen = ETHERADDRL; 1614 } 1615 } else { 1616 dl_unitdata_ind_t *dlu; 1617 1618 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1619 alen = dlu->dl_src_addr_length; 1620 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1621 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1622 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1623 if (ill->ill_sap_length < 0) { 1624 alen += ill->ill_sap_length; 1625 } else { 1626 addr += ill->ill_sap_length; 1627 alen -= ill->ill_sap_length; 1628 } 1629 } 1630 } 1631 if (alen > 0) { 1632 *haddr = addr; 1633 (void) mac_colon_addr(addr, alen, hbuf, hlen); 1634 } else { 1635 *haddr = NULL; 1636 (void) strcpy(hbuf, "?"); 1637 } 1638 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1639 (void) inet_ntop(AF_INET6, &ns->nd_ns_target, sbuf, slen); 1640 return (ns); 1641 } 1642 1643 /* 1644 * This is for exclusive changes due to NDP duplicate address detection 1645 * failure. 1646 */ 1647 /* ARGSUSED */ 1648 static void 1649 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1650 { 1651 ill_t *ill = rq->q_ptr; 1652 ipif_t *ipif; 1653 char ibuf[LIFNAMSIZ + 10]; /* 10 digits for logical i/f number */ 1654 char hbuf[MAC_STR_LEN]; 1655 char sbuf[INET6_ADDRSTRLEN]; 1656 nd_neighbor_solicit_t *ns; 1657 mblk_t *dl_mp = NULL; 1658 uchar_t *haddr; 1659 1660 if (DB_TYPE(mp) != M_DATA) { 1661 dl_mp = mp; 1662 mp = mp->b_cont; 1663 } 1664 ns = ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, sizeof (hbuf), sbuf, 1665 sizeof (sbuf), &haddr); 1666 if (haddr != NULL && 1667 bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) { 1668 /* 1669 * Ignore conflicts generated by misbehaving switches that just 1670 * reflect our own messages back to us. 1671 */ 1672 goto ignore_conflict; 1673 } 1674 (void) strlcpy(ibuf, ill->ill_name, sizeof (ibuf)); 1675 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1676 1677 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1678 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 1679 &ns->nd_ns_target)) { 1680 continue; 1681 } 1682 1683 /* If it's already marked, then don't do anything. */ 1684 if (ipif->ipif_flags & IPIF_DUPLICATE) 1685 continue; 1686 1687 /* 1688 * If this is a failure during duplicate recovery, then don't 1689 * complain. It may take a long time to recover. 1690 */ 1691 if (!ipif->ipif_was_dup) { 1692 if (ipif->ipif_id != 0) { 1693 (void) snprintf(ibuf + ill->ill_name_length - 1, 1694 sizeof (ibuf) - ill->ill_name_length + 1, 1695 ":%d", ipif->ipif_id); 1696 } 1697 cmn_err(CE_WARN, "%s has duplicate address %s (in " 1698 "use by %s); disabled", ibuf, sbuf, hbuf); 1699 } 1700 mutex_enter(&ill->ill_lock); 1701 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1702 ipif->ipif_flags |= IPIF_DUPLICATE; 1703 ill->ill_ipif_dup_count++; 1704 mutex_exit(&ill->ill_lock); 1705 (void) ipif_down(ipif, NULL, NULL); 1706 ipif_down_tail(ipif); 1707 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1708 ill->ill_net_type == IRE_IF_RESOLVER && 1709 ip_dup_recovery > 0) 1710 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1711 ipif, MSEC_TO_TICK(ip_dup_recovery)); 1712 } 1713 ignore_conflict: 1714 if (dl_mp != NULL) 1715 freeb(dl_mp); 1716 freemsg(mp); 1717 } 1718 1719 /* 1720 * Handle failure by tearing down the ipifs with the specified address. Note 1721 * that tearing down the ipif also means deleting the nce through ipif_down, so 1722 * it's not possible to do recovery by just restarting the nce timer. Instead, 1723 * we start a timer on the ipif. 1724 */ 1725 static void 1726 ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1727 { 1728 if ((mp = copymsg(mp)) != NULL) { 1729 if (dl_mp == NULL) 1730 dl_mp = mp; 1731 else if ((dl_mp = copyb(dl_mp)) != NULL) 1732 dl_mp->b_cont = mp; 1733 if (dl_mp == NULL) { 1734 freemsg(mp); 1735 } else { 1736 ill_refhold(ill); 1737 (void) qwriter_ip(NULL, ill, ill->ill_rq, dl_mp, 1738 ip_ndp_excl, CUR_OP, B_FALSE); 1739 } 1740 } 1741 ndp_delete(nce); 1742 } 1743 1744 /* 1745 * Handle a discovered conflict: some other system is advertising that it owns 1746 * one of our IP addresses. We need to defend ourselves, or just shut down the 1747 * interface. 1748 */ 1749 static void 1750 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1751 { 1752 ipif_t *ipif; 1753 uint32_t now; 1754 uint_t maxdefense; 1755 uint_t defs; 1756 1757 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 1758 NULL, NULL); 1759 if (ipif == NULL) 1760 return; 1761 /* 1762 * First, figure out if this address is disposable. 1763 */ 1764 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1765 maxdefense = ip_max_temp_defend; 1766 else 1767 maxdefense = ip_max_defend; 1768 1769 /* 1770 * Now figure out how many times we've defended ourselves. Ignore 1771 * defenses that happened long in the past. 1772 */ 1773 now = gethrestime_sec(); 1774 mutex_enter(&nce->nce_lock); 1775 if ((defs = nce->nce_defense_count) > 0 && 1776 now - nce->nce_defense_time > ip_defend_interval) { 1777 nce->nce_defense_count = defs = 0; 1778 } 1779 nce->nce_defense_count++; 1780 nce->nce_defense_time = now; 1781 mutex_exit(&nce->nce_lock); 1782 ipif_refrele(ipif); 1783 1784 /* 1785 * If we've defended ourselves too many times already, then give up and 1786 * tear down the interface(s) using this address. Otherwise, defend by 1787 * sending out an unsolicited Neighbor Advertisement. 1788 */ 1789 if (defs >= maxdefense) { 1790 ip_ndp_failure(ill, mp, dl_mp, nce); 1791 } else { 1792 char hbuf[MAC_STR_LEN]; 1793 char sbuf[INET6_ADDRSTRLEN]; 1794 uchar_t *haddr; 1795 1796 (void) ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, 1797 sizeof (hbuf), sbuf, sizeof (sbuf), &haddr); 1798 cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1799 hbuf, sbuf, ill->ill_name); 1800 (void) nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, B_FALSE, 1801 &nce->nce_addr, &ipv6_all_hosts_mcast, 1802 nce_advert_flags(nce)); 1803 } 1804 } 1805 1806 static void 1807 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1808 { 1809 nd_neighbor_solicit_t *ns; 1810 uint32_t hlen = ill->ill_nd_lla_len; 1811 uchar_t *haddr = NULL; 1812 icmp6_t *icmp_nd; 1813 ip6_t *ip6h; 1814 nce_t *our_nce = NULL; 1815 in6_addr_t target; 1816 in6_addr_t src; 1817 int len; 1818 int flag = 0; 1819 nd_opt_hdr_t *opt = NULL; 1820 boolean_t bad_solicit = B_FALSE; 1821 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1822 1823 ip6h = (ip6_t *)mp->b_rptr; 1824 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1825 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1826 src = ip6h->ip6_src; 1827 ns = (nd_neighbor_solicit_t *)icmp_nd; 1828 target = ns->nd_ns_target; 1829 if (IN6_IS_ADDR_MULTICAST(&target)) { 1830 if (ip_debug > 2) { 1831 /* ip1dbg */ 1832 pr_addr_dbg("ndp_input_solicit: Target is" 1833 " multicast! %s\n", AF_INET6, &target); 1834 } 1835 bad_solicit = B_TRUE; 1836 goto done; 1837 } 1838 if (len > sizeof (nd_neighbor_solicit_t)) { 1839 /* Options present */ 1840 opt = (nd_opt_hdr_t *)&ns[1]; 1841 len -= sizeof (nd_neighbor_solicit_t); 1842 if (!ndp_verify_optlen(opt, len)) { 1843 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1844 bad_solicit = B_TRUE; 1845 goto done; 1846 } 1847 } 1848 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1849 /* Check to see if this is a valid DAD solicitation */ 1850 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1851 if (ip_debug > 2) { 1852 /* ip1dbg */ 1853 pr_addr_dbg("ndp_input_solicit: IPv6 " 1854 "Destination is not solicited node " 1855 "multicast %s\n", AF_INET6, 1856 &ip6h->ip6_dst); 1857 } 1858 bad_solicit = B_TRUE; 1859 goto done; 1860 } 1861 } 1862 1863 our_nce = ndp_lookup_v6(ill, &target, B_FALSE); 1864 /* 1865 * If this is a valid Solicitation, a permanent 1866 * entry should exist in the cache 1867 */ 1868 if (our_nce == NULL || 1869 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1870 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1871 "ifname=%s ", ill->ill_name)); 1872 if (ip_debug > 2) { 1873 /* ip1dbg */ 1874 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1875 } 1876 bad_solicit = B_TRUE; 1877 goto done; 1878 } 1879 1880 /* At this point we should have a verified NS per spec */ 1881 if (opt != NULL) { 1882 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1883 if (opt != NULL) { 1884 haddr = (uchar_t *)&opt[1]; 1885 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 1886 hlen == 0) { 1887 ip1dbg(("ndp_input_advert: bad SLLA\n")); 1888 bad_solicit = B_TRUE; 1889 goto done; 1890 } 1891 } 1892 } 1893 1894 /* If sending directly to peer, set the unicast flag */ 1895 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 1896 flag |= NDP_UNICAST; 1897 1898 /* 1899 * Create/update the entry for the soliciting node. 1900 * or respond to outstanding queries, don't if 1901 * the source is unspecified address. 1902 */ 1903 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1904 int err; 1905 nce_t *nnce; 1906 1907 ASSERT(ill->ill_isv6); 1908 /* 1909 * Regular solicitations *must* include the Source Link-Layer 1910 * Address option. Ignore messages that do not. 1911 */ 1912 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1913 ip1dbg(("ndp_input_solicit: source link-layer address " 1914 "option missing with a specified source.\n")); 1915 bad_solicit = B_TRUE; 1916 goto done; 1917 } 1918 1919 /* 1920 * This is a regular solicitation. If we're still in the 1921 * process of verifying the address, then don't respond at all 1922 * and don't keep track of the sender. 1923 */ 1924 if (our_nce->nce_state == ND_PROBE) 1925 goto done; 1926 1927 /* 1928 * If the solicitation doesn't have sender hardware address 1929 * (legal for unicast solicitation), then process without 1930 * installing the return NCE. Either we already know it, or 1931 * we'll be forced to look it up when (and if) we reply to the 1932 * packet. 1933 */ 1934 if (haddr == NULL) 1935 goto no_source; 1936 1937 err = ndp_lookup_then_add(ill, 1938 haddr, 1939 &src, /* Soliciting nodes address */ 1940 &ipv6_all_ones, 1941 &ipv6_all_zeros, 1942 0, 1943 0, 1944 ND_STALE, 1945 &nnce, 1946 NULL, 1947 NULL); 1948 switch (err) { 1949 case 0: 1950 /* done with this entry */ 1951 NCE_REFRELE(nnce); 1952 break; 1953 case EEXIST: 1954 /* 1955 * B_FALSE indicates this is not an 1956 * an advertisement. 1957 */ 1958 ndp_process(nnce, haddr, 0, B_FALSE); 1959 NCE_REFRELE(nnce); 1960 break; 1961 default: 1962 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1963 err)); 1964 goto done; 1965 } 1966 no_source: 1967 flag |= NDP_SOLICITED; 1968 } else { 1969 /* 1970 * No source link layer address option should be present in a 1971 * valid DAD request. 1972 */ 1973 if (haddr != NULL) { 1974 ip1dbg(("ndp_input_solicit: source link-layer address " 1975 "option present with an unspecified source.\n")); 1976 bad_solicit = B_TRUE; 1977 goto done; 1978 } 1979 if (our_nce->nce_state == ND_PROBE) { 1980 /* 1981 * Internally looped-back probes won't have DLPI 1982 * attached to them. External ones (which are sent by 1983 * multicast) always will. Just ignore our own 1984 * transmissions. 1985 */ 1986 if (dl_mp != NULL) { 1987 /* 1988 * If someone else is probing our address, then 1989 * we've crossed wires. Declare failure. 1990 */ 1991 ip_ndp_failure(ill, mp, dl_mp, our_nce); 1992 } 1993 goto done; 1994 } 1995 /* 1996 * This is a DAD probe. Multicast the advertisement to the 1997 * all-nodes address. 1998 */ 1999 src = ipv6_all_hosts_mcast; 2000 } 2001 flag |= nce_advert_flags(our_nce); 2002 /* Response to a solicitation */ 2003 (void) nce_xmit(ill, 2004 ND_NEIGHBOR_ADVERT, 2005 ill, /* ill to be used for extracting ill_nd_lla */ 2006 B_TRUE, /* use ill_nd_lla */ 2007 &target, /* Source and target of the advertisement pkt */ 2008 &src, /* IP Destination (source of original pkt) */ 2009 flag); 2010 done: 2011 if (bad_solicit) 2012 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 2013 if (our_nce != NULL) 2014 NCE_REFRELE(our_nce); 2015 } 2016 2017 void 2018 ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2019 { 2020 nd_neighbor_advert_t *na; 2021 uint32_t hlen = ill->ill_nd_lla_len; 2022 uchar_t *haddr = NULL; 2023 icmp6_t *icmp_nd; 2024 ip6_t *ip6h; 2025 nce_t *dst_nce = NULL; 2026 in6_addr_t target; 2027 nd_opt_hdr_t *opt = NULL; 2028 int len; 2029 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2030 2031 ip6h = (ip6_t *)mp->b_rptr; 2032 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2033 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2034 na = (nd_neighbor_advert_t *)icmp_nd; 2035 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 2036 (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 2037 ip1dbg(("ndp_input_advert: Target is multicast but the " 2038 "solicited flag is not zero\n")); 2039 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2040 return; 2041 } 2042 target = na->nd_na_target; 2043 if (IN6_IS_ADDR_MULTICAST(&target)) { 2044 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 2045 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2046 return; 2047 } 2048 if (len > sizeof (nd_neighbor_advert_t)) { 2049 opt = (nd_opt_hdr_t *)&na[1]; 2050 if (!ndp_verify_optlen(opt, 2051 len - sizeof (nd_neighbor_advert_t))) { 2052 ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); 2053 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2054 return; 2055 } 2056 /* At this point we have a verified NA per spec */ 2057 len -= sizeof (nd_neighbor_advert_t); 2058 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 2059 if (opt != NULL) { 2060 haddr = (uchar_t *)&opt[1]; 2061 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 2062 hlen == 0) { 2063 ip1dbg(("ndp_input_advert: bad SLLA\n")); 2064 BUMP_MIB(mib, 2065 ipv6IfIcmpInBadNeighborAdvertisements); 2066 return; 2067 } 2068 } 2069 } 2070 2071 /* 2072 * If this interface is part of the group look at all the 2073 * ills in the group. 2074 */ 2075 rw_enter(&ill_g_lock, RW_READER); 2076 if (ill->ill_group != NULL) 2077 ill = ill->ill_group->illgrp_ill; 2078 2079 for (; ill != NULL; ill = ill->ill_group_next) { 2080 mutex_enter(&ill->ill_lock); 2081 if (!ILL_CAN_LOOKUP(ill)) { 2082 mutex_exit(&ill->ill_lock); 2083 continue; 2084 } 2085 ill_refhold_locked(ill); 2086 mutex_exit(&ill->ill_lock); 2087 dst_nce = ndp_lookup_v6(ill, &target, B_FALSE); 2088 /* We have to drop the lock since ndp_process calls put* */ 2089 rw_exit(&ill_g_lock); 2090 if (dst_nce != NULL) { 2091 if ((dst_nce->nce_flags & NCE_F_PERMANENT) && 2092 dst_nce->nce_state == ND_PROBE) { 2093 /* 2094 * Someone else sent an advertisement for an 2095 * address that we're trying to configure. 2096 * Tear it down. Note that dl_mp might be NULL 2097 * if we're getting a unicast reply. This 2098 * isn't typically done (multicast is the norm 2099 * in response to a probe), but ip_ndp_failure 2100 * will handle the dl_mp == NULL case as well. 2101 */ 2102 ip_ndp_failure(ill, mp, dl_mp, dst_nce); 2103 } else if (dst_nce->nce_flags & NCE_F_PERMANENT) { 2104 /* 2105 * Someone just announced one of our local 2106 * addresses. If it wasn't us, then this is a 2107 * conflict. Defend the address or shut it 2108 * down. 2109 */ 2110 if (dl_mp != NULL && 2111 (haddr == NULL || 2112 nce_cmp_ll_addr(dst_nce, haddr, 2113 ill->ill_nd_lla_len))) { 2114 ip_ndp_conflict(ill, mp, dl_mp, 2115 dst_nce); 2116 } 2117 } else { 2118 if (na->nd_na_flags_reserved & 2119 ND_NA_FLAG_ROUTER) { 2120 dst_nce->nce_flags |= NCE_F_ISROUTER; 2121 } 2122 /* B_TRUE indicates this an advertisement */ 2123 ndp_process(dst_nce, haddr, 2124 na->nd_na_flags_reserved, B_TRUE); 2125 } 2126 NCE_REFRELE(dst_nce); 2127 } 2128 rw_enter(&ill_g_lock, RW_READER); 2129 ill_refrele(ill); 2130 } 2131 rw_exit(&ill_g_lock); 2132 } 2133 2134 /* 2135 * Process NDP neighbor solicitation/advertisement messages. 2136 * The checksum has already checked o.k before reaching here. 2137 */ 2138 void 2139 ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2140 { 2141 icmp6_t *icmp_nd; 2142 ip6_t *ip6h; 2143 int len; 2144 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2145 2146 2147 if (!pullupmsg(mp, -1)) { 2148 ip1dbg(("ndp_input: pullupmsg failed\n")); 2149 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 2150 goto done; 2151 } 2152 ip6h = (ip6_t *)mp->b_rptr; 2153 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 2154 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 2155 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 2156 goto done; 2157 } 2158 /* 2159 * NDP does not accept any extension headers between the 2160 * IP header and the ICMP header since e.g. a routing 2161 * header could be dangerous. 2162 * This assumes that any AH or ESP headers are removed 2163 * by ip prior to passing the packet to ndp_input. 2164 */ 2165 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 2166 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 2167 ip6h->ip6_nxt)); 2168 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2169 goto done; 2170 } 2171 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2172 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 2173 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 2174 if (icmp_nd->icmp6_code != 0) { 2175 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 2176 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2177 goto done; 2178 } 2179 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2180 /* 2181 * Make sure packet length is large enough for either 2182 * a NS or a NA icmp packet. 2183 */ 2184 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 2185 ip1dbg(("ndp_input: packet too short\n")); 2186 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2187 goto done; 2188 } 2189 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 2190 ndp_input_solicit(ill, mp, dl_mp); 2191 } else { 2192 ndp_input_advert(ill, mp, dl_mp); 2193 } 2194 done: 2195 freemsg(mp); 2196 } 2197 2198 /* 2199 * nce_xmit is called to form and transmit a ND solicitation or 2200 * advertisement ICMP packet. 2201 * 2202 * If the source address is unspecified and this isn't a probe (used for 2203 * duplicate address detection), an appropriate source address and link layer 2204 * address will be chosen here. The link layer address option is included if 2205 * the source is specified (i.e., all non-probe packets), and omitted (per the 2206 * specification) otherwise. 2207 * 2208 * It returns B_FALSE only if it does a successful put() to the 2209 * corresponding ill's ill_wq otherwise returns B_TRUE. 2210 */ 2211 static boolean_t 2212 nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, 2213 boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, 2214 int flag) 2215 { 2216 uint32_t len; 2217 icmp6_t *icmp6; 2218 mblk_t *mp; 2219 ip6_t *ip6h; 2220 nd_opt_hdr_t *opt; 2221 uint_t plen; 2222 ip6i_t *ip6i; 2223 ipif_t *src_ipif = NULL; 2224 uint8_t *hw_addr; 2225 2226 /* 2227 * If we have a unspecified source(sender) address, select a 2228 * proper source address for the solicitation here itself so 2229 * that we can initialize the h/w address correctly. This is 2230 * needed for interface groups as source address can come from 2231 * the whole group and the h/w address initialized from ill will 2232 * be wrong if the source address comes from a different ill. 2233 * 2234 * Note that the NA never comes here with the unspecified source 2235 * address. The following asserts that whenever the source 2236 * address is specified, the haddr also should be specified. 2237 */ 2238 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 2239 2240 if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { 2241 ASSERT(operation != ND_NEIGHBOR_ADVERT); 2242 /* 2243 * Pick a source address for this solicitation, but 2244 * restrict the selection to addresses assigned to the 2245 * output interface (or interface group). We do this 2246 * because the destination will create a neighbor cache 2247 * entry for the source address of this packet, so the 2248 * source address had better be a valid neighbor. 2249 */ 2250 src_ipif = ipif_select_source_v6(ill, target, RESTRICT_TO_ILL, 2251 IPV6_PREFER_SRC_DEFAULT, GLOBAL_ZONEID); 2252 if (src_ipif == NULL) { 2253 char buf[INET6_ADDRSTRLEN]; 2254 2255 ip1dbg(("nce_xmit: No source ipif for dst %s\n", 2256 inet_ntop(AF_INET6, (char *)target, buf, 2257 sizeof (buf)))); 2258 return (B_TRUE); 2259 } 2260 sender = &src_ipif->ipif_v6src_addr; 2261 hwaddr_ill = src_ipif->ipif_ill; 2262 } 2263 2264 /* 2265 * Always make sure that the NS/NA packets don't get load 2266 * spread. This is needed so that the probe packets sent 2267 * by the in.mpathd daemon can really go out on the desired 2268 * interface. Probe packets are made to go out on a desired 2269 * interface by including a ip6i with ATTACH_IF flag. As these 2270 * packets indirectly end up sending/receiving NS/NA packets 2271 * (neighbor doing NUD), we have to make sure that NA 2272 * also go out on the same interface. 2273 */ 2274 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7) / 8; 2275 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 2276 plen * 8; 2277 mp = allocb(len, BPRI_LO); 2278 if (mp == NULL) { 2279 if (src_ipif != NULL) 2280 ipif_refrele(src_ipif); 2281 return (B_TRUE); 2282 } 2283 bzero((char *)mp->b_rptr, len); 2284 mp->b_wptr = mp->b_rptr + len; 2285 2286 ip6i = (ip6i_t *)mp->b_rptr; 2287 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2288 ip6i->ip6i_nxt = IPPROTO_RAW; 2289 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2290 if (flag & NDP_PROBE) 2291 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 2292 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2293 2294 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 2295 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2296 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2297 ip6h->ip6_nxt = IPPROTO_ICMPV6; 2298 ip6h->ip6_hops = IPV6_MAX_HOPS; 2299 ip6h->ip6_dst = *target; 2300 icmp6 = (icmp6_t *)&ip6h[1]; 2301 2302 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 2303 sizeof (nd_neighbor_advert_t)); 2304 2305 if (operation == ND_NEIGHBOR_SOLICIT) { 2306 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 2307 2308 if (!(flag & NDP_PROBE)) 2309 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 2310 ip6h->ip6_src = *sender; 2311 ns->nd_ns_target = *target; 2312 if (!(flag & NDP_UNICAST)) { 2313 /* Form multicast address of the target */ 2314 ip6h->ip6_dst = ipv6_solicited_node_mcast; 2315 ip6h->ip6_dst.s6_addr32[3] |= 2316 ns->nd_ns_target.s6_addr32[3]; 2317 } 2318 } else { 2319 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 2320 2321 ASSERT(!(flag & NDP_PROBE)); 2322 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2323 ip6h->ip6_src = *sender; 2324 na->nd_na_target = *sender; 2325 if (flag & NDP_ISROUTER) 2326 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 2327 if (flag & NDP_SOLICITED) 2328 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 2329 if (flag & NDP_ORIDE) 2330 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 2331 } 2332 2333 hw_addr = NULL; 2334 if (!(flag & NDP_PROBE)) { 2335 mutex_enter(&hwaddr_ill->ill_lock); 2336 hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : 2337 hwaddr_ill->ill_phys_addr; 2338 if (hw_addr != NULL) { 2339 /* Fill in link layer address and option len */ 2340 opt->nd_opt_len = (uint8_t)plen; 2341 bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); 2342 } 2343 mutex_exit(&hwaddr_ill->ill_lock); 2344 } 2345 if (hw_addr == NULL) { 2346 /* If there's no link layer address option, then strip it. */ 2347 len -= plen * 8; 2348 mp->b_wptr = mp->b_rptr + len; 2349 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2350 } 2351 2352 icmp6->icmp6_type = (uint8_t)operation; 2353 icmp6->icmp6_code = 0; 2354 /* 2355 * Prepare for checksum by putting icmp length in the icmp 2356 * checksum field. The checksum is calculated in ip_wput_v6. 2357 */ 2358 icmp6->icmp6_cksum = ip6h->ip6_plen; 2359 2360 if (src_ipif != NULL) 2361 ipif_refrele(src_ipif); 2362 if (canput(ill->ill_wq)) { 2363 put(ill->ill_wq, mp); 2364 return (B_FALSE); 2365 } 2366 freemsg(mp); 2367 return (B_TRUE); 2368 } 2369 2370 /* 2371 * Make a link layer address (does not include the SAP) from an nce. 2372 * To form the link layer address, use the last four bytes of ipv6 2373 * address passed in and the fixed offset stored in nce. 2374 */ 2375 static void 2376 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 2377 { 2378 uchar_t *mask, *to; 2379 ill_t *ill = nce->nce_ill; 2380 int len; 2381 2382 if (ill->ill_net_type == IRE_IF_NORESOLVER) 2383 return; 2384 ASSERT(nce->nce_res_mp != NULL); 2385 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 2386 ASSERT(nce->nce_flags & NCE_F_MAPPING); 2387 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 2388 ASSERT(addr != NULL); 2389 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 2390 addrpos, ill->ill_nd_lla_len); 2391 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 2392 IPV6_ADDR_LEN); 2393 mask = (uchar_t *)&nce->nce_extract_mask; 2394 mask += (IPV6_ADDR_LEN - len); 2395 addr += (IPV6_ADDR_LEN - len); 2396 to = addrpos + nce->nce_ll_extract_start; 2397 while (len-- > 0) 2398 *to++ |= *mask++ & *addr++; 2399 } 2400 2401 /* 2402 * Pass a cache report back out via NDD. 2403 */ 2404 /* ARGSUSED */ 2405 int 2406 ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 2407 { 2408 (void) mi_mpprintf(mp, "ifname hardware addr flags" 2409 " proto addr/mask"); 2410 ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp); 2411 return (0); 2412 } 2413 2414 /* 2415 * Add a single line to the NDP Cache Entry Report. 2416 */ 2417 static void 2418 nce_report1(nce_t *nce, uchar_t *mp_arg) 2419 { 2420 ill_t *ill = nce->nce_ill; 2421 char local_buf[INET6_ADDRSTRLEN]; 2422 uchar_t flags_buf[10]; 2423 uint32_t flags = nce->nce_flags; 2424 mblk_t *mp = (mblk_t *)mp_arg; 2425 uchar_t *h; 2426 uchar_t *m = flags_buf; 2427 in6_addr_t v6addr; 2428 2429 /* 2430 * Lock the nce to protect nce_res_mp from being changed 2431 * if an external resolver address resolution completes 2432 * while nce_res_mp is being accessed here. 2433 * 2434 * Deal with all address formats, not just Ethernet-specific 2435 * In addition, make sure that the mblk has enough space 2436 * before writing to it. If is doesn't, allocate a new one. 2437 */ 2438 if (nce->nce_ipversion == IPV4_VERSION) 2439 /* Don't include v4 nce_ts in NDP cache entry report */ 2440 return; 2441 2442 ASSERT(ill != NULL); 2443 v6addr = nce->nce_mask; 2444 if (flags & NCE_F_PERMANENT) 2445 *m++ = 'P'; 2446 if (flags & NCE_F_ISROUTER) 2447 *m++ = 'R'; 2448 if (flags & NCE_F_MAPPING) 2449 *m++ = 'M'; 2450 *m = '\0'; 2451 2452 if (ill->ill_net_type == IRE_IF_RESOLVER) { 2453 size_t addrlen; 2454 char *addr_buf; 2455 dl_unitdata_req_t *dl; 2456 2457 mutex_enter(&nce->nce_lock); 2458 h = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2459 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 2460 if (ill->ill_flags & ILLF_XRESOLV) 2461 addrlen = (3 * (dl->dl_dest_addr_length)); 2462 else 2463 addrlen = (3 * (ill->ill_nd_lla_len)); 2464 if (addrlen <= 0) { 2465 mutex_exit(&nce->nce_lock); 2466 (void) mi_mpprintf(mp, 2467 "%8s %9s %5s %s/%d", 2468 ill->ill_name, 2469 "None", 2470 (uchar_t *)&flags_buf, 2471 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2472 (char *)local_buf, sizeof (local_buf)), 2473 ip_mask_to_plen_v6(&v6addr)); 2474 } else { 2475 /* 2476 * Convert the hardware/lla address to ascii 2477 */ 2478 addr_buf = kmem_zalloc(addrlen, KM_NOSLEEP); 2479 if (addr_buf == NULL) { 2480 mutex_exit(&nce->nce_lock); 2481 return; 2482 } 2483 (void) mac_colon_addr((uint8_t *)h, 2484 (ill->ill_flags & ILLF_XRESOLV) ? 2485 dl->dl_dest_addr_length : ill->ill_nd_lla_len, 2486 addr_buf, addrlen); 2487 mutex_exit(&nce->nce_lock); 2488 (void) mi_mpprintf(mp, "%8s %17s %5s %s/%d", 2489 ill->ill_name, addr_buf, (uchar_t *)&flags_buf, 2490 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2491 (char *)local_buf, sizeof (local_buf)), 2492 ip_mask_to_plen_v6(&v6addr)); 2493 kmem_free(addr_buf, addrlen); 2494 } 2495 } else { 2496 (void) mi_mpprintf(mp, 2497 "%8s %9s %5s %s/%d", 2498 ill->ill_name, 2499 "None", 2500 (uchar_t *)&flags_buf, 2501 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2502 (char *)local_buf, sizeof (local_buf)), 2503 ip_mask_to_plen_v6(&v6addr)); 2504 } 2505 } 2506 2507 mblk_t * 2508 nce_udreq_alloc(ill_t *ill) 2509 { 2510 mblk_t *template_mp = NULL; 2511 dl_unitdata_req_t *dlur; 2512 int sap_length; 2513 2514 ASSERT(ill->ill_isv6); 2515 2516 sap_length = ill->ill_sap_length; 2517 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 2518 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 2519 if (template_mp == NULL) 2520 return (NULL); 2521 2522 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 2523 dlur->dl_priority.dl_min = 0; 2524 dlur->dl_priority.dl_max = 0; 2525 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 2526 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 2527 2528 /* Copy in the SAP value. */ 2529 NCE_LL_SAP_COPY(ill, template_mp); 2530 2531 return (template_mp); 2532 } 2533 2534 /* 2535 * NDP retransmit timer. 2536 * This timer goes off when: 2537 * a. It is time to retransmit NS for resolver. 2538 * b. It is time to send reachability probes. 2539 */ 2540 void 2541 ndp_timer(void *arg) 2542 { 2543 nce_t *nce = arg; 2544 ill_t *ill = nce->nce_ill; 2545 uint32_t ms; 2546 char addrbuf[INET6_ADDRSTRLEN]; 2547 mblk_t *mp; 2548 boolean_t dropped = B_FALSE; 2549 2550 /* 2551 * The timer has to be cancelled by ndp_delete before doing the final 2552 * refrele. So the NCE is guaranteed to exist when the timer runs 2553 * until it clears the timeout_id. Before clearing the timeout_id 2554 * bump up the refcnt so that we can continue to use the nce 2555 */ 2556 ASSERT(nce != NULL); 2557 2558 /* 2559 * Grab the ill_g_lock now itself to avoid lock order problems. 2560 * nce_solicit needs ill_g_lock to be able to traverse ills 2561 */ 2562 rw_enter(&ill_g_lock, RW_READER); 2563 mutex_enter(&nce->nce_lock); 2564 NCE_REFHOLD_LOCKED(nce); 2565 nce->nce_timeout_id = 0; 2566 2567 /* 2568 * Check the reachability state first. 2569 */ 2570 switch (nce->nce_state) { 2571 case ND_DELAY: 2572 rw_exit(&ill_g_lock); 2573 nce->nce_state = ND_PROBE; 2574 mutex_exit(&nce->nce_lock); 2575 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2576 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2577 if (ip_debug > 3) { 2578 /* ip2dbg */ 2579 pr_addr_dbg("ndp_timer: state for %s changed " 2580 "to PROBE\n", AF_INET6, &nce->nce_addr); 2581 } 2582 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2583 NCE_REFRELE(nce); 2584 return; 2585 case ND_PROBE: 2586 /* must be retransmit timer */ 2587 rw_exit(&ill_g_lock); 2588 nce->nce_pcnt--; 2589 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2590 nce->nce_pcnt >= -1); 2591 if (nce->nce_pcnt > 0) { 2592 /* 2593 * As per RFC2461, the nce gets deleted after 2594 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2595 * Note that the first unicast solicitation is sent 2596 * during the DELAY state. 2597 */ 2598 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2599 nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, 2600 addrbuf, sizeof (addrbuf)))); 2601 mutex_exit(&nce->nce_lock); 2602 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, 2603 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, 2604 (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE : 2605 NDP_UNICAST); 2606 if (dropped) { 2607 mutex_enter(&nce->nce_lock); 2608 nce->nce_pcnt++; 2609 mutex_exit(&nce->nce_lock); 2610 } 2611 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 2612 } else if (nce->nce_pcnt < 0) { 2613 /* No hope, delete the nce */ 2614 nce->nce_state = ND_UNREACHABLE; 2615 mutex_exit(&nce->nce_lock); 2616 if (ip_debug > 2) { 2617 /* ip1dbg */ 2618 pr_addr_dbg("ndp_timer: Delete IRE for" 2619 " dst %s\n", AF_INET6, &nce->nce_addr); 2620 } 2621 ndp_delete(nce); 2622 } else if (!(nce->nce_flags & NCE_F_PERMANENT)) { 2623 /* Wait RetransTimer, before deleting the entry */ 2624 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2625 nce->nce_pcnt, inet_ntop(AF_INET6, 2626 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2627 mutex_exit(&nce->nce_lock); 2628 /* Wait one interval before killing */ 2629 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2630 } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { 2631 ipif_t *ipif; 2632 2633 /* 2634 * We're done probing, and we can now declare this 2635 * address to be usable. Let IP know that it's ok to 2636 * use. 2637 */ 2638 nce->nce_state = ND_REACHABLE; 2639 mutex_exit(&nce->nce_lock); 2640 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, 2641 ALL_ZONES, NULL, NULL, NULL, NULL); 2642 if (ipif != NULL) { 2643 if (ipif->ipif_was_dup) { 2644 char ibuf[LIFNAMSIZ + 10]; 2645 char sbuf[INET6_ADDRSTRLEN]; 2646 2647 ipif->ipif_was_dup = B_FALSE; 2648 (void) strlcpy(ibuf, ill->ill_name, 2649 sizeof (ibuf)); 2650 (void) inet_ntop(AF_INET6, 2651 &ipif->ipif_v6lcl_addr, 2652 sbuf, sizeof (sbuf)); 2653 if (ipif->ipif_id != 0) { 2654 (void) snprintf(ibuf + 2655 ill->ill_name_length - 1, 2656 sizeof (ibuf) - 2657 ill->ill_name_length + 1, 2658 ":%d", ipif->ipif_id); 2659 } 2660 cmn_err(CE_NOTE, "recovered address " 2661 "%s on %s", sbuf, ibuf); 2662 } 2663 if ((ipif->ipif_flags & IPIF_UP) && 2664 !ipif->ipif_addr_ready) { 2665 ip_rts_ifmsg(ipif); 2666 ip_rts_newaddrmsg(RTM_ADD, 0, ipif); 2667 sctp_update_ipif(ipif, SCTP_IPIF_UP); 2668 } 2669 ipif->ipif_addr_ready = 1; 2670 ipif_refrele(ipif); 2671 } 2672 /* Begin defending our new address */ 2673 nce->nce_unsolicit_count = 0; 2674 dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, 2675 B_FALSE, &nce->nce_addr, &ipv6_all_hosts_mcast, 2676 nce_advert_flags(nce)); 2677 if (dropped) { 2678 nce->nce_unsolicit_count = 1; 2679 NDP_RESTART_TIMER(nce, 2680 ip_ndp_unsolicit_interval); 2681 } else if (ip_ndp_defense_interval != 0) { 2682 NDP_RESTART_TIMER(nce, ip_ndp_defense_interval); 2683 } 2684 } else { 2685 /* 2686 * This is an address we're probing to be our own, but 2687 * the ill is down. Wait until it comes back before 2688 * doing anything, but switch to reachable state so 2689 * that the restart will work. 2690 */ 2691 nce->nce_state = ND_REACHABLE; 2692 mutex_exit(&nce->nce_lock); 2693 } 2694 NCE_REFRELE(nce); 2695 return; 2696 case ND_INCOMPLETE: 2697 /* 2698 * Must be resolvers retransmit timer. 2699 */ 2700 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2701 ip6i_t *ip6i; 2702 ip6_t *ip6h; 2703 mblk_t *data_mp; 2704 2705 /* 2706 * Walk the list of packets queued, and see if there 2707 * are any multipathing probe packets. Such packets 2708 * are always queued at the head. Since this is a 2709 * retransmit timer firing, mark such packets as 2710 * delayed in ND resolution. This info will be used 2711 * in ip_wput_v6(). Multipathing probe packets will 2712 * always have an ip6i_t. Once we hit a packet without 2713 * it, we can break out of this loop. 2714 */ 2715 if (mp->b_datap->db_type == M_CTL) 2716 data_mp = mp->b_cont; 2717 else 2718 data_mp = mp; 2719 2720 ip6h = (ip6_t *)data_mp->b_rptr; 2721 if (ip6h->ip6_nxt != IPPROTO_RAW) 2722 break; 2723 2724 /* 2725 * This message should have been pulled up already in 2726 * ip_wput_v6. We can't do pullups here because the 2727 * b_next/b_prev is non-NULL. 2728 */ 2729 ip6i = (ip6i_t *)ip6h; 2730 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2731 sizeof (ip6i_t) + IPV6_HDR_LEN); 2732 2733 /* Mark this packet as delayed due to ND resolution */ 2734 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2735 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2736 } 2737 if (nce->nce_qd_mp != NULL) { 2738 ms = nce_solicit(nce, NULL); 2739 rw_exit(&ill_g_lock); 2740 if (ms == 0) { 2741 if (nce->nce_state != ND_REACHABLE) { 2742 mutex_exit(&nce->nce_lock); 2743 nce_resolv_failed(nce); 2744 ndp_delete(nce); 2745 } else { 2746 mutex_exit(&nce->nce_lock); 2747 } 2748 } else { 2749 mutex_exit(&nce->nce_lock); 2750 NDP_RESTART_TIMER(nce, (clock_t)ms); 2751 } 2752 NCE_REFRELE(nce); 2753 return; 2754 } 2755 mutex_exit(&nce->nce_lock); 2756 rw_exit(&ill_g_lock); 2757 NCE_REFRELE(nce); 2758 break; 2759 case ND_REACHABLE : 2760 rw_exit(&ill_g_lock); 2761 if (((nce->nce_flags & NCE_F_UNSOL_ADV) && 2762 nce->nce_unsolicit_count != 0) || 2763 ((nce->nce_flags & NCE_F_PERMANENT) && 2764 ip_ndp_defense_interval != 0)) { 2765 if (nce->nce_unsolicit_count > 0) 2766 nce->nce_unsolicit_count--; 2767 mutex_exit(&nce->nce_lock); 2768 dropped = nce_xmit(ill, 2769 ND_NEIGHBOR_ADVERT, 2770 ill, /* ill to be used for hw addr */ 2771 B_FALSE, /* use ill_phys_addr */ 2772 &nce->nce_addr, 2773 &ipv6_all_hosts_mcast, 2774 nce_advert_flags(nce)); 2775 if (dropped) { 2776 mutex_enter(&nce->nce_lock); 2777 nce->nce_unsolicit_count++; 2778 mutex_exit(&nce->nce_lock); 2779 } 2780 if (nce->nce_unsolicit_count != 0) { 2781 NDP_RESTART_TIMER(nce, 2782 ip_ndp_unsolicit_interval); 2783 } else { 2784 NDP_RESTART_TIMER(nce, 2785 ip_ndp_defense_interval); 2786 } 2787 } else { 2788 mutex_exit(&nce->nce_lock); 2789 } 2790 NCE_REFRELE(nce); 2791 break; 2792 default: 2793 rw_exit(&ill_g_lock); 2794 mutex_exit(&nce->nce_lock); 2795 NCE_REFRELE(nce); 2796 break; 2797 } 2798 } 2799 2800 /* 2801 * Set a link layer address from the ll_addr passed in. 2802 * Copy SAP from ill. 2803 */ 2804 static void 2805 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2806 { 2807 ill_t *ill = nce->nce_ill; 2808 uchar_t *woffset; 2809 2810 ASSERT(ll_addr != NULL); 2811 /* Always called before fast_path_probe */ 2812 ASSERT(nce->nce_fp_mp == NULL); 2813 if (ill->ill_sap_length != 0) { 2814 /* 2815 * Copy the SAP type specified in the 2816 * request into the xmit template. 2817 */ 2818 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2819 } 2820 if (ill->ill_phys_addr_length > 0) { 2821 /* 2822 * The bcopy() below used to be called for the physical address 2823 * length rather than the link layer address length. For 2824 * ethernet and many other media, the phys_addr and lla are 2825 * identical. 2826 * However, with xresolv interfaces being introduced, the 2827 * phys_addr and lla are no longer the same, and the physical 2828 * address may not have any useful meaning, so we use the lla 2829 * for IPv6 address resolution and destination addressing. 2830 * 2831 * For PPP or other interfaces with a zero length 2832 * physical address, don't do anything here. 2833 * The bcopy() with a zero phys_addr length was previously 2834 * a no-op for interfaces with a zero-length physical address. 2835 * Using the lla for them would change the way they operate. 2836 * Doing nothing in such cases preserves expected behavior. 2837 */ 2838 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2839 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2840 } 2841 } 2842 2843 static boolean_t 2844 nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) 2845 { 2846 ill_t *ill = nce->nce_ill; 2847 uchar_t *ll_offset; 2848 2849 ASSERT(nce->nce_res_mp != NULL); 2850 if (ll_addr == NULL) 2851 return (B_FALSE); 2852 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2853 if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) 2854 return (B_TRUE); 2855 return (B_FALSE); 2856 } 2857 2858 /* 2859 * Updates the link layer address or the reachability state of 2860 * a cache entry. Reset probe counter if needed. 2861 */ 2862 static void 2863 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2864 { 2865 ill_t *ill = nce->nce_ill; 2866 boolean_t need_stop_timer = B_FALSE; 2867 boolean_t need_fastpath_update = B_FALSE; 2868 2869 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2870 ASSERT(nce->nce_ipversion == IPV6_VERSION); 2871 /* 2872 * If this interface does not do NUD, there is no point 2873 * in allowing an update to the cache entry. Although 2874 * we will respond to NS. 2875 * The only time we accept an update for a resolver when 2876 * NUD is turned off is when it has just been created. 2877 * Non-Resolvers will always be created as REACHABLE. 2878 */ 2879 if (new_state != ND_UNCHANGED) { 2880 if ((nce->nce_flags & NCE_F_NONUD) && 2881 (nce->nce_state != ND_INCOMPLETE)) 2882 return; 2883 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2884 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2885 need_stop_timer = B_TRUE; 2886 if (new_state == ND_REACHABLE) 2887 nce->nce_last = TICK_TO_MSEC(lbolt64); 2888 else { 2889 /* We force NUD in this case */ 2890 nce->nce_last = 0; 2891 } 2892 nce->nce_state = new_state; 2893 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2894 } 2895 /* 2896 * In case of fast path we need to free the the fastpath 2897 * M_DATA and do another probe. Otherwise we can just 2898 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2899 * whatever packets that happens to be transmitting at the time. 2900 */ 2901 if (new_ll_addr != NULL) { 2902 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2903 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2904 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2905 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2906 if (nce->nce_fp_mp != NULL) { 2907 freemsg(nce->nce_fp_mp); 2908 nce->nce_fp_mp = NULL; 2909 } 2910 need_fastpath_update = B_TRUE; 2911 } 2912 mutex_exit(&nce->nce_lock); 2913 if (need_stop_timer) { 2914 (void) untimeout(nce->nce_timeout_id); 2915 nce->nce_timeout_id = 0; 2916 } 2917 if (need_fastpath_update) 2918 nce_fastpath(nce); 2919 mutex_enter(&nce->nce_lock); 2920 } 2921 2922 void 2923 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 2924 { 2925 uint_t count = 0; 2926 mblk_t **mpp; 2927 2928 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2929 2930 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2931 mpp = &(*mpp)->b_next) { 2932 if (++count > 2933 nce->nce_ill->ill_max_buf) { 2934 mblk_t *tmp = nce->nce_qd_mp->b_next; 2935 2936 nce->nce_qd_mp->b_next = NULL; 2937 nce->nce_qd_mp->b_prev = NULL; 2938 freemsg(nce->nce_qd_mp); 2939 nce->nce_qd_mp = tmp; 2940 } 2941 } 2942 /* put this on the list */ 2943 if (head_insert) { 2944 mp->b_next = nce->nce_qd_mp; 2945 nce->nce_qd_mp = mp; 2946 } else { 2947 *mpp = mp; 2948 } 2949 } 2950 2951 static void 2952 nce_queue_mp(nce_t *nce, mblk_t *mp) 2953 { 2954 boolean_t head_insert = B_FALSE; 2955 ip6_t *ip6h; 2956 ip6i_t *ip6i; 2957 mblk_t *data_mp; 2958 2959 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2960 2961 if (mp->b_datap->db_type == M_CTL) 2962 data_mp = mp->b_cont; 2963 else 2964 data_mp = mp; 2965 ip6h = (ip6_t *)data_mp->b_rptr; 2966 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2967 /* 2968 * This message should have been pulled up already in 2969 * ip_wput_v6. We can't do pullups here because the message 2970 * could be from the nce_qd_mp which could have b_next/b_prev 2971 * non-NULL. 2972 */ 2973 ip6i = (ip6i_t *)ip6h; 2974 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2975 sizeof (ip6i_t) + IPV6_HDR_LEN); 2976 /* 2977 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 2978 * This has 2 aspects mentioned below. 2979 * 1. Perform head insertion in the nce_qd_mp for these packets. 2980 * This ensures that next retransmit of ND solicitation 2981 * will use the interface specified by the probe packet, 2982 * for both NS and NA. This corresponds to the src address 2983 * in the IPv6 packet. If we insert at tail, we will be 2984 * depending on the packet at the head for successful 2985 * ND resolution. This is not reliable, because the interface 2986 * on which the NA arrives could be different from the interface 2987 * on which the NS was sent, and if the receiving interface is 2988 * failed, it will appear that the sending interface is also 2989 * failed, causing in.mpathd to misdiagnose this as link 2990 * failure. 2991 * 2. Drop the original packet, if the ND resolution did not 2992 * succeed in the first attempt. However we will create the 2993 * nce and the ire, as soon as the ND resolution succeeds. 2994 * We don't gain anything by queueing multiple probe packets 2995 * and sending them back-to-back once resolution succeeds. 2996 * It is sufficient to send just 1 packet after ND resolution 2997 * succeeds. Since mpathd is sending down probe packets at a 2998 * constant rate, we don't need to send the queued packet. We 2999 * need to queue it only for NDP resolution. The benefit of 3000 * dropping the probe packets that were delayed in ND 3001 * resolution, is that in.mpathd will not see inflated 3002 * RTT. If the ND resolution does not succeed within 3003 * in.mpathd's failure detection time, mpathd may detect 3004 * a failure, and it does not matter whether the packet 3005 * was queued or dropped. 3006 */ 3007 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 3008 head_insert = B_TRUE; 3009 } 3010 3011 nce_queue_mp_common(nce, mp, head_insert); 3012 } 3013 3014 /* 3015 * Called when address resolution failed due to a timeout. 3016 * Send an ICMP unreachable in response to all queued packets. 3017 */ 3018 void 3019 nce_resolv_failed(nce_t *nce) 3020 { 3021 mblk_t *mp, *nxt_mp, *first_mp; 3022 char buf[INET6_ADDRSTRLEN]; 3023 ip6_t *ip6h; 3024 zoneid_t zoneid = GLOBAL_ZONEID; 3025 3026 ip1dbg(("nce_resolv_failed: dst %s\n", 3027 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 3028 mutex_enter(&nce->nce_lock); 3029 mp = nce->nce_qd_mp; 3030 nce->nce_qd_mp = NULL; 3031 mutex_exit(&nce->nce_lock); 3032 while (mp != NULL) { 3033 nxt_mp = mp->b_next; 3034 mp->b_next = NULL; 3035 mp->b_prev = NULL; 3036 3037 first_mp = mp; 3038 if (mp->b_datap->db_type == M_CTL) { 3039 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 3040 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3041 zoneid = io->ipsec_out_zoneid; 3042 ASSERT(zoneid != ALL_ZONES); 3043 mp = mp->b_cont; 3044 } 3045 3046 ip6h = (ip6_t *)mp->b_rptr; 3047 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3048 ip6i_t *ip6i; 3049 /* 3050 * This message should have been pulled up already 3051 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 3052 * the header is pulled up. 3053 */ 3054 ip6i = (ip6i_t *)ip6h; 3055 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 3056 sizeof (ip6i_t) + IPV6_HDR_LEN); 3057 mp->b_rptr += sizeof (ip6i_t); 3058 } 3059 /* 3060 * Ignore failure since icmp_unreachable_v6 will silently 3061 * drop packets with an unspecified source address. 3062 */ 3063 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid); 3064 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 3065 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid); 3066 mp = nxt_mp; 3067 } 3068 } 3069 3070 /* 3071 * Called by SIOCSNDP* ioctl to add/change an nce entry 3072 * and the corresponding attributes. 3073 * Disallow states other than ND_REACHABLE or ND_STALE. 3074 */ 3075 int 3076 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 3077 { 3078 sin6_t *sin6; 3079 in6_addr_t *addr; 3080 nce_t *nce; 3081 int err; 3082 uint16_t new_flags = 0; 3083 uint16_t old_flags = 0; 3084 int inflags = lnr->lnr_flags; 3085 3086 ASSERT(ill->ill_isv6); 3087 if ((lnr->lnr_state_create != ND_REACHABLE) && 3088 (lnr->lnr_state_create != ND_STALE)) 3089 return (EINVAL); 3090 3091 sin6 = (sin6_t *)&lnr->lnr_addr; 3092 addr = &sin6->sin6_addr; 3093 3094 mutex_enter(&ndp6.ndp_g_lock); 3095 /* We know it can not be mapping so just look in the hash table */ 3096 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); 3097 nce = nce_lookup_addr(ill, addr, nce); 3098 if (nce != NULL) 3099 new_flags = nce->nce_flags; 3100 3101 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 3102 case NDF_ISROUTER_ON: 3103 new_flags |= NCE_F_ISROUTER; 3104 break; 3105 case NDF_ISROUTER_OFF: 3106 new_flags &= ~NCE_F_ISROUTER; 3107 break; 3108 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3109 mutex_exit(&ndp6.ndp_g_lock); 3110 if (nce != NULL) 3111 NCE_REFRELE(nce); 3112 return (EINVAL); 3113 } 3114 3115 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3116 case NDF_ANYCAST_ON: 3117 new_flags |= NCE_F_ANYCAST; 3118 break; 3119 case NDF_ANYCAST_OFF: 3120 new_flags &= ~NCE_F_ANYCAST; 3121 break; 3122 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3123 mutex_exit(&ndp6.ndp_g_lock); 3124 if (nce != NULL) 3125 NCE_REFRELE(nce); 3126 return (EINVAL); 3127 } 3128 3129 switch (inflags & (NDF_PROXY_ON|NDF_PROXY_OFF)) { 3130 case NDF_PROXY_ON: 3131 new_flags |= NCE_F_PROXY; 3132 break; 3133 case NDF_PROXY_OFF: 3134 new_flags &= ~NCE_F_PROXY; 3135 break; 3136 case (NDF_PROXY_OFF|NDF_PROXY_ON): 3137 mutex_exit(&ndp6.ndp_g_lock); 3138 if (nce != NULL) 3139 NCE_REFRELE(nce); 3140 return (EINVAL); 3141 } 3142 3143 if (nce == NULL) { 3144 err = ndp_add(ill, 3145 (uchar_t *)lnr->lnr_hdw_addr, 3146 addr, 3147 &ipv6_all_ones, 3148 &ipv6_all_zeros, 3149 0, 3150 new_flags, 3151 lnr->lnr_state_create, 3152 &nce, 3153 NULL, 3154 NULL); 3155 if (err != 0) { 3156 mutex_exit(&ndp6.ndp_g_lock); 3157 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 3158 return (err); 3159 } 3160 } 3161 old_flags = nce->nce_flags; 3162 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3163 /* 3164 * Router turned to host, delete all ires. 3165 * XXX Just delete the entry, but we need to add too. 3166 */ 3167 nce->nce_flags &= ~NCE_F_ISROUTER; 3168 mutex_exit(&ndp6.ndp_g_lock); 3169 ndp_delete(nce); 3170 NCE_REFRELE(nce); 3171 return (0); 3172 } 3173 mutex_exit(&ndp6.ndp_g_lock); 3174 3175 mutex_enter(&nce->nce_lock); 3176 nce->nce_flags = new_flags; 3177 mutex_exit(&nce->nce_lock); 3178 /* 3179 * Note that we ignore the state at this point, which 3180 * should be either STALE or REACHABLE. Instead we let 3181 * the link layer address passed in to determine the state 3182 * much like incoming packets. 3183 */ 3184 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3185 NCE_REFRELE(nce); 3186 return (0); 3187 } 3188 3189 /* 3190 * If the device driver supports it, we make nce_fp_mp to have 3191 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 3192 * The caller insures there is hold on nce for this function. 3193 * Note that since ill_fastpath_probe() copies the mblk there is 3194 * no need for the hold beyond this function. 3195 */ 3196 static void 3197 nce_fastpath(nce_t *nce) 3198 { 3199 ill_t *ill = nce->nce_ill; 3200 int res; 3201 3202 ASSERT(ill != NULL); 3203 if (nce->nce_fp_mp != NULL) { 3204 /* Already contains fastpath info */ 3205 return; 3206 } 3207 if (nce->nce_res_mp != NULL) { 3208 nce_fastpath_list_add(nce); 3209 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3210 /* 3211 * EAGAIN is an indication of a transient error 3212 * i.e. allocation failure etc. leave the nce in the list it 3213 * will be updated when another probe happens for another ire 3214 * if not it will be taken out of the list when the ire is 3215 * deleted. 3216 */ 3217 3218 if (res != 0 && res != EAGAIN) 3219 nce_fastpath_list_delete(nce); 3220 } 3221 } 3222 3223 /* 3224 * Drain the list of nce's waiting for fastpath response. 3225 */ 3226 void 3227 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3228 void *arg) 3229 { 3230 3231 nce_t *next_nce; 3232 nce_t *current_nce; 3233 nce_t *first_nce; 3234 nce_t *prev_nce = NULL; 3235 3236 ASSERT(ill != NULL && ill->ill_isv6); 3237 3238 mutex_enter(&ill->ill_lock); 3239 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3240 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3241 next_nce = current_nce->nce_fastpath; 3242 /* 3243 * Take it off the list if we're flushing, or if the callback 3244 * routine tells us to do so. Otherwise, leave the nce in the 3245 * fastpath list to handle any pending response from the lower 3246 * layer. We can't drain the list when the callback routine 3247 * comparison failed, because the response is asynchronous in 3248 * nature, and may not arrive in the same order as the list 3249 * insertion. 3250 */ 3251 if (func == NULL || func(current_nce, arg)) { 3252 current_nce->nce_fastpath = NULL; 3253 if (current_nce == first_nce) 3254 ill->ill_fastpath_list = first_nce = next_nce; 3255 else 3256 prev_nce->nce_fastpath = next_nce; 3257 } else { 3258 /* previous element that is still in the list */ 3259 prev_nce = current_nce; 3260 } 3261 current_nce = next_nce; 3262 } 3263 mutex_exit(&ill->ill_lock); 3264 } 3265 3266 /* 3267 * Add nce to the nce fastpath list. 3268 */ 3269 void 3270 nce_fastpath_list_add(nce_t *nce) 3271 { 3272 ill_t *ill; 3273 3274 ill = nce->nce_ill; 3275 ASSERT(ill != NULL && ill->ill_isv6); 3276 3277 mutex_enter(&ill->ill_lock); 3278 mutex_enter(&nce->nce_lock); 3279 3280 /* 3281 * if nce has not been deleted and 3282 * is not already in the list add it. 3283 */ 3284 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3285 (nce->nce_fastpath == NULL)) { 3286 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3287 ill->ill_fastpath_list = nce; 3288 } 3289 3290 mutex_exit(&nce->nce_lock); 3291 mutex_exit(&ill->ill_lock); 3292 } 3293 3294 /* 3295 * remove nce from the nce fastpath list. 3296 */ 3297 void 3298 nce_fastpath_list_delete(nce_t *nce) 3299 { 3300 nce_t *nce_ptr; 3301 3302 ill_t *ill; 3303 3304 ill = nce->nce_ill; 3305 ASSERT(ill != NULL); 3306 if (!ill->ill_isv6) { 3307 /* 3308 * v4 nce_t's do not have nce_fastpath set. 3309 */ 3310 return; 3311 } 3312 3313 mutex_enter(&ill->ill_lock); 3314 if (nce->nce_fastpath == NULL) 3315 goto done; 3316 3317 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3318 3319 if (ill->ill_fastpath_list == nce) { 3320 ill->ill_fastpath_list = nce->nce_fastpath; 3321 } else { 3322 nce_ptr = ill->ill_fastpath_list; 3323 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3324 if (nce_ptr->nce_fastpath == nce) { 3325 nce_ptr->nce_fastpath = nce->nce_fastpath; 3326 break; 3327 } 3328 nce_ptr = nce_ptr->nce_fastpath; 3329 } 3330 } 3331 3332 nce->nce_fastpath = NULL; 3333 done: 3334 mutex_exit(&ill->ill_lock); 3335 } 3336 3337 /* 3338 * Update all NCE's that are not in fastpath mode and 3339 * have an nce_fp_mp that matches mp. mp->b_cont contains 3340 * the fastpath header. 3341 * 3342 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 3343 */ 3344 boolean_t 3345 ndp_fastpath_update(nce_t *nce, void *arg) 3346 { 3347 mblk_t *mp, *fp_mp; 3348 uchar_t *mp_rptr, *ud_mp_rptr; 3349 mblk_t *ud_mp = nce->nce_res_mp; 3350 ptrdiff_t cmplen; 3351 3352 if (nce->nce_flags & NCE_F_MAPPING) 3353 return (B_TRUE); 3354 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3355 return (B_TRUE); 3356 3357 ip2dbg(("ndp_fastpath_update: trying\n")); 3358 mp = (mblk_t *)arg; 3359 mp_rptr = mp->b_rptr; 3360 cmplen = mp->b_wptr - mp_rptr; 3361 ASSERT(cmplen >= 0); 3362 ud_mp_rptr = ud_mp->b_rptr; 3363 /* 3364 * The nce is locked here to prevent any other threads 3365 * from accessing and changing nce_res_mp when the IPv6 address 3366 * becomes resolved to an lla while we're in the middle 3367 * of looking at and comparing the hardware address (lla). 3368 * It is also locked to prevent multiple threads in nce_fastpath_update 3369 * from examining nce_res_mp atthe same time. 3370 */ 3371 mutex_enter(&nce->nce_lock); 3372 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3373 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3374 mutex_exit(&nce->nce_lock); 3375 /* 3376 * Don't take the ire off the fastpath list yet, 3377 * since the response may come later. 3378 */ 3379 return (B_FALSE); 3380 } 3381 /* Matched - install mp as the fastpath mp */ 3382 ip1dbg(("ndp_fastpath_update: match\n")); 3383 fp_mp = dupb(mp->b_cont); 3384 if (fp_mp != NULL) { 3385 nce->nce_fp_mp = fp_mp; 3386 } 3387 mutex_exit(&nce->nce_lock); 3388 return (B_TRUE); 3389 } 3390 3391 /* 3392 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3393 * driver. Note that it assumes IP is exclusive... 3394 */ 3395 /* ARGSUSED */ 3396 void 3397 ndp_fastpath_flush(nce_t *nce, char *arg) 3398 { 3399 if (nce->nce_flags & NCE_F_MAPPING) 3400 return; 3401 /* No fastpath info? */ 3402 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3403 return; 3404 3405 /* Just delete the NCE... */ 3406 ndp_delete(nce); 3407 } 3408 3409 /* 3410 * Return a pointer to a given option in the packet. 3411 * Assumes that option part of the packet have already been validated. 3412 */ 3413 nd_opt_hdr_t * 3414 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3415 { 3416 while (optlen > 0) { 3417 if (opt->nd_opt_type == opt_type) 3418 return (opt); 3419 optlen -= 8 * opt->nd_opt_len; 3420 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3421 } 3422 return (NULL); 3423 } 3424 3425 /* 3426 * Verify all option lengths present are > 0, also check to see 3427 * if the option lengths and packet length are consistent. 3428 */ 3429 boolean_t 3430 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3431 { 3432 ASSERT(opt != NULL); 3433 while (optlen > 0) { 3434 if (opt->nd_opt_len == 0) 3435 return (B_FALSE); 3436 optlen -= 8 * opt->nd_opt_len; 3437 if (optlen < 0) 3438 return (B_FALSE); 3439 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3440 } 3441 return (B_TRUE); 3442 } 3443 3444 /* 3445 * ndp_walk function. 3446 * Free a fraction of the NCE cache entries. 3447 * A fraction of zero means to not free any in that category. 3448 */ 3449 void 3450 ndp_cache_reclaim(nce_t *nce, char *arg) 3451 { 3452 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3453 uint_t rand; 3454 3455 if (nce->nce_flags & NCE_F_PERMANENT) 3456 return; 3457 3458 rand = (uint_t)lbolt + 3459 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3460 if (ncr->ncr_host != 0 && 3461 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3462 ndp_delete(nce); 3463 return; 3464 } 3465 } 3466 3467 /* 3468 * ndp_walk function. 3469 * Count the number of NCEs that can be deleted. 3470 * These would be hosts but not routers. 3471 */ 3472 void 3473 ndp_cache_count(nce_t *nce, char *arg) 3474 { 3475 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3476 3477 if (nce->nce_flags & NCE_F_PERMANENT) 3478 return; 3479 3480 ncc->ncc_total++; 3481 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3482 ncc->ncc_host++; 3483 } 3484 3485 #ifdef NCE_DEBUG 3486 th_trace_t * 3487 th_trace_nce_lookup(nce_t *nce) 3488 { 3489 int bucket_id; 3490 th_trace_t *th_trace; 3491 3492 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3493 3494 bucket_id = IP_TR_HASH(curthread); 3495 ASSERT(bucket_id < IP_TR_HASH_MAX); 3496 3497 for (th_trace = nce->nce_trace[bucket_id]; th_trace != NULL; 3498 th_trace = th_trace->th_next) { 3499 if (th_trace->th_id == curthread) 3500 return (th_trace); 3501 } 3502 return (NULL); 3503 } 3504 3505 void 3506 nce_trace_ref(nce_t *nce) 3507 { 3508 int bucket_id; 3509 th_trace_t *th_trace; 3510 3511 /* 3512 * Attempt to locate the trace buffer for the curthread. 3513 * If it does not exist, then allocate a new trace buffer 3514 * and link it in list of trace bufs for this ipif, at the head 3515 */ 3516 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3517 3518 if (nce->nce_trace_disable == B_TRUE) 3519 return; 3520 3521 th_trace = th_trace_nce_lookup(nce); 3522 if (th_trace == NULL) { 3523 bucket_id = IP_TR_HASH(curthread); 3524 th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), 3525 KM_NOSLEEP); 3526 if (th_trace == NULL) { 3527 nce->nce_trace_disable = B_TRUE; 3528 nce_trace_inactive(nce); 3529 return; 3530 } 3531 th_trace->th_id = curthread; 3532 th_trace->th_next = nce->nce_trace[bucket_id]; 3533 th_trace->th_prev = &nce->nce_trace[bucket_id]; 3534 if (th_trace->th_next != NULL) 3535 th_trace->th_next->th_prev = &th_trace->th_next; 3536 nce->nce_trace[bucket_id] = th_trace; 3537 } 3538 ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1); 3539 th_trace->th_refcnt++; 3540 th_trace_rrecord(th_trace); 3541 } 3542 3543 void 3544 nce_untrace_ref(nce_t *nce) 3545 { 3546 th_trace_t *th_trace; 3547 3548 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3549 3550 if (nce->nce_trace_disable == B_TRUE) 3551 return; 3552 3553 th_trace = th_trace_nce_lookup(nce); 3554 ASSERT(th_trace != NULL && th_trace->th_refcnt > 0); 3555 3556 th_trace_rrecord(th_trace); 3557 th_trace->th_refcnt--; 3558 } 3559 3560 void 3561 nce_trace_inactive(nce_t *nce) 3562 { 3563 th_trace_t *th_trace; 3564 int i; 3565 3566 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3567 3568 for (i = 0; i < IP_TR_HASH_MAX; i++) { 3569 while (nce->nce_trace[i] != NULL) { 3570 th_trace = nce->nce_trace[i]; 3571 3572 /* unlink th_trace and free it */ 3573 nce->nce_trace[i] = th_trace->th_next; 3574 if (th_trace->th_next != NULL) 3575 th_trace->th_next->th_prev = 3576 &nce->nce_trace[i]; 3577 3578 th_trace->th_next = NULL; 3579 th_trace->th_prev = NULL; 3580 kmem_free(th_trace, sizeof (th_trace_t)); 3581 } 3582 } 3583 3584 } 3585 3586 /* ARGSUSED */ 3587 int 3588 nce_thread_exit(nce_t *nce, caddr_t arg) 3589 { 3590 th_trace_t *th_trace; 3591 3592 mutex_enter(&nce->nce_lock); 3593 th_trace = th_trace_nce_lookup(nce); 3594 3595 if (th_trace == NULL) { 3596 mutex_exit(&nce->nce_lock); 3597 return (0); 3598 } 3599 3600 ASSERT(th_trace->th_refcnt == 0); 3601 3602 /* unlink th_trace and free it */ 3603 *th_trace->th_prev = th_trace->th_next; 3604 if (th_trace->th_next != NULL) 3605 th_trace->th_next->th_prev = th_trace->th_prev; 3606 th_trace->th_next = NULL; 3607 th_trace->th_prev = NULL; 3608 kmem_free(th_trace, sizeof (th_trace_t)); 3609 mutex_exit(&nce->nce_lock); 3610 return (0); 3611 } 3612 #endif 3613 3614 /* 3615 * Called when address resolution fails due to a timeout. 3616 * Send an ICMP unreachable in response to all queued packets. 3617 */ 3618 void 3619 arp_resolv_failed(nce_t *nce) 3620 { 3621 mblk_t *mp, *nxt_mp, *first_mp; 3622 char buf[INET6_ADDRSTRLEN]; 3623 zoneid_t zoneid = GLOBAL_ZONEID; 3624 struct in_addr ipv4addr; 3625 3626 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3627 ip3dbg(("arp_resolv_failed: dst %s\n", 3628 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3629 mutex_enter(&nce->nce_lock); 3630 mp = nce->nce_qd_mp; 3631 nce->nce_qd_mp = NULL; 3632 mutex_exit(&nce->nce_lock); 3633 3634 while (mp != NULL) { 3635 nxt_mp = mp->b_next; 3636 mp->b_next = NULL; 3637 mp->b_prev = NULL; 3638 3639 first_mp = mp; 3640 /* 3641 * Send icmp unreachable messages 3642 * to the hosts. 3643 */ 3644 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid); 3645 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3646 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3647 ICMP_HOST_UNREACHABLE, zoneid); 3648 mp = nxt_mp; 3649 } 3650 } 3651 3652 static int 3653 ndp_lookup_then_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, 3654 const in_addr_t *mask, const in_addr_t *extract_mask, 3655 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 3656 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 3657 { 3658 int err = 0; 3659 nce_t *nce; 3660 in6_addr_t addr6; 3661 3662 mutex_enter(&ndp4.ndp_g_lock); 3663 nce = *((nce_t **)NCE_HASH_PTR_V4(*addr)); 3664 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3665 nce = nce_lookup_addr(ill, &addr6, nce); 3666 if (nce == NULL) { 3667 err = ndp_add_v4(ill, 3668 hw_addr, 3669 addr, 3670 mask, 3671 extract_mask, 3672 hw_extract_start, 3673 flags, 3674 state, 3675 newnce, 3676 fp_mp, 3677 res_mp); 3678 } else { 3679 *newnce = nce; 3680 err = EEXIST; 3681 } 3682 mutex_exit(&ndp4.ndp_g_lock); 3683 return (err); 3684 } 3685 3686 /* 3687 * NDP Cache Entry creation routine for IPv4. 3688 * Mapped entries are handled in arp. 3689 * This routine must always be called with ndp4.ndp_g_lock held. 3690 * Prior to return, nce_refcnt is incremented. 3691 */ 3692 static int 3693 ndp_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, 3694 const in_addr_t *mask, const in_addr_t *extract_mask, 3695 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 3696 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 3697 { 3698 static nce_t nce_nil; 3699 nce_t *nce; 3700 mblk_t *mp; 3701 mblk_t *template; 3702 nce_t **ncep; 3703 3704 ASSERT(MUTEX_HELD(&ndp4.ndp_g_lock)); 3705 ASSERT(ill != NULL); 3706 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 3707 return (EINVAL); 3708 } 3709 ASSERT((flags & NCE_F_MAPPING) == 0); 3710 ASSERT(extract_mask == NULL); 3711 /* 3712 * Allocate the mblk to hold the nce. 3713 */ 3714 mp = allocb(sizeof (nce_t), BPRI_MED); 3715 if (mp == NULL) 3716 return (ENOMEM); 3717 3718 nce = (nce_t *)mp->b_rptr; 3719 mp->b_wptr = (uchar_t *)&nce[1]; 3720 *nce = nce_nil; 3721 3722 /* 3723 * This one holds link layer address; if res_mp has been provided 3724 * by the caller, accept it without any further checks. Otherwise, 3725 * for V4, we fill it up with ill_resolver_mp here, then in 3726 * in ire_arpresolve(), we fill it up with the ARP query 3727 * once its formulated. 3728 */ 3729 if (res_mp != NULL) { 3730 template = res_mp; 3731 } else { 3732 template = copyb(ill->ill_resolver_mp); 3733 } 3734 if (template == NULL) { 3735 freeb(mp); 3736 return (ENOMEM); 3737 } 3738 nce->nce_ill = ill; 3739 nce->nce_ipversion = IPV4_VERSION; 3740 nce->nce_flags = flags; 3741 nce->nce_state = state; 3742 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3743 nce->nce_rcnt = ill->ill_xmit_count; 3744 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3745 if (*mask == IP_HOST_MASK) { 3746 nce->nce_mask = ipv6_all_ones; 3747 } else { 3748 IN6_IPADDR_TO_V4MAPPED(*mask, &nce->nce_mask); 3749 } 3750 nce->nce_extract_mask = ipv6_all_zeros; 3751 nce->nce_ll_extract_start = hw_extract_start; 3752 nce->nce_fp_mp = (fp_mp? fp_mp : NULL); 3753 nce->nce_res_mp = template; 3754 if (state == ND_REACHABLE) 3755 nce->nce_last = TICK_TO_MSEC(lbolt64); 3756 else 3757 nce->nce_last = 0; 3758 nce->nce_qd_mp = NULL; 3759 nce->nce_mp = mp; 3760 if (hw_addr != NULL) 3761 nce_set_ll(nce, hw_addr); 3762 /* This one is for nce getting created */ 3763 nce->nce_refcnt = 1; 3764 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3765 ncep = ((nce_t **)NCE_HASH_PTR_V4(*addr)); 3766 3767 #ifdef NCE_DEBUG 3768 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 3769 #endif 3770 /* 3771 * Atomically ensure that the ill is not CONDEMNED, before 3772 * adding the NCE. 3773 */ 3774 mutex_enter(&ill->ill_lock); 3775 if (ill->ill_state_flags & ILL_CONDEMNED) { 3776 mutex_exit(&ill->ill_lock); 3777 freeb(mp); 3778 if (res_mp == NULL) { 3779 /* 3780 * template was locally allocated. need to free it. 3781 */ 3782 freeb(template); 3783 } 3784 return (EINVAL); 3785 } 3786 if ((nce->nce_next = *ncep) != NULL) 3787 nce->nce_next->nce_ptpn = &nce->nce_next; 3788 *ncep = nce; 3789 nce->nce_ptpn = ncep; 3790 *newnce = nce; 3791 /* This one is for nce being used by an active thread */ 3792 NCE_REFHOLD(*newnce); 3793 3794 /* Bump up the number of nce's referencing this ill */ 3795 ill->ill_nce_cnt++; 3796 mutex_exit(&ill->ill_lock); 3797 return (0); 3798 } 3799 3800 void 3801 ndp_flush_qd_mp(nce_t *nce) 3802 { 3803 mblk_t *qd_mp, *qd_next; 3804 3805 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3806 qd_mp = nce->nce_qd_mp; 3807 nce->nce_qd_mp = NULL; 3808 while (qd_mp != NULL) { 3809 qd_next = qd_mp->b_next; 3810 qd_mp->b_next = NULL; 3811 qd_mp->b_prev = NULL; 3812 freemsg(qd_mp); 3813 qd_mp = qd_next; 3814 } 3815 } 3816 3817 nce_t * 3818 nce_reinit(nce_t *nce) 3819 { 3820 nce_t *newnce = NULL; 3821 in_addr_t nce_addr, nce_mask; 3822 3823 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3824 IN6_V4MAPPED_TO_IPADDR(&nce->nce_mask, nce_mask); 3825 /* 3826 * delete the old one. this will get rid of any ire's pointing 3827 * at this nce. 3828 */ 3829 ndp_delete(nce); 3830 /* 3831 * create a new nce with the same addr and mask. 3832 */ 3833 mutex_enter(&ndp4.ndp_g_lock); 3834 (void) ndp_add_v4(nce->nce_ill, NULL, &nce_addr, &nce_mask, NULL, 0, 0, 3835 ND_INITIAL, &newnce, NULL, NULL); 3836 mutex_exit(&ndp4.ndp_g_lock); 3837 /* 3838 * refrele the old nce. 3839 */ 3840 NCE_REFRELE(nce); 3841 return (newnce); 3842 } 3843 3844 /* 3845 * ndp_walk routine to delete all entries that have a given destination or 3846 * gateway address and cached link layer (MAC) address. This is used when ARP 3847 * informs us that a network-to-link-layer mapping may have changed. 3848 */ 3849 void 3850 nce_delete_hw_changed(nce_t *nce, void *arg) 3851 { 3852 nce_hw_map_t *hwm = arg; 3853 mblk_t *mp; 3854 dl_unitdata_req_t *dlu; 3855 uchar_t *macaddr; 3856 ill_t *ill; 3857 int saplen; 3858 ipaddr_t nce_addr; 3859 3860 if (nce->nce_state != ND_REACHABLE) 3861 return; 3862 3863 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3864 if (nce_addr != hwm->hwm_addr) 3865 return; 3866 3867 mutex_enter(&nce->nce_lock); 3868 if ((mp = nce->nce_res_mp) == NULL) { 3869 mutex_exit(&nce->nce_lock); 3870 return; 3871 } 3872 dlu = (dl_unitdata_req_t *)mp->b_rptr; 3873 macaddr = (uchar_t *)(dlu + 1); 3874 ill = nce->nce_ill; 3875 if ((saplen = ill->ill_sap_length) > 0) 3876 macaddr += saplen; 3877 else 3878 saplen = -saplen; 3879 3880 /* 3881 * If the hardware address is unchanged, then leave this one alone. 3882 * Note that saplen == abs(saplen) now. 3883 */ 3884 if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && 3885 bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { 3886 mutex_exit(&nce->nce_lock); 3887 return; 3888 } 3889 mutex_exit(&nce->nce_lock); 3890 3891 DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); 3892 ndp_delete(nce); 3893 } 3894 3895 /* 3896 * This function verifies whether a given IPv4 address is potentially known to 3897 * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t, 3898 * so that it can continue to look for hardware changes on that address. 3899 */ 3900 boolean_t 3901 ndp_lookup_ipaddr(in_addr_t addr) 3902 { 3903 nce_t *nce; 3904 struct in_addr nceaddr; 3905 3906 if (addr == INADDR_ANY) 3907 return (B_FALSE); 3908 3909 mutex_enter(&ndp4.ndp_g_lock); 3910 nce = *(nce_t **)NCE_HASH_PTR_V4(addr); 3911 for (; nce != NULL; nce = nce->nce_next) { 3912 /* Note that only v4 mapped entries are in the table. */ 3913 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 3914 if (addr == nceaddr.s_addr && 3915 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 3916 /* Single flag check; no lock needed */ 3917 if (!(nce->nce_flags & NCE_F_CONDEMNED)) 3918 break; 3919 } 3920 } 3921 mutex_exit(&ndp4.ndp_g_lock); 3922 return (nce != NULL); 3923 } 3924