1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/sysmacros.h> 33 #include <sys/errno.h> 34 #include <sys/dlpi.h> 35 #include <sys/socket.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/vtrace.h> 41 #include <sys/kmem.h> 42 #include <sys/zone.h> 43 #include <sys/ethernet.h> 44 #include <sys/sdt.h> 45 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/if_dl.h> 49 #include <net/route.h> 50 #include <netinet/in.h> 51 #include <netinet/ip6.h> 52 #include <netinet/icmp6.h> 53 54 #include <inet/common.h> 55 #include <inet/mi.h> 56 #include <inet/mib2.h> 57 #include <inet/nd.h> 58 #include <inet/ip.h> 59 #include <inet/ip_impl.h> 60 #include <inet/ip_if.h> 61 #include <inet/ip_ire.h> 62 #include <inet/ip_rts.h> 63 #include <inet/ip6.h> 64 #include <inet/ip_ndp.h> 65 #include <inet/ipsec_impl.h> 66 #include 
<inet/ipsec_info.h> 67 #include <inet/sctp_ip.h> 68 69 /* 70 * Function names with nce_ prefix are static while function 71 * names with ndp_ prefix are used by rest of the IP. 72 * 73 * Lock ordering: 74 * 75 * ndp_g_lock -> ill_lock -> nce_lock 76 * 77 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 78 * nce_next. Nce_lock protects the contents of the NCE (particularly 79 * nce_refcnt). 80 */ 81 82 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 83 uint32_t ll_addr_len); 84 static void nce_fastpath(nce_t *nce); 85 static void nce_ire_delete(nce_t *nce); 86 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 87 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 88 static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *, nce_t *); 89 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 90 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 91 uchar_t *addr); 92 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 93 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 94 static void nce_report1(nce_t *nce, uchar_t *mp_arg); 95 static mblk_t *nce_udreq_alloc(ill_t *ill); 96 static void nce_update(nce_t *nce, uint16_t new_state, 97 uchar_t *new_ll_addr); 98 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 99 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 100 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 101 const in6_addr_t *target, int flag); 102 extern void th_trace_rrecord(th_trace_t *); 103 static int ndp_lookup_then_add_v6(ill_t *, uchar_t *, 104 const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, 105 uint32_t, uint16_t, uint16_t, nce_t **, mblk_t *, mblk_t *); 106 static int ndp_lookup_then_add_v4(ill_t *, uchar_t *, 107 const in_addr_t *, const in_addr_t *, const in_addr_t *, 108 uint32_t, uint16_t, uint16_t, nce_t **, mblk_t *, mblk_t *); 109 static int ndp_add_v6(ill_t *, uchar_t *, const in6_addr_t *, 110 const 
in6_addr_t *, const in6_addr_t *, uint32_t, uint16_t, uint16_t, 111 nce_t **); 112 static int ndp_add_v4(ill_t *, uchar_t *, const in_addr_t *, 113 const in_addr_t *, const in_addr_t *, uint32_t, uint16_t, uint16_t, 114 nce_t **, mblk_t *, mblk_t *); 115 116 117 #ifdef NCE_DEBUG 118 void nce_trace_inactive(nce_t *); 119 #endif 120 121 ndp_g_t ndp4, ndp6; 122 123 #define NCE_HASH_PTR_V4(addr) \ 124 (&(ndp4.nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 125 126 #define NCE_HASH_PTR_V6(addr) \ 127 (&(ndp6.nce_hash_tbl[NCE_ADDR_HASH_V6(addr, NCE_TABLE_SIZE)])) 128 129 /* 130 * Compute default flags to use for an advertisement of this nce's address. 131 */ 132 static int 133 nce_advert_flags(const nce_t *nce) 134 { 135 int flag = 0; 136 137 if (nce->nce_flags & NCE_F_ISROUTER) 138 flag |= NDP_ISROUTER; 139 if (!(nce->nce_flags & NCE_F_PROXY)) 140 flag |= NDP_ORIDE; 141 return (flag); 142 } 143 144 int 145 ndp_add(ill_t *ill, uchar_t *hw_addr, const void *addr, 146 const void *mask, const void *extract_mask, 147 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 148 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 149 { 150 int status; 151 152 if (ill->ill_isv6) 153 status = ndp_add_v6(ill, hw_addr, (in6_addr_t *)addr, 154 (in6_addr_t *)mask, (in6_addr_t *)extract_mask, 155 hw_extract_start, flags, state, newnce); 156 else 157 status = ndp_add_v4(ill, hw_addr, (in_addr_t *)addr, 158 (in_addr_t *)mask, (in_addr_t *)extract_mask, 159 hw_extract_start, flags, state, newnce, fp_mp, res_mp); 160 return (status); 161 } 162 163 /* Non-tunable probe interval, based on link capabilities */ 164 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 165 166 /* 167 * NDP Cache Entry creation routine. 168 * Mapped entries will never do NUD . 169 * This routine must always be called with ndp6.ndp_g_lock held. 170 * Prior to return, nce_refcnt is incremented. 
171 */ 172 static int 173 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 174 const in6_addr_t *mask, const in6_addr_t *extract_mask, 175 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 176 nce_t **newnce) 177 { 178 static nce_t nce_nil; 179 nce_t *nce; 180 mblk_t *mp; 181 mblk_t *template; 182 nce_t **ncep; 183 int err; 184 boolean_t dropped = B_FALSE; 185 186 ASSERT(MUTEX_HELD(&ndp6.ndp_g_lock)); 187 ASSERT(ill != NULL && ill->ill_isv6); 188 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 189 ip0dbg(("ndp_add: no addr\n")); 190 return (EINVAL); 191 } 192 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 193 ip0dbg(("ndp_add: flags = %x\n", (int)flags)); 194 return (EINVAL); 195 } 196 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 197 (flags & NCE_F_MAPPING)) { 198 ip0dbg(("ndp_add: extract mask zero for mapping")); 199 return (EINVAL); 200 } 201 /* 202 * Allocate the mblk to hold the nce. 203 * 204 * XXX This can come out of a separate cache - nce_cache. 205 * We don't need the mp anymore as there are no more 206 * "qwriter"s 207 */ 208 mp = allocb(sizeof (nce_t), BPRI_MED); 209 if (mp == NULL) 210 return (ENOMEM); 211 212 nce = (nce_t *)mp->b_rptr; 213 mp->b_wptr = (uchar_t *)&nce[1]; 214 *nce = nce_nil; 215 216 /* 217 * This one holds link layer address 218 */ 219 if (ill->ill_net_type == IRE_IF_RESOLVER) { 220 template = nce_udreq_alloc(ill); 221 } else { 222 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 223 ASSERT((ill->ill_resolver_mp != NULL)); 224 template = copyb(ill->ill_resolver_mp); 225 } 226 if (template == NULL) { 227 freeb(mp); 228 return (ENOMEM); 229 } 230 nce->nce_ill = ill; 231 nce->nce_ipversion = IPV6_VERSION; 232 nce->nce_flags = flags; 233 nce->nce_state = state; 234 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 235 nce->nce_rcnt = ill->ill_xmit_count; 236 nce->nce_addr = *addr; 237 nce->nce_mask = *mask; 238 nce->nce_extract_mask = *extract_mask; 239 nce->nce_ll_extract_start = hw_extract_start; 240 nce->nce_fp_mp = NULL; 241 
nce->nce_res_mp = template; 242 if (state == ND_REACHABLE) 243 nce->nce_last = TICK_TO_MSEC(lbolt64); 244 else 245 nce->nce_last = 0; 246 nce->nce_qd_mp = NULL; 247 nce->nce_mp = mp; 248 if (hw_addr != NULL) 249 nce_set_ll(nce, hw_addr); 250 /* This one is for nce getting created */ 251 nce->nce_refcnt = 1; 252 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 253 if (nce->nce_flags & NCE_F_MAPPING) { 254 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 255 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 256 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 257 ncep = &ndp6.nce_mask_entries; 258 } else { 259 ncep = ((nce_t **)NCE_HASH_PTR_V6(*addr)); 260 } 261 262 #ifdef NCE_DEBUG 263 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 264 #endif 265 /* 266 * Atomically ensure that the ill is not CONDEMNED, before 267 * adding the NCE. 268 */ 269 mutex_enter(&ill->ill_lock); 270 if (ill->ill_state_flags & ILL_CONDEMNED) { 271 mutex_exit(&ill->ill_lock); 272 freeb(mp); 273 freeb(template); 274 return (EINVAL); 275 } 276 if ((nce->nce_next = *ncep) != NULL) 277 nce->nce_next->nce_ptpn = &nce->nce_next; 278 *ncep = nce; 279 nce->nce_ptpn = ncep; 280 *newnce = nce; 281 /* This one is for nce being used by an active thread */ 282 NCE_REFHOLD(*newnce); 283 284 /* Bump up the number of nce's referencing this ill */ 285 ill->ill_nce_cnt++; 286 mutex_exit(&ill->ill_lock); 287 288 err = 0; 289 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 290 mutex_enter(&nce->nce_lock); 291 mutex_exit(&ndp6.ndp_g_lock); 292 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 293 mutex_exit(&nce->nce_lock); 294 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 295 &ipv6_all_zeros, addr, NDP_PROBE); 296 if (dropped) { 297 mutex_enter(&nce->nce_lock); 298 nce->nce_pcnt++; 299 mutex_exit(&nce->nce_lock); 300 } 301 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 302 mutex_enter(&ndp6.ndp_g_lock); 303 err = EINPROGRESS; 304 } else if (flags & NCE_F_UNSOL_ADV) { 305 /* 306 * 
We account for the transmit below by assigning one 307 * less than the ndd variable. Subsequent decrements 308 * are done in ndp_timer. 309 */ 310 mutex_enter(&nce->nce_lock); 311 mutex_exit(&ndp6.ndp_g_lock); 312 nce->nce_unsolicit_count = ip_ndp_unsolicit_count - 1; 313 mutex_exit(&nce->nce_lock); 314 dropped = nce_xmit(ill, 315 ND_NEIGHBOR_ADVERT, 316 ill, /* ill to be used for extracting ill_nd_lla */ 317 B_TRUE, /* use ill_nd_lla */ 318 addr, /* Source and target of the advertisement pkt */ 319 &ipv6_all_hosts_mcast, /* Destination of the packet */ 320 nce_advert_flags(nce)); 321 mutex_enter(&nce->nce_lock); 322 if (dropped) 323 nce->nce_unsolicit_count++; 324 if (nce->nce_unsolicit_count != 0) { 325 nce->nce_timeout_id = timeout(ndp_timer, nce, 326 MSEC_TO_TICK(ip_ndp_unsolicit_interval)); 327 } 328 mutex_exit(&nce->nce_lock); 329 mutex_enter(&ndp6.ndp_g_lock); 330 } 331 /* 332 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 333 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 
334 * We call nce_fastpath from nce_update if the link layer address of 335 * the peer changes from nce_update 336 */ 337 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 338 nce_fastpath(nce); 339 return (err); 340 } 341 342 int 343 ndp_lookup_then_add(ill_t *ill, uchar_t *hw_addr, const void *addr, 344 const void *mask, const void *extract_mask, 345 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 346 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 347 { 348 int status; 349 350 if (ill->ill_isv6) { 351 status = ndp_lookup_then_add_v6(ill, hw_addr, 352 (in6_addr_t *)addr, (in6_addr_t *)mask, 353 (in6_addr_t *)extract_mask, hw_extract_start, flags, 354 state, newnce, fp_mp, res_mp); 355 } else { 356 status = ndp_lookup_then_add_v4(ill, hw_addr, 357 (in_addr_t *)addr, (in_addr_t *)mask, 358 (in_addr_t *)extract_mask, hw_extract_start, flags, 359 state, newnce, fp_mp, res_mp); 360 } 361 362 return (status); 363 } 364 365 static int 366 ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 367 const in6_addr_t *mask, const in6_addr_t *extract_mask, 368 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 369 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 370 { 371 int err = 0; 372 nce_t *nce; 373 374 ASSERT(ill != NULL && ill->ill_isv6); 375 mutex_enter(&ndp6.ndp_g_lock); 376 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); /* head of v6 hash table */ 377 nce = nce_lookup_addr(ill, addr, nce); 378 if (nce == NULL) { 379 err = ndp_add(ill, 380 hw_addr, 381 addr, 382 mask, 383 extract_mask, 384 hw_extract_start, 385 flags, 386 state, 387 newnce, 388 fp_mp, 389 res_mp); 390 } else { 391 *newnce = nce; 392 err = EEXIST; 393 } 394 mutex_exit(&ndp6.ndp_g_lock); 395 return (err); 396 } 397 398 /* 399 * Remove all the CONDEMNED nces from the appropriate hash table. 
400 * We create a private list of NCEs, these may have ires pointing 401 * to them, so the list will be passed through to clean up dependent 402 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 403 */ 404 static void 405 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 406 { 407 nce_t *nce1; 408 nce_t **ptpn; 409 410 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 411 ASSERT(ndp->ndp_g_walker == 0); 412 for (; nce; nce = nce1) { 413 nce1 = nce->nce_next; 414 mutex_enter(&nce->nce_lock); 415 if (nce->nce_flags & NCE_F_CONDEMNED) { 416 ptpn = nce->nce_ptpn; 417 nce1 = nce->nce_next; 418 if (nce1 != NULL) 419 nce1->nce_ptpn = ptpn; 420 *ptpn = nce1; 421 nce->nce_ptpn = NULL; 422 nce->nce_next = NULL; 423 nce->nce_next = *free_nce_list; 424 *free_nce_list = nce; 425 } 426 mutex_exit(&nce->nce_lock); 427 } 428 } 429 430 /* 431 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 432 * will return this NCE. Also no new IREs will be created that 433 * point to this NCE (See ire_add_v6). Also no new timeouts will 434 * be started (See NDP_RESTART_TIMER). 435 * 2. Cancel any currently running timeouts. 436 * 3. If there is an ndp walker, return. The walker will do the cleanup. 437 * This ensures that walkers see a consistent list of NCEs while walking. 438 * 4. Otherwise remove the NCE from the list of NCEs 439 * 5. Delete all IREs pointing to this NCE. 440 */ 441 void 442 ndp_delete(nce_t *nce) 443 { 444 nce_t **ptpn; 445 nce_t *nce1; 446 int ipversion = nce->nce_ipversion; 447 ndp_g_t *ndp = (ipversion == IPV4_VERSION ? &ndp4 : &ndp6); 448 449 /* Serialize deletes */ 450 mutex_enter(&nce->nce_lock); 451 if (nce->nce_flags & NCE_F_CONDEMNED) { 452 /* Some other thread is doing the delete */ 453 mutex_exit(&nce->nce_lock); 454 return; 455 } 456 /* 457 * Caller has a refhold. Also 1 ref for being in the list. 
Thus 458 * refcnt has to be >= 2 459 */ 460 ASSERT(nce->nce_refcnt >= 2); 461 nce->nce_flags |= NCE_F_CONDEMNED; 462 mutex_exit(&nce->nce_lock); 463 464 nce_fastpath_list_delete(nce); 465 466 /* 467 * Cancel any running timer. Timeout can't be restarted 468 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 469 * Passing invalid timeout id is fine. 470 */ 471 if (nce->nce_timeout_id != 0) { 472 (void) untimeout(nce->nce_timeout_id); 473 nce->nce_timeout_id = 0; 474 } 475 476 mutex_enter(&ndp->ndp_g_lock); 477 if (nce->nce_ptpn == NULL) { 478 /* 479 * The last ndp walker has already removed this nce from 480 * the list after we marked the nce CONDEMNED and before 481 * we grabbed the global lock. 482 */ 483 mutex_exit(&ndp->ndp_g_lock); 484 return; 485 } 486 if (ndp->ndp_g_walker > 0) { 487 /* 488 * Can't unlink. The walker will clean up 489 */ 490 ndp->ndp_g_walker_cleanup = B_TRUE; 491 mutex_exit(&ndp->ndp_g_lock); 492 return; 493 } 494 495 /* 496 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 497 * the timer since it is marked CONDEMNED. 
498 */ 499 ptpn = nce->nce_ptpn; 500 nce1 = nce->nce_next; 501 if (nce1 != NULL) 502 nce1->nce_ptpn = ptpn; 503 *ptpn = nce1; 504 nce->nce_ptpn = NULL; 505 nce->nce_next = NULL; 506 mutex_exit(&ndp->ndp_g_lock); 507 508 nce_ire_delete(nce); 509 } 510 511 void 512 ndp_inactive(nce_t *nce) 513 { 514 mblk_t **mpp; 515 ill_t *ill; 516 517 ASSERT(nce->nce_refcnt == 0); 518 ASSERT(MUTEX_HELD(&nce->nce_lock)); 519 ASSERT(nce->nce_fastpath == NULL); 520 521 /* Free all nce allocated messages */ 522 mpp = &nce->nce_first_mp_to_free; 523 do { 524 while (*mpp != NULL) { 525 mblk_t *mp; 526 527 mp = *mpp; 528 *mpp = mp->b_next; 529 530 inet_freemsg(mp); 531 } 532 } while (mpp++ != &nce->nce_last_mp_to_free); 533 534 #ifdef NCE_DEBUG 535 nce_trace_inactive(nce); 536 #endif 537 538 ill = nce->nce_ill; 539 mutex_enter(&ill->ill_lock); 540 ill->ill_nce_cnt--; 541 /* 542 * If the number of nce's associated with this ill have dropped 543 * to zero, check whether we need to restart any operation that 544 * is waiting for this to happen. 545 */ 546 if (ill->ill_nce_cnt == 0) { 547 /* ipif_ill_refrele_tail drops the ill_lock */ 548 ipif_ill_refrele_tail(ill); 549 } else { 550 mutex_exit(&ill->ill_lock); 551 } 552 mutex_destroy(&nce->nce_lock); 553 if (nce->nce_mp != NULL) 554 inet_freemsg(nce->nce_mp); 555 } 556 557 /* 558 * ndp_walk routine. Delete the nce if it is associated with the ill 559 * that is going away. Always called as a writer. 560 */ 561 void 562 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 563 { 564 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 565 ndp_delete(nce); 566 } 567 } 568 569 /* 570 * Walk a list of to be inactive NCEs and blow away all the ires. 
571 */ 572 static void 573 nce_ire_delete_list(nce_t *nce) 574 { 575 nce_t *nce_next; 576 577 ASSERT(nce != NULL); 578 while (nce != NULL) { 579 nce_next = nce->nce_next; 580 nce->nce_next = NULL; 581 582 /* 583 * It is possible for the last ndp walker (this thread) 584 * to come here after ndp_delete has marked the nce CONDEMNED 585 * and before it has removed the nce from the fastpath list 586 * or called untimeout. So we need to do it here. It is safe 587 * for both ndp_delete and this thread to do it twice or 588 * even simultaneously since each of the threads has a 589 * reference on the nce. 590 */ 591 nce_fastpath_list_delete(nce); 592 /* 593 * Cancel any running timer. Timeout can't be restarted 594 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 595 * Passing invalid timeout id is fine. 596 */ 597 if (nce->nce_timeout_id != 0) { 598 (void) untimeout(nce->nce_timeout_id); 599 nce->nce_timeout_id = 0; 600 } 601 /* 602 * We might hit this func thus in the v4 case: 603 * ipif_down->ipif_ndp_down->ndp_walk 604 */ 605 606 if (nce->nce_ipversion == IPV4_VERSION) { 607 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 608 IRE_CACHE, nce_ire_delete1, 609 (char *)nce, nce->nce_ill); 610 } else { 611 ASSERT(nce->nce_ipversion == IPV6_VERSION); 612 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 613 IRE_CACHE, nce_ire_delete1, 614 (char *)nce, nce->nce_ill); 615 } 616 NCE_REFRELE_NOTR(nce); 617 nce = nce_next; 618 } 619 } 620 621 /* 622 * Delete an ire when the nce goes away. 
623 */ 624 /* ARGSUSED */ 625 static void 626 nce_ire_delete(nce_t *nce) 627 { 628 if (nce->nce_ipversion == IPV6_VERSION) { 629 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 630 nce_ire_delete1, (char *)nce, nce->nce_ill); 631 NCE_REFRELE_NOTR(nce); 632 } else { 633 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 634 nce_ire_delete1, (char *)nce, nce->nce_ill); 635 NCE_REFRELE_NOTR(nce); 636 } 637 } 638 639 /* 640 * ire_walk routine used to delete every IRE that shares this nce 641 */ 642 static void 643 nce_ire_delete1(ire_t *ire, char *nce_arg) 644 { 645 nce_t *nce = (nce_t *)nce_arg; 646 647 ASSERT(ire->ire_type == IRE_CACHE); 648 649 if (ire->ire_nce == nce) { 650 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 651 ire_delete(ire); 652 } 653 } 654 655 /* 656 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 657 */ 658 boolean_t 659 ndp_restart_dad(nce_t *nce) 660 { 661 boolean_t started; 662 boolean_t dropped; 663 664 if (nce == NULL) 665 return (B_FALSE); 666 mutex_enter(&nce->nce_lock); 667 if (nce->nce_state == ND_PROBE) { 668 mutex_exit(&nce->nce_lock); 669 started = B_TRUE; 670 } else if (nce->nce_state == ND_REACHABLE) { 671 nce->nce_state = ND_PROBE; 672 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 673 mutex_exit(&nce->nce_lock); 674 dropped = nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, NULL, 675 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, NDP_PROBE); 676 if (dropped) { 677 mutex_enter(&nce->nce_lock); 678 nce->nce_pcnt++; 679 mutex_exit(&nce->nce_lock); 680 } 681 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 682 started = B_TRUE; 683 } else { 684 mutex_exit(&nce->nce_lock); 685 started = B_FALSE; 686 } 687 return (started); 688 } 689 690 /* 691 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 692 * If one is found, the refcnt on the nce will be incremented. 
693 */ 694 nce_t * 695 ndp_lookup_v6(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 696 { 697 nce_t *nce; 698 699 ASSERT(ill != NULL && ill->ill_isv6); 700 if (!caller_holds_lock) { 701 mutex_enter(&ndp6.ndp_g_lock); 702 } 703 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); /* head of v6 hash table */ 704 nce = nce_lookup_addr(ill, addr, nce); 705 if (nce == NULL) 706 nce = nce_lookup_mapping(ill, addr); 707 if (!caller_holds_lock) 708 mutex_exit(&ndp6.ndp_g_lock); 709 return (nce); 710 } 711 /* 712 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 713 * If one is found, the refcnt on the nce will be incremented. 714 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 715 * so we skip the nce_lookup_mapping call. 716 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 717 */ 718 nce_t * 719 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 720 { 721 nce_t *nce; 722 in6_addr_t addr6; 723 724 if (!caller_holds_lock) { 725 mutex_enter(&ndp4.ndp_g_lock); 726 } 727 nce = *((nce_t **)NCE_HASH_PTR_V4(*addr)); /* head of v6 hash table */ 728 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 729 nce = nce_lookup_addr(ill, &addr6, nce); 730 if (!caller_holds_lock) 731 mutex_exit(&ndp4.ndp_g_lock); 732 return (nce); 733 } 734 735 /* 736 * Cache entry lookup. Try to find an nce matching the parameters passed. 737 * Look only for exact entries (no mappings). If an nce is found, increment 738 * the hold count on that nce. The caller passes in the start of the 739 * appropriate hash table, and must be holding the appropriate global 740 * lock (ndp_g_lock). 741 */ 742 static nce_t * 743 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr, nce_t *nce) 744 { 745 ndp_g_t *ndp = (ill->ill_isv6 ? 
&ndp6 : &ndp4); 746 747 ASSERT(ill != NULL); 748 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 749 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 750 return (NULL); 751 for (; nce != NULL; nce = nce->nce_next) { 752 if (nce->nce_ill == ill) { 753 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 754 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 755 &ipv6_all_ones)) { 756 mutex_enter(&nce->nce_lock); 757 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 758 NCE_REFHOLD_LOCKED(nce); 759 mutex_exit(&nce->nce_lock); 760 break; 761 } 762 mutex_exit(&nce->nce_lock); 763 } 764 } 765 } 766 return (nce); 767 } 768 769 /* 770 * Cache entry lookup. Try to find an nce matching the parameters passed. 771 * Look only for mappings. 772 */ 773 static nce_t * 774 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 775 { 776 nce_t *nce; 777 778 ASSERT(ill != NULL && ill->ill_isv6); 779 ASSERT(MUTEX_HELD(&ndp6.ndp_g_lock)); 780 if (!IN6_IS_ADDR_MULTICAST(addr)) 781 return (NULL); 782 nce = ndp6.nce_mask_entries; 783 for (; nce != NULL; nce = nce->nce_next) 784 if (nce->nce_ill == ill && 785 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 786 mutex_enter(&nce->nce_lock); 787 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 788 NCE_REFHOLD_LOCKED(nce); 789 mutex_exit(&nce->nce_lock); 790 break; 791 } 792 mutex_exit(&nce->nce_lock); 793 } 794 return (nce); 795 } 796 797 /* 798 * Process passed in parameters either from an incoming packet or via 799 * user ioctl. 800 */ 801 void 802 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 803 { 804 ill_t *ill = nce->nce_ill; 805 uint32_t hw_addr_len = ill->ill_nd_lla_len; 806 mblk_t *mp; 807 boolean_t ll_updated = B_FALSE; 808 boolean_t ll_changed; 809 810 ASSERT(nce->nce_ipversion == IPV6_VERSION); 811 /* 812 * No updates of link layer address or the neighbor state is 813 * allowed, when the cache is in NONUD state. This still 814 * allows for responding to reachability solicitation. 
815 */ 816 mutex_enter(&nce->nce_lock); 817 if (nce->nce_state == ND_INCOMPLETE) { 818 if (hw_addr == NULL) { 819 mutex_exit(&nce->nce_lock); 820 return; 821 } 822 nce_set_ll(nce, hw_addr); 823 /* 824 * Update nce state and send the queued packets 825 * back to ip this time ire will be added. 826 */ 827 if (flag & ND_NA_FLAG_SOLICITED) { 828 nce_update(nce, ND_REACHABLE, NULL); 829 } else { 830 nce_update(nce, ND_STALE, NULL); 831 } 832 mutex_exit(&nce->nce_lock); 833 nce_fastpath(nce); 834 mutex_enter(&nce->nce_lock); 835 mp = nce->nce_qd_mp; 836 nce->nce_qd_mp = NULL; 837 mutex_exit(&nce->nce_lock); 838 while (mp != NULL) { 839 mblk_t *nxt_mp, *data_mp; 840 841 nxt_mp = mp->b_next; 842 mp->b_next = NULL; 843 844 if (mp->b_datap->db_type == M_CTL) 845 data_mp = mp->b_cont; 846 else 847 data_mp = mp; 848 if (data_mp->b_prev != NULL) { 849 ill_t *inbound_ill; 850 queue_t *fwdq = NULL; 851 uint_t ifindex; 852 853 ifindex = (uint_t)(uintptr_t)data_mp->b_prev; 854 inbound_ill = ill_lookup_on_ifindex(ifindex, 855 B_TRUE, NULL, NULL, NULL, NULL); 856 if (inbound_ill == NULL) { 857 data_mp->b_prev = NULL; 858 freemsg(mp); 859 return; 860 } else { 861 fwdq = inbound_ill->ill_rq; 862 } 863 data_mp->b_prev = NULL; 864 /* 865 * Send a forwarded packet back into ip_rput_v6 866 * just as in ire_send_v6(). 867 * Extract the queue from b_prev (set in 868 * ip_rput_data_v6). 869 */ 870 if (fwdq != NULL) { 871 /* 872 * Forwarded packets hop count will 873 * get decremented in ip_rput_data_v6 874 */ 875 if (data_mp != mp) 876 freeb(mp); 877 put(fwdq, data_mp); 878 } else { 879 /* 880 * Send locally originated packets back 881 * into * ip_wput_v6. 
882 */ 883 put(ill->ill_wq, mp); 884 } 885 ill_refrele(inbound_ill); 886 } else { 887 put(ill->ill_wq, mp); 888 } 889 mp = nxt_mp; 890 } 891 return; 892 } 893 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 894 if (!is_adv) { 895 /* If this is a SOLICITATION request only */ 896 if (ll_changed) 897 nce_update(nce, ND_STALE, hw_addr); 898 mutex_exit(&nce->nce_lock); 899 return; 900 } 901 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 902 /* If in any other state than REACHABLE, ignore */ 903 if (nce->nce_state == ND_REACHABLE) { 904 nce_update(nce, ND_STALE, NULL); 905 } 906 mutex_exit(&nce->nce_lock); 907 return; 908 } else { 909 if (ll_changed) { 910 nce_update(nce, ND_UNCHANGED, hw_addr); 911 ll_updated = B_TRUE; 912 } 913 if (flag & ND_NA_FLAG_SOLICITED) { 914 nce_update(nce, ND_REACHABLE, NULL); 915 } else { 916 if (ll_updated) { 917 nce_update(nce, ND_STALE, NULL); 918 } 919 } 920 mutex_exit(&nce->nce_lock); 921 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 922 NCE_F_ISROUTER)) { 923 ire_t *ire; 924 925 /* 926 * Router turned to host. We need to remove the 927 * entry as well as any default route that may be 928 * using this as a next hop. This is required by 929 * section 7.2.5 of RFC 2461. 930 */ 931 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 932 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 933 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 934 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 935 MATCH_IRE_DEFAULT); 936 if (ire != NULL) { 937 ip_rts_rtmsg(RTM_DELETE, ire, 0); 938 ire_delete(ire); 939 ire_refrele(ire); 940 } 941 ndp_delete(nce); 942 } 943 } 944 } 945 946 /* 947 * Pass arg1 to the pfi supplied, along with each nce in existence. 948 * ndp_walk() places a REFHOLD on the nce and drops the lock when 949 * walking the hash list. 
950 */ 951 void 952 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 953 boolean_t trace) 954 { 955 956 nce_t *nce; 957 nce_t *nce1; 958 nce_t **ncep; 959 nce_t *free_nce_list = NULL; 960 961 mutex_enter(&ndp->ndp_g_lock); 962 /* Prevent ndp_delete from unlink and free of NCE */ 963 ndp->ndp_g_walker++; 964 mutex_exit(&ndp->ndp_g_lock); 965 for (ncep = ndp->nce_hash_tbl; 966 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 967 for (nce = *ncep; nce != NULL; nce = nce1) { 968 nce1 = nce->nce_next; 969 if (ill == NULL || nce->nce_ill == ill) { 970 if (trace) { 971 NCE_REFHOLD(nce); 972 (*pfi)(nce, arg1); 973 NCE_REFRELE(nce); 974 } else { 975 NCE_REFHOLD_NOTR(nce); 976 (*pfi)(nce, arg1); 977 NCE_REFRELE_NOTR(nce); 978 } 979 } 980 } 981 } 982 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 983 nce1 = nce->nce_next; 984 if (ill == NULL || nce->nce_ill == ill) { 985 if (trace) { 986 NCE_REFHOLD(nce); 987 (*pfi)(nce, arg1); 988 NCE_REFRELE(nce); 989 } else { 990 NCE_REFHOLD_NOTR(nce); 991 (*pfi)(nce, arg1); 992 NCE_REFRELE_NOTR(nce); 993 } 994 } 995 } 996 mutex_enter(&ndp->ndp_g_lock); 997 ndp->ndp_g_walker--; 998 /* 999 * While NCE's are removed from global list they are placed 1000 * in a private list, to be passed to nce_ire_delete_list(). 1001 * The reason is, there may be ires pointing to this nce 1002 * which needs to cleaned up. 
1003 */ 1004 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 1005 /* Time to delete condemned entries */ 1006 for (ncep = ndp->nce_hash_tbl; 1007 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 1008 nce = *ncep; 1009 if (nce != NULL) { 1010 nce_remove(ndp, nce, &free_nce_list); 1011 } 1012 } 1013 nce = ndp->nce_mask_entries; 1014 if (nce != NULL) { 1015 nce_remove(ndp, nce, &free_nce_list); 1016 } 1017 ndp->ndp_g_walker_cleanup = B_FALSE; 1018 } 1019 mutex_exit(&ndp->ndp_g_lock); 1020 1021 if (free_nce_list != NULL) { 1022 nce_ire_delete_list(free_nce_list); 1023 } 1024 } 1025 1026 void 1027 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1) 1028 { 1029 ndp_walk_common(&ndp4, ill, pfi, arg1, B_TRUE); 1030 ndp_walk_common(&ndp6, ill, pfi, arg1, B_TRUE); 1031 } 1032 1033 /* 1034 * Process resolve requests. Handles both mapped entries 1035 * as well as cases that needs to be send out on the wire. 1036 * Lookup a NCE for a given IRE. Regardless of whether one exists 1037 * or one is created, we defer making ire point to nce until the 1038 * ire is actually added at which point the nce_refcnt on the nce is 1039 * incremented. This is done primarily to have symmetry between ire_add() 1040 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1041 */ 1042 int 1043 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 1044 { 1045 nce_t *nce; 1046 int err = 0; 1047 uint32_t ms; 1048 mblk_t *mp_nce = NULL; 1049 1050 ASSERT(ill != NULL); 1051 ASSERT(ill->ill_isv6); 1052 if (IN6_IS_ADDR_MULTICAST(dst)) { 1053 err = nce_set_multicast(ill, dst); 1054 return (err); 1055 } 1056 err = ndp_lookup_then_add(ill, 1057 NULL, /* No hardware address */ 1058 dst, 1059 &ipv6_all_ones, 1060 &ipv6_all_zeros, 1061 0, 1062 (ill->ill_flags & ILLF_NONUD) ? 
NCE_F_NONUD : 0, 1063 ND_INCOMPLETE, 1064 &nce, 1065 NULL, /* let ndp_add figure out fastpath mp and dlureq_mp for v6 */ 1066 NULL); 1067 1068 switch (err) { 1069 case 0: 1070 /* 1071 * New cache entry was created. Make sure that the state 1072 * is not ND_INCOMPLETE. It can be in some other state 1073 * even before we send out the solicitation as we could 1074 * get un-solicited advertisements. 1075 * 1076 * If this is an XRESOLV interface, simply return 0, 1077 * since we don't want to solicit just yet. 1078 */ 1079 if (ill->ill_flags & ILLF_XRESOLV) { 1080 NCE_REFRELE(nce); 1081 return (0); 1082 } 1083 rw_enter(&ill_g_lock, RW_READER); 1084 mutex_enter(&nce->nce_lock); 1085 if (nce->nce_state != ND_INCOMPLETE) { 1086 mutex_exit(&nce->nce_lock); 1087 rw_exit(&ill_g_lock); 1088 NCE_REFRELE(nce); 1089 return (0); 1090 } 1091 mp_nce = ip_prepend_zoneid(mp, zoneid); 1092 if (mp_nce == NULL) { 1093 /* The caller will free mp */ 1094 mutex_exit(&nce->nce_lock); 1095 rw_exit(&ill_g_lock); 1096 ndp_delete(nce); 1097 NCE_REFRELE(nce); 1098 return (ENOMEM); 1099 } 1100 ms = nce_solicit(nce, mp_nce); 1101 rw_exit(&ill_g_lock); 1102 if (ms == 0) { 1103 /* The caller will free mp */ 1104 if (mp_nce != mp) 1105 freeb(mp_nce); 1106 mutex_exit(&nce->nce_lock); 1107 ndp_delete(nce); 1108 NCE_REFRELE(nce); 1109 return (EBUSY); 1110 } 1111 mutex_exit(&nce->nce_lock); 1112 NDP_RESTART_TIMER(nce, (clock_t)ms); 1113 NCE_REFRELE(nce); 1114 return (EINPROGRESS); 1115 case EEXIST: 1116 /* Resolution in progress just queue the packet */ 1117 mutex_enter(&nce->nce_lock); 1118 if (nce->nce_state == ND_INCOMPLETE) { 1119 mp_nce = ip_prepend_zoneid(mp, zoneid); 1120 if (mp_nce == NULL) { 1121 err = ENOMEM; 1122 } else { 1123 nce_queue_mp(nce, mp_nce); 1124 err = EINPROGRESS; 1125 } 1126 } else { 1127 /* 1128 * Any other state implies we have 1129 * a nce but IRE needs to be added ... 
1130 * ire_add_v6() will take care of the 1131 * the case when the nce becomes CONDEMNED 1132 * before the ire is added to the table. 1133 */ 1134 err = 0; 1135 } 1136 mutex_exit(&nce->nce_lock); 1137 NCE_REFRELE(nce); 1138 break; 1139 default: 1140 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 1141 break; 1142 } 1143 return (err); 1144 } 1145 1146 /* 1147 * When there is no resolver, the link layer template is passed in 1148 * the IRE. 1149 * Lookup a NCE for a given IRE. Regardless of whether one exists 1150 * or one is created, we defer making ire point to nce until the 1151 * ire is actually added at which point the nce_refcnt on the nce is 1152 * incremented. This is done primarily to have symmetry between ire_add() 1153 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1154 */ 1155 int 1156 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1157 { 1158 nce_t *nce; 1159 int err = 0; 1160 1161 ASSERT(ill != NULL); 1162 ASSERT(ill->ill_isv6); 1163 if (IN6_IS_ADDR_MULTICAST(dst)) { 1164 err = nce_set_multicast(ill, dst); 1165 return (err); 1166 } 1167 1168 err = ndp_lookup_then_add(ill, 1169 NULL, /* hardware address */ 1170 dst, 1171 &ipv6_all_ones, 1172 &ipv6_all_zeros, 1173 0, 1174 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1175 ND_REACHABLE, 1176 &nce, 1177 NULL, /* let ndp_add figure out fp_mp/dlureq_mp for v6 */ 1178 NULL); 1179 1180 switch (err) { 1181 case 0: 1182 /* 1183 * Cache entry with a proper resolver cookie was 1184 * created. 1185 */ 1186 NCE_REFRELE(nce); 1187 break; 1188 case EEXIST: 1189 err = 0; 1190 NCE_REFRELE(nce); 1191 break; 1192 default: 1193 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1194 break; 1195 } 1196 return (err); 1197 } 1198 1199 /* 1200 * For each interface an entry is added for the unspecified multicast group. 1201 * Here that mapping is used to form the multicast cache entry for a particular 1202 * multicast destination. 
1203 */ 1204 static int 1205 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1206 { 1207 nce_t *mnce; /* Multicast mapping entry */ 1208 nce_t *nce; 1209 uchar_t *hw_addr = NULL; 1210 int err = 0; 1211 1212 ASSERT(ill != NULL); 1213 ASSERT(ill->ill_isv6); 1214 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1215 1216 mutex_enter(&ndp6.ndp_g_lock); 1217 nce = *((nce_t **)NCE_HASH_PTR_V6(*dst)); 1218 nce = nce_lookup_addr(ill, dst, nce); 1219 if (nce != NULL) { 1220 mutex_exit(&ndp6.ndp_g_lock); 1221 NCE_REFRELE(nce); 1222 return (0); 1223 } 1224 /* No entry, now lookup for a mapping this should never fail */ 1225 mnce = nce_lookup_mapping(ill, dst); 1226 if (mnce == NULL) { 1227 /* Something broken for the interface. */ 1228 mutex_exit(&ndp6.ndp_g_lock); 1229 return (ESRCH); 1230 } 1231 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1232 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1233 /* 1234 * For IRE_IF_RESOLVER a hardware mapping can be 1235 * generated, for IRE_IF_NORESOLVER, resolution cookie 1236 * in the ill is copied in ndp_add(). 1237 */ 1238 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1239 if (hw_addr == NULL) { 1240 mutex_exit(&ndp6.ndp_g_lock); 1241 NCE_REFRELE(mnce); 1242 return (ENOMEM); 1243 } 1244 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1245 } 1246 NCE_REFRELE(mnce); 1247 /* 1248 * IRE_IF_NORESOLVER type simply copies the resolution 1249 * cookie passed in. So no hw_addr is needed. 1250 */ 1251 err = ndp_add(ill, 1252 hw_addr, 1253 dst, 1254 &ipv6_all_ones, 1255 &ipv6_all_zeros, 1256 0, 1257 NCE_F_NONUD, 1258 ND_REACHABLE, 1259 &nce, 1260 NULL, 1261 NULL); 1262 mutex_exit(&ndp6.ndp_g_lock); 1263 if (hw_addr != NULL) 1264 kmem_free(hw_addr, ill->ill_nd_lla_len); 1265 if (err != 0) { 1266 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1267 return (err); 1268 } 1269 NCE_REFRELE(nce); 1270 return (0); 1271 } 1272 1273 /* 1274 * Return the link layer address, and any flags of a nce. 
1275 */ 1276 int 1277 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1278 { 1279 nce_t *nce; 1280 in6_addr_t *addr; 1281 sin6_t *sin6; 1282 dl_unitdata_req_t *dl; 1283 1284 ASSERT(ill != NULL && ill->ill_isv6); 1285 sin6 = (sin6_t *)&lnr->lnr_addr; 1286 addr = &sin6->sin6_addr; 1287 1288 nce = ndp_lookup_v6(ill, addr, B_FALSE); 1289 if (nce == NULL) 1290 return (ESRCH); 1291 /* If in INCOMPLETE state, no link layer address is available yet */ 1292 if (nce->nce_state == ND_INCOMPLETE) 1293 goto done; 1294 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1295 if (ill->ill_flags & ILLF_XRESOLV) 1296 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1297 else 1298 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1299 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1300 sizeof (lnr->lnr_hdw_addr)); 1301 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1302 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1303 if (nce->nce_flags & NCE_F_ISROUTER) 1304 lnr->lnr_flags = NDF_ISROUTER_ON; 1305 if (nce->nce_flags & NCE_F_PROXY) 1306 lnr->lnr_flags |= NDF_PROXY_ON; 1307 if (nce->nce_flags & NCE_F_ANYCAST) 1308 lnr->lnr_flags |= NDF_ANYCAST_ON; 1309 done: 1310 NCE_REFRELE(nce); 1311 return (0); 1312 } 1313 1314 /* 1315 * Send Enable/Disable multicast reqs to driver. 
1316 */ 1317 int 1318 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1319 uint32_t hw_addr_offset, mblk_t *mp) 1320 { 1321 nce_t *nce; 1322 uchar_t *hw_addr; 1323 1324 ASSERT(ill != NULL && ill->ill_isv6); 1325 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1326 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1327 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1328 freemsg(mp); 1329 return (EINVAL); 1330 } 1331 mutex_enter(&ndp6.ndp_g_lock); 1332 nce = nce_lookup_mapping(ill, addr); 1333 if (nce == NULL) { 1334 mutex_exit(&ndp6.ndp_g_lock); 1335 freemsg(mp); 1336 return (ESRCH); 1337 } 1338 mutex_exit(&ndp6.ndp_g_lock); 1339 /* 1340 * Update dl_addr_length and dl_addr_offset for primitives that 1341 * have physical addresses as opposed to full saps 1342 */ 1343 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1344 case DL_ENABMULTI_REQ: 1345 /* Track the state if this is the first enabmulti */ 1346 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1347 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1348 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1349 break; 1350 case DL_DISABMULTI_REQ: 1351 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1352 break; 1353 default: 1354 NCE_REFRELE(nce); 1355 ip1dbg(("ndp_mcastreq: default\n")); 1356 return (EINVAL); 1357 } 1358 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1359 NCE_REFRELE(nce); 1360 putnext(ill->ill_wq, mp); 1361 return (0); 1362 } 1363 1364 /* 1365 * Send a neighbor solicitation. 1366 * Returns number of milliseconds after which we should either rexmit or abort. 1367 * Return of zero means we should abort. 1368 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1369 * 1370 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1371 * the packet. 1372 * NOTE: This routine does not consume mp. 
1373 */ 1374 uint32_t 1375 nce_solicit(nce_t *nce, mblk_t *mp) 1376 { 1377 ill_t *ill; 1378 ill_t *src_ill; 1379 ip6_t *ip6h; 1380 in6_addr_t src; 1381 in6_addr_t dst; 1382 ipif_t *ipif; 1383 ip6i_t *ip6i; 1384 boolean_t dropped = B_FALSE; 1385 1386 ASSERT(RW_READ_HELD(&ill_g_lock)); 1387 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1388 ill = nce->nce_ill; 1389 ASSERT(ill != NULL); 1390 1391 if (nce->nce_rcnt == 0) { 1392 return (0); 1393 } 1394 1395 if (mp == NULL) { 1396 ASSERT(nce->nce_qd_mp != NULL); 1397 mp = nce->nce_qd_mp; 1398 } else { 1399 nce_queue_mp(nce, mp); 1400 } 1401 1402 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1403 if (mp->b_datap->db_type == M_CTL) 1404 mp = mp->b_cont; 1405 1406 ip6h = (ip6_t *)mp->b_rptr; 1407 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1408 /* 1409 * This message should have been pulled up already in 1410 * ip_wput_v6. We can't do pullups here because the message 1411 * could be from the nce_qd_mp which could have b_next/b_prev 1412 * non-NULL. 1413 */ 1414 ip6i = (ip6i_t *)ip6h; 1415 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1416 sizeof (ip6i_t) + IPV6_HDR_LEN); 1417 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1418 } 1419 src = ip6h->ip6_src; 1420 /* 1421 * If the src of outgoing packet is one of the assigned interface 1422 * addresses use it, otherwise we will pick the source address below. 1423 */ 1424 src_ill = ill; 1425 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1426 if (ill->ill_group != NULL) 1427 src_ill = ill->ill_group->illgrp_ill; 1428 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1429 for (ipif = src_ill->ill_ipif; ipif != NULL; 1430 ipif = ipif->ipif_next) { 1431 if (IN6_ARE_ADDR_EQUAL(&src, 1432 &ipif->ipif_v6lcl_addr)) { 1433 break; 1434 } 1435 } 1436 if (ipif != NULL) 1437 break; 1438 } 1439 /* 1440 * If no relevant ipif can be found, then it's not one of our 1441 * addresses. Reset to :: and let nce_xmit. 
If an ipif can be 1442 * found, but it's not yet done with DAD verification, then 1443 * just postpone this transmission until later. 1444 */ 1445 if (src_ill == NULL) 1446 src = ipv6_all_zeros; 1447 else if (!ipif->ipif_addr_ready) 1448 return (ill->ill_reachable_retrans_time); 1449 } 1450 dst = nce->nce_addr; 1451 /* 1452 * If source address is unspecified, nce_xmit will choose 1453 * one for us and initialize the hardware address also 1454 * appropriately. 1455 */ 1456 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1457 src_ill = NULL; 1458 nce->nce_rcnt--; 1459 mutex_exit(&nce->nce_lock); 1460 rw_exit(&ill_g_lock); 1461 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1462 &dst, 0); 1463 rw_enter(&ill_g_lock, RW_READER); 1464 mutex_enter(&nce->nce_lock); 1465 if (dropped) 1466 nce->nce_rcnt++; 1467 return (ill->ill_reachable_retrans_time); 1468 } 1469 1470 /* 1471 * Attempt to recover an address on an interface that's been marked as a 1472 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1473 * no easy way to just probe the address and have the right thing happen if 1474 * it's no longer in use. Instead, we just bring it up normally and allow the 1475 * regular interface start-up logic to probe for a remaining duplicate and take 1476 * us back down if necessary. 1477 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1478 * ip_ndp_excl. 1479 */ 1480 /* ARGSUSED */ 1481 static void 1482 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1483 { 1484 ill_t *ill = rq->q_ptr; 1485 ipif_t *ipif; 1486 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1487 1488 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1489 /* 1490 * We do not support recovery of proxy ARP'd interfaces, 1491 * because the system lacks a complete proxy ARP mechanism. 
1492 */ 1493 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1494 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1495 continue; 1496 } 1497 1498 /* 1499 * If we have already recovered, then ignore. 1500 */ 1501 mutex_enter(&ill->ill_lock); 1502 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) { 1503 mutex_exit(&ill->ill_lock); 1504 continue; 1505 } 1506 1507 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1508 ill->ill_ipif_dup_count--; 1509 mutex_exit(&ill->ill_lock); 1510 ipif->ipif_was_dup = B_TRUE; 1511 1512 if (ipif_ndp_up(ipif, addr, B_FALSE) != EINPROGRESS) 1513 (void) ipif_up_done_v6(ipif); 1514 } 1515 freeb(mp); 1516 } 1517 1518 /* 1519 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1520 * As long as someone else holds the address, the interface will stay down. 1521 * When that conflict goes away, the interface is brought back up. This is 1522 * done so that accidental shutdowns of addresses aren't made permanent. Your 1523 * server will recover from a failure. 1524 * 1525 * For DHCP and temporary addresses, recovery is not done in the kernel. 1526 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1527 * 1528 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1529 */ 1530 static void 1531 ipif6_dup_recovery(void *arg) 1532 { 1533 ipif_t *ipif = arg; 1534 1535 ipif->ipif_recovery_id = 0; 1536 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1537 return; 1538 1539 /* If the link is down, we'll retry this later */ 1540 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1541 return; 1542 1543 ndp_do_recovery(ipif); 1544 } 1545 1546 /* 1547 * Perform interface recovery by forcing the duplicate interfaces up and 1548 * allowing the system to determine which ones should stay up. 1549 * 1550 * Called both by recovery timer expiry and link-up notification. 
1551 */ 1552 void 1553 ndp_do_recovery(ipif_t *ipif) 1554 { 1555 ill_t *ill = ipif->ipif_ill; 1556 mblk_t *mp; 1557 1558 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1559 if (mp == NULL) { 1560 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1561 ipif, MSEC_TO_TICK(ip_dup_recovery)); 1562 } else { 1563 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1564 sizeof (ipif->ipif_v6lcl_addr)); 1565 ill_refhold(ill); 1566 (void) qwriter_ip(NULL, ill, ill->ill_rq, mp, ip_ndp_recover, 1567 CUR_OP, B_FALSE); 1568 } 1569 } 1570 1571 /* 1572 * Find the solicitation in the given message, and extract printable details 1573 * (MAC and IP addresses) from it. 1574 */ 1575 static nd_neighbor_solicit_t * 1576 ip_ndp_find_solicitation(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, char *hbuf, 1577 size_t hlen, char *sbuf, size_t slen, uchar_t **haddr) 1578 { 1579 nd_neighbor_solicit_t *ns; 1580 ip6_t *ip6h; 1581 uchar_t *addr; 1582 int alen; 1583 1584 alen = 0; 1585 ip6h = (ip6_t *)mp->b_rptr; 1586 if (dl_mp == NULL) { 1587 nd_opt_hdr_t *opt; 1588 int nslen; 1589 1590 /* 1591 * If it's from the fast-path, then it can't be a probe 1592 * message, and thus must include the source linkaddr option. 1593 * Extract that here. 1594 */ 1595 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1596 nslen = mp->b_wptr - (uchar_t *)ns; 1597 if ((nslen -= sizeof (*ns)) > 0) { 1598 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), nslen, 1599 ND_OPT_SOURCE_LINKADDR); 1600 if (opt != NULL && 1601 opt->nd_opt_len * 8 - sizeof (*opt) >= 1602 ill->ill_nd_lla_len) { 1603 addr = (uchar_t *)(opt + 1); 1604 alen = ill->ill_nd_lla_len; 1605 } 1606 } 1607 /* 1608 * We cheat a bit here for the sake of printing usable log 1609 * messages in the rare case where the reply we got was unicast 1610 * without a source linkaddr option, and the interface is in 1611 * fastpath mode. (Sigh.) 
1612 */ 1613 if (alen == 0 && ill->ill_type == IFT_ETHER && 1614 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1615 struct ether_header *pether; 1616 1617 pether = (struct ether_header *)((char *)ip6h - 1618 sizeof (*pether)); 1619 addr = pether->ether_shost.ether_addr_octet; 1620 alen = ETHERADDRL; 1621 } 1622 } else { 1623 dl_unitdata_ind_t *dlu; 1624 1625 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1626 alen = dlu->dl_src_addr_length; 1627 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1628 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1629 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1630 if (ill->ill_sap_length < 0) { 1631 alen += ill->ill_sap_length; 1632 } else { 1633 addr += ill->ill_sap_length; 1634 alen -= ill->ill_sap_length; 1635 } 1636 } 1637 } 1638 if (alen > 0) { 1639 *haddr = addr; 1640 (void) mac_colon_addr(addr, alen, hbuf, hlen); 1641 } else { 1642 *haddr = NULL; 1643 (void) strcpy(hbuf, "?"); 1644 } 1645 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1646 (void) inet_ntop(AF_INET6, &ns->nd_ns_target, sbuf, slen); 1647 return (ns); 1648 } 1649 1650 /* 1651 * This is for exclusive changes due to NDP duplicate address detection 1652 * failure. 
1653 */ 1654 /* ARGSUSED */ 1655 static void 1656 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1657 { 1658 ill_t *ill = rq->q_ptr; 1659 ipif_t *ipif; 1660 char ibuf[LIFNAMSIZ + 10]; /* 10 digits for logical i/f number */ 1661 char hbuf[MAC_STR_LEN]; 1662 char sbuf[INET6_ADDRSTRLEN]; 1663 nd_neighbor_solicit_t *ns; 1664 mblk_t *dl_mp = NULL; 1665 uchar_t *haddr; 1666 1667 if (DB_TYPE(mp) != M_DATA) { 1668 dl_mp = mp; 1669 mp = mp->b_cont; 1670 } 1671 ns = ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, sizeof (hbuf), sbuf, 1672 sizeof (sbuf), &haddr); 1673 if (haddr != NULL && 1674 bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) { 1675 /* 1676 * Ignore conflicts generated by misbehaving switches that just 1677 * reflect our own messages back to us. 1678 */ 1679 goto ignore_conflict; 1680 } 1681 (void) strlcpy(ibuf, ill->ill_name, sizeof (ibuf)); 1682 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1683 1684 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1685 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 1686 &ns->nd_ns_target)) { 1687 continue; 1688 } 1689 1690 /* If it's already marked, then don't do anything. */ 1691 if (ipif->ipif_flags & IPIF_DUPLICATE) 1692 continue; 1693 1694 /* 1695 * If this is a failure during duplicate recovery, then don't 1696 * complain. It may take a long time to recover. 
1697 */ 1698 if (!ipif->ipif_was_dup) { 1699 if (ipif->ipif_id != 0) { 1700 (void) snprintf(ibuf + ill->ill_name_length - 1, 1701 sizeof (ibuf) - ill->ill_name_length + 1, 1702 ":%d", ipif->ipif_id); 1703 } 1704 cmn_err(CE_WARN, "%s has duplicate address %s (in " 1705 "use by %s); disabled", ibuf, sbuf, hbuf); 1706 } 1707 mutex_enter(&ill->ill_lock); 1708 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1709 ipif->ipif_flags |= IPIF_DUPLICATE; 1710 ill->ill_ipif_dup_count++; 1711 mutex_exit(&ill->ill_lock); 1712 (void) ipif_down(ipif, NULL, NULL); 1713 ipif_down_tail(ipif); 1714 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1715 ill->ill_net_type == IRE_IF_RESOLVER && 1716 ip_dup_recovery > 0) 1717 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1718 ipif, MSEC_TO_TICK(ip_dup_recovery)); 1719 } 1720 ignore_conflict: 1721 if (dl_mp != NULL) 1722 freeb(dl_mp); 1723 freemsg(mp); 1724 } 1725 1726 /* 1727 * Handle failure by tearing down the ipifs with the specified address. Note 1728 * that tearing down the ipif also means deleting the nce through ipif_down, so 1729 * it's not possible to do recovery by just restarting the nce timer. Instead, 1730 * we start a timer on the ipif. 1731 */ 1732 static void 1733 ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1734 { 1735 if ((mp = copymsg(mp)) != NULL) { 1736 if (dl_mp == NULL) 1737 dl_mp = mp; 1738 else if ((dl_mp = copyb(dl_mp)) != NULL) 1739 dl_mp->b_cont = mp; 1740 if (dl_mp == NULL) { 1741 freemsg(mp); 1742 } else { 1743 ill_refhold(ill); 1744 (void) qwriter_ip(NULL, ill, ill->ill_rq, dl_mp, 1745 ip_ndp_excl, CUR_OP, B_FALSE); 1746 } 1747 } 1748 ndp_delete(nce); 1749 } 1750 1751 /* 1752 * Handle a discovered conflict: some other system is advertising that it owns 1753 * one of our IP addresses. We need to defend ourselves, or just shut down the 1754 * interface. 
1755 */ 1756 static void 1757 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1758 { 1759 ipif_t *ipif; 1760 uint32_t now; 1761 uint_t maxdefense; 1762 uint_t defs; 1763 1764 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 1765 NULL, NULL); 1766 if (ipif == NULL) 1767 return; 1768 /* 1769 * First, figure out if this address is disposable. 1770 */ 1771 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1772 maxdefense = ip_max_temp_defend; 1773 else 1774 maxdefense = ip_max_defend; 1775 1776 /* 1777 * Now figure out how many times we've defended ourselves. Ignore 1778 * defenses that happened long in the past. 1779 */ 1780 now = gethrestime_sec(); 1781 mutex_enter(&nce->nce_lock); 1782 if ((defs = nce->nce_defense_count) > 0 && 1783 now - nce->nce_defense_time > ip_defend_interval) { 1784 nce->nce_defense_count = defs = 0; 1785 } 1786 nce->nce_defense_count++; 1787 nce->nce_defense_time = now; 1788 mutex_exit(&nce->nce_lock); 1789 ipif_refrele(ipif); 1790 1791 /* 1792 * If we've defended ourselves too many times already, then give up and 1793 * tear down the interface(s) using this address. Otherwise, defend by 1794 * sending out an unsolicited Neighbor Advertisement. 
1795 */ 1796 if (defs >= maxdefense) { 1797 ip_ndp_failure(ill, mp, dl_mp, nce); 1798 } else { 1799 char hbuf[MAC_STR_LEN]; 1800 char sbuf[INET6_ADDRSTRLEN]; 1801 uchar_t *haddr; 1802 1803 (void) ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, 1804 sizeof (hbuf), sbuf, sizeof (sbuf), &haddr); 1805 cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1806 hbuf, sbuf, ill->ill_name); 1807 (void) nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, B_FALSE, 1808 &nce->nce_addr, &ipv6_all_hosts_mcast, 1809 nce_advert_flags(nce)); 1810 } 1811 } 1812 1813 static void 1814 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1815 { 1816 nd_neighbor_solicit_t *ns; 1817 uint32_t hlen = ill->ill_nd_lla_len; 1818 uchar_t *haddr = NULL; 1819 icmp6_t *icmp_nd; 1820 ip6_t *ip6h; 1821 nce_t *our_nce = NULL; 1822 in6_addr_t target; 1823 in6_addr_t src; 1824 int len; 1825 int flag = 0; 1826 nd_opt_hdr_t *opt = NULL; 1827 boolean_t bad_solicit = B_FALSE; 1828 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1829 1830 ip6h = (ip6_t *)mp->b_rptr; 1831 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1832 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1833 src = ip6h->ip6_src; 1834 ns = (nd_neighbor_solicit_t *)icmp_nd; 1835 target = ns->nd_ns_target; 1836 if (IN6_IS_ADDR_MULTICAST(&target)) { 1837 if (ip_debug > 2) { 1838 /* ip1dbg */ 1839 pr_addr_dbg("ndp_input_solicit: Target is" 1840 " multicast! 
%s\n", AF_INET6, &target); 1841 } 1842 bad_solicit = B_TRUE; 1843 goto done; 1844 } 1845 if (len > sizeof (nd_neighbor_solicit_t)) { 1846 /* Options present */ 1847 opt = (nd_opt_hdr_t *)&ns[1]; 1848 len -= sizeof (nd_neighbor_solicit_t); 1849 if (!ndp_verify_optlen(opt, len)) { 1850 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1851 bad_solicit = B_TRUE; 1852 goto done; 1853 } 1854 } 1855 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1856 /* Check to see if this is a valid DAD solicitation */ 1857 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1858 if (ip_debug > 2) { 1859 /* ip1dbg */ 1860 pr_addr_dbg("ndp_input_solicit: IPv6 " 1861 "Destination is not solicited node " 1862 "multicast %s\n", AF_INET6, 1863 &ip6h->ip6_dst); 1864 } 1865 bad_solicit = B_TRUE; 1866 goto done; 1867 } 1868 } 1869 1870 our_nce = ndp_lookup_v6(ill, &target, B_FALSE); 1871 /* 1872 * If this is a valid Solicitation, a permanent 1873 * entry should exist in the cache 1874 */ 1875 if (our_nce == NULL || 1876 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1877 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1878 "ifname=%s ", ill->ill_name)); 1879 if (ip_debug > 2) { 1880 /* ip1dbg */ 1881 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1882 } 1883 bad_solicit = B_TRUE; 1884 goto done; 1885 } 1886 1887 /* At this point we should have a verified NS per spec */ 1888 if (opt != NULL) { 1889 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1890 if (opt != NULL) { 1891 haddr = (uchar_t *)&opt[1]; 1892 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 1893 hlen == 0) { 1894 ip1dbg(("ndp_input_advert: bad SLLA\n")); 1895 bad_solicit = B_TRUE; 1896 goto done; 1897 } 1898 } 1899 } 1900 1901 /* If sending directly to peer, set the unicast flag */ 1902 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 1903 flag |= NDP_UNICAST; 1904 1905 /* 1906 * Create/update the entry for the soliciting node. 1907 * or respond to outstanding queries, don't if 1908 * the source is unspecified address. 
1909 */ 1910 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1911 int err; 1912 nce_t *nnce; 1913 1914 ASSERT(ill->ill_isv6); 1915 /* 1916 * Regular solicitations *must* include the Source Link-Layer 1917 * Address option. Ignore messages that do not. 1918 */ 1919 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1920 ip1dbg(("ndp_input_solicit: source link-layer address " 1921 "option missing with a specified source.\n")); 1922 bad_solicit = B_TRUE; 1923 goto done; 1924 } 1925 1926 /* 1927 * This is a regular solicitation. If we're still in the 1928 * process of verifying the address, then don't respond at all 1929 * and don't keep track of the sender. 1930 */ 1931 if (our_nce->nce_state == ND_PROBE) 1932 goto done; 1933 1934 /* 1935 * If the solicitation doesn't have sender hardware address 1936 * (legal for unicast solicitation), then process without 1937 * installing the return NCE. Either we already know it, or 1938 * we'll be forced to look it up when (and if) we reply to the 1939 * packet. 1940 */ 1941 if (haddr == NULL) 1942 goto no_source; 1943 1944 err = ndp_lookup_then_add(ill, 1945 haddr, 1946 &src, /* Soliciting nodes address */ 1947 &ipv6_all_ones, 1948 &ipv6_all_zeros, 1949 0, 1950 0, 1951 ND_STALE, 1952 &nnce, 1953 NULL, 1954 NULL); 1955 switch (err) { 1956 case 0: 1957 /* done with this entry */ 1958 NCE_REFRELE(nnce); 1959 break; 1960 case EEXIST: 1961 /* 1962 * B_FALSE indicates this is not an 1963 * an advertisement. 1964 */ 1965 ndp_process(nnce, haddr, 0, B_FALSE); 1966 NCE_REFRELE(nnce); 1967 break; 1968 default: 1969 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1970 err)); 1971 goto done; 1972 } 1973 no_source: 1974 flag |= NDP_SOLICITED; 1975 } else { 1976 /* 1977 * No source link layer address option should be present in a 1978 * valid DAD request. 
1979 */ 1980 if (haddr != NULL) { 1981 ip1dbg(("ndp_input_solicit: source link-layer address " 1982 "option present with an unspecified source.\n")); 1983 bad_solicit = B_TRUE; 1984 goto done; 1985 } 1986 if (our_nce->nce_state == ND_PROBE) { 1987 /* 1988 * Internally looped-back probes won't have DLPI 1989 * attached to them. External ones (which are sent by 1990 * multicast) always will. Just ignore our own 1991 * transmissions. 1992 */ 1993 if (dl_mp != NULL) { 1994 /* 1995 * If someone else is probing our address, then 1996 * we've crossed wires. Declare failure. 1997 */ 1998 ip_ndp_failure(ill, mp, dl_mp, our_nce); 1999 } 2000 goto done; 2001 } 2002 /* 2003 * This is a DAD probe. Multicast the advertisement to the 2004 * all-nodes address. 2005 */ 2006 src = ipv6_all_hosts_mcast; 2007 } 2008 flag |= nce_advert_flags(our_nce); 2009 /* Response to a solicitation */ 2010 (void) nce_xmit(ill, 2011 ND_NEIGHBOR_ADVERT, 2012 ill, /* ill to be used for extracting ill_nd_lla */ 2013 B_TRUE, /* use ill_nd_lla */ 2014 &target, /* Source and target of the advertisement pkt */ 2015 &src, /* IP Destination (source of original pkt) */ 2016 flag); 2017 done: 2018 if (bad_solicit) 2019 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 2020 if (our_nce != NULL) 2021 NCE_REFRELE(our_nce); 2022 } 2023 2024 void 2025 ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2026 { 2027 nd_neighbor_advert_t *na; 2028 uint32_t hlen = ill->ill_nd_lla_len; 2029 uchar_t *haddr = NULL; 2030 icmp6_t *icmp_nd; 2031 ip6_t *ip6h; 2032 nce_t *dst_nce = NULL; 2033 in6_addr_t target; 2034 nd_opt_hdr_t *opt = NULL; 2035 int len; 2036 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2037 2038 ip6h = (ip6_t *)mp->b_rptr; 2039 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2040 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2041 na = (nd_neighbor_advert_t *)icmp_nd; 2042 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 2043 (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 2044 
ip1dbg(("ndp_input_advert: Target is multicast but the " 2045 "solicited flag is not zero\n")); 2046 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2047 return; 2048 } 2049 target = na->nd_na_target; 2050 if (IN6_IS_ADDR_MULTICAST(&target)) { 2051 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 2052 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2053 return; 2054 } 2055 if (len > sizeof (nd_neighbor_advert_t)) { 2056 opt = (nd_opt_hdr_t *)&na[1]; 2057 if (!ndp_verify_optlen(opt, 2058 len - sizeof (nd_neighbor_advert_t))) { 2059 ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); 2060 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2061 return; 2062 } 2063 /* At this point we have a verified NA per spec */ 2064 len -= sizeof (nd_neighbor_advert_t); 2065 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 2066 if (opt != NULL) { 2067 haddr = (uchar_t *)&opt[1]; 2068 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 2069 hlen == 0) { 2070 ip1dbg(("ndp_input_advert: bad SLLA\n")); 2071 BUMP_MIB(mib, 2072 ipv6IfIcmpInBadNeighborAdvertisements); 2073 return; 2074 } 2075 } 2076 } 2077 2078 /* 2079 * If this interface is part of the group look at all the 2080 * ills in the group. 2081 */ 2082 rw_enter(&ill_g_lock, RW_READER); 2083 if (ill->ill_group != NULL) 2084 ill = ill->ill_group->illgrp_ill; 2085 2086 for (; ill != NULL; ill = ill->ill_group_next) { 2087 mutex_enter(&ill->ill_lock); 2088 if (!ILL_CAN_LOOKUP(ill)) { 2089 mutex_exit(&ill->ill_lock); 2090 continue; 2091 } 2092 ill_refhold_locked(ill); 2093 mutex_exit(&ill->ill_lock); 2094 dst_nce = ndp_lookup_v6(ill, &target, B_FALSE); 2095 /* We have to drop the lock since ndp_process calls put* */ 2096 rw_exit(&ill_g_lock); 2097 if (dst_nce != NULL) { 2098 if ((dst_nce->nce_flags & NCE_F_PERMANENT) && 2099 dst_nce->nce_state == ND_PROBE) { 2100 /* 2101 * Someone else sent an advertisement for an 2102 * address that we're trying to configure. 2103 * Tear it down. 
Note that dl_mp might be NULL 2104 * if we're getting a unicast reply. This 2105 * isn't typically done (multicast is the norm 2106 * in response to a probe), but ip_ndp_failure 2107 * will handle the dl_mp == NULL case as well. 2108 */ 2109 ip_ndp_failure(ill, mp, dl_mp, dst_nce); 2110 } else if (dst_nce->nce_flags & NCE_F_PERMANENT) { 2111 /* 2112 * Someone just announced one of our local 2113 * addresses. If it wasn't us, then this is a 2114 * conflict. Defend the address or shut it 2115 * down. 2116 */ 2117 if (dl_mp != NULL && 2118 (haddr == NULL || 2119 nce_cmp_ll_addr(dst_nce, haddr, 2120 ill->ill_nd_lla_len))) { 2121 ip_ndp_conflict(ill, mp, dl_mp, 2122 dst_nce); 2123 } 2124 } else { 2125 if (na->nd_na_flags_reserved & 2126 ND_NA_FLAG_ROUTER) { 2127 dst_nce->nce_flags |= NCE_F_ISROUTER; 2128 } 2129 /* B_TRUE indicates this an advertisement */ 2130 ndp_process(dst_nce, haddr, 2131 na->nd_na_flags_reserved, B_TRUE); 2132 } 2133 NCE_REFRELE(dst_nce); 2134 } 2135 rw_enter(&ill_g_lock, RW_READER); 2136 ill_refrele(ill); 2137 } 2138 rw_exit(&ill_g_lock); 2139 } 2140 2141 /* 2142 * Process NDP neighbor solicitation/advertisement messages. 2143 * The checksum has already checked o.k before reaching here. 2144 */ 2145 void 2146 ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2147 { 2148 icmp6_t *icmp_nd; 2149 ip6_t *ip6h; 2150 int len; 2151 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2152 2153 2154 if (!pullupmsg(mp, -1)) { 2155 ip1dbg(("ndp_input: pullupmsg failed\n")); 2156 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 2157 goto done; 2158 } 2159 ip6h = (ip6_t *)mp->b_rptr; 2160 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 2161 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 2162 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 2163 goto done; 2164 } 2165 /* 2166 * NDP does not accept any extension headers between the 2167 * IP header and the ICMP header since e.g. a routing 2168 * header could be dangerous. 
2169 * This assumes that any AH or ESP headers are removed 2170 * by ip prior to passing the packet to ndp_input. 2171 */ 2172 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 2173 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 2174 ip6h->ip6_nxt)); 2175 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2176 goto done; 2177 } 2178 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2179 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 2180 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 2181 if (icmp_nd->icmp6_code != 0) { 2182 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 2183 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2184 goto done; 2185 } 2186 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2187 /* 2188 * Make sure packet length is large enough for either 2189 * a NS or a NA icmp packet. 2190 */ 2191 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 2192 ip1dbg(("ndp_input: packet too short\n")); 2193 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2194 goto done; 2195 } 2196 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 2197 ndp_input_solicit(ill, mp, dl_mp); 2198 } else { 2199 ndp_input_advert(ill, mp, dl_mp); 2200 } 2201 done: 2202 freemsg(mp); 2203 } 2204 2205 /* 2206 * nce_xmit is called to form and transmit a ND solicitation or 2207 * advertisement ICMP packet. 2208 * 2209 * If the source address is unspecified and this isn't a probe (used for 2210 * duplicate address detection), an appropriate source address and link layer 2211 * address will be chosen here. The link layer address option is included if 2212 * the source is specified (i.e., all non-probe packets), and omitted (per the 2213 * specification) otherwise. 2214 * 2215 * It returns B_FALSE only if it does a successful put() to the 2216 * corresponding ill's ill_wq otherwise returns B_TRUE. 
2217 */ 2218 static boolean_t 2219 nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, 2220 boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, 2221 int flag) 2222 { 2223 uint32_t len; 2224 icmp6_t *icmp6; 2225 mblk_t *mp; 2226 ip6_t *ip6h; 2227 nd_opt_hdr_t *opt; 2228 uint_t plen; 2229 ip6i_t *ip6i; 2230 ipif_t *src_ipif = NULL; 2231 uint8_t *hw_addr; 2232 2233 /* 2234 * If we have a unspecified source(sender) address, select a 2235 * proper source address for the solicitation here itself so 2236 * that we can initialize the h/w address correctly. This is 2237 * needed for interface groups as source address can come from 2238 * the whole group and the h/w address initialized from ill will 2239 * be wrong if the source address comes from a different ill. 2240 * 2241 * Note that the NA never comes here with the unspecified source 2242 * address. The following asserts that whenever the source 2243 * address is specified, the haddr also should be specified. 2244 */ 2245 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 2246 2247 if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { 2248 ASSERT(operation != ND_NEIGHBOR_ADVERT); 2249 /* 2250 * Pick a source address for this solicitation, but 2251 * restrict the selection to addresses assigned to the 2252 * output interface (or interface group). We do this 2253 * because the destination will create a neighbor cache 2254 * entry for the source address of this packet, so the 2255 * source address had better be a valid neighbor. 
2256 */ 2257 src_ipif = ipif_select_source_v6(ill, target, RESTRICT_TO_ILL, 2258 IPV6_PREFER_SRC_DEFAULT, GLOBAL_ZONEID); 2259 if (src_ipif == NULL) { 2260 char buf[INET6_ADDRSTRLEN]; 2261 2262 ip1dbg(("nce_xmit: No source ipif for dst %s\n", 2263 inet_ntop(AF_INET6, (char *)target, buf, 2264 sizeof (buf)))); 2265 return (B_TRUE); 2266 } 2267 sender = &src_ipif->ipif_v6src_addr; 2268 hwaddr_ill = src_ipif->ipif_ill; 2269 } 2270 2271 /* 2272 * Always make sure that the NS/NA packets don't get load 2273 * spread. This is needed so that the probe packets sent 2274 * by the in.mpathd daemon can really go out on the desired 2275 * interface. Probe packets are made to go out on a desired 2276 * interface by including a ip6i with ATTACH_IF flag. As these 2277 * packets indirectly end up sending/receiving NS/NA packets 2278 * (neighbor doing NUD), we have to make sure that NA 2279 * also go out on the same interface. 2280 */ 2281 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7) / 8; 2282 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 2283 plen * 8; 2284 mp = allocb(len, BPRI_LO); 2285 if (mp == NULL) { 2286 if (src_ipif != NULL) 2287 ipif_refrele(src_ipif); 2288 return (B_TRUE); 2289 } 2290 bzero((char *)mp->b_rptr, len); 2291 mp->b_wptr = mp->b_rptr + len; 2292 2293 ip6i = (ip6i_t *)mp->b_rptr; 2294 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2295 ip6i->ip6i_nxt = IPPROTO_RAW; 2296 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2297 if (flag & NDP_PROBE) 2298 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 2299 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2300 2301 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 2302 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2303 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2304 ip6h->ip6_nxt = IPPROTO_ICMPV6; 2305 ip6h->ip6_hops = IPV6_MAX_HOPS; 2306 ip6h->ip6_dst = *target; 2307 icmp6 = (icmp6_t *)&ip6h[1]; 2308 2309 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 2310 sizeof 
(nd_neighbor_advert_t)); 2311 2312 if (operation == ND_NEIGHBOR_SOLICIT) { 2313 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 2314 2315 if (!(flag & NDP_PROBE)) 2316 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 2317 ip6h->ip6_src = *sender; 2318 ns->nd_ns_target = *target; 2319 if (!(flag & NDP_UNICAST)) { 2320 /* Form multicast address of the target */ 2321 ip6h->ip6_dst = ipv6_solicited_node_mcast; 2322 ip6h->ip6_dst.s6_addr32[3] |= 2323 ns->nd_ns_target.s6_addr32[3]; 2324 } 2325 } else { 2326 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 2327 2328 ASSERT(!(flag & NDP_PROBE)); 2329 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2330 ip6h->ip6_src = *sender; 2331 na->nd_na_target = *sender; 2332 if (flag & NDP_ISROUTER) 2333 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 2334 if (flag & NDP_SOLICITED) 2335 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 2336 if (flag & NDP_ORIDE) 2337 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 2338 } 2339 2340 hw_addr = NULL; 2341 if (!(flag & NDP_PROBE)) { 2342 mutex_enter(&hwaddr_ill->ill_lock); 2343 hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : 2344 hwaddr_ill->ill_phys_addr; 2345 if (hw_addr != NULL) { 2346 /* Fill in link layer address and option len */ 2347 opt->nd_opt_len = (uint8_t)plen; 2348 bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); 2349 } 2350 mutex_exit(&hwaddr_ill->ill_lock); 2351 } 2352 if (hw_addr == NULL) { 2353 /* If there's no link layer address option, then strip it. */ 2354 len -= plen * 8; 2355 mp->b_wptr = mp->b_rptr + len; 2356 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2357 } 2358 2359 icmp6->icmp6_type = (uint8_t)operation; 2360 icmp6->icmp6_code = 0; 2361 /* 2362 * Prepare for checksum by putting icmp length in the icmp 2363 * checksum field. The checksum is calculated in ip_wput_v6. 
2364 */ 2365 icmp6->icmp6_cksum = ip6h->ip6_plen; 2366 2367 if (src_ipif != NULL) 2368 ipif_refrele(src_ipif); 2369 if (canput(ill->ill_wq)) { 2370 put(ill->ill_wq, mp); 2371 return (B_FALSE); 2372 } 2373 freemsg(mp); 2374 return (B_TRUE); 2375 } 2376 2377 /* 2378 * Make a link layer address (does not include the SAP) from an nce. 2379 * To form the link layer address, use the last four bytes of ipv6 2380 * address passed in and the fixed offset stored in nce. 2381 */ 2382 static void 2383 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 2384 { 2385 uchar_t *mask, *to; 2386 ill_t *ill = nce->nce_ill; 2387 int len; 2388 2389 if (ill->ill_net_type == IRE_IF_NORESOLVER) 2390 return; 2391 ASSERT(nce->nce_res_mp != NULL); 2392 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 2393 ASSERT(nce->nce_flags & NCE_F_MAPPING); 2394 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 2395 ASSERT(addr != NULL); 2396 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 2397 addrpos, ill->ill_nd_lla_len); 2398 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 2399 IPV6_ADDR_LEN); 2400 mask = (uchar_t *)&nce->nce_extract_mask; 2401 mask += (IPV6_ADDR_LEN - len); 2402 addr += (IPV6_ADDR_LEN - len); 2403 to = addrpos + nce->nce_ll_extract_start; 2404 while (len-- > 0) 2405 *to++ |= *mask++ & *addr++; 2406 } 2407 2408 /* 2409 * Pass a cache report back out via NDD. 2410 */ 2411 /* ARGSUSED */ 2412 int 2413 ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 2414 { 2415 (void) mi_mpprintf(mp, "ifname hardware addr flags" 2416 " proto addr/mask"); 2417 ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp); 2418 return (0); 2419 } 2420 2421 /* 2422 * Add a single line to the NDP Cache Entry Report. 
2423 */ 2424 static void 2425 nce_report1(nce_t *nce, uchar_t *mp_arg) 2426 { 2427 ill_t *ill = nce->nce_ill; 2428 char local_buf[INET6_ADDRSTRLEN]; 2429 uchar_t flags_buf[10]; 2430 uint32_t flags = nce->nce_flags; 2431 mblk_t *mp = (mblk_t *)mp_arg; 2432 uchar_t *h; 2433 uchar_t *m = flags_buf; 2434 in6_addr_t v6addr; 2435 2436 /* 2437 * Lock the nce to protect nce_res_mp from being changed 2438 * if an external resolver address resolution completes 2439 * while nce_res_mp is being accessed here. 2440 * 2441 * Deal with all address formats, not just Ethernet-specific 2442 * In addition, make sure that the mblk has enough space 2443 * before writing to it. If is doesn't, allocate a new one. 2444 */ 2445 if (nce->nce_ipversion == IPV4_VERSION) 2446 /* Don't include v4 nce_ts in NDP cache entry report */ 2447 return; 2448 2449 ASSERT(ill != NULL); 2450 v6addr = nce->nce_mask; 2451 if (flags & NCE_F_PERMANENT) 2452 *m++ = 'P'; 2453 if (flags & NCE_F_ISROUTER) 2454 *m++ = 'R'; 2455 if (flags & NCE_F_MAPPING) 2456 *m++ = 'M'; 2457 *m = '\0'; 2458 2459 if (ill->ill_net_type == IRE_IF_RESOLVER) { 2460 size_t addrlen; 2461 char *addr_buf; 2462 dl_unitdata_req_t *dl; 2463 2464 mutex_enter(&nce->nce_lock); 2465 h = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2466 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 2467 if (ill->ill_flags & ILLF_XRESOLV) 2468 addrlen = (3 * (dl->dl_dest_addr_length)); 2469 else 2470 addrlen = (3 * (ill->ill_nd_lla_len)); 2471 if (addrlen <= 0) { 2472 mutex_exit(&nce->nce_lock); 2473 (void) mi_mpprintf(mp, 2474 "%8s %9s %5s %s/%d", 2475 ill->ill_name, 2476 "None", 2477 (uchar_t *)&flags_buf, 2478 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2479 (char *)local_buf, sizeof (local_buf)), 2480 ip_mask_to_plen_v6(&v6addr)); 2481 } else { 2482 /* 2483 * Convert the hardware/lla address to ascii 2484 */ 2485 addr_buf = kmem_zalloc(addrlen, KM_NOSLEEP); 2486 if (addr_buf == NULL) { 2487 mutex_exit(&nce->nce_lock); 2488 return; 2489 } 2490 (void) 
mac_colon_addr((uint8_t *)h, 2491 (ill->ill_flags & ILLF_XRESOLV) ? 2492 dl->dl_dest_addr_length : ill->ill_nd_lla_len, 2493 addr_buf, addrlen); 2494 mutex_exit(&nce->nce_lock); 2495 (void) mi_mpprintf(mp, "%8s %17s %5s %s/%d", 2496 ill->ill_name, addr_buf, (uchar_t *)&flags_buf, 2497 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2498 (char *)local_buf, sizeof (local_buf)), 2499 ip_mask_to_plen_v6(&v6addr)); 2500 kmem_free(addr_buf, addrlen); 2501 } 2502 } else { 2503 (void) mi_mpprintf(mp, 2504 "%8s %9s %5s %s/%d", 2505 ill->ill_name, 2506 "None", 2507 (uchar_t *)&flags_buf, 2508 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2509 (char *)local_buf, sizeof (local_buf)), 2510 ip_mask_to_plen_v6(&v6addr)); 2511 } 2512 } 2513 2514 mblk_t * 2515 nce_udreq_alloc(ill_t *ill) 2516 { 2517 mblk_t *template_mp = NULL; 2518 dl_unitdata_req_t *dlur; 2519 int sap_length; 2520 2521 ASSERT(ill->ill_isv6); 2522 2523 sap_length = ill->ill_sap_length; 2524 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 2525 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 2526 if (template_mp == NULL) 2527 return (NULL); 2528 2529 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 2530 dlur->dl_priority.dl_min = 0; 2531 dlur->dl_priority.dl_max = 0; 2532 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 2533 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 2534 2535 /* Copy in the SAP value. */ 2536 NCE_LL_SAP_COPY(ill, template_mp); 2537 2538 return (template_mp); 2539 } 2540 2541 /* 2542 * NDP retransmit timer. 2543 * This timer goes off when: 2544 * a. It is time to retransmit NS for resolver. 2545 * b. It is time to send reachability probes. 2546 */ 2547 void 2548 ndp_timer(void *arg) 2549 { 2550 nce_t *nce = arg; 2551 ill_t *ill = nce->nce_ill; 2552 uint32_t ms; 2553 char addrbuf[INET6_ADDRSTRLEN]; 2554 mblk_t *mp; 2555 boolean_t dropped = B_FALSE; 2556 2557 /* 2558 * The timer has to be cancelled by ndp_delete before doing the final 2559 * refrele. 
So the NCE is guaranteed to exist when the timer runs 2560 * until it clears the timeout_id. Before clearing the timeout_id 2561 * bump up the refcnt so that we can continue to use the nce 2562 */ 2563 ASSERT(nce != NULL); 2564 2565 /* 2566 * Grab the ill_g_lock now itself to avoid lock order problems. 2567 * nce_solicit needs ill_g_lock to be able to traverse ills 2568 */ 2569 rw_enter(&ill_g_lock, RW_READER); 2570 mutex_enter(&nce->nce_lock); 2571 NCE_REFHOLD_LOCKED(nce); 2572 nce->nce_timeout_id = 0; 2573 2574 /* 2575 * Check the reachability state first. 2576 */ 2577 switch (nce->nce_state) { 2578 case ND_DELAY: 2579 rw_exit(&ill_g_lock); 2580 nce->nce_state = ND_PROBE; 2581 mutex_exit(&nce->nce_lock); 2582 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2583 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2584 if (ip_debug > 3) { 2585 /* ip2dbg */ 2586 pr_addr_dbg("ndp_timer: state for %s changed " 2587 "to PROBE\n", AF_INET6, &nce->nce_addr); 2588 } 2589 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2590 NCE_REFRELE(nce); 2591 return; 2592 case ND_PROBE: 2593 /* must be retransmit timer */ 2594 rw_exit(&ill_g_lock); 2595 nce->nce_pcnt--; 2596 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2597 nce->nce_pcnt >= -1); 2598 if (nce->nce_pcnt > 0) { 2599 /* 2600 * As per RFC2461, the nce gets deleted after 2601 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2602 * Note that the first unicast solicitation is sent 2603 * during the DELAY state. 2604 */ 2605 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2606 nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, 2607 addrbuf, sizeof (addrbuf)))); 2608 mutex_exit(&nce->nce_lock); 2609 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, 2610 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, 2611 (nce->nce_flags & NCE_F_PERMANENT) ? 
NDP_PROBE : 2612 NDP_UNICAST); 2613 if (dropped) { 2614 mutex_enter(&nce->nce_lock); 2615 nce->nce_pcnt++; 2616 mutex_exit(&nce->nce_lock); 2617 } 2618 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 2619 } else if (nce->nce_pcnt < 0) { 2620 /* No hope, delete the nce */ 2621 nce->nce_state = ND_UNREACHABLE; 2622 mutex_exit(&nce->nce_lock); 2623 if (ip_debug > 2) { 2624 /* ip1dbg */ 2625 pr_addr_dbg("ndp_timer: Delete IRE for" 2626 " dst %s\n", AF_INET6, &nce->nce_addr); 2627 } 2628 ndp_delete(nce); 2629 } else if (!(nce->nce_flags & NCE_F_PERMANENT)) { 2630 /* Wait RetransTimer, before deleting the entry */ 2631 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2632 nce->nce_pcnt, inet_ntop(AF_INET6, 2633 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2634 mutex_exit(&nce->nce_lock); 2635 /* Wait one interval before killing */ 2636 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2637 } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { 2638 ipif_t *ipif; 2639 2640 /* 2641 * We're done probing, and we can now declare this 2642 * address to be usable. Let IP know that it's ok to 2643 * use. 
2644 */ 2645 nce->nce_state = ND_REACHABLE; 2646 mutex_exit(&nce->nce_lock); 2647 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, 2648 ALL_ZONES, NULL, NULL, NULL, NULL); 2649 if (ipif != NULL) { 2650 if (ipif->ipif_was_dup) { 2651 char ibuf[LIFNAMSIZ + 10]; 2652 char sbuf[INET6_ADDRSTRLEN]; 2653 2654 ipif->ipif_was_dup = B_FALSE; 2655 (void) strlcpy(ibuf, ill->ill_name, 2656 sizeof (ibuf)); 2657 (void) inet_ntop(AF_INET6, 2658 &ipif->ipif_v6lcl_addr, 2659 sbuf, sizeof (sbuf)); 2660 if (ipif->ipif_id != 0) { 2661 (void) snprintf(ibuf + 2662 ill->ill_name_length - 1, 2663 sizeof (ibuf) - 2664 ill->ill_name_length + 1, 2665 ":%d", ipif->ipif_id); 2666 } 2667 cmn_err(CE_NOTE, "recovered address " 2668 "%s on %s", sbuf, ibuf); 2669 } 2670 if ((ipif->ipif_flags & IPIF_UP) && 2671 !ipif->ipif_addr_ready) { 2672 ip_rts_ifmsg(ipif); 2673 ip_rts_newaddrmsg(RTM_ADD, 0, ipif); 2674 sctp_update_ipif(ipif, SCTP_IPIF_UP); 2675 } 2676 ipif->ipif_addr_ready = 1; 2677 ipif_refrele(ipif); 2678 } 2679 /* Begin defending our new address */ 2680 nce->nce_unsolicit_count = 0; 2681 dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, 2682 B_FALSE, &nce->nce_addr, &ipv6_all_hosts_mcast, 2683 nce_advert_flags(nce)); 2684 if (dropped) { 2685 nce->nce_unsolicit_count = 1; 2686 NDP_RESTART_TIMER(nce, 2687 ip_ndp_unsolicit_interval); 2688 } else if (ip_ndp_defense_interval != 0) { 2689 NDP_RESTART_TIMER(nce, ip_ndp_defense_interval); 2690 } 2691 } else { 2692 /* 2693 * This is an address we're probing to be our own, but 2694 * the ill is down. Wait until it comes back before 2695 * doing anything, but switch to reachable state so 2696 * that the restart will work. 2697 */ 2698 nce->nce_state = ND_REACHABLE; 2699 mutex_exit(&nce->nce_lock); 2700 } 2701 NCE_REFRELE(nce); 2702 return; 2703 case ND_INCOMPLETE: 2704 /* 2705 * Must be resolvers retransmit timer. 
2706 */ 2707 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2708 ip6i_t *ip6i; 2709 ip6_t *ip6h; 2710 mblk_t *data_mp; 2711 2712 /* 2713 * Walk the list of packets queued, and see if there 2714 * are any multipathing probe packets. Such packets 2715 * are always queued at the head. Since this is a 2716 * retransmit timer firing, mark such packets as 2717 * delayed in ND resolution. This info will be used 2718 * in ip_wput_v6(). Multipathing probe packets will 2719 * always have an ip6i_t. Once we hit a packet without 2720 * it, we can break out of this loop. 2721 */ 2722 if (mp->b_datap->db_type == M_CTL) 2723 data_mp = mp->b_cont; 2724 else 2725 data_mp = mp; 2726 2727 ip6h = (ip6_t *)data_mp->b_rptr; 2728 if (ip6h->ip6_nxt != IPPROTO_RAW) 2729 break; 2730 2731 /* 2732 * This message should have been pulled up already in 2733 * ip_wput_v6. We can't do pullups here because the 2734 * b_next/b_prev is non-NULL. 2735 */ 2736 ip6i = (ip6i_t *)ip6h; 2737 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2738 sizeof (ip6i_t) + IPV6_HDR_LEN); 2739 2740 /* Mark this packet as delayed due to ND resolution */ 2741 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2742 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2743 } 2744 if (nce->nce_qd_mp != NULL) { 2745 ms = nce_solicit(nce, NULL); 2746 rw_exit(&ill_g_lock); 2747 if (ms == 0) { 2748 if (nce->nce_state != ND_REACHABLE) { 2749 mutex_exit(&nce->nce_lock); 2750 nce_resolv_failed(nce); 2751 ndp_delete(nce); 2752 } else { 2753 mutex_exit(&nce->nce_lock); 2754 } 2755 } else { 2756 mutex_exit(&nce->nce_lock); 2757 NDP_RESTART_TIMER(nce, (clock_t)ms); 2758 } 2759 NCE_REFRELE(nce); 2760 return; 2761 } 2762 mutex_exit(&nce->nce_lock); 2763 rw_exit(&ill_g_lock); 2764 NCE_REFRELE(nce); 2765 break; 2766 case ND_REACHABLE : 2767 rw_exit(&ill_g_lock); 2768 if (((nce->nce_flags & NCE_F_UNSOL_ADV) && 2769 nce->nce_unsolicit_count != 0) || 2770 ((nce->nce_flags & NCE_F_PERMANENT) && 2771 ip_ndp_defense_interval != 0)) { 2772 if 
(nce->nce_unsolicit_count > 0) 2773 nce->nce_unsolicit_count--; 2774 mutex_exit(&nce->nce_lock); 2775 dropped = nce_xmit(ill, 2776 ND_NEIGHBOR_ADVERT, 2777 ill, /* ill to be used for hw addr */ 2778 B_FALSE, /* use ill_phys_addr */ 2779 &nce->nce_addr, 2780 &ipv6_all_hosts_mcast, 2781 nce_advert_flags(nce)); 2782 if (dropped) { 2783 mutex_enter(&nce->nce_lock); 2784 nce->nce_unsolicit_count++; 2785 mutex_exit(&nce->nce_lock); 2786 } 2787 if (nce->nce_unsolicit_count != 0) { 2788 NDP_RESTART_TIMER(nce, 2789 ip_ndp_unsolicit_interval); 2790 } else { 2791 NDP_RESTART_TIMER(nce, 2792 ip_ndp_defense_interval); 2793 } 2794 } else { 2795 mutex_exit(&nce->nce_lock); 2796 } 2797 NCE_REFRELE(nce); 2798 break; 2799 default: 2800 rw_exit(&ill_g_lock); 2801 mutex_exit(&nce->nce_lock); 2802 NCE_REFRELE(nce); 2803 break; 2804 } 2805 } 2806 2807 /* 2808 * Set a link layer address from the ll_addr passed in. 2809 * Copy SAP from ill. 2810 */ 2811 static void 2812 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2813 { 2814 ill_t *ill = nce->nce_ill; 2815 uchar_t *woffset; 2816 2817 ASSERT(ll_addr != NULL); 2818 /* Always called before fast_path_probe */ 2819 ASSERT(nce->nce_fp_mp == NULL); 2820 if (ill->ill_sap_length != 0) { 2821 /* 2822 * Copy the SAP type specified in the 2823 * request into the xmit template. 2824 */ 2825 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2826 } 2827 if (ill->ill_phys_addr_length > 0) { 2828 /* 2829 * The bcopy() below used to be called for the physical address 2830 * length rather than the link layer address length. For 2831 * ethernet and many other media, the phys_addr and lla are 2832 * identical. 2833 * However, with xresolv interfaces being introduced, the 2834 * phys_addr and lla are no longer the same, and the physical 2835 * address may not have any useful meaning, so we use the lla 2836 * for IPv6 address resolution and destination addressing. 2837 * 2838 * For PPP or other interfaces with a zero length 2839 * physical address, don't do anything here. 
2840 * The bcopy() with a zero phys_addr length was previously 2841 * a no-op for interfaces with a zero-length physical address. 2842 * Using the lla for them would change the way they operate. 2843 * Doing nothing in such cases preserves expected behavior. 2844 */ 2845 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2846 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2847 } 2848 } 2849 2850 static boolean_t 2851 nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) 2852 { 2853 ill_t *ill = nce->nce_ill; 2854 uchar_t *ll_offset; 2855 2856 ASSERT(nce->nce_res_mp != NULL); 2857 if (ll_addr == NULL) 2858 return (B_FALSE); 2859 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2860 if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) 2861 return (B_TRUE); 2862 return (B_FALSE); 2863 } 2864 2865 /* 2866 * Updates the link layer address or the reachability state of 2867 * a cache entry. Reset probe counter if needed. 2868 */ 2869 static void 2870 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2871 { 2872 ill_t *ill = nce->nce_ill; 2873 boolean_t need_stop_timer = B_FALSE; 2874 boolean_t need_fastpath_update = B_FALSE; 2875 2876 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2877 ASSERT(nce->nce_ipversion == IPV6_VERSION); 2878 /* 2879 * If this interface does not do NUD, there is no point 2880 * in allowing an update to the cache entry. Although 2881 * we will respond to NS. 2882 * The only time we accept an update for a resolver when 2883 * NUD is turned off is when it has just been created. 2884 * Non-Resolvers will always be created as REACHABLE. 
2885 */ 2886 if (new_state != ND_UNCHANGED) { 2887 if ((nce->nce_flags & NCE_F_NONUD) && 2888 (nce->nce_state != ND_INCOMPLETE)) 2889 return; 2890 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2891 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2892 need_stop_timer = B_TRUE; 2893 if (new_state == ND_REACHABLE) 2894 nce->nce_last = TICK_TO_MSEC(lbolt64); 2895 else { 2896 /* We force NUD in this case */ 2897 nce->nce_last = 0; 2898 } 2899 nce->nce_state = new_state; 2900 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2901 } 2902 /* 2903 * In case of fast path we need to free the the fastpath 2904 * M_DATA and do another probe. Otherwise we can just 2905 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2906 * whatever packets that happens to be transmitting at the time. 2907 */ 2908 if (new_ll_addr != NULL) { 2909 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2910 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2911 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2912 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2913 if (nce->nce_fp_mp != NULL) { 2914 freemsg(nce->nce_fp_mp); 2915 nce->nce_fp_mp = NULL; 2916 } 2917 need_fastpath_update = B_TRUE; 2918 } 2919 mutex_exit(&nce->nce_lock); 2920 if (need_stop_timer) { 2921 (void) untimeout(nce->nce_timeout_id); 2922 nce->nce_timeout_id = 0; 2923 } 2924 if (need_fastpath_update) 2925 nce_fastpath(nce); 2926 mutex_enter(&nce->nce_lock); 2927 } 2928 2929 void 2930 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 2931 { 2932 uint_t count = 0; 2933 mblk_t **mpp; 2934 2935 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2936 2937 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2938 mpp = &(*mpp)->b_next) { 2939 if (++count > 2940 nce->nce_ill->ill_max_buf) { 2941 mblk_t *tmp = nce->nce_qd_mp->b_next; 2942 2943 nce->nce_qd_mp->b_next = NULL; 2944 nce->nce_qd_mp->b_prev = NULL; 2945 freemsg(nce->nce_qd_mp); 2946 nce->nce_qd_mp = tmp; 2947 } 2948 } 2949 /* put this on the list */ 2950 if (head_insert) { 2951 
mp->b_next = nce->nce_qd_mp; 2952 nce->nce_qd_mp = mp; 2953 } else { 2954 *mpp = mp; 2955 } 2956 } 2957 2958 static void 2959 nce_queue_mp(nce_t *nce, mblk_t *mp) 2960 { 2961 boolean_t head_insert = B_FALSE; 2962 ip6_t *ip6h; 2963 ip6i_t *ip6i; 2964 mblk_t *data_mp; 2965 2966 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2967 2968 if (mp->b_datap->db_type == M_CTL) 2969 data_mp = mp->b_cont; 2970 else 2971 data_mp = mp; 2972 ip6h = (ip6_t *)data_mp->b_rptr; 2973 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2974 /* 2975 * This message should have been pulled up already in 2976 * ip_wput_v6. We can't do pullups here because the message 2977 * could be from the nce_qd_mp which could have b_next/b_prev 2978 * non-NULL. 2979 */ 2980 ip6i = (ip6i_t *)ip6h; 2981 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2982 sizeof (ip6i_t) + IPV6_HDR_LEN); 2983 /* 2984 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 2985 * This has 2 aspects mentioned below. 2986 * 1. Perform head insertion in the nce_qd_mp for these packets. 2987 * This ensures that next retransmit of ND solicitation 2988 * will use the interface specified by the probe packet, 2989 * for both NS and NA. This corresponds to the src address 2990 * in the IPv6 packet. If we insert at tail, we will be 2991 * depending on the packet at the head for successful 2992 * ND resolution. This is not reliable, because the interface 2993 * on which the NA arrives could be different from the interface 2994 * on which the NS was sent, and if the receiving interface is 2995 * failed, it will appear that the sending interface is also 2996 * failed, causing in.mpathd to misdiagnose this as link 2997 * failure. 2998 * 2. Drop the original packet, if the ND resolution did not 2999 * succeed in the first attempt. However we will create the 3000 * nce and the ire, as soon as the ND resolution succeeds. 3001 * We don't gain anything by queueing multiple probe packets 3002 * and sending them back-to-back once resolution succeeds. 
3003 * It is sufficient to send just 1 packet after ND resolution 3004 * succeeds. Since mpathd is sending down probe packets at a 3005 * constant rate, we don't need to send the queued packet. We 3006 * need to queue it only for NDP resolution. The benefit of 3007 * dropping the probe packets that were delayed in ND 3008 * resolution, is that in.mpathd will not see inflated 3009 * RTT. If the ND resolution does not succeed within 3010 * in.mpathd's failure detection time, mpathd may detect 3011 * a failure, and it does not matter whether the packet 3012 * was queued or dropped. 3013 */ 3014 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 3015 head_insert = B_TRUE; 3016 } 3017 3018 nce_queue_mp_common(nce, mp, head_insert); 3019 } 3020 3021 /* 3022 * Called when address resolution failed due to a timeout. 3023 * Send an ICMP unreachable in response to all queued packets. 3024 */ 3025 void 3026 nce_resolv_failed(nce_t *nce) 3027 { 3028 mblk_t *mp, *nxt_mp, *first_mp; 3029 char buf[INET6_ADDRSTRLEN]; 3030 ip6_t *ip6h; 3031 zoneid_t zoneid = GLOBAL_ZONEID; 3032 3033 ip1dbg(("nce_resolv_failed: dst %s\n", 3034 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 3035 mutex_enter(&nce->nce_lock); 3036 mp = nce->nce_qd_mp; 3037 nce->nce_qd_mp = NULL; 3038 mutex_exit(&nce->nce_lock); 3039 while (mp != NULL) { 3040 nxt_mp = mp->b_next; 3041 mp->b_next = NULL; 3042 mp->b_prev = NULL; 3043 3044 first_mp = mp; 3045 if (mp->b_datap->db_type == M_CTL) { 3046 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 3047 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3048 zoneid = io->ipsec_out_zoneid; 3049 ASSERT(zoneid != ALL_ZONES); 3050 mp = mp->b_cont; 3051 } 3052 3053 ip6h = (ip6_t *)mp->b_rptr; 3054 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3055 ip6i_t *ip6i; 3056 /* 3057 * This message should have been pulled up already 3058 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 3059 * the header is pulled up. 
3060 */ 3061 ip6i = (ip6i_t *)ip6h; 3062 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 3063 sizeof (ip6i_t) + IPV6_HDR_LEN); 3064 mp->b_rptr += sizeof (ip6i_t); 3065 } 3066 /* 3067 * Ignore failure since icmp_unreachable_v6 will silently 3068 * drop packets with an unspecified source address. 3069 */ 3070 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid); 3071 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 3072 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid); 3073 mp = nxt_mp; 3074 } 3075 } 3076 3077 /* 3078 * Called by SIOCSNDP* ioctl to add/change an nce entry 3079 * and the corresponding attributes. 3080 * Disallow states other than ND_REACHABLE or ND_STALE. 3081 */ 3082 int 3083 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 3084 { 3085 sin6_t *sin6; 3086 in6_addr_t *addr; 3087 nce_t *nce; 3088 int err; 3089 uint16_t new_flags = 0; 3090 uint16_t old_flags = 0; 3091 int inflags = lnr->lnr_flags; 3092 3093 ASSERT(ill->ill_isv6); 3094 if ((lnr->lnr_state_create != ND_REACHABLE) && 3095 (lnr->lnr_state_create != ND_STALE)) 3096 return (EINVAL); 3097 3098 sin6 = (sin6_t *)&lnr->lnr_addr; 3099 addr = &sin6->sin6_addr; 3100 3101 mutex_enter(&ndp6.ndp_g_lock); 3102 /* We know it can not be mapping so just look in the hash table */ 3103 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); 3104 nce = nce_lookup_addr(ill, addr, nce); 3105 if (nce != NULL) 3106 new_flags = nce->nce_flags; 3107 3108 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 3109 case NDF_ISROUTER_ON: 3110 new_flags |= NCE_F_ISROUTER; 3111 break; 3112 case NDF_ISROUTER_OFF: 3113 new_flags &= ~NCE_F_ISROUTER; 3114 break; 3115 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3116 mutex_exit(&ndp6.ndp_g_lock); 3117 if (nce != NULL) 3118 NCE_REFRELE(nce); 3119 return (EINVAL); 3120 } 3121 3122 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3123 case NDF_ANYCAST_ON: 3124 new_flags |= NCE_F_ANYCAST; 3125 break; 3126 case NDF_ANYCAST_OFF: 3127 new_flags &= ~NCE_F_ANYCAST; 3128 break; 3129 case 
(NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3130 mutex_exit(&ndp6.ndp_g_lock); 3131 if (nce != NULL) 3132 NCE_REFRELE(nce); 3133 return (EINVAL); 3134 } 3135 3136 switch (inflags & (NDF_PROXY_ON|NDF_PROXY_OFF)) { 3137 case NDF_PROXY_ON: 3138 new_flags |= NCE_F_PROXY; 3139 break; 3140 case NDF_PROXY_OFF: 3141 new_flags &= ~NCE_F_PROXY; 3142 break; 3143 case (NDF_PROXY_OFF|NDF_PROXY_ON): 3144 mutex_exit(&ndp6.ndp_g_lock); 3145 if (nce != NULL) 3146 NCE_REFRELE(nce); 3147 return (EINVAL); 3148 } 3149 3150 if (nce == NULL) { 3151 err = ndp_add(ill, 3152 (uchar_t *)lnr->lnr_hdw_addr, 3153 addr, 3154 &ipv6_all_ones, 3155 &ipv6_all_zeros, 3156 0, 3157 new_flags, 3158 lnr->lnr_state_create, 3159 &nce, 3160 NULL, 3161 NULL); 3162 if (err != 0) { 3163 mutex_exit(&ndp6.ndp_g_lock); 3164 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 3165 return (err); 3166 } 3167 } 3168 old_flags = nce->nce_flags; 3169 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3170 /* 3171 * Router turned to host, delete all ires. 3172 * XXX Just delete the entry, but we need to add too. 3173 */ 3174 nce->nce_flags &= ~NCE_F_ISROUTER; 3175 mutex_exit(&ndp6.ndp_g_lock); 3176 ndp_delete(nce); 3177 NCE_REFRELE(nce); 3178 return (0); 3179 } 3180 mutex_exit(&ndp6.ndp_g_lock); 3181 3182 mutex_enter(&nce->nce_lock); 3183 nce->nce_flags = new_flags; 3184 mutex_exit(&nce->nce_lock); 3185 /* 3186 * Note that we ignore the state at this point, which 3187 * should be either STALE or REACHABLE. Instead we let 3188 * the link layer address passed in to determine the state 3189 * much like incoming packets. 3190 */ 3191 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3192 NCE_REFRELE(nce); 3193 return (0); 3194 } 3195 3196 /* 3197 * If the device driver supports it, we make nce_fp_mp to have 3198 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 3199 * The caller insures there is hold on nce for this function. 
3200 * Note that since ill_fastpath_probe() copies the mblk there is 3201 * no need for the hold beyond this function. 3202 */ 3203 static void 3204 nce_fastpath(nce_t *nce) 3205 { 3206 ill_t *ill = nce->nce_ill; 3207 int res; 3208 3209 ASSERT(ill != NULL); 3210 if (nce->nce_fp_mp != NULL) { 3211 /* Already contains fastpath info */ 3212 return; 3213 } 3214 if (nce->nce_res_mp != NULL) { 3215 nce_fastpath_list_add(nce); 3216 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3217 /* 3218 * EAGAIN is an indication of a transient error 3219 * i.e. allocation failure etc. leave the nce in the list it 3220 * will be updated when another probe happens for another ire 3221 * if not it will be taken out of the list when the ire is 3222 * deleted. 3223 */ 3224 3225 if (res != 0 && res != EAGAIN) 3226 nce_fastpath_list_delete(nce); 3227 } 3228 } 3229 3230 /* 3231 * Drain the list of nce's waiting for fastpath response. 3232 */ 3233 void 3234 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3235 void *arg) 3236 { 3237 3238 nce_t *next_nce; 3239 nce_t *current_nce; 3240 nce_t *first_nce; 3241 nce_t *prev_nce = NULL; 3242 3243 ASSERT(ill != NULL && ill->ill_isv6); 3244 3245 mutex_enter(&ill->ill_lock); 3246 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3247 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3248 next_nce = current_nce->nce_fastpath; 3249 /* 3250 * Take it off the list if we're flushing, or if the callback 3251 * routine tells us to do so. Otherwise, leave the nce in the 3252 * fastpath list to handle any pending response from the lower 3253 * layer. We can't drain the list when the callback routine 3254 * comparison failed, because the response is asynchronous in 3255 * nature, and may not arrive in the same order as the list 3256 * insertion. 
3257 */ 3258 if (func == NULL || func(current_nce, arg)) { 3259 current_nce->nce_fastpath = NULL; 3260 if (current_nce == first_nce) 3261 ill->ill_fastpath_list = first_nce = next_nce; 3262 else 3263 prev_nce->nce_fastpath = next_nce; 3264 } else { 3265 /* previous element that is still in the list */ 3266 prev_nce = current_nce; 3267 } 3268 current_nce = next_nce; 3269 } 3270 mutex_exit(&ill->ill_lock); 3271 } 3272 3273 /* 3274 * Add nce to the nce fastpath list. 3275 */ 3276 void 3277 nce_fastpath_list_add(nce_t *nce) 3278 { 3279 ill_t *ill; 3280 3281 ill = nce->nce_ill; 3282 ASSERT(ill != NULL && ill->ill_isv6); 3283 3284 mutex_enter(&ill->ill_lock); 3285 mutex_enter(&nce->nce_lock); 3286 3287 /* 3288 * if nce has not been deleted and 3289 * is not already in the list add it. 3290 */ 3291 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3292 (nce->nce_fastpath == NULL)) { 3293 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3294 ill->ill_fastpath_list = nce; 3295 } 3296 3297 mutex_exit(&nce->nce_lock); 3298 mutex_exit(&ill->ill_lock); 3299 } 3300 3301 /* 3302 * remove nce from the nce fastpath list. 3303 */ 3304 void 3305 nce_fastpath_list_delete(nce_t *nce) 3306 { 3307 nce_t *nce_ptr; 3308 3309 ill_t *ill; 3310 3311 ill = nce->nce_ill; 3312 ASSERT(ill != NULL); 3313 if (!ill->ill_isv6) { 3314 /* 3315 * v4 nce_t's do not have nce_fastpath set. 
3316 */ 3317 return; 3318 } 3319 3320 mutex_enter(&ill->ill_lock); 3321 if (nce->nce_fastpath == NULL) 3322 goto done; 3323 3324 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3325 3326 if (ill->ill_fastpath_list == nce) { 3327 ill->ill_fastpath_list = nce->nce_fastpath; 3328 } else { 3329 nce_ptr = ill->ill_fastpath_list; 3330 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3331 if (nce_ptr->nce_fastpath == nce) { 3332 nce_ptr->nce_fastpath = nce->nce_fastpath; 3333 break; 3334 } 3335 nce_ptr = nce_ptr->nce_fastpath; 3336 } 3337 } 3338 3339 nce->nce_fastpath = NULL; 3340 done: 3341 mutex_exit(&ill->ill_lock); 3342 } 3343 3344 /* 3345 * Update all NCE's that are not in fastpath mode and 3346 * have an nce_fp_mp that matches mp. mp->b_cont contains 3347 * the fastpath header. 3348 * 3349 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 3350 */ 3351 boolean_t 3352 ndp_fastpath_update(nce_t *nce, void *arg) 3353 { 3354 mblk_t *mp, *fp_mp; 3355 uchar_t *mp_rptr, *ud_mp_rptr; 3356 mblk_t *ud_mp = nce->nce_res_mp; 3357 ptrdiff_t cmplen; 3358 3359 if (nce->nce_flags & NCE_F_MAPPING) 3360 return (B_TRUE); 3361 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3362 return (B_TRUE); 3363 3364 ip2dbg(("ndp_fastpath_update: trying\n")); 3365 mp = (mblk_t *)arg; 3366 mp_rptr = mp->b_rptr; 3367 cmplen = mp->b_wptr - mp_rptr; 3368 ASSERT(cmplen >= 0); 3369 ud_mp_rptr = ud_mp->b_rptr; 3370 /* 3371 * The nce is locked here to prevent any other threads 3372 * from accessing and changing nce_res_mp when the IPv6 address 3373 * becomes resolved to an lla while we're in the middle 3374 * of looking at and comparing the hardware address (lla). 3375 * It is also locked to prevent multiple threads in nce_fastpath_update 3376 * from examining nce_res_mp atthe same time. 
3377 */ 3378 mutex_enter(&nce->nce_lock); 3379 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3380 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3381 mutex_exit(&nce->nce_lock); 3382 /* 3383 * Don't take the ire off the fastpath list yet, 3384 * since the response may come later. 3385 */ 3386 return (B_FALSE); 3387 } 3388 /* Matched - install mp as the fastpath mp */ 3389 ip1dbg(("ndp_fastpath_update: match\n")); 3390 fp_mp = dupb(mp->b_cont); 3391 if (fp_mp != NULL) { 3392 nce->nce_fp_mp = fp_mp; 3393 } 3394 mutex_exit(&nce->nce_lock); 3395 return (B_TRUE); 3396 } 3397 3398 /* 3399 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3400 * driver. Note that it assumes IP is exclusive... 3401 */ 3402 /* ARGSUSED */ 3403 void 3404 ndp_fastpath_flush(nce_t *nce, char *arg) 3405 { 3406 if (nce->nce_flags & NCE_F_MAPPING) 3407 return; 3408 /* No fastpath info? */ 3409 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3410 return; 3411 3412 /* Just delete the NCE... */ 3413 ndp_delete(nce); 3414 } 3415 3416 /* 3417 * Return a pointer to a given option in the packet. 3418 * Assumes that option part of the packet have already been validated. 3419 */ 3420 nd_opt_hdr_t * 3421 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3422 { 3423 while (optlen > 0) { 3424 if (opt->nd_opt_type == opt_type) 3425 return (opt); 3426 optlen -= 8 * opt->nd_opt_len; 3427 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3428 } 3429 return (NULL); 3430 } 3431 3432 /* 3433 * Verify all option lengths present are > 0, also check to see 3434 * if the option lengths and packet length are consistent. 
3435 */ 3436 boolean_t 3437 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3438 { 3439 ASSERT(opt != NULL); 3440 while (optlen > 0) { 3441 if (opt->nd_opt_len == 0) 3442 return (B_FALSE); 3443 optlen -= 8 * opt->nd_opt_len; 3444 if (optlen < 0) 3445 return (B_FALSE); 3446 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3447 } 3448 return (B_TRUE); 3449 } 3450 3451 /* 3452 * ndp_walk function. 3453 * Free a fraction of the NCE cache entries. 3454 * A fraction of zero means to not free any in that category. 3455 */ 3456 void 3457 ndp_cache_reclaim(nce_t *nce, char *arg) 3458 { 3459 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3460 uint_t rand; 3461 3462 if (nce->nce_flags & NCE_F_PERMANENT) 3463 return; 3464 3465 rand = (uint_t)lbolt + 3466 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3467 if (ncr->ncr_host != 0 && 3468 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3469 ndp_delete(nce); 3470 return; 3471 } 3472 } 3473 3474 /* 3475 * ndp_walk function. 3476 * Count the number of NCEs that can be deleted. 3477 * These would be hosts but not routers. 
3478 */ 3479 void 3480 ndp_cache_count(nce_t *nce, char *arg) 3481 { 3482 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3483 3484 if (nce->nce_flags & NCE_F_PERMANENT) 3485 return; 3486 3487 ncc->ncc_total++; 3488 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3489 ncc->ncc_host++; 3490 } 3491 3492 #ifdef NCE_DEBUG 3493 th_trace_t * 3494 th_trace_nce_lookup(nce_t *nce) 3495 { 3496 int bucket_id; 3497 th_trace_t *th_trace; 3498 3499 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3500 3501 bucket_id = IP_TR_HASH(curthread); 3502 ASSERT(bucket_id < IP_TR_HASH_MAX); 3503 3504 for (th_trace = nce->nce_trace[bucket_id]; th_trace != NULL; 3505 th_trace = th_trace->th_next) { 3506 if (th_trace->th_id == curthread) 3507 return (th_trace); 3508 } 3509 return (NULL); 3510 } 3511 3512 void 3513 nce_trace_ref(nce_t *nce) 3514 { 3515 int bucket_id; 3516 th_trace_t *th_trace; 3517 3518 /* 3519 * Attempt to locate the trace buffer for the curthread. 3520 * If it does not exist, then allocate a new trace buffer 3521 * and link it in list of trace bufs for this ipif, at the head 3522 */ 3523 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3524 3525 if (nce->nce_trace_disable == B_TRUE) 3526 return; 3527 3528 th_trace = th_trace_nce_lookup(nce); 3529 if (th_trace == NULL) { 3530 bucket_id = IP_TR_HASH(curthread); 3531 th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), 3532 KM_NOSLEEP); 3533 if (th_trace == NULL) { 3534 nce->nce_trace_disable = B_TRUE; 3535 nce_trace_inactive(nce); 3536 return; 3537 } 3538 th_trace->th_id = curthread; 3539 th_trace->th_next = nce->nce_trace[bucket_id]; 3540 th_trace->th_prev = &nce->nce_trace[bucket_id]; 3541 if (th_trace->th_next != NULL) 3542 th_trace->th_next->th_prev = &th_trace->th_next; 3543 nce->nce_trace[bucket_id] = th_trace; 3544 } 3545 ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1); 3546 th_trace->th_refcnt++; 3547 th_trace_rrecord(th_trace); 3548 } 3549 3550 void 3551 nce_untrace_ref(nce_t *nce) 3552 { 3553 th_trace_t *th_trace; 3554 3555 
ASSERT(MUTEX_HELD(&nce->nce_lock)); 3556 3557 if (nce->nce_trace_disable == B_TRUE) 3558 return; 3559 3560 th_trace = th_trace_nce_lookup(nce); 3561 ASSERT(th_trace != NULL && th_trace->th_refcnt > 0); 3562 3563 th_trace_rrecord(th_trace); 3564 th_trace->th_refcnt--; 3565 } 3566 3567 void 3568 nce_trace_inactive(nce_t *nce) 3569 { 3570 th_trace_t *th_trace; 3571 int i; 3572 3573 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3574 3575 for (i = 0; i < IP_TR_HASH_MAX; i++) { 3576 while (nce->nce_trace[i] != NULL) { 3577 th_trace = nce->nce_trace[i]; 3578 3579 /* unlink th_trace and free it */ 3580 nce->nce_trace[i] = th_trace->th_next; 3581 if (th_trace->th_next != NULL) 3582 th_trace->th_next->th_prev = 3583 &nce->nce_trace[i]; 3584 3585 th_trace->th_next = NULL; 3586 th_trace->th_prev = NULL; 3587 kmem_free(th_trace, sizeof (th_trace_t)); 3588 } 3589 } 3590 3591 } 3592 3593 /* ARGSUSED */ 3594 int 3595 nce_thread_exit(nce_t *nce, caddr_t arg) 3596 { 3597 th_trace_t *th_trace; 3598 3599 mutex_enter(&nce->nce_lock); 3600 th_trace = th_trace_nce_lookup(nce); 3601 3602 if (th_trace == NULL) { 3603 mutex_exit(&nce->nce_lock); 3604 return (0); 3605 } 3606 3607 ASSERT(th_trace->th_refcnt == 0); 3608 3609 /* unlink th_trace and free it */ 3610 *th_trace->th_prev = th_trace->th_next; 3611 if (th_trace->th_next != NULL) 3612 th_trace->th_next->th_prev = th_trace->th_prev; 3613 th_trace->th_next = NULL; 3614 th_trace->th_prev = NULL; 3615 kmem_free(th_trace, sizeof (th_trace_t)); 3616 mutex_exit(&nce->nce_lock); 3617 return (0); 3618 } 3619 #endif 3620 3621 /* 3622 * Called when address resolution fails due to a timeout. 3623 * Send an ICMP unreachable in response to all queued packets. 
3624 */ 3625 void 3626 arp_resolv_failed(nce_t *nce) 3627 { 3628 mblk_t *mp, *nxt_mp, *first_mp; 3629 char buf[INET6_ADDRSTRLEN]; 3630 zoneid_t zoneid = GLOBAL_ZONEID; 3631 struct in_addr ipv4addr; 3632 3633 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3634 ip3dbg(("arp_resolv_failed: dst %s\n", 3635 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3636 mutex_enter(&nce->nce_lock); 3637 mp = nce->nce_qd_mp; 3638 nce->nce_qd_mp = NULL; 3639 mutex_exit(&nce->nce_lock); 3640 3641 while (mp != NULL) { 3642 nxt_mp = mp->b_next; 3643 mp->b_next = NULL; 3644 mp->b_prev = NULL; 3645 3646 first_mp = mp; 3647 /* 3648 * Send icmp unreachable messages 3649 * to the hosts. 3650 */ 3651 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid); 3652 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3653 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3654 ICMP_HOST_UNREACHABLE, zoneid); 3655 mp = nxt_mp; 3656 } 3657 } 3658 3659 static int 3660 ndp_lookup_then_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, 3661 const in_addr_t *mask, const in_addr_t *extract_mask, 3662 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 3663 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 3664 { 3665 int err = 0; 3666 nce_t *nce; 3667 in6_addr_t addr6; 3668 3669 mutex_enter(&ndp4.ndp_g_lock); 3670 nce = *((nce_t **)NCE_HASH_PTR_V4(*addr)); 3671 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3672 nce = nce_lookup_addr(ill, &addr6, nce); 3673 if (nce == NULL) { 3674 err = ndp_add_v4(ill, 3675 hw_addr, 3676 addr, 3677 mask, 3678 extract_mask, 3679 hw_extract_start, 3680 flags, 3681 state, 3682 newnce, 3683 fp_mp, 3684 res_mp); 3685 } else { 3686 *newnce = nce; 3687 err = EEXIST; 3688 } 3689 mutex_exit(&ndp4.ndp_g_lock); 3690 return (err); 3691 } 3692 3693 /* 3694 * NDP Cache Entry creation routine for IPv4. 3695 * Mapped entries are handled in arp. 3696 * This routine must always be called with ndp4.ndp_g_lock held. 3697 * Prior to return, nce_refcnt is incremented. 
3698 */ 3699 static int 3700 ndp_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, 3701 const in_addr_t *mask, const in_addr_t *extract_mask, 3702 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 3703 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 3704 { 3705 static nce_t nce_nil; 3706 nce_t *nce; 3707 mblk_t *mp; 3708 mblk_t *template; 3709 nce_t **ncep; 3710 3711 ASSERT(MUTEX_HELD(&ndp4.ndp_g_lock)); 3712 ASSERT(ill != NULL); 3713 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 3714 return (EINVAL); 3715 } 3716 ASSERT((flags & NCE_F_MAPPING) == 0); 3717 ASSERT(extract_mask == NULL); 3718 /* 3719 * Allocate the mblk to hold the nce. 3720 */ 3721 mp = allocb(sizeof (nce_t), BPRI_MED); 3722 if (mp == NULL) 3723 return (ENOMEM); 3724 3725 nce = (nce_t *)mp->b_rptr; 3726 mp->b_wptr = (uchar_t *)&nce[1]; 3727 *nce = nce_nil; 3728 3729 /* 3730 * This one holds link layer address; if res_mp has been provided 3731 * by the caller, accept it without any further checks. Otherwise, 3732 * for V4, we fill it up with ill_resolver_mp here, then in 3733 * in ire_arpresolve(), we fill it up with the ARP query 3734 * once its formulated. 3735 */ 3736 if (res_mp != NULL) { 3737 template = res_mp; 3738 } else { 3739 template = copyb(ill->ill_resolver_mp); 3740 } 3741 if (template == NULL) { 3742 freeb(mp); 3743 return (ENOMEM); 3744 } 3745 nce->nce_ill = ill; 3746 nce->nce_ipversion = IPV4_VERSION; 3747 nce->nce_flags = flags; 3748 nce->nce_state = state; 3749 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3750 nce->nce_rcnt = ill->ill_xmit_count; 3751 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3752 if (*mask == IP_HOST_MASK) { 3753 nce->nce_mask = ipv6_all_ones; 3754 } else { 3755 IN6_IPADDR_TO_V4MAPPED(*mask, &nce->nce_mask); 3756 } 3757 nce->nce_extract_mask = ipv6_all_zeros; 3758 nce->nce_ll_extract_start = hw_extract_start; 3759 nce->nce_fp_mp = (fp_mp? 
fp_mp : NULL); 3760 nce->nce_res_mp = template; 3761 if (state == ND_REACHABLE) 3762 nce->nce_last = TICK_TO_MSEC(lbolt64); 3763 else 3764 nce->nce_last = 0; 3765 nce->nce_qd_mp = NULL; 3766 nce->nce_mp = mp; 3767 if (hw_addr != NULL) 3768 nce_set_ll(nce, hw_addr); 3769 /* This one is for nce getting created */ 3770 nce->nce_refcnt = 1; 3771 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3772 ncep = ((nce_t **)NCE_HASH_PTR_V4(*addr)); 3773 3774 #ifdef NCE_DEBUG 3775 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 3776 #endif 3777 /* 3778 * Atomically ensure that the ill is not CONDEMNED, before 3779 * adding the NCE. 3780 */ 3781 mutex_enter(&ill->ill_lock); 3782 if (ill->ill_state_flags & ILL_CONDEMNED) { 3783 mutex_exit(&ill->ill_lock); 3784 freeb(mp); 3785 if (res_mp == NULL) { 3786 /* 3787 * template was locally allocated. need to free it. 3788 */ 3789 freeb(template); 3790 } 3791 return (EINVAL); 3792 } 3793 if ((nce->nce_next = *ncep) != NULL) 3794 nce->nce_next->nce_ptpn = &nce->nce_next; 3795 *ncep = nce; 3796 nce->nce_ptpn = ncep; 3797 *newnce = nce; 3798 /* This one is for nce being used by an active thread */ 3799 NCE_REFHOLD(*newnce); 3800 3801 /* Bump up the number of nce's referencing this ill */ 3802 ill->ill_nce_cnt++; 3803 mutex_exit(&ill->ill_lock); 3804 return (0); 3805 } 3806 3807 void 3808 ndp_flush_qd_mp(nce_t *nce) 3809 { 3810 mblk_t *qd_mp, *qd_next; 3811 3812 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3813 qd_mp = nce->nce_qd_mp; 3814 nce->nce_qd_mp = NULL; 3815 while (qd_mp != NULL) { 3816 qd_next = qd_mp->b_next; 3817 qd_mp->b_next = NULL; 3818 qd_mp->b_prev = NULL; 3819 freemsg(qd_mp); 3820 qd_mp = qd_next; 3821 } 3822 } 3823 3824 nce_t * 3825 nce_reinit(nce_t *nce) 3826 { 3827 nce_t *newnce = NULL; 3828 in_addr_t nce_addr, nce_mask; 3829 3830 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3831 IN6_V4MAPPED_TO_IPADDR(&nce->nce_mask, nce_mask); 3832 /* 3833 * delete the old one. 
this will get rid of any ire's pointing 3834 * at this nce. 3835 */ 3836 ndp_delete(nce); 3837 /* 3838 * create a new nce with the same addr and mask. 3839 */ 3840 mutex_enter(&ndp4.ndp_g_lock); 3841 (void) ndp_add_v4(nce->nce_ill, NULL, &nce_addr, &nce_mask, NULL, 0, 0, 3842 ND_INITIAL, &newnce, NULL, NULL); 3843 mutex_exit(&ndp4.ndp_g_lock); 3844 /* 3845 * refrele the old nce. 3846 */ 3847 NCE_REFRELE(nce); 3848 return (newnce); 3849 } 3850 3851 /* 3852 * ndp_walk routine to delete all entries that have a given destination or 3853 * gateway address and cached link layer (MAC) address. This is used when ARP 3854 * informs us that a network-to-link-layer mapping may have changed. 3855 */ 3856 void 3857 nce_delete_hw_changed(nce_t *nce, void *arg) 3858 { 3859 nce_hw_map_t *hwm = arg; 3860 mblk_t *mp; 3861 dl_unitdata_req_t *dlu; 3862 uchar_t *macaddr; 3863 ill_t *ill; 3864 int saplen; 3865 ipaddr_t nce_addr; 3866 3867 if (nce->nce_state != ND_REACHABLE) 3868 return; 3869 3870 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3871 if (nce_addr != hwm->hwm_addr) 3872 return; 3873 3874 mutex_enter(&nce->nce_lock); 3875 if ((mp = nce->nce_res_mp) == NULL) { 3876 mutex_exit(&nce->nce_lock); 3877 return; 3878 } 3879 dlu = (dl_unitdata_req_t *)mp->b_rptr; 3880 macaddr = (uchar_t *)(dlu + 1); 3881 ill = nce->nce_ill; 3882 if ((saplen = ill->ill_sap_length) > 0) 3883 macaddr += saplen; 3884 else 3885 saplen = -saplen; 3886 3887 /* 3888 * If the hardware address is unchanged, then leave this one alone. 3889 * Note that saplen == abs(saplen) now. 3890 */ 3891 if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && 3892 bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { 3893 mutex_exit(&nce->nce_lock); 3894 return; 3895 } 3896 mutex_exit(&nce->nce_lock); 3897 3898 DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); 3899 ndp_delete(nce); 3900 } 3901 3902 /* 3903 * This function verifies whether a given IPv4 address is potentially known to 3904 * the NCE subsystem. 
If so, then ARP must not delete the corresponding ace_t,
 * so that it can continue to look for hardware changes on that address.
 */
boolean_t
ndp_lookup_ipaddr(in_addr_t addr)
{
	nce_t		*cur;
	struct in_addr	v4;
	boolean_t	known = B_FALSE;

	if (addr == INADDR_ANY)
		return (B_FALSE);

	mutex_enter(&ndp4.ndp_g_lock);
	for (cur = *(nce_t **)NCE_HASH_PTR_V4(addr); cur != NULL;
	    cur = cur->nce_next) {
		/* Every entry in this table carries a v4-mapped address. */
		IN6_V4MAPPED_TO_INADDR(&cur->nce_addr, &v4);
		if (addr != v4.s_addr)
			continue;
		/* Only host entries (all-ones mask) qualify. */
		if (!IN6_ARE_ADDR_EQUAL(&cur->nce_mask, &ipv6_all_ones))
			continue;
		/* One flag is tested, so nce_lock is not taken. */
		if (cur->nce_flags & NCE_F_CONDEMNED)
			continue;
		known = B_TRUE;
		break;
	}
	mutex_exit(&ndp4.ndp_g_lock);
	return (known);
}