1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/sysmacros.h> 33 #include <sys/errno.h> 34 #include <sys/dlpi.h> 35 #include <sys/socket.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/vtrace.h> 41 #include <sys/kmem.h> 42 #include <sys/zone.h> 43 #include <sys/ethernet.h> 44 #include <sys/sdt.h> 45 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/if_dl.h> 49 #include <net/route.h> 50 #include <netinet/in.h> 51 #include <netinet/ip6.h> 52 #include <netinet/icmp6.h> 53 54 #include <inet/common.h> 55 #include <inet/mi.h> 56 #include <inet/mib2.h> 57 #include <inet/nd.h> 58 #include <inet/ip.h> 59 #include <inet/ip_impl.h> 60 #include <inet/ipclassifier.h> 61 #include <inet/ip_if.h> 62 #include <inet/ip_ire.h> 63 #include <inet/ip_rts.h> 64 #include <inet/ip6.h> 65 #include <inet/ip_ndp.h> 66 #include 
<inet/ipsec_impl.h> 67 #include <inet/ipsec_info.h> 68 #include <inet/sctp_ip.h> 69 70 /* 71 * Function names with nce_ prefix are static while function 72 * names with ndp_ prefix are used by rest of the IP. 73 * 74 * Lock ordering: 75 * 76 * ndp_g_lock -> ill_lock -> nce_lock 77 * 78 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 79 * nce_next. Nce_lock protects the contents of the NCE (particularly 80 * nce_refcnt). 81 */ 82 83 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 84 uint32_t ll_addr_len); 85 static void nce_ire_delete(nce_t *nce); 86 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 87 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 88 static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *, nce_t *); 89 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 90 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 91 uchar_t *addr); 92 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 93 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 94 static void nce_report1(nce_t *nce, uchar_t *mp_arg); 95 static mblk_t *nce_udreq_alloc(ill_t *ill); 96 static void nce_update(nce_t *nce, uint16_t new_state, 97 uchar_t *new_ll_addr); 98 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 99 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 100 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 101 const in6_addr_t *target, int flag); 102 extern void th_trace_rrecord(th_trace_t *); 103 static int ndp_lookup_then_add_v6(ill_t *, uchar_t *, 104 const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, 105 uint32_t, uint16_t, uint16_t, nce_t **, mblk_t *, mblk_t *); 106 static int ndp_lookup_then_add_v4(ill_t *, uchar_t *, 107 const in_addr_t *, const in_addr_t *, const in_addr_t *, 108 uint32_t, uint16_t, uint16_t, nce_t **, mblk_t *, mblk_t *); 109 static int ndp_add_v6(ill_t *, uchar_t *, const in6_addr_t *, 110 const in6_addr_t *, 
const in6_addr_t *, uint32_t, uint16_t, uint16_t, 111 nce_t **); 112 static int ndp_add_v4(ill_t *, uchar_t *, const in_addr_t *, 113 const in_addr_t *, const in_addr_t *, uint32_t, uint16_t, uint16_t, 114 nce_t **, mblk_t *, mblk_t *); 115 116 117 #ifdef NCE_DEBUG 118 void nce_trace_inactive(nce_t *); 119 #endif 120 121 #define NCE_HASH_PTR_V4(ipst, addr) \ 122 (&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 123 124 #define NCE_HASH_PTR_V6(ipst, addr) \ 125 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ 126 NCE_TABLE_SIZE)])) 127 128 /* 129 * Compute default flags to use for an advertisement of this nce's address. 130 */ 131 static int 132 nce_advert_flags(const nce_t *nce) 133 { 134 int flag = 0; 135 136 if (nce->nce_flags & NCE_F_ISROUTER) 137 flag |= NDP_ISROUTER; 138 if (!(nce->nce_flags & NCE_F_PROXY)) 139 flag |= NDP_ORIDE; 140 return (flag); 141 } 142 143 int 144 ndp_add(ill_t *ill, uchar_t *hw_addr, const void *addr, 145 const void *mask, const void *extract_mask, 146 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 147 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 148 { 149 int status; 150 151 if (ill->ill_isv6) 152 status = ndp_add_v6(ill, hw_addr, (in6_addr_t *)addr, 153 (in6_addr_t *)mask, (in6_addr_t *)extract_mask, 154 hw_extract_start, flags, state, newnce); 155 else 156 status = ndp_add_v4(ill, hw_addr, (in_addr_t *)addr, 157 (in_addr_t *)mask, (in_addr_t *)extract_mask, 158 hw_extract_start, flags, state, newnce, fp_mp, res_mp); 159 return (status); 160 } 161 162 /* Non-tunable probe interval, based on link capabilities */ 163 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 164 165 /* 166 * NDP Cache Entry creation routine. 167 * Mapped entries will never do NUD . 168 * This routine must always be called with ndp6->ndp_g_lock held. 169 * Prior to return, nce_refcnt is incremented. 
170 */ 171 static int 172 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 173 const in6_addr_t *mask, const in6_addr_t *extract_mask, 174 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 175 nce_t **newnce) 176 { 177 static nce_t nce_nil; 178 nce_t *nce; 179 mblk_t *mp; 180 mblk_t *template; 181 nce_t **ncep; 182 int err; 183 boolean_t dropped = B_FALSE; 184 ip_stack_t *ipst = ill->ill_ipst; 185 186 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 187 ASSERT(ill != NULL && ill->ill_isv6); 188 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 189 ip0dbg(("ndp_add: no addr\n")); 190 return (EINVAL); 191 } 192 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 193 ip0dbg(("ndp_add: flags = %x\n", (int)flags)); 194 return (EINVAL); 195 } 196 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 197 (flags & NCE_F_MAPPING)) { 198 ip0dbg(("ndp_add: extract mask zero for mapping")); 199 return (EINVAL); 200 } 201 /* 202 * Allocate the mblk to hold the nce. 203 * 204 * XXX This can come out of a separate cache - nce_cache. 
205 * We don't need the mp anymore as there are no more 206 * "qwriter"s 207 */ 208 mp = allocb(sizeof (nce_t), BPRI_MED); 209 if (mp == NULL) 210 return (ENOMEM); 211 212 nce = (nce_t *)mp->b_rptr; 213 mp->b_wptr = (uchar_t *)&nce[1]; 214 *nce = nce_nil; 215 216 /* 217 * This one holds link layer address 218 */ 219 if (ill->ill_net_type == IRE_IF_RESOLVER) { 220 template = nce_udreq_alloc(ill); 221 } else { 222 if (ill->ill_resolver_mp == NULL) { 223 freeb(mp); 224 return (EINVAL); 225 } 226 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 227 template = copyb(ill->ill_resolver_mp); 228 } 229 if (template == NULL) { 230 freeb(mp); 231 return (ENOMEM); 232 } 233 nce->nce_ill = ill; 234 nce->nce_ipversion = IPV6_VERSION; 235 nce->nce_flags = flags; 236 nce->nce_state = state; 237 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 238 nce->nce_rcnt = ill->ill_xmit_count; 239 nce->nce_addr = *addr; 240 nce->nce_mask = *mask; 241 nce->nce_extract_mask = *extract_mask; 242 nce->nce_ll_extract_start = hw_extract_start; 243 nce->nce_fp_mp = NULL; 244 nce->nce_res_mp = template; 245 if (state == ND_REACHABLE) 246 nce->nce_last = TICK_TO_MSEC(lbolt64); 247 else 248 nce->nce_last = 0; 249 nce->nce_qd_mp = NULL; 250 nce->nce_mp = mp; 251 if (hw_addr != NULL) 252 nce_set_ll(nce, hw_addr); 253 /* This one is for nce getting created */ 254 nce->nce_refcnt = 1; 255 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 256 if (nce->nce_flags & NCE_F_MAPPING) { 257 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 258 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 259 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 260 ncep = &ipst->ips_ndp6->nce_mask_entries; 261 } else { 262 ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 263 } 264 265 #ifdef NCE_DEBUG 266 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 267 #endif 268 /* 269 * Atomically ensure that the ill is not CONDEMNED, before 270 * adding the NCE. 
271 */ 272 mutex_enter(&ill->ill_lock); 273 if (ill->ill_state_flags & ILL_CONDEMNED) { 274 mutex_exit(&ill->ill_lock); 275 freeb(mp); 276 freeb(template); 277 return (EINVAL); 278 } 279 if ((nce->nce_next = *ncep) != NULL) 280 nce->nce_next->nce_ptpn = &nce->nce_next; 281 *ncep = nce; 282 nce->nce_ptpn = ncep; 283 *newnce = nce; 284 /* This one is for nce being used by an active thread */ 285 NCE_REFHOLD(*newnce); 286 287 /* Bump up the number of nce's referencing this ill */ 288 ill->ill_nce_cnt++; 289 mutex_exit(&ill->ill_lock); 290 291 err = 0; 292 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 293 mutex_enter(&nce->nce_lock); 294 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 295 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 296 mutex_exit(&nce->nce_lock); 297 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 298 &ipv6_all_zeros, addr, NDP_PROBE); 299 if (dropped) { 300 mutex_enter(&nce->nce_lock); 301 nce->nce_pcnt++; 302 mutex_exit(&nce->nce_lock); 303 } 304 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 305 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 306 err = EINPROGRESS; 307 } else if (flags & NCE_F_UNSOL_ADV) { 308 /* 309 * We account for the transmit below by assigning one 310 * less than the ndd variable. Subsequent decrements 311 * are done in ndp_timer. 
312 */ 313 mutex_enter(&nce->nce_lock); 314 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 315 nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1; 316 mutex_exit(&nce->nce_lock); 317 dropped = nce_xmit(ill, 318 ND_NEIGHBOR_ADVERT, 319 ill, /* ill to be used for extracting ill_nd_lla */ 320 B_TRUE, /* use ill_nd_lla */ 321 addr, /* Source and target of the advertisement pkt */ 322 &ipv6_all_hosts_mcast, /* Destination of the packet */ 323 nce_advert_flags(nce)); 324 mutex_enter(&nce->nce_lock); 325 if (dropped) 326 nce->nce_unsolicit_count++; 327 if (nce->nce_unsolicit_count != 0) { 328 nce->nce_timeout_id = timeout(ndp_timer, nce, 329 MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval)); 330 } 331 mutex_exit(&nce->nce_lock); 332 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 333 } 334 /* 335 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 336 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 337 * We call nce_fastpath from nce_update if the link layer address of 338 * the peer changes from nce_update 339 */ 340 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 341 nce_fastpath(nce); 342 return (err); 343 } 344 345 int 346 ndp_lookup_then_add(ill_t *ill, uchar_t *hw_addr, const void *addr, 347 const void *mask, const void *extract_mask, 348 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 349 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 350 { 351 int status; 352 353 if (ill->ill_isv6) { 354 status = ndp_lookup_then_add_v6(ill, hw_addr, 355 (in6_addr_t *)addr, (in6_addr_t *)mask, 356 (in6_addr_t *)extract_mask, hw_extract_start, flags, 357 state, newnce, fp_mp, res_mp); 358 } else { 359 status = ndp_lookup_then_add_v4(ill, hw_addr, 360 (in_addr_t *)addr, (in_addr_t *)mask, 361 (in_addr_t *)extract_mask, hw_extract_start, flags, 362 state, newnce, fp_mp, res_mp); 363 } 364 365 return (status); 366 } 367 368 static int 369 ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 370 const 
in6_addr_t *mask, const in6_addr_t *extract_mask, 371 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 372 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 373 { 374 int err = 0; 375 nce_t *nce; 376 ip_stack_t *ipst = ill->ill_ipst; 377 378 ASSERT(ill != NULL && ill->ill_isv6); 379 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 380 381 /* Get head of v6 hash table */ 382 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 383 nce = nce_lookup_addr(ill, addr, nce); 384 if (nce == NULL) { 385 err = ndp_add(ill, 386 hw_addr, 387 addr, 388 mask, 389 extract_mask, 390 hw_extract_start, 391 flags, 392 state, 393 newnce, 394 fp_mp, 395 res_mp); 396 } else { 397 *newnce = nce; 398 err = EEXIST; 399 } 400 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 401 return (err); 402 } 403 404 /* 405 * Remove all the CONDEMNED nces from the appropriate hash table. 406 * We create a private list of NCEs, these may have ires pointing 407 * to them, so the list will be passed through to clean up dependent 408 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 409 */ 410 static void 411 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 412 { 413 nce_t *nce1; 414 nce_t **ptpn; 415 416 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 417 ASSERT(ndp->ndp_g_walker == 0); 418 for (; nce; nce = nce1) { 419 nce1 = nce->nce_next; 420 mutex_enter(&nce->nce_lock); 421 if (nce->nce_flags & NCE_F_CONDEMNED) { 422 ptpn = nce->nce_ptpn; 423 nce1 = nce->nce_next; 424 if (nce1 != NULL) 425 nce1->nce_ptpn = ptpn; 426 *ptpn = nce1; 427 nce->nce_ptpn = NULL; 428 nce->nce_next = NULL; 429 nce->nce_next = *free_nce_list; 430 *free_nce_list = nce; 431 } 432 mutex_exit(&nce->nce_lock); 433 } 434 } 435 436 /* 437 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 438 * will return this NCE. Also no new IREs will be created that 439 * point to this NCE (See ire_add_v6). Also no new timeouts will 440 * be started (See NDP_RESTART_TIMER). 441 * 2. Cancel any currently running timeouts. 
442 * 3. If there is an ndp walker, return. The walker will do the cleanup. 443 * This ensures that walkers see a consistent list of NCEs while walking. 444 * 4. Otherwise remove the NCE from the list of NCEs 445 * 5. Delete all IREs pointing to this NCE. 446 */ 447 void 448 ndp_delete(nce_t *nce) 449 { 450 nce_t **ptpn; 451 nce_t *nce1; 452 int ipversion = nce->nce_ipversion; 453 ndp_g_t *ndp; 454 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 455 456 if (ipversion == IPV4_VERSION) 457 ndp = ipst->ips_ndp4; 458 else 459 ndp = ipst->ips_ndp6; 460 461 /* Serialize deletes */ 462 mutex_enter(&nce->nce_lock); 463 if (nce->nce_flags & NCE_F_CONDEMNED) { 464 /* Some other thread is doing the delete */ 465 mutex_exit(&nce->nce_lock); 466 return; 467 } 468 /* 469 * Caller has a refhold. Also 1 ref for being in the list. Thus 470 * refcnt has to be >= 2 471 */ 472 ASSERT(nce->nce_refcnt >= 2); 473 nce->nce_flags |= NCE_F_CONDEMNED; 474 mutex_exit(&nce->nce_lock); 475 476 nce_fastpath_list_delete(nce); 477 478 /* 479 * Cancel any running timer. Timeout can't be restarted 480 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 481 * Passing invalid timeout id is fine. 482 */ 483 if (nce->nce_timeout_id != 0) { 484 (void) untimeout(nce->nce_timeout_id); 485 nce->nce_timeout_id = 0; 486 } 487 488 mutex_enter(&ndp->ndp_g_lock); 489 if (nce->nce_ptpn == NULL) { 490 /* 491 * The last ndp walker has already removed this nce from 492 * the list after we marked the nce CONDEMNED and before 493 * we grabbed the global lock. 494 */ 495 mutex_exit(&ndp->ndp_g_lock); 496 return; 497 } 498 if (ndp->ndp_g_walker > 0) { 499 /* 500 * Can't unlink. The walker will clean up 501 */ 502 ndp->ndp_g_walker_cleanup = B_TRUE; 503 mutex_exit(&ndp->ndp_g_lock); 504 return; 505 } 506 507 /* 508 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 509 * the timer since it is marked CONDEMNED. 
510 */ 511 ptpn = nce->nce_ptpn; 512 nce1 = nce->nce_next; 513 if (nce1 != NULL) 514 nce1->nce_ptpn = ptpn; 515 *ptpn = nce1; 516 nce->nce_ptpn = NULL; 517 nce->nce_next = NULL; 518 mutex_exit(&ndp->ndp_g_lock); 519 520 nce_ire_delete(nce); 521 } 522 523 void 524 ndp_inactive(nce_t *nce) 525 { 526 mblk_t **mpp; 527 ill_t *ill; 528 529 ASSERT(nce->nce_refcnt == 0); 530 ASSERT(MUTEX_HELD(&nce->nce_lock)); 531 ASSERT(nce->nce_fastpath == NULL); 532 533 /* Free all nce allocated messages */ 534 mpp = &nce->nce_first_mp_to_free; 535 do { 536 while (*mpp != NULL) { 537 mblk_t *mp; 538 539 mp = *mpp; 540 *mpp = mp->b_next; 541 542 inet_freemsg(mp); 543 } 544 } while (mpp++ != &nce->nce_last_mp_to_free); 545 546 #ifdef NCE_DEBUG 547 nce_trace_inactive(nce); 548 #endif 549 550 ill = nce->nce_ill; 551 mutex_enter(&ill->ill_lock); 552 ill->ill_nce_cnt--; 553 /* 554 * If the number of nce's associated with this ill have dropped 555 * to zero, check whether we need to restart any operation that 556 * is waiting for this to happen. 557 */ 558 if (ill->ill_nce_cnt == 0) { 559 /* ipif_ill_refrele_tail drops the ill_lock */ 560 ipif_ill_refrele_tail(ill); 561 } else { 562 mutex_exit(&ill->ill_lock); 563 } 564 mutex_destroy(&nce->nce_lock); 565 if (nce->nce_mp != NULL) 566 inet_freemsg(nce->nce_mp); 567 } 568 569 /* 570 * ndp_walk routine. Delete the nce if it is associated with the ill 571 * that is going away. Always called as a writer. 572 */ 573 void 574 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 575 { 576 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 577 ndp_delete(nce); 578 } 579 } 580 581 /* 582 * Walk a list of to be inactive NCEs and blow away all the ires. 
583 */ 584 static void 585 nce_ire_delete_list(nce_t *nce) 586 { 587 nce_t *nce_next; 588 589 ASSERT(nce != NULL); 590 while (nce != NULL) { 591 nce_next = nce->nce_next; 592 nce->nce_next = NULL; 593 594 /* 595 * It is possible for the last ndp walker (this thread) 596 * to come here after ndp_delete has marked the nce CONDEMNED 597 * and before it has removed the nce from the fastpath list 598 * or called untimeout. So we need to do it here. It is safe 599 * for both ndp_delete and this thread to do it twice or 600 * even simultaneously since each of the threads has a 601 * reference on the nce. 602 */ 603 nce_fastpath_list_delete(nce); 604 /* 605 * Cancel any running timer. Timeout can't be restarted 606 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 607 * Passing invalid timeout id is fine. 608 */ 609 if (nce->nce_timeout_id != 0) { 610 (void) untimeout(nce->nce_timeout_id); 611 nce->nce_timeout_id = 0; 612 } 613 /* 614 * We might hit this func thus in the v4 case: 615 * ipif_down->ipif_ndp_down->ndp_walk 616 */ 617 618 if (nce->nce_ipversion == IPV4_VERSION) { 619 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 620 IRE_CACHE, nce_ire_delete1, 621 (char *)nce, nce->nce_ill); 622 } else { 623 ASSERT(nce->nce_ipversion == IPV6_VERSION); 624 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 625 IRE_CACHE, nce_ire_delete1, 626 (char *)nce, nce->nce_ill); 627 } 628 NCE_REFRELE_NOTR(nce); 629 nce = nce_next; 630 } 631 } 632 633 /* 634 * Delete an ire when the nce goes away. 
635 */ 636 /* ARGSUSED */ 637 static void 638 nce_ire_delete(nce_t *nce) 639 { 640 if (nce->nce_ipversion == IPV6_VERSION) { 641 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 642 nce_ire_delete1, (char *)nce, nce->nce_ill); 643 NCE_REFRELE_NOTR(nce); 644 } else { 645 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 646 nce_ire_delete1, (char *)nce, nce->nce_ill); 647 NCE_REFRELE_NOTR(nce); 648 } 649 } 650 651 /* 652 * ire_walk routine used to delete every IRE that shares this nce 653 */ 654 static void 655 nce_ire_delete1(ire_t *ire, char *nce_arg) 656 { 657 nce_t *nce = (nce_t *)nce_arg; 658 659 ASSERT(ire->ire_type == IRE_CACHE); 660 661 if (ire->ire_nce == nce) { 662 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 663 ire_delete(ire); 664 } 665 } 666 667 /* 668 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 669 */ 670 boolean_t 671 ndp_restart_dad(nce_t *nce) 672 { 673 boolean_t started; 674 boolean_t dropped; 675 676 if (nce == NULL) 677 return (B_FALSE); 678 mutex_enter(&nce->nce_lock); 679 if (nce->nce_state == ND_PROBE) { 680 mutex_exit(&nce->nce_lock); 681 started = B_TRUE; 682 } else if (nce->nce_state == ND_REACHABLE) { 683 nce->nce_state = ND_PROBE; 684 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 685 mutex_exit(&nce->nce_lock); 686 dropped = nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, NULL, 687 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, NDP_PROBE); 688 if (dropped) { 689 mutex_enter(&nce->nce_lock); 690 nce->nce_pcnt++; 691 mutex_exit(&nce->nce_lock); 692 } 693 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 694 started = B_TRUE; 695 } else { 696 mutex_exit(&nce->nce_lock); 697 started = B_FALSE; 698 } 699 return (started); 700 } 701 702 /* 703 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 704 * If one is found, the refcnt on the nce will be incremented. 
705 */ 706 nce_t * 707 ndp_lookup_v6(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 708 { 709 nce_t *nce; 710 ip_stack_t *ipst; 711 712 ASSERT(ill != NULL); 713 ipst = ill->ill_ipst; 714 715 ASSERT(ill != NULL && ill->ill_isv6); 716 if (!caller_holds_lock) { 717 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 718 } 719 720 /* Get head of v6 hash table */ 721 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 722 nce = nce_lookup_addr(ill, addr, nce); 723 if (nce == NULL) 724 nce = nce_lookup_mapping(ill, addr); 725 if (!caller_holds_lock) 726 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 727 return (nce); 728 } 729 /* 730 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 731 * If one is found, the refcnt on the nce will be incremented. 732 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 733 * so we skip the nce_lookup_mapping call. 734 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 735 */ 736 nce_t * 737 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 738 { 739 nce_t *nce; 740 in6_addr_t addr6; 741 ip_stack_t *ipst = ill->ill_ipst; 742 743 if (!caller_holds_lock) { 744 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 745 } 746 747 /* Get head of v4 hash table */ 748 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 749 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 750 nce = nce_lookup_addr(ill, &addr6, nce); 751 if (!caller_holds_lock) 752 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 753 return (nce); 754 } 755 756 /* 757 * Cache entry lookup. Try to find an nce matching the parameters passed. 758 * Look only for exact entries (no mappings). If an nce is found, increment 759 * the hold count on that nce. The caller passes in the start of the 760 * appropriate hash table, and must be holding the appropriate global 761 * lock (ndp_g_lock). 
762 */ 763 static nce_t * 764 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr, nce_t *nce) 765 { 766 ndp_g_t *ndp; 767 ip_stack_t *ipst = ill->ill_ipst; 768 769 if (ill->ill_isv6) 770 ndp = ipst->ips_ndp6; 771 else 772 ndp = ipst->ips_ndp4; 773 774 ASSERT(ill != NULL); 775 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 776 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 777 return (NULL); 778 for (; nce != NULL; nce = nce->nce_next) { 779 if (nce->nce_ill == ill) { 780 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 781 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 782 &ipv6_all_ones)) { 783 mutex_enter(&nce->nce_lock); 784 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 785 NCE_REFHOLD_LOCKED(nce); 786 mutex_exit(&nce->nce_lock); 787 break; 788 } 789 mutex_exit(&nce->nce_lock); 790 } 791 } 792 } 793 return (nce); 794 } 795 796 /* 797 * Cache entry lookup. Try to find an nce matching the parameters passed. 798 * Look only for mappings. 799 */ 800 static nce_t * 801 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 802 { 803 nce_t *nce; 804 ip_stack_t *ipst = ill->ill_ipst; 805 806 ASSERT(ill != NULL && ill->ill_isv6); 807 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 808 if (!IN6_IS_ADDR_MULTICAST(addr)) 809 return (NULL); 810 nce = ipst->ips_ndp6->nce_mask_entries; 811 for (; nce != NULL; nce = nce->nce_next) 812 if (nce->nce_ill == ill && 813 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 814 mutex_enter(&nce->nce_lock); 815 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 816 NCE_REFHOLD_LOCKED(nce); 817 mutex_exit(&nce->nce_lock); 818 break; 819 } 820 mutex_exit(&nce->nce_lock); 821 } 822 return (nce); 823 } 824 825 /* 826 * Process passed in parameters either from an incoming packet or via 827 * user ioctl. 
828 */ 829 void 830 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 831 { 832 ill_t *ill = nce->nce_ill; 833 uint32_t hw_addr_len = ill->ill_nd_lla_len; 834 mblk_t *mp; 835 boolean_t ll_updated = B_FALSE; 836 boolean_t ll_changed; 837 ip_stack_t *ipst = ill->ill_ipst; 838 839 ASSERT(nce->nce_ipversion == IPV6_VERSION); 840 /* 841 * No updates of link layer address or the neighbor state is 842 * allowed, when the cache is in NONUD state. This still 843 * allows for responding to reachability solicitation. 844 */ 845 mutex_enter(&nce->nce_lock); 846 if (nce->nce_state == ND_INCOMPLETE) { 847 if (hw_addr == NULL) { 848 mutex_exit(&nce->nce_lock); 849 return; 850 } 851 nce_set_ll(nce, hw_addr); 852 /* 853 * Update nce state and send the queued packets 854 * back to ip this time ire will be added. 855 */ 856 if (flag & ND_NA_FLAG_SOLICITED) { 857 nce_update(nce, ND_REACHABLE, NULL); 858 } else { 859 nce_update(nce, ND_STALE, NULL); 860 } 861 mutex_exit(&nce->nce_lock); 862 nce_fastpath(nce); 863 mutex_enter(&nce->nce_lock); 864 mp = nce->nce_qd_mp; 865 nce->nce_qd_mp = NULL; 866 mutex_exit(&nce->nce_lock); 867 while (mp != NULL) { 868 mblk_t *nxt_mp, *data_mp; 869 870 nxt_mp = mp->b_next; 871 mp->b_next = NULL; 872 873 if (mp->b_datap->db_type == M_CTL) 874 data_mp = mp->b_cont; 875 else 876 data_mp = mp; 877 if (data_mp->b_prev != NULL) { 878 ill_t *inbound_ill; 879 queue_t *fwdq = NULL; 880 uint_t ifindex; 881 882 ifindex = (uint_t)(uintptr_t)data_mp->b_prev; 883 inbound_ill = ill_lookup_on_ifindex(ifindex, 884 B_TRUE, NULL, NULL, NULL, NULL, ipst); 885 if (inbound_ill == NULL) { 886 data_mp->b_prev = NULL; 887 freemsg(mp); 888 return; 889 } else { 890 fwdq = inbound_ill->ill_rq; 891 } 892 data_mp->b_prev = NULL; 893 /* 894 * Send a forwarded packet back into ip_rput_v6 895 * just as in ire_send_v6(). 896 * Extract the queue from b_prev (set in 897 * ip_rput_data_v6). 
898 */ 899 if (fwdq != NULL) { 900 /* 901 * Forwarded packets hop count will 902 * get decremented in ip_rput_data_v6 903 */ 904 if (data_mp != mp) 905 freeb(mp); 906 put(fwdq, data_mp); 907 } else { 908 /* 909 * Send locally originated packets back 910 * into * ip_wput_v6. 911 */ 912 put(ill->ill_wq, mp); 913 } 914 ill_refrele(inbound_ill); 915 } else { 916 put(ill->ill_wq, mp); 917 } 918 mp = nxt_mp; 919 } 920 return; 921 } 922 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 923 if (!is_adv) { 924 /* If this is a SOLICITATION request only */ 925 if (ll_changed) 926 nce_update(nce, ND_STALE, hw_addr); 927 mutex_exit(&nce->nce_lock); 928 return; 929 } 930 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 931 /* If in any other state than REACHABLE, ignore */ 932 if (nce->nce_state == ND_REACHABLE) { 933 nce_update(nce, ND_STALE, NULL); 934 } 935 mutex_exit(&nce->nce_lock); 936 return; 937 } else { 938 if (ll_changed) { 939 nce_update(nce, ND_UNCHANGED, hw_addr); 940 ll_updated = B_TRUE; 941 } 942 if (flag & ND_NA_FLAG_SOLICITED) { 943 nce_update(nce, ND_REACHABLE, NULL); 944 } else { 945 if (ll_updated) { 946 nce_update(nce, ND_STALE, NULL); 947 } 948 } 949 mutex_exit(&nce->nce_lock); 950 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 951 NCE_F_ISROUTER)) { 952 ire_t *ire; 953 954 /* 955 * Router turned to host. We need to remove the 956 * entry as well as any default route that may be 957 * using this as a next hop. This is required by 958 * section 7.2.5 of RFC 2461. 959 */ 960 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 961 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 962 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 963 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 964 MATCH_IRE_DEFAULT, ipst); 965 if (ire != NULL) { 966 ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); 967 ire_delete(ire); 968 ire_refrele(ire); 969 } 970 ndp_delete(nce); 971 } 972 } 973 } 974 975 /* 976 * Pass arg1 to the pfi supplied, along with each nce in existence. 
977 * ndp_walk() places a REFHOLD on the nce and drops the lock when 978 * walking the hash list. 979 */ 980 void 981 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 982 boolean_t trace) 983 { 984 985 nce_t *nce; 986 nce_t *nce1; 987 nce_t **ncep; 988 nce_t *free_nce_list = NULL; 989 990 mutex_enter(&ndp->ndp_g_lock); 991 /* Prevent ndp_delete from unlink and free of NCE */ 992 ndp->ndp_g_walker++; 993 mutex_exit(&ndp->ndp_g_lock); 994 for (ncep = ndp->nce_hash_tbl; 995 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 996 for (nce = *ncep; nce != NULL; nce = nce1) { 997 nce1 = nce->nce_next; 998 if (ill == NULL || nce->nce_ill == ill) { 999 if (trace) { 1000 NCE_REFHOLD(nce); 1001 (*pfi)(nce, arg1); 1002 NCE_REFRELE(nce); 1003 } else { 1004 NCE_REFHOLD_NOTR(nce); 1005 (*pfi)(nce, arg1); 1006 NCE_REFRELE_NOTR(nce); 1007 } 1008 } 1009 } 1010 } 1011 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 1012 nce1 = nce->nce_next; 1013 if (ill == NULL || nce->nce_ill == ill) { 1014 if (trace) { 1015 NCE_REFHOLD(nce); 1016 (*pfi)(nce, arg1); 1017 NCE_REFRELE(nce); 1018 } else { 1019 NCE_REFHOLD_NOTR(nce); 1020 (*pfi)(nce, arg1); 1021 NCE_REFRELE_NOTR(nce); 1022 } 1023 } 1024 } 1025 mutex_enter(&ndp->ndp_g_lock); 1026 ndp->ndp_g_walker--; 1027 /* 1028 * While NCE's are removed from global list they are placed 1029 * in a private list, to be passed to nce_ire_delete_list(). 1030 * The reason is, there may be ires pointing to this nce 1031 * which needs to cleaned up. 
1032 */ 1033 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 1034 /* Time to delete condemned entries */ 1035 for (ncep = ndp->nce_hash_tbl; 1036 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 1037 nce = *ncep; 1038 if (nce != NULL) { 1039 nce_remove(ndp, nce, &free_nce_list); 1040 } 1041 } 1042 nce = ndp->nce_mask_entries; 1043 if (nce != NULL) { 1044 nce_remove(ndp, nce, &free_nce_list); 1045 } 1046 ndp->ndp_g_walker_cleanup = B_FALSE; 1047 } 1048 mutex_exit(&ndp->ndp_g_lock); 1049 1050 if (free_nce_list != NULL) { 1051 nce_ire_delete_list(free_nce_list); 1052 } 1053 } 1054 1055 /* 1056 * Walk everything. 1057 * Note that ill can be NULL hence can't derive the ipst from it. 1058 */ 1059 void 1060 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst) 1061 { 1062 ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE); 1063 ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE); 1064 } 1065 1066 /* 1067 * Process resolve requests. Handles both mapped entries 1068 * as well as cases that needs to be send out on the wire. 1069 * Lookup a NCE for a given IRE. Regardless of whether one exists 1070 * or one is created, we defer making ire point to nce until the 1071 * ire is actually added at which point the nce_refcnt on the nce is 1072 * incremented. This is done primarily to have symmetry between ire_add() 1073 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1074 */ 1075 int 1076 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 1077 { 1078 nce_t *nce; 1079 int err = 0; 1080 uint32_t ms; 1081 mblk_t *mp_nce = NULL; 1082 ip_stack_t *ipst = ill->ill_ipst; 1083 1084 ASSERT(ill != NULL); 1085 ASSERT(ill->ill_isv6); 1086 if (IN6_IS_ADDR_MULTICAST(dst)) { 1087 err = nce_set_multicast(ill, dst); 1088 return (err); 1089 } 1090 err = ndp_lookup_then_add(ill, 1091 NULL, /* No hardware address */ 1092 dst, 1093 &ipv6_all_ones, 1094 &ipv6_all_zeros, 1095 0, 1096 (ill->ill_flags & ILLF_NONUD) ? 
NCE_F_NONUD : 0, 1097 ND_INCOMPLETE, 1098 &nce, 1099 NULL, /* let ndp_add figure out fastpath mp and dlureq_mp for v6 */ 1100 NULL); 1101 1102 switch (err) { 1103 case 0: 1104 /* 1105 * New cache entry was created. Make sure that the state 1106 * is not ND_INCOMPLETE. It can be in some other state 1107 * even before we send out the solicitation as we could 1108 * get un-solicited advertisements. 1109 * 1110 * If this is an XRESOLV interface, simply return 0, 1111 * since we don't want to solicit just yet. 1112 */ 1113 if (ill->ill_flags & ILLF_XRESOLV) { 1114 NCE_REFRELE(nce); 1115 return (0); 1116 } 1117 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1118 mutex_enter(&nce->nce_lock); 1119 if (nce->nce_state != ND_INCOMPLETE) { 1120 mutex_exit(&nce->nce_lock); 1121 rw_exit(&ipst->ips_ill_g_lock); 1122 NCE_REFRELE(nce); 1123 return (0); 1124 } 1125 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1126 if (mp_nce == NULL) { 1127 /* The caller will free mp */ 1128 mutex_exit(&nce->nce_lock); 1129 rw_exit(&ipst->ips_ill_g_lock); 1130 ndp_delete(nce); 1131 NCE_REFRELE(nce); 1132 return (ENOMEM); 1133 } 1134 ms = nce_solicit(nce, mp_nce); 1135 rw_exit(&ipst->ips_ill_g_lock); 1136 if (ms == 0) { 1137 /* The caller will free mp */ 1138 if (mp_nce != mp) 1139 freeb(mp_nce); 1140 mutex_exit(&nce->nce_lock); 1141 ndp_delete(nce); 1142 NCE_REFRELE(nce); 1143 return (EBUSY); 1144 } 1145 mutex_exit(&nce->nce_lock); 1146 NDP_RESTART_TIMER(nce, (clock_t)ms); 1147 NCE_REFRELE(nce); 1148 return (EINPROGRESS); 1149 case EEXIST: 1150 /* Resolution in progress just queue the packet */ 1151 mutex_enter(&nce->nce_lock); 1152 if (nce->nce_state == ND_INCOMPLETE) { 1153 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 1154 if (mp_nce == NULL) { 1155 err = ENOMEM; 1156 } else { 1157 nce_queue_mp(nce, mp_nce); 1158 err = EINPROGRESS; 1159 } 1160 } else { 1161 /* 1162 * Any other state implies we have 1163 * a nce but IRE needs to be added ... 
1164 * ire_add_v6() will take care of the 1165 * the case when the nce becomes CONDEMNED 1166 * before the ire is added to the table. 1167 */ 1168 err = 0; 1169 } 1170 mutex_exit(&nce->nce_lock); 1171 NCE_REFRELE(nce); 1172 break; 1173 default: 1174 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 1175 break; 1176 } 1177 return (err); 1178 } 1179 1180 /* 1181 * When there is no resolver, the link layer template is passed in 1182 * the IRE. 1183 * Lookup a NCE for a given IRE. Regardless of whether one exists 1184 * or one is created, we defer making ire point to nce until the 1185 * ire is actually added at which point the nce_refcnt on the nce is 1186 * incremented. This is done primarily to have symmetry between ire_add() 1187 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1188 */ 1189 int 1190 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1191 { 1192 nce_t *nce; 1193 int err = 0; 1194 1195 ASSERT(ill != NULL); 1196 ASSERT(ill->ill_isv6); 1197 if (IN6_IS_ADDR_MULTICAST(dst)) { 1198 err = nce_set_multicast(ill, dst); 1199 return (err); 1200 } 1201 1202 err = ndp_lookup_then_add(ill, 1203 NULL, /* hardware address */ 1204 dst, 1205 &ipv6_all_ones, 1206 &ipv6_all_zeros, 1207 0, 1208 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1209 ND_REACHABLE, 1210 &nce, 1211 NULL, /* let ndp_add figure out fp_mp/dlureq_mp for v6 */ 1212 NULL); 1213 1214 switch (err) { 1215 case 0: 1216 /* 1217 * Cache entry with a proper resolver cookie was 1218 * created. 1219 */ 1220 NCE_REFRELE(nce); 1221 break; 1222 case EEXIST: 1223 err = 0; 1224 NCE_REFRELE(nce); 1225 break; 1226 default: 1227 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1228 break; 1229 } 1230 return (err); 1231 } 1232 1233 /* 1234 * For each interface an entry is added for the unspecified multicast group. 1235 * Here that mapping is used to form the multicast cache entry for a particular 1236 * multicast destination. 
1237 */ 1238 static int 1239 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1240 { 1241 nce_t *mnce; /* Multicast mapping entry */ 1242 nce_t *nce; 1243 uchar_t *hw_addr = NULL; 1244 int err = 0; 1245 ip_stack_t *ipst = ill->ill_ipst; 1246 1247 ASSERT(ill != NULL); 1248 ASSERT(ill->ill_isv6); 1249 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1250 1251 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1252 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst)); 1253 nce = nce_lookup_addr(ill, dst, nce); 1254 if (nce != NULL) { 1255 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1256 NCE_REFRELE(nce); 1257 return (0); 1258 } 1259 /* No entry, now lookup for a mapping this should never fail */ 1260 mnce = nce_lookup_mapping(ill, dst); 1261 if (mnce == NULL) { 1262 /* Something broken for the interface. */ 1263 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1264 return (ESRCH); 1265 } 1266 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1267 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1268 /* 1269 * For IRE_IF_RESOLVER a hardware mapping can be 1270 * generated, for IRE_IF_NORESOLVER, resolution cookie 1271 * in the ill is copied in ndp_add(). 1272 */ 1273 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1274 if (hw_addr == NULL) { 1275 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1276 NCE_REFRELE(mnce); 1277 return (ENOMEM); 1278 } 1279 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1280 } 1281 NCE_REFRELE(mnce); 1282 /* 1283 * IRE_IF_NORESOLVER type simply copies the resolution 1284 * cookie passed in. So no hw_addr is needed. 
1285 */ 1286 err = ndp_add(ill, 1287 hw_addr, 1288 dst, 1289 &ipv6_all_ones, 1290 &ipv6_all_zeros, 1291 0, 1292 NCE_F_NONUD, 1293 ND_REACHABLE, 1294 &nce, 1295 NULL, 1296 NULL); 1297 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1298 if (hw_addr != NULL) 1299 kmem_free(hw_addr, ill->ill_nd_lla_len); 1300 if (err != 0) { 1301 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1302 return (err); 1303 } 1304 NCE_REFRELE(nce); 1305 return (0); 1306 } 1307 1308 /* 1309 * Return the link layer address, and any flags of a nce. 1310 */ 1311 int 1312 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1313 { 1314 nce_t *nce; 1315 in6_addr_t *addr; 1316 sin6_t *sin6; 1317 dl_unitdata_req_t *dl; 1318 1319 ASSERT(ill != NULL && ill->ill_isv6); 1320 sin6 = (sin6_t *)&lnr->lnr_addr; 1321 addr = &sin6->sin6_addr; 1322 1323 nce = ndp_lookup_v6(ill, addr, B_FALSE); 1324 if (nce == NULL) 1325 return (ESRCH); 1326 /* If in INCOMPLETE state, no link layer address is available yet */ 1327 if (nce->nce_state == ND_INCOMPLETE) 1328 goto done; 1329 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1330 if (ill->ill_flags & ILLF_XRESOLV) 1331 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1332 else 1333 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1334 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1335 sizeof (lnr->lnr_hdw_addr)); 1336 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1337 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1338 if (nce->nce_flags & NCE_F_ISROUTER) 1339 lnr->lnr_flags = NDF_ISROUTER_ON; 1340 if (nce->nce_flags & NCE_F_PROXY) 1341 lnr->lnr_flags |= NDF_PROXY_ON; 1342 if (nce->nce_flags & NCE_F_ANYCAST) 1343 lnr->lnr_flags |= NDF_ANYCAST_ON; 1344 done: 1345 NCE_REFRELE(nce); 1346 return (0); 1347 } 1348 1349 /* 1350 * Send Enable/Disable multicast reqs to driver. 
1351 */ 1352 int 1353 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1354 uint32_t hw_addr_offset, mblk_t *mp) 1355 { 1356 nce_t *nce; 1357 uchar_t *hw_addr; 1358 ip_stack_t *ipst = ill->ill_ipst; 1359 1360 ASSERT(ill != NULL && ill->ill_isv6); 1361 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1362 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1363 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1364 freemsg(mp); 1365 return (EINVAL); 1366 } 1367 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1368 nce = nce_lookup_mapping(ill, addr); 1369 if (nce == NULL) { 1370 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1371 freemsg(mp); 1372 return (ESRCH); 1373 } 1374 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1375 /* 1376 * Update dl_addr_length and dl_addr_offset for primitives that 1377 * have physical addresses as opposed to full saps 1378 */ 1379 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1380 case DL_ENABMULTI_REQ: 1381 /* Track the state if this is the first enabmulti */ 1382 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1383 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1384 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1385 break; 1386 case DL_DISABMULTI_REQ: 1387 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1388 break; 1389 default: 1390 NCE_REFRELE(nce); 1391 ip1dbg(("ndp_mcastreq: default\n")); 1392 return (EINVAL); 1393 } 1394 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1395 NCE_REFRELE(nce); 1396 putnext(ill->ill_wq, mp); 1397 return (0); 1398 } 1399 1400 /* 1401 * Send a neighbor solicitation. 1402 * Returns number of milliseconds after which we should either rexmit or abort. 1403 * Return of zero means we should abort. 1404 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1405 * 1406 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1407 * the packet. 1408 * NOTE: This routine does not consume mp. 
1409 */ 1410 uint32_t 1411 nce_solicit(nce_t *nce, mblk_t *mp) 1412 { 1413 ill_t *ill; 1414 ill_t *src_ill; 1415 ip6_t *ip6h; 1416 in6_addr_t src; 1417 in6_addr_t dst; 1418 ipif_t *ipif; 1419 ip6i_t *ip6i; 1420 boolean_t dropped = B_FALSE; 1421 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 1422 1423 ASSERT(RW_READ_HELD(&ipst->ips_ill_g_lock)); 1424 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1425 ill = nce->nce_ill; 1426 ASSERT(ill != NULL); 1427 1428 if (nce->nce_rcnt == 0) { 1429 return (0); 1430 } 1431 1432 if (mp == NULL) { 1433 ASSERT(nce->nce_qd_mp != NULL); 1434 mp = nce->nce_qd_mp; 1435 } else { 1436 nce_queue_mp(nce, mp); 1437 } 1438 1439 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1440 if (mp->b_datap->db_type == M_CTL) 1441 mp = mp->b_cont; 1442 1443 ip6h = (ip6_t *)mp->b_rptr; 1444 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1445 /* 1446 * This message should have been pulled up already in 1447 * ip_wput_v6. We can't do pullups here because the message 1448 * could be from the nce_qd_mp which could have b_next/b_prev 1449 * non-NULL. 1450 */ 1451 ip6i = (ip6i_t *)ip6h; 1452 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1453 sizeof (ip6i_t) + IPV6_HDR_LEN); 1454 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1455 } 1456 src = ip6h->ip6_src; 1457 /* 1458 * If the src of outgoing packet is one of the assigned interface 1459 * addresses use it, otherwise we will pick the source address below. 1460 */ 1461 src_ill = ill; 1462 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1463 if (ill->ill_group != NULL) 1464 src_ill = ill->ill_group->illgrp_ill; 1465 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1466 for (ipif = src_ill->ill_ipif; ipif != NULL; 1467 ipif = ipif->ipif_next) { 1468 if (IN6_ARE_ADDR_EQUAL(&src, 1469 &ipif->ipif_v6lcl_addr)) { 1470 break; 1471 } 1472 } 1473 if (ipif != NULL) 1474 break; 1475 } 1476 /* 1477 * If no relevant ipif can be found, then it's not one of our 1478 * addresses. Reset to :: and let nce_xmit. 
If an ipif can be 1479 * found, but it's not yet done with DAD verification, then 1480 * just postpone this transmission until later. 1481 */ 1482 if (src_ill == NULL) 1483 src = ipv6_all_zeros; 1484 else if (!ipif->ipif_addr_ready) 1485 return (ill->ill_reachable_retrans_time); 1486 } 1487 dst = nce->nce_addr; 1488 /* 1489 * If source address is unspecified, nce_xmit will choose 1490 * one for us and initialize the hardware address also 1491 * appropriately. 1492 */ 1493 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1494 src_ill = NULL; 1495 nce->nce_rcnt--; 1496 mutex_exit(&nce->nce_lock); 1497 rw_exit(&ipst->ips_ill_g_lock); 1498 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1499 &dst, 0); 1500 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1501 mutex_enter(&nce->nce_lock); 1502 if (dropped) 1503 nce->nce_rcnt++; 1504 return (ill->ill_reachable_retrans_time); 1505 } 1506 1507 /* 1508 * Attempt to recover an address on an interface that's been marked as a 1509 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1510 * no easy way to just probe the address and have the right thing happen if 1511 * it's no longer in use. Instead, we just bring it up normally and allow the 1512 * regular interface start-up logic to probe for a remaining duplicate and take 1513 * us back down if necessary. 1514 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1515 * ip_ndp_excl. 1516 */ 1517 /* ARGSUSED */ 1518 static void 1519 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1520 { 1521 ill_t *ill = rq->q_ptr; 1522 ipif_t *ipif; 1523 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1524 1525 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1526 /* 1527 * We do not support recovery of proxy ARP'd interfaces, 1528 * because the system lacks a complete proxy ARP mechanism. 
1529 */ 1530 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1531 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1532 continue; 1533 } 1534 1535 /* 1536 * If we have already recovered or if the interface is going 1537 * away, then ignore. 1538 */ 1539 mutex_enter(&ill->ill_lock); 1540 if (!(ipif->ipif_flags & IPIF_DUPLICATE) || 1541 (ipif->ipif_flags & (IPIF_MOVING | IPIF_CONDEMNED))) { 1542 mutex_exit(&ill->ill_lock); 1543 continue; 1544 } 1545 1546 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1547 ill->ill_ipif_dup_count--; 1548 mutex_exit(&ill->ill_lock); 1549 ipif->ipif_was_dup = B_TRUE; 1550 1551 if (ipif_ndp_up(ipif, addr) != EINPROGRESS) 1552 (void) ipif_up_done_v6(ipif); 1553 } 1554 freeb(mp); 1555 } 1556 1557 /* 1558 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1559 * As long as someone else holds the address, the interface will stay down. 1560 * When that conflict goes away, the interface is brought back up. This is 1561 * done so that accidental shutdowns of addresses aren't made permanent. Your 1562 * server will recover from a failure. 1563 * 1564 * For DHCP and temporary addresses, recovery is not done in the kernel. 1565 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1566 * 1567 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1568 */ 1569 static void 1570 ipif6_dup_recovery(void *arg) 1571 { 1572 ipif_t *ipif = arg; 1573 1574 ipif->ipif_recovery_id = 0; 1575 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1576 return; 1577 1578 /* 1579 * No lock, because this is just an optimization. 
1580 */ 1581 if (ipif->ipif_state_flags & (IPIF_MOVING | IPIF_CONDEMNED)) 1582 return; 1583 1584 /* If the link is down, we'll retry this later */ 1585 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1586 return; 1587 1588 ndp_do_recovery(ipif); 1589 } 1590 1591 /* 1592 * Perform interface recovery by forcing the duplicate interfaces up and 1593 * allowing the system to determine which ones should stay up. 1594 * 1595 * Called both by recovery timer expiry and link-up notification. 1596 */ 1597 void 1598 ndp_do_recovery(ipif_t *ipif) 1599 { 1600 ill_t *ill = ipif->ipif_ill; 1601 mblk_t *mp; 1602 ip_stack_t *ipst = ill->ill_ipst; 1603 1604 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1605 if (mp == NULL) { 1606 mutex_enter(&ill->ill_lock); 1607 if (ipif->ipif_recovery_id == 0 && 1608 !(ipif->ipif_state_flags & (IPIF_MOVING | 1609 IPIF_CONDEMNED))) { 1610 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1611 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1612 } 1613 mutex_exit(&ill->ill_lock); 1614 } else { 1615 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1616 sizeof (ipif->ipif_v6lcl_addr)); 1617 ill_refhold(ill); 1618 (void) qwriter_ip(NULL, ill, ill->ill_rq, mp, ip_ndp_recover, 1619 CUR_OP, B_FALSE); 1620 } 1621 } 1622 1623 /* 1624 * Find the solicitation in the given message, and extract printable details 1625 * (MAC and IP addresses) from it. 1626 */ 1627 static nd_neighbor_solicit_t * 1628 ip_ndp_find_solicitation(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, char *hbuf, 1629 size_t hlen, char *sbuf, size_t slen, uchar_t **haddr) 1630 { 1631 nd_neighbor_solicit_t *ns; 1632 ip6_t *ip6h; 1633 uchar_t *addr; 1634 int alen; 1635 1636 alen = 0; 1637 ip6h = (ip6_t *)mp->b_rptr; 1638 if (dl_mp == NULL) { 1639 nd_opt_hdr_t *opt; 1640 int nslen; 1641 1642 /* 1643 * If it's from the fast-path, then it can't be a probe 1644 * message, and thus must include the source linkaddr option. 1645 * Extract that here. 
1646 */ 1647 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1648 nslen = mp->b_wptr - (uchar_t *)ns; 1649 if ((nslen -= sizeof (*ns)) > 0) { 1650 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), nslen, 1651 ND_OPT_SOURCE_LINKADDR); 1652 if (opt != NULL && 1653 opt->nd_opt_len * 8 - sizeof (*opt) >= 1654 ill->ill_nd_lla_len) { 1655 addr = (uchar_t *)(opt + 1); 1656 alen = ill->ill_nd_lla_len; 1657 } 1658 } 1659 /* 1660 * We cheat a bit here for the sake of printing usable log 1661 * messages in the rare case where the reply we got was unicast 1662 * without a source linkaddr option, and the interface is in 1663 * fastpath mode. (Sigh.) 1664 */ 1665 if (alen == 0 && ill->ill_type == IFT_ETHER && 1666 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1667 struct ether_header *pether; 1668 1669 pether = (struct ether_header *)((char *)ip6h - 1670 sizeof (*pether)); 1671 addr = pether->ether_shost.ether_addr_octet; 1672 alen = ETHERADDRL; 1673 } 1674 } else { 1675 dl_unitdata_ind_t *dlu; 1676 1677 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1678 alen = dlu->dl_src_addr_length; 1679 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1680 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1681 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1682 if (ill->ill_sap_length < 0) { 1683 alen += ill->ill_sap_length; 1684 } else { 1685 addr += ill->ill_sap_length; 1686 alen -= ill->ill_sap_length; 1687 } 1688 } 1689 } 1690 if (alen > 0) { 1691 *haddr = addr; 1692 (void) mac_colon_addr(addr, alen, hbuf, hlen); 1693 } else { 1694 *haddr = NULL; 1695 (void) strcpy(hbuf, "?"); 1696 } 1697 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1698 (void) inet_ntop(AF_INET6, &ns->nd_ns_target, sbuf, slen); 1699 return (ns); 1700 } 1701 1702 /* 1703 * This is for exclusive changes due to NDP duplicate address detection 1704 * failure. 
1705 */ 1706 /* ARGSUSED */ 1707 static void 1708 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1709 { 1710 ill_t *ill = rq->q_ptr; 1711 ipif_t *ipif; 1712 char ibuf[LIFNAMSIZ + 10]; /* 10 digits for logical i/f number */ 1713 char hbuf[MAC_STR_LEN]; 1714 char sbuf[INET6_ADDRSTRLEN]; 1715 nd_neighbor_solicit_t *ns; 1716 mblk_t *dl_mp = NULL; 1717 uchar_t *haddr; 1718 ip_stack_t *ipst = ill->ill_ipst; 1719 1720 if (DB_TYPE(mp) != M_DATA) { 1721 dl_mp = mp; 1722 mp = mp->b_cont; 1723 } 1724 ns = ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, sizeof (hbuf), sbuf, 1725 sizeof (sbuf), &haddr); 1726 if (haddr != NULL && 1727 bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) { 1728 /* 1729 * Ignore conflicts generated by misbehaving switches that just 1730 * reflect our own messages back to us. 1731 */ 1732 goto ignore_conflict; 1733 } 1734 (void) strlcpy(ibuf, ill->ill_name, sizeof (ibuf)); 1735 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1736 1737 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1738 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 1739 &ns->nd_ns_target)) { 1740 continue; 1741 } 1742 1743 /* If it's already marked, then don't do anything. */ 1744 if (ipif->ipif_flags & IPIF_DUPLICATE) 1745 continue; 1746 1747 /* 1748 * If this is a failure during duplicate recovery, then don't 1749 * complain. It may take a long time to recover. 
1750 */ 1751 if (!ipif->ipif_was_dup) { 1752 if (ipif->ipif_id != 0) { 1753 (void) snprintf(ibuf + ill->ill_name_length - 1, 1754 sizeof (ibuf) - ill->ill_name_length + 1, 1755 ":%d", ipif->ipif_id); 1756 } 1757 cmn_err(CE_WARN, "%s has duplicate address %s (in " 1758 "use by %s); disabled", ibuf, sbuf, hbuf); 1759 } 1760 mutex_enter(&ill->ill_lock); 1761 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1762 ipif->ipif_flags |= IPIF_DUPLICATE; 1763 ill->ill_ipif_dup_count++; 1764 mutex_exit(&ill->ill_lock); 1765 (void) ipif_down(ipif, NULL, NULL); 1766 ipif_down_tail(ipif); 1767 mutex_enter(&ill->ill_lock); 1768 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1769 ill->ill_net_type == IRE_IF_RESOLVER && 1770 !(ipif->ipif_state_flags & (IPIF_MOVING | 1771 IPIF_CONDEMNED)) && 1772 ipst->ips_ip_dup_recovery > 0) { 1773 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1774 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1775 } 1776 mutex_exit(&ill->ill_lock); 1777 } 1778 ignore_conflict: 1779 if (dl_mp != NULL) 1780 freeb(dl_mp); 1781 freemsg(mp); 1782 } 1783 1784 /* 1785 * Handle failure by tearing down the ipifs with the specified address. Note 1786 * that tearing down the ipif also means deleting the nce through ipif_down, so 1787 * it's not possible to do recovery by just restarting the nce timer. Instead, 1788 * we start a timer on the ipif. 1789 */ 1790 static void 1791 ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1792 { 1793 if ((mp = copymsg(mp)) != NULL) { 1794 if (dl_mp == NULL) 1795 dl_mp = mp; 1796 else if ((dl_mp = copyb(dl_mp)) != NULL) 1797 dl_mp->b_cont = mp; 1798 if (dl_mp == NULL) { 1799 freemsg(mp); 1800 } else { 1801 ill_refhold(ill); 1802 (void) qwriter_ip(NULL, ill, ill->ill_rq, dl_mp, 1803 ip_ndp_excl, CUR_OP, B_FALSE); 1804 } 1805 } 1806 ndp_delete(nce); 1807 } 1808 1809 /* 1810 * Handle a discovered conflict: some other system is advertising that it owns 1811 * one of our IP addresses. 
We need to defend ourselves, or just shut down the 1812 * interface. 1813 */ 1814 static void 1815 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1816 { 1817 ipif_t *ipif; 1818 uint32_t now; 1819 uint_t maxdefense; 1820 uint_t defs; 1821 ip_stack_t *ipst = ill->ill_ipst; 1822 1823 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 1824 NULL, NULL, ipst); 1825 if (ipif == NULL) 1826 return; 1827 /* 1828 * First, figure out if this address is disposable. 1829 */ 1830 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1831 maxdefense = ipst->ips_ip_max_temp_defend; 1832 else 1833 maxdefense = ipst->ips_ip_max_defend; 1834 1835 /* 1836 * Now figure out how many times we've defended ourselves. Ignore 1837 * defenses that happened long in the past. 1838 */ 1839 now = gethrestime_sec(); 1840 mutex_enter(&nce->nce_lock); 1841 if ((defs = nce->nce_defense_count) > 0 && 1842 now - nce->nce_defense_time > ipst->ips_ip_defend_interval) { 1843 nce->nce_defense_count = defs = 0; 1844 } 1845 nce->nce_defense_count++; 1846 nce->nce_defense_time = now; 1847 mutex_exit(&nce->nce_lock); 1848 ipif_refrele(ipif); 1849 1850 /* 1851 * If we've defended ourselves too many times already, then give up and 1852 * tear down the interface(s) using this address. Otherwise, defend by 1853 * sending out an unsolicited Neighbor Advertisement. 
1854 */ 1855 if (defs >= maxdefense) { 1856 ip_ndp_failure(ill, mp, dl_mp, nce); 1857 } else { 1858 char hbuf[MAC_STR_LEN]; 1859 char sbuf[INET6_ADDRSTRLEN]; 1860 uchar_t *haddr; 1861 1862 (void) ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, 1863 sizeof (hbuf), sbuf, sizeof (sbuf), &haddr); 1864 cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1865 hbuf, sbuf, ill->ill_name); 1866 (void) nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, B_FALSE, 1867 &nce->nce_addr, &ipv6_all_hosts_mcast, 1868 nce_advert_flags(nce)); 1869 } 1870 } 1871 1872 static void 1873 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1874 { 1875 nd_neighbor_solicit_t *ns; 1876 uint32_t hlen = ill->ill_nd_lla_len; 1877 uchar_t *haddr = NULL; 1878 icmp6_t *icmp_nd; 1879 ip6_t *ip6h; 1880 nce_t *our_nce = NULL; 1881 in6_addr_t target; 1882 in6_addr_t src; 1883 int len; 1884 int flag = 0; 1885 nd_opt_hdr_t *opt = NULL; 1886 boolean_t bad_solicit = B_FALSE; 1887 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1888 1889 ip6h = (ip6_t *)mp->b_rptr; 1890 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1891 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1892 src = ip6h->ip6_src; 1893 ns = (nd_neighbor_solicit_t *)icmp_nd; 1894 target = ns->nd_ns_target; 1895 if (IN6_IS_ADDR_MULTICAST(&target)) { 1896 if (ip_debug > 2) { 1897 /* ip1dbg */ 1898 pr_addr_dbg("ndp_input_solicit: Target is" 1899 " multicast! 
%s\n", AF_INET6, &target); 1900 } 1901 bad_solicit = B_TRUE; 1902 goto done; 1903 } 1904 if (len > sizeof (nd_neighbor_solicit_t)) { 1905 /* Options present */ 1906 opt = (nd_opt_hdr_t *)&ns[1]; 1907 len -= sizeof (nd_neighbor_solicit_t); 1908 if (!ndp_verify_optlen(opt, len)) { 1909 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1910 bad_solicit = B_TRUE; 1911 goto done; 1912 } 1913 } 1914 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1915 /* Check to see if this is a valid DAD solicitation */ 1916 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1917 if (ip_debug > 2) { 1918 /* ip1dbg */ 1919 pr_addr_dbg("ndp_input_solicit: IPv6 " 1920 "Destination is not solicited node " 1921 "multicast %s\n", AF_INET6, 1922 &ip6h->ip6_dst); 1923 } 1924 bad_solicit = B_TRUE; 1925 goto done; 1926 } 1927 } 1928 1929 our_nce = ndp_lookup_v6(ill, &target, B_FALSE); 1930 /* 1931 * If this is a valid Solicitation, a permanent 1932 * entry should exist in the cache 1933 */ 1934 if (our_nce == NULL || 1935 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1936 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1937 "ifname=%s ", ill->ill_name)); 1938 if (ip_debug > 2) { 1939 /* ip1dbg */ 1940 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1941 } 1942 bad_solicit = B_TRUE; 1943 goto done; 1944 } 1945 1946 /* At this point we should have a verified NS per spec */ 1947 if (opt != NULL) { 1948 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1949 if (opt != NULL) { 1950 haddr = (uchar_t *)&opt[1]; 1951 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 1952 hlen == 0) { 1953 ip1dbg(("ndp_input_advert: bad SLLA\n")); 1954 bad_solicit = B_TRUE; 1955 goto done; 1956 } 1957 } 1958 } 1959 1960 /* If sending directly to peer, set the unicast flag */ 1961 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 1962 flag |= NDP_UNICAST; 1963 1964 /* 1965 * Create/update the entry for the soliciting node. 1966 * or respond to outstanding queries, don't if 1967 * the source is unspecified address. 
1968 */ 1969 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1970 int err; 1971 nce_t *nnce; 1972 1973 ASSERT(ill->ill_isv6); 1974 /* 1975 * Regular solicitations *must* include the Source Link-Layer 1976 * Address option. Ignore messages that do not. 1977 */ 1978 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1979 ip1dbg(("ndp_input_solicit: source link-layer address " 1980 "option missing with a specified source.\n")); 1981 bad_solicit = B_TRUE; 1982 goto done; 1983 } 1984 1985 /* 1986 * This is a regular solicitation. If we're still in the 1987 * process of verifying the address, then don't respond at all 1988 * and don't keep track of the sender. 1989 */ 1990 if (our_nce->nce_state == ND_PROBE) 1991 goto done; 1992 1993 /* 1994 * If the solicitation doesn't have sender hardware address 1995 * (legal for unicast solicitation), then process without 1996 * installing the return NCE. Either we already know it, or 1997 * we'll be forced to look it up when (and if) we reply to the 1998 * packet. 1999 */ 2000 if (haddr == NULL) 2001 goto no_source; 2002 2003 err = ndp_lookup_then_add(ill, 2004 haddr, 2005 &src, /* Soliciting nodes address */ 2006 &ipv6_all_ones, 2007 &ipv6_all_zeros, 2008 0, 2009 0, 2010 ND_STALE, 2011 &nnce, 2012 NULL, 2013 NULL); 2014 switch (err) { 2015 case 0: 2016 /* done with this entry */ 2017 NCE_REFRELE(nnce); 2018 break; 2019 case EEXIST: 2020 /* 2021 * B_FALSE indicates this is not an 2022 * an advertisement. 2023 */ 2024 ndp_process(nnce, haddr, 0, B_FALSE); 2025 NCE_REFRELE(nnce); 2026 break; 2027 default: 2028 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 2029 err)); 2030 goto done; 2031 } 2032 no_source: 2033 flag |= NDP_SOLICITED; 2034 } else { 2035 /* 2036 * No source link layer address option should be present in a 2037 * valid DAD request. 
2038 */ 2039 if (haddr != NULL) { 2040 ip1dbg(("ndp_input_solicit: source link-layer address " 2041 "option present with an unspecified source.\n")); 2042 bad_solicit = B_TRUE; 2043 goto done; 2044 } 2045 if (our_nce->nce_state == ND_PROBE) { 2046 /* 2047 * Internally looped-back probes won't have DLPI 2048 * attached to them. External ones (which are sent by 2049 * multicast) always will. Just ignore our own 2050 * transmissions. 2051 */ 2052 if (dl_mp != NULL) { 2053 /* 2054 * If someone else is probing our address, then 2055 * we've crossed wires. Declare failure. 2056 */ 2057 ip_ndp_failure(ill, mp, dl_mp, our_nce); 2058 } 2059 goto done; 2060 } 2061 /* 2062 * This is a DAD probe. Multicast the advertisement to the 2063 * all-nodes address. 2064 */ 2065 src = ipv6_all_hosts_mcast; 2066 } 2067 flag |= nce_advert_flags(our_nce); 2068 /* Response to a solicitation */ 2069 (void) nce_xmit(ill, 2070 ND_NEIGHBOR_ADVERT, 2071 ill, /* ill to be used for extracting ill_nd_lla */ 2072 B_TRUE, /* use ill_nd_lla */ 2073 &target, /* Source and target of the advertisement pkt */ 2074 &src, /* IP Destination (source of original pkt) */ 2075 flag); 2076 done: 2077 if (bad_solicit) 2078 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 2079 if (our_nce != NULL) 2080 NCE_REFRELE(our_nce); 2081 } 2082 2083 void 2084 ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2085 { 2086 nd_neighbor_advert_t *na; 2087 uint32_t hlen = ill->ill_nd_lla_len; 2088 uchar_t *haddr = NULL; 2089 icmp6_t *icmp_nd; 2090 ip6_t *ip6h; 2091 nce_t *dst_nce = NULL; 2092 in6_addr_t target; 2093 nd_opt_hdr_t *opt = NULL; 2094 int len; 2095 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2096 ip_stack_t *ipst = ill->ill_ipst; 2097 2098 ip6h = (ip6_t *)mp->b_rptr; 2099 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2100 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2101 na = (nd_neighbor_advert_t *)icmp_nd; 2102 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 2103 (na->nd_na_flags_reserved & 
ND_NA_FLAG_SOLICITED)) { 2104 ip1dbg(("ndp_input_advert: Target is multicast but the " 2105 "solicited flag is not zero\n")); 2106 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2107 return; 2108 } 2109 target = na->nd_na_target; 2110 if (IN6_IS_ADDR_MULTICAST(&target)) { 2111 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 2112 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2113 return; 2114 } 2115 if (len > sizeof (nd_neighbor_advert_t)) { 2116 opt = (nd_opt_hdr_t *)&na[1]; 2117 if (!ndp_verify_optlen(opt, 2118 len - sizeof (nd_neighbor_advert_t))) { 2119 ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); 2120 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2121 return; 2122 } 2123 /* At this point we have a verified NA per spec */ 2124 len -= sizeof (nd_neighbor_advert_t); 2125 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 2126 if (opt != NULL) { 2127 haddr = (uchar_t *)&opt[1]; 2128 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 2129 hlen == 0) { 2130 ip1dbg(("ndp_input_advert: bad SLLA\n")); 2131 BUMP_MIB(mib, 2132 ipv6IfIcmpInBadNeighborAdvertisements); 2133 return; 2134 } 2135 } 2136 } 2137 2138 /* 2139 * If this interface is part of the group look at all the 2140 * ills in the group. 
2141 */ 2142 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2143 if (ill->ill_group != NULL) 2144 ill = ill->ill_group->illgrp_ill; 2145 2146 for (; ill != NULL; ill = ill->ill_group_next) { 2147 mutex_enter(&ill->ill_lock); 2148 if (!ILL_CAN_LOOKUP(ill)) { 2149 mutex_exit(&ill->ill_lock); 2150 continue; 2151 } 2152 ill_refhold_locked(ill); 2153 mutex_exit(&ill->ill_lock); 2154 dst_nce = ndp_lookup_v6(ill, &target, B_FALSE); 2155 /* We have to drop the lock since ndp_process calls put* */ 2156 rw_exit(&ipst->ips_ill_g_lock); 2157 if (dst_nce != NULL) { 2158 if ((dst_nce->nce_flags & NCE_F_PERMANENT) && 2159 dst_nce->nce_state == ND_PROBE) { 2160 /* 2161 * Someone else sent an advertisement for an 2162 * address that we're trying to configure. 2163 * Tear it down. Note that dl_mp might be NULL 2164 * if we're getting a unicast reply. This 2165 * isn't typically done (multicast is the norm 2166 * in response to a probe), but ip_ndp_failure 2167 * will handle the dl_mp == NULL case as well. 2168 */ 2169 ip_ndp_failure(ill, mp, dl_mp, dst_nce); 2170 } else if (dst_nce->nce_flags & NCE_F_PERMANENT) { 2171 /* 2172 * Someone just announced one of our local 2173 * addresses. If it wasn't us, then this is a 2174 * conflict. Defend the address or shut it 2175 * down. 2176 */ 2177 if (dl_mp != NULL && 2178 (haddr == NULL || 2179 nce_cmp_ll_addr(dst_nce, haddr, 2180 ill->ill_nd_lla_len))) { 2181 ip_ndp_conflict(ill, mp, dl_mp, 2182 dst_nce); 2183 } 2184 } else { 2185 if (na->nd_na_flags_reserved & 2186 ND_NA_FLAG_ROUTER) { 2187 dst_nce->nce_flags |= NCE_F_ISROUTER; 2188 } 2189 /* B_TRUE indicates this an advertisement */ 2190 ndp_process(dst_nce, haddr, 2191 na->nd_na_flags_reserved, B_TRUE); 2192 } 2193 NCE_REFRELE(dst_nce); 2194 } 2195 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2196 ill_refrele(ill); 2197 } 2198 rw_exit(&ipst->ips_ill_g_lock); 2199 } 2200 2201 /* 2202 * Process NDP neighbor solicitation/advertisement messages. 
2203 * The checksum has already checked o.k before reaching here. 2204 */ 2205 void 2206 ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2207 { 2208 icmp6_t *icmp_nd; 2209 ip6_t *ip6h; 2210 int len; 2211 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2212 2213 2214 if (!pullupmsg(mp, -1)) { 2215 ip1dbg(("ndp_input: pullupmsg failed\n")); 2216 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2217 goto done; 2218 } 2219 ip6h = (ip6_t *)mp->b_rptr; 2220 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 2221 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 2222 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 2223 goto done; 2224 } 2225 /* 2226 * NDP does not accept any extension headers between the 2227 * IP header and the ICMP header since e.g. a routing 2228 * header could be dangerous. 2229 * This assumes that any AH or ESP headers are removed 2230 * by ip prior to passing the packet to ndp_input. 2231 */ 2232 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 2233 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 2234 ip6h->ip6_nxt)); 2235 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2236 goto done; 2237 } 2238 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2239 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 2240 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 2241 if (icmp_nd->icmp6_code != 0) { 2242 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 2243 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2244 goto done; 2245 } 2246 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2247 /* 2248 * Make sure packet length is large enough for either 2249 * a NS or a NA icmp packet. 
2250 */ 2251 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 2252 ip1dbg(("ndp_input: packet too short\n")); 2253 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2254 goto done; 2255 } 2256 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 2257 ndp_input_solicit(ill, mp, dl_mp); 2258 } else { 2259 ndp_input_advert(ill, mp, dl_mp); 2260 } 2261 done: 2262 freemsg(mp); 2263 } 2264 2265 /* 2266 * nce_xmit is called to form and transmit a ND solicitation or 2267 * advertisement ICMP packet. 2268 * 2269 * If the source address is unspecified and this isn't a probe (used for 2270 * duplicate address detection), an appropriate source address and link layer 2271 * address will be chosen here. The link layer address option is included if 2272 * the source is specified (i.e., all non-probe packets), and omitted (per the 2273 * specification) otherwise. 2274 * 2275 * It returns B_FALSE only if it does a successful put() to the 2276 * corresponding ill's ill_wq otherwise returns B_TRUE. 2277 */ 2278 static boolean_t 2279 nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, 2280 boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, 2281 int flag) 2282 { 2283 uint32_t len; 2284 icmp6_t *icmp6; 2285 mblk_t *mp; 2286 ip6_t *ip6h; 2287 nd_opt_hdr_t *opt; 2288 uint_t plen; 2289 ip6i_t *ip6i; 2290 ipif_t *src_ipif = NULL; 2291 uint8_t *hw_addr; 2292 zoneid_t zoneid = GLOBAL_ZONEID; 2293 2294 /* 2295 * If we have a unspecified source(sender) address, select a 2296 * proper source address for the solicitation here itself so 2297 * that we can initialize the h/w address correctly. This is 2298 * needed for interface groups as source address can come from 2299 * the whole group and the h/w address initialized from ill will 2300 * be wrong if the source address comes from a different ill. 2301 * 2302 * If the sender is specified then we use this address in order 2303 * to lookup the zoneid before calling ip_output_v6(). 
This is to 2304 * enable unicast ND_NEIGHBOR_ADVERT packets to be routed correctly 2305 * by IP (we cannot guarantee that the global zone has an interface 2306 * route to the destination). 2307 * 2308 * Note that the NA never comes here with the unspecified source 2309 * address. The following asserts that whenever the source 2310 * address is specified, the haddr also should be specified. 2311 */ 2312 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 2313 2314 if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { 2315 ASSERT(operation != ND_NEIGHBOR_ADVERT); 2316 /* 2317 * Pick a source address for this solicitation, but 2318 * restrict the selection to addresses assigned to the 2319 * output interface (or interface group). We do this 2320 * because the destination will create a neighbor cache 2321 * entry for the source address of this packet, so the 2322 * source address had better be a valid neighbor. 2323 */ 2324 src_ipif = ipif_select_source_v6(ill, target, RESTRICT_TO_ILL, 2325 IPV6_PREFER_SRC_DEFAULT, ALL_ZONES); 2326 if (src_ipif == NULL) { 2327 char buf[INET6_ADDRSTRLEN]; 2328 2329 ip1dbg(("nce_xmit: No source ipif for dst %s\n", 2330 inet_ntop(AF_INET6, (char *)target, buf, 2331 sizeof (buf)))); 2332 return (B_TRUE); 2333 } 2334 sender = &src_ipif->ipif_v6src_addr; 2335 hwaddr_ill = src_ipif->ipif_ill; 2336 } else if (!(IN6_IS_ADDR_UNSPECIFIED(sender))) { 2337 zoneid = ipif_lookup_addr_zoneid_v6(sender, ill, ill->ill_ipst); 2338 /* 2339 * It's possible for ipif_lookup_addr_zoneid_v6() to return 2340 * ALL_ZONES if it cannot find a matching ipif for the address 2341 * we are trying to use. In this case we err on the side of 2342 * trying to send the packet by defaulting to the GLOBAL_ZONEID. 2343 */ 2344 if (zoneid == ALL_ZONES) 2345 zoneid = GLOBAL_ZONEID; 2346 } 2347 2348 /* 2349 * Always make sure that the NS/NA packets don't get load 2350 * spread. 
This is needed so that the probe packets sent 2351 * by the in.mpathd daemon can really go out on the desired 2352 * interface. Probe packets are made to go out on a desired 2353 * interface by including a ip6i with ATTACH_IF flag. As these 2354 * packets indirectly end up sending/receiving NS/NA packets 2355 * (neighbor doing NUD), we have to make sure that NA 2356 * also go out on the same interface. 2357 */ 2358 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7) / 8; 2359 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 2360 plen * 8; 2361 mp = allocb(len, BPRI_LO); 2362 if (mp == NULL) { 2363 if (src_ipif != NULL) 2364 ipif_refrele(src_ipif); 2365 return (B_TRUE); 2366 } 2367 bzero((char *)mp->b_rptr, len); 2368 mp->b_wptr = mp->b_rptr + len; 2369 2370 ip6i = (ip6i_t *)mp->b_rptr; 2371 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2372 ip6i->ip6i_nxt = IPPROTO_RAW; 2373 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2374 if (flag & NDP_PROBE) 2375 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 2376 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2377 2378 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 2379 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2380 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2381 ip6h->ip6_nxt = IPPROTO_ICMPV6; 2382 ip6h->ip6_hops = IPV6_MAX_HOPS; 2383 ip6h->ip6_dst = *target; 2384 icmp6 = (icmp6_t *)&ip6h[1]; 2385 2386 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 2387 sizeof (nd_neighbor_advert_t)); 2388 2389 if (operation == ND_NEIGHBOR_SOLICIT) { 2390 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 2391 2392 if (!(flag & NDP_PROBE)) 2393 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 2394 ip6h->ip6_src = *sender; 2395 ns->nd_ns_target = *target; 2396 if (!(flag & NDP_UNICAST)) { 2397 /* Form multicast address of the target */ 2398 ip6h->ip6_dst = ipv6_solicited_node_mcast; 2399 ip6h->ip6_dst.s6_addr32[3] |= 2400 ns->nd_ns_target.s6_addr32[3]; 2401 } 2402 } else { 2403 
nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 2404 2405 ASSERT(!(flag & NDP_PROBE)); 2406 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2407 ip6h->ip6_src = *sender; 2408 na->nd_na_target = *sender; 2409 if (flag & NDP_ISROUTER) 2410 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 2411 if (flag & NDP_SOLICITED) 2412 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 2413 if (flag & NDP_ORIDE) 2414 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 2415 } 2416 2417 hw_addr = NULL; 2418 if (!(flag & NDP_PROBE)) { 2419 hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : 2420 hwaddr_ill->ill_phys_addr; 2421 if (hw_addr != NULL) { 2422 /* Fill in link layer address and option len */ 2423 opt->nd_opt_len = (uint8_t)plen; 2424 bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); 2425 } 2426 } 2427 if (hw_addr == NULL) { 2428 /* If there's no link layer address option, then strip it. */ 2429 len -= plen * 8; 2430 mp->b_wptr = mp->b_rptr + len; 2431 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2432 } 2433 2434 icmp6->icmp6_type = (uint8_t)operation; 2435 icmp6->icmp6_code = 0; 2436 /* 2437 * Prepare for checksum by putting icmp length in the icmp 2438 * checksum field. The checksum is calculated in ip_wput_v6. 2439 */ 2440 icmp6->icmp6_cksum = ip6h->ip6_plen; 2441 2442 if (src_ipif != NULL) 2443 ipif_refrele(src_ipif); 2444 2445 ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT); 2446 return (B_FALSE); 2447 } 2448 2449 /* 2450 * Make a link layer address (does not include the SAP) from an nce. 2451 * To form the link layer address, use the last four bytes of ipv6 2452 * address passed in and the fixed offset stored in nce. 
*/
static void
nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr)
{
	uchar_t *mask, *to;
	ill_t	*ill = nce->nce_ill;
	int	len;

	/* Nothing to build for interfaces that do not resolve addresses. */
	if (ill->ill_net_type == IRE_IF_NORESOLVER)
		return;
	ASSERT(nce->nce_res_mp != NULL);
	ASSERT(ill->ill_net_type == IRE_IF_RESOLVER);
	ASSERT(nce->nce_flags & NCE_F_MAPPING);
	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask));
	ASSERT(addr != NULL);
	/* Start from the template link layer address held in nce_res_mp. */
	bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill),
	    addrpos, ill->ill_nd_lla_len);
	/* Number of trailing bytes of the IPv6 address to fold in. */
	len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start,
	    IPV6_ADDR_LEN);
	mask = (uchar_t *)&nce->nce_extract_mask;
	mask += (IPV6_ADDR_LEN - len);
	addr += (IPV6_ADDR_LEN - len);
	to = addrpos + nce->nce_ll_extract_start;
	/* OR the masked address bytes into the template. */
	while (len-- > 0)
		*to++ |= *mask++ & *addr++;
}

/*
 * Pass a cache report back out via NDD.
 * Writes a header line into mp and then has ndp_walk() call nce_report1()
 * to append one line per entry.  Always returns 0.
 */
/* ARGSUSED */
int
ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr)
{
	ip_stack_t	*ipst;

	/* Find the IP stack instance from whichever kind of queue this is. */
	if (CONN_Q(q))
		ipst = CONNQ_TO_IPST(q);
	else
		ipst = ILLQ_TO_IPST(q);

	(void) mi_mpprintf(mp, "ifname hardware addr flags"
	    " proto addr/mask");
	ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp, ipst);
	return (0);
}

/*
 * Add a single line to the NDP Cache Entry Report.
 * mp_arg is the report mblk passed through ndp_walk().  IPv4 entries are
 * skipped.  The line carries the interface name, the link layer address
 * (or "None"), the P/R/M flag letters, and the address/prefix length.
 */
static void
nce_report1(nce_t *nce, uchar_t *mp_arg)
{
	ill_t		*ill = nce->nce_ill;
	char		local_buf[INET6_ADDRSTRLEN];
	uchar_t		flags_buf[10];
	uint32_t	flags = nce->nce_flags;
	mblk_t		*mp = (mblk_t *)mp_arg;
	uchar_t		*h;
	uchar_t		*m = flags_buf;
	in6_addr_t	v6addr;

	/*
	 * Lock the nce to protect nce_res_mp from being changed
	 * if an external resolver address resolution completes
	 * while nce_res_mp is being accessed here.
	 *
	 * Deal with all address formats, not just Ethernet-specific
	 * In addition, make sure that the mblk has enough space
	 * before writing to it. If it doesn't, allocate a new one.
	 */
	if (nce->nce_ipversion == IPV4_VERSION)
		/* Don't include v4 nce_ts in NDP cache entry report */
		return;

	ASSERT(ill != NULL);
	v6addr = nce->nce_mask;
	/* Build the flags column: P(ermanent), R(outer), M(apping). */
	if (flags & NCE_F_PERMANENT)
		*m++ = 'P';
	if (flags & NCE_F_ISROUTER)
		*m++ = 'R';
	if (flags & NCE_F_MAPPING)
		*m++ = 'M';
	*m = '\0';

	if (ill->ill_net_type == IRE_IF_RESOLVER) {
		size_t		addrlen;
		char		*addr_buf;
		dl_unitdata_req_t	*dl;

		mutex_enter(&nce->nce_lock);
		h = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
		dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr;
		/* Three output characters are reserved per address byte. */
		if (ill->ill_flags & ILLF_XRESOLV)
			addrlen = (3 * (dl->dl_dest_addr_length));
		else
			addrlen = (3 * (ill->ill_nd_lla_len));
		/* addrlen is unsigned, so this only catches a zero length. */
		if (addrlen <= 0) {
			mutex_exit(&nce->nce_lock);
			(void) mi_mpprintf(mp,
			    "%8s %9s %5s %s/%d",
			    ill->ill_name,
			    "None",
			    (uchar_t *)&flags_buf,
			    inet_ntop(AF_INET6, (char *)&nce->nce_addr,
			    (char *)local_buf, sizeof (local_buf)),
			    ip_mask_to_plen_v6(&v6addr));
		} else {
			/*
			 * Convert the hardware/lla address to ascii
			 */
			addr_buf = kmem_zalloc(addrlen, KM_NOSLEEP);
			if (addr_buf == NULL) {
				/* No memory: this entry is left out. */
				mutex_exit(&nce->nce_lock);
				return;
			}
			(void) mac_colon_addr((uint8_t *)h,
			    (ill->ill_flags & ILLF_XRESOLV) ?
			    dl->dl_dest_addr_length : ill->ill_nd_lla_len,
			    addr_buf, addrlen);
			mutex_exit(&nce->nce_lock);
			(void) mi_mpprintf(mp, "%8s %17s %5s %s/%d",
			    ill->ill_name, addr_buf, (uchar_t *)&flags_buf,
			    inet_ntop(AF_INET6, (char *)&nce->nce_addr,
			    (char *)local_buf, sizeof (local_buf)),
			    ip_mask_to_plen_v6(&v6addr));
			kmem_free(addr_buf, addrlen);
		}
	} else {
		(void) mi_mpprintf(mp,
		    "%8s %9s %5s %s/%d",
		    ill->ill_name,
		    "None",
		    (uchar_t *)&flags_buf,
		    inet_ntop(AF_INET6, (char *)&nce->nce_addr,
		    (char *)local_buf, sizeof (local_buf)),
		    ip_mask_to_plen_v6(&v6addr));
	}
}

/*
 * Allocate a DL_UNITDATA_REQ template for an IPv6 ill.  The destination
 * address area is sized for the ill's link layer address plus its SAP, and
 * the SAP value is copied in.  Returns NULL if the allocation fails.
 */
mblk_t *
nce_udreq_alloc(ill_t *ill)
{
	mblk_t	*template_mp = NULL;
	dl_unitdata_req_t *dlur;
	int	sap_length;

	ASSERT(ill->ill_isv6);

	/* Only the magnitude of sap_length is used for sizing below. */
	sap_length = ill->ill_sap_length;
	template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) +
	    ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ);
	if (template_mp == NULL)
		return (NULL);

	dlur = (dl_unitdata_req_t *)template_mp->b_rptr;
	dlur->dl_priority.dl_min = 0;
	dlur->dl_priority.dl_max = 0;
	dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len;
	dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t);

	/* Copy in the SAP value. */
	NCE_LL_SAP_COPY(ill, template_mp);

	return (template_mp);
}

/*
 * NDP retransmit timer.
 * This timer goes off when:
 * a. It is time to retransmit NS for resolver.
 * b. It is time to send reachability probes.
 */
void
ndp_timer(void *arg)
{
	nce_t		*nce = arg;
	ill_t		*ill = nce->nce_ill;
	uint32_t	ms;
	char		addrbuf[INET6_ADDRSTRLEN];
	mblk_t		*mp;
	boolean_t	dropped = B_FALSE;
	ip_stack_t	*ipst = ill->ill_ipst;

	/*
	 * The timer has to be cancelled by ndp_delete before doing the final
	 * refrele.
So the NCE is guaranteed to exist when the timer runs
	 * until it clears the timeout_id. Before clearing the timeout_id
	 * bump up the refcnt so that we can continue to use the nce
	 */
	ASSERT(nce != NULL);

	/*
	 * Grab the ill_g_lock now itself to avoid lock order problems.
	 * nce_solicit needs ill_g_lock to be able to traverse ills
	 */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	mutex_enter(&nce->nce_lock);
	NCE_REFHOLD_LOCKED(nce);
	nce->nce_timeout_id = 0;

	/*
	 * Check the reachability state first.
	 * Every case below drops ill_g_lock and nce_lock and releases the
	 * reference taken above before leaving the function.
	 */
	switch (nce->nce_state) {
	case ND_DELAY:
		/* Delay expired: move to PROBE and send the first unicast NS */
		rw_exit(&ipst->ips_ill_g_lock);
		nce->nce_state = ND_PROBE;
		mutex_exit(&nce->nce_lock);
		(void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE,
		    &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST);
		if (ip_debug > 3) {
			/* ip2dbg */
			pr_addr_dbg("ndp_timer: state for %s changed "
			    "to PROBE\n", AF_INET6, &nce->nce_addr);
		}
		NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
		NCE_REFRELE(nce);
		return;
	case ND_PROBE:
		/* must be retransmit timer */
		rw_exit(&ipst->ips_ill_g_lock);
		nce->nce_pcnt--;
		ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT &&
		    nce->nce_pcnt >= -1);
		if (nce->nce_pcnt > 0) {
			/*
			 * As per RFC2461, the nce gets deleted after
			 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions.
			 * Note that the first unicast solicitation is sent
			 * during the DELAY state.
			 */
			ip2dbg(("ndp_timer: pcount=%x dst %s\n",
			    nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr,
			    addrbuf, sizeof (addrbuf))));
			mutex_exit(&nce->nce_lock);
			dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL,
			    B_FALSE, &ipv6_all_zeros, &nce->nce_addr,
			    (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE :
			    NDP_UNICAST);
			/* A solicitation that was not sent does not count. */
			if (dropped) {
				mutex_enter(&nce->nce_lock);
				nce->nce_pcnt++;
				mutex_exit(&nce->nce_lock);
			}
			NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill));
		} else if (nce->nce_pcnt < 0) {
			/* No hope, delete the nce */
			nce->nce_state = ND_UNREACHABLE;
			mutex_exit(&nce->nce_lock);
			if (ip_debug > 2) {
				/* ip1dbg */
				pr_addr_dbg("ndp_timer: Delete IRE for"
				    " dst %s\n", AF_INET6, &nce->nce_addr);
			}
			ndp_delete(nce);
		} else if (!(nce->nce_flags & NCE_F_PERMANENT)) {
			/* Wait RetransTimer, before deleting the entry */
			ip2dbg(("ndp_timer: pcount=%x dst %s\n",
			    nce->nce_pcnt, inet_ntop(AF_INET6,
			    &nce->nce_addr, addrbuf, sizeof (addrbuf))));
			mutex_exit(&nce->nce_lock);
			/* Wait one interval before killing */
			NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
		} else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) {
			ipif_t *ipif;

			/*
			 * We're done probing, and we can now declare this
			 * address to be usable.  Let IP know that it's ok to
			 * use.
			 */
			nce->nce_state = ND_REACHABLE;
			mutex_exit(&nce->nce_lock);
			ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill,
			    ALL_ZONES, NULL, NULL, NULL, NULL, ipst);
			if (ipif != NULL) {
				if (ipif->ipif_was_dup) {
					char ibuf[LIFNAMSIZ + 10];
					char sbuf[INET6_ADDRSTRLEN];

					/* Log "<ill name>[:<id>]" recovery. */
					ipif->ipif_was_dup = B_FALSE;
					(void) strlcpy(ibuf, ill->ill_name,
					    sizeof (ibuf));
					(void) inet_ntop(AF_INET6,
					    &ipif->ipif_v6lcl_addr,
					    sbuf, sizeof (sbuf));
					if (ipif->ipif_id != 0) {
						(void) snprintf(ibuf +
						    ill->ill_name_length - 1,
						    sizeof (ibuf) -
						    ill->ill_name_length + 1,
						    ":%d", ipif->ipif_id);
					}
					cmn_err(CE_NOTE, "recovered address "
					    "%s on %s", sbuf, ibuf);
				}
				/* Announce the address the first time only. */
				if ((ipif->ipif_flags & IPIF_UP) &&
				    !ipif->ipif_addr_ready) {
					ip_rts_ifmsg(ipif);
					ip_rts_newaddrmsg(RTM_ADD, 0, ipif);
					sctp_update_ipif(ipif, SCTP_IPIF_UP);
				}
				ipif->ipif_addr_ready = 1;
				ipif_refrele(ipif);
			}
			/* Begin defending our new address */
			/*
			 * NOTE(review): nce_unsolicit_count is written here
			 * without nce_lock, unlike the ND_REACHABLE case
			 * below -- confirm this is intentional.
			 */
			nce->nce_unsolicit_count = 0;
			dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill,
			    B_FALSE, &nce->nce_addr, &ipv6_all_hosts_mcast,
			    nce_advert_flags(nce));
			if (dropped) {
				nce->nce_unsolicit_count = 1;
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_unsolicit_interval);
			} else if (ipst->ips_ip_ndp_defense_interval != 0) {
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_defense_interval);
			}
		} else {
			/*
			 * This is an address we're probing to be our own, but
			 * the ill is down.  Wait until it comes back before
			 * doing anything, but switch to reachable state so
			 * that the restart will work.
			 */
			nce->nce_state = ND_REACHABLE;
			mutex_exit(&nce->nce_lock);
		}
		NCE_REFRELE(nce);
		return;
	case ND_INCOMPLETE:
		/*
		 * Must be resolvers retransmit timer.
		 */
		for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) {
			ip6i_t	*ip6i;
			ip6_t	*ip6h;
			mblk_t *data_mp;

			/*
			 * Walk the list of packets queued, and see if there
			 * are any multipathing probe packets. Such packets
			 * are always queued at the head. Since this is a
			 * retransmit timer firing, mark such packets as
			 * delayed in ND resolution. This info will be used
			 * in ip_wput_v6(). Multipathing probe packets will
			 * always have an ip6i_t. Once we hit a packet without
			 * it, we can break out of this loop.
			 */
			if (mp->b_datap->db_type == M_CTL)
				data_mp = mp->b_cont;
			else
				data_mp = mp;

			ip6h = (ip6_t *)data_mp->b_rptr;
			if (ip6h->ip6_nxt != IPPROTO_RAW)
				break;

			/*
			 * This message should have been pulled up already in
			 * ip_wput_v6. We can't do pullups here because the
			 * b_next/b_prev is non-NULL.
			 */
			ip6i = (ip6i_t *)ip6h;
			ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >=
			    sizeof (ip6i_t) + IPV6_HDR_LEN);

			/* Mark this packet as delayed due to ND resolution */
			if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED)
				ip6i->ip6i_flags |= IP6I_ND_DELAYED;
		}
		if (nce->nce_qd_mp != NULL) {
			/* A zero return from nce_solicit ends the retries. */
			ms = nce_solicit(nce, NULL);
			rw_exit(&ipst->ips_ill_g_lock);
			if (ms == 0) {
				if (nce->nce_state != ND_REACHABLE) {
					mutex_exit(&nce->nce_lock);
					nce_resolv_failed(nce);
					ndp_delete(nce);
				} else {
					mutex_exit(&nce->nce_lock);
				}
			} else {
				mutex_exit(&nce->nce_lock);
				NDP_RESTART_TIMER(nce, (clock_t)ms);
			}
			NCE_REFRELE(nce);
			return;
		}
		/* Nothing is queued, so there is nothing to resolve for. */
		mutex_exit(&nce->nce_lock);
		rw_exit(&ipst->ips_ill_g_lock);
		NCE_REFRELE(nce);
		break;
	case ND_REACHABLE :
		rw_exit(&ipst->ips_ill_g_lock);
		/*
		 * Send another advertisement if unsolicited ones are still
		 * owed, or if this is one of our own addresses and periodic
		 * defense is enabled.
		 */
		if (((nce->nce_flags & NCE_F_UNSOL_ADV) &&
		    nce->nce_unsolicit_count != 0) ||
		    ((nce->nce_flags & NCE_F_PERMANENT) &&
		    ipst->ips_ip_ndp_defense_interval != 0)) {
			if (nce->nce_unsolicit_count > 0)
				nce->nce_unsolicit_count--;
			mutex_exit(&nce->nce_lock);
			dropped = nce_xmit(ill,
			    ND_NEIGHBOR_ADVERT,
			    ill,	/* ill to be used for hw addr */
			    B_FALSE,	/* use ill_phys_addr */
			    &nce->nce_addr,
			    &ipv6_all_hosts_mcast,
			    nce_advert_flags(nce));
			/* An advertisement that was not sent is still owed. */
			if (dropped) {
				mutex_enter(&nce->nce_lock);
				nce->nce_unsolicit_count++;
				mutex_exit(&nce->nce_lock);
			}
			if (nce->nce_unsolicit_count != 0) {
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_unsolicit_interval);
			} else {
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_defense_interval);
			}
		} else {
			mutex_exit(&nce->nce_lock);
		}
		NCE_REFRELE(nce);
		break;
	default:
		rw_exit(&ipst->ips_ill_g_lock);
		mutex_exit(&nce->nce_lock);
		NCE_REFRELE(nce);
		break;
	}
}

/*
 * Set a link layer address from the ll_addr passed in.
 * Copy SAP from ill.
 */
static void
nce_set_ll(nce_t *nce, uchar_t *ll_addr)
{
	ill_t	*ill = nce->nce_ill;
	uchar_t	*woffset;

	ASSERT(ll_addr != NULL);
	/* Always called before fast_path_probe */
	ASSERT(nce->nce_fp_mp == NULL);
	if (ill->ill_sap_length != 0) {
		/*
		 * Copy the SAP type specified in the
		 * request into the xmit template.
		 */
		NCE_LL_SAP_COPY(ill, nce->nce_res_mp);
	}
	if (ill->ill_phys_addr_length > 0) {
		/*
		 * The bcopy() below used to be called for the physical address
		 * length rather than the link layer address length. For
		 * ethernet and many other media, the phys_addr and lla are
		 * identical.
		 * However, with xresolv interfaces being introduced, the
		 * phys_addr and lla are no longer the same, and the physical
		 * address may not have any useful meaning, so we use the lla
		 * for IPv6 address resolution and destination addressing.
*
		 * For PPP or other interfaces with a zero length
		 * physical address, don't do anything here.
		 * The bcopy() with a zero phys_addr length was previously
		 * a no-op for interfaces with a zero-length physical address.
		 * Using the lla for them would change the way they operate.
		 * Doing nothing in such cases preserves expected behavior.
		 */
		woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
		bcopy(ll_addr, woffset, ill->ill_nd_lla_len);
	}
}

/*
 * Compare ll_addr against the link layer address stored in the nce's
 * nce_res_mp template over ll_addr_len bytes.
 * Returns B_TRUE if they differ, and B_FALSE if they match or if ll_addr
 * is NULL.
 */
static boolean_t
nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len)
{
	ill_t	*ill = nce->nce_ill;
	uchar_t	*ll_offset;

	ASSERT(nce->nce_res_mp != NULL);
	if (ll_addr == NULL)
		return (B_FALSE);
	ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
	if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0)
		return (B_TRUE);
	return (B_FALSE);
}

/*
 * Updates the link layer address or the reachability state of
 * a cache entry. Reset probe counter if needed.
 *
 * Called with nce_lock held.  The lock is dropped while the pending timeout
 * is cancelled and the fastpath is re-probed, and is re-acquired before
 * returning.  Pass ND_UNCHANGED to leave the state alone and NULL to leave
 * the link layer address alone.
 */
static void
nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr)
{
	ill_t	*ill = nce->nce_ill;
	boolean_t need_stop_timer = B_FALSE;
	boolean_t need_fastpath_update = B_FALSE;

	ASSERT(MUTEX_HELD(&nce->nce_lock));
	ASSERT(nce->nce_ipversion == IPV6_VERSION);
	/*
	 * If this interface does not do NUD, there is no point
	 * in allowing an update to the cache entry.  Although
	 * we will respond to NS.
	 * The only time we accept an update for a resolver when
	 * NUD is turned off is when it has just been created.
	 * Non-Resolvers will always be created as REACHABLE.
	 */
	if (new_state != ND_UNCHANGED) {
		if ((nce->nce_flags & NCE_F_NONUD) &&
		    (nce->nce_state != ND_INCOMPLETE))
			return;
		ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN);
		ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX);
		need_stop_timer = B_TRUE;
		if (new_state == ND_REACHABLE)
			nce->nce_last = TICK_TO_MSEC(lbolt64);
		else {
			/* We force NUD in this case */
			nce->nce_last = 0;
		}
		nce->nce_state = new_state;
		nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
	}
	/*
	 * In case of fast path we need to free the fastpath
	 * M_DATA and do another probe.  Otherwise we can just
	 * overwrite the DL_UNITDATA_REQ data, noting we'll lose
	 * whatever packets that happens to be transmitting at the time.
	 */
	if (new_ll_addr != NULL) {
		ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) +
		    ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr);
		bcopy(new_ll_addr, nce->nce_res_mp->b_rptr +
		    NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len);
		if (nce->nce_fp_mp != NULL) {
			freemsg(nce->nce_fp_mp);
			nce->nce_fp_mp = NULL;
		}
		need_fastpath_update = B_TRUE;
	}
	/* The calls below are made without nce_lock held. */
	mutex_exit(&nce->nce_lock);
	if (need_stop_timer) {
		(void) untimeout(nce->nce_timeout_id);
		nce->nce_timeout_id = 0;
	}
	if (need_fastpath_update)
		nce_fastpath(nce);
	mutex_enter(&nce->nce_lock);
}

/*
 * Queue mp on the nce's list of packets awaiting resolution, at the head
 * when head_insert is set and at the tail otherwise.  While walking the
 * list, the packet at the head is freed each time the number of entries
 * visited exceeds the ill's ill_max_buf.  Called with nce_lock held.
 */
void
nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert)
{
	uint_t	count = 0;
	mblk_t  **mpp;

	ASSERT(MUTEX_HELD(&nce->nce_lock));

	/*
	 * NOTE(review): if the head freed below is the node that mpp
	 * currently points into (which looks possible only when ill_max_buf
	 * is 0 or 1), the loop step then reads through freed memory --
	 * confirm ill_max_buf is always at least 2.
	 */
	for (mpp = &nce->nce_qd_mp; *mpp != NULL;
	    mpp = &(*mpp)->b_next) {
		if (++count >
		    nce->nce_ill->ill_max_buf) {
			mblk_t *tmp = nce->nce_qd_mp->b_next;

			nce->nce_qd_mp->b_next = NULL;
			nce->nce_qd_mp->b_prev = NULL;
			freemsg(nce->nce_qd_mp);
			nce->nce_qd_mp = tmp;
		}
	}
	/* put this on the list */
	if (head_insert) {
		mp->b_next = nce->nce_qd_mp;
		nce->nce_qd_mp = mp;
	} else {
		/* mpp is left pointing at the terminating link of the list */
		*mpp = mp;
	}
}

/*
 * Queue mp on the nce until address resolution completes.  Packets that
 * carry an ip6i_t with IP6I_DROP_IFDELAYED set are inserted at the head of
 * the queue; everything else goes to the tail.  Called with nce_lock held.
 */
static void
nce_queue_mp(nce_t *nce, mblk_t *mp)
{
	boolean_t head_insert = B_FALSE;
	ip6_t	*ip6h;
	ip6i_t	*ip6i;
	mblk_t *data_mp;

	ASSERT(MUTEX_HELD(&nce->nce_lock));

	/* Skip over a leading M_CTL to reach the packet itself. */
	if (mp->b_datap->db_type == M_CTL)
		data_mp = mp->b_cont;
	else
		data_mp = mp;
	ip6h = (ip6_t *)data_mp->b_rptr;
	if (ip6h->ip6_nxt == IPPROTO_RAW) {
		/*
		 * This message should have been pulled up already in
		 * ip_wput_v6. We can't do pullups here because the message
		 * could be from the nce_qd_mp which could have b_next/b_prev
		 * non-NULL.
		 */
		ip6i = (ip6i_t *)ip6h;
		ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >=
		    sizeof (ip6i_t) + IPV6_HDR_LEN);
		/*
		 * Multipathing probe packets have IP6I_DROP_IFDELAYED set.
		 * This has 2 aspects mentioned below.
		 * 1. Perform head insertion in the nce_qd_mp for these packets.
		 *    This ensures that next retransmit of ND solicitation
		 *    will use the interface specified by the probe packet,
		 *    for both NS and NA. This corresponds to the src address
		 *    in the IPv6 packet. If we insert at tail, we will be
		 *    depending on the packet at the head for successful
		 *    ND resolution. This is not reliable, because the interface
		 *    on which the NA arrives could be different from the interface
		 *    on which the NS was sent, and if the receiving interface is
		 *    failed, it will appear that the sending interface is also
		 *    failed, causing in.mpathd to misdiagnose this as link
		 *    failure.
		 * 2. Drop the original packet, if the ND resolution did not
		 *    succeed in the first attempt. However we will create the
		 *    nce and the ire, as soon as the ND resolution succeeds.
		 *    We don't gain anything by queueing multiple probe packets
		 *    and sending them back-to-back once resolution succeeds.
3084 * It is sufficient to send just 1 packet after ND resolution 3085 * succeeds. Since mpathd is sending down probe packets at a 3086 * constant rate, we don't need to send the queued packet. We 3087 * need to queue it only for NDP resolution. The benefit of 3088 * dropping the probe packets that were delayed in ND 3089 * resolution, is that in.mpathd will not see inflated 3090 * RTT. If the ND resolution does not succeed within 3091 * in.mpathd's failure detection time, mpathd may detect 3092 * a failure, and it does not matter whether the packet 3093 * was queued or dropped. 3094 */ 3095 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 3096 head_insert = B_TRUE; 3097 } 3098 3099 nce_queue_mp_common(nce, mp, head_insert); 3100 } 3101 3102 /* 3103 * Called when address resolution failed due to a timeout. 3104 * Send an ICMP unreachable in response to all queued packets. 3105 */ 3106 void 3107 nce_resolv_failed(nce_t *nce) 3108 { 3109 mblk_t *mp, *nxt_mp, *first_mp; 3110 char buf[INET6_ADDRSTRLEN]; 3111 ip6_t *ip6h; 3112 zoneid_t zoneid = GLOBAL_ZONEID; 3113 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3114 3115 ip1dbg(("nce_resolv_failed: dst %s\n", 3116 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 3117 mutex_enter(&nce->nce_lock); 3118 mp = nce->nce_qd_mp; 3119 nce->nce_qd_mp = NULL; 3120 mutex_exit(&nce->nce_lock); 3121 while (mp != NULL) { 3122 nxt_mp = mp->b_next; 3123 mp->b_next = NULL; 3124 mp->b_prev = NULL; 3125 3126 first_mp = mp; 3127 if (mp->b_datap->db_type == M_CTL) { 3128 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 3129 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3130 zoneid = io->ipsec_out_zoneid; 3131 ASSERT(zoneid != ALL_ZONES); 3132 mp = mp->b_cont; 3133 } 3134 3135 ip6h = (ip6_t *)mp->b_rptr; 3136 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3137 ip6i_t *ip6i; 3138 /* 3139 * This message should have been pulled up already 3140 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 3141 * the header is pulled up. 
3142 */ 3143 ip6i = (ip6i_t *)ip6h; 3144 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 3145 sizeof (ip6i_t) + IPV6_HDR_LEN); 3146 mp->b_rptr += sizeof (ip6i_t); 3147 } 3148 /* 3149 * Ignore failure since icmp_unreachable_v6 will silently 3150 * drop packets with an unspecified source address. 3151 */ 3152 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst); 3153 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 3154 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst); 3155 mp = nxt_mp; 3156 } 3157 } 3158 3159 /* 3160 * Called by SIOCSNDP* ioctl to add/change an nce entry 3161 * and the corresponding attributes. 3162 * Disallow states other than ND_REACHABLE or ND_STALE. 3163 */ 3164 int 3165 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 3166 { 3167 sin6_t *sin6; 3168 in6_addr_t *addr; 3169 nce_t *nce; 3170 int err; 3171 uint16_t new_flags = 0; 3172 uint16_t old_flags = 0; 3173 int inflags = lnr->lnr_flags; 3174 ip_stack_t *ipst = ill->ill_ipst; 3175 3176 ASSERT(ill->ill_isv6); 3177 if ((lnr->lnr_state_create != ND_REACHABLE) && 3178 (lnr->lnr_state_create != ND_STALE)) 3179 return (EINVAL); 3180 3181 sin6 = (sin6_t *)&lnr->lnr_addr; 3182 addr = &sin6->sin6_addr; 3183 3184 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 3185 /* We know it can not be mapping so just look in the hash table */ 3186 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 3187 nce = nce_lookup_addr(ill, addr, nce); 3188 if (nce != NULL) 3189 new_flags = nce->nce_flags; 3190 3191 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 3192 case NDF_ISROUTER_ON: 3193 new_flags |= NCE_F_ISROUTER; 3194 break; 3195 case NDF_ISROUTER_OFF: 3196 new_flags &= ~NCE_F_ISROUTER; 3197 break; 3198 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3199 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3200 if (nce != NULL) 3201 NCE_REFRELE(nce); 3202 return (EINVAL); 3203 } 3204 3205 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3206 case NDF_ANYCAST_ON: 3207 new_flags |= NCE_F_ANYCAST; 3208 break; 3209 case 
NDF_ANYCAST_OFF: 3210 new_flags &= ~NCE_F_ANYCAST; 3211 break; 3212 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3213 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3214 if (nce != NULL) 3215 NCE_REFRELE(nce); 3216 return (EINVAL); 3217 } 3218 3219 switch (inflags & (NDF_PROXY_ON|NDF_PROXY_OFF)) { 3220 case NDF_PROXY_ON: 3221 new_flags |= NCE_F_PROXY; 3222 break; 3223 case NDF_PROXY_OFF: 3224 new_flags &= ~NCE_F_PROXY; 3225 break; 3226 case (NDF_PROXY_OFF|NDF_PROXY_ON): 3227 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3228 if (nce != NULL) 3229 NCE_REFRELE(nce); 3230 return (EINVAL); 3231 } 3232 3233 if (nce == NULL) { 3234 err = ndp_add(ill, 3235 (uchar_t *)lnr->lnr_hdw_addr, 3236 addr, 3237 &ipv6_all_ones, 3238 &ipv6_all_zeros, 3239 0, 3240 new_flags, 3241 lnr->lnr_state_create, 3242 &nce, 3243 NULL, 3244 NULL); 3245 if (err != 0) { 3246 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3247 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 3248 return (err); 3249 } 3250 } 3251 old_flags = nce->nce_flags; 3252 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3253 /* 3254 * Router turned to host, delete all ires. 3255 * XXX Just delete the entry, but we need to add too. 3256 */ 3257 nce->nce_flags &= ~NCE_F_ISROUTER; 3258 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3259 ndp_delete(nce); 3260 NCE_REFRELE(nce); 3261 return (0); 3262 } 3263 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3264 3265 mutex_enter(&nce->nce_lock); 3266 nce->nce_flags = new_flags; 3267 mutex_exit(&nce->nce_lock); 3268 /* 3269 * Note that we ignore the state at this point, which 3270 * should be either STALE or REACHABLE. Instead we let 3271 * the link layer address passed in to determine the state 3272 * much like incoming packets. 3273 */ 3274 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3275 NCE_REFRELE(nce); 3276 return (0); 3277 } 3278 3279 /* 3280 * If the device driver supports it, we make nce_fp_mp to have 3281 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 
3282 * The caller insures there is hold on nce for this function. 3283 * Note that since ill_fastpath_probe() copies the mblk there is 3284 * no need for the hold beyond this function. 3285 */ 3286 void 3287 nce_fastpath(nce_t *nce) 3288 { 3289 ill_t *ill = nce->nce_ill; 3290 int res; 3291 3292 ASSERT(ill != NULL); 3293 if ((nce->nce_fp_mp != NULL) || 3294 !(ire_nce_valid_dlureq_mp(nce->nce_res_mp))) { 3295 /* 3296 * Already contains fastpath info or nce is not 3297 * resolved, so cant process fastpath yet. 3298 */ 3299 return; 3300 } 3301 if (nce->nce_res_mp != NULL) { 3302 nce_fastpath_list_add(nce); 3303 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3304 /* 3305 * EAGAIN is an indication of a transient error 3306 * i.e. allocation failure etc. leave the nce in the list it 3307 * will be updated when another probe happens for another ire 3308 * if not it will be taken out of the list when the ire is 3309 * deleted. 3310 */ 3311 3312 if (res != 0 && res != EAGAIN) 3313 nce_fastpath_list_delete(nce); 3314 } 3315 } 3316 3317 /* 3318 * Drain the list of nce's waiting for fastpath response. 3319 */ 3320 void 3321 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3322 void *arg) 3323 { 3324 3325 nce_t *next_nce; 3326 nce_t *current_nce; 3327 nce_t *first_nce; 3328 nce_t *prev_nce = NULL; 3329 3330 mutex_enter(&ill->ill_lock); 3331 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3332 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3333 next_nce = current_nce->nce_fastpath; 3334 /* 3335 * Take it off the list if we're flushing, or if the callback 3336 * routine tells us to do so. Otherwise, leave the nce in the 3337 * fastpath list to handle any pending response from the lower 3338 * layer. We can't drain the list when the callback routine 3339 * comparison failed, because the response is asynchronous in 3340 * nature, and may not arrive in the same order as the list 3341 * insertion. 
3342 */ 3343 if (func == NULL || func(current_nce, arg)) { 3344 current_nce->nce_fastpath = NULL; 3345 if (current_nce == first_nce) 3346 ill->ill_fastpath_list = first_nce = next_nce; 3347 else 3348 prev_nce->nce_fastpath = next_nce; 3349 } else { 3350 /* previous element that is still in the list */ 3351 prev_nce = current_nce; 3352 } 3353 current_nce = next_nce; 3354 } 3355 mutex_exit(&ill->ill_lock); 3356 } 3357 3358 /* 3359 * Add nce to the nce fastpath list. 3360 */ 3361 void 3362 nce_fastpath_list_add(nce_t *nce) 3363 { 3364 ill_t *ill; 3365 3366 ill = nce->nce_ill; 3367 3368 mutex_enter(&ill->ill_lock); 3369 mutex_enter(&nce->nce_lock); 3370 3371 /* 3372 * if nce has not been deleted and 3373 * is not already in the list add it. 3374 */ 3375 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3376 (nce->nce_fastpath == NULL)) { 3377 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3378 ill->ill_fastpath_list = nce; 3379 } 3380 3381 mutex_exit(&nce->nce_lock); 3382 mutex_exit(&ill->ill_lock); 3383 } 3384 3385 /* 3386 * remove nce from the nce fastpath list. 3387 */ 3388 void 3389 nce_fastpath_list_delete(nce_t *nce) 3390 { 3391 nce_t *nce_ptr; 3392 3393 ill_t *ill; 3394 3395 ill = nce->nce_ill; 3396 ASSERT(ill != NULL); 3397 3398 mutex_enter(&ill->ill_lock); 3399 if (nce->nce_fastpath == NULL) 3400 goto done; 3401 3402 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3403 3404 if (ill->ill_fastpath_list == nce) { 3405 ill->ill_fastpath_list = nce->nce_fastpath; 3406 } else { 3407 nce_ptr = ill->ill_fastpath_list; 3408 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3409 if (nce_ptr->nce_fastpath == nce) { 3410 nce_ptr->nce_fastpath = nce->nce_fastpath; 3411 break; 3412 } 3413 nce_ptr = nce_ptr->nce_fastpath; 3414 } 3415 } 3416 3417 nce->nce_fastpath = NULL; 3418 done: 3419 mutex_exit(&ill->ill_lock); 3420 } 3421 3422 /* 3423 * Update all NCE's that are not in fastpath mode and 3424 * have an nce_fp_mp that matches mp. 
mp->b_cont contains 3425 * the fastpath header. 3426 * 3427 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 3428 */ 3429 boolean_t 3430 ndp_fastpath_update(nce_t *nce, void *arg) 3431 { 3432 mblk_t *mp, *fp_mp; 3433 uchar_t *mp_rptr, *ud_mp_rptr; 3434 mblk_t *ud_mp = nce->nce_res_mp; 3435 ptrdiff_t cmplen; 3436 3437 if (nce->nce_flags & NCE_F_MAPPING) 3438 return (B_TRUE); 3439 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3440 return (B_TRUE); 3441 3442 ip2dbg(("ndp_fastpath_update: trying\n")); 3443 mp = (mblk_t *)arg; 3444 mp_rptr = mp->b_rptr; 3445 cmplen = mp->b_wptr - mp_rptr; 3446 ASSERT(cmplen >= 0); 3447 ud_mp_rptr = ud_mp->b_rptr; 3448 /* 3449 * The nce is locked here to prevent any other threads 3450 * from accessing and changing nce_res_mp when the IPv6 address 3451 * becomes resolved to an lla while we're in the middle 3452 * of looking at and comparing the hardware address (lla). 3453 * It is also locked to prevent multiple threads in nce_fastpath_update 3454 * from examining nce_res_mp atthe same time. 3455 */ 3456 mutex_enter(&nce->nce_lock); 3457 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3458 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3459 mutex_exit(&nce->nce_lock); 3460 /* 3461 * Don't take the ire off the fastpath list yet, 3462 * since the response may come later. 3463 */ 3464 return (B_FALSE); 3465 } 3466 /* Matched - install mp as the fastpath mp */ 3467 ip1dbg(("ndp_fastpath_update: match\n")); 3468 fp_mp = dupb(mp->b_cont); 3469 if (fp_mp != NULL) { 3470 nce->nce_fp_mp = fp_mp; 3471 } 3472 mutex_exit(&nce->nce_lock); 3473 return (B_TRUE); 3474 } 3475 3476 /* 3477 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3478 * driver. Note that it assumes IP is exclusive... 3479 */ 3480 /* ARGSUSED */ 3481 void 3482 ndp_fastpath_flush(nce_t *nce, char *arg) 3483 { 3484 if (nce->nce_flags & NCE_F_MAPPING) 3485 return; 3486 /* No fastpath info? 
*/ 3487 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3488 return; 3489 3490 if (nce->nce_ipversion == IPV4_VERSION && 3491 nce->nce_flags & NCE_F_BCAST) { 3492 /* 3493 * IPv4 BROADCAST entries: 3494 * We can't delete the nce since it is difficult to 3495 * recreate these without going through the 3496 * ipif down/up dance. 3497 * 3498 * All access to nce->nce_fp_mp in the case of these 3499 * is protected by nce_lock. 3500 */ 3501 mutex_enter(&nce->nce_lock); 3502 if (nce->nce_fp_mp != NULL) { 3503 freeb(nce->nce_fp_mp); 3504 nce->nce_fp_mp = NULL; 3505 mutex_exit(&nce->nce_lock); 3506 nce_fastpath(nce); 3507 } else { 3508 mutex_exit(&nce->nce_lock); 3509 } 3510 } else { 3511 /* Just delete the NCE... */ 3512 ndp_delete(nce); 3513 } 3514 } 3515 3516 /* 3517 * Return a pointer to a given option in the packet. 3518 * Assumes that option part of the packet have already been validated. 3519 */ 3520 nd_opt_hdr_t * 3521 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3522 { 3523 while (optlen > 0) { 3524 if (opt->nd_opt_type == opt_type) 3525 return (opt); 3526 optlen -= 8 * opt->nd_opt_len; 3527 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3528 } 3529 return (NULL); 3530 } 3531 3532 /* 3533 * Verify all option lengths present are > 0, also check to see 3534 * if the option lengths and packet length are consistent. 3535 */ 3536 boolean_t 3537 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3538 { 3539 ASSERT(opt != NULL); 3540 while (optlen > 0) { 3541 if (opt->nd_opt_len == 0) 3542 return (B_FALSE); 3543 optlen -= 8 * opt->nd_opt_len; 3544 if (optlen < 0) 3545 return (B_FALSE); 3546 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3547 } 3548 return (B_TRUE); 3549 } 3550 3551 /* 3552 * ndp_walk function. 3553 * Free a fraction of the NCE cache entries. 3554 * A fraction of zero means to not free any in that category. 
3555 */ 3556 void 3557 ndp_cache_reclaim(nce_t *nce, char *arg) 3558 { 3559 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3560 uint_t rand; 3561 3562 if (nce->nce_flags & NCE_F_PERMANENT) 3563 return; 3564 3565 rand = (uint_t)lbolt + 3566 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3567 if (ncr->ncr_host != 0 && 3568 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3569 ndp_delete(nce); 3570 return; 3571 } 3572 } 3573 3574 /* 3575 * ndp_walk function. 3576 * Count the number of NCEs that can be deleted. 3577 * These would be hosts but not routers. 3578 */ 3579 void 3580 ndp_cache_count(nce_t *nce, char *arg) 3581 { 3582 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3583 3584 if (nce->nce_flags & NCE_F_PERMANENT) 3585 return; 3586 3587 ncc->ncc_total++; 3588 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3589 ncc->ncc_host++; 3590 } 3591 3592 #ifdef NCE_DEBUG 3593 th_trace_t * 3594 th_trace_nce_lookup(nce_t *nce) 3595 { 3596 int bucket_id; 3597 th_trace_t *th_trace; 3598 3599 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3600 3601 bucket_id = IP_TR_HASH(curthread); 3602 ASSERT(bucket_id < IP_TR_HASH_MAX); 3603 3604 for (th_trace = nce->nce_trace[bucket_id]; th_trace != NULL; 3605 th_trace = th_trace->th_next) { 3606 if (th_trace->th_id == curthread) 3607 return (th_trace); 3608 } 3609 return (NULL); 3610 } 3611 3612 void 3613 nce_trace_ref(nce_t *nce) 3614 { 3615 int bucket_id; 3616 th_trace_t *th_trace; 3617 3618 /* 3619 * Attempt to locate the trace buffer for the curthread. 
3620 * If it does not exist, then allocate a new trace buffer 3621 * and link it in list of trace bufs for this ipif, at the head 3622 */ 3623 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3624 3625 if (nce->nce_trace_disable == B_TRUE) 3626 return; 3627 3628 th_trace = th_trace_nce_lookup(nce); 3629 if (th_trace == NULL) { 3630 bucket_id = IP_TR_HASH(curthread); 3631 th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), 3632 KM_NOSLEEP); 3633 if (th_trace == NULL) { 3634 nce->nce_trace_disable = B_TRUE; 3635 nce_trace_inactive(nce); 3636 return; 3637 } 3638 th_trace->th_id = curthread; 3639 th_trace->th_next = nce->nce_trace[bucket_id]; 3640 th_trace->th_prev = &nce->nce_trace[bucket_id]; 3641 if (th_trace->th_next != NULL) 3642 th_trace->th_next->th_prev = &th_trace->th_next; 3643 nce->nce_trace[bucket_id] = th_trace; 3644 } 3645 ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1); 3646 th_trace->th_refcnt++; 3647 th_trace_rrecord(th_trace); 3648 } 3649 3650 void 3651 nce_untrace_ref(nce_t *nce) 3652 { 3653 th_trace_t *th_trace; 3654 3655 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3656 3657 if (nce->nce_trace_disable == B_TRUE) 3658 return; 3659 3660 th_trace = th_trace_nce_lookup(nce); 3661 ASSERT(th_trace != NULL && th_trace->th_refcnt > 0); 3662 3663 th_trace_rrecord(th_trace); 3664 th_trace->th_refcnt--; 3665 } 3666 3667 void 3668 nce_trace_inactive(nce_t *nce) 3669 { 3670 th_trace_t *th_trace; 3671 int i; 3672 3673 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3674 3675 for (i = 0; i < IP_TR_HASH_MAX; i++) { 3676 while (nce->nce_trace[i] != NULL) { 3677 th_trace = nce->nce_trace[i]; 3678 3679 /* unlink th_trace and free it */ 3680 nce->nce_trace[i] = th_trace->th_next; 3681 if (th_trace->th_next != NULL) 3682 th_trace->th_next->th_prev = 3683 &nce->nce_trace[i]; 3684 3685 th_trace->th_next = NULL; 3686 th_trace->th_prev = NULL; 3687 kmem_free(th_trace, sizeof (th_trace_t)); 3688 } 3689 } 3690 3691 } 3692 3693 /* ARGSUSED */ 3694 int 3695 nce_thread_exit(nce_t *nce, caddr_t arg) 3696 { 
3697 th_trace_t *th_trace; 3698 3699 mutex_enter(&nce->nce_lock); 3700 th_trace = th_trace_nce_lookup(nce); 3701 3702 if (th_trace == NULL) { 3703 mutex_exit(&nce->nce_lock); 3704 return (0); 3705 } 3706 3707 ASSERT(th_trace->th_refcnt == 0); 3708 3709 /* unlink th_trace and free it */ 3710 *th_trace->th_prev = th_trace->th_next; 3711 if (th_trace->th_next != NULL) 3712 th_trace->th_next->th_prev = th_trace->th_prev; 3713 th_trace->th_next = NULL; 3714 th_trace->th_prev = NULL; 3715 kmem_free(th_trace, sizeof (th_trace_t)); 3716 mutex_exit(&nce->nce_lock); 3717 return (0); 3718 } 3719 #endif 3720 3721 /* 3722 * Called when address resolution fails due to a timeout. 3723 * Send an ICMP unreachable in response to all queued packets. 3724 */ 3725 void 3726 arp_resolv_failed(nce_t *nce) 3727 { 3728 mblk_t *mp, *nxt_mp, *first_mp; 3729 char buf[INET6_ADDRSTRLEN]; 3730 zoneid_t zoneid = GLOBAL_ZONEID; 3731 struct in_addr ipv4addr; 3732 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3733 3734 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3735 ip3dbg(("arp_resolv_failed: dst %s\n", 3736 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3737 mutex_enter(&nce->nce_lock); 3738 mp = nce->nce_qd_mp; 3739 nce->nce_qd_mp = NULL; 3740 mutex_exit(&nce->nce_lock); 3741 3742 while (mp != NULL) { 3743 nxt_mp = mp->b_next; 3744 mp->b_next = NULL; 3745 mp->b_prev = NULL; 3746 3747 first_mp = mp; 3748 /* 3749 * Send icmp unreachable messages 3750 * to the hosts. 
3751 */ 3752 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst); 3753 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3754 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3755 ICMP_HOST_UNREACHABLE, zoneid, ipst); 3756 mp = nxt_mp; 3757 } 3758 } 3759 3760 static int 3761 ndp_lookup_then_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, 3762 const in_addr_t *mask, const in_addr_t *extract_mask, 3763 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 3764 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 3765 { 3766 int err = 0; 3767 nce_t *nce; 3768 in6_addr_t addr6; 3769 ip_stack_t *ipst = ill->ill_ipst; 3770 3771 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3772 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3773 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3774 nce = nce_lookup_addr(ill, &addr6, nce); 3775 if (nce == NULL) { 3776 err = ndp_add_v4(ill, 3777 hw_addr, 3778 addr, 3779 mask, 3780 extract_mask, 3781 hw_extract_start, 3782 flags, 3783 state, 3784 newnce, 3785 fp_mp, 3786 res_mp); 3787 } else { 3788 *newnce = nce; 3789 err = EEXIST; 3790 } 3791 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3792 return (err); 3793 } 3794 3795 /* 3796 * NDP Cache Entry creation routine for IPv4. 3797 * Mapped entries are handled in arp. 3798 * This routine must always be called with ndp4->ndp_g_lock held. 3799 * Prior to return, nce_refcnt is incremented. 
3800 */ 3801 static int 3802 ndp_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, 3803 const in_addr_t *mask, const in_addr_t *extract_mask, 3804 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 3805 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 3806 { 3807 static nce_t nce_nil; 3808 nce_t *nce; 3809 mblk_t *mp; 3810 mblk_t *template; 3811 nce_t **ncep; 3812 ip_stack_t *ipst = ill->ill_ipst; 3813 3814 ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock)); 3815 ASSERT(ill != NULL); 3816 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 3817 return (EINVAL); 3818 } 3819 ASSERT((flags & NCE_F_MAPPING) == 0); 3820 ASSERT(extract_mask == NULL); 3821 /* 3822 * Allocate the mblk to hold the nce. 3823 */ 3824 mp = allocb(sizeof (nce_t), BPRI_MED); 3825 if (mp == NULL) 3826 return (ENOMEM); 3827 3828 nce = (nce_t *)mp->b_rptr; 3829 mp->b_wptr = (uchar_t *)&nce[1]; 3830 *nce = nce_nil; 3831 3832 /* 3833 * This one holds link layer address; if res_mp has been provided 3834 * by the caller, accept it without any further checks. Otherwise, 3835 * for V4, we fill it up with ill_resolver_mp here, then in 3836 * in ire_arpresolve(), we fill it up with the ARP query 3837 * once its formulated. 
3838 */ 3839 if (res_mp != NULL) { 3840 template = res_mp; 3841 } else { 3842 if (ill->ill_resolver_mp == NULL) { 3843 freeb(mp); 3844 return (EINVAL); 3845 } 3846 template = copyb(ill->ill_resolver_mp); 3847 } 3848 if (template == NULL) { 3849 freeb(mp); 3850 return (ENOMEM); 3851 } 3852 nce->nce_ill = ill; 3853 nce->nce_ipversion = IPV4_VERSION; 3854 nce->nce_flags = flags; 3855 nce->nce_state = state; 3856 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3857 nce->nce_rcnt = ill->ill_xmit_count; 3858 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3859 if (*mask == IP_HOST_MASK) { 3860 nce->nce_mask = ipv6_all_ones; 3861 } else { 3862 IN6_IPADDR_TO_V4MAPPED(*mask, &nce->nce_mask); 3863 } 3864 nce->nce_extract_mask = ipv6_all_zeros; 3865 nce->nce_ll_extract_start = hw_extract_start; 3866 nce->nce_fp_mp = (fp_mp? fp_mp : NULL); 3867 nce->nce_res_mp = template; 3868 if (state == ND_REACHABLE) 3869 nce->nce_last = TICK_TO_MSEC(lbolt64); 3870 else 3871 nce->nce_last = 0; 3872 nce->nce_qd_mp = NULL; 3873 nce->nce_mp = mp; 3874 if (hw_addr != NULL) 3875 nce_set_ll(nce, hw_addr); 3876 /* This one is for nce getting created */ 3877 nce->nce_refcnt = 1; 3878 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3879 ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 3880 3881 #ifdef NCE_DEBUG 3882 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 3883 #endif 3884 /* 3885 * Atomically ensure that the ill is not CONDEMNED, before 3886 * adding the NCE. 3887 */ 3888 mutex_enter(&ill->ill_lock); 3889 if (ill->ill_state_flags & ILL_CONDEMNED) { 3890 mutex_exit(&ill->ill_lock); 3891 freeb(mp); 3892 if (res_mp == NULL) { 3893 /* 3894 * template was locally allocated. need to free it. 
3895 */ 3896 freeb(template); 3897 } 3898 return (EINVAL); 3899 } 3900 if ((nce->nce_next = *ncep) != NULL) 3901 nce->nce_next->nce_ptpn = &nce->nce_next; 3902 *ncep = nce; 3903 nce->nce_ptpn = ncep; 3904 *newnce = nce; 3905 /* This one is for nce being used by an active thread */ 3906 NCE_REFHOLD(*newnce); 3907 3908 /* Bump up the number of nce's referencing this ill */ 3909 ill->ill_nce_cnt++; 3910 mutex_exit(&ill->ill_lock); 3911 return (0); 3912 } 3913 3914 void 3915 ndp_flush_qd_mp(nce_t *nce) 3916 { 3917 mblk_t *qd_mp, *qd_next; 3918 3919 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3920 qd_mp = nce->nce_qd_mp; 3921 nce->nce_qd_mp = NULL; 3922 while (qd_mp != NULL) { 3923 qd_next = qd_mp->b_next; 3924 qd_mp->b_next = NULL; 3925 qd_mp->b_prev = NULL; 3926 freemsg(qd_mp); 3927 qd_mp = qd_next; 3928 } 3929 } 3930 3931 nce_t * 3932 nce_reinit(nce_t *nce) 3933 { 3934 nce_t *newnce = NULL; 3935 in_addr_t nce_addr, nce_mask; 3936 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3937 3938 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3939 IN6_V4MAPPED_TO_IPADDR(&nce->nce_mask, nce_mask); 3940 /* 3941 * delete the old one. this will get rid of any ire's pointing 3942 * at this nce. 3943 */ 3944 ndp_delete(nce); 3945 /* 3946 * create a new nce with the same addr and mask. 3947 */ 3948 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3949 (void) ndp_add_v4(nce->nce_ill, NULL, &nce_addr, &nce_mask, NULL, 0, 0, 3950 ND_INITIAL, &newnce, NULL, NULL); 3951 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3952 /* 3953 * refrele the old nce. 3954 */ 3955 NCE_REFRELE(nce); 3956 return (newnce); 3957 } 3958 3959 /* 3960 * ndp_walk routine to delete all entries that have a given destination or 3961 * gateway address and cached link layer (MAC) address. This is used when ARP 3962 * informs us that a network-to-link-layer mapping may have changed. 
3963 */ 3964 void 3965 nce_delete_hw_changed(nce_t *nce, void *arg) 3966 { 3967 nce_hw_map_t *hwm = arg; 3968 mblk_t *mp; 3969 dl_unitdata_req_t *dlu; 3970 uchar_t *macaddr; 3971 ill_t *ill; 3972 int saplen; 3973 ipaddr_t nce_addr; 3974 3975 if (nce->nce_state != ND_REACHABLE) 3976 return; 3977 3978 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3979 if (nce_addr != hwm->hwm_addr) 3980 return; 3981 3982 mutex_enter(&nce->nce_lock); 3983 if ((mp = nce->nce_res_mp) == NULL) { 3984 mutex_exit(&nce->nce_lock); 3985 return; 3986 } 3987 dlu = (dl_unitdata_req_t *)mp->b_rptr; 3988 macaddr = (uchar_t *)(dlu + 1); 3989 ill = nce->nce_ill; 3990 if ((saplen = ill->ill_sap_length) > 0) 3991 macaddr += saplen; 3992 else 3993 saplen = -saplen; 3994 3995 /* 3996 * If the hardware address is unchanged, then leave this one alone. 3997 * Note that saplen == abs(saplen) now. 3998 */ 3999 if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && 4000 bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { 4001 mutex_exit(&nce->nce_lock); 4002 return; 4003 } 4004 mutex_exit(&nce->nce_lock); 4005 4006 DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); 4007 ndp_delete(nce); 4008 } 4009 4010 /* 4011 * This function verifies whether a given IPv4 address is potentially known to 4012 * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t, 4013 * so that it can continue to look for hardware changes on that address. 4014 */ 4015 boolean_t 4016 ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns) 4017 { 4018 nce_t *nce; 4019 struct in_addr nceaddr; 4020 ip_stack_t *ipst = ns->netstack_ip; 4021 4022 if (addr == INADDR_ANY) 4023 return (B_FALSE); 4024 4025 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 4026 nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr); 4027 for (; nce != NULL; nce = nce->nce_next) { 4028 /* Note that only v4 mapped entries are in the table. 
*/ 4029 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 4030 if (addr == nceaddr.s_addr && 4031 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 4032 /* Single flag check; no lock needed */ 4033 if (!(nce->nce_flags & NCE_F_CONDEMNED)) 4034 break; 4035 } 4036 } 4037 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 4038 return (nce != NULL); 4039 } 4040